{ "best_global_step": 467975, "best_metric": 0.03080366345672468, "best_model_checkpoint": "logs/whisper-large-v3-mixed-10eps-clean-text-199k/checkpoint-467975", "epoch": 5.0, "eval_steps": 500, "global_step": 467975, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010684331427960895, "grad_norm": 19.364164352416992, "learning_rate": 9e-09, "loss": 2.5805, "step": 10 }, { "epoch": 0.0002136866285592179, "grad_norm": 18.474424362182617, "learning_rate": 1.8999999999999998e-08, "loss": 2.7987, "step": 20 }, { "epoch": 0.0003205299428388269, "grad_norm": 28.113643646240234, "learning_rate": 2.9e-08, "loss": 2.9154, "step": 30 }, { "epoch": 0.0004273732571184358, "grad_norm": 30.94753646850586, "learning_rate": 3.9e-08, "loss": 3.0312, "step": 40 }, { "epoch": 0.0005342165713980448, "grad_norm": 23.736467361450195, "learning_rate": 4.9e-08, "loss": 2.9218, "step": 50 }, { "epoch": 0.0006410598856776538, "grad_norm": 24.789615631103516, "learning_rate": 5.899999999999999e-08, "loss": 2.8598, "step": 60 }, { "epoch": 0.0007479031999572627, "grad_norm": 28.850522994995117, "learning_rate": 6.900000000000001e-08, "loss": 2.8894, "step": 70 }, { "epoch": 0.0008547465142368716, "grad_norm": 16.068086624145508, "learning_rate": 7.899999999999999e-08, "loss": 2.8622, "step": 80 }, { "epoch": 0.0009615898285164806, "grad_norm": 20.449785232543945, "learning_rate": 8.899999999999999e-08, "loss": 2.6066, "step": 90 }, { "epoch": 0.0010684331427960895, "grad_norm": 16.824962615966797, "learning_rate": 9.9e-08, "loss": 3.0712, "step": 100 }, { "epoch": 0.0011752764570756985, "grad_norm": 39.207847595214844, "learning_rate": 1.09e-07, "loss": 2.6578, "step": 110 }, { "epoch": 0.0012821197713553075, "grad_norm": 17.082727432250977, "learning_rate": 1.19e-07, "loss": 2.4776, "step": 120 }, { "epoch": 0.0013889630856349165, "grad_norm": 22.472583770751953, "learning_rate": 1.29e-07, "loss": 2.5818, "step": 130 }, { "epoch": 0.0014958063999145255, "grad_norm": 23.60643768310547, "learning_rate": 1.3900000000000001e-07, "loss": 2.4228, "step": 140 }, { "epoch": 0.0016026497141941342, "grad_norm": 15.628372192382812, "learning_rate": 1.49e-07, "loss": 2.1954, "step": 150 }, { "epoch": 0.0017094930284737432, "grad_norm": 12.416412353515625, "learning_rate": 1.59e-07, "loss": 2.5077, "step": 160 }, { "epoch": 0.0018163363427533522, "grad_norm": 23.469127655029297, "learning_rate": 1.69e-07, "loss": 2.6188, "step": 170 }, { "epoch": 0.0019231796570329611, "grad_norm": 12.438040733337402, "learning_rate": 1.7899999999999997e-07, "loss": 2.0317, "step": 180 }, { "epoch": 0.00203002297131257, "grad_norm": 21.303089141845703, "learning_rate": 1.8899999999999999e-07, "loss": 2.3155, "step": 190 }, { "epoch": 0.002136866285592179, "grad_norm": 17.338855743408203, "learning_rate": 1.99e-07, "loss": 2.2739, "step": 200 }, { "epoch": 0.002243709599871788, "grad_norm": 11.659978866577148, "learning_rate": 2.0899999999999998e-07, "loss": 1.9454, "step": 210 }, { "epoch": 0.002350552914151397, "grad_norm": 23.847211837768555, "learning_rate": 2.19e-07, "loss": 2.0292, "step": 220 }, { "epoch": 0.002457396228431006, "grad_norm": 22.926746368408203, "learning_rate": 2.29e-07, "loss": 1.9958, "step": 230 }, { "epoch": 0.002564239542710615, "grad_norm": 13.162216186523438, "learning_rate": 2.3899999999999996e-07, "loss": 1.9191, "step": 240 }, { "epoch": 0.002671082856990224, "grad_norm": 16.514406204223633, "learning_rate": 2.4899999999999997e-07, "loss": 1.8359, "step": 250 }, { "epoch": 0.002777926171269833, "grad_norm": 10.390565872192383, "learning_rate": 2.59e-07, "loss": 1.9345, "step": 260 }, { "epoch": 0.002884769485549442, "grad_norm": 13.677699089050293, "learning_rate": 2.69e-07, "loss": 1.8476, "step": 270 }, { "epoch": 0.002991612799829051, "grad_norm": 10.87039566040039, "learning_rate": 2.79e-07, "loss": 1.9313, "step": 280 }, { "epoch": 0.0030984561141086595, "grad_norm": 14.82325553894043, "learning_rate": 2.8899999999999995e-07, "loss": 1.8775, "step": 290 }, { "epoch": 0.0032052994283882684, "grad_norm": 10.759012222290039, "learning_rate": 2.9899999999999996e-07, "loss": 1.6692, "step": 300 }, { "epoch": 0.0033121427426678774, "grad_norm": 16.905271530151367, "learning_rate": 3.09e-07, "loss": 1.678, "step": 310 }, { "epoch": 0.0034189860569474864, "grad_norm": 13.601309776306152, "learning_rate": 3.19e-07, "loss": 1.8424, "step": 320 }, { "epoch": 0.0035258293712270954, "grad_norm": 15.467519760131836, "learning_rate": 3.29e-07, "loss": 1.7484, "step": 330 }, { "epoch": 0.0036326726855067043, "grad_norm": 10.579943656921387, "learning_rate": 3.39e-07, "loss": 1.7507, "step": 340 }, { "epoch": 0.0037395159997863133, "grad_norm": 15.088194847106934, "learning_rate": 3.4899999999999996e-07, "loss": 1.6371, "step": 350 }, { "epoch": 0.0038463593140659223, "grad_norm": 17.70872688293457, "learning_rate": 3.5899999999999997e-07, "loss": 1.7639, "step": 360 }, { "epoch": 0.003953202628345531, "grad_norm": 13.244065284729004, "learning_rate": 3.69e-07, "loss": 1.6399, "step": 370 }, { "epoch": 0.00406004594262514, "grad_norm": 11.583232879638672, "learning_rate": 3.79e-07, "loss": 1.7164, "step": 380 }, { "epoch": 0.004166889256904749, "grad_norm": 19.063251495361328, "learning_rate": 3.89e-07, "loss": 1.6953, "step": 390 }, { "epoch": 0.004273732571184358, "grad_norm": 10.63552474975586, "learning_rate": 3.99e-07, "loss": 1.5413, "step": 400 }, { "epoch": 0.004380575885463967, "grad_norm": 30.022159576416016, "learning_rate": 4.0899999999999997e-07, "loss": 1.7687, "step": 410 }, { "epoch": 0.004487419199743576, "grad_norm": 19.710725784301758, "learning_rate": 4.19e-07, "loss": 1.6606, "step": 420 }, { "epoch": 0.004594262514023185, "grad_norm": 18.17629623413086, "learning_rate": 4.29e-07, "loss": 1.6588, "step": 430 }, { "epoch": 0.004701105828302794, "grad_norm": 12.706962585449219, "learning_rate": 4.39e-07, "loss": 1.7893, "step": 440 }, { "epoch": 0.004807949142582403, "grad_norm": 13.300002098083496, "learning_rate": 4.49e-07, "loss": 1.4873, "step": 450 }, { "epoch": 0.004914792456862012, "grad_norm": 23.042755126953125, "learning_rate": 4.59e-07, "loss": 1.6226, "step": 460 }, { "epoch": 0.005021635771141621, "grad_norm": 23.68556022644043, "learning_rate": 4.689999999999999e-07, "loss": 1.6311, "step": 470 }, { "epoch": 0.00512847908542123, "grad_norm": 11.906347274780273, "learning_rate": 4.79e-07, "loss": 1.5329, "step": 480 }, { "epoch": 0.005235322399700839, "grad_norm": 14.198004722595215, "learning_rate": 4.89e-07, "loss": 1.42, "step": 490 }, { "epoch": 0.005342165713980448, "grad_norm": 18.850391387939453, "learning_rate": 4.99e-07, "loss": 1.5677, "step": 500 }, { "epoch": 0.005449009028260057, "grad_norm": 12.645710945129395, "learning_rate": 5.09e-07, "loss": 1.5233, "step": 510 }, { "epoch": 0.005555852342539666, "grad_norm": 18.488271713256836, "learning_rate": 5.19e-07, "loss": 1.5165, "step": 520 }, { "epoch": 0.005662695656819275, "grad_norm": 15.958084106445312, "learning_rate": 5.29e-07, "loss": 1.4221, "step": 530 }, { "epoch": 0.005769538971098884, "grad_norm": 14.166272163391113, "learning_rate": 5.39e-07, "loss": 1.4581, "step": 540 }, { "epoch": 0.005876382285378493, "grad_norm": 20.195781707763672, "learning_rate": 5.490000000000001e-07, "loss": 1.5135, "step": 550 }, { "epoch": 0.005983225599658102, "grad_norm": 13.617680549621582, "learning_rate": 5.590000000000001e-07, "loss": 1.3427, "step": 560 }, { "epoch": 0.00609006891393771, "grad_norm": 16.375032424926758, "learning_rate": 5.69e-07, "loss": 1.3928, "step": 570 }, { "epoch": 0.006196912228217319, "grad_norm": 22.720869064331055, "learning_rate": 5.79e-07, "loss": 1.3895, "step": 580 }, { "epoch": 0.006303755542496928, "grad_norm": 13.051531791687012, "learning_rate": 5.89e-07, "loss": 1.4302, "step": 590 }, { "epoch": 0.006410598856776537, "grad_norm": 11.113779067993164, "learning_rate": 5.989999999999999e-07, "loss": 1.3268, "step": 600 }, { "epoch": 0.006517442171056146, "grad_norm": 24.27621841430664, "learning_rate": 6.089999999999999e-07, "loss": 1.303, "step": 610 }, { "epoch": 0.006624285485335755, "grad_norm": 25.651588439941406, "learning_rate": 6.189999999999999e-07, "loss": 1.285, "step": 620 }, { "epoch": 0.006731128799615364, "grad_norm": 20.259742736816406, "learning_rate": 6.289999999999999e-07, "loss": 1.2716, "step": 630 }, { "epoch": 0.006837972113894973, "grad_norm": 15.694610595703125, "learning_rate": 6.389999999999999e-07, "loss": 1.3008, "step": 640 }, { "epoch": 0.006944815428174582, "grad_norm": 20.767314910888672, "learning_rate": 6.49e-07, "loss": 1.4027, "step": 650 }, { "epoch": 0.007051658742454191, "grad_norm": 13.511693000793457, "learning_rate": 6.59e-07, "loss": 1.2719, "step": 660 }, { "epoch": 0.0071585020567338, "grad_norm": 51.28995132446289, "learning_rate": 6.69e-07, "loss": 1.1999, "step": 670 }, { "epoch": 0.007265345371013409, "grad_norm": 18.465375900268555, "learning_rate": 6.79e-07, "loss": 1.1432, "step": 680 }, { "epoch": 0.007372188685293018, "grad_norm": 12.255020141601562, "learning_rate": 6.889999999999999e-07, "loss": 1.1772, "step": 690 }, { "epoch": 0.007479031999572627, "grad_norm": 18.089868545532227, "learning_rate": 6.989999999999999e-07, "loss": 1.132, "step": 700 }, { "epoch": 0.007585875313852236, "grad_norm": 22.867273330688477, "learning_rate": 7.089999999999999e-07, "loss": 1.207, "step": 710 }, { "epoch": 0.007692718628131845, "grad_norm": 26.46473503112793, "learning_rate": 7.189999999999999e-07, "loss": 1.2488, "step": 720 }, { "epoch": 0.0077995619424114536, "grad_norm": 20.10772705078125, "learning_rate": 7.289999999999999e-07, "loss": 1.1113, "step": 730 }, { "epoch": 0.007906405256691063, "grad_norm": 23.63939666748047, "learning_rate": 7.389999999999999e-07, "loss": 1.0928, "step": 740 }, { "epoch": 0.008013248570970672, "grad_norm": 30.970836639404297, "learning_rate": 7.489999999999999e-07, "loss": 1.1542, "step": 750 }, { "epoch": 0.00812009188525028, "grad_norm": 12.552924156188965, "learning_rate": 7.59e-07, "loss": 1.1134, "step": 760 }, { "epoch": 0.008226935199529889, "grad_norm": 18.908920288085938, "learning_rate": 7.69e-07, "loss": 1.1052, "step": 770 }, { "epoch": 0.008333778513809498, "grad_norm": 19.375513076782227, "learning_rate": 7.79e-07, "loss": 0.9896, "step": 780 }, { "epoch": 0.008440621828089107, "grad_norm": 33.372230529785156, "learning_rate": 7.89e-07, "loss": 1.1667, "step": 790 }, { "epoch": 0.008547465142368716, "grad_norm": 17.932170867919922, "learning_rate": 7.99e-07, "loss": 0.936, "step": 800 }, { "epoch": 0.008654308456648325, "grad_norm": 28.773630142211914, "learning_rate": 8.09e-07, "loss": 1.051, "step": 810 }, { "epoch": 0.008761151770927934, "grad_norm": 15.835607528686523, "learning_rate": 8.189999999999999e-07, "loss": 1.2244, "step": 820 }, { "epoch": 0.008867995085207542, "grad_norm": 17.440214157104492, "learning_rate": 8.289999999999999e-07, "loss": 0.9574, "step": 830 }, { "epoch": 0.008974838399487152, "grad_norm": 19.480573654174805, "learning_rate": 8.389999999999999e-07, "loss": 0.9398, "step": 840 }, { "epoch": 0.00908168171376676, "grad_norm": 25.62271499633789, "learning_rate": 8.489999999999999e-07, "loss": 0.9646, "step": 850 }, { "epoch": 0.00918852502804637, "grad_norm": 16.214929580688477, "learning_rate": 8.59e-07, "loss": 1.0449, "step": 860 }, { "epoch": 0.009295368342325978, "grad_norm": 21.07990837097168, "learning_rate": 8.69e-07, "loss": 1.0398, "step": 870 }, { "epoch": 0.009402211656605588, "grad_norm": 14.449224472045898, "learning_rate": 8.79e-07, "loss": 0.8575, "step": 880 }, { "epoch": 0.009509054970885196, "grad_norm": 17.848814010620117, "learning_rate": 8.89e-07, "loss": 0.9105, "step": 890 }, { "epoch": 0.009615898285164806, "grad_norm": 23.596515655517578, "learning_rate": 8.99e-07, "loss": 0.8888, "step": 900 }, { "epoch": 0.009722741599444414, "grad_norm": 26.200397491455078, "learning_rate": 9.09e-07, "loss": 0.9829, "step": 910 }, { "epoch": 0.009829584913724024, "grad_norm": 16.727380752563477, "learning_rate": 9.19e-07, "loss": 0.9512, "step": 920 }, { "epoch": 0.009936428228003632, "grad_norm": 23.978736877441406, "learning_rate": 9.29e-07, "loss": 0.9312, "step": 930 }, { "epoch": 0.010043271542283242, "grad_norm": 15.278177261352539, "learning_rate": 9.389999999999999e-07, "loss": 0.7853, "step": 940 }, { "epoch": 0.01015011485656285, "grad_norm": 17.549436569213867, "learning_rate": 9.489999999999999e-07, "loss": 0.9738, "step": 950 }, { "epoch": 0.01025695817084246, "grad_norm": 12.054234504699707, "learning_rate": 9.589999999999998e-07, "loss": 1.0024, "step": 960 }, { "epoch": 0.010363801485122068, "grad_norm": 13.502250671386719, "learning_rate": 9.69e-07, "loss": 0.7762, "step": 970 }, { "epoch": 0.010470644799401678, "grad_norm": 27.318227767944336, "learning_rate": 9.789999999999999e-07, "loss": 0.869, "step": 980 }, { "epoch": 0.010577488113681286, "grad_norm": 19.829986572265625, "learning_rate": 9.89e-07, "loss": 0.8373, "step": 990 }, { "epoch": 0.010684331427960896, "grad_norm": 27.820192337036133, "learning_rate": 9.989999999999999e-07, "loss": 0.8411, "step": 1000 }, { "epoch": 0.010791174742240504, "grad_norm": 17.436874389648438, "learning_rate": 9.999999997713623e-07, "loss": 0.8765, "step": 1010 }, { "epoch": 0.010898018056520114, "grad_norm": 17.684722900390625, "learning_rate": 9.999999989810094e-07, "loss": 0.8926, "step": 1020 }, { "epoch": 0.011004861370799722, "grad_norm": 25.916025161743164, "learning_rate": 9.999999976261188e-07, "loss": 0.833, "step": 1030 }, { "epoch": 0.011111704685079332, "grad_norm": 28.49773406982422, "learning_rate": 9.999999957066904e-07, "loss": 0.9949, "step": 1040 }, { "epoch": 0.01121854799935894, "grad_norm": 20.203250885009766, "learning_rate": 9.999999932227244e-07, "loss": 0.8533, "step": 1050 }, { "epoch": 0.01132539131363855, "grad_norm": 32.48914337158203, "learning_rate": 9.999999901742206e-07, "loss": 0.8565, "step": 1060 }, { "epoch": 0.011432234627918158, "grad_norm": 31.41819953918457, "learning_rate": 9.999999865611791e-07, "loss": 0.8918, "step": 1070 }, { "epoch": 0.011539077942197768, "grad_norm": 16.383657455444336, "learning_rate": 9.999999823836001e-07, "loss": 0.8776, "step": 1080 }, { "epoch": 0.011645921256477376, "grad_norm": 17.307090759277344, "learning_rate": 9.999999776414832e-07, "loss": 0.7645, "step": 1090 }, { "epoch": 0.011752764570756986, "grad_norm": 29.692623138427734, "learning_rate": 9.999999723348284e-07, "loss": 0.8257, "step": 1100 }, { "epoch": 0.011859607885036594, "grad_norm": 21.38955307006836, "learning_rate": 9.99999966463636e-07, "loss": 0.7565, "step": 1110 }, { "epoch": 0.011966451199316204, "grad_norm": 30.900634765625, "learning_rate": 9.999999600279062e-07, "loss": 0.8059, "step": 1120 }, { "epoch": 0.012073294513595812, "grad_norm": 19.73147201538086, "learning_rate": 9.999999530276384e-07, "loss": 0.8477, "step": 1130 }, { "epoch": 0.01218013782787542, "grad_norm": 29.184682846069336, "learning_rate": 9.999999454628329e-07, "loss": 0.7287, "step": 1140 }, { "epoch": 0.01228698114215503, "grad_norm": 38.782684326171875, "learning_rate": 9.999999373334898e-07, "loss": 0.7535, "step": 1150 }, { "epoch": 0.012393824456434638, "grad_norm": 14.094932556152344, "learning_rate": 9.99999928639609e-07, "loss": 0.6895, "step": 1160 }, { "epoch": 0.012500667770714248, "grad_norm": 27.264318466186523, "learning_rate": 9.999999193811906e-07, "loss": 0.8176, "step": 1170 }, { "epoch": 0.012607511084993856, "grad_norm": 28.45049285888672, "learning_rate": 9.999999095582345e-07, "loss": 0.6449, "step": 1180 }, { "epoch": 0.012714354399273466, "grad_norm": 18.869548797607422, "learning_rate": 9.999998991707408e-07, "loss": 0.6709, "step": 1190 }, { "epoch": 0.012821197713553074, "grad_norm": 28.295047760009766, "learning_rate": 9.999998882187096e-07, "loss": 0.67, "step": 1200 }, { "epoch": 0.012928041027832684, "grad_norm": 32.3592529296875, "learning_rate": 9.999998767021407e-07, "loss": 0.593, "step": 1210 }, { "epoch": 0.013034884342112292, "grad_norm": 32.31857681274414, "learning_rate": 9.99999864621034e-07, "loss": 0.7831, "step": 1220 }, { "epoch": 0.013141727656391902, "grad_norm": 22.91666603088379, "learning_rate": 9.999998519753901e-07, "loss": 0.659, "step": 1230 }, { "epoch": 0.01324857097067151, "grad_norm": 24.547237396240234, "learning_rate": 9.999998387652083e-07, "loss": 0.6691, "step": 1240 }, { "epoch": 0.01335541428495112, "grad_norm": 21.411128997802734, "learning_rate": 9.999998249904892e-07, "loss": 0.6429, "step": 1250 }, { "epoch": 0.013462257599230728, "grad_norm": 25.10348892211914, "learning_rate": 9.999998106512323e-07, "loss": 0.7838, "step": 1260 }, { "epoch": 0.013569100913510337, "grad_norm": 34.26380920410156, "learning_rate": 9.99999795747438e-07, "loss": 0.5625, "step": 1270 }, { "epoch": 0.013675944227789946, "grad_norm": 15.086063385009766, "learning_rate": 9.999997802791063e-07, "loss": 0.5773, "step": 1280 }, { "epoch": 0.013782787542069555, "grad_norm": 18.153806686401367, "learning_rate": 9.99999764246237e-07, "loss": 0.6411, "step": 1290 }, { "epoch": 0.013889630856349163, "grad_norm": 31.448535919189453, "learning_rate": 9.999997476488302e-07, "loss": 0.6244, "step": 1300 }, { "epoch": 0.013996474170628773, "grad_norm": 20.343231201171875, "learning_rate": 9.99999730486886e-07, "loss": 0.547, "step": 1310 }, { "epoch": 0.014103317484908381, "grad_norm": 25.130403518676758, "learning_rate": 9.999997127604043e-07, "loss": 0.6397, "step": 1320 }, { "epoch": 0.014210160799187991, "grad_norm": 24.03260612487793, "learning_rate": 9.999996944693853e-07, "loss": 0.7423, "step": 1330 }, { "epoch": 0.0143170041134676, "grad_norm": 35.5667610168457, "learning_rate": 9.999996756138288e-07, "loss": 0.5565, "step": 1340 }, { "epoch": 0.01442384742774721, "grad_norm": 15.427313804626465, "learning_rate": 9.99999656193735e-07, "loss": 0.7198, "step": 1350 }, { "epoch": 0.014530690742026817, "grad_norm": 12.719769477844238, "learning_rate": 9.99999636209104e-07, "loss": 0.5438, "step": 1360 }, { "epoch": 0.014637534056306427, "grad_norm": 40.56574630737305, "learning_rate": 9.999996156599354e-07, "loss": 0.7209, "step": 1370 }, { "epoch": 0.014744377370586035, "grad_norm": 22.957561492919922, "learning_rate": 9.999995945462295e-07, "loss": 0.7641, "step": 1380 }, { "epoch": 0.014851220684865645, "grad_norm": 21.6916561126709, "learning_rate": 9.999995728679865e-07, "loss": 0.5269, "step": 1390 }, { "epoch": 0.014958063999145253, "grad_norm": 14.265226364135742, "learning_rate": 9.99999550625206e-07, "loss": 0.5405, "step": 1400 }, { "epoch": 0.015064907313424863, "grad_norm": 34.02663040161133, "learning_rate": 9.999995278178885e-07, "loss": 0.5395, "step": 1410 }, { "epoch": 0.015171750627704471, "grad_norm": 17.742246627807617, "learning_rate": 9.999995044460338e-07, "loss": 0.6078, "step": 1420 }, { "epoch": 0.015278593941984081, "grad_norm": 29.083486557006836, "learning_rate": 9.99999480509642e-07, "loss": 0.6089, "step": 1430 }, { "epoch": 0.01538543725626369, "grad_norm": 19.8161563873291, "learning_rate": 9.999994560087129e-07, "loss": 0.5093, "step": 1440 }, { "epoch": 0.015492280570543299, "grad_norm": 25.544170379638672, "learning_rate": 9.999994309432466e-07, "loss": 0.5478, "step": 1450 }, { "epoch": 0.015599123884822907, "grad_norm": 21.947019577026367, "learning_rate": 9.999994053132432e-07, "loss": 0.6747, "step": 1460 }, { "epoch": 0.015705967199102517, "grad_norm": 29.92225456237793, "learning_rate": 9.99999379118703e-07, "loss": 0.4884, "step": 1470 }, { "epoch": 0.015812810513382125, "grad_norm": 13.170169830322266, "learning_rate": 9.999993523596256e-07, "loss": 0.5031, "step": 1480 }, { "epoch": 0.015919653827661733, "grad_norm": 25.833812713623047, "learning_rate": 9.999993250360111e-07, "loss": 0.4939, "step": 1490 }, { "epoch": 0.016026497141941345, "grad_norm": 14.251887321472168, "learning_rate": 9.999992971478596e-07, "loss": 0.6288, "step": 1500 }, { "epoch": 0.016133340456220953, "grad_norm": 15.274778366088867, "learning_rate": 9.999992686951712e-07, "loss": 0.5989, "step": 1510 }, { "epoch": 0.01624018377050056, "grad_norm": 8.674311637878418, "learning_rate": 9.999992396779462e-07, "loss": 0.4771, "step": 1520 }, { "epoch": 0.01634702708478017, "grad_norm": 21.438268661499023, "learning_rate": 9.99999210096184e-07, "loss": 0.4827, "step": 1530 }, { "epoch": 0.016453870399059777, "grad_norm": 11.39437484741211, "learning_rate": 9.99999179949885e-07, "loss": 0.4961, "step": 1540 }, { "epoch": 0.01656071371333939, "grad_norm": 15.08348274230957, "learning_rate": 9.999991492390492e-07, "loss": 0.4958, "step": 1550 }, { "epoch": 0.016667557027618997, "grad_norm": 24.625354766845703, "learning_rate": 9.999991179636766e-07, "loss": 0.5144, "step": 1560 }, { "epoch": 0.016774400341898605, "grad_norm": 16.12522315979004, "learning_rate": 9.999990861237674e-07, "loss": 0.5237, "step": 1570 }, { "epoch": 0.016881243656178213, "grad_norm": 20.445648193359375, "learning_rate": 9.999990537193214e-07, "loss": 0.4556, "step": 1580 }, { "epoch": 0.016988086970457825, "grad_norm": 36.159423828125, "learning_rate": 9.999990207503386e-07, "loss": 0.6148, "step": 1590 }, { "epoch": 0.017094930284737433, "grad_norm": 23.81528091430664, "learning_rate": 9.999989872168195e-07, "loss": 0.5557, "step": 1600 }, { "epoch": 0.01720177359901704, "grad_norm": 13.993439674377441, "learning_rate": 9.999989531187636e-07, "loss": 0.5148, "step": 1610 }, { "epoch": 0.01730861691329665, "grad_norm": 22.78022575378418, "learning_rate": 9.99998918456171e-07, "loss": 0.5013, "step": 1620 }, { "epoch": 0.01741546022757626, "grad_norm": 8.337483406066895, "learning_rate": 9.999988832290422e-07, "loss": 0.4072, "step": 1630 }, { "epoch": 0.01752230354185587, "grad_norm": 17.143312454223633, "learning_rate": 9.999988474373766e-07, "loss": 0.5502, "step": 1640 }, { "epoch": 0.017629146856135477, "grad_norm": 26.95514488220215, "learning_rate": 9.999988110811747e-07, "loss": 0.6639, "step": 1650 }, { "epoch": 0.017735990170415085, "grad_norm": 28.49688720703125, "learning_rate": 9.999987741604365e-07, "loss": 0.5547, "step": 1660 }, { "epoch": 0.017842833484694696, "grad_norm": 27.756229400634766, "learning_rate": 9.99998736675162e-07, "loss": 0.5164, "step": 1670 }, { "epoch": 0.017949676798974305, "grad_norm": 7.8590593338012695, "learning_rate": 9.99998698625351e-07, "loss": 0.3982, "step": 1680 }, { "epoch": 0.018056520113253913, "grad_norm": 22.170989990234375, "learning_rate": 9.999986600110038e-07, "loss": 0.627, "step": 1690 }, { "epoch": 0.01816336342753352, "grad_norm": 15.6681547164917, "learning_rate": 9.999986208321203e-07, "loss": 0.3786, "step": 1700 }, { "epoch": 0.018270206741813132, "grad_norm": 11.621328353881836, "learning_rate": 9.999985810887009e-07, "loss": 0.4295, "step": 1710 }, { "epoch": 0.01837705005609274, "grad_norm": 21.878740310668945, "learning_rate": 9.999985407807452e-07, "loss": 0.6279, "step": 1720 }, { "epoch": 0.01848389337037235, "grad_norm": 11.948835372924805, "learning_rate": 9.999984999082532e-07, "loss": 0.4856, "step": 1730 }, { "epoch": 0.018590736684651957, "grad_norm": 16.102783203125, "learning_rate": 9.999984584712253e-07, "loss": 0.4898, "step": 1740 }, { "epoch": 0.01869757999893157, "grad_norm": 15.117629051208496, "learning_rate": 9.999984164696615e-07, "loss": 0.3796, "step": 1750 }, { "epoch": 0.018804423313211176, "grad_norm": 9.525590896606445, "learning_rate": 9.999983739035617e-07, "loss": 0.4794, "step": 1760 }, { "epoch": 0.018911266627490785, "grad_norm": 19.462093353271484, "learning_rate": 9.99998330772926e-07, "loss": 0.4953, "step": 1770 }, { "epoch": 0.019018109941770393, "grad_norm": 15.85865592956543, "learning_rate": 9.999982870777544e-07, "loss": 0.4182, "step": 1780 }, { "epoch": 0.019124953256050004, "grad_norm": 22.46986961364746, "learning_rate": 9.999982428180472e-07, "loss": 0.6589, "step": 1790 }, { "epoch": 0.019231796570329612, "grad_norm": 22.6545467376709, "learning_rate": 9.99998197993804e-07, "loss": 0.4603, "step": 1800 }, { "epoch": 0.01933863988460922, "grad_norm": 20.5671443939209, "learning_rate": 9.999981526050253e-07, "loss": 0.4239, "step": 1810 }, { "epoch": 0.01944548319888883, "grad_norm": 21.539894104003906, "learning_rate": 9.999981066517106e-07, "loss": 0.4279, "step": 1820 }, { "epoch": 0.01955232651316844, "grad_norm": 18.263813018798828, "learning_rate": 9.999980601338605e-07, "loss": 0.4878, "step": 1830 }, { "epoch": 0.019659169827448048, "grad_norm": 37.190223693847656, "learning_rate": 9.999980130514749e-07, "loss": 0.4293, "step": 1840 }, { "epoch": 0.019766013141727656, "grad_norm": 16.193592071533203, "learning_rate": 9.999979654045538e-07, "loss": 0.5027, "step": 1850 }, { "epoch": 0.019872856456007264, "grad_norm": 11.223594665527344, "learning_rate": 9.999979171930972e-07, "loss": 0.4939, "step": 1860 }, { "epoch": 0.019979699770286876, "grad_norm": 17.22928810119629, "learning_rate": 9.999978684171053e-07, "loss": 0.3775, "step": 1870 }, { "epoch": 0.020086543084566484, "grad_norm": 9.113835334777832, "learning_rate": 9.999978190765781e-07, "loss": 0.5203, "step": 1880 }, { "epoch": 0.020193386398846092, "grad_norm": 11.177996635437012, "learning_rate": 9.999977691715155e-07, "loss": 0.5567, "step": 1890 }, { "epoch": 0.0203002297131257, "grad_norm": 20.846891403198242, "learning_rate": 9.999977187019177e-07, "loss": 0.4906, "step": 1900 }, { "epoch": 0.02040707302740531, "grad_norm": 19.245729446411133, "learning_rate": 9.999976676677848e-07, "loss": 0.4876, "step": 1910 }, { "epoch": 0.02051391634168492, "grad_norm": 18.81003189086914, "learning_rate": 9.999976160691168e-07, "loss": 0.4018, "step": 1920 }, { "epoch": 0.020620759655964528, "grad_norm": 12.369691848754883, "learning_rate": 9.999975639059137e-07, "loss": 0.3903, "step": 1930 }, { "epoch": 0.020727602970244136, "grad_norm": 12.353583335876465, "learning_rate": 9.999975111781758e-07, "loss": 0.4196, "step": 1940 }, { "epoch": 0.020834446284523744, "grad_norm": 13.172237396240234, "learning_rate": 9.999974578859028e-07, "loss": 0.4599, "step": 1950 }, { "epoch": 0.020941289598803356, "grad_norm": 11.855949401855469, "learning_rate": 9.999974040290948e-07, "loss": 0.4273, "step": 1960 }, { "epoch": 0.021048132913082964, "grad_norm": 25.31606674194336, "learning_rate": 9.999973496077523e-07, "loss": 0.3781, "step": 1970 }, { "epoch": 0.021154976227362572, "grad_norm": 15.509110450744629, "learning_rate": 9.99997294621875e-07, "loss": 0.4447, "step": 1980 }, { "epoch": 0.02126181954164218, "grad_norm": 18.76573944091797, "learning_rate": 9.99997239071463e-07, "loss": 0.4805, "step": 1990 }, { "epoch": 0.021368662855921792, "grad_norm": 18.191818237304688, "learning_rate": 9.99997182956516e-07, "loss": 0.4548, "step": 2000 }, { "epoch": 0.0214755061702014, "grad_norm": 14.893383979797363, "learning_rate": 9.999971262770347e-07, "loss": 0.4489, "step": 2010 }, { "epoch": 0.021582349484481008, "grad_norm": 21.934574127197266, "learning_rate": 9.99997069033019e-07, "loss": 0.4273, "step": 2020 }, { "epoch": 0.021689192798760616, "grad_norm": 12.001498222351074, "learning_rate": 9.999970112244688e-07, "loss": 0.3587, "step": 2030 }, { "epoch": 0.021796036113040228, "grad_norm": 17.182058334350586, "learning_rate": 9.999969528513843e-07, "loss": 0.4706, "step": 2040 }, { "epoch": 0.021902879427319836, "grad_norm": 19.61953353881836, "learning_rate": 9.999968939137656e-07, "loss": 0.299, "step": 2050 }, { "epoch": 0.022009722741599444, "grad_norm": 8.613227844238281, "learning_rate": 9.999968344116126e-07, "loss": 0.4623, "step": 2060 }, { "epoch": 0.022116566055879052, "grad_norm": 20.734663009643555, "learning_rate": 9.999967743449253e-07, "loss": 0.3995, "step": 2070 }, { "epoch": 0.022223409370158664, "grad_norm": 24.131549835205078, "learning_rate": 9.99996713713704e-07, "loss": 0.4078, "step": 2080 }, { "epoch": 0.022330252684438272, "grad_norm": 12.159090995788574, "learning_rate": 9.999966525179487e-07, "loss": 0.2499, "step": 2090 }, { "epoch": 0.02243709599871788, "grad_norm": 10.760337829589844, "learning_rate": 9.999965907576592e-07, "loss": 0.2855, "step": 2100 }, { "epoch": 0.022543939312997488, "grad_norm": 15.838318824768066, "learning_rate": 9.99996528432836e-07, "loss": 0.3379, "step": 2110 }, { "epoch": 0.0226507826272771, "grad_norm": 29.774900436401367, "learning_rate": 9.999964655434792e-07, "loss": 0.5812, "step": 2120 }, { "epoch": 0.022757625941556708, "grad_norm": 20.033592224121094, "learning_rate": 9.999964020895884e-07, "loss": 0.2989, "step": 2130 }, { "epoch": 0.022864469255836316, "grad_norm": 9.237654685974121, "learning_rate": 9.99996338071164e-07, "loss": 0.3206, "step": 2140 }, { "epoch": 0.022971312570115924, "grad_norm": 25.621370315551758, "learning_rate": 9.99996273488206e-07, "loss": 0.3762, "step": 2150 }, { "epoch": 0.023078155884395535, "grad_norm": 18.913705825805664, "learning_rate": 9.999962083407143e-07, "loss": 0.4137, "step": 2160 }, { "epoch": 0.023184999198675144, "grad_norm": 25.89129638671875, "learning_rate": 9.999961426286894e-07, "loss": 0.4719, "step": 2170 }, { "epoch": 0.02329184251295475, "grad_norm": 17.706846237182617, "learning_rate": 9.99996076352131e-07, "loss": 0.321, "step": 2180 }, { "epoch": 0.02339868582723436, "grad_norm": 17.31884765625, "learning_rate": 9.99996009511039e-07, "loss": 0.3109, "step": 2190 }, { "epoch": 0.02350552914151397, "grad_norm": 17.169387817382812, "learning_rate": 9.999959421054144e-07, "loss": 0.31, "step": 2200 }, { "epoch": 0.02361237245579358, "grad_norm": 6.533199310302734, "learning_rate": 9.999958741352561e-07, "loss": 0.3018, "step": 2210 }, { "epoch": 0.023719215770073188, "grad_norm": 14.441643714904785, "learning_rate": 9.99995805600565e-07, "loss": 0.3914, "step": 2220 }, { "epoch": 0.023826059084352796, "grad_norm": 12.449651718139648, "learning_rate": 9.999957365013407e-07, "loss": 0.4838, "step": 2230 }, { "epoch": 0.023932902398632407, "grad_norm": 15.345419883728027, "learning_rate": 9.999956668375838e-07, "loss": 0.4901, "step": 2240 }, { "epoch": 0.024039745712912015, "grad_norm": 12.664660453796387, "learning_rate": 9.99995596609294e-07, "loss": 0.3024, "step": 2250 }, { "epoch": 0.024146589027191624, "grad_norm": 14.732717514038086, "learning_rate": 9.99995525816471e-07, "loss": 0.2454, "step": 2260 }, { "epoch": 0.02425343234147123, "grad_norm": 11.897661209106445, "learning_rate": 9.999954544591158e-07, "loss": 0.256, "step": 2270 }, { "epoch": 0.02436027565575084, "grad_norm": 13.44116497039795, "learning_rate": 9.999953825372278e-07, "loss": 0.3144, "step": 2280 }, { "epoch": 0.02446711897003045, "grad_norm": 30.09218978881836, "learning_rate": 9.999953100508072e-07, "loss": 0.429, "step": 2290 }, { "epoch": 0.02457396228431006, "grad_norm": 17.364856719970703, "learning_rate": 9.999952369998542e-07, "loss": 0.4462, "step": 2300 }, { "epoch": 0.024680805598589668, "grad_norm": 15.58460807800293, "learning_rate": 9.99995163384369e-07, "loss": 0.3707, "step": 2310 }, { "epoch": 0.024787648912869276, "grad_norm": 22.709197998046875, "learning_rate": 9.999950892043514e-07, "loss": 0.5107, "step": 2320 }, { "epoch": 0.024894492227148887, "grad_norm": 21.642234802246094, "learning_rate": 9.999950144598015e-07, "loss": 0.296, "step": 2330 }, { "epoch": 0.025001335541428495, "grad_norm": 14.450118064880371, "learning_rate": 9.999949391507197e-07, "loss": 0.4353, "step": 2340 }, { "epoch": 0.025108178855708103, "grad_norm": 11.27927017211914, "learning_rate": 9.999948632771058e-07, "loss": 0.366, "step": 2350 }, { "epoch": 0.02521502216998771, "grad_norm": 13.555230140686035, "learning_rate": 9.9999478683896e-07, "loss": 0.2773, "step": 2360 }, { "epoch": 0.025321865484267323, "grad_norm": 16.0972843170166, "learning_rate": 9.999947098362821e-07, "loss": 0.3488, "step": 2370 }, { "epoch": 0.02542870879854693, "grad_norm": 31.002683639526367, "learning_rate": 9.999946322690727e-07, "loss": 0.4862, "step": 2380 }, { "epoch": 0.02553555211282654, "grad_norm": 24.140422821044922, "learning_rate": 9.999945541373317e-07, "loss": 0.4097, "step": 2390 }, { "epoch": 0.025642395427106147, "grad_norm": 11.85222053527832, "learning_rate": 9.999944754410589e-07, "loss": 0.4069, "step": 2400 }, { "epoch": 0.02574923874138576, "grad_norm": 9.600472450256348, "learning_rate": 9.999943961802547e-07, "loss": 0.289, "step": 2410 }, { "epoch": 0.025856082055665367, "grad_norm": 10.75174331665039, "learning_rate": 9.99994316354919e-07, "loss": 0.3154, "step": 2420 }, { "epoch": 0.025962925369944975, "grad_norm": 17.594995498657227, "learning_rate": 9.99994235965052e-07, "loss": 0.3575, "step": 2430 }, { "epoch": 0.026069768684224583, "grad_norm": 9.631568908691406, "learning_rate": 9.999941550106538e-07, "loss": 0.3837, "step": 2440 }, { "epoch": 0.026176611998504195, "grad_norm": 6.7133402824401855, "learning_rate": 9.999940734917246e-07, "loss": 0.2717, "step": 2450 }, { "epoch": 0.026283455312783803, "grad_norm": 12.473257064819336, "learning_rate": 9.999939914082642e-07, "loss": 0.3555, "step": 2460 }, { "epoch": 0.02639029862706341, "grad_norm": 12.064770698547363, "learning_rate": 9.99993908760273e-07, "loss": 0.3976, "step": 2470 }, { "epoch": 0.02649714194134302, "grad_norm": 28.133289337158203, "learning_rate": 9.999938255477507e-07, "loss": 0.3169, "step": 2480 }, { "epoch": 0.02660398525562263, "grad_norm": 16.504127502441406, "learning_rate": 9.99993741770698e-07, "loss": 0.5796, "step": 2490 }, { "epoch": 0.02671082856990224, "grad_norm": 9.642769813537598, "learning_rate": 9.999936574291142e-07, "loss": 0.2702, "step": 2500 }, { "epoch": 0.026817671884181847, "grad_norm": 24.422245025634766, "learning_rate": 9.99993572523e-07, "loss": 0.4101, "step": 2510 }, { "epoch": 0.026924515198461455, "grad_norm": 13.089735984802246, "learning_rate": 9.999934870523553e-07, "loss": 0.2777, "step": 2520 }, { "epoch": 0.027031358512741067, "grad_norm": 13.085718154907227, "learning_rate": 9.999934010171802e-07, "loss": 0.2949, "step": 2530 }, { "epoch": 0.027138201827020675, "grad_norm": 8.453664779663086, "learning_rate": 9.999933144174749e-07, "loss": 0.3247, "step": 2540 }, { "epoch": 0.027245045141300283, "grad_norm": 8.616175651550293, "learning_rate": 9.999932272532394e-07, "loss": 0.2489, "step": 2550 }, { "epoch": 0.02735188845557989, "grad_norm": 10.648241996765137, "learning_rate": 9.999931395244739e-07, "loss": 0.2702, "step": 2560 }, { "epoch": 0.027458731769859503, "grad_norm": 5.437968730926514, "learning_rate": 9.999930512311782e-07, "loss": 0.3871, "step": 2570 }, { "epoch": 0.02756557508413911, "grad_norm": 13.582603454589844, "learning_rate": 9.999929623733527e-07, "loss": 0.3728, "step": 2580 }, { "epoch": 0.02767241839841872, "grad_norm": 13.515789985656738, "learning_rate": 9.999928729509975e-07, "loss": 0.2433, "step": 2590 }, { "epoch": 0.027779261712698327, "grad_norm": 21.830284118652344, "learning_rate": 9.999927829641124e-07, "loss": 0.2817, "step": 2600 }, { "epoch": 0.02788610502697794, "grad_norm": 22.24539566040039, "learning_rate": 9.999926924126979e-07, "loss": 0.3518, "step": 2610 }, { "epoch": 0.027992948341257547, "grad_norm": 21.68387222290039, "learning_rate": 9.999926012967539e-07, "loss": 0.4338, "step": 2620 }, { "epoch": 0.028099791655537155, "grad_norm": 12.022394180297852, "learning_rate": 9.999925096162804e-07, "loss": 0.3888, "step": 2630 }, { "epoch": 0.028206634969816763, "grad_norm": 15.335295677185059, "learning_rate": 9.999924173712777e-07, "loss": 0.2986, "step": 2640 }, { "epoch": 0.02831347828409637, "grad_norm": 22.488176345825195, "learning_rate": 9.999923245617457e-07, "loss": 0.3784, "step": 2650 }, { "epoch": 0.028420321598375983, "grad_norm": 30.108360290527344, "learning_rate": 9.999922311876848e-07, "loss": 0.3438, "step": 2660 }, { "epoch": 0.02852716491265559, "grad_norm": 10.72911548614502, "learning_rate": 9.99992137249095e-07, "loss": 0.4215, "step": 2670 }, { "epoch": 0.0286340082269352, "grad_norm": 8.758018493652344, "learning_rate": 9.99992042745976e-07, "loss": 0.3213, "step": 2680 }, { "epoch": 0.028740851541214807, "grad_norm": 28.76270294189453, "learning_rate": 9.999919476783284e-07, "loss": 0.3422, "step": 2690 }, { "epoch": 0.02884769485549442, "grad_norm": 15.68340015411377, "learning_rate": 9.999918520461523e-07, "loss": 0.3968, "step": 2700 }, { "epoch": 0.028954538169774027, "grad_norm": 12.398266792297363, "learning_rate": 9.999917558494476e-07, "loss": 0.309, "step": 2710 }, { "epoch": 0.029061381484053635, "grad_norm": 13.003069877624512, "learning_rate": 9.999916590882145e-07, "loss": 0.2985, "step": 2720 }, { "epoch": 0.029168224798333243, "grad_norm": 17.476404190063477, "learning_rate": 9.99991561762453e-07, "loss": 0.3845, "step": 2730 }, { "epoch": 0.029275068112612854, "grad_norm": 10.947807312011719, "learning_rate": 9.999914638721634e-07, "loss": 0.3058, "step": 2740 }, { "epoch": 0.029381911426892462, "grad_norm": 25.774873733520508, "learning_rate": 9.999913654173455e-07, "loss": 0.3779, "step": 2750 }, { "epoch": 0.02948875474117207, "grad_norm": 10.66384220123291, "learning_rate": 9.999912663979999e-07, "loss": 0.3268, "step": 2760 }, { "epoch": 0.02959559805545168, "grad_norm": 12.778026580810547, "learning_rate": 9.999911668141262e-07, "loss": 0.2704, "step": 2770 }, { "epoch": 0.02970244136973129, "grad_norm": 14.738914489746094, "learning_rate": 9.999910666657246e-07, "loss": 0.2765, "step": 2780 }, { "epoch": 0.0298092846840109, "grad_norm": 15.872540473937988, "learning_rate": 9.999909659527957e-07, "loss": 0.2705, "step": 2790 }, { "epoch": 0.029916127998290507, "grad_norm": 8.931339263916016, "learning_rate": 9.999908646753392e-07, "loss": 0.27, "step": 2800 }, { "epoch": 0.030022971312570115, "grad_norm": 9.6753511428833, "learning_rate": 9.999907628333552e-07, "loss": 0.2557, "step": 2810 }, { "epoch": 0.030129814626849726, "grad_norm": 13.93957233428955, "learning_rate": 9.999906604268438e-07, "loss": 0.2339, "step": 2820 }, { "epoch": 0.030236657941129334, "grad_norm": 12.635860443115234, "learning_rate": 9.999905574558053e-07, "loss": 0.4326, "step": 2830 }, { "epoch": 0.030343501255408942, "grad_norm": 37.0772705078125, "learning_rate": 9.999904539202398e-07, "loss": 0.4187, "step": 2840 }, { "epoch": 0.03045034456968855, "grad_norm": 9.37900447845459, "learning_rate": 9.999903498201473e-07, "loss": 0.322, "step": 2850 }, { "epoch": 0.030557187883968162, "grad_norm": 21.39643096923828, "learning_rate": 9.999902451555278e-07, "loss": 0.2927, "step": 2860 }, { "epoch": 0.03066403119824777, "grad_norm": 8.879594802856445, "learning_rate": 9.999901399263818e-07, "loss": 0.2126, "step": 2870 }, { "epoch": 0.03077087451252738, "grad_norm": 19.445392608642578, "learning_rate": 9.99990034132709e-07, "loss": 0.3536, "step": 2880 }, { "epoch": 0.030877717826806986, "grad_norm": 17.53609848022461, "learning_rate": 9.9998992777451e-07, "loss": 0.2158, "step": 2890 }, { "epoch": 0.030984561141086598, "grad_norm": 11.909503936767578, "learning_rate": 9.999898208517843e-07, "loss": 0.3796, "step": 2900 }, { "epoch": 0.031091404455366206, "grad_norm": 7.718962669372559, "learning_rate": 9.999897133645325e-07, "loss": 0.2981, "step": 2910 }, { "epoch": 0.031198247769645814, "grad_norm": 17.693714141845703, "learning_rate": 9.999896053127545e-07, "loss": 0.469, "step": 2920 }, { "epoch": 0.03130509108392542, "grad_norm": 34.291778564453125, "learning_rate": 9.999894966964506e-07, "loss": 0.4611, "step": 2930 }, { "epoch": 0.031411934398205034, "grad_norm": 9.858500480651855, "learning_rate": 9.999893875156208e-07, "loss": 0.2744, "step": 2940 }, { "epoch": 0.03151877771248464, "grad_norm": 11.554060935974121, "learning_rate": 9.999892777702652e-07, "loss": 0.3058, "step": 2950 }, { "epoch": 0.03162562102676425, "grad_norm": 9.583712577819824, "learning_rate": 9.99989167460384e-07, "loss": 0.3065, "step": 2960 }, { "epoch": 0.03173246434104386, "grad_norm": 7.0807695388793945, "learning_rate": 9.999890565859772e-07, "loss": 0.3427, "step": 2970 }, { "epoch": 0.031839307655323466, "grad_norm": 6.813404560089111, "learning_rate": 9.999889451470453e-07, "loss": 0.2427, "step": 2980 }, { "epoch": 0.03194615096960308, "grad_norm": 10.091156959533691, "learning_rate": 9.99988833143588e-07, "loss": 0.344, "step": 2990 }, { "epoch": 0.03205299428388269, "grad_norm": 3.660794258117676, "learning_rate": 9.999887205756054e-07, "loss": 0.3294, "step": 3000 }, { "epoch": 0.032159837598162294, "grad_norm": 11.897393226623535, "learning_rate": 9.99988607443098e-07, "loss": 0.3382, "step": 3010 }, { "epoch": 0.032266680912441906, "grad_norm": 14.648067474365234, "learning_rate": 9.999884937460656e-07, "loss": 0.2641, "step": 3020 }, { "epoch": 0.03237352422672151, "grad_norm": 8.719098091125488, "learning_rate": 9.999883794845086e-07, "loss": 0.2405, "step": 3030 }, { "epoch": 0.03248036754100112, "grad_norm": 17.766204833984375, "learning_rate": 9.999882646584268e-07, "loss": 0.3343, "step": 3040 }, { "epoch": 0.032587210855280734, "grad_norm": 8.678058624267578, "learning_rate": 9.999881492678207e-07, "loss": 0.2575, "step": 3050 }, { "epoch": 0.03269405416956034, "grad_norm": 13.05073070526123, "learning_rate": 9.999880333126903e-07, "loss": 0.4026, "step": 3060 }, { "epoch": 0.03280089748383995, "grad_norm": 7.4086594581604, "learning_rate": 9.999879167930354e-07, "loss": 0.3292, "step": 3070 }, { "epoch": 0.032907740798119554, "grad_norm": 9.80783748626709, "learning_rate": 9.999877997088565e-07, "loss": 0.4183, "step": 3080 }, { "epoch": 0.033014584112399166, "grad_norm": 10.671646118164062, "learning_rate": 9.999876820601537e-07, "loss": 0.253, "step": 3090 }, { "epoch": 0.03312142742667878, "grad_norm": 11.983243942260742, "learning_rate": 9.999875638469271e-07, "loss": 0.3822, "step": 3100 }, { "epoch": 0.03322827074095838, "grad_norm": 12.386449813842773, "learning_rate": 9.999874450691768e-07, "loss": 0.2341, "step": 3110 }, { "epoch": 0.033335114055237994, "grad_norm": 13.09696102142334, "learning_rate": 9.99987325726903e-07, "loss": 0.2557, "step": 3120 }, { "epoch": 0.033441957369517605, "grad_norm": 14.389138221740723, "learning_rate": 9.999872058201056e-07, "loss": 0.2197, "step": 3130 }, { "epoch": 0.03354880068379721, "grad_norm": 23.044815063476562, "learning_rate": 9.999870853487849e-07, "loss": 0.3699, "step": 3140 }, { "epoch": 0.03365564399807682, "grad_norm": 14.282970428466797, "learning_rate": 9.99986964312941e-07, "loss": 0.3275, "step": 3150 }, { "epoch": 0.033762487312356426, "grad_norm": 9.054555892944336, "learning_rate": 9.999868427125744e-07, "loss": 0.2363, "step": 3160 }, { "epoch": 0.03386933062663604, "grad_norm": 17.947898864746094, "learning_rate": 9.999867205476846e-07, "loss": 0.3018, "step": 3170 }, { "epoch": 0.03397617394091565, "grad_norm": 4.897971153259277, "learning_rate": 9.999865978182723e-07, "loss": 0.3486, "step": 3180 }, { "epoch": 0.034083017255195254, "grad_norm": 11.35064697265625, "learning_rate": 9.999864745243371e-07, "loss": 0.3536, "step": 3190 }, { "epoch": 0.034189860569474866, "grad_norm": 13.849260330200195, "learning_rate": 9.999863506658797e-07, "loss": 0.3901, "step": 3200 }, { "epoch": 0.03429670388375448, "grad_norm": 9.394926071166992, "learning_rate": 9.999862262429e-07, "loss": 0.2578, "step": 3210 }, { "epoch": 0.03440354719803408, "grad_norm": 21.65723991394043, "learning_rate": 9.999861012553978e-07, "loss": 0.3296, "step": 3220 }, { "epoch": 0.03451039051231369, "grad_norm": 11.932403564453125, "learning_rate": 9.999859757033738e-07, "loss": 0.2632, "step": 3230 }, { "epoch": 0.0346172338265933, "grad_norm": 11.572576522827148, "learning_rate": 9.999858495868278e-07, "loss": 0.3099, "step": 3240 }, { "epoch": 0.03472407714087291, "grad_norm": 17.785783767700195, "learning_rate": 9.999857229057603e-07, "loss": 0.2372, "step": 3250 }, { "epoch": 0.03483092045515252, "grad_norm": 14.047842025756836, "learning_rate": 9.999855956601708e-07, "loss": 0.2327, "step": 3260 }, { "epoch": 0.034937763769432126, "grad_norm": 19.837182998657227, "learning_rate": 9.9998546785006e-07, "loss": 0.2196, "step": 3270 }, { "epoch": 0.03504460708371174, "grad_norm": 17.33720588684082, "learning_rate": 9.99985339475428e-07, "loss": 0.2658, "step": 3280 }, { "epoch": 0.03515145039799135, "grad_norm": 10.315780639648438, "learning_rate": 9.999852105362746e-07, "loss": 0.2458, "step": 3290 }, { "epoch": 0.035258293712270954, "grad_norm": 13.767029762268066, "learning_rate": 9.999850810326003e-07, "loss": 0.2512, "step": 3300 }, { "epoch": 0.035365137026550565, "grad_norm": 17.510679244995117, "learning_rate": 9.99984950964405e-07, "loss": 0.339, "step": 3310 }, { "epoch": 0.03547198034083017, "grad_norm": 9.884395599365234, "learning_rate": 9.999848203316892e-07, "loss": 0.338, "step": 3320 }, { "epoch": 0.03557882365510978, "grad_norm": 18.537569046020508, "learning_rate": 9.999846891344525e-07, "loss": 0.3145, "step": 3330 }, { "epoch": 0.03568566696938939, "grad_norm": 12.401773452758789, "learning_rate": 9.999845573726955e-07, "loss": 0.2454, "step": 3340 }, { "epoch": 0.035792510283669, "grad_norm": 7.351169109344482, "learning_rate": 9.999844250464181e-07, "loss": 0.2624, "step": 3350 }, { "epoch": 0.03589935359794861, "grad_norm": 9.387731552124023, "learning_rate": 9.999842921556206e-07, "loss": 0.1677, "step": 3360 }, { "epoch": 0.03600619691222822, "grad_norm": 16.223052978515625, "learning_rate": 9.99984158700303e-07, "loss": 0.2985, "step": 3370 }, { "epoch": 0.036113040226507825, "grad_norm": 14.552090644836426, "learning_rate": 9.999840246804658e-07, "loss": 0.2272, "step": 3380 }, { "epoch": 0.03621988354078744, "grad_norm": 7.894333362579346, "learning_rate": 9.999838900961088e-07, "loss": 0.205, "step": 3390 }, { "epoch": 0.03632672685506704, "grad_norm": 12.590490341186523, "learning_rate": 9.999837549472322e-07, "loss": 0.2196, "step": 3400 }, { "epoch": 0.03643357016934665, "grad_norm": 16.574974060058594, "learning_rate": 9.999836192338363e-07, "loss": 0.2875, "step": 3410 }, { "epoch": 0.036540413483626265, "grad_norm": 19.677106857299805, "learning_rate": 9.999834829559211e-07, "loss": 0.3756, "step": 3420 }, { "epoch": 0.03664725679790587, "grad_norm": 8.599081039428711, "learning_rate": 9.999833461134866e-07, "loss": 0.2775, "step": 3430 }, { "epoch": 0.03675410011218548, "grad_norm": 12.015551567077637, "learning_rate": 9.999832087065335e-07, "loss": 0.3374, "step": 3440 }, { "epoch": 0.036860943426465086, "grad_norm": 18.570045471191406, "learning_rate": 9.999830707350613e-07, "loss": 0.2426, "step": 3450 }, { "epoch": 0.0369677867407447, "grad_norm": 7.632505893707275, "learning_rate": 9.999829321990706e-07, "loss": 0.4247, "step": 3460 }, { "epoch": 0.03707463005502431, "grad_norm": 5.234110355377197, "learning_rate": 9.999827930985615e-07, "loss": 0.1953, "step": 3470 }, { "epoch": 0.03718147336930391, "grad_norm": 1.8524376153945923, "learning_rate": 9.999826534335344e-07, "loss": 0.2492, "step": 3480 }, { "epoch": 0.037288316683583525, "grad_norm": 25.5422420501709, "learning_rate": 9.999825132039888e-07, "loss": 0.2819, "step": 3490 }, { "epoch": 0.03739515999786314, "grad_norm": 13.516596794128418, "learning_rate": 9.999823724099252e-07, "loss": 0.2754, "step": 3500 }, { "epoch": 0.03750200331214274, "grad_norm": 19.081682205200195, "learning_rate": 9.999822310513437e-07, "loss": 0.2796, "step": 3510 }, { "epoch": 0.03760884662642235, "grad_norm": 20.443096160888672, "learning_rate": 9.999820891282447e-07, "loss": 0.3112, "step": 3520 }, { "epoch": 0.03771568994070196, "grad_norm": 7.085078239440918, "learning_rate": 9.99981946640628e-07, "loss": 0.2216, "step": 3530 }, { "epoch": 0.03782253325498157, "grad_norm": 15.721661567687988, "learning_rate": 9.999818035884943e-07, "loss": 0.2155, "step": 3540 }, { "epoch": 0.03792937656926118, "grad_norm": 22.326013565063477, "learning_rate": 9.99981659971843e-07, "loss": 0.3088, "step": 3550 }, { "epoch": 0.038036219883540785, "grad_norm": 9.21169376373291, "learning_rate": 9.99981515790675e-07, "loss": 0.2614, "step": 3560 }, { "epoch": 0.0381430631978204, "grad_norm": 23.570236206054688, "learning_rate": 9.999813710449898e-07, "loss": 0.312, "step": 3570 }, { "epoch": 0.03824990651210001, "grad_norm": 10.835984230041504, "learning_rate": 9.99981225734788e-07, "loss": 0.2356, "step": 3580 }, { "epoch": 0.03835674982637961, "grad_norm": 20.306486129760742, "learning_rate": 9.999810798600699e-07, "loss": 0.2099, "step": 3590 }, { "epoch": 0.038463593140659225, "grad_norm": 18.58904266357422, "learning_rate": 9.999809334208353e-07, "loss": 0.3537, "step": 3600 }, { "epoch": 0.03857043645493883, "grad_norm": 14.12850284576416, "learning_rate": 9.999807864170845e-07, "loss": 0.2423, "step": 3610 }, { "epoch": 0.03867727976921844, "grad_norm": 8.83126163482666, "learning_rate": 9.999806388488175e-07, "loss": 0.2091, "step": 3620 }, { "epoch": 0.03878412308349805, "grad_norm": 22.751623153686523, "learning_rate": 9.999804907160348e-07, "loss": 0.2601, "step": 3630 }, { "epoch": 0.03889096639777766, "grad_norm": 14.923648834228516, "learning_rate": 9.999803420187362e-07, "loss": 0.2851, "step": 3640 }, { "epoch": 0.03899780971205727, "grad_norm": 24.682941436767578, "learning_rate": 9.999801927569223e-07, "loss": 0.2156, "step": 3650 }, { "epoch": 0.03910465302633688, "grad_norm": 10.236119270324707, "learning_rate": 9.999800429305927e-07, "loss": 0.2399, "step": 3660 }, { "epoch": 0.039211496340616485, "grad_norm": 15.646685600280762, "learning_rate": 9.999798925397483e-07, "loss": 0.3145, "step": 3670 }, { "epoch": 0.039318339654896096, "grad_norm": 13.889656066894531, "learning_rate": 9.999797415843886e-07, "loss": 0.414, "step": 3680 }, { "epoch": 0.0394251829691757, "grad_norm": 11.73732852935791, "learning_rate": 9.99979590064514e-07, "loss": 0.1797, "step": 3690 }, { "epoch": 0.03953202628345531, "grad_norm": 7.486172676086426, "learning_rate": 9.99979437980125e-07, "loss": 0.3161, "step": 3700 }, { "epoch": 0.039638869597734924, "grad_norm": 14.738080978393555, "learning_rate": 9.999792853312213e-07, "loss": 0.182, "step": 3710 }, { "epoch": 0.03974571291201453, "grad_norm": 10.967604637145996, "learning_rate": 9.99979132117803e-07, "loss": 0.3004, "step": 3720 }, { "epoch": 0.03985255622629414, "grad_norm": 10.578365325927734, "learning_rate": 9.999789783398708e-07, "loss": 0.2052, "step": 3730 }, { "epoch": 0.03995939954057375, "grad_norm": 10.975369453430176, "learning_rate": 9.999788239974248e-07, "loss": 0.2218, "step": 3740 }, { "epoch": 0.04006624285485336, "grad_norm": 29.775510787963867, "learning_rate": 9.999786690904646e-07, "loss": 0.2847, "step": 3750 }, { "epoch": 0.04017308616913297, "grad_norm": 19.645702362060547, "learning_rate": 9.99978513618991e-07, "loss": 0.2579, "step": 3760 }, { "epoch": 0.04027992948341257, "grad_norm": 7.35372257232666, "learning_rate": 9.999783575830038e-07, "loss": 0.2155, "step": 3770 }, { "epoch": 0.040386772797692184, "grad_norm": 11.742916107177734, "learning_rate": 9.999782009825032e-07, "loss": 0.19, "step": 3780 }, { "epoch": 0.040493616111971796, "grad_norm": 5.183712005615234, "learning_rate": 9.999780438174895e-07, "loss": 0.2481, "step": 3790 }, { "epoch": 0.0406004594262514, "grad_norm": 9.5199556350708, "learning_rate": 9.99977886087963e-07, "loss": 0.3391, "step": 3800 }, { "epoch": 0.04070730274053101, "grad_norm": 15.931138038635254, "learning_rate": 9.999777277939237e-07, "loss": 0.2886, "step": 3810 }, { "epoch": 0.04081414605481062, "grad_norm": 7.652268409729004, "learning_rate": 9.999775689353718e-07, "loss": 0.1544, "step": 3820 }, { "epoch": 0.04092098936909023, "grad_norm": 14.74348258972168, "learning_rate": 9.999774095123074e-07, "loss": 0.2536, "step": 3830 }, { "epoch": 0.04102783268336984, "grad_norm": 13.0457124710083, "learning_rate": 9.999772495247309e-07, "loss": 0.2333, "step": 3840 }, { "epoch": 0.041134675997649445, "grad_norm": 19.717552185058594, "learning_rate": 9.999770889726423e-07, "loss": 0.2479, "step": 3850 }, { "epoch": 0.041241519311929056, "grad_norm": 8.157952308654785, "learning_rate": 9.99976927856042e-07, "loss": 0.1754, "step": 3860 }, { "epoch": 0.04134836262620867, "grad_norm": 16.152061462402344, "learning_rate": 9.999767661749296e-07, "loss": 0.2505, "step": 3870 }, { "epoch": 0.04145520594048827, "grad_norm": 13.8950777053833, "learning_rate": 9.99976603929306e-07, "loss": 0.2195, "step": 3880 }, { "epoch": 0.041562049254767884, "grad_norm": 5.213844299316406, "learning_rate": 9.999764411191711e-07, "loss": 0.1387, "step": 3890 }, { "epoch": 0.04166889256904749, "grad_norm": 11.473280906677246, "learning_rate": 9.999762777445248e-07, "loss": 0.2287, "step": 3900 }, { "epoch": 0.0417757358833271, "grad_norm": 7.0992302894592285, "learning_rate": 9.999761138053678e-07, "loss": 0.3337, "step": 3910 }, { "epoch": 0.04188257919760671, "grad_norm": 5.561223983764648, "learning_rate": 9.999759493016999e-07, "loss": 0.2578, "step": 3920 }, { "epoch": 0.041989422511886317, "grad_norm": 18.11592674255371, "learning_rate": 9.999757842335217e-07, "loss": 0.2579, "step": 3930 }, { "epoch": 0.04209626582616593, "grad_norm": 9.725078582763672, "learning_rate": 9.999756186008329e-07, "loss": 0.2117, "step": 3940 }, { "epoch": 0.04220310914044554, "grad_norm": 11.440482139587402, "learning_rate": 9.999754524036337e-07, "loss": 0.2481, "step": 3950 }, { "epoch": 0.042309952454725144, "grad_norm": 15.330198287963867, "learning_rate": 9.999752856419247e-07, "loss": 0.293, "step": 3960 }, { "epoch": 0.042416795769004756, "grad_norm": 6.955080032348633, "learning_rate": 9.999751183157058e-07, "loss": 0.2353, "step": 3970 }, { "epoch": 0.04252363908328436, "grad_norm": 13.616303443908691, "learning_rate": 9.999749504249773e-07, "loss": 0.1974, "step": 3980 }, { "epoch": 0.04263048239756397, "grad_norm": 8.394429206848145, "learning_rate": 9.999747819697393e-07, "loss": 0.2025, "step": 3990 }, { "epoch": 0.042737325711843584, "grad_norm": 28.451749801635742, "learning_rate": 9.99974612949992e-07, "loss": 0.2805, "step": 4000 }, { "epoch": 0.04284416902612319, "grad_norm": 8.71789836883545, "learning_rate": 9.999744433657358e-07, "loss": 0.2086, "step": 4010 }, { "epoch": 0.0429510123404028, "grad_norm": 6.177505016326904, "learning_rate": 9.999742732169706e-07, "loss": 0.2453, "step": 4020 }, { "epoch": 0.04305785565468241, "grad_norm": 9.878747940063477, "learning_rate": 9.999741025036966e-07, "loss": 0.2159, "step": 4030 }, { "epoch": 0.043164698968962016, "grad_norm": 19.86847496032715, "learning_rate": 9.999739312259144e-07, "loss": 0.1853, "step": 4040 }, { "epoch": 0.04327154228324163, "grad_norm": 20.704540252685547, "learning_rate": 9.999737593836235e-07, "loss": 0.2231, "step": 4050 }, { "epoch": 0.04337838559752123, "grad_norm": 6.956898212432861, "learning_rate": 9.999735869768247e-07, "loss": 0.1538, "step": 4060 }, { "epoch": 0.043485228911800844, "grad_norm": 14.967305183410645, "learning_rate": 9.99973414005518e-07, "loss": 0.2635, "step": 4070 }, { "epoch": 0.043592072226080456, "grad_norm": 7.705904960632324, "learning_rate": 9.999732404697036e-07, "loss": 0.1994, "step": 4080 }, { "epoch": 0.04369891554036006, "grad_norm": 13.439447402954102, "learning_rate": 9.999730663693816e-07, "loss": 0.2218, "step": 4090 }, { "epoch": 0.04380575885463967, "grad_norm": 11.895673751831055, "learning_rate": 9.999728917045525e-07, "loss": 0.2385, "step": 4100 }, { "epoch": 0.04391260216891928, "grad_norm": 9.873332977294922, "learning_rate": 9.99972716475216e-07, "loss": 0.2076, "step": 4110 }, { "epoch": 0.04401944548319889, "grad_norm": 7.853028774261475, "learning_rate": 9.999725406813727e-07, "loss": 0.1675, "step": 4120 }, { "epoch": 0.0441262887974785, "grad_norm": 12.848979949951172, "learning_rate": 9.999723643230227e-07, "loss": 0.2379, "step": 4130 }, { "epoch": 0.044233132111758104, "grad_norm": 15.295011520385742, "learning_rate": 9.99972187400166e-07, "loss": 0.3474, "step": 4140 }, { "epoch": 0.044339975426037716, "grad_norm": 8.90441608428955, "learning_rate": 9.99972009912803e-07, "loss": 0.2614, "step": 4150 }, { "epoch": 0.04444681874031733, "grad_norm": 23.299819946289062, "learning_rate": 9.99971831860934e-07, "loss": 0.3707, "step": 4160 }, { "epoch": 0.04455366205459693, "grad_norm": 12.560900688171387, "learning_rate": 9.99971653244559e-07, "loss": 0.3106, "step": 4170 }, { "epoch": 0.044660505368876544, "grad_norm": 10.945021629333496, "learning_rate": 9.999714740636783e-07, "loss": 0.2744, "step": 4180 }, { "epoch": 0.04476734868315615, "grad_norm": 9.581548690795898, "learning_rate": 9.99971294318292e-07, "loss": 0.2368, "step": 4190 }, { "epoch": 0.04487419199743576, "grad_norm": 22.930072784423828, "learning_rate": 9.999711140084004e-07, "loss": 0.2688, "step": 4200 }, { "epoch": 0.04498103531171537, "grad_norm": 23.16007423400879, "learning_rate": 9.999709331340037e-07, "loss": 0.2129, "step": 4210 }, { "epoch": 0.045087878625994976, "grad_norm": 7.0545830726623535, "learning_rate": 9.99970751695102e-07, "loss": 0.2455, "step": 4220 }, { "epoch": 0.04519472194027459, "grad_norm": 20.041959762573242, "learning_rate": 9.999705696916956e-07, "loss": 0.2259, "step": 4230 }, { "epoch": 0.0453015652545542, "grad_norm": 7.177011966705322, "learning_rate": 9.999703871237848e-07, "loss": 0.2736, "step": 4240 }, { "epoch": 0.045408408568833804, "grad_norm": 8.863842964172363, "learning_rate": 9.999702039913697e-07, "loss": 0.1539, "step": 4250 }, { "epoch": 0.045515251883113415, "grad_norm": 6.705515384674072, "learning_rate": 9.999700202944505e-07, "loss": 0.1893, "step": 4260 }, { "epoch": 0.04562209519739302, "grad_norm": 8.0303316116333, "learning_rate": 9.999698360330273e-07, "loss": 0.2982, "step": 4270 }, { "epoch": 0.04572893851167263, "grad_norm": 15.525993347167969, "learning_rate": 9.999696512071005e-07, "loss": 0.2231, "step": 4280 }, { "epoch": 0.04583578182595224, "grad_norm": 12.259110450744629, "learning_rate": 9.999694658166702e-07, "loss": 0.3073, "step": 4290 }, { "epoch": 0.04594262514023185, "grad_norm": 13.247410774230957, "learning_rate": 9.999692798617366e-07, "loss": 0.1877, "step": 4300 }, { "epoch": 0.04604946845451146, "grad_norm": 18.07620620727539, "learning_rate": 9.999690933423e-07, "loss": 0.2769, "step": 4310 }, { "epoch": 0.04615631176879107, "grad_norm": 7.270727157592773, "learning_rate": 9.999689062583605e-07, "loss": 0.2395, "step": 4320 }, { "epoch": 0.046263155083070676, "grad_norm": 15.304471969604492, "learning_rate": 9.999687186099185e-07, "loss": 0.2292, "step": 4330 }, { "epoch": 0.04636999839735029, "grad_norm": 3.871476888656616, "learning_rate": 9.99968530396974e-07, "loss": 0.1724, "step": 4340 }, { "epoch": 0.04647684171162989, "grad_norm": 22.166658401489258, "learning_rate": 9.999683416195273e-07, "loss": 0.2086, "step": 4350 }, { "epoch": 0.0465836850259095, "grad_norm": 5.898710250854492, "learning_rate": 9.999681522775787e-07, "loss": 0.2378, "step": 4360 }, { "epoch": 0.046690528340189115, "grad_norm": 10.826058387756348, "learning_rate": 9.999679623711281e-07, "loss": 0.1839, "step": 4370 }, { "epoch": 0.04679737165446872, "grad_norm": 16.379108428955078, "learning_rate": 9.999677719001763e-07, "loss": 0.213, "step": 4380 }, { "epoch": 0.04690421496874833, "grad_norm": 5.566311359405518, "learning_rate": 9.999675808647227e-07, "loss": 0.1846, "step": 4390 }, { "epoch": 0.04701105828302794, "grad_norm": 6.090289115905762, "learning_rate": 9.999673892647682e-07, "loss": 0.2405, "step": 4400 }, { "epoch": 0.04711790159730755, "grad_norm": 7.1293745040893555, "learning_rate": 9.99967197100313e-07, "loss": 0.2109, "step": 4410 }, { "epoch": 0.04722474491158716, "grad_norm": 22.48902702331543, "learning_rate": 9.999670043713567e-07, "loss": 0.2391, "step": 4420 }, { "epoch": 0.047331588225866764, "grad_norm": 23.489065170288086, "learning_rate": 9.999668110779e-07, "loss": 0.2778, "step": 4430 }, { "epoch": 0.047438431540146375, "grad_norm": 7.930143356323242, "learning_rate": 9.999666172199432e-07, "loss": 0.1645, "step": 4440 }, { "epoch": 0.04754527485442599, "grad_norm": 9.40731143951416, "learning_rate": 9.999664227974863e-07, "loss": 0.2069, "step": 4450 }, { "epoch": 0.04765211816870559, "grad_norm": 19.922691345214844, "learning_rate": 9.999662278105296e-07, "loss": 0.2033, "step": 4460 }, { "epoch": 0.0477589614829852, "grad_norm": 6.7075114250183105, "learning_rate": 9.999660322590733e-07, "loss": 0.1674, "step": 4470 }, { "epoch": 0.047865804797264815, "grad_norm": 27.423946380615234, "learning_rate": 9.999658361431176e-07, "loss": 0.2725, "step": 4480 }, { "epoch": 0.04797264811154442, "grad_norm": 13.639397621154785, "learning_rate": 9.999656394626625e-07, "loss": 0.1935, "step": 4490 }, { "epoch": 0.04807949142582403, "grad_norm": 14.249063491821289, "learning_rate": 9.999654422177088e-07, "loss": 0.2916, "step": 4500 }, { "epoch": 0.048186334740103635, "grad_norm": 9.038057327270508, "learning_rate": 9.999652444082562e-07, "loss": 0.199, "step": 4510 }, { "epoch": 0.04829317805438325, "grad_norm": 10.663360595703125, "learning_rate": 9.999650460343053e-07, "loss": 0.1678, "step": 4520 }, { "epoch": 0.04840002136866286, "grad_norm": 19.173704147338867, "learning_rate": 9.999648470958558e-07, "loss": 0.1749, "step": 4530 }, { "epoch": 0.04850686468294246, "grad_norm": 12.786581039428711, "learning_rate": 9.999646475929084e-07, "loss": 0.3588, "step": 4540 }, { "epoch": 0.048613707997222075, "grad_norm": 16.60015296936035, "learning_rate": 9.999644475254633e-07, "loss": 0.2663, "step": 4550 }, { "epoch": 0.04872055131150168, "grad_norm": 9.13940715789795, "learning_rate": 9.999642468935205e-07, "loss": 0.2317, "step": 4560 }, { "epoch": 0.04882739462578129, "grad_norm": 9.691901206970215, "learning_rate": 9.999640456970805e-07, "loss": 0.2324, "step": 4570 }, { "epoch": 0.0489342379400609, "grad_norm": 15.278684616088867, "learning_rate": 9.999638439361431e-07, "loss": 0.2578, "step": 4580 }, { "epoch": 0.04904108125434051, "grad_norm": 7.386744499206543, "learning_rate": 9.99963641610709e-07, "loss": 0.2239, "step": 4590 }, { "epoch": 0.04914792456862012, "grad_norm": 19.595003128051758, "learning_rate": 9.999634387207781e-07, "loss": 0.2623, "step": 4600 }, { "epoch": 0.04925476788289973, "grad_norm": 15.502608299255371, "learning_rate": 9.999632352663507e-07, "loss": 0.2664, "step": 4610 }, { "epoch": 0.049361611197179335, "grad_norm": 12.146615982055664, "learning_rate": 9.99963031247427e-07, "loss": 0.1496, "step": 4620 }, { "epoch": 0.04946845451145895, "grad_norm": 9.131296157836914, "learning_rate": 9.999628266640075e-07, "loss": 0.2731, "step": 4630 }, { "epoch": 0.04957529782573855, "grad_norm": 6.904205322265625, "learning_rate": 9.99962621516092e-07, "loss": 0.2166, "step": 4640 }, { "epoch": 0.04968214114001816, "grad_norm": 9.370551109313965, "learning_rate": 9.999624158036813e-07, "loss": 0.1579, "step": 4650 }, { "epoch": 0.049788984454297774, "grad_norm": 11.68946647644043, "learning_rate": 9.99962209526775e-07, "loss": 0.1941, "step": 4660 }, { "epoch": 0.04989582776857738, "grad_norm": 11.660593032836914, "learning_rate": 9.99962002685374e-07, "loss": 0.2008, "step": 4670 }, { "epoch": 0.05000267108285699, "grad_norm": 20.267566680908203, "learning_rate": 9.999617952794778e-07, "loss": 0.2349, "step": 4680 }, { "epoch": 0.0501095143971366, "grad_norm": 4.1346235275268555, "learning_rate": 9.99961587309087e-07, "loss": 0.1977, "step": 4690 }, { "epoch": 0.05021635771141621, "grad_norm": 9.610401153564453, "learning_rate": 9.999613787742021e-07, "loss": 0.2221, "step": 4700 }, { "epoch": 0.05032320102569582, "grad_norm": 23.02598762512207, "learning_rate": 9.99961169674823e-07, "loss": 0.1759, "step": 4710 }, { "epoch": 0.05043004433997542, "grad_norm": 5.9471306800842285, "learning_rate": 9.999609600109498e-07, "loss": 0.1952, "step": 4720 }, { "epoch": 0.050536887654255035, "grad_norm": 9.397197723388672, "learning_rate": 9.99960749782583e-07, "loss": 0.2014, "step": 4730 }, { "epoch": 0.050643730968534646, "grad_norm": 6.339483737945557, "learning_rate": 9.99960538989723e-07, "loss": 0.2544, "step": 4740 }, { "epoch": 0.05075057428281425, "grad_norm": 62.45442581176758, "learning_rate": 9.999603276323695e-07, "loss": 0.2031, "step": 4750 }, { "epoch": 0.05085741759709386, "grad_norm": 16.126590728759766, "learning_rate": 9.999601157105232e-07, "loss": 0.2883, "step": 4760 }, { "epoch": 0.050964260911373474, "grad_norm": 18.117076873779297, "learning_rate": 9.999599032241843e-07, "loss": 0.2561, "step": 4770 }, { "epoch": 0.05107110422565308, "grad_norm": 11.021537780761719, "learning_rate": 9.99959690173353e-07, "loss": 0.2947, "step": 4780 }, { "epoch": 0.05117794753993269, "grad_norm": 6.165689468383789, "learning_rate": 9.999594765580292e-07, "loss": 0.1871, "step": 4790 }, { "epoch": 0.051284790854212295, "grad_norm": 13.0767822265625, "learning_rate": 9.999592623782135e-07, "loss": 0.1915, "step": 4800 }, { "epoch": 0.051391634168491906, "grad_norm": 8.887067794799805, "learning_rate": 9.99959047633906e-07, "loss": 0.1958, "step": 4810 }, { "epoch": 0.05149847748277152, "grad_norm": 7.286264896392822, "learning_rate": 9.999588323251075e-07, "loss": 0.2165, "step": 4820 }, { "epoch": 0.05160532079705112, "grad_norm": 12.059643745422363, "learning_rate": 9.999586164518173e-07, "loss": 0.2122, "step": 4830 }, { "epoch": 0.051712164111330734, "grad_norm": 8.017216682434082, "learning_rate": 9.99958400014036e-07, "loss": 0.1828, "step": 4840 }, { "epoch": 0.051819007425610346, "grad_norm": 16.11429214477539, "learning_rate": 9.999581830117642e-07, "loss": 0.2461, "step": 4850 }, { "epoch": 0.05192585073988995, "grad_norm": 11.35747241973877, "learning_rate": 9.999579654450017e-07, "loss": 0.3967, "step": 4860 }, { "epoch": 0.05203269405416956, "grad_norm": 3.1575634479522705, "learning_rate": 9.99957747313749e-07, "loss": 0.2112, "step": 4870 }, { "epoch": 0.05213953736844917, "grad_norm": 10.682443618774414, "learning_rate": 9.999575286180063e-07, "loss": 0.1384, "step": 4880 }, { "epoch": 0.05224638068272878, "grad_norm": 8.626518249511719, "learning_rate": 9.999573093577736e-07, "loss": 0.1373, "step": 4890 }, { "epoch": 0.05235322399700839, "grad_norm": 5.9906439781188965, "learning_rate": 9.999570895330516e-07, "loss": 0.2066, "step": 4900 }, { "epoch": 0.052460067311287994, "grad_norm": 5.5821919441223145, "learning_rate": 9.999568691438403e-07, "loss": 0.2434, "step": 4910 }, { "epoch": 0.052566910625567606, "grad_norm": 10.340855598449707, "learning_rate": 9.999566481901399e-07, "loss": 0.2274, "step": 4920 }, { "epoch": 0.05267375393984721, "grad_norm": 10.682199478149414, "learning_rate": 9.999564266719507e-07, "loss": 0.2512, "step": 4930 }, { "epoch": 0.05278059725412682, "grad_norm": 8.445542335510254, "learning_rate": 9.999562045892729e-07, "loss": 0.1556, "step": 4940 }, { "epoch": 0.052887440568406434, "grad_norm": 2.430833101272583, "learning_rate": 9.99955981942107e-07, "loss": 0.1482, "step": 4950 }, { "epoch": 0.05299428388268604, "grad_norm": 18.8692569732666, "learning_rate": 9.999557587304528e-07, "loss": 0.2348, "step": 4960 }, { "epoch": 0.05310112719696565, "grad_norm": 7.013325214385986, "learning_rate": 9.999555349543112e-07, "loss": 0.2043, "step": 4970 }, { "epoch": 0.05320797051124526, "grad_norm": 15.100579261779785, "learning_rate": 9.999553106136818e-07, "loss": 0.2928, "step": 4980 }, { "epoch": 0.053314813825524866, "grad_norm": 15.436379432678223, "learning_rate": 9.99955085708565e-07, "loss": 0.1886, "step": 4990 }, { "epoch": 0.05342165713980448, "grad_norm": 7.473067760467529, "learning_rate": 9.999548602389614e-07, "loss": 0.2162, "step": 5000 }, { "epoch": 0.05352850045408408, "grad_norm": 13.78758430480957, "learning_rate": 9.99954634204871e-07, "loss": 0.252, "step": 5010 }, { "epoch": 0.053635343768363694, "grad_norm": 18.302783966064453, "learning_rate": 9.99954407606294e-07, "loss": 0.289, "step": 5020 }, { "epoch": 0.053742187082643306, "grad_norm": 5.95299768447876, "learning_rate": 9.999541804432307e-07, "loss": 0.1834, "step": 5030 }, { "epoch": 0.05384903039692291, "grad_norm": 26.51877784729004, "learning_rate": 9.999539527156814e-07, "loss": 0.2203, "step": 5040 }, { "epoch": 0.05395587371120252, "grad_norm": 14.122089385986328, "learning_rate": 9.999537244236467e-07, "loss": 0.1783, "step": 5050 }, { "epoch": 0.054062717025482133, "grad_norm": 15.364194869995117, "learning_rate": 9.999534955671262e-07, "loss": 0.3117, "step": 5060 }, { "epoch": 0.05416956033976174, "grad_norm": 13.347944259643555, "learning_rate": 9.999532661461204e-07, "loss": 0.2992, "step": 5070 }, { "epoch": 0.05427640365404135, "grad_norm": 5.314188480377197, "learning_rate": 9.9995303616063e-07, "loss": 0.1841, "step": 5080 }, { "epoch": 0.054383246968320954, "grad_norm": 11.561779022216797, "learning_rate": 9.999528056106546e-07, "loss": 0.2971, "step": 5090 }, { "epoch": 0.054490090282600566, "grad_norm": 4.737578392028809, "learning_rate": 9.999525744961947e-07, "loss": 0.1818, "step": 5100 }, { "epoch": 0.05459693359688018, "grad_norm": 6.85491418838501, "learning_rate": 9.999523428172507e-07, "loss": 0.2491, "step": 5110 }, { "epoch": 0.05470377691115978, "grad_norm": 16.26641273498535, "learning_rate": 9.999521105738228e-07, "loss": 0.1909, "step": 5120 }, { "epoch": 0.054810620225439394, "grad_norm": 8.77699089050293, "learning_rate": 9.99951877765911e-07, "loss": 0.1636, "step": 5130 }, { "epoch": 0.054917463539719005, "grad_norm": 18.471126556396484, "learning_rate": 9.999516443935161e-07, "loss": 0.2697, "step": 5140 }, { "epoch": 0.05502430685399861, "grad_norm": 11.392878532409668, "learning_rate": 9.99951410456638e-07, "loss": 0.2898, "step": 5150 }, { "epoch": 0.05513115016827822, "grad_norm": 11.108060836791992, "learning_rate": 9.99951175955277e-07, "loss": 0.196, "step": 5160 }, { "epoch": 0.055237993482557826, "grad_norm": 19.535079956054688, "learning_rate": 9.999509408894334e-07, "loss": 0.1157, "step": 5170 }, { "epoch": 0.05534483679683744, "grad_norm": 7.110168933868408, "learning_rate": 9.999507052591073e-07, "loss": 0.2004, "step": 5180 }, { "epoch": 0.05545168011111705, "grad_norm": 8.139898300170898, "learning_rate": 9.999504690642993e-07, "loss": 0.1552, "step": 5190 }, { "epoch": 0.055558523425396654, "grad_norm": 12.095194816589355, "learning_rate": 9.999502323050096e-07, "loss": 0.1808, "step": 5200 }, { "epoch": 0.055665366739676266, "grad_norm": 9.669289588928223, "learning_rate": 9.99949994981238e-07, "loss": 0.1843, "step": 5210 }, { "epoch": 0.05577221005395588, "grad_norm": 9.777056694030762, "learning_rate": 9.999497570929853e-07, "loss": 0.2047, "step": 5220 }, { "epoch": 0.05587905336823548, "grad_norm": 10.718254089355469, "learning_rate": 9.999495186402517e-07, "loss": 0.2521, "step": 5230 }, { "epoch": 0.05598589668251509, "grad_norm": 14.618975639343262, "learning_rate": 9.999492796230373e-07, "loss": 0.2304, "step": 5240 }, { "epoch": 0.0560927399967947, "grad_norm": 7.044897079467773, "learning_rate": 9.999490400413424e-07, "loss": 0.2266, "step": 5250 }, { "epoch": 0.05619958331107431, "grad_norm": 9.298372268676758, "learning_rate": 9.999487998951673e-07, "loss": 0.1578, "step": 5260 }, { "epoch": 0.05630642662535392, "grad_norm": 11.20351505279541, "learning_rate": 9.999485591845122e-07, "loss": 0.256, "step": 5270 }, { "epoch": 0.056413269939633526, "grad_norm": 3.2677834033966064, "learning_rate": 9.999483179093775e-07, "loss": 0.2025, "step": 5280 }, { "epoch": 0.05652011325391314, "grad_norm": 8.638566970825195, "learning_rate": 9.999480760697634e-07, "loss": 0.2979, "step": 5290 }, { "epoch": 0.05662695656819274, "grad_norm": 6.7062087059021, "learning_rate": 9.999478336656702e-07, "loss": 0.1481, "step": 5300 }, { "epoch": 0.056733799882472354, "grad_norm": 13.070672988891602, "learning_rate": 9.999475906970982e-07, "loss": 0.1991, "step": 5310 }, { "epoch": 0.056840643196751965, "grad_norm": 19.11290740966797, "learning_rate": 9.999473471640477e-07, "loss": 0.2282, "step": 5320 }, { "epoch": 0.05694748651103157, "grad_norm": 8.875261306762695, "learning_rate": 9.999471030665188e-07, "loss": 0.1867, "step": 5330 }, { "epoch": 0.05705432982531118, "grad_norm": 14.288536071777344, "learning_rate": 9.99946858404512e-07, "loss": 0.1907, "step": 5340 }, { "epoch": 0.05716117313959079, "grad_norm": 20.174917221069336, "learning_rate": 9.999466131780272e-07, "loss": 0.2089, "step": 5350 }, { "epoch": 0.0572680164538704, "grad_norm": 3.861151695251465, "learning_rate": 9.999463673870652e-07, "loss": 0.3043, "step": 5360 }, { "epoch": 0.05737485976815001, "grad_norm": 16.595224380493164, "learning_rate": 9.99946121031626e-07, "loss": 0.2419, "step": 5370 }, { "epoch": 0.057481703082429614, "grad_norm": 4.877514839172363, "learning_rate": 9.999458741117098e-07, "loss": 0.2347, "step": 5380 }, { "epoch": 0.057588546396709225, "grad_norm": 10.695318222045898, "learning_rate": 9.99945626627317e-07, "loss": 0.1419, "step": 5390 }, { "epoch": 0.05769538971098884, "grad_norm": 7.145232200622559, "learning_rate": 9.99945378578448e-07, "loss": 0.2257, "step": 5400 }, { "epoch": 0.05780223302526844, "grad_norm": 5.129803657531738, "learning_rate": 9.999451299651026e-07, "loss": 0.145, "step": 5410 }, { "epoch": 0.05790907633954805, "grad_norm": 10.803520202636719, "learning_rate": 9.999448807872817e-07, "loss": 0.1676, "step": 5420 }, { "epoch": 0.058015919653827665, "grad_norm": 7.378076553344727, "learning_rate": 9.999446310449852e-07, "loss": 0.2892, "step": 5430 }, { "epoch": 0.05812276296810727, "grad_norm": 11.625714302062988, "learning_rate": 9.999443807382136e-07, "loss": 0.2269, "step": 5440 }, { "epoch": 0.05822960628238688, "grad_norm": 3.2761993408203125, "learning_rate": 9.999441298669669e-07, "loss": 0.2119, "step": 5450 }, { "epoch": 0.058336449596666486, "grad_norm": 8.6680269241333, "learning_rate": 9.999438784312455e-07, "loss": 0.2619, "step": 5460 }, { "epoch": 0.0584432929109461, "grad_norm": 10.887924194335938, "learning_rate": 9.9994362643105e-07, "loss": 0.1919, "step": 5470 }, { "epoch": 0.05855013622522571, "grad_norm": 10.655946731567383, "learning_rate": 9.999433738663803e-07, "loss": 0.1751, "step": 5480 }, { "epoch": 0.05865697953950531, "grad_norm": 8.049858093261719, "learning_rate": 9.999431207372366e-07, "loss": 0.1615, "step": 5490 }, { "epoch": 0.058763822853784925, "grad_norm": 21.363178253173828, "learning_rate": 9.999428670436196e-07, "loss": 0.2092, "step": 5500 }, { "epoch": 0.05887066616806454, "grad_norm": 10.402629852294922, "learning_rate": 9.999426127855292e-07, "loss": 0.3071, "step": 5510 }, { "epoch": 0.05897750948234414, "grad_norm": 8.299538612365723, "learning_rate": 9.99942357962966e-07, "loss": 0.1224, "step": 5520 }, { "epoch": 0.05908435279662375, "grad_norm": 16.648521423339844, "learning_rate": 9.9994210257593e-07, "loss": 0.3099, "step": 5530 }, { "epoch": 0.05919119611090336, "grad_norm": 5.906061172485352, "learning_rate": 9.999418466244218e-07, "loss": 0.1066, "step": 5540 }, { "epoch": 0.05929803942518297, "grad_norm": 5.711528778076172, "learning_rate": 9.999415901084416e-07, "loss": 0.1598, "step": 5550 }, { "epoch": 0.05940488273946258, "grad_norm": 12.607137680053711, "learning_rate": 9.999413330279894e-07, "loss": 0.2071, "step": 5560 }, { "epoch": 0.059511726053742185, "grad_norm": 6.531956195831299, "learning_rate": 9.999410753830658e-07, "loss": 0.2373, "step": 5570 }, { "epoch": 0.0596185693680218, "grad_norm": 7.07895040512085, "learning_rate": 9.999408171736709e-07, "loss": 0.214, "step": 5580 }, { "epoch": 0.05972541268230141, "grad_norm": 11.290973663330078, "learning_rate": 9.999405583998052e-07, "loss": 0.1639, "step": 5590 }, { "epoch": 0.05983225599658101, "grad_norm": 7.284054756164551, "learning_rate": 9.999402990614689e-07, "loss": 0.1812, "step": 5600 }, { "epoch": 0.059939099310860625, "grad_norm": 1.312965989112854, "learning_rate": 9.99940039158662e-07, "loss": 0.1415, "step": 5610 }, { "epoch": 0.06004594262514023, "grad_norm": 13.831145286560059, "learning_rate": 9.999397786913854e-07, "loss": 0.1676, "step": 5620 }, { "epoch": 0.06015278593941984, "grad_norm": 30.47917366027832, "learning_rate": 9.999395176596388e-07, "loss": 0.2234, "step": 5630 }, { "epoch": 0.06025962925369945, "grad_norm": 7.701778888702393, "learning_rate": 9.99939256063423e-07, "loss": 0.1321, "step": 5640 }, { "epoch": 0.06036647256797906, "grad_norm": 4.588547229766846, "learning_rate": 9.99938993902738e-07, "loss": 0.2175, "step": 5650 }, { "epoch": 0.06047331588225867, "grad_norm": 4.881935119628906, "learning_rate": 9.99938731177584e-07, "loss": 0.1988, "step": 5660 }, { "epoch": 0.06058015919653827, "grad_norm": 9.763856887817383, "learning_rate": 9.999384678879614e-07, "loss": 0.2573, "step": 5670 }, { "epoch": 0.060687002510817885, "grad_norm": 4.630614280700684, "learning_rate": 9.999382040338708e-07, "loss": 0.2167, "step": 5680 }, { "epoch": 0.060793845825097496, "grad_norm": 5.215827941894531, "learning_rate": 9.99937939615312e-07, "loss": 0.1232, "step": 5690 }, { "epoch": 0.0609006891393771, "grad_norm": 6.995580196380615, "learning_rate": 9.999376746322856e-07, "loss": 0.2181, "step": 5700 }, { "epoch": 0.06100753245365671, "grad_norm": 6.486918926239014, "learning_rate": 9.999374090847918e-07, "loss": 0.1386, "step": 5710 }, { "epoch": 0.061114375767936324, "grad_norm": 11.828004837036133, "learning_rate": 9.99937142972831e-07, "loss": 0.2567, "step": 5720 }, { "epoch": 0.06122121908221593, "grad_norm": 28.82221221923828, "learning_rate": 9.999368762964036e-07, "loss": 0.2292, "step": 5730 }, { "epoch": 0.06132806239649554, "grad_norm": 4.990237712860107, "learning_rate": 9.999366090555095e-07, "loss": 0.1631, "step": 5740 }, { "epoch": 0.061434905710775145, "grad_norm": 7.940027236938477, "learning_rate": 9.999363412501491e-07, "loss": 0.1824, "step": 5750 }, { "epoch": 0.06154174902505476, "grad_norm": 8.283963203430176, "learning_rate": 9.999360728803232e-07, "loss": 0.2393, "step": 5760 }, { "epoch": 0.06164859233933437, "grad_norm": 17.214176177978516, "learning_rate": 9.999358039460315e-07, "loss": 0.1138, "step": 5770 }, { "epoch": 0.06175543565361397, "grad_norm": 17.697834014892578, "learning_rate": 9.999355344472746e-07, "loss": 0.1996, "step": 5780 }, { "epoch": 0.061862278967893584, "grad_norm": 2.5064120292663574, "learning_rate": 9.999352643840528e-07, "loss": 0.2275, "step": 5790 }, { "epoch": 0.061969122282173196, "grad_norm": 9.746196746826172, "learning_rate": 9.999349937563663e-07, "loss": 0.1445, "step": 5800 }, { "epoch": 0.0620759655964528, "grad_norm": 7.51751708984375, "learning_rate": 9.999347225642153e-07, "loss": 0.3302, "step": 5810 }, { "epoch": 0.06218280891073241, "grad_norm": 8.64585018157959, "learning_rate": 9.999344508076006e-07, "loss": 0.0999, "step": 5820 }, { "epoch": 0.06228965222501202, "grad_norm": 11.483014106750488, "learning_rate": 9.99934178486522e-07, "loss": 0.2163, "step": 5830 }, { "epoch": 0.06239649553929163, "grad_norm": 20.467042922973633, "learning_rate": 9.9993390560098e-07, "loss": 0.1901, "step": 5840 }, { "epoch": 0.06250333885357123, "grad_norm": 14.429570198059082, "learning_rate": 9.99933632150975e-07, "loss": 0.2295, "step": 5850 }, { "epoch": 0.06261018216785084, "grad_norm": 6.041487693786621, "learning_rate": 9.99933358136507e-07, "loss": 0.1659, "step": 5860 }, { "epoch": 0.06271702548213046, "grad_norm": 12.282051086425781, "learning_rate": 9.999330835575766e-07, "loss": 0.187, "step": 5870 }, { "epoch": 0.06282386879641007, "grad_norm": 4.900712966918945, "learning_rate": 9.99932808414184e-07, "loss": 0.1852, "step": 5880 }, { "epoch": 0.06293071211068968, "grad_norm": 5.871162414550781, "learning_rate": 9.999325327063296e-07, "loss": 0.1617, "step": 5890 }, { "epoch": 0.06303755542496928, "grad_norm": 18.529428482055664, "learning_rate": 9.999322564340136e-07, "loss": 0.1799, "step": 5900 }, { "epoch": 0.06314439873924889, "grad_norm": 23.932392120361328, "learning_rate": 9.999319795972364e-07, "loss": 0.1832, "step": 5910 }, { "epoch": 0.0632512420535285, "grad_norm": 20.87433433532715, "learning_rate": 9.99931702195998e-07, "loss": 0.1635, "step": 5920 }, { "epoch": 0.06335808536780811, "grad_norm": 14.112552642822266, "learning_rate": 9.999314242302992e-07, "loss": 0.1512, "step": 5930 }, { "epoch": 0.06346492868208772, "grad_norm": 9.770139694213867, "learning_rate": 9.9993114570014e-07, "loss": 0.152, "step": 5940 }, { "epoch": 0.06357177199636732, "grad_norm": 12.457402229309082, "learning_rate": 9.999308666055209e-07, "loss": 0.2558, "step": 5950 }, { "epoch": 0.06367861531064693, "grad_norm": 17.888538360595703, "learning_rate": 9.999305869464422e-07, "loss": 0.1811, "step": 5960 }, { "epoch": 0.06378545862492654, "grad_norm": 12.90222454071045, "learning_rate": 9.999303067229038e-07, "loss": 0.1425, "step": 5970 }, { "epoch": 0.06389230193920616, "grad_norm": 7.377408027648926, "learning_rate": 9.999300259349066e-07, "loss": 0.2994, "step": 5980 }, { "epoch": 0.06399914525348577, "grad_norm": 22.4921817779541, "learning_rate": 9.999297445824506e-07, "loss": 0.2388, "step": 5990 }, { "epoch": 0.06410598856776538, "grad_norm": 1.537762999534607, "learning_rate": 9.999294626655363e-07, "loss": 0.144, "step": 6000 }, { "epoch": 0.06421283188204498, "grad_norm": 2.00443172454834, "learning_rate": 9.99929180184164e-07, "loss": 0.1947, "step": 6010 }, { "epoch": 0.06431967519632459, "grad_norm": 10.811220169067383, "learning_rate": 9.999288971383337e-07, "loss": 0.2468, "step": 6020 }, { "epoch": 0.0644265185106042, "grad_norm": 27.78573226928711, "learning_rate": 9.99928613528046e-07, "loss": 0.1974, "step": 6030 }, { "epoch": 0.06453336182488381, "grad_norm": 6.783809185028076, "learning_rate": 9.999283293533012e-07, "loss": 0.1529, "step": 6040 }, { "epoch": 0.06464020513916342, "grad_norm": 2.3171942234039307, "learning_rate": 9.999280446140993e-07, "loss": 0.1568, "step": 6050 }, { "epoch": 0.06474704845344302, "grad_norm": 7.527047634124756, "learning_rate": 9.999277593104413e-07, "loss": 0.1921, "step": 6060 }, { "epoch": 0.06485389176772263, "grad_norm": 14.65018367767334, "learning_rate": 9.99927473442327e-07, "loss": 0.2651, "step": 6070 }, { "epoch": 0.06496073508200224, "grad_norm": 19.61504554748535, "learning_rate": 9.999271870097568e-07, "loss": 0.2889, "step": 6080 }, { "epoch": 0.06506757839628186, "grad_norm": 6.15338659286499, "learning_rate": 9.99926900012731e-07, "loss": 0.1754, "step": 6090 }, { "epoch": 0.06517442171056147, "grad_norm": 5.669368267059326, "learning_rate": 9.999266124512502e-07, "loss": 0.292, "step": 6100 }, { "epoch": 0.06528126502484106, "grad_norm": 18.437314987182617, "learning_rate": 9.999263243253144e-07, "loss": 0.2649, "step": 6110 }, { "epoch": 0.06538810833912068, "grad_norm": 11.388840675354004, "learning_rate": 9.99926035634924e-07, "loss": 0.1337, "step": 6120 }, { "epoch": 0.06549495165340029, "grad_norm": 8.37376594543457, "learning_rate": 9.999257463800797e-07, "loss": 0.2196, "step": 6130 }, { "epoch": 0.0656017949676799, "grad_norm": 8.434807777404785, "learning_rate": 9.99925456560781e-07, "loss": 0.1391, "step": 6140 }, { "epoch": 0.06570863828195951, "grad_norm": 37.06401824951172, "learning_rate": 9.999251661770292e-07, "loss": 0.2874, "step": 6150 }, { "epoch": 0.06581548159623911, "grad_norm": 11.949532508850098, "learning_rate": 9.99924875228824e-07, "loss": 0.183, "step": 6160 }, { "epoch": 0.06592232491051872, "grad_norm": 15.16279125213623, "learning_rate": 9.999245837161656e-07, "loss": 0.1463, "step": 6170 }, { "epoch": 0.06602916822479833, "grad_norm": 18.227584838867188, "learning_rate": 9.999242916390549e-07, "loss": 0.1881, "step": 6180 }, { "epoch": 0.06613601153907794, "grad_norm": 5.406609058380127, "learning_rate": 9.99923998997492e-07, "loss": 0.1867, "step": 6190 }, { "epoch": 0.06624285485335756, "grad_norm": 2.5688674449920654, "learning_rate": 9.99923705791477e-07, "loss": 0.2208, "step": 6200 }, { "epoch": 0.06634969816763717, "grad_norm": 1.8454196453094482, "learning_rate": 9.999234120210104e-07, "loss": 0.1667, "step": 6210 }, { "epoch": 0.06645654148191676, "grad_norm": 7.30733060836792, "learning_rate": 9.999231176860925e-07, "loss": 0.1656, "step": 6220 }, { "epoch": 0.06656338479619638, "grad_norm": 9.250910758972168, "learning_rate": 9.999228227867237e-07, "loss": 0.1792, "step": 6230 }, { "epoch": 0.06667022811047599, "grad_norm": 13.478572845458984, "learning_rate": 9.999225273229047e-07, "loss": 0.1529, "step": 6240 }, { "epoch": 0.0667770714247556, "grad_norm": 8.734251022338867, "learning_rate": 9.99922231294635e-07, "loss": 0.1917, "step": 6250 }, { "epoch": 0.06688391473903521, "grad_norm": 9.941147804260254, "learning_rate": 9.999219347019153e-07, "loss": 0.1671, "step": 6260 }, { "epoch": 0.06699075805331481, "grad_norm": 13.484299659729004, "learning_rate": 9.999216375447463e-07, "loss": 0.1712, "step": 6270 }, { "epoch": 0.06709760136759442, "grad_norm": 15.045302391052246, "learning_rate": 9.99921339823128e-07, "loss": 0.1453, "step": 6280 }, { "epoch": 0.06720444468187403, "grad_norm": 15.816582679748535, "learning_rate": 9.999210415370608e-07, "loss": 0.1433, "step": 6290 }, { "epoch": 0.06731128799615364, "grad_norm": 19.36079216003418, "learning_rate": 9.999207426865448e-07, "loss": 0.1312, "step": 6300 }, { "epoch": 0.06741813131043325, "grad_norm": 8.594057083129883, "learning_rate": 9.999204432715807e-07, "loss": 0.1451, "step": 6310 }, { "epoch": 0.06752497462471285, "grad_norm": 10.244545936584473, "learning_rate": 9.999201432921686e-07, "loss": 0.1815, "step": 6320 }, { "epoch": 0.06763181793899246, "grad_norm": 4.853641510009766, "learning_rate": 9.999198427483089e-07, "loss": 0.2483, "step": 6330 }, { "epoch": 0.06773866125327208, "grad_norm": 9.808058738708496, "learning_rate": 9.999195416400022e-07, "loss": 0.2317, "step": 6340 }, { "epoch": 0.06784550456755169, "grad_norm": 11.013009071350098, "learning_rate": 9.999192399672483e-07, "loss": 0.2081, "step": 6350 }, { "epoch": 0.0679523478818313, "grad_norm": 19.782459259033203, "learning_rate": 9.99918937730048e-07, "loss": 0.1809, "step": 6360 }, { "epoch": 0.06805919119611091, "grad_norm": 3.7591307163238525, "learning_rate": 9.999186349284015e-07, "loss": 0.218, "step": 6370 }, { "epoch": 0.06816603451039051, "grad_norm": 10.801717758178711, "learning_rate": 9.999183315623092e-07, "loss": 0.285, "step": 6380 }, { "epoch": 0.06827287782467012, "grad_norm": 4.665641784667969, "learning_rate": 9.999180276317712e-07, "loss": 0.2115, "step": 6390 }, { "epoch": 0.06837972113894973, "grad_norm": 34.86461639404297, "learning_rate": 9.999177231367882e-07, "loss": 0.2314, "step": 6400 }, { "epoch": 0.06848656445322934, "grad_norm": 9.38023853302002, "learning_rate": 9.999174180773602e-07, "loss": 0.1811, "step": 6410 }, { "epoch": 0.06859340776750895, "grad_norm": 17.54096221923828, "learning_rate": 9.999171124534877e-07, "loss": 0.2341, "step": 6420 }, { "epoch": 0.06870025108178855, "grad_norm": 7.52151346206665, "learning_rate": 9.999168062651712e-07, "loss": 0.2833, "step": 6430 }, { "epoch": 0.06880709439606816, "grad_norm": 8.245055198669434, "learning_rate": 9.999164995124107e-07, "loss": 0.1731, "step": 6440 }, { "epoch": 0.06891393771034778, "grad_norm": 6.408012866973877, "learning_rate": 9.99916192195207e-07, "loss": 0.135, "step": 6450 }, { "epoch": 0.06902078102462739, "grad_norm": 25.787927627563477, "learning_rate": 9.9991588431356e-07, "loss": 0.2186, "step": 6460 }, { "epoch": 0.069127624338907, "grad_norm": 15.844432830810547, "learning_rate": 9.999155758674702e-07, "loss": 0.1822, "step": 6470 }, { "epoch": 0.0692344676531866, "grad_norm": 14.955353736877441, "learning_rate": 9.999152668569382e-07, "loss": 0.1572, "step": 6480 }, { "epoch": 0.06934131096746621, "grad_norm": 18.827726364135742, "learning_rate": 9.99914957281964e-07, "loss": 0.2306, "step": 6490 }, { "epoch": 0.06944815428174582, "grad_norm": 21.888784408569336, "learning_rate": 9.99914647142548e-07, "loss": 0.2358, "step": 6500 }, { "epoch": 0.06955499759602543, "grad_norm": 12.911130905151367, "learning_rate": 9.99914336438691e-07, "loss": 0.2419, "step": 6510 }, { "epoch": 0.06966184091030504, "grad_norm": 8.689960479736328, "learning_rate": 9.999140251703924e-07, "loss": 0.1592, "step": 6520 }, { "epoch": 0.06976868422458464, "grad_norm": 8.375138282775879, "learning_rate": 9.999137133376534e-07, "loss": 0.2445, "step": 6530 }, { "epoch": 0.06987552753886425, "grad_norm": 10.130725860595703, "learning_rate": 9.99913400940474e-07, "loss": 0.1855, "step": 6540 }, { "epoch": 0.06998237085314386, "grad_norm": 15.058960914611816, "learning_rate": 9.99913087978855e-07, "loss": 0.1888, "step": 6550 }, { "epoch": 0.07008921416742347, "grad_norm": 8.17198371887207, "learning_rate": 9.999127744527962e-07, "loss": 0.3002, "step": 6560 }, { "epoch": 0.07019605748170309, "grad_norm": 7.013925552368164, "learning_rate": 9.99912460362298e-07, "loss": 0.1205, "step": 6570 }, { "epoch": 0.0703029007959827, "grad_norm": 10.72343635559082, "learning_rate": 9.99912145707361e-07, "loss": 0.1887, "step": 6580 }, { "epoch": 0.0704097441102623, "grad_norm": 14.470781326293945, "learning_rate": 9.999118304879855e-07, "loss": 0.1513, "step": 6590 }, { "epoch": 0.07051658742454191, "grad_norm": 7.938608646392822, "learning_rate": 9.999115147041717e-07, "loss": 0.2081, "step": 6600 }, { "epoch": 0.07062343073882152, "grad_norm": 10.903901100158691, "learning_rate": 9.999111983559204e-07, "loss": 0.1972, "step": 6610 }, { "epoch": 0.07073027405310113, "grad_norm": 8.548347473144531, "learning_rate": 9.999108814432313e-07, "loss": 0.2046, "step": 6620 }, { "epoch": 0.07083711736738074, "grad_norm": 2.0780656337738037, "learning_rate": 9.99910563966105e-07, "loss": 0.2102, "step": 6630 }, { "epoch": 0.07094396068166034, "grad_norm": 6.593322277069092, "learning_rate": 9.999102459245421e-07, "loss": 0.1992, "step": 6640 }, { "epoch": 0.07105080399593995, "grad_norm": 23.55541229248047, "learning_rate": 9.99909927318543e-07, "loss": 0.2405, "step": 6650 }, { "epoch": 0.07115764731021956, "grad_norm": 9.410792350769043, "learning_rate": 9.999096081481076e-07, "loss": 0.1723, "step": 6660 }, { "epoch": 0.07126449062449917, "grad_norm": 9.977240562438965, "learning_rate": 9.999092884132364e-07, "loss": 0.1592, "step": 6670 }, { "epoch": 0.07137133393877879, "grad_norm": 11.389723777770996, "learning_rate": 9.999089681139302e-07, "loss": 0.2217, "step": 6680 }, { "epoch": 0.07147817725305838, "grad_norm": 3.9559080600738525, "learning_rate": 9.99908647250189e-07, "loss": 0.1057, "step": 6690 }, { "epoch": 0.071585020567338, "grad_norm": 5.918581962585449, "learning_rate": 9.99908325822013e-07, "loss": 0.2282, "step": 6700 }, { "epoch": 0.0716918638816176, "grad_norm": 9.412646293640137, "learning_rate": 9.99908003829403e-07, "loss": 0.28, "step": 6710 }, { "epoch": 0.07179870719589722, "grad_norm": 19.0795841217041, "learning_rate": 9.99907681272359e-07, "loss": 0.2815, "step": 6720 }, { "epoch": 0.07190555051017683, "grad_norm": 4.713856220245361, "learning_rate": 9.999073581508815e-07, "loss": 0.1775, "step": 6730 }, { "epoch": 0.07201239382445644, "grad_norm": 9.491265296936035, "learning_rate": 9.99907034464971e-07, "loss": 0.085, "step": 6740 }, { "epoch": 0.07211923713873604, "grad_norm": 4.101664066314697, "learning_rate": 9.999067102146275e-07, "loss": 0.2029, "step": 6750 }, { "epoch": 0.07222608045301565, "grad_norm": 15.398086547851562, "learning_rate": 9.999063853998518e-07, "loss": 0.1596, "step": 6760 }, { "epoch": 0.07233292376729526, "grad_norm": 22.929306030273438, "learning_rate": 9.99906060020644e-07, "loss": 0.1687, "step": 6770 }, { "epoch": 0.07243976708157487, "grad_norm": 6.124815464019775, "learning_rate": 9.999057340770046e-07, "loss": 0.1083, "step": 6780 }, { "epoch": 0.07254661039585449, "grad_norm": 11.15190601348877, "learning_rate": 9.999054075689338e-07, "loss": 0.1948, "step": 6790 }, { "epoch": 0.07265345371013408, "grad_norm": 4.6408586502075195, "learning_rate": 9.99905080496432e-07, "loss": 0.2082, "step": 6800 }, { "epoch": 0.0727602970244137, "grad_norm": 18.350997924804688, "learning_rate": 9.999047528594996e-07, "loss": 0.1802, "step": 6810 }, { "epoch": 0.0728671403386933, "grad_norm": 11.846437454223633, "learning_rate": 9.99904424658137e-07, "loss": 0.2455, "step": 6820 }, { "epoch": 0.07297398365297292, "grad_norm": 18.651506423950195, "learning_rate": 9.999040958923448e-07, "loss": 0.1777, "step": 6830 }, { "epoch": 0.07308082696725253, "grad_norm": 12.637604713439941, "learning_rate": 9.99903766562123e-07, "loss": 0.1918, "step": 6840 }, { "epoch": 0.07318767028153213, "grad_norm": 4.418329238891602, "learning_rate": 9.99903436667472e-07, "loss": 0.2163, "step": 6850 }, { "epoch": 0.07329451359581174, "grad_norm": 11.491019248962402, "learning_rate": 9.999031062083928e-07, "loss": 0.1368, "step": 6860 }, { "epoch": 0.07340135691009135, "grad_norm": 1.909867286682129, "learning_rate": 9.999027751848848e-07, "loss": 0.1204, "step": 6870 }, { "epoch": 0.07350820022437096, "grad_norm": 21.257152557373047, "learning_rate": 9.999024435969488e-07, "loss": 0.2101, "step": 6880 }, { "epoch": 0.07361504353865057, "grad_norm": 12.739274024963379, "learning_rate": 9.999021114445854e-07, "loss": 0.1843, "step": 6890 }, { "epoch": 0.07372188685293017, "grad_norm": 8.971274375915527, "learning_rate": 9.999017787277949e-07, "loss": 0.1152, "step": 6900 }, { "epoch": 0.07382873016720978, "grad_norm": 8.552412986755371, "learning_rate": 9.999014454465774e-07, "loss": 0.1226, "step": 6910 }, { "epoch": 0.0739355734814894, "grad_norm": 18.019495010375977, "learning_rate": 9.999011116009333e-07, "loss": 0.176, "step": 6920 }, { "epoch": 0.074042416795769, "grad_norm": 8.699457168579102, "learning_rate": 9.999007771908631e-07, "loss": 0.1273, "step": 6930 }, { "epoch": 0.07414926011004862, "grad_norm": 8.681180000305176, "learning_rate": 9.999004422163674e-07, "loss": 0.1021, "step": 6940 }, { "epoch": 0.07425610342432823, "grad_norm": 7.330161094665527, "learning_rate": 9.999001066774465e-07, "loss": 0.145, "step": 6950 }, { "epoch": 0.07436294673860783, "grad_norm": 5.478315353393555, "learning_rate": 9.998997705741003e-07, "loss": 0.0786, "step": 6960 }, { "epoch": 0.07446979005288744, "grad_norm": 21.889371871948242, "learning_rate": 9.998994339063298e-07, "loss": 0.1326, "step": 6970 }, { "epoch": 0.07457663336716705, "grad_norm": 10.525063514709473, "learning_rate": 9.99899096674135e-07, "loss": 0.1861, "step": 6980 }, { "epoch": 0.07468347668144666, "grad_norm": 9.752753257751465, "learning_rate": 9.998987588775163e-07, "loss": 0.2472, "step": 6990 }, { "epoch": 0.07479031999572627, "grad_norm": 16.127544403076172, "learning_rate": 9.998984205164742e-07, "loss": 0.1498, "step": 7000 }, { "epoch": 0.07489716331000587, "grad_norm": 15.930707931518555, "learning_rate": 9.998980815910093e-07, "loss": 0.162, "step": 7010 }, { "epoch": 0.07500400662428548, "grad_norm": 11.118165969848633, "learning_rate": 9.998977421011213e-07, "loss": 0.1, "step": 7020 }, { "epoch": 0.0751108499385651, "grad_norm": 12.529261589050293, "learning_rate": 9.998974020468115e-07, "loss": 0.1767, "step": 7030 }, { "epoch": 0.0752176932528447, "grad_norm": 9.332788467407227, "learning_rate": 9.998970614280794e-07, "loss": 0.0993, "step": 7040 }, { "epoch": 0.07532453656712432, "grad_norm": 11.04000473022461, "learning_rate": 9.998967202449263e-07, "loss": 0.2017, "step": 7050 }, { "epoch": 0.07543137988140391, "grad_norm": 29.366209030151367, "learning_rate": 9.998963784973516e-07, "loss": 0.1981, "step": 7060 }, { "epoch": 0.07553822319568353, "grad_norm": 9.081625938415527, "learning_rate": 9.998960361853564e-07, "loss": 0.2, "step": 7070 }, { "epoch": 0.07564506650996314, "grad_norm": 5.55546236038208, "learning_rate": 9.998956933089405e-07, "loss": 0.1306, "step": 7080 }, { "epoch": 0.07575190982424275, "grad_norm": 5.994335174560547, "learning_rate": 9.998953498681051e-07, "loss": 0.1456, "step": 7090 }, { "epoch": 0.07585875313852236, "grad_norm": 11.957130432128906, "learning_rate": 9.9989500586285e-07, "loss": 0.2367, "step": 7100 }, { "epoch": 0.07596559645280197, "grad_norm": 1.8109331130981445, "learning_rate": 9.998946612931756e-07, "loss": 0.2186, "step": 7110 }, { "epoch": 0.07607243976708157, "grad_norm": 6.966744899749756, "learning_rate": 9.998943161590822e-07, "loss": 0.1751, "step": 7120 }, { "epoch": 0.07617928308136118, "grad_norm": 10.39409351348877, "learning_rate": 9.998939704605707e-07, "loss": 0.1147, "step": 7130 }, { "epoch": 0.0762861263956408, "grad_norm": 10.138586044311523, "learning_rate": 9.99893624197641e-07, "loss": 0.278, "step": 7140 }, { "epoch": 0.0763929697099204, "grad_norm": 8.318290710449219, "learning_rate": 9.998932773702938e-07, "loss": 0.1723, "step": 7150 }, { "epoch": 0.07649981302420002, "grad_norm": 8.229764938354492, "learning_rate": 9.998929299785293e-07, "loss": 0.1733, "step": 7160 }, { "epoch": 0.07660665633847961, "grad_norm": 7.4028778076171875, "learning_rate": 9.99892582022348e-07, "loss": 0.1175, "step": 7170 }, { "epoch": 0.07671349965275923, "grad_norm": 11.117753982543945, "learning_rate": 9.998922335017502e-07, "loss": 0.1306, "step": 7180 }, { "epoch": 0.07682034296703884, "grad_norm": 13.543798446655273, "learning_rate": 9.998918844167363e-07, "loss": 0.2261, "step": 7190 }, { "epoch": 0.07692718628131845, "grad_norm": 13.982254981994629, "learning_rate": 9.99891534767307e-07, "loss": 0.1767, "step": 7200 }, { "epoch": 0.07703402959559806, "grad_norm": 13.859818458557129, "learning_rate": 9.99891184553462e-07, "loss": 0.1991, "step": 7210 }, { "epoch": 0.07714087290987766, "grad_norm": 12.851730346679688, "learning_rate": 9.998908337752025e-07, "loss": 0.1489, "step": 7220 }, { "epoch": 0.07724771622415727, "grad_norm": 12.254497528076172, "learning_rate": 9.998904824325282e-07, "loss": 0.1594, "step": 7230 }, { "epoch": 0.07735455953843688, "grad_norm": 17.98581314086914, "learning_rate": 9.9989013052544e-07, "loss": 0.149, "step": 7240 }, { "epoch": 0.0774614028527165, "grad_norm": 4.027436256408691, "learning_rate": 9.998897780539381e-07, "loss": 0.172, "step": 7250 }, { "epoch": 0.0775682461669961, "grad_norm": 11.610339164733887, "learning_rate": 9.99889425018023e-07, "loss": 0.0858, "step": 7260 }, { "epoch": 0.0776750894812757, "grad_norm": 10.839123725891113, "learning_rate": 9.998890714176948e-07, "loss": 0.2466, "step": 7270 }, { "epoch": 0.07778193279555531, "grad_norm": 16.762907028198242, "learning_rate": 9.998887172529543e-07, "loss": 0.2284, "step": 7280 }, { "epoch": 0.07788877610983493, "grad_norm": 14.286136627197266, "learning_rate": 9.998883625238017e-07, "loss": 0.1474, "step": 7290 }, { "epoch": 0.07799561942411454, "grad_norm": 14.187190055847168, "learning_rate": 9.998880072302373e-07, "loss": 0.1276, "step": 7300 }, { "epoch": 0.07810246273839415, "grad_norm": 4.219259262084961, "learning_rate": 9.998876513722616e-07, "loss": 0.1597, "step": 7310 }, { "epoch": 0.07820930605267376, "grad_norm": 3.890031576156616, "learning_rate": 9.99887294949875e-07, "loss": 0.1277, "step": 7320 }, { "epoch": 0.07831614936695336, "grad_norm": 16.631317138671875, "learning_rate": 9.99886937963078e-07, "loss": 0.1479, "step": 7330 }, { "epoch": 0.07842299268123297, "grad_norm": 22.898555755615234, "learning_rate": 9.99886580411871e-07, "loss": 0.1822, "step": 7340 }, { "epoch": 0.07852983599551258, "grad_norm": 9.025625228881836, "learning_rate": 9.998862222962544e-07, "loss": 0.1411, "step": 7350 }, { "epoch": 0.07863667930979219, "grad_norm": 10.92245864868164, "learning_rate": 9.998858636162281e-07, "loss": 0.1519, "step": 7360 }, { "epoch": 0.0787435226240718, "grad_norm": 10.696118354797363, "learning_rate": 9.998855043717933e-07, "loss": 0.1368, "step": 7370 }, { "epoch": 0.0788503659383514, "grad_norm": 10.401052474975586, "learning_rate": 9.9988514456295e-07, "loss": 0.1803, "step": 7380 }, { "epoch": 0.07895720925263101, "grad_norm": 4.554389953613281, "learning_rate": 9.998847841896986e-07, "loss": 0.0852, "step": 7390 }, { "epoch": 0.07906405256691063, "grad_norm": 21.98663330078125, "learning_rate": 9.998844232520394e-07, "loss": 0.2946, "step": 7400 }, { "epoch": 0.07917089588119024, "grad_norm": 9.553565979003906, "learning_rate": 9.998840617499732e-07, "loss": 0.1721, "step": 7410 }, { "epoch": 0.07927773919546985, "grad_norm": 12.332935333251953, "learning_rate": 9.998836996835e-07, "loss": 0.1884, "step": 7420 }, { "epoch": 0.07938458250974945, "grad_norm": 33.288002014160156, "learning_rate": 9.998833370526202e-07, "loss": 0.2417, "step": 7430 }, { "epoch": 0.07949142582402906, "grad_norm": 9.190409660339355, "learning_rate": 9.998829738573347e-07, "loss": 0.172, "step": 7440 }, { "epoch": 0.07959826913830867, "grad_norm": 6.247187614440918, "learning_rate": 9.998826100976436e-07, "loss": 0.2065, "step": 7450 }, { "epoch": 0.07970511245258828, "grad_norm": 3.4309158325195312, "learning_rate": 9.998822457735471e-07, "loss": 0.1546, "step": 7460 }, { "epoch": 0.07981195576686789, "grad_norm": 15.837090492248535, "learning_rate": 9.99881880885046e-07, "loss": 0.1217, "step": 7470 }, { "epoch": 0.0799187990811475, "grad_norm": 5.065699577331543, "learning_rate": 9.998815154321405e-07, "loss": 0.137, "step": 7480 }, { "epoch": 0.0800256423954271, "grad_norm": 2.5374133586883545, "learning_rate": 9.998811494148309e-07, "loss": 0.2193, "step": 7490 }, { "epoch": 0.08013248570970671, "grad_norm": 11.696308135986328, "learning_rate": 9.99880782833118e-07, "loss": 0.1382, "step": 7500 }, { "epoch": 0.08023932902398632, "grad_norm": 5.264753818511963, "learning_rate": 9.998804156870017e-07, "loss": 0.1576, "step": 7510 }, { "epoch": 0.08034617233826594, "grad_norm": 4.0075860023498535, "learning_rate": 9.998800479764828e-07, "loss": 0.1882, "step": 7520 }, { "epoch": 0.08045301565254555, "grad_norm": 6.174065113067627, "learning_rate": 9.998796797015616e-07, "loss": 0.1968, "step": 7530 }, { "epoch": 0.08055985896682515, "grad_norm": 19.156707763671875, "learning_rate": 9.998793108622385e-07, "loss": 0.1595, "step": 7540 }, { "epoch": 0.08066670228110476, "grad_norm": 13.875516891479492, "learning_rate": 9.998789414585139e-07, "loss": 0.1072, "step": 7550 }, { "epoch": 0.08077354559538437, "grad_norm": 6.49717903137207, "learning_rate": 9.998785714903883e-07, "loss": 0.1157, "step": 7560 }, { "epoch": 0.08088038890966398, "grad_norm": 1.2867403030395508, "learning_rate": 9.998782009578622e-07, "loss": 0.1526, "step": 7570 }, { "epoch": 0.08098723222394359, "grad_norm": 10.713732719421387, "learning_rate": 9.998778298609356e-07, "loss": 0.1531, "step": 7580 }, { "epoch": 0.08109407553822319, "grad_norm": 9.058196067810059, "learning_rate": 9.998774581996094e-07, "loss": 0.1493, "step": 7590 }, { "epoch": 0.0812009188525028, "grad_norm": 14.957418441772461, "learning_rate": 9.998770859738837e-07, "loss": 0.228, "step": 7600 }, { "epoch": 0.08130776216678241, "grad_norm": 16.206233978271484, "learning_rate": 9.99876713183759e-07, "loss": 0.1771, "step": 7610 }, { "epoch": 0.08141460548106202, "grad_norm": 6.637372016906738, "learning_rate": 9.99876339829236e-07, "loss": 0.1778, "step": 7620 }, { "epoch": 0.08152144879534164, "grad_norm": 11.694803237915039, "learning_rate": 9.998759659103147e-07, "loss": 0.1653, "step": 7630 }, { "epoch": 0.08162829210962123, "grad_norm": 6.07877779006958, "learning_rate": 9.99875591426996e-07, "loss": 0.1168, "step": 7640 }, { "epoch": 0.08173513542390085, "grad_norm": 6.449027061462402, "learning_rate": 9.998752163792796e-07, "loss": 0.1114, "step": 7650 }, { "epoch": 0.08184197873818046, "grad_norm": 4.075556755065918, "learning_rate": 9.998748407671664e-07, "loss": 0.1699, "step": 7660 }, { "epoch": 0.08194882205246007, "grad_norm": 4.013980388641357, "learning_rate": 9.99874464590657e-07, "loss": 0.0988, "step": 7670 }, { "epoch": 0.08205566536673968, "grad_norm": 14.399674415588379, "learning_rate": 9.998740878497516e-07, "loss": 0.1456, "step": 7680 }, { "epoch": 0.08216250868101929, "grad_norm": 9.341167449951172, "learning_rate": 9.998737105444506e-07, "loss": 0.1753, "step": 7690 }, { "epoch": 0.08226935199529889, "grad_norm": 6.766317844390869, "learning_rate": 9.998733326747545e-07, "loss": 0.1923, "step": 7700 }, { "epoch": 0.0823761953095785, "grad_norm": 4.6071014404296875, "learning_rate": 9.998729542406633e-07, "loss": 0.2197, "step": 7710 }, { "epoch": 0.08248303862385811, "grad_norm": 14.100629806518555, "learning_rate": 9.998725752421783e-07, "loss": 0.1648, "step": 7720 }, { "epoch": 0.08258988193813772, "grad_norm": 7.45490837097168, "learning_rate": 9.99872195679299e-07, "loss": 0.1085, "step": 7730 }, { "epoch": 0.08269672525241734, "grad_norm": 18.382966995239258, "learning_rate": 9.998718155520265e-07, "loss": 0.2197, "step": 7740 }, { "epoch": 0.08280356856669693, "grad_norm": 9.6092529296875, "learning_rate": 9.99871434860361e-07, "loss": 0.134, "step": 7750 }, { "epoch": 0.08291041188097655, "grad_norm": 7.784520149230957, "learning_rate": 9.998710536043028e-07, "loss": 0.1464, "step": 7760 }, { "epoch": 0.08301725519525616, "grad_norm": 18.96788787841797, "learning_rate": 9.998706717838527e-07, "loss": 0.2393, "step": 7770 }, { "epoch": 0.08312409850953577, "grad_norm": 9.689068794250488, "learning_rate": 9.998702893990108e-07, "loss": 0.1378, "step": 7780 }, { "epoch": 0.08323094182381538, "grad_norm": 7.711380481719971, "learning_rate": 9.998699064497775e-07, "loss": 0.1853, "step": 7790 }, { "epoch": 0.08333778513809498, "grad_norm": 16.797754287719727, "learning_rate": 9.998695229361534e-07, "loss": 0.1506, "step": 7800 }, { "epoch": 0.08344462845237459, "grad_norm": 16.971078872680664, "learning_rate": 9.99869138858139e-07, "loss": 0.1332, "step": 7810 }, { "epoch": 0.0835514717666542, "grad_norm": 9.66726016998291, "learning_rate": 9.998687542157345e-07, "loss": 0.1613, "step": 7820 }, { "epoch": 0.08365831508093381, "grad_norm": 12.349627494812012, "learning_rate": 9.998683690089405e-07, "loss": 0.1964, "step": 7830 }, { "epoch": 0.08376515839521342, "grad_norm": 17.673622131347656, "learning_rate": 9.998679832377572e-07, "loss": 0.2089, "step": 7840 }, { "epoch": 0.08387200170949304, "grad_norm": 6.957237243652344, "learning_rate": 9.998675969021855e-07, "loss": 0.1426, "step": 7850 }, { "epoch": 0.08397884502377263, "grad_norm": 5.591282367706299, "learning_rate": 9.998672100022254e-07, "loss": 0.2046, "step": 7860 }, { "epoch": 0.08408568833805224, "grad_norm": 5.313127517700195, "learning_rate": 9.998668225378774e-07, "loss": 0.1129, "step": 7870 }, { "epoch": 0.08419253165233186, "grad_norm": 12.76300048828125, "learning_rate": 9.998664345091421e-07, "loss": 0.2016, "step": 7880 }, { "epoch": 0.08429937496661147, "grad_norm": 31.837615966796875, "learning_rate": 9.998660459160199e-07, "loss": 0.2756, "step": 7890 }, { "epoch": 0.08440621828089108, "grad_norm": 20.223291397094727, "learning_rate": 9.998656567585113e-07, "loss": 0.2167, "step": 7900 }, { "epoch": 0.08451306159517068, "grad_norm": 10.162139892578125, "learning_rate": 9.998652670366167e-07, "loss": 0.1569, "step": 7910 }, { "epoch": 0.08461990490945029, "grad_norm": 34.81833267211914, "learning_rate": 9.99864876750336e-07, "loss": 0.1296, "step": 7920 }, { "epoch": 0.0847267482237299, "grad_norm": 11.110297203063965, "learning_rate": 9.998644858996707e-07, "loss": 0.1562, "step": 7930 }, { "epoch": 0.08483359153800951, "grad_norm": 19.53407859802246, "learning_rate": 9.998640944846203e-07, "loss": 0.1833, "step": 7940 }, { "epoch": 0.08494043485228912, "grad_norm": 2.7637665271759033, "learning_rate": 9.99863702505186e-07, "loss": 0.2004, "step": 7950 }, { "epoch": 0.08504727816656872, "grad_norm": 20.42803955078125, "learning_rate": 9.998633099613675e-07, "loss": 0.2273, "step": 7960 }, { "epoch": 0.08515412148084833, "grad_norm": 7.236715793609619, "learning_rate": 9.998629168531658e-07, "loss": 0.1672, "step": 7970 }, { "epoch": 0.08526096479512794, "grad_norm": 17.544605255126953, "learning_rate": 9.99862523180581e-07, "loss": 0.1588, "step": 7980 }, { "epoch": 0.08536780810940756, "grad_norm": 6.859541416168213, "learning_rate": 9.998621289436138e-07, "loss": 0.2467, "step": 7990 }, { "epoch": 0.08547465142368717, "grad_norm": 5.803008079528809, "learning_rate": 9.998617341422643e-07, "loss": 0.1627, "step": 8000 }, { "epoch": 0.08558149473796677, "grad_norm": 8.60721206665039, "learning_rate": 9.998613387765334e-07, "loss": 0.175, "step": 8010 }, { "epoch": 0.08568833805224638, "grad_norm": 4.279228687286377, "learning_rate": 9.998609428464213e-07, "loss": 0.1434, "step": 8020 }, { "epoch": 0.08579518136652599, "grad_norm": 3.3473620414733887, "learning_rate": 9.998605463519284e-07, "loss": 0.1437, "step": 8030 }, { "epoch": 0.0859020246808056, "grad_norm": 5.846226692199707, "learning_rate": 9.99860149293055e-07, "loss": 0.1724, "step": 8040 }, { "epoch": 0.08600886799508521, "grad_norm": 3.7781765460968018, "learning_rate": 9.998597516698023e-07, "loss": 0.113, "step": 8050 }, { "epoch": 0.08611571130936482, "grad_norm": 2.387667655944824, "learning_rate": 9.9985935348217e-07, "loss": 0.1321, "step": 8060 }, { "epoch": 0.08622255462364442, "grad_norm": 5.657523155212402, "learning_rate": 9.998589547301586e-07, "loss": 0.1147, "step": 8070 }, { "epoch": 0.08632939793792403, "grad_norm": 8.546092987060547, "learning_rate": 9.998585554137689e-07, "loss": 0.126, "step": 8080 }, { "epoch": 0.08643624125220364, "grad_norm": 4.470923900604248, "learning_rate": 9.99858155533001e-07, "loss": 0.1809, "step": 8090 }, { "epoch": 0.08654308456648326, "grad_norm": 16.838489532470703, "learning_rate": 9.998577550878554e-07, "loss": 0.1533, "step": 8100 }, { "epoch": 0.08664992788076287, "grad_norm": 5.176460266113281, "learning_rate": 9.99857354078333e-07, "loss": 0.1603, "step": 8110 }, { "epoch": 0.08675677119504246, "grad_norm": 15.86832046508789, "learning_rate": 9.998569525044338e-07, "loss": 0.0923, "step": 8120 }, { "epoch": 0.08686361450932208, "grad_norm": 2.8183014392852783, "learning_rate": 9.998565503661582e-07, "loss": 0.1609, "step": 8130 }, { "epoch": 0.08697045782360169, "grad_norm": 6.383554935455322, "learning_rate": 9.998561476635072e-07, "loss": 0.1202, "step": 8140 }, { "epoch": 0.0870773011378813, "grad_norm": 11.623083114624023, "learning_rate": 9.998557443964806e-07, "loss": 0.1822, "step": 8150 }, { "epoch": 0.08718414445216091, "grad_norm": 21.285303115844727, "learning_rate": 9.998553405650791e-07, "loss": 0.2095, "step": 8160 }, { "epoch": 0.08729098776644051, "grad_norm": 1.2098145484924316, "learning_rate": 9.998549361693034e-07, "loss": 0.122, "step": 8170 }, { "epoch": 0.08739783108072012, "grad_norm": 22.38066864013672, "learning_rate": 9.998545312091537e-07, "loss": 0.2898, "step": 8180 }, { "epoch": 0.08750467439499973, "grad_norm": 2.684157371520996, "learning_rate": 9.998541256846306e-07, "loss": 0.0659, "step": 8190 }, { "epoch": 0.08761151770927934, "grad_norm": 4.407344341278076, "learning_rate": 9.998537195957342e-07, "loss": 0.2261, "step": 8200 }, { "epoch": 0.08771836102355896, "grad_norm": 1.6983006000518799, "learning_rate": 9.998533129424654e-07, "loss": 0.1631, "step": 8210 }, { "epoch": 0.08782520433783857, "grad_norm": 11.508522033691406, "learning_rate": 9.998529057248242e-07, "loss": 0.2476, "step": 8220 }, { "epoch": 0.08793204765211816, "grad_norm": 10.13829517364502, "learning_rate": 9.998524979428117e-07, "loss": 0.0988, "step": 8230 }, { "epoch": 0.08803889096639778, "grad_norm": 12.649779319763184, "learning_rate": 9.998520895964278e-07, "loss": 0.1475, "step": 8240 }, { "epoch": 0.08814573428067739, "grad_norm": 8.993244171142578, "learning_rate": 9.998516806856734e-07, "loss": 0.1686, "step": 8250 }, { "epoch": 0.088252577594957, "grad_norm": 2.580174207687378, "learning_rate": 9.998512712105486e-07, "loss": 0.1196, "step": 8260 }, { "epoch": 0.08835942090923661, "grad_norm": 7.3360795974731445, "learning_rate": 9.99850861171054e-07, "loss": 0.1556, "step": 8270 }, { "epoch": 0.08846626422351621, "grad_norm": 11.958368301391602, "learning_rate": 9.998504505671898e-07, "loss": 0.1532, "step": 8280 }, { "epoch": 0.08857310753779582, "grad_norm": 9.017427444458008, "learning_rate": 9.99850039398957e-07, "loss": 0.064, "step": 8290 }, { "epoch": 0.08867995085207543, "grad_norm": 10.177976608276367, "learning_rate": 9.998496276663558e-07, "loss": 0.136, "step": 8300 }, { "epoch": 0.08878679416635504, "grad_norm": 22.811681747436523, "learning_rate": 9.998492153693865e-07, "loss": 0.1984, "step": 8310 }, { "epoch": 0.08889363748063465, "grad_norm": 13.730399131774902, "learning_rate": 9.998488025080497e-07, "loss": 0.1809, "step": 8320 }, { "epoch": 0.08900048079491425, "grad_norm": 11.305329322814941, "learning_rate": 9.998483890823458e-07, "loss": 0.1862, "step": 8330 }, { "epoch": 0.08910732410919386, "grad_norm": 4.473128795623779, "learning_rate": 9.998479750922758e-07, "loss": 0.1469, "step": 8340 }, { "epoch": 0.08921416742347348, "grad_norm": 0.9480452537536621, "learning_rate": 9.998475605378393e-07, "loss": 0.218, "step": 8350 }, { "epoch": 0.08932101073775309, "grad_norm": 6.9542036056518555, "learning_rate": 9.998471454190373e-07, "loss": 0.2006, "step": 8360 }, { "epoch": 0.0894278540520327, "grad_norm": 16.345651626586914, "learning_rate": 9.9984672973587e-07, "loss": 0.199, "step": 8370 }, { "epoch": 0.0895346973663123, "grad_norm": 3.94219970703125, "learning_rate": 9.998463134883381e-07, "loss": 0.2135, "step": 8380 }, { "epoch": 0.08964154068059191, "grad_norm": 11.545137405395508, "learning_rate": 9.998458966764422e-07, "loss": 0.1924, "step": 8390 }, { "epoch": 0.08974838399487152, "grad_norm": 2.5869784355163574, "learning_rate": 9.998454793001822e-07, "loss": 0.1174, "step": 8400 }, { "epoch": 0.08985522730915113, "grad_norm": 11.211112022399902, "learning_rate": 9.998450613595592e-07, "loss": 0.1442, "step": 8410 }, { "epoch": 0.08996207062343074, "grad_norm": 5.685575485229492, "learning_rate": 9.998446428545733e-07, "loss": 0.174, "step": 8420 }, { "epoch": 0.09006891393771035, "grad_norm": 12.697787284851074, "learning_rate": 9.998442237852251e-07, "loss": 0.2652, "step": 8430 }, { "epoch": 0.09017575725198995, "grad_norm": 14.95093059539795, "learning_rate": 9.99843804151515e-07, "loss": 0.1655, "step": 8440 }, { "epoch": 0.09028260056626956, "grad_norm": 11.63737964630127, "learning_rate": 9.998433839534437e-07, "loss": 0.0971, "step": 8450 }, { "epoch": 0.09038944388054918, "grad_norm": 13.049890518188477, "learning_rate": 9.998429631910113e-07, "loss": 0.152, "step": 8460 }, { "epoch": 0.09049628719482879, "grad_norm": 3.8542582988739014, "learning_rate": 9.998425418642184e-07, "loss": 0.1304, "step": 8470 }, { "epoch": 0.0906031305091084, "grad_norm": 7.216392993927002, "learning_rate": 9.998421199730657e-07, "loss": 0.1765, "step": 8480 }, { "epoch": 0.090709973823388, "grad_norm": 10.411423683166504, "learning_rate": 9.998416975175535e-07, "loss": 0.1553, "step": 8490 }, { "epoch": 0.09081681713766761, "grad_norm": 13.862836837768555, "learning_rate": 9.998412744976822e-07, "loss": 0.1583, "step": 8500 }, { "epoch": 0.09092366045194722, "grad_norm": 8.584076881408691, "learning_rate": 9.998408509134525e-07, "loss": 0.2896, "step": 8510 }, { "epoch": 0.09103050376622683, "grad_norm": 5.373610973358154, "learning_rate": 9.998404267648649e-07, "loss": 0.2105, "step": 8520 }, { "epoch": 0.09113734708050644, "grad_norm": 8.95551586151123, "learning_rate": 9.998400020519195e-07, "loss": 0.1972, "step": 8530 }, { "epoch": 0.09124419039478604, "grad_norm": 5.656252384185791, "learning_rate": 9.99839576774617e-07, "loss": 0.1593, "step": 8540 }, { "epoch": 0.09135103370906565, "grad_norm": 4.491117477416992, "learning_rate": 9.99839150932958e-07, "loss": 0.1385, "step": 8550 }, { "epoch": 0.09145787702334526, "grad_norm": 10.182936668395996, "learning_rate": 9.998387245269428e-07, "loss": 0.0991, "step": 8560 }, { "epoch": 0.09156472033762487, "grad_norm": 4.254732131958008, "learning_rate": 9.99838297556572e-07, "loss": 0.1174, "step": 8570 }, { "epoch": 0.09167156365190449, "grad_norm": 4.603586673736572, "learning_rate": 9.99837870021846e-07, "loss": 0.1327, "step": 8580 }, { "epoch": 0.0917784069661841, "grad_norm": 8.06760311126709, "learning_rate": 9.998374419227652e-07, "loss": 0.1081, "step": 8590 }, { "epoch": 0.0918852502804637, "grad_norm": 5.663012504577637, "learning_rate": 9.998370132593306e-07, "loss": 0.1037, "step": 8600 }, { "epoch": 0.09199209359474331, "grad_norm": 14.623392105102539, "learning_rate": 9.99836584031542e-07, "loss": 0.1273, "step": 8610 }, { "epoch": 0.09209893690902292, "grad_norm": 8.693857192993164, "learning_rate": 9.998361542394001e-07, "loss": 0.1414, "step": 8620 }, { "epoch": 0.09220578022330253, "grad_norm": 7.7424235343933105, "learning_rate": 9.998357238829057e-07, "loss": 0.1585, "step": 8630 }, { "epoch": 0.09231262353758214, "grad_norm": 4.619305610656738, "learning_rate": 9.998352929620588e-07, "loss": 0.0794, "step": 8640 }, { "epoch": 0.09241946685186174, "grad_norm": 8.44320297241211, "learning_rate": 9.998348614768602e-07, "loss": 0.1039, "step": 8650 }, { "epoch": 0.09252631016614135, "grad_norm": 11.87239933013916, "learning_rate": 9.998344294273105e-07, "loss": 0.1454, "step": 8660 }, { "epoch": 0.09263315348042096, "grad_norm": 16.34006690979004, "learning_rate": 9.998339968134098e-07, "loss": 0.2522, "step": 8670 }, { "epoch": 0.09273999679470057, "grad_norm": 8.476698875427246, "learning_rate": 9.998335636351587e-07, "loss": 0.1216, "step": 8680 }, { "epoch": 0.09284684010898019, "grad_norm": 8.598991394042969, "learning_rate": 9.99833129892558e-07, "loss": 0.0971, "step": 8690 }, { "epoch": 0.09295368342325978, "grad_norm": 5.718452453613281, "learning_rate": 9.99832695585608e-07, "loss": 0.1221, "step": 8700 }, { "epoch": 0.0930605267375394, "grad_norm": 8.338899612426758, "learning_rate": 9.99832260714309e-07, "loss": 0.1175, "step": 8710 }, { "epoch": 0.093167370051819, "grad_norm": 8.793356895446777, "learning_rate": 9.998318252786616e-07, "loss": 0.1478, "step": 8720 }, { "epoch": 0.09327421336609862, "grad_norm": 25.31538200378418, "learning_rate": 9.998313892786666e-07, "loss": 0.146, "step": 8730 }, { "epoch": 0.09338105668037823, "grad_norm": 8.190805435180664, "learning_rate": 9.99830952714324e-07, "loss": 0.0997, "step": 8740 }, { "epoch": 0.09348789999465783, "grad_norm": 7.817429542541504, "learning_rate": 9.998305155856348e-07, "loss": 0.121, "step": 8750 }, { "epoch": 0.09359474330893744, "grad_norm": 25.835880279541016, "learning_rate": 9.99830077892599e-07, "loss": 0.189, "step": 8760 }, { "epoch": 0.09370158662321705, "grad_norm": 5.943512916564941, "learning_rate": 9.998296396352173e-07, "loss": 0.1775, "step": 8770 }, { "epoch": 0.09380842993749666, "grad_norm": 25.60430335998535, "learning_rate": 9.998292008134904e-07, "loss": 0.1429, "step": 8780 }, { "epoch": 0.09391527325177627, "grad_norm": 7.3445234298706055, "learning_rate": 9.998287614274185e-07, "loss": 0.1905, "step": 8790 }, { "epoch": 0.09402211656605589, "grad_norm": 2.855144500732422, "learning_rate": 9.998283214770022e-07, "loss": 0.1396, "step": 8800 }, { "epoch": 0.09412895988033548, "grad_norm": 4.085385799407959, "learning_rate": 9.99827880962242e-07, "loss": 0.094, "step": 8810 }, { "epoch": 0.0942358031946151, "grad_norm": 16.745887756347656, "learning_rate": 9.998274398831384e-07, "loss": 0.1573, "step": 8820 }, { "epoch": 0.0943426465088947, "grad_norm": 3.2576675415039062, "learning_rate": 9.99826998239692e-07, "loss": 0.1549, "step": 8830 }, { "epoch": 0.09444948982317432, "grad_norm": 16.399703979492188, "learning_rate": 9.99826556031903e-07, "loss": 0.1647, "step": 8840 }, { "epoch": 0.09455633313745393, "grad_norm": 3.7278847694396973, "learning_rate": 9.998261132597722e-07, "loss": 0.09, "step": 8850 }, { "epoch": 0.09466317645173353, "grad_norm": 13.631550788879395, "learning_rate": 9.998256699233e-07, "loss": 0.1742, "step": 8860 }, { "epoch": 0.09477001976601314, "grad_norm": 16.831289291381836, "learning_rate": 9.998252260224868e-07, "loss": 0.1732, "step": 8870 }, { "epoch": 0.09487686308029275, "grad_norm": 9.933229446411133, "learning_rate": 9.998247815573332e-07, "loss": 0.1336, "step": 8880 }, { "epoch": 0.09498370639457236, "grad_norm": 1.2687921524047852, "learning_rate": 9.9982433652784e-07, "loss": 0.1212, "step": 8890 }, { "epoch": 0.09509054970885197, "grad_norm": 4.719185829162598, "learning_rate": 9.998238909340071e-07, "loss": 0.1704, "step": 8900 }, { "epoch": 0.09519739302313157, "grad_norm": 9.886419296264648, "learning_rate": 9.998234447758354e-07, "loss": 0.1816, "step": 8910 }, { "epoch": 0.09530423633741118, "grad_norm": 20.0003604888916, "learning_rate": 9.998229980533253e-07, "loss": 0.1557, "step": 8920 }, { "epoch": 0.0954110796516908, "grad_norm": 12.452217102050781, "learning_rate": 9.998225507664774e-07, "loss": 0.1439, "step": 8930 }, { "epoch": 0.0955179229659704, "grad_norm": 11.195161819458008, "learning_rate": 9.998221029152918e-07, "loss": 0.155, "step": 8940 }, { "epoch": 0.09562476628025002, "grad_norm": 8.725634574890137, "learning_rate": 9.998216544997697e-07, "loss": 0.1431, "step": 8950 }, { "epoch": 0.09573160959452963, "grad_norm": 7.244667053222656, "learning_rate": 9.99821205519911e-07, "loss": 0.1061, "step": 8960 }, { "epoch": 0.09583845290880923, "grad_norm": 19.773887634277344, "learning_rate": 9.998207559757167e-07, "loss": 0.2354, "step": 8970 }, { "epoch": 0.09594529622308884, "grad_norm": 14.350264549255371, "learning_rate": 9.998203058671866e-07, "loss": 0.0965, "step": 8980 }, { "epoch": 0.09605213953736845, "grad_norm": 3.3534748554229736, "learning_rate": 9.99819855194322e-07, "loss": 0.173, "step": 8990 }, { "epoch": 0.09615898285164806, "grad_norm": 9.576162338256836, "learning_rate": 9.99819403957123e-07, "loss": 0.1531, "step": 9000 }, { "epoch": 0.09626582616592767, "grad_norm": 6.349783420562744, "learning_rate": 9.9981895215559e-07, "loss": 0.173, "step": 9010 }, { "epoch": 0.09637266948020727, "grad_norm": 11.258218765258789, "learning_rate": 9.998184997897238e-07, "loss": 0.0802, "step": 9020 }, { "epoch": 0.09647951279448688, "grad_norm": 6.584847450256348, "learning_rate": 9.99818046859525e-07, "loss": 0.0869, "step": 9030 }, { "epoch": 0.0965863561087665, "grad_norm": 10.511890411376953, "learning_rate": 9.998175933649934e-07, "loss": 0.1403, "step": 9040 }, { "epoch": 0.0966931994230461, "grad_norm": 14.430582046508789, "learning_rate": 9.998171393061306e-07, "loss": 0.1436, "step": 9050 }, { "epoch": 0.09680004273732572, "grad_norm": 9.832712173461914, "learning_rate": 9.99816684682936e-07, "loss": 0.1449, "step": 9060 }, { "epoch": 0.09690688605160531, "grad_norm": 21.27777862548828, "learning_rate": 9.99816229495411e-07, "loss": 0.1628, "step": 9070 }, { "epoch": 0.09701372936588493, "grad_norm": 22.71845245361328, "learning_rate": 9.998157737435557e-07, "loss": 0.0794, "step": 9080 }, { "epoch": 0.09712057268016454, "grad_norm": 12.155074119567871, "learning_rate": 9.998153174273704e-07, "loss": 0.1119, "step": 9090 }, { "epoch": 0.09722741599444415, "grad_norm": 6.261107921600342, "learning_rate": 9.998148605468562e-07, "loss": 0.0986, "step": 9100 }, { "epoch": 0.09733425930872376, "grad_norm": 11.863046646118164, "learning_rate": 9.998144031020132e-07, "loss": 0.1776, "step": 9110 }, { "epoch": 0.09744110262300336, "grad_norm": 5.518316745758057, "learning_rate": 9.99813945092842e-07, "loss": 0.144, "step": 9120 }, { "epoch": 0.09754794593728297, "grad_norm": 6.795338153839111, "learning_rate": 9.998134865193432e-07, "loss": 0.1951, "step": 9130 }, { "epoch": 0.09765478925156258, "grad_norm": 10.64171028137207, "learning_rate": 9.99813027381517e-07, "loss": 0.2013, "step": 9140 }, { "epoch": 0.0977616325658422, "grad_norm": 3.964015007019043, "learning_rate": 9.998125676793646e-07, "loss": 0.1715, "step": 9150 }, { "epoch": 0.0978684758801218, "grad_norm": 6.990323066711426, "learning_rate": 9.998121074128857e-07, "loss": 0.1976, "step": 9160 }, { "epoch": 0.09797531919440142, "grad_norm": 11.445578575134277, "learning_rate": 9.998116465820813e-07, "loss": 0.1367, "step": 9170 }, { "epoch": 0.09808216250868101, "grad_norm": 18.043292999267578, "learning_rate": 9.99811185186952e-07, "loss": 0.192, "step": 9180 }, { "epoch": 0.09818900582296063, "grad_norm": 12.044958114624023, "learning_rate": 9.99810723227498e-07, "loss": 0.1392, "step": 9190 }, { "epoch": 0.09829584913724024, "grad_norm": 10.264577865600586, "learning_rate": 9.9981026070372e-07, "loss": 0.1418, "step": 9200 }, { "epoch": 0.09840269245151985, "grad_norm": 3.483595848083496, "learning_rate": 9.998097976156185e-07, "loss": 0.1647, "step": 9210 }, { "epoch": 0.09850953576579946, "grad_norm": 8.28663444519043, "learning_rate": 9.998093339631942e-07, "loss": 0.1373, "step": 9220 }, { "epoch": 0.09861637908007906, "grad_norm": 3.1394383907318115, "learning_rate": 9.99808869746447e-07, "loss": 0.1337, "step": 9230 }, { "epoch": 0.09872322239435867, "grad_norm": 9.424484252929688, "learning_rate": 9.998084049653782e-07, "loss": 0.1242, "step": 9240 }, { "epoch": 0.09883006570863828, "grad_norm": 13.880688667297363, "learning_rate": 9.99807939619988e-07, "loss": 0.1176, "step": 9250 }, { "epoch": 0.0989369090229179, "grad_norm": 46.50714111328125, "learning_rate": 9.998074737102768e-07, "loss": 0.2095, "step": 9260 }, { "epoch": 0.0990437523371975, "grad_norm": 2.9302902221679688, "learning_rate": 9.998070072362452e-07, "loss": 0.1436, "step": 9270 }, { "epoch": 0.0991505956514771, "grad_norm": 9.950193405151367, "learning_rate": 9.99806540197894e-07, "loss": 0.2149, "step": 9280 }, { "epoch": 0.09925743896575671, "grad_norm": 23.698654174804688, "learning_rate": 9.998060725952231e-07, "loss": 0.2805, "step": 9290 }, { "epoch": 0.09936428228003633, "grad_norm": 14.001026153564453, "learning_rate": 9.998056044282338e-07, "loss": 0.1952, "step": 9300 }, { "epoch": 0.09947112559431594, "grad_norm": 7.776955604553223, "learning_rate": 9.998051356969258e-07, "loss": 0.1553, "step": 9310 }, { "epoch": 0.09957796890859555, "grad_norm": 9.910149574279785, "learning_rate": 9.998046664013006e-07, "loss": 0.1629, "step": 9320 }, { "epoch": 0.09968481222287516, "grad_norm": 10.319472312927246, "learning_rate": 9.99804196541358e-07, "loss": 0.1403, "step": 9330 }, { "epoch": 0.09979165553715476, "grad_norm": 4.92820930480957, "learning_rate": 9.998037261170986e-07, "loss": 0.11, "step": 9340 }, { "epoch": 0.09989849885143437, "grad_norm": 6.738072872161865, "learning_rate": 9.998032551285233e-07, "loss": 0.1123, "step": 9350 }, { "epoch": 0.10000534216571398, "grad_norm": 10.44190788269043, "learning_rate": 9.998027835756321e-07, "loss": 0.1189, "step": 9360 }, { "epoch": 0.10011218547999359, "grad_norm": 9.93952465057373, "learning_rate": 9.998023114584263e-07, "loss": 0.1288, "step": 9370 }, { "epoch": 0.1002190287942732, "grad_norm": 10.664374351501465, "learning_rate": 9.998018387769056e-07, "loss": 0.2033, "step": 9380 }, { "epoch": 0.1003258721085528, "grad_norm": 5.664488315582275, "learning_rate": 9.998013655310711e-07, "loss": 0.1566, "step": 9390 }, { "epoch": 0.10043271542283241, "grad_norm": 4.947147846221924, "learning_rate": 9.99800891720923e-07, "loss": 0.1701, "step": 9400 }, { "epoch": 0.10053955873711203, "grad_norm": 11.922121047973633, "learning_rate": 9.99800417346462e-07, "loss": 0.1697, "step": 9410 }, { "epoch": 0.10064640205139164, "grad_norm": 8.82121467590332, "learning_rate": 9.997999424076885e-07, "loss": 0.1115, "step": 9420 }, { "epoch": 0.10075324536567125, "grad_norm": 12.904620170593262, "learning_rate": 9.997994669046034e-07, "loss": 0.1543, "step": 9430 }, { "epoch": 0.10086008867995085, "grad_norm": 10.147104263305664, "learning_rate": 9.997989908372068e-07, "loss": 0.1173, "step": 9440 }, { "epoch": 0.10096693199423046, "grad_norm": 3.1510329246520996, "learning_rate": 9.997985142054994e-07, "loss": 0.0849, "step": 9450 }, { "epoch": 0.10107377530851007, "grad_norm": 2.2043538093566895, "learning_rate": 9.997980370094818e-07, "loss": 0.1414, "step": 9460 }, { "epoch": 0.10118061862278968, "grad_norm": 24.867198944091797, "learning_rate": 9.997975592491546e-07, "loss": 0.2676, "step": 9470 }, { "epoch": 0.10128746193706929, "grad_norm": 10.22747802734375, "learning_rate": 9.997970809245182e-07, "loss": 0.1175, "step": 9480 }, { "epoch": 0.10139430525134889, "grad_norm": 4.735809803009033, "learning_rate": 9.99796602035573e-07, "loss": 0.068, "step": 9490 }, { "epoch": 0.1015011485656285, "grad_norm": 7.874955654144287, "learning_rate": 9.9979612258232e-07, "loss": 0.1349, "step": 9500 }, { "epoch": 0.10160799187990811, "grad_norm": 6.332164764404297, "learning_rate": 9.99795642564759e-07, "loss": 0.1318, "step": 9510 }, { "epoch": 0.10171483519418772, "grad_norm": 11.74311351776123, "learning_rate": 9.997951619828914e-07, "loss": 0.1768, "step": 9520 }, { "epoch": 0.10182167850846734, "grad_norm": 4.282811641693115, "learning_rate": 9.997946808367173e-07, "loss": 0.0951, "step": 9530 }, { "epoch": 0.10192852182274695, "grad_norm": 11.791820526123047, "learning_rate": 9.99794199126237e-07, "loss": 0.1662, "step": 9540 }, { "epoch": 0.10203536513702655, "grad_norm": 4.747093200683594, "learning_rate": 9.997937168514517e-07, "loss": 0.0867, "step": 9550 }, { "epoch": 0.10214220845130616, "grad_norm": 5.647449970245361, "learning_rate": 9.997932340123613e-07, "loss": 0.1232, "step": 9560 }, { "epoch": 0.10224905176558577, "grad_norm": 14.841607093811035, "learning_rate": 9.99792750608967e-07, "loss": 0.1102, "step": 9570 }, { "epoch": 0.10235589507986538, "grad_norm": 8.502397537231445, "learning_rate": 9.997922666412686e-07, "loss": 0.0887, "step": 9580 }, { "epoch": 0.10246273839414499, "grad_norm": 6.293425559997559, "learning_rate": 9.99791782109267e-07, "loss": 0.1549, "step": 9590 }, { "epoch": 0.10256958170842459, "grad_norm": 5.0457987785339355, "learning_rate": 9.99791297012963e-07, "loss": 0.0944, "step": 9600 }, { "epoch": 0.1026764250227042, "grad_norm": 3.0308468341827393, "learning_rate": 9.997908113523568e-07, "loss": 0.1191, "step": 9610 }, { "epoch": 0.10278326833698381, "grad_norm": 4.580444812774658, "learning_rate": 9.99790325127449e-07, "loss": 0.1194, "step": 9620 }, { "epoch": 0.10289011165126342, "grad_norm": 11.544838905334473, "learning_rate": 9.9978983833824e-07, "loss": 0.1135, "step": 9630 }, { "epoch": 0.10299695496554304, "grad_norm": 4.096873760223389, "learning_rate": 9.99789350984731e-07, "loss": 0.1936, "step": 9640 }, { "epoch": 0.10310379827982263, "grad_norm": 6.145724773406982, "learning_rate": 9.997888630669218e-07, "loss": 0.1393, "step": 9650 }, { "epoch": 0.10321064159410225, "grad_norm": 12.990532875061035, "learning_rate": 9.997883745848133e-07, "loss": 0.2391, "step": 9660 }, { "epoch": 0.10331748490838186, "grad_norm": 2.875537633895874, "learning_rate": 9.99787885538406e-07, "loss": 0.099, "step": 9670 }, { "epoch": 0.10342432822266147, "grad_norm": 8.050862312316895, "learning_rate": 9.997873959277006e-07, "loss": 0.112, "step": 9680 }, { "epoch": 0.10353117153694108, "grad_norm": 5.208729267120361, "learning_rate": 9.997869057526974e-07, "loss": 0.0909, "step": 9690 }, { "epoch": 0.10363801485122069, "grad_norm": 6.268998622894287, "learning_rate": 9.99786415013397e-07, "loss": 0.1598, "step": 9700 }, { "epoch": 0.10374485816550029, "grad_norm": 5.08357048034668, "learning_rate": 9.997859237098e-07, "loss": 0.1654, "step": 9710 }, { "epoch": 0.1038517014797799, "grad_norm": 9.334271430969238, "learning_rate": 9.997854318419073e-07, "loss": 0.0911, "step": 9720 }, { "epoch": 0.10395854479405951, "grad_norm": 14.767313003540039, "learning_rate": 9.997849394097188e-07, "loss": 0.1288, "step": 9730 }, { "epoch": 0.10406538810833912, "grad_norm": 15.411629676818848, "learning_rate": 9.997844464132355e-07, "loss": 0.1581, "step": 9740 }, { "epoch": 0.10417223142261874, "grad_norm": 5.891499042510986, "learning_rate": 9.997839528524577e-07, "loss": 0.1447, "step": 9750 }, { "epoch": 0.10427907473689833, "grad_norm": 2.623004913330078, "learning_rate": 9.99783458727386e-07, "loss": 0.0979, "step": 9760 }, { "epoch": 0.10438591805117794, "grad_norm": 13.885771751403809, "learning_rate": 9.997829640380214e-07, "loss": 0.1328, "step": 9770 }, { "epoch": 0.10449276136545756, "grad_norm": 18.501689910888672, "learning_rate": 9.99782468784364e-07, "loss": 0.2029, "step": 9780 }, { "epoch": 0.10459960467973717, "grad_norm": 15.28384780883789, "learning_rate": 9.997819729664146e-07, "loss": 0.0782, "step": 9790 }, { "epoch": 0.10470644799401678, "grad_norm": 8.96816349029541, "learning_rate": 9.997814765841733e-07, "loss": 0.0684, "step": 9800 }, { "epoch": 0.10481329130829638, "grad_norm": 6.005783557891846, "learning_rate": 9.997809796376412e-07, "loss": 0.1053, "step": 9810 }, { "epoch": 0.10492013462257599, "grad_norm": 11.000699996948242, "learning_rate": 9.997804821268185e-07, "loss": 0.1678, "step": 9820 }, { "epoch": 0.1050269779368556, "grad_norm": 8.647515296936035, "learning_rate": 9.99779984051706e-07, "loss": 0.1351, "step": 9830 }, { "epoch": 0.10513382125113521, "grad_norm": 5.500864028930664, "learning_rate": 9.997794854123043e-07, "loss": 0.2244, "step": 9840 }, { "epoch": 0.10524066456541482, "grad_norm": 10.7434720993042, "learning_rate": 9.997789862086138e-07, "loss": 0.1252, "step": 9850 }, { "epoch": 0.10534750787969442, "grad_norm": 8.01232624053955, "learning_rate": 9.99778486440635e-07, "loss": 0.0676, "step": 9860 }, { "epoch": 0.10545435119397403, "grad_norm": 9.13167667388916, "learning_rate": 9.997779861083687e-07, "loss": 0.1146, "step": 9870 }, { "epoch": 0.10556119450825364, "grad_norm": 24.679731369018555, "learning_rate": 9.997774852118153e-07, "loss": 0.148, "step": 9880 }, { "epoch": 0.10566803782253326, "grad_norm": 10.33593463897705, "learning_rate": 9.997769837509754e-07, "loss": 0.0761, "step": 9890 }, { "epoch": 0.10577488113681287, "grad_norm": 10.124504089355469, "learning_rate": 9.997764817258493e-07, "loss": 0.1238, "step": 9900 }, { "epoch": 0.10588172445109248, "grad_norm": 2.2583086490631104, "learning_rate": 9.99775979136438e-07, "loss": 0.0468, "step": 9910 }, { "epoch": 0.10598856776537208, "grad_norm": 4.824578285217285, "learning_rate": 9.99775475982742e-07, "loss": 0.168, "step": 9920 }, { "epoch": 0.10609541107965169, "grad_norm": 10.182038307189941, "learning_rate": 9.997749722647617e-07, "loss": 0.1367, "step": 9930 }, { "epoch": 0.1062022543939313, "grad_norm": 9.104002952575684, "learning_rate": 9.99774467982498e-07, "loss": 0.1628, "step": 9940 }, { "epoch": 0.10630909770821091, "grad_norm": 2.7795801162719727, "learning_rate": 9.99773963135951e-07, "loss": 0.1701, "step": 9950 }, { "epoch": 0.10641594102249052, "grad_norm": 7.665253162384033, "learning_rate": 9.997734577251213e-07, "loss": 0.1718, "step": 9960 }, { "epoch": 0.10652278433677012, "grad_norm": 24.044931411743164, "learning_rate": 9.997729517500097e-07, "loss": 0.1495, "step": 9970 }, { "epoch": 0.10662962765104973, "grad_norm": 9.59731388092041, "learning_rate": 9.99772445210617e-07, "loss": 0.1628, "step": 9980 }, { "epoch": 0.10673647096532934, "grad_norm": 9.718066215515137, "learning_rate": 9.997719381069432e-07, "loss": 0.2048, "step": 9990 }, { "epoch": 0.10684331427960896, "grad_norm": 8.582053184509277, "learning_rate": 9.997714304389893e-07, "loss": 0.1491, "step": 10000 }, { "epoch": 0.10695015759388857, "grad_norm": 8.702549934387207, "learning_rate": 9.997709222067556e-07, "loss": 0.1955, "step": 10010 }, { "epoch": 0.10705700090816817, "grad_norm": 3.613725185394287, "learning_rate": 9.99770413410243e-07, "loss": 0.1246, "step": 10020 }, { "epoch": 0.10716384422244778, "grad_norm": 8.749323844909668, "learning_rate": 9.997699040494517e-07, "loss": 0.115, "step": 10030 }, { "epoch": 0.10727068753672739, "grad_norm": 6.130801677703857, "learning_rate": 9.997693941243826e-07, "loss": 0.1493, "step": 10040 }, { "epoch": 0.107377530851007, "grad_norm": 13.965917587280273, "learning_rate": 9.997688836350359e-07, "loss": 0.1383, "step": 10050 }, { "epoch": 0.10748437416528661, "grad_norm": 32.327091217041016, "learning_rate": 9.997683725814127e-07, "loss": 0.1573, "step": 10060 }, { "epoch": 0.10759121747956622, "grad_norm": 6.686976432800293, "learning_rate": 9.997678609635131e-07, "loss": 0.2116, "step": 10070 }, { "epoch": 0.10769806079384582, "grad_norm": 4.064884185791016, "learning_rate": 9.99767348781338e-07, "loss": 0.1132, "step": 10080 }, { "epoch": 0.10780490410812543, "grad_norm": 11.656142234802246, "learning_rate": 9.99766836034888e-07, "loss": 0.1089, "step": 10090 }, { "epoch": 0.10791174742240504, "grad_norm": 16.80620574951172, "learning_rate": 9.997663227241632e-07, "loss": 0.1147, "step": 10100 }, { "epoch": 0.10801859073668466, "grad_norm": 7.74354887008667, "learning_rate": 9.997658088491647e-07, "loss": 0.0854, "step": 10110 }, { "epoch": 0.10812543405096427, "grad_norm": 12.753549575805664, "learning_rate": 9.997652944098926e-07, "loss": 0.1349, "step": 10120 }, { "epoch": 0.10823227736524386, "grad_norm": 19.242712020874023, "learning_rate": 9.997647794063481e-07, "loss": 0.1995, "step": 10130 }, { "epoch": 0.10833912067952348, "grad_norm": 4.394542694091797, "learning_rate": 9.997642638385313e-07, "loss": 0.148, "step": 10140 }, { "epoch": 0.10844596399380309, "grad_norm": 14.14993667602539, "learning_rate": 9.997637477064429e-07, "loss": 0.2114, "step": 10150 }, { "epoch": 0.1085528073080827, "grad_norm": 5.069561004638672, "learning_rate": 9.997632310100836e-07, "loss": 0.2632, "step": 10160 }, { "epoch": 0.10865965062236231, "grad_norm": 14.097416877746582, "learning_rate": 9.997627137494538e-07, "loss": 0.0575, "step": 10170 }, { "epoch": 0.10876649393664191, "grad_norm": 8.008431434631348, "learning_rate": 9.997621959245542e-07, "loss": 0.1038, "step": 10180 }, { "epoch": 0.10887333725092152, "grad_norm": 6.1442742347717285, "learning_rate": 9.997616775353853e-07, "loss": 0.1603, "step": 10190 }, { "epoch": 0.10898018056520113, "grad_norm": 2.11332631111145, "learning_rate": 9.99761158581948e-07, "loss": 0.1104, "step": 10200 }, { "epoch": 0.10908702387948074, "grad_norm": 9.111188888549805, "learning_rate": 9.997606390642424e-07, "loss": 0.1097, "step": 10210 }, { "epoch": 0.10919386719376035, "grad_norm": 2.974498748779297, "learning_rate": 9.997601189822693e-07, "loss": 0.1877, "step": 10220 }, { "epoch": 0.10930071050803995, "grad_norm": 13.304160118103027, "learning_rate": 9.997595983360293e-07, "loss": 0.1746, "step": 10230 }, { "epoch": 0.10940755382231956, "grad_norm": 8.770279884338379, "learning_rate": 9.997590771255231e-07, "loss": 0.1612, "step": 10240 }, { "epoch": 0.10951439713659918, "grad_norm": 6.020545959472656, "learning_rate": 9.997585553507513e-07, "loss": 0.0765, "step": 10250 }, { "epoch": 0.10962124045087879, "grad_norm": 5.6189727783203125, "learning_rate": 9.997580330117141e-07, "loss": 0.0848, "step": 10260 }, { "epoch": 0.1097280837651584, "grad_norm": 19.945772171020508, "learning_rate": 9.997575101084126e-07, "loss": 0.2534, "step": 10270 }, { "epoch": 0.10983492707943801, "grad_norm": 4.873698711395264, "learning_rate": 9.99756986640847e-07, "loss": 0.1751, "step": 10280 }, { "epoch": 0.10994177039371761, "grad_norm": 6.462708473205566, "learning_rate": 9.997564626090181e-07, "loss": 0.1428, "step": 10290 }, { "epoch": 0.11004861370799722, "grad_norm": 10.543448448181152, "learning_rate": 9.997559380129266e-07, "loss": 0.1796, "step": 10300 }, { "epoch": 0.11015545702227683, "grad_norm": 9.634420394897461, "learning_rate": 9.997554128525725e-07, "loss": 0.1638, "step": 10310 }, { "epoch": 0.11026230033655644, "grad_norm": 15.24234390258789, "learning_rate": 9.997548871279571e-07, "loss": 0.0962, "step": 10320 }, { "epoch": 0.11036914365083605, "grad_norm": 12.516305923461914, "learning_rate": 9.997543608390808e-07, "loss": 0.1242, "step": 10330 }, { "epoch": 0.11047598696511565, "grad_norm": 7.230502128601074, "learning_rate": 9.997538339859439e-07, "loss": 0.0807, "step": 10340 }, { "epoch": 0.11058283027939526, "grad_norm": 16.128650665283203, "learning_rate": 9.997533065685473e-07, "loss": 0.2077, "step": 10350 }, { "epoch": 0.11068967359367488, "grad_norm": 9.097505569458008, "learning_rate": 9.997527785868915e-07, "loss": 0.1999, "step": 10360 }, { "epoch": 0.11079651690795449, "grad_norm": 12.552800178527832, "learning_rate": 9.997522500409773e-07, "loss": 0.1436, "step": 10370 }, { "epoch": 0.1109033602222341, "grad_norm": 3.5522539615631104, "learning_rate": 9.997517209308046e-07, "loss": 0.0911, "step": 10380 }, { "epoch": 0.1110102035365137, "grad_norm": 10.825117111206055, "learning_rate": 9.997511912563749e-07, "loss": 0.1096, "step": 10390 }, { "epoch": 0.11111704685079331, "grad_norm": 5.011261463165283, "learning_rate": 9.997506610176882e-07, "loss": 0.2075, "step": 10400 }, { "epoch": 0.11122389016507292, "grad_norm": 5.362621307373047, "learning_rate": 9.997501302147456e-07, "loss": 0.1687, "step": 10410 }, { "epoch": 0.11133073347935253, "grad_norm": 10.706496238708496, "learning_rate": 9.997495988475472e-07, "loss": 0.1476, "step": 10420 }, { "epoch": 0.11143757679363214, "grad_norm": 1.5744067430496216, "learning_rate": 9.997490669160935e-07, "loss": 0.0702, "step": 10430 }, { "epoch": 0.11154442010791175, "grad_norm": 7.558877944946289, "learning_rate": 9.997485344203854e-07, "loss": 0.1893, "step": 10440 }, { "epoch": 0.11165126342219135, "grad_norm": 16.830350875854492, "learning_rate": 9.997480013604238e-07, "loss": 0.1605, "step": 10450 }, { "epoch": 0.11175810673647096, "grad_norm": 4.502820014953613, "learning_rate": 9.99747467736209e-07, "loss": 0.082, "step": 10460 }, { "epoch": 0.11186495005075058, "grad_norm": 5.050264835357666, "learning_rate": 9.997469335477415e-07, "loss": 0.1948, "step": 10470 }, { "epoch": 0.11197179336503019, "grad_norm": 16.660484313964844, "learning_rate": 9.997463987950218e-07, "loss": 0.1688, "step": 10480 }, { "epoch": 0.1120786366793098, "grad_norm": 9.38974380493164, "learning_rate": 9.99745863478051e-07, "loss": 0.2766, "step": 10490 }, { "epoch": 0.1121854799935894, "grad_norm": 15.961588859558105, "learning_rate": 9.997453275968293e-07, "loss": 0.1087, "step": 10500 }, { "epoch": 0.11229232330786901, "grad_norm": 9.011829376220703, "learning_rate": 9.997447911513573e-07, "loss": 0.1302, "step": 10510 }, { "epoch": 0.11239916662214862, "grad_norm": 4.602438449859619, "learning_rate": 9.99744254141636e-07, "loss": 0.0765, "step": 10520 }, { "epoch": 0.11250600993642823, "grad_norm": 8.056294441223145, "learning_rate": 9.997437165676653e-07, "loss": 0.123, "step": 10530 }, { "epoch": 0.11261285325070784, "grad_norm": 9.938076972961426, "learning_rate": 9.997431784294466e-07, "loss": 0.1821, "step": 10540 }, { "epoch": 0.11271969656498744, "grad_norm": 12.499938011169434, "learning_rate": 9.9974263972698e-07, "loss": 0.1447, "step": 10550 }, { "epoch": 0.11282653987926705, "grad_norm": 16.598939895629883, "learning_rate": 9.99742100460266e-07, "loss": 0.148, "step": 10560 }, { "epoch": 0.11293338319354666, "grad_norm": 4.441255569458008, "learning_rate": 9.997415606293058e-07, "loss": 0.0963, "step": 10570 }, { "epoch": 0.11304022650782627, "grad_norm": 15.034006118774414, "learning_rate": 9.997410202340994e-07, "loss": 0.1476, "step": 10580 }, { "epoch": 0.11314706982210589, "grad_norm": 3.608612060546875, "learning_rate": 9.997404792746478e-07, "loss": 0.1642, "step": 10590 }, { "epoch": 0.11325391313638548, "grad_norm": 1.8754066228866577, "learning_rate": 9.997399377509515e-07, "loss": 0.1119, "step": 10600 }, { "epoch": 0.1133607564506651, "grad_norm": 8.442082405090332, "learning_rate": 9.997393956630112e-07, "loss": 0.1269, "step": 10610 }, { "epoch": 0.11346759976494471, "grad_norm": 12.339032173156738, "learning_rate": 9.997388530108271e-07, "loss": 0.1336, "step": 10620 }, { "epoch": 0.11357444307922432, "grad_norm": 3.2359559535980225, "learning_rate": 9.997383097944003e-07, "loss": 0.2328, "step": 10630 }, { "epoch": 0.11368128639350393, "grad_norm": 4.000979900360107, "learning_rate": 9.997377660137312e-07, "loss": 0.0641, "step": 10640 }, { "epoch": 0.11378812970778354, "grad_norm": 22.23797035217285, "learning_rate": 9.997372216688206e-07, "loss": 0.1202, "step": 10650 }, { "epoch": 0.11389497302206314, "grad_norm": 2.981384038925171, "learning_rate": 9.997366767596687e-07, "loss": 0.1533, "step": 10660 }, { "epoch": 0.11400181633634275, "grad_norm": 5.500410079956055, "learning_rate": 9.997361312862766e-07, "loss": 0.1348, "step": 10670 }, { "epoch": 0.11410865965062236, "grad_norm": 13.635712623596191, "learning_rate": 9.997355852486446e-07, "loss": 0.149, "step": 10680 }, { "epoch": 0.11421550296490197, "grad_norm": 8.133596420288086, "learning_rate": 9.997350386467734e-07, "loss": 0.1116, "step": 10690 }, { "epoch": 0.11432234627918159, "grad_norm": 8.738600730895996, "learning_rate": 9.997344914806637e-07, "loss": 0.1192, "step": 10700 }, { "epoch": 0.11442918959346118, "grad_norm": 3.561344861984253, "learning_rate": 9.99733943750316e-07, "loss": 0.105, "step": 10710 }, { "epoch": 0.1145360329077408, "grad_norm": 23.19366455078125, "learning_rate": 9.997333954557307e-07, "loss": 0.2102, "step": 10720 }, { "epoch": 0.1146428762220204, "grad_norm": 4.903240203857422, "learning_rate": 9.99732846596909e-07, "loss": 0.1382, "step": 10730 }, { "epoch": 0.11474971953630002, "grad_norm": 2.4305694103240967, "learning_rate": 9.99732297173851e-07, "loss": 0.0514, "step": 10740 }, { "epoch": 0.11485656285057963, "grad_norm": 9.324113845825195, "learning_rate": 9.997317471865577e-07, "loss": 0.1689, "step": 10750 }, { "epoch": 0.11496340616485923, "grad_norm": 13.101240158081055, "learning_rate": 9.997311966350295e-07, "loss": 0.1244, "step": 10760 }, { "epoch": 0.11507024947913884, "grad_norm": 28.46270751953125, "learning_rate": 9.99730645519267e-07, "loss": 0.2528, "step": 10770 }, { "epoch": 0.11517709279341845, "grad_norm": 3.8714327812194824, "learning_rate": 9.997300938392709e-07, "loss": 0.1035, "step": 10780 }, { "epoch": 0.11528393610769806, "grad_norm": 4.91882848739624, "learning_rate": 9.99729541595042e-07, "loss": 0.1904, "step": 10790 }, { "epoch": 0.11539077942197767, "grad_norm": 20.41175079345703, "learning_rate": 9.997289887865805e-07, "loss": 0.1415, "step": 10800 }, { "epoch": 0.11549762273625729, "grad_norm": 3.631378173828125, "learning_rate": 9.997284354138873e-07, "loss": 0.099, "step": 10810 }, { "epoch": 0.11560446605053688, "grad_norm": 11.846348762512207, "learning_rate": 9.99727881476963e-07, "loss": 0.1444, "step": 10820 }, { "epoch": 0.1157113093648165, "grad_norm": 4.110328197479248, "learning_rate": 9.997273269758082e-07, "loss": 0.1501, "step": 10830 }, { "epoch": 0.1158181526790961, "grad_norm": 5.301319599151611, "learning_rate": 9.997267719104236e-07, "loss": 0.076, "step": 10840 }, { "epoch": 0.11592499599337572, "grad_norm": 10.63001537322998, "learning_rate": 9.997262162808096e-07, "loss": 0.1002, "step": 10850 }, { "epoch": 0.11603183930765533, "grad_norm": 10.600651741027832, "learning_rate": 9.99725660086967e-07, "loss": 0.1439, "step": 10860 }, { "epoch": 0.11613868262193493, "grad_norm": 14.939397811889648, "learning_rate": 9.997251033288966e-07, "loss": 0.1398, "step": 10870 }, { "epoch": 0.11624552593621454, "grad_norm": 7.493093967437744, "learning_rate": 9.997245460065986e-07, "loss": 0.0933, "step": 10880 }, { "epoch": 0.11635236925049415, "grad_norm": 6.150001525878906, "learning_rate": 9.99723988120074e-07, "loss": 0.095, "step": 10890 }, { "epoch": 0.11645921256477376, "grad_norm": 5.284640312194824, "learning_rate": 9.997234296693234e-07, "loss": 0.1466, "step": 10900 }, { "epoch": 0.11656605587905337, "grad_norm": 14.568331718444824, "learning_rate": 9.997228706543472e-07, "loss": 0.1367, "step": 10910 }, { "epoch": 0.11667289919333297, "grad_norm": 4.3186774253845215, "learning_rate": 9.997223110751463e-07, "loss": 0.1256, "step": 10920 }, { "epoch": 0.11677974250761258, "grad_norm": 5.616306781768799, "learning_rate": 9.99721750931721e-07, "loss": 0.0627, "step": 10930 }, { "epoch": 0.1168865858218922, "grad_norm": 9.068931579589844, "learning_rate": 9.997211902240723e-07, "loss": 0.116, "step": 10940 }, { "epoch": 0.1169934291361718, "grad_norm": 7.871899127960205, "learning_rate": 9.997206289522006e-07, "loss": 0.0824, "step": 10950 }, { "epoch": 0.11710027245045142, "grad_norm": 4.990908622741699, "learning_rate": 9.997200671161066e-07, "loss": 0.181, "step": 10960 }, { "epoch": 0.11720711576473102, "grad_norm": 1.0389387607574463, "learning_rate": 9.997195047157909e-07, "loss": 0.1275, "step": 10970 }, { "epoch": 0.11731395907901063, "grad_norm": 0.8515418767929077, "learning_rate": 9.997189417512542e-07, "loss": 0.1209, "step": 10980 }, { "epoch": 0.11742080239329024, "grad_norm": 10.54148006439209, "learning_rate": 9.99718378222497e-07, "loss": 0.071, "step": 10990 }, { "epoch": 0.11752764570756985, "grad_norm": 11.740126609802246, "learning_rate": 9.997178141295202e-07, "loss": 0.1075, "step": 11000 }, { "epoch": 0.11763448902184946, "grad_norm": 4.864722728729248, "learning_rate": 9.997172494723241e-07, "loss": 0.1834, "step": 11010 }, { "epoch": 0.11774133233612907, "grad_norm": 11.465394020080566, "learning_rate": 9.997166842509095e-07, "loss": 0.184, "step": 11020 }, { "epoch": 0.11784817565040867, "grad_norm": 6.03188419342041, "learning_rate": 9.99716118465277e-07, "loss": 0.2015, "step": 11030 }, { "epoch": 0.11795501896468828, "grad_norm": 7.358538627624512, "learning_rate": 9.997155521154277e-07, "loss": 0.1273, "step": 11040 }, { "epoch": 0.1180618622789679, "grad_norm": 8.832139015197754, "learning_rate": 9.997149852013615e-07, "loss": 0.174, "step": 11050 }, { "epoch": 0.1181687055932475, "grad_norm": 17.284833908081055, "learning_rate": 9.997144177230794e-07, "loss": 0.2069, "step": 11060 }, { "epoch": 0.11827554890752712, "grad_norm": 0.7853026986122131, "learning_rate": 9.99713849680582e-07, "loss": 0.1249, "step": 11070 }, { "epoch": 0.11838239222180671, "grad_norm": 6.25445032119751, "learning_rate": 9.997132810738699e-07, "loss": 0.1172, "step": 11080 }, { "epoch": 0.11848923553608633, "grad_norm": 5.325164794921875, "learning_rate": 9.997127119029439e-07, "loss": 0.0659, "step": 11090 }, { "epoch": 0.11859607885036594, "grad_norm": 25.321264266967773, "learning_rate": 9.997121421678046e-07, "loss": 0.2333, "step": 11100 }, { "epoch": 0.11870292216464555, "grad_norm": 9.210681915283203, "learning_rate": 9.997115718684524e-07, "loss": 0.0857, "step": 11110 }, { "epoch": 0.11880976547892516, "grad_norm": 13.043185234069824, "learning_rate": 9.997110010048881e-07, "loss": 0.203, "step": 11120 }, { "epoch": 0.11891660879320476, "grad_norm": 11.599796295166016, "learning_rate": 9.997104295771125e-07, "loss": 0.1731, "step": 11130 }, { "epoch": 0.11902345210748437, "grad_norm": 7.4014892578125, "learning_rate": 9.997098575851261e-07, "loss": 0.1842, "step": 11140 }, { "epoch": 0.11913029542176398, "grad_norm": 7.356665134429932, "learning_rate": 9.997092850289294e-07, "loss": 0.0974, "step": 11150 }, { "epoch": 0.1192371387360436, "grad_norm": 3.266385555267334, "learning_rate": 9.997087119085234e-07, "loss": 0.055, "step": 11160 }, { "epoch": 0.1193439820503232, "grad_norm": 25.571945190429688, "learning_rate": 9.997081382239085e-07, "loss": 0.1183, "step": 11170 }, { "epoch": 0.11945082536460282, "grad_norm": 10.906647682189941, "learning_rate": 9.997075639750854e-07, "loss": 0.0989, "step": 11180 }, { "epoch": 0.11955766867888241, "grad_norm": 1.0549551248550415, "learning_rate": 9.997069891620547e-07, "loss": 0.0569, "step": 11190 }, { "epoch": 0.11966451199316203, "grad_norm": 9.683361053466797, "learning_rate": 9.997064137848171e-07, "loss": 0.1955, "step": 11200 }, { "epoch": 0.11977135530744164, "grad_norm": 4.6353864669799805, "learning_rate": 9.997058378433734e-07, "loss": 0.1296, "step": 11210 }, { "epoch": 0.11987819862172125, "grad_norm": 4.2411041259765625, "learning_rate": 9.99705261337724e-07, "loss": 0.1298, "step": 11220 }, { "epoch": 0.11998504193600086, "grad_norm": 14.137490272521973, "learning_rate": 9.997046842678695e-07, "loss": 0.2298, "step": 11230 }, { "epoch": 0.12009188525028046, "grad_norm": 9.055830001831055, "learning_rate": 9.997041066338108e-07, "loss": 0.1252, "step": 11240 }, { "epoch": 0.12019872856456007, "grad_norm": 13.247469902038574, "learning_rate": 9.997035284355484e-07, "loss": 0.1706, "step": 11250 }, { "epoch": 0.12030557187883968, "grad_norm": 3.136047601699829, "learning_rate": 9.99702949673083e-07, "loss": 0.0928, "step": 11260 }, { "epoch": 0.1204124151931193, "grad_norm": 7.711578845977783, "learning_rate": 9.997023703464154e-07, "loss": 0.1195, "step": 11270 }, { "epoch": 0.1205192585073989, "grad_norm": 9.427225112915039, "learning_rate": 9.997017904555458e-07, "loss": 0.0913, "step": 11280 }, { "epoch": 0.1206261018216785, "grad_norm": 5.109034061431885, "learning_rate": 9.997012100004755e-07, "loss": 0.1945, "step": 11290 }, { "epoch": 0.12073294513595811, "grad_norm": 5.965717792510986, "learning_rate": 9.997006289812048e-07, "loss": 0.0664, "step": 11300 }, { "epoch": 0.12083978845023773, "grad_norm": 4.810303211212158, "learning_rate": 9.997000473977343e-07, "loss": 0.135, "step": 11310 }, { "epoch": 0.12094663176451734, "grad_norm": 2.307042360305786, "learning_rate": 9.996994652500645e-07, "loss": 0.1198, "step": 11320 }, { "epoch": 0.12105347507879695, "grad_norm": 10.309368133544922, "learning_rate": 9.996988825381966e-07, "loss": 0.1845, "step": 11330 }, { "epoch": 0.12116031839307655, "grad_norm": 12.220246315002441, "learning_rate": 9.996982992621309e-07, "loss": 0.1652, "step": 11340 }, { "epoch": 0.12126716170735616, "grad_norm": 13.202608108520508, "learning_rate": 9.99697715421868e-07, "loss": 0.1143, "step": 11350 }, { "epoch": 0.12137400502163577, "grad_norm": 7.286433696746826, "learning_rate": 9.996971310174087e-07, "loss": 0.0937, "step": 11360 }, { "epoch": 0.12148084833591538, "grad_norm": 11.468899726867676, "learning_rate": 9.996965460487536e-07, "loss": 0.1747, "step": 11370 }, { "epoch": 0.12158769165019499, "grad_norm": 2.682650327682495, "learning_rate": 9.996959605159035e-07, "loss": 0.1178, "step": 11380 }, { "epoch": 0.1216945349644746, "grad_norm": 10.975958824157715, "learning_rate": 9.996953744188587e-07, "loss": 0.1385, "step": 11390 }, { "epoch": 0.1218013782787542, "grad_norm": 12.243515968322754, "learning_rate": 9.996947877576203e-07, "loss": 0.1433, "step": 11400 }, { "epoch": 0.12190822159303381, "grad_norm": 6.170722007751465, "learning_rate": 9.99694200532189e-07, "loss": 0.0889, "step": 11410 }, { "epoch": 0.12201506490731343, "grad_norm": 8.542251586914062, "learning_rate": 9.996936127425648e-07, "loss": 0.1382, "step": 11420 }, { "epoch": 0.12212190822159304, "grad_norm": 3.981497049331665, "learning_rate": 9.99693024388749e-07, "loss": 0.1124, "step": 11430 }, { "epoch": 0.12222875153587265, "grad_norm": 3.3314363956451416, "learning_rate": 9.99692435470742e-07, "loss": 0.1188, "step": 11440 }, { "epoch": 0.12233559485015225, "grad_norm": 2.108536958694458, "learning_rate": 9.996918459885445e-07, "loss": 0.1317, "step": 11450 }, { "epoch": 0.12244243816443186, "grad_norm": 9.8397855758667, "learning_rate": 9.996912559421574e-07, "loss": 0.1387, "step": 11460 }, { "epoch": 0.12254928147871147, "grad_norm": 11.049248695373535, "learning_rate": 9.99690665331581e-07, "loss": 0.1466, "step": 11470 }, { "epoch": 0.12265612479299108, "grad_norm": 10.136812210083008, "learning_rate": 9.996900741568162e-07, "loss": 0.1812, "step": 11480 }, { "epoch": 0.12276296810727069, "grad_norm": 9.440664291381836, "learning_rate": 9.996894824178635e-07, "loss": 0.1347, "step": 11490 }, { "epoch": 0.12286981142155029, "grad_norm": 4.178328514099121, "learning_rate": 9.996888901147237e-07, "loss": 0.0973, "step": 11500 }, { "epoch": 0.1229766547358299, "grad_norm": 17.442174911499023, "learning_rate": 9.996882972473975e-07, "loss": 0.1602, "step": 11510 }, { "epoch": 0.12308349805010951, "grad_norm": 2.8130042552948, "learning_rate": 9.996877038158853e-07, "loss": 0.1766, "step": 11520 }, { "epoch": 0.12319034136438912, "grad_norm": 7.315703868865967, "learning_rate": 9.99687109820188e-07, "loss": 0.1695, "step": 11530 }, { "epoch": 0.12329718467866874, "grad_norm": 17.495182037353516, "learning_rate": 9.996865152603064e-07, "loss": 0.1809, "step": 11540 }, { "epoch": 0.12340402799294835, "grad_norm": 10.857769966125488, "learning_rate": 9.996859201362411e-07, "loss": 0.0881, "step": 11550 }, { "epoch": 0.12351087130722795, "grad_norm": 5.630815029144287, "learning_rate": 9.996853244479924e-07, "loss": 0.1596, "step": 11560 }, { "epoch": 0.12361771462150756, "grad_norm": 20.036300659179688, "learning_rate": 9.996847281955613e-07, "loss": 0.2139, "step": 11570 }, { "epoch": 0.12372455793578717, "grad_norm": 7.717648983001709, "learning_rate": 9.996841313789486e-07, "loss": 0.144, "step": 11580 }, { "epoch": 0.12383140125006678, "grad_norm": 3.548654317855835, "learning_rate": 9.996835339981549e-07, "loss": 0.2302, "step": 11590 }, { "epoch": 0.12393824456434639, "grad_norm": 5.018899917602539, "learning_rate": 9.996829360531804e-07, "loss": 0.1046, "step": 11600 }, { "epoch": 0.12404508787862599, "grad_norm": 5.560338020324707, "learning_rate": 9.996823375440265e-07, "loss": 0.0909, "step": 11610 }, { "epoch": 0.1241519311929056, "grad_norm": 9.446442604064941, "learning_rate": 9.996817384706932e-07, "loss": 0.098, "step": 11620 }, { "epoch": 0.12425877450718521, "grad_norm": 12.634520530700684, "learning_rate": 9.996811388331817e-07, "loss": 0.1061, "step": 11630 }, { "epoch": 0.12436561782146482, "grad_norm": 6.596890449523926, "learning_rate": 9.996805386314926e-07, "loss": 0.1203, "step": 11640 }, { "epoch": 0.12447246113574444, "grad_norm": 8.755389213562012, "learning_rate": 9.996799378656263e-07, "loss": 0.1507, "step": 11650 }, { "epoch": 0.12457930445002403, "grad_norm": 0.7776631712913513, "learning_rate": 9.996793365355835e-07, "loss": 0.1675, "step": 11660 }, { "epoch": 0.12468614776430365, "grad_norm": 10.56057071685791, "learning_rate": 9.996787346413654e-07, "loss": 0.2286, "step": 11670 }, { "epoch": 0.12479299107858326, "grad_norm": 13.335821151733398, "learning_rate": 9.996781321829718e-07, "loss": 0.1331, "step": 11680 }, { "epoch": 0.12489983439286287, "grad_norm": 5.079697608947754, "learning_rate": 9.996775291604042e-07, "loss": 0.0938, "step": 11690 }, { "epoch": 0.12500667770714247, "grad_norm": 5.333065986633301, "learning_rate": 9.99676925573663e-07, "loss": 0.1341, "step": 11700 }, { "epoch": 0.1251135210214221, "grad_norm": 9.510236740112305, "learning_rate": 9.996763214227488e-07, "loss": 0.1251, "step": 11710 }, { "epoch": 0.1252203643357017, "grad_norm": 2.8392553329467773, "learning_rate": 9.996757167076623e-07, "loss": 0.0765, "step": 11720 }, { "epoch": 0.12532720764998131, "grad_norm": 10.657398223876953, "learning_rate": 9.99675111428404e-07, "loss": 0.1103, "step": 11730 }, { "epoch": 0.1254340509642609, "grad_norm": 5.123448371887207, "learning_rate": 9.99674505584975e-07, "loss": 0.1077, "step": 11740 }, { "epoch": 0.1255408942785405, "grad_norm": 9.199493408203125, "learning_rate": 9.99673899177376e-07, "loss": 0.0704, "step": 11750 }, { "epoch": 0.12564773759282014, "grad_norm": 5.4650559425354, "learning_rate": 9.996732922056071e-07, "loss": 0.076, "step": 11760 }, { "epoch": 0.12575458090709973, "grad_norm": 8.774948120117188, "learning_rate": 9.996726846696693e-07, "loss": 0.1235, "step": 11770 }, { "epoch": 0.12586142422137936, "grad_norm": 13.33957290649414, "learning_rate": 9.996720765695634e-07, "loss": 0.0947, "step": 11780 }, { "epoch": 0.12596826753565896, "grad_norm": 9.582402229309082, "learning_rate": 9.9967146790529e-07, "loss": 0.1543, "step": 11790 }, { "epoch": 0.12607511084993855, "grad_norm": 9.3993558883667, "learning_rate": 9.996708586768502e-07, "loss": 0.1536, "step": 11800 }, { "epoch": 0.12618195416421818, "grad_norm": 2.600651741027832, "learning_rate": 9.996702488842437e-07, "loss": 0.1638, "step": 11810 }, { "epoch": 0.12628879747849778, "grad_norm": 8.523005485534668, "learning_rate": 9.99669638527472e-07, "loss": 0.1439, "step": 11820 }, { "epoch": 0.1263956407927774, "grad_norm": 13.569544792175293, "learning_rate": 9.996690276065356e-07, "loss": 0.1446, "step": 11830 }, { "epoch": 0.126502484107057, "grad_norm": 14.95146369934082, "learning_rate": 9.996684161214352e-07, "loss": 0.2375, "step": 11840 }, { "epoch": 0.1266093274213366, "grad_norm": 20.087806701660156, "learning_rate": 9.996678040721715e-07, "loss": 0.1773, "step": 11850 }, { "epoch": 0.12671617073561622, "grad_norm": 8.108590126037598, "learning_rate": 9.99667191458745e-07, "loss": 0.1099, "step": 11860 }, { "epoch": 0.12682301404989582, "grad_norm": 0.34820765256881714, "learning_rate": 9.996665782811567e-07, "loss": 0.1215, "step": 11870 }, { "epoch": 0.12692985736417545, "grad_norm": 6.114502429962158, "learning_rate": 9.996659645394068e-07, "loss": 0.0655, "step": 11880 }, { "epoch": 0.12703670067845504, "grad_norm": 12.396708488464355, "learning_rate": 9.996653502334967e-07, "loss": 0.1137, "step": 11890 }, { "epoch": 0.12714354399273464, "grad_norm": 5.847069263458252, "learning_rate": 9.996647353634265e-07, "loss": 0.1211, "step": 11900 }, { "epoch": 0.12725038730701427, "grad_norm": 12.014381408691406, "learning_rate": 9.99664119929197e-07, "loss": 0.1137, "step": 11910 }, { "epoch": 0.12735723062129387, "grad_norm": 13.114019393920898, "learning_rate": 9.99663503930809e-07, "loss": 0.1088, "step": 11920 }, { "epoch": 0.1274640739355735, "grad_norm": 5.887757301330566, "learning_rate": 9.996628873682633e-07, "loss": 0.0513, "step": 11930 }, { "epoch": 0.1275709172498531, "grad_norm": 7.3989973068237305, "learning_rate": 9.996622702415605e-07, "loss": 0.1141, "step": 11940 }, { "epoch": 0.1276777605641327, "grad_norm": 7.058403015136719, "learning_rate": 9.996616525507014e-07, "loss": 0.0766, "step": 11950 }, { "epoch": 0.1277846038784123, "grad_norm": 14.313182830810547, "learning_rate": 9.996610342956863e-07, "loss": 0.2601, "step": 11960 }, { "epoch": 0.1278914471926919, "grad_norm": 16.14702033996582, "learning_rate": 9.996604154765164e-07, "loss": 0.2971, "step": 11970 }, { "epoch": 0.12799829050697153, "grad_norm": 5.737916469573975, "learning_rate": 9.996597960931923e-07, "loss": 0.128, "step": 11980 }, { "epoch": 0.12810513382125113, "grad_norm": 6.432700157165527, "learning_rate": 9.996591761457143e-07, "loss": 0.1699, "step": 11990 }, { "epoch": 0.12821197713553076, "grad_norm": 9.655089378356934, "learning_rate": 9.996585556340836e-07, "loss": 0.2127, "step": 12000 }, { "epoch": 0.12831882044981036, "grad_norm": 1.7009398937225342, "learning_rate": 9.996579345583004e-07, "loss": 0.1298, "step": 12010 }, { "epoch": 0.12842566376408995, "grad_norm": 1.2946677207946777, "learning_rate": 9.99657312918366e-07, "loss": 0.1766, "step": 12020 }, { "epoch": 0.12853250707836958, "grad_norm": 18.323684692382812, "learning_rate": 9.996566907142804e-07, "loss": 0.1146, "step": 12030 }, { "epoch": 0.12863935039264918, "grad_norm": 8.01203441619873, "learning_rate": 9.99656067946045e-07, "loss": 0.1661, "step": 12040 }, { "epoch": 0.1287461937069288, "grad_norm": 4.748772621154785, "learning_rate": 9.996554446136602e-07, "loss": 0.1889, "step": 12050 }, { "epoch": 0.1288530370212084, "grad_norm": 5.003492832183838, "learning_rate": 9.996548207171264e-07, "loss": 0.1105, "step": 12060 }, { "epoch": 0.128959880335488, "grad_norm": 3.4436638355255127, "learning_rate": 9.99654196256445e-07, "loss": 0.2121, "step": 12070 }, { "epoch": 0.12906672364976762, "grad_norm": 2.713142156600952, "learning_rate": 9.99653571231616e-07, "loss": 0.1686, "step": 12080 }, { "epoch": 0.12917356696404722, "grad_norm": 9.03262996673584, "learning_rate": 9.996529456426405e-07, "loss": 0.105, "step": 12090 }, { "epoch": 0.12928041027832685, "grad_norm": 3.408414840698242, "learning_rate": 9.99652319489519e-07, "loss": 0.1213, "step": 12100 }, { "epoch": 0.12938725359260644, "grad_norm": 8.070503234863281, "learning_rate": 9.996516927722525e-07, "loss": 0.0707, "step": 12110 }, { "epoch": 0.12949409690688604, "grad_norm": 1.6882319450378418, "learning_rate": 9.996510654908414e-07, "loss": 0.171, "step": 12120 }, { "epoch": 0.12960094022116567, "grad_norm": 4.044895648956299, "learning_rate": 9.996504376452867e-07, "loss": 0.1292, "step": 12130 }, { "epoch": 0.12970778353544526, "grad_norm": 0.9079118967056274, "learning_rate": 9.996498092355887e-07, "loss": 0.1313, "step": 12140 }, { "epoch": 0.1298146268497249, "grad_norm": 10.375832557678223, "learning_rate": 9.996491802617487e-07, "loss": 0.1494, "step": 12150 }, { "epoch": 0.1299214701640045, "grad_norm": 17.87332534790039, "learning_rate": 9.996485507237669e-07, "loss": 0.2582, "step": 12160 }, { "epoch": 0.13002831347828409, "grad_norm": 20.943754196166992, "learning_rate": 9.99647920621644e-07, "loss": 0.2344, "step": 12170 }, { "epoch": 0.1301351567925637, "grad_norm": 6.6375603675842285, "learning_rate": 9.99647289955381e-07, "loss": 0.0932, "step": 12180 }, { "epoch": 0.1302420001068433, "grad_norm": 9.697604179382324, "learning_rate": 9.996466587249787e-07, "loss": 0.1219, "step": 12190 }, { "epoch": 0.13034884342112293, "grad_norm": 11.642143249511719, "learning_rate": 9.996460269304373e-07, "loss": 0.16, "step": 12200 }, { "epoch": 0.13045568673540253, "grad_norm": 12.831646919250488, "learning_rate": 9.99645394571758e-07, "loss": 0.0704, "step": 12210 }, { "epoch": 0.13056253004968213, "grad_norm": 4.536239147186279, "learning_rate": 9.996447616489414e-07, "loss": 0.1021, "step": 12220 }, { "epoch": 0.13066937336396175, "grad_norm": 9.069478034973145, "learning_rate": 9.99644128161988e-07, "loss": 0.0801, "step": 12230 }, { "epoch": 0.13077621667824135, "grad_norm": 10.131321907043457, "learning_rate": 9.996434941108986e-07, "loss": 0.1189, "step": 12240 }, { "epoch": 0.13088305999252098, "grad_norm": 11.701071739196777, "learning_rate": 9.996428594956742e-07, "loss": 0.1046, "step": 12250 }, { "epoch": 0.13098990330680058, "grad_norm": 20.082704544067383, "learning_rate": 9.996422243163151e-07, "loss": 0.1815, "step": 12260 }, { "epoch": 0.13109674662108017, "grad_norm": 11.036195755004883, "learning_rate": 9.996415885728223e-07, "loss": 0.1685, "step": 12270 }, { "epoch": 0.1312035899353598, "grad_norm": 6.296863555908203, "learning_rate": 9.996409522651965e-07, "loss": 0.1773, "step": 12280 }, { "epoch": 0.1313104332496394, "grad_norm": 7.733234405517578, "learning_rate": 9.996403153934385e-07, "loss": 0.1238, "step": 12290 }, { "epoch": 0.13141727656391902, "grad_norm": 8.078277587890625, "learning_rate": 9.996396779575487e-07, "loss": 0.1191, "step": 12300 }, { "epoch": 0.13152411987819862, "grad_norm": 20.97793197631836, "learning_rate": 9.99639039957528e-07, "loss": 0.274, "step": 12310 }, { "epoch": 0.13163096319247822, "grad_norm": 3.666635036468506, "learning_rate": 9.99638401393377e-07, "loss": 0.1235, "step": 12320 }, { "epoch": 0.13173780650675784, "grad_norm": 4.295056343078613, "learning_rate": 9.996377622650966e-07, "loss": 0.1411, "step": 12330 }, { "epoch": 0.13184464982103744, "grad_norm": 11.798577308654785, "learning_rate": 9.996371225726876e-07, "loss": 0.1759, "step": 12340 }, { "epoch": 0.13195149313531707, "grad_norm": 6.17579984664917, "learning_rate": 9.996364823161505e-07, "loss": 0.1447, "step": 12350 }, { "epoch": 0.13205833644959666, "grad_norm": 1.2698882818222046, "learning_rate": 9.996358414954861e-07, "loss": 0.1777, "step": 12360 }, { "epoch": 0.1321651797638763, "grad_norm": 7.1720099449157715, "learning_rate": 9.99635200110695e-07, "loss": 0.106, "step": 12370 }, { "epoch": 0.1322720230781559, "grad_norm": 20.403644561767578, "learning_rate": 9.996345581617781e-07, "loss": 0.1494, "step": 12380 }, { "epoch": 0.13237886639243548, "grad_norm": 10.230523109436035, "learning_rate": 9.996339156487362e-07, "loss": 0.0752, "step": 12390 }, { "epoch": 0.1324857097067151, "grad_norm": 7.153466701507568, "learning_rate": 9.9963327257157e-07, "loss": 0.1395, "step": 12400 }, { "epoch": 0.1325925530209947, "grad_norm": 12.467714309692383, "learning_rate": 9.996326289302799e-07, "loss": 0.1049, "step": 12410 }, { "epoch": 0.13269939633527433, "grad_norm": 4.302323818206787, "learning_rate": 9.996319847248668e-07, "loss": 0.2123, "step": 12420 }, { "epoch": 0.13280623964955393, "grad_norm": 2.708528995513916, "learning_rate": 9.996313399553316e-07, "loss": 0.1337, "step": 12430 }, { "epoch": 0.13291308296383353, "grad_norm": 16.000261306762695, "learning_rate": 9.996306946216748e-07, "loss": 0.1203, "step": 12440 }, { "epoch": 0.13301992627811315, "grad_norm": 4.368537902832031, "learning_rate": 9.996300487238975e-07, "loss": 0.0813, "step": 12450 }, { "epoch": 0.13312676959239275, "grad_norm": 20.87213134765625, "learning_rate": 9.99629402262e-07, "loss": 0.107, "step": 12460 }, { "epoch": 0.13323361290667238, "grad_norm": 17.463470458984375, "learning_rate": 9.996287552359833e-07, "loss": 0.1383, "step": 12470 }, { "epoch": 0.13334045622095198, "grad_norm": 8.188814163208008, "learning_rate": 9.996281076458477e-07, "loss": 0.1719, "step": 12480 }, { "epoch": 0.13344729953523157, "grad_norm": 13.396835327148438, "learning_rate": 9.996274594915945e-07, "loss": 0.0944, "step": 12490 }, { "epoch": 0.1335541428495112, "grad_norm": 15.024794578552246, "learning_rate": 9.996268107732244e-07, "loss": 0.2468, "step": 12500 }, { "epoch": 0.1336609861637908, "grad_norm": 8.403534889221191, "learning_rate": 9.996261614907375e-07, "loss": 0.106, "step": 12510 }, { "epoch": 0.13376782947807042, "grad_norm": 2.470710515975952, "learning_rate": 9.996255116441351e-07, "loss": 0.0736, "step": 12520 }, { "epoch": 0.13387467279235002, "grad_norm": 8.826809883117676, "learning_rate": 9.99624861233418e-07, "loss": 0.1617, "step": 12530 }, { "epoch": 0.13398151610662962, "grad_norm": 22.054380416870117, "learning_rate": 9.996242102585865e-07, "loss": 0.1593, "step": 12540 }, { "epoch": 0.13408835942090924, "grad_norm": 8.369638442993164, "learning_rate": 9.996235587196416e-07, "loss": 0.0968, "step": 12550 }, { "epoch": 0.13419520273518884, "grad_norm": 9.42421817779541, "learning_rate": 9.99622906616584e-07, "loss": 0.0992, "step": 12560 }, { "epoch": 0.13430204604946847, "grad_norm": 0.8671594858169556, "learning_rate": 9.996222539494145e-07, "loss": 0.1134, "step": 12570 }, { "epoch": 0.13440888936374806, "grad_norm": 10.4993257522583, "learning_rate": 9.996216007181337e-07, "loss": 0.1193, "step": 12580 }, { "epoch": 0.13451573267802766, "grad_norm": 10.046136856079102, "learning_rate": 9.996209469227425e-07, "loss": 0.1751, "step": 12590 }, { "epoch": 0.1346225759923073, "grad_norm": 7.408071041107178, "learning_rate": 9.996202925632414e-07, "loss": 0.1071, "step": 12600 }, { "epoch": 0.13472941930658688, "grad_norm": 9.057212829589844, "learning_rate": 9.996196376396313e-07, "loss": 0.108, "step": 12610 }, { "epoch": 0.1348362626208665, "grad_norm": 9.436811447143555, "learning_rate": 9.99618982151913e-07, "loss": 0.1265, "step": 12620 }, { "epoch": 0.1349431059351461, "grad_norm": 9.376510620117188, "learning_rate": 9.99618326100087e-07, "loss": 0.2032, "step": 12630 }, { "epoch": 0.1350499492494257, "grad_norm": 16.324230194091797, "learning_rate": 9.996176694841545e-07, "loss": 0.2056, "step": 12640 }, { "epoch": 0.13515679256370533, "grad_norm": 7.604057312011719, "learning_rate": 9.996170123041158e-07, "loss": 0.1085, "step": 12650 }, { "epoch": 0.13526363587798493, "grad_norm": 4.603692531585693, "learning_rate": 9.996163545599715e-07, "loss": 0.0694, "step": 12660 }, { "epoch": 0.13537047919226455, "grad_norm": 4.776761531829834, "learning_rate": 9.99615696251723e-07, "loss": 0.1038, "step": 12670 }, { "epoch": 0.13547732250654415, "grad_norm": 3.1776061058044434, "learning_rate": 9.996150373793705e-07, "loss": 0.0917, "step": 12680 }, { "epoch": 0.13558416582082375, "grad_norm": 3.0435631275177, "learning_rate": 9.99614377942915e-07, "loss": 0.1161, "step": 12690 }, { "epoch": 0.13569100913510337, "grad_norm": 6.070560932159424, "learning_rate": 9.996137179423572e-07, "loss": 0.0883, "step": 12700 }, { "epoch": 0.13579785244938297, "grad_norm": 7.319560527801514, "learning_rate": 9.996130573776977e-07, "loss": 0.2132, "step": 12710 }, { "epoch": 0.1359046957636626, "grad_norm": 3.4685213565826416, "learning_rate": 9.996123962489375e-07, "loss": 0.0434, "step": 12720 }, { "epoch": 0.1360115390779422, "grad_norm": 5.308444499969482, "learning_rate": 9.99611734556077e-07, "loss": 0.0904, "step": 12730 }, { "epoch": 0.13611838239222182, "grad_norm": 0.6323571801185608, "learning_rate": 9.996110722991173e-07, "loss": 0.1274, "step": 12740 }, { "epoch": 0.13622522570650142, "grad_norm": 13.217618942260742, "learning_rate": 9.99610409478059e-07, "loss": 0.1259, "step": 12750 }, { "epoch": 0.13633206902078102, "grad_norm": 11.471963882446289, "learning_rate": 9.996097460929027e-07, "loss": 0.0732, "step": 12760 }, { "epoch": 0.13643891233506064, "grad_norm": 9.019196510314941, "learning_rate": 9.996090821436492e-07, "loss": 0.2398, "step": 12770 }, { "epoch": 0.13654575564934024, "grad_norm": 6.657240390777588, "learning_rate": 9.996084176302995e-07, "loss": 0.1101, "step": 12780 }, { "epoch": 0.13665259896361986, "grad_norm": 25.78663444519043, "learning_rate": 9.996077525528543e-07, "loss": 0.2435, "step": 12790 }, { "epoch": 0.13675944227789946, "grad_norm": 3.46791934967041, "learning_rate": 9.99607086911314e-07, "loss": 0.1238, "step": 12800 }, { "epoch": 0.13686628559217906, "grad_norm": 14.896232604980469, "learning_rate": 9.996064207056798e-07, "loss": 0.0939, "step": 12810 }, { "epoch": 0.13697312890645869, "grad_norm": 3.930396795272827, "learning_rate": 9.99605753935952e-07, "loss": 0.0786, "step": 12820 }, { "epoch": 0.13707997222073828, "grad_norm": 27.251291275024414, "learning_rate": 9.99605086602132e-07, "loss": 0.1304, "step": 12830 }, { "epoch": 0.1371868155350179, "grad_norm": 2.4839041233062744, "learning_rate": 9.996044187042197e-07, "loss": 0.1435, "step": 12840 }, { "epoch": 0.1372936588492975, "grad_norm": 9.470680236816406, "learning_rate": 9.996037502422165e-07, "loss": 0.1079, "step": 12850 }, { "epoch": 0.1374005021635771, "grad_norm": 2.306366443634033, "learning_rate": 9.99603081216123e-07, "loss": 0.0671, "step": 12860 }, { "epoch": 0.13750734547785673, "grad_norm": 10.48354434967041, "learning_rate": 9.996024116259399e-07, "loss": 0.0787, "step": 12870 }, { "epoch": 0.13761418879213633, "grad_norm": 3.6046390533447266, "learning_rate": 9.99601741471668e-07, "loss": 0.1012, "step": 12880 }, { "epoch": 0.13772103210641595, "grad_norm": 12.445320129394531, "learning_rate": 9.996010707533081e-07, "loss": 0.173, "step": 12890 }, { "epoch": 0.13782787542069555, "grad_norm": 8.056326866149902, "learning_rate": 9.996003994708605e-07, "loss": 0.1509, "step": 12900 }, { "epoch": 0.13793471873497515, "grad_norm": 2.003675937652588, "learning_rate": 9.995997276243267e-07, "loss": 0.083, "step": 12910 }, { "epoch": 0.13804156204925477, "grad_norm": 2.1573314666748047, "learning_rate": 9.99599055213707e-07, "loss": 0.074, "step": 12920 }, { "epoch": 0.13814840536353437, "grad_norm": 0.412472665309906, "learning_rate": 9.995983822390022e-07, "loss": 0.1526, "step": 12930 }, { "epoch": 0.138255248677814, "grad_norm": 9.611077308654785, "learning_rate": 9.995977087002132e-07, "loss": 0.1068, "step": 12940 }, { "epoch": 0.1383620919920936, "grad_norm": 10.222915649414062, "learning_rate": 9.995970345973407e-07, "loss": 0.2278, "step": 12950 }, { "epoch": 0.1384689353063732, "grad_norm": 3.709597110748291, "learning_rate": 9.995963599303854e-07, "loss": 0.1441, "step": 12960 }, { "epoch": 0.13857577862065282, "grad_norm": 2.841869354248047, "learning_rate": 9.99595684699348e-07, "loss": 0.0643, "step": 12970 }, { "epoch": 0.13868262193493242, "grad_norm": 6.013016700744629, "learning_rate": 9.995950089042295e-07, "loss": 0.1271, "step": 12980 }, { "epoch": 0.13878946524921204, "grad_norm": 6.876600742340088, "learning_rate": 9.995943325450307e-07, "loss": 0.1603, "step": 12990 }, { "epoch": 0.13889630856349164, "grad_norm": 2.9844701290130615, "learning_rate": 9.99593655621752e-07, "loss": 0.1279, "step": 13000 }, { "epoch": 0.13900315187777124, "grad_norm": 7.099213123321533, "learning_rate": 9.995929781343943e-07, "loss": 0.163, "step": 13010 }, { "epoch": 0.13910999519205086, "grad_norm": 6.455996513366699, "learning_rate": 9.995923000829586e-07, "loss": 0.0936, "step": 13020 }, { "epoch": 0.13921683850633046, "grad_norm": 1.4071123600006104, "learning_rate": 9.995916214674452e-07, "loss": 0.1148, "step": 13030 }, { "epoch": 0.13932368182061008, "grad_norm": 7.411684989929199, "learning_rate": 9.995909422878554e-07, "loss": 0.1278, "step": 13040 }, { "epoch": 0.13943052513488968, "grad_norm": 7.395327568054199, "learning_rate": 9.995902625441896e-07, "loss": 0.1506, "step": 13050 }, { "epoch": 0.13953736844916928, "grad_norm": 9.20186996459961, "learning_rate": 9.995895822364487e-07, "loss": 0.0672, "step": 13060 }, { "epoch": 0.1396442117634489, "grad_norm": 9.341349601745605, "learning_rate": 9.995889013646335e-07, "loss": 0.1133, "step": 13070 }, { "epoch": 0.1397510550777285, "grad_norm": 11.66809368133545, "learning_rate": 9.995882199287446e-07, "loss": 0.1529, "step": 13080 }, { "epoch": 0.13985789839200813, "grad_norm": 15.39354133605957, "learning_rate": 9.99587537928783e-07, "loss": 0.2138, "step": 13090 }, { "epoch": 0.13996474170628773, "grad_norm": 11.967482566833496, "learning_rate": 9.995868553647493e-07, "loss": 0.1001, "step": 13100 }, { "epoch": 0.14007158502056735, "grad_norm": 3.352689027786255, "learning_rate": 9.995861722366444e-07, "loss": 0.0889, "step": 13110 }, { "epoch": 0.14017842833484695, "grad_norm": 6.546017169952393, "learning_rate": 9.995854885444692e-07, "loss": 0.0865, "step": 13120 }, { "epoch": 0.14028527164912655, "grad_norm": 29.71421241760254, "learning_rate": 9.99584804288224e-07, "loss": 0.1602, "step": 13130 }, { "epoch": 0.14039211496340617, "grad_norm": 5.214634418487549, "learning_rate": 9.9958411946791e-07, "loss": 0.1475, "step": 13140 }, { "epoch": 0.14049895827768577, "grad_norm": 14.007558822631836, "learning_rate": 9.995834340835276e-07, "loss": 0.1699, "step": 13150 }, { "epoch": 0.1406058015919654, "grad_norm": 10.27359676361084, "learning_rate": 9.99582748135078e-07, "loss": 0.1412, "step": 13160 }, { "epoch": 0.140712644906245, "grad_norm": 12.822403907775879, "learning_rate": 9.995820616225618e-07, "loss": 0.1971, "step": 13170 }, { "epoch": 0.1408194882205246, "grad_norm": 12.451199531555176, "learning_rate": 9.995813745459796e-07, "loss": 0.2448, "step": 13180 }, { "epoch": 0.14092633153480422, "grad_norm": 3.8239853382110596, "learning_rate": 9.995806869053325e-07, "loss": 0.1082, "step": 13190 }, { "epoch": 0.14103317484908381, "grad_norm": 4.859539031982422, "learning_rate": 9.99579998700621e-07, "loss": 0.0585, "step": 13200 }, { "epoch": 0.14114001816336344, "grad_norm": 3.625699520111084, "learning_rate": 9.995793099318459e-07, "loss": 0.1452, "step": 13210 }, { "epoch": 0.14124686147764304, "grad_norm": 15.265073776245117, "learning_rate": 9.995786205990082e-07, "loss": 0.1385, "step": 13220 }, { "epoch": 0.14135370479192264, "grad_norm": 7.731176853179932, "learning_rate": 9.995779307021085e-07, "loss": 0.092, "step": 13230 }, { "epoch": 0.14146054810620226, "grad_norm": 0.42984187602996826, "learning_rate": 9.995772402411475e-07, "loss": 0.1251, "step": 13240 }, { "epoch": 0.14156739142048186, "grad_norm": 10.105648040771484, "learning_rate": 9.99576549216126e-07, "loss": 0.098, "step": 13250 }, { "epoch": 0.14167423473476148, "grad_norm": 6.520411014556885, "learning_rate": 9.995758576270453e-07, "loss": 0.179, "step": 13260 }, { "epoch": 0.14178107804904108, "grad_norm": 13.114969253540039, "learning_rate": 9.995751654739053e-07, "loss": 0.1266, "step": 13270 }, { "epoch": 0.14188792136332068, "grad_norm": 13.772871017456055, "learning_rate": 9.995744727567075e-07, "loss": 0.1737, "step": 13280 }, { "epoch": 0.1419947646776003, "grad_norm": 7.256973743438721, "learning_rate": 9.995737794754523e-07, "loss": 0.0807, "step": 13290 }, { "epoch": 0.1421016079918799, "grad_norm": 10.44771957397461, "learning_rate": 9.995730856301406e-07, "loss": 0.1588, "step": 13300 }, { "epoch": 0.14220845130615953, "grad_norm": 3.57045316696167, "learning_rate": 9.995723912207732e-07, "loss": 0.1417, "step": 13310 }, { "epoch": 0.14231529462043913, "grad_norm": 5.3675618171691895, "learning_rate": 9.99571696247351e-07, "loss": 0.0805, "step": 13320 }, { "epoch": 0.14242213793471872, "grad_norm": 0.8890939950942993, "learning_rate": 9.995710007098744e-07, "loss": 0.0879, "step": 13330 }, { "epoch": 0.14252898124899835, "grad_norm": 8.65937328338623, "learning_rate": 9.995703046083446e-07, "loss": 0.1035, "step": 13340 }, { "epoch": 0.14263582456327795, "grad_norm": 7.414252758026123, "learning_rate": 9.995696079427622e-07, "loss": 0.0805, "step": 13350 }, { "epoch": 0.14274266787755757, "grad_norm": 5.264216899871826, "learning_rate": 9.99568910713128e-07, "loss": 0.1132, "step": 13360 }, { "epoch": 0.14284951119183717, "grad_norm": 3.807345151901245, "learning_rate": 9.995682129194426e-07, "loss": 0.055, "step": 13370 }, { "epoch": 0.14295635450611677, "grad_norm": 10.994006156921387, "learning_rate": 9.995675145617074e-07, "loss": 0.1327, "step": 13380 }, { "epoch": 0.1430631978203964, "grad_norm": 20.663490295410156, "learning_rate": 9.995668156399225e-07, "loss": 0.2713, "step": 13390 }, { "epoch": 0.143170041134676, "grad_norm": 4.836803913116455, "learning_rate": 9.99566116154089e-07, "loss": 0.1161, "step": 13400 }, { "epoch": 0.14327688444895562, "grad_norm": 14.760605812072754, "learning_rate": 9.995654161042078e-07, "loss": 0.1398, "step": 13410 }, { "epoch": 0.1433837277632352, "grad_norm": 12.334403991699219, "learning_rate": 9.995647154902793e-07, "loss": 0.0975, "step": 13420 }, { "epoch": 0.1434905710775148, "grad_norm": 3.8642938137054443, "learning_rate": 9.995640143123045e-07, "loss": 0.0671, "step": 13430 }, { "epoch": 0.14359741439179444, "grad_norm": 2.861056089401245, "learning_rate": 9.995633125702844e-07, "loss": 0.118, "step": 13440 }, { "epoch": 0.14370425770607403, "grad_norm": 6.501709938049316, "learning_rate": 9.995626102642197e-07, "loss": 0.0944, "step": 13450 }, { "epoch": 0.14381110102035366, "grad_norm": 8.395400047302246, "learning_rate": 9.99561907394111e-07, "loss": 0.0918, "step": 13460 }, { "epoch": 0.14391794433463326, "grad_norm": 4.966168403625488, "learning_rate": 9.99561203959959e-07, "loss": 0.0753, "step": 13470 }, { "epoch": 0.14402478764891288, "grad_norm": 4.3693132400512695, "learning_rate": 9.995604999617651e-07, "loss": 0.1394, "step": 13480 }, { "epoch": 0.14413163096319248, "grad_norm": 11.928872108459473, "learning_rate": 9.995597953995294e-07, "loss": 0.0894, "step": 13490 }, { "epoch": 0.14423847427747208, "grad_norm": 9.468280792236328, "learning_rate": 9.99559090273253e-07, "loss": 0.1018, "step": 13500 }, { "epoch": 0.1443453175917517, "grad_norm": 12.265036582946777, "learning_rate": 9.995583845829367e-07, "loss": 0.0983, "step": 13510 }, { "epoch": 0.1444521609060313, "grad_norm": 14.584814071655273, "learning_rate": 9.995576783285814e-07, "loss": 0.0771, "step": 13520 }, { "epoch": 0.14455900422031093, "grad_norm": 11.509416580200195, "learning_rate": 9.995569715101876e-07, "loss": 0.1335, "step": 13530 }, { "epoch": 0.14466584753459052, "grad_norm": 1.913245439529419, "learning_rate": 9.995562641277566e-07, "loss": 0.1561, "step": 13540 }, { "epoch": 0.14477269084887012, "grad_norm": 1.7887523174285889, "learning_rate": 9.995555561812884e-07, "loss": 0.0814, "step": 13550 }, { "epoch": 0.14487953416314975, "grad_norm": 3.9557945728302, "learning_rate": 9.995548476707846e-07, "loss": 0.0587, "step": 13560 }, { "epoch": 0.14498637747742935, "grad_norm": 9.388169288635254, "learning_rate": 9.995541385962456e-07, "loss": 0.1449, "step": 13570 }, { "epoch": 0.14509322079170897, "grad_norm": 2.919395923614502, "learning_rate": 9.995534289576724e-07, "loss": 0.1007, "step": 13580 }, { "epoch": 0.14520006410598857, "grad_norm": 10.15235424041748, "learning_rate": 9.995527187550654e-07, "loss": 0.0902, "step": 13590 }, { "epoch": 0.14530690742026817, "grad_norm": 5.403456211090088, "learning_rate": 9.995520079884258e-07, "loss": 0.0949, "step": 13600 }, { "epoch": 0.1454137507345478, "grad_norm": 5.34095573425293, "learning_rate": 9.995512966577544e-07, "loss": 0.0648, "step": 13610 }, { "epoch": 0.1455205940488274, "grad_norm": 7.8343119621276855, "learning_rate": 9.995505847630518e-07, "loss": 0.179, "step": 13620 }, { "epoch": 0.14562743736310702, "grad_norm": 5.478022575378418, "learning_rate": 9.99549872304319e-07, "loss": 0.1209, "step": 13630 }, { "epoch": 0.1457342806773866, "grad_norm": 6.294150352478027, "learning_rate": 9.995491592815566e-07, "loss": 0.0935, "step": 13640 }, { "epoch": 0.1458411239916662, "grad_norm": 3.6226913928985596, "learning_rate": 9.995484456947654e-07, "loss": 0.0694, "step": 13650 }, { "epoch": 0.14594796730594584, "grad_norm": 10.074156761169434, "learning_rate": 9.995477315439463e-07, "loss": 0.2538, "step": 13660 }, { "epoch": 0.14605481062022543, "grad_norm": 3.251246929168701, "learning_rate": 9.995470168291002e-07, "loss": 0.0698, "step": 13670 }, { "epoch": 0.14616165393450506, "grad_norm": 9.53766918182373, "learning_rate": 9.995463015502278e-07, "loss": 0.0455, "step": 13680 }, { "epoch": 0.14626849724878466, "grad_norm": 18.35679054260254, "learning_rate": 9.9954558570733e-07, "loss": 0.1636, "step": 13690 }, { "epoch": 0.14637534056306425, "grad_norm": 11.617161750793457, "learning_rate": 9.995448693004074e-07, "loss": 0.1275, "step": 13700 }, { "epoch": 0.14648218387734388, "grad_norm": 12.644540786743164, "learning_rate": 9.99544152329461e-07, "loss": 0.0715, "step": 13710 }, { "epoch": 0.14658902719162348, "grad_norm": 23.646867752075195, "learning_rate": 9.995434347944916e-07, "loss": 0.1878, "step": 13720 }, { "epoch": 0.1466958705059031, "grad_norm": 6.3758368492126465, "learning_rate": 9.995427166954997e-07, "loss": 0.0848, "step": 13730 }, { "epoch": 0.1468027138201827, "grad_norm": 9.914971351623535, "learning_rate": 9.995419980324867e-07, "loss": 0.0953, "step": 13740 }, { "epoch": 0.1469095571344623, "grad_norm": 1.0172020196914673, "learning_rate": 9.995412788054529e-07, "loss": 0.1877, "step": 13750 }, { "epoch": 0.14701640044874192, "grad_norm": 13.476710319519043, "learning_rate": 9.995405590143993e-07, "loss": 0.1009, "step": 13760 }, { "epoch": 0.14712324376302152, "grad_norm": 0.6987784504890442, "learning_rate": 9.995398386593269e-07, "loss": 0.0774, "step": 13770 }, { "epoch": 0.14723008707730115, "grad_norm": 4.215651988983154, "learning_rate": 9.995391177402361e-07, "loss": 0.0882, "step": 13780 }, { "epoch": 0.14733693039158074, "grad_norm": 13.022830963134766, "learning_rate": 9.99538396257128e-07, "loss": 0.0892, "step": 13790 }, { "epoch": 0.14744377370586034, "grad_norm": 10.24752426147461, "learning_rate": 9.995376742100034e-07, "loss": 0.0921, "step": 13800 }, { "epoch": 0.14755061702013997, "grad_norm": 10.898420333862305, "learning_rate": 9.99536951598863e-07, "loss": 0.0663, "step": 13810 }, { "epoch": 0.14765746033441957, "grad_norm": 7.916815280914307, "learning_rate": 9.995362284237077e-07, "loss": 0.195, "step": 13820 }, { "epoch": 0.1477643036486992, "grad_norm": 19.91455841064453, "learning_rate": 9.99535504684538e-07, "loss": 0.1193, "step": 13830 }, { "epoch": 0.1478711469629788, "grad_norm": 14.613158226013184, "learning_rate": 9.995347803813557e-07, "loss": 0.0652, "step": 13840 }, { "epoch": 0.14797799027725841, "grad_norm": 39.28537368774414, "learning_rate": 9.995340555141605e-07, "loss": 0.1721, "step": 13850 }, { "epoch": 0.148084833591538, "grad_norm": 4.944307804107666, "learning_rate": 9.995333300829537e-07, "loss": 0.1108, "step": 13860 }, { "epoch": 0.1481916769058176, "grad_norm": 4.696804523468018, "learning_rate": 9.99532604087736e-07, "loss": 0.1226, "step": 13870 }, { "epoch": 0.14829852022009724, "grad_norm": 3.007112741470337, "learning_rate": 9.995318775285084e-07, "loss": 0.0977, "step": 13880 }, { "epoch": 0.14840536353437683, "grad_norm": 9.817365646362305, "learning_rate": 9.995311504052714e-07, "loss": 0.095, "step": 13890 }, { "epoch": 0.14851220684865646, "grad_norm": 9.650055885314941, "learning_rate": 9.995304227180262e-07, "loss": 0.1278, "step": 13900 }, { "epoch": 0.14861905016293606, "grad_norm": 3.6873843669891357, "learning_rate": 9.995296944667734e-07, "loss": 0.1095, "step": 13910 }, { "epoch": 0.14872589347721565, "grad_norm": 13.225981712341309, "learning_rate": 9.99528965651514e-07, "loss": 0.2087, "step": 13920 }, { "epoch": 0.14883273679149528, "grad_norm": 1.8812813758850098, "learning_rate": 9.995282362722485e-07, "loss": 0.0751, "step": 13930 }, { "epoch": 0.14893958010577488, "grad_norm": 9.147274017333984, "learning_rate": 9.99527506328978e-07, "loss": 0.0758, "step": 13940 }, { "epoch": 0.1490464234200545, "grad_norm": 10.134631156921387, "learning_rate": 9.995267758217034e-07, "loss": 0.1747, "step": 13950 }, { "epoch": 0.1491532667343341, "grad_norm": 14.109942436218262, "learning_rate": 9.995260447504252e-07, "loss": 0.2592, "step": 13960 }, { "epoch": 0.1492601100486137, "grad_norm": 7.079834938049316, "learning_rate": 9.995253131151443e-07, "loss": 0.2408, "step": 13970 }, { "epoch": 0.14936695336289332, "grad_norm": 2.493605136871338, "learning_rate": 9.99524580915862e-07, "loss": 0.1686, "step": 13980 }, { "epoch": 0.14947379667717292, "grad_norm": 4.152739524841309, "learning_rate": 9.995238481525784e-07, "loss": 0.0425, "step": 13990 }, { "epoch": 0.14958063999145255, "grad_norm": 18.125734329223633, "learning_rate": 9.995231148252948e-07, "loss": 0.1606, "step": 14000 }, { "epoch": 0.14968748330573214, "grad_norm": 14.580142974853516, "learning_rate": 9.995223809340118e-07, "loss": 0.1199, "step": 14010 }, { "epoch": 0.14979432662001174, "grad_norm": 6.852952480316162, "learning_rate": 9.995216464787304e-07, "loss": 0.0647, "step": 14020 }, { "epoch": 0.14990116993429137, "grad_norm": 7.665865421295166, "learning_rate": 9.995209114594515e-07, "loss": 0.0726, "step": 14030 }, { "epoch": 0.15000801324857096, "grad_norm": 19.599552154541016, "learning_rate": 9.995201758761757e-07, "loss": 0.113, "step": 14040 }, { "epoch": 0.1501148565628506, "grad_norm": 8.274740219116211, "learning_rate": 9.995194397289038e-07, "loss": 0.1435, "step": 14050 }, { "epoch": 0.1502216998771302, "grad_norm": 14.229656219482422, "learning_rate": 9.995187030176368e-07, "loss": 0.0638, "step": 14060 }, { "epoch": 0.15032854319140979, "grad_norm": 10.560535430908203, "learning_rate": 9.995179657423756e-07, "loss": 0.1696, "step": 14070 }, { "epoch": 0.1504353865056894, "grad_norm": 2.787205457687378, "learning_rate": 9.995172279031208e-07, "loss": 0.0664, "step": 14080 }, { "epoch": 0.150542229819969, "grad_norm": 6.951884746551514, "learning_rate": 9.995164894998734e-07, "loss": 0.127, "step": 14090 }, { "epoch": 0.15064907313424863, "grad_norm": 2.025482177734375, "learning_rate": 9.99515750532634e-07, "loss": 0.1215, "step": 14100 }, { "epoch": 0.15075591644852823, "grad_norm": 11.092940330505371, "learning_rate": 9.995150110014038e-07, "loss": 0.1123, "step": 14110 }, { "epoch": 0.15086275976280783, "grad_norm": 11.659250259399414, "learning_rate": 9.995142709061836e-07, "loss": 0.2008, "step": 14120 }, { "epoch": 0.15096960307708746, "grad_norm": 4.421813488006592, "learning_rate": 9.99513530246974e-07, "loss": 0.059, "step": 14130 }, { "epoch": 0.15107644639136705, "grad_norm": 3.1809933185577393, "learning_rate": 9.995127890237758e-07, "loss": 0.1297, "step": 14140 }, { "epoch": 0.15118328970564668, "grad_norm": 16.86284637451172, "learning_rate": 9.9951204723659e-07, "loss": 0.1431, "step": 14150 }, { "epoch": 0.15129013301992628, "grad_norm": 8.700796127319336, "learning_rate": 9.995113048854174e-07, "loss": 0.2054, "step": 14160 }, { "epoch": 0.15139697633420587, "grad_norm": 4.701057434082031, "learning_rate": 9.995105619702589e-07, "loss": 0.0764, "step": 14170 }, { "epoch": 0.1515038196484855, "grad_norm": 16.508596420288086, "learning_rate": 9.995098184911152e-07, "loss": 0.1732, "step": 14180 }, { "epoch": 0.1516106629627651, "grad_norm": 11.733125686645508, "learning_rate": 9.995090744479873e-07, "loss": 0.1848, "step": 14190 }, { "epoch": 0.15171750627704472, "grad_norm": 4.814170837402344, "learning_rate": 9.995083298408758e-07, "loss": 0.0951, "step": 14200 }, { "epoch": 0.15182434959132432, "grad_norm": 6.959049701690674, "learning_rate": 9.995075846697819e-07, "loss": 0.148, "step": 14210 }, { "epoch": 0.15193119290560395, "grad_norm": 13.192453384399414, "learning_rate": 9.99506838934706e-07, "loss": 0.1134, "step": 14220 }, { "epoch": 0.15203803621988354, "grad_norm": 9.869263648986816, "learning_rate": 9.995060926356493e-07, "loss": 0.1155, "step": 14230 }, { "epoch": 0.15214487953416314, "grad_norm": 8.17391300201416, "learning_rate": 9.995053457726126e-07, "loss": 0.0773, "step": 14240 }, { "epoch": 0.15225172284844277, "grad_norm": 1.5351961851119995, "learning_rate": 9.995045983455965e-07, "loss": 0.0782, "step": 14250 }, { "epoch": 0.15235856616272236, "grad_norm": 6.692237854003906, "learning_rate": 9.995038503546022e-07, "loss": 0.0735, "step": 14260 }, { "epoch": 0.152465409477002, "grad_norm": 14.170541763305664, "learning_rate": 9.995031017996303e-07, "loss": 0.1464, "step": 14270 }, { "epoch": 0.1525722527912816, "grad_norm": 4.573349952697754, "learning_rate": 9.995023526806816e-07, "loss": 0.1782, "step": 14280 }, { "epoch": 0.15267909610556119, "grad_norm": 9.95660400390625, "learning_rate": 9.995016029977572e-07, "loss": 0.0594, "step": 14290 }, { "epoch": 0.1527859394198408, "grad_norm": 1.6631077527999878, "learning_rate": 9.995008527508577e-07, "loss": 0.0846, "step": 14300 }, { "epoch": 0.1528927827341204, "grad_norm": 3.8721635341644287, "learning_rate": 9.99500101939984e-07, "loss": 0.1614, "step": 14310 }, { "epoch": 0.15299962604840003, "grad_norm": 9.389126777648926, "learning_rate": 9.99499350565137e-07, "loss": 0.0623, "step": 14320 }, { "epoch": 0.15310646936267963, "grad_norm": 28.30794906616211, "learning_rate": 9.994985986263177e-07, "loss": 0.189, "step": 14330 }, { "epoch": 0.15321331267695923, "grad_norm": 13.464760780334473, "learning_rate": 9.994978461235264e-07, "loss": 0.1757, "step": 14340 }, { "epoch": 0.15332015599123885, "grad_norm": 1.2154521942138672, "learning_rate": 9.994970930567646e-07, "loss": 0.1124, "step": 14350 }, { "epoch": 0.15342699930551845, "grad_norm": 5.138197422027588, "learning_rate": 9.994963394260329e-07, "loss": 0.0727, "step": 14360 }, { "epoch": 0.15353384261979808, "grad_norm": 8.14420223236084, "learning_rate": 9.99495585231332e-07, "loss": 0.0923, "step": 14370 }, { "epoch": 0.15364068593407768, "grad_norm": 14.245452880859375, "learning_rate": 9.99494830472663e-07, "loss": 0.3001, "step": 14380 }, { "epoch": 0.15374752924835727, "grad_norm": 1.9235866069793701, "learning_rate": 9.994940751500266e-07, "loss": 0.0478, "step": 14390 }, { "epoch": 0.1538543725626369, "grad_norm": 9.550278663635254, "learning_rate": 9.994933192634238e-07, "loss": 0.0861, "step": 14400 }, { "epoch": 0.1539612158769165, "grad_norm": 7.555599689483643, "learning_rate": 9.994925628128552e-07, "loss": 0.1183, "step": 14410 }, { "epoch": 0.15406805919119612, "grad_norm": 7.9543657302856445, "learning_rate": 9.994918057983217e-07, "loss": 0.0593, "step": 14420 }, { "epoch": 0.15417490250547572, "grad_norm": 4.222015380859375, "learning_rate": 9.994910482198244e-07, "loss": 0.1474, "step": 14430 }, { "epoch": 0.15428174581975532, "grad_norm": 3.838346481323242, "learning_rate": 9.99490290077364e-07, "loss": 0.1337, "step": 14440 }, { "epoch": 0.15438858913403494, "grad_norm": 10.748543739318848, "learning_rate": 9.994895313709415e-07, "loss": 0.1907, "step": 14450 }, { "epoch": 0.15449543244831454, "grad_norm": 9.275106430053711, "learning_rate": 9.994887721005575e-07, "loss": 0.0589, "step": 14460 }, { "epoch": 0.15460227576259417, "grad_norm": 5.401554107666016, "learning_rate": 9.99488012266213e-07, "loss": 0.1339, "step": 14470 }, { "epoch": 0.15470911907687376, "grad_norm": 4.681499481201172, "learning_rate": 9.994872518679086e-07, "loss": 0.1359, "step": 14480 }, { "epoch": 0.15481596239115336, "grad_norm": 1.7320390939712524, "learning_rate": 9.994864909056458e-07, "loss": 0.1009, "step": 14490 }, { "epoch": 0.154922805705433, "grad_norm": 7.414167404174805, "learning_rate": 9.994857293794247e-07, "loss": 0.0841, "step": 14500 }, { "epoch": 0.15502964901971258, "grad_norm": 6.058319568634033, "learning_rate": 9.994849672892466e-07, "loss": 0.095, "step": 14510 }, { "epoch": 0.1551364923339922, "grad_norm": 13.034213066101074, "learning_rate": 9.994842046351125e-07, "loss": 0.1576, "step": 14520 }, { "epoch": 0.1552433356482718, "grad_norm": 7.955039978027344, "learning_rate": 9.994834414170228e-07, "loss": 0.1168, "step": 14530 }, { "epoch": 0.1553501789625514, "grad_norm": 2.3715169429779053, "learning_rate": 9.994826776349787e-07, "loss": 0.0807, "step": 14540 }, { "epoch": 0.15545702227683103, "grad_norm": 7.592650890350342, "learning_rate": 9.994819132889808e-07, "loss": 0.0948, "step": 14550 }, { "epoch": 0.15556386559111063, "grad_norm": 3.3775644302368164, "learning_rate": 9.994811483790304e-07, "loss": 0.1523, "step": 14560 }, { "epoch": 0.15567070890539025, "grad_norm": 10.65873908996582, "learning_rate": 9.994803829051279e-07, "loss": 0.1681, "step": 14570 }, { "epoch": 0.15577755221966985, "grad_norm": 2.3701531887054443, "learning_rate": 9.994796168672744e-07, "loss": 0.0526, "step": 14580 }, { "epoch": 0.15588439553394948, "grad_norm": 4.040130138397217, "learning_rate": 9.994788502654708e-07, "loss": 0.0921, "step": 14590 }, { "epoch": 0.15599123884822907, "grad_norm": 12.75753402709961, "learning_rate": 9.994780830997177e-07, "loss": 0.1237, "step": 14600 }, { "epoch": 0.15609808216250867, "grad_norm": 15.855953216552734, "learning_rate": 9.994773153700163e-07, "loss": 0.1058, "step": 14610 }, { "epoch": 0.1562049254767883, "grad_norm": 8.492485046386719, "learning_rate": 9.994765470763673e-07, "loss": 0.0982, "step": 14620 }, { "epoch": 0.1563117687910679, "grad_norm": 19.198640823364258, "learning_rate": 9.994757782187714e-07, "loss": 0.1182, "step": 14630 }, { "epoch": 0.15641861210534752, "grad_norm": 4.8633551597595215, "learning_rate": 9.994750087972298e-07, "loss": 0.1518, "step": 14640 }, { "epoch": 0.15652545541962712, "grad_norm": 9.64168643951416, "learning_rate": 9.994742388117433e-07, "loss": 0.1075, "step": 14650 }, { "epoch": 0.15663229873390672, "grad_norm": 4.952120780944824, "learning_rate": 9.994734682623126e-07, "loss": 0.1407, "step": 14660 }, { "epoch": 0.15673914204818634, "grad_norm": 3.7646102905273438, "learning_rate": 9.994726971489385e-07, "loss": 0.1338, "step": 14670 }, { "epoch": 0.15684598536246594, "grad_norm": 4.035956382751465, "learning_rate": 9.994719254716223e-07, "loss": 0.1174, "step": 14680 }, { "epoch": 0.15695282867674556, "grad_norm": 1.768005132675171, "learning_rate": 9.994711532303644e-07, "loss": 0.0893, "step": 14690 }, { "epoch": 0.15705967199102516, "grad_norm": 4.801392078399658, "learning_rate": 9.99470380425166e-07, "loss": 0.1736, "step": 14700 }, { "epoch": 0.15716651530530476, "grad_norm": 7.104418754577637, "learning_rate": 9.994696070560275e-07, "loss": 0.0927, "step": 14710 }, { "epoch": 0.15727335861958439, "grad_norm": 4.736766338348389, "learning_rate": 9.994688331229505e-07, "loss": 0.1205, "step": 14720 }, { "epoch": 0.15738020193386398, "grad_norm": 11.118884086608887, "learning_rate": 9.994680586259354e-07, "loss": 0.1102, "step": 14730 }, { "epoch": 0.1574870452481436, "grad_norm": 7.911611080169678, "learning_rate": 9.994672835649832e-07, "loss": 0.2227, "step": 14740 }, { "epoch": 0.1575938885624232, "grad_norm": 4.885548114776611, "learning_rate": 9.994665079400946e-07, "loss": 0.0971, "step": 14750 }, { "epoch": 0.1577007318767028, "grad_norm": 2.6491870880126953, "learning_rate": 9.994657317512706e-07, "loss": 0.109, "step": 14760 }, { "epoch": 0.15780757519098243, "grad_norm": 1.405897617340088, "learning_rate": 9.994649549985124e-07, "loss": 0.1207, "step": 14770 }, { "epoch": 0.15791441850526203, "grad_norm": 8.168403625488281, "learning_rate": 9.994641776818203e-07, "loss": 0.0641, "step": 14780 }, { "epoch": 0.15802126181954165, "grad_norm": 18.170074462890625, "learning_rate": 9.994633998011956e-07, "loss": 0.2052, "step": 14790 }, { "epoch": 0.15812810513382125, "grad_norm": 9.392176628112793, "learning_rate": 9.994626213566387e-07, "loss": 0.0973, "step": 14800 }, { "epoch": 0.15823494844810085, "grad_norm": 8.605061531066895, "learning_rate": 9.994618423481513e-07, "loss": 0.0751, "step": 14810 }, { "epoch": 0.15834179176238047, "grad_norm": 3.318263053894043, "learning_rate": 9.994610627757334e-07, "loss": 0.0783, "step": 14820 }, { "epoch": 0.15844863507666007, "grad_norm": 13.068801879882812, "learning_rate": 9.994602826393865e-07, "loss": 0.1052, "step": 14830 }, { "epoch": 0.1585554783909397, "grad_norm": 12.300949096679688, "learning_rate": 9.99459501939111e-07, "loss": 0.1178, "step": 14840 }, { "epoch": 0.1586623217052193, "grad_norm": 0.8117077946662903, "learning_rate": 9.994587206749083e-07, "loss": 0.0826, "step": 14850 }, { "epoch": 0.1587691650194989, "grad_norm": 6.619767189025879, "learning_rate": 9.994579388467789e-07, "loss": 0.0473, "step": 14860 }, { "epoch": 0.15887600833377852, "grad_norm": 21.224546432495117, "learning_rate": 9.994571564547236e-07, "loss": 0.2093, "step": 14870 }, { "epoch": 0.15898285164805812, "grad_norm": 4.865340232849121, "learning_rate": 9.994563734987437e-07, "loss": 0.1113, "step": 14880 }, { "epoch": 0.15908969496233774, "grad_norm": 12.197991371154785, "learning_rate": 9.9945558997884e-07, "loss": 0.0985, "step": 14890 }, { "epoch": 0.15919653827661734, "grad_norm": 4.808229923248291, "learning_rate": 9.99454805895013e-07, "loss": 0.1561, "step": 14900 }, { "epoch": 0.15930338159089694, "grad_norm": 8.26944637298584, "learning_rate": 9.994540212472638e-07, "loss": 0.0952, "step": 14910 }, { "epoch": 0.15941022490517656, "grad_norm": 8.768424987792969, "learning_rate": 9.994532360355935e-07, "loss": 0.0501, "step": 14920 }, { "epoch": 0.15951706821945616, "grad_norm": 5.688266754150391, "learning_rate": 9.994524502600025e-07, "loss": 0.1043, "step": 14930 }, { "epoch": 0.15962391153373579, "grad_norm": 1.4434783458709717, "learning_rate": 9.994516639204921e-07, "loss": 0.116, "step": 14940 }, { "epoch": 0.15973075484801538, "grad_norm": 4.133965969085693, "learning_rate": 9.994508770170634e-07, "loss": 0.1095, "step": 14950 }, { "epoch": 0.159837598162295, "grad_norm": 10.865691184997559, "learning_rate": 9.994500895497166e-07, "loss": 0.2128, "step": 14960 }, { "epoch": 0.1599444414765746, "grad_norm": 9.554251670837402, "learning_rate": 9.994493015184531e-07, "loss": 0.1487, "step": 14970 }, { "epoch": 0.1600512847908542, "grad_norm": 16.841754913330078, "learning_rate": 9.994485129232737e-07, "loss": 0.1255, "step": 14980 }, { "epoch": 0.16015812810513383, "grad_norm": 5.906912803649902, "learning_rate": 9.99447723764179e-07, "loss": 0.0871, "step": 14990 }, { "epoch": 0.16026497141941343, "grad_norm": 0.47207215428352356, "learning_rate": 9.994469340411704e-07, "loss": 0.1193, "step": 15000 }, { "epoch": 0.16037181473369305, "grad_norm": 6.155993938446045, "learning_rate": 9.994461437542482e-07, "loss": 0.1906, "step": 15010 }, { "epoch": 0.16047865804797265, "grad_norm": 12.376232147216797, "learning_rate": 9.994453529034139e-07, "loss": 0.0795, "step": 15020 }, { "epoch": 0.16058550136225225, "grad_norm": 7.531607627868652, "learning_rate": 9.99444561488668e-07, "loss": 0.1938, "step": 15030 }, { "epoch": 0.16069234467653187, "grad_norm": 2.630357503890991, "learning_rate": 9.994437695100114e-07, "loss": 0.1003, "step": 15040 }, { "epoch": 0.16079918799081147, "grad_norm": 2.469242811203003, "learning_rate": 9.994429769674452e-07, "loss": 0.1047, "step": 15050 }, { "epoch": 0.1609060313050911, "grad_norm": 6.228058815002441, "learning_rate": 9.994421838609702e-07, "loss": 0.0797, "step": 15060 }, { "epoch": 0.1610128746193707, "grad_norm": 4.943014144897461, "learning_rate": 9.994413901905872e-07, "loss": 0.1391, "step": 15070 }, { "epoch": 0.1611197179336503, "grad_norm": 13.827351570129395, "learning_rate": 9.994405959562973e-07, "loss": 0.1189, "step": 15080 }, { "epoch": 0.16122656124792992, "grad_norm": 4.972727298736572, "learning_rate": 9.994398011581012e-07, "loss": 0.1415, "step": 15090 }, { "epoch": 0.16133340456220951, "grad_norm": 3.6233558654785156, "learning_rate": 9.99439005796e-07, "loss": 0.1008, "step": 15100 }, { "epoch": 0.16144024787648914, "grad_norm": 7.376228332519531, "learning_rate": 9.994382098699942e-07, "loss": 0.0806, "step": 15110 }, { "epoch": 0.16154709119076874, "grad_norm": 7.916995048522949, "learning_rate": 9.99437413380085e-07, "loss": 0.1377, "step": 15120 }, { "epoch": 0.16165393450504834, "grad_norm": 18.379831314086914, "learning_rate": 9.994366163262735e-07, "loss": 0.0826, "step": 15130 }, { "epoch": 0.16176077781932796, "grad_norm": 4.154972076416016, "learning_rate": 9.994358187085602e-07, "loss": 0.1417, "step": 15140 }, { "epoch": 0.16186762113360756, "grad_norm": 11.288728713989258, "learning_rate": 9.99435020526946e-07, "loss": 0.087, "step": 15150 }, { "epoch": 0.16197446444788718, "grad_norm": 17.118623733520508, "learning_rate": 9.994342217814324e-07, "loss": 0.1688, "step": 15160 }, { "epoch": 0.16208130776216678, "grad_norm": 4.976250648498535, "learning_rate": 9.994334224720195e-07, "loss": 0.0864, "step": 15170 }, { "epoch": 0.16218815107644638, "grad_norm": 7.028039932250977, "learning_rate": 9.994326225987088e-07, "loss": 0.1054, "step": 15180 }, { "epoch": 0.162294994390726, "grad_norm": 27.8679141998291, "learning_rate": 9.99431822161501e-07, "loss": 0.1615, "step": 15190 }, { "epoch": 0.1624018377050056, "grad_norm": 11.45948600769043, "learning_rate": 9.994310211603966e-07, "loss": 0.1402, "step": 15200 }, { "epoch": 0.16250868101928523, "grad_norm": 6.845743656158447, "learning_rate": 9.994302195953974e-07, "loss": 0.0651, "step": 15210 }, { "epoch": 0.16261552433356483, "grad_norm": 1.9089219570159912, "learning_rate": 9.994294174665034e-07, "loss": 0.1262, "step": 15220 }, { "epoch": 0.16272236764784442, "grad_norm": 3.9887163639068604, "learning_rate": 9.99428614773716e-07, "loss": 0.0714, "step": 15230 }, { "epoch": 0.16282921096212405, "grad_norm": 4.797959327697754, "learning_rate": 9.99427811517036e-07, "loss": 0.1089, "step": 15240 }, { "epoch": 0.16293605427640365, "grad_norm": 7.53749418258667, "learning_rate": 9.994270076964643e-07, "loss": 0.138, "step": 15250 }, { "epoch": 0.16304289759068327, "grad_norm": 3.6458494663238525, "learning_rate": 9.99426203312002e-07, "loss": 0.1089, "step": 15260 }, { "epoch": 0.16314974090496287, "grad_norm": 6.091950416564941, "learning_rate": 9.994253983636496e-07, "loss": 0.087, "step": 15270 }, { "epoch": 0.16325658421924247, "grad_norm": 4.778130531311035, "learning_rate": 9.994245928514085e-07, "loss": 0.1322, "step": 15280 }, { "epoch": 0.1633634275335221, "grad_norm": 27.522226333618164, "learning_rate": 9.99423786775279e-07, "loss": 0.0972, "step": 15290 }, { "epoch": 0.1634702708478017, "grad_norm": 7.107833385467529, "learning_rate": 9.994229801352625e-07, "loss": 0.0637, "step": 15300 }, { "epoch": 0.16357711416208132, "grad_norm": 4.82394552230835, "learning_rate": 9.994221729313598e-07, "loss": 0.1339, "step": 15310 }, { "epoch": 0.16368395747636091, "grad_norm": 4.689070224761963, "learning_rate": 9.994213651635716e-07, "loss": 0.1062, "step": 15320 }, { "epoch": 0.16379080079064054, "grad_norm": 1.094294786453247, "learning_rate": 9.994205568318992e-07, "loss": 0.1149, "step": 15330 }, { "epoch": 0.16389764410492014, "grad_norm": 9.945146560668945, "learning_rate": 9.994197479363433e-07, "loss": 0.1018, "step": 15340 }, { "epoch": 0.16400448741919973, "grad_norm": 3.7855515480041504, "learning_rate": 9.994189384769047e-07, "loss": 0.1019, "step": 15350 }, { "epoch": 0.16411133073347936, "grad_norm": 3.8471221923828125, "learning_rate": 9.994181284535845e-07, "loss": 0.0904, "step": 15360 }, { "epoch": 0.16421817404775896, "grad_norm": 5.174076557159424, "learning_rate": 9.994173178663834e-07, "loss": 0.0577, "step": 15370 }, { "epoch": 0.16432501736203858, "grad_norm": 5.408407211303711, "learning_rate": 9.994165067153027e-07, "loss": 0.088, "step": 15380 }, { "epoch": 0.16443186067631818, "grad_norm": 6.9571614265441895, "learning_rate": 9.994156950003428e-07, "loss": 0.0705, "step": 15390 }, { "epoch": 0.16453870399059778, "grad_norm": 0.19285848736763, "learning_rate": 9.994148827215049e-07, "loss": 0.1588, "step": 15400 }, { "epoch": 0.1646455473048774, "grad_norm": 7.741544246673584, "learning_rate": 9.994140698787901e-07, "loss": 0.1125, "step": 15410 }, { "epoch": 0.164752390619157, "grad_norm": 5.402172565460205, "learning_rate": 9.994132564721992e-07, "loss": 0.1435, "step": 15420 }, { "epoch": 0.16485923393343663, "grad_norm": 13.354803085327148, "learning_rate": 9.994124425017327e-07, "loss": 0.095, "step": 15430 }, { "epoch": 0.16496607724771623, "grad_norm": 8.75878620147705, "learning_rate": 9.994116279673921e-07, "loss": 0.1394, "step": 15440 }, { "epoch": 0.16507292056199582, "grad_norm": 0.380035400390625, "learning_rate": 9.994108128691782e-07, "loss": 0.1475, "step": 15450 }, { "epoch": 0.16517976387627545, "grad_norm": 11.317544937133789, "learning_rate": 9.994099972070916e-07, "loss": 0.0812, "step": 15460 }, { "epoch": 0.16528660719055505, "grad_norm": 2.468966007232666, "learning_rate": 9.994091809811333e-07, "loss": 0.1074, "step": 15470 }, { "epoch": 0.16539345050483467, "grad_norm": 4.852606296539307, "learning_rate": 9.994083641913045e-07, "loss": 0.1283, "step": 15480 }, { "epoch": 0.16550029381911427, "grad_norm": 7.218128204345703, "learning_rate": 9.99407546837606e-07, "loss": 0.065, "step": 15490 }, { "epoch": 0.16560713713339387, "grad_norm": 0.11438921093940735, "learning_rate": 9.994067289200385e-07, "loss": 0.0574, "step": 15500 }, { "epoch": 0.1657139804476735, "grad_norm": 17.9600772857666, "learning_rate": 9.994059104386031e-07, "loss": 0.0819, "step": 15510 }, { "epoch": 0.1658208237619531, "grad_norm": 4.52810001373291, "learning_rate": 9.994050913933012e-07, "loss": 0.1837, "step": 15520 }, { "epoch": 0.16592766707623272, "grad_norm": 1.6530869007110596, "learning_rate": 9.99404271784133e-07, "loss": 0.0566, "step": 15530 }, { "epoch": 0.1660345103905123, "grad_norm": 5.379418849945068, "learning_rate": 9.994034516110994e-07, "loss": 0.111, "step": 15540 }, { "epoch": 0.1661413537047919, "grad_norm": 6.819212436676025, "learning_rate": 9.994026308742019e-07, "loss": 0.1564, "step": 15550 }, { "epoch": 0.16624819701907154, "grad_norm": 6.5147624015808105, "learning_rate": 9.994018095734411e-07, "loss": 0.1648, "step": 15560 }, { "epoch": 0.16635504033335113, "grad_norm": 2.8953917026519775, "learning_rate": 9.99400987708818e-07, "loss": 0.0701, "step": 15570 }, { "epoch": 0.16646188364763076, "grad_norm": 6.7969441413879395, "learning_rate": 9.994001652803334e-07, "loss": 0.0818, "step": 15580 }, { "epoch": 0.16656872696191036, "grad_norm": 6.830463409423828, "learning_rate": 9.993993422879883e-07, "loss": 0.1026, "step": 15590 }, { "epoch": 0.16667557027618995, "grad_norm": 1.4561883211135864, "learning_rate": 9.993985187317838e-07, "loss": 0.1124, "step": 15600 }, { "epoch": 0.16678241359046958, "grad_norm": 3.0232112407684326, "learning_rate": 9.993976946117204e-07, "loss": 0.0997, "step": 15610 }, { "epoch": 0.16688925690474918, "grad_norm": 4.206973552703857, "learning_rate": 9.993968699277995e-07, "loss": 0.122, "step": 15620 }, { "epoch": 0.1669961002190288, "grad_norm": 11.510461807250977, "learning_rate": 9.99396044680022e-07, "loss": 0.1244, "step": 15630 }, { "epoch": 0.1671029435333084, "grad_norm": 0.8852484822273254, "learning_rate": 9.993952188683885e-07, "loss": 0.0753, "step": 15640 }, { "epoch": 0.167209786847588, "grad_norm": 7.3590569496154785, "learning_rate": 9.993943924929e-07, "loss": 0.0891, "step": 15650 }, { "epoch": 0.16731663016186762, "grad_norm": 4.674417495727539, "learning_rate": 9.993935655535576e-07, "loss": 0.1273, "step": 15660 }, { "epoch": 0.16742347347614722, "grad_norm": 16.76272964477539, "learning_rate": 9.993927380503625e-07, "loss": 0.1087, "step": 15670 }, { "epoch": 0.16753031679042685, "grad_norm": 6.286702632904053, "learning_rate": 9.99391909983315e-07, "loss": 0.0998, "step": 15680 }, { "epoch": 0.16763716010470645, "grad_norm": 10.95615291595459, "learning_rate": 9.993910813524164e-07, "loss": 0.1108, "step": 15690 }, { "epoch": 0.16774400341898607, "grad_norm": 3.374419689178467, "learning_rate": 9.993902521576677e-07, "loss": 0.1896, "step": 15700 }, { "epoch": 0.16785084673326567, "grad_norm": 17.324325561523438, "learning_rate": 9.993894223990693e-07, "loss": 0.0748, "step": 15710 }, { "epoch": 0.16795769004754527, "grad_norm": 10.491403579711914, "learning_rate": 9.99388592076623e-07, "loss": 0.1285, "step": 15720 }, { "epoch": 0.1680645333618249, "grad_norm": 12.683252334594727, "learning_rate": 9.99387761190329e-07, "loss": 0.0708, "step": 15730 }, { "epoch": 0.1681713766761045, "grad_norm": 0.891338050365448, "learning_rate": 9.993869297401887e-07, "loss": 0.0721, "step": 15740 }, { "epoch": 0.16827821999038411, "grad_norm": 7.873406887054443, "learning_rate": 9.993860977262028e-07, "loss": 0.0996, "step": 15750 }, { "epoch": 0.1683850633046637, "grad_norm": 13.115976333618164, "learning_rate": 9.993852651483724e-07, "loss": 0.0555, "step": 15760 }, { "epoch": 0.1684919066189433, "grad_norm": 5.289334774017334, "learning_rate": 9.99384432006698e-07, "loss": 0.081, "step": 15770 }, { "epoch": 0.16859874993322294, "grad_norm": 4.089598655700684, "learning_rate": 9.993835983011813e-07, "loss": 0.0927, "step": 15780 }, { "epoch": 0.16870559324750253, "grad_norm": 20.25081443786621, "learning_rate": 9.993827640318227e-07, "loss": 0.1231, "step": 15790 }, { "epoch": 0.16881243656178216, "grad_norm": 11.351251602172852, "learning_rate": 9.993819291986232e-07, "loss": 0.0878, "step": 15800 }, { "epoch": 0.16891927987606176, "grad_norm": 2.8612027168273926, "learning_rate": 9.99381093801584e-07, "loss": 0.16, "step": 15810 }, { "epoch": 0.16902612319034135, "grad_norm": 7.419195652008057, "learning_rate": 9.993802578407056e-07, "loss": 0.1112, "step": 15820 }, { "epoch": 0.16913296650462098, "grad_norm": 1.0767593383789062, "learning_rate": 9.993794213159894e-07, "loss": 0.1424, "step": 15830 }, { "epoch": 0.16923980981890058, "grad_norm": 3.1202762126922607, "learning_rate": 9.993785842274361e-07, "loss": 0.1089, "step": 15840 }, { "epoch": 0.1693466531331802, "grad_norm": 9.47021198272705, "learning_rate": 9.993777465750465e-07, "loss": 0.1295, "step": 15850 }, { "epoch": 0.1694534964474598, "grad_norm": 11.094075202941895, "learning_rate": 9.99376908358822e-07, "loss": 0.1374, "step": 15860 }, { "epoch": 0.1695603397617394, "grad_norm": 4.675162315368652, "learning_rate": 9.993760695787632e-07, "loss": 0.0823, "step": 15870 }, { "epoch": 0.16966718307601902, "grad_norm": 19.980762481689453, "learning_rate": 9.99375230234871e-07, "loss": 0.1542, "step": 15880 }, { "epoch": 0.16977402639029862, "grad_norm": 5.318307399749756, "learning_rate": 9.993743903271465e-07, "loss": 0.1047, "step": 15890 }, { "epoch": 0.16988086970457825, "grad_norm": 1.418548345565796, "learning_rate": 9.99373549855591e-07, "loss": 0.0577, "step": 15900 }, { "epoch": 0.16998771301885784, "grad_norm": 12.618672370910645, "learning_rate": 9.993727088202045e-07, "loss": 0.0811, "step": 15910 }, { "epoch": 0.17009455633313744, "grad_norm": 3.097977638244629, "learning_rate": 9.993718672209888e-07, "loss": 0.1712, "step": 15920 }, { "epoch": 0.17020139964741707, "grad_norm": 6.5254225730896, "learning_rate": 9.993710250579446e-07, "loss": 0.1235, "step": 15930 }, { "epoch": 0.17030824296169667, "grad_norm": 22.88911247253418, "learning_rate": 9.993701823310727e-07, "loss": 0.1336, "step": 15940 }, { "epoch": 0.1704150862759763, "grad_norm": 5.296114921569824, "learning_rate": 9.993693390403744e-07, "loss": 0.1067, "step": 15950 }, { "epoch": 0.1705219295902559, "grad_norm": 6.559475898742676, "learning_rate": 9.993684951858501e-07, "loss": 0.116, "step": 15960 }, { "epoch": 0.1706287729045355, "grad_norm": 1.909394383430481, "learning_rate": 9.993676507675013e-07, "loss": 0.093, "step": 15970 }, { "epoch": 0.1707356162188151, "grad_norm": 5.177957057952881, "learning_rate": 9.993668057853288e-07, "loss": 0.0664, "step": 15980 }, { "epoch": 0.1708424595330947, "grad_norm": 10.636980056762695, "learning_rate": 9.993659602393335e-07, "loss": 0.1467, "step": 15990 }, { "epoch": 0.17094930284737433, "grad_norm": 9.950413703918457, "learning_rate": 9.99365114129516e-07, "loss": 0.1099, "step": 16000 }, { "epoch": 0.17105614616165393, "grad_norm": 5.894413471221924, "learning_rate": 9.993642674558778e-07, "loss": 0.0753, "step": 16010 }, { "epoch": 0.17116298947593353, "grad_norm": 6.8343505859375, "learning_rate": 9.993634202184197e-07, "loss": 0.1618, "step": 16020 }, { "epoch": 0.17126983279021316, "grad_norm": 8.610051155090332, "learning_rate": 9.993625724171426e-07, "loss": 0.1273, "step": 16030 }, { "epoch": 0.17137667610449275, "grad_norm": 18.31136703491211, "learning_rate": 9.993617240520476e-07, "loss": 0.2106, "step": 16040 }, { "epoch": 0.17148351941877238, "grad_norm": 7.019164562225342, "learning_rate": 9.993608751231353e-07, "loss": 0.1354, "step": 16050 }, { "epoch": 0.17159036273305198, "grad_norm": 4.032989501953125, "learning_rate": 9.993600256304069e-07, "loss": 0.1217, "step": 16060 }, { "epoch": 0.1716972060473316, "grad_norm": 4.71375846862793, "learning_rate": 9.993591755738635e-07, "loss": 0.1451, "step": 16070 }, { "epoch": 0.1718040493616112, "grad_norm": 16.224918365478516, "learning_rate": 9.993583249535058e-07, "loss": 0.1639, "step": 16080 }, { "epoch": 0.1719108926758908, "grad_norm": 9.995664596557617, "learning_rate": 9.99357473769335e-07, "loss": 0.1398, "step": 16090 }, { "epoch": 0.17201773599017042, "grad_norm": 4.482454776763916, "learning_rate": 9.993566220213517e-07, "loss": 0.1282, "step": 16100 }, { "epoch": 0.17212457930445002, "grad_norm": 5.520421028137207, "learning_rate": 9.993557697095573e-07, "loss": 0.2051, "step": 16110 }, { "epoch": 0.17223142261872965, "grad_norm": 11.69730281829834, "learning_rate": 9.993549168339523e-07, "loss": 0.0995, "step": 16120 }, { "epoch": 0.17233826593300924, "grad_norm": 9.662040710449219, "learning_rate": 9.99354063394538e-07, "loss": 0.156, "step": 16130 }, { "epoch": 0.17244510924728884, "grad_norm": 13.255555152893066, "learning_rate": 9.993532093913154e-07, "loss": 0.0549, "step": 16140 }, { "epoch": 0.17255195256156847, "grad_norm": 8.935491561889648, "learning_rate": 9.993523548242851e-07, "loss": 0.0676, "step": 16150 }, { "epoch": 0.17265879587584806, "grad_norm": 4.888911247253418, "learning_rate": 9.993514996934483e-07, "loss": 0.1102, "step": 16160 }, { "epoch": 0.1727656391901277, "grad_norm": 3.656848669052124, "learning_rate": 9.993506439988063e-07, "loss": 0.1034, "step": 16170 }, { "epoch": 0.1728724825044073, "grad_norm": 1.696494698524475, "learning_rate": 9.993497877403595e-07, "loss": 0.1144, "step": 16180 }, { "epoch": 0.17297932581868689, "grad_norm": 7.781134128570557, "learning_rate": 9.993489309181093e-07, "loss": 0.1779, "step": 16190 }, { "epoch": 0.1730861691329665, "grad_norm": 14.174970626831055, "learning_rate": 9.993480735320562e-07, "loss": 0.1492, "step": 16200 }, { "epoch": 0.1731930124472461, "grad_norm": 12.86280345916748, "learning_rate": 9.993472155822015e-07, "loss": 0.1055, "step": 16210 }, { "epoch": 0.17329985576152573, "grad_norm": 4.418257713317871, "learning_rate": 9.993463570685464e-07, "loss": 0.1, "step": 16220 }, { "epoch": 0.17340669907580533, "grad_norm": 20.805585861206055, "learning_rate": 9.993454979910913e-07, "loss": 0.1422, "step": 16230 }, { "epoch": 0.17351354239008493, "grad_norm": 6.912331581115723, "learning_rate": 9.993446383498374e-07, "loss": 0.0985, "step": 16240 }, { "epoch": 0.17362038570436455, "grad_norm": 3.796238422393799, "learning_rate": 9.993437781447858e-07, "loss": 0.0915, "step": 16250 }, { "epoch": 0.17372722901864415, "grad_norm": 48.575374603271484, "learning_rate": 9.993429173759374e-07, "loss": 0.0983, "step": 16260 }, { "epoch": 0.17383407233292378, "grad_norm": 6.789795398712158, "learning_rate": 9.993420560432933e-07, "loss": 0.0987, "step": 16270 }, { "epoch": 0.17394091564720338, "grad_norm": 1.2979460954666138, "learning_rate": 9.993411941468542e-07, "loss": 0.1043, "step": 16280 }, { "epoch": 0.17404775896148297, "grad_norm": 2.5534019470214844, "learning_rate": 9.993403316866212e-07, "loss": 0.1314, "step": 16290 }, { "epoch": 0.1741546022757626, "grad_norm": 6.920089244842529, "learning_rate": 9.99339468662595e-07, "loss": 0.153, "step": 16300 }, { "epoch": 0.1742614455900422, "grad_norm": 5.80765962600708, "learning_rate": 9.993386050747772e-07, "loss": 0.1265, "step": 16310 }, { "epoch": 0.17436828890432182, "grad_norm": 3.063386917114258, "learning_rate": 9.993377409231685e-07, "loss": 0.0838, "step": 16320 }, { "epoch": 0.17447513221860142, "grad_norm": 1.3707499504089355, "learning_rate": 9.993368762077696e-07, "loss": 0.1121, "step": 16330 }, { "epoch": 0.17458197553288102, "grad_norm": 2.981325149536133, "learning_rate": 9.993360109285818e-07, "loss": 0.0506, "step": 16340 }, { "epoch": 0.17468881884716064, "grad_norm": 11.03311538696289, "learning_rate": 9.993351450856058e-07, "loss": 0.1057, "step": 16350 }, { "epoch": 0.17479566216144024, "grad_norm": 2.6824791431427, "learning_rate": 9.993342786788429e-07, "loss": 0.0585, "step": 16360 }, { "epoch": 0.17490250547571987, "grad_norm": 6.969327926635742, "learning_rate": 9.99333411708294e-07, "loss": 0.075, "step": 16370 }, { "epoch": 0.17500934878999946, "grad_norm": 4.576785564422607, "learning_rate": 9.993325441739596e-07, "loss": 0.1698, "step": 16380 }, { "epoch": 0.17511619210427906, "grad_norm": 7.271945953369141, "learning_rate": 9.993316760758412e-07, "loss": 0.1492, "step": 16390 }, { "epoch": 0.1752230354185587, "grad_norm": 1.1816242933273315, "learning_rate": 9.993308074139399e-07, "loss": 0.074, "step": 16400 }, { "epoch": 0.17532987873283828, "grad_norm": 5.370372295379639, "learning_rate": 9.993299381882563e-07, "loss": 0.1654, "step": 16410 }, { "epoch": 0.1754367220471179, "grad_norm": 4.017648220062256, "learning_rate": 9.993290683987915e-07, "loss": 0.1049, "step": 16420 }, { "epoch": 0.1755435653613975, "grad_norm": 4.0328569412231445, "learning_rate": 9.993281980455466e-07, "loss": 0.1456, "step": 16430 }, { "epoch": 0.17565040867567713, "grad_norm": 1.2907562255859375, "learning_rate": 9.993273271285222e-07, "loss": 0.1658, "step": 16440 }, { "epoch": 0.17575725198995673, "grad_norm": 2.6753194332122803, "learning_rate": 9.993264556477198e-07, "loss": 0.0415, "step": 16450 }, { "epoch": 0.17586409530423633, "grad_norm": 0.8788372278213501, "learning_rate": 9.9932558360314e-07, "loss": 0.096, "step": 16460 }, { "epoch": 0.17597093861851595, "grad_norm": 3.7648870944976807, "learning_rate": 9.99324710994784e-07, "loss": 0.148, "step": 16470 }, { "epoch": 0.17607778193279555, "grad_norm": 6.900119781494141, "learning_rate": 9.993238378226529e-07, "loss": 0.1417, "step": 16480 }, { "epoch": 0.17618462524707518, "grad_norm": 12.787519454956055, "learning_rate": 9.993229640867474e-07, "loss": 0.1196, "step": 16490 }, { "epoch": 0.17629146856135477, "grad_norm": 11.373929977416992, "learning_rate": 9.993220897870684e-07, "loss": 0.1332, "step": 16500 }, { "epoch": 0.17639831187563437, "grad_norm": 18.792360305786133, "learning_rate": 9.993212149236172e-07, "loss": 0.1315, "step": 16510 }, { "epoch": 0.176505155189914, "grad_norm": 1.4052274227142334, "learning_rate": 9.993203394963946e-07, "loss": 0.1457, "step": 16520 }, { "epoch": 0.1766119985041936, "grad_norm": 4.8279194831848145, "learning_rate": 9.993194635054018e-07, "loss": 0.2002, "step": 16530 }, { "epoch": 0.17671884181847322, "grad_norm": 16.424543380737305, "learning_rate": 9.993185869506395e-07, "loss": 0.1346, "step": 16540 }, { "epoch": 0.17682568513275282, "grad_norm": 9.453006744384766, "learning_rate": 9.993177098321087e-07, "loss": 0.1048, "step": 16550 }, { "epoch": 0.17693252844703242, "grad_norm": 9.217620849609375, "learning_rate": 9.993168321498109e-07, "loss": 0.0641, "step": 16560 }, { "epoch": 0.17703937176131204, "grad_norm": 3.969357490539551, "learning_rate": 9.993159539037465e-07, "loss": 0.1665, "step": 16570 }, { "epoch": 0.17714621507559164, "grad_norm": 1.2578911781311035, "learning_rate": 9.993150750939167e-07, "loss": 0.0739, "step": 16580 }, { "epoch": 0.17725305838987127, "grad_norm": 22.077259063720703, "learning_rate": 9.993141957203226e-07, "loss": 0.1489, "step": 16590 }, { "epoch": 0.17735990170415086, "grad_norm": 1.5955455303192139, "learning_rate": 9.993133157829649e-07, "loss": 0.0743, "step": 16600 }, { "epoch": 0.17746674501843046, "grad_norm": 7.412127494812012, "learning_rate": 9.99312435281845e-07, "loss": 0.1305, "step": 16610 }, { "epoch": 0.1775735883327101, "grad_norm": 9.974932670593262, "learning_rate": 9.993115542169634e-07, "loss": 0.1134, "step": 16620 }, { "epoch": 0.17768043164698968, "grad_norm": 4.687510013580322, "learning_rate": 9.993106725883216e-07, "loss": 0.1886, "step": 16630 }, { "epoch": 0.1777872749612693, "grad_norm": 0.9691879749298096, "learning_rate": 9.993097903959202e-07, "loss": 0.0977, "step": 16640 }, { "epoch": 0.1778941182755489, "grad_norm": 3.2068183422088623, "learning_rate": 9.993089076397605e-07, "loss": 0.1372, "step": 16650 }, { "epoch": 0.1780009615898285, "grad_norm": 5.954931259155273, "learning_rate": 9.993080243198432e-07, "loss": 0.1131, "step": 16660 }, { "epoch": 0.17810780490410813, "grad_norm": 1.9249738454818726, "learning_rate": 9.993071404361696e-07, "loss": 0.0984, "step": 16670 }, { "epoch": 0.17821464821838773, "grad_norm": 5.740667343139648, "learning_rate": 9.993062559887405e-07, "loss": 0.0643, "step": 16680 }, { "epoch": 0.17832149153266735, "grad_norm": 1.9965718984603882, "learning_rate": 9.99305370977557e-07, "loss": 0.1157, "step": 16690 }, { "epoch": 0.17842833484694695, "grad_norm": 20.06185531616211, "learning_rate": 9.993044854026201e-07, "loss": 0.0932, "step": 16700 }, { "epoch": 0.17853517816122655, "grad_norm": 7.8685622215271, "learning_rate": 9.993035992639307e-07, "loss": 0.1583, "step": 16710 }, { "epoch": 0.17864202147550617, "grad_norm": 8.067046165466309, "learning_rate": 9.993027125614899e-07, "loss": 0.0971, "step": 16720 }, { "epoch": 0.17874886478978577, "grad_norm": 0.6912784576416016, "learning_rate": 9.993018252952984e-07, "loss": 0.0913, "step": 16730 }, { "epoch": 0.1788557081040654, "grad_norm": 15.423175811767578, "learning_rate": 9.993009374653578e-07, "loss": 0.125, "step": 16740 }, { "epoch": 0.178962551418345, "grad_norm": 21.274131774902344, "learning_rate": 9.993000490716685e-07, "loss": 0.136, "step": 16750 }, { "epoch": 0.1790693947326246, "grad_norm": 4.037989616394043, "learning_rate": 9.99299160114232e-07, "loss": 0.0763, "step": 16760 }, { "epoch": 0.17917623804690422, "grad_norm": 4.156820774078369, "learning_rate": 9.992982705930488e-07, "loss": 0.1337, "step": 16770 }, { "epoch": 0.17928308136118382, "grad_norm": 4.64811372756958, "learning_rate": 9.992973805081204e-07, "loss": 0.0613, "step": 16780 }, { "epoch": 0.17938992467546344, "grad_norm": 18.729755401611328, "learning_rate": 9.992964898594475e-07, "loss": 0.1849, "step": 16790 }, { "epoch": 0.17949676798974304, "grad_norm": 0.1679881513118744, "learning_rate": 9.992955986470311e-07, "loss": 0.085, "step": 16800 }, { "epoch": 0.17960361130402266, "grad_norm": 13.766027450561523, "learning_rate": 9.992947068708722e-07, "loss": 0.1779, "step": 16810 }, { "epoch": 0.17971045461830226, "grad_norm": 12.094706535339355, "learning_rate": 9.99293814530972e-07, "loss": 0.1827, "step": 16820 }, { "epoch": 0.17981729793258186, "grad_norm": 14.611696243286133, "learning_rate": 9.992929216273314e-07, "loss": 0.1225, "step": 16830 }, { "epoch": 0.17992414124686149, "grad_norm": 4.161685466766357, "learning_rate": 9.992920281599516e-07, "loss": 0.07, "step": 16840 }, { "epoch": 0.18003098456114108, "grad_norm": 0.5531151294708252, "learning_rate": 9.992911341288332e-07, "loss": 0.0932, "step": 16850 }, { "epoch": 0.1801378278754207, "grad_norm": 4.145230770111084, "learning_rate": 9.992902395339776e-07, "loss": 0.1386, "step": 16860 }, { "epoch": 0.1802446711897003, "grad_norm": 5.6402812004089355, "learning_rate": 9.992893443753853e-07, "loss": 0.0832, "step": 16870 }, { "epoch": 0.1803515145039799, "grad_norm": 21.774389266967773, "learning_rate": 9.992884486530579e-07, "loss": 0.1243, "step": 16880 }, { "epoch": 0.18045835781825953, "grad_norm": 8.090280532836914, "learning_rate": 9.992875523669961e-07, "loss": 0.0436, "step": 16890 }, { "epoch": 0.18056520113253913, "grad_norm": 1.960252285003662, "learning_rate": 9.99286655517201e-07, "loss": 0.1027, "step": 16900 }, { "epoch": 0.18067204444681875, "grad_norm": 15.448142051696777, "learning_rate": 9.992857581036736e-07, "loss": 0.1968, "step": 16910 }, { "epoch": 0.18077888776109835, "grad_norm": 6.920088291168213, "learning_rate": 9.992848601264148e-07, "loss": 0.1147, "step": 16920 }, { "epoch": 0.18088573107537795, "grad_norm": 1.0072449445724487, "learning_rate": 9.992839615854258e-07, "loss": 0.0844, "step": 16930 }, { "epoch": 0.18099257438965757, "grad_norm": 3.8795130252838135, "learning_rate": 9.992830624807076e-07, "loss": 0.0701, "step": 16940 }, { "epoch": 0.18109941770393717, "grad_norm": 2.848365068435669, "learning_rate": 9.99282162812261e-07, "loss": 0.0637, "step": 16950 }, { "epoch": 0.1812062610182168, "grad_norm": 18.996904373168945, "learning_rate": 9.992812625800872e-07, "loss": 0.1471, "step": 16960 }, { "epoch": 0.1813131043324964, "grad_norm": 7.4671630859375, "learning_rate": 9.992803617841873e-07, "loss": 0.0736, "step": 16970 }, { "epoch": 0.181419947646776, "grad_norm": 3.5570404529571533, "learning_rate": 9.992794604245619e-07, "loss": 0.1381, "step": 16980 }, { "epoch": 0.18152679096105562, "grad_norm": 3.927020311355591, "learning_rate": 9.992785585012125e-07, "loss": 0.0473, "step": 16990 }, { "epoch": 0.18163363427533522, "grad_norm": 10.271227836608887, "learning_rate": 9.9927765601414e-07, "loss": 0.1036, "step": 17000 }, { "epoch": 0.18174047758961484, "grad_norm": 11.20836353302002, "learning_rate": 9.99276752963345e-07, "loss": 0.101, "step": 17010 }, { "epoch": 0.18184732090389444, "grad_norm": 2.418144702911377, "learning_rate": 9.992758493488292e-07, "loss": 0.0636, "step": 17020 }, { "epoch": 0.18195416421817404, "grad_norm": 17.948490142822266, "learning_rate": 9.99274945170593e-07, "loss": 0.119, "step": 17030 }, { "epoch": 0.18206100753245366, "grad_norm": 13.2782564163208, "learning_rate": 9.99274040428638e-07, "loss": 0.0952, "step": 17040 }, { "epoch": 0.18216785084673326, "grad_norm": 7.944283962249756, "learning_rate": 9.992731351229648e-07, "loss": 0.0593, "step": 17050 }, { "epoch": 0.18227469416101288, "grad_norm": 10.854174613952637, "learning_rate": 9.992722292535746e-07, "loss": 0.071, "step": 17060 }, { "epoch": 0.18238153747529248, "grad_norm": 7.7546257972717285, "learning_rate": 9.992713228204684e-07, "loss": 0.09, "step": 17070 }, { "epoch": 0.18248838078957208, "grad_norm": 6.5779218673706055, "learning_rate": 9.99270415823647e-07, "loss": 0.0584, "step": 17080 }, { "epoch": 0.1825952241038517, "grad_norm": 10.10021686553955, "learning_rate": 9.992695082631118e-07, "loss": 0.0837, "step": 17090 }, { "epoch": 0.1827020674181313, "grad_norm": 1.6205711364746094, "learning_rate": 9.992686001388636e-07, "loss": 0.1072, "step": 17100 }, { "epoch": 0.18280891073241093, "grad_norm": 8.123907089233398, "learning_rate": 9.992676914509035e-07, "loss": 0.1045, "step": 17110 }, { "epoch": 0.18291575404669053, "grad_norm": 28.792356491088867, "learning_rate": 9.992667821992324e-07, "loss": 0.1664, "step": 17120 }, { "epoch": 0.18302259736097012, "grad_norm": 3.6131839752197266, "learning_rate": 9.992658723838514e-07, "loss": 0.0781, "step": 17130 }, { "epoch": 0.18312944067524975, "grad_norm": 5.633286952972412, "learning_rate": 9.992649620047617e-07, "loss": 0.0726, "step": 17140 }, { "epoch": 0.18323628398952935, "grad_norm": 8.215598106384277, "learning_rate": 9.992640510619641e-07, "loss": 0.0895, "step": 17150 }, { "epoch": 0.18334312730380897, "grad_norm": 6.5348968505859375, "learning_rate": 9.992631395554598e-07, "loss": 0.0847, "step": 17160 }, { "epoch": 0.18344997061808857, "grad_norm": 12.708793640136719, "learning_rate": 9.992622274852495e-07, "loss": 0.1616, "step": 17170 }, { "epoch": 0.1835568139323682, "grad_norm": 5.661486625671387, "learning_rate": 9.992613148513348e-07, "loss": 0.0635, "step": 17180 }, { "epoch": 0.1836636572466478, "grad_norm": 4.646244049072266, "learning_rate": 9.99260401653716e-07, "loss": 0.1855, "step": 17190 }, { "epoch": 0.1837705005609274, "grad_norm": 6.220321178436279, "learning_rate": 9.992594878923947e-07, "loss": 0.0645, "step": 17200 }, { "epoch": 0.18387734387520702, "grad_norm": 9.05195426940918, "learning_rate": 9.99258573567372e-07, "loss": 0.1097, "step": 17210 }, { "epoch": 0.18398418718948661, "grad_norm": 14.706059455871582, "learning_rate": 9.992576586786483e-07, "loss": 0.0945, "step": 17220 }, { "epoch": 0.18409103050376624, "grad_norm": 14.320455551147461, "learning_rate": 9.992567432262251e-07, "loss": 0.0502, "step": 17230 }, { "epoch": 0.18419787381804584, "grad_norm": 4.395234107971191, "learning_rate": 9.992558272101034e-07, "loss": 0.048, "step": 17240 }, { "epoch": 0.18430471713232544, "grad_norm": 5.959733486175537, "learning_rate": 9.992549106302842e-07, "loss": 0.0765, "step": 17250 }, { "epoch": 0.18441156044660506, "grad_norm": 6.817502975463867, "learning_rate": 9.992539934867689e-07, "loss": 0.1176, "step": 17260 }, { "epoch": 0.18451840376088466, "grad_norm": 15.100199699401855, "learning_rate": 9.992530757795577e-07, "loss": 0.0926, "step": 17270 }, { "epoch": 0.18462524707516428, "grad_norm": 11.620977401733398, "learning_rate": 9.992521575086523e-07, "loss": 0.0471, "step": 17280 }, { "epoch": 0.18473209038944388, "grad_norm": 1.4108967781066895, "learning_rate": 9.992512386740534e-07, "loss": 0.1336, "step": 17290 }, { "epoch": 0.18483893370372348, "grad_norm": 1.912554383277893, "learning_rate": 9.992503192757622e-07, "loss": 0.1355, "step": 17300 }, { "epoch": 0.1849457770180031, "grad_norm": 0.32695654034614563, "learning_rate": 9.992493993137798e-07, "loss": 0.1367, "step": 17310 }, { "epoch": 0.1850526203322827, "grad_norm": 5.104562282562256, "learning_rate": 9.99248478788107e-07, "loss": 0.0645, "step": 17320 }, { "epoch": 0.18515946364656233, "grad_norm": 5.652390956878662, "learning_rate": 9.992475576987451e-07, "loss": 0.0808, "step": 17330 }, { "epoch": 0.18526630696084193, "grad_norm": 8.140969276428223, "learning_rate": 9.99246636045695e-07, "loss": 0.0986, "step": 17340 }, { "epoch": 0.18537315027512152, "grad_norm": 10.690690040588379, "learning_rate": 9.99245713828958e-07, "loss": 0.1973, "step": 17350 }, { "epoch": 0.18547999358940115, "grad_norm": 32.86014175415039, "learning_rate": 9.992447910485344e-07, "loss": 0.1197, "step": 17360 }, { "epoch": 0.18558683690368075, "grad_norm": 4.593208312988281, "learning_rate": 9.992438677044262e-07, "loss": 0.1038, "step": 17370 }, { "epoch": 0.18569368021796037, "grad_norm": 6.78018045425415, "learning_rate": 9.992429437966336e-07, "loss": 0.1262, "step": 17380 }, { "epoch": 0.18580052353223997, "grad_norm": 6.154634475708008, "learning_rate": 9.992420193251583e-07, "loss": 0.1051, "step": 17390 }, { "epoch": 0.18590736684651957, "grad_norm": 14.777422904968262, "learning_rate": 9.99241094290001e-07, "loss": 0.1148, "step": 17400 }, { "epoch": 0.1860142101607992, "grad_norm": 6.561121463775635, "learning_rate": 9.99240168691163e-07, "loss": 0.0996, "step": 17410 }, { "epoch": 0.1861210534750788, "grad_norm": 15.255274772644043, "learning_rate": 9.99239242528645e-07, "loss": 0.1238, "step": 17420 }, { "epoch": 0.18622789678935842, "grad_norm": 10.457624435424805, "learning_rate": 9.992383158024482e-07, "loss": 0.1193, "step": 17430 }, { "epoch": 0.186334740103638, "grad_norm": 30.29150390625, "learning_rate": 9.992373885125737e-07, "loss": 0.0881, "step": 17440 }, { "epoch": 0.1864415834179176, "grad_norm": 3.285130023956299, "learning_rate": 9.992364606590228e-07, "loss": 0.0586, "step": 17450 }, { "epoch": 0.18654842673219724, "grad_norm": 30.677688598632812, "learning_rate": 9.992355322417958e-07, "loss": 0.1915, "step": 17460 }, { "epoch": 0.18665527004647683, "grad_norm": 5.668385028839111, "learning_rate": 9.992346032608944e-07, "loss": 0.1248, "step": 17470 }, { "epoch": 0.18676211336075646, "grad_norm": 12.540237426757812, "learning_rate": 9.992336737163194e-07, "loss": 0.1305, "step": 17480 }, { "epoch": 0.18686895667503606, "grad_norm": 5.4349775314331055, "learning_rate": 9.99232743608072e-07, "loss": 0.1574, "step": 17490 }, { "epoch": 0.18697579998931566, "grad_norm": 6.924818992614746, "learning_rate": 9.99231812936153e-07, "loss": 0.1411, "step": 17500 }, { "epoch": 0.18708264330359528, "grad_norm": 9.825945854187012, "learning_rate": 9.992308817005636e-07, "loss": 0.088, "step": 17510 }, { "epoch": 0.18718948661787488, "grad_norm": 6.845285892486572, "learning_rate": 9.992299499013051e-07, "loss": 0.1076, "step": 17520 }, { "epoch": 0.1872963299321545, "grad_norm": 9.280754089355469, "learning_rate": 9.992290175383782e-07, "loss": 0.0743, "step": 17530 }, { "epoch": 0.1874031732464341, "grad_norm": 4.648150444030762, "learning_rate": 9.99228084611784e-07, "loss": 0.0517, "step": 17540 }, { "epoch": 0.18751001656071373, "grad_norm": 1.393118143081665, "learning_rate": 9.992271511215236e-07, "loss": 0.1282, "step": 17550 }, { "epoch": 0.18761685987499332, "grad_norm": 8.399618148803711, "learning_rate": 9.992262170675981e-07, "loss": 0.051, "step": 17560 }, { "epoch": 0.18772370318927292, "grad_norm": 3.101672410964966, "learning_rate": 9.992252824500086e-07, "loss": 0.1228, "step": 17570 }, { "epoch": 0.18783054650355255, "grad_norm": 11.586338996887207, "learning_rate": 9.99224347268756e-07, "loss": 0.0757, "step": 17580 }, { "epoch": 0.18793738981783215, "grad_norm": 19.388103485107422, "learning_rate": 9.992234115238414e-07, "loss": 0.0982, "step": 17590 }, { "epoch": 0.18804423313211177, "grad_norm": 5.951871395111084, "learning_rate": 9.99222475215266e-07, "loss": 0.0823, "step": 17600 }, { "epoch": 0.18815107644639137, "grad_norm": 12.153385162353516, "learning_rate": 9.992215383430307e-07, "loss": 0.0869, "step": 17610 }, { "epoch": 0.18825791976067097, "grad_norm": 6.585153579711914, "learning_rate": 9.992206009071366e-07, "loss": 0.1284, "step": 17620 }, { "epoch": 0.1883647630749506, "grad_norm": 24.573375701904297, "learning_rate": 9.992196629075846e-07, "loss": 0.099, "step": 17630 }, { "epoch": 0.1884716063892302, "grad_norm": 3.20802640914917, "learning_rate": 9.992187243443761e-07, "loss": 0.0723, "step": 17640 }, { "epoch": 0.18857844970350982, "grad_norm": 8.818449974060059, "learning_rate": 9.99217785217512e-07, "loss": 0.0623, "step": 17650 }, { "epoch": 0.1886852930177894, "grad_norm": 23.53980827331543, "learning_rate": 9.992168455269933e-07, "loss": 0.1149, "step": 17660 }, { "epoch": 0.188792136332069, "grad_norm": 7.038118839263916, "learning_rate": 9.992159052728212e-07, "loss": 0.0779, "step": 17670 }, { "epoch": 0.18889897964634864, "grad_norm": 17.075931549072266, "learning_rate": 9.992149644549966e-07, "loss": 0.1311, "step": 17680 }, { "epoch": 0.18900582296062823, "grad_norm": 8.69446849822998, "learning_rate": 9.992140230735204e-07, "loss": 0.0778, "step": 17690 }, { "epoch": 0.18911266627490786, "grad_norm": 7.41160249710083, "learning_rate": 9.992130811283941e-07, "loss": 0.0845, "step": 17700 }, { "epoch": 0.18921950958918746, "grad_norm": 6.687047481536865, "learning_rate": 9.992121386196187e-07, "loss": 0.0825, "step": 17710 }, { "epoch": 0.18932635290346705, "grad_norm": 6.818108558654785, "learning_rate": 9.992111955471949e-07, "loss": 0.126, "step": 17720 }, { "epoch": 0.18943319621774668, "grad_norm": 2.3020124435424805, "learning_rate": 9.99210251911124e-07, "loss": 0.1238, "step": 17730 }, { "epoch": 0.18954003953202628, "grad_norm": 7.308884143829346, "learning_rate": 9.992093077114072e-07, "loss": 0.1213, "step": 17740 }, { "epoch": 0.1896468828463059, "grad_norm": 10.218155860900879, "learning_rate": 9.992083629480452e-07, "loss": 0.0967, "step": 17750 }, { "epoch": 0.1897537261605855, "grad_norm": 7.011641502380371, "learning_rate": 9.992074176210395e-07, "loss": 0.0792, "step": 17760 }, { "epoch": 0.1898605694748651, "grad_norm": 5.3655853271484375, "learning_rate": 9.992064717303908e-07, "loss": 0.0554, "step": 17770 }, { "epoch": 0.18996741278914472, "grad_norm": 19.2479190826416, "learning_rate": 9.992055252761003e-07, "loss": 0.0455, "step": 17780 }, { "epoch": 0.19007425610342432, "grad_norm": 8.752022743225098, "learning_rate": 9.99204578258169e-07, "loss": 0.1538, "step": 17790 }, { "epoch": 0.19018109941770395, "grad_norm": 9.34160327911377, "learning_rate": 9.992036306765982e-07, "loss": 0.0769, "step": 17800 }, { "epoch": 0.19028794273198354, "grad_norm": 10.36617660522461, "learning_rate": 9.992026825313888e-07, "loss": 0.0717, "step": 17810 }, { "epoch": 0.19039478604626314, "grad_norm": 2.330026149749756, "learning_rate": 9.99201733822542e-07, "loss": 0.0779, "step": 17820 }, { "epoch": 0.19050162936054277, "grad_norm": 4.407431125640869, "learning_rate": 9.992007845500587e-07, "loss": 0.0565, "step": 17830 }, { "epoch": 0.19060847267482237, "grad_norm": 10.995022773742676, "learning_rate": 9.9919983471394e-07, "loss": 0.0961, "step": 17840 }, { "epoch": 0.190715315989102, "grad_norm": 1.4823858737945557, "learning_rate": 9.99198884314187e-07, "loss": 0.1157, "step": 17850 }, { "epoch": 0.1908221593033816, "grad_norm": 6.791286468505859, "learning_rate": 9.991979333508007e-07, "loss": 0.0405, "step": 17860 }, { "epoch": 0.1909290026176612, "grad_norm": 1.6285597085952759, "learning_rate": 9.991969818237824e-07, "loss": 0.1102, "step": 17870 }, { "epoch": 0.1910358459319408, "grad_norm": 2.9721884727478027, "learning_rate": 9.99196029733133e-07, "loss": 0.1128, "step": 17880 }, { "epoch": 0.1911426892462204, "grad_norm": 8.174809455871582, "learning_rate": 9.991950770788536e-07, "loss": 0.0786, "step": 17890 }, { "epoch": 0.19124953256050004, "grad_norm": 8.221257209777832, "learning_rate": 9.991941238609453e-07, "loss": 0.095, "step": 17900 }, { "epoch": 0.19135637587477963, "grad_norm": 8.333964347839355, "learning_rate": 9.991931700794092e-07, "loss": 0.133, "step": 17910 }, { "epoch": 0.19146321918905926, "grad_norm": 8.92789363861084, "learning_rate": 9.991922157342462e-07, "loss": 0.1742, "step": 17920 }, { "epoch": 0.19157006250333886, "grad_norm": 4.768162250518799, "learning_rate": 9.991912608254577e-07, "loss": 0.0923, "step": 17930 }, { "epoch": 0.19167690581761845, "grad_norm": 16.143211364746094, "learning_rate": 9.991903053530444e-07, "loss": 0.1743, "step": 17940 }, { "epoch": 0.19178374913189808, "grad_norm": 15.11259651184082, "learning_rate": 9.991893493170077e-07, "loss": 0.0986, "step": 17950 }, { "epoch": 0.19189059244617768, "grad_norm": 9.736990928649902, "learning_rate": 9.991883927173485e-07, "loss": 0.1217, "step": 17960 }, { "epoch": 0.1919974357604573, "grad_norm": 5.773087501525879, "learning_rate": 9.99187435554068e-07, "loss": 0.0775, "step": 17970 }, { "epoch": 0.1921042790747369, "grad_norm": 4.168891429901123, "learning_rate": 9.991864778271671e-07, "loss": 0.1468, "step": 17980 }, { "epoch": 0.1922111223890165, "grad_norm": 8.312082290649414, "learning_rate": 9.99185519536647e-07, "loss": 0.0603, "step": 17990 }, { "epoch": 0.19231796570329612, "grad_norm": 22.032075881958008, "learning_rate": 9.991845606825089e-07, "loss": 0.1369, "step": 18000 }, { "epoch": 0.19242480901757572, "grad_norm": 4.599510192871094, "learning_rate": 9.991836012647535e-07, "loss": 0.1226, "step": 18010 }, { "epoch": 0.19253165233185535, "grad_norm": 3.615070343017578, "learning_rate": 9.991826412833824e-07, "loss": 0.0568, "step": 18020 }, { "epoch": 0.19263849564613494, "grad_norm": 0.10231059789657593, "learning_rate": 9.991816807383962e-07, "loss": 0.234, "step": 18030 }, { "epoch": 0.19274533896041454, "grad_norm": 5.31121826171875, "learning_rate": 9.991807196297963e-07, "loss": 0.0844, "step": 18040 }, { "epoch": 0.19285218227469417, "grad_norm": 4.681241989135742, "learning_rate": 9.991797579575837e-07, "loss": 0.0604, "step": 18050 }, { "epoch": 0.19295902558897376, "grad_norm": 7.401819229125977, "learning_rate": 9.991787957217597e-07, "loss": 0.1347, "step": 18060 }, { "epoch": 0.1930658689032534, "grad_norm": 4.8073906898498535, "learning_rate": 9.991778329223248e-07, "loss": 0.0777, "step": 18070 }, { "epoch": 0.193172712217533, "grad_norm": 7.953901767730713, "learning_rate": 9.991768695592808e-07, "loss": 0.1005, "step": 18080 }, { "epoch": 0.19327955553181259, "grad_norm": 14.5172700881958, "learning_rate": 9.99175905632628e-07, "loss": 0.0768, "step": 18090 }, { "epoch": 0.1933863988460922, "grad_norm": 2.9677140712738037, "learning_rate": 9.991749411423684e-07, "loss": 0.1135, "step": 18100 }, { "epoch": 0.1934932421603718, "grad_norm": 7.602723121643066, "learning_rate": 9.991739760885024e-07, "loss": 0.2532, "step": 18110 }, { "epoch": 0.19360008547465143, "grad_norm": 3.8427789211273193, "learning_rate": 9.991730104710312e-07, "loss": 0.0906, "step": 18120 }, { "epoch": 0.19370692878893103, "grad_norm": 2.4952423572540283, "learning_rate": 9.99172044289956e-07, "loss": 0.0714, "step": 18130 }, { "epoch": 0.19381377210321063, "grad_norm": 1.0695301294326782, "learning_rate": 9.991710775452782e-07, "loss": 0.115, "step": 18140 }, { "epoch": 0.19392061541749026, "grad_norm": 12.525152206420898, "learning_rate": 9.991701102369982e-07, "loss": 0.1513, "step": 18150 }, { "epoch": 0.19402745873176985, "grad_norm": 1.9930459260940552, "learning_rate": 9.991691423651178e-07, "loss": 0.0839, "step": 18160 }, { "epoch": 0.19413430204604948, "grad_norm": 8.366788864135742, "learning_rate": 9.991681739296377e-07, "loss": 0.1442, "step": 18170 }, { "epoch": 0.19424114536032908, "grad_norm": 11.524225234985352, "learning_rate": 9.991672049305588e-07, "loss": 0.0916, "step": 18180 }, { "epoch": 0.19434798867460867, "grad_norm": 5.188520431518555, "learning_rate": 9.991662353678826e-07, "loss": 0.0701, "step": 18190 }, { "epoch": 0.1944548319888883, "grad_norm": 0.5288195610046387, "learning_rate": 9.9916526524161e-07, "loss": 0.1117, "step": 18200 }, { "epoch": 0.1945616753031679, "grad_norm": 15.422120094299316, "learning_rate": 9.991642945517422e-07, "loss": 0.193, "step": 18210 }, { "epoch": 0.19466851861744752, "grad_norm": 13.775043487548828, "learning_rate": 9.991633232982803e-07, "loss": 0.1199, "step": 18220 }, { "epoch": 0.19477536193172712, "grad_norm": 10.732864379882812, "learning_rate": 9.991623514812253e-07, "loss": 0.123, "step": 18230 }, { "epoch": 0.19488220524600672, "grad_norm": 9.54747200012207, "learning_rate": 9.991613791005783e-07, "loss": 0.1096, "step": 18240 }, { "epoch": 0.19498904856028634, "grad_norm": 14.368531227111816, "learning_rate": 9.991604061563405e-07, "loss": 0.1283, "step": 18250 }, { "epoch": 0.19509589187456594, "grad_norm": 6.494058132171631, "learning_rate": 9.99159432648513e-07, "loss": 0.0724, "step": 18260 }, { "epoch": 0.19520273518884557, "grad_norm": 4.930190086364746, "learning_rate": 9.991584585770964e-07, "loss": 0.1471, "step": 18270 }, { "epoch": 0.19530957850312516, "grad_norm": 9.709909439086914, "learning_rate": 9.991574839420926e-07, "loss": 0.1401, "step": 18280 }, { "epoch": 0.1954164218174048, "grad_norm": 12.7833251953125, "learning_rate": 9.991565087435023e-07, "loss": 0.0597, "step": 18290 }, { "epoch": 0.1955232651316844, "grad_norm": 9.881338119506836, "learning_rate": 9.991555329813266e-07, "loss": 0.0933, "step": 18300 }, { "epoch": 0.19563010844596398, "grad_norm": 8.037247657775879, "learning_rate": 9.991545566555667e-07, "loss": 0.1144, "step": 18310 }, { "epoch": 0.1957369517602436, "grad_norm": 2.849341869354248, "learning_rate": 9.991535797662234e-07, "loss": 0.1793, "step": 18320 }, { "epoch": 0.1958437950745232, "grad_norm": 7.6960859298706055, "learning_rate": 9.991526023132981e-07, "loss": 0.077, "step": 18330 }, { "epoch": 0.19595063838880283, "grad_norm": 0.20960663259029388, "learning_rate": 9.99151624296792e-07, "loss": 0.0681, "step": 18340 }, { "epoch": 0.19605748170308243, "grad_norm": 4.539361953735352, "learning_rate": 9.99150645716706e-07, "loss": 0.0819, "step": 18350 }, { "epoch": 0.19616432501736203, "grad_norm": 5.407511234283447, "learning_rate": 9.991496665730411e-07, "loss": 0.1279, "step": 18360 }, { "epoch": 0.19627116833164165, "grad_norm": 5.315698146820068, "learning_rate": 9.991486868657989e-07, "loss": 0.1293, "step": 18370 }, { "epoch": 0.19637801164592125, "grad_norm": 9.599943161010742, "learning_rate": 9.991477065949798e-07, "loss": 0.166, "step": 18380 }, { "epoch": 0.19648485496020088, "grad_norm": 2.220705032348633, "learning_rate": 9.991467257605853e-07, "loss": 0.1459, "step": 18390 }, { "epoch": 0.19659169827448048, "grad_norm": 7.151031494140625, "learning_rate": 9.991457443626165e-07, "loss": 0.1089, "step": 18400 }, { "epoch": 0.19669854158876007, "grad_norm": 0.1392924189567566, "learning_rate": 9.991447624010746e-07, "loss": 0.0763, "step": 18410 }, { "epoch": 0.1968053849030397, "grad_norm": 4.619791030883789, "learning_rate": 9.991437798759606e-07, "loss": 0.0683, "step": 18420 }, { "epoch": 0.1969122282173193, "grad_norm": 8.887897491455078, "learning_rate": 9.991427967872753e-07, "loss": 0.0835, "step": 18430 }, { "epoch": 0.19701907153159892, "grad_norm": 2.656264305114746, "learning_rate": 9.991418131350205e-07, "loss": 0.0865, "step": 18440 }, { "epoch": 0.19712591484587852, "grad_norm": 2.4361941814422607, "learning_rate": 9.991408289191967e-07, "loss": 0.1208, "step": 18450 }, { "epoch": 0.19723275816015812, "grad_norm": 24.89484405517578, "learning_rate": 9.991398441398053e-07, "loss": 0.132, "step": 18460 }, { "epoch": 0.19733960147443774, "grad_norm": 1.4637129306793213, "learning_rate": 9.991388587968475e-07, "loss": 0.1188, "step": 18470 }, { "epoch": 0.19744644478871734, "grad_norm": 15.020047187805176, "learning_rate": 9.99137872890324e-07, "loss": 0.113, "step": 18480 }, { "epoch": 0.19755328810299697, "grad_norm": 5.9400224685668945, "learning_rate": 9.991368864202362e-07, "loss": 0.1231, "step": 18490 }, { "epoch": 0.19766013141727656, "grad_norm": 6.697634220123291, "learning_rate": 9.991358993865854e-07, "loss": 0.073, "step": 18500 }, { "epoch": 0.19776697473155616, "grad_norm": 8.964263916015625, "learning_rate": 9.991349117893725e-07, "loss": 0.1431, "step": 18510 }, { "epoch": 0.1978738180458358, "grad_norm": 0.8683481812477112, "learning_rate": 9.991339236285982e-07, "loss": 0.1079, "step": 18520 }, { "epoch": 0.19798066136011538, "grad_norm": 21.548343658447266, "learning_rate": 9.991329349042643e-07, "loss": 0.0674, "step": 18530 }, { "epoch": 0.198087504674395, "grad_norm": 5.3486175537109375, "learning_rate": 9.991319456163717e-07, "loss": 0.0974, "step": 18540 }, { "epoch": 0.1981943479886746, "grad_norm": 6.871163368225098, "learning_rate": 9.991309557649214e-07, "loss": 0.179, "step": 18550 }, { "epoch": 0.1983011913029542, "grad_norm": 20.607688903808594, "learning_rate": 9.991299653499147e-07, "loss": 0.0794, "step": 18560 }, { "epoch": 0.19840803461723383, "grad_norm": 0.3389686942100525, "learning_rate": 9.991289743713525e-07, "loss": 0.1127, "step": 18570 }, { "epoch": 0.19851487793151343, "grad_norm": 9.760563850402832, "learning_rate": 9.99127982829236e-07, "loss": 0.1093, "step": 18580 }, { "epoch": 0.19862172124579305, "grad_norm": 8.002815246582031, "learning_rate": 9.991269907235662e-07, "loss": 0.0939, "step": 18590 }, { "epoch": 0.19872856456007265, "grad_norm": 4.0670905113220215, "learning_rate": 9.991259980543445e-07, "loss": 0.1289, "step": 18600 }, { "epoch": 0.19883540787435225, "grad_norm": 7.321254730224609, "learning_rate": 9.991250048215717e-07, "loss": 0.0741, "step": 18610 }, { "epoch": 0.19894225118863187, "grad_norm": 10.833147048950195, "learning_rate": 9.991240110252495e-07, "loss": 0.1079, "step": 18620 }, { "epoch": 0.19904909450291147, "grad_norm": 14.10369873046875, "learning_rate": 9.991230166653784e-07, "loss": 0.1046, "step": 18630 }, { "epoch": 0.1991559378171911, "grad_norm": 5.070258140563965, "learning_rate": 9.991220217419596e-07, "loss": 0.057, "step": 18640 }, { "epoch": 0.1992627811314707, "grad_norm": 6.643021106719971, "learning_rate": 9.991210262549945e-07, "loss": 0.0636, "step": 18650 }, { "epoch": 0.19936962444575032, "grad_norm": 6.406921863555908, "learning_rate": 9.991200302044843e-07, "loss": 0.1235, "step": 18660 }, { "epoch": 0.19947646776002992, "grad_norm": 0.7114527821540833, "learning_rate": 9.991190335904296e-07, "loss": 0.1383, "step": 18670 }, { "epoch": 0.19958331107430952, "grad_norm": 5.380575656890869, "learning_rate": 9.991180364128319e-07, "loss": 0.051, "step": 18680 }, { "epoch": 0.19969015438858914, "grad_norm": 16.823272705078125, "learning_rate": 9.991170386716923e-07, "loss": 0.091, "step": 18690 }, { "epoch": 0.19979699770286874, "grad_norm": 1.348037600517273, "learning_rate": 9.99116040367012e-07, "loss": 0.1024, "step": 18700 }, { "epoch": 0.19990384101714836, "grad_norm": 6.503073215484619, "learning_rate": 9.991150414987918e-07, "loss": 0.0859, "step": 18710 }, { "epoch": 0.20001068433142796, "grad_norm": 7.048142910003662, "learning_rate": 9.991140420670331e-07, "loss": 0.0581, "step": 18720 }, { "epoch": 0.20011752764570756, "grad_norm": 10.579118728637695, "learning_rate": 9.99113042071737e-07, "loss": 0.0604, "step": 18730 }, { "epoch": 0.20022437095998719, "grad_norm": 4.51597785949707, "learning_rate": 9.991120415129048e-07, "loss": 0.0749, "step": 18740 }, { "epoch": 0.20033121427426678, "grad_norm": 13.099281311035156, "learning_rate": 9.991110403905374e-07, "loss": 0.1139, "step": 18750 }, { "epoch": 0.2004380575885464, "grad_norm": 4.656643867492676, "learning_rate": 9.991100387046358e-07, "loss": 0.1878, "step": 18760 }, { "epoch": 0.200544900902826, "grad_norm": 7.300422191619873, "learning_rate": 9.991090364552013e-07, "loss": 0.0507, "step": 18770 }, { "epoch": 0.2006517442171056, "grad_norm": 4.912667751312256, "learning_rate": 9.991080336422351e-07, "loss": 0.0725, "step": 18780 }, { "epoch": 0.20075858753138523, "grad_norm": 11.83582878112793, "learning_rate": 9.991070302657384e-07, "loss": 0.1317, "step": 18790 }, { "epoch": 0.20086543084566483, "grad_norm": 15.349848747253418, "learning_rate": 9.991060263257118e-07, "loss": 0.1394, "step": 18800 }, { "epoch": 0.20097227415994445, "grad_norm": 10.416425704956055, "learning_rate": 9.99105021822157e-07, "loss": 0.0711, "step": 18810 }, { "epoch": 0.20107911747422405, "grad_norm": 6.265444755554199, "learning_rate": 9.99104016755075e-07, "loss": 0.0667, "step": 18820 }, { "epoch": 0.20118596078850365, "grad_norm": 8.10095500946045, "learning_rate": 9.99103011124467e-07, "loss": 0.0795, "step": 18830 }, { "epoch": 0.20129280410278327, "grad_norm": 5.850371837615967, "learning_rate": 9.99102004930334e-07, "loss": 0.0884, "step": 18840 }, { "epoch": 0.20139964741706287, "grad_norm": 2.8304316997528076, "learning_rate": 9.99100998172677e-07, "loss": 0.0962, "step": 18850 }, { "epoch": 0.2015064907313425, "grad_norm": 11.57104206085205, "learning_rate": 9.990999908514975e-07, "loss": 0.0956, "step": 18860 }, { "epoch": 0.2016133340456221, "grad_norm": 6.750555038452148, "learning_rate": 9.990989829667964e-07, "loss": 0.1692, "step": 18870 }, { "epoch": 0.2017201773599017, "grad_norm": 4.5231170654296875, "learning_rate": 9.99097974518575e-07, "loss": 0.0683, "step": 18880 }, { "epoch": 0.20182702067418132, "grad_norm": 1.3463084697723389, "learning_rate": 9.99096965506834e-07, "loss": 0.0894, "step": 18890 }, { "epoch": 0.20193386398846092, "grad_norm": 5.034124851226807, "learning_rate": 9.99095955931575e-07, "loss": 0.1757, "step": 18900 }, { "epoch": 0.20204070730274054, "grad_norm": 2.14351224899292, "learning_rate": 9.99094945792799e-07, "loss": 0.0924, "step": 18910 }, { "epoch": 0.20214755061702014, "grad_norm": 7.407104969024658, "learning_rate": 9.990939350905072e-07, "loss": 0.1273, "step": 18920 }, { "epoch": 0.20225439393129974, "grad_norm": 23.215293884277344, "learning_rate": 9.990929238247007e-07, "loss": 0.1081, "step": 18930 }, { "epoch": 0.20236123724557936, "grad_norm": 9.418161392211914, "learning_rate": 9.990919119953805e-07, "loss": 0.1254, "step": 18940 }, { "epoch": 0.20246808055985896, "grad_norm": 6.0380048751831055, "learning_rate": 9.99090899602548e-07, "loss": 0.1066, "step": 18950 }, { "epoch": 0.20257492387413858, "grad_norm": 3.1573503017425537, "learning_rate": 9.99089886646204e-07, "loss": 0.0831, "step": 18960 }, { "epoch": 0.20268176718841818, "grad_norm": 5.833226203918457, "learning_rate": 9.9908887312635e-07, "loss": 0.0736, "step": 18970 }, { "epoch": 0.20278861050269778, "grad_norm": 16.909229278564453, "learning_rate": 9.99087859042987e-07, "loss": 0.1559, "step": 18980 }, { "epoch": 0.2028954538169774, "grad_norm": 5.495065212249756, "learning_rate": 9.99086844396116e-07, "loss": 0.0633, "step": 18990 }, { "epoch": 0.203002297131257, "grad_norm": 9.5047025680542, "learning_rate": 9.990858291857385e-07, "loss": 0.1192, "step": 19000 }, { "epoch": 0.20310914044553663, "grad_norm": 2.4669291973114014, "learning_rate": 9.990848134118553e-07, "loss": 0.0646, "step": 19010 }, { "epoch": 0.20321598375981623, "grad_norm": 8.224502563476562, "learning_rate": 9.990837970744676e-07, "loss": 0.071, "step": 19020 }, { "epoch": 0.20332282707409585, "grad_norm": 8.82370376586914, "learning_rate": 9.990827801735768e-07, "loss": 0.1483, "step": 19030 }, { "epoch": 0.20342967038837545, "grad_norm": 8.03547477722168, "learning_rate": 9.990817627091839e-07, "loss": 0.117, "step": 19040 }, { "epoch": 0.20353651370265505, "grad_norm": 5.314922332763672, "learning_rate": 9.9908074468129e-07, "loss": 0.0614, "step": 19050 }, { "epoch": 0.20364335701693467, "grad_norm": 11.326737403869629, "learning_rate": 9.99079726089896e-07, "loss": 0.0632, "step": 19060 }, { "epoch": 0.20375020033121427, "grad_norm": 2.8663368225097656, "learning_rate": 9.990787069350036e-07, "loss": 0.0978, "step": 19070 }, { "epoch": 0.2038570436454939, "grad_norm": 2.558516502380371, "learning_rate": 9.990776872166136e-07, "loss": 0.2054, "step": 19080 }, { "epoch": 0.2039638869597735, "grad_norm": 0.5619614720344543, "learning_rate": 9.990766669347273e-07, "loss": 0.1238, "step": 19090 }, { "epoch": 0.2040707302740531, "grad_norm": 5.280391693115234, "learning_rate": 9.990756460893455e-07, "loss": 0.0985, "step": 19100 }, { "epoch": 0.20417757358833272, "grad_norm": 8.290445327758789, "learning_rate": 9.990746246804698e-07, "loss": 0.0995, "step": 19110 }, { "epoch": 0.20428441690261231, "grad_norm": 8.336491584777832, "learning_rate": 9.990736027081012e-07, "loss": 0.1184, "step": 19120 }, { "epoch": 0.20439126021689194, "grad_norm": 0.44152066111564636, "learning_rate": 9.99072580172241e-07, "loss": 0.0594, "step": 19130 }, { "epoch": 0.20449810353117154, "grad_norm": 5.966788291931152, "learning_rate": 9.9907155707289e-07, "loss": 0.112, "step": 19140 }, { "epoch": 0.20460494684545114, "grad_norm": 6.062358379364014, "learning_rate": 9.990705334100495e-07, "loss": 0.1216, "step": 19150 }, { "epoch": 0.20471179015973076, "grad_norm": 14.452366828918457, "learning_rate": 9.990695091837207e-07, "loss": 0.141, "step": 19160 }, { "epoch": 0.20481863347401036, "grad_norm": 5.40852689743042, "learning_rate": 9.99068484393905e-07, "loss": 0.092, "step": 19170 }, { "epoch": 0.20492547678828998, "grad_norm": 0.7389920353889465, "learning_rate": 9.99067459040603e-07, "loss": 0.1086, "step": 19180 }, { "epoch": 0.20503232010256958, "grad_norm": 7.364739418029785, "learning_rate": 9.990664331238163e-07, "loss": 0.0756, "step": 19190 }, { "epoch": 0.20513916341684918, "grad_norm": 8.817967414855957, "learning_rate": 9.99065406643546e-07, "loss": 0.1015, "step": 19200 }, { "epoch": 0.2052460067311288, "grad_norm": 8.10056209564209, "learning_rate": 9.99064379599793e-07, "loss": 0.1833, "step": 19210 }, { "epoch": 0.2053528500454084, "grad_norm": 5.013944149017334, "learning_rate": 9.99063351992559e-07, "loss": 0.0571, "step": 19220 }, { "epoch": 0.20545969335968803, "grad_norm": 9.728681564331055, "learning_rate": 9.990623238218444e-07, "loss": 0.0995, "step": 19230 }, { "epoch": 0.20556653667396763, "grad_norm": 2.7292983531951904, "learning_rate": 9.990612950876512e-07, "loss": 0.047, "step": 19240 }, { "epoch": 0.20567337998824722, "grad_norm": 11.754239082336426, "learning_rate": 9.990602657899798e-07, "loss": 0.1229, "step": 19250 }, { "epoch": 0.20578022330252685, "grad_norm": 7.857410907745361, "learning_rate": 9.99059235928832e-07, "loss": 0.1421, "step": 19260 }, { "epoch": 0.20588706661680645, "grad_norm": 2.185675859451294, "learning_rate": 9.990582055042082e-07, "loss": 0.0991, "step": 19270 }, { "epoch": 0.20599390993108607, "grad_norm": 0.2753666937351227, "learning_rate": 9.990571745161103e-07, "loss": 0.0732, "step": 19280 }, { "epoch": 0.20610075324536567, "grad_norm": 3.7319021224975586, "learning_rate": 9.990561429645393e-07, "loss": 0.0434, "step": 19290 }, { "epoch": 0.20620759655964527, "grad_norm": 11.691444396972656, "learning_rate": 9.99055110849496e-07, "loss": 0.1135, "step": 19300 }, { "epoch": 0.2063144398739249, "grad_norm": 9.28930377960205, "learning_rate": 9.99054078170982e-07, "loss": 0.1096, "step": 19310 }, { "epoch": 0.2064212831882045, "grad_norm": 4.890061378479004, "learning_rate": 9.99053044928998e-07, "loss": 0.1096, "step": 19320 }, { "epoch": 0.20652812650248412, "grad_norm": 16.322458267211914, "learning_rate": 9.990520111235457e-07, "loss": 0.0628, "step": 19330 }, { "epoch": 0.20663496981676371, "grad_norm": 3.2031731605529785, "learning_rate": 9.99050976754626e-07, "loss": 0.1402, "step": 19340 }, { "epoch": 0.2067418131310433, "grad_norm": 1.0979528427124023, "learning_rate": 9.9904994182224e-07, "loss": 0.069, "step": 19350 }, { "epoch": 0.20684865644532294, "grad_norm": 1.0334197282791138, "learning_rate": 9.990489063263892e-07, "loss": 0.0795, "step": 19360 }, { "epoch": 0.20695549975960253, "grad_norm": 11.722919464111328, "learning_rate": 9.990478702670742e-07, "loss": 0.1094, "step": 19370 }, { "epoch": 0.20706234307388216, "grad_norm": 8.791352272033691, "learning_rate": 9.990468336442965e-07, "loss": 0.0747, "step": 19380 }, { "epoch": 0.20716918638816176, "grad_norm": 12.015801429748535, "learning_rate": 9.990457964580574e-07, "loss": 0.1391, "step": 19390 }, { "epoch": 0.20727602970244138, "grad_norm": 15.856680870056152, "learning_rate": 9.99044758708358e-07, "loss": 0.1164, "step": 19400 }, { "epoch": 0.20738287301672098, "grad_norm": 4.841763973236084, "learning_rate": 9.990437203951992e-07, "loss": 0.0764, "step": 19410 }, { "epoch": 0.20748971633100058, "grad_norm": 3.6984968185424805, "learning_rate": 9.990426815185826e-07, "loss": 0.0908, "step": 19420 }, { "epoch": 0.2075965596452802, "grad_norm": 11.85938835144043, "learning_rate": 9.99041642078509e-07, "loss": 0.0946, "step": 19430 }, { "epoch": 0.2077034029595598, "grad_norm": 5.389333248138428, "learning_rate": 9.990406020749798e-07, "loss": 0.1188, "step": 19440 }, { "epoch": 0.20781024627383943, "grad_norm": 1.8142845630645752, "learning_rate": 9.990395615079959e-07, "loss": 0.0346, "step": 19450 }, { "epoch": 0.20791708958811903, "grad_norm": 4.876092433929443, "learning_rate": 9.990385203775589e-07, "loss": 0.1173, "step": 19460 }, { "epoch": 0.20802393290239862, "grad_norm": 3.0302629470825195, "learning_rate": 9.990374786836694e-07, "loss": 0.1368, "step": 19470 }, { "epoch": 0.20813077621667825, "grad_norm": 25.78553009033203, "learning_rate": 9.990364364263294e-07, "loss": 0.1928, "step": 19480 }, { "epoch": 0.20823761953095785, "grad_norm": 0.6525272727012634, "learning_rate": 9.990353936055392e-07, "loss": 0.1158, "step": 19490 }, { "epoch": 0.20834446284523747, "grad_norm": 2.5702931880950928, "learning_rate": 9.990343502213007e-07, "loss": 0.1255, "step": 19500 }, { "epoch": 0.20845130615951707, "grad_norm": 6.754818439483643, "learning_rate": 9.990333062736145e-07, "loss": 0.0738, "step": 19510 }, { "epoch": 0.20855814947379667, "grad_norm": 4.058218002319336, "learning_rate": 9.990322617624822e-07, "loss": 0.0679, "step": 19520 }, { "epoch": 0.2086649927880763, "grad_norm": 2.2689409255981445, "learning_rate": 9.990312166879047e-07, "loss": 0.0839, "step": 19530 }, { "epoch": 0.2087718361023559, "grad_norm": 6.301631450653076, "learning_rate": 9.990301710498834e-07, "loss": 0.1297, "step": 19540 }, { "epoch": 0.20887867941663552, "grad_norm": 6.797725677490234, "learning_rate": 9.990291248484192e-07, "loss": 0.0755, "step": 19550 }, { "epoch": 0.2089855227309151, "grad_norm": 14.201148986816406, "learning_rate": 9.990280780835137e-07, "loss": 0.0572, "step": 19560 }, { "epoch": 0.2090923660451947, "grad_norm": 10.343079566955566, "learning_rate": 9.990270307551679e-07, "loss": 0.0962, "step": 19570 }, { "epoch": 0.20919920935947434, "grad_norm": 12.617980003356934, "learning_rate": 9.990259828633825e-07, "loss": 0.115, "step": 19580 }, { "epoch": 0.20930605267375393, "grad_norm": 8.302742958068848, "learning_rate": 9.990249344081595e-07, "loss": 0.1033, "step": 19590 }, { "epoch": 0.20941289598803356, "grad_norm": 3.7587532997131348, "learning_rate": 9.990238853894994e-07, "loss": 0.0472, "step": 19600 }, { "epoch": 0.20951973930231316, "grad_norm": 1.1378746032714844, "learning_rate": 9.990228358074037e-07, "loss": 0.1239, "step": 19610 }, { "epoch": 0.20962658261659275, "grad_norm": 3.3875253200531006, "learning_rate": 9.990217856618737e-07, "loss": 0.0484, "step": 19620 }, { "epoch": 0.20973342593087238, "grad_norm": 4.808964729309082, "learning_rate": 9.990207349529105e-07, "loss": 0.0793, "step": 19630 }, { "epoch": 0.20984026924515198, "grad_norm": 8.652388572692871, "learning_rate": 9.990196836805152e-07, "loss": 0.0579, "step": 19640 }, { "epoch": 0.2099471125594316, "grad_norm": 5.2869791984558105, "learning_rate": 9.990186318446888e-07, "loss": 0.1195, "step": 19650 }, { "epoch": 0.2100539558737112, "grad_norm": 17.710697174072266, "learning_rate": 9.990175794454328e-07, "loss": 0.1784, "step": 19660 }, { "epoch": 0.2101607991879908, "grad_norm": 2.7319819927215576, "learning_rate": 9.990165264827483e-07, "loss": 0.074, "step": 19670 }, { "epoch": 0.21026764250227042, "grad_norm": 7.868301868438721, "learning_rate": 9.990154729566364e-07, "loss": 0.1118, "step": 19680 }, { "epoch": 0.21037448581655002, "grad_norm": 8.869053840637207, "learning_rate": 9.990144188670986e-07, "loss": 0.1186, "step": 19690 }, { "epoch": 0.21048132913082965, "grad_norm": 4.703739643096924, "learning_rate": 9.990133642141357e-07, "loss": 0.1129, "step": 19700 }, { "epoch": 0.21058817244510925, "grad_norm": 7.760389804840088, "learning_rate": 9.990123089977492e-07, "loss": 0.0668, "step": 19710 }, { "epoch": 0.21069501575938884, "grad_norm": 7.2590508460998535, "learning_rate": 9.9901125321794e-07, "loss": 0.0939, "step": 19720 }, { "epoch": 0.21080185907366847, "grad_norm": 4.284564018249512, "learning_rate": 9.990101968747093e-07, "loss": 0.1741, "step": 19730 }, { "epoch": 0.21090870238794807, "grad_norm": 0.167665034532547, "learning_rate": 9.990091399680586e-07, "loss": 0.057, "step": 19740 }, { "epoch": 0.2110155457022277, "grad_norm": 7.901883602142334, "learning_rate": 9.990080824979889e-07, "loss": 0.1217, "step": 19750 }, { "epoch": 0.2111223890165073, "grad_norm": 2.646538496017456, "learning_rate": 9.990070244645014e-07, "loss": 0.1195, "step": 19760 }, { "epoch": 0.21122923233078691, "grad_norm": 17.658592224121094, "learning_rate": 9.990059658675972e-07, "loss": 0.1329, "step": 19770 }, { "epoch": 0.2113360756450665, "grad_norm": 2.297734498977661, "learning_rate": 9.99004906707278e-07, "loss": 0.0832, "step": 19780 }, { "epoch": 0.2114429189593461, "grad_norm": 7.221688270568848, "learning_rate": 9.990038469835442e-07, "loss": 0.1078, "step": 19790 }, { "epoch": 0.21154976227362574, "grad_norm": 13.893406867980957, "learning_rate": 9.990027866963975e-07, "loss": 0.0237, "step": 19800 }, { "epoch": 0.21165660558790533, "grad_norm": 12.324637413024902, "learning_rate": 9.99001725845839e-07, "loss": 0.0783, "step": 19810 }, { "epoch": 0.21176344890218496, "grad_norm": 15.215293884277344, "learning_rate": 9.990006644318699e-07, "loss": 0.0807, "step": 19820 }, { "epoch": 0.21187029221646456, "grad_norm": 11.778332710266113, "learning_rate": 9.989996024544913e-07, "loss": 0.0537, "step": 19830 }, { "epoch": 0.21197713553074415, "grad_norm": 6.836451530456543, "learning_rate": 9.989985399137046e-07, "loss": 0.0921, "step": 19840 }, { "epoch": 0.21208397884502378, "grad_norm": 7.626835823059082, "learning_rate": 9.98997476809511e-07, "loss": 0.126, "step": 19850 }, { "epoch": 0.21219082215930338, "grad_norm": 12.246728897094727, "learning_rate": 9.989964131419114e-07, "loss": 0.1305, "step": 19860 }, { "epoch": 0.212297665473583, "grad_norm": 3.8732502460479736, "learning_rate": 9.989953489109072e-07, "loss": 0.1781, "step": 19870 }, { "epoch": 0.2124045087878626, "grad_norm": 13.904925346374512, "learning_rate": 9.989942841164997e-07, "loss": 0.1173, "step": 19880 }, { "epoch": 0.2125113521021422, "grad_norm": 6.293644428253174, "learning_rate": 9.989932187586898e-07, "loss": 0.0875, "step": 19890 }, { "epoch": 0.21261819541642182, "grad_norm": 8.831871032714844, "learning_rate": 9.98992152837479e-07, "loss": 0.0749, "step": 19900 }, { "epoch": 0.21272503873070142, "grad_norm": 4.466905117034912, "learning_rate": 9.989910863528686e-07, "loss": 0.1053, "step": 19910 }, { "epoch": 0.21283188204498105, "grad_norm": 4.982603549957275, "learning_rate": 9.989900193048592e-07, "loss": 0.0639, "step": 19920 }, { "epoch": 0.21293872535926064, "grad_norm": 2.462975263595581, "learning_rate": 9.989889516934527e-07, "loss": 0.0791, "step": 19930 }, { "epoch": 0.21304556867354024, "grad_norm": 7.308987140655518, "learning_rate": 9.9898788351865e-07, "loss": 0.1822, "step": 19940 }, { "epoch": 0.21315241198781987, "grad_norm": 0.5920897126197815, "learning_rate": 9.989868147804523e-07, "loss": 0.0697, "step": 19950 }, { "epoch": 0.21325925530209947, "grad_norm": 14.182689666748047, "learning_rate": 9.989857454788607e-07, "loss": 0.1156, "step": 19960 }, { "epoch": 0.2133660986163791, "grad_norm": 8.177668571472168, "learning_rate": 9.989846756138766e-07, "loss": 0.1357, "step": 19970 }, { "epoch": 0.2134729419306587, "grad_norm": 11.206467628479004, "learning_rate": 9.98983605185501e-07, "loss": 0.0932, "step": 19980 }, { "epoch": 0.21357978524493829, "grad_norm": 3.3632397651672363, "learning_rate": 9.989825341937355e-07, "loss": 0.1421, "step": 19990 }, { "epoch": 0.2136866285592179, "grad_norm": 4.429119110107422, "learning_rate": 9.98981462638581e-07, "loss": 0.0589, "step": 20000 }, { "epoch": 0.2137934718734975, "grad_norm": 5.086691856384277, "learning_rate": 9.989803905200387e-07, "loss": 0.0434, "step": 20010 }, { "epoch": 0.21390031518777713, "grad_norm": 2.889127492904663, "learning_rate": 9.9897931783811e-07, "loss": 0.0716, "step": 20020 }, { "epoch": 0.21400715850205673, "grad_norm": 1.5025084018707275, "learning_rate": 9.98978244592796e-07, "loss": 0.0694, "step": 20030 }, { "epoch": 0.21411400181633633, "grad_norm": 0.6142967343330383, "learning_rate": 9.989771707840976e-07, "loss": 0.143, "step": 20040 }, { "epoch": 0.21422084513061596, "grad_norm": 11.15526008605957, "learning_rate": 9.989760964120167e-07, "loss": 0.1985, "step": 20050 }, { "epoch": 0.21432768844489555, "grad_norm": 23.57408332824707, "learning_rate": 9.989750214765539e-07, "loss": 0.0979, "step": 20060 }, { "epoch": 0.21443453175917518, "grad_norm": 0.7317531108856201, "learning_rate": 9.989739459777108e-07, "loss": 0.1647, "step": 20070 }, { "epoch": 0.21454137507345478, "grad_norm": 0.9597266316413879, "learning_rate": 9.989728699154883e-07, "loss": 0.0491, "step": 20080 }, { "epoch": 0.21464821838773437, "grad_norm": 5.453729629516602, "learning_rate": 9.989717932898877e-07, "loss": 0.0356, "step": 20090 }, { "epoch": 0.214755061702014, "grad_norm": 0.6340095400810242, "learning_rate": 9.989707161009104e-07, "loss": 0.1251, "step": 20100 }, { "epoch": 0.2148619050162936, "grad_norm": 8.518510818481445, "learning_rate": 9.989696383485575e-07, "loss": 0.0969, "step": 20110 }, { "epoch": 0.21496874833057322, "grad_norm": 7.046810150146484, "learning_rate": 9.989685600328301e-07, "loss": 0.0771, "step": 20120 }, { "epoch": 0.21507559164485282, "grad_norm": 6.4214887619018555, "learning_rate": 9.989674811537297e-07, "loss": 0.0806, "step": 20130 }, { "epoch": 0.21518243495913245, "grad_norm": 5.364199638366699, "learning_rate": 9.989664017112572e-07, "loss": 0.0743, "step": 20140 }, { "epoch": 0.21528927827341204, "grad_norm": 16.05266571044922, "learning_rate": 9.989653217054143e-07, "loss": 0.0894, "step": 20150 }, { "epoch": 0.21539612158769164, "grad_norm": 15.425728797912598, "learning_rate": 9.989642411362015e-07, "loss": 0.1017, "step": 20160 }, { "epoch": 0.21550296490197127, "grad_norm": 5.073915958404541, "learning_rate": 9.989631600036207e-07, "loss": 0.1462, "step": 20170 }, { "epoch": 0.21560980821625086, "grad_norm": 7.500725746154785, "learning_rate": 9.989620783076728e-07, "loss": 0.0629, "step": 20180 }, { "epoch": 0.2157166515305305, "grad_norm": 0.24729548394680023, "learning_rate": 9.989609960483589e-07, "loss": 0.0738, "step": 20190 }, { "epoch": 0.2158234948448101, "grad_norm": 0.3556077778339386, "learning_rate": 9.989599132256805e-07, "loss": 0.0408, "step": 20200 }, { "epoch": 0.21593033815908969, "grad_norm": 12.122138977050781, "learning_rate": 9.989588298396387e-07, "loss": 0.1344, "step": 20210 }, { "epoch": 0.2160371814733693, "grad_norm": 2.982710838317871, "learning_rate": 9.989577458902348e-07, "loss": 0.047, "step": 20220 }, { "epoch": 0.2161440247876489, "grad_norm": 4.991745948791504, "learning_rate": 9.989566613774698e-07, "loss": 0.0917, "step": 20230 }, { "epoch": 0.21625086810192853, "grad_norm": 2.096879005432129, "learning_rate": 9.98955576301345e-07, "loss": 0.0654, "step": 20240 }, { "epoch": 0.21635771141620813, "grad_norm": 14.726085662841797, "learning_rate": 9.989544906618618e-07, "loss": 0.156, "step": 20250 }, { "epoch": 0.21646455473048773, "grad_norm": 1.4150030612945557, "learning_rate": 9.989534044590214e-07, "loss": 0.1467, "step": 20260 }, { "epoch": 0.21657139804476735, "grad_norm": 11.4904146194458, "learning_rate": 9.989523176928247e-07, "loss": 0.0906, "step": 20270 }, { "epoch": 0.21667824135904695, "grad_norm": 9.155594825744629, "learning_rate": 9.989512303632734e-07, "loss": 0.0534, "step": 20280 }, { "epoch": 0.21678508467332658, "grad_norm": 3.188910722732544, "learning_rate": 9.989501424703685e-07, "loss": 0.1293, "step": 20290 }, { "epoch": 0.21689192798760618, "grad_norm": 1.9413813352584839, "learning_rate": 9.989490540141113e-07, "loss": 0.0431, "step": 20300 }, { "epoch": 0.21699877130188577, "grad_norm": 4.456952095031738, "learning_rate": 9.989479649945027e-07, "loss": 0.1134, "step": 20310 }, { "epoch": 0.2171056146161654, "grad_norm": 4.409600257873535, "learning_rate": 9.989468754115443e-07, "loss": 0.1145, "step": 20320 }, { "epoch": 0.217212457930445, "grad_norm": 27.535934448242188, "learning_rate": 9.989457852652372e-07, "loss": 0.1459, "step": 20330 }, { "epoch": 0.21731930124472462, "grad_norm": 7.630462646484375, "learning_rate": 9.989446945555829e-07, "loss": 0.09, "step": 20340 }, { "epoch": 0.21742614455900422, "grad_norm": 5.015145778656006, "learning_rate": 9.98943603282582e-07, "loss": 0.1012, "step": 20350 }, { "epoch": 0.21753298787328382, "grad_norm": 3.41326904296875, "learning_rate": 9.989425114462365e-07, "loss": 0.0547, "step": 20360 }, { "epoch": 0.21763983118756344, "grad_norm": 6.73268985748291, "learning_rate": 9.989414190465469e-07, "loss": 0.1132, "step": 20370 }, { "epoch": 0.21774667450184304, "grad_norm": 2.928614616394043, "learning_rate": 9.98940326083515e-07, "loss": 0.0882, "step": 20380 }, { "epoch": 0.21785351781612267, "grad_norm": 1.9429291486740112, "learning_rate": 9.989392325571417e-07, "loss": 0.0214, "step": 20390 }, { "epoch": 0.21796036113040226, "grad_norm": 6.556309700012207, "learning_rate": 9.989381384674283e-07, "loss": 0.1018, "step": 20400 }, { "epoch": 0.21806720444468186, "grad_norm": 6.63754940032959, "learning_rate": 9.989370438143763e-07, "loss": 0.057, "step": 20410 }, { "epoch": 0.2181740477589615, "grad_norm": 5.804018974304199, "learning_rate": 9.989359485979867e-07, "loss": 0.0445, "step": 20420 }, { "epoch": 0.21828089107324108, "grad_norm": 5.365806579589844, "learning_rate": 9.989348528182606e-07, "loss": 0.0719, "step": 20430 }, { "epoch": 0.2183877343875207, "grad_norm": 2.6097195148468018, "learning_rate": 9.989337564751994e-07, "loss": 0.0702, "step": 20440 }, { "epoch": 0.2184945777018003, "grad_norm": 12.390637397766113, "learning_rate": 9.989326595688043e-07, "loss": 0.1028, "step": 20450 }, { "epoch": 0.2186014210160799, "grad_norm": 1.5611488819122314, "learning_rate": 9.989315620990766e-07, "loss": 0.11, "step": 20460 }, { "epoch": 0.21870826433035953, "grad_norm": 8.241961479187012, "learning_rate": 9.989304640660176e-07, "loss": 0.1263, "step": 20470 }, { "epoch": 0.21881510764463913, "grad_norm": 9.988886833190918, "learning_rate": 9.989293654696285e-07, "loss": 0.0676, "step": 20480 }, { "epoch": 0.21892195095891875, "grad_norm": 9.126877784729004, "learning_rate": 9.989282663099104e-07, "loss": 0.1267, "step": 20490 }, { "epoch": 0.21902879427319835, "grad_norm": 7.107575416564941, "learning_rate": 9.989271665868646e-07, "loss": 0.0826, "step": 20500 }, { "epoch": 0.21913563758747798, "grad_norm": 10.338302612304688, "learning_rate": 9.989260663004923e-07, "loss": 0.0994, "step": 20510 }, { "epoch": 0.21924248090175757, "grad_norm": 9.988638877868652, "learning_rate": 9.98924965450795e-07, "loss": 0.124, "step": 20520 }, { "epoch": 0.21934932421603717, "grad_norm": 1.238213062286377, "learning_rate": 9.989238640377737e-07, "loss": 0.1715, "step": 20530 }, { "epoch": 0.2194561675303168, "grad_norm": 8.24879264831543, "learning_rate": 9.989227620614299e-07, "loss": 0.1128, "step": 20540 }, { "epoch": 0.2195630108445964, "grad_norm": 9.010254859924316, "learning_rate": 9.989216595217642e-07, "loss": 0.1547, "step": 20550 }, { "epoch": 0.21966985415887602, "grad_norm": 6.7068562507629395, "learning_rate": 9.989205564187785e-07, "loss": 0.0722, "step": 20560 }, { "epoch": 0.21977669747315562, "grad_norm": 5.557854652404785, "learning_rate": 9.98919452752474e-07, "loss": 0.0938, "step": 20570 }, { "epoch": 0.21988354078743522, "grad_norm": 32.037113189697266, "learning_rate": 9.989183485228516e-07, "loss": 0.0926, "step": 20580 }, { "epoch": 0.21999038410171484, "grad_norm": 11.054306983947754, "learning_rate": 9.989172437299127e-07, "loss": 0.0993, "step": 20590 }, { "epoch": 0.22009722741599444, "grad_norm": 14.895569801330566, "learning_rate": 9.989161383736588e-07, "loss": 0.1088, "step": 20600 }, { "epoch": 0.22020407073027407, "grad_norm": 14.452960968017578, "learning_rate": 9.989150324540907e-07, "loss": 0.0998, "step": 20610 }, { "epoch": 0.22031091404455366, "grad_norm": 1.6576937437057495, "learning_rate": 9.9891392597121e-07, "loss": 0.0524, "step": 20620 }, { "epoch": 0.22041775735883326, "grad_norm": 10.24935531616211, "learning_rate": 9.989128189250178e-07, "loss": 0.0768, "step": 20630 }, { "epoch": 0.22052460067311289, "grad_norm": 4.971003532409668, "learning_rate": 9.989117113155152e-07, "loss": 0.046, "step": 20640 }, { "epoch": 0.22063144398739248, "grad_norm": 13.210367202758789, "learning_rate": 9.98910603142704e-07, "loss": 0.1045, "step": 20650 }, { "epoch": 0.2207382873016721, "grad_norm": 5.61377477645874, "learning_rate": 9.989094944065847e-07, "loss": 0.0646, "step": 20660 }, { "epoch": 0.2208451306159517, "grad_norm": 8.982158660888672, "learning_rate": 9.98908385107159e-07, "loss": 0.1333, "step": 20670 }, { "epoch": 0.2209519739302313, "grad_norm": 10.104991912841797, "learning_rate": 9.989072752444282e-07, "loss": 0.0626, "step": 20680 }, { "epoch": 0.22105881724451093, "grad_norm": 8.614872932434082, "learning_rate": 9.989061648183935e-07, "loss": 0.121, "step": 20690 }, { "epoch": 0.22116566055879053, "grad_norm": 5.025222301483154, "learning_rate": 9.989050538290558e-07, "loss": 0.061, "step": 20700 }, { "epoch": 0.22127250387307015, "grad_norm": 4.788694858551025, "learning_rate": 9.98903942276417e-07, "loss": 0.112, "step": 20710 }, { "epoch": 0.22137934718734975, "grad_norm": 0.7462779879570007, "learning_rate": 9.989028301604776e-07, "loss": 0.0747, "step": 20720 }, { "epoch": 0.22148619050162935, "grad_norm": 4.050173282623291, "learning_rate": 9.989017174812395e-07, "loss": 0.1191, "step": 20730 }, { "epoch": 0.22159303381590897, "grad_norm": 4.190202713012695, "learning_rate": 9.989006042387036e-07, "loss": 0.066, "step": 20740 }, { "epoch": 0.22169987713018857, "grad_norm": 9.9595308303833, "learning_rate": 9.988994904328715e-07, "loss": 0.0709, "step": 20750 }, { "epoch": 0.2218067204444682, "grad_norm": 7.290085792541504, "learning_rate": 9.98898376063744e-07, "loss": 0.1594, "step": 20760 }, { "epoch": 0.2219135637587478, "grad_norm": 10.25640869140625, "learning_rate": 9.988972611313225e-07, "loss": 0.1116, "step": 20770 }, { "epoch": 0.2220204070730274, "grad_norm": 3.247654914855957, "learning_rate": 9.988961456356086e-07, "loss": 0.0933, "step": 20780 }, { "epoch": 0.22212725038730702, "grad_norm": 7.358919143676758, "learning_rate": 9.98895029576603e-07, "loss": 0.1311, "step": 20790 }, { "epoch": 0.22223409370158662, "grad_norm": 26.963884353637695, "learning_rate": 9.988939129543076e-07, "loss": 0.1199, "step": 20800 }, { "epoch": 0.22234093701586624, "grad_norm": 7.633873462677002, "learning_rate": 9.98892795768723e-07, "loss": 0.0917, "step": 20810 }, { "epoch": 0.22244778033014584, "grad_norm": 8.03652286529541, "learning_rate": 9.98891678019851e-07, "loss": 0.164, "step": 20820 }, { "epoch": 0.22255462364442544, "grad_norm": 4.491298198699951, "learning_rate": 9.988905597076925e-07, "loss": 0.0491, "step": 20830 }, { "epoch": 0.22266146695870506, "grad_norm": 15.693954467773438, "learning_rate": 9.98889440832249e-07, "loss": 0.1238, "step": 20840 }, { "epoch": 0.22276831027298466, "grad_norm": 1.605010747909546, "learning_rate": 9.988883213935217e-07, "loss": 0.1094, "step": 20850 }, { "epoch": 0.22287515358726429, "grad_norm": 10.28848934173584, "learning_rate": 9.988872013915117e-07, "loss": 0.0806, "step": 20860 }, { "epoch": 0.22298199690154388, "grad_norm": 13.05347728729248, "learning_rate": 9.988860808262205e-07, "loss": 0.13, "step": 20870 }, { "epoch": 0.2230888402158235, "grad_norm": 11.406903266906738, "learning_rate": 9.988849596976492e-07, "loss": 0.0931, "step": 20880 }, { "epoch": 0.2231956835301031, "grad_norm": 6.1580915451049805, "learning_rate": 9.988838380057991e-07, "loss": 0.1553, "step": 20890 }, { "epoch": 0.2233025268443827, "grad_norm": 0.6314306855201721, "learning_rate": 9.988827157506717e-07, "loss": 0.1367, "step": 20900 }, { "epoch": 0.22340937015866233, "grad_norm": 18.98554229736328, "learning_rate": 9.988815929322678e-07, "loss": 0.136, "step": 20910 }, { "epoch": 0.22351621347294193, "grad_norm": 5.6849188804626465, "learning_rate": 9.98880469550589e-07, "loss": 0.195, "step": 20920 }, { "epoch": 0.22362305678722155, "grad_norm": 7.301675796508789, "learning_rate": 9.988793456056366e-07, "loss": 0.1323, "step": 20930 }, { "epoch": 0.22372990010150115, "grad_norm": 7.669824600219727, "learning_rate": 9.988782210974115e-07, "loss": 0.1028, "step": 20940 }, { "epoch": 0.22383674341578075, "grad_norm": 4.996910095214844, "learning_rate": 9.988770960259156e-07, "loss": 0.0352, "step": 20950 }, { "epoch": 0.22394358673006037, "grad_norm": 10.170954704284668, "learning_rate": 9.988759703911496e-07, "loss": 0.0576, "step": 20960 }, { "epoch": 0.22405043004433997, "grad_norm": 5.927069187164307, "learning_rate": 9.988748441931151e-07, "loss": 0.1028, "step": 20970 }, { "epoch": 0.2241572733586196, "grad_norm": 7.163818359375, "learning_rate": 9.98873717431813e-07, "loss": 0.0904, "step": 20980 }, { "epoch": 0.2242641166728992, "grad_norm": 5.403792858123779, "learning_rate": 9.988725901072451e-07, "loss": 0.1107, "step": 20990 }, { "epoch": 0.2243709599871788, "grad_norm": 5.243914604187012, "learning_rate": 9.988714622194124e-07, "loss": 0.1036, "step": 21000 }, { "epoch": 0.22447780330145842, "grad_norm": 11.545454978942871, "learning_rate": 9.98870333768316e-07, "loss": 0.1185, "step": 21010 }, { "epoch": 0.22458464661573802, "grad_norm": 2.326408624649048, "learning_rate": 9.988692047539573e-07, "loss": 0.0307, "step": 21020 }, { "epoch": 0.22469148993001764, "grad_norm": 16.370962142944336, "learning_rate": 9.988680751763379e-07, "loss": 0.1159, "step": 21030 }, { "epoch": 0.22479833324429724, "grad_norm": 5.1230902671813965, "learning_rate": 9.988669450354586e-07, "loss": 0.0528, "step": 21040 }, { "epoch": 0.22490517655857684, "grad_norm": 2.394813060760498, "learning_rate": 9.988658143313207e-07, "loss": 0.0915, "step": 21050 }, { "epoch": 0.22501201987285646, "grad_norm": 4.368041038513184, "learning_rate": 9.98864683063926e-07, "loss": 0.1414, "step": 21060 }, { "epoch": 0.22511886318713606, "grad_norm": 5.588547229766846, "learning_rate": 9.98863551233275e-07, "loss": 0.1539, "step": 21070 }, { "epoch": 0.22522570650141568, "grad_norm": 16.462839126586914, "learning_rate": 9.988624188393698e-07, "loss": 0.1057, "step": 21080 }, { "epoch": 0.22533254981569528, "grad_norm": 2.8127236366271973, "learning_rate": 9.988612858822112e-07, "loss": 0.131, "step": 21090 }, { "epoch": 0.22543939312997488, "grad_norm": 0.7956454157829285, "learning_rate": 9.988601523618003e-07, "loss": 0.1201, "step": 21100 }, { "epoch": 0.2255462364442545, "grad_norm": 21.743003845214844, "learning_rate": 9.988590182781388e-07, "loss": 0.1939, "step": 21110 }, { "epoch": 0.2256530797585341, "grad_norm": 4.080498695373535, "learning_rate": 9.988578836312279e-07, "loss": 0.094, "step": 21120 }, { "epoch": 0.22575992307281373, "grad_norm": 10.772379875183105, "learning_rate": 9.988567484210689e-07, "loss": 0.102, "step": 21130 }, { "epoch": 0.22586676638709333, "grad_norm": 3.2052905559539795, "learning_rate": 9.988556126476626e-07, "loss": 0.0886, "step": 21140 }, { "epoch": 0.22597360970137292, "grad_norm": 4.824997425079346, "learning_rate": 9.98854476311011e-07, "loss": 0.1145, "step": 21150 }, { "epoch": 0.22608045301565255, "grad_norm": 5.103947639465332, "learning_rate": 9.98853339411115e-07, "loss": 0.0842, "step": 21160 }, { "epoch": 0.22618729632993215, "grad_norm": 8.179486274719238, "learning_rate": 9.988522019479758e-07, "loss": 0.1024, "step": 21170 }, { "epoch": 0.22629413964421177, "grad_norm": 7.015074729919434, "learning_rate": 9.988510639215948e-07, "loss": 0.1417, "step": 21180 }, { "epoch": 0.22640098295849137, "grad_norm": 6.638325214385986, "learning_rate": 9.988499253319734e-07, "loss": 0.0663, "step": 21190 }, { "epoch": 0.22650782627277097, "grad_norm": 7.262575626373291, "learning_rate": 9.988487861791128e-07, "loss": 0.0744, "step": 21200 }, { "epoch": 0.2266146695870506, "grad_norm": 0.8510277271270752, "learning_rate": 9.988476464630142e-07, "loss": 0.0949, "step": 21210 }, { "epoch": 0.2267215129013302, "grad_norm": 1.7812234163284302, "learning_rate": 9.988465061836789e-07, "loss": 0.1031, "step": 21220 }, { "epoch": 0.22682835621560982, "grad_norm": 5.851855754852295, "learning_rate": 9.988453653411082e-07, "loss": 0.0628, "step": 21230 }, { "epoch": 0.22693519952988941, "grad_norm": 9.748872756958008, "learning_rate": 9.988442239353036e-07, "loss": 0.0499, "step": 21240 }, { "epoch": 0.22704204284416904, "grad_norm": 4.417013645172119, "learning_rate": 9.988430819662664e-07, "loss": 0.0614, "step": 21250 }, { "epoch": 0.22714888615844864, "grad_norm": 4.3640007972717285, "learning_rate": 9.988419394339973e-07, "loss": 0.1204, "step": 21260 }, { "epoch": 0.22725572947272824, "grad_norm": 7.345926761627197, "learning_rate": 9.988407963384981e-07, "loss": 0.1272, "step": 21270 }, { "epoch": 0.22736257278700786, "grad_norm": 16.468826293945312, "learning_rate": 9.988396526797701e-07, "loss": 0.0889, "step": 21280 }, { "epoch": 0.22746941610128746, "grad_norm": 6.236489772796631, "learning_rate": 9.988385084578145e-07, "loss": 0.1637, "step": 21290 }, { "epoch": 0.22757625941556708, "grad_norm": 12.230937957763672, "learning_rate": 9.988373636726327e-07, "loss": 0.0384, "step": 21300 }, { "epoch": 0.22768310272984668, "grad_norm": 5.752098083496094, "learning_rate": 9.988362183242255e-07, "loss": 0.121, "step": 21310 }, { "epoch": 0.22778994604412628, "grad_norm": 8.703009605407715, "learning_rate": 9.988350724125949e-07, "loss": 0.132, "step": 21320 }, { "epoch": 0.2278967893584059, "grad_norm": 2.8564984798431396, "learning_rate": 9.988339259377417e-07, "loss": 0.0678, "step": 21330 }, { "epoch": 0.2280036326726855, "grad_norm": 7.6344499588012695, "learning_rate": 9.988327788996673e-07, "loss": 0.17, "step": 21340 }, { "epoch": 0.22811047598696513, "grad_norm": 9.108752250671387, "learning_rate": 9.98831631298373e-07, "loss": 0.1185, "step": 21350 }, { "epoch": 0.22821731930124473, "grad_norm": 6.483731746673584, "learning_rate": 9.988304831338604e-07, "loss": 0.0788, "step": 21360 }, { "epoch": 0.22832416261552432, "grad_norm": 8.229147911071777, "learning_rate": 9.988293344061305e-07, "loss": 0.05, "step": 21370 }, { "epoch": 0.22843100592980395, "grad_norm": 7.831254959106445, "learning_rate": 9.988281851151843e-07, "loss": 0.064, "step": 21380 }, { "epoch": 0.22853784924408355, "grad_norm": 10.416476249694824, "learning_rate": 9.988270352610238e-07, "loss": 0.1406, "step": 21390 }, { "epoch": 0.22864469255836317, "grad_norm": 4.89608097076416, "learning_rate": 9.988258848436496e-07, "loss": 0.0607, "step": 21400 }, { "epoch": 0.22875153587264277, "grad_norm": 7.863381862640381, "learning_rate": 9.988247338630636e-07, "loss": 0.1057, "step": 21410 }, { "epoch": 0.22885837918692237, "grad_norm": 0.7938302755355835, "learning_rate": 9.988235823192667e-07, "loss": 0.068, "step": 21420 }, { "epoch": 0.228965222501202, "grad_norm": 1.0665024518966675, "learning_rate": 9.988224302122604e-07, "loss": 0.1219, "step": 21430 }, { "epoch": 0.2290720658154816, "grad_norm": 2.995386838912964, "learning_rate": 9.988212775420459e-07, "loss": 0.143, "step": 21440 }, { "epoch": 0.22917890912976122, "grad_norm": 8.65895938873291, "learning_rate": 9.988201243086245e-07, "loss": 0.049, "step": 21450 }, { "epoch": 0.2292857524440408, "grad_norm": 7.800942897796631, "learning_rate": 9.988189705119976e-07, "loss": 0.1018, "step": 21460 }, { "epoch": 0.2293925957583204, "grad_norm": 3.769697427749634, "learning_rate": 9.988178161521664e-07, "loss": 0.1062, "step": 21470 }, { "epoch": 0.22949943907260004, "grad_norm": 0.4944688379764557, "learning_rate": 9.988166612291322e-07, "loss": 0.121, "step": 21480 }, { "epoch": 0.22960628238687963, "grad_norm": 7.076420783996582, "learning_rate": 9.988155057428965e-07, "loss": 0.0908, "step": 21490 }, { "epoch": 0.22971312570115926, "grad_norm": 3.7488200664520264, "learning_rate": 9.988143496934603e-07, "loss": 0.0362, "step": 21500 }, { "epoch": 0.22981996901543886, "grad_norm": 6.337381362915039, "learning_rate": 9.98813193080825e-07, "loss": 0.0917, "step": 21510 }, { "epoch": 0.22992681232971846, "grad_norm": 8.818718910217285, "learning_rate": 9.98812035904992e-07, "loss": 0.0624, "step": 21520 }, { "epoch": 0.23003365564399808, "grad_norm": 9.861817359924316, "learning_rate": 9.988108781659625e-07, "loss": 0.1479, "step": 21530 }, { "epoch": 0.23014049895827768, "grad_norm": 5.2587456703186035, "learning_rate": 9.98809719863738e-07, "loss": 0.1613, "step": 21540 }, { "epoch": 0.2302473422725573, "grad_norm": 12.817851066589355, "learning_rate": 9.988085609983196e-07, "loss": 0.1675, "step": 21550 }, { "epoch": 0.2303541855868369, "grad_norm": 5.983283996582031, "learning_rate": 9.988074015697088e-07, "loss": 0.0633, "step": 21560 }, { "epoch": 0.2304610289011165, "grad_norm": 3.7507312297821045, "learning_rate": 9.988062415779068e-07, "loss": 0.0542, "step": 21570 }, { "epoch": 0.23056787221539612, "grad_norm": 9.226533889770508, "learning_rate": 9.988050810229148e-07, "loss": 0.0579, "step": 21580 }, { "epoch": 0.23067471552967572, "grad_norm": 1.299646019935608, "learning_rate": 9.988039199047343e-07, "loss": 0.0675, "step": 21590 }, { "epoch": 0.23078155884395535, "grad_norm": 11.693202018737793, "learning_rate": 9.988027582233665e-07, "loss": 0.1239, "step": 21600 }, { "epoch": 0.23088840215823495, "grad_norm": 4.896111488342285, "learning_rate": 9.988015959788127e-07, "loss": 0.0349, "step": 21610 }, { "epoch": 0.23099524547251457, "grad_norm": 7.434712886810303, "learning_rate": 9.988004331710742e-07, "loss": 0.0859, "step": 21620 }, { "epoch": 0.23110208878679417, "grad_norm": 1.2775111198425293, "learning_rate": 9.987992698001523e-07, "loss": 0.0738, "step": 21630 }, { "epoch": 0.23120893210107377, "grad_norm": 0.38697323203086853, "learning_rate": 9.987981058660487e-07, "loss": 0.034, "step": 21640 }, { "epoch": 0.2313157754153534, "grad_norm": 0.5663081407546997, "learning_rate": 9.987969413687641e-07, "loss": 0.1509, "step": 21650 }, { "epoch": 0.231422618729633, "grad_norm": 15.48705005645752, "learning_rate": 9.987957763083003e-07, "loss": 0.0999, "step": 21660 }, { "epoch": 0.23152946204391261, "grad_norm": 9.110795974731445, "learning_rate": 9.987946106846582e-07, "loss": 0.1229, "step": 21670 }, { "epoch": 0.2316363053581922, "grad_norm": 5.596159934997559, "learning_rate": 9.987934444978396e-07, "loss": 0.0752, "step": 21680 }, { "epoch": 0.2317431486724718, "grad_norm": 10.239542007446289, "learning_rate": 9.987922777478453e-07, "loss": 0.0874, "step": 21690 }, { "epoch": 0.23184999198675144, "grad_norm": 11.702371597290039, "learning_rate": 9.987911104346772e-07, "loss": 0.1346, "step": 21700 }, { "epoch": 0.23195683530103103, "grad_norm": 6.450510025024414, "learning_rate": 9.98789942558336e-07, "loss": 0.0961, "step": 21710 }, { "epoch": 0.23206367861531066, "grad_norm": 5.773599147796631, "learning_rate": 9.987887741188235e-07, "loss": 0.1488, "step": 21720 }, { "epoch": 0.23217052192959026, "grad_norm": 6.277382850646973, "learning_rate": 9.987876051161406e-07, "loss": 0.0649, "step": 21730 }, { "epoch": 0.23227736524386985, "grad_norm": 11.550101280212402, "learning_rate": 9.98786435550289e-07, "loss": 0.1119, "step": 21740 }, { "epoch": 0.23238420855814948, "grad_norm": 10.15304946899414, "learning_rate": 9.987852654212698e-07, "loss": 0.1271, "step": 21750 }, { "epoch": 0.23249105187242908, "grad_norm": 8.268630027770996, "learning_rate": 9.987840947290844e-07, "loss": 0.107, "step": 21760 }, { "epoch": 0.2325978951867087, "grad_norm": 10.85132884979248, "learning_rate": 9.987829234737342e-07, "loss": 0.0588, "step": 21770 }, { "epoch": 0.2327047385009883, "grad_norm": 5.545354843139648, "learning_rate": 9.987817516552205e-07, "loss": 0.0775, "step": 21780 }, { "epoch": 0.2328115818152679, "grad_norm": 5.862803936004639, "learning_rate": 9.987805792735445e-07, "loss": 0.0646, "step": 21790 }, { "epoch": 0.23291842512954752, "grad_norm": 3.9794673919677734, "learning_rate": 9.987794063287074e-07, "loss": 0.0932, "step": 21800 }, { "epoch": 0.23302526844382712, "grad_norm": 10.083396911621094, "learning_rate": 9.987782328207109e-07, "loss": 0.085, "step": 21810 }, { "epoch": 0.23313211175810675, "grad_norm": 17.033353805541992, "learning_rate": 9.98777058749556e-07, "loss": 0.0822, "step": 21820 }, { "epoch": 0.23323895507238634, "grad_norm": 10.209253311157227, "learning_rate": 9.987758841152442e-07, "loss": 0.0531, "step": 21830 }, { "epoch": 0.23334579838666594, "grad_norm": 5.477370738983154, "learning_rate": 9.987747089177767e-07, "loss": 0.069, "step": 21840 }, { "epoch": 0.23345264170094557, "grad_norm": 8.064384460449219, "learning_rate": 9.98773533157155e-07, "loss": 0.0776, "step": 21850 }, { "epoch": 0.23355948501522517, "grad_norm": 8.066946029663086, "learning_rate": 9.987723568333804e-07, "loss": 0.0673, "step": 21860 }, { "epoch": 0.2336663283295048, "grad_norm": 16.16689109802246, "learning_rate": 9.987711799464543e-07, "loss": 0.162, "step": 21870 }, { "epoch": 0.2337731716437844, "grad_norm": 5.557161808013916, "learning_rate": 9.987700024963775e-07, "loss": 0.0613, "step": 21880 }, { "epoch": 0.233880014958064, "grad_norm": 1.47846257686615, "learning_rate": 9.98768824483152e-07, "loss": 0.0873, "step": 21890 }, { "epoch": 0.2339868582723436, "grad_norm": 3.0294017791748047, "learning_rate": 9.987676459067788e-07, "loss": 0.0598, "step": 21900 }, { "epoch": 0.2340937015866232, "grad_norm": 6.8797101974487305, "learning_rate": 9.987664667672594e-07, "loss": 0.0357, "step": 21910 }, { "epoch": 0.23420054490090284, "grad_norm": 6.233519077301025, "learning_rate": 9.987652870645946e-07, "loss": 0.154, "step": 21920 }, { "epoch": 0.23430738821518243, "grad_norm": 4.158416748046875, "learning_rate": 9.987641067987865e-07, "loss": 0.0769, "step": 21930 }, { "epoch": 0.23441423152946203, "grad_norm": 10.996828079223633, "learning_rate": 9.98762925969836e-07, "loss": 0.0864, "step": 21940 }, { "epoch": 0.23452107484374166, "grad_norm": 3.899684429168701, "learning_rate": 9.987617445777444e-07, "loss": 0.1412, "step": 21950 }, { "epoch": 0.23462791815802125, "grad_norm": 8.759056091308594, "learning_rate": 9.987605626225135e-07, "loss": 0.0878, "step": 21960 }, { "epoch": 0.23473476147230088, "grad_norm": 6.344759464263916, "learning_rate": 9.987593801041439e-07, "loss": 0.055, "step": 21970 }, { "epoch": 0.23484160478658048, "grad_norm": 4.027966499328613, "learning_rate": 9.987581970226375e-07, "loss": 0.0652, "step": 21980 }, { "epoch": 0.2349484481008601, "grad_norm": 10.383831024169922, "learning_rate": 9.987570133779955e-07, "loss": 0.13, "step": 21990 }, { "epoch": 0.2350552914151397, "grad_norm": 0.9384181499481201, "learning_rate": 9.987558291702191e-07, "loss": 0.0929, "step": 22000 }, { "epoch": 0.2351621347294193, "grad_norm": 1.8489418029785156, "learning_rate": 9.987546443993096e-07, "loss": 0.0682, "step": 22010 }, { "epoch": 0.23526897804369892, "grad_norm": 7.446983814239502, "learning_rate": 9.987534590652687e-07, "loss": 0.0772, "step": 22020 }, { "epoch": 0.23537582135797852, "grad_norm": 9.910747528076172, "learning_rate": 9.987522731680973e-07, "loss": 0.0878, "step": 22030 }, { "epoch": 0.23548266467225815, "grad_norm": 7.52967643737793, "learning_rate": 9.987510867077971e-07, "loss": 0.0346, "step": 22040 }, { "epoch": 0.23558950798653774, "grad_norm": 7.043484687805176, "learning_rate": 9.987498996843693e-07, "loss": 0.0669, "step": 22050 }, { "epoch": 0.23569635130081734, "grad_norm": 6.284886360168457, "learning_rate": 9.987487120978151e-07, "loss": 0.1281, "step": 22060 }, { "epoch": 0.23580319461509697, "grad_norm": 1.192415475845337, "learning_rate": 9.98747523948136e-07, "loss": 0.106, "step": 22070 }, { "epoch": 0.23591003792937656, "grad_norm": 0.7340536713600159, "learning_rate": 9.987463352353334e-07, "loss": 0.0665, "step": 22080 }, { "epoch": 0.2360168812436562, "grad_norm": 8.030810356140137, "learning_rate": 9.987451459594082e-07, "loss": 0.1437, "step": 22090 }, { "epoch": 0.2361237245579358, "grad_norm": 6.505945682525635, "learning_rate": 9.987439561203626e-07, "loss": 0.066, "step": 22100 }, { "epoch": 0.23623056787221539, "grad_norm": 4.739274024963379, "learning_rate": 9.98742765718197e-07, "loss": 0.1126, "step": 22110 }, { "epoch": 0.236337411186495, "grad_norm": 11.532386779785156, "learning_rate": 9.987415747529135e-07, "loss": 0.1285, "step": 22120 }, { "epoch": 0.2364442545007746, "grad_norm": 5.119411468505859, "learning_rate": 9.98740383224513e-07, "loss": 0.0625, "step": 22130 }, { "epoch": 0.23655109781505423, "grad_norm": 8.538934707641602, "learning_rate": 9.987391911329967e-07, "loss": 0.1706, "step": 22140 }, { "epoch": 0.23665794112933383, "grad_norm": 2.764946699142456, "learning_rate": 9.987379984783666e-07, "loss": 0.0631, "step": 22150 }, { "epoch": 0.23676478444361343, "grad_norm": 1.4031763076782227, "learning_rate": 9.987368052606236e-07, "loss": 0.0933, "step": 22160 }, { "epoch": 0.23687162775789306, "grad_norm": 0.5298720598220825, "learning_rate": 9.98735611479769e-07, "loss": 0.1148, "step": 22170 }, { "epoch": 0.23697847107217265, "grad_norm": 5.150391578674316, "learning_rate": 9.98734417135804e-07, "loss": 0.1181, "step": 22180 }, { "epoch": 0.23708531438645228, "grad_norm": 5.498784065246582, "learning_rate": 9.987332222287305e-07, "loss": 0.0756, "step": 22190 }, { "epoch": 0.23719215770073188, "grad_norm": 6.395235538482666, "learning_rate": 9.987320267585495e-07, "loss": 0.1068, "step": 22200 }, { "epoch": 0.23729900101501147, "grad_norm": 11.39021110534668, "learning_rate": 9.987308307252625e-07, "loss": 0.0619, "step": 22210 }, { "epoch": 0.2374058443292911, "grad_norm": 3.7065227031707764, "learning_rate": 9.987296341288708e-07, "loss": 0.0632, "step": 22220 }, { "epoch": 0.2375126876435707, "grad_norm": 7.65900182723999, "learning_rate": 9.987284369693755e-07, "loss": 0.0728, "step": 22230 }, { "epoch": 0.23761953095785032, "grad_norm": 3.7329540252685547, "learning_rate": 9.987272392467783e-07, "loss": 0.0438, "step": 22240 }, { "epoch": 0.23772637427212992, "grad_norm": 5.172196388244629, "learning_rate": 9.987260409610803e-07, "loss": 0.1237, "step": 22250 }, { "epoch": 0.23783321758640952, "grad_norm": 44.36036682128906, "learning_rate": 9.987248421122829e-07, "loss": 0.0985, "step": 22260 }, { "epoch": 0.23794006090068914, "grad_norm": 0.9885264039039612, "learning_rate": 9.987236427003875e-07, "loss": 0.0682, "step": 22270 }, { "epoch": 0.23804690421496874, "grad_norm": 13.261791229248047, "learning_rate": 9.987224427253958e-07, "loss": 0.0888, "step": 22280 }, { "epoch": 0.23815374752924837, "grad_norm": 6.857364177703857, "learning_rate": 9.987212421873087e-07, "loss": 0.0813, "step": 22290 }, { "epoch": 0.23826059084352796, "grad_norm": 3.035547971725464, "learning_rate": 9.987200410861274e-07, "loss": 0.0786, "step": 22300 }, { "epoch": 0.23836743415780756, "grad_norm": 1.590539574623108, "learning_rate": 9.987188394218537e-07, "loss": 0.0863, "step": 22310 }, { "epoch": 0.2384742774720872, "grad_norm": 4.5299859046936035, "learning_rate": 9.987176371944887e-07, "loss": 0.0629, "step": 22320 }, { "epoch": 0.23858112078636678, "grad_norm": 1.2696881294250488, "learning_rate": 9.98716434404034e-07, "loss": 0.1417, "step": 22330 }, { "epoch": 0.2386879641006464, "grad_norm": 2.3722333908081055, "learning_rate": 9.987152310504907e-07, "loss": 0.0651, "step": 22340 }, { "epoch": 0.238794807414926, "grad_norm": 13.75820541381836, "learning_rate": 9.987140271338602e-07, "loss": 0.0707, "step": 22350 }, { "epoch": 0.23890165072920563, "grad_norm": 4.64481782913208, "learning_rate": 9.98712822654144e-07, "loss": 0.0533, "step": 22360 }, { "epoch": 0.23900849404348523, "grad_norm": 9.302773475646973, "learning_rate": 9.987116176113437e-07, "loss": 0.0352, "step": 22370 }, { "epoch": 0.23911533735776483, "grad_norm": 2.272918939590454, "learning_rate": 9.9871041200546e-07, "loss": 0.2479, "step": 22380 }, { "epoch": 0.23922218067204445, "grad_norm": 4.452145099639893, "learning_rate": 9.987092058364947e-07, "loss": 0.0499, "step": 22390 }, { "epoch": 0.23932902398632405, "grad_norm": 12.85420036315918, "learning_rate": 9.987079991044489e-07, "loss": 0.0551, "step": 22400 }, { "epoch": 0.23943586730060368, "grad_norm": 4.572704315185547, "learning_rate": 9.987067918093243e-07, "loss": 0.0853, "step": 22410 }, { "epoch": 0.23954271061488328, "grad_norm": 12.057808876037598, "learning_rate": 9.98705583951122e-07, "loss": 0.1287, "step": 22420 }, { "epoch": 0.23964955392916287, "grad_norm": 11.753384590148926, "learning_rate": 9.987043755298436e-07, "loss": 0.1018, "step": 22430 }, { "epoch": 0.2397563972434425, "grad_norm": 3.232950448989868, "learning_rate": 9.987031665454902e-07, "loss": 0.0464, "step": 22440 }, { "epoch": 0.2398632405577221, "grad_norm": 6.152698516845703, "learning_rate": 9.987019569980633e-07, "loss": 0.1011, "step": 22450 }, { "epoch": 0.23997008387200172, "grad_norm": 2.588799476623535, "learning_rate": 9.987007468875643e-07, "loss": 0.045, "step": 22460 }, { "epoch": 0.24007692718628132, "grad_norm": 6.692866802215576, "learning_rate": 9.986995362139945e-07, "loss": 0.0739, "step": 22470 }, { "epoch": 0.24018377050056092, "grad_norm": 7.974318981170654, "learning_rate": 9.986983249773552e-07, "loss": 0.0935, "step": 22480 }, { "epoch": 0.24029061381484054, "grad_norm": 6.989202976226807, "learning_rate": 9.98697113177648e-07, "loss": 0.0989, "step": 22490 }, { "epoch": 0.24039745712912014, "grad_norm": 4.471804141998291, "learning_rate": 9.98695900814874e-07, "loss": 0.0865, "step": 22500 }, { "epoch": 0.24050430044339977, "grad_norm": 0.3651285767555237, "learning_rate": 9.986946878890347e-07, "loss": 0.0347, "step": 22510 }, { "epoch": 0.24061114375767936, "grad_norm": 5.97463846206665, "learning_rate": 9.986934744001317e-07, "loss": 0.2159, "step": 22520 }, { "epoch": 0.24071798707195896, "grad_norm": 2.1675667762756348, "learning_rate": 9.986922603481658e-07, "loss": 0.0959, "step": 22530 }, { "epoch": 0.2408248303862386, "grad_norm": 3.6039371490478516, "learning_rate": 9.986910457331389e-07, "loss": 0.1052, "step": 22540 }, { "epoch": 0.24093167370051818, "grad_norm": 5.592883110046387, "learning_rate": 9.98689830555052e-07, "loss": 0.1698, "step": 22550 }, { "epoch": 0.2410385170147978, "grad_norm": 1.4512124061584473, "learning_rate": 9.986886148139068e-07, "loss": 0.1066, "step": 22560 }, { "epoch": 0.2411453603290774, "grad_norm": 15.388579368591309, "learning_rate": 9.986873985097045e-07, "loss": 0.1161, "step": 22570 }, { "epoch": 0.241252203643357, "grad_norm": 4.441309928894043, "learning_rate": 9.986861816424463e-07, "loss": 0.1344, "step": 22580 }, { "epoch": 0.24135904695763663, "grad_norm": 4.476674556732178, "learning_rate": 9.98684964212134e-07, "loss": 0.1388, "step": 22590 }, { "epoch": 0.24146589027191623, "grad_norm": 17.1566219329834, "learning_rate": 9.986837462187687e-07, "loss": 0.0874, "step": 22600 }, { "epoch": 0.24157273358619585, "grad_norm": 6.389243125915527, "learning_rate": 9.98682527662352e-07, "loss": 0.0613, "step": 22610 }, { "epoch": 0.24167957690047545, "grad_norm": 6.91513204574585, "learning_rate": 9.986813085428846e-07, "loss": 0.1551, "step": 22620 }, { "epoch": 0.24178642021475505, "grad_norm": 11.71635913848877, "learning_rate": 9.986800888603687e-07, "loss": 0.0964, "step": 22630 }, { "epoch": 0.24189326352903467, "grad_norm": 8.25503158569336, "learning_rate": 9.986788686148052e-07, "loss": 0.0661, "step": 22640 }, { "epoch": 0.24200010684331427, "grad_norm": 6.943230152130127, "learning_rate": 9.986776478061958e-07, "loss": 0.0667, "step": 22650 }, { "epoch": 0.2421069501575939, "grad_norm": 7.647779941558838, "learning_rate": 9.986764264345413e-07, "loss": 0.2288, "step": 22660 }, { "epoch": 0.2422137934718735, "grad_norm": 7.551876068115234, "learning_rate": 9.986752044998438e-07, "loss": 0.0804, "step": 22670 }, { "epoch": 0.2423206367861531, "grad_norm": 5.128215789794922, "learning_rate": 9.986739820021044e-07, "loss": 0.0698, "step": 22680 }, { "epoch": 0.24242748010043272, "grad_norm": 22.849224090576172, "learning_rate": 9.986727589413244e-07, "loss": 0.0502, "step": 22690 }, { "epoch": 0.24253432341471232, "grad_norm": 0.06592605262994766, "learning_rate": 9.98671535317505e-07, "loss": 0.184, "step": 22700 }, { "epoch": 0.24264116672899194, "grad_norm": 4.166656970977783, "learning_rate": 9.986703111306481e-07, "loss": 0.0837, "step": 22710 }, { "epoch": 0.24274801004327154, "grad_norm": 9.355585098266602, "learning_rate": 9.986690863807546e-07, "loss": 0.0768, "step": 22720 }, { "epoch": 0.24285485335755116, "grad_norm": 6.131486415863037, "learning_rate": 9.986678610678262e-07, "loss": 0.0573, "step": 22730 }, { "epoch": 0.24296169667183076, "grad_norm": 1.0690784454345703, "learning_rate": 9.98666635191864e-07, "loss": 0.0346, "step": 22740 }, { "epoch": 0.24306853998611036, "grad_norm": 2.0316848754882812, "learning_rate": 9.986654087528697e-07, "loss": 0.0646, "step": 22750 }, { "epoch": 0.24317538330038999, "grad_norm": 9.033143043518066, "learning_rate": 9.986641817508444e-07, "loss": 0.035, "step": 22760 }, { "epoch": 0.24328222661466958, "grad_norm": 11.335744857788086, "learning_rate": 9.986629541857896e-07, "loss": 0.0771, "step": 22770 }, { "epoch": 0.2433890699289492, "grad_norm": 14.163846015930176, "learning_rate": 9.986617260577068e-07, "loss": 0.0908, "step": 22780 }, { "epoch": 0.2434959132432288, "grad_norm": 4.2110676765441895, "learning_rate": 9.986604973665972e-07, "loss": 0.074, "step": 22790 }, { "epoch": 0.2436027565575084, "grad_norm": 2.1069633960723877, "learning_rate": 9.986592681124622e-07, "loss": 0.0782, "step": 22800 }, { "epoch": 0.24370959987178803, "grad_norm": 8.521682739257812, "learning_rate": 9.986580382953031e-07, "loss": 0.1213, "step": 22810 }, { "epoch": 0.24381644318606763, "grad_norm": 8.014294624328613, "learning_rate": 9.986568079151217e-07, "loss": 0.1483, "step": 22820 }, { "epoch": 0.24392328650034725, "grad_norm": 1.0911333560943604, "learning_rate": 9.986555769719191e-07, "loss": 0.073, "step": 22830 }, { "epoch": 0.24403012981462685, "grad_norm": 14.215956687927246, "learning_rate": 9.986543454656968e-07, "loss": 0.0771, "step": 22840 }, { "epoch": 0.24413697312890645, "grad_norm": 0.6790792942047119, "learning_rate": 9.98653113396456e-07, "loss": 0.0715, "step": 22850 }, { "epoch": 0.24424381644318607, "grad_norm": 0.09552479535341263, "learning_rate": 9.986518807641982e-07, "loss": 0.0824, "step": 22860 }, { "epoch": 0.24435065975746567, "grad_norm": 4.233817100524902, "learning_rate": 9.986506475689248e-07, "loss": 0.0544, "step": 22870 }, { "epoch": 0.2444575030717453, "grad_norm": 0.4022049307823181, "learning_rate": 9.98649413810637e-07, "loss": 0.0908, "step": 22880 }, { "epoch": 0.2445643463860249, "grad_norm": 3.99324107170105, "learning_rate": 9.986481794893367e-07, "loss": 0.0718, "step": 22890 }, { "epoch": 0.2446711897003045, "grad_norm": 6.449671268463135, "learning_rate": 9.986469446050247e-07, "loss": 0.1207, "step": 22900 }, { "epoch": 0.24477803301458412, "grad_norm": 8.789812088012695, "learning_rate": 9.986457091577027e-07, "loss": 0.1397, "step": 22910 }, { "epoch": 0.24488487632886372, "grad_norm": 1.9924235343933105, "learning_rate": 9.986444731473722e-07, "loss": 0.1167, "step": 22920 }, { "epoch": 0.24499171964314334, "grad_norm": 1.0613410472869873, "learning_rate": 9.986432365740344e-07, "loss": 0.0758, "step": 22930 }, { "epoch": 0.24509856295742294, "grad_norm": 5.39992094039917, "learning_rate": 9.986419994376908e-07, "loss": 0.1183, "step": 22940 }, { "epoch": 0.24520540627170254, "grad_norm": 3.245453119277954, "learning_rate": 9.986407617383427e-07, "loss": 0.1261, "step": 22950 }, { "epoch": 0.24531224958598216, "grad_norm": 13.670222282409668, "learning_rate": 9.986395234759914e-07, "loss": 0.0819, "step": 22960 }, { "epoch": 0.24541909290026176, "grad_norm": 15.666271209716797, "learning_rate": 9.986382846506386e-07, "loss": 0.1174, "step": 22970 }, { "epoch": 0.24552593621454138, "grad_norm": 13.435857772827148, "learning_rate": 9.986370452622857e-07, "loss": 0.1863, "step": 22980 }, { "epoch": 0.24563277952882098, "grad_norm": 2.2246527671813965, "learning_rate": 9.986358053109338e-07, "loss": 0.0479, "step": 22990 }, { "epoch": 0.24573962284310058, "grad_norm": 13.135716438293457, "learning_rate": 9.986345647965844e-07, "loss": 0.1457, "step": 23000 }, { "epoch": 0.2458464661573802, "grad_norm": 4.180865287780762, "learning_rate": 9.986333237192389e-07, "loss": 0.0609, "step": 23010 }, { "epoch": 0.2459533094716598, "grad_norm": 5.355839252471924, "learning_rate": 9.986320820788988e-07, "loss": 0.0263, "step": 23020 }, { "epoch": 0.24606015278593943, "grad_norm": 5.028972148895264, "learning_rate": 9.986308398755655e-07, "loss": 0.1014, "step": 23030 }, { "epoch": 0.24616699610021903, "grad_norm": 0.5216099619865417, "learning_rate": 9.986295971092403e-07, "loss": 0.0498, "step": 23040 }, { "epoch": 0.24627383941449862, "grad_norm": 3.9977262020111084, "learning_rate": 9.986283537799245e-07, "loss": 0.0413, "step": 23050 }, { "epoch": 0.24638068272877825, "grad_norm": 2.378783941268921, "learning_rate": 9.986271098876198e-07, "loss": 0.0613, "step": 23060 }, { "epoch": 0.24648752604305785, "grad_norm": 5.034444808959961, "learning_rate": 9.986258654323275e-07, "loss": 0.0615, "step": 23070 }, { "epoch": 0.24659436935733747, "grad_norm": 1.0803810358047485, "learning_rate": 9.98624620414049e-07, "loss": 0.0613, "step": 23080 }, { "epoch": 0.24670121267161707, "grad_norm": 3.2057623863220215, "learning_rate": 9.986233748327856e-07, "loss": 0.1211, "step": 23090 }, { "epoch": 0.2468080559858967, "grad_norm": 10.92878532409668, "learning_rate": 9.986221286885388e-07, "loss": 0.0771, "step": 23100 }, { "epoch": 0.2469148993001763, "grad_norm": 2.7645254135131836, "learning_rate": 9.986208819813098e-07, "loss": 0.0673, "step": 23110 }, { "epoch": 0.2470217426144559, "grad_norm": 2.081035614013672, "learning_rate": 9.986196347111004e-07, "loss": 0.1478, "step": 23120 }, { "epoch": 0.24712858592873552, "grad_norm": 9.161211013793945, "learning_rate": 9.986183868779118e-07, "loss": 0.0558, "step": 23130 }, { "epoch": 0.24723542924301511, "grad_norm": 8.800082206726074, "learning_rate": 9.986171384817453e-07, "loss": 0.0747, "step": 23140 }, { "epoch": 0.24734227255729474, "grad_norm": 3.7731237411499023, "learning_rate": 9.986158895226024e-07, "loss": 0.121, "step": 23150 }, { "epoch": 0.24744911587157434, "grad_norm": 13.078442573547363, "learning_rate": 9.986146400004847e-07, "loss": 0.1285, "step": 23160 }, { "epoch": 0.24755595918585394, "grad_norm": 3.9494357109069824, "learning_rate": 9.986133899153934e-07, "loss": 0.056, "step": 23170 }, { "epoch": 0.24766280250013356, "grad_norm": 9.634156227111816, "learning_rate": 9.9861213926733e-07, "loss": 0.1866, "step": 23180 }, { "epoch": 0.24776964581441316, "grad_norm": 16.253402709960938, "learning_rate": 9.986108880562955e-07, "loss": 0.0734, "step": 23190 }, { "epoch": 0.24787648912869278, "grad_norm": 5.2761030197143555, "learning_rate": 9.98609636282292e-07, "loss": 0.0628, "step": 23200 }, { "epoch": 0.24798333244297238, "grad_norm": 4.372275352478027, "learning_rate": 9.986083839453206e-07, "loss": 0.07, "step": 23210 }, { "epoch": 0.24809017575725198, "grad_norm": 5.528087615966797, "learning_rate": 9.986071310453825e-07, "loss": 0.0413, "step": 23220 }, { "epoch": 0.2481970190715316, "grad_norm": 0.8729193806648254, "learning_rate": 9.986058775824795e-07, "loss": 0.0537, "step": 23230 }, { "epoch": 0.2483038623858112, "grad_norm": 2.929642915725708, "learning_rate": 9.986046235566129e-07, "loss": 0.0886, "step": 23240 }, { "epoch": 0.24841070570009083, "grad_norm": 28.666454315185547, "learning_rate": 9.986033689677838e-07, "loss": 0.1274, "step": 23250 }, { "epoch": 0.24851754901437043, "grad_norm": 1.3951692581176758, "learning_rate": 9.98602113815994e-07, "loss": 0.0834, "step": 23260 }, { "epoch": 0.24862439232865002, "grad_norm": 5.382077693939209, "learning_rate": 9.986008581012448e-07, "loss": 0.066, "step": 23270 }, { "epoch": 0.24873123564292965, "grad_norm": 2.3997979164123535, "learning_rate": 9.985996018235377e-07, "loss": 0.0954, "step": 23280 }, { "epoch": 0.24883807895720925, "grad_norm": 5.8330488204956055, "learning_rate": 9.985983449828737e-07, "loss": 0.1285, "step": 23290 }, { "epoch": 0.24894492227148887, "grad_norm": 14.554146766662598, "learning_rate": 9.985970875792549e-07, "loss": 0.0775, "step": 23300 }, { "epoch": 0.24905176558576847, "grad_norm": 5.367945671081543, "learning_rate": 9.98595829612682e-07, "loss": 0.1189, "step": 23310 }, { "epoch": 0.24915860890004807, "grad_norm": 5.484611511230469, "learning_rate": 9.98594571083157e-07, "loss": 0.0706, "step": 23320 }, { "epoch": 0.2492654522143277, "grad_norm": 1.5170931816101074, "learning_rate": 9.98593311990681e-07, "loss": 0.0406, "step": 23330 }, { "epoch": 0.2493722955286073, "grad_norm": 8.45321273803711, "learning_rate": 9.985920523352557e-07, "loss": 0.0678, "step": 23340 }, { "epoch": 0.24947913884288692, "grad_norm": 3.0457773208618164, "learning_rate": 9.985907921168822e-07, "loss": 0.1372, "step": 23350 }, { "epoch": 0.2495859821571665, "grad_norm": 6.019702434539795, "learning_rate": 9.985895313355622e-07, "loss": 0.056, "step": 23360 }, { "epoch": 0.2496928254714461, "grad_norm": 2.306927442550659, "learning_rate": 9.985882699912968e-07, "loss": 0.0785, "step": 23370 }, { "epoch": 0.24979966878572574, "grad_norm": 6.808680057525635, "learning_rate": 9.985870080840877e-07, "loss": 0.0647, "step": 23380 }, { "epoch": 0.24990651210000533, "grad_norm": 5.476617336273193, "learning_rate": 9.985857456139362e-07, "loss": 0.0951, "step": 23390 }, { "epoch": 0.25001335541428493, "grad_norm": 8.837297439575195, "learning_rate": 9.985844825808438e-07, "loss": 0.0888, "step": 23400 }, { "epoch": 0.25012019872856456, "grad_norm": 6.987573623657227, "learning_rate": 9.98583218984812e-07, "loss": 0.0826, "step": 23410 }, { "epoch": 0.2502270420428442, "grad_norm": 6.494831562042236, "learning_rate": 9.98581954825842e-07, "loss": 0.1023, "step": 23420 }, { "epoch": 0.25033388535712375, "grad_norm": 8.329599380493164, "learning_rate": 9.985806901039353e-07, "loss": 0.0677, "step": 23430 }, { "epoch": 0.2504407286714034, "grad_norm": 4.281790256500244, "learning_rate": 9.985794248190933e-07, "loss": 0.105, "step": 23440 }, { "epoch": 0.250547571985683, "grad_norm": 1.7253378629684448, "learning_rate": 9.985781589713175e-07, "loss": 0.0662, "step": 23450 }, { "epoch": 0.25065441529996263, "grad_norm": 4.764012336730957, "learning_rate": 9.985768925606095e-07, "loss": 0.0719, "step": 23460 }, { "epoch": 0.2507612586142422, "grad_norm": 1.5509291887283325, "learning_rate": 9.985756255869706e-07, "loss": 0.0797, "step": 23470 }, { "epoch": 0.2508681019285218, "grad_norm": 15.194594383239746, "learning_rate": 9.98574358050402e-07, "loss": 0.05, "step": 23480 }, { "epoch": 0.25097494524280145, "grad_norm": 24.194395065307617, "learning_rate": 9.985730899509054e-07, "loss": 0.1841, "step": 23490 }, { "epoch": 0.251081788557081, "grad_norm": 5.577239036560059, "learning_rate": 9.98571821288482e-07, "loss": 0.0677, "step": 23500 }, { "epoch": 0.25118863187136065, "grad_norm": 1.7488504648208618, "learning_rate": 9.985705520631338e-07, "loss": 0.0484, "step": 23510 }, { "epoch": 0.25129547518564027, "grad_norm": 11.103981971740723, "learning_rate": 9.985692822748616e-07, "loss": 0.1256, "step": 23520 }, { "epoch": 0.25140231849991984, "grad_norm": 1.5677838325500488, "learning_rate": 9.98568011923667e-07, "loss": 0.1465, "step": 23530 }, { "epoch": 0.25150916181419947, "grad_norm": 0.8999701738357544, "learning_rate": 9.985667410095516e-07, "loss": 0.0689, "step": 23540 }, { "epoch": 0.2516160051284791, "grad_norm": 4.916934013366699, "learning_rate": 9.985654695325165e-07, "loss": 0.0364, "step": 23550 }, { "epoch": 0.2517228484427587, "grad_norm": 6.648294925689697, "learning_rate": 9.985641974925635e-07, "loss": 0.0505, "step": 23560 }, { "epoch": 0.2518296917570383, "grad_norm": 22.558269500732422, "learning_rate": 9.985629248896938e-07, "loss": 0.1188, "step": 23570 }, { "epoch": 0.2519365350713179, "grad_norm": 6.262667655944824, "learning_rate": 9.98561651723909e-07, "loss": 0.1039, "step": 23580 }, { "epoch": 0.25204337838559754, "grad_norm": 13.923494338989258, "learning_rate": 9.985603779952106e-07, "loss": 0.0862, "step": 23590 }, { "epoch": 0.2521502216998771, "grad_norm": 17.882577896118164, "learning_rate": 9.985591037035996e-07, "loss": 0.0937, "step": 23600 }, { "epoch": 0.25225706501415673, "grad_norm": 4.131006717681885, "learning_rate": 9.98557828849078e-07, "loss": 0.1261, "step": 23610 }, { "epoch": 0.25236390832843636, "grad_norm": 12.465825080871582, "learning_rate": 9.985565534316468e-07, "loss": 0.057, "step": 23620 }, { "epoch": 0.252470751642716, "grad_norm": 3.6144497394561768, "learning_rate": 9.985552774513078e-07, "loss": 0.0842, "step": 23630 }, { "epoch": 0.25257759495699555, "grad_norm": 4.368657112121582, "learning_rate": 9.985540009080622e-07, "loss": 0.1273, "step": 23640 }, { "epoch": 0.2526844382712752, "grad_norm": 5.519163608551025, "learning_rate": 9.985527238019115e-07, "loss": 0.0703, "step": 23650 }, { "epoch": 0.2527912815855548, "grad_norm": 3.5521156787872314, "learning_rate": 9.985514461328572e-07, "loss": 0.0396, "step": 23660 }, { "epoch": 0.2528981248998344, "grad_norm": 1.7284197807312012, "learning_rate": 9.985501679009005e-07, "loss": 0.0501, "step": 23670 }, { "epoch": 0.253004968214114, "grad_norm": 10.330145835876465, "learning_rate": 9.985488891060433e-07, "loss": 0.0542, "step": 23680 }, { "epoch": 0.2531118115283936, "grad_norm": 10.302055358886719, "learning_rate": 9.985476097482865e-07, "loss": 0.1174, "step": 23690 }, { "epoch": 0.2532186548426732, "grad_norm": 4.225879192352295, "learning_rate": 9.985463298276322e-07, "loss": 0.1517, "step": 23700 }, { "epoch": 0.2533254981569528, "grad_norm": 4.425339698791504, "learning_rate": 9.985450493440812e-07, "loss": 0.0279, "step": 23710 }, { "epoch": 0.25343234147123245, "grad_norm": 4.351646900177002, "learning_rate": 9.98543768297635e-07, "loss": 0.1143, "step": 23720 }, { "epoch": 0.2535391847855121, "grad_norm": 7.601349353790283, "learning_rate": 9.985424866882956e-07, "loss": 0.0824, "step": 23730 }, { "epoch": 0.25364602809979164, "grad_norm": 3.6980979442596436, "learning_rate": 9.98541204516064e-07, "loss": 0.1101, "step": 23740 }, { "epoch": 0.25375287141407127, "grad_norm": 14.621017456054688, "learning_rate": 9.985399217809418e-07, "loss": 0.1827, "step": 23750 }, { "epoch": 0.2538597147283509, "grad_norm": 7.6890950202941895, "learning_rate": 9.985386384829304e-07, "loss": 0.1002, "step": 23760 }, { "epoch": 0.25396655804263046, "grad_norm": 4.399034023284912, "learning_rate": 9.985373546220312e-07, "loss": 0.0914, "step": 23770 }, { "epoch": 0.2540734013569101, "grad_norm": 0.4786250591278076, "learning_rate": 9.985360701982458e-07, "loss": 0.0761, "step": 23780 }, { "epoch": 0.2541802446711897, "grad_norm": 2.4169023036956787, "learning_rate": 9.985347852115752e-07, "loss": 0.0734, "step": 23790 }, { "epoch": 0.2542870879854693, "grad_norm": 6.777407646179199, "learning_rate": 9.985334996620216e-07, "loss": 0.0654, "step": 23800 }, { "epoch": 0.2543939312997489, "grad_norm": 9.009784698486328, "learning_rate": 9.98532213549586e-07, "loss": 0.0552, "step": 23810 }, { "epoch": 0.25450077461402854, "grad_norm": 4.533880233764648, "learning_rate": 9.985309268742697e-07, "loss": 0.0708, "step": 23820 }, { "epoch": 0.25460761792830816, "grad_norm": 6.340930461883545, "learning_rate": 9.985296396360744e-07, "loss": 0.0596, "step": 23830 }, { "epoch": 0.25471446124258773, "grad_norm": 8.729634284973145, "learning_rate": 9.985283518350018e-07, "loss": 0.1478, "step": 23840 }, { "epoch": 0.25482130455686736, "grad_norm": 7.024272441864014, "learning_rate": 9.985270634710527e-07, "loss": 0.1035, "step": 23850 }, { "epoch": 0.254928147871147, "grad_norm": 11.367793083190918, "learning_rate": 9.985257745442292e-07, "loss": 0.1503, "step": 23860 }, { "epoch": 0.25503499118542655, "grad_norm": 3.1696343421936035, "learning_rate": 9.985244850545324e-07, "loss": 0.1256, "step": 23870 }, { "epoch": 0.2551418344997062, "grad_norm": 4.579983234405518, "learning_rate": 9.985231950019637e-07, "loss": 0.0712, "step": 23880 }, { "epoch": 0.2552486778139858, "grad_norm": 2.5897107124328613, "learning_rate": 9.985219043865247e-07, "loss": 0.0372, "step": 23890 }, { "epoch": 0.2553555211282654, "grad_norm": 4.399744987487793, "learning_rate": 9.98520613208217e-07, "loss": 0.0543, "step": 23900 }, { "epoch": 0.255462364442545, "grad_norm": 8.105873107910156, "learning_rate": 9.985193214670418e-07, "loss": 0.1007, "step": 23910 }, { "epoch": 0.2555692077568246, "grad_norm": 11.87517261505127, "learning_rate": 9.985180291630006e-07, "loss": 0.0831, "step": 23920 }, { "epoch": 0.25567605107110425, "grad_norm": 5.612618923187256, "learning_rate": 9.985167362960952e-07, "loss": 0.1443, "step": 23930 }, { "epoch": 0.2557828943853838, "grad_norm": 2.6151514053344727, "learning_rate": 9.985154428663264e-07, "loss": 0.0686, "step": 23940 }, { "epoch": 0.25588973769966344, "grad_norm": 1.529066801071167, "learning_rate": 9.985141488736964e-07, "loss": 0.1285, "step": 23950 }, { "epoch": 0.25599658101394307, "grad_norm": 11.893051147460938, "learning_rate": 9.98512854318206e-07, "loss": 0.0779, "step": 23960 }, { "epoch": 0.25610342432822264, "grad_norm": 6.311747074127197, "learning_rate": 9.985115591998572e-07, "loss": 0.1358, "step": 23970 }, { "epoch": 0.25621026764250227, "grad_norm": 3.853360176086426, "learning_rate": 9.98510263518651e-07, "loss": 0.0678, "step": 23980 }, { "epoch": 0.2563171109567819, "grad_norm": 1.8033156394958496, "learning_rate": 9.985089672745893e-07, "loss": 0.0552, "step": 23990 }, { "epoch": 0.2564239542710615, "grad_norm": 30.191146850585938, "learning_rate": 9.98507670467673e-07, "loss": 0.1056, "step": 24000 }, { "epoch": 0.2565307975853411, "grad_norm": 7.443345069885254, "learning_rate": 9.985063730979043e-07, "loss": 0.1444, "step": 24010 }, { "epoch": 0.2566376408996207, "grad_norm": 2.9193575382232666, "learning_rate": 9.985050751652842e-07, "loss": 0.0627, "step": 24020 }, { "epoch": 0.25674448421390034, "grad_norm": 1.4548406600952148, "learning_rate": 9.985037766698142e-07, "loss": 0.0775, "step": 24030 }, { "epoch": 0.2568513275281799, "grad_norm": 12.574426651000977, "learning_rate": 9.985024776114958e-07, "loss": 0.0756, "step": 24040 }, { "epoch": 0.25695817084245953, "grad_norm": 1.389706015586853, "learning_rate": 9.985011779903306e-07, "loss": 0.0745, "step": 24050 }, { "epoch": 0.25706501415673916, "grad_norm": 2.5299720764160156, "learning_rate": 9.984998778063196e-07, "loss": 0.0185, "step": 24060 }, { "epoch": 0.2571718574710187, "grad_norm": 2.640799045562744, "learning_rate": 9.98498577059465e-07, "loss": 0.1276, "step": 24070 }, { "epoch": 0.25727870078529835, "grad_norm": 15.082968711853027, "learning_rate": 9.984972757497675e-07, "loss": 0.0933, "step": 24080 }, { "epoch": 0.257385544099578, "grad_norm": 4.819526195526123, "learning_rate": 9.984959738772294e-07, "loss": 0.1108, "step": 24090 }, { "epoch": 0.2574923874138576, "grad_norm": 2.9521713256835938, "learning_rate": 9.984946714418514e-07, "loss": 0.0675, "step": 24100 }, { "epoch": 0.2575992307281372, "grad_norm": 4.388309478759766, "learning_rate": 9.984933684436353e-07, "loss": 0.0812, "step": 24110 }, { "epoch": 0.2577060740424168, "grad_norm": 10.251129150390625, "learning_rate": 9.984920648825828e-07, "loss": 0.1453, "step": 24120 }, { "epoch": 0.2578129173566964, "grad_norm": 7.074570655822754, "learning_rate": 9.98490760758695e-07, "loss": 0.123, "step": 24130 }, { "epoch": 0.257919760670976, "grad_norm": 10.069452285766602, "learning_rate": 9.984894560719734e-07, "loss": 0.1501, "step": 24140 }, { "epoch": 0.2580266039852556, "grad_norm": 7.815382480621338, "learning_rate": 9.984881508224197e-07, "loss": 0.1089, "step": 24150 }, { "epoch": 0.25813344729953525, "grad_norm": 5.86928653717041, "learning_rate": 9.984868450100353e-07, "loss": 0.1035, "step": 24160 }, { "epoch": 0.2582402906138148, "grad_norm": 2.4495909214019775, "learning_rate": 9.984855386348217e-07, "loss": 0.0888, "step": 24170 }, { "epoch": 0.25834713392809444, "grad_norm": 14.64348030090332, "learning_rate": 9.9848423169678e-07, "loss": 0.0655, "step": 24180 }, { "epoch": 0.25845397724237407, "grad_norm": 7.4521565437316895, "learning_rate": 9.984829241959123e-07, "loss": 0.091, "step": 24190 }, { "epoch": 0.2585608205566537, "grad_norm": 9.815692901611328, "learning_rate": 9.984816161322196e-07, "loss": 0.142, "step": 24200 }, { "epoch": 0.25866766387093326, "grad_norm": 0.2899840176105499, "learning_rate": 9.984803075057035e-07, "loss": 0.0559, "step": 24210 }, { "epoch": 0.2587745071852129, "grad_norm": 10.353034019470215, "learning_rate": 9.984789983163654e-07, "loss": 0.0224, "step": 24220 }, { "epoch": 0.2588813504994925, "grad_norm": 26.495922088623047, "learning_rate": 9.984776885642072e-07, "loss": 0.0972, "step": 24230 }, { "epoch": 0.2589881938137721, "grad_norm": 6.421078205108643, "learning_rate": 9.984763782492298e-07, "loss": 0.0813, "step": 24240 }, { "epoch": 0.2590950371280517, "grad_norm": 8.539603233337402, "learning_rate": 9.98475067371435e-07, "loss": 0.156, "step": 24250 }, { "epoch": 0.25920188044233133, "grad_norm": 2.7590954303741455, "learning_rate": 9.984737559308244e-07, "loss": 0.0385, "step": 24260 }, { "epoch": 0.2593087237566109, "grad_norm": 3.037179470062256, "learning_rate": 9.984724439273994e-07, "loss": 0.0389, "step": 24270 }, { "epoch": 0.25941556707089053, "grad_norm": 5.101518630981445, "learning_rate": 9.98471131361161e-07, "loss": 0.0642, "step": 24280 }, { "epoch": 0.25952241038517015, "grad_norm": 2.0008444786071777, "learning_rate": 9.984698182321112e-07, "loss": 0.1311, "step": 24290 }, { "epoch": 0.2596292536994498, "grad_norm": 8.384849548339844, "learning_rate": 9.984685045402515e-07, "loss": 0.1439, "step": 24300 }, { "epoch": 0.25973609701372935, "grad_norm": 9.522950172424316, "learning_rate": 9.984671902855831e-07, "loss": 0.1096, "step": 24310 }, { "epoch": 0.259842940328009, "grad_norm": 5.23199987411499, "learning_rate": 9.984658754681077e-07, "loss": 0.0678, "step": 24320 }, { "epoch": 0.2599497836422886, "grad_norm": 4.545924663543701, "learning_rate": 9.984645600878265e-07, "loss": 0.084, "step": 24330 }, { "epoch": 0.26005662695656817, "grad_norm": 11.333037376403809, "learning_rate": 9.984632441447415e-07, "loss": 0.1361, "step": 24340 }, { "epoch": 0.2601634702708478, "grad_norm": 7.696308612823486, "learning_rate": 9.984619276388538e-07, "loss": 0.0704, "step": 24350 }, { "epoch": 0.2602703135851274, "grad_norm": 1.0615657567977905, "learning_rate": 9.984606105701649e-07, "loss": 0.0553, "step": 24360 }, { "epoch": 0.26037715689940705, "grad_norm": 4.743792533874512, "learning_rate": 9.984592929386764e-07, "loss": 0.0552, "step": 24370 }, { "epoch": 0.2604840002136866, "grad_norm": 2.5879831314086914, "learning_rate": 9.984579747443896e-07, "loss": 0.0569, "step": 24380 }, { "epoch": 0.26059084352796624, "grad_norm": 4.820690155029297, "learning_rate": 9.984566559873063e-07, "loss": 0.0254, "step": 24390 }, { "epoch": 0.26069768684224587, "grad_norm": 5.9881720542907715, "learning_rate": 9.984553366674277e-07, "loss": 0.1163, "step": 24400 }, { "epoch": 0.26080453015652544, "grad_norm": 4.75787353515625, "learning_rate": 9.984540167847554e-07, "loss": 0.0564, "step": 24410 }, { "epoch": 0.26091137347080506, "grad_norm": 6.844357967376709, "learning_rate": 9.984526963392909e-07, "loss": 0.1367, "step": 24420 }, { "epoch": 0.2610182167850847, "grad_norm": 8.669455528259277, "learning_rate": 9.984513753310357e-07, "loss": 0.1071, "step": 24430 }, { "epoch": 0.26112506009936426, "grad_norm": 3.436830520629883, "learning_rate": 9.984500537599914e-07, "loss": 0.0352, "step": 24440 }, { "epoch": 0.2612319034136439, "grad_norm": 2.6148533821105957, "learning_rate": 9.984487316261593e-07, "loss": 0.0821, "step": 24450 }, { "epoch": 0.2613387467279235, "grad_norm": 2.8498964309692383, "learning_rate": 9.984474089295408e-07, "loss": 0.0576, "step": 24460 }, { "epoch": 0.26144559004220314, "grad_norm": 1.0772981643676758, "learning_rate": 9.984460856701378e-07, "loss": 0.0882, "step": 24470 }, { "epoch": 0.2615524333564827, "grad_norm": 5.658003330230713, "learning_rate": 9.984447618479514e-07, "loss": 0.0753, "step": 24480 }, { "epoch": 0.26165927667076233, "grad_norm": 1.1123753786087036, "learning_rate": 9.984434374629833e-07, "loss": 0.0599, "step": 24490 }, { "epoch": 0.26176611998504196, "grad_norm": 4.701704502105713, "learning_rate": 9.984421125152348e-07, "loss": 0.1425, "step": 24500 }, { "epoch": 0.2618729632993215, "grad_norm": 5.575578689575195, "learning_rate": 9.984407870047077e-07, "loss": 0.0602, "step": 24510 }, { "epoch": 0.26197980661360115, "grad_norm": 0.9067448377609253, "learning_rate": 9.984394609314034e-07, "loss": 0.0955, "step": 24520 }, { "epoch": 0.2620866499278808, "grad_norm": 7.567287921905518, "learning_rate": 9.984381342953232e-07, "loss": 0.0825, "step": 24530 }, { "epoch": 0.26219349324216035, "grad_norm": 5.0084991455078125, "learning_rate": 9.984368070964686e-07, "loss": 0.068, "step": 24540 }, { "epoch": 0.26230033655644, "grad_norm": 20.842121124267578, "learning_rate": 9.984354793348413e-07, "loss": 0.1003, "step": 24550 }, { "epoch": 0.2624071798707196, "grad_norm": 2.375394821166992, "learning_rate": 9.98434151010443e-07, "loss": 0.1575, "step": 24560 }, { "epoch": 0.2625140231849992, "grad_norm": 9.320456504821777, "learning_rate": 9.984328221232745e-07, "loss": 0.096, "step": 24570 }, { "epoch": 0.2626208664992788, "grad_norm": 7.486758232116699, "learning_rate": 9.984314926733382e-07, "loss": 0.1195, "step": 24580 }, { "epoch": 0.2627277098135584, "grad_norm": 15.936107635498047, "learning_rate": 9.984301626606346e-07, "loss": 0.0619, "step": 24590 }, { "epoch": 0.26283455312783804, "grad_norm": 7.580717086791992, "learning_rate": 9.984288320851662e-07, "loss": 0.087, "step": 24600 }, { "epoch": 0.2629413964421176, "grad_norm": 8.15630054473877, "learning_rate": 9.984275009469338e-07, "loss": 0.1149, "step": 24610 }, { "epoch": 0.26304823975639724, "grad_norm": 1.152143120765686, "learning_rate": 9.984261692459392e-07, "loss": 0.0671, "step": 24620 }, { "epoch": 0.26315508307067687, "grad_norm": 4.174810409545898, "learning_rate": 9.984248369821838e-07, "loss": 0.0618, "step": 24630 }, { "epoch": 0.26326192638495644, "grad_norm": 1.4119259119033813, "learning_rate": 9.984235041556692e-07, "loss": 0.1099, "step": 24640 }, { "epoch": 0.26336876969923606, "grad_norm": 5.58882999420166, "learning_rate": 9.984221707663967e-07, "loss": 0.0573, "step": 24650 }, { "epoch": 0.2634756130135157, "grad_norm": 3.2252743244171143, "learning_rate": 9.98420836814368e-07, "loss": 0.0689, "step": 24660 }, { "epoch": 0.2635824563277953, "grad_norm": 6.46970796585083, "learning_rate": 9.984195022995846e-07, "loss": 0.0697, "step": 24670 }, { "epoch": 0.2636892996420749, "grad_norm": 3.7580573558807373, "learning_rate": 9.98418167222048e-07, "loss": 0.0679, "step": 24680 }, { "epoch": 0.2637961429563545, "grad_norm": 11.930779457092285, "learning_rate": 9.984168315817596e-07, "loss": 0.2157, "step": 24690 }, { "epoch": 0.26390298627063413, "grad_norm": 3.7780795097351074, "learning_rate": 9.98415495378721e-07, "loss": 0.0399, "step": 24700 }, { "epoch": 0.2640098295849137, "grad_norm": 13.460844993591309, "learning_rate": 9.984141586129337e-07, "loss": 0.1309, "step": 24710 }, { "epoch": 0.2641166728991933, "grad_norm": 4.504373550415039, "learning_rate": 9.984128212843992e-07, "loss": 0.0918, "step": 24720 }, { "epoch": 0.26422351621347295, "grad_norm": 2.4978203773498535, "learning_rate": 9.984114833931189e-07, "loss": 0.0801, "step": 24730 }, { "epoch": 0.2643303595277526, "grad_norm": 17.104095458984375, "learning_rate": 9.984101449390946e-07, "loss": 0.0597, "step": 24740 }, { "epoch": 0.26443720284203215, "grad_norm": 7.472921848297119, "learning_rate": 9.984088059223277e-07, "loss": 0.0816, "step": 24750 }, { "epoch": 0.2645440461563118, "grad_norm": 6.616048812866211, "learning_rate": 9.984074663428195e-07, "loss": 0.0807, "step": 24760 }, { "epoch": 0.2646508894705914, "grad_norm": 12.094049453735352, "learning_rate": 9.984061262005717e-07, "loss": 0.0894, "step": 24770 }, { "epoch": 0.26475773278487097, "grad_norm": 3.7910566329956055, "learning_rate": 9.984047854955856e-07, "loss": 0.1135, "step": 24780 }, { "epoch": 0.2648645760991506, "grad_norm": 7.494775295257568, "learning_rate": 9.984034442278631e-07, "loss": 0.0787, "step": 24790 }, { "epoch": 0.2649714194134302, "grad_norm": 8.938714981079102, "learning_rate": 9.984021023974052e-07, "loss": 0.112, "step": 24800 }, { "epoch": 0.2650782627277098, "grad_norm": 2.4930241107940674, "learning_rate": 9.984007600042138e-07, "loss": 0.0597, "step": 24810 }, { "epoch": 0.2651851060419894, "grad_norm": 9.195953369140625, "learning_rate": 9.983994170482906e-07, "loss": 0.0462, "step": 24820 }, { "epoch": 0.26529194935626904, "grad_norm": 8.740516662597656, "learning_rate": 9.983980735296367e-07, "loss": 0.1175, "step": 24830 }, { "epoch": 0.26539879267054867, "grad_norm": 13.238208770751953, "learning_rate": 9.983967294482538e-07, "loss": 0.1165, "step": 24840 }, { "epoch": 0.26550563598482824, "grad_norm": 2.18537974357605, "learning_rate": 9.983953848041433e-07, "loss": 0.0396, "step": 24850 }, { "epoch": 0.26561247929910786, "grad_norm": 1.2873589992523193, "learning_rate": 9.983940395973068e-07, "loss": 0.0962, "step": 24860 }, { "epoch": 0.2657193226133875, "grad_norm": 6.274259090423584, "learning_rate": 9.98392693827746e-07, "loss": 0.1671, "step": 24870 }, { "epoch": 0.26582616592766706, "grad_norm": 8.586501121520996, "learning_rate": 9.983913474954617e-07, "loss": 0.1083, "step": 24880 }, { "epoch": 0.2659330092419467, "grad_norm": 17.732730865478516, "learning_rate": 9.983900006004563e-07, "loss": 0.1267, "step": 24890 }, { "epoch": 0.2660398525562263, "grad_norm": 11.399343490600586, "learning_rate": 9.98388653142731e-07, "loss": 0.1469, "step": 24900 }, { "epoch": 0.2661466958705059, "grad_norm": 4.533442497253418, "learning_rate": 9.983873051222873e-07, "loss": 0.0422, "step": 24910 }, { "epoch": 0.2662535391847855, "grad_norm": 9.822003364562988, "learning_rate": 9.983859565391269e-07, "loss": 0.1838, "step": 24920 }, { "epoch": 0.26636038249906513, "grad_norm": 5.458733081817627, "learning_rate": 9.983846073932507e-07, "loss": 0.0859, "step": 24930 }, { "epoch": 0.26646722581334475, "grad_norm": 3.139207124710083, "learning_rate": 9.98383257684661e-07, "loss": 0.1037, "step": 24940 }, { "epoch": 0.2665740691276243, "grad_norm": 8.38292121887207, "learning_rate": 9.983819074133588e-07, "loss": 0.1061, "step": 24950 }, { "epoch": 0.26668091244190395, "grad_norm": 17.03679084777832, "learning_rate": 9.98380556579346e-07, "loss": 0.1215, "step": 24960 }, { "epoch": 0.2667877557561836, "grad_norm": 5.770288467407227, "learning_rate": 9.983792051826238e-07, "loss": 0.0819, "step": 24970 }, { "epoch": 0.26689459907046315, "grad_norm": 10.599370002746582, "learning_rate": 9.983778532231939e-07, "loss": 0.1296, "step": 24980 }, { "epoch": 0.26700144238474277, "grad_norm": 15.571303367614746, "learning_rate": 9.983765007010578e-07, "loss": 0.096, "step": 24990 }, { "epoch": 0.2671082856990224, "grad_norm": 5.434173107147217, "learning_rate": 9.98375147616217e-07, "loss": 0.1205, "step": 25000 }, { "epoch": 0.26721512901330197, "grad_norm": 9.780803680419922, "learning_rate": 9.98373793968673e-07, "loss": 0.114, "step": 25010 }, { "epoch": 0.2673219723275816, "grad_norm": 20.252418518066406, "learning_rate": 9.983724397584275e-07, "loss": 0.0955, "step": 25020 }, { "epoch": 0.2674288156418612, "grad_norm": 12.472146987915039, "learning_rate": 9.983710849854817e-07, "loss": 0.1406, "step": 25030 }, { "epoch": 0.26753565895614084, "grad_norm": 4.7508978843688965, "learning_rate": 9.983697296498374e-07, "loss": 0.1318, "step": 25040 }, { "epoch": 0.2676425022704204, "grad_norm": 8.285655975341797, "learning_rate": 9.98368373751496e-07, "loss": 0.0703, "step": 25050 }, { "epoch": 0.26774934558470004, "grad_norm": 17.225730895996094, "learning_rate": 9.983670172904594e-07, "loss": 0.1293, "step": 25060 }, { "epoch": 0.26785618889897966, "grad_norm": 6.187322616577148, "learning_rate": 9.983656602667285e-07, "loss": 0.0829, "step": 25070 }, { "epoch": 0.26796303221325923, "grad_norm": 6.540861129760742, "learning_rate": 9.983643026803055e-07, "loss": 0.0642, "step": 25080 }, { "epoch": 0.26806987552753886, "grad_norm": 10.96634578704834, "learning_rate": 9.983629445311914e-07, "loss": 0.1615, "step": 25090 }, { "epoch": 0.2681767188418185, "grad_norm": 4.540390968322754, "learning_rate": 9.983615858193878e-07, "loss": 0.058, "step": 25100 }, { "epoch": 0.2682835621560981, "grad_norm": 6.660357475280762, "learning_rate": 9.983602265448965e-07, "loss": 0.0468, "step": 25110 }, { "epoch": 0.2683904054703777, "grad_norm": 0.49804845452308655, "learning_rate": 9.983588667077188e-07, "loss": 0.0398, "step": 25120 }, { "epoch": 0.2684972487846573, "grad_norm": 6.629505157470703, "learning_rate": 9.983575063078565e-07, "loss": 0.0543, "step": 25130 }, { "epoch": 0.26860409209893693, "grad_norm": 6.844878196716309, "learning_rate": 9.983561453453107e-07, "loss": 0.0912, "step": 25140 }, { "epoch": 0.2687109354132165, "grad_norm": 1.9831430912017822, "learning_rate": 9.983547838200834e-07, "loss": 0.0757, "step": 25150 }, { "epoch": 0.2688177787274961, "grad_norm": 2.874849319458008, "learning_rate": 9.983534217321759e-07, "loss": 0.0408, "step": 25160 }, { "epoch": 0.26892462204177575, "grad_norm": 4.020269870758057, "learning_rate": 9.983520590815897e-07, "loss": 0.0856, "step": 25170 }, { "epoch": 0.2690314653560553, "grad_norm": 9.898876190185547, "learning_rate": 9.983506958683263e-07, "loss": 0.0915, "step": 25180 }, { "epoch": 0.26913830867033495, "grad_norm": 3.2190592288970947, "learning_rate": 9.983493320923876e-07, "loss": 0.0788, "step": 25190 }, { "epoch": 0.2692451519846146, "grad_norm": 5.7080864906311035, "learning_rate": 9.98347967753775e-07, "loss": 0.1478, "step": 25200 }, { "epoch": 0.2693519952988942, "grad_norm": 2.4804036617279053, "learning_rate": 9.983466028524896e-07, "loss": 0.0706, "step": 25210 }, { "epoch": 0.26945883861317377, "grad_norm": 10.006226539611816, "learning_rate": 9.983452373885333e-07, "loss": 0.1369, "step": 25220 }, { "epoch": 0.2695656819274534, "grad_norm": 12.6200590133667, "learning_rate": 9.983438713619078e-07, "loss": 0.1253, "step": 25230 }, { "epoch": 0.269672525241733, "grad_norm": 0.19028541445732117, "learning_rate": 9.983425047726144e-07, "loss": 0.1215, "step": 25240 }, { "epoch": 0.2697793685560126, "grad_norm": 5.300641059875488, "learning_rate": 9.983411376206546e-07, "loss": 0.1241, "step": 25250 }, { "epoch": 0.2698862118702922, "grad_norm": 16.818504333496094, "learning_rate": 9.9833976990603e-07, "loss": 0.1586, "step": 25260 }, { "epoch": 0.26999305518457184, "grad_norm": 15.998523712158203, "learning_rate": 9.983384016287423e-07, "loss": 0.1138, "step": 25270 }, { "epoch": 0.2700998984988514, "grad_norm": 8.598217964172363, "learning_rate": 9.983370327887932e-07, "loss": 0.0986, "step": 25280 }, { "epoch": 0.27020674181313104, "grad_norm": 3.451300859451294, "learning_rate": 9.983356633861836e-07, "loss": 0.1128, "step": 25290 }, { "epoch": 0.27031358512741066, "grad_norm": 0.5079317092895508, "learning_rate": 9.983342934209155e-07, "loss": 0.1228, "step": 25300 }, { "epoch": 0.2704204284416903, "grad_norm": 1.6396390199661255, "learning_rate": 9.983329228929904e-07, "loss": 0.0688, "step": 25310 }, { "epoch": 0.27052727175596986, "grad_norm": 9.742481231689453, "learning_rate": 9.9833155180241e-07, "loss": 0.0792, "step": 25320 }, { "epoch": 0.2706341150702495, "grad_norm": 0.7655878067016602, "learning_rate": 9.983301801491755e-07, "loss": 0.0397, "step": 25330 }, { "epoch": 0.2707409583845291, "grad_norm": 8.680913925170898, "learning_rate": 9.983288079332885e-07, "loss": 0.1538, "step": 25340 }, { "epoch": 0.2708478016988087, "grad_norm": 2.5178802013397217, "learning_rate": 9.983274351547507e-07, "loss": 0.0717, "step": 25350 }, { "epoch": 0.2709546450130883, "grad_norm": 4.51560640335083, "learning_rate": 9.983260618135637e-07, "loss": 0.0625, "step": 25360 }, { "epoch": 0.2710614883273679, "grad_norm": 6.507753372192383, "learning_rate": 9.983246879097292e-07, "loss": 0.1503, "step": 25370 }, { "epoch": 0.2711683316416475, "grad_norm": 7.102429389953613, "learning_rate": 9.983233134432483e-07, "loss": 0.0492, "step": 25380 }, { "epoch": 0.2712751749559271, "grad_norm": 10.781960487365723, "learning_rate": 9.983219384141227e-07, "loss": 0.1033, "step": 25390 }, { "epoch": 0.27138201827020675, "grad_norm": 3.1193103790283203, "learning_rate": 9.98320562822354e-07, "loss": 0.0451, "step": 25400 }, { "epoch": 0.2714888615844864, "grad_norm": 6.006561279296875, "learning_rate": 9.983191866679439e-07, "loss": 0.1489, "step": 25410 }, { "epoch": 0.27159570489876594, "grad_norm": 10.543487548828125, "learning_rate": 9.983178099508937e-07, "loss": 0.0772, "step": 25420 }, { "epoch": 0.27170254821304557, "grad_norm": 18.55575942993164, "learning_rate": 9.983164326712051e-07, "loss": 0.0904, "step": 25430 }, { "epoch": 0.2718093915273252, "grad_norm": 2.7643983364105225, "learning_rate": 9.9831505482888e-07, "loss": 0.0369, "step": 25440 }, { "epoch": 0.27191623484160476, "grad_norm": 7.956113815307617, "learning_rate": 9.983136764239191e-07, "loss": 0.1192, "step": 25450 }, { "epoch": 0.2720230781558844, "grad_norm": 5.70299768447876, "learning_rate": 9.983122974563247e-07, "loss": 0.0791, "step": 25460 }, { "epoch": 0.272129921470164, "grad_norm": 6.61067533493042, "learning_rate": 9.98310917926098e-07, "loss": 0.0794, "step": 25470 }, { "epoch": 0.27223676478444364, "grad_norm": 5.286468029022217, "learning_rate": 9.983095378332407e-07, "loss": 0.0894, "step": 25480 }, { "epoch": 0.2723436080987232, "grad_norm": 16.967416763305664, "learning_rate": 9.983081571777544e-07, "loss": 0.1243, "step": 25490 }, { "epoch": 0.27245045141300284, "grad_norm": 5.1451616287231445, "learning_rate": 9.983067759596406e-07, "loss": 0.0986, "step": 25500 }, { "epoch": 0.27255729472728246, "grad_norm": 13.290553092956543, "learning_rate": 9.983053941789007e-07, "loss": 0.1216, "step": 25510 }, { "epoch": 0.27266413804156203, "grad_norm": 5.167559623718262, "learning_rate": 9.983040118355365e-07, "loss": 0.0618, "step": 25520 }, { "epoch": 0.27277098135584166, "grad_norm": 8.495426177978516, "learning_rate": 9.983026289295492e-07, "loss": 0.1311, "step": 25530 }, { "epoch": 0.2728778246701213, "grad_norm": 6.102572917938232, "learning_rate": 9.98301245460941e-07, "loss": 0.065, "step": 25540 }, { "epoch": 0.27298466798440085, "grad_norm": 2.360682964324951, "learning_rate": 9.982998614297128e-07, "loss": 0.0995, "step": 25550 }, { "epoch": 0.2730915112986805, "grad_norm": 0.6310194134712219, "learning_rate": 9.982984768358667e-07, "loss": 0.0744, "step": 25560 }, { "epoch": 0.2731983546129601, "grad_norm": 13.006681442260742, "learning_rate": 9.982970916794038e-07, "loss": 0.1115, "step": 25570 }, { "epoch": 0.27330519792723973, "grad_norm": 2.1959383487701416, "learning_rate": 9.982957059603262e-07, "loss": 0.0652, "step": 25580 }, { "epoch": 0.2734120412415193, "grad_norm": 0.7150450944900513, "learning_rate": 9.982943196786348e-07, "loss": 0.0726, "step": 25590 }, { "epoch": 0.2735188845557989, "grad_norm": 6.103546619415283, "learning_rate": 9.982929328343315e-07, "loss": 0.0559, "step": 25600 }, { "epoch": 0.27362572787007855, "grad_norm": 4.063754081726074, "learning_rate": 9.98291545427418e-07, "loss": 0.1045, "step": 25610 }, { "epoch": 0.2737325711843581, "grad_norm": 3.5024657249450684, "learning_rate": 9.982901574578959e-07, "loss": 0.047, "step": 25620 }, { "epoch": 0.27383941449863775, "grad_norm": 1.2712757587432861, "learning_rate": 9.982887689257663e-07, "loss": 0.0742, "step": 25630 }, { "epoch": 0.27394625781291737, "grad_norm": 0.15860749781131744, "learning_rate": 9.982873798310311e-07, "loss": 0.1235, "step": 25640 }, { "epoch": 0.27405310112719694, "grad_norm": 9.037103652954102, "learning_rate": 9.98285990173692e-07, "loss": 0.0924, "step": 25650 }, { "epoch": 0.27415994444147657, "grad_norm": 7.180314540863037, "learning_rate": 9.982845999537504e-07, "loss": 0.1492, "step": 25660 }, { "epoch": 0.2742667877557562, "grad_norm": 12.79547119140625, "learning_rate": 9.982832091712078e-07, "loss": 0.0448, "step": 25670 }, { "epoch": 0.2743736310700358, "grad_norm": 3.690624713897705, "learning_rate": 9.98281817826066e-07, "loss": 0.0458, "step": 25680 }, { "epoch": 0.2744804743843154, "grad_norm": 1.5685893297195435, "learning_rate": 9.98280425918326e-07, "loss": 0.0635, "step": 25690 }, { "epoch": 0.274587317698595, "grad_norm": 7.725526332855225, "learning_rate": 9.982790334479903e-07, "loss": 0.087, "step": 25700 }, { "epoch": 0.27469416101287464, "grad_norm": 5.206895351409912, "learning_rate": 9.982776404150597e-07, "loss": 0.0732, "step": 25710 }, { "epoch": 0.2748010043271542, "grad_norm": 26.44392204284668, "learning_rate": 9.982762468195362e-07, "loss": 0.133, "step": 25720 }, { "epoch": 0.27490784764143383, "grad_norm": 3.399414300918579, "learning_rate": 9.98274852661421e-07, "loss": 0.1027, "step": 25730 }, { "epoch": 0.27501469095571346, "grad_norm": 4.3522725105285645, "learning_rate": 9.98273457940716e-07, "loss": 0.0867, "step": 25740 }, { "epoch": 0.27512153426999303, "grad_norm": 20.12546157836914, "learning_rate": 9.982720626574226e-07, "loss": 0.1879, "step": 25750 }, { "epoch": 0.27522837758427265, "grad_norm": 27.6238956451416, "learning_rate": 9.982706668115426e-07, "loss": 0.1015, "step": 25760 }, { "epoch": 0.2753352208985523, "grad_norm": 9.460490226745605, "learning_rate": 9.982692704030773e-07, "loss": 0.069, "step": 25770 }, { "epoch": 0.2754420642128319, "grad_norm": 4.27138614654541, "learning_rate": 9.982678734320284e-07, "loss": 0.0659, "step": 25780 }, { "epoch": 0.2755489075271115, "grad_norm": 6.260087013244629, "learning_rate": 9.982664758983973e-07, "loss": 0.0652, "step": 25790 }, { "epoch": 0.2756557508413911, "grad_norm": 6.195069789886475, "learning_rate": 9.98265077802186e-07, "loss": 0.1171, "step": 25800 }, { "epoch": 0.2757625941556707, "grad_norm": 2.353013515472412, "learning_rate": 9.982636791433956e-07, "loss": 0.0908, "step": 25810 }, { "epoch": 0.2758694374699503, "grad_norm": 5.93916130065918, "learning_rate": 9.98262279922028e-07, "loss": 0.0885, "step": 25820 }, { "epoch": 0.2759762807842299, "grad_norm": 6.773888111114502, "learning_rate": 9.982608801380848e-07, "loss": 0.0698, "step": 25830 }, { "epoch": 0.27608312409850955, "grad_norm": 13.786937713623047, "learning_rate": 9.982594797915672e-07, "loss": 0.0491, "step": 25840 }, { "epoch": 0.2761899674127892, "grad_norm": 6.978860378265381, "learning_rate": 9.982580788824773e-07, "loss": 0.0652, "step": 25850 }, { "epoch": 0.27629681072706874, "grad_norm": 9.984570503234863, "learning_rate": 9.982566774108164e-07, "loss": 0.082, "step": 25860 }, { "epoch": 0.27640365404134837, "grad_norm": 6.059791088104248, "learning_rate": 9.982552753765859e-07, "loss": 0.0791, "step": 25870 }, { "epoch": 0.276510497355628, "grad_norm": 3.3340113162994385, "learning_rate": 9.982538727797878e-07, "loss": 0.1137, "step": 25880 }, { "epoch": 0.27661734066990756, "grad_norm": 9.472005844116211, "learning_rate": 9.982524696204234e-07, "loss": 0.0639, "step": 25890 }, { "epoch": 0.2767241839841872, "grad_norm": 10.359381675720215, "learning_rate": 9.982510658984943e-07, "loss": 0.1158, "step": 25900 }, { "epoch": 0.2768310272984668, "grad_norm": 4.820700645446777, "learning_rate": 9.982496616140022e-07, "loss": 0.0804, "step": 25910 }, { "epoch": 0.2769378706127464, "grad_norm": 18.006305694580078, "learning_rate": 9.982482567669485e-07, "loss": 0.1019, "step": 25920 }, { "epoch": 0.277044713927026, "grad_norm": 8.406676292419434, "learning_rate": 9.98246851357335e-07, "loss": 0.044, "step": 25930 }, { "epoch": 0.27715155724130563, "grad_norm": 1.4459799528121948, "learning_rate": 9.982454453851633e-07, "loss": 0.0792, "step": 25940 }, { "epoch": 0.27725840055558526, "grad_norm": 2.4805216789245605, "learning_rate": 9.982440388504347e-07, "loss": 0.0605, "step": 25950 }, { "epoch": 0.27736524386986483, "grad_norm": 6.693126201629639, "learning_rate": 9.982426317531512e-07, "loss": 0.1053, "step": 25960 }, { "epoch": 0.27747208718414446, "grad_norm": 2.4305672645568848, "learning_rate": 9.98241224093314e-07, "loss": 0.0265, "step": 25970 }, { "epoch": 0.2775789304984241, "grad_norm": 6.312663555145264, "learning_rate": 9.98239815870925e-07, "loss": 0.0826, "step": 25980 }, { "epoch": 0.27768577381270365, "grad_norm": 2.387772798538208, "learning_rate": 9.982384070859854e-07, "loss": 0.0689, "step": 25990 }, { "epoch": 0.2777926171269833, "grad_norm": 2.8404972553253174, "learning_rate": 9.982369977384974e-07, "loss": 0.0869, "step": 26000 }, { "epoch": 0.2778994604412629, "grad_norm": 2.515047550201416, "learning_rate": 9.98235587828462e-07, "loss": 0.0972, "step": 26010 }, { "epoch": 0.27800630375554247, "grad_norm": 20.290691375732422, "learning_rate": 9.98234177355881e-07, "loss": 0.0729, "step": 26020 }, { "epoch": 0.2781131470698221, "grad_norm": 10.477701187133789, "learning_rate": 9.98232766320756e-07, "loss": 0.1237, "step": 26030 }, { "epoch": 0.2782199903841017, "grad_norm": 7.632126331329346, "learning_rate": 9.982313547230888e-07, "loss": 0.0961, "step": 26040 }, { "epoch": 0.27832683369838135, "grad_norm": 3.3949265480041504, "learning_rate": 9.982299425628805e-07, "loss": 0.1127, "step": 26050 }, { "epoch": 0.2784336770126609, "grad_norm": 1.4977613687515259, "learning_rate": 9.98228529840133e-07, "loss": 0.0666, "step": 26060 }, { "epoch": 0.27854052032694054, "grad_norm": 3.833341360092163, "learning_rate": 9.982271165548482e-07, "loss": 0.0476, "step": 26070 }, { "epoch": 0.27864736364122017, "grad_norm": 3.213980197906494, "learning_rate": 9.982257027070272e-07, "loss": 0.0882, "step": 26080 }, { "epoch": 0.27875420695549974, "grad_norm": 14.249927520751953, "learning_rate": 9.982242882966716e-07, "loss": 0.0891, "step": 26090 }, { "epoch": 0.27886105026977936, "grad_norm": 6.05118465423584, "learning_rate": 9.982228733237833e-07, "loss": 0.0669, "step": 26100 }, { "epoch": 0.278967893584059, "grad_norm": 7.314014434814453, "learning_rate": 9.98221457788364e-07, "loss": 0.0728, "step": 26110 }, { "epoch": 0.27907473689833856, "grad_norm": 4.723127841949463, "learning_rate": 9.98220041690415e-07, "loss": 0.0723, "step": 26120 }, { "epoch": 0.2791815802126182, "grad_norm": 9.60251522064209, "learning_rate": 9.982186250299379e-07, "loss": 0.0918, "step": 26130 }, { "epoch": 0.2792884235268978, "grad_norm": 8.462583541870117, "learning_rate": 9.982172078069343e-07, "loss": 0.107, "step": 26140 }, { "epoch": 0.27939526684117744, "grad_norm": 0.2405615895986557, "learning_rate": 9.98215790021406e-07, "loss": 0.1396, "step": 26150 }, { "epoch": 0.279502110155457, "grad_norm": 7.656961441040039, "learning_rate": 9.982143716733543e-07, "loss": 0.0396, "step": 26160 }, { "epoch": 0.27960895346973663, "grad_norm": 2.514706611633301, "learning_rate": 9.98212952762781e-07, "loss": 0.062, "step": 26170 }, { "epoch": 0.27971579678401626, "grad_norm": 2.257713794708252, "learning_rate": 9.982115332896876e-07, "loss": 0.0897, "step": 26180 }, { "epoch": 0.2798226400982958, "grad_norm": 8.908638000488281, "learning_rate": 9.98210113254076e-07, "loss": 0.0358, "step": 26190 }, { "epoch": 0.27992948341257545, "grad_norm": 9.448912620544434, "learning_rate": 9.982086926559476e-07, "loss": 0.038, "step": 26200 }, { "epoch": 0.2800363267268551, "grad_norm": 0.11489880830049515, "learning_rate": 9.982072714953038e-07, "loss": 0.1211, "step": 26210 }, { "epoch": 0.2801431700411347, "grad_norm": 16.447242736816406, "learning_rate": 9.982058497721463e-07, "loss": 0.1079, "step": 26220 }, { "epoch": 0.2802500133554143, "grad_norm": 8.23537826538086, "learning_rate": 9.982044274864772e-07, "loss": 0.0936, "step": 26230 }, { "epoch": 0.2803568566696939, "grad_norm": 2.7671456336975098, "learning_rate": 9.982030046382975e-07, "loss": 0.2373, "step": 26240 }, { "epoch": 0.2804636999839735, "grad_norm": 0.565457284450531, "learning_rate": 9.982015812276087e-07, "loss": 0.0779, "step": 26250 }, { "epoch": 0.2805705432982531, "grad_norm": 10.384592056274414, "learning_rate": 9.982001572544132e-07, "loss": 0.1344, "step": 26260 }, { "epoch": 0.2806773866125327, "grad_norm": 2.109734296798706, "learning_rate": 9.981987327187118e-07, "loss": 0.0939, "step": 26270 }, { "epoch": 0.28078422992681235, "grad_norm": 4.597861289978027, "learning_rate": 9.981973076205066e-07, "loss": 0.0849, "step": 26280 }, { "epoch": 0.2808910732410919, "grad_norm": 1.3549830913543701, "learning_rate": 9.981958819597989e-07, "loss": 0.0888, "step": 26290 }, { "epoch": 0.28099791655537154, "grad_norm": 12.512744903564453, "learning_rate": 9.981944557365904e-07, "loss": 0.1329, "step": 26300 }, { "epoch": 0.28110475986965117, "grad_norm": 3.311314821243286, "learning_rate": 9.98193028950883e-07, "loss": 0.1409, "step": 26310 }, { "epoch": 0.2812116031839308, "grad_norm": 8.956313133239746, "learning_rate": 9.981916016026779e-07, "loss": 0.0773, "step": 26320 }, { "epoch": 0.28131844649821036, "grad_norm": 3.6674396991729736, "learning_rate": 9.981901736919769e-07, "loss": 0.0392, "step": 26330 }, { "epoch": 0.28142528981249, "grad_norm": 4.242141246795654, "learning_rate": 9.981887452187816e-07, "loss": 0.0696, "step": 26340 }, { "epoch": 0.2815321331267696, "grad_norm": 4.039493560791016, "learning_rate": 9.981873161830938e-07, "loss": 0.1504, "step": 26350 }, { "epoch": 0.2816389764410492, "grad_norm": 6.349425315856934, "learning_rate": 9.981858865849147e-07, "loss": 0.0935, "step": 26360 }, { "epoch": 0.2817458197553288, "grad_norm": 1.3687494993209839, "learning_rate": 9.981844564242462e-07, "loss": 0.0983, "step": 26370 }, { "epoch": 0.28185266306960843, "grad_norm": 5.181824207305908, "learning_rate": 9.981830257010898e-07, "loss": 0.07, "step": 26380 }, { "epoch": 0.281959506383888, "grad_norm": 7.7272772789001465, "learning_rate": 9.981815944154472e-07, "loss": 0.1799, "step": 26390 }, { "epoch": 0.28206634969816763, "grad_norm": 11.835515022277832, "learning_rate": 9.981801625673199e-07, "loss": 0.2211, "step": 26400 }, { "epoch": 0.28217319301244725, "grad_norm": 2.573580026626587, "learning_rate": 9.981787301567097e-07, "loss": 0.0772, "step": 26410 }, { "epoch": 0.2822800363267269, "grad_norm": 8.542791366577148, "learning_rate": 9.98177297183618e-07, "loss": 0.0963, "step": 26420 }, { "epoch": 0.28238687964100645, "grad_norm": 3.117298126220703, "learning_rate": 9.981758636480467e-07, "loss": 0.0853, "step": 26430 }, { "epoch": 0.2824937229552861, "grad_norm": 17.84525489807129, "learning_rate": 9.981744295499973e-07, "loss": 0.0901, "step": 26440 }, { "epoch": 0.2826005662695657, "grad_norm": 3.985910654067993, "learning_rate": 9.98172994889471e-07, "loss": 0.0447, "step": 26450 }, { "epoch": 0.28270740958384527, "grad_norm": 9.561933517456055, "learning_rate": 9.981715596664702e-07, "loss": 0.0741, "step": 26460 }, { "epoch": 0.2828142528981249, "grad_norm": 10.552595138549805, "learning_rate": 9.981701238809958e-07, "loss": 0.1122, "step": 26470 }, { "epoch": 0.2829210962124045, "grad_norm": 4.770920276641846, "learning_rate": 9.9816868753305e-07, "loss": 0.0878, "step": 26480 }, { "epoch": 0.2830279395266841, "grad_norm": 7.051063060760498, "learning_rate": 9.98167250622634e-07, "loss": 0.0964, "step": 26490 }, { "epoch": 0.2831347828409637, "grad_norm": 6.834279537200928, "learning_rate": 9.981658131497497e-07, "loss": 0.1284, "step": 26500 }, { "epoch": 0.28324162615524334, "grad_norm": 8.314404487609863, "learning_rate": 9.981643751143984e-07, "loss": 0.0672, "step": 26510 }, { "epoch": 0.28334846946952297, "grad_norm": 8.753673553466797, "learning_rate": 9.98162936516582e-07, "loss": 0.1078, "step": 26520 }, { "epoch": 0.28345531278380254, "grad_norm": 0.1448584347963333, "learning_rate": 9.98161497356302e-07, "loss": 0.0609, "step": 26530 }, { "epoch": 0.28356215609808216, "grad_norm": 13.902240753173828, "learning_rate": 9.981600576335602e-07, "loss": 0.0357, "step": 26540 }, { "epoch": 0.2836689994123618, "grad_norm": 20.10854721069336, "learning_rate": 9.981586173483581e-07, "loss": 0.0425, "step": 26550 }, { "epoch": 0.28377584272664136, "grad_norm": 6.93664026260376, "learning_rate": 9.981571765006974e-07, "loss": 0.0765, "step": 26560 }, { "epoch": 0.283882686040921, "grad_norm": 2.040959358215332, "learning_rate": 9.981557350905795e-07, "loss": 0.0637, "step": 26570 }, { "epoch": 0.2839895293552006, "grad_norm": 12.657175064086914, "learning_rate": 9.98154293118006e-07, "loss": 0.0842, "step": 26580 }, { "epoch": 0.28409637266948023, "grad_norm": 12.26334285736084, "learning_rate": 9.98152850582979e-07, "loss": 0.0911, "step": 26590 }, { "epoch": 0.2842032159837598, "grad_norm": 6.813023090362549, "learning_rate": 9.981514074854996e-07, "loss": 0.0852, "step": 26600 }, { "epoch": 0.28431005929803943, "grad_norm": 5.942630767822266, "learning_rate": 9.981499638255699e-07, "loss": 0.0582, "step": 26610 }, { "epoch": 0.28441690261231906, "grad_norm": 1.11404287815094, "learning_rate": 9.981485196031912e-07, "loss": 0.1086, "step": 26620 }, { "epoch": 0.2845237459265986, "grad_norm": 10.31403636932373, "learning_rate": 9.981470748183652e-07, "loss": 0.0976, "step": 26630 }, { "epoch": 0.28463058924087825, "grad_norm": 1.1882545948028564, "learning_rate": 9.981456294710935e-07, "loss": 0.0762, "step": 26640 }, { "epoch": 0.2847374325551579, "grad_norm": 4.511030673980713, "learning_rate": 9.98144183561378e-07, "loss": 0.091, "step": 26650 }, { "epoch": 0.28484427586943745, "grad_norm": 23.771554946899414, "learning_rate": 9.981427370892198e-07, "loss": 0.1126, "step": 26660 }, { "epoch": 0.28495111918371707, "grad_norm": 6.722363471984863, "learning_rate": 9.98141290054621e-07, "loss": 0.0558, "step": 26670 }, { "epoch": 0.2850579624979967, "grad_norm": 3.041578531265259, "learning_rate": 9.98139842457583e-07, "loss": 0.0886, "step": 26680 }, { "epoch": 0.2851648058122763, "grad_norm": 2.488835573196411, "learning_rate": 9.98138394298108e-07, "loss": 0.0766, "step": 26690 }, { "epoch": 0.2852716491265559, "grad_norm": 4.542619705200195, "learning_rate": 9.981369455761966e-07, "loss": 0.113, "step": 26700 }, { "epoch": 0.2853784924408355, "grad_norm": 3.244781494140625, "learning_rate": 9.981354962918512e-07, "loss": 0.0788, "step": 26710 }, { "epoch": 0.28548533575511514, "grad_norm": 0.17649804055690765, "learning_rate": 9.981340464450731e-07, "loss": 0.0882, "step": 26720 }, { "epoch": 0.2855921790693947, "grad_norm": 1.9236360788345337, "learning_rate": 9.981325960358641e-07, "loss": 0.0792, "step": 26730 }, { "epoch": 0.28569902238367434, "grad_norm": 8.027731895446777, "learning_rate": 9.98131145064226e-07, "loss": 0.0849, "step": 26740 }, { "epoch": 0.28580586569795396, "grad_norm": 2.7985572814941406, "learning_rate": 9.9812969353016e-07, "loss": 0.0446, "step": 26750 }, { "epoch": 0.28591270901223353, "grad_norm": 2.7920613288879395, "learning_rate": 9.981282414336682e-07, "loss": 0.11, "step": 26760 }, { "epoch": 0.28601955232651316, "grad_norm": 1.2233048677444458, "learning_rate": 9.98126788774752e-07, "loss": 0.0757, "step": 26770 }, { "epoch": 0.2861263956407928, "grad_norm": 3.020500898361206, "learning_rate": 9.98125335553413e-07, "loss": 0.0734, "step": 26780 }, { "epoch": 0.2862332389550724, "grad_norm": 9.55583667755127, "learning_rate": 9.981238817696528e-07, "loss": 0.0992, "step": 26790 }, { "epoch": 0.286340082269352, "grad_norm": 3.4033915996551514, "learning_rate": 9.981224274234732e-07, "loss": 0.0929, "step": 26800 }, { "epoch": 0.2864469255836316, "grad_norm": 2.04217529296875, "learning_rate": 9.981209725148758e-07, "loss": 0.0478, "step": 26810 }, { "epoch": 0.28655376889791123, "grad_norm": 1.0872235298156738, "learning_rate": 9.981195170438623e-07, "loss": 0.0497, "step": 26820 }, { "epoch": 0.2866606122121908, "grad_norm": 3.075441360473633, "learning_rate": 9.981180610104342e-07, "loss": 0.084, "step": 26830 }, { "epoch": 0.2867674555264704, "grad_norm": 8.006388664245605, "learning_rate": 9.981166044145933e-07, "loss": 0.1093, "step": 26840 }, { "epoch": 0.28687429884075005, "grad_norm": 3.701176404953003, "learning_rate": 9.98115147256341e-07, "loss": 0.0736, "step": 26850 }, { "epoch": 0.2869811421550296, "grad_norm": 0.5013125538825989, "learning_rate": 9.981136895356791e-07, "loss": 0.1145, "step": 26860 }, { "epoch": 0.28708798546930925, "grad_norm": 12.393486976623535, "learning_rate": 9.981122312526096e-07, "loss": 0.1034, "step": 26870 }, { "epoch": 0.2871948287835889, "grad_norm": 3.930121421813965, "learning_rate": 9.981107724071333e-07, "loss": 0.0672, "step": 26880 }, { "epoch": 0.2873016720978685, "grad_norm": 0.8159496784210205, "learning_rate": 9.981093129992527e-07, "loss": 0.0331, "step": 26890 }, { "epoch": 0.28740851541214807, "grad_norm": 4.5851149559021, "learning_rate": 9.98107853028969e-07, "loss": 0.105, "step": 26900 }, { "epoch": 0.2875153587264277, "grad_norm": 4.699177265167236, "learning_rate": 9.981063924962837e-07, "loss": 0.1289, "step": 26910 }, { "epoch": 0.2876222020407073, "grad_norm": 6.995891094207764, "learning_rate": 9.98104931401199e-07, "loss": 0.0459, "step": 26920 }, { "epoch": 0.2877290453549869, "grad_norm": 5.75288200378418, "learning_rate": 9.981034697437163e-07, "loss": 0.0872, "step": 26930 }, { "epoch": 0.2878358886692665, "grad_norm": 7.634392261505127, "learning_rate": 9.98102007523837e-07, "loss": 0.0883, "step": 26940 }, { "epoch": 0.28794273198354614, "grad_norm": 3.302622079849243, "learning_rate": 9.981005447415628e-07, "loss": 0.1443, "step": 26950 }, { "epoch": 0.28804957529782577, "grad_norm": 2.025637626647949, "learning_rate": 9.98099081396896e-07, "loss": 0.1126, "step": 26960 }, { "epoch": 0.28815641861210534, "grad_norm": 4.737741470336914, "learning_rate": 9.980976174898372e-07, "loss": 0.134, "step": 26970 }, { "epoch": 0.28826326192638496, "grad_norm": 6.7576775550842285, "learning_rate": 9.980961530203889e-07, "loss": 0.0943, "step": 26980 }, { "epoch": 0.2883701052406646, "grad_norm": 4.049859523773193, "learning_rate": 9.980946879885522e-07, "loss": 0.0584, "step": 26990 }, { "epoch": 0.28847694855494416, "grad_norm": 1.0440410375595093, "learning_rate": 9.980932223943292e-07, "loss": 0.0531, "step": 27000 }, { "epoch": 0.2885837918692238, "grad_norm": 4.225861549377441, "learning_rate": 9.980917562377213e-07, "loss": 0.0395, "step": 27010 }, { "epoch": 0.2886906351835034, "grad_norm": 8.00046443939209, "learning_rate": 9.980902895187302e-07, "loss": 0.118, "step": 27020 }, { "epoch": 0.288797478497783, "grad_norm": 1.668624758720398, "learning_rate": 9.980888222373576e-07, "loss": 0.0376, "step": 27030 }, { "epoch": 0.2889043218120626, "grad_norm": 7.792720317840576, "learning_rate": 9.98087354393605e-07, "loss": 0.0426, "step": 27040 }, { "epoch": 0.28901116512634223, "grad_norm": 6.352911949157715, "learning_rate": 9.980858859874744e-07, "loss": 0.1249, "step": 27050 }, { "epoch": 0.28911800844062185, "grad_norm": 8.978296279907227, "learning_rate": 9.980844170189673e-07, "loss": 0.0838, "step": 27060 }, { "epoch": 0.2892248517549014, "grad_norm": 7.225490093231201, "learning_rate": 9.98082947488085e-07, "loss": 0.0826, "step": 27070 }, { "epoch": 0.28933169506918105, "grad_norm": 8.218534469604492, "learning_rate": 9.980814773948297e-07, "loss": 0.0749, "step": 27080 }, { "epoch": 0.2894385383834607, "grad_norm": 14.229435920715332, "learning_rate": 9.980800067392028e-07, "loss": 0.1484, "step": 27090 }, { "epoch": 0.28954538169774025, "grad_norm": 6.543548107147217, "learning_rate": 9.980785355212058e-07, "loss": 0.161, "step": 27100 }, { "epoch": 0.28965222501201987, "grad_norm": 6.529452800750732, "learning_rate": 9.980770637408406e-07, "loss": 0.1057, "step": 27110 }, { "epoch": 0.2897590683262995, "grad_norm": 20.323469161987305, "learning_rate": 9.98075591398109e-07, "loss": 0.1517, "step": 27120 }, { "epoch": 0.28986591164057907, "grad_norm": 6.791004657745361, "learning_rate": 9.980741184930123e-07, "loss": 0.0839, "step": 27130 }, { "epoch": 0.2899727549548587, "grad_norm": 7.201937675476074, "learning_rate": 9.980726450255522e-07, "loss": 0.0731, "step": 27140 }, { "epoch": 0.2900795982691383, "grad_norm": 5.707582473754883, "learning_rate": 9.980711709957307e-07, "loss": 0.1118, "step": 27150 }, { "epoch": 0.29018644158341794, "grad_norm": 13.663991928100586, "learning_rate": 9.980696964035494e-07, "loss": 0.0908, "step": 27160 }, { "epoch": 0.2902932848976975, "grad_norm": 9.903478622436523, "learning_rate": 9.980682212490095e-07, "loss": 0.0764, "step": 27170 }, { "epoch": 0.29040012821197714, "grad_norm": 10.251953125, "learning_rate": 9.98066745532113e-07, "loss": 0.1076, "step": 27180 }, { "epoch": 0.29050697152625676, "grad_norm": 6.537292957305908, "learning_rate": 9.980652692528617e-07, "loss": 0.0629, "step": 27190 }, { "epoch": 0.29061381484053633, "grad_norm": 6.770008087158203, "learning_rate": 9.980637924112571e-07, "loss": 0.0871, "step": 27200 }, { "epoch": 0.29072065815481596, "grad_norm": 6.699681758880615, "learning_rate": 9.980623150073008e-07, "loss": 0.1025, "step": 27210 }, { "epoch": 0.2908275014690956, "grad_norm": 0.39077505469322205, "learning_rate": 9.980608370409949e-07, "loss": 0.0669, "step": 27220 }, { "epoch": 0.29093434478337515, "grad_norm": 4.082211971282959, "learning_rate": 9.980593585123401e-07, "loss": 0.0856, "step": 27230 }, { "epoch": 0.2910411880976548, "grad_norm": 0.3633310794830322, "learning_rate": 9.980578794213392e-07, "loss": 0.0575, "step": 27240 }, { "epoch": 0.2911480314119344, "grad_norm": 35.57957458496094, "learning_rate": 9.980563997679933e-07, "loss": 0.162, "step": 27250 }, { "epoch": 0.29125487472621403, "grad_norm": 1.311787724494934, "learning_rate": 9.98054919552304e-07, "loss": 0.0695, "step": 27260 }, { "epoch": 0.2913617180404936, "grad_norm": 1.8150300979614258, "learning_rate": 9.980534387742731e-07, "loss": 0.042, "step": 27270 }, { "epoch": 0.2914685613547732, "grad_norm": 9.400355339050293, "learning_rate": 9.980519574339023e-07, "loss": 0.0798, "step": 27280 }, { "epoch": 0.29157540466905285, "grad_norm": 5.815833568572998, "learning_rate": 9.980504755311935e-07, "loss": 0.0754, "step": 27290 }, { "epoch": 0.2916822479833324, "grad_norm": 5.7622904777526855, "learning_rate": 9.980489930661477e-07, "loss": 0.062, "step": 27300 }, { "epoch": 0.29178909129761205, "grad_norm": 1.844357967376709, "learning_rate": 9.980475100387672e-07, "loss": 0.0867, "step": 27310 }, { "epoch": 0.29189593461189167, "grad_norm": 1.3264672756195068, "learning_rate": 9.980460264490536e-07, "loss": 0.0956, "step": 27320 }, { "epoch": 0.2920027779261713, "grad_norm": 4.4081268310546875, "learning_rate": 9.980445422970083e-07, "loss": 0.085, "step": 27330 }, { "epoch": 0.29210962124045087, "grad_norm": 7.286587715148926, "learning_rate": 9.980430575826331e-07, "loss": 0.0917, "step": 27340 }, { "epoch": 0.2922164645547305, "grad_norm": 6.712029457092285, "learning_rate": 9.980415723059297e-07, "loss": 0.0947, "step": 27350 }, { "epoch": 0.2923233078690101, "grad_norm": 7.552040100097656, "learning_rate": 9.980400864668998e-07, "loss": 0.1256, "step": 27360 }, { "epoch": 0.2924301511832897, "grad_norm": 9.453348159790039, "learning_rate": 9.980386000655452e-07, "loss": 0.0699, "step": 27370 }, { "epoch": 0.2925369944975693, "grad_norm": 2.736222982406616, "learning_rate": 9.980371131018672e-07, "loss": 0.0394, "step": 27380 }, { "epoch": 0.29264383781184894, "grad_norm": 2.3758625984191895, "learning_rate": 9.98035625575868e-07, "loss": 0.0496, "step": 27390 }, { "epoch": 0.2927506811261285, "grad_norm": 5.627198219299316, "learning_rate": 9.980341374875486e-07, "loss": 0.0888, "step": 27400 }, { "epoch": 0.29285752444040813, "grad_norm": 5.888602256774902, "learning_rate": 9.980326488369114e-07, "loss": 0.0525, "step": 27410 }, { "epoch": 0.29296436775468776, "grad_norm": 2.2141711711883545, "learning_rate": 9.980311596239576e-07, "loss": 0.0718, "step": 27420 }, { "epoch": 0.2930712110689674, "grad_norm": 5.36975622177124, "learning_rate": 9.98029669848689e-07, "loss": 0.095, "step": 27430 }, { "epoch": 0.29317805438324696, "grad_norm": 7.351634502410889, "learning_rate": 9.980281795111074e-07, "loss": 0.0786, "step": 27440 }, { "epoch": 0.2932848976975266, "grad_norm": 3.680399179458618, "learning_rate": 9.980266886112144e-07, "loss": 0.1018, "step": 27450 }, { "epoch": 0.2933917410118062, "grad_norm": 0.6754186153411865, "learning_rate": 9.980251971490118e-07, "loss": 0.1116, "step": 27460 }, { "epoch": 0.2934985843260858, "grad_norm": 6.376875877380371, "learning_rate": 9.980237051245009e-07, "loss": 0.092, "step": 27470 }, { "epoch": 0.2936054276403654, "grad_norm": 6.201003074645996, "learning_rate": 9.980222125376837e-07, "loss": 0.1072, "step": 27480 }, { "epoch": 0.293712270954645, "grad_norm": 23.666330337524414, "learning_rate": 9.980207193885619e-07, "loss": 0.0625, "step": 27490 }, { "epoch": 0.2938191142689246, "grad_norm": 3.8760948181152344, "learning_rate": 9.980192256771372e-07, "loss": 0.0909, "step": 27500 }, { "epoch": 0.2939259575832042, "grad_norm": 3.510251045227051, "learning_rate": 9.980177314034112e-07, "loss": 0.0665, "step": 27510 }, { "epoch": 0.29403280089748385, "grad_norm": 6.5326080322265625, "learning_rate": 9.980162365673855e-07, "loss": 0.068, "step": 27520 }, { "epoch": 0.2941396442117635, "grad_norm": 4.728061199188232, "learning_rate": 9.980147411690619e-07, "loss": 0.1147, "step": 27530 }, { "epoch": 0.29424648752604304, "grad_norm": 0.0609247162938118, "learning_rate": 9.98013245208442e-07, "loss": 0.105, "step": 27540 }, { "epoch": 0.29435333084032267, "grad_norm": 21.297771453857422, "learning_rate": 9.980117486855275e-07, "loss": 0.141, "step": 27550 }, { "epoch": 0.2944601741546023, "grad_norm": 7.82897424697876, "learning_rate": 9.980102516003205e-07, "loss": 0.0955, "step": 27560 }, { "epoch": 0.29456701746888186, "grad_norm": 1.9953221082687378, "learning_rate": 9.98008753952822e-07, "loss": 0.0401, "step": 27570 }, { "epoch": 0.2946738607831615, "grad_norm": 8.628555297851562, "learning_rate": 9.980072557430342e-07, "loss": 0.0504, "step": 27580 }, { "epoch": 0.2947807040974411, "grad_norm": 13.312864303588867, "learning_rate": 9.980057569709587e-07, "loss": 0.049, "step": 27590 }, { "epoch": 0.2948875474117207, "grad_norm": 5.9839277267456055, "learning_rate": 9.980042576365968e-07, "loss": 0.1108, "step": 27600 }, { "epoch": 0.2949943907260003, "grad_norm": 15.68162727355957, "learning_rate": 9.980027577399508e-07, "loss": 0.091, "step": 27610 }, { "epoch": 0.29510123404027994, "grad_norm": 15.998839378356934, "learning_rate": 9.980012572810218e-07, "loss": 0.1546, "step": 27620 }, { "epoch": 0.29520807735455956, "grad_norm": 6.296170234680176, "learning_rate": 9.97999756259812e-07, "loss": 0.0911, "step": 27630 }, { "epoch": 0.29531492066883913, "grad_norm": 12.245062828063965, "learning_rate": 9.97998254676323e-07, "loss": 0.1168, "step": 27640 }, { "epoch": 0.29542176398311876, "grad_norm": 0.5443320870399475, "learning_rate": 9.979967525305563e-07, "loss": 0.0579, "step": 27650 }, { "epoch": 0.2955286072973984, "grad_norm": 12.11567497253418, "learning_rate": 9.979952498225135e-07, "loss": 0.0691, "step": 27660 }, { "epoch": 0.29563545061167795, "grad_norm": 1.0560204982757568, "learning_rate": 9.979937465521969e-07, "loss": 0.0362, "step": 27670 }, { "epoch": 0.2957422939259576, "grad_norm": 8.294034957885742, "learning_rate": 9.979922427196073e-07, "loss": 0.0927, "step": 27680 }, { "epoch": 0.2958491372402372, "grad_norm": 8.427302360534668, "learning_rate": 9.97990738324747e-07, "loss": 0.2213, "step": 27690 }, { "epoch": 0.29595598055451683, "grad_norm": 0.11870922893285751, "learning_rate": 9.979892333676178e-07, "loss": 0.0993, "step": 27700 }, { "epoch": 0.2960628238687964, "grad_norm": 6.7048420906066895, "learning_rate": 9.97987727848221e-07, "loss": 0.0848, "step": 27710 }, { "epoch": 0.296169667183076, "grad_norm": 6.534012794494629, "learning_rate": 9.979862217665584e-07, "loss": 0.0586, "step": 27720 }, { "epoch": 0.29627651049735565, "grad_norm": 9.098217964172363, "learning_rate": 9.979847151226319e-07, "loss": 0.1201, "step": 27730 }, { "epoch": 0.2963833538116352, "grad_norm": 5.055124282836914, "learning_rate": 9.97983207916443e-07, "loss": 0.1799, "step": 27740 }, { "epoch": 0.29649019712591484, "grad_norm": 5.044402599334717, "learning_rate": 9.979817001479935e-07, "loss": 0.0759, "step": 27750 }, { "epoch": 0.29659704044019447, "grad_norm": 5.762636184692383, "learning_rate": 9.979801918172852e-07, "loss": 0.0713, "step": 27760 }, { "epoch": 0.29670388375447404, "grad_norm": 4.333587646484375, "learning_rate": 9.979786829243196e-07, "loss": 0.0646, "step": 27770 }, { "epoch": 0.29681072706875367, "grad_norm": 9.909966468811035, "learning_rate": 9.979771734690982e-07, "loss": 0.1055, "step": 27780 }, { "epoch": 0.2969175703830333, "grad_norm": 1.1006776094436646, "learning_rate": 9.979756634516233e-07, "loss": 0.0836, "step": 27790 }, { "epoch": 0.2970244136973129, "grad_norm": 1.1456948518753052, "learning_rate": 9.979741528718962e-07, "loss": 0.0586, "step": 27800 }, { "epoch": 0.2971312570115925, "grad_norm": 0.7701963186264038, "learning_rate": 9.979726417299188e-07, "loss": 0.1463, "step": 27810 }, { "epoch": 0.2972381003258721, "grad_norm": 20.414445877075195, "learning_rate": 9.979711300256925e-07, "loss": 0.0639, "step": 27820 }, { "epoch": 0.29734494364015174, "grad_norm": 11.089432716369629, "learning_rate": 9.979696177592194e-07, "loss": 0.0851, "step": 27830 }, { "epoch": 0.2974517869544313, "grad_norm": 0.075216144323349, "learning_rate": 9.97968104930501e-07, "loss": 0.0393, "step": 27840 }, { "epoch": 0.29755863026871093, "grad_norm": 3.849308729171753, "learning_rate": 9.97966591539539e-07, "loss": 0.0612, "step": 27850 }, { "epoch": 0.29766547358299056, "grad_norm": 1.9808437824249268, "learning_rate": 9.979650775863351e-07, "loss": 0.0688, "step": 27860 }, { "epoch": 0.29777231689727013, "grad_norm": 5.720856189727783, "learning_rate": 9.97963563070891e-07, "loss": 0.0564, "step": 27870 }, { "epoch": 0.29787916021154975, "grad_norm": 24.383968353271484, "learning_rate": 9.979620479932085e-07, "loss": 0.0771, "step": 27880 }, { "epoch": 0.2979860035258294, "grad_norm": 2.253438949584961, "learning_rate": 9.979605323532894e-07, "loss": 0.0788, "step": 27890 }, { "epoch": 0.298092846840109, "grad_norm": 3.020901918411255, "learning_rate": 9.97959016151135e-07, "loss": 0.0834, "step": 27900 }, { "epoch": 0.2981996901543886, "grad_norm": 6.392467975616455, "learning_rate": 9.979574993867475e-07, "loss": 0.0681, "step": 27910 }, { "epoch": 0.2983065334686682, "grad_norm": 0.2744046747684479, "learning_rate": 9.979559820601283e-07, "loss": 0.0628, "step": 27920 }, { "epoch": 0.2984133767829478, "grad_norm": 10.899913787841797, "learning_rate": 9.979544641712792e-07, "loss": 0.0693, "step": 27930 }, { "epoch": 0.2985202200972274, "grad_norm": 5.395066738128662, "learning_rate": 9.979529457202022e-07, "loss": 0.0737, "step": 27940 }, { "epoch": 0.298627063411507, "grad_norm": 9.961792945861816, "learning_rate": 9.979514267068983e-07, "loss": 0.0882, "step": 27950 }, { "epoch": 0.29873390672578665, "grad_norm": 13.164856910705566, "learning_rate": 9.9794990713137e-07, "loss": 0.0757, "step": 27960 }, { "epoch": 0.2988407500400662, "grad_norm": 10.57785415649414, "learning_rate": 9.979483869936185e-07, "loss": 0.1029, "step": 27970 }, { "epoch": 0.29894759335434584, "grad_norm": 0.5997467637062073, "learning_rate": 9.979468662936458e-07, "loss": 0.1264, "step": 27980 }, { "epoch": 0.29905443666862547, "grad_norm": 6.239917278289795, "learning_rate": 9.979453450314534e-07, "loss": 0.0666, "step": 27990 }, { "epoch": 0.2991612799829051, "grad_norm": 10.324417114257812, "learning_rate": 9.979438232070433e-07, "loss": 0.0532, "step": 28000 }, { "epoch": 0.29926812329718466, "grad_norm": 11.12569808959961, "learning_rate": 9.979423008204168e-07, "loss": 0.078, "step": 28010 }, { "epoch": 0.2993749666114643, "grad_norm": 6.583670616149902, "learning_rate": 9.97940777871576e-07, "loss": 0.116, "step": 28020 }, { "epoch": 0.2994818099257439, "grad_norm": 4.892675399780273, "learning_rate": 9.979392543605223e-07, "loss": 0.117, "step": 28030 }, { "epoch": 0.2995886532400235, "grad_norm": 3.7192325592041016, "learning_rate": 9.979377302872577e-07, "loss": 0.0486, "step": 28040 }, { "epoch": 0.2996954965543031, "grad_norm": 5.5892181396484375, "learning_rate": 9.97936205651784e-07, "loss": 0.0497, "step": 28050 }, { "epoch": 0.29980233986858273, "grad_norm": 4.3137898445129395, "learning_rate": 9.979346804541026e-07, "loss": 0.0672, "step": 28060 }, { "epoch": 0.29990918318286236, "grad_norm": 6.18864107131958, "learning_rate": 9.979331546942153e-07, "loss": 0.096, "step": 28070 }, { "epoch": 0.30001602649714193, "grad_norm": 7.767447471618652, "learning_rate": 9.97931628372124e-07, "loss": 0.0466, "step": 28080 }, { "epoch": 0.30012286981142156, "grad_norm": 5.574315071105957, "learning_rate": 9.979301014878302e-07, "loss": 0.0145, "step": 28090 }, { "epoch": 0.3002297131257012, "grad_norm": 1.8184000253677368, "learning_rate": 9.97928574041336e-07, "loss": 0.0513, "step": 28100 }, { "epoch": 0.30033655643998075, "grad_norm": 0.930545449256897, "learning_rate": 9.979270460326426e-07, "loss": 0.1182, "step": 28110 }, { "epoch": 0.3004433997542604, "grad_norm": 0.25873231887817383, "learning_rate": 9.97925517461752e-07, "loss": 0.1214, "step": 28120 }, { "epoch": 0.30055024306854, "grad_norm": 4.4222822189331055, "learning_rate": 9.979239883286661e-07, "loss": 0.0942, "step": 28130 }, { "epoch": 0.30065708638281957, "grad_norm": 0.7873660326004028, "learning_rate": 9.979224586333864e-07, "loss": 0.0295, "step": 28140 }, { "epoch": 0.3007639296970992, "grad_norm": 5.1706695556640625, "learning_rate": 9.979209283759144e-07, "loss": 0.1012, "step": 28150 }, { "epoch": 0.3008707730113788, "grad_norm": 11.913961410522461, "learning_rate": 9.979193975562522e-07, "loss": 0.0399, "step": 28160 }, { "epoch": 0.30097761632565845, "grad_norm": 1.116402268409729, "learning_rate": 9.979178661744016e-07, "loss": 0.0968, "step": 28170 }, { "epoch": 0.301084459639938, "grad_norm": 12.583025932312012, "learning_rate": 9.979163342303641e-07, "loss": 0.1575, "step": 28180 }, { "epoch": 0.30119130295421764, "grad_norm": 4.64235782623291, "learning_rate": 9.979148017241414e-07, "loss": 0.0832, "step": 28190 }, { "epoch": 0.30129814626849727, "grad_norm": 4.612027645111084, "learning_rate": 9.979132686557353e-07, "loss": 0.0791, "step": 28200 }, { "epoch": 0.30140498958277684, "grad_norm": 8.700075149536133, "learning_rate": 9.979117350251477e-07, "loss": 0.067, "step": 28210 }, { "epoch": 0.30151183289705646, "grad_norm": 7.562619686126709, "learning_rate": 9.979102008323799e-07, "loss": 0.0801, "step": 28220 }, { "epoch": 0.3016186762113361, "grad_norm": 4.672749042510986, "learning_rate": 9.979086660774342e-07, "loss": 0.106, "step": 28230 }, { "epoch": 0.30172551952561566, "grad_norm": 8.815134048461914, "learning_rate": 9.979071307603118e-07, "loss": 0.0751, "step": 28240 }, { "epoch": 0.3018323628398953, "grad_norm": 10.415574073791504, "learning_rate": 9.979055948810146e-07, "loss": 0.0523, "step": 28250 }, { "epoch": 0.3019392061541749, "grad_norm": 8.639555931091309, "learning_rate": 9.979040584395446e-07, "loss": 0.089, "step": 28260 }, { "epoch": 0.30204604946845454, "grad_norm": 31.12871551513672, "learning_rate": 9.979025214359034e-07, "loss": 0.0969, "step": 28270 }, { "epoch": 0.3021528927827341, "grad_norm": 1.091300368309021, "learning_rate": 9.979009838700924e-07, "loss": 0.0752, "step": 28280 }, { "epoch": 0.30225973609701373, "grad_norm": 6.48581075668335, "learning_rate": 9.97899445742114e-07, "loss": 0.0713, "step": 28290 }, { "epoch": 0.30236657941129336, "grad_norm": 0.14184069633483887, "learning_rate": 9.978979070519692e-07, "loss": 0.1693, "step": 28300 }, { "epoch": 0.3024734227255729, "grad_norm": 0.08249267935752869, "learning_rate": 9.978963677996602e-07, "loss": 0.0506, "step": 28310 }, { "epoch": 0.30258026603985255, "grad_norm": 3.99629282951355, "learning_rate": 9.978948279851887e-07, "loss": 0.0509, "step": 28320 }, { "epoch": 0.3026871093541322, "grad_norm": 8.38998031616211, "learning_rate": 9.978932876085562e-07, "loss": 0.1231, "step": 28330 }, { "epoch": 0.30279395266841175, "grad_norm": 2.2400124073028564, "learning_rate": 9.978917466697648e-07, "loss": 0.0438, "step": 28340 }, { "epoch": 0.3029007959826914, "grad_norm": 8.808452606201172, "learning_rate": 9.978902051688157e-07, "loss": 0.0551, "step": 28350 }, { "epoch": 0.303007639296971, "grad_norm": 8.868367195129395, "learning_rate": 9.978886631057115e-07, "loss": 0.063, "step": 28360 }, { "epoch": 0.3031144826112506, "grad_norm": 11.864962577819824, "learning_rate": 9.97887120480453e-07, "loss": 0.1024, "step": 28370 }, { "epoch": 0.3032213259255302, "grad_norm": 12.276761054992676, "learning_rate": 9.978855772930426e-07, "loss": 0.0568, "step": 28380 }, { "epoch": 0.3033281692398098, "grad_norm": 1.2220734357833862, "learning_rate": 9.978840335434818e-07, "loss": 0.0471, "step": 28390 }, { "epoch": 0.30343501255408944, "grad_norm": 22.099952697753906, "learning_rate": 9.97882489231772e-07, "loss": 0.1402, "step": 28400 }, { "epoch": 0.303541855868369, "grad_norm": 22.527082443237305, "learning_rate": 9.978809443579156e-07, "loss": 0.0687, "step": 28410 }, { "epoch": 0.30364869918264864, "grad_norm": 8.22404956817627, "learning_rate": 9.97879398921914e-07, "loss": 0.0424, "step": 28420 }, { "epoch": 0.30375554249692827, "grad_norm": 7.2088623046875, "learning_rate": 9.97877852923769e-07, "loss": 0.0997, "step": 28430 }, { "epoch": 0.3038623858112079, "grad_norm": 21.01363754272461, "learning_rate": 9.978763063634822e-07, "loss": 0.0832, "step": 28440 }, { "epoch": 0.30396922912548746, "grad_norm": 2.6954686641693115, "learning_rate": 9.978747592410557e-07, "loss": 0.0649, "step": 28450 }, { "epoch": 0.3040760724397671, "grad_norm": 10.664271354675293, "learning_rate": 9.97873211556491e-07, "loss": 0.0534, "step": 28460 }, { "epoch": 0.3041829157540467, "grad_norm": 1.3537225723266602, "learning_rate": 9.978716633097896e-07, "loss": 0.1118, "step": 28470 }, { "epoch": 0.3042897590683263, "grad_norm": 21.088502883911133, "learning_rate": 9.978701145009537e-07, "loss": 0.2039, "step": 28480 }, { "epoch": 0.3043966023826059, "grad_norm": 1.30097496509552, "learning_rate": 9.978685651299848e-07, "loss": 0.0918, "step": 28490 }, { "epoch": 0.30450344569688553, "grad_norm": 6.455030918121338, "learning_rate": 9.978670151968849e-07, "loss": 0.0847, "step": 28500 }, { "epoch": 0.3046102890111651, "grad_norm": 7.565269470214844, "learning_rate": 9.978654647016553e-07, "loss": 0.0708, "step": 28510 }, { "epoch": 0.30471713232544473, "grad_norm": 8.158624649047852, "learning_rate": 9.97863913644298e-07, "loss": 0.1078, "step": 28520 }, { "epoch": 0.30482397563972435, "grad_norm": 2.7800540924072266, "learning_rate": 9.97862362024815e-07, "loss": 0.0965, "step": 28530 }, { "epoch": 0.304930818954004, "grad_norm": 7.123230934143066, "learning_rate": 9.978608098432078e-07, "loss": 0.1378, "step": 28540 }, { "epoch": 0.30503766226828355, "grad_norm": 7.351923942565918, "learning_rate": 9.978592570994782e-07, "loss": 0.0787, "step": 28550 }, { "epoch": 0.3051445055825632, "grad_norm": 5.113492012023926, "learning_rate": 9.978577037936277e-07, "loss": 0.0392, "step": 28560 }, { "epoch": 0.3052513488968428, "grad_norm": 2.6008927822113037, "learning_rate": 9.978561499256584e-07, "loss": 0.0804, "step": 28570 }, { "epoch": 0.30535819221112237, "grad_norm": 5.985692024230957, "learning_rate": 9.97854595495572e-07, "loss": 0.0258, "step": 28580 }, { "epoch": 0.305465035525402, "grad_norm": 2.767453670501709, "learning_rate": 9.9785304050337e-07, "loss": 0.0465, "step": 28590 }, { "epoch": 0.3055718788396816, "grad_norm": 0.3559315502643585, "learning_rate": 9.978514849490545e-07, "loss": 0.0545, "step": 28600 }, { "epoch": 0.3056787221539612, "grad_norm": 13.12157154083252, "learning_rate": 9.978499288326269e-07, "loss": 0.0553, "step": 28610 }, { "epoch": 0.3057855654682408, "grad_norm": 3.5919172763824463, "learning_rate": 9.978483721540894e-07, "loss": 0.0666, "step": 28620 }, { "epoch": 0.30589240878252044, "grad_norm": 13.184812545776367, "learning_rate": 9.978468149134433e-07, "loss": 0.0527, "step": 28630 }, { "epoch": 0.30599925209680007, "grad_norm": 4.782997131347656, "learning_rate": 9.978452571106909e-07, "loss": 0.0825, "step": 28640 }, { "epoch": 0.30610609541107964, "grad_norm": 3.7657644748687744, "learning_rate": 9.978436987458334e-07, "loss": 0.0684, "step": 28650 }, { "epoch": 0.30621293872535926, "grad_norm": 10.22922134399414, "learning_rate": 9.978421398188728e-07, "loss": 0.0821, "step": 28660 }, { "epoch": 0.3063197820396389, "grad_norm": 1.1607067584991455, "learning_rate": 9.97840580329811e-07, "loss": 0.055, "step": 28670 }, { "epoch": 0.30642662535391846, "grad_norm": 4.129158020019531, "learning_rate": 9.978390202786494e-07, "loss": 0.105, "step": 28680 }, { "epoch": 0.3065334686681981, "grad_norm": 3.2028696537017822, "learning_rate": 9.9783745966539e-07, "loss": 0.0889, "step": 28690 }, { "epoch": 0.3066403119824777, "grad_norm": 3.5041749477386475, "learning_rate": 9.978358984900346e-07, "loss": 0.0909, "step": 28700 }, { "epoch": 0.3067471552967573, "grad_norm": 4.3291449546813965, "learning_rate": 9.978343367525848e-07, "loss": 0.0719, "step": 28710 }, { "epoch": 0.3068539986110369, "grad_norm": 8.713674545288086, "learning_rate": 9.978327744530427e-07, "loss": 0.0653, "step": 28720 }, { "epoch": 0.30696084192531653, "grad_norm": 4.8611063957214355, "learning_rate": 9.978312115914094e-07, "loss": 0.0538, "step": 28730 }, { "epoch": 0.30706768523959616, "grad_norm": 12.400928497314453, "learning_rate": 9.978296481676875e-07, "loss": 0.1196, "step": 28740 }, { "epoch": 0.3071745285538757, "grad_norm": 3.6354238986968994, "learning_rate": 9.978280841818781e-07, "loss": 0.0626, "step": 28750 }, { "epoch": 0.30728137186815535, "grad_norm": 7.126413345336914, "learning_rate": 9.978265196339834e-07, "loss": 0.1563, "step": 28760 }, { "epoch": 0.307388215182435, "grad_norm": 3.678448438644409, "learning_rate": 9.97824954524005e-07, "loss": 0.0939, "step": 28770 }, { "epoch": 0.30749505849671455, "grad_norm": 2.838298797607422, "learning_rate": 9.978233888519446e-07, "loss": 0.1001, "step": 28780 }, { "epoch": 0.30760190181099417, "grad_norm": 6.237448215484619, "learning_rate": 9.97821822617804e-07, "loss": 0.1162, "step": 28790 }, { "epoch": 0.3077087451252738, "grad_norm": 0.29147860407829285, "learning_rate": 9.978202558215848e-07, "loss": 0.0563, "step": 28800 }, { "epoch": 0.3078155884395534, "grad_norm": 0.29588207602500916, "learning_rate": 9.978186884632891e-07, "loss": 0.1164, "step": 28810 }, { "epoch": 0.307922431753833, "grad_norm": 9.674430847167969, "learning_rate": 9.978171205429187e-07, "loss": 0.1219, "step": 28820 }, { "epoch": 0.3080292750681126, "grad_norm": 7.270303249359131, "learning_rate": 9.97815552060475e-07, "loss": 0.0351, "step": 28830 }, { "epoch": 0.30813611838239224, "grad_norm": 5.157218933105469, "learning_rate": 9.9781398301596e-07, "loss": 0.111, "step": 28840 }, { "epoch": 0.3082429616966718, "grad_norm": 9.36655330657959, "learning_rate": 9.978124134093755e-07, "loss": 0.0818, "step": 28850 }, { "epoch": 0.30834980501095144, "grad_norm": 43.704254150390625, "learning_rate": 9.978108432407232e-07, "loss": 0.1541, "step": 28860 }, { "epoch": 0.30845664832523106, "grad_norm": 5.638408184051514, "learning_rate": 9.978092725100049e-07, "loss": 0.0705, "step": 28870 }, { "epoch": 0.30856349163951063, "grad_norm": 5.305511951446533, "learning_rate": 9.978077012172223e-07, "loss": 0.086, "step": 28880 }, { "epoch": 0.30867033495379026, "grad_norm": 0.8999250531196594, "learning_rate": 9.978061293623773e-07, "loss": 0.0437, "step": 28890 }, { "epoch": 0.3087771782680699, "grad_norm": 5.905198574066162, "learning_rate": 9.978045569454715e-07, "loss": 0.0797, "step": 28900 }, { "epoch": 0.3088840215823495, "grad_norm": 1.502103328704834, "learning_rate": 9.97802983966507e-07, "loss": 0.0714, "step": 28910 }, { "epoch": 0.3089908648966291, "grad_norm": 6.7707414627075195, "learning_rate": 9.97801410425485e-07, "loss": 0.0329, "step": 28920 }, { "epoch": 0.3090977082109087, "grad_norm": 6.583604335784912, "learning_rate": 9.97799836322408e-07, "loss": 0.0703, "step": 28930 }, { "epoch": 0.30920455152518833, "grad_norm": 5.914343357086182, "learning_rate": 9.977982616572774e-07, "loss": 0.0984, "step": 28940 }, { "epoch": 0.3093113948394679, "grad_norm": 8.727392196655273, "learning_rate": 9.977966864300948e-07, "loss": 0.1142, "step": 28950 }, { "epoch": 0.3094182381537475, "grad_norm": 0.9049507975578308, "learning_rate": 9.977951106408622e-07, "loss": 0.0454, "step": 28960 }, { "epoch": 0.30952508146802715, "grad_norm": 2.1553421020507812, "learning_rate": 9.977935342895813e-07, "loss": 0.0802, "step": 28970 }, { "epoch": 0.3096319247823067, "grad_norm": 9.853100776672363, "learning_rate": 9.97791957376254e-07, "loss": 0.0408, "step": 28980 }, { "epoch": 0.30973876809658635, "grad_norm": 0.034955888986587524, "learning_rate": 9.977903799008822e-07, "loss": 0.0857, "step": 28990 }, { "epoch": 0.309845611410866, "grad_norm": 5.04058313369751, "learning_rate": 9.977888018634672e-07, "loss": 0.0513, "step": 29000 }, { "epoch": 0.3099524547251456, "grad_norm": 9.921769142150879, "learning_rate": 9.977872232640112e-07, "loss": 0.0612, "step": 29010 }, { "epoch": 0.31005929803942517, "grad_norm": 9.330183982849121, "learning_rate": 9.977856441025158e-07, "loss": 0.0416, "step": 29020 }, { "epoch": 0.3101661413537048, "grad_norm": 6.087348461151123, "learning_rate": 9.97784064378983e-07, "loss": 0.1409, "step": 29030 }, { "epoch": 0.3102729846679844, "grad_norm": 1.0116819143295288, "learning_rate": 9.977824840934142e-07, "loss": 0.0968, "step": 29040 }, { "epoch": 0.310379827982264, "grad_norm": 8.92811107635498, "learning_rate": 9.977809032458116e-07, "loss": 0.0477, "step": 29050 }, { "epoch": 0.3104866712965436, "grad_norm": 4.512068748474121, "learning_rate": 9.977793218361766e-07, "loss": 0.0816, "step": 29060 }, { "epoch": 0.31059351461082324, "grad_norm": 0.5644201636314392, "learning_rate": 9.977777398645113e-07, "loss": 0.0782, "step": 29070 }, { "epoch": 0.3107003579251028, "grad_norm": 7.307891368865967, "learning_rate": 9.977761573308174e-07, "loss": 0.0983, "step": 29080 }, { "epoch": 0.31080720123938244, "grad_norm": 11.253081321716309, "learning_rate": 9.977745742350966e-07, "loss": 0.0757, "step": 29090 }, { "epoch": 0.31091404455366206, "grad_norm": 1.0707584619522095, "learning_rate": 9.977729905773506e-07, "loss": 0.2116, "step": 29100 }, { "epoch": 0.3110208878679417, "grad_norm": 3.1594512462615967, "learning_rate": 9.977714063575816e-07, "loss": 0.0629, "step": 29110 }, { "epoch": 0.31112773118222126, "grad_norm": 6.095585346221924, "learning_rate": 9.977698215757909e-07, "loss": 0.1354, "step": 29120 }, { "epoch": 0.3112345744965009, "grad_norm": 5.843634605407715, "learning_rate": 9.977682362319805e-07, "loss": 0.1525, "step": 29130 }, { "epoch": 0.3113414178107805, "grad_norm": 4.048203945159912, "learning_rate": 9.977666503261523e-07, "loss": 0.0673, "step": 29140 }, { "epoch": 0.3114482611250601, "grad_norm": 11.545858383178711, "learning_rate": 9.97765063858308e-07, "loss": 0.0397, "step": 29150 }, { "epoch": 0.3115551044393397, "grad_norm": 15.010643005371094, "learning_rate": 9.977634768284493e-07, "loss": 0.1108, "step": 29160 }, { "epoch": 0.31166194775361933, "grad_norm": 10.1986083984375, "learning_rate": 9.977618892365781e-07, "loss": 0.0429, "step": 29170 }, { "epoch": 0.31176879106789895, "grad_norm": 5.922360897064209, "learning_rate": 9.977603010826961e-07, "loss": 0.1238, "step": 29180 }, { "epoch": 0.3118756343821785, "grad_norm": 2.0089151859283447, "learning_rate": 9.977587123668052e-07, "loss": 0.052, "step": 29190 }, { "epoch": 0.31198247769645815, "grad_norm": 0.14378847181797028, "learning_rate": 9.977571230889073e-07, "loss": 0.0777, "step": 29200 }, { "epoch": 0.3120893210107378, "grad_norm": 4.04680061340332, "learning_rate": 9.977555332490037e-07, "loss": 0.0871, "step": 29210 }, { "epoch": 0.31219616432501734, "grad_norm": 6.8911919593811035, "learning_rate": 9.97753942847097e-07, "loss": 0.0393, "step": 29220 }, { "epoch": 0.31230300763929697, "grad_norm": 4.201440334320068, "learning_rate": 9.97752351883188e-07, "loss": 0.0545, "step": 29230 }, { "epoch": 0.3124098509535766, "grad_norm": 6.387643337249756, "learning_rate": 9.977507603572792e-07, "loss": 0.1107, "step": 29240 }, { "epoch": 0.31251669426785617, "grad_norm": 4.174589157104492, "learning_rate": 9.977491682693724e-07, "loss": 0.0806, "step": 29250 }, { "epoch": 0.3126235375821358, "grad_norm": 6.048783779144287, "learning_rate": 9.977475756194692e-07, "loss": 0.1409, "step": 29260 }, { "epoch": 0.3127303808964154, "grad_norm": 4.021758079528809, "learning_rate": 9.977459824075714e-07, "loss": 0.1144, "step": 29270 }, { "epoch": 0.31283722421069504, "grad_norm": 2.791282892227173, "learning_rate": 9.977443886336807e-07, "loss": 0.1024, "step": 29280 }, { "epoch": 0.3129440675249746, "grad_norm": 4.488757133483887, "learning_rate": 9.97742794297799e-07, "loss": 0.0616, "step": 29290 }, { "epoch": 0.31305091083925424, "grad_norm": 9.365114212036133, "learning_rate": 9.977411993999283e-07, "loss": 0.1442, "step": 29300 }, { "epoch": 0.31315775415353386, "grad_norm": 24.96857452392578, "learning_rate": 9.9773960394007e-07, "loss": 0.061, "step": 29310 }, { "epoch": 0.31326459746781343, "grad_norm": 5.349048137664795, "learning_rate": 9.977380079182264e-07, "loss": 0.0967, "step": 29320 }, { "epoch": 0.31337144078209306, "grad_norm": 13.89199447631836, "learning_rate": 9.97736411334399e-07, "loss": 0.0976, "step": 29330 }, { "epoch": 0.3134782840963727, "grad_norm": 4.905890941619873, "learning_rate": 9.977348141885892e-07, "loss": 0.1251, "step": 29340 }, { "epoch": 0.31358512741065225, "grad_norm": 7.566993236541748, "learning_rate": 9.977332164807996e-07, "loss": 0.0616, "step": 29350 }, { "epoch": 0.3136919707249319, "grad_norm": 2.0137369632720947, "learning_rate": 9.977316182110316e-07, "loss": 0.064, "step": 29360 }, { "epoch": 0.3137988140392115, "grad_norm": 1.7982041835784912, "learning_rate": 9.97730019379287e-07, "loss": 0.1039, "step": 29370 }, { "epoch": 0.31390565735349113, "grad_norm": 4.3855671882629395, "learning_rate": 9.977284199855676e-07, "loss": 0.0769, "step": 29380 }, { "epoch": 0.3140125006677707, "grad_norm": 9.23268985748291, "learning_rate": 9.977268200298754e-07, "loss": 0.1158, "step": 29390 }, { "epoch": 0.3141193439820503, "grad_norm": 3.175793170928955, "learning_rate": 9.97725219512212e-07, "loss": 0.0634, "step": 29400 }, { "epoch": 0.31422618729632995, "grad_norm": 1.743062973022461, "learning_rate": 9.97723618432579e-07, "loss": 0.0852, "step": 29410 }, { "epoch": 0.3143330306106095, "grad_norm": 7.467675685882568, "learning_rate": 9.977220167909789e-07, "loss": 0.1052, "step": 29420 }, { "epoch": 0.31443987392488915, "grad_norm": 0.7323868870735168, "learning_rate": 9.977204145874128e-07, "loss": 0.0886, "step": 29430 }, { "epoch": 0.31454671723916877, "grad_norm": 3.6467602252960205, "learning_rate": 9.977188118218826e-07, "loss": 0.1076, "step": 29440 }, { "epoch": 0.31465356055344834, "grad_norm": 2.9597814083099365, "learning_rate": 9.977172084943906e-07, "loss": 0.037, "step": 29450 }, { "epoch": 0.31476040386772797, "grad_norm": 4.869311809539795, "learning_rate": 9.977156046049383e-07, "loss": 0.0413, "step": 29460 }, { "epoch": 0.3148672471820076, "grad_norm": 8.160491943359375, "learning_rate": 9.977140001535274e-07, "loss": 0.1111, "step": 29470 }, { "epoch": 0.3149740904962872, "grad_norm": 4.857499122619629, "learning_rate": 9.9771239514016e-07, "loss": 0.0884, "step": 29480 }, { "epoch": 0.3150809338105668, "grad_norm": 4.512084484100342, "learning_rate": 9.977107895648376e-07, "loss": 0.0981, "step": 29490 }, { "epoch": 0.3151877771248464, "grad_norm": 3.7441422939300537, "learning_rate": 9.97709183427562e-07, "loss": 0.1397, "step": 29500 }, { "epoch": 0.31529462043912604, "grad_norm": 1.2644370794296265, "learning_rate": 9.977075767283354e-07, "loss": 0.0645, "step": 29510 }, { "epoch": 0.3154014637534056, "grad_norm": 13.147141456604004, "learning_rate": 9.977059694671594e-07, "loss": 0.0467, "step": 29520 }, { "epoch": 0.31550830706768523, "grad_norm": 21.896472930908203, "learning_rate": 9.977043616440358e-07, "loss": 0.1426, "step": 29530 }, { "epoch": 0.31561515038196486, "grad_norm": 2.6480400562286377, "learning_rate": 9.977027532589663e-07, "loss": 0.0513, "step": 29540 }, { "epoch": 0.3157219936962445, "grad_norm": 12.523386001586914, "learning_rate": 9.97701144311953e-07, "loss": 0.1162, "step": 29550 }, { "epoch": 0.31582883701052406, "grad_norm": 6.351444244384766, "learning_rate": 9.976995348029972e-07, "loss": 0.0586, "step": 29560 }, { "epoch": 0.3159356803248037, "grad_norm": 8.514094352722168, "learning_rate": 9.976979247321013e-07, "loss": 0.0863, "step": 29570 }, { "epoch": 0.3160425236390833, "grad_norm": 0.46667206287384033, "learning_rate": 9.97696314099267e-07, "loss": 0.0625, "step": 29580 }, { "epoch": 0.3161493669533629, "grad_norm": 10.233365058898926, "learning_rate": 9.97694702904496e-07, "loss": 0.052, "step": 29590 }, { "epoch": 0.3162562102676425, "grad_norm": 7.241458892822266, "learning_rate": 9.9769309114779e-07, "loss": 0.0573, "step": 29600 }, { "epoch": 0.3163630535819221, "grad_norm": 9.196579933166504, "learning_rate": 9.976914788291508e-07, "loss": 0.1513, "step": 29610 }, { "epoch": 0.3164698968962017, "grad_norm": 4.4914655685424805, "learning_rate": 9.976898659485803e-07, "loss": 0.0901, "step": 29620 }, { "epoch": 0.3165767402104813, "grad_norm": 1.0324344635009766, "learning_rate": 9.976882525060805e-07, "loss": 0.0576, "step": 29630 }, { "epoch": 0.31668358352476095, "grad_norm": 0.5113873481750488, "learning_rate": 9.976866385016534e-07, "loss": 0.1083, "step": 29640 }, { "epoch": 0.3167904268390406, "grad_norm": 3.5464529991149902, "learning_rate": 9.976850239353001e-07, "loss": 0.0759, "step": 29650 }, { "epoch": 0.31689727015332014, "grad_norm": 1.0992178916931152, "learning_rate": 9.97683408807023e-07, "loss": 0.0639, "step": 29660 }, { "epoch": 0.31700411346759977, "grad_norm": 7.894894123077393, "learning_rate": 9.97681793116824e-07, "loss": 0.0627, "step": 29670 }, { "epoch": 0.3171109567818794, "grad_norm": 3.5159058570861816, "learning_rate": 9.976801768647045e-07, "loss": 0.0767, "step": 29680 }, { "epoch": 0.31721780009615896, "grad_norm": 6.70307731628418, "learning_rate": 9.976785600506663e-07, "loss": 0.0599, "step": 29690 }, { "epoch": 0.3173246434104386, "grad_norm": 5.132009983062744, "learning_rate": 9.976769426747117e-07, "loss": 0.1027, "step": 29700 }, { "epoch": 0.3174314867247182, "grad_norm": 11.322327613830566, "learning_rate": 9.976753247368423e-07, "loss": 0.1013, "step": 29710 }, { "epoch": 0.3175383300389978, "grad_norm": 15.469308853149414, "learning_rate": 9.976737062370596e-07, "loss": 0.12, "step": 29720 }, { "epoch": 0.3176451733532774, "grad_norm": 2.469942331314087, "learning_rate": 9.976720871753662e-07, "loss": 0.0234, "step": 29730 }, { "epoch": 0.31775201666755704, "grad_norm": 4.212401390075684, "learning_rate": 9.97670467551763e-07, "loss": 0.0517, "step": 29740 }, { "epoch": 0.31785885998183666, "grad_norm": 2.350952625274658, "learning_rate": 9.976688473662526e-07, "loss": 0.0802, "step": 29750 }, { "epoch": 0.31796570329611623, "grad_norm": 9.477609634399414, "learning_rate": 9.976672266188365e-07, "loss": 0.0691, "step": 29760 }, { "epoch": 0.31807254661039586, "grad_norm": 1.1444613933563232, "learning_rate": 9.976656053095163e-07, "loss": 0.1201, "step": 29770 }, { "epoch": 0.3181793899246755, "grad_norm": 11.75113582611084, "learning_rate": 9.976639834382941e-07, "loss": 0.0675, "step": 29780 }, { "epoch": 0.31828623323895505, "grad_norm": 4.594038486480713, "learning_rate": 9.97662361005172e-07, "loss": 0.0526, "step": 29790 }, { "epoch": 0.3183930765532347, "grad_norm": 26.703508377075195, "learning_rate": 9.976607380101511e-07, "loss": 0.082, "step": 29800 }, { "epoch": 0.3184999198675143, "grad_norm": 0.8482630252838135, "learning_rate": 9.976591144532341e-07, "loss": 0.1092, "step": 29810 }, { "epoch": 0.3186067631817939, "grad_norm": 11.479350090026855, "learning_rate": 9.976574903344221e-07, "loss": 0.091, "step": 29820 }, { "epoch": 0.3187136064960735, "grad_norm": 6.2967915534973145, "learning_rate": 9.976558656537172e-07, "loss": 0.0845, "step": 29830 }, { "epoch": 0.3188204498103531, "grad_norm": 9.876012802124023, "learning_rate": 9.976542404111216e-07, "loss": 0.0539, "step": 29840 }, { "epoch": 0.31892729312463275, "grad_norm": 5.7048234939575195, "learning_rate": 9.976526146066366e-07, "loss": 0.0703, "step": 29850 }, { "epoch": 0.3190341364389123, "grad_norm": 0.3765224516391754, "learning_rate": 9.976509882402642e-07, "loss": 0.0821, "step": 29860 }, { "epoch": 0.31914097975319194, "grad_norm": 2.3763067722320557, "learning_rate": 9.976493613120063e-07, "loss": 0.0393, "step": 29870 }, { "epoch": 0.31924782306747157, "grad_norm": 9.43361759185791, "learning_rate": 9.976477338218647e-07, "loss": 0.1379, "step": 29880 }, { "epoch": 0.31935466638175114, "grad_norm": 8.108382225036621, "learning_rate": 9.976461057698414e-07, "loss": 0.0812, "step": 29890 }, { "epoch": 0.31946150969603077, "grad_norm": 0.9788596034049988, "learning_rate": 9.976444771559378e-07, "loss": 0.1137, "step": 29900 }, { "epoch": 0.3195683530103104, "grad_norm": 16.665130615234375, "learning_rate": 9.976428479801561e-07, "loss": 0.1448, "step": 29910 }, { "epoch": 0.31967519632459, "grad_norm": 2.704075813293457, "learning_rate": 9.976412182424983e-07, "loss": 0.0902, "step": 29920 }, { "epoch": 0.3197820396388696, "grad_norm": 7.066327095031738, "learning_rate": 9.97639587942966e-07, "loss": 0.1112, "step": 29930 }, { "epoch": 0.3198888829531492, "grad_norm": 11.258089065551758, "learning_rate": 9.976379570815607e-07, "loss": 0.0792, "step": 29940 }, { "epoch": 0.31999572626742884, "grad_norm": 18.6341609954834, "learning_rate": 9.97636325658285e-07, "loss": 0.0484, "step": 29950 }, { "epoch": 0.3201025695817084, "grad_norm": 8.001336097717285, "learning_rate": 9.9763469367314e-07, "loss": 0.0438, "step": 29960 }, { "epoch": 0.32020941289598803, "grad_norm": 13.283615112304688, "learning_rate": 9.97633061126128e-07, "loss": 0.0459, "step": 29970 }, { "epoch": 0.32031625621026766, "grad_norm": 1.680458664894104, "learning_rate": 9.976314280172507e-07, "loss": 0.0469, "step": 29980 }, { "epoch": 0.32042309952454723, "grad_norm": 0.9886888861656189, "learning_rate": 9.9762979434651e-07, "loss": 0.1301, "step": 29990 }, { "epoch": 0.32052994283882685, "grad_norm": 8.421693801879883, "learning_rate": 9.976281601139078e-07, "loss": 0.046, "step": 30000 }, { "epoch": 0.3206367861531065, "grad_norm": 4.007447242736816, "learning_rate": 9.976265253194455e-07, "loss": 0.0416, "step": 30010 }, { "epoch": 0.3207436294673861, "grad_norm": 0.32728636264801025, "learning_rate": 9.976248899631258e-07, "loss": 0.0259, "step": 30020 }, { "epoch": 0.3208504727816657, "grad_norm": 12.909259796142578, "learning_rate": 9.976232540449496e-07, "loss": 0.0526, "step": 30030 }, { "epoch": 0.3209573160959453, "grad_norm": 5.413796424865723, "learning_rate": 9.976216175649195e-07, "loss": 0.0865, "step": 30040 }, { "epoch": 0.3210641594102249, "grad_norm": 14.763965606689453, "learning_rate": 9.976199805230367e-07, "loss": 0.0836, "step": 30050 }, { "epoch": 0.3211710027245045, "grad_norm": 2.8538448810577393, "learning_rate": 9.976183429193037e-07, "loss": 0.0515, "step": 30060 }, { "epoch": 0.3212778460387841, "grad_norm": 5.337742805480957, "learning_rate": 9.97616704753722e-07, "loss": 0.0943, "step": 30070 }, { "epoch": 0.32138468935306375, "grad_norm": 7.270519733428955, "learning_rate": 9.976150660262932e-07, "loss": 0.0734, "step": 30080 }, { "epoch": 0.3214915326673433, "grad_norm": 7.568999767303467, "learning_rate": 9.976134267370197e-07, "loss": 0.1986, "step": 30090 }, { "epoch": 0.32159837598162294, "grad_norm": 8.132533073425293, "learning_rate": 9.97611786885903e-07, "loss": 0.085, "step": 30100 }, { "epoch": 0.32170521929590257, "grad_norm": 7.714494228363037, "learning_rate": 9.97610146472945e-07, "loss": 0.0834, "step": 30110 }, { "epoch": 0.3218120626101822, "grad_norm": 1.7845940589904785, "learning_rate": 9.976085054981477e-07, "loss": 0.0755, "step": 30120 }, { "epoch": 0.32191890592446176, "grad_norm": 5.413444519042969, "learning_rate": 9.976068639615126e-07, "loss": 0.0387, "step": 30130 }, { "epoch": 0.3220257492387414, "grad_norm": 7.633319854736328, "learning_rate": 9.976052218630423e-07, "loss": 0.0603, "step": 30140 }, { "epoch": 0.322132592553021, "grad_norm": 5.463374137878418, "learning_rate": 9.976035792027377e-07, "loss": 0.0768, "step": 30150 }, { "epoch": 0.3222394358673006, "grad_norm": 13.380141258239746, "learning_rate": 9.97601935980601e-07, "loss": 0.1351, "step": 30160 }, { "epoch": 0.3223462791815802, "grad_norm": 4.204174518585205, "learning_rate": 9.976002921966345e-07, "loss": 0.072, "step": 30170 }, { "epoch": 0.32245312249585983, "grad_norm": 4.559765338897705, "learning_rate": 9.975986478508398e-07, "loss": 0.1234, "step": 30180 }, { "epoch": 0.3225599658101394, "grad_norm": 8.371955871582031, "learning_rate": 9.975970029432183e-07, "loss": 0.0457, "step": 30190 }, { "epoch": 0.32266680912441903, "grad_norm": 25.126874923706055, "learning_rate": 9.975953574737724e-07, "loss": 0.1537, "step": 30200 }, { "epoch": 0.32277365243869865, "grad_norm": 1.5320472717285156, "learning_rate": 9.975937114425037e-07, "loss": 0.0555, "step": 30210 }, { "epoch": 0.3228804957529783, "grad_norm": 6.720611095428467, "learning_rate": 9.975920648494144e-07, "loss": 0.0612, "step": 30220 }, { "epoch": 0.32298733906725785, "grad_norm": 2.4147236347198486, "learning_rate": 9.975904176945059e-07, "loss": 0.0463, "step": 30230 }, { "epoch": 0.3230941823815375, "grad_norm": 13.561771392822266, "learning_rate": 9.975887699777803e-07, "loss": 0.125, "step": 30240 }, { "epoch": 0.3232010256958171, "grad_norm": 1.360355257987976, "learning_rate": 9.975871216992394e-07, "loss": 0.0833, "step": 30250 }, { "epoch": 0.32330786901009667, "grad_norm": 9.864860534667969, "learning_rate": 9.97585472858885e-07, "loss": 0.0581, "step": 30260 }, { "epoch": 0.3234147123243763, "grad_norm": 14.0172758102417, "learning_rate": 9.975838234567194e-07, "loss": 0.0422, "step": 30270 }, { "epoch": 0.3235215556386559, "grad_norm": 4.0745110511779785, "learning_rate": 9.975821734927436e-07, "loss": 0.0514, "step": 30280 }, { "epoch": 0.32362839895293555, "grad_norm": 1.6447768211364746, "learning_rate": 9.975805229669602e-07, "loss": 0.1151, "step": 30290 }, { "epoch": 0.3237352422672151, "grad_norm": 14.797849655151367, "learning_rate": 9.975788718793709e-07, "loss": 0.1469, "step": 30300 }, { "epoch": 0.32384208558149474, "grad_norm": 4.365163326263428, "learning_rate": 9.975772202299776e-07, "loss": 0.098, "step": 30310 }, { "epoch": 0.32394892889577437, "grad_norm": 3.130448341369629, "learning_rate": 9.975755680187818e-07, "loss": 0.0515, "step": 30320 }, { "epoch": 0.32405577221005394, "grad_norm": 12.351964950561523, "learning_rate": 9.975739152457858e-07, "loss": 0.1385, "step": 30330 }, { "epoch": 0.32416261552433356, "grad_norm": 4.956358432769775, "learning_rate": 9.975722619109914e-07, "loss": 0.0523, "step": 30340 }, { "epoch": 0.3242694588386132, "grad_norm": 5.263832092285156, "learning_rate": 9.975706080144003e-07, "loss": 0.12, "step": 30350 }, { "epoch": 0.32437630215289276, "grad_norm": 3.1113338470458984, "learning_rate": 9.975689535560142e-07, "loss": 0.048, "step": 30360 }, { "epoch": 0.3244831454671724, "grad_norm": 3.6092817783355713, "learning_rate": 9.975672985358355e-07, "loss": 0.1366, "step": 30370 }, { "epoch": 0.324589988781452, "grad_norm": 12.929241180419922, "learning_rate": 9.975656429538655e-07, "loss": 0.1666, "step": 30380 }, { "epoch": 0.32469683209573164, "grad_norm": 3.231868267059326, "learning_rate": 9.975639868101065e-07, "loss": 0.1029, "step": 30390 }, { "epoch": 0.3248036754100112, "grad_norm": 5.911936283111572, "learning_rate": 9.975623301045602e-07, "loss": 0.1575, "step": 30400 }, { "epoch": 0.32491051872429083, "grad_norm": 4.614807605743408, "learning_rate": 9.975606728372284e-07, "loss": 0.0567, "step": 30410 }, { "epoch": 0.32501736203857046, "grad_norm": 19.224002838134766, "learning_rate": 9.97559015008113e-07, "loss": 0.0821, "step": 30420 }, { "epoch": 0.32512420535285, "grad_norm": 6.7893829345703125, "learning_rate": 9.97557356617216e-07, "loss": 0.0441, "step": 30430 }, { "epoch": 0.32523104866712965, "grad_norm": 6.1656317710876465, "learning_rate": 9.975556976645392e-07, "loss": 0.0675, "step": 30440 }, { "epoch": 0.3253378919814093, "grad_norm": 3.0438880920410156, "learning_rate": 9.975540381500844e-07, "loss": 0.1208, "step": 30450 }, { "epoch": 0.32544473529568885, "grad_norm": 0.801247239112854, "learning_rate": 9.975523780738537e-07, "loss": 0.0659, "step": 30460 }, { "epoch": 0.3255515786099685, "grad_norm": 3.2232918739318848, "learning_rate": 9.975507174358487e-07, "loss": 0.0936, "step": 30470 }, { "epoch": 0.3256584219242481, "grad_norm": 0.8429405689239502, "learning_rate": 9.975490562360713e-07, "loss": 0.0781, "step": 30480 }, { "epoch": 0.3257652652385277, "grad_norm": 3.1990387439727783, "learning_rate": 9.975473944745237e-07, "loss": 0.0503, "step": 30490 }, { "epoch": 0.3258721085528073, "grad_norm": 9.881217002868652, "learning_rate": 9.975457321512073e-07, "loss": 0.0555, "step": 30500 }, { "epoch": 0.3259789518670869, "grad_norm": 9.680261611938477, "learning_rate": 9.975440692661243e-07, "loss": 0.0689, "step": 30510 }, { "epoch": 0.32608579518136654, "grad_norm": 3.909970283508301, "learning_rate": 9.975424058192765e-07, "loss": 0.0841, "step": 30520 }, { "epoch": 0.3261926384956461, "grad_norm": 6.839663982391357, "learning_rate": 9.975407418106659e-07, "loss": 0.0787, "step": 30530 }, { "epoch": 0.32629948180992574, "grad_norm": 0.6177923083305359, "learning_rate": 9.97539077240294e-07, "loss": 0.0541, "step": 30540 }, { "epoch": 0.32640632512420537, "grad_norm": 11.033072471618652, "learning_rate": 9.975374121081633e-07, "loss": 0.1057, "step": 30550 }, { "epoch": 0.32651316843848494, "grad_norm": 2.1328248977661133, "learning_rate": 9.97535746414275e-07, "loss": 0.1215, "step": 30560 }, { "epoch": 0.32662001175276456, "grad_norm": 3.7660248279571533, "learning_rate": 9.975340801586314e-07, "loss": 0.0475, "step": 30570 }, { "epoch": 0.3267268550670442, "grad_norm": 6.4480061531066895, "learning_rate": 9.975324133412341e-07, "loss": 0.1213, "step": 30580 }, { "epoch": 0.3268336983813238, "grad_norm": 2.7303545475006104, "learning_rate": 9.975307459620854e-07, "loss": 0.0652, "step": 30590 }, { "epoch": 0.3269405416956034, "grad_norm": 0.3093433678150177, "learning_rate": 9.975290780211868e-07, "loss": 0.0506, "step": 30600 }, { "epoch": 0.327047385009883, "grad_norm": 1.0350780487060547, "learning_rate": 9.975274095185404e-07, "loss": 0.0449, "step": 30610 }, { "epoch": 0.32715422832416263, "grad_norm": 1.5671160221099854, "learning_rate": 9.97525740454148e-07, "loss": 0.0363, "step": 30620 }, { "epoch": 0.3272610716384422, "grad_norm": 1.253946304321289, "learning_rate": 9.975240708280114e-07, "loss": 0.0401, "step": 30630 }, { "epoch": 0.32736791495272183, "grad_norm": 11.38263988494873, "learning_rate": 9.975224006401326e-07, "loss": 0.0788, "step": 30640 }, { "epoch": 0.32747475826700145, "grad_norm": 2.003100872039795, "learning_rate": 9.975207298905136e-07, "loss": 0.1065, "step": 30650 }, { "epoch": 0.3275816015812811, "grad_norm": 1.8278290033340454, "learning_rate": 9.975190585791559e-07, "loss": 0.0842, "step": 30660 }, { "epoch": 0.32768844489556065, "grad_norm": 9.994914054870605, "learning_rate": 9.975173867060618e-07, "loss": 0.123, "step": 30670 }, { "epoch": 0.3277952882098403, "grad_norm": 5.063523769378662, "learning_rate": 9.975157142712329e-07, "loss": 0.0776, "step": 30680 }, { "epoch": 0.3279021315241199, "grad_norm": 9.906636238098145, "learning_rate": 9.975140412746714e-07, "loss": 0.0691, "step": 30690 }, { "epoch": 0.32800897483839947, "grad_norm": 3.923361301422119, "learning_rate": 9.975123677163788e-07, "loss": 0.0585, "step": 30700 }, { "epoch": 0.3281158181526791, "grad_norm": 2.429189443588257, "learning_rate": 9.975106935963573e-07, "loss": 0.0981, "step": 30710 }, { "epoch": 0.3282226614669587, "grad_norm": 0.1798214167356491, "learning_rate": 9.975090189146087e-07, "loss": 0.0898, "step": 30720 }, { "epoch": 0.3283295047812383, "grad_norm": 7.581548690795898, "learning_rate": 9.975073436711347e-07, "loss": 0.0372, "step": 30730 }, { "epoch": 0.3284363480955179, "grad_norm": 19.066734313964844, "learning_rate": 9.975056678659376e-07, "loss": 0.1368, "step": 30740 }, { "epoch": 0.32854319140979754, "grad_norm": 32.41464614868164, "learning_rate": 9.97503991499019e-07, "loss": 0.1419, "step": 30750 }, { "epoch": 0.32865003472407717, "grad_norm": 1.4640237092971802, "learning_rate": 9.975023145703807e-07, "loss": 0.05, "step": 30760 }, { "epoch": 0.32875687803835674, "grad_norm": 6.644399166107178, "learning_rate": 9.975006370800248e-07, "loss": 0.0858, "step": 30770 }, { "epoch": 0.32886372135263636, "grad_norm": 1.923177719116211, "learning_rate": 9.974989590279533e-07, "loss": 0.1299, "step": 30780 }, { "epoch": 0.328970564666916, "grad_norm": 3.1758155822753906, "learning_rate": 9.974972804141676e-07, "loss": 0.0208, "step": 30790 }, { "epoch": 0.32907740798119556, "grad_norm": 4.759339332580566, "learning_rate": 9.974956012386702e-07, "loss": 0.0597, "step": 30800 }, { "epoch": 0.3291842512954752, "grad_norm": 2.329082489013672, "learning_rate": 9.974939215014626e-07, "loss": 0.1129, "step": 30810 }, { "epoch": 0.3292910946097548, "grad_norm": 6.2686944007873535, "learning_rate": 9.974922412025469e-07, "loss": 0.0616, "step": 30820 }, { "epoch": 0.3293979379240344, "grad_norm": 9.808917045593262, "learning_rate": 9.974905603419248e-07, "loss": 0.0789, "step": 30830 }, { "epoch": 0.329504781238314, "grad_norm": 4.338709354400635, "learning_rate": 9.974888789195983e-07, "loss": 0.0613, "step": 30840 }, { "epoch": 0.32961162455259363, "grad_norm": 11.67113208770752, "learning_rate": 9.974871969355694e-07, "loss": 0.051, "step": 30850 }, { "epoch": 0.32971846786687325, "grad_norm": 4.8662590980529785, "learning_rate": 9.974855143898399e-07, "loss": 0.0686, "step": 30860 }, { "epoch": 0.3298253111811528, "grad_norm": 7.422527313232422, "learning_rate": 9.974838312824117e-07, "loss": 0.053, "step": 30870 }, { "epoch": 0.32993215449543245, "grad_norm": 1.9923104047775269, "learning_rate": 9.974821476132867e-07, "loss": 0.1859, "step": 30880 }, { "epoch": 0.3300389978097121, "grad_norm": 2.6145849227905273, "learning_rate": 9.974804633824669e-07, "loss": 0.06, "step": 30890 }, { "epoch": 0.33014584112399165, "grad_norm": 3.5047943592071533, "learning_rate": 9.97478778589954e-07, "loss": 0.0632, "step": 30900 }, { "epoch": 0.33025268443827127, "grad_norm": 3.5129549503326416, "learning_rate": 9.9747709323575e-07, "loss": 0.0573, "step": 30910 }, { "epoch": 0.3303595277525509, "grad_norm": 8.041608810424805, "learning_rate": 9.974754073198568e-07, "loss": 0.0997, "step": 30920 }, { "epoch": 0.33046637106683047, "grad_norm": 7.6725664138793945, "learning_rate": 9.974737208422764e-07, "loss": 0.0868, "step": 30930 }, { "epoch": 0.3305732143811101, "grad_norm": 3.0232982635498047, "learning_rate": 9.974720338030106e-07, "loss": 0.0784, "step": 30940 }, { "epoch": 0.3306800576953897, "grad_norm": 5.438957691192627, "learning_rate": 9.974703462020612e-07, "loss": 0.1107, "step": 30950 }, { "epoch": 0.33078690100966934, "grad_norm": 3.068556070327759, "learning_rate": 9.974686580394304e-07, "loss": 0.1365, "step": 30960 }, { "epoch": 0.3308937443239489, "grad_norm": 3.771728992462158, "learning_rate": 9.974669693151199e-07, "loss": 0.0732, "step": 30970 }, { "epoch": 0.33100058763822854, "grad_norm": 4.593353271484375, "learning_rate": 9.974652800291316e-07, "loss": 0.0483, "step": 30980 }, { "epoch": 0.33110743095250816, "grad_norm": 2.905226707458496, "learning_rate": 9.974635901814675e-07, "loss": 0.0679, "step": 30990 }, { "epoch": 0.33121427426678773, "grad_norm": 5.621945858001709, "learning_rate": 9.974618997721296e-07, "loss": 0.0783, "step": 31000 }, { "epoch": 0.33132111758106736, "grad_norm": 6.632312297821045, "learning_rate": 9.974602088011196e-07, "loss": 0.105, "step": 31010 }, { "epoch": 0.331427960895347, "grad_norm": 4.718873500823975, "learning_rate": 9.974585172684393e-07, "loss": 0.0839, "step": 31020 }, { "epoch": 0.3315348042096266, "grad_norm": 7.259344577789307, "learning_rate": 9.974568251740909e-07, "loss": 0.0836, "step": 31030 }, { "epoch": 0.3316416475239062, "grad_norm": 2.0332694053649902, "learning_rate": 9.97455132518076e-07, "loss": 0.09, "step": 31040 }, { "epoch": 0.3317484908381858, "grad_norm": 5.212382793426514, "learning_rate": 9.97453439300397e-07, "loss": 0.0722, "step": 31050 }, { "epoch": 0.33185533415246543, "grad_norm": 14.229048728942871, "learning_rate": 9.974517455210554e-07, "loss": 0.1487, "step": 31060 }, { "epoch": 0.331962177466745, "grad_norm": 5.594980239868164, "learning_rate": 9.974500511800531e-07, "loss": 0.069, "step": 31070 }, { "epoch": 0.3320690207810246, "grad_norm": 0.16061753034591675, "learning_rate": 9.974483562773925e-07, "loss": 0.0447, "step": 31080 }, { "epoch": 0.33217586409530425, "grad_norm": 1.5197795629501343, "learning_rate": 9.97446660813075e-07, "loss": 0.1016, "step": 31090 }, { "epoch": 0.3322827074095838, "grad_norm": 6.713162422180176, "learning_rate": 9.974449647871026e-07, "loss": 0.1328, "step": 31100 }, { "epoch": 0.33238955072386345, "grad_norm": 1.4959561824798584, "learning_rate": 9.974432681994773e-07, "loss": 0.0785, "step": 31110 }, { "epoch": 0.3324963940381431, "grad_norm": 8.340291023254395, "learning_rate": 9.97441571050201e-07, "loss": 0.068, "step": 31120 }, { "epoch": 0.3326032373524227, "grad_norm": 0.3834173083305359, "learning_rate": 9.974398733392757e-07, "loss": 0.0674, "step": 31130 }, { "epoch": 0.33271008066670227, "grad_norm": 5.078202247619629, "learning_rate": 9.97438175066703e-07, "loss": 0.0773, "step": 31140 }, { "epoch": 0.3328169239809819, "grad_norm": 4.8935346603393555, "learning_rate": 9.974364762324855e-07, "loss": 0.1237, "step": 31150 }, { "epoch": 0.3329237672952615, "grad_norm": 8.420475959777832, "learning_rate": 9.974347768366243e-07, "loss": 0.0977, "step": 31160 }, { "epoch": 0.3330306106095411, "grad_norm": 3.289426803588867, "learning_rate": 9.974330768791218e-07, "loss": 0.0455, "step": 31170 }, { "epoch": 0.3331374539238207, "grad_norm": 6.5535759925842285, "learning_rate": 9.9743137635998e-07, "loss": 0.0946, "step": 31180 }, { "epoch": 0.33324429723810034, "grad_norm": 4.869293212890625, "learning_rate": 9.974296752792004e-07, "loss": 0.0824, "step": 31190 }, { "epoch": 0.3333511405523799, "grad_norm": 6.857877254486084, "learning_rate": 9.974279736367853e-07, "loss": 0.084, "step": 31200 }, { "epoch": 0.33345798386665954, "grad_norm": 6.0453877449035645, "learning_rate": 9.974262714327363e-07, "loss": 0.0788, "step": 31210 }, { "epoch": 0.33356482718093916, "grad_norm": 0.3419090509414673, "learning_rate": 9.974245686670555e-07, "loss": 0.0244, "step": 31220 }, { "epoch": 0.3336716704952188, "grad_norm": 1.0463066101074219, "learning_rate": 9.97422865339745e-07, "loss": 0.126, "step": 31230 }, { "epoch": 0.33377851380949836, "grad_norm": 5.713249683380127, "learning_rate": 9.974211614508064e-07, "loss": 0.1446, "step": 31240 }, { "epoch": 0.333885357123778, "grad_norm": 8.291815757751465, "learning_rate": 9.97419457000242e-07, "loss": 0.0615, "step": 31250 }, { "epoch": 0.3339922004380576, "grad_norm": 3.065725803375244, "learning_rate": 9.974177519880532e-07, "loss": 0.0909, "step": 31260 }, { "epoch": 0.3340990437523372, "grad_norm": 4.398470401763916, "learning_rate": 9.974160464142423e-07, "loss": 0.1291, "step": 31270 }, { "epoch": 0.3342058870666168, "grad_norm": 7.328251838684082, "learning_rate": 9.974143402788112e-07, "loss": 0.1419, "step": 31280 }, { "epoch": 0.33431273038089643, "grad_norm": 3.3915863037109375, "learning_rate": 9.974126335817617e-07, "loss": 0.0729, "step": 31290 }, { "epoch": 0.334419573695176, "grad_norm": 8.674160957336426, "learning_rate": 9.974109263230959e-07, "loss": 0.0633, "step": 31300 }, { "epoch": 0.3345264170094556, "grad_norm": 4.208299160003662, "learning_rate": 9.974092185028156e-07, "loss": 0.047, "step": 31310 }, { "epoch": 0.33463326032373525, "grad_norm": 5.907741069793701, "learning_rate": 9.974075101209227e-07, "loss": 0.0589, "step": 31320 }, { "epoch": 0.3347401036380149, "grad_norm": 2.822718858718872, "learning_rate": 9.97405801177419e-07, "loss": 0.0486, "step": 31330 }, { "epoch": 0.33484694695229444, "grad_norm": 0.5325786471366882, "learning_rate": 9.97404091672307e-07, "loss": 0.057, "step": 31340 }, { "epoch": 0.33495379026657407, "grad_norm": 6.225010395050049, "learning_rate": 9.97402381605588e-07, "loss": 0.0982, "step": 31350 }, { "epoch": 0.3350606335808537, "grad_norm": 5.192558288574219, "learning_rate": 9.974006709772641e-07, "loss": 0.0559, "step": 31360 }, { "epoch": 0.33516747689513327, "grad_norm": 10.349194526672363, "learning_rate": 9.973989597873376e-07, "loss": 0.1548, "step": 31370 }, { "epoch": 0.3352743202094129, "grad_norm": 2.458470344543457, "learning_rate": 9.9739724803581e-07, "loss": 0.0618, "step": 31380 }, { "epoch": 0.3353811635236925, "grad_norm": 4.281682968139648, "learning_rate": 9.973955357226832e-07, "loss": 0.1141, "step": 31390 }, { "epoch": 0.33548800683797214, "grad_norm": 13.76350212097168, "learning_rate": 9.973938228479594e-07, "loss": 0.1037, "step": 31400 }, { "epoch": 0.3355948501522517, "grad_norm": 0.3610151708126068, "learning_rate": 9.973921094116405e-07, "loss": 0.0254, "step": 31410 }, { "epoch": 0.33570169346653134, "grad_norm": 3.3014156818389893, "learning_rate": 9.973903954137283e-07, "loss": 0.0991, "step": 31420 }, { "epoch": 0.33580853678081096, "grad_norm": 4.504477024078369, "learning_rate": 9.973886808542249e-07, "loss": 0.0577, "step": 31430 }, { "epoch": 0.33591538009509053, "grad_norm": 0.9620334506034851, "learning_rate": 9.97386965733132e-07, "loss": 0.0871, "step": 31440 }, { "epoch": 0.33602222340937016, "grad_norm": 0.054604724049568176, "learning_rate": 9.973852500504517e-07, "loss": 0.053, "step": 31450 }, { "epoch": 0.3361290667236498, "grad_norm": 10.325439453125, "learning_rate": 9.97383533806186e-07, "loss": 0.0743, "step": 31460 }, { "epoch": 0.33623591003792935, "grad_norm": 10.33714485168457, "learning_rate": 9.973818170003367e-07, "loss": 0.0617, "step": 31470 }, { "epoch": 0.336342753352209, "grad_norm": 4.794345855712891, "learning_rate": 9.973800996329059e-07, "loss": 0.059, "step": 31480 }, { "epoch": 0.3364495966664886, "grad_norm": 5.923993110656738, "learning_rate": 9.973783817038952e-07, "loss": 0.0779, "step": 31490 }, { "epoch": 0.33655643998076823, "grad_norm": 5.16822624206543, "learning_rate": 9.973766632133067e-07, "loss": 0.0874, "step": 31500 }, { "epoch": 0.3366632832950478, "grad_norm": 11.441422462463379, "learning_rate": 9.973749441611429e-07, "loss": 0.0613, "step": 31510 }, { "epoch": 0.3367701266093274, "grad_norm": 6.37536096572876, "learning_rate": 9.973732245474048e-07, "loss": 0.0873, "step": 31520 }, { "epoch": 0.33687696992360705, "grad_norm": 1.5109679698944092, "learning_rate": 9.97371504372095e-07, "loss": 0.1125, "step": 31530 }, { "epoch": 0.3369838132378866, "grad_norm": 0.3007887601852417, "learning_rate": 9.973697836352152e-07, "loss": 0.0385, "step": 31540 }, { "epoch": 0.33709065655216625, "grad_norm": 3.962700128555298, "learning_rate": 9.973680623367673e-07, "loss": 0.0515, "step": 31550 }, { "epoch": 0.33719749986644587, "grad_norm": 8.606374740600586, "learning_rate": 9.973663404767532e-07, "loss": 0.0711, "step": 31560 }, { "epoch": 0.33730434318072544, "grad_norm": 9.486335754394531, "learning_rate": 9.973646180551753e-07, "loss": 0.0657, "step": 31570 }, { "epoch": 0.33741118649500507, "grad_norm": 13.344366073608398, "learning_rate": 9.97362895072035e-07, "loss": 0.0376, "step": 31580 }, { "epoch": 0.3375180298092847, "grad_norm": 0.8653081655502319, "learning_rate": 9.973611715273344e-07, "loss": 0.1295, "step": 31590 }, { "epoch": 0.3376248731235643, "grad_norm": 4.3700785636901855, "learning_rate": 9.973594474210757e-07, "loss": 0.0499, "step": 31600 }, { "epoch": 0.3377317164378439, "grad_norm": 9.558357238769531, "learning_rate": 9.973577227532604e-07, "loss": 0.079, "step": 31610 }, { "epoch": 0.3378385597521235, "grad_norm": 2.217658758163452, "learning_rate": 9.97355997523891e-07, "loss": 0.1042, "step": 31620 }, { "epoch": 0.33794540306640314, "grad_norm": 1.4647603034973145, "learning_rate": 9.973542717329687e-07, "loss": 0.0921, "step": 31630 }, { "epoch": 0.3380522463806827, "grad_norm": 11.319646835327148, "learning_rate": 9.973525453804962e-07, "loss": 0.0586, "step": 31640 }, { "epoch": 0.33815908969496233, "grad_norm": 0.6181707382202148, "learning_rate": 9.97350818466475e-07, "loss": 0.0582, "step": 31650 }, { "epoch": 0.33826593300924196, "grad_norm": 0.9577288627624512, "learning_rate": 9.973490909909074e-07, "loss": 0.1066, "step": 31660 }, { "epoch": 0.33837277632352153, "grad_norm": 3.5539932250976562, "learning_rate": 9.973473629537951e-07, "loss": 0.0505, "step": 31670 }, { "epoch": 0.33847961963780115, "grad_norm": 9.453080177307129, "learning_rate": 9.9734563435514e-07, "loss": 0.1099, "step": 31680 }, { "epoch": 0.3385864629520808, "grad_norm": 0.3964158296585083, "learning_rate": 9.973439051949443e-07, "loss": 0.0508, "step": 31690 }, { "epoch": 0.3386933062663604, "grad_norm": 5.479601860046387, "learning_rate": 9.973421754732097e-07, "loss": 0.0527, "step": 31700 }, { "epoch": 0.33880014958064, "grad_norm": 1.5537760257720947, "learning_rate": 9.97340445189938e-07, "loss": 0.0268, "step": 31710 }, { "epoch": 0.3389069928949196, "grad_norm": 12.376073837280273, "learning_rate": 9.973387143451316e-07, "loss": 0.1085, "step": 31720 }, { "epoch": 0.3390138362091992, "grad_norm": 4.511935234069824, "learning_rate": 9.973369829387925e-07, "loss": 0.0698, "step": 31730 }, { "epoch": 0.3391206795234788, "grad_norm": 3.5481438636779785, "learning_rate": 9.97335250970922e-07, "loss": 0.0397, "step": 31740 }, { "epoch": 0.3392275228377584, "grad_norm": 4.117732524871826, "learning_rate": 9.973335184415227e-07, "loss": 0.1137, "step": 31750 }, { "epoch": 0.33933436615203805, "grad_norm": 0.01982271671295166, "learning_rate": 9.973317853505962e-07, "loss": 0.058, "step": 31760 }, { "epoch": 0.3394412094663177, "grad_norm": 4.006892681121826, "learning_rate": 9.973300516981447e-07, "loss": 0.0875, "step": 31770 }, { "epoch": 0.33954805278059724, "grad_norm": 10.432394027709961, "learning_rate": 9.9732831748417e-07, "loss": 0.0795, "step": 31780 }, { "epoch": 0.33965489609487687, "grad_norm": 10.17798900604248, "learning_rate": 9.973265827086742e-07, "loss": 0.1025, "step": 31790 }, { "epoch": 0.3397617394091565, "grad_norm": 4.455431938171387, "learning_rate": 9.97324847371659e-07, "loss": 0.1244, "step": 31800 }, { "epoch": 0.33986858272343606, "grad_norm": 12.235346794128418, "learning_rate": 9.973231114731264e-07, "loss": 0.0841, "step": 31810 }, { "epoch": 0.3399754260377157, "grad_norm": 0.46964704990386963, "learning_rate": 9.973213750130787e-07, "loss": 0.1019, "step": 31820 }, { "epoch": 0.3400822693519953, "grad_norm": 5.7335968017578125, "learning_rate": 9.973196379915174e-07, "loss": 0.1301, "step": 31830 }, { "epoch": 0.3401891126662749, "grad_norm": 11.297664642333984, "learning_rate": 9.973179004084449e-07, "loss": 0.1361, "step": 31840 }, { "epoch": 0.3402959559805545, "grad_norm": 16.24539566040039, "learning_rate": 9.97316162263863e-07, "loss": 0.0981, "step": 31850 }, { "epoch": 0.34040279929483414, "grad_norm": 7.017355442047119, "learning_rate": 9.973144235577736e-07, "loss": 0.0536, "step": 31860 }, { "epoch": 0.34050964260911376, "grad_norm": 2.429983615875244, "learning_rate": 9.973126842901785e-07, "loss": 0.0574, "step": 31870 }, { "epoch": 0.34061648592339333, "grad_norm": 3.9503724575042725, "learning_rate": 9.9731094446108e-07, "loss": 0.065, "step": 31880 }, { "epoch": 0.34072332923767296, "grad_norm": 1.1548196077346802, "learning_rate": 9.973092040704798e-07, "loss": 0.0293, "step": 31890 }, { "epoch": 0.3408301725519526, "grad_norm": 25.588895797729492, "learning_rate": 9.9730746311838e-07, "loss": 0.076, "step": 31900 }, { "epoch": 0.34093701586623215, "grad_norm": 2.9696481227874756, "learning_rate": 9.973057216047824e-07, "loss": 0.0866, "step": 31910 }, { "epoch": 0.3410438591805118, "grad_norm": 3.007420063018799, "learning_rate": 9.973039795296894e-07, "loss": 0.0832, "step": 31920 }, { "epoch": 0.3411507024947914, "grad_norm": 4.059077739715576, "learning_rate": 9.973022368931025e-07, "loss": 0.0719, "step": 31930 }, { "epoch": 0.341257545809071, "grad_norm": 4.513855457305908, "learning_rate": 9.97300493695024e-07, "loss": 0.1164, "step": 31940 }, { "epoch": 0.3413643891233506, "grad_norm": 14.747129440307617, "learning_rate": 9.972987499354555e-07, "loss": 0.0714, "step": 31950 }, { "epoch": 0.3414712324376302, "grad_norm": 9.661539077758789, "learning_rate": 9.972970056143992e-07, "loss": 0.0453, "step": 31960 }, { "epoch": 0.34157807575190985, "grad_norm": 5.668349266052246, "learning_rate": 9.972952607318571e-07, "loss": 0.0852, "step": 31970 }, { "epoch": 0.3416849190661894, "grad_norm": 2.1912119388580322, "learning_rate": 9.972935152878312e-07, "loss": 0.0893, "step": 31980 }, { "epoch": 0.34179176238046904, "grad_norm": 14.632978439331055, "learning_rate": 9.972917692823234e-07, "loss": 0.0777, "step": 31990 }, { "epoch": 0.34189860569474867, "grad_norm": 2.6525933742523193, "learning_rate": 9.972900227153356e-07, "loss": 0.0759, "step": 32000 }, { "epoch": 0.34200544900902824, "grad_norm": 9.081059455871582, "learning_rate": 9.972882755868698e-07, "loss": 0.0373, "step": 32010 }, { "epoch": 0.34211229232330786, "grad_norm": 3.8839306831359863, "learning_rate": 9.97286527896928e-07, "loss": 0.0787, "step": 32020 }, { "epoch": 0.3422191356375875, "grad_norm": 6.050451755523682, "learning_rate": 9.97284779645512e-07, "loss": 0.0325, "step": 32030 }, { "epoch": 0.34232597895186706, "grad_norm": 1.309314489364624, "learning_rate": 9.972830308326243e-07, "loss": 0.1005, "step": 32040 }, { "epoch": 0.3424328222661467, "grad_norm": 7.116697311401367, "learning_rate": 9.972812814582666e-07, "loss": 0.0808, "step": 32050 }, { "epoch": 0.3425396655804263, "grad_norm": 0.9152068495750427, "learning_rate": 9.972795315224405e-07, "loss": 0.0633, "step": 32060 }, { "epoch": 0.34264650889470594, "grad_norm": 6.070446491241455, "learning_rate": 9.972777810251483e-07, "loss": 0.0761, "step": 32070 }, { "epoch": 0.3427533522089855, "grad_norm": 1.3881127834320068, "learning_rate": 9.972760299663921e-07, "loss": 0.0795, "step": 32080 }, { "epoch": 0.34286019552326513, "grad_norm": 9.105254173278809, "learning_rate": 9.972742783461735e-07, "loss": 0.0484, "step": 32090 }, { "epoch": 0.34296703883754476, "grad_norm": 0.05549923703074455, "learning_rate": 9.97272526164495e-07, "loss": 0.0945, "step": 32100 }, { "epoch": 0.3430738821518243, "grad_norm": 12.452683448791504, "learning_rate": 9.972707734213582e-07, "loss": 0.0442, "step": 32110 }, { "epoch": 0.34318072546610395, "grad_norm": 0.30504143238067627, "learning_rate": 9.97269020116765e-07, "loss": 0.073, "step": 32120 }, { "epoch": 0.3432875687803836, "grad_norm": 0.13305293023586273, "learning_rate": 9.972672662507177e-07, "loss": 0.0403, "step": 32130 }, { "epoch": 0.3433944120946632, "grad_norm": 6.941234111785889, "learning_rate": 9.97265511823218e-07, "loss": 0.0358, "step": 32140 }, { "epoch": 0.3435012554089428, "grad_norm": 0.3932543694972992, "learning_rate": 9.972637568342682e-07, "loss": 0.0383, "step": 32150 }, { "epoch": 0.3436080987232224, "grad_norm": 9.680147171020508, "learning_rate": 9.9726200128387e-07, "loss": 0.0847, "step": 32160 }, { "epoch": 0.343714942037502, "grad_norm": 2.358069896697998, "learning_rate": 9.972602451720253e-07, "loss": 0.083, "step": 32170 }, { "epoch": 0.3438217853517816, "grad_norm": 7.446281909942627, "learning_rate": 9.972584884987365e-07, "loss": 0.0993, "step": 32180 }, { "epoch": 0.3439286286660612, "grad_norm": 6.701829433441162, "learning_rate": 9.972567312640052e-07, "loss": 0.0754, "step": 32190 }, { "epoch": 0.34403547198034085, "grad_norm": 0.6264658570289612, "learning_rate": 9.972549734678334e-07, "loss": 0.2205, "step": 32200 }, { "epoch": 0.3441423152946204, "grad_norm": 5.815455436706543, "learning_rate": 9.972532151102235e-07, "loss": 0.0589, "step": 32210 }, { "epoch": 0.34424915860890004, "grad_norm": 8.251812934875488, "learning_rate": 9.97251456191177e-07, "loss": 0.0721, "step": 32220 }, { "epoch": 0.34435600192317967, "grad_norm": 1.6397258043289185, "learning_rate": 9.972496967106962e-07, "loss": 0.0599, "step": 32230 }, { "epoch": 0.3444628452374593, "grad_norm": 2.428067922592163, "learning_rate": 9.97247936668783e-07, "loss": 0.0825, "step": 32240 }, { "epoch": 0.34456968855173886, "grad_norm": 7.637507438659668, "learning_rate": 9.972461760654392e-07, "loss": 0.0324, "step": 32250 }, { "epoch": 0.3446765318660185, "grad_norm": 16.766359329223633, "learning_rate": 9.97244414900667e-07, "loss": 0.0515, "step": 32260 }, { "epoch": 0.3447833751802981, "grad_norm": 3.6000876426696777, "learning_rate": 9.972426531744682e-07, "loss": 0.0582, "step": 32270 }, { "epoch": 0.3448902184945777, "grad_norm": 13.719196319580078, "learning_rate": 9.97240890886845e-07, "loss": 0.0771, "step": 32280 }, { "epoch": 0.3449970618088573, "grad_norm": 6.921380996704102, "learning_rate": 9.972391280377995e-07, "loss": 0.0681, "step": 32290 }, { "epoch": 0.34510390512313693, "grad_norm": 5.518684387207031, "learning_rate": 9.972373646273333e-07, "loss": 0.0527, "step": 32300 }, { "epoch": 0.3452107484374165, "grad_norm": 12.573498725891113, "learning_rate": 9.972356006554486e-07, "loss": 0.1126, "step": 32310 }, { "epoch": 0.34531759175169613, "grad_norm": 11.874955177307129, "learning_rate": 9.972338361221473e-07, "loss": 0.088, "step": 32320 }, { "epoch": 0.34542443506597575, "grad_norm": 3.287874460220337, "learning_rate": 9.972320710274315e-07, "loss": 0.1051, "step": 32330 }, { "epoch": 0.3455312783802554, "grad_norm": 14.439663887023926, "learning_rate": 9.972303053713033e-07, "loss": 0.0682, "step": 32340 }, { "epoch": 0.34563812169453495, "grad_norm": 3.6240901947021484, "learning_rate": 9.972285391537645e-07, "loss": 0.0471, "step": 32350 }, { "epoch": 0.3457449650088146, "grad_norm": 0.9224768280982971, "learning_rate": 9.97226772374817e-07, "loss": 0.0944, "step": 32360 }, { "epoch": 0.3458518083230942, "grad_norm": 2.173664093017578, "learning_rate": 9.972250050344634e-07, "loss": 0.0629, "step": 32370 }, { "epoch": 0.34595865163737377, "grad_norm": 4.657337665557861, "learning_rate": 9.972232371327047e-07, "loss": 0.0405, "step": 32380 }, { "epoch": 0.3460654949516534, "grad_norm": 1.7505929470062256, "learning_rate": 9.972214686695437e-07, "loss": 0.0561, "step": 32390 }, { "epoch": 0.346172338265933, "grad_norm": 1.7296820878982544, "learning_rate": 9.972196996449823e-07, "loss": 0.0604, "step": 32400 }, { "epoch": 0.3462791815802126, "grad_norm": 9.026908874511719, "learning_rate": 9.97217930059022e-07, "loss": 0.0321, "step": 32410 }, { "epoch": 0.3463860248944922, "grad_norm": 4.6034255027771, "learning_rate": 9.972161599116654e-07, "loss": 0.1131, "step": 32420 }, { "epoch": 0.34649286820877184, "grad_norm": 9.335317611694336, "learning_rate": 9.97214389202914e-07, "loss": 0.1278, "step": 32430 }, { "epoch": 0.34659971152305147, "grad_norm": 3.3574562072753906, "learning_rate": 9.9721261793277e-07, "loss": 0.0572, "step": 32440 }, { "epoch": 0.34670655483733104, "grad_norm": 4.507028102874756, "learning_rate": 9.972108461012356e-07, "loss": 0.0896, "step": 32450 }, { "epoch": 0.34681339815161066, "grad_norm": 3.580512523651123, "learning_rate": 9.972090737083126e-07, "loss": 0.0242, "step": 32460 }, { "epoch": 0.3469202414658903, "grad_norm": 0.17014069855213165, "learning_rate": 9.972073007540029e-07, "loss": 0.0395, "step": 32470 }, { "epoch": 0.34702708478016986, "grad_norm": 10.330716133117676, "learning_rate": 9.97205527238309e-07, "loss": 0.0825, "step": 32480 }, { "epoch": 0.3471339280944495, "grad_norm": 5.535180568695068, "learning_rate": 9.97203753161232e-07, "loss": 0.0729, "step": 32490 }, { "epoch": 0.3472407714087291, "grad_norm": 12.48715877532959, "learning_rate": 9.972019785227747e-07, "loss": 0.0886, "step": 32500 }, { "epoch": 0.34734761472300874, "grad_norm": 11.308769226074219, "learning_rate": 9.972002033229389e-07, "loss": 0.1051, "step": 32510 }, { "epoch": 0.3474544580372883, "grad_norm": 5.878144264221191, "learning_rate": 9.971984275617265e-07, "loss": 0.0492, "step": 32520 }, { "epoch": 0.34756130135156793, "grad_norm": 6.6059250831604, "learning_rate": 9.971966512391396e-07, "loss": 0.0556, "step": 32530 }, { "epoch": 0.34766814466584756, "grad_norm": 4.267014503479004, "learning_rate": 9.971948743551802e-07, "loss": 0.0345, "step": 32540 }, { "epoch": 0.3477749879801271, "grad_norm": 0.08716293424367905, "learning_rate": 9.9719309690985e-07, "loss": 0.0944, "step": 32550 }, { "epoch": 0.34788183129440675, "grad_norm": 9.964695930480957, "learning_rate": 9.971913189031516e-07, "loss": 0.0736, "step": 32560 }, { "epoch": 0.3479886746086864, "grad_norm": 4.52943754196167, "learning_rate": 9.971895403350864e-07, "loss": 0.0585, "step": 32570 }, { "epoch": 0.34809551792296595, "grad_norm": 8.988051414489746, "learning_rate": 9.971877612056568e-07, "loss": 0.1502, "step": 32580 }, { "epoch": 0.3482023612372456, "grad_norm": 6.673439025878906, "learning_rate": 9.971859815148646e-07, "loss": 0.0634, "step": 32590 }, { "epoch": 0.3483092045515252, "grad_norm": 5.063999652862549, "learning_rate": 9.97184201262712e-07, "loss": 0.0535, "step": 32600 }, { "epoch": 0.3484160478658048, "grad_norm": 2.0822463035583496, "learning_rate": 9.97182420449201e-07, "loss": 0.0372, "step": 32610 }, { "epoch": 0.3485228911800844, "grad_norm": 0.2215094268321991, "learning_rate": 9.971806390743334e-07, "loss": 0.1083, "step": 32620 }, { "epoch": 0.348629734494364, "grad_norm": 9.109903335571289, "learning_rate": 9.971788571381113e-07, "loss": 0.0881, "step": 32630 }, { "epoch": 0.34873657780864364, "grad_norm": 0.470768541097641, "learning_rate": 9.971770746405367e-07, "loss": 0.1746, "step": 32640 }, { "epoch": 0.3488434211229232, "grad_norm": 2.462048292160034, "learning_rate": 9.971752915816118e-07, "loss": 0.0609, "step": 32650 }, { "epoch": 0.34895026443720284, "grad_norm": 2.6554160118103027, "learning_rate": 9.971735079613387e-07, "loss": 0.0502, "step": 32660 }, { "epoch": 0.34905710775148246, "grad_norm": 1.8025308847427368, "learning_rate": 9.971717237797188e-07, "loss": 0.0475, "step": 32670 }, { "epoch": 0.34916395106576203, "grad_norm": 0.04803065210580826, "learning_rate": 9.971699390367545e-07, "loss": 0.0787, "step": 32680 }, { "epoch": 0.34927079438004166, "grad_norm": 12.79206371307373, "learning_rate": 9.97168153732448e-07, "loss": 0.0345, "step": 32690 }, { "epoch": 0.3493776376943213, "grad_norm": 3.000336170196533, "learning_rate": 9.97166367866801e-07, "loss": 0.1443, "step": 32700 }, { "epoch": 0.3494844810086009, "grad_norm": 0.9181612133979797, "learning_rate": 9.971645814398158e-07, "loss": 0.1095, "step": 32710 }, { "epoch": 0.3495913243228805, "grad_norm": 1.6677433252334595, "learning_rate": 9.971627944514943e-07, "loss": 0.1088, "step": 32720 }, { "epoch": 0.3496981676371601, "grad_norm": 6.032459259033203, "learning_rate": 9.971610069018383e-07, "loss": 0.0355, "step": 32730 }, { "epoch": 0.34980501095143973, "grad_norm": 1.2769032716751099, "learning_rate": 9.9715921879085e-07, "loss": 0.0826, "step": 32740 }, { "epoch": 0.3499118542657193, "grad_norm": 8.75798225402832, "learning_rate": 9.971574301185316e-07, "loss": 0.1457, "step": 32750 }, { "epoch": 0.3500186975799989, "grad_norm": 2.689578056335449, "learning_rate": 9.971556408848847e-07, "loss": 0.0384, "step": 32760 }, { "epoch": 0.35012554089427855, "grad_norm": 5.466772556304932, "learning_rate": 9.971538510899119e-07, "loss": 0.0366, "step": 32770 }, { "epoch": 0.3502323842085581, "grad_norm": 3.159372329711914, "learning_rate": 9.971520607336145e-07, "loss": 0.1424, "step": 32780 }, { "epoch": 0.35033922752283775, "grad_norm": 4.524974822998047, "learning_rate": 9.97150269815995e-07, "loss": 0.1075, "step": 32790 }, { "epoch": 0.3504460708371174, "grad_norm": 6.628153324127197, "learning_rate": 9.971484783370554e-07, "loss": 0.0318, "step": 32800 }, { "epoch": 0.350552914151397, "grad_norm": 5.849856376647949, "learning_rate": 9.971466862967977e-07, "loss": 0.0698, "step": 32810 }, { "epoch": 0.35065975746567657, "grad_norm": 2.7998721599578857, "learning_rate": 9.971448936952237e-07, "loss": 0.0489, "step": 32820 }, { "epoch": 0.3507666007799562, "grad_norm": 3.8883044719696045, "learning_rate": 9.971431005323359e-07, "loss": 0.0616, "step": 32830 }, { "epoch": 0.3508734440942358, "grad_norm": 5.039912223815918, "learning_rate": 9.971413068081358e-07, "loss": 0.1141, "step": 32840 }, { "epoch": 0.3509802874085154, "grad_norm": 2.5486106872558594, "learning_rate": 9.971395125226256e-07, "loss": 0.0544, "step": 32850 }, { "epoch": 0.351087130722795, "grad_norm": 0.10139316320419312, "learning_rate": 9.971377176758074e-07, "loss": 0.0752, "step": 32860 }, { "epoch": 0.35119397403707464, "grad_norm": 18.28685188293457, "learning_rate": 9.971359222676832e-07, "loss": 0.072, "step": 32870 }, { "epoch": 0.35130081735135427, "grad_norm": 2.150559902191162, "learning_rate": 9.971341262982552e-07, "loss": 0.0813, "step": 32880 }, { "epoch": 0.35140766066563384, "grad_norm": 10.159263610839844, "learning_rate": 9.97132329767525e-07, "loss": 0.1344, "step": 32890 }, { "epoch": 0.35151450397991346, "grad_norm": 3.933250904083252, "learning_rate": 9.97130532675495e-07, "loss": 0.1191, "step": 32900 }, { "epoch": 0.3516213472941931, "grad_norm": 0.944945752620697, "learning_rate": 9.971287350221672e-07, "loss": 0.0738, "step": 32910 }, { "epoch": 0.35172819060847266, "grad_norm": 3.5072872638702393, "learning_rate": 9.971269368075433e-07, "loss": 0.0508, "step": 32920 }, { "epoch": 0.3518350339227523, "grad_norm": 3.7764992713928223, "learning_rate": 9.971251380316257e-07, "loss": 0.1012, "step": 32930 }, { "epoch": 0.3519418772370319, "grad_norm": 5.0206170082092285, "learning_rate": 9.971233386944161e-07, "loss": 0.0473, "step": 32940 }, { "epoch": 0.3520487205513115, "grad_norm": 3.0884668827056885, "learning_rate": 9.97121538795917e-07, "loss": 0.0387, "step": 32950 }, { "epoch": 0.3521555638655911, "grad_norm": 5.194634914398193, "learning_rate": 9.971197383361302e-07, "loss": 0.0507, "step": 32960 }, { "epoch": 0.35226240717987073, "grad_norm": 0.5129450559616089, "learning_rate": 9.971179373150576e-07, "loss": 0.065, "step": 32970 }, { "epoch": 0.35236925049415035, "grad_norm": 11.149081230163574, "learning_rate": 9.971161357327012e-07, "loss": 0.0529, "step": 32980 }, { "epoch": 0.3524760938084299, "grad_norm": 0.051428187638521194, "learning_rate": 9.971143335890633e-07, "loss": 0.0707, "step": 32990 }, { "epoch": 0.35258293712270955, "grad_norm": 1.7508548498153687, "learning_rate": 9.971125308841458e-07, "loss": 0.0858, "step": 33000 }, { "epoch": 0.3526897804369892, "grad_norm": 9.49628734588623, "learning_rate": 9.971107276179508e-07, "loss": 0.0746, "step": 33010 }, { "epoch": 0.35279662375126875, "grad_norm": 6.590031147003174, "learning_rate": 9.971089237904802e-07, "loss": 0.0648, "step": 33020 }, { "epoch": 0.35290346706554837, "grad_norm": 11.236677169799805, "learning_rate": 9.97107119401736e-07, "loss": 0.0793, "step": 33030 }, { "epoch": 0.353010310379828, "grad_norm": 9.725785255432129, "learning_rate": 9.971053144517205e-07, "loss": 0.0627, "step": 33040 }, { "epoch": 0.35311715369410757, "grad_norm": 5.817989349365234, "learning_rate": 9.971035089404355e-07, "loss": 0.1431, "step": 33050 }, { "epoch": 0.3532239970083872, "grad_norm": 0.17846199870109558, "learning_rate": 9.97101702867883e-07, "loss": 0.0973, "step": 33060 }, { "epoch": 0.3533308403226668, "grad_norm": 3.3748278617858887, "learning_rate": 9.970998962340653e-07, "loss": 0.1691, "step": 33070 }, { "epoch": 0.35343768363694644, "grad_norm": 6.223636627197266, "learning_rate": 9.970980890389844e-07, "loss": 0.0392, "step": 33080 }, { "epoch": 0.353544526951226, "grad_norm": 1.3520747423171997, "learning_rate": 9.97096281282642e-07, "loss": 0.0361, "step": 33090 }, { "epoch": 0.35365137026550564, "grad_norm": 6.837438106536865, "learning_rate": 9.970944729650405e-07, "loss": 0.1281, "step": 33100 }, { "epoch": 0.35375821357978526, "grad_norm": 5.475065231323242, "learning_rate": 9.970926640861818e-07, "loss": 0.0757, "step": 33110 }, { "epoch": 0.35386505689406483, "grad_norm": 6.263794422149658, "learning_rate": 9.97090854646068e-07, "loss": 0.0855, "step": 33120 }, { "epoch": 0.35397190020834446, "grad_norm": 5.098770618438721, "learning_rate": 9.970890446447011e-07, "loss": 0.055, "step": 33130 }, { "epoch": 0.3540787435226241, "grad_norm": 10.238348007202148, "learning_rate": 9.970872340820832e-07, "loss": 0.1076, "step": 33140 }, { "epoch": 0.35418558683690365, "grad_norm": 10.37037181854248, "learning_rate": 9.970854229582164e-07, "loss": 0.0913, "step": 33150 }, { "epoch": 0.3542924301511833, "grad_norm": 5.770518779754639, "learning_rate": 9.970836112731024e-07, "loss": 0.0254, "step": 33160 }, { "epoch": 0.3543992734654629, "grad_norm": 5.568965435028076, "learning_rate": 9.970817990267437e-07, "loss": 0.0549, "step": 33170 }, { "epoch": 0.35450611677974253, "grad_norm": 15.897297859191895, "learning_rate": 9.97079986219142e-07, "loss": 0.1176, "step": 33180 }, { "epoch": 0.3546129600940221, "grad_norm": 3.1235413551330566, "learning_rate": 9.970781728502995e-07, "loss": 0.053, "step": 33190 }, { "epoch": 0.3547198034083017, "grad_norm": 6.939749717712402, "learning_rate": 9.970763589202182e-07, "loss": 0.0578, "step": 33200 }, { "epoch": 0.35482664672258135, "grad_norm": 6.740415096282959, "learning_rate": 9.970745444289001e-07, "loss": 0.0571, "step": 33210 }, { "epoch": 0.3549334900368609, "grad_norm": 4.0588860511779785, "learning_rate": 9.970727293763476e-07, "loss": 0.082, "step": 33220 }, { "epoch": 0.35504033335114055, "grad_norm": 0.5204378962516785, "learning_rate": 9.970709137625622e-07, "loss": 0.0413, "step": 33230 }, { "epoch": 0.3551471766654202, "grad_norm": 3.0650031566619873, "learning_rate": 9.970690975875463e-07, "loss": 0.0544, "step": 33240 }, { "epoch": 0.3552540199796998, "grad_norm": 11.49732780456543, "learning_rate": 9.970672808513018e-07, "loss": 0.0848, "step": 33250 }, { "epoch": 0.35536086329397937, "grad_norm": 8.293569564819336, "learning_rate": 9.97065463553831e-07, "loss": 0.0692, "step": 33260 }, { "epoch": 0.355467706608259, "grad_norm": 13.67852783203125, "learning_rate": 9.970636456951357e-07, "loss": 0.0416, "step": 33270 }, { "epoch": 0.3555745499225386, "grad_norm": 10.913304328918457, "learning_rate": 9.97061827275218e-07, "loss": 0.0808, "step": 33280 }, { "epoch": 0.3556813932368182, "grad_norm": 0.5207985639572144, "learning_rate": 9.9706000829408e-07, "loss": 0.1242, "step": 33290 }, { "epoch": 0.3557882365510978, "grad_norm": 5.593007564544678, "learning_rate": 9.970581887517238e-07, "loss": 0.0587, "step": 33300 }, { "epoch": 0.35589507986537744, "grad_norm": 10.723196983337402, "learning_rate": 9.970563686481512e-07, "loss": 0.0706, "step": 33310 }, { "epoch": 0.356001923179657, "grad_norm": 5.2475504875183105, "learning_rate": 9.970545479833647e-07, "loss": 0.0395, "step": 33320 }, { "epoch": 0.35610876649393663, "grad_norm": 4.999516487121582, "learning_rate": 9.970527267573658e-07, "loss": 0.1357, "step": 33330 }, { "epoch": 0.35621560980821626, "grad_norm": 12.623056411743164, "learning_rate": 9.970509049701572e-07, "loss": 0.2022, "step": 33340 }, { "epoch": 0.3563224531224959, "grad_norm": 7.487840175628662, "learning_rate": 9.970490826217403e-07, "loss": 0.2386, "step": 33350 }, { "epoch": 0.35642929643677546, "grad_norm": 0.4123791456222534, "learning_rate": 9.970472597121177e-07, "loss": 0.0778, "step": 33360 }, { "epoch": 0.3565361397510551, "grad_norm": 7.625919818878174, "learning_rate": 9.970454362412911e-07, "loss": 0.0959, "step": 33370 }, { "epoch": 0.3566429830653347, "grad_norm": 12.681042671203613, "learning_rate": 9.970436122092626e-07, "loss": 0.2235, "step": 33380 }, { "epoch": 0.3567498263796143, "grad_norm": 1.0479344129562378, "learning_rate": 9.970417876160346e-07, "loss": 0.1381, "step": 33390 }, { "epoch": 0.3568566696938939, "grad_norm": 10.304030418395996, "learning_rate": 9.970399624616089e-07, "loss": 0.1287, "step": 33400 }, { "epoch": 0.3569635130081735, "grad_norm": 2.7634270191192627, "learning_rate": 9.970381367459874e-07, "loss": 0.0438, "step": 33410 }, { "epoch": 0.3570703563224531, "grad_norm": 0.8857501745223999, "learning_rate": 9.970363104691723e-07, "loss": 0.0601, "step": 33420 }, { "epoch": 0.3571771996367327, "grad_norm": 11.900836944580078, "learning_rate": 9.97034483631166e-07, "loss": 0.1343, "step": 33430 }, { "epoch": 0.35728404295101235, "grad_norm": 5.513725757598877, "learning_rate": 9.9703265623197e-07, "loss": 0.145, "step": 33440 }, { "epoch": 0.357390886265292, "grad_norm": 0.6072643399238586, "learning_rate": 9.970308282715867e-07, "loss": 0.0705, "step": 33450 }, { "epoch": 0.35749772957957154, "grad_norm": 1.5507172346115112, "learning_rate": 9.970289997500178e-07, "loss": 0.0983, "step": 33460 }, { "epoch": 0.35760457289385117, "grad_norm": 6.768437385559082, "learning_rate": 9.97027170667266e-07, "loss": 0.0631, "step": 33470 }, { "epoch": 0.3577114162081308, "grad_norm": 5.167582035064697, "learning_rate": 9.970253410233329e-07, "loss": 0.0558, "step": 33480 }, { "epoch": 0.35781825952241036, "grad_norm": 3.491060495376587, "learning_rate": 9.970235108182206e-07, "loss": 0.0577, "step": 33490 }, { "epoch": 0.35792510283669, "grad_norm": 0.48186200857162476, "learning_rate": 9.970216800519314e-07, "loss": 0.0588, "step": 33500 }, { "epoch": 0.3580319461509696, "grad_norm": 7.825994968414307, "learning_rate": 9.97019848724467e-07, "loss": 0.0716, "step": 33510 }, { "epoch": 0.3581387894652492, "grad_norm": 9.431046485900879, "learning_rate": 9.9701801683583e-07, "loss": 0.0512, "step": 33520 }, { "epoch": 0.3582456327795288, "grad_norm": 3.1578338146209717, "learning_rate": 9.97016184386022e-07, "loss": 0.1096, "step": 33530 }, { "epoch": 0.35835247609380844, "grad_norm": 8.03589153289795, "learning_rate": 9.97014351375045e-07, "loss": 0.1355, "step": 33540 }, { "epoch": 0.35845931940808806, "grad_norm": 18.941492080688477, "learning_rate": 9.970125178029016e-07, "loss": 0.0765, "step": 33550 }, { "epoch": 0.35856616272236763, "grad_norm": 6.686914920806885, "learning_rate": 9.970106836695934e-07, "loss": 0.1034, "step": 33560 }, { "epoch": 0.35867300603664726, "grad_norm": 20.02733039855957, "learning_rate": 9.970088489751226e-07, "loss": 0.0865, "step": 33570 }, { "epoch": 0.3587798493509269, "grad_norm": 6.701009750366211, "learning_rate": 9.970070137194912e-07, "loss": 0.072, "step": 33580 }, { "epoch": 0.35888669266520645, "grad_norm": 4.269547939300537, "learning_rate": 9.970051779027016e-07, "loss": 0.0788, "step": 33590 }, { "epoch": 0.3589935359794861, "grad_norm": 8.399358749389648, "learning_rate": 9.970033415247556e-07, "loss": 0.0771, "step": 33600 }, { "epoch": 0.3591003792937657, "grad_norm": 3.2647714614868164, "learning_rate": 9.970015045856552e-07, "loss": 0.0952, "step": 33610 }, { "epoch": 0.35920722260804533, "grad_norm": 4.584978103637695, "learning_rate": 9.969996670854026e-07, "loss": 0.0646, "step": 33620 }, { "epoch": 0.3593140659223249, "grad_norm": 12.515028953552246, "learning_rate": 9.96997829024e-07, "loss": 0.0735, "step": 33630 }, { "epoch": 0.3594209092366045, "grad_norm": 6.807084560394287, "learning_rate": 9.969959904014494e-07, "loss": 0.1293, "step": 33640 }, { "epoch": 0.35952775255088415, "grad_norm": 1.3565354347229004, "learning_rate": 9.969941512177527e-07, "loss": 0.14, "step": 33650 }, { "epoch": 0.3596345958651637, "grad_norm": 5.807781219482422, "learning_rate": 9.96992311472912e-07, "loss": 0.0947, "step": 33660 }, { "epoch": 0.35974143917944335, "grad_norm": 0.4803210198879242, "learning_rate": 9.969904711669294e-07, "loss": 0.069, "step": 33670 }, { "epoch": 0.35984828249372297, "grad_norm": 9.680197715759277, "learning_rate": 9.969886302998073e-07, "loss": 0.0695, "step": 33680 }, { "epoch": 0.35995512580800254, "grad_norm": 13.80816650390625, "learning_rate": 9.969867888715474e-07, "loss": 0.0996, "step": 33690 }, { "epoch": 0.36006196912228217, "grad_norm": 24.56484603881836, "learning_rate": 9.96984946882152e-07, "loss": 0.1652, "step": 33700 }, { "epoch": 0.3601688124365618, "grad_norm": 5.034705638885498, "learning_rate": 9.96983104331623e-07, "loss": 0.0695, "step": 33710 }, { "epoch": 0.3602756557508414, "grad_norm": 0.9942853450775146, "learning_rate": 9.969812612199626e-07, "loss": 0.0739, "step": 33720 }, { "epoch": 0.360382499065121, "grad_norm": 4.052458763122559, "learning_rate": 9.969794175471728e-07, "loss": 0.0226, "step": 33730 }, { "epoch": 0.3604893423794006, "grad_norm": 0.045172419399023056, "learning_rate": 9.969775733132557e-07, "loss": 0.0745, "step": 33740 }, { "epoch": 0.36059618569368024, "grad_norm": 4.536799907684326, "learning_rate": 9.969757285182135e-07, "loss": 0.0638, "step": 33750 }, { "epoch": 0.3607030290079598, "grad_norm": 0.8980869054794312, "learning_rate": 9.969738831620482e-07, "loss": 0.0643, "step": 33760 }, { "epoch": 0.36080987232223943, "grad_norm": 16.8734188079834, "learning_rate": 9.969720372447619e-07, "loss": 0.1113, "step": 33770 }, { "epoch": 0.36091671563651906, "grad_norm": 4.621104717254639, "learning_rate": 9.969701907663567e-07, "loss": 0.0419, "step": 33780 }, { "epoch": 0.36102355895079863, "grad_norm": 0.5570918321609497, "learning_rate": 9.969683437268343e-07, "loss": 0.0915, "step": 33790 }, { "epoch": 0.36113040226507825, "grad_norm": 0.7990960478782654, "learning_rate": 9.969664961261975e-07, "loss": 0.103, "step": 33800 }, { "epoch": 0.3612372455793579, "grad_norm": 4.526246547698975, "learning_rate": 9.96964647964448e-07, "loss": 0.0519, "step": 33810 }, { "epoch": 0.3613440888936375, "grad_norm": 4.875202655792236, "learning_rate": 9.969627992415878e-07, "loss": 0.079, "step": 33820 }, { "epoch": 0.3614509322079171, "grad_norm": 4.805809020996094, "learning_rate": 9.96960949957619e-07, "loss": 0.2372, "step": 33830 }, { "epoch": 0.3615577755221967, "grad_norm": 5.257619380950928, "learning_rate": 9.96959100112544e-07, "loss": 0.0735, "step": 33840 }, { "epoch": 0.3616646188364763, "grad_norm": 3.3237791061401367, "learning_rate": 9.969572497063645e-07, "loss": 0.071, "step": 33850 }, { "epoch": 0.3617714621507559, "grad_norm": 6.695068836212158, "learning_rate": 9.969553987390828e-07, "loss": 0.1171, "step": 33860 }, { "epoch": 0.3618783054650355, "grad_norm": 8.3712797164917, "learning_rate": 9.969535472107008e-07, "loss": 0.1125, "step": 33870 }, { "epoch": 0.36198514877931515, "grad_norm": 9.217957496643066, "learning_rate": 9.96951695121221e-07, "loss": 0.2115, "step": 33880 }, { "epoch": 0.3620919920935947, "grad_norm": 0.8715286254882812, "learning_rate": 9.96949842470645e-07, "loss": 0.1068, "step": 33890 }, { "epoch": 0.36219883540787434, "grad_norm": 5.6829328536987305, "learning_rate": 9.969479892589753e-07, "loss": 0.151, "step": 33900 }, { "epoch": 0.36230567872215397, "grad_norm": 8.707310676574707, "learning_rate": 9.969461354862137e-07, "loss": 0.1065, "step": 33910 }, { "epoch": 0.3624125220364336, "grad_norm": 4.2483344078063965, "learning_rate": 9.969442811523625e-07, "loss": 0.0837, "step": 33920 }, { "epoch": 0.36251936535071316, "grad_norm": 4.471710205078125, "learning_rate": 9.969424262574238e-07, "loss": 0.1095, "step": 33930 }, { "epoch": 0.3626262086649928, "grad_norm": 3.492003917694092, "learning_rate": 9.969405708013994e-07, "loss": 0.0633, "step": 33940 }, { "epoch": 0.3627330519792724, "grad_norm": 7.8567070960998535, "learning_rate": 9.969387147842915e-07, "loss": 0.0512, "step": 33950 }, { "epoch": 0.362839895293552, "grad_norm": 0.10088519752025604, "learning_rate": 9.969368582061026e-07, "loss": 0.0426, "step": 33960 }, { "epoch": 0.3629467386078316, "grad_norm": 4.960391998291016, "learning_rate": 9.969350010668342e-07, "loss": 0.0681, "step": 33970 }, { "epoch": 0.36305358192211123, "grad_norm": 5.089324951171875, "learning_rate": 9.96933143366489e-07, "loss": 0.0445, "step": 33980 }, { "epoch": 0.36316042523639086, "grad_norm": 7.006871700286865, "learning_rate": 9.969312851050683e-07, "loss": 0.0579, "step": 33990 }, { "epoch": 0.36326726855067043, "grad_norm": 9.576234817504883, "learning_rate": 9.96929426282575e-07, "loss": 0.0917, "step": 34000 }, { "epoch": 0.36337411186495006, "grad_norm": 0.952678382396698, "learning_rate": 9.969275668990109e-07, "loss": 0.0969, "step": 34010 }, { "epoch": 0.3634809551792297, "grad_norm": 10.586201667785645, "learning_rate": 9.969257069543779e-07, "loss": 0.068, "step": 34020 }, { "epoch": 0.36358779849350925, "grad_norm": 2.6262290477752686, "learning_rate": 9.969238464486785e-07, "loss": 0.0778, "step": 34030 }, { "epoch": 0.3636946418077889, "grad_norm": 3.4167563915252686, "learning_rate": 9.969219853819144e-07, "loss": 0.0929, "step": 34040 }, { "epoch": 0.3638014851220685, "grad_norm": 0.911107063293457, "learning_rate": 9.969201237540878e-07, "loss": 0.0429, "step": 34050 }, { "epoch": 0.36390832843634807, "grad_norm": 1.0960724353790283, "learning_rate": 9.96918261565201e-07, "loss": 0.0604, "step": 34060 }, { "epoch": 0.3640151717506277, "grad_norm": 4.9524407386779785, "learning_rate": 9.96916398815256e-07, "loss": 0.0543, "step": 34070 }, { "epoch": 0.3641220150649073, "grad_norm": 5.884570121765137, "learning_rate": 9.969145355042548e-07, "loss": 0.0483, "step": 34080 }, { "epoch": 0.36422885837918695, "grad_norm": 6.949102878570557, "learning_rate": 9.969126716321996e-07, "loss": 0.0827, "step": 34090 }, { "epoch": 0.3643357016934665, "grad_norm": 3.2060985565185547, "learning_rate": 9.969108071990926e-07, "loss": 0.0718, "step": 34100 }, { "epoch": 0.36444254500774614, "grad_norm": 8.9959135055542, "learning_rate": 9.969089422049358e-07, "loss": 0.069, "step": 34110 }, { "epoch": 0.36454938832202577, "grad_norm": 7.567461013793945, "learning_rate": 9.969070766497312e-07, "loss": 0.0789, "step": 34120 }, { "epoch": 0.36465623163630534, "grad_norm": 7.726063251495361, "learning_rate": 9.96905210533481e-07, "loss": 0.0848, "step": 34130 }, { "epoch": 0.36476307495058496, "grad_norm": 4.65573263168335, "learning_rate": 9.969033438561874e-07, "loss": 0.0904, "step": 34140 }, { "epoch": 0.3648699182648646, "grad_norm": 0.5474836230278015, "learning_rate": 9.969014766178523e-07, "loss": 0.0187, "step": 34150 }, { "epoch": 0.36497676157914416, "grad_norm": 11.9646577835083, "learning_rate": 9.96899608818478e-07, "loss": 0.1003, "step": 34160 }, { "epoch": 0.3650836048934238, "grad_norm": 6.991507530212402, "learning_rate": 9.968977404580665e-07, "loss": 0.1495, "step": 34170 }, { "epoch": 0.3651904482077034, "grad_norm": 6.658339023590088, "learning_rate": 9.968958715366198e-07, "loss": 0.0584, "step": 34180 }, { "epoch": 0.36529729152198304, "grad_norm": 7.140768527984619, "learning_rate": 9.968940020541406e-07, "loss": 0.0646, "step": 34190 }, { "epoch": 0.3654041348362626, "grad_norm": 3.1514930725097656, "learning_rate": 9.968921320106302e-07, "loss": 0.0866, "step": 34200 }, { "epoch": 0.36551097815054223, "grad_norm": 1.796670913696289, "learning_rate": 9.968902614060912e-07, "loss": 0.0427, "step": 34210 }, { "epoch": 0.36561782146482186, "grad_norm": 2.8282434940338135, "learning_rate": 9.968883902405256e-07, "loss": 0.0527, "step": 34220 }, { "epoch": 0.3657246647791014, "grad_norm": 8.157999038696289, "learning_rate": 9.968865185139355e-07, "loss": 0.0539, "step": 34230 }, { "epoch": 0.36583150809338105, "grad_norm": 0.0784592404961586, "learning_rate": 9.96884646226323e-07, "loss": 0.0997, "step": 34240 }, { "epoch": 0.3659383514076607, "grad_norm": 5.904616832733154, "learning_rate": 9.9688277337769e-07, "loss": 0.1284, "step": 34250 }, { "epoch": 0.36604519472194025, "grad_norm": 7.655698776245117, "learning_rate": 9.968808999680392e-07, "loss": 0.0332, "step": 34260 }, { "epoch": 0.3661520380362199, "grad_norm": 10.571208000183105, "learning_rate": 9.968790259973724e-07, "loss": 0.1102, "step": 34270 }, { "epoch": 0.3662588813504995, "grad_norm": 0.2439480423927307, "learning_rate": 9.968771514656915e-07, "loss": 0.0454, "step": 34280 }, { "epoch": 0.3663657246647791, "grad_norm": 6.982454776763916, "learning_rate": 9.968752763729988e-07, "loss": 0.0362, "step": 34290 }, { "epoch": 0.3664725679790587, "grad_norm": 0.8911207318305969, "learning_rate": 9.968734007192964e-07, "loss": 0.0707, "step": 34300 }, { "epoch": 0.3665794112933383, "grad_norm": 6.904146671295166, "learning_rate": 9.968715245045864e-07, "loss": 0.088, "step": 34310 }, { "epoch": 0.36668625460761795, "grad_norm": 17.977157592773438, "learning_rate": 9.968696477288708e-07, "loss": 0.074, "step": 34320 }, { "epoch": 0.3667930979218975, "grad_norm": 0.4361738860607147, "learning_rate": 9.968677703921522e-07, "loss": 0.0578, "step": 34330 }, { "epoch": 0.36689994123617714, "grad_norm": 3.691166639328003, "learning_rate": 9.968658924944325e-07, "loss": 0.0379, "step": 34340 }, { "epoch": 0.36700678455045677, "grad_norm": 0.2090682089328766, "learning_rate": 9.968640140357133e-07, "loss": 0.0577, "step": 34350 }, { "epoch": 0.3671136278647364, "grad_norm": 0.328339159488678, "learning_rate": 9.968621350159974e-07, "loss": 0.1077, "step": 34360 }, { "epoch": 0.36722047117901596, "grad_norm": 5.205174446105957, "learning_rate": 9.968602554352865e-07, "loss": 0.0332, "step": 34370 }, { "epoch": 0.3673273144932956, "grad_norm": 12.044384956359863, "learning_rate": 9.96858375293583e-07, "loss": 0.0637, "step": 34380 }, { "epoch": 0.3674341578075752, "grad_norm": 5.541980743408203, "learning_rate": 9.968564945908888e-07, "loss": 0.1602, "step": 34390 }, { "epoch": 0.3675410011218548, "grad_norm": 0.8606353402137756, "learning_rate": 9.96854613327206e-07, "loss": 0.0685, "step": 34400 }, { "epoch": 0.3676478444361344, "grad_norm": 2.41695499420166, "learning_rate": 9.968527315025371e-07, "loss": 0.1013, "step": 34410 }, { "epoch": 0.36775468775041403, "grad_norm": 11.713624954223633, "learning_rate": 9.968508491168836e-07, "loss": 0.0657, "step": 34420 }, { "epoch": 0.3678615310646936, "grad_norm": 0.5855165123939514, "learning_rate": 9.968489661702484e-07, "loss": 0.0622, "step": 34430 }, { "epoch": 0.36796837437897323, "grad_norm": 5.495676517486572, "learning_rate": 9.968470826626329e-07, "loss": 0.0483, "step": 34440 }, { "epoch": 0.36807521769325285, "grad_norm": 9.755167007446289, "learning_rate": 9.968451985940399e-07, "loss": 0.059, "step": 34450 }, { "epoch": 0.3681820610075325, "grad_norm": 2.240387439727783, "learning_rate": 9.96843313964471e-07, "loss": 0.0438, "step": 34460 }, { "epoch": 0.36828890432181205, "grad_norm": 7.828404426574707, "learning_rate": 9.968414287739284e-07, "loss": 0.1155, "step": 34470 }, { "epoch": 0.3683957476360917, "grad_norm": 3.812260150909424, "learning_rate": 9.968395430224143e-07, "loss": 0.0275, "step": 34480 }, { "epoch": 0.3685025909503713, "grad_norm": 1.3514078855514526, "learning_rate": 9.96837656709931e-07, "loss": 0.0743, "step": 34490 }, { "epoch": 0.36860943426465087, "grad_norm": 1.799082636833191, "learning_rate": 9.968357698364804e-07, "loss": 0.0935, "step": 34500 }, { "epoch": 0.3687162775789305, "grad_norm": 6.176942825317383, "learning_rate": 9.968338824020647e-07, "loss": 0.0865, "step": 34510 }, { "epoch": 0.3688231208932101, "grad_norm": 1.8564999103546143, "learning_rate": 9.968319944066863e-07, "loss": 0.0825, "step": 34520 }, { "epoch": 0.3689299642074897, "grad_norm": 4.767177104949951, "learning_rate": 9.968301058503467e-07, "loss": 0.0424, "step": 34530 }, { "epoch": 0.3690368075217693, "grad_norm": 4.967041492462158, "learning_rate": 9.968282167330488e-07, "loss": 0.0766, "step": 34540 }, { "epoch": 0.36914365083604894, "grad_norm": 3.327918291091919, "learning_rate": 9.968263270547942e-07, "loss": 0.1002, "step": 34550 }, { "epoch": 0.36925049415032857, "grad_norm": 8.521563529968262, "learning_rate": 9.96824436815585e-07, "loss": 0.1013, "step": 34560 }, { "epoch": 0.36935733746460814, "grad_norm": 12.58253002166748, "learning_rate": 9.968225460154236e-07, "loss": 0.0635, "step": 34570 }, { "epoch": 0.36946418077888776, "grad_norm": 0.0944720059633255, "learning_rate": 9.96820654654312e-07, "loss": 0.0874, "step": 34580 }, { "epoch": 0.3695710240931674, "grad_norm": 15.125404357910156, "learning_rate": 9.968187627322526e-07, "loss": 0.0711, "step": 34590 }, { "epoch": 0.36967786740744696, "grad_norm": 10.04246711730957, "learning_rate": 9.968168702492472e-07, "loss": 0.0949, "step": 34600 }, { "epoch": 0.3697847107217266, "grad_norm": 8.105138778686523, "learning_rate": 9.96814977205298e-07, "loss": 0.1327, "step": 34610 }, { "epoch": 0.3698915540360062, "grad_norm": 5.993307113647461, "learning_rate": 9.968130836004073e-07, "loss": 0.0456, "step": 34620 }, { "epoch": 0.3699983973502858, "grad_norm": 2.7047669887542725, "learning_rate": 9.96811189434577e-07, "loss": 0.0658, "step": 34630 }, { "epoch": 0.3701052406645654, "grad_norm": 9.249874114990234, "learning_rate": 9.968092947078095e-07, "loss": 0.1394, "step": 34640 }, { "epoch": 0.37021208397884503, "grad_norm": 7.638448715209961, "learning_rate": 9.968073994201067e-07, "loss": 0.0641, "step": 34650 }, { "epoch": 0.37031892729312466, "grad_norm": 0.42763203382492065, "learning_rate": 9.96805503571471e-07, "loss": 0.2034, "step": 34660 }, { "epoch": 0.3704257706074042, "grad_norm": 8.339678764343262, "learning_rate": 9.968036071619044e-07, "loss": 0.158, "step": 34670 }, { "epoch": 0.37053261392168385, "grad_norm": 5.579150676727295, "learning_rate": 9.968017101914089e-07, "loss": 0.0712, "step": 34680 }, { "epoch": 0.3706394572359635, "grad_norm": 8.941954612731934, "learning_rate": 9.967998126599868e-07, "loss": 0.0741, "step": 34690 }, { "epoch": 0.37074630055024305, "grad_norm": 6.162837028503418, "learning_rate": 9.967979145676402e-07, "loss": 0.0755, "step": 34700 }, { "epoch": 0.37085314386452267, "grad_norm": 5.2937703132629395, "learning_rate": 9.967960159143714e-07, "loss": 0.1472, "step": 34710 }, { "epoch": 0.3709599871788023, "grad_norm": 1.2618250846862793, "learning_rate": 9.967941167001822e-07, "loss": 0.0375, "step": 34720 }, { "epoch": 0.3710668304930819, "grad_norm": 10.08207893371582, "learning_rate": 9.96792216925075e-07, "loss": 0.0898, "step": 34730 }, { "epoch": 0.3711736738073615, "grad_norm": 22.19584083557129, "learning_rate": 9.967903165890519e-07, "loss": 0.1046, "step": 34740 }, { "epoch": 0.3712805171216411, "grad_norm": 5.2686333656311035, "learning_rate": 9.967884156921152e-07, "loss": 0.1583, "step": 34750 }, { "epoch": 0.37138736043592074, "grad_norm": 4.794113636016846, "learning_rate": 9.967865142342666e-07, "loss": 0.0836, "step": 34760 }, { "epoch": 0.3714942037502003, "grad_norm": 11.301597595214844, "learning_rate": 9.967846122155087e-07, "loss": 0.0625, "step": 34770 }, { "epoch": 0.37160104706447994, "grad_norm": 6.57139253616333, "learning_rate": 9.967827096358435e-07, "loss": 0.1517, "step": 34780 }, { "epoch": 0.37170789037875956, "grad_norm": 5.504082679748535, "learning_rate": 9.96780806495273e-07, "loss": 0.0659, "step": 34790 }, { "epoch": 0.37181473369303913, "grad_norm": 4.135239601135254, "learning_rate": 9.967789027937995e-07, "loss": 0.0976, "step": 34800 }, { "epoch": 0.37192157700731876, "grad_norm": 9.826181411743164, "learning_rate": 9.967769985314254e-07, "loss": 0.0778, "step": 34810 }, { "epoch": 0.3720284203215984, "grad_norm": 6.476254463195801, "learning_rate": 9.967750937081523e-07, "loss": 0.0924, "step": 34820 }, { "epoch": 0.372135263635878, "grad_norm": 2.22046160697937, "learning_rate": 9.967731883239828e-07, "loss": 0.053, "step": 34830 }, { "epoch": 0.3722421069501576, "grad_norm": 1.85293447971344, "learning_rate": 9.967712823789186e-07, "loss": 0.0928, "step": 34840 }, { "epoch": 0.3723489502644372, "grad_norm": 0.55201256275177, "learning_rate": 9.967693758729622e-07, "loss": 0.126, "step": 34850 }, { "epoch": 0.37245579357871683, "grad_norm": 3.359788417816162, "learning_rate": 9.967674688061158e-07, "loss": 0.0732, "step": 34860 }, { "epoch": 0.3725626368929964, "grad_norm": 7.454131603240967, "learning_rate": 9.967655611783816e-07, "loss": 0.082, "step": 34870 }, { "epoch": 0.372669480207276, "grad_norm": 3.5571486949920654, "learning_rate": 9.967636529897614e-07, "loss": 0.1235, "step": 34880 }, { "epoch": 0.37277632352155565, "grad_norm": 5.593442916870117, "learning_rate": 9.967617442402574e-07, "loss": 0.1461, "step": 34890 }, { "epoch": 0.3728831668358352, "grad_norm": 3.853564739227295, "learning_rate": 9.96759834929872e-07, "loss": 0.092, "step": 34900 }, { "epoch": 0.37299001015011485, "grad_norm": 2.8195364475250244, "learning_rate": 9.967579250586074e-07, "loss": 0.0577, "step": 34910 }, { "epoch": 0.3730968534643945, "grad_norm": 4.47077751159668, "learning_rate": 9.967560146264656e-07, "loss": 0.0358, "step": 34920 }, { "epoch": 0.3732036967786741, "grad_norm": 3.0757598876953125, "learning_rate": 9.967541036334485e-07, "loss": 0.0779, "step": 34930 }, { "epoch": 0.37331054009295367, "grad_norm": 15.871150970458984, "learning_rate": 9.967521920795588e-07, "loss": 0.0922, "step": 34940 }, { "epoch": 0.3734173834072333, "grad_norm": 5.307459354400635, "learning_rate": 9.967502799647981e-07, "loss": 0.1947, "step": 34950 }, { "epoch": 0.3735242267215129, "grad_norm": 6.812588691711426, "learning_rate": 9.967483672891691e-07, "loss": 0.0573, "step": 34960 }, { "epoch": 0.3736310700357925, "grad_norm": 12.031180381774902, "learning_rate": 9.967464540526737e-07, "loss": 0.0771, "step": 34970 }, { "epoch": 0.3737379133500721, "grad_norm": 4.902695655822754, "learning_rate": 9.96744540255314e-07, "loss": 0.0564, "step": 34980 }, { "epoch": 0.37384475666435174, "grad_norm": 4.003803730010986, "learning_rate": 9.96742625897092e-07, "loss": 0.041, "step": 34990 }, { "epoch": 0.3739515999786313, "grad_norm": 6.3780131340026855, "learning_rate": 9.967407109780105e-07, "loss": 0.1045, "step": 35000 }, { "epoch": 0.37405844329291094, "grad_norm": 8.184185981750488, "learning_rate": 9.96738795498071e-07, "loss": 0.0491, "step": 35010 }, { "epoch": 0.37416528660719056, "grad_norm": 6.924594879150391, "learning_rate": 9.96736879457276e-07, "loss": 0.0676, "step": 35020 }, { "epoch": 0.3742721299214702, "grad_norm": 1.8237941265106201, "learning_rate": 9.967349628556275e-07, "loss": 0.0914, "step": 35030 }, { "epoch": 0.37437897323574976, "grad_norm": 18.504648208618164, "learning_rate": 9.967330456931277e-07, "loss": 0.051, "step": 35040 }, { "epoch": 0.3744858165500294, "grad_norm": 5.036975860595703, "learning_rate": 9.967311279697789e-07, "loss": 0.0566, "step": 35050 }, { "epoch": 0.374592659864309, "grad_norm": 9.920068740844727, "learning_rate": 9.96729209685583e-07, "loss": 0.0417, "step": 35060 }, { "epoch": 0.3746995031785886, "grad_norm": 4.503050327301025, "learning_rate": 9.967272908405426e-07, "loss": 0.0506, "step": 35070 }, { "epoch": 0.3748063464928682, "grad_norm": 3.650576114654541, "learning_rate": 9.967253714346596e-07, "loss": 0.0584, "step": 35080 }, { "epoch": 0.37491318980714783, "grad_norm": 2.680004596710205, "learning_rate": 9.96723451467936e-07, "loss": 0.0829, "step": 35090 }, { "epoch": 0.37502003312142745, "grad_norm": 2.9346022605895996, "learning_rate": 9.96721530940374e-07, "loss": 0.1208, "step": 35100 }, { "epoch": 0.375126876435707, "grad_norm": 4.305143356323242, "learning_rate": 9.96719609851976e-07, "loss": 0.0754, "step": 35110 }, { "epoch": 0.37523371974998665, "grad_norm": 4.670695781707764, "learning_rate": 9.967176882027445e-07, "loss": 0.0238, "step": 35120 }, { "epoch": 0.3753405630642663, "grad_norm": 4.522750377655029, "learning_rate": 9.967157659926807e-07, "loss": 0.0644, "step": 35130 }, { "epoch": 0.37544740637854584, "grad_norm": 4.134542942047119, "learning_rate": 9.967138432217874e-07, "loss": 0.0674, "step": 35140 }, { "epoch": 0.37555424969282547, "grad_norm": 4.697578430175781, "learning_rate": 9.96711919890067e-07, "loss": 0.0617, "step": 35150 }, { "epoch": 0.3756610930071051, "grad_norm": 6.7060346603393555, "learning_rate": 9.96709995997521e-07, "loss": 0.0757, "step": 35160 }, { "epoch": 0.37576793632138467, "grad_norm": 7.448645114898682, "learning_rate": 9.96708071544152e-07, "loss": 0.0383, "step": 35170 }, { "epoch": 0.3758747796356643, "grad_norm": 6.055588245391846, "learning_rate": 9.967061465299622e-07, "loss": 0.0451, "step": 35180 }, { "epoch": 0.3759816229499439, "grad_norm": 3.307208776473999, "learning_rate": 9.967042209549536e-07, "loss": 0.0387, "step": 35190 }, { "epoch": 0.37608846626422354, "grad_norm": 2.8391525745391846, "learning_rate": 9.967022948191283e-07, "loss": 0.0248, "step": 35200 }, { "epoch": 0.3761953095785031, "grad_norm": 6.234426975250244, "learning_rate": 9.96700368122489e-07, "loss": 0.0726, "step": 35210 }, { "epoch": 0.37630215289278274, "grad_norm": 38.53215789794922, "learning_rate": 9.966984408650372e-07, "loss": 0.1092, "step": 35220 }, { "epoch": 0.37640899620706236, "grad_norm": 1.787322998046875, "learning_rate": 9.966965130467756e-07, "loss": 0.0757, "step": 35230 }, { "epoch": 0.37651583952134193, "grad_norm": 4.4612579345703125, "learning_rate": 9.966945846677059e-07, "loss": 0.095, "step": 35240 }, { "epoch": 0.37662268283562156, "grad_norm": 7.218130588531494, "learning_rate": 9.966926557278305e-07, "loss": 0.1144, "step": 35250 }, { "epoch": 0.3767295261499012, "grad_norm": 6.4826178550720215, "learning_rate": 9.96690726227152e-07, "loss": 0.1233, "step": 35260 }, { "epoch": 0.37683636946418075, "grad_norm": 15.704365730285645, "learning_rate": 9.966887961656717e-07, "loss": 0.1025, "step": 35270 }, { "epoch": 0.3769432127784604, "grad_norm": 10.957018852233887, "learning_rate": 9.966868655433926e-07, "loss": 0.0922, "step": 35280 }, { "epoch": 0.37705005609274, "grad_norm": 11.466814041137695, "learning_rate": 9.966849343603165e-07, "loss": 0.0738, "step": 35290 }, { "epoch": 0.37715689940701963, "grad_norm": 1.2020844221115112, "learning_rate": 9.966830026164455e-07, "loss": 0.0367, "step": 35300 }, { "epoch": 0.3772637427212992, "grad_norm": 10.788073539733887, "learning_rate": 9.966810703117819e-07, "loss": 0.0947, "step": 35310 }, { "epoch": 0.3773705860355788, "grad_norm": 6.407772541046143, "learning_rate": 9.966791374463279e-07, "loss": 0.0514, "step": 35320 }, { "epoch": 0.37747742934985845, "grad_norm": 10.572036743164062, "learning_rate": 9.966772040200857e-07, "loss": 0.1211, "step": 35330 }, { "epoch": 0.377584272664138, "grad_norm": 1.6764929294586182, "learning_rate": 9.966752700330576e-07, "loss": 0.0838, "step": 35340 }, { "epoch": 0.37769111597841765, "grad_norm": 4.643740653991699, "learning_rate": 9.966733354852456e-07, "loss": 0.0448, "step": 35350 }, { "epoch": 0.37779795929269727, "grad_norm": 0.3672913610935211, "learning_rate": 9.966714003766516e-07, "loss": 0.0665, "step": 35360 }, { "epoch": 0.37790480260697684, "grad_norm": 17.803571701049805, "learning_rate": 9.966694647072783e-07, "loss": 0.0797, "step": 35370 }, { "epoch": 0.37801164592125647, "grad_norm": 7.263293266296387, "learning_rate": 9.966675284771278e-07, "loss": 0.0727, "step": 35380 }, { "epoch": 0.3781184892355361, "grad_norm": 1.6626604795455933, "learning_rate": 9.96665591686202e-07, "loss": 0.0403, "step": 35390 }, { "epoch": 0.3782253325498157, "grad_norm": 1.2743518352508545, "learning_rate": 9.966636543345033e-07, "loss": 0.0294, "step": 35400 }, { "epoch": 0.3783321758640953, "grad_norm": 0.2090955525636673, "learning_rate": 9.96661716422034e-07, "loss": 0.1279, "step": 35410 }, { "epoch": 0.3784390191783749, "grad_norm": 5.128239631652832, "learning_rate": 9.96659777948796e-07, "loss": 0.0735, "step": 35420 }, { "epoch": 0.37854586249265454, "grad_norm": 8.059375762939453, "learning_rate": 9.966578389147916e-07, "loss": 0.0827, "step": 35430 }, { "epoch": 0.3786527058069341, "grad_norm": 0.6545472741127014, "learning_rate": 9.966558993200232e-07, "loss": 0.0739, "step": 35440 }, { "epoch": 0.37875954912121373, "grad_norm": 8.463041305541992, "learning_rate": 9.966539591644926e-07, "loss": 0.0968, "step": 35450 }, { "epoch": 0.37886639243549336, "grad_norm": 14.92458438873291, "learning_rate": 9.966520184482023e-07, "loss": 0.1413, "step": 35460 }, { "epoch": 0.378973235749773, "grad_norm": 5.033496379852295, "learning_rate": 9.966500771711544e-07, "loss": 0.0824, "step": 35470 }, { "epoch": 0.37908007906405256, "grad_norm": 1.9830915927886963, "learning_rate": 9.966481353333508e-07, "loss": 0.036, "step": 35480 }, { "epoch": 0.3791869223783322, "grad_norm": 2.8438894748687744, "learning_rate": 9.966461929347944e-07, "loss": 0.0207, "step": 35490 }, { "epoch": 0.3792937656926118, "grad_norm": 9.602019309997559, "learning_rate": 9.966442499754868e-07, "loss": 0.0804, "step": 35500 }, { "epoch": 0.3794006090068914, "grad_norm": 0.46861788630485535, "learning_rate": 9.966423064554302e-07, "loss": 0.0247, "step": 35510 }, { "epoch": 0.379507452321171, "grad_norm": 7.370344638824463, "learning_rate": 9.966403623746272e-07, "loss": 0.0527, "step": 35520 }, { "epoch": 0.3796142956354506, "grad_norm": 0.2367640733718872, "learning_rate": 9.966384177330795e-07, "loss": 0.0574, "step": 35530 }, { "epoch": 0.3797211389497302, "grad_norm": 4.208661079406738, "learning_rate": 9.966364725307897e-07, "loss": 0.0677, "step": 35540 }, { "epoch": 0.3798279822640098, "grad_norm": 5.919600963592529, "learning_rate": 9.966345267677598e-07, "loss": 0.0569, "step": 35550 }, { "epoch": 0.37993482557828945, "grad_norm": 4.628713130950928, "learning_rate": 9.966325804439922e-07, "loss": 0.0956, "step": 35560 }, { "epoch": 0.3800416688925691, "grad_norm": 2.2890167236328125, "learning_rate": 9.966306335594887e-07, "loss": 0.0836, "step": 35570 }, { "epoch": 0.38014851220684864, "grad_norm": 0.5121745467185974, "learning_rate": 9.966286861142518e-07, "loss": 0.1202, "step": 35580 }, { "epoch": 0.38025535552112827, "grad_norm": 9.11091136932373, "learning_rate": 9.966267381082836e-07, "loss": 0.0701, "step": 35590 }, { "epoch": 0.3803621988354079, "grad_norm": 1.0043786764144897, "learning_rate": 9.966247895415865e-07, "loss": 0.06, "step": 35600 }, { "epoch": 0.38046904214968746, "grad_norm": 7.625442028045654, "learning_rate": 9.966228404141622e-07, "loss": 0.0326, "step": 35610 }, { "epoch": 0.3805758854639671, "grad_norm": 9.520454406738281, "learning_rate": 9.966208907260136e-07, "loss": 0.0632, "step": 35620 }, { "epoch": 0.3806827287782467, "grad_norm": 3.807349920272827, "learning_rate": 9.966189404771423e-07, "loss": 0.0964, "step": 35630 }, { "epoch": 0.3807895720925263, "grad_norm": 17.1983699798584, "learning_rate": 9.96616989667551e-07, "loss": 0.164, "step": 35640 }, { "epoch": 0.3808964154068059, "grad_norm": 3.2558722496032715, "learning_rate": 9.966150382972413e-07, "loss": 0.0761, "step": 35650 }, { "epoch": 0.38100325872108554, "grad_norm": 9.287938117980957, "learning_rate": 9.96613086366216e-07, "loss": 0.0945, "step": 35660 }, { "epoch": 0.38111010203536516, "grad_norm": 0.5073709487915039, "learning_rate": 9.966111338744768e-07, "loss": 0.0328, "step": 35670 }, { "epoch": 0.38121694534964473, "grad_norm": 1.8771631717681885, "learning_rate": 9.966091808220262e-07, "loss": 0.0803, "step": 35680 }, { "epoch": 0.38132378866392436, "grad_norm": 19.984079360961914, "learning_rate": 9.966072272088667e-07, "loss": 0.1094, "step": 35690 }, { "epoch": 0.381430631978204, "grad_norm": 3.793384313583374, "learning_rate": 9.966052730349997e-07, "loss": 0.0431, "step": 35700 }, { "epoch": 0.38153747529248355, "grad_norm": 19.5322208404541, "learning_rate": 9.96603318300428e-07, "loss": 0.0705, "step": 35710 }, { "epoch": 0.3816443186067632, "grad_norm": 5.857026100158691, "learning_rate": 9.966013630051537e-07, "loss": 0.0949, "step": 35720 }, { "epoch": 0.3817511619210428, "grad_norm": 3.2541120052337646, "learning_rate": 9.965994071491792e-07, "loss": 0.1119, "step": 35730 }, { "epoch": 0.3818580052353224, "grad_norm": 1.2860678434371948, "learning_rate": 9.965974507325062e-07, "loss": 0.054, "step": 35740 }, { "epoch": 0.381964848549602, "grad_norm": 3.2124545574188232, "learning_rate": 9.965954937551373e-07, "loss": 0.0574, "step": 35750 }, { "epoch": 0.3820716918638816, "grad_norm": 12.175816535949707, "learning_rate": 9.965935362170746e-07, "loss": 0.0998, "step": 35760 }, { "epoch": 0.38217853517816125, "grad_norm": 6.7687907218933105, "learning_rate": 9.965915781183202e-07, "loss": 0.0513, "step": 35770 }, { "epoch": 0.3822853784924408, "grad_norm": 17.408632278442383, "learning_rate": 9.965896194588768e-07, "loss": 0.0547, "step": 35780 }, { "epoch": 0.38239222180672044, "grad_norm": 8.313252449035645, "learning_rate": 9.96587660238746e-07, "loss": 0.081, "step": 35790 }, { "epoch": 0.38249906512100007, "grad_norm": 3.303103446960449, "learning_rate": 9.9658570045793e-07, "loss": 0.0922, "step": 35800 }, { "epoch": 0.38260590843527964, "grad_norm": 5.443629741668701, "learning_rate": 9.965837401164317e-07, "loss": 0.0265, "step": 35810 }, { "epoch": 0.38271275174955927, "grad_norm": 2.981069326400757, "learning_rate": 9.965817792142525e-07, "loss": 0.1286, "step": 35820 }, { "epoch": 0.3828195950638389, "grad_norm": 3.0738797187805176, "learning_rate": 9.965798177513954e-07, "loss": 0.0887, "step": 35830 }, { "epoch": 0.3829264383781185, "grad_norm": 11.364229202270508, "learning_rate": 9.96577855727862e-07, "loss": 0.1012, "step": 35840 }, { "epoch": 0.3830332816923981, "grad_norm": 17.387577056884766, "learning_rate": 9.965758931436547e-07, "loss": 0.0573, "step": 35850 }, { "epoch": 0.3831401250066777, "grad_norm": 6.72292423248291, "learning_rate": 9.965739299987756e-07, "loss": 0.1086, "step": 35860 }, { "epoch": 0.38324696832095734, "grad_norm": 0.481717050075531, "learning_rate": 9.965719662932272e-07, "loss": 0.1007, "step": 35870 }, { "epoch": 0.3833538116352369, "grad_norm": 2.893275737762451, "learning_rate": 9.965700020270116e-07, "loss": 0.0543, "step": 35880 }, { "epoch": 0.38346065494951653, "grad_norm": 13.44503116607666, "learning_rate": 9.965680372001311e-07, "loss": 0.0507, "step": 35890 }, { "epoch": 0.38356749826379616, "grad_norm": 9.156193733215332, "learning_rate": 9.965660718125877e-07, "loss": 0.0871, "step": 35900 }, { "epoch": 0.38367434157807573, "grad_norm": 4.201329231262207, "learning_rate": 9.965641058643836e-07, "loss": 0.0862, "step": 35910 }, { "epoch": 0.38378118489235535, "grad_norm": 5.639634132385254, "learning_rate": 9.965621393555212e-07, "loss": 0.0517, "step": 35920 }, { "epoch": 0.383888028206635, "grad_norm": 0.09855441749095917, "learning_rate": 9.965601722860029e-07, "loss": 0.0618, "step": 35930 }, { "epoch": 0.3839948715209146, "grad_norm": 3.053396701812744, "learning_rate": 9.965582046558304e-07, "loss": 0.108, "step": 35940 }, { "epoch": 0.3841017148351942, "grad_norm": 13.43065071105957, "learning_rate": 9.965562364650062e-07, "loss": 0.0812, "step": 35950 }, { "epoch": 0.3842085581494738, "grad_norm": 6.596292018890381, "learning_rate": 9.965542677135329e-07, "loss": 0.1054, "step": 35960 }, { "epoch": 0.3843154014637534, "grad_norm": 0.05137518048286438, "learning_rate": 9.96552298401412e-07, "loss": 0.0727, "step": 35970 }, { "epoch": 0.384422244778033, "grad_norm": 9.023834228515625, "learning_rate": 9.965503285286461e-07, "loss": 0.0994, "step": 35980 }, { "epoch": 0.3845290880923126, "grad_norm": 5.38601541519165, "learning_rate": 9.965483580952376e-07, "loss": 0.1027, "step": 35990 }, { "epoch": 0.38463593140659225, "grad_norm": 5.3363518714904785, "learning_rate": 9.965463871011884e-07, "loss": 0.0662, "step": 36000 }, { "epoch": 0.3847427747208718, "grad_norm": 4.252333641052246, "learning_rate": 9.965444155465008e-07, "loss": 0.072, "step": 36010 }, { "epoch": 0.38484961803515144, "grad_norm": 6.211766242980957, "learning_rate": 9.96542443431177e-07, "loss": 0.0692, "step": 36020 }, { "epoch": 0.38495646134943107, "grad_norm": 8.937881469726562, "learning_rate": 9.965404707552196e-07, "loss": 0.0551, "step": 36030 }, { "epoch": 0.3850633046637107, "grad_norm": 12.253077507019043, "learning_rate": 9.965384975186303e-07, "loss": 0.1032, "step": 36040 }, { "epoch": 0.38517014797799026, "grad_norm": 3.25405216217041, "learning_rate": 9.965365237214118e-07, "loss": 0.1306, "step": 36050 }, { "epoch": 0.3852769912922699, "grad_norm": 9.892730712890625, "learning_rate": 9.96534549363566e-07, "loss": 0.0958, "step": 36060 }, { "epoch": 0.3853838346065495, "grad_norm": 6.657432556152344, "learning_rate": 9.96532574445095e-07, "loss": 0.1043, "step": 36070 }, { "epoch": 0.3854906779208291, "grad_norm": 12.074800491333008, "learning_rate": 9.965305989660014e-07, "loss": 0.0625, "step": 36080 }, { "epoch": 0.3855975212351087, "grad_norm": 7.398042678833008, "learning_rate": 9.965286229262872e-07, "loss": 0.1555, "step": 36090 }, { "epoch": 0.38570436454938833, "grad_norm": 11.970807075500488, "learning_rate": 9.96526646325955e-07, "loss": 0.1241, "step": 36100 }, { "epoch": 0.3858112078636679, "grad_norm": 3.098191022872925, "learning_rate": 9.965246691650064e-07, "loss": 0.0775, "step": 36110 }, { "epoch": 0.38591805117794753, "grad_norm": 5.0059003829956055, "learning_rate": 9.965226914434441e-07, "loss": 0.0379, "step": 36120 }, { "epoch": 0.38602489449222716, "grad_norm": 12.98741626739502, "learning_rate": 9.965207131612704e-07, "loss": 0.0651, "step": 36130 }, { "epoch": 0.3861317378065068, "grad_norm": 12.15999984741211, "learning_rate": 9.96518734318487e-07, "loss": 0.0572, "step": 36140 }, { "epoch": 0.38623858112078635, "grad_norm": 3.562178373336792, "learning_rate": 9.965167549150966e-07, "loss": 0.0379, "step": 36150 }, { "epoch": 0.386345424435066, "grad_norm": 1.885649561882019, "learning_rate": 9.965147749511015e-07, "loss": 0.0543, "step": 36160 }, { "epoch": 0.3864522677493456, "grad_norm": 24.076765060424805, "learning_rate": 9.965127944265035e-07, "loss": 0.1006, "step": 36170 }, { "epoch": 0.38655911106362517, "grad_norm": 5.363022804260254, "learning_rate": 9.965108133413052e-07, "loss": 0.0427, "step": 36180 }, { "epoch": 0.3866659543779048, "grad_norm": 1.003836750984192, "learning_rate": 9.965088316955086e-07, "loss": 0.1231, "step": 36190 }, { "epoch": 0.3867727976921844, "grad_norm": 10.22407341003418, "learning_rate": 9.965068494891163e-07, "loss": 0.1182, "step": 36200 }, { "epoch": 0.38687964100646405, "grad_norm": 0.47538915276527405, "learning_rate": 9.9650486672213e-07, "loss": 0.0457, "step": 36210 }, { "epoch": 0.3869864843207436, "grad_norm": 7.92677116394043, "learning_rate": 9.965028833945523e-07, "loss": 0.0902, "step": 36220 }, { "epoch": 0.38709332763502324, "grad_norm": 3.15899395942688, "learning_rate": 9.965008995063854e-07, "loss": 0.1392, "step": 36230 }, { "epoch": 0.38720017094930287, "grad_norm": 21.554882049560547, "learning_rate": 9.964989150576318e-07, "loss": 0.0758, "step": 36240 }, { "epoch": 0.38730701426358244, "grad_norm": 3.0147712230682373, "learning_rate": 9.964969300482932e-07, "loss": 0.0967, "step": 36250 }, { "epoch": 0.38741385757786206, "grad_norm": 1.4746757745742798, "learning_rate": 9.96494944478372e-07, "loss": 0.0275, "step": 36260 }, { "epoch": 0.3875207008921417, "grad_norm": 4.554011821746826, "learning_rate": 9.964929583478707e-07, "loss": 0.0483, "step": 36270 }, { "epoch": 0.38762754420642126, "grad_norm": 7.3199920654296875, "learning_rate": 9.964909716567911e-07, "loss": 0.0757, "step": 36280 }, { "epoch": 0.3877343875207009, "grad_norm": 7.83318567276001, "learning_rate": 9.96488984405136e-07, "loss": 0.122, "step": 36290 }, { "epoch": 0.3878412308349805, "grad_norm": 4.4185333251953125, "learning_rate": 9.964869965929073e-07, "loss": 0.0758, "step": 36300 }, { "epoch": 0.38794807414926014, "grad_norm": 0.09003319591283798, "learning_rate": 9.964850082201074e-07, "loss": 0.049, "step": 36310 }, { "epoch": 0.3880549174635397, "grad_norm": 5.902983665466309, "learning_rate": 9.964830192867383e-07, "loss": 0.0712, "step": 36320 }, { "epoch": 0.38816176077781933, "grad_norm": 2.7814602851867676, "learning_rate": 9.964810297928025e-07, "loss": 0.0523, "step": 36330 }, { "epoch": 0.38826860409209896, "grad_norm": 7.21425724029541, "learning_rate": 9.96479039738302e-07, "loss": 0.0827, "step": 36340 }, { "epoch": 0.3883754474063785, "grad_norm": 0.4623080790042877, "learning_rate": 9.964770491232395e-07, "loss": 0.0461, "step": 36350 }, { "epoch": 0.38848229072065815, "grad_norm": 0.6919568777084351, "learning_rate": 9.964750579476166e-07, "loss": 0.0468, "step": 36360 }, { "epoch": 0.3885891340349378, "grad_norm": 9.604665756225586, "learning_rate": 9.96473066211436e-07, "loss": 0.0756, "step": 36370 }, { "epoch": 0.38869597734921735, "grad_norm": 1.5657105445861816, "learning_rate": 9.964710739146998e-07, "loss": 0.0392, "step": 36380 }, { "epoch": 0.388802820663497, "grad_norm": 13.946240425109863, "learning_rate": 9.964690810574105e-07, "loss": 0.0663, "step": 36390 }, { "epoch": 0.3889096639777766, "grad_norm": 7.584840297698975, "learning_rate": 9.9646708763957e-07, "loss": 0.0589, "step": 36400 }, { "epoch": 0.3890165072920562, "grad_norm": 4.609457015991211, "learning_rate": 9.964650936611806e-07, "loss": 0.0611, "step": 36410 }, { "epoch": 0.3891233506063358, "grad_norm": 4.461284160614014, "learning_rate": 9.964630991222446e-07, "loss": 0.0639, "step": 36420 }, { "epoch": 0.3892301939206154, "grad_norm": 2.466282367706299, "learning_rate": 9.964611040227647e-07, "loss": 0.0846, "step": 36430 }, { "epoch": 0.38933703723489504, "grad_norm": 11.21489143371582, "learning_rate": 9.964591083627423e-07, "loss": 0.0882, "step": 36440 }, { "epoch": 0.3894438805491746, "grad_norm": 3.0766704082489014, "learning_rate": 9.964571121421804e-07, "loss": 0.0992, "step": 36450 }, { "epoch": 0.38955072386345424, "grad_norm": 11.949014663696289, "learning_rate": 9.964551153610806e-07, "loss": 0.0869, "step": 36460 }, { "epoch": 0.38965756717773387, "grad_norm": 5.648654460906982, "learning_rate": 9.964531180194459e-07, "loss": 0.0367, "step": 36470 }, { "epoch": 0.38976441049201344, "grad_norm": 2.859365940093994, "learning_rate": 9.96451120117278e-07, "loss": 0.0525, "step": 36480 }, { "epoch": 0.38987125380629306, "grad_norm": 2.2737324237823486, "learning_rate": 9.964491216545791e-07, "loss": 0.0637, "step": 36490 }, { "epoch": 0.3899780971205727, "grad_norm": 2.518822193145752, "learning_rate": 9.964471226313518e-07, "loss": 0.041, "step": 36500 }, { "epoch": 0.3900849404348523, "grad_norm": 1.0785861015319824, "learning_rate": 9.964451230475984e-07, "loss": 0.0779, "step": 36510 }, { "epoch": 0.3901917837491319, "grad_norm": 2.189044952392578, "learning_rate": 9.964431229033209e-07, "loss": 0.0865, "step": 36520 }, { "epoch": 0.3902986270634115, "grad_norm": 9.806112289428711, "learning_rate": 9.964411221985216e-07, "loss": 0.1406, "step": 36530 }, { "epoch": 0.39040547037769113, "grad_norm": 0.20784959197044373, "learning_rate": 9.964391209332028e-07, "loss": 0.0776, "step": 36540 }, { "epoch": 0.3905123136919707, "grad_norm": 5.260464191436768, "learning_rate": 9.96437119107367e-07, "loss": 0.0792, "step": 36550 }, { "epoch": 0.39061915700625033, "grad_norm": 9.237141609191895, "learning_rate": 9.964351167210158e-07, "loss": 0.0536, "step": 36560 }, { "epoch": 0.39072600032052995, "grad_norm": 16.939104080200195, "learning_rate": 9.964331137741522e-07, "loss": 0.1773, "step": 36570 }, { "epoch": 0.3908328436348096, "grad_norm": 2.0094077587127686, "learning_rate": 9.96431110266778e-07, "loss": 0.1434, "step": 36580 }, { "epoch": 0.39093968694908915, "grad_norm": 4.706939697265625, "learning_rate": 9.964291061988956e-07, "loss": 0.0463, "step": 36590 }, { "epoch": 0.3910465302633688, "grad_norm": 2.996520519256592, "learning_rate": 9.964271015705072e-07, "loss": 0.0808, "step": 36600 }, { "epoch": 0.3911533735776484, "grad_norm": 1.1424195766448975, "learning_rate": 9.964250963816153e-07, "loss": 0.0782, "step": 36610 }, { "epoch": 0.39126021689192797, "grad_norm": 2.3405609130859375, "learning_rate": 9.96423090632222e-07, "loss": 0.0614, "step": 36620 }, { "epoch": 0.3913670602062076, "grad_norm": 7.653176307678223, "learning_rate": 9.964210843223294e-07, "loss": 0.1001, "step": 36630 }, { "epoch": 0.3914739035204872, "grad_norm": 6.637063503265381, "learning_rate": 9.9641907745194e-07, "loss": 0.1475, "step": 36640 }, { "epoch": 0.3915807468347668, "grad_norm": 11.049494743347168, "learning_rate": 9.964170700210564e-07, "loss": 0.0895, "step": 36650 }, { "epoch": 0.3916875901490464, "grad_norm": 7.4770894050598145, "learning_rate": 9.964150620296801e-07, "loss": 0.0369, "step": 36660 }, { "epoch": 0.39179443346332604, "grad_norm": 4.675745964050293, "learning_rate": 9.964130534778137e-07, "loss": 0.1067, "step": 36670 }, { "epoch": 0.39190127677760567, "grad_norm": 5.450390815734863, "learning_rate": 9.964110443654595e-07, "loss": 0.1146, "step": 36680 }, { "epoch": 0.39200812009188524, "grad_norm": 6.391129016876221, "learning_rate": 9.964090346926197e-07, "loss": 0.0967, "step": 36690 }, { "epoch": 0.39211496340616486, "grad_norm": 4.096208095550537, "learning_rate": 9.964070244592968e-07, "loss": 0.0902, "step": 36700 }, { "epoch": 0.3922218067204445, "grad_norm": 0.10245450586080551, "learning_rate": 9.964050136654928e-07, "loss": 0.0764, "step": 36710 }, { "epoch": 0.39232865003472406, "grad_norm": 8.416041374206543, "learning_rate": 9.964030023112103e-07, "loss": 0.1505, "step": 36720 }, { "epoch": 0.3924354933490037, "grad_norm": 4.766955852508545, "learning_rate": 9.96400990396451e-07, "loss": 0.0521, "step": 36730 }, { "epoch": 0.3925423366632833, "grad_norm": 0.8932043313980103, "learning_rate": 9.963989779212177e-07, "loss": 0.0916, "step": 36740 }, { "epoch": 0.3926491799775629, "grad_norm": 5.979391098022461, "learning_rate": 9.963969648855125e-07, "loss": 0.0818, "step": 36750 }, { "epoch": 0.3927560232918425, "grad_norm": 7.1495866775512695, "learning_rate": 9.963949512893377e-07, "loss": 0.1214, "step": 36760 }, { "epoch": 0.39286286660612213, "grad_norm": 2.2626709938049316, "learning_rate": 9.963929371326953e-07, "loss": 0.0471, "step": 36770 }, { "epoch": 0.39296970992040176, "grad_norm": 6.212576866149902, "learning_rate": 9.96390922415588e-07, "loss": 0.0601, "step": 36780 }, { "epoch": 0.3930765532346813, "grad_norm": 4.479438304901123, "learning_rate": 9.963889071380179e-07, "loss": 0.0583, "step": 36790 }, { "epoch": 0.39318339654896095, "grad_norm": 2.4252898693084717, "learning_rate": 9.963868912999872e-07, "loss": 0.0795, "step": 36800 }, { "epoch": 0.3932902398632406, "grad_norm": 5.675975322723389, "learning_rate": 9.963848749014983e-07, "loss": 0.0572, "step": 36810 }, { "epoch": 0.39339708317752015, "grad_norm": 5.114990234375, "learning_rate": 9.963828579425533e-07, "loss": 0.0617, "step": 36820 }, { "epoch": 0.39350392649179977, "grad_norm": 6.551698207855225, "learning_rate": 9.963808404231546e-07, "loss": 0.0886, "step": 36830 }, { "epoch": 0.3936107698060794, "grad_norm": 7.94685697555542, "learning_rate": 9.963788223433045e-07, "loss": 0.0429, "step": 36840 }, { "epoch": 0.39371761312035897, "grad_norm": 5.289630889892578, "learning_rate": 9.963768037030053e-07, "loss": 0.1097, "step": 36850 }, { "epoch": 0.3938244564346386, "grad_norm": 4.041104316711426, "learning_rate": 9.96374784502259e-07, "loss": 0.0533, "step": 36860 }, { "epoch": 0.3939312997489182, "grad_norm": 7.951751708984375, "learning_rate": 9.963727647410685e-07, "loss": 0.1113, "step": 36870 }, { "epoch": 0.39403814306319784, "grad_norm": 1.000449776649475, "learning_rate": 9.963707444194355e-07, "loss": 0.0739, "step": 36880 }, { "epoch": 0.3941449863774774, "grad_norm": 3.4423165321350098, "learning_rate": 9.963687235373626e-07, "loss": 0.041, "step": 36890 }, { "epoch": 0.39425182969175704, "grad_norm": 1.0756397247314453, "learning_rate": 9.963667020948516e-07, "loss": 0.0524, "step": 36900 }, { "epoch": 0.39435867300603666, "grad_norm": 2.565861225128174, "learning_rate": 9.963646800919055e-07, "loss": 0.0328, "step": 36910 }, { "epoch": 0.39446551632031623, "grad_norm": 3.6226601600646973, "learning_rate": 9.96362657528526e-07, "loss": 0.0403, "step": 36920 }, { "epoch": 0.39457235963459586, "grad_norm": 1.145254373550415, "learning_rate": 9.963606344047156e-07, "loss": 0.0801, "step": 36930 }, { "epoch": 0.3946792029488755, "grad_norm": 1.6556400060653687, "learning_rate": 9.963586107204766e-07, "loss": 0.0478, "step": 36940 }, { "epoch": 0.3947860462631551, "grad_norm": 5.023741245269775, "learning_rate": 9.963565864758114e-07, "loss": 0.0618, "step": 36950 }, { "epoch": 0.3948928895774347, "grad_norm": 10.327051162719727, "learning_rate": 9.963545616707221e-07, "loss": 0.1695, "step": 36960 }, { "epoch": 0.3949997328917143, "grad_norm": 9.175734519958496, "learning_rate": 9.963525363052108e-07, "loss": 0.0312, "step": 36970 }, { "epoch": 0.39510657620599393, "grad_norm": 0.37810489535331726, "learning_rate": 9.963505103792801e-07, "loss": 0.054, "step": 36980 }, { "epoch": 0.3952134195202735, "grad_norm": 17.737567901611328, "learning_rate": 9.963484838929325e-07, "loss": 0.0817, "step": 36990 }, { "epoch": 0.3953202628345531, "grad_norm": 6.68107271194458, "learning_rate": 9.963464568461699e-07, "loss": 0.0606, "step": 37000 }, { "epoch": 0.39542710614883275, "grad_norm": 9.168426513671875, "learning_rate": 9.963444292389946e-07, "loss": 0.0497, "step": 37010 }, { "epoch": 0.3955339494631123, "grad_norm": 3.8116791248321533, "learning_rate": 9.96342401071409e-07, "loss": 0.1003, "step": 37020 }, { "epoch": 0.39564079277739195, "grad_norm": 10.704115867614746, "learning_rate": 9.963403723434153e-07, "loss": 0.0931, "step": 37030 }, { "epoch": 0.3957476360916716, "grad_norm": 0.7510799169540405, "learning_rate": 9.963383430550158e-07, "loss": 0.1027, "step": 37040 }, { "epoch": 0.3958544794059512, "grad_norm": 2.9559547901153564, "learning_rate": 9.96336313206213e-07, "loss": 0.0894, "step": 37050 }, { "epoch": 0.39596132272023077, "grad_norm": 1.5663689374923706, "learning_rate": 9.96334282797009e-07, "loss": 0.0582, "step": 37060 }, { "epoch": 0.3960681660345104, "grad_norm": 1.6112432479858398, "learning_rate": 9.963322518274062e-07, "loss": 0.0232, "step": 37070 }, { "epoch": 0.39617500934879, "grad_norm": 8.517353057861328, "learning_rate": 9.963302202974069e-07, "loss": 0.073, "step": 37080 }, { "epoch": 0.3962818526630696, "grad_norm": 1.0479854345321655, "learning_rate": 9.963281882070132e-07, "loss": 0.0384, "step": 37090 }, { "epoch": 0.3963886959773492, "grad_norm": 3.7016639709472656, "learning_rate": 9.963261555562273e-07, "loss": 0.074, "step": 37100 }, { "epoch": 0.39649553929162884, "grad_norm": 2.269474506378174, "learning_rate": 9.96324122345052e-07, "loss": 0.0862, "step": 37110 }, { "epoch": 0.3966023826059084, "grad_norm": 6.231327056884766, "learning_rate": 9.963220885734891e-07, "loss": 0.1878, "step": 37120 }, { "epoch": 0.39670922592018804, "grad_norm": 0.2016013264656067, "learning_rate": 9.963200542415414e-07, "loss": 0.0247, "step": 37130 }, { "epoch": 0.39681606923446766, "grad_norm": 0.48367738723754883, "learning_rate": 9.963180193492106e-07, "loss": 0.081, "step": 37140 }, { "epoch": 0.3969229125487473, "grad_norm": 4.306090354919434, "learning_rate": 9.963159838964994e-07, "loss": 0.0418, "step": 37150 }, { "epoch": 0.39702975586302686, "grad_norm": 9.393610954284668, "learning_rate": 9.963139478834102e-07, "loss": 0.0588, "step": 37160 }, { "epoch": 0.3971365991773065, "grad_norm": 4.704793930053711, "learning_rate": 9.963119113099448e-07, "loss": 0.0528, "step": 37170 }, { "epoch": 0.3972434424915861, "grad_norm": 7.826394557952881, "learning_rate": 9.963098741761061e-07, "loss": 0.0706, "step": 37180 }, { "epoch": 0.3973502858058657, "grad_norm": 2.795032024383545, "learning_rate": 9.963078364818958e-07, "loss": 0.1103, "step": 37190 }, { "epoch": 0.3974571291201453, "grad_norm": 20.511043548583984, "learning_rate": 9.963057982273165e-07, "loss": 0.1407, "step": 37200 }, { "epoch": 0.39756397243442493, "grad_norm": 1.8910795450210571, "learning_rate": 9.963037594123707e-07, "loss": 0.1517, "step": 37210 }, { "epoch": 0.3976708157487045, "grad_norm": 6.994448184967041, "learning_rate": 9.963017200370602e-07, "loss": 0.0842, "step": 37220 }, { "epoch": 0.3977776590629841, "grad_norm": 3.316885232925415, "learning_rate": 9.96299680101388e-07, "loss": 0.0661, "step": 37230 }, { "epoch": 0.39788450237726375, "grad_norm": 0.21350568532943726, "learning_rate": 9.962976396053557e-07, "loss": 0.0855, "step": 37240 }, { "epoch": 0.3979913456915434, "grad_norm": 4.612175941467285, "learning_rate": 9.96295598548966e-07, "loss": 0.0492, "step": 37250 }, { "epoch": 0.39809818900582294, "grad_norm": 5.8477277755737305, "learning_rate": 9.96293556932221e-07, "loss": 0.0694, "step": 37260 }, { "epoch": 0.39820503232010257, "grad_norm": 0.8608704805374146, "learning_rate": 9.962915147551233e-07, "loss": 0.0546, "step": 37270 }, { "epoch": 0.3983118756343822, "grad_norm": 4.3399577140808105, "learning_rate": 9.96289472017675e-07, "loss": 0.0553, "step": 37280 }, { "epoch": 0.39841871894866177, "grad_norm": 2.309677839279175, "learning_rate": 9.962874287198783e-07, "loss": 0.0223, "step": 37290 }, { "epoch": 0.3985255622629414, "grad_norm": 6.619284152984619, "learning_rate": 9.962853848617358e-07, "loss": 0.0882, "step": 37300 }, { "epoch": 0.398632405577221, "grad_norm": 6.454741954803467, "learning_rate": 9.962833404432496e-07, "loss": 0.062, "step": 37310 }, { "epoch": 0.39873924889150064, "grad_norm": 4.663016319274902, "learning_rate": 9.96281295464422e-07, "loss": 0.0585, "step": 37320 }, { "epoch": 0.3988460922057802, "grad_norm": 10.760947227478027, "learning_rate": 9.962792499252553e-07, "loss": 0.0664, "step": 37330 }, { "epoch": 0.39895293552005984, "grad_norm": 11.431370735168457, "learning_rate": 9.962772038257522e-07, "loss": 0.1107, "step": 37340 }, { "epoch": 0.39905977883433946, "grad_norm": 6.64814567565918, "learning_rate": 9.962751571659144e-07, "loss": 0.0723, "step": 37350 }, { "epoch": 0.39916662214861903, "grad_norm": 5.821361064910889, "learning_rate": 9.962731099457443e-07, "loss": 0.093, "step": 37360 }, { "epoch": 0.39927346546289866, "grad_norm": 6.840620040893555, "learning_rate": 9.962710621652446e-07, "loss": 0.1128, "step": 37370 }, { "epoch": 0.3993803087771783, "grad_norm": 0.03253341466188431, "learning_rate": 9.962690138244176e-07, "loss": 0.0504, "step": 37380 }, { "epoch": 0.39948715209145785, "grad_norm": 8.165924072265625, "learning_rate": 9.962669649232652e-07, "loss": 0.0391, "step": 37390 }, { "epoch": 0.3995939954057375, "grad_norm": 8.297797203063965, "learning_rate": 9.9626491546179e-07, "loss": 0.1512, "step": 37400 }, { "epoch": 0.3997008387200171, "grad_norm": 8.09914779663086, "learning_rate": 9.962628654399942e-07, "loss": 0.0473, "step": 37410 }, { "epoch": 0.39980768203429673, "grad_norm": 4.6598052978515625, "learning_rate": 9.962608148578803e-07, "loss": 0.0684, "step": 37420 }, { "epoch": 0.3999145253485763, "grad_norm": 0.08247154206037521, "learning_rate": 9.962587637154505e-07, "loss": 0.0235, "step": 37430 }, { "epoch": 0.4000213686628559, "grad_norm": 3.373666763305664, "learning_rate": 9.96256712012707e-07, "loss": 0.1103, "step": 37440 }, { "epoch": 0.40012821197713555, "grad_norm": 5.366694450378418, "learning_rate": 9.962546597496522e-07, "loss": 0.085, "step": 37450 }, { "epoch": 0.4002350552914151, "grad_norm": 6.5802836418151855, "learning_rate": 9.962526069262885e-07, "loss": 0.0544, "step": 37460 }, { "epoch": 0.40034189860569475, "grad_norm": 5.1228346824646, "learning_rate": 9.962505535426182e-07, "loss": 0.0858, "step": 37470 }, { "epoch": 0.40044874191997437, "grad_norm": 6.859469890594482, "learning_rate": 9.962484995986434e-07, "loss": 0.0675, "step": 37480 }, { "epoch": 0.40055558523425394, "grad_norm": 0.11661524325609207, "learning_rate": 9.962464450943668e-07, "loss": 0.0839, "step": 37490 }, { "epoch": 0.40066242854853357, "grad_norm": 8.874444007873535, "learning_rate": 9.962443900297903e-07, "loss": 0.1521, "step": 37500 }, { "epoch": 0.4007692718628132, "grad_norm": 18.714570999145508, "learning_rate": 9.962423344049166e-07, "loss": 0.1458, "step": 37510 }, { "epoch": 0.4008761151770928, "grad_norm": 4.615023136138916, "learning_rate": 9.962402782197478e-07, "loss": 0.0733, "step": 37520 }, { "epoch": 0.4009829584913724, "grad_norm": 0.20452632009983063, "learning_rate": 9.96238221474286e-07, "loss": 0.0665, "step": 37530 }, { "epoch": 0.401089801805652, "grad_norm": 4.583807945251465, "learning_rate": 9.962361641685343e-07, "loss": 0.0698, "step": 37540 }, { "epoch": 0.40119664511993164, "grad_norm": 4.4028730392456055, "learning_rate": 9.962341063024942e-07, "loss": 0.1005, "step": 37550 }, { "epoch": 0.4013034884342112, "grad_norm": 4.79547119140625, "learning_rate": 9.962320478761685e-07, "loss": 0.1034, "step": 37560 }, { "epoch": 0.40141033174849083, "grad_norm": 5.909955024719238, "learning_rate": 9.962299888895593e-07, "loss": 0.111, "step": 37570 }, { "epoch": 0.40151717506277046, "grad_norm": 0.04251880943775177, "learning_rate": 9.96227929342669e-07, "loss": 0.0196, "step": 37580 }, { "epoch": 0.40162401837705003, "grad_norm": 11.864899635314941, "learning_rate": 9.962258692354997e-07, "loss": 0.1156, "step": 37590 }, { "epoch": 0.40173086169132965, "grad_norm": 0.9289451837539673, "learning_rate": 9.962238085680542e-07, "loss": 0.0779, "step": 37600 }, { "epoch": 0.4018377050056093, "grad_norm": 14.691471099853516, "learning_rate": 9.962217473403346e-07, "loss": 0.1276, "step": 37610 }, { "epoch": 0.4019445483198889, "grad_norm": 0.7212806344032288, "learning_rate": 9.96219685552343e-07, "loss": 0.0447, "step": 37620 }, { "epoch": 0.4020513916341685, "grad_norm": 12.286632537841797, "learning_rate": 9.96217623204082e-07, "loss": 0.0687, "step": 37630 }, { "epoch": 0.4021582349484481, "grad_norm": 11.516887664794922, "learning_rate": 9.96215560295554e-07, "loss": 0.0452, "step": 37640 }, { "epoch": 0.4022650782627277, "grad_norm": 0.8454249501228333, "learning_rate": 9.962134968267608e-07, "loss": 0.059, "step": 37650 }, { "epoch": 0.4023719215770073, "grad_norm": 25.621057510375977, "learning_rate": 9.962114327977054e-07, "loss": 0.0913, "step": 37660 }, { "epoch": 0.4024787648912869, "grad_norm": 12.866533279418945, "learning_rate": 9.962093682083898e-07, "loss": 0.138, "step": 37670 }, { "epoch": 0.40258560820556655, "grad_norm": 6.583380699157715, "learning_rate": 9.962073030588165e-07, "loss": 0.0746, "step": 37680 }, { "epoch": 0.4026924515198462, "grad_norm": 1.8703161478042603, "learning_rate": 9.962052373489875e-07, "loss": 0.0856, "step": 37690 }, { "epoch": 0.40279929483412574, "grad_norm": 3.600663185119629, "learning_rate": 9.962031710789054e-07, "loss": 0.0747, "step": 37700 }, { "epoch": 0.40290613814840537, "grad_norm": 7.534358501434326, "learning_rate": 9.962011042485725e-07, "loss": 0.095, "step": 37710 }, { "epoch": 0.403012981462685, "grad_norm": 7.510565757751465, "learning_rate": 9.961990368579912e-07, "loss": 0.0537, "step": 37720 }, { "epoch": 0.40311982477696456, "grad_norm": 4.141178131103516, "learning_rate": 9.961969689071635e-07, "loss": 0.0606, "step": 37730 }, { "epoch": 0.4032266680912442, "grad_norm": 5.611835479736328, "learning_rate": 9.96194900396092e-07, "loss": 0.0689, "step": 37740 }, { "epoch": 0.4033335114055238, "grad_norm": 6.6650824546813965, "learning_rate": 9.961928313247791e-07, "loss": 0.0992, "step": 37750 }, { "epoch": 0.4034403547198034, "grad_norm": 2.3263962268829346, "learning_rate": 9.961907616932272e-07, "loss": 0.07, "step": 37760 }, { "epoch": 0.403547198034083, "grad_norm": 3.5777745246887207, "learning_rate": 9.961886915014383e-07, "loss": 0.0954, "step": 37770 }, { "epoch": 0.40365404134836264, "grad_norm": 6.980558395385742, "learning_rate": 9.96186620749415e-07, "loss": 0.0681, "step": 37780 }, { "epoch": 0.40376088466264226, "grad_norm": 10.933034896850586, "learning_rate": 9.961845494371595e-07, "loss": 0.132, "step": 37790 }, { "epoch": 0.40386772797692183, "grad_norm": 0.9685969948768616, "learning_rate": 9.96182477564674e-07, "loss": 0.1258, "step": 37800 }, { "epoch": 0.40397457129120146, "grad_norm": 17.0969295501709, "learning_rate": 9.961804051319614e-07, "loss": 0.0737, "step": 37810 }, { "epoch": 0.4040814146054811, "grad_norm": 25.26797866821289, "learning_rate": 9.961783321390236e-07, "loss": 0.0775, "step": 37820 }, { "epoch": 0.40418825791976065, "grad_norm": 11.865228652954102, "learning_rate": 9.961762585858628e-07, "loss": 0.0603, "step": 37830 }, { "epoch": 0.4042951012340403, "grad_norm": 1.9986896514892578, "learning_rate": 9.961741844724818e-07, "loss": 0.1431, "step": 37840 }, { "epoch": 0.4044019445483199, "grad_norm": 5.01453161239624, "learning_rate": 9.961721097988825e-07, "loss": 0.047, "step": 37850 }, { "epoch": 0.4045087878625995, "grad_norm": 3.603078603744507, "learning_rate": 9.961700345650673e-07, "loss": 0.0286, "step": 37860 }, { "epoch": 0.4046156311768791, "grad_norm": 8.226618766784668, "learning_rate": 9.961679587710392e-07, "loss": 0.137, "step": 37870 }, { "epoch": 0.4047224744911587, "grad_norm": 0.15241098403930664, "learning_rate": 9.961658824167995e-07, "loss": 0.0358, "step": 37880 }, { "epoch": 0.40482931780543835, "grad_norm": 7.8076348304748535, "learning_rate": 9.961638055023513e-07, "loss": 0.0689, "step": 37890 }, { "epoch": 0.4049361611197179, "grad_norm": 2.437180757522583, "learning_rate": 9.961617280276969e-07, "loss": 0.0733, "step": 37900 }, { "epoch": 0.40504300443399754, "grad_norm": 4.708544731140137, "learning_rate": 9.961596499928382e-07, "loss": 0.063, "step": 37910 }, { "epoch": 0.40514984774827717, "grad_norm": 3.02947998046875, "learning_rate": 9.96157571397778e-07, "loss": 0.108, "step": 37920 }, { "epoch": 0.40525669106255674, "grad_norm": 11.775115013122559, "learning_rate": 9.961554922425182e-07, "loss": 0.1188, "step": 37930 }, { "epoch": 0.40536353437683637, "grad_norm": 2.215566396713257, "learning_rate": 9.961534125270615e-07, "loss": 0.0318, "step": 37940 }, { "epoch": 0.405470377691116, "grad_norm": 6.427131175994873, "learning_rate": 9.961513322514103e-07, "loss": 0.0784, "step": 37950 }, { "epoch": 0.40557722100539556, "grad_norm": 3.9100868701934814, "learning_rate": 9.961492514155668e-07, "loss": 0.09, "step": 37960 }, { "epoch": 0.4056840643196752, "grad_norm": 0.0828964039683342, "learning_rate": 9.96147170019533e-07, "loss": 0.0202, "step": 37970 }, { "epoch": 0.4057909076339548, "grad_norm": 5.250890254974365, "learning_rate": 9.96145088063312e-07, "loss": 0.0528, "step": 37980 }, { "epoch": 0.40589775094823444, "grad_norm": 3.907400369644165, "learning_rate": 9.961430055469056e-07, "loss": 0.092, "step": 37990 }, { "epoch": 0.406004594262514, "grad_norm": 6.985583782196045, "learning_rate": 9.961409224703162e-07, "loss": 0.0691, "step": 38000 }, { "epoch": 0.40611143757679363, "grad_norm": 0.5393176674842834, "learning_rate": 9.961388388335464e-07, "loss": 0.0266, "step": 38010 }, { "epoch": 0.40621828089107326, "grad_norm": 8.554756164550781, "learning_rate": 9.961367546365982e-07, "loss": 0.1267, "step": 38020 }, { "epoch": 0.40632512420535283, "grad_norm": 1.0208022594451904, "learning_rate": 9.961346698794743e-07, "loss": 0.0844, "step": 38030 }, { "epoch": 0.40643196751963245, "grad_norm": 3.3632726669311523, "learning_rate": 9.961325845621772e-07, "loss": 0.0537, "step": 38040 }, { "epoch": 0.4065388108339121, "grad_norm": 2.46376633644104, "learning_rate": 9.961304986847086e-07, "loss": 0.0912, "step": 38050 }, { "epoch": 0.4066456541481917, "grad_norm": 13.881991386413574, "learning_rate": 9.961284122470713e-07, "loss": 0.0873, "step": 38060 }, { "epoch": 0.4067524974624713, "grad_norm": 4.535536766052246, "learning_rate": 9.961263252492676e-07, "loss": 0.0964, "step": 38070 }, { "epoch": 0.4068593407767509, "grad_norm": 8.077715873718262, "learning_rate": 9.961242376912998e-07, "loss": 0.0659, "step": 38080 }, { "epoch": 0.4069661840910305, "grad_norm": 11.338400840759277, "learning_rate": 9.961221495731701e-07, "loss": 0.0859, "step": 38090 }, { "epoch": 0.4070730274053101, "grad_norm": 2.1458487510681152, "learning_rate": 9.961200608948814e-07, "loss": 0.08, "step": 38100 }, { "epoch": 0.4071798707195897, "grad_norm": 7.512385368347168, "learning_rate": 9.961179716564357e-07, "loss": 0.0934, "step": 38110 }, { "epoch": 0.40728671403386935, "grad_norm": 11.600432395935059, "learning_rate": 9.961158818578351e-07, "loss": 0.0763, "step": 38120 }, { "epoch": 0.4073935573481489, "grad_norm": 4.268868923187256, "learning_rate": 9.961137914990824e-07, "loss": 0.0517, "step": 38130 }, { "epoch": 0.40750040066242854, "grad_norm": 0.06959464401006699, "learning_rate": 9.961117005801798e-07, "loss": 0.0582, "step": 38140 }, { "epoch": 0.40760724397670817, "grad_norm": 1.5979139804840088, "learning_rate": 9.961096091011295e-07, "loss": 0.0549, "step": 38150 }, { "epoch": 0.4077140872909878, "grad_norm": 4.9189252853393555, "learning_rate": 9.961075170619341e-07, "loss": 0.1031, "step": 38160 }, { "epoch": 0.40782093060526736, "grad_norm": 2.5659148693084717, "learning_rate": 9.961054244625959e-07, "loss": 0.1475, "step": 38170 }, { "epoch": 0.407927773919547, "grad_norm": 6.289501667022705, "learning_rate": 9.961033313031173e-07, "loss": 0.0921, "step": 38180 }, { "epoch": 0.4080346172338266, "grad_norm": 0.871651828289032, "learning_rate": 9.961012375835005e-07, "loss": 0.028, "step": 38190 }, { "epoch": 0.4081414605481062, "grad_norm": 9.405370712280273, "learning_rate": 9.96099143303748e-07, "loss": 0.0593, "step": 38200 }, { "epoch": 0.4082483038623858, "grad_norm": 5.225762367248535, "learning_rate": 9.96097048463862e-07, "loss": 0.0567, "step": 38210 }, { "epoch": 0.40835514717666543, "grad_norm": 39.34467697143555, "learning_rate": 9.96094953063845e-07, "loss": 0.1047, "step": 38220 }, { "epoch": 0.408461990490945, "grad_norm": 12.779241561889648, "learning_rate": 9.960928571036996e-07, "loss": 0.0625, "step": 38230 }, { "epoch": 0.40856883380522463, "grad_norm": 6.324641227722168, "learning_rate": 9.960907605834276e-07, "loss": 0.074, "step": 38240 }, { "epoch": 0.40867567711950425, "grad_norm": 0.3788647949695587, "learning_rate": 9.960886635030318e-07, "loss": 0.0309, "step": 38250 }, { "epoch": 0.4087825204337839, "grad_norm": 2.1563961505889893, "learning_rate": 9.960865658625145e-07, "loss": 0.0252, "step": 38260 }, { "epoch": 0.40888936374806345, "grad_norm": 1.3004791736602783, "learning_rate": 9.96084467661878e-07, "loss": 0.0781, "step": 38270 }, { "epoch": 0.4089962070623431, "grad_norm": 11.270027160644531, "learning_rate": 9.960823689011248e-07, "loss": 0.0462, "step": 38280 }, { "epoch": 0.4091030503766227, "grad_norm": 6.626291275024414, "learning_rate": 9.960802695802571e-07, "loss": 0.098, "step": 38290 }, { "epoch": 0.40920989369090227, "grad_norm": 5.307034492492676, "learning_rate": 9.960781696992774e-07, "loss": 0.041, "step": 38300 }, { "epoch": 0.4093167370051819, "grad_norm": 4.976483345031738, "learning_rate": 9.96076069258188e-07, "loss": 0.0782, "step": 38310 }, { "epoch": 0.4094235803194615, "grad_norm": 10.255281448364258, "learning_rate": 9.960739682569912e-07, "loss": 0.0574, "step": 38320 }, { "epoch": 0.4095304236337411, "grad_norm": 7.849445343017578, "learning_rate": 9.960718666956892e-07, "loss": 0.1031, "step": 38330 }, { "epoch": 0.4096372669480207, "grad_norm": 9.043989181518555, "learning_rate": 9.96069764574285e-07, "loss": 0.0673, "step": 38340 }, { "epoch": 0.40974411026230034, "grad_norm": 4.004673004150391, "learning_rate": 9.960676618927805e-07, "loss": 0.0405, "step": 38350 }, { "epoch": 0.40985095357657997, "grad_norm": 2.7668237686157227, "learning_rate": 9.96065558651178e-07, "loss": 0.1138, "step": 38360 }, { "epoch": 0.40995779689085954, "grad_norm": 9.825562477111816, "learning_rate": 9.960634548494803e-07, "loss": 0.076, "step": 38370 }, { "epoch": 0.41006464020513916, "grad_norm": 11.223435401916504, "learning_rate": 9.960613504876895e-07, "loss": 0.0754, "step": 38380 }, { "epoch": 0.4101714835194188, "grad_norm": 5.203284740447998, "learning_rate": 9.960592455658078e-07, "loss": 0.0309, "step": 38390 }, { "epoch": 0.41027832683369836, "grad_norm": 2.555147647857666, "learning_rate": 9.96057140083838e-07, "loss": 0.0905, "step": 38400 }, { "epoch": 0.410385170147978, "grad_norm": 1.6623132228851318, "learning_rate": 9.960550340417823e-07, "loss": 0.028, "step": 38410 }, { "epoch": 0.4104920134622576, "grad_norm": 5.7238616943359375, "learning_rate": 9.960529274396426e-07, "loss": 0.0824, "step": 38420 }, { "epoch": 0.41059885677653724, "grad_norm": 32.97119140625, "learning_rate": 9.960508202774221e-07, "loss": 0.0829, "step": 38430 }, { "epoch": 0.4107057000908168, "grad_norm": 8.052684783935547, "learning_rate": 9.960487125551225e-07, "loss": 0.0599, "step": 38440 }, { "epoch": 0.41081254340509643, "grad_norm": 3.3476459980010986, "learning_rate": 9.960466042727467e-07, "loss": 0.0747, "step": 38450 }, { "epoch": 0.41091938671937606, "grad_norm": 4.024989128112793, "learning_rate": 9.960444954302966e-07, "loss": 0.1043, "step": 38460 }, { "epoch": 0.4110262300336556, "grad_norm": 4.0760064125061035, "learning_rate": 9.960423860277752e-07, "loss": 0.0373, "step": 38470 }, { "epoch": 0.41113307334793525, "grad_norm": 11.491220474243164, "learning_rate": 9.960402760651843e-07, "loss": 0.0966, "step": 38480 }, { "epoch": 0.4112399166622149, "grad_norm": 3.124333620071411, "learning_rate": 9.960381655425264e-07, "loss": 0.0779, "step": 38490 }, { "epoch": 0.41134675997649445, "grad_norm": 0.25512951612472534, "learning_rate": 9.96036054459804e-07, "loss": 0.0638, "step": 38500 }, { "epoch": 0.4114536032907741, "grad_norm": 25.961076736450195, "learning_rate": 9.960339428170196e-07, "loss": 0.064, "step": 38510 }, { "epoch": 0.4115604466050537, "grad_norm": 5.869999408721924, "learning_rate": 9.960318306141753e-07, "loss": 0.0576, "step": 38520 }, { "epoch": 0.4116672899193333, "grad_norm": 10.646655082702637, "learning_rate": 9.960297178512738e-07, "loss": 0.0823, "step": 38530 }, { "epoch": 0.4117741332336129, "grad_norm": 5.810949802398682, "learning_rate": 9.960276045283172e-07, "loss": 0.071, "step": 38540 }, { "epoch": 0.4118809765478925, "grad_norm": 5.610140323638916, "learning_rate": 9.960254906453078e-07, "loss": 0.1101, "step": 38550 }, { "epoch": 0.41198781986217214, "grad_norm": 0.32544663548469543, "learning_rate": 9.960233762022486e-07, "loss": 0.0904, "step": 38560 }, { "epoch": 0.4120946631764517, "grad_norm": 1.523302674293518, "learning_rate": 9.960212611991413e-07, "loss": 0.0689, "step": 38570 }, { "epoch": 0.41220150649073134, "grad_norm": 0.5132659077644348, "learning_rate": 9.960191456359885e-07, "loss": 0.0889, "step": 38580 }, { "epoch": 0.41230834980501097, "grad_norm": 2.9711921215057373, "learning_rate": 9.960170295127928e-07, "loss": 0.0865, "step": 38590 }, { "epoch": 0.41241519311929054, "grad_norm": 0.3104078471660614, "learning_rate": 9.960149128295563e-07, "loss": 0.0447, "step": 38600 }, { "epoch": 0.41252203643357016, "grad_norm": 0.5234014987945557, "learning_rate": 9.960127955862817e-07, "loss": 0.0695, "step": 38610 }, { "epoch": 0.4126288797478498, "grad_norm": 3.059873104095459, "learning_rate": 9.960106777829712e-07, "loss": 0.1255, "step": 38620 }, { "epoch": 0.4127357230621294, "grad_norm": 2.8812153339385986, "learning_rate": 9.96008559419627e-07, "loss": 0.0368, "step": 38630 }, { "epoch": 0.412842566376409, "grad_norm": 3.982086181640625, "learning_rate": 9.960064404962518e-07, "loss": 0.0711, "step": 38640 }, { "epoch": 0.4129494096906886, "grad_norm": 3.791135311126709, "learning_rate": 9.96004321012848e-07, "loss": 0.093, "step": 38650 }, { "epoch": 0.41305625300496823, "grad_norm": 11.460350036621094, "learning_rate": 9.960022009694178e-07, "loss": 0.0597, "step": 38660 }, { "epoch": 0.4131630963192478, "grad_norm": 2.283268690109253, "learning_rate": 9.96000080365964e-07, "loss": 0.0516, "step": 38670 }, { "epoch": 0.41326993963352743, "grad_norm": 37.071102142333984, "learning_rate": 9.95997959202488e-07, "loss": 0.1132, "step": 38680 }, { "epoch": 0.41337678294780705, "grad_norm": 2.0368034839630127, "learning_rate": 9.959958374789934e-07, "loss": 0.0797, "step": 38690 }, { "epoch": 0.4134836262620866, "grad_norm": 4.137094020843506, "learning_rate": 9.95993715195482e-07, "loss": 0.064, "step": 38700 }, { "epoch": 0.41359046957636625, "grad_norm": 21.062788009643555, "learning_rate": 9.95991592351956e-07, "loss": 0.0869, "step": 38710 }, { "epoch": 0.4136973128906459, "grad_norm": 9.558795928955078, "learning_rate": 9.959894689484183e-07, "loss": 0.0315, "step": 38720 }, { "epoch": 0.4138041562049255, "grad_norm": 1.3971573114395142, "learning_rate": 9.95987344984871e-07, "loss": 0.0397, "step": 38730 }, { "epoch": 0.41391099951920507, "grad_norm": 25.498746871948242, "learning_rate": 9.959852204613166e-07, "loss": 0.0678, "step": 38740 }, { "epoch": 0.4140178428334847, "grad_norm": 3.4016988277435303, "learning_rate": 9.959830953777573e-07, "loss": 0.0518, "step": 38750 }, { "epoch": 0.4141246861477643, "grad_norm": 11.52574634552002, "learning_rate": 9.959809697341958e-07, "loss": 0.1041, "step": 38760 }, { "epoch": 0.4142315294620439, "grad_norm": 0.62363600730896, "learning_rate": 9.959788435306343e-07, "loss": 0.0756, "step": 38770 }, { "epoch": 0.4143383727763235, "grad_norm": 13.5717134475708, "learning_rate": 9.959767167670753e-07, "loss": 0.0981, "step": 38780 }, { "epoch": 0.41444521609060314, "grad_norm": 1.636170744895935, "learning_rate": 9.95974589443521e-07, "loss": 0.0694, "step": 38790 }, { "epoch": 0.41455205940488277, "grad_norm": 1.6474109888076782, "learning_rate": 9.959724615599741e-07, "loss": 0.056, "step": 38800 }, { "epoch": 0.41465890271916234, "grad_norm": 2.9932973384857178, "learning_rate": 9.959703331164369e-07, "loss": 0.0492, "step": 38810 }, { "epoch": 0.41476574603344196, "grad_norm": 9.058839797973633, "learning_rate": 9.959682041129115e-07, "loss": 0.1102, "step": 38820 }, { "epoch": 0.4148725893477216, "grad_norm": 4.579658031463623, "learning_rate": 9.959660745494009e-07, "loss": 0.0909, "step": 38830 }, { "epoch": 0.41497943266200116, "grad_norm": 0.5247811675071716, "learning_rate": 9.95963944425907e-07, "loss": 0.0509, "step": 38840 }, { "epoch": 0.4150862759762808, "grad_norm": 5.97403621673584, "learning_rate": 9.959618137424324e-07, "loss": 0.1017, "step": 38850 }, { "epoch": 0.4151931192905604, "grad_norm": 0.1635982096195221, "learning_rate": 9.959596824989794e-07, "loss": 0.0423, "step": 38860 }, { "epoch": 0.41529996260484, "grad_norm": 6.721592426300049, "learning_rate": 9.959575506955507e-07, "loss": 0.0925, "step": 38870 }, { "epoch": 0.4154068059191196, "grad_norm": 5.873764514923096, "learning_rate": 9.959554183321482e-07, "loss": 0.1116, "step": 38880 }, { "epoch": 0.41551364923339923, "grad_norm": 10.353910446166992, "learning_rate": 9.959532854087748e-07, "loss": 0.0661, "step": 38890 }, { "epoch": 0.41562049254767885, "grad_norm": 0.49210894107818604, "learning_rate": 9.959511519254326e-07, "loss": 0.029, "step": 38900 }, { "epoch": 0.4157273358619584, "grad_norm": 1.5880333185195923, "learning_rate": 9.959490178821244e-07, "loss": 0.0887, "step": 38910 }, { "epoch": 0.41583417917623805, "grad_norm": 5.595909595489502, "learning_rate": 9.95946883278852e-07, "loss": 0.0625, "step": 38920 }, { "epoch": 0.4159410224905177, "grad_norm": 7.465338706970215, "learning_rate": 9.959447481156183e-07, "loss": 0.0732, "step": 38930 }, { "epoch": 0.41604786580479725, "grad_norm": 5.495815277099609, "learning_rate": 9.959426123924253e-07, "loss": 0.1068, "step": 38940 }, { "epoch": 0.41615470911907687, "grad_norm": 1.1862281560897827, "learning_rate": 9.95940476109276e-07, "loss": 0.07, "step": 38950 }, { "epoch": 0.4162615524333565, "grad_norm": 10.018896102905273, "learning_rate": 9.959383392661722e-07, "loss": 0.0575, "step": 38960 }, { "epoch": 0.41636839574763607, "grad_norm": 8.100520133972168, "learning_rate": 9.959362018631168e-07, "loss": 0.0719, "step": 38970 }, { "epoch": 0.4164752390619157, "grad_norm": 0.24513749778270721, "learning_rate": 9.95934063900112e-07, "loss": 0.0588, "step": 38980 }, { "epoch": 0.4165820823761953, "grad_norm": 7.231564044952393, "learning_rate": 9.9593192537716e-07, "loss": 0.0886, "step": 38990 }, { "epoch": 0.41668892569047494, "grad_norm": 0.7697303891181946, "learning_rate": 9.959297862942634e-07, "loss": 0.0649, "step": 39000 }, { "epoch": 0.4167957690047545, "grad_norm": 4.031486988067627, "learning_rate": 9.959276466514249e-07, "loss": 0.0469, "step": 39010 }, { "epoch": 0.41690261231903414, "grad_norm": 1.7463065385818481, "learning_rate": 9.959255064486465e-07, "loss": 0.0354, "step": 39020 }, { "epoch": 0.41700945563331376, "grad_norm": 5.385086536407471, "learning_rate": 9.95923365685931e-07, "loss": 0.0451, "step": 39030 }, { "epoch": 0.41711629894759333, "grad_norm": 10.321410179138184, "learning_rate": 9.959212243632803e-07, "loss": 0.0804, "step": 39040 }, { "epoch": 0.41722314226187296, "grad_norm": 0.08797372877597809, "learning_rate": 9.959190824806972e-07, "loss": 0.041, "step": 39050 }, { "epoch": 0.4173299855761526, "grad_norm": 12.714484214782715, "learning_rate": 9.959169400381841e-07, "loss": 0.1002, "step": 39060 }, { "epoch": 0.41743682889043215, "grad_norm": 6.749820709228516, "learning_rate": 9.959147970357434e-07, "loss": 0.0528, "step": 39070 }, { "epoch": 0.4175436722047118, "grad_norm": 9.706427574157715, "learning_rate": 9.959126534733772e-07, "loss": 0.0794, "step": 39080 }, { "epoch": 0.4176505155189914, "grad_norm": 3.9879419803619385, "learning_rate": 9.959105093510883e-07, "loss": 0.0503, "step": 39090 }, { "epoch": 0.41775735883327103, "grad_norm": 2.9431443214416504, "learning_rate": 9.959083646688792e-07, "loss": 0.0657, "step": 39100 }, { "epoch": 0.4178642021475506, "grad_norm": 2.312004327774048, "learning_rate": 9.95906219426752e-07, "loss": 0.0367, "step": 39110 }, { "epoch": 0.4179710454618302, "grad_norm": 7.599615097045898, "learning_rate": 9.959040736247092e-07, "loss": 0.0584, "step": 39120 }, { "epoch": 0.41807788877610985, "grad_norm": 1.0285484790802002, "learning_rate": 9.959019272627534e-07, "loss": 0.0437, "step": 39130 }, { "epoch": 0.4181847320903894, "grad_norm": 7.028924942016602, "learning_rate": 9.958997803408866e-07, "loss": 0.1033, "step": 39140 }, { "epoch": 0.41829157540466905, "grad_norm": 0.17465092241764069, "learning_rate": 9.958976328591118e-07, "loss": 0.1571, "step": 39150 }, { "epoch": 0.4183984187189487, "grad_norm": 9.581758499145508, "learning_rate": 9.95895484817431e-07, "loss": 0.082, "step": 39160 }, { "epoch": 0.4185052620332283, "grad_norm": 1.6641510725021362, "learning_rate": 9.95893336215847e-07, "loss": 0.0734, "step": 39170 }, { "epoch": 0.41861210534750787, "grad_norm": 5.189086437225342, "learning_rate": 9.95891187054362e-07, "loss": 0.1055, "step": 39180 }, { "epoch": 0.4187189486617875, "grad_norm": 8.486808776855469, "learning_rate": 9.95889037332978e-07, "loss": 0.1602, "step": 39190 }, { "epoch": 0.4188257919760671, "grad_norm": 1.0651347637176514, "learning_rate": 9.95886887051698e-07, "loss": 0.0279, "step": 39200 }, { "epoch": 0.4189326352903467, "grad_norm": 4.298303604125977, "learning_rate": 9.958847362105245e-07, "loss": 0.0783, "step": 39210 }, { "epoch": 0.4190394786046263, "grad_norm": 1.9647940397262573, "learning_rate": 9.958825848094596e-07, "loss": 0.0479, "step": 39220 }, { "epoch": 0.41914632191890594, "grad_norm": 0.3926200568675995, "learning_rate": 9.958804328485058e-07, "loss": 0.0842, "step": 39230 }, { "epoch": 0.4192531652331855, "grad_norm": 1.5240901708602905, "learning_rate": 9.958782803276656e-07, "loss": 0.044, "step": 39240 }, { "epoch": 0.41936000854746514, "grad_norm": 15.760418891906738, "learning_rate": 9.958761272469414e-07, "loss": 0.1207, "step": 39250 }, { "epoch": 0.41946685186174476, "grad_norm": 10.3841552734375, "learning_rate": 9.958739736063356e-07, "loss": 0.1072, "step": 39260 }, { "epoch": 0.4195736951760244, "grad_norm": 9.929596900939941, "learning_rate": 9.958718194058506e-07, "loss": 0.0668, "step": 39270 }, { "epoch": 0.41968053849030396, "grad_norm": 12.280391693115234, "learning_rate": 9.958696646454892e-07, "loss": 0.0834, "step": 39280 }, { "epoch": 0.4197873818045836, "grad_norm": 5.293983459472656, "learning_rate": 9.95867509325253e-07, "loss": 0.1125, "step": 39290 }, { "epoch": 0.4198942251188632, "grad_norm": 2.2923598289489746, "learning_rate": 9.958653534451455e-07, "loss": 0.066, "step": 39300 }, { "epoch": 0.4200010684331428, "grad_norm": 0.6518195867538452, "learning_rate": 9.958631970051683e-07, "loss": 0.0333, "step": 39310 }, { "epoch": 0.4201079117474224, "grad_norm": 4.468212604522705, "learning_rate": 9.95861040005324e-07, "loss": 0.1124, "step": 39320 }, { "epoch": 0.420214755061702, "grad_norm": 3.496720552444458, "learning_rate": 9.958588824456153e-07, "loss": 0.0415, "step": 39330 }, { "epoch": 0.4203215983759816, "grad_norm": 6.289754390716553, "learning_rate": 9.958567243260448e-07, "loss": 0.0921, "step": 39340 }, { "epoch": 0.4204284416902612, "grad_norm": 5.843822479248047, "learning_rate": 9.958545656466143e-07, "loss": 0.0211, "step": 39350 }, { "epoch": 0.42053528500454085, "grad_norm": 1.4456145763397217, "learning_rate": 9.958524064073265e-07, "loss": 0.0504, "step": 39360 }, { "epoch": 0.4206421283188205, "grad_norm": 1.1489981412887573, "learning_rate": 9.958502466081843e-07, "loss": 0.0683, "step": 39370 }, { "epoch": 0.42074897163310004, "grad_norm": 11.075671195983887, "learning_rate": 9.958480862491893e-07, "loss": 0.0639, "step": 39380 }, { "epoch": 0.42085581494737967, "grad_norm": 4.065089225769043, "learning_rate": 9.958459253303447e-07, "loss": 0.0754, "step": 39390 }, { "epoch": 0.4209626582616593, "grad_norm": 11.401103973388672, "learning_rate": 9.958437638516523e-07, "loss": 0.1421, "step": 39400 }, { "epoch": 0.42106950157593886, "grad_norm": 0.3024977445602417, "learning_rate": 9.95841601813115e-07, "loss": 0.1446, "step": 39410 }, { "epoch": 0.4211763448902185, "grad_norm": 6.136619567871094, "learning_rate": 9.958394392147354e-07, "loss": 0.0604, "step": 39420 }, { "epoch": 0.4212831882044981, "grad_norm": 30.805082321166992, "learning_rate": 9.958372760565153e-07, "loss": 0.1212, "step": 39430 }, { "epoch": 0.4213900315187777, "grad_norm": 10.939842224121094, "learning_rate": 9.958351123384576e-07, "loss": 0.078, "step": 39440 }, { "epoch": 0.4214968748330573, "grad_norm": 5.624400615692139, "learning_rate": 9.958329480605647e-07, "loss": 0.136, "step": 39450 }, { "epoch": 0.42160371814733694, "grad_norm": 7.6083807945251465, "learning_rate": 9.95830783222839e-07, "loss": 0.0577, "step": 39460 }, { "epoch": 0.42171056146161656, "grad_norm": 4.185024261474609, "learning_rate": 9.958286178252828e-07, "loss": 0.0573, "step": 39470 }, { "epoch": 0.42181740477589613, "grad_norm": 1.6573609113693237, "learning_rate": 9.958264518678987e-07, "loss": 0.0447, "step": 39480 }, { "epoch": 0.42192424809017576, "grad_norm": 5.8433146476745605, "learning_rate": 9.958242853506892e-07, "loss": 0.0545, "step": 39490 }, { "epoch": 0.4220310914044554, "grad_norm": 11.968660354614258, "learning_rate": 9.958221182736567e-07, "loss": 0.0799, "step": 39500 }, { "epoch": 0.42213793471873495, "grad_norm": 5.160298824310303, "learning_rate": 9.958199506368034e-07, "loss": 0.0446, "step": 39510 }, { "epoch": 0.4222447780330146, "grad_norm": 4.210342884063721, "learning_rate": 9.95817782440132e-07, "loss": 0.1647, "step": 39520 }, { "epoch": 0.4223516213472942, "grad_norm": 2.4313771724700928, "learning_rate": 9.95815613683645e-07, "loss": 0.0398, "step": 39530 }, { "epoch": 0.42245846466157383, "grad_norm": 6.634998321533203, "learning_rate": 9.958134443673448e-07, "loss": 0.0988, "step": 39540 }, { "epoch": 0.4225653079758534, "grad_norm": 9.159881591796875, "learning_rate": 9.958112744912337e-07, "loss": 0.0842, "step": 39550 }, { "epoch": 0.422672151290133, "grad_norm": 9.180052757263184, "learning_rate": 9.958091040553143e-07, "loss": 0.1032, "step": 39560 }, { "epoch": 0.42277899460441265, "grad_norm": 0.15696653723716736, "learning_rate": 9.958069330595889e-07, "loss": 0.0302, "step": 39570 }, { "epoch": 0.4228858379186922, "grad_norm": 2.788809061050415, "learning_rate": 9.9580476150406e-07, "loss": 0.049, "step": 39580 }, { "epoch": 0.42299268123297185, "grad_norm": 0.7665315866470337, "learning_rate": 9.958025893887303e-07, "loss": 0.0174, "step": 39590 }, { "epoch": 0.42309952454725147, "grad_norm": 12.049650192260742, "learning_rate": 9.95800416713602e-07, "loss": 0.1383, "step": 39600 }, { "epoch": 0.42320636786153104, "grad_norm": 1.7783904075622559, "learning_rate": 9.957982434786777e-07, "loss": 0.0635, "step": 39610 }, { "epoch": 0.42331321117581067, "grad_norm": 6.352034568786621, "learning_rate": 9.957960696839595e-07, "loss": 0.0955, "step": 39620 }, { "epoch": 0.4234200544900903, "grad_norm": 5.576201438903809, "learning_rate": 9.957938953294505e-07, "loss": 0.0349, "step": 39630 }, { "epoch": 0.4235268978043699, "grad_norm": 9.877531051635742, "learning_rate": 9.957917204151525e-07, "loss": 0.1152, "step": 39640 }, { "epoch": 0.4236337411186495, "grad_norm": 5.397068500518799, "learning_rate": 9.957895449410683e-07, "loss": 0.0589, "step": 39650 }, { "epoch": 0.4237405844329291, "grad_norm": 0.48394814133644104, "learning_rate": 9.957873689072002e-07, "loss": 0.0184, "step": 39660 }, { "epoch": 0.42384742774720874, "grad_norm": 4.600818634033203, "learning_rate": 9.957851923135508e-07, "loss": 0.0505, "step": 39670 }, { "epoch": 0.4239542710614883, "grad_norm": 5.378605842590332, "learning_rate": 9.957830151601224e-07, "loss": 0.1101, "step": 39680 }, { "epoch": 0.42406111437576793, "grad_norm": 1.2728520631790161, "learning_rate": 9.957808374469178e-07, "loss": 0.0452, "step": 39690 }, { "epoch": 0.42416795769004756, "grad_norm": 6.336208343505859, "learning_rate": 9.95778659173939e-07, "loss": 0.0381, "step": 39700 }, { "epoch": 0.42427480100432713, "grad_norm": 2.736097812652588, "learning_rate": 9.957764803411888e-07, "loss": 0.0804, "step": 39710 }, { "epoch": 0.42438164431860675, "grad_norm": 2.5624096393585205, "learning_rate": 9.957743009486695e-07, "loss": 0.0752, "step": 39720 }, { "epoch": 0.4244884876328864, "grad_norm": 14.266327857971191, "learning_rate": 9.957721209963836e-07, "loss": 0.1612, "step": 39730 }, { "epoch": 0.424595330947166, "grad_norm": 1.2394824028015137, "learning_rate": 9.957699404843335e-07, "loss": 0.0142, "step": 39740 }, { "epoch": 0.4247021742614456, "grad_norm": 2.843736171722412, "learning_rate": 9.957677594125219e-07, "loss": 0.0311, "step": 39750 }, { "epoch": 0.4248090175757252, "grad_norm": 9.287449836730957, "learning_rate": 9.957655777809508e-07, "loss": 0.0664, "step": 39760 }, { "epoch": 0.4249158608900048, "grad_norm": 4.1425347328186035, "learning_rate": 9.957633955896232e-07, "loss": 0.0269, "step": 39770 }, { "epoch": 0.4250227042042844, "grad_norm": 4.048210620880127, "learning_rate": 9.95761212838541e-07, "loss": 0.064, "step": 39780 }, { "epoch": 0.425129547518564, "grad_norm": 3.4289982318878174, "learning_rate": 9.957590295277072e-07, "loss": 0.0562, "step": 39790 }, { "epoch": 0.42523639083284365, "grad_norm": 4.744258403778076, "learning_rate": 9.957568456571243e-07, "loss": 0.0459, "step": 39800 }, { "epoch": 0.4253432341471232, "grad_norm": 5.064414978027344, "learning_rate": 9.95754661226794e-07, "loss": 0.0651, "step": 39810 }, { "epoch": 0.42545007746140284, "grad_norm": 6.994448184967041, "learning_rate": 9.957524762367195e-07, "loss": 0.0708, "step": 39820 }, { "epoch": 0.42555692077568247, "grad_norm": 0.5970485806465149, "learning_rate": 9.95750290686903e-07, "loss": 0.0631, "step": 39830 }, { "epoch": 0.4256637640899621, "grad_norm": 7.825284004211426, "learning_rate": 9.95748104577347e-07, "loss": 0.051, "step": 39840 }, { "epoch": 0.42577060740424166, "grad_norm": 7.1167826652526855, "learning_rate": 9.957459179080541e-07, "loss": 0.0987, "step": 39850 }, { "epoch": 0.4258774507185213, "grad_norm": 5.83777379989624, "learning_rate": 9.957437306790266e-07, "loss": 0.0376, "step": 39860 }, { "epoch": 0.4259842940328009, "grad_norm": 7.856752395629883, "learning_rate": 9.95741542890267e-07, "loss": 0.0798, "step": 39870 }, { "epoch": 0.4260911373470805, "grad_norm": 6.327116012573242, "learning_rate": 9.957393545417776e-07, "loss": 0.0687, "step": 39880 }, { "epoch": 0.4261979806613601, "grad_norm": 9.17811393737793, "learning_rate": 9.957371656335612e-07, "loss": 0.0578, "step": 39890 }, { "epoch": 0.42630482397563974, "grad_norm": 9.423510551452637, "learning_rate": 9.957349761656202e-07, "loss": 0.1454, "step": 39900 }, { "epoch": 0.42641166728991936, "grad_norm": 2.406230926513672, "learning_rate": 9.957327861379569e-07, "loss": 0.1015, "step": 39910 }, { "epoch": 0.42651851060419893, "grad_norm": 2.7219889163970947, "learning_rate": 9.95730595550574e-07, "loss": 0.0673, "step": 39920 }, { "epoch": 0.42662535391847856, "grad_norm": 2.7972917556762695, "learning_rate": 9.957284044034735e-07, "loss": 0.0608, "step": 39930 }, { "epoch": 0.4267321972327582, "grad_norm": 4.769553184509277, "learning_rate": 9.957262126966586e-07, "loss": 0.0575, "step": 39940 }, { "epoch": 0.42683904054703775, "grad_norm": 5.348198413848877, "learning_rate": 9.957240204301311e-07, "loss": 0.1103, "step": 39950 }, { "epoch": 0.4269458838613174, "grad_norm": 0.04124445095658302, "learning_rate": 9.95721827603894e-07, "loss": 0.0677, "step": 39960 }, { "epoch": 0.427052727175597, "grad_norm": 6.930832862854004, "learning_rate": 9.957196342179495e-07, "loss": 0.0935, "step": 39970 }, { "epoch": 0.42715957048987657, "grad_norm": 4.952212810516357, "learning_rate": 9.957174402723001e-07, "loss": 0.1076, "step": 39980 }, { "epoch": 0.4272664138041562, "grad_norm": 8.999712944030762, "learning_rate": 9.957152457669483e-07, "loss": 0.0647, "step": 39990 }, { "epoch": 0.4273732571184358, "grad_norm": 2.536430835723877, "learning_rate": 9.957130507018966e-07, "loss": 0.0541, "step": 40000 }, { "epoch": 0.42748010043271545, "grad_norm": 7.983861446380615, "learning_rate": 9.957108550771473e-07, "loss": 0.0928, "step": 40010 }, { "epoch": 0.427586943746995, "grad_norm": 2.3441193103790283, "learning_rate": 9.95708658892703e-07, "loss": 0.0998, "step": 40020 }, { "epoch": 0.42769378706127464, "grad_norm": 4.337894439697266, "learning_rate": 9.957064621485664e-07, "loss": 0.0813, "step": 40030 }, { "epoch": 0.42780063037555427, "grad_norm": 3.490471124649048, "learning_rate": 9.957042648447399e-07, "loss": 0.0401, "step": 40040 }, { "epoch": 0.42790747368983384, "grad_norm": 3.157106399536133, "learning_rate": 9.957020669812256e-07, "loss": 0.0483, "step": 40050 }, { "epoch": 0.42801431700411346, "grad_norm": 6.855078220367432, "learning_rate": 9.956998685580262e-07, "loss": 0.1178, "step": 40060 }, { "epoch": 0.4281211603183931, "grad_norm": 5.016062259674072, "learning_rate": 9.956976695751444e-07, "loss": 0.0709, "step": 40070 }, { "epoch": 0.42822800363267266, "grad_norm": 6.401536464691162, "learning_rate": 9.956954700325825e-07, "loss": 0.0424, "step": 40080 }, { "epoch": 0.4283348469469523, "grad_norm": 3.1257810592651367, "learning_rate": 9.956932699303432e-07, "loss": 0.0667, "step": 40090 }, { "epoch": 0.4284416902612319, "grad_norm": 1.4913318157196045, "learning_rate": 9.956910692684285e-07, "loss": 0.0316, "step": 40100 }, { "epoch": 0.42854853357551154, "grad_norm": 6.302784442901611, "learning_rate": 9.956888680468413e-07, "loss": 0.0849, "step": 40110 }, { "epoch": 0.4286553768897911, "grad_norm": 8.951469421386719, "learning_rate": 9.956866662655838e-07, "loss": 0.0939, "step": 40120 }, { "epoch": 0.42876222020407073, "grad_norm": 6.544116497039795, "learning_rate": 9.956844639246588e-07, "loss": 0.1107, "step": 40130 }, { "epoch": 0.42886906351835036, "grad_norm": 0.5086197853088379, "learning_rate": 9.956822610240686e-07, "loss": 0.0201, "step": 40140 }, { "epoch": 0.4289759068326299, "grad_norm": 0.8987064957618713, "learning_rate": 9.956800575638159e-07, "loss": 0.0641, "step": 40150 }, { "epoch": 0.42908275014690955, "grad_norm": 8.88115119934082, "learning_rate": 9.956778535439028e-07, "loss": 0.0869, "step": 40160 }, { "epoch": 0.4291895934611892, "grad_norm": 9.143919944763184, "learning_rate": 9.956756489643318e-07, "loss": 0.0506, "step": 40170 }, { "epoch": 0.42929643677546875, "grad_norm": 1.43183434009552, "learning_rate": 9.95673443825106e-07, "loss": 0.0267, "step": 40180 }, { "epoch": 0.4294032800897484, "grad_norm": 3.9273693561553955, "learning_rate": 9.95671238126227e-07, "loss": 0.0231, "step": 40190 }, { "epoch": 0.429510123404028, "grad_norm": 0.07608212530612946, "learning_rate": 9.956690318676983e-07, "loss": 0.0372, "step": 40200 }, { "epoch": 0.4296169667183076, "grad_norm": 15.677568435668945, "learning_rate": 9.956668250495215e-07, "loss": 0.0598, "step": 40210 }, { "epoch": 0.4297238100325872, "grad_norm": 2.094557523727417, "learning_rate": 9.956646176716996e-07, "loss": 0.069, "step": 40220 }, { "epoch": 0.4298306533468668, "grad_norm": 0.06544408947229385, "learning_rate": 9.956624097342348e-07, "loss": 0.096, "step": 40230 }, { "epoch": 0.42993749666114645, "grad_norm": 11.67132568359375, "learning_rate": 9.956602012371298e-07, "loss": 0.1441, "step": 40240 }, { "epoch": 0.430044339975426, "grad_norm": 7.523744106292725, "learning_rate": 9.95657992180387e-07, "loss": 0.0934, "step": 40250 }, { "epoch": 0.43015118328970564, "grad_norm": 6.176375865936279, "learning_rate": 9.95655782564009e-07, "loss": 0.0667, "step": 40260 }, { "epoch": 0.43025802660398527, "grad_norm": 5.148463249206543, "learning_rate": 9.95653572387998e-07, "loss": 0.0598, "step": 40270 }, { "epoch": 0.4303648699182649, "grad_norm": 7.400693416595459, "learning_rate": 9.95651361652357e-07, "loss": 0.0349, "step": 40280 }, { "epoch": 0.43047171323254446, "grad_norm": 0.7656101584434509, "learning_rate": 9.956491503570882e-07, "loss": 0.1397, "step": 40290 }, { "epoch": 0.4305785565468241, "grad_norm": 12.0819673538208, "learning_rate": 9.956469385021938e-07, "loss": 0.0638, "step": 40300 }, { "epoch": 0.4306853998611037, "grad_norm": 6.7773966789245605, "learning_rate": 9.956447260876769e-07, "loss": 0.1017, "step": 40310 }, { "epoch": 0.4307922431753833, "grad_norm": 5.89115047454834, "learning_rate": 9.956425131135395e-07, "loss": 0.0676, "step": 40320 }, { "epoch": 0.4308990864896629, "grad_norm": 1.1388416290283203, "learning_rate": 9.956402995797843e-07, "loss": 0.049, "step": 40330 }, { "epoch": 0.43100592980394253, "grad_norm": 3.2248728275299072, "learning_rate": 9.956380854864138e-07, "loss": 0.0998, "step": 40340 }, { "epoch": 0.4311127731182221, "grad_norm": 11.257454872131348, "learning_rate": 9.956358708334304e-07, "loss": 0.0744, "step": 40350 }, { "epoch": 0.43121961643250173, "grad_norm": 5.467590808868408, "learning_rate": 9.956336556208369e-07, "loss": 0.0885, "step": 40360 }, { "epoch": 0.43132645974678135, "grad_norm": 2.171799421310425, "learning_rate": 9.956314398486356e-07, "loss": 0.0824, "step": 40370 }, { "epoch": 0.431433303061061, "grad_norm": 7.89835786819458, "learning_rate": 9.956292235168289e-07, "loss": 0.0692, "step": 40380 }, { "epoch": 0.43154014637534055, "grad_norm": 3.1515655517578125, "learning_rate": 9.956270066254194e-07, "loss": 0.0635, "step": 40390 }, { "epoch": 0.4316469896896202, "grad_norm": 7.309101104736328, "learning_rate": 9.956247891744097e-07, "loss": 0.1265, "step": 40400 }, { "epoch": 0.4317538330038998, "grad_norm": 6.043359279632568, "learning_rate": 9.95622571163802e-07, "loss": 0.0711, "step": 40410 }, { "epoch": 0.43186067631817937, "grad_norm": 4.05972957611084, "learning_rate": 9.956203525935994e-07, "loss": 0.0842, "step": 40420 }, { "epoch": 0.431967519632459, "grad_norm": 10.596019744873047, "learning_rate": 9.956181334638036e-07, "loss": 0.0444, "step": 40430 }, { "epoch": 0.4320743629467386, "grad_norm": 7.785014629364014, "learning_rate": 9.956159137744177e-07, "loss": 0.0724, "step": 40440 }, { "epoch": 0.4321812062610182, "grad_norm": 3.793996572494507, "learning_rate": 9.95613693525444e-07, "loss": 0.0546, "step": 40450 }, { "epoch": 0.4322880495752978, "grad_norm": 9.35112476348877, "learning_rate": 9.95611472716885e-07, "loss": 0.0297, "step": 40460 }, { "epoch": 0.43239489288957744, "grad_norm": 8.63243293762207, "learning_rate": 9.956092513487432e-07, "loss": 0.0611, "step": 40470 }, { "epoch": 0.43250173620385707, "grad_norm": 3.687354803085327, "learning_rate": 9.956070294210216e-07, "loss": 0.0325, "step": 40480 }, { "epoch": 0.43260857951813664, "grad_norm": 0.5146741271018982, "learning_rate": 9.956048069337218e-07, "loss": 0.0382, "step": 40490 }, { "epoch": 0.43271542283241626, "grad_norm": 0.10573692619800568, "learning_rate": 9.956025838868467e-07, "loss": 0.0446, "step": 40500 }, { "epoch": 0.4328222661466959, "grad_norm": 0.10009904950857162, "learning_rate": 9.956003602803991e-07, "loss": 0.1321, "step": 40510 }, { "epoch": 0.43292910946097546, "grad_norm": 10.863147735595703, "learning_rate": 9.955981361143813e-07, "loss": 0.1087, "step": 40520 }, { "epoch": 0.4330359527752551, "grad_norm": 6.383211135864258, "learning_rate": 9.955959113887957e-07, "loss": 0.0462, "step": 40530 }, { "epoch": 0.4331427960895347, "grad_norm": 5.580731391906738, "learning_rate": 9.955936861036449e-07, "loss": 0.0634, "step": 40540 }, { "epoch": 0.4332496394038143, "grad_norm": 5.411102771759033, "learning_rate": 9.955914602589314e-07, "loss": 0.0502, "step": 40550 }, { "epoch": 0.4333564827180939, "grad_norm": 5.365822792053223, "learning_rate": 9.955892338546578e-07, "loss": 0.114, "step": 40560 }, { "epoch": 0.43346332603237353, "grad_norm": 0.17826315760612488, "learning_rate": 9.955870068908266e-07, "loss": 0.0686, "step": 40570 }, { "epoch": 0.43357016934665316, "grad_norm": 1.3784059286117554, "learning_rate": 9.955847793674403e-07, "loss": 0.0305, "step": 40580 }, { "epoch": 0.4336770126609327, "grad_norm": 1.9171133041381836, "learning_rate": 9.955825512845014e-07, "loss": 0.077, "step": 40590 }, { "epoch": 0.43378385597521235, "grad_norm": 3.4486637115478516, "learning_rate": 9.95580322642012e-07, "loss": 0.1831, "step": 40600 }, { "epoch": 0.433890699289492, "grad_norm": 1.9682364463806152, "learning_rate": 9.955780934399755e-07, "loss": 0.0375, "step": 40610 }, { "epoch": 0.43399754260377155, "grad_norm": 4.418386459350586, "learning_rate": 9.955758636783938e-07, "loss": 0.1057, "step": 40620 }, { "epoch": 0.43410438591805117, "grad_norm": 1.052494764328003, "learning_rate": 9.955736333572694e-07, "loss": 0.11, "step": 40630 }, { "epoch": 0.4342112292323308, "grad_norm": 3.4992504119873047, "learning_rate": 9.955714024766052e-07, "loss": 0.0578, "step": 40640 }, { "epoch": 0.4343180725466104, "grad_norm": 3.3607518672943115, "learning_rate": 9.95569171036403e-07, "loss": 0.0793, "step": 40650 }, { "epoch": 0.43442491586089, "grad_norm": 0.38364675641059875, "learning_rate": 9.955669390366662e-07, "loss": 0.068, "step": 40660 }, { "epoch": 0.4345317591751696, "grad_norm": 6.584150314331055, "learning_rate": 9.95564706477397e-07, "loss": 0.0668, "step": 40670 }, { "epoch": 0.43463860248944924, "grad_norm": 0.3692546784877777, "learning_rate": 9.955624733585977e-07, "loss": 0.0749, "step": 40680 }, { "epoch": 0.4347454458037288, "grad_norm": 1.6719437837600708, "learning_rate": 9.95560239680271e-07, "loss": 0.0926, "step": 40690 }, { "epoch": 0.43485228911800844, "grad_norm": 4.365730285644531, "learning_rate": 9.955580054424194e-07, "loss": 0.032, "step": 40700 }, { "epoch": 0.43495913243228806, "grad_norm": 6.957233428955078, "learning_rate": 9.955557706450454e-07, "loss": 0.0709, "step": 40710 }, { "epoch": 0.43506597574656763, "grad_norm": 32.0419807434082, "learning_rate": 9.955535352881515e-07, "loss": 0.0854, "step": 40720 }, { "epoch": 0.43517281906084726, "grad_norm": 2.0936903953552246, "learning_rate": 9.955512993717402e-07, "loss": 0.0684, "step": 40730 }, { "epoch": 0.4352796623751269, "grad_norm": 21.895112991333008, "learning_rate": 9.955490628958141e-07, "loss": 0.0328, "step": 40740 }, { "epoch": 0.4353865056894065, "grad_norm": 0.17256003618240356, "learning_rate": 9.955468258603758e-07, "loss": 0.0737, "step": 40750 }, { "epoch": 0.4354933490036861, "grad_norm": 2.275385856628418, "learning_rate": 9.955445882654277e-07, "loss": 0.0379, "step": 40760 }, { "epoch": 0.4356001923179657, "grad_norm": 2.1059494018554688, "learning_rate": 9.955423501109723e-07, "loss": 0.1426, "step": 40770 }, { "epoch": 0.43570703563224533, "grad_norm": 4.588961601257324, "learning_rate": 9.955401113970121e-07, "loss": 0.075, "step": 40780 }, { "epoch": 0.4358138789465249, "grad_norm": 7.709067344665527, "learning_rate": 9.9553787212355e-07, "loss": 0.0736, "step": 40790 }, { "epoch": 0.4359207222608045, "grad_norm": 2.339542865753174, "learning_rate": 9.955356322905881e-07, "loss": 0.0491, "step": 40800 }, { "epoch": 0.43602756557508415, "grad_norm": 0.6203132271766663, "learning_rate": 9.95533391898129e-07, "loss": 0.0459, "step": 40810 }, { "epoch": 0.4361344088893637, "grad_norm": 10.132434844970703, "learning_rate": 9.955311509461754e-07, "loss": 0.0459, "step": 40820 }, { "epoch": 0.43624125220364335, "grad_norm": 9.70915412902832, "learning_rate": 9.955289094347297e-07, "loss": 0.0507, "step": 40830 }, { "epoch": 0.436348095517923, "grad_norm": 8.156548500061035, "learning_rate": 9.955266673637944e-07, "loss": 0.1092, "step": 40840 }, { "epoch": 0.4364549388322026, "grad_norm": 0.9476695656776428, "learning_rate": 9.955244247333723e-07, "loss": 0.0714, "step": 40850 }, { "epoch": 0.43656178214648217, "grad_norm": 4.735349655151367, "learning_rate": 9.955221815434654e-07, "loss": 0.0357, "step": 40860 }, { "epoch": 0.4366686254607618, "grad_norm": 3.8207147121429443, "learning_rate": 9.95519937794077e-07, "loss": 0.0588, "step": 40870 }, { "epoch": 0.4367754687750414, "grad_norm": 3.4071483612060547, "learning_rate": 9.955176934852088e-07, "loss": 0.0527, "step": 40880 }, { "epoch": 0.436882312089321, "grad_norm": 1.75228750705719, "learning_rate": 9.955154486168638e-07, "loss": 0.0586, "step": 40890 }, { "epoch": 0.4369891554036006, "grad_norm": 3.8220057487487793, "learning_rate": 9.955132031890446e-07, "loss": 0.1757, "step": 40900 }, { "epoch": 0.43709599871788024, "grad_norm": 0.8865406513214111, "learning_rate": 9.955109572017534e-07, "loss": 0.0639, "step": 40910 }, { "epoch": 0.4372028420321598, "grad_norm": 10.789359092712402, "learning_rate": 9.95508710654993e-07, "loss": 0.0361, "step": 40920 }, { "epoch": 0.43730968534643944, "grad_norm": 4.163739204406738, "learning_rate": 9.95506463548766e-07, "loss": 0.0827, "step": 40930 }, { "epoch": 0.43741652866071906, "grad_norm": 0.7417662739753723, "learning_rate": 9.955042158830746e-07, "loss": 0.0592, "step": 40940 }, { "epoch": 0.4375233719749987, "grad_norm": 2.9896163940429688, "learning_rate": 9.955019676579217e-07, "loss": 0.0695, "step": 40950 }, { "epoch": 0.43763021528927826, "grad_norm": 1.7617740631103516, "learning_rate": 9.954997188733097e-07, "loss": 0.0794, "step": 40960 }, { "epoch": 0.4377370586035579, "grad_norm": 2.7628896236419678, "learning_rate": 9.954974695292409e-07, "loss": 0.0488, "step": 40970 }, { "epoch": 0.4378439019178375, "grad_norm": 2.3978865146636963, "learning_rate": 9.954952196257182e-07, "loss": 0.1208, "step": 40980 }, { "epoch": 0.4379507452321171, "grad_norm": 6.371761798858643, "learning_rate": 9.95492969162744e-07, "loss": 0.0455, "step": 40990 }, { "epoch": 0.4380575885463967, "grad_norm": 12.032692909240723, "learning_rate": 9.95490718140321e-07, "loss": 0.0807, "step": 41000 }, { "epoch": 0.43816443186067633, "grad_norm": 2.0655226707458496, "learning_rate": 9.954884665584513e-07, "loss": 0.0765, "step": 41010 }, { "epoch": 0.43827127517495595, "grad_norm": 4.959074020385742, "learning_rate": 9.95486214417138e-07, "loss": 0.0958, "step": 41020 }, { "epoch": 0.4383781184892355, "grad_norm": 3.746699571609497, "learning_rate": 9.954839617163832e-07, "loss": 0.0832, "step": 41030 }, { "epoch": 0.43848496180351515, "grad_norm": 8.00246524810791, "learning_rate": 9.954817084561897e-07, "loss": 0.1081, "step": 41040 }, { "epoch": 0.4385918051177948, "grad_norm": 0.6364697217941284, "learning_rate": 9.954794546365597e-07, "loss": 0.0618, "step": 41050 }, { "epoch": 0.43869864843207435, "grad_norm": 1.2987955808639526, "learning_rate": 9.954772002574962e-07, "loss": 0.0661, "step": 41060 }, { "epoch": 0.43880549174635397, "grad_norm": 8.363035202026367, "learning_rate": 9.954749453190016e-07, "loss": 0.0378, "step": 41070 }, { "epoch": 0.4389123350606336, "grad_norm": 4.6007161140441895, "learning_rate": 9.954726898210783e-07, "loss": 0.0253, "step": 41080 }, { "epoch": 0.43901917837491317, "grad_norm": 0.23094289004802704, "learning_rate": 9.95470433763729e-07, "loss": 0.0938, "step": 41090 }, { "epoch": 0.4391260216891928, "grad_norm": 3.5805726051330566, "learning_rate": 9.95468177146956e-07, "loss": 0.0585, "step": 41100 }, { "epoch": 0.4392328650034724, "grad_norm": 1.4897574186325073, "learning_rate": 9.954659199707624e-07, "loss": 0.0601, "step": 41110 }, { "epoch": 0.43933970831775204, "grad_norm": 2.8077423572540283, "learning_rate": 9.954636622351503e-07, "loss": 0.0545, "step": 41120 }, { "epoch": 0.4394465516320316, "grad_norm": 8.541966438293457, "learning_rate": 9.954614039401222e-07, "loss": 0.081, "step": 41130 }, { "epoch": 0.43955339494631124, "grad_norm": 4.080243110656738, "learning_rate": 9.954591450856807e-07, "loss": 0.0442, "step": 41140 }, { "epoch": 0.43966023826059086, "grad_norm": 0.44023972749710083, "learning_rate": 9.954568856718284e-07, "loss": 0.0627, "step": 41150 }, { "epoch": 0.43976708157487043, "grad_norm": 3.3049745559692383, "learning_rate": 9.95454625698568e-07, "loss": 0.1224, "step": 41160 }, { "epoch": 0.43987392488915006, "grad_norm": 4.082745552062988, "learning_rate": 9.954523651659021e-07, "loss": 0.0659, "step": 41170 }, { "epoch": 0.4399807682034297, "grad_norm": 5.244743824005127, "learning_rate": 9.954501040738328e-07, "loss": 0.0958, "step": 41180 }, { "epoch": 0.44008761151770925, "grad_norm": 12.98624324798584, "learning_rate": 9.954478424223632e-07, "loss": 0.0934, "step": 41190 }, { "epoch": 0.4401944548319889, "grad_norm": 1.4901314973831177, "learning_rate": 9.954455802114954e-07, "loss": 0.116, "step": 41200 }, { "epoch": 0.4403012981462685, "grad_norm": 1.1124027967453003, "learning_rate": 9.954433174412323e-07, "loss": 0.0326, "step": 41210 }, { "epoch": 0.44040814146054813, "grad_norm": 6.723342418670654, "learning_rate": 9.954410541115761e-07, "loss": 0.1459, "step": 41220 }, { "epoch": 0.4405149847748277, "grad_norm": 2.4804656505584717, "learning_rate": 9.954387902225297e-07, "loss": 0.0295, "step": 41230 }, { "epoch": 0.4406218280891073, "grad_norm": 3.487473249435425, "learning_rate": 9.954365257740952e-07, "loss": 0.0678, "step": 41240 }, { "epoch": 0.44072867140338695, "grad_norm": 6.304961681365967, "learning_rate": 9.95434260766276e-07, "loss": 0.0487, "step": 41250 }, { "epoch": 0.4408355147176665, "grad_norm": 0.07665221393108368, "learning_rate": 9.954319951990737e-07, "loss": 0.0913, "step": 41260 }, { "epoch": 0.44094235803194615, "grad_norm": 6.290826320648193, "learning_rate": 9.954297290724916e-07, "loss": 0.0437, "step": 41270 }, { "epoch": 0.44104920134622577, "grad_norm": 0.5268715620040894, "learning_rate": 9.954274623865318e-07, "loss": 0.0274, "step": 41280 }, { "epoch": 0.44115604466050534, "grad_norm": 0.09096083790063858, "learning_rate": 9.954251951411968e-07, "loss": 0.07, "step": 41290 }, { "epoch": 0.44126288797478497, "grad_norm": 8.227689743041992, "learning_rate": 9.954229273364896e-07, "loss": 0.0352, "step": 41300 }, { "epoch": 0.4413697312890646, "grad_norm": 4.5962324142456055, "learning_rate": 9.954206589724125e-07, "loss": 0.0326, "step": 41310 }, { "epoch": 0.4414765746033442, "grad_norm": 2.392143487930298, "learning_rate": 9.95418390048968e-07, "loss": 0.0507, "step": 41320 }, { "epoch": 0.4415834179176238, "grad_norm": 6.688662052154541, "learning_rate": 9.95416120566159e-07, "loss": 0.0807, "step": 41330 }, { "epoch": 0.4416902612319034, "grad_norm": 8.689175605773926, "learning_rate": 9.954138505239876e-07, "loss": 0.1018, "step": 41340 }, { "epoch": 0.44179710454618304, "grad_norm": 5.2861328125, "learning_rate": 9.954115799224566e-07, "loss": 0.1134, "step": 41350 }, { "epoch": 0.4419039478604626, "grad_norm": 0.3700202405452728, "learning_rate": 9.954093087615685e-07, "loss": 0.0571, "step": 41360 }, { "epoch": 0.44201079117474223, "grad_norm": 7.720363140106201, "learning_rate": 9.95407037041326e-07, "loss": 0.0446, "step": 41370 }, { "epoch": 0.44211763448902186, "grad_norm": 10.508763313293457, "learning_rate": 9.954047647617314e-07, "loss": 0.0557, "step": 41380 }, { "epoch": 0.4422244778033015, "grad_norm": 8.454058647155762, "learning_rate": 9.954024919227876e-07, "loss": 0.0672, "step": 41390 }, { "epoch": 0.44233132111758106, "grad_norm": 0.5865356922149658, "learning_rate": 9.95400218524497e-07, "loss": 0.0315, "step": 41400 }, { "epoch": 0.4424381644318607, "grad_norm": 1.3986427783966064, "learning_rate": 9.953979445668622e-07, "loss": 0.0452, "step": 41410 }, { "epoch": 0.4425450077461403, "grad_norm": 0.19741475582122803, "learning_rate": 9.953956700498854e-07, "loss": 0.0798, "step": 41420 }, { "epoch": 0.4426518510604199, "grad_norm": 1.6542631387710571, "learning_rate": 9.953933949735697e-07, "loss": 0.0233, "step": 41430 }, { "epoch": 0.4427586943746995, "grad_norm": 5.159347057342529, "learning_rate": 9.953911193379176e-07, "loss": 0.047, "step": 41440 }, { "epoch": 0.4428655376889791, "grad_norm": 6.352680206298828, "learning_rate": 9.953888431429314e-07, "loss": 0.1362, "step": 41450 }, { "epoch": 0.4429723810032587, "grad_norm": 7.785636901855469, "learning_rate": 9.953865663886139e-07, "loss": 0.093, "step": 41460 }, { "epoch": 0.4430792243175383, "grad_norm": 3.5101070404052734, "learning_rate": 9.953842890749676e-07, "loss": 0.0491, "step": 41470 }, { "epoch": 0.44318606763181795, "grad_norm": 13.029703140258789, "learning_rate": 9.953820112019948e-07, "loss": 0.0621, "step": 41480 }, { "epoch": 0.4432929109460976, "grad_norm": 0.7301111817359924, "learning_rate": 9.953797327696984e-07, "loss": 0.0685, "step": 41490 }, { "epoch": 0.44339975426037714, "grad_norm": 0.20550452172756195, "learning_rate": 9.95377453778081e-07, "loss": 0.0623, "step": 41500 }, { "epoch": 0.44350659757465677, "grad_norm": 5.0659613609313965, "learning_rate": 9.953751742271452e-07, "loss": 0.0933, "step": 41510 }, { "epoch": 0.4436134408889364, "grad_norm": 14.381269454956055, "learning_rate": 9.953728941168933e-07, "loss": 0.1417, "step": 41520 }, { "epoch": 0.44372028420321596, "grad_norm": 7.419436931610107, "learning_rate": 9.953706134473279e-07, "loss": 0.1, "step": 41530 }, { "epoch": 0.4438271275174956, "grad_norm": 2.83941912651062, "learning_rate": 9.953683322184518e-07, "loss": 0.0618, "step": 41540 }, { "epoch": 0.4439339708317752, "grad_norm": 8.518295288085938, "learning_rate": 9.953660504302674e-07, "loss": 0.1392, "step": 41550 }, { "epoch": 0.4440408141460548, "grad_norm": 1.5805652141571045, "learning_rate": 9.953637680827775e-07, "loss": 0.0502, "step": 41560 }, { "epoch": 0.4441476574603344, "grad_norm": 11.100358963012695, "learning_rate": 9.953614851759844e-07, "loss": 0.0907, "step": 41570 }, { "epoch": 0.44425450077461404, "grad_norm": 2.939805746078491, "learning_rate": 9.953592017098908e-07, "loss": 0.0728, "step": 41580 }, { "epoch": 0.44436134408889366, "grad_norm": 3.4135079383850098, "learning_rate": 9.953569176844993e-07, "loss": 0.0459, "step": 41590 }, { "epoch": 0.44446818740317323, "grad_norm": 9.400092124938965, "learning_rate": 9.953546330998124e-07, "loss": 0.054, "step": 41600 }, { "epoch": 0.44457503071745286, "grad_norm": 3.9585933685302734, "learning_rate": 9.953523479558327e-07, "loss": 0.0399, "step": 41610 }, { "epoch": 0.4446818740317325, "grad_norm": 13.53377628326416, "learning_rate": 9.95350062252563e-07, "loss": 0.0889, "step": 41620 }, { "epoch": 0.44478871734601205, "grad_norm": 5.597105026245117, "learning_rate": 9.953477759900055e-07, "loss": 0.0485, "step": 41630 }, { "epoch": 0.4448955606602917, "grad_norm": 14.516762733459473, "learning_rate": 9.953454891681631e-07, "loss": 0.0658, "step": 41640 }, { "epoch": 0.4450024039745713, "grad_norm": 7.9283857345581055, "learning_rate": 9.953432017870382e-07, "loss": 0.0845, "step": 41650 }, { "epoch": 0.4451092472888509, "grad_norm": 7.544015884399414, "learning_rate": 9.953409138466335e-07, "loss": 0.0662, "step": 41660 }, { "epoch": 0.4452160906031305, "grad_norm": 9.851947784423828, "learning_rate": 9.953386253469515e-07, "loss": 0.0649, "step": 41670 }, { "epoch": 0.4453229339174101, "grad_norm": 2.9883644580841064, "learning_rate": 9.953363362879947e-07, "loss": 0.0568, "step": 41680 }, { "epoch": 0.44542977723168975, "grad_norm": 0.6816707253456116, "learning_rate": 9.95334046669766e-07, "loss": 0.1301, "step": 41690 }, { "epoch": 0.4455366205459693, "grad_norm": 4.051411151885986, "learning_rate": 9.953317564922676e-07, "loss": 0.0481, "step": 41700 }, { "epoch": 0.44564346386024895, "grad_norm": 2.7995762825012207, "learning_rate": 9.953294657555022e-07, "loss": 0.0627, "step": 41710 }, { "epoch": 0.44575030717452857, "grad_norm": 2.546060085296631, "learning_rate": 9.953271744594727e-07, "loss": 0.0401, "step": 41720 }, { "epoch": 0.44585715048880814, "grad_norm": 7.084181785583496, "learning_rate": 9.953248826041813e-07, "loss": 0.0577, "step": 41730 }, { "epoch": 0.44596399380308777, "grad_norm": 0.6892095804214478, "learning_rate": 9.953225901896308e-07, "loss": 0.0377, "step": 41740 }, { "epoch": 0.4460708371173674, "grad_norm": 3.2239444255828857, "learning_rate": 9.953202972158236e-07, "loss": 0.0414, "step": 41750 }, { "epoch": 0.446177680431647, "grad_norm": 10.700425148010254, "learning_rate": 9.953180036827623e-07, "loss": 0.0551, "step": 41760 }, { "epoch": 0.4462845237459266, "grad_norm": 5.458123207092285, "learning_rate": 9.953157095904499e-07, "loss": 0.092, "step": 41770 }, { "epoch": 0.4463913670602062, "grad_norm": 6.797430992126465, "learning_rate": 9.953134149388884e-07, "loss": 0.0941, "step": 41780 }, { "epoch": 0.44649821037448584, "grad_norm": 0.3001190423965454, "learning_rate": 9.953111197280807e-07, "loss": 0.0722, "step": 41790 }, { "epoch": 0.4466050536887654, "grad_norm": 4.052976131439209, "learning_rate": 9.953088239580296e-07, "loss": 0.0336, "step": 41800 }, { "epoch": 0.44671189700304503, "grad_norm": 0.20265261828899384, "learning_rate": 9.953065276287374e-07, "loss": 0.0262, "step": 41810 }, { "epoch": 0.44681874031732466, "grad_norm": 4.045266628265381, "learning_rate": 9.953042307402065e-07, "loss": 0.0679, "step": 41820 }, { "epoch": 0.44692558363160423, "grad_norm": 1.8618226051330566, "learning_rate": 9.9530193329244e-07, "loss": 0.037, "step": 41830 }, { "epoch": 0.44703242694588385, "grad_norm": 1.8801071643829346, "learning_rate": 9.9529963528544e-07, "loss": 0.1042, "step": 41840 }, { "epoch": 0.4471392702601635, "grad_norm": 7.765472412109375, "learning_rate": 9.952973367192094e-07, "loss": 0.0502, "step": 41850 }, { "epoch": 0.4472461135744431, "grad_norm": 3.2877960205078125, "learning_rate": 9.952950375937507e-07, "loss": 0.1086, "step": 41860 }, { "epoch": 0.4473529568887227, "grad_norm": 0.2307390421628952, "learning_rate": 9.952927379090667e-07, "loss": 0.0658, "step": 41870 }, { "epoch": 0.4474598002030023, "grad_norm": 4.562685489654541, "learning_rate": 9.9529043766516e-07, "loss": 0.0785, "step": 41880 }, { "epoch": 0.4475666435172819, "grad_norm": 5.680283069610596, "learning_rate": 9.952881368620326e-07, "loss": 0.0384, "step": 41890 }, { "epoch": 0.4476734868315615, "grad_norm": 6.287425994873047, "learning_rate": 9.952858354996876e-07, "loss": 0.0788, "step": 41900 }, { "epoch": 0.4477803301458411, "grad_norm": 19.95343017578125, "learning_rate": 9.952835335781276e-07, "loss": 0.2028, "step": 41910 }, { "epoch": 0.44788717346012075, "grad_norm": 3.966665029525757, "learning_rate": 9.95281231097355e-07, "loss": 0.0234, "step": 41920 }, { "epoch": 0.4479940167744003, "grad_norm": 0.9838342070579529, "learning_rate": 9.952789280573725e-07, "loss": 0.1215, "step": 41930 }, { "epoch": 0.44810086008867994, "grad_norm": 0.3756176233291626, "learning_rate": 9.95276624458183e-07, "loss": 0.0799, "step": 41940 }, { "epoch": 0.44820770340295957, "grad_norm": 2.5753872394561768, "learning_rate": 9.952743202997885e-07, "loss": 0.053, "step": 41950 }, { "epoch": 0.4483145467172392, "grad_norm": 3.8784165382385254, "learning_rate": 9.952720155821918e-07, "loss": 0.0379, "step": 41960 }, { "epoch": 0.44842139003151876, "grad_norm": 4.816900253295898, "learning_rate": 9.952697103053959e-07, "loss": 0.0196, "step": 41970 }, { "epoch": 0.4485282333457984, "grad_norm": 3.024921417236328, "learning_rate": 9.952674044694032e-07, "loss": 0.0306, "step": 41980 }, { "epoch": 0.448635076660078, "grad_norm": 6.932129859924316, "learning_rate": 9.952650980742158e-07, "loss": 0.0514, "step": 41990 }, { "epoch": 0.4487419199743576, "grad_norm": 8.040742874145508, "learning_rate": 9.952627911198372e-07, "loss": 0.0978, "step": 42000 }, { "epoch": 0.4488487632886372, "grad_norm": 0.39455071091651917, "learning_rate": 9.952604836062693e-07, "loss": 0.0834, "step": 42010 }, { "epoch": 0.44895560660291683, "grad_norm": 3.9359686374664307, "learning_rate": 9.95258175533515e-07, "loss": 0.041, "step": 42020 }, { "epoch": 0.4490624499171964, "grad_norm": 4.675909519195557, "learning_rate": 9.952558669015768e-07, "loss": 0.0886, "step": 42030 }, { "epoch": 0.44916929323147603, "grad_norm": 5.21193790435791, "learning_rate": 9.952535577104574e-07, "loss": 0.0754, "step": 42040 }, { "epoch": 0.44927613654575566, "grad_norm": 1.3114013671875, "learning_rate": 9.952512479601592e-07, "loss": 0.0828, "step": 42050 }, { "epoch": 0.4493829798600353, "grad_norm": 4.983396053314209, "learning_rate": 9.95248937650685e-07, "loss": 0.0472, "step": 42060 }, { "epoch": 0.44948982317431485, "grad_norm": 7.820662021636963, "learning_rate": 9.952466267820373e-07, "loss": 0.0804, "step": 42070 }, { "epoch": 0.4495966664885945, "grad_norm": 2.3029613494873047, "learning_rate": 9.95244315354219e-07, "loss": 0.1259, "step": 42080 }, { "epoch": 0.4497035098028741, "grad_norm": 3.4226694107055664, "learning_rate": 9.952420033672324e-07, "loss": 0.0925, "step": 42090 }, { "epoch": 0.44981035311715367, "grad_norm": 5.545770168304443, "learning_rate": 9.952396908210803e-07, "loss": 0.0598, "step": 42100 }, { "epoch": 0.4499171964314333, "grad_norm": 7.036192417144775, "learning_rate": 9.95237377715765e-07, "loss": 0.0382, "step": 42110 }, { "epoch": 0.4500240397457129, "grad_norm": 4.8428568840026855, "learning_rate": 9.952350640512895e-07, "loss": 0.0555, "step": 42120 }, { "epoch": 0.45013088305999255, "grad_norm": 9.151873588562012, "learning_rate": 9.952327498276561e-07, "loss": 0.0619, "step": 42130 }, { "epoch": 0.4502377263742721, "grad_norm": 7.388598442077637, "learning_rate": 9.952304350448677e-07, "loss": 0.1215, "step": 42140 }, { "epoch": 0.45034456968855174, "grad_norm": 6.651576995849609, "learning_rate": 9.952281197029267e-07, "loss": 0.0322, "step": 42150 }, { "epoch": 0.45045141300283137, "grad_norm": 0.5077807903289795, "learning_rate": 9.952258038018358e-07, "loss": 0.0632, "step": 42160 }, { "epoch": 0.45055825631711094, "grad_norm": 10.73521900177002, "learning_rate": 9.952234873415974e-07, "loss": 0.0437, "step": 42170 }, { "epoch": 0.45066509963139056, "grad_norm": 3.7167694568634033, "learning_rate": 9.952211703222145e-07, "loss": 0.1067, "step": 42180 }, { "epoch": 0.4507719429456702, "grad_norm": 1.8984131813049316, "learning_rate": 9.952188527436894e-07, "loss": 0.0572, "step": 42190 }, { "epoch": 0.45087878625994976, "grad_norm": 3.304717540740967, "learning_rate": 9.95216534606025e-07, "loss": 0.0702, "step": 42200 }, { "epoch": 0.4509856295742294, "grad_norm": 11.280165672302246, "learning_rate": 9.952142159092236e-07, "loss": 0.0961, "step": 42210 }, { "epoch": 0.451092472888509, "grad_norm": 1.256473422050476, "learning_rate": 9.952118966532881e-07, "loss": 0.0572, "step": 42220 }, { "epoch": 0.45119931620278864, "grad_norm": 2.5217673778533936, "learning_rate": 9.952095768382209e-07, "loss": 0.0829, "step": 42230 }, { "epoch": 0.4513061595170682, "grad_norm": 12.349893569946289, "learning_rate": 9.952072564640248e-07, "loss": 0.0656, "step": 42240 }, { "epoch": 0.45141300283134783, "grad_norm": 1.6494303941726685, "learning_rate": 9.952049355307024e-07, "loss": 0.1079, "step": 42250 }, { "epoch": 0.45151984614562746, "grad_norm": 10.065942764282227, "learning_rate": 9.95202614038256e-07, "loss": 0.1116, "step": 42260 }, { "epoch": 0.451626689459907, "grad_norm": 0.87047278881073, "learning_rate": 9.952002919866887e-07, "loss": 0.0559, "step": 42270 }, { "epoch": 0.45173353277418665, "grad_norm": 10.700372695922852, "learning_rate": 9.951979693760028e-07, "loss": 0.0374, "step": 42280 }, { "epoch": 0.4518403760884663, "grad_norm": 7.784235954284668, "learning_rate": 9.95195646206201e-07, "loss": 0.1006, "step": 42290 }, { "epoch": 0.45194721940274585, "grad_norm": 5.8604302406311035, "learning_rate": 9.951933224772858e-07, "loss": 0.0775, "step": 42300 }, { "epoch": 0.4520540627170255, "grad_norm": 2.1458287239074707, "learning_rate": 9.951909981892604e-07, "loss": 0.1243, "step": 42310 }, { "epoch": 0.4521609060313051, "grad_norm": 4.48453426361084, "learning_rate": 9.951886733421267e-07, "loss": 0.0621, "step": 42320 }, { "epoch": 0.4522677493455847, "grad_norm": 3.3352010250091553, "learning_rate": 9.951863479358875e-07, "loss": 0.0177, "step": 42330 }, { "epoch": 0.4523745926598643, "grad_norm": 19.706064224243164, "learning_rate": 9.951840219705456e-07, "loss": 0.0739, "step": 42340 }, { "epoch": 0.4524814359741439, "grad_norm": 0.31098970770835876, "learning_rate": 9.951816954461037e-07, "loss": 0.037, "step": 42350 }, { "epoch": 0.45258827928842355, "grad_norm": 3.63342022895813, "learning_rate": 9.951793683625641e-07, "loss": 0.0606, "step": 42360 }, { "epoch": 0.4526951226027031, "grad_norm": 14.001362800598145, "learning_rate": 9.951770407199299e-07, "loss": 0.1192, "step": 42370 }, { "epoch": 0.45280196591698274, "grad_norm": 0.051028717309236526, "learning_rate": 9.951747125182033e-07, "loss": 0.091, "step": 42380 }, { "epoch": 0.45290880923126237, "grad_norm": 0.5838165879249573, "learning_rate": 9.951723837573871e-07, "loss": 0.0776, "step": 42390 }, { "epoch": 0.45301565254554194, "grad_norm": 4.006165027618408, "learning_rate": 9.951700544374839e-07, "loss": 0.0513, "step": 42400 }, { "epoch": 0.45312249585982156, "grad_norm": 8.427961349487305, "learning_rate": 9.951677245584963e-07, "loss": 0.064, "step": 42410 }, { "epoch": 0.4532293391741012, "grad_norm": 0.3502299189567566, "learning_rate": 9.95165394120427e-07, "loss": 0.0995, "step": 42420 }, { "epoch": 0.4533361824883808, "grad_norm": 11.637531280517578, "learning_rate": 9.951630631232786e-07, "loss": 0.0949, "step": 42430 }, { "epoch": 0.4534430258026604, "grad_norm": 6.028947830200195, "learning_rate": 9.951607315670538e-07, "loss": 0.0337, "step": 42440 }, { "epoch": 0.45354986911694, "grad_norm": 7.1742472648620605, "learning_rate": 9.951583994517548e-07, "loss": 0.029, "step": 42450 }, { "epoch": 0.45365671243121963, "grad_norm": 0.08182086795568466, "learning_rate": 9.951560667773851e-07, "loss": 0.0325, "step": 42460 }, { "epoch": 0.4537635557454992, "grad_norm": 2.3806419372558594, "learning_rate": 9.951537335439466e-07, "loss": 0.04, "step": 42470 }, { "epoch": 0.45387039905977883, "grad_norm": 6.7285614013671875, "learning_rate": 9.951513997514421e-07, "loss": 0.1224, "step": 42480 }, { "epoch": 0.45397724237405845, "grad_norm": 0.35036715865135193, "learning_rate": 9.951490653998745e-07, "loss": 0.0506, "step": 42490 }, { "epoch": 0.4540840856883381, "grad_norm": 5.167492389678955, "learning_rate": 9.95146730489246e-07, "loss": 0.0647, "step": 42500 }, { "epoch": 0.45419092900261765, "grad_norm": 0.6424847841262817, "learning_rate": 9.951443950195597e-07, "loss": 0.0692, "step": 42510 }, { "epoch": 0.4542977723168973, "grad_norm": 0.4464206099510193, "learning_rate": 9.951420589908177e-07, "loss": 0.0978, "step": 42520 }, { "epoch": 0.4544046156311769, "grad_norm": 0.676734447479248, "learning_rate": 9.95139722403023e-07, "loss": 0.0685, "step": 42530 }, { "epoch": 0.45451145894545647, "grad_norm": 5.882489204406738, "learning_rate": 9.951373852561785e-07, "loss": 0.0183, "step": 42540 }, { "epoch": 0.4546183022597361, "grad_norm": 13.855396270751953, "learning_rate": 9.951350475502864e-07, "loss": 0.1146, "step": 42550 }, { "epoch": 0.4547251455740157, "grad_norm": 9.471186637878418, "learning_rate": 9.951327092853495e-07, "loss": 0.068, "step": 42560 }, { "epoch": 0.4548319888882953, "grad_norm": 3.81266450881958, "learning_rate": 9.951303704613702e-07, "loss": 0.107, "step": 42570 }, { "epoch": 0.4549388322025749, "grad_norm": 5.789464950561523, "learning_rate": 9.951280310783513e-07, "loss": 0.1014, "step": 42580 }, { "epoch": 0.45504567551685454, "grad_norm": 4.014623165130615, "learning_rate": 9.951256911362958e-07, "loss": 0.0339, "step": 42590 }, { "epoch": 0.45515251883113417, "grad_norm": 12.676750183105469, "learning_rate": 9.951233506352057e-07, "loss": 0.0706, "step": 42600 }, { "epoch": 0.45525936214541374, "grad_norm": 6.879147529602051, "learning_rate": 9.951210095750842e-07, "loss": 0.06, "step": 42610 }, { "epoch": 0.45536620545969336, "grad_norm": 5.179680824279785, "learning_rate": 9.951186679559335e-07, "loss": 0.0913, "step": 42620 }, { "epoch": 0.455473048773973, "grad_norm": 8.032012939453125, "learning_rate": 9.951163257777567e-07, "loss": 0.0841, "step": 42630 }, { "epoch": 0.45557989208825256, "grad_norm": 0.5818307995796204, "learning_rate": 9.951139830405562e-07, "loss": 0.0938, "step": 42640 }, { "epoch": 0.4556867354025322, "grad_norm": 6.5638837814331055, "learning_rate": 9.951116397443344e-07, "loss": 0.0454, "step": 42650 }, { "epoch": 0.4557935787168118, "grad_norm": 3.249434232711792, "learning_rate": 9.951092958890942e-07, "loss": 0.0639, "step": 42660 }, { "epoch": 0.4559004220310914, "grad_norm": 7.647275447845459, "learning_rate": 9.951069514748385e-07, "loss": 0.0622, "step": 42670 }, { "epoch": 0.456007265345371, "grad_norm": 2.2229483127593994, "learning_rate": 9.951046065015695e-07, "loss": 0.0424, "step": 42680 }, { "epoch": 0.45611410865965063, "grad_norm": 4.765829563140869, "learning_rate": 9.9510226096929e-07, "loss": 0.0458, "step": 42690 }, { "epoch": 0.45622095197393026, "grad_norm": 3.3496975898742676, "learning_rate": 9.95099914878003e-07, "loss": 0.062, "step": 42700 }, { "epoch": 0.4563277952882098, "grad_norm": 1.3470065593719482, "learning_rate": 9.950975682277105e-07, "loss": 0.0196, "step": 42710 }, { "epoch": 0.45643463860248945, "grad_norm": 7.337266445159912, "learning_rate": 9.950952210184155e-07, "loss": 0.1321, "step": 42720 }, { "epoch": 0.4565414819167691, "grad_norm": 1.2838521003723145, "learning_rate": 9.950928732501207e-07, "loss": 0.0275, "step": 42730 }, { "epoch": 0.45664832523104865, "grad_norm": 4.052188873291016, "learning_rate": 9.950905249228288e-07, "loss": 0.0691, "step": 42740 }, { "epoch": 0.45675516854532827, "grad_norm": 8.057295799255371, "learning_rate": 9.950881760365422e-07, "loss": 0.1161, "step": 42750 }, { "epoch": 0.4568620118596079, "grad_norm": 5.850875377655029, "learning_rate": 9.950858265912636e-07, "loss": 0.0899, "step": 42760 }, { "epoch": 0.45696885517388747, "grad_norm": 3.696180820465088, "learning_rate": 9.950834765869957e-07, "loss": 0.0608, "step": 42770 }, { "epoch": 0.4570756984881671, "grad_norm": 4.176772117614746, "learning_rate": 9.950811260237414e-07, "loss": 0.0555, "step": 42780 }, { "epoch": 0.4571825418024467, "grad_norm": 3.3514580726623535, "learning_rate": 9.95078774901503e-07, "loss": 0.043, "step": 42790 }, { "epoch": 0.45728938511672634, "grad_norm": 3.381065607070923, "learning_rate": 9.950764232202833e-07, "loss": 0.0399, "step": 42800 }, { "epoch": 0.4573962284310059, "grad_norm": 6.549558162689209, "learning_rate": 9.95074070980085e-07, "loss": 0.0308, "step": 42810 }, { "epoch": 0.45750307174528554, "grad_norm": 13.541289329528809, "learning_rate": 9.95071718180911e-07, "loss": 0.0603, "step": 42820 }, { "epoch": 0.45760991505956516, "grad_norm": 0.4008840322494507, "learning_rate": 9.950693648227632e-07, "loss": 0.0311, "step": 42830 }, { "epoch": 0.45771675837384473, "grad_norm": 4.832168102264404, "learning_rate": 9.950670109056449e-07, "loss": 0.0706, "step": 42840 }, { "epoch": 0.45782360168812436, "grad_norm": 0.3904339373111725, "learning_rate": 9.950646564295587e-07, "loss": 0.0458, "step": 42850 }, { "epoch": 0.457930445002404, "grad_norm": 2.683917760848999, "learning_rate": 9.95062301394507e-07, "loss": 0.0612, "step": 42860 }, { "epoch": 0.4580372883166836, "grad_norm": 7.571223735809326, "learning_rate": 9.950599458004924e-07, "loss": 0.0649, "step": 42870 }, { "epoch": 0.4581441316309632, "grad_norm": 4.790097236633301, "learning_rate": 9.950575896475182e-07, "loss": 0.105, "step": 42880 }, { "epoch": 0.4582509749452428, "grad_norm": 10.542489051818848, "learning_rate": 9.950552329355863e-07, "loss": 0.0984, "step": 42890 }, { "epoch": 0.45835781825952243, "grad_norm": 12.4044189453125, "learning_rate": 9.950528756646998e-07, "loss": 0.131, "step": 42900 }, { "epoch": 0.458464661573802, "grad_norm": 5.08418607711792, "learning_rate": 9.95050517834861e-07, "loss": 0.09, "step": 42910 }, { "epoch": 0.4585715048880816, "grad_norm": 1.7317728996276855, "learning_rate": 9.950481594460733e-07, "loss": 0.1089, "step": 42920 }, { "epoch": 0.45867834820236125, "grad_norm": 1.8749042749404907, "learning_rate": 9.950458004983384e-07, "loss": 0.0687, "step": 42930 }, { "epoch": 0.4587851915166408, "grad_norm": 8.383543014526367, "learning_rate": 9.950434409916596e-07, "loss": 0.0712, "step": 42940 }, { "epoch": 0.45889203483092045, "grad_norm": 5.458146572113037, "learning_rate": 9.950410809260395e-07, "loss": 0.0542, "step": 42950 }, { "epoch": 0.4589988781452001, "grad_norm": 6.407410144805908, "learning_rate": 9.950387203014805e-07, "loss": 0.1157, "step": 42960 }, { "epoch": 0.4591057214594797, "grad_norm": 3.835763692855835, "learning_rate": 9.950363591179856e-07, "loss": 0.0292, "step": 42970 }, { "epoch": 0.45921256477375927, "grad_norm": 0.08724413067102432, "learning_rate": 9.95033997375557e-07, "loss": 0.0597, "step": 42980 }, { "epoch": 0.4593194080880389, "grad_norm": 0.10253328830003738, "learning_rate": 9.95031635074198e-07, "loss": 0.0197, "step": 42990 }, { "epoch": 0.4594262514023185, "grad_norm": 6.102709770202637, "learning_rate": 9.950292722139106e-07, "loss": 0.076, "step": 43000 }, { "epoch": 0.4595330947165981, "grad_norm": 5.96434211730957, "learning_rate": 9.950269087946982e-07, "loss": 0.0693, "step": 43010 }, { "epoch": 0.4596399380308777, "grad_norm": 4.18808126449585, "learning_rate": 9.950245448165625e-07, "loss": 0.0255, "step": 43020 }, { "epoch": 0.45974678134515734, "grad_norm": 8.874141693115234, "learning_rate": 9.95022180279507e-07, "loss": 0.0831, "step": 43030 }, { "epoch": 0.4598536246594369, "grad_norm": 8.162872314453125, "learning_rate": 9.950198151835342e-07, "loss": 0.0478, "step": 43040 }, { "epoch": 0.45996046797371654, "grad_norm": 6.094615936279297, "learning_rate": 9.950174495286467e-07, "loss": 0.138, "step": 43050 }, { "epoch": 0.46006731128799616, "grad_norm": 6.076599597930908, "learning_rate": 9.95015083314847e-07, "loss": 0.0928, "step": 43060 }, { "epoch": 0.4601741546022758, "grad_norm": 6.199653625488281, "learning_rate": 9.95012716542138e-07, "loss": 0.0352, "step": 43070 }, { "epoch": 0.46028099791655536, "grad_norm": 6.621981143951416, "learning_rate": 9.950103492105222e-07, "loss": 0.0697, "step": 43080 }, { "epoch": 0.460387841230835, "grad_norm": 6.074044704437256, "learning_rate": 9.950079813200023e-07, "loss": 0.0552, "step": 43090 }, { "epoch": 0.4604946845451146, "grad_norm": 4.811496734619141, "learning_rate": 9.950056128705811e-07, "loss": 0.1543, "step": 43100 }, { "epoch": 0.4606015278593942, "grad_norm": 0.25545066595077515, "learning_rate": 9.950032438622613e-07, "loss": 0.0444, "step": 43110 }, { "epoch": 0.4607083711736738, "grad_norm": 2.7531328201293945, "learning_rate": 9.950008742950453e-07, "loss": 0.0846, "step": 43120 }, { "epoch": 0.46081521448795343, "grad_norm": 10.272781372070312, "learning_rate": 9.949985041689361e-07, "loss": 0.0697, "step": 43130 }, { "epoch": 0.460922057802233, "grad_norm": 0.7197121381759644, "learning_rate": 9.94996133483936e-07, "loss": 0.0267, "step": 43140 }, { "epoch": 0.4610289011165126, "grad_norm": 3.1845552921295166, "learning_rate": 9.949937622400481e-07, "loss": 0.0569, "step": 43150 }, { "epoch": 0.46113574443079225, "grad_norm": 4.233950614929199, "learning_rate": 9.94991390437275e-07, "loss": 0.14, "step": 43160 }, { "epoch": 0.4612425877450719, "grad_norm": 1.6934902667999268, "learning_rate": 9.94989018075619e-07, "loss": 0.0403, "step": 43170 }, { "epoch": 0.46134943105935144, "grad_norm": 2.046147108078003, "learning_rate": 9.949866451550832e-07, "loss": 0.0224, "step": 43180 }, { "epoch": 0.46145627437363107, "grad_norm": 1.7355424165725708, "learning_rate": 9.949842716756701e-07, "loss": 0.0369, "step": 43190 }, { "epoch": 0.4615631176879107, "grad_norm": 3.9846138954162598, "learning_rate": 9.949818976373824e-07, "loss": 0.11, "step": 43200 }, { "epoch": 0.46166996100219027, "grad_norm": 0.09653767198324203, "learning_rate": 9.949795230402228e-07, "loss": 0.0483, "step": 43210 }, { "epoch": 0.4617768043164699, "grad_norm": 5.214033603668213, "learning_rate": 9.94977147884194e-07, "loss": 0.1334, "step": 43220 }, { "epoch": 0.4618836476307495, "grad_norm": 0.16996774077415466, "learning_rate": 9.949747721692986e-07, "loss": 0.069, "step": 43230 }, { "epoch": 0.46199049094502914, "grad_norm": 3.7106711864471436, "learning_rate": 9.94972395895539e-07, "loss": 0.0357, "step": 43240 }, { "epoch": 0.4620973342593087, "grad_norm": 6.7775115966796875, "learning_rate": 9.949700190629187e-07, "loss": 0.0616, "step": 43250 }, { "epoch": 0.46220417757358834, "grad_norm": 0.1346663534641266, "learning_rate": 9.949676416714397e-07, "loss": 0.0587, "step": 43260 }, { "epoch": 0.46231102088786796, "grad_norm": 5.889057636260986, "learning_rate": 9.949652637211049e-07, "loss": 0.078, "step": 43270 }, { "epoch": 0.46241786420214753, "grad_norm": 13.097143173217773, "learning_rate": 9.949628852119168e-07, "loss": 0.0936, "step": 43280 }, { "epoch": 0.46252470751642716, "grad_norm": 0.09004885703325272, "learning_rate": 9.949605061438784e-07, "loss": 0.061, "step": 43290 }, { "epoch": 0.4626315508307068, "grad_norm": 3.863342523574829, "learning_rate": 9.949581265169922e-07, "loss": 0.056, "step": 43300 }, { "epoch": 0.46273839414498635, "grad_norm": 2.323033094406128, "learning_rate": 9.949557463312608e-07, "loss": 0.0239, "step": 43310 }, { "epoch": 0.462845237459266, "grad_norm": 3.0246593952178955, "learning_rate": 9.949533655866871e-07, "loss": 0.0306, "step": 43320 }, { "epoch": 0.4629520807735456, "grad_norm": 0.285964697599411, "learning_rate": 9.94950984283274e-07, "loss": 0.1096, "step": 43330 }, { "epoch": 0.46305892408782523, "grad_norm": 8.001331329345703, "learning_rate": 9.949486024210233e-07, "loss": 0.0346, "step": 43340 }, { "epoch": 0.4631657674021048, "grad_norm": 0.30146458745002747, "learning_rate": 9.949462199999387e-07, "loss": 0.0667, "step": 43350 }, { "epoch": 0.4632726107163844, "grad_norm": 0.2697497308254242, "learning_rate": 9.949438370200225e-07, "loss": 0.0589, "step": 43360 }, { "epoch": 0.46337945403066405, "grad_norm": 0.7654200196266174, "learning_rate": 9.94941453481277e-07, "loss": 0.1383, "step": 43370 }, { "epoch": 0.4634862973449436, "grad_norm": 3.0100674629211426, "learning_rate": 9.949390693837054e-07, "loss": 0.0804, "step": 43380 }, { "epoch": 0.46359314065922325, "grad_norm": 0.7499169707298279, "learning_rate": 9.949366847273104e-07, "loss": 0.0741, "step": 43390 }, { "epoch": 0.46369998397350287, "grad_norm": 0.8532320261001587, "learning_rate": 9.949342995120943e-07, "loss": 0.0432, "step": 43400 }, { "epoch": 0.46380682728778244, "grad_norm": 5.393438816070557, "learning_rate": 9.9493191373806e-07, "loss": 0.0612, "step": 43410 }, { "epoch": 0.46391367060206207, "grad_norm": 0.6722122430801392, "learning_rate": 9.949295274052105e-07, "loss": 0.0226, "step": 43420 }, { "epoch": 0.4640205139163417, "grad_norm": 2.814387559890747, "learning_rate": 9.94927140513548e-07, "loss": 0.0682, "step": 43430 }, { "epoch": 0.4641273572306213, "grad_norm": 0.16174060106277466, "learning_rate": 9.949247530630755e-07, "loss": 0.0229, "step": 43440 }, { "epoch": 0.4642342005449009, "grad_norm": 11.004631042480469, "learning_rate": 9.949223650537956e-07, "loss": 0.0831, "step": 43450 }, { "epoch": 0.4643410438591805, "grad_norm": 0.942173182964325, "learning_rate": 9.94919976485711e-07, "loss": 0.0226, "step": 43460 }, { "epoch": 0.46444788717346014, "grad_norm": 1.9551265239715576, "learning_rate": 9.949175873588243e-07, "loss": 0.0961, "step": 43470 }, { "epoch": 0.4645547304877397, "grad_norm": 2.070716381072998, "learning_rate": 9.949151976731383e-07, "loss": 0.0839, "step": 43480 }, { "epoch": 0.46466157380201933, "grad_norm": 6.7787370681762695, "learning_rate": 9.949128074286556e-07, "loss": 0.0382, "step": 43490 }, { "epoch": 0.46476841711629896, "grad_norm": 0.28593966364860535, "learning_rate": 9.949104166253791e-07, "loss": 0.034, "step": 43500 }, { "epoch": 0.46487526043057853, "grad_norm": 1.312368392944336, "learning_rate": 9.949080252633116e-07, "loss": 0.1012, "step": 43510 }, { "epoch": 0.46498210374485816, "grad_norm": 0.7943368554115295, "learning_rate": 9.949056333424552e-07, "loss": 0.023, "step": 43520 }, { "epoch": 0.4650889470591378, "grad_norm": 3.0768609046936035, "learning_rate": 9.949032408628133e-07, "loss": 0.0663, "step": 43530 }, { "epoch": 0.4651957903734174, "grad_norm": 11.574678421020508, "learning_rate": 9.949008478243882e-07, "loss": 0.0991, "step": 43540 }, { "epoch": 0.465302633687697, "grad_norm": 7.343482971191406, "learning_rate": 9.948984542271826e-07, "loss": 0.0952, "step": 43550 }, { "epoch": 0.4654094770019766, "grad_norm": 0.4500872492790222, "learning_rate": 9.948960600711994e-07, "loss": 0.0332, "step": 43560 }, { "epoch": 0.4655163203162562, "grad_norm": 13.84740161895752, "learning_rate": 9.94893665356441e-07, "loss": 0.0666, "step": 43570 }, { "epoch": 0.4656231636305358, "grad_norm": 15.652152061462402, "learning_rate": 9.948912700829104e-07, "loss": 0.0318, "step": 43580 }, { "epoch": 0.4657300069448154, "grad_norm": 3.841923475265503, "learning_rate": 9.948888742506102e-07, "loss": 0.0666, "step": 43590 }, { "epoch": 0.46583685025909505, "grad_norm": 4.55827522277832, "learning_rate": 9.948864778595434e-07, "loss": 0.0585, "step": 43600 }, { "epoch": 0.4659436935733747, "grad_norm": 10.630289077758789, "learning_rate": 9.948840809097122e-07, "loss": 0.0855, "step": 43610 }, { "epoch": 0.46605053688765424, "grad_norm": 0.8035747408866882, "learning_rate": 9.948816834011194e-07, "loss": 0.0623, "step": 43620 }, { "epoch": 0.46615738020193387, "grad_norm": 0.1503131240606308, "learning_rate": 9.94879285333768e-07, "loss": 0.0381, "step": 43630 }, { "epoch": 0.4662642235162135, "grad_norm": 1.5227925777435303, "learning_rate": 9.948768867076602e-07, "loss": 0.0627, "step": 43640 }, { "epoch": 0.46637106683049306, "grad_norm": 7.659900188446045, "learning_rate": 9.948744875227994e-07, "loss": 0.0652, "step": 43650 }, { "epoch": 0.4664779101447727, "grad_norm": 2.16231632232666, "learning_rate": 9.948720877791878e-07, "loss": 0.0596, "step": 43660 }, { "epoch": 0.4665847534590523, "grad_norm": 0.7930048108100891, "learning_rate": 9.948696874768283e-07, "loss": 0.0252, "step": 43670 }, { "epoch": 0.4666915967733319, "grad_norm": 1.7763108015060425, "learning_rate": 9.948672866157236e-07, "loss": 0.0539, "step": 43680 }, { "epoch": 0.4667984400876115, "grad_norm": 3.9719324111938477, "learning_rate": 9.948648851958766e-07, "loss": 0.0991, "step": 43690 }, { "epoch": 0.46690528340189114, "grad_norm": 5.74525785446167, "learning_rate": 9.948624832172893e-07, "loss": 0.0909, "step": 43700 }, { "epoch": 0.46701212671617076, "grad_norm": 5.02345609664917, "learning_rate": 9.948600806799653e-07, "loss": 0.0675, "step": 43710 }, { "epoch": 0.46711897003045033, "grad_norm": 5.826532363891602, "learning_rate": 9.94857677583907e-07, "loss": 0.0458, "step": 43720 }, { "epoch": 0.46722581334472996, "grad_norm": 5.6338653564453125, "learning_rate": 9.948552739291166e-07, "loss": 0.0982, "step": 43730 }, { "epoch": 0.4673326566590096, "grad_norm": 3.4431025981903076, "learning_rate": 9.948528697155976e-07, "loss": 0.0548, "step": 43740 }, { "epoch": 0.46743949997328915, "grad_norm": 1.5248456001281738, "learning_rate": 9.948504649433523e-07, "loss": 0.0609, "step": 43750 }, { "epoch": 0.4675463432875688, "grad_norm": 7.560227870941162, "learning_rate": 9.948480596123834e-07, "loss": 0.0754, "step": 43760 }, { "epoch": 0.4676531866018484, "grad_norm": 1.2078713178634644, "learning_rate": 9.948456537226937e-07, "loss": 0.0523, "step": 43770 }, { "epoch": 0.467760029916128, "grad_norm": 8.189746856689453, "learning_rate": 9.948432472742858e-07, "loss": 0.1919, "step": 43780 }, { "epoch": 0.4678668732304076, "grad_norm": 2.527489423751831, "learning_rate": 9.948408402671626e-07, "loss": 0.0856, "step": 43790 }, { "epoch": 0.4679737165446872, "grad_norm": 1.8070200681686401, "learning_rate": 9.948384327013268e-07, "loss": 0.0468, "step": 43800 }, { "epoch": 0.46808055985896685, "grad_norm": 8.265336990356445, "learning_rate": 9.948360245767812e-07, "loss": 0.07, "step": 43810 }, { "epoch": 0.4681874031732464, "grad_norm": 7.303919792175293, "learning_rate": 9.948336158935281e-07, "loss": 0.0476, "step": 43820 }, { "epoch": 0.46829424648752604, "grad_norm": 19.703807830810547, "learning_rate": 9.948312066515707e-07, "loss": 0.0486, "step": 43830 }, { "epoch": 0.46840108980180567, "grad_norm": 2.4277217388153076, "learning_rate": 9.948287968509116e-07, "loss": 0.047, "step": 43840 }, { "epoch": 0.46850793311608524, "grad_norm": 7.3915815353393555, "learning_rate": 9.948263864915531e-07, "loss": 0.1017, "step": 43850 }, { "epoch": 0.46861477643036487, "grad_norm": 4.499969005584717, "learning_rate": 9.948239755734985e-07, "loss": 0.047, "step": 43860 }, { "epoch": 0.4687216197446445, "grad_norm": 1.9052084684371948, "learning_rate": 9.948215640967503e-07, "loss": 0.0805, "step": 43870 }, { "epoch": 0.46882846305892406, "grad_norm": 7.080766677856445, "learning_rate": 9.948191520613112e-07, "loss": 0.024, "step": 43880 }, { "epoch": 0.4689353063732037, "grad_norm": 3.203662633895874, "learning_rate": 9.948167394671838e-07, "loss": 0.1008, "step": 43890 }, { "epoch": 0.4690421496874833, "grad_norm": 5.2350006103515625, "learning_rate": 9.94814326314371e-07, "loss": 0.0923, "step": 43900 }, { "epoch": 0.46914899300176294, "grad_norm": 4.520995616912842, "learning_rate": 9.948119126028755e-07, "loss": 0.0501, "step": 43910 }, { "epoch": 0.4692558363160425, "grad_norm": 0.3132641911506653, "learning_rate": 9.948094983327e-07, "loss": 0.0765, "step": 43920 }, { "epoch": 0.46936267963032213, "grad_norm": 3.5415964126586914, "learning_rate": 9.948070835038473e-07, "loss": 0.0789, "step": 43930 }, { "epoch": 0.46946952294460176, "grad_norm": 9.56460189819336, "learning_rate": 9.9480466811632e-07, "loss": 0.0761, "step": 43940 }, { "epoch": 0.46957636625888133, "grad_norm": 2.7202017307281494, "learning_rate": 9.94802252170121e-07, "loss": 0.1264, "step": 43950 }, { "epoch": 0.46968320957316095, "grad_norm": 24.899730682373047, "learning_rate": 9.947998356652528e-07, "loss": 0.0856, "step": 43960 }, { "epoch": 0.4697900528874406, "grad_norm": 2.887664794921875, "learning_rate": 9.947974186017184e-07, "loss": 0.0814, "step": 43970 }, { "epoch": 0.4698968962017202, "grad_norm": 14.106945991516113, "learning_rate": 9.9479500097952e-07, "loss": 0.1156, "step": 43980 }, { "epoch": 0.4700037395159998, "grad_norm": 0.8767251968383789, "learning_rate": 9.94792582798661e-07, "loss": 0.0725, "step": 43990 }, { "epoch": 0.4701105828302794, "grad_norm": 4.268429756164551, "learning_rate": 9.947901640591437e-07, "loss": 0.0313, "step": 44000 }, { "epoch": 0.470217426144559, "grad_norm": 0.8893640041351318, "learning_rate": 9.947877447609712e-07, "loss": 0.0234, "step": 44010 }, { "epoch": 0.4703242694588386, "grad_norm": 0.6116833686828613, "learning_rate": 9.947853249041457e-07, "loss": 0.0381, "step": 44020 }, { "epoch": 0.4704311127731182, "grad_norm": 15.544695854187012, "learning_rate": 9.947829044886705e-07, "loss": 0.1087, "step": 44030 }, { "epoch": 0.47053795608739785, "grad_norm": 5.398011684417725, "learning_rate": 9.947804835145478e-07, "loss": 0.0546, "step": 44040 }, { "epoch": 0.4706447994016774, "grad_norm": 6.7042999267578125, "learning_rate": 9.947780619817808e-07, "loss": 0.071, "step": 44050 }, { "epoch": 0.47075164271595704, "grad_norm": 7.687565803527832, "learning_rate": 9.947756398903716e-07, "loss": 0.0854, "step": 44060 }, { "epoch": 0.47085848603023667, "grad_norm": 12.14150333404541, "learning_rate": 9.947732172403237e-07, "loss": 0.047, "step": 44070 }, { "epoch": 0.4709653293445163, "grad_norm": 5.18653678894043, "learning_rate": 9.947707940316396e-07, "loss": 0.042, "step": 44080 }, { "epoch": 0.47107217265879586, "grad_norm": 2.6234309673309326, "learning_rate": 9.947683702643217e-07, "loss": 0.0689, "step": 44090 }, { "epoch": 0.4711790159730755, "grad_norm": 0.4445379674434662, "learning_rate": 9.947659459383732e-07, "loss": 0.0957, "step": 44100 }, { "epoch": 0.4712858592873551, "grad_norm": 7.996902942657471, "learning_rate": 9.947635210537964e-07, "loss": 0.0871, "step": 44110 }, { "epoch": 0.4713927026016347, "grad_norm": 21.345060348510742, "learning_rate": 9.947610956105945e-07, "loss": 0.1122, "step": 44120 }, { "epoch": 0.4714995459159143, "grad_norm": 2.4838099479675293, "learning_rate": 9.947586696087697e-07, "loss": 0.052, "step": 44130 }, { "epoch": 0.47160638923019393, "grad_norm": 4.588616371154785, "learning_rate": 9.947562430483251e-07, "loss": 0.082, "step": 44140 }, { "epoch": 0.4717132325444735, "grad_norm": 8.335647583007812, "learning_rate": 9.947538159292634e-07, "loss": 0.111, "step": 44150 }, { "epoch": 0.47182007585875313, "grad_norm": 4.142678260803223, "learning_rate": 9.947513882515873e-07, "loss": 0.0884, "step": 44160 }, { "epoch": 0.47192691917303276, "grad_norm": 4.083523750305176, "learning_rate": 9.947489600152994e-07, "loss": 0.1554, "step": 44170 }, { "epoch": 0.4720337624873124, "grad_norm": 3.70002818107605, "learning_rate": 9.94746531220403e-07, "loss": 0.0786, "step": 44180 }, { "epoch": 0.47214060580159195, "grad_norm": 5.484951972961426, "learning_rate": 9.947441018668999e-07, "loss": 0.0778, "step": 44190 }, { "epoch": 0.4722474491158716, "grad_norm": 0.8418497443199158, "learning_rate": 9.947416719547936e-07, "loss": 0.0269, "step": 44200 }, { "epoch": 0.4723542924301512, "grad_norm": 2.586139678955078, "learning_rate": 9.947392414840867e-07, "loss": 0.0897, "step": 44210 }, { "epoch": 0.47246113574443077, "grad_norm": 4.224028587341309, "learning_rate": 9.947368104547818e-07, "loss": 0.0703, "step": 44220 }, { "epoch": 0.4725679790587104, "grad_norm": 17.357606887817383, "learning_rate": 9.947343788668816e-07, "loss": 0.0509, "step": 44230 }, { "epoch": 0.47267482237299, "grad_norm": 14.603134155273438, "learning_rate": 9.94731946720389e-07, "loss": 0.0469, "step": 44240 }, { "epoch": 0.4727816656872696, "grad_norm": 11.306777000427246, "learning_rate": 9.947295140153066e-07, "loss": 0.0972, "step": 44250 }, { "epoch": 0.4728885090015492, "grad_norm": 5.950926780700684, "learning_rate": 9.947270807516375e-07, "loss": 0.0485, "step": 44260 }, { "epoch": 0.47299535231582884, "grad_norm": 3.335711717605591, "learning_rate": 9.94724646929384e-07, "loss": 0.0317, "step": 44270 }, { "epoch": 0.47310219563010847, "grad_norm": 2.777517318725586, "learning_rate": 9.94722212548549e-07, "loss": 0.0208, "step": 44280 }, { "epoch": 0.47320903894438804, "grad_norm": 9.096565246582031, "learning_rate": 9.947197776091353e-07, "loss": 0.0363, "step": 44290 }, { "epoch": 0.47331588225866766, "grad_norm": 4.328815460205078, "learning_rate": 9.94717342111146e-07, "loss": 0.086, "step": 44300 }, { "epoch": 0.4734227255729473, "grad_norm": 2.030388832092285, "learning_rate": 9.94714906054583e-07, "loss": 0.0457, "step": 44310 }, { "epoch": 0.47352956888722686, "grad_norm": 1.4150774478912354, "learning_rate": 9.947124694394498e-07, "loss": 0.025, "step": 44320 }, { "epoch": 0.4736364122015065, "grad_norm": 2.1805272102355957, "learning_rate": 9.947100322657487e-07, "loss": 0.0715, "step": 44330 }, { "epoch": 0.4737432555157861, "grad_norm": 9.47410774230957, "learning_rate": 9.947075945334827e-07, "loss": 0.0469, "step": 44340 }, { "epoch": 0.47385009883006574, "grad_norm": 2.2383551597595215, "learning_rate": 9.947051562426544e-07, "loss": 0.0987, "step": 44350 }, { "epoch": 0.4739569421443453, "grad_norm": 5.681424617767334, "learning_rate": 9.94702717393267e-07, "loss": 0.0832, "step": 44360 }, { "epoch": 0.47406378545862493, "grad_norm": 5.621098518371582, "learning_rate": 9.947002779853225e-07, "loss": 0.0567, "step": 44370 }, { "epoch": 0.47417062877290456, "grad_norm": 1.6435046195983887, "learning_rate": 9.946978380188242e-07, "loss": 0.0373, "step": 44380 }, { "epoch": 0.4742774720871841, "grad_norm": 0.5185169577598572, "learning_rate": 9.946953974937748e-07, "loss": 0.0607, "step": 44390 }, { "epoch": 0.47438431540146375, "grad_norm": 3.763533592224121, "learning_rate": 9.946929564101768e-07, "loss": 0.079, "step": 44400 }, { "epoch": 0.4744911587157434, "grad_norm": 10.73742389678955, "learning_rate": 9.946905147680332e-07, "loss": 0.0926, "step": 44410 }, { "epoch": 0.47459800203002295, "grad_norm": 1.510758399963379, "learning_rate": 9.946880725673466e-07, "loss": 0.0584, "step": 44420 }, { "epoch": 0.4747048453443026, "grad_norm": 20.207427978515625, "learning_rate": 9.9468562980812e-07, "loss": 0.097, "step": 44430 }, { "epoch": 0.4748116886585822, "grad_norm": 1.8407964706420898, "learning_rate": 9.946831864903558e-07, "loss": 0.0715, "step": 44440 }, { "epoch": 0.4749185319728618, "grad_norm": 10.06351089477539, "learning_rate": 9.946807426140572e-07, "loss": 0.0662, "step": 44450 }, { "epoch": 0.4750253752871414, "grad_norm": 4.626868724822998, "learning_rate": 9.946782981792263e-07, "loss": 0.11, "step": 44460 }, { "epoch": 0.475132218601421, "grad_norm": 1.935513973236084, "learning_rate": 9.946758531858665e-07, "loss": 0.0543, "step": 44470 }, { "epoch": 0.47523906191570064, "grad_norm": 1.3603827953338623, "learning_rate": 9.946734076339804e-07, "loss": 0.0282, "step": 44480 }, { "epoch": 0.4753459052299802, "grad_norm": 3.2659525871276855, "learning_rate": 9.946709615235707e-07, "loss": 0.0346, "step": 44490 }, { "epoch": 0.47545274854425984, "grad_norm": 2.3209588527679443, "learning_rate": 9.9466851485464e-07, "loss": 0.0319, "step": 44500 }, { "epoch": 0.47555959185853947, "grad_norm": 1.051530361175537, "learning_rate": 9.946660676271913e-07, "loss": 0.0421, "step": 44510 }, { "epoch": 0.47566643517281904, "grad_norm": 1.1979944705963135, "learning_rate": 9.946636198412272e-07, "loss": 0.0326, "step": 44520 }, { "epoch": 0.47577327848709866, "grad_norm": 6.192265510559082, "learning_rate": 9.946611714967506e-07, "loss": 0.06, "step": 44530 }, { "epoch": 0.4758801218013783, "grad_norm": 4.491703033447266, "learning_rate": 9.946587225937641e-07, "loss": 0.0918, "step": 44540 }, { "epoch": 0.4759869651156579, "grad_norm": 1.2200862169265747, "learning_rate": 9.946562731322708e-07, "loss": 0.0171, "step": 44550 }, { "epoch": 0.4760938084299375, "grad_norm": 1.3382668495178223, "learning_rate": 9.94653823112273e-07, "loss": 0.05, "step": 44560 }, { "epoch": 0.4762006517442171, "grad_norm": 15.117173194885254, "learning_rate": 9.946513725337737e-07, "loss": 0.0612, "step": 44570 }, { "epoch": 0.47630749505849673, "grad_norm": 9.505473136901855, "learning_rate": 9.94648921396776e-07, "loss": 0.1018, "step": 44580 }, { "epoch": 0.4764143383727763, "grad_norm": 2.257307529449463, "learning_rate": 9.94646469701282e-07, "loss": 0.0643, "step": 44590 }, { "epoch": 0.47652118168705593, "grad_norm": 3.9715473651885986, "learning_rate": 9.946440174472949e-07, "loss": 0.1529, "step": 44600 }, { "epoch": 0.47662802500133555, "grad_norm": 1.6217483282089233, "learning_rate": 9.946415646348175e-07, "loss": 0.0533, "step": 44610 }, { "epoch": 0.4767348683156151, "grad_norm": 11.548699378967285, "learning_rate": 9.946391112638521e-07, "loss": 0.0865, "step": 44620 }, { "epoch": 0.47684171162989475, "grad_norm": 7.026651382446289, "learning_rate": 9.946366573344021e-07, "loss": 0.1154, "step": 44630 }, { "epoch": 0.4769485549441744, "grad_norm": 6.25260066986084, "learning_rate": 9.946342028464698e-07, "loss": 0.1298, "step": 44640 }, { "epoch": 0.477055398258454, "grad_norm": 9.111979484558105, "learning_rate": 9.946317478000582e-07, "loss": 0.0635, "step": 44650 }, { "epoch": 0.47716224157273357, "grad_norm": 0.13001622259616852, "learning_rate": 9.946292921951703e-07, "loss": 0.0529, "step": 44660 }, { "epoch": 0.4772690848870132, "grad_norm": 8.613039016723633, "learning_rate": 9.946268360318083e-07, "loss": 0.0661, "step": 44670 }, { "epoch": 0.4773759282012928, "grad_norm": 0.12822005152702332, "learning_rate": 9.946243793099753e-07, "loss": 0.0706, "step": 44680 }, { "epoch": 0.4774827715155724, "grad_norm": 4.024501800537109, "learning_rate": 9.946219220296742e-07, "loss": 0.054, "step": 44690 }, { "epoch": 0.477589614829852, "grad_norm": 2.8657712936401367, "learning_rate": 9.946194641909075e-07, "loss": 0.0717, "step": 44700 }, { "epoch": 0.47769645814413164, "grad_norm": 0.8884826302528381, "learning_rate": 9.94617005793678e-07, "loss": 0.0636, "step": 44710 }, { "epoch": 0.47780330145841127, "grad_norm": 7.331453800201416, "learning_rate": 9.946145468379888e-07, "loss": 0.0608, "step": 44720 }, { "epoch": 0.47791014477269084, "grad_norm": 11.104986190795898, "learning_rate": 9.946120873238422e-07, "loss": 0.0597, "step": 44730 }, { "epoch": 0.47801698808697046, "grad_norm": 9.839834213256836, "learning_rate": 9.946096272512415e-07, "loss": 0.1133, "step": 44740 }, { "epoch": 0.4781238314012501, "grad_norm": 33.140281677246094, "learning_rate": 9.94607166620189e-07, "loss": 0.1494, "step": 44750 }, { "epoch": 0.47823067471552966, "grad_norm": 3.839642286300659, "learning_rate": 9.946047054306877e-07, "loss": 0.0418, "step": 44760 }, { "epoch": 0.4783375180298093, "grad_norm": 13.520012855529785, "learning_rate": 9.946022436827403e-07, "loss": 0.056, "step": 44770 }, { "epoch": 0.4784443613440889, "grad_norm": 3.5524611473083496, "learning_rate": 9.945997813763498e-07, "loss": 0.1165, "step": 44780 }, { "epoch": 0.4785512046583685, "grad_norm": 4.094461441040039, "learning_rate": 9.94597318511519e-07, "loss": 0.0732, "step": 44790 }, { "epoch": 0.4786580479726481, "grad_norm": 2.5034782886505127, "learning_rate": 9.9459485508825e-07, "loss": 0.0583, "step": 44800 }, { "epoch": 0.47876489128692773, "grad_norm": 2.8400845527648926, "learning_rate": 9.945923911065464e-07, "loss": 0.0597, "step": 44810 }, { "epoch": 0.47887173460120736, "grad_norm": 1.1027241945266724, "learning_rate": 9.945899265664106e-07, "loss": 0.0299, "step": 44820 }, { "epoch": 0.4789785779154869, "grad_norm": 1.6598314046859741, "learning_rate": 9.945874614678454e-07, "loss": 0.0762, "step": 44830 }, { "epoch": 0.47908542122976655, "grad_norm": 0.22442562878131866, "learning_rate": 9.945849958108537e-07, "loss": 0.0808, "step": 44840 }, { "epoch": 0.4791922645440462, "grad_norm": 5.686928749084473, "learning_rate": 9.945825295954381e-07, "loss": 0.2029, "step": 44850 }, { "epoch": 0.47929910785832575, "grad_norm": 5.456426620483398, "learning_rate": 9.945800628216015e-07, "loss": 0.0542, "step": 44860 }, { "epoch": 0.47940595117260537, "grad_norm": 4.943167209625244, "learning_rate": 9.945775954893467e-07, "loss": 0.0217, "step": 44870 }, { "epoch": 0.479512794486885, "grad_norm": 0.5235735177993774, "learning_rate": 9.945751275986766e-07, "loss": 0.0354, "step": 44880 }, { "epoch": 0.47961963780116457, "grad_norm": 5.61978816986084, "learning_rate": 9.945726591495939e-07, "loss": 0.0511, "step": 44890 }, { "epoch": 0.4797264811154442, "grad_norm": 5.055930137634277, "learning_rate": 9.94570190142101e-07, "loss": 0.091, "step": 44900 }, { "epoch": 0.4798333244297238, "grad_norm": 9.023307800292969, "learning_rate": 9.945677205762013e-07, "loss": 0.053, "step": 44910 }, { "epoch": 0.47994016774400344, "grad_norm": 4.917418956756592, "learning_rate": 9.945652504518973e-07, "loss": 0.0379, "step": 44920 }, { "epoch": 0.480047011058283, "grad_norm": 4.003932476043701, "learning_rate": 9.945627797691918e-07, "loss": 0.036, "step": 44930 }, { "epoch": 0.48015385437256264, "grad_norm": 2.0948798656463623, "learning_rate": 9.945603085280875e-07, "loss": 0.0709, "step": 44940 }, { "epoch": 0.48026069768684226, "grad_norm": 6.906229496002197, "learning_rate": 9.945578367285874e-07, "loss": 0.089, "step": 44950 }, { "epoch": 0.48036754100112183, "grad_norm": 4.830273151397705, "learning_rate": 9.94555364370694e-07, "loss": 0.0505, "step": 44960 }, { "epoch": 0.48047438431540146, "grad_norm": 0.1887221783399582, "learning_rate": 9.945528914544104e-07, "loss": 0.0855, "step": 44970 }, { "epoch": 0.4805812276296811, "grad_norm": 3.3597843647003174, "learning_rate": 9.945504179797393e-07, "loss": 0.0207, "step": 44980 }, { "epoch": 0.48068807094396065, "grad_norm": 1.9610884189605713, "learning_rate": 9.945479439466833e-07, "loss": 0.1045, "step": 44990 }, { "epoch": 0.4807949142582403, "grad_norm": 3.9184253215789795, "learning_rate": 9.945454693552455e-07, "loss": 0.0385, "step": 45000 }, { "epoch": 0.4809017575725199, "grad_norm": 1.0977287292480469, "learning_rate": 9.945429942054284e-07, "loss": 0.0518, "step": 45010 }, { "epoch": 0.48100860088679953, "grad_norm": 6.466468811035156, "learning_rate": 9.94540518497235e-07, "loss": 0.0618, "step": 45020 }, { "epoch": 0.4811154442010791, "grad_norm": 2.981811285018921, "learning_rate": 9.945380422306681e-07, "loss": 0.0849, "step": 45030 }, { "epoch": 0.4812222875153587, "grad_norm": 3.8234703540802, "learning_rate": 9.945355654057304e-07, "loss": 0.0584, "step": 45040 }, { "epoch": 0.48132913082963835, "grad_norm": 13.794965744018555, "learning_rate": 9.945330880224247e-07, "loss": 0.107, "step": 45050 }, { "epoch": 0.4814359741439179, "grad_norm": 8.88117504119873, "learning_rate": 9.945306100807538e-07, "loss": 0.0877, "step": 45060 }, { "epoch": 0.48154281745819755, "grad_norm": 2.6892852783203125, "learning_rate": 9.945281315807205e-07, "loss": 0.0297, "step": 45070 }, { "epoch": 0.4816496607724772, "grad_norm": 1.3788690567016602, "learning_rate": 9.945256525223276e-07, "loss": 0.054, "step": 45080 }, { "epoch": 0.4817565040867568, "grad_norm": 4.049251079559326, "learning_rate": 9.945231729055781e-07, "loss": 0.0317, "step": 45090 }, { "epoch": 0.48186334740103637, "grad_norm": 14.425333023071289, "learning_rate": 9.945206927304743e-07, "loss": 0.1089, "step": 45100 }, { "epoch": 0.481970190715316, "grad_norm": 9.71670913696289, "learning_rate": 9.945182119970197e-07, "loss": 0.1246, "step": 45110 }, { "epoch": 0.4820770340295956, "grad_norm": 4.153923988342285, "learning_rate": 9.945157307052164e-07, "loss": 0.0567, "step": 45120 }, { "epoch": 0.4821838773438752, "grad_norm": 5.762472629547119, "learning_rate": 9.945132488550676e-07, "loss": 0.131, "step": 45130 }, { "epoch": 0.4822907206581548, "grad_norm": 8.017349243164062, "learning_rate": 9.945107664465762e-07, "loss": 0.0659, "step": 45140 }, { "epoch": 0.48239756397243444, "grad_norm": 9.042156219482422, "learning_rate": 9.945082834797445e-07, "loss": 0.0817, "step": 45150 }, { "epoch": 0.482504407286714, "grad_norm": 6.174088001251221, "learning_rate": 9.945057999545758e-07, "loss": 0.0516, "step": 45160 }, { "epoch": 0.48261125060099364, "grad_norm": 3.3140358924865723, "learning_rate": 9.945033158710728e-07, "loss": 0.0608, "step": 45170 }, { "epoch": 0.48271809391527326, "grad_norm": 3.0027925968170166, "learning_rate": 9.945008312292382e-07, "loss": 0.0449, "step": 45180 }, { "epoch": 0.4828249372295529, "grad_norm": 2.017124652862549, "learning_rate": 9.944983460290747e-07, "loss": 0.0355, "step": 45190 }, { "epoch": 0.48293178054383246, "grad_norm": 4.935925483703613, "learning_rate": 9.944958602705853e-07, "loss": 0.0838, "step": 45200 }, { "epoch": 0.4830386238581121, "grad_norm": 5.741629600524902, "learning_rate": 9.944933739537728e-07, "loss": 0.0247, "step": 45210 }, { "epoch": 0.4831454671723917, "grad_norm": 7.1833696365356445, "learning_rate": 9.944908870786399e-07, "loss": 0.1155, "step": 45220 }, { "epoch": 0.4832523104866713, "grad_norm": 4.841455459594727, "learning_rate": 9.944883996451896e-07, "loss": 0.0869, "step": 45230 }, { "epoch": 0.4833591538009509, "grad_norm": 11.325895309448242, "learning_rate": 9.944859116534245e-07, "loss": 0.081, "step": 45240 }, { "epoch": 0.48346599711523053, "grad_norm": 0.5571889877319336, "learning_rate": 9.944834231033474e-07, "loss": 0.0958, "step": 45250 }, { "epoch": 0.4835728404295101, "grad_norm": 3.1208581924438477, "learning_rate": 9.944809339949613e-07, "loss": 0.062, "step": 45260 }, { "epoch": 0.4836796837437897, "grad_norm": 5.290499210357666, "learning_rate": 9.94478444328269e-07, "loss": 0.094, "step": 45270 }, { "epoch": 0.48378652705806935, "grad_norm": 2.032050132751465, "learning_rate": 9.94475954103273e-07, "loss": 0.0522, "step": 45280 }, { "epoch": 0.483893370372349, "grad_norm": 6.643368721008301, "learning_rate": 9.944734633199766e-07, "loss": 0.069, "step": 45290 }, { "epoch": 0.48400021368662854, "grad_norm": 2.6115074157714844, "learning_rate": 9.94470971978382e-07, "loss": 0.0421, "step": 45300 }, { "epoch": 0.48410705700090817, "grad_norm": 0.36851686239242554, "learning_rate": 9.944684800784926e-07, "loss": 0.0716, "step": 45310 }, { "epoch": 0.4842139003151878, "grad_norm": 2.6124026775360107, "learning_rate": 9.94465987620311e-07, "loss": 0.0501, "step": 45320 }, { "epoch": 0.48432074362946737, "grad_norm": 2.9921298027038574, "learning_rate": 9.944634946038397e-07, "loss": 0.0378, "step": 45330 }, { "epoch": 0.484427586943747, "grad_norm": 2.6343162059783936, "learning_rate": 9.944610010290819e-07, "loss": 0.0619, "step": 45340 }, { "epoch": 0.4845344302580266, "grad_norm": 1.7573473453521729, "learning_rate": 9.944585068960406e-07, "loss": 0.0551, "step": 45350 }, { "epoch": 0.4846412735723062, "grad_norm": 7.1036787033081055, "learning_rate": 9.94456012204718e-07, "loss": 0.0218, "step": 45360 }, { "epoch": 0.4847481168865858, "grad_norm": 4.939263343811035, "learning_rate": 9.944535169551174e-07, "loss": 0.0955, "step": 45370 }, { "epoch": 0.48485496020086544, "grad_norm": 8.010347366333008, "learning_rate": 9.944510211472413e-07, "loss": 0.0724, "step": 45380 }, { "epoch": 0.48496180351514506, "grad_norm": 2.4416816234588623, "learning_rate": 9.944485247810927e-07, "loss": 0.0922, "step": 45390 }, { "epoch": 0.48506864682942463, "grad_norm": 4.896984100341797, "learning_rate": 9.944460278566747e-07, "loss": 0.1211, "step": 45400 }, { "epoch": 0.48517549014370426, "grad_norm": 3.3296737670898438, "learning_rate": 9.944435303739895e-07, "loss": 0.0889, "step": 45410 }, { "epoch": 0.4852823334579839, "grad_norm": 4.765622138977051, "learning_rate": 9.944410323330404e-07, "loss": 0.0636, "step": 45420 }, { "epoch": 0.48538917677226345, "grad_norm": 4.202752113342285, "learning_rate": 9.944385337338298e-07, "loss": 0.0635, "step": 45430 }, { "epoch": 0.4854960200865431, "grad_norm": 7.9954681396484375, "learning_rate": 9.944360345763613e-07, "loss": 0.0828, "step": 45440 }, { "epoch": 0.4856028634008227, "grad_norm": 4.337503433227539, "learning_rate": 9.944335348606368e-07, "loss": 0.0556, "step": 45450 }, { "epoch": 0.48570970671510233, "grad_norm": 1.9513074159622192, "learning_rate": 9.944310345866593e-07, "loss": 0.05, "step": 45460 }, { "epoch": 0.4858165500293819, "grad_norm": 10.536066055297852, "learning_rate": 9.944285337544324e-07, "loss": 0.0819, "step": 45470 }, { "epoch": 0.4859233933436615, "grad_norm": 4.2356767654418945, "learning_rate": 9.94426032363958e-07, "loss": 0.0535, "step": 45480 }, { "epoch": 0.48603023665794115, "grad_norm": 5.306307792663574, "learning_rate": 9.944235304152395e-07, "loss": 0.0651, "step": 45490 }, { "epoch": 0.4861370799722207, "grad_norm": 3.6552517414093018, "learning_rate": 9.944210279082795e-07, "loss": 0.068, "step": 45500 }, { "epoch": 0.48624392328650035, "grad_norm": 0.28176671266555786, "learning_rate": 9.944185248430807e-07, "loss": 0.0966, "step": 45510 }, { "epoch": 0.48635076660077997, "grad_norm": 0.0689936950802803, "learning_rate": 9.944160212196462e-07, "loss": 0.0578, "step": 45520 }, { "epoch": 0.48645760991505954, "grad_norm": 0.3080001175403595, "learning_rate": 9.944135170379784e-07, "loss": 0.0637, "step": 45530 }, { "epoch": 0.48656445322933917, "grad_norm": 5.879272937774658, "learning_rate": 9.944110122980807e-07, "loss": 0.0377, "step": 45540 }, { "epoch": 0.4866712965436188, "grad_norm": 3.8638651371002197, "learning_rate": 9.944085069999557e-07, "loss": 0.0659, "step": 45550 }, { "epoch": 0.4867781398578984, "grad_norm": 4.391874313354492, "learning_rate": 9.94406001143606e-07, "loss": 0.0892, "step": 45560 }, { "epoch": 0.486884983172178, "grad_norm": 4.248264312744141, "learning_rate": 9.944034947290348e-07, "loss": 0.0522, "step": 45570 }, { "epoch": 0.4869918264864576, "grad_norm": 1.384369134902954, "learning_rate": 9.944009877562446e-07, "loss": 0.0834, "step": 45580 }, { "epoch": 0.48709866980073724, "grad_norm": 5.950744152069092, "learning_rate": 9.943984802252385e-07, "loss": 0.0655, "step": 45590 }, { "epoch": 0.4872055131150168, "grad_norm": 4.822970390319824, "learning_rate": 9.94395972136019e-07, "loss": 0.0651, "step": 45600 }, { "epoch": 0.48731235642929643, "grad_norm": 4.547923564910889, "learning_rate": 9.943934634885894e-07, "loss": 0.0472, "step": 45610 }, { "epoch": 0.48741919974357606, "grad_norm": 6.1403117179870605, "learning_rate": 9.94390954282952e-07, "loss": 0.0352, "step": 45620 }, { "epoch": 0.48752604305785563, "grad_norm": 3.6398377418518066, "learning_rate": 9.9438844451911e-07, "loss": 0.0785, "step": 45630 }, { "epoch": 0.48763288637213525, "grad_norm": 0.10783490538597107, "learning_rate": 9.943859341970662e-07, "loss": 0.0491, "step": 45640 }, { "epoch": 0.4877397296864149, "grad_norm": 5.453753471374512, "learning_rate": 9.943834233168232e-07, "loss": 0.0707, "step": 45650 }, { "epoch": 0.4878465730006945, "grad_norm": 0.3300391435623169, "learning_rate": 9.94380911878384e-07, "loss": 0.0353, "step": 45660 }, { "epoch": 0.4879534163149741, "grad_norm": 8.235820770263672, "learning_rate": 9.943783998817516e-07, "loss": 0.0783, "step": 45670 }, { "epoch": 0.4880602596292537, "grad_norm": 0.5114448070526123, "learning_rate": 9.943758873269287e-07, "loss": 0.1102, "step": 45680 }, { "epoch": 0.4881671029435333, "grad_norm": 1.045058250427246, "learning_rate": 9.94373374213918e-07, "loss": 0.0374, "step": 45690 }, { "epoch": 0.4882739462578129, "grad_norm": 0.586676836013794, "learning_rate": 9.943708605427225e-07, "loss": 0.0429, "step": 45700 }, { "epoch": 0.4883807895720925, "grad_norm": 3.453284978866577, "learning_rate": 9.94368346313345e-07, "loss": 0.087, "step": 45710 }, { "epoch": 0.48848763288637215, "grad_norm": 4.915791988372803, "learning_rate": 9.943658315257883e-07, "loss": 0.0442, "step": 45720 }, { "epoch": 0.4885944762006517, "grad_norm": 0.31672903895378113, "learning_rate": 9.943633161800554e-07, "loss": 0.035, "step": 45730 }, { "epoch": 0.48870131951493134, "grad_norm": 2.8640594482421875, "learning_rate": 9.943608002761487e-07, "loss": 0.0358, "step": 45740 }, { "epoch": 0.48880816282921097, "grad_norm": 14.694345474243164, "learning_rate": 9.943582838140716e-07, "loss": 0.0778, "step": 45750 }, { "epoch": 0.4889150061434906, "grad_norm": 1.7553828954696655, "learning_rate": 9.943557667938266e-07, "loss": 0.0705, "step": 45760 }, { "epoch": 0.48902184945777016, "grad_norm": 2.454974412918091, "learning_rate": 9.943532492154167e-07, "loss": 0.1094, "step": 45770 }, { "epoch": 0.4891286927720498, "grad_norm": 2.502998113632202, "learning_rate": 9.943507310788446e-07, "loss": 0.0623, "step": 45780 }, { "epoch": 0.4892355360863294, "grad_norm": 10.343847274780273, "learning_rate": 9.94348212384113e-07, "loss": 0.0781, "step": 45790 }, { "epoch": 0.489342379400609, "grad_norm": 7.014061450958252, "learning_rate": 9.943456931312253e-07, "loss": 0.0481, "step": 45800 }, { "epoch": 0.4894492227148886, "grad_norm": 1.8322709798812866, "learning_rate": 9.94343173320184e-07, "loss": 0.1372, "step": 45810 }, { "epoch": 0.48955606602916824, "grad_norm": 4.535898685455322, "learning_rate": 9.943406529509917e-07, "loss": 0.0727, "step": 45820 }, { "epoch": 0.48966290934344786, "grad_norm": 3.1310462951660156, "learning_rate": 9.943381320236517e-07, "loss": 0.0457, "step": 45830 }, { "epoch": 0.48976975265772743, "grad_norm": 4.483223915100098, "learning_rate": 9.943356105381666e-07, "loss": 0.0825, "step": 45840 }, { "epoch": 0.48987659597200706, "grad_norm": 4.538271903991699, "learning_rate": 9.94333088494539e-07, "loss": 0.0555, "step": 45850 }, { "epoch": 0.4899834392862867, "grad_norm": 6.99383020401001, "learning_rate": 9.943305658927725e-07, "loss": 0.0738, "step": 45860 }, { "epoch": 0.49009028260056625, "grad_norm": 6.670404434204102, "learning_rate": 9.943280427328692e-07, "loss": 0.0827, "step": 45870 }, { "epoch": 0.4901971259148459, "grad_norm": 3.016026020050049, "learning_rate": 9.943255190148323e-07, "loss": 0.0348, "step": 45880 }, { "epoch": 0.4903039692291255, "grad_norm": 0.4022600054740906, "learning_rate": 9.943229947386645e-07, "loss": 0.0723, "step": 45890 }, { "epoch": 0.4904108125434051, "grad_norm": 11.130544662475586, "learning_rate": 9.943204699043688e-07, "loss": 0.0642, "step": 45900 }, { "epoch": 0.4905176558576847, "grad_norm": 1.793514370918274, "learning_rate": 9.94317944511948e-07, "loss": 0.0488, "step": 45910 }, { "epoch": 0.4906244991719643, "grad_norm": 0.37964770197868347, "learning_rate": 9.94315418561405e-07, "loss": 0.0717, "step": 45920 }, { "epoch": 0.49073134248624395, "grad_norm": 0.6030840277671814, "learning_rate": 9.943128920527423e-07, "loss": 0.0934, "step": 45930 }, { "epoch": 0.4908381858005235, "grad_norm": 3.0123069286346436, "learning_rate": 9.943103649859634e-07, "loss": 0.0321, "step": 45940 }, { "epoch": 0.49094502911480314, "grad_norm": 0.297217458486557, "learning_rate": 9.943078373610706e-07, "loss": 0.0621, "step": 45950 }, { "epoch": 0.49105187242908277, "grad_norm": 8.982544898986816, "learning_rate": 9.94305309178067e-07, "loss": 0.0915, "step": 45960 }, { "epoch": 0.49115871574336234, "grad_norm": 10.844429969787598, "learning_rate": 9.943027804369552e-07, "loss": 0.0846, "step": 45970 }, { "epoch": 0.49126555905764197, "grad_norm": 4.646797180175781, "learning_rate": 9.943002511377385e-07, "loss": 0.04, "step": 45980 }, { "epoch": 0.4913724023719216, "grad_norm": 0.15787345170974731, "learning_rate": 9.942977212804196e-07, "loss": 0.0421, "step": 45990 }, { "epoch": 0.49147924568620116, "grad_norm": 4.173447132110596, "learning_rate": 9.94295190865001e-07, "loss": 0.1062, "step": 46000 }, { "epoch": 0.4915860890004808, "grad_norm": 12.776180267333984, "learning_rate": 9.94292659891486e-07, "loss": 0.0771, "step": 46010 }, { "epoch": 0.4916929323147604, "grad_norm": 0.20542065799236298, "learning_rate": 9.942901283598771e-07, "loss": 0.0327, "step": 46020 }, { "epoch": 0.49179977562904004, "grad_norm": 4.140134334564209, "learning_rate": 9.942875962701775e-07, "loss": 0.0395, "step": 46030 }, { "epoch": 0.4919066189433196, "grad_norm": 0.7030633687973022, "learning_rate": 9.9428506362239e-07, "loss": 0.0274, "step": 46040 }, { "epoch": 0.49201346225759923, "grad_norm": 0.19458244740962982, "learning_rate": 9.94282530416517e-07, "loss": 0.0351, "step": 46050 }, { "epoch": 0.49212030557187886, "grad_norm": 4.2714643478393555, "learning_rate": 9.942799966525618e-07, "loss": 0.0606, "step": 46060 }, { "epoch": 0.4922271488861584, "grad_norm": 4.704801559448242, "learning_rate": 9.942774623305274e-07, "loss": 0.033, "step": 46070 }, { "epoch": 0.49233399220043805, "grad_norm": 2.8339498043060303, "learning_rate": 9.942749274504163e-07, "loss": 0.0285, "step": 46080 }, { "epoch": 0.4924408355147177, "grad_norm": 0.3401469588279724, "learning_rate": 9.942723920122315e-07, "loss": 0.0598, "step": 46090 }, { "epoch": 0.49254767882899725, "grad_norm": 1.1578357219696045, "learning_rate": 9.94269856015976e-07, "loss": 0.0375, "step": 46100 }, { "epoch": 0.4926545221432769, "grad_norm": 22.752649307250977, "learning_rate": 9.942673194616525e-07, "loss": 0.1612, "step": 46110 }, { "epoch": 0.4927613654575565, "grad_norm": 6.805580139160156, "learning_rate": 9.942647823492638e-07, "loss": 0.1638, "step": 46120 }, { "epoch": 0.4928682087718361, "grad_norm": 0.5313088297843933, "learning_rate": 9.942622446788128e-07, "loss": 0.0475, "step": 46130 }, { "epoch": 0.4929750520861157, "grad_norm": 2.29673171043396, "learning_rate": 9.942597064503025e-07, "loss": 0.0922, "step": 46140 }, { "epoch": 0.4930818954003953, "grad_norm": 1.7649240493774414, "learning_rate": 9.942571676637358e-07, "loss": 0.0663, "step": 46150 }, { "epoch": 0.49318873871467495, "grad_norm": 4.141146183013916, "learning_rate": 9.942546283191153e-07, "loss": 0.044, "step": 46160 }, { "epoch": 0.4932955820289545, "grad_norm": 0.5169604420661926, "learning_rate": 9.94252088416444e-07, "loss": 0.0749, "step": 46170 }, { "epoch": 0.49340242534323414, "grad_norm": 2.99605131149292, "learning_rate": 9.942495479557248e-07, "loss": 0.0656, "step": 46180 }, { "epoch": 0.49350926865751377, "grad_norm": 14.416366577148438, "learning_rate": 9.942470069369607e-07, "loss": 0.0897, "step": 46190 }, { "epoch": 0.4936161119717934, "grad_norm": 14.838985443115234, "learning_rate": 9.942444653601544e-07, "loss": 0.0807, "step": 46200 }, { "epoch": 0.49372295528607296, "grad_norm": 4.055057525634766, "learning_rate": 9.942419232253087e-07, "loss": 0.046, "step": 46210 }, { "epoch": 0.4938297986003526, "grad_norm": 5.352225303649902, "learning_rate": 9.942393805324266e-07, "loss": 0.0565, "step": 46220 }, { "epoch": 0.4939366419146322, "grad_norm": 3.5044424533843994, "learning_rate": 9.94236837281511e-07, "loss": 0.0555, "step": 46230 }, { "epoch": 0.4940434852289118, "grad_norm": 7.992396354675293, "learning_rate": 9.942342934725647e-07, "loss": 0.079, "step": 46240 }, { "epoch": 0.4941503285431914, "grad_norm": 10.484291076660156, "learning_rate": 9.942317491055905e-07, "loss": 0.031, "step": 46250 }, { "epoch": 0.49425717185747103, "grad_norm": 5.747292995452881, "learning_rate": 9.942292041805912e-07, "loss": 0.0323, "step": 46260 }, { "epoch": 0.4943640151717506, "grad_norm": 7.393857002258301, "learning_rate": 9.942266586975703e-07, "loss": 0.1097, "step": 46270 }, { "epoch": 0.49447085848603023, "grad_norm": 8.896150588989258, "learning_rate": 9.9422411265653e-07, "loss": 0.0769, "step": 46280 }, { "epoch": 0.49457770180030985, "grad_norm": 2.523561477661133, "learning_rate": 9.942215660574731e-07, "loss": 0.0783, "step": 46290 }, { "epoch": 0.4946845451145895, "grad_norm": 6.41712760925293, "learning_rate": 9.94219018900403e-07, "loss": 0.1103, "step": 46300 }, { "epoch": 0.49479138842886905, "grad_norm": 6.830181121826172, "learning_rate": 9.942164711853222e-07, "loss": 0.0598, "step": 46310 }, { "epoch": 0.4948982317431487, "grad_norm": 6.400134086608887, "learning_rate": 9.942139229122338e-07, "loss": 0.0534, "step": 46320 }, { "epoch": 0.4950050750574283, "grad_norm": 4.780030250549316, "learning_rate": 9.942113740811406e-07, "loss": 0.0749, "step": 46330 }, { "epoch": 0.49511191837170787, "grad_norm": 12.315019607543945, "learning_rate": 9.942088246920452e-07, "loss": 0.1001, "step": 46340 }, { "epoch": 0.4952187616859875, "grad_norm": 22.617008209228516, "learning_rate": 9.942062747449511e-07, "loss": 0.1461, "step": 46350 }, { "epoch": 0.4953256050002671, "grad_norm": 7.000864028930664, "learning_rate": 9.942037242398606e-07, "loss": 0.1124, "step": 46360 }, { "epoch": 0.4954324483145467, "grad_norm": 3.2098424434661865, "learning_rate": 9.94201173176777e-07, "loss": 0.069, "step": 46370 }, { "epoch": 0.4955392916288263, "grad_norm": 5.527942180633545, "learning_rate": 9.941986215557029e-07, "loss": 0.056, "step": 46380 }, { "epoch": 0.49564613494310594, "grad_norm": 7.656278610229492, "learning_rate": 9.941960693766412e-07, "loss": 0.0593, "step": 46390 }, { "epoch": 0.49575297825738557, "grad_norm": 5.065549373626709, "learning_rate": 9.941935166395947e-07, "loss": 0.0503, "step": 46400 }, { "epoch": 0.49585982157166514, "grad_norm": 3.958540916442871, "learning_rate": 9.941909633445665e-07, "loss": 0.0382, "step": 46410 }, { "epoch": 0.49596666488594476, "grad_norm": 4.525500774383545, "learning_rate": 9.941884094915596e-07, "loss": 0.11, "step": 46420 }, { "epoch": 0.4960735082002244, "grad_norm": 2.349195957183838, "learning_rate": 9.941858550805763e-07, "loss": 0.075, "step": 46430 }, { "epoch": 0.49618035151450396, "grad_norm": 4.162837982177734, "learning_rate": 9.941833001116202e-07, "loss": 0.0503, "step": 46440 }, { "epoch": 0.4962871948287836, "grad_norm": 10.238779067993164, "learning_rate": 9.94180744584694e-07, "loss": 0.0446, "step": 46450 }, { "epoch": 0.4963940381430632, "grad_norm": 6.715668201446533, "learning_rate": 9.941781884998e-07, "loss": 0.0644, "step": 46460 }, { "epoch": 0.4965008814573428, "grad_norm": 0.4309215843677521, "learning_rate": 9.94175631856942e-07, "loss": 0.0267, "step": 46470 }, { "epoch": 0.4966077247716224, "grad_norm": 1.5939027070999146, "learning_rate": 9.94173074656122e-07, "loss": 0.0682, "step": 46480 }, { "epoch": 0.49671456808590203, "grad_norm": 4.836390495300293, "learning_rate": 9.941705168973435e-07, "loss": 0.0868, "step": 46490 }, { "epoch": 0.49682141140018166, "grad_norm": 0.39089593291282654, "learning_rate": 9.94167958580609e-07, "loss": 0.0261, "step": 46500 }, { "epoch": 0.4969282547144612, "grad_norm": 0.15348908305168152, "learning_rate": 9.941653997059218e-07, "loss": 0.0486, "step": 46510 }, { "epoch": 0.49703509802874085, "grad_norm": 10.903852462768555, "learning_rate": 9.941628402732846e-07, "loss": 0.0735, "step": 46520 }, { "epoch": 0.4971419413430205, "grad_norm": 5.802734375, "learning_rate": 9.941602802827e-07, "loss": 0.0953, "step": 46530 }, { "epoch": 0.49724878465730005, "grad_norm": 15.298409461975098, "learning_rate": 9.941577197341713e-07, "loss": 0.1301, "step": 46540 }, { "epoch": 0.4973556279715797, "grad_norm": 1.2625529766082764, "learning_rate": 9.941551586277013e-07, "loss": 0.0618, "step": 46550 }, { "epoch": 0.4974624712858593, "grad_norm": 20.46965980529785, "learning_rate": 9.941525969632928e-07, "loss": 0.0736, "step": 46560 }, { "epoch": 0.4975693146001389, "grad_norm": 0.7871448397636414, "learning_rate": 9.941500347409488e-07, "loss": 0.0619, "step": 46570 }, { "epoch": 0.4976761579144185, "grad_norm": 3.3652498722076416, "learning_rate": 9.94147471960672e-07, "loss": 0.0334, "step": 46580 }, { "epoch": 0.4977830012286981, "grad_norm": 5.51565408706665, "learning_rate": 9.941449086224655e-07, "loss": 0.035, "step": 46590 }, { "epoch": 0.49788984454297774, "grad_norm": 4.71399450302124, "learning_rate": 9.941423447263318e-07, "loss": 0.045, "step": 46600 }, { "epoch": 0.4979966878572573, "grad_norm": 1.4637161493301392, "learning_rate": 9.941397802722743e-07, "loss": 0.0438, "step": 46610 }, { "epoch": 0.49810353117153694, "grad_norm": 2.951052188873291, "learning_rate": 9.94137215260296e-07, "loss": 0.0883, "step": 46620 }, { "epoch": 0.49821037448581657, "grad_norm": 11.602792739868164, "learning_rate": 9.941346496903991e-07, "loss": 0.0591, "step": 46630 }, { "epoch": 0.49831721780009613, "grad_norm": 4.1251373291015625, "learning_rate": 9.941320835625867e-07, "loss": 0.0731, "step": 46640 }, { "epoch": 0.49842406111437576, "grad_norm": 2.8567497730255127, "learning_rate": 9.941295168768623e-07, "loss": 0.0585, "step": 46650 }, { "epoch": 0.4985309044286554, "grad_norm": 1.7779545783996582, "learning_rate": 9.941269496332282e-07, "loss": 0.0742, "step": 46660 }, { "epoch": 0.498637747742935, "grad_norm": 21.399446487426758, "learning_rate": 9.941243818316876e-07, "loss": 0.1516, "step": 46670 }, { "epoch": 0.4987445910572146, "grad_norm": 3.312063694000244, "learning_rate": 9.941218134722433e-07, "loss": 0.0627, "step": 46680 }, { "epoch": 0.4988514343714942, "grad_norm": 16.83258056640625, "learning_rate": 9.941192445548981e-07, "loss": 0.1282, "step": 46690 }, { "epoch": 0.49895827768577383, "grad_norm": 7.828887462615967, "learning_rate": 9.94116675079655e-07, "loss": 0.081, "step": 46700 }, { "epoch": 0.4990651210000534, "grad_norm": 4.0803022384643555, "learning_rate": 9.941141050465169e-07, "loss": 0.0462, "step": 46710 }, { "epoch": 0.499171964314333, "grad_norm": 1.7259234189987183, "learning_rate": 9.941115344554865e-07, "loss": 0.0626, "step": 46720 }, { "epoch": 0.49927880762861265, "grad_norm": 0.665280818939209, "learning_rate": 9.941089633065672e-07, "loss": 0.0803, "step": 46730 }, { "epoch": 0.4993856509428922, "grad_norm": 1.2437797784805298, "learning_rate": 9.941063915997613e-07, "loss": 0.0577, "step": 46740 }, { "epoch": 0.49949249425717185, "grad_norm": 0.49661844968795776, "learning_rate": 9.941038193350722e-07, "loss": 0.0441, "step": 46750 }, { "epoch": 0.4995993375714515, "grad_norm": 16.1831111907959, "learning_rate": 9.941012465125026e-07, "loss": 0.048, "step": 46760 }, { "epoch": 0.4997061808857311, "grad_norm": 4.925310134887695, "learning_rate": 9.94098673132055e-07, "loss": 0.0715, "step": 46770 }, { "epoch": 0.49981302420001067, "grad_norm": 1.6474570035934448, "learning_rate": 9.94096099193733e-07, "loss": 0.076, "step": 46780 }, { "epoch": 0.4999198675142903, "grad_norm": 6.221865177154541, "learning_rate": 9.940935246975392e-07, "loss": 0.0297, "step": 46790 }, { "epoch": 0.5000267108285699, "grad_norm": 5.55496883392334, "learning_rate": 9.940909496434766e-07, "loss": 0.0528, "step": 46800 }, { "epoch": 0.5001335541428495, "grad_norm": 4.751633167266846, "learning_rate": 9.94088374031548e-07, "loss": 0.0244, "step": 46810 }, { "epoch": 0.5002403974571291, "grad_norm": 7.684353351593018, "learning_rate": 9.940857978617565e-07, "loss": 0.1291, "step": 46820 }, { "epoch": 0.5003472407714087, "grad_norm": 2.7641797065734863, "learning_rate": 9.940832211341045e-07, "loss": 0.1225, "step": 46830 }, { "epoch": 0.5004540840856884, "grad_norm": 6.353870391845703, "learning_rate": 9.940806438485954e-07, "loss": 0.0813, "step": 46840 }, { "epoch": 0.5005609273999679, "grad_norm": 8.610542297363281, "learning_rate": 9.94078066005232e-07, "loss": 0.0603, "step": 46850 }, { "epoch": 0.5006677707142475, "grad_norm": 2.1009528636932373, "learning_rate": 9.940754876040171e-07, "loss": 0.0304, "step": 46860 }, { "epoch": 0.5007746140285272, "grad_norm": 2.3779349327087402, "learning_rate": 9.940729086449537e-07, "loss": 0.0513, "step": 46870 }, { "epoch": 0.5008814573428068, "grad_norm": 0.3560081422328949, "learning_rate": 9.940703291280447e-07, "loss": 0.1259, "step": 46880 }, { "epoch": 0.5009883006570864, "grad_norm": 4.274011611938477, "learning_rate": 9.940677490532931e-07, "loss": 0.0388, "step": 46890 }, { "epoch": 0.501095143971366, "grad_norm": 1.0254935026168823, "learning_rate": 9.940651684207016e-07, "loss": 0.0641, "step": 46900 }, { "epoch": 0.5012019872856456, "grad_norm": 5.593594551086426, "learning_rate": 9.940625872302734e-07, "loss": 0.0534, "step": 46910 }, { "epoch": 0.5013088305999253, "grad_norm": 1.5328986644744873, "learning_rate": 9.94060005482011e-07, "loss": 0.0621, "step": 46920 }, { "epoch": 0.5014156739142048, "grad_norm": 0.4422934949398041, "learning_rate": 9.940574231759177e-07, "loss": 0.0406, "step": 46930 }, { "epoch": 0.5015225172284844, "grad_norm": 4.660633087158203, "learning_rate": 9.940548403119963e-07, "loss": 0.0361, "step": 46940 }, { "epoch": 0.5016293605427641, "grad_norm": 4.562511920928955, "learning_rate": 9.940522568902498e-07, "loss": 0.0579, "step": 46950 }, { "epoch": 0.5017362038570436, "grad_norm": 13.762687683105469, "learning_rate": 9.940496729106808e-07, "loss": 0.1247, "step": 46960 }, { "epoch": 0.5018430471713232, "grad_norm": 6.033919334411621, "learning_rate": 9.940470883732926e-07, "loss": 0.0591, "step": 46970 }, { "epoch": 0.5019498904856029, "grad_norm": 11.201345443725586, "learning_rate": 9.940445032780878e-07, "loss": 0.0711, "step": 46980 }, { "epoch": 0.5020567337998825, "grad_norm": 5.310857772827148, "learning_rate": 9.940419176250697e-07, "loss": 0.0735, "step": 46990 }, { "epoch": 0.502163577114162, "grad_norm": 8.066424369812012, "learning_rate": 9.940393314142408e-07, "loss": 0.0465, "step": 47000 }, { "epoch": 0.5022704204284417, "grad_norm": 0.3482680022716522, "learning_rate": 9.94036744645604e-07, "loss": 0.0258, "step": 47010 }, { "epoch": 0.5023772637427213, "grad_norm": 10.214625358581543, "learning_rate": 9.94034157319163e-07, "loss": 0.1404, "step": 47020 }, { "epoch": 0.5024841070570009, "grad_norm": 8.190669059753418, "learning_rate": 9.940315694349196e-07, "loss": 0.0613, "step": 47030 }, { "epoch": 0.5025909503712805, "grad_norm": 0.10252641141414642, "learning_rate": 9.940289809928776e-07, "loss": 0.0551, "step": 47040 }, { "epoch": 0.5026977936855601, "grad_norm": 3.2629754543304443, "learning_rate": 9.940263919930395e-07, "loss": 0.0645, "step": 47050 }, { "epoch": 0.5028046369998397, "grad_norm": 2.5189096927642822, "learning_rate": 9.940238024354083e-07, "loss": 0.0688, "step": 47060 }, { "epoch": 0.5029114803141194, "grad_norm": 5.646570205688477, "learning_rate": 9.940212123199868e-07, "loss": 0.0438, "step": 47070 }, { "epoch": 0.5030183236283989, "grad_norm": 1.2598068714141846, "learning_rate": 9.940186216467783e-07, "loss": 0.0547, "step": 47080 }, { "epoch": 0.5031251669426786, "grad_norm": 21.697446823120117, "learning_rate": 9.940160304157854e-07, "loss": 0.0774, "step": 47090 }, { "epoch": 0.5032320102569582, "grad_norm": 2.748431444168091, "learning_rate": 9.940134386270112e-07, "loss": 0.0491, "step": 47100 }, { "epoch": 0.5033388535712378, "grad_norm": 3.692765951156616, "learning_rate": 9.940108462804584e-07, "loss": 0.0375, "step": 47110 }, { "epoch": 0.5034456968855174, "grad_norm": 0.4178506135940552, "learning_rate": 9.940082533761302e-07, "loss": 0.0414, "step": 47120 }, { "epoch": 0.503552540199797, "grad_norm": 22.528419494628906, "learning_rate": 9.940056599140293e-07, "loss": 0.0547, "step": 47130 }, { "epoch": 0.5036593835140766, "grad_norm": 7.736530303955078, "learning_rate": 9.940030658941588e-07, "loss": 0.1269, "step": 47140 }, { "epoch": 0.5037662268283563, "grad_norm": 1.686167597770691, "learning_rate": 9.940004713165214e-07, "loss": 0.0346, "step": 47150 }, { "epoch": 0.5038730701426358, "grad_norm": 12.870863914489746, "learning_rate": 9.939978761811203e-07, "loss": 0.0484, "step": 47160 }, { "epoch": 0.5039799134569154, "grad_norm": 8.69678783416748, "learning_rate": 9.939952804879583e-07, "loss": 0.111, "step": 47170 }, { "epoch": 0.5040867567711951, "grad_norm": 4.703291416168213, "learning_rate": 9.939926842370384e-07, "loss": 0.0501, "step": 47180 }, { "epoch": 0.5041936000854746, "grad_norm": 7.569994926452637, "learning_rate": 9.939900874283635e-07, "loss": 0.0177, "step": 47190 }, { "epoch": 0.5043004433997542, "grad_norm": 1.9196566343307495, "learning_rate": 9.939874900619364e-07, "loss": 0.0273, "step": 47200 }, { "epoch": 0.5044072867140339, "grad_norm": 5.967081546783447, "learning_rate": 9.939848921377601e-07, "loss": 0.084, "step": 47210 }, { "epoch": 0.5045141300283135, "grad_norm": 1.3773152828216553, "learning_rate": 9.939822936558377e-07, "loss": 0.0985, "step": 47220 }, { "epoch": 0.504620973342593, "grad_norm": 4.418347358703613, "learning_rate": 9.93979694616172e-07, "loss": 0.0718, "step": 47230 }, { "epoch": 0.5047278166568727, "grad_norm": 0.38660550117492676, "learning_rate": 9.939770950187658e-07, "loss": 0.0867, "step": 47240 }, { "epoch": 0.5048346599711523, "grad_norm": 11.837910652160645, "learning_rate": 9.939744948636225e-07, "loss": 0.0709, "step": 47250 }, { "epoch": 0.504941503285432, "grad_norm": 5.5736236572265625, "learning_rate": 9.939718941507444e-07, "loss": 0.0785, "step": 47260 }, { "epoch": 0.5050483465997115, "grad_norm": 1.9462529420852661, "learning_rate": 9.939692928801347e-07, "loss": 0.0873, "step": 47270 }, { "epoch": 0.5051551899139911, "grad_norm": 9.859810829162598, "learning_rate": 9.939666910517966e-07, "loss": 0.1112, "step": 47280 }, { "epoch": 0.5052620332282708, "grad_norm": 7.19001579284668, "learning_rate": 9.939640886657328e-07, "loss": 0.0444, "step": 47290 }, { "epoch": 0.5053688765425504, "grad_norm": 5.201794624328613, "learning_rate": 9.939614857219462e-07, "loss": 0.0564, "step": 47300 }, { "epoch": 0.5054757198568299, "grad_norm": 5.532021999359131, "learning_rate": 9.939588822204398e-07, "loss": 0.0608, "step": 47310 }, { "epoch": 0.5055825631711096, "grad_norm": 5.749303340911865, "learning_rate": 9.939562781612166e-07, "loss": 0.0619, "step": 47320 }, { "epoch": 0.5056894064853892, "grad_norm": 4.717123985290527, "learning_rate": 9.939536735442793e-07, "loss": 0.0819, "step": 47330 }, { "epoch": 0.5057962497996688, "grad_norm": 0.8342215418815613, "learning_rate": 9.939510683696313e-07, "loss": 0.08, "step": 47340 }, { "epoch": 0.5059030931139484, "grad_norm": 4.52409553527832, "learning_rate": 9.939484626372751e-07, "loss": 0.0387, "step": 47350 }, { "epoch": 0.506009936428228, "grad_norm": 4.708372116088867, "learning_rate": 9.939458563472139e-07, "loss": 0.0564, "step": 47360 }, { "epoch": 0.5061167797425076, "grad_norm": 2.697094202041626, "learning_rate": 9.939432494994504e-07, "loss": 0.0373, "step": 47370 }, { "epoch": 0.5062236230567873, "grad_norm": 0.5736643075942993, "learning_rate": 9.939406420939878e-07, "loss": 0.1148, "step": 47380 }, { "epoch": 0.5063304663710668, "grad_norm": 7.299639701843262, "learning_rate": 9.93938034130829e-07, "loss": 0.0988, "step": 47390 }, { "epoch": 0.5064373096853464, "grad_norm": 5.310299396514893, "learning_rate": 9.939354256099766e-07, "loss": 0.1075, "step": 47400 }, { "epoch": 0.5065441529996261, "grad_norm": 1.9273823499679565, "learning_rate": 9.939328165314341e-07, "loss": 0.0347, "step": 47410 }, { "epoch": 0.5066509963139056, "grad_norm": 8.811335563659668, "learning_rate": 9.93930206895204e-07, "loss": 0.0544, "step": 47420 }, { "epoch": 0.5067578396281852, "grad_norm": 6.392253398895264, "learning_rate": 9.939275967012896e-07, "loss": 0.1689, "step": 47430 }, { "epoch": 0.5068646829424649, "grad_norm": 0.18738298118114471, "learning_rate": 9.939249859496936e-07, "loss": 0.0707, "step": 47440 }, { "epoch": 0.5069715262567445, "grad_norm": 10.73447036743164, "learning_rate": 9.93922374640419e-07, "loss": 0.0741, "step": 47450 }, { "epoch": 0.5070783695710241, "grad_norm": 0.5125343799591064, "learning_rate": 9.939197627734686e-07, "loss": 0.0251, "step": 47460 }, { "epoch": 0.5071852128853037, "grad_norm": 0.6637358665466309, "learning_rate": 9.939171503488457e-07, "loss": 0.0503, "step": 47470 }, { "epoch": 0.5072920561995833, "grad_norm": 3.3578124046325684, "learning_rate": 9.939145373665531e-07, "loss": 0.1042, "step": 47480 }, { "epoch": 0.507398899513863, "grad_norm": 0.8280336856842041, "learning_rate": 9.939119238265937e-07, "loss": 0.0311, "step": 47490 }, { "epoch": 0.5075057428281425, "grad_norm": 2.456437110900879, "learning_rate": 9.939093097289702e-07, "loss": 0.0674, "step": 47500 }, { "epoch": 0.5076125861424221, "grad_norm": 14.283388137817383, "learning_rate": 9.939066950736861e-07, "loss": 0.1042, "step": 47510 }, { "epoch": 0.5077194294567018, "grad_norm": 0.7473019957542419, "learning_rate": 9.939040798607439e-07, "loss": 0.0458, "step": 47520 }, { "epoch": 0.5078262727709814, "grad_norm": 8.326225280761719, "learning_rate": 9.93901464090147e-07, "loss": 0.0974, "step": 47530 }, { "epoch": 0.5079331160852609, "grad_norm": 7.667551040649414, "learning_rate": 9.938988477618977e-07, "loss": 0.075, "step": 47540 }, { "epoch": 0.5080399593995406, "grad_norm": 6.068304538726807, "learning_rate": 9.938962308759994e-07, "loss": 0.184, "step": 47550 }, { "epoch": 0.5081468027138202, "grad_norm": 1.5430448055267334, "learning_rate": 9.93893613432455e-07, "loss": 0.1158, "step": 47560 }, { "epoch": 0.5082536460280997, "grad_norm": 6.186598300933838, "learning_rate": 9.938909954312674e-07, "loss": 0.0616, "step": 47570 }, { "epoch": 0.5083604893423794, "grad_norm": 0.8309311866760254, "learning_rate": 9.938883768724398e-07, "loss": 0.082, "step": 47580 }, { "epoch": 0.508467332656659, "grad_norm": 0.24976320564746857, "learning_rate": 9.938857577559748e-07, "loss": 0.0779, "step": 47590 }, { "epoch": 0.5085741759709386, "grad_norm": 6.0741963386535645, "learning_rate": 9.938831380818754e-07, "loss": 0.1334, "step": 47600 }, { "epoch": 0.5086810192852182, "grad_norm": 3.971909999847412, "learning_rate": 9.93880517850145e-07, "loss": 0.0525, "step": 47610 }, { "epoch": 0.5087878625994978, "grad_norm": 0.10580061376094818, "learning_rate": 9.93877897060786e-07, "loss": 0.0444, "step": 47620 }, { "epoch": 0.5088947059137775, "grad_norm": 1.6493991613388062, "learning_rate": 9.938752757138016e-07, "loss": 0.1073, "step": 47630 }, { "epoch": 0.5090015492280571, "grad_norm": 14.517300605773926, "learning_rate": 9.938726538091946e-07, "loss": 0.0724, "step": 47640 }, { "epoch": 0.5091083925423366, "grad_norm": 8.172855377197266, "learning_rate": 9.938700313469684e-07, "loss": 0.0548, "step": 47650 }, { "epoch": 0.5092152358566163, "grad_norm": 2.7381935119628906, "learning_rate": 9.938674083271255e-07, "loss": 0.0585, "step": 47660 }, { "epoch": 0.5093220791708959, "grad_norm": 8.809667587280273, "learning_rate": 9.93864784749669e-07, "loss": 0.0273, "step": 47670 }, { "epoch": 0.5094289224851755, "grad_norm": 5.849188327789307, "learning_rate": 9.93862160614602e-07, "loss": 0.0781, "step": 47680 }, { "epoch": 0.5095357657994551, "grad_norm": 0.1670810580253601, "learning_rate": 9.938595359219272e-07, "loss": 0.0315, "step": 47690 }, { "epoch": 0.5096426091137347, "grad_norm": 2.891932249069214, "learning_rate": 9.938569106716477e-07, "loss": 0.0824, "step": 47700 }, { "epoch": 0.5097494524280143, "grad_norm": 8.541707038879395, "learning_rate": 9.938542848637664e-07, "loss": 0.1106, "step": 47710 }, { "epoch": 0.509856295742294, "grad_norm": 4.674514293670654, "learning_rate": 9.938516584982865e-07, "loss": 0.0783, "step": 47720 }, { "epoch": 0.5099631390565735, "grad_norm": 4.25338077545166, "learning_rate": 9.938490315752108e-07, "loss": 0.0456, "step": 47730 }, { "epoch": 0.5100699823708531, "grad_norm": 2.750638008117676, "learning_rate": 9.938464040945423e-07, "loss": 0.0656, "step": 47740 }, { "epoch": 0.5101768256851328, "grad_norm": 0.2946256995201111, "learning_rate": 9.93843776056284e-07, "loss": 0.0622, "step": 47750 }, { "epoch": 0.5102836689994124, "grad_norm": 3.127431631088257, "learning_rate": 9.938411474604384e-07, "loss": 0.1042, "step": 47760 }, { "epoch": 0.5103905123136919, "grad_norm": 2.33294415473938, "learning_rate": 9.938385183070093e-07, "loss": 0.0516, "step": 47770 }, { "epoch": 0.5104973556279716, "grad_norm": 2.2444214820861816, "learning_rate": 9.93835888595999e-07, "loss": 0.0776, "step": 47780 }, { "epoch": 0.5106041989422512, "grad_norm": 4.479578018188477, "learning_rate": 9.93833258327411e-07, "loss": 0.0516, "step": 47790 }, { "epoch": 0.5107110422565307, "grad_norm": 14.799856185913086, "learning_rate": 9.938306275012478e-07, "loss": 0.1153, "step": 47800 }, { "epoch": 0.5108178855708104, "grad_norm": 3.2007229328155518, "learning_rate": 9.938279961175125e-07, "loss": 0.1103, "step": 47810 }, { "epoch": 0.51092472888509, "grad_norm": 9.066326141357422, "learning_rate": 9.93825364176208e-07, "loss": 0.0552, "step": 47820 }, { "epoch": 0.5110315721993697, "grad_norm": 1.9630593061447144, "learning_rate": 9.938227316773376e-07, "loss": 0.0883, "step": 47830 }, { "epoch": 0.5111384155136492, "grad_norm": 3.3582262992858887, "learning_rate": 9.93820098620904e-07, "loss": 0.0691, "step": 47840 }, { "epoch": 0.5112452588279288, "grad_norm": 6.727539539337158, "learning_rate": 9.938174650069104e-07, "loss": 0.051, "step": 47850 }, { "epoch": 0.5113521021422085, "grad_norm": 7.273297309875488, "learning_rate": 9.938148308353593e-07, "loss": 0.0563, "step": 47860 }, { "epoch": 0.5114589454564881, "grad_norm": 6.023773670196533, "learning_rate": 9.938121961062542e-07, "loss": 0.0748, "step": 47870 }, { "epoch": 0.5115657887707676, "grad_norm": 7.022560119628906, "learning_rate": 9.938095608195978e-07, "loss": 0.0498, "step": 47880 }, { "epoch": 0.5116726320850473, "grad_norm": 0.40189462900161743, "learning_rate": 9.938069249753932e-07, "loss": 0.0914, "step": 47890 }, { "epoch": 0.5117794753993269, "grad_norm": 3.363334894180298, "learning_rate": 9.938042885736433e-07, "loss": 0.019, "step": 47900 }, { "epoch": 0.5118863187136065, "grad_norm": 7.6837158203125, "learning_rate": 9.93801651614351e-07, "loss": 0.0258, "step": 47910 }, { "epoch": 0.5119931620278861, "grad_norm": 0.2640886902809143, "learning_rate": 9.937990140975195e-07, "loss": 0.0158, "step": 47920 }, { "epoch": 0.5121000053421657, "grad_norm": 3.3332998752593994, "learning_rate": 9.937963760231516e-07, "loss": 0.0625, "step": 47930 }, { "epoch": 0.5122068486564453, "grad_norm": 1.7650269269943237, "learning_rate": 9.937937373912502e-07, "loss": 0.0861, "step": 47940 }, { "epoch": 0.512313691970725, "grad_norm": 0.7329373359680176, "learning_rate": 9.937910982018185e-07, "loss": 0.0964, "step": 47950 }, { "epoch": 0.5124205352850045, "grad_norm": 10.045247077941895, "learning_rate": 9.937884584548594e-07, "loss": 0.0137, "step": 47960 }, { "epoch": 0.5125273785992841, "grad_norm": 0.3060839772224426, "learning_rate": 9.937858181503758e-07, "loss": 0.0245, "step": 47970 }, { "epoch": 0.5126342219135638, "grad_norm": 6.521216869354248, "learning_rate": 9.937831772883709e-07, "loss": 0.0357, "step": 47980 }, { "epoch": 0.5127410652278434, "grad_norm": 8.907071113586426, "learning_rate": 9.937805358688474e-07, "loss": 0.0895, "step": 47990 }, { "epoch": 0.512847908542123, "grad_norm": 6.455463886260986, "learning_rate": 9.937778938918082e-07, "loss": 0.0468, "step": 48000 }, { "epoch": 0.5129547518564026, "grad_norm": 7.695789813995361, "learning_rate": 9.937752513572567e-07, "loss": 0.0477, "step": 48010 }, { "epoch": 0.5130615951706822, "grad_norm": 0.10821010172367096, "learning_rate": 9.93772608265196e-07, "loss": 0.0443, "step": 48020 }, { "epoch": 0.5131684384849619, "grad_norm": 0.4779176115989685, "learning_rate": 9.937699646156282e-07, "loss": 0.1228, "step": 48030 }, { "epoch": 0.5132752817992414, "grad_norm": 5.658228397369385, "learning_rate": 9.93767320408557e-07, "loss": 0.0796, "step": 48040 }, { "epoch": 0.513382125113521, "grad_norm": 1.1402251720428467, "learning_rate": 9.937646756439854e-07, "loss": 0.1439, "step": 48050 }, { "epoch": 0.5134889684278007, "grad_norm": 0.22448205947875977, "learning_rate": 9.937620303219162e-07, "loss": 0.0231, "step": 48060 }, { "epoch": 0.5135958117420802, "grad_norm": 7.4322590827941895, "learning_rate": 9.937593844423521e-07, "loss": 0.0409, "step": 48070 }, { "epoch": 0.5137026550563598, "grad_norm": 7.462368488311768, "learning_rate": 9.937567380052968e-07, "loss": 0.0863, "step": 48080 }, { "epoch": 0.5138094983706395, "grad_norm": 6.766030788421631, "learning_rate": 9.937540910107528e-07, "loss": 0.0826, "step": 48090 }, { "epoch": 0.5139163416849191, "grad_norm": 8.201971054077148, "learning_rate": 9.937514434587228e-07, "loss": 0.0466, "step": 48100 }, { "epoch": 0.5140231849991986, "grad_norm": 22.768198013305664, "learning_rate": 9.937487953492105e-07, "loss": 0.1193, "step": 48110 }, { "epoch": 0.5141300283134783, "grad_norm": 3.267453670501709, "learning_rate": 9.937461466822186e-07, "loss": 0.023, "step": 48120 }, { "epoch": 0.5142368716277579, "grad_norm": 0.9236301183700562, "learning_rate": 9.9374349745775e-07, "loss": 0.0387, "step": 48130 }, { "epoch": 0.5143437149420375, "grad_norm": 0.2958388030529022, "learning_rate": 9.937408476758073e-07, "loss": 0.084, "step": 48140 }, { "epoch": 0.5144505582563171, "grad_norm": 0.06452515721321106, "learning_rate": 9.937381973363943e-07, "loss": 0.0268, "step": 48150 }, { "epoch": 0.5145574015705967, "grad_norm": 17.82480812072754, "learning_rate": 9.937355464395136e-07, "loss": 0.0832, "step": 48160 }, { "epoch": 0.5146642448848763, "grad_norm": 10.058454513549805, "learning_rate": 9.937328949851681e-07, "loss": 0.1268, "step": 48170 }, { "epoch": 0.514771088199156, "grad_norm": 0.4784223735332489, "learning_rate": 9.937302429733608e-07, "loss": 0.055, "step": 48180 }, { "epoch": 0.5148779315134355, "grad_norm": 1.7976797819137573, "learning_rate": 9.93727590404095e-07, "loss": 0.0371, "step": 48190 }, { "epoch": 0.5149847748277152, "grad_norm": 2.5893023014068604, "learning_rate": 9.937249372773737e-07, "loss": 0.1726, "step": 48200 }, { "epoch": 0.5150916181419948, "grad_norm": 3.5938141345977783, "learning_rate": 9.937222835931994e-07, "loss": 0.0863, "step": 48210 }, { "epoch": 0.5151984614562743, "grad_norm": 0.3808004558086395, "learning_rate": 9.937196293515753e-07, "loss": 0.0713, "step": 48220 }, { "epoch": 0.515305304770554, "grad_norm": 1.1922377347946167, "learning_rate": 9.937169745525046e-07, "loss": 0.0559, "step": 48230 }, { "epoch": 0.5154121480848336, "grad_norm": 2.603348970413208, "learning_rate": 9.937143191959902e-07, "loss": 0.053, "step": 48240 }, { "epoch": 0.5155189913991132, "grad_norm": 7.556079387664795, "learning_rate": 9.937116632820348e-07, "loss": 0.1114, "step": 48250 }, { "epoch": 0.5156258347133928, "grad_norm": 11.983637809753418, "learning_rate": 9.93709006810642e-07, "loss": 0.0515, "step": 48260 }, { "epoch": 0.5157326780276724, "grad_norm": 2.151590585708618, "learning_rate": 9.937063497818143e-07, "loss": 0.0741, "step": 48270 }, { "epoch": 0.515839521341952, "grad_norm": 0.3568267524242401, "learning_rate": 9.93703692195555e-07, "loss": 0.0589, "step": 48280 }, { "epoch": 0.5159463646562317, "grad_norm": 6.305749416351318, "learning_rate": 9.937010340518668e-07, "loss": 0.0465, "step": 48290 }, { "epoch": 0.5160532079705112, "grad_norm": 5.169943332672119, "learning_rate": 9.93698375350753e-07, "loss": 0.0792, "step": 48300 }, { "epoch": 0.5161600512847908, "grad_norm": 6.057862758636475, "learning_rate": 9.936957160922165e-07, "loss": 0.0346, "step": 48310 }, { "epoch": 0.5162668945990705, "grad_norm": 9.0591402053833, "learning_rate": 9.936930562762601e-07, "loss": 0.041, "step": 48320 }, { "epoch": 0.5163737379133501, "grad_norm": 10.503833770751953, "learning_rate": 9.936903959028873e-07, "loss": 0.083, "step": 48330 }, { "epoch": 0.5164805812276296, "grad_norm": 6.447635173797607, "learning_rate": 9.936877349721004e-07, "loss": 0.0617, "step": 48340 }, { "epoch": 0.5165874245419093, "grad_norm": 0.6019269227981567, "learning_rate": 9.93685073483903e-07, "loss": 0.069, "step": 48350 }, { "epoch": 0.5166942678561889, "grad_norm": 7.335671424865723, "learning_rate": 9.93682411438298e-07, "loss": 0.0332, "step": 48360 }, { "epoch": 0.5168011111704686, "grad_norm": 4.563606262207031, "learning_rate": 9.936797488352881e-07, "loss": 0.11, "step": 48370 }, { "epoch": 0.5169079544847481, "grad_norm": 3.6555306911468506, "learning_rate": 9.936770856748766e-07, "loss": 0.1106, "step": 48380 }, { "epoch": 0.5170147977990277, "grad_norm": 5.932145595550537, "learning_rate": 9.936744219570663e-07, "loss": 0.0766, "step": 48390 }, { "epoch": 0.5171216411133074, "grad_norm": 2.647160053253174, "learning_rate": 9.936717576818603e-07, "loss": 0.0394, "step": 48400 }, { "epoch": 0.517228484427587, "grad_norm": 5.901256084442139, "learning_rate": 9.936690928492617e-07, "loss": 0.0172, "step": 48410 }, { "epoch": 0.5173353277418665, "grad_norm": 10.307297706604004, "learning_rate": 9.936664274592735e-07, "loss": 0.1033, "step": 48420 }, { "epoch": 0.5174421710561462, "grad_norm": 1.020622968673706, "learning_rate": 9.936637615118985e-07, "loss": 0.0905, "step": 48430 }, { "epoch": 0.5175490143704258, "grad_norm": 1.750797152519226, "learning_rate": 9.9366109500714e-07, "loss": 0.0207, "step": 48440 }, { "epoch": 0.5176558576847053, "grad_norm": 0.9986087679862976, "learning_rate": 9.93658427945001e-07, "loss": 0.0279, "step": 48450 }, { "epoch": 0.517762700998985, "grad_norm": 12.654694557189941, "learning_rate": 9.93655760325484e-07, "loss": 0.0792, "step": 48460 }, { "epoch": 0.5178695443132646, "grad_norm": 11.76315689086914, "learning_rate": 9.936530921485927e-07, "loss": 0.0759, "step": 48470 }, { "epoch": 0.5179763876275442, "grad_norm": 4.764457702636719, "learning_rate": 9.936504234143295e-07, "loss": 0.0726, "step": 48480 }, { "epoch": 0.5180832309418238, "grad_norm": 3.4444973468780518, "learning_rate": 9.93647754122698e-07, "loss": 0.0523, "step": 48490 }, { "epoch": 0.5181900742561034, "grad_norm": 1.0422991514205933, "learning_rate": 9.936450842737007e-07, "loss": 0.0834, "step": 48500 }, { "epoch": 0.518296917570383, "grad_norm": 13.611505508422852, "learning_rate": 9.93642413867341e-07, "loss": 0.0694, "step": 48510 }, { "epoch": 0.5184037608846627, "grad_norm": 3.795123815536499, "learning_rate": 9.936397429036218e-07, "loss": 0.0697, "step": 48520 }, { "epoch": 0.5185106041989422, "grad_norm": 10.634369850158691, "learning_rate": 9.93637071382546e-07, "loss": 0.0238, "step": 48530 }, { "epoch": 0.5186174475132218, "grad_norm": 3.2041995525360107, "learning_rate": 9.936343993041167e-07, "loss": 0.0391, "step": 48540 }, { "epoch": 0.5187242908275015, "grad_norm": 0.22154869139194489, "learning_rate": 9.93631726668337e-07, "loss": 0.0646, "step": 48550 }, { "epoch": 0.5188311341417811, "grad_norm": 1.2500109672546387, "learning_rate": 9.936290534752096e-07, "loss": 0.0901, "step": 48560 }, { "epoch": 0.5189379774560607, "grad_norm": 2.345238447189331, "learning_rate": 9.936263797247379e-07, "loss": 0.0336, "step": 48570 }, { "epoch": 0.5190448207703403, "grad_norm": 7.102802753448486, "learning_rate": 9.936237054169247e-07, "loss": 0.0416, "step": 48580 }, { "epoch": 0.5191516640846199, "grad_norm": 11.853686332702637, "learning_rate": 9.93621030551773e-07, "loss": 0.0459, "step": 48590 }, { "epoch": 0.5192585073988996, "grad_norm": 2.2288243770599365, "learning_rate": 9.936183551292861e-07, "loss": 0.097, "step": 48600 }, { "epoch": 0.5193653507131791, "grad_norm": 2.806379556655884, "learning_rate": 9.936156791494667e-07, "loss": 0.0531, "step": 48610 }, { "epoch": 0.5194721940274587, "grad_norm": 4.276363849639893, "learning_rate": 9.93613002612318e-07, "loss": 0.0297, "step": 48620 }, { "epoch": 0.5195790373417384, "grad_norm": 8.95284366607666, "learning_rate": 9.936103255178429e-07, "loss": 0.0532, "step": 48630 }, { "epoch": 0.519685880656018, "grad_norm": 0.025183651596307755, "learning_rate": 9.936076478660444e-07, "loss": 0.0469, "step": 48640 }, { "epoch": 0.5197927239702975, "grad_norm": 2.4582319259643555, "learning_rate": 9.93604969656926e-07, "loss": 0.111, "step": 48650 }, { "epoch": 0.5198995672845772, "grad_norm": 6.426405429840088, "learning_rate": 9.9360229089049e-07, "loss": 0.129, "step": 48660 }, { "epoch": 0.5200064105988568, "grad_norm": 0.1432153582572937, "learning_rate": 9.935996115667399e-07, "loss": 0.0536, "step": 48670 }, { "epoch": 0.5201132539131363, "grad_norm": 3.3275036811828613, "learning_rate": 9.935969316856783e-07, "loss": 0.0466, "step": 48680 }, { "epoch": 0.520220097227416, "grad_norm": 5.55291223526001, "learning_rate": 9.935942512473087e-07, "loss": 0.0805, "step": 48690 }, { "epoch": 0.5203269405416956, "grad_norm": 10.953999519348145, "learning_rate": 9.93591570251634e-07, "loss": 0.0622, "step": 48700 }, { "epoch": 0.5204337838559752, "grad_norm": 8.642187118530273, "learning_rate": 9.935888886986572e-07, "loss": 0.041, "step": 48710 }, { "epoch": 0.5205406271702548, "grad_norm": 18.580188751220703, "learning_rate": 9.935862065883811e-07, "loss": 0.0348, "step": 48720 }, { "epoch": 0.5206474704845344, "grad_norm": 3.910207509994507, "learning_rate": 9.93583523920809e-07, "loss": 0.06, "step": 48730 }, { "epoch": 0.5207543137988141, "grad_norm": 2.7541439533233643, "learning_rate": 9.935808406959441e-07, "loss": 0.0602, "step": 48740 }, { "epoch": 0.5208611571130937, "grad_norm": 5.314891815185547, "learning_rate": 9.93578156913789e-07, "loss": 0.0797, "step": 48750 }, { "epoch": 0.5209680004273732, "grad_norm": 2.639338970184326, "learning_rate": 9.935754725743466e-07, "loss": 0.0589, "step": 48760 }, { "epoch": 0.5210748437416529, "grad_norm": 8.996767044067383, "learning_rate": 9.935727876776206e-07, "loss": 0.0747, "step": 48770 }, { "epoch": 0.5211816870559325, "grad_norm": 0.11532878875732422, "learning_rate": 9.935701022236136e-07, "loss": 0.0305, "step": 48780 }, { "epoch": 0.521288530370212, "grad_norm": 1.6326793432235718, "learning_rate": 9.935674162123286e-07, "loss": 0.0531, "step": 48790 }, { "epoch": 0.5213953736844917, "grad_norm": 3.741731643676758, "learning_rate": 9.93564729643769e-07, "loss": 0.0402, "step": 48800 }, { "epoch": 0.5215022169987713, "grad_norm": 1.0827133655548096, "learning_rate": 9.935620425179374e-07, "loss": 0.0568, "step": 48810 }, { "epoch": 0.5216090603130509, "grad_norm": 4.849574089050293, "learning_rate": 9.935593548348368e-07, "loss": 0.0588, "step": 48820 }, { "epoch": 0.5217159036273306, "grad_norm": 0.9861624836921692, "learning_rate": 9.935566665944708e-07, "loss": 0.0675, "step": 48830 }, { "epoch": 0.5218227469416101, "grad_norm": 5.969191551208496, "learning_rate": 9.935539777968417e-07, "loss": 0.0372, "step": 48840 }, { "epoch": 0.5219295902558897, "grad_norm": 6.203457832336426, "learning_rate": 9.935512884419532e-07, "loss": 0.0634, "step": 48850 }, { "epoch": 0.5220364335701694, "grad_norm": 9.80270004272461, "learning_rate": 9.93548598529808e-07, "loss": 0.0973, "step": 48860 }, { "epoch": 0.522143276884449, "grad_norm": 3.595104932785034, "learning_rate": 9.93545908060409e-07, "loss": 0.0786, "step": 48870 }, { "epoch": 0.5222501201987285, "grad_norm": 0.740516185760498, "learning_rate": 9.935432170337598e-07, "loss": 0.0677, "step": 48880 }, { "epoch": 0.5223569635130082, "grad_norm": 0.9430676102638245, "learning_rate": 9.935405254498628e-07, "loss": 0.026, "step": 48890 }, { "epoch": 0.5224638068272878, "grad_norm": 5.60391092300415, "learning_rate": 9.935378333087213e-07, "loss": 0.0584, "step": 48900 }, { "epoch": 0.5225706501415673, "grad_norm": 2.5624687671661377, "learning_rate": 9.935351406103384e-07, "loss": 0.0333, "step": 48910 }, { "epoch": 0.522677493455847, "grad_norm": 19.395889282226562, "learning_rate": 9.93532447354717e-07, "loss": 0.0946, "step": 48920 }, { "epoch": 0.5227843367701266, "grad_norm": 5.3628058433532715, "learning_rate": 9.935297535418602e-07, "loss": 0.0373, "step": 48930 }, { "epoch": 0.5228911800844063, "grad_norm": 0.10618436336517334, "learning_rate": 9.935270591717712e-07, "loss": 0.0466, "step": 48940 }, { "epoch": 0.5229980233986858, "grad_norm": 4.3848795890808105, "learning_rate": 9.935243642444528e-07, "loss": 0.0832, "step": 48950 }, { "epoch": 0.5231048667129654, "grad_norm": 4.574032306671143, "learning_rate": 9.935216687599081e-07, "loss": 0.046, "step": 48960 }, { "epoch": 0.5232117100272451, "grad_norm": 0.30580464005470276, "learning_rate": 9.935189727181403e-07, "loss": 0.048, "step": 48970 }, { "epoch": 0.5233185533415247, "grad_norm": 11.213449478149414, "learning_rate": 9.935162761191522e-07, "loss": 0.0671, "step": 48980 }, { "epoch": 0.5234253966558042, "grad_norm": 4.9749627113342285, "learning_rate": 9.93513578962947e-07, "loss": 0.0282, "step": 48990 }, { "epoch": 0.5235322399700839, "grad_norm": 2.769207239151001, "learning_rate": 9.93510881249528e-07, "loss": 0.0492, "step": 49000 }, { "epoch": 0.5236390832843635, "grad_norm": 5.909313201904297, "learning_rate": 9.935081829788977e-07, "loss": 0.0958, "step": 49010 }, { "epoch": 0.523745926598643, "grad_norm": 2.7091617584228516, "learning_rate": 9.935054841510593e-07, "loss": 0.032, "step": 49020 }, { "epoch": 0.5238527699129227, "grad_norm": 0.03288126736879349, "learning_rate": 9.935027847660162e-07, "loss": 0.0929, "step": 49030 }, { "epoch": 0.5239596132272023, "grad_norm": 6.643800258636475, "learning_rate": 9.93500084823771e-07, "loss": 0.1278, "step": 49040 }, { "epoch": 0.5240664565414819, "grad_norm": 0.1712818741798401, "learning_rate": 9.934973843243274e-07, "loss": 0.0663, "step": 49050 }, { "epoch": 0.5241732998557616, "grad_norm": 12.52841854095459, "learning_rate": 9.934946832676876e-07, "loss": 0.0437, "step": 49060 }, { "epoch": 0.5242801431700411, "grad_norm": 5.622478008270264, "learning_rate": 9.934919816538551e-07, "loss": 0.1006, "step": 49070 }, { "epoch": 0.5243869864843207, "grad_norm": 7.233508586883545, "learning_rate": 9.93489279482833e-07, "loss": 0.0246, "step": 49080 }, { "epoch": 0.5244938297986004, "grad_norm": 7.252168655395508, "learning_rate": 9.93486576754624e-07, "loss": 0.0364, "step": 49090 }, { "epoch": 0.52460067311288, "grad_norm": 6.046225070953369, "learning_rate": 9.934838734692318e-07, "loss": 0.0749, "step": 49100 }, { "epoch": 0.5247075164271596, "grad_norm": 8.894904136657715, "learning_rate": 9.93481169626659e-07, "loss": 0.0447, "step": 49110 }, { "epoch": 0.5248143597414392, "grad_norm": 5.855550765991211, "learning_rate": 9.934784652269085e-07, "loss": 0.0612, "step": 49120 }, { "epoch": 0.5249212030557188, "grad_norm": 8.792521476745605, "learning_rate": 9.934757602699838e-07, "loss": 0.0631, "step": 49130 }, { "epoch": 0.5250280463699984, "grad_norm": 6.9787187576293945, "learning_rate": 9.934730547558873e-07, "loss": 0.0236, "step": 49140 }, { "epoch": 0.525134889684278, "grad_norm": 1.72672700881958, "learning_rate": 9.93470348684623e-07, "loss": 0.0915, "step": 49150 }, { "epoch": 0.5252417329985576, "grad_norm": 3.2648191452026367, "learning_rate": 9.93467642056193e-07, "loss": 0.0543, "step": 49160 }, { "epoch": 0.5253485763128373, "grad_norm": 17.153730392456055, "learning_rate": 9.934649348706012e-07, "loss": 0.1288, "step": 49170 }, { "epoch": 0.5254554196271168, "grad_norm": 0.35350099205970764, "learning_rate": 9.9346222712785e-07, "loss": 0.0543, "step": 49180 }, { "epoch": 0.5255622629413964, "grad_norm": 13.776056289672852, "learning_rate": 9.934595188279427e-07, "loss": 0.1929, "step": 49190 }, { "epoch": 0.5256691062556761, "grad_norm": 6.367738246917725, "learning_rate": 9.934568099708825e-07, "loss": 0.135, "step": 49200 }, { "epoch": 0.5257759495699557, "grad_norm": 1.6847537755966187, "learning_rate": 9.934541005566722e-07, "loss": 0.0443, "step": 49210 }, { "epoch": 0.5258827928842352, "grad_norm": 4.957223415374756, "learning_rate": 9.93451390585315e-07, "loss": 0.1016, "step": 49220 }, { "epoch": 0.5259896361985149, "grad_norm": 2.6244895458221436, "learning_rate": 9.934486800568138e-07, "loss": 0.0498, "step": 49230 }, { "epoch": 0.5260964795127945, "grad_norm": 0.1829553097486496, "learning_rate": 9.93445968971172e-07, "loss": 0.0431, "step": 49240 }, { "epoch": 0.526203322827074, "grad_norm": 3.3699002265930176, "learning_rate": 9.934432573283923e-07, "loss": 0.0899, "step": 49250 }, { "epoch": 0.5263101661413537, "grad_norm": 0.13393966853618622, "learning_rate": 9.934405451284783e-07, "loss": 0.0251, "step": 49260 }, { "epoch": 0.5264170094556333, "grad_norm": 4.195777416229248, "learning_rate": 9.93437832371432e-07, "loss": 0.0491, "step": 49270 }, { "epoch": 0.5265238527699129, "grad_norm": 5.327297210693359, "learning_rate": 9.934351190572576e-07, "loss": 0.0557, "step": 49280 }, { "epoch": 0.5266306960841926, "grad_norm": 3.8631224632263184, "learning_rate": 9.934324051859576e-07, "loss": 0.1277, "step": 49290 }, { "epoch": 0.5267375393984721, "grad_norm": 1.1775397062301636, "learning_rate": 9.934296907575355e-07, "loss": 0.0567, "step": 49300 }, { "epoch": 0.5268443827127518, "grad_norm": 8.067597389221191, "learning_rate": 9.934269757719937e-07, "loss": 0.0586, "step": 49310 }, { "epoch": 0.5269512260270314, "grad_norm": 3.541511297225952, "learning_rate": 9.934242602293357e-07, "loss": 0.0779, "step": 49320 }, { "epoch": 0.5270580693413109, "grad_norm": 1.4161063432693481, "learning_rate": 9.934215441295644e-07, "loss": 0.0486, "step": 49330 }, { "epoch": 0.5271649126555906, "grad_norm": 5.738320350646973, "learning_rate": 9.934188274726828e-07, "loss": 0.0928, "step": 49340 }, { "epoch": 0.5272717559698702, "grad_norm": 4.062239170074463, "learning_rate": 9.934161102586943e-07, "loss": 0.0379, "step": 49350 }, { "epoch": 0.5273785992841498, "grad_norm": 0.6668030023574829, "learning_rate": 9.93413392487602e-07, "loss": 0.0701, "step": 49360 }, { "epoch": 0.5274854425984294, "grad_norm": 0.8680458068847656, "learning_rate": 9.934106741594083e-07, "loss": 0.06, "step": 49370 }, { "epoch": 0.527592285912709, "grad_norm": 2.5126826763153076, "learning_rate": 9.93407955274117e-07, "loss": 0.0634, "step": 49380 }, { "epoch": 0.5276991292269886, "grad_norm": 1.3303380012512207, "learning_rate": 9.934052358317308e-07, "loss": 0.0672, "step": 49390 }, { "epoch": 0.5278059725412683, "grad_norm": 3.789292097091675, "learning_rate": 9.934025158322529e-07, "loss": 0.0496, "step": 49400 }, { "epoch": 0.5279128158555478, "grad_norm": 3.706911325454712, "learning_rate": 9.93399795275686e-07, "loss": 0.0575, "step": 49410 }, { "epoch": 0.5280196591698274, "grad_norm": 3.577610492706299, "learning_rate": 9.933970741620339e-07, "loss": 0.0998, "step": 49420 }, { "epoch": 0.5281265024841071, "grad_norm": 6.005700588226318, "learning_rate": 9.933943524912992e-07, "loss": 0.0295, "step": 49430 }, { "epoch": 0.5282333457983867, "grad_norm": 0.7962259650230408, "learning_rate": 9.933916302634849e-07, "loss": 0.0289, "step": 49440 }, { "epoch": 0.5283401891126662, "grad_norm": 0.3534296751022339, "learning_rate": 9.933889074785941e-07, "loss": 0.0419, "step": 49450 }, { "epoch": 0.5284470324269459, "grad_norm": 8.674234390258789, "learning_rate": 9.933861841366302e-07, "loss": 0.045, "step": 49460 }, { "epoch": 0.5285538757412255, "grad_norm": 4.639036178588867, "learning_rate": 9.933834602375962e-07, "loss": 0.081, "step": 49470 }, { "epoch": 0.5286607190555052, "grad_norm": 7.649438381195068, "learning_rate": 9.933807357814948e-07, "loss": 0.0316, "step": 49480 }, { "epoch": 0.5287675623697847, "grad_norm": 0.9154661297798157, "learning_rate": 9.933780107683291e-07, "loss": 0.0983, "step": 49490 }, { "epoch": 0.5288744056840643, "grad_norm": 2.145911455154419, "learning_rate": 9.933752851981027e-07, "loss": 0.032, "step": 49500 }, { "epoch": 0.528981248998344, "grad_norm": 9.155630111694336, "learning_rate": 9.933725590708185e-07, "loss": 0.0747, "step": 49510 }, { "epoch": 0.5290880923126235, "grad_norm": 4.0126824378967285, "learning_rate": 9.933698323864792e-07, "loss": 0.0237, "step": 49520 }, { "epoch": 0.5291949356269031, "grad_norm": 4.08574914932251, "learning_rate": 9.93367105145088e-07, "loss": 0.062, "step": 49530 }, { "epoch": 0.5293017789411828, "grad_norm": 4.774998188018799, "learning_rate": 9.933643773466484e-07, "loss": 0.1086, "step": 49540 }, { "epoch": 0.5294086222554624, "grad_norm": 0.322364866733551, "learning_rate": 9.933616489911631e-07, "loss": 0.0243, "step": 49550 }, { "epoch": 0.5295154655697419, "grad_norm": 12.343957901000977, "learning_rate": 9.93358920078635e-07, "loss": 0.0988, "step": 49560 }, { "epoch": 0.5296223088840216, "grad_norm": 2.4342567920684814, "learning_rate": 9.933561906090677e-07, "loss": 0.0538, "step": 49570 }, { "epoch": 0.5297291521983012, "grad_norm": 3.3611936569213867, "learning_rate": 9.933534605824642e-07, "loss": 0.0779, "step": 49580 }, { "epoch": 0.5298359955125808, "grad_norm": 0.057096581906080246, "learning_rate": 9.933507299988272e-07, "loss": 0.0581, "step": 49590 }, { "epoch": 0.5299428388268604, "grad_norm": 0.5890622735023499, "learning_rate": 9.9334799885816e-07, "loss": 0.0779, "step": 49600 }, { "epoch": 0.53004968214114, "grad_norm": 5.370098114013672, "learning_rate": 9.933452671604656e-07, "loss": 0.0707, "step": 49610 }, { "epoch": 0.5301565254554196, "grad_norm": 0.4503081142902374, "learning_rate": 9.933425349057472e-07, "loss": 0.0771, "step": 49620 }, { "epoch": 0.5302633687696993, "grad_norm": 0.022155923768877983, "learning_rate": 9.933398020940078e-07, "loss": 0.0419, "step": 49630 }, { "epoch": 0.5303702120839788, "grad_norm": 23.416677474975586, "learning_rate": 9.933370687252507e-07, "loss": 0.1066, "step": 49640 }, { "epoch": 0.5304770553982584, "grad_norm": 0.6441125869750977, "learning_rate": 9.933343347994787e-07, "loss": 0.0135, "step": 49650 }, { "epoch": 0.5305838987125381, "grad_norm": 2.2806344032287598, "learning_rate": 9.93331600316695e-07, "loss": 0.05, "step": 49660 }, { "epoch": 0.5306907420268177, "grad_norm": 8.7387113571167, "learning_rate": 9.933288652769026e-07, "loss": 0.069, "step": 49670 }, { "epoch": 0.5307975853410973, "grad_norm": 2.018902063369751, "learning_rate": 9.933261296801047e-07, "loss": 0.0353, "step": 49680 }, { "epoch": 0.5309044286553769, "grad_norm": 0.8600861430168152, "learning_rate": 9.933233935263045e-07, "loss": 0.0596, "step": 49690 }, { "epoch": 0.5310112719696565, "grad_norm": 4.001698970794678, "learning_rate": 9.93320656815505e-07, "loss": 0.0366, "step": 49700 }, { "epoch": 0.5311181152839362, "grad_norm": 6.435973167419434, "learning_rate": 9.93317919547709e-07, "loss": 0.0689, "step": 49710 }, { "epoch": 0.5312249585982157, "grad_norm": 0.10396767407655716, "learning_rate": 9.933151817229202e-07, "loss": 0.0505, "step": 49720 }, { "epoch": 0.5313318019124953, "grad_norm": 1.1896716356277466, "learning_rate": 9.93312443341141e-07, "loss": 0.024, "step": 49730 }, { "epoch": 0.531438645226775, "grad_norm": 6.446236610412598, "learning_rate": 9.933097044023749e-07, "loss": 0.0424, "step": 49740 }, { "epoch": 0.5315454885410545, "grad_norm": 2.657240867614746, "learning_rate": 9.93306964906625e-07, "loss": 0.0267, "step": 49750 }, { "epoch": 0.5316523318553341, "grad_norm": 5.774423122406006, "learning_rate": 9.933042248538943e-07, "loss": 0.0205, "step": 49760 }, { "epoch": 0.5317591751696138, "grad_norm": 4.006486415863037, "learning_rate": 9.933014842441858e-07, "loss": 0.0697, "step": 49770 }, { "epoch": 0.5318660184838934, "grad_norm": 1.3907712697982788, "learning_rate": 9.932987430775027e-07, "loss": 0.0875, "step": 49780 }, { "epoch": 0.5319728617981729, "grad_norm": 7.741258144378662, "learning_rate": 9.93296001353848e-07, "loss": 0.0493, "step": 49790 }, { "epoch": 0.5320797051124526, "grad_norm": 0.10619796067476273, "learning_rate": 9.932932590732252e-07, "loss": 0.0315, "step": 49800 }, { "epoch": 0.5321865484267322, "grad_norm": 18.153013229370117, "learning_rate": 9.93290516235637e-07, "loss": 0.0433, "step": 49810 }, { "epoch": 0.5322933917410118, "grad_norm": 1.0820798873901367, "learning_rate": 9.932877728410863e-07, "loss": 0.0554, "step": 49820 }, { "epoch": 0.5324002350552914, "grad_norm": 0.12839514017105103, "learning_rate": 9.932850288895766e-07, "loss": 0.087, "step": 49830 }, { "epoch": 0.532507078369571, "grad_norm": 0.6033779382705688, "learning_rate": 9.93282284381111e-07, "loss": 0.0373, "step": 49840 }, { "epoch": 0.5326139216838507, "grad_norm": 7.144733905792236, "learning_rate": 9.932795393156924e-07, "loss": 0.0985, "step": 49850 }, { "epoch": 0.5327207649981303, "grad_norm": 4.621224403381348, "learning_rate": 9.932767936933239e-07, "loss": 0.1027, "step": 49860 }, { "epoch": 0.5328276083124098, "grad_norm": 4.073400497436523, "learning_rate": 9.932740475140089e-07, "loss": 0.04, "step": 49870 }, { "epoch": 0.5329344516266895, "grad_norm": 4.67491340637207, "learning_rate": 9.9327130077775e-07, "loss": 0.0521, "step": 49880 }, { "epoch": 0.5330412949409691, "grad_norm": 19.285707473754883, "learning_rate": 9.932685534845505e-07, "loss": 0.0768, "step": 49890 }, { "epoch": 0.5331481382552486, "grad_norm": 3.0238542556762695, "learning_rate": 9.93265805634414e-07, "loss": 0.0845, "step": 49900 }, { "epoch": 0.5332549815695283, "grad_norm": 0.2523699104785919, "learning_rate": 9.932630572273428e-07, "loss": 0.0522, "step": 49910 }, { "epoch": 0.5333618248838079, "grad_norm": 0.8251456618309021, "learning_rate": 9.932603082633405e-07, "loss": 0.0855, "step": 49920 }, { "epoch": 0.5334686681980875, "grad_norm": 2.7104291915893555, "learning_rate": 9.9325755874241e-07, "loss": 0.038, "step": 49930 }, { "epoch": 0.5335755115123672, "grad_norm": 4.943725109100342, "learning_rate": 9.932548086645548e-07, "loss": 0.0394, "step": 49940 }, { "epoch": 0.5336823548266467, "grad_norm": 0.04150838032364845, "learning_rate": 9.932520580297775e-07, "loss": 0.0273, "step": 49950 }, { "epoch": 0.5337891981409263, "grad_norm": 8.077657699584961, "learning_rate": 9.93249306838081e-07, "loss": 0.0713, "step": 49960 }, { "epoch": 0.533896041455206, "grad_norm": 8.475976943969727, "learning_rate": 9.932465550894694e-07, "loss": 0.0974, "step": 49970 }, { "epoch": 0.5340028847694855, "grad_norm": 0.27640798687934875, "learning_rate": 9.93243802783945e-07, "loss": 0.0328, "step": 49980 }, { "epoch": 0.5341097280837651, "grad_norm": 3.763471841812134, "learning_rate": 9.93241049921511e-07, "loss": 0.0637, "step": 49990 }, { "epoch": 0.5342165713980448, "grad_norm": 1.1175607442855835, "learning_rate": 9.932382965021708e-07, "loss": 0.101, "step": 50000 }, { "epoch": 0.5343234147123244, "grad_norm": 5.033003330230713, "learning_rate": 9.932355425259273e-07, "loss": 0.0487, "step": 50010 }, { "epoch": 0.5344302580266039, "grad_norm": 10.515188217163086, "learning_rate": 9.932327879927837e-07, "loss": 0.0591, "step": 50020 }, { "epoch": 0.5345371013408836, "grad_norm": 9.309477806091309, "learning_rate": 9.93230032902743e-07, "loss": 0.0513, "step": 50030 }, { "epoch": 0.5346439446551632, "grad_norm": 4.262132167816162, "learning_rate": 9.932272772558082e-07, "loss": 0.0439, "step": 50040 }, { "epoch": 0.5347507879694429, "grad_norm": 3.294828414916992, "learning_rate": 9.932245210519828e-07, "loss": 0.048, "step": 50050 }, { "epoch": 0.5348576312837224, "grad_norm": 6.06978178024292, "learning_rate": 9.932217642912697e-07, "loss": 0.0836, "step": 50060 }, { "epoch": 0.534964474598002, "grad_norm": 5.633352756500244, "learning_rate": 9.932190069736718e-07, "loss": 0.0459, "step": 50070 }, { "epoch": 0.5350713179122817, "grad_norm": 9.379634857177734, "learning_rate": 9.932162490991925e-07, "loss": 0.0524, "step": 50080 }, { "epoch": 0.5351781612265613, "grad_norm": 2.634030818939209, "learning_rate": 9.93213490667835e-07, "loss": 0.0822, "step": 50090 }, { "epoch": 0.5352850045408408, "grad_norm": 8.535542488098145, "learning_rate": 9.93210731679602e-07, "loss": 0.0584, "step": 50100 }, { "epoch": 0.5353918478551205, "grad_norm": 0.1563343107700348, "learning_rate": 9.93207972134497e-07, "loss": 0.0309, "step": 50110 }, { "epoch": 0.5354986911694001, "grad_norm": 0.3137955665588379, "learning_rate": 9.93205212032523e-07, "loss": 0.0536, "step": 50120 }, { "epoch": 0.5356055344836796, "grad_norm": 5.485335826873779, "learning_rate": 9.93202451373683e-07, "loss": 0.0483, "step": 50130 }, { "epoch": 0.5357123777979593, "grad_norm": 1.2997221946716309, "learning_rate": 9.931996901579801e-07, "loss": 0.0377, "step": 50140 }, { "epoch": 0.5358192211122389, "grad_norm": 0.21743763983249664, "learning_rate": 9.931969283854177e-07, "loss": 0.0354, "step": 50150 }, { "epoch": 0.5359260644265185, "grad_norm": 0.1808733195066452, "learning_rate": 9.931941660559987e-07, "loss": 0.0646, "step": 50160 }, { "epoch": 0.5360329077407981, "grad_norm": 6.948164463043213, "learning_rate": 9.931914031697263e-07, "loss": 0.0468, "step": 50170 }, { "epoch": 0.5361397510550777, "grad_norm": 3.517723321914673, "learning_rate": 9.931886397266036e-07, "loss": 0.0486, "step": 50180 }, { "epoch": 0.5362465943693573, "grad_norm": 1.6234511137008667, "learning_rate": 9.931858757266335e-07, "loss": 0.0483, "step": 50190 }, { "epoch": 0.536353437683637, "grad_norm": 2.1331839561462402, "learning_rate": 9.931831111698196e-07, "loss": 0.0293, "step": 50200 }, { "epoch": 0.5364602809979165, "grad_norm": 1.198807716369629, "learning_rate": 9.931803460561646e-07, "loss": 0.0401, "step": 50210 }, { "epoch": 0.5365671243121962, "grad_norm": 2.0911519527435303, "learning_rate": 9.93177580385672e-07, "loss": 0.0275, "step": 50220 }, { "epoch": 0.5366739676264758, "grad_norm": 0.4417724013328552, "learning_rate": 9.931748141583443e-07, "loss": 0.0537, "step": 50230 }, { "epoch": 0.5367808109407554, "grad_norm": 9.413606643676758, "learning_rate": 9.93172047374185e-07, "loss": 0.051, "step": 50240 }, { "epoch": 0.536887654255035, "grad_norm": 4.541274547576904, "learning_rate": 9.931692800331976e-07, "loss": 0.0664, "step": 50250 }, { "epoch": 0.5369944975693146, "grad_norm": 0.7812145352363586, "learning_rate": 9.931665121353848e-07, "loss": 0.0698, "step": 50260 }, { "epoch": 0.5371013408835942, "grad_norm": 0.1038840115070343, "learning_rate": 9.931637436807495e-07, "loss": 0.0146, "step": 50270 }, { "epoch": 0.5372081841978739, "grad_norm": 1.9933732748031616, "learning_rate": 9.931609746692955e-07, "loss": 0.0528, "step": 50280 }, { "epoch": 0.5373150275121534, "grad_norm": 3.1384198665618896, "learning_rate": 9.931582051010252e-07, "loss": 0.0659, "step": 50290 }, { "epoch": 0.537421870826433, "grad_norm": 12.659268379211426, "learning_rate": 9.931554349759422e-07, "loss": 0.0661, "step": 50300 }, { "epoch": 0.5375287141407127, "grad_norm": 10.550230979919434, "learning_rate": 9.931526642940495e-07, "loss": 0.0276, "step": 50310 }, { "epoch": 0.5376355574549923, "grad_norm": 12.002209663391113, "learning_rate": 9.931498930553503e-07, "loss": 0.1076, "step": 50320 }, { "epoch": 0.5377424007692718, "grad_norm": 0.07033955305814743, "learning_rate": 9.931471212598475e-07, "loss": 0.0841, "step": 50330 }, { "epoch": 0.5378492440835515, "grad_norm": 6.399211406707764, "learning_rate": 9.931443489075446e-07, "loss": 0.0873, "step": 50340 }, { "epoch": 0.5379560873978311, "grad_norm": 3.42712140083313, "learning_rate": 9.931415759984443e-07, "loss": 0.036, "step": 50350 }, { "epoch": 0.5380629307121106, "grad_norm": 1.6025991439819336, "learning_rate": 9.9313880253255e-07, "loss": 0.0362, "step": 50360 }, { "epoch": 0.5381697740263903, "grad_norm": 23.47894859313965, "learning_rate": 9.931360285098646e-07, "loss": 0.1343, "step": 50370 }, { "epoch": 0.5382766173406699, "grad_norm": 9.569060325622559, "learning_rate": 9.931332539303917e-07, "loss": 0.0909, "step": 50380 }, { "epoch": 0.5383834606549495, "grad_norm": 4.302401065826416, "learning_rate": 9.93130478794134e-07, "loss": 0.0269, "step": 50390 }, { "epoch": 0.5384903039692291, "grad_norm": 12.089101791381836, "learning_rate": 9.931277031010947e-07, "loss": 0.0537, "step": 50400 }, { "epoch": 0.5385971472835087, "grad_norm": 1.4914683103561401, "learning_rate": 9.931249268512773e-07, "loss": 0.0449, "step": 50410 }, { "epoch": 0.5387039905977884, "grad_norm": 2.8594603538513184, "learning_rate": 9.931221500446843e-07, "loss": 0.0418, "step": 50420 }, { "epoch": 0.538810833912068, "grad_norm": 14.412798881530762, "learning_rate": 9.931193726813195e-07, "loss": 0.084, "step": 50430 }, { "epoch": 0.5389176772263475, "grad_norm": 4.1425089836120605, "learning_rate": 9.931165947611854e-07, "loss": 0.0869, "step": 50440 }, { "epoch": 0.5390245205406272, "grad_norm": 15.263579368591309, "learning_rate": 9.931138162842856e-07, "loss": 0.1097, "step": 50450 }, { "epoch": 0.5391313638549068, "grad_norm": 6.461804389953613, "learning_rate": 9.93111037250623e-07, "loss": 0.1054, "step": 50460 }, { "epoch": 0.5392382071691864, "grad_norm": 0.04899538308382034, "learning_rate": 9.931082576602008e-07, "loss": 0.0544, "step": 50470 }, { "epoch": 0.539345050483466, "grad_norm": 0.384141206741333, "learning_rate": 9.931054775130224e-07, "loss": 0.097, "step": 50480 }, { "epoch": 0.5394518937977456, "grad_norm": 1.8776133060455322, "learning_rate": 9.931026968090904e-07, "loss": 0.0295, "step": 50490 }, { "epoch": 0.5395587371120252, "grad_norm": 6.306477069854736, "learning_rate": 9.930999155484084e-07, "loss": 0.1355, "step": 50500 }, { "epoch": 0.5396655804263049, "grad_norm": 7.233392238616943, "learning_rate": 9.930971337309794e-07, "loss": 0.0541, "step": 50510 }, { "epoch": 0.5397724237405844, "grad_norm": 5.361950874328613, "learning_rate": 9.930943513568065e-07, "loss": 0.1095, "step": 50520 }, { "epoch": 0.539879267054864, "grad_norm": 14.780197143554688, "learning_rate": 9.930915684258928e-07, "loss": 0.059, "step": 50530 }, { "epoch": 0.5399861103691437, "grad_norm": 6.148445129394531, "learning_rate": 9.930887849382415e-07, "loss": 0.0641, "step": 50540 }, { "epoch": 0.5400929536834232, "grad_norm": 2.8837502002716064, "learning_rate": 9.930860008938558e-07, "loss": 0.119, "step": 50550 }, { "epoch": 0.5401997969977028, "grad_norm": 1.3607566356658936, "learning_rate": 9.930832162927388e-07, "loss": 0.0473, "step": 50560 }, { "epoch": 0.5403066403119825, "grad_norm": 13.173155784606934, "learning_rate": 9.930804311348935e-07, "loss": 0.1251, "step": 50570 }, { "epoch": 0.5404134836262621, "grad_norm": 8.917181968688965, "learning_rate": 9.930776454203233e-07, "loss": 0.0787, "step": 50580 }, { "epoch": 0.5405203269405418, "grad_norm": 9.181658744812012, "learning_rate": 9.930748591490312e-07, "loss": 0.0311, "step": 50590 }, { "epoch": 0.5406271702548213, "grad_norm": 3.5903408527374268, "learning_rate": 9.930720723210206e-07, "loss": 0.0312, "step": 50600 }, { "epoch": 0.5407340135691009, "grad_norm": 1.281383752822876, "learning_rate": 9.93069284936294e-07, "loss": 0.0838, "step": 50610 }, { "epoch": 0.5408408568833806, "grad_norm": 4.764537334442139, "learning_rate": 9.930664969948553e-07, "loss": 0.0404, "step": 50620 }, { "epoch": 0.5409477001976601, "grad_norm": 9.310266494750977, "learning_rate": 9.93063708496707e-07, "loss": 0.0421, "step": 50630 }, { "epoch": 0.5410545435119397, "grad_norm": 0.4425053298473358, "learning_rate": 9.93060919441853e-07, "loss": 0.0602, "step": 50640 }, { "epoch": 0.5411613868262194, "grad_norm": 2.2248003482818604, "learning_rate": 9.930581298302955e-07, "loss": 0.0389, "step": 50650 }, { "epoch": 0.541268230140499, "grad_norm": 5.257008075714111, "learning_rate": 9.930553396620386e-07, "loss": 0.1, "step": 50660 }, { "epoch": 0.5413750734547785, "grad_norm": 10.21248722076416, "learning_rate": 9.930525489370847e-07, "loss": 0.0761, "step": 50670 }, { "epoch": 0.5414819167690582, "grad_norm": 8.131824493408203, "learning_rate": 9.930497576554375e-07, "loss": 0.0632, "step": 50680 }, { "epoch": 0.5415887600833378, "grad_norm": 1.5463820695877075, "learning_rate": 9.930469658170997e-07, "loss": 0.0444, "step": 50690 }, { "epoch": 0.5416956033976174, "grad_norm": 0.16970428824424744, "learning_rate": 9.930441734220747e-07, "loss": 0.0581, "step": 50700 }, { "epoch": 0.541802446711897, "grad_norm": 3.06522274017334, "learning_rate": 9.930413804703657e-07, "loss": 0.025, "step": 50710 }, { "epoch": 0.5419092900261766, "grad_norm": 8.98687744140625, "learning_rate": 9.930385869619758e-07, "loss": 0.093, "step": 50720 }, { "epoch": 0.5420161333404562, "grad_norm": 1.3275460004806519, "learning_rate": 9.93035792896908e-07, "loss": 0.0232, "step": 50730 }, { "epoch": 0.5421229766547359, "grad_norm": 1.8870184421539307, "learning_rate": 9.930329982751655e-07, "loss": 0.1094, "step": 50740 }, { "epoch": 0.5422298199690154, "grad_norm": 4.465517997741699, "learning_rate": 9.930302030967517e-07, "loss": 0.1058, "step": 50750 }, { "epoch": 0.542336663283295, "grad_norm": 4.661494731903076, "learning_rate": 9.930274073616696e-07, "loss": 0.0798, "step": 50760 }, { "epoch": 0.5424435065975747, "grad_norm": 2.193678855895996, "learning_rate": 9.930246110699222e-07, "loss": 0.0827, "step": 50770 }, { "epoch": 0.5425503499118542, "grad_norm": 8.911137580871582, "learning_rate": 9.93021814221513e-07, "loss": 0.073, "step": 50780 }, { "epoch": 0.5426571932261339, "grad_norm": 4.832857131958008, "learning_rate": 9.93019016816445e-07, "loss": 0.101, "step": 50790 }, { "epoch": 0.5427640365404135, "grad_norm": 2.623305320739746, "learning_rate": 9.93016218854721e-07, "loss": 0.0546, "step": 50800 }, { "epoch": 0.5428708798546931, "grad_norm": 1.0405802726745605, "learning_rate": 9.930134203363448e-07, "loss": 0.0692, "step": 50810 }, { "epoch": 0.5429777231689727, "grad_norm": 1.2412666082382202, "learning_rate": 9.930106212613191e-07, "loss": 0.0419, "step": 50820 }, { "epoch": 0.5430845664832523, "grad_norm": 9.410032272338867, "learning_rate": 9.930078216296472e-07, "loss": 0.0374, "step": 50830 }, { "epoch": 0.5431914097975319, "grad_norm": 3.866417407989502, "learning_rate": 9.930050214413323e-07, "loss": 0.0789, "step": 50840 }, { "epoch": 0.5432982531118116, "grad_norm": 8.804144859313965, "learning_rate": 9.930022206963774e-07, "loss": 0.0662, "step": 50850 }, { "epoch": 0.5434050964260911, "grad_norm": 3.5399656295776367, "learning_rate": 9.92999419394786e-07, "loss": 0.1185, "step": 50860 }, { "epoch": 0.5435119397403707, "grad_norm": 10.304216384887695, "learning_rate": 9.92996617536561e-07, "loss": 0.0795, "step": 50870 }, { "epoch": 0.5436187830546504, "grad_norm": 6.81239652633667, "learning_rate": 9.929938151217055e-07, "loss": 0.0413, "step": 50880 }, { "epoch": 0.54372562636893, "grad_norm": 0.7190576791763306, "learning_rate": 9.929910121502226e-07, "loss": 0.081, "step": 50890 }, { "epoch": 0.5438324696832095, "grad_norm": 6.731222152709961, "learning_rate": 9.92988208622116e-07, "loss": 0.1201, "step": 50900 }, { "epoch": 0.5439393129974892, "grad_norm": 16.487882614135742, "learning_rate": 9.929854045373883e-07, "loss": 0.1594, "step": 50910 }, { "epoch": 0.5440461563117688, "grad_norm": 3.284471035003662, "learning_rate": 9.929825998960428e-07, "loss": 0.0628, "step": 50920 }, { "epoch": 0.5441529996260484, "grad_norm": 1.7726777791976929, "learning_rate": 9.92979794698083e-07, "loss": 0.0773, "step": 50930 }, { "epoch": 0.544259842940328, "grad_norm": 2.7958788871765137, "learning_rate": 9.929769889435117e-07, "loss": 0.099, "step": 50940 }, { "epoch": 0.5443666862546076, "grad_norm": 2.356689691543579, "learning_rate": 9.929741826323321e-07, "loss": 0.0245, "step": 50950 }, { "epoch": 0.5444735295688873, "grad_norm": 3.6023447513580322, "learning_rate": 9.929713757645475e-07, "loss": 0.0995, "step": 50960 }, { "epoch": 0.5445803728831669, "grad_norm": 2.7659201622009277, "learning_rate": 9.92968568340161e-07, "loss": 0.0701, "step": 50970 }, { "epoch": 0.5446872161974464, "grad_norm": 8.298452377319336, "learning_rate": 9.929657603591758e-07, "loss": 0.0683, "step": 50980 }, { "epoch": 0.5447940595117261, "grad_norm": 1.6747390031814575, "learning_rate": 9.929629518215949e-07, "loss": 0.0795, "step": 50990 }, { "epoch": 0.5449009028260057, "grad_norm": 3.3276538848876953, "learning_rate": 9.929601427274218e-07, "loss": 0.0636, "step": 51000 }, { "epoch": 0.5450077461402852, "grad_norm": 8.142719268798828, "learning_rate": 9.929573330766594e-07, "loss": 0.0778, "step": 51010 }, { "epoch": 0.5451145894545649, "grad_norm": 2.631894826889038, "learning_rate": 9.92954522869311e-07, "loss": 0.0309, "step": 51020 }, { "epoch": 0.5452214327688445, "grad_norm": 0.3155657947063446, "learning_rate": 9.929517121053798e-07, "loss": 0.0428, "step": 51030 }, { "epoch": 0.5453282760831241, "grad_norm": 3.2119009494781494, "learning_rate": 9.929489007848688e-07, "loss": 0.1187, "step": 51040 }, { "epoch": 0.5454351193974037, "grad_norm": 1.0013575553894043, "learning_rate": 9.929460889077815e-07, "loss": 0.0361, "step": 51050 }, { "epoch": 0.5455419627116833, "grad_norm": 1.9599223136901855, "learning_rate": 9.929432764741205e-07, "loss": 0.0664, "step": 51060 }, { "epoch": 0.5456488060259629, "grad_norm": 1.2580074071884155, "learning_rate": 9.929404634838896e-07, "loss": 0.082, "step": 51070 }, { "epoch": 0.5457556493402426, "grad_norm": 4.614104270935059, "learning_rate": 9.929376499370918e-07, "loss": 0.0921, "step": 51080 }, { "epoch": 0.5458624926545221, "grad_norm": 10.782045364379883, "learning_rate": 9.9293483583373e-07, "loss": 0.0329, "step": 51090 }, { "epoch": 0.5459693359688017, "grad_norm": 2.41269850730896, "learning_rate": 9.929320211738076e-07, "loss": 0.0425, "step": 51100 }, { "epoch": 0.5460761792830814, "grad_norm": 1.3338193893432617, "learning_rate": 9.929292059573277e-07, "loss": 0.1217, "step": 51110 }, { "epoch": 0.546183022597361, "grad_norm": 3.0151119232177734, "learning_rate": 9.929263901842937e-07, "loss": 0.0528, "step": 51120 }, { "epoch": 0.5462898659116405, "grad_norm": 0.8995873332023621, "learning_rate": 9.929235738547085e-07, "loss": 0.0561, "step": 51130 }, { "epoch": 0.5463967092259202, "grad_norm": 5.5953450202941895, "learning_rate": 9.929207569685752e-07, "loss": 0.0536, "step": 51140 }, { "epoch": 0.5465035525401998, "grad_norm": 8.780747413635254, "learning_rate": 9.929179395258974e-07, "loss": 0.1073, "step": 51150 }, { "epoch": 0.5466103958544795, "grad_norm": 0.09426108002662659, "learning_rate": 9.92915121526678e-07, "loss": 0.0475, "step": 51160 }, { "epoch": 0.546717239168759, "grad_norm": 5.884275913238525, "learning_rate": 9.929123029709202e-07, "loss": 0.0679, "step": 51170 }, { "epoch": 0.5468240824830386, "grad_norm": 7.970150470733643, "learning_rate": 9.929094838586272e-07, "loss": 0.0734, "step": 51180 }, { "epoch": 0.5469309257973183, "grad_norm": 13.86677074432373, "learning_rate": 9.92906664189802e-07, "loss": 0.1484, "step": 51190 }, { "epoch": 0.5470377691115978, "grad_norm": 2.4585466384887695, "learning_rate": 9.929038439644484e-07, "loss": 0.0675, "step": 51200 }, { "epoch": 0.5471446124258774, "grad_norm": 13.221986770629883, "learning_rate": 9.929010231825687e-07, "loss": 0.0493, "step": 51210 }, { "epoch": 0.5472514557401571, "grad_norm": 12.433615684509277, "learning_rate": 9.92898201844167e-07, "loss": 0.0804, "step": 51220 }, { "epoch": 0.5473582990544367, "grad_norm": 2.938591241836548, "learning_rate": 9.928953799492456e-07, "loss": 0.032, "step": 51230 }, { "epoch": 0.5474651423687162, "grad_norm": 3.4259331226348877, "learning_rate": 9.928925574978085e-07, "loss": 0.0465, "step": 51240 }, { "epoch": 0.5475719856829959, "grad_norm": 3.882934331893921, "learning_rate": 9.928897344898583e-07, "loss": 0.0645, "step": 51250 }, { "epoch": 0.5476788289972755, "grad_norm": 16.755725860595703, "learning_rate": 9.928869109253983e-07, "loss": 0.1576, "step": 51260 }, { "epoch": 0.5477856723115551, "grad_norm": 4.515752792358398, "learning_rate": 9.92884086804432e-07, "loss": 0.047, "step": 51270 }, { "epoch": 0.5478925156258347, "grad_norm": 0.5388508439064026, "learning_rate": 9.928812621269623e-07, "loss": 0.0888, "step": 51280 }, { "epoch": 0.5479993589401143, "grad_norm": 0.5705265402793884, "learning_rate": 9.928784368929923e-07, "loss": 0.0359, "step": 51290 }, { "epoch": 0.5481062022543939, "grad_norm": 4.082810401916504, "learning_rate": 9.928756111025253e-07, "loss": 0.043, "step": 51300 }, { "epoch": 0.5482130455686736, "grad_norm": 5.41595983505249, "learning_rate": 9.928727847555648e-07, "loss": 0.0714, "step": 51310 }, { "epoch": 0.5483198888829531, "grad_norm": 0.5163976550102234, "learning_rate": 9.928699578521135e-07, "loss": 0.0345, "step": 51320 }, { "epoch": 0.5484267321972328, "grad_norm": 10.920387268066406, "learning_rate": 9.92867130392175e-07, "loss": 0.0776, "step": 51330 }, { "epoch": 0.5485335755115124, "grad_norm": 1.5858469009399414, "learning_rate": 9.928643023757524e-07, "loss": 0.0755, "step": 51340 }, { "epoch": 0.548640418825792, "grad_norm": 2.5090463161468506, "learning_rate": 9.928614738028485e-07, "loss": 0.0976, "step": 51350 }, { "epoch": 0.5487472621400716, "grad_norm": 0.2146640568971634, "learning_rate": 9.92858644673467e-07, "loss": 0.1326, "step": 51360 }, { "epoch": 0.5488541054543512, "grad_norm": 2.556997299194336, "learning_rate": 9.92855814987611e-07, "loss": 0.0562, "step": 51370 }, { "epoch": 0.5489609487686308, "grad_norm": 0.6264485120773315, "learning_rate": 9.928529847452833e-07, "loss": 0.0601, "step": 51380 }, { "epoch": 0.5490677920829105, "grad_norm": 2.567525863647461, "learning_rate": 9.928501539464875e-07, "loss": 0.1003, "step": 51390 }, { "epoch": 0.54917463539719, "grad_norm": 20.960134506225586, "learning_rate": 9.928473225912266e-07, "loss": 0.0548, "step": 51400 }, { "epoch": 0.5492814787114696, "grad_norm": 7.369685649871826, "learning_rate": 9.92844490679504e-07, "loss": 0.1014, "step": 51410 }, { "epoch": 0.5493883220257493, "grad_norm": 1.2315016984939575, "learning_rate": 9.928416582113228e-07, "loss": 0.0395, "step": 51420 }, { "epoch": 0.5494951653400288, "grad_norm": 3.7443277835845947, "learning_rate": 9.928388251866862e-07, "loss": 0.0817, "step": 51430 }, { "epoch": 0.5496020086543084, "grad_norm": 4.2916436195373535, "learning_rate": 9.928359916055973e-07, "loss": 0.0276, "step": 51440 }, { "epoch": 0.5497088519685881, "grad_norm": 0.5278875827789307, "learning_rate": 9.928331574680593e-07, "loss": 0.0276, "step": 51450 }, { "epoch": 0.5498156952828677, "grad_norm": 5.826109409332275, "learning_rate": 9.928303227740756e-07, "loss": 0.0536, "step": 51460 }, { "epoch": 0.5499225385971472, "grad_norm": 0.7307567000389099, "learning_rate": 9.92827487523649e-07, "loss": 0.1455, "step": 51470 }, { "epoch": 0.5500293819114269, "grad_norm": 0.5381167531013489, "learning_rate": 9.928246517167832e-07, "loss": 0.0929, "step": 51480 }, { "epoch": 0.5501362252257065, "grad_norm": 11.261945724487305, "learning_rate": 9.928218153534811e-07, "loss": 0.0604, "step": 51490 }, { "epoch": 0.5502430685399861, "grad_norm": 2.2074697017669678, "learning_rate": 9.928189784337462e-07, "loss": 0.078, "step": 51500 }, { "epoch": 0.5503499118542657, "grad_norm": 2.768449544906616, "learning_rate": 9.928161409575813e-07, "loss": 0.0672, "step": 51510 }, { "epoch": 0.5504567551685453, "grad_norm": 10.456093788146973, "learning_rate": 9.928133029249896e-07, "loss": 0.0596, "step": 51520 }, { "epoch": 0.550563598482825, "grad_norm": 0.27126380801200867, "learning_rate": 9.928104643359747e-07, "loss": 0.0473, "step": 51530 }, { "epoch": 0.5506704417971046, "grad_norm": 6.3172736167907715, "learning_rate": 9.928076251905395e-07, "loss": 0.0321, "step": 51540 }, { "epoch": 0.5507772851113841, "grad_norm": 1.0664331912994385, "learning_rate": 9.928047854886873e-07, "loss": 0.1754, "step": 51550 }, { "epoch": 0.5508841284256638, "grad_norm": 7.528334617614746, "learning_rate": 9.928019452304216e-07, "loss": 0.0635, "step": 51560 }, { "epoch": 0.5509909717399434, "grad_norm": 0.4157755970954895, "learning_rate": 9.92799104415745e-07, "loss": 0.0335, "step": 51570 }, { "epoch": 0.551097815054223, "grad_norm": 2.745344877243042, "learning_rate": 9.92796263044661e-07, "loss": 0.0319, "step": 51580 }, { "epoch": 0.5512046583685026, "grad_norm": 4.434918403625488, "learning_rate": 9.927934211171728e-07, "loss": 0.1123, "step": 51590 }, { "epoch": 0.5513115016827822, "grad_norm": 1.8466923236846924, "learning_rate": 9.92790578633284e-07, "loss": 0.0833, "step": 51600 }, { "epoch": 0.5514183449970618, "grad_norm": 10.172691345214844, "learning_rate": 9.92787735592997e-07, "loss": 0.032, "step": 51610 }, { "epoch": 0.5515251883113415, "grad_norm": 1.1894474029541016, "learning_rate": 9.927848919963157e-07, "loss": 0.1016, "step": 51620 }, { "epoch": 0.551632031625621, "grad_norm": 5.075449466705322, "learning_rate": 9.92782047843243e-07, "loss": 0.0826, "step": 51630 }, { "epoch": 0.5517388749399006, "grad_norm": 4.018208980560303, "learning_rate": 9.927792031337822e-07, "loss": 0.022, "step": 51640 }, { "epoch": 0.5518457182541803, "grad_norm": 1.775592565536499, "learning_rate": 9.927763578679364e-07, "loss": 0.0631, "step": 51650 }, { "epoch": 0.5519525615684598, "grad_norm": 0.8944257497787476, "learning_rate": 9.92773512045709e-07, "loss": 0.0779, "step": 51660 }, { "epoch": 0.5520594048827394, "grad_norm": 8.938873291015625, "learning_rate": 9.92770665667103e-07, "loss": 0.0748, "step": 51670 }, { "epoch": 0.5521662481970191, "grad_norm": 10.769268989562988, "learning_rate": 9.92767818732122e-07, "loss": 0.06, "step": 51680 }, { "epoch": 0.5522730915112987, "grad_norm": 1.3716411590576172, "learning_rate": 9.927649712407686e-07, "loss": 0.0349, "step": 51690 }, { "epoch": 0.5523799348255783, "grad_norm": 8.337854385375977, "learning_rate": 9.927621231930466e-07, "loss": 0.1022, "step": 51700 }, { "epoch": 0.5524867781398579, "grad_norm": 6.403128147125244, "learning_rate": 9.927592745889589e-07, "loss": 0.0181, "step": 51710 }, { "epoch": 0.5525936214541375, "grad_norm": 3.7175793647766113, "learning_rate": 9.927564254285087e-07, "loss": 0.0481, "step": 51720 }, { "epoch": 0.5527004647684172, "grad_norm": 0.22433660924434662, "learning_rate": 9.927535757116994e-07, "loss": 0.0235, "step": 51730 }, { "epoch": 0.5528073080826967, "grad_norm": 11.013493537902832, "learning_rate": 9.92750725438534e-07, "loss": 0.0724, "step": 51740 }, { "epoch": 0.5529141513969763, "grad_norm": 1.139629602432251, "learning_rate": 9.92747874609016e-07, "loss": 0.0259, "step": 51750 }, { "epoch": 0.553020994711256, "grad_norm": 1.2604318857192993, "learning_rate": 9.927450232231485e-07, "loss": 0.0276, "step": 51760 }, { "epoch": 0.5531278380255356, "grad_norm": 10.866415023803711, "learning_rate": 9.927421712809346e-07, "loss": 0.0707, "step": 51770 }, { "epoch": 0.5532346813398151, "grad_norm": 0.6420121192932129, "learning_rate": 9.927393187823776e-07, "loss": 0.0527, "step": 51780 }, { "epoch": 0.5533415246540948, "grad_norm": 3.230656385421753, "learning_rate": 9.927364657274807e-07, "loss": 0.0475, "step": 51790 }, { "epoch": 0.5534483679683744, "grad_norm": 3.1002657413482666, "learning_rate": 9.92733612116247e-07, "loss": 0.0584, "step": 51800 }, { "epoch": 0.553555211282654, "grad_norm": 2.9801673889160156, "learning_rate": 9.9273075794868e-07, "loss": 0.06, "step": 51810 }, { "epoch": 0.5536620545969336, "grad_norm": 2.7695133686065674, "learning_rate": 9.927279032247828e-07, "loss": 0.03, "step": 51820 }, { "epoch": 0.5537688979112132, "grad_norm": 5.860328674316406, "learning_rate": 9.927250479445586e-07, "loss": 0.0507, "step": 51830 }, { "epoch": 0.5538757412254928, "grad_norm": 7.119085788726807, "learning_rate": 9.927221921080108e-07, "loss": 0.0608, "step": 51840 }, { "epoch": 0.5539825845397724, "grad_norm": 0.28615444898605347, "learning_rate": 9.92719335715142e-07, "loss": 0.0642, "step": 51850 }, { "epoch": 0.554089427854052, "grad_norm": 0.9027224183082581, "learning_rate": 9.927164787659563e-07, "loss": 0.0088, "step": 51860 }, { "epoch": 0.5541962711683316, "grad_norm": 3.349437952041626, "learning_rate": 9.927136212604563e-07, "loss": 0.0556, "step": 51870 }, { "epoch": 0.5543031144826113, "grad_norm": 3.5126192569732666, "learning_rate": 9.927107631986455e-07, "loss": 0.0455, "step": 51880 }, { "epoch": 0.5544099577968908, "grad_norm": 1.8877233266830444, "learning_rate": 9.92707904580527e-07, "loss": 0.076, "step": 51890 }, { "epoch": 0.5545168011111705, "grad_norm": 1.7965128421783447, "learning_rate": 9.927050454061043e-07, "loss": 0.0424, "step": 51900 }, { "epoch": 0.5546236444254501, "grad_norm": 3.484086275100708, "learning_rate": 9.927021856753803e-07, "loss": 0.0671, "step": 51910 }, { "epoch": 0.5547304877397297, "grad_norm": 4.979389667510986, "learning_rate": 9.926993253883585e-07, "loss": 0.0505, "step": 51920 }, { "epoch": 0.5548373310540093, "grad_norm": 0.22641655802726746, "learning_rate": 9.926964645450417e-07, "loss": 0.0842, "step": 51930 }, { "epoch": 0.5549441743682889, "grad_norm": 10.581740379333496, "learning_rate": 9.926936031454335e-07, "loss": 0.047, "step": 51940 }, { "epoch": 0.5550510176825685, "grad_norm": 6.476860046386719, "learning_rate": 9.926907411895372e-07, "loss": 0.1702, "step": 51950 }, { "epoch": 0.5551578609968482, "grad_norm": 6.738346576690674, "learning_rate": 9.926878786773556e-07, "loss": 0.0357, "step": 51960 }, { "epoch": 0.5552647043111277, "grad_norm": 1.1865829229354858, "learning_rate": 9.926850156088924e-07, "loss": 0.0542, "step": 51970 }, { "epoch": 0.5553715476254073, "grad_norm": 7.080357074737549, "learning_rate": 9.926821519841504e-07, "loss": 0.1103, "step": 51980 }, { "epoch": 0.555478390939687, "grad_norm": 19.485349655151367, "learning_rate": 9.926792878031333e-07, "loss": 0.1313, "step": 51990 }, { "epoch": 0.5555852342539666, "grad_norm": 12.355940818786621, "learning_rate": 9.92676423065844e-07, "loss": 0.083, "step": 52000 }, { "epoch": 0.5556920775682461, "grad_norm": 12.530294418334961, "learning_rate": 9.926735577722858e-07, "loss": 0.0378, "step": 52010 }, { "epoch": 0.5557989208825258, "grad_norm": 4.750148296356201, "learning_rate": 9.926706919224622e-07, "loss": 0.0599, "step": 52020 }, { "epoch": 0.5559057641968054, "grad_norm": 2.5012121200561523, "learning_rate": 9.926678255163757e-07, "loss": 0.0602, "step": 52030 }, { "epoch": 0.5560126075110849, "grad_norm": 0.371900349855423, "learning_rate": 9.926649585540305e-07, "loss": 0.0367, "step": 52040 }, { "epoch": 0.5561194508253646, "grad_norm": 12.202421188354492, "learning_rate": 9.926620910354293e-07, "loss": 0.1089, "step": 52050 }, { "epoch": 0.5562262941396442, "grad_norm": 5.1298627853393555, "learning_rate": 9.926592229605753e-07, "loss": 0.1119, "step": 52060 }, { "epoch": 0.5563331374539239, "grad_norm": 1.2205449342727661, "learning_rate": 9.92656354329472e-07, "loss": 0.0487, "step": 52070 }, { "epoch": 0.5564399807682034, "grad_norm": 0.5000659823417664, "learning_rate": 9.926534851421224e-07, "loss": 0.0385, "step": 52080 }, { "epoch": 0.556546824082483, "grad_norm": 3.8643369674682617, "learning_rate": 9.9265061539853e-07, "loss": 0.0351, "step": 52090 }, { "epoch": 0.5566536673967627, "grad_norm": 6.774889945983887, "learning_rate": 9.926477450986975e-07, "loss": 0.0657, "step": 52100 }, { "epoch": 0.5567605107110423, "grad_norm": 2.789396286010742, "learning_rate": 9.926448742426287e-07, "loss": 0.0703, "step": 52110 }, { "epoch": 0.5568673540253218, "grad_norm": 5.258449077606201, "learning_rate": 9.926420028303268e-07, "loss": 0.0559, "step": 52120 }, { "epoch": 0.5569741973396015, "grad_norm": 16.441692352294922, "learning_rate": 9.926391308617947e-07, "loss": 0.0839, "step": 52130 }, { "epoch": 0.5570810406538811, "grad_norm": 1.2309423685073853, "learning_rate": 9.92636258337036e-07, "loss": 0.0916, "step": 52140 }, { "epoch": 0.5571878839681607, "grad_norm": 7.668003559112549, "learning_rate": 9.926333852560537e-07, "loss": 0.0478, "step": 52150 }, { "epoch": 0.5572947272824403, "grad_norm": 1.315459132194519, "learning_rate": 9.926305116188512e-07, "loss": 0.0187, "step": 52160 }, { "epoch": 0.5574015705967199, "grad_norm": 0.4821256995201111, "learning_rate": 9.926276374254316e-07, "loss": 0.0319, "step": 52170 }, { "epoch": 0.5575084139109995, "grad_norm": 10.13183307647705, "learning_rate": 9.926247626757981e-07, "loss": 0.0219, "step": 52180 }, { "epoch": 0.5576152572252792, "grad_norm": 4.834751605987549, "learning_rate": 9.926218873699543e-07, "loss": 0.1171, "step": 52190 }, { "epoch": 0.5577221005395587, "grad_norm": 11.726234436035156, "learning_rate": 9.92619011507903e-07, "loss": 0.0261, "step": 52200 }, { "epoch": 0.5578289438538383, "grad_norm": 2.8939316272735596, "learning_rate": 9.926161350896479e-07, "loss": 0.1221, "step": 52210 }, { "epoch": 0.557935787168118, "grad_norm": 8.868781089782715, "learning_rate": 9.926132581151919e-07, "loss": 0.0703, "step": 52220 }, { "epoch": 0.5580426304823976, "grad_norm": 10.92878246307373, "learning_rate": 9.92610380584538e-07, "loss": 0.0577, "step": 52230 }, { "epoch": 0.5581494737966771, "grad_norm": 9.252559661865234, "learning_rate": 9.926075024976902e-07, "loss": 0.0704, "step": 52240 }, { "epoch": 0.5582563171109568, "grad_norm": 4.677590847015381, "learning_rate": 9.926046238546513e-07, "loss": 0.1466, "step": 52250 }, { "epoch": 0.5583631604252364, "grad_norm": 12.514898300170898, "learning_rate": 9.926017446554246e-07, "loss": 0.116, "step": 52260 }, { "epoch": 0.558470003739516, "grad_norm": 6.798707008361816, "learning_rate": 9.925988649000133e-07, "loss": 0.1626, "step": 52270 }, { "epoch": 0.5585768470537956, "grad_norm": 5.709285259246826, "learning_rate": 9.925959845884207e-07, "loss": 0.0517, "step": 52280 }, { "epoch": 0.5586836903680752, "grad_norm": 26.984323501586914, "learning_rate": 9.9259310372065e-07, "loss": 0.1035, "step": 52290 }, { "epoch": 0.5587905336823549, "grad_norm": 7.55532693862915, "learning_rate": 9.925902222967046e-07, "loss": 0.0938, "step": 52300 }, { "epoch": 0.5588973769966344, "grad_norm": 3.967128276824951, "learning_rate": 9.925873403165877e-07, "loss": 0.0479, "step": 52310 }, { "epoch": 0.559004220310914, "grad_norm": 5.72982120513916, "learning_rate": 9.925844577803025e-07, "loss": 0.0462, "step": 52320 }, { "epoch": 0.5591110636251937, "grad_norm": 25.21442985534668, "learning_rate": 9.925815746878523e-07, "loss": 0.0427, "step": 52330 }, { "epoch": 0.5592179069394733, "grad_norm": 5.4681477546691895, "learning_rate": 9.925786910392402e-07, "loss": 0.0176, "step": 52340 }, { "epoch": 0.5593247502537528, "grad_norm": 7.574718475341797, "learning_rate": 9.925758068344697e-07, "loss": 0.0794, "step": 52350 }, { "epoch": 0.5594315935680325, "grad_norm": 0.23733742535114288, "learning_rate": 9.925729220735437e-07, "loss": 0.0259, "step": 52360 }, { "epoch": 0.5595384368823121, "grad_norm": 4.858626365661621, "learning_rate": 9.92570036756466e-07, "loss": 0.0224, "step": 52370 }, { "epoch": 0.5596452801965917, "grad_norm": 4.020324230194092, "learning_rate": 9.925671508832395e-07, "loss": 0.0667, "step": 52380 }, { "epoch": 0.5597521235108713, "grad_norm": 5.964779376983643, "learning_rate": 9.925642644538674e-07, "loss": 0.0445, "step": 52390 }, { "epoch": 0.5598589668251509, "grad_norm": 3.656344175338745, "learning_rate": 9.92561377468353e-07, "loss": 0.065, "step": 52400 }, { "epoch": 0.5599658101394305, "grad_norm": 0.21542447805404663, "learning_rate": 9.925584899266997e-07, "loss": 0.066, "step": 52410 }, { "epoch": 0.5600726534537102, "grad_norm": 5.4903693199157715, "learning_rate": 9.925556018289107e-07, "loss": 0.0578, "step": 52420 }, { "epoch": 0.5601794967679897, "grad_norm": 1.0371556282043457, "learning_rate": 9.925527131749893e-07, "loss": 0.0347, "step": 52430 }, { "epoch": 0.5602863400822694, "grad_norm": 0.43691423535346985, "learning_rate": 9.925498239649387e-07, "loss": 0.0843, "step": 52440 }, { "epoch": 0.560393183396549, "grad_norm": 27.075363159179688, "learning_rate": 9.925469341987622e-07, "loss": 0.075, "step": 52450 }, { "epoch": 0.5605000267108285, "grad_norm": 3.942248821258545, "learning_rate": 9.92544043876463e-07, "loss": 0.066, "step": 52460 }, { "epoch": 0.5606068700251082, "grad_norm": 2.261798858642578, "learning_rate": 9.925411529980443e-07, "loss": 0.0681, "step": 52470 }, { "epoch": 0.5607137133393878, "grad_norm": 10.499285697937012, "learning_rate": 9.925382615635096e-07, "loss": 0.1036, "step": 52480 }, { "epoch": 0.5608205566536674, "grad_norm": 3.5925002098083496, "learning_rate": 9.92535369572862e-07, "loss": 0.0251, "step": 52490 }, { "epoch": 0.560927399967947, "grad_norm": 1.3059409856796265, "learning_rate": 9.925324770261047e-07, "loss": 0.0394, "step": 52500 }, { "epoch": 0.5610342432822266, "grad_norm": 2.929320812225342, "learning_rate": 9.925295839232412e-07, "loss": 0.0559, "step": 52510 }, { "epoch": 0.5611410865965062, "grad_norm": 2.6609113216400146, "learning_rate": 9.925266902642746e-07, "loss": 0.0705, "step": 52520 }, { "epoch": 0.5612479299107859, "grad_norm": 5.059167385101318, "learning_rate": 9.925237960492081e-07, "loss": 0.0882, "step": 52530 }, { "epoch": 0.5613547732250654, "grad_norm": 0.30025264620780945, "learning_rate": 9.925209012780453e-07, "loss": 0.0852, "step": 52540 }, { "epoch": 0.561461616539345, "grad_norm": 2.8846871852874756, "learning_rate": 9.92518005950789e-07, "loss": 0.0212, "step": 52550 }, { "epoch": 0.5615684598536247, "grad_norm": 10.825695991516113, "learning_rate": 9.925151100674427e-07, "loss": 0.0642, "step": 52560 }, { "epoch": 0.5616753031679043, "grad_norm": 4.605319499969482, "learning_rate": 9.925122136280097e-07, "loss": 0.0802, "step": 52570 }, { "epoch": 0.5617821464821838, "grad_norm": 2.6117029190063477, "learning_rate": 9.925093166324934e-07, "loss": 0.0537, "step": 52580 }, { "epoch": 0.5618889897964635, "grad_norm": 5.722823619842529, "learning_rate": 9.925064190808966e-07, "loss": 0.0661, "step": 52590 }, { "epoch": 0.5619958331107431, "grad_norm": 5.430977821350098, "learning_rate": 9.925035209732232e-07, "loss": 0.0586, "step": 52600 }, { "epoch": 0.5621026764250227, "grad_norm": 0.2249298393726349, "learning_rate": 9.92500622309476e-07, "loss": 0.0824, "step": 52610 }, { "epoch": 0.5622095197393023, "grad_norm": 6.372142791748047, "learning_rate": 9.924977230896584e-07, "loss": 0.067, "step": 52620 }, { "epoch": 0.5623163630535819, "grad_norm": 2.5558550357818604, "learning_rate": 9.924948233137738e-07, "loss": 0.0508, "step": 52630 }, { "epoch": 0.5624232063678616, "grad_norm": 0.046240516006946564, "learning_rate": 9.92491922981825e-07, "loss": 0.0789, "step": 52640 }, { "epoch": 0.5625300496821412, "grad_norm": 0.15248334407806396, "learning_rate": 9.924890220938163e-07, "loss": 0.0668, "step": 52650 }, { "epoch": 0.5626368929964207, "grad_norm": 5.95747709274292, "learning_rate": 9.9248612064975e-07, "loss": 0.0623, "step": 52660 }, { "epoch": 0.5627437363107004, "grad_norm": 3.5221142768859863, "learning_rate": 9.924832186496295e-07, "loss": 0.0944, "step": 52670 }, { "epoch": 0.56285057962498, "grad_norm": 0.18047156929969788, "learning_rate": 9.924803160934584e-07, "loss": 0.0732, "step": 52680 }, { "epoch": 0.5629574229392595, "grad_norm": 5.762155532836914, "learning_rate": 9.9247741298124e-07, "loss": 0.0565, "step": 52690 }, { "epoch": 0.5630642662535392, "grad_norm": 0.4542432725429535, "learning_rate": 9.924745093129774e-07, "loss": 0.0403, "step": 52700 }, { "epoch": 0.5631711095678188, "grad_norm": 6.325902938842773, "learning_rate": 9.924716050886738e-07, "loss": 0.0684, "step": 52710 }, { "epoch": 0.5632779528820984, "grad_norm": 0.12555156648159027, "learning_rate": 9.924687003083327e-07, "loss": 0.0544, "step": 52720 }, { "epoch": 0.563384796196378, "grad_norm": 3.2618355751037598, "learning_rate": 9.924657949719572e-07, "loss": 0.0246, "step": 52730 }, { "epoch": 0.5634916395106576, "grad_norm": 6.157393932342529, "learning_rate": 9.924628890795508e-07, "loss": 0.0892, "step": 52740 }, { "epoch": 0.5635984828249372, "grad_norm": 3.21551251411438, "learning_rate": 9.924599826311163e-07, "loss": 0.0638, "step": 52750 }, { "epoch": 0.5637053261392169, "grad_norm": 16.26932144165039, "learning_rate": 9.924570756266577e-07, "loss": 0.0082, "step": 52760 }, { "epoch": 0.5638121694534964, "grad_norm": 0.05138324573636055, "learning_rate": 9.924541680661776e-07, "loss": 0.0453, "step": 52770 }, { "epoch": 0.563919012767776, "grad_norm": 5.786288738250732, "learning_rate": 9.924512599496797e-07, "loss": 0.0733, "step": 52780 }, { "epoch": 0.5640258560820557, "grad_norm": 5.870917797088623, "learning_rate": 9.924483512771672e-07, "loss": 0.0443, "step": 52790 }, { "epoch": 0.5641326993963353, "grad_norm": 1.970409870147705, "learning_rate": 9.924454420486433e-07, "loss": 0.053, "step": 52800 }, { "epoch": 0.5642395427106149, "grad_norm": 1.9806756973266602, "learning_rate": 9.924425322641112e-07, "loss": 0.0537, "step": 52810 }, { "epoch": 0.5643463860248945, "grad_norm": 5.008246421813965, "learning_rate": 9.924396219235745e-07, "loss": 0.0349, "step": 52820 }, { "epoch": 0.5644532293391741, "grad_norm": 0.5204228758811951, "learning_rate": 9.924367110270361e-07, "loss": 0.0838, "step": 52830 }, { "epoch": 0.5645600726534538, "grad_norm": 1.2958998680114746, "learning_rate": 9.924337995744996e-07, "loss": 0.0456, "step": 52840 }, { "epoch": 0.5646669159677333, "grad_norm": 6.0969061851501465, "learning_rate": 9.924308875659683e-07, "loss": 0.1082, "step": 52850 }, { "epoch": 0.5647737592820129, "grad_norm": 9.645760536193848, "learning_rate": 9.924279750014452e-07, "loss": 0.0433, "step": 52860 }, { "epoch": 0.5648806025962926, "grad_norm": 5.127379417419434, "learning_rate": 9.924250618809337e-07, "loss": 0.1056, "step": 52870 }, { "epoch": 0.5649874459105722, "grad_norm": 3.968700408935547, "learning_rate": 9.924221482044374e-07, "loss": 0.0791, "step": 52880 }, { "epoch": 0.5650942892248517, "grad_norm": 0.1399228721857071, "learning_rate": 9.92419233971959e-07, "loss": 0.0299, "step": 52890 }, { "epoch": 0.5652011325391314, "grad_norm": 5.938442230224609, "learning_rate": 9.924163191835025e-07, "loss": 0.0918, "step": 52900 }, { "epoch": 0.565307975853411, "grad_norm": 6.538224220275879, "learning_rate": 9.924134038390707e-07, "loss": 0.0899, "step": 52910 }, { "epoch": 0.5654148191676905, "grad_norm": 3.3645405769348145, "learning_rate": 9.924104879386667e-07, "loss": 0.0799, "step": 52920 }, { "epoch": 0.5655216624819702, "grad_norm": 3.517929792404175, "learning_rate": 9.924075714822943e-07, "loss": 0.164, "step": 52930 }, { "epoch": 0.5656285057962498, "grad_norm": 5.069491863250732, "learning_rate": 9.924046544699567e-07, "loss": 0.0602, "step": 52940 }, { "epoch": 0.5657353491105294, "grad_norm": 0.021825317293405533, "learning_rate": 9.924017369016568e-07, "loss": 0.0692, "step": 52950 }, { "epoch": 0.565842192424809, "grad_norm": 3.9520492553710938, "learning_rate": 9.923988187773983e-07, "loss": 0.0771, "step": 52960 }, { "epoch": 0.5659490357390886, "grad_norm": 3.076876640319824, "learning_rate": 9.923959000971847e-07, "loss": 0.0443, "step": 52970 }, { "epoch": 0.5660558790533682, "grad_norm": 3.6188104152679443, "learning_rate": 9.923929808610185e-07, "loss": 0.1394, "step": 52980 }, { "epoch": 0.5661627223676479, "grad_norm": 0.31644555926322937, "learning_rate": 9.923900610689039e-07, "loss": 0.0813, "step": 52990 }, { "epoch": 0.5662695656819274, "grad_norm": 2.9097237586975098, "learning_rate": 9.923871407208432e-07, "loss": 0.0202, "step": 53000 }, { "epoch": 0.5663764089962071, "grad_norm": 0.26746654510498047, "learning_rate": 9.923842198168405e-07, "loss": 0.042, "step": 53010 }, { "epoch": 0.5664832523104867, "grad_norm": 2.527320623397827, "learning_rate": 9.923812983568992e-07, "loss": 0.0363, "step": 53020 }, { "epoch": 0.5665900956247663, "grad_norm": 12.665781021118164, "learning_rate": 9.923783763410217e-07, "loss": 0.0455, "step": 53030 }, { "epoch": 0.5666969389390459, "grad_norm": 7.000811576843262, "learning_rate": 9.923754537692124e-07, "loss": 0.0532, "step": 53040 }, { "epoch": 0.5668037822533255, "grad_norm": 4.994381904602051, "learning_rate": 9.923725306414736e-07, "loss": 0.0615, "step": 53050 }, { "epoch": 0.5669106255676051, "grad_norm": 0.9065472483634949, "learning_rate": 9.923696069578093e-07, "loss": 0.0617, "step": 53060 }, { "epoch": 0.5670174688818848, "grad_norm": 2.9507288932800293, "learning_rate": 9.923666827182224e-07, "loss": 0.0747, "step": 53070 }, { "epoch": 0.5671243121961643, "grad_norm": 6.6243720054626465, "learning_rate": 9.923637579227166e-07, "loss": 0.1122, "step": 53080 }, { "epoch": 0.5672311555104439, "grad_norm": 0.44530731439590454, "learning_rate": 9.923608325712947e-07, "loss": 0.0687, "step": 53090 }, { "epoch": 0.5673379988247236, "grad_norm": 3.388035535812378, "learning_rate": 9.923579066639603e-07, "loss": 0.0552, "step": 53100 }, { "epoch": 0.5674448421390031, "grad_norm": 0.25654736161231995, "learning_rate": 9.923549802007167e-07, "loss": 0.0504, "step": 53110 }, { "epoch": 0.5675516854532827, "grad_norm": 2.4482245445251465, "learning_rate": 9.923520531815673e-07, "loss": 0.0301, "step": 53120 }, { "epoch": 0.5676585287675624, "grad_norm": 10.602155685424805, "learning_rate": 9.92349125606515e-07, "loss": 0.0831, "step": 53130 }, { "epoch": 0.567765372081842, "grad_norm": 2.2936360836029053, "learning_rate": 9.923461974755637e-07, "loss": 0.0316, "step": 53140 }, { "epoch": 0.5678722153961215, "grad_norm": 11.639281272888184, "learning_rate": 9.92343268788716e-07, "loss": 0.0655, "step": 53150 }, { "epoch": 0.5679790587104012, "grad_norm": 2.7236216068267822, "learning_rate": 9.92340339545976e-07, "loss": 0.032, "step": 53160 }, { "epoch": 0.5680859020246808, "grad_norm": 5.553865909576416, "learning_rate": 9.923374097473464e-07, "loss": 0.0462, "step": 53170 }, { "epoch": 0.5681927453389605, "grad_norm": 0.1557794064283371, "learning_rate": 9.923344793928306e-07, "loss": 0.0378, "step": 53180 }, { "epoch": 0.56829958865324, "grad_norm": 4.424191474914551, "learning_rate": 9.923315484824322e-07, "loss": 0.0349, "step": 53190 }, { "epoch": 0.5684064319675196, "grad_norm": 0.230403870344162, "learning_rate": 9.923286170161541e-07, "loss": 0.0659, "step": 53200 }, { "epoch": 0.5685132752817993, "grad_norm": 12.537850379943848, "learning_rate": 9.92325684994e-07, "loss": 0.091, "step": 53210 }, { "epoch": 0.5686201185960789, "grad_norm": 4.328704357147217, "learning_rate": 9.923227524159732e-07, "loss": 0.0445, "step": 53220 }, { "epoch": 0.5687269619103584, "grad_norm": 9.954025268554688, "learning_rate": 9.923198192820766e-07, "loss": 0.1003, "step": 53230 }, { "epoch": 0.5688338052246381, "grad_norm": 1.4311549663543701, "learning_rate": 9.923168855923138e-07, "loss": 0.0717, "step": 53240 }, { "epoch": 0.5689406485389177, "grad_norm": 5.391652584075928, "learning_rate": 9.923139513466884e-07, "loss": 0.0247, "step": 53250 }, { "epoch": 0.5690474918531973, "grad_norm": 9.013694763183594, "learning_rate": 9.923110165452029e-07, "loss": 0.0446, "step": 53260 }, { "epoch": 0.5691543351674769, "grad_norm": 14.034777641296387, "learning_rate": 9.923080811878613e-07, "loss": 0.0711, "step": 53270 }, { "epoch": 0.5692611784817565, "grad_norm": 1.2576557397842407, "learning_rate": 9.923051452746669e-07, "loss": 0.0652, "step": 53280 }, { "epoch": 0.5693680217960361, "grad_norm": 0.542329728603363, "learning_rate": 9.923022088056225e-07, "loss": 0.1369, "step": 53290 }, { "epoch": 0.5694748651103158, "grad_norm": 0.2105463594198227, "learning_rate": 9.922992717807319e-07, "loss": 0.0429, "step": 53300 }, { "epoch": 0.5695817084245953, "grad_norm": 5.276654243469238, "learning_rate": 9.922963341999983e-07, "loss": 0.0797, "step": 53310 }, { "epoch": 0.5696885517388749, "grad_norm": 8.582971572875977, "learning_rate": 9.92293396063425e-07, "loss": 0.0592, "step": 53320 }, { "epoch": 0.5697953950531546, "grad_norm": 0.05386999249458313, "learning_rate": 9.922904573710153e-07, "loss": 0.1022, "step": 53330 }, { "epoch": 0.5699022383674341, "grad_norm": 2.999285936355591, "learning_rate": 9.922875181227726e-07, "loss": 0.1127, "step": 53340 }, { "epoch": 0.5700090816817137, "grad_norm": 0.0927618145942688, "learning_rate": 9.922845783187e-07, "loss": 0.0571, "step": 53350 }, { "epoch": 0.5701159249959934, "grad_norm": 3.945990800857544, "learning_rate": 9.92281637958801e-07, "loss": 0.0508, "step": 53360 }, { "epoch": 0.570222768310273, "grad_norm": 6.0897040367126465, "learning_rate": 9.92278697043079e-07, "loss": 0.0428, "step": 53370 }, { "epoch": 0.5703296116245526, "grad_norm": 0.5969240665435791, "learning_rate": 9.922757555715368e-07, "loss": 0.0455, "step": 53380 }, { "epoch": 0.5704364549388322, "grad_norm": 7.684547424316406, "learning_rate": 9.922728135441784e-07, "loss": 0.051, "step": 53390 }, { "epoch": 0.5705432982531118, "grad_norm": 8.790072441101074, "learning_rate": 9.92269870961007e-07, "loss": 0.125, "step": 53400 }, { "epoch": 0.5706501415673915, "grad_norm": 0.1816566288471222, "learning_rate": 9.922669278220256e-07, "loss": 0.1016, "step": 53410 }, { "epoch": 0.570756984881671, "grad_norm": 5.317990779876709, "learning_rate": 9.922639841272375e-07, "loss": 0.0746, "step": 53420 }, { "epoch": 0.5708638281959506, "grad_norm": 0.571884274482727, "learning_rate": 9.922610398766465e-07, "loss": 0.0666, "step": 53430 }, { "epoch": 0.5709706715102303, "grad_norm": 4.3136067390441895, "learning_rate": 9.922580950702554e-07, "loss": 0.1089, "step": 53440 }, { "epoch": 0.5710775148245099, "grad_norm": 7.778277397155762, "learning_rate": 9.92255149708068e-07, "loss": 0.1121, "step": 53450 }, { "epoch": 0.5711843581387894, "grad_norm": 9.675710678100586, "learning_rate": 9.922522037900873e-07, "loss": 0.0729, "step": 53460 }, { "epoch": 0.5712912014530691, "grad_norm": 4.323632717132568, "learning_rate": 9.922492573163167e-07, "loss": 0.0621, "step": 53470 }, { "epoch": 0.5713980447673487, "grad_norm": 0.8740009069442749, "learning_rate": 9.922463102867595e-07, "loss": 0.0627, "step": 53480 }, { "epoch": 0.5715048880816282, "grad_norm": 3.2212657928466797, "learning_rate": 9.92243362701419e-07, "loss": 0.0873, "step": 53490 }, { "epoch": 0.5716117313959079, "grad_norm": 5.981582164764404, "learning_rate": 9.922404145602986e-07, "loss": 0.1319, "step": 53500 }, { "epoch": 0.5717185747101875, "grad_norm": 1.7053965330123901, "learning_rate": 9.922374658634017e-07, "loss": 0.0709, "step": 53510 }, { "epoch": 0.5718254180244671, "grad_norm": 11.928475379943848, "learning_rate": 9.922345166107316e-07, "loss": 0.0564, "step": 53520 }, { "epoch": 0.5719322613387468, "grad_norm": 7.1694135665893555, "learning_rate": 9.922315668022916e-07, "loss": 0.0798, "step": 53530 }, { "epoch": 0.5720391046530263, "grad_norm": 5.868022441864014, "learning_rate": 9.922286164380849e-07, "loss": 0.0701, "step": 53540 }, { "epoch": 0.572145947967306, "grad_norm": 11.783507347106934, "learning_rate": 9.922256655181148e-07, "loss": 0.1326, "step": 53550 }, { "epoch": 0.5722527912815856, "grad_norm": 8.425873756408691, "learning_rate": 9.92222714042385e-07, "loss": 0.0648, "step": 53560 }, { "epoch": 0.5723596345958651, "grad_norm": 14.044206619262695, "learning_rate": 9.922197620108985e-07, "loss": 0.0449, "step": 53570 }, { "epoch": 0.5724664779101448, "grad_norm": 3.212545871734619, "learning_rate": 9.922168094236589e-07, "loss": 0.0853, "step": 53580 }, { "epoch": 0.5725733212244244, "grad_norm": 4.179814338684082, "learning_rate": 9.922138562806693e-07, "loss": 0.0694, "step": 53590 }, { "epoch": 0.572680164538704, "grad_norm": 5.887369155883789, "learning_rate": 9.92210902581933e-07, "loss": 0.0774, "step": 53600 }, { "epoch": 0.5727870078529836, "grad_norm": 7.252881050109863, "learning_rate": 9.922079483274534e-07, "loss": 0.0271, "step": 53610 }, { "epoch": 0.5728938511672632, "grad_norm": 0.008011074736714363, "learning_rate": 9.922049935172341e-07, "loss": 0.0487, "step": 53620 }, { "epoch": 0.5730006944815428, "grad_norm": 0.030686020851135254, "learning_rate": 9.92202038151278e-07, "loss": 0.0894, "step": 53630 }, { "epoch": 0.5731075377958225, "grad_norm": 5.529747486114502, "learning_rate": 9.921990822295888e-07, "loss": 0.0926, "step": 53640 }, { "epoch": 0.573214381110102, "grad_norm": 8.499136924743652, "learning_rate": 9.921961257521695e-07, "loss": 0.062, "step": 53650 }, { "epoch": 0.5733212244243816, "grad_norm": 1.3111629486083984, "learning_rate": 9.921931687190238e-07, "loss": 0.0955, "step": 53660 }, { "epoch": 0.5734280677386613, "grad_norm": 9.00832462310791, "learning_rate": 9.921902111301548e-07, "loss": 0.0501, "step": 53670 }, { "epoch": 0.5735349110529409, "grad_norm": 10.41060733795166, "learning_rate": 9.92187252985566e-07, "loss": 0.0394, "step": 53680 }, { "epoch": 0.5736417543672204, "grad_norm": 0.7812352180480957, "learning_rate": 9.921842942852606e-07, "loss": 0.0522, "step": 53690 }, { "epoch": 0.5737485976815001, "grad_norm": 0.04795831814408302, "learning_rate": 9.92181335029242e-07, "loss": 0.1039, "step": 53700 }, { "epoch": 0.5738554409957797, "grad_norm": 4.690507411956787, "learning_rate": 9.921783752175134e-07, "loss": 0.1238, "step": 53710 }, { "epoch": 0.5739622843100592, "grad_norm": 6.649718761444092, "learning_rate": 9.921754148500784e-07, "loss": 0.0539, "step": 53720 }, { "epoch": 0.5740691276243389, "grad_norm": 4.553938865661621, "learning_rate": 9.921724539269403e-07, "loss": 0.0572, "step": 53730 }, { "epoch": 0.5741759709386185, "grad_norm": 2.4010255336761475, "learning_rate": 9.921694924481021e-07, "loss": 0.1079, "step": 53740 }, { "epoch": 0.5742828142528982, "grad_norm": 4.672682762145996, "learning_rate": 9.921665304135676e-07, "loss": 0.0343, "step": 53750 }, { "epoch": 0.5743896575671777, "grad_norm": 0.02233506180346012, "learning_rate": 9.921635678233398e-07, "loss": 0.0563, "step": 53760 }, { "epoch": 0.5744965008814573, "grad_norm": 3.3911325931549072, "learning_rate": 9.921606046774223e-07, "loss": 0.0682, "step": 53770 }, { "epoch": 0.574603344195737, "grad_norm": 2.683587074279785, "learning_rate": 9.921576409758182e-07, "loss": 0.032, "step": 53780 }, { "epoch": 0.5747101875100166, "grad_norm": 9.055368423461914, "learning_rate": 9.921546767185312e-07, "loss": 0.0965, "step": 53790 }, { "epoch": 0.5748170308242961, "grad_norm": 1.5033249855041504, "learning_rate": 9.921517119055643e-07, "loss": 0.1203, "step": 53800 }, { "epoch": 0.5749238741385758, "grad_norm": 0.3287407159805298, "learning_rate": 9.921487465369208e-07, "loss": 0.0421, "step": 53810 }, { "epoch": 0.5750307174528554, "grad_norm": 2.905931234359741, "learning_rate": 9.921457806126043e-07, "loss": 0.069, "step": 53820 }, { "epoch": 0.575137560767135, "grad_norm": 7.088010787963867, "learning_rate": 9.921428141326183e-07, "loss": 0.0324, "step": 53830 }, { "epoch": 0.5752444040814146, "grad_norm": 4.609252452850342, "learning_rate": 9.921398470969658e-07, "loss": 0.0339, "step": 53840 }, { "epoch": 0.5753512473956942, "grad_norm": 8.689729690551758, "learning_rate": 9.921368795056501e-07, "loss": 0.0654, "step": 53850 }, { "epoch": 0.5754580907099738, "grad_norm": 4.369722843170166, "learning_rate": 9.92133911358675e-07, "loss": 0.0368, "step": 53860 }, { "epoch": 0.5755649340242535, "grad_norm": 0.7903986573219299, "learning_rate": 9.921309426560433e-07, "loss": 0.0321, "step": 53870 }, { "epoch": 0.575671777338533, "grad_norm": 0.03854775056242943, "learning_rate": 9.921279733977588e-07, "loss": 0.0746, "step": 53880 }, { "epoch": 0.5757786206528126, "grad_norm": 0.3234756290912628, "learning_rate": 9.921250035838246e-07, "loss": 0.0409, "step": 53890 }, { "epoch": 0.5758854639670923, "grad_norm": 3.809147834777832, "learning_rate": 9.92122033214244e-07, "loss": 0.0344, "step": 53900 }, { "epoch": 0.5759923072813719, "grad_norm": 0.3252154290676117, "learning_rate": 9.921190622890207e-07, "loss": 0.0904, "step": 53910 }, { "epoch": 0.5760991505956515, "grad_norm": 3.781925916671753, "learning_rate": 9.921160908081579e-07, "loss": 0.0468, "step": 53920 }, { "epoch": 0.5762059939099311, "grad_norm": 0.43790674209594727, "learning_rate": 9.921131187716588e-07, "loss": 0.0728, "step": 53930 }, { "epoch": 0.5763128372242107, "grad_norm": 3.984470844268799, "learning_rate": 9.921101461795267e-07, "loss": 0.0607, "step": 53940 }, { "epoch": 0.5764196805384904, "grad_norm": 4.754366397857666, "learning_rate": 9.92107173031765e-07, "loss": 0.0679, "step": 53950 }, { "epoch": 0.5765265238527699, "grad_norm": 8.085291862487793, "learning_rate": 9.921041993283776e-07, "loss": 0.1256, "step": 53960 }, { "epoch": 0.5766333671670495, "grad_norm": 3.8040308952331543, "learning_rate": 9.921012250693672e-07, "loss": 0.0534, "step": 53970 }, { "epoch": 0.5767402104813292, "grad_norm": 0.41475099325180054, "learning_rate": 9.920982502547373e-07, "loss": 0.0456, "step": 53980 }, { "epoch": 0.5768470537956087, "grad_norm": 10.954474449157715, "learning_rate": 9.920952748844915e-07, "loss": 0.0409, "step": 53990 }, { "epoch": 0.5769538971098883, "grad_norm": 6.593587398529053, "learning_rate": 9.92092298958633e-07, "loss": 0.0482, "step": 54000 }, { "epoch": 0.577060740424168, "grad_norm": 3.818796396255493, "learning_rate": 9.92089322477165e-07, "loss": 0.0223, "step": 54010 }, { "epoch": 0.5771675837384476, "grad_norm": 6.868524551391602, "learning_rate": 9.92086345440091e-07, "loss": 0.0221, "step": 54020 }, { "epoch": 0.5772744270527271, "grad_norm": 10.815800666809082, "learning_rate": 9.920833678474144e-07, "loss": 0.0501, "step": 54030 }, { "epoch": 0.5773812703670068, "grad_norm": 4.725552558898926, "learning_rate": 9.920803896991386e-07, "loss": 0.0432, "step": 54040 }, { "epoch": 0.5774881136812864, "grad_norm": 5.167820930480957, "learning_rate": 9.920774109952669e-07, "loss": 0.1071, "step": 54050 }, { "epoch": 0.577594956995566, "grad_norm": 5.707150936126709, "learning_rate": 9.920744317358026e-07, "loss": 0.0602, "step": 54060 }, { "epoch": 0.5777018003098456, "grad_norm": 0.3975360691547394, "learning_rate": 9.92071451920749e-07, "loss": 0.0745, "step": 54070 }, { "epoch": 0.5778086436241252, "grad_norm": 3.92110013961792, "learning_rate": 9.9206847155011e-07, "loss": 0.0805, "step": 54080 }, { "epoch": 0.5779154869384048, "grad_norm": 4.276169776916504, "learning_rate": 9.920654906238884e-07, "loss": 0.0384, "step": 54090 }, { "epoch": 0.5780223302526845, "grad_norm": 6.381209850311279, "learning_rate": 9.920625091420875e-07, "loss": 0.0703, "step": 54100 }, { "epoch": 0.578129173566964, "grad_norm": 23.208112716674805, "learning_rate": 9.92059527104711e-07, "loss": 0.1507, "step": 54110 }, { "epoch": 0.5782360168812437, "grad_norm": 6.672577381134033, "learning_rate": 9.920565445117624e-07, "loss": 0.0527, "step": 54120 }, { "epoch": 0.5783428601955233, "grad_norm": 0.14478275179862976, "learning_rate": 9.920535613632446e-07, "loss": 0.0181, "step": 54130 }, { "epoch": 0.5784497035098028, "grad_norm": 2.2801270484924316, "learning_rate": 9.920505776591612e-07, "loss": 0.047, "step": 54140 }, { "epoch": 0.5785565468240825, "grad_norm": 0.15108433365821838, "learning_rate": 9.920475933995155e-07, "loss": 0.025, "step": 54150 }, { "epoch": 0.5786633901383621, "grad_norm": 0.18934985995292664, "learning_rate": 9.92044608584311e-07, "loss": 0.0643, "step": 54160 }, { "epoch": 0.5787702334526417, "grad_norm": 3.8829472064971924, "learning_rate": 9.92041623213551e-07, "loss": 0.0379, "step": 54170 }, { "epoch": 0.5788770767669214, "grad_norm": 0.7552933096885681, "learning_rate": 9.920386372872389e-07, "loss": 0.0564, "step": 54180 }, { "epoch": 0.5789839200812009, "grad_norm": 5.532723426818848, "learning_rate": 9.92035650805378e-07, "loss": 0.0724, "step": 54190 }, { "epoch": 0.5790907633954805, "grad_norm": 4.1165385246276855, "learning_rate": 9.920326637679717e-07, "loss": 0.0535, "step": 54200 }, { "epoch": 0.5791976067097602, "grad_norm": 7.937173366546631, "learning_rate": 9.920296761750235e-07, "loss": 0.0503, "step": 54210 }, { "epoch": 0.5793044500240397, "grad_norm": 3.4731087684631348, "learning_rate": 9.920266880265365e-07, "loss": 0.0688, "step": 54220 }, { "epoch": 0.5794112933383193, "grad_norm": 0.8715401887893677, "learning_rate": 9.920236993225144e-07, "loss": 0.0643, "step": 54230 }, { "epoch": 0.579518136652599, "grad_norm": 21.3492374420166, "learning_rate": 9.920207100629603e-07, "loss": 0.0662, "step": 54240 }, { "epoch": 0.5796249799668786, "grad_norm": 0.1706097573041916, "learning_rate": 9.920177202478777e-07, "loss": 0.0363, "step": 54250 }, { "epoch": 0.5797318232811581, "grad_norm": 3.9289004802703857, "learning_rate": 9.9201472987727e-07, "loss": 0.0512, "step": 54260 }, { "epoch": 0.5798386665954378, "grad_norm": 1.4219738245010376, "learning_rate": 9.920117389511405e-07, "loss": 0.0966, "step": 54270 }, { "epoch": 0.5799455099097174, "grad_norm": 0.9080549478530884, "learning_rate": 9.920087474694926e-07, "loss": 0.0713, "step": 54280 }, { "epoch": 0.5800523532239971, "grad_norm": 23.104881286621094, "learning_rate": 9.920057554323296e-07, "loss": 0.0803, "step": 54290 }, { "epoch": 0.5801591965382766, "grad_norm": 0.5784618258476257, "learning_rate": 9.920027628396551e-07, "loss": 0.0096, "step": 54300 }, { "epoch": 0.5802660398525562, "grad_norm": 4.137352466583252, "learning_rate": 9.919997696914723e-07, "loss": 0.0471, "step": 54310 }, { "epoch": 0.5803728831668359, "grad_norm": 5.074397087097168, "learning_rate": 9.919967759877847e-07, "loss": 0.0889, "step": 54320 }, { "epoch": 0.5804797264811155, "grad_norm": 2.4592816829681396, "learning_rate": 9.919937817285957e-07, "loss": 0.0527, "step": 54330 }, { "epoch": 0.580586569795395, "grad_norm": 6.216898441314697, "learning_rate": 9.919907869139083e-07, "loss": 0.072, "step": 54340 }, { "epoch": 0.5806934131096747, "grad_norm": 1.7191038131713867, "learning_rate": 9.919877915437264e-07, "loss": 0.0527, "step": 54350 }, { "epoch": 0.5808002564239543, "grad_norm": 6.005406379699707, "learning_rate": 9.91984795618053e-07, "loss": 0.0446, "step": 54360 }, { "epoch": 0.5809070997382338, "grad_norm": 0.039574138820171356, "learning_rate": 9.919817991368917e-07, "loss": 0.0538, "step": 54370 }, { "epoch": 0.5810139430525135, "grad_norm": 0.29223302006721497, "learning_rate": 9.919788021002459e-07, "loss": 0.0305, "step": 54380 }, { "epoch": 0.5811207863667931, "grad_norm": 0.8772163391113281, "learning_rate": 9.919758045081189e-07, "loss": 0.0483, "step": 54390 }, { "epoch": 0.5812276296810727, "grad_norm": 0.1473662257194519, "learning_rate": 9.91972806360514e-07, "loss": 0.3076, "step": 54400 }, { "epoch": 0.5813344729953523, "grad_norm": 2.1798975467681885, "learning_rate": 9.919698076574346e-07, "loss": 0.0787, "step": 54410 }, { "epoch": 0.5814413163096319, "grad_norm": 3.9103798866271973, "learning_rate": 9.919668083988842e-07, "loss": 0.1024, "step": 54420 }, { "epoch": 0.5815481596239115, "grad_norm": 4.647755146026611, "learning_rate": 9.91963808584866e-07, "loss": 0.0455, "step": 54430 }, { "epoch": 0.5816550029381912, "grad_norm": 7.900379657745361, "learning_rate": 9.919608082153839e-07, "loss": 0.1029, "step": 54440 }, { "epoch": 0.5817618462524707, "grad_norm": 0.5330588817596436, "learning_rate": 9.919578072904408e-07, "loss": 0.0278, "step": 54450 }, { "epoch": 0.5818686895667503, "grad_norm": 3.49462890625, "learning_rate": 9.919548058100399e-07, "loss": 0.075, "step": 54460 }, { "epoch": 0.58197553288103, "grad_norm": 5.695958614349365, "learning_rate": 9.919518037741851e-07, "loss": 0.0783, "step": 54470 }, { "epoch": 0.5820823761953096, "grad_norm": 2.908521890640259, "learning_rate": 9.919488011828798e-07, "loss": 0.0392, "step": 54480 }, { "epoch": 0.5821892195095892, "grad_norm": 6.361995220184326, "learning_rate": 9.91945798036127e-07, "loss": 0.0251, "step": 54490 }, { "epoch": 0.5822960628238688, "grad_norm": 5.101492404937744, "learning_rate": 9.919427943339302e-07, "loss": 0.0318, "step": 54500 }, { "epoch": 0.5824029061381484, "grad_norm": 6.365163326263428, "learning_rate": 9.91939790076293e-07, "loss": 0.0397, "step": 54510 }, { "epoch": 0.5825097494524281, "grad_norm": 4.811028480529785, "learning_rate": 9.919367852632184e-07, "loss": 0.0269, "step": 54520 }, { "epoch": 0.5826165927667076, "grad_norm": 1.8188434839248657, "learning_rate": 9.919337798947103e-07, "loss": 0.023, "step": 54530 }, { "epoch": 0.5827234360809872, "grad_norm": 2.3710219860076904, "learning_rate": 9.919307739707717e-07, "loss": 0.061, "step": 54540 }, { "epoch": 0.5828302793952669, "grad_norm": 1.8966403007507324, "learning_rate": 9.919277674914062e-07, "loss": 0.0231, "step": 54550 }, { "epoch": 0.5829371227095465, "grad_norm": 8.127806663513184, "learning_rate": 9.91924760456617e-07, "loss": 0.0525, "step": 54560 }, { "epoch": 0.583043966023826, "grad_norm": 4.088716983795166, "learning_rate": 9.919217528664077e-07, "loss": 0.1235, "step": 54570 }, { "epoch": 0.5831508093381057, "grad_norm": 5.410754680633545, "learning_rate": 9.919187447207814e-07, "loss": 0.0666, "step": 54580 }, { "epoch": 0.5832576526523853, "grad_norm": 5.085279941558838, "learning_rate": 9.919157360197421e-07, "loss": 0.0529, "step": 54590 }, { "epoch": 0.5833644959666648, "grad_norm": 7.2403717041015625, "learning_rate": 9.919127267632927e-07, "loss": 0.0418, "step": 54600 }, { "epoch": 0.5834713392809445, "grad_norm": 2.016364574432373, "learning_rate": 9.919097169514366e-07, "loss": 0.0408, "step": 54610 }, { "epoch": 0.5835781825952241, "grad_norm": 9.62164306640625, "learning_rate": 9.919067065841774e-07, "loss": 0.0847, "step": 54620 }, { "epoch": 0.5836850259095037, "grad_norm": 0.18092744052410126, "learning_rate": 9.919036956615183e-07, "loss": 0.0518, "step": 54630 }, { "epoch": 0.5837918692237833, "grad_norm": 1.0723997354507446, "learning_rate": 9.919006841834628e-07, "loss": 0.0748, "step": 54640 }, { "epoch": 0.5838987125380629, "grad_norm": 2.0828516483306885, "learning_rate": 9.918976721500142e-07, "loss": 0.043, "step": 54650 }, { "epoch": 0.5840055558523426, "grad_norm": 6.1513543128967285, "learning_rate": 9.918946595611763e-07, "loss": 0.0439, "step": 54660 }, { "epoch": 0.5841123991666222, "grad_norm": 2.0665836334228516, "learning_rate": 9.91891646416952e-07, "loss": 0.0913, "step": 54670 }, { "epoch": 0.5842192424809017, "grad_norm": 8.928906440734863, "learning_rate": 9.91888632717345e-07, "loss": 0.0494, "step": 54680 }, { "epoch": 0.5843260857951814, "grad_norm": 0.2418118417263031, "learning_rate": 9.918856184623586e-07, "loss": 0.0607, "step": 54690 }, { "epoch": 0.584432929109461, "grad_norm": 8.113680839538574, "learning_rate": 9.91882603651996e-07, "loss": 0.0722, "step": 54700 }, { "epoch": 0.5845397724237406, "grad_norm": 0.3479001820087433, "learning_rate": 9.918795882862612e-07, "loss": 0.0712, "step": 54710 }, { "epoch": 0.5846466157380202, "grad_norm": 11.269012451171875, "learning_rate": 9.91876572365157e-07, "loss": 0.0491, "step": 54720 }, { "epoch": 0.5847534590522998, "grad_norm": 4.690757751464844, "learning_rate": 9.91873555888687e-07, "loss": 0.0644, "step": 54730 }, { "epoch": 0.5848603023665794, "grad_norm": 3.737149477005005, "learning_rate": 9.918705388568548e-07, "loss": 0.0508, "step": 54740 }, { "epoch": 0.5849671456808591, "grad_norm": 3.672966957092285, "learning_rate": 9.918675212696635e-07, "loss": 0.0622, "step": 54750 }, { "epoch": 0.5850739889951386, "grad_norm": 6.500947952270508, "learning_rate": 9.918645031271166e-07, "loss": 0.042, "step": 54760 }, { "epoch": 0.5851808323094182, "grad_norm": 4.726306915283203, "learning_rate": 9.918614844292177e-07, "loss": 0.0602, "step": 54770 }, { "epoch": 0.5852876756236979, "grad_norm": 0.28694236278533936, "learning_rate": 9.918584651759698e-07, "loss": 0.0473, "step": 54780 }, { "epoch": 0.5853945189379774, "grad_norm": 3.67287278175354, "learning_rate": 9.918554453673766e-07, "loss": 0.0565, "step": 54790 }, { "epoch": 0.585501362252257, "grad_norm": 3.247486114501953, "learning_rate": 9.918524250034418e-07, "loss": 0.058, "step": 54800 }, { "epoch": 0.5856082055665367, "grad_norm": 6.595465183258057, "learning_rate": 9.918494040841682e-07, "loss": 0.0327, "step": 54810 }, { "epoch": 0.5857150488808163, "grad_norm": 17.654701232910156, "learning_rate": 9.918463826095597e-07, "loss": 0.0928, "step": 54820 }, { "epoch": 0.5858218921950958, "grad_norm": 0.3129287660121918, "learning_rate": 9.918433605796195e-07, "loss": 0.0127, "step": 54830 }, { "epoch": 0.5859287355093755, "grad_norm": 2.356055498123169, "learning_rate": 9.91840337994351e-07, "loss": 0.063, "step": 54840 }, { "epoch": 0.5860355788236551, "grad_norm": 5.734476566314697, "learning_rate": 9.918373148537576e-07, "loss": 0.0663, "step": 54850 }, { "epoch": 0.5861424221379348, "grad_norm": 0.6931033134460449, "learning_rate": 9.918342911578426e-07, "loss": 0.0599, "step": 54860 }, { "epoch": 0.5862492654522143, "grad_norm": 0.10957475751638412, "learning_rate": 9.918312669066097e-07, "loss": 0.0574, "step": 54870 }, { "epoch": 0.5863561087664939, "grad_norm": 3.176840305328369, "learning_rate": 9.918282421000623e-07, "loss": 0.0697, "step": 54880 }, { "epoch": 0.5864629520807736, "grad_norm": 8.76550006866455, "learning_rate": 9.918252167382036e-07, "loss": 0.0831, "step": 54890 }, { "epoch": 0.5865697953950532, "grad_norm": 9.18884563446045, "learning_rate": 9.91822190821037e-07, "loss": 0.055, "step": 54900 }, { "epoch": 0.5866766387093327, "grad_norm": 6.1308274269104, "learning_rate": 9.91819164348566e-07, "loss": 0.0864, "step": 54910 }, { "epoch": 0.5867834820236124, "grad_norm": 8.310546875, "learning_rate": 9.918161373207942e-07, "loss": 0.1064, "step": 54920 }, { "epoch": 0.586890325337892, "grad_norm": 1.1858011484146118, "learning_rate": 9.918131097377248e-07, "loss": 0.0465, "step": 54930 }, { "epoch": 0.5869971686521716, "grad_norm": 6.219872951507568, "learning_rate": 9.918100815993614e-07, "loss": 0.0559, "step": 54940 }, { "epoch": 0.5871040119664512, "grad_norm": 0.3026527762413025, "learning_rate": 9.918070529057073e-07, "loss": 0.0347, "step": 54950 }, { "epoch": 0.5872108552807308, "grad_norm": 1.1082018613815308, "learning_rate": 9.918040236567657e-07, "loss": 0.0187, "step": 54960 }, { "epoch": 0.5873176985950104, "grad_norm": 6.693530082702637, "learning_rate": 9.918009938525404e-07, "loss": 0.0851, "step": 54970 }, { "epoch": 0.58742454190929, "grad_norm": 2.381864309310913, "learning_rate": 9.917979634930347e-07, "loss": 0.0392, "step": 54980 }, { "epoch": 0.5875313852235696, "grad_norm": 5.527963638305664, "learning_rate": 9.917949325782518e-07, "loss": 0.0398, "step": 54990 }, { "epoch": 0.5876382285378492, "grad_norm": 8.336455345153809, "learning_rate": 9.917919011081955e-07, "loss": 0.04, "step": 55000 }, { "epoch": 0.5877450718521289, "grad_norm": 14.600458145141602, "learning_rate": 9.917888690828688e-07, "loss": 0.051, "step": 55010 }, { "epoch": 0.5878519151664084, "grad_norm": 1.1405614614486694, "learning_rate": 9.917858365022756e-07, "loss": 0.0751, "step": 55020 }, { "epoch": 0.5879587584806881, "grad_norm": 11.956127166748047, "learning_rate": 9.91782803366419e-07, "loss": 0.1251, "step": 55030 }, { "epoch": 0.5880656017949677, "grad_norm": 3.47715425491333, "learning_rate": 9.917797696753023e-07, "loss": 0.0829, "step": 55040 }, { "epoch": 0.5881724451092473, "grad_norm": 1.2200175523757935, "learning_rate": 9.917767354289294e-07, "loss": 0.0486, "step": 55050 }, { "epoch": 0.588279288423527, "grad_norm": 6.460307598114014, "learning_rate": 9.917737006273034e-07, "loss": 0.1514, "step": 55060 }, { "epoch": 0.5883861317378065, "grad_norm": 4.0302886962890625, "learning_rate": 9.917706652704277e-07, "loss": 0.0368, "step": 55070 }, { "epoch": 0.5884929750520861, "grad_norm": 7.895974159240723, "learning_rate": 9.917676293583057e-07, "loss": 0.0981, "step": 55080 }, { "epoch": 0.5885998183663658, "grad_norm": 2.9051029682159424, "learning_rate": 9.91764592890941e-07, "loss": 0.0633, "step": 55090 }, { "epoch": 0.5887066616806453, "grad_norm": 0.23755653202533722, "learning_rate": 9.91761555868337e-07, "loss": 0.0836, "step": 55100 }, { "epoch": 0.5888135049949249, "grad_norm": 1.036483645439148, "learning_rate": 9.91758518290497e-07, "loss": 0.0221, "step": 55110 }, { "epoch": 0.5889203483092046, "grad_norm": 6.620837211608887, "learning_rate": 9.91755480157425e-07, "loss": 0.0985, "step": 55120 }, { "epoch": 0.5890271916234842, "grad_norm": 2.9110240936279297, "learning_rate": 9.917524414691232e-07, "loss": 0.1112, "step": 55130 }, { "epoch": 0.5891340349377637, "grad_norm": 10.170003890991211, "learning_rate": 9.917494022255962e-07, "loss": 0.0599, "step": 55140 }, { "epoch": 0.5892408782520434, "grad_norm": 2.7447140216827393, "learning_rate": 9.91746362426847e-07, "loss": 0.0331, "step": 55150 }, { "epoch": 0.589347721566323, "grad_norm": 5.957892894744873, "learning_rate": 9.917433220728791e-07, "loss": 0.0572, "step": 55160 }, { "epoch": 0.5894545648806025, "grad_norm": 0.09128842502832413, "learning_rate": 9.917402811636958e-07, "loss": 0.0985, "step": 55170 }, { "epoch": 0.5895614081948822, "grad_norm": 1.145108699798584, "learning_rate": 9.917372396993005e-07, "loss": 0.0732, "step": 55180 }, { "epoch": 0.5896682515091618, "grad_norm": 4.709391117095947, "learning_rate": 9.917341976796967e-07, "loss": 0.031, "step": 55190 }, { "epoch": 0.5897750948234414, "grad_norm": 5.067708492279053, "learning_rate": 9.91731155104888e-07, "loss": 0.0317, "step": 55200 }, { "epoch": 0.589881938137721, "grad_norm": 3.101736068725586, "learning_rate": 9.917281119748777e-07, "loss": 0.0389, "step": 55210 }, { "epoch": 0.5899887814520006, "grad_norm": 7.033797264099121, "learning_rate": 9.917250682896692e-07, "loss": 0.0718, "step": 55220 }, { "epoch": 0.5900956247662803, "grad_norm": 2.204451084136963, "learning_rate": 9.91722024049266e-07, "loss": 0.0754, "step": 55230 }, { "epoch": 0.5902024680805599, "grad_norm": 1.6558879613876343, "learning_rate": 9.917189792536719e-07, "loss": 0.0738, "step": 55240 }, { "epoch": 0.5903093113948394, "grad_norm": 9.853666305541992, "learning_rate": 9.917159339028895e-07, "loss": 0.048, "step": 55250 }, { "epoch": 0.5904161547091191, "grad_norm": 0.33390745520591736, "learning_rate": 9.917128879969229e-07, "loss": 0.0706, "step": 55260 }, { "epoch": 0.5905229980233987, "grad_norm": 5.73103141784668, "learning_rate": 9.91709841535775e-07, "loss": 0.0514, "step": 55270 }, { "epoch": 0.5906298413376783, "grad_norm": 2.2436017990112305, "learning_rate": 9.9170679451945e-07, "loss": 0.0555, "step": 55280 }, { "epoch": 0.5907366846519579, "grad_norm": 8.159823417663574, "learning_rate": 9.917037469479508e-07, "loss": 0.0442, "step": 55290 }, { "epoch": 0.5908435279662375, "grad_norm": 6.5039896965026855, "learning_rate": 9.917006988212808e-07, "loss": 0.0456, "step": 55300 }, { "epoch": 0.5909503712805171, "grad_norm": 3.8696675300598145, "learning_rate": 9.916976501394438e-07, "loss": 0.0345, "step": 55310 }, { "epoch": 0.5910572145947968, "grad_norm": 6.073395252227783, "learning_rate": 9.91694600902443e-07, "loss": 0.0557, "step": 55320 }, { "epoch": 0.5911640579090763, "grad_norm": 0.3401258885860443, "learning_rate": 9.916915511102817e-07, "loss": 0.0582, "step": 55330 }, { "epoch": 0.5912709012233559, "grad_norm": 0.9549832344055176, "learning_rate": 9.916885007629638e-07, "loss": 0.0611, "step": 55340 }, { "epoch": 0.5913777445376356, "grad_norm": 5.155816555023193, "learning_rate": 9.916854498604923e-07, "loss": 0.104, "step": 55350 }, { "epoch": 0.5914845878519152, "grad_norm": 7.464504718780518, "learning_rate": 9.916823984028707e-07, "loss": 0.0475, "step": 55360 }, { "epoch": 0.5915914311661947, "grad_norm": 11.165543556213379, "learning_rate": 9.91679346390103e-07, "loss": 0.1018, "step": 55370 }, { "epoch": 0.5916982744804744, "grad_norm": 5.416499614715576, "learning_rate": 9.916762938221918e-07, "loss": 0.0766, "step": 55380 }, { "epoch": 0.591805117794754, "grad_norm": 4.740756988525391, "learning_rate": 9.91673240699141e-07, "loss": 0.0785, "step": 55390 }, { "epoch": 0.5919119611090337, "grad_norm": 12.552521705627441, "learning_rate": 9.916701870209542e-07, "loss": 0.0537, "step": 55400 }, { "epoch": 0.5920188044233132, "grad_norm": 1.658237099647522, "learning_rate": 9.916671327876345e-07, "loss": 0.024, "step": 55410 }, { "epoch": 0.5921256477375928, "grad_norm": 11.314929962158203, "learning_rate": 9.916640779991857e-07, "loss": 0.0663, "step": 55420 }, { "epoch": 0.5922324910518725, "grad_norm": 2.369097948074341, "learning_rate": 9.916610226556107e-07, "loss": 0.0264, "step": 55430 }, { "epoch": 0.592339334366152, "grad_norm": 2.3364944458007812, "learning_rate": 9.916579667569137e-07, "loss": 0.1155, "step": 55440 }, { "epoch": 0.5924461776804316, "grad_norm": 13.337577819824219, "learning_rate": 9.916549103030975e-07, "loss": 0.0745, "step": 55450 }, { "epoch": 0.5925530209947113, "grad_norm": 6.012083053588867, "learning_rate": 9.916518532941658e-07, "loss": 0.1389, "step": 55460 }, { "epoch": 0.5926598643089909, "grad_norm": 1.243518590927124, "learning_rate": 9.916487957301221e-07, "loss": 0.0347, "step": 55470 }, { "epoch": 0.5927667076232704, "grad_norm": 2.7512543201446533, "learning_rate": 9.9164573761097e-07, "loss": 0.0812, "step": 55480 }, { "epoch": 0.5928735509375501, "grad_norm": 10.121052742004395, "learning_rate": 9.916426789367123e-07, "loss": 0.0893, "step": 55490 }, { "epoch": 0.5929803942518297, "grad_norm": 0.4508603513240814, "learning_rate": 9.916396197073532e-07, "loss": 0.0486, "step": 55500 }, { "epoch": 0.5930872375661093, "grad_norm": 2.6461663246154785, "learning_rate": 9.916365599228959e-07, "loss": 0.0894, "step": 55510 }, { "epoch": 0.5931940808803889, "grad_norm": 10.406205177307129, "learning_rate": 9.916334995833438e-07, "loss": 0.0808, "step": 55520 }, { "epoch": 0.5933009241946685, "grad_norm": 6.5468430519104, "learning_rate": 9.916304386887003e-07, "loss": 0.0852, "step": 55530 }, { "epoch": 0.5934077675089481, "grad_norm": 2.13484525680542, "learning_rate": 9.91627377238969e-07, "loss": 0.0404, "step": 55540 }, { "epoch": 0.5935146108232278, "grad_norm": 1.7912160158157349, "learning_rate": 9.916243152341531e-07, "loss": 0.0673, "step": 55550 }, { "epoch": 0.5936214541375073, "grad_norm": 11.30615520477295, "learning_rate": 9.916212526742564e-07, "loss": 0.0784, "step": 55560 }, { "epoch": 0.5937282974517869, "grad_norm": 15.016559600830078, "learning_rate": 9.91618189559282e-07, "loss": 0.0503, "step": 55570 }, { "epoch": 0.5938351407660666, "grad_norm": 0.1411653310060501, "learning_rate": 9.91615125889234e-07, "loss": 0.0681, "step": 55580 }, { "epoch": 0.5939419840803462, "grad_norm": 11.727179527282715, "learning_rate": 9.91612061664115e-07, "loss": 0.0662, "step": 55590 }, { "epoch": 0.5940488273946258, "grad_norm": 1.9891618490219116, "learning_rate": 9.91608996883929e-07, "loss": 0.0887, "step": 55600 }, { "epoch": 0.5941556707089054, "grad_norm": 1.7687228918075562, "learning_rate": 9.916059315486793e-07, "loss": 0.0413, "step": 55610 }, { "epoch": 0.594262514023185, "grad_norm": 11.18144702911377, "learning_rate": 9.916028656583698e-07, "loss": 0.1015, "step": 55620 }, { "epoch": 0.5943693573374647, "grad_norm": 11.089632987976074, "learning_rate": 9.915997992130031e-07, "loss": 0.0655, "step": 55630 }, { "epoch": 0.5944762006517442, "grad_norm": 1.694751501083374, "learning_rate": 9.915967322125834e-07, "loss": 0.0298, "step": 55640 }, { "epoch": 0.5945830439660238, "grad_norm": 4.830526351928711, "learning_rate": 9.915936646571136e-07, "loss": 0.0833, "step": 55650 }, { "epoch": 0.5946898872803035, "grad_norm": 2.3695390224456787, "learning_rate": 9.915905965465975e-07, "loss": 0.0651, "step": 55660 }, { "epoch": 0.594796730594583, "grad_norm": 5.891043186187744, "learning_rate": 9.915875278810387e-07, "loss": 0.0698, "step": 55670 }, { "epoch": 0.5949035739088626, "grad_norm": 11.869022369384766, "learning_rate": 9.915844586604404e-07, "loss": 0.0502, "step": 55680 }, { "epoch": 0.5950104172231423, "grad_norm": 7.667956352233887, "learning_rate": 9.915813888848062e-07, "loss": 0.0326, "step": 55690 }, { "epoch": 0.5951172605374219, "grad_norm": 5.216766357421875, "learning_rate": 9.915783185541394e-07, "loss": 0.0811, "step": 55700 }, { "epoch": 0.5952241038517014, "grad_norm": 0.4044879376888275, "learning_rate": 9.915752476684435e-07, "loss": 0.0475, "step": 55710 }, { "epoch": 0.5953309471659811, "grad_norm": 0.45595782995224, "learning_rate": 9.915721762277223e-07, "loss": 0.0579, "step": 55720 }, { "epoch": 0.5954377904802607, "grad_norm": 4.4327898025512695, "learning_rate": 9.91569104231979e-07, "loss": 0.0881, "step": 55730 }, { "epoch": 0.5955446337945403, "grad_norm": 12.145088195800781, "learning_rate": 9.915660316812168e-07, "loss": 0.0913, "step": 55740 }, { "epoch": 0.5956514771088199, "grad_norm": 0.7962266206741333, "learning_rate": 9.915629585754397e-07, "loss": 0.0432, "step": 55750 }, { "epoch": 0.5957583204230995, "grad_norm": 0.2740163803100586, "learning_rate": 9.915598849146509e-07, "loss": 0.0322, "step": 55760 }, { "epoch": 0.5958651637373792, "grad_norm": 0.5508605241775513, "learning_rate": 9.915568106988536e-07, "loss": 0.02, "step": 55770 }, { "epoch": 0.5959720070516588, "grad_norm": 3.5684425830841064, "learning_rate": 9.915537359280518e-07, "loss": 0.0509, "step": 55780 }, { "epoch": 0.5960788503659383, "grad_norm": 0.297091543674469, "learning_rate": 9.915506606022488e-07, "loss": 0.1255, "step": 55790 }, { "epoch": 0.596185693680218, "grad_norm": 2.03604793548584, "learning_rate": 9.91547584721448e-07, "loss": 0.0406, "step": 55800 }, { "epoch": 0.5962925369944976, "grad_norm": 0.0675896629691124, "learning_rate": 9.915445082856528e-07, "loss": 0.042, "step": 55810 }, { "epoch": 0.5963993803087771, "grad_norm": 6.545508861541748, "learning_rate": 9.915414312948666e-07, "loss": 0.0516, "step": 55820 }, { "epoch": 0.5965062236230568, "grad_norm": 9.087000846862793, "learning_rate": 9.915383537490933e-07, "loss": 0.051, "step": 55830 }, { "epoch": 0.5966130669373364, "grad_norm": 1.1567455530166626, "learning_rate": 9.915352756483359e-07, "loss": 0.0259, "step": 55840 }, { "epoch": 0.596719910251616, "grad_norm": 3.4537782669067383, "learning_rate": 9.915321969925982e-07, "loss": 0.0368, "step": 55850 }, { "epoch": 0.5968267535658957, "grad_norm": 5.762649059295654, "learning_rate": 9.915291177818836e-07, "loss": 0.0805, "step": 55860 }, { "epoch": 0.5969335968801752, "grad_norm": 1.3556100130081177, "learning_rate": 9.915260380161954e-07, "loss": 0.0451, "step": 55870 }, { "epoch": 0.5970404401944548, "grad_norm": 1.5998272895812988, "learning_rate": 9.915229576955372e-07, "loss": 0.0119, "step": 55880 }, { "epoch": 0.5971472835087345, "grad_norm": 11.324553489685059, "learning_rate": 9.915198768199125e-07, "loss": 0.0704, "step": 55890 }, { "epoch": 0.597254126823014, "grad_norm": 2.9927806854248047, "learning_rate": 9.91516795389325e-07, "loss": 0.0604, "step": 55900 }, { "epoch": 0.5973609701372936, "grad_norm": 15.68317985534668, "learning_rate": 9.915137134037775e-07, "loss": 0.0463, "step": 55910 }, { "epoch": 0.5974678134515733, "grad_norm": 3.912405014038086, "learning_rate": 9.915106308632741e-07, "loss": 0.0811, "step": 55920 }, { "epoch": 0.5975746567658529, "grad_norm": 3.54656982421875, "learning_rate": 9.915075477678183e-07, "loss": 0.0545, "step": 55930 }, { "epoch": 0.5976815000801324, "grad_norm": 5.664978981018066, "learning_rate": 9.915044641174132e-07, "loss": 0.0233, "step": 55940 }, { "epoch": 0.5977883433944121, "grad_norm": 3.7358760833740234, "learning_rate": 9.915013799120624e-07, "loss": 0.1232, "step": 55950 }, { "epoch": 0.5978951867086917, "grad_norm": 9.431941032409668, "learning_rate": 9.914982951517697e-07, "loss": 0.0539, "step": 55960 }, { "epoch": 0.5980020300229714, "grad_norm": 0.801758885383606, "learning_rate": 9.91495209836538e-07, "loss": 0.0863, "step": 55970 }, { "epoch": 0.5981088733372509, "grad_norm": 18.825063705444336, "learning_rate": 9.914921239663715e-07, "loss": 0.1097, "step": 55980 }, { "epoch": 0.5982157166515305, "grad_norm": 1.4749119281768799, "learning_rate": 9.91489037541273e-07, "loss": 0.1345, "step": 55990 }, { "epoch": 0.5983225599658102, "grad_norm": 12.533096313476562, "learning_rate": 9.914859505612464e-07, "loss": 0.0398, "step": 56000 }, { "epoch": 0.5984294032800898, "grad_norm": 5.249363899230957, "learning_rate": 9.91482863026295e-07, "loss": 0.0609, "step": 56010 }, { "epoch": 0.5985362465943693, "grad_norm": 5.426599025726318, "learning_rate": 9.914797749364223e-07, "loss": 0.0628, "step": 56020 }, { "epoch": 0.598643089908649, "grad_norm": 16.415185928344727, "learning_rate": 9.91476686291632e-07, "loss": 0.0765, "step": 56030 }, { "epoch": 0.5987499332229286, "grad_norm": 10.118657112121582, "learning_rate": 9.914735970919274e-07, "loss": 0.0982, "step": 56040 }, { "epoch": 0.5988567765372081, "grad_norm": 1.4045130014419556, "learning_rate": 9.91470507337312e-07, "loss": 0.0443, "step": 56050 }, { "epoch": 0.5989636198514878, "grad_norm": 0.7457356452941895, "learning_rate": 9.914674170277893e-07, "loss": 0.0111, "step": 56060 }, { "epoch": 0.5990704631657674, "grad_norm": 6.489363670349121, "learning_rate": 9.914643261633625e-07, "loss": 0.0257, "step": 56070 }, { "epoch": 0.599177306480047, "grad_norm": 1.985495686531067, "learning_rate": 9.914612347440358e-07, "loss": 0.0639, "step": 56080 }, { "epoch": 0.5992841497943266, "grad_norm": 3.0887584686279297, "learning_rate": 9.914581427698123e-07, "loss": 0.0509, "step": 56090 }, { "epoch": 0.5993909931086062, "grad_norm": 3.6900417804718018, "learning_rate": 9.914550502406952e-07, "loss": 0.078, "step": 56100 }, { "epoch": 0.5994978364228858, "grad_norm": 9.300666809082031, "learning_rate": 9.914519571566884e-07, "loss": 0.0311, "step": 56110 }, { "epoch": 0.5996046797371655, "grad_norm": 9.18491268157959, "learning_rate": 9.914488635177951e-07, "loss": 0.0514, "step": 56120 }, { "epoch": 0.599711523051445, "grad_norm": 2.3708279132843018, "learning_rate": 9.91445769324019e-07, "loss": 0.0968, "step": 56130 }, { "epoch": 0.5998183663657247, "grad_norm": 12.574299812316895, "learning_rate": 9.914426745753636e-07, "loss": 0.1202, "step": 56140 }, { "epoch": 0.5999252096800043, "grad_norm": 8.013497352600098, "learning_rate": 9.914395792718324e-07, "loss": 0.0657, "step": 56150 }, { "epoch": 0.6000320529942839, "grad_norm": 2.3700106143951416, "learning_rate": 9.914364834134286e-07, "loss": 0.0376, "step": 56160 }, { "epoch": 0.6001388963085635, "grad_norm": 5.296321868896484, "learning_rate": 9.91433387000156e-07, "loss": 0.0801, "step": 56170 }, { "epoch": 0.6002457396228431, "grad_norm": 3.891500949859619, "learning_rate": 9.914302900320182e-07, "loss": 0.0437, "step": 56180 }, { "epoch": 0.6003525829371227, "grad_norm": 12.172918319702148, "learning_rate": 9.914271925090184e-07, "loss": 0.0479, "step": 56190 }, { "epoch": 0.6004594262514024, "grad_norm": 3.2094085216522217, "learning_rate": 9.914240944311602e-07, "loss": 0.0259, "step": 56200 }, { "epoch": 0.6005662695656819, "grad_norm": 8.574021339416504, "learning_rate": 9.914209957984471e-07, "loss": 0.0661, "step": 56210 }, { "epoch": 0.6006731128799615, "grad_norm": 5.377097129821777, "learning_rate": 9.914178966108826e-07, "loss": 0.058, "step": 56220 }, { "epoch": 0.6007799561942412, "grad_norm": 0.23046469688415527, "learning_rate": 9.914147968684701e-07, "loss": 0.0899, "step": 56230 }, { "epoch": 0.6008867995085208, "grad_norm": 2.6610560417175293, "learning_rate": 9.914116965712134e-07, "loss": 0.111, "step": 56240 }, { "epoch": 0.6009936428228003, "grad_norm": 3.5697383880615234, "learning_rate": 9.914085957191158e-07, "loss": 0.037, "step": 56250 }, { "epoch": 0.60110048613708, "grad_norm": 4.433481693267822, "learning_rate": 9.914054943121805e-07, "loss": 0.0765, "step": 56260 }, { "epoch": 0.6012073294513596, "grad_norm": 0.3985430598258972, "learning_rate": 9.914023923504118e-07, "loss": 0.0299, "step": 56270 }, { "epoch": 0.6013141727656391, "grad_norm": 1.5606701374053955, "learning_rate": 9.913992898338125e-07, "loss": 0.0754, "step": 56280 }, { "epoch": 0.6014210160799188, "grad_norm": 0.19307087361812592, "learning_rate": 9.91396186762386e-07, "loss": 0.0512, "step": 56290 }, { "epoch": 0.6015278593941984, "grad_norm": 7.972893238067627, "learning_rate": 9.913930831361364e-07, "loss": 0.0354, "step": 56300 }, { "epoch": 0.601634702708478, "grad_norm": 13.703584671020508, "learning_rate": 9.913899789550671e-07, "loss": 0.108, "step": 56310 }, { "epoch": 0.6017415460227576, "grad_norm": 0.9678082466125488, "learning_rate": 9.913868742191812e-07, "loss": 0.0592, "step": 56320 }, { "epoch": 0.6018483893370372, "grad_norm": 10.263482093811035, "learning_rate": 9.913837689284823e-07, "loss": 0.0387, "step": 56330 }, { "epoch": 0.6019552326513169, "grad_norm": 2.3913815021514893, "learning_rate": 9.913806630829743e-07, "loss": 0.0783, "step": 56340 }, { "epoch": 0.6020620759655965, "grad_norm": 3.3467135429382324, "learning_rate": 9.913775566826603e-07, "loss": 0.0521, "step": 56350 }, { "epoch": 0.602168919279876, "grad_norm": 2.233083486557007, "learning_rate": 9.91374449727544e-07, "loss": 0.0243, "step": 56360 }, { "epoch": 0.6022757625941557, "grad_norm": 10.484015464782715, "learning_rate": 9.913713422176288e-07, "loss": 0.0753, "step": 56370 }, { "epoch": 0.6023826059084353, "grad_norm": 2.5605924129486084, "learning_rate": 9.913682341529184e-07, "loss": 0.0592, "step": 56380 }, { "epoch": 0.6024894492227149, "grad_norm": 0.057802148163318634, "learning_rate": 9.913651255334162e-07, "loss": 0.0624, "step": 56390 }, { "epoch": 0.6025962925369945, "grad_norm": 0.2643194794654846, "learning_rate": 9.913620163591255e-07, "loss": 0.0345, "step": 56400 }, { "epoch": 0.6027031358512741, "grad_norm": 0.2843420207500458, "learning_rate": 9.913589066300502e-07, "loss": 0.0831, "step": 56410 }, { "epoch": 0.6028099791655537, "grad_norm": 1.1654984951019287, "learning_rate": 9.913557963461933e-07, "loss": 0.0651, "step": 56420 }, { "epoch": 0.6029168224798334, "grad_norm": 6.011092662811279, "learning_rate": 9.913526855075588e-07, "loss": 0.0917, "step": 56430 }, { "epoch": 0.6030236657941129, "grad_norm": 9.682816505432129, "learning_rate": 9.913495741141502e-07, "loss": 0.0264, "step": 56440 }, { "epoch": 0.6031305091083925, "grad_norm": 2.7437751293182373, "learning_rate": 9.913464621659705e-07, "loss": 0.0393, "step": 56450 }, { "epoch": 0.6032373524226722, "grad_norm": 7.562823295593262, "learning_rate": 9.913433496630239e-07, "loss": 0.0745, "step": 56460 }, { "epoch": 0.6033441957369517, "grad_norm": 0.48884299397468567, "learning_rate": 9.913402366053135e-07, "loss": 0.0404, "step": 56470 }, { "epoch": 0.6034510390512313, "grad_norm": 3.7383675575256348, "learning_rate": 9.913371229928428e-07, "loss": 0.0418, "step": 56480 }, { "epoch": 0.603557882365511, "grad_norm": 3.318807363510132, "learning_rate": 9.913340088256156e-07, "loss": 0.0667, "step": 56490 }, { "epoch": 0.6036647256797906, "grad_norm": 4.710349082946777, "learning_rate": 9.913308941036349e-07, "loss": 0.0469, "step": 56500 }, { "epoch": 0.6037715689940703, "grad_norm": 4.4005279541015625, "learning_rate": 9.913277788269048e-07, "loss": 0.0709, "step": 56510 }, { "epoch": 0.6038784123083498, "grad_norm": 1.3651502132415771, "learning_rate": 9.913246629954283e-07, "loss": 0.0439, "step": 56520 }, { "epoch": 0.6039852556226294, "grad_norm": 3.229794979095459, "learning_rate": 9.913215466092093e-07, "loss": 0.0284, "step": 56530 }, { "epoch": 0.6040920989369091, "grad_norm": 1.6906818151474, "learning_rate": 9.913184296682514e-07, "loss": 0.035, "step": 56540 }, { "epoch": 0.6041989422511886, "grad_norm": 3.8938546180725098, "learning_rate": 9.913153121725578e-07, "loss": 0.0582, "step": 56550 }, { "epoch": 0.6043057855654682, "grad_norm": 13.985146522521973, "learning_rate": 9.91312194122132e-07, "loss": 0.0604, "step": 56560 }, { "epoch": 0.6044126288797479, "grad_norm": 6.375821590423584, "learning_rate": 9.913090755169776e-07, "loss": 0.075, "step": 56570 }, { "epoch": 0.6045194721940275, "grad_norm": 3.0121653079986572, "learning_rate": 9.913059563570982e-07, "loss": 0.0789, "step": 56580 }, { "epoch": 0.604626315508307, "grad_norm": 4.399259567260742, "learning_rate": 9.913028366424974e-07, "loss": 0.0381, "step": 56590 }, { "epoch": 0.6047331588225867, "grad_norm": 1.435147762298584, "learning_rate": 9.912997163731788e-07, "loss": 0.0364, "step": 56600 }, { "epoch": 0.6048400021368663, "grad_norm": 4.282299041748047, "learning_rate": 9.912965955491454e-07, "loss": 0.0186, "step": 56610 }, { "epoch": 0.6049468454511459, "grad_norm": 0.045953843742609024, "learning_rate": 9.912934741704012e-07, "loss": 0.1039, "step": 56620 }, { "epoch": 0.6050536887654255, "grad_norm": 6.952363014221191, "learning_rate": 9.912903522369495e-07, "loss": 0.0971, "step": 56630 }, { "epoch": 0.6051605320797051, "grad_norm": 0.676834225654602, "learning_rate": 9.912872297487942e-07, "loss": 0.0906, "step": 56640 }, { "epoch": 0.6052673753939847, "grad_norm": 5.324426174163818, "learning_rate": 9.91284106705938e-07, "loss": 0.0233, "step": 56650 }, { "epoch": 0.6053742187082644, "grad_norm": 3.121919870376587, "learning_rate": 9.912809831083856e-07, "loss": 0.0512, "step": 56660 }, { "epoch": 0.6054810620225439, "grad_norm": 2.9085307121276855, "learning_rate": 9.912778589561396e-07, "loss": 0.0746, "step": 56670 }, { "epoch": 0.6055879053368235, "grad_norm": 0.8765286207199097, "learning_rate": 9.912747342492036e-07, "loss": 0.0924, "step": 56680 }, { "epoch": 0.6056947486511032, "grad_norm": 1.973892092704773, "learning_rate": 9.912716089875816e-07, "loss": 0.085, "step": 56690 }, { "epoch": 0.6058015919653827, "grad_norm": 0.7196788787841797, "learning_rate": 9.91268483171277e-07, "loss": 0.0568, "step": 56700 }, { "epoch": 0.6059084352796624, "grad_norm": 0.2384418547153473, "learning_rate": 9.91265356800293e-07, "loss": 0.0381, "step": 56710 }, { "epoch": 0.606015278593942, "grad_norm": 4.761810302734375, "learning_rate": 9.912622298746333e-07, "loss": 0.0895, "step": 56720 }, { "epoch": 0.6061221219082216, "grad_norm": 6.4487481117248535, "learning_rate": 9.912591023943015e-07, "loss": 0.0269, "step": 56730 }, { "epoch": 0.6062289652225012, "grad_norm": 4.817795753479004, "learning_rate": 9.912559743593011e-07, "loss": 0.0652, "step": 56740 }, { "epoch": 0.6063358085367808, "grad_norm": 4.208765029907227, "learning_rate": 9.912528457696358e-07, "loss": 0.062, "step": 56750 }, { "epoch": 0.6064426518510604, "grad_norm": 1.982797384262085, "learning_rate": 9.912497166253088e-07, "loss": 0.0739, "step": 56760 }, { "epoch": 0.6065494951653401, "grad_norm": 7.502370834350586, "learning_rate": 9.912465869263238e-07, "loss": 0.0779, "step": 56770 }, { "epoch": 0.6066563384796196, "grad_norm": 5.089793682098389, "learning_rate": 9.912434566726844e-07, "loss": 0.1046, "step": 56780 }, { "epoch": 0.6067631817938992, "grad_norm": 2.391251802444458, "learning_rate": 9.91240325864394e-07, "loss": 0.1143, "step": 56790 }, { "epoch": 0.6068700251081789, "grad_norm": 2.730790138244629, "learning_rate": 9.912371945014562e-07, "loss": 0.0467, "step": 56800 }, { "epoch": 0.6069768684224585, "grad_norm": 0.6022157669067383, "learning_rate": 9.912340625838745e-07, "loss": 0.0796, "step": 56810 }, { "epoch": 0.607083711736738, "grad_norm": 0.18151065707206726, "learning_rate": 9.912309301116526e-07, "loss": 0.0456, "step": 56820 }, { "epoch": 0.6071905550510177, "grad_norm": 1.1167917251586914, "learning_rate": 9.912277970847938e-07, "loss": 0.0224, "step": 56830 }, { "epoch": 0.6072973983652973, "grad_norm": 4.0166707038879395, "learning_rate": 9.912246635033016e-07, "loss": 0.0428, "step": 56840 }, { "epoch": 0.6074042416795769, "grad_norm": 5.688817024230957, "learning_rate": 9.912215293671799e-07, "loss": 0.0238, "step": 56850 }, { "epoch": 0.6075110849938565, "grad_norm": Infinity, "learning_rate": 9.91218394676432e-07, "loss": 0.0526, "step": 56860 }, { "epoch": 0.6076179283081361, "grad_norm": 6.838900089263916, "learning_rate": 9.912152594310615e-07, "loss": 0.0945, "step": 56870 }, { "epoch": 0.6077247716224158, "grad_norm": 12.427655220031738, "learning_rate": 9.912121236310717e-07, "loss": 0.0414, "step": 56880 }, { "epoch": 0.6078316149366954, "grad_norm": 9.865021705627441, "learning_rate": 9.912089872764665e-07, "loss": 0.0645, "step": 56890 }, { "epoch": 0.6079384582509749, "grad_norm": 8.080875396728516, "learning_rate": 9.912058503672491e-07, "loss": 0.0707, "step": 56900 }, { "epoch": 0.6080453015652546, "grad_norm": 1.4097615480422974, "learning_rate": 9.912027129034236e-07, "loss": 0.0366, "step": 56910 }, { "epoch": 0.6081521448795342, "grad_norm": 0.03939443826675415, "learning_rate": 9.911995748849928e-07, "loss": 0.0827, "step": 56920 }, { "epoch": 0.6082589881938137, "grad_norm": 7.224018573760986, "learning_rate": 9.911964363119607e-07, "loss": 0.0355, "step": 56930 }, { "epoch": 0.6083658315080934, "grad_norm": 4.599344253540039, "learning_rate": 9.911932971843307e-07, "loss": 0.0739, "step": 56940 }, { "epoch": 0.608472674822373, "grad_norm": 5.264162540435791, "learning_rate": 9.911901575021066e-07, "loss": 0.1259, "step": 56950 }, { "epoch": 0.6085795181366526, "grad_norm": 0.08251900970935822, "learning_rate": 9.911870172652915e-07, "loss": 0.0514, "step": 56960 }, { "epoch": 0.6086863614509322, "grad_norm": 1.9367868900299072, "learning_rate": 9.911838764738894e-07, "loss": 0.0489, "step": 56970 }, { "epoch": 0.6087932047652118, "grad_norm": 0.7492705583572388, "learning_rate": 9.911807351279035e-07, "loss": 0.0347, "step": 56980 }, { "epoch": 0.6089000480794914, "grad_norm": 7.771596908569336, "learning_rate": 9.911775932273374e-07, "loss": 0.0878, "step": 56990 }, { "epoch": 0.6090068913937711, "grad_norm": 14.955669403076172, "learning_rate": 9.911744507721948e-07, "loss": 0.1209, "step": 57000 }, { "epoch": 0.6091137347080506, "grad_norm": 6.9483442306518555, "learning_rate": 9.911713077624793e-07, "loss": 0.0316, "step": 57010 }, { "epoch": 0.6092205780223302, "grad_norm": 8.565505981445312, "learning_rate": 9.911681641981943e-07, "loss": 0.0524, "step": 57020 }, { "epoch": 0.6093274213366099, "grad_norm": 3.250321865081787, "learning_rate": 9.911650200793432e-07, "loss": 0.0783, "step": 57030 }, { "epoch": 0.6094342646508895, "grad_norm": 0.2900017499923706, "learning_rate": 9.9116187540593e-07, "loss": 0.0565, "step": 57040 }, { "epoch": 0.609541107965169, "grad_norm": 2.3122875690460205, "learning_rate": 9.911587301779575e-07, "loss": 0.0987, "step": 57050 }, { "epoch": 0.6096479512794487, "grad_norm": 6.326930522918701, "learning_rate": 9.911555843954303e-07, "loss": 0.0448, "step": 57060 }, { "epoch": 0.6097547945937283, "grad_norm": 1.4013605117797852, "learning_rate": 9.911524380583508e-07, "loss": 0.0486, "step": 57070 }, { "epoch": 0.609861637908008, "grad_norm": 7.4358649253845215, "learning_rate": 9.911492911667237e-07, "loss": 0.0293, "step": 57080 }, { "epoch": 0.6099684812222875, "grad_norm": 10.532360076904297, "learning_rate": 9.911461437205516e-07, "loss": 0.059, "step": 57090 }, { "epoch": 0.6100753245365671, "grad_norm": 0.1821107566356659, "learning_rate": 9.911429957198386e-07, "loss": 0.0618, "step": 57100 }, { "epoch": 0.6101821678508468, "grad_norm": 6.698869228363037, "learning_rate": 9.91139847164588e-07, "loss": 0.1184, "step": 57110 }, { "epoch": 0.6102890111651263, "grad_norm": 0.5603436827659607, "learning_rate": 9.911366980548035e-07, "loss": 0.036, "step": 57120 }, { "epoch": 0.6103958544794059, "grad_norm": 5.687689304351807, "learning_rate": 9.911335483904886e-07, "loss": 0.053, "step": 57130 }, { "epoch": 0.6105026977936856, "grad_norm": 2.193126678466797, "learning_rate": 9.911303981716469e-07, "loss": 0.0359, "step": 57140 }, { "epoch": 0.6106095411079652, "grad_norm": 0.03144070506095886, "learning_rate": 9.911272473982816e-07, "loss": 0.0532, "step": 57150 }, { "epoch": 0.6107163844222447, "grad_norm": 2.5890748500823975, "learning_rate": 9.91124096070397e-07, "loss": 0.039, "step": 57160 }, { "epoch": 0.6108232277365244, "grad_norm": 29.62249183654785, "learning_rate": 9.911209441879959e-07, "loss": 0.0934, "step": 57170 }, { "epoch": 0.610930071050804, "grad_norm": 2.9517245292663574, "learning_rate": 9.911177917510825e-07, "loss": 0.0351, "step": 57180 }, { "epoch": 0.6110369143650836, "grad_norm": 5.491103649139404, "learning_rate": 9.911146387596599e-07, "loss": 0.1104, "step": 57190 }, { "epoch": 0.6111437576793632, "grad_norm": 0.05348893254995346, "learning_rate": 9.911114852137316e-07, "loss": 0.1319, "step": 57200 }, { "epoch": 0.6112506009936428, "grad_norm": 3.4838850498199463, "learning_rate": 9.911083311133016e-07, "loss": 0.0701, "step": 57210 }, { "epoch": 0.6113574443079224, "grad_norm": 10.172528266906738, "learning_rate": 9.911051764583734e-07, "loss": 0.0839, "step": 57220 }, { "epoch": 0.6114642876222021, "grad_norm": 0.12084268778562546, "learning_rate": 9.911020212489501e-07, "loss": 0.1403, "step": 57230 }, { "epoch": 0.6115711309364816, "grad_norm": 0.7112078666687012, "learning_rate": 9.910988654850356e-07, "loss": 0.0407, "step": 57240 }, { "epoch": 0.6116779742507613, "grad_norm": 7.493352890014648, "learning_rate": 9.910957091666335e-07, "loss": 0.0857, "step": 57250 }, { "epoch": 0.6117848175650409, "grad_norm": 1.0865193605422974, "learning_rate": 9.91092552293747e-07, "loss": 0.0265, "step": 57260 }, { "epoch": 0.6118916608793205, "grad_norm": 7.1682820320129395, "learning_rate": 9.910893948663802e-07, "loss": 0.0593, "step": 57270 }, { "epoch": 0.6119985041936001, "grad_norm": 3.7897002696990967, "learning_rate": 9.910862368845366e-07, "loss": 0.0464, "step": 57280 }, { "epoch": 0.6121053475078797, "grad_norm": 4.207104682922363, "learning_rate": 9.910830783482192e-07, "loss": 0.0372, "step": 57290 }, { "epoch": 0.6122121908221593, "grad_norm": 3.4481801986694336, "learning_rate": 9.910799192574322e-07, "loss": 0.0764, "step": 57300 }, { "epoch": 0.612319034136439, "grad_norm": 1.0558093786239624, "learning_rate": 9.910767596121787e-07, "loss": 0.0296, "step": 57310 }, { "epoch": 0.6124258774507185, "grad_norm": 6.90328311920166, "learning_rate": 9.910735994124627e-07, "loss": 0.038, "step": 57320 }, { "epoch": 0.6125327207649981, "grad_norm": 1.9763439893722534, "learning_rate": 9.910704386582874e-07, "loss": 0.0091, "step": 57330 }, { "epoch": 0.6126395640792778, "grad_norm": 11.76220417022705, "learning_rate": 9.910672773496564e-07, "loss": 0.1033, "step": 57340 }, { "epoch": 0.6127464073935573, "grad_norm": 0.5974443554878235, "learning_rate": 9.910641154865735e-07, "loss": 0.0374, "step": 57350 }, { "epoch": 0.6128532507078369, "grad_norm": 8.399185180664062, "learning_rate": 9.910609530690421e-07, "loss": 0.0553, "step": 57360 }, { "epoch": 0.6129600940221166, "grad_norm": 13.68364429473877, "learning_rate": 9.91057790097066e-07, "loss": 0.0675, "step": 57370 }, { "epoch": 0.6130669373363962, "grad_norm": 0.2277234047651291, "learning_rate": 9.910546265706485e-07, "loss": 0.0469, "step": 57380 }, { "epoch": 0.6131737806506757, "grad_norm": 2.9008376598358154, "learning_rate": 9.910514624897932e-07, "loss": 0.0782, "step": 57390 }, { "epoch": 0.6132806239649554, "grad_norm": 5.2220048904418945, "learning_rate": 9.910482978545039e-07, "loss": 0.057, "step": 57400 }, { "epoch": 0.613387467279235, "grad_norm": 8.206957817077637, "learning_rate": 9.910451326647838e-07, "loss": 0.0351, "step": 57410 }, { "epoch": 0.6134943105935146, "grad_norm": 3.142976760864258, "learning_rate": 9.910419669206366e-07, "loss": 0.125, "step": 57420 }, { "epoch": 0.6136011539077942, "grad_norm": 13.42587661743164, "learning_rate": 9.910388006220663e-07, "loss": 0.0562, "step": 57430 }, { "epoch": 0.6137079972220738, "grad_norm": 6.6211771965026855, "learning_rate": 9.91035633769076e-07, "loss": 0.0406, "step": 57440 }, { "epoch": 0.6138148405363535, "grad_norm": 0.7645140886306763, "learning_rate": 9.910324663616695e-07, "loss": 0.0862, "step": 57450 }, { "epoch": 0.6139216838506331, "grad_norm": 0.6627894043922424, "learning_rate": 9.9102929839985e-07, "loss": 0.0904, "step": 57460 }, { "epoch": 0.6140285271649126, "grad_norm": 3.0497148036956787, "learning_rate": 9.910261298836215e-07, "loss": 0.0391, "step": 57470 }, { "epoch": 0.6141353704791923, "grad_norm": 1.5370830297470093, "learning_rate": 9.910229608129875e-07, "loss": 0.0453, "step": 57480 }, { "epoch": 0.6142422137934719, "grad_norm": 4.296352386474609, "learning_rate": 9.910197911879513e-07, "loss": 0.0619, "step": 57490 }, { "epoch": 0.6143490571077515, "grad_norm": 3.852898597717285, "learning_rate": 9.91016621008517e-07, "loss": 0.0731, "step": 57500 }, { "epoch": 0.6144559004220311, "grad_norm": 6.109777927398682, "learning_rate": 9.91013450274688e-07, "loss": 0.0813, "step": 57510 }, { "epoch": 0.6145627437363107, "grad_norm": 12.634313583374023, "learning_rate": 9.910102789864674e-07, "loss": 0.0421, "step": 57520 }, { "epoch": 0.6146695870505903, "grad_norm": 2.847212553024292, "learning_rate": 9.910071071438592e-07, "loss": 0.0889, "step": 57530 }, { "epoch": 0.61477643036487, "grad_norm": 2.669275999069214, "learning_rate": 9.910039347468669e-07, "loss": 0.0596, "step": 57540 }, { "epoch": 0.6148832736791495, "grad_norm": 6.158924579620361, "learning_rate": 9.910007617954942e-07, "loss": 0.0391, "step": 57550 }, { "epoch": 0.6149901169934291, "grad_norm": 0.23180389404296875, "learning_rate": 9.909975882897446e-07, "loss": 0.0651, "step": 57560 }, { "epoch": 0.6150969603077088, "grad_norm": 0.40066930651664734, "learning_rate": 9.909944142296215e-07, "loss": 0.0828, "step": 57570 }, { "epoch": 0.6152038036219883, "grad_norm": 1.895707368850708, "learning_rate": 9.909912396151287e-07, "loss": 0.0376, "step": 57580 }, { "epoch": 0.6153106469362679, "grad_norm": 1.0663354396820068, "learning_rate": 9.9098806444627e-07, "loss": 0.0277, "step": 57590 }, { "epoch": 0.6154174902505476, "grad_norm": 8.212186813354492, "learning_rate": 9.909848887230484e-07, "loss": 0.0507, "step": 57600 }, { "epoch": 0.6155243335648272, "grad_norm": 0.24460197985172272, "learning_rate": 9.90981712445468e-07, "loss": 0.0597, "step": 57610 }, { "epoch": 0.6156311768791068, "grad_norm": 0.9376721978187561, "learning_rate": 9.90978535613532e-07, "loss": 0.0757, "step": 57620 }, { "epoch": 0.6157380201933864, "grad_norm": 2.73901104927063, "learning_rate": 9.909753582272444e-07, "loss": 0.0304, "step": 57630 }, { "epoch": 0.615844863507666, "grad_norm": 7.678127288818359, "learning_rate": 9.909721802866084e-07, "loss": 0.1386, "step": 57640 }, { "epoch": 0.6159517068219457, "grad_norm": 5.9845476150512695, "learning_rate": 9.909690017916277e-07, "loss": 0.0422, "step": 57650 }, { "epoch": 0.6160585501362252, "grad_norm": 12.398871421813965, "learning_rate": 9.90965822742306e-07, "loss": 0.0279, "step": 57660 }, { "epoch": 0.6161653934505048, "grad_norm": 7.346787452697754, "learning_rate": 9.90962643138647e-07, "loss": 0.0673, "step": 57670 }, { "epoch": 0.6162722367647845, "grad_norm": 2.366978406906128, "learning_rate": 9.909594629806541e-07, "loss": 0.0214, "step": 57680 }, { "epoch": 0.6163790800790641, "grad_norm": 0.8525202870368958, "learning_rate": 9.909562822683309e-07, "loss": 0.0824, "step": 57690 }, { "epoch": 0.6164859233933436, "grad_norm": 8.408951759338379, "learning_rate": 9.909531010016807e-07, "loss": 0.0702, "step": 57700 }, { "epoch": 0.6165927667076233, "grad_norm": 3.978572368621826, "learning_rate": 9.909499191807077e-07, "loss": 0.058, "step": 57710 }, { "epoch": 0.6166996100219029, "grad_norm": 5.020391941070557, "learning_rate": 9.90946736805415e-07, "loss": 0.0596, "step": 57720 }, { "epoch": 0.6168064533361824, "grad_norm": 16.209749221801758, "learning_rate": 9.909435538758067e-07, "loss": 0.0995, "step": 57730 }, { "epoch": 0.6169132966504621, "grad_norm": 1.9928497076034546, "learning_rate": 9.909403703918857e-07, "loss": 0.0528, "step": 57740 }, { "epoch": 0.6170201399647417, "grad_norm": 1.8270318508148193, "learning_rate": 9.909371863536563e-07, "loss": 0.0601, "step": 57750 }, { "epoch": 0.6171269832790213, "grad_norm": 5.0719218254089355, "learning_rate": 9.909340017611215e-07, "loss": 0.086, "step": 57760 }, { "epoch": 0.617233826593301, "grad_norm": 3.152817964553833, "learning_rate": 9.909308166142851e-07, "loss": 0.0304, "step": 57770 }, { "epoch": 0.6173406699075805, "grad_norm": 2.021934747695923, "learning_rate": 9.909276309131508e-07, "loss": 0.027, "step": 57780 }, { "epoch": 0.6174475132218601, "grad_norm": 2.1204795837402344, "learning_rate": 9.909244446577224e-07, "loss": 0.0629, "step": 57790 }, { "epoch": 0.6175543565361398, "grad_norm": 12.940731048583984, "learning_rate": 9.90921257848003e-07, "loss": 0.0702, "step": 57800 }, { "epoch": 0.6176611998504193, "grad_norm": 6.538764953613281, "learning_rate": 9.909180704839968e-07, "loss": 0.0579, "step": 57810 }, { "epoch": 0.617768043164699, "grad_norm": 3.01916241645813, "learning_rate": 9.909148825657067e-07, "loss": 0.0535, "step": 57820 }, { "epoch": 0.6178748864789786, "grad_norm": 14.49029541015625, "learning_rate": 9.909116940931368e-07, "loss": 0.0502, "step": 57830 }, { "epoch": 0.6179817297932582, "grad_norm": 0.2597980201244354, "learning_rate": 9.909085050662904e-07, "loss": 0.0544, "step": 57840 }, { "epoch": 0.6180885731075378, "grad_norm": 1.0619844198226929, "learning_rate": 9.909053154851714e-07, "loss": 0.0263, "step": 57850 }, { "epoch": 0.6181954164218174, "grad_norm": 0.4123646020889282, "learning_rate": 9.909021253497832e-07, "loss": 0.0636, "step": 57860 }, { "epoch": 0.618302259736097, "grad_norm": 0.6522839069366455, "learning_rate": 9.908989346601295e-07, "loss": 0.0403, "step": 57870 }, { "epoch": 0.6184091030503767, "grad_norm": 2.2685952186584473, "learning_rate": 9.908957434162138e-07, "loss": 0.0405, "step": 57880 }, { "epoch": 0.6185159463646562, "grad_norm": 2.7662601470947266, "learning_rate": 9.908925516180396e-07, "loss": 0.0911, "step": 57890 }, { "epoch": 0.6186227896789358, "grad_norm": 0.3632086515426636, "learning_rate": 9.908893592656108e-07, "loss": 0.0816, "step": 57900 }, { "epoch": 0.6187296329932155, "grad_norm": 4.534092426300049, "learning_rate": 9.90886166358931e-07, "loss": 0.0547, "step": 57910 }, { "epoch": 0.618836476307495, "grad_norm": 5.281960964202881, "learning_rate": 9.908829728980035e-07, "loss": 0.0405, "step": 57920 }, { "epoch": 0.6189433196217746, "grad_norm": 2.3171491622924805, "learning_rate": 9.90879778882832e-07, "loss": 0.1054, "step": 57930 }, { "epoch": 0.6190501629360543, "grad_norm": 4.910862922668457, "learning_rate": 9.908765843134203e-07, "loss": 0.045, "step": 57940 }, { "epoch": 0.6191570062503339, "grad_norm": 1.9004830121994019, "learning_rate": 9.90873389189772e-07, "loss": 0.0333, "step": 57950 }, { "epoch": 0.6192638495646134, "grad_norm": 10.519145011901855, "learning_rate": 9.908701935118905e-07, "loss": 0.0843, "step": 57960 }, { "epoch": 0.6193706928788931, "grad_norm": 3.3472354412078857, "learning_rate": 9.908669972797794e-07, "loss": 0.0488, "step": 57970 }, { "epoch": 0.6194775361931727, "grad_norm": 7.0407795906066895, "learning_rate": 9.908638004934425e-07, "loss": 0.0289, "step": 57980 }, { "epoch": 0.6195843795074524, "grad_norm": 0.3314145803451538, "learning_rate": 9.908606031528833e-07, "loss": 0.0436, "step": 57990 }, { "epoch": 0.619691222821732, "grad_norm": 1.7781320810317993, "learning_rate": 9.908574052581056e-07, "loss": 0.0301, "step": 58000 }, { "epoch": 0.6197980661360115, "grad_norm": 4.751467227935791, "learning_rate": 9.908542068091126e-07, "loss": 0.031, "step": 58010 }, { "epoch": 0.6199049094502912, "grad_norm": 3.472429037094116, "learning_rate": 9.908510078059082e-07, "loss": 0.0423, "step": 58020 }, { "epoch": 0.6200117527645708, "grad_norm": 5.848938465118408, "learning_rate": 9.90847808248496e-07, "loss": 0.035, "step": 58030 }, { "epoch": 0.6201185960788503, "grad_norm": 1.50105619430542, "learning_rate": 9.908446081368796e-07, "loss": 0.0429, "step": 58040 }, { "epoch": 0.62022543939313, "grad_norm": 1.4160490036010742, "learning_rate": 9.908414074710626e-07, "loss": 0.1001, "step": 58050 }, { "epoch": 0.6203322827074096, "grad_norm": 15.12875747680664, "learning_rate": 9.908382062510487e-07, "loss": 0.0131, "step": 58060 }, { "epoch": 0.6204391260216892, "grad_norm": 13.7490873336792, "learning_rate": 9.908350044768413e-07, "loss": 0.076, "step": 58070 }, { "epoch": 0.6205459693359688, "grad_norm": 3.0469002723693848, "learning_rate": 9.90831802148444e-07, "loss": 0.093, "step": 58080 }, { "epoch": 0.6206528126502484, "grad_norm": 6.162664413452148, "learning_rate": 9.908285992658608e-07, "loss": 0.0516, "step": 58090 }, { "epoch": 0.620759655964528, "grad_norm": 4.356833457946777, "learning_rate": 9.90825395829095e-07, "loss": 0.0671, "step": 58100 }, { "epoch": 0.6208664992788077, "grad_norm": 5.577589988708496, "learning_rate": 9.908221918381504e-07, "loss": 0.0295, "step": 58110 }, { "epoch": 0.6209733425930872, "grad_norm": 1.8000833988189697, "learning_rate": 9.9081898729303e-07, "loss": 0.0475, "step": 58120 }, { "epoch": 0.6210801859073668, "grad_norm": 3.269240617752075, "learning_rate": 9.908157821937384e-07, "loss": 0.078, "step": 58130 }, { "epoch": 0.6211870292216465, "grad_norm": 9.736898422241211, "learning_rate": 9.908125765402787e-07, "loss": 0.1763, "step": 58140 }, { "epoch": 0.621293872535926, "grad_norm": 0.5857178568840027, "learning_rate": 9.908093703326544e-07, "loss": 0.0517, "step": 58150 }, { "epoch": 0.6214007158502056, "grad_norm": 0.15888310968875885, "learning_rate": 9.908061635708694e-07, "loss": 0.034, "step": 58160 }, { "epoch": 0.6215075591644853, "grad_norm": 4.597342491149902, "learning_rate": 9.90802956254927e-07, "loss": 0.0489, "step": 58170 }, { "epoch": 0.6216144024787649, "grad_norm": 1.7809571027755737, "learning_rate": 9.907997483848313e-07, "loss": 0.0472, "step": 58180 }, { "epoch": 0.6217212457930446, "grad_norm": 25.55840492248535, "learning_rate": 9.907965399605855e-07, "loss": 0.1286, "step": 58190 }, { "epoch": 0.6218280891073241, "grad_norm": 0.39278754591941833, "learning_rate": 9.907933309821934e-07, "loss": 0.0671, "step": 58200 }, { "epoch": 0.6219349324216037, "grad_norm": 5.176609039306641, "learning_rate": 9.907901214496584e-07, "loss": 0.0691, "step": 58210 }, { "epoch": 0.6220417757358834, "grad_norm": 11.49799633026123, "learning_rate": 9.907869113629845e-07, "loss": 0.1145, "step": 58220 }, { "epoch": 0.6221486190501629, "grad_norm": 0.0784844234585762, "learning_rate": 9.90783700722175e-07, "loss": 0.0234, "step": 58230 }, { "epoch": 0.6222554623644425, "grad_norm": 4.8387370109558105, "learning_rate": 9.90780489527234e-07, "loss": 0.0658, "step": 58240 }, { "epoch": 0.6223623056787222, "grad_norm": 0.38617250323295593, "learning_rate": 9.907772777781645e-07, "loss": 0.0236, "step": 58250 }, { "epoch": 0.6224691489930018, "grad_norm": 7.428925037384033, "learning_rate": 9.907740654749705e-07, "loss": 0.0544, "step": 58260 }, { "epoch": 0.6225759923072813, "grad_norm": 5.290518283843994, "learning_rate": 9.907708526176554e-07, "loss": 0.0517, "step": 58270 }, { "epoch": 0.622682835621561, "grad_norm": 0.05181358754634857, "learning_rate": 9.90767639206223e-07, "loss": 0.0187, "step": 58280 }, { "epoch": 0.6227896789358406, "grad_norm": 0.14219149947166443, "learning_rate": 9.907644252406771e-07, "loss": 0.0728, "step": 58290 }, { "epoch": 0.6228965222501202, "grad_norm": 9.773454666137695, "learning_rate": 9.90761210721021e-07, "loss": 0.0648, "step": 58300 }, { "epoch": 0.6230033655643998, "grad_norm": 0.10349767655134201, "learning_rate": 9.907579956472584e-07, "loss": 0.0547, "step": 58310 }, { "epoch": 0.6231102088786794, "grad_norm": 9.943220138549805, "learning_rate": 9.90754780019393e-07, "loss": 0.0424, "step": 58320 }, { "epoch": 0.623217052192959, "grad_norm": 4.016119956970215, "learning_rate": 9.907515638374285e-07, "loss": 0.048, "step": 58330 }, { "epoch": 0.6233238955072387, "grad_norm": 3.9673073291778564, "learning_rate": 9.907483471013683e-07, "loss": 0.0496, "step": 58340 }, { "epoch": 0.6234307388215182, "grad_norm": 3.2925922870635986, "learning_rate": 9.907451298112164e-07, "loss": 0.0503, "step": 58350 }, { "epoch": 0.6235375821357979, "grad_norm": 0.4956612288951874, "learning_rate": 9.90741911966976e-07, "loss": 0.0545, "step": 58360 }, { "epoch": 0.6236444254500775, "grad_norm": 5.017014026641846, "learning_rate": 9.90738693568651e-07, "loss": 0.047, "step": 58370 }, { "epoch": 0.623751268764357, "grad_norm": 0.4178209900856018, "learning_rate": 9.90735474616245e-07, "loss": 0.0123, "step": 58380 }, { "epoch": 0.6238581120786367, "grad_norm": 7.276305198669434, "learning_rate": 9.907322551097616e-07, "loss": 0.0738, "step": 58390 }, { "epoch": 0.6239649553929163, "grad_norm": 4.217870712280273, "learning_rate": 9.907290350492044e-07, "loss": 0.0741, "step": 58400 }, { "epoch": 0.6240717987071959, "grad_norm": 1.496741771697998, "learning_rate": 9.907258144345772e-07, "loss": 0.032, "step": 58410 }, { "epoch": 0.6241786420214755, "grad_norm": 3.6977925300598145, "learning_rate": 9.907225932658834e-07, "loss": 0.0305, "step": 58420 }, { "epoch": 0.6242854853357551, "grad_norm": 16.637479782104492, "learning_rate": 9.907193715431268e-07, "loss": 0.0947, "step": 58430 }, { "epoch": 0.6243923286500347, "grad_norm": 5.999231338500977, "learning_rate": 9.907161492663109e-07, "loss": 0.0991, "step": 58440 }, { "epoch": 0.6244991719643144, "grad_norm": 0.07394043356180191, "learning_rate": 9.907129264354397e-07, "loss": 0.0877, "step": 58450 }, { "epoch": 0.6246060152785939, "grad_norm": 0.5597349405288696, "learning_rate": 9.907097030505165e-07, "loss": 0.0519, "step": 58460 }, { "epoch": 0.6247128585928735, "grad_norm": 4.6912922859191895, "learning_rate": 9.90706479111545e-07, "loss": 0.0735, "step": 58470 }, { "epoch": 0.6248197019071532, "grad_norm": 3.566645622253418, "learning_rate": 9.907032546185285e-07, "loss": 0.1237, "step": 58480 }, { "epoch": 0.6249265452214328, "grad_norm": 2.9924511909484863, "learning_rate": 9.907000295714714e-07, "loss": 0.0449, "step": 58490 }, { "epoch": 0.6250333885357123, "grad_norm": 5.491155624389648, "learning_rate": 9.906968039703767e-07, "loss": 0.0333, "step": 58500 }, { "epoch": 0.625140231849992, "grad_norm": 9.778607368469238, "learning_rate": 9.906935778152482e-07, "loss": 0.0425, "step": 58510 }, { "epoch": 0.6252470751642716, "grad_norm": 5.922614574432373, "learning_rate": 9.9069035110609e-07, "loss": 0.0451, "step": 58520 }, { "epoch": 0.6253539184785512, "grad_norm": 3.7848691940307617, "learning_rate": 9.90687123842905e-07, "loss": 0.0466, "step": 58530 }, { "epoch": 0.6254607617928308, "grad_norm": 5.03468132019043, "learning_rate": 9.906838960256974e-07, "loss": 0.0491, "step": 58540 }, { "epoch": 0.6255676051071104, "grad_norm": 11.904940605163574, "learning_rate": 9.906806676544706e-07, "loss": 0.1151, "step": 58550 }, { "epoch": 0.6256744484213901, "grad_norm": 5.374282360076904, "learning_rate": 9.906774387292282e-07, "loss": 0.0478, "step": 58560 }, { "epoch": 0.6257812917356697, "grad_norm": 2.023373603820801, "learning_rate": 9.90674209249974e-07, "loss": 0.0459, "step": 58570 }, { "epoch": 0.6258881350499492, "grad_norm": 3.003427028656006, "learning_rate": 9.906709792167117e-07, "loss": 0.0789, "step": 58580 }, { "epoch": 0.6259949783642289, "grad_norm": 2.72495436668396, "learning_rate": 9.906677486294448e-07, "loss": 0.0365, "step": 58590 }, { "epoch": 0.6261018216785085, "grad_norm": 3.261798620223999, "learning_rate": 9.906645174881769e-07, "loss": 0.0262, "step": 58600 }, { "epoch": 0.626208664992788, "grad_norm": 4.60574197769165, "learning_rate": 9.906612857929117e-07, "loss": 0.0223, "step": 58610 }, { "epoch": 0.6263155083070677, "grad_norm": 7.201447486877441, "learning_rate": 9.90658053543653e-07, "loss": 0.0901, "step": 58620 }, { "epoch": 0.6264223516213473, "grad_norm": 5.647231101989746, "learning_rate": 9.906548207404041e-07, "loss": 0.0731, "step": 58630 }, { "epoch": 0.6265291949356269, "grad_norm": 4.707530498504639, "learning_rate": 9.90651587383169e-07, "loss": 0.0306, "step": 58640 }, { "epoch": 0.6266360382499065, "grad_norm": 3.4726932048797607, "learning_rate": 9.906483534719514e-07, "loss": 0.0597, "step": 58650 }, { "epoch": 0.6267428815641861, "grad_norm": 24.599645614624023, "learning_rate": 9.906451190067546e-07, "loss": 0.1955, "step": 58660 }, { "epoch": 0.6268497248784657, "grad_norm": 0.9910723567008972, "learning_rate": 9.906418839875825e-07, "loss": 0.0284, "step": 58670 }, { "epoch": 0.6269565681927454, "grad_norm": 1.5520403385162354, "learning_rate": 9.906386484144385e-07, "loss": 0.0118, "step": 58680 }, { "epoch": 0.6270634115070249, "grad_norm": 3.128310203552246, "learning_rate": 9.906354122873265e-07, "loss": 0.0868, "step": 58690 }, { "epoch": 0.6271702548213045, "grad_norm": 4.584487438201904, "learning_rate": 9.906321756062503e-07, "loss": 0.0368, "step": 58700 }, { "epoch": 0.6272770981355842, "grad_norm": 1.393027424812317, "learning_rate": 9.906289383712133e-07, "loss": 0.0513, "step": 58710 }, { "epoch": 0.6273839414498638, "grad_norm": 3.2504894733428955, "learning_rate": 9.90625700582219e-07, "loss": 0.0882, "step": 58720 }, { "epoch": 0.6274907847641434, "grad_norm": 3.9454565048217773, "learning_rate": 9.906224622392714e-07, "loss": 0.0573, "step": 58730 }, { "epoch": 0.627597628078423, "grad_norm": 2.391444206237793, "learning_rate": 9.90619223342374e-07, "loss": 0.0508, "step": 58740 }, { "epoch": 0.6277044713927026, "grad_norm": 0.7373693585395813, "learning_rate": 9.906159838915305e-07, "loss": 0.0645, "step": 58750 }, { "epoch": 0.6278113147069823, "grad_norm": 32.218318939208984, "learning_rate": 9.906127438867444e-07, "loss": 0.0817, "step": 58760 }, { "epoch": 0.6279181580212618, "grad_norm": 0.3530944287776947, "learning_rate": 9.906095033280196e-07, "loss": 0.0678, "step": 58770 }, { "epoch": 0.6280250013355414, "grad_norm": 2.550126552581787, "learning_rate": 9.906062622153596e-07, "loss": 0.0194, "step": 58780 }, { "epoch": 0.6281318446498211, "grad_norm": 3.9192376136779785, "learning_rate": 9.90603020548768e-07, "loss": 0.0335, "step": 58790 }, { "epoch": 0.6282386879641007, "grad_norm": 0.5713973641395569, "learning_rate": 9.905997783282487e-07, "loss": 0.0402, "step": 58800 }, { "epoch": 0.6283455312783802, "grad_norm": 11.396613121032715, "learning_rate": 9.90596535553805e-07, "loss": 0.0357, "step": 58810 }, { "epoch": 0.6284523745926599, "grad_norm": 0.36268746852874756, "learning_rate": 9.905932922254411e-07, "loss": 0.0313, "step": 58820 }, { "epoch": 0.6285592179069395, "grad_norm": 6.784884452819824, "learning_rate": 9.905900483431601e-07, "loss": 0.0557, "step": 58830 }, { "epoch": 0.628666061221219, "grad_norm": 0.13350340723991394, "learning_rate": 9.905868039069662e-07, "loss": 0.0676, "step": 58840 }, { "epoch": 0.6287729045354987, "grad_norm": 13.355428695678711, "learning_rate": 9.905835589168625e-07, "loss": 0.0914, "step": 58850 }, { "epoch": 0.6288797478497783, "grad_norm": 5.234879016876221, "learning_rate": 9.90580313372853e-07, "loss": 0.0817, "step": 58860 }, { "epoch": 0.6289865911640579, "grad_norm": 4.058248043060303, "learning_rate": 9.905770672749414e-07, "loss": 0.0517, "step": 58870 }, { "epoch": 0.6290934344783375, "grad_norm": 4.176079273223877, "learning_rate": 9.90573820623131e-07, "loss": 0.0577, "step": 58880 }, { "epoch": 0.6292002777926171, "grad_norm": 0.11386489123106003, "learning_rate": 9.90570573417426e-07, "loss": 0.0294, "step": 58890 }, { "epoch": 0.6293071211068967, "grad_norm": 4.127323150634766, "learning_rate": 9.905673256578298e-07, "loss": 0.0582, "step": 58900 }, { "epoch": 0.6294139644211764, "grad_norm": 8.317451477050781, "learning_rate": 9.90564077344346e-07, "loss": 0.0601, "step": 58910 }, { "epoch": 0.6295208077354559, "grad_norm": 10.390144348144531, "learning_rate": 9.905608284769783e-07, "loss": 0.0391, "step": 58920 }, { "epoch": 0.6296276510497356, "grad_norm": 0.44650572538375854, "learning_rate": 9.905575790557304e-07, "loss": 0.0136, "step": 58930 }, { "epoch": 0.6297344943640152, "grad_norm": 2.041252374649048, "learning_rate": 9.90554329080606e-07, "loss": 0.0277, "step": 58940 }, { "epoch": 0.6298413376782948, "grad_norm": 1.6522841453552246, "learning_rate": 9.905510785516087e-07, "loss": 0.0496, "step": 58950 }, { "epoch": 0.6299481809925744, "grad_norm": 4.872433185577393, "learning_rate": 9.905478274687424e-07, "loss": 0.0777, "step": 58960 }, { "epoch": 0.630055024306854, "grad_norm": 3.9129040241241455, "learning_rate": 9.905445758320103e-07, "loss": 0.0478, "step": 58970 }, { "epoch": 0.6301618676211336, "grad_norm": 7.623159885406494, "learning_rate": 9.905413236414165e-07, "loss": 0.1057, "step": 58980 }, { "epoch": 0.6302687109354133, "grad_norm": 2.3271470069885254, "learning_rate": 9.905380708969644e-07, "loss": 0.029, "step": 58990 }, { "epoch": 0.6303755542496928, "grad_norm": 0.8248317837715149, "learning_rate": 9.90534817598658e-07, "loss": 0.0484, "step": 59000 }, { "epoch": 0.6304823975639724, "grad_norm": 8.349699974060059, "learning_rate": 9.905315637465006e-07, "loss": 0.0423, "step": 59010 }, { "epoch": 0.6305892408782521, "grad_norm": 29.558401107788086, "learning_rate": 9.905283093404962e-07, "loss": 0.09, "step": 59020 }, { "epoch": 0.6306960841925316, "grad_norm": 3.478827714920044, "learning_rate": 9.90525054380648e-07, "loss": 0.072, "step": 59030 }, { "epoch": 0.6308029275068112, "grad_norm": 3.5066943168640137, "learning_rate": 9.905217988669602e-07, "loss": 0.0363, "step": 59040 }, { "epoch": 0.6309097708210909, "grad_norm": 2.658238172531128, "learning_rate": 9.905185427994365e-07, "loss": 0.0232, "step": 59050 }, { "epoch": 0.6310166141353705, "grad_norm": 1.940871238708496, "learning_rate": 9.9051528617808e-07, "loss": 0.0562, "step": 59060 }, { "epoch": 0.63112345744965, "grad_norm": 4.574649810791016, "learning_rate": 9.90512029002895e-07, "loss": 0.0448, "step": 59070 }, { "epoch": 0.6312303007639297, "grad_norm": 0.579522430896759, "learning_rate": 9.905087712738848e-07, "loss": 0.0471, "step": 59080 }, { "epoch": 0.6313371440782093, "grad_norm": 7.231847286224365, "learning_rate": 9.905055129910529e-07, "loss": 0.0349, "step": 59090 }, { "epoch": 0.631443987392489, "grad_norm": 1.447637915611267, "learning_rate": 9.905022541544035e-07, "loss": 0.0465, "step": 59100 }, { "epoch": 0.6315508307067685, "grad_norm": 7.498901844024658, "learning_rate": 9.9049899476394e-07, "loss": 0.0288, "step": 59110 }, { "epoch": 0.6316576740210481, "grad_norm": 1.9002606868743896, "learning_rate": 9.90495734819666e-07, "loss": 0.0912, "step": 59120 }, { "epoch": 0.6317645173353278, "grad_norm": 5.3919878005981445, "learning_rate": 9.904924743215855e-07, "loss": 0.0934, "step": 59130 }, { "epoch": 0.6318713606496074, "grad_norm": 2.512406349182129, "learning_rate": 9.904892132697019e-07, "loss": 0.0887, "step": 59140 }, { "epoch": 0.6319782039638869, "grad_norm": 3.1284847259521484, "learning_rate": 9.90485951664019e-07, "loss": 0.0303, "step": 59150 }, { "epoch": 0.6320850472781666, "grad_norm": 8.720368385314941, "learning_rate": 9.904826895045403e-07, "loss": 0.083, "step": 59160 }, { "epoch": 0.6321918905924462, "grad_norm": 0.13207121193408966, "learning_rate": 9.904794267912698e-07, "loss": 0.0344, "step": 59170 }, { "epoch": 0.6322987339067258, "grad_norm": 1.3916891813278198, "learning_rate": 9.90476163524211e-07, "loss": 0.0441, "step": 59180 }, { "epoch": 0.6324055772210054, "grad_norm": 1.1576985120773315, "learning_rate": 9.904728997033674e-07, "loss": 0.0574, "step": 59190 }, { "epoch": 0.632512420535285, "grad_norm": 3.5364127159118652, "learning_rate": 9.90469635328743e-07, "loss": 0.0573, "step": 59200 }, { "epoch": 0.6326192638495646, "grad_norm": 2.9288036823272705, "learning_rate": 9.904663704003413e-07, "loss": 0.0234, "step": 59210 }, { "epoch": 0.6327261071638443, "grad_norm": 14.158616065979004, "learning_rate": 9.904631049181662e-07, "loss": 0.0633, "step": 59220 }, { "epoch": 0.6328329504781238, "grad_norm": 0.48252782225608826, "learning_rate": 9.904598388822211e-07, "loss": 0.0883, "step": 59230 }, { "epoch": 0.6329397937924034, "grad_norm": 7.569336891174316, "learning_rate": 9.9045657229251e-07, "loss": 0.0685, "step": 59240 }, { "epoch": 0.6330466371066831, "grad_norm": 0.7857148051261902, "learning_rate": 9.904533051490363e-07, "loss": 0.0898, "step": 59250 }, { "epoch": 0.6331534804209626, "grad_norm": 9.007905960083008, "learning_rate": 9.904500374518036e-07, "loss": 0.0503, "step": 59260 }, { "epoch": 0.6332603237352422, "grad_norm": 0.32053589820861816, "learning_rate": 9.90446769200816e-07, "loss": 0.044, "step": 59270 }, { "epoch": 0.6333671670495219, "grad_norm": 0.025072235614061356, "learning_rate": 9.904435003960772e-07, "loss": 0.0242, "step": 59280 }, { "epoch": 0.6334740103638015, "grad_norm": 4.145884037017822, "learning_rate": 9.904402310375903e-07, "loss": 0.0666, "step": 59290 }, { "epoch": 0.6335808536780811, "grad_norm": 1.04153311252594, "learning_rate": 9.904369611253596e-07, "loss": 0.0611, "step": 59300 }, { "epoch": 0.6336876969923607, "grad_norm": 4.260897636413574, "learning_rate": 9.904336906593884e-07, "loss": 0.0261, "step": 59310 }, { "epoch": 0.6337945403066403, "grad_norm": 0.6082358956336975, "learning_rate": 9.904304196396808e-07, "loss": 0.0897, "step": 59320 }, { "epoch": 0.63390138362092, "grad_norm": 0.2629424035549164, "learning_rate": 9.904271480662398e-07, "loss": 0.0506, "step": 59330 }, { "epoch": 0.6340082269351995, "grad_norm": 1.1142559051513672, "learning_rate": 9.904238759390698e-07, "loss": 0.0216, "step": 59340 }, { "epoch": 0.6341150702494791, "grad_norm": 6.377157211303711, "learning_rate": 9.904206032581741e-07, "loss": 0.0628, "step": 59350 }, { "epoch": 0.6342219135637588, "grad_norm": 2.555880308151245, "learning_rate": 9.904173300235567e-07, "loss": 0.0551, "step": 59360 }, { "epoch": 0.6343287568780384, "grad_norm": 10.144213676452637, "learning_rate": 9.904140562352213e-07, "loss": 0.0657, "step": 59370 }, { "epoch": 0.6344356001923179, "grad_norm": 2.418257474899292, "learning_rate": 9.904107818931712e-07, "loss": 0.0448, "step": 59380 }, { "epoch": 0.6345424435065976, "grad_norm": 4.920646667480469, "learning_rate": 9.9040750699741e-07, "loss": 0.0618, "step": 59390 }, { "epoch": 0.6346492868208772, "grad_norm": 7.819351673126221, "learning_rate": 9.90404231547942e-07, "loss": 0.059, "step": 59400 }, { "epoch": 0.6347561301351567, "grad_norm": 5.770858287811279, "learning_rate": 9.904009555447708e-07, "loss": 0.061, "step": 59410 }, { "epoch": 0.6348629734494364, "grad_norm": 8.163155555725098, "learning_rate": 9.903976789878995e-07, "loss": 0.0708, "step": 59420 }, { "epoch": 0.634969816763716, "grad_norm": 0.47775474190711975, "learning_rate": 9.903944018773326e-07, "loss": 0.0417, "step": 59430 }, { "epoch": 0.6350766600779956, "grad_norm": 9.603184700012207, "learning_rate": 9.903911242130733e-07, "loss": 0.1536, "step": 59440 }, { "epoch": 0.6351835033922753, "grad_norm": 5.330282688140869, "learning_rate": 9.903878459951254e-07, "loss": 0.0467, "step": 59450 }, { "epoch": 0.6352903467065548, "grad_norm": 6.212027072906494, "learning_rate": 9.903845672234925e-07, "loss": 0.0335, "step": 59460 }, { "epoch": 0.6353971900208345, "grad_norm": 0.19495666027069092, "learning_rate": 9.903812878981783e-07, "loss": 0.078, "step": 59470 }, { "epoch": 0.6355040333351141, "grad_norm": 6.4698872566223145, "learning_rate": 9.90378008019187e-07, "loss": 0.0591, "step": 59480 }, { "epoch": 0.6356108766493936, "grad_norm": 3.0772178173065186, "learning_rate": 9.903747275865216e-07, "loss": 0.0239, "step": 59490 }, { "epoch": 0.6357177199636733, "grad_norm": 2.7690184116363525, "learning_rate": 9.903714466001862e-07, "loss": 0.0726, "step": 59500 }, { "epoch": 0.6358245632779529, "grad_norm": 6.8412885665893555, "learning_rate": 9.903681650601846e-07, "loss": 0.0591, "step": 59510 }, { "epoch": 0.6359314065922325, "grad_norm": 3.5360920429229736, "learning_rate": 9.9036488296652e-07, "loss": 0.0182, "step": 59520 }, { "epoch": 0.6360382499065121, "grad_norm": 1.1794888973236084, "learning_rate": 9.903616003191969e-07, "loss": 0.0821, "step": 59530 }, { "epoch": 0.6361450932207917, "grad_norm": 4.620115756988525, "learning_rate": 9.903583171182182e-07, "loss": 0.0379, "step": 59540 }, { "epoch": 0.6362519365350713, "grad_norm": 0.23038575053215027, "learning_rate": 9.90355033363588e-07, "loss": 0.0473, "step": 59550 }, { "epoch": 0.636358779849351, "grad_norm": 8.668352127075195, "learning_rate": 9.903517490553098e-07, "loss": 0.1032, "step": 59560 }, { "epoch": 0.6364656231636305, "grad_norm": 13.037688255310059, "learning_rate": 9.903484641933877e-07, "loss": 0.0408, "step": 59570 }, { "epoch": 0.6365724664779101, "grad_norm": 4.455525875091553, "learning_rate": 9.903451787778251e-07, "loss": 0.0241, "step": 59580 }, { "epoch": 0.6366793097921898, "grad_norm": 21.960969924926758, "learning_rate": 9.90341892808626e-07, "loss": 0.0332, "step": 59590 }, { "epoch": 0.6367861531064694, "grad_norm": 9.164210319519043, "learning_rate": 9.903386062857935e-07, "loss": 0.0472, "step": 59600 }, { "epoch": 0.6368929964207489, "grad_norm": 5.70854377746582, "learning_rate": 9.90335319209332e-07, "loss": 0.06, "step": 59610 }, { "epoch": 0.6369998397350286, "grad_norm": 1.6802794933319092, "learning_rate": 9.903320315792449e-07, "loss": 0.0403, "step": 59620 }, { "epoch": 0.6371066830493082, "grad_norm": 0.29376113414764404, "learning_rate": 9.903287433955358e-07, "loss": 0.1446, "step": 59630 }, { "epoch": 0.6372135263635877, "grad_norm": 2.466935873031616, "learning_rate": 9.903254546582084e-07, "loss": 0.0281, "step": 59640 }, { "epoch": 0.6373203696778674, "grad_norm": 1.9901864528656006, "learning_rate": 9.903221653672668e-07, "loss": 0.0239, "step": 59650 }, { "epoch": 0.637427212992147, "grad_norm": 3.182743549346924, "learning_rate": 9.903188755227144e-07, "loss": 0.0444, "step": 59660 }, { "epoch": 0.6375340563064267, "grad_norm": 4.620925426483154, "learning_rate": 9.90315585124555e-07, "loss": 0.0666, "step": 59670 }, { "epoch": 0.6376408996207062, "grad_norm": 13.377758026123047, "learning_rate": 9.903122941727924e-07, "loss": 0.0436, "step": 59680 }, { "epoch": 0.6377477429349858, "grad_norm": 5.228799819946289, "learning_rate": 9.9030900266743e-07, "loss": 0.0415, "step": 59690 }, { "epoch": 0.6378545862492655, "grad_norm": 6.981053352355957, "learning_rate": 9.90305710608472e-07, "loss": 0.0765, "step": 59700 }, { "epoch": 0.6379614295635451, "grad_norm": 1.6717491149902344, "learning_rate": 9.903024179959217e-07, "loss": 0.0564, "step": 59710 }, { "epoch": 0.6380682728778246, "grad_norm": 7.371074199676514, "learning_rate": 9.90299124829783e-07, "loss": 0.0287, "step": 59720 }, { "epoch": 0.6381751161921043, "grad_norm": 3.054931879043579, "learning_rate": 9.902958311100595e-07, "loss": 0.0405, "step": 59730 }, { "epoch": 0.6382819595063839, "grad_norm": 9.224679946899414, "learning_rate": 9.902925368367552e-07, "loss": 0.0578, "step": 59740 }, { "epoch": 0.6383888028206635, "grad_norm": 1.553840160369873, "learning_rate": 9.902892420098735e-07, "loss": 0.0553, "step": 59750 }, { "epoch": 0.6384956461349431, "grad_norm": 6.493645191192627, "learning_rate": 9.90285946629418e-07, "loss": 0.0733, "step": 59760 }, { "epoch": 0.6386024894492227, "grad_norm": 4.491770267486572, "learning_rate": 9.90282650695393e-07, "loss": 0.0541, "step": 59770 }, { "epoch": 0.6387093327635023, "grad_norm": 0.07379014045000076, "learning_rate": 9.902793542078017e-07, "loss": 0.038, "step": 59780 }, { "epoch": 0.638816176077782, "grad_norm": 4.656371116638184, "learning_rate": 9.90276057166648e-07, "loss": 0.0417, "step": 59790 }, { "epoch": 0.6389230193920615, "grad_norm": 2.175861358642578, "learning_rate": 9.902727595719357e-07, "loss": 0.0144, "step": 59800 }, { "epoch": 0.6390298627063411, "grad_norm": 5.204283237457275, "learning_rate": 9.902694614236685e-07, "loss": 0.0532, "step": 59810 }, { "epoch": 0.6391367060206208, "grad_norm": 9.686318397521973, "learning_rate": 9.9026616272185e-07, "loss": 0.0511, "step": 59820 }, { "epoch": 0.6392435493349004, "grad_norm": 15.035902976989746, "learning_rate": 9.90262863466484e-07, "loss": 0.0472, "step": 59830 }, { "epoch": 0.63935039264918, "grad_norm": 2.327258586883545, "learning_rate": 9.902595636575744e-07, "loss": 0.0419, "step": 59840 }, { "epoch": 0.6394572359634596, "grad_norm": 5.073943138122559, "learning_rate": 9.902562632951244e-07, "loss": 0.2335, "step": 59850 }, { "epoch": 0.6395640792777392, "grad_norm": 0.1283232569694519, "learning_rate": 9.902529623791384e-07, "loss": 0.0516, "step": 59860 }, { "epoch": 0.6396709225920189, "grad_norm": 0.04051993414759636, "learning_rate": 9.902496609096196e-07, "loss": 0.0642, "step": 59870 }, { "epoch": 0.6397777659062984, "grad_norm": 0.19044487178325653, "learning_rate": 9.902463588865721e-07, "loss": 0.0388, "step": 59880 }, { "epoch": 0.639884609220578, "grad_norm": 8.398543357849121, "learning_rate": 9.902430563099992e-07, "loss": 0.1062, "step": 59890 }, { "epoch": 0.6399914525348577, "grad_norm": 2.174858570098877, "learning_rate": 9.90239753179905e-07, "loss": 0.0667, "step": 59900 }, { "epoch": 0.6400982958491372, "grad_norm": 4.105521202087402, "learning_rate": 9.902364494962934e-07, "loss": 0.0446, "step": 59910 }, { "epoch": 0.6402051391634168, "grad_norm": 15.00501823425293, "learning_rate": 9.902331452591676e-07, "loss": 0.0519, "step": 59920 }, { "epoch": 0.6403119824776965, "grad_norm": 2.1837923526763916, "learning_rate": 9.902298404685317e-07, "loss": 0.1154, "step": 59930 }, { "epoch": 0.6404188257919761, "grad_norm": 0.4817730188369751, "learning_rate": 9.90226535124389e-07, "loss": 0.0146, "step": 59940 }, { "epoch": 0.6405256691062556, "grad_norm": 10.34208869934082, "learning_rate": 9.902232292267439e-07, "loss": 0.107, "step": 59950 }, { "epoch": 0.6406325124205353, "grad_norm": 0.03651333227753639, "learning_rate": 9.902199227755996e-07, "loss": 0.0444, "step": 59960 }, { "epoch": 0.6407393557348149, "grad_norm": 6.073085784912109, "learning_rate": 9.9021661577096e-07, "loss": 0.0688, "step": 59970 }, { "epoch": 0.6408461990490945, "grad_norm": 4.898118019104004, "learning_rate": 9.90213308212829e-07, "loss": 0.0197, "step": 59980 }, { "epoch": 0.6409530423633741, "grad_norm": 5.055467128753662, "learning_rate": 9.9021000010121e-07, "loss": 0.1009, "step": 59990 }, { "epoch": 0.6410598856776537, "grad_norm": 11.68450927734375, "learning_rate": 9.902066914361069e-07, "loss": 0.1123, "step": 60000 }, { "epoch": 0.6411667289919333, "grad_norm": 5.392338275909424, "learning_rate": 9.902033822175236e-07, "loss": 0.0207, "step": 60010 }, { "epoch": 0.641273572306213, "grad_norm": 5.575991153717041, "learning_rate": 9.902000724454636e-07, "loss": 0.0367, "step": 60020 }, { "epoch": 0.6413804156204925, "grad_norm": 0.7646024823188782, "learning_rate": 9.901967621199307e-07, "loss": 0.054, "step": 60030 }, { "epoch": 0.6414872589347722, "grad_norm": 3.468109130859375, "learning_rate": 9.901934512409287e-07, "loss": 0.0498, "step": 60040 }, { "epoch": 0.6415941022490518, "grad_norm": 4.087096691131592, "learning_rate": 9.901901398084614e-07, "loss": 0.0574, "step": 60050 }, { "epoch": 0.6417009455633313, "grad_norm": 0.03848874941468239, "learning_rate": 9.901868278225323e-07, "loss": 0.039, "step": 60060 }, { "epoch": 0.641807788877611, "grad_norm": 6.798482894897461, "learning_rate": 9.901835152831452e-07, "loss": 0.0618, "step": 60070 }, { "epoch": 0.6419146321918906, "grad_norm": 4.707817554473877, "learning_rate": 9.90180202190304e-07, "loss": 0.0254, "step": 60080 }, { "epoch": 0.6420214755061702, "grad_norm": 2.6272029876708984, "learning_rate": 9.901768885440123e-07, "loss": 0.0368, "step": 60090 }, { "epoch": 0.6421283188204499, "grad_norm": 7.853589057922363, "learning_rate": 9.901735743442741e-07, "loss": 0.0607, "step": 60100 }, { "epoch": 0.6422351621347294, "grad_norm": 2.93462872505188, "learning_rate": 9.901702595910927e-07, "loss": 0.0656, "step": 60110 }, { "epoch": 0.642342005449009, "grad_norm": 10.365803718566895, "learning_rate": 9.901669442844722e-07, "loss": 0.038, "step": 60120 }, { "epoch": 0.6424488487632887, "grad_norm": 0.0975884422659874, "learning_rate": 9.901636284244163e-07, "loss": 0.0279, "step": 60130 }, { "epoch": 0.6425556920775682, "grad_norm": 9.86459732055664, "learning_rate": 9.901603120109285e-07, "loss": 0.0473, "step": 60140 }, { "epoch": 0.6426625353918478, "grad_norm": 3.3226306438446045, "learning_rate": 9.90156995044013e-07, "loss": 0.0989, "step": 60150 }, { "epoch": 0.6427693787061275, "grad_norm": 3.146987199783325, "learning_rate": 9.901536775236729e-07, "loss": 0.0357, "step": 60160 }, { "epoch": 0.6428762220204071, "grad_norm": 0.4039653241634369, "learning_rate": 9.901503594499124e-07, "loss": 0.0628, "step": 60170 }, { "epoch": 0.6429830653346866, "grad_norm": 4.7025370597839355, "learning_rate": 9.901470408227352e-07, "loss": 0.0506, "step": 60180 }, { "epoch": 0.6430899086489663, "grad_norm": 3.1982054710388184, "learning_rate": 9.901437216421451e-07, "loss": 0.1016, "step": 60190 }, { "epoch": 0.6431967519632459, "grad_norm": 4.445255279541016, "learning_rate": 9.901404019081457e-07, "loss": 0.095, "step": 60200 }, { "epoch": 0.6433035952775256, "grad_norm": 0.2738701403141022, "learning_rate": 9.901370816207407e-07, "loss": 0.027, "step": 60210 }, { "epoch": 0.6434104385918051, "grad_norm": 0.5081431269645691, "learning_rate": 9.901337607799339e-07, "loss": 0.0557, "step": 60220 }, { "epoch": 0.6435172819060847, "grad_norm": 0.11295943707227707, "learning_rate": 9.901304393857293e-07, "loss": 0.0258, "step": 60230 }, { "epoch": 0.6436241252203644, "grad_norm": 2.182788848876953, "learning_rate": 9.901271174381302e-07, "loss": 0.052, "step": 60240 }, { "epoch": 0.643730968534644, "grad_norm": 1.816301703453064, "learning_rate": 9.90123794937141e-07, "loss": 0.0448, "step": 60250 }, { "epoch": 0.6438378118489235, "grad_norm": 7.8175883293151855, "learning_rate": 9.901204718827645e-07, "loss": 0.0594, "step": 60260 }, { "epoch": 0.6439446551632032, "grad_norm": 6.781685829162598, "learning_rate": 9.901171482750053e-07, "loss": 0.032, "step": 60270 }, { "epoch": 0.6440514984774828, "grad_norm": 5.093094348907471, "learning_rate": 9.901138241138669e-07, "loss": 0.0356, "step": 60280 }, { "epoch": 0.6441583417917623, "grad_norm": 6.836797714233398, "learning_rate": 9.901104993993527e-07, "loss": 0.0843, "step": 60290 }, { "epoch": 0.644265185106042, "grad_norm": 1.6651941537857056, "learning_rate": 9.90107174131467e-07, "loss": 0.057, "step": 60300 }, { "epoch": 0.6443720284203216, "grad_norm": 4.419467449188232, "learning_rate": 9.901038483102134e-07, "loss": 0.0482, "step": 60310 }, { "epoch": 0.6444788717346012, "grad_norm": 6.65192985534668, "learning_rate": 9.901005219355953e-07, "loss": 0.0483, "step": 60320 }, { "epoch": 0.6445857150488808, "grad_norm": 4.317964553833008, "learning_rate": 9.900971950076169e-07, "loss": 0.0327, "step": 60330 }, { "epoch": 0.6446925583631604, "grad_norm": 0.9264973998069763, "learning_rate": 9.900938675262818e-07, "loss": 0.0446, "step": 60340 }, { "epoch": 0.64479940167744, "grad_norm": 8.507773399353027, "learning_rate": 9.900905394915935e-07, "loss": 0.0946, "step": 60350 }, { "epoch": 0.6449062449917197, "grad_norm": 5.586978435516357, "learning_rate": 9.900872109035563e-07, "loss": 0.081, "step": 60360 }, { "epoch": 0.6450130883059992, "grad_norm": 1.3178250789642334, "learning_rate": 9.900838817621735e-07, "loss": 0.0463, "step": 60370 }, { "epoch": 0.6451199316202788, "grad_norm": 6.106439590454102, "learning_rate": 9.90080552067449e-07, "loss": 0.0412, "step": 60380 }, { "epoch": 0.6452267749345585, "grad_norm": 0.5762922763824463, "learning_rate": 9.900772218193868e-07, "loss": 0.0361, "step": 60390 }, { "epoch": 0.6453336182488381, "grad_norm": 4.464817523956299, "learning_rate": 9.9007389101799e-07, "loss": 0.03, "step": 60400 }, { "epoch": 0.6454404615631177, "grad_norm": 1.8718935251235962, "learning_rate": 9.90070559663263e-07, "loss": 0.0327, "step": 60410 }, { "epoch": 0.6455473048773973, "grad_norm": 2.402963638305664, "learning_rate": 9.900672277552094e-07, "loss": 0.027, "step": 60420 }, { "epoch": 0.6456541481916769, "grad_norm": 3.1815218925476074, "learning_rate": 9.90063895293833e-07, "loss": 0.0546, "step": 60430 }, { "epoch": 0.6457609915059566, "grad_norm": 1.0982264280319214, "learning_rate": 9.90060562279137e-07, "loss": 0.0358, "step": 60440 }, { "epoch": 0.6458678348202361, "grad_norm": 8.334935188293457, "learning_rate": 9.900572287111262e-07, "loss": 0.0576, "step": 60450 }, { "epoch": 0.6459746781345157, "grad_norm": 4.891700744628906, "learning_rate": 9.900538945898035e-07, "loss": 0.084, "step": 60460 }, { "epoch": 0.6460815214487954, "grad_norm": 2.0086402893066406, "learning_rate": 9.900505599151731e-07, "loss": 0.0479, "step": 60470 }, { "epoch": 0.646188364763075, "grad_norm": 3.1805005073547363, "learning_rate": 9.900472246872387e-07, "loss": 0.0491, "step": 60480 }, { "epoch": 0.6462952080773545, "grad_norm": 8.13625717163086, "learning_rate": 9.900438889060038e-07, "loss": 0.0224, "step": 60490 }, { "epoch": 0.6464020513916342, "grad_norm": 0.21946236491203308, "learning_rate": 9.900405525714726e-07, "loss": 0.0283, "step": 60500 }, { "epoch": 0.6465088947059138, "grad_norm": 0.04909326508641243, "learning_rate": 9.900372156836484e-07, "loss": 0.0397, "step": 60510 }, { "epoch": 0.6466157380201933, "grad_norm": 15.131725311279297, "learning_rate": 9.900338782425353e-07, "loss": 0.0854, "step": 60520 }, { "epoch": 0.646722581334473, "grad_norm": 8.582606315612793, "learning_rate": 9.90030540248137e-07, "loss": 0.0837, "step": 60530 }, { "epoch": 0.6468294246487526, "grad_norm": 6.572701454162598, "learning_rate": 9.90027201700457e-07, "loss": 0.0402, "step": 60540 }, { "epoch": 0.6469362679630322, "grad_norm": 0.4233948290348053, "learning_rate": 9.900238625994996e-07, "loss": 0.0184, "step": 60550 }, { "epoch": 0.6470431112773118, "grad_norm": 8.132149696350098, "learning_rate": 9.90020522945268e-07, "loss": 0.1447, "step": 60560 }, { "epoch": 0.6471499545915914, "grad_norm": 3.703704833984375, "learning_rate": 9.900171827377667e-07, "loss": 0.0638, "step": 60570 }, { "epoch": 0.6472567979058711, "grad_norm": 2.9759716987609863, "learning_rate": 9.900138419769985e-07, "loss": 0.0375, "step": 60580 }, { "epoch": 0.6473636412201507, "grad_norm": 8.408112525939941, "learning_rate": 9.90010500662968e-07, "loss": 0.0673, "step": 60590 }, { "epoch": 0.6474704845344302, "grad_norm": 1.6928538084030151, "learning_rate": 9.900071587956786e-07, "loss": 0.0332, "step": 60600 }, { "epoch": 0.6475773278487099, "grad_norm": 0.7606973052024841, "learning_rate": 9.90003816375134e-07, "loss": 0.0164, "step": 60610 }, { "epoch": 0.6476841711629895, "grad_norm": 0.5466145873069763, "learning_rate": 9.900004734013384e-07, "loss": 0.0781, "step": 60620 }, { "epoch": 0.6477910144772691, "grad_norm": 10.103898048400879, "learning_rate": 9.89997129874295e-07, "loss": 0.0859, "step": 60630 }, { "epoch": 0.6478978577915487, "grad_norm": 3.3648834228515625, "learning_rate": 9.89993785794008e-07, "loss": 0.0666, "step": 60640 }, { "epoch": 0.6480047011058283, "grad_norm": 5.090521812438965, "learning_rate": 9.89990441160481e-07, "loss": 0.0592, "step": 60650 }, { "epoch": 0.6481115444201079, "grad_norm": 7.856717109680176, "learning_rate": 9.899870959737177e-07, "loss": 0.079, "step": 60660 }, { "epoch": 0.6482183877343876, "grad_norm": 0.020667551085352898, "learning_rate": 9.899837502337222e-07, "loss": 0.065, "step": 60670 }, { "epoch": 0.6483252310486671, "grad_norm": 0.8474335670471191, "learning_rate": 9.89980403940498e-07, "loss": 0.0487, "step": 60680 }, { "epoch": 0.6484320743629467, "grad_norm": 7.181340217590332, "learning_rate": 9.89977057094049e-07, "loss": 0.0377, "step": 60690 }, { "epoch": 0.6485389176772264, "grad_norm": 0.6426616907119751, "learning_rate": 9.899737096943788e-07, "loss": 0.0599, "step": 60700 }, { "epoch": 0.648645760991506, "grad_norm": 2.729174852371216, "learning_rate": 9.899703617414912e-07, "loss": 0.0662, "step": 60710 }, { "epoch": 0.6487526043057855, "grad_norm": 8.077860832214355, "learning_rate": 9.899670132353902e-07, "loss": 0.059, "step": 60720 }, { "epoch": 0.6488594476200652, "grad_norm": 0.3667912483215332, "learning_rate": 9.899636641760795e-07, "loss": 0.0402, "step": 60730 }, { "epoch": 0.6489662909343448, "grad_norm": 7.027559757232666, "learning_rate": 9.899603145635629e-07, "loss": 0.0529, "step": 60740 }, { "epoch": 0.6490731342486243, "grad_norm": 7.706571578979492, "learning_rate": 9.89956964397844e-07, "loss": 0.0611, "step": 60750 }, { "epoch": 0.649179977562904, "grad_norm": 10.420540809631348, "learning_rate": 9.899536136789268e-07, "loss": 0.0689, "step": 60760 }, { "epoch": 0.6492868208771836, "grad_norm": 5.667309284210205, "learning_rate": 9.899502624068147e-07, "loss": 0.0218, "step": 60770 }, { "epoch": 0.6493936641914633, "grad_norm": 0.011685212142765522, "learning_rate": 9.899469105815122e-07, "loss": 0.0173, "step": 60780 }, { "epoch": 0.6495005075057428, "grad_norm": 0.5036737322807312, "learning_rate": 9.899435582030224e-07, "loss": 0.0937, "step": 60790 }, { "epoch": 0.6496073508200224, "grad_norm": 1.0587689876556396, "learning_rate": 9.899402052713494e-07, "loss": 0.0318, "step": 60800 }, { "epoch": 0.6497141941343021, "grad_norm": 7.558722972869873, "learning_rate": 9.89936851786497e-07, "loss": 0.0542, "step": 60810 }, { "epoch": 0.6498210374485817, "grad_norm": 4.330317974090576, "learning_rate": 9.89933497748469e-07, "loss": 0.0456, "step": 60820 }, { "epoch": 0.6499278807628612, "grad_norm": 3.9149482250213623, "learning_rate": 9.899301431572688e-07, "loss": 0.0538, "step": 60830 }, { "epoch": 0.6500347240771409, "grad_norm": 7.27716064453125, "learning_rate": 9.899267880129008e-07, "loss": 0.0866, "step": 60840 }, { "epoch": 0.6501415673914205, "grad_norm": 0.044029563665390015, "learning_rate": 9.899234323153683e-07, "loss": 0.0195, "step": 60850 }, { "epoch": 0.6502484107057, "grad_norm": 0.2127303034067154, "learning_rate": 9.899200760646752e-07, "loss": 0.0873, "step": 60860 }, { "epoch": 0.6503552540199797, "grad_norm": 0.6666250228881836, "learning_rate": 9.899167192608256e-07, "loss": 0.0621, "step": 60870 }, { "epoch": 0.6504620973342593, "grad_norm": 0.6538630723953247, "learning_rate": 9.899133619038228e-07, "loss": 0.0371, "step": 60880 }, { "epoch": 0.6505689406485389, "grad_norm": 0.3003406524658203, "learning_rate": 9.89910003993671e-07, "loss": 0.0671, "step": 60890 }, { "epoch": 0.6506757839628186, "grad_norm": 10.68221378326416, "learning_rate": 9.89906645530374e-07, "loss": 0.0871, "step": 60900 }, { "epoch": 0.6507826272770981, "grad_norm": 2.1328415870666504, "learning_rate": 9.899032865139352e-07, "loss": 0.0602, "step": 60910 }, { "epoch": 0.6508894705913777, "grad_norm": 1.839298963546753, "learning_rate": 9.898999269443585e-07, "loss": 0.0387, "step": 60920 }, { "epoch": 0.6509963139056574, "grad_norm": 0.9537131190299988, "learning_rate": 9.898965668216479e-07, "loss": 0.0511, "step": 60930 }, { "epoch": 0.651103157219937, "grad_norm": 1.964809775352478, "learning_rate": 9.898932061458072e-07, "loss": 0.056, "step": 60940 }, { "epoch": 0.6512100005342166, "grad_norm": 6.415159702301025, "learning_rate": 9.8988984491684e-07, "loss": 0.0543, "step": 60950 }, { "epoch": 0.6513168438484962, "grad_norm": 7.806416988372803, "learning_rate": 9.898864831347503e-07, "loss": 0.0279, "step": 60960 }, { "epoch": 0.6514236871627758, "grad_norm": 7.01381254196167, "learning_rate": 9.898831207995417e-07, "loss": 0.0634, "step": 60970 }, { "epoch": 0.6515305304770554, "grad_norm": 3.36879563331604, "learning_rate": 9.898797579112181e-07, "loss": 0.0473, "step": 60980 }, { "epoch": 0.651637373791335, "grad_norm": 2.39817476272583, "learning_rate": 9.898763944697833e-07, "loss": 0.062, "step": 60990 }, { "epoch": 0.6517442171056146, "grad_norm": 8.89627456665039, "learning_rate": 9.89873030475241e-07, "loss": 0.0613, "step": 61000 }, { "epoch": 0.6518510604198943, "grad_norm": 3.197713851928711, "learning_rate": 9.898696659275951e-07, "loss": 0.0561, "step": 61010 }, { "epoch": 0.6519579037341738, "grad_norm": 3.4805047512054443, "learning_rate": 9.898663008268496e-07, "loss": 0.0317, "step": 61020 }, { "epoch": 0.6520647470484534, "grad_norm": 1.2094550132751465, "learning_rate": 9.898629351730077e-07, "loss": 0.0253, "step": 61030 }, { "epoch": 0.6521715903627331, "grad_norm": 4.50748872756958, "learning_rate": 9.898595689660738e-07, "loss": 0.0423, "step": 61040 }, { "epoch": 0.6522784336770127, "grad_norm": 0.27992820739746094, "learning_rate": 9.898562022060515e-07, "loss": 0.0604, "step": 61050 }, { "epoch": 0.6523852769912922, "grad_norm": 5.761508464813232, "learning_rate": 9.898528348929445e-07, "loss": 0.042, "step": 61060 }, { "epoch": 0.6524921203055719, "grad_norm": 3.8670787811279297, "learning_rate": 9.898494670267567e-07, "loss": 0.0217, "step": 61070 }, { "epoch": 0.6525989636198515, "grad_norm": 4.5011067390441895, "learning_rate": 9.898460986074918e-07, "loss": 0.1628, "step": 61080 }, { "epoch": 0.652705806934131, "grad_norm": 3.6485371589660645, "learning_rate": 9.898427296351537e-07, "loss": 0.0493, "step": 61090 }, { "epoch": 0.6528126502484107, "grad_norm": 0.8861950039863586, "learning_rate": 9.898393601097463e-07, "loss": 0.0231, "step": 61100 }, { "epoch": 0.6529194935626903, "grad_norm": 3.8026137351989746, "learning_rate": 9.898359900312732e-07, "loss": 0.0884, "step": 61110 }, { "epoch": 0.6530263368769699, "grad_norm": 3.441845417022705, "learning_rate": 9.898326193997384e-07, "loss": 0.097, "step": 61120 }, { "epoch": 0.6531331801912496, "grad_norm": 9.916088104248047, "learning_rate": 9.898292482151456e-07, "loss": 0.0464, "step": 61130 }, { "epoch": 0.6532400235055291, "grad_norm": 0.7083143591880798, "learning_rate": 9.898258764774984e-07, "loss": 0.057, "step": 61140 }, { "epoch": 0.6533468668198088, "grad_norm": 12.344877243041992, "learning_rate": 9.89822504186801e-07, "loss": 0.0891, "step": 61150 }, { "epoch": 0.6534537101340884, "grad_norm": 6.902560234069824, "learning_rate": 9.898191313430571e-07, "loss": 0.1277, "step": 61160 }, { "epoch": 0.6535605534483679, "grad_norm": 1.5041289329528809, "learning_rate": 9.898157579462702e-07, "loss": 0.0475, "step": 61170 }, { "epoch": 0.6536673967626476, "grad_norm": 3.2729413509368896, "learning_rate": 9.898123839964444e-07, "loss": 0.079, "step": 61180 }, { "epoch": 0.6537742400769272, "grad_norm": 6.706870079040527, "learning_rate": 9.898090094935837e-07, "loss": 0.0522, "step": 61190 }, { "epoch": 0.6538810833912068, "grad_norm": 2.033815622329712, "learning_rate": 9.898056344376913e-07, "loss": 0.0341, "step": 61200 }, { "epoch": 0.6539879267054864, "grad_norm": 0.6574592590332031, "learning_rate": 9.898022588287715e-07, "loss": 0.0218, "step": 61210 }, { "epoch": 0.654094770019766, "grad_norm": 6.5225396156311035, "learning_rate": 9.897988826668282e-07, "loss": 0.035, "step": 61220 }, { "epoch": 0.6542016133340456, "grad_norm": 39.79845428466797, "learning_rate": 9.897955059518649e-07, "loss": 0.1764, "step": 61230 }, { "epoch": 0.6543084566483253, "grad_norm": 2.229403257369995, "learning_rate": 9.897921286838854e-07, "loss": 0.0853, "step": 61240 }, { "epoch": 0.6544152999626048, "grad_norm": 2.93881893157959, "learning_rate": 9.897887508628935e-07, "loss": 0.036, "step": 61250 }, { "epoch": 0.6545221432768844, "grad_norm": 21.74176597595215, "learning_rate": 9.897853724888934e-07, "loss": 0.0566, "step": 61260 }, { "epoch": 0.6546289865911641, "grad_norm": 2.9977402687072754, "learning_rate": 9.897819935618885e-07, "loss": 0.0663, "step": 61270 }, { "epoch": 0.6547358299054437, "grad_norm": 1.121813178062439, "learning_rate": 9.897786140818826e-07, "loss": 0.1143, "step": 61280 }, { "epoch": 0.6548426732197232, "grad_norm": 2.825026035308838, "learning_rate": 9.897752340488801e-07, "loss": 0.0255, "step": 61290 }, { "epoch": 0.6549495165340029, "grad_norm": 5.587005615234375, "learning_rate": 9.897718534628841e-07, "loss": 0.0388, "step": 61300 }, { "epoch": 0.6550563598482825, "grad_norm": 8.992952346801758, "learning_rate": 9.897684723238989e-07, "loss": 0.0429, "step": 61310 }, { "epoch": 0.6551632031625622, "grad_norm": 1.3208478689193726, "learning_rate": 9.89765090631928e-07, "loss": 0.0611, "step": 61320 }, { "epoch": 0.6552700464768417, "grad_norm": 1.7546741962432861, "learning_rate": 9.897617083869753e-07, "loss": 0.0588, "step": 61330 }, { "epoch": 0.6553768897911213, "grad_norm": 9.201828956604004, "learning_rate": 9.897583255890447e-07, "loss": 0.1019, "step": 61340 }, { "epoch": 0.655483733105401, "grad_norm": 1.0693464279174805, "learning_rate": 9.8975494223814e-07, "loss": 0.0913, "step": 61350 }, { "epoch": 0.6555905764196805, "grad_norm": 3.8641791343688965, "learning_rate": 9.897515583342651e-07, "loss": 0.031, "step": 61360 }, { "epoch": 0.6556974197339601, "grad_norm": 0.48243167996406555, "learning_rate": 9.897481738774236e-07, "loss": 0.0159, "step": 61370 }, { "epoch": 0.6558042630482398, "grad_norm": 1.8017985820770264, "learning_rate": 9.897447888676193e-07, "loss": 0.0357, "step": 61380 }, { "epoch": 0.6559111063625194, "grad_norm": 0.2966797947883606, "learning_rate": 9.897414033048567e-07, "loss": 0.0288, "step": 61390 }, { "epoch": 0.6560179496767989, "grad_norm": 6.994764804840088, "learning_rate": 9.897380171891386e-07, "loss": 0.1432, "step": 61400 }, { "epoch": 0.6561247929910786, "grad_norm": 4.515913486480713, "learning_rate": 9.897346305204694e-07, "loss": 0.0561, "step": 61410 }, { "epoch": 0.6562316363053582, "grad_norm": 2.901076555252075, "learning_rate": 9.897312432988529e-07, "loss": 0.0333, "step": 61420 }, { "epoch": 0.6563384796196378, "grad_norm": 0.05620325356721878, "learning_rate": 9.89727855524293e-07, "loss": 0.0528, "step": 61430 }, { "epoch": 0.6564453229339174, "grad_norm": 0.06484230607748032, "learning_rate": 9.897244671967932e-07, "loss": 0.0916, "step": 61440 }, { "epoch": 0.656552166248197, "grad_norm": 19.549482345581055, "learning_rate": 9.897210783163577e-07, "loss": 0.0629, "step": 61450 }, { "epoch": 0.6566590095624766, "grad_norm": 0.03659376502037048, "learning_rate": 9.8971768888299e-07, "loss": 0.0692, "step": 61460 }, { "epoch": 0.6567658528767563, "grad_norm": 5.451208114624023, "learning_rate": 9.89714298896694e-07, "loss": 0.0571, "step": 61470 }, { "epoch": 0.6568726961910358, "grad_norm": 3.662372589111328, "learning_rate": 9.897109083574737e-07, "loss": 0.0443, "step": 61480 }, { "epoch": 0.6569795395053154, "grad_norm": 5.672257900238037, "learning_rate": 9.89707517265333e-07, "loss": 0.0846, "step": 61490 }, { "epoch": 0.6570863828195951, "grad_norm": 0.7506486177444458, "learning_rate": 9.89704125620275e-07, "loss": 0.0477, "step": 61500 }, { "epoch": 0.6571932261338747, "grad_norm": 0.7276674509048462, "learning_rate": 9.897007334223046e-07, "loss": 0.0363, "step": 61510 }, { "epoch": 0.6573000694481543, "grad_norm": 3.3125622272491455, "learning_rate": 9.896973406714251e-07, "loss": 0.0845, "step": 61520 }, { "epoch": 0.6574069127624339, "grad_norm": 4.705358028411865, "learning_rate": 9.896939473676402e-07, "loss": 0.0282, "step": 61530 }, { "epoch": 0.6575137560767135, "grad_norm": 7.996755123138428, "learning_rate": 9.89690553510954e-07, "loss": 0.0546, "step": 61540 }, { "epoch": 0.6576205993909932, "grad_norm": 6.955320835113525, "learning_rate": 9.896871591013699e-07, "loss": 0.08, "step": 61550 }, { "epoch": 0.6577274427052727, "grad_norm": 3.721801996231079, "learning_rate": 9.896837641388922e-07, "loss": 0.0463, "step": 61560 }, { "epoch": 0.6578342860195523, "grad_norm": 0.36050161719322205, "learning_rate": 9.896803686235246e-07, "loss": 0.0298, "step": 61570 }, { "epoch": 0.657941129333832, "grad_norm": 7.7151665687561035, "learning_rate": 9.896769725552709e-07, "loss": 0.0997, "step": 61580 }, { "epoch": 0.6580479726481115, "grad_norm": 4.547688007354736, "learning_rate": 9.89673575934135e-07, "loss": 0.0372, "step": 61590 }, { "epoch": 0.6581548159623911, "grad_norm": 4.6251630783081055, "learning_rate": 9.896701787601206e-07, "loss": 0.0482, "step": 61600 }, { "epoch": 0.6582616592766708, "grad_norm": 4.855052947998047, "learning_rate": 9.896667810332316e-07, "loss": 0.068, "step": 61610 }, { "epoch": 0.6583685025909504, "grad_norm": 7.174849987030029, "learning_rate": 9.89663382753472e-07, "loss": 0.0652, "step": 61620 }, { "epoch": 0.6584753459052299, "grad_norm": 0.5538113117218018, "learning_rate": 9.896599839208453e-07, "loss": 0.0472, "step": 61630 }, { "epoch": 0.6585821892195096, "grad_norm": 1.600481390953064, "learning_rate": 9.896565845353556e-07, "loss": 0.0354, "step": 61640 }, { "epoch": 0.6586890325337892, "grad_norm": 8.92296314239502, "learning_rate": 9.896531845970066e-07, "loss": 0.0468, "step": 61650 }, { "epoch": 0.6587958758480688, "grad_norm": 12.432973861694336, "learning_rate": 9.896497841058022e-07, "loss": 0.0987, "step": 61660 }, { "epoch": 0.6589027191623484, "grad_norm": 0.1020006313920021, "learning_rate": 9.896463830617464e-07, "loss": 0.078, "step": 61670 }, { "epoch": 0.659009562476628, "grad_norm": 2.4357712268829346, "learning_rate": 9.896429814648428e-07, "loss": 0.0513, "step": 61680 }, { "epoch": 0.6591164057909077, "grad_norm": 5.910037517547607, "learning_rate": 9.896395793150953e-07, "loss": 0.0495, "step": 61690 }, { "epoch": 0.6592232491051873, "grad_norm": 2.074585437774658, "learning_rate": 9.896361766125077e-07, "loss": 0.0324, "step": 61700 }, { "epoch": 0.6593300924194668, "grad_norm": 3.38710880279541, "learning_rate": 9.896327733570839e-07, "loss": 0.0709, "step": 61710 }, { "epoch": 0.6594369357337465, "grad_norm": 0.02535540610551834, "learning_rate": 9.896293695488277e-07, "loss": 0.0956, "step": 61720 }, { "epoch": 0.6595437790480261, "grad_norm": 0.07822804898023605, "learning_rate": 9.896259651877432e-07, "loss": 0.0425, "step": 61730 }, { "epoch": 0.6596506223623056, "grad_norm": 6.033788681030273, "learning_rate": 9.89622560273834e-07, "loss": 0.0629, "step": 61740 }, { "epoch": 0.6597574656765853, "grad_norm": 0.5509985089302063, "learning_rate": 9.896191548071039e-07, "loss": 0.0514, "step": 61750 }, { "epoch": 0.6598643089908649, "grad_norm": 3.8810644149780273, "learning_rate": 9.896157487875568e-07, "loss": 0.0867, "step": 61760 }, { "epoch": 0.6599711523051445, "grad_norm": 7.1284284591674805, "learning_rate": 9.896123422151965e-07, "loss": 0.0466, "step": 61770 }, { "epoch": 0.6600779956194242, "grad_norm": 14.503457069396973, "learning_rate": 9.89608935090027e-07, "loss": 0.066, "step": 61780 }, { "epoch": 0.6601848389337037, "grad_norm": 6.289482593536377, "learning_rate": 9.896055274120522e-07, "loss": 0.0975, "step": 61790 }, { "epoch": 0.6602916822479833, "grad_norm": 1.9402146339416504, "learning_rate": 9.896021191812754e-07, "loss": 0.0379, "step": 61800 }, { "epoch": 0.660398525562263, "grad_norm": 12.054044723510742, "learning_rate": 9.895987103977012e-07, "loss": 0.1373, "step": 61810 }, { "epoch": 0.6605053688765425, "grad_norm": 0.5171782374382019, "learning_rate": 9.895953010613328e-07, "loss": 0.1032, "step": 61820 }, { "epoch": 0.6606122121908221, "grad_norm": 8.581141471862793, "learning_rate": 9.895918911721748e-07, "loss": 0.0727, "step": 61830 }, { "epoch": 0.6607190555051018, "grad_norm": 6.239411354064941, "learning_rate": 9.895884807302302e-07, "loss": 0.0442, "step": 61840 }, { "epoch": 0.6608258988193814, "grad_norm": 6.513180732727051, "learning_rate": 9.895850697355034e-07, "loss": 0.045, "step": 61850 }, { "epoch": 0.6609327421336609, "grad_norm": 8.440550804138184, "learning_rate": 9.895816581879983e-07, "loss": 0.1053, "step": 61860 }, { "epoch": 0.6610395854479406, "grad_norm": 4.756428241729736, "learning_rate": 9.895782460877184e-07, "loss": 0.0376, "step": 61870 }, { "epoch": 0.6611464287622202, "grad_norm": 3.699054479598999, "learning_rate": 9.895748334346676e-07, "loss": 0.0206, "step": 61880 }, { "epoch": 0.6612532720764999, "grad_norm": 0.0654861330986023, "learning_rate": 9.8957142022885e-07, "loss": 0.047, "step": 61890 }, { "epoch": 0.6613601153907794, "grad_norm": 4.073225975036621, "learning_rate": 9.895680064702693e-07, "loss": 0.1182, "step": 61900 }, { "epoch": 0.661466958705059, "grad_norm": 2.508584499359131, "learning_rate": 9.895645921589293e-07, "loss": 0.0603, "step": 61910 }, { "epoch": 0.6615738020193387, "grad_norm": 0.4440830945968628, "learning_rate": 9.89561177294834e-07, "loss": 0.0188, "step": 61920 }, { "epoch": 0.6616806453336183, "grad_norm": 2.5146710872650146, "learning_rate": 9.895577618779871e-07, "loss": 0.0738, "step": 61930 }, { "epoch": 0.6617874886478978, "grad_norm": 5.625848770141602, "learning_rate": 9.895543459083925e-07, "loss": 0.0492, "step": 61940 }, { "epoch": 0.6618943319621775, "grad_norm": 1.7294000387191772, "learning_rate": 9.895509293860543e-07, "loss": 0.1082, "step": 61950 }, { "epoch": 0.6620011752764571, "grad_norm": 9.841338157653809, "learning_rate": 9.89547512310976e-07, "loss": 0.0662, "step": 61960 }, { "epoch": 0.6621080185907366, "grad_norm": 0.5279327630996704, "learning_rate": 9.895440946831616e-07, "loss": 0.0322, "step": 61970 }, { "epoch": 0.6622148619050163, "grad_norm": 4.525262832641602, "learning_rate": 9.895406765026151e-07, "loss": 0.0707, "step": 61980 }, { "epoch": 0.6623217052192959, "grad_norm": 3.387377977371216, "learning_rate": 9.8953725776934e-07, "loss": 0.0401, "step": 61990 }, { "epoch": 0.6624285485335755, "grad_norm": 6.06552791595459, "learning_rate": 9.895338384833407e-07, "loss": 0.0212, "step": 62000 }, { "epoch": 0.6625353918478551, "grad_norm": 3.0620131492614746, "learning_rate": 9.895304186446207e-07, "loss": 0.058, "step": 62010 }, { "epoch": 0.6626422351621347, "grad_norm": 5.121240615844727, "learning_rate": 9.895269982531838e-07, "loss": 0.0593, "step": 62020 }, { "epoch": 0.6627490784764143, "grad_norm": 5.991496562957764, "learning_rate": 9.89523577309034e-07, "loss": 0.0653, "step": 62030 }, { "epoch": 0.662855921790694, "grad_norm": 15.071014404296875, "learning_rate": 9.895201558121752e-07, "loss": 0.0187, "step": 62040 }, { "epoch": 0.6629627651049735, "grad_norm": 1.7294387817382812, "learning_rate": 9.89516733762611e-07, "loss": 0.0556, "step": 62050 }, { "epoch": 0.6630696084192532, "grad_norm": 13.39449691772461, "learning_rate": 9.895133111603456e-07, "loss": 0.085, "step": 62060 }, { "epoch": 0.6631764517335328, "grad_norm": 5.305702209472656, "learning_rate": 9.895098880053826e-07, "loss": 0.0502, "step": 62070 }, { "epoch": 0.6632832950478124, "grad_norm": 5.9640421867370605, "learning_rate": 9.89506464297726e-07, "loss": 0.0919, "step": 62080 }, { "epoch": 0.663390138362092, "grad_norm": 6.322524547576904, "learning_rate": 9.895030400373798e-07, "loss": 0.0705, "step": 62090 }, { "epoch": 0.6634969816763716, "grad_norm": 0.14129537343978882, "learning_rate": 9.894996152243477e-07, "loss": 0.1076, "step": 62100 }, { "epoch": 0.6636038249906512, "grad_norm": 0.17098386585712433, "learning_rate": 9.894961898586335e-07, "loss": 0.0605, "step": 62110 }, { "epoch": 0.6637106683049309, "grad_norm": 32.247703552246094, "learning_rate": 9.894927639402412e-07, "loss": 0.0514, "step": 62120 }, { "epoch": 0.6638175116192104, "grad_norm": 3.9045052528381348, "learning_rate": 9.894893374691747e-07, "loss": 0.0559, "step": 62130 }, { "epoch": 0.66392435493349, "grad_norm": 0.08976325392723083, "learning_rate": 9.894859104454376e-07, "loss": 0.0339, "step": 62140 }, { "epoch": 0.6640311982477697, "grad_norm": 0.18938669562339783, "learning_rate": 9.894824828690342e-07, "loss": 0.0431, "step": 62150 }, { "epoch": 0.6641380415620493, "grad_norm": 0.11739135533571243, "learning_rate": 9.89479054739968e-07, "loss": 0.031, "step": 62160 }, { "epoch": 0.6642448848763288, "grad_norm": 9.364602088928223, "learning_rate": 9.894756260582432e-07, "loss": 0.0579, "step": 62170 }, { "epoch": 0.6643517281906085, "grad_norm": 6.209105491638184, "learning_rate": 9.894721968238634e-07, "loss": 0.071, "step": 62180 }, { "epoch": 0.6644585715048881, "grad_norm": 0.289619117975235, "learning_rate": 9.894687670368323e-07, "loss": 0.1503, "step": 62190 }, { "epoch": 0.6645654148191676, "grad_norm": 0.3520185351371765, "learning_rate": 9.894653366971541e-07, "loss": 0.0493, "step": 62200 }, { "epoch": 0.6646722581334473, "grad_norm": 1.6657379865646362, "learning_rate": 9.894619058048327e-07, "loss": 0.0696, "step": 62210 }, { "epoch": 0.6647791014477269, "grad_norm": 10.62143325805664, "learning_rate": 9.89458474359872e-07, "loss": 0.058, "step": 62220 }, { "epoch": 0.6648859447620065, "grad_norm": 0.8574704527854919, "learning_rate": 9.894550423622753e-07, "loss": 0.0487, "step": 62230 }, { "epoch": 0.6649927880762861, "grad_norm": 0.9918674230575562, "learning_rate": 9.894516098120474e-07, "loss": 0.0416, "step": 62240 }, { "epoch": 0.6650996313905657, "grad_norm": 0.9657748937606812, "learning_rate": 9.894481767091915e-07, "loss": 0.0448, "step": 62250 }, { "epoch": 0.6652064747048454, "grad_norm": 0.018811693415045738, "learning_rate": 9.894447430537116e-07, "loss": 0.056, "step": 62260 }, { "epoch": 0.665313318019125, "grad_norm": 5.097419738769531, "learning_rate": 9.894413088456118e-07, "loss": 0.0443, "step": 62270 }, { "epoch": 0.6654201613334045, "grad_norm": 3.932457208633423, "learning_rate": 9.894378740848957e-07, "loss": 0.0913, "step": 62280 }, { "epoch": 0.6655270046476842, "grad_norm": 3.112285614013672, "learning_rate": 9.894344387715674e-07, "loss": 0.0735, "step": 62290 }, { "epoch": 0.6656338479619638, "grad_norm": 8.662628173828125, "learning_rate": 9.894310029056305e-07, "loss": 0.2287, "step": 62300 }, { "epoch": 0.6657406912762434, "grad_norm": 6.0687384605407715, "learning_rate": 9.89427566487089e-07, "loss": 0.0445, "step": 62310 }, { "epoch": 0.665847534590523, "grad_norm": 2.8309311866760254, "learning_rate": 9.89424129515947e-07, "loss": 0.0961, "step": 62320 }, { "epoch": 0.6659543779048026, "grad_norm": 3.291903495788574, "learning_rate": 9.894206919922082e-07, "loss": 0.0772, "step": 62330 }, { "epoch": 0.6660612212190822, "grad_norm": 0.2358669489622116, "learning_rate": 9.894172539158766e-07, "loss": 0.0451, "step": 62340 }, { "epoch": 0.6661680645333619, "grad_norm": 13.23020076751709, "learning_rate": 9.89413815286956e-07, "loss": 0.0535, "step": 62350 }, { "epoch": 0.6662749078476414, "grad_norm": 0.6598325371742249, "learning_rate": 9.894103761054503e-07, "loss": 0.024, "step": 62360 }, { "epoch": 0.666381751161921, "grad_norm": 2.2373170852661133, "learning_rate": 9.894069363713631e-07, "loss": 0.079, "step": 62370 }, { "epoch": 0.6664885944762007, "grad_norm": 0.04786563292145729, "learning_rate": 9.894034960846986e-07, "loss": 0.0587, "step": 62380 }, { "epoch": 0.6665954377904802, "grad_norm": 6.05449914932251, "learning_rate": 9.894000552454607e-07, "loss": 0.0807, "step": 62390 }, { "epoch": 0.6667022811047598, "grad_norm": 20.816240310668945, "learning_rate": 9.89396613853653e-07, "loss": 0.0376, "step": 62400 }, { "epoch": 0.6668091244190395, "grad_norm": 4.033621311187744, "learning_rate": 9.8939317190928e-07, "loss": 0.0481, "step": 62410 }, { "epoch": 0.6669159677333191, "grad_norm": 0.027393853291869164, "learning_rate": 9.893897294123449e-07, "loss": 0.107, "step": 62420 }, { "epoch": 0.6670228110475988, "grad_norm": 0.6246309876441956, "learning_rate": 9.89386286362852e-07, "loss": 0.0674, "step": 62430 }, { "epoch": 0.6671296543618783, "grad_norm": 2.3090617656707764, "learning_rate": 9.893828427608048e-07, "loss": 0.1022, "step": 62440 }, { "epoch": 0.6672364976761579, "grad_norm": 6.0273613929748535, "learning_rate": 9.893793986062075e-07, "loss": 0.0486, "step": 62450 }, { "epoch": 0.6673433409904376, "grad_norm": 7.962324619293213, "learning_rate": 9.89375953899064e-07, "loss": 0.0578, "step": 62460 }, { "epoch": 0.6674501843047171, "grad_norm": 4.054715156555176, "learning_rate": 9.893725086393782e-07, "loss": 0.0789, "step": 62470 }, { "epoch": 0.6675570276189967, "grad_norm": 6.794997215270996, "learning_rate": 9.893690628271537e-07, "loss": 0.112, "step": 62480 }, { "epoch": 0.6676638709332764, "grad_norm": 0.06515160948038101, "learning_rate": 9.893656164623947e-07, "loss": 0.0224, "step": 62490 }, { "epoch": 0.667770714247556, "grad_norm": 1.823323369026184, "learning_rate": 9.893621695451052e-07, "loss": 0.0263, "step": 62500 }, { "epoch": 0.6678775575618355, "grad_norm": 4.042397975921631, "learning_rate": 9.893587220752885e-07, "loss": 0.0261, "step": 62510 }, { "epoch": 0.6679844008761152, "grad_norm": 5.563321113586426, "learning_rate": 9.89355274052949e-07, "loss": 0.0756, "step": 62520 }, { "epoch": 0.6680912441903948, "grad_norm": 4.894139766693115, "learning_rate": 9.893518254780907e-07, "loss": 0.0495, "step": 62530 }, { "epoch": 0.6681980875046744, "grad_norm": 0.04392862319946289, "learning_rate": 9.893483763507169e-07, "loss": 0.0812, "step": 62540 }, { "epoch": 0.668304930818954, "grad_norm": 4.119204521179199, "learning_rate": 9.89344926670832e-07, "loss": 0.0649, "step": 62550 }, { "epoch": 0.6684117741332336, "grad_norm": 1.3737775087356567, "learning_rate": 9.893414764384398e-07, "loss": 0.015, "step": 62560 }, { "epoch": 0.6685186174475132, "grad_norm": 2.53204083442688, "learning_rate": 9.89338025653544e-07, "loss": 0.0437, "step": 62570 }, { "epoch": 0.6686254607617929, "grad_norm": 9.168816566467285, "learning_rate": 9.893345743161487e-07, "loss": 0.0352, "step": 62580 }, { "epoch": 0.6687323040760724, "grad_norm": 8.702327728271484, "learning_rate": 9.89331122426258e-07, "loss": 0.0654, "step": 62590 }, { "epoch": 0.668839147390352, "grad_norm": 5.282926559448242, "learning_rate": 9.89327669983875e-07, "loss": 0.0801, "step": 62600 }, { "epoch": 0.6689459907046317, "grad_norm": 7.035800457000732, "learning_rate": 9.893242169890045e-07, "loss": 0.0316, "step": 62610 }, { "epoch": 0.6690528340189112, "grad_norm": 13.147891998291016, "learning_rate": 9.8932076344165e-07, "loss": 0.0678, "step": 62620 }, { "epoch": 0.6691596773331909, "grad_norm": 2.9539852142333984, "learning_rate": 9.893173093418153e-07, "loss": 0.0989, "step": 62630 }, { "epoch": 0.6692665206474705, "grad_norm": 1.6240012645721436, "learning_rate": 9.893138546895046e-07, "loss": 0.0491, "step": 62640 }, { "epoch": 0.6693733639617501, "grad_norm": 2.2466325759887695, "learning_rate": 9.893103994847215e-07, "loss": 0.0857, "step": 62650 }, { "epoch": 0.6694802072760297, "grad_norm": 6.601909637451172, "learning_rate": 9.8930694372747e-07, "loss": 0.0588, "step": 62660 }, { "epoch": 0.6695870505903093, "grad_norm": 0.0225654486566782, "learning_rate": 9.893034874177542e-07, "loss": 0.0365, "step": 62670 }, { "epoch": 0.6696938939045889, "grad_norm": 5.386174201965332, "learning_rate": 9.893000305555777e-07, "loss": 0.0749, "step": 62680 }, { "epoch": 0.6698007372188686, "grad_norm": 2.6880955696105957, "learning_rate": 9.892965731409445e-07, "loss": 0.0374, "step": 62690 }, { "epoch": 0.6699075805331481, "grad_norm": 1.4718601703643799, "learning_rate": 9.892931151738587e-07, "loss": 0.0477, "step": 62700 }, { "epoch": 0.6700144238474277, "grad_norm": 5.7790679931640625, "learning_rate": 9.892896566543238e-07, "loss": 0.0436, "step": 62710 }, { "epoch": 0.6701212671617074, "grad_norm": 1.7095609903335571, "learning_rate": 9.89286197582344e-07, "loss": 0.0258, "step": 62720 }, { "epoch": 0.670228110475987, "grad_norm": 3.7191858291625977, "learning_rate": 9.892827379579235e-07, "loss": 0.0859, "step": 62730 }, { "epoch": 0.6703349537902665, "grad_norm": 0.7101860642433167, "learning_rate": 9.892792777810655e-07, "loss": 0.0577, "step": 62740 }, { "epoch": 0.6704417971045462, "grad_norm": 5.011669158935547, "learning_rate": 9.892758170517743e-07, "loss": 0.0407, "step": 62750 }, { "epoch": 0.6705486404188258, "grad_norm": 4.684459686279297, "learning_rate": 9.89272355770054e-07, "loss": 0.0542, "step": 62760 }, { "epoch": 0.6706554837331054, "grad_norm": 0.0892757847905159, "learning_rate": 9.89268893935908e-07, "loss": 0.0711, "step": 62770 }, { "epoch": 0.670762327047385, "grad_norm": 4.65041971206665, "learning_rate": 9.892654315493407e-07, "loss": 0.0635, "step": 62780 }, { "epoch": 0.6708691703616646, "grad_norm": 0.1560571938753128, "learning_rate": 9.892619686103557e-07, "loss": 0.0364, "step": 62790 }, { "epoch": 0.6709760136759443, "grad_norm": 4.906403541564941, "learning_rate": 9.89258505118957e-07, "loss": 0.0258, "step": 62800 }, { "epoch": 0.6710828569902239, "grad_norm": 0.3093414604663849, "learning_rate": 9.892550410751486e-07, "loss": 0.069, "step": 62810 }, { "epoch": 0.6711897003045034, "grad_norm": 1.3447855710983276, "learning_rate": 9.892515764789343e-07, "loss": 0.0263, "step": 62820 }, { "epoch": 0.6712965436187831, "grad_norm": 8.514033317565918, "learning_rate": 9.892481113303182e-07, "loss": 0.0498, "step": 62830 }, { "epoch": 0.6714033869330627, "grad_norm": 3.396009683609009, "learning_rate": 9.892446456293038e-07, "loss": 0.0603, "step": 62840 }, { "epoch": 0.6715102302473422, "grad_norm": 0.18353015184402466, "learning_rate": 9.892411793758954e-07, "loss": 0.0315, "step": 62850 }, { "epoch": 0.6716170735616219, "grad_norm": 16.395124435424805, "learning_rate": 9.892377125700968e-07, "loss": 0.0435, "step": 62860 }, { "epoch": 0.6717239168759015, "grad_norm": 14.234663963317871, "learning_rate": 9.892342452119118e-07, "loss": 0.0321, "step": 62870 }, { "epoch": 0.6718307601901811, "grad_norm": 0.49969518184661865, "learning_rate": 9.892307773013444e-07, "loss": 0.0652, "step": 62880 }, { "epoch": 0.6719376035044607, "grad_norm": 0.01891300082206726, "learning_rate": 9.892273088383987e-07, "loss": 0.0534, "step": 62890 }, { "epoch": 0.6720444468187403, "grad_norm": 0.06209995597600937, "learning_rate": 9.892238398230782e-07, "loss": 0.0287, "step": 62900 }, { "epoch": 0.6721512901330199, "grad_norm": 4.76632833480835, "learning_rate": 9.892203702553872e-07, "loss": 0.0487, "step": 62910 }, { "epoch": 0.6722581334472996, "grad_norm": 3.2756826877593994, "learning_rate": 9.892169001353295e-07, "loss": 0.091, "step": 62920 }, { "epoch": 0.6723649767615791, "grad_norm": 5.84531307220459, "learning_rate": 9.89213429462909e-07, "loss": 0.0563, "step": 62930 }, { "epoch": 0.6724718200758587, "grad_norm": 5.194230556488037, "learning_rate": 9.892099582381297e-07, "loss": 0.0295, "step": 62940 }, { "epoch": 0.6725786633901384, "grad_norm": 0.9345850348472595, "learning_rate": 9.892064864609952e-07, "loss": 0.0607, "step": 62950 }, { "epoch": 0.672685506704418, "grad_norm": 9.5568208694458, "learning_rate": 9.892030141315098e-07, "loss": 0.0872, "step": 62960 }, { "epoch": 0.6727923500186975, "grad_norm": 7.056663513183594, "learning_rate": 9.891995412496773e-07, "loss": 0.0355, "step": 62970 }, { "epoch": 0.6728991933329772, "grad_norm": 2.6130635738372803, "learning_rate": 9.891960678155015e-07, "loss": 0.0904, "step": 62980 }, { "epoch": 0.6730060366472568, "grad_norm": 2.2852632999420166, "learning_rate": 9.891925938289864e-07, "loss": 0.0477, "step": 62990 }, { "epoch": 0.6731128799615365, "grad_norm": 1.6971690654754639, "learning_rate": 9.891891192901361e-07, "loss": 0.0684, "step": 63000 }, { "epoch": 0.673219723275816, "grad_norm": 4.035637855529785, "learning_rate": 9.891856441989543e-07, "loss": 0.0533, "step": 63010 }, { "epoch": 0.6733265665900956, "grad_norm": 10.338704109191895, "learning_rate": 9.89182168555445e-07, "loss": 0.081, "step": 63020 }, { "epoch": 0.6734334099043753, "grad_norm": 9.300134658813477, "learning_rate": 9.891786923596118e-07, "loss": 0.0247, "step": 63030 }, { "epoch": 0.6735402532186548, "grad_norm": 4.548288345336914, "learning_rate": 9.891752156114593e-07, "loss": 0.0784, "step": 63040 }, { "epoch": 0.6736470965329344, "grad_norm": 2.2919516563415527, "learning_rate": 9.89171738310991e-07, "loss": 0.026, "step": 63050 }, { "epoch": 0.6737539398472141, "grad_norm": 4.291372776031494, "learning_rate": 9.891682604582108e-07, "loss": 0.0494, "step": 63060 }, { "epoch": 0.6738607831614937, "grad_norm": 4.774412631988525, "learning_rate": 9.891647820531229e-07, "loss": 0.0344, "step": 63070 }, { "epoch": 0.6739676264757732, "grad_norm": 1.056469202041626, "learning_rate": 9.89161303095731e-07, "loss": 0.0452, "step": 63080 }, { "epoch": 0.6740744697900529, "grad_norm": 2.299889087677002, "learning_rate": 9.891578235860388e-07, "loss": 0.0784, "step": 63090 }, { "epoch": 0.6741813131043325, "grad_norm": 2.4909567832946777, "learning_rate": 9.891543435240507e-07, "loss": 0.0227, "step": 63100 }, { "epoch": 0.6742881564186121, "grad_norm": 8.031089782714844, "learning_rate": 9.891508629097704e-07, "loss": 0.0486, "step": 63110 }, { "epoch": 0.6743949997328917, "grad_norm": 0.2646557092666626, "learning_rate": 9.89147381743202e-07, "loss": 0.06, "step": 63120 }, { "epoch": 0.6745018430471713, "grad_norm": Infinity, "learning_rate": 9.89143900024349e-07, "loss": 0.1213, "step": 63130 }, { "epoch": 0.6746086863614509, "grad_norm": 9.620062828063965, "learning_rate": 9.89140417753216e-07, "loss": 0.0865, "step": 63140 }, { "epoch": 0.6747155296757306, "grad_norm": 3.9879162311553955, "learning_rate": 9.891369349298062e-07, "loss": 0.0158, "step": 63150 }, { "epoch": 0.6748223729900101, "grad_norm": 0.16315875947475433, "learning_rate": 9.89133451554124e-07, "loss": 0.0542, "step": 63160 }, { "epoch": 0.6749292163042898, "grad_norm": 0.12404423207044601, "learning_rate": 9.89129967626173e-07, "loss": 0.0178, "step": 63170 }, { "epoch": 0.6750360596185694, "grad_norm": 4.9017252922058105, "learning_rate": 9.891264831459578e-07, "loss": 0.0925, "step": 63180 }, { "epoch": 0.675142902932849, "grad_norm": 0.09149085730314255, "learning_rate": 9.891229981134816e-07, "loss": 0.0506, "step": 63190 }, { "epoch": 0.6752497462471286, "grad_norm": 13.233097076416016, "learning_rate": 9.891195125287487e-07, "loss": 0.0987, "step": 63200 }, { "epoch": 0.6753565895614082, "grad_norm": 2.576957941055298, "learning_rate": 9.89116026391763e-07, "loss": 0.0561, "step": 63210 }, { "epoch": 0.6754634328756878, "grad_norm": 4.434541702270508, "learning_rate": 9.891125397025283e-07, "loss": 0.0239, "step": 63220 }, { "epoch": 0.6755702761899675, "grad_norm": 0.05781955644488335, "learning_rate": 9.891090524610485e-07, "loss": 0.019, "step": 63230 }, { "epoch": 0.675677119504247, "grad_norm": 0.7529359459877014, "learning_rate": 9.89105564667328e-07, "loss": 0.0369, "step": 63240 }, { "epoch": 0.6757839628185266, "grad_norm": 3.423269033432007, "learning_rate": 9.891020763213701e-07, "loss": 0.0206, "step": 63250 }, { "epoch": 0.6758908061328063, "grad_norm": 2.036926746368408, "learning_rate": 9.890985874231792e-07, "loss": 0.0883, "step": 63260 }, { "epoch": 0.6759976494470858, "grad_norm": 0.11579418182373047, "learning_rate": 9.89095097972759e-07, "loss": 0.0862, "step": 63270 }, { "epoch": 0.6761044927613654, "grad_norm": 4.13153076171875, "learning_rate": 9.890916079701137e-07, "loss": 0.0384, "step": 63280 }, { "epoch": 0.6762113360756451, "grad_norm": 0.9103259444236755, "learning_rate": 9.890881174152469e-07, "loss": 0.026, "step": 63290 }, { "epoch": 0.6763181793899247, "grad_norm": 0.053202345967292786, "learning_rate": 9.890846263081628e-07, "loss": 0.031, "step": 63300 }, { "epoch": 0.6764250227042042, "grad_norm": 1.8252339363098145, "learning_rate": 9.89081134648865e-07, "loss": 0.066, "step": 63310 }, { "epoch": 0.6765318660184839, "grad_norm": 0.06292259693145752, "learning_rate": 9.890776424373578e-07, "loss": 0.0524, "step": 63320 }, { "epoch": 0.6766387093327635, "grad_norm": 7.639394283294678, "learning_rate": 9.890741496736452e-07, "loss": 0.1507, "step": 63330 }, { "epoch": 0.6767455526470431, "grad_norm": 3.652768611907959, "learning_rate": 9.890706563577309e-07, "loss": 0.0337, "step": 63340 }, { "epoch": 0.6768523959613227, "grad_norm": 3.583911180496216, "learning_rate": 9.890671624896188e-07, "loss": 0.0745, "step": 63350 }, { "epoch": 0.6769592392756023, "grad_norm": 5.50698709487915, "learning_rate": 9.89063668069313e-07, "loss": 0.0652, "step": 63360 }, { "epoch": 0.677066082589882, "grad_norm": 5.808091163635254, "learning_rate": 9.890601730968176e-07, "loss": 0.0547, "step": 63370 }, { "epoch": 0.6771729259041616, "grad_norm": 1.8485382795333862, "learning_rate": 9.89056677572136e-07, "loss": 0.0883, "step": 63380 }, { "epoch": 0.6772797692184411, "grad_norm": 6.287992000579834, "learning_rate": 9.890531814952727e-07, "loss": 0.0587, "step": 63390 }, { "epoch": 0.6773866125327208, "grad_norm": 0.3310726583003998, "learning_rate": 9.890496848662316e-07, "loss": 0.044, "step": 63400 }, { "epoch": 0.6774934558470004, "grad_norm": 14.56425952911377, "learning_rate": 9.890461876850162e-07, "loss": 0.0571, "step": 63410 }, { "epoch": 0.67760029916128, "grad_norm": 5.841499328613281, "learning_rate": 9.89042689951631e-07, "loss": 0.1505, "step": 63420 }, { "epoch": 0.6777071424755596, "grad_norm": 19.057567596435547, "learning_rate": 9.890391916660795e-07, "loss": 0.0545, "step": 63430 }, { "epoch": 0.6778139857898392, "grad_norm": 0.35110533237457275, "learning_rate": 9.89035692828366e-07, "loss": 0.0367, "step": 63440 }, { "epoch": 0.6779208291041188, "grad_norm": 16.89459228515625, "learning_rate": 9.89032193438494e-07, "loss": 0.0957, "step": 63450 }, { "epoch": 0.6780276724183985, "grad_norm": 21.507953643798828, "learning_rate": 9.890286934964682e-07, "loss": 0.1029, "step": 63460 }, { "epoch": 0.678134515732678, "grad_norm": 6.260114669799805, "learning_rate": 9.890251930022918e-07, "loss": 0.0639, "step": 63470 }, { "epoch": 0.6782413590469576, "grad_norm": 1.3686294555664062, "learning_rate": 9.890216919559692e-07, "loss": 0.0919, "step": 63480 }, { "epoch": 0.6783482023612373, "grad_norm": 9.112306594848633, "learning_rate": 9.890181903575041e-07, "loss": 0.0341, "step": 63490 }, { "epoch": 0.6784550456755168, "grad_norm": 8.914026260375977, "learning_rate": 9.890146882069006e-07, "loss": 0.0661, "step": 63500 }, { "epoch": 0.6785618889897964, "grad_norm": 7.583337783813477, "learning_rate": 9.890111855041625e-07, "loss": 0.1061, "step": 63510 }, { "epoch": 0.6786687323040761, "grad_norm": 0.9961904883384705, "learning_rate": 9.89007682249294e-07, "loss": 0.0122, "step": 63520 }, { "epoch": 0.6787755756183557, "grad_norm": 4.64343786239624, "learning_rate": 9.890041784422989e-07, "loss": 0.049, "step": 63530 }, { "epoch": 0.6788824189326353, "grad_norm": 7.847376823425293, "learning_rate": 9.89000674083181e-07, "loss": 0.0393, "step": 63540 }, { "epoch": 0.6789892622469149, "grad_norm": 0.18807101249694824, "learning_rate": 9.889971691719447e-07, "loss": 0.0527, "step": 63550 }, { "epoch": 0.6790961055611945, "grad_norm": 2.1493592262268066, "learning_rate": 9.889936637085934e-07, "loss": 0.097, "step": 63560 }, { "epoch": 0.6792029488754742, "grad_norm": 0.022448087111115456, "learning_rate": 9.889901576931317e-07, "loss": 0.0409, "step": 63570 }, { "epoch": 0.6793097921897537, "grad_norm": 4.787312984466553, "learning_rate": 9.889866511255628e-07, "loss": 0.1654, "step": 63580 }, { "epoch": 0.6794166355040333, "grad_norm": 5.741869926452637, "learning_rate": 9.889831440058914e-07, "loss": 0.0365, "step": 63590 }, { "epoch": 0.679523478818313, "grad_norm": 3.4619803428649902, "learning_rate": 9.88979636334121e-07, "loss": 0.0578, "step": 63600 }, { "epoch": 0.6796303221325926, "grad_norm": 4.426049709320068, "learning_rate": 9.889761281102557e-07, "loss": 0.0593, "step": 63610 }, { "epoch": 0.6797371654468721, "grad_norm": 6.803235054016113, "learning_rate": 9.889726193342994e-07, "loss": 0.0652, "step": 63620 }, { "epoch": 0.6798440087611518, "grad_norm": 7.908076286315918, "learning_rate": 9.889691100062561e-07, "loss": 0.0438, "step": 63630 }, { "epoch": 0.6799508520754314, "grad_norm": 10.045546531677246, "learning_rate": 9.8896560012613e-07, "loss": 0.0488, "step": 63640 }, { "epoch": 0.680057695389711, "grad_norm": 1.160406470298767, "learning_rate": 9.889620896939245e-07, "loss": 0.0622, "step": 63650 }, { "epoch": 0.6801645387039906, "grad_norm": 0.16730113327503204, "learning_rate": 9.88958578709644e-07, "loss": 0.0551, "step": 63660 }, { "epoch": 0.6802713820182702, "grad_norm": 1.0150498151779175, "learning_rate": 9.889550671732925e-07, "loss": 0.0695, "step": 63670 }, { "epoch": 0.6803782253325498, "grad_norm": 12.199872016906738, "learning_rate": 9.889515550848737e-07, "loss": 0.0736, "step": 63680 }, { "epoch": 0.6804850686468294, "grad_norm": 1.3792575597763062, "learning_rate": 9.889480424443918e-07, "loss": 0.0742, "step": 63690 }, { "epoch": 0.680591911961109, "grad_norm": 7.111745357513428, "learning_rate": 9.889445292518505e-07, "loss": 0.0282, "step": 63700 }, { "epoch": 0.6806987552753886, "grad_norm": 1.2925728559494019, "learning_rate": 9.88941015507254e-07, "loss": 0.0491, "step": 63710 }, { "epoch": 0.6808055985896683, "grad_norm": 12.220307350158691, "learning_rate": 9.889375012106062e-07, "loss": 0.0465, "step": 63720 }, { "epoch": 0.6809124419039478, "grad_norm": 6.1420979499816895, "learning_rate": 9.889339863619111e-07, "loss": 0.0493, "step": 63730 }, { "epoch": 0.6810192852182275, "grad_norm": 7.938559055328369, "learning_rate": 9.889304709611725e-07, "loss": 0.0492, "step": 63740 }, { "epoch": 0.6811261285325071, "grad_norm": 0.2593532204627991, "learning_rate": 9.889269550083945e-07, "loss": 0.0417, "step": 63750 }, { "epoch": 0.6812329718467867, "grad_norm": 2.2104713916778564, "learning_rate": 9.889234385035813e-07, "loss": 0.0504, "step": 63760 }, { "epoch": 0.6813398151610663, "grad_norm": 4.9856767654418945, "learning_rate": 9.889199214467363e-07, "loss": 0.0447, "step": 63770 }, { "epoch": 0.6814466584753459, "grad_norm": 0.11744767427444458, "learning_rate": 9.88916403837864e-07, "loss": 0.0482, "step": 63780 }, { "epoch": 0.6815535017896255, "grad_norm": 4.664394378662109, "learning_rate": 9.889128856769682e-07, "loss": 0.0588, "step": 63790 }, { "epoch": 0.6816603451039052, "grad_norm": 3.4678993225097656, "learning_rate": 9.88909366964053e-07, "loss": 0.0681, "step": 63800 }, { "epoch": 0.6817671884181847, "grad_norm": 4.679099082946777, "learning_rate": 9.889058476991218e-07, "loss": 0.0522, "step": 63810 }, { "epoch": 0.6818740317324643, "grad_norm": 4.670466899871826, "learning_rate": 9.889023278821794e-07, "loss": 0.0281, "step": 63820 }, { "epoch": 0.681980875046744, "grad_norm": 1.2045093774795532, "learning_rate": 9.88898807513229e-07, "loss": 0.0885, "step": 63830 }, { "epoch": 0.6820877183610236, "grad_norm": 5.413934230804443, "learning_rate": 9.888952865922753e-07, "loss": 0.0417, "step": 63840 }, { "epoch": 0.6821945616753031, "grad_norm": 1.0345489978790283, "learning_rate": 9.888917651193218e-07, "loss": 0.0469, "step": 63850 }, { "epoch": 0.6823014049895828, "grad_norm": 24.261564254760742, "learning_rate": 9.888882430943726e-07, "loss": 0.0445, "step": 63860 }, { "epoch": 0.6824082483038624, "grad_norm": 0.10427626222372055, "learning_rate": 9.888847205174317e-07, "loss": 0.0534, "step": 63870 }, { "epoch": 0.682515091618142, "grad_norm": 8.426017761230469, "learning_rate": 9.888811973885031e-07, "loss": 0.029, "step": 63880 }, { "epoch": 0.6826219349324216, "grad_norm": 6.676443099975586, "learning_rate": 9.888776737075905e-07, "loss": 0.0941, "step": 63890 }, { "epoch": 0.6827287782467012, "grad_norm": 2.5935943126678467, "learning_rate": 9.888741494746983e-07, "loss": 0.069, "step": 63900 }, { "epoch": 0.6828356215609809, "grad_norm": 0.03206528350710869, "learning_rate": 9.888706246898303e-07, "loss": 0.085, "step": 63910 }, { "epoch": 0.6829424648752604, "grad_norm": 1.711532473564148, "learning_rate": 9.888670993529903e-07, "loss": 0.0807, "step": 63920 }, { "epoch": 0.68304930818954, "grad_norm": 1.6129122972488403, "learning_rate": 9.888635734641826e-07, "loss": 0.0703, "step": 63930 }, { "epoch": 0.6831561515038197, "grad_norm": 0.2784747779369354, "learning_rate": 9.888600470234111e-07, "loss": 0.0607, "step": 63940 }, { "epoch": 0.6832629948180993, "grad_norm": 4.000959396362305, "learning_rate": 9.888565200306795e-07, "loss": 0.0576, "step": 63950 }, { "epoch": 0.6833698381323788, "grad_norm": 2.691763401031494, "learning_rate": 9.88852992485992e-07, "loss": 0.0563, "step": 63960 }, { "epoch": 0.6834766814466585, "grad_norm": 1.6402755975723267, "learning_rate": 9.888494643893527e-07, "loss": 0.0778, "step": 63970 }, { "epoch": 0.6835835247609381, "grad_norm": 0.6148321628570557, "learning_rate": 9.888459357407655e-07, "loss": 0.1326, "step": 63980 }, { "epoch": 0.6836903680752177, "grad_norm": 6.711965084075928, "learning_rate": 9.888424065402343e-07, "loss": 0.0373, "step": 63990 }, { "epoch": 0.6837972113894973, "grad_norm": 6.538328170776367, "learning_rate": 9.88838876787763e-07, "loss": 0.0649, "step": 64000 }, { "epoch": 0.6839040547037769, "grad_norm": 7.587958812713623, "learning_rate": 9.888353464833558e-07, "loss": 0.0432, "step": 64010 }, { "epoch": 0.6840108980180565, "grad_norm": 3.9577767848968506, "learning_rate": 9.888318156270168e-07, "loss": 0.0423, "step": 64020 }, { "epoch": 0.6841177413323362, "grad_norm": 4.350121974945068, "learning_rate": 9.888282842187497e-07, "loss": 0.0353, "step": 64030 }, { "epoch": 0.6842245846466157, "grad_norm": 7.9156317710876465, "learning_rate": 9.888247522585583e-07, "loss": 0.0176, "step": 64040 }, { "epoch": 0.6843314279608953, "grad_norm": 0.3008672893047333, "learning_rate": 9.888212197464471e-07, "loss": 0.0263, "step": 64050 }, { "epoch": 0.684438271275175, "grad_norm": 8.757911682128906, "learning_rate": 9.888176866824199e-07, "loss": 0.036, "step": 64060 }, { "epoch": 0.6845451145894546, "grad_norm": 5.948240756988525, "learning_rate": 9.888141530664804e-07, "loss": 0.0306, "step": 64070 }, { "epoch": 0.6846519579037341, "grad_norm": 17.20732879638672, "learning_rate": 9.888106188986329e-07, "loss": 0.0833, "step": 64080 }, { "epoch": 0.6847588012180138, "grad_norm": 5.127196311950684, "learning_rate": 9.888070841788813e-07, "loss": 0.0477, "step": 64090 }, { "epoch": 0.6848656445322934, "grad_norm": 9.573934555053711, "learning_rate": 9.888035489072298e-07, "loss": 0.0241, "step": 64100 }, { "epoch": 0.684972487846573, "grad_norm": 2.732330083847046, "learning_rate": 9.88800013083682e-07, "loss": 0.0387, "step": 64110 }, { "epoch": 0.6850793311608526, "grad_norm": 3.1182143688201904, "learning_rate": 9.887964767082422e-07, "loss": 0.0705, "step": 64120 }, { "epoch": 0.6851861744751322, "grad_norm": 3.11867618560791, "learning_rate": 9.887929397809143e-07, "loss": 0.051, "step": 64130 }, { "epoch": 0.6852930177894119, "grad_norm": 12.443208694458008, "learning_rate": 9.887894023017022e-07, "loss": 0.1031, "step": 64140 }, { "epoch": 0.6853998611036914, "grad_norm": 3.1540465354919434, "learning_rate": 9.887858642706102e-07, "loss": 0.1155, "step": 64150 }, { "epoch": 0.685506704417971, "grad_norm": 0.5287481546401978, "learning_rate": 9.887823256876418e-07, "loss": 0.0398, "step": 64160 }, { "epoch": 0.6856135477322507, "grad_norm": 6.523237705230713, "learning_rate": 9.887787865528013e-07, "loss": 0.0953, "step": 64170 }, { "epoch": 0.6857203910465303, "grad_norm": 7.145761966705322, "learning_rate": 9.887752468660928e-07, "loss": 0.0765, "step": 64180 }, { "epoch": 0.6858272343608098, "grad_norm": 2.5043601989746094, "learning_rate": 9.887717066275202e-07, "loss": 0.0848, "step": 64190 }, { "epoch": 0.6859340776750895, "grad_norm": 3.0313127040863037, "learning_rate": 9.887681658370873e-07, "loss": 0.0667, "step": 64200 }, { "epoch": 0.6860409209893691, "grad_norm": 6.143802165985107, "learning_rate": 9.88764624494798e-07, "loss": 0.0501, "step": 64210 }, { "epoch": 0.6861477643036487, "grad_norm": 4.554862022399902, "learning_rate": 9.88761082600657e-07, "loss": 0.0507, "step": 64220 }, { "epoch": 0.6862546076179283, "grad_norm": 12.724005699157715, "learning_rate": 9.887575401546677e-07, "loss": 0.1553, "step": 64230 }, { "epoch": 0.6863614509322079, "grad_norm": 0.8034178614616394, "learning_rate": 9.887539971568341e-07, "loss": 0.0708, "step": 64240 }, { "epoch": 0.6864682942464875, "grad_norm": 2.5915355682373047, "learning_rate": 9.887504536071605e-07, "loss": 0.0645, "step": 64250 }, { "epoch": 0.6865751375607672, "grad_norm": 2.1264004707336426, "learning_rate": 9.887469095056507e-07, "loss": 0.0804, "step": 64260 }, { "epoch": 0.6866819808750467, "grad_norm": 2.5998997688293457, "learning_rate": 9.887433648523088e-07, "loss": 0.0401, "step": 64270 }, { "epoch": 0.6867888241893264, "grad_norm": 8.141846656799316, "learning_rate": 9.887398196471388e-07, "loss": 0.0539, "step": 64280 }, { "epoch": 0.686895667503606, "grad_norm": 9.385599136352539, "learning_rate": 9.887362738901445e-07, "loss": 0.0263, "step": 64290 }, { "epoch": 0.6870025108178855, "grad_norm": 0.051845647394657135, "learning_rate": 9.887327275813301e-07, "loss": 0.0633, "step": 64300 }, { "epoch": 0.6871093541321652, "grad_norm": 9.634757041931152, "learning_rate": 9.887291807206996e-07, "loss": 0.0618, "step": 64310 }, { "epoch": 0.6872161974464448, "grad_norm": 9.827731132507324, "learning_rate": 9.887256333082572e-07, "loss": 0.0652, "step": 64320 }, { "epoch": 0.6873230407607244, "grad_norm": 3.652719736099243, "learning_rate": 9.887220853440065e-07, "loss": 0.0605, "step": 64330 }, { "epoch": 0.687429884075004, "grad_norm": 0.046580247581005096, "learning_rate": 9.887185368279515e-07, "loss": 0.094, "step": 64340 }, { "epoch": 0.6875367273892836, "grad_norm": 4.455263614654541, "learning_rate": 9.887149877600965e-07, "loss": 0.0511, "step": 64350 }, { "epoch": 0.6876435707035632, "grad_norm": 2.917984962463379, "learning_rate": 9.887114381404454e-07, "loss": 0.0765, "step": 64360 }, { "epoch": 0.6877504140178429, "grad_norm": 6.03432035446167, "learning_rate": 9.887078879690024e-07, "loss": 0.0753, "step": 64370 }, { "epoch": 0.6878572573321224, "grad_norm": 5.6302971839904785, "learning_rate": 9.88704337245771e-07, "loss": 0.042, "step": 64380 }, { "epoch": 0.687964100646402, "grad_norm": 1.4083974361419678, "learning_rate": 9.887007859707556e-07, "loss": 0.0688, "step": 64390 }, { "epoch": 0.6880709439606817, "grad_norm": 6.304161071777344, "learning_rate": 9.886972341439601e-07, "loss": 0.0615, "step": 64400 }, { "epoch": 0.6881777872749613, "grad_norm": 7.632072925567627, "learning_rate": 9.886936817653887e-07, "loss": 0.0554, "step": 64410 }, { "epoch": 0.6882846305892408, "grad_norm": 0.7618305683135986, "learning_rate": 9.88690128835045e-07, "loss": 0.0473, "step": 64420 }, { "epoch": 0.6883914739035205, "grad_norm": 0.14036907255649567, "learning_rate": 9.886865753529337e-07, "loss": 0.0584, "step": 64430 }, { "epoch": 0.6884983172178001, "grad_norm": 4.614004135131836, "learning_rate": 9.88683021319058e-07, "loss": 0.0202, "step": 64440 }, { "epoch": 0.6886051605320797, "grad_norm": 17.630634307861328, "learning_rate": 9.886794667334223e-07, "loss": 0.1113, "step": 64450 }, { "epoch": 0.6887120038463593, "grad_norm": 1.7710623741149902, "learning_rate": 9.886759115960307e-07, "loss": 0.03, "step": 64460 }, { "epoch": 0.6888188471606389, "grad_norm": 0.7346431016921997, "learning_rate": 9.886723559068872e-07, "loss": 0.0142, "step": 64470 }, { "epoch": 0.6889256904749186, "grad_norm": 2.3913605213165283, "learning_rate": 9.886687996659955e-07, "loss": 0.1313, "step": 64480 }, { "epoch": 0.6890325337891982, "grad_norm": 9.301289558410645, "learning_rate": 9.8866524287336e-07, "loss": 0.0905, "step": 64490 }, { "epoch": 0.6891393771034777, "grad_norm": 0.685440182685852, "learning_rate": 9.886616855289846e-07, "loss": 0.0735, "step": 64500 }, { "epoch": 0.6892462204177574, "grad_norm": 0.2360488474369049, "learning_rate": 9.886581276328732e-07, "loss": 0.0383, "step": 64510 }, { "epoch": 0.689353063732037, "grad_norm": 9.215972900390625, "learning_rate": 9.8865456918503e-07, "loss": 0.0834, "step": 64520 }, { "epoch": 0.6894599070463165, "grad_norm": 3.9327468872070312, "learning_rate": 9.886510101854586e-07, "loss": 0.0354, "step": 64530 }, { "epoch": 0.6895667503605962, "grad_norm": 8.761998176574707, "learning_rate": 9.886474506341635e-07, "loss": 0.0552, "step": 64540 }, { "epoch": 0.6896735936748758, "grad_norm": 3.092581272125244, "learning_rate": 9.886438905311487e-07, "loss": 0.0438, "step": 64550 }, { "epoch": 0.6897804369891554, "grad_norm": 1.355461597442627, "learning_rate": 9.88640329876418e-07, "loss": 0.0466, "step": 64560 }, { "epoch": 0.689887280303435, "grad_norm": 2.223238229751587, "learning_rate": 9.886367686699754e-07, "loss": 0.0876, "step": 64570 }, { "epoch": 0.6899941236177146, "grad_norm": 7.013116359710693, "learning_rate": 9.886332069118252e-07, "loss": 0.1833, "step": 64580 }, { "epoch": 0.6901009669319942, "grad_norm": 2.9882614612579346, "learning_rate": 9.88629644601971e-07, "loss": 0.0431, "step": 64590 }, { "epoch": 0.6902078102462739, "grad_norm": 3.6611781120300293, "learning_rate": 9.886260817404171e-07, "loss": 0.0985, "step": 64600 }, { "epoch": 0.6903146535605534, "grad_norm": 3.1520283222198486, "learning_rate": 9.886225183271674e-07, "loss": 0.0772, "step": 64610 }, { "epoch": 0.690421496874833, "grad_norm": 1.7766555547714233, "learning_rate": 9.886189543622261e-07, "loss": 0.0887, "step": 64620 }, { "epoch": 0.6905283401891127, "grad_norm": 0.24966853857040405, "learning_rate": 9.886153898455972e-07, "loss": 0.05, "step": 64630 }, { "epoch": 0.6906351835033923, "grad_norm": 5.426985263824463, "learning_rate": 9.886118247772845e-07, "loss": 0.0529, "step": 64640 }, { "epoch": 0.6907420268176719, "grad_norm": 0.09811552613973618, "learning_rate": 9.886082591572925e-07, "loss": 0.0121, "step": 64650 }, { "epoch": 0.6908488701319515, "grad_norm": 5.212610721588135, "learning_rate": 9.886046929856245e-07, "loss": 0.1304, "step": 64660 }, { "epoch": 0.6909557134462311, "grad_norm": 19.600481033325195, "learning_rate": 9.88601126262285e-07, "loss": 0.0591, "step": 64670 }, { "epoch": 0.6910625567605108, "grad_norm": 4.920321464538574, "learning_rate": 9.88597558987278e-07, "loss": 0.1172, "step": 64680 }, { "epoch": 0.6911694000747903, "grad_norm": 4.0273213386535645, "learning_rate": 9.885939911606077e-07, "loss": 0.068, "step": 64690 }, { "epoch": 0.6912762433890699, "grad_norm": 13.987190246582031, "learning_rate": 9.885904227822776e-07, "loss": 0.1333, "step": 64700 }, { "epoch": 0.6913830867033496, "grad_norm": 8.93196964263916, "learning_rate": 9.885868538522922e-07, "loss": 0.0469, "step": 64710 }, { "epoch": 0.6914899300176292, "grad_norm": 0.7386751174926758, "learning_rate": 9.885832843706553e-07, "loss": 0.0237, "step": 64720 }, { "epoch": 0.6915967733319087, "grad_norm": 3.992704391479492, "learning_rate": 9.885797143373712e-07, "loss": 0.0482, "step": 64730 }, { "epoch": 0.6917036166461884, "grad_norm": 0.046997856348752975, "learning_rate": 9.885761437524434e-07, "loss": 0.0405, "step": 64740 }, { "epoch": 0.691810459960468, "grad_norm": 4.663941860198975, "learning_rate": 9.885725726158764e-07, "loss": 0.0907, "step": 64750 }, { "epoch": 0.6919173032747475, "grad_norm": 16.692882537841797, "learning_rate": 9.885690009276742e-07, "loss": 0.0166, "step": 64760 }, { "epoch": 0.6920241465890272, "grad_norm": 3.1703457832336426, "learning_rate": 9.885654286878406e-07, "loss": 0.0409, "step": 64770 }, { "epoch": 0.6921309899033068, "grad_norm": 8.142263412475586, "learning_rate": 9.885618558963798e-07, "loss": 0.044, "step": 64780 }, { "epoch": 0.6922378332175864, "grad_norm": 2.9187493324279785, "learning_rate": 9.885582825532957e-07, "loss": 0.042, "step": 64790 }, { "epoch": 0.692344676531866, "grad_norm": 10.490195274353027, "learning_rate": 9.885547086585924e-07, "loss": 0.09, "step": 64800 }, { "epoch": 0.6924515198461456, "grad_norm": 1.5471739768981934, "learning_rate": 9.88551134212274e-07, "loss": 0.0255, "step": 64810 }, { "epoch": 0.6925583631604252, "grad_norm": 0.21218223869800568, "learning_rate": 9.885475592143446e-07, "loss": 0.0604, "step": 64820 }, { "epoch": 0.6926652064747049, "grad_norm": 6.601600170135498, "learning_rate": 9.885439836648081e-07, "loss": 0.0352, "step": 64830 }, { "epoch": 0.6927720497889844, "grad_norm": 4.631577491760254, "learning_rate": 9.885404075636685e-07, "loss": 0.0329, "step": 64840 }, { "epoch": 0.6928788931032641, "grad_norm": 0.21134474873542786, "learning_rate": 9.8853683091093e-07, "loss": 0.0363, "step": 64850 }, { "epoch": 0.6929857364175437, "grad_norm": 4.521158218383789, "learning_rate": 9.885332537065965e-07, "loss": 0.0306, "step": 64860 }, { "epoch": 0.6930925797318233, "grad_norm": 1.3472607135772705, "learning_rate": 9.885296759506722e-07, "loss": 0.0859, "step": 64870 }, { "epoch": 0.6931994230461029, "grad_norm": 0.639691174030304, "learning_rate": 9.885260976431609e-07, "loss": 0.0773, "step": 64880 }, { "epoch": 0.6933062663603825, "grad_norm": 8.154003143310547, "learning_rate": 9.885225187840667e-07, "loss": 0.0443, "step": 64890 }, { "epoch": 0.6934131096746621, "grad_norm": 0.03979405760765076, "learning_rate": 9.885189393733939e-07, "loss": 0.0252, "step": 64900 }, { "epoch": 0.6935199529889418, "grad_norm": 8.416817665100098, "learning_rate": 9.885153594111463e-07, "loss": 0.0805, "step": 64910 }, { "epoch": 0.6936267963032213, "grad_norm": 7.11937141418457, "learning_rate": 9.885117788973277e-07, "loss": 0.0682, "step": 64920 }, { "epoch": 0.6937336396175009, "grad_norm": 6.792361736297607, "learning_rate": 9.885081978319428e-07, "loss": 0.0606, "step": 64930 }, { "epoch": 0.6938404829317806, "grad_norm": 5.489040851593018, "learning_rate": 9.885046162149951e-07, "loss": 0.0685, "step": 64940 }, { "epoch": 0.6939473262460601, "grad_norm": 4.6184186935424805, "learning_rate": 9.885010340464889e-07, "loss": 0.0178, "step": 64950 }, { "epoch": 0.6940541695603397, "grad_norm": 0.18139570951461792, "learning_rate": 9.884974513264282e-07, "loss": 0.0317, "step": 64960 }, { "epoch": 0.6941610128746194, "grad_norm": 6.422749042510986, "learning_rate": 9.884938680548167e-07, "loss": 0.0849, "step": 64970 }, { "epoch": 0.694267856188899, "grad_norm": 1.694350004196167, "learning_rate": 9.884902842316591e-07, "loss": 0.044, "step": 64980 }, { "epoch": 0.6943746995031785, "grad_norm": 2.687535047531128, "learning_rate": 9.88486699856959e-07, "loss": 0.0423, "step": 64990 }, { "epoch": 0.6944815428174582, "grad_norm": 5.337855815887451, "learning_rate": 9.884831149307205e-07, "loss": 0.0499, "step": 65000 }, { "epoch": 0.6945883861317378, "grad_norm": 1.608269453048706, "learning_rate": 9.884795294529478e-07, "loss": 0.0724, "step": 65010 }, { "epoch": 0.6946952294460175, "grad_norm": 2.4541828632354736, "learning_rate": 9.884759434236446e-07, "loss": 0.051, "step": 65020 }, { "epoch": 0.694802072760297, "grad_norm": 1.3066612482070923, "learning_rate": 9.884723568428154e-07, "loss": 0.0191, "step": 65030 }, { "epoch": 0.6949089160745766, "grad_norm": 1.8914562463760376, "learning_rate": 9.884687697104642e-07, "loss": 0.0299, "step": 65040 }, { "epoch": 0.6950157593888563, "grad_norm": 5.538301467895508, "learning_rate": 9.884651820265946e-07, "loss": 0.0516, "step": 65050 }, { "epoch": 0.6951226027031359, "grad_norm": 7.714901924133301, "learning_rate": 9.88461593791211e-07, "loss": 0.1077, "step": 65060 }, { "epoch": 0.6952294460174154, "grad_norm": 0.2682695686817169, "learning_rate": 9.884580050043175e-07, "loss": 0.0334, "step": 65070 }, { "epoch": 0.6953362893316951, "grad_norm": 3.1502506732940674, "learning_rate": 9.884544156659179e-07, "loss": 0.0885, "step": 65080 }, { "epoch": 0.6954431326459747, "grad_norm": 8.280532836914062, "learning_rate": 9.884508257760164e-07, "loss": 0.049, "step": 65090 }, { "epoch": 0.6955499759602543, "grad_norm": 1.6455062627792358, "learning_rate": 9.884472353346173e-07, "loss": 0.1582, "step": 65100 }, { "epoch": 0.6956568192745339, "grad_norm": 2.1003386974334717, "learning_rate": 9.88443644341724e-07, "loss": 0.0493, "step": 65110 }, { "epoch": 0.6957636625888135, "grad_norm": 1.717922568321228, "learning_rate": 9.884400527973413e-07, "loss": 0.0227, "step": 65120 }, { "epoch": 0.6958705059030931, "grad_norm": 1.2738977670669556, "learning_rate": 9.88436460701473e-07, "loss": 0.0166, "step": 65130 }, { "epoch": 0.6959773492173728, "grad_norm": 4.838457107543945, "learning_rate": 9.884328680541228e-07, "loss": 0.0452, "step": 65140 }, { "epoch": 0.6960841925316523, "grad_norm": 0.8456753492355347, "learning_rate": 9.884292748552951e-07, "loss": 0.0155, "step": 65150 }, { "epoch": 0.6961910358459319, "grad_norm": 3.2864997386932373, "learning_rate": 9.88425681104994e-07, "loss": 0.0548, "step": 65160 }, { "epoch": 0.6962978791602116, "grad_norm": 13.012099266052246, "learning_rate": 9.884220868032233e-07, "loss": 0.0527, "step": 65170 }, { "epoch": 0.6964047224744911, "grad_norm": 5.521188735961914, "learning_rate": 9.884184919499872e-07, "loss": 0.0397, "step": 65180 }, { "epoch": 0.6965115657887707, "grad_norm": 2.398834705352783, "learning_rate": 9.8841489654529e-07, "loss": 0.1689, "step": 65190 }, { "epoch": 0.6966184091030504, "grad_norm": 3.1879401206970215, "learning_rate": 9.884113005891352e-07, "loss": 0.0435, "step": 65200 }, { "epoch": 0.69672525241733, "grad_norm": 1.7029879093170166, "learning_rate": 9.884077040815273e-07, "loss": 0.0408, "step": 65210 }, { "epoch": 0.6968320957316096, "grad_norm": 0.09389053285121918, "learning_rate": 9.884041070224702e-07, "loss": 0.0355, "step": 65220 }, { "epoch": 0.6969389390458892, "grad_norm": 0.3792547285556793, "learning_rate": 9.88400509411968e-07, "loss": 0.0501, "step": 65230 }, { "epoch": 0.6970457823601688, "grad_norm": 1.6461195945739746, "learning_rate": 9.88396911250025e-07, "loss": 0.0965, "step": 65240 }, { "epoch": 0.6971526256744485, "grad_norm": 3.776374340057373, "learning_rate": 9.883933125366447e-07, "loss": 0.069, "step": 65250 }, { "epoch": 0.697259468988728, "grad_norm": 0.8536652326583862, "learning_rate": 9.883897132718317e-07, "loss": 0.0687, "step": 65260 }, { "epoch": 0.6973663123030076, "grad_norm": 9.019474983215332, "learning_rate": 9.883861134555899e-07, "loss": 0.0529, "step": 65270 }, { "epoch": 0.6974731556172873, "grad_norm": 4.562135696411133, "learning_rate": 9.88382513087923e-07, "loss": 0.0788, "step": 65280 }, { "epoch": 0.6975799989315669, "grad_norm": 1.1018160581588745, "learning_rate": 9.883789121688356e-07, "loss": 0.0643, "step": 65290 }, { "epoch": 0.6976868422458464, "grad_norm": 6.625439167022705, "learning_rate": 9.883753106983314e-07, "loss": 0.2485, "step": 65300 }, { "epoch": 0.6977936855601261, "grad_norm": 4.034071445465088, "learning_rate": 9.883717086764149e-07, "loss": 0.0768, "step": 65310 }, { "epoch": 0.6979005288744057, "grad_norm": 0.49663323163986206, "learning_rate": 9.883681061030897e-07, "loss": 0.0584, "step": 65320 }, { "epoch": 0.6980073721886852, "grad_norm": 5.435672283172607, "learning_rate": 9.8836450297836e-07, "loss": 0.0463, "step": 65330 }, { "epoch": 0.6981142155029649, "grad_norm": 0.5463560819625854, "learning_rate": 9.883608993022301e-07, "loss": 0.0426, "step": 65340 }, { "epoch": 0.6982210588172445, "grad_norm": 4.844033718109131, "learning_rate": 9.883572950747036e-07, "loss": 0.0351, "step": 65350 }, { "epoch": 0.6983279021315241, "grad_norm": 8.280253410339355, "learning_rate": 9.883536902957853e-07, "loss": 0.071, "step": 65360 }, { "epoch": 0.6984347454458038, "grad_norm": 7.848785877227783, "learning_rate": 9.883500849654783e-07, "loss": 0.0949, "step": 65370 }, { "epoch": 0.6985415887600833, "grad_norm": 7.076210021972656, "learning_rate": 9.883464790837877e-07, "loss": 0.036, "step": 65380 }, { "epoch": 0.698648432074363, "grad_norm": 8.508544921875, "learning_rate": 9.883428726507168e-07, "loss": 0.0862, "step": 65390 }, { "epoch": 0.6987552753886426, "grad_norm": 3.3551907539367676, "learning_rate": 9.8833926566627e-07, "loss": 0.0667, "step": 65400 }, { "epoch": 0.6988621187029221, "grad_norm": 0.13290274143218994, "learning_rate": 9.883356581304512e-07, "loss": 0.0413, "step": 65410 }, { "epoch": 0.6989689620172018, "grad_norm": 9.5170316696167, "learning_rate": 9.883320500432647e-07, "loss": 0.2059, "step": 65420 }, { "epoch": 0.6990758053314814, "grad_norm": 0.8041607141494751, "learning_rate": 9.883284414047143e-07, "loss": 0.1069, "step": 65430 }, { "epoch": 0.699182648645761, "grad_norm": 11.937788009643555, "learning_rate": 9.883248322148044e-07, "loss": 0.036, "step": 65440 }, { "epoch": 0.6992894919600406, "grad_norm": 10.839727401733398, "learning_rate": 9.883212224735389e-07, "loss": 0.0611, "step": 65450 }, { "epoch": 0.6993963352743202, "grad_norm": 7.122100353240967, "learning_rate": 9.883176121809219e-07, "loss": 0.062, "step": 65460 }, { "epoch": 0.6995031785885998, "grad_norm": 13.908819198608398, "learning_rate": 9.883140013369573e-07, "loss": 0.0468, "step": 65470 }, { "epoch": 0.6996100219028795, "grad_norm": 0.1073964461684227, "learning_rate": 9.883103899416495e-07, "loss": 0.0605, "step": 65480 }, { "epoch": 0.699716865217159, "grad_norm": 2.0337157249450684, "learning_rate": 9.883067779950025e-07, "loss": 0.0217, "step": 65490 }, { "epoch": 0.6998237085314386, "grad_norm": 7.832742691040039, "learning_rate": 9.8830316549702e-07, "loss": 0.0414, "step": 65500 }, { "epoch": 0.6999305518457183, "grad_norm": 0.20886904001235962, "learning_rate": 9.882995524477067e-07, "loss": 0.0244, "step": 65510 }, { "epoch": 0.7000373951599979, "grad_norm": 9.296253204345703, "learning_rate": 9.88295938847066e-07, "loss": 0.0419, "step": 65520 }, { "epoch": 0.7001442384742774, "grad_norm": 1.4281598329544067, "learning_rate": 9.882923246951026e-07, "loss": 0.0557, "step": 65530 }, { "epoch": 0.7002510817885571, "grad_norm": 1.033436894416809, "learning_rate": 9.882887099918203e-07, "loss": 0.0138, "step": 65540 }, { "epoch": 0.7003579251028367, "grad_norm": 29.766361236572266, "learning_rate": 9.88285094737223e-07, "loss": 0.0765, "step": 65550 }, { "epoch": 0.7004647684171162, "grad_norm": 1.3425474166870117, "learning_rate": 9.882814789313151e-07, "loss": 0.0344, "step": 65560 }, { "epoch": 0.7005716117313959, "grad_norm": 2.8168721199035645, "learning_rate": 9.882778625741006e-07, "loss": 0.16, "step": 65570 }, { "epoch": 0.7006784550456755, "grad_norm": 4.580911159515381, "learning_rate": 9.882742456655834e-07, "loss": 0.0935, "step": 65580 }, { "epoch": 0.7007852983599552, "grad_norm": 4.272826671600342, "learning_rate": 9.88270628205768e-07, "loss": 0.0401, "step": 65590 }, { "epoch": 0.7008921416742347, "grad_norm": 4.24888801574707, "learning_rate": 9.88267010194658e-07, "loss": 0.0811, "step": 65600 }, { "epoch": 0.7009989849885143, "grad_norm": 7.3113694190979, "learning_rate": 9.882633916322576e-07, "loss": 0.1336, "step": 65610 }, { "epoch": 0.701105828302794, "grad_norm": 3.60835337638855, "learning_rate": 9.88259772518571e-07, "loss": 0.0453, "step": 65620 }, { "epoch": 0.7012126716170736, "grad_norm": 0.07802344858646393, "learning_rate": 9.882561528536024e-07, "loss": 0.0469, "step": 65630 }, { "epoch": 0.7013195149313531, "grad_norm": 6.477908611297607, "learning_rate": 9.882525326373558e-07, "loss": 0.1023, "step": 65640 }, { "epoch": 0.7014263582456328, "grad_norm": 3.7259578704833984, "learning_rate": 9.88248911869835e-07, "loss": 0.0385, "step": 65650 }, { "epoch": 0.7015332015599124, "grad_norm": 8.028604507446289, "learning_rate": 9.882452905510446e-07, "loss": 0.0313, "step": 65660 }, { "epoch": 0.701640044874192, "grad_norm": 5.1677727699279785, "learning_rate": 9.882416686809885e-07, "loss": 0.0496, "step": 65670 }, { "epoch": 0.7017468881884716, "grad_norm": 2.1298811435699463, "learning_rate": 9.882380462596705e-07, "loss": 0.0297, "step": 65680 }, { "epoch": 0.7018537315027512, "grad_norm": 2.833909273147583, "learning_rate": 9.882344232870948e-07, "loss": 0.0593, "step": 65690 }, { "epoch": 0.7019605748170308, "grad_norm": 1.441524624824524, "learning_rate": 9.882307997632658e-07, "loss": 0.057, "step": 65700 }, { "epoch": 0.7020674181313105, "grad_norm": 14.632392883300781, "learning_rate": 9.882271756881873e-07, "loss": 0.0714, "step": 65710 }, { "epoch": 0.70217426144559, "grad_norm": 0.2594617009162903, "learning_rate": 9.882235510618634e-07, "loss": 0.0559, "step": 65720 }, { "epoch": 0.7022811047598696, "grad_norm": 4.878102779388428, "learning_rate": 9.882199258842983e-07, "loss": 0.0172, "step": 65730 }, { "epoch": 0.7023879480741493, "grad_norm": 7.622181415557861, "learning_rate": 9.88216300155496e-07, "loss": 0.0202, "step": 65740 }, { "epoch": 0.7024947913884289, "grad_norm": 1.517794132232666, "learning_rate": 9.88212673875461e-07, "loss": 0.051, "step": 65750 }, { "epoch": 0.7026016347027085, "grad_norm": 2.2540464401245117, "learning_rate": 9.882090470441965e-07, "loss": 0.0757, "step": 65760 }, { "epoch": 0.7027084780169881, "grad_norm": 2.159656286239624, "learning_rate": 9.882054196617074e-07, "loss": 0.1262, "step": 65770 }, { "epoch": 0.7028153213312677, "grad_norm": 2.112820625305176, "learning_rate": 9.882017917279975e-07, "loss": 0.0308, "step": 65780 }, { "epoch": 0.7029221646455474, "grad_norm": 5.325644016265869, "learning_rate": 9.88198163243071e-07, "loss": 0.0613, "step": 65790 }, { "epoch": 0.7030290079598269, "grad_norm": 0.1389806717634201, "learning_rate": 9.881945342069318e-07, "loss": 0.1119, "step": 65800 }, { "epoch": 0.7031358512741065, "grad_norm": 4.968956470489502, "learning_rate": 9.881909046195842e-07, "loss": 0.044, "step": 65810 }, { "epoch": 0.7032426945883862, "grad_norm": 11.803641319274902, "learning_rate": 9.881872744810323e-07, "loss": 0.0637, "step": 65820 }, { "epoch": 0.7033495379026657, "grad_norm": 2.525144577026367, "learning_rate": 9.881836437912802e-07, "loss": 0.1074, "step": 65830 }, { "epoch": 0.7034563812169453, "grad_norm": 2.511776924133301, "learning_rate": 9.881800125503317e-07, "loss": 0.0196, "step": 65840 }, { "epoch": 0.703563224531225, "grad_norm": 11.00643539428711, "learning_rate": 9.881763807581913e-07, "loss": 0.0715, "step": 65850 }, { "epoch": 0.7036700678455046, "grad_norm": 0.04575633257627487, "learning_rate": 9.881727484148627e-07, "loss": 0.0193, "step": 65860 }, { "epoch": 0.7037769111597841, "grad_norm": 4.754474639892578, "learning_rate": 9.881691155203503e-07, "loss": 0.0192, "step": 65870 }, { "epoch": 0.7038837544740638, "grad_norm": 0.07893852144479752, "learning_rate": 9.881654820746584e-07, "loss": 0.0428, "step": 65880 }, { "epoch": 0.7039905977883434, "grad_norm": 2.185800552368164, "learning_rate": 9.881618480777905e-07, "loss": 0.0726, "step": 65890 }, { "epoch": 0.704097441102623, "grad_norm": 1.292284607887268, "learning_rate": 9.88158213529751e-07, "loss": 0.0841, "step": 65900 }, { "epoch": 0.7042042844169026, "grad_norm": 0.7393956780433655, "learning_rate": 9.881545784305442e-07, "loss": 0.0413, "step": 65910 }, { "epoch": 0.7043111277311822, "grad_norm": 3.066287040710449, "learning_rate": 9.881509427801741e-07, "loss": 0.0281, "step": 65920 }, { "epoch": 0.7044179710454618, "grad_norm": 8.764139175415039, "learning_rate": 9.881473065786446e-07, "loss": 0.0666, "step": 65930 }, { "epoch": 0.7045248143597415, "grad_norm": 8.329154014587402, "learning_rate": 9.8814366982596e-07, "loss": 0.1024, "step": 65940 }, { "epoch": 0.704631657674021, "grad_norm": 0.8690100908279419, "learning_rate": 9.881400325221243e-07, "loss": 0.0285, "step": 65950 }, { "epoch": 0.7047385009883007, "grad_norm": 5.189846038818359, "learning_rate": 9.881363946671418e-07, "loss": 0.1049, "step": 65960 }, { "epoch": 0.7048453443025803, "grad_norm": 0.017007814720273018, "learning_rate": 9.881327562610165e-07, "loss": 0.0422, "step": 65970 }, { "epoch": 0.7049521876168598, "grad_norm": 6.573307991027832, "learning_rate": 9.881291173037523e-07, "loss": 0.071, "step": 65980 }, { "epoch": 0.7050590309311395, "grad_norm": 13.50135612487793, "learning_rate": 9.881254777953535e-07, "loss": 0.0345, "step": 65990 }, { "epoch": 0.7051658742454191, "grad_norm": 2.7940192222595215, "learning_rate": 9.881218377358243e-07, "loss": 0.0303, "step": 66000 }, { "epoch": 0.7052727175596987, "grad_norm": 3.4485111236572266, "learning_rate": 9.881181971251686e-07, "loss": 0.0442, "step": 66010 }, { "epoch": 0.7053795608739784, "grad_norm": 5.684798240661621, "learning_rate": 9.881145559633906e-07, "loss": 0.0235, "step": 66020 }, { "epoch": 0.7054864041882579, "grad_norm": 0.11728593707084656, "learning_rate": 9.881109142504944e-07, "loss": 0.0299, "step": 66030 }, { "epoch": 0.7055932475025375, "grad_norm": 7.588687896728516, "learning_rate": 9.881072719864843e-07, "loss": 0.0792, "step": 66040 }, { "epoch": 0.7057000908168172, "grad_norm": 1.5272457599639893, "learning_rate": 9.881036291713643e-07, "loss": 0.048, "step": 66050 }, { "epoch": 0.7058069341310967, "grad_norm": 0.35414060950279236, "learning_rate": 9.880999858051382e-07, "loss": 0.0428, "step": 66060 }, { "epoch": 0.7059137774453763, "grad_norm": 2.2634329795837402, "learning_rate": 9.880963418878105e-07, "loss": 0.0285, "step": 66070 }, { "epoch": 0.706020620759656, "grad_norm": 3.2899630069732666, "learning_rate": 9.880926974193853e-07, "loss": 0.0832, "step": 66080 }, { "epoch": 0.7061274640739356, "grad_norm": 6.182611465454102, "learning_rate": 9.880890523998667e-07, "loss": 0.0508, "step": 66090 }, { "epoch": 0.7062343073882151, "grad_norm": 0.12854649126529694, "learning_rate": 9.880854068292583e-07, "loss": 0.0426, "step": 66100 }, { "epoch": 0.7063411507024948, "grad_norm": 1.3124027252197266, "learning_rate": 9.88081760707565e-07, "loss": 0.0778, "step": 66110 }, { "epoch": 0.7064479940167744, "grad_norm": 11.287179946899414, "learning_rate": 9.880781140347905e-07, "loss": 0.0385, "step": 66120 }, { "epoch": 0.7065548373310541, "grad_norm": 0.48841890692710876, "learning_rate": 9.880744668109388e-07, "loss": 0.0208, "step": 66130 }, { "epoch": 0.7066616806453336, "grad_norm": 0.29440703988075256, "learning_rate": 9.880708190360144e-07, "loss": 0.0503, "step": 66140 }, { "epoch": 0.7067685239596132, "grad_norm": 6.3529767990112305, "learning_rate": 9.88067170710021e-07, "loss": 0.0281, "step": 66150 }, { "epoch": 0.7068753672738929, "grad_norm": 3.9745798110961914, "learning_rate": 9.880635218329631e-07, "loss": 0.1357, "step": 66160 }, { "epoch": 0.7069822105881725, "grad_norm": 5.6764678955078125, "learning_rate": 9.880598724048445e-07, "loss": 0.0329, "step": 66170 }, { "epoch": 0.707089053902452, "grad_norm": 2.6208338737487793, "learning_rate": 9.880562224256696e-07, "loss": 0.0262, "step": 66180 }, { "epoch": 0.7071958972167317, "grad_norm": 1.660232663154602, "learning_rate": 9.880525718954423e-07, "loss": 0.0533, "step": 66190 }, { "epoch": 0.7073027405310113, "grad_norm": 0.09036597609519958, "learning_rate": 9.88048920814167e-07, "loss": 0.0274, "step": 66200 }, { "epoch": 0.7074095838452908, "grad_norm": 6.362185478210449, "learning_rate": 9.880452691818473e-07, "loss": 0.1115, "step": 66210 }, { "epoch": 0.7075164271595705, "grad_norm": 6.600462913513184, "learning_rate": 9.880416169984878e-07, "loss": 0.0396, "step": 66220 }, { "epoch": 0.7076232704738501, "grad_norm": 0.8371544480323792, "learning_rate": 9.880379642640926e-07, "loss": 0.037, "step": 66230 }, { "epoch": 0.7077301137881297, "grad_norm": 13.893668174743652, "learning_rate": 9.880343109786657e-07, "loss": 0.0811, "step": 66240 }, { "epoch": 0.7078369571024093, "grad_norm": 5.562362194061279, "learning_rate": 9.88030657142211e-07, "loss": 0.0507, "step": 66250 }, { "epoch": 0.7079438004166889, "grad_norm": 1.869838833808899, "learning_rate": 9.88027002754733e-07, "loss": 0.0357, "step": 66260 }, { "epoch": 0.7080506437309685, "grad_norm": 2.7960827350616455, "learning_rate": 9.880233478162359e-07, "loss": 0.0275, "step": 66270 }, { "epoch": 0.7081574870452482, "grad_norm": 4.587303638458252, "learning_rate": 9.880196923267232e-07, "loss": 0.0422, "step": 66280 }, { "epoch": 0.7082643303595277, "grad_norm": 4.575030326843262, "learning_rate": 9.880160362861998e-07, "loss": 0.0346, "step": 66290 }, { "epoch": 0.7083711736738073, "grad_norm": 0.38821160793304443, "learning_rate": 9.880123796946693e-07, "loss": 0.0163, "step": 66300 }, { "epoch": 0.708478016988087, "grad_norm": 3.599684953689575, "learning_rate": 9.880087225521359e-07, "loss": 0.0745, "step": 66310 }, { "epoch": 0.7085848603023666, "grad_norm": 1.8108335733413696, "learning_rate": 9.88005064858604e-07, "loss": 0.0555, "step": 66320 }, { "epoch": 0.7086917036166462, "grad_norm": 6.799755573272705, "learning_rate": 9.880014066140774e-07, "loss": 0.0311, "step": 66330 }, { "epoch": 0.7087985469309258, "grad_norm": 4.645906925201416, "learning_rate": 9.879977478185605e-07, "loss": 0.0331, "step": 66340 }, { "epoch": 0.7089053902452054, "grad_norm": 3.7994325160980225, "learning_rate": 9.879940884720572e-07, "loss": 0.0663, "step": 66350 }, { "epoch": 0.7090122335594851, "grad_norm": 3.096924304962158, "learning_rate": 9.879904285745718e-07, "loss": 0.0551, "step": 66360 }, { "epoch": 0.7091190768737646, "grad_norm": 8.442614555358887, "learning_rate": 9.879867681261084e-07, "loss": 0.0627, "step": 66370 }, { "epoch": 0.7092259201880442, "grad_norm": 7.7235188484191895, "learning_rate": 9.87983107126671e-07, "loss": 0.0327, "step": 66380 }, { "epoch": 0.7093327635023239, "grad_norm": 4.464550495147705, "learning_rate": 9.87979445576264e-07, "loss": 0.0861, "step": 66390 }, { "epoch": 0.7094396068166035, "grad_norm": 7.849057197570801, "learning_rate": 9.879757834748912e-07, "loss": 0.0614, "step": 66400 }, { "epoch": 0.709546450130883, "grad_norm": 0.05119980126619339, "learning_rate": 9.87972120822557e-07, "loss": 0.0512, "step": 66410 }, { "epoch": 0.7096532934451627, "grad_norm": 7.294569969177246, "learning_rate": 9.879684576192655e-07, "loss": 0.0472, "step": 66420 }, { "epoch": 0.7097601367594423, "grad_norm": 4.1024322509765625, "learning_rate": 9.879647938650207e-07, "loss": 0.0238, "step": 66430 }, { "epoch": 0.7098669800737218, "grad_norm": 2.0595877170562744, "learning_rate": 9.87961129559827e-07, "loss": 0.0405, "step": 66440 }, { "epoch": 0.7099738233880015, "grad_norm": 1.1606311798095703, "learning_rate": 9.87957464703688e-07, "loss": 0.0483, "step": 66450 }, { "epoch": 0.7100806667022811, "grad_norm": 14.674859046936035, "learning_rate": 9.879537992966084e-07, "loss": 0.1244, "step": 66460 }, { "epoch": 0.7101875100165607, "grad_norm": 3.9762179851531982, "learning_rate": 9.879501333385923e-07, "loss": 0.0689, "step": 66470 }, { "epoch": 0.7102943533308403, "grad_norm": 4.253082275390625, "learning_rate": 9.879464668296435e-07, "loss": 0.0384, "step": 66480 }, { "epoch": 0.7104011966451199, "grad_norm": 10.9423246383667, "learning_rate": 9.879427997697662e-07, "loss": 0.0505, "step": 66490 }, { "epoch": 0.7105080399593996, "grad_norm": 7.348918437957764, "learning_rate": 9.879391321589648e-07, "loss": 0.0698, "step": 66500 }, { "epoch": 0.7106148832736792, "grad_norm": 8.218724250793457, "learning_rate": 9.879354639972433e-07, "loss": 0.0467, "step": 66510 }, { "epoch": 0.7107217265879587, "grad_norm": 6.1545329093933105, "learning_rate": 9.879317952846057e-07, "loss": 0.0311, "step": 66520 }, { "epoch": 0.7108285699022384, "grad_norm": 2.1190133094787598, "learning_rate": 9.879281260210567e-07, "loss": 0.0579, "step": 66530 }, { "epoch": 0.710935413216518, "grad_norm": 3.7635233402252197, "learning_rate": 9.879244562065997e-07, "loss": 0.0778, "step": 66540 }, { "epoch": 0.7110422565307976, "grad_norm": 0.1473037451505661, "learning_rate": 9.87920785841239e-07, "loss": 0.0309, "step": 66550 }, { "epoch": 0.7111490998450772, "grad_norm": 11.231466293334961, "learning_rate": 9.879171149249792e-07, "loss": 0.0807, "step": 66560 }, { "epoch": 0.7112559431593568, "grad_norm": 2.1699063777923584, "learning_rate": 9.87913443457824e-07, "loss": 0.0374, "step": 66570 }, { "epoch": 0.7113627864736364, "grad_norm": 7.879241466522217, "learning_rate": 9.879097714397777e-07, "loss": 0.1215, "step": 66580 }, { "epoch": 0.7114696297879161, "grad_norm": 0.06675680726766586, "learning_rate": 9.879060988708445e-07, "loss": 0.0236, "step": 66590 }, { "epoch": 0.7115764731021956, "grad_norm": 3.003157138824463, "learning_rate": 9.879024257510282e-07, "loss": 0.029, "step": 66600 }, { "epoch": 0.7116833164164752, "grad_norm": 4.29885196685791, "learning_rate": 9.878987520803337e-07, "loss": 0.0602, "step": 66610 }, { "epoch": 0.7117901597307549, "grad_norm": 3.652623414993286, "learning_rate": 9.878950778587645e-07, "loss": 0.0558, "step": 66620 }, { "epoch": 0.7118970030450344, "grad_norm": 3.3855206966400146, "learning_rate": 9.878914030863249e-07, "loss": 0.0333, "step": 66630 }, { "epoch": 0.712003846359314, "grad_norm": 0.7732223272323608, "learning_rate": 9.87887727763019e-07, "loss": 0.0504, "step": 66640 }, { "epoch": 0.7121106896735937, "grad_norm": 0.362918496131897, "learning_rate": 9.878840518888511e-07, "loss": 0.0498, "step": 66650 }, { "epoch": 0.7122175329878733, "grad_norm": 4.155614376068115, "learning_rate": 9.878803754638255e-07, "loss": 0.1257, "step": 66660 }, { "epoch": 0.7123243763021528, "grad_norm": 1.5703285932540894, "learning_rate": 9.878766984879459e-07, "loss": 0.0872, "step": 66670 }, { "epoch": 0.7124312196164325, "grad_norm": 0.1500519961118698, "learning_rate": 9.878730209612167e-07, "loss": 0.0772, "step": 66680 }, { "epoch": 0.7125380629307121, "grad_norm": 8.25706672668457, "learning_rate": 9.87869342883642e-07, "loss": 0.0305, "step": 66690 }, { "epoch": 0.7126449062449918, "grad_norm": 2.0939371585845947, "learning_rate": 9.878656642552261e-07, "loss": 0.0372, "step": 66700 }, { "epoch": 0.7127517495592713, "grad_norm": 2.7694313526153564, "learning_rate": 9.87861985075973e-07, "loss": 0.037, "step": 66710 }, { "epoch": 0.7128585928735509, "grad_norm": 6.374475002288818, "learning_rate": 9.87858305345887e-07, "loss": 0.0229, "step": 66720 }, { "epoch": 0.7129654361878306, "grad_norm": 17.03853416442871, "learning_rate": 9.87854625064972e-07, "loss": 0.0769, "step": 66730 }, { "epoch": 0.7130722795021102, "grad_norm": 5.175872802734375, "learning_rate": 9.878509442332322e-07, "loss": 0.0834, "step": 66740 }, { "epoch": 0.7131791228163897, "grad_norm": 0.4016783833503723, "learning_rate": 9.878472628506722e-07, "loss": 0.1096, "step": 66750 }, { "epoch": 0.7132859661306694, "grad_norm": 12.020563125610352, "learning_rate": 9.878435809172956e-07, "loss": 0.0538, "step": 66760 }, { "epoch": 0.713392809444949, "grad_norm": 7.058763027191162, "learning_rate": 9.87839898433107e-07, "loss": 0.0612, "step": 66770 }, { "epoch": 0.7134996527592286, "grad_norm": 3.5097434520721436, "learning_rate": 9.878362153981102e-07, "loss": 0.0483, "step": 66780 }, { "epoch": 0.7136064960735082, "grad_norm": 2.5915839672088623, "learning_rate": 9.878325318123093e-07, "loss": 0.0419, "step": 66790 }, { "epoch": 0.7137133393877878, "grad_norm": 3.98883056640625, "learning_rate": 9.87828847675709e-07, "loss": 0.0633, "step": 66800 }, { "epoch": 0.7138201827020674, "grad_norm": 13.416006088256836, "learning_rate": 9.878251629883129e-07, "loss": 0.0859, "step": 66810 }, { "epoch": 0.713927026016347, "grad_norm": 2.9716436862945557, "learning_rate": 9.878214777501252e-07, "loss": 0.032, "step": 66820 }, { "epoch": 0.7140338693306266, "grad_norm": 8.186772346496582, "learning_rate": 9.878177919611505e-07, "loss": 0.1067, "step": 66830 }, { "epoch": 0.7141407126449062, "grad_norm": 4.650566577911377, "learning_rate": 9.878141056213926e-07, "loss": 0.1104, "step": 66840 }, { "epoch": 0.7142475559591859, "grad_norm": 3.5464351177215576, "learning_rate": 9.878104187308558e-07, "loss": 0.0399, "step": 66850 }, { "epoch": 0.7143543992734654, "grad_norm": 12.494811058044434, "learning_rate": 9.87806731289544e-07, "loss": 0.0097, "step": 66860 }, { "epoch": 0.7144612425877451, "grad_norm": 5.186887264251709, "learning_rate": 9.878030432974621e-07, "loss": 0.0478, "step": 66870 }, { "epoch": 0.7145680859020247, "grad_norm": 8.10091495513916, "learning_rate": 9.877993547546133e-07, "loss": 0.0373, "step": 66880 }, { "epoch": 0.7146749292163043, "grad_norm": 1.6013859510421753, "learning_rate": 9.877956656610024e-07, "loss": 0.0417, "step": 66890 }, { "epoch": 0.714781772530584, "grad_norm": 0.614139974117279, "learning_rate": 9.877919760166334e-07, "loss": 0.0439, "step": 66900 }, { "epoch": 0.7148886158448635, "grad_norm": 10.17529582977295, "learning_rate": 9.877882858215102e-07, "loss": 0.0749, "step": 66910 }, { "epoch": 0.7149954591591431, "grad_norm": 3.9008071422576904, "learning_rate": 9.877845950756375e-07, "loss": 0.0612, "step": 66920 }, { "epoch": 0.7151023024734228, "grad_norm": 0.3407957851886749, "learning_rate": 9.87780903779019e-07, "loss": 0.0388, "step": 66930 }, { "epoch": 0.7152091457877023, "grad_norm": 8.511664390563965, "learning_rate": 9.877772119316593e-07, "loss": 0.0641, "step": 66940 }, { "epoch": 0.7153159891019819, "grad_norm": 4.1456780433654785, "learning_rate": 9.87773519533562e-07, "loss": 0.0671, "step": 66950 }, { "epoch": 0.7154228324162616, "grad_norm": 3.7645788192749023, "learning_rate": 9.877698265847315e-07, "loss": 0.0489, "step": 66960 }, { "epoch": 0.7155296757305412, "grad_norm": 4.161352634429932, "learning_rate": 9.877661330851725e-07, "loss": 0.0586, "step": 66970 }, { "epoch": 0.7156365190448207, "grad_norm": 3.156970500946045, "learning_rate": 9.877624390348883e-07, "loss": 0.023, "step": 66980 }, { "epoch": 0.7157433623591004, "grad_norm": 2.731487274169922, "learning_rate": 9.877587444338837e-07, "loss": 0.0424, "step": 66990 }, { "epoch": 0.71585020567338, "grad_norm": 18.396604537963867, "learning_rate": 9.877550492821628e-07, "loss": 0.0898, "step": 67000 }, { "epoch": 0.7159570489876596, "grad_norm": 0.18012486398220062, "learning_rate": 9.877513535797294e-07, "loss": 0.0207, "step": 67010 }, { "epoch": 0.7160638923019392, "grad_norm": 0.4237319529056549, "learning_rate": 9.87747657326588e-07, "loss": 0.0675, "step": 67020 }, { "epoch": 0.7161707356162188, "grad_norm": 0.27867674827575684, "learning_rate": 9.877439605227427e-07, "loss": 0.0981, "step": 67030 }, { "epoch": 0.7162775789304984, "grad_norm": 11.198986053466797, "learning_rate": 9.877402631681974e-07, "loss": 0.0446, "step": 67040 }, { "epoch": 0.716384422244778, "grad_norm": 16.16998863220215, "learning_rate": 9.87736565262957e-07, "loss": 0.0541, "step": 67050 }, { "epoch": 0.7164912655590576, "grad_norm": 6.26210880279541, "learning_rate": 9.87732866807025e-07, "loss": 0.0567, "step": 67060 }, { "epoch": 0.7165981088733373, "grad_norm": 1.8319627046585083, "learning_rate": 9.877291678004056e-07, "loss": 0.0468, "step": 67070 }, { "epoch": 0.7167049521876169, "grad_norm": 1.4350643157958984, "learning_rate": 9.877254682431033e-07, "loss": 0.0139, "step": 67080 }, { "epoch": 0.7168117955018964, "grad_norm": 6.6121826171875, "learning_rate": 9.877217681351222e-07, "loss": 0.072, "step": 67090 }, { "epoch": 0.7169186388161761, "grad_norm": 16.826093673706055, "learning_rate": 9.877180674764663e-07, "loss": 0.1169, "step": 67100 }, { "epoch": 0.7170254821304557, "grad_norm": 1.2345410585403442, "learning_rate": 9.8771436626714e-07, "loss": 0.0572, "step": 67110 }, { "epoch": 0.7171323254447353, "grad_norm": 12.225398063659668, "learning_rate": 9.877106645071472e-07, "loss": 0.0958, "step": 67120 }, { "epoch": 0.717239168759015, "grad_norm": 4.281678676605225, "learning_rate": 9.877069621964923e-07, "loss": 0.0686, "step": 67130 }, { "epoch": 0.7173460120732945, "grad_norm": 6.111149787902832, "learning_rate": 9.877032593351796e-07, "loss": 0.0611, "step": 67140 }, { "epoch": 0.7174528553875741, "grad_norm": 2.484851121902466, "learning_rate": 9.87699555923213e-07, "loss": 0.095, "step": 67150 }, { "epoch": 0.7175596987018538, "grad_norm": 13.725628852844238, "learning_rate": 9.87695851960597e-07, "loss": 0.0593, "step": 67160 }, { "epoch": 0.7176665420161333, "grad_norm": 0.8816632628440857, "learning_rate": 9.876921474473351e-07, "loss": 0.0402, "step": 67170 }, { "epoch": 0.7177733853304129, "grad_norm": 0.13150231540203094, "learning_rate": 9.876884423834324e-07, "loss": 0.0187, "step": 67180 }, { "epoch": 0.7178802286446926, "grad_norm": 2.1085474491119385, "learning_rate": 9.876847367688925e-07, "loss": 0.0471, "step": 67190 }, { "epoch": 0.7179870719589722, "grad_norm": 6.226451873779297, "learning_rate": 9.876810306037194e-07, "loss": 0.0393, "step": 67200 }, { "epoch": 0.7180939152732517, "grad_norm": 0.29637861251831055, "learning_rate": 9.87677323887918e-07, "loss": 0.0291, "step": 67210 }, { "epoch": 0.7182007585875314, "grad_norm": 4.594784259796143, "learning_rate": 9.87673616621492e-07, "loss": 0.047, "step": 67220 }, { "epoch": 0.718307601901811, "grad_norm": 1.7557132244110107, "learning_rate": 9.876699088044458e-07, "loss": 0.0711, "step": 67230 }, { "epoch": 0.7184144452160907, "grad_norm": 8.24785041809082, "learning_rate": 9.876662004367832e-07, "loss": 0.082, "step": 67240 }, { "epoch": 0.7185212885303702, "grad_norm": 2.1525418758392334, "learning_rate": 9.87662491518509e-07, "loss": 0.0313, "step": 67250 }, { "epoch": 0.7186281318446498, "grad_norm": 3.2032032012939453, "learning_rate": 9.876587820496269e-07, "loss": 0.03, "step": 67260 }, { "epoch": 0.7187349751589295, "grad_norm": 4.474038600921631, "learning_rate": 9.87655072030141e-07, "loss": 0.0534, "step": 67270 }, { "epoch": 0.718841818473209, "grad_norm": 2.932219982147217, "learning_rate": 9.87651361460056e-07, "loss": 0.0667, "step": 67280 }, { "epoch": 0.7189486617874886, "grad_norm": 2.1686315536499023, "learning_rate": 9.876476503393757e-07, "loss": 0.0545, "step": 67290 }, { "epoch": 0.7190555051017683, "grad_norm": 3.2784650325775146, "learning_rate": 9.876439386681044e-07, "loss": 0.0493, "step": 67300 }, { "epoch": 0.7191623484160479, "grad_norm": 0.03175608068704605, "learning_rate": 9.876402264462463e-07, "loss": 0.041, "step": 67310 }, { "epoch": 0.7192691917303274, "grad_norm": 1.5508118867874146, "learning_rate": 9.876365136738055e-07, "loss": 0.0782, "step": 67320 }, { "epoch": 0.7193760350446071, "grad_norm": 0.10470955818891525, "learning_rate": 9.876328003507863e-07, "loss": 0.0446, "step": 67330 }, { "epoch": 0.7194828783588867, "grad_norm": 1.174425482749939, "learning_rate": 9.87629086477193e-07, "loss": 0.0555, "step": 67340 }, { "epoch": 0.7195897216731663, "grad_norm": 2.1516823768615723, "learning_rate": 9.876253720530296e-07, "loss": 0.0721, "step": 67350 }, { "epoch": 0.7196965649874459, "grad_norm": 2.994216203689575, "learning_rate": 9.876216570783003e-07, "loss": 0.0561, "step": 67360 }, { "epoch": 0.7198034083017255, "grad_norm": 0.5575035810470581, "learning_rate": 9.876179415530092e-07, "loss": 0.0409, "step": 67370 }, { "epoch": 0.7199102516160051, "grad_norm": 8.203546524047852, "learning_rate": 9.876142254771609e-07, "loss": 0.097, "step": 67380 }, { "epoch": 0.7200170949302848, "grad_norm": 0.03824484720826149, "learning_rate": 9.876105088507593e-07, "loss": 0.0703, "step": 67390 }, { "epoch": 0.7201239382445643, "grad_norm": 2.992733955383301, "learning_rate": 9.876067916738086e-07, "loss": 0.038, "step": 67400 }, { "epoch": 0.7202307815588439, "grad_norm": 4.8779168128967285, "learning_rate": 9.87603073946313e-07, "loss": 0.0566, "step": 67410 }, { "epoch": 0.7203376248731236, "grad_norm": NaN, "learning_rate": 9.875993556682768e-07, "loss": 0.1338, "step": 67420 }, { "epoch": 0.7204444681874032, "grad_norm": 7.4405598640441895, "learning_rate": 9.875956368397041e-07, "loss": 0.0266, "step": 67430 }, { "epoch": 0.7205513115016828, "grad_norm": 9.209036827087402, "learning_rate": 9.875919174605988e-07, "loss": 0.1035, "step": 67440 }, { "epoch": 0.7206581548159624, "grad_norm": 0.11320623755455017, "learning_rate": 9.87588197530966e-07, "loss": 0.0119, "step": 67450 }, { "epoch": 0.720764998130242, "grad_norm": 6.473723888397217, "learning_rate": 9.875844770508088e-07, "loss": 0.0243, "step": 67460 }, { "epoch": 0.7208718414445217, "grad_norm": 20.1607723236084, "learning_rate": 9.875807560201324e-07, "loss": 0.0796, "step": 67470 }, { "epoch": 0.7209786847588012, "grad_norm": 0.028230441734194756, "learning_rate": 9.875770344389403e-07, "loss": 0.05, "step": 67480 }, { "epoch": 0.7210855280730808, "grad_norm": 0.5665192604064941, "learning_rate": 9.875733123072367e-07, "loss": 0.0163, "step": 67490 }, { "epoch": 0.7211923713873605, "grad_norm": 6.767659664154053, "learning_rate": 9.875695896250264e-07, "loss": 0.0579, "step": 67500 }, { "epoch": 0.72129921470164, "grad_norm": 1.8435792922973633, "learning_rate": 9.87565866392313e-07, "loss": 0.044, "step": 67510 }, { "epoch": 0.7214060580159196, "grad_norm": 0.5979946255683899, "learning_rate": 9.87562142609101e-07, "loss": 0.0382, "step": 67520 }, { "epoch": 0.7215129013301993, "grad_norm": 0.12863564491271973, "learning_rate": 9.875584182753946e-07, "loss": 0.1028, "step": 67530 }, { "epoch": 0.7216197446444789, "grad_norm": 2.808441162109375, "learning_rate": 9.87554693391198e-07, "loss": 0.1139, "step": 67540 }, { "epoch": 0.7217265879587584, "grad_norm": 0.14811085164546967, "learning_rate": 9.875509679565153e-07, "loss": 0.046, "step": 67550 }, { "epoch": 0.7218334312730381, "grad_norm": 18.20289421081543, "learning_rate": 9.875472419713507e-07, "loss": 0.0535, "step": 67560 }, { "epoch": 0.7219402745873177, "grad_norm": 3.0722320079803467, "learning_rate": 9.875435154357085e-07, "loss": 0.052, "step": 67570 }, { "epoch": 0.7220471179015973, "grad_norm": 1.110822081565857, "learning_rate": 9.87539788349593e-07, "loss": 0.086, "step": 67580 }, { "epoch": 0.7221539612158769, "grad_norm": 6.227743625640869, "learning_rate": 9.87536060713008e-07, "loss": 0.0806, "step": 67590 }, { "epoch": 0.7222608045301565, "grad_norm": 5.305145740509033, "learning_rate": 9.875323325259582e-07, "loss": 0.106, "step": 67600 }, { "epoch": 0.7223676478444362, "grad_norm": 1.9558308124542236, "learning_rate": 9.875286037884475e-07, "loss": 0.0219, "step": 67610 }, { "epoch": 0.7224744911587158, "grad_norm": 2.3787200450897217, "learning_rate": 9.875248745004803e-07, "loss": 0.0428, "step": 67620 }, { "epoch": 0.7225813344729953, "grad_norm": 1.069076657295227, "learning_rate": 9.875211446620606e-07, "loss": 0.0472, "step": 67630 }, { "epoch": 0.722688177787275, "grad_norm": 4.784438610076904, "learning_rate": 9.875174142731928e-07, "loss": 0.0395, "step": 67640 }, { "epoch": 0.7227950211015546, "grad_norm": 10.724990844726562, "learning_rate": 9.875136833338812e-07, "loss": 0.0399, "step": 67650 }, { "epoch": 0.7229018644158342, "grad_norm": 5.052735328674316, "learning_rate": 9.875099518441296e-07, "loss": 0.0447, "step": 67660 }, { "epoch": 0.7230087077301138, "grad_norm": 2.0714564323425293, "learning_rate": 9.875062198039426e-07, "loss": 0.0985, "step": 67670 }, { "epoch": 0.7231155510443934, "grad_norm": 3.938307046890259, "learning_rate": 9.875024872133243e-07, "loss": 0.0366, "step": 67680 }, { "epoch": 0.723222394358673, "grad_norm": 3.6341731548309326, "learning_rate": 9.874987540722788e-07, "loss": 0.0567, "step": 67690 }, { "epoch": 0.7233292376729527, "grad_norm": 4.837507247924805, "learning_rate": 9.874950203808106e-07, "loss": 0.0691, "step": 67700 }, { "epoch": 0.7234360809872322, "grad_norm": 4.61895751953125, "learning_rate": 9.874912861389236e-07, "loss": 0.0484, "step": 67710 }, { "epoch": 0.7235429243015118, "grad_norm": 8.749780654907227, "learning_rate": 9.87487551346622e-07, "loss": 0.0614, "step": 67720 }, { "epoch": 0.7236497676157915, "grad_norm": 6.893030643463135, "learning_rate": 9.874838160039104e-07, "loss": 0.0431, "step": 67730 }, { "epoch": 0.723756610930071, "grad_norm": 4.921199798583984, "learning_rate": 9.874800801107926e-07, "loss": 0.1657, "step": 67740 }, { "epoch": 0.7238634542443506, "grad_norm": 16.272098541259766, "learning_rate": 9.874763436672732e-07, "loss": 0.06, "step": 67750 }, { "epoch": 0.7239702975586303, "grad_norm": 4.831850528717041, "learning_rate": 9.87472606673356e-07, "loss": 0.0496, "step": 67760 }, { "epoch": 0.7240771408729099, "grad_norm": 0.23886021971702576, "learning_rate": 9.874688691290456e-07, "loss": 0.0209, "step": 67770 }, { "epoch": 0.7241839841871894, "grad_norm": 18.940641403198242, "learning_rate": 9.87465131034346e-07, "loss": 0.0581, "step": 67780 }, { "epoch": 0.7242908275014691, "grad_norm": 5.5022454261779785, "learning_rate": 9.874613923892616e-07, "loss": 0.0926, "step": 67790 }, { "epoch": 0.7243976708157487, "grad_norm": 0.76689612865448, "learning_rate": 9.874576531937964e-07, "loss": 0.0567, "step": 67800 }, { "epoch": 0.7245045141300284, "grad_norm": 2.1472859382629395, "learning_rate": 9.874539134479545e-07, "loss": 0.0561, "step": 67810 }, { "epoch": 0.7246113574443079, "grad_norm": 6.884981632232666, "learning_rate": 9.874501731517407e-07, "loss": 0.0999, "step": 67820 }, { "epoch": 0.7247182007585875, "grad_norm": 4.509773254394531, "learning_rate": 9.874464323051586e-07, "loss": 0.0703, "step": 67830 }, { "epoch": 0.7248250440728672, "grad_norm": 0.27087312936782837, "learning_rate": 9.874426909082128e-07, "loss": 0.0266, "step": 67840 }, { "epoch": 0.7249318873871468, "grad_norm": 1.9688668251037598, "learning_rate": 9.874389489609073e-07, "loss": 0.0369, "step": 67850 }, { "epoch": 0.7250387307014263, "grad_norm": 2.3417418003082275, "learning_rate": 9.874352064632466e-07, "loss": 0.0372, "step": 67860 }, { "epoch": 0.725145574015706, "grad_norm": 3.3093483448028564, "learning_rate": 9.874314634152348e-07, "loss": 0.0464, "step": 67870 }, { "epoch": 0.7252524173299856, "grad_norm": 1.5092369318008423, "learning_rate": 9.87427719816876e-07, "loss": 0.0327, "step": 67880 }, { "epoch": 0.7253592606442651, "grad_norm": 0.059662654995918274, "learning_rate": 9.874239756681744e-07, "loss": 0.0723, "step": 67890 }, { "epoch": 0.7254661039585448, "grad_norm": 4.11098051071167, "learning_rate": 9.874202309691344e-07, "loss": 0.0711, "step": 67900 }, { "epoch": 0.7255729472728244, "grad_norm": 6.215399265289307, "learning_rate": 9.874164857197604e-07, "loss": 0.0572, "step": 67910 }, { "epoch": 0.725679790587104, "grad_norm": 3.7050929069519043, "learning_rate": 9.874127399200563e-07, "loss": 0.0809, "step": 67920 }, { "epoch": 0.7257866339013836, "grad_norm": 3.6250834465026855, "learning_rate": 9.874089935700263e-07, "loss": 0.0233, "step": 67930 }, { "epoch": 0.7258934772156632, "grad_norm": 4.829389572143555, "learning_rate": 9.874052466696747e-07, "loss": 0.0982, "step": 67940 }, { "epoch": 0.7260003205299428, "grad_norm": 1.766709804534912, "learning_rate": 9.87401499219006e-07, "loss": 0.0383, "step": 67950 }, { "epoch": 0.7261071638442225, "grad_norm": 2.5578596591949463, "learning_rate": 9.87397751218024e-07, "loss": 0.0243, "step": 67960 }, { "epoch": 0.726214007158502, "grad_norm": 4.280163764953613, "learning_rate": 9.873940026667334e-07, "loss": 0.0453, "step": 67970 }, { "epoch": 0.7263208504727817, "grad_norm": 3.491286277770996, "learning_rate": 9.87390253565138e-07, "loss": 0.0238, "step": 67980 }, { "epoch": 0.7264276937870613, "grad_norm": 0.0802760124206543, "learning_rate": 9.873865039132423e-07, "loss": 0.0616, "step": 67990 }, { "epoch": 0.7265345371013409, "grad_norm": 5.10016393661499, "learning_rate": 9.873827537110506e-07, "loss": 0.0288, "step": 68000 }, { "epoch": 0.7266413804156205, "grad_norm": 12.019126892089844, "learning_rate": 9.873790029585668e-07, "loss": 0.1112, "step": 68010 }, { "epoch": 0.7267482237299001, "grad_norm": 0.044656261801719666, "learning_rate": 9.873752516557953e-07, "loss": 0.0469, "step": 68020 }, { "epoch": 0.7268550670441797, "grad_norm": 8.280713081359863, "learning_rate": 9.873714998027404e-07, "loss": 0.1035, "step": 68030 }, { "epoch": 0.7269619103584594, "grad_norm": 4.224981307983398, "learning_rate": 9.873677473994062e-07, "loss": 0.0648, "step": 68040 }, { "epoch": 0.7270687536727389, "grad_norm": 2.903017282485962, "learning_rate": 9.873639944457972e-07, "loss": 0.0391, "step": 68050 }, { "epoch": 0.7271755969870185, "grad_norm": 0.1465601772069931, "learning_rate": 9.873602409419174e-07, "loss": 0.0795, "step": 68060 }, { "epoch": 0.7272824403012982, "grad_norm": 0.11508607864379883, "learning_rate": 9.87356486887771e-07, "loss": 0.052, "step": 68070 }, { "epoch": 0.7273892836155778, "grad_norm": 5.504039764404297, "learning_rate": 9.873527322833625e-07, "loss": 0.0429, "step": 68080 }, { "epoch": 0.7274961269298573, "grad_norm": 1.0644526481628418, "learning_rate": 9.87348977128696e-07, "loss": 0.0239, "step": 68090 }, { "epoch": 0.727602970244137, "grad_norm": 5.7104105949401855, "learning_rate": 9.873452214237755e-07, "loss": 0.0262, "step": 68100 }, { "epoch": 0.7277098135584166, "grad_norm": 22.06399154663086, "learning_rate": 9.873414651686057e-07, "loss": 0.1564, "step": 68110 }, { "epoch": 0.7278166568726961, "grad_norm": 4.377931118011475, "learning_rate": 9.873377083631906e-07, "loss": 0.0421, "step": 68120 }, { "epoch": 0.7279235001869758, "grad_norm": 4.7257399559021, "learning_rate": 9.873339510075342e-07, "loss": 0.0473, "step": 68130 }, { "epoch": 0.7280303435012554, "grad_norm": 3.8383073806762695, "learning_rate": 9.873301931016414e-07, "loss": 0.0411, "step": 68140 }, { "epoch": 0.728137186815535, "grad_norm": 5.938319683074951, "learning_rate": 9.873264346455158e-07, "loss": 0.0259, "step": 68150 }, { "epoch": 0.7282440301298146, "grad_norm": 2.3248634338378906, "learning_rate": 9.873226756391618e-07, "loss": 0.0426, "step": 68160 }, { "epoch": 0.7283508734440942, "grad_norm": 8.419249534606934, "learning_rate": 9.873189160825837e-07, "loss": 0.0691, "step": 68170 }, { "epoch": 0.7284577167583739, "grad_norm": 5.778326511383057, "learning_rate": 9.873151559757858e-07, "loss": 0.0303, "step": 68180 }, { "epoch": 0.7285645600726535, "grad_norm": 1.1586179733276367, "learning_rate": 9.873113953187724e-07, "loss": 0.0456, "step": 68190 }, { "epoch": 0.728671403386933, "grad_norm": 0.7752490043640137, "learning_rate": 9.873076341115477e-07, "loss": 0.0228, "step": 68200 }, { "epoch": 0.7287782467012127, "grad_norm": 2.6907877922058105, "learning_rate": 9.873038723541158e-07, "loss": 0.0317, "step": 68210 }, { "epoch": 0.7288850900154923, "grad_norm": 6.153522491455078, "learning_rate": 9.87300110046481e-07, "loss": 0.0187, "step": 68220 }, { "epoch": 0.7289919333297719, "grad_norm": 1.035781741142273, "learning_rate": 9.872963471886476e-07, "loss": 0.0332, "step": 68230 }, { "epoch": 0.7290987766440515, "grad_norm": 0.25770822167396545, "learning_rate": 9.8729258378062e-07, "loss": 0.0536, "step": 68240 }, { "epoch": 0.7292056199583311, "grad_norm": 8.816206932067871, "learning_rate": 9.872888198224021e-07, "loss": 0.0513, "step": 68250 }, { "epoch": 0.7293124632726107, "grad_norm": 4.148745536804199, "learning_rate": 9.872850553139985e-07, "loss": 0.0692, "step": 68260 }, { "epoch": 0.7294193065868904, "grad_norm": 4.09399938583374, "learning_rate": 9.872812902554132e-07, "loss": 0.053, "step": 68270 }, { "epoch": 0.7295261499011699, "grad_norm": 0.46647128462791443, "learning_rate": 9.872775246466506e-07, "loss": 0.0241, "step": 68280 }, { "epoch": 0.7296329932154495, "grad_norm": 2.882601499557495, "learning_rate": 9.87273758487715e-07, "loss": 0.0999, "step": 68290 }, { "epoch": 0.7297398365297292, "grad_norm": 1.1965898275375366, "learning_rate": 9.872699917786102e-07, "loss": 0.0369, "step": 68300 }, { "epoch": 0.7298466798440087, "grad_norm": 9.856890678405762, "learning_rate": 9.872662245193412e-07, "loss": 0.0759, "step": 68310 }, { "epoch": 0.7299535231582883, "grad_norm": 2.523669958114624, "learning_rate": 9.872624567099117e-07, "loss": 0.0544, "step": 68320 }, { "epoch": 0.730060366472568, "grad_norm": 3.187988758087158, "learning_rate": 9.87258688350326e-07, "loss": 0.074, "step": 68330 }, { "epoch": 0.7301672097868476, "grad_norm": 0.05406424030661583, "learning_rate": 9.872549194405885e-07, "loss": 0.0263, "step": 68340 }, { "epoch": 0.7302740531011273, "grad_norm": 7.0485005378723145, "learning_rate": 9.872511499807036e-07, "loss": 0.0483, "step": 68350 }, { "epoch": 0.7303808964154068, "grad_norm": 2.3926241397857666, "learning_rate": 9.872473799706753e-07, "loss": 0.081, "step": 68360 }, { "epoch": 0.7304877397296864, "grad_norm": 0.9974356889724731, "learning_rate": 9.87243609410508e-07, "loss": 0.0805, "step": 68370 }, { "epoch": 0.7305945830439661, "grad_norm": 40.1805534362793, "learning_rate": 9.872398383002058e-07, "loss": 0.0735, "step": 68380 }, { "epoch": 0.7307014263582456, "grad_norm": 5.477231025695801, "learning_rate": 9.872360666397729e-07, "loss": 0.076, "step": 68390 }, { "epoch": 0.7308082696725252, "grad_norm": 3.8139896392822266, "learning_rate": 9.87232294429214e-07, "loss": 0.1009, "step": 68400 }, { "epoch": 0.7309151129868049, "grad_norm": 4.492918968200684, "learning_rate": 9.872285216685327e-07, "loss": 0.0409, "step": 68410 }, { "epoch": 0.7310219563010845, "grad_norm": 3.797868251800537, "learning_rate": 9.87224748357734e-07, "loss": 0.0997, "step": 68420 }, { "epoch": 0.731128799615364, "grad_norm": 11.444067001342773, "learning_rate": 9.872209744968216e-07, "loss": 0.0279, "step": 68430 }, { "epoch": 0.7312356429296437, "grad_norm": 1.776390552520752, "learning_rate": 9.872172000858e-07, "loss": 0.0578, "step": 68440 }, { "epoch": 0.7313424862439233, "grad_norm": 7.3650221824646, "learning_rate": 9.872134251246732e-07, "loss": 0.0704, "step": 68450 }, { "epoch": 0.7314493295582029, "grad_norm": 0.26127469539642334, "learning_rate": 9.87209649613446e-07, "loss": 0.0486, "step": 68460 }, { "epoch": 0.7315561728724825, "grad_norm": 2.523336887359619, "learning_rate": 9.872058735521222e-07, "loss": 0.0424, "step": 68470 }, { "epoch": 0.7316630161867621, "grad_norm": 4.920791149139404, "learning_rate": 9.872020969407061e-07, "loss": 0.0268, "step": 68480 }, { "epoch": 0.7317698595010417, "grad_norm": 6.237458229064941, "learning_rate": 9.871983197792021e-07, "loss": 0.0741, "step": 68490 }, { "epoch": 0.7318767028153214, "grad_norm": 0.26810356974601746, "learning_rate": 9.871945420676146e-07, "loss": 0.0452, "step": 68500 }, { "epoch": 0.7319835461296009, "grad_norm": 3.992182493209839, "learning_rate": 9.871907638059475e-07, "loss": 0.0654, "step": 68510 }, { "epoch": 0.7320903894438805, "grad_norm": 1.2547917366027832, "learning_rate": 9.871869849942054e-07, "loss": 0.0397, "step": 68520 }, { "epoch": 0.7321972327581602, "grad_norm": 3.5880818367004395, "learning_rate": 9.871832056323925e-07, "loss": 0.0626, "step": 68530 }, { "epoch": 0.7323040760724397, "grad_norm": 6.153078079223633, "learning_rate": 9.871794257205128e-07, "loss": 0.0685, "step": 68540 }, { "epoch": 0.7324109193867194, "grad_norm": 2.3108623027801514, "learning_rate": 9.871756452585707e-07, "loss": 0.0479, "step": 68550 }, { "epoch": 0.732517762700999, "grad_norm": 0.23605401813983917, "learning_rate": 9.871718642465707e-07, "loss": 0.03, "step": 68560 }, { "epoch": 0.7326246060152786, "grad_norm": 4.029296398162842, "learning_rate": 9.871680826845169e-07, "loss": 0.0408, "step": 68570 }, { "epoch": 0.7327314493295582, "grad_norm": 0.18338213860988617, "learning_rate": 9.871643005724134e-07, "loss": 0.0609, "step": 68580 }, { "epoch": 0.7328382926438378, "grad_norm": 2.951608419418335, "learning_rate": 9.871605179102647e-07, "loss": 0.0242, "step": 68590 }, { "epoch": 0.7329451359581174, "grad_norm": 10.077783584594727, "learning_rate": 9.871567346980752e-07, "loss": 0.0542, "step": 68600 }, { "epoch": 0.7330519792723971, "grad_norm": 12.02418327331543, "learning_rate": 9.871529509358486e-07, "loss": 0.0326, "step": 68610 }, { "epoch": 0.7331588225866766, "grad_norm": 11.646150588989258, "learning_rate": 9.871491666235898e-07, "loss": 0.0395, "step": 68620 }, { "epoch": 0.7332656659009562, "grad_norm": 10.109759330749512, "learning_rate": 9.871453817613029e-07, "loss": 0.0626, "step": 68630 }, { "epoch": 0.7333725092152359, "grad_norm": 1.7750900983810425, "learning_rate": 9.87141596348992e-07, "loss": 0.1165, "step": 68640 }, { "epoch": 0.7334793525295155, "grad_norm": 0.03799916058778763, "learning_rate": 9.871378103866614e-07, "loss": 0.0397, "step": 68650 }, { "epoch": 0.733586195843795, "grad_norm": 4.763128757476807, "learning_rate": 9.871340238743156e-07, "loss": 0.0823, "step": 68660 }, { "epoch": 0.7336930391580747, "grad_norm": 1.421433448791504, "learning_rate": 9.871302368119584e-07, "loss": 0.0811, "step": 68670 }, { "epoch": 0.7337998824723543, "grad_norm": 20.183025360107422, "learning_rate": 9.871264491995948e-07, "loss": 0.0825, "step": 68680 }, { "epoch": 0.7339067257866339, "grad_norm": 0.09555816650390625, "learning_rate": 9.871226610372286e-07, "loss": 0.0567, "step": 68690 }, { "epoch": 0.7340135691009135, "grad_norm": 3.276845932006836, "learning_rate": 9.87118872324864e-07, "loss": 0.1243, "step": 68700 }, { "epoch": 0.7341204124151931, "grad_norm": 11.932962417602539, "learning_rate": 9.871150830625055e-07, "loss": 0.0526, "step": 68710 }, { "epoch": 0.7342272557294728, "grad_norm": 3.985184669494629, "learning_rate": 9.871112932501572e-07, "loss": 0.048, "step": 68720 }, { "epoch": 0.7343340990437524, "grad_norm": 0.2783907949924469, "learning_rate": 9.871075028878235e-07, "loss": 0.0387, "step": 68730 }, { "epoch": 0.7344409423580319, "grad_norm": 5.610470771789551, "learning_rate": 9.871037119755088e-07, "loss": 0.1522, "step": 68740 }, { "epoch": 0.7345477856723116, "grad_norm": 12.781237602233887, "learning_rate": 9.870999205132172e-07, "loss": 0.0252, "step": 68750 }, { "epoch": 0.7346546289865912, "grad_norm": 7.072183132171631, "learning_rate": 9.87096128500953e-07, "loss": 0.0893, "step": 68760 }, { "epoch": 0.7347614723008707, "grad_norm": 0.9605688452720642, "learning_rate": 9.870923359387205e-07, "loss": 0.0613, "step": 68770 }, { "epoch": 0.7348683156151504, "grad_norm": 1.685846209526062, "learning_rate": 9.870885428265241e-07, "loss": 0.0203, "step": 68780 }, { "epoch": 0.73497515892943, "grad_norm": 8.986141204833984, "learning_rate": 9.87084749164368e-07, "loss": 0.1382, "step": 68790 }, { "epoch": 0.7350820022437096, "grad_norm": 0.3838563561439514, "learning_rate": 9.870809549522562e-07, "loss": 0.0901, "step": 68800 }, { "epoch": 0.7351888455579892, "grad_norm": 5.525947570800781, "learning_rate": 9.870771601901932e-07, "loss": 0.0501, "step": 68810 }, { "epoch": 0.7352956888722688, "grad_norm": 0.1029856726527214, "learning_rate": 9.870733648781836e-07, "loss": 0.0713, "step": 68820 }, { "epoch": 0.7354025321865484, "grad_norm": 6.498860836029053, "learning_rate": 9.870695690162314e-07, "loss": 0.0445, "step": 68830 }, { "epoch": 0.7355093755008281, "grad_norm": 9.248024940490723, "learning_rate": 9.870657726043408e-07, "loss": 0.0471, "step": 68840 }, { "epoch": 0.7356162188151076, "grad_norm": 3.8915038108825684, "learning_rate": 9.87061975642516e-07, "loss": 0.0444, "step": 68850 }, { "epoch": 0.7357230621293872, "grad_norm": 2.755844831466675, "learning_rate": 9.87058178130762e-07, "loss": 0.0194, "step": 68860 }, { "epoch": 0.7358299054436669, "grad_norm": 8.32214641571045, "learning_rate": 9.87054380069082e-07, "loss": 0.0549, "step": 68870 }, { "epoch": 0.7359367487579465, "grad_norm": 0.016437090933322906, "learning_rate": 9.87050581457481e-07, "loss": 0.0458, "step": 68880 }, { "epoch": 0.736043592072226, "grad_norm": 2.1496219635009766, "learning_rate": 9.870467822959632e-07, "loss": 0.0113, "step": 68890 }, { "epoch": 0.7361504353865057, "grad_norm": 6.595841884613037, "learning_rate": 9.87042982584533e-07, "loss": 0.0535, "step": 68900 }, { "epoch": 0.7362572787007853, "grad_norm": 12.91238021850586, "learning_rate": 9.870391823231942e-07, "loss": 0.0721, "step": 68910 }, { "epoch": 0.736364122015065, "grad_norm": 6.172682285308838, "learning_rate": 9.870353815119517e-07, "loss": 0.0578, "step": 68920 }, { "epoch": 0.7364709653293445, "grad_norm": 0.03884636610746384, "learning_rate": 9.870315801508092e-07, "loss": 0.0715, "step": 68930 }, { "epoch": 0.7365778086436241, "grad_norm": 1.191352367401123, "learning_rate": 9.870277782397715e-07, "loss": 0.0566, "step": 68940 }, { "epoch": 0.7366846519579038, "grad_norm": 15.610954284667969, "learning_rate": 9.870239757788426e-07, "loss": 0.0476, "step": 68950 }, { "epoch": 0.7367914952721833, "grad_norm": 2.0844719409942627, "learning_rate": 9.87020172768027e-07, "loss": 0.0466, "step": 68960 }, { "epoch": 0.7368983385864629, "grad_norm": 2.1318471431732178, "learning_rate": 9.870163692073285e-07, "loss": 0.0093, "step": 68970 }, { "epoch": 0.7370051819007426, "grad_norm": 2.8454174995422363, "learning_rate": 9.870125650967521e-07, "loss": 0.0371, "step": 68980 }, { "epoch": 0.7371120252150222, "grad_norm": 5.273050308227539, "learning_rate": 9.870087604363016e-07, "loss": 0.0197, "step": 68990 }, { "epoch": 0.7372188685293017, "grad_norm": 3.191472053527832, "learning_rate": 9.870049552259815e-07, "loss": 0.0508, "step": 69000 }, { "epoch": 0.7373257118435814, "grad_norm": 1.2199307680130005, "learning_rate": 9.870011494657962e-07, "loss": 0.0872, "step": 69010 }, { "epoch": 0.737432555157861, "grad_norm": 6.776773452758789, "learning_rate": 9.869973431557497e-07, "loss": 0.0397, "step": 69020 }, { "epoch": 0.7375393984721406, "grad_norm": 0.09943712502717972, "learning_rate": 9.869935362958465e-07, "loss": 0.0378, "step": 69030 }, { "epoch": 0.7376462417864202, "grad_norm": 6.010618209838867, "learning_rate": 9.869897288860907e-07, "loss": 0.0698, "step": 69040 }, { "epoch": 0.7377530851006998, "grad_norm": 2.6866846084594727, "learning_rate": 9.869859209264868e-07, "loss": 0.0488, "step": 69050 }, { "epoch": 0.7378599284149794, "grad_norm": 3.525345802307129, "learning_rate": 9.869821124170392e-07, "loss": 0.0551, "step": 69060 }, { "epoch": 0.7379667717292591, "grad_norm": 3.6479485034942627, "learning_rate": 9.86978303357752e-07, "loss": 0.0314, "step": 69070 }, { "epoch": 0.7380736150435386, "grad_norm": 7.883652687072754, "learning_rate": 9.869744937486293e-07, "loss": 0.0594, "step": 69080 }, { "epoch": 0.7381804583578183, "grad_norm": 0.07759520411491394, "learning_rate": 9.869706835896758e-07, "loss": 0.0726, "step": 69090 }, { "epoch": 0.7382873016720979, "grad_norm": 3.0879626274108887, "learning_rate": 9.869668728808958e-07, "loss": 0.0392, "step": 69100 }, { "epoch": 0.7383941449863775, "grad_norm": 2.0907199382781982, "learning_rate": 9.869630616222933e-07, "loss": 0.0293, "step": 69110 }, { "epoch": 0.7385009883006571, "grad_norm": 2.171288013458252, "learning_rate": 9.869592498138726e-07, "loss": 0.0573, "step": 69120 }, { "epoch": 0.7386078316149367, "grad_norm": 2.3950464725494385, "learning_rate": 9.869554374556384e-07, "loss": 0.0553, "step": 69130 }, { "epoch": 0.7387146749292163, "grad_norm": 9.856226921081543, "learning_rate": 9.869516245475946e-07, "loss": 0.0942, "step": 69140 }, { "epoch": 0.738821518243496, "grad_norm": 0.24680939316749573, "learning_rate": 9.869478110897458e-07, "loss": 0.1291, "step": 69150 }, { "epoch": 0.7389283615577755, "grad_norm": 6.6500420570373535, "learning_rate": 9.86943997082096e-07, "loss": 0.0777, "step": 69160 }, { "epoch": 0.7390352048720551, "grad_norm": 5.287370681762695, "learning_rate": 9.869401825246498e-07, "loss": 0.0476, "step": 69170 }, { "epoch": 0.7391420481863348, "grad_norm": 3.216127634048462, "learning_rate": 9.869363674174114e-07, "loss": 0.0649, "step": 69180 }, { "epoch": 0.7392488915006143, "grad_norm": 10.95785140991211, "learning_rate": 9.869325517603851e-07, "loss": 0.0789, "step": 69190 }, { "epoch": 0.7393557348148939, "grad_norm": 24.192617416381836, "learning_rate": 9.869287355535752e-07, "loss": 0.0792, "step": 69200 }, { "epoch": 0.7394625781291736, "grad_norm": 7.491496562957764, "learning_rate": 9.86924918796986e-07, "loss": 0.0228, "step": 69210 }, { "epoch": 0.7395694214434532, "grad_norm": 1.974509835243225, "learning_rate": 9.869211014906216e-07, "loss": 0.0506, "step": 69220 }, { "epoch": 0.7396762647577327, "grad_norm": 0.3339357078075409, "learning_rate": 9.869172836344867e-07, "loss": 0.0297, "step": 69230 }, { "epoch": 0.7397831080720124, "grad_norm": 0.7870033979415894, "learning_rate": 9.869134652285855e-07, "loss": 0.0229, "step": 69240 }, { "epoch": 0.739889951386292, "grad_norm": 7.557244300842285, "learning_rate": 9.869096462729221e-07, "loss": 0.0737, "step": 69250 }, { "epoch": 0.7399967947005716, "grad_norm": 0.1372545063495636, "learning_rate": 9.869058267675012e-07, "loss": 0.023, "step": 69260 }, { "epoch": 0.7401036380148512, "grad_norm": 4.317192077636719, "learning_rate": 9.869020067123267e-07, "loss": 0.0219, "step": 69270 }, { "epoch": 0.7402104813291308, "grad_norm": 0.5258705019950867, "learning_rate": 9.868981861074032e-07, "loss": 0.0617, "step": 69280 }, { "epoch": 0.7403173246434105, "grad_norm": 5.118124008178711, "learning_rate": 9.868943649527345e-07, "loss": 0.0612, "step": 69290 }, { "epoch": 0.7404241679576901, "grad_norm": 5.271101474761963, "learning_rate": 9.868905432483258e-07, "loss": 0.0402, "step": 69300 }, { "epoch": 0.7405310112719696, "grad_norm": 0.09591253101825714, "learning_rate": 9.868867209941807e-07, "loss": 0.0511, "step": 69310 }, { "epoch": 0.7406378545862493, "grad_norm": 0.31106066703796387, "learning_rate": 9.868828981903038e-07, "loss": 0.0483, "step": 69320 }, { "epoch": 0.7407446979005289, "grad_norm": 5.319920063018799, "learning_rate": 9.868790748366993e-07, "loss": 0.0449, "step": 69330 }, { "epoch": 0.7408515412148085, "grad_norm": 4.5990095138549805, "learning_rate": 9.868752509333717e-07, "loss": 0.0263, "step": 69340 }, { "epoch": 0.7409583845290881, "grad_norm": 3.212219715118408, "learning_rate": 9.86871426480325e-07, "loss": 0.0566, "step": 69350 }, { "epoch": 0.7410652278433677, "grad_norm": 5.433862686157227, "learning_rate": 9.86867601477564e-07, "loss": 0.0535, "step": 69360 }, { "epoch": 0.7411720711576473, "grad_norm": 5.054100513458252, "learning_rate": 9.868637759250926e-07, "loss": 0.0477, "step": 69370 }, { "epoch": 0.741278914471927, "grad_norm": 4.7720465660095215, "learning_rate": 9.86859949822915e-07, "loss": 0.0351, "step": 69380 }, { "epoch": 0.7413857577862065, "grad_norm": 4.812330722808838, "learning_rate": 9.86856123171036e-07, "loss": 0.0585, "step": 69390 }, { "epoch": 0.7414926011004861, "grad_norm": 4.282682418823242, "learning_rate": 9.868522959694598e-07, "loss": 0.0558, "step": 69400 }, { "epoch": 0.7415994444147658, "grad_norm": 0.32877105474472046, "learning_rate": 9.868484682181905e-07, "loss": 0.0463, "step": 69410 }, { "epoch": 0.7417062877290453, "grad_norm": 3.609462022781372, "learning_rate": 9.868446399172326e-07, "loss": 0.0467, "step": 69420 }, { "epoch": 0.7418131310433249, "grad_norm": 2.522510528564453, "learning_rate": 9.868408110665903e-07, "loss": 0.0741, "step": 69430 }, { "epoch": 0.7419199743576046, "grad_norm": 0.3470630645751953, "learning_rate": 9.868369816662679e-07, "loss": 0.0455, "step": 69440 }, { "epoch": 0.7420268176718842, "grad_norm": 7.3034796714782715, "learning_rate": 9.8683315171627e-07, "loss": 0.0814, "step": 69450 }, { "epoch": 0.7421336609861638, "grad_norm": 0.8497369289398193, "learning_rate": 9.868293212166005e-07, "loss": 0.1417, "step": 69460 }, { "epoch": 0.7422405043004434, "grad_norm": 7.809314250946045, "learning_rate": 9.868254901672642e-07, "loss": 0.0491, "step": 69470 }, { "epoch": 0.742347347614723, "grad_norm": 6.109817028045654, "learning_rate": 9.86821658568265e-07, "loss": 0.0787, "step": 69480 }, { "epoch": 0.7424541909290027, "grad_norm": 3.189387321472168, "learning_rate": 9.868178264196075e-07, "loss": 0.105, "step": 69490 }, { "epoch": 0.7425610342432822, "grad_norm": 4.6180925369262695, "learning_rate": 9.868139937212958e-07, "loss": 0.0368, "step": 69500 }, { "epoch": 0.7426678775575618, "grad_norm": 1.6632741689682007, "learning_rate": 9.868101604733343e-07, "loss": 0.0677, "step": 69510 }, { "epoch": 0.7427747208718415, "grad_norm": 5.127358913421631, "learning_rate": 9.868063266757276e-07, "loss": 0.1088, "step": 69520 }, { "epoch": 0.7428815641861211, "grad_norm": 3.938817024230957, "learning_rate": 9.868024923284798e-07, "loss": 0.0407, "step": 69530 }, { "epoch": 0.7429884075004006, "grad_norm": 19.86385154724121, "learning_rate": 9.867986574315952e-07, "loss": 0.0922, "step": 69540 }, { "epoch": 0.7430952508146803, "grad_norm": 11.958419799804688, "learning_rate": 9.867948219850781e-07, "loss": 0.0707, "step": 69550 }, { "epoch": 0.7432020941289599, "grad_norm": 1.5919833183288574, "learning_rate": 9.86790985988933e-07, "loss": 0.0885, "step": 69560 }, { "epoch": 0.7433089374432394, "grad_norm": 2.2723941802978516, "learning_rate": 9.867871494431641e-07, "loss": 0.0468, "step": 69570 }, { "epoch": 0.7434157807575191, "grad_norm": 0.2590941786766052, "learning_rate": 9.867833123477757e-07, "loss": 0.0495, "step": 69580 }, { "epoch": 0.7435226240717987, "grad_norm": 14.933932304382324, "learning_rate": 9.867794747027725e-07, "loss": 0.0832, "step": 69590 }, { "epoch": 0.7436294673860783, "grad_norm": 5.856683254241943, "learning_rate": 9.867756365081583e-07, "loss": 0.0768, "step": 69600 }, { "epoch": 0.743736310700358, "grad_norm": 0.15642637014389038, "learning_rate": 9.867717977639376e-07, "loss": 0.0562, "step": 69610 }, { "epoch": 0.7438431540146375, "grad_norm": 5.981917381286621, "learning_rate": 9.86767958470115e-07, "loss": 0.0785, "step": 69620 }, { "epoch": 0.7439499973289171, "grad_norm": 3.430574893951416, "learning_rate": 9.867641186266944e-07, "loss": 0.0209, "step": 69630 }, { "epoch": 0.7440568406431968, "grad_norm": 7.188695430755615, "learning_rate": 9.867602782336804e-07, "loss": 0.0242, "step": 69640 }, { "epoch": 0.7441636839574763, "grad_norm": 11.014579772949219, "learning_rate": 9.867564372910774e-07, "loss": 0.062, "step": 69650 }, { "epoch": 0.744270527271756, "grad_norm": 1.359962821006775, "learning_rate": 9.867525957988898e-07, "loss": 0.047, "step": 69660 }, { "epoch": 0.7443773705860356, "grad_norm": 1.6784290075302124, "learning_rate": 9.867487537571216e-07, "loss": 0.0402, "step": 69670 }, { "epoch": 0.7444842139003152, "grad_norm": 7.252579689025879, "learning_rate": 9.867449111657772e-07, "loss": 0.0454, "step": 69680 }, { "epoch": 0.7445910572145948, "grad_norm": 5.035976409912109, "learning_rate": 9.867410680248614e-07, "loss": 0.0551, "step": 69690 }, { "epoch": 0.7446979005288744, "grad_norm": 3.7836787700653076, "learning_rate": 9.867372243343778e-07, "loss": 0.0755, "step": 69700 }, { "epoch": 0.744804743843154, "grad_norm": 20.579471588134766, "learning_rate": 9.867333800943313e-07, "loss": 0.0591, "step": 69710 }, { "epoch": 0.7449115871574337, "grad_norm": 0.1729716956615448, "learning_rate": 9.867295353047261e-07, "loss": 0.0282, "step": 69720 }, { "epoch": 0.7450184304717132, "grad_norm": 2.5932042598724365, "learning_rate": 9.867256899655667e-07, "loss": 0.0439, "step": 69730 }, { "epoch": 0.7451252737859928, "grad_norm": 8.386298179626465, "learning_rate": 9.86721844076857e-07, "loss": 0.0738, "step": 69740 }, { "epoch": 0.7452321171002725, "grad_norm": 8.1503267288208, "learning_rate": 9.867179976386018e-07, "loss": 0.0461, "step": 69750 }, { "epoch": 0.745338960414552, "grad_norm": 4.37579345703125, "learning_rate": 9.867141506508052e-07, "loss": 0.0522, "step": 69760 }, { "epoch": 0.7454458037288316, "grad_norm": 0.8668251037597656, "learning_rate": 9.867103031134715e-07, "loss": 0.0258, "step": 69770 }, { "epoch": 0.7455526470431113, "grad_norm": 1.5529168844223022, "learning_rate": 9.867064550266051e-07, "loss": 0.033, "step": 69780 }, { "epoch": 0.7456594903573909, "grad_norm": 3.793625593185425, "learning_rate": 9.867026063902107e-07, "loss": 0.0285, "step": 69790 }, { "epoch": 0.7457663336716704, "grad_norm": 2.7046055793762207, "learning_rate": 9.86698757204292e-07, "loss": 0.106, "step": 69800 }, { "epoch": 0.7458731769859501, "grad_norm": 0.3492128849029541, "learning_rate": 9.866949074688538e-07, "loss": 0.0332, "step": 69810 }, { "epoch": 0.7459800203002297, "grad_norm": 2.3655996322631836, "learning_rate": 9.866910571839002e-07, "loss": 0.0352, "step": 69820 }, { "epoch": 0.7460868636145094, "grad_norm": 0.22197692096233368, "learning_rate": 9.866872063494358e-07, "loss": 0.0313, "step": 69830 }, { "epoch": 0.746193706928789, "grad_norm": 5.876216888427734, "learning_rate": 9.866833549654647e-07, "loss": 0.0532, "step": 69840 }, { "epoch": 0.7463005502430685, "grad_norm": 7.37278413772583, "learning_rate": 9.866795030319913e-07, "loss": 0.0606, "step": 69850 }, { "epoch": 0.7464073935573482, "grad_norm": 1.9138013124465942, "learning_rate": 9.8667565054902e-07, "loss": 0.0417, "step": 69860 }, { "epoch": 0.7465142368716278, "grad_norm": 0.04463859274983406, "learning_rate": 9.866717975165555e-07, "loss": 0.0459, "step": 69870 }, { "epoch": 0.7466210801859073, "grad_norm": 10.898069381713867, "learning_rate": 9.866679439346014e-07, "loss": 0.1239, "step": 69880 }, { "epoch": 0.746727923500187, "grad_norm": 4.803865909576416, "learning_rate": 9.866640898031627e-07, "loss": 0.0464, "step": 69890 }, { "epoch": 0.7468347668144666, "grad_norm": 1.9762459993362427, "learning_rate": 9.866602351222435e-07, "loss": 0.0455, "step": 69900 }, { "epoch": 0.7469416101287462, "grad_norm": 0.05499671399593353, "learning_rate": 9.86656379891848e-07, "loss": 0.0773, "step": 69910 }, { "epoch": 0.7470484534430258, "grad_norm": 7.491184711456299, "learning_rate": 9.866525241119809e-07, "loss": 0.0858, "step": 69920 }, { "epoch": 0.7471552967573054, "grad_norm": 0.10004129260778427, "learning_rate": 9.866486677826464e-07, "loss": 0.091, "step": 69930 }, { "epoch": 0.747262140071585, "grad_norm": 0.19916488230228424, "learning_rate": 9.866448109038487e-07, "loss": 0.0188, "step": 69940 }, { "epoch": 0.7473689833858647, "grad_norm": 0.20101763308048248, "learning_rate": 9.866409534755923e-07, "loss": 0.0414, "step": 69950 }, { "epoch": 0.7474758267001442, "grad_norm": 0.9177365899085999, "learning_rate": 9.866370954978815e-07, "loss": 0.0531, "step": 69960 }, { "epoch": 0.7475826700144238, "grad_norm": 3.177067518234253, "learning_rate": 9.866332369707208e-07, "loss": 0.0329, "step": 69970 }, { "epoch": 0.7476895133287035, "grad_norm": 3.227600336074829, "learning_rate": 9.866293778941144e-07, "loss": 0.0236, "step": 69980 }, { "epoch": 0.747796356642983, "grad_norm": 2.0798699855804443, "learning_rate": 9.866255182680667e-07, "loss": 0.078, "step": 69990 }, { "epoch": 0.7479031999572626, "grad_norm": 10.095832824707031, "learning_rate": 9.86621658092582e-07, "loss": 0.0791, "step": 70000 }, { "epoch": 0.7480100432715423, "grad_norm": 4.854669570922852, "learning_rate": 9.86617797367665e-07, "loss": 0.0554, "step": 70010 }, { "epoch": 0.7481168865858219, "grad_norm": 3.2785651683807373, "learning_rate": 9.866139360933195e-07, "loss": 0.1121, "step": 70020 }, { "epoch": 0.7482237299001016, "grad_norm": 0.9712145328521729, "learning_rate": 9.8661007426955e-07, "loss": 0.0536, "step": 70030 }, { "epoch": 0.7483305732143811, "grad_norm": 8.611326217651367, "learning_rate": 9.866062118963612e-07, "loss": 0.0191, "step": 70040 }, { "epoch": 0.7484374165286607, "grad_norm": 0.2794719934463501, "learning_rate": 9.866023489737574e-07, "loss": 0.0364, "step": 70050 }, { "epoch": 0.7485442598429404, "grad_norm": 10.739063262939453, "learning_rate": 9.865984855017426e-07, "loss": 0.1421, "step": 70060 }, { "epoch": 0.7486511031572199, "grad_norm": 0.059424515813589096, "learning_rate": 9.865946214803215e-07, "loss": 0.1025, "step": 70070 }, { "epoch": 0.7487579464714995, "grad_norm": 2.377265453338623, "learning_rate": 9.865907569094982e-07, "loss": 0.146, "step": 70080 }, { "epoch": 0.7488647897857792, "grad_norm": 1.0332709550857544, "learning_rate": 9.865868917892774e-07, "loss": 0.0145, "step": 70090 }, { "epoch": 0.7489716331000588, "grad_norm": 1.1587663888931274, "learning_rate": 9.865830261196632e-07, "loss": 0.0136, "step": 70100 }, { "epoch": 0.7490784764143383, "grad_norm": 11.613935470581055, "learning_rate": 9.865791599006602e-07, "loss": 0.0624, "step": 70110 }, { "epoch": 0.749185319728618, "grad_norm": 2.6105289459228516, "learning_rate": 9.865752931322723e-07, "loss": 0.0383, "step": 70120 }, { "epoch": 0.7492921630428976, "grad_norm": 5.3731513023376465, "learning_rate": 9.865714258145043e-07, "loss": 0.0329, "step": 70130 }, { "epoch": 0.7493990063571772, "grad_norm": 0.15018890798091888, "learning_rate": 9.865675579473606e-07, "loss": 0.045, "step": 70140 }, { "epoch": 0.7495058496714568, "grad_norm": 0.08151382207870483, "learning_rate": 9.865636895308452e-07, "loss": 0.0684, "step": 70150 }, { "epoch": 0.7496126929857364, "grad_norm": 4.869112968444824, "learning_rate": 9.865598205649627e-07, "loss": 0.0941, "step": 70160 }, { "epoch": 0.749719536300016, "grad_norm": 7.924300670623779, "learning_rate": 9.865559510497174e-07, "loss": 0.0507, "step": 70170 }, { "epoch": 0.7498263796142957, "grad_norm": 1.1998893022537231, "learning_rate": 9.865520809851137e-07, "loss": 0.1833, "step": 70180 }, { "epoch": 0.7499332229285752, "grad_norm": 0.43710508942604065, "learning_rate": 9.86548210371156e-07, "loss": 0.0428, "step": 70190 }, { "epoch": 0.7500400662428549, "grad_norm": 4.625349998474121, "learning_rate": 9.865443392078488e-07, "loss": 0.0682, "step": 70200 }, { "epoch": 0.7501469095571345, "grad_norm": 0.033990904688835144, "learning_rate": 9.865404674951964e-07, "loss": 0.0746, "step": 70210 }, { "epoch": 0.750253752871414, "grad_norm": 1.693393588066101, "learning_rate": 9.865365952332029e-07, "loss": 0.0675, "step": 70220 }, { "epoch": 0.7503605961856937, "grad_norm": 0.11412429064512253, "learning_rate": 9.865327224218727e-07, "loss": 0.0311, "step": 70230 }, { "epoch": 0.7504674394999733, "grad_norm": 0.42099910974502563, "learning_rate": 9.865288490612106e-07, "loss": 0.0318, "step": 70240 }, { "epoch": 0.7505742828142529, "grad_norm": 9.477389335632324, "learning_rate": 9.865249751512206e-07, "loss": 0.1248, "step": 70250 }, { "epoch": 0.7506811261285325, "grad_norm": 0.29316675662994385, "learning_rate": 9.865211006919072e-07, "loss": 0.0522, "step": 70260 }, { "epoch": 0.7507879694428121, "grad_norm": 5.919749736785889, "learning_rate": 9.865172256832747e-07, "loss": 0.0388, "step": 70270 }, { "epoch": 0.7508948127570917, "grad_norm": 1.3476076126098633, "learning_rate": 9.865133501253276e-07, "loss": 0.0557, "step": 70280 }, { "epoch": 0.7510016560713714, "grad_norm": 0.18235261738300323, "learning_rate": 9.865094740180702e-07, "loss": 0.0722, "step": 70290 }, { "epoch": 0.7511084993856509, "grad_norm": 2.8819491863250732, "learning_rate": 9.865055973615068e-07, "loss": 0.0524, "step": 70300 }, { "epoch": 0.7512153426999305, "grad_norm": 5.561132431030273, "learning_rate": 9.86501720155642e-07, "loss": 0.0316, "step": 70310 }, { "epoch": 0.7513221860142102, "grad_norm": 2.6342222690582275, "learning_rate": 9.8649784240048e-07, "loss": 0.0252, "step": 70320 }, { "epoch": 0.7514290293284898, "grad_norm": 13.392782211303711, "learning_rate": 9.864939640960253e-07, "loss": 0.072, "step": 70330 }, { "epoch": 0.7515358726427693, "grad_norm": 4.665111541748047, "learning_rate": 9.86490085242282e-07, "loss": 0.02, "step": 70340 }, { "epoch": 0.751642715957049, "grad_norm": 8.789945602416992, "learning_rate": 9.86486205839255e-07, "loss": 0.0462, "step": 70350 }, { "epoch": 0.7517495592713286, "grad_norm": 2.5176587104797363, "learning_rate": 9.864823258869478e-07, "loss": 0.0579, "step": 70360 }, { "epoch": 0.7518564025856082, "grad_norm": 1.5556174516677856, "learning_rate": 9.864784453853657e-07, "loss": 0.0767, "step": 70370 }, { "epoch": 0.7519632458998878, "grad_norm": 2.4268808364868164, "learning_rate": 9.864745643345128e-07, "loss": 0.0368, "step": 70380 }, { "epoch": 0.7520700892141674, "grad_norm": 7.783247947692871, "learning_rate": 9.864706827343934e-07, "loss": 0.0438, "step": 70390 }, { "epoch": 0.7521769325284471, "grad_norm": 3.673661470413208, "learning_rate": 9.864668005850118e-07, "loss": 0.0473, "step": 70400 }, { "epoch": 0.7522837758427267, "grad_norm": 6.440492630004883, "learning_rate": 9.864629178863724e-07, "loss": 0.073, "step": 70410 }, { "epoch": 0.7523906191570062, "grad_norm": 5.558745861053467, "learning_rate": 9.864590346384797e-07, "loss": 0.083, "step": 70420 }, { "epoch": 0.7524974624712859, "grad_norm": 3.1526260375976562, "learning_rate": 9.86455150841338e-07, "loss": 0.0312, "step": 70430 }, { "epoch": 0.7526043057855655, "grad_norm": 1.0315574407577515, "learning_rate": 9.864512664949519e-07, "loss": 0.0411, "step": 70440 }, { "epoch": 0.752711149099845, "grad_norm": 8.77629566192627, "learning_rate": 9.864473815993252e-07, "loss": 0.0845, "step": 70450 }, { "epoch": 0.7528179924141247, "grad_norm": 0.20121808350086212, "learning_rate": 9.86443496154463e-07, "loss": 0.0221, "step": 70460 }, { "epoch": 0.7529248357284043, "grad_norm": 13.140968322753906, "learning_rate": 9.864396101603693e-07, "loss": 0.0437, "step": 70470 }, { "epoch": 0.7530316790426839, "grad_norm": 6.797906398773193, "learning_rate": 9.864357236170488e-07, "loss": 0.1402, "step": 70480 }, { "epoch": 0.7531385223569635, "grad_norm": 0.047943443059921265, "learning_rate": 9.864318365245055e-07, "loss": 0.087, "step": 70490 }, { "epoch": 0.7532453656712431, "grad_norm": 0.43282875418663025, "learning_rate": 9.864279488827438e-07, "loss": 0.0576, "step": 70500 }, { "epoch": 0.7533522089855227, "grad_norm": 5.145969390869141, "learning_rate": 9.864240606917682e-07, "loss": 0.0373, "step": 70510 }, { "epoch": 0.7534590522998024, "grad_norm": 1.486365795135498, "learning_rate": 9.864201719515833e-07, "loss": 0.0463, "step": 70520 }, { "epoch": 0.7535658956140819, "grad_norm": 1.434826135635376, "learning_rate": 9.864162826621934e-07, "loss": 0.0604, "step": 70530 }, { "epoch": 0.7536727389283615, "grad_norm": 19.81304359436035, "learning_rate": 9.864123928236026e-07, "loss": 0.0684, "step": 70540 }, { "epoch": 0.7537795822426412, "grad_norm": 0.14801125228405, "learning_rate": 9.864085024358155e-07, "loss": 0.073, "step": 70550 }, { "epoch": 0.7538864255569208, "grad_norm": 21.011728286743164, "learning_rate": 9.864046114988366e-07, "loss": 0.0674, "step": 70560 }, { "epoch": 0.7539932688712004, "grad_norm": 1.0045146942138672, "learning_rate": 9.864007200126702e-07, "loss": 0.0993, "step": 70570 }, { "epoch": 0.75410011218548, "grad_norm": 7.425657272338867, "learning_rate": 9.863968279773206e-07, "loss": 0.0418, "step": 70580 }, { "epoch": 0.7542069554997596, "grad_norm": 0.2609758973121643, "learning_rate": 9.863929353927921e-07, "loss": 0.0353, "step": 70590 }, { "epoch": 0.7543137988140393, "grad_norm": 6.581186771392822, "learning_rate": 9.863890422590896e-07, "loss": 0.0674, "step": 70600 }, { "epoch": 0.7544206421283188, "grad_norm": 2.7413203716278076, "learning_rate": 9.863851485762171e-07, "loss": 0.0493, "step": 70610 }, { "epoch": 0.7545274854425984, "grad_norm": 2.2254135608673096, "learning_rate": 9.863812543441789e-07, "loss": 0.1031, "step": 70620 }, { "epoch": 0.7546343287568781, "grad_norm": 8.50871753692627, "learning_rate": 9.863773595629797e-07, "loss": 0.0547, "step": 70630 }, { "epoch": 0.7547411720711577, "grad_norm": 5.936168193817139, "learning_rate": 9.863734642326237e-07, "loss": 0.0481, "step": 70640 }, { "epoch": 0.7548480153854372, "grad_norm": 4.808219909667969, "learning_rate": 9.863695683531153e-07, "loss": 0.0237, "step": 70650 }, { "epoch": 0.7549548586997169, "grad_norm": 0.6925937533378601, "learning_rate": 9.86365671924459e-07, "loss": 0.0276, "step": 70660 }, { "epoch": 0.7550617020139965, "grad_norm": 6.924749851226807, "learning_rate": 9.86361774946659e-07, "loss": 0.0449, "step": 70670 }, { "epoch": 0.755168545328276, "grad_norm": 7.031162738800049, "learning_rate": 9.8635787741972e-07, "loss": 0.0351, "step": 70680 }, { "epoch": 0.7552753886425557, "grad_norm": 1.9049619436264038, "learning_rate": 9.863539793436464e-07, "loss": 0.0633, "step": 70690 }, { "epoch": 0.7553822319568353, "grad_norm": 6.533553123474121, "learning_rate": 9.86350080718442e-07, "loss": 0.0404, "step": 70700 }, { "epoch": 0.7554890752711149, "grad_norm": 0.015096825547516346, "learning_rate": 9.863461815441123e-07, "loss": 0.0568, "step": 70710 }, { "epoch": 0.7555959185853945, "grad_norm": 1.000516653060913, "learning_rate": 9.863422818206605e-07, "loss": 0.0289, "step": 70720 }, { "epoch": 0.7557027618996741, "grad_norm": 0.22507494688034058, "learning_rate": 9.863383815480917e-07, "loss": 0.1327, "step": 70730 }, { "epoch": 0.7558096052139537, "grad_norm": 0.9556881189346313, "learning_rate": 9.863344807264103e-07, "loss": 0.0401, "step": 70740 }, { "epoch": 0.7559164485282334, "grad_norm": 1.3126951456069946, "learning_rate": 9.863305793556205e-07, "loss": 0.1228, "step": 70750 }, { "epoch": 0.7560232918425129, "grad_norm": 6.9435834884643555, "learning_rate": 9.863266774357268e-07, "loss": 0.0568, "step": 70760 }, { "epoch": 0.7561301351567926, "grad_norm": 1.5521349906921387, "learning_rate": 9.863227749667335e-07, "loss": 0.0766, "step": 70770 }, { "epoch": 0.7562369784710722, "grad_norm": 2.5061726570129395, "learning_rate": 9.86318871948645e-07, "loss": 0.0136, "step": 70780 }, { "epoch": 0.7563438217853518, "grad_norm": 0.1962871253490448, "learning_rate": 9.86314968381466e-07, "loss": 0.2411, "step": 70790 }, { "epoch": 0.7564506650996314, "grad_norm": 1.8111987113952637, "learning_rate": 9.863110642652006e-07, "loss": 0.0315, "step": 70800 }, { "epoch": 0.756557508413911, "grad_norm": 2.743232011795044, "learning_rate": 9.863071595998533e-07, "loss": 0.09, "step": 70810 }, { "epoch": 0.7566643517281906, "grad_norm": 3.365100622177124, "learning_rate": 9.863032543854285e-07, "loss": 0.0189, "step": 70820 }, { "epoch": 0.7567711950424703, "grad_norm": 1.7854059934616089, "learning_rate": 9.862993486219307e-07, "loss": 0.0361, "step": 70830 }, { "epoch": 0.7568780383567498, "grad_norm": 7.42725133895874, "learning_rate": 9.862954423093641e-07, "loss": 0.0244, "step": 70840 }, { "epoch": 0.7569848816710294, "grad_norm": 13.732216835021973, "learning_rate": 9.862915354477332e-07, "loss": 0.1274, "step": 70850 }, { "epoch": 0.7570917249853091, "grad_norm": 3.1364970207214355, "learning_rate": 9.862876280370427e-07, "loss": 0.0315, "step": 70860 }, { "epoch": 0.7571985682995886, "grad_norm": 9.702777862548828, "learning_rate": 9.862837200772966e-07, "loss": 0.0888, "step": 70870 }, { "epoch": 0.7573054116138682, "grad_norm": 3.9529435634613037, "learning_rate": 9.862798115684994e-07, "loss": 0.0412, "step": 70880 }, { "epoch": 0.7574122549281479, "grad_norm": 0.6770209074020386, "learning_rate": 9.862759025106556e-07, "loss": 0.0862, "step": 70890 }, { "epoch": 0.7575190982424275, "grad_norm": 5.4306488037109375, "learning_rate": 9.8627199290377e-07, "loss": 0.044, "step": 70900 }, { "epoch": 0.757625941556707, "grad_norm": 5.688507080078125, "learning_rate": 9.862680827478463e-07, "loss": 0.0449, "step": 70910 }, { "epoch": 0.7577327848709867, "grad_norm": 0.311870813369751, "learning_rate": 9.862641720428893e-07, "loss": 0.0389, "step": 70920 }, { "epoch": 0.7578396281852663, "grad_norm": 13.174574851989746, "learning_rate": 9.862602607889032e-07, "loss": 0.0703, "step": 70930 }, { "epoch": 0.757946471499546, "grad_norm": 1.15528404712677, "learning_rate": 9.862563489858926e-07, "loss": 0.0153, "step": 70940 }, { "epoch": 0.7580533148138255, "grad_norm": 0.2906292974948883, "learning_rate": 9.86252436633862e-07, "loss": 0.0386, "step": 70950 }, { "epoch": 0.7581601581281051, "grad_norm": 0.07684623450040817, "learning_rate": 9.862485237328158e-07, "loss": 0.0718, "step": 70960 }, { "epoch": 0.7582670014423848, "grad_norm": 0.6371121406555176, "learning_rate": 9.86244610282758e-07, "loss": 0.0461, "step": 70970 }, { "epoch": 0.7583738447566644, "grad_norm": 8.462949752807617, "learning_rate": 9.862406962836936e-07, "loss": 0.0707, "step": 70980 }, { "epoch": 0.7584806880709439, "grad_norm": 0.17239587008953094, "learning_rate": 9.862367817356266e-07, "loss": 0.0422, "step": 70990 }, { "epoch": 0.7585875313852236, "grad_norm": 0.44647446274757385, "learning_rate": 9.862328666385618e-07, "loss": 0.0382, "step": 71000 }, { "epoch": 0.7586943746995032, "grad_norm": 6.408627986907959, "learning_rate": 9.862289509925032e-07, "loss": 0.0862, "step": 71010 }, { "epoch": 0.7588012180137828, "grad_norm": 2.4842689037323, "learning_rate": 9.862250347974554e-07, "loss": 0.0326, "step": 71020 }, { "epoch": 0.7589080613280624, "grad_norm": 1.7449227571487427, "learning_rate": 9.862211180534227e-07, "loss": 0.0625, "step": 71030 }, { "epoch": 0.759014904642342, "grad_norm": 0.046046607196331024, "learning_rate": 9.8621720076041e-07, "loss": 0.0611, "step": 71040 }, { "epoch": 0.7591217479566216, "grad_norm": 5.456098556518555, "learning_rate": 9.862132829184214e-07, "loss": 0.0441, "step": 71050 }, { "epoch": 0.7592285912709013, "grad_norm": 0.05071752518415451, "learning_rate": 9.86209364527461e-07, "loss": 0.085, "step": 71060 }, { "epoch": 0.7593354345851808, "grad_norm": 0.2627234160900116, "learning_rate": 9.862054455875337e-07, "loss": 0.0322, "step": 71070 }, { "epoch": 0.7594422778994604, "grad_norm": 0.6206657290458679, "learning_rate": 9.862015260986438e-07, "loss": 0.021, "step": 71080 }, { "epoch": 0.7595491212137401, "grad_norm": 5.6326375007629395, "learning_rate": 9.861976060607956e-07, "loss": 0.0392, "step": 71090 }, { "epoch": 0.7596559645280196, "grad_norm": 0.13584208488464355, "learning_rate": 9.861936854739935e-07, "loss": 0.022, "step": 71100 }, { "epoch": 0.7597628078422992, "grad_norm": 16.326038360595703, "learning_rate": 9.861897643382422e-07, "loss": 0.0749, "step": 71110 }, { "epoch": 0.7598696511565789, "grad_norm": 1.1735752820968628, "learning_rate": 9.86185842653546e-07, "loss": 0.0604, "step": 71120 }, { "epoch": 0.7599764944708585, "grad_norm": 7.667687892913818, "learning_rate": 9.861819204199092e-07, "loss": 0.0546, "step": 71130 }, { "epoch": 0.7600833377851381, "grad_norm": 5.60287618637085, "learning_rate": 9.861779976373363e-07, "loss": 0.0383, "step": 71140 }, { "epoch": 0.7601901810994177, "grad_norm": 0.2091558277606964, "learning_rate": 9.861740743058318e-07, "loss": 0.0497, "step": 71150 }, { "epoch": 0.7602970244136973, "grad_norm": 7.4494805335998535, "learning_rate": 9.861701504254002e-07, "loss": 0.0938, "step": 71160 }, { "epoch": 0.760403867727977, "grad_norm": 10.221284866333008, "learning_rate": 9.861662259960455e-07, "loss": 0.1171, "step": 71170 }, { "epoch": 0.7605107110422565, "grad_norm": 2.2913007736206055, "learning_rate": 9.861623010177726e-07, "loss": 0.0587, "step": 71180 }, { "epoch": 0.7606175543565361, "grad_norm": 0.6322542428970337, "learning_rate": 9.861583754905859e-07, "loss": 0.0575, "step": 71190 }, { "epoch": 0.7607243976708158, "grad_norm": 7.1487908363342285, "learning_rate": 9.861544494144892e-07, "loss": 0.0434, "step": 71200 }, { "epoch": 0.7608312409850954, "grad_norm": 0.11260197311639786, "learning_rate": 9.86150522789488e-07, "loss": 0.0729, "step": 71210 }, { "epoch": 0.7609380842993749, "grad_norm": 1.963066577911377, "learning_rate": 9.86146595615586e-07, "loss": 0.0447, "step": 71220 }, { "epoch": 0.7610449276136546, "grad_norm": 4.167762756347656, "learning_rate": 9.861426678927878e-07, "loss": 0.029, "step": 71230 }, { "epoch": 0.7611517709279342, "grad_norm": 5.533859729766846, "learning_rate": 9.86138739621098e-07, "loss": 0.0288, "step": 71240 }, { "epoch": 0.7612586142422137, "grad_norm": 10.674192428588867, "learning_rate": 9.861348108005205e-07, "loss": 0.0702, "step": 71250 }, { "epoch": 0.7613654575564934, "grad_norm": 0.40765637159347534, "learning_rate": 9.861308814310602e-07, "loss": 0.0723, "step": 71260 }, { "epoch": 0.761472300870773, "grad_norm": 15.056966781616211, "learning_rate": 9.861269515127215e-07, "loss": 0.0623, "step": 71270 }, { "epoch": 0.7615791441850526, "grad_norm": 5.2052812576293945, "learning_rate": 9.86123021045509e-07, "loss": 0.0429, "step": 71280 }, { "epoch": 0.7616859874993323, "grad_norm": 3.8677823543548584, "learning_rate": 9.861190900294265e-07, "loss": 0.0162, "step": 71290 }, { "epoch": 0.7617928308136118, "grad_norm": 4.958491802215576, "learning_rate": 9.861151584644793e-07, "loss": 0.0481, "step": 71300 }, { "epoch": 0.7618996741278915, "grad_norm": 6.727055072784424, "learning_rate": 9.86111226350671e-07, "loss": 0.0606, "step": 71310 }, { "epoch": 0.7620065174421711, "grad_norm": 6.955885887145996, "learning_rate": 9.861072936880068e-07, "loss": 0.0601, "step": 71320 }, { "epoch": 0.7621133607564506, "grad_norm": 8.049912452697754, "learning_rate": 9.861033604764906e-07, "loss": 0.0685, "step": 71330 }, { "epoch": 0.7622202040707303, "grad_norm": 4.773233890533447, "learning_rate": 9.86099426716127e-07, "loss": 0.0983, "step": 71340 }, { "epoch": 0.7623270473850099, "grad_norm": 0.9763320088386536, "learning_rate": 9.860954924069205e-07, "loss": 0.0556, "step": 71350 }, { "epoch": 0.7624338906992895, "grad_norm": 0.121109239757061, "learning_rate": 9.860915575488756e-07, "loss": 0.0234, "step": 71360 }, { "epoch": 0.7625407340135691, "grad_norm": 8.274250030517578, "learning_rate": 9.860876221419966e-07, "loss": 0.1437, "step": 71370 }, { "epoch": 0.7626475773278487, "grad_norm": 4.187465190887451, "learning_rate": 9.860836861862878e-07, "loss": 0.0077, "step": 71380 }, { "epoch": 0.7627544206421283, "grad_norm": 5.7929182052612305, "learning_rate": 9.860797496817538e-07, "loss": 0.0372, "step": 71390 }, { "epoch": 0.762861263956408, "grad_norm": 21.232746124267578, "learning_rate": 9.860758126283992e-07, "loss": 0.153, "step": 71400 }, { "epoch": 0.7629681072706875, "grad_norm": 0.14346541464328766, "learning_rate": 9.860718750262285e-07, "loss": 0.064, "step": 71410 }, { "epoch": 0.7630749505849671, "grad_norm": 6.04549503326416, "learning_rate": 9.860679368752458e-07, "loss": 0.0587, "step": 71420 }, { "epoch": 0.7631817938992468, "grad_norm": 7.805062294006348, "learning_rate": 9.860639981754557e-07, "loss": 0.0998, "step": 71430 }, { "epoch": 0.7632886372135264, "grad_norm": 5.064630031585693, "learning_rate": 9.860600589268628e-07, "loss": 0.0505, "step": 71440 }, { "epoch": 0.7633954805278059, "grad_norm": 2.742582321166992, "learning_rate": 9.860561191294712e-07, "loss": 0.0612, "step": 71450 }, { "epoch": 0.7635023238420856, "grad_norm": 0.5640512704849243, "learning_rate": 9.860521787832856e-07, "loss": 0.0668, "step": 71460 }, { "epoch": 0.7636091671563652, "grad_norm": 0.1515711396932602, "learning_rate": 9.860482378883107e-07, "loss": 0.0234, "step": 71470 }, { "epoch": 0.7637160104706447, "grad_norm": 0.07065300643444061, "learning_rate": 9.860442964445503e-07, "loss": 0.042, "step": 71480 }, { "epoch": 0.7638228537849244, "grad_norm": 3.4307806491851807, "learning_rate": 9.860403544520092e-07, "loss": 0.1457, "step": 71490 }, { "epoch": 0.763929697099204, "grad_norm": 1.9225603342056274, "learning_rate": 9.860364119106921e-07, "loss": 0.0314, "step": 71500 }, { "epoch": 0.7640365404134837, "grad_norm": 6.255831718444824, "learning_rate": 9.86032468820603e-07, "loss": 0.0272, "step": 71510 }, { "epoch": 0.7641433837277632, "grad_norm": 0.17247147858142853, "learning_rate": 9.860285251817468e-07, "loss": 0.0608, "step": 71520 }, { "epoch": 0.7642502270420428, "grad_norm": 5.516976356506348, "learning_rate": 9.860245809941274e-07, "loss": 0.0913, "step": 71530 }, { "epoch": 0.7643570703563225, "grad_norm": 1.84359610080719, "learning_rate": 9.860206362577499e-07, "loss": 0.0405, "step": 71540 }, { "epoch": 0.7644639136706021, "grad_norm": 6.058727741241455, "learning_rate": 9.860166909726182e-07, "loss": 0.0379, "step": 71550 }, { "epoch": 0.7645707569848816, "grad_norm": 1.3297944068908691, "learning_rate": 9.86012745138737e-07, "loss": 0.0903, "step": 71560 }, { "epoch": 0.7646776002991613, "grad_norm": 3.9068727493286133, "learning_rate": 9.860087987561108e-07, "loss": 0.0663, "step": 71570 }, { "epoch": 0.7647844436134409, "grad_norm": 4.75870943069458, "learning_rate": 9.86004851824744e-07, "loss": 0.0705, "step": 71580 }, { "epoch": 0.7648912869277205, "grad_norm": 11.126572608947754, "learning_rate": 9.860009043446408e-07, "loss": 0.0472, "step": 71590 }, { "epoch": 0.7649981302420001, "grad_norm": 7.580310344696045, "learning_rate": 9.859969563158062e-07, "loss": 0.0493, "step": 71600 }, { "epoch": 0.7651049735562797, "grad_norm": 4.80064582824707, "learning_rate": 9.859930077382442e-07, "loss": 0.0407, "step": 71610 }, { "epoch": 0.7652118168705593, "grad_norm": 0.23376323282718658, "learning_rate": 9.859890586119596e-07, "loss": 0.0432, "step": 71620 }, { "epoch": 0.765318660184839, "grad_norm": 3.159846544265747, "learning_rate": 9.859851089369564e-07, "loss": 0.0263, "step": 71630 }, { "epoch": 0.7654255034991185, "grad_norm": 10.873710632324219, "learning_rate": 9.859811587132394e-07, "loss": 0.0378, "step": 71640 }, { "epoch": 0.7655323468133981, "grad_norm": 8.881135940551758, "learning_rate": 9.85977207940813e-07, "loss": 0.0637, "step": 71650 }, { "epoch": 0.7656391901276778, "grad_norm": 8.660689353942871, "learning_rate": 9.859732566196818e-07, "loss": 0.0421, "step": 71660 }, { "epoch": 0.7657460334419574, "grad_norm": 0.22577032446861267, "learning_rate": 9.8596930474985e-07, "loss": 0.0874, "step": 71670 }, { "epoch": 0.765852876756237, "grad_norm": 8.065903663635254, "learning_rate": 9.859653523313223e-07, "loss": 0.043, "step": 71680 }, { "epoch": 0.7659597200705166, "grad_norm": 5.156991004943848, "learning_rate": 9.859613993641028e-07, "loss": 0.123, "step": 71690 }, { "epoch": 0.7660665633847962, "grad_norm": 1.8893015384674072, "learning_rate": 9.859574458481964e-07, "loss": 0.0461, "step": 71700 }, { "epoch": 0.7661734066990759, "grad_norm": 14.109644889831543, "learning_rate": 9.859534917836072e-07, "loss": 0.0836, "step": 71710 }, { "epoch": 0.7662802500133554, "grad_norm": 4.305779933929443, "learning_rate": 9.859495371703402e-07, "loss": 0.0392, "step": 71720 }, { "epoch": 0.766387093327635, "grad_norm": 8.197087287902832, "learning_rate": 9.85945582008399e-07, "loss": 0.0674, "step": 71730 }, { "epoch": 0.7664939366419147, "grad_norm": 4.717278957366943, "learning_rate": 9.859416262977888e-07, "loss": 0.0541, "step": 71740 }, { "epoch": 0.7666007799561942, "grad_norm": 3.3818655014038086, "learning_rate": 9.85937670038514e-07, "loss": 0.0439, "step": 71750 }, { "epoch": 0.7667076232704738, "grad_norm": 4.717384338378906, "learning_rate": 9.859337132305786e-07, "loss": 0.0756, "step": 71760 }, { "epoch": 0.7668144665847535, "grad_norm": 5.309249401092529, "learning_rate": 9.859297558739874e-07, "loss": 0.0437, "step": 71770 }, { "epoch": 0.7669213098990331, "grad_norm": 4.864528656005859, "learning_rate": 9.85925797968745e-07, "loss": 0.0132, "step": 71780 }, { "epoch": 0.7670281532133126, "grad_norm": 0.05770118162035942, "learning_rate": 9.859218395148557e-07, "loss": 0.0501, "step": 71790 }, { "epoch": 0.7671349965275923, "grad_norm": 1.941985011100769, "learning_rate": 9.859178805123239e-07, "loss": 0.038, "step": 71800 }, { "epoch": 0.7672418398418719, "grad_norm": 3.7692015171051025, "learning_rate": 9.85913920961154e-07, "loss": 0.0466, "step": 71810 }, { "epoch": 0.7673486831561515, "grad_norm": 5.552738666534424, "learning_rate": 9.859099608613509e-07, "loss": 0.0574, "step": 71820 }, { "epoch": 0.7674555264704311, "grad_norm": 3.9182302951812744, "learning_rate": 9.859060002129186e-07, "loss": 0.0279, "step": 71830 }, { "epoch": 0.7675623697847107, "grad_norm": 3.8464317321777344, "learning_rate": 9.859020390158616e-07, "loss": 0.0276, "step": 71840 }, { "epoch": 0.7676692130989903, "grad_norm": 0.14857079088687897, "learning_rate": 9.858980772701847e-07, "loss": 0.039, "step": 71850 }, { "epoch": 0.76777605641327, "grad_norm": 0.46216684579849243, "learning_rate": 9.858941149758923e-07, "loss": 0.0237, "step": 71860 }, { "epoch": 0.7678828997275495, "grad_norm": 13.725249290466309, "learning_rate": 9.858901521329886e-07, "loss": 0.1079, "step": 71870 }, { "epoch": 0.7679897430418292, "grad_norm": 2.2902448177337646, "learning_rate": 9.858861887414783e-07, "loss": 0.0181, "step": 71880 }, { "epoch": 0.7680965863561088, "grad_norm": 0.07255423814058304, "learning_rate": 9.858822248013659e-07, "loss": 0.0972, "step": 71890 }, { "epoch": 0.7682034296703883, "grad_norm": 5.0281805992126465, "learning_rate": 9.858782603126556e-07, "loss": 0.0165, "step": 71900 }, { "epoch": 0.768310272984668, "grad_norm": 3.096144437789917, "learning_rate": 9.858742952753522e-07, "loss": 0.0421, "step": 71910 }, { "epoch": 0.7684171162989476, "grad_norm": 2.678105115890503, "learning_rate": 9.858703296894603e-07, "loss": 0.0813, "step": 71920 }, { "epoch": 0.7685239596132272, "grad_norm": 6.165895938873291, "learning_rate": 9.858663635549838e-07, "loss": 0.0525, "step": 71930 }, { "epoch": 0.7686308029275069, "grad_norm": 6.030255317687988, "learning_rate": 9.858623968719276e-07, "loss": 0.0406, "step": 71940 }, { "epoch": 0.7687376462417864, "grad_norm": 5.6257710456848145, "learning_rate": 9.85858429640296e-07, "loss": 0.0565, "step": 71950 }, { "epoch": 0.768844489556066, "grad_norm": 1.2856122255325317, "learning_rate": 9.858544618600936e-07, "loss": 0.157, "step": 71960 }, { "epoch": 0.7689513328703457, "grad_norm": 2.221614122390747, "learning_rate": 9.85850493531325e-07, "loss": 0.1233, "step": 71970 }, { "epoch": 0.7690581761846252, "grad_norm": 0.9387118816375732, "learning_rate": 9.858465246539943e-07, "loss": 0.0829, "step": 71980 }, { "epoch": 0.7691650194989048, "grad_norm": 10.094903945922852, "learning_rate": 9.858425552281062e-07, "loss": 0.0839, "step": 71990 }, { "epoch": 0.7692718628131845, "grad_norm": 0.8668379187583923, "learning_rate": 9.858385852536655e-07, "loss": 0.0465, "step": 72000 }, { "epoch": 0.7693787061274641, "grad_norm": 4.584769248962402, "learning_rate": 9.85834614730676e-07, "loss": 0.038, "step": 72010 }, { "epoch": 0.7694855494417436, "grad_norm": 5.7818779945373535, "learning_rate": 9.858306436591427e-07, "loss": 0.0577, "step": 72020 }, { "epoch": 0.7695923927560233, "grad_norm": 13.930357933044434, "learning_rate": 9.8582667203907e-07, "loss": 0.0914, "step": 72030 }, { "epoch": 0.7696992360703029, "grad_norm": 0.07786744087934494, "learning_rate": 9.858226998704621e-07, "loss": 0.0442, "step": 72040 }, { "epoch": 0.7698060793845826, "grad_norm": 4.288685321807861, "learning_rate": 9.85818727153324e-07, "loss": 0.0812, "step": 72050 }, { "epoch": 0.7699129226988621, "grad_norm": 5.1056013107299805, "learning_rate": 9.858147538876598e-07, "loss": 0.0537, "step": 72060 }, { "epoch": 0.7700197660131417, "grad_norm": 14.247671127319336, "learning_rate": 9.85810780073474e-07, "loss": 0.0439, "step": 72070 }, { "epoch": 0.7701266093274214, "grad_norm": 4.649367809295654, "learning_rate": 9.858068057107711e-07, "loss": 0.0275, "step": 72080 }, { "epoch": 0.770233452641701, "grad_norm": 6.067554473876953, "learning_rate": 9.858028307995557e-07, "loss": 0.0392, "step": 72090 }, { "epoch": 0.7703402959559805, "grad_norm": 2.117982864379883, "learning_rate": 9.857988553398325e-07, "loss": 0.0637, "step": 72100 }, { "epoch": 0.7704471392702602, "grad_norm": 0.05750010535120964, "learning_rate": 9.857948793316055e-07, "loss": 0.0396, "step": 72110 }, { "epoch": 0.7705539825845398, "grad_norm": 0.8938471078872681, "learning_rate": 9.857909027748795e-07, "loss": 0.0376, "step": 72120 }, { "epoch": 0.7706608258988193, "grad_norm": 2.506213665008545, "learning_rate": 9.857869256696588e-07, "loss": 0.0441, "step": 72130 }, { "epoch": 0.770767669213099, "grad_norm": 4.9718708992004395, "learning_rate": 9.85782948015948e-07, "loss": 0.0554, "step": 72140 }, { "epoch": 0.7708745125273786, "grad_norm": 0.2627197802066803, "learning_rate": 9.857789698137515e-07, "loss": 0.0357, "step": 72150 }, { "epoch": 0.7709813558416582, "grad_norm": 10.068482398986816, "learning_rate": 9.85774991063074e-07, "loss": 0.0536, "step": 72160 }, { "epoch": 0.7710881991559378, "grad_norm": 0.7456693053245544, "learning_rate": 9.857710117639199e-07, "loss": 0.0553, "step": 72170 }, { "epoch": 0.7711950424702174, "grad_norm": 4.675839900970459, "learning_rate": 9.857670319162939e-07, "loss": 0.0594, "step": 72180 }, { "epoch": 0.771301885784497, "grad_norm": 8.514832496643066, "learning_rate": 9.857630515201998e-07, "loss": 0.0521, "step": 72190 }, { "epoch": 0.7714087290987767, "grad_norm": 0.8483130931854248, "learning_rate": 9.857590705756429e-07, "loss": 0.0334, "step": 72200 }, { "epoch": 0.7715155724130562, "grad_norm": 0.0889667421579361, "learning_rate": 9.857550890826272e-07, "loss": 0.019, "step": 72210 }, { "epoch": 0.7716224157273358, "grad_norm": 0.13071583211421967, "learning_rate": 9.857511070411573e-07, "loss": 0.0525, "step": 72220 }, { "epoch": 0.7717292590416155, "grad_norm": 2.7815239429473877, "learning_rate": 9.85747124451238e-07, "loss": 0.1253, "step": 72230 }, { "epoch": 0.7718361023558951, "grad_norm": 7.457352638244629, "learning_rate": 9.857431413128733e-07, "loss": 0.1077, "step": 72240 }, { "epoch": 0.7719429456701747, "grad_norm": 0.17095105350017548, "learning_rate": 9.857391576260677e-07, "loss": 0.0324, "step": 72250 }, { "epoch": 0.7720497889844543, "grad_norm": 5.650644779205322, "learning_rate": 9.857351733908262e-07, "loss": 0.0503, "step": 72260 }, { "epoch": 0.7721566322987339, "grad_norm": 0.4819979667663574, "learning_rate": 9.85731188607153e-07, "loss": 0.1226, "step": 72270 }, { "epoch": 0.7722634756130136, "grad_norm": 5.773536682128906, "learning_rate": 9.857272032750528e-07, "loss": 0.0383, "step": 72280 }, { "epoch": 0.7723703189272931, "grad_norm": 11.370001792907715, "learning_rate": 9.857232173945297e-07, "loss": 0.0449, "step": 72290 }, { "epoch": 0.7724771622415727, "grad_norm": 16.61819839477539, "learning_rate": 9.857192309655884e-07, "loss": 0.0978, "step": 72300 }, { "epoch": 0.7725840055558524, "grad_norm": 5.020782470703125, "learning_rate": 9.857152439882336e-07, "loss": 0.0248, "step": 72310 }, { "epoch": 0.772690848870132, "grad_norm": 2.7217071056365967, "learning_rate": 9.857112564624695e-07, "loss": 0.03, "step": 72320 }, { "epoch": 0.7727976921844115, "grad_norm": 1.7465307712554932, "learning_rate": 9.85707268388301e-07, "loss": 0.0454, "step": 72330 }, { "epoch": 0.7729045354986912, "grad_norm": 1.8072553873062134, "learning_rate": 9.85703279765732e-07, "loss": 0.1223, "step": 72340 }, { "epoch": 0.7730113788129708, "grad_norm": 3.4063992500305176, "learning_rate": 9.856992905947674e-07, "loss": 0.0374, "step": 72350 }, { "epoch": 0.7731182221272503, "grad_norm": 0.5358248353004456, "learning_rate": 9.856953008754118e-07, "loss": 0.0304, "step": 72360 }, { "epoch": 0.77322506544153, "grad_norm": 4.662021160125732, "learning_rate": 9.856913106076695e-07, "loss": 0.0608, "step": 72370 }, { "epoch": 0.7733319087558096, "grad_norm": 9.527785301208496, "learning_rate": 9.856873197915449e-07, "loss": 0.0408, "step": 72380 }, { "epoch": 0.7734387520700892, "grad_norm": 0.3771785497665405, "learning_rate": 9.85683328427043e-07, "loss": 0.0313, "step": 72390 }, { "epoch": 0.7735455953843688, "grad_norm": 3.0896518230438232, "learning_rate": 9.856793365141676e-07, "loss": 0.0278, "step": 72400 }, { "epoch": 0.7736524386986484, "grad_norm": 1.9180302619934082, "learning_rate": 9.856753440529238e-07, "loss": 0.0674, "step": 72410 }, { "epoch": 0.7737592820129281, "grad_norm": 4.703939914703369, "learning_rate": 9.856713510433157e-07, "loss": 0.0762, "step": 72420 }, { "epoch": 0.7738661253272077, "grad_norm": 3.004176378250122, "learning_rate": 9.85667357485348e-07, "loss": 0.0909, "step": 72430 }, { "epoch": 0.7739729686414872, "grad_norm": 0.16660676896572113, "learning_rate": 9.856633633790253e-07, "loss": 0.0534, "step": 72440 }, { "epoch": 0.7740798119557669, "grad_norm": 1.0840834379196167, "learning_rate": 9.85659368724352e-07, "loss": 0.0152, "step": 72450 }, { "epoch": 0.7741866552700465, "grad_norm": 0.08762931823730469, "learning_rate": 9.856553735213326e-07, "loss": 0.0549, "step": 72460 }, { "epoch": 0.7742934985843261, "grad_norm": 0.08153042942285538, "learning_rate": 9.856513777699716e-07, "loss": 0.0329, "step": 72470 }, { "epoch": 0.7744003418986057, "grad_norm": 9.5573091506958, "learning_rate": 9.856473814702735e-07, "loss": 0.028, "step": 72480 }, { "epoch": 0.7745071852128853, "grad_norm": 3.032783269882202, "learning_rate": 9.85643384622243e-07, "loss": 0.0639, "step": 72490 }, { "epoch": 0.7746140285271649, "grad_norm": 2.034266471862793, "learning_rate": 9.856393872258844e-07, "loss": 0.0812, "step": 72500 }, { "epoch": 0.7747208718414446, "grad_norm": 1.5162233114242554, "learning_rate": 9.856353892812023e-07, "loss": 0.0479, "step": 72510 }, { "epoch": 0.7748277151557241, "grad_norm": 0.732028067111969, "learning_rate": 9.856313907882013e-07, "loss": 0.0241, "step": 72520 }, { "epoch": 0.7749345584700037, "grad_norm": 4.424887657165527, "learning_rate": 9.856273917468856e-07, "loss": 0.0306, "step": 72530 }, { "epoch": 0.7750414017842834, "grad_norm": 8.657243728637695, "learning_rate": 9.8562339215726e-07, "loss": 0.0539, "step": 72540 }, { "epoch": 0.775148245098563, "grad_norm": 9.671794891357422, "learning_rate": 9.85619392019329e-07, "loss": 0.0624, "step": 72550 }, { "epoch": 0.7752550884128425, "grad_norm": 0.0710410475730896, "learning_rate": 9.85615391333097e-07, "loss": 0.1117, "step": 72560 }, { "epoch": 0.7753619317271222, "grad_norm": 0.06333193182945251, "learning_rate": 9.856113900985685e-07, "loss": 0.0984, "step": 72570 }, { "epoch": 0.7754687750414018, "grad_norm": 0.2595025300979614, "learning_rate": 9.85607388315748e-07, "loss": 0.0179, "step": 72580 }, { "epoch": 0.7755756183556813, "grad_norm": 3.5958194732666016, "learning_rate": 9.856033859846405e-07, "loss": 0.0602, "step": 72590 }, { "epoch": 0.775682461669961, "grad_norm": 1.1635676622390747, "learning_rate": 9.8559938310525e-07, "loss": 0.0403, "step": 72600 }, { "epoch": 0.7757893049842406, "grad_norm": 0.030884170904755592, "learning_rate": 9.855953796775808e-07, "loss": 0.0514, "step": 72610 }, { "epoch": 0.7758961482985203, "grad_norm": 2.871244192123413, "learning_rate": 9.85591375701638e-07, "loss": 0.0136, "step": 72620 }, { "epoch": 0.7760029916127998, "grad_norm": 4.0767669677734375, "learning_rate": 9.855873711774258e-07, "loss": 0.1302, "step": 72630 }, { "epoch": 0.7761098349270794, "grad_norm": 8.363579750061035, "learning_rate": 9.85583366104949e-07, "loss": 0.1104, "step": 72640 }, { "epoch": 0.7762166782413591, "grad_norm": 0.5219932198524475, "learning_rate": 9.855793604842117e-07, "loss": 0.101, "step": 72650 }, { "epoch": 0.7763235215556387, "grad_norm": 3.9242472648620605, "learning_rate": 9.85575354315219e-07, "loss": 0.0239, "step": 72660 }, { "epoch": 0.7764303648699182, "grad_norm": 5.905201435089111, "learning_rate": 9.855713475979748e-07, "loss": 0.0352, "step": 72670 }, { "epoch": 0.7765372081841979, "grad_norm": 5.279913425445557, "learning_rate": 9.85567340332484e-07, "loss": 0.0334, "step": 72680 }, { "epoch": 0.7766440514984775, "grad_norm": 3.96923565864563, "learning_rate": 9.855633325187508e-07, "loss": 0.0469, "step": 72690 }, { "epoch": 0.776750894812757, "grad_norm": 3.730478286743164, "learning_rate": 9.855593241567803e-07, "loss": 0.0688, "step": 72700 }, { "epoch": 0.7768577381270367, "grad_norm": 0.09264136850833893, "learning_rate": 9.855553152465765e-07, "loss": 0.0783, "step": 72710 }, { "epoch": 0.7769645814413163, "grad_norm": 2.880779266357422, "learning_rate": 9.85551305788144e-07, "loss": 0.0256, "step": 72720 }, { "epoch": 0.7770714247555959, "grad_norm": 1.0804507732391357, "learning_rate": 9.855472957814877e-07, "loss": 0.0184, "step": 72730 }, { "epoch": 0.7771782680698756, "grad_norm": 3.8397226333618164, "learning_rate": 9.855432852266117e-07, "loss": 0.0729, "step": 72740 }, { "epoch": 0.7772851113841551, "grad_norm": 3.9726808071136475, "learning_rate": 9.855392741235206e-07, "loss": 0.0326, "step": 72750 }, { "epoch": 0.7773919546984347, "grad_norm": 11.456717491149902, "learning_rate": 9.855352624722191e-07, "loss": 0.0613, "step": 72760 }, { "epoch": 0.7774987980127144, "grad_norm": 0.8472369313240051, "learning_rate": 9.855312502727116e-07, "loss": 0.0324, "step": 72770 }, { "epoch": 0.777605641326994, "grad_norm": 3.4122681617736816, "learning_rate": 9.855272375250028e-07, "loss": 0.0297, "step": 72780 }, { "epoch": 0.7777124846412736, "grad_norm": 2.00390887260437, "learning_rate": 9.855232242290969e-07, "loss": 0.0536, "step": 72790 }, { "epoch": 0.7778193279555532, "grad_norm": 5.708277225494385, "learning_rate": 9.855192103849989e-07, "loss": 0.0509, "step": 72800 }, { "epoch": 0.7779261712698328, "grad_norm": 14.135817527770996, "learning_rate": 9.855151959927128e-07, "loss": 0.0848, "step": 72810 }, { "epoch": 0.7780330145841124, "grad_norm": 6.535804271697998, "learning_rate": 9.855111810522437e-07, "loss": 0.0513, "step": 72820 }, { "epoch": 0.778139857898392, "grad_norm": 0.008827196434140205, "learning_rate": 9.855071655635956e-07, "loss": 0.0347, "step": 72830 }, { "epoch": 0.7782467012126716, "grad_norm": 4.805764675140381, "learning_rate": 9.855031495267734e-07, "loss": 0.0685, "step": 72840 }, { "epoch": 0.7783535445269513, "grad_norm": 2.41328501701355, "learning_rate": 9.854991329417814e-07, "loss": 0.0323, "step": 72850 }, { "epoch": 0.7784603878412308, "grad_norm": 0.2110910415649414, "learning_rate": 9.854951158086241e-07, "loss": 0.1232, "step": 72860 }, { "epoch": 0.7785672311555104, "grad_norm": 1.201629638671875, "learning_rate": 9.854910981273065e-07, "loss": 0.0362, "step": 72870 }, { "epoch": 0.7786740744697901, "grad_norm": 4.996440410614014, "learning_rate": 9.854870798978324e-07, "loss": 0.0915, "step": 72880 }, { "epoch": 0.7787809177840697, "grad_norm": 0.38321009278297424, "learning_rate": 9.85483061120207e-07, "loss": 0.1282, "step": 72890 }, { "epoch": 0.7788877610983492, "grad_norm": 5.666555404663086, "learning_rate": 9.854790417944348e-07, "loss": 0.0728, "step": 72900 }, { "epoch": 0.7789946044126289, "grad_norm": 21.023683547973633, "learning_rate": 9.854750219205199e-07, "loss": 0.0608, "step": 72910 }, { "epoch": 0.7791014477269085, "grad_norm": 2.124443769454956, "learning_rate": 9.854710014984671e-07, "loss": 0.0864, "step": 72920 }, { "epoch": 0.779208291041188, "grad_norm": 9.858437538146973, "learning_rate": 9.854669805282807e-07, "loss": 0.0425, "step": 72930 }, { "epoch": 0.7793151343554677, "grad_norm": 2.681037664413452, "learning_rate": 9.854629590099657e-07, "loss": 0.0076, "step": 72940 }, { "epoch": 0.7794219776697473, "grad_norm": 0.6442480087280273, "learning_rate": 9.854589369435261e-07, "loss": 0.0634, "step": 72950 }, { "epoch": 0.7795288209840269, "grad_norm": 11.44860553741455, "learning_rate": 9.85454914328967e-07, "loss": 0.0659, "step": 72960 }, { "epoch": 0.7796356642983066, "grad_norm": 4.429714679718018, "learning_rate": 9.854508911662927e-07, "loss": 0.0636, "step": 72970 }, { "epoch": 0.7797425076125861, "grad_norm": 0.06654783338308334, "learning_rate": 9.854468674555074e-07, "loss": 0.0344, "step": 72980 }, { "epoch": 0.7798493509268658, "grad_norm": 7.828478813171387, "learning_rate": 9.854428431966163e-07, "loss": 0.0245, "step": 72990 }, { "epoch": 0.7799561942411454, "grad_norm": 0.6122833490371704, "learning_rate": 9.854388183896232e-07, "loss": 0.0093, "step": 73000 }, { "epoch": 0.7800630375554249, "grad_norm": 9.760828971862793, "learning_rate": 9.854347930345333e-07, "loss": 0.1045, "step": 73010 }, { "epoch": 0.7801698808697046, "grad_norm": 0.9173670411109924, "learning_rate": 9.85430767131351e-07, "loss": 0.0548, "step": 73020 }, { "epoch": 0.7802767241839842, "grad_norm": 5.494332790374756, "learning_rate": 9.854267406800804e-07, "loss": 0.0385, "step": 73030 }, { "epoch": 0.7803835674982638, "grad_norm": 3.1332061290740967, "learning_rate": 9.854227136807265e-07, "loss": 0.0837, "step": 73040 }, { "epoch": 0.7804904108125434, "grad_norm": 3.224593162536621, "learning_rate": 9.854186861332937e-07, "loss": 0.0279, "step": 73050 }, { "epoch": 0.780597254126823, "grad_norm": 0.0672481581568718, "learning_rate": 9.854146580377867e-07, "loss": 0.0569, "step": 73060 }, { "epoch": 0.7807040974411026, "grad_norm": 10.151841163635254, "learning_rate": 9.854106293942098e-07, "loss": 0.1044, "step": 73070 }, { "epoch": 0.7808109407553823, "grad_norm": 17.89554214477539, "learning_rate": 9.854066002025676e-07, "loss": 0.0435, "step": 73080 }, { "epoch": 0.7809177840696618, "grad_norm": 9.027514457702637, "learning_rate": 9.854025704628649e-07, "loss": 0.0454, "step": 73090 }, { "epoch": 0.7810246273839414, "grad_norm": 17.7392578125, "learning_rate": 9.85398540175106e-07, "loss": 0.0684, "step": 73100 }, { "epoch": 0.7811314706982211, "grad_norm": 4.206531524658203, "learning_rate": 9.853945093392954e-07, "loss": 0.0477, "step": 73110 }, { "epoch": 0.7812383140125007, "grad_norm": 3.22586727142334, "learning_rate": 9.85390477955438e-07, "loss": 0.0299, "step": 73120 }, { "epoch": 0.7813451573267802, "grad_norm": 1.709170937538147, "learning_rate": 9.853864460235377e-07, "loss": 0.0847, "step": 73130 }, { "epoch": 0.7814520006410599, "grad_norm": 0.08685916662216187, "learning_rate": 9.853824135435997e-07, "loss": 0.037, "step": 73140 }, { "epoch": 0.7815588439553395, "grad_norm": 1.313123106956482, "learning_rate": 9.853783805156284e-07, "loss": 0.0825, "step": 73150 }, { "epoch": 0.7816656872696192, "grad_norm": 6.693521499633789, "learning_rate": 9.853743469396283e-07, "loss": 0.0394, "step": 73160 }, { "epoch": 0.7817725305838987, "grad_norm": 7.795204162597656, "learning_rate": 9.853703128156038e-07, "loss": 0.0491, "step": 73170 }, { "epoch": 0.7818793738981783, "grad_norm": 0.2461291402578354, "learning_rate": 9.853662781435596e-07, "loss": 0.0176, "step": 73180 }, { "epoch": 0.781986217212458, "grad_norm": 2.7584757804870605, "learning_rate": 9.853622429235003e-07, "loss": 0.121, "step": 73190 }, { "epoch": 0.7820930605267375, "grad_norm": 13.95661735534668, "learning_rate": 9.853582071554306e-07, "loss": 0.0589, "step": 73200 }, { "epoch": 0.7821999038410171, "grad_norm": 4.497905731201172, "learning_rate": 9.853541708393545e-07, "loss": 0.0996, "step": 73210 }, { "epoch": 0.7823067471552968, "grad_norm": 4.19580602645874, "learning_rate": 9.85350133975277e-07, "loss": 0.0562, "step": 73220 }, { "epoch": 0.7824135904695764, "grad_norm": 26.241817474365234, "learning_rate": 9.853460965632026e-07, "loss": 0.0767, "step": 73230 }, { "epoch": 0.7825204337838559, "grad_norm": 7.21012544631958, "learning_rate": 9.853420586031357e-07, "loss": 0.0504, "step": 73240 }, { "epoch": 0.7826272770981356, "grad_norm": 0.09108487516641617, "learning_rate": 9.853380200950811e-07, "loss": 0.0384, "step": 73250 }, { "epoch": 0.7827341204124152, "grad_norm": 13.565040588378906, "learning_rate": 9.853339810390434e-07, "loss": 0.0925, "step": 73260 }, { "epoch": 0.7828409637266948, "grad_norm": 7.405921936035156, "learning_rate": 9.853299414350268e-07, "loss": 0.0629, "step": 73270 }, { "epoch": 0.7829478070409744, "grad_norm": 0.07864876091480255, "learning_rate": 9.853259012830362e-07, "loss": 0.046, "step": 73280 }, { "epoch": 0.783054650355254, "grad_norm": 2.0235066413879395, "learning_rate": 9.853218605830759e-07, "loss": 0.0714, "step": 73290 }, { "epoch": 0.7831614936695336, "grad_norm": 6.960446357727051, "learning_rate": 9.853178193351507e-07, "loss": 0.0552, "step": 73300 }, { "epoch": 0.7832683369838133, "grad_norm": 0.06771114468574524, "learning_rate": 9.85313777539265e-07, "loss": 0.0968, "step": 73310 }, { "epoch": 0.7833751802980928, "grad_norm": 1.6853585243225098, "learning_rate": 9.853097351954234e-07, "loss": 0.0416, "step": 73320 }, { "epoch": 0.7834820236123724, "grad_norm": 13.841181755065918, "learning_rate": 9.853056923036304e-07, "loss": 0.0799, "step": 73330 }, { "epoch": 0.7835888669266521, "grad_norm": 0.9863329529762268, "learning_rate": 9.853016488638908e-07, "loss": 0.0245, "step": 73340 }, { "epoch": 0.7836957102409317, "grad_norm": 0.8390516042709351, "learning_rate": 9.85297604876209e-07, "loss": 0.0362, "step": 73350 }, { "epoch": 0.7838025535552113, "grad_norm": 1.7896376848220825, "learning_rate": 9.852935603405895e-07, "loss": 0.036, "step": 73360 }, { "epoch": 0.7839093968694909, "grad_norm": 1.0505465269088745, "learning_rate": 9.852895152570371e-07, "loss": 0.0355, "step": 73370 }, { "epoch": 0.7840162401837705, "grad_norm": 0.8011146187782288, "learning_rate": 9.85285469625556e-07, "loss": 0.0274, "step": 73380 }, { "epoch": 0.7841230834980502, "grad_norm": 3.468810796737671, "learning_rate": 9.85281423446151e-07, "loss": 0.0907, "step": 73390 }, { "epoch": 0.7842299268123297, "grad_norm": 7.20208215713501, "learning_rate": 9.852773767188269e-07, "loss": 0.0279, "step": 73400 }, { "epoch": 0.7843367701266093, "grad_norm": 1.8484575748443604, "learning_rate": 9.852733294435878e-07, "loss": 0.0409, "step": 73410 }, { "epoch": 0.784443613440889, "grad_norm": 6.281784534454346, "learning_rate": 9.852692816204385e-07, "loss": 0.0614, "step": 73420 }, { "epoch": 0.7845504567551685, "grad_norm": 6.213539123535156, "learning_rate": 9.852652332493835e-07, "loss": 0.0662, "step": 73430 }, { "epoch": 0.7846573000694481, "grad_norm": 23.192188262939453, "learning_rate": 9.852611843304275e-07, "loss": 0.1519, "step": 73440 }, { "epoch": 0.7847641433837278, "grad_norm": 5.424847602844238, "learning_rate": 9.85257134863575e-07, "loss": 0.04, "step": 73450 }, { "epoch": 0.7848709866980074, "grad_norm": 2.302748441696167, "learning_rate": 9.852530848488305e-07, "loss": 0.053, "step": 73460 }, { "epoch": 0.7849778300122869, "grad_norm": 2.234052896499634, "learning_rate": 9.852490342861988e-07, "loss": 0.0163, "step": 73470 }, { "epoch": 0.7850846733265666, "grad_norm": 3.820080280303955, "learning_rate": 9.85244983175684e-07, "loss": 0.0478, "step": 73480 }, { "epoch": 0.7851915166408462, "grad_norm": 4.055393695831299, "learning_rate": 9.852409315172913e-07, "loss": 0.0575, "step": 73490 }, { "epoch": 0.7852983599551258, "grad_norm": 0.41802313923835754, "learning_rate": 9.852368793110252e-07, "loss": 0.0614, "step": 73500 }, { "epoch": 0.7854052032694054, "grad_norm": 9.884041786193848, "learning_rate": 9.852328265568896e-07, "loss": 0.1416, "step": 73510 }, { "epoch": 0.785512046583685, "grad_norm": 2.9899404048919678, "learning_rate": 9.852287732548896e-07, "loss": 0.0733, "step": 73520 }, { "epoch": 0.7856188898979647, "grad_norm": 0.17690341174602509, "learning_rate": 9.852247194050299e-07, "loss": 0.0548, "step": 73530 }, { "epoch": 0.7857257332122443, "grad_norm": 0.10053081065416336, "learning_rate": 9.852206650073146e-07, "loss": 0.0485, "step": 73540 }, { "epoch": 0.7858325765265238, "grad_norm": 3.9964699745178223, "learning_rate": 9.852166100617485e-07, "loss": 0.0628, "step": 73550 }, { "epoch": 0.7859394198408035, "grad_norm": 7.592008590698242, "learning_rate": 9.852125545683366e-07, "loss": 0.0523, "step": 73560 }, { "epoch": 0.7860462631550831, "grad_norm": 1.011540174484253, "learning_rate": 9.852084985270828e-07, "loss": 0.0656, "step": 73570 }, { "epoch": 0.7861531064693627, "grad_norm": 1.3920549154281616, "learning_rate": 9.85204441937992e-07, "loss": 0.0174, "step": 73580 }, { "epoch": 0.7862599497836423, "grad_norm": 2.782212018966675, "learning_rate": 9.852003848010687e-07, "loss": 0.0768, "step": 73590 }, { "epoch": 0.7863667930979219, "grad_norm": 8.734095573425293, "learning_rate": 9.851963271163178e-07, "loss": 0.0654, "step": 73600 }, { "epoch": 0.7864736364122015, "grad_norm": 3.5451369285583496, "learning_rate": 9.851922688837436e-07, "loss": 0.0472, "step": 73610 }, { "epoch": 0.7865804797264812, "grad_norm": 3.2561185359954834, "learning_rate": 9.851882101033506e-07, "loss": 0.0136, "step": 73620 }, { "epoch": 0.7866873230407607, "grad_norm": 3.2873754501342773, "learning_rate": 9.851841507751436e-07, "loss": 0.0706, "step": 73630 }, { "epoch": 0.7867941663550403, "grad_norm": 15.116170883178711, "learning_rate": 9.851800908991268e-07, "loss": 0.0318, "step": 73640 }, { "epoch": 0.78690100966932, "grad_norm": 20.82744789123535, "learning_rate": 9.851760304753053e-07, "loss": 0.064, "step": 73650 }, { "epoch": 0.7870078529835995, "grad_norm": 0.05680253729224205, "learning_rate": 9.851719695036835e-07, "loss": 0.1354, "step": 73660 }, { "epoch": 0.7871146962978791, "grad_norm": 6.502133369445801, "learning_rate": 9.851679079842657e-07, "loss": 0.0613, "step": 73670 }, { "epoch": 0.7872215396121588, "grad_norm": 0.11441642791032791, "learning_rate": 9.851638459170569e-07, "loss": 0.0384, "step": 73680 }, { "epoch": 0.7873283829264384, "grad_norm": 1.520760416984558, "learning_rate": 9.851597833020615e-07, "loss": 0.0434, "step": 73690 }, { "epoch": 0.7874352262407179, "grad_norm": 8.60092830657959, "learning_rate": 9.85155720139284e-07, "loss": 0.0449, "step": 73700 }, { "epoch": 0.7875420695549976, "grad_norm": 0.16621936857700348, "learning_rate": 9.85151656428729e-07, "loss": 0.054, "step": 73710 }, { "epoch": 0.7876489128692772, "grad_norm": 16.98639678955078, "learning_rate": 9.851475921704014e-07, "loss": 0.0672, "step": 73720 }, { "epoch": 0.7877557561835569, "grad_norm": 2.4081385135650635, "learning_rate": 9.851435273643052e-07, "loss": 0.0389, "step": 73730 }, { "epoch": 0.7878625994978364, "grad_norm": 3.4395124912261963, "learning_rate": 9.851394620104457e-07, "loss": 0.0345, "step": 73740 }, { "epoch": 0.787969442812116, "grad_norm": 19.59077262878418, "learning_rate": 9.85135396108827e-07, "loss": 0.1234, "step": 73750 }, { "epoch": 0.7880762861263957, "grad_norm": 3.1957147121429443, "learning_rate": 9.851313296594536e-07, "loss": 0.0968, "step": 73760 }, { "epoch": 0.7881831294406753, "grad_norm": 4.864903450012207, "learning_rate": 9.851272626623305e-07, "loss": 0.0723, "step": 73770 }, { "epoch": 0.7882899727549548, "grad_norm": 0.6784520149230957, "learning_rate": 9.851231951174624e-07, "loss": 0.0415, "step": 73780 }, { "epoch": 0.7883968160692345, "grad_norm": 0.4718436896800995, "learning_rate": 9.851191270248531e-07, "loss": 0.0512, "step": 73790 }, { "epoch": 0.7885036593835141, "grad_norm": 0.028086107224225998, "learning_rate": 9.85115058384508e-07, "loss": 0.0301, "step": 73800 }, { "epoch": 0.7886105026977936, "grad_norm": 12.0298490524292, "learning_rate": 9.851109891964312e-07, "loss": 0.0359, "step": 73810 }, { "epoch": 0.7887173460120733, "grad_norm": 2.63179874420166, "learning_rate": 9.851069194606276e-07, "loss": 0.061, "step": 73820 }, { "epoch": 0.7888241893263529, "grad_norm": 2.659179449081421, "learning_rate": 9.851028491771018e-07, "loss": 0.111, "step": 73830 }, { "epoch": 0.7889310326406325, "grad_norm": 0.1439969688653946, "learning_rate": 9.850987783458583e-07, "loss": 0.0696, "step": 73840 }, { "epoch": 0.7890378759549121, "grad_norm": 6.296466827392578, "learning_rate": 9.850947069669014e-07, "loss": 0.0567, "step": 73850 }, { "epoch": 0.7891447192691917, "grad_norm": 0.397968590259552, "learning_rate": 9.85090635040236e-07, "loss": 0.0617, "step": 73860 }, { "epoch": 0.7892515625834713, "grad_norm": 6.797563076019287, "learning_rate": 9.850865625658668e-07, "loss": 0.0523, "step": 73870 }, { "epoch": 0.789358405897751, "grad_norm": 2.3775482177734375, "learning_rate": 9.850824895437981e-07, "loss": 0.0481, "step": 73880 }, { "epoch": 0.7894652492120305, "grad_norm": 0.4803400933742523, "learning_rate": 9.850784159740348e-07, "loss": 0.0943, "step": 73890 }, { "epoch": 0.7895720925263102, "grad_norm": 0.9236984848976135, "learning_rate": 9.850743418565812e-07, "loss": 0.0426, "step": 73900 }, { "epoch": 0.7896789358405898, "grad_norm": 7.373581886291504, "learning_rate": 9.850702671914423e-07, "loss": 0.0691, "step": 73910 }, { "epoch": 0.7897857791548694, "grad_norm": 2.2011220455169678, "learning_rate": 9.850661919786222e-07, "loss": 0.0534, "step": 73920 }, { "epoch": 0.789892622469149, "grad_norm": 1.2466005086898804, "learning_rate": 9.85062116218126e-07, "loss": 0.0684, "step": 73930 }, { "epoch": 0.7899994657834286, "grad_norm": 5.722202301025391, "learning_rate": 9.85058039909958e-07, "loss": 0.0768, "step": 73940 }, { "epoch": 0.7901063090977082, "grad_norm": 7.014162540435791, "learning_rate": 9.850539630541227e-07, "loss": 0.0247, "step": 73950 }, { "epoch": 0.7902131524119879, "grad_norm": 3.75954270362854, "learning_rate": 9.85049885650625e-07, "loss": 0.0459, "step": 73960 }, { "epoch": 0.7903199957262674, "grad_norm": 0.42613402009010315, "learning_rate": 9.850458076994691e-07, "loss": 0.086, "step": 73970 }, { "epoch": 0.790426839040547, "grad_norm": 4.209072113037109, "learning_rate": 9.850417292006604e-07, "loss": 0.0373, "step": 73980 }, { "epoch": 0.7905336823548267, "grad_norm": 0.11400165408849716, "learning_rate": 9.850376501542026e-07, "loss": 0.0295, "step": 73990 }, { "epoch": 0.7906405256691063, "grad_norm": 6.098175525665283, "learning_rate": 9.850335705601006e-07, "loss": 0.0352, "step": 74000 }, { "epoch": 0.7907473689833858, "grad_norm": 3.0596256256103516, "learning_rate": 9.850294904183594e-07, "loss": 0.032, "step": 74010 }, { "epoch": 0.7908542122976655, "grad_norm": 1.4050906896591187, "learning_rate": 9.850254097289833e-07, "loss": 0.0503, "step": 74020 }, { "epoch": 0.7909610556119451, "grad_norm": 7.882275581359863, "learning_rate": 9.850213284919768e-07, "loss": 0.0973, "step": 74030 }, { "epoch": 0.7910678989262246, "grad_norm": 6.874490737915039, "learning_rate": 9.850172467073445e-07, "loss": 0.0305, "step": 74040 }, { "epoch": 0.7911747422405043, "grad_norm": 4.900363445281982, "learning_rate": 9.850131643750912e-07, "loss": 0.023, "step": 74050 }, { "epoch": 0.7912815855547839, "grad_norm": 4.742335319519043, "learning_rate": 9.850090814952215e-07, "loss": 0.08, "step": 74060 }, { "epoch": 0.7913884288690635, "grad_norm": 0.24274779856204987, "learning_rate": 9.850049980677398e-07, "loss": 0.0286, "step": 74070 }, { "epoch": 0.7914952721833431, "grad_norm": 1.3526561260223389, "learning_rate": 9.85000914092651e-07, "loss": 0.062, "step": 74080 }, { "epoch": 0.7916021154976227, "grad_norm": 3.97179913520813, "learning_rate": 9.849968295699595e-07, "loss": 0.1311, "step": 74090 }, { "epoch": 0.7917089588119024, "grad_norm": 1.116090178489685, "learning_rate": 9.849927444996699e-07, "loss": 0.0614, "step": 74100 }, { "epoch": 0.791815802126182, "grad_norm": 8.054187774658203, "learning_rate": 9.84988658881787e-07, "loss": 0.0578, "step": 74110 }, { "epoch": 0.7919226454404615, "grad_norm": 0.1262868195772171, "learning_rate": 9.849845727163153e-07, "loss": 0.0386, "step": 74120 }, { "epoch": 0.7920294887547412, "grad_norm": 2.90812349319458, "learning_rate": 9.849804860032594e-07, "loss": 0.055, "step": 74130 }, { "epoch": 0.7921363320690208, "grad_norm": 5.390753269195557, "learning_rate": 9.849763987426239e-07, "loss": 0.0639, "step": 74140 }, { "epoch": 0.7922431753833004, "grad_norm": 0.13991688191890717, "learning_rate": 9.849723109344135e-07, "loss": 0.0574, "step": 74150 }, { "epoch": 0.79235001869758, "grad_norm": 0.19328297674655914, "learning_rate": 9.849682225786326e-07, "loss": 0.0503, "step": 74160 }, { "epoch": 0.7924568620118596, "grad_norm": 1.1685798168182373, "learning_rate": 9.849641336752861e-07, "loss": 0.0936, "step": 74170 }, { "epoch": 0.7925637053261392, "grad_norm": 0.03427761420607567, "learning_rate": 9.849600442243786e-07, "loss": 0.0578, "step": 74180 }, { "epoch": 0.7926705486404189, "grad_norm": 2.554809808731079, "learning_rate": 9.849559542259145e-07, "loss": 0.0245, "step": 74190 }, { "epoch": 0.7927773919546984, "grad_norm": 4.393284797668457, "learning_rate": 9.849518636798984e-07, "loss": 0.0364, "step": 74200 }, { "epoch": 0.792884235268978, "grad_norm": 0.49812424182891846, "learning_rate": 9.84947772586335e-07, "loss": 0.0306, "step": 74210 }, { "epoch": 0.7929910785832577, "grad_norm": 0.5855562090873718, "learning_rate": 9.849436809452292e-07, "loss": 0.0655, "step": 74220 }, { "epoch": 0.7930979218975373, "grad_norm": 4.458217144012451, "learning_rate": 9.849395887565854e-07, "loss": 0.0439, "step": 74230 }, { "epoch": 0.7932047652118168, "grad_norm": 0.11751354485750198, "learning_rate": 9.84935496020408e-07, "loss": 0.0914, "step": 74240 }, { "epoch": 0.7933116085260965, "grad_norm": 1.276033639907837, "learning_rate": 9.849314027367021e-07, "loss": 0.0588, "step": 74250 }, { "epoch": 0.7934184518403761, "grad_norm": 1.9200459718704224, "learning_rate": 9.84927308905472e-07, "loss": 0.0335, "step": 74260 }, { "epoch": 0.7935252951546558, "grad_norm": 0.8192474246025085, "learning_rate": 9.849232145267222e-07, "loss": 0.0145, "step": 74270 }, { "epoch": 0.7936321384689353, "grad_norm": 1.7734158039093018, "learning_rate": 9.849191196004573e-07, "loss": 0.0139, "step": 74280 }, { "epoch": 0.7937389817832149, "grad_norm": 9.743027687072754, "learning_rate": 9.849150241266824e-07, "loss": 0.0922, "step": 74290 }, { "epoch": 0.7938458250974946, "grad_norm": 13.767600059509277, "learning_rate": 9.84910928105402e-07, "loss": 0.0409, "step": 74300 }, { "epoch": 0.7939526684117741, "grad_norm": 0.36076995730400085, "learning_rate": 9.849068315366201e-07, "loss": 0.0915, "step": 74310 }, { "epoch": 0.7940595117260537, "grad_norm": 0.2747754454612732, "learning_rate": 9.849027344203423e-07, "loss": 0.0654, "step": 74320 }, { "epoch": 0.7941663550403334, "grad_norm": 4.438244342803955, "learning_rate": 9.848986367565724e-07, "loss": 0.0316, "step": 74330 }, { "epoch": 0.794273198354613, "grad_norm": 3.4785051345825195, "learning_rate": 9.848945385453153e-07, "loss": 0.0536, "step": 74340 }, { "epoch": 0.7943800416688925, "grad_norm": 12.374698638916016, "learning_rate": 9.848904397865758e-07, "loss": 0.1223, "step": 74350 }, { "epoch": 0.7944868849831722, "grad_norm": 9.625441551208496, "learning_rate": 9.848863404803584e-07, "loss": 0.0428, "step": 74360 }, { "epoch": 0.7945937282974518, "grad_norm": 0.5750668048858643, "learning_rate": 9.848822406266676e-07, "loss": 0.0582, "step": 74370 }, { "epoch": 0.7947005716117314, "grad_norm": 0.7669278979301453, "learning_rate": 9.848781402255083e-07, "loss": 0.0247, "step": 74380 }, { "epoch": 0.794807414926011, "grad_norm": 3.834824323654175, "learning_rate": 9.848740392768852e-07, "loss": 0.1757, "step": 74390 }, { "epoch": 0.7949142582402906, "grad_norm": 0.41058799624443054, "learning_rate": 9.848699377808024e-07, "loss": 0.032, "step": 74400 }, { "epoch": 0.7950211015545702, "grad_norm": 0.24188335239887238, "learning_rate": 9.848658357372648e-07, "loss": 0.0566, "step": 74410 }, { "epoch": 0.7951279448688499, "grad_norm": 4.858987808227539, "learning_rate": 9.848617331462772e-07, "loss": 0.0325, "step": 74420 }, { "epoch": 0.7952347881831294, "grad_norm": 0.849024772644043, "learning_rate": 9.848576300078439e-07, "loss": 0.0852, "step": 74430 }, { "epoch": 0.795341631497409, "grad_norm": 4.504781723022461, "learning_rate": 9.8485352632197e-07, "loss": 0.0782, "step": 74440 }, { "epoch": 0.7954484748116887, "grad_norm": 12.929730415344238, "learning_rate": 9.848494220886598e-07, "loss": 0.1114, "step": 74450 }, { "epoch": 0.7955553181259682, "grad_norm": 7.38010311126709, "learning_rate": 9.848453173079181e-07, "loss": 0.0315, "step": 74460 }, { "epoch": 0.7956621614402479, "grad_norm": 6.042380332946777, "learning_rate": 9.848412119797494e-07, "loss": 0.0273, "step": 74470 }, { "epoch": 0.7957690047545275, "grad_norm": 0.08271586894989014, "learning_rate": 9.848371061041582e-07, "loss": 0.0568, "step": 74480 }, { "epoch": 0.7958758480688071, "grad_norm": 1.6259342432022095, "learning_rate": 9.848329996811496e-07, "loss": 0.0443, "step": 74490 }, { "epoch": 0.7959826913830867, "grad_norm": 0.029721032828092575, "learning_rate": 9.848288927107278e-07, "loss": 0.0307, "step": 74500 }, { "epoch": 0.7960895346973663, "grad_norm": 0.8590894341468811, "learning_rate": 9.848247851928975e-07, "loss": 0.009, "step": 74510 }, { "epoch": 0.7961963780116459, "grad_norm": 1.7544158697128296, "learning_rate": 9.848206771276636e-07, "loss": 0.0462, "step": 74520 }, { "epoch": 0.7963032213259256, "grad_norm": 16.473674774169922, "learning_rate": 9.848165685150304e-07, "loss": 0.0286, "step": 74530 }, { "epoch": 0.7964100646402051, "grad_norm": 3.2823946475982666, "learning_rate": 9.848124593550028e-07, "loss": 0.0572, "step": 74540 }, { "epoch": 0.7965169079544847, "grad_norm": 0.4756781756877899, "learning_rate": 9.848083496475853e-07, "loss": 0.0429, "step": 74550 }, { "epoch": 0.7966237512687644, "grad_norm": 9.281535148620605, "learning_rate": 9.848042393927826e-07, "loss": 0.0638, "step": 74560 }, { "epoch": 0.796730594583044, "grad_norm": 1.308072566986084, "learning_rate": 9.848001285905994e-07, "loss": 0.0334, "step": 74570 }, { "epoch": 0.7968374378973235, "grad_norm": 3.9900221824645996, "learning_rate": 9.8479601724104e-07, "loss": 0.0549, "step": 74580 }, { "epoch": 0.7969442812116032, "grad_norm": 0.1905544400215149, "learning_rate": 9.847919053441095e-07, "loss": 0.1283, "step": 74590 }, { "epoch": 0.7970511245258828, "grad_norm": 2.2239394187927246, "learning_rate": 9.847877928998125e-07, "loss": 0.0811, "step": 74600 }, { "epoch": 0.7971579678401624, "grad_norm": 0.0690026581287384, "learning_rate": 9.847836799081534e-07, "loss": 0.0315, "step": 74610 }, { "epoch": 0.797264811154442, "grad_norm": 0.17921306192874908, "learning_rate": 9.847795663691368e-07, "loss": 0.0379, "step": 74620 }, { "epoch": 0.7973716544687216, "grad_norm": 1.745661973953247, "learning_rate": 9.847754522827675e-07, "loss": 0.0461, "step": 74630 }, { "epoch": 0.7974784977830013, "grad_norm": 1.8574565649032593, "learning_rate": 9.847713376490502e-07, "loss": 0.0631, "step": 74640 }, { "epoch": 0.7975853410972809, "grad_norm": 1.5379763841629028, "learning_rate": 9.847672224679895e-07, "loss": 0.0272, "step": 74650 }, { "epoch": 0.7976921844115604, "grad_norm": 4.982049942016602, "learning_rate": 9.8476310673959e-07, "loss": 0.0883, "step": 74660 }, { "epoch": 0.7977990277258401, "grad_norm": 1.2625049352645874, "learning_rate": 9.847589904638564e-07, "loss": 0.0581, "step": 74670 }, { "epoch": 0.7979058710401197, "grad_norm": 9.298952102661133, "learning_rate": 9.847548736407933e-07, "loss": 0.0319, "step": 74680 }, { "epoch": 0.7980127143543992, "grad_norm": 0.24115917086601257, "learning_rate": 9.847507562704053e-07, "loss": 0.054, "step": 74690 }, { "epoch": 0.7981195576686789, "grad_norm": 17.624126434326172, "learning_rate": 9.847466383526972e-07, "loss": 0.0792, "step": 74700 }, { "epoch": 0.7982264009829585, "grad_norm": 5.129913330078125, "learning_rate": 9.847425198876734e-07, "loss": 0.0566, "step": 74710 }, { "epoch": 0.7983332442972381, "grad_norm": 7.683373928070068, "learning_rate": 9.847384008753387e-07, "loss": 0.034, "step": 74720 }, { "epoch": 0.7984400876115177, "grad_norm": 2.3072495460510254, "learning_rate": 9.84734281315698e-07, "loss": 0.0316, "step": 74730 }, { "epoch": 0.7985469309257973, "grad_norm": 2.0883243083953857, "learning_rate": 9.847301612087556e-07, "loss": 0.0426, "step": 74740 }, { "epoch": 0.7986537742400769, "grad_norm": 6.475033760070801, "learning_rate": 9.847260405545161e-07, "loss": 0.0443, "step": 74750 }, { "epoch": 0.7987606175543566, "grad_norm": 4.262459754943848, "learning_rate": 9.847219193529845e-07, "loss": 0.0898, "step": 74760 }, { "epoch": 0.7988674608686361, "grad_norm": 0.3051716685295105, "learning_rate": 9.847177976041653e-07, "loss": 0.0438, "step": 74770 }, { "epoch": 0.7989743041829157, "grad_norm": 8.235636711120605, "learning_rate": 9.84713675308063e-07, "loss": 0.031, "step": 74780 }, { "epoch": 0.7990811474971954, "grad_norm": 0.14289678633213043, "learning_rate": 9.847095524646825e-07, "loss": 0.0291, "step": 74790 }, { "epoch": 0.799187990811475, "grad_norm": 5.304196357727051, "learning_rate": 9.847054290740281e-07, "loss": 0.0869, "step": 74800 }, { "epoch": 0.7992948341257545, "grad_norm": 5.644130706787109, "learning_rate": 9.847013051361051e-07, "loss": 0.0863, "step": 74810 }, { "epoch": 0.7994016774400342, "grad_norm": 0.13865983486175537, "learning_rate": 9.846971806509175e-07, "loss": 0.0315, "step": 74820 }, { "epoch": 0.7995085207543138, "grad_norm": 0.18390999734401703, "learning_rate": 9.8469305561847e-07, "loss": 0.0363, "step": 74830 }, { "epoch": 0.7996153640685935, "grad_norm": 4.453251361846924, "learning_rate": 9.846889300387678e-07, "loss": 0.0498, "step": 74840 }, { "epoch": 0.799722207382873, "grad_norm": 9.22917366027832, "learning_rate": 9.84684803911815e-07, "loss": 0.0777, "step": 74850 }, { "epoch": 0.7998290506971526, "grad_norm": 0.3096700608730316, "learning_rate": 9.846806772376166e-07, "loss": 0.0237, "step": 74860 }, { "epoch": 0.7999358940114323, "grad_norm": 2.6243598461151123, "learning_rate": 9.84676550016177e-07, "loss": 0.0291, "step": 74870 }, { "epoch": 0.8000427373257119, "grad_norm": 5.555517196655273, "learning_rate": 9.846724222475011e-07, "loss": 0.0678, "step": 74880 }, { "epoch": 0.8001495806399914, "grad_norm": 4.89472770690918, "learning_rate": 9.846682939315935e-07, "loss": 0.0582, "step": 74890 }, { "epoch": 0.8002564239542711, "grad_norm": 8.448092460632324, "learning_rate": 9.846641650684587e-07, "loss": 0.0373, "step": 74900 }, { "epoch": 0.8003632672685507, "grad_norm": 3.9039316177368164, "learning_rate": 9.846600356581015e-07, "loss": 0.042, "step": 74910 }, { "epoch": 0.8004701105828302, "grad_norm": 0.6362904906272888, "learning_rate": 9.846559057005266e-07, "loss": 0.0304, "step": 74920 }, { "epoch": 0.8005769538971099, "grad_norm": 10.974737167358398, "learning_rate": 9.846517751957387e-07, "loss": 0.0569, "step": 74930 }, { "epoch": 0.8006837972113895, "grad_norm": 3.4612460136413574, "learning_rate": 9.846476441437422e-07, "loss": 0.0729, "step": 74940 }, { "epoch": 0.8007906405256691, "grad_norm": 6.438390731811523, "learning_rate": 9.846435125445417e-07, "loss": 0.0438, "step": 74950 }, { "epoch": 0.8008974838399487, "grad_norm": 7.050875186920166, "learning_rate": 9.846393803981425e-07, "loss": 0.0374, "step": 74960 }, { "epoch": 0.8010043271542283, "grad_norm": 0.7439132928848267, "learning_rate": 9.846352477045488e-07, "loss": 0.0181, "step": 74970 }, { "epoch": 0.8011111704685079, "grad_norm": 8.016627311706543, "learning_rate": 9.846311144637652e-07, "loss": 0.0603, "step": 74980 }, { "epoch": 0.8012180137827876, "grad_norm": 2.8760719299316406, "learning_rate": 9.846269806757964e-07, "loss": 0.0239, "step": 74990 }, { "epoch": 0.8013248570970671, "grad_norm": 1.6786768436431885, "learning_rate": 9.846228463406474e-07, "loss": 0.0466, "step": 75000 }, { "epoch": 0.8014317004113468, "grad_norm": 3.1008684635162354, "learning_rate": 9.846187114583226e-07, "loss": 0.1809, "step": 75010 }, { "epoch": 0.8015385437256264, "grad_norm": 8.111466407775879, "learning_rate": 9.846145760288267e-07, "loss": 0.0335, "step": 75020 }, { "epoch": 0.801645387039906, "grad_norm": 4.3863325119018555, "learning_rate": 9.846104400521641e-07, "loss": 0.0605, "step": 75030 }, { "epoch": 0.8017522303541856, "grad_norm": 0.09284863620996475, "learning_rate": 9.846063035283401e-07, "loss": 0.0755, "step": 75040 }, { "epoch": 0.8018590736684652, "grad_norm": 13.235751152038574, "learning_rate": 9.84602166457359e-07, "loss": 0.0341, "step": 75050 }, { "epoch": 0.8019659169827448, "grad_norm": 6.68062686920166, "learning_rate": 9.845980288392253e-07, "loss": 0.025, "step": 75060 }, { "epoch": 0.8020727602970245, "grad_norm": 2.4105117321014404, "learning_rate": 9.845938906739438e-07, "loss": 0.0403, "step": 75070 }, { "epoch": 0.802179603611304, "grad_norm": 4.856713771820068, "learning_rate": 9.845897519615195e-07, "loss": 0.0985, "step": 75080 }, { "epoch": 0.8022864469255836, "grad_norm": 4.08375358581543, "learning_rate": 9.845856127019565e-07, "loss": 0.0153, "step": 75090 }, { "epoch": 0.8023932902398633, "grad_norm": 0.15193113684654236, "learning_rate": 9.845814728952598e-07, "loss": 0.0498, "step": 75100 }, { "epoch": 0.8025001335541428, "grad_norm": 0.9134777188301086, "learning_rate": 9.845773325414343e-07, "loss": 0.0514, "step": 75110 }, { "epoch": 0.8026069768684224, "grad_norm": 0.5353898406028748, "learning_rate": 9.84573191640484e-07, "loss": 0.0612, "step": 75120 }, { "epoch": 0.8027138201827021, "grad_norm": 2.9163196086883545, "learning_rate": 9.845690501924144e-07, "loss": 0.0259, "step": 75130 }, { "epoch": 0.8028206634969817, "grad_norm": 3.1699347496032715, "learning_rate": 9.845649081972295e-07, "loss": 0.0737, "step": 75140 }, { "epoch": 0.8029275068112612, "grad_norm": 1.657881736755371, "learning_rate": 9.845607656549345e-07, "loss": 0.1228, "step": 75150 }, { "epoch": 0.8030343501255409, "grad_norm": 7.968960762023926, "learning_rate": 9.845566225655335e-07, "loss": 0.0313, "step": 75160 }, { "epoch": 0.8031411934398205, "grad_norm": 4.113638401031494, "learning_rate": 9.845524789290317e-07, "loss": 0.0164, "step": 75170 }, { "epoch": 0.8032480367541001, "grad_norm": 5.564634323120117, "learning_rate": 9.845483347454335e-07, "loss": 0.0758, "step": 75180 }, { "epoch": 0.8033548800683797, "grad_norm": 7.612639904022217, "learning_rate": 9.845441900147437e-07, "loss": 0.0706, "step": 75190 }, { "epoch": 0.8034617233826593, "grad_norm": 0.11359153687953949, "learning_rate": 9.84540044736967e-07, "loss": 0.0781, "step": 75200 }, { "epoch": 0.803568566696939, "grad_norm": 9.01003360748291, "learning_rate": 9.84535898912108e-07, "loss": 0.041, "step": 75210 }, { "epoch": 0.8036754100112186, "grad_norm": 0.1424337774515152, "learning_rate": 9.845317525401714e-07, "loss": 0.1088, "step": 75220 }, { "epoch": 0.8037822533254981, "grad_norm": 0.5328603386878967, "learning_rate": 9.845276056211618e-07, "loss": 0.0445, "step": 75230 }, { "epoch": 0.8038890966397778, "grad_norm": 2.8508999347686768, "learning_rate": 9.845234581550841e-07, "loss": 0.0385, "step": 75240 }, { "epoch": 0.8039959399540574, "grad_norm": 0.9477649927139282, "learning_rate": 9.845193101419428e-07, "loss": 0.0427, "step": 75250 }, { "epoch": 0.804102783268337, "grad_norm": 5.168483257293701, "learning_rate": 9.845151615817425e-07, "loss": 0.0173, "step": 75260 }, { "epoch": 0.8042096265826166, "grad_norm": 1.265040397644043, "learning_rate": 9.845110124744881e-07, "loss": 0.0258, "step": 75270 }, { "epoch": 0.8043164698968962, "grad_norm": 0.45808807015419006, "learning_rate": 9.845068628201843e-07, "loss": 0.0249, "step": 75280 }, { "epoch": 0.8044233132111758, "grad_norm": 2.4475457668304443, "learning_rate": 9.845027126188357e-07, "loss": 0.0387, "step": 75290 }, { "epoch": 0.8045301565254555, "grad_norm": 7.175082206726074, "learning_rate": 9.844985618704469e-07, "loss": 0.1252, "step": 75300 }, { "epoch": 0.804636999839735, "grad_norm": 3.187826633453369, "learning_rate": 9.844944105750226e-07, "loss": 0.0405, "step": 75310 }, { "epoch": 0.8047438431540146, "grad_norm": 11.894221305847168, "learning_rate": 9.844902587325673e-07, "loss": 0.0683, "step": 75320 }, { "epoch": 0.8048506864682943, "grad_norm": 6.370968341827393, "learning_rate": 9.844861063430863e-07, "loss": 0.0823, "step": 75330 }, { "epoch": 0.8049575297825738, "grad_norm": 8.035983085632324, "learning_rate": 9.84481953406584e-07, "loss": 0.0597, "step": 75340 }, { "epoch": 0.8050643730968534, "grad_norm": 0.07011321187019348, "learning_rate": 9.844777999230646e-07, "loss": 0.0527, "step": 75350 }, { "epoch": 0.8051712164111331, "grad_norm": 0.011817633174359798, "learning_rate": 9.844736458925335e-07, "loss": 0.0525, "step": 75360 }, { "epoch": 0.8052780597254127, "grad_norm": 4.0041913986206055, "learning_rate": 9.84469491314995e-07, "loss": 0.0455, "step": 75370 }, { "epoch": 0.8053849030396923, "grad_norm": 3.7043397426605225, "learning_rate": 9.84465336190454e-07, "loss": 0.0296, "step": 75380 }, { "epoch": 0.8054917463539719, "grad_norm": 0.03517724201083183, "learning_rate": 9.844611805189149e-07, "loss": 0.0681, "step": 75390 }, { "epoch": 0.8055985896682515, "grad_norm": 5.284664154052734, "learning_rate": 9.844570243003827e-07, "loss": 0.0224, "step": 75400 }, { "epoch": 0.8057054329825312, "grad_norm": 0.49141624569892883, "learning_rate": 9.84452867534862e-07, "loss": 0.0278, "step": 75410 }, { "epoch": 0.8058122762968107, "grad_norm": 0.7255398631095886, "learning_rate": 9.844487102223573e-07, "loss": 0.0568, "step": 75420 }, { "epoch": 0.8059191196110903, "grad_norm": 2.437333106994629, "learning_rate": 9.844445523628735e-07, "loss": 0.0396, "step": 75430 }, { "epoch": 0.80602596292537, "grad_norm": 21.231159210205078, "learning_rate": 9.844403939564151e-07, "loss": 0.154, "step": 75440 }, { "epoch": 0.8061328062396496, "grad_norm": 3.398556709289551, "learning_rate": 9.844362350029872e-07, "loss": 0.0405, "step": 75450 }, { "epoch": 0.8062396495539291, "grad_norm": 4.140965938568115, "learning_rate": 9.84432075502594e-07, "loss": 0.0869, "step": 75460 }, { "epoch": 0.8063464928682088, "grad_norm": 0.7449488043785095, "learning_rate": 9.844279154552406e-07, "loss": 0.0402, "step": 75470 }, { "epoch": 0.8064533361824884, "grad_norm": 6.823208808898926, "learning_rate": 9.844237548609313e-07, "loss": 0.0408, "step": 75480 }, { "epoch": 0.806560179496768, "grad_norm": 2.1881232261657715, "learning_rate": 9.84419593719671e-07, "loss": 0.05, "step": 75490 }, { "epoch": 0.8066670228110476, "grad_norm": 0.35593292117118835, "learning_rate": 9.844154320314649e-07, "loss": 0.0423, "step": 75500 }, { "epoch": 0.8067738661253272, "grad_norm": 1.660081386566162, "learning_rate": 9.844112697963168e-07, "loss": 0.0193, "step": 75510 }, { "epoch": 0.8068807094396068, "grad_norm": 2.228060722351074, "learning_rate": 9.844071070142317e-07, "loss": 0.0333, "step": 75520 }, { "epoch": 0.8069875527538865, "grad_norm": 2.255467414855957, "learning_rate": 9.844029436852147e-07, "loss": 0.0442, "step": 75530 }, { "epoch": 0.807094396068166, "grad_norm": 0.09288227558135986, "learning_rate": 9.8439877980927e-07, "loss": 0.0356, "step": 75540 }, { "epoch": 0.8072012393824456, "grad_norm": 0.019571835175156593, "learning_rate": 9.84394615386403e-07, "loss": 0.0242, "step": 75550 }, { "epoch": 0.8073080826967253, "grad_norm": 13.96224594116211, "learning_rate": 9.843904504166173e-07, "loss": 0.0949, "step": 75560 }, { "epoch": 0.8074149260110048, "grad_norm": 4.67062520980835, "learning_rate": 9.843862848999186e-07, "loss": 0.0565, "step": 75570 }, { "epoch": 0.8075217693252845, "grad_norm": 5.373455047607422, "learning_rate": 9.84382118836311e-07, "loss": 0.1163, "step": 75580 }, { "epoch": 0.8076286126395641, "grad_norm": 6.728788375854492, "learning_rate": 9.843779522257996e-07, "loss": 0.0506, "step": 75590 }, { "epoch": 0.8077354559538437, "grad_norm": 6.9889235496521, "learning_rate": 9.84373785068389e-07, "loss": 0.0275, "step": 75600 }, { "epoch": 0.8078422992681233, "grad_norm": 0.571483314037323, "learning_rate": 9.843696173640836e-07, "loss": 0.0348, "step": 75610 }, { "epoch": 0.8079491425824029, "grad_norm": 1.0614720582962036, "learning_rate": 9.843654491128885e-07, "loss": 0.0533, "step": 75620 }, { "epoch": 0.8080559858966825, "grad_norm": 0.14265930652618408, "learning_rate": 9.843612803148083e-07, "loss": 0.0198, "step": 75630 }, { "epoch": 0.8081628292109622, "grad_norm": 0.30773571133613586, "learning_rate": 9.843571109698475e-07, "loss": 0.0315, "step": 75640 }, { "epoch": 0.8082696725252417, "grad_norm": 3.53964900970459, "learning_rate": 9.843529410780112e-07, "loss": 0.026, "step": 75650 }, { "epoch": 0.8083765158395213, "grad_norm": 6.701896667480469, "learning_rate": 9.843487706393038e-07, "loss": 0.0806, "step": 75660 }, { "epoch": 0.808483359153801, "grad_norm": 6.001153945922852, "learning_rate": 9.843445996537298e-07, "loss": 0.0349, "step": 75670 }, { "epoch": 0.8085902024680806, "grad_norm": 12.847487449645996, "learning_rate": 9.843404281212944e-07, "loss": 0.0714, "step": 75680 }, { "epoch": 0.8086970457823601, "grad_norm": 6.135814666748047, "learning_rate": 9.84336256042002e-07, "loss": 0.107, "step": 75690 }, { "epoch": 0.8088038890966398, "grad_norm": 8.115071296691895, "learning_rate": 9.843320834158577e-07, "loss": 0.0629, "step": 75700 }, { "epoch": 0.8089107324109194, "grad_norm": 3.4099814891815186, "learning_rate": 9.843279102428656e-07, "loss": 0.0407, "step": 75710 }, { "epoch": 0.809017575725199, "grad_norm": 10.597474098205566, "learning_rate": 9.84323736523031e-07, "loss": 0.0423, "step": 75720 }, { "epoch": 0.8091244190394786, "grad_norm": 5.483123779296875, "learning_rate": 9.84319562256358e-07, "loss": 0.0801, "step": 75730 }, { "epoch": 0.8092312623537582, "grad_norm": 0.008911998011171818, "learning_rate": 9.84315387442852e-07, "loss": 0.0545, "step": 75740 }, { "epoch": 0.8093381056680379, "grad_norm": 3.6400678157806396, "learning_rate": 9.843112120825172e-07, "loss": 0.0378, "step": 75750 }, { "epoch": 0.8094449489823174, "grad_norm": 0.6582525968551636, "learning_rate": 9.843070361753586e-07, "loss": 0.0439, "step": 75760 }, { "epoch": 0.809551792296597, "grad_norm": 4.74220085144043, "learning_rate": 9.843028597213806e-07, "loss": 0.044, "step": 75770 }, { "epoch": 0.8096586356108767, "grad_norm": 3.6579837799072266, "learning_rate": 9.842986827205884e-07, "loss": 0.0834, "step": 75780 }, { "epoch": 0.8097654789251563, "grad_norm": 6.38803768157959, "learning_rate": 9.842945051729863e-07, "loss": 0.064, "step": 75790 }, { "epoch": 0.8098723222394358, "grad_norm": 3.3609049320220947, "learning_rate": 9.842903270785792e-07, "loss": 0.1086, "step": 75800 }, { "epoch": 0.8099791655537155, "grad_norm": 4.8348870277404785, "learning_rate": 9.842861484373716e-07, "loss": 0.0536, "step": 75810 }, { "epoch": 0.8100860088679951, "grad_norm": 6.238564968109131, "learning_rate": 9.842819692493686e-07, "loss": 0.0894, "step": 75820 }, { "epoch": 0.8101928521822747, "grad_norm": 12.214879035949707, "learning_rate": 9.842777895145747e-07, "loss": 0.0352, "step": 75830 }, { "epoch": 0.8102996954965543, "grad_norm": 27.354976654052734, "learning_rate": 9.842736092329943e-07, "loss": 0.0156, "step": 75840 }, { "epoch": 0.8104065388108339, "grad_norm": 0.3933981657028198, "learning_rate": 9.842694284046328e-07, "loss": 0.0589, "step": 75850 }, { "epoch": 0.8105133821251135, "grad_norm": 4.653611660003662, "learning_rate": 9.842652470294944e-07, "loss": 0.0457, "step": 75860 }, { "epoch": 0.8106202254393932, "grad_norm": 2.50700044631958, "learning_rate": 9.84261065107584e-07, "loss": 0.0649, "step": 75870 }, { "epoch": 0.8107270687536727, "grad_norm": 6.873180866241455, "learning_rate": 9.842568826389063e-07, "loss": 0.0339, "step": 75880 }, { "epoch": 0.8108339120679523, "grad_norm": 8.676417350769043, "learning_rate": 9.84252699623466e-07, "loss": 0.0409, "step": 75890 }, { "epoch": 0.810940755382232, "grad_norm": 18.48773193359375, "learning_rate": 9.84248516061268e-07, "loss": 0.0662, "step": 75900 }, { "epoch": 0.8110475986965116, "grad_norm": 5.015176296234131, "learning_rate": 9.842443319523168e-07, "loss": 0.0386, "step": 75910 }, { "epoch": 0.8111544420107911, "grad_norm": 6.937308311462402, "learning_rate": 9.842401472966173e-07, "loss": 0.0641, "step": 75920 }, { "epoch": 0.8112612853250708, "grad_norm": 1.529335856437683, "learning_rate": 9.84235962094174e-07, "loss": 0.0299, "step": 75930 }, { "epoch": 0.8113681286393504, "grad_norm": 6.542335033416748, "learning_rate": 9.84231776344992e-07, "loss": 0.0875, "step": 75940 }, { "epoch": 0.81147497195363, "grad_norm": 2.872239828109741, "learning_rate": 9.842275900490754e-07, "loss": 0.0532, "step": 75950 }, { "epoch": 0.8115818152679096, "grad_norm": 2.757791519165039, "learning_rate": 9.842234032064295e-07, "loss": 0.0524, "step": 75960 }, { "epoch": 0.8116886585821892, "grad_norm": 0.27434414625167847, "learning_rate": 9.842192158170587e-07, "loss": 0.0354, "step": 75970 }, { "epoch": 0.8117955018964689, "grad_norm": 2.5594353675842285, "learning_rate": 9.84215027880968e-07, "loss": 0.0598, "step": 75980 }, { "epoch": 0.8119023452107484, "grad_norm": 8.206853866577148, "learning_rate": 9.84210839398162e-07, "loss": 0.0331, "step": 75990 }, { "epoch": 0.812009188525028, "grad_norm": 5.096264362335205, "learning_rate": 9.842066503686453e-07, "loss": 0.0572, "step": 76000 }, { "epoch": 0.8121160318393077, "grad_norm": 8.031249046325684, "learning_rate": 9.84202460792423e-07, "loss": 0.0956, "step": 76010 }, { "epoch": 0.8122228751535873, "grad_norm": 0.3205806016921997, "learning_rate": 9.841982706694994e-07, "loss": 0.0759, "step": 76020 }, { "epoch": 0.8123297184678668, "grad_norm": 4.4750847816467285, "learning_rate": 9.841940799998795e-07, "loss": 0.0496, "step": 76030 }, { "epoch": 0.8124365617821465, "grad_norm": 7.950380802154541, "learning_rate": 9.841898887835675e-07, "loss": 0.0476, "step": 76040 }, { "epoch": 0.8125434050964261, "grad_norm": 3.6937777996063232, "learning_rate": 9.84185697020569e-07, "loss": 0.0794, "step": 76050 }, { "epoch": 0.8126502484107057, "grad_norm": 4.655900478363037, "learning_rate": 9.841815047108881e-07, "loss": 0.065, "step": 76060 }, { "epoch": 0.8127570917249853, "grad_norm": 0.5313923358917236, "learning_rate": 9.8417731185453e-07, "loss": 0.0337, "step": 76070 }, { "epoch": 0.8128639350392649, "grad_norm": 10.697770118713379, "learning_rate": 9.84173118451499e-07, "loss": 0.0319, "step": 76080 }, { "epoch": 0.8129707783535445, "grad_norm": 3.3499391078948975, "learning_rate": 9.841689245018e-07, "loss": 0.025, "step": 76090 }, { "epoch": 0.8130776216678242, "grad_norm": 2.6322708129882812, "learning_rate": 9.84164730005438e-07, "loss": 0.0341, "step": 76100 }, { "epoch": 0.8131844649821037, "grad_norm": 0.09869703650474548, "learning_rate": 9.84160534962417e-07, "loss": 0.0472, "step": 76110 }, { "epoch": 0.8132913082963834, "grad_norm": 4.681544780731201, "learning_rate": 9.841563393727425e-07, "loss": 0.1047, "step": 76120 }, { "epoch": 0.813398151610663, "grad_norm": 0.6513119339942932, "learning_rate": 9.84152143236419e-07, "loss": 0.1013, "step": 76130 }, { "epoch": 0.8135049949249425, "grad_norm": 2.4874982833862305, "learning_rate": 9.841479465534512e-07, "loss": 0.0513, "step": 76140 }, { "epoch": 0.8136118382392222, "grad_norm": 10.992114067077637, "learning_rate": 9.841437493238436e-07, "loss": 0.0724, "step": 76150 }, { "epoch": 0.8137186815535018, "grad_norm": 7.526919841766357, "learning_rate": 9.841395515476013e-07, "loss": 0.1039, "step": 76160 }, { "epoch": 0.8138255248677814, "grad_norm": 2.5138111114501953, "learning_rate": 9.84135353224729e-07, "loss": 0.0738, "step": 76170 }, { "epoch": 0.813932368182061, "grad_norm": 6.529427528381348, "learning_rate": 9.841311543552312e-07, "loss": 0.0432, "step": 76180 }, { "epoch": 0.8140392114963406, "grad_norm": 2.324551582336426, "learning_rate": 9.841269549391128e-07, "loss": 0.0723, "step": 76190 }, { "epoch": 0.8141460548106202, "grad_norm": 1.6757681369781494, "learning_rate": 9.841227549763787e-07, "loss": 0.0721, "step": 76200 }, { "epoch": 0.8142528981248999, "grad_norm": 6.33344841003418, "learning_rate": 9.841185544670332e-07, "loss": 0.0462, "step": 76210 }, { "epoch": 0.8143597414391794, "grad_norm": 3.612961769104004, "learning_rate": 9.841143534110817e-07, "loss": 0.0357, "step": 76220 }, { "epoch": 0.814466584753459, "grad_norm": 1.8157752752304077, "learning_rate": 9.841101518085282e-07, "loss": 0.06, "step": 76230 }, { "epoch": 0.8145734280677387, "grad_norm": 0.7117887139320374, "learning_rate": 9.841059496593778e-07, "loss": 0.0962, "step": 76240 }, { "epoch": 0.8146802713820183, "grad_norm": 2.194922924041748, "learning_rate": 9.841017469636355e-07, "loss": 0.0485, "step": 76250 }, { "epoch": 0.8147871146962978, "grad_norm": 0.1251555234193802, "learning_rate": 9.840975437213056e-07, "loss": 0.0759, "step": 76260 }, { "epoch": 0.8148939580105775, "grad_norm": 2.4417247772216797, "learning_rate": 9.840933399323932e-07, "loss": 0.0753, "step": 76270 }, { "epoch": 0.8150008013248571, "grad_norm": 4.801278591156006, "learning_rate": 9.840891355969027e-07, "loss": 0.0306, "step": 76280 }, { "epoch": 0.8151076446391367, "grad_norm": 6.201228141784668, "learning_rate": 9.84084930714839e-07, "loss": 0.026, "step": 76290 }, { "epoch": 0.8152144879534163, "grad_norm": 3.6509039402008057, "learning_rate": 9.84080725286207e-07, "loss": 0.0221, "step": 76300 }, { "epoch": 0.8153213312676959, "grad_norm": 7.930644512176514, "learning_rate": 9.840765193110115e-07, "loss": 0.0717, "step": 76310 }, { "epoch": 0.8154281745819756, "grad_norm": 2.3988964557647705, "learning_rate": 9.840723127892568e-07, "loss": 0.0309, "step": 76320 }, { "epoch": 0.8155350178962552, "grad_norm": 6.866725921630859, "learning_rate": 9.84068105720948e-07, "loss": 0.0403, "step": 76330 }, { "epoch": 0.8156418612105347, "grad_norm": 21.319551467895508, "learning_rate": 9.840638981060898e-07, "loss": 0.1347, "step": 76340 }, { "epoch": 0.8157487045248144, "grad_norm": 0.1058952659368515, "learning_rate": 9.84059689944687e-07, "loss": 0.0184, "step": 76350 }, { "epoch": 0.815855547839094, "grad_norm": 6.779922962188721, "learning_rate": 9.84055481236744e-07, "loss": 0.0676, "step": 76360 }, { "epoch": 0.8159623911533735, "grad_norm": 3.2917680740356445, "learning_rate": 9.84051271982266e-07, "loss": 0.0486, "step": 76370 }, { "epoch": 0.8160692344676532, "grad_norm": 0.03555778041481972, "learning_rate": 9.840470621812577e-07, "loss": 0.0487, "step": 76380 }, { "epoch": 0.8161760777819328, "grad_norm": 2.6875672340393066, "learning_rate": 9.840428518337237e-07, "loss": 0.0593, "step": 76390 }, { "epoch": 0.8162829210962124, "grad_norm": 4.580925941467285, "learning_rate": 9.840386409396685e-07, "loss": 0.1175, "step": 76400 }, { "epoch": 0.816389764410492, "grad_norm": 0.146800696849823, "learning_rate": 9.840344294990975e-07, "loss": 0.0318, "step": 76410 }, { "epoch": 0.8164966077247716, "grad_norm": 0.03754694387316704, "learning_rate": 9.84030217512015e-07, "loss": 0.0181, "step": 76420 }, { "epoch": 0.8166034510390512, "grad_norm": 3.4940102100372314, "learning_rate": 9.840260049784257e-07, "loss": 0.0719, "step": 76430 }, { "epoch": 0.8167102943533309, "grad_norm": 0.09687789529561996, "learning_rate": 9.840217918983347e-07, "loss": 0.0815, "step": 76440 }, { "epoch": 0.8168171376676104, "grad_norm": 1.7406764030456543, "learning_rate": 9.840175782717464e-07, "loss": 0.0443, "step": 76450 }, { "epoch": 0.81692398098189, "grad_norm": 5.044529914855957, "learning_rate": 9.840133640986658e-07, "loss": 0.0347, "step": 76460 }, { "epoch": 0.8170308242961697, "grad_norm": 4.959123611450195, "learning_rate": 9.840091493790977e-07, "loss": 0.0254, "step": 76470 }, { "epoch": 0.8171376676104493, "grad_norm": 5.523759365081787, "learning_rate": 9.840049341130464e-07, "loss": 0.0467, "step": 76480 }, { "epoch": 0.8172445109247289, "grad_norm": 2.8292531967163086, "learning_rate": 9.840007183005174e-07, "loss": 0.0696, "step": 76490 }, { "epoch": 0.8173513542390085, "grad_norm": 3.2011895179748535, "learning_rate": 9.839965019415147e-07, "loss": 0.0321, "step": 76500 }, { "epoch": 0.8174581975532881, "grad_norm": 14.874895095825195, "learning_rate": 9.839922850360439e-07, "loss": 0.1127, "step": 76510 }, { "epoch": 0.8175650408675678, "grad_norm": 0.780813455581665, "learning_rate": 9.839880675841089e-07, "loss": 0.0518, "step": 76520 }, { "epoch": 0.8176718841818473, "grad_norm": 1.756955623626709, "learning_rate": 9.839838495857148e-07, "loss": 0.0828, "step": 76530 }, { "epoch": 0.8177787274961269, "grad_norm": 5.2365593910217285, "learning_rate": 9.839796310408667e-07, "loss": 0.0627, "step": 76540 }, { "epoch": 0.8178855708104066, "grad_norm": 1.5890450477600098, "learning_rate": 9.83975411949569e-07, "loss": 0.0489, "step": 76550 }, { "epoch": 0.8179924141246862, "grad_norm": 2.591599702835083, "learning_rate": 9.839711923118264e-07, "loss": 0.0701, "step": 76560 }, { "epoch": 0.8180992574389657, "grad_norm": 6.1076836585998535, "learning_rate": 9.839669721276437e-07, "loss": 0.0246, "step": 76570 }, { "epoch": 0.8182061007532454, "grad_norm": 4.958249092102051, "learning_rate": 9.839627513970258e-07, "loss": 0.1271, "step": 76580 }, { "epoch": 0.818312944067525, "grad_norm": 4.647489547729492, "learning_rate": 9.839585301199777e-07, "loss": 0.1346, "step": 76590 }, { "epoch": 0.8184197873818045, "grad_norm": 21.326345443725586, "learning_rate": 9.839543082965038e-07, "loss": 0.195, "step": 76600 }, { "epoch": 0.8185266306960842, "grad_norm": 6.654753684997559, "learning_rate": 9.839500859266088e-07, "loss": 0.0551, "step": 76610 }, { "epoch": 0.8186334740103638, "grad_norm": 3.358752489089966, "learning_rate": 9.839458630102976e-07, "loss": 0.018, "step": 76620 }, { "epoch": 0.8187403173246434, "grad_norm": 0.023706655949354172, "learning_rate": 9.839416395475752e-07, "loss": 0.0336, "step": 76630 }, { "epoch": 0.818847160638923, "grad_norm": 4.957104682922363, "learning_rate": 9.83937415538446e-07, "loss": 0.0961, "step": 76640 }, { "epoch": 0.8189540039532026, "grad_norm": 3.7124509811401367, "learning_rate": 9.839331909829152e-07, "loss": 0.0398, "step": 76650 }, { "epoch": 0.8190608472674822, "grad_norm": 4.3419294357299805, "learning_rate": 9.83928965880987e-07, "loss": 0.1334, "step": 76660 }, { "epoch": 0.8191676905817619, "grad_norm": 3.147646903991699, "learning_rate": 9.839247402326666e-07, "loss": 0.0474, "step": 76670 }, { "epoch": 0.8192745338960414, "grad_norm": 4.432706356048584, "learning_rate": 9.839205140379586e-07, "loss": 0.0446, "step": 76680 }, { "epoch": 0.8193813772103211, "grad_norm": 2.189260244369507, "learning_rate": 9.839162872968678e-07, "loss": 0.0258, "step": 76690 }, { "epoch": 0.8194882205246007, "grad_norm": 2.3245275020599365, "learning_rate": 9.83912060009399e-07, "loss": 0.0304, "step": 76700 }, { "epoch": 0.8195950638388803, "grad_norm": 23.350236892700195, "learning_rate": 9.839078321755572e-07, "loss": 0.0563, "step": 76710 }, { "epoch": 0.8197019071531599, "grad_norm": 5.441501140594482, "learning_rate": 9.839036037953466e-07, "loss": 0.0598, "step": 76720 }, { "epoch": 0.8198087504674395, "grad_norm": 1.2214771509170532, "learning_rate": 9.838993748687725e-07, "loss": 0.0515, "step": 76730 }, { "epoch": 0.8199155937817191, "grad_norm": 4.960319995880127, "learning_rate": 9.838951453958393e-07, "loss": 0.0563, "step": 76740 }, { "epoch": 0.8200224370959988, "grad_norm": 0.10780107229948044, "learning_rate": 9.838909153765523e-07, "loss": 0.0303, "step": 76750 }, { "epoch": 0.8201292804102783, "grad_norm": 11.11605167388916, "learning_rate": 9.838866848109155e-07, "loss": 0.0247, "step": 76760 }, { "epoch": 0.8202361237245579, "grad_norm": 3.7272567749023438, "learning_rate": 9.838824536989343e-07, "loss": 0.022, "step": 76770 }, { "epoch": 0.8203429670388376, "grad_norm": 2.986937999725342, "learning_rate": 9.838782220406135e-07, "loss": 0.0238, "step": 76780 }, { "epoch": 0.8204498103531171, "grad_norm": 0.6331614255905151, "learning_rate": 9.838739898359574e-07, "loss": 0.0825, "step": 76790 }, { "epoch": 0.8205566536673967, "grad_norm": 7.885697364807129, "learning_rate": 9.83869757084971e-07, "loss": 0.0867, "step": 76800 }, { "epoch": 0.8206634969816764, "grad_norm": 0.13485336303710938, "learning_rate": 9.838655237876593e-07, "loss": 0.0575, "step": 76810 }, { "epoch": 0.820770340295956, "grad_norm": 11.292470932006836, "learning_rate": 9.83861289944027e-07, "loss": 0.0591, "step": 76820 }, { "epoch": 0.8208771836102355, "grad_norm": 2.005066394805908, "learning_rate": 9.838570555540785e-07, "loss": 0.0548, "step": 76830 }, { "epoch": 0.8209840269245152, "grad_norm": 3.267996072769165, "learning_rate": 9.838528206178192e-07, "loss": 0.0484, "step": 76840 }, { "epoch": 0.8210908702387948, "grad_norm": 3.810391426086426, "learning_rate": 9.838485851352533e-07, "loss": 0.0348, "step": 76850 }, { "epoch": 0.8211977135530745, "grad_norm": 4.3527445793151855, "learning_rate": 9.83844349106386e-07, "loss": 0.0369, "step": 76860 }, { "epoch": 0.821304556867354, "grad_norm": 10.216440200805664, "learning_rate": 9.838401125312216e-07, "loss": 0.0487, "step": 76870 }, { "epoch": 0.8214114001816336, "grad_norm": 5.466678619384766, "learning_rate": 9.838358754097655e-07, "loss": 0.0609, "step": 76880 }, { "epoch": 0.8215182434959133, "grad_norm": 8.074718475341797, "learning_rate": 9.838316377420221e-07, "loss": 0.0384, "step": 76890 }, { "epoch": 0.8216250868101929, "grad_norm": 1.1313962936401367, "learning_rate": 9.838273995279963e-07, "loss": 0.0492, "step": 76900 }, { "epoch": 0.8217319301244724, "grad_norm": 0.8898693919181824, "learning_rate": 9.838231607676926e-07, "loss": 0.0662, "step": 76910 }, { "epoch": 0.8218387734387521, "grad_norm": 0.9729078412055969, "learning_rate": 9.838189214611164e-07, "loss": 0.0298, "step": 76920 }, { "epoch": 0.8219456167530317, "grad_norm": 0.7568118572235107, "learning_rate": 9.83814681608272e-07, "loss": 0.0364, "step": 76930 }, { "epoch": 0.8220524600673113, "grad_norm": 0.07774972170591354, "learning_rate": 9.83810441209164e-07, "loss": 0.0216, "step": 76940 }, { "epoch": 0.8221593033815909, "grad_norm": 5.874301433563232, "learning_rate": 9.838062002637979e-07, "loss": 0.071, "step": 76950 }, { "epoch": 0.8222661466958705, "grad_norm": 2.324251890182495, "learning_rate": 9.83801958772178e-07, "loss": 0.0675, "step": 76960 }, { "epoch": 0.8223729900101501, "grad_norm": 8.05961799621582, "learning_rate": 9.83797716734309e-07, "loss": 0.0907, "step": 76970 }, { "epoch": 0.8224798333244298, "grad_norm": 3.8629469871520996, "learning_rate": 9.837934741501961e-07, "loss": 0.0551, "step": 76980 }, { "epoch": 0.8225866766387093, "grad_norm": 7.865749835968018, "learning_rate": 9.837892310198437e-07, "loss": 0.1166, "step": 76990 }, { "epoch": 0.8226935199529889, "grad_norm": 0.01882760226726532, "learning_rate": 9.837849873432566e-07, "loss": 0.0303, "step": 77000 }, { "epoch": 0.8228003632672686, "grad_norm": 6.73514986038208, "learning_rate": 9.8378074312044e-07, "loss": 0.032, "step": 77010 }, { "epoch": 0.8229072065815481, "grad_norm": 0.7516509294509888, "learning_rate": 9.837764983513983e-07, "loss": 0.0288, "step": 77020 }, { "epoch": 0.8230140498958277, "grad_norm": 10.661561012268066, "learning_rate": 9.837722530361366e-07, "loss": 0.0497, "step": 77030 }, { "epoch": 0.8231208932101074, "grad_norm": 3.9837448596954346, "learning_rate": 9.837680071746594e-07, "loss": 0.0701, "step": 77040 }, { "epoch": 0.823227736524387, "grad_norm": 9.695905685424805, "learning_rate": 9.837637607669714e-07, "loss": 0.0754, "step": 77050 }, { "epoch": 0.8233345798386666, "grad_norm": 0.01325257495045662, "learning_rate": 9.83759513813078e-07, "loss": 0.0229, "step": 77060 }, { "epoch": 0.8234414231529462, "grad_norm": 5.70453405380249, "learning_rate": 9.837552663129833e-07, "loss": 0.036, "step": 77070 }, { "epoch": 0.8235482664672258, "grad_norm": 1.0453060865402222, "learning_rate": 9.837510182666926e-07, "loss": 0.0372, "step": 77080 }, { "epoch": 0.8236551097815055, "grad_norm": 11.042989730834961, "learning_rate": 9.837467696742104e-07, "loss": 0.0319, "step": 77090 }, { "epoch": 0.823761953095785, "grad_norm": 2.9340384006500244, "learning_rate": 9.837425205355413e-07, "loss": 0.0144, "step": 77100 }, { "epoch": 0.8238687964100646, "grad_norm": 5.362502098083496, "learning_rate": 9.837382708506908e-07, "loss": 0.1513, "step": 77110 }, { "epoch": 0.8239756397243443, "grad_norm": 5.372032165527344, "learning_rate": 9.837340206196632e-07, "loss": 0.0607, "step": 77120 }, { "epoch": 0.8240824830386239, "grad_norm": 0.063614122569561, "learning_rate": 9.83729769842463e-07, "loss": 0.0526, "step": 77130 }, { "epoch": 0.8241893263529034, "grad_norm": 5.1000075340271, "learning_rate": 9.83725518519096e-07, "loss": 0.0709, "step": 77140 }, { "epoch": 0.8242961696671831, "grad_norm": 7.199287414550781, "learning_rate": 9.83721266649566e-07, "loss": 0.0418, "step": 77150 }, { "epoch": 0.8244030129814627, "grad_norm": 7.122185230255127, "learning_rate": 9.837170142338782e-07, "loss": 0.063, "step": 77160 }, { "epoch": 0.8245098562957422, "grad_norm": 4.92838716506958, "learning_rate": 9.837127612720376e-07, "loss": 0.0565, "step": 77170 }, { "epoch": 0.8246166996100219, "grad_norm": 3.8254079818725586, "learning_rate": 9.837085077640485e-07, "loss": 0.0221, "step": 77180 }, { "epoch": 0.8247235429243015, "grad_norm": 2.534904718399048, "learning_rate": 9.83704253709916e-07, "loss": 0.0349, "step": 77190 }, { "epoch": 0.8248303862385811, "grad_norm": 6.73873233795166, "learning_rate": 9.836999991096451e-07, "loss": 0.0728, "step": 77200 }, { "epoch": 0.8249372295528608, "grad_norm": 5.423922538757324, "learning_rate": 9.836957439632403e-07, "loss": 0.0702, "step": 77210 }, { "epoch": 0.8250440728671403, "grad_norm": 4.759479999542236, "learning_rate": 9.836914882707065e-07, "loss": 0.0338, "step": 77220 }, { "epoch": 0.82515091618142, "grad_norm": 4.771040439605713, "learning_rate": 9.836872320320484e-07, "loss": 0.0405, "step": 77230 }, { "epoch": 0.8252577594956996, "grad_norm": 4.756048679351807, "learning_rate": 9.836829752472711e-07, "loss": 0.0871, "step": 77240 }, { "epoch": 0.8253646028099791, "grad_norm": 7.824487209320068, "learning_rate": 9.83678717916379e-07, "loss": 0.0582, "step": 77250 }, { "epoch": 0.8254714461242588, "grad_norm": 2.9167699813842773, "learning_rate": 9.836744600393774e-07, "loss": 0.0326, "step": 77260 }, { "epoch": 0.8255782894385384, "grad_norm": 4.208913326263428, "learning_rate": 9.836702016162705e-07, "loss": 0.0583, "step": 77270 }, { "epoch": 0.825685132752818, "grad_norm": 2.307734966278076, "learning_rate": 9.836659426470636e-07, "loss": 0.0712, "step": 77280 }, { "epoch": 0.8257919760670976, "grad_norm": 5.960275650024414, "learning_rate": 9.836616831317615e-07, "loss": 0.0502, "step": 77290 }, { "epoch": 0.8258988193813772, "grad_norm": 21.511709213256836, "learning_rate": 9.836574230703688e-07, "loss": 0.0824, "step": 77300 }, { "epoch": 0.8260056626956568, "grad_norm": 1.1274845600128174, "learning_rate": 9.8365316246289e-07, "loss": 0.0251, "step": 77310 }, { "epoch": 0.8261125060099365, "grad_norm": 3.675053358078003, "learning_rate": 9.836489013093307e-07, "loss": 0.0417, "step": 77320 }, { "epoch": 0.826219349324216, "grad_norm": 3.8981518745422363, "learning_rate": 9.836446396096952e-07, "loss": 0.0496, "step": 77330 }, { "epoch": 0.8263261926384956, "grad_norm": 3.7906036376953125, "learning_rate": 9.836403773639883e-07, "loss": 0.1377, "step": 77340 }, { "epoch": 0.8264330359527753, "grad_norm": 1.9738589525222778, "learning_rate": 9.83636114572215e-07, "loss": 0.113, "step": 77350 }, { "epoch": 0.8265398792670549, "grad_norm": 0.2576870322227478, "learning_rate": 9.8363185123438e-07, "loss": 0.031, "step": 77360 }, { "epoch": 0.8266467225813344, "grad_norm": 16.95796012878418, "learning_rate": 9.83627587350488e-07, "loss": 0.1207, "step": 77370 }, { "epoch": 0.8267535658956141, "grad_norm": 3.798097610473633, "learning_rate": 9.83623322920544e-07, "loss": 0.031, "step": 77380 }, { "epoch": 0.8268604092098937, "grad_norm": 0.3384556472301483, "learning_rate": 9.83619057944553e-07, "loss": 0.0128, "step": 77390 }, { "epoch": 0.8269672525241732, "grad_norm": 4.592766284942627, "learning_rate": 9.836147924225194e-07, "loss": 0.0594, "step": 77400 }, { "epoch": 0.8270740958384529, "grad_norm": 0.5728756785392761, "learning_rate": 9.836105263544482e-07, "loss": 0.0245, "step": 77410 }, { "epoch": 0.8271809391527325, "grad_norm": 2.539541482925415, "learning_rate": 9.836062597403442e-07, "loss": 0.0297, "step": 77420 }, { "epoch": 0.8272877824670122, "grad_norm": 0.7799562215805054, "learning_rate": 9.836019925802123e-07, "loss": 0.024, "step": 77430 }, { "epoch": 0.8273946257812917, "grad_norm": 7.553918838500977, "learning_rate": 9.835977248740573e-07, "loss": 0.0704, "step": 77440 }, { "epoch": 0.8275014690955713, "grad_norm": 0.04813185706734657, "learning_rate": 9.83593456621884e-07, "loss": 0.0168, "step": 77450 }, { "epoch": 0.827608312409851, "grad_norm": 3.6819894313812256, "learning_rate": 9.83589187823697e-07, "loss": 0.0498, "step": 77460 }, { "epoch": 0.8277151557241306, "grad_norm": 1.1177250146865845, "learning_rate": 9.835849184795012e-07, "loss": 0.0584, "step": 77470 }, { "epoch": 0.8278219990384101, "grad_norm": 4.9643940925598145, "learning_rate": 9.835806485893019e-07, "loss": 0.0388, "step": 77480 }, { "epoch": 0.8279288423526898, "grad_norm": 0.6207778453826904, "learning_rate": 9.835763781531034e-07, "loss": 0.049, "step": 77490 }, { "epoch": 0.8280356856669694, "grad_norm": 0.23278379440307617, "learning_rate": 9.835721071709106e-07, "loss": 0.0417, "step": 77500 }, { "epoch": 0.828142528981249, "grad_norm": 1.4310994148254395, "learning_rate": 9.835678356427285e-07, "loss": 0.0742, "step": 77510 }, { "epoch": 0.8282493722955286, "grad_norm": 1.448041558265686, "learning_rate": 9.835635635685618e-07, "loss": 0.0282, "step": 77520 }, { "epoch": 0.8283562156098082, "grad_norm": 0.13187269866466522, "learning_rate": 9.83559290948415e-07, "loss": 0.0565, "step": 77530 }, { "epoch": 0.8284630589240878, "grad_norm": 0.621487557888031, "learning_rate": 9.835550177822938e-07, "loss": 0.0379, "step": 77540 }, { "epoch": 0.8285699022383675, "grad_norm": 2.160205364227295, "learning_rate": 9.835507440702021e-07, "loss": 0.0282, "step": 77550 }, { "epoch": 0.828676745552647, "grad_norm": 2.5222280025482178, "learning_rate": 9.835464698121455e-07, "loss": 0.0611, "step": 77560 }, { "epoch": 0.8287835888669266, "grad_norm": 3.385195255279541, "learning_rate": 9.83542195008128e-07, "loss": 0.0262, "step": 77570 }, { "epoch": 0.8288904321812063, "grad_norm": 2.5582175254821777, "learning_rate": 9.835379196581552e-07, "loss": 0.1112, "step": 77580 }, { "epoch": 0.8289972754954859, "grad_norm": 1.4066824913024902, "learning_rate": 9.835336437622316e-07, "loss": 0.0302, "step": 77590 }, { "epoch": 0.8291041188097655, "grad_norm": 2.212358236312866, "learning_rate": 9.835293673203617e-07, "loss": 0.0227, "step": 77600 }, { "epoch": 0.8292109621240451, "grad_norm": 0.9383023381233215, "learning_rate": 9.835250903325508e-07, "loss": 0.0413, "step": 77610 }, { "epoch": 0.8293178054383247, "grad_norm": 2.3805835247039795, "learning_rate": 9.835208127988036e-07, "loss": 0.0726, "step": 77620 }, { "epoch": 0.8294246487526044, "grad_norm": 0.08441180735826492, "learning_rate": 9.83516534719125e-07, "loss": 0.0827, "step": 77630 }, { "epoch": 0.8295314920668839, "grad_norm": 0.11441903561353683, "learning_rate": 9.835122560935196e-07, "loss": 0.0538, "step": 77640 }, { "epoch": 0.8296383353811635, "grad_norm": 4.259881973266602, "learning_rate": 9.835079769219925e-07, "loss": 0.0368, "step": 77650 }, { "epoch": 0.8297451786954432, "grad_norm": 1.6566500663757324, "learning_rate": 9.835036972045482e-07, "loss": 0.0273, "step": 77660 }, { "epoch": 0.8298520220097227, "grad_norm": 0.6861130595207214, "learning_rate": 9.83499416941192e-07, "loss": 0.022, "step": 77670 }, { "epoch": 0.8299588653240023, "grad_norm": 10.4451904296875, "learning_rate": 9.834951361319283e-07, "loss": 0.1236, "step": 77680 }, { "epoch": 0.830065708638282, "grad_norm": 0.4793764352798462, "learning_rate": 9.83490854776762e-07, "loss": 0.0455, "step": 77690 }, { "epoch": 0.8301725519525616, "grad_norm": 0.03826366364955902, "learning_rate": 9.834865728756984e-07, "loss": 0.0278, "step": 77700 }, { "epoch": 0.8302793952668411, "grad_norm": 8.402400016784668, "learning_rate": 9.834822904287417e-07, "loss": 0.0521, "step": 77710 }, { "epoch": 0.8303862385811208, "grad_norm": 0.2961188852787018, "learning_rate": 9.83478007435897e-07, "loss": 0.0118, "step": 77720 }, { "epoch": 0.8304930818954004, "grad_norm": 2.5055806636810303, "learning_rate": 9.834737238971693e-07, "loss": 0.0338, "step": 77730 }, { "epoch": 0.83059992520968, "grad_norm": 2.0153322219848633, "learning_rate": 9.834694398125631e-07, "loss": 0.0742, "step": 77740 }, { "epoch": 0.8307067685239596, "grad_norm": 5.776683807373047, "learning_rate": 9.834651551820836e-07, "loss": 0.0387, "step": 77750 }, { "epoch": 0.8308136118382392, "grad_norm": 0.6961976885795593, "learning_rate": 9.834608700057352e-07, "loss": 0.0731, "step": 77760 }, { "epoch": 0.8309204551525188, "grad_norm": 2.7385120391845703, "learning_rate": 9.834565842835231e-07, "loss": 0.0602, "step": 77770 }, { "epoch": 0.8310272984667985, "grad_norm": 0.812671422958374, "learning_rate": 9.83452298015452e-07, "loss": 0.0327, "step": 77780 }, { "epoch": 0.831134141781078, "grad_norm": 0.05734671652317047, "learning_rate": 9.834480112015269e-07, "loss": 0.024, "step": 77790 }, { "epoch": 0.8312409850953577, "grad_norm": 4.439445972442627, "learning_rate": 9.834437238417524e-07, "loss": 0.0571, "step": 77800 }, { "epoch": 0.8313478284096373, "grad_norm": 0.13941003382205963, "learning_rate": 9.834394359361336e-07, "loss": 0.0684, "step": 77810 }, { "epoch": 0.8314546717239168, "grad_norm": 5.3384108543396, "learning_rate": 9.83435147484675e-07, "loss": 0.0359, "step": 77820 }, { "epoch": 0.8315615150381965, "grad_norm": 0.3098452389240265, "learning_rate": 9.834308584873816e-07, "loss": 0.0373, "step": 77830 }, { "epoch": 0.8316683583524761, "grad_norm": 0.5727357268333435, "learning_rate": 9.834265689442584e-07, "loss": 0.0558, "step": 77840 }, { "epoch": 0.8317752016667557, "grad_norm": 4.96358060836792, "learning_rate": 9.8342227885531e-07, "loss": 0.0884, "step": 77850 }, { "epoch": 0.8318820449810354, "grad_norm": 6.940036296844482, "learning_rate": 9.834179882205416e-07, "loss": 0.0413, "step": 77860 }, { "epoch": 0.8319888882953149, "grad_norm": 12.399532318115234, "learning_rate": 9.834136970399575e-07, "loss": 0.035, "step": 77870 }, { "epoch": 0.8320957316095945, "grad_norm": 1.0158027410507202, "learning_rate": 9.83409405313563e-07, "loss": 0.0416, "step": 77880 }, { "epoch": 0.8322025749238742, "grad_norm": 7.599735260009766, "learning_rate": 9.83405113041363e-07, "loss": 0.0148, "step": 77890 }, { "epoch": 0.8323094182381537, "grad_norm": 0.8305882811546326, "learning_rate": 9.83400820223362e-07, "loss": 0.0472, "step": 77900 }, { "epoch": 0.8324162615524333, "grad_norm": 0.49478617310523987, "learning_rate": 9.833965268595647e-07, "loss": 0.0419, "step": 77910 }, { "epoch": 0.832523104866713, "grad_norm": 2.8281710147857666, "learning_rate": 9.833922329499766e-07, "loss": 0.0334, "step": 77920 }, { "epoch": 0.8326299481809926, "grad_norm": 0.9517900347709656, "learning_rate": 9.83387938494602e-07, "loss": 0.033, "step": 77930 }, { "epoch": 0.8327367914952721, "grad_norm": 3.6776039600372314, "learning_rate": 9.83383643493446e-07, "loss": 0.1487, "step": 77940 }, { "epoch": 0.8328436348095518, "grad_norm": 0.16580082476139069, "learning_rate": 9.833793479465133e-07, "loss": 0.0535, "step": 77950 }, { "epoch": 0.8329504781238314, "grad_norm": 1.3079155683517456, "learning_rate": 9.83375051853809e-07, "loss": 0.019, "step": 77960 }, { "epoch": 0.8330573214381111, "grad_norm": 4.355323314666748, "learning_rate": 9.833707552153377e-07, "loss": 0.1656, "step": 77970 }, { "epoch": 0.8331641647523906, "grad_norm": 1.5034124851226807, "learning_rate": 9.833664580311044e-07, "loss": 0.0392, "step": 77980 }, { "epoch": 0.8332710080666702, "grad_norm": 15.735550880432129, "learning_rate": 9.833621603011138e-07, "loss": 0.0676, "step": 77990 }, { "epoch": 0.8333778513809499, "grad_norm": 0.044465042650699615, "learning_rate": 9.833578620253708e-07, "loss": 0.0542, "step": 78000 }, { "epoch": 0.8334846946952295, "grad_norm": 0.7029580473899841, "learning_rate": 9.833535632038804e-07, "loss": 0.0174, "step": 78010 }, { "epoch": 0.833591538009509, "grad_norm": 4.241741180419922, "learning_rate": 9.833492638366473e-07, "loss": 0.1101, "step": 78020 }, { "epoch": 0.8336983813237887, "grad_norm": 7.674578666687012, "learning_rate": 9.833449639236764e-07, "loss": 0.0294, "step": 78030 }, { "epoch": 0.8338052246380683, "grad_norm": 0.2816876471042633, "learning_rate": 9.833406634649726e-07, "loss": 0.0441, "step": 78040 }, { "epoch": 0.8339120679523478, "grad_norm": 9.14945125579834, "learning_rate": 9.833363624605407e-07, "loss": 0.0603, "step": 78050 }, { "epoch": 0.8340189112666275, "grad_norm": 8.35700511932373, "learning_rate": 9.833320609103855e-07, "loss": 0.0298, "step": 78060 }, { "epoch": 0.8341257545809071, "grad_norm": 0.08062566816806793, "learning_rate": 9.833277588145118e-07, "loss": 0.1006, "step": 78070 }, { "epoch": 0.8342325978951867, "grad_norm": 2.6503031253814697, "learning_rate": 9.833234561729249e-07, "loss": 0.0575, "step": 78080 }, { "epoch": 0.8343394412094663, "grad_norm": 0.19296041131019592, "learning_rate": 9.833191529856291e-07, "loss": 0.0509, "step": 78090 }, { "epoch": 0.8344462845237459, "grad_norm": 4.904388427734375, "learning_rate": 9.833148492526294e-07, "loss": 0.0478, "step": 78100 }, { "epoch": 0.8345531278380255, "grad_norm": 0.1840846687555313, "learning_rate": 9.83310544973931e-07, "loss": 0.0326, "step": 78110 }, { "epoch": 0.8346599711523052, "grad_norm": 10.562145233154297, "learning_rate": 9.833062401495387e-07, "loss": 0.1003, "step": 78120 }, { "epoch": 0.8347668144665847, "grad_norm": 2.7629170417785645, "learning_rate": 9.833019347794569e-07, "loss": 0.0326, "step": 78130 }, { "epoch": 0.8348736577808643, "grad_norm": 4.942113876342773, "learning_rate": 9.832976288636907e-07, "loss": 0.0416, "step": 78140 }, { "epoch": 0.834980501095144, "grad_norm": 7.659608840942383, "learning_rate": 9.83293322402245e-07, "loss": 0.0283, "step": 78150 }, { "epoch": 0.8350873444094236, "grad_norm": 3.223613977432251, "learning_rate": 9.832890153951248e-07, "loss": 0.0277, "step": 78160 }, { "epoch": 0.8351941877237032, "grad_norm": 0.2567313611507416, "learning_rate": 9.832847078423347e-07, "loss": 0.0833, "step": 78170 }, { "epoch": 0.8353010310379828, "grad_norm": 0.006690627429634333, "learning_rate": 9.8328039974388e-07, "loss": 0.0418, "step": 78180 }, { "epoch": 0.8354078743522624, "grad_norm": 1.4293142557144165, "learning_rate": 9.832760910997648e-07, "loss": 0.025, "step": 78190 }, { "epoch": 0.8355147176665421, "grad_norm": 8.947951316833496, "learning_rate": 9.832717819099948e-07, "loss": 0.0999, "step": 78200 }, { "epoch": 0.8356215609808216, "grad_norm": 3.1891887187957764, "learning_rate": 9.832674721745743e-07, "loss": 0.0359, "step": 78210 }, { "epoch": 0.8357284042951012, "grad_norm": 6.6974663734436035, "learning_rate": 9.832631618935082e-07, "loss": 0.0774, "step": 78220 }, { "epoch": 0.8358352476093809, "grad_norm": 1.0936884880065918, "learning_rate": 9.832588510668016e-07, "loss": 0.045, "step": 78230 }, { "epoch": 0.8359420909236605, "grad_norm": 1.4947434663772583, "learning_rate": 9.832545396944596e-07, "loss": 0.0678, "step": 78240 }, { "epoch": 0.83604893423794, "grad_norm": 2.8286328315734863, "learning_rate": 9.832502277764863e-07, "loss": 0.0615, "step": 78250 }, { "epoch": 0.8361557775522197, "grad_norm": 0.45939478278160095, "learning_rate": 9.832459153128874e-07, "loss": 0.0395, "step": 78260 }, { "epoch": 0.8362626208664993, "grad_norm": 1.8669342994689941, "learning_rate": 9.832416023036671e-07, "loss": 0.0493, "step": 78270 }, { "epoch": 0.8363694641807788, "grad_norm": 14.265661239624023, "learning_rate": 9.832372887488308e-07, "loss": 0.0363, "step": 78280 }, { "epoch": 0.8364763074950585, "grad_norm": 2.167175054550171, "learning_rate": 9.83232974648383e-07, "loss": 0.0744, "step": 78290 }, { "epoch": 0.8365831508093381, "grad_norm": 0.05007980763912201, "learning_rate": 9.832286600023287e-07, "loss": 0.0147, "step": 78300 }, { "epoch": 0.8366899941236177, "grad_norm": 0.05481177940964699, "learning_rate": 9.832243448106728e-07, "loss": 0.075, "step": 78310 }, { "epoch": 0.8367968374378973, "grad_norm": 10.14416790008545, "learning_rate": 9.8322002907342e-07, "loss": 0.0623, "step": 78320 }, { "epoch": 0.8369036807521769, "grad_norm": 4.5282816886901855, "learning_rate": 9.832157127905754e-07, "loss": 0.0506, "step": 78330 }, { "epoch": 0.8370105240664566, "grad_norm": 2.443380117416382, "learning_rate": 9.83211395962144e-07, "loss": 0.0407, "step": 78340 }, { "epoch": 0.8371173673807362, "grad_norm": 13.061779022216797, "learning_rate": 9.832070785881301e-07, "loss": 0.0767, "step": 78350 }, { "epoch": 0.8372242106950157, "grad_norm": 6.186529636383057, "learning_rate": 9.832027606685392e-07, "loss": 0.0102, "step": 78360 }, { "epoch": 0.8373310540092954, "grad_norm": 4.012228965759277, "learning_rate": 9.831984422033758e-07, "loss": 0.0283, "step": 78370 }, { "epoch": 0.837437897323575, "grad_norm": 3.737344741821289, "learning_rate": 9.83194123192645e-07, "loss": 0.0613, "step": 78380 }, { "epoch": 0.8375447406378546, "grad_norm": 6.0701985359191895, "learning_rate": 9.831898036363514e-07, "loss": 0.059, "step": 78390 }, { "epoch": 0.8376515839521342, "grad_norm": 1.804724931716919, "learning_rate": 9.831854835345004e-07, "loss": 0.0181, "step": 78400 }, { "epoch": 0.8377584272664138, "grad_norm": 9.344862937927246, "learning_rate": 9.831811628870961e-07, "loss": 0.102, "step": 78410 }, { "epoch": 0.8378652705806934, "grad_norm": 2.2312090396881104, "learning_rate": 9.83176841694144e-07, "loss": 0.0643, "step": 78420 }, { "epoch": 0.8379721138949731, "grad_norm": 0.2806842625141144, "learning_rate": 9.83172519955649e-07, "loss": 0.0525, "step": 78430 }, { "epoch": 0.8380789572092526, "grad_norm": 2.2337839603424072, "learning_rate": 9.831681976716153e-07, "loss": 0.0385, "step": 78440 }, { "epoch": 0.8381858005235322, "grad_norm": 0.14931608736515045, "learning_rate": 9.831638748420484e-07, "loss": 0.0171, "step": 78450 }, { "epoch": 0.8382926438378119, "grad_norm": 0.4535174071788788, "learning_rate": 9.83159551466953e-07, "loss": 0.0777, "step": 78460 }, { "epoch": 0.8383994871520914, "grad_norm": 0.19635312259197235, "learning_rate": 9.831552275463342e-07, "loss": 0.0964, "step": 78470 }, { "epoch": 0.838506330466371, "grad_norm": 1.7635188102722168, "learning_rate": 9.831509030801965e-07, "loss": 0.0378, "step": 78480 }, { "epoch": 0.8386131737806507, "grad_norm": 3.6305594444274902, "learning_rate": 9.83146578068545e-07, "loss": 0.0736, "step": 78490 }, { "epoch": 0.8387200170949303, "grad_norm": 8.794336318969727, "learning_rate": 9.831422525113846e-07, "loss": 0.0386, "step": 78500 }, { "epoch": 0.8388268604092098, "grad_norm": 2.3000996112823486, "learning_rate": 9.8313792640872e-07, "loss": 0.0458, "step": 78510 }, { "epoch": 0.8389337037234895, "grad_norm": 2.8817453384399414, "learning_rate": 9.831335997605564e-07, "loss": 0.0415, "step": 78520 }, { "epoch": 0.8390405470377691, "grad_norm": 2.5432794094085693, "learning_rate": 9.831292725668984e-07, "loss": 0.0293, "step": 78530 }, { "epoch": 0.8391473903520488, "grad_norm": 0.4165109097957611, "learning_rate": 9.83124944827751e-07, "loss": 0.0418, "step": 78540 }, { "epoch": 0.8392542336663283, "grad_norm": 2.756343364715576, "learning_rate": 9.831206165431192e-07, "loss": 0.0263, "step": 78550 }, { "epoch": 0.8393610769806079, "grad_norm": 0.19140782952308655, "learning_rate": 9.831162877130076e-07, "loss": 0.0346, "step": 78560 }, { "epoch": 0.8394679202948876, "grad_norm": 4.9538726806640625, "learning_rate": 9.831119583374212e-07, "loss": 0.0629, "step": 78570 }, { "epoch": 0.8395747636091672, "grad_norm": 5.358476638793945, "learning_rate": 9.83107628416365e-07, "loss": 0.0504, "step": 78580 }, { "epoch": 0.8396816069234467, "grad_norm": 5.026293754577637, "learning_rate": 9.831032979498439e-07, "loss": 0.0389, "step": 78590 }, { "epoch": 0.8397884502377264, "grad_norm": 0.6575669050216675, "learning_rate": 9.830989669378626e-07, "loss": 0.0591, "step": 78600 }, { "epoch": 0.839895293552006, "grad_norm": 1.7456623315811157, "learning_rate": 9.830946353804263e-07, "loss": 0.0296, "step": 78610 }, { "epoch": 0.8400021368662856, "grad_norm": 8.755425453186035, "learning_rate": 9.830903032775395e-07, "loss": 0.114, "step": 78620 }, { "epoch": 0.8401089801805652, "grad_norm": 7.04033899307251, "learning_rate": 9.830859706292073e-07, "loss": 0.0718, "step": 78630 }, { "epoch": 0.8402158234948448, "grad_norm": 1.5370979309082031, "learning_rate": 9.830816374354348e-07, "loss": 0.0202, "step": 78640 }, { "epoch": 0.8403226668091244, "grad_norm": 1.3927212953567505, "learning_rate": 9.830773036962265e-07, "loss": 0.0445, "step": 78650 }, { "epoch": 0.840429510123404, "grad_norm": 2.8627586364746094, "learning_rate": 9.830729694115873e-07, "loss": 0.0541, "step": 78660 }, { "epoch": 0.8405363534376836, "grad_norm": 3.2974843978881836, "learning_rate": 9.830686345815225e-07, "loss": 0.0298, "step": 78670 }, { "epoch": 0.8406431967519632, "grad_norm": 17.633920669555664, "learning_rate": 9.830642992060367e-07, "loss": 0.1272, "step": 78680 }, { "epoch": 0.8407500400662429, "grad_norm": 5.64838171005249, "learning_rate": 9.830599632851349e-07, "loss": 0.0617, "step": 78690 }, { "epoch": 0.8408568833805224, "grad_norm": 6.577862739562988, "learning_rate": 9.830556268188218e-07, "loss": 0.0381, "step": 78700 }, { "epoch": 0.8409637266948021, "grad_norm": 7.956282615661621, "learning_rate": 9.830512898071025e-07, "loss": 0.0288, "step": 78710 }, { "epoch": 0.8410705700090817, "grad_norm": 7.192888259887695, "learning_rate": 9.830469522499819e-07, "loss": 0.1074, "step": 78720 }, { "epoch": 0.8411774133233613, "grad_norm": 3.7939157485961914, "learning_rate": 9.830426141474648e-07, "loss": 0.0495, "step": 78730 }, { "epoch": 0.841284256637641, "grad_norm": 1.418676733970642, "learning_rate": 9.83038275499556e-07, "loss": 0.0383, "step": 78740 }, { "epoch": 0.8413910999519205, "grad_norm": 0.3951783776283264, "learning_rate": 9.830339363062607e-07, "loss": 0.0483, "step": 78750 }, { "epoch": 0.8414979432662001, "grad_norm": 0.010776829905807972, "learning_rate": 9.830295965675836e-07, "loss": 0.078, "step": 78760 }, { "epoch": 0.8416047865804798, "grad_norm": 2.060105323791504, "learning_rate": 9.830252562835295e-07, "loss": 0.0281, "step": 78770 }, { "epoch": 0.8417116298947593, "grad_norm": 4.110933303833008, "learning_rate": 9.830209154541036e-07, "loss": 0.0779, "step": 78780 }, { "epoch": 0.8418184732090389, "grad_norm": 28.155195236206055, "learning_rate": 9.830165740793106e-07, "loss": 0.094, "step": 78790 }, { "epoch": 0.8419253165233186, "grad_norm": 0.046991970390081406, "learning_rate": 9.830122321591554e-07, "loss": 0.0266, "step": 78800 }, { "epoch": 0.8420321598375982, "grad_norm": 6.891066074371338, "learning_rate": 9.83007889693643e-07, "loss": 0.061, "step": 78810 }, { "epoch": 0.8421390031518777, "grad_norm": 3.64277720451355, "learning_rate": 9.830035466827781e-07, "loss": 0.0737, "step": 78820 }, { "epoch": 0.8422458464661574, "grad_norm": 2.4206268787384033, "learning_rate": 9.82999203126566e-07, "loss": 0.0441, "step": 78830 }, { "epoch": 0.842352689780437, "grad_norm": 8.714338302612305, "learning_rate": 9.82994859025011e-07, "loss": 0.0476, "step": 78840 }, { "epoch": 0.8424595330947166, "grad_norm": 15.819845199584961, "learning_rate": 9.829905143781185e-07, "loss": 0.0398, "step": 78850 }, { "epoch": 0.8425663764089962, "grad_norm": 0.8533859848976135, "learning_rate": 9.82986169185893e-07, "loss": 0.036, "step": 78860 }, { "epoch": 0.8426732197232758, "grad_norm": 4.893970966339111, "learning_rate": 9.829818234483402e-07, "loss": 0.0601, "step": 78870 }, { "epoch": 0.8427800630375554, "grad_norm": 3.5061211585998535, "learning_rate": 9.829774771654642e-07, "loss": 0.0441, "step": 78880 }, { "epoch": 0.842886906351835, "grad_norm": 19.104076385498047, "learning_rate": 9.829731303372702e-07, "loss": 0.0244, "step": 78890 }, { "epoch": 0.8429937496661146, "grad_norm": 0.0124284652993083, "learning_rate": 9.82968782963763e-07, "loss": 0.0377, "step": 78900 }, { "epoch": 0.8431005929803943, "grad_norm": 2.6136035919189453, "learning_rate": 9.829644350449476e-07, "loss": 0.0396, "step": 78910 }, { "epoch": 0.8432074362946739, "grad_norm": 5.864988327026367, "learning_rate": 9.82960086580829e-07, "loss": 0.0938, "step": 78920 }, { "epoch": 0.8433142796089534, "grad_norm": 0.0672781765460968, "learning_rate": 9.82955737571412e-07, "loss": 0.0661, "step": 78930 }, { "epoch": 0.8434211229232331, "grad_norm": 1.7667759656906128, "learning_rate": 9.829513880167014e-07, "loss": 0.0656, "step": 78940 }, { "epoch": 0.8435279662375127, "grad_norm": 29.535852432250977, "learning_rate": 9.829470379167025e-07, "loss": 0.0853, "step": 78950 }, { "epoch": 0.8436348095517923, "grad_norm": 1.0109211206436157, "learning_rate": 9.829426872714195e-07, "loss": 0.1582, "step": 78960 }, { "epoch": 0.843741652866072, "grad_norm": 0.07122774422168732, "learning_rate": 9.82938336080858e-07, "loss": 0.1098, "step": 78970 }, { "epoch": 0.8438484961803515, "grad_norm": 7.773120403289795, "learning_rate": 9.829339843450228e-07, "loss": 0.0498, "step": 78980 }, { "epoch": 0.8439553394946311, "grad_norm": 1.7889485359191895, "learning_rate": 9.829296320639187e-07, "loss": 0.0165, "step": 78990 }, { "epoch": 0.8440621828089108, "grad_norm": 5.590688228607178, "learning_rate": 9.829252792375504e-07, "loss": 0.0373, "step": 79000 }, { "epoch": 0.8441690261231903, "grad_norm": 7.594400405883789, "learning_rate": 9.82920925865923e-07, "loss": 0.017, "step": 79010 }, { "epoch": 0.8442758694374699, "grad_norm": 8.012165069580078, "learning_rate": 9.829165719490416e-07, "loss": 0.1022, "step": 79020 }, { "epoch": 0.8443827127517496, "grad_norm": 0.012953161261975765, "learning_rate": 9.829122174869109e-07, "loss": 0.0257, "step": 79030 }, { "epoch": 0.8444895560660292, "grad_norm": 3.9034430980682373, "learning_rate": 9.829078624795357e-07, "loss": 0.0406, "step": 79040 }, { "epoch": 0.8445963993803087, "grad_norm": 3.3533623218536377, "learning_rate": 9.829035069269212e-07, "loss": 0.0464, "step": 79050 }, { "epoch": 0.8447032426945884, "grad_norm": 7.000791549682617, "learning_rate": 9.82899150829072e-07, "loss": 0.1631, "step": 79060 }, { "epoch": 0.844810086008868, "grad_norm": 10.006515502929688, "learning_rate": 9.828947941859935e-07, "loss": 0.0329, "step": 79070 }, { "epoch": 0.8449169293231477, "grad_norm": 31.824132919311523, "learning_rate": 9.828904369976902e-07, "loss": 0.0431, "step": 79080 }, { "epoch": 0.8450237726374272, "grad_norm": 0.630737841129303, "learning_rate": 9.828860792641673e-07, "loss": 0.0378, "step": 79090 }, { "epoch": 0.8451306159517068, "grad_norm": 2.931330919265747, "learning_rate": 9.828817209854294e-07, "loss": 0.0156, "step": 79100 }, { "epoch": 0.8452374592659865, "grad_norm": 10.727128982543945, "learning_rate": 9.828773621614817e-07, "loss": 0.0835, "step": 79110 }, { "epoch": 0.845344302580266, "grad_norm": 8.885704040527344, "learning_rate": 9.828730027923289e-07, "loss": 0.1454, "step": 79120 }, { "epoch": 0.8454511458945456, "grad_norm": 3.2075610160827637, "learning_rate": 9.82868642877976e-07, "loss": 0.0348, "step": 79130 }, { "epoch": 0.8455579892088253, "grad_norm": 7.131420135498047, "learning_rate": 9.82864282418428e-07, "loss": 0.0648, "step": 79140 }, { "epoch": 0.8456648325231049, "grad_norm": 0.10730679333209991, "learning_rate": 9.8285992141369e-07, "loss": 0.0295, "step": 79150 }, { "epoch": 0.8457716758373844, "grad_norm": 5.364264488220215, "learning_rate": 9.828555598637665e-07, "loss": 0.0613, "step": 79160 }, { "epoch": 0.8458785191516641, "grad_norm": 0.113277368247509, "learning_rate": 9.828511977686627e-07, "loss": 0.0608, "step": 79170 }, { "epoch": 0.8459853624659437, "grad_norm": 4.839511871337891, "learning_rate": 9.828468351283834e-07, "loss": 0.0233, "step": 79180 }, { "epoch": 0.8460922057802233, "grad_norm": 18.4932918548584, "learning_rate": 9.828424719429335e-07, "loss": 0.0882, "step": 79190 }, { "epoch": 0.8461990490945029, "grad_norm": 5.445060729980469, "learning_rate": 9.828381082123182e-07, "loss": 0.0974, "step": 79200 }, { "epoch": 0.8463058924087825, "grad_norm": 3.4469518661499023, "learning_rate": 9.828337439365421e-07, "loss": 0.0303, "step": 79210 }, { "epoch": 0.8464127357230621, "grad_norm": 0.04729227349162102, "learning_rate": 9.828293791156104e-07, "loss": 0.0468, "step": 79220 }, { "epoch": 0.8465195790373418, "grad_norm": 5.295597076416016, "learning_rate": 9.828250137495277e-07, "loss": 0.117, "step": 79230 }, { "epoch": 0.8466264223516213, "grad_norm": 6.3846116065979, "learning_rate": 9.828206478382992e-07, "loss": 0.0266, "step": 79240 }, { "epoch": 0.8467332656659009, "grad_norm": 6.984841346740723, "learning_rate": 9.8281628138193e-07, "loss": 0.088, "step": 79250 }, { "epoch": 0.8468401089801806, "grad_norm": 9.771425247192383, "learning_rate": 9.828119143804243e-07, "loss": 0.0372, "step": 79260 }, { "epoch": 0.8469469522944602, "grad_norm": 2.2695865631103516, "learning_rate": 9.828075468337879e-07, "loss": 0.0302, "step": 79270 }, { "epoch": 0.8470537956087398, "grad_norm": 0.48830825090408325, "learning_rate": 9.82803178742025e-07, "loss": 0.0855, "step": 79280 }, { "epoch": 0.8471606389230194, "grad_norm": 0.5984625816345215, "learning_rate": 9.827988101051412e-07, "loss": 0.0349, "step": 79290 }, { "epoch": 0.847267482237299, "grad_norm": 2.1789793968200684, "learning_rate": 9.82794440923141e-07, "loss": 0.0415, "step": 79300 }, { "epoch": 0.8473743255515787, "grad_norm": 14.423985481262207, "learning_rate": 9.827900711960293e-07, "loss": 0.0772, "step": 79310 }, { "epoch": 0.8474811688658582, "grad_norm": 8.723679542541504, "learning_rate": 9.827857009238113e-07, "loss": 0.1414, "step": 79320 }, { "epoch": 0.8475880121801378, "grad_norm": 8.761364936828613, "learning_rate": 9.827813301064917e-07, "loss": 0.0423, "step": 79330 }, { "epoch": 0.8476948554944175, "grad_norm": 0.5610337257385254, "learning_rate": 9.827769587440757e-07, "loss": 0.0447, "step": 79340 }, { "epoch": 0.847801698808697, "grad_norm": 0.7121062874794006, "learning_rate": 9.82772586836568e-07, "loss": 0.0462, "step": 79350 }, { "epoch": 0.8479085421229766, "grad_norm": 3.5860562324523926, "learning_rate": 9.827682143839736e-07, "loss": 0.0401, "step": 79360 }, { "epoch": 0.8480153854372563, "grad_norm": 23.453777313232422, "learning_rate": 9.827638413862976e-07, "loss": 0.111, "step": 79370 }, { "epoch": 0.8481222287515359, "grad_norm": 1.985913872718811, "learning_rate": 9.827594678435446e-07, "loss": 0.0419, "step": 79380 }, { "epoch": 0.8482290720658154, "grad_norm": 6.033663749694824, "learning_rate": 9.827550937557198e-07, "loss": 0.0505, "step": 79390 }, { "epoch": 0.8483359153800951, "grad_norm": 16.04884147644043, "learning_rate": 9.82750719122828e-07, "loss": 0.0495, "step": 79400 }, { "epoch": 0.8484427586943747, "grad_norm": 1.333526372909546, "learning_rate": 9.827463439448742e-07, "loss": 0.0208, "step": 79410 }, { "epoch": 0.8485496020086543, "grad_norm": 0.022610289976000786, "learning_rate": 9.827419682218635e-07, "loss": 0.0526, "step": 79420 }, { "epoch": 0.8486564453229339, "grad_norm": 0.19808945059776306, "learning_rate": 9.827375919538006e-07, "loss": 0.0078, "step": 79430 }, { "epoch": 0.8487632886372135, "grad_norm": 0.2508186101913452, "learning_rate": 9.827332151406905e-07, "loss": 0.0429, "step": 79440 }, { "epoch": 0.8488701319514932, "grad_norm": 2.8383021354675293, "learning_rate": 9.82728837782538e-07, "loss": 0.0677, "step": 79450 }, { "epoch": 0.8489769752657728, "grad_norm": 6.472952365875244, "learning_rate": 9.827244598793486e-07, "loss": 0.0207, "step": 79460 }, { "epoch": 0.8490838185800523, "grad_norm": 0.265159547328949, "learning_rate": 9.827200814311265e-07, "loss": 0.0449, "step": 79470 }, { "epoch": 0.849190661894332, "grad_norm": 8.962322235107422, "learning_rate": 9.827157024378772e-07, "loss": 0.0446, "step": 79480 }, { "epoch": 0.8492975052086116, "grad_norm": 3.0570788383483887, "learning_rate": 9.827113228996054e-07, "loss": 0.0304, "step": 79490 }, { "epoch": 0.8494043485228912, "grad_norm": 5.263904571533203, "learning_rate": 9.827069428163158e-07, "loss": 0.0627, "step": 79500 }, { "epoch": 0.8495111918371708, "grad_norm": 4.779104709625244, "learning_rate": 9.827025621880141e-07, "loss": 0.0551, "step": 79510 }, { "epoch": 0.8496180351514504, "grad_norm": 5.287930488586426, "learning_rate": 9.826981810147045e-07, "loss": 0.0721, "step": 79520 }, { "epoch": 0.84972487846573, "grad_norm": 12.710418701171875, "learning_rate": 9.826937992963923e-07, "loss": 0.0457, "step": 79530 }, { "epoch": 0.8498317217800097, "grad_norm": 7.653957366943359, "learning_rate": 9.826894170330823e-07, "loss": 0.0404, "step": 79540 }, { "epoch": 0.8499385650942892, "grad_norm": 1.6371917724609375, "learning_rate": 9.826850342247795e-07, "loss": 0.031, "step": 79550 }, { "epoch": 0.8500454084085688, "grad_norm": 0.14638088643550873, "learning_rate": 9.82680650871489e-07, "loss": 0.0393, "step": 79560 }, { "epoch": 0.8501522517228485, "grad_norm": 1.0551683902740479, "learning_rate": 9.826762669732155e-07, "loss": 0.0169, "step": 79570 }, { "epoch": 0.850259095037128, "grad_norm": 1.6978416442871094, "learning_rate": 9.826718825299642e-07, "loss": 0.0277, "step": 79580 }, { "epoch": 0.8503659383514076, "grad_norm": 2.6082229614257812, "learning_rate": 9.826674975417397e-07, "loss": 0.0655, "step": 79590 }, { "epoch": 0.8504727816656873, "grad_norm": 0.6852821111679077, "learning_rate": 9.826631120085474e-07, "loss": 0.0825, "step": 79600 }, { "epoch": 0.8505796249799669, "grad_norm": 6.355146884918213, "learning_rate": 9.826587259303918e-07, "loss": 0.0964, "step": 79610 }, { "epoch": 0.8506864682942464, "grad_norm": 3.4078822135925293, "learning_rate": 9.826543393072781e-07, "loss": 0.0505, "step": 79620 }, { "epoch": 0.8507933116085261, "grad_norm": 0.24936030805110931, "learning_rate": 9.826499521392114e-07, "loss": 0.0241, "step": 79630 }, { "epoch": 0.8509001549228057, "grad_norm": 5.336216926574707, "learning_rate": 9.826455644261963e-07, "loss": 0.0294, "step": 79640 }, { "epoch": 0.8510069982370854, "grad_norm": 1.5184818506240845, "learning_rate": 9.82641176168238e-07, "loss": 0.0548, "step": 79650 }, { "epoch": 0.8511138415513649, "grad_norm": 0.11427605152130127, "learning_rate": 9.826367873653415e-07, "loss": 0.0297, "step": 79660 }, { "epoch": 0.8512206848656445, "grad_norm": 6.335838317871094, "learning_rate": 9.826323980175115e-07, "loss": 0.0671, "step": 79670 }, { "epoch": 0.8513275281799242, "grad_norm": 2.5372912883758545, "learning_rate": 9.82628008124753e-07, "loss": 0.0323, "step": 79680 }, { "epoch": 0.8514343714942038, "grad_norm": 1.9104371070861816, "learning_rate": 9.826236176870713e-07, "loss": 0.1036, "step": 79690 }, { "epoch": 0.8515412148084833, "grad_norm": 2.992614507675171, "learning_rate": 9.826192267044712e-07, "loss": 0.0285, "step": 79700 }, { "epoch": 0.851648058122763, "grad_norm": 0.9465147256851196, "learning_rate": 9.826148351769573e-07, "loss": 0.0589, "step": 79710 }, { "epoch": 0.8517549014370426, "grad_norm": 0.4518792927265167, "learning_rate": 9.826104431045349e-07, "loss": 0.0438, "step": 79720 }, { "epoch": 0.8518617447513221, "grad_norm": 1.6836597919464111, "learning_rate": 9.826060504872087e-07, "loss": 0.0466, "step": 79730 }, { "epoch": 0.8519685880656018, "grad_norm": 0.33878493309020996, "learning_rate": 9.826016573249842e-07, "loss": 0.0198, "step": 79740 }, { "epoch": 0.8520754313798814, "grad_norm": 11.172062873840332, "learning_rate": 9.825972636178657e-07, "loss": 0.0554, "step": 79750 }, { "epoch": 0.852182274694161, "grad_norm": 4.169539928436279, "learning_rate": 9.825928693658587e-07, "loss": 0.0766, "step": 79760 }, { "epoch": 0.8522891180084406, "grad_norm": 1.9143136739730835, "learning_rate": 9.825884745689677e-07, "loss": 0.0234, "step": 79770 }, { "epoch": 0.8523959613227202, "grad_norm": 4.9151129722595215, "learning_rate": 9.82584079227198e-07, "loss": 0.04, "step": 79780 }, { "epoch": 0.8525028046369998, "grad_norm": 4.168757915496826, "learning_rate": 9.825796833405545e-07, "loss": 0.0606, "step": 79790 }, { "epoch": 0.8526096479512795, "grad_norm": 4.597988128662109, "learning_rate": 9.825752869090422e-07, "loss": 0.0437, "step": 79800 }, { "epoch": 0.852716491265559, "grad_norm": 2.2441296577453613, "learning_rate": 9.825708899326657e-07, "loss": 0.0828, "step": 79810 }, { "epoch": 0.8528233345798387, "grad_norm": 0.9499688148498535, "learning_rate": 9.825664924114305e-07, "loss": 0.0192, "step": 79820 }, { "epoch": 0.8529301778941183, "grad_norm": 1.3722466230392456, "learning_rate": 9.825620943453411e-07, "loss": 0.0236, "step": 79830 }, { "epoch": 0.8530370212083979, "grad_norm": 0.14616447687149048, "learning_rate": 9.825576957344029e-07, "loss": 0.0494, "step": 79840 }, { "epoch": 0.8531438645226775, "grad_norm": 2.5344133377075195, "learning_rate": 9.825532965786206e-07, "loss": 0.0287, "step": 79850 }, { "epoch": 0.8532507078369571, "grad_norm": 0.25307804346084595, "learning_rate": 9.82548896877999e-07, "loss": 0.0258, "step": 79860 }, { "epoch": 0.8533575511512367, "grad_norm": 0.6215012073516846, "learning_rate": 9.825444966325434e-07, "loss": 0.0799, "step": 79870 }, { "epoch": 0.8534643944655164, "grad_norm": 8.576884269714355, "learning_rate": 9.825400958422586e-07, "loss": 0.0156, "step": 79880 }, { "epoch": 0.8535712377797959, "grad_norm": 2.948624610900879, "learning_rate": 9.825356945071498e-07, "loss": 0.0927, "step": 79890 }, { "epoch": 0.8536780810940755, "grad_norm": 0.014111185446381569, "learning_rate": 9.825312926272215e-07, "loss": 0.0454, "step": 79900 }, { "epoch": 0.8537849244083552, "grad_norm": 5.9827423095703125, "learning_rate": 9.825268902024792e-07, "loss": 0.109, "step": 79910 }, { "epoch": 0.8538917677226348, "grad_norm": 0.4766590893268585, "learning_rate": 9.825224872329276e-07, "loss": 0.046, "step": 79920 }, { "epoch": 0.8539986110369143, "grad_norm": 1.2660890817642212, "learning_rate": 9.825180837185717e-07, "loss": 0.0502, "step": 79930 }, { "epoch": 0.854105454351194, "grad_norm": 2.4322431087493896, "learning_rate": 9.825136796594163e-07, "loss": 0.0679, "step": 79940 }, { "epoch": 0.8542122976654736, "grad_norm": 4.614443302154541, "learning_rate": 9.825092750554667e-07, "loss": 0.0842, "step": 79950 }, { "epoch": 0.8543191409797531, "grad_norm": 5.250021457672119, "learning_rate": 9.825048699067275e-07, "loss": 0.0717, "step": 79960 }, { "epoch": 0.8544259842940328, "grad_norm": 5.5022993087768555, "learning_rate": 9.82500464213204e-07, "loss": 0.0313, "step": 79970 }, { "epoch": 0.8545328276083124, "grad_norm": 1.4650061130523682, "learning_rate": 9.824960579749012e-07, "loss": 0.0308, "step": 79980 }, { "epoch": 0.854639670922592, "grad_norm": 6.641465187072754, "learning_rate": 9.824916511918238e-07, "loss": 0.0482, "step": 79990 }, { "epoch": 0.8547465142368716, "grad_norm": 2.5907270908355713, "learning_rate": 9.82487243863977e-07, "loss": 0.041, "step": 80000 }, { "epoch": 0.8548533575511512, "grad_norm": 0.3158169090747833, "learning_rate": 9.824828359913655e-07, "loss": 0.0371, "step": 80010 }, { "epoch": 0.8549602008654309, "grad_norm": 10.381688117980957, "learning_rate": 9.824784275739945e-07, "loss": 0.0363, "step": 80020 }, { "epoch": 0.8550670441797105, "grad_norm": 4.0509233474731445, "learning_rate": 9.824740186118692e-07, "loss": 0.049, "step": 80030 }, { "epoch": 0.85517388749399, "grad_norm": 6.422720909118652, "learning_rate": 9.82469609104994e-07, "loss": 0.05, "step": 80040 }, { "epoch": 0.8552807308082697, "grad_norm": 2.380936622619629, "learning_rate": 9.824651990533743e-07, "loss": 0.0442, "step": 80050 }, { "epoch": 0.8553875741225493, "grad_norm": 1.6970462799072266, "learning_rate": 9.824607884570151e-07, "loss": 0.0802, "step": 80060 }, { "epoch": 0.8554944174368289, "grad_norm": 0.617483377456665, "learning_rate": 9.824563773159212e-07, "loss": 0.0539, "step": 80070 }, { "epoch": 0.8556012607511085, "grad_norm": 2.0694165229797363, "learning_rate": 9.824519656300975e-07, "loss": 0.043, "step": 80080 }, { "epoch": 0.8557081040653881, "grad_norm": 6.760987758636475, "learning_rate": 9.824475533995494e-07, "loss": 0.0618, "step": 80090 }, { "epoch": 0.8558149473796677, "grad_norm": 4.95552396774292, "learning_rate": 9.824431406242814e-07, "loss": 0.0531, "step": 80100 }, { "epoch": 0.8559217906939474, "grad_norm": 0.04591125249862671, "learning_rate": 9.824387273042986e-07, "loss": 0.0301, "step": 80110 }, { "epoch": 0.8560286340082269, "grad_norm": 0.48423007130622864, "learning_rate": 9.824343134396063e-07, "loss": 0.0357, "step": 80120 }, { "epoch": 0.8561354773225065, "grad_norm": 5.009514331817627, "learning_rate": 9.82429899030209e-07, "loss": 0.0341, "step": 80130 }, { "epoch": 0.8562423206367862, "grad_norm": 4.472187042236328, "learning_rate": 9.82425484076112e-07, "loss": 0.056, "step": 80140 }, { "epoch": 0.8563491639510658, "grad_norm": 1.5899767875671387, "learning_rate": 9.824210685773204e-07, "loss": 0.0283, "step": 80150 }, { "epoch": 0.8564560072653453, "grad_norm": 1.9715718030929565, "learning_rate": 9.824166525338389e-07, "loss": 0.0609, "step": 80160 }, { "epoch": 0.856562850579625, "grad_norm": 27.06756019592285, "learning_rate": 9.824122359456726e-07, "loss": 0.1298, "step": 80170 }, { "epoch": 0.8566696938939046, "grad_norm": 3.044725179672241, "learning_rate": 9.824078188128265e-07, "loss": 0.0459, "step": 80180 }, { "epoch": 0.8567765372081843, "grad_norm": 6.739738464355469, "learning_rate": 9.824034011353053e-07, "loss": 0.0554, "step": 80190 }, { "epoch": 0.8568833805224638, "grad_norm": 5.577783107757568, "learning_rate": 9.823989829131145e-07, "loss": 0.0709, "step": 80200 }, { "epoch": 0.8569902238367434, "grad_norm": 0.12339073419570923, "learning_rate": 9.823945641462588e-07, "loss": 0.0517, "step": 80210 }, { "epoch": 0.8570970671510231, "grad_norm": 0.3511351943016052, "learning_rate": 9.823901448347433e-07, "loss": 0.0867, "step": 80220 }, { "epoch": 0.8572039104653026, "grad_norm": 3.922956943511963, "learning_rate": 9.823857249785727e-07, "loss": 0.0444, "step": 80230 }, { "epoch": 0.8573107537795822, "grad_norm": 0.4404810965061188, "learning_rate": 9.823813045777525e-07, "loss": 0.0948, "step": 80240 }, { "epoch": 0.8574175970938619, "grad_norm": 1.70713472366333, "learning_rate": 9.823768836322872e-07, "loss": 0.0519, "step": 80250 }, { "epoch": 0.8575244404081415, "grad_norm": 6.3507981300354, "learning_rate": 9.82372462142182e-07, "loss": 0.1023, "step": 80260 }, { "epoch": 0.857631283722421, "grad_norm": 4.057175159454346, "learning_rate": 9.82368040107442e-07, "loss": 0.0188, "step": 80270 }, { "epoch": 0.8577381270367007, "grad_norm": 1.4055476188659668, "learning_rate": 9.82363617528072e-07, "loss": 0.0252, "step": 80280 }, { "epoch": 0.8578449703509803, "grad_norm": 0.31454482674598694, "learning_rate": 9.823591944040772e-07, "loss": 0.017, "step": 80290 }, { "epoch": 0.8579518136652599, "grad_norm": 0.14906401932239532, "learning_rate": 9.823547707354623e-07, "loss": 0.1023, "step": 80300 }, { "epoch": 0.8580586569795395, "grad_norm": 1.8824468851089478, "learning_rate": 9.823503465222324e-07, "loss": 0.0994, "step": 80310 }, { "epoch": 0.8581655002938191, "grad_norm": 5.586404800415039, "learning_rate": 9.823459217643927e-07, "loss": 0.0692, "step": 80320 }, { "epoch": 0.8582723436080987, "grad_norm": 3.1164541244506836, "learning_rate": 9.823414964619481e-07, "loss": 0.0304, "step": 80330 }, { "epoch": 0.8583791869223784, "grad_norm": 1.7461706399917603, "learning_rate": 9.823370706149033e-07, "loss": 0.0212, "step": 80340 }, { "epoch": 0.8584860302366579, "grad_norm": 1.7048509120941162, "learning_rate": 9.823326442232638e-07, "loss": 0.0726, "step": 80350 }, { "epoch": 0.8585928735509375, "grad_norm": 5.85764741897583, "learning_rate": 9.823282172870341e-07, "loss": 0.0593, "step": 80360 }, { "epoch": 0.8586997168652172, "grad_norm": 2.1697275638580322, "learning_rate": 9.823237898062196e-07, "loss": 0.0284, "step": 80370 }, { "epoch": 0.8588065601794967, "grad_norm": 9.063243865966797, "learning_rate": 9.823193617808252e-07, "loss": 0.062, "step": 80380 }, { "epoch": 0.8589134034937764, "grad_norm": 1.5969434976577759, "learning_rate": 9.823149332108557e-07, "loss": 0.0358, "step": 80390 }, { "epoch": 0.859020246808056, "grad_norm": 5.302501678466797, "learning_rate": 9.823105040963164e-07, "loss": 0.0715, "step": 80400 }, { "epoch": 0.8591270901223356, "grad_norm": 8.966384887695312, "learning_rate": 9.82306074437212e-07, "loss": 0.0671, "step": 80410 }, { "epoch": 0.8592339334366152, "grad_norm": 10.337442398071289, "learning_rate": 9.823016442335477e-07, "loss": 0.0326, "step": 80420 }, { "epoch": 0.8593407767508948, "grad_norm": 16.0092716217041, "learning_rate": 9.822972134853283e-07, "loss": 0.0667, "step": 80430 }, { "epoch": 0.8594476200651744, "grad_norm": 3.878296136856079, "learning_rate": 9.822927821925591e-07, "loss": 0.0547, "step": 80440 }, { "epoch": 0.8595544633794541, "grad_norm": 4.49561882019043, "learning_rate": 9.82288350355245e-07, "loss": 0.0659, "step": 80450 }, { "epoch": 0.8596613066937336, "grad_norm": 1.2944084405899048, "learning_rate": 9.822839179733909e-07, "loss": 0.0321, "step": 80460 }, { "epoch": 0.8597681500080132, "grad_norm": 0.034836865961551666, "learning_rate": 9.822794850470018e-07, "loss": 0.0609, "step": 80470 }, { "epoch": 0.8598749933222929, "grad_norm": 4.192441940307617, "learning_rate": 9.822750515760827e-07, "loss": 0.0305, "step": 80480 }, { "epoch": 0.8599818366365725, "grad_norm": 8.150152206420898, "learning_rate": 9.822706175606388e-07, "loss": 0.0855, "step": 80490 }, { "epoch": 0.860088679950852, "grad_norm": 3.0391461849212646, "learning_rate": 9.822661830006748e-07, "loss": 0.017, "step": 80500 }, { "epoch": 0.8601955232651317, "grad_norm": 3.3272900581359863, "learning_rate": 9.822617478961962e-07, "loss": 0.0318, "step": 80510 }, { "epoch": 0.8603023665794113, "grad_norm": 0.04427073150873184, "learning_rate": 9.822573122472073e-07, "loss": 0.0102, "step": 80520 }, { "epoch": 0.8604092098936909, "grad_norm": 10.254922866821289, "learning_rate": 9.822528760537138e-07, "loss": 0.0788, "step": 80530 }, { "epoch": 0.8605160532079705, "grad_norm": 10.082180976867676, "learning_rate": 9.822484393157203e-07, "loss": 0.0755, "step": 80540 }, { "epoch": 0.8606228965222501, "grad_norm": 0.04411105066537857, "learning_rate": 9.82244002033232e-07, "loss": 0.0139, "step": 80550 }, { "epoch": 0.8607297398365298, "grad_norm": 0.3704183101654053, "learning_rate": 9.822395642062536e-07, "loss": 0.0512, "step": 80560 }, { "epoch": 0.8608365831508094, "grad_norm": 4.646478652954102, "learning_rate": 9.822351258347905e-07, "loss": 0.0495, "step": 80570 }, { "epoch": 0.8609434264650889, "grad_norm": 1.587266206741333, "learning_rate": 9.822306869188474e-07, "loss": 0.0632, "step": 80580 }, { "epoch": 0.8610502697793686, "grad_norm": 2.901066541671753, "learning_rate": 9.822262474584298e-07, "loss": 0.0479, "step": 80590 }, { "epoch": 0.8611571130936482, "grad_norm": 16.397785186767578, "learning_rate": 9.82221807453542e-07, "loss": 0.0658, "step": 80600 }, { "epoch": 0.8612639564079277, "grad_norm": 1.3127988576889038, "learning_rate": 9.822173669041895e-07, "loss": 0.0246, "step": 80610 }, { "epoch": 0.8613707997222074, "grad_norm": 0.592758297920227, "learning_rate": 9.822129258103773e-07, "loss": 0.0317, "step": 80620 }, { "epoch": 0.861477643036487, "grad_norm": 7.285306453704834, "learning_rate": 9.822084841721103e-07, "loss": 0.0648, "step": 80630 }, { "epoch": 0.8615844863507666, "grad_norm": 1.728203296661377, "learning_rate": 9.822040419893934e-07, "loss": 0.0296, "step": 80640 }, { "epoch": 0.8616913296650462, "grad_norm": 0.7334738373756409, "learning_rate": 9.821995992622319e-07, "loss": 0.0663, "step": 80650 }, { "epoch": 0.8617981729793258, "grad_norm": 11.578062057495117, "learning_rate": 9.821951559906304e-07, "loss": 0.1163, "step": 80660 }, { "epoch": 0.8619050162936054, "grad_norm": 0.09826746582984924, "learning_rate": 9.821907121745945e-07, "loss": 0.0188, "step": 80670 }, { "epoch": 0.8620118596078851, "grad_norm": 6.657737731933594, "learning_rate": 9.821862678141286e-07, "loss": 0.0518, "step": 80680 }, { "epoch": 0.8621187029221646, "grad_norm": 9.234353065490723, "learning_rate": 9.821818229092382e-07, "loss": 0.0601, "step": 80690 }, { "epoch": 0.8622255462364442, "grad_norm": 0.32399287819862366, "learning_rate": 9.821773774599282e-07, "loss": 0.0552, "step": 80700 }, { "epoch": 0.8623323895507239, "grad_norm": 2.8074982166290283, "learning_rate": 9.821729314662034e-07, "loss": 0.0887, "step": 80710 }, { "epoch": 0.8624392328650035, "grad_norm": 8.684821128845215, "learning_rate": 9.82168484928069e-07, "loss": 0.0407, "step": 80720 }, { "epoch": 0.862546076179283, "grad_norm": 7.2120795249938965, "learning_rate": 9.8216403784553e-07, "loss": 0.1468, "step": 80730 }, { "epoch": 0.8626529194935627, "grad_norm": 7.5762410163879395, "learning_rate": 9.821595902185913e-07, "loss": 0.0544, "step": 80740 }, { "epoch": 0.8627597628078423, "grad_norm": 4.643422603607178, "learning_rate": 9.821551420472583e-07, "loss": 0.0902, "step": 80750 }, { "epoch": 0.862866606122122, "grad_norm": 4.132071495056152, "learning_rate": 9.821506933315355e-07, "loss": 0.0412, "step": 80760 }, { "epoch": 0.8629734494364015, "grad_norm": 2.1973843574523926, "learning_rate": 9.821462440714283e-07, "loss": 0.0425, "step": 80770 }, { "epoch": 0.8630802927506811, "grad_norm": 0.5946135520935059, "learning_rate": 9.821417942669415e-07, "loss": 0.0971, "step": 80780 }, { "epoch": 0.8631871360649608, "grad_norm": 0.8685376644134521, "learning_rate": 9.821373439180803e-07, "loss": 0.0523, "step": 80790 }, { "epoch": 0.8632939793792404, "grad_norm": 0.22290030121803284, "learning_rate": 9.821328930248497e-07, "loss": 0.0249, "step": 80800 }, { "epoch": 0.8634008226935199, "grad_norm": 0.18900007009506226, "learning_rate": 9.821284415872545e-07, "loss": 0.1076, "step": 80810 }, { "epoch": 0.8635076660077996, "grad_norm": 3.31657338142395, "learning_rate": 9.821239896053e-07, "loss": 0.0369, "step": 80820 }, { "epoch": 0.8636145093220792, "grad_norm": 1.6722970008850098, "learning_rate": 9.821195370789912e-07, "loss": 0.0613, "step": 80830 }, { "epoch": 0.8637213526363587, "grad_norm": 1.0239214897155762, "learning_rate": 9.82115084008333e-07, "loss": 0.0194, "step": 80840 }, { "epoch": 0.8638281959506384, "grad_norm": 6.674562931060791, "learning_rate": 9.821106303933307e-07, "loss": 0.0305, "step": 80850 }, { "epoch": 0.863935039264918, "grad_norm": 3.4473745822906494, "learning_rate": 9.821061762339889e-07, "loss": 0.0506, "step": 80860 }, { "epoch": 0.8640418825791976, "grad_norm": 2.4769952297210693, "learning_rate": 9.821017215303126e-07, "loss": 0.0731, "step": 80870 }, { "epoch": 0.8641487258934772, "grad_norm": 3.6221723556518555, "learning_rate": 9.820972662823075e-07, "loss": 0.0367, "step": 80880 }, { "epoch": 0.8642555692077568, "grad_norm": 0.10702955722808838, "learning_rate": 9.82092810489978e-07, "loss": 0.0235, "step": 80890 }, { "epoch": 0.8643624125220364, "grad_norm": 4.1957292556762695, "learning_rate": 9.820883541533293e-07, "loss": 0.0341, "step": 80900 }, { "epoch": 0.8644692558363161, "grad_norm": 2.0272107124328613, "learning_rate": 9.820838972723664e-07, "loss": 0.0126, "step": 80910 }, { "epoch": 0.8645760991505956, "grad_norm": 4.557272911071777, "learning_rate": 9.820794398470947e-07, "loss": 0.0264, "step": 80920 }, { "epoch": 0.8646829424648753, "grad_norm": 6.282866954803467, "learning_rate": 9.820749818775188e-07, "loss": 0.0845, "step": 80930 }, { "epoch": 0.8647897857791549, "grad_norm": 7.168159008026123, "learning_rate": 9.820705233636435e-07, "loss": 0.1034, "step": 80940 }, { "epoch": 0.8648966290934345, "grad_norm": 3.064159393310547, "learning_rate": 9.820660643054746e-07, "loss": 0.054, "step": 80950 }, { "epoch": 0.8650034724077141, "grad_norm": 4.0571746826171875, "learning_rate": 9.820616047030165e-07, "loss": 0.063, "step": 80960 }, { "epoch": 0.8651103157219937, "grad_norm": 5.8371806144714355, "learning_rate": 9.820571445562746e-07, "loss": 0.0252, "step": 80970 }, { "epoch": 0.8652171590362733, "grad_norm": 0.616450846195221, "learning_rate": 9.820526838652538e-07, "loss": 0.0139, "step": 80980 }, { "epoch": 0.865324002350553, "grad_norm": 7.496796607971191, "learning_rate": 9.820482226299593e-07, "loss": 0.0578, "step": 80990 }, { "epoch": 0.8654308456648325, "grad_norm": 0.6842488646507263, "learning_rate": 9.820437608503957e-07, "loss": 0.0293, "step": 81000 }, { "epoch": 0.8655376889791121, "grad_norm": 9.239485740661621, "learning_rate": 9.820392985265683e-07, "loss": 0.0935, "step": 81010 }, { "epoch": 0.8656445322933918, "grad_norm": 4.212435245513916, "learning_rate": 9.820348356584825e-07, "loss": 0.0762, "step": 81020 }, { "epoch": 0.8657513756076713, "grad_norm": 5.277173042297363, "learning_rate": 9.820303722461426e-07, "loss": 0.0535, "step": 81030 }, { "epoch": 0.8658582189219509, "grad_norm": 11.05827522277832, "learning_rate": 9.820259082895542e-07, "loss": 0.0649, "step": 81040 }, { "epoch": 0.8659650622362306, "grad_norm": 0.033349234610795975, "learning_rate": 9.820214437887222e-07, "loss": 0.1299, "step": 81050 }, { "epoch": 0.8660719055505102, "grad_norm": 4.0487213134765625, "learning_rate": 9.820169787436516e-07, "loss": 0.0223, "step": 81060 }, { "epoch": 0.8661787488647897, "grad_norm": 0.01746503822505474, "learning_rate": 9.820125131543472e-07, "loss": 0.0458, "step": 81070 }, { "epoch": 0.8662855921790694, "grad_norm": 1.3076658248901367, "learning_rate": 9.820080470208147e-07, "loss": 0.0413, "step": 81080 }, { "epoch": 0.866392435493349, "grad_norm": 0.4910069406032562, "learning_rate": 9.820035803430585e-07, "loss": 0.0996, "step": 81090 }, { "epoch": 0.8664992788076286, "grad_norm": 0.12280444800853729, "learning_rate": 9.81999113121084e-07, "loss": 0.0601, "step": 81100 }, { "epoch": 0.8666061221219082, "grad_norm": 2.5402770042419434, "learning_rate": 9.81994645354896e-07, "loss": 0.0848, "step": 81110 }, { "epoch": 0.8667129654361878, "grad_norm": 4.152318954467773, "learning_rate": 9.819901770444996e-07, "loss": 0.0331, "step": 81120 }, { "epoch": 0.8668198087504675, "grad_norm": 2.871241569519043, "learning_rate": 9.819857081899e-07, "loss": 0.0272, "step": 81130 }, { "epoch": 0.8669266520647471, "grad_norm": 0.49226757884025574, "learning_rate": 9.819812387911023e-07, "loss": 0.1056, "step": 81140 }, { "epoch": 0.8670334953790266, "grad_norm": 10.677475929260254, "learning_rate": 9.819767688481113e-07, "loss": 0.1157, "step": 81150 }, { "epoch": 0.8671403386933063, "grad_norm": 0.009411467239260674, "learning_rate": 9.819722983609321e-07, "loss": 0.056, "step": 81160 }, { "epoch": 0.8672471820075859, "grad_norm": 0.0770353376865387, "learning_rate": 9.819678273295699e-07, "loss": 0.0468, "step": 81170 }, { "epoch": 0.8673540253218655, "grad_norm": 0.26988935470581055, "learning_rate": 9.819633557540296e-07, "loss": 0.0283, "step": 81180 }, { "epoch": 0.8674608686361451, "grad_norm": 1.2010746002197266, "learning_rate": 9.819588836343162e-07, "loss": 0.1442, "step": 81190 }, { "epoch": 0.8675677119504247, "grad_norm": 1.7210983037948608, "learning_rate": 9.81954410970435e-07, "loss": 0.0224, "step": 81200 }, { "epoch": 0.8676745552647043, "grad_norm": 2.9451775550842285, "learning_rate": 9.819499377623907e-07, "loss": 0.0619, "step": 81210 }, { "epoch": 0.867781398578984, "grad_norm": 0.1025027185678482, "learning_rate": 9.819454640101888e-07, "loss": 0.0134, "step": 81220 }, { "epoch": 0.8678882418932635, "grad_norm": 0.009308830834925175, "learning_rate": 9.819409897138338e-07, "loss": 0.0672, "step": 81230 }, { "epoch": 0.8679950852075431, "grad_norm": 1.8810399770736694, "learning_rate": 9.819365148733314e-07, "loss": 0.0382, "step": 81240 }, { "epoch": 0.8681019285218228, "grad_norm": 2.385002374649048, "learning_rate": 9.81932039488686e-07, "loss": 0.1564, "step": 81250 }, { "epoch": 0.8682087718361023, "grad_norm": 0.06797873973846436, "learning_rate": 9.819275635599032e-07, "loss": 0.0344, "step": 81260 }, { "epoch": 0.8683156151503819, "grad_norm": 3.964533567428589, "learning_rate": 9.819230870869876e-07, "loss": 0.0384, "step": 81270 }, { "epoch": 0.8684224584646616, "grad_norm": 0.028465602546930313, "learning_rate": 9.819186100699446e-07, "loss": 0.0501, "step": 81280 }, { "epoch": 0.8685293017789412, "grad_norm": 5.652433395385742, "learning_rate": 9.81914132508779e-07, "loss": 0.0334, "step": 81290 }, { "epoch": 0.8686361450932208, "grad_norm": 0.171515092253685, "learning_rate": 9.81909654403496e-07, "loss": 0.0242, "step": 81300 }, { "epoch": 0.8687429884075004, "grad_norm": 12.116759300231934, "learning_rate": 9.819051757541006e-07, "loss": 0.105, "step": 81310 }, { "epoch": 0.86884983172178, "grad_norm": 3.6029508113861084, "learning_rate": 9.81900696560598e-07, "loss": 0.0546, "step": 81320 }, { "epoch": 0.8689566750360597, "grad_norm": 0.12033875286579132, "learning_rate": 9.818962168229932e-07, "loss": 0.0263, "step": 81330 }, { "epoch": 0.8690635183503392, "grad_norm": 1.5621569156646729, "learning_rate": 9.81891736541291e-07, "loss": 0.0637, "step": 81340 }, { "epoch": 0.8691703616646188, "grad_norm": 1.0229952335357666, "learning_rate": 9.818872557154967e-07, "loss": 0.0377, "step": 81350 }, { "epoch": 0.8692772049788985, "grad_norm": 1.5570943355560303, "learning_rate": 9.818827743456153e-07, "loss": 0.0975, "step": 81360 }, { "epoch": 0.8693840482931781, "grad_norm": 0.30812159180641174, "learning_rate": 9.81878292431652e-07, "loss": 0.0429, "step": 81370 }, { "epoch": 0.8694908916074576, "grad_norm": 0.12590928375720978, "learning_rate": 9.818738099736117e-07, "loss": 0.0315, "step": 81380 }, { "epoch": 0.8695977349217373, "grad_norm": 0.06056424602866173, "learning_rate": 9.818693269714995e-07, "loss": 0.0428, "step": 81390 }, { "epoch": 0.8697045782360169, "grad_norm": 15.419760704040527, "learning_rate": 9.818648434253206e-07, "loss": 0.0672, "step": 81400 }, { "epoch": 0.8698114215502964, "grad_norm": 2.8414783477783203, "learning_rate": 9.818603593350796e-07, "loss": 0.0519, "step": 81410 }, { "epoch": 0.8699182648645761, "grad_norm": 6.305028438568115, "learning_rate": 9.818558747007822e-07, "loss": 0.1028, "step": 81420 }, { "epoch": 0.8700251081788557, "grad_norm": 5.913963317871094, "learning_rate": 9.818513895224328e-07, "loss": 0.0948, "step": 81430 }, { "epoch": 0.8701319514931353, "grad_norm": 1.5025275945663452, "learning_rate": 9.81846903800037e-07, "loss": 0.0327, "step": 81440 }, { "epoch": 0.870238794807415, "grad_norm": 3.0900983810424805, "learning_rate": 9.818424175335997e-07, "loss": 0.0536, "step": 81450 }, { "epoch": 0.8703456381216945, "grad_norm": 0.24145402014255524, "learning_rate": 9.81837930723126e-07, "loss": 0.0804, "step": 81460 }, { "epoch": 0.8704524814359741, "grad_norm": 6.670109272003174, "learning_rate": 9.818334433686208e-07, "loss": 0.024, "step": 81470 }, { "epoch": 0.8705593247502538, "grad_norm": 1.5477206707000732, "learning_rate": 9.818289554700893e-07, "loss": 0.0224, "step": 81480 }, { "epoch": 0.8706661680645333, "grad_norm": 4.048922538757324, "learning_rate": 9.818244670275366e-07, "loss": 0.0339, "step": 81490 }, { "epoch": 0.870773011378813, "grad_norm": 0.3543923795223236, "learning_rate": 9.818199780409677e-07, "loss": 0.1113, "step": 81500 }, { "epoch": 0.8708798546930926, "grad_norm": 4.243800640106201, "learning_rate": 9.818154885103877e-07, "loss": 0.0394, "step": 81510 }, { "epoch": 0.8709866980073722, "grad_norm": 2.491851568222046, "learning_rate": 9.818109984358016e-07, "loss": 0.0274, "step": 81520 }, { "epoch": 0.8710935413216518, "grad_norm": 0.036630600690841675, "learning_rate": 9.818065078172147e-07, "loss": 0.0369, "step": 81530 }, { "epoch": 0.8712003846359314, "grad_norm": 2.054136037826538, "learning_rate": 9.818020166546317e-07, "loss": 0.062, "step": 81540 }, { "epoch": 0.871307227950211, "grad_norm": 5.680964469909668, "learning_rate": 9.817975249480577e-07, "loss": 0.0564, "step": 81550 }, { "epoch": 0.8714140712644907, "grad_norm": 0.11893706768751144, "learning_rate": 9.817930326974982e-07, "loss": 0.0505, "step": 81560 }, { "epoch": 0.8715209145787702, "grad_norm": 6.0760650634765625, "learning_rate": 9.81788539902958e-07, "loss": 0.0304, "step": 81570 }, { "epoch": 0.8716277578930498, "grad_norm": 0.9766935110092163, "learning_rate": 9.81784046564442e-07, "loss": 0.1182, "step": 81580 }, { "epoch": 0.8717346012073295, "grad_norm": 5.103899955749512, "learning_rate": 9.817795526819556e-07, "loss": 0.0671, "step": 81590 }, { "epoch": 0.871841444521609, "grad_norm": 2.8038344383239746, "learning_rate": 9.817750582555035e-07, "loss": 0.0422, "step": 81600 }, { "epoch": 0.8719482878358886, "grad_norm": 0.019332420080900192, "learning_rate": 9.817705632850913e-07, "loss": 0.0211, "step": 81610 }, { "epoch": 0.8720551311501683, "grad_norm": 4.340857028961182, "learning_rate": 9.817660677707237e-07, "loss": 0.0563, "step": 81620 }, { "epoch": 0.8721619744644479, "grad_norm": 3.380621910095215, "learning_rate": 9.817615717124056e-07, "loss": 0.0215, "step": 81630 }, { "epoch": 0.8722688177787274, "grad_norm": 7.2740068435668945, "learning_rate": 9.817570751101428e-07, "loss": 0.0675, "step": 81640 }, { "epoch": 0.8723756610930071, "grad_norm": 3.927597761154175, "learning_rate": 9.817525779639395e-07, "loss": 0.0299, "step": 81650 }, { "epoch": 0.8724825044072867, "grad_norm": 0.5414308905601501, "learning_rate": 9.817480802738013e-07, "loss": 0.006, "step": 81660 }, { "epoch": 0.8725893477215664, "grad_norm": 8.895709037780762, "learning_rate": 9.81743582039733e-07, "loss": 0.0371, "step": 81670 }, { "epoch": 0.872696191035846, "grad_norm": 2.492720127105713, "learning_rate": 9.8173908326174e-07, "loss": 0.0155, "step": 81680 }, { "epoch": 0.8728030343501255, "grad_norm": 1.9505360126495361, "learning_rate": 9.817345839398271e-07, "loss": 0.0464, "step": 81690 }, { "epoch": 0.8729098776644052, "grad_norm": 3.6899688243865967, "learning_rate": 9.81730084074e-07, "loss": 0.0403, "step": 81700 }, { "epoch": 0.8730167209786848, "grad_norm": 0.6357913017272949, "learning_rate": 9.817255836642625e-07, "loss": 0.031, "step": 81710 }, { "epoch": 0.8731235642929643, "grad_norm": 0.030355077236890793, "learning_rate": 9.817210827106211e-07, "loss": 0.0246, "step": 81720 }, { "epoch": 0.873230407607244, "grad_norm": 1.2551536560058594, "learning_rate": 9.817165812130798e-07, "loss": 0.0295, "step": 81730 }, { "epoch": 0.8733372509215236, "grad_norm": 1.3247488737106323, "learning_rate": 9.817120791716444e-07, "loss": 0.0432, "step": 81740 }, { "epoch": 0.8734440942358032, "grad_norm": 0.06265372037887573, "learning_rate": 9.817075765863196e-07, "loss": 0.0527, "step": 81750 }, { "epoch": 0.8735509375500828, "grad_norm": 3.717292070388794, "learning_rate": 9.817030734571105e-07, "loss": 0.0808, "step": 81760 }, { "epoch": 0.8736577808643624, "grad_norm": 1.078635334968567, "learning_rate": 9.816985697840226e-07, "loss": 0.0424, "step": 81770 }, { "epoch": 0.873764624178642, "grad_norm": 0.21550820767879486, "learning_rate": 9.816940655670602e-07, "loss": 0.0088, "step": 81780 }, { "epoch": 0.8738714674929217, "grad_norm": 0.0520813949406147, "learning_rate": 9.816895608062291e-07, "loss": 0.071, "step": 81790 }, { "epoch": 0.8739783108072012, "grad_norm": 2.4679884910583496, "learning_rate": 9.81685055501534e-07, "loss": 0.0669, "step": 81800 }, { "epoch": 0.8740851541214808, "grad_norm": 4.41923713684082, "learning_rate": 9.816805496529804e-07, "loss": 0.0617, "step": 81810 }, { "epoch": 0.8741919974357605, "grad_norm": 1.4525259733200073, "learning_rate": 9.81676043260573e-07, "loss": 0.049, "step": 81820 }, { "epoch": 0.87429884075004, "grad_norm": 2.126901149749756, "learning_rate": 9.816715363243168e-07, "loss": 0.0614, "step": 81830 }, { "epoch": 0.8744056840643196, "grad_norm": 0.33836111426353455, "learning_rate": 9.816670288442173e-07, "loss": 0.0362, "step": 81840 }, { "epoch": 0.8745125273785993, "grad_norm": 3.5442535877227783, "learning_rate": 9.816625208202793e-07, "loss": 0.0565, "step": 81850 }, { "epoch": 0.8746193706928789, "grad_norm": 8.684358596801758, "learning_rate": 9.81658012252508e-07, "loss": 0.0869, "step": 81860 }, { "epoch": 0.8747262140071586, "grad_norm": 0.6823225021362305, "learning_rate": 9.816535031409084e-07, "loss": 0.0596, "step": 81870 }, { "epoch": 0.8748330573214381, "grad_norm": 7.817875385284424, "learning_rate": 9.816489934854854e-07, "loss": 0.0618, "step": 81880 }, { "epoch": 0.8749399006357177, "grad_norm": 6.051729202270508, "learning_rate": 9.816444832862448e-07, "loss": 0.0692, "step": 81890 }, { "epoch": 0.8750467439499974, "grad_norm": 0.043820954859256744, "learning_rate": 9.816399725431908e-07, "loss": 0.0208, "step": 81900 }, { "epoch": 0.8751535872642769, "grad_norm": 3.0322084426879883, "learning_rate": 9.816354612563292e-07, "loss": 0.0582, "step": 81910 }, { "epoch": 0.8752604305785565, "grad_norm": 0.8627592325210571, "learning_rate": 9.816309494256647e-07, "loss": 0.0273, "step": 81920 }, { "epoch": 0.8753672738928362, "grad_norm": 4.523010730743408, "learning_rate": 9.816264370512026e-07, "loss": 0.0336, "step": 81930 }, { "epoch": 0.8754741172071158, "grad_norm": 4.990695476531982, "learning_rate": 9.816219241329477e-07, "loss": 0.062, "step": 81940 }, { "epoch": 0.8755809605213953, "grad_norm": 3.920604944229126, "learning_rate": 9.816174106709055e-07, "loss": 0.0411, "step": 81950 }, { "epoch": 0.875687803835675, "grad_norm": 7.925361633300781, "learning_rate": 9.81612896665081e-07, "loss": 0.0724, "step": 81960 }, { "epoch": 0.8757946471499546, "grad_norm": 1.3653390407562256, "learning_rate": 9.81608382115479e-07, "loss": 0.0134, "step": 81970 }, { "epoch": 0.8759014904642342, "grad_norm": 3.0007071495056152, "learning_rate": 9.816038670221048e-07, "loss": 0.0305, "step": 81980 }, { "epoch": 0.8760083337785138, "grad_norm": 1.1646103858947754, "learning_rate": 9.815993513849635e-07, "loss": 0.1032, "step": 81990 }, { "epoch": 0.8761151770927934, "grad_norm": 1.1109610795974731, "learning_rate": 9.815948352040602e-07, "loss": 0.0139, "step": 82000 }, { "epoch": 0.876222020407073, "grad_norm": 11.228967666625977, "learning_rate": 9.815903184794e-07, "loss": 0.0907, "step": 82010 }, { "epoch": 0.8763288637213527, "grad_norm": 2.5839121341705322, "learning_rate": 9.81585801210988e-07, "loss": 0.0499, "step": 82020 }, { "epoch": 0.8764357070356322, "grad_norm": 1.2353394031524658, "learning_rate": 9.81581283398829e-07, "loss": 0.0323, "step": 82030 }, { "epoch": 0.8765425503499119, "grad_norm": 4.901242733001709, "learning_rate": 9.815767650429286e-07, "loss": 0.0192, "step": 82040 }, { "epoch": 0.8766493936641915, "grad_norm": 3.6610195636749268, "learning_rate": 9.815722461432919e-07, "loss": 0.1194, "step": 82050 }, { "epoch": 0.876756236978471, "grad_norm": 1.0667694807052612, "learning_rate": 9.815677266999233e-07, "loss": 0.024, "step": 82060 }, { "epoch": 0.8768630802927507, "grad_norm": 0.016337020322680473, "learning_rate": 9.815632067128288e-07, "loss": 0.0868, "step": 82070 }, { "epoch": 0.8769699236070303, "grad_norm": 0.22985990345478058, "learning_rate": 9.815586861820129e-07, "loss": 0.0359, "step": 82080 }, { "epoch": 0.8770767669213099, "grad_norm": 12.510845184326172, "learning_rate": 9.81554165107481e-07, "loss": 0.0504, "step": 82090 }, { "epoch": 0.8771836102355896, "grad_norm": 0.3992727994918823, "learning_rate": 9.81549643489238e-07, "loss": 0.0422, "step": 82100 }, { "epoch": 0.8772904535498691, "grad_norm": 3.321727991104126, "learning_rate": 9.815451213272894e-07, "loss": 0.0529, "step": 82110 }, { "epoch": 0.8773972968641487, "grad_norm": 2.146843671798706, "learning_rate": 9.815405986216397e-07, "loss": 0.0646, "step": 82120 }, { "epoch": 0.8775041401784284, "grad_norm": 3.7706878185272217, "learning_rate": 9.815360753722944e-07, "loss": 0.028, "step": 82130 }, { "epoch": 0.8776109834927079, "grad_norm": 0.10872172564268112, "learning_rate": 9.815315515792586e-07, "loss": 0.0266, "step": 82140 }, { "epoch": 0.8777178268069875, "grad_norm": 0.03212239593267441, "learning_rate": 9.815270272425372e-07, "loss": 0.0573, "step": 82150 }, { "epoch": 0.8778246701212672, "grad_norm": 0.8030469417572021, "learning_rate": 9.815225023621355e-07, "loss": 0.0304, "step": 82160 }, { "epoch": 0.8779315134355468, "grad_norm": 3.336665391921997, "learning_rate": 9.815179769380587e-07, "loss": 0.0917, "step": 82170 }, { "epoch": 0.8780383567498263, "grad_norm": 0.08304227888584137, "learning_rate": 9.815134509703115e-07, "loss": 0.0803, "step": 82180 }, { "epoch": 0.878145200064106, "grad_norm": 1.1078944206237793, "learning_rate": 9.815089244588996e-07, "loss": 0.0167, "step": 82190 }, { "epoch": 0.8782520433783856, "grad_norm": 0.42500314116477966, "learning_rate": 9.815043974038274e-07, "loss": 0.0892, "step": 82200 }, { "epoch": 0.8783588866926652, "grad_norm": 0.11411149799823761, "learning_rate": 9.814998698051007e-07, "loss": 0.0287, "step": 82210 }, { "epoch": 0.8784657300069448, "grad_norm": 0.10315831005573273, "learning_rate": 9.814953416627243e-07, "loss": 0.022, "step": 82220 }, { "epoch": 0.8785725733212244, "grad_norm": 0.04110166057944298, "learning_rate": 9.814908129767033e-07, "loss": 0.063, "step": 82230 }, { "epoch": 0.8786794166355041, "grad_norm": 3.9433982372283936, "learning_rate": 9.814862837470426e-07, "loss": 0.0319, "step": 82240 }, { "epoch": 0.8787862599497837, "grad_norm": 2.256978988647461, "learning_rate": 9.814817539737476e-07, "loss": 0.0297, "step": 82250 }, { "epoch": 0.8788931032640632, "grad_norm": 5.752106666564941, "learning_rate": 9.814772236568236e-07, "loss": 0.0195, "step": 82260 }, { "epoch": 0.8789999465783429, "grad_norm": 0.25745266675949097, "learning_rate": 9.814726927962752e-07, "loss": 0.0835, "step": 82270 }, { "epoch": 0.8791067898926225, "grad_norm": 1.5330846309661865, "learning_rate": 9.81468161392108e-07, "loss": 0.0231, "step": 82280 }, { "epoch": 0.879213633206902, "grad_norm": 1.819749355316162, "learning_rate": 9.814636294443267e-07, "loss": 0.0335, "step": 82290 }, { "epoch": 0.8793204765211817, "grad_norm": 6.761951923370361, "learning_rate": 9.814590969529367e-07, "loss": 0.0749, "step": 82300 }, { "epoch": 0.8794273198354613, "grad_norm": 0.5924580097198486, "learning_rate": 9.814545639179432e-07, "loss": 0.0395, "step": 82310 }, { "epoch": 0.8795341631497409, "grad_norm": 9.950078010559082, "learning_rate": 9.81450030339351e-07, "loss": 0.0425, "step": 82320 }, { "epoch": 0.8796410064640205, "grad_norm": 0.14939701557159424, "learning_rate": 9.814454962171656e-07, "loss": 0.0679, "step": 82330 }, { "epoch": 0.8797478497783001, "grad_norm": 4.025454998016357, "learning_rate": 9.814409615513916e-07, "loss": 0.0368, "step": 82340 }, { "epoch": 0.8798546930925797, "grad_norm": 0.12340331822633743, "learning_rate": 9.814364263420347e-07, "loss": 0.0391, "step": 82350 }, { "epoch": 0.8799615364068594, "grad_norm": 11.913475036621094, "learning_rate": 9.814318905890997e-07, "loss": 0.0743, "step": 82360 }, { "epoch": 0.8800683797211389, "grad_norm": 7.708934783935547, "learning_rate": 9.814273542925914e-07, "loss": 0.0383, "step": 82370 }, { "epoch": 0.8801752230354185, "grad_norm": 0.09728079289197922, "learning_rate": 9.814228174525156e-07, "loss": 0.0453, "step": 82380 }, { "epoch": 0.8802820663496982, "grad_norm": 0.9665741324424744, "learning_rate": 9.81418280068877e-07, "loss": 0.0315, "step": 82390 }, { "epoch": 0.8803889096639778, "grad_norm": 0.0602828674018383, "learning_rate": 9.81413742141681e-07, "loss": 0.0478, "step": 82400 }, { "epoch": 0.8804957529782574, "grad_norm": 1.511669635772705, "learning_rate": 9.814092036709323e-07, "loss": 0.04, "step": 82410 }, { "epoch": 0.880602596292537, "grad_norm": 9.662002563476562, "learning_rate": 9.814046646566365e-07, "loss": 0.0306, "step": 82420 }, { "epoch": 0.8807094396068166, "grad_norm": 11.006587028503418, "learning_rate": 9.814001250987985e-07, "loss": 0.0671, "step": 82430 }, { "epoch": 0.8808162829210963, "grad_norm": 5.61132287979126, "learning_rate": 9.813955849974231e-07, "loss": 0.0413, "step": 82440 }, { "epoch": 0.8809231262353758, "grad_norm": 2.664918899536133, "learning_rate": 9.81391044352516e-07, "loss": 0.1081, "step": 82450 }, { "epoch": 0.8810299695496554, "grad_norm": 0.05200200155377388, "learning_rate": 9.81386503164082e-07, "loss": 0.0751, "step": 82460 }, { "epoch": 0.8811368128639351, "grad_norm": 4.291556358337402, "learning_rate": 9.813819614321265e-07, "loss": 0.0438, "step": 82470 }, { "epoch": 0.8812436561782147, "grad_norm": 0.9524321556091309, "learning_rate": 9.813774191566543e-07, "loss": 0.0428, "step": 82480 }, { "epoch": 0.8813504994924942, "grad_norm": 3.117710590362549, "learning_rate": 9.813728763376706e-07, "loss": 0.03, "step": 82490 }, { "epoch": 0.8814573428067739, "grad_norm": 0.7440118789672852, "learning_rate": 9.813683329751807e-07, "loss": 0.0712, "step": 82500 }, { "epoch": 0.8815641861210535, "grad_norm": 2.722478151321411, "learning_rate": 9.813637890691895e-07, "loss": 0.0961, "step": 82510 }, { "epoch": 0.881671029435333, "grad_norm": 7.533420085906982, "learning_rate": 9.813592446197022e-07, "loss": 0.0749, "step": 82520 }, { "epoch": 0.8817778727496127, "grad_norm": 3.5880138874053955, "learning_rate": 9.813546996267243e-07, "loss": 0.0246, "step": 82530 }, { "epoch": 0.8818847160638923, "grad_norm": 1.7577670812606812, "learning_rate": 9.813501540902603e-07, "loss": 0.1587, "step": 82540 }, { "epoch": 0.8819915593781719, "grad_norm": 1.037627100944519, "learning_rate": 9.813456080103157e-07, "loss": 0.0362, "step": 82550 }, { "epoch": 0.8820984026924515, "grad_norm": 0.060842424631118774, "learning_rate": 9.813410613868959e-07, "loss": 0.0338, "step": 82560 }, { "epoch": 0.8822052460067311, "grad_norm": 7.3912577629089355, "learning_rate": 9.813365142200055e-07, "loss": 0.0408, "step": 82570 }, { "epoch": 0.8823120893210107, "grad_norm": 0.09534552693367004, "learning_rate": 9.813319665096495e-07, "loss": 0.0156, "step": 82580 }, { "epoch": 0.8824189326352904, "grad_norm": 2.4342072010040283, "learning_rate": 9.813274182558338e-07, "loss": 0.0481, "step": 82590 }, { "epoch": 0.8825257759495699, "grad_norm": 11.495006561279297, "learning_rate": 9.81322869458563e-07, "loss": 0.1042, "step": 82600 }, { "epoch": 0.8826326192638496, "grad_norm": 4.235986709594727, "learning_rate": 9.813183201178424e-07, "loss": 0.0451, "step": 82610 }, { "epoch": 0.8827394625781292, "grad_norm": 0.12244726717472076, "learning_rate": 9.81313770233677e-07, "loss": 0.0295, "step": 82620 }, { "epoch": 0.8828463058924088, "grad_norm": 0.682129442691803, "learning_rate": 9.81309219806072e-07, "loss": 0.0771, "step": 82630 }, { "epoch": 0.8829531492066884, "grad_norm": 0.8631426095962524, "learning_rate": 9.813046688350326e-07, "loss": 0.0402, "step": 82640 }, { "epoch": 0.883059992520968, "grad_norm": 2.7687020301818848, "learning_rate": 9.81300117320564e-07, "loss": 0.0297, "step": 82650 }, { "epoch": 0.8831668358352476, "grad_norm": 1.662922739982605, "learning_rate": 9.812955652626714e-07, "loss": 0.0319, "step": 82660 }, { "epoch": 0.8832736791495273, "grad_norm": 5.08465576171875, "learning_rate": 9.812910126613593e-07, "loss": 0.0915, "step": 82670 }, { "epoch": 0.8833805224638068, "grad_norm": 14.98371410369873, "learning_rate": 9.812864595166337e-07, "loss": 0.0635, "step": 82680 }, { "epoch": 0.8834873657780864, "grad_norm": 1.6053457260131836, "learning_rate": 9.812819058284992e-07, "loss": 0.0428, "step": 82690 }, { "epoch": 0.8835942090923661, "grad_norm": 2.455970525741577, "learning_rate": 9.812773515969612e-07, "loss": 0.101, "step": 82700 }, { "epoch": 0.8837010524066456, "grad_norm": 8.475611686706543, "learning_rate": 9.812727968220248e-07, "loss": 0.0618, "step": 82710 }, { "epoch": 0.8838078957209252, "grad_norm": 4.837350368499756, "learning_rate": 9.812682415036948e-07, "loss": 0.0082, "step": 82720 }, { "epoch": 0.8839147390352049, "grad_norm": 0.10348540544509888, "learning_rate": 9.81263685641977e-07, "loss": 0.0923, "step": 82730 }, { "epoch": 0.8840215823494845, "grad_norm": 2.6121602058410645, "learning_rate": 9.812591292368758e-07, "loss": 0.0365, "step": 82740 }, { "epoch": 0.884128425663764, "grad_norm": 0.8363443613052368, "learning_rate": 9.812545722883971e-07, "loss": 0.0255, "step": 82750 }, { "epoch": 0.8842352689780437, "grad_norm": 1.0750691890716553, "learning_rate": 9.812500147965455e-07, "loss": 0.0247, "step": 82760 }, { "epoch": 0.8843421122923233, "grad_norm": 1.50007963180542, "learning_rate": 9.812454567613263e-07, "loss": 0.0397, "step": 82770 }, { "epoch": 0.884448955606603, "grad_norm": 14.80151653289795, "learning_rate": 9.812408981827445e-07, "loss": 0.0622, "step": 82780 }, { "epoch": 0.8845557989208825, "grad_norm": 0.02220247872173786, "learning_rate": 9.812363390608056e-07, "loss": 0.0255, "step": 82790 }, { "epoch": 0.8846626422351621, "grad_norm": 5.223860263824463, "learning_rate": 9.812317793955144e-07, "loss": 0.1055, "step": 82800 }, { "epoch": 0.8847694855494418, "grad_norm": 1.9989250898361206, "learning_rate": 9.812272191868764e-07, "loss": 0.0354, "step": 82810 }, { "epoch": 0.8848763288637214, "grad_norm": 5.553207874298096, "learning_rate": 9.812226584348966e-07, "loss": 0.1022, "step": 82820 }, { "epoch": 0.8849831721780009, "grad_norm": 10.954817771911621, "learning_rate": 9.812180971395797e-07, "loss": 0.0398, "step": 82830 }, { "epoch": 0.8850900154922806, "grad_norm": 10.465373992919922, "learning_rate": 9.812135353009316e-07, "loss": 0.0262, "step": 82840 }, { "epoch": 0.8851968588065602, "grad_norm": 0.22148911654949188, "learning_rate": 9.81208972918957e-07, "loss": 0.097, "step": 82850 }, { "epoch": 0.8853037021208398, "grad_norm": 5.549162864685059, "learning_rate": 9.812044099936614e-07, "loss": 0.0871, "step": 82860 }, { "epoch": 0.8854105454351194, "grad_norm": 0.32683271169662476, "learning_rate": 9.811998465250494e-07, "loss": 0.0557, "step": 82870 }, { "epoch": 0.885517388749399, "grad_norm": 1.8474880456924438, "learning_rate": 9.811952825131265e-07, "loss": 0.074, "step": 82880 }, { "epoch": 0.8856242320636786, "grad_norm": 2.223032236099243, "learning_rate": 9.811907179578978e-07, "loss": 0.061, "step": 82890 }, { "epoch": 0.8857310753779583, "grad_norm": 4.394400596618652, "learning_rate": 9.811861528593685e-07, "loss": 0.0497, "step": 82900 }, { "epoch": 0.8858379186922378, "grad_norm": 3.5836181640625, "learning_rate": 9.811815872175437e-07, "loss": 0.044, "step": 82910 }, { "epoch": 0.8859447620065174, "grad_norm": 6.430201053619385, "learning_rate": 9.811770210324287e-07, "loss": 0.0241, "step": 82920 }, { "epoch": 0.8860516053207971, "grad_norm": 2.1938931941986084, "learning_rate": 9.811724543040283e-07, "loss": 0.0372, "step": 82930 }, { "epoch": 0.8861584486350766, "grad_norm": 9.90753173828125, "learning_rate": 9.81167887032348e-07, "loss": 0.0727, "step": 82940 }, { "epoch": 0.8862652919493562, "grad_norm": 7.255556106567383, "learning_rate": 9.811633192173928e-07, "loss": 0.0347, "step": 82950 }, { "epoch": 0.8863721352636359, "grad_norm": 0.4413387179374695, "learning_rate": 9.811587508591679e-07, "loss": 0.052, "step": 82960 }, { "epoch": 0.8864789785779155, "grad_norm": 0.5525923371315002, "learning_rate": 9.811541819576785e-07, "loss": 0.0779, "step": 82970 }, { "epoch": 0.8865858218921951, "grad_norm": 4.36763858795166, "learning_rate": 9.811496125129297e-07, "loss": 0.0492, "step": 82980 }, { "epoch": 0.8866926652064747, "grad_norm": 0.0801929235458374, "learning_rate": 9.811450425249268e-07, "loss": 0.0317, "step": 82990 }, { "epoch": 0.8867995085207543, "grad_norm": 9.395809173583984, "learning_rate": 9.811404719936747e-07, "loss": 0.0499, "step": 83000 }, { "epoch": 0.886906351835034, "grad_norm": 9.429266929626465, "learning_rate": 9.811359009191786e-07, "loss": 0.0837, "step": 83010 }, { "epoch": 0.8870131951493135, "grad_norm": 1.2213828563690186, "learning_rate": 9.811313293014438e-07, "loss": 0.0309, "step": 83020 }, { "epoch": 0.8871200384635931, "grad_norm": 2.1108169555664062, "learning_rate": 9.811267571404755e-07, "loss": 0.0565, "step": 83030 }, { "epoch": 0.8872268817778728, "grad_norm": 14.13716983795166, "learning_rate": 9.811221844362787e-07, "loss": 0.0832, "step": 83040 }, { "epoch": 0.8873337250921524, "grad_norm": 8.339248657226562, "learning_rate": 9.811176111888588e-07, "loss": 0.0372, "step": 83050 }, { "epoch": 0.8874405684064319, "grad_norm": 5.7417216300964355, "learning_rate": 9.811130373982207e-07, "loss": 0.0341, "step": 83060 }, { "epoch": 0.8875474117207116, "grad_norm": 4.4565958976745605, "learning_rate": 9.811084630643695e-07, "loss": 0.0628, "step": 83070 }, { "epoch": 0.8876542550349912, "grad_norm": 1.857426404953003, "learning_rate": 9.811038881873109e-07, "loss": 0.0441, "step": 83080 }, { "epoch": 0.8877610983492707, "grad_norm": 0.13890455663204193, "learning_rate": 9.810993127670496e-07, "loss": 0.0162, "step": 83090 }, { "epoch": 0.8878679416635504, "grad_norm": 2.6232738494873047, "learning_rate": 9.810947368035905e-07, "loss": 0.026, "step": 83100 }, { "epoch": 0.88797478497783, "grad_norm": 0.3685496747493744, "learning_rate": 9.810901602969394e-07, "loss": 0.0776, "step": 83110 }, { "epoch": 0.8880816282921096, "grad_norm": 5.149410247802734, "learning_rate": 9.810855832471012e-07, "loss": 0.0266, "step": 83120 }, { "epoch": 0.8881884716063893, "grad_norm": 1.8594821691513062, "learning_rate": 9.810810056540812e-07, "loss": 0.042, "step": 83130 }, { "epoch": 0.8882953149206688, "grad_norm": 2.720766305923462, "learning_rate": 9.810764275178842e-07, "loss": 0.0555, "step": 83140 }, { "epoch": 0.8884021582349485, "grad_norm": 2.6865410804748535, "learning_rate": 9.810718488385158e-07, "loss": 0.0457, "step": 83150 }, { "epoch": 0.8885090015492281, "grad_norm": 0.7840230464935303, "learning_rate": 9.810672696159808e-07, "loss": 0.0241, "step": 83160 }, { "epoch": 0.8886158448635076, "grad_norm": 3.2585256099700928, "learning_rate": 9.810626898502848e-07, "loss": 0.0171, "step": 83170 }, { "epoch": 0.8887226881777873, "grad_norm": 6.065992832183838, "learning_rate": 9.810581095414323e-07, "loss": 0.019, "step": 83180 }, { "epoch": 0.8888295314920669, "grad_norm": 7.679259777069092, "learning_rate": 9.810535286894293e-07, "loss": 0.0367, "step": 83190 }, { "epoch": 0.8889363748063465, "grad_norm": 0.014302385039627552, "learning_rate": 9.810489472942804e-07, "loss": 0.057, "step": 83200 }, { "epoch": 0.8890432181206261, "grad_norm": 2.5970165729522705, "learning_rate": 9.810443653559907e-07, "loss": 0.0282, "step": 83210 }, { "epoch": 0.8891500614349057, "grad_norm": 8.871391296386719, "learning_rate": 9.81039782874566e-07, "loss": 0.0307, "step": 83220 }, { "epoch": 0.8892569047491853, "grad_norm": 0.04189891740679741, "learning_rate": 9.810351998500108e-07, "loss": 0.0933, "step": 83230 }, { "epoch": 0.889363748063465, "grad_norm": 3.6559088230133057, "learning_rate": 9.810306162823308e-07, "loss": 0.087, "step": 83240 }, { "epoch": 0.8894705913777445, "grad_norm": 1.637101411819458, "learning_rate": 9.810260321715307e-07, "loss": 0.0551, "step": 83250 }, { "epoch": 0.8895774346920241, "grad_norm": 3.525477647781372, "learning_rate": 9.810214475176159e-07, "loss": 0.0573, "step": 83260 }, { "epoch": 0.8896842780063038, "grad_norm": 1.247029423713684, "learning_rate": 9.810168623205918e-07, "loss": 0.0531, "step": 83270 }, { "epoch": 0.8897911213205834, "grad_norm": 11.343332290649414, "learning_rate": 9.81012276580463e-07, "loss": 0.1698, "step": 83280 }, { "epoch": 0.8898979646348629, "grad_norm": 0.2153632938861847, "learning_rate": 9.810076902972355e-07, "loss": 0.0323, "step": 83290 }, { "epoch": 0.8900048079491426, "grad_norm": 0.5469496250152588, "learning_rate": 9.810031034709138e-07, "loss": 0.0447, "step": 83300 }, { "epoch": 0.8901116512634222, "grad_norm": 5.400420665740967, "learning_rate": 9.809985161015033e-07, "loss": 0.0818, "step": 83310 }, { "epoch": 0.8902184945777017, "grad_norm": 0.062186211347579956, "learning_rate": 9.80993928189009e-07, "loss": 0.043, "step": 83320 }, { "epoch": 0.8903253378919814, "grad_norm": 7.551898002624512, "learning_rate": 9.809893397334365e-07, "loss": 0.0476, "step": 83330 }, { "epoch": 0.890432181206261, "grad_norm": 1.377524971961975, "learning_rate": 9.809847507347907e-07, "loss": 0.0553, "step": 83340 }, { "epoch": 0.8905390245205407, "grad_norm": 4.5479302406311035, "learning_rate": 9.809801611930766e-07, "loss": 0.0869, "step": 83350 }, { "epoch": 0.8906458678348202, "grad_norm": 0.12018892168998718, "learning_rate": 9.809755711083e-07, "loss": 0.0301, "step": 83360 }, { "epoch": 0.8907527111490998, "grad_norm": 0.03177223354578018, "learning_rate": 9.809709804804652e-07, "loss": 0.0451, "step": 83370 }, { "epoch": 0.8908595544633795, "grad_norm": 0.012082252651453018, "learning_rate": 9.80966389309578e-07, "loss": 0.0167, "step": 83380 }, { "epoch": 0.8909663977776591, "grad_norm": 2.097684144973755, "learning_rate": 9.809617975956438e-07, "loss": 0.0624, "step": 83390 }, { "epoch": 0.8910732410919386, "grad_norm": 0.16725260019302368, "learning_rate": 9.80957205338667e-07, "loss": 0.0268, "step": 83400 }, { "epoch": 0.8911800844062183, "grad_norm": 6.464435577392578, "learning_rate": 9.809526125386533e-07, "loss": 0.0364, "step": 83410 }, { "epoch": 0.8912869277204979, "grad_norm": 6.116786479949951, "learning_rate": 9.80948019195608e-07, "loss": 0.0936, "step": 83420 }, { "epoch": 0.8913937710347775, "grad_norm": 1.5084936618804932, "learning_rate": 9.80943425309536e-07, "loss": 0.032, "step": 83430 }, { "epoch": 0.8915006143490571, "grad_norm": 4.35378885269165, "learning_rate": 9.809388308804425e-07, "loss": 0.0322, "step": 83440 }, { "epoch": 0.8916074576633367, "grad_norm": 1.6975977420806885, "learning_rate": 9.809342359083327e-07, "loss": 0.0243, "step": 83450 }, { "epoch": 0.8917143009776163, "grad_norm": 0.09180951118469238, "learning_rate": 9.809296403932121e-07, "loss": 0.0179, "step": 83460 }, { "epoch": 0.891821144291896, "grad_norm": 1.6560783386230469, "learning_rate": 9.809250443350854e-07, "loss": 0.0281, "step": 83470 }, { "epoch": 0.8919279876061755, "grad_norm": 8.163701057434082, "learning_rate": 9.80920447733958e-07, "loss": 0.0596, "step": 83480 }, { "epoch": 0.8920348309204551, "grad_norm": 1.8067277669906616, "learning_rate": 9.809158505898353e-07, "loss": 0.0308, "step": 83490 }, { "epoch": 0.8921416742347348, "grad_norm": 0.6446200013160706, "learning_rate": 9.809112529027223e-07, "loss": 0.0752, "step": 83500 }, { "epoch": 0.8922485175490144, "grad_norm": 6.812621593475342, "learning_rate": 9.809066546726242e-07, "loss": 0.0713, "step": 83510 }, { "epoch": 0.892355360863294, "grad_norm": 1.9049098491668701, "learning_rate": 9.809020558995462e-07, "loss": 0.0168, "step": 83520 }, { "epoch": 0.8924622041775736, "grad_norm": 4.708269119262695, "learning_rate": 9.808974565834933e-07, "loss": 0.1097, "step": 83530 }, { "epoch": 0.8925690474918532, "grad_norm": 5.100173473358154, "learning_rate": 9.80892856724471e-07, "loss": 0.0835, "step": 83540 }, { "epoch": 0.8926758908061329, "grad_norm": 1.0022666454315186, "learning_rate": 9.808882563224845e-07, "loss": 0.0958, "step": 83550 }, { "epoch": 0.8927827341204124, "grad_norm": 3.7711198329925537, "learning_rate": 9.808836553775388e-07, "loss": 0.0312, "step": 83560 }, { "epoch": 0.892889577434692, "grad_norm": 8.584184646606445, "learning_rate": 9.80879053889639e-07, "loss": 0.0571, "step": 83570 }, { "epoch": 0.8929964207489717, "grad_norm": 1.266956090927124, "learning_rate": 9.808744518587906e-07, "loss": 0.031, "step": 83580 }, { "epoch": 0.8931032640632512, "grad_norm": 3.734384775161743, "learning_rate": 9.808698492849984e-07, "loss": 0.0281, "step": 83590 }, { "epoch": 0.8932101073775308, "grad_norm": 0.03526302054524422, "learning_rate": 9.80865246168268e-07, "loss": 0.0324, "step": 83600 }, { "epoch": 0.8933169506918105, "grad_norm": 3.483827590942383, "learning_rate": 9.808606425086047e-07, "loss": 0.0556, "step": 83610 }, { "epoch": 0.8934237940060901, "grad_norm": 13.922494888305664, "learning_rate": 9.80856038306013e-07, "loss": 0.0196, "step": 83620 }, { "epoch": 0.8935306373203696, "grad_norm": 6.851787567138672, "learning_rate": 9.80851433560499e-07, "loss": 0.0686, "step": 83630 }, { "epoch": 0.8936374806346493, "grad_norm": 5.578320503234863, "learning_rate": 9.808468282720672e-07, "loss": 0.0364, "step": 83640 }, { "epoch": 0.8937443239489289, "grad_norm": 6.344979763031006, "learning_rate": 9.80842222440723e-07, "loss": 0.0344, "step": 83650 }, { "epoch": 0.8938511672632085, "grad_norm": 14.286319732666016, "learning_rate": 9.808376160664718e-07, "loss": 0.105, "step": 83660 }, { "epoch": 0.8939580105774881, "grad_norm": 14.661221504211426, "learning_rate": 9.808330091493183e-07, "loss": 0.0777, "step": 83670 }, { "epoch": 0.8940648538917677, "grad_norm": 3.6203417778015137, "learning_rate": 9.808284016892684e-07, "loss": 0.0245, "step": 83680 }, { "epoch": 0.8941716972060473, "grad_norm": 14.408025741577148, "learning_rate": 9.808237936863269e-07, "loss": 0.1384, "step": 83690 }, { "epoch": 0.894278540520327, "grad_norm": 3.4477427005767822, "learning_rate": 9.80819185140499e-07, "loss": 0.0322, "step": 83700 }, { "epoch": 0.8943853838346065, "grad_norm": 3.0179927349090576, "learning_rate": 9.808145760517899e-07, "loss": 0.0715, "step": 83710 }, { "epoch": 0.8944922271488862, "grad_norm": 2.5795249938964844, "learning_rate": 9.808099664202047e-07, "loss": 0.0394, "step": 83720 }, { "epoch": 0.8945990704631658, "grad_norm": 0.08775745332241058, "learning_rate": 9.80805356245749e-07, "loss": 0.0329, "step": 83730 }, { "epoch": 0.8947059137774453, "grad_norm": 0.029154572635889053, "learning_rate": 9.808007455284277e-07, "loss": 0.0746, "step": 83740 }, { "epoch": 0.894812757091725, "grad_norm": 0.8128507137298584, "learning_rate": 9.807961342682462e-07, "loss": 0.0201, "step": 83750 }, { "epoch": 0.8949196004060046, "grad_norm": 0.20901265740394592, "learning_rate": 9.807915224652093e-07, "loss": 0.0705, "step": 83760 }, { "epoch": 0.8950264437202842, "grad_norm": 1.2874903678894043, "learning_rate": 9.807869101193227e-07, "loss": 0.0525, "step": 83770 }, { "epoch": 0.8951332870345639, "grad_norm": 0.05389520153403282, "learning_rate": 9.807822972305912e-07, "loss": 0.0322, "step": 83780 }, { "epoch": 0.8952401303488434, "grad_norm": 1.9267654418945312, "learning_rate": 9.807776837990204e-07, "loss": 0.0334, "step": 83790 }, { "epoch": 0.895346973663123, "grad_norm": 0.5714893341064453, "learning_rate": 9.807730698246152e-07, "loss": 0.0584, "step": 83800 }, { "epoch": 0.8954538169774027, "grad_norm": 1.4566751718521118, "learning_rate": 9.807684553073809e-07, "loss": 0.0251, "step": 83810 }, { "epoch": 0.8955606602916822, "grad_norm": 0.9043291211128235, "learning_rate": 9.80763840247323e-07, "loss": 0.0475, "step": 83820 }, { "epoch": 0.8956675036059618, "grad_norm": 2.7059261798858643, "learning_rate": 9.80759224644446e-07, "loss": 0.0456, "step": 83830 }, { "epoch": 0.8957743469202415, "grad_norm": 7.773372173309326, "learning_rate": 9.80754608498756e-07, "loss": 0.0699, "step": 83840 }, { "epoch": 0.8958811902345211, "grad_norm": 3.2833139896392822, "learning_rate": 9.807499918102575e-07, "loss": 0.0398, "step": 83850 }, { "epoch": 0.8959880335488006, "grad_norm": 10.34747314453125, "learning_rate": 9.80745374578956e-07, "loss": 0.0305, "step": 83860 }, { "epoch": 0.8960948768630803, "grad_norm": 0.7356820702552795, "learning_rate": 9.807407568048567e-07, "loss": 0.0438, "step": 83870 }, { "epoch": 0.8962017201773599, "grad_norm": 3.713608503341675, "learning_rate": 9.807361384879647e-07, "loss": 0.0443, "step": 83880 }, { "epoch": 0.8963085634916396, "grad_norm": 3.7237703800201416, "learning_rate": 9.807315196282855e-07, "loss": 0.1059, "step": 83890 }, { "epoch": 0.8964154068059191, "grad_norm": 0.01125018298625946, "learning_rate": 9.80726900225824e-07, "loss": 0.0564, "step": 83900 }, { "epoch": 0.8965222501201987, "grad_norm": 0.5903860926628113, "learning_rate": 9.807222802805855e-07, "loss": 0.0373, "step": 83910 }, { "epoch": 0.8966290934344784, "grad_norm": 3.483673334121704, "learning_rate": 9.807176597925755e-07, "loss": 0.0331, "step": 83920 }, { "epoch": 0.896735936748758, "grad_norm": 11.365525245666504, "learning_rate": 9.807130387617988e-07, "loss": 0.0538, "step": 83930 }, { "epoch": 0.8968427800630375, "grad_norm": 1.959876298904419, "learning_rate": 9.807084171882608e-07, "loss": 0.088, "step": 83940 }, { "epoch": 0.8969496233773172, "grad_norm": 8.132255554199219, "learning_rate": 9.807037950719668e-07, "loss": 0.0472, "step": 83950 }, { "epoch": 0.8970564666915968, "grad_norm": 0.5815686583518982, "learning_rate": 9.80699172412922e-07, "loss": 0.0606, "step": 83960 }, { "epoch": 0.8971633100058763, "grad_norm": 0.26664838194847107, "learning_rate": 9.806945492111314e-07, "loss": 0.0669, "step": 83970 }, { "epoch": 0.897270153320156, "grad_norm": 0.041378263384103775, "learning_rate": 9.806899254666003e-07, "loss": 0.0609, "step": 83980 }, { "epoch": 0.8973769966344356, "grad_norm": 3.5162205696105957, "learning_rate": 9.806853011793342e-07, "loss": 0.021, "step": 83990 }, { "epoch": 0.8974838399487152, "grad_norm": 4.719274520874023, "learning_rate": 9.80680676349338e-07, "loss": 0.0303, "step": 84000 }, { "epoch": 0.8975906832629948, "grad_norm": 0.05209500715136528, "learning_rate": 9.80676050976617e-07, "loss": 0.0466, "step": 84010 }, { "epoch": 0.8976975265772744, "grad_norm": 0.16126440465450287, "learning_rate": 9.806714250611765e-07, "loss": 0.0191, "step": 84020 }, { "epoch": 0.897804369891554, "grad_norm": 6.191399097442627, "learning_rate": 9.806667986030216e-07, "loss": 0.1417, "step": 84030 }, { "epoch": 0.8979112132058337, "grad_norm": 5.8261284828186035, "learning_rate": 9.80662171602158e-07, "loss": 0.0746, "step": 84040 }, { "epoch": 0.8980180565201132, "grad_norm": 0.7540369033813477, "learning_rate": 9.8065754405859e-07, "loss": 0.0377, "step": 84050 }, { "epoch": 0.8981248998343928, "grad_norm": 7.1644511222839355, "learning_rate": 9.806529159723237e-07, "loss": 0.033, "step": 84060 }, { "epoch": 0.8982317431486725, "grad_norm": 0.06415797770023346, "learning_rate": 9.80648287343364e-07, "loss": 0.0221, "step": 84070 }, { "epoch": 0.8983385864629521, "grad_norm": 5.3322038650512695, "learning_rate": 9.80643658171716e-07, "loss": 0.0416, "step": 84080 }, { "epoch": 0.8984454297772317, "grad_norm": 2.506964683532715, "learning_rate": 9.80639028457385e-07, "loss": 0.0954, "step": 84090 }, { "epoch": 0.8985522730915113, "grad_norm": 2.1106514930725098, "learning_rate": 9.806343982003763e-07, "loss": 0.0228, "step": 84100 }, { "epoch": 0.8986591164057909, "grad_norm": 4.536979675292969, "learning_rate": 9.80629767400695e-07, "loss": 0.0561, "step": 84110 }, { "epoch": 0.8987659597200706, "grad_norm": 3.955954074859619, "learning_rate": 9.806251360583465e-07, "loss": 0.0422, "step": 84120 }, { "epoch": 0.8988728030343501, "grad_norm": 0.06008835509419441, "learning_rate": 9.80620504173336e-07, "loss": 0.0386, "step": 84130 }, { "epoch": 0.8989796463486297, "grad_norm": 2.031949281692505, "learning_rate": 9.806158717456686e-07, "loss": 0.0728, "step": 84140 }, { "epoch": 0.8990864896629094, "grad_norm": 3.7549939155578613, "learning_rate": 9.806112387753495e-07, "loss": 0.0534, "step": 84150 }, { "epoch": 0.899193332977189, "grad_norm": 2.7082512378692627, "learning_rate": 9.806066052623842e-07, "loss": 0.0199, "step": 84160 }, { "epoch": 0.8993001762914685, "grad_norm": 3.8499701023101807, "learning_rate": 9.806019712067777e-07, "loss": 0.0262, "step": 84170 }, { "epoch": 0.8994070196057482, "grad_norm": 6.621857643127441, "learning_rate": 9.805973366085354e-07, "loss": 0.0512, "step": 84180 }, { "epoch": 0.8995138629200278, "grad_norm": 4.067039966583252, "learning_rate": 9.805927014676624e-07, "loss": 0.0258, "step": 84190 }, { "epoch": 0.8996207062343073, "grad_norm": 1.3309760093688965, "learning_rate": 9.805880657841638e-07, "loss": 0.0199, "step": 84200 }, { "epoch": 0.899727549548587, "grad_norm": 3.329284429550171, "learning_rate": 9.805834295580453e-07, "loss": 0.0652, "step": 84210 }, { "epoch": 0.8998343928628666, "grad_norm": 3.9890432357788086, "learning_rate": 9.805787927893116e-07, "loss": 0.0638, "step": 84220 }, { "epoch": 0.8999412361771462, "grad_norm": 4.051187038421631, "learning_rate": 9.805741554779683e-07, "loss": 0.0337, "step": 84230 }, { "epoch": 0.9000480794914258, "grad_norm": 1.9233962297439575, "learning_rate": 9.805695176240204e-07, "loss": 0.0671, "step": 84240 }, { "epoch": 0.9001549228057054, "grad_norm": 13.951915740966797, "learning_rate": 9.805648792274734e-07, "loss": 0.12, "step": 84250 }, { "epoch": 0.9002617661199851, "grad_norm": 11.056096076965332, "learning_rate": 9.80560240288332e-07, "loss": 0.0628, "step": 84260 }, { "epoch": 0.9003686094342647, "grad_norm": 2.1615443229675293, "learning_rate": 9.805556008066022e-07, "loss": 0.0208, "step": 84270 }, { "epoch": 0.9004754527485442, "grad_norm": 0.1337076872587204, "learning_rate": 9.805509607822889e-07, "loss": 0.0364, "step": 84280 }, { "epoch": 0.9005822960628239, "grad_norm": 0.2563164234161377, "learning_rate": 9.805463202153972e-07, "loss": 0.0482, "step": 84290 }, { "epoch": 0.9006891393771035, "grad_norm": 3.481356620788574, "learning_rate": 9.805416791059323e-07, "loss": 0.0347, "step": 84300 }, { "epoch": 0.9007959826913831, "grad_norm": 2.1558663845062256, "learning_rate": 9.805370374538997e-07, "loss": 0.0377, "step": 84310 }, { "epoch": 0.9009028260056627, "grad_norm": 8.762606620788574, "learning_rate": 9.805323952593046e-07, "loss": 0.1326, "step": 84320 }, { "epoch": 0.9010096693199423, "grad_norm": 10.072159767150879, "learning_rate": 9.805277525221518e-07, "loss": 0.0701, "step": 84330 }, { "epoch": 0.9011165126342219, "grad_norm": 0.029322661459445953, "learning_rate": 9.805231092424473e-07, "loss": 0.0279, "step": 84340 }, { "epoch": 0.9012233559485016, "grad_norm": 5.192680358886719, "learning_rate": 9.805184654201958e-07, "loss": 0.0282, "step": 84350 }, { "epoch": 0.9013301992627811, "grad_norm": 11.807268142700195, "learning_rate": 9.805138210554028e-07, "loss": 0.1046, "step": 84360 }, { "epoch": 0.9014370425770607, "grad_norm": 4.453531742095947, "learning_rate": 9.805091761480732e-07, "loss": 0.0503, "step": 84370 }, { "epoch": 0.9015438858913404, "grad_norm": 5.872878551483154, "learning_rate": 9.805045306982125e-07, "loss": 0.0678, "step": 84380 }, { "epoch": 0.90165072920562, "grad_norm": 4.2860307693481445, "learning_rate": 9.80499884705826e-07, "loss": 0.0526, "step": 84390 }, { "epoch": 0.9017575725198995, "grad_norm": 2.2602007389068604, "learning_rate": 9.804952381709188e-07, "loss": 0.0419, "step": 84400 }, { "epoch": 0.9018644158341792, "grad_norm": 11.722668647766113, "learning_rate": 9.804905910934964e-07, "loss": 0.0505, "step": 84410 }, { "epoch": 0.9019712591484588, "grad_norm": 5.596866130828857, "learning_rate": 9.804859434735637e-07, "loss": 0.057, "step": 84420 }, { "epoch": 0.9020781024627383, "grad_norm": 2.9902803897857666, "learning_rate": 9.804812953111262e-07, "loss": 0.0454, "step": 84430 }, { "epoch": 0.902184945777018, "grad_norm": 0.08686577528715134, "learning_rate": 9.804766466061888e-07, "loss": 0.0526, "step": 84440 }, { "epoch": 0.9022917890912976, "grad_norm": 4.8649821281433105, "learning_rate": 9.804719973587573e-07, "loss": 0.0881, "step": 84450 }, { "epoch": 0.9023986324055773, "grad_norm": 3.1884565353393555, "learning_rate": 9.804673475688366e-07, "loss": 0.089, "step": 84460 }, { "epoch": 0.9025054757198568, "grad_norm": 6.267464637756348, "learning_rate": 9.804626972364317e-07, "loss": 0.0802, "step": 84470 }, { "epoch": 0.9026123190341364, "grad_norm": 4.137696266174316, "learning_rate": 9.804580463615486e-07, "loss": 0.0472, "step": 84480 }, { "epoch": 0.9027191623484161, "grad_norm": 0.9734225869178772, "learning_rate": 9.80453394944192e-07, "loss": 0.0314, "step": 84490 }, { "epoch": 0.9028260056626957, "grad_norm": 5.94856071472168, "learning_rate": 9.80448742984367e-07, "loss": 0.0187, "step": 84500 }, { "epoch": 0.9029328489769752, "grad_norm": 12.246787071228027, "learning_rate": 9.804440904820793e-07, "loss": 0.1198, "step": 84510 }, { "epoch": 0.9030396922912549, "grad_norm": 8.582316398620605, "learning_rate": 9.804394374373339e-07, "loss": 0.041, "step": 84520 }, { "epoch": 0.9031465356055345, "grad_norm": 1.1797386407852173, "learning_rate": 9.804347838501363e-07, "loss": 0.0322, "step": 84530 }, { "epoch": 0.903253378919814, "grad_norm": 2.476215362548828, "learning_rate": 9.804301297204914e-07, "loss": 0.1313, "step": 84540 }, { "epoch": 0.9033602222340937, "grad_norm": 5.933868408203125, "learning_rate": 9.804254750484045e-07, "loss": 0.0702, "step": 84550 }, { "epoch": 0.9034670655483733, "grad_norm": 0.8084697723388672, "learning_rate": 9.804208198338813e-07, "loss": 0.0739, "step": 84560 }, { "epoch": 0.9035739088626529, "grad_norm": 8.486027717590332, "learning_rate": 9.804161640769264e-07, "loss": 0.0589, "step": 84570 }, { "epoch": 0.9036807521769326, "grad_norm": 0.9282877445220947, "learning_rate": 9.804115077775456e-07, "loss": 0.0425, "step": 84580 }, { "epoch": 0.9037875954912121, "grad_norm": 6.488675594329834, "learning_rate": 9.80406850935744e-07, "loss": 0.0435, "step": 84590 }, { "epoch": 0.9038944388054917, "grad_norm": 0.0775679424405098, "learning_rate": 9.804021935515266e-07, "loss": 0.0526, "step": 84600 }, { "epoch": 0.9040012821197714, "grad_norm": 0.38595473766326904, "learning_rate": 9.80397535624899e-07, "loss": 0.023, "step": 84610 }, { "epoch": 0.904108125434051, "grad_norm": 2.336148738861084, "learning_rate": 9.803928771558662e-07, "loss": 0.0431, "step": 84620 }, { "epoch": 0.9042149687483306, "grad_norm": 5.435112476348877, "learning_rate": 9.803882181444335e-07, "loss": 0.1018, "step": 84630 }, { "epoch": 0.9043218120626102, "grad_norm": 1.1945488452911377, "learning_rate": 9.803835585906067e-07, "loss": 0.0498, "step": 84640 }, { "epoch": 0.9044286553768898, "grad_norm": 0.5324157476425171, "learning_rate": 9.803788984943903e-07, "loss": 0.0125, "step": 84650 }, { "epoch": 0.9045354986911694, "grad_norm": 2.6623828411102295, "learning_rate": 9.803742378557898e-07, "loss": 0.047, "step": 84660 }, { "epoch": 0.904642342005449, "grad_norm": 18.138423919677734, "learning_rate": 9.803695766748107e-07, "loss": 0.1527, "step": 84670 }, { "epoch": 0.9047491853197286, "grad_norm": 7.351972579956055, "learning_rate": 9.803649149514579e-07, "loss": 0.0368, "step": 84680 }, { "epoch": 0.9048560286340083, "grad_norm": 0.3263464570045471, "learning_rate": 9.803602526857371e-07, "loss": 0.0492, "step": 84690 }, { "epoch": 0.9049628719482878, "grad_norm": 9.24791145324707, "learning_rate": 9.803555898776533e-07, "loss": 0.0196, "step": 84700 }, { "epoch": 0.9050697152625674, "grad_norm": 0.06820313632488251, "learning_rate": 9.803509265272116e-07, "loss": 0.0186, "step": 84710 }, { "epoch": 0.9051765585768471, "grad_norm": 9.699870109558105, "learning_rate": 9.803462626344174e-07, "loss": 0.0163, "step": 84720 }, { "epoch": 0.9052834018911267, "grad_norm": 0.058528684079647064, "learning_rate": 9.803415981992762e-07, "loss": 0.0517, "step": 84730 }, { "epoch": 0.9053902452054062, "grad_norm": 3.261645555496216, "learning_rate": 9.80336933221793e-07, "loss": 0.0347, "step": 84740 }, { "epoch": 0.9054970885196859, "grad_norm": 0.0915440171957016, "learning_rate": 9.803322677019733e-07, "loss": 0.0302, "step": 84750 }, { "epoch": 0.9056039318339655, "grad_norm": 3.914372682571411, "learning_rate": 9.80327601639822e-07, "loss": 0.0478, "step": 84760 }, { "epoch": 0.905710775148245, "grad_norm": 11.188352584838867, "learning_rate": 9.803229350353446e-07, "loss": 0.0794, "step": 84770 }, { "epoch": 0.9058176184625247, "grad_norm": 10.11027717590332, "learning_rate": 9.803182678885465e-07, "loss": 0.0677, "step": 84780 }, { "epoch": 0.9059244617768043, "grad_norm": 7.58537483215332, "learning_rate": 9.803136001994328e-07, "loss": 0.0655, "step": 84790 }, { "epoch": 0.9060313050910839, "grad_norm": 5.470611095428467, "learning_rate": 9.803089319680086e-07, "loss": 0.0355, "step": 84800 }, { "epoch": 0.9061381484053636, "grad_norm": 3.8479108810424805, "learning_rate": 9.803042631942798e-07, "loss": 0.0228, "step": 84810 }, { "epoch": 0.9062449917196431, "grad_norm": 3.537727117538452, "learning_rate": 9.802995938782508e-07, "loss": 0.0541, "step": 84820 }, { "epoch": 0.9063518350339228, "grad_norm": 4.31713342666626, "learning_rate": 9.802949240199275e-07, "loss": 0.0399, "step": 84830 }, { "epoch": 0.9064586783482024, "grad_norm": 7.631919860839844, "learning_rate": 9.80290253619315e-07, "loss": 0.1033, "step": 84840 }, { "epoch": 0.9065655216624819, "grad_norm": 5.1326904296875, "learning_rate": 9.802855826764184e-07, "loss": 0.0437, "step": 84850 }, { "epoch": 0.9066723649767616, "grad_norm": 3.3835196495056152, "learning_rate": 9.802809111912434e-07, "loss": 0.0632, "step": 84860 }, { "epoch": 0.9067792082910412, "grad_norm": 3.830124855041504, "learning_rate": 9.802762391637946e-07, "loss": 0.0756, "step": 84870 }, { "epoch": 0.9068860516053208, "grad_norm": 5.14511775970459, "learning_rate": 9.80271566594078e-07, "loss": 0.1461, "step": 84880 }, { "epoch": 0.9069928949196004, "grad_norm": 13.560511589050293, "learning_rate": 9.802668934820984e-07, "loss": 0.0815, "step": 84890 }, { "epoch": 0.90709973823388, "grad_norm": 9.187284469604492, "learning_rate": 9.802622198278614e-07, "loss": 0.0697, "step": 84900 }, { "epoch": 0.9072065815481596, "grad_norm": 3.961385726928711, "learning_rate": 9.80257545631372e-07, "loss": 0.0319, "step": 84910 }, { "epoch": 0.9073134248624393, "grad_norm": 2.402146577835083, "learning_rate": 9.802528708926356e-07, "loss": 0.065, "step": 84920 }, { "epoch": 0.9074202681767188, "grad_norm": 0.2441609650850296, "learning_rate": 9.802481956116575e-07, "loss": 0.0306, "step": 84930 }, { "epoch": 0.9075271114909984, "grad_norm": 7.087043285369873, "learning_rate": 9.802435197884428e-07, "loss": 0.0309, "step": 84940 }, { "epoch": 0.9076339548052781, "grad_norm": 2.025620460510254, "learning_rate": 9.80238843422997e-07, "loss": 0.036, "step": 84950 }, { "epoch": 0.9077407981195577, "grad_norm": 2.7560548782348633, "learning_rate": 9.802341665153253e-07, "loss": 0.0231, "step": 84960 }, { "epoch": 0.9078476414338372, "grad_norm": 0.03732756897807121, "learning_rate": 9.80229489065433e-07, "loss": 0.0333, "step": 84970 }, { "epoch": 0.9079544847481169, "grad_norm": 5.168607234954834, "learning_rate": 9.802248110733254e-07, "loss": 0.0329, "step": 84980 }, { "epoch": 0.9080613280623965, "grad_norm": 0.9837074279785156, "learning_rate": 9.802201325390076e-07, "loss": 0.0773, "step": 84990 }, { "epoch": 0.9081681713766762, "grad_norm": 0.0868578851222992, "learning_rate": 9.80215453462485e-07, "loss": 0.0519, "step": 85000 }, { "epoch": 0.9082750146909557, "grad_norm": 2.0333874225616455, "learning_rate": 9.802107738437634e-07, "loss": 0.0555, "step": 85010 }, { "epoch": 0.9083818580052353, "grad_norm": 10.477164268493652, "learning_rate": 9.80206093682847e-07, "loss": 0.0398, "step": 85020 }, { "epoch": 0.908488701319515, "grad_norm": 10.912513732910156, "learning_rate": 9.80201412979742e-07, "loss": 0.0137, "step": 85030 }, { "epoch": 0.9085955446337945, "grad_norm": 1.8769888877868652, "learning_rate": 9.801967317344532e-07, "loss": 0.0493, "step": 85040 }, { "epoch": 0.9087023879480741, "grad_norm": 3.438274621963501, "learning_rate": 9.80192049946986e-07, "loss": 0.0577, "step": 85050 }, { "epoch": 0.9088092312623538, "grad_norm": 0.028432020917534828, "learning_rate": 9.80187367617346e-07, "loss": 0.0746, "step": 85060 }, { "epoch": 0.9089160745766334, "grad_norm": 6.419795989990234, "learning_rate": 9.80182684745538e-07, "loss": 0.0815, "step": 85070 }, { "epoch": 0.9090229178909129, "grad_norm": 4.236787796020508, "learning_rate": 9.801780013315677e-07, "loss": 0.0155, "step": 85080 }, { "epoch": 0.9091297612051926, "grad_norm": 2.6916146278381348, "learning_rate": 9.8017331737544e-07, "loss": 0.1251, "step": 85090 }, { "epoch": 0.9092366045194722, "grad_norm": 2.4924476146698, "learning_rate": 9.801686328771605e-07, "loss": 0.0314, "step": 85100 }, { "epoch": 0.9093434478337518, "grad_norm": 5.391746520996094, "learning_rate": 9.801639478367342e-07, "loss": 0.1356, "step": 85110 }, { "epoch": 0.9094502911480314, "grad_norm": 3.3186185359954834, "learning_rate": 9.801592622541668e-07, "loss": 0.1041, "step": 85120 }, { "epoch": 0.909557134462311, "grad_norm": 5.916377544403076, "learning_rate": 9.801545761294632e-07, "loss": 0.0426, "step": 85130 }, { "epoch": 0.9096639777765906, "grad_norm": 7.349554538726807, "learning_rate": 9.801498894626288e-07, "loss": 0.0548, "step": 85140 }, { "epoch": 0.9097708210908703, "grad_norm": 0.2631298899650574, "learning_rate": 9.801452022536692e-07, "loss": 0.0306, "step": 85150 }, { "epoch": 0.9098776644051498, "grad_norm": 0.8385105729103088, "learning_rate": 9.80140514502589e-07, "loss": 0.06, "step": 85160 }, { "epoch": 0.9099845077194294, "grad_norm": 0.034284237772226334, "learning_rate": 9.801358262093945e-07, "loss": 0.0379, "step": 85170 }, { "epoch": 0.9100913510337091, "grad_norm": 0.9492450952529907, "learning_rate": 9.8013113737409e-07, "loss": 0.2322, "step": 85180 }, { "epoch": 0.9101981943479887, "grad_norm": 8.7662992477417, "learning_rate": 9.801264479966813e-07, "loss": 0.0286, "step": 85190 }, { "epoch": 0.9103050376622683, "grad_norm": 0.04594934731721878, "learning_rate": 9.801217580771735e-07, "loss": 0.0261, "step": 85200 }, { "epoch": 0.9104118809765479, "grad_norm": 1.0870673656463623, "learning_rate": 9.801170676155722e-07, "loss": 0.0391, "step": 85210 }, { "epoch": 0.9105187242908275, "grad_norm": 3.5990488529205322, "learning_rate": 9.801123766118824e-07, "loss": 0.0207, "step": 85220 }, { "epoch": 0.9106255676051072, "grad_norm": 6.0144171714782715, "learning_rate": 9.801076850661094e-07, "loss": 0.0199, "step": 85230 }, { "epoch": 0.9107324109193867, "grad_norm": 3.687915802001953, "learning_rate": 9.801029929782587e-07, "loss": 0.0266, "step": 85240 }, { "epoch": 0.9108392542336663, "grad_norm": 6.341658115386963, "learning_rate": 9.800983003483354e-07, "loss": 0.0877, "step": 85250 }, { "epoch": 0.910946097547946, "grad_norm": 0.12458303570747375, "learning_rate": 9.80093607176345e-07, "loss": 0.0459, "step": 85260 }, { "epoch": 0.9110529408622255, "grad_norm": 2.4441802501678467, "learning_rate": 9.800889134622926e-07, "loss": 0.0311, "step": 85270 }, { "epoch": 0.9111597841765051, "grad_norm": 0.284208208322525, "learning_rate": 9.800842192061837e-07, "loss": 0.0829, "step": 85280 }, { "epoch": 0.9112666274907848, "grad_norm": 0.048595037311315536, "learning_rate": 9.800795244080234e-07, "loss": 0.0601, "step": 85290 }, { "epoch": 0.9113734708050644, "grad_norm": 14.092781066894531, "learning_rate": 9.80074829067817e-07, "loss": 0.0769, "step": 85300 }, { "epoch": 0.9114803141193439, "grad_norm": 0.01803528144955635, "learning_rate": 9.8007013318557e-07, "loss": 0.0267, "step": 85310 }, { "epoch": 0.9115871574336236, "grad_norm": 7.249168395996094, "learning_rate": 9.800654367612875e-07, "loss": 0.0689, "step": 85320 }, { "epoch": 0.9116940007479032, "grad_norm": 4.248808860778809, "learning_rate": 9.800607397949749e-07, "loss": 0.0743, "step": 85330 }, { "epoch": 0.9118008440621828, "grad_norm": 5.504225254058838, "learning_rate": 9.800560422866376e-07, "loss": 0.039, "step": 85340 }, { "epoch": 0.9119076873764624, "grad_norm": 5.517861366271973, "learning_rate": 9.800513442362807e-07, "loss": 0.0563, "step": 85350 }, { "epoch": 0.912014530690742, "grad_norm": 5.172765731811523, "learning_rate": 9.800466456439096e-07, "loss": 0.0604, "step": 85360 }, { "epoch": 0.9121213740050217, "grad_norm": 0.6469092965126038, "learning_rate": 9.800419465095298e-07, "loss": 0.0272, "step": 85370 }, { "epoch": 0.9122282173193013, "grad_norm": 0.8061918020248413, "learning_rate": 9.800372468331462e-07, "loss": 0.0449, "step": 85380 }, { "epoch": 0.9123350606335808, "grad_norm": 0.41077402234077454, "learning_rate": 9.800325466147643e-07, "loss": 0.0694, "step": 85390 }, { "epoch": 0.9124419039478605, "grad_norm": 0.8907034993171692, "learning_rate": 9.800278458543897e-07, "loss": 0.0549, "step": 85400 }, { "epoch": 0.9125487472621401, "grad_norm": 1.5214800834655762, "learning_rate": 9.80023144552027e-07, "loss": 0.0086, "step": 85410 }, { "epoch": 0.9126555905764197, "grad_norm": 1.012757420539856, "learning_rate": 9.800184427076824e-07, "loss": 0.0453, "step": 85420 }, { "epoch": 0.9127624338906993, "grad_norm": 0.10498654097318649, "learning_rate": 9.800137403213605e-07, "loss": 0.0676, "step": 85430 }, { "epoch": 0.9128692772049789, "grad_norm": 3.8972108364105225, "learning_rate": 9.800090373930667e-07, "loss": 0.0915, "step": 85440 }, { "epoch": 0.9129761205192585, "grad_norm": 0.07423467934131622, "learning_rate": 9.800043339228067e-07, "loss": 0.0298, "step": 85450 }, { "epoch": 0.9130829638335382, "grad_norm": 0.09267875552177429, "learning_rate": 9.799996299105856e-07, "loss": 0.0501, "step": 85460 }, { "epoch": 0.9131898071478177, "grad_norm": 1.4658721685409546, "learning_rate": 9.799949253564086e-07, "loss": 0.0602, "step": 85470 }, { "epoch": 0.9132966504620973, "grad_norm": 3.5751616954803467, "learning_rate": 9.79990220260281e-07, "loss": 0.0265, "step": 85480 }, { "epoch": 0.913403493776377, "grad_norm": 6.377009391784668, "learning_rate": 9.799855146222085e-07, "loss": 0.051, "step": 85490 }, { "epoch": 0.9135103370906565, "grad_norm": 0.5793817043304443, "learning_rate": 9.799808084421959e-07, "loss": 0.0856, "step": 85500 }, { "epoch": 0.9136171804049361, "grad_norm": 2.5729150772094727, "learning_rate": 9.799761017202488e-07, "loss": 0.0315, "step": 85510 }, { "epoch": 0.9137240237192158, "grad_norm": 0.11398809403181076, "learning_rate": 9.799713944563724e-07, "loss": 0.0142, "step": 85520 }, { "epoch": 0.9138308670334954, "grad_norm": 21.445283889770508, "learning_rate": 9.799666866505721e-07, "loss": 0.069, "step": 85530 }, { "epoch": 0.9139377103477749, "grad_norm": 2.4571588039398193, "learning_rate": 9.799619783028531e-07, "loss": 0.0168, "step": 85540 }, { "epoch": 0.9140445536620546, "grad_norm": 7.18346643447876, "learning_rate": 9.79957269413221e-07, "loss": 0.0599, "step": 85550 }, { "epoch": 0.9141513969763342, "grad_norm": 0.04757438972592354, "learning_rate": 9.79952559981681e-07, "loss": 0.0289, "step": 85560 }, { "epoch": 0.9142582402906139, "grad_norm": 9.454803466796875, "learning_rate": 9.79947850008238e-07, "loss": 0.0505, "step": 85570 }, { "epoch": 0.9143650836048934, "grad_norm": 10.758989334106445, "learning_rate": 9.799431394928978e-07, "loss": 0.0377, "step": 85580 }, { "epoch": 0.914471926919173, "grad_norm": 21.302703857421875, "learning_rate": 9.799384284356657e-07, "loss": 0.0488, "step": 85590 }, { "epoch": 0.9145787702334527, "grad_norm": 6.1770758628845215, "learning_rate": 9.799337168365467e-07, "loss": 0.076, "step": 85600 }, { "epoch": 0.9146856135477323, "grad_norm": 1.292514443397522, "learning_rate": 9.799290046955465e-07, "loss": 0.0412, "step": 85610 }, { "epoch": 0.9147924568620118, "grad_norm": 0.008939103223383427, "learning_rate": 9.7992429201267e-07, "loss": 0.0805, "step": 85620 }, { "epoch": 0.9148993001762915, "grad_norm": 0.20457340776920319, "learning_rate": 9.799195787879232e-07, "loss": 0.063, "step": 85630 }, { "epoch": 0.9150061434905711, "grad_norm": 7.3889617919921875, "learning_rate": 9.799148650213105e-07, "loss": 0.0545, "step": 85640 }, { "epoch": 0.9151129868048506, "grad_norm": 0.008144435472786427, "learning_rate": 9.799101507128378e-07, "loss": 0.0603, "step": 85650 }, { "epoch": 0.9152198301191303, "grad_norm": 0.45956742763519287, "learning_rate": 9.799054358625104e-07, "loss": 0.0393, "step": 85660 }, { "epoch": 0.9153266734334099, "grad_norm": 0.024061832576990128, "learning_rate": 9.799007204703335e-07, "loss": 0.0457, "step": 85670 }, { "epoch": 0.9154335167476895, "grad_norm": 8.526642799377441, "learning_rate": 9.798960045363125e-07, "loss": 0.0705, "step": 85680 }, { "epoch": 0.9155403600619691, "grad_norm": 0.11951550096273422, "learning_rate": 9.798912880604526e-07, "loss": 0.0409, "step": 85690 }, { "epoch": 0.9156472033762487, "grad_norm": 0.06554962694644928, "learning_rate": 9.798865710427593e-07, "loss": 0.0654, "step": 85700 }, { "epoch": 0.9157540466905283, "grad_norm": 0.3821374177932739, "learning_rate": 9.798818534832379e-07, "loss": 0.1321, "step": 85710 }, { "epoch": 0.915860890004808, "grad_norm": 5.623926639556885, "learning_rate": 9.798771353818936e-07, "loss": 0.051, "step": 85720 }, { "epoch": 0.9159677333190875, "grad_norm": 2.6023242473602295, "learning_rate": 9.798724167387318e-07, "loss": 0.0527, "step": 85730 }, { "epoch": 0.9160745766333672, "grad_norm": 1.9138849973678589, "learning_rate": 9.79867697553758e-07, "loss": 0.0418, "step": 85740 }, { "epoch": 0.9161814199476468, "grad_norm": 1.1352373361587524, "learning_rate": 9.79862977826977e-07, "loss": 0.0274, "step": 85750 }, { "epoch": 0.9162882632619264, "grad_norm": 15.06731128692627, "learning_rate": 9.798582575583946e-07, "loss": 0.0658, "step": 85760 }, { "epoch": 0.916395106576206, "grad_norm": 6.613165378570557, "learning_rate": 9.79853536748016e-07, "loss": 0.0365, "step": 85770 }, { "epoch": 0.9165019498904856, "grad_norm": 2.189314126968384, "learning_rate": 9.79848815395847e-07, "loss": 0.0475, "step": 85780 }, { "epoch": 0.9166087932047652, "grad_norm": 2.158081531524658, "learning_rate": 9.798440935018919e-07, "loss": 0.0461, "step": 85790 }, { "epoch": 0.9167156365190449, "grad_norm": 5.968946933746338, "learning_rate": 9.798393710661568e-07, "loss": 0.0334, "step": 85800 }, { "epoch": 0.9168224798333244, "grad_norm": 2.3308489322662354, "learning_rate": 9.79834648088647e-07, "loss": 0.0158, "step": 85810 }, { "epoch": 0.916929323147604, "grad_norm": 1.7351932525634766, "learning_rate": 9.798299245693673e-07, "loss": 0.0527, "step": 85820 }, { "epoch": 0.9170361664618837, "grad_norm": 3.145587921142578, "learning_rate": 9.798252005083236e-07, "loss": 0.056, "step": 85830 }, { "epoch": 0.9171430097761633, "grad_norm": 2.1821374893188477, "learning_rate": 9.79820475905521e-07, "loss": 0.0365, "step": 85840 }, { "epoch": 0.9172498530904428, "grad_norm": 3.766908645629883, "learning_rate": 9.798157507609647e-07, "loss": 0.0386, "step": 85850 }, { "epoch": 0.9173566964047225, "grad_norm": 1.9976282119750977, "learning_rate": 9.798110250746604e-07, "loss": 0.0239, "step": 85860 }, { "epoch": 0.9174635397190021, "grad_norm": 6.4273271560668945, "learning_rate": 9.798062988466133e-07, "loss": 0.0222, "step": 85870 }, { "epoch": 0.9175703830332816, "grad_norm": 0.6965777277946472, "learning_rate": 9.798015720768288e-07, "loss": 0.0554, "step": 85880 }, { "epoch": 0.9176772263475613, "grad_norm": 9.212101936340332, "learning_rate": 9.797968447653117e-07, "loss": 0.1317, "step": 85890 }, { "epoch": 0.9177840696618409, "grad_norm": 0.15334872901439667, "learning_rate": 9.79792116912068e-07, "loss": 0.0267, "step": 85900 }, { "epoch": 0.9178909129761205, "grad_norm": 1.136866569519043, "learning_rate": 9.79787388517103e-07, "loss": 0.0337, "step": 85910 }, { "epoch": 0.9179977562904001, "grad_norm": 6.934763431549072, "learning_rate": 9.797826595804215e-07, "loss": 0.0781, "step": 85920 }, { "epoch": 0.9181045996046797, "grad_norm": 3.557344675064087, "learning_rate": 9.797779301020293e-07, "loss": 0.0486, "step": 85930 }, { "epoch": 0.9182114429189594, "grad_norm": 1.8594461679458618, "learning_rate": 9.797732000819315e-07, "loss": 0.0285, "step": 85940 }, { "epoch": 0.918318286233239, "grad_norm": 0.013008714653551579, "learning_rate": 9.797684695201335e-07, "loss": 0.0599, "step": 85950 }, { "epoch": 0.9184251295475185, "grad_norm": 0.04819819703698158, "learning_rate": 9.797637384166408e-07, "loss": 0.0105, "step": 85960 }, { "epoch": 0.9185319728617982, "grad_norm": 7.134799480438232, "learning_rate": 9.797590067714586e-07, "loss": 0.0504, "step": 85970 }, { "epoch": 0.9186388161760778, "grad_norm": 9.861024856567383, "learning_rate": 9.797542745845923e-07, "loss": 0.0541, "step": 85980 }, { "epoch": 0.9187456594903574, "grad_norm": 8.646747589111328, "learning_rate": 9.797495418560472e-07, "loss": 0.043, "step": 85990 }, { "epoch": 0.918852502804637, "grad_norm": 15.021370887756348, "learning_rate": 9.797448085858288e-07, "loss": 0.0576, "step": 86000 }, { "epoch": 0.9189593461189166, "grad_norm": 6.432955265045166, "learning_rate": 9.79740074773942e-07, "loss": 0.0469, "step": 86010 }, { "epoch": 0.9190661894331962, "grad_norm": 5.888144016265869, "learning_rate": 9.797353404203928e-07, "loss": 0.0803, "step": 86020 }, { "epoch": 0.9191730327474759, "grad_norm": 3.6167004108428955, "learning_rate": 9.79730605525186e-07, "loss": 0.1038, "step": 86030 }, { "epoch": 0.9192798760617554, "grad_norm": 3.005699396133423, "learning_rate": 9.797258700883273e-07, "loss": 0.0427, "step": 86040 }, { "epoch": 0.919386719376035, "grad_norm": 2.535741090774536, "learning_rate": 9.797211341098216e-07, "loss": 0.0737, "step": 86050 }, { "epoch": 0.9194935626903147, "grad_norm": 4.212070465087891, "learning_rate": 9.797163975896747e-07, "loss": 0.0464, "step": 86060 }, { "epoch": 0.9196004060045943, "grad_norm": 4.725428104400635, "learning_rate": 9.797116605278918e-07, "loss": 0.0395, "step": 86070 }, { "epoch": 0.9197072493188738, "grad_norm": 4.4405131340026855, "learning_rate": 9.79706922924478e-07, "loss": 0.0524, "step": 86080 }, { "epoch": 0.9198140926331535, "grad_norm": 5.213779926300049, "learning_rate": 9.797021847794393e-07, "loss": 0.0271, "step": 86090 }, { "epoch": 0.9199209359474331, "grad_norm": 2.2679526805877686, "learning_rate": 9.796974460927804e-07, "loss": 0.0228, "step": 86100 }, { "epoch": 0.9200277792617128, "grad_norm": 7.978781700134277, "learning_rate": 9.796927068645067e-07, "loss": 0.1099, "step": 86110 }, { "epoch": 0.9201346225759923, "grad_norm": 5.713588237762451, "learning_rate": 9.79687967094624e-07, "loss": 0.03, "step": 86120 }, { "epoch": 0.9202414658902719, "grad_norm": 6.295464038848877, "learning_rate": 9.796832267831373e-07, "loss": 0.0274, "step": 86130 }, { "epoch": 0.9203483092045516, "grad_norm": 1.9219727516174316, "learning_rate": 9.79678485930052e-07, "loss": 0.0215, "step": 86140 }, { "epoch": 0.9204551525188311, "grad_norm": 2.094731092453003, "learning_rate": 9.796737445353735e-07, "loss": 0.0394, "step": 86150 }, { "epoch": 0.9205619958331107, "grad_norm": 1.561735987663269, "learning_rate": 9.796690025991072e-07, "loss": 0.0325, "step": 86160 }, { "epoch": 0.9206688391473904, "grad_norm": 20.38982582092285, "learning_rate": 9.796642601212583e-07, "loss": 0.0399, "step": 86170 }, { "epoch": 0.92077568246167, "grad_norm": 3.8519346714019775, "learning_rate": 9.796595171018323e-07, "loss": 0.0752, "step": 86180 }, { "epoch": 0.9208825257759495, "grad_norm": 8.482528686523438, "learning_rate": 9.796547735408344e-07, "loss": 0.0384, "step": 86190 }, { "epoch": 0.9209893690902292, "grad_norm": 4.614806175231934, "learning_rate": 9.796500294382703e-07, "loss": 0.0231, "step": 86200 }, { "epoch": 0.9210962124045088, "grad_norm": 3.120713949203491, "learning_rate": 9.79645284794145e-07, "loss": 0.0858, "step": 86210 }, { "epoch": 0.9212030557187884, "grad_norm": 17.553375244140625, "learning_rate": 9.796405396084639e-07, "loss": 0.0391, "step": 86220 }, { "epoch": 0.921309899033068, "grad_norm": 2.214036226272583, "learning_rate": 9.796357938812324e-07, "loss": 0.0676, "step": 86230 }, { "epoch": 0.9214167423473476, "grad_norm": 3.502676010131836, "learning_rate": 9.79631047612456e-07, "loss": 0.057, "step": 86240 }, { "epoch": 0.9215235856616272, "grad_norm": 0.2759380042552948, "learning_rate": 9.7962630080214e-07, "loss": 0.057, "step": 86250 }, { "epoch": 0.9216304289759069, "grad_norm": 0.04371647164225578, "learning_rate": 9.796215534502895e-07, "loss": 0.014, "step": 86260 }, { "epoch": 0.9217372722901864, "grad_norm": 5.276763439178467, "learning_rate": 9.796168055569102e-07, "loss": 0.0353, "step": 86270 }, { "epoch": 0.921844115604466, "grad_norm": 5.812572002410889, "learning_rate": 9.796120571220072e-07, "loss": 0.0367, "step": 86280 }, { "epoch": 0.9219509589187457, "grad_norm": 4.176947116851807, "learning_rate": 9.796073081455863e-07, "loss": 0.0967, "step": 86290 }, { "epoch": 0.9220578022330252, "grad_norm": 4.004578113555908, "learning_rate": 9.796025586276521e-07, "loss": 0.0519, "step": 86300 }, { "epoch": 0.9221646455473049, "grad_norm": 17.8836612701416, "learning_rate": 9.795978085682109e-07, "loss": 0.0583, "step": 86310 }, { "epoch": 0.9222714888615845, "grad_norm": 7.835864543914795, "learning_rate": 9.795930579672674e-07, "loss": 0.0466, "step": 86320 }, { "epoch": 0.9223783321758641, "grad_norm": 1.454069972038269, "learning_rate": 9.79588306824827e-07, "loss": 0.0372, "step": 86330 }, { "epoch": 0.9224851754901437, "grad_norm": 0.11061333864927292, "learning_rate": 9.795835551408953e-07, "loss": 0.1223, "step": 86340 }, { "epoch": 0.9225920188044233, "grad_norm": 4.930205821990967, "learning_rate": 9.795788029154775e-07, "loss": 0.0456, "step": 86350 }, { "epoch": 0.9226988621187029, "grad_norm": 3.4880785942077637, "learning_rate": 9.795740501485791e-07, "loss": 0.051, "step": 86360 }, { "epoch": 0.9228057054329826, "grad_norm": 2.7785677909851074, "learning_rate": 9.795692968402053e-07, "loss": 0.0597, "step": 86370 }, { "epoch": 0.9229125487472621, "grad_norm": 4.308108806610107, "learning_rate": 9.795645429903618e-07, "loss": 0.0293, "step": 86380 }, { "epoch": 0.9230193920615417, "grad_norm": 3.010054349899292, "learning_rate": 9.795597885990536e-07, "loss": 0.0318, "step": 86390 }, { "epoch": 0.9231262353758214, "grad_norm": 4.150486946105957, "learning_rate": 9.79555033666286e-07, "loss": 0.0408, "step": 86400 }, { "epoch": 0.923233078690101, "grad_norm": 0.8240852952003479, "learning_rate": 9.79550278192065e-07, "loss": 0.0169, "step": 86410 }, { "epoch": 0.9233399220043805, "grad_norm": 9.170971870422363, "learning_rate": 9.795455221763953e-07, "loss": 0.0255, "step": 86420 }, { "epoch": 0.9234467653186602, "grad_norm": 4.323953628540039, "learning_rate": 9.795407656192824e-07, "loss": 0.0982, "step": 86430 }, { "epoch": 0.9235536086329398, "grad_norm": 7.314309120178223, "learning_rate": 9.79536008520732e-07, "loss": 0.0566, "step": 86440 }, { "epoch": 0.9236604519472194, "grad_norm": 6.709501266479492, "learning_rate": 9.795312508807493e-07, "loss": 0.0881, "step": 86450 }, { "epoch": 0.923767295261499, "grad_norm": 4.351311683654785, "learning_rate": 9.795264926993395e-07, "loss": 0.0299, "step": 86460 }, { "epoch": 0.9238741385757786, "grad_norm": 2.548732042312622, "learning_rate": 9.795217339765081e-07, "loss": 0.0438, "step": 86470 }, { "epoch": 0.9239809818900583, "grad_norm": 5.347410678863525, "learning_rate": 9.795169747122605e-07, "loss": 0.0207, "step": 86480 }, { "epoch": 0.9240878252043379, "grad_norm": 3.43074893951416, "learning_rate": 9.79512214906602e-07, "loss": 0.0504, "step": 86490 }, { "epoch": 0.9241946685186174, "grad_norm": 2.081852436065674, "learning_rate": 9.79507454559538e-07, "loss": 0.0429, "step": 86500 }, { "epoch": 0.9243015118328971, "grad_norm": 2.6278061866760254, "learning_rate": 9.79502693671074e-07, "loss": 0.0328, "step": 86510 }, { "epoch": 0.9244083551471767, "grad_norm": 3.3080108165740967, "learning_rate": 9.79497932241215e-07, "loss": 0.0502, "step": 86520 }, { "epoch": 0.9245151984614562, "grad_norm": 0.40697628259658813, "learning_rate": 9.794931702699669e-07, "loss": 0.0407, "step": 86530 }, { "epoch": 0.9246220417757359, "grad_norm": 7.427562713623047, "learning_rate": 9.794884077573348e-07, "loss": 0.0442, "step": 86540 }, { "epoch": 0.9247288850900155, "grad_norm": 0.12960240244865417, "learning_rate": 9.794836447033241e-07, "loss": 0.0318, "step": 86550 }, { "epoch": 0.9248357284042951, "grad_norm": 4.940179824829102, "learning_rate": 9.7947888110794e-07, "loss": 0.0281, "step": 86560 }, { "epoch": 0.9249425717185747, "grad_norm": 0.01733076199889183, "learning_rate": 9.794741169711883e-07, "loss": 0.0349, "step": 86570 }, { "epoch": 0.9250494150328543, "grad_norm": 0.7892472147941589, "learning_rate": 9.79469352293074e-07, "loss": 0.0269, "step": 86580 }, { "epoch": 0.9251562583471339, "grad_norm": 9.917515754699707, "learning_rate": 9.794645870736026e-07, "loss": 0.0304, "step": 86590 }, { "epoch": 0.9252631016614136, "grad_norm": 4.777719497680664, "learning_rate": 9.794598213127796e-07, "loss": 0.0605, "step": 86600 }, { "epoch": 0.9253699449756931, "grad_norm": 0.3613353371620178, "learning_rate": 9.794550550106102e-07, "loss": 0.011, "step": 86610 }, { "epoch": 0.9254767882899727, "grad_norm": 5.882017612457275, "learning_rate": 9.794502881670999e-07, "loss": 0.0724, "step": 86620 }, { "epoch": 0.9255836316042524, "grad_norm": 2.166706085205078, "learning_rate": 9.79445520782254e-07, "loss": 0.114, "step": 86630 }, { "epoch": 0.925690474918532, "grad_norm": 10.990747451782227, "learning_rate": 9.79440752856078e-07, "loss": 0.0689, "step": 86640 }, { "epoch": 0.9257973182328115, "grad_norm": 3.0820305347442627, "learning_rate": 9.794359843885772e-07, "loss": 0.0245, "step": 86650 }, { "epoch": 0.9259041615470912, "grad_norm": 15.257336616516113, "learning_rate": 9.794312153797568e-07, "loss": 0.1486, "step": 86660 }, { "epoch": 0.9260110048613708, "grad_norm": 2.3779475688934326, "learning_rate": 9.794264458296224e-07, "loss": 0.0421, "step": 86670 }, { "epoch": 0.9261178481756505, "grad_norm": 7.654531478881836, "learning_rate": 9.794216757381795e-07, "loss": 0.0763, "step": 86680 }, { "epoch": 0.92622469148993, "grad_norm": 5.883371353149414, "learning_rate": 9.794169051054333e-07, "loss": 0.0518, "step": 86690 }, { "epoch": 0.9263315348042096, "grad_norm": 3.076157569885254, "learning_rate": 9.794121339313891e-07, "loss": 0.0748, "step": 86700 }, { "epoch": 0.9264383781184893, "grad_norm": 3.9697933197021484, "learning_rate": 9.794073622160526e-07, "loss": 0.0277, "step": 86710 }, { "epoch": 0.9265452214327689, "grad_norm": 1.2102868556976318, "learning_rate": 9.794025899594292e-07, "loss": 0.0336, "step": 86720 }, { "epoch": 0.9266520647470484, "grad_norm": 0.0904536321759224, "learning_rate": 9.793978171615238e-07, "loss": 0.0277, "step": 86730 }, { "epoch": 0.9267589080613281, "grad_norm": 0.48400792479515076, "learning_rate": 9.793930438223421e-07, "loss": 0.0492, "step": 86740 }, { "epoch": 0.9268657513756077, "grad_norm": 6.63960075378418, "learning_rate": 9.793882699418894e-07, "loss": 0.042, "step": 86750 }, { "epoch": 0.9269725946898872, "grad_norm": 3.2715704441070557, "learning_rate": 9.793834955201715e-07, "loss": 0.0444, "step": 86760 }, { "epoch": 0.9270794380041669, "grad_norm": 1.874213695526123, "learning_rate": 9.79378720557193e-07, "loss": 0.0224, "step": 86770 }, { "epoch": 0.9271862813184465, "grad_norm": 0.8774957656860352, "learning_rate": 9.7937394505296e-07, "loss": 0.0217, "step": 86780 }, { "epoch": 0.9272931246327261, "grad_norm": 9.121777534484863, "learning_rate": 9.793691690074778e-07, "loss": 0.0553, "step": 86790 }, { "epoch": 0.9273999679470057, "grad_norm": 0.05974853038787842, "learning_rate": 9.793643924207512e-07, "loss": 0.0104, "step": 86800 }, { "epoch": 0.9275068112612853, "grad_norm": 0.008498197421431541, "learning_rate": 9.793596152927863e-07, "loss": 0.0594, "step": 86810 }, { "epoch": 0.9276136545755649, "grad_norm": 7.8479766845703125, "learning_rate": 9.793548376235883e-07, "loss": 0.0751, "step": 86820 }, { "epoch": 0.9277204978898446, "grad_norm": 0.621675968170166, "learning_rate": 9.793500594131622e-07, "loss": 0.0497, "step": 86830 }, { "epoch": 0.9278273412041241, "grad_norm": 5.252243518829346, "learning_rate": 9.79345280661514e-07, "loss": 0.0502, "step": 86840 }, { "epoch": 0.9279341845184038, "grad_norm": 10.736333847045898, "learning_rate": 9.793405013686486e-07, "loss": 0.074, "step": 86850 }, { "epoch": 0.9280410278326834, "grad_norm": 6.858193874359131, "learning_rate": 9.793357215345716e-07, "loss": 0.0295, "step": 86860 }, { "epoch": 0.928147871146963, "grad_norm": 2.996922492980957, "learning_rate": 9.793309411592886e-07, "loss": 0.0407, "step": 86870 }, { "epoch": 0.9282547144612426, "grad_norm": 6.115245819091797, "learning_rate": 9.793261602428047e-07, "loss": 0.0677, "step": 86880 }, { "epoch": 0.9283615577755222, "grad_norm": 15.208623886108398, "learning_rate": 9.793213787851251e-07, "loss": 0.0214, "step": 86890 }, { "epoch": 0.9284684010898018, "grad_norm": 8.517520904541016, "learning_rate": 9.79316596786256e-07, "loss": 0.0188, "step": 86900 }, { "epoch": 0.9285752444040815, "grad_norm": 0.01146188098937273, "learning_rate": 9.79311814246202e-07, "loss": 0.0268, "step": 86910 }, { "epoch": 0.928682087718361, "grad_norm": 4.628634929656982, "learning_rate": 9.793070311649686e-07, "loss": 0.0996, "step": 86920 }, { "epoch": 0.9287889310326406, "grad_norm": 9.920421600341797, "learning_rate": 9.793022475425615e-07, "loss": 0.0365, "step": 86930 }, { "epoch": 0.9288957743469203, "grad_norm": 5.49192476272583, "learning_rate": 9.792974633789864e-07, "loss": 0.0425, "step": 86940 }, { "epoch": 0.9290026176611998, "grad_norm": 2.443323850631714, "learning_rate": 9.79292678674248e-07, "loss": 0.0149, "step": 86950 }, { "epoch": 0.9291094609754794, "grad_norm": 6.7072062492370605, "learning_rate": 9.792878934283519e-07, "loss": 0.028, "step": 86960 }, { "epoch": 0.9292163042897591, "grad_norm": 0.18233883380889893, "learning_rate": 9.792831076413036e-07, "loss": 0.1019, "step": 86970 }, { "epoch": 0.9293231476040387, "grad_norm": 5.484732627868652, "learning_rate": 9.792783213131086e-07, "loss": 0.0245, "step": 86980 }, { "epoch": 0.9294299909183182, "grad_norm": 6.4808268547058105, "learning_rate": 9.792735344437722e-07, "loss": 0.0365, "step": 86990 }, { "epoch": 0.9295368342325979, "grad_norm": 0.9181981682777405, "learning_rate": 9.792687470332997e-07, "loss": 0.0314, "step": 87000 }, { "epoch": 0.9296436775468775, "grad_norm": 6.611084938049316, "learning_rate": 9.792639590816967e-07, "loss": 0.0475, "step": 87010 }, { "epoch": 0.9297505208611571, "grad_norm": 1.6742759943008423, "learning_rate": 9.792591705889685e-07, "loss": 0.0619, "step": 87020 }, { "epoch": 0.9298573641754367, "grad_norm": 3.216932535171509, "learning_rate": 9.792543815551205e-07, "loss": 0.0158, "step": 87030 }, { "epoch": 0.9299642074897163, "grad_norm": 5.443353652954102, "learning_rate": 9.792495919801582e-07, "loss": 0.0486, "step": 87040 }, { "epoch": 0.930071050803996, "grad_norm": 4.397807598114014, "learning_rate": 9.792448018640868e-07, "loss": 0.0518, "step": 87050 }, { "epoch": 0.9301778941182756, "grad_norm": 6.709105014801025, "learning_rate": 9.79240011206912e-07, "loss": 0.0306, "step": 87060 }, { "epoch": 0.9302847374325551, "grad_norm": 4.199278354644775, "learning_rate": 9.79235220008639e-07, "loss": 0.048, "step": 87070 }, { "epoch": 0.9303915807468348, "grad_norm": 5.1305365562438965, "learning_rate": 9.792304282692732e-07, "loss": 0.0437, "step": 87080 }, { "epoch": 0.9304984240611144, "grad_norm": 5.176377296447754, "learning_rate": 9.792256359888203e-07, "loss": 0.0283, "step": 87090 }, { "epoch": 0.930605267375394, "grad_norm": 2.61851167678833, "learning_rate": 9.792208431672852e-07, "loss": 0.0356, "step": 87100 }, { "epoch": 0.9307121106896736, "grad_norm": 2.7281994819641113, "learning_rate": 9.792160498046739e-07, "loss": 0.0465, "step": 87110 }, { "epoch": 0.9308189540039532, "grad_norm": 2.480112075805664, "learning_rate": 9.792112559009912e-07, "loss": 0.0551, "step": 87120 }, { "epoch": 0.9309257973182328, "grad_norm": 9.240093231201172, "learning_rate": 9.79206461456243e-07, "loss": 0.0523, "step": 87130 }, { "epoch": 0.9310326406325125, "grad_norm": 0.6740713119506836, "learning_rate": 9.792016664704345e-07, "loss": 0.0416, "step": 87140 }, { "epoch": 0.931139483946792, "grad_norm": 5.740314960479736, "learning_rate": 9.791968709435712e-07, "loss": 0.05, "step": 87150 }, { "epoch": 0.9312463272610716, "grad_norm": 1.1208689212799072, "learning_rate": 9.791920748756583e-07, "loss": 0.0445, "step": 87160 }, { "epoch": 0.9313531705753513, "grad_norm": 3.702019453048706, "learning_rate": 9.791872782667018e-07, "loss": 0.037, "step": 87170 }, { "epoch": 0.9314600138896308, "grad_norm": 9.045231819152832, "learning_rate": 9.791824811167063e-07, "loss": 0.1022, "step": 87180 }, { "epoch": 0.9315668572039104, "grad_norm": 1.1910489797592163, "learning_rate": 9.791776834256776e-07, "loss": 0.028, "step": 87190 }, { "epoch": 0.9316737005181901, "grad_norm": 6.43010139465332, "learning_rate": 9.791728851936212e-07, "loss": 0.0751, "step": 87200 }, { "epoch": 0.9317805438324697, "grad_norm": 1.4174973964691162, "learning_rate": 9.791680864205425e-07, "loss": 0.0274, "step": 87210 }, { "epoch": 0.9318873871467493, "grad_norm": 0.0718211680650711, "learning_rate": 9.79163287106447e-07, "loss": 0.0087, "step": 87220 }, { "epoch": 0.9319942304610289, "grad_norm": 10.538202285766602, "learning_rate": 9.791584872513398e-07, "loss": 0.0792, "step": 87230 }, { "epoch": 0.9321010737753085, "grad_norm": 2.329554557800293, "learning_rate": 9.791536868552265e-07, "loss": 0.0772, "step": 87240 }, { "epoch": 0.9322079170895882, "grad_norm": 1.3069127798080444, "learning_rate": 9.791488859181126e-07, "loss": 0.1047, "step": 87250 }, { "epoch": 0.9323147604038677, "grad_norm": 3.5913479328155518, "learning_rate": 9.791440844400035e-07, "loss": 0.0324, "step": 87260 }, { "epoch": 0.9324216037181473, "grad_norm": 8.622379302978516, "learning_rate": 9.791392824209043e-07, "loss": 0.0314, "step": 87270 }, { "epoch": 0.932528447032427, "grad_norm": 1.4493975639343262, "learning_rate": 9.79134479860821e-07, "loss": 0.0298, "step": 87280 }, { "epoch": 0.9326352903467066, "grad_norm": 0.9398964047431946, "learning_rate": 9.791296767597586e-07, "loss": 0.0194, "step": 87290 }, { "epoch": 0.9327421336609861, "grad_norm": 4.9908061027526855, "learning_rate": 9.791248731177225e-07, "loss": 0.0395, "step": 87300 }, { "epoch": 0.9328489769752658, "grad_norm": 4.019744873046875, "learning_rate": 9.791200689347185e-07, "loss": 0.038, "step": 87310 }, { "epoch": 0.9329558202895454, "grad_norm": 10.059005737304688, "learning_rate": 9.791152642107517e-07, "loss": 0.0597, "step": 87320 }, { "epoch": 0.933062663603825, "grad_norm": 1.9347472190856934, "learning_rate": 9.791104589458274e-07, "loss": 0.0329, "step": 87330 }, { "epoch": 0.9331695069181046, "grad_norm": 11.037041664123535, "learning_rate": 9.791056531399516e-07, "loss": 0.047, "step": 87340 }, { "epoch": 0.9332763502323842, "grad_norm": 1.5431411266326904, "learning_rate": 9.79100846793129e-07, "loss": 0.0473, "step": 87350 }, { "epoch": 0.9333831935466638, "grad_norm": 0.7124718427658081, "learning_rate": 9.790960399053656e-07, "loss": 0.0314, "step": 87360 }, { "epoch": 0.9334900368609435, "grad_norm": 0.39591193199157715, "learning_rate": 9.790912324766666e-07, "loss": 0.0543, "step": 87370 }, { "epoch": 0.933596880175223, "grad_norm": 0.33234521746635437, "learning_rate": 9.790864245070375e-07, "loss": 0.0436, "step": 87380 }, { "epoch": 0.9337037234895026, "grad_norm": 7.389960289001465, "learning_rate": 9.790816159964834e-07, "loss": 0.0598, "step": 87390 }, { "epoch": 0.9338105668037823, "grad_norm": 11.63083553314209, "learning_rate": 9.7907680694501e-07, "loss": 0.0304, "step": 87400 }, { "epoch": 0.9339174101180618, "grad_norm": 14.55907917022705, "learning_rate": 9.79071997352623e-07, "loss": 0.0559, "step": 87410 }, { "epoch": 0.9340242534323415, "grad_norm": 0.9416123032569885, "learning_rate": 9.790671872193275e-07, "loss": 0.032, "step": 87420 }, { "epoch": 0.9341310967466211, "grad_norm": 8.363182067871094, "learning_rate": 9.79062376545129e-07, "loss": 0.0294, "step": 87430 }, { "epoch": 0.9342379400609007, "grad_norm": 9.488762855529785, "learning_rate": 9.790575653300326e-07, "loss": 0.0728, "step": 87440 }, { "epoch": 0.9343447833751803, "grad_norm": 0.8198821544647217, "learning_rate": 9.790527535740445e-07, "loss": 0.0171, "step": 87450 }, { "epoch": 0.9344516266894599, "grad_norm": 6.27849817276001, "learning_rate": 9.790479412771693e-07, "loss": 0.0357, "step": 87460 }, { "epoch": 0.9345584700037395, "grad_norm": 9.759608268737793, "learning_rate": 9.790431284394132e-07, "loss": 0.0595, "step": 87470 }, { "epoch": 0.9346653133180192, "grad_norm": 0.2778968811035156, "learning_rate": 9.79038315060781e-07, "loss": 0.0343, "step": 87480 }, { "epoch": 0.9347721566322987, "grad_norm": 4.526854991912842, "learning_rate": 9.790335011412785e-07, "loss": 0.0548, "step": 87490 }, { "epoch": 0.9348789999465783, "grad_norm": 2.6555888652801514, "learning_rate": 9.790286866809109e-07, "loss": 0.0576, "step": 87500 }, { "epoch": 0.934985843260858, "grad_norm": 1.1841984987258911, "learning_rate": 9.790238716796837e-07, "loss": 0.0151, "step": 87510 }, { "epoch": 0.9350926865751376, "grad_norm": 3.350973129272461, "learning_rate": 9.790190561376026e-07, "loss": 0.0535, "step": 87520 }, { "epoch": 0.9351995298894171, "grad_norm": 1.4428789615631104, "learning_rate": 9.790142400546729e-07, "loss": 0.0317, "step": 87530 }, { "epoch": 0.9353063732036968, "grad_norm": 5.43657112121582, "learning_rate": 9.790094234308997e-07, "loss": 0.0852, "step": 87540 }, { "epoch": 0.9354132165179764, "grad_norm": 4.671939373016357, "learning_rate": 9.790046062662888e-07, "loss": 0.0394, "step": 87550 }, { "epoch": 0.935520059832256, "grad_norm": 5.2151594161987305, "learning_rate": 9.789997885608457e-07, "loss": 0.0778, "step": 87560 }, { "epoch": 0.9356269031465356, "grad_norm": 11.137788772583008, "learning_rate": 9.789949703145754e-07, "loss": 0.0208, "step": 87570 }, { "epoch": 0.9357337464608152, "grad_norm": 10.426700592041016, "learning_rate": 9.789901515274839e-07, "loss": 0.0956, "step": 87580 }, { "epoch": 0.9358405897750949, "grad_norm": 4.658151626586914, "learning_rate": 9.789853321995762e-07, "loss": 0.0401, "step": 87590 }, { "epoch": 0.9359474330893744, "grad_norm": 2.5200915336608887, "learning_rate": 9.789805123308582e-07, "loss": 0.0228, "step": 87600 }, { "epoch": 0.936054276403654, "grad_norm": 7.63318395614624, "learning_rate": 9.789756919213348e-07, "loss": 0.0556, "step": 87610 }, { "epoch": 0.9361611197179337, "grad_norm": 0.9149122834205627, "learning_rate": 9.789708709710115e-07, "loss": 0.0128, "step": 87620 }, { "epoch": 0.9362679630322133, "grad_norm": 0.0398981049656868, "learning_rate": 9.789660494798942e-07, "loss": 0.0289, "step": 87630 }, { "epoch": 0.9363748063464928, "grad_norm": 10.195646286010742, "learning_rate": 9.789612274479882e-07, "loss": 0.0219, "step": 87640 }, { "epoch": 0.9364816496607725, "grad_norm": 0.3828376829624176, "learning_rate": 9.789564048752985e-07, "loss": 0.0607, "step": 87650 }, { "epoch": 0.9365884929750521, "grad_norm": 2.7698044776916504, "learning_rate": 9.78951581761831e-07, "loss": 0.0381, "step": 87660 }, { "epoch": 0.9366953362893317, "grad_norm": 0.1446644514799118, "learning_rate": 9.789467581075912e-07, "loss": 0.0109, "step": 87670 }, { "epoch": 0.9368021796036113, "grad_norm": 7.457973480224609, "learning_rate": 9.789419339125842e-07, "loss": 0.0568, "step": 87680 }, { "epoch": 0.9369090229178909, "grad_norm": 2.931877851486206, "learning_rate": 9.789371091768155e-07, "loss": 0.038, "step": 87690 }, { "epoch": 0.9370158662321705, "grad_norm": 7.045334815979004, "learning_rate": 9.789322839002907e-07, "loss": 0.0403, "step": 87700 }, { "epoch": 0.9371227095464502, "grad_norm": 0.06511831283569336, "learning_rate": 9.789274580830153e-07, "loss": 0.0364, "step": 87710 }, { "epoch": 0.9372295528607297, "grad_norm": 0.25094377994537354, "learning_rate": 9.789226317249949e-07, "loss": 0.0533, "step": 87720 }, { "epoch": 0.9373363961750093, "grad_norm": 4.036770343780518, "learning_rate": 9.789178048262343e-07, "loss": 0.0747, "step": 87730 }, { "epoch": 0.937443239489289, "grad_norm": 8.498937606811523, "learning_rate": 9.789129773867396e-07, "loss": 0.0657, "step": 87740 }, { "epoch": 0.9375500828035686, "grad_norm": 3.395080089569092, "learning_rate": 9.789081494065158e-07, "loss": 0.0603, "step": 87750 }, { "epoch": 0.9376569261178481, "grad_norm": 5.167176723480225, "learning_rate": 9.789033208855685e-07, "loss": 0.0249, "step": 87760 }, { "epoch": 0.9377637694321278, "grad_norm": 14.475410461425781, "learning_rate": 9.788984918239034e-07, "loss": 0.0661, "step": 87770 }, { "epoch": 0.9378706127464074, "grad_norm": 5.673789978027344, "learning_rate": 9.788936622215257e-07, "loss": 0.0642, "step": 87780 }, { "epoch": 0.937977456060687, "grad_norm": 3.3145835399627686, "learning_rate": 9.78888832078441e-07, "loss": 0.042, "step": 87790 }, { "epoch": 0.9380842993749666, "grad_norm": 0.24987879395484924, "learning_rate": 9.788840013946546e-07, "loss": 0.0105, "step": 87800 }, { "epoch": 0.9381911426892462, "grad_norm": 9.752523422241211, "learning_rate": 9.78879170170172e-07, "loss": 0.0566, "step": 87810 }, { "epoch": 0.9382979860035259, "grad_norm": 7.146745681762695, "learning_rate": 9.788743384049986e-07, "loss": 0.0554, "step": 87820 }, { "epoch": 0.9384048293178054, "grad_norm": 2.7657060623168945, "learning_rate": 9.788695060991401e-07, "loss": 0.1098, "step": 87830 }, { "epoch": 0.938511672632085, "grad_norm": 3.842484712600708, "learning_rate": 9.788646732526017e-07, "loss": 0.0263, "step": 87840 }, { "epoch": 0.9386185159463647, "grad_norm": 1.8177440166473389, "learning_rate": 9.78859839865389e-07, "loss": 0.0599, "step": 87850 }, { "epoch": 0.9387253592606443, "grad_norm": 5.908107280731201, "learning_rate": 9.788550059375073e-07, "loss": 0.0479, "step": 87860 }, { "epoch": 0.9388322025749238, "grad_norm": 5.265666484832764, "learning_rate": 9.788501714689622e-07, "loss": 0.0561, "step": 87870 }, { "epoch": 0.9389390458892035, "grad_norm": 0.3656313121318817, "learning_rate": 9.78845336459759e-07, "loss": 0.0253, "step": 87880 }, { "epoch": 0.9390458892034831, "grad_norm": 0.11993381381034851, "learning_rate": 9.788405009099033e-07, "loss": 0.0452, "step": 87890 }, { "epoch": 0.9391527325177627, "grad_norm": 0.0750495046377182, "learning_rate": 9.788356648194007e-07, "loss": 0.1142, "step": 87900 }, { "epoch": 0.9392595758320423, "grad_norm": 4.490785121917725, "learning_rate": 9.788308281882565e-07, "loss": 0.0299, "step": 87910 }, { "epoch": 0.9393664191463219, "grad_norm": 3.3303050994873047, "learning_rate": 9.78825991016476e-07, "loss": 0.2025, "step": 87920 }, { "epoch": 0.9394732624606015, "grad_norm": 8.59545612335205, "learning_rate": 9.78821153304065e-07, "loss": 0.0454, "step": 87930 }, { "epoch": 0.9395801057748812, "grad_norm": 2.339130163192749, "learning_rate": 9.788163150510287e-07, "loss": 0.061, "step": 87940 }, { "epoch": 0.9396869490891607, "grad_norm": 0.27856388688087463, "learning_rate": 9.788114762573726e-07, "loss": 0.0214, "step": 87950 }, { "epoch": 0.9397937924034404, "grad_norm": 7.489043235778809, "learning_rate": 9.788066369231022e-07, "loss": 0.0477, "step": 87960 }, { "epoch": 0.93990063571772, "grad_norm": 6.325145721435547, "learning_rate": 9.78801797048223e-07, "loss": 0.0518, "step": 87970 }, { "epoch": 0.9400074790319995, "grad_norm": 1.2545554637908936, "learning_rate": 9.787969566327404e-07, "loss": 0.0484, "step": 87980 }, { "epoch": 0.9401143223462792, "grad_norm": 0.11418110877275467, "learning_rate": 9.787921156766598e-07, "loss": 0.0247, "step": 87990 }, { "epoch": 0.9402211656605588, "grad_norm": 1.6525633335113525, "learning_rate": 9.78787274179987e-07, "loss": 0.0661, "step": 88000 }, { "epoch": 0.9403280089748384, "grad_norm": 4.0889482498168945, "learning_rate": 9.787824321427272e-07, "loss": 0.0429, "step": 88010 }, { "epoch": 0.940434852289118, "grad_norm": 0.029997078701853752, "learning_rate": 9.787775895648858e-07, "loss": 0.0398, "step": 88020 }, { "epoch": 0.9405416956033976, "grad_norm": 3.177265167236328, "learning_rate": 9.787727464464684e-07, "loss": 0.0604, "step": 88030 }, { "epoch": 0.9406485389176772, "grad_norm": 2.0910966396331787, "learning_rate": 9.787679027874805e-07, "loss": 0.0405, "step": 88040 }, { "epoch": 0.9407553822319569, "grad_norm": 7.264773845672607, "learning_rate": 9.787630585879274e-07, "loss": 0.0321, "step": 88050 }, { "epoch": 0.9408622255462364, "grad_norm": 3.3546500205993652, "learning_rate": 9.787582138478145e-07, "loss": 0.0513, "step": 88060 }, { "epoch": 0.940969068860516, "grad_norm": 8.209248542785645, "learning_rate": 9.787533685671477e-07, "loss": 0.0662, "step": 88070 }, { "epoch": 0.9410759121747957, "grad_norm": 0.4542735517024994, "learning_rate": 9.787485227459322e-07, "loss": 0.0198, "step": 88080 }, { "epoch": 0.9411827554890753, "grad_norm": 2.6113829612731934, "learning_rate": 9.787436763841734e-07, "loss": 0.0245, "step": 88090 }, { "epoch": 0.9412895988033548, "grad_norm": 0.6921306252479553, "learning_rate": 9.78738829481877e-07, "loss": 0.0548, "step": 88100 }, { "epoch": 0.9413964421176345, "grad_norm": 0.1773386299610138, "learning_rate": 9.78733982039048e-07, "loss": 0.0356, "step": 88110 }, { "epoch": 0.9415032854319141, "grad_norm": 4.976983070373535, "learning_rate": 9.787291340556925e-07, "loss": 0.0234, "step": 88120 }, { "epoch": 0.9416101287461937, "grad_norm": 3.018249988555908, "learning_rate": 9.787242855318157e-07, "loss": 0.1205, "step": 88130 }, { "epoch": 0.9417169720604733, "grad_norm": 9.300139427185059, "learning_rate": 9.787194364674229e-07, "loss": 0.0323, "step": 88140 }, { "epoch": 0.9418238153747529, "grad_norm": 6.61557149887085, "learning_rate": 9.7871458686252e-07, "loss": 0.0399, "step": 88150 }, { "epoch": 0.9419306586890326, "grad_norm": 6.291170120239258, "learning_rate": 9.787097367171119e-07, "loss": 0.0353, "step": 88160 }, { "epoch": 0.9420375020033122, "grad_norm": 7.841563701629639, "learning_rate": 9.787048860312045e-07, "loss": 0.0315, "step": 88170 }, { "epoch": 0.9421443453175917, "grad_norm": 5.793725967407227, "learning_rate": 9.787000348048028e-07, "loss": 0.0534, "step": 88180 }, { "epoch": 0.9422511886318714, "grad_norm": 0.6620749235153198, "learning_rate": 9.78695183037913e-07, "loss": 0.0265, "step": 88190 }, { "epoch": 0.942358031946151, "grad_norm": 0.1594683974981308, "learning_rate": 9.786903307305402e-07, "loss": 0.0189, "step": 88200 }, { "epoch": 0.9424648752604305, "grad_norm": 7.153675079345703, "learning_rate": 9.786854778826898e-07, "loss": 0.022, "step": 88210 }, { "epoch": 0.9425717185747102, "grad_norm": 7.513640403747559, "learning_rate": 9.786806244943675e-07, "loss": 0.0202, "step": 88220 }, { "epoch": 0.9426785618889898, "grad_norm": 1.4826565980911255, "learning_rate": 9.786757705655785e-07, "loss": 0.0283, "step": 88230 }, { "epoch": 0.9427854052032694, "grad_norm": 2.244156837463379, "learning_rate": 9.786709160963286e-07, "loss": 0.0267, "step": 88240 }, { "epoch": 0.942892248517549, "grad_norm": 9.443184852600098, "learning_rate": 9.78666061086623e-07, "loss": 0.0423, "step": 88250 }, { "epoch": 0.9429990918318286, "grad_norm": 4.628728866577148, "learning_rate": 9.786612055364673e-07, "loss": 0.025, "step": 88260 }, { "epoch": 0.9431059351461082, "grad_norm": 1.5210063457489014, "learning_rate": 9.78656349445867e-07, "loss": 0.0418, "step": 88270 }, { "epoch": 0.9432127784603879, "grad_norm": 5.157075881958008, "learning_rate": 9.786514928148275e-07, "loss": 0.0507, "step": 88280 }, { "epoch": 0.9433196217746674, "grad_norm": 1.3817925453186035, "learning_rate": 9.786466356433543e-07, "loss": 0.0243, "step": 88290 }, { "epoch": 0.943426465088947, "grad_norm": 0.5166773200035095, "learning_rate": 9.78641777931453e-07, "loss": 0.0785, "step": 88300 }, { "epoch": 0.9435333084032267, "grad_norm": 4.039426803588867, "learning_rate": 9.786369196791288e-07, "loss": 0.031, "step": 88310 }, { "epoch": 0.9436401517175063, "grad_norm": 1.687058687210083, "learning_rate": 9.786320608863877e-07, "loss": 0.0153, "step": 88320 }, { "epoch": 0.9437469950317859, "grad_norm": 4.3840436935424805, "learning_rate": 9.786272015532348e-07, "loss": 0.0604, "step": 88330 }, { "epoch": 0.9438538383460655, "grad_norm": 1.658159852027893, "learning_rate": 9.786223416796756e-07, "loss": 0.0621, "step": 88340 }, { "epoch": 0.9439606816603451, "grad_norm": 0.09236179292201996, "learning_rate": 9.786174812657157e-07, "loss": 0.0457, "step": 88350 }, { "epoch": 0.9440675249746248, "grad_norm": 1.2861278057098389, "learning_rate": 9.786126203113605e-07, "loss": 0.0799, "step": 88360 }, { "epoch": 0.9441743682889043, "grad_norm": 1.7434855699539185, "learning_rate": 9.786077588166156e-07, "loss": 0.0378, "step": 88370 }, { "epoch": 0.9442812116031839, "grad_norm": 0.47260162234306335, "learning_rate": 9.786028967814863e-07, "loss": 0.0362, "step": 88380 }, { "epoch": 0.9443880549174636, "grad_norm": 3.115189790725708, "learning_rate": 9.785980342059784e-07, "loss": 0.0295, "step": 88390 }, { "epoch": 0.9444948982317432, "grad_norm": 14.115286827087402, "learning_rate": 9.78593171090097e-07, "loss": 0.0445, "step": 88400 }, { "epoch": 0.9446017415460227, "grad_norm": 19.731752395629883, "learning_rate": 9.785883074338478e-07, "loss": 0.0424, "step": 88410 }, { "epoch": 0.9447085848603024, "grad_norm": 21.143901824951172, "learning_rate": 9.785834432372364e-07, "loss": 0.0513, "step": 88420 }, { "epoch": 0.944815428174582, "grad_norm": 7.130192279815674, "learning_rate": 9.78578578500268e-07, "loss": 0.0213, "step": 88430 }, { "epoch": 0.9449222714888615, "grad_norm": 3.9277610778808594, "learning_rate": 9.785737132229483e-07, "loss": 0.0397, "step": 88440 }, { "epoch": 0.9450291148031412, "grad_norm": 4.309530735015869, "learning_rate": 9.785688474052828e-07, "loss": 0.0331, "step": 88450 }, { "epoch": 0.9451359581174208, "grad_norm": 0.027387011796236038, "learning_rate": 9.78563981047277e-07, "loss": 0.0474, "step": 88460 }, { "epoch": 0.9452428014317004, "grad_norm": 7.5287184715271, "learning_rate": 9.785591141489365e-07, "loss": 0.0363, "step": 88470 }, { "epoch": 0.94534964474598, "grad_norm": 0.24247793853282928, "learning_rate": 9.785542467102664e-07, "loss": 0.0181, "step": 88480 }, { "epoch": 0.9454564880602596, "grad_norm": 0.02312580682337284, "learning_rate": 9.785493787312726e-07, "loss": 0.0335, "step": 88490 }, { "epoch": 0.9455633313745392, "grad_norm": 2.841857671737671, "learning_rate": 9.785445102119602e-07, "loss": 0.0086, "step": 88500 }, { "epoch": 0.9456701746888189, "grad_norm": 0.12671878933906555, "learning_rate": 9.785396411523352e-07, "loss": 0.041, "step": 88510 }, { "epoch": 0.9457770180030984, "grad_norm": 4.013242244720459, "learning_rate": 9.785347715524026e-07, "loss": 0.1046, "step": 88520 }, { "epoch": 0.9458838613173781, "grad_norm": 0.5103904604911804, "learning_rate": 9.785299014121683e-07, "loss": 0.054, "step": 88530 }, { "epoch": 0.9459907046316577, "grad_norm": 1.7706531286239624, "learning_rate": 9.785250307316374e-07, "loss": 0.0118, "step": 88540 }, { "epoch": 0.9460975479459373, "grad_norm": 1.8186469078063965, "learning_rate": 9.78520159510816e-07, "loss": 0.0559, "step": 88550 }, { "epoch": 0.9462043912602169, "grad_norm": 5.76368522644043, "learning_rate": 9.78515287749709e-07, "loss": 0.0212, "step": 88560 }, { "epoch": 0.9463112345744965, "grad_norm": 1.8954144716262817, "learning_rate": 9.78510415448322e-07, "loss": 0.0384, "step": 88570 }, { "epoch": 0.9464180778887761, "grad_norm": 19.758533477783203, "learning_rate": 9.78505542606661e-07, "loss": 0.1343, "step": 88580 }, { "epoch": 0.9465249212030558, "grad_norm": 3.544001340866089, "learning_rate": 9.785006692247306e-07, "loss": 0.0338, "step": 88590 }, { "epoch": 0.9466317645173353, "grad_norm": 0.7330245971679688, "learning_rate": 9.784957953025372e-07, "loss": 0.0211, "step": 88600 }, { "epoch": 0.9467386078316149, "grad_norm": 0.3830215632915497, "learning_rate": 9.784909208400857e-07, "loss": 0.0231, "step": 88610 }, { "epoch": 0.9468454511458946, "grad_norm": 1.7690236568450928, "learning_rate": 9.784860458373822e-07, "loss": 0.0126, "step": 88620 }, { "epoch": 0.9469522944601741, "grad_norm": 0.07699263840913773, "learning_rate": 9.784811702944315e-07, "loss": 0.0277, "step": 88630 }, { "epoch": 0.9470591377744537, "grad_norm": 1.232743501663208, "learning_rate": 9.784762942112397e-07, "loss": 0.0172, "step": 88640 }, { "epoch": 0.9471659810887334, "grad_norm": 0.5629564523696899, "learning_rate": 9.784714175878119e-07, "loss": 0.0617, "step": 88650 }, { "epoch": 0.947272824403013, "grad_norm": 2.8612663745880127, "learning_rate": 9.784665404241536e-07, "loss": 0.0419, "step": 88660 }, { "epoch": 0.9473796677172925, "grad_norm": 5.4970011711120605, "learning_rate": 9.784616627202707e-07, "loss": 0.0711, "step": 88670 }, { "epoch": 0.9474865110315722, "grad_norm": 4.289161682128906, "learning_rate": 9.784567844761684e-07, "loss": 0.0376, "step": 88680 }, { "epoch": 0.9475933543458518, "grad_norm": 1.213026523590088, "learning_rate": 9.784519056918522e-07, "loss": 0.0126, "step": 88690 }, { "epoch": 0.9477001976601315, "grad_norm": 0.041410643607378006, "learning_rate": 9.784470263673278e-07, "loss": 0.0692, "step": 88700 }, { "epoch": 0.947807040974411, "grad_norm": 10.009078025817871, "learning_rate": 9.784421465026008e-07, "loss": 0.087, "step": 88710 }, { "epoch": 0.9479138842886906, "grad_norm": 9.226286888122559, "learning_rate": 9.784372660976763e-07, "loss": 0.0666, "step": 88720 }, { "epoch": 0.9480207276029703, "grad_norm": 0.24823862314224243, "learning_rate": 9.7843238515256e-07, "loss": 0.0328, "step": 88730 }, { "epoch": 0.9481275709172499, "grad_norm": 0.9670917987823486, "learning_rate": 9.784275036672574e-07, "loss": 0.0114, "step": 88740 }, { "epoch": 0.9482344142315294, "grad_norm": 1.411972999572754, "learning_rate": 9.78422621641774e-07, "loss": 0.0733, "step": 88750 }, { "epoch": 0.9483412575458091, "grad_norm": 4.128236293792725, "learning_rate": 9.784177390761155e-07, "loss": 0.0284, "step": 88760 }, { "epoch": 0.9484481008600887, "grad_norm": 4.134304523468018, "learning_rate": 9.78412855970287e-07, "loss": 0.0541, "step": 88770 }, { "epoch": 0.9485549441743683, "grad_norm": 3.9427428245544434, "learning_rate": 9.784079723242945e-07, "loss": 0.0516, "step": 88780 }, { "epoch": 0.9486617874886479, "grad_norm": 2.742733955383301, "learning_rate": 9.784030881381432e-07, "loss": 0.0339, "step": 88790 }, { "epoch": 0.9487686308029275, "grad_norm": 11.291402816772461, "learning_rate": 9.78398203411839e-07, "loss": 0.0371, "step": 88800 }, { "epoch": 0.9488754741172071, "grad_norm": 0.6460257172584534, "learning_rate": 9.783933181453868e-07, "loss": 0.0575, "step": 88810 }, { "epoch": 0.9489823174314868, "grad_norm": 0.03395400568842888, "learning_rate": 9.783884323387926e-07, "loss": 0.0752, "step": 88820 }, { "epoch": 0.9490891607457663, "grad_norm": 2.8993170261383057, "learning_rate": 9.783835459920617e-07, "loss": 0.0426, "step": 88830 }, { "epoch": 0.9491960040600459, "grad_norm": 0.6859579682350159, "learning_rate": 9.783786591051996e-07, "loss": 0.0936, "step": 88840 }, { "epoch": 0.9493028473743256, "grad_norm": 0.3262503445148468, "learning_rate": 9.78373771678212e-07, "loss": 0.0918, "step": 88850 }, { "epoch": 0.9494096906886051, "grad_norm": 1.3766902685165405, "learning_rate": 9.783688837111042e-07, "loss": 0.0278, "step": 88860 }, { "epoch": 0.9495165340028847, "grad_norm": 1.9623432159423828, "learning_rate": 9.783639952038819e-07, "loss": 0.0382, "step": 88870 }, { "epoch": 0.9496233773171644, "grad_norm": 15.781594276428223, "learning_rate": 9.783591061565504e-07, "loss": 0.0704, "step": 88880 }, { "epoch": 0.949730220631444, "grad_norm": 0.3273008465766907, "learning_rate": 9.783542165691155e-07, "loss": 0.0486, "step": 88890 }, { "epoch": 0.9498370639457236, "grad_norm": 0.13143083453178406, "learning_rate": 9.783493264415826e-07, "loss": 0.0593, "step": 88900 }, { "epoch": 0.9499439072600032, "grad_norm": 6.109429836273193, "learning_rate": 9.783444357739572e-07, "loss": 0.0916, "step": 88910 }, { "epoch": 0.9500507505742828, "grad_norm": 0.2348848432302475, "learning_rate": 9.783395445662448e-07, "loss": 0.0274, "step": 88920 }, { "epoch": 0.9501575938885625, "grad_norm": 1.3865759372711182, "learning_rate": 9.78334652818451e-07, "loss": 0.0156, "step": 88930 }, { "epoch": 0.950264437202842, "grad_norm": 0.07106984406709671, "learning_rate": 9.783297605305812e-07, "loss": 0.0522, "step": 88940 }, { "epoch": 0.9503712805171216, "grad_norm": 1.218177318572998, "learning_rate": 9.78324867702641e-07, "loss": 0.0526, "step": 88950 }, { "epoch": 0.9504781238314013, "grad_norm": 0.028217263519763947, "learning_rate": 9.783199743346359e-07, "loss": 0.0287, "step": 88960 }, { "epoch": 0.9505849671456809, "grad_norm": 2.2166213989257812, "learning_rate": 9.783150804265716e-07, "loss": 0.0658, "step": 88970 }, { "epoch": 0.9506918104599604, "grad_norm": 0.44696295261383057, "learning_rate": 9.783101859784535e-07, "loss": 0.0774, "step": 88980 }, { "epoch": 0.9507986537742401, "grad_norm": 4.339958667755127, "learning_rate": 9.78305290990287e-07, "loss": 0.0308, "step": 88990 }, { "epoch": 0.9509054970885197, "grad_norm": 1.498632788658142, "learning_rate": 9.783003954620777e-07, "loss": 0.0252, "step": 89000 }, { "epoch": 0.9510123404027992, "grad_norm": 0.7312383651733398, "learning_rate": 9.78295499393831e-07, "loss": 0.0226, "step": 89010 }, { "epoch": 0.9511191837170789, "grad_norm": 0.15230172872543335, "learning_rate": 9.782906027855528e-07, "loss": 0.0396, "step": 89020 }, { "epoch": 0.9512260270313585, "grad_norm": 4.937405586242676, "learning_rate": 9.782857056372484e-07, "loss": 0.0506, "step": 89030 }, { "epoch": 0.9513328703456381, "grad_norm": 2.3575689792633057, "learning_rate": 9.782808079489233e-07, "loss": 0.0611, "step": 89040 }, { "epoch": 0.9514397136599178, "grad_norm": 1.8312174081802368, "learning_rate": 9.78275909720583e-07, "loss": 0.0075, "step": 89050 }, { "epoch": 0.9515465569741973, "grad_norm": 0.34773460030555725, "learning_rate": 9.782710109522333e-07, "loss": 0.0125, "step": 89060 }, { "epoch": 0.951653400288477, "grad_norm": 4.875104904174805, "learning_rate": 9.782661116438793e-07, "loss": 0.025, "step": 89070 }, { "epoch": 0.9517602436027566, "grad_norm": 8.05990982055664, "learning_rate": 9.782612117955268e-07, "loss": 0.0644, "step": 89080 }, { "epoch": 0.9518670869170361, "grad_norm": 1.313813328742981, "learning_rate": 9.782563114071815e-07, "loss": 0.0382, "step": 89090 }, { "epoch": 0.9519739302313158, "grad_norm": 6.474295139312744, "learning_rate": 9.782514104788484e-07, "loss": 0.0325, "step": 89100 }, { "epoch": 0.9520807735455954, "grad_norm": 5.123045921325684, "learning_rate": 9.782465090105335e-07, "loss": 0.0807, "step": 89110 }, { "epoch": 0.952187616859875, "grad_norm": 4.856807231903076, "learning_rate": 9.782416070022424e-07, "loss": 0.0434, "step": 89120 }, { "epoch": 0.9522944601741546, "grad_norm": 1.7452362775802612, "learning_rate": 9.782367044539804e-07, "loss": 0.1052, "step": 89130 }, { "epoch": 0.9524013034884342, "grad_norm": 43.419010162353516, "learning_rate": 9.78231801365753e-07, "loss": 0.1396, "step": 89140 }, { "epoch": 0.9525081468027138, "grad_norm": 0.0824279859662056, "learning_rate": 9.782268977375657e-07, "loss": 0.0656, "step": 89150 }, { "epoch": 0.9526149901169935, "grad_norm": 1.9219439029693604, "learning_rate": 9.78221993569424e-07, "loss": 0.0471, "step": 89160 }, { "epoch": 0.952721833431273, "grad_norm": 0.582943856716156, "learning_rate": 9.78217088861334e-07, "loss": 0.0112, "step": 89170 }, { "epoch": 0.9528286767455526, "grad_norm": 3.780646324157715, "learning_rate": 9.782121836133004e-07, "loss": 0.05, "step": 89180 }, { "epoch": 0.9529355200598323, "grad_norm": 6.8104424476623535, "learning_rate": 9.782072778253295e-07, "loss": 0.0649, "step": 89190 }, { "epoch": 0.9530423633741119, "grad_norm": 3.639816999435425, "learning_rate": 9.78202371497426e-07, "loss": 0.0185, "step": 89200 }, { "epoch": 0.9531492066883914, "grad_norm": 3.322126865386963, "learning_rate": 9.781974646295965e-07, "loss": 0.0718, "step": 89210 }, { "epoch": 0.9532560500026711, "grad_norm": 4.92783784866333, "learning_rate": 9.781925572218458e-07, "loss": 0.0597, "step": 89220 }, { "epoch": 0.9533628933169507, "grad_norm": 0.4054078161716461, "learning_rate": 9.781876492741795e-07, "loss": 0.0127, "step": 89230 }, { "epoch": 0.9534697366312302, "grad_norm": 4.71602201461792, "learning_rate": 9.781827407866032e-07, "loss": 0.0175, "step": 89240 }, { "epoch": 0.9535765799455099, "grad_norm": 17.271282196044922, "learning_rate": 9.781778317591225e-07, "loss": 0.0698, "step": 89250 }, { "epoch": 0.9536834232597895, "grad_norm": 1.9010820388793945, "learning_rate": 9.78172922191743e-07, "loss": 0.0757, "step": 89260 }, { "epoch": 0.9537902665740692, "grad_norm": 3.080838918685913, "learning_rate": 9.781680120844704e-07, "loss": 0.0209, "step": 89270 }, { "epoch": 0.9538971098883487, "grad_norm": 19.27309799194336, "learning_rate": 9.781631014373097e-07, "loss": 0.048, "step": 89280 }, { "epoch": 0.9540039532026283, "grad_norm": 0.12926095724105835, "learning_rate": 9.781581902502669e-07, "loss": 0.0476, "step": 89290 }, { "epoch": 0.954110796516908, "grad_norm": 4.430113315582275, "learning_rate": 9.781532785233475e-07, "loss": 0.0176, "step": 89300 }, { "epoch": 0.9542176398311876, "grad_norm": 12.208046913146973, "learning_rate": 9.781483662565569e-07, "loss": 0.0296, "step": 89310 }, { "epoch": 0.9543244831454671, "grad_norm": 11.093817710876465, "learning_rate": 9.781434534499007e-07, "loss": 0.0663, "step": 89320 }, { "epoch": 0.9544313264597468, "grad_norm": 7.867152690887451, "learning_rate": 9.781385401033844e-07, "loss": 0.0576, "step": 89330 }, { "epoch": 0.9545381697740264, "grad_norm": 6.651883125305176, "learning_rate": 9.781336262170137e-07, "loss": 0.0377, "step": 89340 }, { "epoch": 0.954645013088306, "grad_norm": 4.430818557739258, "learning_rate": 9.781287117907938e-07, "loss": 0.0656, "step": 89350 }, { "epoch": 0.9547518564025856, "grad_norm": 0.3395298719406128, "learning_rate": 9.781237968247307e-07, "loss": 0.0261, "step": 89360 }, { "epoch": 0.9548586997168652, "grad_norm": 3.310896396636963, "learning_rate": 9.781188813188298e-07, "loss": 0.0546, "step": 89370 }, { "epoch": 0.9549655430311448, "grad_norm": 7.433436870574951, "learning_rate": 9.781139652730965e-07, "loss": 0.0258, "step": 89380 }, { "epoch": 0.9550723863454245, "grad_norm": 0.22852206230163574, "learning_rate": 9.781090486875364e-07, "loss": 0.0715, "step": 89390 }, { "epoch": 0.955179229659704, "grad_norm": 3.3363888263702393, "learning_rate": 9.781041315621553e-07, "loss": 0.0351, "step": 89400 }, { "epoch": 0.9552860729739836, "grad_norm": 0.7149203419685364, "learning_rate": 9.780992138969583e-07, "loss": 0.0344, "step": 89410 }, { "epoch": 0.9553929162882633, "grad_norm": 0.3971562683582306, "learning_rate": 9.780942956919515e-07, "loss": 0.0417, "step": 89420 }, { "epoch": 0.9554997596025429, "grad_norm": 8.833407402038574, "learning_rate": 9.7808937694714e-07, "loss": 0.0359, "step": 89430 }, { "epoch": 0.9556066029168225, "grad_norm": 0.04877854511141777, "learning_rate": 9.780844576625295e-07, "loss": 0.0163, "step": 89440 }, { "epoch": 0.9557134462311021, "grad_norm": 7.419868469238281, "learning_rate": 9.780795378381255e-07, "loss": 0.078, "step": 89450 }, { "epoch": 0.9558202895453817, "grad_norm": 2.5576963424682617, "learning_rate": 9.780746174739337e-07, "loss": 0.0312, "step": 89460 }, { "epoch": 0.9559271328596614, "grad_norm": 0.020462077111005783, "learning_rate": 9.780696965699596e-07, "loss": 0.0936, "step": 89470 }, { "epoch": 0.9560339761739409, "grad_norm": 6.918805122375488, "learning_rate": 9.780647751262086e-07, "loss": 0.0564, "step": 89480 }, { "epoch": 0.9561408194882205, "grad_norm": 0.010984715074300766, "learning_rate": 9.780598531426865e-07, "loss": 0.0712, "step": 89490 }, { "epoch": 0.9562476628025002, "grad_norm": 0.16583630442619324, "learning_rate": 9.780549306193988e-07, "loss": 0.014, "step": 89500 }, { "epoch": 0.9563545061167797, "grad_norm": 0.49111655354499817, "learning_rate": 9.78050007556351e-07, "loss": 0.027, "step": 89510 }, { "epoch": 0.9564613494310593, "grad_norm": 0.08911322057247162, "learning_rate": 9.780450839535484e-07, "loss": 0.043, "step": 89520 }, { "epoch": 0.956568192745339, "grad_norm": 9.692736625671387, "learning_rate": 9.78040159810997e-07, "loss": 0.0437, "step": 89530 }, { "epoch": 0.9566750360596186, "grad_norm": 0.5859962105751038, "learning_rate": 9.780352351287022e-07, "loss": 0.0819, "step": 89540 }, { "epoch": 0.9567818793738981, "grad_norm": 3.4379665851593018, "learning_rate": 9.780303099066695e-07, "loss": 0.0194, "step": 89550 }, { "epoch": 0.9568887226881778, "grad_norm": 4.100914001464844, "learning_rate": 9.780253841449046e-07, "loss": 0.0208, "step": 89560 }, { "epoch": 0.9569955660024574, "grad_norm": 0.05795535817742348, "learning_rate": 9.78020457843413e-07, "loss": 0.0098, "step": 89570 }, { "epoch": 0.957102409316737, "grad_norm": 0.013187968172132969, "learning_rate": 9.780155310022e-07, "loss": 0.085, "step": 89580 }, { "epoch": 0.9572092526310166, "grad_norm": 0.7881935834884644, "learning_rate": 9.780106036212716e-07, "loss": 0.0183, "step": 89590 }, { "epoch": 0.9573160959452962, "grad_norm": 5.497631072998047, "learning_rate": 9.78005675700633e-07, "loss": 0.116, "step": 89600 }, { "epoch": 0.9574229392595758, "grad_norm": 0.18566381931304932, "learning_rate": 9.7800074724029e-07, "loss": 0.0509, "step": 89610 }, { "epoch": 0.9575297825738555, "grad_norm": 8.086457252502441, "learning_rate": 9.77995818240248e-07, "loss": 0.0379, "step": 89620 }, { "epoch": 0.957636625888135, "grad_norm": 4.862969875335693, "learning_rate": 9.779908887005127e-07, "loss": 0.0398, "step": 89630 }, { "epoch": 0.9577434692024147, "grad_norm": 4.3206987380981445, "learning_rate": 9.779859586210898e-07, "loss": 0.1242, "step": 89640 }, { "epoch": 0.9578503125166943, "grad_norm": 0.18728859722614288, "learning_rate": 9.779810280019844e-07, "loss": 0.0198, "step": 89650 }, { "epoch": 0.9579571558309738, "grad_norm": 1.2908744812011719, "learning_rate": 9.779760968432023e-07, "loss": 0.028, "step": 89660 }, { "epoch": 0.9580639991452535, "grad_norm": 0.37558743357658386, "learning_rate": 9.779711651447494e-07, "loss": 0.0649, "step": 89670 }, { "epoch": 0.9581708424595331, "grad_norm": 0.04178072139620781, "learning_rate": 9.779662329066307e-07, "loss": 0.0432, "step": 89680 }, { "epoch": 0.9582776857738127, "grad_norm": 0.05232102423906326, "learning_rate": 9.779613001288524e-07, "loss": 0.0425, "step": 89690 }, { "epoch": 0.9583845290880924, "grad_norm": 1.5448864698410034, "learning_rate": 9.779563668114193e-07, "loss": 0.0305, "step": 89700 }, { "epoch": 0.9584913724023719, "grad_norm": 5.254105567932129, "learning_rate": 9.779514329543376e-07, "loss": 0.051, "step": 89710 }, { "epoch": 0.9585982157166515, "grad_norm": 2.7897841930389404, "learning_rate": 9.779464985576127e-07, "loss": 0.0201, "step": 89720 }, { "epoch": 0.9587050590309312, "grad_norm": 0.4209417402744293, "learning_rate": 9.7794156362125e-07, "loss": 0.0267, "step": 89730 }, { "epoch": 0.9588119023452107, "grad_norm": 0.31100034713745117, "learning_rate": 9.779366281452552e-07, "loss": 0.0491, "step": 89740 }, { "epoch": 0.9589187456594903, "grad_norm": 0.1158326044678688, "learning_rate": 9.779316921296341e-07, "loss": 0.0405, "step": 89750 }, { "epoch": 0.95902558897377, "grad_norm": 0.11206082999706268, "learning_rate": 9.779267555743918e-07, "loss": 0.0197, "step": 89760 }, { "epoch": 0.9591324322880496, "grad_norm": 1.759835958480835, "learning_rate": 9.779218184795343e-07, "loss": 0.0477, "step": 89770 }, { "epoch": 0.9592392756023291, "grad_norm": 3.4913883209228516, "learning_rate": 9.779168808450668e-07, "loss": 0.0311, "step": 89780 }, { "epoch": 0.9593461189166088, "grad_norm": 0.22126632928848267, "learning_rate": 9.779119426709953e-07, "loss": 0.0208, "step": 89790 }, { "epoch": 0.9594529622308884, "grad_norm": 0.01624251902103424, "learning_rate": 9.77907003957325e-07, "loss": 0.0323, "step": 89800 }, { "epoch": 0.9595598055451681, "grad_norm": 4.091274738311768, "learning_rate": 9.779020647040617e-07, "loss": 0.0539, "step": 89810 }, { "epoch": 0.9596666488594476, "grad_norm": 4.224453926086426, "learning_rate": 9.778971249112109e-07, "loss": 0.0269, "step": 89820 }, { "epoch": 0.9597734921737272, "grad_norm": 0.4459250271320343, "learning_rate": 9.77892184578778e-07, "loss": 0.0857, "step": 89830 }, { "epoch": 0.9598803354880069, "grad_norm": 4.820727348327637, "learning_rate": 9.77887243706769e-07, "loss": 0.0304, "step": 89840 }, { "epoch": 0.9599871788022865, "grad_norm": 0.3571094572544098, "learning_rate": 9.77882302295189e-07, "loss": 0.0422, "step": 89850 }, { "epoch": 0.960094022116566, "grad_norm": 1.2206475734710693, "learning_rate": 9.77877360344044e-07, "loss": 0.0367, "step": 89860 }, { "epoch": 0.9602008654308457, "grad_norm": 1.944125771522522, "learning_rate": 9.778724178533393e-07, "loss": 0.047, "step": 89870 }, { "epoch": 0.9603077087451253, "grad_norm": 0.4812425673007965, "learning_rate": 9.778674748230807e-07, "loss": 0.0357, "step": 89880 }, { "epoch": 0.9604145520594048, "grad_norm": 2.7145235538482666, "learning_rate": 9.778625312532736e-07, "loss": 0.0358, "step": 89890 }, { "epoch": 0.9605213953736845, "grad_norm": 1.5927003622055054, "learning_rate": 9.778575871439236e-07, "loss": 0.0931, "step": 89900 }, { "epoch": 0.9606282386879641, "grad_norm": 4.944826126098633, "learning_rate": 9.778526424950363e-07, "loss": 0.0874, "step": 89910 }, { "epoch": 0.9607350820022437, "grad_norm": 1.0149259567260742, "learning_rate": 9.778476973066171e-07, "loss": 0.0396, "step": 89920 }, { "epoch": 0.9608419253165233, "grad_norm": 4.317785739898682, "learning_rate": 9.778427515786723e-07, "loss": 0.0662, "step": 89930 }, { "epoch": 0.9609487686308029, "grad_norm": 1.239220142364502, "learning_rate": 9.778378053112067e-07, "loss": 0.0149, "step": 89940 }, { "epoch": 0.9610556119450825, "grad_norm": 3.03902006149292, "learning_rate": 9.77832858504226e-07, "loss": 0.0515, "step": 89950 }, { "epoch": 0.9611624552593622, "grad_norm": 3.676647663116455, "learning_rate": 9.77827911157736e-07, "loss": 0.0258, "step": 89960 }, { "epoch": 0.9612692985736417, "grad_norm": 1.3391780853271484, "learning_rate": 9.778229632717423e-07, "loss": 0.0189, "step": 89970 }, { "epoch": 0.9613761418879213, "grad_norm": 0.07609979808330536, "learning_rate": 9.778180148462504e-07, "loss": 0.0478, "step": 89980 }, { "epoch": 0.961482985202201, "grad_norm": 0.2637346684932709, "learning_rate": 9.77813065881266e-07, "loss": 0.0196, "step": 89990 }, { "epoch": 0.9615898285164806, "grad_norm": 8.339942932128906, "learning_rate": 9.778081163767943e-07, "loss": 0.041, "step": 90000 }, { "epoch": 0.9616966718307602, "grad_norm": 0.09387237578630447, "learning_rate": 9.778031663328414e-07, "loss": 0.051, "step": 90010 }, { "epoch": 0.9618035151450398, "grad_norm": 1.4041725397109985, "learning_rate": 9.777982157494126e-07, "loss": 0.0272, "step": 90020 }, { "epoch": 0.9619103584593194, "grad_norm": 7.299520492553711, "learning_rate": 9.777932646265137e-07, "loss": 0.0464, "step": 90030 }, { "epoch": 0.9620172017735991, "grad_norm": 0.2728872001171112, "learning_rate": 9.7778831296415e-07, "loss": 0.0511, "step": 90040 }, { "epoch": 0.9621240450878786, "grad_norm": 3.307828903198242, "learning_rate": 9.777833607623273e-07, "loss": 0.0754, "step": 90050 }, { "epoch": 0.9622308884021582, "grad_norm": 2.4120659828186035, "learning_rate": 9.77778408021051e-07, "loss": 0.0678, "step": 90060 }, { "epoch": 0.9623377317164379, "grad_norm": 4.927586555480957, "learning_rate": 9.77773454740327e-07, "loss": 0.0636, "step": 90070 }, { "epoch": 0.9624445750307175, "grad_norm": 5.472184181213379, "learning_rate": 9.777685009201605e-07, "loss": 0.0646, "step": 90080 }, { "epoch": 0.962551418344997, "grad_norm": 2.451296806335449, "learning_rate": 9.777635465605574e-07, "loss": 0.0307, "step": 90090 }, { "epoch": 0.9626582616592767, "grad_norm": 2.5720691680908203, "learning_rate": 9.777585916615232e-07, "loss": 0.0376, "step": 90100 }, { "epoch": 0.9627651049735563, "grad_norm": 0.8546983003616333, "learning_rate": 9.777536362230635e-07, "loss": 0.0188, "step": 90110 }, { "epoch": 0.9628719482878358, "grad_norm": 4.763490200042725, "learning_rate": 9.77748680245184e-07, "loss": 0.0713, "step": 90120 }, { "epoch": 0.9629787916021155, "grad_norm": 5.1297125816345215, "learning_rate": 9.7774372372789e-07, "loss": 0.0427, "step": 90130 }, { "epoch": 0.9630856349163951, "grad_norm": 0.9277447462081909, "learning_rate": 9.777387666711874e-07, "loss": 0.0448, "step": 90140 }, { "epoch": 0.9631924782306747, "grad_norm": 2.0424106121063232, "learning_rate": 9.777338090750815e-07, "loss": 0.0642, "step": 90150 }, { "epoch": 0.9632993215449543, "grad_norm": 4.79610013961792, "learning_rate": 9.777288509395783e-07, "loss": 0.0154, "step": 90160 }, { "epoch": 0.9634061648592339, "grad_norm": 3.4139795303344727, "learning_rate": 9.77723892264683e-07, "loss": 0.0288, "step": 90170 }, { "epoch": 0.9635130081735136, "grad_norm": 4.034844398498535, "learning_rate": 9.777189330504016e-07, "loss": 0.0412, "step": 90180 }, { "epoch": 0.9636198514877932, "grad_norm": 2.737630844116211, "learning_rate": 9.777139732967393e-07, "loss": 0.0703, "step": 90190 }, { "epoch": 0.9637266948020727, "grad_norm": 0.2730221152305603, "learning_rate": 9.777090130037017e-07, "loss": 0.0445, "step": 90200 }, { "epoch": 0.9638335381163524, "grad_norm": 4.244418621063232, "learning_rate": 9.77704052171295e-07, "loss": 0.0117, "step": 90210 }, { "epoch": 0.963940381430632, "grad_norm": 7.562344551086426, "learning_rate": 9.77699090799524e-07, "loss": 0.0681, "step": 90220 }, { "epoch": 0.9640472247449116, "grad_norm": 1.6489707231521606, "learning_rate": 9.77694128888395e-07, "loss": 0.015, "step": 90230 }, { "epoch": 0.9641540680591912, "grad_norm": 1.506848931312561, "learning_rate": 9.77689166437913e-07, "loss": 0.0501, "step": 90240 }, { "epoch": 0.9642609113734708, "grad_norm": 2.351891279220581, "learning_rate": 9.77684203448084e-07, "loss": 0.0601, "step": 90250 }, { "epoch": 0.9643677546877504, "grad_norm": 0.011494033969938755, "learning_rate": 9.776792399189134e-07, "loss": 0.0366, "step": 90260 }, { "epoch": 0.9644745980020301, "grad_norm": 0.8608587980270386, "learning_rate": 9.776742758504068e-07, "loss": 0.0394, "step": 90270 }, { "epoch": 0.9645814413163096, "grad_norm": 0.20709356665611267, "learning_rate": 9.7766931124257e-07, "loss": 0.0111, "step": 90280 }, { "epoch": 0.9646882846305892, "grad_norm": 3.5841450691223145, "learning_rate": 9.776643460954085e-07, "loss": 0.0742, "step": 90290 }, { "epoch": 0.9647951279448689, "grad_norm": 1.8359460830688477, "learning_rate": 9.776593804089278e-07, "loss": 0.0274, "step": 90300 }, { "epoch": 0.9649019712591484, "grad_norm": 0.01351244281977415, "learning_rate": 9.776544141831338e-07, "loss": 0.0355, "step": 90310 }, { "epoch": 0.965008814573428, "grad_norm": 10.339719772338867, "learning_rate": 9.776494474180318e-07, "loss": 0.0335, "step": 90320 }, { "epoch": 0.9651156578877077, "grad_norm": 10.807798385620117, "learning_rate": 9.776444801136277e-07, "loss": 0.0306, "step": 90330 }, { "epoch": 0.9652225012019873, "grad_norm": 6.170412063598633, "learning_rate": 9.776395122699267e-07, "loss": 0.1293, "step": 90340 }, { "epoch": 0.9653293445162668, "grad_norm": 5.353976249694824, "learning_rate": 9.776345438869346e-07, "loss": 0.0596, "step": 90350 }, { "epoch": 0.9654361878305465, "grad_norm": 2.175548791885376, "learning_rate": 9.776295749646573e-07, "loss": 0.0285, "step": 90360 }, { "epoch": 0.9655430311448261, "grad_norm": 8.212127685546875, "learning_rate": 9.776246055031e-07, "loss": 0.0485, "step": 90370 }, { "epoch": 0.9656498744591058, "grad_norm": 1.9546936750411987, "learning_rate": 9.776196355022687e-07, "loss": 0.0413, "step": 90380 }, { "epoch": 0.9657567177733853, "grad_norm": 5.202980995178223, "learning_rate": 9.776146649621684e-07, "loss": 0.0993, "step": 90390 }, { "epoch": 0.9658635610876649, "grad_norm": 0.6184790730476379, "learning_rate": 9.776096938828053e-07, "loss": 0.0836, "step": 90400 }, { "epoch": 0.9659704044019446, "grad_norm": 5.55327844619751, "learning_rate": 9.776047222641849e-07, "loss": 0.0391, "step": 90410 }, { "epoch": 0.9660772477162242, "grad_norm": 4.756024360656738, "learning_rate": 9.775997501063127e-07, "loss": 0.0213, "step": 90420 }, { "epoch": 0.9661840910305037, "grad_norm": 5.814054489135742, "learning_rate": 9.775947774091942e-07, "loss": 0.0623, "step": 90430 }, { "epoch": 0.9662909343447834, "grad_norm": 3.415882110595703, "learning_rate": 9.775898041728352e-07, "loss": 0.1009, "step": 90440 }, { "epoch": 0.966397777659063, "grad_norm": 4.116949558258057, "learning_rate": 9.775848303972413e-07, "loss": 0.0426, "step": 90450 }, { "epoch": 0.9665046209733426, "grad_norm": 6.3125834465026855, "learning_rate": 9.77579856082418e-07, "loss": 0.0662, "step": 90460 }, { "epoch": 0.9666114642876222, "grad_norm": 0.9512787461280823, "learning_rate": 9.77574881228371e-07, "loss": 0.029, "step": 90470 }, { "epoch": 0.9667183076019018, "grad_norm": 1.7536567449569702, "learning_rate": 9.77569905835106e-07, "loss": 0.0466, "step": 90480 }, { "epoch": 0.9668251509161814, "grad_norm": 1.9790562391281128, "learning_rate": 9.775649299026285e-07, "loss": 0.0323, "step": 90490 }, { "epoch": 0.9669319942304611, "grad_norm": 4.921974182128906, "learning_rate": 9.775599534309442e-07, "loss": 0.0248, "step": 90500 }, { "epoch": 0.9670388375447406, "grad_norm": 4.809384346008301, "learning_rate": 9.775549764200586e-07, "loss": 0.0469, "step": 90510 }, { "epoch": 0.9671456808590202, "grad_norm": 1.8542578220367432, "learning_rate": 9.775499988699774e-07, "loss": 0.062, "step": 90520 }, { "epoch": 0.9672525241732999, "grad_norm": 8.902198791503906, "learning_rate": 9.775450207807062e-07, "loss": 0.042, "step": 90530 }, { "epoch": 0.9673593674875794, "grad_norm": 0.22906111180782318, "learning_rate": 9.775400421522507e-07, "loss": 0.0459, "step": 90540 }, { "epoch": 0.9674662108018591, "grad_norm": 3.1027162075042725, "learning_rate": 9.775350629846164e-07, "loss": 0.0261, "step": 90550 }, { "epoch": 0.9675730541161387, "grad_norm": 6.871677398681641, "learning_rate": 9.775300832778092e-07, "loss": 0.0425, "step": 90560 }, { "epoch": 0.9676798974304183, "grad_norm": 4.123419284820557, "learning_rate": 9.775251030318341e-07, "loss": 0.0309, "step": 90570 }, { "epoch": 0.967786740744698, "grad_norm": 0.02742931991815567, "learning_rate": 9.775201222466974e-07, "loss": 0.0275, "step": 90580 }, { "epoch": 0.9678935840589775, "grad_norm": 5.140036582946777, "learning_rate": 9.775151409224043e-07, "loss": 0.0334, "step": 90590 }, { "epoch": 0.9680004273732571, "grad_norm": 4.701327323913574, "learning_rate": 9.775101590589606e-07, "loss": 0.023, "step": 90600 }, { "epoch": 0.9681072706875368, "grad_norm": 8.37576675415039, "learning_rate": 9.775051766563719e-07, "loss": 0.0781, "step": 90610 }, { "epoch": 0.9682141140018163, "grad_norm": 1.4616804122924805, "learning_rate": 9.775001937146437e-07, "loss": 0.0166, "step": 90620 }, { "epoch": 0.9683209573160959, "grad_norm": 7.155869007110596, "learning_rate": 9.774952102337819e-07, "loss": 0.0589, "step": 90630 }, { "epoch": 0.9684278006303756, "grad_norm": 4.535029888153076, "learning_rate": 9.774902262137918e-07, "loss": 0.046, "step": 90640 }, { "epoch": 0.9685346439446552, "grad_norm": 1.9241585731506348, "learning_rate": 9.774852416546793e-07, "loss": 0.0595, "step": 90650 }, { "epoch": 0.9686414872589347, "grad_norm": 6.030017852783203, "learning_rate": 9.7748025655645e-07, "loss": 0.0395, "step": 90660 }, { "epoch": 0.9687483305732144, "grad_norm": 3.3697874546051025, "learning_rate": 9.774752709191093e-07, "loss": 0.0383, "step": 90670 }, { "epoch": 0.968855173887494, "grad_norm": 1.7383381128311157, "learning_rate": 9.774702847426632e-07, "loss": 0.0417, "step": 90680 }, { "epoch": 0.9689620172017736, "grad_norm": 6.80549430847168, "learning_rate": 9.77465298027117e-07, "loss": 0.0616, "step": 90690 }, { "epoch": 0.9690688605160532, "grad_norm": 3.281365394592285, "learning_rate": 9.774603107724765e-07, "loss": 0.0908, "step": 90700 }, { "epoch": 0.9691757038303328, "grad_norm": 3.892430543899536, "learning_rate": 9.77455322978747e-07, "loss": 0.0498, "step": 90710 }, { "epoch": 0.9692825471446124, "grad_norm": 2.718158483505249, "learning_rate": 9.774503346459348e-07, "loss": 0.1158, "step": 90720 }, { "epoch": 0.969389390458892, "grad_norm": 10.002246856689453, "learning_rate": 9.774453457740448e-07, "loss": 0.0507, "step": 90730 }, { "epoch": 0.9694962337731716, "grad_norm": 0.23494602739810944, "learning_rate": 9.774403563630832e-07, "loss": 0.0569, "step": 90740 }, { "epoch": 0.9696030770874513, "grad_norm": 0.06306034326553345, "learning_rate": 9.774353664130553e-07, "loss": 0.0362, "step": 90750 }, { "epoch": 0.9697099204017309, "grad_norm": 0.6057693362236023, "learning_rate": 9.774303759239667e-07, "loss": 0.0446, "step": 90760 }, { "epoch": 0.9698167637160104, "grad_norm": 8.030004501342773, "learning_rate": 9.774253848958234e-07, "loss": 0.0775, "step": 90770 }, { "epoch": 0.9699236070302901, "grad_norm": 0.02614402398467064, "learning_rate": 9.774203933286307e-07, "loss": 0.0204, "step": 90780 }, { "epoch": 0.9700304503445697, "grad_norm": 3.3529319763183594, "learning_rate": 9.774154012223944e-07, "loss": 0.1012, "step": 90790 }, { "epoch": 0.9701372936588493, "grad_norm": 2.088900566101074, "learning_rate": 9.774104085771202e-07, "loss": 0.0414, "step": 90800 }, { "epoch": 0.970244136973129, "grad_norm": 2.9335639476776123, "learning_rate": 9.774054153928134e-07, "loss": 0.0677, "step": 90810 }, { "epoch": 0.9703509802874085, "grad_norm": 0.39191171526908875, "learning_rate": 9.7740042166948e-07, "loss": 0.0509, "step": 90820 }, { "epoch": 0.9704578236016881, "grad_norm": 1.142202615737915, "learning_rate": 9.773954274071255e-07, "loss": 0.0416, "step": 90830 }, { "epoch": 0.9705646669159678, "grad_norm": 8.828341484069824, "learning_rate": 9.773904326057554e-07, "loss": 0.0337, "step": 90840 }, { "epoch": 0.9706715102302473, "grad_norm": 4.579033374786377, "learning_rate": 9.773854372653754e-07, "loss": 0.0435, "step": 90850 }, { "epoch": 0.9707783535445269, "grad_norm": 1.3869410753250122, "learning_rate": 9.773804413859915e-07, "loss": 0.0116, "step": 90860 }, { "epoch": 0.9708851968588066, "grad_norm": 4.569618225097656, "learning_rate": 9.773754449676087e-07, "loss": 0.0466, "step": 90870 }, { "epoch": 0.9709920401730862, "grad_norm": 6.561339855194092, "learning_rate": 9.773704480102333e-07, "loss": 0.0461, "step": 90880 }, { "epoch": 0.9710988834873657, "grad_norm": 3.378455877304077, "learning_rate": 9.773654505138705e-07, "loss": 0.0384, "step": 90890 }, { "epoch": 0.9712057268016454, "grad_norm": 0.03972858190536499, "learning_rate": 9.773604524785262e-07, "loss": 0.0449, "step": 90900 }, { "epoch": 0.971312570115925, "grad_norm": 2.4670190811157227, "learning_rate": 9.773554539042058e-07, "loss": 0.0163, "step": 90910 }, { "epoch": 0.9714194134302047, "grad_norm": 8.31977653503418, "learning_rate": 9.773504547909151e-07, "loss": 0.0156, "step": 90920 }, { "epoch": 0.9715262567444842, "grad_norm": 0.09947190433740616, "learning_rate": 9.773454551386597e-07, "loss": 0.0277, "step": 90930 }, { "epoch": 0.9716331000587638, "grad_norm": 0.013074054382741451, "learning_rate": 9.77340454947445e-07, "loss": 0.0305, "step": 90940 }, { "epoch": 0.9717399433730435, "grad_norm": 1.9925850629806519, "learning_rate": 9.773354542172773e-07, "loss": 0.0451, "step": 90950 }, { "epoch": 0.971846786687323, "grad_norm": 4.35461950302124, "learning_rate": 9.773304529481616e-07, "loss": 0.0648, "step": 90960 }, { "epoch": 0.9719536300016026, "grad_norm": 4.215958595275879, "learning_rate": 9.77325451140104e-07, "loss": 0.0283, "step": 90970 }, { "epoch": 0.9720604733158823, "grad_norm": 0.04626312479376793, "learning_rate": 9.773204487931098e-07, "loss": 0.0314, "step": 90980 }, { "epoch": 0.9721673166301619, "grad_norm": 16.059402465820312, "learning_rate": 9.773154459071847e-07, "loss": 0.0894, "step": 90990 }, { "epoch": 0.9722741599444414, "grad_norm": 6.042038917541504, "learning_rate": 9.773104424823346e-07, "loss": 0.0229, "step": 91000 }, { "epoch": 0.9723810032587211, "grad_norm": 6.140327453613281, "learning_rate": 9.77305438518565e-07, "loss": 0.0845, "step": 91010 }, { "epoch": 0.9724878465730007, "grad_norm": 0.3162052631378174, "learning_rate": 9.773004340158815e-07, "loss": 0.043, "step": 91020 }, { "epoch": 0.9725946898872803, "grad_norm": 4.835000991821289, "learning_rate": 9.772954289742898e-07, "loss": 0.0252, "step": 91030 }, { "epoch": 0.9727015332015599, "grad_norm": 3.908106565475464, "learning_rate": 9.772904233937956e-07, "loss": 0.0309, "step": 91040 }, { "epoch": 0.9728083765158395, "grad_norm": 8.683087348937988, "learning_rate": 9.772854172744042e-07, "loss": 0.1232, "step": 91050 }, { "epoch": 0.9729152198301191, "grad_norm": 5.228766918182373, "learning_rate": 9.772804106161218e-07, "loss": 0.0479, "step": 91060 }, { "epoch": 0.9730220631443988, "grad_norm": 3.1564741134643555, "learning_rate": 9.772754034189537e-07, "loss": 0.0253, "step": 91070 }, { "epoch": 0.9731289064586783, "grad_norm": 2.6543033123016357, "learning_rate": 9.772703956829055e-07, "loss": 0.0452, "step": 91080 }, { "epoch": 0.9732357497729579, "grad_norm": 0.1082988753914833, "learning_rate": 9.772653874079834e-07, "loss": 0.0469, "step": 91090 }, { "epoch": 0.9733425930872376, "grad_norm": 3.708726406097412, "learning_rate": 9.772603785941923e-07, "loss": 0.075, "step": 91100 }, { "epoch": 0.9734494364015172, "grad_norm": 6.148220539093018, "learning_rate": 9.772553692415384e-07, "loss": 0.0705, "step": 91110 }, { "epoch": 0.9735562797157968, "grad_norm": 5.374856948852539, "learning_rate": 9.772503593500272e-07, "loss": 0.0888, "step": 91120 }, { "epoch": 0.9736631230300764, "grad_norm": 3.3578572273254395, "learning_rate": 9.772453489196642e-07, "loss": 0.084, "step": 91130 }, { "epoch": 0.973769966344356, "grad_norm": 3.3589725494384766, "learning_rate": 9.772403379504553e-07, "loss": 0.0292, "step": 91140 }, { "epoch": 0.9738768096586357, "grad_norm": 5.778581142425537, "learning_rate": 9.77235326442406e-07, "loss": 0.0436, "step": 91150 }, { "epoch": 0.9739836529729152, "grad_norm": 0.3698411285877228, "learning_rate": 9.772303143955218e-07, "loss": 0.039, "step": 91160 }, { "epoch": 0.9740904962871948, "grad_norm": 0.21673397719860077, "learning_rate": 9.772253018098087e-07, "loss": 0.0343, "step": 91170 }, { "epoch": 0.9741973396014745, "grad_norm": 15.631424903869629, "learning_rate": 9.772202886852724e-07, "loss": 0.0369, "step": 91180 }, { "epoch": 0.974304182915754, "grad_norm": 4.841933250427246, "learning_rate": 9.77215275021918e-07, "loss": 0.0095, "step": 91190 }, { "epoch": 0.9744110262300336, "grad_norm": 10.783514976501465, "learning_rate": 9.772102608197518e-07, "loss": 0.0657, "step": 91200 }, { "epoch": 0.9745178695443133, "grad_norm": 0.46620169281959534, "learning_rate": 9.772052460787793e-07, "loss": 0.0123, "step": 91210 }, { "epoch": 0.9746247128585929, "grad_norm": 4.1043701171875, "learning_rate": 9.772002307990057e-07, "loss": 0.0537, "step": 91220 }, { "epoch": 0.9747315561728724, "grad_norm": 8.508151054382324, "learning_rate": 9.771952149804372e-07, "loss": 0.0379, "step": 91230 }, { "epoch": 0.9748383994871521, "grad_norm": 3.0472631454467773, "learning_rate": 9.771901986230793e-07, "loss": 0.0343, "step": 91240 }, { "epoch": 0.9749452428014317, "grad_norm": 13.13017749786377, "learning_rate": 9.771851817269377e-07, "loss": 0.0221, "step": 91250 }, { "epoch": 0.9750520861157113, "grad_norm": 9.251128196716309, "learning_rate": 9.77180164292018e-07, "loss": 0.0355, "step": 91260 }, { "epoch": 0.9751589294299909, "grad_norm": 0.01370952557772398, "learning_rate": 9.771751463183258e-07, "loss": 0.0548, "step": 91270 }, { "epoch": 0.9752657727442705, "grad_norm": 7.613104820251465, "learning_rate": 9.77170127805867e-07, "loss": 0.07, "step": 91280 }, { "epoch": 0.9753726160585502, "grad_norm": 1.48544180393219, "learning_rate": 9.77165108754647e-07, "loss": 0.0497, "step": 91290 }, { "epoch": 0.9754794593728298, "grad_norm": 1.3492735624313354, "learning_rate": 9.771600891646716e-07, "loss": 0.0725, "step": 91300 }, { "epoch": 0.9755863026871093, "grad_norm": 0.088699109852314, "learning_rate": 9.771550690359465e-07, "loss": 0.0968, "step": 91310 }, { "epoch": 0.975693146001389, "grad_norm": 6.802286148071289, "learning_rate": 9.771500483684771e-07, "loss": 0.085, "step": 91320 }, { "epoch": 0.9757999893156686, "grad_norm": 1.9853824377059937, "learning_rate": 9.771450271622694e-07, "loss": 0.0295, "step": 91330 }, { "epoch": 0.9759068326299482, "grad_norm": 0.7534142136573792, "learning_rate": 9.771400054173292e-07, "loss": 0.1125, "step": 91340 }, { "epoch": 0.9760136759442278, "grad_norm": 3.1525046825408936, "learning_rate": 9.771349831336617e-07, "loss": 0.0363, "step": 91350 }, { "epoch": 0.9761205192585074, "grad_norm": 5.785358905792236, "learning_rate": 9.771299603112727e-07, "loss": 0.0384, "step": 91360 }, { "epoch": 0.976227362572787, "grad_norm": 0.06414636969566345, "learning_rate": 9.77124936950168e-07, "loss": 0.0557, "step": 91370 }, { "epoch": 0.9763342058870667, "grad_norm": 5.697888374328613, "learning_rate": 9.771199130503534e-07, "loss": 0.0586, "step": 91380 }, { "epoch": 0.9764410492013462, "grad_norm": 0.289661705493927, "learning_rate": 9.771148886118343e-07, "loss": 0.0525, "step": 91390 }, { "epoch": 0.9765478925156258, "grad_norm": 5.3346781730651855, "learning_rate": 9.771098636346164e-07, "loss": 0.0668, "step": 91400 }, { "epoch": 0.9766547358299055, "grad_norm": 1.4602041244506836, "learning_rate": 9.771048381187056e-07, "loss": 0.0573, "step": 91410 }, { "epoch": 0.976761579144185, "grad_norm": 4.795928955078125, "learning_rate": 9.770998120641074e-07, "loss": 0.0283, "step": 91420 }, { "epoch": 0.9768684224584646, "grad_norm": 5.5856428146362305, "learning_rate": 9.770947854708277e-07, "loss": 0.0369, "step": 91430 }, { "epoch": 0.9769752657727443, "grad_norm": 5.956584453582764, "learning_rate": 9.770897583388715e-07, "loss": 0.0322, "step": 91440 }, { "epoch": 0.9770821090870239, "grad_norm": 3.1248056888580322, "learning_rate": 9.770847306682453e-07, "loss": 0.0385, "step": 91450 }, { "epoch": 0.9771889524013034, "grad_norm": 5.608513832092285, "learning_rate": 9.770797024589544e-07, "loss": 0.0455, "step": 91460 }, { "epoch": 0.9772957957155831, "grad_norm": 0.25748470425605774, "learning_rate": 9.770746737110044e-07, "loss": 0.023, "step": 91470 }, { "epoch": 0.9774026390298627, "grad_norm": 3.730114221572876, "learning_rate": 9.770696444244011e-07, "loss": 0.0842, "step": 91480 }, { "epoch": 0.9775094823441424, "grad_norm": 3.564666509628296, "learning_rate": 9.770646145991503e-07, "loss": 0.057, "step": 91490 }, { "epoch": 0.9776163256584219, "grad_norm": 7.367702484130859, "learning_rate": 9.770595842352573e-07, "loss": 0.049, "step": 91500 }, { "epoch": 0.9777231689727015, "grad_norm": 0.2792355716228485, "learning_rate": 9.770545533327284e-07, "loss": 0.0393, "step": 91510 }, { "epoch": 0.9778300122869812, "grad_norm": 2.416090726852417, "learning_rate": 9.770495218915686e-07, "loss": 0.0547, "step": 91520 }, { "epoch": 0.9779368556012608, "grad_norm": 4.26269006729126, "learning_rate": 9.77044489911784e-07, "loss": 0.0419, "step": 91530 }, { "epoch": 0.9780436989155403, "grad_norm": 0.5834575891494751, "learning_rate": 9.770394573933801e-07, "loss": 0.0368, "step": 91540 }, { "epoch": 0.97815054222982, "grad_norm": 4.01267147064209, "learning_rate": 9.770344243363626e-07, "loss": 0.1208, "step": 91550 }, { "epoch": 0.9782573855440996, "grad_norm": 0.44003918766975403, "learning_rate": 9.770293907407373e-07, "loss": 0.0543, "step": 91560 }, { "epoch": 0.9783642288583791, "grad_norm": 11.181954383850098, "learning_rate": 9.770243566065099e-07, "loss": 0.0281, "step": 91570 }, { "epoch": 0.9784710721726588, "grad_norm": 0.8431616425514221, "learning_rate": 9.77019321933686e-07, "loss": 0.1253, "step": 91580 }, { "epoch": 0.9785779154869384, "grad_norm": 1.383424997329712, "learning_rate": 9.770142867222712e-07, "loss": 0.0239, "step": 91590 }, { "epoch": 0.978684758801218, "grad_norm": 0.008099289610981941, "learning_rate": 9.770092509722712e-07, "loss": 0.0873, "step": 91600 }, { "epoch": 0.9787916021154976, "grad_norm": 2.1606264114379883, "learning_rate": 9.77004214683692e-07, "loss": 0.0404, "step": 91610 }, { "epoch": 0.9788984454297772, "grad_norm": 0.7718096375465393, "learning_rate": 9.769991778565388e-07, "loss": 0.0499, "step": 91620 }, { "epoch": 0.9790052887440568, "grad_norm": 6.086650848388672, "learning_rate": 9.769941404908175e-07, "loss": 0.051, "step": 91630 }, { "epoch": 0.9791121320583365, "grad_norm": 4.19389533996582, "learning_rate": 9.76989102586534e-07, "loss": 0.0655, "step": 91640 }, { "epoch": 0.979218975372616, "grad_norm": 3.4585623741149902, "learning_rate": 9.769840641436938e-07, "loss": 0.0597, "step": 91650 }, { "epoch": 0.9793258186868957, "grad_norm": 3.051642417907715, "learning_rate": 9.769790251623025e-07, "loss": 0.0318, "step": 91660 }, { "epoch": 0.9794326620011753, "grad_norm": 8.732158660888672, "learning_rate": 9.769739856423657e-07, "loss": 0.1044, "step": 91670 }, { "epoch": 0.9795395053154549, "grad_norm": 4.186001777648926, "learning_rate": 9.769689455838897e-07, "loss": 0.0492, "step": 91680 }, { "epoch": 0.9796463486297345, "grad_norm": 0.14181900024414062, "learning_rate": 9.769639049868794e-07, "loss": 0.0659, "step": 91690 }, { "epoch": 0.9797531919440141, "grad_norm": 16.73141098022461, "learning_rate": 9.76958863851341e-07, "loss": 0.0724, "step": 91700 }, { "epoch": 0.9798600352582937, "grad_norm": 10.446691513061523, "learning_rate": 9.769538221772799e-07, "loss": 0.0417, "step": 91710 }, { "epoch": 0.9799668785725734, "grad_norm": 6.7693634033203125, "learning_rate": 9.76948779964702e-07, "loss": 0.0518, "step": 91720 }, { "epoch": 0.9800737218868529, "grad_norm": 7.840898036956787, "learning_rate": 9.76943737213613e-07, "loss": 0.057, "step": 91730 }, { "epoch": 0.9801805652011325, "grad_norm": 1.1223649978637695, "learning_rate": 9.769386939240187e-07, "loss": 0.0565, "step": 91740 }, { "epoch": 0.9802874085154122, "grad_norm": 1.611427903175354, "learning_rate": 9.769336500959244e-07, "loss": 0.0138, "step": 91750 }, { "epoch": 0.9803942518296918, "grad_norm": 11.584532737731934, "learning_rate": 9.76928605729336e-07, "loss": 0.0746, "step": 91760 }, { "epoch": 0.9805010951439713, "grad_norm": 0.8894651532173157, "learning_rate": 9.769235608242592e-07, "loss": 0.0483, "step": 91770 }, { "epoch": 0.980607938458251, "grad_norm": 15.26616096496582, "learning_rate": 9.769185153806999e-07, "loss": 0.0511, "step": 91780 }, { "epoch": 0.9807147817725306, "grad_norm": 9.814005851745605, "learning_rate": 9.769134693986634e-07, "loss": 0.0233, "step": 91790 }, { "epoch": 0.9808216250868101, "grad_norm": 5.354064464569092, "learning_rate": 9.769084228781557e-07, "loss": 0.067, "step": 91800 }, { "epoch": 0.9809284684010898, "grad_norm": 0.7742258310317993, "learning_rate": 9.769033758191823e-07, "loss": 0.0295, "step": 91810 }, { "epoch": 0.9810353117153694, "grad_norm": 0.2002604454755783, "learning_rate": 9.768983282217492e-07, "loss": 0.0472, "step": 91820 }, { "epoch": 0.981142155029649, "grad_norm": 0.7968023419380188, "learning_rate": 9.768932800858617e-07, "loss": 0.0257, "step": 91830 }, { "epoch": 0.9812489983439286, "grad_norm": 6.747221946716309, "learning_rate": 9.768882314115256e-07, "loss": 0.1066, "step": 91840 }, { "epoch": 0.9813558416582082, "grad_norm": 0.3527168929576874, "learning_rate": 9.76883182198747e-07, "loss": 0.0529, "step": 91850 }, { "epoch": 0.9814626849724879, "grad_norm": 7.530283451080322, "learning_rate": 9.768781324475311e-07, "loss": 0.1005, "step": 91860 }, { "epoch": 0.9815695282867675, "grad_norm": 4.548709392547607, "learning_rate": 9.768730821578839e-07, "loss": 0.0427, "step": 91870 }, { "epoch": 0.981676371601047, "grad_norm": 5.6747589111328125, "learning_rate": 9.768680313298108e-07, "loss": 0.0492, "step": 91880 }, { "epoch": 0.9817832149153267, "grad_norm": 5.522730827331543, "learning_rate": 9.768629799633178e-07, "loss": 0.0362, "step": 91890 }, { "epoch": 0.9818900582296063, "grad_norm": 0.6396436095237732, "learning_rate": 9.768579280584105e-07, "loss": 0.0707, "step": 91900 }, { "epoch": 0.9819969015438859, "grad_norm": 1.21061372756958, "learning_rate": 9.768528756150947e-07, "loss": 0.0216, "step": 91910 }, { "epoch": 0.9821037448581655, "grad_norm": 0.1532905399799347, "learning_rate": 9.76847822633376e-07, "loss": 0.0297, "step": 91920 }, { "epoch": 0.9822105881724451, "grad_norm": 1.2919918298721313, "learning_rate": 9.7684276911326e-07, "loss": 0.0471, "step": 91930 }, { "epoch": 0.9823174314867247, "grad_norm": 3.203874349594116, "learning_rate": 9.768377150547527e-07, "loss": 0.0455, "step": 91940 }, { "epoch": 0.9824242748010044, "grad_norm": 7.555558681488037, "learning_rate": 9.768326604578596e-07, "loss": 0.045, "step": 91950 }, { "epoch": 0.9825311181152839, "grad_norm": 3.9252073764801025, "learning_rate": 9.768276053225863e-07, "loss": 0.0132, "step": 91960 }, { "epoch": 0.9826379614295635, "grad_norm": 3.470273733139038, "learning_rate": 9.768225496489387e-07, "loss": 0.0952, "step": 91970 }, { "epoch": 0.9827448047438432, "grad_norm": 0.7653926014900208, "learning_rate": 9.768174934369226e-07, "loss": 0.0187, "step": 91980 }, { "epoch": 0.9828516480581228, "grad_norm": 9.697677612304688, "learning_rate": 9.768124366865434e-07, "loss": 0.0883, "step": 91990 }, { "epoch": 0.9829584913724023, "grad_norm": 0.04302757978439331, "learning_rate": 9.768073793978068e-07, "loss": 0.0304, "step": 92000 }, { "epoch": 0.983065334686682, "grad_norm": 1.4595504999160767, "learning_rate": 9.768023215707191e-07, "loss": 0.0302, "step": 92010 }, { "epoch": 0.9831721780009616, "grad_norm": 3.0395023822784424, "learning_rate": 9.767972632052852e-07, "loss": 0.0649, "step": 92020 }, { "epoch": 0.9832790213152413, "grad_norm": 1.2496097087860107, "learning_rate": 9.767922043015115e-07, "loss": 0.0394, "step": 92030 }, { "epoch": 0.9833858646295208, "grad_norm": 0.35061532258987427, "learning_rate": 9.767871448594033e-07, "loss": 0.0267, "step": 92040 }, { "epoch": 0.9834927079438004, "grad_norm": 2.2537362575531006, "learning_rate": 9.767820848789664e-07, "loss": 0.0708, "step": 92050 }, { "epoch": 0.9835995512580801, "grad_norm": 10.301351547241211, "learning_rate": 9.767770243602067e-07, "loss": 0.0451, "step": 92060 }, { "epoch": 0.9837063945723596, "grad_norm": 5.703777313232422, "learning_rate": 9.767719633031295e-07, "loss": 0.0502, "step": 92070 }, { "epoch": 0.9838132378866392, "grad_norm": 12.311830520629883, "learning_rate": 9.76766901707741e-07, "loss": 0.0598, "step": 92080 }, { "epoch": 0.9839200812009189, "grad_norm": 0.04204336181282997, "learning_rate": 9.767618395740466e-07, "loss": 0.0354, "step": 92090 }, { "epoch": 0.9840269245151985, "grad_norm": 0.07518494874238968, "learning_rate": 9.76756776902052e-07, "loss": 0.0417, "step": 92100 }, { "epoch": 0.984133767829478, "grad_norm": 2.6922855377197266, "learning_rate": 9.76751713691763e-07, "loss": 0.0241, "step": 92110 }, { "epoch": 0.9842406111437577, "grad_norm": 6.927524566650391, "learning_rate": 9.767466499431856e-07, "loss": 0.0296, "step": 92120 }, { "epoch": 0.9843474544580373, "grad_norm": 2.822786331176758, "learning_rate": 9.76741585656325e-07, "loss": 0.0281, "step": 92130 }, { "epoch": 0.9844542977723169, "grad_norm": 5.173951148986816, "learning_rate": 9.767365208311873e-07, "loss": 0.0119, "step": 92140 }, { "epoch": 0.9845611410865965, "grad_norm": 0.1146470308303833, "learning_rate": 9.76731455467778e-07, "loss": 0.0644, "step": 92150 }, { "epoch": 0.9846679844008761, "grad_norm": 0.01364958193153143, "learning_rate": 9.76726389566103e-07, "loss": 0.0267, "step": 92160 }, { "epoch": 0.9847748277151557, "grad_norm": 6.3906426429748535, "learning_rate": 9.767213231261677e-07, "loss": 0.0565, "step": 92170 }, { "epoch": 0.9848816710294354, "grad_norm": 3.678107976913452, "learning_rate": 9.767162561479783e-07, "loss": 0.0709, "step": 92180 }, { "epoch": 0.9849885143437149, "grad_norm": 0.04804152995347977, "learning_rate": 9.767111886315402e-07, "loss": 0.0204, "step": 92190 }, { "epoch": 0.9850953576579945, "grad_norm": 11.558515548706055, "learning_rate": 9.767061205768592e-07, "loss": 0.0187, "step": 92200 }, { "epoch": 0.9852022009722742, "grad_norm": 13.947907447814941, "learning_rate": 9.76701051983941e-07, "loss": 0.0811, "step": 92210 }, { "epoch": 0.9853090442865537, "grad_norm": 5.44883394241333, "learning_rate": 9.766959828527914e-07, "loss": 0.047, "step": 92220 }, { "epoch": 0.9854158876008334, "grad_norm": 0.04749124497175217, "learning_rate": 9.76690913183416e-07, "loss": 0.0431, "step": 92230 }, { "epoch": 0.985522730915113, "grad_norm": 3.92887020111084, "learning_rate": 9.766858429758205e-07, "loss": 0.0444, "step": 92240 }, { "epoch": 0.9856295742293926, "grad_norm": 0.5324549078941345, "learning_rate": 9.766807722300108e-07, "loss": 0.0324, "step": 92250 }, { "epoch": 0.9857364175436722, "grad_norm": 2.542612075805664, "learning_rate": 9.766757009459925e-07, "loss": 0.0653, "step": 92260 }, { "epoch": 0.9858432608579518, "grad_norm": 6.903903007507324, "learning_rate": 9.766706291237714e-07, "loss": 0.0375, "step": 92270 }, { "epoch": 0.9859501041722314, "grad_norm": 2.834892511367798, "learning_rate": 9.76665556763353e-07, "loss": 0.086, "step": 92280 }, { "epoch": 0.9860569474865111, "grad_norm": 7.33048152923584, "learning_rate": 9.766604838647434e-07, "loss": 0.019, "step": 92290 }, { "epoch": 0.9861637908007906, "grad_norm": 11.189400672912598, "learning_rate": 9.76655410427948e-07, "loss": 0.0782, "step": 92300 }, { "epoch": 0.9862706341150702, "grad_norm": 1.5216704607009888, "learning_rate": 9.766503364529728e-07, "loss": 0.0686, "step": 92310 }, { "epoch": 0.9863774774293499, "grad_norm": 0.9627013206481934, "learning_rate": 9.766452619398234e-07, "loss": 0.0264, "step": 92320 }, { "epoch": 0.9864843207436295, "grad_norm": 7.755649566650391, "learning_rate": 9.766401868885056e-07, "loss": 0.054, "step": 92330 }, { "epoch": 0.986591164057909, "grad_norm": 4.690139293670654, "learning_rate": 9.766351112990247e-07, "loss": 0.0309, "step": 92340 }, { "epoch": 0.9866980073721887, "grad_norm": 4.549459457397461, "learning_rate": 9.76630035171387e-07, "loss": 0.0353, "step": 92350 }, { "epoch": 0.9868048506864683, "grad_norm": 0.3425479233264923, "learning_rate": 9.766249585055982e-07, "loss": 0.0304, "step": 92360 }, { "epoch": 0.9869116940007479, "grad_norm": 0.3744020462036133, "learning_rate": 9.766198813016637e-07, "loss": 0.0585, "step": 92370 }, { "epoch": 0.9870185373150275, "grad_norm": 0.05105717107653618, "learning_rate": 9.766148035595893e-07, "loss": 0.0158, "step": 92380 }, { "epoch": 0.9871253806293071, "grad_norm": 2.0669667720794678, "learning_rate": 9.766097252793808e-07, "loss": 0.0581, "step": 92390 }, { "epoch": 0.9872322239435868, "grad_norm": 1.862446665763855, "learning_rate": 9.76604646461044e-07, "loss": 0.0147, "step": 92400 }, { "epoch": 0.9873390672578664, "grad_norm": 0.1492559313774109, "learning_rate": 9.765995671045848e-07, "loss": 0.0977, "step": 92410 }, { "epoch": 0.9874459105721459, "grad_norm": 11.908475875854492, "learning_rate": 9.765944872100084e-07, "loss": 0.0602, "step": 92420 }, { "epoch": 0.9875527538864256, "grad_norm": 0.17739346623420715, "learning_rate": 9.76589406777321e-07, "loss": 0.0378, "step": 92430 }, { "epoch": 0.9876595972007052, "grad_norm": 0.14221739768981934, "learning_rate": 9.765843258065281e-07, "loss": 0.0213, "step": 92440 }, { "epoch": 0.9877664405149847, "grad_norm": 1.9817885160446167, "learning_rate": 9.765792442976357e-07, "loss": 0.0315, "step": 92450 }, { "epoch": 0.9878732838292644, "grad_norm": 1.1597422361373901, "learning_rate": 9.765741622506493e-07, "loss": 0.0508, "step": 92460 }, { "epoch": 0.987980127143544, "grad_norm": 9.989100456237793, "learning_rate": 9.765690796655746e-07, "loss": 0.0976, "step": 92470 }, { "epoch": 0.9880869704578236, "grad_norm": 8.683833122253418, "learning_rate": 9.765639965424176e-07, "loss": 0.0737, "step": 92480 }, { "epoch": 0.9881938137721032, "grad_norm": 0.041292283684015274, "learning_rate": 9.765589128811838e-07, "loss": 0.0463, "step": 92490 }, { "epoch": 0.9883006570863828, "grad_norm": 9.664685249328613, "learning_rate": 9.765538286818788e-07, "loss": 0.0734, "step": 92500 }, { "epoch": 0.9884075004006624, "grad_norm": 0.020380312576889992, "learning_rate": 9.765487439445088e-07, "loss": 0.0676, "step": 92510 }, { "epoch": 0.9885143437149421, "grad_norm": 8.197221755981445, "learning_rate": 9.765436586690794e-07, "loss": 0.0173, "step": 92520 }, { "epoch": 0.9886211870292216, "grad_norm": 7.4997029304504395, "learning_rate": 9.76538572855596e-07, "loss": 0.0737, "step": 92530 }, { "epoch": 0.9887280303435012, "grad_norm": 0.035453807562589645, "learning_rate": 9.765334865040647e-07, "loss": 0.0429, "step": 92540 }, { "epoch": 0.9888348736577809, "grad_norm": 7.946826457977295, "learning_rate": 9.765283996144912e-07, "loss": 0.02, "step": 92550 }, { "epoch": 0.9889417169720605, "grad_norm": 0.023676937445998192, "learning_rate": 9.76523312186881e-07, "loss": 0.0398, "step": 92560 }, { "epoch": 0.98904856028634, "grad_norm": 3.7084109783172607, "learning_rate": 9.765182242212403e-07, "loss": 0.0801, "step": 92570 }, { "epoch": 0.9891554036006197, "grad_norm": 0.12645836174488068, "learning_rate": 9.765131357175741e-07, "loss": 0.0468, "step": 92580 }, { "epoch": 0.9892622469148993, "grad_norm": 15.586892127990723, "learning_rate": 9.76508046675889e-07, "loss": 0.0702, "step": 92590 }, { "epoch": 0.989369090229179, "grad_norm": 2.1632015705108643, "learning_rate": 9.765029570961902e-07, "loss": 0.0614, "step": 92600 }, { "epoch": 0.9894759335434585, "grad_norm": 2.6505236625671387, "learning_rate": 9.764978669784838e-07, "loss": 0.0592, "step": 92610 }, { "epoch": 0.9895827768577381, "grad_norm": 0.2635064423084259, "learning_rate": 9.76492776322775e-07, "loss": 0.0518, "step": 92620 }, { "epoch": 0.9896896201720178, "grad_norm": 4.109877586364746, "learning_rate": 9.764876851290702e-07, "loss": 0.0544, "step": 92630 }, { "epoch": 0.9897964634862974, "grad_norm": 3.1579339504241943, "learning_rate": 9.764825933973747e-07, "loss": 0.0865, "step": 92640 }, { "epoch": 0.9899033068005769, "grad_norm": 6.616633892059326, "learning_rate": 9.764775011276944e-07, "loss": 0.1028, "step": 92650 }, { "epoch": 0.9900101501148566, "grad_norm": 3.033763885498047, "learning_rate": 9.76472408320035e-07, "loss": 0.0489, "step": 92660 }, { "epoch": 0.9901169934291362, "grad_norm": 3.2253365516662598, "learning_rate": 9.764673149744022e-07, "loss": 0.0247, "step": 92670 }, { "epoch": 0.9902238367434157, "grad_norm": 15.443991661071777, "learning_rate": 9.76462221090802e-07, "loss": 0.0448, "step": 92680 }, { "epoch": 0.9903306800576954, "grad_norm": 1.6695588827133179, "learning_rate": 9.7645712666924e-07, "loss": 0.0385, "step": 92690 }, { "epoch": 0.990437523371975, "grad_norm": 0.6184245944023132, "learning_rate": 9.76452031709722e-07, "loss": 0.0342, "step": 92700 }, { "epoch": 0.9905443666862546, "grad_norm": 0.030369501560926437, "learning_rate": 9.764469362122536e-07, "loss": 0.0391, "step": 92710 }, { "epoch": 0.9906512100005342, "grad_norm": 0.42527177929878235, "learning_rate": 9.764418401768406e-07, "loss": 0.0435, "step": 92720 }, { "epoch": 0.9907580533148138, "grad_norm": 2.2739672660827637, "learning_rate": 9.764367436034888e-07, "loss": 0.0414, "step": 92730 }, { "epoch": 0.9908648966290934, "grad_norm": 0.12869110703468323, "learning_rate": 9.76431646492204e-07, "loss": 0.0372, "step": 92740 }, { "epoch": 0.9909717399433731, "grad_norm": 3.936917781829834, "learning_rate": 9.76426548842992e-07, "loss": 0.0236, "step": 92750 }, { "epoch": 0.9910785832576526, "grad_norm": 3.2977466583251953, "learning_rate": 9.764214506558584e-07, "loss": 0.0388, "step": 92760 }, { "epoch": 0.9911854265719323, "grad_norm": 0.1433887630701065, "learning_rate": 9.764163519308092e-07, "loss": 0.0355, "step": 92770 }, { "epoch": 0.9912922698862119, "grad_norm": 2.8213107585906982, "learning_rate": 9.764112526678497e-07, "loss": 0.0706, "step": 92780 }, { "epoch": 0.9913991132004915, "grad_norm": 0.6635496020317078, "learning_rate": 9.76406152866986e-07, "loss": 0.0974, "step": 92790 }, { "epoch": 0.9915059565147711, "grad_norm": 3.602668046951294, "learning_rate": 9.764010525282238e-07, "loss": 0.0286, "step": 92800 }, { "epoch": 0.9916127998290507, "grad_norm": 4.632473945617676, "learning_rate": 9.76395951651569e-07, "loss": 0.026, "step": 92810 }, { "epoch": 0.9917196431433303, "grad_norm": 3.357863187789917, "learning_rate": 9.76390850237027e-07, "loss": 0.0314, "step": 92820 }, { "epoch": 0.99182648645761, "grad_norm": 2.424347162246704, "learning_rate": 9.763857482846038e-07, "loss": 0.0471, "step": 92830 }, { "epoch": 0.9919333297718895, "grad_norm": 8.404906272888184, "learning_rate": 9.763806457943054e-07, "loss": 0.0663, "step": 92840 }, { "epoch": 0.9920401730861691, "grad_norm": 0.3493247628211975, "learning_rate": 9.763755427661372e-07, "loss": 0.0748, "step": 92850 }, { "epoch": 0.9921470164004488, "grad_norm": 0.2905158996582031, "learning_rate": 9.763704392001049e-07, "loss": 0.0391, "step": 92860 }, { "epoch": 0.9922538597147283, "grad_norm": 0.07798591256141663, "learning_rate": 9.763653350962145e-07, "loss": 0.0227, "step": 92870 }, { "epoch": 0.9923607030290079, "grad_norm": 0.10585285723209381, "learning_rate": 9.763602304544715e-07, "loss": 0.034, "step": 92880 }, { "epoch": 0.9924675463432876, "grad_norm": 0.9885070323944092, "learning_rate": 9.763551252748821e-07, "loss": 0.0447, "step": 92890 }, { "epoch": 0.9925743896575672, "grad_norm": 0.1272135227918625, "learning_rate": 9.763500195574518e-07, "loss": 0.0172, "step": 92900 }, { "epoch": 0.9926812329718467, "grad_norm": 3.5482215881347656, "learning_rate": 9.763449133021863e-07, "loss": 0.0608, "step": 92910 }, { "epoch": 0.9927880762861264, "grad_norm": 0.7727323174476624, "learning_rate": 9.763398065090914e-07, "loss": 0.0621, "step": 92920 }, { "epoch": 0.992894919600406, "grad_norm": 0.17222411930561066, "learning_rate": 9.76334699178173e-07, "loss": 0.047, "step": 92930 }, { "epoch": 0.9930017629146856, "grad_norm": 0.4228537380695343, "learning_rate": 9.763295913094368e-07, "loss": 0.0958, "step": 92940 }, { "epoch": 0.9931086062289652, "grad_norm": 5.992094993591309, "learning_rate": 9.763244829028886e-07, "loss": 0.0048, "step": 92950 }, { "epoch": 0.9932154495432448, "grad_norm": 0.02474677748978138, "learning_rate": 9.763193739585338e-07, "loss": 0.0257, "step": 92960 }, { "epoch": 0.9933222928575245, "grad_norm": 5.460602760314941, "learning_rate": 9.763142644763787e-07, "loss": 0.0334, "step": 92970 }, { "epoch": 0.9934291361718041, "grad_norm": 1.7598272562026978, "learning_rate": 9.763091544564288e-07, "loss": 0.0823, "step": 92980 }, { "epoch": 0.9935359794860836, "grad_norm": 8.55970573425293, "learning_rate": 9.7630404389869e-07, "loss": 0.0551, "step": 92990 }, { "epoch": 0.9936428228003633, "grad_norm": 4.173396587371826, "learning_rate": 9.762989328031679e-07, "loss": 0.0297, "step": 93000 }, { "epoch": 0.9937496661146429, "grad_norm": 3.6003801822662354, "learning_rate": 9.762938211698684e-07, "loss": 0.0349, "step": 93010 }, { "epoch": 0.9938565094289225, "grad_norm": 0.04478152096271515, "learning_rate": 9.76288708998797e-07, "loss": 0.0356, "step": 93020 }, { "epoch": 0.9939633527432021, "grad_norm": 0.0099837901070714, "learning_rate": 9.7628359628996e-07, "loss": 0.0369, "step": 93030 }, { "epoch": 0.9940701960574817, "grad_norm": 0.9284777641296387, "learning_rate": 9.762784830433629e-07, "loss": 0.0643, "step": 93040 }, { "epoch": 0.9941770393717613, "grad_norm": 7.913635730743408, "learning_rate": 9.762733692590112e-07, "loss": 0.0804, "step": 93050 }, { "epoch": 0.994283882686041, "grad_norm": 0.926693856716156, "learning_rate": 9.76268254936911e-07, "loss": 0.0206, "step": 93060 }, { "epoch": 0.9943907260003205, "grad_norm": 0.6756818890571594, "learning_rate": 9.76263140077068e-07, "loss": 0.0191, "step": 93070 }, { "epoch": 0.9944975693146001, "grad_norm": 4.260715961456299, "learning_rate": 9.76258024679488e-07, "loss": 0.0585, "step": 93080 }, { "epoch": 0.9946044126288798, "grad_norm": 3.896428346633911, "learning_rate": 9.762529087441768e-07, "loss": 0.0448, "step": 93090 }, { "epoch": 0.9947112559431593, "grad_norm": 0.14109915494918823, "learning_rate": 9.762477922711401e-07, "loss": 0.0261, "step": 93100 }, { "epoch": 0.9948180992574389, "grad_norm": 4.001534938812256, "learning_rate": 9.762426752603836e-07, "loss": 0.0402, "step": 93110 }, { "epoch": 0.9949249425717186, "grad_norm": 0.6666138768196106, "learning_rate": 9.762375577119132e-07, "loss": 0.0668, "step": 93120 }, { "epoch": 0.9950317858859982, "grad_norm": 4.314294815063477, "learning_rate": 9.76232439625735e-07, "loss": 0.0619, "step": 93130 }, { "epoch": 0.9951386292002778, "grad_norm": 1.7043049335479736, "learning_rate": 9.76227321001854e-07, "loss": 0.0302, "step": 93140 }, { "epoch": 0.9952454725145574, "grad_norm": 0.14838232100009918, "learning_rate": 9.762222018402764e-07, "loss": 0.0481, "step": 93150 }, { "epoch": 0.995352315828837, "grad_norm": 0.045242972671985626, "learning_rate": 9.762170821410083e-07, "loss": 0.0535, "step": 93160 }, { "epoch": 0.9954591591431167, "grad_norm": 3.4039928913116455, "learning_rate": 9.76211961904055e-07, "loss": 0.0394, "step": 93170 }, { "epoch": 0.9955660024573962, "grad_norm": 2.8231515884399414, "learning_rate": 9.762068411294224e-07, "loss": 0.0468, "step": 93180 }, { "epoch": 0.9956728457716758, "grad_norm": 4.371371269226074, "learning_rate": 9.762017198171164e-07, "loss": 0.0219, "step": 93190 }, { "epoch": 0.9957796890859555, "grad_norm": 0.8342003226280212, "learning_rate": 9.761965979671428e-07, "loss": 0.0258, "step": 93200 }, { "epoch": 0.9958865324002351, "grad_norm": 1.8421481847763062, "learning_rate": 9.76191475579507e-07, "loss": 0.0419, "step": 93210 }, { "epoch": 0.9959933757145146, "grad_norm": 0.48870420455932617, "learning_rate": 9.761863526542155e-07, "loss": 0.0539, "step": 93220 }, { "epoch": 0.9961002190287943, "grad_norm": 3.522170305252075, "learning_rate": 9.761812291912734e-07, "loss": 0.0749, "step": 93230 }, { "epoch": 0.9962070623430739, "grad_norm": 3.9378421306610107, "learning_rate": 9.76176105190687e-07, "loss": 0.0236, "step": 93240 }, { "epoch": 0.9963139056573534, "grad_norm": 0.2517857849597931, "learning_rate": 9.761709806524615e-07, "loss": 0.0402, "step": 93250 }, { "epoch": 0.9964207489716331, "grad_norm": 4.076085567474365, "learning_rate": 9.761658555766033e-07, "loss": 0.0194, "step": 93260 }, { "epoch": 0.9965275922859127, "grad_norm": 3.4446375370025635, "learning_rate": 9.761607299631178e-07, "loss": 0.045, "step": 93270 }, { "epoch": 0.9966344356001923, "grad_norm": 13.301749229431152, "learning_rate": 9.761556038120107e-07, "loss": 0.1434, "step": 93280 }, { "epoch": 0.996741278914472, "grad_norm": 15.977128028869629, "learning_rate": 9.761504771232883e-07, "loss": 0.0955, "step": 93290 }, { "epoch": 0.9968481222287515, "grad_norm": 1.6328632831573486, "learning_rate": 9.76145349896956e-07, "loss": 0.0454, "step": 93300 }, { "epoch": 0.9969549655430311, "grad_norm": 8.05388069152832, "learning_rate": 9.761402221330194e-07, "loss": 0.0856, "step": 93310 }, { "epoch": 0.9970618088573108, "grad_norm": 0.053758226335048676, "learning_rate": 9.76135093831485e-07, "loss": 0.0445, "step": 93320 }, { "epoch": 0.9971686521715903, "grad_norm": 0.03302258625626564, "learning_rate": 9.761299649923579e-07, "loss": 0.0424, "step": 93330 }, { "epoch": 0.99727549548587, "grad_norm": 0.0395660325884819, "learning_rate": 9.761248356156441e-07, "loss": 0.0277, "step": 93340 }, { "epoch": 0.9973823388001496, "grad_norm": 0.11868418008089066, "learning_rate": 9.761197057013495e-07, "loss": 0.0654, "step": 93350 }, { "epoch": 0.9974891821144292, "grad_norm": 4.452608585357666, "learning_rate": 9.761145752494797e-07, "loss": 0.0313, "step": 93360 }, { "epoch": 0.9975960254287088, "grad_norm": 3.212063789367676, "learning_rate": 9.761094442600408e-07, "loss": 0.0257, "step": 93370 }, { "epoch": 0.9977028687429884, "grad_norm": 4.32447624206543, "learning_rate": 9.761043127330382e-07, "loss": 0.0246, "step": 93380 }, { "epoch": 0.997809712057268, "grad_norm": 4.6026291847229, "learning_rate": 9.76099180668478e-07, "loss": 0.0311, "step": 93390 }, { "epoch": 0.9979165553715477, "grad_norm": 0.5807027816772461, "learning_rate": 9.76094048066366e-07, "loss": 0.0387, "step": 93400 }, { "epoch": 0.9980233986858272, "grad_norm": 0.06954381614923477, "learning_rate": 9.760889149267077e-07, "loss": 0.0515, "step": 93410 }, { "epoch": 0.9981302420001068, "grad_norm": 7.967606544494629, "learning_rate": 9.760837812495092e-07, "loss": 0.013, "step": 93420 }, { "epoch": 0.9982370853143865, "grad_norm": 1.5377000570297241, "learning_rate": 9.760786470347762e-07, "loss": 0.0792, "step": 93430 }, { "epoch": 0.998343928628666, "grad_norm": 3.7397375106811523, "learning_rate": 9.760735122825144e-07, "loss": 0.0551, "step": 93440 }, { "epoch": 0.9984507719429456, "grad_norm": 0.8222266435623169, "learning_rate": 9.760683769927297e-07, "loss": 0.0413, "step": 93450 }, { "epoch": 0.9985576152572253, "grad_norm": 2.6398909091949463, "learning_rate": 9.760632411654279e-07, "loss": 0.0489, "step": 93460 }, { "epoch": 0.9986644585715049, "grad_norm": 0.13906192779541016, "learning_rate": 9.760581048006146e-07, "loss": 0.0187, "step": 93470 }, { "epoch": 0.9987713018857844, "grad_norm": 4.350696563720703, "learning_rate": 9.76052967898296e-07, "loss": 0.0362, "step": 93480 }, { "epoch": 0.9988781452000641, "grad_norm": 0.9829966425895691, "learning_rate": 9.760478304584777e-07, "loss": 0.026, "step": 93490 }, { "epoch": 0.9989849885143437, "grad_norm": 3.1644625663757324, "learning_rate": 9.760426924811652e-07, "loss": 0.1171, "step": 93500 }, { "epoch": 0.9990918318286234, "grad_norm": 2.4857587814331055, "learning_rate": 9.760375539663649e-07, "loss": 0.051, "step": 93510 }, { "epoch": 0.999198675142903, "grad_norm": 6.055521011352539, "learning_rate": 9.760324149140819e-07, "loss": 0.0394, "step": 93520 }, { "epoch": 0.9993055184571825, "grad_norm": 0.2683391571044922, "learning_rate": 9.760272753243226e-07, "loss": 0.0584, "step": 93530 }, { "epoch": 0.9994123617714622, "grad_norm": 5.963533401489258, "learning_rate": 9.760221351970925e-07, "loss": 0.0742, "step": 93540 }, { "epoch": 0.9995192050857418, "grad_norm": 0.9392845630645752, "learning_rate": 9.760169945323978e-07, "loss": 0.0185, "step": 93550 }, { "epoch": 0.9996260484000213, "grad_norm": 4.538995742797852, "learning_rate": 9.760118533302435e-07, "loss": 0.0613, "step": 93560 }, { "epoch": 0.999732891714301, "grad_norm": 6.342100620269775, "learning_rate": 9.760067115906362e-07, "loss": 0.0732, "step": 93570 }, { "epoch": 0.9998397350285806, "grad_norm": 0.026949239894747734, "learning_rate": 9.760015693135813e-07, "loss": 0.0436, "step": 93580 }, { "epoch": 0.9999465783428602, "grad_norm": 0.3962198793888092, "learning_rate": 9.759964264990847e-07, "loss": 0.0566, "step": 93590 }, { "epoch": 1.0, "eval_accuracy": 0.5698503565330924, "eval_cer": 0.06490070775785062, "eval_loss": 0.04691227525472641, "eval_runtime": 19046.7215, "eval_samples_per_second": 0.523, "eval_steps_per_second": 0.261, "eval_wer": 0.17363938706368695, "step": 93595 }, { "epoch": 1.0000534216571397, "grad_norm": 0.09526360034942627, "learning_rate": 4e-09, "loss": 0.0323, "step": 93600 }, { "epoch": 1.0001602649714194, "grad_norm": 19.898427963256836, "learning_rate": 1.4e-08, "loss": 0.0981, "step": 93610 }, { "epoch": 1.000267108285699, "grad_norm": 8.076433181762695, "learning_rate": 2.4e-08, "loss": 0.0328, "step": 93620 }, { "epoch": 1.0003739515999786, "grad_norm": 4.2925519943237305, "learning_rate": 3.4e-08, "loss": 0.0261, "step": 93630 }, { "epoch": 1.0004807949142582, "grad_norm": 5.353057384490967, "learning_rate": 4.4e-08, "loss": 0.0459, "step": 93640 }, { "epoch": 1.000587638228538, "grad_norm": 0.22383283078670502, "learning_rate": 5.3999999999999994e-08, "loss": 0.0635, "step": 93650 }, { "epoch": 1.0006944815428174, "grad_norm": 0.4335901737213135, "learning_rate": 6.4e-08, "loss": 0.0409, "step": 93660 }, { "epoch": 1.000801324857097, "grad_norm": 0.20527075231075287, "learning_rate": 7.399999999999999e-08, "loss": 0.0083, "step": 93670 }, { "epoch": 1.0009081681713767, "grad_norm": 0.03021232783794403, "learning_rate": 8.4e-08, "loss": 0.0352, "step": 93680 }, { "epoch": 1.0010150114856562, "grad_norm": 1.9230512380599976, "learning_rate": 9.4e-08, "loss": 0.0227, "step": 93690 }, { "epoch": 1.0011218547999359, "grad_norm": 1.6551434993743896, "learning_rate": 1.0399999999999999e-07, "loss": 0.0398, "step": 93700 }, { "epoch": 1.0012286981142156, "grad_norm": 12.155996322631836, "learning_rate": 1.14e-07, "loss": 0.0411, "step": 93710 }, { "epoch": 1.001335541428495, "grad_norm": 3.1753134727478027, "learning_rate": 1.24e-07, "loss": 0.0316, "step": 93720 }, { "epoch": 1.0014423847427747, "grad_norm": 0.265129417181015, "learning_rate": 1.34e-07, "loss": 0.0232, "step": 93730 }, { "epoch": 1.0015492280570544, "grad_norm": 6.603443622589111, "learning_rate": 1.44e-07, "loss": 0.0387, "step": 93740 }, { "epoch": 1.0016560713713338, "grad_norm": 6.805219650268555, "learning_rate": 1.54e-07, "loss": 0.0377, "step": 93750 }, { "epoch": 1.0017629146856135, "grad_norm": 0.09068756550550461, "learning_rate": 1.64e-07, "loss": 0.1064, "step": 93760 }, { "epoch": 1.0018697579998932, "grad_norm": 0.021696629002690315, "learning_rate": 1.7399999999999997e-07, "loss": 0.0109, "step": 93770 }, { "epoch": 1.0019766013141727, "grad_norm": 1.2642582654953003, "learning_rate": 1.8399999999999998e-07, "loss": 0.0226, "step": 93780 }, { "epoch": 1.0020834446284523, "grad_norm": 0.02323598600924015, "learning_rate": 1.94e-07, "loss": 0.049, "step": 93790 }, { "epoch": 1.002190287942732, "grad_norm": 0.043221212923526764, "learning_rate": 2.0399999999999997e-07, "loss": 0.0355, "step": 93800 }, { "epoch": 1.0022971312570117, "grad_norm": 4.037938117980957, "learning_rate": 2.1399999999999998e-07, "loss": 0.0498, "step": 93810 }, { "epoch": 1.0024039745712912, "grad_norm": 0.6727945804595947, "learning_rate": 2.24e-07, "loss": 0.0155, "step": 93820 }, { "epoch": 1.0025108178855708, "grad_norm": 7.092669486999512, "learning_rate": 2.34e-07, "loss": 0.0175, "step": 93830 }, { "epoch": 1.0026176611998505, "grad_norm": 2.8636090755462646, "learning_rate": 2.4399999999999996e-07, "loss": 0.0472, "step": 93840 }, { "epoch": 1.00272450451413, "grad_norm": 5.1442060470581055, "learning_rate": 2.5399999999999997e-07, "loss": 0.0651, "step": 93850 }, { "epoch": 1.0028313478284097, "grad_norm": 8.817666053771973, "learning_rate": 2.64e-07, "loss": 0.0413, "step": 93860 }, { "epoch": 1.0029381911426893, "grad_norm": 8.929546356201172, "learning_rate": 2.74e-07, "loss": 0.0452, "step": 93870 }, { "epoch": 1.0030450344569688, "grad_norm": 0.1796831637620926, "learning_rate": 2.8399999999999995e-07, "loss": 0.0178, "step": 93880 }, { "epoch": 1.0031518777712485, "grad_norm": 0.03229088336229324, "learning_rate": 2.9399999999999996e-07, "loss": 0.0162, "step": 93890 }, { "epoch": 1.0032587210855282, "grad_norm": 3.812509059906006, "learning_rate": 3.0399999999999997e-07, "loss": 0.0336, "step": 93900 }, { "epoch": 1.0033655643998076, "grad_norm": 2.042167901992798, "learning_rate": 3.14e-07, "loss": 0.0171, "step": 93910 }, { "epoch": 1.0034724077140873, "grad_norm": 0.18393182754516602, "learning_rate": 3.24e-07, "loss": 0.0296, "step": 93920 }, { "epoch": 1.003579251028367, "grad_norm": 0.017137372866272926, "learning_rate": 3.34e-07, "loss": 0.0129, "step": 93930 }, { "epoch": 1.0036860943426464, "grad_norm": 0.13912633061408997, "learning_rate": 3.4399999999999996e-07, "loss": 0.0416, "step": 93940 }, { "epoch": 1.0037929376569261, "grad_norm": 0.049871332943439484, "learning_rate": 3.5399999999999997e-07, "loss": 0.106, "step": 93950 }, { "epoch": 1.0038997809712058, "grad_norm": 9.67077922821045, "learning_rate": 3.64e-07, "loss": 0.03, "step": 93960 }, { "epoch": 1.0040066242854853, "grad_norm": 0.06910809874534607, "learning_rate": 3.74e-07, "loss": 0.0387, "step": 93970 }, { "epoch": 1.004113467599765, "grad_norm": 0.1574661135673523, "learning_rate": 3.84e-07, "loss": 0.0434, "step": 93980 }, { "epoch": 1.0042203109140446, "grad_norm": 0.03605642542243004, "learning_rate": 3.94e-07, "loss": 0.0187, "step": 93990 }, { "epoch": 1.004327154228324, "grad_norm": 6.366193771362305, "learning_rate": 4.04e-07, "loss": 0.041, "step": 94000 }, { "epoch": 1.0044339975426038, "grad_norm": 0.1998196840286255, "learning_rate": 4.14e-07, "loss": 0.0511, "step": 94010 }, { "epoch": 1.0045408408568834, "grad_norm": 12.740202903747559, "learning_rate": 4.24e-07, "loss": 0.0364, "step": 94020 }, { "epoch": 1.004647684171163, "grad_norm": 0.3090702295303345, "learning_rate": 4.34e-07, "loss": 0.0316, "step": 94030 }, { "epoch": 1.0047545274854426, "grad_norm": 1.2077447175979614, "learning_rate": 4.44e-07, "loss": 0.0355, "step": 94040 }, { "epoch": 1.0048613707997223, "grad_norm": 6.636413097381592, "learning_rate": 4.54e-07, "loss": 0.0248, "step": 94050 }, { "epoch": 1.0049682141140017, "grad_norm": 7.617499828338623, "learning_rate": 4.64e-07, "loss": 0.0193, "step": 94060 }, { "epoch": 1.0050750574282814, "grad_norm": 0.3147677481174469, "learning_rate": 4.7399999999999993e-07, "loss": 0.0109, "step": 94070 }, { "epoch": 1.005181900742561, "grad_norm": 0.004239450208842754, "learning_rate": 4.839999999999999e-07, "loss": 0.0149, "step": 94080 }, { "epoch": 1.0052887440568405, "grad_norm": 2.998748302459717, "learning_rate": 4.94e-07, "loss": 0.0459, "step": 94090 }, { "epoch": 1.0053955873711202, "grad_norm": 3.5352530479431152, "learning_rate": 5.04e-07, "loss": 0.0565, "step": 94100 }, { "epoch": 1.0055024306854, "grad_norm": 2.670687198638916, "learning_rate": 5.14e-07, "loss": 0.0374, "step": 94110 }, { "epoch": 1.0056092739996794, "grad_norm": 1.2886930704116821, "learning_rate": 5.24e-07, "loss": 0.0631, "step": 94120 }, { "epoch": 1.005716117313959, "grad_norm": 14.51108169555664, "learning_rate": 5.34e-07, "loss": 0.0545, "step": 94130 }, { "epoch": 1.0058229606282387, "grad_norm": 4.855917930603027, "learning_rate": 5.44e-07, "loss": 0.0395, "step": 94140 }, { "epoch": 1.0059298039425184, "grad_norm": 1.3247714042663574, "learning_rate": 5.54e-07, "loss": 0.034, "step": 94150 }, { "epoch": 1.0060366472567979, "grad_norm": 2.04425048828125, "learning_rate": 5.639999999999999e-07, "loss": 0.0884, "step": 94160 }, { "epoch": 1.0061434905710775, "grad_norm": 5.378826141357422, "learning_rate": 5.739999999999999e-07, "loss": 0.0257, "step": 94170 }, { "epoch": 1.0062503338853572, "grad_norm": 7.938076972961426, "learning_rate": 5.839999999999999e-07, "loss": 0.0336, "step": 94180 }, { "epoch": 1.0063571771996367, "grad_norm": 0.22818368673324585, "learning_rate": 5.939999999999999e-07, "loss": 0.0235, "step": 94190 }, { "epoch": 1.0064640205139164, "grad_norm": 3.5181026458740234, "learning_rate": 6.04e-07, "loss": 0.0483, "step": 94200 }, { "epoch": 1.006570863828196, "grad_norm": 5.821199417114258, "learning_rate": 6.14e-07, "loss": 0.0325, "step": 94210 }, { "epoch": 1.0066777071424755, "grad_norm": 4.791585445404053, "learning_rate": 6.24e-07, "loss": 0.0533, "step": 94220 }, { "epoch": 1.0067845504567552, "grad_norm": 3.1785929203033447, "learning_rate": 6.34e-07, "loss": 0.032, "step": 94230 }, { "epoch": 1.0068913937710349, "grad_norm": 3.41914439201355, "learning_rate": 6.44e-07, "loss": 0.015, "step": 94240 }, { "epoch": 1.0069982370853143, "grad_norm": 5.463118553161621, "learning_rate": 6.54e-07, "loss": 0.054, "step": 94250 }, { "epoch": 1.007105080399594, "grad_norm": 3.206876754760742, "learning_rate": 6.64e-07, "loss": 0.0241, "step": 94260 }, { "epoch": 1.0072119237138737, "grad_norm": 0.05754726752638817, "learning_rate": 6.74e-07, "loss": 0.0452, "step": 94270 }, { "epoch": 1.0073187670281532, "grad_norm": 8.692941665649414, "learning_rate": 6.84e-07, "loss": 0.0476, "step": 94280 }, { "epoch": 1.0074256103424328, "grad_norm": 0.9881983399391174, "learning_rate": 6.939999999999999e-07, "loss": 0.0673, "step": 94290 }, { "epoch": 1.0075324536567125, "grad_norm": 2.168025493621826, "learning_rate": 7.04e-07, "loss": 0.0153, "step": 94300 }, { "epoch": 1.007639296970992, "grad_norm": 9.586134910583496, "learning_rate": 7.14e-07, "loss": 0.0569, "step": 94310 }, { "epoch": 1.0077461402852717, "grad_norm": 4.542018890380859, "learning_rate": 7.24e-07, "loss": 0.0544, "step": 94320 }, { "epoch": 1.0078529835995513, "grad_norm": 0.19878284633159637, "learning_rate": 7.34e-07, "loss": 0.0092, "step": 94330 }, { "epoch": 1.0079598269138308, "grad_norm": 9.766974449157715, "learning_rate": 7.44e-07, "loss": 0.0596, "step": 94340 }, { "epoch": 1.0080666702281105, "grad_norm": 10.95672607421875, "learning_rate": 7.54e-07, "loss": 0.1768, "step": 94350 }, { "epoch": 1.0081735135423902, "grad_norm": 2.4919002056121826, "learning_rate": 7.64e-07, "loss": 0.0313, "step": 94360 }, { "epoch": 1.0082803568566696, "grad_norm": 2.002072811126709, "learning_rate": 7.74e-07, "loss": 0.0255, "step": 94370 }, { "epoch": 1.0083872001709493, "grad_norm": 4.89475154876709, "learning_rate": 7.84e-07, "loss": 0.0352, "step": 94380 }, { "epoch": 1.008494043485229, "grad_norm": 0.1276041865348816, "learning_rate": 7.94e-07, "loss": 0.0499, "step": 94390 }, { "epoch": 1.0086008867995084, "grad_norm": 3.2212507724761963, "learning_rate": 8.04e-07, "loss": 0.0346, "step": 94400 }, { "epoch": 1.0087077301137881, "grad_norm": 4.431447982788086, "learning_rate": 8.14e-07, "loss": 0.0458, "step": 94410 }, { "epoch": 1.0088145734280678, "grad_norm": 8.048575401306152, "learning_rate": 8.24e-07, "loss": 0.0427, "step": 94420 }, { "epoch": 1.0089214167423473, "grad_norm": 2.637619972229004, "learning_rate": 8.34e-07, "loss": 0.0423, "step": 94430 }, { "epoch": 1.009028260056627, "grad_norm": 1.3912193775177002, "learning_rate": 8.439999999999999e-07, "loss": 0.0349, "step": 94440 }, { "epoch": 1.0091351033709066, "grad_norm": 2.82527232170105, "learning_rate": 8.539999999999999e-07, "loss": 0.0561, "step": 94450 }, { "epoch": 1.009241946685186, "grad_norm": 0.02226521447300911, "learning_rate": 8.639999999999999e-07, "loss": 0.0446, "step": 94460 }, { "epoch": 1.0093487899994658, "grad_norm": 1.4699891805648804, "learning_rate": 8.739999999999999e-07, "loss": 0.0249, "step": 94470 }, { "epoch": 1.0094556333137454, "grad_norm": 6.322928428649902, "learning_rate": 8.839999999999999e-07, "loss": 0.0412, "step": 94480 }, { "epoch": 1.009562476628025, "grad_norm": 7.550853252410889, "learning_rate": 8.939999999999999e-07, "loss": 0.0732, "step": 94490 }, { "epoch": 1.0096693199423046, "grad_norm": 12.222515106201172, "learning_rate": 9.039999999999999e-07, "loss": 0.1134, "step": 94500 }, { "epoch": 1.0097761632565843, "grad_norm": 0.7565433979034424, "learning_rate": 9.14e-07, "loss": 0.0296, "step": 94510 }, { "epoch": 1.0098830065708637, "grad_norm": 3.9557745456695557, "learning_rate": 9.24e-07, "loss": 0.0215, "step": 94520 }, { "epoch": 1.0099898498851434, "grad_norm": 1.8158997297286987, "learning_rate": 9.34e-07, "loss": 0.0192, "step": 94530 }, { "epoch": 1.010096693199423, "grad_norm": 11.925678253173828, "learning_rate": 9.439999999999999e-07, "loss": 0.0579, "step": 94540 }, { "epoch": 1.0102035365137028, "grad_norm": 2.219048023223877, "learning_rate": 9.539999999999999e-07, "loss": 0.0449, "step": 94550 }, { "epoch": 1.0103103798279822, "grad_norm": 4.038903713226318, "learning_rate": 9.64e-07, "loss": 0.0418, "step": 94560 }, { "epoch": 1.010417223142262, "grad_norm": 1.749742031097412, "learning_rate": 9.74e-07, "loss": 0.0587, "step": 94570 }, { "epoch": 1.0105240664565416, "grad_norm": 0.6668569445610046, "learning_rate": 9.84e-07, "loss": 0.0098, "step": 94580 }, { "epoch": 1.010630909770821, "grad_norm": 2.90466046333313, "learning_rate": 9.94e-07, "loss": 0.0528, "step": 94590 }, { "epoch": 1.0107377530851007, "grad_norm": 8.229294776916504, "learning_rate": 9.99999999954837e-07, "loss": 0.0315, "step": 94600 }, { "epoch": 1.0108445963993804, "grad_norm": 0.19056826829910278, "learning_rate": 9.99999999446753e-07, "loss": 0.0233, "step": 94610 }, { "epoch": 1.0109514397136599, "grad_norm": 2.6355419158935547, "learning_rate": 9.999999983741312e-07, "loss": 0.0746, "step": 94620 }, { "epoch": 1.0110582830279395, "grad_norm": 6.074586868286133, "learning_rate": 9.99999996736972e-07, "loss": 0.0148, "step": 94630 }, { "epoch": 1.0111651263422192, "grad_norm": 6.070501327514648, "learning_rate": 9.999999945352746e-07, "loss": 0.0213, "step": 94640 }, { "epoch": 1.0112719696564987, "grad_norm": 0.046920377761125565, "learning_rate": 9.999999917690399e-07, "loss": 0.0721, "step": 94650 }, { "epoch": 1.0113788129707784, "grad_norm": 0.12887251377105713, "learning_rate": 9.999999884382673e-07, "loss": 0.0583, "step": 94660 }, { "epoch": 1.011485656285058, "grad_norm": 1.559003472328186, "learning_rate": 9.999999845429567e-07, "loss": 0.1078, "step": 94670 }, { "epoch": 1.0115924995993375, "grad_norm": 4.189475059509277, "learning_rate": 9.999999800831086e-07, "loss": 0.0692, "step": 94680 }, { "epoch": 1.0116993429136172, "grad_norm": 0.1559295803308487, "learning_rate": 9.999999750587229e-07, "loss": 0.0454, "step": 94690 }, { "epoch": 1.0118061862278969, "grad_norm": 0.15922097861766815, "learning_rate": 9.999999694697996e-07, "loss": 0.0416, "step": 94700 }, { "epoch": 1.0119130295421763, "grad_norm": 0.03965189680457115, "learning_rate": 9.999999633163384e-07, "loss": 0.044, "step": 94710 }, { "epoch": 1.012019872856456, "grad_norm": 0.16772113740444183, "learning_rate": 9.999999565983394e-07, "loss": 0.0405, "step": 94720 }, { "epoch": 1.0121267161707357, "grad_norm": 2.9595422744750977, "learning_rate": 9.999999493158028e-07, "loss": 0.0649, "step": 94730 }, { "epoch": 1.0122335594850151, "grad_norm": 10.634177207946777, "learning_rate": 9.999999414687286e-07, "loss": 0.0194, "step": 94740 }, { "epoch": 1.0123404027992948, "grad_norm": 0.20937588810920715, "learning_rate": 9.999999330571165e-07, "loss": 0.0332, "step": 94750 }, { "epoch": 1.0124472461135745, "grad_norm": 1.0954335927963257, "learning_rate": 9.999999240809671e-07, "loss": 0.0427, "step": 94760 }, { "epoch": 1.012554089427854, "grad_norm": 2.3208587169647217, "learning_rate": 9.999999145402798e-07, "loss": 0.0195, "step": 94770 }, { "epoch": 1.0126609327421336, "grad_norm": 8.923627853393555, "learning_rate": 9.999999044350548e-07, "loss": 0.0344, "step": 94780 }, { "epoch": 1.0127677760564133, "grad_norm": 6.613439559936523, "learning_rate": 9.999998937652925e-07, "loss": 0.0432, "step": 94790 }, { "epoch": 1.0128746193706928, "grad_norm": 4.595951557159424, "learning_rate": 9.999998825309924e-07, "loss": 0.0472, "step": 94800 }, { "epoch": 1.0129814626849725, "grad_norm": 1.1206849813461304, "learning_rate": 9.999998707321546e-07, "loss": 0.0378, "step": 94810 }, { "epoch": 1.0130883059992521, "grad_norm": 9.00780200958252, "learning_rate": 9.999998583687792e-07, "loss": 0.053, "step": 94820 }, { "epoch": 1.0131951493135316, "grad_norm": 4.1975417137146, "learning_rate": 9.999998454408664e-07, "loss": 0.0771, "step": 94830 }, { "epoch": 1.0133019926278113, "grad_norm": 1.1206183433532715, "learning_rate": 9.999998319484159e-07, "loss": 0.0082, "step": 94840 }, { "epoch": 1.013408835942091, "grad_norm": 0.318993479013443, "learning_rate": 9.999998178914279e-07, "loss": 0.0323, "step": 94850 }, { "epoch": 1.0135156792563704, "grad_norm": 1.1199562549591064, "learning_rate": 9.999998032699024e-07, "loss": 0.0257, "step": 94860 }, { "epoch": 1.01362252257065, "grad_norm": 0.04986966401338577, "learning_rate": 9.999997880838394e-07, "loss": 0.0798, "step": 94870 }, { "epoch": 1.0137293658849298, "grad_norm": 0.059041645377874374, "learning_rate": 9.999997723332388e-07, "loss": 0.0145, "step": 94880 }, { "epoch": 1.0138362091992095, "grad_norm": 0.087999127805233, "learning_rate": 9.999997560181008e-07, "loss": 0.0518, "step": 94890 }, { "epoch": 1.013943052513489, "grad_norm": 3.3171653747558594, "learning_rate": 9.999997391384253e-07, "loss": 0.1015, "step": 94900 }, { "epoch": 1.0140498958277686, "grad_norm": 1.081496000289917, "learning_rate": 9.999997216942122e-07, "loss": 0.0392, "step": 94910 }, { "epoch": 1.0141567391420483, "grad_norm": 3.021267890930176, "learning_rate": 9.999997036854619e-07, "loss": 0.0477, "step": 94920 }, { "epoch": 1.0142635824563278, "grad_norm": 0.1803196221590042, "learning_rate": 9.999996851121742e-07, "loss": 0.0539, "step": 94930 }, { "epoch": 1.0143704257706074, "grad_norm": 4.271975994110107, "learning_rate": 9.99999665974349e-07, "loss": 0.0206, "step": 94940 }, { "epoch": 1.0144772690848871, "grad_norm": 0.12416861951351166, "learning_rate": 9.999996462719867e-07, "loss": 0.0136, "step": 94950 }, { "epoch": 1.0145841123991666, "grad_norm": 0.02485176920890808, "learning_rate": 9.999996260050867e-07, "loss": 0.0759, "step": 94960 }, { "epoch": 1.0146909557134463, "grad_norm": 2.360853433609009, "learning_rate": 9.999996051736495e-07, "loss": 0.0229, "step": 94970 }, { "epoch": 1.014797799027726, "grad_norm": 0.04449943080544472, "learning_rate": 9.999995837776752e-07, "loss": 0.0474, "step": 94980 }, { "epoch": 1.0149046423420054, "grad_norm": 5.356226921081543, "learning_rate": 9.999995618171636e-07, "loss": 0.0181, "step": 94990 }, { "epoch": 1.015011485656285, "grad_norm": 3.630963087081909, "learning_rate": 9.999995392921146e-07, "loss": 0.0871, "step": 95000 }, { "epoch": 1.0151183289705648, "grad_norm": 4.6262383460998535, "learning_rate": 9.999995162025282e-07, "loss": 0.0459, "step": 95010 }, { "epoch": 1.0152251722848442, "grad_norm": 0.03833857551217079, "learning_rate": 9.99999492548405e-07, "loss": 0.078, "step": 95020 }, { "epoch": 1.015332015599124, "grad_norm": 0.02563261240720749, "learning_rate": 9.999994683297446e-07, "loss": 0.0235, "step": 95030 }, { "epoch": 1.0154388589134036, "grad_norm": 3.0756101608276367, "learning_rate": 9.99999443546547e-07, "loss": 0.0269, "step": 95040 }, { "epoch": 1.015545702227683, "grad_norm": 0.2713451087474823, "learning_rate": 9.999994181988121e-07, "loss": 0.0289, "step": 95050 }, { "epoch": 1.0156525455419627, "grad_norm": 0.415549099445343, "learning_rate": 9.9999939228654e-07, "loss": 0.0401, "step": 95060 }, { "epoch": 1.0157593888562424, "grad_norm": 0.50433349609375, "learning_rate": 9.999993658097314e-07, "loss": 0.0355, "step": 95070 }, { "epoch": 1.0158662321705219, "grad_norm": 0.0768062174320221, "learning_rate": 9.999993387683854e-07, "loss": 0.0351, "step": 95080 }, { "epoch": 1.0159730754848015, "grad_norm": 3.4685537815093994, "learning_rate": 9.999993111625025e-07, "loss": 0.0404, "step": 95090 }, { "epoch": 1.0160799187990812, "grad_norm": 1.0415295362472534, "learning_rate": 9.999992829920828e-07, "loss": 0.023, "step": 95100 }, { "epoch": 1.0161867621133607, "grad_norm": 6.8232340812683105, "learning_rate": 9.999992542571257e-07, "loss": 0.0174, "step": 95110 }, { "epoch": 1.0162936054276404, "grad_norm": 2.593876361846924, "learning_rate": 9.99999224957632e-07, "loss": 0.0354, "step": 95120 }, { "epoch": 1.01640044874192, "grad_norm": 0.4102499783039093, "learning_rate": 9.999991950936015e-07, "loss": 0.0079, "step": 95130 }, { "epoch": 1.0165072920561995, "grad_norm": 1.4628263711929321, "learning_rate": 9.999991646650343e-07, "loss": 0.0178, "step": 95140 }, { "epoch": 1.0166141353704792, "grad_norm": 3.6303963661193848, "learning_rate": 9.9999913367193e-07, "loss": 0.1195, "step": 95150 }, { "epoch": 1.0167209786847589, "grad_norm": 1.3557626008987427, "learning_rate": 9.99999102114289e-07, "loss": 0.0427, "step": 95160 }, { "epoch": 1.0168278219990383, "grad_norm": 0.006457140669226646, "learning_rate": 9.999990699921116e-07, "loss": 0.0246, "step": 95170 }, { "epoch": 1.016934665313318, "grad_norm": 8.10157585144043, "learning_rate": 9.99999037305397e-07, "loss": 0.0572, "step": 95180 }, { "epoch": 1.0170415086275977, "grad_norm": 0.042440179735422134, "learning_rate": 9.99999004054146e-07, "loss": 0.0284, "step": 95190 }, { "epoch": 1.0171483519418771, "grad_norm": 3.5612831115722656, "learning_rate": 9.999989702383585e-07, "loss": 0.0695, "step": 95200 }, { "epoch": 1.0172551952561568, "grad_norm": 2.356804609298706, "learning_rate": 9.999989358580342e-07, "loss": 0.017, "step": 95210 }, { "epoch": 1.0173620385704365, "grad_norm": 3.0310091972351074, "learning_rate": 9.999989009131737e-07, "loss": 0.042, "step": 95220 }, { "epoch": 1.017468881884716, "grad_norm": 3.475036144256592, "learning_rate": 9.999988654037765e-07, "loss": 0.0197, "step": 95230 }, { "epoch": 1.0175757251989956, "grad_norm": 5.62255334854126, "learning_rate": 9.999988293298427e-07, "loss": 0.024, "step": 95240 }, { "epoch": 1.0176825685132753, "grad_norm": 7.266665935516357, "learning_rate": 9.999987926913727e-07, "loss": 0.0604, "step": 95250 }, { "epoch": 1.0177894118275548, "grad_norm": 1.5577809810638428, "learning_rate": 9.999987554883662e-07, "loss": 0.0509, "step": 95260 }, { "epoch": 1.0178962551418345, "grad_norm": 0.017362017184495926, "learning_rate": 9.999987177208236e-07, "loss": 0.0325, "step": 95270 }, { "epoch": 1.0180030984561141, "grad_norm": 6.999556064605713, "learning_rate": 9.999986793887444e-07, "loss": 0.0604, "step": 95280 }, { "epoch": 1.0181099417703938, "grad_norm": 0.1438538283109665, "learning_rate": 9.999986404921292e-07, "loss": 0.107, "step": 95290 }, { "epoch": 1.0182167850846733, "grad_norm": 0.8134641647338867, "learning_rate": 9.999986010309775e-07, "loss": 0.0127, "step": 95300 }, { "epoch": 1.018323628398953, "grad_norm": 0.03364015743136406, "learning_rate": 9.999985610052899e-07, "loss": 0.0461, "step": 95310 }, { "epoch": 1.0184304717132326, "grad_norm": 0.20547762513160706, "learning_rate": 9.999985204150661e-07, "loss": 0.0143, "step": 95320 }, { "epoch": 1.018537315027512, "grad_norm": 0.06889329105615616, "learning_rate": 9.999984792603062e-07, "loss": 0.0134, "step": 95330 }, { "epoch": 1.0186441583417918, "grad_norm": 0.8584648966789246, "learning_rate": 9.999984375410104e-07, "loss": 0.0166, "step": 95340 }, { "epoch": 1.0187510016560715, "grad_norm": 1.6262673139572144, "learning_rate": 9.999983952571786e-07, "loss": 0.056, "step": 95350 }, { "epoch": 1.018857844970351, "grad_norm": 7.040277004241943, "learning_rate": 9.999983524088108e-07, "loss": 0.0532, "step": 95360 }, { "epoch": 1.0189646882846306, "grad_norm": 5.948887348175049, "learning_rate": 9.999983089959072e-07, "loss": 0.1041, "step": 95370 }, { "epoch": 1.0190715315989103, "grad_norm": 0.415277898311615, "learning_rate": 9.999982650184678e-07, "loss": 0.0207, "step": 95380 }, { "epoch": 1.0191783749131897, "grad_norm": 2.1541430950164795, "learning_rate": 9.999982204764925e-07, "loss": 0.0523, "step": 95390 }, { "epoch": 1.0192852182274694, "grad_norm": 0.19203008711338043, "learning_rate": 9.999981753699815e-07, "loss": 0.0465, "step": 95400 }, { "epoch": 1.019392061541749, "grad_norm": 3.5223019123077393, "learning_rate": 9.999981296989348e-07, "loss": 0.0204, "step": 95410 }, { "epoch": 1.0194989048560286, "grad_norm": 5.079782962799072, "learning_rate": 9.999980834633525e-07, "loss": 0.0181, "step": 95420 }, { "epoch": 1.0196057481703082, "grad_norm": 0.8602268695831299, "learning_rate": 9.999980366632347e-07, "loss": 0.074, "step": 95430 }, { "epoch": 1.019712591484588, "grad_norm": 0.3530869483947754, "learning_rate": 9.999979892985812e-07, "loss": 0.0357, "step": 95440 }, { "epoch": 1.0198194347988674, "grad_norm": 4.642859935760498, "learning_rate": 9.999979413693923e-07, "loss": 0.0788, "step": 95450 }, { "epoch": 1.019926278113147, "grad_norm": 1.9388474225997925, "learning_rate": 9.999978928756683e-07, "loss": 0.0377, "step": 95460 }, { "epoch": 1.0200331214274267, "grad_norm": 6.0322113037109375, "learning_rate": 9.999978438174087e-07, "loss": 0.0861, "step": 95470 }, { "epoch": 1.0201399647417062, "grad_norm": 0.036643702536821365, "learning_rate": 9.999977941946137e-07, "loss": 0.0245, "step": 95480 }, { "epoch": 1.0202468080559859, "grad_norm": 0.6838856935501099, "learning_rate": 9.999977440072835e-07, "loss": 0.055, "step": 95490 }, { "epoch": 1.0203536513702656, "grad_norm": 0.05607646331191063, "learning_rate": 9.99997693255418e-07, "loss": 0.0348, "step": 95500 }, { "epoch": 1.020460494684545, "grad_norm": 3.1251747608184814, "learning_rate": 9.999976419390177e-07, "loss": 0.0443, "step": 95510 }, { "epoch": 1.0205673379988247, "grad_norm": 3.4927854537963867, "learning_rate": 9.999975900580822e-07, "loss": 0.0441, "step": 95520 }, { "epoch": 1.0206741813131044, "grad_norm": 0.2875429689884186, "learning_rate": 9.999975376126117e-07, "loss": 0.0103, "step": 95530 }, { "epoch": 1.0207810246273838, "grad_norm": 0.047648973762989044, "learning_rate": 9.99997484602606e-07, "loss": 0.0173, "step": 95540 }, { "epoch": 1.0208878679416635, "grad_norm": 0.6628183126449585, "learning_rate": 9.999974310280656e-07, "loss": 0.0405, "step": 95550 }, { "epoch": 1.0209947112559432, "grad_norm": 0.7387036085128784, "learning_rate": 9.999973768889905e-07, "loss": 0.0109, "step": 95560 }, { "epoch": 1.0211015545702227, "grad_norm": 0.015007738955318928, "learning_rate": 9.999973221853803e-07, "loss": 0.0382, "step": 95570 }, { "epoch": 1.0212083978845024, "grad_norm": 1.7521157264709473, "learning_rate": 9.999972669172357e-07, "loss": 0.0119, "step": 95580 }, { "epoch": 1.021315241198782, "grad_norm": 5.188046932220459, "learning_rate": 9.999972110845562e-07, "loss": 0.043, "step": 95590 }, { "epoch": 1.0214220845130615, "grad_norm": 2.449232339859009, "learning_rate": 9.999971546873423e-07, "loss": 0.0345, "step": 95600 }, { "epoch": 1.0215289278273412, "grad_norm": 0.3452570140361786, "learning_rate": 9.999970977255937e-07, "loss": 0.0908, "step": 95610 }, { "epoch": 1.0216357711416209, "grad_norm": 1.7648248672485352, "learning_rate": 9.999970401993107e-07, "loss": 0.0678, "step": 95620 }, { "epoch": 1.0217426144559005, "grad_norm": 3.623661756515503, "learning_rate": 9.999969821084935e-07, "loss": 0.0125, "step": 95630 }, { "epoch": 1.02184945777018, "grad_norm": 1.607885479927063, "learning_rate": 9.999969234531418e-07, "loss": 0.0244, "step": 95640 }, { "epoch": 1.0219563010844597, "grad_norm": 1.2881828546524048, "learning_rate": 9.999968642332557e-07, "loss": 0.0449, "step": 95650 }, { "epoch": 1.0220631443987394, "grad_norm": 4.164697170257568, "learning_rate": 9.999968044488356e-07, "loss": 0.0254, "step": 95660 }, { "epoch": 1.0221699877130188, "grad_norm": 3.1520121097564697, "learning_rate": 9.999967440998813e-07, "loss": 0.035, "step": 95670 }, { "epoch": 1.0222768310272985, "grad_norm": 0.037167612463235855, "learning_rate": 9.99996683186393e-07, "loss": 0.0291, "step": 95680 }, { "epoch": 1.0223836743415782, "grad_norm": 1.6938081979751587, "learning_rate": 9.999966217083706e-07, "loss": 0.0115, "step": 95690 }, { "epoch": 1.0224905176558576, "grad_norm": 0.5236520171165466, "learning_rate": 9.999965596658143e-07, "loss": 0.0327, "step": 95700 }, { "epoch": 1.0225973609701373, "grad_norm": 2.6642298698425293, "learning_rate": 9.999964970587242e-07, "loss": 0.0776, "step": 95710 }, { "epoch": 1.022704204284417, "grad_norm": 2.3182156085968018, "learning_rate": 9.999964338871004e-07, "loss": 0.0619, "step": 95720 }, { "epoch": 1.0228110475986965, "grad_norm": 2.0668656826019287, "learning_rate": 9.999963701509429e-07, "loss": 0.0413, "step": 95730 }, { "epoch": 1.0229178909129761, "grad_norm": 1.0019925832748413, "learning_rate": 9.999963058502517e-07, "loss": 0.0075, "step": 95740 }, { "epoch": 1.0230247342272558, "grad_norm": 5.72536563873291, "learning_rate": 9.999962409850268e-07, "loss": 0.0335, "step": 95750 }, { "epoch": 1.0231315775415353, "grad_norm": 2.7862026691436768, "learning_rate": 9.999961755552686e-07, "loss": 0.0203, "step": 95760 }, { "epoch": 1.023238420855815, "grad_norm": 10.003128051757812, "learning_rate": 9.999961095609767e-07, "loss": 0.0497, "step": 95770 }, { "epoch": 1.0233452641700946, "grad_norm": 0.7105570435523987, "learning_rate": 9.999960430021518e-07, "loss": 0.0196, "step": 95780 }, { "epoch": 1.023452107484374, "grad_norm": 2.3569107055664062, "learning_rate": 9.999959758787934e-07, "loss": 0.0277, "step": 95790 }, { "epoch": 1.0235589507986538, "grad_norm": 5.527985095977783, "learning_rate": 9.999959081909019e-07, "loss": 0.0224, "step": 95800 }, { "epoch": 1.0236657941129335, "grad_norm": 4.245680809020996, "learning_rate": 9.999958399384771e-07, "loss": 0.0238, "step": 95810 }, { "epoch": 1.023772637427213, "grad_norm": 1.9578356742858887, "learning_rate": 9.999957711215195e-07, "loss": 0.0297, "step": 95820 }, { "epoch": 1.0238794807414926, "grad_norm": 0.5564010739326477, "learning_rate": 9.999957017400289e-07, "loss": 0.0319, "step": 95830 }, { "epoch": 1.0239863240557723, "grad_norm": 7.6581573486328125, "learning_rate": 9.999956317940054e-07, "loss": 0.0173, "step": 95840 }, { "epoch": 1.0240931673700517, "grad_norm": 0.17604824900627136, "learning_rate": 9.99995561283449e-07, "loss": 0.0779, "step": 95850 }, { "epoch": 1.0242000106843314, "grad_norm": 1.466317057609558, "learning_rate": 9.9999549020836e-07, "loss": 0.0124, "step": 95860 }, { "epoch": 1.024306853998611, "grad_norm": 2.6460494995117188, "learning_rate": 9.999954185687382e-07, "loss": 0.0326, "step": 95870 }, { "epoch": 1.0244136973128906, "grad_norm": 9.880729675292969, "learning_rate": 9.99995346364584e-07, "loss": 0.0575, "step": 95880 }, { "epoch": 1.0245205406271702, "grad_norm": 4.215702056884766, "learning_rate": 9.999952735958973e-07, "loss": 0.0293, "step": 95890 }, { "epoch": 1.02462738394145, "grad_norm": 3.7475552558898926, "learning_rate": 9.999952002626782e-07, "loss": 0.0179, "step": 95900 }, { "epoch": 1.0247342272557294, "grad_norm": 2.7926366329193115, "learning_rate": 9.999951263649266e-07, "loss": 0.0725, "step": 95910 }, { "epoch": 1.024841070570009, "grad_norm": 0.7347517609596252, "learning_rate": 9.99995051902643e-07, "loss": 0.0431, "step": 95920 }, { "epoch": 1.0249479138842887, "grad_norm": 0.902772068977356, "learning_rate": 9.999949768758271e-07, "loss": 0.0268, "step": 95930 }, { "epoch": 1.0250547571985682, "grad_norm": 4.258950233459473, "learning_rate": 9.999949012844792e-07, "loss": 0.0493, "step": 95940 }, { "epoch": 1.0251616005128479, "grad_norm": 3.1511244773864746, "learning_rate": 9.999948251285994e-07, "loss": 0.0171, "step": 95950 }, { "epoch": 1.0252684438271276, "grad_norm": 2.3205292224884033, "learning_rate": 9.999947484081877e-07, "loss": 0.0394, "step": 95960 }, { "epoch": 1.025375287141407, "grad_norm": 1.629008412361145, "learning_rate": 9.99994671123244e-07, "loss": 0.0318, "step": 95970 }, { "epoch": 1.0254821304556867, "grad_norm": 1.061767816543579, "learning_rate": 9.999945932737686e-07, "loss": 0.0324, "step": 95980 }, { "epoch": 1.0255889737699664, "grad_norm": 2.4291117191314697, "learning_rate": 9.999945148597617e-07, "loss": 0.0196, "step": 95990 }, { "epoch": 1.0256958170842458, "grad_norm": 0.8785409331321716, "learning_rate": 9.999944358812232e-07, "loss": 0.0488, "step": 96000 }, { "epoch": 1.0258026603985255, "grad_norm": 0.2834020256996155, "learning_rate": 9.999943563381534e-07, "loss": 0.0214, "step": 96010 }, { "epoch": 1.0259095037128052, "grad_norm": 6.684720516204834, "learning_rate": 9.99994276230552e-07, "loss": 0.0359, "step": 96020 }, { "epoch": 1.0260163470270849, "grad_norm": 1.3686981201171875, "learning_rate": 9.999941955584196e-07, "loss": 0.0401, "step": 96030 }, { "epoch": 1.0261231903413643, "grad_norm": 11.309493064880371, "learning_rate": 9.999941143217556e-07, "loss": 0.0485, "step": 96040 }, { "epoch": 1.026230033655644, "grad_norm": 3.769122362136841, "learning_rate": 9.999940325205607e-07, "loss": 0.0519, "step": 96050 }, { "epoch": 1.0263368769699237, "grad_norm": 1.9912980794906616, "learning_rate": 9.99993950154835e-07, "loss": 0.0302, "step": 96060 }, { "epoch": 1.0264437202842032, "grad_norm": 0.9348112344741821, "learning_rate": 9.999938672245783e-07, "loss": 0.0304, "step": 96070 }, { "epoch": 1.0265505635984828, "grad_norm": 0.30468782782554626, "learning_rate": 9.999937837297906e-07, "loss": 0.0227, "step": 96080 }, { "epoch": 1.0266574069127625, "grad_norm": 0.038872115314006805, "learning_rate": 9.999936996704724e-07, "loss": 0.0273, "step": 96090 }, { "epoch": 1.026764250227042, "grad_norm": 0.01073506474494934, "learning_rate": 9.999936150466234e-07, "loss": 0.0144, "step": 96100 }, { "epoch": 1.0268710935413217, "grad_norm": 7.437786102294922, "learning_rate": 9.99993529858244e-07, "loss": 0.0249, "step": 96110 }, { "epoch": 1.0269779368556013, "grad_norm": 0.39760830998420715, "learning_rate": 9.999934441053342e-07, "loss": 0.0194, "step": 96120 }, { "epoch": 1.0270847801698808, "grad_norm": 2.8536183834075928, "learning_rate": 9.999933577878938e-07, "loss": 0.0511, "step": 96130 }, { "epoch": 1.0271916234841605, "grad_norm": 0.037591274827718735, "learning_rate": 9.999932709059234e-07, "loss": 0.0094, "step": 96140 }, { "epoch": 1.0272984667984402, "grad_norm": 9.845791816711426, "learning_rate": 9.99993183459423e-07, "loss": 0.0512, "step": 96150 }, { "epoch": 1.0274053101127196, "grad_norm": 2.974605083465576, "learning_rate": 9.999930954483921e-07, "loss": 0.0504, "step": 96160 }, { "epoch": 1.0275121534269993, "grad_norm": 2.481807231903076, "learning_rate": 9.999930068728317e-07, "loss": 0.0286, "step": 96170 }, { "epoch": 1.027618996741279, "grad_norm": 3.1630475521087646, "learning_rate": 9.999929177327413e-07, "loss": 0.0311, "step": 96180 }, { "epoch": 1.0277258400555584, "grad_norm": 0.047499317675828934, "learning_rate": 9.99992828028121e-07, "loss": 0.0233, "step": 96190 }, { "epoch": 1.0278326833698381, "grad_norm": 16.04920768737793, "learning_rate": 9.999927377589714e-07, "loss": 0.088, "step": 96200 }, { "epoch": 1.0279395266841178, "grad_norm": 14.863056182861328, "learning_rate": 9.99992646925292e-07, "loss": 0.137, "step": 96210 }, { "epoch": 1.0280463699983973, "grad_norm": 6.339358329772949, "learning_rate": 9.999925555270834e-07, "loss": 0.0353, "step": 96220 }, { "epoch": 1.028153213312677, "grad_norm": 2.9132766723632812, "learning_rate": 9.999924635643453e-07, "loss": 0.052, "step": 96230 }, { "epoch": 1.0282600566269566, "grad_norm": 5.899767875671387, "learning_rate": 9.99992371037078e-07, "loss": 0.028, "step": 96240 }, { "epoch": 1.028366899941236, "grad_norm": 2.0155324935913086, "learning_rate": 9.999922779452814e-07, "loss": 0.044, "step": 96250 }, { "epoch": 1.0284737432555158, "grad_norm": 1.2088909149169922, "learning_rate": 9.99992184288956e-07, "loss": 0.0407, "step": 96260 }, { "epoch": 1.0285805865697955, "grad_norm": 0.17168404161930084, "learning_rate": 9.999920900681015e-07, "loss": 0.0207, "step": 96270 }, { "epoch": 1.028687429884075, "grad_norm": 0.20063923299312592, "learning_rate": 9.999919952827185e-07, "loss": 0.1039, "step": 96280 }, { "epoch": 1.0287942731983546, "grad_norm": 3.1851158142089844, "learning_rate": 9.999918999328066e-07, "loss": 0.0333, "step": 96290 }, { "epoch": 1.0289011165126343, "grad_norm": 0.18483009934425354, "learning_rate": 9.99991804018366e-07, "loss": 0.0591, "step": 96300 }, { "epoch": 1.0290079598269137, "grad_norm": 0.017040535807609558, "learning_rate": 9.999917075393972e-07, "loss": 0.0196, "step": 96310 }, { "epoch": 1.0291148031411934, "grad_norm": 0.10369570553302765, "learning_rate": 9.999916104959e-07, "loss": 0.0094, "step": 96320 }, { "epoch": 1.029221646455473, "grad_norm": 5.092176914215088, "learning_rate": 9.999915128878742e-07, "loss": 0.0332, "step": 96330 }, { "epoch": 1.0293284897697526, "grad_norm": 9.805935859680176, "learning_rate": 9.999914147153205e-07, "loss": 0.0266, "step": 96340 }, { "epoch": 1.0294353330840322, "grad_norm": 4.6630940437316895, "learning_rate": 9.999913159782387e-07, "loss": 0.0454, "step": 96350 }, { "epoch": 1.029542176398312, "grad_norm": 2.048879861831665, "learning_rate": 9.99991216676629e-07, "loss": 0.0294, "step": 96360 }, { "epoch": 1.0296490197125916, "grad_norm": 1.084547996520996, "learning_rate": 9.999911168104914e-07, "loss": 0.0377, "step": 96370 }, { "epoch": 1.029755863026871, "grad_norm": 0.19300252199172974, "learning_rate": 9.999910163798263e-07, "loss": 0.0367, "step": 96380 }, { "epoch": 1.0298627063411507, "grad_norm": 5.158754825592041, "learning_rate": 9.999909153846334e-07, "loss": 0.0271, "step": 96390 }, { "epoch": 1.0299695496554304, "grad_norm": 4.632421970367432, "learning_rate": 9.99990813824913e-07, "loss": 0.0314, "step": 96400 }, { "epoch": 1.0300763929697099, "grad_norm": 0.21251823008060455, "learning_rate": 9.999907117006655e-07, "loss": 0.0326, "step": 96410 }, { "epoch": 1.0301832362839896, "grad_norm": 0.2874782979488373, "learning_rate": 9.999906090118905e-07, "loss": 0.0241, "step": 96420 }, { "epoch": 1.0302900795982692, "grad_norm": 0.3373970687389374, "learning_rate": 9.999905057585884e-07, "loss": 0.0805, "step": 96430 }, { "epoch": 1.0303969229125487, "grad_norm": 0.20079255104064941, "learning_rate": 9.999904019407594e-07, "loss": 0.0354, "step": 96440 }, { "epoch": 1.0305037662268284, "grad_norm": 3.4077329635620117, "learning_rate": 9.999902975584033e-07, "loss": 0.026, "step": 96450 }, { "epoch": 1.030610609541108, "grad_norm": 5.0603203773498535, "learning_rate": 9.999901926115207e-07, "loss": 0.0925, "step": 96460 }, { "epoch": 1.0307174528553875, "grad_norm": 1.2480310201644897, "learning_rate": 9.999900871001111e-07, "loss": 0.0279, "step": 96470 }, { "epoch": 1.0308242961696672, "grad_norm": 8.315337181091309, "learning_rate": 9.99989981024175e-07, "loss": 0.0394, "step": 96480 }, { "epoch": 1.0309311394839469, "grad_norm": 5.347864627838135, "learning_rate": 9.999898743837128e-07, "loss": 0.0754, "step": 96490 }, { "epoch": 1.0310379827982263, "grad_norm": 2.0298948287963867, "learning_rate": 9.999897671787242e-07, "loss": 0.0409, "step": 96500 }, { "epoch": 1.031144826112506, "grad_norm": 2.285783529281616, "learning_rate": 9.999896594092093e-07, "loss": 0.1364, "step": 96510 }, { "epoch": 1.0312516694267857, "grad_norm": 12.006327629089355, "learning_rate": 9.999895510751683e-07, "loss": 0.0635, "step": 96520 }, { "epoch": 1.0313585127410652, "grad_norm": 12.39975643157959, "learning_rate": 9.999894421766015e-07, "loss": 0.0927, "step": 96530 }, { "epoch": 1.0314653560553448, "grad_norm": 2.565493106842041, "learning_rate": 9.999893327135088e-07, "loss": 0.0852, "step": 96540 }, { "epoch": 1.0315721993696245, "grad_norm": 0.27726632356643677, "learning_rate": 9.999892226858905e-07, "loss": 0.0375, "step": 96550 }, { "epoch": 1.031679042683904, "grad_norm": 3.390308380126953, "learning_rate": 9.999891120937463e-07, "loss": 0.0197, "step": 96560 }, { "epoch": 1.0317858859981837, "grad_norm": 4.334097385406494, "learning_rate": 9.999890009370769e-07, "loss": 0.0065, "step": 96570 }, { "epoch": 1.0318927293124633, "grad_norm": 1.3335933685302734, "learning_rate": 9.999888892158822e-07, "loss": 0.0519, "step": 96580 }, { "epoch": 1.0319995726267428, "grad_norm": 27.951486587524414, "learning_rate": 9.999887769301624e-07, "loss": 0.0348, "step": 96590 }, { "epoch": 1.0321064159410225, "grad_norm": 10.685575485229492, "learning_rate": 9.999886640799173e-07, "loss": 0.0356, "step": 96600 }, { "epoch": 1.0322132592553022, "grad_norm": 3.248878240585327, "learning_rate": 9.999885506651474e-07, "loss": 0.0731, "step": 96610 }, { "epoch": 1.0323201025695816, "grad_norm": 0.04607405140995979, "learning_rate": 9.999884366858527e-07, "loss": 0.0293, "step": 96620 }, { "epoch": 1.0324269458838613, "grad_norm": 3.461592674255371, "learning_rate": 9.999883221420334e-07, "loss": 0.0245, "step": 96630 }, { "epoch": 1.032533789198141, "grad_norm": 0.07824798673391342, "learning_rate": 9.999882070336893e-07, "loss": 0.0285, "step": 96640 }, { "epoch": 1.0326406325124204, "grad_norm": 2.0610077381134033, "learning_rate": 9.99988091360821e-07, "loss": 0.0444, "step": 96650 }, { "epoch": 1.0327474758267001, "grad_norm": 0.44482460618019104, "learning_rate": 9.999879751234284e-07, "loss": 0.0156, "step": 96660 }, { "epoch": 1.0328543191409798, "grad_norm": 12.252355575561523, "learning_rate": 9.999878583215116e-07, "loss": 0.0472, "step": 96670 }, { "epoch": 1.0329611624552593, "grad_norm": 3.544020891189575, "learning_rate": 9.999877409550707e-07, "loss": 0.0106, "step": 96680 }, { "epoch": 1.033068005769539, "grad_norm": 3.8433403968811035, "learning_rate": 9.99987623024106e-07, "loss": 0.1051, "step": 96690 }, { "epoch": 1.0331748490838186, "grad_norm": 9.936590194702148, "learning_rate": 9.999875045286173e-07, "loss": 0.0337, "step": 96700 }, { "epoch": 1.033281692398098, "grad_norm": 0.6263647079467773, "learning_rate": 9.999873854686051e-07, "loss": 0.0694, "step": 96710 }, { "epoch": 1.0333885357123778, "grad_norm": 0.13151371479034424, "learning_rate": 9.999872658440696e-07, "loss": 0.0342, "step": 96720 }, { "epoch": 1.0334953790266574, "grad_norm": 0.1487567126750946, "learning_rate": 9.999871456550106e-07, "loss": 0.0342, "step": 96730 }, { "epoch": 1.033602222340937, "grad_norm": 3.2807395458221436, "learning_rate": 9.999870249014285e-07, "loss": 0.0252, "step": 96740 }, { "epoch": 1.0337090656552166, "grad_norm": 9.21173095703125, "learning_rate": 9.99986903583323e-07, "loss": 0.0526, "step": 96750 }, { "epoch": 1.0338159089694963, "grad_norm": 0.09508940577507019, "learning_rate": 9.999867817006948e-07, "loss": 0.0264, "step": 96760 }, { "epoch": 1.033922752283776, "grad_norm": 0.30185166001319885, "learning_rate": 9.999866592535438e-07, "loss": 0.0221, "step": 96770 }, { "epoch": 1.0340295955980554, "grad_norm": 3.0179603099823, "learning_rate": 9.9998653624187e-07, "loss": 0.0552, "step": 96780 }, { "epoch": 1.034136438912335, "grad_norm": 2.4937386512756348, "learning_rate": 9.999864126656737e-07, "loss": 0.0535, "step": 96790 }, { "epoch": 1.0342432822266148, "grad_norm": 3.9401705265045166, "learning_rate": 9.99986288524955e-07, "loss": 0.0294, "step": 96800 }, { "epoch": 1.0343501255408942, "grad_norm": 3.8220889568328857, "learning_rate": 9.999861638197143e-07, "loss": 0.0292, "step": 96810 }, { "epoch": 1.034456968855174, "grad_norm": 0.035415347665548325, "learning_rate": 9.99986038549951e-07, "loss": 0.0437, "step": 96820 }, { "epoch": 1.0345638121694536, "grad_norm": 0.13544325530529022, "learning_rate": 9.99985912715666e-07, "loss": 0.0221, "step": 96830 }, { "epoch": 1.034670655483733, "grad_norm": 0.05139007791876793, "learning_rate": 9.999857863168592e-07, "loss": 0.0432, "step": 96840 }, { "epoch": 1.0347774987980127, "grad_norm": 9.064168930053711, "learning_rate": 9.999856593535307e-07, "loss": 0.0537, "step": 96850 }, { "epoch": 1.0348843421122924, "grad_norm": 0.39065995812416077, "learning_rate": 9.999855318256806e-07, "loss": 0.0474, "step": 96860 }, { "epoch": 1.0349911854265719, "grad_norm": 7.8559794425964355, "learning_rate": 9.99985403733309e-07, "loss": 0.0748, "step": 96870 }, { "epoch": 1.0350980287408516, "grad_norm": 5.597771644592285, "learning_rate": 9.999852750764164e-07, "loss": 0.0699, "step": 96880 }, { "epoch": 1.0352048720551312, "grad_norm": 0.12062425166368484, "learning_rate": 9.999851458550026e-07, "loss": 0.0415, "step": 96890 }, { "epoch": 1.0353117153694107, "grad_norm": 0.6212274432182312, "learning_rate": 9.999850160690677e-07, "loss": 0.0336, "step": 96900 }, { "epoch": 1.0354185586836904, "grad_norm": 0.08100558072328568, "learning_rate": 9.999848857186122e-07, "loss": 0.0674, "step": 96910 }, { "epoch": 1.03552540199797, "grad_norm": 5.3236823081970215, "learning_rate": 9.999847548036358e-07, "loss": 0.039, "step": 96920 }, { "epoch": 1.0356322453122495, "grad_norm": 4.486862659454346, "learning_rate": 9.99984623324139e-07, "loss": 0.0869, "step": 96930 }, { "epoch": 1.0357390886265292, "grad_norm": 0.06037129834294319, "learning_rate": 9.999844912801218e-07, "loss": 0.0751, "step": 96940 }, { "epoch": 1.0358459319408089, "grad_norm": 1.9317753314971924, "learning_rate": 9.999843586715844e-07, "loss": 0.006, "step": 96950 }, { "epoch": 1.0359527752550883, "grad_norm": 6.732677459716797, "learning_rate": 9.99984225498527e-07, "loss": 0.0481, "step": 96960 }, { "epoch": 1.036059618569368, "grad_norm": 11.098487854003906, "learning_rate": 9.999840917609494e-07, "loss": 0.0781, "step": 96970 }, { "epoch": 1.0361664618836477, "grad_norm": 1.942718744277954, "learning_rate": 9.999839574588522e-07, "loss": 0.0198, "step": 96980 }, { "epoch": 1.0362733051979272, "grad_norm": 0.6365543603897095, "learning_rate": 9.999838225922354e-07, "loss": 0.0098, "step": 96990 }, { "epoch": 1.0363801485122068, "grad_norm": 2.5954601764678955, "learning_rate": 9.99983687161099e-07, "loss": 0.0322, "step": 97000 }, { "epoch": 1.0364869918264865, "grad_norm": 0.07539460062980652, "learning_rate": 9.999835511654434e-07, "loss": 0.0116, "step": 97010 }, { "epoch": 1.036593835140766, "grad_norm": 0.39441874623298645, "learning_rate": 9.999834146052687e-07, "loss": 0.0213, "step": 97020 }, { "epoch": 1.0367006784550457, "grad_norm": 0.04437887668609619, "learning_rate": 9.99983277480575e-07, "loss": 0.0193, "step": 97030 }, { "epoch": 1.0368075217693253, "grad_norm": 1.835187315940857, "learning_rate": 9.999831397913622e-07, "loss": 0.026, "step": 97040 }, { "epoch": 1.0369143650836048, "grad_norm": 0.39485567808151245, "learning_rate": 9.999830015376309e-07, "loss": 0.0135, "step": 97050 }, { "epoch": 1.0370212083978845, "grad_norm": 1.6851723194122314, "learning_rate": 9.99982862719381e-07, "loss": 0.0181, "step": 97060 }, { "epoch": 1.0371280517121642, "grad_norm": 8.555249214172363, "learning_rate": 9.999827233366127e-07, "loss": 0.0558, "step": 97070 }, { "epoch": 1.0372348950264436, "grad_norm": 3.0216450691223145, "learning_rate": 9.999825833893262e-07, "loss": 0.0415, "step": 97080 }, { "epoch": 1.0373417383407233, "grad_norm": 3.6291587352752686, "learning_rate": 9.999824428775217e-07, "loss": 0.0468, "step": 97090 }, { "epoch": 1.037448581655003, "grad_norm": 0.8887105584144592, "learning_rate": 9.999823018011992e-07, "loss": 0.0139, "step": 97100 }, { "epoch": 1.0375554249692827, "grad_norm": 0.19929270446300507, "learning_rate": 9.999821601603588e-07, "loss": 0.0324, "step": 97110 }, { "epoch": 1.0376622682835621, "grad_norm": 4.076462745666504, "learning_rate": 9.999820179550011e-07, "loss": 0.0547, "step": 97120 }, { "epoch": 1.0377691115978418, "grad_norm": 4.095235824584961, "learning_rate": 9.999818751851258e-07, "loss": 0.0593, "step": 97130 }, { "epoch": 1.0378759549121215, "grad_norm": 4.6876912117004395, "learning_rate": 9.999817318507333e-07, "loss": 0.0358, "step": 97140 }, { "epoch": 1.037982798226401, "grad_norm": 0.38557544350624084, "learning_rate": 9.999815879518236e-07, "loss": 0.0215, "step": 97150 }, { "epoch": 1.0380896415406806, "grad_norm": 6.243972301483154, "learning_rate": 9.99981443488397e-07, "loss": 0.0624, "step": 97160 }, { "epoch": 1.0381964848549603, "grad_norm": 0.02212287113070488, "learning_rate": 9.999812984604536e-07, "loss": 0.0164, "step": 97170 }, { "epoch": 1.0383033281692398, "grad_norm": 2.0152413845062256, "learning_rate": 9.999811528679936e-07, "loss": 0.0758, "step": 97180 }, { "epoch": 1.0384101714835194, "grad_norm": 6.962400913238525, "learning_rate": 9.999810067110171e-07, "loss": 0.0274, "step": 97190 }, { "epoch": 1.0385170147977991, "grad_norm": 0.5122260451316833, "learning_rate": 9.999808599895243e-07, "loss": 0.0439, "step": 97200 }, { "epoch": 1.0386238581120786, "grad_norm": 0.38726699352264404, "learning_rate": 9.999807127035154e-07, "loss": 0.0399, "step": 97210 }, { "epoch": 1.0387307014263583, "grad_norm": 1.1369404792785645, "learning_rate": 9.999805648529907e-07, "loss": 0.0206, "step": 97220 }, { "epoch": 1.038837544740638, "grad_norm": 0.22065091133117676, "learning_rate": 9.9998041643795e-07, "loss": 0.0422, "step": 97230 }, { "epoch": 1.0389443880549174, "grad_norm": 11.59093952178955, "learning_rate": 9.999802674583937e-07, "loss": 0.0504, "step": 97240 }, { "epoch": 1.039051231369197, "grad_norm": 4.210629463195801, "learning_rate": 9.999801179143218e-07, "loss": 0.0312, "step": 97250 }, { "epoch": 1.0391580746834768, "grad_norm": 0.010030408389866352, "learning_rate": 9.999799678057348e-07, "loss": 0.0107, "step": 97260 }, { "epoch": 1.0392649179977562, "grad_norm": 9.835067749023438, "learning_rate": 9.999798171326328e-07, "loss": 0.0313, "step": 97270 }, { "epoch": 1.039371761312036, "grad_norm": 0.5625452399253845, "learning_rate": 9.999796658950157e-07, "loss": 0.085, "step": 97280 }, { "epoch": 1.0394786046263156, "grad_norm": 4.207020282745361, "learning_rate": 9.99979514092884e-07, "loss": 0.0186, "step": 97290 }, { "epoch": 1.039585447940595, "grad_norm": 0.20033416152000427, "learning_rate": 9.999793617262373e-07, "loss": 0.0501, "step": 97300 }, { "epoch": 1.0396922912548747, "grad_norm": 7.733791828155518, "learning_rate": 9.999792087950765e-07, "loss": 0.0307, "step": 97310 }, { "epoch": 1.0397991345691544, "grad_norm": 9.14358139038086, "learning_rate": 9.999790552994013e-07, "loss": 0.0621, "step": 97320 }, { "epoch": 1.0399059778834339, "grad_norm": 7.129218101501465, "learning_rate": 9.999789012392122e-07, "loss": 0.0349, "step": 97330 }, { "epoch": 1.0400128211977135, "grad_norm": 1.7904503345489502, "learning_rate": 9.999787466145088e-07, "loss": 0.0207, "step": 97340 }, { "epoch": 1.0401196645119932, "grad_norm": 5.130684852600098, "learning_rate": 9.99978591425292e-07, "loss": 0.0665, "step": 97350 }, { "epoch": 1.0402265078262727, "grad_norm": 3.0414416790008545, "learning_rate": 9.999784356715616e-07, "loss": 0.0066, "step": 97360 }, { "epoch": 1.0403333511405524, "grad_norm": 3.401588201522827, "learning_rate": 9.999782793533176e-07, "loss": 0.0645, "step": 97370 }, { "epoch": 1.040440194454832, "grad_norm": 1.63418710231781, "learning_rate": 9.999781224705605e-07, "loss": 0.0316, "step": 97380 }, { "epoch": 1.0405470377691115, "grad_norm": 13.123292922973633, "learning_rate": 9.999779650232904e-07, "loss": 0.0445, "step": 97390 }, { "epoch": 1.0406538810833912, "grad_norm": 0.3286401331424713, "learning_rate": 9.999778070115074e-07, "loss": 0.0439, "step": 97400 }, { "epoch": 1.0407607243976709, "grad_norm": 0.04220961034297943, "learning_rate": 9.999776484352119e-07, "loss": 0.0306, "step": 97410 }, { "epoch": 1.0408675677119503, "grad_norm": 0.7091933488845825, "learning_rate": 9.999774892944038e-07, "loss": 0.0415, "step": 97420 }, { "epoch": 1.04097441102623, "grad_norm": 5.079284191131592, "learning_rate": 9.999773295890832e-07, "loss": 0.0332, "step": 97430 }, { "epoch": 1.0410812543405097, "grad_norm": 0.026632819324731827, "learning_rate": 9.999771693192508e-07, "loss": 0.0581, "step": 97440 }, { "epoch": 1.0411880976547891, "grad_norm": 1.219567060470581, "learning_rate": 9.99977008484906e-07, "loss": 0.016, "step": 97450 }, { "epoch": 1.0412949409690688, "grad_norm": 1.769179344177246, "learning_rate": 9.999768470860496e-07, "loss": 0.0479, "step": 97460 }, { "epoch": 1.0414017842833485, "grad_norm": 0.033319104462862015, "learning_rate": 9.99976685122682e-07, "loss": 0.0033, "step": 97470 }, { "epoch": 1.041508627597628, "grad_norm": 1.679168939590454, "learning_rate": 9.999765225948025e-07, "loss": 0.061, "step": 97480 }, { "epoch": 1.0416154709119076, "grad_norm": 5.62316370010376, "learning_rate": 9.99976359502412e-07, "loss": 0.0416, "step": 97490 }, { "epoch": 1.0417223142261873, "grad_norm": 0.19611097872257233, "learning_rate": 9.9997619584551e-07, "loss": 0.0145, "step": 97500 }, { "epoch": 1.041829157540467, "grad_norm": 1.0585776567459106, "learning_rate": 9.999760316240976e-07, "loss": 0.0377, "step": 97510 }, { "epoch": 1.0419360008547465, "grad_norm": 5.200268745422363, "learning_rate": 9.999758668381745e-07, "loss": 0.0141, "step": 97520 }, { "epoch": 1.0420428441690262, "grad_norm": 2.783916711807251, "learning_rate": 9.99975701487741e-07, "loss": 0.0271, "step": 97530 }, { "epoch": 1.0421496874833058, "grad_norm": 0.3650868535041809, "learning_rate": 9.99975535572797e-07, "loss": 0.0279, "step": 97540 }, { "epoch": 1.0422565307975853, "grad_norm": 0.9151373505592346, "learning_rate": 9.99975369093343e-07, "loss": 0.0169, "step": 97550 }, { "epoch": 1.042363374111865, "grad_norm": 3.6102495193481445, "learning_rate": 9.99975202049379e-07, "loss": 0.0277, "step": 97560 }, { "epoch": 1.0424702174261447, "grad_norm": 0.0639798641204834, "learning_rate": 9.999750344409052e-07, "loss": 0.0439, "step": 97570 }, { "epoch": 1.042577060740424, "grad_norm": 5.262120246887207, "learning_rate": 9.999748662679219e-07, "loss": 0.0585, "step": 97580 }, { "epoch": 1.0426839040547038, "grad_norm": 1.0803568363189697, "learning_rate": 9.999746975304294e-07, "loss": 0.0329, "step": 97590 }, { "epoch": 1.0427907473689835, "grad_norm": 3.7789876461029053, "learning_rate": 9.999745282284275e-07, "loss": 0.0243, "step": 97600 }, { "epoch": 1.042897590683263, "grad_norm": 1.8492591381072998, "learning_rate": 9.999743583619167e-07, "loss": 0.0224, "step": 97610 }, { "epoch": 1.0430044339975426, "grad_norm": 5.352372169494629, "learning_rate": 9.99974187930897e-07, "loss": 0.0307, "step": 97620 }, { "epoch": 1.0431112773118223, "grad_norm": 0.0920352041721344, "learning_rate": 9.999740169353689e-07, "loss": 0.0341, "step": 97630 }, { "epoch": 1.0432181206261018, "grad_norm": 0.20118087530136108, "learning_rate": 9.999738453753324e-07, "loss": 0.0728, "step": 97640 }, { "epoch": 1.0433249639403814, "grad_norm": 0.14054997265338898, "learning_rate": 9.999736732507877e-07, "loss": 0.0365, "step": 97650 }, { "epoch": 1.0434318072546611, "grad_norm": 0.014788459055125713, "learning_rate": 9.999735005617349e-07, "loss": 0.0861, "step": 97660 }, { "epoch": 1.0435386505689406, "grad_norm": 0.17573189735412598, "learning_rate": 9.999733273081744e-07, "loss": 0.0176, "step": 97670 }, { "epoch": 1.0436454938832203, "grad_norm": 4.867619514465332, "learning_rate": 9.99973153490106e-07, "loss": 0.0731, "step": 97680 }, { "epoch": 1.0437523371975, "grad_norm": 2.4280970096588135, "learning_rate": 9.999729791075305e-07, "loss": 0.0339, "step": 97690 }, { "epoch": 1.0438591805117794, "grad_norm": 2.0534398555755615, "learning_rate": 9.999728041604475e-07, "loss": 0.0428, "step": 97700 }, { "epoch": 1.043966023826059, "grad_norm": 0.4344896972179413, "learning_rate": 9.999726286488577e-07, "loss": 0.1609, "step": 97710 }, { "epoch": 1.0440728671403388, "grad_norm": 0.026558393612504005, "learning_rate": 9.99972452572761e-07, "loss": 0.0456, "step": 97720 }, { "epoch": 1.0441797104546182, "grad_norm": 5.593576431274414, "learning_rate": 9.999722759321575e-07, "loss": 0.0446, "step": 97730 }, { "epoch": 1.044286553768898, "grad_norm": 4.025158405303955, "learning_rate": 9.999720987270477e-07, "loss": 0.0542, "step": 97740 }, { "epoch": 1.0443933970831776, "grad_norm": 5.111072540283203, "learning_rate": 9.999719209574318e-07, "loss": 0.0165, "step": 97750 }, { "epoch": 1.044500240397457, "grad_norm": 3.354691982269287, "learning_rate": 9.999717426233097e-07, "loss": 0.0722, "step": 97760 }, { "epoch": 1.0446070837117367, "grad_norm": 0.02284679003059864, "learning_rate": 9.999715637246818e-07, "loss": 0.0188, "step": 97770 }, { "epoch": 1.0447139270260164, "grad_norm": 5.732694149017334, "learning_rate": 9.999713842615482e-07, "loss": 0.0153, "step": 97780 }, { "epoch": 1.0448207703402959, "grad_norm": 0.18708810210227966, "learning_rate": 9.999712042339093e-07, "loss": 0.0845, "step": 97790 }, { "epoch": 1.0449276136545755, "grad_norm": 0.01157594658434391, "learning_rate": 9.99971023641765e-07, "loss": 0.0168, "step": 97800 }, { "epoch": 1.0450344569688552, "grad_norm": 6.055061340332031, "learning_rate": 9.99970842485116e-07, "loss": 0.0302, "step": 97810 }, { "epoch": 1.0451413002831347, "grad_norm": 0.9814098477363586, "learning_rate": 9.99970660763962e-07, "loss": 0.044, "step": 97820 }, { "epoch": 1.0452481435974144, "grad_norm": 5.018909454345703, "learning_rate": 9.999704784783032e-07, "loss": 0.0497, "step": 97830 }, { "epoch": 1.045354986911694, "grad_norm": 6.727485179901123, "learning_rate": 9.999702956281402e-07, "loss": 0.0282, "step": 97840 }, { "epoch": 1.0454618302259737, "grad_norm": 0.04721995070576668, "learning_rate": 9.99970112213473e-07, "loss": 0.026, "step": 97850 }, { "epoch": 1.0455686735402532, "grad_norm": 5.6210246086120605, "learning_rate": 9.999699282343017e-07, "loss": 0.0637, "step": 97860 }, { "epoch": 1.0456755168545329, "grad_norm": 3.198145627975464, "learning_rate": 9.999697436906268e-07, "loss": 0.1278, "step": 97870 }, { "epoch": 1.0457823601688125, "grad_norm": 2.7163329124450684, "learning_rate": 9.999695585824483e-07, "loss": 0.0383, "step": 97880 }, { "epoch": 1.045889203483092, "grad_norm": 15.291441917419434, "learning_rate": 9.999693729097661e-07, "loss": 0.0209, "step": 97890 }, { "epoch": 1.0459960467973717, "grad_norm": 7.985711574554443, "learning_rate": 9.999691866725812e-07, "loss": 0.0274, "step": 97900 }, { "epoch": 1.0461028901116514, "grad_norm": 1.215410590171814, "learning_rate": 9.99968999870893e-07, "loss": 0.0161, "step": 97910 }, { "epoch": 1.0462097334259308, "grad_norm": 4.533626556396484, "learning_rate": 9.999688125047024e-07, "loss": 0.0812, "step": 97920 }, { "epoch": 1.0463165767402105, "grad_norm": 3.6154673099517822, "learning_rate": 9.99968624574009e-07, "loss": 0.0291, "step": 97930 }, { "epoch": 1.0464234200544902, "grad_norm": 2.5822715759277344, "learning_rate": 9.999684360788135e-07, "loss": 0.0098, "step": 97940 }, { "epoch": 1.0465302633687696, "grad_norm": 1.1464622020721436, "learning_rate": 9.999682470191157e-07, "loss": 0.0368, "step": 97950 }, { "epoch": 1.0466371066830493, "grad_norm": 3.247551202774048, "learning_rate": 9.99968057394916e-07, "loss": 0.022, "step": 97960 }, { "epoch": 1.046743949997329, "grad_norm": 7.772812843322754, "learning_rate": 9.999678672062148e-07, "loss": 0.0799, "step": 97970 }, { "epoch": 1.0468507933116085, "grad_norm": 11.429588317871094, "learning_rate": 9.99967676453012e-07, "loss": 0.0149, "step": 97980 }, { "epoch": 1.0469576366258881, "grad_norm": 4.28021764755249, "learning_rate": 9.99967485135308e-07, "loss": 0.0347, "step": 97990 }, { "epoch": 1.0470644799401678, "grad_norm": 1.087045431137085, "learning_rate": 9.999672932531032e-07, "loss": 0.045, "step": 98000 }, { "epoch": 1.0471713232544473, "grad_norm": 4.385335922241211, "learning_rate": 9.999671008063974e-07, "loss": 0.018, "step": 98010 }, { "epoch": 1.047278166568727, "grad_norm": 0.10405819863080978, "learning_rate": 9.999669077951908e-07, "loss": 0.0247, "step": 98020 }, { "epoch": 1.0473850098830066, "grad_norm": 6.342333793640137, "learning_rate": 9.999667142194841e-07, "loss": 0.0333, "step": 98030 }, { "epoch": 1.047491853197286, "grad_norm": 0.09145044535398483, "learning_rate": 9.999665200792773e-07, "loss": 0.0955, "step": 98040 }, { "epoch": 1.0475986965115658, "grad_norm": 3.233659505844116, "learning_rate": 9.999663253745706e-07, "loss": 0.0344, "step": 98050 }, { "epoch": 1.0477055398258455, "grad_norm": 0.009992273524403572, "learning_rate": 9.999661301053638e-07, "loss": 0.0169, "step": 98060 }, { "epoch": 1.047812383140125, "grad_norm": 3.470479726791382, "learning_rate": 9.999659342716577e-07, "loss": 0.0245, "step": 98070 }, { "epoch": 1.0479192264544046, "grad_norm": 12.880797386169434, "learning_rate": 9.999657378734526e-07, "loss": 0.0702, "step": 98080 }, { "epoch": 1.0480260697686843, "grad_norm": 2.7692947387695312, "learning_rate": 9.99965540910748e-07, "loss": 0.0206, "step": 98090 }, { "epoch": 1.0481329130829637, "grad_norm": 6.514156341552734, "learning_rate": 9.99965343383545e-07, "loss": 0.0433, "step": 98100 }, { "epoch": 1.0482397563972434, "grad_norm": 2.071511745452881, "learning_rate": 9.99965145291843e-07, "loss": 0.0122, "step": 98110 }, { "epoch": 1.048346599711523, "grad_norm": 2.120651960372925, "learning_rate": 9.999649466356428e-07, "loss": 0.0727, "step": 98120 }, { "epoch": 1.0484534430258026, "grad_norm": 0.054508231580257416, "learning_rate": 9.999647474149444e-07, "loss": 0.0195, "step": 98130 }, { "epoch": 1.0485602863400822, "grad_norm": 4.187028408050537, "learning_rate": 9.99964547629748e-07, "loss": 0.0293, "step": 98140 }, { "epoch": 1.048667129654362, "grad_norm": 0.04343098774552345, "learning_rate": 9.99964347280054e-07, "loss": 0.0426, "step": 98150 }, { "epoch": 1.0487739729686414, "grad_norm": 5.182177543640137, "learning_rate": 9.999641463658627e-07, "loss": 0.0141, "step": 98160 }, { "epoch": 1.048880816282921, "grad_norm": 0.7286809086799622, "learning_rate": 9.999639448871739e-07, "loss": 0.0569, "step": 98170 }, { "epoch": 1.0489876595972008, "grad_norm": 0.163268581032753, "learning_rate": 9.99963742843988e-07, "loss": 0.0111, "step": 98180 }, { "epoch": 1.0490945029114802, "grad_norm": 0.039794646203517914, "learning_rate": 9.999635402363055e-07, "loss": 0.0428, "step": 98190 }, { "epoch": 1.04920134622576, "grad_norm": 2.8438615798950195, "learning_rate": 9.999633370641264e-07, "loss": 0.0623, "step": 98200 }, { "epoch": 1.0493081895400396, "grad_norm": 3.561168909072876, "learning_rate": 9.999631333274508e-07, "loss": 0.0331, "step": 98210 }, { "epoch": 1.049415032854319, "grad_norm": 2.8199167251586914, "learning_rate": 9.999629290262792e-07, "loss": 0.0277, "step": 98220 }, { "epoch": 1.0495218761685987, "grad_norm": 5.022159576416016, "learning_rate": 9.999627241606117e-07, "loss": 0.0468, "step": 98230 }, { "epoch": 1.0496287194828784, "grad_norm": 5.0428643226623535, "learning_rate": 9.999625187304485e-07, "loss": 0.0585, "step": 98240 }, { "epoch": 1.049735562797158, "grad_norm": 3.9489288330078125, "learning_rate": 9.9996231273579e-07, "loss": 0.0702, "step": 98250 }, { "epoch": 1.0498424061114375, "grad_norm": 5.5366973876953125, "learning_rate": 9.999621061766364e-07, "loss": 0.039, "step": 98260 }, { "epoch": 1.0499492494257172, "grad_norm": 0.2272413671016693, "learning_rate": 9.999618990529878e-07, "loss": 0.0586, "step": 98270 }, { "epoch": 1.050056092739997, "grad_norm": 2.650411605834961, "learning_rate": 9.999616913648444e-07, "loss": 0.0624, "step": 98280 }, { "epoch": 1.0501629360542764, "grad_norm": 6.993862628936768, "learning_rate": 9.999614831122063e-07, "loss": 0.0243, "step": 98290 }, { "epoch": 1.050269779368556, "grad_norm": 0.3006976246833801, "learning_rate": 9.999612742950742e-07, "loss": 0.1062, "step": 98300 }, { "epoch": 1.0503766226828357, "grad_norm": 0.36432120203971863, "learning_rate": 9.99961064913448e-07, "loss": 0.0612, "step": 98310 }, { "epoch": 1.0504834659971152, "grad_norm": 0.014924006536602974, "learning_rate": 9.999608549673282e-07, "loss": 0.0569, "step": 98320 }, { "epoch": 1.0505903093113949, "grad_norm": 1.4431768655776978, "learning_rate": 9.999606444567148e-07, "loss": 0.0709, "step": 98330 }, { "epoch": 1.0506971526256745, "grad_norm": 7.918305397033691, "learning_rate": 9.99960433381608e-07, "loss": 0.0434, "step": 98340 }, { "epoch": 1.050803995939954, "grad_norm": 11.121996879577637, "learning_rate": 9.99960221742008e-07, "loss": 0.0419, "step": 98350 }, { "epoch": 1.0509108392542337, "grad_norm": 0.13438864052295685, "learning_rate": 9.999600095379155e-07, "loss": 0.021, "step": 98360 }, { "epoch": 1.0510176825685134, "grad_norm": 0.02527203969657421, "learning_rate": 9.999597967693302e-07, "loss": 0.0222, "step": 98370 }, { "epoch": 1.0511245258827928, "grad_norm": 0.2376689612865448, "learning_rate": 9.999595834362527e-07, "loss": 0.0673, "step": 98380 }, { "epoch": 1.0512313691970725, "grad_norm": 0.024456847459077835, "learning_rate": 9.99959369538683e-07, "loss": 0.0382, "step": 98390 }, { "epoch": 1.0513382125113522, "grad_norm": 0.5030835270881653, "learning_rate": 9.999591550766212e-07, "loss": 0.0488, "step": 98400 }, { "epoch": 1.0514450558256316, "grad_norm": 6.472896575927734, "learning_rate": 9.999589400500682e-07, "loss": 0.0252, "step": 98410 }, { "epoch": 1.0515518991399113, "grad_norm": 1.8535997867584229, "learning_rate": 9.999587244590238e-07, "loss": 0.0458, "step": 98420 }, { "epoch": 1.051658742454191, "grad_norm": 0.7705284953117371, "learning_rate": 9.99958508303488e-07, "loss": 0.0249, "step": 98430 }, { "epoch": 1.0517655857684705, "grad_norm": 7.623313903808594, "learning_rate": 9.999582915834615e-07, "loss": 0.0225, "step": 98440 }, { "epoch": 1.0518724290827501, "grad_norm": 0.3461369276046753, "learning_rate": 9.99958074298944e-07, "loss": 0.022, "step": 98450 }, { "epoch": 1.0519792723970298, "grad_norm": 0.35262805223464966, "learning_rate": 9.999578564499367e-07, "loss": 0.0388, "step": 98460 }, { "epoch": 1.0520861157113093, "grad_norm": 4.45548677444458, "learning_rate": 9.999576380364387e-07, "loss": 0.0323, "step": 98470 }, { "epoch": 1.052192959025589, "grad_norm": 0.45991402864456177, "learning_rate": 9.999574190584512e-07, "loss": 0.0168, "step": 98480 }, { "epoch": 1.0522998023398686, "grad_norm": 0.04161904379725456, "learning_rate": 9.99957199515974e-07, "loss": 0.0332, "step": 98490 }, { "epoch": 1.052406645654148, "grad_norm": 0.005619551986455917, "learning_rate": 9.99956979409007e-07, "loss": 0.0237, "step": 98500 }, { "epoch": 1.0525134889684278, "grad_norm": 0.3214941918849945, "learning_rate": 9.999567587375512e-07, "loss": 0.0277, "step": 98510 }, { "epoch": 1.0526203322827075, "grad_norm": 9.417194366455078, "learning_rate": 9.999565375016063e-07, "loss": 0.0409, "step": 98520 }, { "epoch": 1.052727175596987, "grad_norm": 0.8699179887771606, "learning_rate": 9.999563157011728e-07, "loss": 0.0397, "step": 98530 }, { "epoch": 1.0528340189112666, "grad_norm": 0.13346414268016815, "learning_rate": 9.99956093336251e-07, "loss": 0.061, "step": 98540 }, { "epoch": 1.0529408622255463, "grad_norm": 1.3425980806350708, "learning_rate": 9.99955870406841e-07, "loss": 0.021, "step": 98550 }, { "epoch": 1.0530477055398257, "grad_norm": 6.964719772338867, "learning_rate": 9.99955646912943e-07, "loss": 0.0141, "step": 98560 }, { "epoch": 1.0531545488541054, "grad_norm": 3.7297449111938477, "learning_rate": 9.999554228545572e-07, "loss": 0.0432, "step": 98570 }, { "epoch": 1.053261392168385, "grad_norm": 4.662478446960449, "learning_rate": 9.999551982316843e-07, "loss": 0.0497, "step": 98580 }, { "epoch": 1.0533682354826648, "grad_norm": 4.587474346160889, "learning_rate": 9.999549730443242e-07, "loss": 0.0261, "step": 98590 }, { "epoch": 1.0534750787969442, "grad_norm": 10.80277156829834, "learning_rate": 9.99954747292477e-07, "loss": 0.0427, "step": 98600 }, { "epoch": 1.053581922111224, "grad_norm": 0.03949962928891182, "learning_rate": 9.999545209761434e-07, "loss": 0.0083, "step": 98610 }, { "epoch": 1.0536887654255036, "grad_norm": 5.966732978820801, "learning_rate": 9.999542940953231e-07, "loss": 0.0368, "step": 98620 }, { "epoch": 1.053795608739783, "grad_norm": 6.252395153045654, "learning_rate": 9.999540666500169e-07, "loss": 0.022, "step": 98630 }, { "epoch": 1.0539024520540627, "grad_norm": 8.14661693572998, "learning_rate": 9.999538386402248e-07, "loss": 0.0387, "step": 98640 }, { "epoch": 1.0540092953683424, "grad_norm": 1.3047798871994019, "learning_rate": 9.99953610065947e-07, "loss": 0.0182, "step": 98650 }, { "epoch": 1.0541161386826219, "grad_norm": 6.855045795440674, "learning_rate": 9.99953380927184e-07, "loss": 0.0238, "step": 98660 }, { "epoch": 1.0542229819969016, "grad_norm": 1.5970706939697266, "learning_rate": 9.999531512239358e-07, "loss": 0.0316, "step": 98670 }, { "epoch": 1.0543298253111812, "grad_norm": 0.1449664682149887, "learning_rate": 9.999529209562028e-07, "loss": 0.0909, "step": 98680 }, { "epoch": 1.0544366686254607, "grad_norm": 0.4207299053668976, "learning_rate": 9.999526901239852e-07, "loss": 0.0065, "step": 98690 }, { "epoch": 1.0545435119397404, "grad_norm": 4.516275882720947, "learning_rate": 9.999524587272831e-07, "loss": 0.0374, "step": 98700 }, { "epoch": 1.05465035525402, "grad_norm": 3.632758140563965, "learning_rate": 9.999522267660971e-07, "loss": 0.0263, "step": 98710 }, { "epoch": 1.0547571985682995, "grad_norm": 0.13783954083919525, "learning_rate": 9.999519942404272e-07, "loss": 0.0084, "step": 98720 }, { "epoch": 1.0548640418825792, "grad_norm": 3.6608245372772217, "learning_rate": 9.99951761150274e-07, "loss": 0.0106, "step": 98730 }, { "epoch": 1.0549708851968589, "grad_norm": 0.17347104847431183, "learning_rate": 9.999515274956373e-07, "loss": 0.0138, "step": 98740 }, { "epoch": 1.0550777285111383, "grad_norm": 0.09126031398773193, "learning_rate": 9.99951293276518e-07, "loss": 0.0072, "step": 98750 }, { "epoch": 1.055184571825418, "grad_norm": 11.287423133850098, "learning_rate": 9.999510584929154e-07, "loss": 0.0332, "step": 98760 }, { "epoch": 1.0552914151396977, "grad_norm": 4.677258491516113, "learning_rate": 9.999508231448305e-07, "loss": 0.0196, "step": 98770 }, { "epoch": 1.0553982584539772, "grad_norm": 0.018171558156609535, "learning_rate": 9.999505872322636e-07, "loss": 0.0198, "step": 98780 }, { "epoch": 1.0555051017682568, "grad_norm": 4.329774856567383, "learning_rate": 9.999503507552146e-07, "loss": 0.0613, "step": 98790 }, { "epoch": 1.0556119450825365, "grad_norm": 0.13346931338310242, "learning_rate": 9.999501137136839e-07, "loss": 0.045, "step": 98800 }, { "epoch": 1.055718788396816, "grad_norm": 3.9291138648986816, "learning_rate": 9.99949876107672e-07, "loss": 0.0191, "step": 98810 }, { "epoch": 1.0558256317110957, "grad_norm": 10.711149215698242, "learning_rate": 9.999496379371785e-07, "loss": 0.0414, "step": 98820 }, { "epoch": 1.0559324750253753, "grad_norm": 0.18891353905200958, "learning_rate": 9.999493992022047e-07, "loss": 0.0171, "step": 98830 }, { "epoch": 1.0560393183396548, "grad_norm": 3.7250101566314697, "learning_rate": 9.999491599027498e-07, "loss": 0.076, "step": 98840 }, { "epoch": 1.0561461616539345, "grad_norm": 10.938594818115234, "learning_rate": 9.999489200388148e-07, "loss": 0.1025, "step": 98850 }, { "epoch": 1.0562530049682142, "grad_norm": 3.357053756713867, "learning_rate": 9.999486796103997e-07, "loss": 0.0199, "step": 98860 }, { "epoch": 1.0563598482824936, "grad_norm": 2.2810659408569336, "learning_rate": 9.999484386175047e-07, "loss": 0.0754, "step": 98870 }, { "epoch": 1.0564666915967733, "grad_norm": 11.065349578857422, "learning_rate": 9.999481970601304e-07, "loss": 0.0879, "step": 98880 }, { "epoch": 1.056573534911053, "grad_norm": 3.4545814990997314, "learning_rate": 9.999479549382769e-07, "loss": 0.0494, "step": 98890 }, { "epoch": 1.0566803782253325, "grad_norm": 5.961474895477295, "learning_rate": 9.99947712251944e-07, "loss": 0.0611, "step": 98900 }, { "epoch": 1.0567872215396121, "grad_norm": 0.3599105775356293, "learning_rate": 9.999474690011328e-07, "loss": 0.0184, "step": 98910 }, { "epoch": 1.0568940648538918, "grad_norm": 3.679978132247925, "learning_rate": 9.999472251858429e-07, "loss": 0.0561, "step": 98920 }, { "epoch": 1.0570009081681713, "grad_norm": 0.10366759449243546, "learning_rate": 9.99946980806075e-07, "loss": 0.0248, "step": 98930 }, { "epoch": 1.057107751482451, "grad_norm": 0.6841274499893188, "learning_rate": 9.999467358618293e-07, "loss": 0.015, "step": 98940 }, { "epoch": 1.0572145947967306, "grad_norm": 0.03706660494208336, "learning_rate": 9.99946490353106e-07, "loss": 0.0322, "step": 98950 }, { "epoch": 1.05732143811101, "grad_norm": 4.148680686950684, "learning_rate": 9.999462442799052e-07, "loss": 0.0168, "step": 98960 }, { "epoch": 1.0574282814252898, "grad_norm": 0.23941951990127563, "learning_rate": 9.999459976422276e-07, "loss": 0.0098, "step": 98970 }, { "epoch": 1.0575351247395695, "grad_norm": 8.450838088989258, "learning_rate": 9.99945750440073e-07, "loss": 0.096, "step": 98980 }, { "epoch": 1.0576419680538491, "grad_norm": 5.160444259643555, "learning_rate": 9.99945502673442e-07, "loss": 0.0369, "step": 98990 }, { "epoch": 1.0577488113681286, "grad_norm": 7.887688636779785, "learning_rate": 9.999452543423347e-07, "loss": 0.0335, "step": 99000 }, { "epoch": 1.0578556546824083, "grad_norm": 9.538674354553223, "learning_rate": 9.999450054467517e-07, "loss": 0.0587, "step": 99010 }, { "epoch": 1.057962497996688, "grad_norm": 0.1531357318162918, "learning_rate": 9.99944755986693e-07, "loss": 0.018, "step": 99020 }, { "epoch": 1.0580693413109674, "grad_norm": 8.740703582763672, "learning_rate": 9.999445059621588e-07, "loss": 0.1138, "step": 99030 }, { "epoch": 1.058176184625247, "grad_norm": 3.227790117263794, "learning_rate": 9.999442553731496e-07, "loss": 0.0422, "step": 99040 }, { "epoch": 1.0582830279395268, "grad_norm": 3.1997556686401367, "learning_rate": 9.999440042196656e-07, "loss": 0.0604, "step": 99050 }, { "epoch": 1.0583898712538062, "grad_norm": 0.1426675170660019, "learning_rate": 9.99943752501707e-07, "loss": 0.018, "step": 99060 }, { "epoch": 1.058496714568086, "grad_norm": 0.35175633430480957, "learning_rate": 9.999435002192744e-07, "loss": 0.0352, "step": 99070 }, { "epoch": 1.0586035578823656, "grad_norm": 3.6959426403045654, "learning_rate": 9.999432473723678e-07, "loss": 0.0551, "step": 99080 }, { "epoch": 1.058710401196645, "grad_norm": 2.9018630981445312, "learning_rate": 9.999429939609874e-07, "loss": 0.0384, "step": 99090 }, { "epoch": 1.0588172445109247, "grad_norm": 23.522502899169922, "learning_rate": 9.999427399851335e-07, "loss": 0.1014, "step": 99100 }, { "epoch": 1.0589240878252044, "grad_norm": 5.3612494468688965, "learning_rate": 9.999424854448067e-07, "loss": 0.0379, "step": 99110 }, { "epoch": 1.0590309311394839, "grad_norm": 0.5729508996009827, "learning_rate": 9.999422303400071e-07, "loss": 0.0364, "step": 99120 }, { "epoch": 1.0591377744537636, "grad_norm": 0.45211347937583923, "learning_rate": 9.99941974670735e-07, "loss": 0.0237, "step": 99130 }, { "epoch": 1.0592446177680432, "grad_norm": 1.0067471265792847, "learning_rate": 9.999417184369907e-07, "loss": 0.0185, "step": 99140 }, { "epoch": 1.0593514610823227, "grad_norm": 0.03519001975655556, "learning_rate": 9.999414616387745e-07, "loss": 0.0626, "step": 99150 }, { "epoch": 1.0594583043966024, "grad_norm": 16.625579833984375, "learning_rate": 9.999412042760865e-07, "loss": 0.0712, "step": 99160 }, { "epoch": 1.059565147710882, "grad_norm": 5.588411331176758, "learning_rate": 9.999409463489272e-07, "loss": 0.068, "step": 99170 }, { "epoch": 1.0596719910251615, "grad_norm": 15.01649284362793, "learning_rate": 9.99940687857297e-07, "loss": 0.1061, "step": 99180 }, { "epoch": 1.0597788343394412, "grad_norm": 1.3037256002426147, "learning_rate": 9.999404288011958e-07, "loss": 0.0343, "step": 99190 }, { "epoch": 1.0598856776537209, "grad_norm": 4.808833599090576, "learning_rate": 9.999401691806241e-07, "loss": 0.0317, "step": 99200 }, { "epoch": 1.0599925209680003, "grad_norm": 9.053211212158203, "learning_rate": 9.999399089955825e-07, "loss": 0.05, "step": 99210 }, { "epoch": 1.06009936428228, "grad_norm": 5.435070991516113, "learning_rate": 9.999396482460709e-07, "loss": 0.0595, "step": 99220 }, { "epoch": 1.0602062075965597, "grad_norm": 1.5563539266586304, "learning_rate": 9.999393869320895e-07, "loss": 0.0574, "step": 99230 }, { "epoch": 1.0603130509108392, "grad_norm": 2.24165678024292, "learning_rate": 9.99939125053639e-07, "loss": 0.0195, "step": 99240 }, { "epoch": 1.0604198942251188, "grad_norm": 7.105439186096191, "learning_rate": 9.999388626107195e-07, "loss": 0.0255, "step": 99250 }, { "epoch": 1.0605267375393985, "grad_norm": 0.110486701130867, "learning_rate": 9.999385996033313e-07, "loss": 0.0312, "step": 99260 }, { "epoch": 1.060633580853678, "grad_norm": 0.03827539086341858, "learning_rate": 9.999383360314746e-07, "loss": 0.0225, "step": 99270 }, { "epoch": 1.0607404241679577, "grad_norm": 10.197497367858887, "learning_rate": 9.999380718951498e-07, "loss": 0.0416, "step": 99280 }, { "epoch": 1.0608472674822373, "grad_norm": 0.20734097063541412, "learning_rate": 9.999378071943572e-07, "loss": 0.0246, "step": 99290 }, { "epoch": 1.0609541107965168, "grad_norm": 5.056283473968506, "learning_rate": 9.999375419290972e-07, "loss": 0.0541, "step": 99300 }, { "epoch": 1.0610609541107965, "grad_norm": 7.236276626586914, "learning_rate": 9.9993727609937e-07, "loss": 0.0484, "step": 99310 }, { "epoch": 1.0611677974250762, "grad_norm": 0.3292015492916107, "learning_rate": 9.999370097051755e-07, "loss": 0.0215, "step": 99320 }, { "epoch": 1.0612746407393558, "grad_norm": 1.1508784294128418, "learning_rate": 9.999367427465147e-07, "loss": 0.0555, "step": 99330 }, { "epoch": 1.0613814840536353, "grad_norm": 0.7316884994506836, "learning_rate": 9.999364752233876e-07, "loss": 0.0241, "step": 99340 }, { "epoch": 1.061488327367915, "grad_norm": 0.056218087673187256, "learning_rate": 9.999362071357943e-07, "loss": 0.0165, "step": 99350 }, { "epoch": 1.0615951706821947, "grad_norm": 0.04365310072898865, "learning_rate": 9.999359384837355e-07, "loss": 0.0101, "step": 99360 }, { "epoch": 1.0617020139964741, "grad_norm": 0.7056117653846741, "learning_rate": 9.999356692672112e-07, "loss": 0.0643, "step": 99370 }, { "epoch": 1.0618088573107538, "grad_norm": 0.0594211146235466, "learning_rate": 9.999353994862218e-07, "loss": 0.0187, "step": 99380 }, { "epoch": 1.0619157006250335, "grad_norm": 0.3890163004398346, "learning_rate": 9.999351291407676e-07, "loss": 0.0314, "step": 99390 }, { "epoch": 1.062022543939313, "grad_norm": 0.6170394420623779, "learning_rate": 9.999348582308489e-07, "loss": 0.0578, "step": 99400 }, { "epoch": 1.0621293872535926, "grad_norm": 5.13746452331543, "learning_rate": 9.999345867564661e-07, "loss": 0.0345, "step": 99410 }, { "epoch": 1.0622362305678723, "grad_norm": 6.390937805175781, "learning_rate": 9.999343147176191e-07, "loss": 0.0194, "step": 99420 }, { "epoch": 1.0623430738821518, "grad_norm": 2.953252077102661, "learning_rate": 9.99934042114309e-07, "loss": 0.0967, "step": 99430 }, { "epoch": 1.0624499171964314, "grad_norm": 3.656682014465332, "learning_rate": 9.999337689465354e-07, "loss": 0.0603, "step": 99440 }, { "epoch": 1.0625567605107111, "grad_norm": 0.675997793674469, "learning_rate": 9.99933495214299e-07, "loss": 0.0338, "step": 99450 }, { "epoch": 1.0626636038249906, "grad_norm": 0.03284503519535065, "learning_rate": 9.999332209175996e-07, "loss": 0.0493, "step": 99460 }, { "epoch": 1.0627704471392703, "grad_norm": 0.08604323118925095, "learning_rate": 9.99932946056438e-07, "loss": 0.0183, "step": 99470 }, { "epoch": 1.06287729045355, "grad_norm": 8.18803882598877, "learning_rate": 9.999326706308145e-07, "loss": 0.0669, "step": 99480 }, { "epoch": 1.0629841337678294, "grad_norm": 2.203409194946289, "learning_rate": 9.999323946407291e-07, "loss": 0.0077, "step": 99490 }, { "epoch": 1.063090977082109, "grad_norm": 0.5403050184249878, "learning_rate": 9.999321180861824e-07, "loss": 0.093, "step": 99500 }, { "epoch": 1.0631978203963888, "grad_norm": 10.061102867126465, "learning_rate": 9.999318409671747e-07, "loss": 0.0525, "step": 99510 }, { "epoch": 1.0633046637106682, "grad_norm": 43.46767044067383, "learning_rate": 9.999315632837063e-07, "loss": 0.0485, "step": 99520 }, { "epoch": 1.063411507024948, "grad_norm": 0.21932338178157806, "learning_rate": 9.99931285035777e-07, "loss": 0.0169, "step": 99530 }, { "epoch": 1.0635183503392276, "grad_norm": 0.180930957198143, "learning_rate": 9.99931006223388e-07, "loss": 0.0241, "step": 99540 }, { "epoch": 1.063625193653507, "grad_norm": 0.05754503607749939, "learning_rate": 9.99930726846539e-07, "loss": 0.045, "step": 99550 }, { "epoch": 1.0637320369677867, "grad_norm": 0.9847931861877441, "learning_rate": 9.999304469052303e-07, "loss": 0.0274, "step": 99560 }, { "epoch": 1.0638388802820664, "grad_norm": 1.3961400985717773, "learning_rate": 9.999301663994625e-07, "loss": 0.0324, "step": 99570 }, { "epoch": 1.0639457235963459, "grad_norm": 13.371536254882812, "learning_rate": 9.99929885329236e-07, "loss": 0.053, "step": 99580 }, { "epoch": 1.0640525669106256, "grad_norm": 19.550823211669922, "learning_rate": 9.999296036945508e-07, "loss": 0.0349, "step": 99590 }, { "epoch": 1.0641594102249052, "grad_norm": 0.008938977494835854, "learning_rate": 9.999293214954072e-07, "loss": 0.0151, "step": 99600 }, { "epoch": 1.0642662535391847, "grad_norm": 1.097915530204773, "learning_rate": 9.99929038731806e-07, "loss": 0.0738, "step": 99610 }, { "epoch": 1.0643730968534644, "grad_norm": 0.4504864811897278, "learning_rate": 9.99928755403747e-07, "loss": 0.0226, "step": 99620 }, { "epoch": 1.064479940167744, "grad_norm": 1.0400511026382446, "learning_rate": 9.999284715112305e-07, "loss": 0.0382, "step": 99630 }, { "epoch": 1.0645867834820235, "grad_norm": 3.551482915878296, "learning_rate": 9.999281870542574e-07, "loss": 0.0516, "step": 99640 }, { "epoch": 1.0646936267963032, "grad_norm": 0.03315138816833496, "learning_rate": 9.999279020328273e-07, "loss": 0.0503, "step": 99650 }, { "epoch": 1.0648004701105829, "grad_norm": 0.14641845226287842, "learning_rate": 9.999276164469411e-07, "loss": 0.0759, "step": 99660 }, { "epoch": 1.0649073134248623, "grad_norm": 6.421437740325928, "learning_rate": 9.999273302965987e-07, "loss": 0.0527, "step": 99670 }, { "epoch": 1.065014156739142, "grad_norm": 11.201748847961426, "learning_rate": 9.999270435818008e-07, "loss": 0.0327, "step": 99680 }, { "epoch": 1.0651210000534217, "grad_norm": 0.0577818900346756, "learning_rate": 9.999267563025475e-07, "loss": 0.0152, "step": 99690 }, { "epoch": 1.0652278433677012, "grad_norm": 0.9949334263801575, "learning_rate": 9.99926468458839e-07, "loss": 0.0577, "step": 99700 }, { "epoch": 1.0653346866819808, "grad_norm": 1.3778517246246338, "learning_rate": 9.999261800506761e-07, "loss": 0.0142, "step": 99710 }, { "epoch": 1.0654415299962605, "grad_norm": 0.4749758243560791, "learning_rate": 9.999258910780585e-07, "loss": 0.0104, "step": 99720 }, { "epoch": 1.06554837331054, "grad_norm": 7.026335716247559, "learning_rate": 9.99925601540987e-07, "loss": 0.0188, "step": 99730 }, { "epoch": 1.0656552166248197, "grad_norm": 7.524916648864746, "learning_rate": 9.999253114394618e-07, "loss": 0.0344, "step": 99740 }, { "epoch": 1.0657620599390993, "grad_norm": 11.454225540161133, "learning_rate": 9.99925020773483e-07, "loss": 0.0569, "step": 99750 }, { "epoch": 1.065868903253379, "grad_norm": 4.1907525062561035, "learning_rate": 9.999247295430512e-07, "loss": 0.0508, "step": 99760 }, { "epoch": 1.0659757465676585, "grad_norm": 6.024078845977783, "learning_rate": 9.999244377481668e-07, "loss": 0.069, "step": 99770 }, { "epoch": 1.0660825898819382, "grad_norm": 11.746204376220703, "learning_rate": 9.999241453888298e-07, "loss": 0.0392, "step": 99780 }, { "epoch": 1.0661894331962178, "grad_norm": 1.9627875089645386, "learning_rate": 9.999238524650409e-07, "loss": 0.0328, "step": 99790 }, { "epoch": 1.0662962765104973, "grad_norm": 1.160753846168518, "learning_rate": 9.999235589768e-07, "loss": 0.03, "step": 99800 }, { "epoch": 1.066403119824777, "grad_norm": 0.023907961323857307, "learning_rate": 9.99923264924108e-07, "loss": 0.018, "step": 99810 }, { "epoch": 1.0665099631390567, "grad_norm": 5.750611782073975, "learning_rate": 9.999229703069646e-07, "loss": 0.0967, "step": 99820 }, { "epoch": 1.0666168064533361, "grad_norm": 5.226452827453613, "learning_rate": 9.999226751253703e-07, "loss": 0.0523, "step": 99830 }, { "epoch": 1.0667236497676158, "grad_norm": 10.878729820251465, "learning_rate": 9.99922379379326e-07, "loss": 0.0646, "step": 99840 }, { "epoch": 1.0668304930818955, "grad_norm": 0.6553530097007751, "learning_rate": 9.999220830688315e-07, "loss": 0.0117, "step": 99850 }, { "epoch": 1.066937336396175, "grad_norm": 0.021469902247190475, "learning_rate": 9.99921786193887e-07, "loss": 0.0289, "step": 99860 }, { "epoch": 1.0670441797104546, "grad_norm": 0.2847644090652466, "learning_rate": 9.999214887544933e-07, "loss": 0.0091, "step": 99870 }, { "epoch": 1.0671510230247343, "grad_norm": 5.423194885253906, "learning_rate": 9.999211907506505e-07, "loss": 0.0445, "step": 99880 }, { "epoch": 1.0672578663390138, "grad_norm": 0.35976073145866394, "learning_rate": 9.999208921823587e-07, "loss": 0.0396, "step": 99890 }, { "epoch": 1.0673647096532934, "grad_norm": 0.03259696066379547, "learning_rate": 9.999205930496186e-07, "loss": 0.0227, "step": 99900 }, { "epoch": 1.0674715529675731, "grad_norm": 0.5461384654045105, "learning_rate": 9.999202933524305e-07, "loss": 0.0372, "step": 99910 }, { "epoch": 1.0675783962818526, "grad_norm": 0.1818264275789261, "learning_rate": 9.999199930907947e-07, "loss": 0.0259, "step": 99920 }, { "epoch": 1.0676852395961323, "grad_norm": 0.11170175671577454, "learning_rate": 9.999196922647114e-07, "loss": 0.0767, "step": 99930 }, { "epoch": 1.067792082910412, "grad_norm": 0.0026868151035159826, "learning_rate": 9.99919390874181e-07, "loss": 0.0237, "step": 99940 }, { "epoch": 1.0678989262246914, "grad_norm": 1.7486112117767334, "learning_rate": 9.99919088919204e-07, "loss": 0.0646, "step": 99950 }, { "epoch": 1.068005769538971, "grad_norm": 1.1735002994537354, "learning_rate": 9.999187863997804e-07, "loss": 0.0321, "step": 99960 }, { "epoch": 1.0681126128532508, "grad_norm": 3.664109706878662, "learning_rate": 9.99918483315911e-07, "loss": 0.0429, "step": 99970 }, { "epoch": 1.0682194561675302, "grad_norm": 0.13990768790245056, "learning_rate": 9.99918179667596e-07, "loss": 0.094, "step": 99980 }, { "epoch": 1.06832629948181, "grad_norm": 3.958099126815796, "learning_rate": 9.999178754548354e-07, "loss": 0.0279, "step": 99990 }, { "epoch": 1.0684331427960896, "grad_norm": 5.878034591674805, "learning_rate": 9.999175706776297e-07, "loss": 0.0894, "step": 100000 }, { "epoch": 1.068539986110369, "grad_norm": 0.47899335622787476, "learning_rate": 9.999172653359794e-07, "loss": 0.0246, "step": 100010 }, { "epoch": 1.0686468294246487, "grad_norm": 3.7580573558807373, "learning_rate": 9.99916959429885e-07, "loss": 0.0356, "step": 100020 }, { "epoch": 1.0687536727389284, "grad_norm": 4.806769847869873, "learning_rate": 9.999166529593466e-07, "loss": 0.046, "step": 100030 }, { "epoch": 1.068860516053208, "grad_norm": 2.6412220001220703, "learning_rate": 9.999163459243641e-07, "loss": 0.057, "step": 100040 }, { "epoch": 1.0689673593674875, "grad_norm": 3.2285661697387695, "learning_rate": 9.999160383249389e-07, "loss": 0.0217, "step": 100050 }, { "epoch": 1.0690742026817672, "grad_norm": 1.6491957902908325, "learning_rate": 9.999157301610703e-07, "loss": 0.0586, "step": 100060 }, { "epoch": 1.069181045996047, "grad_norm": 0.2639240026473999, "learning_rate": 9.999154214327596e-07, "loss": 0.0259, "step": 100070 }, { "epoch": 1.0692878893103264, "grad_norm": 1.9309911727905273, "learning_rate": 9.999151121400062e-07, "loss": 0.0251, "step": 100080 }, { "epoch": 1.069394732624606, "grad_norm": 4.288965702056885, "learning_rate": 9.999148022828112e-07, "loss": 0.0676, "step": 100090 }, { "epoch": 1.0695015759388857, "grad_norm": 2.2216429710388184, "learning_rate": 9.999144918611747e-07, "loss": 0.0392, "step": 100100 }, { "epoch": 1.0696084192531652, "grad_norm": 0.3680819571018219, "learning_rate": 9.999141808750968e-07, "loss": 0.0819, "step": 100110 }, { "epoch": 1.0697152625674449, "grad_norm": 10.053296089172363, "learning_rate": 9.999138693245781e-07, "loss": 0.046, "step": 100120 }, { "epoch": 1.0698221058817245, "grad_norm": 2.031616687774658, "learning_rate": 9.99913557209619e-07, "loss": 0.0477, "step": 100130 }, { "epoch": 1.069928949196004, "grad_norm": 4.448964595794678, "learning_rate": 9.999132445302197e-07, "loss": 0.0213, "step": 100140 }, { "epoch": 1.0700357925102837, "grad_norm": 2.2851340770721436, "learning_rate": 9.999129312863805e-07, "loss": 0.0845, "step": 100150 }, { "epoch": 1.0701426358245634, "grad_norm": 3.9606077671051025, "learning_rate": 9.99912617478102e-07, "loss": 0.02, "step": 100160 }, { "epoch": 1.0702494791388428, "grad_norm": 1.6772407293319702, "learning_rate": 9.999123031053845e-07, "loss": 0.0427, "step": 100170 }, { "epoch": 1.0703563224531225, "grad_norm": 0.15083438158035278, "learning_rate": 9.999119881682282e-07, "loss": 0.0244, "step": 100180 }, { "epoch": 1.0704631657674022, "grad_norm": 2.3469786643981934, "learning_rate": 9.999116726666335e-07, "loss": 0.0542, "step": 100190 }, { "epoch": 1.0705700090816817, "grad_norm": 6.326849460601807, "learning_rate": 9.999113566006008e-07, "loss": 0.0215, "step": 100200 }, { "epoch": 1.0706768523959613, "grad_norm": 1.6212494373321533, "learning_rate": 9.999110399701304e-07, "loss": 0.0424, "step": 100210 }, { "epoch": 1.070783695710241, "grad_norm": 0.06699518859386444, "learning_rate": 9.999107227752229e-07, "loss": 0.0531, "step": 100220 }, { "epoch": 1.0708905390245205, "grad_norm": 0.43381771445274353, "learning_rate": 9.999104050158782e-07, "loss": 0.0264, "step": 100230 }, { "epoch": 1.0709973823388002, "grad_norm": 2.5889604091644287, "learning_rate": 9.999100866920972e-07, "loss": 0.0706, "step": 100240 }, { "epoch": 1.0711042256530798, "grad_norm": 1.4354954957962036, "learning_rate": 9.999097678038797e-07, "loss": 0.0431, "step": 100250 }, { "epoch": 1.0712110689673593, "grad_norm": 0.008186230435967445, "learning_rate": 9.999094483512266e-07, "loss": 0.042, "step": 100260 }, { "epoch": 1.071317912281639, "grad_norm": 3.360302686691284, "learning_rate": 9.999091283341378e-07, "loss": 0.0541, "step": 100270 }, { "epoch": 1.0714247555959187, "grad_norm": 0.07130510360002518, "learning_rate": 9.999088077526138e-07, "loss": 0.074, "step": 100280 }, { "epoch": 1.0715315989101981, "grad_norm": 3.912888526916504, "learning_rate": 9.999084866066554e-07, "loss": 0.0214, "step": 100290 }, { "epoch": 1.0716384422244778, "grad_norm": 0.026974741369485855, "learning_rate": 9.999081648962622e-07, "loss": 0.0507, "step": 100300 }, { "epoch": 1.0717452855387575, "grad_norm": 2.4194436073303223, "learning_rate": 9.99907842621435e-07, "loss": 0.0325, "step": 100310 }, { "epoch": 1.071852128853037, "grad_norm": 0.2985324263572693, "learning_rate": 9.999075197821744e-07, "loss": 0.0282, "step": 100320 }, { "epoch": 1.0719589721673166, "grad_norm": 0.22280630469322205, "learning_rate": 9.999071963784804e-07, "loss": 0.0849, "step": 100330 }, { "epoch": 1.0720658154815963, "grad_norm": 2.2909090518951416, "learning_rate": 9.999068724103533e-07, "loss": 0.0312, "step": 100340 }, { "epoch": 1.0721726587958758, "grad_norm": 1.9474844932556152, "learning_rate": 9.999065478777937e-07, "loss": 0.0127, "step": 100350 }, { "epoch": 1.0722795021101554, "grad_norm": 0.15917141735553741, "learning_rate": 9.999062227808018e-07, "loss": 0.0188, "step": 100360 }, { "epoch": 1.0723863454244351, "grad_norm": 19.05624008178711, "learning_rate": 9.999058971193781e-07, "loss": 0.0403, "step": 100370 }, { "epoch": 1.0724931887387146, "grad_norm": 0.5912094712257385, "learning_rate": 9.999055708935228e-07, "loss": 0.0696, "step": 100380 }, { "epoch": 1.0726000320529943, "grad_norm": 12.113592147827148, "learning_rate": 9.999052441032367e-07, "loss": 0.0483, "step": 100390 }, { "epoch": 1.072706875367274, "grad_norm": 1.2916851043701172, "learning_rate": 9.999049167485196e-07, "loss": 0.018, "step": 100400 }, { "epoch": 1.0728137186815534, "grad_norm": 0.016665106639266014, "learning_rate": 9.999045888293722e-07, "loss": 0.0277, "step": 100410 }, { "epoch": 1.072920561995833, "grad_norm": 6.057650089263916, "learning_rate": 9.999042603457946e-07, "loss": 0.0882, "step": 100420 }, { "epoch": 1.0730274053101128, "grad_norm": 3.3480846881866455, "learning_rate": 9.999039312977875e-07, "loss": 0.0279, "step": 100430 }, { "epoch": 1.0731342486243922, "grad_norm": 0.3090769648551941, "learning_rate": 9.999036016853513e-07, "loss": 0.0622, "step": 100440 }, { "epoch": 1.073241091938672, "grad_norm": 1.427516222000122, "learning_rate": 9.99903271508486e-07, "loss": 0.0091, "step": 100450 }, { "epoch": 1.0733479352529516, "grad_norm": 1.120745062828064, "learning_rate": 9.99902940767192e-07, "loss": 0.0073, "step": 100460 }, { "epoch": 1.073454778567231, "grad_norm": 0.20391827821731567, "learning_rate": 9.9990260946147e-07, "loss": 0.0444, "step": 100470 }, { "epoch": 1.0735616218815107, "grad_norm": 4.470773696899414, "learning_rate": 9.999022775913205e-07, "loss": 0.0523, "step": 100480 }, { "epoch": 1.0736684651957904, "grad_norm": 2.7036471366882324, "learning_rate": 9.999019451567434e-07, "loss": 0.0415, "step": 100490 }, { "epoch": 1.07377530851007, "grad_norm": 2.0061399936676025, "learning_rate": 9.999016121577392e-07, "loss": 0.0091, "step": 100500 }, { "epoch": 1.0738821518243495, "grad_norm": 8.831645965576172, "learning_rate": 9.999012785943085e-07, "loss": 0.0329, "step": 100510 }, { "epoch": 1.0739889951386292, "grad_norm": 0.2341604083776474, "learning_rate": 9.999009444664515e-07, "loss": 0.0246, "step": 100520 }, { "epoch": 1.074095838452909, "grad_norm": 0.17958469688892365, "learning_rate": 9.999006097741684e-07, "loss": 0.0177, "step": 100530 }, { "epoch": 1.0742026817671884, "grad_norm": 1.3454906940460205, "learning_rate": 9.999002745174601e-07, "loss": 0.053, "step": 100540 }, { "epoch": 1.074309525081468, "grad_norm": 1.5383590459823608, "learning_rate": 9.998999386963265e-07, "loss": 0.0402, "step": 100550 }, { "epoch": 1.0744163683957477, "grad_norm": 1.1546101570129395, "learning_rate": 9.998996023107681e-07, "loss": 0.0674, "step": 100560 }, { "epoch": 1.0745232117100272, "grad_norm": 3.2085797786712646, "learning_rate": 9.998992653607854e-07, "loss": 0.0578, "step": 100570 }, { "epoch": 1.0746300550243069, "grad_norm": 4.119309425354004, "learning_rate": 9.998989278463786e-07, "loss": 0.0264, "step": 100580 }, { "epoch": 1.0747368983385865, "grad_norm": 2.6012754440307617, "learning_rate": 9.998985897675482e-07, "loss": 0.0253, "step": 100590 }, { "epoch": 1.074843741652866, "grad_norm": 1.090076208114624, "learning_rate": 9.998982511242945e-07, "loss": 0.0576, "step": 100600 }, { "epoch": 1.0749505849671457, "grad_norm": 8.620743751525879, "learning_rate": 9.998979119166183e-07, "loss": 0.0266, "step": 100610 }, { "epoch": 1.0750574282814254, "grad_norm": 6.909943103790283, "learning_rate": 9.998975721445193e-07, "loss": 0.0121, "step": 100620 }, { "epoch": 1.0751642715957048, "grad_norm": 0.13120931386947632, "learning_rate": 9.998972318079982e-07, "loss": 0.0258, "step": 100630 }, { "epoch": 1.0752711149099845, "grad_norm": 6.279655456542969, "learning_rate": 9.998968909070555e-07, "loss": 0.018, "step": 100640 }, { "epoch": 1.0753779582242642, "grad_norm": 0.13984465599060059, "learning_rate": 9.998965494416915e-07, "loss": 0.0313, "step": 100650 }, { "epoch": 1.0754848015385436, "grad_norm": 6.081313610076904, "learning_rate": 9.998962074119067e-07, "loss": 0.0335, "step": 100660 }, { "epoch": 1.0755916448528233, "grad_norm": 5.718252182006836, "learning_rate": 9.99895864817701e-07, "loss": 0.0259, "step": 100670 }, { "epoch": 1.075698488167103, "grad_norm": 0.4427579641342163, "learning_rate": 9.998955216590754e-07, "loss": 0.0166, "step": 100680 }, { "epoch": 1.0758053314813825, "grad_norm": 6.667051792144775, "learning_rate": 9.998951779360298e-07, "loss": 0.0181, "step": 100690 }, { "epoch": 1.0759121747956621, "grad_norm": 2.6660940647125244, "learning_rate": 9.99894833648565e-07, "loss": 0.048, "step": 100700 }, { "epoch": 1.0760190181099418, "grad_norm": 5.6329474449157715, "learning_rate": 9.998944887966811e-07, "loss": 0.0141, "step": 100710 }, { "epoch": 1.0761258614242213, "grad_norm": 1.4507524967193604, "learning_rate": 9.998941433803788e-07, "loss": 0.0195, "step": 100720 }, { "epoch": 1.076232704738501, "grad_norm": 0.06856292486190796, "learning_rate": 9.99893797399658e-07, "loss": 0.033, "step": 100730 }, { "epoch": 1.0763395480527806, "grad_norm": 9.574954986572266, "learning_rate": 9.998934508545196e-07, "loss": 0.0262, "step": 100740 }, { "epoch": 1.07644639136706, "grad_norm": 0.1400328278541565, "learning_rate": 9.998931037449638e-07, "loss": 0.0398, "step": 100750 }, { "epoch": 1.0765532346813398, "grad_norm": 9.052834510803223, "learning_rate": 9.998927560709908e-07, "loss": 0.0504, "step": 100760 }, { "epoch": 1.0766600779956195, "grad_norm": 4.478770732879639, "learning_rate": 9.998924078326012e-07, "loss": 0.0508, "step": 100770 }, { "epoch": 1.0767669213098991, "grad_norm": 1.7176358699798584, "learning_rate": 9.998920590297953e-07, "loss": 0.0437, "step": 100780 }, { "epoch": 1.0768737646241786, "grad_norm": 0.18535003066062927, "learning_rate": 9.998917096625736e-07, "loss": 0.0707, "step": 100790 }, { "epoch": 1.0769806079384583, "grad_norm": 2.5837242603302, "learning_rate": 9.998913597309363e-07, "loss": 0.0166, "step": 100800 }, { "epoch": 1.077087451252738, "grad_norm": 2.761622667312622, "learning_rate": 9.998910092348842e-07, "loss": 0.0145, "step": 100810 }, { "epoch": 1.0771942945670174, "grad_norm": 0.11851286143064499, "learning_rate": 9.998906581744172e-07, "loss": 0.0451, "step": 100820 }, { "epoch": 1.077301137881297, "grad_norm": 0.26095935702323914, "learning_rate": 9.998903065495359e-07, "loss": 0.0487, "step": 100830 }, { "epoch": 1.0774079811955768, "grad_norm": 11.1195650100708, "learning_rate": 9.99889954360241e-07, "loss": 0.0696, "step": 100840 }, { "epoch": 1.0775148245098563, "grad_norm": 1.8256008625030518, "learning_rate": 9.998896016065322e-07, "loss": 0.0784, "step": 100850 }, { "epoch": 1.077621667824136, "grad_norm": 1.6345305442810059, "learning_rate": 9.998892482884104e-07, "loss": 0.009, "step": 100860 }, { "epoch": 1.0777285111384156, "grad_norm": 1.7850216627120972, "learning_rate": 9.99888894405876e-07, "loss": 0.0199, "step": 100870 }, { "epoch": 1.077835354452695, "grad_norm": 0.3529570400714874, "learning_rate": 9.998885399589294e-07, "loss": 0.0114, "step": 100880 }, { "epoch": 1.0779421977669748, "grad_norm": 0.7601323127746582, "learning_rate": 9.99888184947571e-07, "loss": 0.0122, "step": 100890 }, { "epoch": 1.0780490410812544, "grad_norm": 0.8908248543739319, "learning_rate": 9.99887829371801e-07, "loss": 0.0225, "step": 100900 }, { "epoch": 1.078155884395534, "grad_norm": 6.079805374145508, "learning_rate": 9.998874732316198e-07, "loss": 0.033, "step": 100910 }, { "epoch": 1.0782627277098136, "grad_norm": 0.6540722846984863, "learning_rate": 9.99887116527028e-07, "loss": 0.0213, "step": 100920 }, { "epoch": 1.0783695710240933, "grad_norm": 0.41006040573120117, "learning_rate": 9.998867592580258e-07, "loss": 0.0532, "step": 100930 }, { "epoch": 1.0784764143383727, "grad_norm": 3.249626398086548, "learning_rate": 9.998864014246139e-07, "loss": 0.0351, "step": 100940 }, { "epoch": 1.0785832576526524, "grad_norm": 2.864008665084839, "learning_rate": 9.998860430267923e-07, "loss": 0.0687, "step": 100950 }, { "epoch": 1.078690100966932, "grad_norm": 10.28946304321289, "learning_rate": 9.998856840645618e-07, "loss": 0.0339, "step": 100960 }, { "epoch": 1.0787969442812115, "grad_norm": 8.071283340454102, "learning_rate": 9.998853245379225e-07, "loss": 0.0496, "step": 100970 }, { "epoch": 1.0789037875954912, "grad_norm": 4.370349407196045, "learning_rate": 9.998849644468751e-07, "loss": 0.042, "step": 100980 }, { "epoch": 1.079010630909771, "grad_norm": 5.310302257537842, "learning_rate": 9.998846037914198e-07, "loss": 0.0443, "step": 100990 }, { "epoch": 1.0791174742240504, "grad_norm": 0.37492647767066956, "learning_rate": 9.99884242571557e-07, "loss": 0.0833, "step": 101000 }, { "epoch": 1.07922431753833, "grad_norm": 5.488710880279541, "learning_rate": 9.998838807872874e-07, "loss": 0.0535, "step": 101010 }, { "epoch": 1.0793311608526097, "grad_norm": 5.3524956703186035, "learning_rate": 9.99883518438611e-07, "loss": 0.0325, "step": 101020 }, { "epoch": 1.0794380041668892, "grad_norm": 3.10670804977417, "learning_rate": 9.998831555255282e-07, "loss": 0.059, "step": 101030 }, { "epoch": 1.0795448474811689, "grad_norm": 2.463151454925537, "learning_rate": 9.998827920480399e-07, "loss": 0.0252, "step": 101040 }, { "epoch": 1.0796516907954485, "grad_norm": 1.7081847190856934, "learning_rate": 9.99882428006146e-07, "loss": 0.0425, "step": 101050 }, { "epoch": 1.079758534109728, "grad_norm": 0.8248593211174011, "learning_rate": 9.998820633998472e-07, "loss": 0.0076, "step": 101060 }, { "epoch": 1.0798653774240077, "grad_norm": 6.6666717529296875, "learning_rate": 9.998816982291438e-07, "loss": 0.049, "step": 101070 }, { "epoch": 1.0799722207382874, "grad_norm": 0.023937677964568138, "learning_rate": 9.998813324940362e-07, "loss": 0.0243, "step": 101080 }, { "epoch": 1.0800790640525668, "grad_norm": 5.588838577270508, "learning_rate": 9.998809661945249e-07, "loss": 0.0482, "step": 101090 }, { "epoch": 1.0801859073668465, "grad_norm": 5.427632808685303, "learning_rate": 9.9988059933061e-07, "loss": 0.0306, "step": 101100 }, { "epoch": 1.0802927506811262, "grad_norm": 2.1140389442443848, "learning_rate": 9.998802319022924e-07, "loss": 0.0182, "step": 101110 }, { "epoch": 1.0803995939954056, "grad_norm": 5.5799174308776855, "learning_rate": 9.998798639095724e-07, "loss": 0.0251, "step": 101120 }, { "epoch": 1.0805064373096853, "grad_norm": 4.078283309936523, "learning_rate": 9.998794953524504e-07, "loss": 0.0541, "step": 101130 }, { "epoch": 1.080613280623965, "grad_norm": 0.2633798122406006, "learning_rate": 9.998791262309262e-07, "loss": 0.0249, "step": 101140 }, { "epoch": 1.0807201239382445, "grad_norm": 4.748416423797607, "learning_rate": 9.998787565450013e-07, "loss": 0.0599, "step": 101150 }, { "epoch": 1.0808269672525241, "grad_norm": 5.756997585296631, "learning_rate": 9.998783862946752e-07, "loss": 0.0356, "step": 101160 }, { "epoch": 1.0809338105668038, "grad_norm": 3.123739719390869, "learning_rate": 9.99878015479949e-07, "loss": 0.0316, "step": 101170 }, { "epoch": 1.0810406538810833, "grad_norm": 2.3722493648529053, "learning_rate": 9.998776441008225e-07, "loss": 0.0765, "step": 101180 }, { "epoch": 1.081147497195363, "grad_norm": 1.888082504272461, "learning_rate": 9.998772721572964e-07, "loss": 0.032, "step": 101190 }, { "epoch": 1.0812543405096426, "grad_norm": 2.3509202003479004, "learning_rate": 9.998768996493711e-07, "loss": 0.0663, "step": 101200 }, { "epoch": 1.081361183823922, "grad_norm": 2.7956767082214355, "learning_rate": 9.998765265770473e-07, "loss": 0.0455, "step": 101210 }, { "epoch": 1.0814680271382018, "grad_norm": 26.57487678527832, "learning_rate": 9.99876152940325e-07, "loss": 0.1095, "step": 101220 }, { "epoch": 1.0815748704524815, "grad_norm": 1.4281866550445557, "learning_rate": 9.99875778739205e-07, "loss": 0.0289, "step": 101230 }, { "epoch": 1.0816817137667611, "grad_norm": 3.894451379776001, "learning_rate": 9.998754039736872e-07, "loss": 0.0435, "step": 101240 }, { "epoch": 1.0817885570810406, "grad_norm": 1.5093528032302856, "learning_rate": 9.998750286437726e-07, "loss": 0.0453, "step": 101250 }, { "epoch": 1.0818954003953203, "grad_norm": 8.778934478759766, "learning_rate": 9.998746527494612e-07, "loss": 0.0156, "step": 101260 }, { "epoch": 1.0820022437096, "grad_norm": 9.770196914672852, "learning_rate": 9.998742762907538e-07, "loss": 0.0463, "step": 101270 }, { "epoch": 1.0821090870238794, "grad_norm": 5.4734344482421875, "learning_rate": 9.998738992676504e-07, "loss": 0.0367, "step": 101280 }, { "epoch": 1.082215930338159, "grad_norm": 2.969162702560425, "learning_rate": 9.998735216801517e-07, "loss": 0.0268, "step": 101290 }, { "epoch": 1.0823227736524388, "grad_norm": 0.2724872827529907, "learning_rate": 9.99873143528258e-07, "loss": 0.023, "step": 101300 }, { "epoch": 1.0824296169667182, "grad_norm": 0.8104391098022461, "learning_rate": 9.9987276481197e-07, "loss": 0.005, "step": 101310 }, { "epoch": 1.082536460280998, "grad_norm": 0.030799446627497673, "learning_rate": 9.99872385531288e-07, "loss": 0.0121, "step": 101320 }, { "epoch": 1.0826433035952776, "grad_norm": 1.9013408422470093, "learning_rate": 9.99872005686212e-07, "loss": 0.014, "step": 101330 }, { "epoch": 1.082750146909557, "grad_norm": 1.2855606079101562, "learning_rate": 9.998716252767429e-07, "loss": 0.0435, "step": 101340 }, { "epoch": 1.0828569902238367, "grad_norm": 0.20329608023166656, "learning_rate": 9.99871244302881e-07, "loss": 0.0213, "step": 101350 }, { "epoch": 1.0829638335381164, "grad_norm": 1.9143664836883545, "learning_rate": 9.99870862764627e-07, "loss": 0.0766, "step": 101360 }, { "epoch": 1.0830706768523959, "grad_norm": 1.9185593128204346, "learning_rate": 9.998704806619806e-07, "loss": 0.0263, "step": 101370 }, { "epoch": 1.0831775201666756, "grad_norm": 0.7980847954750061, "learning_rate": 9.998700979949431e-07, "loss": 0.0105, "step": 101380 }, { "epoch": 1.0832843634809552, "grad_norm": 6.039275169372559, "learning_rate": 9.998697147635144e-07, "loss": 0.025, "step": 101390 }, { "epoch": 1.0833912067952347, "grad_norm": 5.14323091506958, "learning_rate": 9.99869330967695e-07, "loss": 0.0602, "step": 101400 }, { "epoch": 1.0834980501095144, "grad_norm": 9.108501434326172, "learning_rate": 9.998689466074854e-07, "loss": 0.0211, "step": 101410 }, { "epoch": 1.083604893423794, "grad_norm": 2.1793477535247803, "learning_rate": 9.998685616828862e-07, "loss": 0.0109, "step": 101420 }, { "epoch": 1.0837117367380735, "grad_norm": 0.029055016115307808, "learning_rate": 9.998681761938974e-07, "loss": 0.0435, "step": 101430 }, { "epoch": 1.0838185800523532, "grad_norm": 0.08970578014850616, "learning_rate": 9.9986779014052e-07, "loss": 0.0287, "step": 101440 }, { "epoch": 1.083925423366633, "grad_norm": 9.56811809539795, "learning_rate": 9.99867403522754e-07, "loss": 0.0593, "step": 101450 }, { "epoch": 1.0840322666809123, "grad_norm": 2.9423158168792725, "learning_rate": 9.998670163405997e-07, "loss": 0.0979, "step": 101460 }, { "epoch": 1.084139109995192, "grad_norm": 3.2849338054656982, "learning_rate": 9.998666285940583e-07, "loss": 0.038, "step": 101470 }, { "epoch": 1.0842459533094717, "grad_norm": 0.2575368881225586, "learning_rate": 9.998662402831295e-07, "loss": 0.0515, "step": 101480 }, { "epoch": 1.0843527966237512, "grad_norm": 0.024402787908911705, "learning_rate": 9.99865851407814e-07, "loss": 0.0151, "step": 101490 }, { "epoch": 1.0844596399380309, "grad_norm": 1.301541805267334, "learning_rate": 9.998654619681122e-07, "loss": 0.0193, "step": 101500 }, { "epoch": 1.0845664832523105, "grad_norm": 6.626030445098877, "learning_rate": 9.998650719640245e-07, "loss": 0.0475, "step": 101510 }, { "epoch": 1.0846733265665902, "grad_norm": 1.8978136777877808, "learning_rate": 9.998646813955515e-07, "loss": 0.023, "step": 101520 }, { "epoch": 1.0847801698808697, "grad_norm": 0.04057631641626358, "learning_rate": 9.998642902626937e-07, "loss": 0.0138, "step": 101530 }, { "epoch": 1.0848870131951494, "grad_norm": 5.84633731842041, "learning_rate": 9.99863898565451e-07, "loss": 0.0262, "step": 101540 }, { "epoch": 1.084993856509429, "grad_norm": 0.2941541373729706, "learning_rate": 9.998635063038247e-07, "loss": 0.026, "step": 101550 }, { "epoch": 1.0851006998237085, "grad_norm": 4.432592868804932, "learning_rate": 9.998631134778145e-07, "loss": 0.0481, "step": 101560 }, { "epoch": 1.0852075431379882, "grad_norm": 6.779629230499268, "learning_rate": 9.998627200874212e-07, "loss": 0.0422, "step": 101570 }, { "epoch": 1.0853143864522679, "grad_norm": 2.7557129859924316, "learning_rate": 9.99862326132645e-07, "loss": 0.0308, "step": 101580 }, { "epoch": 1.0854212297665473, "grad_norm": 0.029021933674812317, "learning_rate": 9.998619316134867e-07, "loss": 0.0198, "step": 101590 }, { "epoch": 1.085528073080827, "grad_norm": 1.9562963247299194, "learning_rate": 9.998615365299466e-07, "loss": 0.034, "step": 101600 }, { "epoch": 1.0856349163951067, "grad_norm": 0.5175259113311768, "learning_rate": 9.99861140882025e-07, "loss": 0.021, "step": 101610 }, { "epoch": 1.0857417597093861, "grad_norm": 3.0518548488616943, "learning_rate": 9.998607446697225e-07, "loss": 0.0321, "step": 101620 }, { "epoch": 1.0858486030236658, "grad_norm": 3.5412607192993164, "learning_rate": 9.998603478930393e-07, "loss": 0.0556, "step": 101630 }, { "epoch": 1.0859554463379455, "grad_norm": 0.012325258925557137, "learning_rate": 9.998599505519762e-07, "loss": 0.0293, "step": 101640 }, { "epoch": 1.086062289652225, "grad_norm": 1.2665947675704956, "learning_rate": 9.998595526465334e-07, "loss": 0.0195, "step": 101650 }, { "epoch": 1.0861691329665046, "grad_norm": 0.08739670366048813, "learning_rate": 9.998591541767116e-07, "loss": 0.0143, "step": 101660 }, { "epoch": 1.0862759762807843, "grad_norm": 0.2704280912876129, "learning_rate": 9.998587551425111e-07, "loss": 0.0373, "step": 101670 }, { "epoch": 1.0863828195950638, "grad_norm": 0.17745864391326904, "learning_rate": 9.998583555439321e-07, "loss": 0.0301, "step": 101680 }, { "epoch": 1.0864896629093435, "grad_norm": 0.1855166107416153, "learning_rate": 9.998579553809753e-07, "loss": 0.0133, "step": 101690 }, { "epoch": 1.0865965062236231, "grad_norm": 0.009896541014313698, "learning_rate": 9.998575546536415e-07, "loss": 0.0194, "step": 101700 }, { "epoch": 1.0867033495379026, "grad_norm": 0.270944207906723, "learning_rate": 9.998571533619305e-07, "loss": 0.0121, "step": 101710 }, { "epoch": 1.0868101928521823, "grad_norm": 10.2575101852417, "learning_rate": 9.99856751505843e-07, "loss": 0.0592, "step": 101720 }, { "epoch": 1.086917036166462, "grad_norm": 0.07843013107776642, "learning_rate": 9.998563490853798e-07, "loss": 0.0175, "step": 101730 }, { "epoch": 1.0870238794807414, "grad_norm": 6.14888334274292, "learning_rate": 9.998559461005407e-07, "loss": 0.0317, "step": 101740 }, { "epoch": 1.087130722795021, "grad_norm": 1.3810676336288452, "learning_rate": 9.998555425513266e-07, "loss": 0.0303, "step": 101750 }, { "epoch": 1.0872375661093008, "grad_norm": 0.546162486076355, "learning_rate": 9.99855138437738e-07, "loss": 0.0778, "step": 101760 }, { "epoch": 1.0873444094235802, "grad_norm": 5.592899322509766, "learning_rate": 9.998547337597752e-07, "loss": 0.0722, "step": 101770 }, { "epoch": 1.08745125273786, "grad_norm": 8.944849014282227, "learning_rate": 9.998543285174387e-07, "loss": 0.0569, "step": 101780 }, { "epoch": 1.0875580960521396, "grad_norm": 5.668024063110352, "learning_rate": 9.99853922710729e-07, "loss": 0.0231, "step": 101790 }, { "epoch": 1.087664939366419, "grad_norm": 0.4151780307292938, "learning_rate": 9.998535163396461e-07, "loss": 0.0388, "step": 101800 }, { "epoch": 1.0877717826806987, "grad_norm": 5.100895404815674, "learning_rate": 9.998531094041914e-07, "loss": 0.0243, "step": 101810 }, { "epoch": 1.0878786259949784, "grad_norm": 1.2861994504928589, "learning_rate": 9.998527019043644e-07, "loss": 0.0368, "step": 101820 }, { "epoch": 1.0879854693092579, "grad_norm": 4.949738502502441, "learning_rate": 9.99852293840166e-07, "loss": 0.0106, "step": 101830 }, { "epoch": 1.0880923126235376, "grad_norm": 0.4247005879878998, "learning_rate": 9.998518852115969e-07, "loss": 0.0496, "step": 101840 }, { "epoch": 1.0881991559378172, "grad_norm": 3.868222236633301, "learning_rate": 9.99851476018657e-07, "loss": 0.0663, "step": 101850 }, { "epoch": 1.0883059992520967, "grad_norm": 0.07939581573009491, "learning_rate": 9.998510662613473e-07, "loss": 0.05, "step": 101860 }, { "epoch": 1.0884128425663764, "grad_norm": 4.628864288330078, "learning_rate": 9.99850655939668e-07, "loss": 0.0502, "step": 101870 }, { "epoch": 1.088519685880656, "grad_norm": 8.464517593383789, "learning_rate": 9.998502450536195e-07, "loss": 0.0375, "step": 101880 }, { "epoch": 1.0886265291949355, "grad_norm": 0.2562291622161865, "learning_rate": 9.998498336032024e-07, "loss": 0.0135, "step": 101890 }, { "epoch": 1.0887333725092152, "grad_norm": 0.9650388360023499, "learning_rate": 9.99849421588417e-07, "loss": 0.0326, "step": 101900 }, { "epoch": 1.0888402158234949, "grad_norm": 6.08022403717041, "learning_rate": 9.99849009009264e-07, "loss": 0.0171, "step": 101910 }, { "epoch": 1.0889470591377743, "grad_norm": 6.863584041595459, "learning_rate": 9.998485958657436e-07, "loss": 0.0912, "step": 101920 }, { "epoch": 1.089053902452054, "grad_norm": 2.504498243331909, "learning_rate": 9.998481821578567e-07, "loss": 0.0467, "step": 101930 }, { "epoch": 1.0891607457663337, "grad_norm": 2.4047281742095947, "learning_rate": 9.998477678856031e-07, "loss": 0.0249, "step": 101940 }, { "epoch": 1.0892675890806132, "grad_norm": 0.011959882453083992, "learning_rate": 9.99847353048984e-07, "loss": 0.0081, "step": 101950 }, { "epoch": 1.0893744323948928, "grad_norm": 5.87335205078125, "learning_rate": 9.998469376479991e-07, "loss": 0.0674, "step": 101960 }, { "epoch": 1.0894812757091725, "grad_norm": 3.0315282344818115, "learning_rate": 9.998465216826497e-07, "loss": 0.0442, "step": 101970 }, { "epoch": 1.0895881190234522, "grad_norm": 1.4053921699523926, "learning_rate": 9.998461051529356e-07, "loss": 0.0152, "step": 101980 }, { "epoch": 1.0896949623377317, "grad_norm": 0.1956813782453537, "learning_rate": 9.998456880588576e-07, "loss": 0.0233, "step": 101990 }, { "epoch": 1.0898018056520113, "grad_norm": 16.652935028076172, "learning_rate": 9.99845270400416e-07, "loss": 0.046, "step": 102000 }, { "epoch": 1.089908648966291, "grad_norm": 9.344508171081543, "learning_rate": 9.998448521776116e-07, "loss": 0.0435, "step": 102010 }, { "epoch": 1.0900154922805705, "grad_norm": 0.45575574040412903, "learning_rate": 9.998444333904446e-07, "loss": 0.0443, "step": 102020 }, { "epoch": 1.0901223355948502, "grad_norm": 1.4040064811706543, "learning_rate": 9.998440140389152e-07, "loss": 0.0406, "step": 102030 }, { "epoch": 1.0902291789091298, "grad_norm": 2.3671751022338867, "learning_rate": 9.998435941230247e-07, "loss": 0.0293, "step": 102040 }, { "epoch": 1.0903360222234093, "grad_norm": 6.737701416015625, "learning_rate": 9.998431736427726e-07, "loss": 0.027, "step": 102050 }, { "epoch": 1.090442865537689, "grad_norm": 1.886697769165039, "learning_rate": 9.998427525981599e-07, "loss": 0.035, "step": 102060 }, { "epoch": 1.0905497088519687, "grad_norm": 9.596979141235352, "learning_rate": 9.99842330989187e-07, "loss": 0.0314, "step": 102070 }, { "epoch": 1.0906565521662481, "grad_norm": 4.572364807128906, "learning_rate": 9.998419088158544e-07, "loss": 0.0813, "step": 102080 }, { "epoch": 1.0907633954805278, "grad_norm": 1.0599819421768188, "learning_rate": 9.998414860781628e-07, "loss": 0.126, "step": 102090 }, { "epoch": 1.0908702387948075, "grad_norm": 4.154411315917969, "learning_rate": 9.998410627761122e-07, "loss": 0.0327, "step": 102100 }, { "epoch": 1.090977082109087, "grad_norm": 1.7918261289596558, "learning_rate": 9.998406389097034e-07, "loss": 0.0142, "step": 102110 }, { "epoch": 1.0910839254233666, "grad_norm": 0.815605878829956, "learning_rate": 9.99840214478937e-07, "loss": 0.0202, "step": 102120 }, { "epoch": 1.0911907687376463, "grad_norm": 0.2326110452413559, "learning_rate": 9.99839789483813e-07, "loss": 0.0095, "step": 102130 }, { "epoch": 1.0912976120519258, "grad_norm": 0.7763413786888123, "learning_rate": 9.99839363924332e-07, "loss": 0.0219, "step": 102140 }, { "epoch": 1.0914044553662055, "grad_norm": 6.357057571411133, "learning_rate": 9.998389378004949e-07, "loss": 0.0183, "step": 102150 }, { "epoch": 1.0915112986804851, "grad_norm": 5.995047569274902, "learning_rate": 9.998385111123017e-07, "loss": 0.0231, "step": 102160 }, { "epoch": 1.0916181419947646, "grad_norm": 1.7501717805862427, "learning_rate": 9.998380838597534e-07, "loss": 0.0265, "step": 102170 }, { "epoch": 1.0917249853090443, "grad_norm": 2.125459909439087, "learning_rate": 9.9983765604285e-07, "loss": 0.0319, "step": 102180 }, { "epoch": 1.091831828623324, "grad_norm": 29.42702865600586, "learning_rate": 9.998372276615922e-07, "loss": 0.0788, "step": 102190 }, { "epoch": 1.0919386719376034, "grad_norm": 6.975734710693359, "learning_rate": 9.998367987159804e-07, "loss": 0.0188, "step": 102200 }, { "epoch": 1.092045515251883, "grad_norm": 5.2003350257873535, "learning_rate": 9.998363692060152e-07, "loss": 0.0448, "step": 102210 }, { "epoch": 1.0921523585661628, "grad_norm": 1.5556203126907349, "learning_rate": 9.998359391316969e-07, "loss": 0.025, "step": 102220 }, { "epoch": 1.0922592018804422, "grad_norm": 0.2558079659938812, "learning_rate": 9.998355084930263e-07, "loss": 0.08, "step": 102230 }, { "epoch": 1.092366045194722, "grad_norm": 0.018826652318239212, "learning_rate": 9.998350772900036e-07, "loss": 0.0466, "step": 102240 }, { "epoch": 1.0924728885090016, "grad_norm": 0.12098925560712814, "learning_rate": 9.998346455226292e-07, "loss": 0.0606, "step": 102250 }, { "epoch": 1.0925797318232813, "grad_norm": 0.7134923934936523, "learning_rate": 9.99834213190904e-07, "loss": 0.0278, "step": 102260 }, { "epoch": 1.0926865751375607, "grad_norm": 5.3584771156311035, "learning_rate": 9.998337802948281e-07, "loss": 0.0343, "step": 102270 }, { "epoch": 1.0927934184518404, "grad_norm": 13.208398818969727, "learning_rate": 9.998333468344022e-07, "loss": 0.0256, "step": 102280 }, { "epoch": 1.09290026176612, "grad_norm": 0.2621539235115051, "learning_rate": 9.998329128096266e-07, "loss": 0.0078, "step": 102290 }, { "epoch": 1.0930071050803996, "grad_norm": 3.949303150177002, "learning_rate": 9.998324782205022e-07, "loss": 0.0581, "step": 102300 }, { "epoch": 1.0931139483946792, "grad_norm": 3.6157846450805664, "learning_rate": 9.99832043067029e-07, "loss": 0.0481, "step": 102310 }, { "epoch": 1.093220791708959, "grad_norm": 12.737338066101074, "learning_rate": 9.998316073492077e-07, "loss": 0.0348, "step": 102320 }, { "epoch": 1.0933276350232384, "grad_norm": 7.349189758300781, "learning_rate": 9.998311710670389e-07, "loss": 0.0293, "step": 102330 }, { "epoch": 1.093434478337518, "grad_norm": 24.907428741455078, "learning_rate": 9.998307342205227e-07, "loss": 0.0522, "step": 102340 }, { "epoch": 1.0935413216517977, "grad_norm": 4.296614646911621, "learning_rate": 9.9983029680966e-07, "loss": 0.0956, "step": 102350 }, { "epoch": 1.0936481649660772, "grad_norm": 3.4496829509735107, "learning_rate": 9.998298588344514e-07, "loss": 0.0568, "step": 102360 }, { "epoch": 1.0937550082803569, "grad_norm": 3.4135022163391113, "learning_rate": 9.99829420294897e-07, "loss": 0.049, "step": 102370 }, { "epoch": 1.0938618515946366, "grad_norm": 0.6809493899345398, "learning_rate": 9.998289811909974e-07, "loss": 0.0294, "step": 102380 }, { "epoch": 1.093968694908916, "grad_norm": 1.6277002096176147, "learning_rate": 9.998285415227534e-07, "loss": 0.0285, "step": 102390 }, { "epoch": 1.0940755382231957, "grad_norm": 7.561782360076904, "learning_rate": 9.99828101290165e-07, "loss": 0.0739, "step": 102400 }, { "epoch": 1.0941823815374754, "grad_norm": 2.3846473693847656, "learning_rate": 9.998276604932331e-07, "loss": 0.0258, "step": 102410 }, { "epoch": 1.0942892248517548, "grad_norm": 3.2377190589904785, "learning_rate": 9.99827219131958e-07, "loss": 0.0282, "step": 102420 }, { "epoch": 1.0943960681660345, "grad_norm": 0.5947915315628052, "learning_rate": 9.998267772063402e-07, "loss": 0.0469, "step": 102430 }, { "epoch": 1.0945029114803142, "grad_norm": 2.7522802352905273, "learning_rate": 9.998263347163804e-07, "loss": 0.0228, "step": 102440 }, { "epoch": 1.0946097547945937, "grad_norm": 9.356902122497559, "learning_rate": 9.998258916620788e-07, "loss": 0.0295, "step": 102450 }, { "epoch": 1.0947165981088733, "grad_norm": 0.6553193926811218, "learning_rate": 9.99825448043436e-07, "loss": 0.0713, "step": 102460 }, { "epoch": 1.094823441423153, "grad_norm": 5.993783473968506, "learning_rate": 9.998250038604526e-07, "loss": 0.0284, "step": 102470 }, { "epoch": 1.0949302847374325, "grad_norm": 1.0219522714614868, "learning_rate": 9.99824559113129e-07, "loss": 0.0121, "step": 102480 }, { "epoch": 1.0950371280517122, "grad_norm": 0.35852357745170593, "learning_rate": 9.99824113801466e-07, "loss": 0.0282, "step": 102490 }, { "epoch": 1.0951439713659918, "grad_norm": 0.11852891743183136, "learning_rate": 9.998236679254637e-07, "loss": 0.0224, "step": 102500 }, { "epoch": 1.0952508146802713, "grad_norm": 0.15860989689826965, "learning_rate": 9.998232214851225e-07, "loss": 0.024, "step": 102510 }, { "epoch": 1.095357657994551, "grad_norm": 2.6445884704589844, "learning_rate": 9.998227744804434e-07, "loss": 0.0281, "step": 102520 }, { "epoch": 1.0954645013088307, "grad_norm": 2.919588327407837, "learning_rate": 9.998223269114268e-07, "loss": 0.027, "step": 102530 }, { "epoch": 1.0955713446231101, "grad_norm": 11.048510551452637, "learning_rate": 9.998218787780727e-07, "loss": 0.0594, "step": 102540 }, { "epoch": 1.0956781879373898, "grad_norm": 8.102290153503418, "learning_rate": 9.998214300803823e-07, "loss": 0.0236, "step": 102550 }, { "epoch": 1.0957850312516695, "grad_norm": 3.467397689819336, "learning_rate": 9.998209808183558e-07, "loss": 0.0849, "step": 102560 }, { "epoch": 1.095891874565949, "grad_norm": 1.8091638088226318, "learning_rate": 9.998205309919934e-07, "loss": 0.0808, "step": 102570 }, { "epoch": 1.0959987178802286, "grad_norm": 2.9611904621124268, "learning_rate": 9.998200806012961e-07, "loss": 0.0048, "step": 102580 }, { "epoch": 1.0961055611945083, "grad_norm": 1.748700499534607, "learning_rate": 9.998196296462642e-07, "loss": 0.0204, "step": 102590 }, { "epoch": 1.0962124045087878, "grad_norm": 3.2007622718811035, "learning_rate": 9.998191781268983e-07, "loss": 0.0607, "step": 102600 }, { "epoch": 1.0963192478230674, "grad_norm": 6.611027717590332, "learning_rate": 9.998187260431986e-07, "loss": 0.0677, "step": 102610 }, { "epoch": 1.0964260911373471, "grad_norm": 11.136364936828613, "learning_rate": 9.99818273395166e-07, "loss": 0.0509, "step": 102620 }, { "epoch": 1.0965329344516266, "grad_norm": 3.8226377964019775, "learning_rate": 9.998178201828007e-07, "loss": 0.0154, "step": 102630 }, { "epoch": 1.0966397777659063, "grad_norm": 5.920340538024902, "learning_rate": 9.998173664061034e-07, "loss": 0.0259, "step": 102640 }, { "epoch": 1.096746621080186, "grad_norm": 3.5629959106445312, "learning_rate": 9.998169120650747e-07, "loss": 0.0769, "step": 102650 }, { "epoch": 1.0968534643944654, "grad_norm": 0.06697732210159302, "learning_rate": 9.998164571597149e-07, "loss": 0.0483, "step": 102660 }, { "epoch": 1.096960307708745, "grad_norm": 3.2912278175354004, "learning_rate": 9.998160016900247e-07, "loss": 0.0263, "step": 102670 }, { "epoch": 1.0970671510230248, "grad_norm": 0.0667787492275238, "learning_rate": 9.998155456560043e-07, "loss": 0.0633, "step": 102680 }, { "epoch": 1.0971739943373042, "grad_norm": 3.5182461738586426, "learning_rate": 9.998150890576545e-07, "loss": 0.0294, "step": 102690 }, { "epoch": 1.097280837651584, "grad_norm": 0.0966116189956665, "learning_rate": 9.998146318949758e-07, "loss": 0.0363, "step": 102700 }, { "epoch": 1.0973876809658636, "grad_norm": 5.891724586486816, "learning_rate": 9.998141741679686e-07, "loss": 0.0444, "step": 102710 }, { "epoch": 1.0974945242801433, "grad_norm": 2.6159894466400146, "learning_rate": 9.998137158766336e-07, "loss": 0.0129, "step": 102720 }, { "epoch": 1.0976013675944227, "grad_norm": 0.07266038656234741, "learning_rate": 9.99813257020971e-07, "loss": 0.0596, "step": 102730 }, { "epoch": 1.0977082109087024, "grad_norm": 2.5493922233581543, "learning_rate": 9.998127976009816e-07, "loss": 0.0344, "step": 102740 }, { "epoch": 1.097815054222982, "grad_norm": 0.48766055703163147, "learning_rate": 9.99812337616666e-07, "loss": 0.025, "step": 102750 }, { "epoch": 1.0979218975372615, "grad_norm": 4.0514373779296875, "learning_rate": 9.998118770680243e-07, "loss": 0.0254, "step": 102760 }, { "epoch": 1.0980287408515412, "grad_norm": 0.25852155685424805, "learning_rate": 9.998114159550573e-07, "loss": 0.0511, "step": 102770 }, { "epoch": 1.098135584165821, "grad_norm": 2.578907012939453, "learning_rate": 9.998109542777655e-07, "loss": 0.0561, "step": 102780 }, { "epoch": 1.0982424274801004, "grad_norm": 5.853623390197754, "learning_rate": 9.998104920361496e-07, "loss": 0.0354, "step": 102790 }, { "epoch": 1.09834927079438, "grad_norm": 4.921706199645996, "learning_rate": 9.998100292302096e-07, "loss": 0.051, "step": 102800 }, { "epoch": 1.0984561141086597, "grad_norm": 7.847723484039307, "learning_rate": 9.998095658599466e-07, "loss": 0.0338, "step": 102810 }, { "epoch": 1.0985629574229392, "grad_norm": 4.040977954864502, "learning_rate": 9.998091019253609e-07, "loss": 0.056, "step": 102820 }, { "epoch": 1.0986698007372189, "grad_norm": 4.496938705444336, "learning_rate": 9.99808637426453e-07, "loss": 0.0298, "step": 102830 }, { "epoch": 1.0987766440514986, "grad_norm": 7.106590270996094, "learning_rate": 9.998081723632233e-07, "loss": 0.0192, "step": 102840 }, { "epoch": 1.098883487365778, "grad_norm": 4.420304298400879, "learning_rate": 9.998077067356724e-07, "loss": 0.0339, "step": 102850 }, { "epoch": 1.0989903306800577, "grad_norm": 0.24592013657093048, "learning_rate": 9.99807240543801e-07, "loss": 0.0311, "step": 102860 }, { "epoch": 1.0990971739943374, "grad_norm": 5.502761363983154, "learning_rate": 9.998067737876094e-07, "loss": 0.0663, "step": 102870 }, { "epoch": 1.0992040173086168, "grad_norm": 0.7237384915351868, "learning_rate": 9.998063064670984e-07, "loss": 0.013, "step": 102880 }, { "epoch": 1.0993108606228965, "grad_norm": 7.494497776031494, "learning_rate": 9.998058385822684e-07, "loss": 0.044, "step": 102890 }, { "epoch": 1.0994177039371762, "grad_norm": 2.095508337020874, "learning_rate": 9.998053701331196e-07, "loss": 0.0288, "step": 102900 }, { "epoch": 1.0995245472514557, "grad_norm": 2.1124424934387207, "learning_rate": 9.99804901119653e-07, "loss": 0.0617, "step": 102910 }, { "epoch": 1.0996313905657353, "grad_norm": 0.10992684960365295, "learning_rate": 9.998044315418688e-07, "loss": 0.053, "step": 102920 }, { "epoch": 1.099738233880015, "grad_norm": 0.9251362085342407, "learning_rate": 9.998039613997678e-07, "loss": 0.0209, "step": 102930 }, { "epoch": 1.0998450771942945, "grad_norm": 4.105299472808838, "learning_rate": 9.998034906933504e-07, "loss": 0.0357, "step": 102940 }, { "epoch": 1.0999519205085742, "grad_norm": 8.719940185546875, "learning_rate": 9.998030194226173e-07, "loss": 0.0363, "step": 102950 }, { "epoch": 1.1000587638228538, "grad_norm": 1.9330521821975708, "learning_rate": 9.998025475875687e-07, "loss": 0.0587, "step": 102960 }, { "epoch": 1.1001656071371333, "grad_norm": 0.8421924710273743, "learning_rate": 9.998020751882052e-07, "loss": 0.0147, "step": 102970 }, { "epoch": 1.100272450451413, "grad_norm": 2.9474947452545166, "learning_rate": 9.998016022245275e-07, "loss": 0.0284, "step": 102980 }, { "epoch": 1.1003792937656927, "grad_norm": 6.568598747253418, "learning_rate": 9.998011286965362e-07, "loss": 0.056, "step": 102990 }, { "epoch": 1.1004861370799723, "grad_norm": 6.7568535804748535, "learning_rate": 9.998006546042314e-07, "loss": 0.0327, "step": 103000 }, { "epoch": 1.1005929803942518, "grad_norm": 3.6323742866516113, "learning_rate": 9.998001799476144e-07, "loss": 0.0442, "step": 103010 }, { "epoch": 1.1006998237085315, "grad_norm": 0.6107302308082581, "learning_rate": 9.99799704726685e-07, "loss": 0.0262, "step": 103020 }, { "epoch": 1.1008066670228112, "grad_norm": 0.636601984500885, "learning_rate": 9.997992289414438e-07, "loss": 0.0723, "step": 103030 }, { "epoch": 1.1009135103370906, "grad_norm": 20.15181541442871, "learning_rate": 9.997987525918919e-07, "loss": 0.0466, "step": 103040 }, { "epoch": 1.1010203536513703, "grad_norm": 0.5855422019958496, "learning_rate": 9.997982756780295e-07, "loss": 0.0558, "step": 103050 }, { "epoch": 1.10112719696565, "grad_norm": 4.930009841918945, "learning_rate": 9.997977981998568e-07, "loss": 0.0128, "step": 103060 }, { "epoch": 1.1012340402799294, "grad_norm": 0.37427622079849243, "learning_rate": 9.997973201573749e-07, "loss": 0.0464, "step": 103070 }, { "epoch": 1.1013408835942091, "grad_norm": 0.21840879321098328, "learning_rate": 9.997968415505841e-07, "loss": 0.0373, "step": 103080 }, { "epoch": 1.1014477269084888, "grad_norm": 2.7616775035858154, "learning_rate": 9.99796362379485e-07, "loss": 0.0599, "step": 103090 }, { "epoch": 1.1015545702227683, "grad_norm": 0.20467086136341095, "learning_rate": 9.99795882644078e-07, "loss": 0.0518, "step": 103100 }, { "epoch": 1.101661413537048, "grad_norm": 2.6406869888305664, "learning_rate": 9.997954023443636e-07, "loss": 0.0102, "step": 103110 }, { "epoch": 1.1017682568513276, "grad_norm": 5.6206560134887695, "learning_rate": 9.997949214803427e-07, "loss": 0.0259, "step": 103120 }, { "epoch": 1.101875100165607, "grad_norm": 1.8292655944824219, "learning_rate": 9.997944400520155e-07, "loss": 0.029, "step": 103130 }, { "epoch": 1.1019819434798868, "grad_norm": 4.860983848571777, "learning_rate": 9.997939580593826e-07, "loss": 0.1258, "step": 103140 }, { "epoch": 1.1020887867941664, "grad_norm": 7.133199691772461, "learning_rate": 9.997934755024446e-07, "loss": 0.0162, "step": 103150 }, { "epoch": 1.102195630108446, "grad_norm": 3.4854233264923096, "learning_rate": 9.997929923812022e-07, "loss": 0.0268, "step": 103160 }, { "epoch": 1.1023024734227256, "grad_norm": 1.709606647491455, "learning_rate": 9.997925086956557e-07, "loss": 0.0228, "step": 103170 }, { "epoch": 1.1024093167370053, "grad_norm": 5.036320209503174, "learning_rate": 9.997920244458057e-07, "loss": 0.0393, "step": 103180 }, { "epoch": 1.1025161600512847, "grad_norm": 2.861199140548706, "learning_rate": 9.997915396316526e-07, "loss": 0.0315, "step": 103190 }, { "epoch": 1.1026230033655644, "grad_norm": 14.96689224243164, "learning_rate": 9.997910542531975e-07, "loss": 0.0564, "step": 103200 }, { "epoch": 1.102729846679844, "grad_norm": 1.9349560737609863, "learning_rate": 9.997905683104406e-07, "loss": 0.0113, "step": 103210 }, { "epoch": 1.1028366899941235, "grad_norm": 0.03241569548845291, "learning_rate": 9.997900818033822e-07, "loss": 0.0522, "step": 103220 }, { "epoch": 1.1029435333084032, "grad_norm": 4.007163047790527, "learning_rate": 9.997895947320231e-07, "loss": 0.0421, "step": 103230 }, { "epoch": 1.103050376622683, "grad_norm": 2.3824124336242676, "learning_rate": 9.997891070963637e-07, "loss": 0.062, "step": 103240 }, { "epoch": 1.1031572199369624, "grad_norm": 0.023644156754016876, "learning_rate": 9.99788618896405e-07, "loss": 0.0416, "step": 103250 }, { "epoch": 1.103264063251242, "grad_norm": 1.973568320274353, "learning_rate": 9.99788130132147e-07, "loss": 0.0545, "step": 103260 }, { "epoch": 1.1033709065655217, "grad_norm": 7.347033500671387, "learning_rate": 9.997876408035905e-07, "loss": 0.053, "step": 103270 }, { "epoch": 1.1034777498798012, "grad_norm": 3.7878637313842773, "learning_rate": 9.99787150910736e-07, "loss": 0.0152, "step": 103280 }, { "epoch": 1.1035845931940809, "grad_norm": 0.2945503890514374, "learning_rate": 9.997866604535843e-07, "loss": 0.0579, "step": 103290 }, { "epoch": 1.1036914365083605, "grad_norm": 1.4297919273376465, "learning_rate": 9.997861694321357e-07, "loss": 0.0291, "step": 103300 }, { "epoch": 1.10379827982264, "grad_norm": 0.22688010334968567, "learning_rate": 9.997856778463907e-07, "loss": 0.0069, "step": 103310 }, { "epoch": 1.1039051231369197, "grad_norm": 1.287974238395691, "learning_rate": 9.9978518569635e-07, "loss": 0.0161, "step": 103320 }, { "epoch": 1.1040119664511994, "grad_norm": 0.02955329790711403, "learning_rate": 9.997846929820139e-07, "loss": 0.0225, "step": 103330 }, { "epoch": 1.1041188097654788, "grad_norm": 2.4806275367736816, "learning_rate": 9.997841997033833e-07, "loss": 0.0537, "step": 103340 }, { "epoch": 1.1042256530797585, "grad_norm": 4.662624835968018, "learning_rate": 9.997837058604586e-07, "loss": 0.0429, "step": 103350 }, { "epoch": 1.1043324963940382, "grad_norm": 2.0018231868743896, "learning_rate": 9.997832114532405e-07, "loss": 0.0516, "step": 103360 }, { "epoch": 1.1044393397083176, "grad_norm": 0.9174409508705139, "learning_rate": 9.997827164817294e-07, "loss": 0.0089, "step": 103370 }, { "epoch": 1.1045461830225973, "grad_norm": 1.7081016302108765, "learning_rate": 9.997822209459258e-07, "loss": 0.0167, "step": 103380 }, { "epoch": 1.104653026336877, "grad_norm": 3.588879346847534, "learning_rate": 9.997817248458303e-07, "loss": 0.0629, "step": 103390 }, { "epoch": 1.1047598696511565, "grad_norm": 0.4231674075126648, "learning_rate": 9.997812281814436e-07, "loss": 0.0237, "step": 103400 }, { "epoch": 1.1048667129654361, "grad_norm": 1.6805734634399414, "learning_rate": 9.99780730952766e-07, "loss": 0.0329, "step": 103410 }, { "epoch": 1.1049735562797158, "grad_norm": 4.024720668792725, "learning_rate": 9.997802331597986e-07, "loss": 0.0245, "step": 103420 }, { "epoch": 1.1050803995939953, "grad_norm": 1.6117383241653442, "learning_rate": 9.997797348025414e-07, "loss": 0.033, "step": 103430 }, { "epoch": 1.105187242908275, "grad_norm": 0.02439163625240326, "learning_rate": 9.99779235880995e-07, "loss": 0.0241, "step": 103440 }, { "epoch": 1.1052940862225547, "grad_norm": 5.3104095458984375, "learning_rate": 9.997787363951604e-07, "loss": 0.1032, "step": 103450 }, { "epoch": 1.1054009295368343, "grad_norm": 1.8656044006347656, "learning_rate": 9.997782363450378e-07, "loss": 0.0808, "step": 103460 }, { "epoch": 1.1055077728511138, "grad_norm": 0.0623258501291275, "learning_rate": 9.99777735730628e-07, "loss": 0.0443, "step": 103470 }, { "epoch": 1.1056146161653935, "grad_norm": 7.106679439544678, "learning_rate": 9.99777234551931e-07, "loss": 0.061, "step": 103480 }, { "epoch": 1.1057214594796732, "grad_norm": 4.7549848556518555, "learning_rate": 9.99776732808948e-07, "loss": 0.0599, "step": 103490 }, { "epoch": 1.1058283027939526, "grad_norm": 0.8908359408378601, "learning_rate": 9.997762305016795e-07, "loss": 0.0327, "step": 103500 }, { "epoch": 1.1059351461082323, "grad_norm": 0.004052844364196062, "learning_rate": 9.997757276301257e-07, "loss": 0.0206, "step": 103510 }, { "epoch": 1.106041989422512, "grad_norm": 0.12302357703447342, "learning_rate": 9.997752241942874e-07, "loss": 0.0136, "step": 103520 }, { "epoch": 1.1061488327367914, "grad_norm": 0.49837806820869446, "learning_rate": 9.997747201941653e-07, "loss": 0.0941, "step": 103530 }, { "epoch": 1.1062556760510711, "grad_norm": 0.18253889679908752, "learning_rate": 9.997742156297598e-07, "loss": 0.0499, "step": 103540 }, { "epoch": 1.1063625193653508, "grad_norm": 4.972412109375, "learning_rate": 9.997737105010713e-07, "loss": 0.0154, "step": 103550 }, { "epoch": 1.1064693626796303, "grad_norm": 1.3086775541305542, "learning_rate": 9.997732048081007e-07, "loss": 0.0593, "step": 103560 }, { "epoch": 1.10657620599391, "grad_norm": 0.5306866765022278, "learning_rate": 9.997726985508486e-07, "loss": 0.0277, "step": 103570 }, { "epoch": 1.1066830493081896, "grad_norm": 0.8291326761245728, "learning_rate": 9.997721917293152e-07, "loss": 0.0317, "step": 103580 }, { "epoch": 1.106789892622469, "grad_norm": 7.044046401977539, "learning_rate": 9.997716843435012e-07, "loss": 0.0317, "step": 103590 }, { "epoch": 1.1068967359367488, "grad_norm": 2.3745598793029785, "learning_rate": 9.997711763934074e-07, "loss": 0.035, "step": 103600 }, { "epoch": 1.1070035792510284, "grad_norm": 1.6245583295822144, "learning_rate": 9.99770667879034e-07, "loss": 0.0197, "step": 103610 }, { "epoch": 1.107110422565308, "grad_norm": 6.133760452270508, "learning_rate": 9.99770158800382e-07, "loss": 0.0306, "step": 103620 }, { "epoch": 1.1072172658795876, "grad_norm": 2.5918500423431396, "learning_rate": 9.997696491574518e-07, "loss": 0.0461, "step": 103630 }, { "epoch": 1.1073241091938673, "grad_norm": 6.4795942306518555, "learning_rate": 9.99769138950244e-07, "loss": 0.0439, "step": 103640 }, { "epoch": 1.1074309525081467, "grad_norm": 2.364509344100952, "learning_rate": 9.99768628178759e-07, "loss": 0.0111, "step": 103650 }, { "epoch": 1.1075377958224264, "grad_norm": 2.3984436988830566, "learning_rate": 9.997681168429974e-07, "loss": 0.0177, "step": 103660 }, { "epoch": 1.107644639136706, "grad_norm": 1.5541325807571411, "learning_rate": 9.9976760494296e-07, "loss": 0.0388, "step": 103670 }, { "epoch": 1.1077514824509855, "grad_norm": 0.009663254953920841, "learning_rate": 9.997670924786473e-07, "loss": 0.0181, "step": 103680 }, { "epoch": 1.1078583257652652, "grad_norm": 0.025564927607774734, "learning_rate": 9.997665794500599e-07, "loss": 0.1023, "step": 103690 }, { "epoch": 1.107965169079545, "grad_norm": 0.8086863160133362, "learning_rate": 9.99766065857198e-07, "loss": 0.0226, "step": 103700 }, { "epoch": 1.1080720123938244, "grad_norm": 0.04928985610604286, "learning_rate": 9.997655517000628e-07, "loss": 0.0382, "step": 103710 }, { "epoch": 1.108178855708104, "grad_norm": 0.9610699415206909, "learning_rate": 9.997650369786544e-07, "loss": 0.0191, "step": 103720 }, { "epoch": 1.1082856990223837, "grad_norm": 0.25510773062705994, "learning_rate": 9.997645216929736e-07, "loss": 0.0474, "step": 103730 }, { "epoch": 1.1083925423366634, "grad_norm": 1.7331867218017578, "learning_rate": 9.99764005843021e-07, "loss": 0.0708, "step": 103740 }, { "epoch": 1.1084993856509429, "grad_norm": 6.28970193862915, "learning_rate": 9.99763489428797e-07, "loss": 0.0749, "step": 103750 }, { "epoch": 1.1086062289652225, "grad_norm": 1.3880780935287476, "learning_rate": 9.997629724503023e-07, "loss": 0.0114, "step": 103760 }, { "epoch": 1.1087130722795022, "grad_norm": 0.9296263456344604, "learning_rate": 9.997624549075377e-07, "loss": 0.0186, "step": 103770 }, { "epoch": 1.1088199155937817, "grad_norm": 10.353165626525879, "learning_rate": 9.997619368005035e-07, "loss": 0.0591, "step": 103780 }, { "epoch": 1.1089267589080614, "grad_norm": 0.014010369777679443, "learning_rate": 9.997614181292002e-07, "loss": 0.0364, "step": 103790 }, { "epoch": 1.109033602222341, "grad_norm": 8.337292671203613, "learning_rate": 9.997608988936286e-07, "loss": 0.0465, "step": 103800 }, { "epoch": 1.1091404455366205, "grad_norm": 6.086833953857422, "learning_rate": 9.99760379093789e-07, "loss": 0.0367, "step": 103810 }, { "epoch": 1.1092472888509002, "grad_norm": 11.291096687316895, "learning_rate": 9.997598587296827e-07, "loss": 0.0382, "step": 103820 }, { "epoch": 1.1093541321651799, "grad_norm": 0.1047002449631691, "learning_rate": 9.997593378013096e-07, "loss": 0.0308, "step": 103830 }, { "epoch": 1.1094609754794593, "grad_norm": 0.03855685889720917, "learning_rate": 9.997588163086703e-07, "loss": 0.0315, "step": 103840 }, { "epoch": 1.109567818793739, "grad_norm": 5.782808780670166, "learning_rate": 9.997582942517658e-07, "loss": 0.0499, "step": 103850 }, { "epoch": 1.1096746621080187, "grad_norm": 0.4075232446193695, "learning_rate": 9.997577716305963e-07, "loss": 0.0612, "step": 103860 }, { "epoch": 1.1097815054222981, "grad_norm": 5.996068954467773, "learning_rate": 9.997572484451627e-07, "loss": 0.0256, "step": 103870 }, { "epoch": 1.1098883487365778, "grad_norm": 5.863204002380371, "learning_rate": 9.997567246954654e-07, "loss": 0.0132, "step": 103880 }, { "epoch": 1.1099951920508575, "grad_norm": 5.49566650390625, "learning_rate": 9.99756200381505e-07, "loss": 0.0338, "step": 103890 }, { "epoch": 1.110102035365137, "grad_norm": 9.797867774963379, "learning_rate": 9.997556755032824e-07, "loss": 0.0198, "step": 103900 }, { "epoch": 1.1102088786794166, "grad_norm": 7.683160305023193, "learning_rate": 9.997551500607976e-07, "loss": 0.02, "step": 103910 }, { "epoch": 1.1103157219936963, "grad_norm": 0.01643160730600357, "learning_rate": 9.997546240540516e-07, "loss": 0.0332, "step": 103920 }, { "epoch": 1.1104225653079758, "grad_norm": 2.3417391777038574, "learning_rate": 9.997540974830449e-07, "loss": 0.0289, "step": 103930 }, { "epoch": 1.1105294086222555, "grad_norm": 0.03678242862224579, "learning_rate": 9.997535703477783e-07, "loss": 0.0677, "step": 103940 }, { "epoch": 1.1106362519365351, "grad_norm": 1.9180033206939697, "learning_rate": 9.99753042648252e-07, "loss": 0.0161, "step": 103950 }, { "epoch": 1.1107430952508146, "grad_norm": 0.7366595268249512, "learning_rate": 9.997525143844668e-07, "loss": 0.0096, "step": 103960 }, { "epoch": 1.1108499385650943, "grad_norm": 0.19791314005851746, "learning_rate": 9.99751985556423e-07, "loss": 0.0666, "step": 103970 }, { "epoch": 1.110956781879374, "grad_norm": 3.0172605514526367, "learning_rate": 9.99751456164122e-07, "loss": 0.0445, "step": 103980 }, { "epoch": 1.1110636251936534, "grad_norm": 0.36496323347091675, "learning_rate": 9.997509262075635e-07, "loss": 0.0308, "step": 103990 }, { "epoch": 1.111170468507933, "grad_norm": 1.4153403043746948, "learning_rate": 9.997503956867488e-07, "loss": 0.016, "step": 104000 }, { "epoch": 1.1112773118222128, "grad_norm": 3.671729803085327, "learning_rate": 9.997498646016782e-07, "loss": 0.0443, "step": 104010 }, { "epoch": 1.1113841551364922, "grad_norm": 4.107105731964111, "learning_rate": 9.997493329523521e-07, "loss": 0.023, "step": 104020 }, { "epoch": 1.111490998450772, "grad_norm": 5.009614944458008, "learning_rate": 9.997488007387713e-07, "loss": 0.0427, "step": 104030 }, { "epoch": 1.1115978417650516, "grad_norm": 0.7545479536056519, "learning_rate": 9.997482679609364e-07, "loss": 0.142, "step": 104040 }, { "epoch": 1.111704685079331, "grad_norm": 0.628750741481781, "learning_rate": 9.99747734618848e-07, "loss": 0.0357, "step": 104050 }, { "epoch": 1.1118115283936107, "grad_norm": 4.0401835441589355, "learning_rate": 9.997472007125068e-07, "loss": 0.068, "step": 104060 }, { "epoch": 1.1119183717078904, "grad_norm": 13.168502807617188, "learning_rate": 9.997466662419133e-07, "loss": 0.0981, "step": 104070 }, { "epoch": 1.1120252150221699, "grad_norm": 2.5174334049224854, "learning_rate": 9.997461312070679e-07, "loss": 0.0339, "step": 104080 }, { "epoch": 1.1121320583364496, "grad_norm": 4.321278095245361, "learning_rate": 9.997455956079715e-07, "loss": 0.0532, "step": 104090 }, { "epoch": 1.1122389016507293, "grad_norm": 0.09605520218610764, "learning_rate": 9.997450594446245e-07, "loss": 0.0227, "step": 104100 }, { "epoch": 1.1123457449650087, "grad_norm": 0.05852248892188072, "learning_rate": 9.997445227170279e-07, "loss": 0.019, "step": 104110 }, { "epoch": 1.1124525882792884, "grad_norm": 2.9790191650390625, "learning_rate": 9.997439854251817e-07, "loss": 0.0411, "step": 104120 }, { "epoch": 1.112559431593568, "grad_norm": 2.0849554538726807, "learning_rate": 9.997434475690868e-07, "loss": 0.0614, "step": 104130 }, { "epoch": 1.1126662749078475, "grad_norm": 7.119271278381348, "learning_rate": 9.997429091487441e-07, "loss": 0.022, "step": 104140 }, { "epoch": 1.1127731182221272, "grad_norm": 1.8807119131088257, "learning_rate": 9.997423701641538e-07, "loss": 0.0111, "step": 104150 }, { "epoch": 1.112879961536407, "grad_norm": 1.1111698150634766, "learning_rate": 9.997418306153168e-07, "loss": 0.0343, "step": 104160 }, { "epoch": 1.1129868048506864, "grad_norm": 0.24278460443019867, "learning_rate": 9.997412905022334e-07, "loss": 0.032, "step": 104170 }, { "epoch": 1.113093648164966, "grad_norm": 6.658870697021484, "learning_rate": 9.997407498249043e-07, "loss": 0.0277, "step": 104180 }, { "epoch": 1.1132004914792457, "grad_norm": 0.07893046736717224, "learning_rate": 9.9974020858333e-07, "loss": 0.0354, "step": 104190 }, { "epoch": 1.1133073347935254, "grad_norm": 1.9597119092941284, "learning_rate": 9.997396667775117e-07, "loss": 0.0258, "step": 104200 }, { "epoch": 1.1134141781078049, "grad_norm": 0.08812761306762695, "learning_rate": 9.997391244074495e-07, "loss": 0.0064, "step": 104210 }, { "epoch": 1.1135210214220845, "grad_norm": 1.8917983770370483, "learning_rate": 9.997385814731442e-07, "loss": 0.0101, "step": 104220 }, { "epoch": 1.1136278647363642, "grad_norm": 4.105746746063232, "learning_rate": 9.99738037974596e-07, "loss": 0.0698, "step": 104230 }, { "epoch": 1.1137347080506437, "grad_norm": 1.5490732192993164, "learning_rate": 9.99737493911806e-07, "loss": 0.0451, "step": 104240 }, { "epoch": 1.1138415513649234, "grad_norm": 0.03352932259440422, "learning_rate": 9.997369492847746e-07, "loss": 0.0313, "step": 104250 }, { "epoch": 1.113948394679203, "grad_norm": 0.11068004369735718, "learning_rate": 9.997364040935028e-07, "loss": 0.1042, "step": 104260 }, { "epoch": 1.1140552379934825, "grad_norm": 17.339012145996094, "learning_rate": 9.997358583379905e-07, "loss": 0.0427, "step": 104270 }, { "epoch": 1.1141620813077622, "grad_norm": 3.510265827178955, "learning_rate": 9.997353120182389e-07, "loss": 0.0608, "step": 104280 }, { "epoch": 1.1142689246220419, "grad_norm": 2.9967124462127686, "learning_rate": 9.997347651342483e-07, "loss": 0.0459, "step": 104290 }, { "epoch": 1.1143757679363213, "grad_norm": 1.104114055633545, "learning_rate": 9.997342176860195e-07, "loss": 0.0276, "step": 104300 }, { "epoch": 1.114482611250601, "grad_norm": 1.4637404680252075, "learning_rate": 9.99733669673553e-07, "loss": 0.0336, "step": 104310 }, { "epoch": 1.1145894545648807, "grad_norm": 9.108929634094238, "learning_rate": 9.997331210968494e-07, "loss": 0.0988, "step": 104320 }, { "epoch": 1.1146962978791601, "grad_norm": 1.858323097229004, "learning_rate": 9.997325719559095e-07, "loss": 0.0442, "step": 104330 }, { "epoch": 1.1148031411934398, "grad_norm": 3.263746738433838, "learning_rate": 9.997320222507339e-07, "loss": 0.0804, "step": 104340 }, { "epoch": 1.1149099845077195, "grad_norm": 0.553398609161377, "learning_rate": 9.99731471981323e-07, "loss": 0.0465, "step": 104350 }, { "epoch": 1.115016827821999, "grad_norm": 0.4844515323638916, "learning_rate": 9.997309211476776e-07, "loss": 0.038, "step": 104360 }, { "epoch": 1.1151236711362786, "grad_norm": 0.992806613445282, "learning_rate": 9.99730369749798e-07, "loss": 0.054, "step": 104370 }, { "epoch": 1.1152305144505583, "grad_norm": 12.49441146850586, "learning_rate": 9.997298177876856e-07, "loss": 0.0578, "step": 104380 }, { "epoch": 1.1153373577648378, "grad_norm": 0.323868066072464, "learning_rate": 9.997292652613403e-07, "loss": 0.0248, "step": 104390 }, { "epoch": 1.1154442010791175, "grad_norm": 7.614941120147705, "learning_rate": 9.997287121707628e-07, "loss": 0.0261, "step": 104400 }, { "epoch": 1.1155510443933971, "grad_norm": 3.861610174179077, "learning_rate": 9.99728158515954e-07, "loss": 0.0389, "step": 104410 }, { "epoch": 1.1156578877076766, "grad_norm": 8.100043296813965, "learning_rate": 9.997276042969144e-07, "loss": 0.0186, "step": 104420 }, { "epoch": 1.1157647310219563, "grad_norm": 2.6615703105926514, "learning_rate": 9.997270495136445e-07, "loss": 0.0226, "step": 104430 }, { "epoch": 1.115871574336236, "grad_norm": 4.189568996429443, "learning_rate": 9.997264941661452e-07, "loss": 0.0468, "step": 104440 }, { "epoch": 1.1159784176505154, "grad_norm": 1.3778890371322632, "learning_rate": 9.99725938254417e-07, "loss": 0.0569, "step": 104450 }, { "epoch": 1.116085260964795, "grad_norm": 1.1178325414657593, "learning_rate": 9.997253817784603e-07, "loss": 0.0279, "step": 104460 }, { "epoch": 1.1161921042790748, "grad_norm": 7.801719665527344, "learning_rate": 9.99724824738276e-07, "loss": 0.0434, "step": 104470 }, { "epoch": 1.1162989475933545, "grad_norm": 6.33902645111084, "learning_rate": 9.997242671338646e-07, "loss": 0.0565, "step": 104480 }, { "epoch": 1.116405790907634, "grad_norm": 0.16038107872009277, "learning_rate": 9.99723708965227e-07, "loss": 0.0465, "step": 104490 }, { "epoch": 1.1165126342219136, "grad_norm": 0.0438896082341671, "learning_rate": 9.997231502323636e-07, "loss": 0.0326, "step": 104500 }, { "epoch": 1.1166194775361933, "grad_norm": 3.0585365295410156, "learning_rate": 9.997225909352749e-07, "loss": 0.0194, "step": 104510 }, { "epoch": 1.1167263208504727, "grad_norm": 1.052474021911621, "learning_rate": 9.997220310739616e-07, "loss": 0.0308, "step": 104520 }, { "epoch": 1.1168331641647524, "grad_norm": 0.007641358766704798, "learning_rate": 9.997214706484246e-07, "loss": 0.0284, "step": 104530 }, { "epoch": 1.116940007479032, "grad_norm": 2.1978137493133545, "learning_rate": 9.997209096586643e-07, "loss": 0.0427, "step": 104540 }, { "epoch": 1.1170468507933116, "grad_norm": 2.7649598121643066, "learning_rate": 9.997203481046813e-07, "loss": 0.1173, "step": 104550 }, { "epoch": 1.1171536941075912, "grad_norm": 0.10005977004766464, "learning_rate": 9.997197859864764e-07, "loss": 0.0983, "step": 104560 }, { "epoch": 1.117260537421871, "grad_norm": 7.604061126708984, "learning_rate": 9.997192233040502e-07, "loss": 0.0465, "step": 104570 }, { "epoch": 1.1173673807361504, "grad_norm": 2.849475622177124, "learning_rate": 9.99718660057403e-07, "loss": 0.0719, "step": 104580 }, { "epoch": 1.11747422405043, "grad_norm": 5.380315780639648, "learning_rate": 9.997180962465359e-07, "loss": 0.0407, "step": 104590 }, { "epoch": 1.1175810673647097, "grad_norm": 0.042383525520563126, "learning_rate": 9.997175318714494e-07, "loss": 0.0172, "step": 104600 }, { "epoch": 1.1176879106789892, "grad_norm": 0.6077620387077332, "learning_rate": 9.997169669321442e-07, "loss": 0.0369, "step": 104610 }, { "epoch": 1.1177947539932689, "grad_norm": 0.05162007361650467, "learning_rate": 9.997164014286205e-07, "loss": 0.0249, "step": 104620 }, { "epoch": 1.1179015973075486, "grad_norm": 10.048179626464844, "learning_rate": 9.997158353608795e-07, "loss": 0.0323, "step": 104630 }, { "epoch": 1.118008440621828, "grad_norm": 6.731637477874756, "learning_rate": 9.997152687289217e-07, "loss": 0.0781, "step": 104640 }, { "epoch": 1.1181152839361077, "grad_norm": 0.3853003680706024, "learning_rate": 9.997147015327473e-07, "loss": 0.0352, "step": 104650 }, { "epoch": 1.1182221272503874, "grad_norm": 4.486183166503906, "learning_rate": 9.997141337723575e-07, "loss": 0.045, "step": 104660 }, { "epoch": 1.1183289705646668, "grad_norm": 15.941452980041504, "learning_rate": 9.997135654477527e-07, "loss": 0.1074, "step": 104670 }, { "epoch": 1.1184358138789465, "grad_norm": 0.11854345351457596, "learning_rate": 9.997129965589338e-07, "loss": 0.0088, "step": 104680 }, { "epoch": 1.1185426571932262, "grad_norm": 1.1872223615646362, "learning_rate": 9.99712427105901e-07, "loss": 0.0402, "step": 104690 }, { "epoch": 1.1186495005075057, "grad_norm": 1.9535250663757324, "learning_rate": 9.99711857088655e-07, "loss": 0.065, "step": 104700 }, { "epoch": 1.1187563438217853, "grad_norm": 0.3240588307380676, "learning_rate": 9.997112865071968e-07, "loss": 0.1188, "step": 104710 }, { "epoch": 1.118863187136065, "grad_norm": 0.13268746435642242, "learning_rate": 9.997107153615267e-07, "loss": 0.0247, "step": 104720 }, { "epoch": 1.1189700304503445, "grad_norm": 1.023329496383667, "learning_rate": 9.997101436516457e-07, "loss": 0.036, "step": 104730 }, { "epoch": 1.1190768737646242, "grad_norm": 3.239372968673706, "learning_rate": 9.997095713775542e-07, "loss": 0.0118, "step": 104740 }, { "epoch": 1.1191837170789039, "grad_norm": 9.622843742370605, "learning_rate": 9.997089985392527e-07, "loss": 0.0614, "step": 104750 }, { "epoch": 1.1192905603931833, "grad_norm": 0.15542174875736237, "learning_rate": 9.99708425136742e-07, "loss": 0.141, "step": 104760 }, { "epoch": 1.119397403707463, "grad_norm": 0.13133101165294647, "learning_rate": 9.99707851170023e-07, "loss": 0.0444, "step": 104770 }, { "epoch": 1.1195042470217427, "grad_norm": 0.6499595046043396, "learning_rate": 9.99707276639096e-07, "loss": 0.0229, "step": 104780 }, { "epoch": 1.1196110903360221, "grad_norm": 3.908398389816284, "learning_rate": 9.997067015439618e-07, "loss": 0.0129, "step": 104790 }, { "epoch": 1.1197179336503018, "grad_norm": 2.727269172668457, "learning_rate": 9.99706125884621e-07, "loss": 0.0155, "step": 104800 }, { "epoch": 1.1198247769645815, "grad_norm": 1.1954768896102905, "learning_rate": 9.997055496610743e-07, "loss": 0.018, "step": 104810 }, { "epoch": 1.119931620278861, "grad_norm": 0.31164926290512085, "learning_rate": 9.997049728733223e-07, "loss": 0.0297, "step": 104820 }, { "epoch": 1.1200384635931406, "grad_norm": 6.21144437789917, "learning_rate": 9.997043955213656e-07, "loss": 0.0409, "step": 104830 }, { "epoch": 1.1201453069074203, "grad_norm": 6.84260368347168, "learning_rate": 9.997038176052052e-07, "loss": 0.0285, "step": 104840 }, { "epoch": 1.1202521502216998, "grad_norm": 2.0454189777374268, "learning_rate": 9.99703239124841e-07, "loss": 0.0112, "step": 104850 }, { "epoch": 1.1203589935359795, "grad_norm": 1.7933417558670044, "learning_rate": 9.997026600802744e-07, "loss": 0.0428, "step": 104860 }, { "epoch": 1.1204658368502591, "grad_norm": 6.807718753814697, "learning_rate": 9.997020804715058e-07, "loss": 0.0545, "step": 104870 }, { "epoch": 1.1205726801645386, "grad_norm": 0.17152796685695648, "learning_rate": 9.997015002985358e-07, "loss": 0.1194, "step": 104880 }, { "epoch": 1.1206795234788183, "grad_norm": 1.5973860025405884, "learning_rate": 9.997009195613651e-07, "loss": 0.0113, "step": 104890 }, { "epoch": 1.120786366793098, "grad_norm": 0.04335770756006241, "learning_rate": 9.997003382599945e-07, "loss": 0.023, "step": 104900 }, { "epoch": 1.1208932101073774, "grad_norm": 7.328585147857666, "learning_rate": 9.996997563944242e-07, "loss": 0.0657, "step": 104910 }, { "epoch": 1.121000053421657, "grad_norm": 0.3701562285423279, "learning_rate": 9.996991739646553e-07, "loss": 0.0279, "step": 104920 }, { "epoch": 1.1211068967359368, "grad_norm": 0.29338741302490234, "learning_rate": 9.996985909706882e-07, "loss": 0.0861, "step": 104930 }, { "epoch": 1.1212137400502165, "grad_norm": 13.364468574523926, "learning_rate": 9.99698007412524e-07, "loss": 0.1212, "step": 104940 }, { "epoch": 1.121320583364496, "grad_norm": 0.46496832370758057, "learning_rate": 9.996974232901628e-07, "loss": 0.0165, "step": 104950 }, { "epoch": 1.1214274266787756, "grad_norm": 3.738986015319824, "learning_rate": 9.996968386036055e-07, "loss": 0.0416, "step": 104960 }, { "epoch": 1.1215342699930553, "grad_norm": 0.299376904964447, "learning_rate": 9.99696253352853e-07, "loss": 0.01, "step": 104970 }, { "epoch": 1.1216411133073347, "grad_norm": 1.7386177778244019, "learning_rate": 9.996956675379053e-07, "loss": 0.0462, "step": 104980 }, { "epoch": 1.1217479566216144, "grad_norm": 1.112332820892334, "learning_rate": 9.996950811587638e-07, "loss": 0.0384, "step": 104990 }, { "epoch": 1.121854799935894, "grad_norm": 7.753679275512695, "learning_rate": 9.996944942154287e-07, "loss": 0.081, "step": 105000 }, { "epoch": 1.1219616432501736, "grad_norm": 0.22156313061714172, "learning_rate": 9.996939067079009e-07, "loss": 0.0247, "step": 105010 }, { "epoch": 1.1220684865644532, "grad_norm": 3.0421576499938965, "learning_rate": 9.996933186361809e-07, "loss": 0.0315, "step": 105020 }, { "epoch": 1.122175329878733, "grad_norm": 0.3952746093273163, "learning_rate": 9.996927300002694e-07, "loss": 0.0271, "step": 105030 }, { "epoch": 1.1222821731930124, "grad_norm": 3.060917615890503, "learning_rate": 9.99692140800167e-07, "loss": 0.0123, "step": 105040 }, { "epoch": 1.122389016507292, "grad_norm": 0.9897955060005188, "learning_rate": 9.996915510358747e-07, "loss": 0.0708, "step": 105050 }, { "epoch": 1.1224958598215717, "grad_norm": 0.43037232756614685, "learning_rate": 9.996909607073928e-07, "loss": 0.0171, "step": 105060 }, { "epoch": 1.1226027031358512, "grad_norm": 0.7031596302986145, "learning_rate": 9.99690369814722e-07, "loss": 0.0115, "step": 105070 }, { "epoch": 1.1227095464501309, "grad_norm": 0.5844239592552185, "learning_rate": 9.996897783578633e-07, "loss": 0.0726, "step": 105080 }, { "epoch": 1.1228163897644106, "grad_norm": 1.029382348060608, "learning_rate": 9.996891863368168e-07, "loss": 0.0304, "step": 105090 }, { "epoch": 1.12292323307869, "grad_norm": 2.0343921184539795, "learning_rate": 9.996885937515837e-07, "loss": 0.0159, "step": 105100 }, { "epoch": 1.1230300763929697, "grad_norm": 1.5660368204116821, "learning_rate": 9.996880006021646e-07, "loss": 0.0575, "step": 105110 }, { "epoch": 1.1231369197072494, "grad_norm": 1.3640111684799194, "learning_rate": 9.9968740688856e-07, "loss": 0.0094, "step": 105120 }, { "epoch": 1.1232437630215288, "grad_norm": 1.861916184425354, "learning_rate": 9.996868126107702e-07, "loss": 0.0439, "step": 105130 }, { "epoch": 1.1233506063358085, "grad_norm": 7.682209491729736, "learning_rate": 9.996862177687968e-07, "loss": 0.0163, "step": 105140 }, { "epoch": 1.1234574496500882, "grad_norm": 11.862998008728027, "learning_rate": 9.996856223626394e-07, "loss": 0.0516, "step": 105150 }, { "epoch": 1.1235642929643677, "grad_norm": 12.694225311279297, "learning_rate": 9.996850263922997e-07, "loss": 0.1238, "step": 105160 }, { "epoch": 1.1236711362786473, "grad_norm": 0.05486869439482689, "learning_rate": 9.996844298577779e-07, "loss": 0.0187, "step": 105170 }, { "epoch": 1.123777979592927, "grad_norm": 5.732303619384766, "learning_rate": 9.996838327590742e-07, "loss": 0.0349, "step": 105180 }, { "epoch": 1.1238848229072065, "grad_norm": 0.3858718276023865, "learning_rate": 9.996832350961901e-07, "loss": 0.0175, "step": 105190 }, { "epoch": 1.1239916662214862, "grad_norm": 2.8444061279296875, "learning_rate": 9.996826368691257e-07, "loss": 0.0259, "step": 105200 }, { "epoch": 1.1240985095357658, "grad_norm": 0.13198727369308472, "learning_rate": 9.996820380778821e-07, "loss": 0.032, "step": 105210 }, { "epoch": 1.1242053528500455, "grad_norm": 3.7858388423919678, "learning_rate": 9.996814387224597e-07, "loss": 0.0458, "step": 105220 }, { "epoch": 1.124312196164325, "grad_norm": 0.012766332365572453, "learning_rate": 9.996808388028594e-07, "loss": 0.0166, "step": 105230 }, { "epoch": 1.1244190394786047, "grad_norm": 0.2368895560503006, "learning_rate": 9.996802383190814e-07, "loss": 0.042, "step": 105240 }, { "epoch": 1.1245258827928843, "grad_norm": 1.851488471031189, "learning_rate": 9.996796372711269e-07, "loss": 0.0849, "step": 105250 }, { "epoch": 1.1246327261071638, "grad_norm": 5.091636657714844, "learning_rate": 9.996790356589963e-07, "loss": 0.0429, "step": 105260 }, { "epoch": 1.1247395694214435, "grad_norm": 1.0950626134872437, "learning_rate": 9.996784334826903e-07, "loss": 0.0319, "step": 105270 }, { "epoch": 1.1248464127357232, "grad_norm": 1.1089556217193604, "learning_rate": 9.996778307422098e-07, "loss": 0.0066, "step": 105280 }, { "epoch": 1.1249532560500026, "grad_norm": 1.3697391748428345, "learning_rate": 9.996772274375554e-07, "loss": 0.0344, "step": 105290 }, { "epoch": 1.1250600993642823, "grad_norm": 0.023814719170331955, "learning_rate": 9.996766235687275e-07, "loss": 0.0423, "step": 105300 }, { "epoch": 1.125166942678562, "grad_norm": 0.5223560333251953, "learning_rate": 9.99676019135727e-07, "loss": 0.0511, "step": 105310 }, { "epoch": 1.1252737859928414, "grad_norm": 1.2610018253326416, "learning_rate": 9.996754141385546e-07, "loss": 0.0168, "step": 105320 }, { "epoch": 1.1253806293071211, "grad_norm": 2.463623285293579, "learning_rate": 9.996748085772108e-07, "loss": 0.057, "step": 105330 }, { "epoch": 1.1254874726214008, "grad_norm": 3.8725576400756836, "learning_rate": 9.996742024516966e-07, "loss": 0.0627, "step": 105340 }, { "epoch": 1.1255943159356803, "grad_norm": 4.461723327636719, "learning_rate": 9.996735957620126e-07, "loss": 0.0933, "step": 105350 }, { "epoch": 1.12570115924996, "grad_norm": 4.649850845336914, "learning_rate": 9.996729885081592e-07, "loss": 0.0375, "step": 105360 }, { "epoch": 1.1258080025642396, "grad_norm": 2.497938632965088, "learning_rate": 9.996723806901374e-07, "loss": 0.029, "step": 105370 }, { "epoch": 1.125914845878519, "grad_norm": 3.61435866355896, "learning_rate": 9.996717723079477e-07, "loss": 0.0167, "step": 105380 }, { "epoch": 1.1260216891927988, "grad_norm": 0.020463697612285614, "learning_rate": 9.996711633615908e-07, "loss": 0.021, "step": 105390 }, { "epoch": 1.1261285325070785, "grad_norm": 2.9938418865203857, "learning_rate": 9.996705538510675e-07, "loss": 0.0287, "step": 105400 }, { "epoch": 1.126235375821358, "grad_norm": 0.04736829549074173, "learning_rate": 9.996699437763786e-07, "loss": 0.0279, "step": 105410 }, { "epoch": 1.1263422191356376, "grad_norm": 0.5555502772331238, "learning_rate": 9.996693331375244e-07, "loss": 0.0332, "step": 105420 }, { "epoch": 1.1264490624499173, "grad_norm": 4.950170993804932, "learning_rate": 9.99668721934506e-07, "loss": 0.0213, "step": 105430 }, { "epoch": 1.1265559057641967, "grad_norm": 4.559754848480225, "learning_rate": 9.996681101673236e-07, "loss": 0.0778, "step": 105440 }, { "epoch": 1.1266627490784764, "grad_norm": 3.501086711883545, "learning_rate": 9.996674978359784e-07, "loss": 0.0699, "step": 105450 }, { "epoch": 1.126769592392756, "grad_norm": 0.31644049286842346, "learning_rate": 9.99666884940471e-07, "loss": 0.0455, "step": 105460 }, { "epoch": 1.1268764357070356, "grad_norm": 3.6932499408721924, "learning_rate": 9.996662714808018e-07, "loss": 0.0531, "step": 105470 }, { "epoch": 1.1269832790213152, "grad_norm": 4.09306526184082, "learning_rate": 9.996656574569717e-07, "loss": 0.0181, "step": 105480 }, { "epoch": 1.127090122335595, "grad_norm": 4.38663911819458, "learning_rate": 9.996650428689814e-07, "loss": 0.021, "step": 105490 }, { "epoch": 1.1271969656498744, "grad_norm": 1.709423542022705, "learning_rate": 9.996644277168317e-07, "loss": 0.0556, "step": 105500 }, { "epoch": 1.127303808964154, "grad_norm": 2.3915252685546875, "learning_rate": 9.996638120005228e-07, "loss": 0.0195, "step": 105510 }, { "epoch": 1.1274106522784337, "grad_norm": 8.609707832336426, "learning_rate": 9.996631957200559e-07, "loss": 0.0489, "step": 105520 }, { "epoch": 1.1275174955927132, "grad_norm": 1.1474347114562988, "learning_rate": 9.996625788754316e-07, "loss": 0.0224, "step": 105530 }, { "epoch": 1.1276243389069929, "grad_norm": 0.07800687104463577, "learning_rate": 9.996619614666505e-07, "loss": 0.0299, "step": 105540 }, { "epoch": 1.1277311822212726, "grad_norm": 0.7649374008178711, "learning_rate": 9.996613434937133e-07, "loss": 0.0184, "step": 105550 }, { "epoch": 1.127838025535552, "grad_norm": 0.09493747353553772, "learning_rate": 9.996607249566208e-07, "loss": 0.0837, "step": 105560 }, { "epoch": 1.1279448688498317, "grad_norm": 0.47166067361831665, "learning_rate": 9.996601058553736e-07, "loss": 0.0305, "step": 105570 }, { "epoch": 1.1280517121641114, "grad_norm": 4.5946044921875, "learning_rate": 9.996594861899725e-07, "loss": 0.0461, "step": 105580 }, { "epoch": 1.1281585554783908, "grad_norm": 6.517831325531006, "learning_rate": 9.99658865960418e-07, "loss": 0.0367, "step": 105590 }, { "epoch": 1.1282653987926705, "grad_norm": 11.468998908996582, "learning_rate": 9.996582451667109e-07, "loss": 0.053, "step": 105600 }, { "epoch": 1.1283722421069502, "grad_norm": 1.9704022407531738, "learning_rate": 9.996576238088521e-07, "loss": 0.019, "step": 105610 }, { "epoch": 1.1284790854212297, "grad_norm": 3.6751911640167236, "learning_rate": 9.99657001886842e-07, "loss": 0.0222, "step": 105620 }, { "epoch": 1.1285859287355093, "grad_norm": 0.6449750661849976, "learning_rate": 9.996563794006815e-07, "loss": 0.0049, "step": 105630 }, { "epoch": 1.128692772049789, "grad_norm": 0.2922557592391968, "learning_rate": 9.99655756350371e-07, "loss": 0.0341, "step": 105640 }, { "epoch": 1.1287996153640685, "grad_norm": 2.8782413005828857, "learning_rate": 9.996551327359118e-07, "loss": 0.0162, "step": 105650 }, { "epoch": 1.1289064586783482, "grad_norm": 1.8027899265289307, "learning_rate": 9.996545085573041e-07, "loss": 0.0138, "step": 105660 }, { "epoch": 1.1290133019926278, "grad_norm": 4.18263578414917, "learning_rate": 9.996538838145486e-07, "loss": 0.0659, "step": 105670 }, { "epoch": 1.1291201453069073, "grad_norm": 0.808193027973175, "learning_rate": 9.996532585076465e-07, "loss": 0.0371, "step": 105680 }, { "epoch": 1.129226988621187, "grad_norm": 5.606175899505615, "learning_rate": 9.99652632636598e-07, "loss": 0.0343, "step": 105690 }, { "epoch": 1.1293338319354667, "grad_norm": 0.1862962394952774, "learning_rate": 9.99652006201404e-07, "loss": 0.0193, "step": 105700 }, { "epoch": 1.1294406752497463, "grad_norm": 2.179102659225464, "learning_rate": 9.99651379202065e-07, "loss": 0.0295, "step": 105710 }, { "epoch": 1.1295475185640258, "grad_norm": 0.09989730268716812, "learning_rate": 9.99650751638582e-07, "loss": 0.0219, "step": 105720 }, { "epoch": 1.1296543618783055, "grad_norm": 7.181369304656982, "learning_rate": 9.996501235109556e-07, "loss": 0.0144, "step": 105730 }, { "epoch": 1.1297612051925852, "grad_norm": 5.035067081451416, "learning_rate": 9.996494948191864e-07, "loss": 0.0736, "step": 105740 }, { "epoch": 1.1298680485068646, "grad_norm": 0.5979374051094055, "learning_rate": 9.996488655632753e-07, "loss": 0.0224, "step": 105750 }, { "epoch": 1.1299748918211443, "grad_norm": 0.03009980171918869, "learning_rate": 9.99648235743223e-07, "loss": 0.0733, "step": 105760 }, { "epoch": 1.130081735135424, "grad_norm": 0.2849787473678589, "learning_rate": 9.9964760535903e-07, "loss": 0.0532, "step": 105770 }, { "epoch": 1.1301885784497034, "grad_norm": 8.168519020080566, "learning_rate": 9.996469744106973e-07, "loss": 0.0543, "step": 105780 }, { "epoch": 1.1302954217639831, "grad_norm": 0.19410130381584167, "learning_rate": 9.996463428982252e-07, "loss": 0.0143, "step": 105790 }, { "epoch": 1.1304022650782628, "grad_norm": 11.443334579467773, "learning_rate": 9.99645710821615e-07, "loss": 0.0721, "step": 105800 }, { "epoch": 1.1305091083925423, "grad_norm": 19.59775733947754, "learning_rate": 9.996450781808667e-07, "loss": 0.0538, "step": 105810 }, { "epoch": 1.130615951706822, "grad_norm": 0.3179831802845001, "learning_rate": 9.996444449759816e-07, "loss": 0.0327, "step": 105820 }, { "epoch": 1.1307227950211016, "grad_norm": 0.7883250117301941, "learning_rate": 9.996438112069604e-07, "loss": 0.0578, "step": 105830 }, { "epoch": 1.130829638335381, "grad_norm": 1.0281895399093628, "learning_rate": 9.996431768738033e-07, "loss": 0.0148, "step": 105840 }, { "epoch": 1.1309364816496608, "grad_norm": 0.5311563611030579, "learning_rate": 9.996425419765114e-07, "loss": 0.022, "step": 105850 }, { "epoch": 1.1310433249639404, "grad_norm": 0.02996700629591942, "learning_rate": 9.996419065150855e-07, "loss": 0.0612, "step": 105860 }, { "epoch": 1.13115016827822, "grad_norm": 2.7827625274658203, "learning_rate": 9.99641270489526e-07, "loss": 0.0233, "step": 105870 }, { "epoch": 1.1312570115924996, "grad_norm": 0.21047882735729218, "learning_rate": 9.99640633899834e-07, "loss": 0.0446, "step": 105880 }, { "epoch": 1.1313638549067793, "grad_norm": 2.3304526805877686, "learning_rate": 9.9963999674601e-07, "loss": 0.0725, "step": 105890 }, { "epoch": 1.1314706982210587, "grad_norm": 4.534341812133789, "learning_rate": 9.996393590280547e-07, "loss": 0.0344, "step": 105900 }, { "epoch": 1.1315775415353384, "grad_norm": 0.022154441103339195, "learning_rate": 9.996387207459686e-07, "loss": 0.0232, "step": 105910 }, { "epoch": 1.131684384849618, "grad_norm": 1.4690437316894531, "learning_rate": 9.99638081899753e-07, "loss": 0.0388, "step": 105920 }, { "epoch": 1.1317912281638978, "grad_norm": 0.20677635073661804, "learning_rate": 9.996374424894082e-07, "loss": 0.0518, "step": 105930 }, { "epoch": 1.1318980714781772, "grad_norm": 3.029827356338501, "learning_rate": 9.99636802514935e-07, "loss": 0.0662, "step": 105940 }, { "epoch": 1.132004914792457, "grad_norm": 2.326054334640503, "learning_rate": 9.996361619763341e-07, "loss": 0.0645, "step": 105950 }, { "epoch": 1.1321117581067366, "grad_norm": 13.429641723632812, "learning_rate": 9.996355208736063e-07, "loss": 0.0247, "step": 105960 }, { "epoch": 1.132218601421016, "grad_norm": 0.18111227452754974, "learning_rate": 9.996348792067524e-07, "loss": 0.0604, "step": 105970 }, { "epoch": 1.1323254447352957, "grad_norm": 0.01809149608016014, "learning_rate": 9.996342369757729e-07, "loss": 0.0409, "step": 105980 }, { "epoch": 1.1324322880495754, "grad_norm": 1.4166884422302246, "learning_rate": 9.996335941806686e-07, "loss": 0.0708, "step": 105990 }, { "epoch": 1.1325391313638549, "grad_norm": 0.3839544653892517, "learning_rate": 9.996329508214403e-07, "loss": 0.0186, "step": 106000 }, { "epoch": 1.1326459746781345, "grad_norm": 0.25823214650154114, "learning_rate": 9.996323068980887e-07, "loss": 0.0429, "step": 106010 }, { "epoch": 1.1327528179924142, "grad_norm": 14.787370681762695, "learning_rate": 9.996316624106145e-07, "loss": 0.0215, "step": 106020 }, { "epoch": 1.1328596613066937, "grad_norm": 4.386655807495117, "learning_rate": 9.996310173590184e-07, "loss": 0.0381, "step": 106030 }, { "epoch": 1.1329665046209734, "grad_norm": 7.150671005249023, "learning_rate": 9.996303717433011e-07, "loss": 0.0529, "step": 106040 }, { "epoch": 1.133073347935253, "grad_norm": 0.07350870966911316, "learning_rate": 9.996297255634638e-07, "loss": 0.008, "step": 106050 }, { "epoch": 1.1331801912495325, "grad_norm": 22.186481475830078, "learning_rate": 9.996290788195064e-07, "loss": 0.0688, "step": 106060 }, { "epoch": 1.1332870345638122, "grad_norm": 0.8156989216804504, "learning_rate": 9.996284315114303e-07, "loss": 0.0464, "step": 106070 }, { "epoch": 1.1333938778780919, "grad_norm": 0.41222307085990906, "learning_rate": 9.996277836392359e-07, "loss": 0.064, "step": 106080 }, { "epoch": 1.1335007211923713, "grad_norm": 2.0119383335113525, "learning_rate": 9.996271352029239e-07, "loss": 0.0091, "step": 106090 }, { "epoch": 1.133607564506651, "grad_norm": 0.17895889282226562, "learning_rate": 9.996264862024954e-07, "loss": 0.0276, "step": 106100 }, { "epoch": 1.1337144078209307, "grad_norm": 1.5642396211624146, "learning_rate": 9.99625836637951e-07, "loss": 0.0466, "step": 106110 }, { "epoch": 1.1338212511352102, "grad_norm": 3.2603235244750977, "learning_rate": 9.996251865092909e-07, "loss": 0.0263, "step": 106120 }, { "epoch": 1.1339280944494898, "grad_norm": 4.770364761352539, "learning_rate": 9.996245358165166e-07, "loss": 0.0237, "step": 106130 }, { "epoch": 1.1340349377637695, "grad_norm": 10.550220489501953, "learning_rate": 9.996238845596283e-07, "loss": 0.0353, "step": 106140 }, { "epoch": 1.134141781078049, "grad_norm": 1.078098177909851, "learning_rate": 9.996232327386268e-07, "loss": 0.0306, "step": 106150 }, { "epoch": 1.1342486243923287, "grad_norm": 0.3633987009525299, "learning_rate": 9.996225803535131e-07, "loss": 0.0555, "step": 106160 }, { "epoch": 1.1343554677066083, "grad_norm": 0.03708663210272789, "learning_rate": 9.99621927404288e-07, "loss": 0.0295, "step": 106170 }, { "epoch": 1.1344623110208878, "grad_norm": 5.775584697723389, "learning_rate": 9.99621273890952e-07, "loss": 0.0314, "step": 106180 }, { "epoch": 1.1345691543351675, "grad_norm": 0.047198664397001266, "learning_rate": 9.996206198135056e-07, "loss": 0.0702, "step": 106190 }, { "epoch": 1.1346759976494472, "grad_norm": 8.597403526306152, "learning_rate": 9.996199651719498e-07, "loss": 0.0351, "step": 106200 }, { "epoch": 1.1347828409637266, "grad_norm": 8.829344749450684, "learning_rate": 9.996193099662856e-07, "loss": 0.0965, "step": 106210 }, { "epoch": 1.1348896842780063, "grad_norm": 1.1011629104614258, "learning_rate": 9.996186541965134e-07, "loss": 0.0286, "step": 106220 }, { "epoch": 1.134996527592286, "grad_norm": 11.243844985961914, "learning_rate": 9.996179978626341e-07, "loss": 0.0486, "step": 106230 }, { "epoch": 1.1351033709065654, "grad_norm": 3.4011149406433105, "learning_rate": 9.996173409646483e-07, "loss": 0.0677, "step": 106240 }, { "epoch": 1.1352102142208451, "grad_norm": 0.01636945828795433, "learning_rate": 9.996166835025567e-07, "loss": 0.0079, "step": 106250 }, { "epoch": 1.1353170575351248, "grad_norm": 0.2586592435836792, "learning_rate": 9.996160254763605e-07, "loss": 0.0268, "step": 106260 }, { "epoch": 1.1354239008494043, "grad_norm": 2.639442205429077, "learning_rate": 9.996153668860598e-07, "loss": 0.0106, "step": 106270 }, { "epoch": 1.135530744163684, "grad_norm": 4.084127426147461, "learning_rate": 9.996147077316556e-07, "loss": 0.0584, "step": 106280 }, { "epoch": 1.1356375874779636, "grad_norm": 2.009669065475464, "learning_rate": 9.99614048013149e-07, "loss": 0.0475, "step": 106290 }, { "epoch": 1.135744430792243, "grad_norm": 2.3374886512756348, "learning_rate": 9.996133877305402e-07, "loss": 0.0287, "step": 106300 }, { "epoch": 1.1358512741065228, "grad_norm": 0.022880615666508675, "learning_rate": 9.9961272688383e-07, "loss": 0.0181, "step": 106310 }, { "epoch": 1.1359581174208024, "grad_norm": 4.781507968902588, "learning_rate": 9.996120654730196e-07, "loss": 0.0794, "step": 106320 }, { "epoch": 1.136064960735082, "grad_norm": 0.48792707920074463, "learning_rate": 9.996114034981095e-07, "loss": 0.0222, "step": 106330 }, { "epoch": 1.1361718040493616, "grad_norm": 0.16181635856628418, "learning_rate": 9.996107409591004e-07, "loss": 0.0093, "step": 106340 }, { "epoch": 1.1362786473636413, "grad_norm": 22.700027465820312, "learning_rate": 9.99610077855993e-07, "loss": 0.0308, "step": 106350 }, { "epoch": 1.1363854906779207, "grad_norm": 0.029622241854667664, "learning_rate": 9.996094141887882e-07, "loss": 0.0216, "step": 106360 }, { "epoch": 1.1364923339922004, "grad_norm": 14.289766311645508, "learning_rate": 9.996087499574863e-07, "loss": 0.0683, "step": 106370 }, { "epoch": 1.13659917730648, "grad_norm": 4.440122604370117, "learning_rate": 9.996080851620888e-07, "loss": 0.0366, "step": 106380 }, { "epoch": 1.1367060206207595, "grad_norm": 3.737165927886963, "learning_rate": 9.99607419802596e-07, "loss": 0.0166, "step": 106390 }, { "epoch": 1.1368128639350392, "grad_norm": 6.669960021972656, "learning_rate": 9.996067538790086e-07, "loss": 0.0253, "step": 106400 }, { "epoch": 1.136919707249319, "grad_norm": 4.707670211791992, "learning_rate": 9.996060873913276e-07, "loss": 0.0315, "step": 106410 }, { "epoch": 1.1370265505635984, "grad_norm": 6.01967716217041, "learning_rate": 9.996054203395536e-07, "loss": 0.0176, "step": 106420 }, { "epoch": 1.137133393877878, "grad_norm": 5.491063117980957, "learning_rate": 9.996047527236872e-07, "loss": 0.0051, "step": 106430 }, { "epoch": 1.1372402371921577, "grad_norm": 0.04146191477775574, "learning_rate": 9.996040845437294e-07, "loss": 0.0539, "step": 106440 }, { "epoch": 1.1373470805064374, "grad_norm": 3.69083571434021, "learning_rate": 9.996034157996812e-07, "loss": 0.0489, "step": 106450 }, { "epoch": 1.1374539238207169, "grad_norm": 6.069563865661621, "learning_rate": 9.996027464915427e-07, "loss": 0.0216, "step": 106460 }, { "epoch": 1.1375607671349965, "grad_norm": 0.049405790865421295, "learning_rate": 9.99602076619315e-07, "loss": 0.0286, "step": 106470 }, { "epoch": 1.1376676104492762, "grad_norm": 0.5167602300643921, "learning_rate": 9.996014061829989e-07, "loss": 0.0456, "step": 106480 }, { "epoch": 1.1377744537635557, "grad_norm": 0.3411674499511719, "learning_rate": 9.99600735182595e-07, "loss": 0.029, "step": 106490 }, { "epoch": 1.1378812970778354, "grad_norm": 4.324370384216309, "learning_rate": 9.996000636181046e-07, "loss": 0.0172, "step": 106500 }, { "epoch": 1.137988140392115, "grad_norm": 2.784505844116211, "learning_rate": 9.995993914895276e-07, "loss": 0.0317, "step": 106510 }, { "epoch": 1.1380949837063945, "grad_norm": 3.2844948768615723, "learning_rate": 9.995987187968652e-07, "loss": 0.0406, "step": 106520 }, { "epoch": 1.1382018270206742, "grad_norm": 5.497439861297607, "learning_rate": 9.995980455401183e-07, "loss": 0.0389, "step": 106530 }, { "epoch": 1.1383086703349539, "grad_norm": 0.5445845127105713, "learning_rate": 9.995973717192873e-07, "loss": 0.0521, "step": 106540 }, { "epoch": 1.1384155136492333, "grad_norm": 3.122390031814575, "learning_rate": 9.995966973343734e-07, "loss": 0.066, "step": 106550 }, { "epoch": 1.138522356963513, "grad_norm": 4.7032270431518555, "learning_rate": 9.99596022385377e-07, "loss": 0.0463, "step": 106560 }, { "epoch": 1.1386292002777927, "grad_norm": 4.462673664093018, "learning_rate": 9.995953468722989e-07, "loss": 0.0512, "step": 106570 }, { "epoch": 1.1387360435920721, "grad_norm": 20.899415969848633, "learning_rate": 9.995946707951402e-07, "loss": 0.0212, "step": 106580 }, { "epoch": 1.1388428869063518, "grad_norm": 2.486963987350464, "learning_rate": 9.995939941539012e-07, "loss": 0.0206, "step": 106590 }, { "epoch": 1.1389497302206315, "grad_norm": 2.8911077976226807, "learning_rate": 9.99593316948583e-07, "loss": 0.0273, "step": 106600 }, { "epoch": 1.139056573534911, "grad_norm": 0.015898576006293297, "learning_rate": 9.99592639179186e-07, "loss": 0.0049, "step": 106610 }, { "epoch": 1.1391634168491906, "grad_norm": 4.678838729858398, "learning_rate": 9.995919608457116e-07, "loss": 0.0263, "step": 106620 }, { "epoch": 1.1392702601634703, "grad_norm": 1.307729721069336, "learning_rate": 9.995912819481597e-07, "loss": 0.0419, "step": 106630 }, { "epoch": 1.1393771034777498, "grad_norm": 9.91174602508545, "learning_rate": 9.995906024865318e-07, "loss": 0.0248, "step": 106640 }, { "epoch": 1.1394839467920295, "grad_norm": 4.446818828582764, "learning_rate": 9.995899224608285e-07, "loss": 0.0497, "step": 106650 }, { "epoch": 1.1395907901063091, "grad_norm": 9.157878875732422, "learning_rate": 9.995892418710503e-07, "loss": 0.0498, "step": 106660 }, { "epoch": 1.1396976334205888, "grad_norm": 5.337472915649414, "learning_rate": 9.995885607171982e-07, "loss": 0.0369, "step": 106670 }, { "epoch": 1.1398044767348683, "grad_norm": 4.118501663208008, "learning_rate": 9.99587878999273e-07, "loss": 0.0473, "step": 106680 }, { "epoch": 1.139911320049148, "grad_norm": 0.8655749559402466, "learning_rate": 9.995871967172752e-07, "loss": 0.021, "step": 106690 }, { "epoch": 1.1400181633634277, "grad_norm": 0.44547152519226074, "learning_rate": 9.995865138712058e-07, "loss": 0.0427, "step": 106700 }, { "epoch": 1.140125006677707, "grad_norm": 6.6218695640563965, "learning_rate": 9.995858304610656e-07, "loss": 0.0422, "step": 106710 }, { "epoch": 1.1402318499919868, "grad_norm": 9.250980377197266, "learning_rate": 9.995851464868553e-07, "loss": 0.071, "step": 106720 }, { "epoch": 1.1403386933062665, "grad_norm": 4.309229850769043, "learning_rate": 9.995844619485757e-07, "loss": 0.0154, "step": 106730 }, { "epoch": 1.140445536620546, "grad_norm": 0.5683116316795349, "learning_rate": 9.995837768462274e-07, "loss": 0.0211, "step": 106740 }, { "epoch": 1.1405523799348256, "grad_norm": 2.565124273300171, "learning_rate": 9.995830911798112e-07, "loss": 0.0901, "step": 106750 }, { "epoch": 1.1406592232491053, "grad_norm": 1.4522532224655151, "learning_rate": 9.995824049493283e-07, "loss": 0.0702, "step": 106760 }, { "epoch": 1.1407660665633848, "grad_norm": 2.4970667362213135, "learning_rate": 9.995817181547789e-07, "loss": 0.0165, "step": 106770 }, { "epoch": 1.1408729098776644, "grad_norm": 7.035643100738525, "learning_rate": 9.99581030796164e-07, "loss": 0.0271, "step": 106780 }, { "epoch": 1.1409797531919441, "grad_norm": 1.3677657842636108, "learning_rate": 9.995803428734847e-07, "loss": 0.038, "step": 106790 }, { "epoch": 1.1410865965062236, "grad_norm": 6.8404951095581055, "learning_rate": 9.995796543867413e-07, "loss": 0.0427, "step": 106800 }, { "epoch": 1.1411934398205033, "grad_norm": 0.8427285552024841, "learning_rate": 9.995789653359347e-07, "loss": 0.0194, "step": 106810 }, { "epoch": 1.141300283134783, "grad_norm": 0.1469057947397232, "learning_rate": 9.99578275721066e-07, "loss": 0.0216, "step": 106820 }, { "epoch": 1.1414071264490624, "grad_norm": 0.36688050627708435, "learning_rate": 9.995775855421356e-07, "loss": 0.0753, "step": 106830 }, { "epoch": 1.141513969763342, "grad_norm": 7.140023231506348, "learning_rate": 9.995768947991442e-07, "loss": 0.0368, "step": 106840 }, { "epoch": 1.1416208130776218, "grad_norm": 10.019421577453613, "learning_rate": 9.99576203492093e-07, "loss": 0.0698, "step": 106850 }, { "epoch": 1.1417276563919012, "grad_norm": 0.277124285697937, "learning_rate": 9.995755116209824e-07, "loss": 0.0426, "step": 106860 }, { "epoch": 1.141834499706181, "grad_norm": 10.152560234069824, "learning_rate": 9.995748191858136e-07, "loss": 0.0346, "step": 106870 }, { "epoch": 1.1419413430204606, "grad_norm": 5.442254066467285, "learning_rate": 9.99574126186587e-07, "loss": 0.0351, "step": 106880 }, { "epoch": 1.14204818633474, "grad_norm": 0.5259597301483154, "learning_rate": 9.995734326233035e-07, "loss": 0.057, "step": 106890 }, { "epoch": 1.1421550296490197, "grad_norm": 9.698277473449707, "learning_rate": 9.995727384959638e-07, "loss": 0.029, "step": 106900 }, { "epoch": 1.1422618729632994, "grad_norm": 0.8515190482139587, "learning_rate": 9.99572043804569e-07, "loss": 0.052, "step": 106910 }, { "epoch": 1.1423687162775789, "grad_norm": 10.899685859680176, "learning_rate": 9.995713485491194e-07, "loss": 0.1211, "step": 106920 }, { "epoch": 1.1424755595918585, "grad_norm": 6.633483409881592, "learning_rate": 9.995706527296162e-07, "loss": 0.0155, "step": 106930 }, { "epoch": 1.1425824029061382, "grad_norm": 0.008904119953513145, "learning_rate": 9.9956995634606e-07, "loss": 0.034, "step": 106940 }, { "epoch": 1.1426892462204177, "grad_norm": 0.9149511456489563, "learning_rate": 9.995692593984514e-07, "loss": 0.0557, "step": 106950 }, { "epoch": 1.1427960895346974, "grad_norm": 0.059718579053878784, "learning_rate": 9.995685618867917e-07, "loss": 0.021, "step": 106960 }, { "epoch": 1.142902932848977, "grad_norm": 3.055008888244629, "learning_rate": 9.995678638110814e-07, "loss": 0.0873, "step": 106970 }, { "epoch": 1.1430097761632565, "grad_norm": 3.435706377029419, "learning_rate": 9.99567165171321e-07, "loss": 0.0317, "step": 106980 }, { "epoch": 1.1431166194775362, "grad_norm": 6.794395923614502, "learning_rate": 9.995664659675119e-07, "loss": 0.0391, "step": 106990 }, { "epoch": 1.1432234627918159, "grad_norm": 3.6426913738250732, "learning_rate": 9.995657661996542e-07, "loss": 0.031, "step": 107000 }, { "epoch": 1.1433303061060953, "grad_norm": 1.1820075511932373, "learning_rate": 9.995650658677492e-07, "loss": 0.0158, "step": 107010 }, { "epoch": 1.143437149420375, "grad_norm": 4.204294681549072, "learning_rate": 9.995643649717978e-07, "loss": 0.0288, "step": 107020 }, { "epoch": 1.1435439927346547, "grad_norm": 3.489169120788574, "learning_rate": 9.995636635118001e-07, "loss": 0.0178, "step": 107030 }, { "epoch": 1.1436508360489341, "grad_norm": 0.6366113424301147, "learning_rate": 9.995629614877577e-07, "loss": 0.0207, "step": 107040 }, { "epoch": 1.1437576793632138, "grad_norm": 0.02952534705400467, "learning_rate": 9.995622588996708e-07, "loss": 0.0594, "step": 107050 }, { "epoch": 1.1438645226774935, "grad_norm": 0.7637016177177429, "learning_rate": 9.995615557475404e-07, "loss": 0.07, "step": 107060 }, { "epoch": 1.143971365991773, "grad_norm": 4.424806594848633, "learning_rate": 9.995608520313673e-07, "loss": 0.0304, "step": 107070 }, { "epoch": 1.1440782093060526, "grad_norm": 0.09336569905281067, "learning_rate": 9.995601477511523e-07, "loss": 0.093, "step": 107080 }, { "epoch": 1.1441850526203323, "grad_norm": 0.14441023766994476, "learning_rate": 9.995594429068963e-07, "loss": 0.0656, "step": 107090 }, { "epoch": 1.1442918959346118, "grad_norm": 0.09496719390153885, "learning_rate": 9.995587374985999e-07, "loss": 0.0322, "step": 107100 }, { "epoch": 1.1443987392488915, "grad_norm": 4.110923767089844, "learning_rate": 9.99558031526264e-07, "loss": 0.0271, "step": 107110 }, { "epoch": 1.1445055825631711, "grad_norm": 12.155083656311035, "learning_rate": 9.995573249898894e-07, "loss": 0.0201, "step": 107120 }, { "epoch": 1.1446124258774506, "grad_norm": 5.703783988952637, "learning_rate": 9.995566178894767e-07, "loss": 0.0343, "step": 107130 }, { "epoch": 1.1447192691917303, "grad_norm": 10.734251022338867, "learning_rate": 9.99555910225027e-07, "loss": 0.0928, "step": 107140 }, { "epoch": 1.14482611250601, "grad_norm": 11.663729667663574, "learning_rate": 9.99555201996541e-07, "loss": 0.0758, "step": 107150 }, { "epoch": 1.1449329558202894, "grad_norm": 2.25235652923584, "learning_rate": 9.995544932040194e-07, "loss": 0.0158, "step": 107160 }, { "epoch": 1.145039799134569, "grad_norm": 7.013121128082275, "learning_rate": 9.995537838474633e-07, "loss": 0.0404, "step": 107170 }, { "epoch": 1.1451466424488488, "grad_norm": 0.9767857789993286, "learning_rate": 9.99553073926873e-07, "loss": 0.0209, "step": 107180 }, { "epoch": 1.1452534857631285, "grad_norm": 2.3206350803375244, "learning_rate": 9.9955236344225e-07, "loss": 0.0241, "step": 107190 }, { "epoch": 1.145360329077408, "grad_norm": 6.794740676879883, "learning_rate": 9.99551652393594e-07, "loss": 0.0374, "step": 107200 }, { "epoch": 1.1454671723916876, "grad_norm": 0.40047043561935425, "learning_rate": 9.99550940780907e-07, "loss": 0.0114, "step": 107210 }, { "epoch": 1.1455740157059673, "grad_norm": 0.37415945529937744, "learning_rate": 9.99550228604189e-07, "loss": 0.0229, "step": 107220 }, { "epoch": 1.1456808590202467, "grad_norm": 5.084012508392334, "learning_rate": 9.995495158634413e-07, "loss": 0.0472, "step": 107230 }, { "epoch": 1.1457877023345264, "grad_norm": 9.983922958374023, "learning_rate": 9.995488025586645e-07, "loss": 0.0577, "step": 107240 }, { "epoch": 1.145894545648806, "grad_norm": 0.2896910309791565, "learning_rate": 9.995480886898593e-07, "loss": 0.0187, "step": 107250 }, { "epoch": 1.1460013889630856, "grad_norm": 0.4637210965156555, "learning_rate": 9.995473742570267e-07, "loss": 0.0186, "step": 107260 }, { "epoch": 1.1461082322773652, "grad_norm": 0.039915017783641815, "learning_rate": 9.995466592601673e-07, "loss": 0.0214, "step": 107270 }, { "epoch": 1.146215075591645, "grad_norm": 0.04772242158651352, "learning_rate": 9.995459436992818e-07, "loss": 0.0245, "step": 107280 }, { "epoch": 1.1463219189059244, "grad_norm": 4.856288909912109, "learning_rate": 9.995452275743717e-07, "loss": 0.0356, "step": 107290 }, { "epoch": 1.146428762220204, "grad_norm": 3.0834147930145264, "learning_rate": 9.99544510885437e-07, "loss": 0.007, "step": 107300 }, { "epoch": 1.1465356055344837, "grad_norm": 2.7726805210113525, "learning_rate": 9.995437936324791e-07, "loss": 0.0262, "step": 107310 }, { "epoch": 1.1466424488487632, "grad_norm": 28.129491806030273, "learning_rate": 9.995430758154983e-07, "loss": 0.0431, "step": 107320 }, { "epoch": 1.1467492921630429, "grad_norm": 4.658478736877441, "learning_rate": 9.995423574344959e-07, "loss": 0.1335, "step": 107330 }, { "epoch": 1.1468561354773226, "grad_norm": 0.1960429847240448, "learning_rate": 9.995416384894724e-07, "loss": 0.0127, "step": 107340 }, { "epoch": 1.146962978791602, "grad_norm": 3.9563992023468018, "learning_rate": 9.995409189804287e-07, "loss": 0.0421, "step": 107350 }, { "epoch": 1.1470698221058817, "grad_norm": 1.7234708070755005, "learning_rate": 9.995401989073655e-07, "loss": 0.0186, "step": 107360 }, { "epoch": 1.1471766654201614, "grad_norm": 4.26308536529541, "learning_rate": 9.995394782702837e-07, "loss": 0.0515, "step": 107370 }, { "epoch": 1.1472835087344408, "grad_norm": 1.3025754690170288, "learning_rate": 9.995387570691842e-07, "loss": 0.0427, "step": 107380 }, { "epoch": 1.1473903520487205, "grad_norm": 1.4550962448120117, "learning_rate": 9.995380353040676e-07, "loss": 0.0291, "step": 107390 }, { "epoch": 1.1474971953630002, "grad_norm": 10.529152870178223, "learning_rate": 9.99537312974935e-07, "loss": 0.0398, "step": 107400 }, { "epoch": 1.14760403867728, "grad_norm": 0.9367032647132874, "learning_rate": 9.99536590081787e-07, "loss": 0.047, "step": 107410 }, { "epoch": 1.1477108819915594, "grad_norm": 0.18297524750232697, "learning_rate": 9.995358666246248e-07, "loss": 0.034, "step": 107420 }, { "epoch": 1.147817725305839, "grad_norm": 4.148873329162598, "learning_rate": 9.995351426034484e-07, "loss": 0.0272, "step": 107430 }, { "epoch": 1.1479245686201187, "grad_norm": 1.3398302793502808, "learning_rate": 9.995344180182595e-07, "loss": 0.0304, "step": 107440 }, { "epoch": 1.1480314119343982, "grad_norm": 3.6719911098480225, "learning_rate": 9.995336928690585e-07, "loss": 0.1302, "step": 107450 }, { "epoch": 1.1481382552486779, "grad_norm": 3.5543293952941895, "learning_rate": 9.995329671558462e-07, "loss": 0.0803, "step": 107460 }, { "epoch": 1.1482450985629575, "grad_norm": 0.8595468401908875, "learning_rate": 9.995322408786232e-07, "loss": 0.0261, "step": 107470 }, { "epoch": 1.148351941877237, "grad_norm": 2.074970006942749, "learning_rate": 9.99531514037391e-07, "loss": 0.0122, "step": 107480 }, { "epoch": 1.1484587851915167, "grad_norm": 3.63767147064209, "learning_rate": 9.995307866321498e-07, "loss": 0.0808, "step": 107490 }, { "epoch": 1.1485656285057964, "grad_norm": 15.761619567871094, "learning_rate": 9.995300586629008e-07, "loss": 0.0254, "step": 107500 }, { "epoch": 1.1486724718200758, "grad_norm": 3.608290433883667, "learning_rate": 9.995293301296445e-07, "loss": 0.0269, "step": 107510 }, { "epoch": 1.1487793151343555, "grad_norm": 0.1108582466840744, "learning_rate": 9.99528601032382e-07, "loss": 0.0415, "step": 107520 }, { "epoch": 1.1488861584486352, "grad_norm": 4.9031267166137695, "learning_rate": 9.995278713711138e-07, "loss": 0.0247, "step": 107530 }, { "epoch": 1.1489930017629146, "grad_norm": 0.06247076392173767, "learning_rate": 9.995271411458413e-07, "loss": 0.0402, "step": 107540 }, { "epoch": 1.1490998450771943, "grad_norm": 0.015629027038812637, "learning_rate": 9.995264103565647e-07, "loss": 0.0196, "step": 107550 }, { "epoch": 1.149206688391474, "grad_norm": 0.20284071564674377, "learning_rate": 9.99525679003285e-07, "loss": 0.0315, "step": 107560 }, { "epoch": 1.1493135317057535, "grad_norm": 4.452755451202393, "learning_rate": 9.995249470860034e-07, "loss": 0.0281, "step": 107570 }, { "epoch": 1.1494203750200331, "grad_norm": 3.019202709197998, "learning_rate": 9.995242146047202e-07, "loss": 0.0213, "step": 107580 }, { "epoch": 1.1495272183343128, "grad_norm": 2.67287015914917, "learning_rate": 9.995234815594367e-07, "loss": 0.0184, "step": 107590 }, { "epoch": 1.1496340616485923, "grad_norm": 0.07635972648859024, "learning_rate": 9.995227479501532e-07, "loss": 0.0354, "step": 107600 }, { "epoch": 1.149740904962872, "grad_norm": 3.1697165966033936, "learning_rate": 9.99522013776871e-07, "loss": 0.0274, "step": 107610 }, { "epoch": 1.1498477482771516, "grad_norm": 11.293971061706543, "learning_rate": 9.995212790395907e-07, "loss": 0.05, "step": 107620 }, { "epoch": 1.149954591591431, "grad_norm": 7.1019287109375, "learning_rate": 9.99520543738313e-07, "loss": 0.0358, "step": 107630 }, { "epoch": 1.1500614349057108, "grad_norm": 2.5941436290740967, "learning_rate": 9.99519807873039e-07, "loss": 0.0525, "step": 107640 }, { "epoch": 1.1501682782199905, "grad_norm": 0.6622797250747681, "learning_rate": 9.995190714437696e-07, "loss": 0.0521, "step": 107650 }, { "epoch": 1.15027512153427, "grad_norm": 7.151516914367676, "learning_rate": 9.995183344505054e-07, "loss": 0.0381, "step": 107660 }, { "epoch": 1.1503819648485496, "grad_norm": 4.128460884094238, "learning_rate": 9.995175968932473e-07, "loss": 0.0232, "step": 107670 }, { "epoch": 1.1504888081628293, "grad_norm": 0.027719339355826378, "learning_rate": 9.995168587719963e-07, "loss": 0.1505, "step": 107680 }, { "epoch": 1.1505956514771087, "grad_norm": 0.09741706401109695, "learning_rate": 9.995161200867528e-07, "loss": 0.0362, "step": 107690 }, { "epoch": 1.1507024947913884, "grad_norm": 2.781989812850952, "learning_rate": 9.995153808375178e-07, "loss": 0.0272, "step": 107700 }, { "epoch": 1.150809338105668, "grad_norm": 0.12704165279865265, "learning_rate": 9.995146410242925e-07, "loss": 0.0074, "step": 107710 }, { "epoch": 1.1509161814199476, "grad_norm": 0.48707491159439087, "learning_rate": 9.995139006470774e-07, "loss": 0.0158, "step": 107720 }, { "epoch": 1.1510230247342272, "grad_norm": 1.584956407546997, "learning_rate": 9.995131597058733e-07, "loss": 0.0519, "step": 107730 }, { "epoch": 1.151129868048507, "grad_norm": 0.06601837277412415, "learning_rate": 9.995124182006812e-07, "loss": 0.0136, "step": 107740 }, { "epoch": 1.1512367113627864, "grad_norm": 4.80795431137085, "learning_rate": 9.99511676131502e-07, "loss": 0.0631, "step": 107750 }, { "epoch": 1.151343554677066, "grad_norm": 5.592727184295654, "learning_rate": 9.995109334983362e-07, "loss": 0.0243, "step": 107760 }, { "epoch": 1.1514503979913457, "grad_norm": 1.5766397714614868, "learning_rate": 9.99510190301185e-07, "loss": 0.0169, "step": 107770 }, { "epoch": 1.1515572413056252, "grad_norm": 6.303966999053955, "learning_rate": 9.995094465400492e-07, "loss": 0.0694, "step": 107780 }, { "epoch": 1.1516640846199049, "grad_norm": 8.196538925170898, "learning_rate": 9.995087022149292e-07, "loss": 0.1123, "step": 107790 }, { "epoch": 1.1517709279341846, "grad_norm": 3.0877766609191895, "learning_rate": 9.995079573258266e-07, "loss": 0.0131, "step": 107800 }, { "epoch": 1.151877771248464, "grad_norm": 6.684564113616943, "learning_rate": 9.995072118727416e-07, "loss": 0.0366, "step": 107810 }, { "epoch": 1.1519846145627437, "grad_norm": 0.1624971181154251, "learning_rate": 9.995064658556754e-07, "loss": 0.1271, "step": 107820 }, { "epoch": 1.1520914578770234, "grad_norm": 0.2311829924583435, "learning_rate": 9.995057192746284e-07, "loss": 0.0498, "step": 107830 }, { "epoch": 1.1521983011913028, "grad_norm": 0.25999975204467773, "learning_rate": 9.99504972129602e-07, "loss": 0.0559, "step": 107840 }, { "epoch": 1.1523051445055825, "grad_norm": 0.04786166921257973, "learning_rate": 9.995042244205967e-07, "loss": 0.0485, "step": 107850 }, { "epoch": 1.1524119878198622, "grad_norm": 0.5971741080284119, "learning_rate": 9.995034761476133e-07, "loss": 0.0201, "step": 107860 }, { "epoch": 1.1525188311341417, "grad_norm": 1.0029776096343994, "learning_rate": 9.99502727310653e-07, "loss": 0.0277, "step": 107870 }, { "epoch": 1.1526256744484213, "grad_norm": 0.19412821531295776, "learning_rate": 9.995019779097163e-07, "loss": 0.0144, "step": 107880 }, { "epoch": 1.152732517762701, "grad_norm": 0.704797625541687, "learning_rate": 9.99501227944804e-07, "loss": 0.0263, "step": 107890 }, { "epoch": 1.1528393610769805, "grad_norm": 0.3861342668533325, "learning_rate": 9.995004774159174e-07, "loss": 0.0375, "step": 107900 }, { "epoch": 1.1529462043912602, "grad_norm": 0.3093694746494293, "learning_rate": 9.99499726323057e-07, "loss": 0.0299, "step": 107910 }, { "epoch": 1.1530530477055398, "grad_norm": 3.8458943367004395, "learning_rate": 9.994989746662236e-07, "loss": 0.0453, "step": 107920 }, { "epoch": 1.1531598910198195, "grad_norm": 2.9380743503570557, "learning_rate": 9.994982224454184e-07, "loss": 0.038, "step": 107930 }, { "epoch": 1.153266734334099, "grad_norm": 1.374621868133545, "learning_rate": 9.994974696606418e-07, "loss": 0.0181, "step": 107940 }, { "epoch": 1.1533735776483787, "grad_norm": 1.0076149702072144, "learning_rate": 9.99496716311895e-07, "loss": 0.009, "step": 107950 }, { "epoch": 1.1534804209626583, "grad_norm": 1.8694007396697998, "learning_rate": 9.994959623991786e-07, "loss": 0.0402, "step": 107960 }, { "epoch": 1.1535872642769378, "grad_norm": 7.015064716339111, "learning_rate": 9.994952079224934e-07, "loss": 0.0679, "step": 107970 }, { "epoch": 1.1536941075912175, "grad_norm": 7.615896224975586, "learning_rate": 9.994944528818408e-07, "loss": 0.0676, "step": 107980 }, { "epoch": 1.1538009509054972, "grad_norm": 3.5726921558380127, "learning_rate": 9.99493697277221e-07, "loss": 0.0237, "step": 107990 }, { "epoch": 1.1539077942197766, "grad_norm": 4.609738826751709, "learning_rate": 9.99492941108635e-07, "loss": 0.0286, "step": 108000 }, { "epoch": 1.1540146375340563, "grad_norm": 0.006303600035607815, "learning_rate": 9.99492184376084e-07, "loss": 0.0082, "step": 108010 }, { "epoch": 1.154121480848336, "grad_norm": 1.928142786026001, "learning_rate": 9.994914270795686e-07, "loss": 0.0165, "step": 108020 }, { "epoch": 1.1542283241626154, "grad_norm": 12.0741548538208, "learning_rate": 9.994906692190896e-07, "loss": 0.0628, "step": 108030 }, { "epoch": 1.1543351674768951, "grad_norm": 5.918454170227051, "learning_rate": 9.99489910794648e-07, "loss": 0.0336, "step": 108040 }, { "epoch": 1.1544420107911748, "grad_norm": 9.017614364624023, "learning_rate": 9.994891518062446e-07, "loss": 0.0474, "step": 108050 }, { "epoch": 1.1545488541054543, "grad_norm": 0.07481761276721954, "learning_rate": 9.9948839225388e-07, "loss": 0.0215, "step": 108060 }, { "epoch": 1.154655697419734, "grad_norm": 0.5359746813774109, "learning_rate": 9.994876321375558e-07, "loss": 0.0247, "step": 108070 }, { "epoch": 1.1547625407340136, "grad_norm": 0.5990814566612244, "learning_rate": 9.99486871457272e-07, "loss": 0.0329, "step": 108080 }, { "epoch": 1.154869384048293, "grad_norm": 0.5304489135742188, "learning_rate": 9.994861102130298e-07, "loss": 0.0582, "step": 108090 }, { "epoch": 1.1549762273625728, "grad_norm": 4.495158672332764, "learning_rate": 9.994853484048304e-07, "loss": 0.0712, "step": 108100 }, { "epoch": 1.1550830706768525, "grad_norm": 0.46142280101776123, "learning_rate": 9.99484586032674e-07, "loss": 0.0246, "step": 108110 }, { "epoch": 1.155189913991132, "grad_norm": 0.13661639392375946, "learning_rate": 9.994838230965622e-07, "loss": 0.0145, "step": 108120 }, { "epoch": 1.1552967573054116, "grad_norm": 3.719663381576538, "learning_rate": 9.99483059596495e-07, "loss": 0.0787, "step": 108130 }, { "epoch": 1.1554036006196913, "grad_norm": 3.3836851119995117, "learning_rate": 9.994822955324739e-07, "loss": 0.0231, "step": 108140 }, { "epoch": 1.155510443933971, "grad_norm": 0.24108409881591797, "learning_rate": 9.994815309044997e-07, "loss": 0.05, "step": 108150 }, { "epoch": 1.1556172872482504, "grad_norm": 1.0182404518127441, "learning_rate": 9.994807657125732e-07, "loss": 0.0384, "step": 108160 }, { "epoch": 1.15572413056253, "grad_norm": 4.855999946594238, "learning_rate": 9.99479999956695e-07, "loss": 0.0194, "step": 108170 }, { "epoch": 1.1558309738768098, "grad_norm": 0.1391354501247406, "learning_rate": 9.994792336368664e-07, "loss": 0.0206, "step": 108180 }, { "epoch": 1.1559378171910892, "grad_norm": 6.860778331756592, "learning_rate": 9.994784667530878e-07, "loss": 0.0419, "step": 108190 }, { "epoch": 1.156044660505369, "grad_norm": 0.6528933644294739, "learning_rate": 9.994776993053605e-07, "loss": 0.0489, "step": 108200 }, { "epoch": 1.1561515038196486, "grad_norm": 3.569330930709839, "learning_rate": 9.99476931293685e-07, "loss": 0.1601, "step": 108210 }, { "epoch": 1.156258347133928, "grad_norm": 1.489315152168274, "learning_rate": 9.994761627180625e-07, "loss": 0.0224, "step": 108220 }, { "epoch": 1.1563651904482077, "grad_norm": 1.0962997674942017, "learning_rate": 9.994753935784938e-07, "loss": 0.0171, "step": 108230 }, { "epoch": 1.1564720337624874, "grad_norm": 1.1332147121429443, "learning_rate": 9.994746238749796e-07, "loss": 0.0468, "step": 108240 }, { "epoch": 1.1565788770767669, "grad_norm": 5.986055850982666, "learning_rate": 9.994738536075209e-07, "loss": 0.0503, "step": 108250 }, { "epoch": 1.1566857203910466, "grad_norm": 6.070154666900635, "learning_rate": 9.994730827761184e-07, "loss": 0.0282, "step": 108260 }, { "epoch": 1.1567925637053262, "grad_norm": 13.484365463256836, "learning_rate": 9.99472311380773e-07, "loss": 0.0322, "step": 108270 }, { "epoch": 1.1568994070196057, "grad_norm": 0.07023520022630692, "learning_rate": 9.994715394214858e-07, "loss": 0.0147, "step": 108280 }, { "epoch": 1.1570062503338854, "grad_norm": 0.3414316177368164, "learning_rate": 9.994707668982577e-07, "loss": 0.0441, "step": 108290 }, { "epoch": 1.157113093648165, "grad_norm": 1.5979771614074707, "learning_rate": 9.994699938110891e-07, "loss": 0.0215, "step": 108300 }, { "epoch": 1.1572199369624445, "grad_norm": 2.4279685020446777, "learning_rate": 9.994692201599813e-07, "loss": 0.0521, "step": 108310 }, { "epoch": 1.1573267802767242, "grad_norm": 1.6717251539230347, "learning_rate": 9.994684459449353e-07, "loss": 0.0367, "step": 108320 }, { "epoch": 1.1574336235910039, "grad_norm": 0.6117110848426819, "learning_rate": 9.994676711659514e-07, "loss": 0.0232, "step": 108330 }, { "epoch": 1.1575404669052833, "grad_norm": 5.74653434753418, "learning_rate": 9.99466895823031e-07, "loss": 0.0162, "step": 108340 }, { "epoch": 1.157647310219563, "grad_norm": 0.07580218464136124, "learning_rate": 9.994661199161745e-07, "loss": 0.0763, "step": 108350 }, { "epoch": 1.1577541535338427, "grad_norm": 11.839329719543457, "learning_rate": 9.994653434453831e-07, "loss": 0.0476, "step": 108360 }, { "epoch": 1.1578609968481222, "grad_norm": 3.7877440452575684, "learning_rate": 9.99464566410658e-07, "loss": 0.0144, "step": 108370 }, { "epoch": 1.1579678401624018, "grad_norm": 0.30934953689575195, "learning_rate": 9.994637888119995e-07, "loss": 0.0247, "step": 108380 }, { "epoch": 1.1580746834766815, "grad_norm": 1.048814296722412, "learning_rate": 9.994630106494087e-07, "loss": 0.051, "step": 108390 }, { "epoch": 1.158181526790961, "grad_norm": 2.1097054481506348, "learning_rate": 9.994622319228865e-07, "loss": 0.0144, "step": 108400 }, { "epoch": 1.1582883701052407, "grad_norm": 18.747541427612305, "learning_rate": 9.994614526324336e-07, "loss": 0.0907, "step": 108410 }, { "epoch": 1.1583952134195203, "grad_norm": 7.9464192390441895, "learning_rate": 9.99460672778051e-07, "loss": 0.0851, "step": 108420 }, { "epoch": 1.1585020567337998, "grad_norm": 12.629828453063965, "learning_rate": 9.994598923597397e-07, "loss": 0.0246, "step": 108430 }, { "epoch": 1.1586089000480795, "grad_norm": 0.21119262278079987, "learning_rate": 9.994591113775007e-07, "loss": 0.0232, "step": 108440 }, { "epoch": 1.1587157433623592, "grad_norm": 0.28480154275894165, "learning_rate": 9.994583298313343e-07, "loss": 0.0429, "step": 108450 }, { "epoch": 1.1588225866766386, "grad_norm": 5.334630966186523, "learning_rate": 9.99457547721242e-07, "loss": 0.0302, "step": 108460 }, { "epoch": 1.1589294299909183, "grad_norm": 4.117889404296875, "learning_rate": 9.994567650472243e-07, "loss": 0.0276, "step": 108470 }, { "epoch": 1.159036273305198, "grad_norm": 2.6911416053771973, "learning_rate": 9.994559818092822e-07, "loss": 0.0196, "step": 108480 }, { "epoch": 1.1591431166194774, "grad_norm": 0.07651438564062119, "learning_rate": 9.994551980074166e-07, "loss": 0.0522, "step": 108490 }, { "epoch": 1.1592499599337571, "grad_norm": 3.077359676361084, "learning_rate": 9.994544136416286e-07, "loss": 0.0421, "step": 108500 }, { "epoch": 1.1593568032480368, "grad_norm": 0.05474550649523735, "learning_rate": 9.994536287119185e-07, "loss": 0.0258, "step": 108510 }, { "epoch": 1.1594636465623163, "grad_norm": 2.3981523513793945, "learning_rate": 9.994528432182879e-07, "loss": 0.0435, "step": 108520 }, { "epoch": 1.159570489876596, "grad_norm": 0.01883770525455475, "learning_rate": 9.994520571607372e-07, "loss": 0.0364, "step": 108530 }, { "epoch": 1.1596773331908756, "grad_norm": 22.190204620361328, "learning_rate": 9.994512705392675e-07, "loss": 0.0381, "step": 108540 }, { "epoch": 1.159784176505155, "grad_norm": 17.788339614868164, "learning_rate": 9.994504833538795e-07, "loss": 0.0523, "step": 108550 }, { "epoch": 1.1598910198194348, "grad_norm": 8.21235466003418, "learning_rate": 9.994496956045743e-07, "loss": 0.0764, "step": 108560 }, { "epoch": 1.1599978631337144, "grad_norm": 1.2111146450042725, "learning_rate": 9.994489072913528e-07, "loss": 0.0294, "step": 108570 }, { "epoch": 1.160104706447994, "grad_norm": 0.03750774636864662, "learning_rate": 9.994481184142156e-07, "loss": 0.0204, "step": 108580 }, { "epoch": 1.1602115497622736, "grad_norm": 0.21751075983047485, "learning_rate": 9.994473289731638e-07, "loss": 0.0581, "step": 108590 }, { "epoch": 1.1603183930765533, "grad_norm": 2.4287075996398926, "learning_rate": 9.994465389681984e-07, "loss": 0.0493, "step": 108600 }, { "epoch": 1.1604252363908327, "grad_norm": 0.11437323689460754, "learning_rate": 9.9944574839932e-07, "loss": 0.1011, "step": 108610 }, { "epoch": 1.1605320797051124, "grad_norm": 0.04564063623547554, "learning_rate": 9.994449572665298e-07, "loss": 0.0353, "step": 108620 }, { "epoch": 1.160638923019392, "grad_norm": 6.412452697753906, "learning_rate": 9.994441655698284e-07, "loss": 0.0347, "step": 108630 }, { "epoch": 1.1607457663336715, "grad_norm": 4.017843723297119, "learning_rate": 9.99443373309217e-07, "loss": 0.018, "step": 108640 }, { "epoch": 1.1608526096479512, "grad_norm": 0.13592903316020966, "learning_rate": 9.994425804846962e-07, "loss": 0.0222, "step": 108650 }, { "epoch": 1.160959452962231, "grad_norm": 6.402827262878418, "learning_rate": 9.994417870962672e-07, "loss": 0.0462, "step": 108660 }, { "epoch": 1.1610662962765106, "grad_norm": 5.085036754608154, "learning_rate": 9.994409931439307e-07, "loss": 0.0105, "step": 108670 }, { "epoch": 1.16117313959079, "grad_norm": 0.05368155613541603, "learning_rate": 9.994401986276875e-07, "loss": 0.0599, "step": 108680 }, { "epoch": 1.1612799829050697, "grad_norm": 12.412936210632324, "learning_rate": 9.994394035475386e-07, "loss": 0.0548, "step": 108690 }, { "epoch": 1.1613868262193494, "grad_norm": 2.9460232257843018, "learning_rate": 9.99438607903485e-07, "loss": 0.0481, "step": 108700 }, { "epoch": 1.1614936695336289, "grad_norm": 3.2117249965667725, "learning_rate": 9.994378116955275e-07, "loss": 0.0294, "step": 108710 }, { "epoch": 1.1616005128479086, "grad_norm": 2.1769046783447266, "learning_rate": 9.994370149236671e-07, "loss": 0.0461, "step": 108720 }, { "epoch": 1.1617073561621882, "grad_norm": 2.923966407775879, "learning_rate": 9.994362175879045e-07, "loss": 0.0431, "step": 108730 }, { "epoch": 1.1618141994764677, "grad_norm": 0.5115869641304016, "learning_rate": 9.994354196882407e-07, "loss": 0.056, "step": 108740 }, { "epoch": 1.1619210427907474, "grad_norm": 3.3141160011291504, "learning_rate": 9.994346212246768e-07, "loss": 0.03, "step": 108750 }, { "epoch": 1.162027886105027, "grad_norm": 2.3429083824157715, "learning_rate": 9.994338221972132e-07, "loss": 0.0189, "step": 108760 }, { "epoch": 1.1621347294193065, "grad_norm": 4.365110397338867, "learning_rate": 9.994330226058514e-07, "loss": 0.0183, "step": 108770 }, { "epoch": 1.1622415727335862, "grad_norm": 1.3989697694778442, "learning_rate": 9.99432222450592e-07, "loss": 0.0186, "step": 108780 }, { "epoch": 1.1623484160478659, "grad_norm": 1.5214611291885376, "learning_rate": 9.994314217314359e-07, "loss": 0.0305, "step": 108790 }, { "epoch": 1.1624552593621453, "grad_norm": 12.760724067687988, "learning_rate": 9.99430620448384e-07, "loss": 0.0502, "step": 108800 }, { "epoch": 1.162562102676425, "grad_norm": 14.623437881469727, "learning_rate": 9.99429818601437e-07, "loss": 0.0218, "step": 108810 }, { "epoch": 1.1626689459907047, "grad_norm": 0.2788968086242676, "learning_rate": 9.994290161905963e-07, "loss": 0.038, "step": 108820 }, { "epoch": 1.1627757893049842, "grad_norm": 0.28782275319099426, "learning_rate": 9.994282132158626e-07, "loss": 0.0202, "step": 108830 }, { "epoch": 1.1628826326192638, "grad_norm": 21.254907608032227, "learning_rate": 9.994274096772366e-07, "loss": 0.0376, "step": 108840 }, { "epoch": 1.1629894759335435, "grad_norm": 3.4822988510131836, "learning_rate": 9.994266055747197e-07, "loss": 0.0636, "step": 108850 }, { "epoch": 1.163096319247823, "grad_norm": 0.11415193229913712, "learning_rate": 9.99425800908312e-07, "loss": 0.0156, "step": 108860 }, { "epoch": 1.1632031625621027, "grad_norm": 0.31448936462402344, "learning_rate": 9.99424995678015e-07, "loss": 0.019, "step": 108870 }, { "epoch": 1.1633100058763823, "grad_norm": 2.135852098464966, "learning_rate": 9.994241898838296e-07, "loss": 0.0231, "step": 108880 }, { "epoch": 1.163416849190662, "grad_norm": 4.037700653076172, "learning_rate": 9.994233835257567e-07, "loss": 0.0394, "step": 108890 }, { "epoch": 1.1635236925049415, "grad_norm": 0.8883311748504639, "learning_rate": 9.99422576603797e-07, "loss": 0.0177, "step": 108900 }, { "epoch": 1.1636305358192212, "grad_norm": 1.524303913116455, "learning_rate": 9.994217691179515e-07, "loss": 0.0307, "step": 108910 }, { "epoch": 1.1637373791335008, "grad_norm": 5.418127536773682, "learning_rate": 9.99420961068221e-07, "loss": 0.0235, "step": 108920 }, { "epoch": 1.1638442224477803, "grad_norm": 4.784289836883545, "learning_rate": 9.994201524546066e-07, "loss": 0.0313, "step": 108930 }, { "epoch": 1.16395106576206, "grad_norm": 0.809698224067688, "learning_rate": 9.994193432771092e-07, "loss": 0.0198, "step": 108940 }, { "epoch": 1.1640579090763397, "grad_norm": 2.434528350830078, "learning_rate": 9.994185335357296e-07, "loss": 0.0289, "step": 108950 }, { "epoch": 1.1641647523906191, "grad_norm": 0.01550885010510683, "learning_rate": 9.994177232304691e-07, "loss": 0.0293, "step": 108960 }, { "epoch": 1.1642715957048988, "grad_norm": 0.329593688249588, "learning_rate": 9.99416912361328e-07, "loss": 0.0164, "step": 108970 }, { "epoch": 1.1643784390191785, "grad_norm": 0.1405123621225357, "learning_rate": 9.994161009283075e-07, "loss": 0.0507, "step": 108980 }, { "epoch": 1.164485282333458, "grad_norm": 3.032691240310669, "learning_rate": 9.994152889314087e-07, "loss": 0.0023, "step": 108990 }, { "epoch": 1.1645921256477376, "grad_norm": 4.794215679168701, "learning_rate": 9.994144763706323e-07, "loss": 0.0706, "step": 109000 }, { "epoch": 1.1646989689620173, "grad_norm": 5.462796688079834, "learning_rate": 9.99413663245979e-07, "loss": 0.0536, "step": 109010 }, { "epoch": 1.1648058122762968, "grad_norm": 3.8061139583587646, "learning_rate": 9.994128495574502e-07, "loss": 0.0381, "step": 109020 }, { "epoch": 1.1649126555905764, "grad_norm": 1.1041131019592285, "learning_rate": 9.994120353050467e-07, "loss": 0.0306, "step": 109030 }, { "epoch": 1.1650194989048561, "grad_norm": 1.3655693531036377, "learning_rate": 9.994112204887693e-07, "loss": 0.0201, "step": 109040 }, { "epoch": 1.1651263422191356, "grad_norm": 3.405170440673828, "learning_rate": 9.994104051086187e-07, "loss": 0.0329, "step": 109050 }, { "epoch": 1.1652331855334153, "grad_norm": 0.16828101873397827, "learning_rate": 9.994095891645964e-07, "loss": 0.0327, "step": 109060 }, { "epoch": 1.165340028847695, "grad_norm": 0.32354941964149475, "learning_rate": 9.994087726567027e-07, "loss": 0.0343, "step": 109070 }, { "epoch": 1.1654468721619744, "grad_norm": 0.3123052716255188, "learning_rate": 9.994079555849388e-07, "loss": 0.0735, "step": 109080 }, { "epoch": 1.165553715476254, "grad_norm": 3.909428358078003, "learning_rate": 9.994071379493059e-07, "loss": 0.0326, "step": 109090 }, { "epoch": 1.1656605587905338, "grad_norm": 4.890223026275635, "learning_rate": 9.994063197498044e-07, "loss": 0.0486, "step": 109100 }, { "epoch": 1.1657674021048132, "grad_norm": 7.353977203369141, "learning_rate": 9.994055009864355e-07, "loss": 0.0326, "step": 109110 }, { "epoch": 1.165874245419093, "grad_norm": 1.846937894821167, "learning_rate": 9.994046816592e-07, "loss": 0.0183, "step": 109120 }, { "epoch": 1.1659810887333726, "grad_norm": 0.022595448419451714, "learning_rate": 9.994038617680993e-07, "loss": 0.0081, "step": 109130 }, { "epoch": 1.166087932047652, "grad_norm": 6.936584949493408, "learning_rate": 9.994030413131335e-07, "loss": 0.0857, "step": 109140 }, { "epoch": 1.1661947753619317, "grad_norm": 0.1964384913444519, "learning_rate": 9.994022202943043e-07, "loss": 0.0739, "step": 109150 }, { "epoch": 1.1663016186762114, "grad_norm": 1.4569661617279053, "learning_rate": 9.994013987116122e-07, "loss": 0.0212, "step": 109160 }, { "epoch": 1.1664084619904909, "grad_norm": 0.12117816507816315, "learning_rate": 9.994005765650583e-07, "loss": 0.0567, "step": 109170 }, { "epoch": 1.1665153053047705, "grad_norm": 10.89037799835205, "learning_rate": 9.993997538546433e-07, "loss": 0.1272, "step": 109180 }, { "epoch": 1.1666221486190502, "grad_norm": 0.7653193473815918, "learning_rate": 9.993989305803684e-07, "loss": 0.0161, "step": 109190 }, { "epoch": 1.1667289919333297, "grad_norm": 0.3757137358188629, "learning_rate": 9.993981067422342e-07, "loss": 0.0323, "step": 109200 }, { "epoch": 1.1668358352476094, "grad_norm": 8.114620208740234, "learning_rate": 9.993972823402421e-07, "loss": 0.0335, "step": 109210 }, { "epoch": 1.166942678561889, "grad_norm": 0.49018919467926025, "learning_rate": 9.993964573743929e-07, "loss": 0.0132, "step": 109220 }, { "epoch": 1.1670495218761685, "grad_norm": 0.12482833862304688, "learning_rate": 9.993956318446871e-07, "loss": 0.0032, "step": 109230 }, { "epoch": 1.1671563651904482, "grad_norm": 1.7089529037475586, "learning_rate": 9.993948057511262e-07, "loss": 0.0386, "step": 109240 }, { "epoch": 1.1672632085047279, "grad_norm": 2.120948553085327, "learning_rate": 9.993939790937106e-07, "loss": 0.0168, "step": 109250 }, { "epoch": 1.1673700518190073, "grad_norm": 3.587116003036499, "learning_rate": 9.993931518724418e-07, "loss": 0.021, "step": 109260 }, { "epoch": 1.167476895133287, "grad_norm": 3.7486629486083984, "learning_rate": 9.9939232408732e-07, "loss": 0.0319, "step": 109270 }, { "epoch": 1.1675837384475667, "grad_norm": 1.4225167036056519, "learning_rate": 9.993914957383468e-07, "loss": 0.0289, "step": 109280 }, { "epoch": 1.1676905817618461, "grad_norm": 1.414688229560852, "learning_rate": 9.993906668255231e-07, "loss": 0.0122, "step": 109290 }, { "epoch": 1.1677974250761258, "grad_norm": 0.0306057371199131, "learning_rate": 9.993898373488497e-07, "loss": 0.0065, "step": 109300 }, { "epoch": 1.1679042683904055, "grad_norm": 2.5458357334136963, "learning_rate": 9.993890073083271e-07, "loss": 0.0677, "step": 109310 }, { "epoch": 1.168011111704685, "grad_norm": 0.0770777016878128, "learning_rate": 9.99388176703957e-07, "loss": 0.023, "step": 109320 }, { "epoch": 1.1681179550189646, "grad_norm": 1.8982564210891724, "learning_rate": 9.993873455357395e-07, "loss": 0.0373, "step": 109330 }, { "epoch": 1.1682247983332443, "grad_norm": 10.746925354003906, "learning_rate": 9.993865138036763e-07, "loss": 0.0702, "step": 109340 }, { "epoch": 1.1683316416475238, "grad_norm": 1.4509167671203613, "learning_rate": 9.99385681507768e-07, "loss": 0.0324, "step": 109350 }, { "epoch": 1.1684384849618035, "grad_norm": 5.826963901519775, "learning_rate": 9.993848486480156e-07, "loss": 0.0287, "step": 109360 }, { "epoch": 1.1685453282760832, "grad_norm": 0.006814881227910519, "learning_rate": 9.993840152244202e-07, "loss": 0.0728, "step": 109370 }, { "epoch": 1.1686521715903626, "grad_norm": 1.2281492948532104, "learning_rate": 9.993831812369822e-07, "loss": 0.047, "step": 109380 }, { "epoch": 1.1687590149046423, "grad_norm": 0.4528576135635376, "learning_rate": 9.99382346685703e-07, "loss": 0.0137, "step": 109390 }, { "epoch": 1.168865858218922, "grad_norm": 0.02224944531917572, "learning_rate": 9.993815115705835e-07, "loss": 0.0605, "step": 109400 }, { "epoch": 1.1689727015332017, "grad_norm": 2.9498963356018066, "learning_rate": 9.993806758916246e-07, "loss": 0.0385, "step": 109410 }, { "epoch": 1.1690795448474811, "grad_norm": 2.113517999649048, "learning_rate": 9.993798396488272e-07, "loss": 0.0069, "step": 109420 }, { "epoch": 1.1691863881617608, "grad_norm": 0.037005625665187836, "learning_rate": 9.993790028421921e-07, "loss": 0.0099, "step": 109430 }, { "epoch": 1.1692932314760405, "grad_norm": 3.26261830329895, "learning_rate": 9.99378165471721e-07, "loss": 0.0286, "step": 109440 }, { "epoch": 1.16940007479032, "grad_norm": 7.179574489593506, "learning_rate": 9.993773275374135e-07, "loss": 0.0337, "step": 109450 }, { "epoch": 1.1695069181045996, "grad_norm": 0.6652122139930725, "learning_rate": 9.993764890392717e-07, "loss": 0.0201, "step": 109460 }, { "epoch": 1.1696137614188793, "grad_norm": 6.257645130157471, "learning_rate": 9.993756499772962e-07, "loss": 0.0656, "step": 109470 }, { "epoch": 1.1697206047331588, "grad_norm": 0.03790101036429405, "learning_rate": 9.993748103514878e-07, "loss": 0.02, "step": 109480 }, { "epoch": 1.1698274480474384, "grad_norm": 0.0867660865187645, "learning_rate": 9.993739701618476e-07, "loss": 0.015, "step": 109490 }, { "epoch": 1.1699342913617181, "grad_norm": 1.1235123872756958, "learning_rate": 9.993731294083766e-07, "loss": 0.0192, "step": 109500 }, { "epoch": 1.1700411346759976, "grad_norm": 2.297274589538574, "learning_rate": 9.993722880910753e-07, "loss": 0.0174, "step": 109510 }, { "epoch": 1.1701479779902773, "grad_norm": 0.044981177896261215, "learning_rate": 9.993714462099453e-07, "loss": 0.1556, "step": 109520 }, { "epoch": 1.170254821304557, "grad_norm": 6.123239517211914, "learning_rate": 9.993706037649871e-07, "loss": 0.0424, "step": 109530 }, { "epoch": 1.1703616646188364, "grad_norm": 18.51181983947754, "learning_rate": 9.99369760756202e-07, "loss": 0.0303, "step": 109540 }, { "epoch": 1.170468507933116, "grad_norm": 0.014459870755672455, "learning_rate": 9.993689171835905e-07, "loss": 0.0349, "step": 109550 }, { "epoch": 1.1705753512473958, "grad_norm": 9.820515632629395, "learning_rate": 9.99368073047154e-07, "loss": 0.0668, "step": 109560 }, { "epoch": 1.1706821945616752, "grad_norm": 0.6977986693382263, "learning_rate": 9.99367228346893e-07, "loss": 0.032, "step": 109570 }, { "epoch": 1.170789037875955, "grad_norm": 2.8747711181640625, "learning_rate": 9.99366383082809e-07, "loss": 0.0535, "step": 109580 }, { "epoch": 1.1708958811902346, "grad_norm": 12.369942665100098, "learning_rate": 9.993655372549024e-07, "loss": 0.056, "step": 109590 }, { "epoch": 1.171002724504514, "grad_norm": 14.979674339294434, "learning_rate": 9.993646908631746e-07, "loss": 0.0593, "step": 109600 }, { "epoch": 1.1711095678187937, "grad_norm": 1.2157236337661743, "learning_rate": 9.993638439076262e-07, "loss": 0.0268, "step": 109610 }, { "epoch": 1.1712164111330734, "grad_norm": 0.06602049618959427, "learning_rate": 9.993629963882587e-07, "loss": 0.0077, "step": 109620 }, { "epoch": 1.171323254447353, "grad_norm": 2.9080467224121094, "learning_rate": 9.993621483050723e-07, "loss": 0.0168, "step": 109630 }, { "epoch": 1.1714300977616325, "grad_norm": 0.046810753643512726, "learning_rate": 9.993612996580684e-07, "loss": 0.022, "step": 109640 }, { "epoch": 1.1715369410759122, "grad_norm": 5.681913375854492, "learning_rate": 9.993604504472482e-07, "loss": 0.0252, "step": 109650 }, { "epoch": 1.171643784390192, "grad_norm": 1.225430965423584, "learning_rate": 9.99359600672612e-07, "loss": 0.0247, "step": 109660 }, { "epoch": 1.1717506277044714, "grad_norm": 0.11685330420732498, "learning_rate": 9.993587503341615e-07, "loss": 0.0125, "step": 109670 }, { "epoch": 1.171857471018751, "grad_norm": 5.983734130859375, "learning_rate": 9.99357899431897e-07, "loss": 0.0188, "step": 109680 }, { "epoch": 1.1719643143330307, "grad_norm": 7.015158653259277, "learning_rate": 9.993570479658198e-07, "loss": 0.0278, "step": 109690 }, { "epoch": 1.1720711576473102, "grad_norm": 2.381594181060791, "learning_rate": 9.993561959359307e-07, "loss": 0.0613, "step": 109700 }, { "epoch": 1.1721780009615899, "grad_norm": 1.3597489595413208, "learning_rate": 9.99355343342231e-07, "loss": 0.0118, "step": 109710 }, { "epoch": 1.1722848442758695, "grad_norm": 0.4657095968723297, "learning_rate": 9.993544901847214e-07, "loss": 0.0283, "step": 109720 }, { "epoch": 1.172391687590149, "grad_norm": 0.039154283702373505, "learning_rate": 9.993536364634025e-07, "loss": 0.0139, "step": 109730 }, { "epoch": 1.1724985309044287, "grad_norm": 2.8250951766967773, "learning_rate": 9.99352782178276e-07, "loss": 0.0153, "step": 109740 }, { "epoch": 1.1726053742187084, "grad_norm": 6.918188095092773, "learning_rate": 9.993519273293424e-07, "loss": 0.0269, "step": 109750 }, { "epoch": 1.1727122175329878, "grad_norm": 1.9170479774475098, "learning_rate": 9.99351071916603e-07, "loss": 0.0177, "step": 109760 }, { "epoch": 1.1728190608472675, "grad_norm": 15.969566345214844, "learning_rate": 9.993502159400584e-07, "loss": 0.0233, "step": 109770 }, { "epoch": 1.1729259041615472, "grad_norm": 5.019729137420654, "learning_rate": 9.993493593997097e-07, "loss": 0.0484, "step": 109780 }, { "epoch": 1.1730327474758266, "grad_norm": 0.910358726978302, "learning_rate": 9.99348502295558e-07, "loss": 0.0138, "step": 109790 }, { "epoch": 1.1731395907901063, "grad_norm": 0.5710175633430481, "learning_rate": 9.993476446276041e-07, "loss": 0.0285, "step": 109800 }, { "epoch": 1.173246434104386, "grad_norm": 0.19969019293785095, "learning_rate": 9.99346786395849e-07, "loss": 0.0077, "step": 109810 }, { "epoch": 1.1733532774186655, "grad_norm": 0.9970614910125732, "learning_rate": 9.993459276002937e-07, "loss": 0.065, "step": 109820 }, { "epoch": 1.1734601207329451, "grad_norm": 5.401449203491211, "learning_rate": 9.99345068240939e-07, "loss": 0.0325, "step": 109830 }, { "epoch": 1.1735669640472248, "grad_norm": 0.3052845001220703, "learning_rate": 9.993442083177864e-07, "loss": 0.0337, "step": 109840 }, { "epoch": 1.1736738073615043, "grad_norm": 1.8219858407974243, "learning_rate": 9.993433478308361e-07, "loss": 0.0306, "step": 109850 }, { "epoch": 1.173780650675784, "grad_norm": 16.021499633789062, "learning_rate": 9.993424867800897e-07, "loss": 0.05, "step": 109860 }, { "epoch": 1.1738874939900636, "grad_norm": 5.088906764984131, "learning_rate": 9.99341625165548e-07, "loss": 0.0224, "step": 109870 }, { "epoch": 1.173994337304343, "grad_norm": 1.4186077117919922, "learning_rate": 9.993407629872118e-07, "loss": 0.0215, "step": 109880 }, { "epoch": 1.1741011806186228, "grad_norm": 2.041515350341797, "learning_rate": 9.993399002450822e-07, "loss": 0.1301, "step": 109890 }, { "epoch": 1.1742080239329025, "grad_norm": 0.06596294790506363, "learning_rate": 9.993390369391601e-07, "loss": 0.0375, "step": 109900 }, { "epoch": 1.174314867247182, "grad_norm": 4.973565578460693, "learning_rate": 9.993381730694467e-07, "loss": 0.0125, "step": 109910 }, { "epoch": 1.1744217105614616, "grad_norm": 12.898569107055664, "learning_rate": 9.993373086359427e-07, "loss": 0.0341, "step": 109920 }, { "epoch": 1.1745285538757413, "grad_norm": 1.7336784601211548, "learning_rate": 9.993364436386491e-07, "loss": 0.0367, "step": 109930 }, { "epoch": 1.1746353971900207, "grad_norm": 1.539185881614685, "learning_rate": 9.993355780775673e-07, "loss": 0.031, "step": 109940 }, { "epoch": 1.1747422405043004, "grad_norm": 7.687521457672119, "learning_rate": 9.993347119526977e-07, "loss": 0.0186, "step": 109950 }, { "epoch": 1.17484908381858, "grad_norm": 11.731863975524902, "learning_rate": 9.993338452640415e-07, "loss": 0.0652, "step": 109960 }, { "epoch": 1.1749559271328596, "grad_norm": 1.0966455936431885, "learning_rate": 9.993329780115998e-07, "loss": 0.0119, "step": 109970 }, { "epoch": 1.1750627704471392, "grad_norm": 0.4401910901069641, "learning_rate": 9.993321101953734e-07, "loss": 0.0336, "step": 109980 }, { "epoch": 1.175169613761419, "grad_norm": 0.5615655183792114, "learning_rate": 9.993312418153635e-07, "loss": 0.0228, "step": 109990 }, { "epoch": 1.1752764570756984, "grad_norm": 0.9977090954780579, "learning_rate": 9.993303728715708e-07, "loss": 0.02, "step": 110000 }, { "epoch": 1.175383300389978, "grad_norm": 3.4838480949401855, "learning_rate": 9.993295033639964e-07, "loss": 0.127, "step": 110010 }, { "epoch": 1.1754901437042578, "grad_norm": 6.462900161743164, "learning_rate": 9.993286332926413e-07, "loss": 0.0405, "step": 110020 }, { "epoch": 1.1755969870185372, "grad_norm": 20.45551300048828, "learning_rate": 9.993277626575068e-07, "loss": 0.0542, "step": 110030 }, { "epoch": 1.175703830332817, "grad_norm": 1.183366060256958, "learning_rate": 9.993268914585933e-07, "loss": 0.0535, "step": 110040 }, { "epoch": 1.1758106736470966, "grad_norm": 5.295804977416992, "learning_rate": 9.99326019695902e-07, "loss": 0.0302, "step": 110050 }, { "epoch": 1.175917516961376, "grad_norm": 0.01764339953660965, "learning_rate": 9.99325147369434e-07, "loss": 0.0358, "step": 110060 }, { "epoch": 1.1760243602756557, "grad_norm": 1.509482502937317, "learning_rate": 9.993242744791903e-07, "loss": 0.0511, "step": 110070 }, { "epoch": 1.1761312035899354, "grad_norm": 0.010438569821417332, "learning_rate": 9.993234010251717e-07, "loss": 0.03, "step": 110080 }, { "epoch": 1.1762380469042149, "grad_norm": 3.294058084487915, "learning_rate": 9.993225270073795e-07, "loss": 0.039, "step": 110090 }, { "epoch": 1.1763448902184945, "grad_norm": 4.325852870941162, "learning_rate": 9.993216524258144e-07, "loss": 0.0102, "step": 110100 }, { "epoch": 1.1764517335327742, "grad_norm": 6.196943283081055, "learning_rate": 9.993207772804772e-07, "loss": 0.0373, "step": 110110 }, { "epoch": 1.1765585768470537, "grad_norm": 2.1282520294189453, "learning_rate": 9.993199015713695e-07, "loss": 0.0319, "step": 110120 }, { "epoch": 1.1766654201613334, "grad_norm": 0.03437464311718941, "learning_rate": 9.993190252984919e-07, "loss": 0.027, "step": 110130 }, { "epoch": 1.176772263475613, "grad_norm": 1.702452540397644, "learning_rate": 9.993181484618452e-07, "loss": 0.029, "step": 110140 }, { "epoch": 1.1768791067898927, "grad_norm": 6.003500938415527, "learning_rate": 9.993172710614308e-07, "loss": 0.0562, "step": 110150 }, { "epoch": 1.1769859501041722, "grad_norm": 1.2512671947479248, "learning_rate": 9.993163930972495e-07, "loss": 0.0397, "step": 110160 }, { "epoch": 1.1770927934184519, "grad_norm": 0.1205114796757698, "learning_rate": 9.993155145693021e-07, "loss": 0.0256, "step": 110170 }, { "epoch": 1.1771996367327315, "grad_norm": 0.552117645740509, "learning_rate": 9.993146354775902e-07, "loss": 0.0409, "step": 110180 }, { "epoch": 1.177306480047011, "grad_norm": 1.6234570741653442, "learning_rate": 9.99313755822114e-07, "loss": 0.0154, "step": 110190 }, { "epoch": 1.1774133233612907, "grad_norm": 0.0341365672647953, "learning_rate": 9.993128756028752e-07, "loss": 0.0266, "step": 110200 }, { "epoch": 1.1775201666755704, "grad_norm": 2.1157937049865723, "learning_rate": 9.993119948198743e-07, "loss": 0.0207, "step": 110210 }, { "epoch": 1.1776270099898498, "grad_norm": 5.352428436279297, "learning_rate": 9.993111134731125e-07, "loss": 0.0314, "step": 110220 }, { "epoch": 1.1777338533041295, "grad_norm": 3.304872512817383, "learning_rate": 9.993102315625907e-07, "loss": 0.0439, "step": 110230 }, { "epoch": 1.1778406966184092, "grad_norm": 3.9200165271759033, "learning_rate": 9.9930934908831e-07, "loss": 0.0273, "step": 110240 }, { "epoch": 1.1779475399326886, "grad_norm": 0.38978856801986694, "learning_rate": 9.993084660502715e-07, "loss": 0.0278, "step": 110250 }, { "epoch": 1.1780543832469683, "grad_norm": 0.6914147138595581, "learning_rate": 9.99307582448476e-07, "loss": 0.0563, "step": 110260 }, { "epoch": 1.178161226561248, "grad_norm": 0.7180715799331665, "learning_rate": 9.993066982829245e-07, "loss": 0.0348, "step": 110270 }, { "epoch": 1.1782680698755275, "grad_norm": 1.353888750076294, "learning_rate": 9.993058135536181e-07, "loss": 0.0132, "step": 110280 }, { "epoch": 1.1783749131898071, "grad_norm": 7.769191265106201, "learning_rate": 9.993049282605577e-07, "loss": 0.0326, "step": 110290 }, { "epoch": 1.1784817565040868, "grad_norm": 4.060413360595703, "learning_rate": 9.993040424037443e-07, "loss": 0.0383, "step": 110300 }, { "epoch": 1.1785885998183663, "grad_norm": 0.00828427542001009, "learning_rate": 9.993031559831791e-07, "loss": 0.0037, "step": 110310 }, { "epoch": 1.178695443132646, "grad_norm": 0.7861016988754272, "learning_rate": 9.993022689988628e-07, "loss": 0.017, "step": 110320 }, { "epoch": 1.1788022864469256, "grad_norm": 5.414020538330078, "learning_rate": 9.993013814507966e-07, "loss": 0.0445, "step": 110330 }, { "epoch": 1.178909129761205, "grad_norm": 1.030735731124878, "learning_rate": 9.993004933389816e-07, "loss": 0.0372, "step": 110340 }, { "epoch": 1.1790159730754848, "grad_norm": 0.102728471159935, "learning_rate": 9.992996046634186e-07, "loss": 0.024, "step": 110350 }, { "epoch": 1.1791228163897645, "grad_norm": 0.11693726480007172, "learning_rate": 9.992987154241086e-07, "loss": 0.0273, "step": 110360 }, { "epoch": 1.1792296597040441, "grad_norm": 0.14470438659191132, "learning_rate": 9.992978256210528e-07, "loss": 0.0551, "step": 110370 }, { "epoch": 1.1793365030183236, "grad_norm": 0.7177654504776001, "learning_rate": 9.992969352542519e-07, "loss": 0.0169, "step": 110380 }, { "epoch": 1.1794433463326033, "grad_norm": 1.6789902448654175, "learning_rate": 9.992960443237071e-07, "loss": 0.0595, "step": 110390 }, { "epoch": 1.179550189646883, "grad_norm": 0.06598544865846634, "learning_rate": 9.992951528294194e-07, "loss": 0.0601, "step": 110400 }, { "epoch": 1.1796570329611624, "grad_norm": 2.829338312149048, "learning_rate": 9.992942607713898e-07, "loss": 0.028, "step": 110410 }, { "epoch": 1.179763876275442, "grad_norm": 3.1491339206695557, "learning_rate": 9.992933681496192e-07, "loss": 0.0291, "step": 110420 }, { "epoch": 1.1798707195897218, "grad_norm": 12.59181022644043, "learning_rate": 9.99292474964109e-07, "loss": 0.1003, "step": 110430 }, { "epoch": 1.1799775629040012, "grad_norm": 0.31348636746406555, "learning_rate": 9.992915812148596e-07, "loss": 0.0214, "step": 110440 }, { "epoch": 1.180084406218281, "grad_norm": 6.4552836418151855, "learning_rate": 9.992906869018724e-07, "loss": 0.0136, "step": 110450 }, { "epoch": 1.1801912495325606, "grad_norm": 0.7366943955421448, "learning_rate": 9.992897920251483e-07, "loss": 0.015, "step": 110460 }, { "epoch": 1.18029809284684, "grad_norm": 3.1980714797973633, "learning_rate": 9.992888965846885e-07, "loss": 0.0294, "step": 110470 }, { "epoch": 1.1804049361611197, "grad_norm": 3.2100470066070557, "learning_rate": 9.992880005804937e-07, "loss": 0.0697, "step": 110480 }, { "epoch": 1.1805117794753994, "grad_norm": 0.17338338494300842, "learning_rate": 9.992871040125652e-07, "loss": 0.036, "step": 110490 }, { "epoch": 1.1806186227896789, "grad_norm": 49.72328567504883, "learning_rate": 9.992862068809037e-07, "loss": 0.0946, "step": 110500 }, { "epoch": 1.1807254661039586, "grad_norm": 8.134413719177246, "learning_rate": 9.992853091855107e-07, "loss": 0.0433, "step": 110510 }, { "epoch": 1.1808323094182382, "grad_norm": 0.8356200456619263, "learning_rate": 9.992844109263865e-07, "loss": 0.0532, "step": 110520 }, { "epoch": 1.1809391527325177, "grad_norm": 2.831362009048462, "learning_rate": 9.992835121035329e-07, "loss": 0.0437, "step": 110530 }, { "epoch": 1.1810459960467974, "grad_norm": 2.295197010040283, "learning_rate": 9.992826127169502e-07, "loss": 0.0085, "step": 110540 }, { "epoch": 1.181152839361077, "grad_norm": 0.5613341927528381, "learning_rate": 9.9928171276664e-07, "loss": 0.0173, "step": 110550 }, { "epoch": 1.1812596826753565, "grad_norm": 2.750889778137207, "learning_rate": 9.992808122526028e-07, "loss": 0.0058, "step": 110560 }, { "epoch": 1.1813665259896362, "grad_norm": 2.6338696479797363, "learning_rate": 9.9927991117484e-07, "loss": 0.0438, "step": 110570 }, { "epoch": 1.1814733693039159, "grad_norm": 0.49007537961006165, "learning_rate": 9.992790095333526e-07, "loss": 0.0285, "step": 110580 }, { "epoch": 1.1815802126181953, "grad_norm": 0.9720713496208191, "learning_rate": 9.992781073281416e-07, "loss": 0.0377, "step": 110590 }, { "epoch": 1.181687055932475, "grad_norm": 0.8789993524551392, "learning_rate": 9.992772045592078e-07, "loss": 0.0123, "step": 110600 }, { "epoch": 1.1817938992467547, "grad_norm": 1.7040766477584839, "learning_rate": 9.99276301226552e-07, "loss": 0.0235, "step": 110610 }, { "epoch": 1.1819007425610342, "grad_norm": 0.269660621881485, "learning_rate": 9.992753973301761e-07, "loss": 0.0319, "step": 110620 }, { "epoch": 1.1820075858753138, "grad_norm": 1.1643543243408203, "learning_rate": 9.992744928700804e-07, "loss": 0.0302, "step": 110630 }, { "epoch": 1.1821144291895935, "grad_norm": 0.36602073907852173, "learning_rate": 9.99273587846266e-07, "loss": 0.0077, "step": 110640 }, { "epoch": 1.182221272503873, "grad_norm": 3.1589455604553223, "learning_rate": 9.992726822587344e-07, "loss": 0.0181, "step": 110650 }, { "epoch": 1.1823281158181527, "grad_norm": 0.040670596063137054, "learning_rate": 9.99271776107486e-07, "loss": 0.0157, "step": 110660 }, { "epoch": 1.1824349591324324, "grad_norm": 0.04951971024274826, "learning_rate": 9.99270869392522e-07, "loss": 0.0383, "step": 110670 }, { "epoch": 1.1825418024467118, "grad_norm": 4.909290790557861, "learning_rate": 9.992699621138436e-07, "loss": 0.036, "step": 110680 }, { "epoch": 1.1826486457609915, "grad_norm": 4.0869364738464355, "learning_rate": 9.992690542714517e-07, "loss": 0.0354, "step": 110690 }, { "epoch": 1.1827554890752712, "grad_norm": 1.1278913021087646, "learning_rate": 9.992681458653476e-07, "loss": 0.0264, "step": 110700 }, { "epoch": 1.1828623323895506, "grad_norm": 4.214676856994629, "learning_rate": 9.992672368955318e-07, "loss": 0.0521, "step": 110710 }, { "epoch": 1.1829691757038303, "grad_norm": 1.3908836841583252, "learning_rate": 9.992663273620056e-07, "loss": 0.052, "step": 110720 }, { "epoch": 1.18307601901811, "grad_norm": 7.057369232177734, "learning_rate": 9.992654172647701e-07, "loss": 0.0875, "step": 110730 }, { "epoch": 1.1831828623323895, "grad_norm": 0.06376782059669495, "learning_rate": 9.992645066038264e-07, "loss": 0.022, "step": 110740 }, { "epoch": 1.1832897056466691, "grad_norm": 9.709511756896973, "learning_rate": 9.992635953791752e-07, "loss": 0.0335, "step": 110750 }, { "epoch": 1.1833965489609488, "grad_norm": 0.05388014018535614, "learning_rate": 9.992626835908177e-07, "loss": 0.0117, "step": 110760 }, { "epoch": 1.1835033922752283, "grad_norm": 8.191469192504883, "learning_rate": 9.992617712387551e-07, "loss": 0.0444, "step": 110770 }, { "epoch": 1.183610235589508, "grad_norm": 0.405938982963562, "learning_rate": 9.992608583229883e-07, "loss": 0.0299, "step": 110780 }, { "epoch": 1.1837170789037876, "grad_norm": 0.4520352780818939, "learning_rate": 9.992599448435183e-07, "loss": 0.0558, "step": 110790 }, { "epoch": 1.183823922218067, "grad_norm": 5.163375377655029, "learning_rate": 9.99259030800346e-07, "loss": 0.0685, "step": 110800 }, { "epoch": 1.1839307655323468, "grad_norm": 6.876206398010254, "learning_rate": 9.992581161934725e-07, "loss": 0.0198, "step": 110810 }, { "epoch": 1.1840376088466265, "grad_norm": 1.9522547721862793, "learning_rate": 9.992572010228993e-07, "loss": 0.0553, "step": 110820 }, { "epoch": 1.184144452160906, "grad_norm": 1.001177191734314, "learning_rate": 9.992562852886265e-07, "loss": 0.0202, "step": 110830 }, { "epoch": 1.1842512954751856, "grad_norm": 1.8358755111694336, "learning_rate": 9.99255368990656e-07, "loss": 0.0402, "step": 110840 }, { "epoch": 1.1843581387894653, "grad_norm": 2.3803412914276123, "learning_rate": 9.992544521289886e-07, "loss": 0.0194, "step": 110850 }, { "epoch": 1.1844649821037447, "grad_norm": 1.83322012424469, "learning_rate": 9.99253534703625e-07, "loss": 0.0261, "step": 110860 }, { "epoch": 1.1845718254180244, "grad_norm": 0.31919795274734497, "learning_rate": 9.992526167145667e-07, "loss": 0.0332, "step": 110870 }, { "epoch": 1.184678668732304, "grad_norm": 6.979945659637451, "learning_rate": 9.992516981618144e-07, "loss": 0.0496, "step": 110880 }, { "epoch": 1.1847855120465838, "grad_norm": 0.3497062623500824, "learning_rate": 9.992507790453692e-07, "loss": 0.0572, "step": 110890 }, { "epoch": 1.1848923553608632, "grad_norm": 4.567087650299072, "learning_rate": 9.992498593652324e-07, "loss": 0.0734, "step": 110900 }, { "epoch": 1.184999198675143, "grad_norm": 0.014764810912311077, "learning_rate": 9.992489391214047e-07, "loss": 0.0369, "step": 110910 }, { "epoch": 1.1851060419894226, "grad_norm": 4.281918525695801, "learning_rate": 9.992480183138871e-07, "loss": 0.0837, "step": 110920 }, { "epoch": 1.185212885303702, "grad_norm": 4.325953960418701, "learning_rate": 9.99247096942681e-07, "loss": 0.0076, "step": 110930 }, { "epoch": 1.1853197286179817, "grad_norm": 0.012280797585844994, "learning_rate": 9.992461750077872e-07, "loss": 0.0235, "step": 110940 }, { "epoch": 1.1854265719322614, "grad_norm": 0.087667316198349, "learning_rate": 9.992452525092067e-07, "loss": 0.0504, "step": 110950 }, { "epoch": 1.1855334152465409, "grad_norm": 0.09104366600513458, "learning_rate": 9.992443294469408e-07, "loss": 0.0301, "step": 110960 }, { "epoch": 1.1856402585608206, "grad_norm": 3.3838014602661133, "learning_rate": 9.992434058209904e-07, "loss": 0.0236, "step": 110970 }, { "epoch": 1.1857471018751002, "grad_norm": 0.31041714549064636, "learning_rate": 9.992424816313563e-07, "loss": 0.0277, "step": 110980 }, { "epoch": 1.1858539451893797, "grad_norm": 0.37001869082450867, "learning_rate": 9.9924155687804e-07, "loss": 0.0157, "step": 110990 }, { "epoch": 1.1859607885036594, "grad_norm": 0.9629064798355103, "learning_rate": 9.99240631561042e-07, "loss": 0.0208, "step": 111000 }, { "epoch": 1.186067631817939, "grad_norm": 0.36034977436065674, "learning_rate": 9.992397056803638e-07, "loss": 0.0198, "step": 111010 }, { "epoch": 1.1861744751322185, "grad_norm": 2.9194369316101074, "learning_rate": 9.992387792360063e-07, "loss": 0.0176, "step": 111020 }, { "epoch": 1.1862813184464982, "grad_norm": 10.236346244812012, "learning_rate": 9.992378522279706e-07, "loss": 0.0486, "step": 111030 }, { "epoch": 1.1863881617607779, "grad_norm": 0.24917204678058624, "learning_rate": 9.992369246562578e-07, "loss": 0.0253, "step": 111040 }, { "epoch": 1.1864950050750573, "grad_norm": 3.443077325820923, "learning_rate": 9.992359965208685e-07, "loss": 0.0406, "step": 111050 }, { "epoch": 1.186601848389337, "grad_norm": 3.185049057006836, "learning_rate": 9.992350678218044e-07, "loss": 0.0447, "step": 111060 }, { "epoch": 1.1867086917036167, "grad_norm": 1.959623098373413, "learning_rate": 9.99234138559066e-07, "loss": 0.0394, "step": 111070 }, { "epoch": 1.1868155350178962, "grad_norm": 1.826734185218811, "learning_rate": 9.992332087326548e-07, "loss": 0.0266, "step": 111080 }, { "epoch": 1.1869223783321758, "grad_norm": 0.01346570998430252, "learning_rate": 9.992322783425713e-07, "loss": 0.0191, "step": 111090 }, { "epoch": 1.1870292216464555, "grad_norm": 9.720864295959473, "learning_rate": 9.992313473888171e-07, "loss": 0.0781, "step": 111100 }, { "epoch": 1.1871360649607352, "grad_norm": 4.5394744873046875, "learning_rate": 9.992304158713932e-07, "loss": 0.0111, "step": 111110 }, { "epoch": 1.1872429082750147, "grad_norm": 2.28818678855896, "learning_rate": 9.992294837903002e-07, "loss": 0.038, "step": 111120 }, { "epoch": 1.1873497515892943, "grad_norm": 2.3419740200042725, "learning_rate": 9.992285511455393e-07, "loss": 0.0278, "step": 111130 }, { "epoch": 1.187456594903574, "grad_norm": 4.1688714027404785, "learning_rate": 9.99227617937112e-07, "loss": 0.0224, "step": 111140 }, { "epoch": 1.1875634382178535, "grad_norm": 1.9876060485839844, "learning_rate": 9.99226684165019e-07, "loss": 0.0402, "step": 111150 }, { "epoch": 1.1876702815321332, "grad_norm": 0.07313591241836548, "learning_rate": 9.992257498292614e-07, "loss": 0.0296, "step": 111160 }, { "epoch": 1.1877771248464128, "grad_norm": 2.0026695728302, "learning_rate": 9.9922481492984e-07, "loss": 0.0232, "step": 111170 }, { "epoch": 1.1878839681606923, "grad_norm": 7.877251148223877, "learning_rate": 9.992238794667565e-07, "loss": 0.0225, "step": 111180 }, { "epoch": 1.187990811474972, "grad_norm": 1.6817362308502197, "learning_rate": 9.992229434400112e-07, "loss": 0.0588, "step": 111190 }, { "epoch": 1.1880976547892517, "grad_norm": 0.778859555721283, "learning_rate": 9.992220068496057e-07, "loss": 0.0133, "step": 111200 }, { "epoch": 1.1882044981035311, "grad_norm": 4.263548851013184, "learning_rate": 9.992210696955409e-07, "loss": 0.0274, "step": 111210 }, { "epoch": 1.1883113414178108, "grad_norm": 5.00650691986084, "learning_rate": 9.992201319778178e-07, "loss": 0.0297, "step": 111220 }, { "epoch": 1.1884181847320905, "grad_norm": 0.44786885380744934, "learning_rate": 9.992191936964374e-07, "loss": 0.0263, "step": 111230 }, { "epoch": 1.18852502804637, "grad_norm": 0.7521165609359741, "learning_rate": 9.99218254851401e-07, "loss": 0.0218, "step": 111240 }, { "epoch": 1.1886318713606496, "grad_norm": 5.765719890594482, "learning_rate": 9.992173154427095e-07, "loss": 0.018, "step": 111250 }, { "epoch": 1.1887387146749293, "grad_norm": 1.1903398036956787, "learning_rate": 9.992163754703639e-07, "loss": 0.0228, "step": 111260 }, { "epoch": 1.1888455579892088, "grad_norm": 1.2340631484985352, "learning_rate": 9.992154349343654e-07, "loss": 0.0485, "step": 111270 }, { "epoch": 1.1889524013034884, "grad_norm": 0.9936724901199341, "learning_rate": 9.992144938347148e-07, "loss": 0.0243, "step": 111280 }, { "epoch": 1.1890592446177681, "grad_norm": 8.060953140258789, "learning_rate": 9.992135521714136e-07, "loss": 0.0356, "step": 111290 }, { "epoch": 1.1891660879320476, "grad_norm": 0.006504238583147526, "learning_rate": 9.992126099444625e-07, "loss": 0.0403, "step": 111300 }, { "epoch": 1.1892729312463273, "grad_norm": 1.1986448764801025, "learning_rate": 9.992116671538627e-07, "loss": 0.014, "step": 111310 }, { "epoch": 1.189379774560607, "grad_norm": 1.1754266023635864, "learning_rate": 9.992107237996153e-07, "loss": 0.0606, "step": 111320 }, { "epoch": 1.1894866178748864, "grad_norm": 0.27909427881240845, "learning_rate": 9.992097798817212e-07, "loss": 0.0366, "step": 111330 }, { "epoch": 1.189593461189166, "grad_norm": 3.365719795227051, "learning_rate": 9.992088354001818e-07, "loss": 0.0813, "step": 111340 }, { "epoch": 1.1897003045034458, "grad_norm": 6.48870849609375, "learning_rate": 9.992078903549976e-07, "loss": 0.0271, "step": 111350 }, { "epoch": 1.1898071478177252, "grad_norm": 1.5123833417892456, "learning_rate": 9.992069447461705e-07, "loss": 0.0322, "step": 111360 }, { "epoch": 1.189913991132005, "grad_norm": 2.679868698120117, "learning_rate": 9.992059985737008e-07, "loss": 0.0498, "step": 111370 }, { "epoch": 1.1900208344462846, "grad_norm": 11.897160530090332, "learning_rate": 9.992050518375897e-07, "loss": 0.0177, "step": 111380 }, { "epoch": 1.190127677760564, "grad_norm": 0.2894873321056366, "learning_rate": 9.992041045378386e-07, "loss": 0.0599, "step": 111390 }, { "epoch": 1.1902345210748437, "grad_norm": 1.3484362363815308, "learning_rate": 9.992031566744484e-07, "loss": 0.0424, "step": 111400 }, { "epoch": 1.1903413643891234, "grad_norm": 5.815890312194824, "learning_rate": 9.9920220824742e-07, "loss": 0.0254, "step": 111410 }, { "epoch": 1.1904482077034029, "grad_norm": 9.852958679199219, "learning_rate": 9.992012592567549e-07, "loss": 0.0288, "step": 111420 }, { "epoch": 1.1905550510176826, "grad_norm": 0.9701012372970581, "learning_rate": 9.992003097024538e-07, "loss": 0.0273, "step": 111430 }, { "epoch": 1.1906618943319622, "grad_norm": 3.5145018100738525, "learning_rate": 9.991993595845177e-07, "loss": 0.0274, "step": 111440 }, { "epoch": 1.1907687376462417, "grad_norm": 0.40599262714385986, "learning_rate": 9.99198408902948e-07, "loss": 0.0165, "step": 111450 }, { "epoch": 1.1908755809605214, "grad_norm": 1.4958590269088745, "learning_rate": 9.991974576577455e-07, "loss": 0.0305, "step": 111460 }, { "epoch": 1.190982424274801, "grad_norm": 2.842348337173462, "learning_rate": 9.991965058489115e-07, "loss": 0.0765, "step": 111470 }, { "epoch": 1.1910892675890805, "grad_norm": 2.3755078315734863, "learning_rate": 9.99195553476447e-07, "loss": 0.0424, "step": 111480 }, { "epoch": 1.1911961109033602, "grad_norm": 0.12526935338974, "learning_rate": 9.99194600540353e-07, "loss": 0.0619, "step": 111490 }, { "epoch": 1.1913029542176399, "grad_norm": 3.9046781063079834, "learning_rate": 9.991936470406307e-07, "loss": 0.028, "step": 111500 }, { "epoch": 1.1914097975319193, "grad_norm": 0.03607390820980072, "learning_rate": 9.99192692977281e-07, "loss": 0.0271, "step": 111510 }, { "epoch": 1.191516640846199, "grad_norm": 3.081956386566162, "learning_rate": 9.99191738350305e-07, "loss": 0.0194, "step": 111520 }, { "epoch": 1.1916234841604787, "grad_norm": 0.05206216126680374, "learning_rate": 9.99190783159704e-07, "loss": 0.097, "step": 111530 }, { "epoch": 1.1917303274747582, "grad_norm": 0.9748629331588745, "learning_rate": 9.991898274054789e-07, "loss": 0.0449, "step": 111540 }, { "epoch": 1.1918371707890378, "grad_norm": 5.858795642852783, "learning_rate": 9.99188871087631e-07, "loss": 0.0194, "step": 111550 }, { "epoch": 1.1919440141033175, "grad_norm": 10.108448028564453, "learning_rate": 9.991879142061607e-07, "loss": 0.0332, "step": 111560 }, { "epoch": 1.192050857417597, "grad_norm": 0.0980866551399231, "learning_rate": 9.9918695676107e-07, "loss": 0.0123, "step": 111570 }, { "epoch": 1.1921577007318767, "grad_norm": 3.5034658908843994, "learning_rate": 9.991859987523594e-07, "loss": 0.0291, "step": 111580 }, { "epoch": 1.1922645440461563, "grad_norm": 0.9659008979797363, "learning_rate": 9.991850401800301e-07, "loss": 0.0134, "step": 111590 }, { "epoch": 1.1923713873604358, "grad_norm": 5.1285624504089355, "learning_rate": 9.991840810440833e-07, "loss": 0.0169, "step": 111600 }, { "epoch": 1.1924782306747155, "grad_norm": 1.1253907680511475, "learning_rate": 9.991831213445198e-07, "loss": 0.0613, "step": 111610 }, { "epoch": 1.1925850739889952, "grad_norm": 0.46924471855163574, "learning_rate": 9.99182161081341e-07, "loss": 0.0619, "step": 111620 }, { "epoch": 1.1926919173032748, "grad_norm": 0.2203158438205719, "learning_rate": 9.99181200254548e-07, "loss": 0.009, "step": 111630 }, { "epoch": 1.1927987606175543, "grad_norm": 2.8545243740081787, "learning_rate": 9.991802388641415e-07, "loss": 0.0668, "step": 111640 }, { "epoch": 1.192905603931834, "grad_norm": 4.970858097076416, "learning_rate": 9.99179276910123e-07, "loss": 0.0148, "step": 111650 }, { "epoch": 1.1930124472461137, "grad_norm": 7.485445499420166, "learning_rate": 9.991783143924935e-07, "loss": 0.0503, "step": 111660 }, { "epoch": 1.1931192905603931, "grad_norm": 16.284244537353516, "learning_rate": 9.99177351311254e-07, "loss": 0.0684, "step": 111670 }, { "epoch": 1.1932261338746728, "grad_norm": 8.467252731323242, "learning_rate": 9.991763876664054e-07, "loss": 0.087, "step": 111680 }, { "epoch": 1.1933329771889525, "grad_norm": 2.1040947437286377, "learning_rate": 9.99175423457949e-07, "loss": 0.0412, "step": 111690 }, { "epoch": 1.193439820503232, "grad_norm": 0.7526304125785828, "learning_rate": 9.99174458685886e-07, "loss": 0.0106, "step": 111700 }, { "epoch": 1.1935466638175116, "grad_norm": 0.6891265511512756, "learning_rate": 9.991734933502174e-07, "loss": 0.031, "step": 111710 }, { "epoch": 1.1936535071317913, "grad_norm": 5.2325568199157715, "learning_rate": 9.991725274509441e-07, "loss": 0.0754, "step": 111720 }, { "epoch": 1.1937603504460708, "grad_norm": 10.3658447265625, "learning_rate": 9.991715609880674e-07, "loss": 0.0436, "step": 111730 }, { "epoch": 1.1938671937603504, "grad_norm": 11.973484992980957, "learning_rate": 9.991705939615885e-07, "loss": 0.0801, "step": 111740 }, { "epoch": 1.1939740370746301, "grad_norm": 3.9821548461914062, "learning_rate": 9.99169626371508e-07, "loss": 0.0171, "step": 111750 }, { "epoch": 1.1940808803889096, "grad_norm": 5.979043006896973, "learning_rate": 9.991686582178276e-07, "loss": 0.0237, "step": 111760 }, { "epoch": 1.1941877237031893, "grad_norm": 7.463300704956055, "learning_rate": 9.991676895005479e-07, "loss": 0.162, "step": 111770 }, { "epoch": 1.194294567017469, "grad_norm": 10.316340446472168, "learning_rate": 9.991667202196702e-07, "loss": 0.0219, "step": 111780 }, { "epoch": 1.1944014103317484, "grad_norm": 3.3559014797210693, "learning_rate": 9.991657503751957e-07, "loss": 0.0428, "step": 111790 }, { "epoch": 1.194508253646028, "grad_norm": 4.501441478729248, "learning_rate": 9.991647799671254e-07, "loss": 0.0311, "step": 111800 }, { "epoch": 1.1946150969603078, "grad_norm": 6.912017345428467, "learning_rate": 9.991638089954605e-07, "loss": 0.092, "step": 111810 }, { "epoch": 1.1947219402745872, "grad_norm": 1.329309344291687, "learning_rate": 9.99162837460202e-07, "loss": 0.0086, "step": 111820 }, { "epoch": 1.194828783588867, "grad_norm": 2.944464683532715, "learning_rate": 9.991618653613508e-07, "loss": 0.049, "step": 111830 }, { "epoch": 1.1949356269031466, "grad_norm": 0.2512040436267853, "learning_rate": 9.991608926989082e-07, "loss": 0.0629, "step": 111840 }, { "epoch": 1.1950424702174263, "grad_norm": 12.53732967376709, "learning_rate": 9.991599194728753e-07, "loss": 0.0216, "step": 111850 }, { "epoch": 1.1951493135317057, "grad_norm": 2.9755024909973145, "learning_rate": 9.991589456832531e-07, "loss": 0.0754, "step": 111860 }, { "epoch": 1.1952561568459854, "grad_norm": 2.233595609664917, "learning_rate": 9.991579713300428e-07, "loss": 0.0341, "step": 111870 }, { "epoch": 1.195363000160265, "grad_norm": 5.3037190437316895, "learning_rate": 9.991569964132457e-07, "loss": 0.0325, "step": 111880 }, { "epoch": 1.1954698434745445, "grad_norm": 1.5182621479034424, "learning_rate": 9.991560209328624e-07, "loss": 0.0604, "step": 111890 }, { "epoch": 1.1955766867888242, "grad_norm": 1.791164755821228, "learning_rate": 9.991550448888947e-07, "loss": 0.028, "step": 111900 }, { "epoch": 1.195683530103104, "grad_norm": 4.901021480560303, "learning_rate": 9.991540682813428e-07, "loss": 0.0102, "step": 111910 }, { "epoch": 1.1957903734173834, "grad_norm": 0.1334352195262909, "learning_rate": 9.991530911102084e-07, "loss": 0.0469, "step": 111920 }, { "epoch": 1.195897216731663, "grad_norm": 1.068585991859436, "learning_rate": 9.991521133754926e-07, "loss": 0.0065, "step": 111930 }, { "epoch": 1.1960040600459427, "grad_norm": 3.754286766052246, "learning_rate": 9.991511350771964e-07, "loss": 0.0545, "step": 111940 }, { "epoch": 1.1961109033602222, "grad_norm": 4.759907245635986, "learning_rate": 9.991501562153208e-07, "loss": 0.0288, "step": 111950 }, { "epoch": 1.1962177466745019, "grad_norm": 2.363894462585449, "learning_rate": 9.99149176789867e-07, "loss": 0.0156, "step": 111960 }, { "epoch": 1.1963245899887816, "grad_norm": 3.8483235836029053, "learning_rate": 9.991481968008362e-07, "loss": 0.0431, "step": 111970 }, { "epoch": 1.196431433303061, "grad_norm": 0.04899049550294876, "learning_rate": 9.991472162482294e-07, "loss": 0.0049, "step": 111980 }, { "epoch": 1.1965382766173407, "grad_norm": 0.2627503573894501, "learning_rate": 9.991462351320476e-07, "loss": 0.0376, "step": 111990 }, { "epoch": 1.1966451199316204, "grad_norm": 0.2915774881839752, "learning_rate": 9.991452534522921e-07, "loss": 0.0303, "step": 112000 }, { "epoch": 1.1967519632458998, "grad_norm": 4.843441486358643, "learning_rate": 9.99144271208964e-07, "loss": 0.0353, "step": 112010 }, { "epoch": 1.1968588065601795, "grad_norm": 7.6985249519348145, "learning_rate": 9.991432884020643e-07, "loss": 0.1024, "step": 112020 }, { "epoch": 1.1969656498744592, "grad_norm": 0.34485650062561035, "learning_rate": 9.99142305031594e-07, "loss": 0.0198, "step": 112030 }, { "epoch": 1.1970724931887387, "grad_norm": 0.02595965564250946, "learning_rate": 9.991413210975549e-07, "loss": 0.0204, "step": 112040 }, { "epoch": 1.1971793365030183, "grad_norm": 3.6917269229888916, "learning_rate": 9.99140336599947e-07, "loss": 0.0254, "step": 112050 }, { "epoch": 1.197286179817298, "grad_norm": 5.256210803985596, "learning_rate": 9.991393515387723e-07, "loss": 0.039, "step": 112060 }, { "epoch": 1.1973930231315775, "grad_norm": 3.3174967765808105, "learning_rate": 9.991383659140314e-07, "loss": 0.0347, "step": 112070 }, { "epoch": 1.1974998664458572, "grad_norm": 8.92633056640625, "learning_rate": 9.991373797257256e-07, "loss": 0.0336, "step": 112080 }, { "epoch": 1.1976067097601368, "grad_norm": 0.04405565932393074, "learning_rate": 9.991363929738562e-07, "loss": 0.0138, "step": 112090 }, { "epoch": 1.1977135530744163, "grad_norm": 0.6081715822219849, "learning_rate": 9.99135405658424e-07, "loss": 0.0184, "step": 112100 }, { "epoch": 1.197820396388696, "grad_norm": 0.39270272850990295, "learning_rate": 9.991344177794303e-07, "loss": 0.0287, "step": 112110 }, { "epoch": 1.1979272397029757, "grad_norm": 2.2775919437408447, "learning_rate": 9.991334293368764e-07, "loss": 0.0482, "step": 112120 }, { "epoch": 1.1980340830172551, "grad_norm": 1.3693790435791016, "learning_rate": 9.991324403307628e-07, "loss": 0.053, "step": 112130 }, { "epoch": 1.1981409263315348, "grad_norm": 0.9788110852241516, "learning_rate": 9.991314507610913e-07, "loss": 0.0198, "step": 112140 }, { "epoch": 1.1982477696458145, "grad_norm": 0.12385603785514832, "learning_rate": 9.991304606278626e-07, "loss": 0.0394, "step": 112150 }, { "epoch": 1.198354612960094, "grad_norm": 0.03739212080836296, "learning_rate": 9.991294699310778e-07, "loss": 0.0267, "step": 112160 }, { "epoch": 1.1984614562743736, "grad_norm": 1.1311639547348022, "learning_rate": 9.991284786707384e-07, "loss": 0.0256, "step": 112170 }, { "epoch": 1.1985682995886533, "grad_norm": 1.0364176034927368, "learning_rate": 9.991274868468452e-07, "loss": 0.0567, "step": 112180 }, { "epoch": 1.1986751429029328, "grad_norm": 5.652348518371582, "learning_rate": 9.991264944593994e-07, "loss": 0.0505, "step": 112190 }, { "epoch": 1.1987819862172124, "grad_norm": 3.397379159927368, "learning_rate": 9.991255015084018e-07, "loss": 0.0253, "step": 112200 }, { "epoch": 1.1988888295314921, "grad_norm": 0.03925101086497307, "learning_rate": 9.991245079938542e-07, "loss": 0.0222, "step": 112210 }, { "epoch": 1.1989956728457716, "grad_norm": 21.16314697265625, "learning_rate": 9.991235139157575e-07, "loss": 0.0461, "step": 112220 }, { "epoch": 1.1991025161600513, "grad_norm": 7.361425876617432, "learning_rate": 9.991225192741123e-07, "loss": 0.0337, "step": 112230 }, { "epoch": 1.199209359474331, "grad_norm": 0.6609125137329102, "learning_rate": 9.991215240689204e-07, "loss": 0.0356, "step": 112240 }, { "epoch": 1.1993162027886104, "grad_norm": 3.9144527912139893, "learning_rate": 9.991205283001824e-07, "loss": 0.0423, "step": 112250 }, { "epoch": 1.19942304610289, "grad_norm": 7.432694435119629, "learning_rate": 9.991195319678998e-07, "loss": 0.0408, "step": 112260 }, { "epoch": 1.1995298894171698, "grad_norm": 6.934628486633301, "learning_rate": 9.991185350720735e-07, "loss": 0.0696, "step": 112270 }, { "epoch": 1.1996367327314492, "grad_norm": 5.510762691497803, "learning_rate": 9.991175376127048e-07, "loss": 0.093, "step": 112280 }, { "epoch": 1.199743576045729, "grad_norm": 0.015793370082974434, "learning_rate": 9.991165395897946e-07, "loss": 0.0682, "step": 112290 }, { "epoch": 1.1998504193600086, "grad_norm": 0.24315333366394043, "learning_rate": 9.991155410033443e-07, "loss": 0.035, "step": 112300 }, { "epoch": 1.199957262674288, "grad_norm": 0.14941811561584473, "learning_rate": 9.991145418533548e-07, "loss": 0.0197, "step": 112310 }, { "epoch": 1.2000641059885677, "grad_norm": 0.05342896655201912, "learning_rate": 9.991135421398274e-07, "loss": 0.0267, "step": 112320 }, { "epoch": 1.2001709493028474, "grad_norm": 3.324518918991089, "learning_rate": 9.99112541862763e-07, "loss": 0.0245, "step": 112330 }, { "epoch": 1.2002777926171269, "grad_norm": 0.08230460435152054, "learning_rate": 9.99111541022163e-07, "loss": 0.0127, "step": 112340 }, { "epoch": 1.2003846359314065, "grad_norm": 5.070927619934082, "learning_rate": 9.991105396180282e-07, "loss": 0.036, "step": 112350 }, { "epoch": 1.2004914792456862, "grad_norm": 9.945599555969238, "learning_rate": 9.9910953765036e-07, "loss": 0.0326, "step": 112360 }, { "epoch": 1.200598322559966, "grad_norm": 2.796851873397827, "learning_rate": 9.991085351191596e-07, "loss": 0.0863, "step": 112370 }, { "epoch": 1.2007051658742454, "grad_norm": 1.060241460800171, "learning_rate": 9.99107532024428e-07, "loss": 0.034, "step": 112380 }, { "epoch": 1.200812009188525, "grad_norm": 4.4359869956970215, "learning_rate": 9.99106528366166e-07, "loss": 0.0571, "step": 112390 }, { "epoch": 1.2009188525028047, "grad_norm": 13.173296928405762, "learning_rate": 9.991055241443755e-07, "loss": 0.0468, "step": 112400 }, { "epoch": 1.2010256958170842, "grad_norm": 3.3651866912841797, "learning_rate": 9.991045193590571e-07, "loss": 0.0188, "step": 112410 }, { "epoch": 1.2011325391313639, "grad_norm": 1.9077095985412598, "learning_rate": 9.991035140102118e-07, "loss": 0.1008, "step": 112420 }, { "epoch": 1.2012393824456435, "grad_norm": 2.1601290702819824, "learning_rate": 9.99102508097841e-07, "loss": 0.0271, "step": 112430 }, { "epoch": 1.201346225759923, "grad_norm": 2.268325090408325, "learning_rate": 9.99101501621946e-07, "loss": 0.0385, "step": 112440 }, { "epoch": 1.2014530690742027, "grad_norm": 0.19867773354053497, "learning_rate": 9.991004945825276e-07, "loss": 0.0236, "step": 112450 }, { "epoch": 1.2015599123884824, "grad_norm": 1.7289671897888184, "learning_rate": 9.990994869795872e-07, "loss": 0.0319, "step": 112460 }, { "epoch": 1.2016667557027618, "grad_norm": 6.24974250793457, "learning_rate": 9.990984788131257e-07, "loss": 0.0287, "step": 112470 }, { "epoch": 1.2017735990170415, "grad_norm": 0.36513546109199524, "learning_rate": 9.990974700831444e-07, "loss": 0.016, "step": 112480 }, { "epoch": 1.2018804423313212, "grad_norm": 2.9693849086761475, "learning_rate": 9.99096460789644e-07, "loss": 0.0627, "step": 112490 }, { "epoch": 1.2019872856456006, "grad_norm": 3.4044477939605713, "learning_rate": 9.990954509326266e-07, "loss": 0.0143, "step": 112500 }, { "epoch": 1.2020941289598803, "grad_norm": 2.606034994125366, "learning_rate": 9.990944405120927e-07, "loss": 0.0226, "step": 112510 }, { "epoch": 1.20220097227416, "grad_norm": 2.4229423999786377, "learning_rate": 9.990934295280432e-07, "loss": 0.0244, "step": 112520 }, { "epoch": 1.2023078155884395, "grad_norm": 2.8677051067352295, "learning_rate": 9.990924179804796e-07, "loss": 0.0249, "step": 112530 }, { "epoch": 1.2024146589027191, "grad_norm": 0.3736404776573181, "learning_rate": 9.990914058694032e-07, "loss": 0.018, "step": 112540 }, { "epoch": 1.2025215022169988, "grad_norm": 5.293300151824951, "learning_rate": 9.99090393194815e-07, "loss": 0.0647, "step": 112550 }, { "epoch": 1.2026283455312783, "grad_norm": 8.296341896057129, "learning_rate": 9.990893799567157e-07, "loss": 0.0199, "step": 112560 }, { "epoch": 1.202735188845558, "grad_norm": 9.403022766113281, "learning_rate": 9.99088366155107e-07, "loss": 0.0543, "step": 112570 }, { "epoch": 1.2028420321598376, "grad_norm": 1.1237190961837769, "learning_rate": 9.9908735178999e-07, "loss": 0.0247, "step": 112580 }, { "epoch": 1.2029488754741173, "grad_norm": 7.389072418212891, "learning_rate": 9.990863368613655e-07, "loss": 0.0172, "step": 112590 }, { "epoch": 1.2030557187883968, "grad_norm": 0.5608544945716858, "learning_rate": 9.99085321369235e-07, "loss": 0.0464, "step": 112600 }, { "epoch": 1.2031625621026765, "grad_norm": 0.12268418818712234, "learning_rate": 9.990843053135996e-07, "loss": 0.0602, "step": 112610 }, { "epoch": 1.2032694054169562, "grad_norm": 2.3707265853881836, "learning_rate": 9.990832886944601e-07, "loss": 0.0375, "step": 112620 }, { "epoch": 1.2033762487312356, "grad_norm": 0.019715294241905212, "learning_rate": 9.99082271511818e-07, "loss": 0.0264, "step": 112630 }, { "epoch": 1.2034830920455153, "grad_norm": 4.987304210662842, "learning_rate": 9.990812537656743e-07, "loss": 0.0163, "step": 112640 }, { "epoch": 1.203589935359795, "grad_norm": 0.11597326397895813, "learning_rate": 9.990802354560303e-07, "loss": 0.0612, "step": 112650 }, { "epoch": 1.2036967786740744, "grad_norm": 18.39890480041504, "learning_rate": 9.99079216582887e-07, "loss": 0.092, "step": 112660 }, { "epoch": 1.2038036219883541, "grad_norm": 0.07118372619152069, "learning_rate": 9.990781971462456e-07, "loss": 0.0206, "step": 112670 }, { "epoch": 1.2039104653026338, "grad_norm": 2.550183057785034, "learning_rate": 9.990771771461073e-07, "loss": 0.0483, "step": 112680 }, { "epoch": 1.2040173086169133, "grad_norm": 2.4852800369262695, "learning_rate": 9.990761565824732e-07, "loss": 0.0348, "step": 112690 }, { "epoch": 1.204124151931193, "grad_norm": 0.07186152786016464, "learning_rate": 9.990751354553444e-07, "loss": 0.0302, "step": 112700 }, { "epoch": 1.2042309952454726, "grad_norm": 6.368832588195801, "learning_rate": 9.99074113764722e-07, "loss": 0.03, "step": 112710 }, { "epoch": 1.204337838559752, "grad_norm": 0.698776125907898, "learning_rate": 9.990730915106076e-07, "loss": 0.0472, "step": 112720 }, { "epoch": 1.2044446818740318, "grad_norm": 4.655444145202637, "learning_rate": 9.990720686930017e-07, "loss": 0.0986, "step": 112730 }, { "epoch": 1.2045515251883114, "grad_norm": 5.723079204559326, "learning_rate": 9.990710453119057e-07, "loss": 0.05, "step": 112740 }, { "epoch": 1.204658368502591, "grad_norm": 0.256544828414917, "learning_rate": 9.99070021367321e-07, "loss": 0.0422, "step": 112750 }, { "epoch": 1.2047652118168706, "grad_norm": 0.12793686985969543, "learning_rate": 9.990689968592487e-07, "loss": 0.0158, "step": 112760 }, { "epoch": 1.2048720551311503, "grad_norm": 0.01930883154273033, "learning_rate": 9.990679717876897e-07, "loss": 0.0605, "step": 112770 }, { "epoch": 1.2049788984454297, "grad_norm": 5.923487186431885, "learning_rate": 9.990669461526453e-07, "loss": 0.0477, "step": 112780 }, { "epoch": 1.2050857417597094, "grad_norm": 0.35004037618637085, "learning_rate": 9.990659199541166e-07, "loss": 0.0263, "step": 112790 }, { "epoch": 1.205192585073989, "grad_norm": 0.31422847509384155, "learning_rate": 9.990648931921049e-07, "loss": 0.0394, "step": 112800 }, { "epoch": 1.2052994283882685, "grad_norm": 0.2542993724346161, "learning_rate": 9.990638658666113e-07, "loss": 0.0746, "step": 112810 }, { "epoch": 1.2054062717025482, "grad_norm": 1.6641933917999268, "learning_rate": 9.990628379776366e-07, "loss": 0.0386, "step": 112820 }, { "epoch": 1.205513115016828, "grad_norm": 3.762518882751465, "learning_rate": 9.990618095251826e-07, "loss": 0.0641, "step": 112830 }, { "epoch": 1.2056199583311074, "grad_norm": 2.046438455581665, "learning_rate": 9.9906078050925e-07, "loss": 0.0405, "step": 112840 }, { "epoch": 1.205726801645387, "grad_norm": 0.5010690093040466, "learning_rate": 9.990597509298404e-07, "loss": 0.0276, "step": 112850 }, { "epoch": 1.2058336449596667, "grad_norm": 1.6412608623504639, "learning_rate": 9.990587207869545e-07, "loss": 0.0793, "step": 112860 }, { "epoch": 1.2059404882739462, "grad_norm": 3.307684898376465, "learning_rate": 9.990576900805936e-07, "loss": 0.0239, "step": 112870 }, { "epoch": 1.2060473315882259, "grad_norm": 0.1337975561618805, "learning_rate": 9.990566588107588e-07, "loss": 0.0127, "step": 112880 }, { "epoch": 1.2061541749025055, "grad_norm": 0.011352844536304474, "learning_rate": 9.990556269774515e-07, "loss": 0.0543, "step": 112890 }, { "epoch": 1.206261018216785, "grad_norm": 0.13487419486045837, "learning_rate": 9.990545945806728e-07, "loss": 0.0083, "step": 112900 }, { "epoch": 1.2063678615310647, "grad_norm": 0.09188246726989746, "learning_rate": 9.990535616204236e-07, "loss": 0.0129, "step": 112910 }, { "epoch": 1.2064747048453444, "grad_norm": 2.2587034702301025, "learning_rate": 9.990525280967054e-07, "loss": 0.0292, "step": 112920 }, { "epoch": 1.2065815481596238, "grad_norm": 0.7572662234306335, "learning_rate": 9.990514940095193e-07, "loss": 0.0058, "step": 112930 }, { "epoch": 1.2066883914739035, "grad_norm": 0.6987883448600769, "learning_rate": 9.990504593588663e-07, "loss": 0.0469, "step": 112940 }, { "epoch": 1.2067952347881832, "grad_norm": 0.00457696383818984, "learning_rate": 9.990494241447476e-07, "loss": 0.0563, "step": 112950 }, { "epoch": 1.2069020781024626, "grad_norm": 3.6053967475891113, "learning_rate": 9.990483883671645e-07, "loss": 0.0196, "step": 112960 }, { "epoch": 1.2070089214167423, "grad_norm": 2.8257665634155273, "learning_rate": 9.990473520261182e-07, "loss": 0.0321, "step": 112970 }, { "epoch": 1.207115764731022, "grad_norm": 0.8755645155906677, "learning_rate": 9.990463151216096e-07, "loss": 0.0145, "step": 112980 }, { "epoch": 1.2072226080453015, "grad_norm": 6.4490275382995605, "learning_rate": 9.990452776536402e-07, "loss": 0.0309, "step": 112990 }, { "epoch": 1.2073294513595811, "grad_norm": 0.7038703560829163, "learning_rate": 9.990442396222109e-07, "loss": 0.0498, "step": 113000 }, { "epoch": 1.2074362946738608, "grad_norm": 8.233394622802734, "learning_rate": 9.990432010273232e-07, "loss": 0.0348, "step": 113010 }, { "epoch": 1.2075431379881403, "grad_norm": 0.4301779568195343, "learning_rate": 9.990421618689777e-07, "loss": 0.0205, "step": 113020 }, { "epoch": 1.20764998130242, "grad_norm": 1.7874740362167358, "learning_rate": 9.990411221471762e-07, "loss": 0.024, "step": 113030 }, { "epoch": 1.2077568246166996, "grad_norm": 2.939687728881836, "learning_rate": 9.990400818619197e-07, "loss": 0.016, "step": 113040 }, { "epoch": 1.207863667930979, "grad_norm": 0.05713154375553131, "learning_rate": 9.99039041013209e-07, "loss": 0.0534, "step": 113050 }, { "epoch": 1.2079705112452588, "grad_norm": 4.712235450744629, "learning_rate": 9.990379996010457e-07, "loss": 0.0288, "step": 113060 }, { "epoch": 1.2080773545595385, "grad_norm": 0.8779155015945435, "learning_rate": 9.990369576254307e-07, "loss": 0.0091, "step": 113070 }, { "epoch": 1.208184197873818, "grad_norm": 0.10112139582633972, "learning_rate": 9.990359150863654e-07, "loss": 0.0547, "step": 113080 }, { "epoch": 1.2082910411880976, "grad_norm": 0.05907206982374191, "learning_rate": 9.99034871983851e-07, "loss": 0.0096, "step": 113090 }, { "epoch": 1.2083978845023773, "grad_norm": 0.10427852720022202, "learning_rate": 9.990338283178885e-07, "loss": 0.061, "step": 113100 }, { "epoch": 1.208504727816657, "grad_norm": 5.630423545837402, "learning_rate": 9.990327840884792e-07, "loss": 0.0624, "step": 113110 }, { "epoch": 1.2086115711309364, "grad_norm": 0.20044568181037903, "learning_rate": 9.99031739295624e-07, "loss": 0.0154, "step": 113120 }, { "epoch": 1.208718414445216, "grad_norm": 0.21254342794418335, "learning_rate": 9.990306939393245e-07, "loss": 0.0715, "step": 113130 }, { "epoch": 1.2088252577594958, "grad_norm": 3.0454585552215576, "learning_rate": 9.990296480195818e-07, "loss": 0.062, "step": 113140 }, { "epoch": 1.2089321010737752, "grad_norm": 0.2024238258600235, "learning_rate": 9.990286015363967e-07, "loss": 0.0141, "step": 113150 }, { "epoch": 1.209038944388055, "grad_norm": 1.6444331407546997, "learning_rate": 9.990275544897706e-07, "loss": 0.0432, "step": 113160 }, { "epoch": 1.2091457877023346, "grad_norm": 6.357694625854492, "learning_rate": 9.99026506879705e-07, "loss": 0.0275, "step": 113170 }, { "epoch": 1.209252631016614, "grad_norm": 0.2473645657300949, "learning_rate": 9.990254587062007e-07, "loss": 0.031, "step": 113180 }, { "epoch": 1.2093594743308937, "grad_norm": 9.449687004089355, "learning_rate": 9.99024409969259e-07, "loss": 0.0523, "step": 113190 }, { "epoch": 1.2094663176451734, "grad_norm": 11.942605018615723, "learning_rate": 9.990233606688809e-07, "loss": 0.0449, "step": 113200 }, { "epoch": 1.2095731609594529, "grad_norm": 0.9493213891983032, "learning_rate": 9.99022310805068e-07, "loss": 0.0395, "step": 113210 }, { "epoch": 1.2096800042737326, "grad_norm": 14.511758804321289, "learning_rate": 9.990212603778213e-07, "loss": 0.0299, "step": 113220 }, { "epoch": 1.2097868475880122, "grad_norm": 13.420381546020508, "learning_rate": 9.990202093871417e-07, "loss": 0.0979, "step": 113230 }, { "epoch": 1.2098936909022917, "grad_norm": 6.57431173324585, "learning_rate": 9.990191578330307e-07, "loss": 0.0217, "step": 113240 }, { "epoch": 1.2100005342165714, "grad_norm": 0.053330838680267334, "learning_rate": 9.990181057154894e-07, "loss": 0.0652, "step": 113250 }, { "epoch": 1.210107377530851, "grad_norm": 9.342782020568848, "learning_rate": 9.99017053034519e-07, "loss": 0.0222, "step": 113260 }, { "epoch": 1.2102142208451305, "grad_norm": 10.327353477478027, "learning_rate": 9.990159997901208e-07, "loss": 0.0872, "step": 113270 }, { "epoch": 1.2103210641594102, "grad_norm": 2.245485305786133, "learning_rate": 9.990149459822958e-07, "loss": 0.0296, "step": 113280 }, { "epoch": 1.21042790747369, "grad_norm": 3.8795621395111084, "learning_rate": 9.990138916110453e-07, "loss": 0.0528, "step": 113290 }, { "epoch": 1.2105347507879693, "grad_norm": 5.036981582641602, "learning_rate": 9.990128366763703e-07, "loss": 0.0579, "step": 113300 }, { "epoch": 1.210641594102249, "grad_norm": 3.3306961059570312, "learning_rate": 9.990117811782724e-07, "loss": 0.0557, "step": 113310 }, { "epoch": 1.2107484374165287, "grad_norm": 0.4787260890007019, "learning_rate": 9.990107251167524e-07, "loss": 0.0217, "step": 113320 }, { "epoch": 1.2108552807308084, "grad_norm": 1.7722337245941162, "learning_rate": 9.990096684918115e-07, "loss": 0.0137, "step": 113330 }, { "epoch": 1.2109621240450879, "grad_norm": 0.5250531435012817, "learning_rate": 9.990086113034511e-07, "loss": 0.0242, "step": 113340 }, { "epoch": 1.2110689673593675, "grad_norm": 6.565034866333008, "learning_rate": 9.990075535516724e-07, "loss": 0.0492, "step": 113350 }, { "epoch": 1.2111758106736472, "grad_norm": 0.030498716980218887, "learning_rate": 9.990064952364764e-07, "loss": 0.0146, "step": 113360 }, { "epoch": 1.2112826539879267, "grad_norm": 2.6310231685638428, "learning_rate": 9.990054363578645e-07, "loss": 0.0084, "step": 113370 }, { "epoch": 1.2113894973022064, "grad_norm": 5.875309944152832, "learning_rate": 9.990043769158377e-07, "loss": 0.0648, "step": 113380 }, { "epoch": 1.211496340616486, "grad_norm": 0.16675254702568054, "learning_rate": 9.990033169103974e-07, "loss": 0.0621, "step": 113390 }, { "epoch": 1.2116031839307655, "grad_norm": 7.9408650398254395, "learning_rate": 9.990022563415447e-07, "loss": 0.0724, "step": 113400 }, { "epoch": 1.2117100272450452, "grad_norm": 0.04489199072122574, "learning_rate": 9.990011952092808e-07, "loss": 0.0138, "step": 113410 }, { "epoch": 1.2118168705593249, "grad_norm": 0.13941682875156403, "learning_rate": 9.990001335136067e-07, "loss": 0.0089, "step": 113420 }, { "epoch": 1.2119237138736043, "grad_norm": 3.1586968898773193, "learning_rate": 9.98999071254524e-07, "loss": 0.0175, "step": 113430 }, { "epoch": 1.212030557187884, "grad_norm": 3.6474153995513916, "learning_rate": 9.989980084320335e-07, "loss": 0.0453, "step": 113440 }, { "epoch": 1.2121374005021637, "grad_norm": 1.4338403940200806, "learning_rate": 9.98996945046137e-07, "loss": 0.0891, "step": 113450 }, { "epoch": 1.2122442438164431, "grad_norm": 1.4313098192214966, "learning_rate": 9.989958810968348e-07, "loss": 0.0201, "step": 113460 }, { "epoch": 1.2123510871307228, "grad_norm": 0.03268356993794441, "learning_rate": 9.989948165841287e-07, "loss": 0.0347, "step": 113470 }, { "epoch": 1.2124579304450025, "grad_norm": 0.2195073813199997, "learning_rate": 9.9899375150802e-07, "loss": 0.0448, "step": 113480 }, { "epoch": 1.212564773759282, "grad_norm": 1.356853723526001, "learning_rate": 9.989926858685094e-07, "loss": 0.0187, "step": 113490 }, { "epoch": 1.2126716170735616, "grad_norm": 0.12030210345983505, "learning_rate": 9.989916196655988e-07, "loss": 0.0389, "step": 113500 }, { "epoch": 1.2127784603878413, "grad_norm": 5.019360542297363, "learning_rate": 9.989905528992887e-07, "loss": 0.0513, "step": 113510 }, { "epoch": 1.2128853037021208, "grad_norm": 9.441274642944336, "learning_rate": 9.989894855695807e-07, "loss": 0.0299, "step": 113520 }, { "epoch": 1.2129921470164005, "grad_norm": 0.4231957495212555, "learning_rate": 9.989884176764758e-07, "loss": 0.0376, "step": 113530 }, { "epoch": 1.2130989903306801, "grad_norm": 0.22942467033863068, "learning_rate": 9.989873492199753e-07, "loss": 0.0232, "step": 113540 }, { "epoch": 1.2132058336449596, "grad_norm": 0.285209596157074, "learning_rate": 9.989862802000805e-07, "loss": 0.0231, "step": 113550 }, { "epoch": 1.2133126769592393, "grad_norm": 3.2772443294525146, "learning_rate": 9.989852106167927e-07, "loss": 0.0158, "step": 113560 }, { "epoch": 1.213419520273519, "grad_norm": 0.05845656991004944, "learning_rate": 9.989841404701128e-07, "loss": 0.044, "step": 113570 }, { "epoch": 1.2135263635877984, "grad_norm": 3.5806469917297363, "learning_rate": 9.98983069760042e-07, "loss": 0.0185, "step": 113580 }, { "epoch": 1.213633206902078, "grad_norm": 0.4552938640117645, "learning_rate": 9.98981998486582e-07, "loss": 0.0205, "step": 113590 }, { "epoch": 1.2137400502163578, "grad_norm": 3.653120517730713, "learning_rate": 9.989809266497333e-07, "loss": 0.0481, "step": 113600 }, { "epoch": 1.2138468935306372, "grad_norm": 0.037211909890174866, "learning_rate": 9.989798542494976e-07, "loss": 0.0447, "step": 113610 }, { "epoch": 1.213953736844917, "grad_norm": 4.231884479522705, "learning_rate": 9.98978781285876e-07, "loss": 0.0405, "step": 113620 }, { "epoch": 1.2140605801591966, "grad_norm": 0.047520652413368225, "learning_rate": 9.989777077588697e-07, "loss": 0.0193, "step": 113630 }, { "epoch": 1.214167423473476, "grad_norm": 8.784624099731445, "learning_rate": 9.9897663366848e-07, "loss": 0.0232, "step": 113640 }, { "epoch": 1.2142742667877557, "grad_norm": 0.9352455139160156, "learning_rate": 9.98975559014708e-07, "loss": 0.0374, "step": 113650 }, { "epoch": 1.2143811101020354, "grad_norm": 1.5057584047317505, "learning_rate": 9.989744837975547e-07, "loss": 0.0225, "step": 113660 }, { "epoch": 1.2144879534163149, "grad_norm": 3.7338147163391113, "learning_rate": 9.989734080170217e-07, "loss": 0.02, "step": 113670 }, { "epoch": 1.2145947967305946, "grad_norm": 12.75775146484375, "learning_rate": 9.989723316731102e-07, "loss": 0.0659, "step": 113680 }, { "epoch": 1.2147016400448742, "grad_norm": 0.1216815784573555, "learning_rate": 9.989712547658211e-07, "loss": 0.0603, "step": 113690 }, { "epoch": 1.2148084833591537, "grad_norm": 0.8115535378456116, "learning_rate": 9.989701772951558e-07, "loss": 0.0381, "step": 113700 }, { "epoch": 1.2149153266734334, "grad_norm": 0.06462322920560837, "learning_rate": 9.989690992611154e-07, "loss": 0.0543, "step": 113710 }, { "epoch": 1.215022169987713, "grad_norm": 0.6467653512954712, "learning_rate": 9.989680206637015e-07, "loss": 0.0574, "step": 113720 }, { "epoch": 1.2151290133019925, "grad_norm": 0.03188489004969597, "learning_rate": 9.98966941502915e-07, "loss": 0.0656, "step": 113730 }, { "epoch": 1.2152358566162722, "grad_norm": 0.2794455289840698, "learning_rate": 9.98965861778757e-07, "loss": 0.0107, "step": 113740 }, { "epoch": 1.2153426999305519, "grad_norm": 6.258123874664307, "learning_rate": 9.98964781491229e-07, "loss": 0.1202, "step": 113750 }, { "epoch": 1.2154495432448313, "grad_norm": 0.0686594620347023, "learning_rate": 9.989637006403323e-07, "loss": 0.0391, "step": 113760 }, { "epoch": 1.215556386559111, "grad_norm": 0.22990907728672028, "learning_rate": 9.989626192260677e-07, "loss": 0.0284, "step": 113770 }, { "epoch": 1.2156632298733907, "grad_norm": 3.906766414642334, "learning_rate": 9.989615372484365e-07, "loss": 0.0633, "step": 113780 }, { "epoch": 1.2157700731876702, "grad_norm": 1.9551597833633423, "learning_rate": 9.989604547074402e-07, "loss": 0.0289, "step": 113790 }, { "epoch": 1.2158769165019498, "grad_norm": 3.0654940605163574, "learning_rate": 9.9895937160308e-07, "loss": 0.0416, "step": 113800 }, { "epoch": 1.2159837598162295, "grad_norm": 1.2218989133834839, "learning_rate": 9.98958287935357e-07, "loss": 0.0226, "step": 113810 }, { "epoch": 1.216090603130509, "grad_norm": 0.024112414568662643, "learning_rate": 9.98957203704272e-07, "loss": 0.0286, "step": 113820 }, { "epoch": 1.2161974464447887, "grad_norm": 0.5744019150733948, "learning_rate": 9.989561189098273e-07, "loss": 0.0404, "step": 113830 }, { "epoch": 1.2163042897590683, "grad_norm": 0.033441998064517975, "learning_rate": 9.989550335520232e-07, "loss": 0.0321, "step": 113840 }, { "epoch": 1.216411133073348, "grad_norm": 3.828369140625, "learning_rate": 9.989539476308613e-07, "loss": 0.0427, "step": 113850 }, { "epoch": 1.2165179763876275, "grad_norm": 0.5568959712982178, "learning_rate": 9.989528611463424e-07, "loss": 0.0392, "step": 113860 }, { "epoch": 1.2166248197019072, "grad_norm": 4.443614482879639, "learning_rate": 9.989517740984683e-07, "loss": 0.04, "step": 113870 }, { "epoch": 1.2167316630161868, "grad_norm": Infinity, "learning_rate": 9.989506864872402e-07, "loss": 0.029, "step": 113880 }, { "epoch": 1.2168385063304663, "grad_norm": 1.1903395652770996, "learning_rate": 9.989495983126587e-07, "loss": 0.0259, "step": 113890 }, { "epoch": 1.216945349644746, "grad_norm": 1.8393690586090088, "learning_rate": 9.989485095747258e-07, "loss": 0.0536, "step": 113900 }, { "epoch": 1.2170521929590257, "grad_norm": 2.0013394355773926, "learning_rate": 9.989474202734423e-07, "loss": 0.0138, "step": 113910 }, { "epoch": 1.2171590362733051, "grad_norm": 0.6613337993621826, "learning_rate": 9.989463304088092e-07, "loss": 0.0313, "step": 113920 }, { "epoch": 1.2172658795875848, "grad_norm": 0.4544159770011902, "learning_rate": 9.989452399808283e-07, "loss": 0.0291, "step": 113930 }, { "epoch": 1.2173727229018645, "grad_norm": 0.01937752775847912, "learning_rate": 9.989441489895006e-07, "loss": 0.0429, "step": 113940 }, { "epoch": 1.217479566216144, "grad_norm": 3.800903558731079, "learning_rate": 9.989430574348272e-07, "loss": 0.0262, "step": 113950 }, { "epoch": 1.2175864095304236, "grad_norm": 0.18694673478603363, "learning_rate": 9.989419653168095e-07, "loss": 0.0237, "step": 113960 }, { "epoch": 1.2176932528447033, "grad_norm": 0.16693535447120667, "learning_rate": 9.989408726354487e-07, "loss": 0.0189, "step": 113970 }, { "epoch": 1.2178000961589828, "grad_norm": 0.035501543432474136, "learning_rate": 9.98939779390746e-07, "loss": 0.0209, "step": 113980 }, { "epoch": 1.2179069394732625, "grad_norm": 3.1563363075256348, "learning_rate": 9.989386855827025e-07, "loss": 0.0135, "step": 113990 }, { "epoch": 1.2180137827875421, "grad_norm": 7.169147491455078, "learning_rate": 9.989375912113197e-07, "loss": 0.0286, "step": 114000 }, { "epoch": 1.2181206261018216, "grad_norm": 12.496789932250977, "learning_rate": 9.989364962765985e-07, "loss": 0.0349, "step": 114010 }, { "epoch": 1.2182274694161013, "grad_norm": 0.6423223614692688, "learning_rate": 9.989354007785403e-07, "loss": 0.0635, "step": 114020 }, { "epoch": 1.218334312730381, "grad_norm": 0.5337278842926025, "learning_rate": 9.989343047171468e-07, "loss": 0.0082, "step": 114030 }, { "epoch": 1.2184411560446604, "grad_norm": 5.972002029418945, "learning_rate": 9.989332080924183e-07, "loss": 0.0307, "step": 114040 }, { "epoch": 1.21854799935894, "grad_norm": 3.3561081886291504, "learning_rate": 9.98932110904357e-07, "loss": 0.0411, "step": 114050 }, { "epoch": 1.2186548426732198, "grad_norm": 2.7134454250335693, "learning_rate": 9.989310131529636e-07, "loss": 0.0133, "step": 114060 }, { "epoch": 1.2187616859874995, "grad_norm": 1.5208170413970947, "learning_rate": 9.989299148382392e-07, "loss": 0.0356, "step": 114070 }, { "epoch": 1.218868529301779, "grad_norm": 2.646327495574951, "learning_rate": 9.989288159601854e-07, "loss": 0.0195, "step": 114080 }, { "epoch": 1.2189753726160586, "grad_norm": 1.4209009408950806, "learning_rate": 9.989277165188035e-07, "loss": 0.0217, "step": 114090 }, { "epoch": 1.2190822159303383, "grad_norm": 3.5297775268554688, "learning_rate": 9.989266165140942e-07, "loss": 0.1199, "step": 114100 }, { "epoch": 1.2191890592446177, "grad_norm": 0.05284018814563751, "learning_rate": 9.989255159460594e-07, "loss": 0.0062, "step": 114110 }, { "epoch": 1.2192959025588974, "grad_norm": 0.0047593265771865845, "learning_rate": 9.989244148146997e-07, "loss": 0.0149, "step": 114120 }, { "epoch": 1.219402745873177, "grad_norm": 1.9031951427459717, "learning_rate": 9.98923313120017e-07, "loss": 0.0637, "step": 114130 }, { "epoch": 1.2195095891874566, "grad_norm": 7.8939971923828125, "learning_rate": 9.98922210862012e-07, "loss": 0.0271, "step": 114140 }, { "epoch": 1.2196164325017362, "grad_norm": 0.5052511692047119, "learning_rate": 9.989211080406864e-07, "loss": 0.0391, "step": 114150 }, { "epoch": 1.219723275816016, "grad_norm": 0.17543308436870575, "learning_rate": 9.98920004656041e-07, "loss": 0.0149, "step": 114160 }, { "epoch": 1.2198301191302954, "grad_norm": 1.6036744117736816, "learning_rate": 9.989189007080774e-07, "loss": 0.0386, "step": 114170 }, { "epoch": 1.219936962444575, "grad_norm": 0.061887774616479874, "learning_rate": 9.989177961967967e-07, "loss": 0.0534, "step": 114180 }, { "epoch": 1.2200438057588547, "grad_norm": 0.010243668220937252, "learning_rate": 9.989166911222e-07, "loss": 0.0135, "step": 114190 }, { "epoch": 1.2201506490731342, "grad_norm": 2.8306148052215576, "learning_rate": 9.98915585484289e-07, "loss": 0.0529, "step": 114200 }, { "epoch": 1.2202574923874139, "grad_norm": 0.046569645404815674, "learning_rate": 9.989144792830644e-07, "loss": 0.0644, "step": 114210 }, { "epoch": 1.2203643357016936, "grad_norm": 0.1515602171421051, "learning_rate": 9.989133725185276e-07, "loss": 0.0126, "step": 114220 }, { "epoch": 1.220471179015973, "grad_norm": 1.3293014764785767, "learning_rate": 9.989122651906802e-07, "loss": 0.0148, "step": 114230 }, { "epoch": 1.2205780223302527, "grad_norm": 0.20974157750606537, "learning_rate": 9.989111572995231e-07, "loss": 0.0564, "step": 114240 }, { "epoch": 1.2206848656445324, "grad_norm": 9.799620628356934, "learning_rate": 9.989100488450578e-07, "loss": 0.0844, "step": 114250 }, { "epoch": 1.2207917089588118, "grad_norm": 0.05675485357642174, "learning_rate": 9.98908939827285e-07, "loss": 0.028, "step": 114260 }, { "epoch": 1.2208985522730915, "grad_norm": 0.31973421573638916, "learning_rate": 9.989078302462068e-07, "loss": 0.0414, "step": 114270 }, { "epoch": 1.2210053955873712, "grad_norm": 3.2347638607025146, "learning_rate": 9.989067201018238e-07, "loss": 0.0523, "step": 114280 }, { "epoch": 1.2211122389016507, "grad_norm": 3.950660467147827, "learning_rate": 9.989056093941373e-07, "loss": 0.0999, "step": 114290 }, { "epoch": 1.2212190822159303, "grad_norm": 7.436350345611572, "learning_rate": 9.98904498123149e-07, "loss": 0.0528, "step": 114300 }, { "epoch": 1.22132592553021, "grad_norm": 0.9100218415260315, "learning_rate": 9.989033862888597e-07, "loss": 0.056, "step": 114310 }, { "epoch": 1.2214327688444895, "grad_norm": 0.10620852559804916, "learning_rate": 9.98902273891271e-07, "loss": 0.0562, "step": 114320 }, { "epoch": 1.2215396121587692, "grad_norm": 0.2163991928100586, "learning_rate": 9.989011609303836e-07, "loss": 0.0522, "step": 114330 }, { "epoch": 1.2216464554730488, "grad_norm": 5.203733921051025, "learning_rate": 9.989000474061995e-07, "loss": 0.0633, "step": 114340 }, { "epoch": 1.2217532987873283, "grad_norm": 3.255995035171509, "learning_rate": 9.988989333187194e-07, "loss": 0.0195, "step": 114350 }, { "epoch": 1.221860142101608, "grad_norm": 0.043940428644418716, "learning_rate": 9.988978186679448e-07, "loss": 0.0095, "step": 114360 }, { "epoch": 1.2219669854158877, "grad_norm": 5.8155107498168945, "learning_rate": 9.988967034538772e-07, "loss": 0.0086, "step": 114370 }, { "epoch": 1.2220738287301671, "grad_norm": 0.1826801598072052, "learning_rate": 9.988955876765172e-07, "loss": 0.0135, "step": 114380 }, { "epoch": 1.2221806720444468, "grad_norm": 0.541773796081543, "learning_rate": 9.988944713358664e-07, "loss": 0.0128, "step": 114390 }, { "epoch": 1.2222875153587265, "grad_norm": 3.317002534866333, "learning_rate": 9.988933544319261e-07, "loss": 0.0167, "step": 114400 }, { "epoch": 1.222394358673006, "grad_norm": 19.492216110229492, "learning_rate": 9.98892236964698e-07, "loss": 0.0669, "step": 114410 }, { "epoch": 1.2225012019872856, "grad_norm": 0.21734362840652466, "learning_rate": 9.988911189341825e-07, "loss": 0.0575, "step": 114420 }, { "epoch": 1.2226080453015653, "grad_norm": 0.11065809428691864, "learning_rate": 9.988900003403813e-07, "loss": 0.0038, "step": 114430 }, { "epoch": 1.2227148886158448, "grad_norm": 3.598909378051758, "learning_rate": 9.988888811832957e-07, "loss": 0.0631, "step": 114440 }, { "epoch": 1.2228217319301244, "grad_norm": 3.6123902797698975, "learning_rate": 9.98887761462927e-07, "loss": 0.0276, "step": 114450 }, { "epoch": 1.2229285752444041, "grad_norm": 2.5220069885253906, "learning_rate": 9.988866411792762e-07, "loss": 0.0234, "step": 114460 }, { "epoch": 1.2230354185586836, "grad_norm": 0.02905610390007496, "learning_rate": 9.988855203323447e-07, "loss": 0.0302, "step": 114470 }, { "epoch": 1.2231422618729633, "grad_norm": 16.876367568969727, "learning_rate": 9.988843989221339e-07, "loss": 0.0967, "step": 114480 }, { "epoch": 1.223249105187243, "grad_norm": 4.69182825088501, "learning_rate": 9.988832769486451e-07, "loss": 0.0629, "step": 114490 }, { "epoch": 1.2233559485015224, "grad_norm": 0.3830995559692383, "learning_rate": 9.98882154411879e-07, "loss": 0.0243, "step": 114500 }, { "epoch": 1.223462791815802, "grad_norm": 0.06372237205505371, "learning_rate": 9.988810313118377e-07, "loss": 0.0459, "step": 114510 }, { "epoch": 1.2235696351300818, "grad_norm": 3.941990613937378, "learning_rate": 9.98879907648522e-07, "loss": 0.0344, "step": 114520 }, { "epoch": 1.2236764784443612, "grad_norm": 0.09500408172607422, "learning_rate": 9.98878783421933e-07, "loss": 0.0351, "step": 114530 }, { "epoch": 1.223783321758641, "grad_norm": 10.058923721313477, "learning_rate": 9.988776586320724e-07, "loss": 0.0287, "step": 114540 }, { "epoch": 1.2238901650729206, "grad_norm": 3.4809963703155518, "learning_rate": 9.988765332789413e-07, "loss": 0.0255, "step": 114550 }, { "epoch": 1.2239970083872, "grad_norm": 2.3061585426330566, "learning_rate": 9.98875407362541e-07, "loss": 0.0563, "step": 114560 }, { "epoch": 1.2241038517014797, "grad_norm": 0.20587345957756042, "learning_rate": 9.988742808828726e-07, "loss": 0.0129, "step": 114570 }, { "epoch": 1.2242106950157594, "grad_norm": 0.6866087913513184, "learning_rate": 9.988731538399374e-07, "loss": 0.0266, "step": 114580 }, { "epoch": 1.224317538330039, "grad_norm": 0.5437314510345459, "learning_rate": 9.988720262337369e-07, "loss": 0.006, "step": 114590 }, { "epoch": 1.2244243816443185, "grad_norm": 4.387390613555908, "learning_rate": 9.98870898064272e-07, "loss": 0.034, "step": 114600 }, { "epoch": 1.2245312249585982, "grad_norm": 0.029254762455821037, "learning_rate": 9.988697693315444e-07, "loss": 0.1021, "step": 114610 }, { "epoch": 1.224638068272878, "grad_norm": 15.670197486877441, "learning_rate": 9.98868640035555e-07, "loss": 0.0444, "step": 114620 }, { "epoch": 1.2247449115871574, "grad_norm": 2.619563579559326, "learning_rate": 9.988675101763056e-07, "loss": 0.0166, "step": 114630 }, { "epoch": 1.224851754901437, "grad_norm": 0.24864572286605835, "learning_rate": 9.988663797537968e-07, "loss": 0.0012, "step": 114640 }, { "epoch": 1.2249585982157167, "grad_norm": 5.872936248779297, "learning_rate": 9.988652487680304e-07, "loss": 0.0299, "step": 114650 }, { "epoch": 1.2250654415299962, "grad_norm": 21.234628677368164, "learning_rate": 9.988641172190074e-07, "loss": 0.0183, "step": 114660 }, { "epoch": 1.2251722848442759, "grad_norm": 0.16121870279312134, "learning_rate": 9.98862985106729e-07, "loss": 0.041, "step": 114670 }, { "epoch": 1.2252791281585556, "grad_norm": 0.22730602324008942, "learning_rate": 9.98861852431197e-07, "loss": 0.0038, "step": 114680 }, { "epoch": 1.225385971472835, "grad_norm": 4.891969680786133, "learning_rate": 9.98860719192412e-07, "loss": 0.0623, "step": 114690 }, { "epoch": 1.2254928147871147, "grad_norm": 6.789350986480713, "learning_rate": 9.988595853903758e-07, "loss": 0.054, "step": 114700 }, { "epoch": 1.2255996581013944, "grad_norm": 3.3937904834747314, "learning_rate": 9.988584510250896e-07, "loss": 0.0134, "step": 114710 }, { "epoch": 1.2257065014156738, "grad_norm": 0.3440197706222534, "learning_rate": 9.988573160965544e-07, "loss": 0.0429, "step": 114720 }, { "epoch": 1.2258133447299535, "grad_norm": 16.300048828125, "learning_rate": 9.988561806047715e-07, "loss": 0.0783, "step": 114730 }, { "epoch": 1.2259201880442332, "grad_norm": 1.4523791074752808, "learning_rate": 9.988550445497424e-07, "loss": 0.0589, "step": 114740 }, { "epoch": 1.2260270313585127, "grad_norm": 2.862708806991577, "learning_rate": 9.988539079314684e-07, "loss": 0.0643, "step": 114750 }, { "epoch": 1.2261338746727923, "grad_norm": 2.6311299800872803, "learning_rate": 9.988527707499505e-07, "loss": 0.013, "step": 114760 }, { "epoch": 1.226240717987072, "grad_norm": 3.725241184234619, "learning_rate": 9.988516330051905e-07, "loss": 0.0677, "step": 114770 }, { "epoch": 1.2263475613013515, "grad_norm": 3.0530152320861816, "learning_rate": 9.98850494697189e-07, "loss": 0.0775, "step": 114780 }, { "epoch": 1.2264544046156312, "grad_norm": 5.367270469665527, "learning_rate": 9.988493558259478e-07, "loss": 0.0106, "step": 114790 }, { "epoch": 1.2265612479299108, "grad_norm": 0.5360437035560608, "learning_rate": 9.98848216391468e-07, "loss": 0.0206, "step": 114800 }, { "epoch": 1.2266680912441905, "grad_norm": 0.039441145956516266, "learning_rate": 9.98847076393751e-07, "loss": 0.0094, "step": 114810 }, { "epoch": 1.22677493455847, "grad_norm": 0.08372621238231659, "learning_rate": 9.988459358327979e-07, "loss": 0.0285, "step": 114820 }, { "epoch": 1.2268817778727497, "grad_norm": 1.8763405084609985, "learning_rate": 9.988447947086101e-07, "loss": 0.0242, "step": 114830 }, { "epoch": 1.2269886211870293, "grad_norm": 2.650050163269043, "learning_rate": 9.98843653021189e-07, "loss": 0.0501, "step": 114840 }, { "epoch": 1.2270954645013088, "grad_norm": 4.845330238342285, "learning_rate": 9.988425107705356e-07, "loss": 0.0092, "step": 114850 }, { "epoch": 1.2272023078155885, "grad_norm": 1.9744784832000732, "learning_rate": 9.988413679566514e-07, "loss": 0.052, "step": 114860 }, { "epoch": 1.2273091511298682, "grad_norm": 0.020062772557139397, "learning_rate": 9.988402245795377e-07, "loss": 0.0415, "step": 114870 }, { "epoch": 1.2274159944441476, "grad_norm": 0.9338213205337524, "learning_rate": 9.988390806391958e-07, "loss": 0.0655, "step": 114880 }, { "epoch": 1.2275228377584273, "grad_norm": 5.115473747253418, "learning_rate": 9.988379361356267e-07, "loss": 0.0122, "step": 114890 }, { "epoch": 1.227629681072707, "grad_norm": 1.6436848640441895, "learning_rate": 9.98836791068832e-07, "loss": 0.0212, "step": 114900 }, { "epoch": 1.2277365243869864, "grad_norm": 0.5335671305656433, "learning_rate": 9.988356454388131e-07, "loss": 0.0498, "step": 114910 }, { "epoch": 1.2278433677012661, "grad_norm": 9.469651222229004, "learning_rate": 9.988344992455711e-07, "loss": 0.0262, "step": 114920 }, { "epoch": 1.2279502110155458, "grad_norm": 1.0060985088348389, "learning_rate": 9.98833352489107e-07, "loss": 0.0316, "step": 114930 }, { "epoch": 1.2280570543298253, "grad_norm": 4.763258934020996, "learning_rate": 9.988322051694226e-07, "loss": 0.0448, "step": 114940 }, { "epoch": 1.228163897644105, "grad_norm": 18.1844482421875, "learning_rate": 9.98831057286519e-07, "loss": 0.0517, "step": 114950 }, { "epoch": 1.2282707409583846, "grad_norm": 0.014464030973613262, "learning_rate": 9.988299088403974e-07, "loss": 0.0112, "step": 114960 }, { "epoch": 1.228377584272664, "grad_norm": 14.067414283752441, "learning_rate": 9.988287598310593e-07, "loss": 0.0785, "step": 114970 }, { "epoch": 1.2284844275869438, "grad_norm": 4.407735347747803, "learning_rate": 9.988276102585058e-07, "loss": 0.0392, "step": 114980 }, { "epoch": 1.2285912709012234, "grad_norm": 3.23232364654541, "learning_rate": 9.988264601227384e-07, "loss": 0.0558, "step": 114990 }, { "epoch": 1.228698114215503, "grad_norm": 1.1121206283569336, "learning_rate": 9.988253094237582e-07, "loss": 0.025, "step": 115000 }, { "epoch": 1.2288049575297826, "grad_norm": 1.4740387201309204, "learning_rate": 9.988241581615665e-07, "loss": 0.0653, "step": 115010 }, { "epoch": 1.2289118008440623, "grad_norm": 0.0974464863538742, "learning_rate": 9.988230063361646e-07, "loss": 0.007, "step": 115020 }, { "epoch": 1.2290186441583417, "grad_norm": 8.954493522644043, "learning_rate": 9.988218539475542e-07, "loss": 0.1293, "step": 115030 }, { "epoch": 1.2291254874726214, "grad_norm": 4.228160381317139, "learning_rate": 9.98820700995736e-07, "loss": 0.0228, "step": 115040 }, { "epoch": 1.229232330786901, "grad_norm": 0.01729755476117134, "learning_rate": 9.988195474807117e-07, "loss": 0.0311, "step": 115050 }, { "epoch": 1.2293391741011805, "grad_norm": 9.28105640411377, "learning_rate": 9.988183934024825e-07, "loss": 0.0202, "step": 115060 }, { "epoch": 1.2294460174154602, "grad_norm": 0.6910535097122192, "learning_rate": 9.988172387610497e-07, "loss": 0.0747, "step": 115070 }, { "epoch": 1.22955286072974, "grad_norm": 2.5507874488830566, "learning_rate": 9.988160835564145e-07, "loss": 0.0545, "step": 115080 }, { "epoch": 1.2296597040440194, "grad_norm": 3.386866807937622, "learning_rate": 9.988149277885783e-07, "loss": 0.023, "step": 115090 }, { "epoch": 1.229766547358299, "grad_norm": 14.7349853515625, "learning_rate": 9.988137714575424e-07, "loss": 0.0509, "step": 115100 }, { "epoch": 1.2298733906725787, "grad_norm": 5.119683742523193, "learning_rate": 9.988126145633081e-07, "loss": 0.0318, "step": 115110 }, { "epoch": 1.2299802339868582, "grad_norm": 4.199375629425049, "learning_rate": 9.988114571058768e-07, "loss": 0.0228, "step": 115120 }, { "epoch": 1.2300870773011379, "grad_norm": 0.2073747217655182, "learning_rate": 9.988102990852497e-07, "loss": 0.0323, "step": 115130 }, { "epoch": 1.2301939206154175, "grad_norm": 0.499723345041275, "learning_rate": 9.98809140501428e-07, "loss": 0.0492, "step": 115140 }, { "epoch": 1.230300763929697, "grad_norm": 15.378005027770996, "learning_rate": 9.988079813544133e-07, "loss": 0.0178, "step": 115150 }, { "epoch": 1.2304076072439767, "grad_norm": 0.44599586725234985, "learning_rate": 9.988068216442066e-07, "loss": 0.0271, "step": 115160 }, { "epoch": 1.2305144505582564, "grad_norm": 2.1889445781707764, "learning_rate": 9.988056613708094e-07, "loss": 0.0118, "step": 115170 }, { "epoch": 1.2306212938725358, "grad_norm": 1.971298336982727, "learning_rate": 9.98804500534223e-07, "loss": 0.0383, "step": 115180 }, { "epoch": 1.2307281371868155, "grad_norm": 0.11633913218975067, "learning_rate": 9.988033391344485e-07, "loss": 0.0259, "step": 115190 }, { "epoch": 1.2308349805010952, "grad_norm": 5.624009609222412, "learning_rate": 9.988021771714876e-07, "loss": 0.0245, "step": 115200 }, { "epoch": 1.2309418238153746, "grad_norm": 1.5768921375274658, "learning_rate": 9.988010146453414e-07, "loss": 0.0197, "step": 115210 }, { "epoch": 1.2310486671296543, "grad_norm": 5.961346626281738, "learning_rate": 9.987998515560111e-07, "loss": 0.019, "step": 115220 }, { "epoch": 1.231155510443934, "grad_norm": 5.7911481857299805, "learning_rate": 9.987986879034981e-07, "loss": 0.022, "step": 115230 }, { "epoch": 1.2312623537582135, "grad_norm": 3.168954849243164, "learning_rate": 9.98797523687804e-07, "loss": 0.0209, "step": 115240 }, { "epoch": 1.2313691970724931, "grad_norm": 0.06284160166978836, "learning_rate": 9.987963589089295e-07, "loss": 0.0252, "step": 115250 }, { "epoch": 1.2314760403867728, "grad_norm": 7.440282344818115, "learning_rate": 9.987951935668763e-07, "loss": 0.0408, "step": 115260 }, { "epoch": 1.2315828837010523, "grad_norm": 0.9211238622665405, "learning_rate": 9.98794027661646e-07, "loss": 0.0458, "step": 115270 }, { "epoch": 1.231689727015332, "grad_norm": 4.325788974761963, "learning_rate": 9.987928611932393e-07, "loss": 0.038, "step": 115280 }, { "epoch": 1.2317965703296117, "grad_norm": 0.4556460976600647, "learning_rate": 9.98791694161658e-07, "loss": 0.0059, "step": 115290 }, { "epoch": 1.231903413643891, "grad_norm": 0.16960015892982483, "learning_rate": 9.98790526566903e-07, "loss": 0.0211, "step": 115300 }, { "epoch": 1.2320102569581708, "grad_norm": 0.5218924880027771, "learning_rate": 9.987893584089761e-07, "loss": 0.0229, "step": 115310 }, { "epoch": 1.2321171002724505, "grad_norm": 2.441356897354126, "learning_rate": 9.987881896878783e-07, "loss": 0.0881, "step": 115320 }, { "epoch": 1.2322239435867302, "grad_norm": 2.7691829204559326, "learning_rate": 9.987870204036108e-07, "loss": 0.0151, "step": 115330 }, { "epoch": 1.2323307869010096, "grad_norm": 3.222135305404663, "learning_rate": 9.987858505561752e-07, "loss": 0.0658, "step": 115340 }, { "epoch": 1.2324376302152893, "grad_norm": 14.547745704650879, "learning_rate": 9.987846801455728e-07, "loss": 0.0528, "step": 115350 }, { "epoch": 1.232544473529569, "grad_norm": 0.09393417835235596, "learning_rate": 9.987835091718048e-07, "loss": 0.0315, "step": 115360 }, { "epoch": 1.2326513168438484, "grad_norm": 2.5966873168945312, "learning_rate": 9.987823376348726e-07, "loss": 0.0373, "step": 115370 }, { "epoch": 1.2327581601581281, "grad_norm": 0.09904666990041733, "learning_rate": 9.987811655347775e-07, "loss": 0.0315, "step": 115380 }, { "epoch": 1.2328650034724078, "grad_norm": 0.16083860397338867, "learning_rate": 9.987799928715208e-07, "loss": 0.0565, "step": 115390 }, { "epoch": 1.2329718467866873, "grad_norm": 0.7967265248298645, "learning_rate": 9.98778819645104e-07, "loss": 0.0426, "step": 115400 }, { "epoch": 1.233078690100967, "grad_norm": 0.04123946279287338, "learning_rate": 9.987776458555282e-07, "loss": 0.0227, "step": 115410 }, { "epoch": 1.2331855334152466, "grad_norm": 0.0517503097653389, "learning_rate": 9.987764715027947e-07, "loss": 0.0331, "step": 115420 }, { "epoch": 1.233292376729526, "grad_norm": 0.06150304898619652, "learning_rate": 9.98775296586905e-07, "loss": 0.0312, "step": 115430 }, { "epoch": 1.2333992200438058, "grad_norm": 1.9063196182250977, "learning_rate": 9.987741211078602e-07, "loss": 0.0158, "step": 115440 }, { "epoch": 1.2335060633580854, "grad_norm": 3.7740912437438965, "learning_rate": 9.987729450656619e-07, "loss": 0.0622, "step": 115450 }, { "epoch": 1.233612906672365, "grad_norm": 1.5937633514404297, "learning_rate": 9.98771768460311e-07, "loss": 0.0555, "step": 115460 }, { "epoch": 1.2337197499866446, "grad_norm": 6.107001304626465, "learning_rate": 9.987705912918096e-07, "loss": 0.0846, "step": 115470 }, { "epoch": 1.2338265933009243, "grad_norm": 1.8059431314468384, "learning_rate": 9.987694135601583e-07, "loss": 0.0126, "step": 115480 }, { "epoch": 1.2339334366152037, "grad_norm": 4.799132823944092, "learning_rate": 9.987682352653588e-07, "loss": 0.0453, "step": 115490 }, { "epoch": 1.2340402799294834, "grad_norm": 1.8132035732269287, "learning_rate": 9.987670564074123e-07, "loss": 0.0704, "step": 115500 }, { "epoch": 1.234147123243763, "grad_norm": 12.237606048583984, "learning_rate": 9.9876587698632e-07, "loss": 0.0346, "step": 115510 }, { "epoch": 1.2342539665580425, "grad_norm": 2.058223247528076, "learning_rate": 9.987646970020835e-07, "loss": 0.0232, "step": 115520 }, { "epoch": 1.2343608098723222, "grad_norm": 0.16518108546733856, "learning_rate": 9.987635164547039e-07, "loss": 0.056, "step": 115530 }, { "epoch": 1.234467653186602, "grad_norm": 0.9072014093399048, "learning_rate": 9.987623353441829e-07, "loss": 0.0417, "step": 115540 }, { "epoch": 1.2345744965008816, "grad_norm": 0.8983389735221863, "learning_rate": 9.987611536705213e-07, "loss": 0.032, "step": 115550 }, { "epoch": 1.234681339815161, "grad_norm": 2.2450449466705322, "learning_rate": 9.98759971433721e-07, "loss": 0.0273, "step": 115560 }, { "epoch": 1.2347881831294407, "grad_norm": 0.7771040797233582, "learning_rate": 9.987587886337827e-07, "loss": 0.0422, "step": 115570 }, { "epoch": 1.2348950264437204, "grad_norm": 4.33480978012085, "learning_rate": 9.987576052707083e-07, "loss": 0.0313, "step": 115580 }, { "epoch": 1.2350018697579999, "grad_norm": 0.35603487491607666, "learning_rate": 9.98756421344499e-07, "loss": 0.0326, "step": 115590 }, { "epoch": 1.2351087130722795, "grad_norm": 0.09751760959625244, "learning_rate": 9.98755236855156e-07, "loss": 0.0543, "step": 115600 }, { "epoch": 1.2352155563865592, "grad_norm": 3.480926752090454, "learning_rate": 9.987540518026806e-07, "loss": 0.0128, "step": 115610 }, { "epoch": 1.2353223997008387, "grad_norm": 0.3679179251194, "learning_rate": 9.987528661870744e-07, "loss": 0.0321, "step": 115620 }, { "epoch": 1.2354292430151184, "grad_norm": 1.9146716594696045, "learning_rate": 9.987516800083383e-07, "loss": 0.0359, "step": 115630 }, { "epoch": 1.235536086329398, "grad_norm": 0.8478923439979553, "learning_rate": 9.987504932664742e-07, "loss": 0.0377, "step": 115640 }, { "epoch": 1.2356429296436775, "grad_norm": 0.08273069560527802, "learning_rate": 9.987493059614829e-07, "loss": 0.0175, "step": 115650 }, { "epoch": 1.2357497729579572, "grad_norm": 0.08210869878530502, "learning_rate": 9.98748118093366e-07, "loss": 0.0212, "step": 115660 }, { "epoch": 1.2358566162722369, "grad_norm": 0.019842853769659996, "learning_rate": 9.98746929662125e-07, "loss": 0.0565, "step": 115670 }, { "epoch": 1.2359634595865163, "grad_norm": 0.0174711961299181, "learning_rate": 9.987457406677611e-07, "loss": 0.0204, "step": 115680 }, { "epoch": 1.236070302900796, "grad_norm": 5.551120758056641, "learning_rate": 9.987445511102754e-07, "loss": 0.0509, "step": 115690 }, { "epoch": 1.2361771462150757, "grad_norm": 5.886983871459961, "learning_rate": 9.987433609896695e-07, "loss": 0.0579, "step": 115700 }, { "epoch": 1.2362839895293551, "grad_norm": 0.3125148415565491, "learning_rate": 9.987421703059449e-07, "loss": 0.0278, "step": 115710 }, { "epoch": 1.2363908328436348, "grad_norm": 12.582146644592285, "learning_rate": 9.987409790591027e-07, "loss": 0.0423, "step": 115720 }, { "epoch": 1.2364976761579145, "grad_norm": 7.33880615234375, "learning_rate": 9.987397872491442e-07, "loss": 0.0426, "step": 115730 }, { "epoch": 1.236604519472194, "grad_norm": 2.6948580741882324, "learning_rate": 9.98738594876071e-07, "loss": 0.0147, "step": 115740 }, { "epoch": 1.2367113627864736, "grad_norm": 3.431673049926758, "learning_rate": 9.98737401939884e-07, "loss": 0.0189, "step": 115750 }, { "epoch": 1.2368182061007533, "grad_norm": 0.02159569412469864, "learning_rate": 9.98736208440585e-07, "loss": 0.0147, "step": 115760 }, { "epoch": 1.2369250494150328, "grad_norm": 4.327091693878174, "learning_rate": 9.987350143781753e-07, "loss": 0.0265, "step": 115770 }, { "epoch": 1.2370318927293125, "grad_norm": 5.444470405578613, "learning_rate": 9.98733819752656e-07, "loss": 0.0485, "step": 115780 }, { "epoch": 1.2371387360435921, "grad_norm": 0.10834482312202454, "learning_rate": 9.987326245640285e-07, "loss": 0.0316, "step": 115790 }, { "epoch": 1.2372455793578716, "grad_norm": 5.102056980133057, "learning_rate": 9.987314288122944e-07, "loss": 0.0395, "step": 115800 }, { "epoch": 1.2373524226721513, "grad_norm": 2.1268527507781982, "learning_rate": 9.987302324974547e-07, "loss": 0.0259, "step": 115810 }, { "epoch": 1.237459265986431, "grad_norm": 14.577598571777344, "learning_rate": 9.987290356195108e-07, "loss": 0.1163, "step": 115820 }, { "epoch": 1.2375661093007104, "grad_norm": 0.6207502484321594, "learning_rate": 9.987278381784646e-07, "loss": 0.0422, "step": 115830 }, { "epoch": 1.23767295261499, "grad_norm": 0.05974474549293518, "learning_rate": 9.987266401743168e-07, "loss": 0.0145, "step": 115840 }, { "epoch": 1.2377797959292698, "grad_norm": 1.4997386932373047, "learning_rate": 9.987254416070689e-07, "loss": 0.0652, "step": 115850 }, { "epoch": 1.2378866392435492, "grad_norm": 1.566162109375, "learning_rate": 9.987242424767223e-07, "loss": 0.0193, "step": 115860 }, { "epoch": 1.237993482557829, "grad_norm": 1.11758553981781, "learning_rate": 9.987230427832787e-07, "loss": 0.0321, "step": 115870 }, { "epoch": 1.2381003258721086, "grad_norm": 8.78809642791748, "learning_rate": 9.98721842526739e-07, "loss": 0.0204, "step": 115880 }, { "epoch": 1.238207169186388, "grad_norm": 3.878107786178589, "learning_rate": 9.987206417071047e-07, "loss": 0.0427, "step": 115890 }, { "epoch": 1.2383140125006677, "grad_norm": 2.238881826400757, "learning_rate": 9.98719440324377e-07, "loss": 0.0156, "step": 115900 }, { "epoch": 1.2384208558149474, "grad_norm": 0.194677472114563, "learning_rate": 9.987182383785576e-07, "loss": 0.0325, "step": 115910 }, { "epoch": 1.2385276991292269, "grad_norm": 12.64936351776123, "learning_rate": 9.987170358696476e-07, "loss": 0.0576, "step": 115920 }, { "epoch": 1.2386345424435066, "grad_norm": 2.6589372158050537, "learning_rate": 9.987158327976483e-07, "loss": 0.0277, "step": 115930 }, { "epoch": 1.2387413857577863, "grad_norm": 0.8345044851303101, "learning_rate": 9.987146291625614e-07, "loss": 0.0514, "step": 115940 }, { "epoch": 1.2388482290720657, "grad_norm": 1.2386462688446045, "learning_rate": 9.98713424964388e-07, "loss": 0.0844, "step": 115950 }, { "epoch": 1.2389550723863454, "grad_norm": 0.03188890218734741, "learning_rate": 9.987122202031292e-07, "loss": 0.0081, "step": 115960 }, { "epoch": 1.239061915700625, "grad_norm": 3.0805916786193848, "learning_rate": 9.987110148787872e-07, "loss": 0.0222, "step": 115970 }, { "epoch": 1.2391687590149045, "grad_norm": 0.018698185682296753, "learning_rate": 9.987098089913624e-07, "loss": 0.0349, "step": 115980 }, { "epoch": 1.2392756023291842, "grad_norm": 2.3873422145843506, "learning_rate": 9.987086025408567e-07, "loss": 0.0321, "step": 115990 }, { "epoch": 1.239382445643464, "grad_norm": 1.2699947357177734, "learning_rate": 9.987073955272716e-07, "loss": 0.009, "step": 116000 }, { "epoch": 1.2394892889577434, "grad_norm": 4.075132846832275, "learning_rate": 9.987061879506079e-07, "loss": 0.0454, "step": 116010 }, { "epoch": 1.239596132272023, "grad_norm": 5.356790065765381, "learning_rate": 9.987049798108674e-07, "loss": 0.0155, "step": 116020 }, { "epoch": 1.2397029755863027, "grad_norm": 2.528458595275879, "learning_rate": 9.987037711080512e-07, "loss": 0.0061, "step": 116030 }, { "epoch": 1.2398098189005822, "grad_norm": 0.05497061833739281, "learning_rate": 9.98702561842161e-07, "loss": 0.0792, "step": 116040 }, { "epoch": 1.2399166622148619, "grad_norm": 0.10404830425977707, "learning_rate": 9.987013520131977e-07, "loss": 0.007, "step": 116050 }, { "epoch": 1.2400235055291415, "grad_norm": 0.07106487452983856, "learning_rate": 9.987001416211632e-07, "loss": 0.0514, "step": 116060 }, { "epoch": 1.2401303488434212, "grad_norm": 1.982714056968689, "learning_rate": 9.986989306660584e-07, "loss": 0.0127, "step": 116070 }, { "epoch": 1.2402371921577007, "grad_norm": 3.137378692626953, "learning_rate": 9.986977191478851e-07, "loss": 0.0289, "step": 116080 }, { "epoch": 1.2403440354719804, "grad_norm": 0.12340401113033295, "learning_rate": 9.986965070666443e-07, "loss": 0.055, "step": 116090 }, { "epoch": 1.24045087878626, "grad_norm": 3.0387330055236816, "learning_rate": 9.986952944223376e-07, "loss": 0.0296, "step": 116100 }, { "epoch": 1.2405577221005395, "grad_norm": 0.7056360840797424, "learning_rate": 9.98694081214966e-07, "loss": 0.0243, "step": 116110 }, { "epoch": 1.2406645654148192, "grad_norm": 3.18624210357666, "learning_rate": 9.986928674445315e-07, "loss": 0.0249, "step": 116120 }, { "epoch": 1.2407714087290989, "grad_norm": 0.006884065456688404, "learning_rate": 9.98691653111035e-07, "loss": 0.0977, "step": 116130 }, { "epoch": 1.2408782520433783, "grad_norm": 0.051413603127002716, "learning_rate": 9.986904382144777e-07, "loss": 0.0466, "step": 116140 }, { "epoch": 1.240985095357658, "grad_norm": 1.0270899534225464, "learning_rate": 9.986892227548617e-07, "loss": 0.0099, "step": 116150 }, { "epoch": 1.2410919386719377, "grad_norm": 3.0698111057281494, "learning_rate": 9.986880067321877e-07, "loss": 0.0239, "step": 116160 }, { "epoch": 1.2411987819862171, "grad_norm": 4.121160984039307, "learning_rate": 9.986867901464573e-07, "loss": 0.0259, "step": 116170 }, { "epoch": 1.2413056253004968, "grad_norm": 7.447195529937744, "learning_rate": 9.98685572997672e-07, "loss": 0.0509, "step": 116180 }, { "epoch": 1.2414124686147765, "grad_norm": 0.013191827572882175, "learning_rate": 9.98684355285833e-07, "loss": 0.0139, "step": 116190 }, { "epoch": 1.241519311929056, "grad_norm": 0.10570505261421204, "learning_rate": 9.986831370109418e-07, "loss": 0.0348, "step": 116200 }, { "epoch": 1.2416261552433356, "grad_norm": 0.01350431703031063, "learning_rate": 9.986819181729992e-07, "loss": 0.011, "step": 116210 }, { "epoch": 1.2417329985576153, "grad_norm": 5.352355480194092, "learning_rate": 9.986806987720076e-07, "loss": 0.1141, "step": 116220 }, { "epoch": 1.2418398418718948, "grad_norm": 0.036336369812488556, "learning_rate": 9.986794788079677e-07, "loss": 0.0341, "step": 116230 }, { "epoch": 1.2419466851861745, "grad_norm": 0.015691114589571953, "learning_rate": 9.98678258280881e-07, "loss": 0.0394, "step": 116240 }, { "epoch": 1.2420535285004541, "grad_norm": 23.244985580444336, "learning_rate": 9.986770371907492e-07, "loss": 0.015, "step": 116250 }, { "epoch": 1.2421603718147336, "grad_norm": 9.702924728393555, "learning_rate": 9.98675815537573e-07, "loss": 0.0631, "step": 116260 }, { "epoch": 1.2422672151290133, "grad_norm": 6.251297473907471, "learning_rate": 9.986745933213543e-07, "loss": 0.034, "step": 116270 }, { "epoch": 1.242374058443293, "grad_norm": 4.3826584815979, "learning_rate": 9.986733705420943e-07, "loss": 0.1009, "step": 116280 }, { "epoch": 1.2424809017575726, "grad_norm": 1.8244342803955078, "learning_rate": 9.986721471997945e-07, "loss": 0.0364, "step": 116290 }, { "epoch": 1.242587745071852, "grad_norm": 0.2913097143173218, "learning_rate": 9.986709232944563e-07, "loss": 0.08, "step": 116300 }, { "epoch": 1.2426945883861318, "grad_norm": 1.2305831909179688, "learning_rate": 9.986696988260808e-07, "loss": 0.0769, "step": 116310 }, { "epoch": 1.2428014317004115, "grad_norm": 3.9922027587890625, "learning_rate": 9.986684737946697e-07, "loss": 0.0626, "step": 116320 }, { "epoch": 1.242908275014691, "grad_norm": 4.100481986999512, "learning_rate": 9.986672482002242e-07, "loss": 0.0318, "step": 116330 }, { "epoch": 1.2430151183289706, "grad_norm": 0.1155136227607727, "learning_rate": 9.986660220427458e-07, "loss": 0.0579, "step": 116340 }, { "epoch": 1.2431219616432503, "grad_norm": 0.7887532114982605, "learning_rate": 9.986647953222358e-07, "loss": 0.0378, "step": 116350 }, { "epoch": 1.2432288049575297, "grad_norm": 1.624508023262024, "learning_rate": 9.986635680386955e-07, "loss": 0.012, "step": 116360 }, { "epoch": 1.2433356482718094, "grad_norm": 10.021114349365234, "learning_rate": 9.986623401921266e-07, "loss": 0.0607, "step": 116370 }, { "epoch": 1.243442491586089, "grad_norm": 0.011812599375844002, "learning_rate": 9.9866111178253e-07, "loss": 0.0468, "step": 116380 }, { "epoch": 1.2435493349003686, "grad_norm": 0.46628618240356445, "learning_rate": 9.986598828099077e-07, "loss": 0.0133, "step": 116390 }, { "epoch": 1.2436561782146482, "grad_norm": 1.3861058950424194, "learning_rate": 9.986586532742605e-07, "loss": 0.0443, "step": 116400 }, { "epoch": 1.243763021528928, "grad_norm": 4.363420486450195, "learning_rate": 9.986574231755902e-07, "loss": 0.0343, "step": 116410 }, { "epoch": 1.2438698648432074, "grad_norm": 1.5730289220809937, "learning_rate": 9.98656192513898e-07, "loss": 0.0431, "step": 116420 }, { "epoch": 1.243976708157487, "grad_norm": 0.21104510128498077, "learning_rate": 9.986549612891852e-07, "loss": 0.0166, "step": 116430 }, { "epoch": 1.2440835514717667, "grad_norm": 1.0643590688705444, "learning_rate": 9.986537295014535e-07, "loss": 0.0824, "step": 116440 }, { "epoch": 1.2441903947860462, "grad_norm": 9.721087455749512, "learning_rate": 9.98652497150704e-07, "loss": 0.0405, "step": 116450 }, { "epoch": 1.2442972381003259, "grad_norm": 1.1802353858947754, "learning_rate": 9.986512642369383e-07, "loss": 0.0629, "step": 116460 }, { "epoch": 1.2444040814146056, "grad_norm": 0.06916186958551407, "learning_rate": 9.986500307601575e-07, "loss": 0.0242, "step": 116470 }, { "epoch": 1.244510924728885, "grad_norm": 0.046773795038461685, "learning_rate": 9.986487967203634e-07, "loss": 0.0486, "step": 116480 }, { "epoch": 1.2446177680431647, "grad_norm": 0.1535094976425171, "learning_rate": 9.98647562117557e-07, "loss": 0.0243, "step": 116490 }, { "epoch": 1.2447246113574444, "grad_norm": 5.5233564376831055, "learning_rate": 9.9864632695174e-07, "loss": 0.026, "step": 116500 }, { "epoch": 1.2448314546717238, "grad_norm": 0.40474122762680054, "learning_rate": 9.986450912229135e-07, "loss": 0.0257, "step": 116510 }, { "epoch": 1.2449382979860035, "grad_norm": 0.15308643877506256, "learning_rate": 9.986438549310793e-07, "loss": 0.0202, "step": 116520 }, { "epoch": 1.2450451413002832, "grad_norm": 0.04086069017648697, "learning_rate": 9.986426180762383e-07, "loss": 0.0633, "step": 116530 }, { "epoch": 1.2451519846145627, "grad_norm": 4.574262619018555, "learning_rate": 9.986413806583922e-07, "loss": 0.0158, "step": 116540 }, { "epoch": 1.2452588279288423, "grad_norm": 1.3619211912155151, "learning_rate": 9.986401426775423e-07, "loss": 0.0177, "step": 116550 }, { "epoch": 1.245365671243122, "grad_norm": 17.319236755371094, "learning_rate": 9.986389041336903e-07, "loss": 0.0578, "step": 116560 }, { "epoch": 1.2454725145574015, "grad_norm": 13.386072158813477, "learning_rate": 9.986376650268372e-07, "loss": 0.0179, "step": 116570 }, { "epoch": 1.2455793578716812, "grad_norm": 4.436056613922119, "learning_rate": 9.986364253569845e-07, "loss": 0.0159, "step": 116580 }, { "epoch": 1.2456862011859609, "grad_norm": 6.856856822967529, "learning_rate": 9.986351851241336e-07, "loss": 0.0195, "step": 116590 }, { "epoch": 1.2457930445002403, "grad_norm": 0.08792219310998917, "learning_rate": 9.98633944328286e-07, "loss": 0.0122, "step": 116600 }, { "epoch": 1.24589988781452, "grad_norm": 2.180291175842285, "learning_rate": 9.98632702969443e-07, "loss": 0.0284, "step": 116610 }, { "epoch": 1.2460067311287997, "grad_norm": 1.5530439615249634, "learning_rate": 9.986314610476062e-07, "loss": 0.0121, "step": 116620 }, { "epoch": 1.2461135744430791, "grad_norm": 4.239853858947754, "learning_rate": 9.986302185627767e-07, "loss": 0.0171, "step": 116630 }, { "epoch": 1.2462204177573588, "grad_norm": 0.04010489210486412, "learning_rate": 9.986289755149563e-07, "loss": 0.0203, "step": 116640 }, { "epoch": 1.2463272610716385, "grad_norm": 0.22390882670879364, "learning_rate": 9.986277319041458e-07, "loss": 0.0492, "step": 116650 }, { "epoch": 1.246434104385918, "grad_norm": 9.355135917663574, "learning_rate": 9.98626487730347e-07, "loss": 0.0285, "step": 116660 }, { "epoch": 1.2465409477001976, "grad_norm": 0.15346454083919525, "learning_rate": 9.986252429935615e-07, "loss": 0.009, "step": 116670 }, { "epoch": 1.2466477910144773, "grad_norm": 0.07240770757198334, "learning_rate": 9.986239976937903e-07, "loss": 0.0471, "step": 116680 }, { "epoch": 1.2467546343287568, "grad_norm": 7.669868469238281, "learning_rate": 9.986227518310351e-07, "loss": 0.0567, "step": 116690 }, { "epoch": 1.2468614776430365, "grad_norm": 4.438228130340576, "learning_rate": 9.98621505405297e-07, "loss": 0.0255, "step": 116700 }, { "epoch": 1.2469683209573161, "grad_norm": 13.500862121582031, "learning_rate": 9.986202584165776e-07, "loss": 0.0831, "step": 116710 }, { "epoch": 1.2470751642715956, "grad_norm": 6.832913875579834, "learning_rate": 9.986190108648784e-07, "loss": 0.0346, "step": 116720 }, { "epoch": 1.2471820075858753, "grad_norm": 0.12693552672863007, "learning_rate": 9.986177627502007e-07, "loss": 0.0182, "step": 116730 }, { "epoch": 1.247288850900155, "grad_norm": 2.908212423324585, "learning_rate": 9.986165140725459e-07, "loss": 0.0646, "step": 116740 }, { "epoch": 1.2473956942144344, "grad_norm": 1.0324764251708984, "learning_rate": 9.986152648319155e-07, "loss": 0.028, "step": 116750 }, { "epoch": 1.247502537528714, "grad_norm": 0.07487142086029053, "learning_rate": 9.986140150283106e-07, "loss": 0.02, "step": 116760 }, { "epoch": 1.2476093808429938, "grad_norm": 0.05811997875571251, "learning_rate": 9.98612764661733e-07, "loss": 0.0384, "step": 116770 }, { "epoch": 1.2477162241572732, "grad_norm": 0.027612371370196342, "learning_rate": 9.98611513732184e-07, "loss": 0.0476, "step": 116780 }, { "epoch": 1.247823067471553, "grad_norm": 0.3851741552352905, "learning_rate": 9.98610262239665e-07, "loss": 0.0412, "step": 116790 }, { "epoch": 1.2479299107858326, "grad_norm": 1.198324203491211, "learning_rate": 9.986090101841773e-07, "loss": 0.047, "step": 116800 }, { "epoch": 1.2480367541001123, "grad_norm": 4.762578964233398, "learning_rate": 9.986077575657224e-07, "loss": 0.0827, "step": 116810 }, { "epoch": 1.2481435974143917, "grad_norm": 1.5530561208724976, "learning_rate": 9.986065043843015e-07, "loss": 0.0173, "step": 116820 }, { "epoch": 1.2482504407286714, "grad_norm": 5.914824962615967, "learning_rate": 9.986052506399165e-07, "loss": 0.0783, "step": 116830 }, { "epoch": 1.248357284042951, "grad_norm": 0.5883715152740479, "learning_rate": 9.986039963325686e-07, "loss": 0.0384, "step": 116840 }, { "epoch": 1.2484641273572306, "grad_norm": 0.25683891773223877, "learning_rate": 9.98602741462259e-07, "loss": 0.025, "step": 116850 }, { "epoch": 1.2485709706715102, "grad_norm": 0.6951274871826172, "learning_rate": 9.986014860289892e-07, "loss": 0.0023, "step": 116860 }, { "epoch": 1.24867781398579, "grad_norm": 0.516541063785553, "learning_rate": 9.986002300327607e-07, "loss": 0.0331, "step": 116870 }, { "epoch": 1.2487846573000694, "grad_norm": 3.3670637607574463, "learning_rate": 9.98598973473575e-07, "loss": 0.0419, "step": 116880 }, { "epoch": 1.248891500614349, "grad_norm": 7.043569564819336, "learning_rate": 9.985977163514336e-07, "loss": 0.0486, "step": 116890 }, { "epoch": 1.2489983439286287, "grad_norm": 0.011317265219986439, "learning_rate": 9.985964586663377e-07, "loss": 0.012, "step": 116900 }, { "epoch": 1.2491051872429082, "grad_norm": 5.18850564956665, "learning_rate": 9.985952004182884e-07, "loss": 0.0908, "step": 116910 }, { "epoch": 1.2492120305571879, "grad_norm": 5.36967134475708, "learning_rate": 9.98593941607288e-07, "loss": 0.0203, "step": 116920 }, { "epoch": 1.2493188738714676, "grad_norm": 4.47231912612915, "learning_rate": 9.98592682233337e-07, "loss": 0.0381, "step": 116930 }, { "epoch": 1.249425717185747, "grad_norm": 4.460423469543457, "learning_rate": 9.985914222964373e-07, "loss": 0.016, "step": 116940 }, { "epoch": 1.2495325605000267, "grad_norm": 0.085240438580513, "learning_rate": 9.985901617965905e-07, "loss": 0.0133, "step": 116950 }, { "epoch": 1.2496394038143064, "grad_norm": 2.6811535358428955, "learning_rate": 9.985889007337976e-07, "loss": 0.0142, "step": 116960 }, { "epoch": 1.2497462471285858, "grad_norm": 4.1010823249816895, "learning_rate": 9.9858763910806e-07, "loss": 0.0242, "step": 116970 }, { "epoch": 1.2498530904428655, "grad_norm": 3.772855043411255, "learning_rate": 9.985863769193797e-07, "loss": 0.0366, "step": 116980 }, { "epoch": 1.2499599337571452, "grad_norm": 3.433885335922241, "learning_rate": 9.985851141677575e-07, "loss": 0.0399, "step": 116990 }, { "epoch": 1.2500667770714249, "grad_norm": 4.03564977645874, "learning_rate": 9.985838508531952e-07, "loss": 0.0555, "step": 117000 }, { "epoch": 1.2501736203857043, "grad_norm": 5.475759506225586, "learning_rate": 9.98582586975694e-07, "loss": 0.0534, "step": 117010 }, { "epoch": 1.250280463699984, "grad_norm": 0.3561631143093109, "learning_rate": 9.985813225352554e-07, "loss": 0.0326, "step": 117020 }, { "epoch": 1.2503873070142637, "grad_norm": 3.414720296859741, "learning_rate": 9.98580057531881e-07, "loss": 0.0281, "step": 117030 }, { "epoch": 1.2504941503285432, "grad_norm": 0.3300637900829315, "learning_rate": 9.985787919655721e-07, "loss": 0.0595, "step": 117040 }, { "epoch": 1.2506009936428228, "grad_norm": 2.989854574203491, "learning_rate": 9.9857752583633e-07, "loss": 0.069, "step": 117050 }, { "epoch": 1.2507078369571025, "grad_norm": 10.013916969299316, "learning_rate": 9.985762591441563e-07, "loss": 0.0466, "step": 117060 }, { "epoch": 1.250814680271382, "grad_norm": 4.246384143829346, "learning_rate": 9.985749918890524e-07, "loss": 0.0515, "step": 117070 }, { "epoch": 1.2509215235856617, "grad_norm": 0.1224433183670044, "learning_rate": 9.985737240710197e-07, "loss": 0.0317, "step": 117080 }, { "epoch": 1.2510283668999413, "grad_norm": 2.5903408527374268, "learning_rate": 9.985724556900597e-07, "loss": 0.0558, "step": 117090 }, { "epoch": 1.2511352102142208, "grad_norm": 0.7716802358627319, "learning_rate": 9.985711867461735e-07, "loss": 0.0262, "step": 117100 }, { "epoch": 1.2512420535285005, "grad_norm": 0.49997588992118835, "learning_rate": 9.985699172393631e-07, "loss": 0.0228, "step": 117110 }, { "epoch": 1.2513488968427802, "grad_norm": 2.3554883003234863, "learning_rate": 9.985686471696295e-07, "loss": 0.0664, "step": 117120 }, { "epoch": 1.2514557401570596, "grad_norm": 0.11057370901107788, "learning_rate": 9.985673765369743e-07, "loss": 0.0204, "step": 117130 }, { "epoch": 1.2515625834713393, "grad_norm": 0.006822466384619474, "learning_rate": 9.985661053413988e-07, "loss": 0.0224, "step": 117140 }, { "epoch": 1.251669426785619, "grad_norm": 0.1730976551771164, "learning_rate": 9.985648335829046e-07, "loss": 0.0527, "step": 117150 }, { "epoch": 1.2517762700998984, "grad_norm": 0.20965056121349335, "learning_rate": 9.985635612614932e-07, "loss": 0.0149, "step": 117160 }, { "epoch": 1.2518831134141781, "grad_norm": 2.629948377609253, "learning_rate": 9.985622883771658e-07, "loss": 0.0304, "step": 117170 }, { "epoch": 1.2519899567284578, "grad_norm": 3.3153343200683594, "learning_rate": 9.98561014929924e-07, "loss": 0.02, "step": 117180 }, { "epoch": 1.2520968000427373, "grad_norm": 8.650854110717773, "learning_rate": 9.98559740919769e-07, "loss": 0.0281, "step": 117190 }, { "epoch": 1.252203643357017, "grad_norm": 0.14202609658241272, "learning_rate": 9.985584663467026e-07, "loss": 0.0427, "step": 117200 }, { "epoch": 1.2523104866712966, "grad_norm": 0.08719559013843536, "learning_rate": 9.98557191210726e-07, "loss": 0.0196, "step": 117210 }, { "epoch": 1.252417329985576, "grad_norm": 5.052801609039307, "learning_rate": 9.985559155118407e-07, "loss": 0.0557, "step": 117220 }, { "epoch": 1.2525241732998558, "grad_norm": 0.007840823382139206, "learning_rate": 9.985546392500483e-07, "loss": 0.0375, "step": 117230 }, { "epoch": 1.2526310166141355, "grad_norm": 4.290520668029785, "learning_rate": 9.9855336242535e-07, "loss": 0.0223, "step": 117240 }, { "epoch": 1.252737859928415, "grad_norm": 6.0184760093688965, "learning_rate": 9.985520850377472e-07, "loss": 0.045, "step": 117250 }, { "epoch": 1.2528447032426946, "grad_norm": 6.426040172576904, "learning_rate": 9.985508070872417e-07, "loss": 0.0306, "step": 117260 }, { "epoch": 1.2529515465569743, "grad_norm": 13.54620361328125, "learning_rate": 9.985495285738345e-07, "loss": 0.0295, "step": 117270 }, { "epoch": 1.2530583898712537, "grad_norm": 4.5959086418151855, "learning_rate": 9.985482494975273e-07, "loss": 0.0389, "step": 117280 }, { "epoch": 1.2531652331855334, "grad_norm": 1.9647784233093262, "learning_rate": 9.985469698583216e-07, "loss": 0.0271, "step": 117290 }, { "epoch": 1.253272076499813, "grad_norm": 6.977908134460449, "learning_rate": 9.985456896562185e-07, "loss": 0.0299, "step": 117300 }, { "epoch": 1.2533789198140926, "grad_norm": 0.16188977658748627, "learning_rate": 9.9854440889122e-07, "loss": 0.0123, "step": 117310 }, { "epoch": 1.2534857631283722, "grad_norm": 0.7355616092681885, "learning_rate": 9.98543127563327e-07, "loss": 0.0309, "step": 117320 }, { "epoch": 1.253592606442652, "grad_norm": 0.1658441424369812, "learning_rate": 9.985418456725413e-07, "loss": 0.0288, "step": 117330 }, { "epoch": 1.2536994497569314, "grad_norm": 0.33702197670936584, "learning_rate": 9.985405632188641e-07, "loss": 0.0129, "step": 117340 }, { "epoch": 1.253806293071211, "grad_norm": 4.6382670402526855, "learning_rate": 9.985392802022971e-07, "loss": 0.0289, "step": 117350 }, { "epoch": 1.2539131363854907, "grad_norm": 3.646040439605713, "learning_rate": 9.985379966228416e-07, "loss": 0.0277, "step": 117360 }, { "epoch": 1.2540199796997702, "grad_norm": 3.7791805267333984, "learning_rate": 9.985367124804992e-07, "loss": 0.0263, "step": 117370 }, { "epoch": 1.2541268230140499, "grad_norm": 4.191267013549805, "learning_rate": 9.985354277752712e-07, "loss": 0.0433, "step": 117380 }, { "epoch": 1.2542336663283296, "grad_norm": 0.47611507773399353, "learning_rate": 9.985341425071589e-07, "loss": 0.0137, "step": 117390 }, { "epoch": 1.254340509642609, "grad_norm": 3.5659520626068115, "learning_rate": 9.98532856676164e-07, "loss": 0.064, "step": 117400 }, { "epoch": 1.2544473529568887, "grad_norm": 0.055728767067193985, "learning_rate": 9.98531570282288e-07, "loss": 0.0071, "step": 117410 }, { "epoch": 1.2545541962711684, "grad_norm": 0.06556741148233414, "learning_rate": 9.98530283325532e-07, "loss": 0.0137, "step": 117420 }, { "epoch": 1.2546610395854478, "grad_norm": 1.292612075805664, "learning_rate": 9.985289958058977e-07, "loss": 0.0407, "step": 117430 }, { "epoch": 1.2547678828997275, "grad_norm": 7.313138961791992, "learning_rate": 9.985277077233868e-07, "loss": 0.0095, "step": 117440 }, { "epoch": 1.2548747262140072, "grad_norm": 2.0391576290130615, "learning_rate": 9.985264190780004e-07, "loss": 0.0607, "step": 117450 }, { "epoch": 1.2549815695282867, "grad_norm": 4.088432312011719, "learning_rate": 9.985251298697398e-07, "loss": 0.0648, "step": 117460 }, { "epoch": 1.2550884128425663, "grad_norm": 9.257790565490723, "learning_rate": 9.985238400986068e-07, "loss": 0.0606, "step": 117470 }, { "epoch": 1.255195256156846, "grad_norm": 2.448993444442749, "learning_rate": 9.985225497646029e-07, "loss": 0.0298, "step": 117480 }, { "epoch": 1.2553020994711255, "grad_norm": 0.6235419511795044, "learning_rate": 9.985212588677294e-07, "loss": 0.0529, "step": 117490 }, { "epoch": 1.2554089427854052, "grad_norm": 0.08456192165613174, "learning_rate": 9.985199674079878e-07, "loss": 0.0397, "step": 117500 }, { "epoch": 1.2555157860996848, "grad_norm": 10.166963577270508, "learning_rate": 9.985186753853794e-07, "loss": 0.0406, "step": 117510 }, { "epoch": 1.2556226294139643, "grad_norm": 0.4790288209915161, "learning_rate": 9.985173827999057e-07, "loss": 0.0043, "step": 117520 }, { "epoch": 1.255729472728244, "grad_norm": 0.1906711608171463, "learning_rate": 9.985160896515685e-07, "loss": 0.0204, "step": 117530 }, { "epoch": 1.2558363160425237, "grad_norm": 0.47756603360176086, "learning_rate": 9.98514795940369e-07, "loss": 0.0386, "step": 117540 }, { "epoch": 1.2559431593568031, "grad_norm": 0.06608524918556213, "learning_rate": 9.985135016663087e-07, "loss": 0.0129, "step": 117550 }, { "epoch": 1.2560500026710828, "grad_norm": 6.900752544403076, "learning_rate": 9.985122068293887e-07, "loss": 0.0474, "step": 117560 }, { "epoch": 1.2561568459853625, "grad_norm": 10.954450607299805, "learning_rate": 9.98510911429611e-07, "loss": 0.1007, "step": 117570 }, { "epoch": 1.256263689299642, "grad_norm": 5.9958600997924805, "learning_rate": 9.98509615466977e-07, "loss": 0.0326, "step": 117580 }, { "epoch": 1.2563705326139216, "grad_norm": 2.362652063369751, "learning_rate": 9.985083189414879e-07, "loss": 0.0313, "step": 117590 }, { "epoch": 1.2564773759282013, "grad_norm": 0.03440910205245018, "learning_rate": 9.985070218531454e-07, "loss": 0.062, "step": 117600 }, { "epoch": 1.256584219242481, "grad_norm": 3.8256919384002686, "learning_rate": 9.985057242019505e-07, "loss": 0.0253, "step": 117610 }, { "epoch": 1.2566910625567604, "grad_norm": 0.1479363888502121, "learning_rate": 9.985044259879052e-07, "loss": 0.0482, "step": 117620 }, { "epoch": 1.2567979058710401, "grad_norm": 2.185739517211914, "learning_rate": 9.98503127211011e-07, "loss": 0.0286, "step": 117630 }, { "epoch": 1.2569047491853198, "grad_norm": 2.077606678009033, "learning_rate": 9.98501827871269e-07, "loss": 0.0587, "step": 117640 }, { "epoch": 1.2570115924995993, "grad_norm": 0.10886046290397644, "learning_rate": 9.985005279686806e-07, "loss": 0.014, "step": 117650 }, { "epoch": 1.257118435813879, "grad_norm": 0.011344647035002708, "learning_rate": 9.984992275032478e-07, "loss": 0.011, "step": 117660 }, { "epoch": 1.2572252791281586, "grad_norm": 0.07001988589763641, "learning_rate": 9.984979264749715e-07, "loss": 0.0155, "step": 117670 }, { "epoch": 1.257332122442438, "grad_norm": 3.6824116706848145, "learning_rate": 9.984966248838536e-07, "loss": 0.0509, "step": 117680 }, { "epoch": 1.2574389657567178, "grad_norm": 6.5244879722595215, "learning_rate": 9.984953227298953e-07, "loss": 0.0665, "step": 117690 }, { "epoch": 1.2575458090709974, "grad_norm": 0.04268224909901619, "learning_rate": 9.98494020013098e-07, "loss": 0.0619, "step": 117700 }, { "epoch": 1.2576526523852771, "grad_norm": 2.1141529083251953, "learning_rate": 9.984927167334635e-07, "loss": 0.0591, "step": 117710 }, { "epoch": 1.2577594956995566, "grad_norm": 0.9067631363868713, "learning_rate": 9.984914128909931e-07, "loss": 0.0355, "step": 117720 }, { "epoch": 1.2578663390138363, "grad_norm": 1.915622353553772, "learning_rate": 9.984901084856884e-07, "loss": 0.0192, "step": 117730 }, { "epoch": 1.257973182328116, "grad_norm": 1.7838610410690308, "learning_rate": 9.984888035175507e-07, "loss": 0.0166, "step": 117740 }, { "epoch": 1.2580800256423954, "grad_norm": 6.597346782684326, "learning_rate": 9.984874979865813e-07, "loss": 0.0358, "step": 117750 }, { "epoch": 1.258186868956675, "grad_norm": 2.8061540126800537, "learning_rate": 9.98486191892782e-07, "loss": 0.0908, "step": 117760 }, { "epoch": 1.2582937122709548, "grad_norm": 0.4069494903087616, "learning_rate": 9.984848852361542e-07, "loss": 0.0594, "step": 117770 }, { "epoch": 1.2584005555852342, "grad_norm": 0.18566404283046722, "learning_rate": 9.984835780166995e-07, "loss": 0.058, "step": 117780 }, { "epoch": 1.258507398899514, "grad_norm": 2.6314949989318848, "learning_rate": 9.984822702344188e-07, "loss": 0.0123, "step": 117790 }, { "epoch": 1.2586142422137936, "grad_norm": 1.611695647239685, "learning_rate": 9.984809618893144e-07, "loss": 0.0411, "step": 117800 }, { "epoch": 1.258721085528073, "grad_norm": 10.61723804473877, "learning_rate": 9.984796529813871e-07, "loss": 0.0332, "step": 117810 }, { "epoch": 1.2588279288423527, "grad_norm": 6.300385475158691, "learning_rate": 9.984783435106388e-07, "loss": 0.0344, "step": 117820 }, { "epoch": 1.2589347721566324, "grad_norm": 3.143239974975586, "learning_rate": 9.984770334770708e-07, "loss": 0.0376, "step": 117830 }, { "epoch": 1.2590416154709119, "grad_norm": 1.435746192932129, "learning_rate": 9.984757228806846e-07, "loss": 0.0137, "step": 117840 }, { "epoch": 1.2591484587851915, "grad_norm": 5.128119468688965, "learning_rate": 9.984744117214816e-07, "loss": 0.033, "step": 117850 }, { "epoch": 1.2592553020994712, "grad_norm": 0.05923145264387131, "learning_rate": 9.984730999994635e-07, "loss": 0.037, "step": 117860 }, { "epoch": 1.2593621454137507, "grad_norm": 14.687018394470215, "learning_rate": 9.984717877146316e-07, "loss": 0.0952, "step": 117870 }, { "epoch": 1.2594689887280304, "grad_norm": 2.405010461807251, "learning_rate": 9.984704748669874e-07, "loss": 0.0682, "step": 117880 }, { "epoch": 1.25957583204231, "grad_norm": 5.8950300216674805, "learning_rate": 9.984691614565326e-07, "loss": 0.0218, "step": 117890 }, { "epoch": 1.2596826753565895, "grad_norm": 3.8245599269866943, "learning_rate": 9.984678474832683e-07, "loss": 0.0416, "step": 117900 }, { "epoch": 1.2597895186708692, "grad_norm": 2.2156331539154053, "learning_rate": 9.984665329471963e-07, "loss": 0.0492, "step": 117910 }, { "epoch": 1.2598963619851489, "grad_norm": 2.161224842071533, "learning_rate": 9.984652178483177e-07, "loss": 0.03, "step": 117920 }, { "epoch": 1.2600032052994283, "grad_norm": 9.182273864746094, "learning_rate": 9.984639021866346e-07, "loss": 0.0771, "step": 117930 }, { "epoch": 1.260110048613708, "grad_norm": 2.701233386993408, "learning_rate": 9.984625859621479e-07, "loss": 0.0374, "step": 117940 }, { "epoch": 1.2602168919279877, "grad_norm": 0.007927111349999905, "learning_rate": 9.984612691748592e-07, "loss": 0.0134, "step": 117950 }, { "epoch": 1.2603237352422672, "grad_norm": 5.129332065582275, "learning_rate": 9.984599518247705e-07, "loss": 0.0438, "step": 117960 }, { "epoch": 1.2604305785565468, "grad_norm": 1.2644654512405396, "learning_rate": 9.984586339118827e-07, "loss": 0.0314, "step": 117970 }, { "epoch": 1.2605374218708265, "grad_norm": 9.831772804260254, "learning_rate": 9.984573154361975e-07, "loss": 0.06, "step": 117980 }, { "epoch": 1.260644265185106, "grad_norm": 3.1839282512664795, "learning_rate": 9.984559963977164e-07, "loss": 0.014, "step": 117990 }, { "epoch": 1.2607511084993857, "grad_norm": 5.910752773284912, "learning_rate": 9.984546767964405e-07, "loss": 0.0146, "step": 118000 }, { "epoch": 1.2608579518136653, "grad_norm": 0.05445268005132675, "learning_rate": 9.98453356632372e-07, "loss": 0.0394, "step": 118010 }, { "epoch": 1.2609647951279448, "grad_norm": 3.7265193462371826, "learning_rate": 9.984520359055121e-07, "loss": 0.0217, "step": 118020 }, { "epoch": 1.2610716384422245, "grad_norm": 0.013644997961819172, "learning_rate": 9.98450714615862e-07, "loss": 0.1084, "step": 118030 }, { "epoch": 1.2611784817565042, "grad_norm": 3.0577313899993896, "learning_rate": 9.984493927634237e-07, "loss": 0.0207, "step": 118040 }, { "epoch": 1.2612853250707836, "grad_norm": 3.595224142074585, "learning_rate": 9.984480703481982e-07, "loss": 0.0201, "step": 118050 }, { "epoch": 1.2613921683850633, "grad_norm": 4.63504695892334, "learning_rate": 9.984467473701874e-07, "loss": 0.0292, "step": 118060 }, { "epoch": 1.261499011699343, "grad_norm": 0.018633006140589714, "learning_rate": 9.984454238293924e-07, "loss": 0.0364, "step": 118070 }, { "epoch": 1.2616058550136224, "grad_norm": 0.030647315084934235, "learning_rate": 9.98444099725815e-07, "loss": 0.032, "step": 118080 }, { "epoch": 1.2617126983279021, "grad_norm": 4.121071815490723, "learning_rate": 9.984427750594565e-07, "loss": 0.0253, "step": 118090 }, { "epoch": 1.2618195416421818, "grad_norm": 5.096868515014648, "learning_rate": 9.984414498303186e-07, "loss": 0.0401, "step": 118100 }, { "epoch": 1.2619263849564613, "grad_norm": 8.933834075927734, "learning_rate": 9.984401240384027e-07, "loss": 0.0593, "step": 118110 }, { "epoch": 1.262033228270741, "grad_norm": 2.5852229595184326, "learning_rate": 9.9843879768371e-07, "loss": 0.0168, "step": 118120 }, { "epoch": 1.2621400715850206, "grad_norm": 6.533612251281738, "learning_rate": 9.984374707662425e-07, "loss": 0.0051, "step": 118130 }, { "epoch": 1.2622469148993, "grad_norm": 3.0986716747283936, "learning_rate": 9.984361432860015e-07, "loss": 0.0275, "step": 118140 }, { "epoch": 1.2623537582135798, "grad_norm": 0.02859974093735218, "learning_rate": 9.984348152429884e-07, "loss": 0.022, "step": 118150 }, { "epoch": 1.2624606015278594, "grad_norm": 1.7154381275177002, "learning_rate": 9.984334866372049e-07, "loss": 0.066, "step": 118160 }, { "epoch": 1.262567444842139, "grad_norm": 0.016123216599225998, "learning_rate": 9.984321574686522e-07, "loss": 0.0425, "step": 118170 }, { "epoch": 1.2626742881564186, "grad_norm": Infinity, "learning_rate": 9.984308277373322e-07, "loss": 0.0846, "step": 118180 }, { "epoch": 1.2627811314706983, "grad_norm": 4.370226860046387, "learning_rate": 9.98429497443246e-07, "loss": 0.0277, "step": 118190 }, { "epoch": 1.2628879747849777, "grad_norm": 1.2682877779006958, "learning_rate": 9.984281665863953e-07, "loss": 0.0225, "step": 118200 }, { "epoch": 1.2629948180992574, "grad_norm": 0.019791189581155777, "learning_rate": 9.984268351667817e-07, "loss": 0.0174, "step": 118210 }, { "epoch": 1.263101661413537, "grad_norm": 1.027252197265625, "learning_rate": 9.984255031844064e-07, "loss": 0.0179, "step": 118220 }, { "epoch": 1.2632085047278165, "grad_norm": 2.4236907958984375, "learning_rate": 9.984241706392713e-07, "loss": 0.0611, "step": 118230 }, { "epoch": 1.2633153480420962, "grad_norm": 1.2695688009262085, "learning_rate": 9.984228375313774e-07, "loss": 0.0408, "step": 118240 }, { "epoch": 1.263422191356376, "grad_norm": 0.00590908620506525, "learning_rate": 9.984215038607267e-07, "loss": 0.0166, "step": 118250 }, { "epoch": 1.2635290346706554, "grad_norm": 7.427923679351807, "learning_rate": 9.984201696273205e-07, "loss": 0.0372, "step": 118260 }, { "epoch": 1.263635877984935, "grad_norm": 0.011624395847320557, "learning_rate": 9.984188348311604e-07, "loss": 0.0211, "step": 118270 }, { "epoch": 1.2637427212992147, "grad_norm": 2.3725526332855225, "learning_rate": 9.984174994722477e-07, "loss": 0.0209, "step": 118280 }, { "epoch": 1.2638495646134942, "grad_norm": 3.9117684364318848, "learning_rate": 9.98416163550584e-07, "loss": 0.0302, "step": 118290 }, { "epoch": 1.2639564079277739, "grad_norm": 1.1885331869125366, "learning_rate": 9.984148270661708e-07, "loss": 0.0615, "step": 118300 }, { "epoch": 1.2640632512420535, "grad_norm": 0.27269795536994934, "learning_rate": 9.984134900190098e-07, "loss": 0.0132, "step": 118310 }, { "epoch": 1.264170094556333, "grad_norm": 3.3535478115081787, "learning_rate": 9.984121524091022e-07, "loss": 0.0401, "step": 118320 }, { "epoch": 1.2642769378706127, "grad_norm": 5.6247687339782715, "learning_rate": 9.984108142364498e-07, "loss": 0.0296, "step": 118330 }, { "epoch": 1.2643837811848924, "grad_norm": 1.3746159076690674, "learning_rate": 9.984094755010539e-07, "loss": 0.018, "step": 118340 }, { "epoch": 1.264490624499172, "grad_norm": 0.2746635973453522, "learning_rate": 9.984081362029162e-07, "loss": 0.0799, "step": 118350 }, { "epoch": 1.2645974678134515, "grad_norm": 3.142981767654419, "learning_rate": 9.984067963420378e-07, "loss": 0.1146, "step": 118360 }, { "epoch": 1.2647043111277312, "grad_norm": 1.2385865449905396, "learning_rate": 9.984054559184207e-07, "loss": 0.0185, "step": 118370 }, { "epoch": 1.2648111544420109, "grad_norm": 1.506577968597412, "learning_rate": 9.984041149320663e-07, "loss": 0.0372, "step": 118380 }, { "epoch": 1.2649179977562903, "grad_norm": 0.8574340343475342, "learning_rate": 9.984027733829758e-07, "loss": 0.0386, "step": 118390 }, { "epoch": 1.26502484107057, "grad_norm": 1.625393033027649, "learning_rate": 9.984014312711514e-07, "loss": 0.0336, "step": 118400 }, { "epoch": 1.2651316843848497, "grad_norm": 0.768669605255127, "learning_rate": 9.984000885965938e-07, "loss": 0.016, "step": 118410 }, { "epoch": 1.2652385276991291, "grad_norm": 0.46683812141418457, "learning_rate": 9.983987453593049e-07, "loss": 0.0262, "step": 118420 }, { "epoch": 1.2653453710134088, "grad_norm": 9.989253044128418, "learning_rate": 9.983974015592862e-07, "loss": 0.0388, "step": 118430 }, { "epoch": 1.2654522143276885, "grad_norm": 5.887067794799805, "learning_rate": 9.983960571965392e-07, "loss": 0.0171, "step": 118440 }, { "epoch": 1.2655590576419682, "grad_norm": 13.357192993164062, "learning_rate": 9.983947122710657e-07, "loss": 0.0331, "step": 118450 }, { "epoch": 1.2656659009562476, "grad_norm": 27.913198471069336, "learning_rate": 9.983933667828667e-07, "loss": 0.1119, "step": 118460 }, { "epoch": 1.2657727442705273, "grad_norm": 0.8009808659553528, "learning_rate": 9.983920207319441e-07, "loss": 0.0137, "step": 118470 }, { "epoch": 1.265879587584807, "grad_norm": 4.465517997741699, "learning_rate": 9.983906741182993e-07, "loss": 0.0825, "step": 118480 }, { "epoch": 1.2659864308990865, "grad_norm": 0.037726398557424545, "learning_rate": 9.983893269419337e-07, "loss": 0.0118, "step": 118490 }, { "epoch": 1.2660932742133661, "grad_norm": 2.347564458847046, "learning_rate": 9.983879792028489e-07, "loss": 0.0269, "step": 118500 }, { "epoch": 1.2662001175276458, "grad_norm": 1.1773066520690918, "learning_rate": 9.983866309010466e-07, "loss": 0.058, "step": 118510 }, { "epoch": 1.2663069608419253, "grad_norm": 0.22172512114048004, "learning_rate": 9.98385282036528e-07, "loss": 0.0487, "step": 118520 }, { "epoch": 1.266413804156205, "grad_norm": 0.25019118189811707, "learning_rate": 9.98383932609295e-07, "loss": 0.0077, "step": 118530 }, { "epoch": 1.2665206474704847, "grad_norm": 0.9642791152000427, "learning_rate": 9.983825826193488e-07, "loss": 0.0357, "step": 118540 }, { "epoch": 1.266627490784764, "grad_norm": 0.021414892747998238, "learning_rate": 9.98381232066691e-07, "loss": 0.034, "step": 118550 }, { "epoch": 1.2667343340990438, "grad_norm": 1.5010111331939697, "learning_rate": 9.983798809513234e-07, "loss": 0.0361, "step": 118560 }, { "epoch": 1.2668411774133235, "grad_norm": 4.9780802726745605, "learning_rate": 9.98378529273247e-07, "loss": 0.0247, "step": 118570 }, { "epoch": 1.266948020727603, "grad_norm": 0.2898161709308624, "learning_rate": 9.98377177032464e-07, "loss": 0.0093, "step": 118580 }, { "epoch": 1.2670548640418826, "grad_norm": 0.21961291134357452, "learning_rate": 9.983758242289755e-07, "loss": 0.0541, "step": 118590 }, { "epoch": 1.2671617073561623, "grad_norm": 1.785688877105713, "learning_rate": 9.983744708627829e-07, "loss": 0.04, "step": 118600 }, { "epoch": 1.2672685506704418, "grad_norm": 1.7454776763916016, "learning_rate": 9.983731169338879e-07, "loss": 0.0538, "step": 118610 }, { "epoch": 1.2673753939847214, "grad_norm": 8.486690521240234, "learning_rate": 9.983717624422921e-07, "loss": 0.0431, "step": 118620 }, { "epoch": 1.2674822372990011, "grad_norm": 1.508846402168274, "learning_rate": 9.983704073879967e-07, "loss": 0.0269, "step": 118630 }, { "epoch": 1.2675890806132806, "grad_norm": 0.03276895731687546, "learning_rate": 9.98369051771004e-07, "loss": 0.0323, "step": 118640 }, { "epoch": 1.2676959239275603, "grad_norm": 0.025284724310040474, "learning_rate": 9.983676955913148e-07, "loss": 0.0357, "step": 118650 }, { "epoch": 1.26780276724184, "grad_norm": 0.14912551641464233, "learning_rate": 9.983663388489306e-07, "loss": 0.0445, "step": 118660 }, { "epoch": 1.2679096105561194, "grad_norm": 0.29817935824394226, "learning_rate": 9.983649815438536e-07, "loss": 0.0289, "step": 118670 }, { "epoch": 1.268016453870399, "grad_norm": 0.24764201045036316, "learning_rate": 9.983636236760847e-07, "loss": 0.0356, "step": 118680 }, { "epoch": 1.2681232971846788, "grad_norm": 0.055059053003787994, "learning_rate": 9.983622652456256e-07, "loss": 0.039, "step": 118690 }, { "epoch": 1.2682301404989582, "grad_norm": 3.3454456329345703, "learning_rate": 9.98360906252478e-07, "loss": 0.0143, "step": 118700 }, { "epoch": 1.268336983813238, "grad_norm": 1.3704606294631958, "learning_rate": 9.983595466966431e-07, "loss": 0.0583, "step": 118710 }, { "epoch": 1.2684438271275176, "grad_norm": 0.06262675672769547, "learning_rate": 9.98358186578123e-07, "loss": 0.0395, "step": 118720 }, { "epoch": 1.268550670441797, "grad_norm": 1.2374615669250488, "learning_rate": 9.983568258969188e-07, "loss": 0.0326, "step": 118730 }, { "epoch": 1.2686575137560767, "grad_norm": 0.08333037048578262, "learning_rate": 9.983554646530322e-07, "loss": 0.0154, "step": 118740 }, { "epoch": 1.2687643570703564, "grad_norm": 4.3247504234313965, "learning_rate": 9.983541028464646e-07, "loss": 0.0125, "step": 118750 }, { "epoch": 1.2688712003846359, "grad_norm": 9.824652671813965, "learning_rate": 9.983527404772174e-07, "loss": 0.0409, "step": 118760 }, { "epoch": 1.2689780436989155, "grad_norm": 9.535280227661133, "learning_rate": 9.983513775452925e-07, "loss": 0.1048, "step": 118770 }, { "epoch": 1.2690848870131952, "grad_norm": 3.723140001296997, "learning_rate": 9.983500140506913e-07, "loss": 0.0345, "step": 118780 }, { "epoch": 1.2691917303274747, "grad_norm": 0.08516869693994522, "learning_rate": 9.983486499934153e-07, "loss": 0.0186, "step": 118790 }, { "epoch": 1.2692985736417544, "grad_norm": 0.12013279646635056, "learning_rate": 9.98347285373466e-07, "loss": 0.036, "step": 118800 }, { "epoch": 1.269405416956034, "grad_norm": 0.718271017074585, "learning_rate": 9.983459201908454e-07, "loss": 0.036, "step": 118810 }, { "epoch": 1.2695122602703135, "grad_norm": 0.6384324431419373, "learning_rate": 9.983445544455542e-07, "loss": 0.0177, "step": 118820 }, { "epoch": 1.2696191035845932, "grad_norm": 0.35704052448272705, "learning_rate": 9.983431881375944e-07, "loss": 0.0098, "step": 118830 }, { "epoch": 1.2697259468988729, "grad_norm": 0.059067126363515854, "learning_rate": 9.983418212669677e-07, "loss": 0.0155, "step": 118840 }, { "epoch": 1.2698327902131523, "grad_norm": 10.769879341125488, "learning_rate": 9.983404538336754e-07, "loss": 0.0525, "step": 118850 }, { "epoch": 1.269939633527432, "grad_norm": 4.788546562194824, "learning_rate": 9.98339085837719e-07, "loss": 0.0349, "step": 118860 }, { "epoch": 1.2700464768417117, "grad_norm": 1.4388295412063599, "learning_rate": 9.983377172791003e-07, "loss": 0.0264, "step": 118870 }, { "epoch": 1.2701533201559911, "grad_norm": 0.8417459726333618, "learning_rate": 9.983363481578208e-07, "loss": 0.1024, "step": 118880 }, { "epoch": 1.2702601634702708, "grad_norm": 0.010928875766694546, "learning_rate": 9.983349784738818e-07, "loss": 0.0183, "step": 118890 }, { "epoch": 1.2703670067845505, "grad_norm": 2.9764294624328613, "learning_rate": 9.983336082272852e-07, "loss": 0.0354, "step": 118900 }, { "epoch": 1.27047385009883, "grad_norm": 2.516838550567627, "learning_rate": 9.983322374180321e-07, "loss": 0.0295, "step": 118910 }, { "epoch": 1.2705806934131096, "grad_norm": 0.6673654913902283, "learning_rate": 9.983308660461243e-07, "loss": 0.0171, "step": 118920 }, { "epoch": 1.2706875367273893, "grad_norm": 0.04140497371554375, "learning_rate": 9.983294941115634e-07, "loss": 0.0175, "step": 118930 }, { "epoch": 1.2707943800416688, "grad_norm": 2.1286990642547607, "learning_rate": 9.98328121614351e-07, "loss": 0.0468, "step": 118940 }, { "epoch": 1.2709012233559485, "grad_norm": 4.334157943725586, "learning_rate": 9.983267485544884e-07, "loss": 0.0152, "step": 118950 }, { "epoch": 1.2710080666702281, "grad_norm": 3.6749978065490723, "learning_rate": 9.983253749319774e-07, "loss": 0.0185, "step": 118960 }, { "epoch": 1.2711149099845076, "grad_norm": 6.268363952636719, "learning_rate": 9.983240007468193e-07, "loss": 0.0279, "step": 118970 }, { "epoch": 1.2712217532987873, "grad_norm": 2.136082649230957, "learning_rate": 9.98322625999016e-07, "loss": 0.0245, "step": 118980 }, { "epoch": 1.271328596613067, "grad_norm": 0.7624320387840271, "learning_rate": 9.983212506885686e-07, "loss": 0.0321, "step": 118990 }, { "epoch": 1.2714354399273464, "grad_norm": 2.1285462379455566, "learning_rate": 9.98319874815479e-07, "loss": 0.0229, "step": 119000 }, { "epoch": 1.271542283241626, "grad_norm": 0.5994706153869629, "learning_rate": 9.983184983797488e-07, "loss": 0.03, "step": 119010 }, { "epoch": 1.2716491265559058, "grad_norm": 7.979199409484863, "learning_rate": 9.983171213813793e-07, "loss": 0.0728, "step": 119020 }, { "epoch": 1.2717559698701852, "grad_norm": 5.069382190704346, "learning_rate": 9.98315743820372e-07, "loss": 0.0635, "step": 119030 }, { "epoch": 1.271862813184465, "grad_norm": 1.6105141639709473, "learning_rate": 9.983143656967288e-07, "loss": 0.0534, "step": 119040 }, { "epoch": 1.2719696564987446, "grad_norm": 10.292892456054688, "learning_rate": 9.98312987010451e-07, "loss": 0.0209, "step": 119050 }, { "epoch": 1.272076499813024, "grad_norm": 0.12757442891597748, "learning_rate": 9.983116077615403e-07, "loss": 0.0275, "step": 119060 }, { "epoch": 1.2721833431273037, "grad_norm": 0.880722165107727, "learning_rate": 9.983102279499982e-07, "loss": 0.0126, "step": 119070 }, { "epoch": 1.2722901864415834, "grad_norm": 0.3576374650001526, "learning_rate": 9.983088475758262e-07, "loss": 0.0452, "step": 119080 }, { "epoch": 1.272397029755863, "grad_norm": 5.039673328399658, "learning_rate": 9.983074666390258e-07, "loss": 0.0093, "step": 119090 }, { "epoch": 1.2725038730701426, "grad_norm": 0.0295645073056221, "learning_rate": 9.983060851395988e-07, "loss": 0.0365, "step": 119100 }, { "epoch": 1.2726107163844222, "grad_norm": 6.13510799407959, "learning_rate": 9.983047030775463e-07, "loss": 0.0188, "step": 119110 }, { "epoch": 1.272717559698702, "grad_norm": 0.01733238250017166, "learning_rate": 9.983033204528706e-07, "loss": 0.0353, "step": 119120 }, { "epoch": 1.2728244030129814, "grad_norm": 0.0656667947769165, "learning_rate": 9.983019372655728e-07, "loss": 0.004, "step": 119130 }, { "epoch": 1.272931246327261, "grad_norm": 3.7950375080108643, "learning_rate": 9.983005535156544e-07, "loss": 0.0153, "step": 119140 }, { "epoch": 1.2730380896415407, "grad_norm": 0.06974347680807114, "learning_rate": 9.98299169203117e-07, "loss": 0.0301, "step": 119150 }, { "epoch": 1.2731449329558202, "grad_norm": 4.4866251945495605, "learning_rate": 9.982977843279623e-07, "loss": 0.0286, "step": 119160 }, { "epoch": 1.2732517762700999, "grad_norm": 1.683944582939148, "learning_rate": 9.982963988901917e-07, "loss": 0.0299, "step": 119170 }, { "epoch": 1.2733586195843796, "grad_norm": 1.3718091249465942, "learning_rate": 9.98295012889807e-07, "loss": 0.0135, "step": 119180 }, { "epoch": 1.2734654628986593, "grad_norm": 0.0329752191901207, "learning_rate": 9.982936263268097e-07, "loss": 0.0091, "step": 119190 }, { "epoch": 1.2735723062129387, "grad_norm": 7.246999740600586, "learning_rate": 9.98292239201201e-07, "loss": 0.0281, "step": 119200 }, { "epoch": 1.2736791495272184, "grad_norm": 0.008995164185762405, "learning_rate": 9.982908515129828e-07, "loss": 0.0487, "step": 119210 }, { "epoch": 1.273785992841498, "grad_norm": 0.020103242248296738, "learning_rate": 9.982894632621567e-07, "loss": 0.0089, "step": 119220 }, { "epoch": 1.2738928361557775, "grad_norm": 3.0121877193450928, "learning_rate": 9.982880744487244e-07, "loss": 0.0201, "step": 119230 }, { "epoch": 1.2739996794700572, "grad_norm": 0.11886586993932724, "learning_rate": 9.98286685072687e-07, "loss": 0.0328, "step": 119240 }, { "epoch": 1.274106522784337, "grad_norm": 4.0856733322143555, "learning_rate": 9.982852951340463e-07, "loss": 0.017, "step": 119250 }, { "epoch": 1.2742133660986164, "grad_norm": 7.0484185218811035, "learning_rate": 9.982839046328042e-07, "loss": 0.0632, "step": 119260 }, { "epoch": 1.274320209412896, "grad_norm": 1.3547662496566772, "learning_rate": 9.982825135689616e-07, "loss": 0.0219, "step": 119270 }, { "epoch": 1.2744270527271757, "grad_norm": 1.2148314714431763, "learning_rate": 9.982811219425207e-07, "loss": 0.0253, "step": 119280 }, { "epoch": 1.2745338960414552, "grad_norm": 5.231990814208984, "learning_rate": 9.982797297534827e-07, "loss": 0.0297, "step": 119290 }, { "epoch": 1.2746407393557349, "grad_norm": 0.6946833729743958, "learning_rate": 9.982783370018492e-07, "loss": 0.0219, "step": 119300 }, { "epoch": 1.2747475826700145, "grad_norm": 0.3233858346939087, "learning_rate": 9.98276943687622e-07, "loss": 0.0441, "step": 119310 }, { "epoch": 1.274854425984294, "grad_norm": 6.076561450958252, "learning_rate": 9.982755498108023e-07, "loss": 0.0765, "step": 119320 }, { "epoch": 1.2749612692985737, "grad_norm": 0.6375188231468201, "learning_rate": 9.982741553713922e-07, "loss": 0.0183, "step": 119330 }, { "epoch": 1.2750681126128534, "grad_norm": 5.799676418304443, "learning_rate": 9.982727603693929e-07, "loss": 0.0666, "step": 119340 }, { "epoch": 1.2751749559271328, "grad_norm": 1.9287890195846558, "learning_rate": 9.982713648048057e-07, "loss": 0.0417, "step": 119350 }, { "epoch": 1.2752817992414125, "grad_norm": 0.01408099103718996, "learning_rate": 9.98269968677633e-07, "loss": 0.0246, "step": 119360 }, { "epoch": 1.2753886425556922, "grad_norm": 0.008156136609613895, "learning_rate": 9.982685719878755e-07, "loss": 0.0456, "step": 119370 }, { "epoch": 1.2754954858699716, "grad_norm": 0.2959686815738678, "learning_rate": 9.982671747355356e-07, "loss": 0.0685, "step": 119380 }, { "epoch": 1.2756023291842513, "grad_norm": 1.6797442436218262, "learning_rate": 9.982657769206143e-07, "loss": 0.0213, "step": 119390 }, { "epoch": 1.275709172498531, "grad_norm": 9.759309768676758, "learning_rate": 9.98264378543113e-07, "loss": 0.0255, "step": 119400 }, { "epoch": 1.2758160158128105, "grad_norm": 0.00431368313729763, "learning_rate": 9.98262979603034e-07, "loss": 0.0273, "step": 119410 }, { "epoch": 1.2759228591270901, "grad_norm": 4.716678619384766, "learning_rate": 9.982615801003783e-07, "loss": 0.0536, "step": 119420 }, { "epoch": 1.2760297024413698, "grad_norm": 0.8328905701637268, "learning_rate": 9.982601800351479e-07, "loss": 0.0544, "step": 119430 }, { "epoch": 1.2761365457556493, "grad_norm": 0.7047789692878723, "learning_rate": 9.982587794073438e-07, "loss": 0.0121, "step": 119440 }, { "epoch": 1.276243389069929, "grad_norm": 0.10944042354822159, "learning_rate": 9.982573782169681e-07, "loss": 0.0253, "step": 119450 }, { "epoch": 1.2763502323842086, "grad_norm": 0.1987617313861847, "learning_rate": 9.982559764640222e-07, "loss": 0.0327, "step": 119460 }, { "epoch": 1.276457075698488, "grad_norm": 4.203894138336182, "learning_rate": 9.982545741485077e-07, "loss": 0.0164, "step": 119470 }, { "epoch": 1.2765639190127678, "grad_norm": 0.0499473437666893, "learning_rate": 9.982531712704264e-07, "loss": 0.0105, "step": 119480 }, { "epoch": 1.2766707623270475, "grad_norm": 9.707757949829102, "learning_rate": 9.982517678297792e-07, "loss": 0.0566, "step": 119490 }, { "epoch": 1.276777605641327, "grad_norm": 6.2016401290893555, "learning_rate": 9.982503638265685e-07, "loss": 0.0638, "step": 119500 }, { "epoch": 1.2768844489556066, "grad_norm": 0.1425178498029709, "learning_rate": 9.982489592607954e-07, "loss": 0.0467, "step": 119510 }, { "epoch": 1.2769912922698863, "grad_norm": 2.0954864025115967, "learning_rate": 9.982475541324617e-07, "loss": 0.047, "step": 119520 }, { "epoch": 1.2770981355841657, "grad_norm": 7.665365695953369, "learning_rate": 9.98246148441569e-07, "loss": 0.0263, "step": 119530 }, { "epoch": 1.2772049788984454, "grad_norm": 0.43682897090911865, "learning_rate": 9.982447421881185e-07, "loss": 0.0349, "step": 119540 }, { "epoch": 1.277311822212725, "grad_norm": 0.029715290293097496, "learning_rate": 9.982433353721122e-07, "loss": 0.0183, "step": 119550 }, { "epoch": 1.2774186655270046, "grad_norm": 1.0958586931228638, "learning_rate": 9.982419279935518e-07, "loss": 0.0452, "step": 119560 }, { "epoch": 1.2775255088412842, "grad_norm": 0.16111910343170166, "learning_rate": 9.982405200524384e-07, "loss": 0.0159, "step": 119570 }, { "epoch": 1.277632352155564, "grad_norm": 3.4154274463653564, "learning_rate": 9.982391115487741e-07, "loss": 0.046, "step": 119580 }, { "epoch": 1.2777391954698434, "grad_norm": 1.047020435333252, "learning_rate": 9.9823770248256e-07, "loss": 0.0498, "step": 119590 }, { "epoch": 1.277846038784123, "grad_norm": 2.683755874633789, "learning_rate": 9.98236292853798e-07, "loss": 0.0322, "step": 119600 }, { "epoch": 1.2779528820984027, "grad_norm": 1.282662272453308, "learning_rate": 9.982348826624896e-07, "loss": 0.0338, "step": 119610 }, { "epoch": 1.2780597254126822, "grad_norm": 1.5371057987213135, "learning_rate": 9.982334719086364e-07, "loss": 0.0126, "step": 119620 }, { "epoch": 1.2781665687269619, "grad_norm": 0.0156573373824358, "learning_rate": 9.9823206059224e-07, "loss": 0.0343, "step": 119630 }, { "epoch": 1.2782734120412416, "grad_norm": 0.45582208037376404, "learning_rate": 9.982306487133021e-07, "loss": 0.0353, "step": 119640 }, { "epoch": 1.278380255355521, "grad_norm": 0.4223668873310089, "learning_rate": 9.98229236271824e-07, "loss": 0.0475, "step": 119650 }, { "epoch": 1.2784870986698007, "grad_norm": 0.2558092772960663, "learning_rate": 9.982278232678077e-07, "loss": 0.0318, "step": 119660 }, { "epoch": 1.2785939419840804, "grad_norm": 0.020099850371479988, "learning_rate": 9.982264097012546e-07, "loss": 0.0162, "step": 119670 }, { "epoch": 1.2787007852983598, "grad_norm": 0.10311976820230484, "learning_rate": 9.982249955721663e-07, "loss": 0.0267, "step": 119680 }, { "epoch": 1.2788076286126395, "grad_norm": 0.00873513799160719, "learning_rate": 9.982235808805441e-07, "loss": 0.0283, "step": 119690 }, { "epoch": 1.2789144719269192, "grad_norm": 0.556735098361969, "learning_rate": 9.9822216562639e-07, "loss": 0.0275, "step": 119700 }, { "epoch": 1.2790213152411987, "grad_norm": 0.1983240395784378, "learning_rate": 9.982207498097055e-07, "loss": 0.0411, "step": 119710 }, { "epoch": 1.2791281585554783, "grad_norm": 1.737196445465088, "learning_rate": 9.982193334304922e-07, "loss": 0.0197, "step": 119720 }, { "epoch": 1.279235001869758, "grad_norm": 5.858084678649902, "learning_rate": 9.982179164887516e-07, "loss": 0.0157, "step": 119730 }, { "epoch": 1.2793418451840375, "grad_norm": 5.283289432525635, "learning_rate": 9.982164989844856e-07, "loss": 0.0507, "step": 119740 }, { "epoch": 1.2794486884983172, "grad_norm": 0.28720182180404663, "learning_rate": 9.982150809176953e-07, "loss": 0.0403, "step": 119750 }, { "epoch": 1.2795555318125968, "grad_norm": 3.0254592895507812, "learning_rate": 9.982136622883828e-07, "loss": 0.0164, "step": 119760 }, { "epoch": 1.2796623751268763, "grad_norm": 0.6927422881126404, "learning_rate": 9.982122430965493e-07, "loss": 0.0617, "step": 119770 }, { "epoch": 1.279769218441156, "grad_norm": 7.305628299713135, "learning_rate": 9.982108233421967e-07, "loss": 0.0055, "step": 119780 }, { "epoch": 1.2798760617554357, "grad_norm": 0.6195551753044128, "learning_rate": 9.982094030253262e-07, "loss": 0.0314, "step": 119790 }, { "epoch": 1.2799829050697151, "grad_norm": 3.1085424423217773, "learning_rate": 9.9820798214594e-07, "loss": 0.0491, "step": 119800 }, { "epoch": 1.2800897483839948, "grad_norm": 0.861640989780426, "learning_rate": 9.982065607040393e-07, "loss": 0.0348, "step": 119810 }, { "epoch": 1.2801965916982745, "grad_norm": 3.592989444732666, "learning_rate": 9.982051386996257e-07, "loss": 0.0293, "step": 119820 }, { "epoch": 1.2803034350125542, "grad_norm": 0.25698956847190857, "learning_rate": 9.98203716132701e-07, "loss": 0.0087, "step": 119830 }, { "epoch": 1.2804102783268336, "grad_norm": 4.446098327636719, "learning_rate": 9.982022930032666e-07, "loss": 0.0579, "step": 119840 }, { "epoch": 1.2805171216411133, "grad_norm": 0.12797917425632477, "learning_rate": 9.982008693113241e-07, "loss": 0.0099, "step": 119850 }, { "epoch": 1.280623964955393, "grad_norm": 0.28865790367126465, "learning_rate": 9.981994450568755e-07, "loss": 0.0274, "step": 119860 }, { "epoch": 1.2807308082696724, "grad_norm": 7.494943141937256, "learning_rate": 9.981980202399221e-07, "loss": 0.0207, "step": 119870 }, { "epoch": 1.2808376515839521, "grad_norm": 8.955950736999512, "learning_rate": 9.981965948604654e-07, "loss": 0.0469, "step": 119880 }, { "epoch": 1.2809444948982318, "grad_norm": 0.19854114949703217, "learning_rate": 9.981951689185071e-07, "loss": 0.0359, "step": 119890 }, { "epoch": 1.2810513382125113, "grad_norm": 4.157070636749268, "learning_rate": 9.98193742414049e-07, "loss": 0.0454, "step": 119900 }, { "epoch": 1.281158181526791, "grad_norm": 4.4470415115356445, "learning_rate": 9.981923153470925e-07, "loss": 0.0241, "step": 119910 }, { "epoch": 1.2812650248410706, "grad_norm": 0.1902763694524765, "learning_rate": 9.981908877176392e-07, "loss": 0.0043, "step": 119920 }, { "epoch": 1.2813718681553503, "grad_norm": 0.12846983969211578, "learning_rate": 9.98189459525691e-07, "loss": 0.0343, "step": 119930 }, { "epoch": 1.2814787114696298, "grad_norm": 0.019579239189624786, "learning_rate": 9.981880307712491e-07, "loss": 0.0229, "step": 119940 }, { "epoch": 1.2815855547839095, "grad_norm": 23.773841857910156, "learning_rate": 9.981866014543155e-07, "loss": 0.017, "step": 119950 }, { "epoch": 1.2816923980981891, "grad_norm": 0.4742473065853119, "learning_rate": 9.981851715748916e-07, "loss": 0.0099, "step": 119960 }, { "epoch": 1.2817992414124686, "grad_norm": 1.5337638854980469, "learning_rate": 9.981837411329787e-07, "loss": 0.0168, "step": 119970 }, { "epoch": 1.2819060847267483, "grad_norm": 5.064723014831543, "learning_rate": 9.981823101285792e-07, "loss": 0.0391, "step": 119980 }, { "epoch": 1.282012928041028, "grad_norm": 4.406334400177002, "learning_rate": 9.981808785616941e-07, "loss": 0.065, "step": 119990 }, { "epoch": 1.2821197713553074, "grad_norm": 3.447453737258911, "learning_rate": 9.981794464323252e-07, "loss": 0.0209, "step": 120000 }, { "epoch": 1.282226614669587, "grad_norm": 0.8972577452659607, "learning_rate": 9.98178013740474e-07, "loss": 0.047, "step": 120010 }, { "epoch": 1.2823334579838668, "grad_norm": 0.3768848776817322, "learning_rate": 9.981765804861423e-07, "loss": 0.0337, "step": 120020 }, { "epoch": 1.2824403012981462, "grad_norm": 0.751261830329895, "learning_rate": 9.981751466693317e-07, "loss": 0.0154, "step": 120030 }, { "epoch": 1.282547144612426, "grad_norm": 7.8294878005981445, "learning_rate": 9.981737122900435e-07, "loss": 0.0794, "step": 120040 }, { "epoch": 1.2826539879267056, "grad_norm": 2.1042263507843018, "learning_rate": 9.9817227734828e-07, "loss": 0.0173, "step": 120050 }, { "epoch": 1.282760831240985, "grad_norm": 5.070968151092529, "learning_rate": 9.98170841844042e-07, "loss": 0.0396, "step": 120060 }, { "epoch": 1.2828676745552647, "grad_norm": 5.16951322555542, "learning_rate": 9.981694057773318e-07, "loss": 0.0277, "step": 120070 }, { "epoch": 1.2829745178695444, "grad_norm": 0.049215126782655716, "learning_rate": 9.981679691481505e-07, "loss": 0.0122, "step": 120080 }, { "epoch": 1.2830813611838239, "grad_norm": 1.3312917947769165, "learning_rate": 9.981665319565002e-07, "loss": 0.0137, "step": 120090 }, { "epoch": 1.2831882044981036, "grad_norm": 0.06868034601211548, "learning_rate": 9.981650942023823e-07, "loss": 0.0276, "step": 120100 }, { "epoch": 1.2832950478123832, "grad_norm": 1.6305599212646484, "learning_rate": 9.981636558857983e-07, "loss": 0.0238, "step": 120110 }, { "epoch": 1.2834018911266627, "grad_norm": 3.409653425216675, "learning_rate": 9.981622170067499e-07, "loss": 0.028, "step": 120120 }, { "epoch": 1.2835087344409424, "grad_norm": 0.35255393385887146, "learning_rate": 9.981607775652388e-07, "loss": 0.0126, "step": 120130 }, { "epoch": 1.283615577755222, "grad_norm": 7.522434711456299, "learning_rate": 9.981593375612665e-07, "loss": 0.0177, "step": 120140 }, { "epoch": 1.2837224210695015, "grad_norm": 4.611089706420898, "learning_rate": 9.981578969948349e-07, "loss": 0.0265, "step": 120150 }, { "epoch": 1.2838292643837812, "grad_norm": 4.548183917999268, "learning_rate": 9.981564558659452e-07, "loss": 0.0437, "step": 120160 }, { "epoch": 1.2839361076980609, "grad_norm": 6.553822994232178, "learning_rate": 9.981550141745996e-07, "loss": 0.0147, "step": 120170 }, { "epoch": 1.2840429510123403, "grad_norm": 2.6579153537750244, "learning_rate": 9.98153571920799e-07, "loss": 0.081, "step": 120180 }, { "epoch": 1.28414979432662, "grad_norm": 0.0066794115118682384, "learning_rate": 9.981521291045458e-07, "loss": 0.0308, "step": 120190 }, { "epoch": 1.2842566376408997, "grad_norm": 0.9711120128631592, "learning_rate": 9.98150685725841e-07, "loss": 0.0202, "step": 120200 }, { "epoch": 1.2843634809551792, "grad_norm": 3.5101757049560547, "learning_rate": 9.981492417846865e-07, "loss": 0.0187, "step": 120210 }, { "epoch": 1.2844703242694588, "grad_norm": 5.070001602172852, "learning_rate": 9.98147797281084e-07, "loss": 0.0396, "step": 120220 }, { "epoch": 1.2845771675837385, "grad_norm": 11.175479888916016, "learning_rate": 9.981463522150348e-07, "loss": 0.0451, "step": 120230 }, { "epoch": 1.284684010898018, "grad_norm": 2.3173916339874268, "learning_rate": 9.98144906586541e-07, "loss": 0.0438, "step": 120240 }, { "epoch": 1.2847908542122977, "grad_norm": 0.9138731360435486, "learning_rate": 9.981434603956041e-07, "loss": 0.0247, "step": 120250 }, { "epoch": 1.2848976975265773, "grad_norm": 4.133002758026123, "learning_rate": 9.981420136422253e-07, "loss": 0.0261, "step": 120260 }, { "epoch": 1.2850045408408568, "grad_norm": 1.7639626264572144, "learning_rate": 9.98140566326407e-07, "loss": 0.0496, "step": 120270 }, { "epoch": 1.2851113841551365, "grad_norm": 8.140937805175781, "learning_rate": 9.9813911844815e-07, "loss": 0.0328, "step": 120280 }, { "epoch": 1.2852182274694162, "grad_norm": 1.290684700012207, "learning_rate": 9.981376700074567e-07, "loss": 0.0299, "step": 120290 }, { "epoch": 1.2853250707836956, "grad_norm": 5.543927192687988, "learning_rate": 9.98136221004328e-07, "loss": 0.0129, "step": 120300 }, { "epoch": 1.2854319140979753, "grad_norm": 0.16497120261192322, "learning_rate": 9.981347714387663e-07, "loss": 0.0334, "step": 120310 }, { "epoch": 1.285538757412255, "grad_norm": 6.154858112335205, "learning_rate": 9.981333213107725e-07, "loss": 0.0103, "step": 120320 }, { "epoch": 1.2856456007265344, "grad_norm": 2.404681444168091, "learning_rate": 9.981318706203487e-07, "loss": 0.0177, "step": 120330 }, { "epoch": 1.2857524440408141, "grad_norm": 0.17270128428936005, "learning_rate": 9.981304193674964e-07, "loss": 0.0435, "step": 120340 }, { "epoch": 1.2858592873550938, "grad_norm": 3.374101400375366, "learning_rate": 9.981289675522174e-07, "loss": 0.0477, "step": 120350 }, { "epoch": 1.2859661306693733, "grad_norm": 0.01502841617912054, "learning_rate": 9.981275151745131e-07, "loss": 0.0849, "step": 120360 }, { "epoch": 1.286072973983653, "grad_norm": 2.586582899093628, "learning_rate": 9.981260622343851e-07, "loss": 0.0311, "step": 120370 }, { "epoch": 1.2861798172979326, "grad_norm": 6.997551441192627, "learning_rate": 9.981246087318355e-07, "loss": 0.0372, "step": 120380 }, { "epoch": 1.286286660612212, "grad_norm": 1.5443459749221802, "learning_rate": 9.981231546668654e-07, "loss": 0.0177, "step": 120390 }, { "epoch": 1.2863935039264918, "grad_norm": 10.309189796447754, "learning_rate": 9.981217000394768e-07, "loss": 0.083, "step": 120400 }, { "epoch": 1.2865003472407714, "grad_norm": 4.2041239738464355, "learning_rate": 9.98120244849671e-07, "loss": 0.0435, "step": 120410 }, { "epoch": 1.286607190555051, "grad_norm": 0.1974356323480606, "learning_rate": 9.9811878909745e-07, "loss": 0.007, "step": 120420 }, { "epoch": 1.2867140338693306, "grad_norm": 3.136817216873169, "learning_rate": 9.981173327828154e-07, "loss": 0.0561, "step": 120430 }, { "epoch": 1.2868208771836103, "grad_norm": 11.86507797241211, "learning_rate": 9.981158759057684e-07, "loss": 0.0483, "step": 120440 }, { "epoch": 1.2869277204978897, "grad_norm": 2.3468668460845947, "learning_rate": 9.981144184663114e-07, "loss": 0.0174, "step": 120450 }, { "epoch": 1.2870345638121694, "grad_norm": 6.723732948303223, "learning_rate": 9.981129604644453e-07, "loss": 0.0693, "step": 120460 }, { "epoch": 1.287141407126449, "grad_norm": 0.2869071066379547, "learning_rate": 9.981115019001721e-07, "loss": 0.0107, "step": 120470 }, { "epoch": 1.2872482504407285, "grad_norm": 4.482511520385742, "learning_rate": 9.981100427734936e-07, "loss": 0.0408, "step": 120480 }, { "epoch": 1.2873550937550082, "grad_norm": 5.726187229156494, "learning_rate": 9.981085830844112e-07, "loss": 0.054, "step": 120490 }, { "epoch": 1.287461937069288, "grad_norm": 0.03926583379507065, "learning_rate": 9.981071228329265e-07, "loss": 0.0212, "step": 120500 }, { "epoch": 1.2875687803835674, "grad_norm": 7.097753047943115, "learning_rate": 9.981056620190413e-07, "loss": 0.0276, "step": 120510 }, { "epoch": 1.287675623697847, "grad_norm": 3.748504400253296, "learning_rate": 9.981042006427574e-07, "loss": 0.048, "step": 120520 }, { "epoch": 1.2877824670121267, "grad_norm": 0.9218388795852661, "learning_rate": 9.98102738704076e-07, "loss": 0.0105, "step": 120530 }, { "epoch": 1.2878893103264062, "grad_norm": 3.143317937850952, "learning_rate": 9.981012762029993e-07, "loss": 0.0419, "step": 120540 }, { "epoch": 1.2879961536406859, "grad_norm": 4.412820816040039, "learning_rate": 9.980998131395284e-07, "loss": 0.0381, "step": 120550 }, { "epoch": 1.2881029969549656, "grad_norm": 6.536865711212158, "learning_rate": 9.980983495136654e-07, "loss": 0.0978, "step": 120560 }, { "epoch": 1.2882098402692452, "grad_norm": 0.07964304089546204, "learning_rate": 9.980968853254117e-07, "loss": 0.0912, "step": 120570 }, { "epoch": 1.2883166835835247, "grad_norm": 0.08908542990684509, "learning_rate": 9.98095420574769e-07, "loss": 0.0182, "step": 120580 }, { "epoch": 1.2884235268978044, "grad_norm": 2.6686184406280518, "learning_rate": 9.98093955261739e-07, "loss": 0.0178, "step": 120590 }, { "epoch": 1.288530370212084, "grad_norm": 2.1891746520996094, "learning_rate": 9.980924893863232e-07, "loss": 0.0311, "step": 120600 }, { "epoch": 1.2886372135263635, "grad_norm": 0.4919038712978363, "learning_rate": 9.980910229485234e-07, "loss": 0.0336, "step": 120610 }, { "epoch": 1.2887440568406432, "grad_norm": 2.0944480895996094, "learning_rate": 9.980895559483416e-07, "loss": 0.0289, "step": 120620 }, { "epoch": 1.2888509001549229, "grad_norm": 0.042165227234363556, "learning_rate": 9.980880883857788e-07, "loss": 0.0172, "step": 120630 }, { "epoch": 1.2889577434692023, "grad_norm": 0.7915531396865845, "learning_rate": 9.98086620260837e-07, "loss": 0.0214, "step": 120640 }, { "epoch": 1.289064586783482, "grad_norm": 0.010723860934376717, "learning_rate": 9.980851515735178e-07, "loss": 0.0279, "step": 120650 }, { "epoch": 1.2891714300977617, "grad_norm": 18.066938400268555, "learning_rate": 9.98083682323823e-07, "loss": 0.0243, "step": 120660 }, { "epoch": 1.2892782734120414, "grad_norm": 0.012340380810201168, "learning_rate": 9.980822125117539e-07, "loss": 0.0383, "step": 120670 }, { "epoch": 1.2893851167263208, "grad_norm": 5.315225601196289, "learning_rate": 9.980807421373126e-07, "loss": 0.0385, "step": 120680 }, { "epoch": 1.2894919600406005, "grad_norm": 10.093560218811035, "learning_rate": 9.980792712005004e-07, "loss": 0.0289, "step": 120690 }, { "epoch": 1.2895988033548802, "grad_norm": 0.17048387229442596, "learning_rate": 9.980777997013192e-07, "loss": 0.0644, "step": 120700 }, { "epoch": 1.2897056466691597, "grad_norm": 0.9713267683982849, "learning_rate": 9.980763276397706e-07, "loss": 0.0363, "step": 120710 }, { "epoch": 1.2898124899834393, "grad_norm": 3.1738507747650146, "learning_rate": 9.980748550158562e-07, "loss": 0.0152, "step": 120720 }, { "epoch": 1.289919333297719, "grad_norm": 0.04447825253009796, "learning_rate": 9.980733818295776e-07, "loss": 0.0157, "step": 120730 }, { "epoch": 1.2900261766119985, "grad_norm": 2.256056547164917, "learning_rate": 9.980719080809367e-07, "loss": 0.1668, "step": 120740 }, { "epoch": 1.2901330199262782, "grad_norm": 3.215468406677246, "learning_rate": 9.980704337699348e-07, "loss": 0.0374, "step": 120750 }, { "epoch": 1.2902398632405578, "grad_norm": 6.539447784423828, "learning_rate": 9.98068958896574e-07, "loss": 0.0434, "step": 120760 }, { "epoch": 1.2903467065548373, "grad_norm": 4.021876811981201, "learning_rate": 9.980674834608557e-07, "loss": 0.07, "step": 120770 }, { "epoch": 1.290453549869117, "grad_norm": 0.6480350494384766, "learning_rate": 9.980660074627816e-07, "loss": 0.0149, "step": 120780 }, { "epoch": 1.2905603931833967, "grad_norm": 1.0114855766296387, "learning_rate": 9.980645309023536e-07, "loss": 0.0183, "step": 120790 }, { "epoch": 1.2906672364976761, "grad_norm": 11.23317813873291, "learning_rate": 9.980630537795728e-07, "loss": 0.0428, "step": 120800 }, { "epoch": 1.2907740798119558, "grad_norm": 7.185416221618652, "learning_rate": 9.980615760944416e-07, "loss": 0.0595, "step": 120810 }, { "epoch": 1.2908809231262355, "grad_norm": 11.36571216583252, "learning_rate": 9.98060097846961e-07, "loss": 0.0597, "step": 120820 }, { "epoch": 1.290987766440515, "grad_norm": 6.039790630340576, "learning_rate": 9.98058619037133e-07, "loss": 0.0596, "step": 120830 }, { "epoch": 1.2910946097547946, "grad_norm": 0.08119199424982071, "learning_rate": 9.980571396649593e-07, "loss": 0.0774, "step": 120840 }, { "epoch": 1.2912014530690743, "grad_norm": 2.2916252613067627, "learning_rate": 9.980556597304414e-07, "loss": 0.0539, "step": 120850 }, { "epoch": 1.2913082963833538, "grad_norm": 2.352419137954712, "learning_rate": 9.980541792335813e-07, "loss": 0.0231, "step": 120860 }, { "epoch": 1.2914151396976334, "grad_norm": 0.4048113226890564, "learning_rate": 9.980526981743801e-07, "loss": 0.0246, "step": 120870 }, { "epoch": 1.2915219830119131, "grad_norm": 3.8677761554718018, "learning_rate": 9.9805121655284e-07, "loss": 0.0451, "step": 120880 }, { "epoch": 1.2916288263261926, "grad_norm": 8.15401840209961, "learning_rate": 9.980497343689625e-07, "loss": 0.0517, "step": 120890 }, { "epoch": 1.2917356696404723, "grad_norm": 3.86822247505188, "learning_rate": 9.980482516227492e-07, "loss": 0.0611, "step": 120900 }, { "epoch": 1.291842512954752, "grad_norm": 0.10685551166534424, "learning_rate": 9.98046768314202e-07, "loss": 0.0405, "step": 120910 }, { "epoch": 1.2919493562690314, "grad_norm": 0.016632674261927605, "learning_rate": 9.980452844433222e-07, "loss": 0.0529, "step": 120920 }, { "epoch": 1.292056199583311, "grad_norm": 4.886689186096191, "learning_rate": 9.98043800010112e-07, "loss": 0.0773, "step": 120930 }, { "epoch": 1.2921630428975908, "grad_norm": 3.3720359802246094, "learning_rate": 9.980423150145724e-07, "loss": 0.0274, "step": 120940 }, { "epoch": 1.2922698862118702, "grad_norm": 0.02209453471004963, "learning_rate": 9.980408294567056e-07, "loss": 0.0032, "step": 120950 }, { "epoch": 1.29237672952615, "grad_norm": 0.020311621949076653, "learning_rate": 9.980393433365132e-07, "loss": 0.0858, "step": 120960 }, { "epoch": 1.2924835728404296, "grad_norm": 1.7687147855758667, "learning_rate": 9.980378566539966e-07, "loss": 0.0569, "step": 120970 }, { "epoch": 1.292590416154709, "grad_norm": 0.24258802831172943, "learning_rate": 9.980363694091575e-07, "loss": 0.0419, "step": 120980 }, { "epoch": 1.2926972594689887, "grad_norm": 13.375439643859863, "learning_rate": 9.980348816019982e-07, "loss": 0.0317, "step": 120990 }, { "epoch": 1.2928041027832684, "grad_norm": 0.022992456331849098, "learning_rate": 9.980333932325196e-07, "loss": 0.0511, "step": 121000 }, { "epoch": 1.2929109460975479, "grad_norm": 0.07663524150848389, "learning_rate": 9.98031904300724e-07, "loss": 0.0227, "step": 121010 }, { "epoch": 1.2930177894118275, "grad_norm": 3.0121817588806152, "learning_rate": 9.980304148066126e-07, "loss": 0.0227, "step": 121020 }, { "epoch": 1.2931246327261072, "grad_norm": 7.044638633728027, "learning_rate": 9.980289247501873e-07, "loss": 0.0315, "step": 121030 }, { "epoch": 1.2932314760403867, "grad_norm": 0.02244192361831665, "learning_rate": 9.980274341314498e-07, "loss": 0.0237, "step": 121040 }, { "epoch": 1.2933383193546664, "grad_norm": 6.185739994049072, "learning_rate": 9.980259429504016e-07, "loss": 0.0234, "step": 121050 }, { "epoch": 1.293445162668946, "grad_norm": 0.10400518029928207, "learning_rate": 9.980244512070446e-07, "loss": 0.0192, "step": 121060 }, { "epoch": 1.2935520059832255, "grad_norm": 1.6151319742202759, "learning_rate": 9.980229589013804e-07, "loss": 0.0525, "step": 121070 }, { "epoch": 1.2936588492975052, "grad_norm": 0.02216324396431446, "learning_rate": 9.980214660334107e-07, "loss": 0.0422, "step": 121080 }, { "epoch": 1.2937656926117849, "grad_norm": 3.7587785720825195, "learning_rate": 9.980199726031373e-07, "loss": 0.037, "step": 121090 }, { "epoch": 1.2938725359260643, "grad_norm": 5.667673587799072, "learning_rate": 9.980184786105616e-07, "loss": 0.0616, "step": 121100 }, { "epoch": 1.293979379240344, "grad_norm": 7.10515832901001, "learning_rate": 9.980169840556856e-07, "loss": 0.0091, "step": 121110 }, { "epoch": 1.2940862225546237, "grad_norm": 0.38672545552253723, "learning_rate": 9.980154889385107e-07, "loss": 0.1009, "step": 121120 }, { "epoch": 1.2941930658689031, "grad_norm": 0.2123354971408844, "learning_rate": 9.980139932590388e-07, "loss": 0.0229, "step": 121130 }, { "epoch": 1.2942999091831828, "grad_norm": 2.957740068435669, "learning_rate": 9.980124970172716e-07, "loss": 0.0282, "step": 121140 }, { "epoch": 1.2944067524974625, "grad_norm": 0.5900014638900757, "learning_rate": 9.980110002132106e-07, "loss": 0.0493, "step": 121150 }, { "epoch": 1.294513595811742, "grad_norm": 3.6046276092529297, "learning_rate": 9.980095028468576e-07, "loss": 0.0318, "step": 121160 }, { "epoch": 1.2946204391260216, "grad_norm": 1.5669827461242676, "learning_rate": 9.980080049182142e-07, "loss": 0.0464, "step": 121170 }, { "epoch": 1.2947272824403013, "grad_norm": 2.417900562286377, "learning_rate": 9.980065064272823e-07, "loss": 0.0252, "step": 121180 }, { "epoch": 1.2948341257545808, "grad_norm": 5.715507984161377, "learning_rate": 9.980050073740634e-07, "loss": 0.0371, "step": 121190 }, { "epoch": 1.2949409690688605, "grad_norm": 6.847652912139893, "learning_rate": 9.980035077585592e-07, "loss": 0.0221, "step": 121200 }, { "epoch": 1.2950478123831402, "grad_norm": 0.9474446177482605, "learning_rate": 9.980020075807715e-07, "loss": 0.006, "step": 121210 }, { "epoch": 1.2951546556974196, "grad_norm": 2.5674750804901123, "learning_rate": 9.98000506840702e-07, "loss": 0.0269, "step": 121220 }, { "epoch": 1.2952614990116993, "grad_norm": 0.35046693682670593, "learning_rate": 9.979990055383524e-07, "loss": 0.0348, "step": 121230 }, { "epoch": 1.295368342325979, "grad_norm": 0.5096801519393921, "learning_rate": 9.979975036737242e-07, "loss": 0.0115, "step": 121240 }, { "epoch": 1.2954751856402584, "grad_norm": 9.200657844543457, "learning_rate": 9.979960012468193e-07, "loss": 0.0562, "step": 121250 }, { "epoch": 1.2955820289545381, "grad_norm": 2.3193790912628174, "learning_rate": 9.979944982576394e-07, "loss": 0.1017, "step": 121260 }, { "epoch": 1.2956888722688178, "grad_norm": 1.5370168685913086, "learning_rate": 9.97992994706186e-07, "loss": 0.0202, "step": 121270 }, { "epoch": 1.2957957155830973, "grad_norm": 3.0173277854919434, "learning_rate": 9.979914905924609e-07, "loss": 0.025, "step": 121280 }, { "epoch": 1.295902558897377, "grad_norm": 3.326446056365967, "learning_rate": 9.97989985916466e-07, "loss": 0.0234, "step": 121290 }, { "epoch": 1.2960094022116566, "grad_norm": 5.686964511871338, "learning_rate": 9.979884806782027e-07, "loss": 0.0498, "step": 121300 }, { "epoch": 1.2961162455259363, "grad_norm": 0.08607221394777298, "learning_rate": 9.979869748776729e-07, "loss": 0.0147, "step": 121310 }, { "epoch": 1.2962230888402158, "grad_norm": 6.48441219329834, "learning_rate": 9.979854685148781e-07, "loss": 0.0455, "step": 121320 }, { "epoch": 1.2963299321544954, "grad_norm": 0.3963397741317749, "learning_rate": 9.979839615898201e-07, "loss": 0.0444, "step": 121330 }, { "epoch": 1.2964367754687751, "grad_norm": 9.267173767089844, "learning_rate": 9.979824541025008e-07, "loss": 0.0329, "step": 121340 }, { "epoch": 1.2965436187830546, "grad_norm": 0.14838159084320068, "learning_rate": 9.979809460529217e-07, "loss": 0.0525, "step": 121350 }, { "epoch": 1.2966504620973343, "grad_norm": 0.0994710847735405, "learning_rate": 9.979794374410844e-07, "loss": 0.0215, "step": 121360 }, { "epoch": 1.296757305411614, "grad_norm": 2.4532647132873535, "learning_rate": 9.979779282669907e-07, "loss": 0.0357, "step": 121370 }, { "epoch": 1.2968641487258934, "grad_norm": 0.024897892028093338, "learning_rate": 9.979764185306424e-07, "loss": 0.0321, "step": 121380 }, { "epoch": 1.296970992040173, "grad_norm": 16.673730850219727, "learning_rate": 9.979749082320411e-07, "loss": 0.0505, "step": 121390 }, { "epoch": 1.2970778353544528, "grad_norm": 1.0012810230255127, "learning_rate": 9.979733973711887e-07, "loss": 0.0124, "step": 121400 }, { "epoch": 1.2971846786687324, "grad_norm": 0.07367192208766937, "learning_rate": 9.979718859480867e-07, "loss": 0.0409, "step": 121410 }, { "epoch": 1.297291521983012, "grad_norm": 0.049093276262283325, "learning_rate": 9.979703739627368e-07, "loss": 0.0197, "step": 121420 }, { "epoch": 1.2973983652972916, "grad_norm": 0.016686460003256798, "learning_rate": 9.979688614151406e-07, "loss": 0.0231, "step": 121430 }, { "epoch": 1.2975052086115713, "grad_norm": 3.2929022312164307, "learning_rate": 9.979673483053003e-07, "loss": 0.0245, "step": 121440 }, { "epoch": 1.2976120519258507, "grad_norm": 1.2403717041015625, "learning_rate": 9.979658346332172e-07, "loss": 0.0373, "step": 121450 }, { "epoch": 1.2977188952401304, "grad_norm": 4.240835666656494, "learning_rate": 9.97964320398893e-07, "loss": 0.0338, "step": 121460 }, { "epoch": 1.29782573855441, "grad_norm": 2.5337648391723633, "learning_rate": 9.979628056023295e-07, "loss": 0.0087, "step": 121470 }, { "epoch": 1.2979325818686895, "grad_norm": 7.4379777908325195, "learning_rate": 9.979612902435283e-07, "loss": 0.04, "step": 121480 }, { "epoch": 1.2980394251829692, "grad_norm": 2.606231451034546, "learning_rate": 9.979597743224915e-07, "loss": 0.0139, "step": 121490 }, { "epoch": 1.298146268497249, "grad_norm": 0.24269594252109528, "learning_rate": 9.979582578392202e-07, "loss": 0.0148, "step": 121500 }, { "epoch": 1.2982531118115284, "grad_norm": 0.6557459235191345, "learning_rate": 9.979567407937167e-07, "loss": 0.012, "step": 121510 }, { "epoch": 1.298359955125808, "grad_norm": 0.22389055788516998, "learning_rate": 9.979552231859825e-07, "loss": 0.032, "step": 121520 }, { "epoch": 1.2984667984400877, "grad_norm": 0.02763204835355282, "learning_rate": 9.97953705016019e-07, "loss": 0.0438, "step": 121530 }, { "epoch": 1.2985736417543672, "grad_norm": 1.966698408126831, "learning_rate": 9.979521862838285e-07, "loss": 0.0179, "step": 121540 }, { "epoch": 1.2986804850686469, "grad_norm": 0.015868956223130226, "learning_rate": 9.979506669894122e-07, "loss": 0.0258, "step": 121550 }, { "epoch": 1.2987873283829265, "grad_norm": 0.81821209192276, "learning_rate": 9.97949147132772e-07, "loss": 0.0271, "step": 121560 }, { "epoch": 1.298894171697206, "grad_norm": 3.7744686603546143, "learning_rate": 9.979476267139096e-07, "loss": 0.0386, "step": 121570 }, { "epoch": 1.2990010150114857, "grad_norm": 3.397484540939331, "learning_rate": 9.97946105732827e-07, "loss": 0.0307, "step": 121580 }, { "epoch": 1.2991078583257654, "grad_norm": 0.8940718770027161, "learning_rate": 9.979445841895253e-07, "loss": 0.0182, "step": 121590 }, { "epoch": 1.2992147016400448, "grad_norm": 4.6554412841796875, "learning_rate": 9.979430620840068e-07, "loss": 0.0663, "step": 121600 }, { "epoch": 1.2993215449543245, "grad_norm": 5.868922710418701, "learning_rate": 9.97941539416273e-07, "loss": 0.0524, "step": 121610 }, { "epoch": 1.2994283882686042, "grad_norm": 0.9205256700515747, "learning_rate": 9.979400161863255e-07, "loss": 0.0369, "step": 121620 }, { "epoch": 1.2995352315828836, "grad_norm": 0.02362748421728611, "learning_rate": 9.979384923941664e-07, "loss": 0.039, "step": 121630 }, { "epoch": 1.2996420748971633, "grad_norm": 12.498374938964844, "learning_rate": 9.97936968039797e-07, "loss": 0.07, "step": 121640 }, { "epoch": 1.299748918211443, "grad_norm": 10.639974594116211, "learning_rate": 9.979354431232193e-07, "loss": 0.0333, "step": 121650 }, { "epoch": 1.2998557615257225, "grad_norm": 1.0023932456970215, "learning_rate": 9.979339176444346e-07, "loss": 0.0306, "step": 121660 }, { "epoch": 1.2999626048400021, "grad_norm": 10.632603645324707, "learning_rate": 9.979323916034452e-07, "loss": 0.029, "step": 121670 }, { "epoch": 1.3000694481542818, "grad_norm": 1.0821607112884521, "learning_rate": 9.979308650002525e-07, "loss": 0.0191, "step": 121680 }, { "epoch": 1.3001762914685613, "grad_norm": 0.9154969453811646, "learning_rate": 9.979293378348581e-07, "loss": 0.0812, "step": 121690 }, { "epoch": 1.300283134782841, "grad_norm": 1.5126873254776, "learning_rate": 9.979278101072641e-07, "loss": 0.0503, "step": 121700 }, { "epoch": 1.3003899780971206, "grad_norm": 2.0663955211639404, "learning_rate": 9.979262818174719e-07, "loss": 0.0419, "step": 121710 }, { "epoch": 1.3004968214114, "grad_norm": 2.9620039463043213, "learning_rate": 9.979247529654835e-07, "loss": 0.0327, "step": 121720 }, { "epoch": 1.3006036647256798, "grad_norm": 2.4126107692718506, "learning_rate": 9.979232235513004e-07, "loss": 0.0267, "step": 121730 }, { "epoch": 1.3007105080399595, "grad_norm": 0.8282005190849304, "learning_rate": 9.979216935749243e-07, "loss": 0.0353, "step": 121740 }, { "epoch": 1.300817351354239, "grad_norm": 4.68536376953125, "learning_rate": 9.979201630363572e-07, "loss": 0.0848, "step": 121750 }, { "epoch": 1.3009241946685186, "grad_norm": 5.563179016113281, "learning_rate": 9.979186319356003e-07, "loss": 0.0188, "step": 121760 }, { "epoch": 1.3010310379827983, "grad_norm": 2.501098155975342, "learning_rate": 9.97917100272656e-07, "loss": 0.0112, "step": 121770 }, { "epoch": 1.3011378812970777, "grad_norm": 1.194654107093811, "learning_rate": 9.979155680475257e-07, "loss": 0.0153, "step": 121780 }, { "epoch": 1.3012447246113574, "grad_norm": 2.4135029315948486, "learning_rate": 9.97914035260211e-07, "loss": 0.0499, "step": 121790 }, { "epoch": 1.301351567925637, "grad_norm": 4.348507404327393, "learning_rate": 9.979125019107139e-07, "loss": 0.0312, "step": 121800 }, { "epoch": 1.3014584112399166, "grad_norm": 0.06765002012252808, "learning_rate": 9.97910967999036e-07, "loss": 0.0111, "step": 121810 }, { "epoch": 1.3015652545541962, "grad_norm": 6.2019853591918945, "learning_rate": 9.979094335251793e-07, "loss": 0.0327, "step": 121820 }, { "epoch": 1.301672097868476, "grad_norm": 8.15261459350586, "learning_rate": 9.979078984891449e-07, "loss": 0.0413, "step": 121830 }, { "epoch": 1.3017789411827554, "grad_norm": 0.8745245337486267, "learning_rate": 9.97906362890935e-07, "loss": 0.0361, "step": 121840 }, { "epoch": 1.301885784497035, "grad_norm": 4.890904426574707, "learning_rate": 9.979048267305512e-07, "loss": 0.0316, "step": 121850 }, { "epoch": 1.3019926278113148, "grad_norm": 1.978804111480713, "learning_rate": 9.979032900079954e-07, "loss": 0.0557, "step": 121860 }, { "epoch": 1.3020994711255942, "grad_norm": 7.947466850280762, "learning_rate": 9.97901752723269e-07, "loss": 0.0568, "step": 121870 }, { "epoch": 1.302206314439874, "grad_norm": 0.3775622248649597, "learning_rate": 9.97900214876374e-07, "loss": 0.0198, "step": 121880 }, { "epoch": 1.3023131577541536, "grad_norm": 0.5454983711242676, "learning_rate": 9.978986764673122e-07, "loss": 0.0271, "step": 121890 }, { "epoch": 1.302420001068433, "grad_norm": 8.472251892089844, "learning_rate": 9.97897137496085e-07, "loss": 0.0141, "step": 121900 }, { "epoch": 1.3025268443827127, "grad_norm": 0.25953438878059387, "learning_rate": 9.978955979626947e-07, "loss": 0.0439, "step": 121910 }, { "epoch": 1.3026336876969924, "grad_norm": 4.06290340423584, "learning_rate": 9.978940578671425e-07, "loss": 0.0219, "step": 121920 }, { "epoch": 1.3027405310112719, "grad_norm": 8.301261901855469, "learning_rate": 9.978925172094305e-07, "loss": 0.0172, "step": 121930 }, { "epoch": 1.3028473743255515, "grad_norm": 6.485494136810303, "learning_rate": 9.9789097598956e-07, "loss": 0.0165, "step": 121940 }, { "epoch": 1.3029542176398312, "grad_norm": 0.10623215138912201, "learning_rate": 9.97889434207533e-07, "loss": 0.011, "step": 121950 }, { "epoch": 1.3030610609541107, "grad_norm": 0.2884265184402466, "learning_rate": 9.978878918633513e-07, "loss": 0.0264, "step": 121960 }, { "epoch": 1.3031679042683904, "grad_norm": 3.097191572189331, "learning_rate": 9.978863489570166e-07, "loss": 0.0459, "step": 121970 }, { "epoch": 1.30327474758267, "grad_norm": 4.732090473175049, "learning_rate": 9.978848054885308e-07, "loss": 0.052, "step": 121980 }, { "epoch": 1.3033815908969495, "grad_norm": 0.7569125890731812, "learning_rate": 9.978832614578954e-07, "loss": 0.0276, "step": 121990 }, { "epoch": 1.3034884342112292, "grad_norm": 6.183706760406494, "learning_rate": 9.978817168651122e-07, "loss": 0.0323, "step": 122000 }, { "epoch": 1.3035952775255089, "grad_norm": 4.380467891693115, "learning_rate": 9.978801717101829e-07, "loss": 0.0429, "step": 122010 }, { "epoch": 1.3037021208397883, "grad_norm": 1.0752779245376587, "learning_rate": 9.978786259931094e-07, "loss": 0.0429, "step": 122020 }, { "epoch": 1.303808964154068, "grad_norm": 0.004095118492841721, "learning_rate": 9.978770797138933e-07, "loss": 0.0331, "step": 122030 }, { "epoch": 1.3039158074683477, "grad_norm": 7.513943195343018, "learning_rate": 9.978755328725362e-07, "loss": 0.0603, "step": 122040 }, { "epoch": 1.3040226507826274, "grad_norm": 3.604839324951172, "learning_rate": 9.978739854690404e-07, "loss": 0.0728, "step": 122050 }, { "epoch": 1.3041294940969068, "grad_norm": 2.357685089111328, "learning_rate": 9.978724375034072e-07, "loss": 0.0357, "step": 122060 }, { "epoch": 1.3042363374111865, "grad_norm": 0.007004762999713421, "learning_rate": 9.978708889756385e-07, "loss": 0.0163, "step": 122070 }, { "epoch": 1.3043431807254662, "grad_norm": 12.653929710388184, "learning_rate": 9.978693398857359e-07, "loss": 0.073, "step": 122080 }, { "epoch": 1.3044500240397456, "grad_norm": 0.05450627952814102, "learning_rate": 9.978677902337011e-07, "loss": 0.0407, "step": 122090 }, { "epoch": 1.3045568673540253, "grad_norm": 0.1069951057434082, "learning_rate": 9.978662400195361e-07, "loss": 0.0556, "step": 122100 }, { "epoch": 1.304663710668305, "grad_norm": 15.24361515045166, "learning_rate": 9.978646892432426e-07, "loss": 0.106, "step": 122110 }, { "epoch": 1.3047705539825845, "grad_norm": 0.020202772691845894, "learning_rate": 9.978631379048222e-07, "loss": 0.0633, "step": 122120 }, { "epoch": 1.3048773972968641, "grad_norm": 0.5676754117012024, "learning_rate": 9.978615860042769e-07, "loss": 0.0244, "step": 122130 }, { "epoch": 1.3049842406111438, "grad_norm": 0.028286008164286613, "learning_rate": 9.97860033541608e-07, "loss": 0.0358, "step": 122140 }, { "epoch": 1.3050910839254235, "grad_norm": 10.324284553527832, "learning_rate": 9.97858480516818e-07, "loss": 0.0076, "step": 122150 }, { "epoch": 1.305197927239703, "grad_norm": 0.31094256043434143, "learning_rate": 9.978569269299077e-07, "loss": 0.0402, "step": 122160 }, { "epoch": 1.3053047705539826, "grad_norm": 4.996932029724121, "learning_rate": 9.978553727808796e-07, "loss": 0.0242, "step": 122170 }, { "epoch": 1.3054116138682623, "grad_norm": 0.3541963994503021, "learning_rate": 9.978538180697354e-07, "loss": 0.0549, "step": 122180 }, { "epoch": 1.3055184571825418, "grad_norm": 10.288252830505371, "learning_rate": 9.978522627964764e-07, "loss": 0.0433, "step": 122190 }, { "epoch": 1.3056253004968215, "grad_norm": 6.392190933227539, "learning_rate": 9.978507069611047e-07, "loss": 0.0329, "step": 122200 }, { "epoch": 1.3057321438111011, "grad_norm": 14.506113052368164, "learning_rate": 9.978491505636219e-07, "loss": 0.027, "step": 122210 }, { "epoch": 1.3058389871253806, "grad_norm": 8.89051628112793, "learning_rate": 9.978475936040297e-07, "loss": 0.1168, "step": 122220 }, { "epoch": 1.3059458304396603, "grad_norm": 3.7715463638305664, "learning_rate": 9.978460360823305e-07, "loss": 0.0397, "step": 122230 }, { "epoch": 1.30605267375394, "grad_norm": 5.5520339012146, "learning_rate": 9.97844477998525e-07, "loss": 0.0485, "step": 122240 }, { "epoch": 1.3061595170682194, "grad_norm": 1.9254788160324097, "learning_rate": 9.97842919352616e-07, "loss": 0.0292, "step": 122250 }, { "epoch": 1.306266360382499, "grad_norm": 15.128854751586914, "learning_rate": 9.978413601446043e-07, "loss": 0.1047, "step": 122260 }, { "epoch": 1.3063732036967788, "grad_norm": 1.8695605993270874, "learning_rate": 9.978398003744923e-07, "loss": 0.0224, "step": 122270 }, { "epoch": 1.3064800470110582, "grad_norm": 0.006725084502249956, "learning_rate": 9.978382400422818e-07, "loss": 0.1514, "step": 122280 }, { "epoch": 1.306586890325338, "grad_norm": 0.5753654837608337, "learning_rate": 9.978366791479743e-07, "loss": 0.0461, "step": 122290 }, { "epoch": 1.3066937336396176, "grad_norm": 3.279477119445801, "learning_rate": 9.978351176915713e-07, "loss": 0.0672, "step": 122300 }, { "epoch": 1.306800576953897, "grad_norm": 1.9433326721191406, "learning_rate": 9.97833555673075e-07, "loss": 0.0383, "step": 122310 }, { "epoch": 1.3069074202681767, "grad_norm": 1.2285596132278442, "learning_rate": 9.978319930924872e-07, "loss": 0.0557, "step": 122320 }, { "epoch": 1.3070142635824564, "grad_norm": 1.807022213935852, "learning_rate": 9.978304299498095e-07, "loss": 0.0092, "step": 122330 }, { "epoch": 1.3071211068967359, "grad_norm": 5.123398303985596, "learning_rate": 9.978288662450436e-07, "loss": 0.0558, "step": 122340 }, { "epoch": 1.3072279502110156, "grad_norm": 16.011289596557617, "learning_rate": 9.978273019781914e-07, "loss": 0.0369, "step": 122350 }, { "epoch": 1.3073347935252952, "grad_norm": 3.8182334899902344, "learning_rate": 9.978257371492545e-07, "loss": 0.0377, "step": 122360 }, { "epoch": 1.3074416368395747, "grad_norm": 3.126786231994629, "learning_rate": 9.978241717582348e-07, "loss": 0.0228, "step": 122370 }, { "epoch": 1.3075484801538544, "grad_norm": 2.398452043533325, "learning_rate": 9.97822605805134e-07, "loss": 0.017, "step": 122380 }, { "epoch": 1.307655323468134, "grad_norm": 0.18073202669620514, "learning_rate": 9.97821039289954e-07, "loss": 0.0056, "step": 122390 }, { "epoch": 1.3077621667824135, "grad_norm": 7.777932643890381, "learning_rate": 9.978194722126964e-07, "loss": 0.065, "step": 122400 }, { "epoch": 1.3078690100966932, "grad_norm": 0.19897659122943878, "learning_rate": 9.97817904573363e-07, "loss": 0.0367, "step": 122410 }, { "epoch": 1.3079758534109729, "grad_norm": 1.1375865936279297, "learning_rate": 9.978163363719559e-07, "loss": 0.0173, "step": 122420 }, { "epoch": 1.3080826967252523, "grad_norm": 1.796196699142456, "learning_rate": 9.978147676084762e-07, "loss": 0.0472, "step": 122430 }, { "epoch": 1.308189540039532, "grad_norm": 0.23415111005306244, "learning_rate": 9.978131982829264e-07, "loss": 0.023, "step": 122440 }, { "epoch": 1.3082963833538117, "grad_norm": 9.176459312438965, "learning_rate": 9.978116283953078e-07, "loss": 0.0876, "step": 122450 }, { "epoch": 1.3084032266680912, "grad_norm": 5.676078796386719, "learning_rate": 9.97810057945622e-07, "loss": 0.0248, "step": 122460 }, { "epoch": 1.3085100699823708, "grad_norm": 9.6764497756958, "learning_rate": 9.978084869338715e-07, "loss": 0.0243, "step": 122470 }, { "epoch": 1.3086169132966505, "grad_norm": 1.7827290296554565, "learning_rate": 9.978069153600574e-07, "loss": 0.0231, "step": 122480 }, { "epoch": 1.30872375661093, "grad_norm": 1.9553101062774658, "learning_rate": 9.978053432241819e-07, "loss": 0.0491, "step": 122490 }, { "epoch": 1.3088305999252097, "grad_norm": 7.913287162780762, "learning_rate": 9.978037705262464e-07, "loss": 0.0188, "step": 122500 }, { "epoch": 1.3089374432394894, "grad_norm": 0.014520665630698204, "learning_rate": 9.97802197266253e-07, "loss": 0.0346, "step": 122510 }, { "epoch": 1.3090442865537688, "grad_norm": 0.041234783828258514, "learning_rate": 9.978006234442034e-07, "loss": 0.022, "step": 122520 }, { "epoch": 1.3091511298680485, "grad_norm": 4.237362861633301, "learning_rate": 9.977990490600992e-07, "loss": 0.038, "step": 122530 }, { "epoch": 1.3092579731823282, "grad_norm": 0.19853678345680237, "learning_rate": 9.977974741139424e-07, "loss": 0.0399, "step": 122540 }, { "epoch": 1.3093648164966076, "grad_norm": 0.5427231192588806, "learning_rate": 9.977958986057346e-07, "loss": 0.0923, "step": 122550 }, { "epoch": 1.3094716598108873, "grad_norm": 2.8566813468933105, "learning_rate": 9.977943225354777e-07, "loss": 0.0932, "step": 122560 }, { "epoch": 1.309578503125167, "grad_norm": 7.586403846740723, "learning_rate": 9.977927459031735e-07, "loss": 0.0445, "step": 122570 }, { "epoch": 1.3096853464394465, "grad_norm": 0.663565456867218, "learning_rate": 9.977911687088236e-07, "loss": 0.0213, "step": 122580 }, { "epoch": 1.3097921897537261, "grad_norm": 0.05631064996123314, "learning_rate": 9.977895909524298e-07, "loss": 0.0145, "step": 122590 }, { "epoch": 1.3098990330680058, "grad_norm": 0.1261119246482849, "learning_rate": 9.977880126339943e-07, "loss": 0.0028, "step": 122600 }, { "epoch": 1.3100058763822853, "grad_norm": 8.923165321350098, "learning_rate": 9.977864337535184e-07, "loss": 0.0697, "step": 122610 }, { "epoch": 1.310112719696565, "grad_norm": 8.990571022033691, "learning_rate": 9.97784854311004e-07, "loss": 0.005, "step": 122620 }, { "epoch": 1.3102195630108446, "grad_norm": 3.9802753925323486, "learning_rate": 9.97783274306453e-07, "loss": 0.0221, "step": 122630 }, { "epoch": 1.310326406325124, "grad_norm": 0.1287509948015213, "learning_rate": 9.977816937398672e-07, "loss": 0.0689, "step": 122640 }, { "epoch": 1.3104332496394038, "grad_norm": 9.959409713745117, "learning_rate": 9.97780112611248e-07, "loss": 0.0185, "step": 122650 }, { "epoch": 1.3105400929536835, "grad_norm": Infinity, "learning_rate": 9.977785309205978e-07, "loss": 0.0459, "step": 122660 }, { "epoch": 1.310646936267963, "grad_norm": 4.331108570098877, "learning_rate": 9.97776948667918e-07, "loss": 0.0721, "step": 122670 }, { "epoch": 1.3107537795822426, "grad_norm": 7.627871990203857, "learning_rate": 9.977753658532103e-07, "loss": 0.0988, "step": 122680 }, { "epoch": 1.3108606228965223, "grad_norm": 3.0640814304351807, "learning_rate": 9.977737824764768e-07, "loss": 0.0247, "step": 122690 }, { "epoch": 1.3109674662108017, "grad_norm": 1.7165725231170654, "learning_rate": 9.97772198537719e-07, "loss": 0.0325, "step": 122700 }, { "epoch": 1.3110743095250814, "grad_norm": 5.138030529022217, "learning_rate": 9.977706140369388e-07, "loss": 0.0284, "step": 122710 }, { "epoch": 1.311181152839361, "grad_norm": 0.004375457763671875, "learning_rate": 9.97769028974138e-07, "loss": 0.0265, "step": 122720 }, { "epoch": 1.3112879961536406, "grad_norm": 0.0908205583691597, "learning_rate": 9.977674433493187e-07, "loss": 0.0271, "step": 122730 }, { "epoch": 1.3113948394679202, "grad_norm": 0.834260106086731, "learning_rate": 9.97765857162482e-07, "loss": 0.0383, "step": 122740 }, { "epoch": 1.3115016827822, "grad_norm": 0.16369841992855072, "learning_rate": 9.977642704136303e-07, "loss": 0.0118, "step": 122750 }, { "epoch": 1.3116085260964794, "grad_norm": 0.010663231834769249, "learning_rate": 9.977626831027653e-07, "loss": 0.0334, "step": 122760 }, { "epoch": 1.311715369410759, "grad_norm": 7.135370254516602, "learning_rate": 9.977610952298884e-07, "loss": 0.0638, "step": 122770 }, { "epoch": 1.3118222127250387, "grad_norm": 2.024958848953247, "learning_rate": 9.977595067950017e-07, "loss": 0.0682, "step": 122780 }, { "epoch": 1.3119290560393184, "grad_norm": 4.465658187866211, "learning_rate": 9.97757917798107e-07, "loss": 0.0377, "step": 122790 }, { "epoch": 1.3120358993535979, "grad_norm": 0.0903279259800911, "learning_rate": 9.97756328239206e-07, "loss": 0.0433, "step": 122800 }, { "epoch": 1.3121427426678776, "grad_norm": 12.080885887145996, "learning_rate": 9.977547381183008e-07, "loss": 0.0368, "step": 122810 }, { "epoch": 1.3122495859821572, "grad_norm": 1.893186092376709, "learning_rate": 9.977531474353925e-07, "loss": 0.1493, "step": 122820 }, { "epoch": 1.3123564292964367, "grad_norm": 4.362864017486572, "learning_rate": 9.977515561904836e-07, "loss": 0.0782, "step": 122830 }, { "epoch": 1.3124632726107164, "grad_norm": 0.01071291696280241, "learning_rate": 9.977499643835757e-07, "loss": 0.0054, "step": 122840 }, { "epoch": 1.312570115924996, "grad_norm": 1.2252590656280518, "learning_rate": 9.977483720146703e-07, "loss": 0.0257, "step": 122850 }, { "epoch": 1.3126769592392755, "grad_norm": 0.7802160382270813, "learning_rate": 9.977467790837694e-07, "loss": 0.0225, "step": 122860 }, { "epoch": 1.3127838025535552, "grad_norm": 0.03333219885826111, "learning_rate": 9.97745185590875e-07, "loss": 0.0316, "step": 122870 }, { "epoch": 1.3128906458678349, "grad_norm": 0.18275415897369385, "learning_rate": 9.977435915359887e-07, "loss": 0.0344, "step": 122880 }, { "epoch": 1.3129974891821146, "grad_norm": 0.0428207628428936, "learning_rate": 9.977419969191121e-07, "loss": 0.0538, "step": 122890 }, { "epoch": 1.313104332496394, "grad_norm": 1.0381081104278564, "learning_rate": 9.977404017402476e-07, "loss": 0.013, "step": 122900 }, { "epoch": 1.3132111758106737, "grad_norm": 0.2735476791858673, "learning_rate": 9.977388059993962e-07, "loss": 0.092, "step": 122910 }, { "epoch": 1.3133180191249534, "grad_norm": 0.21038411557674408, "learning_rate": 9.977372096965605e-07, "loss": 0.0292, "step": 122920 }, { "epoch": 1.3134248624392328, "grad_norm": 8.836225509643555, "learning_rate": 9.977356128317417e-07, "loss": 0.0303, "step": 122930 }, { "epoch": 1.3135317057535125, "grad_norm": 0.25689631700515747, "learning_rate": 9.97734015404942e-07, "loss": 0.0063, "step": 122940 }, { "epoch": 1.3136385490677922, "grad_norm": 3.7086076736450195, "learning_rate": 9.977324174161627e-07, "loss": 0.036, "step": 122950 }, { "epoch": 1.3137453923820717, "grad_norm": 5.752975940704346, "learning_rate": 9.977308188654063e-07, "loss": 0.0433, "step": 122960 }, { "epoch": 1.3138522356963513, "grad_norm": 5.0506439208984375, "learning_rate": 9.977292197526741e-07, "loss": 0.0394, "step": 122970 }, { "epoch": 1.313959079010631, "grad_norm": 5.120874404907227, "learning_rate": 9.97727620077968e-07, "loss": 0.0296, "step": 122980 }, { "epoch": 1.3140659223249105, "grad_norm": 3.543405294418335, "learning_rate": 9.977260198412899e-07, "loss": 0.0718, "step": 122990 }, { "epoch": 1.3141727656391902, "grad_norm": 5.954185962677002, "learning_rate": 9.977244190426415e-07, "loss": 0.0875, "step": 123000 }, { "epoch": 1.3142796089534698, "grad_norm": 3.2205007076263428, "learning_rate": 9.977228176820246e-07, "loss": 0.0261, "step": 123010 }, { "epoch": 1.3143864522677493, "grad_norm": 6.348262786865234, "learning_rate": 9.977212157594412e-07, "loss": 0.0259, "step": 123020 }, { "epoch": 1.314493295582029, "grad_norm": 8.811372756958008, "learning_rate": 9.97719613274893e-07, "loss": 0.0633, "step": 123030 }, { "epoch": 1.3146001388963087, "grad_norm": 3.2580509185791016, "learning_rate": 9.977180102283818e-07, "loss": 0.0276, "step": 123040 }, { "epoch": 1.3147069822105881, "grad_norm": 6.218178749084473, "learning_rate": 9.977164066199093e-07, "loss": 0.0317, "step": 123050 }, { "epoch": 1.3148138255248678, "grad_norm": 12.59870433807373, "learning_rate": 9.977148024494775e-07, "loss": 0.0651, "step": 123060 }, { "epoch": 1.3149206688391475, "grad_norm": 0.5918291211128235, "learning_rate": 9.977131977170883e-07, "loss": 0.0246, "step": 123070 }, { "epoch": 1.315027512153427, "grad_norm": 2.5550875663757324, "learning_rate": 9.97711592422743e-07, "loss": 0.024, "step": 123080 }, { "epoch": 1.3151343554677066, "grad_norm": 0.0312428567558527, "learning_rate": 9.97709986566444e-07, "loss": 0.0476, "step": 123090 }, { "epoch": 1.3152411987819863, "grad_norm": 2.0406429767608643, "learning_rate": 9.977083801481928e-07, "loss": 0.0264, "step": 123100 }, { "epoch": 1.3153480420962658, "grad_norm": 5.399865627288818, "learning_rate": 9.97706773167991e-07, "loss": 0.0762, "step": 123110 }, { "epoch": 1.3154548854105454, "grad_norm": 0.07742650806903839, "learning_rate": 9.977051656258409e-07, "loss": 0.0119, "step": 123120 }, { "epoch": 1.3155617287248251, "grad_norm": 5.047858715057373, "learning_rate": 9.977035575217442e-07, "loss": 0.0377, "step": 123130 }, { "epoch": 1.3156685720391046, "grad_norm": 5.151331901550293, "learning_rate": 9.977019488557026e-07, "loss": 0.0627, "step": 123140 }, { "epoch": 1.3157754153533843, "grad_norm": 0.765444278717041, "learning_rate": 9.977003396277178e-07, "loss": 0.0142, "step": 123150 }, { "epoch": 1.315882258667664, "grad_norm": 2.6994242668151855, "learning_rate": 9.976987298377918e-07, "loss": 0.0161, "step": 123160 }, { "epoch": 1.3159891019819434, "grad_norm": 3.4680991172790527, "learning_rate": 9.976971194859263e-07, "loss": 0.0215, "step": 123170 }, { "epoch": 1.316095945296223, "grad_norm": 0.10001812130212784, "learning_rate": 9.976955085721233e-07, "loss": 0.0068, "step": 123180 }, { "epoch": 1.3162027886105028, "grad_norm": 0.03119116835296154, "learning_rate": 9.976938970963847e-07, "loss": 0.0367, "step": 123190 }, { "epoch": 1.3163096319247822, "grad_norm": 4.174775123596191, "learning_rate": 9.976922850587117e-07, "loss": 0.0882, "step": 123200 }, { "epoch": 1.316416475239062, "grad_norm": 5.104227066040039, "learning_rate": 9.97690672459107e-07, "loss": 0.0178, "step": 123210 }, { "epoch": 1.3165233185533416, "grad_norm": 5.705658912658691, "learning_rate": 9.976890592975716e-07, "loss": 0.0884, "step": 123220 }, { "epoch": 1.316630161867621, "grad_norm": 1.2881908416748047, "learning_rate": 9.976874455741078e-07, "loss": 0.0996, "step": 123230 }, { "epoch": 1.3167370051819007, "grad_norm": 3.73728346824646, "learning_rate": 9.976858312887173e-07, "loss": 0.0372, "step": 123240 }, { "epoch": 1.3168438484961804, "grad_norm": 3.9015591144561768, "learning_rate": 9.976842164414021e-07, "loss": 0.0339, "step": 123250 }, { "epoch": 1.3169506918104599, "grad_norm": 4.7553391456604, "learning_rate": 9.976826010321636e-07, "loss": 0.0872, "step": 123260 }, { "epoch": 1.3170575351247396, "grad_norm": 8.486129760742188, "learning_rate": 9.97680985061004e-07, "loss": 0.0266, "step": 123270 }, { "epoch": 1.3171643784390192, "grad_norm": 2.4865708351135254, "learning_rate": 9.97679368527925e-07, "loss": 0.0351, "step": 123280 }, { "epoch": 1.3172712217532987, "grad_norm": 3.0501980781555176, "learning_rate": 9.976777514329286e-07, "loss": 0.0187, "step": 123290 }, { "epoch": 1.3173780650675784, "grad_norm": 0.8716176748275757, "learning_rate": 9.976761337760161e-07, "loss": 0.0204, "step": 123300 }, { "epoch": 1.317484908381858, "grad_norm": 0.6804483532905579, "learning_rate": 9.9767451555719e-07, "loss": 0.0715, "step": 123310 }, { "epoch": 1.3175917516961375, "grad_norm": 0.1462075114250183, "learning_rate": 9.976728967764517e-07, "loss": 0.0329, "step": 123320 }, { "epoch": 1.3176985950104172, "grad_norm": 4.642945289611816, "learning_rate": 9.976712774338031e-07, "loss": 0.028, "step": 123330 }, { "epoch": 1.3178054383246969, "grad_norm": 0.024479996412992477, "learning_rate": 9.976696575292462e-07, "loss": 0.0116, "step": 123340 }, { "epoch": 1.3179122816389763, "grad_norm": 6.288623809814453, "learning_rate": 9.976680370627825e-07, "loss": 0.0381, "step": 123350 }, { "epoch": 1.318019124953256, "grad_norm": 0.5208103656768799, "learning_rate": 9.976664160344142e-07, "loss": 0.0041, "step": 123360 }, { "epoch": 1.3181259682675357, "grad_norm": 6.467147350311279, "learning_rate": 9.97664794444143e-07, "loss": 0.018, "step": 123370 }, { "epoch": 1.3182328115818152, "grad_norm": 0.018694132566452026, "learning_rate": 9.976631722919704e-07, "loss": 0.0349, "step": 123380 }, { "epoch": 1.3183396548960948, "grad_norm": 4.340697765350342, "learning_rate": 9.976615495778988e-07, "loss": 0.0212, "step": 123390 }, { "epoch": 1.3184464982103745, "grad_norm": 7.101429462432861, "learning_rate": 9.976599263019297e-07, "loss": 0.0404, "step": 123400 }, { "epoch": 1.318553341524654, "grad_norm": 1.4878005981445312, "learning_rate": 9.976583024640647e-07, "loss": 0.0097, "step": 123410 }, { "epoch": 1.3186601848389337, "grad_norm": 4.972605228424072, "learning_rate": 9.976566780643063e-07, "loss": 0.0351, "step": 123420 }, { "epoch": 1.3187670281532133, "grad_norm": 1.8474547863006592, "learning_rate": 9.976550531026558e-07, "loss": 0.0125, "step": 123430 }, { "epoch": 1.3188738714674928, "grad_norm": 2.4168996810913086, "learning_rate": 9.97653427579115e-07, "loss": 0.0411, "step": 123440 }, { "epoch": 1.3189807147817725, "grad_norm": 2.022925615310669, "learning_rate": 9.976518014936861e-07, "loss": 0.0078, "step": 123450 }, { "epoch": 1.3190875580960522, "grad_norm": 0.08682886511087418, "learning_rate": 9.976501748463709e-07, "loss": 0.0128, "step": 123460 }, { "epoch": 1.3191944014103316, "grad_norm": 1.3036271333694458, "learning_rate": 9.976485476371708e-07, "loss": 0.0302, "step": 123470 }, { "epoch": 1.3193012447246113, "grad_norm": 0.9643674492835999, "learning_rate": 9.976469198660883e-07, "loss": 0.0374, "step": 123480 }, { "epoch": 1.319408088038891, "grad_norm": 2.981491804122925, "learning_rate": 9.976452915331245e-07, "loss": 0.0267, "step": 123490 }, { "epoch": 1.3195149313531704, "grad_norm": 2.8285269737243652, "learning_rate": 9.976436626382817e-07, "loss": 0.0213, "step": 123500 }, { "epoch": 1.3196217746674501, "grad_norm": 1.2152822017669678, "learning_rate": 9.976420331815616e-07, "loss": 0.0766, "step": 123510 }, { "epoch": 1.3197286179817298, "grad_norm": 10.37248420715332, "learning_rate": 9.976404031629663e-07, "loss": 0.0385, "step": 123520 }, { "epoch": 1.3198354612960095, "grad_norm": 5.080264568328857, "learning_rate": 9.976387725824974e-07, "loss": 0.0139, "step": 123530 }, { "epoch": 1.319942304610289, "grad_norm": 1.6916979551315308, "learning_rate": 9.976371414401566e-07, "loss": 0.0376, "step": 123540 }, { "epoch": 1.3200491479245686, "grad_norm": 0.08250070363283157, "learning_rate": 9.97635509735946e-07, "loss": 0.0222, "step": 123550 }, { "epoch": 1.3201559912388483, "grad_norm": 3.1163694858551025, "learning_rate": 9.976338774698673e-07, "loss": 0.0894, "step": 123560 }, { "epoch": 1.3202628345531278, "grad_norm": 1.0841035842895508, "learning_rate": 9.976322446419225e-07, "loss": 0.0312, "step": 123570 }, { "epoch": 1.3203696778674074, "grad_norm": 0.25705963373184204, "learning_rate": 9.976306112521132e-07, "loss": 0.0206, "step": 123580 }, { "epoch": 1.3204765211816871, "grad_norm": 6.46041202545166, "learning_rate": 9.976289773004413e-07, "loss": 0.0216, "step": 123590 }, { "epoch": 1.3205833644959666, "grad_norm": 0.26752805709838867, "learning_rate": 9.976273427869091e-07, "loss": 0.0433, "step": 123600 }, { "epoch": 1.3206902078102463, "grad_norm": 3.017686605453491, "learning_rate": 9.976257077115178e-07, "loss": 0.0359, "step": 123610 }, { "epoch": 1.320797051124526, "grad_norm": 2.628154754638672, "learning_rate": 9.976240720742694e-07, "loss": 0.0238, "step": 123620 }, { "epoch": 1.3209038944388056, "grad_norm": 2.405346155166626, "learning_rate": 9.976224358751662e-07, "loss": 0.0294, "step": 123630 }, { "epoch": 1.321010737753085, "grad_norm": 4.102355003356934, "learning_rate": 9.976207991142094e-07, "loss": 0.0766, "step": 123640 }, { "epoch": 1.3211175810673648, "grad_norm": 0.02486184425652027, "learning_rate": 9.976191617914014e-07, "loss": 0.0482, "step": 123650 }, { "epoch": 1.3212244243816444, "grad_norm": 0.37978842854499817, "learning_rate": 9.976175239067436e-07, "loss": 0.0979, "step": 123660 }, { "epoch": 1.321331267695924, "grad_norm": 3.672731876373291, "learning_rate": 9.976158854602384e-07, "loss": 0.0333, "step": 123670 }, { "epoch": 1.3214381110102036, "grad_norm": 0.14186963438987732, "learning_rate": 9.97614246451887e-07, "loss": 0.0644, "step": 123680 }, { "epoch": 1.3215449543244833, "grad_norm": 0.16703233122825623, "learning_rate": 9.976126068816916e-07, "loss": 0.0357, "step": 123690 }, { "epoch": 1.3216517976387627, "grad_norm": 2.3164615631103516, "learning_rate": 9.97610966749654e-07, "loss": 0.0346, "step": 123700 }, { "epoch": 1.3217586409530424, "grad_norm": 2.912465810775757, "learning_rate": 9.976093260557762e-07, "loss": 0.0345, "step": 123710 }, { "epoch": 1.321865484267322, "grad_norm": 8.795928001403809, "learning_rate": 9.976076848000598e-07, "loss": 0.0346, "step": 123720 }, { "epoch": 1.3219723275816015, "grad_norm": 5.404806613922119, "learning_rate": 9.976060429825069e-07, "loss": 0.0426, "step": 123730 }, { "epoch": 1.3220791708958812, "grad_norm": 0.011589059606194496, "learning_rate": 9.97604400603119e-07, "loss": 0.0287, "step": 123740 }, { "epoch": 1.322186014210161, "grad_norm": 3.535916566848755, "learning_rate": 9.976027576618984e-07, "loss": 0.015, "step": 123750 }, { "epoch": 1.3222928575244404, "grad_norm": 0.5547580718994141, "learning_rate": 9.976011141588464e-07, "loss": 0.0074, "step": 123760 }, { "epoch": 1.32239970083872, "grad_norm": 1.3989046812057495, "learning_rate": 9.975994700939656e-07, "loss": 0.04, "step": 123770 }, { "epoch": 1.3225065441529997, "grad_norm": 0.05855011194944382, "learning_rate": 9.975978254672573e-07, "loss": 0.0219, "step": 123780 }, { "epoch": 1.3226133874672792, "grad_norm": 2.3663809299468994, "learning_rate": 9.975961802787233e-07, "loss": 0.0722, "step": 123790 }, { "epoch": 1.3227202307815589, "grad_norm": 0.02813311293721199, "learning_rate": 9.975945345283659e-07, "loss": 0.0473, "step": 123800 }, { "epoch": 1.3228270740958386, "grad_norm": 0.1298571079969406, "learning_rate": 9.975928882161865e-07, "loss": 0.0211, "step": 123810 }, { "epoch": 1.322933917410118, "grad_norm": 0.46969330310821533, "learning_rate": 9.975912413421874e-07, "loss": 0.0332, "step": 123820 }, { "epoch": 1.3230407607243977, "grad_norm": 2.6346943378448486, "learning_rate": 9.975895939063702e-07, "loss": 0.0316, "step": 123830 }, { "epoch": 1.3231476040386774, "grad_norm": 0.03313940763473511, "learning_rate": 9.975879459087365e-07, "loss": 0.0275, "step": 123840 }, { "epoch": 1.3232544473529568, "grad_norm": 4.595343112945557, "learning_rate": 9.975862973492888e-07, "loss": 0.0501, "step": 123850 }, { "epoch": 1.3233612906672365, "grad_norm": 0.06971845030784607, "learning_rate": 9.975846482280287e-07, "loss": 0.0336, "step": 123860 }, { "epoch": 1.3234681339815162, "grad_norm": 2.818204164505005, "learning_rate": 9.975829985449576e-07, "loss": 0.0638, "step": 123870 }, { "epoch": 1.3235749772957957, "grad_norm": 0.49225640296936035, "learning_rate": 9.975813483000778e-07, "loss": 0.0413, "step": 123880 }, { "epoch": 1.3236818206100753, "grad_norm": 0.07892092317342758, "learning_rate": 9.975796974933913e-07, "loss": 0.0398, "step": 123890 }, { "epoch": 1.323788663924355, "grad_norm": 0.5515316128730774, "learning_rate": 9.975780461248996e-07, "loss": 0.0285, "step": 123900 }, { "epoch": 1.3238955072386345, "grad_norm": 2.68148136138916, "learning_rate": 9.975763941946049e-07, "loss": 0.0648, "step": 123910 }, { "epoch": 1.3240023505529142, "grad_norm": 11.354708671569824, "learning_rate": 9.97574741702509e-07, "loss": 0.0614, "step": 123920 }, { "epoch": 1.3241091938671938, "grad_norm": 0.11411004513502121, "learning_rate": 9.975730886486133e-07, "loss": 0.0046, "step": 123930 }, { "epoch": 1.3242160371814733, "grad_norm": 3.029799222946167, "learning_rate": 9.975714350329202e-07, "loss": 0.0209, "step": 123940 }, { "epoch": 1.324322880495753, "grad_norm": 0.8261741399765015, "learning_rate": 9.975697808554313e-07, "loss": 0.0159, "step": 123950 }, { "epoch": 1.3244297238100327, "grad_norm": 0.006511021871119738, "learning_rate": 9.975681261161487e-07, "loss": 0.0439, "step": 123960 }, { "epoch": 1.3245365671243121, "grad_norm": 0.013161026872694492, "learning_rate": 9.97566470815074e-07, "loss": 0.0131, "step": 123970 }, { "epoch": 1.3246434104385918, "grad_norm": 0.03090500272810459, "learning_rate": 9.975648149522095e-07, "loss": 0.0228, "step": 123980 }, { "epoch": 1.3247502537528715, "grad_norm": 2.124201536178589, "learning_rate": 9.975631585275566e-07, "loss": 0.0088, "step": 123990 }, { "epoch": 1.324857097067151, "grad_norm": 3.7817938327789307, "learning_rate": 9.975615015411172e-07, "loss": 0.0865, "step": 124000 }, { "epoch": 1.3249639403814306, "grad_norm": 5.479426860809326, "learning_rate": 9.975598439928934e-07, "loss": 0.0365, "step": 124010 }, { "epoch": 1.3250707836957103, "grad_norm": 4.406449794769287, "learning_rate": 9.97558185882887e-07, "loss": 0.0186, "step": 124020 }, { "epoch": 1.3251776270099898, "grad_norm": 7.818766117095947, "learning_rate": 9.975565272111e-07, "loss": 0.0387, "step": 124030 }, { "epoch": 1.3252844703242694, "grad_norm": 1.2893673181533813, "learning_rate": 9.975548679775339e-07, "loss": 0.0474, "step": 124040 }, { "epoch": 1.3253913136385491, "grad_norm": 4.545450210571289, "learning_rate": 9.97553208182191e-07, "loss": 0.0275, "step": 124050 }, { "epoch": 1.3254981569528286, "grad_norm": 1.1102805137634277, "learning_rate": 9.975515478250729e-07, "loss": 0.073, "step": 124060 }, { "epoch": 1.3256050002671083, "grad_norm": 0.15494565665721893, "learning_rate": 9.975498869061814e-07, "loss": 0.0581, "step": 124070 }, { "epoch": 1.325711843581388, "grad_norm": 0.2059209644794464, "learning_rate": 9.975482254255188e-07, "loss": 0.0422, "step": 124080 }, { "epoch": 1.3258186868956674, "grad_norm": 4.468349933624268, "learning_rate": 9.975465633830863e-07, "loss": 0.0319, "step": 124090 }, { "epoch": 1.325925530209947, "grad_norm": 1.545790672302246, "learning_rate": 9.975449007788866e-07, "loss": 0.0294, "step": 124100 }, { "epoch": 1.3260323735242268, "grad_norm": 3.58709979057312, "learning_rate": 9.975432376129209e-07, "loss": 0.0205, "step": 124110 }, { "epoch": 1.3261392168385062, "grad_norm": 7.018456935882568, "learning_rate": 9.975415738851915e-07, "loss": 0.0448, "step": 124120 }, { "epoch": 1.326246060152786, "grad_norm": 7.36060094833374, "learning_rate": 9.975399095957e-07, "loss": 0.0184, "step": 124130 }, { "epoch": 1.3263529034670656, "grad_norm": 4.942087650299072, "learning_rate": 9.975382447444484e-07, "loss": 0.0123, "step": 124140 }, { "epoch": 1.326459746781345, "grad_norm": 11.6210355758667, "learning_rate": 9.975365793314385e-07, "loss": 0.0741, "step": 124150 }, { "epoch": 1.3265665900956247, "grad_norm": 8.666376113891602, "learning_rate": 9.975349133566723e-07, "loss": 0.0499, "step": 124160 }, { "epoch": 1.3266734334099044, "grad_norm": 0.13825510442256927, "learning_rate": 9.975332468201518e-07, "loss": 0.0271, "step": 124170 }, { "epoch": 1.3267802767241839, "grad_norm": 3.2651615142822266, "learning_rate": 9.975315797218786e-07, "loss": 0.0751, "step": 124180 }, { "epoch": 1.3268871200384635, "grad_norm": 3.1164379119873047, "learning_rate": 9.975299120618546e-07, "loss": 0.0399, "step": 124190 }, { "epoch": 1.3269939633527432, "grad_norm": 5.210720062255859, "learning_rate": 9.97528243840082e-07, "loss": 0.0742, "step": 124200 }, { "epoch": 1.3271008066670227, "grad_norm": 0.059658098965883255, "learning_rate": 9.975265750565622e-07, "loss": 0.0309, "step": 124210 }, { "epoch": 1.3272076499813024, "grad_norm": 3.4354207515716553, "learning_rate": 9.975249057112976e-07, "loss": 0.0397, "step": 124220 }, { "epoch": 1.327314493295582, "grad_norm": 0.42434564232826233, "learning_rate": 9.975232358042896e-07, "loss": 0.0297, "step": 124230 }, { "epoch": 1.3274213366098615, "grad_norm": 0.5768553614616394, "learning_rate": 9.975215653355404e-07, "loss": 0.015, "step": 124240 }, { "epoch": 1.3275281799241412, "grad_norm": 0.3588613271713257, "learning_rate": 9.975198943050519e-07, "loss": 0.0187, "step": 124250 }, { "epoch": 1.3276350232384209, "grad_norm": 1.4660242795944214, "learning_rate": 9.975182227128259e-07, "loss": 0.0504, "step": 124260 }, { "epoch": 1.3277418665527005, "grad_norm": 9.08954906463623, "learning_rate": 9.975165505588642e-07, "loss": 0.007, "step": 124270 }, { "epoch": 1.32784870986698, "grad_norm": 0.05403734743595123, "learning_rate": 9.975148778431686e-07, "loss": 0.0404, "step": 124280 }, { "epoch": 1.3279555531812597, "grad_norm": 0.23178543150424957, "learning_rate": 9.975132045657414e-07, "loss": 0.0084, "step": 124290 }, { "epoch": 1.3280623964955394, "grad_norm": 0.4962833821773529, "learning_rate": 9.97511530726584e-07, "loss": 0.035, "step": 124300 }, { "epoch": 1.3281692398098188, "grad_norm": 2.2511565685272217, "learning_rate": 9.975098563256987e-07, "loss": 0.0168, "step": 124310 }, { "epoch": 1.3282760831240985, "grad_norm": 6.7561116218566895, "learning_rate": 9.975081813630871e-07, "loss": 0.0295, "step": 124320 }, { "epoch": 1.3283829264383782, "grad_norm": 0.19344887137413025, "learning_rate": 9.975065058387515e-07, "loss": 0.0594, "step": 124330 }, { "epoch": 1.3284897697526576, "grad_norm": 0.059114765375852585, "learning_rate": 9.975048297526934e-07, "loss": 0.036, "step": 124340 }, { "epoch": 1.3285966130669373, "grad_norm": 5.100479602813721, "learning_rate": 9.975031531049146e-07, "loss": 0.0082, "step": 124350 }, { "epoch": 1.328703456381217, "grad_norm": 0.5768778920173645, "learning_rate": 9.975014758954175e-07, "loss": 0.0453, "step": 124360 }, { "epoch": 1.3288102996954967, "grad_norm": 0.12234325706958771, "learning_rate": 9.974997981242035e-07, "loss": 0.0395, "step": 124370 }, { "epoch": 1.3289171430097761, "grad_norm": 4.277231216430664, "learning_rate": 9.974981197912747e-07, "loss": 0.1345, "step": 124380 }, { "epoch": 1.3290239863240558, "grad_norm": 0.8142647743225098, "learning_rate": 9.974964408966328e-07, "loss": 0.0466, "step": 124390 }, { "epoch": 1.3291308296383355, "grad_norm": 0.06283195316791534, "learning_rate": 9.9749476144028e-07, "loss": 0.0287, "step": 124400 }, { "epoch": 1.329237672952615, "grad_norm": 5.3582892417907715, "learning_rate": 9.974930814222182e-07, "loss": 0.0129, "step": 124410 }, { "epoch": 1.3293445162668946, "grad_norm": 0.9322021007537842, "learning_rate": 9.974914008424491e-07, "loss": 0.0131, "step": 124420 }, { "epoch": 1.3294513595811743, "grad_norm": 6.081556797027588, "learning_rate": 9.974897197009747e-07, "loss": 0.0331, "step": 124430 }, { "epoch": 1.3295582028954538, "grad_norm": 1.9450775384902954, "learning_rate": 9.974880379977967e-07, "loss": 0.0137, "step": 124440 }, { "epoch": 1.3296650462097335, "grad_norm": 0.03509066253900528, "learning_rate": 9.974863557329172e-07, "loss": 0.0151, "step": 124450 }, { "epoch": 1.3297718895240132, "grad_norm": 0.1041502133011818, "learning_rate": 9.97484672906338e-07, "loss": 0.0504, "step": 124460 }, { "epoch": 1.3298787328382926, "grad_norm": 2.1692967414855957, "learning_rate": 9.974829895180612e-07, "loss": 0.0441, "step": 124470 }, { "epoch": 1.3299855761525723, "grad_norm": 4.467302322387695, "learning_rate": 9.974813055680887e-07, "loss": 0.0168, "step": 124480 }, { "epoch": 1.330092419466852, "grad_norm": 3.612459897994995, "learning_rate": 9.97479621056422e-07, "loss": 0.0182, "step": 124490 }, { "epoch": 1.3301992627811314, "grad_norm": 5.2318243980407715, "learning_rate": 9.974779359830631e-07, "loss": 0.0154, "step": 124500 }, { "epoch": 1.3303061060954111, "grad_norm": 0.11823596060276031, "learning_rate": 9.974762503480145e-07, "loss": 0.0159, "step": 124510 }, { "epoch": 1.3304129494096908, "grad_norm": 5.158235549926758, "learning_rate": 9.974745641512774e-07, "loss": 0.0199, "step": 124520 }, { "epoch": 1.3305197927239703, "grad_norm": 2.129523277282715, "learning_rate": 9.97472877392854e-07, "loss": 0.0514, "step": 124530 }, { "epoch": 1.33062663603825, "grad_norm": 6.267894744873047, "learning_rate": 9.974711900727462e-07, "loss": 0.0429, "step": 124540 }, { "epoch": 1.3307334793525296, "grad_norm": 0.6378181576728821, "learning_rate": 9.974695021909558e-07, "loss": 0.0157, "step": 124550 }, { "epoch": 1.330840322666809, "grad_norm": 1.1225768327713013, "learning_rate": 9.97467813747485e-07, "loss": 0.0543, "step": 124560 }, { "epoch": 1.3309471659810888, "grad_norm": 0.0374177061021328, "learning_rate": 9.974661247423354e-07, "loss": 0.047, "step": 124570 }, { "epoch": 1.3310540092953684, "grad_norm": 11.145922660827637, "learning_rate": 9.97464435175509e-07, "loss": 0.02, "step": 124580 }, { "epoch": 1.331160852609648, "grad_norm": 0.3949090242385864, "learning_rate": 9.974627450470076e-07, "loss": 0.0295, "step": 124590 }, { "epoch": 1.3312676959239276, "grad_norm": 1.5637109279632568, "learning_rate": 9.974610543568333e-07, "loss": 0.0206, "step": 124600 }, { "epoch": 1.3313745392382073, "grad_norm": 1.6606175899505615, "learning_rate": 9.974593631049879e-07, "loss": 0.0507, "step": 124610 }, { "epoch": 1.3314813825524867, "grad_norm": 1.3801802396774292, "learning_rate": 9.974576712914734e-07, "loss": 0.034, "step": 124620 }, { "epoch": 1.3315882258667664, "grad_norm": 3.4481611251831055, "learning_rate": 9.974559789162916e-07, "loss": 0.0242, "step": 124630 }, { "epoch": 1.331695069181046, "grad_norm": 6.495700359344482, "learning_rate": 9.974542859794444e-07, "loss": 0.0326, "step": 124640 }, { "epoch": 1.3318019124953255, "grad_norm": 4.61728048324585, "learning_rate": 9.974525924809338e-07, "loss": 0.0295, "step": 124650 }, { "epoch": 1.3319087558096052, "grad_norm": 5.252134799957275, "learning_rate": 9.974508984207617e-07, "loss": 0.0296, "step": 124660 }, { "epoch": 1.332015599123885, "grad_norm": 0.011946829035878181, "learning_rate": 9.9744920379893e-07, "loss": 0.0992, "step": 124670 }, { "epoch": 1.3321224424381644, "grad_norm": 0.03364281728863716, "learning_rate": 9.974475086154405e-07, "loss": 0.0116, "step": 124680 }, { "epoch": 1.332229285752444, "grad_norm": 0.15089142322540283, "learning_rate": 9.974458128702954e-07, "loss": 0.0417, "step": 124690 }, { "epoch": 1.3323361290667237, "grad_norm": 0.061794713139534, "learning_rate": 9.974441165634964e-07, "loss": 0.0033, "step": 124700 }, { "epoch": 1.3324429723810032, "grad_norm": 0.014062296599149704, "learning_rate": 9.974424196950454e-07, "loss": 0.025, "step": 124710 }, { "epoch": 1.3325498156952829, "grad_norm": 0.09000047296285629, "learning_rate": 9.974407222649444e-07, "loss": 0.0187, "step": 124720 }, { "epoch": 1.3326566590095625, "grad_norm": 0.028728755190968513, "learning_rate": 9.97439024273195e-07, "loss": 0.0146, "step": 124730 }, { "epoch": 1.332763502323842, "grad_norm": 0.3420213758945465, "learning_rate": 9.974373257197999e-07, "loss": 0.0349, "step": 124740 }, { "epoch": 1.3328703456381217, "grad_norm": 4.187201976776123, "learning_rate": 9.974356266047602e-07, "loss": 0.0419, "step": 124750 }, { "epoch": 1.3329771889524014, "grad_norm": 11.176776885986328, "learning_rate": 9.974339269280781e-07, "loss": 0.0246, "step": 124760 }, { "epoch": 1.3330840322666808, "grad_norm": 0.16285762190818787, "learning_rate": 9.974322266897556e-07, "loss": 0.0339, "step": 124770 }, { "epoch": 1.3331908755809605, "grad_norm": 6.2726569175720215, "learning_rate": 9.974305258897946e-07, "loss": 0.0259, "step": 124780 }, { "epoch": 1.3332977188952402, "grad_norm": 1.3500285148620605, "learning_rate": 9.974288245281972e-07, "loss": 0.0436, "step": 124790 }, { "epoch": 1.3334045622095196, "grad_norm": 1.2116148471832275, "learning_rate": 9.974271226049648e-07, "loss": 0.0531, "step": 124800 }, { "epoch": 1.3335114055237993, "grad_norm": 3.7286202907562256, "learning_rate": 9.974254201200998e-07, "loss": 0.0536, "step": 124810 }, { "epoch": 1.333618248838079, "grad_norm": 3.274684429168701, "learning_rate": 9.97423717073604e-07, "loss": 0.0182, "step": 124820 }, { "epoch": 1.3337250921523585, "grad_norm": 0.07900889217853546, "learning_rate": 9.974220134654792e-07, "loss": 0.0424, "step": 124830 }, { "epoch": 1.3338319354666381, "grad_norm": 2.973989963531494, "learning_rate": 9.974203092957272e-07, "loss": 0.0348, "step": 124840 }, { "epoch": 1.3339387787809178, "grad_norm": 0.29824045300483704, "learning_rate": 9.974186045643504e-07, "loss": 0.0776, "step": 124850 }, { "epoch": 1.3340456220951973, "grad_norm": 6.1612653732299805, "learning_rate": 9.974168992713504e-07, "loss": 0.1069, "step": 124860 }, { "epoch": 1.334152465409477, "grad_norm": 0.03015177510678768, "learning_rate": 9.974151934167291e-07, "loss": 0.0351, "step": 124870 }, { "epoch": 1.3342593087237566, "grad_norm": 5.394766807556152, "learning_rate": 9.974134870004886e-07, "loss": 0.0123, "step": 124880 }, { "epoch": 1.334366152038036, "grad_norm": 0.031841013580560684, "learning_rate": 9.974117800226307e-07, "loss": 0.0399, "step": 124890 }, { "epoch": 1.3344729953523158, "grad_norm": 0.03260663524270058, "learning_rate": 9.974100724831574e-07, "loss": 0.0223, "step": 124900 }, { "epoch": 1.3345798386665955, "grad_norm": 3.774104118347168, "learning_rate": 9.974083643820705e-07, "loss": 0.0168, "step": 124910 }, { "epoch": 1.334686681980875, "grad_norm": 1.4845476150512695, "learning_rate": 9.97406655719372e-07, "loss": 0.033, "step": 124920 }, { "epoch": 1.3347935252951546, "grad_norm": 4.708570957183838, "learning_rate": 9.97404946495064e-07, "loss": 0.0304, "step": 124930 }, { "epoch": 1.3349003686094343, "grad_norm": 1.6197055578231812, "learning_rate": 9.974032367091483e-07, "loss": 0.0435, "step": 124940 }, { "epoch": 1.3350072119237137, "grad_norm": 8.041731834411621, "learning_rate": 9.974015263616267e-07, "loss": 0.046, "step": 124950 }, { "epoch": 1.3351140552379934, "grad_norm": 2.5938918590545654, "learning_rate": 9.973998154525011e-07, "loss": 0.0349, "step": 124960 }, { "epoch": 1.335220898552273, "grad_norm": 4.662011623382568, "learning_rate": 9.973981039817736e-07, "loss": 0.0243, "step": 124970 }, { "epoch": 1.3353277418665526, "grad_norm": 6.024604797363281, "learning_rate": 9.973963919494463e-07, "loss": 0.023, "step": 124980 }, { "epoch": 1.3354345851808322, "grad_norm": 0.3983713984489441, "learning_rate": 9.973946793555207e-07, "loss": 0.0255, "step": 124990 }, { "epoch": 1.335541428495112, "grad_norm": 5.037012100219727, "learning_rate": 9.973929661999992e-07, "loss": 0.0461, "step": 125000 }, { "epoch": 1.3356482718093916, "grad_norm": 0.07399050146341324, "learning_rate": 9.973912524828833e-07, "loss": 0.0184, "step": 125010 }, { "epoch": 1.335755115123671, "grad_norm": 0.0308621134608984, "learning_rate": 9.973895382041753e-07, "loss": 0.0258, "step": 125020 }, { "epoch": 1.3358619584379507, "grad_norm": 4.827659606933594, "learning_rate": 9.97387823363877e-07, "loss": 0.0274, "step": 125030 }, { "epoch": 1.3359688017522304, "grad_norm": 0.3110105097293854, "learning_rate": 9.9738610796199e-07, "loss": 0.0248, "step": 125040 }, { "epoch": 1.3360756450665099, "grad_norm": 3.416069269180298, "learning_rate": 9.97384391998517e-07, "loss": 0.0223, "step": 125050 }, { "epoch": 1.3361824883807896, "grad_norm": 1.4212688207626343, "learning_rate": 9.973826754734591e-07, "loss": 0.012, "step": 125060 }, { "epoch": 1.3362893316950692, "grad_norm": 3.5120253562927246, "learning_rate": 9.973809583868187e-07, "loss": 0.044, "step": 125070 }, { "epoch": 1.3363961750093487, "grad_norm": 3.2691404819488525, "learning_rate": 9.973792407385977e-07, "loss": 0.0493, "step": 125080 }, { "epoch": 1.3365030183236284, "grad_norm": 5.5492472648620605, "learning_rate": 9.973775225287982e-07, "loss": 0.0131, "step": 125090 }, { "epoch": 1.336609861637908, "grad_norm": 2.2678334712982178, "learning_rate": 9.973758037574215e-07, "loss": 0.017, "step": 125100 }, { "epoch": 1.3367167049521878, "grad_norm": 5.528703212738037, "learning_rate": 9.973740844244704e-07, "loss": 0.0205, "step": 125110 }, { "epoch": 1.3368235482664672, "grad_norm": 3.5876479148864746, "learning_rate": 9.973723645299463e-07, "loss": 0.0206, "step": 125120 }, { "epoch": 1.336930391580747, "grad_norm": 3.8568239212036133, "learning_rate": 9.97370644073851e-07, "loss": 0.0315, "step": 125130 }, { "epoch": 1.3370372348950266, "grad_norm": 1.0496944189071655, "learning_rate": 9.97368923056187e-07, "loss": 0.0164, "step": 125140 }, { "epoch": 1.337144078209306, "grad_norm": 0.09845743328332901, "learning_rate": 9.973672014769559e-07, "loss": 0.0442, "step": 125150 }, { "epoch": 1.3372509215235857, "grad_norm": 2.0756921768188477, "learning_rate": 9.973654793361596e-07, "loss": 0.0297, "step": 125160 }, { "epoch": 1.3373577648378654, "grad_norm": 0.5441238880157471, "learning_rate": 9.973637566338002e-07, "loss": 0.0329, "step": 125170 }, { "epoch": 1.3374646081521449, "grad_norm": 1.3941998481750488, "learning_rate": 9.973620333698796e-07, "loss": 0.0217, "step": 125180 }, { "epoch": 1.3375714514664245, "grad_norm": 0.10271936655044556, "learning_rate": 9.973603095443996e-07, "loss": 0.0193, "step": 125190 }, { "epoch": 1.3376782947807042, "grad_norm": 8.30262279510498, "learning_rate": 9.973585851573624e-07, "loss": 0.0176, "step": 125200 }, { "epoch": 1.3377851380949837, "grad_norm": 0.6916621923446655, "learning_rate": 9.973568602087699e-07, "loss": 0.0308, "step": 125210 }, { "epoch": 1.3378919814092634, "grad_norm": 0.028121421113610268, "learning_rate": 9.973551346986237e-07, "loss": 0.0944, "step": 125220 }, { "epoch": 1.337998824723543, "grad_norm": 0.09519893676042557, "learning_rate": 9.973534086269263e-07, "loss": 0.0408, "step": 125230 }, { "epoch": 1.3381056680378225, "grad_norm": 10.533495903015137, "learning_rate": 9.973516819936793e-07, "loss": 0.1168, "step": 125240 }, { "epoch": 1.3382125113521022, "grad_norm": 6.62091064453125, "learning_rate": 9.973499547988845e-07, "loss": 0.0481, "step": 125250 }, { "epoch": 1.3383193546663819, "grad_norm": 0.5535851120948792, "learning_rate": 9.973482270425444e-07, "loss": 0.0113, "step": 125260 }, { "epoch": 1.3384261979806613, "grad_norm": 0.017886744812130928, "learning_rate": 9.973464987246603e-07, "loss": 0.0454, "step": 125270 }, { "epoch": 1.338533041294941, "grad_norm": 11.620018005371094, "learning_rate": 9.973447698452345e-07, "loss": 0.0282, "step": 125280 }, { "epoch": 1.3386398846092207, "grad_norm": 0.39023563265800476, "learning_rate": 9.973430404042692e-07, "loss": 0.0467, "step": 125290 }, { "epoch": 1.3387467279235001, "grad_norm": 0.21389088034629822, "learning_rate": 9.973413104017658e-07, "loss": 0.0074, "step": 125300 }, { "epoch": 1.3388535712377798, "grad_norm": 2.497918128967285, "learning_rate": 9.973395798377267e-07, "loss": 0.029, "step": 125310 }, { "epoch": 1.3389604145520595, "grad_norm": 1.5425124168395996, "learning_rate": 9.973378487121536e-07, "loss": 0.049, "step": 125320 }, { "epoch": 1.339067257866339, "grad_norm": 6.276723384857178, "learning_rate": 9.973361170250485e-07, "loss": 0.0363, "step": 125330 }, { "epoch": 1.3391741011806186, "grad_norm": 0.6221535801887512, "learning_rate": 9.973343847764134e-07, "loss": 0.0247, "step": 125340 }, { "epoch": 1.3392809444948983, "grad_norm": 3.225911855697632, "learning_rate": 9.973326519662502e-07, "loss": 0.0253, "step": 125350 }, { "epoch": 1.3393877878091778, "grad_norm": 0.08914726972579956, "learning_rate": 9.973309185945611e-07, "loss": 0.0125, "step": 125360 }, { "epoch": 1.3394946311234575, "grad_norm": 0.6329046487808228, "learning_rate": 9.973291846613476e-07, "loss": 0.0159, "step": 125370 }, { "epoch": 1.3396014744377371, "grad_norm": 8.094673156738281, "learning_rate": 9.97327450166612e-07, "loss": 0.0054, "step": 125380 }, { "epoch": 1.3397083177520166, "grad_norm": 11.134871482849121, "learning_rate": 9.973257151103563e-07, "loss": 0.0361, "step": 125390 }, { "epoch": 1.3398151610662963, "grad_norm": 0.24978429079055786, "learning_rate": 9.97323979492582e-07, "loss": 0.0206, "step": 125400 }, { "epoch": 1.339922004380576, "grad_norm": 0.1250302642583847, "learning_rate": 9.97322243313292e-07, "loss": 0.0252, "step": 125410 }, { "epoch": 1.3400288476948554, "grad_norm": 6.086526870727539, "learning_rate": 9.973205065724871e-07, "loss": 0.0718, "step": 125420 }, { "epoch": 1.340135691009135, "grad_norm": 6.231841087341309, "learning_rate": 9.9731876927017e-07, "loss": 0.047, "step": 125430 }, { "epoch": 1.3402425343234148, "grad_norm": 6.21798038482666, "learning_rate": 9.973170314063425e-07, "loss": 0.0399, "step": 125440 }, { "epoch": 1.3403493776376942, "grad_norm": 3.975649118423462, "learning_rate": 9.973152929810064e-07, "loss": 0.0448, "step": 125450 }, { "epoch": 1.340456220951974, "grad_norm": 18.734254837036133, "learning_rate": 9.97313553994164e-07, "loss": 0.0626, "step": 125460 }, { "epoch": 1.3405630642662536, "grad_norm": 0.1906733363866806, "learning_rate": 9.97311814445817e-07, "loss": 0.0066, "step": 125470 }, { "epoch": 1.340669907580533, "grad_norm": 0.11917294561862946, "learning_rate": 9.973100743359674e-07, "loss": 0.0107, "step": 125480 }, { "epoch": 1.3407767508948127, "grad_norm": 2.0019967555999756, "learning_rate": 9.97308333664617e-07, "loss": 0.0461, "step": 125490 }, { "epoch": 1.3408835942090924, "grad_norm": 0.0329340398311615, "learning_rate": 9.973065924317682e-07, "loss": 0.0172, "step": 125500 }, { "epoch": 1.3409904375233719, "grad_norm": 7.675172805786133, "learning_rate": 9.973048506374228e-07, "loss": 0.009, "step": 125510 }, { "epoch": 1.3410972808376516, "grad_norm": 0.9701703190803528, "learning_rate": 9.973031082815826e-07, "loss": 0.0235, "step": 125520 }, { "epoch": 1.3412041241519312, "grad_norm": 0.0068502044305205345, "learning_rate": 9.973013653642495e-07, "loss": 0.0111, "step": 125530 }, { "epoch": 1.3413109674662107, "grad_norm": 0.062408436089754105, "learning_rate": 9.972996218854257e-07, "loss": 0.03, "step": 125540 }, { "epoch": 1.3414178107804904, "grad_norm": 0.011310182511806488, "learning_rate": 9.972978778451134e-07, "loss": 0.0084, "step": 125550 }, { "epoch": 1.34152465409477, "grad_norm": 0.13697803020477295, "learning_rate": 9.972961332433138e-07, "loss": 0.0292, "step": 125560 }, { "epoch": 1.3416314974090495, "grad_norm": 1.1084262132644653, "learning_rate": 9.972943880800295e-07, "loss": 0.0312, "step": 125570 }, { "epoch": 1.3417383407233292, "grad_norm": 0.06711190193891525, "learning_rate": 9.972926423552625e-07, "loss": 0.0312, "step": 125580 }, { "epoch": 1.3418451840376089, "grad_norm": 4.2969794273376465, "learning_rate": 9.972908960690143e-07, "loss": 0.0325, "step": 125590 }, { "epoch": 1.3419520273518883, "grad_norm": 4.383225917816162, "learning_rate": 9.972891492212875e-07, "loss": 0.0487, "step": 125600 }, { "epoch": 1.342058870666168, "grad_norm": 2.757937431335449, "learning_rate": 9.972874018120834e-07, "loss": 0.0311, "step": 125610 }, { "epoch": 1.3421657139804477, "grad_norm": 3.6882476806640625, "learning_rate": 9.972856538414043e-07, "loss": 0.0597, "step": 125620 }, { "epoch": 1.3422725572947272, "grad_norm": 0.05525622516870499, "learning_rate": 9.972839053092523e-07, "loss": 0.0559, "step": 125630 }, { "epoch": 1.3423794006090068, "grad_norm": 5.491852283477783, "learning_rate": 9.97282156215629e-07, "loss": 0.0551, "step": 125640 }, { "epoch": 1.3424862439232865, "grad_norm": 0.03988909721374512, "learning_rate": 9.97280406560537e-07, "loss": 0.0317, "step": 125650 }, { "epoch": 1.342593087237566, "grad_norm": 3.230156183242798, "learning_rate": 9.972786563439776e-07, "loss": 0.021, "step": 125660 }, { "epoch": 1.3426999305518457, "grad_norm": 0.06781479716300964, "learning_rate": 9.97276905565953e-07, "loss": 0.0611, "step": 125670 }, { "epoch": 1.3428067738661253, "grad_norm": 1.4930469989776611, "learning_rate": 9.972751542264655e-07, "loss": 0.0827, "step": 125680 }, { "epoch": 1.3429136171804048, "grad_norm": 0.037611089646816254, "learning_rate": 9.972734023255167e-07, "loss": 0.0211, "step": 125690 }, { "epoch": 1.3430204604946845, "grad_norm": 0.3231847882270813, "learning_rate": 9.972716498631087e-07, "loss": 0.0312, "step": 125700 }, { "epoch": 1.3431273038089642, "grad_norm": 11.414459228515625, "learning_rate": 9.972698968392434e-07, "loss": 0.023, "step": 125710 }, { "epoch": 1.3432341471232436, "grad_norm": 2.8283307552337646, "learning_rate": 9.97268143253923e-07, "loss": 0.0623, "step": 125720 }, { "epoch": 1.3433409904375233, "grad_norm": 3.8975961208343506, "learning_rate": 9.972663891071494e-07, "loss": 0.0395, "step": 125730 }, { "epoch": 1.343447833751803, "grad_norm": 2.335862874984741, "learning_rate": 9.972646343989244e-07, "loss": 0.0381, "step": 125740 }, { "epoch": 1.3435546770660827, "grad_norm": 4.0446977615356445, "learning_rate": 9.9726287912925e-07, "loss": 0.0368, "step": 125750 }, { "epoch": 1.3436615203803621, "grad_norm": 0.40356728434562683, "learning_rate": 9.972611232981284e-07, "loss": 0.0482, "step": 125760 }, { "epoch": 1.3437683636946418, "grad_norm": 10.009966850280762, "learning_rate": 9.972593669055614e-07, "loss": 0.0574, "step": 125770 }, { "epoch": 1.3438752070089215, "grad_norm": 0.021940380334854126, "learning_rate": 9.972576099515511e-07, "loss": 0.0438, "step": 125780 }, { "epoch": 1.343982050323201, "grad_norm": 6.744762420654297, "learning_rate": 9.972558524360993e-07, "loss": 0.0295, "step": 125790 }, { "epoch": 1.3440888936374806, "grad_norm": 5.378061294555664, "learning_rate": 9.972540943592083e-07, "loss": 0.0195, "step": 125800 }, { "epoch": 1.3441957369517603, "grad_norm": 6.564812183380127, "learning_rate": 9.972523357208798e-07, "loss": 0.0244, "step": 125810 }, { "epoch": 1.3443025802660398, "grad_norm": 1.679358720779419, "learning_rate": 9.972505765211159e-07, "loss": 0.0175, "step": 125820 }, { "epoch": 1.3444094235803195, "grad_norm": 1.2217942476272583, "learning_rate": 9.972488167599184e-07, "loss": 0.0199, "step": 125830 }, { "epoch": 1.3445162668945991, "grad_norm": 0.03993600979447365, "learning_rate": 9.972470564372896e-07, "loss": 0.0166, "step": 125840 }, { "epoch": 1.3446231102088788, "grad_norm": 0.018323929980397224, "learning_rate": 9.972452955532316e-07, "loss": 0.03, "step": 125850 }, { "epoch": 1.3447299535231583, "grad_norm": 3.169247627258301, "learning_rate": 9.972435341077459e-07, "loss": 0.0142, "step": 125860 }, { "epoch": 1.344836796837438, "grad_norm": 0.3244069516658783, "learning_rate": 9.972417721008347e-07, "loss": 0.0682, "step": 125870 }, { "epoch": 1.3449436401517176, "grad_norm": 2.8475852012634277, "learning_rate": 9.972400095325e-07, "loss": 0.0579, "step": 125880 }, { "epoch": 1.345050483465997, "grad_norm": 10.30516529083252, "learning_rate": 9.972382464027438e-07, "loss": 0.0327, "step": 125890 }, { "epoch": 1.3451573267802768, "grad_norm": 9.808440208435059, "learning_rate": 9.97236482711568e-07, "loss": 0.0651, "step": 125900 }, { "epoch": 1.3452641700945565, "grad_norm": 4.170678615570068, "learning_rate": 9.972347184589749e-07, "loss": 0.0299, "step": 125910 }, { "epoch": 1.345371013408836, "grad_norm": 13.070855140686035, "learning_rate": 9.972329536449662e-07, "loss": 0.0679, "step": 125920 }, { "epoch": 1.3454778567231156, "grad_norm": 2.244523048400879, "learning_rate": 9.97231188269544e-07, "loss": 0.0224, "step": 125930 }, { "epoch": 1.3455847000373953, "grad_norm": 6.915611267089844, "learning_rate": 9.9722942233271e-07, "loss": 0.0356, "step": 125940 }, { "epoch": 1.3456915433516747, "grad_norm": 3.9819695949554443, "learning_rate": 9.972276558344668e-07, "loss": 0.0549, "step": 125950 }, { "epoch": 1.3457983866659544, "grad_norm": 6.4562087059021, "learning_rate": 9.97225888774816e-07, "loss": 0.0129, "step": 125960 }, { "epoch": 1.345905229980234, "grad_norm": 0.0521942563354969, "learning_rate": 9.972241211537595e-07, "loss": 0.0288, "step": 125970 }, { "epoch": 1.3460120732945136, "grad_norm": 0.1121552512049675, "learning_rate": 9.972223529712996e-07, "loss": 0.064, "step": 125980 }, { "epoch": 1.3461189166087932, "grad_norm": 0.016650771722197533, "learning_rate": 9.97220584227438e-07, "loss": 0.0344, "step": 125990 }, { "epoch": 1.346225759923073, "grad_norm": 0.3240397274494171, "learning_rate": 9.972188149221766e-07, "loss": 0.0426, "step": 126000 }, { "epoch": 1.3463326032373524, "grad_norm": 6.653343200683594, "learning_rate": 9.97217045055518e-07, "loss": 0.0406, "step": 126010 }, { "epoch": 1.346439446551632, "grad_norm": 0.07790187746286392, "learning_rate": 9.972152746274639e-07, "loss": 0.04, "step": 126020 }, { "epoch": 1.3465462898659117, "grad_norm": 0.048207029700279236, "learning_rate": 9.972135036380158e-07, "loss": 0.0225, "step": 126030 }, { "epoch": 1.3466531331801912, "grad_norm": 2.116431951522827, "learning_rate": 9.972117320871764e-07, "loss": 0.0191, "step": 126040 }, { "epoch": 1.3467599764944709, "grad_norm": 0.1454729288816452, "learning_rate": 9.972099599749476e-07, "loss": 0.024, "step": 126050 }, { "epoch": 1.3468668198087506, "grad_norm": 2.8145172595977783, "learning_rate": 9.97208187301331e-07, "loss": 0.0432, "step": 126060 }, { "epoch": 1.34697366312303, "grad_norm": 1.9570327997207642, "learning_rate": 9.97206414066329e-07, "loss": 0.0143, "step": 126070 }, { "epoch": 1.3470805064373097, "grad_norm": 0.5537429451942444, "learning_rate": 9.972046402699433e-07, "loss": 0.0605, "step": 126080 }, { "epoch": 1.3471873497515894, "grad_norm": 0.2191888689994812, "learning_rate": 9.972028659121758e-07, "loss": 0.0256, "step": 126090 }, { "epoch": 1.3472941930658688, "grad_norm": 0.005752501543611288, "learning_rate": 9.972010909930291e-07, "loss": 0.01, "step": 126100 }, { "epoch": 1.3474010363801485, "grad_norm": 10.087474822998047, "learning_rate": 9.971993155125046e-07, "loss": 0.0639, "step": 126110 }, { "epoch": 1.3475078796944282, "grad_norm": 4.933459281921387, "learning_rate": 9.971975394706048e-07, "loss": 0.0307, "step": 126120 }, { "epoch": 1.3476147230087077, "grad_norm": 0.00883390847593546, "learning_rate": 9.971957628673313e-07, "loss": 0.02, "step": 126130 }, { "epoch": 1.3477215663229873, "grad_norm": 0.12690915167331696, "learning_rate": 9.971939857026863e-07, "loss": 0.0465, "step": 126140 }, { "epoch": 1.347828409637267, "grad_norm": 1.8449326753616333, "learning_rate": 9.971922079766717e-07, "loss": 0.0471, "step": 126150 }, { "epoch": 1.3479352529515465, "grad_norm": 15.705619812011719, "learning_rate": 9.971904296892896e-07, "loss": 0.0608, "step": 126160 }, { "epoch": 1.3480420962658262, "grad_norm": 1.1672284603118896, "learning_rate": 9.97188650840542e-07, "loss": 0.0317, "step": 126170 }, { "epoch": 1.3481489395801058, "grad_norm": 9.283222198486328, "learning_rate": 9.971868714304309e-07, "loss": 0.0213, "step": 126180 }, { "epoch": 1.3482557828943853, "grad_norm": 2.1108932495117188, "learning_rate": 9.971850914589584e-07, "loss": 0.0875, "step": 126190 }, { "epoch": 1.348362626208665, "grad_norm": 10.550858497619629, "learning_rate": 9.971833109261261e-07, "loss": 0.0314, "step": 126200 }, { "epoch": 1.3484694695229447, "grad_norm": 7.788793563842773, "learning_rate": 9.971815298319367e-07, "loss": 0.0576, "step": 126210 }, { "epoch": 1.3485763128372241, "grad_norm": 0.009185939095914364, "learning_rate": 9.971797481763914e-07, "loss": 0.0105, "step": 126220 }, { "epoch": 1.3486831561515038, "grad_norm": 1.239322304725647, "learning_rate": 9.97177965959493e-07, "loss": 0.0301, "step": 126230 }, { "epoch": 1.3487899994657835, "grad_norm": 2.269207000732422, "learning_rate": 9.97176183181243e-07, "loss": 0.0126, "step": 126240 }, { "epoch": 1.348896842780063, "grad_norm": 12.435731887817383, "learning_rate": 9.971743998416436e-07, "loss": 0.0485, "step": 126250 }, { "epoch": 1.3490036860943426, "grad_norm": 17.1050968170166, "learning_rate": 9.971726159406968e-07, "loss": 0.0255, "step": 126260 }, { "epoch": 1.3491105294086223, "grad_norm": 0.13008703291416168, "learning_rate": 9.971708314784046e-07, "loss": 0.0086, "step": 126270 }, { "epoch": 1.3492173727229018, "grad_norm": 0.18037787079811096, "learning_rate": 9.97169046454769e-07, "loss": 0.0104, "step": 126280 }, { "epoch": 1.3493242160371814, "grad_norm": 0.0028891325928270817, "learning_rate": 9.97167260869792e-07, "loss": 0.0267, "step": 126290 }, { "epoch": 1.3494310593514611, "grad_norm": 0.3354927599430084, "learning_rate": 9.971654747234757e-07, "loss": 0.0183, "step": 126300 }, { "epoch": 1.3495379026657406, "grad_norm": 0.037792906165122986, "learning_rate": 9.971636880158219e-07, "loss": 0.0235, "step": 126310 }, { "epoch": 1.3496447459800203, "grad_norm": 3.09722638130188, "learning_rate": 9.971619007468328e-07, "loss": 0.0227, "step": 126320 }, { "epoch": 1.3497515892943, "grad_norm": 0.6285514831542969, "learning_rate": 9.971601129165105e-07, "loss": 0.051, "step": 126330 }, { "epoch": 1.3498584326085794, "grad_norm": 0.09005381911993027, "learning_rate": 9.97158324524857e-07, "loss": 0.0231, "step": 126340 }, { "epoch": 1.349965275922859, "grad_norm": 0.6976522207260132, "learning_rate": 9.97156535571874e-07, "loss": 0.0367, "step": 126350 }, { "epoch": 1.3500721192371388, "grad_norm": 1.649895429611206, "learning_rate": 9.971547460575638e-07, "loss": 0.026, "step": 126360 }, { "epoch": 1.3501789625514182, "grad_norm": 3.95200252532959, "learning_rate": 9.971529559819285e-07, "loss": 0.0314, "step": 126370 }, { "epoch": 1.350285805865698, "grad_norm": 0.1599818617105484, "learning_rate": 9.9715116534497e-07, "loss": 0.056, "step": 126380 }, { "epoch": 1.3503926491799776, "grad_norm": 0.03708435595035553, "learning_rate": 9.971493741466901e-07, "loss": 0.0242, "step": 126390 }, { "epoch": 1.350499492494257, "grad_norm": 3.4096052646636963, "learning_rate": 9.971475823870913e-07, "loss": 0.0145, "step": 126400 }, { "epoch": 1.3506063358085367, "grad_norm": 4.239288806915283, "learning_rate": 9.971457900661751e-07, "loss": 0.0372, "step": 126410 }, { "epoch": 1.3507131791228164, "grad_norm": 3.993946075439453, "learning_rate": 9.97143997183944e-07, "loss": 0.1209, "step": 126420 }, { "epoch": 1.3508200224370959, "grad_norm": 0.01566709205508232, "learning_rate": 9.971422037403996e-07, "loss": 0.0563, "step": 126430 }, { "epoch": 1.3509268657513755, "grad_norm": 0.3512380123138428, "learning_rate": 9.971404097355442e-07, "loss": 0.0495, "step": 126440 }, { "epoch": 1.3510337090656552, "grad_norm": 2.5427310466766357, "learning_rate": 9.971386151693799e-07, "loss": 0.0315, "step": 126450 }, { "epoch": 1.3511405523799347, "grad_norm": 0.22244684398174286, "learning_rate": 9.971368200419083e-07, "loss": 0.015, "step": 126460 }, { "epoch": 1.3512473956942144, "grad_norm": 5.0674333572387695, "learning_rate": 9.97135024353132e-07, "loss": 0.0782, "step": 126470 }, { "epoch": 1.351354239008494, "grad_norm": 3.3311665058135986, "learning_rate": 9.971332281030528e-07, "loss": 0.0414, "step": 126480 }, { "epoch": 1.3514610823227737, "grad_norm": 2.8632891178131104, "learning_rate": 9.971314312916723e-07, "loss": 0.018, "step": 126490 }, { "epoch": 1.3515679256370532, "grad_norm": 4.234663486480713, "learning_rate": 9.97129633918993e-07, "loss": 0.0204, "step": 126500 }, { "epoch": 1.3516747689513329, "grad_norm": 0.7999410629272461, "learning_rate": 9.97127835985017e-07, "loss": 0.012, "step": 126510 }, { "epoch": 1.3517816122656126, "grad_norm": 7.2213873863220215, "learning_rate": 9.97126037489746e-07, "loss": 0.0412, "step": 126520 }, { "epoch": 1.351888455579892, "grad_norm": 1.6063629388809204, "learning_rate": 9.971242384331822e-07, "loss": 0.0207, "step": 126530 }, { "epoch": 1.3519952988941717, "grad_norm": 2.0297181606292725, "learning_rate": 9.971224388153277e-07, "loss": 0.033, "step": 126540 }, { "epoch": 1.3521021422084514, "grad_norm": 8.610616683959961, "learning_rate": 9.971206386361845e-07, "loss": 0.0518, "step": 126550 }, { "epoch": 1.3522089855227308, "grad_norm": 0.01055188663303852, "learning_rate": 9.971188378957545e-07, "loss": 0.0752, "step": 126560 }, { "epoch": 1.3523158288370105, "grad_norm": 2.6759326457977295, "learning_rate": 9.971170365940398e-07, "loss": 0.0115, "step": 126570 }, { "epoch": 1.3524226721512902, "grad_norm": 1.3304526805877686, "learning_rate": 9.971152347310423e-07, "loss": 0.0145, "step": 126580 }, { "epoch": 1.3525295154655699, "grad_norm": 5.810091495513916, "learning_rate": 9.971134323067644e-07, "loss": 0.0667, "step": 126590 }, { "epoch": 1.3526363587798493, "grad_norm": 11.977988243103027, "learning_rate": 9.97111629321208e-07, "loss": 0.0308, "step": 126600 }, { "epoch": 1.352743202094129, "grad_norm": 5.873201370239258, "learning_rate": 9.971098257743748e-07, "loss": 0.0237, "step": 126610 }, { "epoch": 1.3528500454084087, "grad_norm": 9.016965866088867, "learning_rate": 9.971080216662672e-07, "loss": 0.0719, "step": 126620 }, { "epoch": 1.3529568887226882, "grad_norm": 0.05087224394083023, "learning_rate": 9.97106216996887e-07, "loss": 0.0247, "step": 126630 }, { "epoch": 1.3530637320369678, "grad_norm": 5.783870220184326, "learning_rate": 9.971044117662366e-07, "loss": 0.0461, "step": 126640 }, { "epoch": 1.3531705753512475, "grad_norm": 0.029904095456004143, "learning_rate": 9.971026059743174e-07, "loss": 0.0455, "step": 126650 }, { "epoch": 1.353277418665527, "grad_norm": 0.04576760157942772, "learning_rate": 9.971007996211322e-07, "loss": 0.0191, "step": 126660 }, { "epoch": 1.3533842619798067, "grad_norm": 0.13648274540901184, "learning_rate": 9.970989927066827e-07, "loss": 0.0235, "step": 126670 }, { "epoch": 1.3534911052940863, "grad_norm": 1.4041446447372437, "learning_rate": 9.970971852309706e-07, "loss": 0.0536, "step": 126680 }, { "epoch": 1.3535979486083658, "grad_norm": 0.1277233511209488, "learning_rate": 9.970953771939985e-07, "loss": 0.0105, "step": 126690 }, { "epoch": 1.3537047919226455, "grad_norm": 0.6030074954032898, "learning_rate": 9.970935685957684e-07, "loss": 0.0379, "step": 126700 }, { "epoch": 1.3538116352369252, "grad_norm": 0.6425164341926575, "learning_rate": 9.970917594362817e-07, "loss": 0.0036, "step": 126710 }, { "epoch": 1.3539184785512046, "grad_norm": 0.19046902656555176, "learning_rate": 9.970899497155413e-07, "loss": 0.0315, "step": 126720 }, { "epoch": 1.3540253218654843, "grad_norm": 6.863973140716553, "learning_rate": 9.970881394335485e-07, "loss": 0.0269, "step": 126730 }, { "epoch": 1.354132165179764, "grad_norm": 1.8745986223220825, "learning_rate": 9.970863285903058e-07, "loss": 0.014, "step": 126740 }, { "epoch": 1.3542390084940434, "grad_norm": 0.6216233372688293, "learning_rate": 9.970845171858151e-07, "loss": 0.0164, "step": 126750 }, { "epoch": 1.3543458518083231, "grad_norm": 0.07789096981287003, "learning_rate": 9.970827052200786e-07, "loss": 0.0356, "step": 126760 }, { "epoch": 1.3544526951226028, "grad_norm": 0.6883590817451477, "learning_rate": 9.97080892693098e-07, "loss": 0.0062, "step": 126770 }, { "epoch": 1.3545595384368823, "grad_norm": 0.37168553471565247, "learning_rate": 9.970790796048757e-07, "loss": 0.0384, "step": 126780 }, { "epoch": 1.354666381751162, "grad_norm": 2.031515121459961, "learning_rate": 9.970772659554135e-07, "loss": 0.0109, "step": 126790 }, { "epoch": 1.3547732250654416, "grad_norm": 0.9088700413703918, "learning_rate": 9.970754517447136e-07, "loss": 0.0282, "step": 126800 }, { "epoch": 1.354880068379721, "grad_norm": 11.139729499816895, "learning_rate": 9.97073636972778e-07, "loss": 0.0295, "step": 126810 }, { "epoch": 1.3549869116940008, "grad_norm": 0.612403929233551, "learning_rate": 9.970718216396089e-07, "loss": 0.0223, "step": 126820 }, { "epoch": 1.3550937550082804, "grad_norm": 8.132903099060059, "learning_rate": 9.97070005745208e-07, "loss": 0.0357, "step": 126830 }, { "epoch": 1.35520059832256, "grad_norm": 15.541502952575684, "learning_rate": 9.970681892895776e-07, "loss": 0.0402, "step": 126840 }, { "epoch": 1.3553074416368396, "grad_norm": 2.5151290893554688, "learning_rate": 9.970663722727196e-07, "loss": 0.0452, "step": 126850 }, { "epoch": 1.3554142849511193, "grad_norm": 0.006635389290750027, "learning_rate": 9.970645546946362e-07, "loss": 0.0317, "step": 126860 }, { "epoch": 1.3555211282653987, "grad_norm": 1.3743842840194702, "learning_rate": 9.970627365553295e-07, "loss": 0.0049, "step": 126870 }, { "epoch": 1.3556279715796784, "grad_norm": 0.05918274074792862, "learning_rate": 9.970609178548015e-07, "loss": 0.0366, "step": 126880 }, { "epoch": 1.355734814893958, "grad_norm": 0.2364368587732315, "learning_rate": 9.97059098593054e-07, "loss": 0.0354, "step": 126890 }, { "epoch": 1.3558416582082375, "grad_norm": 0.10880304127931595, "learning_rate": 9.970572787700893e-07, "loss": 0.0161, "step": 126900 }, { "epoch": 1.3559485015225172, "grad_norm": 2.5972838401794434, "learning_rate": 9.970554583859096e-07, "loss": 0.0318, "step": 126910 }, { "epoch": 1.356055344836797, "grad_norm": 0.049358636140823364, "learning_rate": 9.970536374405166e-07, "loss": 0.0615, "step": 126920 }, { "epoch": 1.3561621881510764, "grad_norm": 1.207239031791687, "learning_rate": 9.970518159339127e-07, "loss": 0.02, "step": 126930 }, { "epoch": 1.356269031465356, "grad_norm": 4.00764799118042, "learning_rate": 9.970499938660996e-07, "loss": 0.0512, "step": 126940 }, { "epoch": 1.3563758747796357, "grad_norm": 6.909012317657471, "learning_rate": 9.970481712370795e-07, "loss": 0.0662, "step": 126950 }, { "epoch": 1.3564827180939152, "grad_norm": 0.2396930456161499, "learning_rate": 9.970463480468546e-07, "loss": 0.0245, "step": 126960 }, { "epoch": 1.3565895614081949, "grad_norm": 2.919243812561035, "learning_rate": 9.97044524295427e-07, "loss": 0.0694, "step": 126970 }, { "epoch": 1.3566964047224745, "grad_norm": 5.633992671966553, "learning_rate": 9.970426999827985e-07, "loss": 0.0506, "step": 126980 }, { "epoch": 1.356803248036754, "grad_norm": 7.182973384857178, "learning_rate": 9.970408751089713e-07, "loss": 0.0216, "step": 126990 }, { "epoch": 1.3569100913510337, "grad_norm": 0.6371102333068848, "learning_rate": 9.970390496739475e-07, "loss": 0.0602, "step": 127000 }, { "epoch": 1.3570169346653134, "grad_norm": 0.06719177961349487, "learning_rate": 9.970372236777288e-07, "loss": 0.0252, "step": 127010 }, { "epoch": 1.3571237779795928, "grad_norm": 3.118014097213745, "learning_rate": 9.97035397120318e-07, "loss": 0.0855, "step": 127020 }, { "epoch": 1.3572306212938725, "grad_norm": 0.006675703451037407, "learning_rate": 9.970335700017165e-07, "loss": 0.0163, "step": 127030 }, { "epoch": 1.3573374646081522, "grad_norm": 0.5333176255226135, "learning_rate": 9.970317423219266e-07, "loss": 0.0464, "step": 127040 }, { "epoch": 1.3574443079224316, "grad_norm": 0.1325909048318863, "learning_rate": 9.970299140809503e-07, "loss": 0.0346, "step": 127050 }, { "epoch": 1.3575511512367113, "grad_norm": 7.670543670654297, "learning_rate": 9.970280852787897e-07, "loss": 0.0277, "step": 127060 }, { "epoch": 1.357657994550991, "grad_norm": 0.6509484648704529, "learning_rate": 9.97026255915447e-07, "loss": 0.0319, "step": 127070 }, { "epoch": 1.3577648378652705, "grad_norm": 0.9983833432197571, "learning_rate": 9.97024425990924e-07, "loss": 0.0334, "step": 127080 }, { "epoch": 1.3578716811795501, "grad_norm": 2.4454281330108643, "learning_rate": 9.97022595505223e-07, "loss": 0.0118, "step": 127090 }, { "epoch": 1.3579785244938298, "grad_norm": 0.2493516057729721, "learning_rate": 9.97020764458346e-07, "loss": 0.0214, "step": 127100 }, { "epoch": 1.3580853678081093, "grad_norm": 8.864938735961914, "learning_rate": 9.970189328502949e-07, "loss": 0.0342, "step": 127110 }, { "epoch": 1.358192211122389, "grad_norm": 5.909030914306641, "learning_rate": 9.970171006810722e-07, "loss": 0.0121, "step": 127120 }, { "epoch": 1.3582990544366687, "grad_norm": 1.4282958507537842, "learning_rate": 9.970152679506794e-07, "loss": 0.0257, "step": 127130 }, { "epoch": 1.358405897750948, "grad_norm": 12.734190940856934, "learning_rate": 9.97013434659119e-07, "loss": 0.055, "step": 127140 }, { "epoch": 1.3585127410652278, "grad_norm": 5.164783477783203, "learning_rate": 9.970116008063928e-07, "loss": 0.0354, "step": 127150 }, { "epoch": 1.3586195843795075, "grad_norm": 0.07074105739593506, "learning_rate": 9.970097663925032e-07, "loss": 0.053, "step": 127160 }, { "epoch": 1.358726427693787, "grad_norm": 0.04548921808600426, "learning_rate": 9.97007931417452e-07, "loss": 0.0173, "step": 127170 }, { "epoch": 1.3588332710080666, "grad_norm": 0.07497816532850266, "learning_rate": 9.970060958812411e-07, "loss": 0.0309, "step": 127180 }, { "epoch": 1.3589401143223463, "grad_norm": 4.24709415435791, "learning_rate": 9.970042597838731e-07, "loss": 0.0335, "step": 127190 }, { "epoch": 1.3590469576366258, "grad_norm": 0.19688105583190918, "learning_rate": 9.970024231253496e-07, "loss": 0.0141, "step": 127200 }, { "epoch": 1.3591538009509054, "grad_norm": 1.0171630382537842, "learning_rate": 9.970005859056729e-07, "loss": 0.0165, "step": 127210 }, { "epoch": 1.3592606442651851, "grad_norm": 8.922719955444336, "learning_rate": 9.969987481248449e-07, "loss": 0.0205, "step": 127220 }, { "epoch": 1.3593674875794648, "grad_norm": 0.5381003022193909, "learning_rate": 9.969969097828681e-07, "loss": 0.0214, "step": 127230 }, { "epoch": 1.3594743308937443, "grad_norm": 3.3914005756378174, "learning_rate": 9.969950708797441e-07, "loss": 0.049, "step": 127240 }, { "epoch": 1.359581174208024, "grad_norm": 11.092917442321777, "learning_rate": 9.969932314154752e-07, "loss": 0.0634, "step": 127250 }, { "epoch": 1.3596880175223036, "grad_norm": 2.3994176387786865, "learning_rate": 9.969913913900635e-07, "loss": 0.0066, "step": 127260 }, { "epoch": 1.359794860836583, "grad_norm": 4.090932369232178, "learning_rate": 9.969895508035109e-07, "loss": 0.046, "step": 127270 }, { "epoch": 1.3599017041508628, "grad_norm": 0.5977978110313416, "learning_rate": 9.969877096558194e-07, "loss": 0.0009, "step": 127280 }, { "epoch": 1.3600085474651424, "grad_norm": 19.93402862548828, "learning_rate": 9.969858679469915e-07, "loss": 0.083, "step": 127290 }, { "epoch": 1.360115390779422, "grad_norm": 2.4654619693756104, "learning_rate": 9.96984025677029e-07, "loss": 0.0089, "step": 127300 }, { "epoch": 1.3602222340937016, "grad_norm": 0.019261466339230537, "learning_rate": 9.969821828459342e-07, "loss": 0.0306, "step": 127310 }, { "epoch": 1.3603290774079813, "grad_norm": 3.9096930027008057, "learning_rate": 9.969803394537088e-07, "loss": 0.0239, "step": 127320 }, { "epoch": 1.360435920722261, "grad_norm": 3.811347246170044, "learning_rate": 9.96978495500355e-07, "loss": 0.0345, "step": 127330 }, { "epoch": 1.3605427640365404, "grad_norm": 0.08271311223506927, "learning_rate": 9.969766509858751e-07, "loss": 0.0392, "step": 127340 }, { "epoch": 1.36064960735082, "grad_norm": 0.9029297828674316, "learning_rate": 9.96974805910271e-07, "loss": 0.0216, "step": 127350 }, { "epoch": 1.3607564506650998, "grad_norm": 0.01153843104839325, "learning_rate": 9.96972960273545e-07, "loss": 0.0523, "step": 127360 }, { "epoch": 1.3608632939793792, "grad_norm": 3.391429901123047, "learning_rate": 9.969711140756989e-07, "loss": 0.0625, "step": 127370 }, { "epoch": 1.360970137293659, "grad_norm": 2.7809088230133057, "learning_rate": 9.96969267316735e-07, "loss": 0.036, "step": 127380 }, { "epoch": 1.3610769806079386, "grad_norm": 0.004095235373824835, "learning_rate": 9.969674199966552e-07, "loss": 0.0172, "step": 127390 }, { "epoch": 1.361183823922218, "grad_norm": 1.0262925624847412, "learning_rate": 9.969655721154616e-07, "loss": 0.0753, "step": 127400 }, { "epoch": 1.3612906672364977, "grad_norm": 1.37387216091156, "learning_rate": 9.969637236731565e-07, "loss": 0.0226, "step": 127410 }, { "epoch": 1.3613975105507774, "grad_norm": 6.676592826843262, "learning_rate": 9.969618746697419e-07, "loss": 0.0309, "step": 127420 }, { "epoch": 1.3615043538650569, "grad_norm": 9.974547386169434, "learning_rate": 9.969600251052196e-07, "loss": 0.0814, "step": 127430 }, { "epoch": 1.3616111971793365, "grad_norm": 4.630774021148682, "learning_rate": 9.969581749795922e-07, "loss": 0.0552, "step": 127440 }, { "epoch": 1.3617180404936162, "grad_norm": 0.03575771301984787, "learning_rate": 9.969563242928611e-07, "loss": 0.0072, "step": 127450 }, { "epoch": 1.3618248838078957, "grad_norm": 1.8221619129180908, "learning_rate": 9.969544730450292e-07, "loss": 0.0192, "step": 127460 }, { "epoch": 1.3619317271221754, "grad_norm": 0.3863793611526489, "learning_rate": 9.96952621236098e-07, "loss": 0.0181, "step": 127470 }, { "epoch": 1.362038570436455, "grad_norm": 9.595287322998047, "learning_rate": 9.969507688660699e-07, "loss": 0.0552, "step": 127480 }, { "epoch": 1.3621454137507345, "grad_norm": 3.941577196121216, "learning_rate": 9.969489159349467e-07, "loss": 0.049, "step": 127490 }, { "epoch": 1.3622522570650142, "grad_norm": 0.2531803846359253, "learning_rate": 9.96947062442731e-07, "loss": 0.0941, "step": 127500 }, { "epoch": 1.3623591003792939, "grad_norm": 2.2839770317077637, "learning_rate": 9.969452083894243e-07, "loss": 0.031, "step": 127510 }, { "epoch": 1.3624659436935733, "grad_norm": 2.1360788345336914, "learning_rate": 9.96943353775029e-07, "loss": 0.0552, "step": 127520 }, { "epoch": 1.362572787007853, "grad_norm": 2.007861852645874, "learning_rate": 9.96941498599547e-07, "loss": 0.0226, "step": 127530 }, { "epoch": 1.3626796303221327, "grad_norm": 5.257753849029541, "learning_rate": 9.969396428629808e-07, "loss": 0.0585, "step": 127540 }, { "epoch": 1.3627864736364121, "grad_norm": 3.863744020462036, "learning_rate": 9.96937786565332e-07, "loss": 0.04, "step": 127550 }, { "epoch": 1.3628933169506918, "grad_norm": 10.075841903686523, "learning_rate": 9.96935929706603e-07, "loss": 0.0459, "step": 127560 }, { "epoch": 1.3630001602649715, "grad_norm": 4.696951389312744, "learning_rate": 9.969340722867961e-07, "loss": 0.0591, "step": 127570 }, { "epoch": 1.363107003579251, "grad_norm": 2.4124155044555664, "learning_rate": 9.969322143059128e-07, "loss": 0.0168, "step": 127580 }, { "epoch": 1.3632138468935306, "grad_norm": 8.151143074035645, "learning_rate": 9.969303557639558e-07, "loss": 0.0446, "step": 127590 }, { "epoch": 1.3633206902078103, "grad_norm": 5.0611162185668945, "learning_rate": 9.969284966609266e-07, "loss": 0.016, "step": 127600 }, { "epoch": 1.3634275335220898, "grad_norm": 0.418796569108963, "learning_rate": 9.96926636996828e-07, "loss": 0.0095, "step": 127610 }, { "epoch": 1.3635343768363695, "grad_norm": 1.1391311883926392, "learning_rate": 9.969247767716612e-07, "loss": 0.0376, "step": 127620 }, { "epoch": 1.3636412201506491, "grad_norm": 6.534234523773193, "learning_rate": 9.969229159854294e-07, "loss": 0.0795, "step": 127630 }, { "epoch": 1.3637480634649286, "grad_norm": 2.337909460067749, "learning_rate": 9.969210546381337e-07, "loss": 0.0259, "step": 127640 }, { "epoch": 1.3638549067792083, "grad_norm": 10.94491958618164, "learning_rate": 9.969191927297768e-07, "loss": 0.0375, "step": 127650 }, { "epoch": 1.363961750093488, "grad_norm": 15.565286636352539, "learning_rate": 9.969173302603607e-07, "loss": 0.0479, "step": 127660 }, { "epoch": 1.3640685934077674, "grad_norm": 0.046812478452920914, "learning_rate": 9.969154672298873e-07, "loss": 0.0362, "step": 127670 }, { "epoch": 1.364175436722047, "grad_norm": 0.011092067696154118, "learning_rate": 9.96913603638359e-07, "loss": 0.0309, "step": 127680 }, { "epoch": 1.3642822800363268, "grad_norm": 2.559333562850952, "learning_rate": 9.969117394857775e-07, "loss": 0.0143, "step": 127690 }, { "epoch": 1.3643891233506062, "grad_norm": 0.05310162901878357, "learning_rate": 9.969098747721452e-07, "loss": 0.077, "step": 127700 }, { "epoch": 1.364495966664886, "grad_norm": 1.1391360759735107, "learning_rate": 9.969080094974642e-07, "loss": 0.0119, "step": 127710 }, { "epoch": 1.3646028099791656, "grad_norm": 0.1398107260465622, "learning_rate": 9.969061436617365e-07, "loss": 0.0403, "step": 127720 }, { "epoch": 1.364709653293445, "grad_norm": 0.9119394421577454, "learning_rate": 9.969042772649645e-07, "loss": 0.0257, "step": 127730 }, { "epoch": 1.3648164966077247, "grad_norm": 2.751455783843994, "learning_rate": 9.969024103071498e-07, "loss": 0.016, "step": 127740 }, { "epoch": 1.3649233399220044, "grad_norm": 6.607239723205566, "learning_rate": 9.969005427882948e-07, "loss": 0.0316, "step": 127750 }, { "epoch": 1.365030183236284, "grad_norm": 1.0566078424453735, "learning_rate": 9.968986747084018e-07, "loss": 0.029, "step": 127760 }, { "epoch": 1.3651370265505636, "grad_norm": 0.11603429913520813, "learning_rate": 9.968968060674725e-07, "loss": 0.016, "step": 127770 }, { "epoch": 1.3652438698648433, "grad_norm": 0.2483309507369995, "learning_rate": 9.968949368655093e-07, "loss": 0.1095, "step": 127780 }, { "epoch": 1.3653507131791227, "grad_norm": 0.7628223896026611, "learning_rate": 9.96893067102514e-07, "loss": 0.0211, "step": 127790 }, { "epoch": 1.3654575564934024, "grad_norm": 0.035323526710271835, "learning_rate": 9.968911967784891e-07, "loss": 0.0366, "step": 127800 }, { "epoch": 1.365564399807682, "grad_norm": 2.2516539096832275, "learning_rate": 9.968893258934364e-07, "loss": 0.0298, "step": 127810 }, { "epoch": 1.3656712431219615, "grad_norm": 2.6340179443359375, "learning_rate": 9.968874544473585e-07, "loss": 0.0173, "step": 127820 }, { "epoch": 1.3657780864362412, "grad_norm": 2.1368637084960938, "learning_rate": 9.96885582440257e-07, "loss": 0.0252, "step": 127830 }, { "epoch": 1.365884929750521, "grad_norm": 5.716246604919434, "learning_rate": 9.96883709872134e-07, "loss": 0.0238, "step": 127840 }, { "epoch": 1.3659917730648004, "grad_norm": 0.1361767202615738, "learning_rate": 9.968818367429918e-07, "loss": 0.0234, "step": 127850 }, { "epoch": 1.36609861637908, "grad_norm": 0.017645180225372314, "learning_rate": 9.968799630528326e-07, "loss": 0.0323, "step": 127860 }, { "epoch": 1.3662054596933597, "grad_norm": 7.60994815826416, "learning_rate": 9.968780888016585e-07, "loss": 0.052, "step": 127870 }, { "epoch": 1.3663123030076392, "grad_norm": 4.709305286407471, "learning_rate": 9.968762139894714e-07, "loss": 0.0474, "step": 127880 }, { "epoch": 1.3664191463219189, "grad_norm": 0.04266032949090004, "learning_rate": 9.968743386162737e-07, "loss": 0.0328, "step": 127890 }, { "epoch": 1.3665259896361985, "grad_norm": 4.5253777503967285, "learning_rate": 9.968724626820673e-07, "loss": 0.0224, "step": 127900 }, { "epoch": 1.366632832950478, "grad_norm": 1.878909945487976, "learning_rate": 9.968705861868544e-07, "loss": 0.0307, "step": 127910 }, { "epoch": 1.3667396762647577, "grad_norm": 1.134800672531128, "learning_rate": 9.968687091306369e-07, "loss": 0.0151, "step": 127920 }, { "epoch": 1.3668465195790374, "grad_norm": 2.019536018371582, "learning_rate": 9.968668315134174e-07, "loss": 0.0588, "step": 127930 }, { "epoch": 1.3669533628933168, "grad_norm": 0.2977936267852783, "learning_rate": 9.968649533351976e-07, "loss": 0.0589, "step": 127940 }, { "epoch": 1.3670602062075965, "grad_norm": 4.5303192138671875, "learning_rate": 9.968630745959798e-07, "loss": 0.0231, "step": 127950 }, { "epoch": 1.3671670495218762, "grad_norm": 0.17996422946453094, "learning_rate": 9.968611952957662e-07, "loss": 0.0772, "step": 127960 }, { "epoch": 1.3672738928361559, "grad_norm": 1.1170310974121094, "learning_rate": 9.968593154345587e-07, "loss": 0.0319, "step": 127970 }, { "epoch": 1.3673807361504353, "grad_norm": 3.7530808448791504, "learning_rate": 9.968574350123595e-07, "loss": 0.0344, "step": 127980 }, { "epoch": 1.367487579464715, "grad_norm": 0.4336012601852417, "learning_rate": 9.968555540291708e-07, "loss": 0.0423, "step": 127990 }, { "epoch": 1.3675944227789947, "grad_norm": 0.03812263533473015, "learning_rate": 9.968536724849948e-07, "loss": 0.0158, "step": 128000 }, { "epoch": 1.3677012660932741, "grad_norm": 6.563525199890137, "learning_rate": 9.968517903798333e-07, "loss": 0.031, "step": 128010 }, { "epoch": 1.3678081094075538, "grad_norm": 0.010284737683832645, "learning_rate": 9.968499077136888e-07, "loss": 0.0152, "step": 128020 }, { "epoch": 1.3679149527218335, "grad_norm": 0.616965651512146, "learning_rate": 9.968480244865631e-07, "loss": 0.0121, "step": 128030 }, { "epoch": 1.368021796036113, "grad_norm": 1.3906372785568237, "learning_rate": 9.968461406984585e-07, "loss": 0.0971, "step": 128040 }, { "epoch": 1.3681286393503926, "grad_norm": 0.31681936979293823, "learning_rate": 9.968442563493771e-07, "loss": 0.0022, "step": 128050 }, { "epoch": 1.3682354826646723, "grad_norm": 0.7283013463020325, "learning_rate": 9.968423714393213e-07, "loss": 0.0198, "step": 128060 }, { "epoch": 1.368342325978952, "grad_norm": 0.10837326943874359, "learning_rate": 9.968404859682926e-07, "loss": 0.052, "step": 128070 }, { "epoch": 1.3684491692932315, "grad_norm": 13.757258415222168, "learning_rate": 9.968385999362938e-07, "loss": 0.0577, "step": 128080 }, { "epoch": 1.3685560126075111, "grad_norm": 1.2198569774627686, "learning_rate": 9.968367133433266e-07, "loss": 0.0198, "step": 128090 }, { "epoch": 1.3686628559217908, "grad_norm": 3.6640501022338867, "learning_rate": 9.96834826189393e-07, "loss": 0.0203, "step": 128100 }, { "epoch": 1.3687696992360703, "grad_norm": 8.997884750366211, "learning_rate": 9.968329384744958e-07, "loss": 0.0159, "step": 128110 }, { "epoch": 1.36887654255035, "grad_norm": 4.3386006355285645, "learning_rate": 9.968310501986365e-07, "loss": 0.0398, "step": 128120 }, { "epoch": 1.3689833858646296, "grad_norm": 1.6134356260299683, "learning_rate": 9.968291613618175e-07, "loss": 0.0469, "step": 128130 }, { "epoch": 1.369090229178909, "grad_norm": 4.455250263214111, "learning_rate": 9.968272719640409e-07, "loss": 0.0187, "step": 128140 }, { "epoch": 1.3691970724931888, "grad_norm": 0.7681844830513, "learning_rate": 9.968253820053087e-07, "loss": 0.0179, "step": 128150 }, { "epoch": 1.3693039158074685, "grad_norm": 1.9923272132873535, "learning_rate": 9.968234914856234e-07, "loss": 0.0423, "step": 128160 }, { "epoch": 1.369410759121748, "grad_norm": 3.5613584518432617, "learning_rate": 9.968216004049865e-07, "loss": 0.0463, "step": 128170 }, { "epoch": 1.3695176024360276, "grad_norm": 4.643145561218262, "learning_rate": 9.968197087634008e-07, "loss": 0.0247, "step": 128180 }, { "epoch": 1.3696244457503073, "grad_norm": 4.667621612548828, "learning_rate": 9.968178165608681e-07, "loss": 0.0224, "step": 128190 }, { "epoch": 1.3697312890645867, "grad_norm": 2.748988628387451, "learning_rate": 9.968159237973904e-07, "loss": 0.0227, "step": 128200 }, { "epoch": 1.3698381323788664, "grad_norm": 6.000300407409668, "learning_rate": 9.968140304729703e-07, "loss": 0.0447, "step": 128210 }, { "epoch": 1.369944975693146, "grad_norm": 0.1658935248851776, "learning_rate": 9.968121365876095e-07, "loss": 0.0133, "step": 128220 }, { "epoch": 1.3700518190074256, "grad_norm": 0.06575004756450653, "learning_rate": 9.968102421413103e-07, "loss": 0.017, "step": 128230 }, { "epoch": 1.3701586623217052, "grad_norm": 6.143771171569824, "learning_rate": 9.96808347134075e-07, "loss": 0.0724, "step": 128240 }, { "epoch": 1.370265505635985, "grad_norm": 4.146289825439453, "learning_rate": 9.968064515659054e-07, "loss": 0.028, "step": 128250 }, { "epoch": 1.3703723489502644, "grad_norm": 0.5611695647239685, "learning_rate": 9.96804555436804e-07, "loss": 0.0323, "step": 128260 }, { "epoch": 1.370479192264544, "grad_norm": 4.121448516845703, "learning_rate": 9.968026587467727e-07, "loss": 0.035, "step": 128270 }, { "epoch": 1.3705860355788237, "grad_norm": 0.04168317839503288, "learning_rate": 9.968007614958135e-07, "loss": 0.0339, "step": 128280 }, { "epoch": 1.3706928788931032, "grad_norm": 0.010864301584661007, "learning_rate": 9.967988636839288e-07, "loss": 0.01, "step": 128290 }, { "epoch": 1.3707997222073829, "grad_norm": 4.971404552459717, "learning_rate": 9.96796965311121e-07, "loss": 0.028, "step": 128300 }, { "epoch": 1.3709065655216626, "grad_norm": 4.650747776031494, "learning_rate": 9.967950663773915e-07, "loss": 0.0223, "step": 128310 }, { "epoch": 1.371013408835942, "grad_norm": 0.056421030312776566, "learning_rate": 9.967931668827431e-07, "loss": 0.0566, "step": 128320 }, { "epoch": 1.3711202521502217, "grad_norm": 0.004922541324049234, "learning_rate": 9.967912668271779e-07, "loss": 0.0348, "step": 128330 }, { "epoch": 1.3712270954645014, "grad_norm": 2.14211106300354, "learning_rate": 9.967893662106976e-07, "loss": 0.0776, "step": 128340 }, { "epoch": 1.3713339387787808, "grad_norm": 0.4930363595485687, "learning_rate": 9.967874650333048e-07, "loss": 0.0322, "step": 128350 }, { "epoch": 1.3714407820930605, "grad_norm": 3.4608423709869385, "learning_rate": 9.967855632950012e-07, "loss": 0.0812, "step": 128360 }, { "epoch": 1.3715476254073402, "grad_norm": 3.382920503616333, "learning_rate": 9.967836609957895e-07, "loss": 0.0324, "step": 128370 }, { "epoch": 1.3716544687216197, "grad_norm": 0.0310994740575552, "learning_rate": 9.967817581356712e-07, "loss": 0.0365, "step": 128380 }, { "epoch": 1.3717613120358993, "grad_norm": 6.237912654876709, "learning_rate": 9.96779854714649e-07, "loss": 0.0895, "step": 128390 }, { "epoch": 1.371868155350179, "grad_norm": 0.3236502707004547, "learning_rate": 9.96777950732725e-07, "loss": 0.061, "step": 128400 }, { "epoch": 1.3719749986644585, "grad_norm": 2.801121234893799, "learning_rate": 9.96776046189901e-07, "loss": 0.0096, "step": 128410 }, { "epoch": 1.3720818419787382, "grad_norm": 0.903137743473053, "learning_rate": 9.967741410861795e-07, "loss": 0.0181, "step": 128420 }, { "epoch": 1.3721886852930179, "grad_norm": 0.02767924964427948, "learning_rate": 9.967722354215623e-07, "loss": 0.0178, "step": 128430 }, { "epoch": 1.3722955286072973, "grad_norm": 7.660426616668701, "learning_rate": 9.967703291960519e-07, "loss": 0.0782, "step": 128440 }, { "epoch": 1.372402371921577, "grad_norm": 1.2343724966049194, "learning_rate": 9.967684224096503e-07, "loss": 0.024, "step": 128450 }, { "epoch": 1.3725092152358567, "grad_norm": 0.34383082389831543, "learning_rate": 9.967665150623595e-07, "loss": 0.0302, "step": 128460 }, { "epoch": 1.3726160585501361, "grad_norm": 0.04925958439707756, "learning_rate": 9.96764607154182e-07, "loss": 0.0247, "step": 128470 }, { "epoch": 1.3727229018644158, "grad_norm": 2.33817195892334, "learning_rate": 9.967626986851196e-07, "loss": 0.0592, "step": 128480 }, { "epoch": 1.3728297451786955, "grad_norm": 4.171170711517334, "learning_rate": 9.967607896551748e-07, "loss": 0.0207, "step": 128490 }, { "epoch": 1.372936588492975, "grad_norm": 14.170524597167969, "learning_rate": 9.967588800643496e-07, "loss": 0.023, "step": 128500 }, { "epoch": 1.3730434318072546, "grad_norm": 6.993268013000488, "learning_rate": 9.967569699126458e-07, "loss": 0.0214, "step": 128510 }, { "epoch": 1.3731502751215343, "grad_norm": 0.10350260883569717, "learning_rate": 9.967550592000663e-07, "loss": 0.0283, "step": 128520 }, { "epoch": 1.3732571184358138, "grad_norm": 6.766661643981934, "learning_rate": 9.967531479266127e-07, "loss": 0.0127, "step": 128530 }, { "epoch": 1.3733639617500935, "grad_norm": 14.276144027709961, "learning_rate": 9.967512360922873e-07, "loss": 0.0738, "step": 128540 }, { "epoch": 1.3734708050643731, "grad_norm": 0.3321587145328522, "learning_rate": 9.96749323697092e-07, "loss": 0.0547, "step": 128550 }, { "epoch": 1.3735776483786526, "grad_norm": 2.999742031097412, "learning_rate": 9.967474107410297e-07, "loss": 0.0368, "step": 128560 }, { "epoch": 1.3736844916929323, "grad_norm": 4.5011677742004395, "learning_rate": 9.967454972241017e-07, "loss": 0.0809, "step": 128570 }, { "epoch": 1.373791335007212, "grad_norm": 0.5946535468101501, "learning_rate": 9.967435831463107e-07, "loss": 0.0232, "step": 128580 }, { "epoch": 1.3738981783214914, "grad_norm": 2.0739617347717285, "learning_rate": 9.967416685076587e-07, "loss": 0.0252, "step": 128590 }, { "epoch": 1.374005021635771, "grad_norm": 4.69243860244751, "learning_rate": 9.967397533081479e-07, "loss": 0.0165, "step": 128600 }, { "epoch": 1.3741118649500508, "grad_norm": 7.287538528442383, "learning_rate": 9.967378375477805e-07, "loss": 0.0363, "step": 128610 }, { "epoch": 1.3742187082643302, "grad_norm": 3.796154022216797, "learning_rate": 9.967359212265583e-07, "loss": 0.0526, "step": 128620 }, { "epoch": 1.37432555157861, "grad_norm": 0.0548153780400753, "learning_rate": 9.967340043444839e-07, "loss": 0.0156, "step": 128630 }, { "epoch": 1.3744323948928896, "grad_norm": 1.5659385919570923, "learning_rate": 9.967320869015595e-07, "loss": 0.0671, "step": 128640 }, { "epoch": 1.374539238207169, "grad_norm": 6.595181941986084, "learning_rate": 9.967301688977868e-07, "loss": 0.0903, "step": 128650 }, { "epoch": 1.3746460815214487, "grad_norm": 2.59708309173584, "learning_rate": 9.967282503331685e-07, "loss": 0.0342, "step": 128660 }, { "epoch": 1.3747529248357284, "grad_norm": 8.053049087524414, "learning_rate": 9.967263312077063e-07, "loss": 0.0692, "step": 128670 }, { "epoch": 1.3748597681500079, "grad_norm": 9.523689270019531, "learning_rate": 9.967244115214026e-07, "loss": 0.0294, "step": 128680 }, { "epoch": 1.3749666114642876, "grad_norm": 0.36432361602783203, "learning_rate": 9.967224912742596e-07, "loss": 0.0651, "step": 128690 }, { "epoch": 1.3750734547785672, "grad_norm": 5.1652140617370605, "learning_rate": 9.967205704662796e-07, "loss": 0.0292, "step": 128700 }, { "epoch": 1.375180298092847, "grad_norm": 11.265190124511719, "learning_rate": 9.967186490974644e-07, "loss": 0.0361, "step": 128710 }, { "epoch": 1.3752871414071264, "grad_norm": 9.449585914611816, "learning_rate": 9.967167271678162e-07, "loss": 0.028, "step": 128720 }, { "epoch": 1.375393984721406, "grad_norm": 6.451045513153076, "learning_rate": 9.967148046773378e-07, "loss": 0.0229, "step": 128730 }, { "epoch": 1.3755008280356857, "grad_norm": 0.13350117206573486, "learning_rate": 9.967128816260304e-07, "loss": 0.0154, "step": 128740 }, { "epoch": 1.3756076713499652, "grad_norm": 1.839178442955017, "learning_rate": 9.96710958013897e-07, "loss": 0.0384, "step": 128750 }, { "epoch": 1.3757145146642449, "grad_norm": 0.012342510744929314, "learning_rate": 9.967090338409391e-07, "loss": 0.0586, "step": 128760 }, { "epoch": 1.3758213579785246, "grad_norm": 3.1293063163757324, "learning_rate": 9.967071091071596e-07, "loss": 0.0537, "step": 128770 }, { "epoch": 1.375928201292804, "grad_norm": 0.16058547794818878, "learning_rate": 9.967051838125602e-07, "loss": 0.0159, "step": 128780 }, { "epoch": 1.3760350446070837, "grad_norm": 4.818582534790039, "learning_rate": 9.96703257957143e-07, "loss": 0.0425, "step": 128790 }, { "epoch": 1.3761418879213634, "grad_norm": 5.981171131134033, "learning_rate": 9.967013315409104e-07, "loss": 0.0131, "step": 128800 }, { "epoch": 1.376248731235643, "grad_norm": 0.07020442932844162, "learning_rate": 9.966994045638644e-07, "loss": 0.018, "step": 128810 }, { "epoch": 1.3763555745499225, "grad_norm": 2.3043060302734375, "learning_rate": 9.966974770260075e-07, "loss": 0.0535, "step": 128820 }, { "epoch": 1.3764624178642022, "grad_norm": 2.588749647140503, "learning_rate": 9.966955489273415e-07, "loss": 0.0382, "step": 128830 }, { "epoch": 1.3765692611784819, "grad_norm": 13.713677406311035, "learning_rate": 9.96693620267869e-07, "loss": 0.0365, "step": 128840 }, { "epoch": 1.3766761044927613, "grad_norm": 0.841738224029541, "learning_rate": 9.966916910475916e-07, "loss": 0.037, "step": 128850 }, { "epoch": 1.376782947807041, "grad_norm": 0.24602621793746948, "learning_rate": 9.966897612665119e-07, "loss": 0.0073, "step": 128860 }, { "epoch": 1.3768897911213207, "grad_norm": 1.107439637184143, "learning_rate": 9.96687830924632e-07, "loss": 0.0561, "step": 128870 }, { "epoch": 1.3769966344356002, "grad_norm": 0.3722865879535675, "learning_rate": 9.96685900021954e-07, "loss": 0.0085, "step": 128880 }, { "epoch": 1.3771034777498798, "grad_norm": 2.6961472034454346, "learning_rate": 9.966839685584802e-07, "loss": 0.0105, "step": 128890 }, { "epoch": 1.3772103210641595, "grad_norm": 0.1341029405593872, "learning_rate": 9.966820365342126e-07, "loss": 0.0781, "step": 128900 }, { "epoch": 1.377317164378439, "grad_norm": 4.719210147857666, "learning_rate": 9.966801039491536e-07, "loss": 0.0573, "step": 128910 }, { "epoch": 1.3774240076927187, "grad_norm": 2.52908992767334, "learning_rate": 9.966781708033052e-07, "loss": 0.0405, "step": 128920 }, { "epoch": 1.3775308510069983, "grad_norm": 5.022438049316406, "learning_rate": 9.966762370966698e-07, "loss": 0.018, "step": 128930 }, { "epoch": 1.3776376943212778, "grad_norm": 19.53101348876953, "learning_rate": 9.966743028292492e-07, "loss": 0.0182, "step": 128940 }, { "epoch": 1.3777445376355575, "grad_norm": 1.5119649171829224, "learning_rate": 9.966723680010461e-07, "loss": 0.0413, "step": 128950 }, { "epoch": 1.3778513809498372, "grad_norm": 3.474177122116089, "learning_rate": 9.966704326120622e-07, "loss": 0.0206, "step": 128960 }, { "epoch": 1.3779582242641166, "grad_norm": 0.12690728902816772, "learning_rate": 9.966684966623e-07, "loss": 0.0021, "step": 128970 }, { "epoch": 1.3780650675783963, "grad_norm": 2.011364698410034, "learning_rate": 9.966665601517616e-07, "loss": 0.022, "step": 128980 }, { "epoch": 1.378171910892676, "grad_norm": 0.43955525755882263, "learning_rate": 9.96664623080449e-07, "loss": 0.0402, "step": 128990 }, { "epoch": 1.3782787542069554, "grad_norm": 5.982486724853516, "learning_rate": 9.966626854483648e-07, "loss": 0.1095, "step": 129000 }, { "epoch": 1.3783855975212351, "grad_norm": 0.7200706601142883, "learning_rate": 9.966607472555108e-07, "loss": 0.0182, "step": 129010 }, { "epoch": 1.3784924408355148, "grad_norm": 1.9625928401947021, "learning_rate": 9.966588085018895e-07, "loss": 0.0143, "step": 129020 }, { "epoch": 1.3785992841497943, "grad_norm": 0.04381520301103592, "learning_rate": 9.966568691875029e-07, "loss": 0.0083, "step": 129030 }, { "epoch": 1.378706127464074, "grad_norm": 0.00500226765871048, "learning_rate": 9.96654929312353e-07, "loss": 0.0436, "step": 129040 }, { "epoch": 1.3788129707783536, "grad_norm": 19.69721794128418, "learning_rate": 9.966529888764422e-07, "loss": 0.1054, "step": 129050 }, { "epoch": 1.378919814092633, "grad_norm": 0.05976295843720436, "learning_rate": 9.966510478797728e-07, "loss": 0.012, "step": 129060 }, { "epoch": 1.3790266574069128, "grad_norm": 4.334234237670898, "learning_rate": 9.966491063223468e-07, "loss": 0.0432, "step": 129070 }, { "epoch": 1.3791335007211925, "grad_norm": 0.015720434486865997, "learning_rate": 9.966471642041668e-07, "loss": 0.0392, "step": 129080 }, { "epoch": 1.379240344035472, "grad_norm": 8.306329727172852, "learning_rate": 9.966452215252343e-07, "loss": 0.0383, "step": 129090 }, { "epoch": 1.3793471873497516, "grad_norm": 0.10268491506576538, "learning_rate": 9.96643278285552e-07, "loss": 0.0438, "step": 129100 }, { "epoch": 1.3794540306640313, "grad_norm": 9.400308609008789, "learning_rate": 9.96641334485122e-07, "loss": 0.0836, "step": 129110 }, { "epoch": 1.3795608739783107, "grad_norm": 3.862060785293579, "learning_rate": 9.966393901239462e-07, "loss": 0.0519, "step": 129120 }, { "epoch": 1.3796677172925904, "grad_norm": 0.2225164771080017, "learning_rate": 9.966374452020272e-07, "loss": 0.0139, "step": 129130 }, { "epoch": 1.37977456060687, "grad_norm": 6.063940525054932, "learning_rate": 9.966354997193671e-07, "loss": 0.0222, "step": 129140 }, { "epoch": 1.3798814039211496, "grad_norm": 0.016651228070259094, "learning_rate": 9.96633553675968e-07, "loss": 0.0029, "step": 129150 }, { "epoch": 1.3799882472354292, "grad_norm": 3.3193347454071045, "learning_rate": 9.96631607071832e-07, "loss": 0.0461, "step": 129160 }, { "epoch": 1.380095090549709, "grad_norm": 2.754101037979126, "learning_rate": 9.966296599069618e-07, "loss": 0.062, "step": 129170 }, { "epoch": 1.3802019338639884, "grad_norm": 10.0122652053833, "learning_rate": 9.96627712181359e-07, "loss": 0.0645, "step": 129180 }, { "epoch": 1.380308777178268, "grad_norm": 5.571641445159912, "learning_rate": 9.96625763895026e-07, "loss": 0.0639, "step": 129190 }, { "epoch": 1.3804156204925477, "grad_norm": 0.021886633709073067, "learning_rate": 9.966238150479652e-07, "loss": 0.0023, "step": 129200 }, { "epoch": 1.3805224638068272, "grad_norm": 0.20894800126552582, "learning_rate": 9.966218656401783e-07, "loss": 0.0394, "step": 129210 }, { "epoch": 1.3806293071211069, "grad_norm": 1.1811169385910034, "learning_rate": 9.966199156716682e-07, "loss": 0.0272, "step": 129220 }, { "epoch": 1.3807361504353866, "grad_norm": 6.5484395027160645, "learning_rate": 9.966179651424364e-07, "loss": 0.0433, "step": 129230 }, { "epoch": 1.380842993749666, "grad_norm": 0.12713292241096497, "learning_rate": 9.966160140524857e-07, "loss": 0.0178, "step": 129240 }, { "epoch": 1.3809498370639457, "grad_norm": 9.200103759765625, "learning_rate": 9.96614062401818e-07, "loss": 0.0311, "step": 129250 }, { "epoch": 1.3810566803782254, "grad_norm": 3.5459556579589844, "learning_rate": 9.966121101904355e-07, "loss": 0.0368, "step": 129260 }, { "epoch": 1.3811635236925048, "grad_norm": 2.3245720863342285, "learning_rate": 9.966101574183403e-07, "loss": 0.0714, "step": 129270 }, { "epoch": 1.3812703670067845, "grad_norm": 0.46666231751441956, "learning_rate": 9.96608204085535e-07, "loss": 0.0127, "step": 129280 }, { "epoch": 1.3813772103210642, "grad_norm": 0.01224899385124445, "learning_rate": 9.966062501920214e-07, "loss": 0.0424, "step": 129290 }, { "epoch": 1.3814840536353437, "grad_norm": 1.9504643678665161, "learning_rate": 9.966042957378018e-07, "loss": 0.0165, "step": 129300 }, { "epoch": 1.3815908969496233, "grad_norm": 0.5165127515792847, "learning_rate": 9.966023407228786e-07, "loss": 0.0643, "step": 129310 }, { "epoch": 1.381697740263903, "grad_norm": 6.06485652923584, "learning_rate": 9.966003851472539e-07, "loss": 0.0761, "step": 129320 }, { "epoch": 1.3818045835781825, "grad_norm": 0.6293399333953857, "learning_rate": 9.965984290109297e-07, "loss": 0.0313, "step": 129330 }, { "epoch": 1.3819114268924622, "grad_norm": 0.014205689541995525, "learning_rate": 9.965964723139087e-07, "loss": 0.0427, "step": 129340 }, { "epoch": 1.3820182702067418, "grad_norm": 0.1003466248512268, "learning_rate": 9.965945150561926e-07, "loss": 0.0148, "step": 129350 }, { "epoch": 1.3821251135210213, "grad_norm": 12.806446075439453, "learning_rate": 9.965925572377839e-07, "loss": 0.0444, "step": 129360 }, { "epoch": 1.382231956835301, "grad_norm": 4.410528182983398, "learning_rate": 9.965905988586846e-07, "loss": 0.0289, "step": 129370 }, { "epoch": 1.3823388001495807, "grad_norm": 0.09886306524276733, "learning_rate": 9.96588639918897e-07, "loss": 0.0147, "step": 129380 }, { "epoch": 1.3824456434638601, "grad_norm": 0.019259972497820854, "learning_rate": 9.965866804184236e-07, "loss": 0.0204, "step": 129390 }, { "epoch": 1.3825524867781398, "grad_norm": 0.8079134821891785, "learning_rate": 9.965847203572662e-07, "loss": 0.0248, "step": 129400 }, { "epoch": 1.3826593300924195, "grad_norm": 2.1668455600738525, "learning_rate": 9.96582759735427e-07, "loss": 0.0433, "step": 129410 }, { "epoch": 1.382766173406699, "grad_norm": 0.06676310300827026, "learning_rate": 9.965807985529087e-07, "loss": 0.0093, "step": 129420 }, { "epoch": 1.3828730167209786, "grad_norm": 0.03722551092505455, "learning_rate": 9.96578836809713e-07, "loss": 0.0354, "step": 129430 }, { "epoch": 1.3829798600352583, "grad_norm": 11.224090576171875, "learning_rate": 9.965768745058424e-07, "loss": 0.0641, "step": 129440 }, { "epoch": 1.383086703349538, "grad_norm": 4.260556697845459, "learning_rate": 9.965749116412989e-07, "loss": 0.0688, "step": 129450 }, { "epoch": 1.3831935466638174, "grad_norm": 0.4981793463230133, "learning_rate": 9.965729482160851e-07, "loss": 0.0116, "step": 129460 }, { "epoch": 1.3833003899780971, "grad_norm": 8.553163528442383, "learning_rate": 9.965709842302027e-07, "loss": 0.031, "step": 129470 }, { "epoch": 1.3834072332923768, "grad_norm": 1.6729636192321777, "learning_rate": 9.965690196836543e-07, "loss": 0.014, "step": 129480 }, { "epoch": 1.3835140766066563, "grad_norm": 0.8022716045379639, "learning_rate": 9.96567054576442e-07, "loss": 0.0243, "step": 129490 }, { "epoch": 1.383620919920936, "grad_norm": 2.5687503814697266, "learning_rate": 9.96565088908568e-07, "loss": 0.0083, "step": 129500 }, { "epoch": 1.3837277632352156, "grad_norm": 0.6772159337997437, "learning_rate": 9.965631226800347e-07, "loss": 0.0158, "step": 129510 }, { "epoch": 1.383834606549495, "grad_norm": 3.687039613723755, "learning_rate": 9.96561155890844e-07, "loss": 0.0567, "step": 129520 }, { "epoch": 1.3839414498637748, "grad_norm": 7.02087926864624, "learning_rate": 9.965591885409981e-07, "loss": 0.0528, "step": 129530 }, { "epoch": 1.3840482931780544, "grad_norm": 2.2684895992279053, "learning_rate": 9.965572206304997e-07, "loss": 0.0176, "step": 129540 }, { "epoch": 1.3841551364923341, "grad_norm": 5.166227340698242, "learning_rate": 9.965552521593506e-07, "loss": 0.0247, "step": 129550 }, { "epoch": 1.3842619798066136, "grad_norm": 4.22578763961792, "learning_rate": 9.965532831275532e-07, "loss": 0.0159, "step": 129560 }, { "epoch": 1.3843688231208933, "grad_norm": 5.762958526611328, "learning_rate": 9.965513135351095e-07, "loss": 0.0505, "step": 129570 }, { "epoch": 1.384475666435173, "grad_norm": 3.57148814201355, "learning_rate": 9.96549343382022e-07, "loss": 0.0324, "step": 129580 }, { "epoch": 1.3845825097494524, "grad_norm": 0.07574836909770966, "learning_rate": 9.96547372668293e-07, "loss": 0.0298, "step": 129590 }, { "epoch": 1.384689353063732, "grad_norm": 5.8864359855651855, "learning_rate": 9.965454013939243e-07, "loss": 0.0434, "step": 129600 }, { "epoch": 1.3847961963780118, "grad_norm": 1.360684871673584, "learning_rate": 9.965434295589183e-07, "loss": 0.0569, "step": 129610 }, { "epoch": 1.3849030396922912, "grad_norm": 2.5087568759918213, "learning_rate": 9.965414571632775e-07, "loss": 0.014, "step": 129620 }, { "epoch": 1.385009883006571, "grad_norm": 0.009985407814383507, "learning_rate": 9.965394842070038e-07, "loss": 0.0288, "step": 129630 }, { "epoch": 1.3851167263208506, "grad_norm": 4.036612510681152, "learning_rate": 9.965375106900996e-07, "loss": 0.0623, "step": 129640 }, { "epoch": 1.38522356963513, "grad_norm": 4.8232879638671875, "learning_rate": 9.96535536612567e-07, "loss": 0.03, "step": 129650 }, { "epoch": 1.3853304129494097, "grad_norm": 4.26101541519165, "learning_rate": 9.965335619744085e-07, "loss": 0.0371, "step": 129660 }, { "epoch": 1.3854372562636894, "grad_norm": 0.12874393165111542, "learning_rate": 9.965315867756259e-07, "loss": 0.0413, "step": 129670 }, { "epoch": 1.3855440995779689, "grad_norm": 2.7753806114196777, "learning_rate": 9.965296110162218e-07, "loss": 0.0762, "step": 129680 }, { "epoch": 1.3856509428922485, "grad_norm": 1.3177540302276611, "learning_rate": 9.965276346961982e-07, "loss": 0.0376, "step": 129690 }, { "epoch": 1.3857577862065282, "grad_norm": 6.057321071624756, "learning_rate": 9.965256578155575e-07, "loss": 0.0387, "step": 129700 }, { "epoch": 1.3858646295208077, "grad_norm": 0.33862197399139404, "learning_rate": 9.96523680374302e-07, "loss": 0.0254, "step": 129710 }, { "epoch": 1.3859714728350874, "grad_norm": 0.8943276405334473, "learning_rate": 9.965217023724335e-07, "loss": 0.0637, "step": 129720 }, { "epoch": 1.386078316149367, "grad_norm": 0.10702481865882874, "learning_rate": 9.965197238099547e-07, "loss": 0.0275, "step": 129730 }, { "epoch": 1.3861851594636465, "grad_norm": 0.04184112325310707, "learning_rate": 9.965177446868676e-07, "loss": 0.0144, "step": 129740 }, { "epoch": 1.3862920027779262, "grad_norm": 4.088136196136475, "learning_rate": 9.965157650031745e-07, "loss": 0.0223, "step": 129750 }, { "epoch": 1.3863988460922059, "grad_norm": 0.024656666442751884, "learning_rate": 9.965137847588775e-07, "loss": 0.0247, "step": 129760 }, { "epoch": 1.3865056894064853, "grad_norm": 3.874253034591675, "learning_rate": 9.965118039539792e-07, "loss": 0.0424, "step": 129770 }, { "epoch": 1.386612532720765, "grad_norm": 0.14259710907936096, "learning_rate": 9.965098225884816e-07, "loss": 0.0421, "step": 129780 }, { "epoch": 1.3867193760350447, "grad_norm": 1.3211925029754639, "learning_rate": 9.965078406623867e-07, "loss": 0.0092, "step": 129790 }, { "epoch": 1.3868262193493242, "grad_norm": 3.7965235710144043, "learning_rate": 9.96505858175697e-07, "loss": 0.0608, "step": 129800 }, { "epoch": 1.3869330626636038, "grad_norm": 4.772802352905273, "learning_rate": 9.96503875128415e-07, "loss": 0.0552, "step": 129810 }, { "epoch": 1.3870399059778835, "grad_norm": 13.722127914428711, "learning_rate": 9.965018915205425e-07, "loss": 0.0906, "step": 129820 }, { "epoch": 1.387146749292163, "grad_norm": 0.013906311243772507, "learning_rate": 9.964999073520818e-07, "loss": 0.0165, "step": 129830 }, { "epoch": 1.3872535926064427, "grad_norm": 9.49006462097168, "learning_rate": 9.964979226230353e-07, "loss": 0.0717, "step": 129840 }, { "epoch": 1.3873604359207223, "grad_norm": 8.305465698242188, "learning_rate": 9.964959373334052e-07, "loss": 0.0337, "step": 129850 }, { "epoch": 1.3874672792350018, "grad_norm": 2.1203291416168213, "learning_rate": 9.964939514831936e-07, "loss": 0.0136, "step": 129860 }, { "epoch": 1.3875741225492815, "grad_norm": 5.978506565093994, "learning_rate": 9.96491965072403e-07, "loss": 0.081, "step": 129870 }, { "epoch": 1.3876809658635612, "grad_norm": 0.8399505615234375, "learning_rate": 9.964899781010353e-07, "loss": 0.0476, "step": 129880 }, { "epoch": 1.3877878091778406, "grad_norm": 1.938448190689087, "learning_rate": 9.964879905690932e-07, "loss": 0.0178, "step": 129890 }, { "epoch": 1.3878946524921203, "grad_norm": 7.186792373657227, "learning_rate": 9.964860024765786e-07, "loss": 0.0357, "step": 129900 }, { "epoch": 1.3880014958064, "grad_norm": 2.985414743423462, "learning_rate": 9.964840138234937e-07, "loss": 0.0385, "step": 129910 }, { "epoch": 1.3881083391206794, "grad_norm": 14.17479133605957, "learning_rate": 9.964820246098412e-07, "loss": 0.0297, "step": 129920 }, { "epoch": 1.3882151824349591, "grad_norm": 2.936042547225952, "learning_rate": 9.964800348356227e-07, "loss": 0.0254, "step": 129930 }, { "epoch": 1.3883220257492388, "grad_norm": 0.39811402559280396, "learning_rate": 9.96478044500841e-07, "loss": 0.0137, "step": 129940 }, { "epoch": 1.3884288690635183, "grad_norm": 0.2887384593486786, "learning_rate": 9.964760536054979e-07, "loss": 0.0397, "step": 129950 }, { "epoch": 1.388535712377798, "grad_norm": 0.42248353362083435, "learning_rate": 9.96474062149596e-07, "loss": 0.0161, "step": 129960 }, { "epoch": 1.3886425556920776, "grad_norm": 1.7678035497665405, "learning_rate": 9.964720701331373e-07, "loss": 0.0229, "step": 129970 }, { "epoch": 1.388749399006357, "grad_norm": 0.7044579982757568, "learning_rate": 9.96470077556124e-07, "loss": 0.0023, "step": 129980 }, { "epoch": 1.3888562423206368, "grad_norm": 4.365147590637207, "learning_rate": 9.96468084418559e-07, "loss": 0.0312, "step": 129990 }, { "epoch": 1.3889630856349164, "grad_norm": 5.599092483520508, "learning_rate": 9.964660907204438e-07, "loss": 0.0166, "step": 130000 }, { "epoch": 1.389069928949196, "grad_norm": 0.125150665640831, "learning_rate": 9.964640964617809e-07, "loss": 0.0206, "step": 130010 }, { "epoch": 1.3891767722634756, "grad_norm": 0.03902437165379524, "learning_rate": 9.964621016425725e-07, "loss": 0.0074, "step": 130020 }, { "epoch": 1.3892836155777553, "grad_norm": 0.10155647248029709, "learning_rate": 9.96460106262821e-07, "loss": 0.0576, "step": 130030 }, { "epoch": 1.3893904588920347, "grad_norm": 0.17476707696914673, "learning_rate": 9.964581103225285e-07, "loss": 0.0225, "step": 130040 }, { "epoch": 1.3894973022063144, "grad_norm": 0.898273229598999, "learning_rate": 9.964561138216974e-07, "loss": 0.015, "step": 130050 }, { "epoch": 1.389604145520594, "grad_norm": 5.112331867218018, "learning_rate": 9.9645411676033e-07, "loss": 0.0388, "step": 130060 }, { "epoch": 1.3897109888348735, "grad_norm": 7.436931610107422, "learning_rate": 9.964521191384282e-07, "loss": 0.056, "step": 130070 }, { "epoch": 1.3898178321491532, "grad_norm": 0.5878011584281921, "learning_rate": 9.964501209559946e-07, "loss": 0.0314, "step": 130080 }, { "epoch": 1.389924675463433, "grad_norm": 0.8760228753089905, "learning_rate": 9.964481222130315e-07, "loss": 0.017, "step": 130090 }, { "epoch": 1.3900315187777124, "grad_norm": 4.314192295074463, "learning_rate": 9.964461229095407e-07, "loss": 0.1103, "step": 130100 }, { "epoch": 1.390138362091992, "grad_norm": 0.6152493953704834, "learning_rate": 9.96444123045525e-07, "loss": 0.0155, "step": 130110 }, { "epoch": 1.3902452054062717, "grad_norm": 0.3911454975605011, "learning_rate": 9.964421226209863e-07, "loss": 0.0141, "step": 130120 }, { "epoch": 1.3903520487205512, "grad_norm": 2.6691272258758545, "learning_rate": 9.96440121635927e-07, "loss": 0.0255, "step": 130130 }, { "epoch": 1.3904588920348309, "grad_norm": 0.1927473098039627, "learning_rate": 9.964381200903493e-07, "loss": 0.0182, "step": 130140 }, { "epoch": 1.3905657353491105, "grad_norm": 4.895778656005859, "learning_rate": 9.964361179842556e-07, "loss": 0.0076, "step": 130150 }, { "epoch": 1.39067257866339, "grad_norm": 8.541091918945312, "learning_rate": 9.964341153176478e-07, "loss": 0.023, "step": 130160 }, { "epoch": 1.3907794219776697, "grad_norm": 4.9705586433410645, "learning_rate": 9.964321120905287e-07, "loss": 0.0325, "step": 130170 }, { "epoch": 1.3908862652919494, "grad_norm": 0.7919485569000244, "learning_rate": 9.964301083029e-07, "loss": 0.0313, "step": 130180 }, { "epoch": 1.390993108606229, "grad_norm": 3.8202123641967773, "learning_rate": 9.964281039547646e-07, "loss": 0.0123, "step": 130190 }, { "epoch": 1.3910999519205085, "grad_norm": 0.6398949027061462, "learning_rate": 9.964260990461241e-07, "loss": 0.0198, "step": 130200 }, { "epoch": 1.3912067952347882, "grad_norm": 7.936254024505615, "learning_rate": 9.964240935769812e-07, "loss": 0.0462, "step": 130210 }, { "epoch": 1.3913136385490679, "grad_norm": 1.9019811153411865, "learning_rate": 9.96422087547338e-07, "loss": 0.0745, "step": 130220 }, { "epoch": 1.3914204818633473, "grad_norm": 0.0064164213836193085, "learning_rate": 9.964200809571969e-07, "loss": 0.0084, "step": 130230 }, { "epoch": 1.391527325177627, "grad_norm": 6.171793460845947, "learning_rate": 9.9641807380656e-07, "loss": 0.0566, "step": 130240 }, { "epoch": 1.3916341684919067, "grad_norm": 2.262101411819458, "learning_rate": 9.964160660954294e-07, "loss": 0.038, "step": 130250 }, { "epoch": 1.3917410118061861, "grad_norm": 6.20227575302124, "learning_rate": 9.96414057823808e-07, "loss": 0.0907, "step": 130260 }, { "epoch": 1.3918478551204658, "grad_norm": 6.741557598114014, "learning_rate": 9.964120489916975e-07, "loss": 0.0513, "step": 130270 }, { "epoch": 1.3919546984347455, "grad_norm": 0.008693177253007889, "learning_rate": 9.964100395991003e-07, "loss": 0.0599, "step": 130280 }, { "epoch": 1.3920615417490252, "grad_norm": 12.056193351745605, "learning_rate": 9.964080296460186e-07, "loss": 0.0275, "step": 130290 }, { "epoch": 1.3921683850633046, "grad_norm": 1.063791036605835, "learning_rate": 9.964060191324548e-07, "loss": 0.0407, "step": 130300 }, { "epoch": 1.3922752283775843, "grad_norm": 6.803369045257568, "learning_rate": 9.964040080584112e-07, "loss": 0.0244, "step": 130310 }, { "epoch": 1.392382071691864, "grad_norm": 1.9069766998291016, "learning_rate": 9.9640199642389e-07, "loss": 0.019, "step": 130320 }, { "epoch": 1.3924889150061435, "grad_norm": 5.9735565185546875, "learning_rate": 9.963999842288936e-07, "loss": 0.0381, "step": 130330 }, { "epoch": 1.3925957583204231, "grad_norm": 2.07733416557312, "learning_rate": 9.96397971473424e-07, "loss": 0.0237, "step": 130340 }, { "epoch": 1.3927026016347028, "grad_norm": 3.4242045879364014, "learning_rate": 9.963959581574836e-07, "loss": 0.0194, "step": 130350 }, { "epoch": 1.3928094449489823, "grad_norm": 0.007955270819365978, "learning_rate": 9.963939442810747e-07, "loss": 0.0278, "step": 130360 }, { "epoch": 1.392916288263262, "grad_norm": 8.849594116210938, "learning_rate": 9.963919298441996e-07, "loss": 0.0624, "step": 130370 }, { "epoch": 1.3930231315775417, "grad_norm": 2.385744571685791, "learning_rate": 9.963899148468607e-07, "loss": 0.0417, "step": 130380 }, { "epoch": 1.393129974891821, "grad_norm": 0.27029189467430115, "learning_rate": 9.963878992890599e-07, "loss": 0.125, "step": 130390 }, { "epoch": 1.3932368182061008, "grad_norm": 8.18493366241455, "learning_rate": 9.963858831707998e-07, "loss": 0.0359, "step": 130400 }, { "epoch": 1.3933436615203805, "grad_norm": 1.3897218704223633, "learning_rate": 9.963838664920828e-07, "loss": 0.0406, "step": 130410 }, { "epoch": 1.39345050483466, "grad_norm": 3.5235557556152344, "learning_rate": 9.963818492529105e-07, "loss": 0.0399, "step": 130420 }, { "epoch": 1.3935573481489396, "grad_norm": 2.619105815887451, "learning_rate": 9.963798314532858e-07, "loss": 0.0054, "step": 130430 }, { "epoch": 1.3936641914632193, "grad_norm": 0.4996016323566437, "learning_rate": 9.96377813093211e-07, "loss": 0.0517, "step": 130440 }, { "epoch": 1.3937710347774988, "grad_norm": 1.0012671947479248, "learning_rate": 9.96375794172688e-07, "loss": 0.0408, "step": 130450 }, { "epoch": 1.3938778780917784, "grad_norm": 16.193286895751953, "learning_rate": 9.963737746917193e-07, "loss": 0.0649, "step": 130460 }, { "epoch": 1.3939847214060581, "grad_norm": 2.783461093902588, "learning_rate": 9.963717546503071e-07, "loss": 0.0165, "step": 130470 }, { "epoch": 1.3940915647203376, "grad_norm": 0.43880128860473633, "learning_rate": 9.963697340484539e-07, "loss": 0.0292, "step": 130480 }, { "epoch": 1.3941984080346173, "grad_norm": 6.279696464538574, "learning_rate": 9.963677128861617e-07, "loss": 0.048, "step": 130490 }, { "epoch": 1.394305251348897, "grad_norm": 0.27096644043922424, "learning_rate": 9.963656911634327e-07, "loss": 0.0085, "step": 130500 }, { "epoch": 1.3944120946631764, "grad_norm": 0.058275528252124786, "learning_rate": 9.963636688802697e-07, "loss": 0.0197, "step": 130510 }, { "epoch": 1.394518937977456, "grad_norm": 0.9620921611785889, "learning_rate": 9.963616460366745e-07, "loss": 0.0776, "step": 130520 }, { "epoch": 1.3946257812917358, "grad_norm": 11.262846946716309, "learning_rate": 9.963596226326495e-07, "loss": 0.061, "step": 130530 }, { "epoch": 1.3947326246060152, "grad_norm": 0.39716067910194397, "learning_rate": 9.963575986681972e-07, "loss": 0.0284, "step": 130540 }, { "epoch": 1.394839467920295, "grad_norm": 3.4293620586395264, "learning_rate": 9.963555741433196e-07, "loss": 0.0363, "step": 130550 }, { "epoch": 1.3949463112345746, "grad_norm": 0.08236522227525711, "learning_rate": 9.96353549058019e-07, "loss": 0.0544, "step": 130560 }, { "epoch": 1.395053154548854, "grad_norm": 4.512579441070557, "learning_rate": 9.963515234122979e-07, "loss": 0.0549, "step": 130570 }, { "epoch": 1.3951599978631337, "grad_norm": 6.310974597930908, "learning_rate": 9.963494972061583e-07, "loss": 0.0704, "step": 130580 }, { "epoch": 1.3952668411774134, "grad_norm": 2.0959692001342773, "learning_rate": 9.963474704396028e-07, "loss": 0.0197, "step": 130590 }, { "epoch": 1.3953736844916929, "grad_norm": 4.6674885749816895, "learning_rate": 9.963454431126337e-07, "loss": 0.0447, "step": 130600 }, { "epoch": 1.3954805278059725, "grad_norm": 3.9438529014587402, "learning_rate": 9.963434152252529e-07, "loss": 0.0549, "step": 130610 }, { "epoch": 1.3955873711202522, "grad_norm": 2.5902326107025146, "learning_rate": 9.963413867774628e-07, "loss": 0.0471, "step": 130620 }, { "epoch": 1.3956942144345317, "grad_norm": 9.620759010314941, "learning_rate": 9.96339357769266e-07, "loss": 0.0359, "step": 130630 }, { "epoch": 1.3958010577488114, "grad_norm": 0.8316051959991455, "learning_rate": 9.963373282006648e-07, "loss": 0.0743, "step": 130640 }, { "epoch": 1.395907901063091, "grad_norm": 14.059501647949219, "learning_rate": 9.963352980716611e-07, "loss": 0.0348, "step": 130650 }, { "epoch": 1.3960147443773705, "grad_norm": 6.707486152648926, "learning_rate": 9.963332673822573e-07, "loss": 0.0304, "step": 130660 }, { "epoch": 1.3961215876916502, "grad_norm": 1.1120156049728394, "learning_rate": 9.963312361324557e-07, "loss": 0.0095, "step": 130670 }, { "epoch": 1.3962284310059299, "grad_norm": 3.5351786613464355, "learning_rate": 9.963292043222591e-07, "loss": 0.0165, "step": 130680 }, { "epoch": 1.3963352743202093, "grad_norm": 12.244812965393066, "learning_rate": 9.963271719516691e-07, "loss": 0.0399, "step": 130690 }, { "epoch": 1.396442117634489, "grad_norm": 1.8878635168075562, "learning_rate": 9.963251390206883e-07, "loss": 0.0678, "step": 130700 }, { "epoch": 1.3965489609487687, "grad_norm": 0.26865166425704956, "learning_rate": 9.963231055293189e-07, "loss": 0.0528, "step": 130710 }, { "epoch": 1.3966558042630481, "grad_norm": 3.2156221866607666, "learning_rate": 9.963210714775633e-07, "loss": 0.0527, "step": 130720 }, { "epoch": 1.3967626475773278, "grad_norm": 4.575243949890137, "learning_rate": 9.963190368654236e-07, "loss": 0.0754, "step": 130730 }, { "epoch": 1.3968694908916075, "grad_norm": 4.531403064727783, "learning_rate": 9.963170016929026e-07, "loss": 0.0669, "step": 130740 }, { "epoch": 1.396976334205887, "grad_norm": 0.06867901980876923, "learning_rate": 9.963149659600018e-07, "loss": 0.0078, "step": 130750 }, { "epoch": 1.3970831775201666, "grad_norm": 0.27784112095832825, "learning_rate": 9.963129296667243e-07, "loss": 0.0303, "step": 130760 }, { "epoch": 1.3971900208344463, "grad_norm": 5.877257347106934, "learning_rate": 9.963108928130718e-07, "loss": 0.0534, "step": 130770 }, { "epoch": 1.3972968641487258, "grad_norm": 0.08344743400812149, "learning_rate": 9.96308855399047e-07, "loss": 0.031, "step": 130780 }, { "epoch": 1.3974037074630055, "grad_norm": 4.286351203918457, "learning_rate": 9.96306817424652e-07, "loss": 0.078, "step": 130790 }, { "epoch": 1.3975105507772851, "grad_norm": 10.150189399719238, "learning_rate": 9.963047788898893e-07, "loss": 0.0144, "step": 130800 }, { "epoch": 1.3976173940915646, "grad_norm": 0.790804386138916, "learning_rate": 9.963027397947608e-07, "loss": 0.0444, "step": 130810 }, { "epoch": 1.3977242374058443, "grad_norm": 0.012784193269908428, "learning_rate": 9.963007001392693e-07, "loss": 0.0053, "step": 130820 }, { "epoch": 1.397831080720124, "grad_norm": 0.10945644974708557, "learning_rate": 9.962986599234167e-07, "loss": 0.0546, "step": 130830 }, { "epoch": 1.3979379240344034, "grad_norm": 6.737214088439941, "learning_rate": 9.962966191472054e-07, "loss": 0.0559, "step": 130840 }, { "epoch": 1.398044767348683, "grad_norm": 0.9377400279045105, "learning_rate": 9.962945778106377e-07, "loss": 0.0215, "step": 130850 }, { "epoch": 1.3981516106629628, "grad_norm": 7.787601470947266, "learning_rate": 9.962925359137163e-07, "loss": 0.0053, "step": 130860 }, { "epoch": 1.3982584539772422, "grad_norm": 10.29105281829834, "learning_rate": 9.96290493456443e-07, "loss": 0.0258, "step": 130870 }, { "epoch": 1.398365297291522, "grad_norm": 5.238994598388672, "learning_rate": 9.9628845043882e-07, "loss": 0.0453, "step": 130880 }, { "epoch": 1.3984721406058016, "grad_norm": 14.166131973266602, "learning_rate": 9.9628640686085e-07, "loss": 0.0619, "step": 130890 }, { "epoch": 1.398578983920081, "grad_norm": 0.6999537348747253, "learning_rate": 9.962843627225355e-07, "loss": 0.0248, "step": 130900 }, { "epoch": 1.3986858272343607, "grad_norm": 1.000624656677246, "learning_rate": 9.962823180238784e-07, "loss": 0.0395, "step": 130910 }, { "epoch": 1.3987926705486404, "grad_norm": 0.08287576586008072, "learning_rate": 9.962802727648808e-07, "loss": 0.0438, "step": 130920 }, { "epoch": 1.39889951386292, "grad_norm": 0.1131521686911583, "learning_rate": 9.962782269455457e-07, "loss": 0.0191, "step": 130930 }, { "epoch": 1.3990063571771996, "grad_norm": 0.1659030020236969, "learning_rate": 9.962761805658749e-07, "loss": 0.0387, "step": 130940 }, { "epoch": 1.3991132004914792, "grad_norm": 0.23153860867023468, "learning_rate": 9.962741336258707e-07, "loss": 0.0195, "step": 130950 }, { "epoch": 1.399220043805759, "grad_norm": 2.8078205585479736, "learning_rate": 9.962720861255356e-07, "loss": 0.0164, "step": 130960 }, { "epoch": 1.3993268871200384, "grad_norm": 1.4007678031921387, "learning_rate": 9.96270038064872e-07, "loss": 0.0134, "step": 130970 }, { "epoch": 1.399433730434318, "grad_norm": 0.30649545788764954, "learning_rate": 9.962679894438818e-07, "loss": 0.0094, "step": 130980 }, { "epoch": 1.3995405737485977, "grad_norm": 9.640520095825195, "learning_rate": 9.962659402625677e-07, "loss": 0.0406, "step": 130990 }, { "epoch": 1.3996474170628772, "grad_norm": 0.3869655728340149, "learning_rate": 9.96263890520932e-07, "loss": 0.05, "step": 131000 }, { "epoch": 1.399754260377157, "grad_norm": 1.9655461311340332, "learning_rate": 9.96261840218977e-07, "loss": 0.0444, "step": 131010 }, { "epoch": 1.3998611036914366, "grad_norm": 0.2736366093158722, "learning_rate": 9.962597893567047e-07, "loss": 0.0184, "step": 131020 }, { "epoch": 1.3999679470057163, "grad_norm": 0.33039867877960205, "learning_rate": 9.962577379341177e-07, "loss": 0.038, "step": 131030 }, { "epoch": 1.4000747903199957, "grad_norm": 1.5293875932693481, "learning_rate": 9.962556859512184e-07, "loss": 0.0268, "step": 131040 }, { "epoch": 1.4001816336342754, "grad_norm": 1.7199467420578003, "learning_rate": 9.96253633408009e-07, "loss": 0.0472, "step": 131050 }, { "epoch": 1.400288476948555, "grad_norm": 2.7588584423065186, "learning_rate": 9.962515803044916e-07, "loss": 0.0175, "step": 131060 }, { "epoch": 1.4003953202628345, "grad_norm": 3.643770456314087, "learning_rate": 9.962495266406688e-07, "loss": 0.011, "step": 131070 }, { "epoch": 1.4005021635771142, "grad_norm": 1.4752651453018188, "learning_rate": 9.962474724165426e-07, "loss": 0.0472, "step": 131080 }, { "epoch": 1.400609006891394, "grad_norm": 4.200893402099609, "learning_rate": 9.962454176321157e-07, "loss": 0.0312, "step": 131090 }, { "epoch": 1.4007158502056734, "grad_norm": 6.38267707824707, "learning_rate": 9.962433622873905e-07, "loss": 0.033, "step": 131100 }, { "epoch": 1.400822693519953, "grad_norm": 0.07479190826416016, "learning_rate": 9.962413063823689e-07, "loss": 0.0268, "step": 131110 }, { "epoch": 1.4009295368342327, "grad_norm": 0.09865040332078934, "learning_rate": 9.962392499170534e-07, "loss": 0.0265, "step": 131120 }, { "epoch": 1.4010363801485122, "grad_norm": 5.5091400146484375, "learning_rate": 9.962371928914464e-07, "loss": 0.0281, "step": 131130 }, { "epoch": 1.4011432234627919, "grad_norm": 1.0686372518539429, "learning_rate": 9.9623513530555e-07, "loss": 0.0068, "step": 131140 }, { "epoch": 1.4012500667770715, "grad_norm": 0.02662944234907627, "learning_rate": 9.962330771593668e-07, "loss": 0.0285, "step": 131150 }, { "epoch": 1.401356910091351, "grad_norm": 5.570668697357178, "learning_rate": 9.962310184528992e-07, "loss": 0.0236, "step": 131160 }, { "epoch": 1.4014637534056307, "grad_norm": 5.1153883934021, "learning_rate": 9.96228959186149e-07, "loss": 0.0159, "step": 131170 }, { "epoch": 1.4015705967199104, "grad_norm": 0.10646345466375351, "learning_rate": 9.96226899359119e-07, "loss": 0.0786, "step": 131180 }, { "epoch": 1.4016774400341898, "grad_norm": 0.026206593960523605, "learning_rate": 9.962248389718115e-07, "loss": 0.0026, "step": 131190 }, { "epoch": 1.4017842833484695, "grad_norm": 0.08546132594347, "learning_rate": 9.962227780242285e-07, "loss": 0.0536, "step": 131200 }, { "epoch": 1.4018911266627492, "grad_norm": 0.7164894938468933, "learning_rate": 9.962207165163725e-07, "loss": 0.0264, "step": 131210 }, { "epoch": 1.4019979699770286, "grad_norm": 1.3021163940429688, "learning_rate": 9.96218654448246e-07, "loss": 0.0109, "step": 131220 }, { "epoch": 1.4021048132913083, "grad_norm": 8.603438377380371, "learning_rate": 9.962165918198513e-07, "loss": 0.0287, "step": 131230 }, { "epoch": 1.402211656605588, "grad_norm": 0.12555570900440216, "learning_rate": 9.962145286311902e-07, "loss": 0.0356, "step": 131240 }, { "epoch": 1.4023184999198675, "grad_norm": 6.461489677429199, "learning_rate": 9.96212464882266e-07, "loss": 0.0889, "step": 131250 }, { "epoch": 1.4024253432341471, "grad_norm": 1.060863971710205, "learning_rate": 9.9621040057308e-07, "loss": 0.021, "step": 131260 }, { "epoch": 1.4025321865484268, "grad_norm": 0.7497643232345581, "learning_rate": 9.962083357036353e-07, "loss": 0.0412, "step": 131270 }, { "epoch": 1.4026390298627063, "grad_norm": 2.49415922164917, "learning_rate": 9.962062702739338e-07, "loss": 0.0173, "step": 131280 }, { "epoch": 1.402745873176986, "grad_norm": 3.000371217727661, "learning_rate": 9.96204204283978e-07, "loss": 0.0367, "step": 131290 }, { "epoch": 1.4028527164912656, "grad_norm": 0.11918386071920395, "learning_rate": 9.962021377337701e-07, "loss": 0.0549, "step": 131300 }, { "epoch": 1.402959559805545, "grad_norm": 1.4285115003585815, "learning_rate": 9.962000706233126e-07, "loss": 0.0083, "step": 131310 }, { "epoch": 1.4030664031198248, "grad_norm": 9.413222312927246, "learning_rate": 9.961980029526078e-07, "loss": 0.0713, "step": 131320 }, { "epoch": 1.4031732464341045, "grad_norm": 0.11664792150259018, "learning_rate": 9.961959347216583e-07, "loss": 0.0211, "step": 131330 }, { "epoch": 1.403280089748384, "grad_norm": 6.370896339416504, "learning_rate": 9.961938659304657e-07, "loss": 0.0461, "step": 131340 }, { "epoch": 1.4033869330626636, "grad_norm": 4.160171985626221, "learning_rate": 9.96191796579033e-07, "loss": 0.0594, "step": 131350 }, { "epoch": 1.4034937763769433, "grad_norm": 3.2851924896240234, "learning_rate": 9.961897266673623e-07, "loss": 0.0436, "step": 131360 }, { "epoch": 1.4036006196912227, "grad_norm": 1.0347856283187866, "learning_rate": 9.961876561954558e-07, "loss": 0.0601, "step": 131370 }, { "epoch": 1.4037074630055024, "grad_norm": 2.5199201107025146, "learning_rate": 9.96185585163316e-07, "loss": 0.0803, "step": 131380 }, { "epoch": 1.403814306319782, "grad_norm": 4.225775718688965, "learning_rate": 9.961835135709454e-07, "loss": 0.0204, "step": 131390 }, { "epoch": 1.4039211496340616, "grad_norm": 11.558602333068848, "learning_rate": 9.96181441418346e-07, "loss": 0.0893, "step": 131400 }, { "epoch": 1.4040279929483412, "grad_norm": 1.2675811052322388, "learning_rate": 9.961793687055204e-07, "loss": 0.0614, "step": 131410 }, { "epoch": 1.404134836262621, "grad_norm": 6.468095302581787, "learning_rate": 9.961772954324709e-07, "loss": 0.033, "step": 131420 }, { "epoch": 1.4042416795769004, "grad_norm": 6.513443946838379, "learning_rate": 9.961752215991997e-07, "loss": 0.0328, "step": 131430 }, { "epoch": 1.40434852289118, "grad_norm": 0.04507727548480034, "learning_rate": 9.961731472057091e-07, "loss": 0.0549, "step": 131440 }, { "epoch": 1.4044553662054597, "grad_norm": 9.761120796203613, "learning_rate": 9.961710722520018e-07, "loss": 0.06, "step": 131450 }, { "epoch": 1.4045622095197392, "grad_norm": 0.15542609989643097, "learning_rate": 9.961689967380798e-07, "loss": 0.0254, "step": 131460 }, { "epoch": 1.4046690528340189, "grad_norm": 6.147560119628906, "learning_rate": 9.961669206639457e-07, "loss": 0.0306, "step": 131470 }, { "epoch": 1.4047758961482986, "grad_norm": 0.170024573802948, "learning_rate": 9.961648440296015e-07, "loss": 0.0216, "step": 131480 }, { "epoch": 1.404882739462578, "grad_norm": 0.613309919834137, "learning_rate": 9.961627668350497e-07, "loss": 0.0254, "step": 131490 }, { "epoch": 1.4049895827768577, "grad_norm": 3.492236614227295, "learning_rate": 9.96160689080293e-07, "loss": 0.0166, "step": 131500 }, { "epoch": 1.4050964260911374, "grad_norm": 0.05746861547231674, "learning_rate": 9.961586107653331e-07, "loss": 0.016, "step": 131510 }, { "epoch": 1.4052032694054168, "grad_norm": 8.548062324523926, "learning_rate": 9.961565318901728e-07, "loss": 0.0094, "step": 131520 }, { "epoch": 1.4053101127196965, "grad_norm": 4.885876178741455, "learning_rate": 9.961544524548144e-07, "loss": 0.0329, "step": 131530 }, { "epoch": 1.4054169560339762, "grad_norm": 0.05036768689751625, "learning_rate": 9.961523724592601e-07, "loss": 0.1016, "step": 131540 }, { "epoch": 1.4055237993482557, "grad_norm": 9.998749732971191, "learning_rate": 9.961502919035123e-07, "loss": 0.0251, "step": 131550 }, { "epoch": 1.4056306426625353, "grad_norm": 4.6749267578125, "learning_rate": 9.961482107875734e-07, "loss": 0.0301, "step": 131560 }, { "epoch": 1.405737485976815, "grad_norm": 0.8403929471969604, "learning_rate": 9.961461291114458e-07, "loss": 0.0268, "step": 131570 }, { "epoch": 1.4058443292910945, "grad_norm": 0.30270758271217346, "learning_rate": 9.961440468751316e-07, "loss": 0.0577, "step": 131580 }, { "epoch": 1.4059511726053742, "grad_norm": 0.03734298795461655, "learning_rate": 9.961419640786335e-07, "loss": 0.0094, "step": 131590 }, { "epoch": 1.4060580159196538, "grad_norm": 4.845508098602295, "learning_rate": 9.961398807219536e-07, "loss": 0.0158, "step": 131600 }, { "epoch": 1.4061648592339333, "grad_norm": 7.956757545471191, "learning_rate": 9.961377968050944e-07, "loss": 0.0468, "step": 131610 }, { "epoch": 1.406271702548213, "grad_norm": 2.4329757690429688, "learning_rate": 9.961357123280582e-07, "loss": 0.0252, "step": 131620 }, { "epoch": 1.4063785458624927, "grad_norm": 1.7312828302383423, "learning_rate": 9.961336272908472e-07, "loss": 0.0773, "step": 131630 }, { "epoch": 1.4064853891767721, "grad_norm": 0.029730675742030144, "learning_rate": 9.96131541693464e-07, "loss": 0.003, "step": 131640 }, { "epoch": 1.4065922324910518, "grad_norm": 0.11076144874095917, "learning_rate": 9.961294555359109e-07, "loss": 0.0836, "step": 131650 }, { "epoch": 1.4066990758053315, "grad_norm": 2.5992431640625, "learning_rate": 9.9612736881819e-07, "loss": 0.0359, "step": 131660 }, { "epoch": 1.4068059191196112, "grad_norm": 2.1440775394439697, "learning_rate": 9.96125281540304e-07, "loss": 0.1138, "step": 131670 }, { "epoch": 1.4069127624338906, "grad_norm": 1.9772785902023315, "learning_rate": 9.961231937022552e-07, "loss": 0.0123, "step": 131680 }, { "epoch": 1.4070196057481703, "grad_norm": 6.647693634033203, "learning_rate": 9.961211053040455e-07, "loss": 0.0229, "step": 131690 }, { "epoch": 1.40712644906245, "grad_norm": 9.135759353637695, "learning_rate": 9.96119016345678e-07, "loss": 0.0392, "step": 131700 }, { "epoch": 1.4072332923767294, "grad_norm": 0.02379176951944828, "learning_rate": 9.961169268271546e-07, "loss": 0.0116, "step": 131710 }, { "epoch": 1.4073401356910091, "grad_norm": 7.832708358764648, "learning_rate": 9.961148367484776e-07, "loss": 0.0372, "step": 131720 }, { "epoch": 1.4074469790052888, "grad_norm": 3.6521341800689697, "learning_rate": 9.961127461096496e-07, "loss": 0.0599, "step": 131730 }, { "epoch": 1.4075538223195683, "grad_norm": 2.9622251987457275, "learning_rate": 9.961106549106728e-07, "loss": 0.0309, "step": 131740 }, { "epoch": 1.407660665633848, "grad_norm": 0.044752903282642365, "learning_rate": 9.961085631515498e-07, "loss": 0.0199, "step": 131750 }, { "epoch": 1.4077675089481276, "grad_norm": 0.12305036187171936, "learning_rate": 9.961064708322828e-07, "loss": 0.1029, "step": 131760 }, { "epoch": 1.4078743522624073, "grad_norm": 2.872692108154297, "learning_rate": 9.96104377952874e-07, "loss": 0.0383, "step": 131770 }, { "epoch": 1.4079811955766868, "grad_norm": 0.051008276641368866, "learning_rate": 9.96102284513326e-07, "loss": 0.0344, "step": 131780 }, { "epoch": 1.4080880388909665, "grad_norm": 1.2301472425460815, "learning_rate": 9.961001905136411e-07, "loss": 0.0149, "step": 131790 }, { "epoch": 1.4081948822052461, "grad_norm": 0.3801259398460388, "learning_rate": 9.960980959538214e-07, "loss": 0.0754, "step": 131800 }, { "epoch": 1.4083017255195256, "grad_norm": 0.052568670362234116, "learning_rate": 9.960960008338696e-07, "loss": 0.0236, "step": 131810 }, { "epoch": 1.4084085688338053, "grad_norm": 0.9669118523597717, "learning_rate": 9.960939051537883e-07, "loss": 0.0199, "step": 131820 }, { "epoch": 1.408515412148085, "grad_norm": 3.6152760982513428, "learning_rate": 9.960918089135793e-07, "loss": 0.0444, "step": 131830 }, { "epoch": 1.4086222554623644, "grad_norm": 2.97776198387146, "learning_rate": 9.960897121132452e-07, "loss": 0.0115, "step": 131840 }, { "epoch": 1.408729098776644, "grad_norm": 4.794820785522461, "learning_rate": 9.960876147527883e-07, "loss": 0.0161, "step": 131850 }, { "epoch": 1.4088359420909238, "grad_norm": 0.2528463304042816, "learning_rate": 9.96085516832211e-07, "loss": 0.014, "step": 131860 }, { "epoch": 1.4089427854052032, "grad_norm": 7.022368431091309, "learning_rate": 9.960834183515158e-07, "loss": 0.0211, "step": 131870 }, { "epoch": 1.409049628719483, "grad_norm": 5.8858160972595215, "learning_rate": 9.960813193107052e-07, "loss": 0.0329, "step": 131880 }, { "epoch": 1.4091564720337626, "grad_norm": 2.638434886932373, "learning_rate": 9.960792197097812e-07, "loss": 0.0243, "step": 131890 }, { "epoch": 1.409263315348042, "grad_norm": 0.04318838566541672, "learning_rate": 9.960771195487463e-07, "loss": 0.0238, "step": 131900 }, { "epoch": 1.4093701586623217, "grad_norm": 0.9327917098999023, "learning_rate": 9.960750188276027e-07, "loss": 0.027, "step": 131910 }, { "epoch": 1.4094770019766014, "grad_norm": 3.67024564743042, "learning_rate": 9.960729175463532e-07, "loss": 0.0628, "step": 131920 }, { "epoch": 1.4095838452908809, "grad_norm": 0.8111317753791809, "learning_rate": 9.96070815705e-07, "loss": 0.0321, "step": 131930 }, { "epoch": 1.4096906886051606, "grad_norm": 0.2440461814403534, "learning_rate": 9.960687133035453e-07, "loss": 0.0179, "step": 131940 }, { "epoch": 1.4097975319194402, "grad_norm": 3.9371213912963867, "learning_rate": 9.960666103419915e-07, "loss": 0.046, "step": 131950 }, { "epoch": 1.4099043752337197, "grad_norm": 1.9985533952713013, "learning_rate": 9.960645068203411e-07, "loss": 0.0232, "step": 131960 }, { "epoch": 1.4100112185479994, "grad_norm": 0.03694488853216171, "learning_rate": 9.960624027385965e-07, "loss": 0.1253, "step": 131970 }, { "epoch": 1.410118061862279, "grad_norm": 2.1261401176452637, "learning_rate": 9.960602980967598e-07, "loss": 0.0362, "step": 131980 }, { "epoch": 1.4102249051765585, "grad_norm": 3.9931230545043945, "learning_rate": 9.960581928948338e-07, "loss": 0.0167, "step": 131990 }, { "epoch": 1.4103317484908382, "grad_norm": 0.6516847610473633, "learning_rate": 9.960560871328206e-07, "loss": 0.0223, "step": 132000 }, { "epoch": 1.4104385918051179, "grad_norm": 0.008350753225386143, "learning_rate": 9.960539808107228e-07, "loss": 0.0191, "step": 132010 }, { "epoch": 1.4105454351193973, "grad_norm": 9.616242408752441, "learning_rate": 9.960518739285424e-07, "loss": 0.0228, "step": 132020 }, { "epoch": 1.410652278433677, "grad_norm": 0.02932681515812874, "learning_rate": 9.96049766486282e-07, "loss": 0.0394, "step": 132030 }, { "epoch": 1.4107591217479567, "grad_norm": 4.098328113555908, "learning_rate": 9.96047658483944e-07, "loss": 0.1103, "step": 132040 }, { "epoch": 1.4108659650622362, "grad_norm": 0.022024426609277725, "learning_rate": 9.960455499215308e-07, "loss": 0.0417, "step": 132050 }, { "epoch": 1.4109728083765158, "grad_norm": 11.118948936462402, "learning_rate": 9.960434407990448e-07, "loss": 0.0405, "step": 132060 }, { "epoch": 1.4110796516907955, "grad_norm": 6.832371711730957, "learning_rate": 9.960413311164882e-07, "loss": 0.062, "step": 132070 }, { "epoch": 1.411186495005075, "grad_norm": 2.0437278747558594, "learning_rate": 9.960392208738634e-07, "loss": 0.0192, "step": 132080 }, { "epoch": 1.4112933383193547, "grad_norm": 0.00934937410056591, "learning_rate": 9.96037110071173e-07, "loss": 0.0545, "step": 132090 }, { "epoch": 1.4114001816336343, "grad_norm": 3.6101036071777344, "learning_rate": 9.960349987084194e-07, "loss": 0.0458, "step": 132100 }, { "epoch": 1.4115070249479138, "grad_norm": 5.298196315765381, "learning_rate": 9.960328867856048e-07, "loss": 0.0354, "step": 132110 }, { "epoch": 1.4116138682621935, "grad_norm": 1.7086938619613647, "learning_rate": 9.960307743027316e-07, "loss": 0.0163, "step": 132120 }, { "epoch": 1.4117207115764732, "grad_norm": 7.839761257171631, "learning_rate": 9.960286612598021e-07, "loss": 0.0335, "step": 132130 }, { "epoch": 1.4118275548907526, "grad_norm": 9.266294479370117, "learning_rate": 9.96026547656819e-07, "loss": 0.027, "step": 132140 }, { "epoch": 1.4119343982050323, "grad_norm": 6.566099643707275, "learning_rate": 9.960244334937843e-07, "loss": 0.0217, "step": 132150 }, { "epoch": 1.412041241519312, "grad_norm": 8.515144348144531, "learning_rate": 9.960223187707006e-07, "loss": 0.0377, "step": 132160 }, { "epoch": 1.4121480848335914, "grad_norm": 0.6678091883659363, "learning_rate": 9.960202034875704e-07, "loss": 0.0207, "step": 132170 }, { "epoch": 1.4122549281478711, "grad_norm": 4.090893745422363, "learning_rate": 9.96018087644396e-07, "loss": 0.03, "step": 132180 }, { "epoch": 1.4123617714621508, "grad_norm": 10.968836784362793, "learning_rate": 9.960159712411796e-07, "loss": 0.0415, "step": 132190 }, { "epoch": 1.4124686147764303, "grad_norm": 4.81638765335083, "learning_rate": 9.960138542779235e-07, "loss": 0.0373, "step": 132200 }, { "epoch": 1.41257545809071, "grad_norm": 0.04813232645392418, "learning_rate": 9.960117367546308e-07, "loss": 0.0139, "step": 132210 }, { "epoch": 1.4126823014049896, "grad_norm": 1.0614650249481201, "learning_rate": 9.96009618671303e-07, "loss": 0.021, "step": 132220 }, { "epoch": 1.412789144719269, "grad_norm": 1.7802376747131348, "learning_rate": 9.960075000279432e-07, "loss": 0.0142, "step": 132230 }, { "epoch": 1.4128959880335488, "grad_norm": 10.24050235748291, "learning_rate": 9.960053808245534e-07, "loss": 0.0498, "step": 132240 }, { "epoch": 1.4130028313478284, "grad_norm": 2.6620092391967773, "learning_rate": 9.96003261061136e-07, "loss": 0.0439, "step": 132250 }, { "epoch": 1.413109674662108, "grad_norm": 0.041686296463012695, "learning_rate": 9.960011407376937e-07, "loss": 0.0154, "step": 132260 }, { "epoch": 1.4132165179763876, "grad_norm": 0.07438073307275772, "learning_rate": 9.959990198542285e-07, "loss": 0.0444, "step": 132270 }, { "epoch": 1.4133233612906673, "grad_norm": 0.6156101822853088, "learning_rate": 9.95996898410743e-07, "loss": 0.0316, "step": 132280 }, { "epoch": 1.4134302046049467, "grad_norm": 0.19488529860973358, "learning_rate": 9.959947764072397e-07, "loss": 0.0121, "step": 132290 }, { "epoch": 1.4135370479192264, "grad_norm": 0.49043506383895874, "learning_rate": 9.959926538437206e-07, "loss": 0.0133, "step": 132300 }, { "epoch": 1.413643891233506, "grad_norm": 0.0915609747171402, "learning_rate": 9.959905307201885e-07, "loss": 0.0242, "step": 132310 }, { "epoch": 1.4137507345477855, "grad_norm": 0.6337485313415527, "learning_rate": 9.959884070366457e-07, "loss": 0.0261, "step": 132320 }, { "epoch": 1.4138575778620652, "grad_norm": 1.0637413263320923, "learning_rate": 9.959862827930944e-07, "loss": 0.0485, "step": 132330 }, { "epoch": 1.413964421176345, "grad_norm": 1.3816096782684326, "learning_rate": 9.959841579895374e-07, "loss": 0.052, "step": 132340 }, { "epoch": 1.4140712644906244, "grad_norm": 0.6747522950172424, "learning_rate": 9.959820326259767e-07, "loss": 0.059, "step": 132350 }, { "epoch": 1.414178107804904, "grad_norm": 1.0973730087280273, "learning_rate": 9.959799067024148e-07, "loss": 0.0233, "step": 132360 }, { "epoch": 1.4142849511191837, "grad_norm": 5.480284214019775, "learning_rate": 9.959777802188542e-07, "loss": 0.0544, "step": 132370 }, { "epoch": 1.4143917944334632, "grad_norm": 4.185750484466553, "learning_rate": 9.959756531752974e-07, "loss": 0.0877, "step": 132380 }, { "epoch": 1.4144986377477429, "grad_norm": 6.240504741668701, "learning_rate": 9.959735255717465e-07, "loss": 0.0307, "step": 132390 }, { "epoch": 1.4146054810620226, "grad_norm": 1.7675641775131226, "learning_rate": 9.959713974082042e-07, "loss": 0.0331, "step": 132400 }, { "epoch": 1.4147123243763022, "grad_norm": 0.017260020598769188, "learning_rate": 9.959692686846726e-07, "loss": 0.0362, "step": 132410 }, { "epoch": 1.4148191676905817, "grad_norm": 1.6481711864471436, "learning_rate": 9.959671394011544e-07, "loss": 0.0192, "step": 132420 }, { "epoch": 1.4149260110048614, "grad_norm": 10.774065971374512, "learning_rate": 9.959650095576516e-07, "loss": 0.0847, "step": 132430 }, { "epoch": 1.415032854319141, "grad_norm": 0.2660616636276245, "learning_rate": 9.959628791541672e-07, "loss": 0.0673, "step": 132440 }, { "epoch": 1.4151396976334205, "grad_norm": 0.16880519688129425, "learning_rate": 9.959607481907031e-07, "loss": 0.0458, "step": 132450 }, { "epoch": 1.4152465409477002, "grad_norm": 0.005259882193058729, "learning_rate": 9.959586166672617e-07, "loss": 0.0498, "step": 132460 }, { "epoch": 1.4153533842619799, "grad_norm": 0.25376495718955994, "learning_rate": 9.95956484583846e-07, "loss": 0.0764, "step": 132470 }, { "epoch": 1.4154602275762593, "grad_norm": 10.851680755615234, "learning_rate": 9.959543519404577e-07, "loss": 0.041, "step": 132480 }, { "epoch": 1.415567070890539, "grad_norm": 2.201323986053467, "learning_rate": 9.959522187370997e-07, "loss": 0.0037, "step": 132490 }, { "epoch": 1.4156739142048187, "grad_norm": 4.238783359527588, "learning_rate": 9.959500849737741e-07, "loss": 0.0333, "step": 132500 }, { "epoch": 1.4157807575190984, "grad_norm": 3.019822597503662, "learning_rate": 9.959479506504833e-07, "loss": 0.0297, "step": 132510 }, { "epoch": 1.4158876008333778, "grad_norm": 0.9909505248069763, "learning_rate": 9.9594581576723e-07, "loss": 0.0157, "step": 132520 }, { "epoch": 1.4159944441476575, "grad_norm": 0.23799534142017365, "learning_rate": 9.959436803240165e-07, "loss": 0.0725, "step": 132530 }, { "epoch": 1.4161012874619372, "grad_norm": 0.09386686235666275, "learning_rate": 9.95941544320845e-07, "loss": 0.0267, "step": 132540 }, { "epoch": 1.4162081307762167, "grad_norm": 2.3127553462982178, "learning_rate": 9.959394077577182e-07, "loss": 0.045, "step": 132550 }, { "epoch": 1.4163149740904963, "grad_norm": 1.9253125190734863, "learning_rate": 9.959372706346383e-07, "loss": 0.076, "step": 132560 }, { "epoch": 1.416421817404776, "grad_norm": 2.2777316570281982, "learning_rate": 9.959351329516079e-07, "loss": 0.0225, "step": 132570 }, { "epoch": 1.4165286607190555, "grad_norm": 2.438394069671631, "learning_rate": 9.959329947086291e-07, "loss": 0.0117, "step": 132580 }, { "epoch": 1.4166355040333352, "grad_norm": 2.5931994915008545, "learning_rate": 9.959308559057047e-07, "loss": 0.021, "step": 132590 }, { "epoch": 1.4167423473476148, "grad_norm": 4.246848106384277, "learning_rate": 9.95928716542837e-07, "loss": 0.0214, "step": 132600 }, { "epoch": 1.4168491906618943, "grad_norm": 0.08443024009466171, "learning_rate": 9.95926576620028e-07, "loss": 0.1118, "step": 132610 }, { "epoch": 1.416956033976174, "grad_norm": 0.3341299593448639, "learning_rate": 9.959244361372807e-07, "loss": 0.0433, "step": 132620 }, { "epoch": 1.4170628772904537, "grad_norm": 0.16022861003875732, "learning_rate": 9.959222950945974e-07, "loss": 0.0182, "step": 132630 }, { "epoch": 1.4171697206047331, "grad_norm": 1.2308868169784546, "learning_rate": 9.959201534919803e-07, "loss": 0.0199, "step": 132640 }, { "epoch": 1.4172765639190128, "grad_norm": 7.932565212249756, "learning_rate": 9.959180113294317e-07, "loss": 0.0408, "step": 132650 }, { "epoch": 1.4173834072332925, "grad_norm": 3.8409626483917236, "learning_rate": 9.959158686069544e-07, "loss": 0.0651, "step": 132660 }, { "epoch": 1.417490250547572, "grad_norm": 0.03577756881713867, "learning_rate": 9.959137253245508e-07, "loss": 0.0383, "step": 132670 }, { "epoch": 1.4175970938618516, "grad_norm": 5.313490867614746, "learning_rate": 9.959115814822229e-07, "loss": 0.0982, "step": 132680 }, { "epoch": 1.4177039371761313, "grad_norm": 0.08251102268695831, "learning_rate": 9.959094370799736e-07, "loss": 0.0418, "step": 132690 }, { "epoch": 1.4178107804904108, "grad_norm": 0.5623635053634644, "learning_rate": 9.959072921178052e-07, "loss": 0.0131, "step": 132700 }, { "epoch": 1.4179176238046904, "grad_norm": 2.221514940261841, "learning_rate": 9.959051465957199e-07, "loss": 0.0265, "step": 132710 }, { "epoch": 1.4180244671189701, "grad_norm": 0.02510373480618, "learning_rate": 9.959030005137201e-07, "loss": 0.0087, "step": 132720 }, { "epoch": 1.4181313104332496, "grad_norm": 3.794029951095581, "learning_rate": 9.959008538718088e-07, "loss": 0.041, "step": 132730 }, { "epoch": 1.4182381537475293, "grad_norm": 1.284420371055603, "learning_rate": 9.958987066699876e-07, "loss": 0.111, "step": 132740 }, { "epoch": 1.418344997061809, "grad_norm": 3.0546629428863525, "learning_rate": 9.958965589082595e-07, "loss": 0.0195, "step": 132750 }, { "epoch": 1.4184518403760884, "grad_norm": 0.2190459817647934, "learning_rate": 9.958944105866268e-07, "loss": 0.0177, "step": 132760 }, { "epoch": 1.418558683690368, "grad_norm": 0.27710437774658203, "learning_rate": 9.958922617050919e-07, "loss": 0.0065, "step": 132770 }, { "epoch": 1.4186655270046478, "grad_norm": 0.0017443839460611343, "learning_rate": 9.95890112263657e-07, "loss": 0.003, "step": 132780 }, { "epoch": 1.4187723703189272, "grad_norm": 8.799516677856445, "learning_rate": 9.958879622623249e-07, "loss": 0.0262, "step": 132790 }, { "epoch": 1.418879213633207, "grad_norm": 0.778611958026886, "learning_rate": 9.958858117010978e-07, "loss": 0.0078, "step": 132800 }, { "epoch": 1.4189860569474866, "grad_norm": 2.0718181133270264, "learning_rate": 9.958836605799784e-07, "loss": 0.0495, "step": 132810 }, { "epoch": 1.419092900261766, "grad_norm": 2.7814908027648926, "learning_rate": 9.958815088989687e-07, "loss": 0.0244, "step": 132820 }, { "epoch": 1.4191997435760457, "grad_norm": 1.4295639991760254, "learning_rate": 9.958793566580715e-07, "loss": 0.0542, "step": 132830 }, { "epoch": 1.4193065868903254, "grad_norm": 2.9630017280578613, "learning_rate": 9.958772038572889e-07, "loss": 0.0209, "step": 132840 }, { "epoch": 1.4194134302046049, "grad_norm": 4.62285041809082, "learning_rate": 9.958750504966237e-07, "loss": 0.0585, "step": 132850 }, { "epoch": 1.4195202735188845, "grad_norm": 5.886507034301758, "learning_rate": 9.95872896576078e-07, "loss": 0.0274, "step": 132860 }, { "epoch": 1.4196271168331642, "grad_norm": 6.423236846923828, "learning_rate": 9.958707420956544e-07, "loss": 0.0745, "step": 132870 }, { "epoch": 1.4197339601474437, "grad_norm": 8.745945930480957, "learning_rate": 9.958685870553552e-07, "loss": 0.0359, "step": 132880 }, { "epoch": 1.4198408034617234, "grad_norm": 0.6658670902252197, "learning_rate": 9.958664314551832e-07, "loss": 0.0091, "step": 132890 }, { "epoch": 1.419947646776003, "grad_norm": 8.58198356628418, "learning_rate": 9.958642752951403e-07, "loss": 0.0511, "step": 132900 }, { "epoch": 1.4200544900902825, "grad_norm": 5.586863994598389, "learning_rate": 9.958621185752295e-07, "loss": 0.0476, "step": 132910 }, { "epoch": 1.4201613334045622, "grad_norm": 0.5584510564804077, "learning_rate": 9.958599612954527e-07, "loss": 0.0063, "step": 132920 }, { "epoch": 1.4202681767188419, "grad_norm": 2.9141013622283936, "learning_rate": 9.958578034558128e-07, "loss": 0.0287, "step": 132930 }, { "epoch": 1.4203750200331213, "grad_norm": 7.462821960449219, "learning_rate": 9.958556450563118e-07, "loss": 0.0137, "step": 132940 }, { "epoch": 1.420481863347401, "grad_norm": 1.8909538984298706, "learning_rate": 9.958534860969524e-07, "loss": 0.0356, "step": 132950 }, { "epoch": 1.4205887066616807, "grad_norm": 2.53086256980896, "learning_rate": 9.958513265777371e-07, "loss": 0.0215, "step": 132960 }, { "epoch": 1.4206955499759601, "grad_norm": 14.660735130310059, "learning_rate": 9.95849166498668e-07, "loss": 0.093, "step": 132970 }, { "epoch": 1.4208023932902398, "grad_norm": 2.642310857772827, "learning_rate": 9.95847005859748e-07, "loss": 0.0335, "step": 132980 }, { "epoch": 1.4209092366045195, "grad_norm": 0.8026030659675598, "learning_rate": 9.958448446609793e-07, "loss": 0.0128, "step": 132990 }, { "epoch": 1.421016079918799, "grad_norm": 3.597973108291626, "learning_rate": 9.958426829023642e-07, "loss": 0.0249, "step": 133000 }, { "epoch": 1.4211229232330786, "grad_norm": 1.6389846801757812, "learning_rate": 9.958405205839054e-07, "loss": 0.0557, "step": 133010 }, { "epoch": 1.4212297665473583, "grad_norm": 3.8812930583953857, "learning_rate": 9.95838357705605e-07, "loss": 0.0219, "step": 133020 }, { "epoch": 1.4213366098616378, "grad_norm": 1.5093696117401123, "learning_rate": 9.95836194267466e-07, "loss": 0.0123, "step": 133030 }, { "epoch": 1.4214434531759175, "grad_norm": 0.6996262073516846, "learning_rate": 9.958340302694906e-07, "loss": 0.0032, "step": 133040 }, { "epoch": 1.4215502964901972, "grad_norm": 1.6275851726531982, "learning_rate": 9.958318657116808e-07, "loss": 0.0106, "step": 133050 }, { "epoch": 1.4216571398044766, "grad_norm": 7.054134368896484, "learning_rate": 9.958297005940395e-07, "loss": 0.1662, "step": 133060 }, { "epoch": 1.4217639831187563, "grad_norm": 0.750069797039032, "learning_rate": 9.95827534916569e-07, "loss": 0.0921, "step": 133070 }, { "epoch": 1.421870826433036, "grad_norm": 1.4805943965911865, "learning_rate": 9.958253686792721e-07, "loss": 0.0143, "step": 133080 }, { "epoch": 1.4219776697473154, "grad_norm": 1.8920888900756836, "learning_rate": 9.958232018821506e-07, "loss": 0.02, "step": 133090 }, { "epoch": 1.4220845130615951, "grad_norm": 0.19571520388126373, "learning_rate": 9.958210345252074e-07, "loss": 0.0492, "step": 133100 }, { "epoch": 1.4221913563758748, "grad_norm": 0.11899410933256149, "learning_rate": 9.958188666084448e-07, "loss": 0.025, "step": 133110 }, { "epoch": 1.4222981996901543, "grad_norm": 0.6388601064682007, "learning_rate": 9.958166981318653e-07, "loss": 0.1117, "step": 133120 }, { "epoch": 1.422405043004434, "grad_norm": 12.939014434814453, "learning_rate": 9.958145290954713e-07, "loss": 0.0137, "step": 133130 }, { "epoch": 1.4225118863187136, "grad_norm": 0.6989485621452332, "learning_rate": 9.958123594992655e-07, "loss": 0.038, "step": 133140 }, { "epoch": 1.4226187296329933, "grad_norm": 0.2195887714624405, "learning_rate": 9.958101893432498e-07, "loss": 0.0109, "step": 133150 }, { "epoch": 1.4227255729472728, "grad_norm": 4.273401260375977, "learning_rate": 9.95808018627427e-07, "loss": 0.0278, "step": 133160 }, { "epoch": 1.4228324162615524, "grad_norm": 4.034857749938965, "learning_rate": 9.958058473517999e-07, "loss": 0.0528, "step": 133170 }, { "epoch": 1.4229392595758321, "grad_norm": 0.9227350950241089, "learning_rate": 9.958036755163703e-07, "loss": 0.0237, "step": 133180 }, { "epoch": 1.4230461028901116, "grad_norm": 3.364687204360962, "learning_rate": 9.958015031211409e-07, "loss": 0.0189, "step": 133190 }, { "epoch": 1.4231529462043913, "grad_norm": 6.915191650390625, "learning_rate": 9.957993301661143e-07, "loss": 0.0511, "step": 133200 }, { "epoch": 1.423259789518671, "grad_norm": 4.164379119873047, "learning_rate": 9.957971566512926e-07, "loss": 0.0367, "step": 133210 }, { "epoch": 1.4233666328329504, "grad_norm": 6.612908840179443, "learning_rate": 9.957949825766787e-07, "loss": 0.1098, "step": 133220 }, { "epoch": 1.42347347614723, "grad_norm": 5.241380214691162, "learning_rate": 9.957928079422747e-07, "loss": 0.0319, "step": 133230 }, { "epoch": 1.4235803194615098, "grad_norm": 0.02252855896949768, "learning_rate": 9.957906327480834e-07, "loss": 0.0181, "step": 133240 }, { "epoch": 1.4236871627757894, "grad_norm": 0.08165587484836578, "learning_rate": 9.95788456994107e-07, "loss": 0.0379, "step": 133250 }, { "epoch": 1.423794006090069, "grad_norm": 1.4624576568603516, "learning_rate": 9.957862806803481e-07, "loss": 0.0337, "step": 133260 }, { "epoch": 1.4239008494043486, "grad_norm": 0.2343718558549881, "learning_rate": 9.95784103806809e-07, "loss": 0.038, "step": 133270 }, { "epoch": 1.4240076927186283, "grad_norm": 0.5329825282096863, "learning_rate": 9.95781926373492e-07, "loss": 0.0282, "step": 133280 }, { "epoch": 1.4241145360329077, "grad_norm": 3.8638534545898438, "learning_rate": 9.957797483804e-07, "loss": 0.1249, "step": 133290 }, { "epoch": 1.4242213793471874, "grad_norm": 2.5910415649414062, "learning_rate": 9.957775698275352e-07, "loss": 0.0944, "step": 133300 }, { "epoch": 1.424328222661467, "grad_norm": 1.218125820159912, "learning_rate": 9.957753907149002e-07, "loss": 0.0182, "step": 133310 }, { "epoch": 1.4244350659757465, "grad_norm": 1.8656293153762817, "learning_rate": 9.957732110424973e-07, "loss": 0.0252, "step": 133320 }, { "epoch": 1.4245419092900262, "grad_norm": 12.59891128540039, "learning_rate": 9.95771030810329e-07, "loss": 0.1223, "step": 133330 }, { "epoch": 1.424648752604306, "grad_norm": 0.03242053464055061, "learning_rate": 9.957688500183977e-07, "loss": 0.0096, "step": 133340 }, { "epoch": 1.4247555959185854, "grad_norm": 3.8433821201324463, "learning_rate": 9.95766668666706e-07, "loss": 0.0301, "step": 133350 }, { "epoch": 1.424862439232865, "grad_norm": 4.393620491027832, "learning_rate": 9.957644867552563e-07, "loss": 0.0323, "step": 133360 }, { "epoch": 1.4249692825471447, "grad_norm": 2.5225749015808105, "learning_rate": 9.957623042840513e-07, "loss": 0.0517, "step": 133370 }, { "epoch": 1.4250761258614242, "grad_norm": 8.632314682006836, "learning_rate": 9.957601212530932e-07, "loss": 0.013, "step": 133380 }, { "epoch": 1.4251829691757039, "grad_norm": 2.541660785675049, "learning_rate": 9.95757937662384e-07, "loss": 0.0194, "step": 133390 }, { "epoch": 1.4252898124899835, "grad_norm": 0.04640822485089302, "learning_rate": 9.957557535119272e-07, "loss": 0.045, "step": 133400 }, { "epoch": 1.425396655804263, "grad_norm": 0.04562810808420181, "learning_rate": 9.957535688017247e-07, "loss": 0.0396, "step": 133410 }, { "epoch": 1.4255034991185427, "grad_norm": 4.710204601287842, "learning_rate": 9.957513835317787e-07, "loss": 0.0258, "step": 133420 }, { "epoch": 1.4256103424328224, "grad_norm": 4.319093227386475, "learning_rate": 9.957491977020924e-07, "loss": 0.0504, "step": 133430 }, { "epoch": 1.4257171857471018, "grad_norm": 4.45821475982666, "learning_rate": 9.957470113126676e-07, "loss": 0.0478, "step": 133440 }, { "epoch": 1.4258240290613815, "grad_norm": 0.2813147306442261, "learning_rate": 9.95744824363507e-07, "loss": 0.0318, "step": 133450 }, { "epoch": 1.4259308723756612, "grad_norm": 0.34002649784088135, "learning_rate": 9.957426368546132e-07, "loss": 0.0085, "step": 133460 }, { "epoch": 1.4260377156899406, "grad_norm": 2.6069109439849854, "learning_rate": 9.957404487859884e-07, "loss": 0.0186, "step": 133470 }, { "epoch": 1.4261445590042203, "grad_norm": 2.620255708694458, "learning_rate": 9.95738260157635e-07, "loss": 0.0272, "step": 133480 }, { "epoch": 1.4262514023185, "grad_norm": 5.336690902709961, "learning_rate": 9.95736070969556e-07, "loss": 0.0622, "step": 133490 }, { "epoch": 1.4263582456327795, "grad_norm": 0.05426335707306862, "learning_rate": 9.957338812217537e-07, "loss": 0.0081, "step": 133500 }, { "epoch": 1.4264650889470591, "grad_norm": 3.05580735206604, "learning_rate": 9.957316909142302e-07, "loss": 0.0501, "step": 133510 }, { "epoch": 1.4265719322613388, "grad_norm": 1.6003766059875488, "learning_rate": 9.957295000469882e-07, "loss": 0.0428, "step": 133520 }, { "epoch": 1.4266787755756183, "grad_norm": 5.365286350250244, "learning_rate": 9.957273086200302e-07, "loss": 0.0112, "step": 133530 }, { "epoch": 1.426785618889898, "grad_norm": 5.212587356567383, "learning_rate": 9.957251166333586e-07, "loss": 0.0303, "step": 133540 }, { "epoch": 1.4268924622041776, "grad_norm": 2.56032395362854, "learning_rate": 9.957229240869763e-07, "loss": 0.0196, "step": 133550 }, { "epoch": 1.426999305518457, "grad_norm": 8.854422569274902, "learning_rate": 9.95720730980885e-07, "loss": 0.0506, "step": 133560 }, { "epoch": 1.4271061488327368, "grad_norm": 1.9860892295837402, "learning_rate": 9.957185373150876e-07, "loss": 0.043, "step": 133570 }, { "epoch": 1.4272129921470165, "grad_norm": 4.785214424133301, "learning_rate": 9.957163430895868e-07, "loss": 0.0366, "step": 133580 }, { "epoch": 1.427319835461296, "grad_norm": 1.762760877609253, "learning_rate": 9.957141483043848e-07, "loss": 0.0348, "step": 133590 }, { "epoch": 1.4274266787755756, "grad_norm": 0.680451512336731, "learning_rate": 9.957119529594838e-07, "loss": 0.0139, "step": 133600 }, { "epoch": 1.4275335220898553, "grad_norm": 3.6543426513671875, "learning_rate": 9.957097570548869e-07, "loss": 0.0284, "step": 133610 }, { "epoch": 1.4276403654041347, "grad_norm": 3.4758739471435547, "learning_rate": 9.957075605905962e-07, "loss": 0.0856, "step": 133620 }, { "epoch": 1.4277472087184144, "grad_norm": 0.46587276458740234, "learning_rate": 9.957053635666141e-07, "loss": 0.0693, "step": 133630 }, { "epoch": 1.427854052032694, "grad_norm": 2.731212854385376, "learning_rate": 9.957031659829434e-07, "loss": 0.0383, "step": 133640 }, { "epoch": 1.4279608953469736, "grad_norm": 3.2589824199676514, "learning_rate": 9.957009678395865e-07, "loss": 0.0938, "step": 133650 }, { "epoch": 1.4280677386612532, "grad_norm": 0.004542484413832426, "learning_rate": 9.956987691365455e-07, "loss": 0.0616, "step": 133660 }, { "epoch": 1.428174581975533, "grad_norm": 2.5064029693603516, "learning_rate": 9.956965698738233e-07, "loss": 0.0273, "step": 133670 }, { "epoch": 1.4282814252898124, "grad_norm": 7.7155656814575195, "learning_rate": 9.956943700514225e-07, "loss": 0.0186, "step": 133680 }, { "epoch": 1.428388268604092, "grad_norm": 0.020046887919306755, "learning_rate": 9.95692169669345e-07, "loss": 0.0082, "step": 133690 }, { "epoch": 1.4284951119183718, "grad_norm": 11.585428237915039, "learning_rate": 9.956899687275938e-07, "loss": 0.0962, "step": 133700 }, { "epoch": 1.4286019552326512, "grad_norm": 12.809785842895508, "learning_rate": 9.956877672261712e-07, "loss": 0.0286, "step": 133710 }, { "epoch": 1.428708798546931, "grad_norm": 1.5874079465866089, "learning_rate": 9.956855651650798e-07, "loss": 0.0172, "step": 133720 }, { "epoch": 1.4288156418612106, "grad_norm": 0.2933005094528198, "learning_rate": 9.956833625443216e-07, "loss": 0.0402, "step": 133730 }, { "epoch": 1.42892248517549, "grad_norm": 6.88358211517334, "learning_rate": 9.956811593638998e-07, "loss": 0.0199, "step": 133740 }, { "epoch": 1.4290293284897697, "grad_norm": 0.8945342302322388, "learning_rate": 9.956789556238165e-07, "loss": 0.0459, "step": 133750 }, { "epoch": 1.4291361718040494, "grad_norm": 1.8007872104644775, "learning_rate": 9.956767513240743e-07, "loss": 0.0106, "step": 133760 }, { "epoch": 1.4292430151183289, "grad_norm": 0.04706009104847908, "learning_rate": 9.956745464646755e-07, "loss": 0.028, "step": 133770 }, { "epoch": 1.4293498584326085, "grad_norm": 1.450655221939087, "learning_rate": 9.956723410456229e-07, "loss": 0.0123, "step": 133780 }, { "epoch": 1.4294567017468882, "grad_norm": 2.4798974990844727, "learning_rate": 9.956701350669188e-07, "loss": 0.0232, "step": 133790 }, { "epoch": 1.4295635450611677, "grad_norm": 7.294495582580566, "learning_rate": 9.956679285285657e-07, "loss": 0.0147, "step": 133800 }, { "epoch": 1.4296703883754474, "grad_norm": 0.053653452545404434, "learning_rate": 9.95665721430566e-07, "loss": 0.0521, "step": 133810 }, { "epoch": 1.429777231689727, "grad_norm": 0.8757590651512146, "learning_rate": 9.956635137729224e-07, "loss": 0.0392, "step": 133820 }, { "epoch": 1.4298840750040065, "grad_norm": 4.459872245788574, "learning_rate": 9.95661305555637e-07, "loss": 0.0171, "step": 133830 }, { "epoch": 1.4299909183182862, "grad_norm": 0.3695572316646576, "learning_rate": 9.956590967787129e-07, "loss": 0.0145, "step": 133840 }, { "epoch": 1.4300977616325659, "grad_norm": 6.622165203094482, "learning_rate": 9.956568874421523e-07, "loss": 0.0441, "step": 133850 }, { "epoch": 1.4302046049468453, "grad_norm": 8.855961799621582, "learning_rate": 9.956546775459575e-07, "loss": 0.0502, "step": 133860 }, { "epoch": 1.430311448261125, "grad_norm": 16.29338264465332, "learning_rate": 9.956524670901312e-07, "loss": 0.0326, "step": 133870 }, { "epoch": 1.4304182915754047, "grad_norm": 4.72479772567749, "learning_rate": 9.956502560746757e-07, "loss": 0.0385, "step": 133880 }, { "epoch": 1.4305251348896844, "grad_norm": 0.08432114869356155, "learning_rate": 9.956480444995938e-07, "loss": 0.034, "step": 133890 }, { "epoch": 1.4306319782039638, "grad_norm": 0.7945235967636108, "learning_rate": 9.956458323648879e-07, "loss": 0.06, "step": 133900 }, { "epoch": 1.4307388215182435, "grad_norm": 2.106112003326416, "learning_rate": 9.956436196705604e-07, "loss": 0.0121, "step": 133910 }, { "epoch": 1.4308456648325232, "grad_norm": 5.829713344573975, "learning_rate": 9.95641406416614e-07, "loss": 0.045, "step": 133920 }, { "epoch": 1.4309525081468026, "grad_norm": 1.8391855955123901, "learning_rate": 9.956391926030507e-07, "loss": 0.019, "step": 133930 }, { "epoch": 1.4310593514610823, "grad_norm": 0.16232478618621826, "learning_rate": 9.956369782298737e-07, "loss": 0.0101, "step": 133940 }, { "epoch": 1.431166194775362, "grad_norm": 0.35715678334236145, "learning_rate": 9.95634763297085e-07, "loss": 0.0113, "step": 133950 }, { "epoch": 1.4312730380896415, "grad_norm": 13.609527587890625, "learning_rate": 9.95632547804687e-07, "loss": 0.0417, "step": 133960 }, { "epoch": 1.4313798814039211, "grad_norm": 1.5426721572875977, "learning_rate": 9.956303317526829e-07, "loss": 0.0314, "step": 133970 }, { "epoch": 1.4314867247182008, "grad_norm": 0.5008078813552856, "learning_rate": 9.956281151410745e-07, "loss": 0.0536, "step": 133980 }, { "epoch": 1.4315935680324805, "grad_norm": 0.020761847496032715, "learning_rate": 9.956258979698644e-07, "loss": 0.0534, "step": 133990 }, { "epoch": 1.43170041134676, "grad_norm": 4.0093488693237305, "learning_rate": 9.956236802390553e-07, "loss": 0.0199, "step": 134000 }, { "epoch": 1.4318072546610396, "grad_norm": 3.9012928009033203, "learning_rate": 9.956214619486499e-07, "loss": 0.0594, "step": 134010 }, { "epoch": 1.4319140979753193, "grad_norm": 1.1752572059631348, "learning_rate": 9.956192430986504e-07, "loss": 0.0052, "step": 134020 }, { "epoch": 1.4320209412895988, "grad_norm": 3.698906898498535, "learning_rate": 9.956170236890593e-07, "loss": 0.0097, "step": 134030 }, { "epoch": 1.4321277846038785, "grad_norm": 0.07716300338506699, "learning_rate": 9.956148037198792e-07, "loss": 0.0125, "step": 134040 }, { "epoch": 1.4322346279181581, "grad_norm": 6.690627574920654, "learning_rate": 9.956125831911127e-07, "loss": 0.0397, "step": 134050 }, { "epoch": 1.4323414712324376, "grad_norm": 7.9451398849487305, "learning_rate": 9.956103621027618e-07, "loss": 0.049, "step": 134060 }, { "epoch": 1.4324483145467173, "grad_norm": 0.016781102865934372, "learning_rate": 9.956081404548297e-07, "loss": 0.0264, "step": 134070 }, { "epoch": 1.432555157860997, "grad_norm": 0.5440131425857544, "learning_rate": 9.956059182473187e-07, "loss": 0.0269, "step": 134080 }, { "epoch": 1.4326620011752764, "grad_norm": 3.592968463897705, "learning_rate": 9.956036954802308e-07, "loss": 0.0108, "step": 134090 }, { "epoch": 1.432768844489556, "grad_norm": 1.2681905031204224, "learning_rate": 9.956014721535693e-07, "loss": 0.0306, "step": 134100 }, { "epoch": 1.4328756878038358, "grad_norm": 0.12286882847547531, "learning_rate": 9.955992482673364e-07, "loss": 0.0225, "step": 134110 }, { "epoch": 1.4329825311181152, "grad_norm": 0.1252407282590866, "learning_rate": 9.955970238215342e-07, "loss": 0.0315, "step": 134120 }, { "epoch": 1.433089374432395, "grad_norm": 0.03254552185535431, "learning_rate": 9.955947988161656e-07, "loss": 0.075, "step": 134130 }, { "epoch": 1.4331962177466746, "grad_norm": 1.0704925060272217, "learning_rate": 9.955925732512333e-07, "loss": 0.0078, "step": 134140 }, { "epoch": 1.433303061060954, "grad_norm": 6.541513442993164, "learning_rate": 9.955903471267395e-07, "loss": 0.0317, "step": 134150 }, { "epoch": 1.4334099043752337, "grad_norm": 2.2430734634399414, "learning_rate": 9.955881204426869e-07, "loss": 0.033, "step": 134160 }, { "epoch": 1.4335167476895134, "grad_norm": 3.8976187705993652, "learning_rate": 9.955858931990777e-07, "loss": 0.0505, "step": 134170 }, { "epoch": 1.4336235910037929, "grad_norm": 7.064516067504883, "learning_rate": 9.955836653959146e-07, "loss": 0.0249, "step": 134180 }, { "epoch": 1.4337304343180726, "grad_norm": 0.11954296380281448, "learning_rate": 9.955814370332002e-07, "loss": 0.0059, "step": 134190 }, { "epoch": 1.4338372776323522, "grad_norm": 10.667939186096191, "learning_rate": 9.955792081109372e-07, "loss": 0.0147, "step": 134200 }, { "epoch": 1.4339441209466317, "grad_norm": 0.006991816218942404, "learning_rate": 9.955769786291275e-07, "loss": 0.0206, "step": 134210 }, { "epoch": 1.4340509642609114, "grad_norm": 16.18483543395996, "learning_rate": 9.955747485877743e-07, "loss": 0.074, "step": 134220 }, { "epoch": 1.434157807575191, "grad_norm": 5.402403831481934, "learning_rate": 9.955725179868796e-07, "loss": 0.0082, "step": 134230 }, { "epoch": 1.4342646508894705, "grad_norm": 4.912806987762451, "learning_rate": 9.95570286826446e-07, "loss": 0.0203, "step": 134240 }, { "epoch": 1.4343714942037502, "grad_norm": 0.772972822189331, "learning_rate": 9.955680551064764e-07, "loss": 0.0258, "step": 134250 }, { "epoch": 1.43447833751803, "grad_norm": 0.007637600414454937, "learning_rate": 9.95565822826973e-07, "loss": 0.0275, "step": 134260 }, { "epoch": 1.4345851808323093, "grad_norm": 4.126661777496338, "learning_rate": 9.955635899879384e-07, "loss": 0.0478, "step": 134270 }, { "epoch": 1.434692024146589, "grad_norm": 4.421396255493164, "learning_rate": 9.95561356589375e-07, "loss": 0.0197, "step": 134280 }, { "epoch": 1.4347988674608687, "grad_norm": 0.22223518788814545, "learning_rate": 9.955591226312856e-07, "loss": 0.0094, "step": 134290 }, { "epoch": 1.4349057107751482, "grad_norm": 0.5029842257499695, "learning_rate": 9.955568881136724e-07, "loss": 0.0199, "step": 134300 }, { "epoch": 1.4350125540894278, "grad_norm": 9.677136421203613, "learning_rate": 9.955546530365382e-07, "loss": 0.0301, "step": 134310 }, { "epoch": 1.4351193974037075, "grad_norm": 1.5777645111083984, "learning_rate": 9.955524173998852e-07, "loss": 0.0268, "step": 134320 }, { "epoch": 1.435226240717987, "grad_norm": 0.07091929018497467, "learning_rate": 9.955501812037164e-07, "loss": 0.0168, "step": 134330 }, { "epoch": 1.4353330840322667, "grad_norm": 0.6030212044715881, "learning_rate": 9.955479444480337e-07, "loss": 0.0156, "step": 134340 }, { "epoch": 1.4354399273465464, "grad_norm": 7.839470386505127, "learning_rate": 9.955457071328403e-07, "loss": 0.065, "step": 134350 }, { "epoch": 1.4355467706608258, "grad_norm": 2.8658394813537598, "learning_rate": 9.955434692581382e-07, "loss": 0.0304, "step": 134360 }, { "epoch": 1.4356536139751055, "grad_norm": 1.8719161748886108, "learning_rate": 9.955412308239302e-07, "loss": 0.066, "step": 134370 }, { "epoch": 1.4357604572893852, "grad_norm": 5.079665660858154, "learning_rate": 9.955389918302186e-07, "loss": 0.0556, "step": 134380 }, { "epoch": 1.4358673006036646, "grad_norm": 0.07464602589607239, "learning_rate": 9.955367522770064e-07, "loss": 0.0358, "step": 134390 }, { "epoch": 1.4359741439179443, "grad_norm": 0.028106294572353363, "learning_rate": 9.955345121642955e-07, "loss": 0.034, "step": 134400 }, { "epoch": 1.436080987232224, "grad_norm": 1.827952265739441, "learning_rate": 9.955322714920888e-07, "loss": 0.0811, "step": 134410 }, { "epoch": 1.4361878305465035, "grad_norm": 0.8037931323051453, "learning_rate": 9.955300302603889e-07, "loss": 0.0269, "step": 134420 }, { "epoch": 1.4362946738607831, "grad_norm": 9.280741691589355, "learning_rate": 9.95527788469198e-07, "loss": 0.0567, "step": 134430 }, { "epoch": 1.4364015171750628, "grad_norm": 4.604084014892578, "learning_rate": 9.95525546118519e-07, "loss": 0.032, "step": 134440 }, { "epoch": 1.4365083604893423, "grad_norm": 5.171108722686768, "learning_rate": 9.955233032083542e-07, "loss": 0.035, "step": 134450 }, { "epoch": 1.436615203803622, "grad_norm": 3.6597769260406494, "learning_rate": 9.955210597387062e-07, "loss": 0.0465, "step": 134460 }, { "epoch": 1.4367220471179016, "grad_norm": 1.8663514852523804, "learning_rate": 9.955188157095775e-07, "loss": 0.0408, "step": 134470 }, { "epoch": 1.436828890432181, "grad_norm": 0.04761885479092598, "learning_rate": 9.955165711209708e-07, "loss": 0.0123, "step": 134480 }, { "epoch": 1.4369357337464608, "grad_norm": 0.18692317605018616, "learning_rate": 9.955143259728883e-07, "loss": 0.0317, "step": 134490 }, { "epoch": 1.4370425770607405, "grad_norm": 3.2276878356933594, "learning_rate": 9.955120802653327e-07, "loss": 0.0501, "step": 134500 }, { "epoch": 1.43714942037502, "grad_norm": 0.01806926168501377, "learning_rate": 9.955098339983067e-07, "loss": 0.0097, "step": 134510 }, { "epoch": 1.4372562636892996, "grad_norm": 0.1731438934803009, "learning_rate": 9.955075871718125e-07, "loss": 0.0178, "step": 134520 }, { "epoch": 1.4373631070035793, "grad_norm": 0.006870889104902744, "learning_rate": 9.95505339785853e-07, "loss": 0.0532, "step": 134530 }, { "epoch": 1.4374699503178587, "grad_norm": 7.107741832733154, "learning_rate": 9.955030918404307e-07, "loss": 0.0467, "step": 134540 }, { "epoch": 1.4375767936321384, "grad_norm": 0.011393929831683636, "learning_rate": 9.95500843335548e-07, "loss": 0.0539, "step": 134550 }, { "epoch": 1.437683636946418, "grad_norm": 5.436820983886719, "learning_rate": 9.954985942712073e-07, "loss": 0.0407, "step": 134560 }, { "epoch": 1.4377904802606976, "grad_norm": 0.7392373085021973, "learning_rate": 9.954963446474111e-07, "loss": 0.0382, "step": 134570 }, { "epoch": 1.4378973235749772, "grad_norm": 0.015027089975774288, "learning_rate": 9.954940944641624e-07, "loss": 0.0256, "step": 134580 }, { "epoch": 1.438004166889257, "grad_norm": 3.8081018924713135, "learning_rate": 9.954918437214635e-07, "loss": 0.0079, "step": 134590 }, { "epoch": 1.4381110102035364, "grad_norm": 6.30222749710083, "learning_rate": 9.954895924193167e-07, "loss": 0.038, "step": 134600 }, { "epoch": 1.438217853517816, "grad_norm": 0.06683363020420074, "learning_rate": 9.954873405577249e-07, "loss": 0.0422, "step": 134610 }, { "epoch": 1.4383246968320957, "grad_norm": 0.03795298933982849, "learning_rate": 9.954850881366905e-07, "loss": 0.0108, "step": 134620 }, { "epoch": 1.4384315401463754, "grad_norm": 0.08402727544307709, "learning_rate": 9.95482835156216e-07, "loss": 0.0067, "step": 134630 }, { "epoch": 1.4385383834606549, "grad_norm": 0.8375424742698669, "learning_rate": 9.954805816163041e-07, "loss": 0.0098, "step": 134640 }, { "epoch": 1.4386452267749346, "grad_norm": 4.836756229400635, "learning_rate": 9.954783275169572e-07, "loss": 0.0286, "step": 134650 }, { "epoch": 1.4387520700892142, "grad_norm": 1.2846956253051758, "learning_rate": 9.954760728581776e-07, "loss": 0.0904, "step": 134660 }, { "epoch": 1.4388589134034937, "grad_norm": 3.910475254058838, "learning_rate": 9.954738176399684e-07, "loss": 0.0326, "step": 134670 }, { "epoch": 1.4389657567177734, "grad_norm": 0.5756892561912537, "learning_rate": 9.954715618623317e-07, "loss": 0.0149, "step": 134680 }, { "epoch": 1.439072600032053, "grad_norm": 17.116392135620117, "learning_rate": 9.954693055252705e-07, "loss": 0.0221, "step": 134690 }, { "epoch": 1.4391794433463325, "grad_norm": 0.025470837950706482, "learning_rate": 9.954670486287868e-07, "loss": 0.0154, "step": 134700 }, { "epoch": 1.4392862866606122, "grad_norm": 0.09382741153240204, "learning_rate": 9.954647911728836e-07, "loss": 0.0251, "step": 134710 }, { "epoch": 1.4393931299748919, "grad_norm": 6.3002400398254395, "learning_rate": 9.95462533157563e-07, "loss": 0.025, "step": 134720 }, { "epoch": 1.4394999732891716, "grad_norm": 3.069315195083618, "learning_rate": 9.954602745828278e-07, "loss": 0.0434, "step": 134730 }, { "epoch": 1.439606816603451, "grad_norm": 2.1277785301208496, "learning_rate": 9.954580154486807e-07, "loss": 0.0362, "step": 134740 }, { "epoch": 1.4397136599177307, "grad_norm": 0.5068756937980652, "learning_rate": 9.95455755755124e-07, "loss": 0.0238, "step": 134750 }, { "epoch": 1.4398205032320104, "grad_norm": 2.7596473693847656, "learning_rate": 9.954534955021605e-07, "loss": 0.0228, "step": 134760 }, { "epoch": 1.4399273465462898, "grad_norm": 4.454091548919678, "learning_rate": 9.954512346897927e-07, "loss": 0.0285, "step": 134770 }, { "epoch": 1.4400341898605695, "grad_norm": 1.9348324537277222, "learning_rate": 9.954489733180229e-07, "loss": 0.0769, "step": 134780 }, { "epoch": 1.4401410331748492, "grad_norm": 0.3252629041671753, "learning_rate": 9.954467113868539e-07, "loss": 0.0175, "step": 134790 }, { "epoch": 1.4402478764891287, "grad_norm": 0.019659757614135742, "learning_rate": 9.95444448896288e-07, "loss": 0.0172, "step": 134800 }, { "epoch": 1.4403547198034083, "grad_norm": 0.1235114112496376, "learning_rate": 9.95442185846328e-07, "loss": 0.0262, "step": 134810 }, { "epoch": 1.440461563117688, "grad_norm": 3.143712043762207, "learning_rate": 9.954399222369765e-07, "loss": 0.0413, "step": 134820 }, { "epoch": 1.4405684064319675, "grad_norm": 0.5807490348815918, "learning_rate": 9.954376580682358e-07, "loss": 0.0313, "step": 134830 }, { "epoch": 1.4406752497462472, "grad_norm": 0.010419328697025776, "learning_rate": 9.954353933401087e-07, "loss": 0.0358, "step": 134840 }, { "epoch": 1.4407820930605268, "grad_norm": 5.0670485496521, "learning_rate": 9.954331280525977e-07, "loss": 0.0327, "step": 134850 }, { "epoch": 1.4408889363748063, "grad_norm": 0.10966790467500687, "learning_rate": 9.95430862205705e-07, "loss": 0.0325, "step": 134860 }, { "epoch": 1.440995779689086, "grad_norm": 5.347929954528809, "learning_rate": 9.954285957994338e-07, "loss": 0.0112, "step": 134870 }, { "epoch": 1.4411026230033657, "grad_norm": 0.11865042895078659, "learning_rate": 9.95426328833786e-07, "loss": 0.0212, "step": 134880 }, { "epoch": 1.4412094663176451, "grad_norm": 0.40354788303375244, "learning_rate": 9.954240613087649e-07, "loss": 0.0699, "step": 134890 }, { "epoch": 1.4413163096319248, "grad_norm": 6.109158515930176, "learning_rate": 9.954217932243722e-07, "loss": 0.0186, "step": 134900 }, { "epoch": 1.4414231529462045, "grad_norm": 12.001653671264648, "learning_rate": 9.954195245806111e-07, "loss": 0.0662, "step": 134910 }, { "epoch": 1.441529996260484, "grad_norm": 5.642232418060303, "learning_rate": 9.95417255377484e-07, "loss": 0.02, "step": 134920 }, { "epoch": 1.4416368395747636, "grad_norm": 0.5348361134529114, "learning_rate": 9.954149856149934e-07, "loss": 0.0155, "step": 134930 }, { "epoch": 1.4417436828890433, "grad_norm": 9.49288558959961, "learning_rate": 9.95412715293142e-07, "loss": 0.026, "step": 134940 }, { "epoch": 1.4418505262033228, "grad_norm": 0.798961877822876, "learning_rate": 9.95410444411932e-07, "loss": 0.0175, "step": 134950 }, { "epoch": 1.4419573695176024, "grad_norm": 6.777170181274414, "learning_rate": 9.954081729713664e-07, "loss": 0.0419, "step": 134960 }, { "epoch": 1.4420642128318821, "grad_norm": 5.793520450592041, "learning_rate": 9.954059009714475e-07, "loss": 0.0681, "step": 134970 }, { "epoch": 1.4421710561461616, "grad_norm": 0.023217936977744102, "learning_rate": 9.954036284121782e-07, "loss": 0.0137, "step": 134980 }, { "epoch": 1.4422778994604413, "grad_norm": 9.889511108398438, "learning_rate": 9.954013552935604e-07, "loss": 0.0494, "step": 134990 }, { "epoch": 1.442384742774721, "grad_norm": 0.04855189844965935, "learning_rate": 9.953990816155975e-07, "loss": 0.012, "step": 135000 }, { "epoch": 1.4424915860890004, "grad_norm": 5.155638217926025, "learning_rate": 9.953968073782913e-07, "loss": 0.0113, "step": 135010 }, { "epoch": 1.44259842940328, "grad_norm": 2.286914587020874, "learning_rate": 9.95394532581645e-07, "loss": 0.0163, "step": 135020 }, { "epoch": 1.4427052727175598, "grad_norm": 0.2804502546787262, "learning_rate": 9.953922572256606e-07, "loss": 0.0538, "step": 135030 }, { "epoch": 1.4428121160318392, "grad_norm": 2.9406635761260986, "learning_rate": 9.95389981310341e-07, "loss": 0.0281, "step": 135040 }, { "epoch": 1.442918959346119, "grad_norm": 1.0306129455566406, "learning_rate": 9.95387704835689e-07, "loss": 0.0427, "step": 135050 }, { "epoch": 1.4430258026603986, "grad_norm": 0.01916697435081005, "learning_rate": 9.953854278017067e-07, "loss": 0.0317, "step": 135060 }, { "epoch": 1.443132645974678, "grad_norm": 1.5173949003219604, "learning_rate": 9.953831502083967e-07, "loss": 0.0092, "step": 135070 }, { "epoch": 1.4432394892889577, "grad_norm": 0.23623743653297424, "learning_rate": 9.95380872055762e-07, "loss": 0.0228, "step": 135080 }, { "epoch": 1.4433463326032374, "grad_norm": 0.052055660635232925, "learning_rate": 9.953785933438047e-07, "loss": 0.0163, "step": 135090 }, { "epoch": 1.4434531759175169, "grad_norm": 1.6634138822555542, "learning_rate": 9.953763140725277e-07, "loss": 0.0395, "step": 135100 }, { "epoch": 1.4435600192317966, "grad_norm": 1.5012621879577637, "learning_rate": 9.953740342419334e-07, "loss": 0.0272, "step": 135110 }, { "epoch": 1.4436668625460762, "grad_norm": 3.2489264011383057, "learning_rate": 9.953717538520245e-07, "loss": 0.0576, "step": 135120 }, { "epoch": 1.4437737058603557, "grad_norm": 3.8498339653015137, "learning_rate": 9.953694729028035e-07, "loss": 0.0252, "step": 135130 }, { "epoch": 1.4438805491746354, "grad_norm": 0.5692003965377808, "learning_rate": 9.95367191394273e-07, "loss": 0.0099, "step": 135140 }, { "epoch": 1.443987392488915, "grad_norm": 10.362356185913086, "learning_rate": 9.953649093264356e-07, "loss": 0.0152, "step": 135150 }, { "epoch": 1.4440942358031945, "grad_norm": 7.043496131896973, "learning_rate": 9.953626266992937e-07, "loss": 0.057, "step": 135160 }, { "epoch": 1.4442010791174742, "grad_norm": 9.270424842834473, "learning_rate": 9.9536034351285e-07, "loss": 0.0349, "step": 135170 }, { "epoch": 1.4443079224317539, "grad_norm": 0.03891336917877197, "learning_rate": 9.953580597671072e-07, "loss": 0.0409, "step": 135180 }, { "epoch": 1.4444147657460333, "grad_norm": 2.658935546875, "learning_rate": 9.953557754620677e-07, "loss": 0.0138, "step": 135190 }, { "epoch": 1.444521609060313, "grad_norm": 0.03162701055407524, "learning_rate": 9.95353490597734e-07, "loss": 0.0108, "step": 135200 }, { "epoch": 1.4446284523745927, "grad_norm": 0.03424273803830147, "learning_rate": 9.95351205174109e-07, "loss": 0.0441, "step": 135210 }, { "epoch": 1.4447352956888722, "grad_norm": 0.058443620800971985, "learning_rate": 9.95348919191195e-07, "loss": 0.0249, "step": 135220 }, { "epoch": 1.4448421390031518, "grad_norm": 2.2654261589050293, "learning_rate": 9.953466326489947e-07, "loss": 0.0245, "step": 135230 }, { "epoch": 1.4449489823174315, "grad_norm": 7.60213041305542, "learning_rate": 9.953443455475107e-07, "loss": 0.0409, "step": 135240 }, { "epoch": 1.445055825631711, "grad_norm": 3.221560001373291, "learning_rate": 9.953420578867457e-07, "loss": 0.0799, "step": 135250 }, { "epoch": 1.4451626689459907, "grad_norm": 5.480401039123535, "learning_rate": 9.953397696667021e-07, "loss": 0.0276, "step": 135260 }, { "epoch": 1.4452695122602703, "grad_norm": 2.8113560676574707, "learning_rate": 9.953374808873822e-07, "loss": 0.0463, "step": 135270 }, { "epoch": 1.4453763555745498, "grad_norm": 0.07133828103542328, "learning_rate": 9.953351915487891e-07, "loss": 0.0203, "step": 135280 }, { "epoch": 1.4454831988888295, "grad_norm": 8.266016006469727, "learning_rate": 9.953329016509254e-07, "loss": 0.0189, "step": 135290 }, { "epoch": 1.4455900422031092, "grad_norm": 2.2671167850494385, "learning_rate": 9.95330611193793e-07, "loss": 0.0402, "step": 135300 }, { "epoch": 1.4456968855173886, "grad_norm": 8.9829740524292, "learning_rate": 9.953283201773954e-07, "loss": 0.0626, "step": 135310 }, { "epoch": 1.4458037288316683, "grad_norm": 0.031762510538101196, "learning_rate": 9.953260286017345e-07, "loss": 0.016, "step": 135320 }, { "epoch": 1.445910572145948, "grad_norm": 0.010900815017521381, "learning_rate": 9.953237364668131e-07, "loss": 0.0054, "step": 135330 }, { "epoch": 1.4460174154602274, "grad_norm": 0.170345738530159, "learning_rate": 9.95321443772634e-07, "loss": 0.0241, "step": 135340 }, { "epoch": 1.4461242587745071, "grad_norm": 0.2284894436597824, "learning_rate": 9.953191505191995e-07, "loss": 0.0218, "step": 135350 }, { "epoch": 1.4462311020887868, "grad_norm": 0.2921401262283325, "learning_rate": 9.953168567065125e-07, "loss": 0.0167, "step": 135360 }, { "epoch": 1.4463379454030665, "grad_norm": 0.1701112985610962, "learning_rate": 9.953145623345752e-07, "loss": 0.0234, "step": 135370 }, { "epoch": 1.446444788717346, "grad_norm": 10.481331825256348, "learning_rate": 9.953122674033903e-07, "loss": 0.0556, "step": 135380 }, { "epoch": 1.4465516320316256, "grad_norm": 0.2543412148952484, "learning_rate": 9.953099719129606e-07, "loss": 0.0292, "step": 135390 }, { "epoch": 1.4466584753459053, "grad_norm": 0.049877412617206573, "learning_rate": 9.953076758632884e-07, "loss": 0.0435, "step": 135400 }, { "epoch": 1.4467653186601848, "grad_norm": 0.02014809660613537, "learning_rate": 9.953053792543764e-07, "loss": 0.0332, "step": 135410 }, { "epoch": 1.4468721619744644, "grad_norm": 5.017897605895996, "learning_rate": 9.953030820862275e-07, "loss": 0.034, "step": 135420 }, { "epoch": 1.4469790052887441, "grad_norm": 0.05552921071648598, "learning_rate": 9.95300784358844e-07, "loss": 0.0466, "step": 135430 }, { "epoch": 1.4470858486030236, "grad_norm": 4.218732833862305, "learning_rate": 9.952984860722284e-07, "loss": 0.0751, "step": 135440 }, { "epoch": 1.4471926919173033, "grad_norm": 0.008502009324729443, "learning_rate": 9.952961872263836e-07, "loss": 0.0183, "step": 135450 }, { "epoch": 1.447299535231583, "grad_norm": 0.03538891673088074, "learning_rate": 9.95293887821312e-07, "loss": 0.0309, "step": 135460 }, { "epoch": 1.4474063785458626, "grad_norm": 0.01096276007592678, "learning_rate": 9.95291587857016e-07, "loss": 0.0245, "step": 135470 }, { "epoch": 1.447513221860142, "grad_norm": 0.5026097297668457, "learning_rate": 9.952892873334985e-07, "loss": 0.0129, "step": 135480 }, { "epoch": 1.4476200651744218, "grad_norm": 1.2406286001205444, "learning_rate": 9.95286986250762e-07, "loss": 0.0259, "step": 135490 }, { "epoch": 1.4477269084887014, "grad_norm": 0.09457840770483017, "learning_rate": 9.952846846088091e-07, "loss": 0.0235, "step": 135500 }, { "epoch": 1.447833751802981, "grad_norm": 2.711029052734375, "learning_rate": 9.952823824076427e-07, "loss": 0.0823, "step": 135510 }, { "epoch": 1.4479405951172606, "grad_norm": 6.736661911010742, "learning_rate": 9.95280079647265e-07, "loss": 0.0494, "step": 135520 }, { "epoch": 1.4480474384315403, "grad_norm": 2.9240787029266357, "learning_rate": 9.952777763276786e-07, "loss": 0.052, "step": 135530 }, { "epoch": 1.4481542817458197, "grad_norm": 9.400433540344238, "learning_rate": 9.952754724488862e-07, "loss": 0.0811, "step": 135540 }, { "epoch": 1.4482611250600994, "grad_norm": 0.5123962163925171, "learning_rate": 9.952731680108904e-07, "loss": 0.048, "step": 135550 }, { "epoch": 1.448367968374379, "grad_norm": 16.246721267700195, "learning_rate": 9.952708630136937e-07, "loss": 0.0555, "step": 135560 }, { "epoch": 1.4484748116886585, "grad_norm": 4.494853973388672, "learning_rate": 9.95268557457299e-07, "loss": 0.0459, "step": 135570 }, { "epoch": 1.4485816550029382, "grad_norm": 1.898405909538269, "learning_rate": 9.952662513417087e-07, "loss": 0.0296, "step": 135580 }, { "epoch": 1.448688498317218, "grad_norm": 1.766701579093933, "learning_rate": 9.952639446669253e-07, "loss": 0.0296, "step": 135590 }, { "epoch": 1.4487953416314974, "grad_norm": 1.1345460414886475, "learning_rate": 9.952616374329517e-07, "loss": 0.025, "step": 135600 }, { "epoch": 1.448902184945777, "grad_norm": 0.9163509607315063, "learning_rate": 9.952593296397902e-07, "loss": 0.0578, "step": 135610 }, { "epoch": 1.4490090282600567, "grad_norm": 5.275001049041748, "learning_rate": 9.952570212874437e-07, "loss": 0.0109, "step": 135620 }, { "epoch": 1.4491158715743362, "grad_norm": 3.150122880935669, "learning_rate": 9.952547123759146e-07, "loss": 0.0273, "step": 135630 }, { "epoch": 1.4492227148886159, "grad_norm": 5.120464324951172, "learning_rate": 9.952524029052054e-07, "loss": 0.0241, "step": 135640 }, { "epoch": 1.4493295582028956, "grad_norm": 8.670612335205078, "learning_rate": 9.95250092875319e-07, "loss": 0.0455, "step": 135650 }, { "epoch": 1.449436401517175, "grad_norm": 11.041991233825684, "learning_rate": 9.952477822862577e-07, "loss": 0.0399, "step": 135660 }, { "epoch": 1.4495432448314547, "grad_norm": 0.44086530804634094, "learning_rate": 9.952454711380246e-07, "loss": 0.0499, "step": 135670 }, { "epoch": 1.4496500881457344, "grad_norm": 0.036031849682331085, "learning_rate": 9.952431594306216e-07, "loss": 0.0675, "step": 135680 }, { "epoch": 1.4497569314600138, "grad_norm": 5.240912437438965, "learning_rate": 9.95240847164052e-07, "loss": 0.0654, "step": 135690 }, { "epoch": 1.4498637747742935, "grad_norm": 4.532003879547119, "learning_rate": 9.95238534338318e-07, "loss": 0.0255, "step": 135700 }, { "epoch": 1.4499706180885732, "grad_norm": 20.297657012939453, "learning_rate": 9.95236220953422e-07, "loss": 0.1341, "step": 135710 }, { "epoch": 1.4500774614028527, "grad_norm": 6.089953899383545, "learning_rate": 9.952339070093674e-07, "loss": 0.0369, "step": 135720 }, { "epoch": 1.4501843047171323, "grad_norm": 0.821873664855957, "learning_rate": 9.95231592506156e-07, "loss": 0.0196, "step": 135730 }, { "epoch": 1.450291148031412, "grad_norm": 0.13820484280586243, "learning_rate": 9.95229277443791e-07, "loss": 0.0131, "step": 135740 }, { "epoch": 1.4503979913456915, "grad_norm": 1.1364696025848389, "learning_rate": 9.952269618222747e-07, "loss": 0.03, "step": 135750 }, { "epoch": 1.4505048346599712, "grad_norm": 0.00584625406190753, "learning_rate": 9.952246456416098e-07, "loss": 0.0202, "step": 135760 }, { "epoch": 1.4506116779742508, "grad_norm": 1.538092017173767, "learning_rate": 9.952223289017988e-07, "loss": 0.0253, "step": 135770 }, { "epoch": 1.4507185212885303, "grad_norm": 0.02858719415962696, "learning_rate": 9.952200116028447e-07, "loss": 0.0174, "step": 135780 }, { "epoch": 1.45082536460281, "grad_norm": 0.6106423735618591, "learning_rate": 9.952176937447494e-07, "loss": 0.0356, "step": 135790 }, { "epoch": 1.4509322079170897, "grad_norm": 4.118880271911621, "learning_rate": 9.952153753275163e-07, "loss": 0.0234, "step": 135800 }, { "epoch": 1.4510390512313691, "grad_norm": 0.027430720627307892, "learning_rate": 9.952130563511474e-07, "loss": 0.02, "step": 135810 }, { "epoch": 1.4511458945456488, "grad_norm": 14.42628288269043, "learning_rate": 9.95210736815646e-07, "loss": 0.0866, "step": 135820 }, { "epoch": 1.4512527378599285, "grad_norm": 0.02995274029672146, "learning_rate": 9.952084167210138e-07, "loss": 0.0233, "step": 135830 }, { "epoch": 1.451359581174208, "grad_norm": 16.353879928588867, "learning_rate": 9.95206096067254e-07, "loss": 0.0471, "step": 135840 }, { "epoch": 1.4514664244884876, "grad_norm": 14.33914852142334, "learning_rate": 9.952037748543695e-07, "loss": 0.0357, "step": 135850 }, { "epoch": 1.4515732678027673, "grad_norm": 0.09805750101804733, "learning_rate": 9.952014530823622e-07, "loss": 0.035, "step": 135860 }, { "epoch": 1.4516801111170468, "grad_norm": 3.7315123081207275, "learning_rate": 9.951991307512353e-07, "loss": 0.0687, "step": 135870 }, { "epoch": 1.4517869544313264, "grad_norm": 2.5811192989349365, "learning_rate": 9.951968078609912e-07, "loss": 0.0379, "step": 135880 }, { "epoch": 1.4518937977456061, "grad_norm": 6.745131969451904, "learning_rate": 9.951944844116325e-07, "loss": 0.0427, "step": 135890 }, { "epoch": 1.4520006410598856, "grad_norm": 1.181533694267273, "learning_rate": 9.951921604031616e-07, "loss": 0.0167, "step": 135900 }, { "epoch": 1.4521074843741653, "grad_norm": 0.07903854548931122, "learning_rate": 9.951898358355818e-07, "loss": 0.0394, "step": 135910 }, { "epoch": 1.452214327688445, "grad_norm": 1.4218199253082275, "learning_rate": 9.95187510708895e-07, "loss": 0.0138, "step": 135920 }, { "epoch": 1.4523211710027244, "grad_norm": 5.875075817108154, "learning_rate": 9.951851850231043e-07, "loss": 0.0373, "step": 135930 }, { "epoch": 1.452428014317004, "grad_norm": 2.6853244304656982, "learning_rate": 9.95182858778212e-07, "loss": 0.0217, "step": 135940 }, { "epoch": 1.4525348576312838, "grad_norm": 6.323482513427734, "learning_rate": 9.95180531974221e-07, "loss": 0.033, "step": 135950 }, { "epoch": 1.4526417009455632, "grad_norm": 1.7003456354141235, "learning_rate": 9.951782046111338e-07, "loss": 0.012, "step": 135960 }, { "epoch": 1.452748544259843, "grad_norm": 3.6934657096862793, "learning_rate": 9.95175876688953e-07, "loss": 0.0523, "step": 135970 }, { "epoch": 1.4528553875741226, "grad_norm": 6.495023727416992, "learning_rate": 9.951735482076812e-07, "loss": 0.0604, "step": 135980 }, { "epoch": 1.452962230888402, "grad_norm": 2.1365437507629395, "learning_rate": 9.95171219167321e-07, "loss": 0.0181, "step": 135990 }, { "epoch": 1.4530690742026817, "grad_norm": 7.054425239562988, "learning_rate": 9.951688895678754e-07, "loss": 0.0814, "step": 136000 }, { "epoch": 1.4531759175169614, "grad_norm": 0.03551578149199486, "learning_rate": 9.951665594093468e-07, "loss": 0.021, "step": 136010 }, { "epoch": 1.4532827608312409, "grad_norm": 2.695087194442749, "learning_rate": 9.951642286917376e-07, "loss": 0.0123, "step": 136020 }, { "epoch": 1.4533896041455205, "grad_norm": 4.201440334320068, "learning_rate": 9.951618974150506e-07, "loss": 0.0109, "step": 136030 }, { "epoch": 1.4534964474598002, "grad_norm": 8.62808609008789, "learning_rate": 9.951595655792884e-07, "loss": 0.0611, "step": 136040 }, { "epoch": 1.4536032907740797, "grad_norm": 0.15055319666862488, "learning_rate": 9.951572331844537e-07, "loss": 0.0305, "step": 136050 }, { "epoch": 1.4537101340883594, "grad_norm": 0.23437844216823578, "learning_rate": 9.951549002305493e-07, "loss": 0.0328, "step": 136060 }, { "epoch": 1.453816977402639, "grad_norm": 0.44544506072998047, "learning_rate": 9.951525667175776e-07, "loss": 0.0286, "step": 136070 }, { "epoch": 1.4539238207169185, "grad_norm": 8.060404777526855, "learning_rate": 9.95150232645541e-07, "loss": 0.0387, "step": 136080 }, { "epoch": 1.4540306640311982, "grad_norm": 0.008228794671595097, "learning_rate": 9.951478980144427e-07, "loss": 0.0314, "step": 136090 }, { "epoch": 1.4541375073454779, "grad_norm": 0.042090632021427155, "learning_rate": 9.95145562824285e-07, "loss": 0.0692, "step": 136100 }, { "epoch": 1.4542443506597575, "grad_norm": 12.928658485412598, "learning_rate": 9.951432270750704e-07, "loss": 0.0889, "step": 136110 }, { "epoch": 1.454351193974037, "grad_norm": 5.138131141662598, "learning_rate": 9.951408907668019e-07, "loss": 0.0395, "step": 136120 }, { "epoch": 1.4544580372883167, "grad_norm": 2.8129801750183105, "learning_rate": 9.95138553899482e-07, "loss": 0.0165, "step": 136130 }, { "epoch": 1.4545648806025964, "grad_norm": 2.893303632736206, "learning_rate": 9.95136216473113e-07, "loss": 0.0096, "step": 136140 }, { "epoch": 1.4546717239168758, "grad_norm": 0.06141198053956032, "learning_rate": 9.951338784876984e-07, "loss": 0.0353, "step": 136150 }, { "epoch": 1.4547785672311555, "grad_norm": 22.668176651000977, "learning_rate": 9.951315399432398e-07, "loss": 0.0558, "step": 136160 }, { "epoch": 1.4548854105454352, "grad_norm": 3.6735923290252686, "learning_rate": 9.951292008397405e-07, "loss": 0.0606, "step": 136170 }, { "epoch": 1.4549922538597146, "grad_norm": 9.86713695526123, "learning_rate": 9.95126861177203e-07, "loss": 0.0233, "step": 136180 }, { "epoch": 1.4550990971739943, "grad_norm": 4.1122870445251465, "learning_rate": 9.951245209556298e-07, "loss": 0.0683, "step": 136190 }, { "epoch": 1.455205940488274, "grad_norm": 0.5041947960853577, "learning_rate": 9.95122180175024e-07, "loss": 0.0432, "step": 136200 }, { "epoch": 1.4553127838025537, "grad_norm": 6.764044284820557, "learning_rate": 9.951198388353875e-07, "loss": 0.032, "step": 136210 }, { "epoch": 1.4554196271168331, "grad_norm": 6.606338024139404, "learning_rate": 9.951174969367233e-07, "loss": 0.062, "step": 136220 }, { "epoch": 1.4555264704311128, "grad_norm": 0.9465271830558777, "learning_rate": 9.951151544790341e-07, "loss": 0.152, "step": 136230 }, { "epoch": 1.4556333137453925, "grad_norm": 4.679378986358643, "learning_rate": 9.951128114623227e-07, "loss": 0.0211, "step": 136240 }, { "epoch": 1.455740157059672, "grad_norm": 0.032685015350580215, "learning_rate": 9.951104678865917e-07, "loss": 0.0252, "step": 136250 }, { "epoch": 1.4558470003739516, "grad_norm": 4.422041893005371, "learning_rate": 9.951081237518433e-07, "loss": 0.0271, "step": 136260 }, { "epoch": 1.4559538436882313, "grad_norm": 9.020549774169922, "learning_rate": 9.951057790580805e-07, "loss": 0.0463, "step": 136270 }, { "epoch": 1.4560606870025108, "grad_norm": 1.06158447265625, "learning_rate": 9.951034338053059e-07, "loss": 0.0271, "step": 136280 }, { "epoch": 1.4561675303167905, "grad_norm": 1.9106144905090332, "learning_rate": 9.951010879935222e-07, "loss": 0.0091, "step": 136290 }, { "epoch": 1.4562743736310702, "grad_norm": 3.3973491191864014, "learning_rate": 9.95098741622732e-07, "loss": 0.0492, "step": 136300 }, { "epoch": 1.4563812169453496, "grad_norm": 6.627505779266357, "learning_rate": 9.950963946929382e-07, "loss": 0.0274, "step": 136310 }, { "epoch": 1.4564880602596293, "grad_norm": 2.9062345027923584, "learning_rate": 9.95094047204143e-07, "loss": 0.0265, "step": 136320 }, { "epoch": 1.456594903573909, "grad_norm": 0.3827763497829437, "learning_rate": 9.950916991563492e-07, "loss": 0.0289, "step": 136330 }, { "epoch": 1.4567017468881884, "grad_norm": 9.844266891479492, "learning_rate": 9.950893505495596e-07, "loss": 0.0203, "step": 136340 }, { "epoch": 1.4568085902024681, "grad_norm": 3.1262612342834473, "learning_rate": 9.950870013837766e-07, "loss": 0.0293, "step": 136350 }, { "epoch": 1.4569154335167478, "grad_norm": 8.33453369140625, "learning_rate": 9.950846516590032e-07, "loss": 0.0302, "step": 136360 }, { "epoch": 1.4570222768310273, "grad_norm": 14.857353210449219, "learning_rate": 9.950823013752418e-07, "loss": 0.0705, "step": 136370 }, { "epoch": 1.457129120145307, "grad_norm": 4.535232067108154, "learning_rate": 9.95079950532495e-07, "loss": 0.0436, "step": 136380 }, { "epoch": 1.4572359634595866, "grad_norm": 0.1281360238790512, "learning_rate": 9.950775991307658e-07, "loss": 0.0339, "step": 136390 }, { "epoch": 1.457342806773866, "grad_norm": 8.148172378540039, "learning_rate": 9.950752471700564e-07, "loss": 0.0505, "step": 136400 }, { "epoch": 1.4574496500881458, "grad_norm": 0.007410047575831413, "learning_rate": 9.950728946503698e-07, "loss": 0.0288, "step": 136410 }, { "epoch": 1.4575564934024254, "grad_norm": 2.8745615482330322, "learning_rate": 9.950705415717085e-07, "loss": 0.0232, "step": 136420 }, { "epoch": 1.457663336716705, "grad_norm": 15.157346725463867, "learning_rate": 9.950681879340752e-07, "loss": 0.0478, "step": 136430 }, { "epoch": 1.4577701800309846, "grad_norm": 17.178632736206055, "learning_rate": 9.950658337374727e-07, "loss": 0.0897, "step": 136440 }, { "epoch": 1.4578770233452643, "grad_norm": 0.07040628790855408, "learning_rate": 9.950634789819033e-07, "loss": 0.0461, "step": 136450 }, { "epoch": 1.4579838666595437, "grad_norm": 5.754318714141846, "learning_rate": 9.950611236673698e-07, "loss": 0.0636, "step": 136460 }, { "epoch": 1.4580907099738234, "grad_norm": 0.8410942554473877, "learning_rate": 9.950587677938751e-07, "loss": 0.0419, "step": 136470 }, { "epoch": 1.458197553288103, "grad_norm": 3.7364068031311035, "learning_rate": 9.950564113614217e-07, "loss": 0.0475, "step": 136480 }, { "epoch": 1.4583043966023825, "grad_norm": 0.03780467435717583, "learning_rate": 9.950540543700122e-07, "loss": 0.0514, "step": 136490 }, { "epoch": 1.4584112399166622, "grad_norm": 2.738448143005371, "learning_rate": 9.950516968196493e-07, "loss": 0.0482, "step": 136500 }, { "epoch": 1.458518083230942, "grad_norm": 0.003253238508477807, "learning_rate": 9.950493387103356e-07, "loss": 0.03, "step": 136510 }, { "epoch": 1.4586249265452214, "grad_norm": 0.05109690874814987, "learning_rate": 9.95046980042074e-07, "loss": 0.0335, "step": 136520 }, { "epoch": 1.458731769859501, "grad_norm": 5.425597667694092, "learning_rate": 9.95044620814867e-07, "loss": 0.0207, "step": 136530 }, { "epoch": 1.4588386131737807, "grad_norm": 1.8898645639419556, "learning_rate": 9.95042261028717e-07, "loss": 0.053, "step": 136540 }, { "epoch": 1.4589454564880602, "grad_norm": 10.190024375915527, "learning_rate": 9.950399006836273e-07, "loss": 0.0538, "step": 136550 }, { "epoch": 1.4590522998023399, "grad_norm": 1.825260043144226, "learning_rate": 9.950375397796e-07, "loss": 0.0237, "step": 136560 }, { "epoch": 1.4591591431166195, "grad_norm": 2.7116377353668213, "learning_rate": 9.950351783166378e-07, "loss": 0.0254, "step": 136570 }, { "epoch": 1.459265986430899, "grad_norm": 2.777968168258667, "learning_rate": 9.950328162947436e-07, "loss": 0.0245, "step": 136580 }, { "epoch": 1.4593728297451787, "grad_norm": 3.1856489181518555, "learning_rate": 9.9503045371392e-07, "loss": 0.0318, "step": 136590 }, { "epoch": 1.4594796730594584, "grad_norm": 1.189500331878662, "learning_rate": 9.9502809057417e-07, "loss": 0.0347, "step": 136600 }, { "epoch": 1.4595865163737378, "grad_norm": 3.627098798751831, "learning_rate": 9.950257268754955e-07, "loss": 0.0081, "step": 136610 }, { "epoch": 1.4596933596880175, "grad_norm": 11.903997421264648, "learning_rate": 9.950233626178996e-07, "loss": 0.0534, "step": 136620 }, { "epoch": 1.4598002030022972, "grad_norm": 4.751251220703125, "learning_rate": 9.950209978013853e-07, "loss": 0.0378, "step": 136630 }, { "epoch": 1.4599070463165766, "grad_norm": 3.9381797313690186, "learning_rate": 9.950186324259547e-07, "loss": 0.0285, "step": 136640 }, { "epoch": 1.4600138896308563, "grad_norm": 10.853195190429688, "learning_rate": 9.950162664916106e-07, "loss": 0.0361, "step": 136650 }, { "epoch": 1.460120732945136, "grad_norm": 4.485771656036377, "learning_rate": 9.95013899998356e-07, "loss": 0.0165, "step": 136660 }, { "epoch": 1.4602275762594155, "grad_norm": 4.748133659362793, "learning_rate": 9.950115329461933e-07, "loss": 0.0049, "step": 136670 }, { "epoch": 1.4603344195736951, "grad_norm": 4.836813926696777, "learning_rate": 9.95009165335125e-07, "loss": 0.0294, "step": 136680 }, { "epoch": 1.4604412628879748, "grad_norm": 1.3983577489852905, "learning_rate": 9.950067971651543e-07, "loss": 0.0167, "step": 136690 }, { "epoch": 1.4605481062022543, "grad_norm": 3.7193174362182617, "learning_rate": 9.950044284362833e-07, "loss": 0.0338, "step": 136700 }, { "epoch": 1.460654949516534, "grad_norm": 4.313401222229004, "learning_rate": 9.950020591485151e-07, "loss": 0.0607, "step": 136710 }, { "epoch": 1.4607617928308136, "grad_norm": 2.462371587753296, "learning_rate": 9.94999689301852e-07, "loss": 0.0839, "step": 136720 }, { "epoch": 1.460868636145093, "grad_norm": 5.873189926147461, "learning_rate": 9.949973188962972e-07, "loss": 0.0343, "step": 136730 }, { "epoch": 1.4609754794593728, "grad_norm": 0.1886514276266098, "learning_rate": 9.94994947931853e-07, "loss": 0.0123, "step": 136740 }, { "epoch": 1.4610823227736525, "grad_norm": 2.195805072784424, "learning_rate": 9.94992576408522e-07, "loss": 0.0284, "step": 136750 }, { "epoch": 1.461189166087932, "grad_norm": 0.09768324345350266, "learning_rate": 9.949902043263072e-07, "loss": 0.0376, "step": 136760 }, { "epoch": 1.4612960094022116, "grad_norm": 0.1421041488647461, "learning_rate": 9.94987831685211e-07, "loss": 0.0556, "step": 136770 }, { "epoch": 1.4614028527164913, "grad_norm": 0.2535702884197235, "learning_rate": 9.949854584852361e-07, "loss": 0.0337, "step": 136780 }, { "epoch": 1.4615096960307707, "grad_norm": 0.11682351678609848, "learning_rate": 9.949830847263854e-07, "loss": 0.044, "step": 136790 }, { "epoch": 1.4616165393450504, "grad_norm": 0.23832406103610992, "learning_rate": 9.949807104086613e-07, "loss": 0.0261, "step": 136800 }, { "epoch": 1.46172338265933, "grad_norm": 0.13959187269210815, "learning_rate": 9.949783355320668e-07, "loss": 0.0446, "step": 136810 }, { "epoch": 1.4618302259736096, "grad_norm": 0.26019713282585144, "learning_rate": 9.949759600966044e-07, "loss": 0.0567, "step": 136820 }, { "epoch": 1.4619370692878892, "grad_norm": 2.5192177295684814, "learning_rate": 9.949735841022766e-07, "loss": 0.0603, "step": 136830 }, { "epoch": 1.462043912602169, "grad_norm": 2.159986734390259, "learning_rate": 9.949712075490864e-07, "loss": 0.0335, "step": 136840 }, { "epoch": 1.4621507559164486, "grad_norm": 2.8895201683044434, "learning_rate": 9.949688304370362e-07, "loss": 0.042, "step": 136850 }, { "epoch": 1.462257599230728, "grad_norm": 0.08659473061561584, "learning_rate": 9.94966452766129e-07, "loss": 0.0745, "step": 136860 }, { "epoch": 1.4623644425450077, "grad_norm": 0.020715735852718353, "learning_rate": 9.949640745363672e-07, "loss": 0.0787, "step": 136870 }, { "epoch": 1.4624712858592874, "grad_norm": 8.154563903808594, "learning_rate": 9.949616957477538e-07, "loss": 0.0474, "step": 136880 }, { "epoch": 1.4625781291735669, "grad_norm": 0.10235608369112015, "learning_rate": 9.94959316400291e-07, "loss": 0.082, "step": 136890 }, { "epoch": 1.4626849724878466, "grad_norm": 4.668766498565674, "learning_rate": 9.94956936493982e-07, "loss": 0.0375, "step": 136900 }, { "epoch": 1.4627918158021262, "grad_norm": 0.7260074615478516, "learning_rate": 9.94954556028829e-07, "loss": 0.0364, "step": 136910 }, { "epoch": 1.4628986591164057, "grad_norm": 6.969691276550293, "learning_rate": 9.949521750048353e-07, "loss": 0.0317, "step": 136920 }, { "epoch": 1.4630055024306854, "grad_norm": 1.1267274618148804, "learning_rate": 9.94949793422003e-07, "loss": 0.0236, "step": 136930 }, { "epoch": 1.463112345744965, "grad_norm": 1.907462239265442, "learning_rate": 9.949474112803352e-07, "loss": 0.0022, "step": 136940 }, { "epoch": 1.4632191890592448, "grad_norm": 7.819562911987305, "learning_rate": 9.949450285798344e-07, "loss": 0.0281, "step": 136950 }, { "epoch": 1.4633260323735242, "grad_norm": 0.34763064980506897, "learning_rate": 9.949426453205033e-07, "loss": 0.0207, "step": 136960 }, { "epoch": 1.463432875687804, "grad_norm": 5.178188323974609, "learning_rate": 9.949402615023444e-07, "loss": 0.0245, "step": 136970 }, { "epoch": 1.4635397190020836, "grad_norm": 2.999314308166504, "learning_rate": 9.949378771253607e-07, "loss": 0.0382, "step": 136980 }, { "epoch": 1.463646562316363, "grad_norm": 0.003244111081585288, "learning_rate": 9.949354921895547e-07, "loss": 0.009, "step": 136990 }, { "epoch": 1.4637534056306427, "grad_norm": 1.421334981918335, "learning_rate": 9.949331066949294e-07, "loss": 0.027, "step": 137000 }, { "epoch": 1.4638602489449224, "grad_norm": 4.143304824829102, "learning_rate": 9.949307206414871e-07, "loss": 0.0216, "step": 137010 }, { "epoch": 1.4639670922592019, "grad_norm": 2.592345952987671, "learning_rate": 9.949283340292308e-07, "loss": 0.0719, "step": 137020 }, { "epoch": 1.4640739355734815, "grad_norm": 0.48981958627700806, "learning_rate": 9.949259468581628e-07, "loss": 0.0381, "step": 137030 }, { "epoch": 1.4641807788877612, "grad_norm": 6.21188497543335, "learning_rate": 9.949235591282863e-07, "loss": 0.0737, "step": 137040 }, { "epoch": 1.4642876222020407, "grad_norm": 1.334794044494629, "learning_rate": 9.949211708396038e-07, "loss": 0.0338, "step": 137050 }, { "epoch": 1.4643944655163204, "grad_norm": 0.08817780762910843, "learning_rate": 9.949187819921177e-07, "loss": 0.0228, "step": 137060 }, { "epoch": 1.4645013088306, "grad_norm": 0.8239126205444336, "learning_rate": 9.94916392585831e-07, "loss": 0.0162, "step": 137070 }, { "epoch": 1.4646081521448795, "grad_norm": 0.024537809193134308, "learning_rate": 9.949140026207464e-07, "loss": 0.0287, "step": 137080 }, { "epoch": 1.4647149954591592, "grad_norm": 0.6109721660614014, "learning_rate": 9.949116120968667e-07, "loss": 0.0287, "step": 137090 }, { "epoch": 1.4648218387734389, "grad_norm": 0.15683835744857788, "learning_rate": 9.94909221014194e-07, "loss": 0.0807, "step": 137100 }, { "epoch": 1.4649286820877183, "grad_norm": 0.04963010922074318, "learning_rate": 9.949068293727319e-07, "loss": 0.0273, "step": 137110 }, { "epoch": 1.465035525401998, "grad_norm": 15.512998580932617, "learning_rate": 9.949044371724823e-07, "loss": 0.1291, "step": 137120 }, { "epoch": 1.4651423687162777, "grad_norm": 5.217113018035889, "learning_rate": 9.949020444134484e-07, "loss": 0.0386, "step": 137130 }, { "epoch": 1.4652492120305571, "grad_norm": 1.1263821125030518, "learning_rate": 9.948996510956327e-07, "loss": 0.0503, "step": 137140 }, { "epoch": 1.4653560553448368, "grad_norm": 0.004245092626661062, "learning_rate": 9.94897257219038e-07, "loss": 0.0267, "step": 137150 }, { "epoch": 1.4654628986591165, "grad_norm": 1.7979236841201782, "learning_rate": 9.948948627836668e-07, "loss": 0.0271, "step": 137160 }, { "epoch": 1.465569741973396, "grad_norm": 3.096376657485962, "learning_rate": 9.94892467789522e-07, "loss": 0.0365, "step": 137170 }, { "epoch": 1.4656765852876756, "grad_norm": 0.01927315630018711, "learning_rate": 9.948900722366066e-07, "loss": 0.0742, "step": 137180 }, { "epoch": 1.4657834286019553, "grad_norm": 5.03814697265625, "learning_rate": 9.948876761249225e-07, "loss": 0.0401, "step": 137190 }, { "epoch": 1.4658902719162348, "grad_norm": 7.058603286743164, "learning_rate": 9.94885279454473e-07, "loss": 0.0315, "step": 137200 }, { "epoch": 1.4659971152305145, "grad_norm": 0.1288883239030838, "learning_rate": 9.948828822252607e-07, "loss": 0.0304, "step": 137210 }, { "epoch": 1.4661039585447941, "grad_norm": 0.4704286456108093, "learning_rate": 9.948804844372883e-07, "loss": 0.0363, "step": 137220 }, { "epoch": 1.4662108018590736, "grad_norm": 0.23883002996444702, "learning_rate": 9.948780860905583e-07, "loss": 0.0303, "step": 137230 }, { "epoch": 1.4663176451733533, "grad_norm": 2.5670111179351807, "learning_rate": 9.94875687185074e-07, "loss": 0.0231, "step": 137240 }, { "epoch": 1.466424488487633, "grad_norm": 0.024945957586169243, "learning_rate": 9.948732877208373e-07, "loss": 0.032, "step": 137250 }, { "epoch": 1.4665313318019124, "grad_norm": 4.302984237670898, "learning_rate": 9.948708876978513e-07, "loss": 0.0401, "step": 137260 }, { "epoch": 1.466638175116192, "grad_norm": 0.2640811502933502, "learning_rate": 9.94868487116119e-07, "loss": 0.039, "step": 137270 }, { "epoch": 1.4667450184304718, "grad_norm": 5.7428297996521, "learning_rate": 9.948660859756427e-07, "loss": 0.0452, "step": 137280 }, { "epoch": 1.4668518617447512, "grad_norm": 7.918915748596191, "learning_rate": 9.948636842764254e-07, "loss": 0.0421, "step": 137290 }, { "epoch": 1.466958705059031, "grad_norm": 0.010278369300067425, "learning_rate": 9.948612820184695e-07, "loss": 0.07, "step": 137300 }, { "epoch": 1.4670655483733106, "grad_norm": 0.056314222514629364, "learning_rate": 9.948588792017777e-07, "loss": 0.0212, "step": 137310 }, { "epoch": 1.46717239168759, "grad_norm": 0.028778111562132835, "learning_rate": 9.94856475826353e-07, "loss": 0.0641, "step": 137320 }, { "epoch": 1.4672792350018697, "grad_norm": 0.5266179442405701, "learning_rate": 9.94854071892198e-07, "loss": 0.0155, "step": 137330 }, { "epoch": 1.4673860783161494, "grad_norm": 0.22286440432071686, "learning_rate": 9.948516673993156e-07, "loss": 0.0202, "step": 137340 }, { "epoch": 1.4674929216304289, "grad_norm": 1.1099292039871216, "learning_rate": 9.94849262347708e-07, "loss": 0.0132, "step": 137350 }, { "epoch": 1.4675997649447086, "grad_norm": 2.86808443069458, "learning_rate": 9.948468567373785e-07, "loss": 0.0203, "step": 137360 }, { "epoch": 1.4677066082589882, "grad_norm": 1.4428852796554565, "learning_rate": 9.948444505683293e-07, "loss": 0.0274, "step": 137370 }, { "epoch": 1.4678134515732677, "grad_norm": 1.5746039152145386, "learning_rate": 9.948420438405637e-07, "loss": 0.0189, "step": 137380 }, { "epoch": 1.4679202948875474, "grad_norm": 3.042598009109497, "learning_rate": 9.948396365540836e-07, "loss": 0.0186, "step": 137390 }, { "epoch": 1.468027138201827, "grad_norm": 1.3336308002471924, "learning_rate": 9.948372287088926e-07, "loss": 0.0258, "step": 137400 }, { "epoch": 1.4681339815161065, "grad_norm": 3.5533103942871094, "learning_rate": 9.94834820304993e-07, "loss": 0.0333, "step": 137410 }, { "epoch": 1.4682408248303862, "grad_norm": 0.014057302847504616, "learning_rate": 9.948324113423874e-07, "loss": 0.0306, "step": 137420 }, { "epoch": 1.4683476681446659, "grad_norm": 0.04487066715955734, "learning_rate": 9.948300018210787e-07, "loss": 0.0465, "step": 137430 }, { "epoch": 1.4684545114589453, "grad_norm": 6.2306084632873535, "learning_rate": 9.948275917410696e-07, "loss": 0.0183, "step": 137440 }, { "epoch": 1.468561354773225, "grad_norm": 12.524665832519531, "learning_rate": 9.948251811023626e-07, "loss": 0.0381, "step": 137450 }, { "epoch": 1.4686681980875047, "grad_norm": 0.059577830135822296, "learning_rate": 9.94822769904961e-07, "loss": 0.0746, "step": 137460 }, { "epoch": 1.4687750414017842, "grad_norm": 0.14244447648525238, "learning_rate": 9.948203581488668e-07, "loss": 0.0295, "step": 137470 }, { "epoch": 1.4688818847160638, "grad_norm": 0.5745916366577148, "learning_rate": 9.948179458340833e-07, "loss": 0.0108, "step": 137480 }, { "epoch": 1.4689887280303435, "grad_norm": 0.18656739592552185, "learning_rate": 9.948155329606129e-07, "loss": 0.0249, "step": 137490 }, { "epoch": 1.469095571344623, "grad_norm": 0.10843177139759064, "learning_rate": 9.948131195284584e-07, "loss": 0.0121, "step": 137500 }, { "epoch": 1.4692024146589027, "grad_norm": 7.05392599105835, "learning_rate": 9.948107055376228e-07, "loss": 0.0625, "step": 137510 }, { "epoch": 1.4693092579731823, "grad_norm": 3.533078193664551, "learning_rate": 9.948082909881082e-07, "loss": 0.093, "step": 137520 }, { "epoch": 1.4694161012874618, "grad_norm": 6.040279388427734, "learning_rate": 9.948058758799177e-07, "loss": 0.0345, "step": 137530 }, { "epoch": 1.4695229446017415, "grad_norm": 0.11792066693305969, "learning_rate": 9.948034602130545e-07, "loss": 0.041, "step": 137540 }, { "epoch": 1.4696297879160212, "grad_norm": 18.870967864990234, "learning_rate": 9.948010439875204e-07, "loss": 0.0582, "step": 137550 }, { "epoch": 1.4697366312303006, "grad_norm": 2.386732816696167, "learning_rate": 9.947986272033186e-07, "loss": 0.0121, "step": 137560 }, { "epoch": 1.4698434745445803, "grad_norm": 2.2598769664764404, "learning_rate": 9.94796209860452e-07, "loss": 0.0371, "step": 137570 }, { "epoch": 1.46995031785886, "grad_norm": 5.633945941925049, "learning_rate": 9.94793791958923e-07, "loss": 0.0361, "step": 137580 }, { "epoch": 1.4700571611731397, "grad_norm": 4.348382472991943, "learning_rate": 9.947913734987346e-07, "loss": 0.0319, "step": 137590 }, { "epoch": 1.4701640044874191, "grad_norm": 3.7032535076141357, "learning_rate": 9.947889544798892e-07, "loss": 0.0198, "step": 137600 }, { "epoch": 1.4702708478016988, "grad_norm": 0.010263554751873016, "learning_rate": 9.947865349023898e-07, "loss": 0.0225, "step": 137610 }, { "epoch": 1.4703776911159785, "grad_norm": 0.7896525263786316, "learning_rate": 9.94784114766239e-07, "loss": 0.0283, "step": 137620 }, { "epoch": 1.470484534430258, "grad_norm": 0.02960759401321411, "learning_rate": 9.947816940714397e-07, "loss": 0.0148, "step": 137630 }, { "epoch": 1.4705913777445376, "grad_norm": 1.6126848459243774, "learning_rate": 9.947792728179947e-07, "loss": 0.0194, "step": 137640 }, { "epoch": 1.4706982210588173, "grad_norm": 3.4385523796081543, "learning_rate": 9.947768510059063e-07, "loss": 0.012, "step": 137650 }, { "epoch": 1.4708050643730968, "grad_norm": 14.042844772338867, "learning_rate": 9.947744286351775e-07, "loss": 0.0346, "step": 137660 }, { "epoch": 1.4709119076873765, "grad_norm": 4.844738483428955, "learning_rate": 9.947720057058111e-07, "loss": 0.0439, "step": 137670 }, { "epoch": 1.4710187510016561, "grad_norm": 1.0462623834609985, "learning_rate": 9.947695822178096e-07, "loss": 0.0198, "step": 137680 }, { "epoch": 1.4711255943159358, "grad_norm": 1.44187331199646, "learning_rate": 9.947671581711761e-07, "loss": 0.0356, "step": 137690 }, { "epoch": 1.4712324376302153, "grad_norm": 4.6887736320495605, "learning_rate": 9.947647335659132e-07, "loss": 0.0753, "step": 137700 }, { "epoch": 1.471339280944495, "grad_norm": 6.564845085144043, "learning_rate": 9.947623084020233e-07, "loss": 0.0392, "step": 137710 }, { "epoch": 1.4714461242587746, "grad_norm": 0.06203632056713104, "learning_rate": 9.947598826795097e-07, "loss": 0.0509, "step": 137720 }, { "epoch": 1.471552967573054, "grad_norm": 3.7918701171875, "learning_rate": 9.947574563983747e-07, "loss": 0.0412, "step": 137730 }, { "epoch": 1.4716598108873338, "grad_norm": 2.4437639713287354, "learning_rate": 9.947550295586212e-07, "loss": 0.0447, "step": 137740 }, { "epoch": 1.4717666542016135, "grad_norm": 0.45667368173599243, "learning_rate": 9.947526021602519e-07, "loss": 0.0667, "step": 137750 }, { "epoch": 1.471873497515893, "grad_norm": 0.03981972113251686, "learning_rate": 9.947501742032698e-07, "loss": 0.0079, "step": 137760 }, { "epoch": 1.4719803408301726, "grad_norm": 4.878508567810059, "learning_rate": 9.94747745687677e-07, "loss": 0.0323, "step": 137770 }, { "epoch": 1.4720871841444523, "grad_norm": 0.8314284086227417, "learning_rate": 9.94745316613477e-07, "loss": 0.0196, "step": 137780 }, { "epoch": 1.4721940274587317, "grad_norm": 0.9793188571929932, "learning_rate": 9.94742886980672e-07, "loss": 0.013, "step": 137790 }, { "epoch": 1.4723008707730114, "grad_norm": 1.5323888063430786, "learning_rate": 9.94740456789265e-07, "loss": 0.0104, "step": 137800 }, { "epoch": 1.472407714087291, "grad_norm": 8.734953880310059, "learning_rate": 9.947380260392588e-07, "loss": 0.0637, "step": 137810 }, { "epoch": 1.4725145574015706, "grad_norm": 0.04335770383477211, "learning_rate": 9.947355947306558e-07, "loss": 0.0208, "step": 137820 }, { "epoch": 1.4726214007158502, "grad_norm": 3.3449764251708984, "learning_rate": 9.947331628634592e-07, "loss": 0.0298, "step": 137830 }, { "epoch": 1.47272824403013, "grad_norm": 0.1841467022895813, "learning_rate": 9.947307304376715e-07, "loss": 0.0683, "step": 137840 }, { "epoch": 1.4728350873444094, "grad_norm": 8.3478422164917, "learning_rate": 9.947282974532954e-07, "loss": 0.0613, "step": 137850 }, { "epoch": 1.472941930658689, "grad_norm": 1.239974856376648, "learning_rate": 9.947258639103335e-07, "loss": 0.0208, "step": 137860 }, { "epoch": 1.4730487739729687, "grad_norm": 3.1919403076171875, "learning_rate": 9.94723429808789e-07, "loss": 0.0206, "step": 137870 }, { "epoch": 1.4731556172872482, "grad_norm": 2.3451945781707764, "learning_rate": 9.947209951486645e-07, "loss": 0.0472, "step": 137880 }, { "epoch": 1.4732624606015279, "grad_norm": 4.40346097946167, "learning_rate": 9.947185599299624e-07, "loss": 0.0372, "step": 137890 }, { "epoch": 1.4733693039158076, "grad_norm": 3.4135901927948, "learning_rate": 9.94716124152686e-07, "loss": 0.0101, "step": 137900 }, { "epoch": 1.473476147230087, "grad_norm": 0.09785933047533035, "learning_rate": 9.947136878168375e-07, "loss": 0.0214, "step": 137910 }, { "epoch": 1.4735829905443667, "grad_norm": 1.1409087181091309, "learning_rate": 9.9471125092242e-07, "loss": 0.0393, "step": 137920 }, { "epoch": 1.4736898338586464, "grad_norm": 0.2314821034669876, "learning_rate": 9.947088134694361e-07, "loss": 0.0277, "step": 137930 }, { "epoch": 1.4737966771729258, "grad_norm": 1.4327336549758911, "learning_rate": 9.947063754578888e-07, "loss": 0.0716, "step": 137940 }, { "epoch": 1.4739035204872055, "grad_norm": 2.452960252761841, "learning_rate": 9.947039368877806e-07, "loss": 0.0258, "step": 137950 }, { "epoch": 1.4740103638014852, "grad_norm": 1.9596405029296875, "learning_rate": 9.947014977591141e-07, "loss": 0.065, "step": 137960 }, { "epoch": 1.4741172071157647, "grad_norm": 0.017833838239312172, "learning_rate": 9.946990580718925e-07, "loss": 0.0221, "step": 137970 }, { "epoch": 1.4742240504300443, "grad_norm": 1.430880069732666, "learning_rate": 9.946966178261183e-07, "loss": 0.0147, "step": 137980 }, { "epoch": 1.474330893744324, "grad_norm": 7.470522403717041, "learning_rate": 9.946941770217943e-07, "loss": 0.031, "step": 137990 }, { "epoch": 1.4744377370586035, "grad_norm": 3.6656744480133057, "learning_rate": 9.94691735658923e-07, "loss": 0.0231, "step": 138000 }, { "epoch": 1.4745445803728832, "grad_norm": 5.228562831878662, "learning_rate": 9.946892937375076e-07, "loss": 0.0359, "step": 138010 }, { "epoch": 1.4746514236871628, "grad_norm": 5.89589262008667, "learning_rate": 9.946868512575508e-07, "loss": 0.0123, "step": 138020 }, { "epoch": 1.4747582670014423, "grad_norm": 1.3344448804855347, "learning_rate": 9.94684408219055e-07, "loss": 0.0152, "step": 138030 }, { "epoch": 1.474865110315722, "grad_norm": 9.662924766540527, "learning_rate": 9.94681964622023e-07, "loss": 0.0463, "step": 138040 }, { "epoch": 1.4749719536300017, "grad_norm": 4.12063455581665, "learning_rate": 9.946795204664579e-07, "loss": 0.037, "step": 138050 }, { "epoch": 1.4750787969442811, "grad_norm": 0.020005833357572556, "learning_rate": 9.946770757523626e-07, "loss": 0.0249, "step": 138060 }, { "epoch": 1.4751856402585608, "grad_norm": 3.085829973220825, "learning_rate": 9.946746304797392e-07, "loss": 0.0066, "step": 138070 }, { "epoch": 1.4752924835728405, "grad_norm": 1.0250250101089478, "learning_rate": 9.946721846485908e-07, "loss": 0.0675, "step": 138080 }, { "epoch": 1.47539932688712, "grad_norm": 5.773972511291504, "learning_rate": 9.946697382589203e-07, "loss": 0.034, "step": 138090 }, { "epoch": 1.4755061702013996, "grad_norm": 0.016112741082906723, "learning_rate": 9.9466729131073e-07, "loss": 0.0652, "step": 138100 }, { "epoch": 1.4756130135156793, "grad_norm": 0.0377969890832901, "learning_rate": 9.946648438040234e-07, "loss": 0.0427, "step": 138110 }, { "epoch": 1.4757198568299588, "grad_norm": 0.23262682557106018, "learning_rate": 9.946623957388027e-07, "loss": 0.0157, "step": 138120 }, { "epoch": 1.4758267001442384, "grad_norm": 2.011606454849243, "learning_rate": 9.94659947115071e-07, "loss": 0.0636, "step": 138130 }, { "epoch": 1.4759335434585181, "grad_norm": 0.14986640214920044, "learning_rate": 9.946574979328307e-07, "loss": 0.1185, "step": 138140 }, { "epoch": 1.4760403867727976, "grad_norm": 1.227630376815796, "learning_rate": 9.946550481920848e-07, "loss": 0.0232, "step": 138150 }, { "epoch": 1.4761472300870773, "grad_norm": 9.278141975402832, "learning_rate": 9.94652597892836e-07, "loss": 0.0735, "step": 138160 }, { "epoch": 1.476254073401357, "grad_norm": 0.051127005368471146, "learning_rate": 9.94650147035087e-07, "loss": 0.0297, "step": 138170 }, { "epoch": 1.4763609167156364, "grad_norm": 3.8525607585906982, "learning_rate": 9.946476956188407e-07, "loss": 0.0336, "step": 138180 }, { "epoch": 1.476467760029916, "grad_norm": 3.9964168071746826, "learning_rate": 9.946452436441e-07, "loss": 0.0473, "step": 138190 }, { "epoch": 1.4765746033441958, "grad_norm": 2.0184221267700195, "learning_rate": 9.946427911108672e-07, "loss": 0.0323, "step": 138200 }, { "epoch": 1.4766814466584752, "grad_norm": 2.6397252082824707, "learning_rate": 9.946403380191455e-07, "loss": 0.0511, "step": 138210 }, { "epoch": 1.476788289972755, "grad_norm": 1.0993708372116089, "learning_rate": 9.946378843689377e-07, "loss": 0.0287, "step": 138220 }, { "epoch": 1.4768951332870346, "grad_norm": 1.654215693473816, "learning_rate": 9.94635430160246e-07, "loss": 0.0193, "step": 138230 }, { "epoch": 1.477001976601314, "grad_norm": 0.010367222130298615, "learning_rate": 9.946329753930737e-07, "loss": 0.0657, "step": 138240 }, { "epoch": 1.4771088199155937, "grad_norm": 0.0587773360311985, "learning_rate": 9.946305200674235e-07, "loss": 0.0698, "step": 138250 }, { "epoch": 1.4772156632298734, "grad_norm": 0.2923288345336914, "learning_rate": 9.946280641832983e-07, "loss": 0.0332, "step": 138260 }, { "epoch": 1.4773225065441529, "grad_norm": 1.8616918325424194, "learning_rate": 9.946256077407005e-07, "loss": 0.0437, "step": 138270 }, { "epoch": 1.4774293498584326, "grad_norm": 8.49445915222168, "learning_rate": 9.94623150739633e-07, "loss": 0.0555, "step": 138280 }, { "epoch": 1.4775361931727122, "grad_norm": 0.1533903032541275, "learning_rate": 9.946206931800989e-07, "loss": 0.0533, "step": 138290 }, { "epoch": 1.4776430364869917, "grad_norm": 1.944931983947754, "learning_rate": 9.946182350621004e-07, "loss": 0.0364, "step": 138300 }, { "epoch": 1.4777498798012714, "grad_norm": 0.5781249403953552, "learning_rate": 9.946157763856408e-07, "loss": 0.0396, "step": 138310 }, { "epoch": 1.477856723115551, "grad_norm": 0.4755992889404297, "learning_rate": 9.946133171507224e-07, "loss": 0.0473, "step": 138320 }, { "epoch": 1.4779635664298307, "grad_norm": 4.680926322937012, "learning_rate": 9.946108573573485e-07, "loss": 0.0226, "step": 138330 }, { "epoch": 1.4780704097441102, "grad_norm": 3.737898588180542, "learning_rate": 9.946083970055214e-07, "loss": 0.0431, "step": 138340 }, { "epoch": 1.4781772530583899, "grad_norm": 5.683798313140869, "learning_rate": 9.946059360952443e-07, "loss": 0.0379, "step": 138350 }, { "epoch": 1.4782840963726696, "grad_norm": 1.1254162788391113, "learning_rate": 9.946034746265196e-07, "loss": 0.0517, "step": 138360 }, { "epoch": 1.478390939686949, "grad_norm": 9.284997940063477, "learning_rate": 9.946010125993504e-07, "loss": 0.0561, "step": 138370 }, { "epoch": 1.4784977830012287, "grad_norm": 0.14737281203269958, "learning_rate": 9.945985500137393e-07, "loss": 0.0746, "step": 138380 }, { "epoch": 1.4786046263155084, "grad_norm": 0.09586267918348312, "learning_rate": 9.94596086869689e-07, "loss": 0.0681, "step": 138390 }, { "epoch": 1.4787114696297878, "grad_norm": 0.267402708530426, "learning_rate": 9.945936231672022e-07, "loss": 0.011, "step": 138400 }, { "epoch": 1.4788183129440675, "grad_norm": 0.01201402023434639, "learning_rate": 9.945911589062824e-07, "loss": 0.006, "step": 138410 }, { "epoch": 1.4789251562583472, "grad_norm": 0.12005913257598877, "learning_rate": 9.945886940869315e-07, "loss": 0.0306, "step": 138420 }, { "epoch": 1.4790319995726269, "grad_norm": 2.0287277698516846, "learning_rate": 9.945862287091527e-07, "loss": 0.0493, "step": 138430 }, { "epoch": 1.4791388428869063, "grad_norm": 0.7463575601577759, "learning_rate": 9.945837627729486e-07, "loss": 0.076, "step": 138440 }, { "epoch": 1.479245686201186, "grad_norm": 0.7116600275039673, "learning_rate": 9.945812962783222e-07, "loss": 0.0576, "step": 138450 }, { "epoch": 1.4793525295154657, "grad_norm": 3.72834849357605, "learning_rate": 9.945788292252762e-07, "loss": 0.0334, "step": 138460 }, { "epoch": 1.4794593728297452, "grad_norm": 0.691070556640625, "learning_rate": 9.945763616138133e-07, "loss": 0.0446, "step": 138470 }, { "epoch": 1.4795662161440248, "grad_norm": 2.661740303039551, "learning_rate": 9.945738934439366e-07, "loss": 0.0413, "step": 138480 }, { "epoch": 1.4796730594583045, "grad_norm": 8.662642478942871, "learning_rate": 9.945714247156485e-07, "loss": 0.0369, "step": 138490 }, { "epoch": 1.479779902772584, "grad_norm": 1.9668312072753906, "learning_rate": 9.945689554289518e-07, "loss": 0.0562, "step": 138500 }, { "epoch": 1.4798867460868637, "grad_norm": 1.0633952617645264, "learning_rate": 9.945664855838495e-07, "loss": 0.0648, "step": 138510 }, { "epoch": 1.4799935894011433, "grad_norm": 5.274645805358887, "learning_rate": 9.945640151803445e-07, "loss": 0.0765, "step": 138520 }, { "epoch": 1.4801004327154228, "grad_norm": 0.3869853913784027, "learning_rate": 9.945615442184392e-07, "loss": 0.0256, "step": 138530 }, { "epoch": 1.4802072760297025, "grad_norm": 0.009059556759893894, "learning_rate": 9.945590726981368e-07, "loss": 0.0362, "step": 138540 }, { "epoch": 1.4803141193439822, "grad_norm": 0.6448732614517212, "learning_rate": 9.945566006194397e-07, "loss": 0.0294, "step": 138550 }, { "epoch": 1.4804209626582616, "grad_norm": 0.04415655508637428, "learning_rate": 9.945541279823508e-07, "loss": 0.0231, "step": 138560 }, { "epoch": 1.4805278059725413, "grad_norm": 0.043368782848119736, "learning_rate": 9.94551654786873e-07, "loss": 0.0126, "step": 138570 }, { "epoch": 1.480634649286821, "grad_norm": 0.6220861673355103, "learning_rate": 9.945491810330093e-07, "loss": 0.0204, "step": 138580 }, { "epoch": 1.4807414926011004, "grad_norm": 1.5280159711837769, "learning_rate": 9.94546706720762e-07, "loss": 0.0255, "step": 138590 }, { "epoch": 1.4808483359153801, "grad_norm": 0.3745007812976837, "learning_rate": 9.945442318501342e-07, "loss": 0.1365, "step": 138600 }, { "epoch": 1.4809551792296598, "grad_norm": 7.086736679077148, "learning_rate": 9.945417564211287e-07, "loss": 0.0332, "step": 138610 }, { "epoch": 1.4810620225439393, "grad_norm": 0.5985238552093506, "learning_rate": 9.945392804337481e-07, "loss": 0.0319, "step": 138620 }, { "epoch": 1.481168865858219, "grad_norm": 9.057071685791016, "learning_rate": 9.945368038879954e-07, "loss": 0.0171, "step": 138630 }, { "epoch": 1.4812757091724986, "grad_norm": 6.51466178894043, "learning_rate": 9.945343267838733e-07, "loss": 0.0316, "step": 138640 }, { "epoch": 1.481382552486778, "grad_norm": 6.9110026359558105, "learning_rate": 9.945318491213847e-07, "loss": 0.0187, "step": 138650 }, { "epoch": 1.4814893958010578, "grad_norm": 2.458739995956421, "learning_rate": 9.945293709005322e-07, "loss": 0.0365, "step": 138660 }, { "epoch": 1.4815962391153374, "grad_norm": 0.016815926879644394, "learning_rate": 9.94526892121319e-07, "loss": 0.0149, "step": 138670 }, { "epoch": 1.481703082429617, "grad_norm": 8.171539306640625, "learning_rate": 9.945244127837473e-07, "loss": 0.022, "step": 138680 }, { "epoch": 1.4818099257438966, "grad_norm": 3.451017379760742, "learning_rate": 9.945219328878202e-07, "loss": 0.0411, "step": 138690 }, { "epoch": 1.4819167690581763, "grad_norm": 0.024475829675793648, "learning_rate": 9.945194524335407e-07, "loss": 0.0778, "step": 138700 }, { "epoch": 1.4820236123724557, "grad_norm": 5.631635665893555, "learning_rate": 9.945169714209114e-07, "loss": 0.0079, "step": 138710 }, { "epoch": 1.4821304556867354, "grad_norm": 4.175346851348877, "learning_rate": 9.94514489849935e-07, "loss": 0.0181, "step": 138720 }, { "epoch": 1.482237299001015, "grad_norm": 0.9480147957801819, "learning_rate": 9.945120077206146e-07, "loss": 0.0871, "step": 138730 }, { "epoch": 1.4823441423152945, "grad_norm": 4.301607608795166, "learning_rate": 9.945095250329527e-07, "loss": 0.0095, "step": 138740 }, { "epoch": 1.4824509856295742, "grad_norm": 3.273019313812256, "learning_rate": 9.94507041786952e-07, "loss": 0.0051, "step": 138750 }, { "epoch": 1.482557828943854, "grad_norm": 0.09652189165353775, "learning_rate": 9.94504557982616e-07, "loss": 0.0343, "step": 138760 }, { "epoch": 1.4826646722581334, "grad_norm": 3.2381694316864014, "learning_rate": 9.945020736199468e-07, "loss": 0.0101, "step": 138770 }, { "epoch": 1.482771515572413, "grad_norm": 5.173646926879883, "learning_rate": 9.944995886989474e-07, "loss": 0.011, "step": 138780 }, { "epoch": 1.4828783588866927, "grad_norm": 3.4294815063476562, "learning_rate": 9.944971032196205e-07, "loss": 0.0191, "step": 138790 }, { "epoch": 1.4829852022009722, "grad_norm": 0.03178589418530464, "learning_rate": 9.944946171819692e-07, "loss": 0.0246, "step": 138800 }, { "epoch": 1.4830920455152519, "grad_norm": 4.542743682861328, "learning_rate": 9.944921305859962e-07, "loss": 0.0778, "step": 138810 }, { "epoch": 1.4831988888295315, "grad_norm": 6.804914474487305, "learning_rate": 9.944896434317043e-07, "loss": 0.0533, "step": 138820 }, { "epoch": 1.483305732143811, "grad_norm": 1.187436580657959, "learning_rate": 9.944871557190963e-07, "loss": 0.0269, "step": 138830 }, { "epoch": 1.4834125754580907, "grad_norm": 0.24031499028205872, "learning_rate": 9.944846674481748e-07, "loss": 0.0301, "step": 138840 }, { "epoch": 1.4835194187723704, "grad_norm": 0.0397118404507637, "learning_rate": 9.94482178618943e-07, "loss": 0.0482, "step": 138850 }, { "epoch": 1.4836262620866498, "grad_norm": 1.9761788845062256, "learning_rate": 9.944796892314033e-07, "loss": 0.0175, "step": 138860 }, { "epoch": 1.4837331054009295, "grad_norm": 12.809152603149414, "learning_rate": 9.944771992855587e-07, "loss": 0.0648, "step": 138870 }, { "epoch": 1.4838399487152092, "grad_norm": 0.06780163198709488, "learning_rate": 9.944747087814122e-07, "loss": 0.0313, "step": 138880 }, { "epoch": 1.4839467920294886, "grad_norm": 1.9344640970230103, "learning_rate": 9.944722177189663e-07, "loss": 0.0218, "step": 138890 }, { "epoch": 1.4840536353437683, "grad_norm": 1.937066912651062, "learning_rate": 9.94469726098224e-07, "loss": 0.0357, "step": 138900 }, { "epoch": 1.484160478658048, "grad_norm": 0.3554019331932068, "learning_rate": 9.94467233919188e-07, "loss": 0.0138, "step": 138910 }, { "epoch": 1.4842673219723275, "grad_norm": 4.863890647888184, "learning_rate": 9.944647411818612e-07, "loss": 0.0433, "step": 138920 }, { "epoch": 1.4843741652866072, "grad_norm": 0.07122683525085449, "learning_rate": 9.944622478862465e-07, "loss": 0.0435, "step": 138930 }, { "epoch": 1.4844810086008868, "grad_norm": 0.9254379868507385, "learning_rate": 9.944597540323466e-07, "loss": 0.044, "step": 138940 }, { "epoch": 1.4845878519151663, "grad_norm": 2.5472042560577393, "learning_rate": 9.94457259620164e-07, "loss": 0.0248, "step": 138950 }, { "epoch": 1.484694695229446, "grad_norm": 2.2821362018585205, "learning_rate": 9.944547646497022e-07, "loss": 0.0153, "step": 138960 }, { "epoch": 1.4848015385437257, "grad_norm": 8.631573677062988, "learning_rate": 9.944522691209636e-07, "loss": 0.0638, "step": 138970 }, { "epoch": 1.484908381858005, "grad_norm": 0.3330279588699341, "learning_rate": 9.94449773033951e-07, "loss": 0.0138, "step": 138980 }, { "epoch": 1.4850152251722848, "grad_norm": 0.7052226662635803, "learning_rate": 9.944472763886671e-07, "loss": 0.0208, "step": 138990 }, { "epoch": 1.4851220684865645, "grad_norm": 1.388472557067871, "learning_rate": 9.944447791851151e-07, "loss": 0.0259, "step": 139000 }, { "epoch": 1.485228911800844, "grad_norm": 0.5756428241729736, "learning_rate": 9.944422814232979e-07, "loss": 0.0361, "step": 139010 }, { "epoch": 1.4853357551151236, "grad_norm": 12.647587776184082, "learning_rate": 9.944397831032177e-07, "loss": 0.0312, "step": 139020 }, { "epoch": 1.4854425984294033, "grad_norm": 3.872774839401245, "learning_rate": 9.944372842248776e-07, "loss": 0.0596, "step": 139030 }, { "epoch": 1.4855494417436828, "grad_norm": 0.0924178957939148, "learning_rate": 9.944347847882806e-07, "loss": 0.0197, "step": 139040 }, { "epoch": 1.4856562850579624, "grad_norm": 0.0929039716720581, "learning_rate": 9.944322847934295e-07, "loss": 0.0362, "step": 139050 }, { "epoch": 1.4857631283722421, "grad_norm": 0.022458208724856377, "learning_rate": 9.94429784240327e-07, "loss": 0.0198, "step": 139060 }, { "epoch": 1.4858699716865218, "grad_norm": 11.434737205505371, "learning_rate": 9.94427283128976e-07, "loss": 0.037, "step": 139070 }, { "epoch": 1.4859768150008013, "grad_norm": 6.828878402709961, "learning_rate": 9.944247814593792e-07, "loss": 0.0827, "step": 139080 }, { "epoch": 1.486083658315081, "grad_norm": 0.863745391368866, "learning_rate": 9.944222792315394e-07, "loss": 0.0272, "step": 139090 }, { "epoch": 1.4861905016293606, "grad_norm": 10.09556770324707, "learning_rate": 9.944197764454596e-07, "loss": 0.0186, "step": 139100 }, { "epoch": 1.48629734494364, "grad_norm": 0.06323197484016418, "learning_rate": 9.944172731011426e-07, "loss": 0.024, "step": 139110 }, { "epoch": 1.4864041882579198, "grad_norm": 2.8567862510681152, "learning_rate": 9.944147691985912e-07, "loss": 0.0424, "step": 139120 }, { "epoch": 1.4865110315721994, "grad_norm": 2.2171244621276855, "learning_rate": 9.944122647378082e-07, "loss": 0.034, "step": 139130 }, { "epoch": 1.486617874886479, "grad_norm": 0.8525145649909973, "learning_rate": 9.944097597187963e-07, "loss": 0.0911, "step": 139140 }, { "epoch": 1.4867247182007586, "grad_norm": 14.66998291015625, "learning_rate": 9.944072541415587e-07, "loss": 0.0516, "step": 139150 }, { "epoch": 1.4868315615150383, "grad_norm": 3.9093799591064453, "learning_rate": 9.94404748006098e-07, "loss": 0.0274, "step": 139160 }, { "epoch": 1.486938404829318, "grad_norm": 0.07284197211265564, "learning_rate": 9.944022413124168e-07, "loss": 0.0125, "step": 139170 }, { "epoch": 1.4870452481435974, "grad_norm": 2.586287260055542, "learning_rate": 9.943997340605184e-07, "loss": 0.0161, "step": 139180 }, { "epoch": 1.487152091457877, "grad_norm": 2.8705990314483643, "learning_rate": 9.943972262504052e-07, "loss": 0.0408, "step": 139190 }, { "epoch": 1.4872589347721568, "grad_norm": 2.473754405975342, "learning_rate": 9.943947178820802e-07, "loss": 0.0144, "step": 139200 }, { "epoch": 1.4873657780864362, "grad_norm": 0.10212230682373047, "learning_rate": 9.943922089555464e-07, "loss": 0.0546, "step": 139210 }, { "epoch": 1.487472621400716, "grad_norm": 16.970748901367188, "learning_rate": 9.943896994708065e-07, "loss": 0.0302, "step": 139220 }, { "epoch": 1.4875794647149956, "grad_norm": 4.682674407958984, "learning_rate": 9.943871894278632e-07, "loss": 0.0355, "step": 139230 }, { "epoch": 1.487686308029275, "grad_norm": 4.188457012176514, "learning_rate": 9.943846788267195e-07, "loss": 0.0227, "step": 139240 }, { "epoch": 1.4877931513435547, "grad_norm": 3.973459482192993, "learning_rate": 9.94382167667378e-07, "loss": 0.0325, "step": 139250 }, { "epoch": 1.4878999946578344, "grad_norm": 1.8845112323760986, "learning_rate": 9.94379655949842e-07, "loss": 0.0121, "step": 139260 }, { "epoch": 1.4880068379721139, "grad_norm": 1.835252046585083, "learning_rate": 9.943771436741139e-07, "loss": 0.0799, "step": 139270 }, { "epoch": 1.4881136812863935, "grad_norm": 2.484105110168457, "learning_rate": 9.943746308401966e-07, "loss": 0.0739, "step": 139280 }, { "epoch": 1.4882205246006732, "grad_norm": 8.348122596740723, "learning_rate": 9.943721174480933e-07, "loss": 0.0548, "step": 139290 }, { "epoch": 1.4883273679149527, "grad_norm": 0.0528980977833271, "learning_rate": 9.943696034978065e-07, "loss": 0.0493, "step": 139300 }, { "epoch": 1.4884342112292324, "grad_norm": 4.525033950805664, "learning_rate": 9.94367088989339e-07, "loss": 0.0373, "step": 139310 }, { "epoch": 1.488541054543512, "grad_norm": 0.7297660708427429, "learning_rate": 9.943645739226936e-07, "loss": 0.0318, "step": 139320 }, { "epoch": 1.4886478978577915, "grad_norm": 0.41608086228370667, "learning_rate": 9.943620582978735e-07, "loss": 0.0264, "step": 139330 }, { "epoch": 1.4887547411720712, "grad_norm": 0.1334936022758484, "learning_rate": 9.943595421148813e-07, "loss": 0.0335, "step": 139340 }, { "epoch": 1.4888615844863509, "grad_norm": 0.7306945323944092, "learning_rate": 9.943570253737199e-07, "loss": 0.0141, "step": 139350 }, { "epoch": 1.4889684278006303, "grad_norm": 0.013819475658237934, "learning_rate": 9.94354508074392e-07, "loss": 0.0056, "step": 139360 }, { "epoch": 1.48907527111491, "grad_norm": 2.0233848094940186, "learning_rate": 9.943519902169007e-07, "loss": 0.0153, "step": 139370 }, { "epoch": 1.4891821144291897, "grad_norm": 16.338481903076172, "learning_rate": 9.943494718012486e-07, "loss": 0.0436, "step": 139380 }, { "epoch": 1.4892889577434691, "grad_norm": 0.016981026157736778, "learning_rate": 9.943469528274385e-07, "loss": 0.0258, "step": 139390 }, { "epoch": 1.4893958010577488, "grad_norm": 0.044157207012176514, "learning_rate": 9.943444332954736e-07, "loss": 0.0281, "step": 139400 }, { "epoch": 1.4895026443720285, "grad_norm": 6.133934497833252, "learning_rate": 9.943419132053566e-07, "loss": 0.048, "step": 139410 }, { "epoch": 1.489609487686308, "grad_norm": 6.855601787567139, "learning_rate": 9.9433939255709e-07, "loss": 0.0267, "step": 139420 }, { "epoch": 1.4897163310005876, "grad_norm": 0.07818112522363663, "learning_rate": 9.94336871350677e-07, "loss": 0.0073, "step": 139430 }, { "epoch": 1.4898231743148673, "grad_norm": 4.657107830047607, "learning_rate": 9.943343495861205e-07, "loss": 0.0301, "step": 139440 }, { "epoch": 1.4899300176291468, "grad_norm": 0.39570707082748413, "learning_rate": 9.943318272634231e-07, "loss": 0.0387, "step": 139450 }, { "epoch": 1.4900368609434265, "grad_norm": 0.3125537931919098, "learning_rate": 9.943293043825876e-07, "loss": 0.0554, "step": 139460 }, { "epoch": 1.4901437042577061, "grad_norm": 0.13057316839694977, "learning_rate": 9.943267809436173e-07, "loss": 0.008, "step": 139470 }, { "epoch": 1.4902505475719856, "grad_norm": 1.4752774238586426, "learning_rate": 9.943242569465145e-07, "loss": 0.0407, "step": 139480 }, { "epoch": 1.4903573908862653, "grad_norm": 0.1337740272283554, "learning_rate": 9.943217323912824e-07, "loss": 0.0361, "step": 139490 }, { "epoch": 1.490464234200545, "grad_norm": 25.04332733154297, "learning_rate": 9.943192072779239e-07, "loss": 0.0373, "step": 139500 }, { "epoch": 1.4905710775148244, "grad_norm": 0.40598776936531067, "learning_rate": 9.943166816064415e-07, "loss": 0.0366, "step": 139510 }, { "epoch": 1.490677920829104, "grad_norm": 0.056577637791633606, "learning_rate": 9.943141553768383e-07, "loss": 0.006, "step": 139520 }, { "epoch": 1.4907847641433838, "grad_norm": 0.10165490210056305, "learning_rate": 9.943116285891173e-07, "loss": 0.0452, "step": 139530 }, { "epoch": 1.4908916074576632, "grad_norm": 6.622706890106201, "learning_rate": 9.94309101243281e-07, "loss": 0.1417, "step": 139540 }, { "epoch": 1.490998450771943, "grad_norm": 10.427985191345215, "learning_rate": 9.943065733393324e-07, "loss": 0.0533, "step": 139550 }, { "epoch": 1.4911052940862226, "grad_norm": 6.486987113952637, "learning_rate": 9.943040448772746e-07, "loss": 0.0565, "step": 139560 }, { "epoch": 1.491212137400502, "grad_norm": 1.9894993305206299, "learning_rate": 9.9430151585711e-07, "loss": 0.0347, "step": 139570 }, { "epoch": 1.4913189807147818, "grad_norm": 5.359908580780029, "learning_rate": 9.942989862788416e-07, "loss": 0.0551, "step": 139580 }, { "epoch": 1.4914258240290614, "grad_norm": 0.011462317779660225, "learning_rate": 9.942964561424724e-07, "loss": 0.0237, "step": 139590 }, { "epoch": 1.491532667343341, "grad_norm": 0.4530123174190521, "learning_rate": 9.942939254480053e-07, "loss": 0.0108, "step": 139600 }, { "epoch": 1.4916395106576206, "grad_norm": 6.916354656219482, "learning_rate": 9.94291394195443e-07, "loss": 0.0541, "step": 139610 }, { "epoch": 1.4917463539719003, "grad_norm": 0.07701394706964493, "learning_rate": 9.942888623847884e-07, "loss": 0.0118, "step": 139620 }, { "epoch": 1.4918531972861797, "grad_norm": 0.0873800739645958, "learning_rate": 9.942863300160444e-07, "loss": 0.0722, "step": 139630 }, { "epoch": 1.4919600406004594, "grad_norm": 7.285334587097168, "learning_rate": 9.942837970892139e-07, "loss": 0.0445, "step": 139640 }, { "epoch": 1.492066883914739, "grad_norm": 0.7688800096511841, "learning_rate": 9.942812636042996e-07, "loss": 0.052, "step": 139650 }, { "epoch": 1.4921737272290185, "grad_norm": 0.22915665805339813, "learning_rate": 9.942787295613044e-07, "loss": 0.015, "step": 139660 }, { "epoch": 1.4922805705432982, "grad_norm": 0.44200608134269714, "learning_rate": 9.942761949602313e-07, "loss": 0.027, "step": 139670 }, { "epoch": 1.492387413857578, "grad_norm": 0.03821293264627457, "learning_rate": 9.942736598010831e-07, "loss": 0.0159, "step": 139680 }, { "epoch": 1.4924942571718574, "grad_norm": 4.822751045227051, "learning_rate": 9.942711240838624e-07, "loss": 0.0477, "step": 139690 }, { "epoch": 1.492601100486137, "grad_norm": 4.121529579162598, "learning_rate": 9.942685878085726e-07, "loss": 0.0645, "step": 139700 }, { "epoch": 1.4927079438004167, "grad_norm": 3.092686891555786, "learning_rate": 9.942660509752162e-07, "loss": 0.0162, "step": 139710 }, { "epoch": 1.4928147871146962, "grad_norm": 7.630918502807617, "learning_rate": 9.94263513583796e-07, "loss": 0.0514, "step": 139720 }, { "epoch": 1.4929216304289759, "grad_norm": 4.4354987144470215, "learning_rate": 9.94260975634315e-07, "loss": 0.0349, "step": 139730 }, { "epoch": 1.4930284737432555, "grad_norm": 4.411065578460693, "learning_rate": 9.942584371267761e-07, "loss": 0.0558, "step": 139740 }, { "epoch": 1.493135317057535, "grad_norm": 0.6320414543151855, "learning_rate": 9.94255898061182e-07, "loss": 0.0159, "step": 139750 }, { "epoch": 1.4932421603718147, "grad_norm": 0.25939545035362244, "learning_rate": 9.942533584375359e-07, "loss": 0.012, "step": 139760 }, { "epoch": 1.4933490036860944, "grad_norm": 8.247732162475586, "learning_rate": 9.942508182558404e-07, "loss": 0.0269, "step": 139770 }, { "epoch": 1.4934558470003738, "grad_norm": 8.201510429382324, "learning_rate": 9.942482775160983e-07, "loss": 0.0609, "step": 139780 }, { "epoch": 1.4935626903146535, "grad_norm": 2.7646596431732178, "learning_rate": 9.942457362183126e-07, "loss": 0.0092, "step": 139790 }, { "epoch": 1.4936695336289332, "grad_norm": 0.5858094692230225, "learning_rate": 9.942431943624862e-07, "loss": 0.0309, "step": 139800 }, { "epoch": 1.4937763769432129, "grad_norm": 2.394599437713623, "learning_rate": 9.94240651948622e-07, "loss": 0.0587, "step": 139810 }, { "epoch": 1.4938832202574923, "grad_norm": 2.247520923614502, "learning_rate": 9.94238108976723e-07, "loss": 0.0139, "step": 139820 }, { "epoch": 1.493990063571772, "grad_norm": 4.635739803314209, "learning_rate": 9.942355654467916e-07, "loss": 0.026, "step": 139830 }, { "epoch": 1.4940969068860517, "grad_norm": 6.4758124351501465, "learning_rate": 9.942330213588309e-07, "loss": 0.0382, "step": 139840 }, { "epoch": 1.4942037502003311, "grad_norm": 9.690898895263672, "learning_rate": 9.942304767128438e-07, "loss": 0.0173, "step": 139850 }, { "epoch": 1.4943105935146108, "grad_norm": 3.1238114833831787, "learning_rate": 9.942279315088334e-07, "loss": 0.0419, "step": 139860 }, { "epoch": 1.4944174368288905, "grad_norm": 3.3130531311035156, "learning_rate": 9.94225385746802e-07, "loss": 0.0199, "step": 139870 }, { "epoch": 1.49452428014317, "grad_norm": 3.788731575012207, "learning_rate": 9.942228394267533e-07, "loss": 0.0295, "step": 139880 }, { "epoch": 1.4946311234574496, "grad_norm": 0.026600226759910583, "learning_rate": 9.942202925486896e-07, "loss": 0.0079, "step": 139890 }, { "epoch": 1.4947379667717293, "grad_norm": 1.7536054849624634, "learning_rate": 9.942177451126137e-07, "loss": 0.0154, "step": 139900 }, { "epoch": 1.494844810086009, "grad_norm": 0.36154705286026, "learning_rate": 9.942151971185288e-07, "loss": 0.0204, "step": 139910 }, { "epoch": 1.4949516534002885, "grad_norm": 1.4462181329727173, "learning_rate": 9.942126485664375e-07, "loss": 0.0821, "step": 139920 }, { "epoch": 1.4950584967145681, "grad_norm": 1.3968772888183594, "learning_rate": 9.942100994563429e-07, "loss": 0.0178, "step": 139930 }, { "epoch": 1.4951653400288478, "grad_norm": 1.9083555936813354, "learning_rate": 9.94207549788248e-07, "loss": 0.0183, "step": 139940 }, { "epoch": 1.4952721833431273, "grad_norm": 0.9580259919166565, "learning_rate": 9.942049995621552e-07, "loss": 0.009, "step": 139950 }, { "epoch": 1.495379026657407, "grad_norm": 6.983981132507324, "learning_rate": 9.942024487780679e-07, "loss": 0.0155, "step": 139960 }, { "epoch": 1.4954858699716866, "grad_norm": 2.8426971435546875, "learning_rate": 9.941998974359885e-07, "loss": 0.0277, "step": 139970 }, { "epoch": 1.495592713285966, "grad_norm": 2.057927370071411, "learning_rate": 9.941973455359202e-07, "loss": 0.0368, "step": 139980 }, { "epoch": 1.4956995566002458, "grad_norm": 4.315698146820068, "learning_rate": 9.941947930778658e-07, "loss": 0.0138, "step": 139990 }, { "epoch": 1.4958063999145255, "grad_norm": 0.21494126319885254, "learning_rate": 9.94192240061828e-07, "loss": 0.0353, "step": 140000 }, { "epoch": 1.495913243228805, "grad_norm": 2.009899139404297, "learning_rate": 9.941896864878103e-07, "loss": 0.0297, "step": 140010 }, { "epoch": 1.4960200865430846, "grad_norm": 2.63122820854187, "learning_rate": 9.941871323558149e-07, "loss": 0.0447, "step": 140020 }, { "epoch": 1.4961269298573643, "grad_norm": 0.759393572807312, "learning_rate": 9.941845776658448e-07, "loss": 0.0863, "step": 140030 }, { "epoch": 1.4962337731716437, "grad_norm": 0.06937772780656815, "learning_rate": 9.941820224179031e-07, "loss": 0.008, "step": 140040 }, { "epoch": 1.4963406164859234, "grad_norm": 0.6848102807998657, "learning_rate": 9.941794666119927e-07, "loss": 0.0343, "step": 140050 }, { "epoch": 1.496447459800203, "grad_norm": 4.028797149658203, "learning_rate": 9.941769102481163e-07, "loss": 0.0442, "step": 140060 }, { "epoch": 1.4965543031144826, "grad_norm": 6.4718852043151855, "learning_rate": 9.94174353326277e-07, "loss": 0.0258, "step": 140070 }, { "epoch": 1.4966611464287622, "grad_norm": 0.8234487175941467, "learning_rate": 9.941717958464773e-07, "loss": 0.0793, "step": 140080 }, { "epoch": 1.496767989743042, "grad_norm": 0.0432174876332283, "learning_rate": 9.941692378087204e-07, "loss": 0.0069, "step": 140090 }, { "epoch": 1.4968748330573214, "grad_norm": 0.011728825978934765, "learning_rate": 9.941666792130094e-07, "loss": 0.0376, "step": 140100 }, { "epoch": 1.496981676371601, "grad_norm": 5.662307262420654, "learning_rate": 9.941641200593467e-07, "loss": 0.0087, "step": 140110 }, { "epoch": 1.4970885196858807, "grad_norm": 2.9253532886505127, "learning_rate": 9.941615603477357e-07, "loss": 0.0057, "step": 140120 }, { "epoch": 1.4971953630001602, "grad_norm": 2.0940892696380615, "learning_rate": 9.941590000781785e-07, "loss": 0.0288, "step": 140130 }, { "epoch": 1.4973022063144399, "grad_norm": 4.410494804382324, "learning_rate": 9.94156439250679e-07, "loss": 0.0503, "step": 140140 }, { "epoch": 1.4974090496287196, "grad_norm": 0.696499228477478, "learning_rate": 9.94153877865239e-07, "loss": 0.0408, "step": 140150 }, { "epoch": 1.497515892942999, "grad_norm": 0.009882007725536823, "learning_rate": 9.941513159218624e-07, "loss": 0.0138, "step": 140160 }, { "epoch": 1.4976227362572787, "grad_norm": 9.368550300598145, "learning_rate": 9.941487534205517e-07, "loss": 0.061, "step": 140170 }, { "epoch": 1.4977295795715584, "grad_norm": 1.890213131904602, "learning_rate": 9.941461903613097e-07, "loss": 0.0446, "step": 140180 }, { "epoch": 1.4978364228858378, "grad_norm": 7.1569414138793945, "learning_rate": 9.941436267441392e-07, "loss": 0.0235, "step": 140190 }, { "epoch": 1.4979432662001175, "grad_norm": 1.1620737314224243, "learning_rate": 9.941410625690433e-07, "loss": 0.0507, "step": 140200 }, { "epoch": 1.4980501095143972, "grad_norm": 0.06944654881954193, "learning_rate": 9.94138497836025e-07, "loss": 0.0149, "step": 140210 }, { "epoch": 1.4981569528286767, "grad_norm": 2.2789816856384277, "learning_rate": 9.94135932545087e-07, "loss": 0.0469, "step": 140220 }, { "epoch": 1.4982637961429564, "grad_norm": 0.9612645506858826, "learning_rate": 9.941333666962321e-07, "loss": 0.0017, "step": 140230 }, { "epoch": 1.498370639457236, "grad_norm": 7.411708831787109, "learning_rate": 9.941308002894634e-07, "loss": 0.0184, "step": 140240 }, { "epoch": 1.4984774827715155, "grad_norm": 1.3268159627914429, "learning_rate": 9.941282333247837e-07, "loss": 0.1325, "step": 140250 }, { "epoch": 1.4985843260857952, "grad_norm": 0.06144813448190689, "learning_rate": 9.94125665802196e-07, "loss": 0.0322, "step": 140260 }, { "epoch": 1.4986911694000749, "grad_norm": 3.8444130420684814, "learning_rate": 9.941230977217032e-07, "loss": 0.0227, "step": 140270 }, { "epoch": 1.4987980127143543, "grad_norm": 8.301779747009277, "learning_rate": 9.941205290833082e-07, "loss": 0.0594, "step": 140280 }, { "epoch": 1.498904856028634, "grad_norm": 0.16733840107917786, "learning_rate": 9.941179598870135e-07, "loss": 0.0216, "step": 140290 }, { "epoch": 1.4990116993429137, "grad_norm": 0.1888866126537323, "learning_rate": 9.941153901328225e-07, "loss": 0.0316, "step": 140300 }, { "epoch": 1.4991185426571931, "grad_norm": 0.009444285184144974, "learning_rate": 9.94112819820738e-07, "loss": 0.0304, "step": 140310 }, { "epoch": 1.4992253859714728, "grad_norm": 4.287966728210449, "learning_rate": 9.941102489507628e-07, "loss": 0.0326, "step": 140320 }, { "epoch": 1.4993322292857525, "grad_norm": 0.0029074321500957012, "learning_rate": 9.941076775228998e-07, "loss": 0.0414, "step": 140330 }, { "epoch": 1.499439072600032, "grad_norm": 0.13760925829410553, "learning_rate": 9.941051055371518e-07, "loss": 0.032, "step": 140340 }, { "epoch": 1.4995459159143116, "grad_norm": 0.06761988997459412, "learning_rate": 9.941025329935222e-07, "loss": 0.0778, "step": 140350 }, { "epoch": 1.4996527592285913, "grad_norm": 0.016941262409090996, "learning_rate": 9.940999598920133e-07, "loss": 0.0321, "step": 140360 }, { "epoch": 1.4997596025428708, "grad_norm": 2.502049207687378, "learning_rate": 9.940973862326283e-07, "loss": 0.0421, "step": 140370 }, { "epoch": 1.4998664458571505, "grad_norm": 0.9080498814582825, "learning_rate": 9.9409481201537e-07, "loss": 0.0086, "step": 140380 }, { "epoch": 1.4999732891714301, "grad_norm": 4.232608795166016, "learning_rate": 9.940922372402415e-07, "loss": 0.0371, "step": 140390 }, { "epoch": 1.5000801324857096, "grad_norm": 0.09106294810771942, "learning_rate": 9.940896619072454e-07, "loss": 0.0151, "step": 140400 }, { "epoch": 1.5001869757999893, "grad_norm": 0.6561139225959778, "learning_rate": 9.940870860163849e-07, "loss": 0.0723, "step": 140410 }, { "epoch": 1.500293819114269, "grad_norm": 0.010436528362333775, "learning_rate": 9.940845095676628e-07, "loss": 0.0261, "step": 140420 }, { "epoch": 1.5004006624285484, "grad_norm": 0.0700347051024437, "learning_rate": 9.940819325610819e-07, "loss": 0.0166, "step": 140430 }, { "epoch": 1.500507505742828, "grad_norm": 1.163922905921936, "learning_rate": 9.940793549966452e-07, "loss": 0.0058, "step": 140440 }, { "epoch": 1.5006143490571078, "grad_norm": 0.054193321615457535, "learning_rate": 9.940767768743559e-07, "loss": 0.0315, "step": 140450 }, { "epoch": 1.5007211923713872, "grad_norm": 2.369256019592285, "learning_rate": 9.940741981942163e-07, "loss": 0.0125, "step": 140460 }, { "epoch": 1.500828035685667, "grad_norm": 0.355524480342865, "learning_rate": 9.940716189562298e-07, "loss": 0.0226, "step": 140470 }, { "epoch": 1.5009348789999466, "grad_norm": 4.744070053100586, "learning_rate": 9.94069039160399e-07, "loss": 0.0319, "step": 140480 }, { "epoch": 1.501041722314226, "grad_norm": 1.7441080808639526, "learning_rate": 9.940664588067271e-07, "loss": 0.0286, "step": 140490 }, { "epoch": 1.501148565628506, "grad_norm": 8.741268157958984, "learning_rate": 9.94063877895217e-07, "loss": 0.0311, "step": 140500 }, { "epoch": 1.5012554089427854, "grad_norm": 20.02279281616211, "learning_rate": 9.940612964258712e-07, "loss": 0.0499, "step": 140510 }, { "epoch": 1.5013622522570649, "grad_norm": 6.453301429748535, "learning_rate": 9.940587143986932e-07, "loss": 0.0424, "step": 140520 }, { "epoch": 1.5014690955713448, "grad_norm": 3.3620927333831787, "learning_rate": 9.940561318136854e-07, "loss": 0.0673, "step": 140530 }, { "epoch": 1.5015759388856242, "grad_norm": 0.06495986878871918, "learning_rate": 9.94053548670851e-07, "loss": 0.014, "step": 140540 }, { "epoch": 1.5016827821999037, "grad_norm": 5.441705703735352, "learning_rate": 9.94050964970193e-07, "loss": 0.1092, "step": 140550 }, { "epoch": 1.5017896255141836, "grad_norm": 0.08467055857181549, "learning_rate": 9.94048380711714e-07, "loss": 0.0333, "step": 140560 }, { "epoch": 1.501896468828463, "grad_norm": 3.5060601234436035, "learning_rate": 9.94045795895417e-07, "loss": 0.011, "step": 140570 }, { "epoch": 1.5020033121427425, "grad_norm": 2.5235564708709717, "learning_rate": 9.940432105213053e-07, "loss": 0.0388, "step": 140580 }, { "epoch": 1.5021101554570224, "grad_norm": 4.375423908233643, "learning_rate": 9.940406245893813e-07, "loss": 0.0882, "step": 140590 }, { "epoch": 1.5022169987713019, "grad_norm": 0.019801482558250427, "learning_rate": 9.940380380996482e-07, "loss": 0.0139, "step": 140600 }, { "epoch": 1.5023238420855813, "grad_norm": 0.6809931397438049, "learning_rate": 9.94035451052109e-07, "loss": 0.0668, "step": 140610 }, { "epoch": 1.5024306853998612, "grad_norm": 2.051154851913452, "learning_rate": 9.940328634467663e-07, "loss": 0.0304, "step": 140620 }, { "epoch": 1.5025375287141407, "grad_norm": 2.9426259994506836, "learning_rate": 9.940302752836234e-07, "loss": 0.0181, "step": 140630 }, { "epoch": 1.5026443720284202, "grad_norm": 0.08351010829210281, "learning_rate": 9.94027686562683e-07, "loss": 0.038, "step": 140640 }, { "epoch": 1.5027512153427, "grad_norm": 1.0698091983795166, "learning_rate": 9.940250972839477e-07, "loss": 0.0259, "step": 140650 }, { "epoch": 1.5028580586569795, "grad_norm": 7.338504791259766, "learning_rate": 9.940225074474212e-07, "loss": 0.0304, "step": 140660 }, { "epoch": 1.5029649019712592, "grad_norm": 9.67103099822998, "learning_rate": 9.940199170531059e-07, "loss": 0.045, "step": 140670 }, { "epoch": 1.5030717452855389, "grad_norm": 6.679843425750732, "learning_rate": 9.940173261010048e-07, "loss": 0.0504, "step": 140680 }, { "epoch": 1.5031785885998183, "grad_norm": 0.11720513552427292, "learning_rate": 9.940147345911208e-07, "loss": 0.0124, "step": 140690 }, { "epoch": 1.503285431914098, "grad_norm": 1.8447284698486328, "learning_rate": 9.94012142523457e-07, "loss": 0.0394, "step": 140700 }, { "epoch": 1.5033922752283777, "grad_norm": 0.03635033220052719, "learning_rate": 9.94009549898016e-07, "loss": 0.0123, "step": 140710 }, { "epoch": 1.5034991185426572, "grad_norm": 3.5928473472595215, "learning_rate": 9.940069567148011e-07, "loss": 0.0598, "step": 140720 }, { "epoch": 1.5036059618569368, "grad_norm": 19.303613662719727, "learning_rate": 9.94004362973815e-07, "loss": 0.0914, "step": 140730 }, { "epoch": 1.5037128051712165, "grad_norm": 9.271088600158691, "learning_rate": 9.94001768675061e-07, "loss": 0.0126, "step": 140740 }, { "epoch": 1.503819648485496, "grad_norm": 4.269765853881836, "learning_rate": 9.93999173818541e-07, "loss": 0.0474, "step": 140750 }, { "epoch": 1.5039264917997757, "grad_norm": 1.4320374727249146, "learning_rate": 9.939965784042592e-07, "loss": 0.0163, "step": 140760 }, { "epoch": 1.5040333351140553, "grad_norm": 8.182416915893555, "learning_rate": 9.93993982432218e-07, "loss": 0.0411, "step": 140770 }, { "epoch": 1.5041401784283348, "grad_norm": 8.315414428710938, "learning_rate": 9.939913859024201e-07, "loss": 0.0393, "step": 140780 }, { "epoch": 1.5042470217426145, "grad_norm": 1.4242221117019653, "learning_rate": 9.93988788814869e-07, "loss": 0.0224, "step": 140790 }, { "epoch": 1.5043538650568942, "grad_norm": 3.1822354793548584, "learning_rate": 9.939861911695668e-07, "loss": 0.0968, "step": 140800 }, { "epoch": 1.5044607083711736, "grad_norm": 2.8494350910186768, "learning_rate": 9.93983592966517e-07, "loss": 0.0774, "step": 140810 }, { "epoch": 1.5045675516854533, "grad_norm": 3.516359329223633, "learning_rate": 9.939809942057225e-07, "loss": 0.049, "step": 140820 }, { "epoch": 1.504674394999733, "grad_norm": 3.1579813957214355, "learning_rate": 9.939783948871862e-07, "loss": 0.0336, "step": 140830 }, { "epoch": 1.5047812383140124, "grad_norm": 1.8975492715835571, "learning_rate": 9.939757950109112e-07, "loss": 0.0174, "step": 140840 }, { "epoch": 1.5048880816282921, "grad_norm": 4.274230003356934, "learning_rate": 9.939731945769e-07, "loss": 0.029, "step": 140850 }, { "epoch": 1.5049949249425718, "grad_norm": 8.687980651855469, "learning_rate": 9.939705935851559e-07, "loss": 0.0343, "step": 140860 }, { "epoch": 1.5051017682568513, "grad_norm": 0.6503902673721313, "learning_rate": 9.939679920356815e-07, "loss": 0.037, "step": 140870 }, { "epoch": 1.505208611571131, "grad_norm": 0.020835917443037033, "learning_rate": 9.939653899284803e-07, "loss": 0.0185, "step": 140880 }, { "epoch": 1.5053154548854106, "grad_norm": 0.5204303860664368, "learning_rate": 9.939627872635547e-07, "loss": 0.0251, "step": 140890 }, { "epoch": 1.50542229819969, "grad_norm": 6.852756023406982, "learning_rate": 9.939601840409078e-07, "loss": 0.0261, "step": 140900 }, { "epoch": 1.5055291415139698, "grad_norm": 0.15152373909950256, "learning_rate": 9.939575802605427e-07, "loss": 0.0213, "step": 140910 }, { "epoch": 1.5056359848282495, "grad_norm": 0.15716663002967834, "learning_rate": 9.93954975922462e-07, "loss": 0.0677, "step": 140920 }, { "epoch": 1.505742828142529, "grad_norm": 0.05476533621549606, "learning_rate": 9.93952371026669e-07, "loss": 0.0291, "step": 140930 }, { "epoch": 1.5058496714568086, "grad_norm": 11.35051441192627, "learning_rate": 9.939497655731666e-07, "loss": 0.0409, "step": 140940 }, { "epoch": 1.5059565147710883, "grad_norm": 11.661066055297852, "learning_rate": 9.939471595619574e-07, "loss": 0.0189, "step": 140950 }, { "epoch": 1.5060633580853677, "grad_norm": 0.1756431758403778, "learning_rate": 9.939445529930447e-07, "loss": 0.0377, "step": 140960 }, { "epoch": 1.5061702013996474, "grad_norm": 0.013097370974719524, "learning_rate": 9.939419458664314e-07, "loss": 0.0085, "step": 140970 }, { "epoch": 1.506277044713927, "grad_norm": 0.202610045671463, "learning_rate": 9.939393381821202e-07, "loss": 0.0411, "step": 140980 }, { "epoch": 1.5063838880282066, "grad_norm": 1.1594122648239136, "learning_rate": 9.939367299401143e-07, "loss": 0.0402, "step": 140990 }, { "epoch": 1.5064907313424862, "grad_norm": 5.920563697814941, "learning_rate": 9.939341211404166e-07, "loss": 0.0562, "step": 141000 }, { "epoch": 1.506597574656766, "grad_norm": 1.3997923135757446, "learning_rate": 9.939315117830298e-07, "loss": 0.0201, "step": 141010 }, { "epoch": 1.5067044179710454, "grad_norm": 2.5811753273010254, "learning_rate": 9.939289018679572e-07, "loss": 0.0339, "step": 141020 }, { "epoch": 1.506811261285325, "grad_norm": 0.2315620481967926, "learning_rate": 9.939262913952015e-07, "loss": 0.0247, "step": 141030 }, { "epoch": 1.5069181045996047, "grad_norm": 9.075881958007812, "learning_rate": 9.93923680364766e-07, "loss": 0.0355, "step": 141040 }, { "epoch": 1.5070249479138842, "grad_norm": 0.11317652463912964, "learning_rate": 9.93921068776653e-07, "loss": 0.0143, "step": 141050 }, { "epoch": 1.5071317912281639, "grad_norm": 0.4279278814792633, "learning_rate": 9.93918456630866e-07, "loss": 0.0314, "step": 141060 }, { "epoch": 1.5072386345424436, "grad_norm": 6.086715221405029, "learning_rate": 9.939158439274078e-07, "loss": 0.0366, "step": 141070 }, { "epoch": 1.507345477856723, "grad_norm": 2.809061050415039, "learning_rate": 9.939132306662815e-07, "loss": 0.0339, "step": 141080 }, { "epoch": 1.5074523211710027, "grad_norm": 0.3048001825809479, "learning_rate": 9.939106168474898e-07, "loss": 0.0247, "step": 141090 }, { "epoch": 1.5075591644852824, "grad_norm": 3.652508020401001, "learning_rate": 9.939080024710356e-07, "loss": 0.0109, "step": 141100 }, { "epoch": 1.5076660077995618, "grad_norm": 6.814632892608643, "learning_rate": 9.939053875369222e-07, "loss": 0.0282, "step": 141110 }, { "epoch": 1.5077728511138415, "grad_norm": 2.398388624191284, "learning_rate": 9.93902772045152e-07, "loss": 0.0198, "step": 141120 }, { "epoch": 1.5078796944281212, "grad_norm": 7.328733921051025, "learning_rate": 9.939001559957285e-07, "loss": 0.1061, "step": 141130 }, { "epoch": 1.5079865377424007, "grad_norm": 5.6341962814331055, "learning_rate": 9.938975393886545e-07, "loss": 0.0317, "step": 141140 }, { "epoch": 1.5080933810566803, "grad_norm": 0.5532051920890808, "learning_rate": 9.938949222239328e-07, "loss": 0.0263, "step": 141150 }, { "epoch": 1.50820022437096, "grad_norm": 2.483811378479004, "learning_rate": 9.938923045015665e-07, "loss": 0.0161, "step": 141160 }, { "epoch": 1.5083070676852395, "grad_norm": 4.064929008483887, "learning_rate": 9.938896862215586e-07, "loss": 0.0791, "step": 141170 }, { "epoch": 1.5084139109995192, "grad_norm": 1.5046215057373047, "learning_rate": 9.938870673839119e-07, "loss": 0.0223, "step": 141180 }, { "epoch": 1.5085207543137988, "grad_norm": 0.15818744897842407, "learning_rate": 9.938844479886293e-07, "loss": 0.0415, "step": 141190 }, { "epoch": 1.5086275976280783, "grad_norm": 2.1593432426452637, "learning_rate": 9.93881828035714e-07, "loss": 0.0193, "step": 141200 }, { "epoch": 1.508734440942358, "grad_norm": 1.9412728548049927, "learning_rate": 9.938792075251689e-07, "loss": 0.0272, "step": 141210 }, { "epoch": 1.5088412842566377, "grad_norm": 0.10818260908126831, "learning_rate": 9.938765864569968e-07, "loss": 0.0166, "step": 141220 }, { "epoch": 1.5089481275709171, "grad_norm": 6.956778049468994, "learning_rate": 9.938739648312008e-07, "loss": 0.0361, "step": 141230 }, { "epoch": 1.509054970885197, "grad_norm": 2.7377729415893555, "learning_rate": 9.938713426477838e-07, "loss": 0.0413, "step": 141240 }, { "epoch": 1.5091618141994765, "grad_norm": 1.1692264080047607, "learning_rate": 9.938687199067488e-07, "loss": 0.101, "step": 141250 }, { "epoch": 1.509268657513756, "grad_norm": 0.03836345300078392, "learning_rate": 9.938660966080987e-07, "loss": 0.0428, "step": 141260 }, { "epoch": 1.5093755008280358, "grad_norm": 3.751539945602417, "learning_rate": 9.938634727518364e-07, "loss": 0.0203, "step": 141270 }, { "epoch": 1.5094823441423153, "grad_norm": 0.31577616930007935, "learning_rate": 9.938608483379653e-07, "loss": 0.0218, "step": 141280 }, { "epoch": 1.5095891874565948, "grad_norm": 0.1111733540892601, "learning_rate": 9.938582233664878e-07, "loss": 0.0074, "step": 141290 }, { "epoch": 1.5096960307708747, "grad_norm": 0.3812190294265747, "learning_rate": 9.93855597837407e-07, "loss": 0.046, "step": 141300 }, { "epoch": 1.5098028740851541, "grad_norm": 5.477785587310791, "learning_rate": 9.93852971750726e-07, "loss": 0.0183, "step": 141310 }, { "epoch": 1.5099097173994336, "grad_norm": 5.1937103271484375, "learning_rate": 9.93850345106448e-07, "loss": 0.0333, "step": 141320 }, { "epoch": 1.5100165607137135, "grad_norm": 0.4932747781276703, "learning_rate": 9.938477179045754e-07, "loss": 0.0102, "step": 141330 }, { "epoch": 1.510123404027993, "grad_norm": 0.42799049615859985, "learning_rate": 9.938450901451116e-07, "loss": 0.0082, "step": 141340 }, { "epoch": 1.5102302473422724, "grad_norm": 1.9359421730041504, "learning_rate": 9.938424618280595e-07, "loss": 0.037, "step": 141350 }, { "epoch": 1.5103370906565523, "grad_norm": 6.922288417816162, "learning_rate": 9.938398329534218e-07, "loss": 0.0653, "step": 141360 }, { "epoch": 1.5104439339708318, "grad_norm": 3.6621930599212646, "learning_rate": 9.938372035212017e-07, "loss": 0.0348, "step": 141370 }, { "epoch": 1.5105507772851112, "grad_norm": 6.157815456390381, "learning_rate": 9.93834573531402e-07, "loss": 0.0616, "step": 141380 }, { "epoch": 1.5106576205993911, "grad_norm": 0.21165651082992554, "learning_rate": 9.938319429840259e-07, "loss": 0.0218, "step": 141390 }, { "epoch": 1.5107644639136706, "grad_norm": 4.523149013519287, "learning_rate": 9.938293118790765e-07, "loss": 0.0162, "step": 141400 }, { "epoch": 1.5108713072279503, "grad_norm": 0.015571502037346363, "learning_rate": 9.938266802165562e-07, "loss": 0.0241, "step": 141410 }, { "epoch": 1.51097815054223, "grad_norm": 1.9159611463546753, "learning_rate": 9.938240479964684e-07, "loss": 0.0194, "step": 141420 }, { "epoch": 1.5110849938565094, "grad_norm": 2.063840389251709, "learning_rate": 9.93821415218816e-07, "loss": 0.0091, "step": 141430 }, { "epoch": 1.511191837170789, "grad_norm": 3.1553688049316406, "learning_rate": 9.93818781883602e-07, "loss": 0.0272, "step": 141440 }, { "epoch": 1.5112986804850688, "grad_norm": 8.622901916503906, "learning_rate": 9.938161479908294e-07, "loss": 0.0708, "step": 141450 }, { "epoch": 1.5114055237993482, "grad_norm": 3.6854398250579834, "learning_rate": 9.93813513540501e-07, "loss": 0.0554, "step": 141460 }, { "epoch": 1.511512367113628, "grad_norm": 10.034096717834473, "learning_rate": 9.938108785326198e-07, "loss": 0.0271, "step": 141470 }, { "epoch": 1.5116192104279076, "grad_norm": 0.18146200478076935, "learning_rate": 9.93808242967189e-07, "loss": 0.0126, "step": 141480 }, { "epoch": 1.511726053742187, "grad_norm": 0.16153550148010254, "learning_rate": 9.938056068442112e-07, "loss": 0.0207, "step": 141490 }, { "epoch": 1.5118328970564667, "grad_norm": 1.1160444021224976, "learning_rate": 9.938029701636899e-07, "loss": 0.0403, "step": 141500 }, { "epoch": 1.5119397403707464, "grad_norm": 0.18426761031150818, "learning_rate": 9.938003329256275e-07, "loss": 0.0814, "step": 141510 }, { "epoch": 1.5120465836850259, "grad_norm": 0.2584278881549835, "learning_rate": 9.937976951300273e-07, "loss": 0.0587, "step": 141520 }, { "epoch": 1.5121534269993055, "grad_norm": 4.167240619659424, "learning_rate": 9.937950567768922e-07, "loss": 0.0205, "step": 141530 }, { "epoch": 1.5122602703135852, "grad_norm": 2.8666303157806396, "learning_rate": 9.937924178662255e-07, "loss": 0.037, "step": 141540 }, { "epoch": 1.5123671136278647, "grad_norm": 2.23557448387146, "learning_rate": 9.937897783980298e-07, "loss": 0.0188, "step": 141550 }, { "epoch": 1.5124739569421444, "grad_norm": 3.3075754642486572, "learning_rate": 9.93787138372308e-07, "loss": 0.0203, "step": 141560 }, { "epoch": 1.512580800256424, "grad_norm": 7.8546319007873535, "learning_rate": 9.937844977890632e-07, "loss": 0.0215, "step": 141570 }, { "epoch": 1.5126876435707035, "grad_norm": 0.3937256634235382, "learning_rate": 9.937818566482986e-07, "loss": 0.0183, "step": 141580 }, { "epoch": 1.5127944868849832, "grad_norm": 0.053063273429870605, "learning_rate": 9.93779214950017e-07, "loss": 0.0113, "step": 141590 }, { "epoch": 1.5129013301992629, "grad_norm": 8.663597106933594, "learning_rate": 9.937765726942215e-07, "loss": 0.0447, "step": 141600 }, { "epoch": 1.5130081735135423, "grad_norm": 9.364944458007812, "learning_rate": 9.937739298809148e-07, "loss": 0.0504, "step": 141610 }, { "epoch": 1.513115016827822, "grad_norm": 13.49880599975586, "learning_rate": 9.937712865101003e-07, "loss": 0.0521, "step": 141620 }, { "epoch": 1.5132218601421017, "grad_norm": 1.1093844175338745, "learning_rate": 9.937686425817804e-07, "loss": 0.0083, "step": 141630 }, { "epoch": 1.5133287034563812, "grad_norm": 6.872551918029785, "learning_rate": 9.937659980959587e-07, "loss": 0.0343, "step": 141640 }, { "epoch": 1.5134355467706608, "grad_norm": 0.3107832968235016, "learning_rate": 9.937633530526379e-07, "loss": 0.0942, "step": 141650 }, { "epoch": 1.5135423900849405, "grad_norm": 0.00488830404356122, "learning_rate": 9.93760707451821e-07, "loss": 0.0185, "step": 141660 }, { "epoch": 1.51364923339922, "grad_norm": 0.46083301305770874, "learning_rate": 9.937580612935108e-07, "loss": 0.0501, "step": 141670 }, { "epoch": 1.5137560767134997, "grad_norm": 0.07336258143186569, "learning_rate": 9.937554145777105e-07, "loss": 0.048, "step": 141680 }, { "epoch": 1.5138629200277793, "grad_norm": 0.41198426485061646, "learning_rate": 9.937527673044232e-07, "loss": 0.0128, "step": 141690 }, { "epoch": 1.5139697633420588, "grad_norm": 0.07970976829528809, "learning_rate": 9.937501194736518e-07, "loss": 0.0205, "step": 141700 }, { "epoch": 1.5140766066563385, "grad_norm": 5.9127349853515625, "learning_rate": 9.937474710853992e-07, "loss": 0.0452, "step": 141710 }, { "epoch": 1.5141834499706182, "grad_norm": 7.448973655700684, "learning_rate": 9.937448221396684e-07, "loss": 0.0322, "step": 141720 }, { "epoch": 1.5142902932848976, "grad_norm": 1.322830319404602, "learning_rate": 9.937421726364627e-07, "loss": 0.0167, "step": 141730 }, { "epoch": 1.5143971365991773, "grad_norm": 0.08976542204618454, "learning_rate": 9.937395225757845e-07, "loss": 0.0184, "step": 141740 }, { "epoch": 1.514503979913457, "grad_norm": 1.111694574356079, "learning_rate": 9.937368719576374e-07, "loss": 0.0217, "step": 141750 }, { "epoch": 1.5146108232277364, "grad_norm": 0.058217357844114304, "learning_rate": 9.937342207820238e-07, "loss": 0.0316, "step": 141760 }, { "epoch": 1.5147176665420161, "grad_norm": 4.126279354095459, "learning_rate": 9.937315690489472e-07, "loss": 0.0129, "step": 141770 }, { "epoch": 1.5148245098562958, "grad_norm": 6.06545877456665, "learning_rate": 9.937289167584102e-07, "loss": 0.0304, "step": 141780 }, { "epoch": 1.5149313531705753, "grad_norm": 0.025717761367559433, "learning_rate": 9.937262639104162e-07, "loss": 0.0135, "step": 141790 }, { "epoch": 1.515038196484855, "grad_norm": 2.5543155670166016, "learning_rate": 9.937236105049678e-07, "loss": 0.0201, "step": 141800 }, { "epoch": 1.5151450397991346, "grad_norm": 0.7280938029289246, "learning_rate": 9.937209565420684e-07, "loss": 0.0496, "step": 141810 }, { "epoch": 1.515251883113414, "grad_norm": 3.4319329261779785, "learning_rate": 9.937183020217205e-07, "loss": 0.0691, "step": 141820 }, { "epoch": 1.5153587264276938, "grad_norm": 2.7876644134521484, "learning_rate": 9.937156469439277e-07, "loss": 0.0398, "step": 141830 }, { "epoch": 1.5154655697419734, "grad_norm": 2.9679653644561768, "learning_rate": 9.937129913086924e-07, "loss": 0.0295, "step": 141840 }, { "epoch": 1.515572413056253, "grad_norm": 1.868053913116455, "learning_rate": 9.937103351160181e-07, "loss": 0.0102, "step": 141850 }, { "epoch": 1.5156792563705326, "grad_norm": 9.325459480285645, "learning_rate": 9.937076783659072e-07, "loss": 0.0431, "step": 141860 }, { "epoch": 1.5157860996848123, "grad_norm": 14.530237197875977, "learning_rate": 9.937050210583635e-07, "loss": 0.0608, "step": 141870 }, { "epoch": 1.5158929429990917, "grad_norm": 1.4240630865097046, "learning_rate": 9.937023631933894e-07, "loss": 0.0286, "step": 141880 }, { "epoch": 1.5159997863133714, "grad_norm": 0.005876138340681791, "learning_rate": 9.936997047709881e-07, "loss": 0.0625, "step": 141890 }, { "epoch": 1.516106629627651, "grad_norm": 4.585703372955322, "learning_rate": 9.936970457911623e-07, "loss": 0.0358, "step": 141900 }, { "epoch": 1.5162134729419305, "grad_norm": 2.7518723011016846, "learning_rate": 9.936943862539156e-07, "loss": 0.0347, "step": 141910 }, { "epoch": 1.5163203162562102, "grad_norm": 0.021277302876114845, "learning_rate": 9.936917261592506e-07, "loss": 0.0441, "step": 141920 }, { "epoch": 1.51642715957049, "grad_norm": 3.6935718059539795, "learning_rate": 9.936890655071704e-07, "loss": 0.0195, "step": 141930 }, { "epoch": 1.5165340028847694, "grad_norm": 3.7527263164520264, "learning_rate": 9.93686404297678e-07, "loss": 0.0389, "step": 141940 }, { "epoch": 1.516640846199049, "grad_norm": 1.9062347412109375, "learning_rate": 9.936837425307763e-07, "loss": 0.0126, "step": 141950 }, { "epoch": 1.5167476895133287, "grad_norm": 0.0025584734976291656, "learning_rate": 9.936810802064684e-07, "loss": 0.0067, "step": 141960 }, { "epoch": 1.5168545328276082, "grad_norm": 3.4018256664276123, "learning_rate": 9.936784173247574e-07, "loss": 0.0543, "step": 141970 }, { "epoch": 1.516961376141888, "grad_norm": 0.4232622981071472, "learning_rate": 9.936757538856461e-07, "loss": 0.0132, "step": 141980 }, { "epoch": 1.5170682194561675, "grad_norm": 4.076189994812012, "learning_rate": 9.936730898891375e-07, "loss": 0.0217, "step": 141990 }, { "epoch": 1.517175062770447, "grad_norm": 5.972480773925781, "learning_rate": 9.93670425335235e-07, "loss": 0.0264, "step": 142000 }, { "epoch": 1.517281906084727, "grad_norm": 4.67045259475708, "learning_rate": 9.936677602239412e-07, "loss": 0.0251, "step": 142010 }, { "epoch": 1.5173887493990064, "grad_norm": 0.1379023641347885, "learning_rate": 9.936650945552592e-07, "loss": 0.0203, "step": 142020 }, { "epoch": 1.5174955927132858, "grad_norm": 3.5275561809539795, "learning_rate": 9.936624283291921e-07, "loss": 0.015, "step": 142030 }, { "epoch": 1.5176024360275657, "grad_norm": 1.6132749319076538, "learning_rate": 9.936597615457428e-07, "loss": 0.0065, "step": 142040 }, { "epoch": 1.5177092793418452, "grad_norm": 3.201197862625122, "learning_rate": 9.936570942049145e-07, "loss": 0.0576, "step": 142050 }, { "epoch": 1.5178161226561246, "grad_norm": 5.427112102508545, "learning_rate": 9.9365442630671e-07, "loss": 0.0802, "step": 142060 }, { "epoch": 1.5179229659704045, "grad_norm": 4.742171287536621, "learning_rate": 9.936517578511323e-07, "loss": 0.0368, "step": 142070 }, { "epoch": 1.518029809284684, "grad_norm": 13.09316635131836, "learning_rate": 9.936490888381847e-07, "loss": 0.0169, "step": 142080 }, { "epoch": 1.5181366525989635, "grad_norm": 0.588951826095581, "learning_rate": 9.9364641926787e-07, "loss": 0.0262, "step": 142090 }, { "epoch": 1.5182434959132434, "grad_norm": 0.0835057944059372, "learning_rate": 9.93643749140191e-07, "loss": 0.0147, "step": 142100 }, { "epoch": 1.5183503392275228, "grad_norm": 0.1560409814119339, "learning_rate": 9.936410784551513e-07, "loss": 0.0309, "step": 142110 }, { "epoch": 1.5184571825418023, "grad_norm": 0.7347554564476013, "learning_rate": 9.936384072127533e-07, "loss": 0.0307, "step": 142120 }, { "epoch": 1.5185640258560822, "grad_norm": 2.7909293174743652, "learning_rate": 9.936357354130002e-07, "loss": 0.033, "step": 142130 }, { "epoch": 1.5186708691703616, "grad_norm": 3.411736011505127, "learning_rate": 9.936330630558954e-07, "loss": 0.0508, "step": 142140 }, { "epoch": 1.5187777124846413, "grad_norm": 0.1158451959490776, "learning_rate": 9.936303901414414e-07, "loss": 0.0083, "step": 142150 }, { "epoch": 1.518884555798921, "grad_norm": 10.481070518493652, "learning_rate": 9.936277166696417e-07, "loss": 0.073, "step": 142160 }, { "epoch": 1.5189913991132005, "grad_norm": 11.591727256774902, "learning_rate": 9.936250426404987e-07, "loss": 0.0609, "step": 142170 }, { "epoch": 1.5190982424274801, "grad_norm": 3.1066269874572754, "learning_rate": 9.936223680540159e-07, "loss": 0.0176, "step": 142180 }, { "epoch": 1.5192050857417598, "grad_norm": 0.26655417680740356, "learning_rate": 9.936196929101964e-07, "loss": 0.0313, "step": 142190 }, { "epoch": 1.5193119290560393, "grad_norm": 0.10647155344486237, "learning_rate": 9.936170172090428e-07, "loss": 0.0421, "step": 142200 }, { "epoch": 1.519418772370319, "grad_norm": 2.4748101234436035, "learning_rate": 9.936143409505582e-07, "loss": 0.0474, "step": 142210 }, { "epoch": 1.5195256156845987, "grad_norm": 0.0838269293308258, "learning_rate": 9.93611664134746e-07, "loss": 0.0154, "step": 142220 }, { "epoch": 1.519632458998878, "grad_norm": 4.904418468475342, "learning_rate": 9.93608986761609e-07, "loss": 0.0595, "step": 142230 }, { "epoch": 1.5197393023131578, "grad_norm": 2.8113315105438232, "learning_rate": 9.936063088311501e-07, "loss": 0.0248, "step": 142240 }, { "epoch": 1.5198461456274375, "grad_norm": 0.8703973889350891, "learning_rate": 9.936036303433723e-07, "loss": 0.0256, "step": 142250 }, { "epoch": 1.519952988941717, "grad_norm": 0.9240632653236389, "learning_rate": 9.93600951298279e-07, "loss": 0.0498, "step": 142260 }, { "epoch": 1.5200598322559966, "grad_norm": 1.3376082181930542, "learning_rate": 9.935982716958728e-07, "loss": 0.0114, "step": 142270 }, { "epoch": 1.5201666755702763, "grad_norm": 5.7756242752075195, "learning_rate": 9.93595591536157e-07, "loss": 0.0419, "step": 142280 }, { "epoch": 1.5202735188845558, "grad_norm": 0.01389927975833416, "learning_rate": 9.935929108191342e-07, "loss": 0.0207, "step": 142290 }, { "epoch": 1.5203803621988354, "grad_norm": 0.23306292295455933, "learning_rate": 9.935902295448081e-07, "loss": 0.0224, "step": 142300 }, { "epoch": 1.5204872055131151, "grad_norm": 0.05102140083909035, "learning_rate": 9.935875477131813e-07, "loss": 0.0257, "step": 142310 }, { "epoch": 1.5205940488273946, "grad_norm": 0.06283982843160629, "learning_rate": 9.935848653242569e-07, "loss": 0.0263, "step": 142320 }, { "epoch": 1.5207008921416743, "grad_norm": 0.2502965033054352, "learning_rate": 9.935821823780379e-07, "loss": 0.0379, "step": 142330 }, { "epoch": 1.520807735455954, "grad_norm": 0.005471778102219105, "learning_rate": 9.935794988745275e-07, "loss": 0.0174, "step": 142340 }, { "epoch": 1.5209145787702334, "grad_norm": 2.8709640502929688, "learning_rate": 9.935768148137285e-07, "loss": 0.0813, "step": 142350 }, { "epoch": 1.521021422084513, "grad_norm": 0.12442710995674133, "learning_rate": 9.93574130195644e-07, "loss": 0.0196, "step": 142360 }, { "epoch": 1.5211282653987928, "grad_norm": 0.05473692715167999, "learning_rate": 9.93571445020277e-07, "loss": 0.015, "step": 142370 }, { "epoch": 1.5212351087130722, "grad_norm": 6.986658096313477, "learning_rate": 9.935687592876308e-07, "loss": 0.0256, "step": 142380 }, { "epoch": 1.521341952027352, "grad_norm": 2.1671862602233887, "learning_rate": 9.93566072997708e-07, "loss": 0.0274, "step": 142390 }, { "epoch": 1.5214487953416316, "grad_norm": 0.007380330469459295, "learning_rate": 9.935633861505119e-07, "loss": 0.0347, "step": 142400 }, { "epoch": 1.521555638655911, "grad_norm": 0.7442771196365356, "learning_rate": 9.935606987460455e-07, "loss": 0.029, "step": 142410 }, { "epoch": 1.5216624819701907, "grad_norm": 17.91100311279297, "learning_rate": 9.93558010784312e-07, "loss": 0.0398, "step": 142420 }, { "epoch": 1.5217693252844704, "grad_norm": 4.3263678550720215, "learning_rate": 9.935553222653138e-07, "loss": 0.0138, "step": 142430 }, { "epoch": 1.5218761685987499, "grad_norm": 0.005255558993667364, "learning_rate": 9.93552633189055e-07, "loss": 0.0593, "step": 142440 }, { "epoch": 1.5219830119130295, "grad_norm": 0.0382775217294693, "learning_rate": 9.935499435555375e-07, "loss": 0.0312, "step": 142450 }, { "epoch": 1.5220898552273092, "grad_norm": 4.14900016784668, "learning_rate": 9.93547253364765e-07, "loss": 0.0939, "step": 142460 }, { "epoch": 1.5221966985415887, "grad_norm": 1.2430078983306885, "learning_rate": 9.935445626167405e-07, "loss": 0.0129, "step": 142470 }, { "epoch": 1.5223035418558684, "grad_norm": 1.2234656810760498, "learning_rate": 9.93541871311467e-07, "loss": 0.0764, "step": 142480 }, { "epoch": 1.522410385170148, "grad_norm": 1.1501941680908203, "learning_rate": 9.935391794489474e-07, "loss": 0.054, "step": 142490 }, { "epoch": 1.5225172284844275, "grad_norm": 0.023473957553505898, "learning_rate": 9.935364870291847e-07, "loss": 0.0069, "step": 142500 }, { "epoch": 1.5226240717987072, "grad_norm": 0.18558412790298462, "learning_rate": 9.935337940521824e-07, "loss": 0.0216, "step": 142510 }, { "epoch": 1.5227309151129869, "grad_norm": 10.87164306640625, "learning_rate": 9.935311005179427e-07, "loss": 0.0485, "step": 142520 }, { "epoch": 1.5228377584272663, "grad_norm": 0.008454486727714539, "learning_rate": 9.935284064264695e-07, "loss": 0.0252, "step": 142530 }, { "epoch": 1.522944601741546, "grad_norm": 0.34847795963287354, "learning_rate": 9.935257117777654e-07, "loss": 0.0583, "step": 142540 }, { "epoch": 1.5230514450558257, "grad_norm": 1.2947829961776733, "learning_rate": 9.935230165718336e-07, "loss": 0.0465, "step": 142550 }, { "epoch": 1.5231582883701051, "grad_norm": 4.458477020263672, "learning_rate": 9.935203208086769e-07, "loss": 0.0315, "step": 142560 }, { "epoch": 1.5232651316843848, "grad_norm": 0.021008487790822983, "learning_rate": 9.935176244882985e-07, "loss": 0.0415, "step": 142570 }, { "epoch": 1.5233719749986645, "grad_norm": 2.8912501335144043, "learning_rate": 9.935149276107016e-07, "loss": 0.0381, "step": 142580 }, { "epoch": 1.523478818312944, "grad_norm": 0.3411841094493866, "learning_rate": 9.93512230175889e-07, "loss": 0.0105, "step": 142590 }, { "epoch": 1.5235856616272236, "grad_norm": 5.2870306968688965, "learning_rate": 9.93509532183864e-07, "loss": 0.041, "step": 142600 }, { "epoch": 1.5236925049415033, "grad_norm": 2.5052075386047363, "learning_rate": 9.935068336346293e-07, "loss": 0.0552, "step": 142610 }, { "epoch": 1.5237993482557828, "grad_norm": 12.110250473022461, "learning_rate": 9.935041345281882e-07, "loss": 0.0181, "step": 142620 }, { "epoch": 1.5239061915700625, "grad_norm": 1.0984225273132324, "learning_rate": 9.935014348645437e-07, "loss": 0.0619, "step": 142630 }, { "epoch": 1.5240130348843421, "grad_norm": 0.015181059017777443, "learning_rate": 9.934987346436988e-07, "loss": 0.0974, "step": 142640 }, { "epoch": 1.5241198781986216, "grad_norm": 0.04604640230536461, "learning_rate": 9.934960338656567e-07, "loss": 0.0508, "step": 142650 }, { "epoch": 1.5242267215129013, "grad_norm": 5.826618671417236, "learning_rate": 9.934933325304201e-07, "loss": 0.0482, "step": 142660 }, { "epoch": 1.524333564827181, "grad_norm": 4.545825004577637, "learning_rate": 9.934906306379926e-07, "loss": 0.0339, "step": 142670 }, { "epoch": 1.5244404081414604, "grad_norm": 1.4298526048660278, "learning_rate": 9.934879281883767e-07, "loss": 0.0026, "step": 142680 }, { "epoch": 1.52454725145574, "grad_norm": 0.685355544090271, "learning_rate": 9.934852251815757e-07, "loss": 0.0196, "step": 142690 }, { "epoch": 1.5246540947700198, "grad_norm": 0.06960058212280273, "learning_rate": 9.934825216175928e-07, "loss": 0.0594, "step": 142700 }, { "epoch": 1.5247609380842992, "grad_norm": 2.8350911140441895, "learning_rate": 9.934798174964308e-07, "loss": 0.0954, "step": 142710 }, { "epoch": 1.5248677813985791, "grad_norm": 4.072082996368408, "learning_rate": 9.934771128180927e-07, "loss": 0.0301, "step": 142720 }, { "epoch": 1.5249746247128586, "grad_norm": 7.880352020263672, "learning_rate": 9.934744075825818e-07, "loss": 0.032, "step": 142730 }, { "epoch": 1.525081468027138, "grad_norm": 1.4903182983398438, "learning_rate": 9.93471701789901e-07, "loss": 0.0322, "step": 142740 }, { "epoch": 1.525188311341418, "grad_norm": 3.403998851776123, "learning_rate": 9.934689954400535e-07, "loss": 0.0615, "step": 142750 }, { "epoch": 1.5252951546556974, "grad_norm": 2.71606183052063, "learning_rate": 9.934662885330423e-07, "loss": 0.0238, "step": 142760 }, { "epoch": 1.5254019979699769, "grad_norm": 0.893135130405426, "learning_rate": 9.934635810688704e-07, "loss": 0.0577, "step": 142770 }, { "epoch": 1.5255088412842568, "grad_norm": 3.7774558067321777, "learning_rate": 9.934608730475407e-07, "loss": 0.0386, "step": 142780 }, { "epoch": 1.5256156845985362, "grad_norm": 10.320463180541992, "learning_rate": 9.934581644690565e-07, "loss": 0.0424, "step": 142790 }, { "epoch": 1.5257225279128157, "grad_norm": 6.632988929748535, "learning_rate": 9.93455455333421e-07, "loss": 0.0373, "step": 142800 }, { "epoch": 1.5258293712270956, "grad_norm": 2.8326852321624756, "learning_rate": 9.934527456406367e-07, "loss": 0.0285, "step": 142810 }, { "epoch": 1.525936214541375, "grad_norm": 0.03947923332452774, "learning_rate": 9.93450035390707e-07, "loss": 0.0247, "step": 142820 }, { "epoch": 1.5260430578556545, "grad_norm": 0.11717946827411652, "learning_rate": 9.934473245836354e-07, "loss": 0.0438, "step": 142830 }, { "epoch": 1.5261499011699344, "grad_norm": 1.6632864475250244, "learning_rate": 9.934446132194243e-07, "loss": 0.0534, "step": 142840 }, { "epoch": 1.526256744484214, "grad_norm": 4.157187461853027, "learning_rate": 9.93441901298077e-07, "loss": 0.0518, "step": 142850 }, { "epoch": 1.5263635877984933, "grad_norm": 1.7710323333740234, "learning_rate": 9.934391888195964e-07, "loss": 0.0204, "step": 142860 }, { "epoch": 1.5264704311127733, "grad_norm": 0.15358597040176392, "learning_rate": 9.93436475783986e-07, "loss": 0.0038, "step": 142870 }, { "epoch": 1.5265772744270527, "grad_norm": 1.5546332597732544, "learning_rate": 9.934337621912484e-07, "loss": 0.0112, "step": 142880 }, { "epoch": 1.5266841177413324, "grad_norm": 2.2110211849212646, "learning_rate": 9.934310480413867e-07, "loss": 0.0152, "step": 142890 }, { "epoch": 1.526790961055612, "grad_norm": 1.547487497329712, "learning_rate": 9.934283333344042e-07, "loss": 0.0009, "step": 142900 }, { "epoch": 1.5268978043698915, "grad_norm": 0.2821873724460602, "learning_rate": 9.93425618070304e-07, "loss": 0.0151, "step": 142910 }, { "epoch": 1.5270046476841712, "grad_norm": 1.9339770078659058, "learning_rate": 9.93422902249089e-07, "loss": 0.0462, "step": 142920 }, { "epoch": 1.527111490998451, "grad_norm": 0.7245767712593079, "learning_rate": 9.934201858707623e-07, "loss": 0.0441, "step": 142930 }, { "epoch": 1.5272183343127304, "grad_norm": 0.15755291283130646, "learning_rate": 9.93417468935327e-07, "loss": 0.0171, "step": 142940 }, { "epoch": 1.52732517762701, "grad_norm": 0.0805053561925888, "learning_rate": 9.93414751442786e-07, "loss": 0.0082, "step": 142950 }, { "epoch": 1.5274320209412897, "grad_norm": 0.013040914200246334, "learning_rate": 9.934120333931425e-07, "loss": 0.009, "step": 142960 }, { "epoch": 1.5275388642555692, "grad_norm": 0.01228389237076044, "learning_rate": 9.934093147863998e-07, "loss": 0.0296, "step": 142970 }, { "epoch": 1.5276457075698489, "grad_norm": 0.014864789322018623, "learning_rate": 9.934065956225605e-07, "loss": 0.0405, "step": 142980 }, { "epoch": 1.5277525508841285, "grad_norm": 0.030744129791855812, "learning_rate": 9.93403875901628e-07, "loss": 0.0393, "step": 142990 }, { "epoch": 1.527859394198408, "grad_norm": 2.795084238052368, "learning_rate": 9.934011556236053e-07, "loss": 0.0129, "step": 143000 }, { "epoch": 1.5279662375126877, "grad_norm": 11.224321365356445, "learning_rate": 9.933984347884955e-07, "loss": 0.0174, "step": 143010 }, { "epoch": 1.5280730808269674, "grad_norm": 5.839817523956299, "learning_rate": 9.933957133963016e-07, "loss": 0.0496, "step": 143020 }, { "epoch": 1.5281799241412468, "grad_norm": 9.317357063293457, "learning_rate": 9.933929914470267e-07, "loss": 0.0267, "step": 143030 }, { "epoch": 1.5282867674555265, "grad_norm": 0.09395567327737808, "learning_rate": 9.933902689406739e-07, "loss": 0.037, "step": 143040 }, { "epoch": 1.5283936107698062, "grad_norm": 0.0898369774222374, "learning_rate": 9.93387545877246e-07, "loss": 0.0171, "step": 143050 }, { "epoch": 1.5285004540840856, "grad_norm": 2.7191925048828125, "learning_rate": 9.933848222567468e-07, "loss": 0.0691, "step": 143060 }, { "epoch": 1.5286072973983653, "grad_norm": 30.36902618408203, "learning_rate": 9.933820980791785e-07, "loss": 0.0755, "step": 143070 }, { "epoch": 1.528714140712645, "grad_norm": 3.3502228260040283, "learning_rate": 9.933793733445448e-07, "loss": 0.0215, "step": 143080 }, { "epoch": 1.5288209840269245, "grad_norm": 0.029956450685858727, "learning_rate": 9.933766480528486e-07, "loss": 0.0269, "step": 143090 }, { "epoch": 1.5289278273412041, "grad_norm": 1.110770344734192, "learning_rate": 9.933739222040927e-07, "loss": 0.0386, "step": 143100 }, { "epoch": 1.5290346706554838, "grad_norm": 13.23757553100586, "learning_rate": 9.933711957982804e-07, "loss": 0.0561, "step": 143110 }, { "epoch": 1.5291415139697633, "grad_norm": 5.808077335357666, "learning_rate": 9.933684688354149e-07, "loss": 0.0111, "step": 143120 }, { "epoch": 1.529248357284043, "grad_norm": 3.0999152660369873, "learning_rate": 9.93365741315499e-07, "loss": 0.0449, "step": 143130 }, { "epoch": 1.5293552005983226, "grad_norm": 8.003443717956543, "learning_rate": 9.933630132385362e-07, "loss": 0.073, "step": 143140 }, { "epoch": 1.529462043912602, "grad_norm": 3.59621000289917, "learning_rate": 9.933602846045293e-07, "loss": 0.0313, "step": 143150 }, { "epoch": 1.5295688872268818, "grad_norm": 12.18510627746582, "learning_rate": 9.933575554134812e-07, "loss": 0.1176, "step": 143160 }, { "epoch": 1.5296757305411615, "grad_norm": 0.10004971921443939, "learning_rate": 9.933548256653952e-07, "loss": 0.0166, "step": 143170 }, { "epoch": 1.529782573855441, "grad_norm": 3.7721657752990723, "learning_rate": 9.933520953602745e-07, "loss": 0.0122, "step": 143180 }, { "epoch": 1.5298894171697206, "grad_norm": 0.044452570378780365, "learning_rate": 9.93349364498122e-07, "loss": 0.0417, "step": 143190 }, { "epoch": 1.5299962604840003, "grad_norm": 0.4073178768157959, "learning_rate": 9.933466330789409e-07, "loss": 0.0594, "step": 143200 }, { "epoch": 1.5301031037982797, "grad_norm": 2.226327896118164, "learning_rate": 9.93343901102734e-07, "loss": 0.0652, "step": 143210 }, { "epoch": 1.5302099471125594, "grad_norm": 0.002675766358152032, "learning_rate": 9.933411685695049e-07, "loss": 0.0751, "step": 143220 }, { "epoch": 1.530316790426839, "grad_norm": 1.9826316833496094, "learning_rate": 9.933384354792562e-07, "loss": 0.031, "step": 143230 }, { "epoch": 1.5304236337411186, "grad_norm": 0.004833587910979986, "learning_rate": 9.933357018319912e-07, "loss": 0.0775, "step": 143240 }, { "epoch": 1.5305304770553982, "grad_norm": 0.9893137812614441, "learning_rate": 9.933329676277132e-07, "loss": 0.0232, "step": 143250 }, { "epoch": 1.530637320369678, "grad_norm": 4.767927646636963, "learning_rate": 9.933302328664248e-07, "loss": 0.0202, "step": 143260 }, { "epoch": 1.5307441636839574, "grad_norm": 0.1736452728509903, "learning_rate": 9.933274975481292e-07, "loss": 0.0149, "step": 143270 }, { "epoch": 1.530851006998237, "grad_norm": 0.022933386266231537, "learning_rate": 9.933247616728298e-07, "loss": 0.0248, "step": 143280 }, { "epoch": 1.5309578503125167, "grad_norm": 0.6862995624542236, "learning_rate": 9.933220252405294e-07, "loss": 0.0732, "step": 143290 }, { "epoch": 1.5310646936267962, "grad_norm": 3.160167694091797, "learning_rate": 9.933192882512314e-07, "loss": 0.0314, "step": 143300 }, { "epoch": 1.5311715369410759, "grad_norm": 3.048322916030884, "learning_rate": 9.933165507049385e-07, "loss": 0.0449, "step": 143310 }, { "epoch": 1.5312783802553556, "grad_norm": 9.81381893157959, "learning_rate": 9.93313812601654e-07, "loss": 0.0235, "step": 143320 }, { "epoch": 1.531385223569635, "grad_norm": 2.9618446826934814, "learning_rate": 9.933110739413813e-07, "loss": 0.0088, "step": 143330 }, { "epoch": 1.5314920668839147, "grad_norm": 6.238542556762695, "learning_rate": 9.933083347241228e-07, "loss": 0.0198, "step": 143340 }, { "epoch": 1.5315989101981944, "grad_norm": 3.125155448913574, "learning_rate": 9.93305594949882e-07, "loss": 0.0425, "step": 143350 }, { "epoch": 1.5317057535124738, "grad_norm": 2.4573557376861572, "learning_rate": 9.933028546186621e-07, "loss": 0.0665, "step": 143360 }, { "epoch": 1.5318125968267535, "grad_norm": 3.0257840156555176, "learning_rate": 9.93300113730466e-07, "loss": 0.0227, "step": 143370 }, { "epoch": 1.5319194401410332, "grad_norm": 0.6958807110786438, "learning_rate": 9.932973722852967e-07, "loss": 0.0333, "step": 143380 }, { "epoch": 1.5320262834553127, "grad_norm": 6.22579288482666, "learning_rate": 9.932946302831575e-07, "loss": 0.0159, "step": 143390 }, { "epoch": 1.5321331267695923, "grad_norm": 1.116187572479248, "learning_rate": 9.932918877240516e-07, "loss": 0.0241, "step": 143400 }, { "epoch": 1.532239970083872, "grad_norm": 0.13800102472305298, "learning_rate": 9.932891446079817e-07, "loss": 0.0483, "step": 143410 }, { "epoch": 1.5323468133981515, "grad_norm": 0.2232344001531601, "learning_rate": 9.932864009349512e-07, "loss": 0.0209, "step": 143420 }, { "epoch": 1.5324536567124312, "grad_norm": 0.9189767241477966, "learning_rate": 9.932836567049632e-07, "loss": 0.0433, "step": 143430 }, { "epoch": 1.5325605000267108, "grad_norm": 2.19671368598938, "learning_rate": 9.932809119180206e-07, "loss": 0.0684, "step": 143440 }, { "epoch": 1.5326673433409903, "grad_norm": 2.314302921295166, "learning_rate": 9.932781665741268e-07, "loss": 0.0302, "step": 143450 }, { "epoch": 1.5327741866552702, "grad_norm": 5.628841400146484, "learning_rate": 9.932754206732845e-07, "loss": 0.0178, "step": 143460 }, { "epoch": 1.5328810299695497, "grad_norm": 5.434350967407227, "learning_rate": 9.93272674215497e-07, "loss": 0.0059, "step": 143470 }, { "epoch": 1.5329878732838291, "grad_norm": 11.627111434936523, "learning_rate": 9.932699272007677e-07, "loss": 0.0239, "step": 143480 }, { "epoch": 1.533094716598109, "grad_norm": 1.9254343509674072, "learning_rate": 9.932671796290991e-07, "loss": 0.032, "step": 143490 }, { "epoch": 1.5332015599123885, "grad_norm": 0.3839132487773895, "learning_rate": 9.93264431500495e-07, "loss": 0.0166, "step": 143500 }, { "epoch": 1.533308403226668, "grad_norm": 3.5083487033843994, "learning_rate": 9.932616828149577e-07, "loss": 0.0293, "step": 143510 }, { "epoch": 1.5334152465409479, "grad_norm": 4.274059772491455, "learning_rate": 9.932589335724912e-07, "loss": 0.0299, "step": 143520 }, { "epoch": 1.5335220898552273, "grad_norm": 0.06333135068416595, "learning_rate": 9.932561837730978e-07, "loss": 0.0226, "step": 143530 }, { "epoch": 1.5336289331695068, "grad_norm": 0.18240897357463837, "learning_rate": 9.932534334167811e-07, "loss": 0.0177, "step": 143540 }, { "epoch": 1.5337357764837867, "grad_norm": 0.011976816691458225, "learning_rate": 9.932506825035438e-07, "loss": 0.0237, "step": 143550 }, { "epoch": 1.5338426197980661, "grad_norm": 1.2275495529174805, "learning_rate": 9.932479310333896e-07, "loss": 0.0358, "step": 143560 }, { "epoch": 1.5339494631123456, "grad_norm": 0.5287993550300598, "learning_rate": 9.932451790063212e-07, "loss": 0.0343, "step": 143570 }, { "epoch": 1.5340563064266255, "grad_norm": 3.3054518699645996, "learning_rate": 9.932424264223417e-07, "loss": 0.1037, "step": 143580 }, { "epoch": 1.534163149740905, "grad_norm": 0.2681707441806793, "learning_rate": 9.932396732814541e-07, "loss": 0.0062, "step": 143590 }, { "epoch": 1.5342699930551844, "grad_norm": 0.5373970866203308, "learning_rate": 9.932369195836619e-07, "loss": 0.0181, "step": 143600 }, { "epoch": 1.5343768363694643, "grad_norm": 3.3175439834594727, "learning_rate": 9.932341653289677e-07, "loss": 0.0229, "step": 143610 }, { "epoch": 1.5344836796837438, "grad_norm": 2.709786891937256, "learning_rate": 9.932314105173752e-07, "loss": 0.0384, "step": 143620 }, { "epoch": 1.5345905229980235, "grad_norm": 2.308180093765259, "learning_rate": 9.932286551488872e-07, "loss": 0.0557, "step": 143630 }, { "epoch": 1.5346973663123031, "grad_norm": 0.10649380087852478, "learning_rate": 9.932258992235067e-07, "loss": 0.0118, "step": 143640 }, { "epoch": 1.5348042096265826, "grad_norm": 0.03672071546316147, "learning_rate": 9.93223142741237e-07, "loss": 0.0205, "step": 143650 }, { "epoch": 1.5349110529408623, "grad_norm": 0.0482243150472641, "learning_rate": 9.93220385702081e-07, "loss": 0.034, "step": 143660 }, { "epoch": 1.535017896255142, "grad_norm": 4.273955821990967, "learning_rate": 9.932176281060422e-07, "loss": 0.0402, "step": 143670 }, { "epoch": 1.5351247395694214, "grad_norm": 0.041001394391059875, "learning_rate": 9.932148699531233e-07, "loss": 0.0101, "step": 143680 }, { "epoch": 1.535231582883701, "grad_norm": 5.502305507659912, "learning_rate": 9.932121112433277e-07, "loss": 0.0271, "step": 143690 }, { "epoch": 1.5353384261979808, "grad_norm": 0.04593614116311073, "learning_rate": 9.932093519766582e-07, "loss": 0.0417, "step": 143700 }, { "epoch": 1.5354452695122602, "grad_norm": 0.23245815932750702, "learning_rate": 9.932065921531182e-07, "loss": 0.025, "step": 143710 }, { "epoch": 1.53555211282654, "grad_norm": 1.206419587135315, "learning_rate": 9.93203831772711e-07, "loss": 0.0116, "step": 143720 }, { "epoch": 1.5356589561408196, "grad_norm": 0.20072151720523834, "learning_rate": 9.932010708354393e-07, "loss": 0.0512, "step": 143730 }, { "epoch": 1.535765799455099, "grad_norm": 1.6632587909698486, "learning_rate": 9.931983093413062e-07, "loss": 0.0289, "step": 143740 }, { "epoch": 1.5358726427693787, "grad_norm": 1.0015455484390259, "learning_rate": 9.931955472903152e-07, "loss": 0.0283, "step": 143750 }, { "epoch": 1.5359794860836584, "grad_norm": 14.440905570983887, "learning_rate": 9.93192784682469e-07, "loss": 0.0416, "step": 143760 }, { "epoch": 1.5360863293979379, "grad_norm": 3.605412244796753, "learning_rate": 9.931900215177711e-07, "loss": 0.0287, "step": 143770 }, { "epoch": 1.5361931727122176, "grad_norm": 0.053038518875837326, "learning_rate": 9.931872577962244e-07, "loss": 0.0369, "step": 143780 }, { "epoch": 1.5363000160264972, "grad_norm": 6.211198806762695, "learning_rate": 9.93184493517832e-07, "loss": 0.0375, "step": 143790 }, { "epoch": 1.5364068593407767, "grad_norm": 2.674712896347046, "learning_rate": 9.93181728682597e-07, "loss": 0.0265, "step": 143800 }, { "epoch": 1.5365137026550564, "grad_norm": 6.390099048614502, "learning_rate": 9.931789632905227e-07, "loss": 0.1006, "step": 143810 }, { "epoch": 1.536620545969336, "grad_norm": 1.8360505104064941, "learning_rate": 9.931761973416122e-07, "loss": 0.0108, "step": 143820 }, { "epoch": 1.5367273892836155, "grad_norm": 8.646546363830566, "learning_rate": 9.931734308358687e-07, "loss": 0.0851, "step": 143830 }, { "epoch": 1.5368342325978952, "grad_norm": 0.9046773314476013, "learning_rate": 9.931706637732948e-07, "loss": 0.0095, "step": 143840 }, { "epoch": 1.5369410759121749, "grad_norm": 1.1325607299804688, "learning_rate": 9.931678961538941e-07, "loss": 0.0681, "step": 143850 }, { "epoch": 1.5370479192264543, "grad_norm": 6.782171249389648, "learning_rate": 9.931651279776699e-07, "loss": 0.0188, "step": 143860 }, { "epoch": 1.537154762540734, "grad_norm": 5.678003787994385, "learning_rate": 9.931623592446248e-07, "loss": 0.0208, "step": 143870 }, { "epoch": 1.5372616058550137, "grad_norm": 2.904221296310425, "learning_rate": 9.931595899547621e-07, "loss": 0.0295, "step": 143880 }, { "epoch": 1.5373684491692932, "grad_norm": 0.6161229610443115, "learning_rate": 9.931568201080851e-07, "loss": 0.0342, "step": 143890 }, { "epoch": 1.5374752924835728, "grad_norm": 4.948420524597168, "learning_rate": 9.931540497045969e-07, "loss": 0.0263, "step": 143900 }, { "epoch": 1.5375821357978525, "grad_norm": 2.7235231399536133, "learning_rate": 9.931512787443005e-07, "loss": 0.0147, "step": 143910 }, { "epoch": 1.537688979112132, "grad_norm": 3.134171485900879, "learning_rate": 9.931485072271992e-07, "loss": 0.0279, "step": 143920 }, { "epoch": 1.5377958224264117, "grad_norm": 3.5053865909576416, "learning_rate": 9.931457351532959e-07, "loss": 0.0338, "step": 143930 }, { "epoch": 1.5379026657406913, "grad_norm": 0.04156821221113205, "learning_rate": 9.931429625225938e-07, "loss": 0.0276, "step": 143940 }, { "epoch": 1.5380095090549708, "grad_norm": 0.36787691712379456, "learning_rate": 9.93140189335096e-07, "loss": 0.0186, "step": 143950 }, { "epoch": 1.5381163523692505, "grad_norm": 1.6042118072509766, "learning_rate": 9.93137415590806e-07, "loss": 0.0282, "step": 143960 }, { "epoch": 1.5382231956835302, "grad_norm": 0.0025256862863898277, "learning_rate": 9.931346412897266e-07, "loss": 0.0555, "step": 143970 }, { "epoch": 1.5383300389978096, "grad_norm": 0.07686575502157211, "learning_rate": 9.931318664318609e-07, "loss": 0.0231, "step": 143980 }, { "epoch": 1.5384368823120893, "grad_norm": 1.0729678869247437, "learning_rate": 9.93129091017212e-07, "loss": 0.0738, "step": 143990 }, { "epoch": 1.538543725626369, "grad_norm": 0.06961514800786972, "learning_rate": 9.931263150457831e-07, "loss": 0.0372, "step": 144000 }, { "epoch": 1.5386505689406484, "grad_norm": 0.5505453944206238, "learning_rate": 9.931235385175775e-07, "loss": 0.0294, "step": 144010 }, { "epoch": 1.5387574122549281, "grad_norm": 3.0243477821350098, "learning_rate": 9.931207614325983e-07, "loss": 0.0115, "step": 144020 }, { "epoch": 1.5388642555692078, "grad_norm": 2.1423611640930176, "learning_rate": 9.931179837908484e-07, "loss": 0.0266, "step": 144030 }, { "epoch": 1.5389710988834873, "grad_norm": 0.25883689522743225, "learning_rate": 9.93115205592331e-07, "loss": 0.0253, "step": 144040 }, { "epoch": 1.539077942197767, "grad_norm": 2.816176414489746, "learning_rate": 9.931124268370494e-07, "loss": 0.0177, "step": 144050 }, { "epoch": 1.5391847855120466, "grad_norm": 6.631228923797607, "learning_rate": 9.931096475250067e-07, "loss": 0.0229, "step": 144060 }, { "epoch": 1.539291628826326, "grad_norm": 1.1376560926437378, "learning_rate": 9.93106867656206e-07, "loss": 0.0351, "step": 144070 }, { "epoch": 1.5393984721406058, "grad_norm": 4.378481864929199, "learning_rate": 9.931040872306504e-07, "loss": 0.0342, "step": 144080 }, { "epoch": 1.5395053154548854, "grad_norm": 2.2824032306671143, "learning_rate": 9.93101306248343e-07, "loss": 0.0572, "step": 144090 }, { "epoch": 1.539612158769165, "grad_norm": 9.67348575592041, "learning_rate": 9.930985247092872e-07, "loss": 0.0709, "step": 144100 }, { "epoch": 1.5397190020834446, "grad_norm": 4.044643878936768, "learning_rate": 9.930957426134857e-07, "loss": 0.0167, "step": 144110 }, { "epoch": 1.5398258453977243, "grad_norm": 0.40464380383491516, "learning_rate": 9.930929599609422e-07, "loss": 0.0312, "step": 144120 }, { "epoch": 1.5399326887120037, "grad_norm": 0.5880624055862427, "learning_rate": 9.930901767516592e-07, "loss": 0.0036, "step": 144130 }, { "epoch": 1.5400395320262834, "grad_norm": 6.6534037590026855, "learning_rate": 9.930873929856402e-07, "loss": 0.0567, "step": 144140 }, { "epoch": 1.540146375340563, "grad_norm": 0.47297045588493347, "learning_rate": 9.930846086628885e-07, "loss": 0.0261, "step": 144150 }, { "epoch": 1.5402532186548425, "grad_norm": 0.2228289246559143, "learning_rate": 9.930818237834071e-07, "loss": 0.005, "step": 144160 }, { "epoch": 1.5403600619691222, "grad_norm": 0.8258869051933289, "learning_rate": 9.93079038347199e-07, "loss": 0.0212, "step": 144170 }, { "epoch": 1.540466905283402, "grad_norm": 3.4134373664855957, "learning_rate": 9.930762523542674e-07, "loss": 0.0332, "step": 144180 }, { "epoch": 1.5405737485976814, "grad_norm": 0.16509278118610382, "learning_rate": 9.930734658046155e-07, "loss": 0.0389, "step": 144190 }, { "epoch": 1.5406805919119613, "grad_norm": 5.599429607391357, "learning_rate": 9.930706786982465e-07, "loss": 0.0734, "step": 144200 }, { "epoch": 1.5407874352262407, "grad_norm": 0.9069667458534241, "learning_rate": 9.930678910351635e-07, "loss": 0.014, "step": 144210 }, { "epoch": 1.5408942785405202, "grad_norm": 6.22416353225708, "learning_rate": 9.930651028153697e-07, "loss": 0.0204, "step": 144220 }, { "epoch": 1.5410011218548, "grad_norm": 23.71707534790039, "learning_rate": 9.930623140388682e-07, "loss": 0.0241, "step": 144230 }, { "epoch": 1.5411079651690796, "grad_norm": 3.451749563217163, "learning_rate": 9.930595247056619e-07, "loss": 0.0146, "step": 144240 }, { "epoch": 1.541214808483359, "grad_norm": 0.7408709526062012, "learning_rate": 9.930567348157544e-07, "loss": 0.0504, "step": 144250 }, { "epoch": 1.541321651797639, "grad_norm": 1.2293983697891235, "learning_rate": 9.930539443691485e-07, "loss": 0.0152, "step": 144260 }, { "epoch": 1.5414284951119184, "grad_norm": 0.5661711692810059, "learning_rate": 9.930511533658475e-07, "loss": 0.0404, "step": 144270 }, { "epoch": 1.5415353384261978, "grad_norm": 2.811234951019287, "learning_rate": 9.930483618058547e-07, "loss": 0.0336, "step": 144280 }, { "epoch": 1.5416421817404777, "grad_norm": 0.4051292836666107, "learning_rate": 9.930455696891728e-07, "loss": 0.0247, "step": 144290 }, { "epoch": 1.5417490250547572, "grad_norm": 1.2810882329940796, "learning_rate": 9.930427770158055e-07, "loss": 0.0254, "step": 144300 }, { "epoch": 1.5418558683690367, "grad_norm": 3.1393637657165527, "learning_rate": 9.930399837857558e-07, "loss": 0.0345, "step": 144310 }, { "epoch": 1.5419627116833166, "grad_norm": 0.1863260120153427, "learning_rate": 9.930371899990264e-07, "loss": 0.0199, "step": 144320 }, { "epoch": 1.542069554997596, "grad_norm": 0.4666961133480072, "learning_rate": 9.930343956556209e-07, "loss": 0.0243, "step": 144330 }, { "epoch": 1.5421763983118755, "grad_norm": 0.22581341862678528, "learning_rate": 9.930316007555423e-07, "loss": 0.073, "step": 144340 }, { "epoch": 1.5422832416261554, "grad_norm": 0.6318956017494202, "learning_rate": 9.93028805298794e-07, "loss": 0.0495, "step": 144350 }, { "epoch": 1.5423900849404348, "grad_norm": 0.19904525578022003, "learning_rate": 9.93026009285379e-07, "loss": 0.0214, "step": 144360 }, { "epoch": 1.5424969282547145, "grad_norm": 0.8421090245246887, "learning_rate": 9.930232127153003e-07, "loss": 0.013, "step": 144370 }, { "epoch": 1.5426037715689942, "grad_norm": 0.06374290585517883, "learning_rate": 9.930204155885612e-07, "loss": 0.0429, "step": 144380 }, { "epoch": 1.5427106148832737, "grad_norm": 7.936775207519531, "learning_rate": 9.930176179051647e-07, "loss": 0.03, "step": 144390 }, { "epoch": 1.5428174581975533, "grad_norm": 0.24796073138713837, "learning_rate": 9.930148196651143e-07, "loss": 0.0245, "step": 144400 }, { "epoch": 1.542924301511833, "grad_norm": 4.455918312072754, "learning_rate": 9.93012020868413e-07, "loss": 0.0286, "step": 144410 }, { "epoch": 1.5430311448261125, "grad_norm": 1.7714952230453491, "learning_rate": 9.930092215150637e-07, "loss": 0.0374, "step": 144420 }, { "epoch": 1.5431379881403922, "grad_norm": 0.4507904052734375, "learning_rate": 9.9300642160507e-07, "loss": 0.0434, "step": 144430 }, { "epoch": 1.5432448314546718, "grad_norm": 1.2901134490966797, "learning_rate": 9.930036211384348e-07, "loss": 0.0259, "step": 144440 }, { "epoch": 1.5433516747689513, "grad_norm": 3.0578389167785645, "learning_rate": 9.930008201151612e-07, "loss": 0.0143, "step": 144450 }, { "epoch": 1.543458518083231, "grad_norm": 8.22237777709961, "learning_rate": 9.929980185352525e-07, "loss": 0.0388, "step": 144460 }, { "epoch": 1.5435653613975107, "grad_norm": 2.9017672538757324, "learning_rate": 9.929952163987118e-07, "loss": 0.0417, "step": 144470 }, { "epoch": 1.5436722047117901, "grad_norm": 0.011215569451451302, "learning_rate": 9.929924137055424e-07, "loss": 0.0207, "step": 144480 }, { "epoch": 1.5437790480260698, "grad_norm": 0.016953447833657265, "learning_rate": 9.929896104557472e-07, "loss": 0.0092, "step": 144490 }, { "epoch": 1.5438858913403495, "grad_norm": 0.8097427487373352, "learning_rate": 9.929868066493296e-07, "loss": 0.0648, "step": 144500 }, { "epoch": 1.543992734654629, "grad_norm": 2.76292085647583, "learning_rate": 9.929840022862927e-07, "loss": 0.0686, "step": 144510 }, { "epoch": 1.5440995779689086, "grad_norm": 4.161474704742432, "learning_rate": 9.929811973666396e-07, "loss": 0.0187, "step": 144520 }, { "epoch": 1.5442064212831883, "grad_norm": 5.214380741119385, "learning_rate": 9.929783918903736e-07, "loss": 0.051, "step": 144530 }, { "epoch": 1.5443132645974678, "grad_norm": 1.8951915502548218, "learning_rate": 9.929755858574977e-07, "loss": 0.0281, "step": 144540 }, { "epoch": 1.5444201079117474, "grad_norm": 4.112737655639648, "learning_rate": 9.929727792680153e-07, "loss": 0.0439, "step": 144550 }, { "epoch": 1.5445269512260271, "grad_norm": 4.984911918640137, "learning_rate": 9.929699721219293e-07, "loss": 0.0112, "step": 144560 }, { "epoch": 1.5446337945403066, "grad_norm": 0.030036404728889465, "learning_rate": 9.92967164419243e-07, "loss": 0.0246, "step": 144570 }, { "epoch": 1.5447406378545863, "grad_norm": 0.1400432139635086, "learning_rate": 9.929643561599596e-07, "loss": 0.0193, "step": 144580 }, { "epoch": 1.544847481168866, "grad_norm": 0.24981354176998138, "learning_rate": 9.929615473440822e-07, "loss": 0.0272, "step": 144590 }, { "epoch": 1.5449543244831454, "grad_norm": 0.033422596752643585, "learning_rate": 9.929587379716141e-07, "loss": 0.0351, "step": 144600 }, { "epoch": 1.545061167797425, "grad_norm": 2.4260690212249756, "learning_rate": 9.929559280425584e-07, "loss": 0.063, "step": 144610 }, { "epoch": 1.5451680111117048, "grad_norm": 2.2029919624328613, "learning_rate": 9.929531175569182e-07, "loss": 0.0115, "step": 144620 }, { "epoch": 1.5452748544259842, "grad_norm": 0.08701907098293304, "learning_rate": 9.929503065146966e-07, "loss": 0.0223, "step": 144630 }, { "epoch": 1.545381697740264, "grad_norm": 14.983783721923828, "learning_rate": 9.929474949158971e-07, "loss": 0.0573, "step": 144640 }, { "epoch": 1.5454885410545436, "grad_norm": 7.554847717285156, "learning_rate": 9.929446827605227e-07, "loss": 0.0159, "step": 144650 }, { "epoch": 1.545595384368823, "grad_norm": 4.546658515930176, "learning_rate": 9.929418700485762e-07, "loss": 0.0574, "step": 144660 }, { "epoch": 1.5457022276831027, "grad_norm": 0.010450090281665325, "learning_rate": 9.929390567800613e-07, "loss": 0.0368, "step": 144670 }, { "epoch": 1.5458090709973824, "grad_norm": 5.152137279510498, "learning_rate": 9.929362429549812e-07, "loss": 0.0239, "step": 144680 }, { "epoch": 1.5459159143116619, "grad_norm": 1.2591842412948608, "learning_rate": 9.929334285733386e-07, "loss": 0.0638, "step": 144690 }, { "epoch": 1.5460227576259415, "grad_norm": 2.127135753631592, "learning_rate": 9.92930613635137e-07, "loss": 0.0287, "step": 144700 }, { "epoch": 1.5461296009402212, "grad_norm": 4.773316383361816, "learning_rate": 9.929277981403797e-07, "loss": 0.0232, "step": 144710 }, { "epoch": 1.5462364442545007, "grad_norm": 2.3832383155822754, "learning_rate": 9.929249820890698e-07, "loss": 0.0362, "step": 144720 }, { "epoch": 1.5463432875687804, "grad_norm": 13.589397430419922, "learning_rate": 9.9292216548121e-07, "loss": 0.0562, "step": 144730 }, { "epoch": 1.54645013088306, "grad_norm": 0.023728808388113976, "learning_rate": 9.92919348316804e-07, "loss": 0.0087, "step": 144740 }, { "epoch": 1.5465569741973395, "grad_norm": 2.6490390300750732, "learning_rate": 9.929165305958552e-07, "loss": 0.0786, "step": 144750 }, { "epoch": 1.5466638175116192, "grad_norm": 8.284965515136719, "learning_rate": 9.929137123183663e-07, "loss": 0.0459, "step": 144760 }, { "epoch": 1.5467706608258989, "grad_norm": 0.11486957967281342, "learning_rate": 9.929108934843403e-07, "loss": 0.0211, "step": 144770 }, { "epoch": 1.5468775041401783, "grad_norm": 0.5300783514976501, "learning_rate": 9.92908074093781e-07, "loss": 0.0623, "step": 144780 }, { "epoch": 1.546984347454458, "grad_norm": 2.721444606781006, "learning_rate": 9.929052541466911e-07, "loss": 0.0128, "step": 144790 }, { "epoch": 1.5470911907687377, "grad_norm": 0.008946927264332771, "learning_rate": 9.92902433643074e-07, "loss": 0.0202, "step": 144800 }, { "epoch": 1.5471980340830171, "grad_norm": 0.2553587257862091, "learning_rate": 9.92899612582933e-07, "loss": 0.0207, "step": 144810 }, { "epoch": 1.5473048773972968, "grad_norm": 0.1531468629837036, "learning_rate": 9.928967909662709e-07, "loss": 0.0413, "step": 144820 }, { "epoch": 1.5474117207115765, "grad_norm": 6.626461029052734, "learning_rate": 9.928939687930913e-07, "loss": 0.055, "step": 144830 }, { "epoch": 1.547518564025856, "grad_norm": 0.05343445762991905, "learning_rate": 9.928911460633974e-07, "loss": 0.0216, "step": 144840 }, { "epoch": 1.5476254073401357, "grad_norm": 0.07006103545427322, "learning_rate": 9.928883227771917e-07, "loss": 0.0149, "step": 144850 }, { "epoch": 1.5477322506544153, "grad_norm": 10.001909255981445, "learning_rate": 9.928854989344783e-07, "loss": 0.0428, "step": 144860 }, { "epoch": 1.5478390939686948, "grad_norm": 0.2121003419160843, "learning_rate": 9.928826745352597e-07, "loss": 0.0137, "step": 144870 }, { "epoch": 1.5479459372829745, "grad_norm": 0.43429481983184814, "learning_rate": 9.928798495795396e-07, "loss": 0.0148, "step": 144880 }, { "epoch": 1.5480527805972542, "grad_norm": 2.2480945587158203, "learning_rate": 9.928770240673208e-07, "loss": 0.0308, "step": 144890 }, { "epoch": 1.5481596239115336, "grad_norm": 1.9994220733642578, "learning_rate": 9.928741979986067e-07, "loss": 0.0137, "step": 144900 }, { "epoch": 1.5482664672258133, "grad_norm": 4.145040035247803, "learning_rate": 9.928713713734004e-07, "loss": 0.026, "step": 144910 }, { "epoch": 1.548373310540093, "grad_norm": 0.044657111167907715, "learning_rate": 9.92868544191705e-07, "loss": 0.0184, "step": 144920 }, { "epoch": 1.5484801538543724, "grad_norm": 0.4401158094406128, "learning_rate": 9.92865716453524e-07, "loss": 0.035, "step": 144930 }, { "epoch": 1.5485869971686523, "grad_norm": 0.580463707447052, "learning_rate": 9.928628881588603e-07, "loss": 0.0754, "step": 144940 }, { "epoch": 1.5486938404829318, "grad_norm": 0.05293140560388565, "learning_rate": 9.928600593077172e-07, "loss": 0.0045, "step": 144950 }, { "epoch": 1.5488006837972113, "grad_norm": 0.35195913910865784, "learning_rate": 9.92857229900098e-07, "loss": 0.0216, "step": 144960 }, { "epoch": 1.5489075271114912, "grad_norm": 0.10425514727830887, "learning_rate": 9.928543999360058e-07, "loss": 0.0453, "step": 144970 }, { "epoch": 1.5490143704257706, "grad_norm": 0.13607074320316315, "learning_rate": 9.928515694154439e-07, "loss": 0.0111, "step": 144980 }, { "epoch": 1.54912121374005, "grad_norm": 27.756084442138672, "learning_rate": 9.928487383384149e-07, "loss": 0.02, "step": 144990 }, { "epoch": 1.54922805705433, "grad_norm": 2.414074420928955, "learning_rate": 9.928459067049228e-07, "loss": 0.0409, "step": 145000 }, { "epoch": 1.5493349003686094, "grad_norm": 0.5802577137947083, "learning_rate": 9.928430745149704e-07, "loss": 0.043, "step": 145010 }, { "epoch": 1.549441743682889, "grad_norm": 0.2622626721858978, "learning_rate": 9.92840241768561e-07, "loss": 0.0218, "step": 145020 }, { "epoch": 1.5495485869971688, "grad_norm": 1.8245189189910889, "learning_rate": 9.92837408465698e-07, "loss": 0.0486, "step": 145030 }, { "epoch": 1.5496554303114483, "grad_norm": 2.66947340965271, "learning_rate": 9.928345746063842e-07, "loss": 0.0536, "step": 145040 }, { "epoch": 1.5497622736257277, "grad_norm": 6.01550817489624, "learning_rate": 9.92831740190623e-07, "loss": 0.07, "step": 145050 }, { "epoch": 1.5498691169400076, "grad_norm": 0.10176780819892883, "learning_rate": 9.928289052184174e-07, "loss": 0.0129, "step": 145060 }, { "epoch": 1.549975960254287, "grad_norm": 0.21028827130794525, "learning_rate": 9.92826069689771e-07, "loss": 0.0255, "step": 145070 }, { "epoch": 1.5500828035685665, "grad_norm": 7.4332780838012695, "learning_rate": 9.928232336046866e-07, "loss": 0.0504, "step": 145080 }, { "epoch": 1.5501896468828464, "grad_norm": 2.4708869457244873, "learning_rate": 9.928203969631676e-07, "loss": 0.0651, "step": 145090 }, { "epoch": 1.550296490197126, "grad_norm": 0.012366305105388165, "learning_rate": 9.928175597652173e-07, "loss": 0.0755, "step": 145100 }, { "epoch": 1.5504033335114056, "grad_norm": 8.554128646850586, "learning_rate": 9.928147220108385e-07, "loss": 0.0145, "step": 145110 }, { "epoch": 1.5505101768256853, "grad_norm": 5.008845329284668, "learning_rate": 9.928118837000349e-07, "loss": 0.0107, "step": 145120 }, { "epoch": 1.5506170201399647, "grad_norm": 0.10158881545066833, "learning_rate": 9.928090448328094e-07, "loss": 0.0087, "step": 145130 }, { "epoch": 1.5507238634542444, "grad_norm": 3.5216033458709717, "learning_rate": 9.928062054091654e-07, "loss": 0.0733, "step": 145140 }, { "epoch": 1.550830706768524, "grad_norm": 0.2526245713233948, "learning_rate": 9.92803365429106e-07, "loss": 0.0368, "step": 145150 }, { "epoch": 1.5509375500828035, "grad_norm": 8.144980430603027, "learning_rate": 9.928005248926343e-07, "loss": 0.0242, "step": 145160 }, { "epoch": 1.5510443933970832, "grad_norm": 4.219107151031494, "learning_rate": 9.927976837997537e-07, "loss": 0.017, "step": 145170 }, { "epoch": 1.551151236711363, "grad_norm": 0.33775460720062256, "learning_rate": 9.927948421504673e-07, "loss": 0.0296, "step": 145180 }, { "epoch": 1.5512580800256424, "grad_norm": 10.25517463684082, "learning_rate": 9.927919999447782e-07, "loss": 0.0896, "step": 145190 }, { "epoch": 1.551364923339922, "grad_norm": 0.02225818857550621, "learning_rate": 9.9278915718269e-07, "loss": 0.0538, "step": 145200 }, { "epoch": 1.5514717666542017, "grad_norm": 0.6597861647605896, "learning_rate": 9.927863138642053e-07, "loss": 0.0028, "step": 145210 }, { "epoch": 1.5515786099684812, "grad_norm": 1.8293280601501465, "learning_rate": 9.92783469989328e-07, "loss": 0.0164, "step": 145220 }, { "epoch": 1.5516854532827609, "grad_norm": 0.9801638722419739, "learning_rate": 9.927806255580608e-07, "loss": 0.0326, "step": 145230 }, { "epoch": 1.5517922965970405, "grad_norm": 1.882636308670044, "learning_rate": 9.927777805704071e-07, "loss": 0.0149, "step": 145240 }, { "epoch": 1.55189913991132, "grad_norm": 3.1675949096679688, "learning_rate": 9.927749350263701e-07, "loss": 0.0611, "step": 145250 }, { "epoch": 1.5520059832255997, "grad_norm": 1.2059122323989868, "learning_rate": 9.927720889259531e-07, "loss": 0.016, "step": 145260 }, { "epoch": 1.5521128265398794, "grad_norm": 4.640509605407715, "learning_rate": 9.927692422691592e-07, "loss": 0.0136, "step": 145270 }, { "epoch": 1.5522196698541588, "grad_norm": 5.697907447814941, "learning_rate": 9.927663950559916e-07, "loss": 0.0575, "step": 145280 }, { "epoch": 1.5523265131684385, "grad_norm": 0.3247375786304474, "learning_rate": 9.927635472864535e-07, "loss": 0.0124, "step": 145290 }, { "epoch": 1.5524333564827182, "grad_norm": 3.1025431156158447, "learning_rate": 9.92760698960548e-07, "loss": 0.0158, "step": 145300 }, { "epoch": 1.5525401997969976, "grad_norm": 4.144169807434082, "learning_rate": 9.92757850078279e-07, "loss": 0.013, "step": 145310 }, { "epoch": 1.5526470431112773, "grad_norm": 8.623919486999512, "learning_rate": 9.927550006396489e-07, "loss": 0.0234, "step": 145320 }, { "epoch": 1.552753886425557, "grad_norm": 3.4027342796325684, "learning_rate": 9.92752150644661e-07, "loss": 0.0154, "step": 145330 }, { "epoch": 1.5528607297398365, "grad_norm": 0.23521652817726135, "learning_rate": 9.927493000933188e-07, "loss": 0.0282, "step": 145340 }, { "epoch": 1.5529675730541161, "grad_norm": 1.3041867017745972, "learning_rate": 9.927464489856257e-07, "loss": 0.0294, "step": 145350 }, { "epoch": 1.5530744163683958, "grad_norm": 4.528826713562012, "learning_rate": 9.927435973215844e-07, "loss": 0.0443, "step": 145360 }, { "epoch": 1.5531812596826753, "grad_norm": 0.30256199836730957, "learning_rate": 9.927407451011986e-07, "loss": 0.0559, "step": 145370 }, { "epoch": 1.553288102996955, "grad_norm": 4.448217868804932, "learning_rate": 9.927378923244713e-07, "loss": 0.0216, "step": 145380 }, { "epoch": 1.5533949463112346, "grad_norm": 4.9369707107543945, "learning_rate": 9.927350389914056e-07, "loss": 0.0245, "step": 145390 }, { "epoch": 1.553501789625514, "grad_norm": 0.3287065327167511, "learning_rate": 9.92732185102005e-07, "loss": 0.0384, "step": 145400 }, { "epoch": 1.5536086329397938, "grad_norm": 3.158195972442627, "learning_rate": 9.927293306562726e-07, "loss": 0.1063, "step": 145410 }, { "epoch": 1.5537154762540735, "grad_norm": 0.13265472650527954, "learning_rate": 9.927264756542112e-07, "loss": 0.0233, "step": 145420 }, { "epoch": 1.553822319568353, "grad_norm": 3.759770154953003, "learning_rate": 9.927236200958248e-07, "loss": 0.0042, "step": 145430 }, { "epoch": 1.5539291628826326, "grad_norm": 5.056922435760498, "learning_rate": 9.927207639811162e-07, "loss": 0.0261, "step": 145440 }, { "epoch": 1.5540360061969123, "grad_norm": 1.537713885307312, "learning_rate": 9.927179073100887e-07, "loss": 0.0165, "step": 145450 }, { "epoch": 1.5541428495111917, "grad_norm": 0.2630477845668793, "learning_rate": 9.927150500827454e-07, "loss": 0.0285, "step": 145460 }, { "epoch": 1.5542496928254714, "grad_norm": 0.010464114136993885, "learning_rate": 9.927121922990897e-07, "loss": 0.0085, "step": 145470 }, { "epoch": 1.554356536139751, "grad_norm": 1.9413799047470093, "learning_rate": 9.927093339591245e-07, "loss": 0.0181, "step": 145480 }, { "epoch": 1.5544633794540306, "grad_norm": 2.747225761413574, "learning_rate": 9.927064750628536e-07, "loss": 0.0529, "step": 145490 }, { "epoch": 1.5545702227683103, "grad_norm": 8.954205513000488, "learning_rate": 9.927036156102797e-07, "loss": 0.0392, "step": 145500 }, { "epoch": 1.55467706608259, "grad_norm": 0.01855541206896305, "learning_rate": 9.927007556014064e-07, "loss": 0.0437, "step": 145510 }, { "epoch": 1.5547839093968694, "grad_norm": 3.5068888664245605, "learning_rate": 9.926978950362366e-07, "loss": 0.0128, "step": 145520 }, { "epoch": 1.554890752711149, "grad_norm": 0.014161213301122189, "learning_rate": 9.92695033914774e-07, "loss": 0.0051, "step": 145530 }, { "epoch": 1.5549975960254288, "grad_norm": 0.09547646343708038, "learning_rate": 9.926921722370212e-07, "loss": 0.0151, "step": 145540 }, { "epoch": 1.5551044393397082, "grad_norm": 1.5903998613357544, "learning_rate": 9.926893100029818e-07, "loss": 0.0367, "step": 145550 }, { "epoch": 1.555211282653988, "grad_norm": 4.080888271331787, "learning_rate": 9.92686447212659e-07, "loss": 0.0279, "step": 145560 }, { "epoch": 1.5553181259682676, "grad_norm": 2.162196397781372, "learning_rate": 9.92683583866056e-07, "loss": 0.0102, "step": 145570 }, { "epoch": 1.555424969282547, "grad_norm": 0.11666379868984222, "learning_rate": 9.92680719963176e-07, "loss": 0.03, "step": 145580 }, { "epoch": 1.5555318125968267, "grad_norm": 1.256453275680542, "learning_rate": 9.926778555040226e-07, "loss": 0.0206, "step": 145590 }, { "epoch": 1.5556386559111064, "grad_norm": 0.04213445261120796, "learning_rate": 9.926749904885982e-07, "loss": 0.0469, "step": 145600 }, { "epoch": 1.5557454992253859, "grad_norm": 2.3147575855255127, "learning_rate": 9.92672124916907e-07, "loss": 0.0221, "step": 145610 }, { "epoch": 1.5558523425396655, "grad_norm": 0.16170720756053925, "learning_rate": 9.926692587889516e-07, "loss": 0.0193, "step": 145620 }, { "epoch": 1.5559591858539452, "grad_norm": 0.5367239117622375, "learning_rate": 9.926663921047353e-07, "loss": 0.0097, "step": 145630 }, { "epoch": 1.5560660291682247, "grad_norm": 0.013346724212169647, "learning_rate": 9.926635248642617e-07, "loss": 0.0206, "step": 145640 }, { "epoch": 1.5561728724825044, "grad_norm": 5.618431568145752, "learning_rate": 9.926606570675337e-07, "loss": 0.0495, "step": 145650 }, { "epoch": 1.556279715796784, "grad_norm": 1.883365511894226, "learning_rate": 9.926577887145547e-07, "loss": 0.04, "step": 145660 }, { "epoch": 1.5563865591110635, "grad_norm": 0.25926756858825684, "learning_rate": 9.926549198053277e-07, "loss": 0.0096, "step": 145670 }, { "epoch": 1.5564934024253434, "grad_norm": 1.8326760530471802, "learning_rate": 9.926520503398563e-07, "loss": 0.0145, "step": 145680 }, { "epoch": 1.5566002457396229, "grad_norm": 2.1707258224487305, "learning_rate": 9.926491803181434e-07, "loss": 0.053, "step": 145690 }, { "epoch": 1.5567070890539023, "grad_norm": 2.8856236934661865, "learning_rate": 9.926463097401924e-07, "loss": 0.0313, "step": 145700 }, { "epoch": 1.5568139323681822, "grad_norm": 4.687126636505127, "learning_rate": 9.926434386060068e-07, "loss": 0.0711, "step": 145710 }, { "epoch": 1.5569207756824617, "grad_norm": 1.1655696630477905, "learning_rate": 9.926405669155893e-07, "loss": 0.0284, "step": 145720 }, { "epoch": 1.5570276189967411, "grad_norm": 0.028292009606957436, "learning_rate": 9.926376946689434e-07, "loss": 0.0417, "step": 145730 }, { "epoch": 1.557134462311021, "grad_norm": 2.7275562286376953, "learning_rate": 9.926348218660726e-07, "loss": 0.037, "step": 145740 }, { "epoch": 1.5572413056253005, "grad_norm": 4.005679130554199, "learning_rate": 9.926319485069798e-07, "loss": 0.0641, "step": 145750 }, { "epoch": 1.55734814893958, "grad_norm": 0.035666197538375854, "learning_rate": 9.926290745916682e-07, "loss": 0.02, "step": 145760 }, { "epoch": 1.5574549922538599, "grad_norm": 9.021828651428223, "learning_rate": 9.926262001201412e-07, "loss": 0.1213, "step": 145770 }, { "epoch": 1.5575618355681393, "grad_norm": 0.14255093038082123, "learning_rate": 9.926233250924022e-07, "loss": 0.0287, "step": 145780 }, { "epoch": 1.5576686788824188, "grad_norm": 3.934053659439087, "learning_rate": 9.926204495084544e-07, "loss": 0.0149, "step": 145790 }, { "epoch": 1.5577755221966987, "grad_norm": 0.8143088221549988, "learning_rate": 9.926175733683007e-07, "loss": 0.1015, "step": 145800 }, { "epoch": 1.5578823655109781, "grad_norm": 0.6638360619544983, "learning_rate": 9.926146966719447e-07, "loss": 0.0609, "step": 145810 }, { "epoch": 1.5579892088252576, "grad_norm": 5.756676197052002, "learning_rate": 9.926118194193893e-07, "loss": 0.0258, "step": 145820 }, { "epoch": 1.5580960521395375, "grad_norm": 0.3738182783126831, "learning_rate": 9.926089416106383e-07, "loss": 0.0338, "step": 145830 }, { "epoch": 1.558202895453817, "grad_norm": 0.9678884744644165, "learning_rate": 9.926060632456945e-07, "loss": 0.0281, "step": 145840 }, { "epoch": 1.5583097387680966, "grad_norm": 6.835799694061279, "learning_rate": 9.926031843245612e-07, "loss": 0.0398, "step": 145850 }, { "epoch": 1.5584165820823763, "grad_norm": 0.07373732328414917, "learning_rate": 9.926003048472418e-07, "loss": 0.0194, "step": 145860 }, { "epoch": 1.5585234253966558, "grad_norm": 6.77156925201416, "learning_rate": 9.925974248137395e-07, "loss": 0.0237, "step": 145870 }, { "epoch": 1.5586302687109355, "grad_norm": 2.038431167602539, "learning_rate": 9.925945442240575e-07, "loss": 0.0494, "step": 145880 }, { "epoch": 1.5587371120252151, "grad_norm": 2.1136624813079834, "learning_rate": 9.92591663078199e-07, "loss": 0.0152, "step": 145890 }, { "epoch": 1.5588439553394946, "grad_norm": 14.979327201843262, "learning_rate": 9.925887813761674e-07, "loss": 0.0471, "step": 145900 }, { "epoch": 1.5589507986537743, "grad_norm": 6.014317512512207, "learning_rate": 9.925858991179659e-07, "loss": 0.0209, "step": 145910 }, { "epoch": 1.559057641968054, "grad_norm": 0.2832156717777252, "learning_rate": 9.925830163035978e-07, "loss": 0.0243, "step": 145920 }, { "epoch": 1.5591644852823334, "grad_norm": 7.844175338745117, "learning_rate": 9.925801329330663e-07, "loss": 0.0163, "step": 145930 }, { "epoch": 1.559271328596613, "grad_norm": 0.5795660614967346, "learning_rate": 9.925772490063745e-07, "loss": 0.0245, "step": 145940 }, { "epoch": 1.5593781719108928, "grad_norm": 0.07032406330108643, "learning_rate": 9.925743645235259e-07, "loss": 0.0085, "step": 145950 }, { "epoch": 1.5594850152251722, "grad_norm": 10.057961463928223, "learning_rate": 9.925714794845236e-07, "loss": 0.0252, "step": 145960 }, { "epoch": 1.559591858539452, "grad_norm": 2.4065539836883545, "learning_rate": 9.925685938893712e-07, "loss": 0.0216, "step": 145970 }, { "epoch": 1.5596987018537316, "grad_norm": 5.39593505859375, "learning_rate": 9.925657077380713e-07, "loss": 0.0532, "step": 145980 }, { "epoch": 1.559805545168011, "grad_norm": 0.00698320847004652, "learning_rate": 9.925628210306278e-07, "loss": 0.0197, "step": 145990 }, { "epoch": 1.5599123884822907, "grad_norm": 0.17981263995170593, "learning_rate": 9.925599337670437e-07, "loss": 0.0185, "step": 146000 }, { "epoch": 1.5600192317965704, "grad_norm": 0.13085681200027466, "learning_rate": 9.92557045947322e-07, "loss": 0.0392, "step": 146010 }, { "epoch": 1.5601260751108499, "grad_norm": 0.05290856584906578, "learning_rate": 9.925541575714665e-07, "loss": 0.0374, "step": 146020 }, { "epoch": 1.5602329184251296, "grad_norm": 0.16727006435394287, "learning_rate": 9.925512686394799e-07, "loss": 0.0364, "step": 146030 }, { "epoch": 1.5603397617394092, "grad_norm": 0.05317038297653198, "learning_rate": 9.92548379151366e-07, "loss": 0.0597, "step": 146040 }, { "epoch": 1.5604466050536887, "grad_norm": 1.6536009311676025, "learning_rate": 9.925454891071276e-07, "loss": 0.0594, "step": 146050 }, { "epoch": 1.5605534483679684, "grad_norm": 1.8571807146072388, "learning_rate": 9.925425985067683e-07, "loss": 0.0137, "step": 146060 }, { "epoch": 1.560660291682248, "grad_norm": 8.426090240478516, "learning_rate": 9.925397073502913e-07, "loss": 0.0537, "step": 146070 }, { "epoch": 1.5607671349965275, "grad_norm": 3.189068555831909, "learning_rate": 9.925368156376996e-07, "loss": 0.0132, "step": 146080 }, { "epoch": 1.5608739783108072, "grad_norm": 3.3276965618133545, "learning_rate": 9.92533923368997e-07, "loss": 0.0149, "step": 146090 }, { "epoch": 1.560980821625087, "grad_norm": 0.004321008920669556, "learning_rate": 9.92531030544186e-07, "loss": 0.013, "step": 146100 }, { "epoch": 1.5610876649393663, "grad_norm": 3.4832491874694824, "learning_rate": 9.925281371632706e-07, "loss": 0.0604, "step": 146110 }, { "epoch": 1.561194508253646, "grad_norm": 3.1093761920928955, "learning_rate": 9.925252432262537e-07, "loss": 0.0053, "step": 146120 }, { "epoch": 1.5613013515679257, "grad_norm": 0.0776713490486145, "learning_rate": 9.925223487331386e-07, "loss": 0.022, "step": 146130 }, { "epoch": 1.5614081948822052, "grad_norm": 0.07912933081388474, "learning_rate": 9.925194536839285e-07, "loss": 0.0109, "step": 146140 }, { "epoch": 1.5615150381964849, "grad_norm": 0.011813807301223278, "learning_rate": 9.925165580786268e-07, "loss": 0.0117, "step": 146150 }, { "epoch": 1.5616218815107645, "grad_norm": 0.013943633064627647, "learning_rate": 9.925136619172369e-07, "loss": 0.0196, "step": 146160 }, { "epoch": 1.561728724825044, "grad_norm": 9.273276329040527, "learning_rate": 9.925107651997617e-07, "loss": 0.0337, "step": 146170 }, { "epoch": 1.5618355681393237, "grad_norm": 3.13879132270813, "learning_rate": 9.92507867926205e-07, "loss": 0.014, "step": 146180 }, { "epoch": 1.5619424114536034, "grad_norm": 7.054308891296387, "learning_rate": 9.925049700965692e-07, "loss": 0.0625, "step": 146190 }, { "epoch": 1.5620492547678828, "grad_norm": 3.7521493434906006, "learning_rate": 9.925020717108584e-07, "loss": 0.0316, "step": 146200 }, { "epoch": 1.5621560980821625, "grad_norm": 0.1851457804441452, "learning_rate": 9.924991727690756e-07, "loss": 0.0133, "step": 146210 }, { "epoch": 1.5622629413964422, "grad_norm": 2.0316433906555176, "learning_rate": 9.924962732712244e-07, "loss": 0.0111, "step": 146220 }, { "epoch": 1.5623697847107216, "grad_norm": 0.04130033776164055, "learning_rate": 9.924933732173074e-07, "loss": 0.0113, "step": 146230 }, { "epoch": 1.5624766280250013, "grad_norm": 0.24779844284057617, "learning_rate": 9.924904726073281e-07, "loss": 0.0176, "step": 146240 }, { "epoch": 1.562583471339281, "grad_norm": 1.5792624950408936, "learning_rate": 9.9248757144129e-07, "loss": 0.0375, "step": 146250 }, { "epoch": 1.5626903146535605, "grad_norm": 8.339667320251465, "learning_rate": 9.924846697191962e-07, "loss": 0.0297, "step": 146260 }, { "epoch": 1.5627971579678401, "grad_norm": 1.9980242252349854, "learning_rate": 9.924817674410501e-07, "loss": 0.0483, "step": 146270 }, { "epoch": 1.5629040012821198, "grad_norm": 0.045252710580825806, "learning_rate": 9.92478864606855e-07, "loss": 0.0173, "step": 146280 }, { "epoch": 1.5630108445963993, "grad_norm": 2.0110597610473633, "learning_rate": 9.92475961216614e-07, "loss": 0.0347, "step": 146290 }, { "epoch": 1.563117687910679, "grad_norm": 0.04185311868786812, "learning_rate": 9.924730572703306e-07, "loss": 0.1314, "step": 146300 }, { "epoch": 1.5632245312249586, "grad_norm": 5.833586692810059, "learning_rate": 9.924701527680077e-07, "loss": 0.0231, "step": 146310 }, { "epoch": 1.563331374539238, "grad_norm": 0.7812703847885132, "learning_rate": 9.92467247709649e-07, "loss": 0.0083, "step": 146320 }, { "epoch": 1.5634382178535178, "grad_norm": 0.019446922466158867, "learning_rate": 9.924643420952576e-07, "loss": 0.0393, "step": 146330 }, { "epoch": 1.5635450611677975, "grad_norm": 3.1070470809936523, "learning_rate": 9.924614359248367e-07, "loss": 0.0282, "step": 146340 }, { "epoch": 1.563651904482077, "grad_norm": 1.1038278341293335, "learning_rate": 9.924585291983898e-07, "loss": 0.0224, "step": 146350 }, { "epoch": 1.5637587477963566, "grad_norm": 15.654301643371582, "learning_rate": 9.9245562191592e-07, "loss": 0.0619, "step": 146360 }, { "epoch": 1.5638655911106363, "grad_norm": 1.89462411403656, "learning_rate": 9.924527140774307e-07, "loss": 0.0679, "step": 146370 }, { "epoch": 1.5639724344249157, "grad_norm": 6.895290851593018, "learning_rate": 9.92449805682925e-07, "loss": 0.0256, "step": 146380 }, { "epoch": 1.5640792777391954, "grad_norm": 9.485819816589355, "learning_rate": 9.924468967324064e-07, "loss": 0.0572, "step": 146390 }, { "epoch": 1.564186121053475, "grad_norm": 2.2214999198913574, "learning_rate": 9.92443987225878e-07, "loss": 0.017, "step": 146400 }, { "epoch": 1.5642929643677546, "grad_norm": 11.967256546020508, "learning_rate": 9.924410771633432e-07, "loss": 0.0956, "step": 146410 }, { "epoch": 1.5643998076820345, "grad_norm": 6.219560146331787, "learning_rate": 9.924381665448052e-07, "loss": 0.033, "step": 146420 }, { "epoch": 1.564506650996314, "grad_norm": 5.544422626495361, "learning_rate": 9.924352553702674e-07, "loss": 0.0565, "step": 146430 }, { "epoch": 1.5646134943105934, "grad_norm": 2.267317295074463, "learning_rate": 9.92432343639733e-07, "loss": 0.0136, "step": 146440 }, { "epoch": 1.5647203376248733, "grad_norm": 0.04283158481121063, "learning_rate": 9.924294313532055e-07, "loss": 0.0148, "step": 146450 }, { "epoch": 1.5648271809391527, "grad_norm": 0.039752297103405, "learning_rate": 9.924265185106879e-07, "loss": 0.015, "step": 146460 }, { "epoch": 1.5649340242534322, "grad_norm": 0.9967426657676697, "learning_rate": 9.924236051121836e-07, "loss": 0.0126, "step": 146470 }, { "epoch": 1.565040867567712, "grad_norm": 0.7661334872245789, "learning_rate": 9.924206911576957e-07, "loss": 0.0163, "step": 146480 }, { "epoch": 1.5651477108819916, "grad_norm": 0.5148835778236389, "learning_rate": 9.92417776647228e-07, "loss": 0.0122, "step": 146490 }, { "epoch": 1.565254554196271, "grad_norm": 1.0505284070968628, "learning_rate": 9.924148615807832e-07, "loss": 0.0407, "step": 146500 }, { "epoch": 1.565361397510551, "grad_norm": 5.516994953155518, "learning_rate": 9.924119459583649e-07, "loss": 0.0362, "step": 146510 }, { "epoch": 1.5654682408248304, "grad_norm": 0.026537064462900162, "learning_rate": 9.924090297799764e-07, "loss": 0.0127, "step": 146520 }, { "epoch": 1.5655750841391098, "grad_norm": 3.1783864498138428, "learning_rate": 9.924061130456208e-07, "loss": 0.0108, "step": 146530 }, { "epoch": 1.5656819274533897, "grad_norm": 5.573016166687012, "learning_rate": 9.924031957553019e-07, "loss": 0.0069, "step": 146540 }, { "epoch": 1.5657887707676692, "grad_norm": 2.046010971069336, "learning_rate": 9.924002779090223e-07, "loss": 0.0704, "step": 146550 }, { "epoch": 1.5658956140819487, "grad_norm": 5.793336868286133, "learning_rate": 9.923973595067858e-07, "loss": 0.0298, "step": 146560 }, { "epoch": 1.5660024573962286, "grad_norm": 0.5077939629554749, "learning_rate": 9.923944405485953e-07, "loss": 0.0358, "step": 146570 }, { "epoch": 1.566109300710508, "grad_norm": 6.214771747589111, "learning_rate": 9.923915210344546e-07, "loss": 0.0323, "step": 146580 }, { "epoch": 1.5662161440247877, "grad_norm": 0.1636570692062378, "learning_rate": 9.923886009643665e-07, "loss": 0.0103, "step": 146590 }, { "epoch": 1.5663229873390674, "grad_norm": 10.982236862182617, "learning_rate": 9.923856803383346e-07, "loss": 0.0298, "step": 146600 }, { "epoch": 1.5664298306533468, "grad_norm": 17.70450782775879, "learning_rate": 9.923827591563621e-07, "loss": 0.033, "step": 146610 }, { "epoch": 1.5665366739676265, "grad_norm": 0.5408967733383179, "learning_rate": 9.923798374184523e-07, "loss": 0.0252, "step": 146620 }, { "epoch": 1.5666435172819062, "grad_norm": 1.0102548599243164, "learning_rate": 9.923769151246083e-07, "loss": 0.0347, "step": 146630 }, { "epoch": 1.5667503605961857, "grad_norm": 6.807988166809082, "learning_rate": 9.92373992274834e-07, "loss": 0.1048, "step": 146640 }, { "epoch": 1.5668572039104653, "grad_norm": 0.6141542196273804, "learning_rate": 9.92371068869132e-07, "loss": 0.0702, "step": 146650 }, { "epoch": 1.566964047224745, "grad_norm": 0.04662588611245155, "learning_rate": 9.923681449075059e-07, "loss": 0.0134, "step": 146660 }, { "epoch": 1.5670708905390245, "grad_norm": 1.2186425924301147, "learning_rate": 9.923652203899592e-07, "loss": 0.0984, "step": 146670 }, { "epoch": 1.5671777338533042, "grad_norm": 0.8884245753288269, "learning_rate": 9.923622953164949e-07, "loss": 0.0217, "step": 146680 }, { "epoch": 1.5672845771675838, "grad_norm": 0.3652268648147583, "learning_rate": 9.923593696871164e-07, "loss": 0.0383, "step": 146690 }, { "epoch": 1.5673914204818633, "grad_norm": 5.585886001586914, "learning_rate": 9.92356443501827e-07, "loss": 0.0204, "step": 146700 }, { "epoch": 1.567498263796143, "grad_norm": 0.8161217570304871, "learning_rate": 9.9235351676063e-07, "loss": 0.1126, "step": 146710 }, { "epoch": 1.5676051071104227, "grad_norm": 0.011991874314844608, "learning_rate": 9.923505894635289e-07, "loss": 0.0691, "step": 146720 }, { "epoch": 1.5677119504247021, "grad_norm": 1.4129376411437988, "learning_rate": 9.923476616105265e-07, "loss": 0.0292, "step": 146730 }, { "epoch": 1.5678187937389818, "grad_norm": 6.975364685058594, "learning_rate": 9.923447332016266e-07, "loss": 0.0463, "step": 146740 }, { "epoch": 1.5679256370532615, "grad_norm": 0.7983303070068359, "learning_rate": 9.923418042368324e-07, "loss": 0.0427, "step": 146750 }, { "epoch": 1.568032480367541, "grad_norm": 0.18195481598377228, "learning_rate": 9.923388747161473e-07, "loss": 0.0077, "step": 146760 }, { "epoch": 1.5681393236818206, "grad_norm": 7.998671531677246, "learning_rate": 9.92335944639574e-07, "loss": 0.0382, "step": 146770 }, { "epoch": 1.5682461669961003, "grad_norm": 2.8265089988708496, "learning_rate": 9.923330140071165e-07, "loss": 0.0456, "step": 146780 }, { "epoch": 1.5683530103103798, "grad_norm": 2.4354822635650635, "learning_rate": 9.92330082818778e-07, "loss": 0.0366, "step": 146790 }, { "epoch": 1.5684598536246595, "grad_norm": 6.49614953994751, "learning_rate": 9.923271510745615e-07, "loss": 0.0334, "step": 146800 }, { "epoch": 1.5685666969389391, "grad_norm": 3.2662792205810547, "learning_rate": 9.923242187744706e-07, "loss": 0.0325, "step": 146810 }, { "epoch": 1.5686735402532186, "grad_norm": 0.01744641549885273, "learning_rate": 9.923212859185084e-07, "loss": 0.0326, "step": 146820 }, { "epoch": 1.5687803835674983, "grad_norm": 0.35283252596855164, "learning_rate": 9.923183525066783e-07, "loss": 0.07, "step": 146830 }, { "epoch": 1.568887226881778, "grad_norm": 0.5324928164482117, "learning_rate": 9.923154185389836e-07, "loss": 0.0102, "step": 146840 }, { "epoch": 1.5689940701960574, "grad_norm": 0.36003929376602173, "learning_rate": 9.923124840154278e-07, "loss": 0.0413, "step": 146850 }, { "epoch": 1.569100913510337, "grad_norm": 0.041753482073545456, "learning_rate": 9.923095489360139e-07, "loss": 0.0231, "step": 146860 }, { "epoch": 1.5692077568246168, "grad_norm": 5.9777021408081055, "learning_rate": 9.923066133007456e-07, "loss": 0.0695, "step": 146870 }, { "epoch": 1.5693146001388962, "grad_norm": 0.6971126198768616, "learning_rate": 9.923036771096257e-07, "loss": 0.0325, "step": 146880 }, { "epoch": 1.569421443453176, "grad_norm": 2.758139133453369, "learning_rate": 9.92300740362658e-07, "loss": 0.0189, "step": 146890 }, { "epoch": 1.5695282867674556, "grad_norm": 5.534396648406982, "learning_rate": 9.922978030598454e-07, "loss": 0.0471, "step": 146900 }, { "epoch": 1.569635130081735, "grad_norm": 0.14845526218414307, "learning_rate": 9.922948652011916e-07, "loss": 0.047, "step": 146910 }, { "epoch": 1.5697419733960147, "grad_norm": 4.254866123199463, "learning_rate": 9.922919267866996e-07, "loss": 0.0079, "step": 146920 }, { "epoch": 1.5698488167102944, "grad_norm": 2.4076709747314453, "learning_rate": 9.922889878163729e-07, "loss": 0.0055, "step": 146930 }, { "epoch": 1.5699556600245739, "grad_norm": 2.975558280944824, "learning_rate": 9.922860482902149e-07, "loss": 0.049, "step": 146940 }, { "epoch": 1.5700625033388536, "grad_norm": 3.7570748329162598, "learning_rate": 9.922831082082286e-07, "loss": 0.033, "step": 146950 }, { "epoch": 1.5701693466531332, "grad_norm": 2.475905656814575, "learning_rate": 9.922801675704176e-07, "loss": 0.0215, "step": 146960 }, { "epoch": 1.5702761899674127, "grad_norm": 5.081421852111816, "learning_rate": 9.922772263767852e-07, "loss": 0.0585, "step": 146970 }, { "epoch": 1.5703830332816924, "grad_norm": 1.3489813804626465, "learning_rate": 9.922742846273347e-07, "loss": 0.0379, "step": 146980 }, { "epoch": 1.570489876595972, "grad_norm": 0.33103781938552856, "learning_rate": 9.922713423220693e-07, "loss": 0.0228, "step": 146990 }, { "epoch": 1.5705967199102515, "grad_norm": 2.0424435138702393, "learning_rate": 9.922683994609922e-07, "loss": 0.0495, "step": 147000 }, { "epoch": 1.5707035632245312, "grad_norm": 14.420366287231445, "learning_rate": 9.922654560441072e-07, "loss": 0.0732, "step": 147010 }, { "epoch": 1.5708104065388109, "grad_norm": 3.6806020736694336, "learning_rate": 9.922625120714171e-07, "loss": 0.0287, "step": 147020 }, { "epoch": 1.5709172498530903, "grad_norm": 4.690311431884766, "learning_rate": 9.922595675429257e-07, "loss": 0.0202, "step": 147030 }, { "epoch": 1.57102409316737, "grad_norm": 6.043107509613037, "learning_rate": 9.922566224586361e-07, "loss": 0.0334, "step": 147040 }, { "epoch": 1.5711309364816497, "grad_norm": 1.5998189449310303, "learning_rate": 9.922536768185516e-07, "loss": 0.0377, "step": 147050 }, { "epoch": 1.5712377797959292, "grad_norm": 0.04982329532504082, "learning_rate": 9.922507306226755e-07, "loss": 0.0491, "step": 147060 }, { "epoch": 1.5713446231102088, "grad_norm": 1.3330349922180176, "learning_rate": 9.92247783871011e-07, "loss": 0.0597, "step": 147070 }, { "epoch": 1.5714514664244885, "grad_norm": 0.15710985660552979, "learning_rate": 9.922448365635617e-07, "loss": 0.0535, "step": 147080 }, { "epoch": 1.571558309738768, "grad_norm": 3.7083959579467773, "learning_rate": 9.922418887003312e-07, "loss": 0.043, "step": 147090 }, { "epoch": 1.5716651530530477, "grad_norm": 1.7387572526931763, "learning_rate": 9.92238940281322e-07, "loss": 0.0147, "step": 147100 }, { "epoch": 1.5717719963673273, "grad_norm": 15.636068344116211, "learning_rate": 9.92235991306538e-07, "loss": 0.0772, "step": 147110 }, { "epoch": 1.5718788396816068, "grad_norm": 2.261282444000244, "learning_rate": 9.922330417759825e-07, "loss": 0.0152, "step": 147120 }, { "epoch": 1.5719856829958865, "grad_norm": 2.7362895011901855, "learning_rate": 9.922300916896587e-07, "loss": 0.0148, "step": 147130 }, { "epoch": 1.5720925263101662, "grad_norm": 0.039276327937841415, "learning_rate": 9.922271410475699e-07, "loss": 0.0261, "step": 147140 }, { "epoch": 1.5721993696244456, "grad_norm": 0.8394185900688171, "learning_rate": 9.922241898497197e-07, "loss": 0.0186, "step": 147150 }, { "epoch": 1.5723062129387255, "grad_norm": 0.45339247584342957, "learning_rate": 9.92221238096111e-07, "loss": 0.0261, "step": 147160 }, { "epoch": 1.572413056253005, "grad_norm": 0.07583380490541458, "learning_rate": 9.922182857867477e-07, "loss": 0.0207, "step": 147170 }, { "epoch": 1.5725198995672844, "grad_norm": 3.9662139415740967, "learning_rate": 9.922153329216325e-07, "loss": 0.0232, "step": 147180 }, { "epoch": 1.5726267428815643, "grad_norm": 3.252091646194458, "learning_rate": 9.922123795007692e-07, "loss": 0.0424, "step": 147190 }, { "epoch": 1.5727335861958438, "grad_norm": 12.218807220458984, "learning_rate": 9.92209425524161e-07, "loss": 0.0311, "step": 147200 }, { "epoch": 1.5728404295101233, "grad_norm": 0.022769473493099213, "learning_rate": 9.92206470991811e-07, "loss": 0.0173, "step": 147210 }, { "epoch": 1.5729472728244032, "grad_norm": 3.7039995193481445, "learning_rate": 9.92203515903723e-07, "loss": 0.0442, "step": 147220 }, { "epoch": 1.5730541161386826, "grad_norm": 2.0677294731140137, "learning_rate": 9.922005602599e-07, "loss": 0.0534, "step": 147230 }, { "epoch": 1.573160959452962, "grad_norm": 10.378974914550781, "learning_rate": 9.921976040603452e-07, "loss": 0.0304, "step": 147240 }, { "epoch": 1.573267802767242, "grad_norm": 0.47906115651130676, "learning_rate": 9.921946473050625e-07, "loss": 0.0076, "step": 147250 }, { "epoch": 1.5733746460815214, "grad_norm": 0.06907887011766434, "learning_rate": 9.921916899940546e-07, "loss": 0.043, "step": 147260 }, { "epoch": 1.573481489395801, "grad_norm": 0.08700314164161682, "learning_rate": 9.921887321273251e-07, "loss": 0.0192, "step": 147270 }, { "epoch": 1.5735883327100808, "grad_norm": 3.649068593978882, "learning_rate": 9.921857737048777e-07, "loss": 0.0072, "step": 147280 }, { "epoch": 1.5736951760243603, "grad_norm": 7.69522762298584, "learning_rate": 9.921828147267152e-07, "loss": 0.0424, "step": 147290 }, { "epoch": 1.5738020193386397, "grad_norm": 7.380520343780518, "learning_rate": 9.921798551928413e-07, "loss": 0.0306, "step": 147300 }, { "epoch": 1.5739088626529196, "grad_norm": 0.060504037886857986, "learning_rate": 9.921768951032592e-07, "loss": 0.0171, "step": 147310 }, { "epoch": 1.574015705967199, "grad_norm": 2.434969663619995, "learning_rate": 9.92173934457972e-07, "loss": 0.0145, "step": 147320 }, { "epoch": 1.5741225492814788, "grad_norm": 0.20719791948795319, "learning_rate": 9.921709732569835e-07, "loss": 0.0451, "step": 147330 }, { "epoch": 1.5742293925957584, "grad_norm": 0.02617601491510868, "learning_rate": 9.921680115002965e-07, "loss": 0.0372, "step": 147340 }, { "epoch": 1.574336235910038, "grad_norm": 0.10707363486289978, "learning_rate": 9.92165049187915e-07, "loss": 0.0281, "step": 147350 }, { "epoch": 1.5744430792243176, "grad_norm": 3.08770751953125, "learning_rate": 9.92162086319842e-07, "loss": 0.0132, "step": 147360 }, { "epoch": 1.5745499225385973, "grad_norm": 0.5047105550765991, "learning_rate": 9.921591228960808e-07, "loss": 0.0303, "step": 147370 }, { "epoch": 1.5746567658528767, "grad_norm": 1.0015332698822021, "learning_rate": 9.92156158916635e-07, "loss": 0.0191, "step": 147380 }, { "epoch": 1.5747636091671564, "grad_norm": 0.01153167150914669, "learning_rate": 9.921531943815073e-07, "loss": 0.0106, "step": 147390 }, { "epoch": 1.574870452481436, "grad_norm": 0.07422538101673126, "learning_rate": 9.92150229290702e-07, "loss": 0.0204, "step": 147400 }, { "epoch": 1.5749772957957155, "grad_norm": 2.142754077911377, "learning_rate": 9.921472636442215e-07, "loss": 0.0223, "step": 147410 }, { "epoch": 1.5750841391099952, "grad_norm": 5.100796222686768, "learning_rate": 9.9214429744207e-07, "loss": 0.0161, "step": 147420 }, { "epoch": 1.575190982424275, "grad_norm": 2.426182508468628, "learning_rate": 9.921413306842502e-07, "loss": 0.0754, "step": 147430 }, { "epoch": 1.5752978257385544, "grad_norm": 3.6570956707000732, "learning_rate": 9.921383633707655e-07, "loss": 0.0152, "step": 147440 }, { "epoch": 1.575404669052834, "grad_norm": 1.0219573974609375, "learning_rate": 9.921353955016196e-07, "loss": 0.0321, "step": 147450 }, { "epoch": 1.5755115123671137, "grad_norm": 0.16866818070411682, "learning_rate": 9.92132427076816e-07, "loss": 0.0485, "step": 147460 }, { "epoch": 1.5756183556813932, "grad_norm": 4.978163719177246, "learning_rate": 9.921294580963575e-07, "loss": 0.0535, "step": 147470 }, { "epoch": 1.5757251989956729, "grad_norm": 0.38670215010643005, "learning_rate": 9.921264885602477e-07, "loss": 0.0087, "step": 147480 }, { "epoch": 1.5758320423099526, "grad_norm": 0.13513495028018951, "learning_rate": 9.921235184684899e-07, "loss": 0.0413, "step": 147490 }, { "epoch": 1.575938885624232, "grad_norm": 8.998374938964844, "learning_rate": 9.921205478210876e-07, "loss": 0.0321, "step": 147500 }, { "epoch": 1.5760457289385117, "grad_norm": 0.7936224341392517, "learning_rate": 9.921175766180442e-07, "loss": 0.0301, "step": 147510 }, { "epoch": 1.5761525722527914, "grad_norm": 0.5853807926177979, "learning_rate": 9.921146048593626e-07, "loss": 0.0583, "step": 147520 }, { "epoch": 1.5762594155670708, "grad_norm": 0.017202917486429214, "learning_rate": 9.921116325450467e-07, "loss": 0.0241, "step": 147530 }, { "epoch": 1.5763662588813505, "grad_norm": 0.05917653813958168, "learning_rate": 9.921086596750994e-07, "loss": 0.0509, "step": 147540 }, { "epoch": 1.5764731021956302, "grad_norm": 1.0360713005065918, "learning_rate": 9.921056862495246e-07, "loss": 0.0082, "step": 147550 }, { "epoch": 1.5765799455099097, "grad_norm": 7.165548324584961, "learning_rate": 9.921027122683249e-07, "loss": 0.0267, "step": 147560 }, { "epoch": 1.5766867888241893, "grad_norm": 0.36265844106674194, "learning_rate": 9.920997377315046e-07, "loss": 0.0023, "step": 147570 }, { "epoch": 1.576793632138469, "grad_norm": 0.03929810971021652, "learning_rate": 9.920967626390661e-07, "loss": 0.0536, "step": 147580 }, { "epoch": 1.5769004754527485, "grad_norm": 1.1201250553131104, "learning_rate": 9.920937869910136e-07, "loss": 0.0374, "step": 147590 }, { "epoch": 1.5770073187670282, "grad_norm": 0.20491598546504974, "learning_rate": 9.920908107873499e-07, "loss": 0.0638, "step": 147600 }, { "epoch": 1.5771141620813078, "grad_norm": 3.411987066268921, "learning_rate": 9.920878340280784e-07, "loss": 0.0112, "step": 147610 }, { "epoch": 1.5772210053955873, "grad_norm": 12.377612113952637, "learning_rate": 9.92084856713203e-07, "loss": 0.0964, "step": 147620 }, { "epoch": 1.577327848709867, "grad_norm": 0.8269989490509033, "learning_rate": 9.920818788427261e-07, "loss": 0.0404, "step": 147630 }, { "epoch": 1.5774346920241467, "grad_norm": 0.0425545871257782, "learning_rate": 9.92078900416652e-07, "loss": 0.0229, "step": 147640 }, { "epoch": 1.5775415353384261, "grad_norm": 2.58581280708313, "learning_rate": 9.920759214349836e-07, "loss": 0.0316, "step": 147650 }, { "epoch": 1.5776483786527058, "grad_norm": 2.6355438232421875, "learning_rate": 9.920729418977245e-07, "loss": 0.0153, "step": 147660 }, { "epoch": 1.5777552219669855, "grad_norm": 0.5437690615653992, "learning_rate": 9.920699618048776e-07, "loss": 0.0285, "step": 147670 }, { "epoch": 1.577862065281265, "grad_norm": 4.056730270385742, "learning_rate": 9.920669811564467e-07, "loss": 0.0224, "step": 147680 }, { "epoch": 1.5779689085955446, "grad_norm": 0.019561875611543655, "learning_rate": 9.920639999524351e-07, "loss": 0.0323, "step": 147690 }, { "epoch": 1.5780757519098243, "grad_norm": 6.089354038238525, "learning_rate": 9.920610181928462e-07, "loss": 0.0948, "step": 147700 }, { "epoch": 1.5781825952241038, "grad_norm": 1.6707442998886108, "learning_rate": 9.920580358776832e-07, "loss": 0.0233, "step": 147710 }, { "epoch": 1.5782894385383834, "grad_norm": 0.20666737854480743, "learning_rate": 9.920550530069494e-07, "loss": 0.0207, "step": 147720 }, { "epoch": 1.5783962818526631, "grad_norm": 6.044789791107178, "learning_rate": 9.920520695806484e-07, "loss": 0.0605, "step": 147730 }, { "epoch": 1.5785031251669426, "grad_norm": 2.4188742637634277, "learning_rate": 9.920490855987833e-07, "loss": 0.0311, "step": 147740 }, { "epoch": 1.5786099684812223, "grad_norm": 11.119754791259766, "learning_rate": 9.920461010613578e-07, "loss": 0.0574, "step": 147750 }, { "epoch": 1.578716811795502, "grad_norm": 0.7321897149085999, "learning_rate": 9.920431159683753e-07, "loss": 0.0173, "step": 147760 }, { "epoch": 1.5788236551097814, "grad_norm": 4.446265697479248, "learning_rate": 9.920401303198389e-07, "loss": 0.0175, "step": 147770 }, { "epoch": 1.578930498424061, "grad_norm": 0.23173877596855164, "learning_rate": 9.920371441157518e-07, "loss": 0.0118, "step": 147780 }, { "epoch": 1.5790373417383408, "grad_norm": 10.820001602172852, "learning_rate": 9.920341573561178e-07, "loss": 0.0172, "step": 147790 }, { "epoch": 1.5791441850526202, "grad_norm": 1.0316132307052612, "learning_rate": 9.920311700409403e-07, "loss": 0.0114, "step": 147800 }, { "epoch": 1.5792510283669, "grad_norm": 0.026504380628466606, "learning_rate": 9.92028182170222e-07, "loss": 0.0175, "step": 147810 }, { "epoch": 1.5793578716811796, "grad_norm": 0.027395740151405334, "learning_rate": 9.920251937439673e-07, "loss": 0.0189, "step": 147820 }, { "epoch": 1.579464714995459, "grad_norm": 7.374527931213379, "learning_rate": 9.920222047621787e-07, "loss": 0.113, "step": 147830 }, { "epoch": 1.5795715583097387, "grad_norm": 1.831760287284851, "learning_rate": 9.920192152248598e-07, "loss": 0.0711, "step": 147840 }, { "epoch": 1.5796784016240184, "grad_norm": 0.23719142377376556, "learning_rate": 9.920162251320144e-07, "loss": 0.0329, "step": 147850 }, { "epoch": 1.5797852449382979, "grad_norm": 0.07582791149616241, "learning_rate": 9.920132344836452e-07, "loss": 0.0116, "step": 147860 }, { "epoch": 1.5798920882525775, "grad_norm": 0.2575545310974121, "learning_rate": 9.920102432797561e-07, "loss": 0.0324, "step": 147870 }, { "epoch": 1.5799989315668572, "grad_norm": 0.0954250916838646, "learning_rate": 9.920072515203503e-07, "loss": 0.0235, "step": 147880 }, { "epoch": 1.5801057748811367, "grad_norm": 1.233786940574646, "learning_rate": 9.920042592054312e-07, "loss": 0.0229, "step": 147890 }, { "epoch": 1.5802126181954166, "grad_norm": 8.713608741760254, "learning_rate": 9.92001266335002e-07, "loss": 0.0311, "step": 147900 }, { "epoch": 1.580319461509696, "grad_norm": 8.1889066696167, "learning_rate": 9.919982729090665e-07, "loss": 0.0353, "step": 147910 }, { "epoch": 1.5804263048239755, "grad_norm": 3.8609297275543213, "learning_rate": 9.919952789276275e-07, "loss": 0.0128, "step": 147920 }, { "epoch": 1.5805331481382554, "grad_norm": 10.942951202392578, "learning_rate": 9.91992284390689e-07, "loss": 0.0395, "step": 147930 }, { "epoch": 1.5806399914525349, "grad_norm": 12.671600341796875, "learning_rate": 9.91989289298254e-07, "loss": 0.0726, "step": 147940 }, { "epoch": 1.5807468347668143, "grad_norm": 0.12569579482078552, "learning_rate": 9.91986293650326e-07, "loss": 0.0138, "step": 147950 }, { "epoch": 1.5808536780810942, "grad_norm": 8.33714771270752, "learning_rate": 9.919832974469082e-07, "loss": 0.066, "step": 147960 }, { "epoch": 1.5809605213953737, "grad_norm": 1.387265682220459, "learning_rate": 9.91980300688004e-07, "loss": 0.021, "step": 147970 }, { "epoch": 1.5810673647096531, "grad_norm": 3.9091708660125732, "learning_rate": 9.919773033736173e-07, "loss": 0.036, "step": 147980 }, { "epoch": 1.581174208023933, "grad_norm": 2.145604372024536, "learning_rate": 9.919743055037509e-07, "loss": 0.0282, "step": 147990 }, { "epoch": 1.5812810513382125, "grad_norm": 4.524653911590576, "learning_rate": 9.919713070784083e-07, "loss": 0.0858, "step": 148000 }, { "epoch": 1.581387894652492, "grad_norm": 3.830331563949585, "learning_rate": 9.91968308097593e-07, "loss": 0.0233, "step": 148010 }, { "epoch": 1.5814947379667719, "grad_norm": 3.2996809482574463, "learning_rate": 9.919653085613085e-07, "loss": 0.0111, "step": 148020 }, { "epoch": 1.5816015812810513, "grad_norm": 0.6831121444702148, "learning_rate": 9.919623084695579e-07, "loss": 0.008, "step": 148030 }, { "epoch": 1.5817084245953308, "grad_norm": 0.05121491104364395, "learning_rate": 9.919593078223448e-07, "loss": 0.0302, "step": 148040 }, { "epoch": 1.5818152679096107, "grad_norm": 14.557705879211426, "learning_rate": 9.919563066196722e-07, "loss": 0.0377, "step": 148050 }, { "epoch": 1.5819221112238901, "grad_norm": 3.2329320907592773, "learning_rate": 9.919533048615441e-07, "loss": 0.0474, "step": 148060 }, { "epoch": 1.5820289545381698, "grad_norm": 2.7791733741760254, "learning_rate": 9.919503025479637e-07, "loss": 0.0913, "step": 148070 }, { "epoch": 1.5821357978524495, "grad_norm": 0.12135223299264908, "learning_rate": 9.919472996789339e-07, "loss": 0.0475, "step": 148080 }, { "epoch": 1.582242641166729, "grad_norm": 0.4902363121509552, "learning_rate": 9.919442962544588e-07, "loss": 0.0379, "step": 148090 }, { "epoch": 1.5823494844810087, "grad_norm": 0.26261961460113525, "learning_rate": 9.919412922745414e-07, "loss": 0.0117, "step": 148100 }, { "epoch": 1.5824563277952883, "grad_norm": 0.026199597865343094, "learning_rate": 9.919382877391852e-07, "loss": 0.0314, "step": 148110 }, { "epoch": 1.5825631711095678, "grad_norm": 0.021035093814134598, "learning_rate": 9.919352826483934e-07, "loss": 0.0308, "step": 148120 }, { "epoch": 1.5826700144238475, "grad_norm": 0.6248438358306885, "learning_rate": 9.919322770021695e-07, "loss": 0.0394, "step": 148130 }, { "epoch": 1.5827768577381272, "grad_norm": 1.5964550971984863, "learning_rate": 9.91929270800517e-07, "loss": 0.0409, "step": 148140 }, { "epoch": 1.5828837010524066, "grad_norm": 0.7970556616783142, "learning_rate": 9.919262640434392e-07, "loss": 0.0165, "step": 148150 }, { "epoch": 1.5829905443666863, "grad_norm": 3.2663748264312744, "learning_rate": 9.919232567309397e-07, "loss": 0.0563, "step": 148160 }, { "epoch": 1.583097387680966, "grad_norm": 0.02173524536192417, "learning_rate": 9.919202488630215e-07, "loss": 0.0148, "step": 148170 }, { "epoch": 1.5832042309952454, "grad_norm": 10.803725242614746, "learning_rate": 9.919172404396883e-07, "loss": 0.0693, "step": 148180 }, { "epoch": 1.5833110743095251, "grad_norm": 0.23715855181217194, "learning_rate": 9.919142314609435e-07, "loss": 0.0049, "step": 148190 }, { "epoch": 1.5834179176238048, "grad_norm": 2.3136885166168213, "learning_rate": 9.919112219267901e-07, "loss": 0.0259, "step": 148200 }, { "epoch": 1.5835247609380843, "grad_norm": 7.136147499084473, "learning_rate": 9.919082118372322e-07, "loss": 0.0353, "step": 148210 }, { "epoch": 1.583631604252364, "grad_norm": 0.15755954384803772, "learning_rate": 9.919052011922726e-07, "loss": 0.0493, "step": 148220 }, { "epoch": 1.5837384475666436, "grad_norm": 0.27754664421081543, "learning_rate": 9.91902189991915e-07, "loss": 0.0298, "step": 148230 }, { "epoch": 1.583845290880923, "grad_norm": 0.0897134467959404, "learning_rate": 9.918991782361625e-07, "loss": 0.0107, "step": 148240 }, { "epoch": 1.5839521341952028, "grad_norm": 0.07546324282884598, "learning_rate": 9.918961659250188e-07, "loss": 0.0642, "step": 148250 }, { "epoch": 1.5840589775094824, "grad_norm": 1.5046075582504272, "learning_rate": 9.918931530584872e-07, "loss": 0.0425, "step": 148260 }, { "epoch": 1.584165820823762, "grad_norm": 0.08447720855474472, "learning_rate": 9.91890139636571e-07, "loss": 0.0292, "step": 148270 }, { "epoch": 1.5842726641380416, "grad_norm": 3.2815802097320557, "learning_rate": 9.91887125659274e-07, "loss": 0.0636, "step": 148280 }, { "epoch": 1.5843795074523213, "grad_norm": 0.2797345221042633, "learning_rate": 9.918841111265993e-07, "loss": 0.0386, "step": 148290 }, { "epoch": 1.5844863507666007, "grad_norm": 1.6713608503341675, "learning_rate": 9.9188109603855e-07, "loss": 0.0275, "step": 148300 }, { "epoch": 1.5845931940808804, "grad_norm": 0.16270756721496582, "learning_rate": 9.9187808039513e-07, "loss": 0.0264, "step": 148310 }, { "epoch": 1.58470003739516, "grad_norm": 0.028119202703237534, "learning_rate": 9.918750641963424e-07, "loss": 0.0325, "step": 148320 }, { "epoch": 1.5848068807094395, "grad_norm": 6.931946277618408, "learning_rate": 9.91872047442191e-07, "loss": 0.0273, "step": 148330 }, { "epoch": 1.5849137240237192, "grad_norm": 0.01523189339786768, "learning_rate": 9.918690301326787e-07, "loss": 0.018, "step": 148340 }, { "epoch": 1.585020567337999, "grad_norm": 4.869523525238037, "learning_rate": 9.91866012267809e-07, "loss": 0.0656, "step": 148350 }, { "epoch": 1.5851274106522784, "grad_norm": 0.6270352602005005, "learning_rate": 9.91862993847586e-07, "loss": 0.0412, "step": 148360 }, { "epoch": 1.585234253966558, "grad_norm": 0.025111686438322067, "learning_rate": 9.91859974872012e-07, "loss": 0.0198, "step": 148370 }, { "epoch": 1.5853410972808377, "grad_norm": 11.123578071594238, "learning_rate": 9.918569553410913e-07, "loss": 0.0483, "step": 148380 }, { "epoch": 1.5854479405951172, "grad_norm": 0.020567012950778008, "learning_rate": 9.918539352548267e-07, "loss": 0.0248, "step": 148390 }, { "epoch": 1.5855547839093969, "grad_norm": 7.579651355743408, "learning_rate": 9.918509146132222e-07, "loss": 0.0614, "step": 148400 }, { "epoch": 1.5856616272236765, "grad_norm": 0.028879769146442413, "learning_rate": 9.918478934162807e-07, "loss": 0.0342, "step": 148410 }, { "epoch": 1.585768470537956, "grad_norm": 6.955966949462891, "learning_rate": 9.918448716640059e-07, "loss": 0.0686, "step": 148420 }, { "epoch": 1.5858753138522357, "grad_norm": 7.916889190673828, "learning_rate": 9.91841849356401e-07, "loss": 0.0262, "step": 148430 }, { "epoch": 1.5859821571665154, "grad_norm": 10.931445121765137, "learning_rate": 9.918388264934695e-07, "loss": 0.0193, "step": 148440 }, { "epoch": 1.5860890004807948, "grad_norm": 0.22212763130664825, "learning_rate": 9.91835803075215e-07, "loss": 0.0469, "step": 148450 }, { "epoch": 1.5861958437950745, "grad_norm": 3.6164538860321045, "learning_rate": 9.918327791016408e-07, "loss": 0.0282, "step": 148460 }, { "epoch": 1.5863026871093542, "grad_norm": 2.416409492492676, "learning_rate": 9.9182975457275e-07, "loss": 0.1392, "step": 148470 }, { "epoch": 1.5864095304236336, "grad_norm": 0.5119140148162842, "learning_rate": 9.918267294885464e-07, "loss": 0.0385, "step": 148480 }, { "epoch": 1.5865163737379133, "grad_norm": 0.16319571435451508, "learning_rate": 9.918237038490335e-07, "loss": 0.0213, "step": 148490 }, { "epoch": 1.586623217052193, "grad_norm": 1.2484925985336304, "learning_rate": 9.918206776542145e-07, "loss": 0.0162, "step": 148500 }, { "epoch": 1.5867300603664725, "grad_norm": 3.4389915466308594, "learning_rate": 9.918176509040927e-07, "loss": 0.033, "step": 148510 }, { "epoch": 1.5868369036807521, "grad_norm": 7.704201698303223, "learning_rate": 9.918146235986714e-07, "loss": 0.0207, "step": 148520 }, { "epoch": 1.5869437469950318, "grad_norm": 0.9147342443466187, "learning_rate": 9.918115957379548e-07, "loss": 0.0082, "step": 148530 }, { "epoch": 1.5870505903093113, "grad_norm": 0.01542086061090231, "learning_rate": 9.918085673219454e-07, "loss": 0.0232, "step": 148540 }, { "epoch": 1.587157433623591, "grad_norm": 0.015253104269504547, "learning_rate": 9.918055383506472e-07, "loss": 0.0443, "step": 148550 }, { "epoch": 1.5872642769378706, "grad_norm": 5.9453229904174805, "learning_rate": 9.918025088240633e-07, "loss": 0.1095, "step": 148560 }, { "epoch": 1.58737112025215, "grad_norm": 0.028426680713891983, "learning_rate": 9.917994787421974e-07, "loss": 0.0293, "step": 148570 }, { "epoch": 1.5874779635664298, "grad_norm": 0.14602911472320557, "learning_rate": 9.917964481050526e-07, "loss": 0.0292, "step": 148580 }, { "epoch": 1.5875848068807095, "grad_norm": 11.984016418457031, "learning_rate": 9.917934169126327e-07, "loss": 0.0715, "step": 148590 }, { "epoch": 1.587691650194989, "grad_norm": 1.0821607112884521, "learning_rate": 9.917903851649407e-07, "loss": 0.0114, "step": 148600 }, { "epoch": 1.5877984935092686, "grad_norm": 14.855971336364746, "learning_rate": 9.917873528619804e-07, "loss": 0.0667, "step": 148610 }, { "epoch": 1.5879053368235483, "grad_norm": 2.1342122554779053, "learning_rate": 9.91784320003755e-07, "loss": 0.0127, "step": 148620 }, { "epoch": 1.5880121801378277, "grad_norm": 1.1024610996246338, "learning_rate": 9.917812865902679e-07, "loss": 0.0335, "step": 148630 }, { "epoch": 1.5881190234521076, "grad_norm": 0.044229794293642044, "learning_rate": 9.917782526215228e-07, "loss": 0.0116, "step": 148640 }, { "epoch": 1.588225866766387, "grad_norm": 8.322148323059082, "learning_rate": 9.917752180975228e-07, "loss": 0.0382, "step": 148650 }, { "epoch": 1.5883327100806666, "grad_norm": 7.545938014984131, "learning_rate": 9.917721830182715e-07, "loss": 0.0647, "step": 148660 }, { "epoch": 1.5884395533949465, "grad_norm": 4.318179607391357, "learning_rate": 9.917691473837722e-07, "loss": 0.0328, "step": 148670 }, { "epoch": 1.588546396709226, "grad_norm": 0.037579238414764404, "learning_rate": 9.917661111940285e-07, "loss": 0.0325, "step": 148680 }, { "epoch": 1.5886532400235054, "grad_norm": 1.6378116607666016, "learning_rate": 9.917630744490438e-07, "loss": 0.0244, "step": 148690 }, { "epoch": 1.5887600833377853, "grad_norm": 3.667511463165283, "learning_rate": 9.917600371488213e-07, "loss": 0.0129, "step": 148700 }, { "epoch": 1.5888669266520647, "grad_norm": 0.37006646394729614, "learning_rate": 9.917569992933647e-07, "loss": 0.0061, "step": 148710 }, { "epoch": 1.5889737699663442, "grad_norm": 7.674435138702393, "learning_rate": 9.917539608826774e-07, "loss": 0.078, "step": 148720 }, { "epoch": 1.589080613280624, "grad_norm": 2.0350515842437744, "learning_rate": 9.917509219167627e-07, "loss": 0.0328, "step": 148730 }, { "epoch": 1.5891874565949036, "grad_norm": 0.6925379037857056, "learning_rate": 9.917478823956243e-07, "loss": 0.0401, "step": 148740 }, { "epoch": 1.589294299909183, "grad_norm": 0.08349823206663132, "learning_rate": 9.917448423192651e-07, "loss": 0.0667, "step": 148750 }, { "epoch": 1.589401143223463, "grad_norm": 2.556360960006714, "learning_rate": 9.91741801687689e-07, "loss": 0.0244, "step": 148760 }, { "epoch": 1.5895079865377424, "grad_norm": 0.026683278381824493, "learning_rate": 9.917387605008992e-07, "loss": 0.0557, "step": 148770 }, { "epoch": 1.5896148298520218, "grad_norm": 0.08604319393634796, "learning_rate": 9.917357187588993e-07, "loss": 0.0309, "step": 148780 }, { "epoch": 1.5897216731663018, "grad_norm": 0.08056191354990005, "learning_rate": 9.917326764616928e-07, "loss": 0.0251, "step": 148790 }, { "epoch": 1.5898285164805812, "grad_norm": 7.293824195861816, "learning_rate": 9.917296336092829e-07, "loss": 0.0237, "step": 148800 }, { "epoch": 1.589935359794861, "grad_norm": 5.985010147094727, "learning_rate": 9.917265902016731e-07, "loss": 0.0652, "step": 148810 }, { "epoch": 1.5900422031091406, "grad_norm": 7.001245975494385, "learning_rate": 9.91723546238867e-07, "loss": 0.0716, "step": 148820 }, { "epoch": 1.59014904642342, "grad_norm": 2.50199818611145, "learning_rate": 9.917205017208677e-07, "loss": 0.02, "step": 148830 }, { "epoch": 1.5902558897376997, "grad_norm": 5.928789138793945, "learning_rate": 9.917174566476788e-07, "loss": 0.0256, "step": 148840 }, { "epoch": 1.5903627330519794, "grad_norm": 16.602832794189453, "learning_rate": 9.91714411019304e-07, "loss": 0.069, "step": 148850 }, { "epoch": 1.5904695763662589, "grad_norm": 4.927087783813477, "learning_rate": 9.917113648357463e-07, "loss": 0.0224, "step": 148860 }, { "epoch": 1.5905764196805385, "grad_norm": 0.4255412518978119, "learning_rate": 9.917083180970095e-07, "loss": 0.0024, "step": 148870 }, { "epoch": 1.5906832629948182, "grad_norm": 0.13859771192073822, "learning_rate": 9.91705270803097e-07, "loss": 0.0315, "step": 148880 }, { "epoch": 1.5907901063090977, "grad_norm": 14.108102798461914, "learning_rate": 9.91702222954012e-07, "loss": 0.0563, "step": 148890 }, { "epoch": 1.5908969496233774, "grad_norm": 7.0434393882751465, "learning_rate": 9.91699174549758e-07, "loss": 0.056, "step": 148900 }, { "epoch": 1.591003792937657, "grad_norm": 0.18939544260501862, "learning_rate": 9.916961255903388e-07, "loss": 0.0223, "step": 148910 }, { "epoch": 1.5911106362519365, "grad_norm": 0.7191645503044128, "learning_rate": 9.916930760757572e-07, "loss": 0.0196, "step": 148920 }, { "epoch": 1.5912174795662162, "grad_norm": 5.127035617828369, "learning_rate": 9.916900260060171e-07, "loss": 0.0193, "step": 148930 }, { "epoch": 1.5913243228804959, "grad_norm": 1.4743245840072632, "learning_rate": 9.916869753811219e-07, "loss": 0.1292, "step": 148940 }, { "epoch": 1.5914311661947753, "grad_norm": 2.988924503326416, "learning_rate": 9.916839242010751e-07, "loss": 0.0354, "step": 148950 }, { "epoch": 1.591538009509055, "grad_norm": 5.476910591125488, "learning_rate": 9.9168087246588e-07, "loss": 0.0293, "step": 148960 }, { "epoch": 1.5916448528233347, "grad_norm": 0.03850386291742325, "learning_rate": 9.9167782017554e-07, "loss": 0.0098, "step": 148970 }, { "epoch": 1.5917516961376141, "grad_norm": 0.10943114757537842, "learning_rate": 9.916747673300587e-07, "loss": 0.0331, "step": 148980 }, { "epoch": 1.5918585394518938, "grad_norm": 1.5573331117630005, "learning_rate": 9.916717139294395e-07, "loss": 0.0223, "step": 148990 }, { "epoch": 1.5919653827661735, "grad_norm": 2.384730815887451, "learning_rate": 9.916686599736859e-07, "loss": 0.014, "step": 149000 }, { "epoch": 1.592072226080453, "grad_norm": 2.840841293334961, "learning_rate": 9.91665605462801e-07, "loss": 0.0258, "step": 149010 }, { "epoch": 1.5921790693947326, "grad_norm": 3.664297580718994, "learning_rate": 9.916625503967888e-07, "loss": 0.0129, "step": 149020 }, { "epoch": 1.5922859127090123, "grad_norm": 0.8855782747268677, "learning_rate": 9.916594947756522e-07, "loss": 0.0575, "step": 149030 }, { "epoch": 1.5923927560232918, "grad_norm": 2.327712059020996, "learning_rate": 9.916564385993952e-07, "loss": 0.0108, "step": 149040 }, { "epoch": 1.5924995993375715, "grad_norm": 7.683224678039551, "learning_rate": 9.916533818680209e-07, "loss": 0.0199, "step": 149050 }, { "epoch": 1.5926064426518511, "grad_norm": 2.310070037841797, "learning_rate": 9.916503245815328e-07, "loss": 0.0077, "step": 149060 }, { "epoch": 1.5927132859661306, "grad_norm": 10.26915454864502, "learning_rate": 9.916472667399342e-07, "loss": 0.0369, "step": 149070 }, { "epoch": 1.5928201292804103, "grad_norm": 3.712858200073242, "learning_rate": 9.91644208343229e-07, "loss": 0.0104, "step": 149080 }, { "epoch": 1.59292697259469, "grad_norm": 1.4248900413513184, "learning_rate": 9.916411493914203e-07, "loss": 0.0142, "step": 149090 }, { "epoch": 1.5930338159089694, "grad_norm": 1.8306922912597656, "learning_rate": 9.916380898845116e-07, "loss": 0.0268, "step": 149100 }, { "epoch": 1.593140659223249, "grad_norm": 12.569517135620117, "learning_rate": 9.916350298225066e-07, "loss": 0.0382, "step": 149110 }, { "epoch": 1.5932475025375288, "grad_norm": 1.4169169664382935, "learning_rate": 9.916319692054082e-07, "loss": 0.0387, "step": 149120 }, { "epoch": 1.5933543458518082, "grad_norm": 0.37290406227111816, "learning_rate": 9.916289080332202e-07, "loss": 0.0187, "step": 149130 }, { "epoch": 1.593461189166088, "grad_norm": 1.6128344535827637, "learning_rate": 9.916258463059463e-07, "loss": 0.0172, "step": 149140 }, { "epoch": 1.5935680324803676, "grad_norm": 3.9292895793914795, "learning_rate": 9.916227840235895e-07, "loss": 0.0212, "step": 149150 }, { "epoch": 1.593674875794647, "grad_norm": 4.658916473388672, "learning_rate": 9.916197211861537e-07, "loss": 0.0177, "step": 149160 }, { "epoch": 1.5937817191089267, "grad_norm": 5.121861934661865, "learning_rate": 9.91616657793642e-07, "loss": 0.0204, "step": 149170 }, { "epoch": 1.5938885624232064, "grad_norm": 0.02329440601170063, "learning_rate": 9.91613593846058e-07, "loss": 0.0521, "step": 149180 }, { "epoch": 1.5939954057374859, "grad_norm": 1.258107304573059, "learning_rate": 9.91610529343405e-07, "loss": 0.0506, "step": 149190 }, { "epoch": 1.5941022490517656, "grad_norm": 6.891469478607178, "learning_rate": 9.91607464285687e-07, "loss": 0.0511, "step": 149200 }, { "epoch": 1.5942090923660452, "grad_norm": 0.07964323461055756, "learning_rate": 9.916043986729067e-07, "loss": 0.0148, "step": 149210 }, { "epoch": 1.5943159356803247, "grad_norm": 1.5400316715240479, "learning_rate": 9.916013325050683e-07, "loss": 0.0506, "step": 149220 }, { "epoch": 1.5944227789946044, "grad_norm": 0.7415817379951477, "learning_rate": 9.915982657821747e-07, "loss": 0.0211, "step": 149230 }, { "epoch": 1.594529622308884, "grad_norm": 5.3319010734558105, "learning_rate": 9.915951985042295e-07, "loss": 0.0315, "step": 149240 }, { "epoch": 1.5946364656231635, "grad_norm": 0.41459986567497253, "learning_rate": 9.915921306712363e-07, "loss": 0.0559, "step": 149250 }, { "epoch": 1.5947433089374432, "grad_norm": 7.279567718505859, "learning_rate": 9.915890622831984e-07, "loss": 0.0905, "step": 149260 }, { "epoch": 1.5948501522517229, "grad_norm": 2.2485547065734863, "learning_rate": 9.915859933401194e-07, "loss": 0.02, "step": 149270 }, { "epoch": 1.5949569955660023, "grad_norm": 3.564746379852295, "learning_rate": 9.915829238420026e-07, "loss": 0.0144, "step": 149280 }, { "epoch": 1.595063838880282, "grad_norm": 0.9445164799690247, "learning_rate": 9.915798537888516e-07, "loss": 0.042, "step": 149290 }, { "epoch": 1.5951706821945617, "grad_norm": 5.9450364112854, "learning_rate": 9.9157678318067e-07, "loss": 0.0347, "step": 149300 }, { "epoch": 1.5952775255088412, "grad_norm": 0.43974438309669495, "learning_rate": 9.91573712017461e-07, "loss": 0.0116, "step": 149310 }, { "epoch": 1.5953843688231208, "grad_norm": 3.5340280532836914, "learning_rate": 9.915706402992281e-07, "loss": 0.0399, "step": 149320 }, { "epoch": 1.5954912121374005, "grad_norm": 10.100687980651855, "learning_rate": 9.91567568025975e-07, "loss": 0.0516, "step": 149330 }, { "epoch": 1.59559805545168, "grad_norm": 6.765686511993408, "learning_rate": 9.91564495197705e-07, "loss": 0.041, "step": 149340 }, { "epoch": 1.5957048987659597, "grad_norm": 0.004682799801230431, "learning_rate": 9.915614218144214e-07, "loss": 0.0278, "step": 149350 }, { "epoch": 1.5958117420802393, "grad_norm": 7.6270647048950195, "learning_rate": 9.91558347876128e-07, "loss": 0.0332, "step": 149360 }, { "epoch": 1.5959185853945188, "grad_norm": 3.06925892829895, "learning_rate": 9.915552733828282e-07, "loss": 0.0381, "step": 149370 }, { "epoch": 1.5960254287087987, "grad_norm": 0.11798907071352005, "learning_rate": 9.915521983345253e-07, "loss": 0.0295, "step": 149380 }, { "epoch": 1.5961322720230782, "grad_norm": 0.008750513195991516, "learning_rate": 9.915491227312228e-07, "loss": 0.0582, "step": 149390 }, { "epoch": 1.5962391153373576, "grad_norm": 6.588997840881348, "learning_rate": 9.915460465729243e-07, "loss": 0.0229, "step": 149400 }, { "epoch": 1.5963459586516375, "grad_norm": 0.9626349806785583, "learning_rate": 9.915429698596333e-07, "loss": 0.0377, "step": 149410 }, { "epoch": 1.596452801965917, "grad_norm": 0.566307008266449, "learning_rate": 9.915398925913531e-07, "loss": 0.0221, "step": 149420 }, { "epoch": 1.5965596452801964, "grad_norm": 0.301250159740448, "learning_rate": 9.915368147680874e-07, "loss": 0.0048, "step": 149430 }, { "epoch": 1.5966664885944764, "grad_norm": 0.30881401896476746, "learning_rate": 9.915337363898394e-07, "loss": 0.016, "step": 149440 }, { "epoch": 1.5967733319087558, "grad_norm": 0.24285639822483063, "learning_rate": 9.915306574566127e-07, "loss": 0.0329, "step": 149450 }, { "epoch": 1.5968801752230353, "grad_norm": 0.07156926393508911, "learning_rate": 9.91527577968411e-07, "loss": 0.0146, "step": 149460 }, { "epoch": 1.5969870185373152, "grad_norm": 0.3857899010181427, "learning_rate": 9.915244979252374e-07, "loss": 0.0083, "step": 149470 }, { "epoch": 1.5970938618515946, "grad_norm": 0.0264129601418972, "learning_rate": 9.915214173270954e-07, "loss": 0.0426, "step": 149480 }, { "epoch": 1.597200705165874, "grad_norm": 11.77298641204834, "learning_rate": 9.915183361739888e-07, "loss": 0.0291, "step": 149490 }, { "epoch": 1.597307548480154, "grad_norm": 0.84641432762146, "learning_rate": 9.915152544659209e-07, "loss": 0.0338, "step": 149500 }, { "epoch": 1.5974143917944335, "grad_norm": 3.417356014251709, "learning_rate": 9.915121722028952e-07, "loss": 0.0826, "step": 149510 }, { "epoch": 1.597521235108713, "grad_norm": 1.5102465152740479, "learning_rate": 9.91509089384915e-07, "loss": 0.0576, "step": 149520 }, { "epoch": 1.5976280784229928, "grad_norm": 1.9180630445480347, "learning_rate": 9.915060060119842e-07, "loss": 0.039, "step": 149530 }, { "epoch": 1.5977349217372723, "grad_norm": 3.6866037845611572, "learning_rate": 9.915029220841059e-07, "loss": 0.0296, "step": 149540 }, { "epoch": 1.597841765051552, "grad_norm": 2.9974417686462402, "learning_rate": 9.914998376012836e-07, "loss": 0.0303, "step": 149550 }, { "epoch": 1.5979486083658316, "grad_norm": 0.008989796973764896, "learning_rate": 9.914967525635209e-07, "loss": 0.0331, "step": 149560 }, { "epoch": 1.598055451680111, "grad_norm": 10.54909610748291, "learning_rate": 9.914936669708214e-07, "loss": 0.0357, "step": 149570 }, { "epoch": 1.5981622949943908, "grad_norm": 0.5252285599708557, "learning_rate": 9.914905808231884e-07, "loss": 0.094, "step": 149580 }, { "epoch": 1.5982691383086705, "grad_norm": 0.7774367928504944, "learning_rate": 9.914874941206254e-07, "loss": 0.0081, "step": 149590 }, { "epoch": 1.59837598162295, "grad_norm": 0.044520072638988495, "learning_rate": 9.91484406863136e-07, "loss": 0.0192, "step": 149600 }, { "epoch": 1.5984828249372296, "grad_norm": 6.671828269958496, "learning_rate": 9.914813190507236e-07, "loss": 0.0639, "step": 149610 }, { "epoch": 1.5985896682515093, "grad_norm": 0.01858709380030632, "learning_rate": 9.914782306833917e-07, "loss": 0.029, "step": 149620 }, { "epoch": 1.5986965115657887, "grad_norm": 2.681474208831787, "learning_rate": 9.914751417611437e-07, "loss": 0.0271, "step": 149630 }, { "epoch": 1.5988033548800684, "grad_norm": 7.578701496124268, "learning_rate": 9.914720522839833e-07, "loss": 0.0175, "step": 149640 }, { "epoch": 1.598910198194348, "grad_norm": 0.5203515887260437, "learning_rate": 9.914689622519138e-07, "loss": 0.0176, "step": 149650 }, { "epoch": 1.5990170415086276, "grad_norm": 9.618778228759766, "learning_rate": 9.914658716649386e-07, "loss": 0.039, "step": 149660 }, { "epoch": 1.5991238848229072, "grad_norm": 5.152288913726807, "learning_rate": 9.914627805230615e-07, "loss": 0.0905, "step": 149670 }, { "epoch": 1.599230728137187, "grad_norm": 0.30963611602783203, "learning_rate": 9.914596888262858e-07, "loss": 0.0074, "step": 149680 }, { "epoch": 1.5993375714514664, "grad_norm": 2.1662564277648926, "learning_rate": 9.914565965746151e-07, "loss": 0.0185, "step": 149690 }, { "epoch": 1.599444414765746, "grad_norm": 3.882755994796753, "learning_rate": 9.914535037680528e-07, "loss": 0.0151, "step": 149700 }, { "epoch": 1.5995512580800257, "grad_norm": 5.486223220825195, "learning_rate": 9.914504104066023e-07, "loss": 0.0184, "step": 149710 }, { "epoch": 1.5996581013943052, "grad_norm": 0.7760190367698669, "learning_rate": 9.914473164902672e-07, "loss": 0.0281, "step": 149720 }, { "epoch": 1.5997649447085849, "grad_norm": 2.369036912918091, "learning_rate": 9.91444222019051e-07, "loss": 0.0164, "step": 149730 }, { "epoch": 1.5998717880228646, "grad_norm": 0.011002209037542343, "learning_rate": 9.914411269929573e-07, "loss": 0.0529, "step": 149740 }, { "epoch": 1.599978631337144, "grad_norm": 0.008027675561606884, "learning_rate": 9.914380314119894e-07, "loss": 0.034, "step": 149750 }, { "epoch": 1.6000854746514237, "grad_norm": 5.430569648742676, "learning_rate": 9.914349352761509e-07, "loss": 0.0166, "step": 149760 }, { "epoch": 1.6001923179657034, "grad_norm": 0.4492848515510559, "learning_rate": 9.91431838585445e-07, "loss": 0.113, "step": 149770 }, { "epoch": 1.6002991612799828, "grad_norm": 0.7927687764167786, "learning_rate": 9.91428741339876e-07, "loss": 0.0277, "step": 149780 }, { "epoch": 1.6004060045942625, "grad_norm": 1.3436849117279053, "learning_rate": 9.914256435394464e-07, "loss": 0.0264, "step": 149790 }, { "epoch": 1.6005128479085422, "grad_norm": 0.06047210097312927, "learning_rate": 9.914225451841603e-07, "loss": 0.0307, "step": 149800 }, { "epoch": 1.6006196912228217, "grad_norm": 0.7706299424171448, "learning_rate": 9.91419446274021e-07, "loss": 0.0081, "step": 149810 }, { "epoch": 1.6007265345371013, "grad_norm": 1.555401086807251, "learning_rate": 9.914163468090322e-07, "loss": 0.0546, "step": 149820 }, { "epoch": 1.600833377851381, "grad_norm": 0.02890084870159626, "learning_rate": 9.91413246789197e-07, "loss": 0.0263, "step": 149830 }, { "epoch": 1.6009402211656605, "grad_norm": 1.0136077404022217, "learning_rate": 9.914101462145195e-07, "loss": 0.0887, "step": 149840 }, { "epoch": 1.6010470644799402, "grad_norm": 0.029859643429517746, "learning_rate": 9.914070450850027e-07, "loss": 0.024, "step": 149850 }, { "epoch": 1.6011539077942198, "grad_norm": 5.535183906555176, "learning_rate": 9.914039434006503e-07, "loss": 0.1007, "step": 149860 }, { "epoch": 1.6012607511084993, "grad_norm": 0.6440002918243408, "learning_rate": 9.914008411614656e-07, "loss": 0.0352, "step": 149870 }, { "epoch": 1.601367594422779, "grad_norm": 1.7575786113739014, "learning_rate": 9.913977383674524e-07, "loss": 0.0158, "step": 149880 }, { "epoch": 1.6014744377370587, "grad_norm": 0.05081036686897278, "learning_rate": 9.91394635018614e-07, "loss": 0.0172, "step": 149890 }, { "epoch": 1.6015812810513381, "grad_norm": 2.4721481800079346, "learning_rate": 9.91391531114954e-07, "loss": 0.021, "step": 149900 }, { "epoch": 1.6016881243656178, "grad_norm": 0.06806972622871399, "learning_rate": 9.913884266564758e-07, "loss": 0.0559, "step": 149910 }, { "epoch": 1.6017949676798975, "grad_norm": 0.7434883713722229, "learning_rate": 9.913853216431832e-07, "loss": 0.0107, "step": 149920 }, { "epoch": 1.601901810994177, "grad_norm": 0.04773581773042679, "learning_rate": 9.913822160750793e-07, "loss": 0.0388, "step": 149930 }, { "epoch": 1.6020086543084566, "grad_norm": 2.5468204021453857, "learning_rate": 9.913791099521678e-07, "loss": 0.0179, "step": 149940 }, { "epoch": 1.6021154976227363, "grad_norm": 5.932876110076904, "learning_rate": 9.913760032744523e-07, "loss": 0.0292, "step": 149950 }, { "epoch": 1.6022223409370158, "grad_norm": 0.6484336853027344, "learning_rate": 9.91372896041936e-07, "loss": 0.0347, "step": 149960 }, { "epoch": 1.6023291842512954, "grad_norm": 0.047473106533288956, "learning_rate": 9.913697882546228e-07, "loss": 0.0451, "step": 149970 }, { "epoch": 1.6024360275655751, "grad_norm": 0.012056493200361729, "learning_rate": 9.913666799125159e-07, "loss": 0.0321, "step": 149980 }, { "epoch": 1.6025428708798546, "grad_norm": 4.587418556213379, "learning_rate": 9.913635710156191e-07, "loss": 0.0432, "step": 149990 }, { "epoch": 1.6026497141941343, "grad_norm": 0.22716295719146729, "learning_rate": 9.913604615639357e-07, "loss": 0.0217, "step": 150000 }, { "epoch": 1.602756557508414, "grad_norm": 0.13896600902080536, "learning_rate": 9.913573515574692e-07, "loss": 0.0168, "step": 150010 }, { "epoch": 1.6028634008226934, "grad_norm": 1.0504283905029297, "learning_rate": 9.913542409962232e-07, "loss": 0.0396, "step": 150020 }, { "epoch": 1.602970244136973, "grad_norm": 0.021664859727025032, "learning_rate": 9.91351129880201e-07, "loss": 0.0359, "step": 150030 }, { "epoch": 1.6030770874512528, "grad_norm": 0.015570145100355148, "learning_rate": 9.913480182094065e-07, "loss": 0.0136, "step": 150040 }, { "epoch": 1.6031839307655322, "grad_norm": 1.3168792724609375, "learning_rate": 9.91344905983843e-07, "loss": 0.0399, "step": 150050 }, { "epoch": 1.603290774079812, "grad_norm": 7.419675827026367, "learning_rate": 9.913417932035139e-07, "loss": 0.0205, "step": 150060 }, { "epoch": 1.6033976173940916, "grad_norm": 0.3696032166481018, "learning_rate": 9.913386798684228e-07, "loss": 0.0291, "step": 150070 }, { "epoch": 1.603504460708371, "grad_norm": 5.432413578033447, "learning_rate": 9.913355659785734e-07, "loss": 0.0371, "step": 150080 }, { "epoch": 1.6036113040226507, "grad_norm": 0.5015281438827515, "learning_rate": 9.913324515339692e-07, "loss": 0.0231, "step": 150090 }, { "epoch": 1.6037181473369304, "grad_norm": 1.652045488357544, "learning_rate": 9.913293365346134e-07, "loss": 0.0349, "step": 150100 }, { "epoch": 1.6038249906512099, "grad_norm": 1.2868015766143799, "learning_rate": 9.913262209805096e-07, "loss": 0.0399, "step": 150110 }, { "epoch": 1.6039318339654898, "grad_norm": 0.060644231736660004, "learning_rate": 9.913231048716616e-07, "loss": 0.0155, "step": 150120 }, { "epoch": 1.6040386772797692, "grad_norm": 3.702979326248169, "learning_rate": 9.913199882080724e-07, "loss": 0.0447, "step": 150130 }, { "epoch": 1.6041455205940487, "grad_norm": 0.8672885298728943, "learning_rate": 9.91316870989746e-07, "loss": 0.0112, "step": 150140 }, { "epoch": 1.6042523639083286, "grad_norm": 0.11378078907728195, "learning_rate": 9.91313753216686e-07, "loss": 0.0694, "step": 150150 }, { "epoch": 1.604359207222608, "grad_norm": 8.587058067321777, "learning_rate": 9.913106348888956e-07, "loss": 0.1011, "step": 150160 }, { "epoch": 1.6044660505368875, "grad_norm": 1.6701117753982544, "learning_rate": 9.913075160063785e-07, "loss": 0.0517, "step": 150170 }, { "epoch": 1.6045728938511674, "grad_norm": 1.7789376974105835, "learning_rate": 9.91304396569138e-07, "loss": 0.0124, "step": 150180 }, { "epoch": 1.6046797371654469, "grad_norm": 0.7022059559822083, "learning_rate": 9.913012765771775e-07, "loss": 0.0375, "step": 150190 }, { "epoch": 1.6047865804797263, "grad_norm": 3.937391519546509, "learning_rate": 9.912981560305013e-07, "loss": 0.0199, "step": 150200 }, { "epoch": 1.6048934237940062, "grad_norm": 0.0312773734331131, "learning_rate": 9.912950349291118e-07, "loss": 0.0168, "step": 150210 }, { "epoch": 1.6050002671082857, "grad_norm": 1.0007855892181396, "learning_rate": 9.912919132730135e-07, "loss": 0.0407, "step": 150220 }, { "epoch": 1.6051071104225652, "grad_norm": 2.8765692710876465, "learning_rate": 9.912887910622097e-07, "loss": 0.0152, "step": 150230 }, { "epoch": 1.605213953736845, "grad_norm": 1.4457634687423706, "learning_rate": 9.912856682967034e-07, "loss": 0.0632, "step": 150240 }, { "epoch": 1.6053207970511245, "grad_norm": 4.9379401206970215, "learning_rate": 9.912825449764987e-07, "loss": 0.0525, "step": 150250 }, { "epoch": 1.605427640365404, "grad_norm": 2.498048782348633, "learning_rate": 9.91279421101599e-07, "loss": 0.0181, "step": 150260 }, { "epoch": 1.6055344836796839, "grad_norm": 0.266441285610199, "learning_rate": 9.912762966720077e-07, "loss": 0.0456, "step": 150270 }, { "epoch": 1.6056413269939633, "grad_norm": 8.356363296508789, "learning_rate": 9.912731716877282e-07, "loss": 0.0462, "step": 150280 }, { "epoch": 1.605748170308243, "grad_norm": 4.403094291687012, "learning_rate": 9.912700461487645e-07, "loss": 0.0097, "step": 150290 }, { "epoch": 1.6058550136225227, "grad_norm": 4.41416597366333, "learning_rate": 9.912669200551196e-07, "loss": 0.0348, "step": 150300 }, { "epoch": 1.6059618569368022, "grad_norm": 4.38424825668335, "learning_rate": 9.912637934067973e-07, "loss": 0.0679, "step": 150310 }, { "epoch": 1.6060687002510818, "grad_norm": 2.5338330268859863, "learning_rate": 9.912606662038012e-07, "loss": 0.031, "step": 150320 }, { "epoch": 1.6061755435653615, "grad_norm": 3.624828338623047, "learning_rate": 9.912575384461349e-07, "loss": 0.1223, "step": 150330 }, { "epoch": 1.606282386879641, "grad_norm": 0.04289272055029869, "learning_rate": 9.912544101338015e-07, "loss": 0.0529, "step": 150340 }, { "epoch": 1.6063892301939207, "grad_norm": 2.401794195175171, "learning_rate": 9.912512812668047e-07, "loss": 0.0148, "step": 150350 }, { "epoch": 1.6064960735082003, "grad_norm": 7.893643856048584, "learning_rate": 9.912481518451483e-07, "loss": 0.0111, "step": 150360 }, { "epoch": 1.6066029168224798, "grad_norm": 3.8271701335906982, "learning_rate": 9.912450218688357e-07, "loss": 0.0088, "step": 150370 }, { "epoch": 1.6067097601367595, "grad_norm": 0.11605037748813629, "learning_rate": 9.912418913378704e-07, "loss": 0.0152, "step": 150380 }, { "epoch": 1.6068166034510392, "grad_norm": 3.0969114303588867, "learning_rate": 9.912387602522559e-07, "loss": 0.0371, "step": 150390 }, { "epoch": 1.6069234467653186, "grad_norm": 0.0629325807094574, "learning_rate": 9.912356286119957e-07, "loss": 0.0158, "step": 150400 }, { "epoch": 1.6070302900795983, "grad_norm": 0.27045729756355286, "learning_rate": 9.912324964170935e-07, "loss": 0.0284, "step": 150410 }, { "epoch": 1.607137133393878, "grad_norm": 1.2858099937438965, "learning_rate": 9.912293636675526e-07, "loss": 0.0123, "step": 150420 }, { "epoch": 1.6072439767081574, "grad_norm": 0.04617994651198387, "learning_rate": 9.912262303633767e-07, "loss": 0.0467, "step": 150430 }, { "epoch": 1.6073508200224371, "grad_norm": 0.039822425693273544, "learning_rate": 9.912230965045693e-07, "loss": 0.0458, "step": 150440 }, { "epoch": 1.6074576633367168, "grad_norm": 10.075592041015625, "learning_rate": 9.91219962091134e-07, "loss": 0.0131, "step": 150450 }, { "epoch": 1.6075645066509963, "grad_norm": 0.32173410058021545, "learning_rate": 9.912168271230744e-07, "loss": 0.007, "step": 150460 }, { "epoch": 1.607671349965276, "grad_norm": 0.012509322725236416, "learning_rate": 9.912136916003936e-07, "loss": 0.0204, "step": 150470 }, { "epoch": 1.6077781932795556, "grad_norm": 2.402878999710083, "learning_rate": 9.912105555230958e-07, "loss": 0.0181, "step": 150480 }, { "epoch": 1.607885036593835, "grad_norm": 5.662711143493652, "learning_rate": 9.91207418891184e-07, "loss": 0.0259, "step": 150490 }, { "epoch": 1.6079918799081148, "grad_norm": 0.04117896035313606, "learning_rate": 9.912042817046623e-07, "loss": 0.0581, "step": 150500 }, { "epoch": 1.6080987232223944, "grad_norm": 0.267300009727478, "learning_rate": 9.912011439635336e-07, "loss": 0.037, "step": 150510 }, { "epoch": 1.608205566536674, "grad_norm": 0.06834676861763, "learning_rate": 9.911980056678016e-07, "loss": 0.1709, "step": 150520 }, { "epoch": 1.6083124098509536, "grad_norm": 0.42572006583213806, "learning_rate": 9.911948668174702e-07, "loss": 0.0057, "step": 150530 }, { "epoch": 1.6084192531652333, "grad_norm": 4.142199516296387, "learning_rate": 9.911917274125428e-07, "loss": 0.0133, "step": 150540 }, { "epoch": 1.6085260964795127, "grad_norm": 5.384767532348633, "learning_rate": 9.911885874530226e-07, "loss": 0.0385, "step": 150550 }, { "epoch": 1.6086329397937924, "grad_norm": 8.278464317321777, "learning_rate": 9.911854469389136e-07, "loss": 0.0557, "step": 150560 }, { "epoch": 1.608739783108072, "grad_norm": 0.47913286089897156, "learning_rate": 9.911823058702192e-07, "loss": 0.0154, "step": 150570 }, { "epoch": 1.6088466264223515, "grad_norm": 0.10193124413490295, "learning_rate": 9.911791642469427e-07, "loss": 0.0212, "step": 150580 }, { "epoch": 1.6089534697366312, "grad_norm": 0.2602235674858093, "learning_rate": 9.91176022069088e-07, "loss": 0.0521, "step": 150590 }, { "epoch": 1.609060313050911, "grad_norm": 14.524065971374512, "learning_rate": 9.911728793366584e-07, "loss": 0.0332, "step": 150600 }, { "epoch": 1.6091671563651904, "grad_norm": 5.071930408477783, "learning_rate": 9.911697360496578e-07, "loss": 0.0706, "step": 150610 }, { "epoch": 1.60927399967947, "grad_norm": 0.05883755162358284, "learning_rate": 9.911665922080892e-07, "loss": 0.0435, "step": 150620 }, { "epoch": 1.6093808429937497, "grad_norm": 0.11474987119436264, "learning_rate": 9.911634478119567e-07, "loss": 0.0566, "step": 150630 }, { "epoch": 1.6094876863080292, "grad_norm": 6.473484992980957, "learning_rate": 9.911603028612634e-07, "loss": 0.0147, "step": 150640 }, { "epoch": 1.6095945296223089, "grad_norm": 7.331687927246094, "learning_rate": 9.91157157356013e-07, "loss": 0.0437, "step": 150650 }, { "epoch": 1.6097013729365885, "grad_norm": 0.30573442578315735, "learning_rate": 9.911540112962094e-07, "loss": 0.0472, "step": 150660 }, { "epoch": 1.609808216250868, "grad_norm": 0.04163713380694389, "learning_rate": 9.911508646818556e-07, "loss": 0.0202, "step": 150670 }, { "epoch": 1.6099150595651477, "grad_norm": 2.1836373805999756, "learning_rate": 9.911477175129554e-07, "loss": 0.0398, "step": 150680 }, { "epoch": 1.6100219028794274, "grad_norm": 0.8453207612037659, "learning_rate": 9.911445697895123e-07, "loss": 0.0201, "step": 150690 }, { "epoch": 1.6101287461937068, "grad_norm": 3.861379861831665, "learning_rate": 9.911414215115303e-07, "loss": 0.0263, "step": 150700 }, { "epoch": 1.6102355895079865, "grad_norm": 0.01970936357975006, "learning_rate": 9.911382726790124e-07, "loss": 0.0168, "step": 150710 }, { "epoch": 1.6103424328222662, "grad_norm": 3.1510355472564697, "learning_rate": 9.91135123291962e-07, "loss": 0.0241, "step": 150720 }, { "epoch": 1.6104492761365456, "grad_norm": 1.3898160457611084, "learning_rate": 9.911319733503834e-07, "loss": 0.0374, "step": 150730 }, { "epoch": 1.6105561194508253, "grad_norm": 2.150484085083008, "learning_rate": 9.911288228542794e-07, "loss": 0.0216, "step": 150740 }, { "epoch": 1.610662962765105, "grad_norm": 0.17706790566444397, "learning_rate": 9.91125671803654e-07, "loss": 0.0405, "step": 150750 }, { "epoch": 1.6107698060793845, "grad_norm": 2.758239984512329, "learning_rate": 9.911225201985107e-07, "loss": 0.0882, "step": 150760 }, { "epoch": 1.6108766493936642, "grad_norm": 3.1579957008361816, "learning_rate": 9.91119368038853e-07, "loss": 0.0174, "step": 150770 }, { "epoch": 1.6109834927079438, "grad_norm": 0.6793540716171265, "learning_rate": 9.911162153246846e-07, "loss": 0.0409, "step": 150780 }, { "epoch": 1.6110903360222233, "grad_norm": 0.6586884260177612, "learning_rate": 9.911130620560087e-07, "loss": 0.0357, "step": 150790 }, { "epoch": 1.611197179336503, "grad_norm": 0.18218281865119934, "learning_rate": 9.91109908232829e-07, "loss": 0.0277, "step": 150800 }, { "epoch": 1.6113040226507827, "grad_norm": 2.9363842010498047, "learning_rate": 9.911067538551496e-07, "loss": 0.0185, "step": 150810 }, { "epoch": 1.6114108659650621, "grad_norm": 1.6309961080551147, "learning_rate": 9.911035989229733e-07, "loss": 0.0266, "step": 150820 }, { "epoch": 1.6115177092793418, "grad_norm": 0.2944602072238922, "learning_rate": 9.911004434363039e-07, "loss": 0.012, "step": 150830 }, { "epoch": 1.6116245525936215, "grad_norm": 0.6718215942382812, "learning_rate": 9.910972873951452e-07, "loss": 0.0128, "step": 150840 }, { "epoch": 1.611731395907901, "grad_norm": 2.1807916164398193, "learning_rate": 9.910941307995007e-07, "loss": 0.0126, "step": 150850 }, { "epoch": 1.6118382392221808, "grad_norm": 0.01804102212190628, "learning_rate": 9.910909736493735e-07, "loss": 0.0252, "step": 150860 }, { "epoch": 1.6119450825364603, "grad_norm": 0.27344974875450134, "learning_rate": 9.910878159447677e-07, "loss": 0.0281, "step": 150870 }, { "epoch": 1.6120519258507398, "grad_norm": 0.011930884793400764, "learning_rate": 9.910846576856868e-07, "loss": 0.0273, "step": 150880 }, { "epoch": 1.6121587691650197, "grad_norm": 2.879102945327759, "learning_rate": 9.91081498872134e-07, "loss": 0.012, "step": 150890 }, { "epoch": 1.6122656124792991, "grad_norm": 2.616562843322754, "learning_rate": 9.910783395041134e-07, "loss": 0.0174, "step": 150900 }, { "epoch": 1.6123724557935786, "grad_norm": 0.2491200715303421, "learning_rate": 9.910751795816283e-07, "loss": 0.0281, "step": 150910 }, { "epoch": 1.6124792991078585, "grad_norm": 3.1722686290740967, "learning_rate": 9.910720191046822e-07, "loss": 0.0271, "step": 150920 }, { "epoch": 1.612586142422138, "grad_norm": 0.7086543440818787, "learning_rate": 9.910688580732788e-07, "loss": 0.0283, "step": 150930 }, { "epoch": 1.6126929857364174, "grad_norm": 0.37320056557655334, "learning_rate": 9.910656964874213e-07, "loss": 0.0124, "step": 150940 }, { "epoch": 1.6127998290506973, "grad_norm": 5.175549030303955, "learning_rate": 9.910625343471136e-07, "loss": 0.0241, "step": 150950 }, { "epoch": 1.6129066723649768, "grad_norm": 0.19170330464839935, "learning_rate": 9.910593716523594e-07, "loss": 0.0693, "step": 150960 }, { "epoch": 1.6130135156792562, "grad_norm": 0.08867865055799484, "learning_rate": 9.910562084031622e-07, "loss": 0.0146, "step": 150970 }, { "epoch": 1.6131203589935361, "grad_norm": 1.582640290260315, "learning_rate": 9.910530445995253e-07, "loss": 0.0311, "step": 150980 }, { "epoch": 1.6132272023078156, "grad_norm": 15.135994911193848, "learning_rate": 9.910498802414525e-07, "loss": 0.0078, "step": 150990 }, { "epoch": 1.613334045622095, "grad_norm": 0.9844789505004883, "learning_rate": 9.910467153289474e-07, "loss": 0.0256, "step": 151000 }, { "epoch": 1.613440888936375, "grad_norm": 0.7032586932182312, "learning_rate": 9.910435498620134e-07, "loss": 0.0723, "step": 151010 }, { "epoch": 1.6135477322506544, "grad_norm": 0.42507708072662354, "learning_rate": 9.910403838406543e-07, "loss": 0.0158, "step": 151020 }, { "epoch": 1.613654575564934, "grad_norm": 0.060767676681280136, "learning_rate": 9.910372172648733e-07, "loss": 0.0123, "step": 151030 }, { "epoch": 1.6137614188792138, "grad_norm": 3.8329505920410156, "learning_rate": 9.910340501346744e-07, "loss": 0.0126, "step": 151040 }, { "epoch": 1.6138682621934932, "grad_norm": 2.5321338176727295, "learning_rate": 9.910308824500611e-07, "loss": 0.0264, "step": 151050 }, { "epoch": 1.613975105507773, "grad_norm": 0.012636249884963036, "learning_rate": 9.910277142110366e-07, "loss": 0.0589, "step": 151060 }, { "epoch": 1.6140819488220526, "grad_norm": 2.530641794204712, "learning_rate": 9.91024545417605e-07, "loss": 0.043, "step": 151070 }, { "epoch": 1.614188792136332, "grad_norm": 4.493053913116455, "learning_rate": 9.910213760697694e-07, "loss": 0.0396, "step": 151080 }, { "epoch": 1.6142956354506117, "grad_norm": 0.6145302057266235, "learning_rate": 9.91018206167534e-07, "loss": 0.0352, "step": 151090 }, { "epoch": 1.6144024787648914, "grad_norm": 3.2791316509246826, "learning_rate": 9.910150357109015e-07, "loss": 0.0135, "step": 151100 }, { "epoch": 1.6145093220791709, "grad_norm": 0.04806353524327278, "learning_rate": 9.910118646998762e-07, "loss": 0.0163, "step": 151110 }, { "epoch": 1.6146161653934505, "grad_norm": 2.982759475708008, "learning_rate": 9.910086931344614e-07, "loss": 0.0379, "step": 151120 }, { "epoch": 1.6147230087077302, "grad_norm": 1.6349766254425049, "learning_rate": 9.910055210146608e-07, "loss": 0.0306, "step": 151130 }, { "epoch": 1.6148298520220097, "grad_norm": 1.8134435415267944, "learning_rate": 9.91002348340478e-07, "loss": 0.0663, "step": 151140 }, { "epoch": 1.6149366953362894, "grad_norm": 0.2953243553638458, "learning_rate": 9.909991751119161e-07, "loss": 0.0055, "step": 151150 }, { "epoch": 1.615043538650569, "grad_norm": 0.028103457763791084, "learning_rate": 9.909960013289795e-07, "loss": 0.0886, "step": 151160 }, { "epoch": 1.6151503819648485, "grad_norm": 1.316677451133728, "learning_rate": 9.909928269916712e-07, "loss": 0.0376, "step": 151170 }, { "epoch": 1.6152572252791282, "grad_norm": 0.8337212800979614, "learning_rate": 9.90989652099995e-07, "loss": 0.0367, "step": 151180 }, { "epoch": 1.6153640685934079, "grad_norm": 0.8276157379150391, "learning_rate": 9.909864766539543e-07, "loss": 0.0177, "step": 151190 }, { "epoch": 1.6154709119076873, "grad_norm": 3.4091968536376953, "learning_rate": 9.90983300653553e-07, "loss": 0.0218, "step": 151200 }, { "epoch": 1.615577755221967, "grad_norm": 6.690796852111816, "learning_rate": 9.909801240987943e-07, "loss": 0.0311, "step": 151210 }, { "epoch": 1.6156845985362467, "grad_norm": 0.7795559167861938, "learning_rate": 9.909769469896821e-07, "loss": 0.0215, "step": 151220 }, { "epoch": 1.6157914418505261, "grad_norm": 0.7995051741600037, "learning_rate": 9.909737693262197e-07, "loss": 0.0305, "step": 151230 }, { "epoch": 1.6158982851648058, "grad_norm": 0.09249811619520187, "learning_rate": 9.90970591108411e-07, "loss": 0.0165, "step": 151240 }, { "epoch": 1.6160051284790855, "grad_norm": 10.331856727600098, "learning_rate": 9.909674123362594e-07, "loss": 0.0835, "step": 151250 }, { "epoch": 1.616111971793365, "grad_norm": 3.802391767501831, "learning_rate": 9.909642330097685e-07, "loss": 0.0321, "step": 151260 }, { "epoch": 1.6162188151076446, "grad_norm": 3.2206969261169434, "learning_rate": 9.909610531289421e-07, "loss": 0.0122, "step": 151270 }, { "epoch": 1.6163256584219243, "grad_norm": 0.12756536900997162, "learning_rate": 9.909578726937835e-07, "loss": 0.0229, "step": 151280 }, { "epoch": 1.6164325017362038, "grad_norm": 0.02475181594491005, "learning_rate": 9.909546917042964e-07, "loss": 0.0253, "step": 151290 }, { "epoch": 1.6165393450504835, "grad_norm": 0.09007515013217926, "learning_rate": 9.909515101604844e-07, "loss": 0.0612, "step": 151300 }, { "epoch": 1.6166461883647631, "grad_norm": 0.7099632620811462, "learning_rate": 9.909483280623512e-07, "loss": 0.0221, "step": 151310 }, { "epoch": 1.6167530316790426, "grad_norm": 8.036741256713867, "learning_rate": 9.909451454099e-07, "loss": 0.0469, "step": 151320 }, { "epoch": 1.6168598749933223, "grad_norm": 0.3414439558982849, "learning_rate": 9.909419622031349e-07, "loss": 0.0477, "step": 151330 }, { "epoch": 1.616966718307602, "grad_norm": 24.286161422729492, "learning_rate": 9.909387784420593e-07, "loss": 0.1022, "step": 151340 }, { "epoch": 1.6170735616218814, "grad_norm": 7.39292573928833, "learning_rate": 9.909355941266768e-07, "loss": 0.0764, "step": 151350 }, { "epoch": 1.617180404936161, "grad_norm": 0.45188188552856445, "learning_rate": 9.90932409256991e-07, "loss": 0.0226, "step": 151360 }, { "epoch": 1.6172872482504408, "grad_norm": 0.08575985580682755, "learning_rate": 9.909292238330052e-07, "loss": 0.0137, "step": 151370 }, { "epoch": 1.6173940915647202, "grad_norm": 0.6730115413665771, "learning_rate": 9.909260378547234e-07, "loss": 0.0206, "step": 151380 }, { "epoch": 1.617500934879, "grad_norm": 0.3846023380756378, "learning_rate": 9.90922851322149e-07, "loss": 0.0124, "step": 151390 }, { "epoch": 1.6176077781932796, "grad_norm": 0.17145417630672455, "learning_rate": 9.909196642352855e-07, "loss": 0.0611, "step": 151400 }, { "epoch": 1.617714621507559, "grad_norm": 6.757203578948975, "learning_rate": 9.909164765941368e-07, "loss": 0.0306, "step": 151410 }, { "epoch": 1.6178214648218388, "grad_norm": 2.5298476219177246, "learning_rate": 9.909132883987065e-07, "loss": 0.0503, "step": 151420 }, { "epoch": 1.6179283081361184, "grad_norm": 0.3443826735019684, "learning_rate": 9.90910099648998e-07, "loss": 0.026, "step": 151430 }, { "epoch": 1.618035151450398, "grad_norm": 0.018103284761309624, "learning_rate": 9.909069103450147e-07, "loss": 0.0187, "step": 151440 }, { "epoch": 1.6181419947646776, "grad_norm": 0.014803305268287659, "learning_rate": 9.909037204867607e-07, "loss": 0.018, "step": 151450 }, { "epoch": 1.6182488380789573, "grad_norm": 0.857243001461029, "learning_rate": 9.909005300742392e-07, "loss": 0.0376, "step": 151460 }, { "epoch": 1.6183556813932367, "grad_norm": 0.23395641148090363, "learning_rate": 9.90897339107454e-07, "loss": 0.01, "step": 151470 }, { "epoch": 1.6184625247075164, "grad_norm": 0.6196413636207581, "learning_rate": 9.908941475864087e-07, "loss": 0.0494, "step": 151480 }, { "epoch": 1.618569368021796, "grad_norm": 0.09254122525453568, "learning_rate": 9.90890955511107e-07, "loss": 0.0945, "step": 151490 }, { "epoch": 1.6186762113360755, "grad_norm": 0.20473192632198334, "learning_rate": 9.90887762881552e-07, "loss": 0.0134, "step": 151500 }, { "epoch": 1.6187830546503552, "grad_norm": 0.04739762097597122, "learning_rate": 9.90884569697748e-07, "loss": 0.0647, "step": 151510 }, { "epoch": 1.618889897964635, "grad_norm": 1.1245038509368896, "learning_rate": 9.90881375959698e-07, "loss": 0.0222, "step": 151520 }, { "epoch": 1.6189967412789144, "grad_norm": 0.48328742384910583, "learning_rate": 9.908781816674061e-07, "loss": 0.0163, "step": 151530 }, { "epoch": 1.619103584593194, "grad_norm": 3.5897765159606934, "learning_rate": 9.908749868208755e-07, "loss": 0.0597, "step": 151540 }, { "epoch": 1.6192104279074737, "grad_norm": 2.24961519241333, "learning_rate": 9.9087179142011e-07, "loss": 0.0734, "step": 151550 }, { "epoch": 1.6193172712217532, "grad_norm": 1.947402000427246, "learning_rate": 9.908685954651134e-07, "loss": 0.0247, "step": 151560 }, { "epoch": 1.6194241145360329, "grad_norm": 2.917886734008789, "learning_rate": 9.908653989558891e-07, "loss": 0.0133, "step": 151570 }, { "epoch": 1.6195309578503125, "grad_norm": 10.627429962158203, "learning_rate": 9.908622018924406e-07, "loss": 0.0402, "step": 151580 }, { "epoch": 1.619637801164592, "grad_norm": 0.010981187224388123, "learning_rate": 9.908590042747716e-07, "loss": 0.044, "step": 151590 }, { "epoch": 1.619744644478872, "grad_norm": 0.05166061595082283, "learning_rate": 9.908558061028858e-07, "loss": 0.0609, "step": 151600 }, { "epoch": 1.6198514877931514, "grad_norm": 0.2072962075471878, "learning_rate": 9.908526073767868e-07, "loss": 0.0341, "step": 151610 }, { "epoch": 1.6199583311074308, "grad_norm": 0.4170040786266327, "learning_rate": 9.90849408096478e-07, "loss": 0.0485, "step": 151620 }, { "epoch": 1.6200651744217107, "grad_norm": 2.3073015213012695, "learning_rate": 9.908462082619632e-07, "loss": 0.0467, "step": 151630 }, { "epoch": 1.6201720177359902, "grad_norm": 0.0753759890794754, "learning_rate": 9.90843007873246e-07, "loss": 0.0213, "step": 151640 }, { "epoch": 1.6202788610502696, "grad_norm": 3.022002935409546, "learning_rate": 9.9083980693033e-07, "loss": 0.0195, "step": 151650 }, { "epoch": 1.6203857043645495, "grad_norm": 0.10853269696235657, "learning_rate": 9.908366054332188e-07, "loss": 0.0275, "step": 151660 }, { "epoch": 1.620492547678829, "grad_norm": 6.571497917175293, "learning_rate": 9.908334033819162e-07, "loss": 0.0537, "step": 151670 }, { "epoch": 1.6205993909931085, "grad_norm": 3.0229833126068115, "learning_rate": 9.908302007764255e-07, "loss": 0.0096, "step": 151680 }, { "epoch": 1.6207062343073884, "grad_norm": 0.06932996213436127, "learning_rate": 9.908269976167504e-07, "loss": 0.0168, "step": 151690 }, { "epoch": 1.6208130776216678, "grad_norm": 12.620566368103027, "learning_rate": 9.908237939028947e-07, "loss": 0.0387, "step": 151700 }, { "epoch": 1.6209199209359473, "grad_norm": 4.992807388305664, "learning_rate": 9.908205896348617e-07, "loss": 0.0397, "step": 151710 }, { "epoch": 1.6210267642502272, "grad_norm": 2.140256881713867, "learning_rate": 9.908173848126555e-07, "loss": 0.0197, "step": 151720 }, { "epoch": 1.6211336075645066, "grad_norm": 5.829829692840576, "learning_rate": 9.908141794362793e-07, "loss": 0.0303, "step": 151730 }, { "epoch": 1.621240450878786, "grad_norm": 1.1065773963928223, "learning_rate": 9.90810973505737e-07, "loss": 0.0284, "step": 151740 }, { "epoch": 1.621347294193066, "grad_norm": 0.5970244407653809, "learning_rate": 9.908077670210318e-07, "loss": 0.0073, "step": 151750 }, { "epoch": 1.6214541375073455, "grad_norm": 3.5992817878723145, "learning_rate": 9.908045599821678e-07, "loss": 0.0226, "step": 151760 }, { "epoch": 1.6215609808216251, "grad_norm": 0.20190401375293732, "learning_rate": 9.90801352389148e-07, "loss": 0.0224, "step": 151770 }, { "epoch": 1.6216678241359048, "grad_norm": 0.005238963291049004, "learning_rate": 9.90798144241977e-07, "loss": 0.0274, "step": 151780 }, { "epoch": 1.6217746674501843, "grad_norm": 1.5957144498825073, "learning_rate": 9.907949355406576e-07, "loss": 0.066, "step": 151790 }, { "epoch": 1.621881510764464, "grad_norm": 2.3507871627807617, "learning_rate": 9.907917262851934e-07, "loss": 0.0178, "step": 151800 }, { "epoch": 1.6219883540787436, "grad_norm": 4.984536647796631, "learning_rate": 9.907885164755886e-07, "loss": 0.0367, "step": 151810 }, { "epoch": 1.622095197393023, "grad_norm": 6.171363830566406, "learning_rate": 9.907853061118464e-07, "loss": 0.0197, "step": 151820 }, { "epoch": 1.6222020407073028, "grad_norm": 4.864677906036377, "learning_rate": 9.907820951939706e-07, "loss": 0.0319, "step": 151830 }, { "epoch": 1.6223088840215825, "grad_norm": 2.6157171726226807, "learning_rate": 9.90778883721965e-07, "loss": 0.0877, "step": 151840 }, { "epoch": 1.622415727335862, "grad_norm": 3.2276573181152344, "learning_rate": 9.90775671695833e-07, "loss": 0.0515, "step": 151850 }, { "epoch": 1.6225225706501416, "grad_norm": 1.255446434020996, "learning_rate": 9.90772459115578e-07, "loss": 0.0355, "step": 151860 }, { "epoch": 1.6226294139644213, "grad_norm": 0.137591153383255, "learning_rate": 9.907692459812036e-07, "loss": 0.0283, "step": 151870 }, { "epoch": 1.6227362572787007, "grad_norm": 3.7782464027404785, "learning_rate": 9.90766032292714e-07, "loss": 0.0433, "step": 151880 }, { "epoch": 1.6228431005929804, "grad_norm": 0.026527881622314453, "learning_rate": 9.907628180501125e-07, "loss": 0.0111, "step": 151890 }, { "epoch": 1.62294994390726, "grad_norm": 7.494288444519043, "learning_rate": 9.907596032534026e-07, "loss": 0.0446, "step": 151900 }, { "epoch": 1.6230567872215396, "grad_norm": 0.004456565715372562, "learning_rate": 9.907563879025883e-07, "loss": 0.0431, "step": 151910 }, { "epoch": 1.6231636305358192, "grad_norm": 5.908558368682861, "learning_rate": 9.907531719976728e-07, "loss": 0.0388, "step": 151920 }, { "epoch": 1.623270473850099, "grad_norm": 2.8520267009735107, "learning_rate": 9.907499555386602e-07, "loss": 0.0198, "step": 151930 }, { "epoch": 1.6233773171643784, "grad_norm": 4.215137958526611, "learning_rate": 9.907467385255535e-07, "loss": 0.0283, "step": 151940 }, { "epoch": 1.623484160478658, "grad_norm": 3.570744276046753, "learning_rate": 9.90743520958357e-07, "loss": 0.029, "step": 151950 }, { "epoch": 1.6235910037929377, "grad_norm": 0.3105039596557617, "learning_rate": 9.907403028370737e-07, "loss": 0.0424, "step": 151960 }, { "epoch": 1.6236978471072172, "grad_norm": 1.8448761701583862, "learning_rate": 9.907370841617079e-07, "loss": 0.0272, "step": 151970 }, { "epoch": 1.6238046904214969, "grad_norm": 0.1638345718383789, "learning_rate": 9.907338649322628e-07, "loss": 0.0152, "step": 151980 }, { "epoch": 1.6239115337357766, "grad_norm": 0.11565934121608734, "learning_rate": 9.907306451487422e-07, "loss": 0.0288, "step": 151990 }, { "epoch": 1.624018377050056, "grad_norm": 6.035708904266357, "learning_rate": 9.907274248111493e-07, "loss": 0.0375, "step": 152000 }, { "epoch": 1.6241252203643357, "grad_norm": 0.5409186482429504, "learning_rate": 9.907242039194885e-07, "loss": 0.0101, "step": 152010 }, { "epoch": 1.6242320636786154, "grad_norm": 7.9076008796691895, "learning_rate": 9.907209824737629e-07, "loss": 0.0349, "step": 152020 }, { "epoch": 1.6243389069928948, "grad_norm": 1.819538950920105, "learning_rate": 9.90717760473976e-07, "loss": 0.0331, "step": 152030 }, { "epoch": 1.6244457503071745, "grad_norm": 4.367754936218262, "learning_rate": 9.90714537920132e-07, "loss": 0.0451, "step": 152040 }, { "epoch": 1.6245525936214542, "grad_norm": 0.05661388114094734, "learning_rate": 9.907113148122344e-07, "loss": 0.0697, "step": 152050 }, { "epoch": 1.6246594369357337, "grad_norm": 0.01793842576444149, "learning_rate": 9.907080911502864e-07, "loss": 0.0121, "step": 152060 }, { "epoch": 1.6247662802500134, "grad_norm": 4.633647918701172, "learning_rate": 9.907048669342921e-07, "loss": 0.021, "step": 152070 }, { "epoch": 1.624873123564293, "grad_norm": 0.9352163672447205, "learning_rate": 9.907016421642549e-07, "loss": 0.0315, "step": 152080 }, { "epoch": 1.6249799668785725, "grad_norm": 1.1769516468048096, "learning_rate": 9.906984168401784e-07, "loss": 0.0351, "step": 152090 }, { "epoch": 1.6250868101928522, "grad_norm": 3.277015447616577, "learning_rate": 9.906951909620666e-07, "loss": 0.063, "step": 152100 }, { "epoch": 1.6251936535071319, "grad_norm": 2.052586317062378, "learning_rate": 9.906919645299228e-07, "loss": 0.1193, "step": 152110 }, { "epoch": 1.6253004968214113, "grad_norm": 0.004477543756365776, "learning_rate": 9.906887375437507e-07, "loss": 0.0126, "step": 152120 }, { "epoch": 1.625407340135691, "grad_norm": 8.123993873596191, "learning_rate": 9.906855100035538e-07, "loss": 0.0096, "step": 152130 }, { "epoch": 1.6255141834499707, "grad_norm": 0.7510794997215271, "learning_rate": 9.906822819093361e-07, "loss": 0.0083, "step": 152140 }, { "epoch": 1.6256210267642501, "grad_norm": 2.412799596786499, "learning_rate": 9.906790532611011e-07, "loss": 0.0393, "step": 152150 }, { "epoch": 1.6257278700785298, "grad_norm": 2.5551300048828125, "learning_rate": 9.906758240588523e-07, "loss": 0.0736, "step": 152160 }, { "epoch": 1.6258347133928095, "grad_norm": 0.04830437898635864, "learning_rate": 9.906725943025938e-07, "loss": 0.0262, "step": 152170 }, { "epoch": 1.625941556707089, "grad_norm": 0.08794314414262772, "learning_rate": 9.906693639923286e-07, "loss": 0.0231, "step": 152180 }, { "epoch": 1.6260484000213686, "grad_norm": 3.708969831466675, "learning_rate": 9.906661331280608e-07, "loss": 0.0143, "step": 152190 }, { "epoch": 1.6261552433356483, "grad_norm": 0.023751864209771156, "learning_rate": 9.906629017097938e-07, "loss": 0.0214, "step": 152200 }, { "epoch": 1.6262620866499278, "grad_norm": 0.08626490831375122, "learning_rate": 9.906596697375313e-07, "loss": 0.0317, "step": 152210 }, { "epoch": 1.6263689299642075, "grad_norm": 0.5581092834472656, "learning_rate": 9.90656437211277e-07, "loss": 0.021, "step": 152220 }, { "epoch": 1.6264757732784871, "grad_norm": 2.6693451404571533, "learning_rate": 9.906532041310347e-07, "loss": 0.0141, "step": 152230 }, { "epoch": 1.6265826165927666, "grad_norm": 1.016310691833496, "learning_rate": 9.906499704968078e-07, "loss": 0.0343, "step": 152240 }, { "epoch": 1.6266894599070463, "grad_norm": 0.24523545801639557, "learning_rate": 9.906467363086002e-07, "loss": 0.0237, "step": 152250 }, { "epoch": 1.626796303221326, "grad_norm": 2.7087247371673584, "learning_rate": 9.906435015664152e-07, "loss": 0.0502, "step": 152260 }, { "epoch": 1.6269031465356054, "grad_norm": 4.95346212387085, "learning_rate": 9.906402662702567e-07, "loss": 0.0356, "step": 152270 }, { "epoch": 1.627009989849885, "grad_norm": 5.199795722961426, "learning_rate": 9.906370304201283e-07, "loss": 0.047, "step": 152280 }, { "epoch": 1.6271168331641648, "grad_norm": 0.5076087713241577, "learning_rate": 9.906337940160338e-07, "loss": 0.0346, "step": 152290 }, { "epoch": 1.6272236764784442, "grad_norm": 5.233494281768799, "learning_rate": 9.906305570579765e-07, "loss": 0.0253, "step": 152300 }, { "epoch": 1.627330519792724, "grad_norm": 1.3925318717956543, "learning_rate": 9.906273195459604e-07, "loss": 0.0233, "step": 152310 }, { "epoch": 1.6274373631070036, "grad_norm": 0.2658138573169708, "learning_rate": 9.906240814799891e-07, "loss": 0.0054, "step": 152320 }, { "epoch": 1.627544206421283, "grad_norm": 0.2393949031829834, "learning_rate": 9.906208428600661e-07, "loss": 0.052, "step": 152330 }, { "epoch": 1.627651049735563, "grad_norm": 2.70344614982605, "learning_rate": 9.906176036861953e-07, "loss": 0.0293, "step": 152340 }, { "epoch": 1.6277578930498424, "grad_norm": 0.006100904196500778, "learning_rate": 9.906143639583799e-07, "loss": 0.178, "step": 152350 }, { "epoch": 1.6278647363641219, "grad_norm": 3.3916115760803223, "learning_rate": 9.90611123676624e-07, "loss": 0.0629, "step": 152360 }, { "epoch": 1.6279715796784018, "grad_norm": 1.5054844617843628, "learning_rate": 9.906078828409312e-07, "loss": 0.0298, "step": 152370 }, { "epoch": 1.6280784229926812, "grad_norm": 3.3558297157287598, "learning_rate": 9.90604641451305e-07, "loss": 0.02, "step": 152380 }, { "epoch": 1.6281852663069607, "grad_norm": 0.021418863907456398, "learning_rate": 9.906013995077492e-07, "loss": 0.0269, "step": 152390 }, { "epoch": 1.6282921096212406, "grad_norm": 0.06924063712358475, "learning_rate": 9.905981570102672e-07, "loss": 0.0152, "step": 152400 }, { "epoch": 1.62839895293552, "grad_norm": 1.4832361936569214, "learning_rate": 9.90594913958863e-07, "loss": 0.1236, "step": 152410 }, { "epoch": 1.6285057962497995, "grad_norm": 0.2315526306629181, "learning_rate": 9.9059167035354e-07, "loss": 0.032, "step": 152420 }, { "epoch": 1.6286126395640794, "grad_norm": 0.190148264169693, "learning_rate": 9.90588426194302e-07, "loss": 0.0079, "step": 152430 }, { "epoch": 1.6287194828783589, "grad_norm": 0.04188429191708565, "learning_rate": 9.905851814811529e-07, "loss": 0.0157, "step": 152440 }, { "epoch": 1.6288263261926383, "grad_norm": 4.168465614318848, "learning_rate": 9.905819362140958e-07, "loss": 0.028, "step": 152450 }, { "epoch": 1.6289331695069182, "grad_norm": 1.4927424192428589, "learning_rate": 9.905786903931348e-07, "loss": 0.0263, "step": 152460 }, { "epoch": 1.6290400128211977, "grad_norm": 0.2112739086151123, "learning_rate": 9.905754440182734e-07, "loss": 0.0269, "step": 152470 }, { "epoch": 1.6291468561354772, "grad_norm": 13.015846252441406, "learning_rate": 9.905721970895154e-07, "loss": 0.0136, "step": 152480 }, { "epoch": 1.629253699449757, "grad_norm": 2.2524664402008057, "learning_rate": 9.905689496068642e-07, "loss": 0.0149, "step": 152490 }, { "epoch": 1.6293605427640365, "grad_norm": 0.4187817871570587, "learning_rate": 9.905657015703237e-07, "loss": 0.0234, "step": 152500 }, { "epoch": 1.6294673860783162, "grad_norm": 3.750555992126465, "learning_rate": 9.905624529798975e-07, "loss": 0.0197, "step": 152510 }, { "epoch": 1.6295742293925959, "grad_norm": 0.002463381038978696, "learning_rate": 9.905592038355891e-07, "loss": 0.0245, "step": 152520 }, { "epoch": 1.6296810727068753, "grad_norm": 1.822045922279358, "learning_rate": 9.905559541374025e-07, "loss": 0.0123, "step": 152530 }, { "epoch": 1.629787916021155, "grad_norm": 7.568181991577148, "learning_rate": 9.905527038853412e-07, "loss": 0.0388, "step": 152540 }, { "epoch": 1.6298947593354347, "grad_norm": 0.06920492649078369, "learning_rate": 9.90549453079409e-07, "loss": 0.0035, "step": 152550 }, { "epoch": 1.6300016026497142, "grad_norm": 9.891860008239746, "learning_rate": 9.905462017196094e-07, "loss": 0.05, "step": 152560 }, { "epoch": 1.6301084459639938, "grad_norm": 7.291654586791992, "learning_rate": 9.905429498059458e-07, "loss": 0.0201, "step": 152570 }, { "epoch": 1.6302152892782735, "grad_norm": 2.4346718788146973, "learning_rate": 9.905396973384225e-07, "loss": 0.0297, "step": 152580 }, { "epoch": 1.630322132592553, "grad_norm": 0.6848670840263367, "learning_rate": 9.905364443170427e-07, "loss": 0.0275, "step": 152590 }, { "epoch": 1.6304289759068327, "grad_norm": 5.659868240356445, "learning_rate": 9.905331907418105e-07, "loss": 0.0127, "step": 152600 }, { "epoch": 1.6305358192211123, "grad_norm": 3.935349702835083, "learning_rate": 9.90529936612729e-07, "loss": 0.0229, "step": 152610 }, { "epoch": 1.6306426625353918, "grad_norm": 0.018669504672288895, "learning_rate": 9.905266819298025e-07, "loss": 0.0486, "step": 152620 }, { "epoch": 1.6307495058496715, "grad_norm": 0.7701820135116577, "learning_rate": 9.90523426693034e-07, "loss": 0.07, "step": 152630 }, { "epoch": 1.6308563491639512, "grad_norm": 1.7493619918823242, "learning_rate": 9.905201709024276e-07, "loss": 0.0338, "step": 152640 }, { "epoch": 1.6309631924782306, "grad_norm": 1.1370911598205566, "learning_rate": 9.90516914557987e-07, "loss": 0.0255, "step": 152650 }, { "epoch": 1.6310700357925103, "grad_norm": 2.3606696128845215, "learning_rate": 9.905136576597158e-07, "loss": 0.0455, "step": 152660 }, { "epoch": 1.63117687910679, "grad_norm": 0.04766429215669632, "learning_rate": 9.905104002076176e-07, "loss": 0.0311, "step": 152670 }, { "epoch": 1.6312837224210694, "grad_norm": 4.595805644989014, "learning_rate": 9.905071422016963e-07, "loss": 0.0149, "step": 152680 }, { "epoch": 1.6313905657353491, "grad_norm": 0.20173996686935425, "learning_rate": 9.905038836419554e-07, "loss": 0.0457, "step": 152690 }, { "epoch": 1.6314974090496288, "grad_norm": 2.8041207790374756, "learning_rate": 9.905006245283982e-07, "loss": 0.0303, "step": 152700 }, { "epoch": 1.6316042523639083, "grad_norm": 2.424903392791748, "learning_rate": 9.904973648610292e-07, "loss": 0.0123, "step": 152710 }, { "epoch": 1.631711095678188, "grad_norm": 2.9516549110412598, "learning_rate": 9.904941046398513e-07, "loss": 0.0299, "step": 152720 }, { "epoch": 1.6318179389924676, "grad_norm": 1.3789654970169067, "learning_rate": 9.90490843864869e-07, "loss": 0.013, "step": 152730 }, { "epoch": 1.631924782306747, "grad_norm": 2.9783973693847656, "learning_rate": 9.904875825360852e-07, "loss": 0.0245, "step": 152740 }, { "epoch": 1.6320316256210268, "grad_norm": 5.566897392272949, "learning_rate": 9.904843206535039e-07, "loss": 0.0334, "step": 152750 }, { "epoch": 1.6321384689353065, "grad_norm": 2.2683920860290527, "learning_rate": 9.904810582171288e-07, "loss": 0.0531, "step": 152760 }, { "epoch": 1.632245312249586, "grad_norm": 0.17240208387374878, "learning_rate": 9.904777952269637e-07, "loss": 0.0343, "step": 152770 }, { "epoch": 1.6323521555638656, "grad_norm": 1.4923101663589478, "learning_rate": 9.904745316830121e-07, "loss": 0.0196, "step": 152780 }, { "epoch": 1.6324589988781453, "grad_norm": 0.5400791168212891, "learning_rate": 9.904712675852775e-07, "loss": 0.0082, "step": 152790 }, { "epoch": 1.6325658421924247, "grad_norm": 0.059969671070575714, "learning_rate": 9.90468002933764e-07, "loss": 0.0073, "step": 152800 }, { "epoch": 1.6326726855067044, "grad_norm": 0.25166764855384827, "learning_rate": 9.904647377284752e-07, "loss": 0.0169, "step": 152810 }, { "epoch": 1.632779528820984, "grad_norm": 14.195320129394531, "learning_rate": 9.904614719694146e-07, "loss": 0.0399, "step": 152820 }, { "epoch": 1.6328863721352636, "grad_norm": 0.03587418049573898, "learning_rate": 9.90458205656586e-07, "loss": 0.007, "step": 152830 }, { "epoch": 1.6329932154495432, "grad_norm": 8.322964668273926, "learning_rate": 9.904549387899932e-07, "loss": 0.04, "step": 152840 }, { "epoch": 1.633100058763823, "grad_norm": 2.6597208976745605, "learning_rate": 9.904516713696397e-07, "loss": 0.0402, "step": 152850 }, { "epoch": 1.6332069020781024, "grad_norm": 2.1197400093078613, "learning_rate": 9.904484033955292e-07, "loss": 0.0294, "step": 152860 }, { "epoch": 1.633313745392382, "grad_norm": 0.12271320074796677, "learning_rate": 9.904451348676653e-07, "loss": 0.0295, "step": 152870 }, { "epoch": 1.6334205887066617, "grad_norm": 0.8908180594444275, "learning_rate": 9.904418657860519e-07, "loss": 0.0532, "step": 152880 }, { "epoch": 1.6335274320209412, "grad_norm": 2.4202942848205566, "learning_rate": 9.904385961506928e-07, "loss": 0.03, "step": 152890 }, { "epoch": 1.6336342753352209, "grad_norm": 12.403018951416016, "learning_rate": 9.904353259615912e-07, "loss": 0.0438, "step": 152900 }, { "epoch": 1.6337411186495006, "grad_norm": 3.9578003883361816, "learning_rate": 9.904320552187513e-07, "loss": 0.068, "step": 152910 }, { "epoch": 1.63384796196378, "grad_norm": 0.2523144483566284, "learning_rate": 9.904287839221766e-07, "loss": 0.0225, "step": 152920 }, { "epoch": 1.6339548052780597, "grad_norm": 0.28591567277908325, "learning_rate": 9.904255120718708e-07, "loss": 0.0084, "step": 152930 }, { "epoch": 1.6340616485923394, "grad_norm": 0.4275622069835663, "learning_rate": 9.904222396678374e-07, "loss": 0.0295, "step": 152940 }, { "epoch": 1.6341684919066188, "grad_norm": 5.658146381378174, "learning_rate": 9.904189667100805e-07, "loss": 0.0176, "step": 152950 }, { "epoch": 1.6342753352208985, "grad_norm": 3.555041551589966, "learning_rate": 9.904156931986035e-07, "loss": 0.0638, "step": 152960 }, { "epoch": 1.6343821785351782, "grad_norm": 5.133935928344727, "learning_rate": 9.904124191334102e-07, "loss": 0.0118, "step": 152970 }, { "epoch": 1.6344890218494577, "grad_norm": 3.5748422145843506, "learning_rate": 9.904091445145041e-07, "loss": 0.051, "step": 152980 }, { "epoch": 1.6345958651637373, "grad_norm": 8.752910614013672, "learning_rate": 9.904058693418893e-07, "loss": 0.056, "step": 152990 }, { "epoch": 1.634702708478017, "grad_norm": 2.193469762802124, "learning_rate": 9.904025936155692e-07, "loss": 0.0075, "step": 153000 }, { "epoch": 1.6348095517922965, "grad_norm": 10.189948081970215, "learning_rate": 9.903993173355475e-07, "loss": 0.0169, "step": 153010 }, { "epoch": 1.6349163951065762, "grad_norm": 2.2240536212921143, "learning_rate": 9.90396040501828e-07, "loss": 0.0412, "step": 153020 }, { "epoch": 1.6350232384208558, "grad_norm": 1.188961148262024, "learning_rate": 9.903927631144142e-07, "loss": 0.0244, "step": 153030 }, { "epoch": 1.6351300817351353, "grad_norm": 0.0773305743932724, "learning_rate": 9.9038948517331e-07, "loss": 0.0167, "step": 153040 }, { "epoch": 1.635236925049415, "grad_norm": 1.2674909830093384, "learning_rate": 9.903862066785191e-07, "loss": 0.0312, "step": 153050 }, { "epoch": 1.6353437683636947, "grad_norm": 0.05998974293470383, "learning_rate": 9.903829276300454e-07, "loss": 0.0508, "step": 153060 }, { "epoch": 1.6354506116779741, "grad_norm": 0.018562961369752884, "learning_rate": 9.90379648027892e-07, "loss": 0.0041, "step": 153070 }, { "epoch": 1.635557454992254, "grad_norm": 0.25881871581077576, "learning_rate": 9.903763678720632e-07, "loss": 0.0291, "step": 153080 }, { "epoch": 1.6356642983065335, "grad_norm": 5.8655171394348145, "learning_rate": 9.903730871625623e-07, "loss": 0.0664, "step": 153090 }, { "epoch": 1.635771141620813, "grad_norm": 3.6634249687194824, "learning_rate": 9.903698058993935e-07, "loss": 0.05, "step": 153100 }, { "epoch": 1.6358779849350928, "grad_norm": 6.267492771148682, "learning_rate": 9.903665240825599e-07, "loss": 0.0239, "step": 153110 }, { "epoch": 1.6359848282493723, "grad_norm": 0.3548768162727356, "learning_rate": 9.903632417120655e-07, "loss": 0.0041, "step": 153120 }, { "epoch": 1.6360916715636518, "grad_norm": 0.06897001713514328, "learning_rate": 9.903599587879142e-07, "loss": 0.0364, "step": 153130 }, { "epoch": 1.6361985148779317, "grad_norm": 0.8179434537887573, "learning_rate": 9.903566753101093e-07, "loss": 0.0244, "step": 153140 }, { "epoch": 1.6363053581922111, "grad_norm": 0.22285178303718567, "learning_rate": 9.903533912786547e-07, "loss": 0.0423, "step": 153150 }, { "epoch": 1.6364122015064906, "grad_norm": 13.104453086853027, "learning_rate": 9.903501066935541e-07, "loss": 0.0585, "step": 153160 }, { "epoch": 1.6365190448207705, "grad_norm": 3.622030258178711, "learning_rate": 9.903468215548112e-07, "loss": 0.1646, "step": 153170 }, { "epoch": 1.63662588813505, "grad_norm": 0.13339835405349731, "learning_rate": 9.9034353586243e-07, "loss": 0.0481, "step": 153180 }, { "epoch": 1.6367327314493294, "grad_norm": 2.2188143730163574, "learning_rate": 9.903402496164137e-07, "loss": 0.0342, "step": 153190 }, { "epoch": 1.6368395747636093, "grad_norm": 4.229193687438965, "learning_rate": 9.903369628167662e-07, "loss": 0.0615, "step": 153200 }, { "epoch": 1.6369464180778888, "grad_norm": 0.4140540659427643, "learning_rate": 9.903336754634915e-07, "loss": 0.0166, "step": 153210 }, { "epoch": 1.6370532613921682, "grad_norm": 4.855257987976074, "learning_rate": 9.903303875565928e-07, "loss": 0.0251, "step": 153220 }, { "epoch": 1.6371601047064481, "grad_norm": 11.417579650878906, "learning_rate": 9.90327099096074e-07, "loss": 0.0632, "step": 153230 }, { "epoch": 1.6372669480207276, "grad_norm": 0.03256054222583771, "learning_rate": 9.903238100819393e-07, "loss": 0.0093, "step": 153240 }, { "epoch": 1.6373737913350073, "grad_norm": 0.01758662611246109, "learning_rate": 9.903205205141917e-07, "loss": 0.0404, "step": 153250 }, { "epoch": 1.637480634649287, "grad_norm": 0.031488820910453796, "learning_rate": 9.903172303928356e-07, "loss": 0.0052, "step": 153260 }, { "epoch": 1.6375874779635664, "grad_norm": 0.023004703223705292, "learning_rate": 9.90313939717874e-07, "loss": 0.0215, "step": 153270 }, { "epoch": 1.637694321277846, "grad_norm": 1.9946300983428955, "learning_rate": 9.90310648489311e-07, "loss": 0.0222, "step": 153280 }, { "epoch": 1.6378011645921258, "grad_norm": 2.412468910217285, "learning_rate": 9.903073567071502e-07, "loss": 0.0116, "step": 153290 }, { "epoch": 1.6379080079064052, "grad_norm": 0.012020634487271309, "learning_rate": 9.903040643713956e-07, "loss": 0.0336, "step": 153300 }, { "epoch": 1.638014851220685, "grad_norm": 0.7329776287078857, "learning_rate": 9.903007714820507e-07, "loss": 0.0345, "step": 153310 }, { "epoch": 1.6381216945349646, "grad_norm": 2.3106319904327393, "learning_rate": 9.902974780391192e-07, "loss": 0.0207, "step": 153320 }, { "epoch": 1.638228537849244, "grad_norm": 0.024476397782564163, "learning_rate": 9.902941840426046e-07, "loss": 0.0277, "step": 153330 }, { "epoch": 1.6383353811635237, "grad_norm": 0.6800007820129395, "learning_rate": 9.90290889492511e-07, "loss": 0.0577, "step": 153340 }, { "epoch": 1.6384422244778034, "grad_norm": 0.8319754600524902, "learning_rate": 9.90287594388842e-07, "loss": 0.0258, "step": 153350 }, { "epoch": 1.6385490677920829, "grad_norm": 5.362072944641113, "learning_rate": 9.902842987316015e-07, "loss": 0.0533, "step": 153360 }, { "epoch": 1.6386559111063626, "grad_norm": 0.028799045830965042, "learning_rate": 9.902810025207929e-07, "loss": 0.0092, "step": 153370 }, { "epoch": 1.6387627544206422, "grad_norm": 0.4207594692707062, "learning_rate": 9.9027770575642e-07, "loss": 0.0011, "step": 153380 }, { "epoch": 1.6388695977349217, "grad_norm": 0.9512919187545776, "learning_rate": 9.902744084384865e-07, "loss": 0.0261, "step": 153390 }, { "epoch": 1.6389764410492014, "grad_norm": 9.91856861114502, "learning_rate": 9.902711105669963e-07, "loss": 0.0462, "step": 153400 }, { "epoch": 1.639083284363481, "grad_norm": 0.2195681929588318, "learning_rate": 9.90267812141953e-07, "loss": 0.0402, "step": 153410 }, { "epoch": 1.6391901276777605, "grad_norm": 0.045170754194259644, "learning_rate": 9.902645131633602e-07, "loss": 0.0331, "step": 153420 }, { "epoch": 1.6392969709920402, "grad_norm": 0.17157728970050812, "learning_rate": 9.902612136312217e-07, "loss": 0.0188, "step": 153430 }, { "epoch": 1.6394038143063199, "grad_norm": 3.9395885467529297, "learning_rate": 9.902579135455415e-07, "loss": 0.0284, "step": 153440 }, { "epoch": 1.6395106576205993, "grad_norm": 0.009970791637897491, "learning_rate": 9.90254612906323e-07, "loss": 0.0134, "step": 153450 }, { "epoch": 1.639617500934879, "grad_norm": 7.222245693206787, "learning_rate": 9.902513117135702e-07, "loss": 0.0205, "step": 153460 }, { "epoch": 1.6397243442491587, "grad_norm": 1.2735651731491089, "learning_rate": 9.902480099672867e-07, "loss": 0.0153, "step": 153470 }, { "epoch": 1.6398311875634382, "grad_norm": 4.0700907707214355, "learning_rate": 9.90244707667476e-07, "loss": 0.028, "step": 153480 }, { "epoch": 1.6399380308777178, "grad_norm": 9.908720970153809, "learning_rate": 9.902414048141421e-07, "loss": 0.0377, "step": 153490 }, { "epoch": 1.6400448741919975, "grad_norm": 0.4528590738773346, "learning_rate": 9.902381014072887e-07, "loss": 0.0444, "step": 153500 }, { "epoch": 1.640151717506277, "grad_norm": 0.5829214453697205, "learning_rate": 9.902347974469193e-07, "loss": 0.0422, "step": 153510 }, { "epoch": 1.6402585608205567, "grad_norm": 6.117438316345215, "learning_rate": 9.90231492933038e-07, "loss": 0.0668, "step": 153520 }, { "epoch": 1.6403654041348363, "grad_norm": 0.1482098549604416, "learning_rate": 9.902281878656483e-07, "loss": 0.0526, "step": 153530 }, { "epoch": 1.6404722474491158, "grad_norm": 0.7940973043441772, "learning_rate": 9.90224882244754e-07, "loss": 0.0118, "step": 153540 }, { "epoch": 1.6405790907633955, "grad_norm": 1.225832223892212, "learning_rate": 9.90221576070359e-07, "loss": 0.031, "step": 153550 }, { "epoch": 1.6406859340776752, "grad_norm": 1.9717798233032227, "learning_rate": 9.902182693424666e-07, "loss": 0.0133, "step": 153560 }, { "epoch": 1.6407927773919546, "grad_norm": 5.901172637939453, "learning_rate": 9.902149620610807e-07, "loss": 0.0609, "step": 153570 }, { "epoch": 1.6408996207062343, "grad_norm": 2.622941017150879, "learning_rate": 9.902116542262052e-07, "loss": 0.0403, "step": 153580 }, { "epoch": 1.641006464020514, "grad_norm": 0.7582464814186096, "learning_rate": 9.902083458378436e-07, "loss": 0.0619, "step": 153590 }, { "epoch": 1.6411133073347934, "grad_norm": 15.290084838867188, "learning_rate": 9.90205036896e-07, "loss": 0.0407, "step": 153600 }, { "epoch": 1.6412201506490731, "grad_norm": 18.91720199584961, "learning_rate": 9.90201727400678e-07, "loss": 0.0443, "step": 153610 }, { "epoch": 1.6413269939633528, "grad_norm": 2.510272264480591, "learning_rate": 9.901984173518811e-07, "loss": 0.0173, "step": 153620 }, { "epoch": 1.6414338372776323, "grad_norm": 1.8343433141708374, "learning_rate": 9.90195106749613e-07, "loss": 0.0192, "step": 153630 }, { "epoch": 1.641540680591912, "grad_norm": 0.040749210864305496, "learning_rate": 9.90191795593878e-07, "loss": 0.0274, "step": 153640 }, { "epoch": 1.6416475239061916, "grad_norm": 1.5378825664520264, "learning_rate": 9.901884838846792e-07, "loss": 0.0266, "step": 153650 }, { "epoch": 1.641754367220471, "grad_norm": 6.679884910583496, "learning_rate": 9.901851716220207e-07, "loss": 0.018, "step": 153660 }, { "epoch": 1.6418612105347508, "grad_norm": 1.8212966918945312, "learning_rate": 9.90181858805906e-07, "loss": 0.0381, "step": 153670 }, { "epoch": 1.6419680538490304, "grad_norm": 0.7361219525337219, "learning_rate": 9.901785454363393e-07, "loss": 0.0087, "step": 153680 }, { "epoch": 1.64207489716331, "grad_norm": 5.664026260375977, "learning_rate": 9.901752315133239e-07, "loss": 0.0295, "step": 153690 }, { "epoch": 1.6421817404775896, "grad_norm": 0.37049856781959534, "learning_rate": 9.901719170368635e-07, "loss": 0.0434, "step": 153700 }, { "epoch": 1.6422885837918693, "grad_norm": 2.089174747467041, "learning_rate": 9.90168602006962e-07, "loss": 0.0315, "step": 153710 }, { "epoch": 1.6423954271061487, "grad_norm": 0.820962131023407, "learning_rate": 9.901652864236233e-07, "loss": 0.0177, "step": 153720 }, { "epoch": 1.6425022704204284, "grad_norm": 0.7626073360443115, "learning_rate": 9.90161970286851e-07, "loss": 0.0391, "step": 153730 }, { "epoch": 1.642609113734708, "grad_norm": 0.11354613304138184, "learning_rate": 9.90158653596649e-07, "loss": 0.0032, "step": 153740 }, { "epoch": 1.6427159570489875, "grad_norm": 0.11033277213573456, "learning_rate": 9.901553363530207e-07, "loss": 0.0545, "step": 153750 }, { "epoch": 1.6428228003632672, "grad_norm": 5.4938435554504395, "learning_rate": 9.9015201855597e-07, "loss": 0.0109, "step": 153760 }, { "epoch": 1.642929643677547, "grad_norm": 0.2999354898929596, "learning_rate": 9.901487002055007e-07, "loss": 0.0157, "step": 153770 }, { "epoch": 1.6430364869918264, "grad_norm": 1.7546892166137695, "learning_rate": 9.901453813016166e-07, "loss": 0.0214, "step": 153780 }, { "epoch": 1.643143330306106, "grad_norm": 5.80549955368042, "learning_rate": 9.901420618443212e-07, "loss": 0.0263, "step": 153790 }, { "epoch": 1.6432501736203857, "grad_norm": 0.018130755051970482, "learning_rate": 9.901387418336186e-07, "loss": 0.0261, "step": 153800 }, { "epoch": 1.6433570169346652, "grad_norm": 0.15863817930221558, "learning_rate": 9.901354212695122e-07, "loss": 0.0381, "step": 153810 }, { "epoch": 1.643463860248945, "grad_norm": 0.16528810560703278, "learning_rate": 9.90132100152006e-07, "loss": 0.0511, "step": 153820 }, { "epoch": 1.6435707035632245, "grad_norm": 7.447005271911621, "learning_rate": 9.901287784811038e-07, "loss": 0.0497, "step": 153830 }, { "epoch": 1.643677546877504, "grad_norm": 4.530228614807129, "learning_rate": 9.90125456256809e-07, "loss": 0.0418, "step": 153840 }, { "epoch": 1.643784390191784, "grad_norm": 1.5226892232894897, "learning_rate": 9.901221334791259e-07, "loss": 0.0466, "step": 153850 }, { "epoch": 1.6438912335060634, "grad_norm": 6.822268486022949, "learning_rate": 9.901188101480574e-07, "loss": 0.0243, "step": 153860 }, { "epoch": 1.6439980768203428, "grad_norm": 0.03629513084888458, "learning_rate": 9.901154862636082e-07, "loss": 0.0752, "step": 153870 }, { "epoch": 1.6441049201346227, "grad_norm": 2.839404821395874, "learning_rate": 9.901121618257815e-07, "loss": 0.083, "step": 153880 }, { "epoch": 1.6442117634489022, "grad_norm": 3.7308056354522705, "learning_rate": 9.90108836834581e-07, "loss": 0.0169, "step": 153890 }, { "epoch": 1.6443186067631816, "grad_norm": 22.55000114440918, "learning_rate": 9.90105511290011e-07, "loss": 0.0242, "step": 153900 }, { "epoch": 1.6444254500774615, "grad_norm": 0.21867598593235016, "learning_rate": 9.901021851920745e-07, "loss": 0.011, "step": 153910 }, { "epoch": 1.644532293391741, "grad_norm": 4.605453014373779, "learning_rate": 9.900988585407758e-07, "loss": 0.0583, "step": 153920 }, { "epoch": 1.6446391367060205, "grad_norm": 6.242545127868652, "learning_rate": 9.900955313361186e-07, "loss": 0.0282, "step": 153930 }, { "epoch": 1.6447459800203004, "grad_norm": 0.4669071435928345, "learning_rate": 9.900922035781066e-07, "loss": 0.0108, "step": 153940 }, { "epoch": 1.6448528233345798, "grad_norm": 0.2962847650051117, "learning_rate": 9.900888752667434e-07, "loss": 0.0961, "step": 153950 }, { "epoch": 1.6449596666488593, "grad_norm": 2.5032784938812256, "learning_rate": 9.900855464020327e-07, "loss": 0.0231, "step": 153960 }, { "epoch": 1.6450665099631392, "grad_norm": 0.005480502732098103, "learning_rate": 9.900822169839788e-07, "loss": 0.0132, "step": 153970 }, { "epoch": 1.6451733532774186, "grad_norm": 0.021993841975927353, "learning_rate": 9.900788870125848e-07, "loss": 0.0614, "step": 153980 }, { "epoch": 1.6452801965916983, "grad_norm": 2.9181950092315674, "learning_rate": 9.900755564878549e-07, "loss": 0.0578, "step": 153990 }, { "epoch": 1.645387039905978, "grad_norm": 0.01637900248169899, "learning_rate": 9.900722254097926e-07, "loss": 0.0198, "step": 154000 }, { "epoch": 1.6454938832202575, "grad_norm": 5.545902729034424, "learning_rate": 9.900688937784019e-07, "loss": 0.0406, "step": 154010 }, { "epoch": 1.6456007265345372, "grad_norm": 0.08006388694047928, "learning_rate": 9.900655615936862e-07, "loss": 0.0617, "step": 154020 }, { "epoch": 1.6457075698488168, "grad_norm": 8.276723861694336, "learning_rate": 9.900622288556496e-07, "loss": 0.0284, "step": 154030 }, { "epoch": 1.6458144131630963, "grad_norm": 5.009188652038574, "learning_rate": 9.900588955642958e-07, "loss": 0.0596, "step": 154040 }, { "epoch": 1.645921256477376, "grad_norm": 4.3897223472595215, "learning_rate": 9.900555617196287e-07, "loss": 0.0186, "step": 154050 }, { "epoch": 1.6460280997916557, "grad_norm": 2.5699918270111084, "learning_rate": 9.900522273216515e-07, "loss": 0.0259, "step": 154060 }, { "epoch": 1.6461349431059351, "grad_norm": 2.6199283599853516, "learning_rate": 9.900488923703687e-07, "loss": 0.0202, "step": 154070 }, { "epoch": 1.6462417864202148, "grad_norm": 0.004401710350066423, "learning_rate": 9.900455568657834e-07, "loss": 0.0283, "step": 154080 }, { "epoch": 1.6463486297344945, "grad_norm": 0.34797346591949463, "learning_rate": 9.900422208079e-07, "loss": 0.0167, "step": 154090 }, { "epoch": 1.646455473048774, "grad_norm": 0.0312928706407547, "learning_rate": 9.900388841967218e-07, "loss": 0.0283, "step": 154100 }, { "epoch": 1.6465623163630536, "grad_norm": 0.9893326163291931, "learning_rate": 9.900355470322528e-07, "loss": 0.0265, "step": 154110 }, { "epoch": 1.6466691596773333, "grad_norm": 2.0535686016082764, "learning_rate": 9.900322093144964e-07, "loss": 0.0401, "step": 154120 }, { "epoch": 1.6467760029916128, "grad_norm": 2.528224229812622, "learning_rate": 9.90028871043457e-07, "loss": 0.0844, "step": 154130 }, { "epoch": 1.6468828463058924, "grad_norm": 0.2882544994354248, "learning_rate": 9.900255322191377e-07, "loss": 0.0178, "step": 154140 }, { "epoch": 1.6469896896201721, "grad_norm": 0.15670795738697052, "learning_rate": 9.900221928415428e-07, "loss": 0.0517, "step": 154150 }, { "epoch": 1.6470965329344516, "grad_norm": 1.9614436626434326, "learning_rate": 9.900188529106759e-07, "loss": 0.05, "step": 154160 }, { "epoch": 1.6472033762487313, "grad_norm": 2.136723756790161, "learning_rate": 9.900155124265406e-07, "loss": 0.0132, "step": 154170 }, { "epoch": 1.647310219563011, "grad_norm": 2.3962929248809814, "learning_rate": 9.90012171389141e-07, "loss": 0.0676, "step": 154180 }, { "epoch": 1.6474170628772904, "grad_norm": 5.185539722442627, "learning_rate": 9.900088297984804e-07, "loss": 0.0629, "step": 154190 }, { "epoch": 1.64752390619157, "grad_norm": 1.4714107513427734, "learning_rate": 9.900054876545629e-07, "loss": 0.0701, "step": 154200 }, { "epoch": 1.6476307495058498, "grad_norm": 3.0532608032226562, "learning_rate": 9.900021449573922e-07, "loss": 0.0165, "step": 154210 }, { "epoch": 1.6477375928201292, "grad_norm": 0.07839778065681458, "learning_rate": 9.899988017069723e-07, "loss": 0.0281, "step": 154220 }, { "epoch": 1.647844436134409, "grad_norm": 0.07285176217556, "learning_rate": 9.899954579033068e-07, "loss": 0.0352, "step": 154230 }, { "epoch": 1.6479512794486886, "grad_norm": 1.7331303358078003, "learning_rate": 9.899921135463993e-07, "loss": 0.0173, "step": 154240 }, { "epoch": 1.648058122762968, "grad_norm": 0.032408926635980606, "learning_rate": 9.899887686362536e-07, "loss": 0.0129, "step": 154250 }, { "epoch": 1.6481649660772477, "grad_norm": 3.334165334701538, "learning_rate": 9.899854231728737e-07, "loss": 0.0557, "step": 154260 }, { "epoch": 1.6482718093915274, "grad_norm": 0.3765081465244293, "learning_rate": 9.899820771562635e-07, "loss": 0.0264, "step": 154270 }, { "epoch": 1.6483786527058069, "grad_norm": 1.652772068977356, "learning_rate": 9.899787305864263e-07, "loss": 0.02, "step": 154280 }, { "epoch": 1.6484854960200865, "grad_norm": 6.319279670715332, "learning_rate": 9.899753834633662e-07, "loss": 0.0787, "step": 154290 }, { "epoch": 1.6485923393343662, "grad_norm": 0.15246397256851196, "learning_rate": 9.899720357870869e-07, "loss": 0.015, "step": 154300 }, { "epoch": 1.6486991826486457, "grad_norm": 6.518722057342529, "learning_rate": 9.899686875575921e-07, "loss": 0.1489, "step": 154310 }, { "epoch": 1.6488060259629254, "grad_norm": 0.19033250212669373, "learning_rate": 9.89965338774886e-07, "loss": 0.0183, "step": 154320 }, { "epoch": 1.648912869277205, "grad_norm": 0.17695069313049316, "learning_rate": 9.899619894389718e-07, "loss": 0.0396, "step": 154330 }, { "epoch": 1.6490197125914845, "grad_norm": 0.0524626150727272, "learning_rate": 9.899586395498534e-07, "loss": 0.0132, "step": 154340 }, { "epoch": 1.6491265559057642, "grad_norm": 0.17881755530834198, "learning_rate": 9.899552891075348e-07, "loss": 0.0562, "step": 154350 }, { "epoch": 1.6492333992200439, "grad_norm": 0.3065849542617798, "learning_rate": 9.899519381120199e-07, "loss": 0.0151, "step": 154360 }, { "epoch": 1.6493402425343233, "grad_norm": 0.6325265169143677, "learning_rate": 9.899485865633122e-07, "loss": 0.0202, "step": 154370 }, { "epoch": 1.649447085848603, "grad_norm": 0.059889305382966995, "learning_rate": 9.899452344614155e-07, "loss": 0.0265, "step": 154380 }, { "epoch": 1.6495539291628827, "grad_norm": 0.6489498615264893, "learning_rate": 9.899418818063337e-07, "loss": 0.0134, "step": 154390 }, { "epoch": 1.6496607724771621, "grad_norm": 0.15106579661369324, "learning_rate": 9.899385285980705e-07, "loss": 0.0856, "step": 154400 }, { "epoch": 1.6497676157914418, "grad_norm": 0.031439922749996185, "learning_rate": 9.899351748366296e-07, "loss": 0.0401, "step": 154410 }, { "epoch": 1.6498744591057215, "grad_norm": 1.3156218528747559, "learning_rate": 9.899318205220152e-07, "loss": 0.0323, "step": 154420 }, { "epoch": 1.649981302420001, "grad_norm": 1.2143027782440186, "learning_rate": 9.899284656542305e-07, "loss": 0.0104, "step": 154430 }, { "epoch": 1.6500881457342806, "grad_norm": 2.883160352706909, "learning_rate": 9.8992511023328e-07, "loss": 0.0361, "step": 154440 }, { "epoch": 1.6501949890485603, "grad_norm": 0.07608222961425781, "learning_rate": 9.899217542591666e-07, "loss": 0.0282, "step": 154450 }, { "epoch": 1.6503018323628398, "grad_norm": 0.5456886291503906, "learning_rate": 9.899183977318949e-07, "loss": 0.0464, "step": 154460 }, { "epoch": 1.6504086756771195, "grad_norm": 0.2157178819179535, "learning_rate": 9.89915040651468e-07, "loss": 0.0138, "step": 154470 }, { "epoch": 1.6505155189913991, "grad_norm": 1.5492823123931885, "learning_rate": 9.899116830178904e-07, "loss": 0.0333, "step": 154480 }, { "epoch": 1.6506223623056786, "grad_norm": 0.04475069418549538, "learning_rate": 9.899083248311654e-07, "loss": 0.0761, "step": 154490 }, { "epoch": 1.6507292056199583, "grad_norm": 3.503938674926758, "learning_rate": 9.89904966091297e-07, "loss": 0.0298, "step": 154500 }, { "epoch": 1.650836048934238, "grad_norm": 0.0771859735250473, "learning_rate": 9.899016067982889e-07, "loss": 0.0128, "step": 154510 }, { "epoch": 1.6509428922485174, "grad_norm": 4.361909866333008, "learning_rate": 9.89898246952145e-07, "loss": 0.0659, "step": 154520 }, { "epoch": 1.651049735562797, "grad_norm": 1.1274980306625366, "learning_rate": 9.898948865528687e-07, "loss": 0.014, "step": 154530 }, { "epoch": 1.6511565788770768, "grad_norm": 0.16348715126514435, "learning_rate": 9.89891525600464e-07, "loss": 0.0231, "step": 154540 }, { "epoch": 1.6512634221913562, "grad_norm": 0.25416526198387146, "learning_rate": 9.898881640949353e-07, "loss": 0.0078, "step": 154550 }, { "epoch": 1.6513702655056361, "grad_norm": 0.0029159197583794594, "learning_rate": 9.898848020362856e-07, "loss": 0.0258, "step": 154560 }, { "epoch": 1.6514771088199156, "grad_norm": 0.2455170750617981, "learning_rate": 9.898814394245189e-07, "loss": 0.033, "step": 154570 }, { "epoch": 1.651583952134195, "grad_norm": 0.008021504618227482, "learning_rate": 9.898780762596395e-07, "loss": 0.0411, "step": 154580 }, { "epoch": 1.651690795448475, "grad_norm": 3.4939308166503906, "learning_rate": 9.898747125416503e-07, "loss": 0.028, "step": 154590 }, { "epoch": 1.6517976387627544, "grad_norm": 0.2073415368795395, "learning_rate": 9.898713482705557e-07, "loss": 0.0536, "step": 154600 }, { "epoch": 1.6519044820770339, "grad_norm": 0.5377894639968872, "learning_rate": 9.898679834463595e-07, "loss": 0.028, "step": 154610 }, { "epoch": 1.6520113253913138, "grad_norm": 0.9894507527351379, "learning_rate": 9.898646180690654e-07, "loss": 0.0338, "step": 154620 }, { "epoch": 1.6521181687055932, "grad_norm": 0.20559582114219666, "learning_rate": 9.898612521386772e-07, "loss": 0.0214, "step": 154630 }, { "epoch": 1.6522250120198727, "grad_norm": 0.3718545138835907, "learning_rate": 9.898578856551985e-07, "loss": 0.0169, "step": 154640 }, { "epoch": 1.6523318553341526, "grad_norm": 1.948286771774292, "learning_rate": 9.898545186186333e-07, "loss": 0.0409, "step": 154650 }, { "epoch": 1.652438698648432, "grad_norm": 0.10553892701864243, "learning_rate": 9.898511510289854e-07, "loss": 0.034, "step": 154660 }, { "epoch": 1.6525455419627115, "grad_norm": 5.828995704650879, "learning_rate": 9.898477828862586e-07, "loss": 0.0272, "step": 154670 }, { "epoch": 1.6526523852769914, "grad_norm": 7.768517017364502, "learning_rate": 9.898444141904566e-07, "loss": 0.045, "step": 154680 }, { "epoch": 1.652759228591271, "grad_norm": 0.6583043336868286, "learning_rate": 9.898410449415835e-07, "loss": 0.0076, "step": 154690 }, { "epoch": 1.6528660719055503, "grad_norm": 26.211273193359375, "learning_rate": 9.898376751396427e-07, "loss": 0.0392, "step": 154700 }, { "epoch": 1.6529729152198303, "grad_norm": 0.05215781182050705, "learning_rate": 9.898343047846383e-07, "loss": 0.0037, "step": 154710 }, { "epoch": 1.6530797585341097, "grad_norm": 7.017354965209961, "learning_rate": 9.89830933876574e-07, "loss": 0.0565, "step": 154720 }, { "epoch": 1.6531866018483894, "grad_norm": 1.8349025249481201, "learning_rate": 9.898275624154534e-07, "loss": 0.0302, "step": 154730 }, { "epoch": 1.653293445162669, "grad_norm": 5.334396839141846, "learning_rate": 9.898241904012807e-07, "loss": 0.0419, "step": 154740 }, { "epoch": 1.6534002884769485, "grad_norm": 2.5496606826782227, "learning_rate": 9.898208178340596e-07, "loss": 0.0391, "step": 154750 }, { "epoch": 1.6535071317912282, "grad_norm": 4.175335884094238, "learning_rate": 9.898174447137936e-07, "loss": 0.0309, "step": 154760 }, { "epoch": 1.653613975105508, "grad_norm": 0.07726632058620453, "learning_rate": 9.89814071040487e-07, "loss": 0.0379, "step": 154770 }, { "epoch": 1.6537208184197874, "grad_norm": 4.848087310791016, "learning_rate": 9.898106968141432e-07, "loss": 0.0763, "step": 154780 }, { "epoch": 1.653827661734067, "grad_norm": 0.36030083894729614, "learning_rate": 9.898073220347662e-07, "loss": 0.0325, "step": 154790 }, { "epoch": 1.6539345050483467, "grad_norm": 0.07099981606006622, "learning_rate": 9.898039467023597e-07, "loss": 0.0389, "step": 154800 }, { "epoch": 1.6540413483626262, "grad_norm": 1.2422763109207153, "learning_rate": 9.898005708169277e-07, "loss": 0.0471, "step": 154810 }, { "epoch": 1.6541481916769059, "grad_norm": 3.453564167022705, "learning_rate": 9.897971943784738e-07, "loss": 0.0118, "step": 154820 }, { "epoch": 1.6542550349911855, "grad_norm": 0.23098774254322052, "learning_rate": 9.897938173870017e-07, "loss": 0.0173, "step": 154830 }, { "epoch": 1.654361878305465, "grad_norm": 0.410567969083786, "learning_rate": 9.897904398425157e-07, "loss": 0.0082, "step": 154840 }, { "epoch": 1.6544687216197447, "grad_norm": 4.155754089355469, "learning_rate": 9.897870617450193e-07, "loss": 0.0119, "step": 154850 }, { "epoch": 1.6545755649340244, "grad_norm": 0.0423567034304142, "learning_rate": 9.897836830945162e-07, "loss": 0.0392, "step": 154860 }, { "epoch": 1.6546824082483038, "grad_norm": 8.226482391357422, "learning_rate": 9.897803038910103e-07, "loss": 0.0462, "step": 154870 }, { "epoch": 1.6547892515625835, "grad_norm": 6.5391011238098145, "learning_rate": 9.897769241345058e-07, "loss": 0.0248, "step": 154880 }, { "epoch": 1.6548960948768632, "grad_norm": 0.03398219868540764, "learning_rate": 9.89773543825006e-07, "loss": 0.0313, "step": 154890 }, { "epoch": 1.6550029381911426, "grad_norm": 5.112606048583984, "learning_rate": 9.897701629625149e-07, "loss": 0.0703, "step": 154900 }, { "epoch": 1.6551097815054223, "grad_norm": 0.12051039934158325, "learning_rate": 9.897667815470362e-07, "loss": 0.0054, "step": 154910 }, { "epoch": 1.655216624819702, "grad_norm": 2.6963582038879395, "learning_rate": 9.89763399578574e-07, "loss": 0.0117, "step": 154920 }, { "epoch": 1.6553234681339815, "grad_norm": 3.0912551879882812, "learning_rate": 9.89760017057132e-07, "loss": 0.0693, "step": 154930 }, { "epoch": 1.6554303114482611, "grad_norm": 4.265449047088623, "learning_rate": 9.897566339827138e-07, "loss": 0.0277, "step": 154940 }, { "epoch": 1.6555371547625408, "grad_norm": 0.009727499447762966, "learning_rate": 9.897532503553236e-07, "loss": 0.0158, "step": 154950 }, { "epoch": 1.6556439980768203, "grad_norm": 0.5722072124481201, "learning_rate": 9.89749866174965e-07, "loss": 0.0196, "step": 154960 }, { "epoch": 1.6557508413911, "grad_norm": 1.9303455352783203, "learning_rate": 9.897464814416417e-07, "loss": 0.0325, "step": 154970 }, { "epoch": 1.6558576847053796, "grad_norm": 0.04856475815176964, "learning_rate": 9.897430961553576e-07, "loss": 0.04, "step": 154980 }, { "epoch": 1.655964528019659, "grad_norm": 0.5877832770347595, "learning_rate": 9.897397103161168e-07, "loss": 0.0298, "step": 154990 }, { "epoch": 1.6560713713339388, "grad_norm": 6.792555332183838, "learning_rate": 9.897363239239228e-07, "loss": 0.0331, "step": 155000 }, { "epoch": 1.6561782146482185, "grad_norm": 7.422120571136475, "learning_rate": 9.897329369787795e-07, "loss": 0.024, "step": 155010 }, { "epoch": 1.656285057962498, "grad_norm": 4.590315341949463, "learning_rate": 9.897295494806907e-07, "loss": 0.0304, "step": 155020 }, { "epoch": 1.6563919012767776, "grad_norm": 0.058379996567964554, "learning_rate": 9.897261614296604e-07, "loss": 0.0137, "step": 155030 }, { "epoch": 1.6564987445910573, "grad_norm": 0.06669759005308151, "learning_rate": 9.897227728256922e-07, "loss": 0.0157, "step": 155040 }, { "epoch": 1.6566055879053367, "grad_norm": 2.8731296062469482, "learning_rate": 9.8971938366879e-07, "loss": 0.0203, "step": 155050 }, { "epoch": 1.6567124312196164, "grad_norm": 0.074882872402668, "learning_rate": 9.897159939589577e-07, "loss": 0.0403, "step": 155060 }, { "epoch": 1.656819274533896, "grad_norm": 0.5004059672355652, "learning_rate": 9.897126036961991e-07, "loss": 0.0386, "step": 155070 }, { "epoch": 1.6569261178481756, "grad_norm": 5.844181060791016, "learning_rate": 9.89709212880518e-07, "loss": 0.035, "step": 155080 }, { "epoch": 1.6570329611624552, "grad_norm": 2.641867160797119, "learning_rate": 9.897058215119184e-07, "loss": 0.013, "step": 155090 }, { "epoch": 1.657139804476735, "grad_norm": 0.051619529724121094, "learning_rate": 9.897024295904038e-07, "loss": 0.0211, "step": 155100 }, { "epoch": 1.6572466477910144, "grad_norm": 2.554816961288452, "learning_rate": 9.896990371159783e-07, "loss": 0.0137, "step": 155110 }, { "epoch": 1.657353491105294, "grad_norm": 0.3653545379638672, "learning_rate": 9.896956440886455e-07, "loss": 0.0239, "step": 155120 }, { "epoch": 1.6574603344195737, "grad_norm": 4.605901718139648, "learning_rate": 9.896922505084094e-07, "loss": 0.024, "step": 155130 }, { "epoch": 1.6575671777338532, "grad_norm": 0.35003483295440674, "learning_rate": 9.89688856375274e-07, "loss": 0.0619, "step": 155140 }, { "epoch": 1.6576740210481329, "grad_norm": 3.4221882820129395, "learning_rate": 9.896854616892425e-07, "loss": 0.021, "step": 155150 }, { "epoch": 1.6577808643624126, "grad_norm": 4.753763198852539, "learning_rate": 9.896820664503193e-07, "loss": 0.0174, "step": 155160 }, { "epoch": 1.657887707676692, "grad_norm": 10.09208869934082, "learning_rate": 9.896786706585083e-07, "loss": 0.0273, "step": 155170 }, { "epoch": 1.6579945509909717, "grad_norm": 1.9477900266647339, "learning_rate": 9.896752743138131e-07, "loss": 0.0382, "step": 155180 }, { "epoch": 1.6581013943052514, "grad_norm": 0.36383771896362305, "learning_rate": 9.896718774162375e-07, "loss": 0.0215, "step": 155190 }, { "epoch": 1.6582082376195308, "grad_norm": 1.7082709074020386, "learning_rate": 9.896684799657852e-07, "loss": 0.0269, "step": 155200 }, { "epoch": 1.6583150809338105, "grad_norm": 4.782124042510986, "learning_rate": 9.896650819624603e-07, "loss": 0.0256, "step": 155210 }, { "epoch": 1.6584219242480902, "grad_norm": 1.8677090406417847, "learning_rate": 9.896616834062667e-07, "loss": 0.0067, "step": 155220 }, { "epoch": 1.6585287675623697, "grad_norm": 0.029872411862015724, "learning_rate": 9.89658284297208e-07, "loss": 0.0218, "step": 155230 }, { "epoch": 1.6586356108766493, "grad_norm": 7.733648777008057, "learning_rate": 9.896548846352883e-07, "loss": 0.0336, "step": 155240 }, { "epoch": 1.658742454190929, "grad_norm": 4.477642059326172, "learning_rate": 9.896514844205112e-07, "loss": 0.0201, "step": 155250 }, { "epoch": 1.6588492975052085, "grad_norm": 0.02772192284464836, "learning_rate": 9.896480836528806e-07, "loss": 0.0077, "step": 155260 }, { "epoch": 1.6589561408194882, "grad_norm": 0.026752451434731483, "learning_rate": 9.896446823324002e-07, "loss": 0.0299, "step": 155270 }, { "epoch": 1.6590629841337678, "grad_norm": 5.487338066101074, "learning_rate": 9.896412804590743e-07, "loss": 0.0195, "step": 155280 }, { "epoch": 1.6591698274480473, "grad_norm": 0.03551528602838516, "learning_rate": 9.896378780329062e-07, "loss": 0.0217, "step": 155290 }, { "epoch": 1.6592766707623272, "grad_norm": 2.8115720748901367, "learning_rate": 9.896344750539001e-07, "loss": 0.0273, "step": 155300 }, { "epoch": 1.6593835140766067, "grad_norm": 2.053471088409424, "learning_rate": 9.896310715220596e-07, "loss": 0.0459, "step": 155310 }, { "epoch": 1.6594903573908861, "grad_norm": 0.2074597328901291, "learning_rate": 9.896276674373889e-07, "loss": 0.0913, "step": 155320 }, { "epoch": 1.659597200705166, "grad_norm": 0.5934257507324219, "learning_rate": 9.896242627998915e-07, "loss": 0.0392, "step": 155330 }, { "epoch": 1.6597040440194455, "grad_norm": 3.270888328552246, "learning_rate": 9.896208576095713e-07, "loss": 0.0376, "step": 155340 }, { "epoch": 1.659810887333725, "grad_norm": 0.024499837309122086, "learning_rate": 9.89617451866432e-07, "loss": 0.0154, "step": 155350 }, { "epoch": 1.6599177306480049, "grad_norm": 0.06866806000471115, "learning_rate": 9.89614045570478e-07, "loss": 0.0288, "step": 155360 }, { "epoch": 1.6600245739622843, "grad_norm": 1.490256905555725, "learning_rate": 9.896106387217126e-07, "loss": 0.0555, "step": 155370 }, { "epoch": 1.6601314172765638, "grad_norm": 0.007051552180200815, "learning_rate": 9.896072313201398e-07, "loss": 0.0242, "step": 155380 }, { "epoch": 1.6602382605908437, "grad_norm": 3.6293044090270996, "learning_rate": 9.896038233657637e-07, "loss": 0.0261, "step": 155390 }, { "epoch": 1.6603451039051231, "grad_norm": 0.6810759902000427, "learning_rate": 9.896004148585878e-07, "loss": 0.031, "step": 155400 }, { "epoch": 1.6604519472194026, "grad_norm": 8.911938667297363, "learning_rate": 9.89597005798616e-07, "loss": 0.0176, "step": 155410 }, { "epoch": 1.6605587905336825, "grad_norm": 6.309167385101318, "learning_rate": 9.895935961858525e-07, "loss": 0.1419, "step": 155420 }, { "epoch": 1.660665633847962, "grad_norm": 0.6523099541664124, "learning_rate": 9.895901860203006e-07, "loss": 0.0338, "step": 155430 }, { "epoch": 1.6607724771622414, "grad_norm": 4.830713272094727, "learning_rate": 9.895867753019645e-07, "loss": 0.0266, "step": 155440 }, { "epoch": 1.6608793204765213, "grad_norm": 0.06311247497797012, "learning_rate": 9.89583364030848e-07, "loss": 0.0461, "step": 155450 }, { "epoch": 1.6609861637908008, "grad_norm": 3.163039207458496, "learning_rate": 9.895799522069549e-07, "loss": 0.0376, "step": 155460 }, { "epoch": 1.6610930071050805, "grad_norm": 5.247875213623047, "learning_rate": 9.895765398302892e-07, "loss": 0.0377, "step": 155470 }, { "epoch": 1.6611998504193601, "grad_norm": 1.0058597326278687, "learning_rate": 9.895731269008544e-07, "loss": 0.0429, "step": 155480 }, { "epoch": 1.6613066937336396, "grad_norm": 7.0882568359375, "learning_rate": 9.895697134186547e-07, "loss": 0.0533, "step": 155490 }, { "epoch": 1.6614135370479193, "grad_norm": 1.3514736890792847, "learning_rate": 9.89566299383694e-07, "loss": 0.0377, "step": 155500 }, { "epoch": 1.661520380362199, "grad_norm": 0.1882641464471817, "learning_rate": 9.895628847959758e-07, "loss": 0.0607, "step": 155510 }, { "epoch": 1.6616272236764784, "grad_norm": 6.353321552276611, "learning_rate": 9.895594696555042e-07, "loss": 0.0261, "step": 155520 }, { "epoch": 1.661734066990758, "grad_norm": 3.4208908081054688, "learning_rate": 9.895560539622832e-07, "loss": 0.0315, "step": 155530 }, { "epoch": 1.6618409103050378, "grad_norm": 0.20162604749202728, "learning_rate": 9.895526377163161e-07, "loss": 0.0203, "step": 155540 }, { "epoch": 1.6619477536193172, "grad_norm": 2.846210241317749, "learning_rate": 9.895492209176076e-07, "loss": 0.0262, "step": 155550 }, { "epoch": 1.662054596933597, "grad_norm": 0.49570202827453613, "learning_rate": 9.895458035661607e-07, "loss": 0.0456, "step": 155560 }, { "epoch": 1.6621614402478766, "grad_norm": 17.545333862304688, "learning_rate": 9.895423856619799e-07, "loss": 0.0943, "step": 155570 }, { "epoch": 1.662268283562156, "grad_norm": 2.9247734546661377, "learning_rate": 9.895389672050684e-07, "loss": 0.0044, "step": 155580 }, { "epoch": 1.6623751268764357, "grad_norm": 5.716531753540039, "learning_rate": 9.895355481954308e-07, "loss": 0.0366, "step": 155590 }, { "epoch": 1.6624819701907154, "grad_norm": 0.026533763855695724, "learning_rate": 9.895321286330704e-07, "loss": 0.0105, "step": 155600 }, { "epoch": 1.6625888135049949, "grad_norm": 3.139538526535034, "learning_rate": 9.895287085179915e-07, "loss": 0.0286, "step": 155610 }, { "epoch": 1.6626956568192746, "grad_norm": 0.0625438317656517, "learning_rate": 9.895252878501976e-07, "loss": 0.0194, "step": 155620 }, { "epoch": 1.6628025001335542, "grad_norm": 0.6316223740577698, "learning_rate": 9.89521866629693e-07, "loss": 0.0042, "step": 155630 }, { "epoch": 1.6629093434478337, "grad_norm": 0.07035337388515472, "learning_rate": 9.895184448564808e-07, "loss": 0.041, "step": 155640 }, { "epoch": 1.6630161867621134, "grad_norm": 1.022921085357666, "learning_rate": 9.895150225305656e-07, "loss": 0.0541, "step": 155650 }, { "epoch": 1.663123030076393, "grad_norm": 1.5026662349700928, "learning_rate": 9.89511599651951e-07, "loss": 0.0548, "step": 155660 }, { "epoch": 1.6632298733906725, "grad_norm": 0.8068578839302063, "learning_rate": 9.895081762206407e-07, "loss": 0.0273, "step": 155670 }, { "epoch": 1.6633367167049522, "grad_norm": 2.622525930404663, "learning_rate": 9.895047522366389e-07, "loss": 0.0038, "step": 155680 }, { "epoch": 1.6634435600192319, "grad_norm": 0.034841831773519516, "learning_rate": 9.895013276999493e-07, "loss": 0.0993, "step": 155690 }, { "epoch": 1.6635504033335113, "grad_norm": 7.422831058502197, "learning_rate": 9.894979026105757e-07, "loss": 0.0302, "step": 155700 }, { "epoch": 1.663657246647791, "grad_norm": 4.457785129547119, "learning_rate": 9.89494476968522e-07, "loss": 0.0227, "step": 155710 }, { "epoch": 1.6637640899620707, "grad_norm": 3.810692310333252, "learning_rate": 9.89491050773792e-07, "loss": 0.0631, "step": 155720 }, { "epoch": 1.6638709332763502, "grad_norm": 5.172441482543945, "learning_rate": 9.894876240263897e-07, "loss": 0.0415, "step": 155730 }, { "epoch": 1.6639777765906298, "grad_norm": 5.095653057098389, "learning_rate": 9.89484196726319e-07, "loss": 0.0573, "step": 155740 }, { "epoch": 1.6640846199049095, "grad_norm": 0.41591691970825195, "learning_rate": 9.894807688735837e-07, "loss": 0.0147, "step": 155750 }, { "epoch": 1.664191463219189, "grad_norm": 0.3829023540019989, "learning_rate": 9.894773404681877e-07, "loss": 0.0977, "step": 155760 }, { "epoch": 1.6642983065334687, "grad_norm": 2.7983925342559814, "learning_rate": 9.894739115101346e-07, "loss": 0.037, "step": 155770 }, { "epoch": 1.6644051498477483, "grad_norm": 4.022377014160156, "learning_rate": 9.894704819994288e-07, "loss": 0.0449, "step": 155780 }, { "epoch": 1.6645119931620278, "grad_norm": 0.13417969644069672, "learning_rate": 9.89467051936074e-07, "loss": 0.0247, "step": 155790 }, { "epoch": 1.6646188364763075, "grad_norm": 4.4759297370910645, "learning_rate": 9.894636213200735e-07, "loss": 0.0729, "step": 155800 }, { "epoch": 1.6647256797905872, "grad_norm": 0.8352629542350769, "learning_rate": 9.89460190151432e-07, "loss": 0.0906, "step": 155810 }, { "epoch": 1.6648325231048666, "grad_norm": 2.1161386966705322, "learning_rate": 9.894567584301528e-07, "loss": 0.0516, "step": 155820 }, { "epoch": 1.6649393664191463, "grad_norm": 0.16930343210697174, "learning_rate": 9.894533261562402e-07, "loss": 0.0106, "step": 155830 }, { "epoch": 1.665046209733426, "grad_norm": 9.892073631286621, "learning_rate": 9.894498933296977e-07, "loss": 0.0323, "step": 155840 }, { "epoch": 1.6651530530477054, "grad_norm": 1.5177234411239624, "learning_rate": 9.894464599505292e-07, "loss": 0.0418, "step": 155850 }, { "epoch": 1.6652598963619851, "grad_norm": 0.07466941326856613, "learning_rate": 9.89443026018739e-07, "loss": 0.0322, "step": 155860 }, { "epoch": 1.6653667396762648, "grad_norm": 3.9189014434814453, "learning_rate": 9.894395915343306e-07, "loss": 0.0532, "step": 155870 }, { "epoch": 1.6654735829905443, "grad_norm": 0.09455174952745438, "learning_rate": 9.894361564973077e-07, "loss": 0.0229, "step": 155880 }, { "epoch": 1.665580426304824, "grad_norm": 1.144216775894165, "learning_rate": 9.894327209076747e-07, "loss": 0.0141, "step": 155890 }, { "epoch": 1.6656872696191036, "grad_norm": 0.10790415108203888, "learning_rate": 9.89429284765435e-07, "loss": 0.0169, "step": 155900 }, { "epoch": 1.665794112933383, "grad_norm": 0.12679250538349152, "learning_rate": 9.89425848070593e-07, "loss": 0.0438, "step": 155910 }, { "epoch": 1.6659009562476628, "grad_norm": 1.047904372215271, "learning_rate": 9.894224108231521e-07, "loss": 0.0222, "step": 155920 }, { "epoch": 1.6660077995619424, "grad_norm": 1.7914836406707764, "learning_rate": 9.894189730231165e-07, "loss": 0.0302, "step": 155930 }, { "epoch": 1.666114642876222, "grad_norm": 0.1545884609222412, "learning_rate": 9.894155346704897e-07, "loss": 0.03, "step": 155940 }, { "epoch": 1.6662214861905016, "grad_norm": 7.159551620483398, "learning_rate": 9.89412095765276e-07, "loss": 0.0277, "step": 155950 }, { "epoch": 1.6663283295047813, "grad_norm": 0.04842550307512283, "learning_rate": 9.89408656307479e-07, "loss": 0.037, "step": 155960 }, { "epoch": 1.6664351728190607, "grad_norm": 1.0945370197296143, "learning_rate": 9.894052162971028e-07, "loss": 0.0313, "step": 155970 }, { "epoch": 1.6665420161333404, "grad_norm": 0.24793581664562225, "learning_rate": 9.89401775734151e-07, "loss": 0.0755, "step": 155980 }, { "epoch": 1.66664885944762, "grad_norm": 9.875897407531738, "learning_rate": 9.893983346186279e-07, "loss": 0.0322, "step": 155990 }, { "epoch": 1.6667557027618995, "grad_norm": 0.006786811631172895, "learning_rate": 9.89394892950537e-07, "loss": 0.016, "step": 156000 }, { "epoch": 1.6668625460761792, "grad_norm": 0.008757542818784714, "learning_rate": 9.893914507298825e-07, "loss": 0.0381, "step": 156010 }, { "epoch": 1.666969389390459, "grad_norm": 7.7413201332092285, "learning_rate": 9.893880079566678e-07, "loss": 0.0269, "step": 156020 }, { "epoch": 1.6670762327047384, "grad_norm": 2.0631368160247803, "learning_rate": 9.893845646308974e-07, "loss": 0.0447, "step": 156030 }, { "epoch": 1.6671830760190183, "grad_norm": 5.4029011726379395, "learning_rate": 9.893811207525747e-07, "loss": 0.0631, "step": 156040 }, { "epoch": 1.6672899193332977, "grad_norm": 3.140789270401001, "learning_rate": 9.893776763217039e-07, "loss": 0.0333, "step": 156050 }, { "epoch": 1.6673967626475772, "grad_norm": 6.563449382781982, "learning_rate": 9.893742313382887e-07, "loss": 0.0217, "step": 156060 }, { "epoch": 1.667503605961857, "grad_norm": 2.5432991981506348, "learning_rate": 9.89370785802333e-07, "loss": 0.0066, "step": 156070 }, { "epoch": 1.6676104492761366, "grad_norm": 6.556977272033691, "learning_rate": 9.893673397138408e-07, "loss": 0.0978, "step": 156080 }, { "epoch": 1.667717292590416, "grad_norm": 7.0587077140808105, "learning_rate": 9.89363893072816e-07, "loss": 0.0621, "step": 156090 }, { "epoch": 1.667824135904696, "grad_norm": 3.2282180786132812, "learning_rate": 9.893604458792623e-07, "loss": 0.0501, "step": 156100 }, { "epoch": 1.6679309792189754, "grad_norm": 4.238773822784424, "learning_rate": 9.89356998133184e-07, "loss": 0.0403, "step": 156110 }, { "epoch": 1.6680378225332548, "grad_norm": 1.314131736755371, "learning_rate": 9.893535498345845e-07, "loss": 0.034, "step": 156120 }, { "epoch": 1.6681446658475347, "grad_norm": 4.350199222564697, "learning_rate": 9.89350100983468e-07, "loss": 0.0482, "step": 156130 }, { "epoch": 1.6682515091618142, "grad_norm": 0.029547544196248055, "learning_rate": 9.89346651579838e-07, "loss": 0.1038, "step": 156140 }, { "epoch": 1.6683583524760937, "grad_norm": 0.02781549096107483, "learning_rate": 9.89343201623699e-07, "loss": 0.0148, "step": 156150 }, { "epoch": 1.6684651957903736, "grad_norm": 0.013033046387135983, "learning_rate": 9.893397511150545e-07, "loss": 0.0033, "step": 156160 }, { "epoch": 1.668572039104653, "grad_norm": 0.3385491669178009, "learning_rate": 9.893363000539085e-07, "loss": 0.0207, "step": 156170 }, { "epoch": 1.6686788824189325, "grad_norm": 1.8170901536941528, "learning_rate": 9.89332848440265e-07, "loss": 0.0276, "step": 156180 }, { "epoch": 1.6687857257332124, "grad_norm": 1.400622844696045, "learning_rate": 9.893293962741277e-07, "loss": 0.0103, "step": 156190 }, { "epoch": 1.6688925690474918, "grad_norm": 0.17856137454509735, "learning_rate": 9.893259435555005e-07, "loss": 0.0171, "step": 156200 }, { "epoch": 1.6689994123617715, "grad_norm": 0.08670641481876373, "learning_rate": 9.893224902843876e-07, "loss": 0.0411, "step": 156210 }, { "epoch": 1.6691062556760512, "grad_norm": 11.914491653442383, "learning_rate": 9.893190364607925e-07, "loss": 0.062, "step": 156220 }, { "epoch": 1.6692130989903307, "grad_norm": 1.0482420921325684, "learning_rate": 9.893155820847192e-07, "loss": 0.0102, "step": 156230 }, { "epoch": 1.6693199423046103, "grad_norm": 10.658772468566895, "learning_rate": 9.893121271561717e-07, "loss": 0.0451, "step": 156240 }, { "epoch": 1.66942678561889, "grad_norm": 4.146000862121582, "learning_rate": 9.89308671675154e-07, "loss": 0.0383, "step": 156250 }, { "epoch": 1.6695336289331695, "grad_norm": 4.71012020111084, "learning_rate": 9.8930521564167e-07, "loss": 0.048, "step": 156260 }, { "epoch": 1.6696404722474492, "grad_norm": 1.2118364572525024, "learning_rate": 9.893017590557233e-07, "loss": 0.0091, "step": 156270 }, { "epoch": 1.6697473155617288, "grad_norm": 4.079445838928223, "learning_rate": 9.89298301917318e-07, "loss": 0.0304, "step": 156280 }, { "epoch": 1.6698541588760083, "grad_norm": 3.029817819595337, "learning_rate": 9.89294844226458e-07, "loss": 0.0271, "step": 156290 }, { "epoch": 1.669961002190288, "grad_norm": 0.022219857200980186, "learning_rate": 9.89291385983147e-07, "loss": 0.0134, "step": 156300 }, { "epoch": 1.6700678455045677, "grad_norm": 2.3935437202453613, "learning_rate": 9.892879271873895e-07, "loss": 0.0369, "step": 156310 }, { "epoch": 1.6701746888188471, "grad_norm": 5.060643196105957, "learning_rate": 9.892844678391887e-07, "loss": 0.0128, "step": 156320 }, { "epoch": 1.6702815321331268, "grad_norm": 0.8803988099098206, "learning_rate": 9.89281007938549e-07, "loss": 0.0121, "step": 156330 }, { "epoch": 1.6703883754474065, "grad_norm": 1.4749587774276733, "learning_rate": 9.89277547485474e-07, "loss": 0.0385, "step": 156340 }, { "epoch": 1.670495218761686, "grad_norm": 4.613224983215332, "learning_rate": 9.892740864799676e-07, "loss": 0.0887, "step": 156350 }, { "epoch": 1.6706020620759656, "grad_norm": 0.20939965546131134, "learning_rate": 9.892706249220339e-07, "loss": 0.0181, "step": 156360 }, { "epoch": 1.6707089053902453, "grad_norm": 6.478453159332275, "learning_rate": 9.892671628116768e-07, "loss": 0.0406, "step": 156370 }, { "epoch": 1.6708157487045248, "grad_norm": 3.512648820877075, "learning_rate": 9.892637001489002e-07, "loss": 0.062, "step": 156380 }, { "epoch": 1.6709225920188044, "grad_norm": 1.3040491342544556, "learning_rate": 9.892602369337079e-07, "loss": 0.0149, "step": 156390 }, { "epoch": 1.6710294353330841, "grad_norm": 9.02804946899414, "learning_rate": 9.892567731661038e-07, "loss": 0.0621, "step": 156400 }, { "epoch": 1.6711362786473636, "grad_norm": 2.0634353160858154, "learning_rate": 9.89253308846092e-07, "loss": 0.0192, "step": 156410 }, { "epoch": 1.6712431219616433, "grad_norm": 1.1335121393203735, "learning_rate": 9.892498439736763e-07, "loss": 0.0107, "step": 156420 }, { "epoch": 1.671349965275923, "grad_norm": 3.376969575881958, "learning_rate": 9.892463785488605e-07, "loss": 0.0174, "step": 156430 }, { "epoch": 1.6714568085902024, "grad_norm": 0.25045153498649597, "learning_rate": 9.892429125716486e-07, "loss": 0.1162, "step": 156440 }, { "epoch": 1.671563651904482, "grad_norm": 0.9747467637062073, "learning_rate": 9.892394460420445e-07, "loss": 0.0167, "step": 156450 }, { "epoch": 1.6716704952187618, "grad_norm": 0.2622159421443939, "learning_rate": 9.892359789600522e-07, "loss": 0.0092, "step": 156460 }, { "epoch": 1.6717773385330412, "grad_norm": 12.975790023803711, "learning_rate": 9.892325113256756e-07, "loss": 0.0497, "step": 156470 }, { "epoch": 1.671884181847321, "grad_norm": 0.1909831315279007, "learning_rate": 9.892290431389185e-07, "loss": 0.0041, "step": 156480 }, { "epoch": 1.6719910251616006, "grad_norm": 2.3941640853881836, "learning_rate": 9.89225574399785e-07, "loss": 0.0293, "step": 156490 }, { "epoch": 1.67209786847588, "grad_norm": 5.076345443725586, "learning_rate": 9.892221051082788e-07, "loss": 0.0268, "step": 156500 }, { "epoch": 1.6722047117901597, "grad_norm": 5.77396821975708, "learning_rate": 9.89218635264404e-07, "loss": 0.0199, "step": 156510 }, { "epoch": 1.6723115551044394, "grad_norm": 0.15298107266426086, "learning_rate": 9.892151648681645e-07, "loss": 0.0201, "step": 156520 }, { "epoch": 1.6724183984187189, "grad_norm": 2.172800302505493, "learning_rate": 9.89211693919564e-07, "loss": 0.0241, "step": 156530 }, { "epoch": 1.6725252417329985, "grad_norm": 0.031629882752895355, "learning_rate": 9.892082224186064e-07, "loss": 0.0339, "step": 156540 }, { "epoch": 1.6726320850472782, "grad_norm": 0.13802587985992432, "learning_rate": 9.89204750365296e-07, "loss": 0.0569, "step": 156550 }, { "epoch": 1.6727389283615577, "grad_norm": 10.866985321044922, "learning_rate": 9.892012777596366e-07, "loss": 0.0326, "step": 156560 }, { "epoch": 1.6728457716758374, "grad_norm": 4.0884928703308105, "learning_rate": 9.891978046016321e-07, "loss": 0.0039, "step": 156570 }, { "epoch": 1.672952614990117, "grad_norm": 0.08189482986927032, "learning_rate": 9.891943308912861e-07, "loss": 0.0108, "step": 156580 }, { "epoch": 1.6730594583043965, "grad_norm": 0.9880558252334595, "learning_rate": 9.89190856628603e-07, "loss": 0.0323, "step": 156590 }, { "epoch": 1.6731663016186762, "grad_norm": 0.05884641036391258, "learning_rate": 9.891873818135862e-07, "loss": 0.0028, "step": 156600 }, { "epoch": 1.6732731449329559, "grad_norm": 0.02595643885433674, "learning_rate": 9.8918390644624e-07, "loss": 0.0932, "step": 156610 }, { "epoch": 1.6733799882472353, "grad_norm": 9.407451629638672, "learning_rate": 9.891804305265686e-07, "loss": 0.0206, "step": 156620 }, { "epoch": 1.673486831561515, "grad_norm": 1.8354896306991577, "learning_rate": 9.891769540545754e-07, "loss": 0.038, "step": 156630 }, { "epoch": 1.6735936748757947, "grad_norm": 2.0203394889831543, "learning_rate": 9.891734770302645e-07, "loss": 0.0287, "step": 156640 }, { "epoch": 1.6737005181900741, "grad_norm": 4.189885139465332, "learning_rate": 9.891699994536396e-07, "loss": 0.0327, "step": 156650 }, { "epoch": 1.6738073615043538, "grad_norm": 0.45633167028427124, "learning_rate": 9.891665213247052e-07, "loss": 0.0257, "step": 156660 }, { "epoch": 1.6739142048186335, "grad_norm": 5.105703353881836, "learning_rate": 9.891630426434644e-07, "loss": 0.0394, "step": 156670 }, { "epoch": 1.674021048132913, "grad_norm": 0.09949861466884613, "learning_rate": 9.891595634099222e-07, "loss": 0.047, "step": 156680 }, { "epoch": 1.6741278914471927, "grad_norm": 0.012192029505968094, "learning_rate": 9.891560836240816e-07, "loss": 0.0718, "step": 156690 }, { "epoch": 1.6742347347614723, "grad_norm": 1.0115272998809814, "learning_rate": 9.891526032859468e-07, "loss": 0.0036, "step": 156700 }, { "epoch": 1.6743415780757518, "grad_norm": 0.10094799846410751, "learning_rate": 9.89149122395522e-07, "loss": 0.017, "step": 156710 }, { "epoch": 1.6744484213900315, "grad_norm": 0.049800291657447815, "learning_rate": 9.891456409528108e-07, "loss": 0.0148, "step": 156720 }, { "epoch": 1.6745552647043112, "grad_norm": 4.389091491699219, "learning_rate": 9.891421589578173e-07, "loss": 0.0227, "step": 156730 }, { "epoch": 1.6746621080185906, "grad_norm": 0.24865224957466125, "learning_rate": 9.891386764105453e-07, "loss": 0.0111, "step": 156740 }, { "epoch": 1.6747689513328703, "grad_norm": 0.07884673774242401, "learning_rate": 9.891351933109988e-07, "loss": 0.0394, "step": 156750 }, { "epoch": 1.67487579464715, "grad_norm": 0.08456434309482574, "learning_rate": 9.89131709659182e-07, "loss": 0.0038, "step": 156760 }, { "epoch": 1.6749826379614294, "grad_norm": 3.226609706878662, "learning_rate": 9.891282254550983e-07, "loss": 0.0279, "step": 156770 }, { "epoch": 1.6750894812757093, "grad_norm": 0.5009652376174927, "learning_rate": 9.89124740698752e-07, "loss": 0.0652, "step": 156780 }, { "epoch": 1.6751963245899888, "grad_norm": 1.9603773355484009, "learning_rate": 9.89121255390147e-07, "loss": 0.0185, "step": 156790 }, { "epoch": 1.6753031679042683, "grad_norm": 0.0032422116491943598, "learning_rate": 9.891177695292871e-07, "loss": 0.0183, "step": 156800 }, { "epoch": 1.6754100112185482, "grad_norm": 1.5700944662094116, "learning_rate": 9.891142831161765e-07, "loss": 0.0259, "step": 156810 }, { "epoch": 1.6755168545328276, "grad_norm": 0.021963130682706833, "learning_rate": 9.891107961508188e-07, "loss": 0.0467, "step": 156820 }, { "epoch": 1.675623697847107, "grad_norm": 11.189987182617188, "learning_rate": 9.891073086332181e-07, "loss": 0.0663, "step": 156830 }, { "epoch": 1.675730541161387, "grad_norm": 14.073938369750977, "learning_rate": 9.891038205633784e-07, "loss": 0.0391, "step": 156840 }, { "epoch": 1.6758373844756664, "grad_norm": 0.12113485485315323, "learning_rate": 9.891003319413035e-07, "loss": 0.0365, "step": 156850 }, { "epoch": 1.675944227789946, "grad_norm": 1.6439634561538696, "learning_rate": 9.890968427669976e-07, "loss": 0.047, "step": 156860 }, { "epoch": 1.6760510711042258, "grad_norm": 0.04360155761241913, "learning_rate": 9.890933530404644e-07, "loss": 0.013, "step": 156870 }, { "epoch": 1.6761579144185053, "grad_norm": 0.7591933608055115, "learning_rate": 9.890898627617078e-07, "loss": 0.0015, "step": 156880 }, { "epoch": 1.6762647577327847, "grad_norm": 5.51403284072876, "learning_rate": 9.890863719307317e-07, "loss": 0.0189, "step": 156890 }, { "epoch": 1.6763716010470646, "grad_norm": 0.026168225333094597, "learning_rate": 9.890828805475402e-07, "loss": 0.0085, "step": 156900 }, { "epoch": 1.676478444361344, "grad_norm": 0.08249326050281525, "learning_rate": 9.890793886121373e-07, "loss": 0.0145, "step": 156910 }, { "epoch": 1.6765852876756235, "grad_norm": 0.3209805190563202, "learning_rate": 9.89075896124527e-07, "loss": 0.0222, "step": 156920 }, { "epoch": 1.6766921309899034, "grad_norm": 3.605491876602173, "learning_rate": 9.89072403084713e-07, "loss": 0.0975, "step": 156930 }, { "epoch": 1.676798974304183, "grad_norm": 2.787027597427368, "learning_rate": 9.890689094926993e-07, "loss": 0.0314, "step": 156940 }, { "epoch": 1.6769058176184626, "grad_norm": 1.9562314748764038, "learning_rate": 9.890654153484899e-07, "loss": 0.0314, "step": 156950 }, { "epoch": 1.6770126609327423, "grad_norm": 0.0588822141289711, "learning_rate": 9.890619206520887e-07, "loss": 0.0217, "step": 156960 }, { "epoch": 1.6771195042470217, "grad_norm": 6.207584381103516, "learning_rate": 9.890584254034999e-07, "loss": 0.0312, "step": 156970 }, { "epoch": 1.6772263475613014, "grad_norm": 2.6646623611450195, "learning_rate": 9.89054929602727e-07, "loss": 0.0181, "step": 156980 }, { "epoch": 1.677333190875581, "grad_norm": 7.144116401672363, "learning_rate": 9.890514332497742e-07, "loss": 0.0195, "step": 156990 }, { "epoch": 1.6774400341898605, "grad_norm": 0.17305825650691986, "learning_rate": 9.890479363446454e-07, "loss": 0.036, "step": 157000 }, { "epoch": 1.6775468775041402, "grad_norm": 6.8134636878967285, "learning_rate": 9.890444388873448e-07, "loss": 0.0637, "step": 157010 }, { "epoch": 1.67765372081842, "grad_norm": 2.271883964538574, "learning_rate": 9.890409408778758e-07, "loss": 0.0199, "step": 157020 }, { "epoch": 1.6777605641326994, "grad_norm": 0.5033513307571411, "learning_rate": 9.890374423162428e-07, "loss": 0.0851, "step": 157030 }, { "epoch": 1.677867407446979, "grad_norm": 0.38868188858032227, "learning_rate": 9.890339432024497e-07, "loss": 0.0368, "step": 157040 }, { "epoch": 1.6779742507612587, "grad_norm": 0.025809943675994873, "learning_rate": 9.890304435365003e-07, "loss": 0.0426, "step": 157050 }, { "epoch": 1.6780810940755382, "grad_norm": 0.005866995546966791, "learning_rate": 9.890269433183986e-07, "loss": 0.0015, "step": 157060 }, { "epoch": 1.6781879373898179, "grad_norm": 5.0748772621154785, "learning_rate": 9.890234425481486e-07, "loss": 0.0595, "step": 157070 }, { "epoch": 1.6782947807040975, "grad_norm": 3.8400611877441406, "learning_rate": 9.890199412257542e-07, "loss": 0.0184, "step": 157080 }, { "epoch": 1.678401624018377, "grad_norm": 0.03581249713897705, "learning_rate": 9.890164393512195e-07, "loss": 0.0326, "step": 157090 }, { "epoch": 1.6785084673326567, "grad_norm": 0.27002501487731934, "learning_rate": 9.890129369245482e-07, "loss": 0.0537, "step": 157100 }, { "epoch": 1.6786153106469364, "grad_norm": 4.1150898933410645, "learning_rate": 9.890094339457443e-07, "loss": 0.0912, "step": 157110 }, { "epoch": 1.6787221539612158, "grad_norm": 1.9983525276184082, "learning_rate": 9.89005930414812e-07, "loss": 0.0242, "step": 157120 }, { "epoch": 1.6788289972754955, "grad_norm": 0.045618392527103424, "learning_rate": 9.89002426331755e-07, "loss": 0.0081, "step": 157130 }, { "epoch": 1.6789358405897752, "grad_norm": 2.680447578430176, "learning_rate": 9.889989216965774e-07, "loss": 0.0331, "step": 157140 }, { "epoch": 1.6790426839040546, "grad_norm": 0.041148263961076736, "learning_rate": 9.88995416509283e-07, "loss": 0.0634, "step": 157150 }, { "epoch": 1.6791495272183343, "grad_norm": 0.1840963214635849, "learning_rate": 9.889919107698761e-07, "loss": 0.0641, "step": 157160 }, { "epoch": 1.679256370532614, "grad_norm": 4.872908115386963, "learning_rate": 9.889884044783602e-07, "loss": 0.0852, "step": 157170 }, { "epoch": 1.6793632138468935, "grad_norm": 0.5930031538009644, "learning_rate": 9.889848976347397e-07, "loss": 0.0418, "step": 157180 }, { "epoch": 1.6794700571611731, "grad_norm": 0.07708435505628586, "learning_rate": 9.889813902390183e-07, "loss": 0.0265, "step": 157190 }, { "epoch": 1.6795769004754528, "grad_norm": 0.009164616465568542, "learning_rate": 9.889778822912e-07, "loss": 0.0258, "step": 157200 }, { "epoch": 1.6796837437897323, "grad_norm": 0.11317292600870132, "learning_rate": 9.889743737912887e-07, "loss": 0.0335, "step": 157210 }, { "epoch": 1.679790587104012, "grad_norm": 1.7433794736862183, "learning_rate": 9.889708647392884e-07, "loss": 0.0349, "step": 157220 }, { "epoch": 1.6798974304182916, "grad_norm": 6.639228820800781, "learning_rate": 9.889673551352032e-07, "loss": 0.0254, "step": 157230 }, { "epoch": 1.680004273732571, "grad_norm": 0.2378052920103073, "learning_rate": 9.889638449790368e-07, "loss": 0.0076, "step": 157240 }, { "epoch": 1.6801111170468508, "grad_norm": 3.743457078933716, "learning_rate": 9.889603342707933e-07, "loss": 0.0056, "step": 157250 }, { "epoch": 1.6802179603611305, "grad_norm": 6.872307777404785, "learning_rate": 9.88956823010477e-07, "loss": 0.0075, "step": 157260 }, { "epoch": 1.68032480367541, "grad_norm": 2.759639263153076, "learning_rate": 9.889533111980913e-07, "loss": 0.0799, "step": 157270 }, { "epoch": 1.6804316469896896, "grad_norm": 5.4824442863464355, "learning_rate": 9.889497988336403e-07, "loss": 0.0086, "step": 157280 }, { "epoch": 1.6805384903039693, "grad_norm": 0.8316189646720886, "learning_rate": 9.889462859171281e-07, "loss": 0.0649, "step": 157290 }, { "epoch": 1.6806453336182487, "grad_norm": 10.516403198242188, "learning_rate": 9.88942772448559e-07, "loss": 0.0674, "step": 157300 }, { "epoch": 1.6807521769325284, "grad_norm": 0.21152622997760773, "learning_rate": 9.889392584279362e-07, "loss": 0.0029, "step": 157310 }, { "epoch": 1.680859020246808, "grad_norm": 2.4178271293640137, "learning_rate": 9.889357438552643e-07, "loss": 0.0324, "step": 157320 }, { "epoch": 1.6809658635610876, "grad_norm": 1.6095906496047974, "learning_rate": 9.889322287305468e-07, "loss": 0.0058, "step": 157330 }, { "epoch": 1.6810727068753673, "grad_norm": 11.762940406799316, "learning_rate": 9.889287130537881e-07, "loss": 0.0427, "step": 157340 }, { "epoch": 1.681179550189647, "grad_norm": 0.017619460821151733, "learning_rate": 9.889251968249922e-07, "loss": 0.0495, "step": 157350 }, { "epoch": 1.6812863935039264, "grad_norm": 5.065234184265137, "learning_rate": 9.889216800441624e-07, "loss": 0.0471, "step": 157360 }, { "epoch": 1.681393236818206, "grad_norm": 0.18413576483726501, "learning_rate": 9.889181627113035e-07, "loss": 0.0237, "step": 157370 }, { "epoch": 1.6815000801324858, "grad_norm": 0.038172975182533264, "learning_rate": 9.889146448264189e-07, "loss": 0.0153, "step": 157380 }, { "epoch": 1.6816069234467652, "grad_norm": 2.0737767219543457, "learning_rate": 9.889111263895129e-07, "loss": 0.0378, "step": 157390 }, { "epoch": 1.681713766761045, "grad_norm": 3.4677042961120605, "learning_rate": 9.889076074005891e-07, "loss": 0.0466, "step": 157400 }, { "epoch": 1.6818206100753246, "grad_norm": 0.07539208978414536, "learning_rate": 9.88904087859652e-07, "loss": 0.0325, "step": 157410 }, { "epoch": 1.681927453389604, "grad_norm": 2.4647610187530518, "learning_rate": 9.88900567766705e-07, "loss": 0.02, "step": 157420 }, { "epoch": 1.6820342967038837, "grad_norm": 0.008515464141964912, "learning_rate": 9.888970471217527e-07, "loss": 0.0259, "step": 157430 }, { "epoch": 1.6821411400181634, "grad_norm": 0.8765031099319458, "learning_rate": 9.888935259247983e-07, "loss": 0.0149, "step": 157440 }, { "epoch": 1.6822479833324429, "grad_norm": 2.812417507171631, "learning_rate": 9.888900041758465e-07, "loss": 0.0147, "step": 157450 }, { "epoch": 1.6823548266467225, "grad_norm": 0.030422193929553032, "learning_rate": 9.88886481874901e-07, "loss": 0.0088, "step": 157460 }, { "epoch": 1.6824616699610022, "grad_norm": 0.04204675555229187, "learning_rate": 9.888829590219656e-07, "loss": 0.0221, "step": 157470 }, { "epoch": 1.6825685132752817, "grad_norm": 0.01616169884800911, "learning_rate": 9.888794356170447e-07, "loss": 0.044, "step": 157480 }, { "epoch": 1.6826753565895614, "grad_norm": 0.15087802708148956, "learning_rate": 9.888759116601416e-07, "loss": 0.0588, "step": 157490 }, { "epoch": 1.682782199903841, "grad_norm": 7.047318935394287, "learning_rate": 9.888723871512609e-07, "loss": 0.0135, "step": 157500 }, { "epoch": 1.6828890432181205, "grad_norm": 1.0111027956008911, "learning_rate": 9.888688620904067e-07, "loss": 0.0025, "step": 157510 }, { "epoch": 1.6829958865324004, "grad_norm": 9.107860565185547, "learning_rate": 9.888653364775824e-07, "loss": 0.0443, "step": 157520 }, { "epoch": 1.6831027298466799, "grad_norm": 2.314816474914551, "learning_rate": 9.88861810312792e-07, "loss": 0.0178, "step": 157530 }, { "epoch": 1.6832095731609593, "grad_norm": 0.25165244936943054, "learning_rate": 9.8885828359604e-07, "loss": 0.0742, "step": 157540 }, { "epoch": 1.6833164164752392, "grad_norm": 0.32273274660110474, "learning_rate": 9.888547563273301e-07, "loss": 0.0112, "step": 157550 }, { "epoch": 1.6834232597895187, "grad_norm": 3.473299503326416, "learning_rate": 9.888512285066662e-07, "loss": 0.0245, "step": 157560 }, { "epoch": 1.6835301031037981, "grad_norm": 0.5128890872001648, "learning_rate": 9.888477001340525e-07, "loss": 0.0574, "step": 157570 }, { "epoch": 1.683636946418078, "grad_norm": 0.5169743895530701, "learning_rate": 9.888441712094926e-07, "loss": 0.0241, "step": 157580 }, { "epoch": 1.6837437897323575, "grad_norm": 0.8499975204467773, "learning_rate": 9.88840641732991e-07, "loss": 0.0127, "step": 157590 }, { "epoch": 1.683850633046637, "grad_norm": 1.562507152557373, "learning_rate": 9.888371117045512e-07, "loss": 0.0603, "step": 157600 }, { "epoch": 1.6839574763609169, "grad_norm": 8.562692642211914, "learning_rate": 9.888335811241775e-07, "loss": 0.093, "step": 157610 }, { "epoch": 1.6840643196751963, "grad_norm": 0.06938356906175613, "learning_rate": 9.88830049991874e-07, "loss": 0.1234, "step": 157620 }, { "epoch": 1.6841711629894758, "grad_norm": 1.0023741722106934, "learning_rate": 9.888265183076443e-07, "loss": 0.0252, "step": 157630 }, { "epoch": 1.6842780063037557, "grad_norm": 1.6729968786239624, "learning_rate": 9.888229860714925e-07, "loss": 0.0278, "step": 157640 }, { "epoch": 1.6843848496180351, "grad_norm": 0.060947857797145844, "learning_rate": 9.888194532834226e-07, "loss": 0.0205, "step": 157650 }, { "epoch": 1.6844916929323146, "grad_norm": 2.629399538040161, "learning_rate": 9.888159199434389e-07, "loss": 0.015, "step": 157660 }, { "epoch": 1.6845985362465945, "grad_norm": 5.342855930328369, "learning_rate": 9.88812386051545e-07, "loss": 0.0061, "step": 157670 }, { "epoch": 1.684705379560874, "grad_norm": 1.335791826248169, "learning_rate": 9.88808851607745e-07, "loss": 0.0162, "step": 157680 }, { "epoch": 1.6848122228751536, "grad_norm": 1.639001727104187, "learning_rate": 9.88805316612043e-07, "loss": 0.0261, "step": 157690 }, { "epoch": 1.6849190661894333, "grad_norm": 0.008607243187725544, "learning_rate": 9.888017810644427e-07, "loss": 0.0712, "step": 157700 }, { "epoch": 1.6850259095037128, "grad_norm": 0.3035428822040558, "learning_rate": 9.887982449649486e-07, "loss": 0.0544, "step": 157710 }, { "epoch": 1.6851327528179925, "grad_norm": 13.017144203186035, "learning_rate": 9.887947083135642e-07, "loss": 0.0583, "step": 157720 }, { "epoch": 1.6852395961322721, "grad_norm": 7.874812602996826, "learning_rate": 9.887911711102937e-07, "loss": 0.0529, "step": 157730 }, { "epoch": 1.6853464394465516, "grad_norm": 3.5422465801239014, "learning_rate": 9.887876333551411e-07, "loss": 0.0357, "step": 157740 }, { "epoch": 1.6854532827608313, "grad_norm": 1.5606433153152466, "learning_rate": 9.887840950481102e-07, "loss": 0.0095, "step": 157750 }, { "epoch": 1.685560126075111, "grad_norm": 5.087985992431641, "learning_rate": 9.887805561892053e-07, "loss": 0.0338, "step": 157760 }, { "epoch": 1.6856669693893904, "grad_norm": 2.3821418285369873, "learning_rate": 9.887770167784305e-07, "loss": 0.0519, "step": 157770 }, { "epoch": 1.68577381270367, "grad_norm": 0.10460791736841202, "learning_rate": 9.887734768157892e-07, "loss": 0.0237, "step": 157780 }, { "epoch": 1.6858806560179498, "grad_norm": 0.4478779435157776, "learning_rate": 9.887699363012858e-07, "loss": 0.0097, "step": 157790 }, { "epoch": 1.6859874993322292, "grad_norm": 5.989852428436279, "learning_rate": 9.887663952349245e-07, "loss": 0.0124, "step": 157800 }, { "epoch": 1.686094342646509, "grad_norm": 1.2932459115982056, "learning_rate": 9.887628536167088e-07, "loss": 0.0146, "step": 157810 }, { "epoch": 1.6862011859607886, "grad_norm": 13.125216484069824, "learning_rate": 9.88759311446643e-07, "loss": 0.0493, "step": 157820 }, { "epoch": 1.686308029275068, "grad_norm": 9.519474029541016, "learning_rate": 9.887557687247312e-07, "loss": 0.043, "step": 157830 }, { "epoch": 1.6864148725893477, "grad_norm": 4.666151523590088, "learning_rate": 9.88752225450977e-07, "loss": 0.0133, "step": 157840 }, { "epoch": 1.6865217159036274, "grad_norm": 0.10226774215698242, "learning_rate": 9.88748681625385e-07, "loss": 0.0216, "step": 157850 }, { "epoch": 1.6866285592179069, "grad_norm": 4.006784915924072, "learning_rate": 9.887451372479584e-07, "loss": 0.026, "step": 157860 }, { "epoch": 1.6867354025321866, "grad_norm": 1.4387692213058472, "learning_rate": 9.887415923187021e-07, "loss": 0.0501, "step": 157870 }, { "epoch": 1.6868422458464662, "grad_norm": 1.2202677726745605, "learning_rate": 9.887380468376193e-07, "loss": 0.0471, "step": 157880 }, { "epoch": 1.6869490891607457, "grad_norm": 10.715877532958984, "learning_rate": 9.887345008047147e-07, "loss": 0.0197, "step": 157890 }, { "epoch": 1.6870559324750254, "grad_norm": 1.079394817352295, "learning_rate": 9.887309542199918e-07, "loss": 0.0498, "step": 157900 }, { "epoch": 1.687162775789305, "grad_norm": 4.546741008758545, "learning_rate": 9.887274070834545e-07, "loss": 0.0795, "step": 157910 }, { "epoch": 1.6872696191035845, "grad_norm": 7.9004106521606445, "learning_rate": 9.887238593951074e-07, "loss": 0.0157, "step": 157920 }, { "epoch": 1.6873764624178642, "grad_norm": 1.1505407094955444, "learning_rate": 9.887203111549543e-07, "loss": 0.0115, "step": 157930 }, { "epoch": 1.687483305732144, "grad_norm": 0.05902010574936867, "learning_rate": 9.88716762362999e-07, "loss": 0.1064, "step": 157940 }, { "epoch": 1.6875901490464233, "grad_norm": 11.109185218811035, "learning_rate": 9.887132130192454e-07, "loss": 0.0518, "step": 157950 }, { "epoch": 1.687696992360703, "grad_norm": 0.03264300897717476, "learning_rate": 9.887096631236975e-07, "loss": 0.0126, "step": 157960 }, { "epoch": 1.6878038356749827, "grad_norm": 0.012192199006676674, "learning_rate": 9.8870611267636e-07, "loss": 0.0289, "step": 157970 }, { "epoch": 1.6879106789892622, "grad_norm": 1.3630545139312744, "learning_rate": 9.88702561677236e-07, "loss": 0.0097, "step": 157980 }, { "epoch": 1.6880175223035419, "grad_norm": 1.2804858684539795, "learning_rate": 9.886990101263303e-07, "loss": 0.0392, "step": 157990 }, { "epoch": 1.6881243656178215, "grad_norm": 0.13725635409355164, "learning_rate": 9.886954580236462e-07, "loss": 0.0188, "step": 158000 }, { "epoch": 1.688231208932101, "grad_norm": 3.1372249126434326, "learning_rate": 9.886919053691883e-07, "loss": 0.0343, "step": 158010 }, { "epoch": 1.6883380522463807, "grad_norm": 0.5038674473762512, "learning_rate": 9.8868835216296e-07, "loss": 0.0189, "step": 158020 }, { "epoch": 1.6884448955606604, "grad_norm": 5.155529022216797, "learning_rate": 9.886847984049659e-07, "loss": 0.0219, "step": 158030 }, { "epoch": 1.6885517388749398, "grad_norm": 3.7404558658599854, "learning_rate": 9.8868124409521e-07, "loss": 0.0721, "step": 158040 }, { "epoch": 1.6886585821892195, "grad_norm": 1.9615933895111084, "learning_rate": 9.886776892336958e-07, "loss": 0.0392, "step": 158050 }, { "epoch": 1.6887654255034992, "grad_norm": 2.9688751697540283, "learning_rate": 9.886741338204277e-07, "loss": 0.0878, "step": 158060 }, { "epoch": 1.6888722688177786, "grad_norm": 0.11513817310333252, "learning_rate": 9.886705778554097e-07, "loss": 0.0273, "step": 158070 }, { "epoch": 1.6889791121320583, "grad_norm": 9.969891548156738, "learning_rate": 9.886670213386455e-07, "loss": 0.0919, "step": 158080 }, { "epoch": 1.689085955446338, "grad_norm": 0.01700166054069996, "learning_rate": 9.886634642701395e-07, "loss": 0.0408, "step": 158090 }, { "epoch": 1.6891927987606175, "grad_norm": 0.07835077494382858, "learning_rate": 9.886599066498955e-07, "loss": 0.0033, "step": 158100 }, { "epoch": 1.6892996420748971, "grad_norm": 7.673857688903809, "learning_rate": 9.886563484779178e-07, "loss": 0.069, "step": 158110 }, { "epoch": 1.6894064853891768, "grad_norm": 10.05218505859375, "learning_rate": 9.886527897542101e-07, "loss": 0.0463, "step": 158120 }, { "epoch": 1.6895133287034563, "grad_norm": 1.2951089143753052, "learning_rate": 9.886492304787763e-07, "loss": 0.0111, "step": 158130 }, { "epoch": 1.689620172017736, "grad_norm": 0.00953906774520874, "learning_rate": 9.886456706516208e-07, "loss": 0.0073, "step": 158140 }, { "epoch": 1.6897270153320156, "grad_norm": 4.76497220993042, "learning_rate": 9.886421102727476e-07, "loss": 0.0764, "step": 158150 }, { "epoch": 1.689833858646295, "grad_norm": 7.211604118347168, "learning_rate": 9.886385493421604e-07, "loss": 0.0274, "step": 158160 }, { "epoch": 1.6899407019605748, "grad_norm": 0.2610050141811371, "learning_rate": 9.886349878598635e-07, "loss": 0.0409, "step": 158170 }, { "epoch": 1.6900475452748545, "grad_norm": 0.5546790957450867, "learning_rate": 9.886314258258608e-07, "loss": 0.0092, "step": 158180 }, { "epoch": 1.690154388589134, "grad_norm": 0.015418820083141327, "learning_rate": 9.886278632401562e-07, "loss": 0.0788, "step": 158190 }, { "epoch": 1.6902612319034136, "grad_norm": 5.726964473724365, "learning_rate": 9.88624300102754e-07, "loss": 0.023, "step": 158200 }, { "epoch": 1.6903680752176933, "grad_norm": 0.01429238822311163, "learning_rate": 9.88620736413658e-07, "loss": 0.0016, "step": 158210 }, { "epoch": 1.6904749185319727, "grad_norm": 0.005076894536614418, "learning_rate": 9.886171721728724e-07, "loss": 0.0352, "step": 158220 }, { "epoch": 1.6905817618462524, "grad_norm": 3.9903485774993896, "learning_rate": 9.88613607380401e-07, "loss": 0.0246, "step": 158230 }, { "epoch": 1.690688605160532, "grad_norm": 2.2441463470458984, "learning_rate": 9.886100420362483e-07, "loss": 0.0319, "step": 158240 }, { "epoch": 1.6907954484748116, "grad_norm": 6.078979015350342, "learning_rate": 9.886064761404177e-07, "loss": 0.038, "step": 158250 }, { "epoch": 1.6909022917890915, "grad_norm": 0.10546905547380447, "learning_rate": 9.886029096929134e-07, "loss": 0.0323, "step": 158260 }, { "epoch": 1.691009135103371, "grad_norm": 1.1037794351577759, "learning_rate": 9.885993426937398e-07, "loss": 0.029, "step": 158270 }, { "epoch": 1.6911159784176504, "grad_norm": 0.651239275932312, "learning_rate": 9.885957751429006e-07, "loss": 0.0579, "step": 158280 }, { "epoch": 1.6912228217319303, "grad_norm": 1.5266088247299194, "learning_rate": 9.885922070403998e-07, "loss": 0.0353, "step": 158290 }, { "epoch": 1.6913296650462097, "grad_norm": 10.345457077026367, "learning_rate": 9.885886383862415e-07, "loss": 0.0385, "step": 158300 }, { "epoch": 1.6914365083604892, "grad_norm": 0.14811502397060394, "learning_rate": 9.8858506918043e-07, "loss": 0.0414, "step": 158310 }, { "epoch": 1.691543351674769, "grad_norm": 0.4388628304004669, "learning_rate": 9.88581499422969e-07, "loss": 0.0133, "step": 158320 }, { "epoch": 1.6916501949890486, "grad_norm": 1.1794556379318237, "learning_rate": 9.885779291138624e-07, "loss": 0.0428, "step": 158330 }, { "epoch": 1.691757038303328, "grad_norm": 0.032006166875362396, "learning_rate": 9.885743582531145e-07, "loss": 0.034, "step": 158340 }, { "epoch": 1.691863881617608, "grad_norm": 0.16935211420059204, "learning_rate": 9.885707868407294e-07, "loss": 0.0124, "step": 158350 }, { "epoch": 1.6919707249318874, "grad_norm": 3.740231513977051, "learning_rate": 9.88567214876711e-07, "loss": 0.0526, "step": 158360 }, { "epoch": 1.6920775682461668, "grad_norm": 0.49466672539711, "learning_rate": 9.885636423610632e-07, "loss": 0.0397, "step": 158370 }, { "epoch": 1.6921844115604467, "grad_norm": 0.011538073420524597, "learning_rate": 9.885600692937902e-07, "loss": 0.0226, "step": 158380 }, { "epoch": 1.6922912548747262, "grad_norm": 2.022925853729248, "learning_rate": 9.885564956748962e-07, "loss": 0.0162, "step": 158390 }, { "epoch": 1.6923980981890057, "grad_norm": 0.684794008731842, "learning_rate": 9.885529215043848e-07, "loss": 0.023, "step": 158400 }, { "epoch": 1.6925049415032856, "grad_norm": 0.4566532075405121, "learning_rate": 9.885493467822606e-07, "loss": 0.0301, "step": 158410 }, { "epoch": 1.692611784817565, "grad_norm": 7.893770694732666, "learning_rate": 9.88545771508527e-07, "loss": 0.0226, "step": 158420 }, { "epoch": 1.6927186281318447, "grad_norm": 0.11977052688598633, "learning_rate": 9.885421956831886e-07, "loss": 0.0115, "step": 158430 }, { "epoch": 1.6928254714461244, "grad_norm": 1.328783631324768, "learning_rate": 9.88538619306249e-07, "loss": 0.0086, "step": 158440 }, { "epoch": 1.6929323147604038, "grad_norm": 0.08784815669059753, "learning_rate": 9.885350423777125e-07, "loss": 0.0237, "step": 158450 }, { "epoch": 1.6930391580746835, "grad_norm": 0.08697628229856491, "learning_rate": 9.88531464897583e-07, "loss": 0.0634, "step": 158460 }, { "epoch": 1.6931460013889632, "grad_norm": 9.248641014099121, "learning_rate": 9.885278868658647e-07, "loss": 0.039, "step": 158470 }, { "epoch": 1.6932528447032427, "grad_norm": 3.3397836685180664, "learning_rate": 9.885243082825613e-07, "loss": 0.0854, "step": 158480 }, { "epoch": 1.6933596880175223, "grad_norm": 0.6715185642242432, "learning_rate": 9.885207291476773e-07, "loss": 0.0073, "step": 158490 }, { "epoch": 1.693466531331802, "grad_norm": 11.492610931396484, "learning_rate": 9.885171494612166e-07, "loss": 0.0396, "step": 158500 }, { "epoch": 1.6935733746460815, "grad_norm": 0.6780117154121399, "learning_rate": 9.885135692231832e-07, "loss": 0.011, "step": 158510 }, { "epoch": 1.6936802179603612, "grad_norm": 0.023874705657362938, "learning_rate": 9.885099884335809e-07, "loss": 0.0077, "step": 158520 }, { "epoch": 1.6937870612746408, "grad_norm": 0.09318932890892029, "learning_rate": 9.88506407092414e-07, "loss": 0.0269, "step": 158530 }, { "epoch": 1.6938939045889203, "grad_norm": 1.2074041366577148, "learning_rate": 9.885028251996865e-07, "loss": 0.0509, "step": 158540 }, { "epoch": 1.6940007479032, "grad_norm": 0.015064331702888012, "learning_rate": 9.884992427554026e-07, "loss": 0.0281, "step": 158550 }, { "epoch": 1.6941075912174797, "grad_norm": 11.2850923538208, "learning_rate": 9.88495659759566e-07, "loss": 0.0363, "step": 158560 }, { "epoch": 1.6942144345317591, "grad_norm": 1.9370781183242798, "learning_rate": 9.88492076212181e-07, "loss": 0.0218, "step": 158570 }, { "epoch": 1.6943212778460388, "grad_norm": 3.5939362049102783, "learning_rate": 9.884884921132516e-07, "loss": 0.0167, "step": 158580 }, { "epoch": 1.6944281211603185, "grad_norm": 4.2691144943237305, "learning_rate": 9.884849074627819e-07, "loss": 0.0423, "step": 158590 }, { "epoch": 1.694534964474598, "grad_norm": 4.017642974853516, "learning_rate": 9.884813222607758e-07, "loss": 0.0159, "step": 158600 }, { "epoch": 1.6946418077888776, "grad_norm": 18.467233657836914, "learning_rate": 9.884777365072373e-07, "loss": 0.0445, "step": 158610 }, { "epoch": 1.6947486511031573, "grad_norm": 0.1853121966123581, "learning_rate": 9.884741502021706e-07, "loss": 0.0304, "step": 158620 }, { "epoch": 1.6948554944174368, "grad_norm": 0.40152862668037415, "learning_rate": 9.884705633455797e-07, "loss": 0.0483, "step": 158630 }, { "epoch": 1.6949623377317165, "grad_norm": 8.19189739227295, "learning_rate": 9.884669759374689e-07, "loss": 0.072, "step": 158640 }, { "epoch": 1.6950691810459961, "grad_norm": 8.45375919342041, "learning_rate": 9.884633879778417e-07, "loss": 0.0614, "step": 158650 }, { "epoch": 1.6951760243602756, "grad_norm": 5.090472221374512, "learning_rate": 9.884597994667028e-07, "loss": 0.032, "step": 158660 }, { "epoch": 1.6952828676745553, "grad_norm": 0.39744165539741516, "learning_rate": 9.884562104040556e-07, "loss": 0.0117, "step": 158670 }, { "epoch": 1.695389710988835, "grad_norm": 4.983198165893555, "learning_rate": 9.884526207899046e-07, "loss": 0.0235, "step": 158680 }, { "epoch": 1.6954965543031144, "grad_norm": 1.0270363092422485, "learning_rate": 9.884490306242539e-07, "loss": 0.0653, "step": 158690 }, { "epoch": 1.695603397617394, "grad_norm": 3.8137598037719727, "learning_rate": 9.884454399071072e-07, "loss": 0.0217, "step": 158700 }, { "epoch": 1.6957102409316738, "grad_norm": 8.290999412536621, "learning_rate": 9.884418486384688e-07, "loss": 0.0404, "step": 158710 }, { "epoch": 1.6958170842459532, "grad_norm": 0.08110588043928146, "learning_rate": 9.884382568183427e-07, "loss": 0.0344, "step": 158720 }, { "epoch": 1.695923927560233, "grad_norm": 7.211690902709961, "learning_rate": 9.884346644467327e-07, "loss": 0.0601, "step": 158730 }, { "epoch": 1.6960307708745126, "grad_norm": 0.07938960939645767, "learning_rate": 9.884310715236435e-07, "loss": 0.0143, "step": 158740 }, { "epoch": 1.696137614188792, "grad_norm": 8.233128547668457, "learning_rate": 9.884274780490784e-07, "loss": 0.0165, "step": 158750 }, { "epoch": 1.6962444575030717, "grad_norm": 4.676933765411377, "learning_rate": 9.88423884023042e-07, "loss": 0.0335, "step": 158760 }, { "epoch": 1.6963513008173514, "grad_norm": 12.121729850769043, "learning_rate": 9.884202894455382e-07, "loss": 0.1112, "step": 158770 }, { "epoch": 1.6964581441316309, "grad_norm": 0.4064781665802002, "learning_rate": 9.884166943165709e-07, "loss": 0.0385, "step": 158780 }, { "epoch": 1.6965649874459106, "grad_norm": 11.711609840393066, "learning_rate": 9.884130986361444e-07, "loss": 0.0855, "step": 158790 }, { "epoch": 1.6966718307601902, "grad_norm": 5.533663749694824, "learning_rate": 9.884095024042627e-07, "loss": 0.0336, "step": 158800 }, { "epoch": 1.6967786740744697, "grad_norm": 0.2549879848957062, "learning_rate": 9.884059056209296e-07, "loss": 0.0454, "step": 158810 }, { "epoch": 1.6968855173887494, "grad_norm": 0.007472391240298748, "learning_rate": 9.884023082861494e-07, "loss": 0.0516, "step": 158820 }, { "epoch": 1.696992360703029, "grad_norm": 3.601327657699585, "learning_rate": 9.883987103999263e-07, "loss": 0.0167, "step": 158830 }, { "epoch": 1.6970992040173085, "grad_norm": 0.3374442756175995, "learning_rate": 9.883951119622642e-07, "loss": 0.0798, "step": 158840 }, { "epoch": 1.6972060473315882, "grad_norm": 5.747332572937012, "learning_rate": 9.88391512973167e-07, "loss": 0.0291, "step": 158850 }, { "epoch": 1.6973128906458679, "grad_norm": 0.11290960758924484, "learning_rate": 9.88387913432639e-07, "loss": 0.0174, "step": 158860 }, { "epoch": 1.6974197339601473, "grad_norm": 0.8851197957992554, "learning_rate": 9.883843133406842e-07, "loss": 0.0262, "step": 158870 }, { "epoch": 1.697526577274427, "grad_norm": 2.600094795227051, "learning_rate": 9.883807126973068e-07, "loss": 0.019, "step": 158880 }, { "epoch": 1.6976334205887067, "grad_norm": 2.190556287765503, "learning_rate": 9.883771115025104e-07, "loss": 0.0219, "step": 158890 }, { "epoch": 1.6977402639029862, "grad_norm": 0.05429179221391678, "learning_rate": 9.883735097562995e-07, "loss": 0.0265, "step": 158900 }, { "epoch": 1.6978471072172658, "grad_norm": 4.558253288269043, "learning_rate": 9.88369907458678e-07, "loss": 0.0303, "step": 158910 }, { "epoch": 1.6979539505315455, "grad_norm": 4.340445041656494, "learning_rate": 9.883663046096502e-07, "loss": 0.0463, "step": 158920 }, { "epoch": 1.698060793845825, "grad_norm": 0.032102663069963455, "learning_rate": 9.8836270120922e-07, "loss": 0.0235, "step": 158930 }, { "epoch": 1.6981676371601047, "grad_norm": 0.05432434007525444, "learning_rate": 9.883590972573912e-07, "loss": 0.0241, "step": 158940 }, { "epoch": 1.6982744804743843, "grad_norm": 2.8861966133117676, "learning_rate": 9.883554927541681e-07, "loss": 0.0327, "step": 158950 }, { "epoch": 1.6983813237886638, "grad_norm": 1.183057427406311, "learning_rate": 9.883518876995552e-07, "loss": 0.042, "step": 158960 }, { "epoch": 1.6984881671029435, "grad_norm": 4.533578395843506, "learning_rate": 9.883482820935558e-07, "loss": 0.0069, "step": 158970 }, { "epoch": 1.6985950104172232, "grad_norm": 4.875031471252441, "learning_rate": 9.883446759361744e-07, "loss": 0.0359, "step": 158980 }, { "epoch": 1.6987018537315026, "grad_norm": 9.882646560668945, "learning_rate": 9.88341069227415e-07, "loss": 0.0208, "step": 158990 }, { "epoch": 1.6988086970457825, "grad_norm": 1.9452532529830933, "learning_rate": 9.883374619672817e-07, "loss": 0.0607, "step": 159000 }, { "epoch": 1.698915540360062, "grad_norm": 4.900625228881836, "learning_rate": 9.883338541557786e-07, "loss": 0.0786, "step": 159010 }, { "epoch": 1.6990223836743414, "grad_norm": 1.5994760990142822, "learning_rate": 9.883302457929097e-07, "loss": 0.0204, "step": 159020 }, { "epoch": 1.6991292269886213, "grad_norm": 0.19746749103069305, "learning_rate": 9.883266368786791e-07, "loss": 0.0579, "step": 159030 }, { "epoch": 1.6992360703029008, "grad_norm": 0.8091562986373901, "learning_rate": 9.88323027413091e-07, "loss": 0.0081, "step": 159040 }, { "epoch": 1.6993429136171803, "grad_norm": 0.06678719073534012, "learning_rate": 9.88319417396149e-07, "loss": 0.0414, "step": 159050 }, { "epoch": 1.6994497569314602, "grad_norm": 0.17558713257312775, "learning_rate": 9.883158068278577e-07, "loss": 0.0545, "step": 159060 }, { "epoch": 1.6995566002457396, "grad_norm": 0.25644850730895996, "learning_rate": 9.883121957082212e-07, "loss": 0.0089, "step": 159070 }, { "epoch": 1.699663443560019, "grad_norm": 2.7478203773498535, "learning_rate": 9.883085840372431e-07, "loss": 0.0576, "step": 159080 }, { "epoch": 1.699770286874299, "grad_norm": 1.638854742050171, "learning_rate": 9.88304971814928e-07, "loss": 0.0343, "step": 159090 }, { "epoch": 1.6998771301885784, "grad_norm": 2.5280051231384277, "learning_rate": 9.883013590412794e-07, "loss": 0.0197, "step": 159100 }, { "epoch": 1.699983973502858, "grad_norm": 2.4694786071777344, "learning_rate": 9.88297745716302e-07, "loss": 0.0394, "step": 159110 }, { "epoch": 1.7000908168171378, "grad_norm": 3.683840751647949, "learning_rate": 9.882941318399995e-07, "loss": 0.0285, "step": 159120 }, { "epoch": 1.7001976601314173, "grad_norm": 0.33939123153686523, "learning_rate": 9.882905174123758e-07, "loss": 0.0982, "step": 159130 }, { "epoch": 1.7003045034456967, "grad_norm": 0.34459561109542847, "learning_rate": 9.882869024334357e-07, "loss": 0.0172, "step": 159140 }, { "epoch": 1.7004113467599766, "grad_norm": 0.5021811127662659, "learning_rate": 9.882832869031826e-07, "loss": 0.0162, "step": 159150 }, { "epoch": 1.700518190074256, "grad_norm": 0.05983029678463936, "learning_rate": 9.88279670821621e-07, "loss": 0.0725, "step": 159160 }, { "epoch": 1.7006250333885358, "grad_norm": 1.6376169919967651, "learning_rate": 9.882760541887546e-07, "loss": 0.0339, "step": 159170 }, { "epoch": 1.7007318767028154, "grad_norm": 0.8268659710884094, "learning_rate": 9.882724370045877e-07, "loss": 0.0103, "step": 159180 }, { "epoch": 1.700838720017095, "grad_norm": 0.46182796359062195, "learning_rate": 9.882688192691244e-07, "loss": 0.0147, "step": 159190 }, { "epoch": 1.7009455633313746, "grad_norm": 10.923103332519531, "learning_rate": 9.882652009823688e-07, "loss": 0.0322, "step": 159200 }, { "epoch": 1.7010524066456543, "grad_norm": 0.11733994632959366, "learning_rate": 9.88261582144325e-07, "loss": 0.0212, "step": 159210 }, { "epoch": 1.7011592499599337, "grad_norm": 7.122005462646484, "learning_rate": 9.882579627549968e-07, "loss": 0.0635, "step": 159220 }, { "epoch": 1.7012660932742134, "grad_norm": 0.04464927688241005, "learning_rate": 9.882543428143886e-07, "loss": 0.0203, "step": 159230 }, { "epoch": 1.701372936588493, "grad_norm": 5.376049041748047, "learning_rate": 9.882507223225045e-07, "loss": 0.0126, "step": 159240 }, { "epoch": 1.7014797799027725, "grad_norm": 17.000240325927734, "learning_rate": 9.882471012793485e-07, "loss": 0.039, "step": 159250 }, { "epoch": 1.7015866232170522, "grad_norm": 0.06687907129526138, "learning_rate": 9.882434796849246e-07, "loss": 0.0155, "step": 159260 }, { "epoch": 1.701693466531332, "grad_norm": 0.9867922067642212, "learning_rate": 9.882398575392368e-07, "loss": 0.0578, "step": 159270 }, { "epoch": 1.7018003098456114, "grad_norm": 5.916070938110352, "learning_rate": 9.882362348422896e-07, "loss": 0.0444, "step": 159280 }, { "epoch": 1.701907153159891, "grad_norm": 3.7110564708709717, "learning_rate": 9.882326115940866e-07, "loss": 0.0213, "step": 159290 }, { "epoch": 1.7020139964741707, "grad_norm": 1.1445643901824951, "learning_rate": 9.882289877946325e-07, "loss": 0.0113, "step": 159300 }, { "epoch": 1.7021208397884502, "grad_norm": 4.670328617095947, "learning_rate": 9.882253634439307e-07, "loss": 0.0478, "step": 159310 }, { "epoch": 1.7022276831027299, "grad_norm": 2.7295315265655518, "learning_rate": 9.882217385419857e-07, "loss": 0.024, "step": 159320 }, { "epoch": 1.7023345264170096, "grad_norm": 2.4871931076049805, "learning_rate": 9.882181130888015e-07, "loss": 0.0334, "step": 159330 }, { "epoch": 1.702441369731289, "grad_norm": 5.792416095733643, "learning_rate": 9.882144870843823e-07, "loss": 0.0293, "step": 159340 }, { "epoch": 1.7025482130455687, "grad_norm": 1.8072227239608765, "learning_rate": 9.88210860528732e-07, "loss": 0.0152, "step": 159350 }, { "epoch": 1.7026550563598484, "grad_norm": 6.790393829345703, "learning_rate": 9.882072334218548e-07, "loss": 0.0392, "step": 159360 }, { "epoch": 1.7027618996741278, "grad_norm": 0.2729317843914032, "learning_rate": 9.882036057637548e-07, "loss": 0.059, "step": 159370 }, { "epoch": 1.7028687429884075, "grad_norm": 3.952786445617676, "learning_rate": 9.881999775544362e-07, "loss": 0.0054, "step": 159380 }, { "epoch": 1.7029755863026872, "grad_norm": 3.113360643386841, "learning_rate": 9.881963487939028e-07, "loss": 0.0171, "step": 159390 }, { "epoch": 1.7030824296169667, "grad_norm": 0.1345144659280777, "learning_rate": 9.88192719482159e-07, "loss": 0.0132, "step": 159400 }, { "epoch": 1.7031892729312463, "grad_norm": 0.31324875354766846, "learning_rate": 9.881890896192087e-07, "loss": 0.011, "step": 159410 }, { "epoch": 1.703296116245526, "grad_norm": 18.913848876953125, "learning_rate": 9.88185459205056e-07, "loss": 0.0765, "step": 159420 }, { "epoch": 1.7034029595598055, "grad_norm": 0.015334622003138065, "learning_rate": 9.88181828239705e-07, "loss": 0.0309, "step": 159430 }, { "epoch": 1.7035098028740852, "grad_norm": 6.537346363067627, "learning_rate": 9.881781967231601e-07, "loss": 0.0473, "step": 159440 }, { "epoch": 1.7036166461883648, "grad_norm": 0.023112909868359566, "learning_rate": 9.881745646554252e-07, "loss": 0.022, "step": 159450 }, { "epoch": 1.7037234895026443, "grad_norm": 0.15389278531074524, "learning_rate": 9.881709320365043e-07, "loss": 0.0132, "step": 159460 }, { "epoch": 1.703830332816924, "grad_norm": 0.02112298272550106, "learning_rate": 9.881672988664015e-07, "loss": 0.0484, "step": 159470 }, { "epoch": 1.7039371761312037, "grad_norm": 2.79927659034729, "learning_rate": 9.88163665145121e-07, "loss": 0.0375, "step": 159480 }, { "epoch": 1.7040440194454831, "grad_norm": 4.910545825958252, "learning_rate": 9.88160030872667e-07, "loss": 0.0631, "step": 159490 }, { "epoch": 1.7041508627597628, "grad_norm": 0.07003428786993027, "learning_rate": 9.881563960490433e-07, "loss": 0.0112, "step": 159500 }, { "epoch": 1.7042577060740425, "grad_norm": 0.5066686272621155, "learning_rate": 9.881527606742544e-07, "loss": 0.0259, "step": 159510 }, { "epoch": 1.704364549388322, "grad_norm": 0.1165359765291214, "learning_rate": 9.88149124748304e-07, "loss": 0.0298, "step": 159520 }, { "epoch": 1.7044713927026016, "grad_norm": 3.242255449295044, "learning_rate": 9.881454882711964e-07, "loss": 0.0209, "step": 159530 }, { "epoch": 1.7045782360168813, "grad_norm": 9.198100090026855, "learning_rate": 9.881418512429358e-07, "loss": 0.0284, "step": 159540 }, { "epoch": 1.7046850793311608, "grad_norm": 0.5718401670455933, "learning_rate": 9.881382136635262e-07, "loss": 0.0354, "step": 159550 }, { "epoch": 1.7047919226454404, "grad_norm": 5.041842460632324, "learning_rate": 9.881345755329718e-07, "loss": 0.0193, "step": 159560 }, { "epoch": 1.7048987659597201, "grad_norm": 1.5725939273834229, "learning_rate": 9.881309368512765e-07, "loss": 0.0061, "step": 159570 }, { "epoch": 1.7050056092739996, "grad_norm": 0.1668800413608551, "learning_rate": 9.881272976184445e-07, "loss": 0.0197, "step": 159580 }, { "epoch": 1.7051124525882793, "grad_norm": 0.9971755743026733, "learning_rate": 9.8812365783448e-07, "loss": 0.0429, "step": 159590 }, { "epoch": 1.705219295902559, "grad_norm": 2.4945785999298096, "learning_rate": 9.88120017499387e-07, "loss": 0.0133, "step": 159600 }, { "epoch": 1.7053261392168384, "grad_norm": 0.07411246746778488, "learning_rate": 9.881163766131697e-07, "loss": 0.0312, "step": 159610 }, { "epoch": 1.705432982531118, "grad_norm": 1.522307276725769, "learning_rate": 9.88112735175832e-07, "loss": 0.0194, "step": 159620 }, { "epoch": 1.7055398258453978, "grad_norm": 4.204326629638672, "learning_rate": 9.881090931873785e-07, "loss": 0.0264, "step": 159630 }, { "epoch": 1.7056466691596772, "grad_norm": 4.176097393035889, "learning_rate": 9.88105450647813e-07, "loss": 0.0243, "step": 159640 }, { "epoch": 1.705753512473957, "grad_norm": 0.10342448204755783, "learning_rate": 9.881018075571393e-07, "loss": 0.0313, "step": 159650 }, { "epoch": 1.7058603557882366, "grad_norm": 19.30778694152832, "learning_rate": 9.88098163915362e-07, "loss": 0.043, "step": 159660 }, { "epoch": 1.705967199102516, "grad_norm": 0.10038553923368454, "learning_rate": 9.880945197224848e-07, "loss": 0.0023, "step": 159670 }, { "epoch": 1.7060740424167957, "grad_norm": 6.932868003845215, "learning_rate": 9.880908749785124e-07, "loss": 0.0396, "step": 159680 }, { "epoch": 1.7061808857310754, "grad_norm": 2.735635280609131, "learning_rate": 9.880872296834483e-07, "loss": 0.0194, "step": 159690 }, { "epoch": 1.7062877290453549, "grad_norm": 0.09325720369815826, "learning_rate": 9.880835838372971e-07, "loss": 0.0195, "step": 159700 }, { "epoch": 1.7063945723596345, "grad_norm": 5.91907262802124, "learning_rate": 9.880799374400625e-07, "loss": 0.091, "step": 159710 }, { "epoch": 1.7065014156739142, "grad_norm": 0.9398869276046753, "learning_rate": 9.88076290491749e-07, "loss": 0.0242, "step": 159720 }, { "epoch": 1.7066082589881937, "grad_norm": 4.856415748596191, "learning_rate": 9.880726429923605e-07, "loss": 0.0751, "step": 159730 }, { "epoch": 1.7067151023024736, "grad_norm": 0.15100784599781036, "learning_rate": 9.88068994941901e-07, "loss": 0.0088, "step": 159740 }, { "epoch": 1.706821945616753, "grad_norm": 0.0911862775683403, "learning_rate": 9.880653463403749e-07, "loss": 0.0802, "step": 159750 }, { "epoch": 1.7069287889310325, "grad_norm": 0.9239965677261353, "learning_rate": 9.880616971877861e-07, "loss": 0.012, "step": 159760 }, { "epoch": 1.7070356322453124, "grad_norm": 3.049572467803955, "learning_rate": 9.88058047484139e-07, "loss": 0.094, "step": 159770 }, { "epoch": 1.7071424755595919, "grad_norm": 6.072941303253174, "learning_rate": 9.880543972294373e-07, "loss": 0.0334, "step": 159780 }, { "epoch": 1.7072493188738713, "grad_norm": 0.03392954170703888, "learning_rate": 9.880507464236854e-07, "loss": 0.0385, "step": 159790 }, { "epoch": 1.7073561621881512, "grad_norm": 0.16720908880233765, "learning_rate": 9.880470950668874e-07, "loss": 0.0294, "step": 159800 }, { "epoch": 1.7074630055024307, "grad_norm": 8.491348266601562, "learning_rate": 9.880434431590474e-07, "loss": 0.0364, "step": 159810 }, { "epoch": 1.7075698488167101, "grad_norm": 4.936674118041992, "learning_rate": 9.880397907001693e-07, "loss": 0.0154, "step": 159820 }, { "epoch": 1.70767669213099, "grad_norm": 5.267703056335449, "learning_rate": 9.880361376902578e-07, "loss": 0.0305, "step": 159830 }, { "epoch": 1.7077835354452695, "grad_norm": 0.03119628317654133, "learning_rate": 9.880324841293165e-07, "loss": 0.0244, "step": 159840 }, { "epoch": 1.707890378759549, "grad_norm": 5.954025745391846, "learning_rate": 9.880288300173499e-07, "loss": 0.0202, "step": 159850 }, { "epoch": 1.7079972220738289, "grad_norm": 0.11004159599542618, "learning_rate": 9.880251753543616e-07, "loss": 0.0511, "step": 159860 }, { "epoch": 1.7081040653881083, "grad_norm": 6.818033695220947, "learning_rate": 9.880215201403561e-07, "loss": 0.0349, "step": 159870 }, { "epoch": 1.7082109087023878, "grad_norm": 0.6859483122825623, "learning_rate": 9.880178643753376e-07, "loss": 0.0422, "step": 159880 }, { "epoch": 1.7083177520166677, "grad_norm": 1.768607258796692, "learning_rate": 9.8801420805931e-07, "loss": 0.0171, "step": 159890 }, { "epoch": 1.7084245953309471, "grad_norm": 3.6664681434631348, "learning_rate": 9.880105511922777e-07, "loss": 0.0177, "step": 159900 }, { "epoch": 1.7085314386452268, "grad_norm": 1.6336021423339844, "learning_rate": 9.880068937742445e-07, "loss": 0.0226, "step": 159910 }, { "epoch": 1.7086382819595065, "grad_norm": 0.2923824191093445, "learning_rate": 9.880032358052148e-07, "loss": 0.0424, "step": 159920 }, { "epoch": 1.708745125273786, "grad_norm": 14.247649192810059, "learning_rate": 9.879995772851926e-07, "loss": 0.0454, "step": 159930 }, { "epoch": 1.7088519685880657, "grad_norm": 0.183212548494339, "learning_rate": 9.879959182141817e-07, "loss": 0.0138, "step": 159940 }, { "epoch": 1.7089588119023453, "grad_norm": 6.986362457275391, "learning_rate": 9.87992258592187e-07, "loss": 0.0148, "step": 159950 }, { "epoch": 1.7090656552166248, "grad_norm": 0.02625071071088314, "learning_rate": 9.879885984192121e-07, "loss": 0.023, "step": 159960 }, { "epoch": 1.7091724985309045, "grad_norm": 0.009493493475019932, "learning_rate": 9.879849376952612e-07, "loss": 0.0252, "step": 159970 }, { "epoch": 1.7092793418451842, "grad_norm": 4.018359184265137, "learning_rate": 9.879812764203384e-07, "loss": 0.0307, "step": 159980 }, { "epoch": 1.7093861851594636, "grad_norm": 0.05127270147204399, "learning_rate": 9.879776145944482e-07, "loss": 0.0192, "step": 159990 }, { "epoch": 1.7094930284737433, "grad_norm": 6.4749979972839355, "learning_rate": 9.879739522175941e-07, "loss": 0.0399, "step": 160000 }, { "epoch": 1.709599871788023, "grad_norm": 0.777999222278595, "learning_rate": 9.879702892897806e-07, "loss": 0.0259, "step": 160010 }, { "epoch": 1.7097067151023024, "grad_norm": 11.208532333374023, "learning_rate": 9.879666258110119e-07, "loss": 0.0875, "step": 160020 }, { "epoch": 1.7098135584165821, "grad_norm": 15.488024711608887, "learning_rate": 9.87962961781292e-07, "loss": 0.0302, "step": 160030 }, { "epoch": 1.7099204017308618, "grad_norm": 0.22549210488796234, "learning_rate": 9.879592972006254e-07, "loss": 0.0567, "step": 160040 }, { "epoch": 1.7100272450451413, "grad_norm": 0.34958314895629883, "learning_rate": 9.879556320690157e-07, "loss": 0.0386, "step": 160050 }, { "epoch": 1.710134088359421, "grad_norm": 1.1353216171264648, "learning_rate": 9.879519663864672e-07, "loss": 0.0294, "step": 160060 }, { "epoch": 1.7102409316737006, "grad_norm": 1.8251596689224243, "learning_rate": 9.87948300152984e-07, "loss": 0.073, "step": 160070 }, { "epoch": 1.71034777498798, "grad_norm": 0.1051030233502388, "learning_rate": 9.879446333685706e-07, "loss": 0.014, "step": 160080 }, { "epoch": 1.7104546183022598, "grad_norm": 1.1693514585494995, "learning_rate": 9.879409660332309e-07, "loss": 0.0241, "step": 160090 }, { "epoch": 1.7105614616165394, "grad_norm": 2.0195515155792236, "learning_rate": 9.87937298146969e-07, "loss": 0.0469, "step": 160100 }, { "epoch": 1.710668304930819, "grad_norm": 0.026756366714835167, "learning_rate": 9.879336297097888e-07, "loss": 0.0657, "step": 160110 }, { "epoch": 1.7107751482450986, "grad_norm": 0.009403334930539131, "learning_rate": 9.879299607216949e-07, "loss": 0.0376, "step": 160120 }, { "epoch": 1.7108819915593783, "grad_norm": 3.8715784549713135, "learning_rate": 9.879262911826913e-07, "loss": 0.0224, "step": 160130 }, { "epoch": 1.7109888348736577, "grad_norm": 5.384942054748535, "learning_rate": 9.87922621092782e-07, "loss": 0.0114, "step": 160140 }, { "epoch": 1.7110956781879374, "grad_norm": 3.3941240310668945, "learning_rate": 9.87918950451971e-07, "loss": 0.0282, "step": 160150 }, { "epoch": 1.711202521502217, "grad_norm": 0.2353925108909607, "learning_rate": 9.87915279260263e-07, "loss": 0.0427, "step": 160160 }, { "epoch": 1.7113093648164965, "grad_norm": 0.927445113658905, "learning_rate": 9.879116075176618e-07, "loss": 0.0488, "step": 160170 }, { "epoch": 1.7114162081307762, "grad_norm": 0.026484031230211258, "learning_rate": 9.879079352241717e-07, "loss": 0.0233, "step": 160180 }, { "epoch": 1.711523051445056, "grad_norm": 4.142082214355469, "learning_rate": 9.879042623797963e-07, "loss": 0.0131, "step": 160190 }, { "epoch": 1.7116298947593354, "grad_norm": 1.6119000911712646, "learning_rate": 9.879005889845406e-07, "loss": 0.0363, "step": 160200 }, { "epoch": 1.711736738073615, "grad_norm": 8.378751754760742, "learning_rate": 9.87896915038408e-07, "loss": 0.0336, "step": 160210 }, { "epoch": 1.7118435813878947, "grad_norm": 1.68031644821167, "learning_rate": 9.87893240541403e-07, "loss": 0.0408, "step": 160220 }, { "epoch": 1.7119504247021742, "grad_norm": 0.03399379178881645, "learning_rate": 9.8788956549353e-07, "loss": 0.0555, "step": 160230 }, { "epoch": 1.7120572680164539, "grad_norm": 0.04797230660915375, "learning_rate": 9.878858898947925e-07, "loss": 0.0108, "step": 160240 }, { "epoch": 1.7121641113307335, "grad_norm": 0.9580886363983154, "learning_rate": 9.878822137451953e-07, "loss": 0.0289, "step": 160250 }, { "epoch": 1.712270954645013, "grad_norm": 0.05270027369260788, "learning_rate": 9.87878537044742e-07, "loss": 0.0073, "step": 160260 }, { "epoch": 1.7123777979592927, "grad_norm": 0.008303951472043991, "learning_rate": 9.878748597934372e-07, "loss": 0.0601, "step": 160270 }, { "epoch": 1.7124846412735724, "grad_norm": 0.541839599609375, "learning_rate": 9.878711819912848e-07, "loss": 0.0305, "step": 160280 }, { "epoch": 1.7125914845878518, "grad_norm": 0.15837787091732025, "learning_rate": 9.878675036382892e-07, "loss": 0.0057, "step": 160290 }, { "epoch": 1.7126983279021315, "grad_norm": 3.9581546783447266, "learning_rate": 9.87863824734454e-07, "loss": 0.0296, "step": 160300 }, { "epoch": 1.7128051712164112, "grad_norm": 1.8252735137939453, "learning_rate": 9.87860145279784e-07, "loss": 0.0318, "step": 160310 }, { "epoch": 1.7129120145306906, "grad_norm": 0.05515028163790703, "learning_rate": 9.878564652742829e-07, "loss": 0.02, "step": 160320 }, { "epoch": 1.7130188578449703, "grad_norm": 12.493876457214355, "learning_rate": 9.87852784717955e-07, "loss": 0.0305, "step": 160330 }, { "epoch": 1.71312570115925, "grad_norm": 5.147195339202881, "learning_rate": 9.878491036108046e-07, "loss": 0.0683, "step": 160340 }, { "epoch": 1.7132325444735295, "grad_norm": 5.7799811363220215, "learning_rate": 9.878454219528356e-07, "loss": 0.0158, "step": 160350 }, { "epoch": 1.7133393877878091, "grad_norm": 4.180070877075195, "learning_rate": 9.878417397440526e-07, "loss": 0.0508, "step": 160360 }, { "epoch": 1.7134462311020888, "grad_norm": 0.049245625734329224, "learning_rate": 9.878380569844593e-07, "loss": 0.0215, "step": 160370 }, { "epoch": 1.7135530744163683, "grad_norm": 0.022171566262841225, "learning_rate": 9.878343736740599e-07, "loss": 0.0153, "step": 160380 }, { "epoch": 1.713659917730648, "grad_norm": 1.9265164136886597, "learning_rate": 9.878306898128587e-07, "loss": 0.0224, "step": 160390 }, { "epoch": 1.7137667610449276, "grad_norm": 0.01812969148159027, "learning_rate": 9.8782700540086e-07, "loss": 0.0364, "step": 160400 }, { "epoch": 1.713873604359207, "grad_norm": 4.0547919273376465, "learning_rate": 9.878233204380676e-07, "loss": 0.024, "step": 160410 }, { "epoch": 1.7139804476734868, "grad_norm": 2.6437861919403076, "learning_rate": 9.87819634924486e-07, "loss": 0.0952, "step": 160420 }, { "epoch": 1.7140872909877665, "grad_norm": 1.9942697286605835, "learning_rate": 9.878159488601191e-07, "loss": 0.0295, "step": 160430 }, { "epoch": 1.714194134302046, "grad_norm": 3.4699718952178955, "learning_rate": 9.878122622449714e-07, "loss": 0.0904, "step": 160440 }, { "epoch": 1.7143009776163256, "grad_norm": 0.930296778678894, "learning_rate": 9.878085750790466e-07, "loss": 0.0194, "step": 160450 }, { "epoch": 1.7144078209306053, "grad_norm": 3.8211212158203125, "learning_rate": 9.878048873623492e-07, "loss": 0.118, "step": 160460 }, { "epoch": 1.7145146642448847, "grad_norm": 2.210078477859497, "learning_rate": 9.878011990948833e-07, "loss": 0.0242, "step": 160470 }, { "epoch": 1.7146215075591646, "grad_norm": 4.160983085632324, "learning_rate": 9.87797510276653e-07, "loss": 0.0163, "step": 160480 }, { "epoch": 1.714728350873444, "grad_norm": 2.841172218322754, "learning_rate": 9.877938209076624e-07, "loss": 0.039, "step": 160490 }, { "epoch": 1.7148351941877236, "grad_norm": 1.6869235038757324, "learning_rate": 9.877901309879158e-07, "loss": 0.0253, "step": 160500 }, { "epoch": 1.7149420375020035, "grad_norm": 1.8128893375396729, "learning_rate": 9.877864405174175e-07, "loss": 0.0129, "step": 160510 }, { "epoch": 1.715048880816283, "grad_norm": 0.026339098811149597, "learning_rate": 9.877827494961712e-07, "loss": 0.0073, "step": 160520 }, { "epoch": 1.7151557241305624, "grad_norm": 0.02573590911924839, "learning_rate": 9.877790579241816e-07, "loss": 0.0128, "step": 160530 }, { "epoch": 1.7152625674448423, "grad_norm": 4.074799537658691, "learning_rate": 9.877753658014524e-07, "loss": 0.0324, "step": 160540 }, { "epoch": 1.7153694107591217, "grad_norm": 0.5823791027069092, "learning_rate": 9.87771673127988e-07, "loss": 0.0227, "step": 160550 }, { "epoch": 1.7154762540734012, "grad_norm": 6.26967716217041, "learning_rate": 9.87767979903793e-07, "loss": 0.0221, "step": 160560 }, { "epoch": 1.715583097387681, "grad_norm": 0.16821494698524475, "learning_rate": 9.877642861288707e-07, "loss": 0.0191, "step": 160570 }, { "epoch": 1.7156899407019606, "grad_norm": 0.0218537338078022, "learning_rate": 9.877605918032259e-07, "loss": 0.0358, "step": 160580 }, { "epoch": 1.71579678401624, "grad_norm": 3.6395702362060547, "learning_rate": 9.877568969268626e-07, "loss": 0.0229, "step": 160590 }, { "epoch": 1.71590362733052, "grad_norm": 0.03619004040956497, "learning_rate": 9.877532014997849e-07, "loss": 0.0537, "step": 160600 }, { "epoch": 1.7160104706447994, "grad_norm": 4.24984073638916, "learning_rate": 9.87749505521997e-07, "loss": 0.0273, "step": 160610 }, { "epoch": 1.7161173139590788, "grad_norm": 3.020312547683716, "learning_rate": 9.87745808993503e-07, "loss": 0.0326, "step": 160620 }, { "epoch": 1.7162241572733588, "grad_norm": 2.6068387031555176, "learning_rate": 9.877421119143073e-07, "loss": 0.0284, "step": 160630 }, { "epoch": 1.7163310005876382, "grad_norm": 0.01191023550927639, "learning_rate": 9.877384142844139e-07, "loss": 0.0144, "step": 160640 }, { "epoch": 1.716437843901918, "grad_norm": 0.08037006855010986, "learning_rate": 9.87734716103827e-07, "loss": 0.0203, "step": 160650 }, { "epoch": 1.7165446872161976, "grad_norm": 0.049361955374479294, "learning_rate": 9.877310173725508e-07, "loss": 0.0111, "step": 160660 }, { "epoch": 1.716651530530477, "grad_norm": 4.439382553100586, "learning_rate": 9.877273180905896e-07, "loss": 0.0206, "step": 160670 }, { "epoch": 1.7167583738447567, "grad_norm": 0.9611207246780396, "learning_rate": 9.877236182579474e-07, "loss": 0.0706, "step": 160680 }, { "epoch": 1.7168652171590364, "grad_norm": 0.0915079414844513, "learning_rate": 9.877199178746282e-07, "loss": 0.0112, "step": 160690 }, { "epoch": 1.7169720604733159, "grad_norm": 0.06117312237620354, "learning_rate": 9.877162169406367e-07, "loss": 0.0522, "step": 160700 }, { "epoch": 1.7170789037875955, "grad_norm": 0.8171905875205994, "learning_rate": 9.877125154559767e-07, "loss": 0.0063, "step": 160710 }, { "epoch": 1.7171857471018752, "grad_norm": 1.597632646560669, "learning_rate": 9.877088134206524e-07, "loss": 0.0164, "step": 160720 }, { "epoch": 1.7172925904161547, "grad_norm": 1.614535927772522, "learning_rate": 9.87705110834668e-07, "loss": 0.0188, "step": 160730 }, { "epoch": 1.7173994337304344, "grad_norm": 0.829461932182312, "learning_rate": 9.877014076980278e-07, "loss": 0.0319, "step": 160740 }, { "epoch": 1.717506277044714, "grad_norm": 0.052808064967393875, "learning_rate": 9.87697704010736e-07, "loss": 0.0417, "step": 160750 }, { "epoch": 1.7176131203589935, "grad_norm": 0.23961125314235687, "learning_rate": 9.876939997727965e-07, "loss": 0.0393, "step": 160760 }, { "epoch": 1.7177199636732732, "grad_norm": 0.43089571595191956, "learning_rate": 9.876902949842136e-07, "loss": 0.0117, "step": 160770 }, { "epoch": 1.7178268069875529, "grad_norm": 3.56946063041687, "learning_rate": 9.876865896449917e-07, "loss": 0.0269, "step": 160780 }, { "epoch": 1.7179336503018323, "grad_norm": 3.29933500289917, "learning_rate": 9.876828837551349e-07, "loss": 0.022, "step": 160790 }, { "epoch": 1.718040493616112, "grad_norm": 0.2985718548297882, "learning_rate": 9.876791773146472e-07, "loss": 0.0251, "step": 160800 }, { "epoch": 1.7181473369303917, "grad_norm": 1.848252773284912, "learning_rate": 9.87675470323533e-07, "loss": 0.0527, "step": 160810 }, { "epoch": 1.7182541802446711, "grad_norm": 2.154738187789917, "learning_rate": 9.876717627817963e-07, "loss": 0.0356, "step": 160820 }, { "epoch": 1.7183610235589508, "grad_norm": 9.028980255126953, "learning_rate": 9.876680546894414e-07, "loss": 0.0593, "step": 160830 }, { "epoch": 1.7184678668732305, "grad_norm": 0.011199353262782097, "learning_rate": 9.876643460464724e-07, "loss": 0.009, "step": 160840 }, { "epoch": 1.71857471018751, "grad_norm": 6.4504289627075195, "learning_rate": 9.876606368528936e-07, "loss": 0.024, "step": 160850 }, { "epoch": 1.7186815535017896, "grad_norm": 0.7905629277229309, "learning_rate": 9.876569271087093e-07, "loss": 0.0074, "step": 160860 }, { "epoch": 1.7187883968160693, "grad_norm": 0.06716874241828918, "learning_rate": 9.87653216813923e-07, "loss": 0.0188, "step": 160870 }, { "epoch": 1.7188952401303488, "grad_norm": 0.5519562363624573, "learning_rate": 9.876495059685398e-07, "loss": 0.0333, "step": 160880 }, { "epoch": 1.7190020834446285, "grad_norm": 1.8718056678771973, "learning_rate": 9.876457945725635e-07, "loss": 0.0291, "step": 160890 }, { "epoch": 1.7191089267589081, "grad_norm": 0.9799290895462036, "learning_rate": 9.876420826259983e-07, "loss": 0.0807, "step": 160900 }, { "epoch": 1.7192157700731876, "grad_norm": 0.13800114393234253, "learning_rate": 9.876383701288485e-07, "loss": 0.0431, "step": 160910 }, { "epoch": 1.7193226133874673, "grad_norm": 0.1574685126543045, "learning_rate": 9.876346570811181e-07, "loss": 0.0455, "step": 160920 }, { "epoch": 1.719429456701747, "grad_norm": 3.459289073944092, "learning_rate": 9.87630943482811e-07, "loss": 0.0367, "step": 160930 }, { "epoch": 1.7195363000160264, "grad_norm": 1.8655747175216675, "learning_rate": 9.876272293339322e-07, "loss": 0.0267, "step": 160940 }, { "epoch": 1.719643143330306, "grad_norm": 0.007113222032785416, "learning_rate": 9.876235146344853e-07, "loss": 0.0452, "step": 160950 }, { "epoch": 1.7197499866445858, "grad_norm": 1.4607563018798828, "learning_rate": 9.876197993844748e-07, "loss": 0.0155, "step": 160960 }, { "epoch": 1.7198568299588652, "grad_norm": 0.11118518561124802, "learning_rate": 9.876160835839044e-07, "loss": 0.0138, "step": 160970 }, { "epoch": 1.719963673273145, "grad_norm": 0.02172998897731304, "learning_rate": 9.87612367232779e-07, "loss": 0.0184, "step": 160980 }, { "epoch": 1.7200705165874246, "grad_norm": 3.268139362335205, "learning_rate": 9.876086503311023e-07, "loss": 0.0125, "step": 160990 }, { "epoch": 1.720177359901704, "grad_norm": 0.02286200225353241, "learning_rate": 9.876049328788786e-07, "loss": 0.0097, "step": 161000 }, { "epoch": 1.7202842032159837, "grad_norm": 1.8811767101287842, "learning_rate": 9.87601214876112e-07, "loss": 0.0263, "step": 161010 }, { "epoch": 1.7203910465302634, "grad_norm": 0.16162315011024475, "learning_rate": 9.875974963228071e-07, "loss": 0.0293, "step": 161020 }, { "epoch": 1.7204978898445429, "grad_norm": 5.710970401763916, "learning_rate": 9.875937772189678e-07, "loss": 0.0271, "step": 161030 }, { "epoch": 1.7206047331588226, "grad_norm": 1.276000738143921, "learning_rate": 9.875900575645982e-07, "loss": 0.0234, "step": 161040 }, { "epoch": 1.7207115764731022, "grad_norm": 1.2187974452972412, "learning_rate": 9.875863373597027e-07, "loss": 0.0216, "step": 161050 }, { "epoch": 1.7208184197873817, "grad_norm": 6.895129680633545, "learning_rate": 9.875826166042853e-07, "loss": 0.0737, "step": 161060 }, { "epoch": 1.7209252631016614, "grad_norm": 0.454082727432251, "learning_rate": 9.875788952983503e-07, "loss": 0.0359, "step": 161070 }, { "epoch": 1.721032106415941, "grad_norm": 0.37599611282348633, "learning_rate": 9.87575173441902e-07, "loss": 0.0376, "step": 161080 }, { "epoch": 1.7211389497302205, "grad_norm": 0.5548701882362366, "learning_rate": 9.875714510349447e-07, "loss": 0.0381, "step": 161090 }, { "epoch": 1.7212457930445002, "grad_norm": 7.104512691497803, "learning_rate": 9.875677280774822e-07, "loss": 0.0498, "step": 161100 }, { "epoch": 1.7213526363587799, "grad_norm": 0.11662820726633072, "learning_rate": 9.87564004569519e-07, "loss": 0.0372, "step": 161110 }, { "epoch": 1.7214594796730593, "grad_norm": 1.3097419738769531, "learning_rate": 9.875602805110594e-07, "loss": 0.0731, "step": 161120 }, { "epoch": 1.721566322987339, "grad_norm": 1.5016272068023682, "learning_rate": 9.875565559021074e-07, "loss": 0.0386, "step": 161130 }, { "epoch": 1.7216731663016187, "grad_norm": 3.3707189559936523, "learning_rate": 9.875528307426671e-07, "loss": 0.0254, "step": 161140 }, { "epoch": 1.7217800096158982, "grad_norm": 0.011068352498114109, "learning_rate": 9.87549105032743e-07, "loss": 0.0216, "step": 161150 }, { "epoch": 1.7218868529301778, "grad_norm": 9.854829788208008, "learning_rate": 9.87545378772339e-07, "loss": 0.0648, "step": 161160 }, { "epoch": 1.7219936962444575, "grad_norm": 6.021038055419922, "learning_rate": 9.875416519614595e-07, "loss": 0.0094, "step": 161170 }, { "epoch": 1.722100539558737, "grad_norm": 3.892695665359497, "learning_rate": 9.875379246001089e-07, "loss": 0.0234, "step": 161180 }, { "epoch": 1.7222073828730167, "grad_norm": 4.110956192016602, "learning_rate": 9.87534196688291e-07, "loss": 0.0219, "step": 161190 }, { "epoch": 1.7223142261872963, "grad_norm": 0.36413100361824036, "learning_rate": 9.875304682260103e-07, "loss": 0.064, "step": 161200 }, { "epoch": 1.7224210695015758, "grad_norm": 2.5112500190734863, "learning_rate": 9.875267392132706e-07, "loss": 0.0437, "step": 161210 }, { "epoch": 1.7225279128158557, "grad_norm": 3.574878692626953, "learning_rate": 9.875230096500766e-07, "loss": 0.0485, "step": 161220 }, { "epoch": 1.7226347561301352, "grad_norm": 0.0037633897736668587, "learning_rate": 9.875192795364323e-07, "loss": 0.0355, "step": 161230 }, { "epoch": 1.7227415994444146, "grad_norm": 0.04040742293000221, "learning_rate": 9.87515548872342e-07, "loss": 0.022, "step": 161240 }, { "epoch": 1.7228484427586945, "grad_norm": 0.7654813528060913, "learning_rate": 9.8751181765781e-07, "loss": 0.0265, "step": 161250 }, { "epoch": 1.722955286072974, "grad_norm": 5.025647163391113, "learning_rate": 9.875080858928401e-07, "loss": 0.0143, "step": 161260 }, { "epoch": 1.7230621293872534, "grad_norm": 2.2071785926818848, "learning_rate": 9.87504353577437e-07, "loss": 0.0102, "step": 161270 }, { "epoch": 1.7231689727015334, "grad_norm": 0.11551466584205627, "learning_rate": 9.875006207116047e-07, "loss": 0.0294, "step": 161280 }, { "epoch": 1.7232758160158128, "grad_norm": 3.674494981765747, "learning_rate": 9.874968872953474e-07, "loss": 0.0411, "step": 161290 }, { "epoch": 1.7233826593300923, "grad_norm": 1.5199896097183228, "learning_rate": 9.87493153328669e-07, "loss": 0.078, "step": 161300 }, { "epoch": 1.7234895026443722, "grad_norm": 0.05991795286536217, "learning_rate": 9.874894188115743e-07, "loss": 0.0444, "step": 161310 }, { "epoch": 1.7235963459586516, "grad_norm": 8.270620346069336, "learning_rate": 9.874856837440673e-07, "loss": 0.0066, "step": 161320 }, { "epoch": 1.723703189272931, "grad_norm": 2.233902931213379, "learning_rate": 9.87481948126152e-07, "loss": 0.004, "step": 161330 }, { "epoch": 1.723810032587211, "grad_norm": 0.009236167185008526, "learning_rate": 9.874782119578328e-07, "loss": 0.0913, "step": 161340 }, { "epoch": 1.7239168759014905, "grad_norm": 0.013475102372467518, "learning_rate": 9.87474475239114e-07, "loss": 0.0633, "step": 161350 }, { "epoch": 1.72402371921577, "grad_norm": 0.005798171274363995, "learning_rate": 9.874707379699998e-07, "loss": 0.0231, "step": 161360 }, { "epoch": 1.7241305625300498, "grad_norm": 0.010865372605621815, "learning_rate": 9.874670001504943e-07, "loss": 0.0091, "step": 161370 }, { "epoch": 1.7242374058443293, "grad_norm": 0.23561635613441467, "learning_rate": 9.874632617806015e-07, "loss": 0.02, "step": 161380 }, { "epoch": 1.724344249158609, "grad_norm": 4.343600273132324, "learning_rate": 9.874595228603263e-07, "loss": 0.0175, "step": 161390 }, { "epoch": 1.7244510924728886, "grad_norm": 0.40957310795783997, "learning_rate": 9.87455783389672e-07, "loss": 0.015, "step": 161400 }, { "epoch": 1.724557935787168, "grad_norm": 0.13476021587848663, "learning_rate": 9.874520433686438e-07, "loss": 0.0188, "step": 161410 }, { "epoch": 1.7246647791014478, "grad_norm": 5.569730758666992, "learning_rate": 9.874483027972454e-07, "loss": 0.0235, "step": 161420 }, { "epoch": 1.7247716224157275, "grad_norm": 5.071132183074951, "learning_rate": 9.874445616754809e-07, "loss": 0.046, "step": 161430 }, { "epoch": 1.724878465730007, "grad_norm": 13.169684410095215, "learning_rate": 9.874408200033548e-07, "loss": 0.0248, "step": 161440 }, { "epoch": 1.7249853090442866, "grad_norm": 0.012501431629061699, "learning_rate": 9.874370777808712e-07, "loss": 0.0262, "step": 161450 }, { "epoch": 1.7250921523585663, "grad_norm": 2.7002475261688232, "learning_rate": 9.874333350080343e-07, "loss": 0.0183, "step": 161460 }, { "epoch": 1.7251989956728457, "grad_norm": 1.2891162633895874, "learning_rate": 9.874295916848485e-07, "loss": 0.0272, "step": 161470 }, { "epoch": 1.7253058389871254, "grad_norm": 1.9642932415008545, "learning_rate": 9.874258478113177e-07, "loss": 0.0398, "step": 161480 }, { "epoch": 1.725412682301405, "grad_norm": 3.1968235969543457, "learning_rate": 9.874221033874465e-07, "loss": 0.0342, "step": 161490 }, { "epoch": 1.7255195256156846, "grad_norm": 0.1808389127254486, "learning_rate": 9.87418358413239e-07, "loss": 0.094, "step": 161500 }, { "epoch": 1.7256263689299642, "grad_norm": 4.121671676635742, "learning_rate": 9.874146128886993e-07, "loss": 0.0812, "step": 161510 }, { "epoch": 1.725733212244244, "grad_norm": 0.26772743463516235, "learning_rate": 9.874108668138317e-07, "loss": 0.0254, "step": 161520 }, { "epoch": 1.7258400555585234, "grad_norm": 0.3775295913219452, "learning_rate": 9.874071201886404e-07, "loss": 0.03, "step": 161530 }, { "epoch": 1.725946898872803, "grad_norm": 6.995240211486816, "learning_rate": 9.874033730131298e-07, "loss": 0.0418, "step": 161540 }, { "epoch": 1.7260537421870827, "grad_norm": 2.3189990520477295, "learning_rate": 9.87399625287304e-07, "loss": 0.036, "step": 161550 }, { "epoch": 1.7261605855013622, "grad_norm": 1.5909024477005005, "learning_rate": 9.873958770111671e-07, "loss": 0.0773, "step": 161560 }, { "epoch": 1.7262674288156419, "grad_norm": 7.422309398651123, "learning_rate": 9.873921281847236e-07, "loss": 0.0661, "step": 161570 }, { "epoch": 1.7263742721299216, "grad_norm": 1.8598679304122925, "learning_rate": 9.873883788079776e-07, "loss": 0.0418, "step": 161580 }, { "epoch": 1.726481115444201, "grad_norm": 4.52820348739624, "learning_rate": 9.873846288809331e-07, "loss": 0.0492, "step": 161590 }, { "epoch": 1.7265879587584807, "grad_norm": 7.004699230194092, "learning_rate": 9.873808784035948e-07, "loss": 0.0342, "step": 161600 }, { "epoch": 1.7266948020727604, "grad_norm": 1.7961082458496094, "learning_rate": 9.873771273759666e-07, "loss": 0.0123, "step": 161610 }, { "epoch": 1.7268016453870398, "grad_norm": 1.7686419486999512, "learning_rate": 9.87373375798053e-07, "loss": 0.028, "step": 161620 }, { "epoch": 1.7269084887013195, "grad_norm": 5.117084980010986, "learning_rate": 9.873696236698579e-07, "loss": 0.0133, "step": 161630 }, { "epoch": 1.7270153320155992, "grad_norm": 2.6262547969818115, "learning_rate": 9.87365870991386e-07, "loss": 0.0247, "step": 161640 }, { "epoch": 1.7271221753298787, "grad_norm": 0.06496582925319672, "learning_rate": 9.87362117762641e-07, "loss": 0.0836, "step": 161650 }, { "epoch": 1.7272290186441583, "grad_norm": 0.11722195148468018, "learning_rate": 9.873583639836273e-07, "loss": 0.0213, "step": 161660 }, { "epoch": 1.727335861958438, "grad_norm": 0.0242453683167696, "learning_rate": 9.873546096543493e-07, "loss": 0.0359, "step": 161670 }, { "epoch": 1.7274427052727175, "grad_norm": 4.478419303894043, "learning_rate": 9.873508547748112e-07, "loss": 0.0478, "step": 161680 }, { "epoch": 1.7275495485869972, "grad_norm": 14.372201919555664, "learning_rate": 9.873470993450172e-07, "loss": 0.0542, "step": 161690 }, { "epoch": 1.7276563919012768, "grad_norm": 0.20097194612026215, "learning_rate": 9.873433433649715e-07, "loss": 0.0381, "step": 161700 }, { "epoch": 1.7277632352155563, "grad_norm": 5.3633294105529785, "learning_rate": 9.873395868346786e-07, "loss": 0.0404, "step": 161710 }, { "epoch": 1.727870078529836, "grad_norm": 3.324708938598633, "learning_rate": 9.873358297541423e-07, "loss": 0.0303, "step": 161720 }, { "epoch": 1.7279769218441157, "grad_norm": 2.4052798748016357, "learning_rate": 9.87332072123367e-07, "loss": 0.0213, "step": 161730 }, { "epoch": 1.7280837651583951, "grad_norm": 6.763497829437256, "learning_rate": 9.873283139423574e-07, "loss": 0.0657, "step": 161740 }, { "epoch": 1.7281906084726748, "grad_norm": 1.0880268812179565, "learning_rate": 9.87324555211117e-07, "loss": 0.009, "step": 161750 }, { "epoch": 1.7282974517869545, "grad_norm": 0.7835676670074463, "learning_rate": 9.873207959296504e-07, "loss": 0.0241, "step": 161760 }, { "epoch": 1.728404295101234, "grad_norm": 8.235461235046387, "learning_rate": 9.87317036097962e-07, "loss": 0.0221, "step": 161770 }, { "epoch": 1.7285111384155136, "grad_norm": 0.12344883382320404, "learning_rate": 9.873132757160559e-07, "loss": 0.0243, "step": 161780 }, { "epoch": 1.7286179817297933, "grad_norm": 0.4198826849460602, "learning_rate": 9.87309514783936e-07, "loss": 0.0132, "step": 161790 }, { "epoch": 1.7287248250440728, "grad_norm": 0.05301468446850777, "learning_rate": 9.873057533016073e-07, "loss": 0.0725, "step": 161800 }, { "epoch": 1.7288316683583524, "grad_norm": 4.096571922302246, "learning_rate": 9.873019912690733e-07, "loss": 0.0826, "step": 161810 }, { "epoch": 1.7289385116726321, "grad_norm": 0.0058612218126654625, "learning_rate": 9.87298228686339e-07, "loss": 0.0275, "step": 161820 }, { "epoch": 1.7290453549869116, "grad_norm": 0.013458751142024994, "learning_rate": 9.87294465553408e-07, "loss": 0.0373, "step": 161830 }, { "epoch": 1.7291521983011913, "grad_norm": 0.019051915034651756, "learning_rate": 9.872907018702845e-07, "loss": 0.0233, "step": 161840 }, { "epoch": 1.729259041615471, "grad_norm": 0.42862918972969055, "learning_rate": 9.872869376369732e-07, "loss": 0.0244, "step": 161850 }, { "epoch": 1.7293658849297504, "grad_norm": 0.02674071304500103, "learning_rate": 9.872831728534782e-07, "loss": 0.0184, "step": 161860 }, { "epoch": 1.72947272824403, "grad_norm": 0.4507032334804535, "learning_rate": 9.872794075198038e-07, "loss": 0.0082, "step": 161870 }, { "epoch": 1.7295795715583098, "grad_norm": 3.200664520263672, "learning_rate": 9.872756416359541e-07, "loss": 0.0294, "step": 161880 }, { "epoch": 1.7296864148725892, "grad_norm": 2.5166146755218506, "learning_rate": 9.872718752019335e-07, "loss": 0.0152, "step": 161890 }, { "epoch": 1.729793258186869, "grad_norm": 9.758301734924316, "learning_rate": 9.872681082177461e-07, "loss": 0.0691, "step": 161900 }, { "epoch": 1.7299001015011486, "grad_norm": 4.181876182556152, "learning_rate": 9.872643406833962e-07, "loss": 0.0314, "step": 161910 }, { "epoch": 1.730006944815428, "grad_norm": 0.026303540915250778, "learning_rate": 9.87260572598888e-07, "loss": 0.0103, "step": 161920 }, { "epoch": 1.7301137881297077, "grad_norm": 9.30749225616455, "learning_rate": 9.87256803964226e-07, "loss": 0.07, "step": 161930 }, { "epoch": 1.7302206314439874, "grad_norm": 2.892286539077759, "learning_rate": 9.872530347794143e-07, "loss": 0.0775, "step": 161940 }, { "epoch": 1.7303274747582669, "grad_norm": 7.0840959548950195, "learning_rate": 9.87249265044457e-07, "loss": 0.0243, "step": 161950 }, { "epoch": 1.7304343180725468, "grad_norm": 5.8531904220581055, "learning_rate": 9.872454947593586e-07, "loss": 0.0279, "step": 161960 }, { "epoch": 1.7305411613868262, "grad_norm": 1.2564059495925903, "learning_rate": 9.872417239241234e-07, "loss": 0.0348, "step": 161970 }, { "epoch": 1.7306480047011057, "grad_norm": 0.661167323589325, "learning_rate": 9.872379525387554e-07, "loss": 0.0186, "step": 161980 }, { "epoch": 1.7307548480153856, "grad_norm": 6.311051845550537, "learning_rate": 9.87234180603259e-07, "loss": 0.0685, "step": 161990 }, { "epoch": 1.730861691329665, "grad_norm": 0.17551270127296448, "learning_rate": 9.872304081176383e-07, "loss": 0.0293, "step": 162000 }, { "epoch": 1.7309685346439445, "grad_norm": 0.001828781794756651, "learning_rate": 9.87226635081898e-07, "loss": 0.0233, "step": 162010 }, { "epoch": 1.7310753779582244, "grad_norm": 0.01857706531882286, "learning_rate": 9.872228614960416e-07, "loss": 0.0445, "step": 162020 }, { "epoch": 1.7311822212725039, "grad_norm": 3.5058395862579346, "learning_rate": 9.872190873600743e-07, "loss": 0.0226, "step": 162030 }, { "epoch": 1.7312890645867833, "grad_norm": 2.453803300857544, "learning_rate": 9.872153126739995e-07, "loss": 0.025, "step": 162040 }, { "epoch": 1.7313959079010632, "grad_norm": 0.019359787926077843, "learning_rate": 9.87211537437822e-07, "loss": 0.023, "step": 162050 }, { "epoch": 1.7315027512153427, "grad_norm": 0.2886764705181122, "learning_rate": 9.87207761651546e-07, "loss": 0.0297, "step": 162060 }, { "epoch": 1.7316095945296222, "grad_norm": 11.52467155456543, "learning_rate": 9.872039853151755e-07, "loss": 0.0238, "step": 162070 }, { "epoch": 1.731716437843902, "grad_norm": 4.344689846038818, "learning_rate": 9.872002084287149e-07, "loss": 0.0609, "step": 162080 }, { "epoch": 1.7318232811581815, "grad_norm": 0.3982471227645874, "learning_rate": 9.871964309921687e-07, "loss": 0.0529, "step": 162090 }, { "epoch": 1.731930124472461, "grad_norm": 0.6285063624382019, "learning_rate": 9.871926530055408e-07, "loss": 0.0189, "step": 162100 }, { "epoch": 1.7320369677867409, "grad_norm": 3.6044344902038574, "learning_rate": 9.871888744688356e-07, "loss": 0.0609, "step": 162110 }, { "epoch": 1.7321438111010203, "grad_norm": 0.32190921902656555, "learning_rate": 9.871850953820575e-07, "loss": 0.0136, "step": 162120 }, { "epoch": 1.7322506544153, "grad_norm": 1.1180918216705322, "learning_rate": 9.871813157452106e-07, "loss": 0.0102, "step": 162130 }, { "epoch": 1.7323574977295797, "grad_norm": 14.340768814086914, "learning_rate": 9.871775355582993e-07, "loss": 0.0175, "step": 162140 }, { "epoch": 1.7324643410438592, "grad_norm": 0.2209600806236267, "learning_rate": 9.871737548213278e-07, "loss": 0.005, "step": 162150 }, { "epoch": 1.7325711843581388, "grad_norm": 5.477427005767822, "learning_rate": 9.871699735343002e-07, "loss": 0.0507, "step": 162160 }, { "epoch": 1.7326780276724185, "grad_norm": 1.0833008289337158, "learning_rate": 9.871661916972211e-07, "loss": 0.0389, "step": 162170 }, { "epoch": 1.732784870986698, "grad_norm": 0.2080898880958557, "learning_rate": 9.871624093100945e-07, "loss": 0.0179, "step": 162180 }, { "epoch": 1.7328917143009777, "grad_norm": 0.016463840380311012, "learning_rate": 9.871586263729248e-07, "loss": 0.0325, "step": 162190 }, { "epoch": 1.7329985576152573, "grad_norm": 2.8672068119049072, "learning_rate": 9.871548428857162e-07, "loss": 0.0492, "step": 162200 }, { "epoch": 1.7331054009295368, "grad_norm": 1.483949899673462, "learning_rate": 9.87151058848473e-07, "loss": 0.023, "step": 162210 }, { "epoch": 1.7332122442438165, "grad_norm": 3.699228286743164, "learning_rate": 9.871472742611996e-07, "loss": 0.0105, "step": 162220 }, { "epoch": 1.7333190875580962, "grad_norm": 0.009739338420331478, "learning_rate": 9.871434891239e-07, "loss": 0.1813, "step": 162230 }, { "epoch": 1.7334259308723756, "grad_norm": 1.2257622480392456, "learning_rate": 9.87139703436579e-07, "loss": 0.0238, "step": 162240 }, { "epoch": 1.7335327741866553, "grad_norm": 5.4308695793151855, "learning_rate": 9.871359171992401e-07, "loss": 0.0457, "step": 162250 }, { "epoch": 1.733639617500935, "grad_norm": 0.48069605231285095, "learning_rate": 9.871321304118881e-07, "loss": 0.0883, "step": 162260 }, { "epoch": 1.7337464608152144, "grad_norm": 0.006482597906142473, "learning_rate": 9.87128343074527e-07, "loss": 0.0175, "step": 162270 }, { "epoch": 1.7338533041294941, "grad_norm": 1.439520001411438, "learning_rate": 9.871245551871617e-07, "loss": 0.0543, "step": 162280 }, { "epoch": 1.7339601474437738, "grad_norm": 4.384927272796631, "learning_rate": 9.871207667497957e-07, "loss": 0.0171, "step": 162290 }, { "epoch": 1.7340669907580533, "grad_norm": 3.771739959716797, "learning_rate": 9.871169777624337e-07, "loss": 0.0442, "step": 162300 }, { "epoch": 1.734173834072333, "grad_norm": 3.817401647567749, "learning_rate": 9.871131882250798e-07, "loss": 0.0318, "step": 162310 }, { "epoch": 1.7342806773866126, "grad_norm": 5.7806267738342285, "learning_rate": 9.871093981377384e-07, "loss": 0.0275, "step": 162320 }, { "epoch": 1.734387520700892, "grad_norm": 0.008838135749101639, "learning_rate": 9.871056075004137e-07, "loss": 0.0688, "step": 162330 }, { "epoch": 1.7344943640151718, "grad_norm": 1.8244433403015137, "learning_rate": 9.871018163131098e-07, "loss": 0.0768, "step": 162340 }, { "epoch": 1.7346012073294514, "grad_norm": 0.039020612835884094, "learning_rate": 9.870980245758313e-07, "loss": 0.0245, "step": 162350 }, { "epoch": 1.734708050643731, "grad_norm": 3.277458906173706, "learning_rate": 9.870942322885825e-07, "loss": 0.0351, "step": 162360 }, { "epoch": 1.7348148939580106, "grad_norm": 0.06026482209563255, "learning_rate": 9.870904394513675e-07, "loss": 0.0112, "step": 162370 }, { "epoch": 1.7349217372722903, "grad_norm": 0.4091271460056305, "learning_rate": 9.870866460641906e-07, "loss": 0.0572, "step": 162380 }, { "epoch": 1.7350285805865697, "grad_norm": 0.9422586560249329, "learning_rate": 9.870828521270563e-07, "loss": 0.0535, "step": 162390 }, { "epoch": 1.7351354239008494, "grad_norm": 9.836835861206055, "learning_rate": 9.870790576399683e-07, "loss": 0.0525, "step": 162400 }, { "epoch": 1.735242267215129, "grad_norm": 4.61957311630249, "learning_rate": 9.870752626029317e-07, "loss": 0.0274, "step": 162410 }, { "epoch": 1.7353491105294085, "grad_norm": 7.550732135772705, "learning_rate": 9.8707146701595e-07, "loss": 0.0387, "step": 162420 }, { "epoch": 1.7354559538436882, "grad_norm": 1.2375932931900024, "learning_rate": 9.87067670879028e-07, "loss": 0.0508, "step": 162430 }, { "epoch": 1.735562797157968, "grad_norm": 2.4467549324035645, "learning_rate": 9.8706387419217e-07, "loss": 0.0362, "step": 162440 }, { "epoch": 1.7356696404722474, "grad_norm": 0.5707629919052124, "learning_rate": 9.8706007695538e-07, "loss": 0.0077, "step": 162450 }, { "epoch": 1.735776483786527, "grad_norm": 0.8432544469833374, "learning_rate": 9.870562791686622e-07, "loss": 0.0441, "step": 162460 }, { "epoch": 1.7358833271008067, "grad_norm": 0.043236907571554184, "learning_rate": 9.870524808320214e-07, "loss": 0.0433, "step": 162470 }, { "epoch": 1.7359901704150862, "grad_norm": 6.549070835113525, "learning_rate": 9.870486819454615e-07, "loss": 0.0151, "step": 162480 }, { "epoch": 1.7360970137293659, "grad_norm": 1.2048052549362183, "learning_rate": 9.87044882508987e-07, "loss": 0.026, "step": 162490 }, { "epoch": 1.7362038570436455, "grad_norm": 0.8765634894371033, "learning_rate": 9.87041082522602e-07, "loss": 0.0377, "step": 162500 }, { "epoch": 1.736310700357925, "grad_norm": 10.85953426361084, "learning_rate": 9.870372819863106e-07, "loss": 0.0254, "step": 162510 }, { "epoch": 1.7364175436722047, "grad_norm": 9.070720672607422, "learning_rate": 9.870334809001177e-07, "loss": 0.0351, "step": 162520 }, { "epoch": 1.7365243869864844, "grad_norm": 2.124478340148926, "learning_rate": 9.87029679264027e-07, "loss": 0.0679, "step": 162530 }, { "epoch": 1.7366312303007638, "grad_norm": 2.061988592147827, "learning_rate": 9.87025877078043e-07, "loss": 0.0749, "step": 162540 }, { "epoch": 1.7367380736150435, "grad_norm": 3.4299545288085938, "learning_rate": 9.870220743421702e-07, "loss": 0.108, "step": 162550 }, { "epoch": 1.7368449169293232, "grad_norm": 3.341294765472412, "learning_rate": 9.870182710564127e-07, "loss": 0.0535, "step": 162560 }, { "epoch": 1.7369517602436026, "grad_norm": 0.5179969668388367, "learning_rate": 9.870144672207748e-07, "loss": 0.0411, "step": 162570 }, { "epoch": 1.7370586035578823, "grad_norm": 4.931248188018799, "learning_rate": 9.870106628352608e-07, "loss": 0.0245, "step": 162580 }, { "epoch": 1.737165446872162, "grad_norm": 0.22541049122810364, "learning_rate": 9.870068578998751e-07, "loss": 0.0067, "step": 162590 }, { "epoch": 1.7372722901864415, "grad_norm": 0.032500483095645905, "learning_rate": 9.870030524146219e-07, "loss": 0.0327, "step": 162600 }, { "epoch": 1.7373791335007212, "grad_norm": 0.0033419940154999495, "learning_rate": 9.869992463795053e-07, "loss": 0.0213, "step": 162610 }, { "epoch": 1.7374859768150008, "grad_norm": 0.2665663957595825, "learning_rate": 9.8699543979453e-07, "loss": 0.0138, "step": 162620 }, { "epoch": 1.7375928201292803, "grad_norm": 0.2698989808559418, "learning_rate": 9.869916326597e-07, "loss": 0.0081, "step": 162630 }, { "epoch": 1.73769966344356, "grad_norm": 0.20200422406196594, "learning_rate": 9.869878249750195e-07, "loss": 0.0227, "step": 162640 }, { "epoch": 1.7378065067578397, "grad_norm": 0.02856125868856907, "learning_rate": 9.869840167404932e-07, "loss": 0.0707, "step": 162650 }, { "epoch": 1.7379133500721191, "grad_norm": 0.32747623324394226, "learning_rate": 9.869802079561252e-07, "loss": 0.033, "step": 162660 }, { "epoch": 1.7380201933863988, "grad_norm": 7.502436637878418, "learning_rate": 9.869763986219197e-07, "loss": 0.0251, "step": 162670 }, { "epoch": 1.7381270367006785, "grad_norm": 0.6476319432258606, "learning_rate": 9.869725887378812e-07, "loss": 0.0593, "step": 162680 }, { "epoch": 1.738233880014958, "grad_norm": 6.446893215179443, "learning_rate": 9.86968778304014e-07, "loss": 0.0441, "step": 162690 }, { "epoch": 1.7383407233292378, "grad_norm": 0.15250909328460693, "learning_rate": 9.86964967320322e-07, "loss": 0.0341, "step": 162700 }, { "epoch": 1.7384475666435173, "grad_norm": 5.936121940612793, "learning_rate": 9.8696115578681e-07, "loss": 0.0362, "step": 162710 }, { "epoch": 1.7385544099577968, "grad_norm": 4.131043910980225, "learning_rate": 9.86957343703482e-07, "loss": 0.0224, "step": 162720 }, { "epoch": 1.7386612532720767, "grad_norm": 0.08914422988891602, "learning_rate": 9.869535310703424e-07, "loss": 0.0102, "step": 162730 }, { "epoch": 1.7387680965863561, "grad_norm": 5.394323825836182, "learning_rate": 9.869497178873956e-07, "loss": 0.0323, "step": 162740 }, { "epoch": 1.7388749399006356, "grad_norm": 0.1621408313512802, "learning_rate": 9.869459041546457e-07, "loss": 0.0433, "step": 162750 }, { "epoch": 1.7389817832149155, "grad_norm": 1.8327099084854126, "learning_rate": 9.869420898720972e-07, "loss": 0.0214, "step": 162760 }, { "epoch": 1.739088626529195, "grad_norm": 4.371822357177734, "learning_rate": 9.869382750397543e-07, "loss": 0.028, "step": 162770 }, { "epoch": 1.7391954698434744, "grad_norm": 0.04057324305176735, "learning_rate": 9.869344596576215e-07, "loss": 0.046, "step": 162780 }, { "epoch": 1.7393023131577543, "grad_norm": 4.60256814956665, "learning_rate": 9.869306437257028e-07, "loss": 0.0353, "step": 162790 }, { "epoch": 1.7394091564720338, "grad_norm": 5.739589214324951, "learning_rate": 9.869268272440025e-07, "loss": 0.0636, "step": 162800 }, { "epoch": 1.7395159997863132, "grad_norm": 1.764807105064392, "learning_rate": 9.869230102125255e-07, "loss": 0.0436, "step": 162810 }, { "epoch": 1.7396228431005931, "grad_norm": 5.041313171386719, "learning_rate": 9.869191926312752e-07, "loss": 0.0231, "step": 162820 }, { "epoch": 1.7397296864148726, "grad_norm": 1.2086751461029053, "learning_rate": 9.869153745002567e-07, "loss": 0.0574, "step": 162830 }, { "epoch": 1.739836529729152, "grad_norm": 7.828932285308838, "learning_rate": 9.869115558194738e-07, "loss": 0.0474, "step": 162840 }, { "epoch": 1.739943373043432, "grad_norm": 4.357308864593506, "learning_rate": 9.869077365889311e-07, "loss": 0.0194, "step": 162850 }, { "epoch": 1.7400502163577114, "grad_norm": 0.43362560868263245, "learning_rate": 9.869039168086328e-07, "loss": 0.0197, "step": 162860 }, { "epoch": 1.740157059671991, "grad_norm": 0.3047693073749542, "learning_rate": 9.86900096478583e-07, "loss": 0.0305, "step": 162870 }, { "epoch": 1.7402639029862708, "grad_norm": 0.8025712966918945, "learning_rate": 9.868962755987866e-07, "loss": 0.0372, "step": 162880 }, { "epoch": 1.7403707463005502, "grad_norm": 0.045808855444192886, "learning_rate": 9.868924541692475e-07, "loss": 0.0569, "step": 162890 }, { "epoch": 1.74047758961483, "grad_norm": 0.7864881157875061, "learning_rate": 9.868886321899699e-07, "loss": 0.0555, "step": 162900 }, { "epoch": 1.7405844329291096, "grad_norm": 0.1863565742969513, "learning_rate": 9.868848096609585e-07, "loss": 0.0155, "step": 162910 }, { "epoch": 1.740691276243389, "grad_norm": 0.15241385996341705, "learning_rate": 9.868809865822173e-07, "loss": 0.0419, "step": 162920 }, { "epoch": 1.7407981195576687, "grad_norm": 9.68397045135498, "learning_rate": 9.868771629537506e-07, "loss": 0.0718, "step": 162930 }, { "epoch": 1.7409049628719484, "grad_norm": 4.425082683563232, "learning_rate": 9.86873338775563e-07, "loss": 0.0114, "step": 162940 }, { "epoch": 1.7410118061862279, "grad_norm": 1.6457022428512573, "learning_rate": 9.868695140476587e-07, "loss": 0.02, "step": 162950 }, { "epoch": 1.7411186495005075, "grad_norm": 3.289428472518921, "learning_rate": 9.868656887700418e-07, "loss": 0.0031, "step": 162960 }, { "epoch": 1.7412254928147872, "grad_norm": 0.10126728564500809, "learning_rate": 9.868618629427167e-07, "loss": 0.0242, "step": 162970 }, { "epoch": 1.7413323361290667, "grad_norm": 5.646481513977051, "learning_rate": 9.86858036565688e-07, "loss": 0.041, "step": 162980 }, { "epoch": 1.7414391794433464, "grad_norm": 1.7393085956573486, "learning_rate": 9.868542096389598e-07, "loss": 0.023, "step": 162990 }, { "epoch": 1.741546022757626, "grad_norm": 0.04177892580628395, "learning_rate": 9.868503821625366e-07, "loss": 0.0588, "step": 163000 }, { "epoch": 1.7416528660719055, "grad_norm": 5.32316255569458, "learning_rate": 9.868465541364224e-07, "loss": 0.0169, "step": 163010 }, { "epoch": 1.7417597093861852, "grad_norm": 1.0690783262252808, "learning_rate": 9.868427255606218e-07, "loss": 0.014, "step": 163020 }, { "epoch": 1.7418665527004649, "grad_norm": 3.9572460651397705, "learning_rate": 9.86838896435139e-07, "loss": 0.0151, "step": 163030 }, { "epoch": 1.7419733960147443, "grad_norm": 14.325946807861328, "learning_rate": 9.86835066759978e-07, "loss": 0.0448, "step": 163040 }, { "epoch": 1.742080239329024, "grad_norm": 0.031565941870212555, "learning_rate": 9.868312365351438e-07, "loss": 0.1218, "step": 163050 }, { "epoch": 1.7421870826433037, "grad_norm": 5.3492231369018555, "learning_rate": 9.868274057606405e-07, "loss": 0.0352, "step": 163060 }, { "epoch": 1.7422939259575831, "grad_norm": 0.03678251802921295, "learning_rate": 9.86823574436472e-07, "loss": 0.0223, "step": 163070 }, { "epoch": 1.7424007692718628, "grad_norm": 4.85521125793457, "learning_rate": 9.868197425626432e-07, "loss": 0.0635, "step": 163080 }, { "epoch": 1.7425076125861425, "grad_norm": 0.13775081932544708, "learning_rate": 9.868159101391581e-07, "loss": 0.05, "step": 163090 }, { "epoch": 1.742614455900422, "grad_norm": 3.2164933681488037, "learning_rate": 9.86812077166021e-07, "loss": 0.0249, "step": 163100 }, { "epoch": 1.7427212992147016, "grad_norm": 2.9374775886535645, "learning_rate": 9.868082436432364e-07, "loss": 0.0114, "step": 163110 }, { "epoch": 1.7428281425289813, "grad_norm": 6.623580455780029, "learning_rate": 9.868044095708085e-07, "loss": 0.0471, "step": 163120 }, { "epoch": 1.7429349858432608, "grad_norm": 0.07130368053913116, "learning_rate": 9.86800574948742e-07, "loss": 0.0311, "step": 163130 }, { "epoch": 1.7430418291575405, "grad_norm": 1.437588095664978, "learning_rate": 9.867967397770404e-07, "loss": 0.0566, "step": 163140 }, { "epoch": 1.7431486724718201, "grad_norm": 4.302224636077881, "learning_rate": 9.867929040557088e-07, "loss": 0.0103, "step": 163150 }, { "epoch": 1.7432555157860996, "grad_norm": 9.134160995483398, "learning_rate": 9.867890677847513e-07, "loss": 0.0411, "step": 163160 }, { "epoch": 1.7433623591003793, "grad_norm": 0.021506138145923615, "learning_rate": 9.867852309641721e-07, "loss": 0.0055, "step": 163170 }, { "epoch": 1.743469202414659, "grad_norm": 3.944777011871338, "learning_rate": 9.867813935939757e-07, "loss": 0.0971, "step": 163180 }, { "epoch": 1.7435760457289384, "grad_norm": 4.296113014221191, "learning_rate": 9.867775556741665e-07, "loss": 0.0453, "step": 163190 }, { "epoch": 1.743682889043218, "grad_norm": 4.328362941741943, "learning_rate": 9.867737172047485e-07, "loss": 0.0447, "step": 163200 }, { "epoch": 1.7437897323574978, "grad_norm": 14.181365013122559, "learning_rate": 9.867698781857262e-07, "loss": 0.0805, "step": 163210 }, { "epoch": 1.7438965756717772, "grad_norm": 0.4241693317890167, "learning_rate": 9.867660386171042e-07, "loss": 0.0289, "step": 163220 }, { "epoch": 1.744003418986057, "grad_norm": 0.2720872163772583, "learning_rate": 9.867621984988862e-07, "loss": 0.0281, "step": 163230 }, { "epoch": 1.7441102623003366, "grad_norm": 3.9089317321777344, "learning_rate": 9.867583578310774e-07, "loss": 0.0495, "step": 163240 }, { "epoch": 1.744217105614616, "grad_norm": 1.387952446937561, "learning_rate": 9.867545166136815e-07, "loss": 0.0156, "step": 163250 }, { "epoch": 1.7443239489288958, "grad_norm": 0.3026450574398041, "learning_rate": 9.867506748467028e-07, "loss": 0.0073, "step": 163260 }, { "epoch": 1.7444307922431754, "grad_norm": 0.27124497294425964, "learning_rate": 9.867468325301461e-07, "loss": 0.0369, "step": 163270 }, { "epoch": 1.744537635557455, "grad_norm": 1.1722015142440796, "learning_rate": 9.867429896640156e-07, "loss": 0.0275, "step": 163280 }, { "epoch": 1.7446444788717346, "grad_norm": 2.280484437942505, "learning_rate": 9.867391462483153e-07, "loss": 0.0122, "step": 163290 }, { "epoch": 1.7447513221860143, "grad_norm": 3.3287525177001953, "learning_rate": 9.867353022830498e-07, "loss": 0.0388, "step": 163300 }, { "epoch": 1.7448581655002937, "grad_norm": 3.734010696411133, "learning_rate": 9.867314577682234e-07, "loss": 0.0112, "step": 163310 }, { "epoch": 1.7449650088145734, "grad_norm": 3.176922559738159, "learning_rate": 9.867276127038406e-07, "loss": 0.0282, "step": 163320 }, { "epoch": 1.745071852128853, "grad_norm": 4.9142680168151855, "learning_rate": 9.867237670899053e-07, "loss": 0.0411, "step": 163330 }, { "epoch": 1.7451786954431325, "grad_norm": 6.504082202911377, "learning_rate": 9.867199209264224e-07, "loss": 0.079, "step": 163340 }, { "epoch": 1.7452855387574122, "grad_norm": 2.5323429107666016, "learning_rate": 9.86716074213396e-07, "loss": 0.0099, "step": 163350 }, { "epoch": 1.745392382071692, "grad_norm": 1.4626115560531616, "learning_rate": 9.867122269508302e-07, "loss": 0.0423, "step": 163360 }, { "epoch": 1.7454992253859714, "grad_norm": 0.08597100526094437, "learning_rate": 9.867083791387296e-07, "loss": 0.015, "step": 163370 }, { "epoch": 1.745606068700251, "grad_norm": 2.0833778381347656, "learning_rate": 9.867045307770987e-07, "loss": 0.0234, "step": 163380 }, { "epoch": 1.7457129120145307, "grad_norm": 0.16493088006973267, "learning_rate": 9.867006818659415e-07, "loss": 0.0354, "step": 163390 }, { "epoch": 1.7458197553288102, "grad_norm": 0.046772152185440063, "learning_rate": 9.866968324052624e-07, "loss": 0.0337, "step": 163400 }, { "epoch": 1.7459265986430899, "grad_norm": 1.63130784034729, "learning_rate": 9.86692982395066e-07, "loss": 0.04, "step": 163410 }, { "epoch": 1.7460334419573695, "grad_norm": 1.6156775951385498, "learning_rate": 9.866891318353565e-07, "loss": 0.0437, "step": 163420 }, { "epoch": 1.746140285271649, "grad_norm": 0.011387328617274761, "learning_rate": 9.866852807261382e-07, "loss": 0.0469, "step": 163430 }, { "epoch": 1.746247128585929, "grad_norm": 6.455450057983398, "learning_rate": 9.866814290674154e-07, "loss": 0.0442, "step": 163440 }, { "epoch": 1.7463539719002084, "grad_norm": 0.5455124378204346, "learning_rate": 9.866775768591927e-07, "loss": 0.0043, "step": 163450 }, { "epoch": 1.7464608152144878, "grad_norm": 13.391412734985352, "learning_rate": 9.866737241014741e-07, "loss": 0.0476, "step": 163460 }, { "epoch": 1.7465676585287677, "grad_norm": 1.3182116746902466, "learning_rate": 9.866698707942643e-07, "loss": 0.125, "step": 163470 }, { "epoch": 1.7466745018430472, "grad_norm": 0.048583801835775375, "learning_rate": 9.866660169375674e-07, "loss": 0.0165, "step": 163480 }, { "epoch": 1.7467813451573266, "grad_norm": 1.4281140565872192, "learning_rate": 9.86662162531388e-07, "loss": 0.0371, "step": 163490 }, { "epoch": 1.7468881884716065, "grad_norm": 11.423079490661621, "learning_rate": 9.8665830757573e-07, "loss": 0.0287, "step": 163500 }, { "epoch": 1.746995031785886, "grad_norm": 0.752868115901947, "learning_rate": 9.866544520705983e-07, "loss": 0.0181, "step": 163510 }, { "epoch": 1.7471018751001655, "grad_norm": 1.0022729635238647, "learning_rate": 9.866505960159968e-07, "loss": 0.0609, "step": 163520 }, { "epoch": 1.7472087184144454, "grad_norm": 5.75119161605835, "learning_rate": 9.866467394119302e-07, "loss": 0.0739, "step": 163530 }, { "epoch": 1.7473155617287248, "grad_norm": 0.012207350693643093, "learning_rate": 9.866428822584026e-07, "loss": 0.0526, "step": 163540 }, { "epoch": 1.7474224050430043, "grad_norm": 0.3588690757751465, "learning_rate": 9.866390245554185e-07, "loss": 0.0133, "step": 163550 }, { "epoch": 1.7475292483572842, "grad_norm": 4.544703960418701, "learning_rate": 9.86635166302982e-07, "loss": 0.0604, "step": 163560 }, { "epoch": 1.7476360916715636, "grad_norm": 3.617849826812744, "learning_rate": 9.86631307501098e-07, "loss": 0.0152, "step": 163570 }, { "epoch": 1.747742934985843, "grad_norm": 1.0774601697921753, "learning_rate": 9.866274481497705e-07, "loss": 0.0138, "step": 163580 }, { "epoch": 1.747849778300123, "grad_norm": 4.570549011230469, "learning_rate": 9.866235882490038e-07, "loss": 0.0151, "step": 163590 }, { "epoch": 1.7479566216144025, "grad_norm": 4.240077972412109, "learning_rate": 9.866197277988021e-07, "loss": 0.0344, "step": 163600 }, { "epoch": 1.7480634649286821, "grad_norm": 0.40874189138412476, "learning_rate": 9.866158667991705e-07, "loss": 0.0061, "step": 163610 }, { "epoch": 1.7481703082429618, "grad_norm": 0.009860025718808174, "learning_rate": 9.866120052501124e-07, "loss": 0.035, "step": 163620 }, { "epoch": 1.7482771515572413, "grad_norm": 3.7446837425231934, "learning_rate": 9.866081431516329e-07, "loss": 0.0222, "step": 163630 }, { "epoch": 1.748383994871521, "grad_norm": 0.0185161791741848, "learning_rate": 9.866042805037359e-07, "loss": 0.0186, "step": 163640 }, { "epoch": 1.7484908381858006, "grad_norm": 1.3121341466903687, "learning_rate": 9.86600417306426e-07, "loss": 0.0228, "step": 163650 }, { "epoch": 1.74859768150008, "grad_norm": 2.709040403366089, "learning_rate": 9.865965535597076e-07, "loss": 0.026, "step": 163660 }, { "epoch": 1.7487045248143598, "grad_norm": 1.8584007024765015, "learning_rate": 9.865926892635849e-07, "loss": 0.0349, "step": 163670 }, { "epoch": 1.7488113681286395, "grad_norm": 0.3788791596889496, "learning_rate": 9.865888244180623e-07, "loss": 0.0196, "step": 163680 }, { "epoch": 1.748918211442919, "grad_norm": 0.61447674036026, "learning_rate": 9.86584959023144e-07, "loss": 0.0435, "step": 163690 }, { "epoch": 1.7490250547571986, "grad_norm": 0.2927004396915436, "learning_rate": 9.86581093078835e-07, "loss": 0.0338, "step": 163700 }, { "epoch": 1.7491318980714783, "grad_norm": 2.805985927581787, "learning_rate": 9.865772265851388e-07, "loss": 0.0351, "step": 163710 }, { "epoch": 1.7492387413857577, "grad_norm": 7.715830326080322, "learning_rate": 9.865733595420605e-07, "loss": 0.0325, "step": 163720 }, { "epoch": 1.7493455847000374, "grad_norm": 0.8176729679107666, "learning_rate": 9.86569491949604e-07, "loss": 0.0653, "step": 163730 }, { "epoch": 1.749452428014317, "grad_norm": 0.016458777710795403, "learning_rate": 9.86565623807774e-07, "loss": 0.0135, "step": 163740 }, { "epoch": 1.7495592713285966, "grad_norm": 0.01777004264295101, "learning_rate": 9.865617551165745e-07, "loss": 0.0343, "step": 163750 }, { "epoch": 1.7496661146428762, "grad_norm": 1.7204843759536743, "learning_rate": 9.8655788587601e-07, "loss": 0.0248, "step": 163760 }, { "epoch": 1.749772957957156, "grad_norm": 3.613856792449951, "learning_rate": 9.86554016086085e-07, "loss": 0.0332, "step": 163770 }, { "epoch": 1.7498798012714354, "grad_norm": 0.5400461554527283, "learning_rate": 9.865501457468039e-07, "loss": 0.018, "step": 163780 }, { "epoch": 1.749986644585715, "grad_norm": 1.7459896802902222, "learning_rate": 9.86546274858171e-07, "loss": 0.051, "step": 163790 }, { "epoch": 1.7500934878999947, "grad_norm": 1.2711036205291748, "learning_rate": 9.865424034201904e-07, "loss": 0.0535, "step": 163800 }, { "epoch": 1.7502003312142742, "grad_norm": 2.606685161590576, "learning_rate": 9.865385314328668e-07, "loss": 0.0106, "step": 163810 }, { "epoch": 1.7503071745285539, "grad_norm": 0.36124464869499207, "learning_rate": 9.865346588962044e-07, "loss": 0.0853, "step": 163820 }, { "epoch": 1.7504140178428336, "grad_norm": 1.867344856262207, "learning_rate": 9.86530785810208e-07, "loss": 0.0723, "step": 163830 }, { "epoch": 1.750520861157113, "grad_norm": 3.9729669094085693, "learning_rate": 9.865269121748812e-07, "loss": 0.0293, "step": 163840 }, { "epoch": 1.7506277044713927, "grad_norm": 0.1504918932914734, "learning_rate": 9.86523037990229e-07, "loss": 0.0131, "step": 163850 }, { "epoch": 1.7507345477856724, "grad_norm": 3.946237564086914, "learning_rate": 9.865191632562556e-07, "loss": 0.0686, "step": 163860 }, { "epoch": 1.7508413910999518, "grad_norm": 2.896378993988037, "learning_rate": 9.865152879729652e-07, "loss": 0.0238, "step": 163870 }, { "epoch": 1.7509482344142315, "grad_norm": 3.773944616317749, "learning_rate": 9.865114121403624e-07, "loss": 0.013, "step": 163880 }, { "epoch": 1.7510550777285112, "grad_norm": 0.011421029455959797, "learning_rate": 9.865075357584516e-07, "loss": 0.0068, "step": 163890 }, { "epoch": 1.7511619210427907, "grad_norm": 0.15506918728351593, "learning_rate": 9.865036588272369e-07, "loss": 0.016, "step": 163900 }, { "epoch": 1.7512687643570704, "grad_norm": 0.6454876661300659, "learning_rate": 9.86499781346723e-07, "loss": 0.0278, "step": 163910 }, { "epoch": 1.75137560767135, "grad_norm": 7.4297380447387695, "learning_rate": 9.86495903316914e-07, "loss": 0.0279, "step": 163920 }, { "epoch": 1.7514824509856295, "grad_norm": 9.852714538574219, "learning_rate": 9.864920247378143e-07, "loss": 0.0334, "step": 163930 }, { "epoch": 1.7515892942999092, "grad_norm": 1.5383538007736206, "learning_rate": 9.864881456094286e-07, "loss": 0.0457, "step": 163940 }, { "epoch": 1.7516961376141889, "grad_norm": 1.473210334777832, "learning_rate": 9.864842659317612e-07, "loss": 0.0284, "step": 163950 }, { "epoch": 1.7518029809284683, "grad_norm": 1.1295729875564575, "learning_rate": 9.86480385704816e-07, "loss": 0.0646, "step": 163960 }, { "epoch": 1.751909824242748, "grad_norm": 3.1191351413726807, "learning_rate": 9.864765049285979e-07, "loss": 0.0484, "step": 163970 }, { "epoch": 1.7520166675570277, "grad_norm": 17.737531661987305, "learning_rate": 9.86472623603111e-07, "loss": 0.0478, "step": 163980 }, { "epoch": 1.7521235108713071, "grad_norm": 20.6718692779541, "learning_rate": 9.8646874172836e-07, "loss": 0.0766, "step": 163990 }, { "epoch": 1.7522303541855868, "grad_norm": 6.098769187927246, "learning_rate": 9.86464859304349e-07, "loss": 0.0525, "step": 164000 }, { "epoch": 1.7523371974998665, "grad_norm": 2.5170609951019287, "learning_rate": 9.864609763310824e-07, "loss": 0.0572, "step": 164010 }, { "epoch": 1.752444040814146, "grad_norm": 7.096068382263184, "learning_rate": 9.864570928085647e-07, "loss": 0.0526, "step": 164020 }, { "epoch": 1.7525508841284256, "grad_norm": 3.08123517036438, "learning_rate": 9.864532087368002e-07, "loss": 0.03, "step": 164030 }, { "epoch": 1.7526577274427053, "grad_norm": 3.6983067989349365, "learning_rate": 9.864493241157932e-07, "loss": 0.0743, "step": 164040 }, { "epoch": 1.7527645707569848, "grad_norm": 3.961902379989624, "learning_rate": 9.864454389455485e-07, "loss": 0.0131, "step": 164050 }, { "epoch": 1.7528714140712645, "grad_norm": 7.744762420654297, "learning_rate": 9.8644155322607e-07, "loss": 0.0457, "step": 164060 }, { "epoch": 1.7529782573855441, "grad_norm": 3.9219515323638916, "learning_rate": 9.864376669573622e-07, "loss": 0.0305, "step": 164070 }, { "epoch": 1.7530851006998236, "grad_norm": 4.34521484375, "learning_rate": 9.864337801394296e-07, "loss": 0.0295, "step": 164080 }, { "epoch": 1.7531919440141033, "grad_norm": 3.015772581100464, "learning_rate": 9.864298927722766e-07, "loss": 0.0934, "step": 164090 }, { "epoch": 1.753298787328383, "grad_norm": 3.597477674484253, "learning_rate": 9.864260048559074e-07, "loss": 0.0202, "step": 164100 }, { "epoch": 1.7534056306426624, "grad_norm": 0.10509510338306427, "learning_rate": 9.864221163903267e-07, "loss": 0.0303, "step": 164110 }, { "epoch": 1.753512473956942, "grad_norm": 0.0137433260679245, "learning_rate": 9.864182273755388e-07, "loss": 0.0084, "step": 164120 }, { "epoch": 1.7536193172712218, "grad_norm": 0.1033860445022583, "learning_rate": 9.864143378115478e-07, "loss": 0.0571, "step": 164130 }, { "epoch": 1.7537261605855012, "grad_norm": 3.159356117248535, "learning_rate": 9.864104476983584e-07, "loss": 0.0419, "step": 164140 }, { "epoch": 1.753833003899781, "grad_norm": 0.4599863290786743, "learning_rate": 9.864065570359747e-07, "loss": 0.0044, "step": 164150 }, { "epoch": 1.7539398472140606, "grad_norm": 4.167208194732666, "learning_rate": 9.864026658244015e-07, "loss": 0.0124, "step": 164160 }, { "epoch": 1.75404669052834, "grad_norm": 0.002927698427811265, "learning_rate": 9.86398774063643e-07, "loss": 0.0474, "step": 164170 }, { "epoch": 1.75415353384262, "grad_norm": 2.26365065574646, "learning_rate": 9.863948817537036e-07, "loss": 0.0138, "step": 164180 }, { "epoch": 1.7542603771568994, "grad_norm": 3.265153646469116, "learning_rate": 9.863909888945874e-07, "loss": 0.0269, "step": 164190 }, { "epoch": 1.7543672204711789, "grad_norm": 8.94433307647705, "learning_rate": 9.863870954862995e-07, "loss": 0.0498, "step": 164200 }, { "epoch": 1.7544740637854588, "grad_norm": 0.30357542634010315, "learning_rate": 9.863832015288433e-07, "loss": 0.0417, "step": 164210 }, { "epoch": 1.7545809070997382, "grad_norm": 0.11105568706989288, "learning_rate": 9.863793070222242e-07, "loss": 0.04, "step": 164220 }, { "epoch": 1.7546877504140177, "grad_norm": 2.8954708576202393, "learning_rate": 9.86375411966446e-07, "loss": 0.0324, "step": 164230 }, { "epoch": 1.7547945937282976, "grad_norm": 11.961618423461914, "learning_rate": 9.863715163615132e-07, "loss": 0.0215, "step": 164240 }, { "epoch": 1.754901437042577, "grad_norm": 2.386270761489868, "learning_rate": 9.863676202074304e-07, "loss": 0.0349, "step": 164250 }, { "epoch": 1.7550082803568565, "grad_norm": 4.073426246643066, "learning_rate": 9.863637235042017e-07, "loss": 0.0236, "step": 164260 }, { "epoch": 1.7551151236711364, "grad_norm": 1.7217330932617188, "learning_rate": 9.863598262518317e-07, "loss": 0.0121, "step": 164270 }, { "epoch": 1.7552219669854159, "grad_norm": 0.004393079783767462, "learning_rate": 9.863559284503249e-07, "loss": 0.0113, "step": 164280 }, { "epoch": 1.7553288102996953, "grad_norm": 0.09688939154148102, "learning_rate": 9.863520300996852e-07, "loss": 0.0108, "step": 164290 }, { "epoch": 1.7554356536139752, "grad_norm": 1.421887755393982, "learning_rate": 9.863481311999178e-07, "loss": 0.0288, "step": 164300 }, { "epoch": 1.7555424969282547, "grad_norm": 7.635806560516357, "learning_rate": 9.863442317510262e-07, "loss": 0.0277, "step": 164310 }, { "epoch": 1.7556493402425342, "grad_norm": 7.17307186126709, "learning_rate": 9.863403317530155e-07, "loss": 0.0261, "step": 164320 }, { "epoch": 1.755756183556814, "grad_norm": 0.08924456685781479, "learning_rate": 9.863364312058899e-07, "loss": 0.0195, "step": 164330 }, { "epoch": 1.7558630268710935, "grad_norm": 0.014200660400092602, "learning_rate": 9.863325301096537e-07, "loss": 0.0546, "step": 164340 }, { "epoch": 1.7559698701853732, "grad_norm": 0.09928328543901443, "learning_rate": 9.863286284643113e-07, "loss": 0.0106, "step": 164350 }, { "epoch": 1.7560767134996529, "grad_norm": 0.10014406591653824, "learning_rate": 9.863247262698673e-07, "loss": 0.0248, "step": 164360 }, { "epoch": 1.7561835568139323, "grad_norm": 29.957664489746094, "learning_rate": 9.863208235263258e-07, "loss": 0.0435, "step": 164370 }, { "epoch": 1.756290400128212, "grad_norm": 0.3052367568016052, "learning_rate": 9.863169202336917e-07, "loss": 0.0159, "step": 164380 }, { "epoch": 1.7563972434424917, "grad_norm": 1.0929328203201294, "learning_rate": 9.86313016391969e-07, "loss": 0.0251, "step": 164390 }, { "epoch": 1.7565040867567712, "grad_norm": 3.5236778259277344, "learning_rate": 9.86309112001162e-07, "loss": 0.0253, "step": 164400 }, { "epoch": 1.7566109300710508, "grad_norm": 23.190460205078125, "learning_rate": 9.863052070612753e-07, "loss": 0.029, "step": 164410 }, { "epoch": 1.7567177733853305, "grad_norm": 0.06489790230989456, "learning_rate": 9.863013015723134e-07, "loss": 0.0534, "step": 164420 }, { "epoch": 1.75682461669961, "grad_norm": 0.2967766225337982, "learning_rate": 9.862973955342806e-07, "loss": 0.0314, "step": 164430 }, { "epoch": 1.7569314600138897, "grad_norm": 1.3853994607925415, "learning_rate": 9.862934889471815e-07, "loss": 0.0243, "step": 164440 }, { "epoch": 1.7570383033281693, "grad_norm": 0.20637227594852448, "learning_rate": 9.862895818110203e-07, "loss": 0.023, "step": 164450 }, { "epoch": 1.7571451466424488, "grad_norm": 0.00581483356654644, "learning_rate": 9.862856741258012e-07, "loss": 0.0921, "step": 164460 }, { "epoch": 1.7572519899567285, "grad_norm": 3.5520052909851074, "learning_rate": 9.86281765891529e-07, "loss": 0.0322, "step": 164470 }, { "epoch": 1.7573588332710082, "grad_norm": 0.09871543198823929, "learning_rate": 9.862778571082083e-07, "loss": 0.036, "step": 164480 }, { "epoch": 1.7574656765852876, "grad_norm": 1.0830214023590088, "learning_rate": 9.862739477758428e-07, "loss": 0.0525, "step": 164490 }, { "epoch": 1.7575725198995673, "grad_norm": 15.042220115661621, "learning_rate": 9.862700378944376e-07, "loss": 0.0469, "step": 164500 }, { "epoch": 1.757679363213847, "grad_norm": 0.5486862063407898, "learning_rate": 9.862661274639966e-07, "loss": 0.0469, "step": 164510 }, { "epoch": 1.7577862065281264, "grad_norm": 4.687154769897461, "learning_rate": 9.862622164845244e-07, "loss": 0.0298, "step": 164520 }, { "epoch": 1.7578930498424061, "grad_norm": 1.0729775428771973, "learning_rate": 9.86258304956026e-07, "loss": 0.0358, "step": 164530 }, { "epoch": 1.7579998931566858, "grad_norm": 0.23142009973526, "learning_rate": 9.862543928785046e-07, "loss": 0.0465, "step": 164540 }, { "epoch": 1.7581067364709653, "grad_norm": 0.7602732181549072, "learning_rate": 9.862504802519655e-07, "loss": 0.015, "step": 164550 }, { "epoch": 1.758213579785245, "grad_norm": 3.425175905227661, "learning_rate": 9.86246567076413e-07, "loss": 0.0181, "step": 164560 }, { "epoch": 1.7583204230995246, "grad_norm": 2.2104313373565674, "learning_rate": 9.862426533518514e-07, "loss": 0.0148, "step": 164570 }, { "epoch": 1.758427266413804, "grad_norm": 0.28339648246765137, "learning_rate": 9.862387390782851e-07, "loss": 0.0436, "step": 164580 }, { "epoch": 1.7585341097280838, "grad_norm": 0.8194712996482849, "learning_rate": 9.862348242557185e-07, "loss": 0.025, "step": 164590 }, { "epoch": 1.7586409530423635, "grad_norm": 5.493885040283203, "learning_rate": 9.862309088841564e-07, "loss": 0.0381, "step": 164600 }, { "epoch": 1.758747796356643, "grad_norm": 0.29415589570999146, "learning_rate": 9.862269929636027e-07, "loss": 0.06, "step": 164610 }, { "epoch": 1.7588546396709226, "grad_norm": 0.1158628910779953, "learning_rate": 9.86223076494062e-07, "loss": 0.0443, "step": 164620 }, { "epoch": 1.7589614829852023, "grad_norm": 2.705019950866699, "learning_rate": 9.862191594755386e-07, "loss": 0.0264, "step": 164630 }, { "epoch": 1.7590683262994817, "grad_norm": 2.8604843616485596, "learning_rate": 9.862152419080374e-07, "loss": 0.0298, "step": 164640 }, { "epoch": 1.7591751696137614, "grad_norm": 1.7886260747909546, "learning_rate": 9.862113237915623e-07, "loss": 0.0296, "step": 164650 }, { "epoch": 1.759282012928041, "grad_norm": 0.018095724284648895, "learning_rate": 9.86207405126118e-07, "loss": 0.0252, "step": 164660 }, { "epoch": 1.7593888562423206, "grad_norm": 0.6082508563995361, "learning_rate": 9.862034859117088e-07, "loss": 0.0656, "step": 164670 }, { "epoch": 1.7594956995566002, "grad_norm": 0.013107878156006336, "learning_rate": 9.86199566148339e-07, "loss": 0.0116, "step": 164680 }, { "epoch": 1.75960254287088, "grad_norm": 3.980459690093994, "learning_rate": 9.861956458360134e-07, "loss": 0.016, "step": 164690 }, { "epoch": 1.7597093861851594, "grad_norm": 0.2535410523414612, "learning_rate": 9.861917249747363e-07, "loss": 0.0469, "step": 164700 }, { "epoch": 1.759816229499439, "grad_norm": 0.06705806404352188, "learning_rate": 9.86187803564512e-07, "loss": 0.0347, "step": 164710 }, { "epoch": 1.7599230728137187, "grad_norm": 0.18045590817928314, "learning_rate": 9.861838816053449e-07, "loss": 0.0305, "step": 164720 }, { "epoch": 1.7600299161279982, "grad_norm": 1.542197585105896, "learning_rate": 9.861799590972394e-07, "loss": 0.041, "step": 164730 }, { "epoch": 1.7601367594422779, "grad_norm": 3.162435293197632, "learning_rate": 9.861760360402003e-07, "loss": 0.0286, "step": 164740 }, { "epoch": 1.7602436027565576, "grad_norm": 0.023680100217461586, "learning_rate": 9.861721124342315e-07, "loss": 0.0344, "step": 164750 }, { "epoch": 1.760350446070837, "grad_norm": 0.031635090708732605, "learning_rate": 9.861681882793377e-07, "loss": 0.0479, "step": 164760 }, { "epoch": 1.7604572893851167, "grad_norm": 0.2345246523618698, "learning_rate": 9.861642635755236e-07, "loss": 0.006, "step": 164770 }, { "epoch": 1.7605641326993964, "grad_norm": 1.9468594789505005, "learning_rate": 9.861603383227931e-07, "loss": 0.0556, "step": 164780 }, { "epoch": 1.7606709760136758, "grad_norm": 5.603175163269043, "learning_rate": 9.86156412521151e-07, "loss": 0.032, "step": 164790 }, { "epoch": 1.7607778193279555, "grad_norm": 3.050426483154297, "learning_rate": 9.861524861706015e-07, "loss": 0.0248, "step": 164800 }, { "epoch": 1.7608846626422352, "grad_norm": 0.01690499484539032, "learning_rate": 9.861485592711495e-07, "loss": 0.0255, "step": 164810 }, { "epoch": 1.7609915059565147, "grad_norm": 4.794447898864746, "learning_rate": 9.861446318227987e-07, "loss": 0.0441, "step": 164820 }, { "epoch": 1.7610983492707943, "grad_norm": 0.09693441540002823, "learning_rate": 9.86140703825554e-07, "loss": 0.0253, "step": 164830 }, { "epoch": 1.761205192585074, "grad_norm": 0.20021522045135498, "learning_rate": 9.861367752794197e-07, "loss": 0.0237, "step": 164840 }, { "epoch": 1.7613120358993535, "grad_norm": 0.6838791370391846, "learning_rate": 9.861328461844005e-07, "loss": 0.038, "step": 164850 }, { "epoch": 1.7614188792136332, "grad_norm": 1.4771760702133179, "learning_rate": 9.861289165405004e-07, "loss": 0.0216, "step": 164860 }, { "epoch": 1.7615257225279128, "grad_norm": 0.09043050557374954, "learning_rate": 9.861249863477242e-07, "loss": 0.0403, "step": 164870 }, { "epoch": 1.7616325658421923, "grad_norm": 0.03907254338264465, "learning_rate": 9.861210556060762e-07, "loss": 0.0496, "step": 164880 }, { "epoch": 1.761739409156472, "grad_norm": 1.1814301013946533, "learning_rate": 9.86117124315561e-07, "loss": 0.0043, "step": 164890 }, { "epoch": 1.7618462524707517, "grad_norm": 0.010505142621695995, "learning_rate": 9.861131924761826e-07, "loss": 0.0136, "step": 164900 }, { "epoch": 1.7619530957850311, "grad_norm": 2.5859639644622803, "learning_rate": 9.861092600879456e-07, "loss": 0.0478, "step": 164910 }, { "epoch": 1.762059939099311, "grad_norm": 6.2338786125183105, "learning_rate": 9.861053271508548e-07, "loss": 0.0749, "step": 164920 }, { "epoch": 1.7621667824135905, "grad_norm": 0.028292573988437653, "learning_rate": 9.861013936649143e-07, "loss": 0.0432, "step": 164930 }, { "epoch": 1.76227362572787, "grad_norm": 0.07089629769325256, "learning_rate": 9.860974596301287e-07, "loss": 0.0538, "step": 164940 }, { "epoch": 1.7623804690421498, "grad_norm": 14.539288520812988, "learning_rate": 9.860935250465026e-07, "loss": 0.0391, "step": 164950 }, { "epoch": 1.7624873123564293, "grad_norm": 0.8318977952003479, "learning_rate": 9.860895899140398e-07, "loss": 0.035, "step": 164960 }, { "epoch": 1.7625941556707088, "grad_norm": 1.0776715278625488, "learning_rate": 9.860856542327454e-07, "loss": 0.0581, "step": 164970 }, { "epoch": 1.7627009989849887, "grad_norm": 2.1733345985412598, "learning_rate": 9.860817180026238e-07, "loss": 0.0222, "step": 164980 }, { "epoch": 1.7628078422992681, "grad_norm": 0.1843401938676834, "learning_rate": 9.86077781223679e-07, "loss": 0.0259, "step": 164990 }, { "epoch": 1.7629146856135476, "grad_norm": 0.08431357890367508, "learning_rate": 9.860738438959157e-07, "loss": 0.0263, "step": 165000 }, { "epoch": 1.7630215289278275, "grad_norm": 0.18772590160369873, "learning_rate": 9.860699060193383e-07, "loss": 0.0386, "step": 165010 }, { "epoch": 1.763128372242107, "grad_norm": 0.70245361328125, "learning_rate": 9.860659675939514e-07, "loss": 0.0134, "step": 165020 }, { "epoch": 1.7632352155563864, "grad_norm": 11.486839294433594, "learning_rate": 9.860620286197593e-07, "loss": 0.0394, "step": 165030 }, { "epoch": 1.7633420588706663, "grad_norm": 0.05521531403064728, "learning_rate": 9.860580890967664e-07, "loss": 0.0361, "step": 165040 }, { "epoch": 1.7634489021849458, "grad_norm": 8.079863548278809, "learning_rate": 9.860541490249775e-07, "loss": 0.0256, "step": 165050 }, { "epoch": 1.7635557454992252, "grad_norm": 6.361583709716797, "learning_rate": 9.860502084043964e-07, "loss": 0.0447, "step": 165060 }, { "epoch": 1.7636625888135051, "grad_norm": 0.15813475847244263, "learning_rate": 9.860462672350283e-07, "loss": 0.0174, "step": 165070 }, { "epoch": 1.7637694321277846, "grad_norm": 1.2611314058303833, "learning_rate": 9.86042325516877e-07, "loss": 0.0131, "step": 165080 }, { "epoch": 1.7638762754420643, "grad_norm": 6.768911361694336, "learning_rate": 9.860383832499475e-07, "loss": 0.1024, "step": 165090 }, { "epoch": 1.763983118756344, "grad_norm": 13.357823371887207, "learning_rate": 9.860344404342437e-07, "loss": 0.0341, "step": 165100 }, { "epoch": 1.7640899620706234, "grad_norm": 3.7737035751342773, "learning_rate": 9.860304970697707e-07, "loss": 0.0745, "step": 165110 }, { "epoch": 1.764196805384903, "grad_norm": 1.3884729146957397, "learning_rate": 9.860265531565322e-07, "loss": 0.0225, "step": 165120 }, { "epoch": 1.7643036486991828, "grad_norm": 2.567380666732788, "learning_rate": 9.860226086945332e-07, "loss": 0.0211, "step": 165130 }, { "epoch": 1.7644104920134622, "grad_norm": 0.00903890747576952, "learning_rate": 9.86018663683778e-07, "loss": 0.0458, "step": 165140 }, { "epoch": 1.764517335327742, "grad_norm": 0.30254408717155457, "learning_rate": 9.860147181242712e-07, "loss": 0.0185, "step": 165150 }, { "epoch": 1.7646241786420216, "grad_norm": 0.6139087080955505, "learning_rate": 9.860107720160166e-07, "loss": 0.05, "step": 165160 }, { "epoch": 1.764731021956301, "grad_norm": 6.078428268432617, "learning_rate": 9.860068253590196e-07, "loss": 0.0632, "step": 165170 }, { "epoch": 1.7648378652705807, "grad_norm": 5.342111587524414, "learning_rate": 9.860028781532842e-07, "loss": 0.0228, "step": 165180 }, { "epoch": 1.7649447085848604, "grad_norm": 0.6606362462043762, "learning_rate": 9.859989303988148e-07, "loss": 0.0395, "step": 165190 }, { "epoch": 1.7650515518991399, "grad_norm": 3.4438636302948, "learning_rate": 9.859949820956158e-07, "loss": 0.0116, "step": 165200 }, { "epoch": 1.7651583952134196, "grad_norm": 0.28955891728401184, "learning_rate": 9.859910332436917e-07, "loss": 0.0192, "step": 165210 }, { "epoch": 1.7652652385276992, "grad_norm": 8.906781196594238, "learning_rate": 9.859870838430475e-07, "loss": 0.0695, "step": 165220 }, { "epoch": 1.7653720818419787, "grad_norm": 4.30104923248291, "learning_rate": 9.859831338936868e-07, "loss": 0.0312, "step": 165230 }, { "epoch": 1.7654789251562584, "grad_norm": 2.8682074546813965, "learning_rate": 9.859791833956145e-07, "loss": 0.0704, "step": 165240 }, { "epoch": 1.765585768470538, "grad_norm": 0.03230181336402893, "learning_rate": 9.859752323488354e-07, "loss": 0.02, "step": 165250 }, { "epoch": 1.7656926117848175, "grad_norm": 2.1571695804595947, "learning_rate": 9.859712807533532e-07, "loss": 0.0506, "step": 165260 }, { "epoch": 1.7657994550990972, "grad_norm": 1.6686583757400513, "learning_rate": 9.859673286091728e-07, "loss": 0.0418, "step": 165270 }, { "epoch": 1.7659062984133769, "grad_norm": 0.060785774141550064, "learning_rate": 9.859633759162986e-07, "loss": 0.0112, "step": 165280 }, { "epoch": 1.7660131417276563, "grad_norm": 0.06788940727710724, "learning_rate": 9.859594226747352e-07, "loss": 0.0291, "step": 165290 }, { "epoch": 1.766119985041936, "grad_norm": 0.020710539072752, "learning_rate": 9.859554688844868e-07, "loss": 0.0352, "step": 165300 }, { "epoch": 1.7662268283562157, "grad_norm": 1.5618966817855835, "learning_rate": 9.85951514545558e-07, "loss": 0.0462, "step": 165310 }, { "epoch": 1.7663336716704952, "grad_norm": 6.723380088806152, "learning_rate": 9.859475596579536e-07, "loss": 0.024, "step": 165320 }, { "epoch": 1.7664405149847748, "grad_norm": 0.03732648119330406, "learning_rate": 9.859436042216774e-07, "loss": 0.02, "step": 165330 }, { "epoch": 1.7665473582990545, "grad_norm": 5.143660068511963, "learning_rate": 9.85939648236734e-07, "loss": 0.0599, "step": 165340 }, { "epoch": 1.766654201613334, "grad_norm": 0.688707172870636, "learning_rate": 9.859356917031285e-07, "loss": 0.0467, "step": 165350 }, { "epoch": 1.7667610449276137, "grad_norm": 0.7473486065864563, "learning_rate": 9.859317346208649e-07, "loss": 0.0114, "step": 165360 }, { "epoch": 1.7668678882418933, "grad_norm": 1.5011632442474365, "learning_rate": 9.859277769899474e-07, "loss": 0.0386, "step": 165370 }, { "epoch": 1.7669747315561728, "grad_norm": 1.7093043327331543, "learning_rate": 9.85923818810381e-07, "loss": 0.0487, "step": 165380 }, { "epoch": 1.7670815748704525, "grad_norm": 0.17371176183223724, "learning_rate": 9.859198600821698e-07, "loss": 0.0225, "step": 165390 }, { "epoch": 1.7671884181847322, "grad_norm": 0.031016429886221886, "learning_rate": 9.859159008053183e-07, "loss": 0.0361, "step": 165400 }, { "epoch": 1.7672952614990116, "grad_norm": 0.773181140422821, "learning_rate": 9.859119409798313e-07, "loss": 0.0412, "step": 165410 }, { "epoch": 1.7674021048132913, "grad_norm": 2.5766429901123047, "learning_rate": 9.85907980605713e-07, "loss": 0.0418, "step": 165420 }, { "epoch": 1.767508948127571, "grad_norm": 1.0918550491333008, "learning_rate": 9.859040196829678e-07, "loss": 0.0212, "step": 165430 }, { "epoch": 1.7676157914418504, "grad_norm": 0.013430071994662285, "learning_rate": 9.859000582116004e-07, "loss": 0.0147, "step": 165440 }, { "epoch": 1.7677226347561301, "grad_norm": 0.5552627444267273, "learning_rate": 9.858960961916151e-07, "loss": 0.0594, "step": 165450 }, { "epoch": 1.7678294780704098, "grad_norm": 0.11743815243244171, "learning_rate": 9.858921336230166e-07, "loss": 0.0481, "step": 165460 }, { "epoch": 1.7679363213846893, "grad_norm": 4.3842363357543945, "learning_rate": 9.858881705058091e-07, "loss": 0.0354, "step": 165470 }, { "epoch": 1.768043164698969, "grad_norm": 7.743777275085449, "learning_rate": 9.858842068399971e-07, "loss": 0.0221, "step": 165480 }, { "epoch": 1.7681500080132486, "grad_norm": 10.136628150939941, "learning_rate": 9.858802426255851e-07, "loss": 0.0266, "step": 165490 }, { "epoch": 1.768256851327528, "grad_norm": 0.024637337774038315, "learning_rate": 9.858762778625777e-07, "loss": 0.0165, "step": 165500 }, { "epoch": 1.7683636946418078, "grad_norm": 0.07342135161161423, "learning_rate": 9.858723125509796e-07, "loss": 0.051, "step": 165510 }, { "epoch": 1.7684705379560874, "grad_norm": 0.018660377711057663, "learning_rate": 9.858683466907947e-07, "loss": 0.0807, "step": 165520 }, { "epoch": 1.768577381270367, "grad_norm": 0.031890638172626495, "learning_rate": 9.858643802820278e-07, "loss": 0.0468, "step": 165530 }, { "epoch": 1.7686842245846466, "grad_norm": 1.6615278720855713, "learning_rate": 9.858604133246832e-07, "loss": 0.0206, "step": 165540 }, { "epoch": 1.7687910678989263, "grad_norm": 2.2852210998535156, "learning_rate": 9.858564458187658e-07, "loss": 0.0206, "step": 165550 }, { "epoch": 1.7688979112132057, "grad_norm": 0.47995221614837646, "learning_rate": 9.858524777642798e-07, "loss": 0.0096, "step": 165560 }, { "epoch": 1.7690047545274854, "grad_norm": 3.5429394245147705, "learning_rate": 9.858485091612296e-07, "loss": 0.0215, "step": 165570 }, { "epoch": 1.769111597841765, "grad_norm": 0.34847530722618103, "learning_rate": 9.858445400096195e-07, "loss": 0.0342, "step": 165580 }, { "epoch": 1.7692184411560445, "grad_norm": 2.769378900527954, "learning_rate": 9.858405703094547e-07, "loss": 0.0491, "step": 165590 }, { "epoch": 1.7693252844703242, "grad_norm": 0.205505833029747, "learning_rate": 9.85836600060739e-07, "loss": 0.0195, "step": 165600 }, { "epoch": 1.769432127784604, "grad_norm": 0.10393368452787399, "learning_rate": 9.85832629263477e-07, "loss": 0.0746, "step": 165610 }, { "epoch": 1.7695389710988834, "grad_norm": 0.31522583961486816, "learning_rate": 9.858286579176736e-07, "loss": 0.0745, "step": 165620 }, { "epoch": 1.769645814413163, "grad_norm": 0.032663118094205856, "learning_rate": 9.858246860233327e-07, "loss": 0.0309, "step": 165630 }, { "epoch": 1.7697526577274427, "grad_norm": 0.045620840042829514, "learning_rate": 9.858207135804592e-07, "loss": 0.0068, "step": 165640 }, { "epoch": 1.7698595010417222, "grad_norm": 14.748581886291504, "learning_rate": 9.858167405890573e-07, "loss": 0.0342, "step": 165650 }, { "epoch": 1.769966344356002, "grad_norm": 7.349444389343262, "learning_rate": 9.85812767049132e-07, "loss": 0.0376, "step": 165660 }, { "epoch": 1.7700731876702815, "grad_norm": 2.8430161476135254, "learning_rate": 9.85808792960687e-07, "loss": 0.0298, "step": 165670 }, { "epoch": 1.770180030984561, "grad_norm": 5.818066120147705, "learning_rate": 9.858048183237274e-07, "loss": 0.0244, "step": 165680 }, { "epoch": 1.770286874298841, "grad_norm": 0.09040796011686325, "learning_rate": 9.858008431382573e-07, "loss": 0.0199, "step": 165690 }, { "epoch": 1.7703937176131204, "grad_norm": 0.03548063337802887, "learning_rate": 9.857968674042815e-07, "loss": 0.029, "step": 165700 }, { "epoch": 1.7705005609273998, "grad_norm": 3.11283802986145, "learning_rate": 9.857928911218045e-07, "loss": 0.084, "step": 165710 }, { "epoch": 1.7706074042416797, "grad_norm": 0.048263777047395706, "learning_rate": 9.857889142908306e-07, "loss": 0.0092, "step": 165720 }, { "epoch": 1.7707142475559592, "grad_norm": 0.011646422557532787, "learning_rate": 9.857849369113644e-07, "loss": 0.0406, "step": 165730 }, { "epoch": 1.7708210908702386, "grad_norm": 0.48281988501548767, "learning_rate": 9.857809589834102e-07, "loss": 0.0455, "step": 165740 }, { "epoch": 1.7709279341845185, "grad_norm": 1.3797414302825928, "learning_rate": 9.857769805069728e-07, "loss": 0.0321, "step": 165750 }, { "epoch": 1.771034777498798, "grad_norm": 1.2239253520965576, "learning_rate": 9.857730014820563e-07, "loss": 0.0627, "step": 165760 }, { "epoch": 1.7711416208130775, "grad_norm": 1.948894739151001, "learning_rate": 9.857690219086657e-07, "loss": 0.0088, "step": 165770 }, { "epoch": 1.7712484641273574, "grad_norm": 3.24755859375, "learning_rate": 9.857650417868052e-07, "loss": 0.0121, "step": 165780 }, { "epoch": 1.7713553074416368, "grad_norm": 15.727599143981934, "learning_rate": 9.857610611164791e-07, "loss": 0.05, "step": 165790 }, { "epoch": 1.7714621507559163, "grad_norm": 0.054963164031505585, "learning_rate": 9.857570798976919e-07, "loss": 0.0372, "step": 165800 }, { "epoch": 1.7715689940701962, "grad_norm": 0.4540030360221863, "learning_rate": 9.857530981304486e-07, "loss": 0.0178, "step": 165810 }, { "epoch": 1.7716758373844756, "grad_norm": 0.5884065628051758, "learning_rate": 9.857491158147535e-07, "loss": 0.0228, "step": 165820 }, { "epoch": 1.7717826806987553, "grad_norm": 0.9020074605941772, "learning_rate": 9.857451329506109e-07, "loss": 0.0147, "step": 165830 }, { "epoch": 1.771889524013035, "grad_norm": 0.0443728044629097, "learning_rate": 9.857411495380252e-07, "loss": 0.0591, "step": 165840 }, { "epoch": 1.7719963673273145, "grad_norm": 0.019278882071375847, "learning_rate": 9.857371655770014e-07, "loss": 0.0626, "step": 165850 }, { "epoch": 1.7721032106415942, "grad_norm": 0.09435342997312546, "learning_rate": 9.857331810675434e-07, "loss": 0.0183, "step": 165860 }, { "epoch": 1.7722100539558738, "grad_norm": 2.0285794734954834, "learning_rate": 9.85729196009656e-07, "loss": 0.0298, "step": 165870 }, { "epoch": 1.7723168972701533, "grad_norm": 4.9452409744262695, "learning_rate": 9.857252104033439e-07, "loss": 0.03, "step": 165880 }, { "epoch": 1.772423740584433, "grad_norm": 0.7830251455307007, "learning_rate": 9.85721224248611e-07, "loss": 0.0205, "step": 165890 }, { "epoch": 1.7725305838987127, "grad_norm": 0.032088376581668854, "learning_rate": 9.857172375454626e-07, "loss": 0.0493, "step": 165900 }, { "epoch": 1.7726374272129921, "grad_norm": 0.032063256949186325, "learning_rate": 9.857132502939026e-07, "loss": 0.0407, "step": 165910 }, { "epoch": 1.7727442705272718, "grad_norm": 0.6797558665275574, "learning_rate": 9.857092624939355e-07, "loss": 0.0095, "step": 165920 }, { "epoch": 1.7728511138415515, "grad_norm": 4.590389251708984, "learning_rate": 9.857052741455662e-07, "loss": 0.0362, "step": 165930 }, { "epoch": 1.772957957155831, "grad_norm": 0.3047351837158203, "learning_rate": 9.857012852487988e-07, "loss": 0.0304, "step": 165940 }, { "epoch": 1.7730648004701106, "grad_norm": 1.852338194847107, "learning_rate": 9.856972958036382e-07, "loss": 0.0274, "step": 165950 }, { "epoch": 1.7731716437843903, "grad_norm": 0.12390951812267303, "learning_rate": 9.856933058100887e-07, "loss": 0.0088, "step": 165960 }, { "epoch": 1.7732784870986698, "grad_norm": 4.006127834320068, "learning_rate": 9.856893152681548e-07, "loss": 0.0148, "step": 165970 }, { "epoch": 1.7733853304129494, "grad_norm": 2.0111546516418457, "learning_rate": 9.856853241778408e-07, "loss": 0.0275, "step": 165980 }, { "epoch": 1.7734921737272291, "grad_norm": 11.385708808898926, "learning_rate": 9.856813325391515e-07, "loss": 0.0369, "step": 165990 }, { "epoch": 1.7735990170415086, "grad_norm": 1.6972870826721191, "learning_rate": 9.856773403520914e-07, "loss": 0.0585, "step": 166000 }, { "epoch": 1.7737058603557883, "grad_norm": 0.042774491012096405, "learning_rate": 9.85673347616665e-07, "loss": 0.0039, "step": 166010 }, { "epoch": 1.773812703670068, "grad_norm": 0.24469375610351562, "learning_rate": 9.856693543328765e-07, "loss": 0.0453, "step": 166020 }, { "epoch": 1.7739195469843474, "grad_norm": 0.04901266098022461, "learning_rate": 9.856653605007308e-07, "loss": 0.0035, "step": 166030 }, { "epoch": 1.774026390298627, "grad_norm": 1.4317429065704346, "learning_rate": 9.856613661202323e-07, "loss": 0.0107, "step": 166040 }, { "epoch": 1.7741332336129068, "grad_norm": 0.17680108547210693, "learning_rate": 9.856573711913852e-07, "loss": 0.0341, "step": 166050 }, { "epoch": 1.7742400769271862, "grad_norm": 2.9516940116882324, "learning_rate": 9.856533757141946e-07, "loss": 0.0307, "step": 166060 }, { "epoch": 1.774346920241466, "grad_norm": 4.757480621337891, "learning_rate": 9.856493796886645e-07, "loss": 0.0332, "step": 166070 }, { "epoch": 1.7744537635557456, "grad_norm": 15.908552169799805, "learning_rate": 9.856453831147996e-07, "loss": 0.091, "step": 166080 }, { "epoch": 1.774560606870025, "grad_norm": 3.4525673389434814, "learning_rate": 9.856413859926045e-07, "loss": 0.0271, "step": 166090 }, { "epoch": 1.7746674501843047, "grad_norm": 0.09524237364530563, "learning_rate": 9.856373883220835e-07, "loss": 0.0115, "step": 166100 }, { "epoch": 1.7747742934985844, "grad_norm": 2.669158458709717, "learning_rate": 9.856333901032414e-07, "loss": 0.0494, "step": 166110 }, { "epoch": 1.7748811368128639, "grad_norm": 13.818906784057617, "learning_rate": 9.856293913360825e-07, "loss": 0.0322, "step": 166120 }, { "epoch": 1.7749879801271435, "grad_norm": 1.0859096050262451, "learning_rate": 9.856253920206112e-07, "loss": 0.0251, "step": 166130 }, { "epoch": 1.7750948234414232, "grad_norm": 0.4091425836086273, "learning_rate": 9.856213921568324e-07, "loss": 0.0209, "step": 166140 }, { "epoch": 1.7752016667557027, "grad_norm": 3.514951467514038, "learning_rate": 9.856173917447501e-07, "loss": 0.0582, "step": 166150 }, { "epoch": 1.7753085100699824, "grad_norm": 3.8580596446990967, "learning_rate": 9.856133907843695e-07, "loss": 0.0521, "step": 166160 }, { "epoch": 1.775415353384262, "grad_norm": 1.042825698852539, "learning_rate": 9.856093892756944e-07, "loss": 0.0131, "step": 166170 }, { "epoch": 1.7755221966985415, "grad_norm": 0.05852246657013893, "learning_rate": 9.856053872187298e-07, "loss": 0.0359, "step": 166180 }, { "epoch": 1.7756290400128212, "grad_norm": 2.3482487201690674, "learning_rate": 9.8560138461348e-07, "loss": 0.0425, "step": 166190 }, { "epoch": 1.7757358833271009, "grad_norm": 2.1874656677246094, "learning_rate": 9.855973814599498e-07, "loss": 0.0495, "step": 166200 }, { "epoch": 1.7758427266413803, "grad_norm": 0.37672847509384155, "learning_rate": 9.855933777581433e-07, "loss": 0.0329, "step": 166210 }, { "epoch": 1.77594956995566, "grad_norm": 8.547057151794434, "learning_rate": 9.855893735080654e-07, "loss": 0.0566, "step": 166220 }, { "epoch": 1.7760564132699397, "grad_norm": 0.1009678766131401, "learning_rate": 9.855853687097202e-07, "loss": 0.0501, "step": 166230 }, { "epoch": 1.7761632565842191, "grad_norm": 0.08479109406471252, "learning_rate": 9.855813633631126e-07, "loss": 0.0148, "step": 166240 }, { "epoch": 1.7762700998984988, "grad_norm": 2.5508182048797607, "learning_rate": 9.85577357468247e-07, "loss": 0.0242, "step": 166250 }, { "epoch": 1.7763769432127785, "grad_norm": 0.07293848693370819, "learning_rate": 9.85573351025128e-07, "loss": 0.0105, "step": 166260 }, { "epoch": 1.776483786527058, "grad_norm": 0.2936975359916687, "learning_rate": 9.855693440337599e-07, "loss": 0.0243, "step": 166270 }, { "epoch": 1.7765906298413376, "grad_norm": 0.15578097105026245, "learning_rate": 9.855653364941473e-07, "loss": 0.0176, "step": 166280 }, { "epoch": 1.7766974731556173, "grad_norm": 0.005351603962481022, "learning_rate": 9.85561328406295e-07, "loss": 0.0292, "step": 166290 }, { "epoch": 1.7768043164698968, "grad_norm": 0.003193225245922804, "learning_rate": 9.855573197702073e-07, "loss": 0.0277, "step": 166300 }, { "epoch": 1.7769111597841765, "grad_norm": 1.5132966041564941, "learning_rate": 9.855533105858886e-07, "loss": 0.0275, "step": 166310 }, { "epoch": 1.7770180030984561, "grad_norm": 2.541126251220703, "learning_rate": 9.855493008533436e-07, "loss": 0.0034, "step": 166320 }, { "epoch": 1.7771248464127356, "grad_norm": 0.6839661002159119, "learning_rate": 9.855452905725768e-07, "loss": 0.0521, "step": 166330 }, { "epoch": 1.7772316897270153, "grad_norm": 0.0035452956799417734, "learning_rate": 9.855412797435927e-07, "loss": 0.0663, "step": 166340 }, { "epoch": 1.777338533041295, "grad_norm": 7.447173118591309, "learning_rate": 9.855372683663958e-07, "loss": 0.0285, "step": 166350 }, { "epoch": 1.7774453763555744, "grad_norm": 0.030261380597949028, "learning_rate": 9.855332564409908e-07, "loss": 0.0105, "step": 166360 }, { "epoch": 1.777552219669854, "grad_norm": 0.031136276200413704, "learning_rate": 9.855292439673822e-07, "loss": 0.027, "step": 166370 }, { "epoch": 1.7776590629841338, "grad_norm": 3.220522403717041, "learning_rate": 9.855252309455742e-07, "loss": 0.0184, "step": 166380 }, { "epoch": 1.7777659062984132, "grad_norm": 0.6913759708404541, "learning_rate": 9.855212173755718e-07, "loss": 0.0327, "step": 166390 }, { "epoch": 1.7778727496126931, "grad_norm": 0.3725661635398865, "learning_rate": 9.855172032573792e-07, "loss": 0.0249, "step": 166400 }, { "epoch": 1.7779795929269726, "grad_norm": 0.26539555191993713, "learning_rate": 9.855131885910008e-07, "loss": 0.0566, "step": 166410 }, { "epoch": 1.778086436241252, "grad_norm": 0.009379432536661625, "learning_rate": 9.855091733764416e-07, "loss": 0.0541, "step": 166420 }, { "epoch": 1.778193279555532, "grad_norm": 0.08883244544267654, "learning_rate": 9.85505157613706e-07, "loss": 0.0175, "step": 166430 }, { "epoch": 1.7783001228698114, "grad_norm": 5.217996120452881, "learning_rate": 9.855011413027983e-07, "loss": 0.0315, "step": 166440 }, { "epoch": 1.7784069661840909, "grad_norm": 0.012811543419957161, "learning_rate": 9.854971244437232e-07, "loss": 0.0066, "step": 166450 }, { "epoch": 1.7785138094983708, "grad_norm": 5.652409553527832, "learning_rate": 9.854931070364851e-07, "loss": 0.0222, "step": 166460 }, { "epoch": 1.7786206528126502, "grad_norm": 0.4317797124385834, "learning_rate": 9.854890890810888e-07, "loss": 0.0679, "step": 166470 }, { "epoch": 1.7787274961269297, "grad_norm": 0.19893145561218262, "learning_rate": 9.854850705775386e-07, "loss": 0.0363, "step": 166480 }, { "epoch": 1.7788343394412096, "grad_norm": 3.0418519973754883, "learning_rate": 9.85481051525839e-07, "loss": 0.0026, "step": 166490 }, { "epoch": 1.778941182755489, "grad_norm": 3.046696901321411, "learning_rate": 9.854770319259948e-07, "loss": 0.0407, "step": 166500 }, { "epoch": 1.7790480260697685, "grad_norm": 1.9200021028518677, "learning_rate": 9.854730117780103e-07, "loss": 0.0313, "step": 166510 }, { "epoch": 1.7791548693840484, "grad_norm": 0.2373175472021103, "learning_rate": 9.854689910818903e-07, "loss": 0.0022, "step": 166520 }, { "epoch": 1.779261712698328, "grad_norm": 3.8955976963043213, "learning_rate": 9.854649698376392e-07, "loss": 0.0289, "step": 166530 }, { "epoch": 1.7793685560126073, "grad_norm": 0.002122563309967518, "learning_rate": 9.854609480452611e-07, "loss": 0.0174, "step": 166540 }, { "epoch": 1.7794753993268873, "grad_norm": 2.9601824283599854, "learning_rate": 9.854569257047613e-07, "loss": 0.0499, "step": 166550 }, { "epoch": 1.7795822426411667, "grad_norm": 0.014761029742658138, "learning_rate": 9.85452902816144e-07, "loss": 0.0426, "step": 166560 }, { "epoch": 1.7796890859554464, "grad_norm": 0.06262235343456268, "learning_rate": 9.854488793794135e-07, "loss": 0.0335, "step": 166570 }, { "epoch": 1.779795929269726, "grad_norm": 3.274168014526367, "learning_rate": 9.854448553945747e-07, "loss": 0.0472, "step": 166580 }, { "epoch": 1.7799027725840055, "grad_norm": 0.008211202919483185, "learning_rate": 9.854408308616322e-07, "loss": 0.053, "step": 166590 }, { "epoch": 1.7800096158982852, "grad_norm": 3.4064793586730957, "learning_rate": 9.854368057805901e-07, "loss": 0.0271, "step": 166600 }, { "epoch": 1.780116459212565, "grad_norm": 0.16063769161701202, "learning_rate": 9.854327801514533e-07, "loss": 0.032, "step": 166610 }, { "epoch": 1.7802233025268444, "grad_norm": 6.048801422119141, "learning_rate": 9.854287539742263e-07, "loss": 0.045, "step": 166620 }, { "epoch": 1.780330145841124, "grad_norm": 0.8687781095504761, "learning_rate": 9.854247272489135e-07, "loss": 0.0178, "step": 166630 }, { "epoch": 1.7804369891554037, "grad_norm": 0.053299181163311005, "learning_rate": 9.854206999755197e-07, "loss": 0.0259, "step": 166640 }, { "epoch": 1.7805438324696832, "grad_norm": 0.9054144024848938, "learning_rate": 9.85416672154049e-07, "loss": 0.0373, "step": 166650 }, { "epoch": 1.7806506757839629, "grad_norm": 4.044333457946777, "learning_rate": 9.854126437845068e-07, "loss": 0.0258, "step": 166660 }, { "epoch": 1.7807575190982425, "grad_norm": 0.39446741342544556, "learning_rate": 9.854086148668965e-07, "loss": 0.028, "step": 166670 }, { "epoch": 1.780864362412522, "grad_norm": 1.8698335886001587, "learning_rate": 9.854045854012236e-07, "loss": 0.053, "step": 166680 }, { "epoch": 1.7809712057268017, "grad_norm": 2.6764588356018066, "learning_rate": 9.85400555387492e-07, "loss": 0.0279, "step": 166690 }, { "epoch": 1.7810780490410814, "grad_norm": 1.9807907342910767, "learning_rate": 9.853965248257068e-07, "loss": 0.0208, "step": 166700 }, { "epoch": 1.7811848923553608, "grad_norm": 6.294460296630859, "learning_rate": 9.853924937158721e-07, "loss": 0.0412, "step": 166710 }, { "epoch": 1.7812917356696405, "grad_norm": 23.694902420043945, "learning_rate": 9.85388462057993e-07, "loss": 0.0673, "step": 166720 }, { "epoch": 1.7813985789839202, "grad_norm": 1.4337111711502075, "learning_rate": 9.853844298520733e-07, "loss": 0.019, "step": 166730 }, { "epoch": 1.7815054222981996, "grad_norm": 1.3997702598571777, "learning_rate": 9.85380397098118e-07, "loss": 0.0348, "step": 166740 }, { "epoch": 1.7816122656124793, "grad_norm": 1.1760685443878174, "learning_rate": 9.853763637961316e-07, "loss": 0.0178, "step": 166750 }, { "epoch": 1.781719108926759, "grad_norm": 0.4508979320526123, "learning_rate": 9.853723299461187e-07, "loss": 0.0481, "step": 166760 }, { "epoch": 1.7818259522410385, "grad_norm": 6.380767822265625, "learning_rate": 9.853682955480839e-07, "loss": 0.049, "step": 166770 }, { "epoch": 1.7819327955553181, "grad_norm": 4.291813373565674, "learning_rate": 9.853642606020315e-07, "loss": 0.0212, "step": 166780 }, { "epoch": 1.7820396388695978, "grad_norm": 1.24593985080719, "learning_rate": 9.853602251079664e-07, "loss": 0.0282, "step": 166790 }, { "epoch": 1.7821464821838773, "grad_norm": 0.006533426232635975, "learning_rate": 9.85356189065893e-07, "loss": 0.0055, "step": 166800 }, { "epoch": 1.782253325498157, "grad_norm": 1.2595174312591553, "learning_rate": 9.853521524758157e-07, "loss": 0.0602, "step": 166810 }, { "epoch": 1.7823601688124366, "grad_norm": 0.3284299969673157, "learning_rate": 9.85348115337739e-07, "loss": 0.0207, "step": 166820 }, { "epoch": 1.782467012126716, "grad_norm": 5.450192451477051, "learning_rate": 9.85344077651668e-07, "loss": 0.0401, "step": 166830 }, { "epoch": 1.7825738554409958, "grad_norm": 8.76606273651123, "learning_rate": 9.853400394176067e-07, "loss": 0.0247, "step": 166840 }, { "epoch": 1.7826806987552755, "grad_norm": 1.267240047454834, "learning_rate": 9.8533600063556e-07, "loss": 0.0247, "step": 166850 }, { "epoch": 1.782787542069555, "grad_norm": 5.132885932922363, "learning_rate": 9.85331961305532e-07, "loss": 0.0356, "step": 166860 }, { "epoch": 1.7828943853838346, "grad_norm": 0.16385000944137573, "learning_rate": 9.853279214275279e-07, "loss": 0.0619, "step": 166870 }, { "epoch": 1.7830012286981143, "grad_norm": 6.558326244354248, "learning_rate": 9.85323881001552e-07, "loss": 0.0513, "step": 166880 }, { "epoch": 1.7831080720123937, "grad_norm": 0.9869331121444702, "learning_rate": 9.853198400276085e-07, "loss": 0.0119, "step": 166890 }, { "epoch": 1.7832149153266734, "grad_norm": 3.9345970153808594, "learning_rate": 9.853157985057026e-07, "loss": 0.0187, "step": 166900 }, { "epoch": 1.783321758640953, "grad_norm": 2.976980209350586, "learning_rate": 9.853117564358385e-07, "loss": 0.0096, "step": 166910 }, { "epoch": 1.7834286019552326, "grad_norm": 0.0060519748367369175, "learning_rate": 9.853077138180206e-07, "loss": 0.0264, "step": 166920 }, { "epoch": 1.7835354452695122, "grad_norm": 0.03985902667045593, "learning_rate": 9.853036706522538e-07, "loss": 0.0231, "step": 166930 }, { "epoch": 1.783642288583792, "grad_norm": 1.457291603088379, "learning_rate": 9.852996269385424e-07, "loss": 0.021, "step": 166940 }, { "epoch": 1.7837491318980714, "grad_norm": 2.370453119277954, "learning_rate": 9.852955826768911e-07, "loss": 0.0163, "step": 166950 }, { "epoch": 1.783855975212351, "grad_norm": 0.10063911974430084, "learning_rate": 9.852915378673046e-07, "loss": 0.0209, "step": 166960 }, { "epoch": 1.7839628185266307, "grad_norm": 0.2549724876880646, "learning_rate": 9.852874925097875e-07, "loss": 0.017, "step": 166970 }, { "epoch": 1.7840696618409102, "grad_norm": 0.31722769141197205, "learning_rate": 9.852834466043438e-07, "loss": 0.0212, "step": 166980 }, { "epoch": 1.7841765051551899, "grad_norm": 9.419057846069336, "learning_rate": 9.852794001509785e-07, "loss": 0.0479, "step": 166990 }, { "epoch": 1.7842833484694696, "grad_norm": 3.11551570892334, "learning_rate": 9.852753531496963e-07, "loss": 0.0801, "step": 167000 }, { "epoch": 1.784390191783749, "grad_norm": 3.849245548248291, "learning_rate": 9.852713056005016e-07, "loss": 0.0245, "step": 167010 }, { "epoch": 1.7844970350980287, "grad_norm": 0.07003243267536163, "learning_rate": 9.85267257503399e-07, "loss": 0.0167, "step": 167020 }, { "epoch": 1.7846038784123084, "grad_norm": 0.017281563952565193, "learning_rate": 9.852632088583928e-07, "loss": 0.0238, "step": 167030 }, { "epoch": 1.7847107217265878, "grad_norm": 0.06541859358549118, "learning_rate": 9.85259159665488e-07, "loss": 0.0453, "step": 167040 }, { "epoch": 1.7848175650408675, "grad_norm": 1.545810580253601, "learning_rate": 9.852551099246892e-07, "loss": 0.014, "step": 167050 }, { "epoch": 1.7849244083551472, "grad_norm": 0.0699663981795311, "learning_rate": 9.852510596360003e-07, "loss": 0.0524, "step": 167060 }, { "epoch": 1.7850312516694267, "grad_norm": 0.15251991152763367, "learning_rate": 9.852470087994265e-07, "loss": 0.0892, "step": 167070 }, { "epoch": 1.7851380949837063, "grad_norm": 5.846555709838867, "learning_rate": 9.852429574149722e-07, "loss": 0.0375, "step": 167080 }, { "epoch": 1.785244938297986, "grad_norm": 1.2320406436920166, "learning_rate": 9.852389054826422e-07, "loss": 0.0343, "step": 167090 }, { "epoch": 1.7853517816122655, "grad_norm": 2.5254669189453125, "learning_rate": 9.852348530024405e-07, "loss": 0.0456, "step": 167100 }, { "epoch": 1.7854586249265452, "grad_norm": 6.636351108551025, "learning_rate": 9.852307999743722e-07, "loss": 0.0334, "step": 167110 }, { "epoch": 1.7855654682408248, "grad_norm": 0.8837178945541382, "learning_rate": 9.85226746398442e-07, "loss": 0.0089, "step": 167120 }, { "epoch": 1.7856723115551043, "grad_norm": 1.1661440134048462, "learning_rate": 9.852226922746539e-07, "loss": 0.0669, "step": 167130 }, { "epoch": 1.7857791548693842, "grad_norm": 5.112630367279053, "learning_rate": 9.852186376030128e-07, "loss": 0.0203, "step": 167140 }, { "epoch": 1.7858859981836637, "grad_norm": 0.4563484191894531, "learning_rate": 9.85214582383523e-07, "loss": 0.0166, "step": 167150 }, { "epoch": 1.7859928414979431, "grad_norm": 0.6717748641967773, "learning_rate": 9.852105266161895e-07, "loss": 0.0419, "step": 167160 }, { "epoch": 1.786099684812223, "grad_norm": 0.008331799879670143, "learning_rate": 9.852064703010167e-07, "loss": 0.0128, "step": 167170 }, { "epoch": 1.7862065281265025, "grad_norm": 3.4327232837677, "learning_rate": 9.852024134380093e-07, "loss": 0.0698, "step": 167180 }, { "epoch": 1.786313371440782, "grad_norm": 1.6816258430480957, "learning_rate": 9.851983560271714e-07, "loss": 0.0191, "step": 167190 }, { "epoch": 1.7864202147550619, "grad_norm": 6.76240873336792, "learning_rate": 9.851942980685084e-07, "loss": 0.0683, "step": 167200 }, { "epoch": 1.7865270580693413, "grad_norm": 8.416280746459961, "learning_rate": 9.851902395620241e-07, "loss": 0.0191, "step": 167210 }, { "epoch": 1.7866339013836208, "grad_norm": 0.200940802693367, "learning_rate": 9.851861805077236e-07, "loss": 0.0113, "step": 167220 }, { "epoch": 1.7867407446979007, "grad_norm": 0.08602114766836166, "learning_rate": 9.85182120905611e-07, "loss": 0.0209, "step": 167230 }, { "epoch": 1.7868475880121801, "grad_norm": 6.777104377746582, "learning_rate": 9.851780607556914e-07, "loss": 0.028, "step": 167240 }, { "epoch": 1.7869544313264596, "grad_norm": 0.1337517350912094, "learning_rate": 9.85174000057969e-07, "loss": 0.0433, "step": 167250 }, { "epoch": 1.7870612746407395, "grad_norm": 0.18017947673797607, "learning_rate": 9.851699388124487e-07, "loss": 0.0235, "step": 167260 }, { "epoch": 1.787168117955019, "grad_norm": 4.806396007537842, "learning_rate": 9.851658770191348e-07, "loss": 0.0151, "step": 167270 }, { "epoch": 1.7872749612692984, "grad_norm": 6.432272434234619, "learning_rate": 9.851618146780322e-07, "loss": 0.0484, "step": 167280 }, { "epoch": 1.7873818045835783, "grad_norm": 6.424030780792236, "learning_rate": 9.85157751789145e-07, "loss": 0.0386, "step": 167290 }, { "epoch": 1.7874886478978578, "grad_norm": 0.17465732991695404, "learning_rate": 9.851536883524784e-07, "loss": 0.0121, "step": 167300 }, { "epoch": 1.7875954912121375, "grad_norm": 0.4233962297439575, "learning_rate": 9.851496243680364e-07, "loss": 0.0995, "step": 167310 }, { "epoch": 1.7877023345264171, "grad_norm": 0.11092393100261688, "learning_rate": 9.85145559835824e-07, "loss": 0.0732, "step": 167320 }, { "epoch": 1.7878091778406966, "grad_norm": 0.0651882067322731, "learning_rate": 9.851414947558457e-07, "loss": 0.0082, "step": 167330 }, { "epoch": 1.7879160211549763, "grad_norm": 0.4531511664390564, "learning_rate": 9.851374291281057e-07, "loss": 0.017, "step": 167340 }, { "epoch": 1.788022864469256, "grad_norm": 2.64909291267395, "learning_rate": 9.851333629526093e-07, "loss": 0.043, "step": 167350 }, { "epoch": 1.7881297077835354, "grad_norm": 0.9314971566200256, "learning_rate": 9.851292962293607e-07, "loss": 0.0142, "step": 167360 }, { "epoch": 1.788236551097815, "grad_norm": 9.560550689697266, "learning_rate": 9.851252289583643e-07, "loss": 0.0469, "step": 167370 }, { "epoch": 1.7883433944120948, "grad_norm": 1.4441951513290405, "learning_rate": 9.85121161139625e-07, "loss": 0.0108, "step": 167380 }, { "epoch": 1.7884502377263742, "grad_norm": 0.029819972813129425, "learning_rate": 9.851170927731474e-07, "loss": 0.0432, "step": 167390 }, { "epoch": 1.788557081040654, "grad_norm": 0.3811291456222534, "learning_rate": 9.85113023858936e-07, "loss": 0.0118, "step": 167400 }, { "epoch": 1.7886639243549336, "grad_norm": 1.4232001304626465, "learning_rate": 9.851089543969952e-07, "loss": 0.0123, "step": 167410 }, { "epoch": 1.788770767669213, "grad_norm": 0.7075396180152893, "learning_rate": 9.851048843873298e-07, "loss": 0.0195, "step": 167420 }, { "epoch": 1.7888776109834927, "grad_norm": 0.16552290320396423, "learning_rate": 9.851008138299445e-07, "loss": 0.0309, "step": 167430 }, { "epoch": 1.7889844542977724, "grad_norm": 0.027375461533665657, "learning_rate": 9.850967427248435e-07, "loss": 0.0431, "step": 167440 }, { "epoch": 1.7890912976120519, "grad_norm": 2.019050359725952, "learning_rate": 9.85092671072032e-07, "loss": 0.0219, "step": 167450 }, { "epoch": 1.7891981409263316, "grad_norm": 6.849345684051514, "learning_rate": 9.850885988715141e-07, "loss": 0.0206, "step": 167460 }, { "epoch": 1.7893049842406112, "grad_norm": 3.4515419006347656, "learning_rate": 9.850845261232944e-07, "loss": 0.0305, "step": 167470 }, { "epoch": 1.7894118275548907, "grad_norm": 6.001298904418945, "learning_rate": 9.850804528273779e-07, "loss": 0.0321, "step": 167480 }, { "epoch": 1.7895186708691704, "grad_norm": 7.365006446838379, "learning_rate": 9.85076378983769e-07, "loss": 0.0511, "step": 167490 }, { "epoch": 1.78962551418345, "grad_norm": 0.013549931347370148, "learning_rate": 9.850723045924722e-07, "loss": 0.0117, "step": 167500 }, { "epoch": 1.7897323574977295, "grad_norm": 1.6460869312286377, "learning_rate": 9.85068229653492e-07, "loss": 0.0252, "step": 167510 }, { "epoch": 1.7898392008120092, "grad_norm": 2.2389872074127197, "learning_rate": 9.850641541668334e-07, "loss": 0.0482, "step": 167520 }, { "epoch": 1.7899460441262889, "grad_norm": 1.6752492189407349, "learning_rate": 9.850600781325006e-07, "loss": 0.0301, "step": 167530 }, { "epoch": 1.7900528874405683, "grad_norm": 4.697051525115967, "learning_rate": 9.850560015504984e-07, "loss": 0.0226, "step": 167540 }, { "epoch": 1.790159730754848, "grad_norm": 0.05344703420996666, "learning_rate": 9.850519244208314e-07, "loss": 0.0329, "step": 167550 }, { "epoch": 1.7902665740691277, "grad_norm": 4.816856861114502, "learning_rate": 9.850478467435042e-07, "loss": 0.0477, "step": 167560 }, { "epoch": 1.7903734173834072, "grad_norm": 5.183881759643555, "learning_rate": 9.85043768518521e-07, "loss": 0.0244, "step": 167570 }, { "epoch": 1.7904802606976868, "grad_norm": 0.902738630771637, "learning_rate": 9.850396897458872e-07, "loss": 0.0341, "step": 167580 }, { "epoch": 1.7905871040119665, "grad_norm": 1.504105806350708, "learning_rate": 9.85035610425607e-07, "loss": 0.0187, "step": 167590 }, { "epoch": 1.790693947326246, "grad_norm": 7.1925177574157715, "learning_rate": 9.85031530557685e-07, "loss": 0.0112, "step": 167600 }, { "epoch": 1.7908007906405257, "grad_norm": 2.7379310131073, "learning_rate": 9.850274501421255e-07, "loss": 0.0017, "step": 167610 }, { "epoch": 1.7909076339548053, "grad_norm": 0.24632281064987183, "learning_rate": 9.850233691789334e-07, "loss": 0.0147, "step": 167620 }, { "epoch": 1.7910144772690848, "grad_norm": 9.526785850524902, "learning_rate": 9.850192876681136e-07, "loss": 0.0645, "step": 167630 }, { "epoch": 1.7911213205833645, "grad_norm": 6.526034355163574, "learning_rate": 9.850152056096703e-07, "loss": 0.0456, "step": 167640 }, { "epoch": 1.7912281638976442, "grad_norm": 4.350174427032471, "learning_rate": 9.85011123003608e-07, "loss": 0.0264, "step": 167650 }, { "epoch": 1.7913350072119236, "grad_norm": 2.872101306915283, "learning_rate": 9.85007039849932e-07, "loss": 0.0228, "step": 167660 }, { "epoch": 1.7914418505262033, "grad_norm": 0.021645590662956238, "learning_rate": 9.85002956148646e-07, "loss": 0.0312, "step": 167670 }, { "epoch": 1.791548693840483, "grad_norm": 0.06414338946342468, "learning_rate": 9.849988718997554e-07, "loss": 0.0326, "step": 167680 }, { "epoch": 1.7916555371547624, "grad_norm": 2.3134613037109375, "learning_rate": 9.849947871032643e-07, "loss": 0.0031, "step": 167690 }, { "epoch": 1.7917623804690421, "grad_norm": 6.7237868309021, "learning_rate": 9.849907017591773e-07, "loss": 0.0988, "step": 167700 }, { "epoch": 1.7918692237833218, "grad_norm": 2.212406635284424, "learning_rate": 9.849866158674995e-07, "loss": 0.0594, "step": 167710 }, { "epoch": 1.7919760670976013, "grad_norm": 2.195564031600952, "learning_rate": 9.84982529428235e-07, "loss": 0.0285, "step": 167720 }, { "epoch": 1.792082910411881, "grad_norm": 3.684328079223633, "learning_rate": 9.84978442441389e-07, "loss": 0.056, "step": 167730 }, { "epoch": 1.7921897537261606, "grad_norm": 8.03464412689209, "learning_rate": 9.849743549069654e-07, "loss": 0.029, "step": 167740 }, { "epoch": 1.79229659704044, "grad_norm": 9.06422233581543, "learning_rate": 9.84970266824969e-07, "loss": 0.0527, "step": 167750 }, { "epoch": 1.7924034403547198, "grad_norm": 0.014030721969902515, "learning_rate": 9.849661781954048e-07, "loss": 0.0383, "step": 167760 }, { "epoch": 1.7925102836689994, "grad_norm": 1.071438193321228, "learning_rate": 9.849620890182772e-07, "loss": 0.0301, "step": 167770 }, { "epoch": 1.792617126983279, "grad_norm": 0.022337058559060097, "learning_rate": 9.849579992935908e-07, "loss": 0.0527, "step": 167780 }, { "epoch": 1.7927239702975586, "grad_norm": 3.811774492263794, "learning_rate": 9.849539090213502e-07, "loss": 0.0288, "step": 167790 }, { "epoch": 1.7928308136118383, "grad_norm": 4.02850866317749, "learning_rate": 9.8494981820156e-07, "loss": 0.0201, "step": 167800 }, { "epoch": 1.7929376569261177, "grad_norm": 2.6591107845306396, "learning_rate": 9.849457268342247e-07, "loss": 0.0351, "step": 167810 }, { "epoch": 1.7930445002403974, "grad_norm": 3.4064390659332275, "learning_rate": 9.849416349193494e-07, "loss": 0.043, "step": 167820 }, { "epoch": 1.793151343554677, "grad_norm": 0.019746938720345497, "learning_rate": 9.849375424569382e-07, "loss": 0.0225, "step": 167830 }, { "epoch": 1.7932581868689565, "grad_norm": 0.45284363627433777, "learning_rate": 9.849334494469958e-07, "loss": 0.0465, "step": 167840 }, { "epoch": 1.7933650301832362, "grad_norm": 10.617463111877441, "learning_rate": 9.849293558895273e-07, "loss": 0.0426, "step": 167850 }, { "epoch": 1.793471873497516, "grad_norm": 0.16661758720874786, "learning_rate": 9.849252617845367e-07, "loss": 0.0098, "step": 167860 }, { "epoch": 1.7935787168117954, "grad_norm": 4.528097152709961, "learning_rate": 9.849211671320288e-07, "loss": 0.0183, "step": 167870 }, { "epoch": 1.7936855601260753, "grad_norm": 0.2858162522315979, "learning_rate": 9.849170719320084e-07, "loss": 0.0071, "step": 167880 }, { "epoch": 1.7937924034403547, "grad_norm": 0.014203098602592945, "learning_rate": 9.849129761844801e-07, "loss": 0.0147, "step": 167890 }, { "epoch": 1.7938992467546342, "grad_norm": 2.4317524433135986, "learning_rate": 9.849088798894483e-07, "loss": 0.0358, "step": 167900 }, { "epoch": 1.794006090068914, "grad_norm": 1.1908825635910034, "learning_rate": 9.84904783046918e-07, "loss": 0.0157, "step": 167910 }, { "epoch": 1.7941129333831936, "grad_norm": 3.736464500427246, "learning_rate": 9.849006856568935e-07, "loss": 0.0308, "step": 167920 }, { "epoch": 1.794219776697473, "grad_norm": 4.350022792816162, "learning_rate": 9.848965877193795e-07, "loss": 0.0206, "step": 167930 }, { "epoch": 1.794326620011753, "grad_norm": 5.063422203063965, "learning_rate": 9.848924892343806e-07, "loss": 0.0711, "step": 167940 }, { "epoch": 1.7944334633260324, "grad_norm": 5.344489097595215, "learning_rate": 9.848883902019016e-07, "loss": 0.0471, "step": 167950 }, { "epoch": 1.7945403066403118, "grad_norm": 0.05321000888943672, "learning_rate": 9.84884290621947e-07, "loss": 0.0031, "step": 167960 }, { "epoch": 1.7946471499545917, "grad_norm": 2.6939055919647217, "learning_rate": 9.848801904945214e-07, "loss": 0.0168, "step": 167970 }, { "epoch": 1.7947539932688712, "grad_norm": 4.21824312210083, "learning_rate": 9.848760898196294e-07, "loss": 0.0127, "step": 167980 }, { "epoch": 1.7948608365831507, "grad_norm": 0.5373203158378601, "learning_rate": 9.848719885972759e-07, "loss": 0.045, "step": 167990 }, { "epoch": 1.7949676798974306, "grad_norm": 1.6606327295303345, "learning_rate": 9.84867886827465e-07, "loss": 0.0148, "step": 168000 }, { "epoch": 1.79507452321171, "grad_norm": 0.5605081915855408, "learning_rate": 9.848637845102018e-07, "loss": 0.017, "step": 168010 }, { "epoch": 1.7951813665259895, "grad_norm": 9.26097583770752, "learning_rate": 9.84859681645491e-07, "loss": 0.0312, "step": 168020 }, { "epoch": 1.7952882098402694, "grad_norm": 0.015734970569610596, "learning_rate": 9.84855578233337e-07, "loss": 0.038, "step": 168030 }, { "epoch": 1.7953950531545488, "grad_norm": 2.6876754760742188, "learning_rate": 9.848514742737441e-07, "loss": 0.0143, "step": 168040 }, { "epoch": 1.7955018964688285, "grad_norm": 0.007184161338955164, "learning_rate": 9.848473697667176e-07, "loss": 0.0539, "step": 168050 }, { "epoch": 1.7956087397831082, "grad_norm": 1.259314775466919, "learning_rate": 9.848432647122619e-07, "loss": 0.0456, "step": 168060 }, { "epoch": 1.7957155830973877, "grad_norm": 0.27049410343170166, "learning_rate": 9.848391591103812e-07, "loss": 0.041, "step": 168070 }, { "epoch": 1.7958224264116673, "grad_norm": 4.048285961151123, "learning_rate": 9.848350529610808e-07, "loss": 0.0296, "step": 168080 }, { "epoch": 1.795929269725947, "grad_norm": 0.42014893889427185, "learning_rate": 9.84830946264365e-07, "loss": 0.021, "step": 168090 }, { "epoch": 1.7960361130402265, "grad_norm": 0.7108356952667236, "learning_rate": 9.848268390202383e-07, "loss": 0.0074, "step": 168100 }, { "epoch": 1.7961429563545062, "grad_norm": 0.11836402118206024, "learning_rate": 9.848227312287057e-07, "loss": 0.0303, "step": 168110 }, { "epoch": 1.7962497996687858, "grad_norm": 3.928457021713257, "learning_rate": 9.848186228897716e-07, "loss": 0.019, "step": 168120 }, { "epoch": 1.7963566429830653, "grad_norm": 2.2259793281555176, "learning_rate": 9.848145140034407e-07, "loss": 0.0068, "step": 168130 }, { "epoch": 1.796463486297345, "grad_norm": 6.197721481323242, "learning_rate": 9.848104045697175e-07, "loss": 0.0357, "step": 168140 }, { "epoch": 1.7965703296116247, "grad_norm": 10.02210807800293, "learning_rate": 9.848062945886068e-07, "loss": 0.0493, "step": 168150 }, { "epoch": 1.7966771729259041, "grad_norm": 1.441681146621704, "learning_rate": 9.848021840601133e-07, "loss": 0.0162, "step": 168160 }, { "epoch": 1.7967840162401838, "grad_norm": 0.5160133242607117, "learning_rate": 9.847980729842414e-07, "loss": 0.0304, "step": 168170 }, { "epoch": 1.7968908595544635, "grad_norm": 1.1163586378097534, "learning_rate": 9.84793961360996e-07, "loss": 0.0077, "step": 168180 }, { "epoch": 1.796997702868743, "grad_norm": 1.697131872177124, "learning_rate": 9.847898491903817e-07, "loss": 0.0359, "step": 168190 }, { "epoch": 1.7971045461830226, "grad_norm": 0.2721652686595917, "learning_rate": 9.847857364724028e-07, "loss": 0.0253, "step": 168200 }, { "epoch": 1.7972113894973023, "grad_norm": 0.16844499111175537, "learning_rate": 9.847816232070644e-07, "loss": 0.0113, "step": 168210 }, { "epoch": 1.7973182328115818, "grad_norm": 0.07016360759735107, "learning_rate": 9.84777509394371e-07, "loss": 0.0165, "step": 168220 }, { "epoch": 1.7974250761258614, "grad_norm": 0.6340906023979187, "learning_rate": 9.84773395034327e-07, "loss": 0.0248, "step": 168230 }, { "epoch": 1.7975319194401411, "grad_norm": 7.327901840209961, "learning_rate": 9.847692801269377e-07, "loss": 0.0281, "step": 168240 }, { "epoch": 1.7976387627544206, "grad_norm": 0.30791860818862915, "learning_rate": 9.847651646722068e-07, "loss": 0.0357, "step": 168250 }, { "epoch": 1.7977456060687003, "grad_norm": 6.657280445098877, "learning_rate": 9.847610486701397e-07, "loss": 0.0188, "step": 168260 }, { "epoch": 1.79785244938298, "grad_norm": 1.7289875745773315, "learning_rate": 9.847569321207407e-07, "loss": 0.0415, "step": 168270 }, { "epoch": 1.7979592926972594, "grad_norm": 3.2111358642578125, "learning_rate": 9.847528150240146e-07, "loss": 0.0099, "step": 168280 }, { "epoch": 1.798066136011539, "grad_norm": 3.3831775188446045, "learning_rate": 9.847486973799659e-07, "loss": 0.0032, "step": 168290 }, { "epoch": 1.7981729793258188, "grad_norm": 3.774113655090332, "learning_rate": 9.847445791885994e-07, "loss": 0.0297, "step": 168300 }, { "epoch": 1.7982798226400982, "grad_norm": 2.2436599731445312, "learning_rate": 9.847404604499196e-07, "loss": 0.0603, "step": 168310 }, { "epoch": 1.798386665954378, "grad_norm": 9.307111740112305, "learning_rate": 9.847363411639312e-07, "loss": 0.0376, "step": 168320 }, { "epoch": 1.7984935092686576, "grad_norm": 1.1606694459915161, "learning_rate": 9.84732221330639e-07, "loss": 0.0097, "step": 168330 }, { "epoch": 1.798600352582937, "grad_norm": 5.892682075500488, "learning_rate": 9.847281009500476e-07, "loss": 0.0488, "step": 168340 }, { "epoch": 1.7987071958972167, "grad_norm": 3.1121394634246826, "learning_rate": 9.847239800221616e-07, "loss": 0.0239, "step": 168350 }, { "epoch": 1.7988140392114964, "grad_norm": 1.3148995637893677, "learning_rate": 9.847198585469856e-07, "loss": 0.0757, "step": 168360 }, { "epoch": 1.7989208825257759, "grad_norm": 3.1362855434417725, "learning_rate": 9.847157365245243e-07, "loss": 0.0815, "step": 168370 }, { "epoch": 1.7990277258400555, "grad_norm": 1.5785343647003174, "learning_rate": 9.847116139547823e-07, "loss": 0.0179, "step": 168380 }, { "epoch": 1.7991345691543352, "grad_norm": 0.049472302198410034, "learning_rate": 9.847074908377642e-07, "loss": 0.0197, "step": 168390 }, { "epoch": 1.7992414124686147, "grad_norm": 4.371938228607178, "learning_rate": 9.84703367173475e-07, "loss": 0.0871, "step": 168400 }, { "epoch": 1.7993482557828944, "grad_norm": 3.908013343811035, "learning_rate": 9.84699242961919e-07, "loss": 0.0623, "step": 168410 }, { "epoch": 1.799455099097174, "grad_norm": 0.030808376148343086, "learning_rate": 9.846951182031009e-07, "loss": 0.0234, "step": 168420 }, { "epoch": 1.7995619424114535, "grad_norm": 5.269152641296387, "learning_rate": 9.846909928970255e-07, "loss": 0.0392, "step": 168430 }, { "epoch": 1.7996687857257332, "grad_norm": 5.192298889160156, "learning_rate": 9.846868670436973e-07, "loss": 0.0197, "step": 168440 }, { "epoch": 1.7997756290400129, "grad_norm": 0.36903172731399536, "learning_rate": 9.846827406431211e-07, "loss": 0.0215, "step": 168450 }, { "epoch": 1.7998824723542923, "grad_norm": 0.0551561675965786, "learning_rate": 9.846786136953015e-07, "loss": 0.0282, "step": 168460 }, { "epoch": 1.799989315668572, "grad_norm": 0.16732224822044373, "learning_rate": 9.846744862002433e-07, "loss": 0.0416, "step": 168470 }, { "epoch": 1.8000961589828517, "grad_norm": 2.217317581176758, "learning_rate": 9.84670358157951e-07, "loss": 0.0267, "step": 168480 }, { "epoch": 1.8002030022971311, "grad_norm": 1.404017448425293, "learning_rate": 9.84666229568429e-07, "loss": 0.0195, "step": 168490 }, { "epoch": 1.8003098456114108, "grad_norm": 7.891551494598389, "learning_rate": 9.846621004316825e-07, "loss": 0.0459, "step": 168500 }, { "epoch": 1.8004166889256905, "grad_norm": 6.352059364318848, "learning_rate": 9.846579707477159e-07, "loss": 0.0569, "step": 168510 }, { "epoch": 1.80052353223997, "grad_norm": 1.7846871614456177, "learning_rate": 9.84653840516534e-07, "loss": 0.0142, "step": 168520 }, { "epoch": 1.8006303755542497, "grad_norm": 3.3540282249450684, "learning_rate": 9.84649709738141e-07, "loss": 0.0204, "step": 168530 }, { "epoch": 1.8007372188685293, "grad_norm": 0.219112366437912, "learning_rate": 9.846455784125421e-07, "loss": 0.0095, "step": 168540 }, { "epoch": 1.8008440621828088, "grad_norm": 0.5227446556091309, "learning_rate": 9.846414465397417e-07, "loss": 0.0374, "step": 168550 }, { "epoch": 1.8009509054970885, "grad_norm": 7.026749134063721, "learning_rate": 9.846373141197445e-07, "loss": 0.0471, "step": 168560 }, { "epoch": 1.8010577488113682, "grad_norm": 0.04600205272436142, "learning_rate": 9.846331811525556e-07, "loss": 0.0055, "step": 168570 }, { "epoch": 1.8011645921256476, "grad_norm": 2.428872585296631, "learning_rate": 9.846290476381786e-07, "loss": 0.0424, "step": 168580 }, { "epoch": 1.8012714354399273, "grad_norm": 0.7902774810791016, "learning_rate": 9.846249135766193e-07, "loss": 0.0529, "step": 168590 }, { "epoch": 1.801378278754207, "grad_norm": 0.09491266310214996, "learning_rate": 9.846207789678817e-07, "loss": 0.0074, "step": 168600 }, { "epoch": 1.8014851220684864, "grad_norm": 23.61420440673828, "learning_rate": 9.846166438119706e-07, "loss": 0.0219, "step": 168610 }, { "epoch": 1.8015919653827663, "grad_norm": 0.011917118914425373, "learning_rate": 9.846125081088909e-07, "loss": 0.0342, "step": 168620 }, { "epoch": 1.8016988086970458, "grad_norm": 0.12816888093948364, "learning_rate": 9.84608371858647e-07, "loss": 0.0412, "step": 168630 }, { "epoch": 1.8018056520113253, "grad_norm": 0.18710610270500183, "learning_rate": 9.846042350612437e-07, "loss": 0.0104, "step": 168640 }, { "epoch": 1.8019124953256052, "grad_norm": 2.092205762863159, "learning_rate": 9.84600097716686e-07, "loss": 0.0556, "step": 168650 }, { "epoch": 1.8020193386398846, "grad_norm": 1.305390477180481, "learning_rate": 9.845959598249776e-07, "loss": 0.022, "step": 168660 }, { "epoch": 1.802126181954164, "grad_norm": 0.0727597177028656, "learning_rate": 9.845918213861241e-07, "loss": 0.0017, "step": 168670 }, { "epoch": 1.802233025268444, "grad_norm": 2.19431471824646, "learning_rate": 9.8458768240013e-07, "loss": 0.0102, "step": 168680 }, { "epoch": 1.8023398685827234, "grad_norm": 0.1135668084025383, "learning_rate": 9.845835428669995e-07, "loss": 0.0433, "step": 168690 }, { "epoch": 1.802446711897003, "grad_norm": 4.128012657165527, "learning_rate": 9.845794027867379e-07, "loss": 0.0495, "step": 168700 }, { "epoch": 1.8025535552112828, "grad_norm": 0.006861093919724226, "learning_rate": 9.845752621593495e-07, "loss": 0.012, "step": 168710 }, { "epoch": 1.8026603985255623, "grad_norm": 0.008401590399444103, "learning_rate": 9.845711209848388e-07, "loss": 0.0183, "step": 168720 }, { "epoch": 1.8027672418398417, "grad_norm": 0.00891499500721693, "learning_rate": 9.84566979263211e-07, "loss": 0.0461, "step": 168730 }, { "epoch": 1.8028740851541216, "grad_norm": 0.6471120119094849, "learning_rate": 9.845628369944706e-07, "loss": 0.0419, "step": 168740 }, { "epoch": 1.802980928468401, "grad_norm": 0.06972969323396683, "learning_rate": 9.845586941786218e-07, "loss": 0.0268, "step": 168750 }, { "epoch": 1.8030877717826805, "grad_norm": 8.537877082824707, "learning_rate": 9.8455455081567e-07, "loss": 0.0098, "step": 168760 }, { "epoch": 1.8031946150969604, "grad_norm": 0.757401168346405, "learning_rate": 9.845504069056194e-07, "loss": 0.0283, "step": 168770 }, { "epoch": 1.80330145841124, "grad_norm": 0.24673064053058624, "learning_rate": 9.845462624484749e-07, "loss": 0.0373, "step": 168780 }, { "epoch": 1.8034083017255196, "grad_norm": 1.2609608173370361, "learning_rate": 9.845421174442409e-07, "loss": 0.0113, "step": 168790 }, { "epoch": 1.8035151450397993, "grad_norm": 1.6554492712020874, "learning_rate": 9.845379718929225e-07, "loss": 0.0226, "step": 168800 }, { "epoch": 1.8036219883540787, "grad_norm": 4.384690761566162, "learning_rate": 9.845338257945242e-07, "loss": 0.0286, "step": 168810 }, { "epoch": 1.8037288316683584, "grad_norm": 6.514416694641113, "learning_rate": 9.845296791490505e-07, "loss": 0.0351, "step": 168820 }, { "epoch": 1.803835674982638, "grad_norm": 0.03824467957019806, "learning_rate": 9.845255319565063e-07, "loss": 0.007, "step": 168830 }, { "epoch": 1.8039425182969175, "grad_norm": 7.092497825622559, "learning_rate": 9.84521384216896e-07, "loss": 0.024, "step": 168840 }, { "epoch": 1.8040493616111972, "grad_norm": 2.320427894592285, "learning_rate": 9.845172359302247e-07, "loss": 0.0247, "step": 168850 }, { "epoch": 1.804156204925477, "grad_norm": 0.05375804007053375, "learning_rate": 9.845130870964967e-07, "loss": 0.0127, "step": 168860 }, { "epoch": 1.8042630482397564, "grad_norm": 13.313648223876953, "learning_rate": 9.845089377157172e-07, "loss": 0.0242, "step": 168870 }, { "epoch": 1.804369891554036, "grad_norm": 4.3527607917785645, "learning_rate": 9.845047877878903e-07, "loss": 0.033, "step": 168880 }, { "epoch": 1.8044767348683157, "grad_norm": 1.997146487236023, "learning_rate": 9.84500637313021e-07, "loss": 0.0084, "step": 168890 }, { "epoch": 1.8045835781825952, "grad_norm": 0.5550535917282104, "learning_rate": 9.844964862911138e-07, "loss": 0.0322, "step": 168900 }, { "epoch": 1.8046904214968749, "grad_norm": 0.266965389251709, "learning_rate": 9.844923347221737e-07, "loss": 0.0568, "step": 168910 }, { "epoch": 1.8047972648111545, "grad_norm": 0.02798941172659397, "learning_rate": 9.84488182606205e-07, "loss": 0.0351, "step": 168920 }, { "epoch": 1.804904108125434, "grad_norm": 0.9639837741851807, "learning_rate": 9.844840299432125e-07, "loss": 0.0411, "step": 168930 }, { "epoch": 1.8050109514397137, "grad_norm": 0.0556233748793602, "learning_rate": 9.844798767332012e-07, "loss": 0.0213, "step": 168940 }, { "epoch": 1.8051177947539934, "grad_norm": 1.6721171140670776, "learning_rate": 9.844757229761753e-07, "loss": 0.0771, "step": 168950 }, { "epoch": 1.8052246380682728, "grad_norm": 0.038510095328092575, "learning_rate": 9.8447156867214e-07, "loss": 0.0097, "step": 168960 }, { "epoch": 1.8053314813825525, "grad_norm": 0.13635946810245514, "learning_rate": 9.844674138210995e-07, "loss": 0.0173, "step": 168970 }, { "epoch": 1.8054383246968322, "grad_norm": 0.07370687276124954, "learning_rate": 9.84463258423059e-07, "loss": 0.0198, "step": 168980 }, { "epoch": 1.8055451680111116, "grad_norm": 0.08287227898836136, "learning_rate": 9.844591024780228e-07, "loss": 0.0721, "step": 168990 }, { "epoch": 1.8056520113253913, "grad_norm": 2.34230375289917, "learning_rate": 9.844549459859957e-07, "loss": 0.0117, "step": 169000 }, { "epoch": 1.805758854639671, "grad_norm": 0.09972122311592102, "learning_rate": 9.844507889469823e-07, "loss": 0.105, "step": 169010 }, { "epoch": 1.8058656979539505, "grad_norm": 0.011351615190505981, "learning_rate": 9.844466313609873e-07, "loss": 0.0103, "step": 169020 }, { "epoch": 1.8059725412682301, "grad_norm": 0.4252798855304718, "learning_rate": 9.844424732280158e-07, "loss": 0.0478, "step": 169030 }, { "epoch": 1.8060793845825098, "grad_norm": 7.5761494636535645, "learning_rate": 9.84438314548072e-07, "loss": 0.0774, "step": 169040 }, { "epoch": 1.8061862278967893, "grad_norm": 3.5240299701690674, "learning_rate": 9.844341553211608e-07, "loss": 0.0136, "step": 169050 }, { "epoch": 1.806293071211069, "grad_norm": 5.830697059631348, "learning_rate": 9.84429995547287e-07, "loss": 0.0688, "step": 169060 }, { "epoch": 1.8063999145253486, "grad_norm": 6.1125664710998535, "learning_rate": 9.84425835226455e-07, "loss": 0.0667, "step": 169070 }, { "epoch": 1.806506757839628, "grad_norm": 14.205093383789062, "learning_rate": 9.844216743586699e-07, "loss": 0.0389, "step": 169080 }, { "epoch": 1.8066136011539078, "grad_norm": 7.442950248718262, "learning_rate": 9.84417512943936e-07, "loss": 0.0169, "step": 169090 }, { "epoch": 1.8067204444681875, "grad_norm": 4.021759033203125, "learning_rate": 9.844133509822581e-07, "loss": 0.0168, "step": 169100 }, { "epoch": 1.806827287782467, "grad_norm": 0.5157321691513062, "learning_rate": 9.84409188473641e-07, "loss": 0.0423, "step": 169110 }, { "epoch": 1.8069341310967466, "grad_norm": 1.819266438484192, "learning_rate": 9.844050254180895e-07, "loss": 0.0203, "step": 169120 }, { "epoch": 1.8070409744110263, "grad_norm": 1.2258670330047607, "learning_rate": 9.844008618156081e-07, "loss": 0.0814, "step": 169130 }, { "epoch": 1.8071478177253057, "grad_norm": 4.012691020965576, "learning_rate": 9.843966976662014e-07, "loss": 0.0291, "step": 169140 }, { "epoch": 1.8072546610395854, "grad_norm": 2.541193723678589, "learning_rate": 9.843925329698746e-07, "loss": 0.0248, "step": 169150 }, { "epoch": 1.8073615043538651, "grad_norm": 1.3568023443222046, "learning_rate": 9.843883677266318e-07, "loss": 0.0249, "step": 169160 }, { "epoch": 1.8074683476681446, "grad_norm": 1.7288470268249512, "learning_rate": 9.843842019364782e-07, "loss": 0.014, "step": 169170 }, { "epoch": 1.8075751909824243, "grad_norm": 4.211883544921875, "learning_rate": 9.84380035599418e-07, "loss": 0.0171, "step": 169180 }, { "epoch": 1.807682034296704, "grad_norm": 2.334805727005005, "learning_rate": 9.843758687154565e-07, "loss": 0.013, "step": 169190 }, { "epoch": 1.8077888776109834, "grad_norm": 1.427075982093811, "learning_rate": 9.843717012845978e-07, "loss": 0.0151, "step": 169200 }, { "epoch": 1.807895720925263, "grad_norm": 4.345757961273193, "learning_rate": 9.843675333068471e-07, "loss": 0.0124, "step": 169210 }, { "epoch": 1.8080025642395428, "grad_norm": 3.1288962364196777, "learning_rate": 9.843633647822088e-07, "loss": 0.0327, "step": 169220 }, { "epoch": 1.8081094075538222, "grad_norm": 3.3302061557769775, "learning_rate": 9.843591957106877e-07, "loss": 0.0212, "step": 169230 }, { "epoch": 1.808216250868102, "grad_norm": 4.213078022003174, "learning_rate": 9.843550260922884e-07, "loss": 0.0103, "step": 169240 }, { "epoch": 1.8083230941823816, "grad_norm": 0.2552860975265503, "learning_rate": 9.84350855927016e-07, "loss": 0.0772, "step": 169250 }, { "epoch": 1.808429937496661, "grad_norm": 0.02672206424176693, "learning_rate": 9.843466852148748e-07, "loss": 0.0269, "step": 169260 }, { "epoch": 1.8085367808109407, "grad_norm": 0.013327248394489288, "learning_rate": 9.843425139558695e-07, "loss": 0.0175, "step": 169270 }, { "epoch": 1.8086436241252204, "grad_norm": 4.591618061065674, "learning_rate": 9.84338342150005e-07, "loss": 0.0198, "step": 169280 }, { "epoch": 1.8087504674394999, "grad_norm": 6.132844924926758, "learning_rate": 9.84334169797286e-07, "loss": 0.0381, "step": 169290 }, { "epoch": 1.8088573107537795, "grad_norm": 0.9604542851448059, "learning_rate": 9.843299968977173e-07, "loss": 0.0344, "step": 169300 }, { "epoch": 1.8089641540680592, "grad_norm": 11.07905101776123, "learning_rate": 9.843258234513034e-07, "loss": 0.0346, "step": 169310 }, { "epoch": 1.8090709973823387, "grad_norm": 7.554103851318359, "learning_rate": 9.84321649458049e-07, "loss": 0.0505, "step": 169320 }, { "epoch": 1.8091778406966184, "grad_norm": 0.22406256198883057, "learning_rate": 9.84317474917959e-07, "loss": 0.0227, "step": 169330 }, { "epoch": 1.809284684010898, "grad_norm": 5.681430816650391, "learning_rate": 9.84313299831038e-07, "loss": 0.0372, "step": 169340 }, { "epoch": 1.8093915273251775, "grad_norm": 2.033691167831421, "learning_rate": 9.843091241972906e-07, "loss": 0.015, "step": 169350 }, { "epoch": 1.8094983706394574, "grad_norm": 1.509333610534668, "learning_rate": 9.84304948016722e-07, "loss": 0.0268, "step": 169360 }, { "epoch": 1.8096052139537369, "grad_norm": 2.467575788497925, "learning_rate": 9.84300771289336e-07, "loss": 0.0133, "step": 169370 }, { "epoch": 1.8097120572680163, "grad_norm": 0.3382493853569031, "learning_rate": 9.842965940151381e-07, "loss": 0.0345, "step": 169380 }, { "epoch": 1.8098189005822962, "grad_norm": 6.663996696472168, "learning_rate": 9.84292416194133e-07, "loss": 0.011, "step": 169390 }, { "epoch": 1.8099257438965757, "grad_norm": 9.216240882873535, "learning_rate": 9.842882378263252e-07, "loss": 0.0302, "step": 169400 }, { "epoch": 1.8100325872108551, "grad_norm": 2.843750238418579, "learning_rate": 9.842840589117192e-07, "loss": 0.0352, "step": 169410 }, { "epoch": 1.810139430525135, "grad_norm": 0.707068920135498, "learning_rate": 9.8427987945032e-07, "loss": 0.0191, "step": 169420 }, { "epoch": 1.8102462738394145, "grad_norm": 0.7341902256011963, "learning_rate": 9.842756994421324e-07, "loss": 0.0348, "step": 169430 }, { "epoch": 1.810353117153694, "grad_norm": 2.8287413120269775, "learning_rate": 9.84271518887161e-07, "loss": 0.0552, "step": 169440 }, { "epoch": 1.8104599604679739, "grad_norm": 5.15531063079834, "learning_rate": 9.842673377854105e-07, "loss": 0.0198, "step": 169450 }, { "epoch": 1.8105668037822533, "grad_norm": 0.07291004806756973, "learning_rate": 9.842631561368854e-07, "loss": 0.0368, "step": 169460 }, { "epoch": 1.8106736470965328, "grad_norm": 0.0019385908963158727, "learning_rate": 9.842589739415908e-07, "loss": 0.1403, "step": 169470 }, { "epoch": 1.8107804904108127, "grad_norm": 3.4211533069610596, "learning_rate": 9.842547911995314e-07, "loss": 0.0284, "step": 169480 }, { "epoch": 1.8108873337250921, "grad_norm": 3.991211175918579, "learning_rate": 9.842506079107115e-07, "loss": 0.015, "step": 169490 }, { "epoch": 1.8109941770393716, "grad_norm": 2.9243035316467285, "learning_rate": 9.842464240751364e-07, "loss": 0.0347, "step": 169500 }, { "epoch": 1.8111010203536515, "grad_norm": 1.5809134244918823, "learning_rate": 9.842422396928103e-07, "loss": 0.0445, "step": 169510 }, { "epoch": 1.811207863667931, "grad_norm": 1.344607949256897, "learning_rate": 9.842380547637383e-07, "loss": 0.013, "step": 169520 }, { "epoch": 1.8113147069822106, "grad_norm": 5.830296993255615, "learning_rate": 9.84233869287925e-07, "loss": 0.0596, "step": 169530 }, { "epoch": 1.8114215502964903, "grad_norm": 0.14216333627700806, "learning_rate": 9.842296832653752e-07, "loss": 0.0342, "step": 169540 }, { "epoch": 1.8115283936107698, "grad_norm": 0.09570305049419403, "learning_rate": 9.842254966960934e-07, "loss": 0.0379, "step": 169550 }, { "epoch": 1.8116352369250495, "grad_norm": 13.831548690795898, "learning_rate": 9.842213095800845e-07, "loss": 0.0167, "step": 169560 }, { "epoch": 1.8117420802393291, "grad_norm": 0.04722446948289871, "learning_rate": 9.842171219173532e-07, "loss": 0.0315, "step": 169570 }, { "epoch": 1.8118489235536086, "grad_norm": 0.06889010220766068, "learning_rate": 9.842129337079042e-07, "loss": 0.009, "step": 169580 }, { "epoch": 1.8119557668678883, "grad_norm": 0.6457775831222534, "learning_rate": 9.842087449517423e-07, "loss": 0.0486, "step": 169590 }, { "epoch": 1.812062610182168, "grad_norm": 10.383259773254395, "learning_rate": 9.84204555648872e-07, "loss": 0.0449, "step": 169600 }, { "epoch": 1.8121694534964474, "grad_norm": 0.020387524738907814, "learning_rate": 9.842003657992984e-07, "loss": 0.056, "step": 169610 }, { "epoch": 1.812276296810727, "grad_norm": 0.2179918736219406, "learning_rate": 9.84196175403026e-07, "loss": 0.0385, "step": 169620 }, { "epoch": 1.8123831401250068, "grad_norm": 5.367665767669678, "learning_rate": 9.841919844600598e-07, "loss": 0.023, "step": 169630 }, { "epoch": 1.8124899834392862, "grad_norm": 4.190692901611328, "learning_rate": 9.84187792970404e-07, "loss": 0.032, "step": 169640 }, { "epoch": 1.812596826753566, "grad_norm": 0.07822061330080032, "learning_rate": 9.841836009340635e-07, "loss": 0.0211, "step": 169650 }, { "epoch": 1.8127036700678456, "grad_norm": 0.18117108941078186, "learning_rate": 9.841794083510436e-07, "loss": 0.034, "step": 169660 }, { "epoch": 1.812810513382125, "grad_norm": 0.02167283371090889, "learning_rate": 9.841752152213484e-07, "loss": 0.0273, "step": 169670 }, { "epoch": 1.8129173566964047, "grad_norm": 0.08992589265108109, "learning_rate": 9.841710215449827e-07, "loss": 0.0153, "step": 169680 }, { "epoch": 1.8130242000106844, "grad_norm": 12.019989967346191, "learning_rate": 9.841668273219516e-07, "loss": 0.0319, "step": 169690 }, { "epoch": 1.8131310433249639, "grad_norm": 0.05348735302686691, "learning_rate": 9.841626325522596e-07, "loss": 0.0204, "step": 169700 }, { "epoch": 1.8132378866392436, "grad_norm": 0.005225947592407465, "learning_rate": 9.841584372359113e-07, "loss": 0.0379, "step": 169710 }, { "epoch": 1.8133447299535232, "grad_norm": 0.5772237181663513, "learning_rate": 9.841542413729115e-07, "loss": 0.0092, "step": 169720 }, { "epoch": 1.8134515732678027, "grad_norm": 3.3576672077178955, "learning_rate": 9.841500449632654e-07, "loss": 0.0192, "step": 169730 }, { "epoch": 1.8135584165820824, "grad_norm": 0.7138607501983643, "learning_rate": 9.841458480069771e-07, "loss": 0.0075, "step": 169740 }, { "epoch": 1.813665259896362, "grad_norm": 0.008654278703033924, "learning_rate": 9.841416505040517e-07, "loss": 0.0183, "step": 169750 }, { "epoch": 1.8137721032106415, "grad_norm": 2.891749382019043, "learning_rate": 9.841374524544937e-07, "loss": 0.0263, "step": 169760 }, { "epoch": 1.8138789465249212, "grad_norm": 0.7145403623580933, "learning_rate": 9.84133253858308e-07, "loss": 0.0531, "step": 169770 }, { "epoch": 1.813985789839201, "grad_norm": 0.1263933628797531, "learning_rate": 9.841290547154993e-07, "loss": 0.0492, "step": 169780 }, { "epoch": 1.8140926331534803, "grad_norm": 1.2452608346939087, "learning_rate": 9.841248550260724e-07, "loss": 0.0246, "step": 169790 }, { "epoch": 1.81419947646776, "grad_norm": 0.7752758264541626, "learning_rate": 9.84120654790032e-07, "loss": 0.0178, "step": 169800 }, { "epoch": 1.8143063197820397, "grad_norm": 1.3065015077590942, "learning_rate": 9.84116454007383e-07, "loss": 0.0533, "step": 169810 }, { "epoch": 1.8144131630963192, "grad_norm": 0.0026342151686549187, "learning_rate": 9.841122526781297e-07, "loss": 0.0094, "step": 169820 }, { "epoch": 1.8145200064105989, "grad_norm": 0.8403593897819519, "learning_rate": 9.841080508022774e-07, "loss": 0.0363, "step": 169830 }, { "epoch": 1.8146268497248785, "grad_norm": 0.1574528068304062, "learning_rate": 9.841038483798303e-07, "loss": 0.0257, "step": 169840 }, { "epoch": 1.814733693039158, "grad_norm": 2.1870009899139404, "learning_rate": 9.840996454107937e-07, "loss": 0.0173, "step": 169850 }, { "epoch": 1.8148405363534377, "grad_norm": 5.239084243774414, "learning_rate": 9.840954418951719e-07, "loss": 0.0178, "step": 169860 }, { "epoch": 1.8149473796677174, "grad_norm": 0.007404798176139593, "learning_rate": 9.840912378329698e-07, "loss": 0.0275, "step": 169870 }, { "epoch": 1.8150542229819968, "grad_norm": 0.048922400921583176, "learning_rate": 9.840870332241922e-07, "loss": 0.0133, "step": 169880 }, { "epoch": 1.8151610662962765, "grad_norm": 0.012272159568965435, "learning_rate": 9.840828280688439e-07, "loss": 0.0313, "step": 169890 }, { "epoch": 1.8152679096105562, "grad_norm": 0.02733033522963524, "learning_rate": 9.840786223669294e-07, "loss": 0.0603, "step": 169900 }, { "epoch": 1.8153747529248356, "grad_norm": 0.015483248047530651, "learning_rate": 9.840744161184537e-07, "loss": 0.0248, "step": 169910 }, { "epoch": 1.8154815962391153, "grad_norm": 6.41538667678833, "learning_rate": 9.840702093234214e-07, "loss": 0.041, "step": 169920 }, { "epoch": 1.815588439553395, "grad_norm": 0.2154255211353302, "learning_rate": 9.840660019818372e-07, "loss": 0.0616, "step": 169930 }, { "epoch": 1.8156952828676745, "grad_norm": 0.5179392099380493, "learning_rate": 9.840617940937061e-07, "loss": 0.0138, "step": 169940 }, { "epoch": 1.8158021261819541, "grad_norm": 0.13912859559059143, "learning_rate": 9.840575856590327e-07, "loss": 0.0136, "step": 169950 }, { "epoch": 1.8159089694962338, "grad_norm": 0.9524275660514832, "learning_rate": 9.840533766778217e-07, "loss": 0.0185, "step": 169960 }, { "epoch": 1.8160158128105133, "grad_norm": 0.10077684372663498, "learning_rate": 9.84049167150078e-07, "loss": 0.0218, "step": 169970 }, { "epoch": 1.816122656124793, "grad_norm": 4.9434003829956055, "learning_rate": 9.84044957075806e-07, "loss": 0.0331, "step": 169980 }, { "epoch": 1.8162294994390726, "grad_norm": 2.553323984146118, "learning_rate": 9.840407464550109e-07, "loss": 0.0761, "step": 169990 }, { "epoch": 1.816336342753352, "grad_norm": 4.321229934692383, "learning_rate": 9.840365352876972e-07, "loss": 0.0241, "step": 170000 }, { "epoch": 1.8164431860676318, "grad_norm": 7.768074989318848, "learning_rate": 9.8403232357387e-07, "loss": 0.0404, "step": 170010 }, { "epoch": 1.8165500293819115, "grad_norm": 0.3557342290878296, "learning_rate": 9.840281113135333e-07, "loss": 0.0547, "step": 170020 }, { "epoch": 1.816656872696191, "grad_norm": 8.192033767700195, "learning_rate": 9.840238985066927e-07, "loss": 0.0318, "step": 170030 }, { "epoch": 1.8167637160104706, "grad_norm": 7.180049896240234, "learning_rate": 9.840196851533525e-07, "loss": 0.0184, "step": 170040 }, { "epoch": 1.8168705593247503, "grad_norm": 0.03762805461883545, "learning_rate": 9.840154712535173e-07, "loss": 0.0404, "step": 170050 }, { "epoch": 1.8169774026390297, "grad_norm": 4.983717918395996, "learning_rate": 9.840112568071923e-07, "loss": 0.0242, "step": 170060 }, { "epoch": 1.8170842459533094, "grad_norm": 0.868523120880127, "learning_rate": 9.840070418143822e-07, "loss": 0.0583, "step": 170070 }, { "epoch": 1.817191089267589, "grad_norm": 1.1733663082122803, "learning_rate": 9.840028262750914e-07, "loss": 0.0503, "step": 170080 }, { "epoch": 1.8172979325818686, "grad_norm": 0.27619048953056335, "learning_rate": 9.83998610189325e-07, "loss": 0.0117, "step": 170090 }, { "epoch": 1.8174047758961485, "grad_norm": 3.269615650177002, "learning_rate": 9.839943935570875e-07, "loss": 0.0189, "step": 170100 }, { "epoch": 1.817511619210428, "grad_norm": 0.23473621904850006, "learning_rate": 9.83990176378384e-07, "loss": 0.0185, "step": 170110 }, { "epoch": 1.8176184625247074, "grad_norm": 2.5117697715759277, "learning_rate": 9.83985958653219e-07, "loss": 0.0273, "step": 170120 }, { "epoch": 1.8177253058389873, "grad_norm": 0.5019004940986633, "learning_rate": 9.839817403815973e-07, "loss": 0.0226, "step": 170130 }, { "epoch": 1.8178321491532667, "grad_norm": 0.03738030791282654, "learning_rate": 9.839775215635237e-07, "loss": 0.0163, "step": 170140 }, { "epoch": 1.8179389924675462, "grad_norm": 8.187113761901855, "learning_rate": 9.83973302199003e-07, "loss": 0.0312, "step": 170150 }, { "epoch": 1.818045835781826, "grad_norm": 0.01707562990486622, "learning_rate": 9.839690822880398e-07, "loss": 0.02, "step": 170160 }, { "epoch": 1.8181526790961056, "grad_norm": 0.0625942200422287, "learning_rate": 9.83964861830639e-07, "loss": 0.0614, "step": 170170 }, { "epoch": 1.818259522410385, "grad_norm": 3.541078567504883, "learning_rate": 9.839606408268053e-07, "loss": 0.0321, "step": 170180 }, { "epoch": 1.818366365724665, "grad_norm": 0.8997144103050232, "learning_rate": 9.839564192765436e-07, "loss": 0.0158, "step": 170190 }, { "epoch": 1.8184732090389444, "grad_norm": 6.146921634674072, "learning_rate": 9.839521971798585e-07, "loss": 0.05, "step": 170200 }, { "epoch": 1.8185800523532238, "grad_norm": 9.716773986816406, "learning_rate": 9.83947974536755e-07, "loss": 0.0369, "step": 170210 }, { "epoch": 1.8186868956675037, "grad_norm": 0.4961252510547638, "learning_rate": 9.839437513472376e-07, "loss": 0.0666, "step": 170220 }, { "epoch": 1.8187937389817832, "grad_norm": 0.030192045494914055, "learning_rate": 9.839395276113112e-07, "loss": 0.0065, "step": 170230 }, { "epoch": 1.8189005822960627, "grad_norm": 0.01304154098033905, "learning_rate": 9.839353033289805e-07, "loss": 0.0709, "step": 170240 }, { "epoch": 1.8190074256103426, "grad_norm": 4.79437780380249, "learning_rate": 9.839310785002503e-07, "loss": 0.0655, "step": 170250 }, { "epoch": 1.819114268924622, "grad_norm": 1.8469308614730835, "learning_rate": 9.839268531251255e-07, "loss": 0.0293, "step": 170260 }, { "epoch": 1.8192211122389017, "grad_norm": 0.03050735592842102, "learning_rate": 9.839226272036107e-07, "loss": 0.0408, "step": 170270 }, { "epoch": 1.8193279555531814, "grad_norm": 0.06161434203386307, "learning_rate": 9.839184007357109e-07, "loss": 0.0194, "step": 170280 }, { "epoch": 1.8194347988674608, "grad_norm": 6.3505682945251465, "learning_rate": 9.839141737214305e-07, "loss": 0.0433, "step": 170290 }, { "epoch": 1.8195416421817405, "grad_norm": 0.37180495262145996, "learning_rate": 9.839099461607746e-07, "loss": 0.0956, "step": 170300 }, { "epoch": 1.8196484854960202, "grad_norm": 0.537735104560852, "learning_rate": 9.839057180537476e-07, "loss": 0.0058, "step": 170310 }, { "epoch": 1.8197553288102997, "grad_norm": 0.02838178165256977, "learning_rate": 9.839014894003546e-07, "loss": 0.0356, "step": 170320 }, { "epoch": 1.8198621721245793, "grad_norm": 0.7893975973129272, "learning_rate": 9.838972602006005e-07, "loss": 0.0434, "step": 170330 }, { "epoch": 1.819969015438859, "grad_norm": 2.4330244064331055, "learning_rate": 9.838930304544896e-07, "loss": 0.025, "step": 170340 }, { "epoch": 1.8200758587531385, "grad_norm": 0.7179874777793884, "learning_rate": 9.838888001620273e-07, "loss": 0.0192, "step": 170350 }, { "epoch": 1.8201827020674182, "grad_norm": 8.786969184875488, "learning_rate": 9.838845693232177e-07, "loss": 0.0636, "step": 170360 }, { "epoch": 1.8202895453816978, "grad_norm": 4.594067573547363, "learning_rate": 9.83880337938066e-07, "loss": 0.0081, "step": 170370 }, { "epoch": 1.8203963886959773, "grad_norm": 1.4684815406799316, "learning_rate": 9.83876106006577e-07, "loss": 0.0325, "step": 170380 }, { "epoch": 1.820503232010257, "grad_norm": 1.3338232040405273, "learning_rate": 9.838718735287552e-07, "loss": 0.0281, "step": 170390 }, { "epoch": 1.8206100753245367, "grad_norm": 3.0430257320404053, "learning_rate": 9.838676405046056e-07, "loss": 0.0272, "step": 170400 }, { "epoch": 1.8207169186388161, "grad_norm": 4.5802388191223145, "learning_rate": 9.83863406934133e-07, "loss": 0.045, "step": 170410 }, { "epoch": 1.8208237619530958, "grad_norm": 0.41905325651168823, "learning_rate": 9.83859172817342e-07, "loss": 0.0115, "step": 170420 }, { "epoch": 1.8209306052673755, "grad_norm": 10.078729629516602, "learning_rate": 9.838549381542373e-07, "loss": 0.0192, "step": 170430 }, { "epoch": 1.821037448581655, "grad_norm": 0.44042566418647766, "learning_rate": 9.838507029448243e-07, "loss": 0.0516, "step": 170440 }, { "epoch": 1.8211442918959346, "grad_norm": 2.969269037246704, "learning_rate": 9.83846467189107e-07, "loss": 0.0335, "step": 170450 }, { "epoch": 1.8212511352102143, "grad_norm": 1.8800243139266968, "learning_rate": 9.838422308870907e-07, "loss": 0.0531, "step": 170460 }, { "epoch": 1.8213579785244938, "grad_norm": 0.11443589627742767, "learning_rate": 9.838379940387797e-07, "loss": 0.019, "step": 170470 }, { "epoch": 1.8214648218387735, "grad_norm": 1.5643281936645508, "learning_rate": 9.838337566441794e-07, "loss": 0.0355, "step": 170480 }, { "epoch": 1.8215716651530531, "grad_norm": 0.764167308807373, "learning_rate": 9.838295187032941e-07, "loss": 0.0082, "step": 170490 }, { "epoch": 1.8216785084673326, "grad_norm": 2.575972318649292, "learning_rate": 9.83825280216129e-07, "loss": 0.0323, "step": 170500 }, { "epoch": 1.8217853517816123, "grad_norm": 0.03162680193781853, "learning_rate": 9.838210411826883e-07, "loss": 0.0503, "step": 170510 }, { "epoch": 1.821892195095892, "grad_norm": 0.8536608815193176, "learning_rate": 9.838168016029772e-07, "loss": 0.0197, "step": 170520 }, { "epoch": 1.8219990384101714, "grad_norm": 1.4875017404556274, "learning_rate": 9.838125614770006e-07, "loss": 0.0356, "step": 170530 }, { "epoch": 1.822105881724451, "grad_norm": 0.032201800495386124, "learning_rate": 9.83808320804763e-07, "loss": 0.0968, "step": 170540 }, { "epoch": 1.8222127250387308, "grad_norm": 0.002346144523471594, "learning_rate": 9.838040795862693e-07, "loss": 0.03, "step": 170550 }, { "epoch": 1.8223195683530102, "grad_norm": 6.962099552154541, "learning_rate": 9.837998378215243e-07, "loss": 0.051, "step": 170560 }, { "epoch": 1.82242641166729, "grad_norm": 6.912394046783447, "learning_rate": 9.837955955105328e-07, "loss": 0.0238, "step": 170570 }, { "epoch": 1.8225332549815696, "grad_norm": 0.09435758739709854, "learning_rate": 9.837913526532995e-07, "loss": 0.0049, "step": 170580 }, { "epoch": 1.822640098295849, "grad_norm": 6.7002787590026855, "learning_rate": 9.837871092498291e-07, "loss": 0.0502, "step": 170590 }, { "epoch": 1.8227469416101287, "grad_norm": 1.8822046518325806, "learning_rate": 9.837828653001269e-07, "loss": 0.0334, "step": 170600 }, { "epoch": 1.8228537849244084, "grad_norm": 3.290161371231079, "learning_rate": 9.83778620804197e-07, "loss": 0.014, "step": 170610 }, { "epoch": 1.8229606282386879, "grad_norm": 2.625391721725464, "learning_rate": 9.837743757620447e-07, "loss": 0.0242, "step": 170620 }, { "epoch": 1.8230674715529676, "grad_norm": 0.24150848388671875, "learning_rate": 9.837701301736744e-07, "loss": 0.0149, "step": 170630 }, { "epoch": 1.8231743148672472, "grad_norm": 1.1944972276687622, "learning_rate": 9.837658840390913e-07, "loss": 0.0158, "step": 170640 }, { "epoch": 1.8232811581815267, "grad_norm": 1.9475350379943848, "learning_rate": 9.837616373583e-07, "loss": 0.0257, "step": 170650 }, { "epoch": 1.8233880014958064, "grad_norm": 7.838727951049805, "learning_rate": 9.837573901313054e-07, "loss": 0.0585, "step": 170660 }, { "epoch": 1.823494844810086, "grad_norm": 0.6897859573364258, "learning_rate": 9.83753142358112e-07, "loss": 0.0179, "step": 170670 }, { "epoch": 1.8236016881243655, "grad_norm": 1.1213033199310303, "learning_rate": 9.83748894038725e-07, "loss": 0.0372, "step": 170680 }, { "epoch": 1.8237085314386452, "grad_norm": 8.576197624206543, "learning_rate": 9.837446451731488e-07, "loss": 0.0963, "step": 170690 }, { "epoch": 1.8238153747529249, "grad_norm": 2.271228551864624, "learning_rate": 9.837403957613885e-07, "loss": 0.0167, "step": 170700 }, { "epoch": 1.8239222180672043, "grad_norm": 1.6411055326461792, "learning_rate": 9.837361458034488e-07, "loss": 0.0413, "step": 170710 }, { "epoch": 1.824029061381484, "grad_norm": 9.50279712677002, "learning_rate": 9.837318952993342e-07, "loss": 0.0558, "step": 170720 }, { "epoch": 1.8241359046957637, "grad_norm": 3.8467252254486084, "learning_rate": 9.8372764424905e-07, "loss": 0.0334, "step": 170730 }, { "epoch": 1.8242427480100432, "grad_norm": 2.5856359004974365, "learning_rate": 9.83723392652601e-07, "loss": 0.0407, "step": 170740 }, { "epoch": 1.8243495913243228, "grad_norm": 0.041368599981069565, "learning_rate": 9.837191405099915e-07, "loss": 0.0369, "step": 170750 }, { "epoch": 1.8244564346386025, "grad_norm": 4.469498634338379, "learning_rate": 9.837148878212267e-07, "loss": 0.0317, "step": 170760 }, { "epoch": 1.824563277952882, "grad_norm": 3.1408839225769043, "learning_rate": 9.837106345863112e-07, "loss": 0.0142, "step": 170770 }, { "epoch": 1.8246701212671617, "grad_norm": 0.18077142536640167, "learning_rate": 9.8370638080525e-07, "loss": 0.0324, "step": 170780 }, { "epoch": 1.8247769645814413, "grad_norm": 1.3904725313186646, "learning_rate": 9.837021264780477e-07, "loss": 0.0247, "step": 170790 }, { "epoch": 1.8248838078957208, "grad_norm": 0.004505423828959465, "learning_rate": 9.836978716047091e-07, "loss": 0.0177, "step": 170800 }, { "epoch": 1.8249906512100005, "grad_norm": 4.086395740509033, "learning_rate": 9.836936161852393e-07, "loss": 0.0138, "step": 170810 }, { "epoch": 1.8250974945242802, "grad_norm": 5.948202133178711, "learning_rate": 9.836893602196426e-07, "loss": 0.0274, "step": 170820 }, { "epoch": 1.8252043378385596, "grad_norm": 16.287742614746094, "learning_rate": 9.836851037079244e-07, "loss": 0.0892, "step": 170830 }, { "epoch": 1.8253111811528395, "grad_norm": 1.397464394569397, "learning_rate": 9.83680846650089e-07, "loss": 0.0347, "step": 170840 }, { "epoch": 1.825418024467119, "grad_norm": 14.687027931213379, "learning_rate": 9.836765890461416e-07, "loss": 0.0378, "step": 170850 }, { "epoch": 1.8255248677813984, "grad_norm": 0.1815488636493683, "learning_rate": 9.836723308960867e-07, "loss": 0.0514, "step": 170860 }, { "epoch": 1.8256317110956783, "grad_norm": 0.029407719150185585, "learning_rate": 9.836680721999292e-07, "loss": 0.0127, "step": 170870 }, { "epoch": 1.8257385544099578, "grad_norm": 0.22082002460956573, "learning_rate": 9.83663812957674e-07, "loss": 0.0176, "step": 170880 }, { "epoch": 1.8258453977242373, "grad_norm": 0.009217804297804832, "learning_rate": 9.83659553169326e-07, "loss": 0.0377, "step": 170890 }, { "epoch": 1.8259522410385172, "grad_norm": 0.43358495831489563, "learning_rate": 9.836552928348897e-07, "loss": 0.0055, "step": 170900 }, { "epoch": 1.8260590843527966, "grad_norm": 0.47185277938842773, "learning_rate": 9.836510319543704e-07, "loss": 0.0228, "step": 170910 }, { "epoch": 1.826165927667076, "grad_norm": 2.1794378757476807, "learning_rate": 9.836467705277723e-07, "loss": 0.0083, "step": 170920 }, { "epoch": 1.826272770981356, "grad_norm": 0.13559992611408234, "learning_rate": 9.836425085551003e-07, "loss": 0.0449, "step": 170930 }, { "epoch": 1.8263796142956354, "grad_norm": 0.4088146388530731, "learning_rate": 9.836382460363598e-07, "loss": 0.0209, "step": 170940 }, { "epoch": 1.826486457609915, "grad_norm": 0.402657151222229, "learning_rate": 9.836339829715549e-07, "loss": 0.0166, "step": 170950 }, { "epoch": 1.8265933009241948, "grad_norm": 1.8959566354751587, "learning_rate": 9.836297193606909e-07, "loss": 0.0394, "step": 170960 }, { "epoch": 1.8267001442384743, "grad_norm": 0.049083128571510315, "learning_rate": 9.836254552037723e-07, "loss": 0.0707, "step": 170970 }, { "epoch": 1.8268069875527537, "grad_norm": 2.826079845428467, "learning_rate": 9.836211905008042e-07, "loss": 0.0262, "step": 170980 }, { "epoch": 1.8269138308670336, "grad_norm": 0.0049229031428694725, "learning_rate": 9.836169252517912e-07, "loss": 0.0074, "step": 170990 }, { "epoch": 1.827020674181313, "grad_norm": 0.26647043228149414, "learning_rate": 9.836126594567382e-07, "loss": 0.0261, "step": 171000 }, { "epoch": 1.8271275174955928, "grad_norm": 0.1216009259223938, "learning_rate": 9.836083931156501e-07, "loss": 0.0197, "step": 171010 }, { "epoch": 1.8272343608098724, "grad_norm": 1.633208990097046, "learning_rate": 9.836041262285315e-07, "loss": 0.0262, "step": 171020 }, { "epoch": 1.827341204124152, "grad_norm": 0.0030334617476910353, "learning_rate": 9.835998587953873e-07, "loss": 0.0122, "step": 171030 }, { "epoch": 1.8274480474384316, "grad_norm": 2.1334517002105713, "learning_rate": 9.835955908162226e-07, "loss": 0.0077, "step": 171040 }, { "epoch": 1.8275548907527113, "grad_norm": 1.7728878259658813, "learning_rate": 9.835913222910419e-07, "loss": 0.0192, "step": 171050 }, { "epoch": 1.8276617340669907, "grad_norm": 4.1521501541137695, "learning_rate": 9.8358705321985e-07, "loss": 0.0691, "step": 171060 }, { "epoch": 1.8277685773812704, "grad_norm": 4.9897847175598145, "learning_rate": 9.835827836026518e-07, "loss": 0.0938, "step": 171070 }, { "epoch": 1.82787542069555, "grad_norm": 0.5588814616203308, "learning_rate": 9.835785134394522e-07, "loss": 0.0324, "step": 171080 }, { "epoch": 1.8279822640098295, "grad_norm": 1.7407375574111938, "learning_rate": 9.83574242730256e-07, "loss": 0.03, "step": 171090 }, { "epoch": 1.8280891073241092, "grad_norm": 0.06272122263908386, "learning_rate": 9.835699714750678e-07, "loss": 0.019, "step": 171100 }, { "epoch": 1.828195950638389, "grad_norm": 15.827848434448242, "learning_rate": 9.835656996738928e-07, "loss": 0.0469, "step": 171110 }, { "epoch": 1.8283027939526684, "grad_norm": 0.7660520672798157, "learning_rate": 9.835614273267357e-07, "loss": 0.049, "step": 171120 }, { "epoch": 1.828409637266948, "grad_norm": 2.6489720344543457, "learning_rate": 9.83557154433601e-07, "loss": 0.0293, "step": 171130 }, { "epoch": 1.8285164805812277, "grad_norm": 1.2473660707473755, "learning_rate": 9.83552880994494e-07, "loss": 0.021, "step": 171140 }, { "epoch": 1.8286233238955072, "grad_norm": 10.485239028930664, "learning_rate": 9.835486070094192e-07, "loss": 0.043, "step": 171150 }, { "epoch": 1.8287301672097869, "grad_norm": 3.6008315086364746, "learning_rate": 9.835443324783814e-07, "loss": 0.0713, "step": 171160 }, { "epoch": 1.8288370105240666, "grad_norm": 3.4566690921783447, "learning_rate": 9.835400574013858e-07, "loss": 0.033, "step": 171170 }, { "epoch": 1.828943853838346, "grad_norm": 4.791715145111084, "learning_rate": 9.83535781778437e-07, "loss": 0.0435, "step": 171180 }, { "epoch": 1.8290506971526257, "grad_norm": 0.6480404138565063, "learning_rate": 9.835315056095395e-07, "loss": 0.0446, "step": 171190 }, { "epoch": 1.8291575404669054, "grad_norm": 2.8308184146881104, "learning_rate": 9.835272288946985e-07, "loss": 0.0605, "step": 171200 }, { "epoch": 1.8292643837811848, "grad_norm": 0.8458094596862793, "learning_rate": 9.83522951633919e-07, "loss": 0.0419, "step": 171210 }, { "epoch": 1.8293712270954645, "grad_norm": 0.20496582984924316, "learning_rate": 9.835186738272055e-07, "loss": 0.0216, "step": 171220 }, { "epoch": 1.8294780704097442, "grad_norm": 2.1797420978546143, "learning_rate": 9.83514395474563e-07, "loss": 0.0332, "step": 171230 }, { "epoch": 1.8295849137240237, "grad_norm": 1.1933432817459106, "learning_rate": 9.83510116575996e-07, "loss": 0.0153, "step": 171240 }, { "epoch": 1.8296917570383033, "grad_norm": 0.27501311898231506, "learning_rate": 9.835058371315098e-07, "loss": 0.0285, "step": 171250 }, { "epoch": 1.829798600352583, "grad_norm": 0.056478142738342285, "learning_rate": 9.83501557141109e-07, "loss": 0.0054, "step": 171260 }, { "epoch": 1.8299054436668625, "grad_norm": 0.02192273736000061, "learning_rate": 9.834972766047983e-07, "loss": 0.0245, "step": 171270 }, { "epoch": 1.8300122869811422, "grad_norm": 0.025143496692180634, "learning_rate": 9.834929955225828e-07, "loss": 0.019, "step": 171280 }, { "epoch": 1.8301191302954218, "grad_norm": 6.0320658683776855, "learning_rate": 9.834887138944672e-07, "loss": 0.0371, "step": 171290 }, { "epoch": 1.8302259736097013, "grad_norm": 0.10834243893623352, "learning_rate": 9.834844317204563e-07, "loss": 0.0098, "step": 171300 }, { "epoch": 1.830332816923981, "grad_norm": 0.08540746569633484, "learning_rate": 9.83480149000555e-07, "loss": 0.0506, "step": 171310 }, { "epoch": 1.8304396602382607, "grad_norm": 4.528687477111816, "learning_rate": 9.834758657347683e-07, "loss": 0.0674, "step": 171320 }, { "epoch": 1.8305465035525401, "grad_norm": 0.5768362283706665, "learning_rate": 9.834715819231007e-07, "loss": 0.0201, "step": 171330 }, { "epoch": 1.8306533468668198, "grad_norm": 0.05572311952710152, "learning_rate": 9.834672975655572e-07, "loss": 0.0526, "step": 171340 }, { "epoch": 1.8307601901810995, "grad_norm": 0.9655906558036804, "learning_rate": 9.834630126621427e-07, "loss": 0.0528, "step": 171350 }, { "epoch": 1.830867033495379, "grad_norm": 0.8848704695701599, "learning_rate": 9.834587272128618e-07, "loss": 0.0129, "step": 171360 }, { "epoch": 1.8309738768096586, "grad_norm": 4.243828773498535, "learning_rate": 9.834544412177197e-07, "loss": 0.0696, "step": 171370 }, { "epoch": 1.8310807201239383, "grad_norm": 6.293213844299316, "learning_rate": 9.834501546767209e-07, "loss": 0.0628, "step": 171380 }, { "epoch": 1.8311875634382178, "grad_norm": 2.643110513687134, "learning_rate": 9.834458675898704e-07, "loss": 0.0309, "step": 171390 }, { "epoch": 1.8312944067524974, "grad_norm": 2.7420425415039062, "learning_rate": 9.834415799571732e-07, "loss": 0.0203, "step": 171400 }, { "epoch": 1.8314012500667771, "grad_norm": 0.17123660445213318, "learning_rate": 9.834372917786338e-07, "loss": 0.0487, "step": 171410 }, { "epoch": 1.8315080933810566, "grad_norm": 0.17234714329242706, "learning_rate": 9.834330030542573e-07, "loss": 0.0338, "step": 171420 }, { "epoch": 1.8316149366953363, "grad_norm": 1.665966272354126, "learning_rate": 9.834287137840485e-07, "loss": 0.0281, "step": 171430 }, { "epoch": 1.831721780009616, "grad_norm": 1.2825794219970703, "learning_rate": 9.83424423968012e-07, "loss": 0.0218, "step": 171440 }, { "epoch": 1.8318286233238954, "grad_norm": 0.15223410725593567, "learning_rate": 9.834201336061531e-07, "loss": 0.0084, "step": 171450 }, { "epoch": 1.831935466638175, "grad_norm": 8.45838451385498, "learning_rate": 9.834158426984761e-07, "loss": 0.0697, "step": 171460 }, { "epoch": 1.8320423099524548, "grad_norm": 0.5745729207992554, "learning_rate": 9.834115512449863e-07, "loss": 0.0366, "step": 171470 }, { "epoch": 1.8321491532667342, "grad_norm": 1.797335147857666, "learning_rate": 9.834072592456884e-07, "loss": 0.0255, "step": 171480 }, { "epoch": 1.832255996581014, "grad_norm": 0.8997216820716858, "learning_rate": 9.834029667005872e-07, "loss": 0.0377, "step": 171490 }, { "epoch": 1.8323628398952936, "grad_norm": 2.3101606369018555, "learning_rate": 9.833986736096875e-07, "loss": 0.0145, "step": 171500 }, { "epoch": 1.832469683209573, "grad_norm": 4.762669563293457, "learning_rate": 9.833943799729942e-07, "loss": 0.0106, "step": 171510 }, { "epoch": 1.8325765265238527, "grad_norm": 0.29422813653945923, "learning_rate": 9.833900857905122e-07, "loss": 0.0469, "step": 171520 }, { "epoch": 1.8326833698381324, "grad_norm": 0.3939938545227051, "learning_rate": 9.833857910622463e-07, "loss": 0.0621, "step": 171530 }, { "epoch": 1.8327902131524119, "grad_norm": 4.806388854980469, "learning_rate": 9.833814957882015e-07, "loss": 0.0407, "step": 171540 }, { "epoch": 1.8328970564666915, "grad_norm": 6.289590835571289, "learning_rate": 9.833771999683824e-07, "loss": 0.0344, "step": 171550 }, { "epoch": 1.8330038997809712, "grad_norm": 2.969216823577881, "learning_rate": 9.83372903602794e-07, "loss": 0.0597, "step": 171560 }, { "epoch": 1.8331107430952507, "grad_norm": 0.13578839600086212, "learning_rate": 9.83368606691441e-07, "loss": 0.0483, "step": 171570 }, { "epoch": 1.8332175864095306, "grad_norm": 3.953429698944092, "learning_rate": 9.833643092343285e-07, "loss": 0.0143, "step": 171580 }, { "epoch": 1.83332442972381, "grad_norm": 0.08892740309238434, "learning_rate": 9.833600112314611e-07, "loss": 0.032, "step": 171590 }, { "epoch": 1.8334312730380895, "grad_norm": 6.6937994956970215, "learning_rate": 9.833557126828437e-07, "loss": 0.0644, "step": 171600 }, { "epoch": 1.8335381163523694, "grad_norm": 4.951778411865234, "learning_rate": 9.833514135884812e-07, "loss": 0.0415, "step": 171610 }, { "epoch": 1.8336449596666489, "grad_norm": 0.011390088126063347, "learning_rate": 9.833471139483788e-07, "loss": 0.0207, "step": 171620 }, { "epoch": 1.8337518029809283, "grad_norm": 1.4114115238189697, "learning_rate": 9.833428137625407e-07, "loss": 0.0522, "step": 171630 }, { "epoch": 1.8338586462952082, "grad_norm": 5.713343620300293, "learning_rate": 9.833385130309722e-07, "loss": 0.0307, "step": 171640 }, { "epoch": 1.8339654896094877, "grad_norm": 0.37733811140060425, "learning_rate": 9.83334211753678e-07, "loss": 0.0212, "step": 171650 }, { "epoch": 1.8340723329237671, "grad_norm": 0.19995251297950745, "learning_rate": 9.833299099306632e-07, "loss": 0.046, "step": 171660 }, { "epoch": 1.834179176238047, "grad_norm": 0.5074077248573303, "learning_rate": 9.833256075619322e-07, "loss": 0.0214, "step": 171670 }, { "epoch": 1.8342860195523265, "grad_norm": 5.470860004425049, "learning_rate": 9.833213046474902e-07, "loss": 0.0309, "step": 171680 }, { "epoch": 1.834392862866606, "grad_norm": 6.3232598304748535, "learning_rate": 9.83317001187342e-07, "loss": 0.0197, "step": 171690 }, { "epoch": 1.8344997061808859, "grad_norm": 0.15726950764656067, "learning_rate": 9.833126971814925e-07, "loss": 0.0277, "step": 171700 }, { "epoch": 1.8346065494951653, "grad_norm": 3.885019302368164, "learning_rate": 9.833083926299464e-07, "loss": 0.0715, "step": 171710 }, { "epoch": 1.8347133928094448, "grad_norm": 8.117432594299316, "learning_rate": 9.833040875327086e-07, "loss": 0.0913, "step": 171720 }, { "epoch": 1.8348202361237247, "grad_norm": 1.4020026922225952, "learning_rate": 9.83299781889784e-07, "loss": 0.0211, "step": 171730 }, { "epoch": 1.8349270794380041, "grad_norm": 0.24808725714683533, "learning_rate": 9.832954757011774e-07, "loss": 0.0081, "step": 171740 }, { "epoch": 1.8350339227522838, "grad_norm": 4.72951602935791, "learning_rate": 9.83291168966894e-07, "loss": 0.0586, "step": 171750 }, { "epoch": 1.8351407660665635, "grad_norm": 4.684790134429932, "learning_rate": 9.832868616869383e-07, "loss": 0.0321, "step": 171760 }, { "epoch": 1.835247609380843, "grad_norm": 0.10232339054346085, "learning_rate": 9.83282553861315e-07, "loss": 0.0125, "step": 171770 }, { "epoch": 1.8353544526951227, "grad_norm": 9.412155151367188, "learning_rate": 9.832782454900295e-07, "loss": 0.0417, "step": 171780 }, { "epoch": 1.8354612960094023, "grad_norm": 3.4492311477661133, "learning_rate": 9.832739365730863e-07, "loss": 0.0203, "step": 171790 }, { "epoch": 1.8355681393236818, "grad_norm": 1.5743409395217896, "learning_rate": 9.832696271104904e-07, "loss": 0.02, "step": 171800 }, { "epoch": 1.8356749826379615, "grad_norm": 0.6912767291069031, "learning_rate": 9.832653171022467e-07, "loss": 0.026, "step": 171810 }, { "epoch": 1.8357818259522412, "grad_norm": 0.0020936341024935246, "learning_rate": 9.832610065483598e-07, "loss": 0.0225, "step": 171820 }, { "epoch": 1.8358886692665206, "grad_norm": 13.292747497558594, "learning_rate": 9.832566954488346e-07, "loss": 0.0674, "step": 171830 }, { "epoch": 1.8359955125808003, "grad_norm": 0.5332393646240234, "learning_rate": 9.832523838036764e-07, "loss": 0.0105, "step": 171840 }, { "epoch": 1.83610235589508, "grad_norm": 0.7965389490127563, "learning_rate": 9.832480716128897e-07, "loss": 0.0503, "step": 171850 }, { "epoch": 1.8362091992093594, "grad_norm": 0.3378235101699829, "learning_rate": 9.832437588764795e-07, "loss": 0.0498, "step": 171860 }, { "epoch": 1.8363160425236391, "grad_norm": 3.0672194957733154, "learning_rate": 9.832394455944507e-07, "loss": 0.0075, "step": 171870 }, { "epoch": 1.8364228858379188, "grad_norm": 4.2318434715271, "learning_rate": 9.83235131766808e-07, "loss": 0.0866, "step": 171880 }, { "epoch": 1.8365297291521983, "grad_norm": 6.6891608238220215, "learning_rate": 9.832308173935563e-07, "loss": 0.0531, "step": 171890 }, { "epoch": 1.836636572466478, "grad_norm": 0.04961833730340004, "learning_rate": 9.832265024747007e-07, "loss": 0.0312, "step": 171900 }, { "epoch": 1.8367434157807576, "grad_norm": 1.4411972761154175, "learning_rate": 9.832221870102458e-07, "loss": 0.038, "step": 171910 }, { "epoch": 1.836850259095037, "grad_norm": 4.297513484954834, "learning_rate": 9.832178710001964e-07, "loss": 0.0329, "step": 171920 }, { "epoch": 1.8369571024093168, "grad_norm": 1.6509102582931519, "learning_rate": 9.832135544445578e-07, "loss": 0.0352, "step": 171930 }, { "epoch": 1.8370639457235964, "grad_norm": 0.2064920961856842, "learning_rate": 9.832092373433345e-07, "loss": 0.0344, "step": 171940 }, { "epoch": 1.837170789037876, "grad_norm": 5.597133159637451, "learning_rate": 9.832049196965316e-07, "loss": 0.0044, "step": 171950 }, { "epoch": 1.8372776323521556, "grad_norm": 6.814153671264648, "learning_rate": 9.83200601504154e-07, "loss": 0.0708, "step": 171960 }, { "epoch": 1.8373844756664353, "grad_norm": 3.6531083583831787, "learning_rate": 9.831962827662061e-07, "loss": 0.0302, "step": 171970 }, { "epoch": 1.8374913189807147, "grad_norm": 0.02275528386235237, "learning_rate": 9.831919634826933e-07, "loss": 0.0192, "step": 171980 }, { "epoch": 1.8375981622949944, "grad_norm": 0.8936533331871033, "learning_rate": 9.831876436536203e-07, "loss": 0.004, "step": 171990 }, { "epoch": 1.837705005609274, "grad_norm": 1.414120078086853, "learning_rate": 9.83183323278992e-07, "loss": 0.0123, "step": 172000 }, { "epoch": 1.8378118489235535, "grad_norm": 0.07661603391170502, "learning_rate": 9.831790023588133e-07, "loss": 0.0276, "step": 172010 }, { "epoch": 1.8379186922378332, "grad_norm": 3.900702476501465, "learning_rate": 9.83174680893089e-07, "loss": 0.0601, "step": 172020 }, { "epoch": 1.838025535552113, "grad_norm": 0.5220885276794434, "learning_rate": 9.83170358881824e-07, "loss": 0.0482, "step": 172030 }, { "epoch": 1.8381323788663924, "grad_norm": 3.894057273864746, "learning_rate": 9.831660363250233e-07, "loss": 0.0195, "step": 172040 }, { "epoch": 1.838239222180672, "grad_norm": 0.028750039637088776, "learning_rate": 9.831617132226914e-07, "loss": 0.037, "step": 172050 }, { "epoch": 1.8383460654949517, "grad_norm": 2.276831865310669, "learning_rate": 9.831573895748337e-07, "loss": 0.0134, "step": 172060 }, { "epoch": 1.8384529088092312, "grad_norm": 2.494372606277466, "learning_rate": 9.83153065381455e-07, "loss": 0.0243, "step": 172070 }, { "epoch": 1.8385597521235109, "grad_norm": 0.06904450058937073, "learning_rate": 9.831487406425598e-07, "loss": 0.0173, "step": 172080 }, { "epoch": 1.8386665954377905, "grad_norm": 2.352994918823242, "learning_rate": 9.83144415358153e-07, "loss": 0.017, "step": 172090 }, { "epoch": 1.83877343875207, "grad_norm": 0.05912068858742714, "learning_rate": 9.8314008952824e-07, "loss": 0.0106, "step": 172100 }, { "epoch": 1.8388802820663497, "grad_norm": 10.326264381408691, "learning_rate": 9.831357631528255e-07, "loss": 0.0477, "step": 172110 }, { "epoch": 1.8389871253806294, "grad_norm": 2.2841339111328125, "learning_rate": 9.831314362319139e-07, "loss": 0.0197, "step": 172120 }, { "epoch": 1.8390939686949088, "grad_norm": 3.704134464263916, "learning_rate": 9.831271087655107e-07, "loss": 0.0269, "step": 172130 }, { "epoch": 1.8392008120091885, "grad_norm": 3.147883653640747, "learning_rate": 9.831227807536205e-07, "loss": 0.0488, "step": 172140 }, { "epoch": 1.8393076553234682, "grad_norm": 3.877357006072998, "learning_rate": 9.83118452196248e-07, "loss": 0.0948, "step": 172150 }, { "epoch": 1.8394144986377476, "grad_norm": 0.01225520484149456, "learning_rate": 9.831141230933986e-07, "loss": 0.039, "step": 172160 }, { "epoch": 1.8395213419520273, "grad_norm": 0.4658466577529907, "learning_rate": 9.831097934450766e-07, "loss": 0.0226, "step": 172170 }, { "epoch": 1.839628185266307, "grad_norm": 5.034867763519287, "learning_rate": 9.831054632512873e-07, "loss": 0.0317, "step": 172180 }, { "epoch": 1.8397350285805865, "grad_norm": 2.1654775142669678, "learning_rate": 9.831011325120355e-07, "loss": 0.038, "step": 172190 }, { "epoch": 1.8398418718948661, "grad_norm": 1.9326599836349487, "learning_rate": 9.830968012273262e-07, "loss": 0.0256, "step": 172200 }, { "epoch": 1.8399487152091458, "grad_norm": 0.001418803003616631, "learning_rate": 9.830924693971639e-07, "loss": 0.0223, "step": 172210 }, { "epoch": 1.8400555585234253, "grad_norm": 0.11357982456684113, "learning_rate": 9.830881370215538e-07, "loss": 0.0495, "step": 172220 }, { "epoch": 1.840162401837705, "grad_norm": 2.9227259159088135, "learning_rate": 9.830838041005007e-07, "loss": 0.0255, "step": 172230 }, { "epoch": 1.8402692451519846, "grad_norm": 0.14247916638851166, "learning_rate": 9.830794706340097e-07, "loss": 0.0032, "step": 172240 }, { "epoch": 1.840376088466264, "grad_norm": 0.045327458530664444, "learning_rate": 9.830751366220855e-07, "loss": 0.0314, "step": 172250 }, { "epoch": 1.8404829317805438, "grad_norm": 3.1708028316497803, "learning_rate": 9.83070802064733e-07, "loss": 0.0226, "step": 172260 }, { "epoch": 1.8405897750948235, "grad_norm": 1.669925570487976, "learning_rate": 9.83066466961957e-07, "loss": 0.0173, "step": 172270 }, { "epoch": 1.840696618409103, "grad_norm": 8.862749099731445, "learning_rate": 9.830621313137625e-07, "loss": 0.038, "step": 172280 }, { "epoch": 1.8408034617233826, "grad_norm": 10.250372886657715, "learning_rate": 9.830577951201544e-07, "loss": 0.0123, "step": 172290 }, { "epoch": 1.8409103050376623, "grad_norm": 3.415489435195923, "learning_rate": 9.830534583811376e-07, "loss": 0.0484, "step": 172300 }, { "epoch": 1.8410171483519417, "grad_norm": 0.8227600455284119, "learning_rate": 9.830491210967171e-07, "loss": 0.0309, "step": 172310 }, { "epoch": 1.8411239916662216, "grad_norm": 7.688103675842285, "learning_rate": 9.830447832668976e-07, "loss": 0.0527, "step": 172320 }, { "epoch": 1.841230834980501, "grad_norm": 0.09557495266199112, "learning_rate": 9.83040444891684e-07, "loss": 0.0314, "step": 172330 }, { "epoch": 1.8413376782947806, "grad_norm": 7.906465530395508, "learning_rate": 9.830361059710815e-07, "loss": 0.0587, "step": 172340 }, { "epoch": 1.8414445216090605, "grad_norm": 3.221216917037964, "learning_rate": 9.830317665050946e-07, "loss": 0.0676, "step": 172350 }, { "epoch": 1.84155136492334, "grad_norm": 0.33839109539985657, "learning_rate": 9.830274264937284e-07, "loss": 0.0173, "step": 172360 }, { "epoch": 1.8416582082376194, "grad_norm": 4.151824474334717, "learning_rate": 9.830230859369878e-07, "loss": 0.0625, "step": 172370 }, { "epoch": 1.8417650515518993, "grad_norm": 4.210214138031006, "learning_rate": 9.830187448348778e-07, "loss": 0.0494, "step": 172380 }, { "epoch": 1.8418718948661787, "grad_norm": 3.4576597213745117, "learning_rate": 9.830144031874029e-07, "loss": 0.0312, "step": 172390 }, { "epoch": 1.8419787381804582, "grad_norm": 1.8754329681396484, "learning_rate": 9.830100609945685e-07, "loss": 0.0439, "step": 172400 }, { "epoch": 1.8420855814947381, "grad_norm": 0.8440704345703125, "learning_rate": 9.830057182563792e-07, "loss": 0.0388, "step": 172410 }, { "epoch": 1.8421924248090176, "grad_norm": 1.2899513244628906, "learning_rate": 9.8300137497284e-07, "loss": 0.0193, "step": 172420 }, { "epoch": 1.842299268123297, "grad_norm": 0.033579714596271515, "learning_rate": 9.829970311439559e-07, "loss": 0.0058, "step": 172430 }, { "epoch": 1.842406111437577, "grad_norm": 2.4782021045684814, "learning_rate": 9.829926867697315e-07, "loss": 0.0232, "step": 172440 }, { "epoch": 1.8425129547518564, "grad_norm": 0.057403046637773514, "learning_rate": 9.829883418501722e-07, "loss": 0.014, "step": 172450 }, { "epoch": 1.8426197980661358, "grad_norm": 2.4579780101776123, "learning_rate": 9.829839963852824e-07, "loss": 0.0163, "step": 172460 }, { "epoch": 1.8427266413804158, "grad_norm": 0.44297975301742554, "learning_rate": 9.82979650375067e-07, "loss": 0.0303, "step": 172470 }, { "epoch": 1.8428334846946952, "grad_norm": 0.6703762412071228, "learning_rate": 9.829753038195317e-07, "loss": 0.0121, "step": 172480 }, { "epoch": 1.842940328008975, "grad_norm": 4.30666971206665, "learning_rate": 9.829709567186804e-07, "loss": 0.0414, "step": 172490 }, { "epoch": 1.8430471713232546, "grad_norm": 0.05019277334213257, "learning_rate": 9.829666090725186e-07, "loss": 0.0539, "step": 172500 }, { "epoch": 1.843154014637534, "grad_norm": 2.134256362915039, "learning_rate": 9.82962260881051e-07, "loss": 0.0107, "step": 172510 }, { "epoch": 1.8432608579518137, "grad_norm": 0.16568352282047272, "learning_rate": 9.829579121442825e-07, "loss": 0.0239, "step": 172520 }, { "epoch": 1.8433677012660934, "grad_norm": 10.562638282775879, "learning_rate": 9.829535628622182e-07, "loss": 0.0395, "step": 172530 }, { "epoch": 1.8434745445803729, "grad_norm": 0.9060512781143188, "learning_rate": 9.829492130348627e-07, "loss": 0.022, "step": 172540 }, { "epoch": 1.8435813878946525, "grad_norm": 0.07159742712974548, "learning_rate": 9.829448626622212e-07, "loss": 0.0104, "step": 172550 }, { "epoch": 1.8436882312089322, "grad_norm": 0.048513177782297134, "learning_rate": 9.829405117442985e-07, "loss": 0.0172, "step": 172560 }, { "epoch": 1.8437950745232117, "grad_norm": 0.10606643557548523, "learning_rate": 9.829361602810994e-07, "loss": 0.0065, "step": 172570 }, { "epoch": 1.8439019178374914, "grad_norm": 1.2613422870635986, "learning_rate": 9.82931808272629e-07, "loss": 0.0388, "step": 172580 }, { "epoch": 1.844008761151771, "grad_norm": 3.994077682495117, "learning_rate": 9.829274557188922e-07, "loss": 0.0152, "step": 172590 }, { "epoch": 1.8441156044660505, "grad_norm": 6.117441654205322, "learning_rate": 9.829231026198937e-07, "loss": 0.0223, "step": 172600 }, { "epoch": 1.8442224477803302, "grad_norm": 12.334785461425781, "learning_rate": 9.829187489756388e-07, "loss": 0.0484, "step": 172610 }, { "epoch": 1.8443292910946099, "grad_norm": 5.305427551269531, "learning_rate": 9.82914394786132e-07, "loss": 0.0313, "step": 172620 }, { "epoch": 1.8444361344088893, "grad_norm": 0.038166772574186325, "learning_rate": 9.829100400513786e-07, "loss": 0.0134, "step": 172630 }, { "epoch": 1.844542977723169, "grad_norm": 1.600043535232544, "learning_rate": 9.82905684771383e-07, "loss": 0.0686, "step": 172640 }, { "epoch": 1.8446498210374487, "grad_norm": 4.054445743560791, "learning_rate": 9.829013289461507e-07, "loss": 0.0173, "step": 172650 }, { "epoch": 1.8447566643517281, "grad_norm": 14.798904418945312, "learning_rate": 9.828969725756863e-07, "loss": 0.0744, "step": 172660 }, { "epoch": 1.8448635076660078, "grad_norm": 2.0514564514160156, "learning_rate": 9.828926156599947e-07, "loss": 0.0098, "step": 172670 }, { "epoch": 1.8449703509802875, "grad_norm": 1.2584161758422852, "learning_rate": 9.82888258199081e-07, "loss": 0.0279, "step": 172680 }, { "epoch": 1.845077194294567, "grad_norm": 0.07076258212327957, "learning_rate": 9.828839001929499e-07, "loss": 0.0031, "step": 172690 }, { "epoch": 1.8451840376088466, "grad_norm": 1.6841537952423096, "learning_rate": 9.828795416416064e-07, "loss": 0.0562, "step": 172700 }, { "epoch": 1.8452908809231263, "grad_norm": 2.1968040466308594, "learning_rate": 9.828751825450557e-07, "loss": 0.0257, "step": 172710 }, { "epoch": 1.8453977242374058, "grad_norm": 4.854492664337158, "learning_rate": 9.828708229033022e-07, "loss": 0.0717, "step": 172720 }, { "epoch": 1.8455045675516855, "grad_norm": 0.3225279450416565, "learning_rate": 9.828664627163513e-07, "loss": 0.0367, "step": 172730 }, { "epoch": 1.8456114108659651, "grad_norm": 4.77328634262085, "learning_rate": 9.828621019842076e-07, "loss": 0.0249, "step": 172740 }, { "epoch": 1.8457182541802446, "grad_norm": 0.04886814206838608, "learning_rate": 9.82857740706876e-07, "loss": 0.0098, "step": 172750 }, { "epoch": 1.8458250974945243, "grad_norm": 0.19540101289749146, "learning_rate": 9.82853378884362e-07, "loss": 0.06, "step": 172760 }, { "epoch": 1.845931940808804, "grad_norm": 2.825345039367676, "learning_rate": 9.828490165166696e-07, "loss": 0.0165, "step": 172770 }, { "epoch": 1.8460387841230834, "grad_norm": 3.2500572204589844, "learning_rate": 9.828446536038045e-07, "loss": 0.0266, "step": 172780 }, { "epoch": 1.846145627437363, "grad_norm": 6.155917644500732, "learning_rate": 9.828402901457714e-07, "loss": 0.023, "step": 172790 }, { "epoch": 1.8462524707516428, "grad_norm": 0.30358126759529114, "learning_rate": 9.82835926142575e-07, "loss": 0.0149, "step": 172800 }, { "epoch": 1.8463593140659222, "grad_norm": 2.716787338256836, "learning_rate": 9.828315615942205e-07, "loss": 0.0417, "step": 172810 }, { "epoch": 1.846466157380202, "grad_norm": 1.0155595541000366, "learning_rate": 9.828271965007126e-07, "loss": 0.0235, "step": 172820 }, { "epoch": 1.8465730006944816, "grad_norm": 2.761063575744629, "learning_rate": 9.828228308620565e-07, "loss": 0.0268, "step": 172830 }, { "epoch": 1.846679844008761, "grad_norm": 0.07854403555393219, "learning_rate": 9.82818464678257e-07, "loss": 0.0098, "step": 172840 }, { "epoch": 1.8467866873230407, "grad_norm": 0.5934261083602905, "learning_rate": 9.828140979493189e-07, "loss": 0.017, "step": 172850 }, { "epoch": 1.8468935306373204, "grad_norm": 2.3579986095428467, "learning_rate": 9.828097306752471e-07, "loss": 0.0367, "step": 172860 }, { "epoch": 1.8470003739515999, "grad_norm": 16.48693084716797, "learning_rate": 9.82805362856047e-07, "loss": 0.0236, "step": 172870 }, { "epoch": 1.8471072172658796, "grad_norm": 2.1658525466918945, "learning_rate": 9.82800994491723e-07, "loss": 0.0345, "step": 172880 }, { "epoch": 1.8472140605801592, "grad_norm": 0.05585578829050064, "learning_rate": 9.827966255822804e-07, "loss": 0.026, "step": 172890 }, { "epoch": 1.8473209038944387, "grad_norm": 0.0733107402920723, "learning_rate": 9.827922561277237e-07, "loss": 0.0358, "step": 172900 }, { "epoch": 1.8474277472087184, "grad_norm": 3.422795295715332, "learning_rate": 9.827878861280583e-07, "loss": 0.0269, "step": 172910 }, { "epoch": 1.847534590522998, "grad_norm": 1.5784274339675903, "learning_rate": 9.82783515583289e-07, "loss": 0.0068, "step": 172920 }, { "epoch": 1.8476414338372775, "grad_norm": 0.311010479927063, "learning_rate": 9.827791444934205e-07, "loss": 0.0114, "step": 172930 }, { "epoch": 1.8477482771515572, "grad_norm": 2.223755359649658, "learning_rate": 9.82774772858458e-07, "loss": 0.0193, "step": 172940 }, { "epoch": 1.8478551204658369, "grad_norm": 5.272189617156982, "learning_rate": 9.827704006784064e-07, "loss": 0.0302, "step": 172950 }, { "epoch": 1.8479619637801163, "grad_norm": 1.2081760168075562, "learning_rate": 9.827660279532706e-07, "loss": 0.0356, "step": 172960 }, { "epoch": 1.848068807094396, "grad_norm": 7.041481971740723, "learning_rate": 9.827616546830554e-07, "loss": 0.0452, "step": 172970 }, { "epoch": 1.8481756504086757, "grad_norm": 3.4436051845550537, "learning_rate": 9.82757280867766e-07, "loss": 0.0211, "step": 172980 }, { "epoch": 1.8482824937229552, "grad_norm": 4.072836399078369, "learning_rate": 9.82752906507407e-07, "loss": 0.0686, "step": 172990 }, { "epoch": 1.8483893370372348, "grad_norm": 0.18536482751369476, "learning_rate": 9.827485316019837e-07, "loss": 0.0381, "step": 173000 }, { "epoch": 1.8484961803515145, "grad_norm": 3.58807635307312, "learning_rate": 9.827441561515005e-07, "loss": 0.0635, "step": 173010 }, { "epoch": 1.848603023665794, "grad_norm": 8.683340072631836, "learning_rate": 9.827397801559631e-07, "loss": 0.0186, "step": 173020 }, { "epoch": 1.8487098669800737, "grad_norm": 6.076000213623047, "learning_rate": 9.827354036153761e-07, "loss": 0.0123, "step": 173030 }, { "epoch": 1.8488167102943533, "grad_norm": 0.03386962041258812, "learning_rate": 9.827310265297442e-07, "loss": 0.0134, "step": 173040 }, { "epoch": 1.8489235536086328, "grad_norm": 0.0545664019882679, "learning_rate": 9.827266488990726e-07, "loss": 0.0108, "step": 173050 }, { "epoch": 1.8490303969229127, "grad_norm": 10.51634407043457, "learning_rate": 9.827222707233662e-07, "loss": 0.0573, "step": 173060 }, { "epoch": 1.8491372402371922, "grad_norm": 5.752058029174805, "learning_rate": 9.8271789200263e-07, "loss": 0.0539, "step": 173070 }, { "epoch": 1.8492440835514716, "grad_norm": 2.163316488265991, "learning_rate": 9.827135127368687e-07, "loss": 0.0135, "step": 173080 }, { "epoch": 1.8493509268657515, "grad_norm": 0.8460834622383118, "learning_rate": 9.827091329260874e-07, "loss": 0.0362, "step": 173090 }, { "epoch": 1.849457770180031, "grad_norm": 1.9320465326309204, "learning_rate": 9.827047525702912e-07, "loss": 0.0691, "step": 173100 }, { "epoch": 1.8495646134943104, "grad_norm": 5.032489776611328, "learning_rate": 9.827003716694848e-07, "loss": 0.0258, "step": 173110 }, { "epoch": 1.8496714568085904, "grad_norm": 3.3051869869232178, "learning_rate": 9.826959902236732e-07, "loss": 0.0254, "step": 173120 }, { "epoch": 1.8497783001228698, "grad_norm": 0.6296768188476562, "learning_rate": 9.826916082328616e-07, "loss": 0.017, "step": 173130 }, { "epoch": 1.8498851434371493, "grad_norm": 4.595287799835205, "learning_rate": 9.826872256970546e-07, "loss": 0.0833, "step": 173140 }, { "epoch": 1.8499919867514292, "grad_norm": 13.922276496887207, "learning_rate": 9.826828426162575e-07, "loss": 0.0524, "step": 173150 }, { "epoch": 1.8500988300657086, "grad_norm": 0.41243186593055725, "learning_rate": 9.82678458990475e-07, "loss": 0.146, "step": 173160 }, { "epoch": 1.850205673379988, "grad_norm": 6.313485622406006, "learning_rate": 9.826740748197118e-07, "loss": 0.0347, "step": 173170 }, { "epoch": 1.850312516694268, "grad_norm": 1.0491472482681274, "learning_rate": 9.826696901039732e-07, "loss": 0.0284, "step": 173180 }, { "epoch": 1.8504193600085475, "grad_norm": 0.03314461559057236, "learning_rate": 9.826653048432643e-07, "loss": 0.0096, "step": 173190 }, { "epoch": 1.850526203322827, "grad_norm": 0.9023366570472717, "learning_rate": 9.826609190375896e-07, "loss": 0.0434, "step": 173200 }, { "epoch": 1.8506330466371068, "grad_norm": 5.274448871612549, "learning_rate": 9.826565326869545e-07, "loss": 0.0471, "step": 173210 }, { "epoch": 1.8507398899513863, "grad_norm": 1.1602674722671509, "learning_rate": 9.826521457913637e-07, "loss": 0.0668, "step": 173220 }, { "epoch": 1.850846733265666, "grad_norm": 2.1982085704803467, "learning_rate": 9.826477583508221e-07, "loss": 0.0066, "step": 173230 }, { "epoch": 1.8509535765799456, "grad_norm": 0.8270171880722046, "learning_rate": 9.826433703653347e-07, "loss": 0.0386, "step": 173240 }, { "epoch": 1.851060419894225, "grad_norm": 7.382072448730469, "learning_rate": 9.826389818349068e-07, "loss": 0.045, "step": 173250 }, { "epoch": 1.8511672632085048, "grad_norm": 0.037362053990364075, "learning_rate": 9.82634592759543e-07, "loss": 0.1093, "step": 173260 }, { "epoch": 1.8512741065227845, "grad_norm": 1.1624127626419067, "learning_rate": 9.82630203139248e-07, "loss": 0.0434, "step": 173270 }, { "epoch": 1.851380949837064, "grad_norm": 5.283286094665527, "learning_rate": 9.826258129740275e-07, "loss": 0.0323, "step": 173280 }, { "epoch": 1.8514877931513436, "grad_norm": 0.021958565339446068, "learning_rate": 9.826214222638858e-07, "loss": 0.0173, "step": 173290 }, { "epoch": 1.8515946364656233, "grad_norm": 1.8632549047470093, "learning_rate": 9.82617031008828e-07, "loss": 0.0222, "step": 173300 }, { "epoch": 1.8517014797799027, "grad_norm": 0.13371017575263977, "learning_rate": 9.826126392088592e-07, "loss": 0.0255, "step": 173310 }, { "epoch": 1.8518083230941824, "grad_norm": 0.036988645792007446, "learning_rate": 9.826082468639844e-07, "loss": 0.0138, "step": 173320 }, { "epoch": 1.851915166408462, "grad_norm": 3.09045147895813, "learning_rate": 9.826038539742086e-07, "loss": 0.0288, "step": 173330 }, { "epoch": 1.8520220097227416, "grad_norm": 0.05811719223856926, "learning_rate": 9.825994605395363e-07, "loss": 0.0341, "step": 173340 }, { "epoch": 1.8521288530370212, "grad_norm": 4.732731342315674, "learning_rate": 9.825950665599729e-07, "loss": 0.0536, "step": 173350 }, { "epoch": 1.852235696351301, "grad_norm": 0.008532113395631313, "learning_rate": 9.825906720355235e-07, "loss": 0.013, "step": 173360 }, { "epoch": 1.8523425396655804, "grad_norm": 1.347779631614685, "learning_rate": 9.825862769661924e-07, "loss": 0.0174, "step": 173370 }, { "epoch": 1.85244938297986, "grad_norm": 0.7296475768089294, "learning_rate": 9.825818813519853e-07, "loss": 0.037, "step": 173380 }, { "epoch": 1.8525562262941397, "grad_norm": 3.2019026279449463, "learning_rate": 9.825774851929065e-07, "loss": 0.0511, "step": 173390 }, { "epoch": 1.8526630696084192, "grad_norm": 0.7037011384963989, "learning_rate": 9.825730884889615e-07, "loss": 0.0141, "step": 173400 }, { "epoch": 1.8527699129226989, "grad_norm": 4.450938701629639, "learning_rate": 9.825686912401552e-07, "loss": 0.0275, "step": 173410 }, { "epoch": 1.8528767562369786, "grad_norm": 0.01566196419298649, "learning_rate": 9.825642934464923e-07, "loss": 0.0125, "step": 173420 }, { "epoch": 1.852983599551258, "grad_norm": 0.0726001039147377, "learning_rate": 9.825598951079779e-07, "loss": 0.036, "step": 173430 }, { "epoch": 1.8530904428655377, "grad_norm": 2.2568411827087402, "learning_rate": 9.825554962246169e-07, "loss": 0.0577, "step": 173440 }, { "epoch": 1.8531972861798174, "grad_norm": 0.27949339151382446, "learning_rate": 9.825510967964144e-07, "loss": 0.0142, "step": 173450 }, { "epoch": 1.8533041294940968, "grad_norm": 8.640754699707031, "learning_rate": 9.825466968233752e-07, "loss": 0.0333, "step": 173460 }, { "epoch": 1.8534109728083765, "grad_norm": 1.4543572664260864, "learning_rate": 9.825422963055043e-07, "loss": 0.0185, "step": 173470 }, { "epoch": 1.8535178161226562, "grad_norm": 1.5100505352020264, "learning_rate": 9.82537895242807e-07, "loss": 0.1207, "step": 173480 }, { "epoch": 1.8536246594369357, "grad_norm": 2.881659746170044, "learning_rate": 9.825334936352878e-07, "loss": 0.0226, "step": 173490 }, { "epoch": 1.8537315027512153, "grad_norm": 0.6146670579910278, "learning_rate": 9.82529091482952e-07, "loss": 0.0067, "step": 173500 }, { "epoch": 1.853838346065495, "grad_norm": 0.12331745028495789, "learning_rate": 9.825246887858042e-07, "loss": 0.0103, "step": 173510 }, { "epoch": 1.8539451893797745, "grad_norm": 3.210695743560791, "learning_rate": 9.8252028554385e-07, "loss": 0.0104, "step": 173520 }, { "epoch": 1.8540520326940542, "grad_norm": 15.126232147216797, "learning_rate": 9.825158817570935e-07, "loss": 0.0216, "step": 173530 }, { "epoch": 1.8541588760083338, "grad_norm": 1.9887586832046509, "learning_rate": 9.825114774255404e-07, "loss": 0.0338, "step": 173540 }, { "epoch": 1.8542657193226133, "grad_norm": 0.2888389825820923, "learning_rate": 9.825070725491954e-07, "loss": 0.0192, "step": 173550 }, { "epoch": 1.854372562636893, "grad_norm": 0.006533562205731869, "learning_rate": 9.825026671280637e-07, "loss": 0.0565, "step": 173560 }, { "epoch": 1.8544794059511727, "grad_norm": 5.660268783569336, "learning_rate": 9.824982611621497e-07, "loss": 0.0428, "step": 173570 }, { "epoch": 1.8545862492654521, "grad_norm": 1.9528820514678955, "learning_rate": 9.82493854651459e-07, "loss": 0.0571, "step": 173580 }, { "epoch": 1.8546930925797318, "grad_norm": 3.2754344940185547, "learning_rate": 9.824894475959962e-07, "loss": 0.026, "step": 173590 }, { "epoch": 1.8547999358940115, "grad_norm": 6.7394700050354, "learning_rate": 9.824850399957664e-07, "loss": 0.023, "step": 173600 }, { "epoch": 1.854906779208291, "grad_norm": 5.476855278015137, "learning_rate": 9.824806318507748e-07, "loss": 0.012, "step": 173610 }, { "epoch": 1.8550136225225706, "grad_norm": 4.028634548187256, "learning_rate": 9.824762231610258e-07, "loss": 0.0275, "step": 173620 }, { "epoch": 1.8551204658368503, "grad_norm": 0.04976677522063255, "learning_rate": 9.82471813926525e-07, "loss": 0.0138, "step": 173630 }, { "epoch": 1.8552273091511298, "grad_norm": 5.370367527008057, "learning_rate": 9.824674041472768e-07, "loss": 0.0676, "step": 173640 }, { "epoch": 1.8553341524654094, "grad_norm": 0.010552581399679184, "learning_rate": 9.824629938232868e-07, "loss": 0.0202, "step": 173650 }, { "epoch": 1.8554409957796891, "grad_norm": 0.019060464575886726, "learning_rate": 9.824585829545596e-07, "loss": 0.007, "step": 173660 }, { "epoch": 1.8555478390939686, "grad_norm": 4.390722751617432, "learning_rate": 9.824541715411003e-07, "loss": 0.0463, "step": 173670 }, { "epoch": 1.8556546824082483, "grad_norm": 3.300495147705078, "learning_rate": 9.824497595829138e-07, "loss": 0.0206, "step": 173680 }, { "epoch": 1.855761525722528, "grad_norm": 20.45982551574707, "learning_rate": 9.82445347080005e-07, "loss": 0.0297, "step": 173690 }, { "epoch": 1.8558683690368074, "grad_norm": 9.539164543151855, "learning_rate": 9.824409340323788e-07, "loss": 0.0251, "step": 173700 }, { "epoch": 1.855975212351087, "grad_norm": 8.921588897705078, "learning_rate": 9.824365204400408e-07, "loss": 0.0312, "step": 173710 }, { "epoch": 1.8560820556653668, "grad_norm": 0.056870587170124054, "learning_rate": 9.824321063029954e-07, "loss": 0.0455, "step": 173720 }, { "epoch": 1.8561888989796462, "grad_norm": 0.048659972846508026, "learning_rate": 9.824276916212477e-07, "loss": 0.0124, "step": 173730 }, { "epoch": 1.856295742293926, "grad_norm": 5.56091833114624, "learning_rate": 9.824232763948027e-07, "loss": 0.0195, "step": 173740 }, { "epoch": 1.8564025856082056, "grad_norm": 0.9901658892631531, "learning_rate": 9.824188606236656e-07, "loss": 0.0134, "step": 173750 }, { "epoch": 1.856509428922485, "grad_norm": 0.5856855511665344, "learning_rate": 9.82414444307841e-07, "loss": 0.0229, "step": 173760 }, { "epoch": 1.8566162722367647, "grad_norm": 3.205658197402954, "learning_rate": 9.824100274473342e-07, "loss": 0.0206, "step": 173770 }, { "epoch": 1.8567231155510444, "grad_norm": 0.18538877367973328, "learning_rate": 9.824056100421498e-07, "loss": 0.0471, "step": 173780 }, { "epoch": 1.8568299588653239, "grad_norm": 10.61111831665039, "learning_rate": 9.824011920922933e-07, "loss": 0.0337, "step": 173790 }, { "epoch": 1.8569368021796038, "grad_norm": 0.038934655487537384, "learning_rate": 9.823967735977693e-07, "loss": 0.0074, "step": 173800 }, { "epoch": 1.8570436454938832, "grad_norm": 0.8075199723243713, "learning_rate": 9.823923545585832e-07, "loss": 0.0645, "step": 173810 }, { "epoch": 1.8571504888081627, "grad_norm": 1.272326111793518, "learning_rate": 9.823879349747396e-07, "loss": 0.0171, "step": 173820 }, { "epoch": 1.8572573321224426, "grad_norm": 0.028304539620876312, "learning_rate": 9.823835148462435e-07, "loss": 0.0249, "step": 173830 }, { "epoch": 1.857364175436722, "grad_norm": 0.23155304789543152, "learning_rate": 9.823790941731e-07, "loss": 0.0526, "step": 173840 }, { "epoch": 1.8574710187510015, "grad_norm": 4.0038604736328125, "learning_rate": 9.823746729553144e-07, "loss": 0.0311, "step": 173850 }, { "epoch": 1.8575778620652814, "grad_norm": 0.09404120594263077, "learning_rate": 9.82370251192891e-07, "loss": 0.018, "step": 173860 }, { "epoch": 1.8576847053795609, "grad_norm": 2.5717663764953613, "learning_rate": 9.823658288858353e-07, "loss": 0.0295, "step": 173870 }, { "epoch": 1.8577915486938403, "grad_norm": 17.11353302001953, "learning_rate": 9.823614060341522e-07, "loss": 0.0482, "step": 173880 }, { "epoch": 1.8578983920081202, "grad_norm": 1.87349534034729, "learning_rate": 9.823569826378469e-07, "loss": 0.0054, "step": 173890 }, { "epoch": 1.8580052353223997, "grad_norm": 2.608211040496826, "learning_rate": 9.823525586969239e-07, "loss": 0.055, "step": 173900 }, { "epoch": 1.8581120786366792, "grad_norm": 9.113936424255371, "learning_rate": 9.823481342113885e-07, "loss": 0.0168, "step": 173910 }, { "epoch": 1.858218921950959, "grad_norm": 14.183242797851562, "learning_rate": 9.823437091812457e-07, "loss": 0.0282, "step": 173920 }, { "epoch": 1.8583257652652385, "grad_norm": 6.05784273147583, "learning_rate": 9.823392836065004e-07, "loss": 0.055, "step": 173930 }, { "epoch": 1.858432608579518, "grad_norm": 8.97983455657959, "learning_rate": 9.823348574871577e-07, "loss": 0.0326, "step": 173940 }, { "epoch": 1.8585394518937979, "grad_norm": 4.382772445678711, "learning_rate": 9.823304308232224e-07, "loss": 0.0463, "step": 173950 }, { "epoch": 1.8586462952080773, "grad_norm": 14.3714017868042, "learning_rate": 9.823260036146997e-07, "loss": 0.0526, "step": 173960 }, { "epoch": 1.858753138522357, "grad_norm": 1.7536320686340332, "learning_rate": 9.823215758615946e-07, "loss": 0.0088, "step": 173970 }, { "epoch": 1.8588599818366367, "grad_norm": 2.5667686462402344, "learning_rate": 9.823171475639121e-07, "loss": 0.0331, "step": 173980 }, { "epoch": 1.8589668251509162, "grad_norm": 4.365671157836914, "learning_rate": 9.82312718721657e-07, "loss": 0.0303, "step": 173990 }, { "epoch": 1.8590736684651958, "grad_norm": 4.028085708618164, "learning_rate": 9.823082893348345e-07, "loss": 0.0864, "step": 174000 }, { "epoch": 1.8591805117794755, "grad_norm": 3.99949312210083, "learning_rate": 9.823038594034494e-07, "loss": 0.0552, "step": 174010 }, { "epoch": 1.859287355093755, "grad_norm": 1.127918004989624, "learning_rate": 9.822994289275072e-07, "loss": 0.0038, "step": 174020 }, { "epoch": 1.8593941984080347, "grad_norm": 0.22465281188488007, "learning_rate": 9.822949979070123e-07, "loss": 0.0594, "step": 174030 }, { "epoch": 1.8595010417223143, "grad_norm": 0.403584361076355, "learning_rate": 9.822905663419698e-07, "loss": 0.0166, "step": 174040 }, { "epoch": 1.8596078850365938, "grad_norm": 0.04211905598640442, "learning_rate": 9.82286134232385e-07, "loss": 0.0506, "step": 174050 }, { "epoch": 1.8597147283508735, "grad_norm": 0.39847955107688904, "learning_rate": 9.822817015782627e-07, "loss": 0.0994, "step": 174060 }, { "epoch": 1.8598215716651532, "grad_norm": 2.09434175491333, "learning_rate": 9.822772683796082e-07, "loss": 0.0117, "step": 174070 }, { "epoch": 1.8599284149794326, "grad_norm": 1.857656478881836, "learning_rate": 9.822728346364259e-07, "loss": 0.0157, "step": 174080 }, { "epoch": 1.8600352582937123, "grad_norm": 0.17355360090732574, "learning_rate": 9.822684003487216e-07, "loss": 0.0274, "step": 174090 }, { "epoch": 1.860142101607992, "grad_norm": 0.21258817613124847, "learning_rate": 9.822639655164995e-07, "loss": 0.0036, "step": 174100 }, { "epoch": 1.8602489449222714, "grad_norm": 9.135049819946289, "learning_rate": 9.822595301397651e-07, "loss": 0.0706, "step": 174110 }, { "epoch": 1.8603557882365511, "grad_norm": 1.573228359222412, "learning_rate": 9.822550942185233e-07, "loss": 0.0468, "step": 174120 }, { "epoch": 1.8604626315508308, "grad_norm": 7.743505477905273, "learning_rate": 9.82250657752779e-07, "loss": 0.0477, "step": 174130 }, { "epoch": 1.8605694748651103, "grad_norm": 11.02440071105957, "learning_rate": 9.822462207425375e-07, "loss": 0.0236, "step": 174140 }, { "epoch": 1.86067631817939, "grad_norm": 0.0037699085660278797, "learning_rate": 9.822417831878037e-07, "loss": 0.0212, "step": 174150 }, { "epoch": 1.8607831614936696, "grad_norm": 2.07460355758667, "learning_rate": 9.822373450885824e-07, "loss": 0.0752, "step": 174160 }, { "epoch": 1.860890004807949, "grad_norm": 6.588869571685791, "learning_rate": 9.822329064448787e-07, "loss": 0.044, "step": 174170 }, { "epoch": 1.8609968481222288, "grad_norm": 7.536437511444092, "learning_rate": 9.822284672566977e-07, "loss": 0.0362, "step": 174180 }, { "epoch": 1.8611036914365084, "grad_norm": 1.0740704536437988, "learning_rate": 9.822240275240444e-07, "loss": 0.0485, "step": 174190 }, { "epoch": 1.861210534750788, "grad_norm": 1.8368299007415771, "learning_rate": 9.822195872469238e-07, "loss": 0.0151, "step": 174200 }, { "epoch": 1.8613173780650676, "grad_norm": 3.4550185203552246, "learning_rate": 9.822151464253405e-07, "loss": 0.0285, "step": 174210 }, { "epoch": 1.8614242213793473, "grad_norm": 0.014697899110615253, "learning_rate": 9.822107050593002e-07, "loss": 0.0139, "step": 174220 }, { "epoch": 1.8615310646936267, "grad_norm": 0.03502199798822403, "learning_rate": 9.822062631488076e-07, "loss": 0.0098, "step": 174230 }, { "epoch": 1.8616379080079064, "grad_norm": 0.7481607794761658, "learning_rate": 9.82201820693868e-07, "loss": 0.0103, "step": 174240 }, { "epoch": 1.861744751322186, "grad_norm": 1.1105365753173828, "learning_rate": 9.82197377694486e-07, "loss": 0.0269, "step": 174250 }, { "epoch": 1.8618515946364655, "grad_norm": 3.4535224437713623, "learning_rate": 9.821929341506665e-07, "loss": 0.0777, "step": 174260 }, { "epoch": 1.8619584379507452, "grad_norm": 4.627940654754639, "learning_rate": 9.82188490062415e-07, "loss": 0.0148, "step": 174270 }, { "epoch": 1.862065281265025, "grad_norm": 0.047767747193574905, "learning_rate": 9.821840454297364e-07, "loss": 0.0128, "step": 174280 }, { "epoch": 1.8621721245793044, "grad_norm": 0.11277300864458084, "learning_rate": 9.821796002526354e-07, "loss": 0.0061, "step": 174290 }, { "epoch": 1.862278967893584, "grad_norm": 2.516625165939331, "learning_rate": 9.821751545311172e-07, "loss": 0.0183, "step": 174300 }, { "epoch": 1.8623858112078637, "grad_norm": 0.7919655442237854, "learning_rate": 9.82170708265187e-07, "loss": 0.0678, "step": 174310 }, { "epoch": 1.8624926545221432, "grad_norm": 0.6779739856719971, "learning_rate": 9.821662614548497e-07, "loss": 0.0411, "step": 174320 }, { "epoch": 1.8625994978364229, "grad_norm": 1.6094423532485962, "learning_rate": 9.821618141001103e-07, "loss": 0.0114, "step": 174330 }, { "epoch": 1.8627063411507025, "grad_norm": 0.03649713099002838, "learning_rate": 9.821573662009738e-07, "loss": 0.0394, "step": 174340 }, { "epoch": 1.862813184464982, "grad_norm": 0.006693375762552023, "learning_rate": 9.821529177574453e-07, "loss": 0.0354, "step": 174350 }, { "epoch": 1.8629200277792617, "grad_norm": 0.1281118392944336, "learning_rate": 9.821484687695296e-07, "loss": 0.0041, "step": 174360 }, { "epoch": 1.8630268710935414, "grad_norm": 6.230199337005615, "learning_rate": 9.82144019237232e-07, "loss": 0.0317, "step": 174370 }, { "epoch": 1.8631337144078208, "grad_norm": 0.1245967298746109, "learning_rate": 9.821395691605574e-07, "loss": 0.0425, "step": 174380 }, { "epoch": 1.8632405577221005, "grad_norm": 1.2877856492996216, "learning_rate": 9.821351185395109e-07, "loss": 0.0312, "step": 174390 }, { "epoch": 1.8633474010363802, "grad_norm": 0.1414908915758133, "learning_rate": 9.821306673740975e-07, "loss": 0.0102, "step": 174400 }, { "epoch": 1.8634542443506596, "grad_norm": 0.012219903990626335, "learning_rate": 9.82126215664322e-07, "loss": 0.0157, "step": 174410 }, { "epoch": 1.8635610876649393, "grad_norm": 2.070650815963745, "learning_rate": 9.821217634101896e-07, "loss": 0.0441, "step": 174420 }, { "epoch": 1.863667930979219, "grad_norm": 0.8151681423187256, "learning_rate": 9.821173106117056e-07, "loss": 0.0328, "step": 174430 }, { "epoch": 1.8637747742934985, "grad_norm": 7.785265922546387, "learning_rate": 9.821128572688746e-07, "loss": 0.0143, "step": 174440 }, { "epoch": 1.8638816176077782, "grad_norm": 2.1923205852508545, "learning_rate": 9.821084033817018e-07, "loss": 0.0139, "step": 174450 }, { "epoch": 1.8639884609220578, "grad_norm": 15.006254196166992, "learning_rate": 9.821039489501923e-07, "loss": 0.0483, "step": 174460 }, { "epoch": 1.8640953042363373, "grad_norm": 11.147736549377441, "learning_rate": 9.82099493974351e-07, "loss": 0.0148, "step": 174470 }, { "epoch": 1.864202147550617, "grad_norm": 3.991793394088745, "learning_rate": 9.820950384541828e-07, "loss": 0.0479, "step": 174480 }, { "epoch": 1.8643089908648967, "grad_norm": 5.477798938751221, "learning_rate": 9.820905823896932e-07, "loss": 0.0332, "step": 174490 }, { "epoch": 1.8644158341791761, "grad_norm": 11.745344161987305, "learning_rate": 9.820861257808867e-07, "loss": 0.0844, "step": 174500 }, { "epoch": 1.8645226774934558, "grad_norm": 0.04956624656915665, "learning_rate": 9.820816686277688e-07, "loss": 0.0214, "step": 174510 }, { "epoch": 1.8646295208077355, "grad_norm": 0.2005598545074463, "learning_rate": 9.820772109303444e-07, "loss": 0.0279, "step": 174520 }, { "epoch": 1.864736364122015, "grad_norm": 0.036010902374982834, "learning_rate": 9.820727526886182e-07, "loss": 0.0287, "step": 174530 }, { "epoch": 1.8648432074362948, "grad_norm": 0.021952606737613678, "learning_rate": 9.820682939025955e-07, "loss": 0.0211, "step": 174540 }, { "epoch": 1.8649500507505743, "grad_norm": 5.1542463302612305, "learning_rate": 9.820638345722813e-07, "loss": 0.0285, "step": 174550 }, { "epoch": 1.8650568940648538, "grad_norm": 7.092970848083496, "learning_rate": 9.820593746976808e-07, "loss": 0.0197, "step": 174560 }, { "epoch": 1.8651637373791337, "grad_norm": 5.187750816345215, "learning_rate": 9.820549142787988e-07, "loss": 0.0193, "step": 174570 }, { "epoch": 1.8652705806934131, "grad_norm": 3.5926027297973633, "learning_rate": 9.820504533156403e-07, "loss": 0.011, "step": 174580 }, { "epoch": 1.8653774240076926, "grad_norm": 3.0409655570983887, "learning_rate": 9.820459918082106e-07, "loss": 0.0076, "step": 174590 }, { "epoch": 1.8654842673219725, "grad_norm": 3.9090383052825928, "learning_rate": 9.820415297565148e-07, "loss": 0.0096, "step": 174600 }, { "epoch": 1.865591110636252, "grad_norm": 0.05725059658288956, "learning_rate": 9.820370671605574e-07, "loss": 0.0069, "step": 174610 }, { "epoch": 1.8656979539505314, "grad_norm": 2.0739939212799072, "learning_rate": 9.82032604020344e-07, "loss": 0.0621, "step": 174620 }, { "epoch": 1.8658047972648113, "grad_norm": 9.461445808410645, "learning_rate": 9.820281403358792e-07, "loss": 0.051, "step": 174630 }, { "epoch": 1.8659116405790908, "grad_norm": 0.005656647030264139, "learning_rate": 9.820236761071683e-07, "loss": 0.0281, "step": 174640 }, { "epoch": 1.8660184838933702, "grad_norm": 1.0443663597106934, "learning_rate": 9.820192113342163e-07, "loss": 0.0509, "step": 174650 }, { "epoch": 1.8661253272076501, "grad_norm": 0.06741712242364883, "learning_rate": 9.820147460170283e-07, "loss": 0.0332, "step": 174660 }, { "epoch": 1.8662321705219296, "grad_norm": 6.117839336395264, "learning_rate": 9.820102801556094e-07, "loss": 0.021, "step": 174670 }, { "epoch": 1.866339013836209, "grad_norm": 0.5740681290626526, "learning_rate": 9.820058137499643e-07, "loss": 0.0158, "step": 174680 }, { "epoch": 1.866445857150489, "grad_norm": 7.035109519958496, "learning_rate": 9.820013468000983e-07, "loss": 0.0236, "step": 174690 }, { "epoch": 1.8665527004647684, "grad_norm": 14.01602554321289, "learning_rate": 9.819968793060164e-07, "loss": 0.0286, "step": 174700 }, { "epoch": 1.866659543779048, "grad_norm": 1.0891597270965576, "learning_rate": 9.819924112677236e-07, "loss": 0.015, "step": 174710 }, { "epoch": 1.8667663870933278, "grad_norm": 0.0464642196893692, "learning_rate": 9.81987942685225e-07, "loss": 0.0448, "step": 174720 }, { "epoch": 1.8668732304076072, "grad_norm": 5.478842735290527, "learning_rate": 9.819834735585256e-07, "loss": 0.0507, "step": 174730 }, { "epoch": 1.866980073721887, "grad_norm": 0.01017955131828785, "learning_rate": 9.819790038876306e-07, "loss": 0.0149, "step": 174740 }, { "epoch": 1.8670869170361666, "grad_norm": 0.3403090834617615, "learning_rate": 9.81974533672545e-07, "loss": 0.0103, "step": 174750 }, { "epoch": 1.867193760350446, "grad_norm": 0.010542908683419228, "learning_rate": 9.819700629132736e-07, "loss": 0.0404, "step": 174760 }, { "epoch": 1.8673006036647257, "grad_norm": 6.385055065155029, "learning_rate": 9.819655916098217e-07, "loss": 0.0358, "step": 174770 }, { "epoch": 1.8674074469790054, "grad_norm": 0.9985068440437317, "learning_rate": 9.81961119762194e-07, "loss": 0.0259, "step": 174780 }, { "epoch": 1.8675142902932849, "grad_norm": 0.09387888014316559, "learning_rate": 9.819566473703961e-07, "loss": 0.0124, "step": 174790 }, { "epoch": 1.8676211336075645, "grad_norm": 0.0023777876049280167, "learning_rate": 9.819521744344328e-07, "loss": 0.0068, "step": 174800 }, { "epoch": 1.8677279769218442, "grad_norm": 0.06933977454900742, "learning_rate": 9.819477009543091e-07, "loss": 0.042, "step": 174810 }, { "epoch": 1.8678348202361237, "grad_norm": 0.9039247632026672, "learning_rate": 9.8194322693003e-07, "loss": 0.0254, "step": 174820 }, { "epoch": 1.8679416635504034, "grad_norm": 4.879906177520752, "learning_rate": 9.819387523616007e-07, "loss": 0.0747, "step": 174830 }, { "epoch": 1.868048506864683, "grad_norm": 3.651874303817749, "learning_rate": 9.819342772490262e-07, "loss": 0.0081, "step": 174840 }, { "epoch": 1.8681553501789625, "grad_norm": 13.836143493652344, "learning_rate": 9.819298015923114e-07, "loss": 0.0421, "step": 174850 }, { "epoch": 1.8682621934932422, "grad_norm": 6.192532539367676, "learning_rate": 9.819253253914617e-07, "loss": 0.035, "step": 174860 }, { "epoch": 1.8683690368075219, "grad_norm": 5.693475723266602, "learning_rate": 9.819208486464817e-07, "loss": 0.0531, "step": 174870 }, { "epoch": 1.8684758801218013, "grad_norm": 0.05499077960848808, "learning_rate": 9.819163713573768e-07, "loss": 0.062, "step": 174880 }, { "epoch": 1.868582723436081, "grad_norm": 2.504535675048828, "learning_rate": 9.819118935241517e-07, "loss": 0.013, "step": 174890 }, { "epoch": 1.8686895667503607, "grad_norm": 3.221646308898926, "learning_rate": 9.819074151468122e-07, "loss": 0.0427, "step": 174900 }, { "epoch": 1.8687964100646401, "grad_norm": 0.07489529252052307, "learning_rate": 9.819029362253624e-07, "loss": 0.0253, "step": 174910 }, { "epoch": 1.8689032533789198, "grad_norm": 3.0277419090270996, "learning_rate": 9.81898456759808e-07, "loss": 0.0312, "step": 174920 }, { "epoch": 1.8690100966931995, "grad_norm": 4.627371788024902, "learning_rate": 9.81893976750154e-07, "loss": 0.0203, "step": 174930 }, { "epoch": 1.869116940007479, "grad_norm": 0.15045082569122314, "learning_rate": 9.81889496196405e-07, "loss": 0.03, "step": 174940 }, { "epoch": 1.8692237833217586, "grad_norm": 2.3030524253845215, "learning_rate": 9.818850150985667e-07, "loss": 0.0687, "step": 174950 }, { "epoch": 1.8693306266360383, "grad_norm": 3.632582426071167, "learning_rate": 9.818805334566436e-07, "loss": 0.0326, "step": 174960 }, { "epoch": 1.8694374699503178, "grad_norm": 8.578777313232422, "learning_rate": 9.818760512706411e-07, "loss": 0.0367, "step": 174970 }, { "epoch": 1.8695443132645975, "grad_norm": 0.38723692297935486, "learning_rate": 9.818715685405642e-07, "loss": 0.0167, "step": 174980 }, { "epoch": 1.8696511565788771, "grad_norm": 0.012174793519079685, "learning_rate": 9.818670852664181e-07, "loss": 0.0268, "step": 174990 }, { "epoch": 1.8697579998931566, "grad_norm": 1.3663334846496582, "learning_rate": 9.818626014482075e-07, "loss": 0.0176, "step": 175000 }, { "epoch": 1.8698648432074363, "grad_norm": 0.06279708445072174, "learning_rate": 9.818581170859377e-07, "loss": 0.0125, "step": 175010 }, { "epoch": 1.869971686521716, "grad_norm": 1.7946261167526245, "learning_rate": 9.818536321796136e-07, "loss": 0.0312, "step": 175020 }, { "epoch": 1.8700785298359954, "grad_norm": 5.4676513671875, "learning_rate": 9.818491467292406e-07, "loss": 0.0122, "step": 175030 }, { "epoch": 1.870185373150275, "grad_norm": 0.030016141012310982, "learning_rate": 9.818446607348234e-07, "loss": 0.0026, "step": 175040 }, { "epoch": 1.8702922164645548, "grad_norm": 5.284265995025635, "learning_rate": 9.818401741963671e-07, "loss": 0.0481, "step": 175050 }, { "epoch": 1.8703990597788342, "grad_norm": 2.0667104721069336, "learning_rate": 9.81835687113877e-07, "loss": 0.0232, "step": 175060 }, { "epoch": 1.870505903093114, "grad_norm": 0.25222209095954895, "learning_rate": 9.818311994873583e-07, "loss": 0.0382, "step": 175070 }, { "epoch": 1.8706127464073936, "grad_norm": 11.500060081481934, "learning_rate": 9.818267113168154e-07, "loss": 0.0453, "step": 175080 }, { "epoch": 1.870719589721673, "grad_norm": 6.968142509460449, "learning_rate": 9.81822222602254e-07, "loss": 0.0978, "step": 175090 }, { "epoch": 1.8708264330359528, "grad_norm": 5.109288692474365, "learning_rate": 9.818177333436789e-07, "loss": 0.0625, "step": 175100 }, { "epoch": 1.8709332763502324, "grad_norm": 2.829162359237671, "learning_rate": 9.818132435410952e-07, "loss": 0.0311, "step": 175110 }, { "epoch": 1.871040119664512, "grad_norm": 3.9551947116851807, "learning_rate": 9.81808753194508e-07, "loss": 0.0121, "step": 175120 }, { "epoch": 1.8711469629787916, "grad_norm": 1.0468711853027344, "learning_rate": 9.818042623039223e-07, "loss": 0.0221, "step": 175130 }, { "epoch": 1.8712538062930713, "grad_norm": 0.03767670318484306, "learning_rate": 9.817997708693432e-07, "loss": 0.025, "step": 175140 }, { "epoch": 1.8713606496073507, "grad_norm": 0.03583919256925583, "learning_rate": 9.81795278890776e-07, "loss": 0.0291, "step": 175150 }, { "epoch": 1.8714674929216304, "grad_norm": 1.0337200164794922, "learning_rate": 9.817907863682252e-07, "loss": 0.019, "step": 175160 }, { "epoch": 1.87157433623591, "grad_norm": 2.3049447536468506, "learning_rate": 9.817862933016966e-07, "loss": 0.0229, "step": 175170 }, { "epoch": 1.8716811795501895, "grad_norm": 5.901774883270264, "learning_rate": 9.817817996911948e-07, "loss": 0.0516, "step": 175180 }, { "epoch": 1.8717880228644692, "grad_norm": 5.017549991607666, "learning_rate": 9.81777305536725e-07, "loss": 0.0419, "step": 175190 }, { "epoch": 1.871894866178749, "grad_norm": 0.770462691783905, "learning_rate": 9.817728108382923e-07, "loss": 0.0303, "step": 175200 }, { "epoch": 1.8720017094930284, "grad_norm": 11.53462028503418, "learning_rate": 9.817683155959017e-07, "loss": 0.032, "step": 175210 }, { "epoch": 1.872108552807308, "grad_norm": 0.5539877414703369, "learning_rate": 9.81763819809558e-07, "loss": 0.01, "step": 175220 }, { "epoch": 1.8722153961215877, "grad_norm": 0.1269434094429016, "learning_rate": 9.817593234792671e-07, "loss": 0.0269, "step": 175230 }, { "epoch": 1.8723222394358672, "grad_norm": 1.86097252368927, "learning_rate": 9.817548266050332e-07, "loss": 0.0564, "step": 175240 }, { "epoch": 1.8724290827501469, "grad_norm": 0.5169482827186584, "learning_rate": 9.817503291868618e-07, "loss": 0.0495, "step": 175250 }, { "epoch": 1.8725359260644265, "grad_norm": 3.265176773071289, "learning_rate": 9.81745831224758e-07, "loss": 0.0598, "step": 175260 }, { "epoch": 1.872642769378706, "grad_norm": 1.5010693073272705, "learning_rate": 9.81741332718727e-07, "loss": 0.026, "step": 175270 }, { "epoch": 1.872749612692986, "grad_norm": 1.6316924095153809, "learning_rate": 9.817368336687733e-07, "loss": 0.0463, "step": 175280 }, { "epoch": 1.8728564560072654, "grad_norm": 13.604392051696777, "learning_rate": 9.817323340749027e-07, "loss": 0.0716, "step": 175290 }, { "epoch": 1.8729632993215448, "grad_norm": 0.08588548004627228, "learning_rate": 9.817278339371195e-07, "loss": 0.0479, "step": 175300 }, { "epoch": 1.8730701426358247, "grad_norm": 2.342151641845703, "learning_rate": 9.817233332554295e-07, "loss": 0.0287, "step": 175310 }, { "epoch": 1.8731769859501042, "grad_norm": 3.724151134490967, "learning_rate": 9.817188320298376e-07, "loss": 0.0435, "step": 175320 }, { "epoch": 1.8732838292643836, "grad_norm": 2.9269001483917236, "learning_rate": 9.817143302603487e-07, "loss": 0.0115, "step": 175330 }, { "epoch": 1.8733906725786635, "grad_norm": 4.373236656188965, "learning_rate": 9.817098279469678e-07, "loss": 0.0305, "step": 175340 }, { "epoch": 1.873497515892943, "grad_norm": 6.777399063110352, "learning_rate": 9.817053250897004e-07, "loss": 0.0314, "step": 175350 }, { "epoch": 1.8736043592072225, "grad_norm": 6.859820365905762, "learning_rate": 9.817008216885511e-07, "loss": 0.0155, "step": 175360 }, { "epoch": 1.8737112025215024, "grad_norm": 6.682624340057373, "learning_rate": 9.816963177435252e-07, "loss": 0.0449, "step": 175370 }, { "epoch": 1.8738180458357818, "grad_norm": 0.21338987350463867, "learning_rate": 9.816918132546282e-07, "loss": 0.0211, "step": 175380 }, { "epoch": 1.8739248891500613, "grad_norm": 0.09283312410116196, "learning_rate": 9.816873082218644e-07, "loss": 0.0567, "step": 175390 }, { "epoch": 1.8740317324643412, "grad_norm": 13.323345184326172, "learning_rate": 9.816828026452394e-07, "loss": 0.0234, "step": 175400 }, { "epoch": 1.8741385757786206, "grad_norm": 9.555065155029297, "learning_rate": 9.816782965247582e-07, "loss": 0.0136, "step": 175410 }, { "epoch": 1.8742454190929, "grad_norm": 4.249301433563232, "learning_rate": 9.816737898604258e-07, "loss": 0.0223, "step": 175420 }, { "epoch": 1.87435226240718, "grad_norm": 5.78355073928833, "learning_rate": 9.816692826522472e-07, "loss": 0.0249, "step": 175430 }, { "epoch": 1.8744591057214595, "grad_norm": 1.0543068647384644, "learning_rate": 9.816647749002279e-07, "loss": 0.0562, "step": 175440 }, { "epoch": 1.8745659490357391, "grad_norm": 1.038275957107544, "learning_rate": 9.816602666043725e-07, "loss": 0.0505, "step": 175450 }, { "epoch": 1.8746727923500188, "grad_norm": 3.2393195629119873, "learning_rate": 9.816557577646863e-07, "loss": 0.0128, "step": 175460 }, { "epoch": 1.8747796356642983, "grad_norm": 8.362998962402344, "learning_rate": 9.816512483811746e-07, "loss": 0.0271, "step": 175470 }, { "epoch": 1.874886478978578, "grad_norm": 5.4879608154296875, "learning_rate": 9.816467384538421e-07, "loss": 0.0279, "step": 175480 }, { "epoch": 1.8749933222928576, "grad_norm": 0.11856500059366226, "learning_rate": 9.816422279826942e-07, "loss": 0.0226, "step": 175490 }, { "epoch": 1.875100165607137, "grad_norm": 2.5956807136535645, "learning_rate": 9.816377169677356e-07, "loss": 0.0065, "step": 175500 }, { "epoch": 1.8752070089214168, "grad_norm": 6.034595489501953, "learning_rate": 9.81633205408972e-07, "loss": 0.0232, "step": 175510 }, { "epoch": 1.8753138522356965, "grad_norm": 0.08830869197845459, "learning_rate": 9.81628693306408e-07, "loss": 0.0253, "step": 175520 }, { "epoch": 1.875420695549976, "grad_norm": 4.733114242553711, "learning_rate": 9.816241806600488e-07, "loss": 0.0665, "step": 175530 }, { "epoch": 1.8755275388642556, "grad_norm": 6.320062637329102, "learning_rate": 9.816196674698997e-07, "loss": 0.0542, "step": 175540 }, { "epoch": 1.8756343821785353, "grad_norm": 2.2218477725982666, "learning_rate": 9.816151537359656e-07, "loss": 0.0319, "step": 175550 }, { "epoch": 1.8757412254928147, "grad_norm": 5.285356044769287, "learning_rate": 9.816106394582518e-07, "loss": 0.0223, "step": 175560 }, { "epoch": 1.8758480688070944, "grad_norm": 0.11970598250627518, "learning_rate": 9.81606124636763e-07, "loss": 0.0261, "step": 175570 }, { "epoch": 1.875954912121374, "grad_norm": 0.42496994137763977, "learning_rate": 9.816016092715046e-07, "loss": 0.0279, "step": 175580 }, { "epoch": 1.8760617554356536, "grad_norm": 0.32085657119750977, "learning_rate": 9.815970933624818e-07, "loss": 0.0089, "step": 175590 }, { "epoch": 1.8761685987499332, "grad_norm": 1.7979564666748047, "learning_rate": 9.815925769096994e-07, "loss": 0.116, "step": 175600 }, { "epoch": 1.876275442064213, "grad_norm": 2.030930995941162, "learning_rate": 9.815880599131625e-07, "loss": 0.0216, "step": 175610 }, { "epoch": 1.8763822853784924, "grad_norm": 3.4399490356445312, "learning_rate": 9.815835423728765e-07, "loss": 0.0304, "step": 175620 }, { "epoch": 1.876489128692772, "grad_norm": 0.18865326046943665, "learning_rate": 9.815790242888463e-07, "loss": 0.0355, "step": 175630 }, { "epoch": 1.8765959720070517, "grad_norm": 1.0984175205230713, "learning_rate": 9.815745056610768e-07, "loss": 0.0335, "step": 175640 }, { "epoch": 1.8767028153213312, "grad_norm": 0.038785044103860855, "learning_rate": 9.815699864895736e-07, "loss": 0.0426, "step": 175650 }, { "epoch": 1.876809658635611, "grad_norm": 0.47364112734794617, "learning_rate": 9.815654667743416e-07, "loss": 0.0439, "step": 175660 }, { "epoch": 1.8769165019498906, "grad_norm": 2.2489399909973145, "learning_rate": 9.815609465153857e-07, "loss": 0.0406, "step": 175670 }, { "epoch": 1.87702334526417, "grad_norm": 0.3061407804489136, "learning_rate": 9.815564257127112e-07, "loss": 0.0506, "step": 175680 }, { "epoch": 1.8771301885784497, "grad_norm": 0.04162467271089554, "learning_rate": 9.815519043663232e-07, "loss": 0.0278, "step": 175690 }, { "epoch": 1.8772370318927294, "grad_norm": 0.6479263305664062, "learning_rate": 9.815473824762267e-07, "loss": 0.0168, "step": 175700 }, { "epoch": 1.8773438752070088, "grad_norm": 0.041912466287612915, "learning_rate": 9.81542860042427e-07, "loss": 0.0837, "step": 175710 }, { "epoch": 1.8774507185212885, "grad_norm": 4.292680740356445, "learning_rate": 9.815383370649288e-07, "loss": 0.0202, "step": 175720 }, { "epoch": 1.8775575618355682, "grad_norm": 6.4602885246276855, "learning_rate": 9.815338135437375e-07, "loss": 0.037, "step": 175730 }, { "epoch": 1.8776644051498477, "grad_norm": 0.054495714604854584, "learning_rate": 9.815292894788583e-07, "loss": 0.0284, "step": 175740 }, { "epoch": 1.8777712484641274, "grad_norm": 0.5302867293357849, "learning_rate": 9.815247648702962e-07, "loss": 0.0128, "step": 175750 }, { "epoch": 1.877878091778407, "grad_norm": 0.7575716376304626, "learning_rate": 9.815202397180562e-07, "loss": 0.0308, "step": 175760 }, { "epoch": 1.8779849350926865, "grad_norm": 0.02199314348399639, "learning_rate": 9.815157140221436e-07, "loss": 0.0108, "step": 175770 }, { "epoch": 1.8780917784069662, "grad_norm": 7.5359625816345215, "learning_rate": 9.815111877825634e-07, "loss": 0.0593, "step": 175780 }, { "epoch": 1.8781986217212459, "grad_norm": 3.854114532470703, "learning_rate": 9.815066609993209e-07, "loss": 0.0166, "step": 175790 }, { "epoch": 1.8783054650355253, "grad_norm": 0.3922388553619385, "learning_rate": 9.815021336724208e-07, "loss": 0.0599, "step": 175800 }, { "epoch": 1.878412308349805, "grad_norm": 1.7840012311935425, "learning_rate": 9.814976058018685e-07, "loss": 0.0269, "step": 175810 }, { "epoch": 1.8785191516640847, "grad_norm": 0.010076726786792278, "learning_rate": 9.81493077387669e-07, "loss": 0.0201, "step": 175820 }, { "epoch": 1.8786259949783641, "grad_norm": 0.22822345793247223, "learning_rate": 9.814885484298275e-07, "loss": 0.0205, "step": 175830 }, { "epoch": 1.8787328382926438, "grad_norm": 0.008774562738835812, "learning_rate": 9.81484018928349e-07, "loss": 0.012, "step": 175840 }, { "epoch": 1.8788396816069235, "grad_norm": 0.03996064141392708, "learning_rate": 9.81479488883239e-07, "loss": 0.061, "step": 175850 }, { "epoch": 1.878946524921203, "grad_norm": 0.19333545863628387, "learning_rate": 9.814749582945022e-07, "loss": 0.0365, "step": 175860 }, { "epoch": 1.8790533682354826, "grad_norm": 9.11956787109375, "learning_rate": 9.814704271621437e-07, "loss": 0.1245, "step": 175870 }, { "epoch": 1.8791602115497623, "grad_norm": 0.045304831117391586, "learning_rate": 9.814658954861689e-07, "loss": 0.017, "step": 175880 }, { "epoch": 1.8792670548640418, "grad_norm": 6.550055503845215, "learning_rate": 9.814613632665826e-07, "loss": 0.0586, "step": 175890 }, { "epoch": 1.8793738981783215, "grad_norm": 0.8851922750473022, "learning_rate": 9.814568305033901e-07, "loss": 0.0244, "step": 175900 }, { "epoch": 1.8794807414926011, "grad_norm": 0.06343752145767212, "learning_rate": 9.814522971965967e-07, "loss": 0.0259, "step": 175910 }, { "epoch": 1.8795875848068806, "grad_norm": 2.265549659729004, "learning_rate": 9.814477633462073e-07, "loss": 0.0599, "step": 175920 }, { "epoch": 1.8796944281211603, "grad_norm": 6.692229747772217, "learning_rate": 9.81443228952227e-07, "loss": 0.0301, "step": 175930 }, { "epoch": 1.87980127143544, "grad_norm": 3.289425849914551, "learning_rate": 9.814386940146609e-07, "loss": 0.0116, "step": 175940 }, { "epoch": 1.8799081147497194, "grad_norm": 0.5616601705551147, "learning_rate": 9.814341585335141e-07, "loss": 0.0555, "step": 175950 }, { "epoch": 1.880014958063999, "grad_norm": 1.0789371728897095, "learning_rate": 9.814296225087919e-07, "loss": 0.0082, "step": 175960 }, { "epoch": 1.8801218013782788, "grad_norm": 0.2705094814300537, "learning_rate": 9.814250859404993e-07, "loss": 0.0257, "step": 175970 }, { "epoch": 1.8802286446925582, "grad_norm": 2.899369716644287, "learning_rate": 9.814205488286414e-07, "loss": 0.0165, "step": 175980 }, { "epoch": 1.880335488006838, "grad_norm": 1.8244777917861938, "learning_rate": 9.814160111732235e-07, "loss": 0.02, "step": 175990 }, { "epoch": 1.8804423313211176, "grad_norm": 6.038448333740234, "learning_rate": 9.814114729742505e-07, "loss": 0.0192, "step": 176000 }, { "epoch": 1.880549174635397, "grad_norm": 0.3196144998073578, "learning_rate": 9.814069342317275e-07, "loss": 0.0323, "step": 176010 }, { "epoch": 1.880656017949677, "grad_norm": 0.055485185235738754, "learning_rate": 9.814023949456598e-07, "loss": 0.0429, "step": 176020 }, { "epoch": 1.8807628612639564, "grad_norm": 0.012545396573841572, "learning_rate": 9.813978551160526e-07, "loss": 0.0271, "step": 176030 }, { "epoch": 1.8808697045782359, "grad_norm": 6.8226213455200195, "learning_rate": 9.813933147429107e-07, "loss": 0.0602, "step": 176040 }, { "epoch": 1.8809765478925158, "grad_norm": 1.1769932508468628, "learning_rate": 9.813887738262396e-07, "loss": 0.026, "step": 176050 }, { "epoch": 1.8810833912067952, "grad_norm": 5.1645121574401855, "learning_rate": 9.813842323660442e-07, "loss": 0.0629, "step": 176060 }, { "epoch": 1.8811902345210747, "grad_norm": 0.4466382563114166, "learning_rate": 9.813796903623296e-07, "loss": 0.0451, "step": 176070 }, { "epoch": 1.8812970778353546, "grad_norm": 1.5196229219436646, "learning_rate": 9.81375147815101e-07, "loss": 0.0104, "step": 176080 }, { "epoch": 1.881403921149634, "grad_norm": 0.6588044762611389, "learning_rate": 9.813706047243637e-07, "loss": 0.0414, "step": 176090 }, { "epoch": 1.8815107644639135, "grad_norm": Infinity, "learning_rate": 9.813660610901225e-07, "loss": 0.0868, "step": 176100 }, { "epoch": 1.8816176077781934, "grad_norm": 0.5601522922515869, "learning_rate": 9.813615169123826e-07, "loss": 0.0385, "step": 176110 }, { "epoch": 1.8817244510924729, "grad_norm": 0.8468853235244751, "learning_rate": 9.813569721911493e-07, "loss": 0.0198, "step": 176120 }, { "epoch": 1.8818312944067523, "grad_norm": 15.349190711975098, "learning_rate": 9.813524269264277e-07, "loss": 0.0181, "step": 176130 }, { "epoch": 1.8819381377210322, "grad_norm": 2.9102654457092285, "learning_rate": 9.81347881118223e-07, "loss": 0.0101, "step": 176140 }, { "epoch": 1.8820449810353117, "grad_norm": 1.170222282409668, "learning_rate": 9.8134333476654e-07, "loss": 0.0286, "step": 176150 }, { "epoch": 1.8821518243495912, "grad_norm": 0.5862518548965454, "learning_rate": 9.81338787871384e-07, "loss": 0.0059, "step": 176160 }, { "epoch": 1.882258667663871, "grad_norm": 5.168318748474121, "learning_rate": 9.813342404327603e-07, "loss": 0.026, "step": 176170 }, { "epoch": 1.8823655109781505, "grad_norm": 1.9502450227737427, "learning_rate": 9.81329692450674e-07, "loss": 0.0094, "step": 176180 }, { "epoch": 1.8824723542924302, "grad_norm": 5.554568290710449, "learning_rate": 9.8132514392513e-07, "loss": 0.0357, "step": 176190 }, { "epoch": 1.8825791976067099, "grad_norm": 3.7135050296783447, "learning_rate": 9.813205948561337e-07, "loss": 0.0491, "step": 176200 }, { "epoch": 1.8826860409209893, "grad_norm": 2.0316662788391113, "learning_rate": 9.8131604524369e-07, "loss": 0.068, "step": 176210 }, { "epoch": 1.882792884235269, "grad_norm": 0.08015283942222595, "learning_rate": 9.813114950878042e-07, "loss": 0.0739, "step": 176220 }, { "epoch": 1.8828997275495487, "grad_norm": 0.002376656513661146, "learning_rate": 9.813069443884813e-07, "loss": 0.0299, "step": 176230 }, { "epoch": 1.8830065708638282, "grad_norm": 1.0790311098098755, "learning_rate": 9.813023931457266e-07, "loss": 0.0051, "step": 176240 }, { "epoch": 1.8831134141781078, "grad_norm": 5.426658630371094, "learning_rate": 9.812978413595455e-07, "loss": 0.0352, "step": 176250 }, { "epoch": 1.8832202574923875, "grad_norm": 0.04027692973613739, "learning_rate": 9.812932890299423e-07, "loss": 0.0436, "step": 176260 }, { "epoch": 1.883327100806667, "grad_norm": 0.012691600248217583, "learning_rate": 9.81288736156923e-07, "loss": 0.0497, "step": 176270 }, { "epoch": 1.8834339441209467, "grad_norm": 3.045673131942749, "learning_rate": 9.812841827404923e-07, "loss": 0.0458, "step": 176280 }, { "epoch": 1.8835407874352263, "grad_norm": 2.789942741394043, "learning_rate": 9.812796287806552e-07, "loss": 0.0139, "step": 176290 }, { "epoch": 1.8836476307495058, "grad_norm": 0.36170366406440735, "learning_rate": 9.812750742774173e-07, "loss": 0.0352, "step": 176300 }, { "epoch": 1.8837544740637855, "grad_norm": 0.1525668352842331, "learning_rate": 9.812705192307836e-07, "loss": 0.022, "step": 176310 }, { "epoch": 1.8838613173780652, "grad_norm": 2.5163800716400146, "learning_rate": 9.812659636407592e-07, "loss": 0.0125, "step": 176320 }, { "epoch": 1.8839681606923446, "grad_norm": 2.9682302474975586, "learning_rate": 9.81261407507349e-07, "loss": 0.0328, "step": 176330 }, { "epoch": 1.8840750040066243, "grad_norm": 3.2351555824279785, "learning_rate": 9.812568508305584e-07, "loss": 0.0238, "step": 176340 }, { "epoch": 1.884181847320904, "grad_norm": 2.807054281234741, "learning_rate": 9.812522936103926e-07, "loss": 0.0068, "step": 176350 }, { "epoch": 1.8842886906351834, "grad_norm": 1.0999774932861328, "learning_rate": 9.812477358468564e-07, "loss": 0.0298, "step": 176360 }, { "epoch": 1.8843955339494631, "grad_norm": 0.053051695227622986, "learning_rate": 9.812431775399554e-07, "loss": 0.0353, "step": 176370 }, { "epoch": 1.8845023772637428, "grad_norm": 4.809185028076172, "learning_rate": 9.812386186896944e-07, "loss": 0.0229, "step": 176380 }, { "epoch": 1.8846092205780223, "grad_norm": 7.345375061035156, "learning_rate": 9.81234059296079e-07, "loss": 0.0418, "step": 176390 }, { "epoch": 1.884716063892302, "grad_norm": 0.9704576730728149, "learning_rate": 9.812294993591136e-07, "loss": 0.0206, "step": 176400 }, { "epoch": 1.8848229072065816, "grad_norm": 1.4063081741333008, "learning_rate": 9.81224938878804e-07, "loss": 0.0063, "step": 176410 }, { "epoch": 1.884929750520861, "grad_norm": 4.96132755279541, "learning_rate": 9.81220377855155e-07, "loss": 0.0333, "step": 176420 }, { "epoch": 1.8850365938351408, "grad_norm": 0.06199490651488304, "learning_rate": 9.812158162881718e-07, "loss": 0.0174, "step": 176430 }, { "epoch": 1.8851434371494205, "grad_norm": 0.026991762220859528, "learning_rate": 9.8121125417786e-07, "loss": 0.0409, "step": 176440 }, { "epoch": 1.8852502804637, "grad_norm": 0.08378775417804718, "learning_rate": 9.81206691524224e-07, "loss": 0.0372, "step": 176450 }, { "epoch": 1.8853571237779796, "grad_norm": 0.08912666887044907, "learning_rate": 9.812021283272695e-07, "loss": 0.088, "step": 176460 }, { "epoch": 1.8854639670922593, "grad_norm": 7.401072978973389, "learning_rate": 9.811975645870013e-07, "loss": 0.0318, "step": 176470 }, { "epoch": 1.8855708104065387, "grad_norm": 8.201266288757324, "learning_rate": 9.81193000303425e-07, "loss": 0.0096, "step": 176480 }, { "epoch": 1.8856776537208184, "grad_norm": 0.5880126953125, "learning_rate": 9.811884354765453e-07, "loss": 0.0322, "step": 176490 }, { "epoch": 1.885784497035098, "grad_norm": 4.263856410980225, "learning_rate": 9.811838701063678e-07, "loss": 0.0327, "step": 176500 }, { "epoch": 1.8858913403493776, "grad_norm": 0.1696186512708664, "learning_rate": 9.81179304192897e-07, "loss": 0.0394, "step": 176510 }, { "epoch": 1.8859981836636572, "grad_norm": 0.03491983190178871, "learning_rate": 9.811747377361387e-07, "loss": 0.0197, "step": 176520 }, { "epoch": 1.886105026977937, "grad_norm": 2.7936909198760986, "learning_rate": 9.811701707360977e-07, "loss": 0.0246, "step": 176530 }, { "epoch": 1.8862118702922164, "grad_norm": 2.285714864730835, "learning_rate": 9.811656031927795e-07, "loss": 0.0181, "step": 176540 }, { "epoch": 1.886318713606496, "grad_norm": 1.7902659177780151, "learning_rate": 9.811610351061887e-07, "loss": 0.0121, "step": 176550 }, { "epoch": 1.8864255569207757, "grad_norm": 8.22616195678711, "learning_rate": 9.81156466476331e-07, "loss": 0.0332, "step": 176560 }, { "epoch": 1.8865324002350552, "grad_norm": 0.2028053253889084, "learning_rate": 9.811518973032112e-07, "loss": 0.0798, "step": 176570 }, { "epoch": 1.8866392435493349, "grad_norm": 3.147111177444458, "learning_rate": 9.811473275868347e-07, "loss": 0.0178, "step": 176580 }, { "epoch": 1.8867460868636146, "grad_norm": 6.635580062866211, "learning_rate": 9.811427573272065e-07, "loss": 0.0182, "step": 176590 }, { "epoch": 1.886852930177894, "grad_norm": 13.197564125061035, "learning_rate": 9.811381865243319e-07, "loss": 0.0407, "step": 176600 }, { "epoch": 1.8869597734921737, "grad_norm": 0.7276701331138611, "learning_rate": 9.811336151782156e-07, "loss": 0.0017, "step": 176610 }, { "epoch": 1.8870666168064534, "grad_norm": 9.815234184265137, "learning_rate": 9.811290432888635e-07, "loss": 0.0455, "step": 176620 }, { "epoch": 1.8871734601207328, "grad_norm": 1.266003966331482, "learning_rate": 9.811244708562803e-07, "loss": 0.0156, "step": 176630 }, { "epoch": 1.8872803034350125, "grad_norm": 0.6110111474990845, "learning_rate": 9.811198978804714e-07, "loss": 0.0149, "step": 176640 }, { "epoch": 1.8873871467492922, "grad_norm": 4.270509719848633, "learning_rate": 9.811153243614416e-07, "loss": 0.033, "step": 176650 }, { "epoch": 1.8874939900635717, "grad_norm": 0.5855258703231812, "learning_rate": 9.811107502991964e-07, "loss": 0.0598, "step": 176660 }, { "epoch": 1.8876008333778513, "grad_norm": 2.7773005962371826, "learning_rate": 9.811061756937408e-07, "loss": 0.0257, "step": 176670 }, { "epoch": 1.887707676692131, "grad_norm": 4.446951866149902, "learning_rate": 9.8110160054508e-07, "loss": 0.007, "step": 176680 }, { "epoch": 1.8878145200064105, "grad_norm": 1.7098876237869263, "learning_rate": 9.810970248532193e-07, "loss": 0.0565, "step": 176690 }, { "epoch": 1.8879213633206902, "grad_norm": 2.3393197059631348, "learning_rate": 9.810924486181637e-07, "loss": 0.0075, "step": 176700 }, { "epoch": 1.8880282066349698, "grad_norm": 5.163548946380615, "learning_rate": 9.810878718399185e-07, "loss": 0.0148, "step": 176710 }, { "epoch": 1.8881350499492493, "grad_norm": 0.982932984828949, "learning_rate": 9.810832945184885e-07, "loss": 0.0603, "step": 176720 }, { "epoch": 1.888241893263529, "grad_norm": 4.761679172515869, "learning_rate": 9.810787166538795e-07, "loss": 0.0161, "step": 176730 }, { "epoch": 1.8883487365778087, "grad_norm": 0.10360404849052429, "learning_rate": 9.810741382460961e-07, "loss": 0.0077, "step": 176740 }, { "epoch": 1.8884555798920881, "grad_norm": 1.902456521987915, "learning_rate": 9.810695592951438e-07, "loss": 0.0524, "step": 176750 }, { "epoch": 1.888562423206368, "grad_norm": 5.027486324310303, "learning_rate": 9.810649798010276e-07, "loss": 0.0163, "step": 176760 }, { "epoch": 1.8886692665206475, "grad_norm": 0.15192262828350067, "learning_rate": 9.810603997637527e-07, "loss": 0.0119, "step": 176770 }, { "epoch": 1.888776109834927, "grad_norm": 3.2988100051879883, "learning_rate": 9.810558191833243e-07, "loss": 0.0088, "step": 176780 }, { "epoch": 1.8888829531492068, "grad_norm": 4.204479694366455, "learning_rate": 9.810512380597477e-07, "loss": 0.0323, "step": 176790 }, { "epoch": 1.8889897964634863, "grad_norm": 3.0557310581207275, "learning_rate": 9.810466563930278e-07, "loss": 0.0289, "step": 176800 }, { "epoch": 1.8890966397777658, "grad_norm": 0.17506079375743866, "learning_rate": 9.8104207418317e-07, "loss": 0.0134, "step": 176810 }, { "epoch": 1.8892034830920457, "grad_norm": 18.386104583740234, "learning_rate": 9.810374914301794e-07, "loss": 0.0342, "step": 176820 }, { "epoch": 1.8893103264063251, "grad_norm": 7.356357574462891, "learning_rate": 9.810329081340611e-07, "loss": 0.0085, "step": 176830 }, { "epoch": 1.8894171697206046, "grad_norm": 2.955939292907715, "learning_rate": 9.810283242948202e-07, "loss": 0.0225, "step": 176840 }, { "epoch": 1.8895240130348845, "grad_norm": 24.02268409729004, "learning_rate": 9.810237399124623e-07, "loss": 0.0809, "step": 176850 }, { "epoch": 1.889630856349164, "grad_norm": 0.19805675745010376, "learning_rate": 9.810191549869923e-07, "loss": 0.0403, "step": 176860 }, { "epoch": 1.8897376996634434, "grad_norm": 2.923532485961914, "learning_rate": 9.810145695184153e-07, "loss": 0.0138, "step": 176870 }, { "epoch": 1.8898445429777233, "grad_norm": 0.11732546240091324, "learning_rate": 9.810099835067365e-07, "loss": 0.0318, "step": 176880 }, { "epoch": 1.8899513862920028, "grad_norm": 0.36576247215270996, "learning_rate": 9.81005396951961e-07, "loss": 0.0468, "step": 176890 }, { "epoch": 1.8900582296062822, "grad_norm": 0.3653221130371094, "learning_rate": 9.810008098540943e-07, "loss": 0.0077, "step": 176900 }, { "epoch": 1.8901650729205621, "grad_norm": 13.703781127929688, "learning_rate": 9.809962222131413e-07, "loss": 0.0188, "step": 176910 }, { "epoch": 1.8902719162348416, "grad_norm": 0.9627500176429749, "learning_rate": 9.809916340291074e-07, "loss": 0.0487, "step": 176920 }, { "epoch": 1.8903787595491213, "grad_norm": 0.007485015317797661, "learning_rate": 9.809870453019974e-07, "loss": 0.0184, "step": 176930 }, { "epoch": 1.890485602863401, "grad_norm": 2.6973836421966553, "learning_rate": 9.809824560318168e-07, "loss": 0.034, "step": 176940 }, { "epoch": 1.8905924461776804, "grad_norm": 3.0750222206115723, "learning_rate": 9.80977866218571e-07, "loss": 0.0203, "step": 176950 }, { "epoch": 1.89069928949196, "grad_norm": 1.3971660137176514, "learning_rate": 9.809732758622644e-07, "loss": 0.0198, "step": 176960 }, { "epoch": 1.8908061328062398, "grad_norm": 5.9431939125061035, "learning_rate": 9.80968684962903e-07, "loss": 0.0378, "step": 176970 }, { "epoch": 1.8909129761205192, "grad_norm": 0.5520302653312683, "learning_rate": 9.809640935204915e-07, "loss": 0.0309, "step": 176980 }, { "epoch": 1.891019819434799, "grad_norm": 0.3414205312728882, "learning_rate": 9.809595015350353e-07, "loss": 0.0154, "step": 176990 }, { "epoch": 1.8911266627490786, "grad_norm": 3.0206756591796875, "learning_rate": 9.809549090065393e-07, "loss": 0.0211, "step": 177000 }, { "epoch": 1.891233506063358, "grad_norm": 0.0838344395160675, "learning_rate": 9.809503159350093e-07, "loss": 0.0197, "step": 177010 }, { "epoch": 1.8913403493776377, "grad_norm": 0.04726429656147957, "learning_rate": 9.8094572232045e-07, "loss": 0.0559, "step": 177020 }, { "epoch": 1.8914471926919174, "grad_norm": 5.0767903327941895, "learning_rate": 9.809411281628666e-07, "loss": 0.0212, "step": 177030 }, { "epoch": 1.8915540360061969, "grad_norm": 1.8735359907150269, "learning_rate": 9.809365334622643e-07, "loss": 0.0198, "step": 177040 }, { "epoch": 1.8916608793204766, "grad_norm": 4.7773847579956055, "learning_rate": 9.809319382186485e-07, "loss": 0.0266, "step": 177050 }, { "epoch": 1.8917677226347562, "grad_norm": 2.260497570037842, "learning_rate": 9.809273424320242e-07, "loss": 0.026, "step": 177060 }, { "epoch": 1.8918745659490357, "grad_norm": 0.009692724794149399, "learning_rate": 9.809227461023965e-07, "loss": 0.0392, "step": 177070 }, { "epoch": 1.8919814092633154, "grad_norm": 0.3460235893726349, "learning_rate": 9.80918149229771e-07, "loss": 0.0306, "step": 177080 }, { "epoch": 1.892088252577595, "grad_norm": 7.680278778076172, "learning_rate": 9.809135518141523e-07, "loss": 0.0433, "step": 177090 }, { "epoch": 1.8921950958918745, "grad_norm": 1.750902533531189, "learning_rate": 9.809089538555462e-07, "loss": 0.0476, "step": 177100 }, { "epoch": 1.8923019392061542, "grad_norm": 1.0778512954711914, "learning_rate": 9.809043553539573e-07, "loss": 0.0258, "step": 177110 }, { "epoch": 1.8924087825204339, "grad_norm": 8.112650871276855, "learning_rate": 9.808997563093912e-07, "loss": 0.034, "step": 177120 }, { "epoch": 1.8925156258347133, "grad_norm": 7.950323104858398, "learning_rate": 9.808951567218531e-07, "loss": 0.0141, "step": 177130 }, { "epoch": 1.892622469148993, "grad_norm": 0.2742776870727539, "learning_rate": 9.808905565913478e-07, "loss": 0.0546, "step": 177140 }, { "epoch": 1.8927293124632727, "grad_norm": 0.5685235857963562, "learning_rate": 9.80885955917881e-07, "loss": 0.0276, "step": 177150 }, { "epoch": 1.8928361557775522, "grad_norm": 5.836655139923096, "learning_rate": 9.808813547014578e-07, "loss": 0.0231, "step": 177160 }, { "epoch": 1.8929429990918318, "grad_norm": 3.5679588317871094, "learning_rate": 9.80876752942083e-07, "loss": 0.021, "step": 177170 }, { "epoch": 1.8930498424061115, "grad_norm": 0.07279513776302338, "learning_rate": 9.80872150639762e-07, "loss": 0.0396, "step": 177180 }, { "epoch": 1.893156685720391, "grad_norm": 1.5918333530426025, "learning_rate": 9.808675477945003e-07, "loss": 0.0058, "step": 177190 }, { "epoch": 1.8932635290346707, "grad_norm": 0.02265690267086029, "learning_rate": 9.808629444063027e-07, "loss": 0.0095, "step": 177200 }, { "epoch": 1.8933703723489503, "grad_norm": 3.870028495788574, "learning_rate": 9.808583404751745e-07, "loss": 0.0279, "step": 177210 }, { "epoch": 1.8934772156632298, "grad_norm": 2.6238603591918945, "learning_rate": 9.808537360011212e-07, "loss": 0.0697, "step": 177220 }, { "epoch": 1.8935840589775095, "grad_norm": 0.2706483006477356, "learning_rate": 9.808491309841474e-07, "loss": 0.0489, "step": 177230 }, { "epoch": 1.8936909022917892, "grad_norm": 0.570248007774353, "learning_rate": 9.808445254242587e-07, "loss": 0.062, "step": 177240 }, { "epoch": 1.8937977456060686, "grad_norm": 0.24753014743328094, "learning_rate": 9.808399193214605e-07, "loss": 0.025, "step": 177250 }, { "epoch": 1.8939045889203483, "grad_norm": 7.0653910636901855, "learning_rate": 9.808353126757576e-07, "loss": 0.0242, "step": 177260 }, { "epoch": 1.894011432234628, "grad_norm": 1.3458280563354492, "learning_rate": 9.808307054871552e-07, "loss": 0.0092, "step": 177270 }, { "epoch": 1.8941182755489074, "grad_norm": 0.002662418643012643, "learning_rate": 9.808260977556587e-07, "loss": 0.0153, "step": 177280 }, { "epoch": 1.8942251188631871, "grad_norm": 1.846935749053955, "learning_rate": 9.808214894812734e-07, "loss": 0.062, "step": 177290 }, { "epoch": 1.8943319621774668, "grad_norm": 0.6840550303459167, "learning_rate": 9.808168806640042e-07, "loss": 0.0204, "step": 177300 }, { "epoch": 1.8944388054917463, "grad_norm": 0.05203278735280037, "learning_rate": 9.808122713038565e-07, "loss": 0.0231, "step": 177310 }, { "epoch": 1.894545648806026, "grad_norm": 0.06327580660581589, "learning_rate": 9.808076614008354e-07, "loss": 0.0301, "step": 177320 }, { "epoch": 1.8946524921203056, "grad_norm": 3.8769397735595703, "learning_rate": 9.808030509549463e-07, "loss": 0.0253, "step": 177330 }, { "epoch": 1.894759335434585, "grad_norm": 0.4877650737762451, "learning_rate": 9.807984399661941e-07, "loss": 0.0202, "step": 177340 }, { "epoch": 1.8948661787488648, "grad_norm": 0.030168753117322922, "learning_rate": 9.807938284345843e-07, "loss": 0.0609, "step": 177350 }, { "epoch": 1.8949730220631444, "grad_norm": 0.9510558843612671, "learning_rate": 9.807892163601218e-07, "loss": 0.0491, "step": 177360 }, { "epoch": 1.895079865377424, "grad_norm": 0.11000163853168488, "learning_rate": 9.807846037428123e-07, "loss": 0.0357, "step": 177370 }, { "epoch": 1.8951867086917036, "grad_norm": 10.881274223327637, "learning_rate": 9.807799905826605e-07, "loss": 0.0593, "step": 177380 }, { "epoch": 1.8952935520059833, "grad_norm": 18.414106369018555, "learning_rate": 9.80775376879672e-07, "loss": 0.0288, "step": 177390 }, { "epoch": 1.8954003953202627, "grad_norm": 0.1145821139216423, "learning_rate": 9.807707626338515e-07, "loss": 0.01, "step": 177400 }, { "epoch": 1.8955072386345424, "grad_norm": 0.18106554448604584, "learning_rate": 9.807661478452045e-07, "loss": 0.0428, "step": 177410 }, { "epoch": 1.895614081948822, "grad_norm": 0.012792612425982952, "learning_rate": 9.807615325137365e-07, "loss": 0.0334, "step": 177420 }, { "epoch": 1.8957209252631015, "grad_norm": 0.11839817464351654, "learning_rate": 9.807569166394525e-07, "loss": 0.0184, "step": 177430 }, { "epoch": 1.8958277685773812, "grad_norm": 0.029763946309685707, "learning_rate": 9.807523002223573e-07, "loss": 0.013, "step": 177440 }, { "epoch": 1.895934611891661, "grad_norm": 0.04147638380527496, "learning_rate": 9.807476832624568e-07, "loss": 0.0319, "step": 177450 }, { "epoch": 1.8960414552059404, "grad_norm": 1.3122307062149048, "learning_rate": 9.807430657597558e-07, "loss": 0.0053, "step": 177460 }, { "epoch": 1.89614829852022, "grad_norm": 0.5398206114768982, "learning_rate": 9.807384477142595e-07, "loss": 0.0143, "step": 177470 }, { "epoch": 1.8962551418344997, "grad_norm": 1.7352805137634277, "learning_rate": 9.807338291259733e-07, "loss": 0.0105, "step": 177480 }, { "epoch": 1.8963619851487792, "grad_norm": 0.032091107219457626, "learning_rate": 9.807292099949022e-07, "loss": 0.036, "step": 177490 }, { "epoch": 1.896468828463059, "grad_norm": 2.7189483642578125, "learning_rate": 9.807245903210516e-07, "loss": 0.0257, "step": 177500 }, { "epoch": 1.8965756717773385, "grad_norm": 5.205766201019287, "learning_rate": 9.807199701044267e-07, "loss": 0.0365, "step": 177510 }, { "epoch": 1.896682515091618, "grad_norm": 2.3406405448913574, "learning_rate": 9.807153493450326e-07, "loss": 0.0765, "step": 177520 }, { "epoch": 1.896789358405898, "grad_norm": 0.21017657220363617, "learning_rate": 9.807107280428746e-07, "loss": 0.0495, "step": 177530 }, { "epoch": 1.8968962017201774, "grad_norm": 8.173577308654785, "learning_rate": 9.80706106197958e-07, "loss": 0.0495, "step": 177540 }, { "epoch": 1.8970030450344568, "grad_norm": 2.743394374847412, "learning_rate": 9.807014838102878e-07, "loss": 0.0131, "step": 177550 }, { "epoch": 1.8971098883487367, "grad_norm": 4.096840858459473, "learning_rate": 9.806968608798695e-07, "loss": 0.0245, "step": 177560 }, { "epoch": 1.8972167316630162, "grad_norm": 11.294560432434082, "learning_rate": 9.80692237406708e-07, "loss": 0.036, "step": 177570 }, { "epoch": 1.8973235749772956, "grad_norm": 0.031139547005295753, "learning_rate": 9.806876133908088e-07, "loss": 0.0239, "step": 177580 }, { "epoch": 1.8974304182915755, "grad_norm": 4.526544570922852, "learning_rate": 9.80682988832177e-07, "loss": 0.0025, "step": 177590 }, { "epoch": 1.897537261605855, "grad_norm": 0.16403180360794067, "learning_rate": 9.806783637308179e-07, "loss": 0.059, "step": 177600 }, { "epoch": 1.8976441049201345, "grad_norm": 0.0364670604467392, "learning_rate": 9.806737380867364e-07, "loss": 0.0085, "step": 177610 }, { "epoch": 1.8977509482344144, "grad_norm": 0.8381811380386353, "learning_rate": 9.806691118999383e-07, "loss": 0.0226, "step": 177620 }, { "epoch": 1.8978577915486938, "grad_norm": 0.7005259394645691, "learning_rate": 9.806644851704281e-07, "loss": 0.0087, "step": 177630 }, { "epoch": 1.8979646348629733, "grad_norm": 0.005637269001454115, "learning_rate": 9.806598578982117e-07, "loss": 0.0177, "step": 177640 }, { "epoch": 1.8980714781772532, "grad_norm": 0.6181821227073669, "learning_rate": 9.80655230083294e-07, "loss": 0.0499, "step": 177650 }, { "epoch": 1.8981783214915326, "grad_norm": 2.2125771045684814, "learning_rate": 9.8065060172568e-07, "loss": 0.0084, "step": 177660 }, { "epoch": 1.8982851648058123, "grad_norm": 3.1498513221740723, "learning_rate": 9.806459728253757e-07, "loss": 0.0184, "step": 177670 }, { "epoch": 1.898392008120092, "grad_norm": 1.3474979400634766, "learning_rate": 9.806413433823855e-07, "loss": 0.0255, "step": 177680 }, { "epoch": 1.8984988514343715, "grad_norm": 6.7298383712768555, "learning_rate": 9.806367133967151e-07, "loss": 0.0647, "step": 177690 }, { "epoch": 1.8986056947486512, "grad_norm": 5.897592067718506, "learning_rate": 9.806320828683694e-07, "loss": 0.0232, "step": 177700 }, { "epoch": 1.8987125380629308, "grad_norm": 3.8163394927978516, "learning_rate": 9.806274517973538e-07, "loss": 0.0106, "step": 177710 }, { "epoch": 1.8988193813772103, "grad_norm": 2.630929470062256, "learning_rate": 9.806228201836738e-07, "loss": 0.0073, "step": 177720 }, { "epoch": 1.89892622469149, "grad_norm": 0.08239258080720901, "learning_rate": 9.80618188027334e-07, "loss": 0.0246, "step": 177730 }, { "epoch": 1.8990330680057697, "grad_norm": 1.4157143831253052, "learning_rate": 9.806135553283402e-07, "loss": 0.031, "step": 177740 }, { "epoch": 1.8991399113200491, "grad_norm": 4.071354866027832, "learning_rate": 9.806089220866974e-07, "loss": 0.0329, "step": 177750 }, { "epoch": 1.8992467546343288, "grad_norm": 0.04454050958156586, "learning_rate": 9.806042883024107e-07, "loss": 0.0127, "step": 177760 }, { "epoch": 1.8993535979486085, "grad_norm": 0.12020957469940186, "learning_rate": 9.805996539754858e-07, "loss": 0.0113, "step": 177770 }, { "epoch": 1.899460441262888, "grad_norm": 0.05385285243391991, "learning_rate": 9.805950191059274e-07, "loss": 0.0217, "step": 177780 }, { "epoch": 1.8995672845771676, "grad_norm": 0.7729659080505371, "learning_rate": 9.80590383693741e-07, "loss": 0.0344, "step": 177790 }, { "epoch": 1.8996741278914473, "grad_norm": 3.232801675796509, "learning_rate": 9.805857477389317e-07, "loss": 0.0463, "step": 177800 }, { "epoch": 1.8997809712057268, "grad_norm": 4.335905075073242, "learning_rate": 9.80581111241505e-07, "loss": 0.0233, "step": 177810 }, { "epoch": 1.8998878145200064, "grad_norm": 4.525660514831543, "learning_rate": 9.80576474201466e-07, "loss": 0.0408, "step": 177820 }, { "epoch": 1.8999946578342861, "grad_norm": 1.3368146419525146, "learning_rate": 9.805718366188195e-07, "loss": 0.0312, "step": 177830 }, { "epoch": 1.9001015011485656, "grad_norm": 2.533226490020752, "learning_rate": 9.805671984935716e-07, "loss": 0.0628, "step": 177840 }, { "epoch": 1.9002083444628453, "grad_norm": 6.633240222930908, "learning_rate": 9.805625598257267e-07, "loss": 0.0379, "step": 177850 }, { "epoch": 1.900315187777125, "grad_norm": 1.3382114171981812, "learning_rate": 9.805579206152905e-07, "loss": 0.0201, "step": 177860 }, { "epoch": 1.9004220310914044, "grad_norm": 0.02222178690135479, "learning_rate": 9.805532808622682e-07, "loss": 0.0166, "step": 177870 }, { "epoch": 1.900528874405684, "grad_norm": 0.17600229382514954, "learning_rate": 9.80548640566665e-07, "loss": 0.0274, "step": 177880 }, { "epoch": 1.9006357177199638, "grad_norm": 17.33710479736328, "learning_rate": 9.80543999728486e-07, "loss": 0.1578, "step": 177890 }, { "epoch": 1.9007425610342432, "grad_norm": 0.7956599593162537, "learning_rate": 9.805393583477367e-07, "loss": 0.0233, "step": 177900 }, { "epoch": 1.900849404348523, "grad_norm": 2.368988275527954, "learning_rate": 9.80534716424422e-07, "loss": 0.0121, "step": 177910 }, { "epoch": 1.9009562476628026, "grad_norm": 1.382171630859375, "learning_rate": 9.805300739585475e-07, "loss": 0.024, "step": 177920 }, { "epoch": 1.901063090977082, "grad_norm": 0.022789809852838516, "learning_rate": 9.805254309501182e-07, "loss": 0.0623, "step": 177930 }, { "epoch": 1.9011699342913617, "grad_norm": 4.4391703605651855, "learning_rate": 9.805207873991396e-07, "loss": 0.0565, "step": 177940 }, { "epoch": 1.9012767776056414, "grad_norm": 0.03979562595486641, "learning_rate": 9.805161433056167e-07, "loss": 0.0113, "step": 177950 }, { "epoch": 1.9013836209199209, "grad_norm": 1.3194801807403564, "learning_rate": 9.805114986695545e-07, "loss": 0.0203, "step": 177960 }, { "epoch": 1.9014904642342005, "grad_norm": 2.630620002746582, "learning_rate": 9.805068534909589e-07, "loss": 0.0451, "step": 177970 }, { "epoch": 1.9015973075484802, "grad_norm": 0.021329332143068314, "learning_rate": 9.805022077698346e-07, "loss": 0.0045, "step": 177980 }, { "epoch": 1.9017041508627597, "grad_norm": 7.210616588592529, "learning_rate": 9.804975615061872e-07, "loss": 0.0369, "step": 177990 }, { "epoch": 1.9018109941770394, "grad_norm": 0.04202945902943611, "learning_rate": 9.804929147000218e-07, "loss": 0.0197, "step": 178000 }, { "epoch": 1.901917837491319, "grad_norm": 0.05039673671126366, "learning_rate": 9.804882673513434e-07, "loss": 0.0197, "step": 178010 }, { "epoch": 1.9020246808055985, "grad_norm": 4.621438503265381, "learning_rate": 9.804836194601576e-07, "loss": 0.0383, "step": 178020 }, { "epoch": 1.9021315241198782, "grad_norm": 0.0789555013179779, "learning_rate": 9.804789710264696e-07, "loss": 0.0239, "step": 178030 }, { "epoch": 1.9022383674341579, "grad_norm": 1.2226648330688477, "learning_rate": 9.804743220502845e-07, "loss": 0.0342, "step": 178040 }, { "epoch": 1.9023452107484373, "grad_norm": 0.06972796469926834, "learning_rate": 9.804696725316077e-07, "loss": 0.0175, "step": 178050 }, { "epoch": 1.902452054062717, "grad_norm": 3.435772180557251, "learning_rate": 9.804650224704444e-07, "loss": 0.0196, "step": 178060 }, { "epoch": 1.9025588973769967, "grad_norm": 0.10019359737634659, "learning_rate": 9.804603718667998e-07, "loss": 0.0352, "step": 178070 }, { "epoch": 1.9026657406912761, "grad_norm": 5.237915992736816, "learning_rate": 9.804557207206792e-07, "loss": 0.0508, "step": 178080 }, { "epoch": 1.9027725840055558, "grad_norm": 2.741542100906372, "learning_rate": 9.804510690320878e-07, "loss": 0.036, "step": 178090 }, { "epoch": 1.9028794273198355, "grad_norm": 0.2052977830171585, "learning_rate": 9.804464168010308e-07, "loss": 0.0149, "step": 178100 }, { "epoch": 1.902986270634115, "grad_norm": 3.3283956050872803, "learning_rate": 9.804417640275136e-07, "loss": 0.0503, "step": 178110 }, { "epoch": 1.9030931139483946, "grad_norm": 10.704962730407715, "learning_rate": 9.804371107115412e-07, "loss": 0.0371, "step": 178120 }, { "epoch": 1.9031999572626743, "grad_norm": 0.7430804967880249, "learning_rate": 9.804324568531193e-07, "loss": 0.0266, "step": 178130 }, { "epoch": 1.9033068005769538, "grad_norm": 3.0226168632507324, "learning_rate": 9.804278024522527e-07, "loss": 0.0194, "step": 178140 }, { "epoch": 1.9034136438912335, "grad_norm": 0.4597126245498657, "learning_rate": 9.804231475089471e-07, "loss": 0.0249, "step": 178150 }, { "epoch": 1.9035204872055131, "grad_norm": 0.3411347568035126, "learning_rate": 9.804184920232073e-07, "loss": 0.0027, "step": 178160 }, { "epoch": 1.9036273305197926, "grad_norm": 3.9842050075531006, "learning_rate": 9.80413835995039e-07, "loss": 0.0231, "step": 178170 }, { "epoch": 1.9037341738340723, "grad_norm": 7.265343189239502, "learning_rate": 9.80409179424447e-07, "loss": 0.0394, "step": 178180 }, { "epoch": 1.903841017148352, "grad_norm": 0.023226812481880188, "learning_rate": 9.804045223114367e-07, "loss": 0.0086, "step": 178190 }, { "epoch": 1.9039478604626314, "grad_norm": 0.04451778903603554, "learning_rate": 9.803998646560136e-07, "loss": 0.0201, "step": 178200 }, { "epoch": 1.904054703776911, "grad_norm": 0.4326578378677368, "learning_rate": 9.80395206458183e-07, "loss": 0.0513, "step": 178210 }, { "epoch": 1.9041615470911908, "grad_norm": 0.02073531784117222, "learning_rate": 9.803905477179497e-07, "loss": 0.017, "step": 178220 }, { "epoch": 1.9042683904054702, "grad_norm": 0.06407675892114639, "learning_rate": 9.80385888435319e-07, "loss": 0.0333, "step": 178230 }, { "epoch": 1.9043752337197501, "grad_norm": 0.09577235579490662, "learning_rate": 9.803812286102968e-07, "loss": 0.021, "step": 178240 }, { "epoch": 1.9044820770340296, "grad_norm": 0.456792414188385, "learning_rate": 9.803765682428876e-07, "loss": 0.0148, "step": 178250 }, { "epoch": 1.904588920348309, "grad_norm": 0.7899031639099121, "learning_rate": 9.803719073330972e-07, "loss": 0.027, "step": 178260 }, { "epoch": 1.904695763662589, "grad_norm": 0.035694658756256104, "learning_rate": 9.803672458809306e-07, "loss": 0.0205, "step": 178270 }, { "epoch": 1.9048026069768684, "grad_norm": 1.4126254320144653, "learning_rate": 9.803625838863932e-07, "loss": 0.0339, "step": 178280 }, { "epoch": 1.9049094502911479, "grad_norm": 3.0478110313415527, "learning_rate": 9.803579213494901e-07, "loss": 0.007, "step": 178290 }, { "epoch": 1.9050162936054278, "grad_norm": 0.010269139893352985, "learning_rate": 9.803532582702268e-07, "loss": 0.0111, "step": 178300 }, { "epoch": 1.9051231369197072, "grad_norm": 2.8703839778900146, "learning_rate": 9.803485946486083e-07, "loss": 0.0237, "step": 178310 }, { "epoch": 1.9052299802339867, "grad_norm": 1.249793529510498, "learning_rate": 9.8034393048464e-07, "loss": 0.0203, "step": 178320 }, { "epoch": 1.9053368235482666, "grad_norm": 0.10553381592035294, "learning_rate": 9.803392657783271e-07, "loss": 0.0198, "step": 178330 }, { "epoch": 1.905443666862546, "grad_norm": 0.42793723940849304, "learning_rate": 9.803346005296748e-07, "loss": 0.0153, "step": 178340 }, { "epoch": 1.9055505101768255, "grad_norm": 6.608642101287842, "learning_rate": 9.803299347386887e-07, "loss": 0.0248, "step": 178350 }, { "epoch": 1.9056573534911054, "grad_norm": 8.098841667175293, "learning_rate": 9.803252684053738e-07, "loss": 0.077, "step": 178360 }, { "epoch": 1.905764196805385, "grad_norm": 1.4871827363967896, "learning_rate": 9.803206015297353e-07, "loss": 0.0353, "step": 178370 }, { "epoch": 1.9058710401196644, "grad_norm": 0.634282112121582, "learning_rate": 9.803159341117789e-07, "loss": 0.0226, "step": 178380 }, { "epoch": 1.9059778834339443, "grad_norm": 7.071601390838623, "learning_rate": 9.803112661515092e-07, "loss": 0.0266, "step": 178390 }, { "epoch": 1.9060847267482237, "grad_norm": 5.561991214752197, "learning_rate": 9.80306597648932e-07, "loss": 0.0257, "step": 178400 }, { "epoch": 1.9061915700625034, "grad_norm": 6.318807125091553, "learning_rate": 9.803019286040524e-07, "loss": 0.018, "step": 178410 }, { "epoch": 1.906298413376783, "grad_norm": 10.26376724243164, "learning_rate": 9.802972590168757e-07, "loss": 0.1023, "step": 178420 }, { "epoch": 1.9064052566910625, "grad_norm": 0.04901932179927826, "learning_rate": 9.80292588887407e-07, "loss": 0.0144, "step": 178430 }, { "epoch": 1.9065121000053422, "grad_norm": 0.9904612898826599, "learning_rate": 9.802879182156518e-07, "loss": 0.0295, "step": 178440 }, { "epoch": 1.906618943319622, "grad_norm": 0.1190963014960289, "learning_rate": 9.802832470016154e-07, "loss": 0.0248, "step": 178450 }, { "epoch": 1.9067257866339014, "grad_norm": 2.602804183959961, "learning_rate": 9.80278575245303e-07, "loss": 0.0214, "step": 178460 }, { "epoch": 1.906832629948181, "grad_norm": 0.09842623770236969, "learning_rate": 9.802739029467196e-07, "loss": 0.0263, "step": 178470 }, { "epoch": 1.9069394732624607, "grad_norm": 2.144639253616333, "learning_rate": 9.802692301058708e-07, "loss": 0.0437, "step": 178480 }, { "epoch": 1.9070463165767402, "grad_norm": 0.006114616058766842, "learning_rate": 9.80264556722762e-07, "loss": 0.0385, "step": 178490 }, { "epoch": 1.9071531598910199, "grad_norm": 0.470126211643219, "learning_rate": 9.802598827973978e-07, "loss": 0.0273, "step": 178500 }, { "epoch": 1.9072600032052995, "grad_norm": 0.1921081691980362, "learning_rate": 9.802552083297843e-07, "loss": 0.0145, "step": 178510 }, { "epoch": 1.907366846519579, "grad_norm": 0.4133608937263489, "learning_rate": 9.802505333199264e-07, "loss": 0.1209, "step": 178520 }, { "epoch": 1.9074736898338587, "grad_norm": 0.02361193858087063, "learning_rate": 9.802458577678294e-07, "loss": 0.0325, "step": 178530 }, { "epoch": 1.9075805331481384, "grad_norm": 2.646946907043457, "learning_rate": 9.802411816734985e-07, "loss": 0.0119, "step": 178540 }, { "epoch": 1.9076873764624178, "grad_norm": 0.3565765619277954, "learning_rate": 9.80236505036939e-07, "loss": 0.031, "step": 178550 }, { "epoch": 1.9077942197766975, "grad_norm": 0.19841009378433228, "learning_rate": 9.802318278581565e-07, "loss": 0.0865, "step": 178560 }, { "epoch": 1.9079010630909772, "grad_norm": 9.20234203338623, "learning_rate": 9.802271501371558e-07, "loss": 0.0893, "step": 178570 }, { "epoch": 1.9080079064052566, "grad_norm": 4.91666841506958, "learning_rate": 9.802224718739424e-07, "loss": 0.0122, "step": 178580 }, { "epoch": 1.9081147497195363, "grad_norm": 2.337329149246216, "learning_rate": 9.802177930685218e-07, "loss": 0.0465, "step": 178590 }, { "epoch": 1.908221593033816, "grad_norm": 0.059747930616140366, "learning_rate": 9.80213113720899e-07, "loss": 0.0117, "step": 178600 }, { "epoch": 1.9083284363480955, "grad_norm": 2.793179512023926, "learning_rate": 9.80208433831079e-07, "loss": 0.0301, "step": 178610 }, { "epoch": 1.9084352796623751, "grad_norm": 0.08405516296625137, "learning_rate": 9.802037533990676e-07, "loss": 0.0365, "step": 178620 }, { "epoch": 1.9085421229766548, "grad_norm": 0.01711844466626644, "learning_rate": 9.801990724248702e-07, "loss": 0.0191, "step": 178630 }, { "epoch": 1.9086489662909343, "grad_norm": 1.046450138092041, "learning_rate": 9.801943909084917e-07, "loss": 0.0032, "step": 178640 }, { "epoch": 1.908755809605214, "grad_norm": 3.929492712020874, "learning_rate": 9.801897088499373e-07, "loss": 0.0275, "step": 178650 }, { "epoch": 1.9088626529194936, "grad_norm": 2.4784011840820312, "learning_rate": 9.801850262492126e-07, "loss": 0.0103, "step": 178660 }, { "epoch": 1.908969496233773, "grad_norm": 0.12027381360530853, "learning_rate": 9.801803431063229e-07, "loss": 0.0834, "step": 178670 }, { "epoch": 1.9090763395480528, "grad_norm": 4.54191780090332, "learning_rate": 9.801756594212732e-07, "loss": 0.036, "step": 178680 }, { "epoch": 1.9091831828623325, "grad_norm": 6.276669025421143, "learning_rate": 9.801709751940688e-07, "loss": 0.0481, "step": 178690 }, { "epoch": 1.909290026176612, "grad_norm": 4.838289737701416, "learning_rate": 9.801662904247155e-07, "loss": 0.0405, "step": 178700 }, { "epoch": 1.9093968694908916, "grad_norm": 3.5719540119171143, "learning_rate": 9.80161605113218e-07, "loss": 0.0447, "step": 178710 }, { "epoch": 1.9095037128051713, "grad_norm": 0.10741288959980011, "learning_rate": 9.801569192595816e-07, "loss": 0.0173, "step": 178720 }, { "epoch": 1.9096105561194507, "grad_norm": 0.013168890960514545, "learning_rate": 9.801522328638121e-07, "loss": 0.0098, "step": 178730 }, { "epoch": 1.9097173994337304, "grad_norm": 13.46070384979248, "learning_rate": 9.801475459259143e-07, "loss": 0.0798, "step": 178740 }, { "epoch": 1.90982424274801, "grad_norm": 0.07305633276700974, "learning_rate": 9.801428584458938e-07, "loss": 0.0285, "step": 178750 }, { "epoch": 1.9099310860622896, "grad_norm": 1.8213691711425781, "learning_rate": 9.801381704237559e-07, "loss": 0.0796, "step": 178760 }, { "epoch": 1.9100379293765692, "grad_norm": 12.300251960754395, "learning_rate": 9.801334818595055e-07, "loss": 0.0342, "step": 178770 }, { "epoch": 1.910144772690849, "grad_norm": 0.10807903856039047, "learning_rate": 9.801287927531482e-07, "loss": 0.0359, "step": 178780 }, { "epoch": 1.9102516160051284, "grad_norm": 1.2750575542449951, "learning_rate": 9.801241031046895e-07, "loss": 0.0182, "step": 178790 }, { "epoch": 1.910358459319408, "grad_norm": 0.4515354037284851, "learning_rate": 9.801194129141343e-07, "loss": 0.0961, "step": 178800 }, { "epoch": 1.9104653026336877, "grad_norm": 9.444212913513184, "learning_rate": 9.80114722181488e-07, "loss": 0.0376, "step": 178810 }, { "epoch": 1.9105721459479672, "grad_norm": 2.094388246536255, "learning_rate": 9.80110030906756e-07, "loss": 0.0471, "step": 178820 }, { "epoch": 1.9106789892622469, "grad_norm": 1.7278180122375488, "learning_rate": 9.801053390899434e-07, "loss": 0.021, "step": 178830 }, { "epoch": 1.9107858325765266, "grad_norm": 3.3173062801361084, "learning_rate": 9.801006467310559e-07, "loss": 0.037, "step": 178840 }, { "epoch": 1.910892675890806, "grad_norm": 3.48034930229187, "learning_rate": 9.800959538300984e-07, "loss": 0.0057, "step": 178850 }, { "epoch": 1.9109995192050857, "grad_norm": 5.52214241027832, "learning_rate": 9.800912603870763e-07, "loss": 0.0086, "step": 178860 }, { "epoch": 1.9111063625193654, "grad_norm": 0.3029892146587372, "learning_rate": 9.80086566401995e-07, "loss": 0.0299, "step": 178870 }, { "epoch": 1.9112132058336448, "grad_norm": 0.2439597249031067, "learning_rate": 9.800818718748596e-07, "loss": 0.0205, "step": 178880 }, { "epoch": 1.9113200491479245, "grad_norm": 20.517255783081055, "learning_rate": 9.800771768056757e-07, "loss": 0.1382, "step": 178890 }, { "epoch": 1.9114268924622042, "grad_norm": 2.651170253753662, "learning_rate": 9.800724811944482e-07, "loss": 0.0121, "step": 178900 }, { "epoch": 1.9115337357764837, "grad_norm": 6.961584568023682, "learning_rate": 9.800677850411828e-07, "loss": 0.0406, "step": 178910 }, { "epoch": 1.9116405790907633, "grad_norm": 0.013874162919819355, "learning_rate": 9.800630883458847e-07, "loss": 0.0272, "step": 178920 }, { "epoch": 1.911747422405043, "grad_norm": 3.763488531112671, "learning_rate": 9.800583911085592e-07, "loss": 0.0075, "step": 178930 }, { "epoch": 1.9118542657193225, "grad_norm": 1.3502109050750732, "learning_rate": 9.800536933292114e-07, "loss": 0.0115, "step": 178940 }, { "epoch": 1.9119611090336022, "grad_norm": 14.340272903442383, "learning_rate": 9.800489950078468e-07, "loss": 0.0662, "step": 178950 }, { "epoch": 1.9120679523478818, "grad_norm": 0.3285698890686035, "learning_rate": 9.800442961444707e-07, "loss": 0.0145, "step": 178960 }, { "epoch": 1.9121747956621613, "grad_norm": 0.06896647810935974, "learning_rate": 9.800395967390881e-07, "loss": 0.0116, "step": 178970 }, { "epoch": 1.9122816389764412, "grad_norm": 3.1723406314849854, "learning_rate": 9.800348967917046e-07, "loss": 0.0201, "step": 178980 }, { "epoch": 1.9123884822907207, "grad_norm": 8.077505111694336, "learning_rate": 9.800301963023257e-07, "loss": 0.0413, "step": 178990 }, { "epoch": 1.9124953256050001, "grad_norm": 3.355325222015381, "learning_rate": 9.800254952709565e-07, "loss": 0.0142, "step": 179000 }, { "epoch": 1.91260216891928, "grad_norm": 8.718002319335938, "learning_rate": 9.800207936976022e-07, "loss": 0.0514, "step": 179010 }, { "epoch": 1.9127090122335595, "grad_norm": 4.974618434906006, "learning_rate": 9.800160915822682e-07, "loss": 0.038, "step": 179020 }, { "epoch": 1.912815855547839, "grad_norm": 4.322451114654541, "learning_rate": 9.800113889249597e-07, "loss": 0.0394, "step": 179030 }, { "epoch": 1.9129226988621189, "grad_norm": 0.954535961151123, "learning_rate": 9.80006685725682e-07, "loss": 0.0098, "step": 179040 }, { "epoch": 1.9130295421763983, "grad_norm": 0.2952650487422943, "learning_rate": 9.80001981984441e-07, "loss": 0.056, "step": 179050 }, { "epoch": 1.9131363854906778, "grad_norm": 0.3723652958869934, "learning_rate": 9.799972777012411e-07, "loss": 0.0296, "step": 179060 }, { "epoch": 1.9132432288049577, "grad_norm": 8.590665817260742, "learning_rate": 9.799925728760882e-07, "loss": 0.0941, "step": 179070 }, { "epoch": 1.9133500721192371, "grad_norm": 0.10004261881113052, "learning_rate": 9.799878675089875e-07, "loss": 0.0284, "step": 179080 }, { "epoch": 1.9134569154335166, "grad_norm": 2.3502895832061768, "learning_rate": 9.799831615999443e-07, "loss": 0.0436, "step": 179090 }, { "epoch": 1.9135637587477965, "grad_norm": 2.8419854640960693, "learning_rate": 9.79978455148964e-07, "loss": 0.0206, "step": 179100 }, { "epoch": 1.913670602062076, "grad_norm": 0.012345321476459503, "learning_rate": 9.799737481560514e-07, "loss": 0.0849, "step": 179110 }, { "epoch": 1.9137774453763554, "grad_norm": 0.10407187044620514, "learning_rate": 9.799690406212123e-07, "loss": 0.0209, "step": 179120 }, { "epoch": 1.9138842886906353, "grad_norm": 0.16569659113883972, "learning_rate": 9.799643325444522e-07, "loss": 0.0218, "step": 179130 }, { "epoch": 1.9139911320049148, "grad_norm": 0.9022395610809326, "learning_rate": 9.79959623925776e-07, "loss": 0.0731, "step": 179140 }, { "epoch": 1.9140979753191945, "grad_norm": 2.3696446418762207, "learning_rate": 9.79954914765189e-07, "loss": 0.013, "step": 179150 }, { "epoch": 1.9142048186334741, "grad_norm": 1.5246949195861816, "learning_rate": 9.79950205062697e-07, "loss": 0.0147, "step": 179160 }, { "epoch": 1.9143116619477536, "grad_norm": 0.044441502541303635, "learning_rate": 9.799454948183048e-07, "loss": 0.0406, "step": 179170 }, { "epoch": 1.9144185052620333, "grad_norm": 0.003821462159976363, "learning_rate": 9.79940784032018e-07, "loss": 0.0466, "step": 179180 }, { "epoch": 1.914525348576313, "grad_norm": 0.2138514518737793, "learning_rate": 9.799360727038417e-07, "loss": 0.0105, "step": 179190 }, { "epoch": 1.9146321918905924, "grad_norm": 0.44008776545524597, "learning_rate": 9.799313608337815e-07, "loss": 0.0148, "step": 179200 }, { "epoch": 1.914739035204872, "grad_norm": 2.0082154273986816, "learning_rate": 9.799266484218426e-07, "loss": 0.0066, "step": 179210 }, { "epoch": 1.9148458785191518, "grad_norm": 0.03568306937813759, "learning_rate": 9.799219354680302e-07, "loss": 0.0311, "step": 179220 }, { "epoch": 1.9149527218334312, "grad_norm": 0.862224280834198, "learning_rate": 9.799172219723496e-07, "loss": 0.0288, "step": 179230 }, { "epoch": 1.915059565147711, "grad_norm": 0.0592113621532917, "learning_rate": 9.799125079348062e-07, "loss": 0.0637, "step": 179240 }, { "epoch": 1.9151664084619906, "grad_norm": 6.092274188995361, "learning_rate": 9.799077933554056e-07, "loss": 0.0386, "step": 179250 }, { "epoch": 1.91527325177627, "grad_norm": 1.2274205684661865, "learning_rate": 9.799030782341528e-07, "loss": 0.0322, "step": 179260 }, { "epoch": 1.9153800950905497, "grad_norm": 5.615142345428467, "learning_rate": 9.798983625710532e-07, "loss": 0.0415, "step": 179270 }, { "epoch": 1.9154869384048294, "grad_norm": 0.07672328501939774, "learning_rate": 9.79893646366112e-07, "loss": 0.0412, "step": 179280 }, { "epoch": 1.9155937817191089, "grad_norm": 6.704185485839844, "learning_rate": 9.79888929619335e-07, "loss": 0.0265, "step": 179290 }, { "epoch": 1.9157006250333886, "grad_norm": 0.04428543150424957, "learning_rate": 9.798842123307268e-07, "loss": 0.0171, "step": 179300 }, { "epoch": 1.9158074683476682, "grad_norm": 0.003848371095955372, "learning_rate": 9.798794945002933e-07, "loss": 0.0418, "step": 179310 }, { "epoch": 1.9159143116619477, "grad_norm": 0.012361797504127026, "learning_rate": 9.798747761280395e-07, "loss": 0.029, "step": 179320 }, { "epoch": 1.9160211549762274, "grad_norm": 1.7392772436141968, "learning_rate": 9.798700572139712e-07, "loss": 0.0205, "step": 179330 }, { "epoch": 1.916127998290507, "grad_norm": 0.10695814341306686, "learning_rate": 9.79865337758093e-07, "loss": 0.0674, "step": 179340 }, { "epoch": 1.9162348416047865, "grad_norm": 16.428380966186523, "learning_rate": 9.798606177604108e-07, "loss": 0.0621, "step": 179350 }, { "epoch": 1.9163416849190662, "grad_norm": 9.60301685333252, "learning_rate": 9.798558972209296e-07, "loss": 0.0094, "step": 179360 }, { "epoch": 1.9164485282333459, "grad_norm": 2.089810848236084, "learning_rate": 9.79851176139655e-07, "loss": 0.0221, "step": 179370 }, { "epoch": 1.9165553715476253, "grad_norm": 2.355776309967041, "learning_rate": 9.798464545165921e-07, "loss": 0.0553, "step": 179380 }, { "epoch": 1.916662214861905, "grad_norm": 10.725242614746094, "learning_rate": 9.798417323517464e-07, "loss": 0.0275, "step": 179390 }, { "epoch": 1.9167690581761847, "grad_norm": 1.0064845085144043, "learning_rate": 9.798370096451232e-07, "loss": 0.032, "step": 179400 }, { "epoch": 1.9168759014904642, "grad_norm": 0.00222361390478909, "learning_rate": 9.79832286396728e-07, "loss": 0.0156, "step": 179410 }, { "epoch": 1.9169827448047438, "grad_norm": 0.13356299698352814, "learning_rate": 9.798275626065655e-07, "loss": 0.0064, "step": 179420 }, { "epoch": 1.9170895881190235, "grad_norm": 0.03265319764614105, "learning_rate": 9.798228382746417e-07, "loss": 0.0399, "step": 179430 }, { "epoch": 1.917196431433303, "grad_norm": 3.0717854499816895, "learning_rate": 9.798181134009618e-07, "loss": 0.0258, "step": 179440 }, { "epoch": 1.9173032747475827, "grad_norm": 0.008997687138617039, "learning_rate": 9.798133879855308e-07, "loss": 0.0083, "step": 179450 }, { "epoch": 1.9174101180618623, "grad_norm": 0.8491865992546082, "learning_rate": 9.798086620283546e-07, "loss": 0.0283, "step": 179460 }, { "epoch": 1.9175169613761418, "grad_norm": 0.48125094175338745, "learning_rate": 9.798039355294378e-07, "loss": 0.0078, "step": 179470 }, { "epoch": 1.9176238046904215, "grad_norm": 0.4043615758419037, "learning_rate": 9.797992084887865e-07, "loss": 0.0205, "step": 179480 }, { "epoch": 1.9177306480047012, "grad_norm": 0.9067031145095825, "learning_rate": 9.797944809064055e-07, "loss": 0.0136, "step": 179490 }, { "epoch": 1.9178374913189806, "grad_norm": 10.797691345214844, "learning_rate": 9.797897527823002e-07, "loss": 0.025, "step": 179500 }, { "epoch": 1.9179443346332603, "grad_norm": 20.008249282836914, "learning_rate": 9.797850241164764e-07, "loss": 0.0419, "step": 179510 }, { "epoch": 1.91805117794754, "grad_norm": 0.05228433758020401, "learning_rate": 9.797802949089388e-07, "loss": 0.0239, "step": 179520 }, { "epoch": 1.9181580212618194, "grad_norm": 0.01576414704322815, "learning_rate": 9.797755651596931e-07, "loss": 0.0221, "step": 179530 }, { "epoch": 1.9182648645760991, "grad_norm": 1.9050947427749634, "learning_rate": 9.797708348687447e-07, "loss": 0.0093, "step": 179540 }, { "epoch": 1.9183717078903788, "grad_norm": 0.06242698058485985, "learning_rate": 9.797661040360986e-07, "loss": 0.0035, "step": 179550 }, { "epoch": 1.9184785512046583, "grad_norm": 2.203843593597412, "learning_rate": 9.797613726617606e-07, "loss": 0.0279, "step": 179560 }, { "epoch": 1.918585394518938, "grad_norm": 11.27843952178955, "learning_rate": 9.797566407457358e-07, "loss": 0.0341, "step": 179570 }, { "epoch": 1.9186922378332176, "grad_norm": 2.8362174034118652, "learning_rate": 9.797519082880295e-07, "loss": 0.0395, "step": 179580 }, { "epoch": 1.918799081147497, "grad_norm": 0.23119916021823883, "learning_rate": 9.797471752886468e-07, "loss": 0.03, "step": 179590 }, { "epoch": 1.9189059244617768, "grad_norm": 1.355103611946106, "learning_rate": 9.797424417475936e-07, "loss": 0.0076, "step": 179600 }, { "epoch": 1.9190127677760564, "grad_norm": 2.2781944274902344, "learning_rate": 9.79737707664875e-07, "loss": 0.0116, "step": 179610 }, { "epoch": 1.919119611090336, "grad_norm": 1.3931117057800293, "learning_rate": 9.797329730404962e-07, "loss": 0.0278, "step": 179620 }, { "epoch": 1.9192264544046156, "grad_norm": 0.047012921422719955, "learning_rate": 9.797282378744628e-07, "loss": 0.0209, "step": 179630 }, { "epoch": 1.9193332977188953, "grad_norm": 0.17140048742294312, "learning_rate": 9.7972350216678e-07, "loss": 0.0195, "step": 179640 }, { "epoch": 1.9194401410331747, "grad_norm": 0.5273982882499695, "learning_rate": 9.79718765917453e-07, "loss": 0.0396, "step": 179650 }, { "epoch": 1.9195469843474544, "grad_norm": 3.469630002975464, "learning_rate": 9.797140291264873e-07, "loss": 0.0306, "step": 179660 }, { "epoch": 1.919653827661734, "grad_norm": 0.47768422961235046, "learning_rate": 9.797092917938885e-07, "loss": 0.0432, "step": 179670 }, { "epoch": 1.9197606709760136, "grad_norm": 1.4417381286621094, "learning_rate": 9.797045539196616e-07, "loss": 0.0169, "step": 179680 }, { "epoch": 1.9198675142902932, "grad_norm": 1.157317876815796, "learning_rate": 9.796998155038118e-07, "loss": 0.0224, "step": 179690 }, { "epoch": 1.919974357604573, "grad_norm": 2.241089344024658, "learning_rate": 9.79695076546345e-07, "loss": 0.0472, "step": 179700 }, { "epoch": 1.9200812009188524, "grad_norm": 0.053965672850608826, "learning_rate": 9.796903370472662e-07, "loss": 0.0231, "step": 179710 }, { "epoch": 1.9201880442331323, "grad_norm": 1.0343401432037354, "learning_rate": 9.796855970065807e-07, "loss": 0.0136, "step": 179720 }, { "epoch": 1.9202948875474117, "grad_norm": 5.32097053527832, "learning_rate": 9.796808564242942e-07, "loss": 0.0217, "step": 179730 }, { "epoch": 1.9204017308616912, "grad_norm": 0.905552327632904, "learning_rate": 9.796761153004117e-07, "loss": 0.0165, "step": 179740 }, { "epoch": 1.920508574175971, "grad_norm": 5.730951309204102, "learning_rate": 9.796713736349384e-07, "loss": 0.0669, "step": 179750 }, { "epoch": 1.9206154174902506, "grad_norm": 0.37075212597846985, "learning_rate": 9.796666314278803e-07, "loss": 0.0123, "step": 179760 }, { "epoch": 1.92072226080453, "grad_norm": 0.004054976161569357, "learning_rate": 9.79661888679242e-07, "loss": 0.0191, "step": 179770 }, { "epoch": 1.92082910411881, "grad_norm": 15.2412691116333, "learning_rate": 9.796571453890295e-07, "loss": 0.0952, "step": 179780 }, { "epoch": 1.9209359474330894, "grad_norm": 10.369866371154785, "learning_rate": 9.796524015572479e-07, "loss": 0.0203, "step": 179790 }, { "epoch": 1.9210427907473688, "grad_norm": 5.6731438636779785, "learning_rate": 9.796476571839024e-07, "loss": 0.0356, "step": 179800 }, { "epoch": 1.9211496340616487, "grad_norm": 3.787245988845825, "learning_rate": 9.796429122689985e-07, "loss": 0.0301, "step": 179810 }, { "epoch": 1.9212564773759282, "grad_norm": 2.5179905891418457, "learning_rate": 9.796381668125416e-07, "loss": 0.0167, "step": 179820 }, { "epoch": 1.9213633206902077, "grad_norm": 6.5943684577941895, "learning_rate": 9.79633420814537e-07, "loss": 0.0121, "step": 179830 }, { "epoch": 1.9214701640044876, "grad_norm": 2.3894214630126953, "learning_rate": 9.7962867427499e-07, "loss": 0.0202, "step": 179840 }, { "epoch": 1.921577007318767, "grad_norm": 3.1153507232666016, "learning_rate": 9.796239271939062e-07, "loss": 0.0314, "step": 179850 }, { "epoch": 1.9216838506330465, "grad_norm": 4.777751445770264, "learning_rate": 9.796191795712906e-07, "loss": 0.0164, "step": 179860 }, { "epoch": 1.9217906939473264, "grad_norm": 0.19781917333602905, "learning_rate": 9.796144314071489e-07, "loss": 0.0264, "step": 179870 }, { "epoch": 1.9218975372616058, "grad_norm": 2.4889910221099854, "learning_rate": 9.79609682701486e-07, "loss": 0.0707, "step": 179880 }, { "epoch": 1.9220043805758855, "grad_norm": 0.7640941143035889, "learning_rate": 9.79604933454308e-07, "loss": 0.0375, "step": 179890 }, { "epoch": 1.9221112238901652, "grad_norm": 2.0217549800872803, "learning_rate": 9.796001836656195e-07, "loss": 0.0327, "step": 179900 }, { "epoch": 1.9222180672044447, "grad_norm": 0.0015421966090798378, "learning_rate": 9.795954333354265e-07, "loss": 0.0266, "step": 179910 }, { "epoch": 1.9223249105187243, "grad_norm": 8.45625114440918, "learning_rate": 9.795906824637338e-07, "loss": 0.0304, "step": 179920 }, { "epoch": 1.922431753833004, "grad_norm": 2.9982175827026367, "learning_rate": 9.795859310505471e-07, "loss": 0.0171, "step": 179930 }, { "epoch": 1.9225385971472835, "grad_norm": 1.0418288707733154, "learning_rate": 9.795811790958718e-07, "loss": 0.0045, "step": 179940 }, { "epoch": 1.9226454404615632, "grad_norm": 1.5026044845581055, "learning_rate": 9.79576426599713e-07, "loss": 0.0223, "step": 179950 }, { "epoch": 1.9227522837758428, "grad_norm": 0.37563925981521606, "learning_rate": 9.795716735620762e-07, "loss": 0.0729, "step": 179960 }, { "epoch": 1.9228591270901223, "grad_norm": 2.8802921772003174, "learning_rate": 9.79566919982967e-07, "loss": 0.0367, "step": 179970 }, { "epoch": 1.922965970404402, "grad_norm": 0.0479019433259964, "learning_rate": 9.795621658623903e-07, "loss": 0.0116, "step": 179980 }, { "epoch": 1.9230728137186817, "grad_norm": 0.014992473646998405, "learning_rate": 9.79557411200352e-07, "loss": 0.0413, "step": 179990 }, { "epoch": 1.9231796570329611, "grad_norm": 0.0729585587978363, "learning_rate": 9.79552655996857e-07, "loss": 0.0102, "step": 180000 }, { "epoch": 1.9232865003472408, "grad_norm": 3.262420415878296, "learning_rate": 9.79547900251911e-07, "loss": 0.0257, "step": 180010 }, { "epoch": 1.9233933436615205, "grad_norm": 2.202955484390259, "learning_rate": 9.79543143965519e-07, "loss": 0.0559, "step": 180020 }, { "epoch": 1.9235001869758, "grad_norm": 0.11271756142377853, "learning_rate": 9.795383871376866e-07, "loss": 0.0249, "step": 180030 }, { "epoch": 1.9236070302900796, "grad_norm": 5.243514060974121, "learning_rate": 9.795336297684194e-07, "loss": 0.0576, "step": 180040 }, { "epoch": 1.9237138736043593, "grad_norm": 0.050614673644304276, "learning_rate": 9.795288718577224e-07, "loss": 0.1121, "step": 180050 }, { "epoch": 1.9238207169186388, "grad_norm": 3.2315866947174072, "learning_rate": 9.795241134056011e-07, "loss": 0.0272, "step": 180060 }, { "epoch": 1.9239275602329184, "grad_norm": 2.144592523574829, "learning_rate": 9.79519354412061e-07, "loss": 0.0256, "step": 180070 }, { "epoch": 1.9240344035471981, "grad_norm": 0.09650322794914246, "learning_rate": 9.795145948771072e-07, "loss": 0.0747, "step": 180080 }, { "epoch": 1.9241412468614776, "grad_norm": 0.06341439485549927, "learning_rate": 9.795098348007453e-07, "loss": 0.0456, "step": 180090 }, { "epoch": 1.9242480901757573, "grad_norm": 0.041830215603113174, "learning_rate": 9.795050741829806e-07, "loss": 0.0404, "step": 180100 }, { "epoch": 1.924354933490037, "grad_norm": 4.620474338531494, "learning_rate": 9.795003130238184e-07, "loss": 0.0196, "step": 180110 }, { "epoch": 1.9244617768043164, "grad_norm": 4.70831823348999, "learning_rate": 9.794955513232644e-07, "loss": 0.024, "step": 180120 }, { "epoch": 1.924568620118596, "grad_norm": 16.656105041503906, "learning_rate": 9.794907890813235e-07, "loss": 0.0515, "step": 180130 }, { "epoch": 1.9246754634328758, "grad_norm": 0.030459381639957428, "learning_rate": 9.794860262980015e-07, "loss": 0.0567, "step": 180140 }, { "epoch": 1.9247823067471552, "grad_norm": 5.39786434173584, "learning_rate": 9.794812629733034e-07, "loss": 0.0526, "step": 180150 }, { "epoch": 1.924889150061435, "grad_norm": 5.73473596572876, "learning_rate": 9.794764991072349e-07, "loss": 0.0756, "step": 180160 }, { "epoch": 1.9249959933757146, "grad_norm": 0.8399189710617065, "learning_rate": 9.794717346998011e-07, "loss": 0.0484, "step": 180170 }, { "epoch": 1.925102836689994, "grad_norm": 10.283381462097168, "learning_rate": 9.794669697510076e-07, "loss": 0.0447, "step": 180180 }, { "epoch": 1.9252096800042737, "grad_norm": 0.4395160675048828, "learning_rate": 9.794622042608597e-07, "loss": 0.0487, "step": 180190 }, { "epoch": 1.9253165233185534, "grad_norm": 0.018474187701940536, "learning_rate": 9.794574382293628e-07, "loss": 0.0384, "step": 180200 }, { "epoch": 1.9254233666328329, "grad_norm": 0.46534353494644165, "learning_rate": 9.794526716565224e-07, "loss": 0.0214, "step": 180210 }, { "epoch": 1.9255302099471125, "grad_norm": 0.8270784020423889, "learning_rate": 9.794479045423436e-07, "loss": 0.0139, "step": 180220 }, { "epoch": 1.9256370532613922, "grad_norm": 9.931732177734375, "learning_rate": 9.79443136886832e-07, "loss": 0.079, "step": 180230 }, { "epoch": 1.9257438965756717, "grad_norm": 0.4574676752090454, "learning_rate": 9.794383686899928e-07, "loss": 0.0265, "step": 180240 }, { "epoch": 1.9258507398899514, "grad_norm": 3.4673945903778076, "learning_rate": 9.794335999518316e-07, "loss": 0.0184, "step": 180250 }, { "epoch": 1.925957583204231, "grad_norm": 0.08274030685424805, "learning_rate": 9.794288306723534e-07, "loss": 0.0054, "step": 180260 }, { "epoch": 1.9260644265185105, "grad_norm": 3.9522743225097656, "learning_rate": 9.794240608515643e-07, "loss": 0.0426, "step": 180270 }, { "epoch": 1.9261712698327902, "grad_norm": 0.074343241751194, "learning_rate": 9.79419290489469e-07, "loss": 0.0151, "step": 180280 }, { "epoch": 1.9262781131470699, "grad_norm": 0.015988726168870926, "learning_rate": 9.794145195860733e-07, "loss": 0.008, "step": 180290 }, { "epoch": 1.9263849564613493, "grad_norm": 9.521437644958496, "learning_rate": 9.794097481413822e-07, "loss": 0.0183, "step": 180300 }, { "epoch": 1.926491799775629, "grad_norm": 0.8129870891571045, "learning_rate": 9.794049761554015e-07, "loss": 0.0458, "step": 180310 }, { "epoch": 1.9265986430899087, "grad_norm": 4.965837001800537, "learning_rate": 9.794002036281362e-07, "loss": 0.0624, "step": 180320 }, { "epoch": 1.9267054864041882, "grad_norm": 0.14830125868320465, "learning_rate": 9.793954305595921e-07, "loss": 0.0443, "step": 180330 }, { "epoch": 1.9268123297184678, "grad_norm": 0.06634840369224548, "learning_rate": 9.793906569497743e-07, "loss": 0.0033, "step": 180340 }, { "epoch": 1.9269191730327475, "grad_norm": 0.0747414380311966, "learning_rate": 9.793858827986883e-07, "loss": 0.0582, "step": 180350 }, { "epoch": 1.927026016347027, "grad_norm": 0.015397300943732262, "learning_rate": 9.793811081063394e-07, "loss": 0.0288, "step": 180360 }, { "epoch": 1.9271328596613067, "grad_norm": 0.9735340476036072, "learning_rate": 9.79376332872733e-07, "loss": 0.0285, "step": 180370 }, { "epoch": 1.9272397029755863, "grad_norm": 10.34206485748291, "learning_rate": 9.793715570978746e-07, "loss": 0.0308, "step": 180380 }, { "epoch": 1.9273465462898658, "grad_norm": 6.143771648406982, "learning_rate": 9.793667807817696e-07, "loss": 0.0566, "step": 180390 }, { "epoch": 1.9274533896041455, "grad_norm": 20.414827346801758, "learning_rate": 9.793620039244232e-07, "loss": 0.0757, "step": 180400 }, { "epoch": 1.9275602329184252, "grad_norm": 0.12403057515621185, "learning_rate": 9.79357226525841e-07, "loss": 0.0263, "step": 180410 }, { "epoch": 1.9276670762327046, "grad_norm": 0.07851968705654144, "learning_rate": 9.793524485860282e-07, "loss": 0.0252, "step": 180420 }, { "epoch": 1.9277739195469843, "grad_norm": 0.19560237228870392, "learning_rate": 9.793476701049906e-07, "loss": 0.0061, "step": 180430 }, { "epoch": 1.927880762861264, "grad_norm": 0.17250770330429077, "learning_rate": 9.79342891082733e-07, "loss": 0.0109, "step": 180440 }, { "epoch": 1.9279876061755434, "grad_norm": 0.38197198510169983, "learning_rate": 9.793381115192612e-07, "loss": 0.0278, "step": 180450 }, { "epoch": 1.9280944494898233, "grad_norm": 0.8789708018302917, "learning_rate": 9.793333314145805e-07, "loss": 0.0197, "step": 180460 }, { "epoch": 1.9282012928041028, "grad_norm": 0.015450123697519302, "learning_rate": 9.793285507686963e-07, "loss": 0.0104, "step": 180470 }, { "epoch": 1.9283081361183823, "grad_norm": 7.286798477172852, "learning_rate": 9.79323769581614e-07, "loss": 0.0846, "step": 180480 }, { "epoch": 1.9284149794326622, "grad_norm": 0.1617651730775833, "learning_rate": 9.79318987853339e-07, "loss": 0.0283, "step": 180490 }, { "epoch": 1.9285218227469416, "grad_norm": 2.3911476135253906, "learning_rate": 9.793142055838766e-07, "loss": 0.0694, "step": 180500 }, { "epoch": 1.928628666061221, "grad_norm": 2.1777713298797607, "learning_rate": 9.793094227732324e-07, "loss": 0.0285, "step": 180510 }, { "epoch": 1.928735509375501, "grad_norm": 0.0712311789393425, "learning_rate": 9.793046394214117e-07, "loss": 0.0664, "step": 180520 }, { "epoch": 1.9288423526897804, "grad_norm": 1.0528720617294312, "learning_rate": 9.792998555284197e-07, "loss": 0.0359, "step": 180530 }, { "epoch": 1.92894919600406, "grad_norm": 13.455570220947266, "learning_rate": 9.792950710942622e-07, "loss": 0.0501, "step": 180540 }, { "epoch": 1.9290560393183398, "grad_norm": 0.11574968695640564, "learning_rate": 9.792902861189442e-07, "loss": 0.01, "step": 180550 }, { "epoch": 1.9291628826326193, "grad_norm": 8.72030258178711, "learning_rate": 9.792855006024715e-07, "loss": 0.0631, "step": 180560 }, { "epoch": 1.9292697259468987, "grad_norm": 5.60123872756958, "learning_rate": 9.79280714544849e-07, "loss": 0.0114, "step": 180570 }, { "epoch": 1.9293765692611786, "grad_norm": 0.00834944099187851, "learning_rate": 9.792759279460828e-07, "loss": 0.012, "step": 180580 }, { "epoch": 1.929483412575458, "grad_norm": 0.33162087202072144, "learning_rate": 9.792711408061776e-07, "loss": 0.0184, "step": 180590 }, { "epoch": 1.9295902558897375, "grad_norm": 3.0677082538604736, "learning_rate": 9.79266353125139e-07, "loss": 0.0157, "step": 180600 }, { "epoch": 1.9296970992040174, "grad_norm": 4.718439102172852, "learning_rate": 9.79261564902973e-07, "loss": 0.0526, "step": 180610 }, { "epoch": 1.929803942518297, "grad_norm": 0.023589735850691795, "learning_rate": 9.792567761396842e-07, "loss": 0.0294, "step": 180620 }, { "epoch": 1.9299107858325766, "grad_norm": 2.695225238800049, "learning_rate": 9.792519868352782e-07, "loss": 0.0194, "step": 180630 }, { "epoch": 1.9300176291468563, "grad_norm": 4.677643299102783, "learning_rate": 9.792471969897607e-07, "loss": 0.0434, "step": 180640 }, { "epoch": 1.9301244724611357, "grad_norm": 0.01059267669916153, "learning_rate": 9.79242406603137e-07, "loss": 0.0174, "step": 180650 }, { "epoch": 1.9302313157754154, "grad_norm": 5.4445481300354, "learning_rate": 9.792376156754125e-07, "loss": 0.0235, "step": 180660 }, { "epoch": 1.930338159089695, "grad_norm": 0.5688110589981079, "learning_rate": 9.792328242065923e-07, "loss": 0.0192, "step": 180670 }, { "epoch": 1.9304450024039745, "grad_norm": 6.890571117401123, "learning_rate": 9.792280321966824e-07, "loss": 0.0208, "step": 180680 }, { "epoch": 1.9305518457182542, "grad_norm": 6.757410049438477, "learning_rate": 9.792232396456877e-07, "loss": 0.041, "step": 180690 }, { "epoch": 1.930658689032534, "grad_norm": 2.5518789291381836, "learning_rate": 9.792184465536138e-07, "loss": 0.0167, "step": 180700 }, { "epoch": 1.9307655323468134, "grad_norm": 1.9272853136062622, "learning_rate": 9.792136529204659e-07, "loss": 0.0429, "step": 180710 }, { "epoch": 1.930872375661093, "grad_norm": 0.3644469380378723, "learning_rate": 9.792088587462498e-07, "loss": 0.0665, "step": 180720 }, { "epoch": 1.9309792189753727, "grad_norm": 5.097273826599121, "learning_rate": 9.79204064030971e-07, "loss": 0.0161, "step": 180730 }, { "epoch": 1.9310860622896522, "grad_norm": 5.948837757110596, "learning_rate": 9.791992687746345e-07, "loss": 0.0239, "step": 180740 }, { "epoch": 1.9311929056039319, "grad_norm": 1.4041880369186401, "learning_rate": 9.791944729772456e-07, "loss": 0.0278, "step": 180750 }, { "epoch": 1.9312997489182115, "grad_norm": 1.5758187770843506, "learning_rate": 9.791896766388101e-07, "loss": 0.0568, "step": 180760 }, { "epoch": 1.931406592232491, "grad_norm": 0.007741314359009266, "learning_rate": 9.791848797593335e-07, "loss": 0.0062, "step": 180770 }, { "epoch": 1.9315134355467707, "grad_norm": 1.396851658821106, "learning_rate": 9.791800823388208e-07, "loss": 0.013, "step": 180780 }, { "epoch": 1.9316202788610504, "grad_norm": 3.784869909286499, "learning_rate": 9.791752843772776e-07, "loss": 0.0263, "step": 180790 }, { "epoch": 1.9317271221753298, "grad_norm": 9.195988655090332, "learning_rate": 9.791704858747094e-07, "loss": 0.0212, "step": 180800 }, { "epoch": 1.9318339654896095, "grad_norm": 0.034617193043231964, "learning_rate": 9.791656868311216e-07, "loss": 0.0198, "step": 180810 }, { "epoch": 1.9319408088038892, "grad_norm": 0.633957028388977, "learning_rate": 9.791608872465194e-07, "loss": 0.0382, "step": 180820 }, { "epoch": 1.9320476521181686, "grad_norm": 0.058746177703142166, "learning_rate": 9.791560871209086e-07, "loss": 0.0276, "step": 180830 }, { "epoch": 1.9321544954324483, "grad_norm": 0.10398347675800323, "learning_rate": 9.791512864542942e-07, "loss": 0.0154, "step": 180840 }, { "epoch": 1.932261338746728, "grad_norm": 1.5368196964263916, "learning_rate": 9.79146485246682e-07, "loss": 0.023, "step": 180850 }, { "epoch": 1.9323681820610075, "grad_norm": 0.18658873438835144, "learning_rate": 9.791416834980772e-07, "loss": 0.0243, "step": 180860 }, { "epoch": 1.9324750253752871, "grad_norm": 1.4925692081451416, "learning_rate": 9.791368812084854e-07, "loss": 0.0274, "step": 180870 }, { "epoch": 1.9325818686895668, "grad_norm": 1.7489598989486694, "learning_rate": 9.791320783779118e-07, "loss": 0.0111, "step": 180880 }, { "epoch": 1.9326887120038463, "grad_norm": 0.01903575100004673, "learning_rate": 9.791272750063619e-07, "loss": 0.0742, "step": 180890 }, { "epoch": 1.932795555318126, "grad_norm": 0.6223414540290833, "learning_rate": 9.791224710938413e-07, "loss": 0.014, "step": 180900 }, { "epoch": 1.9329023986324056, "grad_norm": 0.0749155580997467, "learning_rate": 9.791176666403552e-07, "loss": 0.0464, "step": 180910 }, { "epoch": 1.933009241946685, "grad_norm": 6.1417131423950195, "learning_rate": 9.791128616459088e-07, "loss": 0.0388, "step": 180920 }, { "epoch": 1.9331160852609648, "grad_norm": 2.85091233253479, "learning_rate": 9.79108056110508e-07, "loss": 0.0464, "step": 180930 }, { "epoch": 1.9332229285752445, "grad_norm": 0.01938362792134285, "learning_rate": 9.791032500341583e-07, "loss": 0.0107, "step": 180940 }, { "epoch": 1.933329771889524, "grad_norm": 0.015579914674162865, "learning_rate": 9.790984434168646e-07, "loss": 0.0175, "step": 180950 }, { "epoch": 1.9334366152038036, "grad_norm": 0.04951559752225876, "learning_rate": 9.790936362586327e-07, "loss": 0.0344, "step": 180960 }, { "epoch": 1.9335434585180833, "grad_norm": 5.186163902282715, "learning_rate": 9.790888285594678e-07, "loss": 0.0104, "step": 180970 }, { "epoch": 1.9336503018323628, "grad_norm": 0.047432202845811844, "learning_rate": 9.790840203193756e-07, "loss": 0.0159, "step": 180980 }, { "epoch": 1.9337571451466424, "grad_norm": 0.060674138367176056, "learning_rate": 9.790792115383612e-07, "loss": 0.0258, "step": 180990 }, { "epoch": 1.9338639884609221, "grad_norm": 0.013398196548223495, "learning_rate": 9.790744022164304e-07, "loss": 0.0281, "step": 181000 }, { "epoch": 1.9339708317752016, "grad_norm": 0.02290717512369156, "learning_rate": 9.790695923535885e-07, "loss": 0.0334, "step": 181010 }, { "epoch": 1.9340776750894813, "grad_norm": 0.6284552216529846, "learning_rate": 9.790647819498408e-07, "loss": 0.0286, "step": 181020 }, { "epoch": 1.934184518403761, "grad_norm": 0.9871736168861389, "learning_rate": 9.790599710051928e-07, "loss": 0.0282, "step": 181030 }, { "epoch": 1.9342913617180404, "grad_norm": 0.16632451117038727, "learning_rate": 9.790551595196496e-07, "loss": 0.0138, "step": 181040 }, { "epoch": 1.93439820503232, "grad_norm": 3.0523970127105713, "learning_rate": 9.790503474932174e-07, "loss": 0.0438, "step": 181050 }, { "epoch": 1.9345050483465998, "grad_norm": 7.614548683166504, "learning_rate": 9.790455349259011e-07, "loss": 0.025, "step": 181060 }, { "epoch": 1.9346118916608792, "grad_norm": 0.1524972766637802, "learning_rate": 9.790407218177062e-07, "loss": 0.0408, "step": 181070 }, { "epoch": 1.934718734975159, "grad_norm": 1.2821288108825684, "learning_rate": 9.790359081686382e-07, "loss": 0.0357, "step": 181080 }, { "epoch": 1.9348255782894386, "grad_norm": 0.2560141086578369, "learning_rate": 9.790310939787024e-07, "loss": 0.0292, "step": 181090 }, { "epoch": 1.934932421603718, "grad_norm": 2.915517568588257, "learning_rate": 9.790262792479045e-07, "loss": 0.0527, "step": 181100 }, { "epoch": 1.9350392649179977, "grad_norm": 3.5334219932556152, "learning_rate": 9.790214639762496e-07, "loss": 0.0223, "step": 181110 }, { "epoch": 1.9351461082322774, "grad_norm": 0.5606855154037476, "learning_rate": 9.790166481637434e-07, "loss": 0.028, "step": 181120 }, { "epoch": 1.9352529515465569, "grad_norm": 0.011510102078318596, "learning_rate": 9.790118318103913e-07, "loss": 0.0172, "step": 181130 }, { "epoch": 1.9353597948608365, "grad_norm": 1.3977584838867188, "learning_rate": 9.790070149161986e-07, "loss": 0.0103, "step": 181140 }, { "epoch": 1.9354666381751162, "grad_norm": 0.3115995526313782, "learning_rate": 9.79002197481171e-07, "loss": 0.0151, "step": 181150 }, { "epoch": 1.9355734814893957, "grad_norm": 4.315639495849609, "learning_rate": 9.789973795053136e-07, "loss": 0.0545, "step": 181160 }, { "epoch": 1.9356803248036754, "grad_norm": 0.8390123248100281, "learning_rate": 9.78992560988632e-07, "loss": 0.0135, "step": 181170 }, { "epoch": 1.935787168117955, "grad_norm": 7.2908172607421875, "learning_rate": 9.789877419311318e-07, "loss": 0.0295, "step": 181180 }, { "epoch": 1.9358940114322345, "grad_norm": 1.8080106973648071, "learning_rate": 9.78982922332818e-07, "loss": 0.0202, "step": 181190 }, { "epoch": 1.9360008547465144, "grad_norm": 0.16693037748336792, "learning_rate": 9.789781021936968e-07, "loss": 0.0314, "step": 181200 }, { "epoch": 1.9361076980607939, "grad_norm": 0.014553493820130825, "learning_rate": 9.789732815137727e-07, "loss": 0.0339, "step": 181210 }, { "epoch": 1.9362145413750733, "grad_norm": 1.8590294122695923, "learning_rate": 9.789684602930518e-07, "loss": 0.0133, "step": 181220 }, { "epoch": 1.9363213846893532, "grad_norm": 0.11752502620220184, "learning_rate": 9.789636385315393e-07, "loss": 0.0083, "step": 181230 }, { "epoch": 1.9364282280036327, "grad_norm": 0.12387005239725113, "learning_rate": 9.789588162292408e-07, "loss": 0.01, "step": 181240 }, { "epoch": 1.9365350713179121, "grad_norm": 0.007811457850039005, "learning_rate": 9.789539933861616e-07, "loss": 0.0695, "step": 181250 }, { "epoch": 1.936641914632192, "grad_norm": 2.058185338973999, "learning_rate": 9.789491700023074e-07, "loss": 0.0142, "step": 181260 }, { "epoch": 1.9367487579464715, "grad_norm": 0.13067901134490967, "learning_rate": 9.78944346077683e-07, "loss": 0.0149, "step": 181270 }, { "epoch": 1.936855601260751, "grad_norm": 2.052358627319336, "learning_rate": 9.789395216122947e-07, "loss": 0.0111, "step": 181280 }, { "epoch": 1.9369624445750309, "grad_norm": 0.06218121945858002, "learning_rate": 9.789346966061473e-07, "loss": 0.0019, "step": 181290 }, { "epoch": 1.9370692878893103, "grad_norm": 0.15392367541790009, "learning_rate": 9.789298710592467e-07, "loss": 0.012, "step": 181300 }, { "epoch": 1.9371761312035898, "grad_norm": 0.08221632987260818, "learning_rate": 9.789250449715979e-07, "loss": 0.0166, "step": 181310 }, { "epoch": 1.9372829745178697, "grad_norm": 3.8135721683502197, "learning_rate": 9.789202183432067e-07, "loss": 0.0332, "step": 181320 }, { "epoch": 1.9373898178321491, "grad_norm": 3.9332995414733887, "learning_rate": 9.789153911740785e-07, "loss": 0.0209, "step": 181330 }, { "epoch": 1.9374966611464286, "grad_norm": 2.4126360416412354, "learning_rate": 9.789105634642185e-07, "loss": 0.0298, "step": 181340 }, { "epoch": 1.9376035044607085, "grad_norm": 2.9586241245269775, "learning_rate": 9.789057352136323e-07, "loss": 0.0198, "step": 181350 }, { "epoch": 1.937710347774988, "grad_norm": 2.6305317878723145, "learning_rate": 9.789009064223255e-07, "loss": 0.0311, "step": 181360 }, { "epoch": 1.9378171910892676, "grad_norm": 1.6179101467132568, "learning_rate": 9.788960770903034e-07, "loss": 0.0489, "step": 181370 }, { "epoch": 1.9379240344035473, "grad_norm": 0.028900863602757454, "learning_rate": 9.788912472175714e-07, "loss": 0.0904, "step": 181380 }, { "epoch": 1.9380308777178268, "grad_norm": 0.31062281131744385, "learning_rate": 9.78886416804135e-07, "loss": 0.0275, "step": 181390 }, { "epoch": 1.9381377210321065, "grad_norm": 4.220746994018555, "learning_rate": 9.7888158585e-07, "loss": 0.0189, "step": 181400 }, { "epoch": 1.9382445643463861, "grad_norm": 0.02792288362979889, "learning_rate": 9.788767543551713e-07, "loss": 0.0122, "step": 181410 }, { "epoch": 1.9383514076606656, "grad_norm": 0.2341560423374176, "learning_rate": 9.788719223196546e-07, "loss": 0.0184, "step": 181420 }, { "epoch": 1.9384582509749453, "grad_norm": 0.7952123880386353, "learning_rate": 9.788670897434556e-07, "loss": 0.0268, "step": 181430 }, { "epoch": 1.938565094289225, "grad_norm": 1.0944420099258423, "learning_rate": 9.788622566265792e-07, "loss": 0.0285, "step": 181440 }, { "epoch": 1.9386719376035044, "grad_norm": 0.13860002160072327, "learning_rate": 9.788574229690313e-07, "loss": 0.0331, "step": 181450 }, { "epoch": 1.938778780917784, "grad_norm": 0.013530545867979527, "learning_rate": 9.788525887708174e-07, "loss": 0.0039, "step": 181460 }, { "epoch": 1.9388856242320638, "grad_norm": 4.119568347930908, "learning_rate": 9.788477540319425e-07, "loss": 0.0529, "step": 181470 }, { "epoch": 1.9389924675463432, "grad_norm": 6.819695472717285, "learning_rate": 9.788429187524125e-07, "loss": 0.0158, "step": 181480 }, { "epoch": 1.939099310860623, "grad_norm": 0.05285494029521942, "learning_rate": 9.788380829322327e-07, "loss": 0.0332, "step": 181490 }, { "epoch": 1.9392061541749026, "grad_norm": 1.4299652576446533, "learning_rate": 9.788332465714086e-07, "loss": 0.0459, "step": 181500 }, { "epoch": 1.939312997489182, "grad_norm": 6.579323768615723, "learning_rate": 9.788284096699456e-07, "loss": 0.0259, "step": 181510 }, { "epoch": 1.9394198408034617, "grad_norm": 9.364502906799316, "learning_rate": 9.788235722278491e-07, "loss": 0.0634, "step": 181520 }, { "epoch": 1.9395266841177414, "grad_norm": 0.2047688215970993, "learning_rate": 9.788187342451246e-07, "loss": 0.0431, "step": 181530 }, { "epoch": 1.9396335274320209, "grad_norm": 0.07701435685157776, "learning_rate": 9.788138957217777e-07, "loss": 0.0289, "step": 181540 }, { "epoch": 1.9397403707463006, "grad_norm": 0.013184954412281513, "learning_rate": 9.788090566578137e-07, "loss": 0.0229, "step": 181550 }, { "epoch": 1.9398472140605802, "grad_norm": 4.28650426864624, "learning_rate": 9.788042170532384e-07, "loss": 0.0396, "step": 181560 }, { "epoch": 1.9399540573748597, "grad_norm": 0.13835245370864868, "learning_rate": 9.787993769080567e-07, "loss": 0.0315, "step": 181570 }, { "epoch": 1.9400609006891394, "grad_norm": 7.7552409172058105, "learning_rate": 9.787945362222745e-07, "loss": 0.024, "step": 181580 }, { "epoch": 1.940167744003419, "grad_norm": 0.022313140332698822, "learning_rate": 9.787896949958972e-07, "loss": 0.0304, "step": 181590 }, { "epoch": 1.9402745873176985, "grad_norm": 1.5028231143951416, "learning_rate": 9.787848532289302e-07, "loss": 0.0346, "step": 181600 }, { "epoch": 1.9403814306319782, "grad_norm": 0.6610894203186035, "learning_rate": 9.78780010921379e-07, "loss": 0.0586, "step": 181610 }, { "epoch": 1.940488273946258, "grad_norm": 7.587441444396973, "learning_rate": 9.787751680732488e-07, "loss": 0.0419, "step": 181620 }, { "epoch": 1.9405951172605374, "grad_norm": 5.561437129974365, "learning_rate": 9.787703246845453e-07, "loss": 0.0505, "step": 181630 }, { "epoch": 1.940701960574817, "grad_norm": 0.20784690976142883, "learning_rate": 9.78765480755274e-07, "loss": 0.0275, "step": 181640 }, { "epoch": 1.9408088038890967, "grad_norm": 1.3172311782836914, "learning_rate": 9.787606362854406e-07, "loss": 0.0428, "step": 181650 }, { "epoch": 1.9409156472033762, "grad_norm": 0.5824311375617981, "learning_rate": 9.787557912750501e-07, "loss": 0.0246, "step": 181660 }, { "epoch": 1.9410224905176559, "grad_norm": 12.137600898742676, "learning_rate": 9.787509457241082e-07, "loss": 0.1717, "step": 181670 }, { "epoch": 1.9411293338319355, "grad_norm": 4.5708842277526855, "learning_rate": 9.787460996326204e-07, "loss": 0.0597, "step": 181680 }, { "epoch": 1.941236177146215, "grad_norm": 5.117571830749512, "learning_rate": 9.787412530005922e-07, "loss": 0.0246, "step": 181690 }, { "epoch": 1.9413430204604947, "grad_norm": 0.03440232574939728, "learning_rate": 9.787364058280288e-07, "loss": 0.03, "step": 181700 }, { "epoch": 1.9414498637747744, "grad_norm": 1.7168298959732056, "learning_rate": 9.787315581149358e-07, "loss": 0.0621, "step": 181710 }, { "epoch": 1.9415567070890538, "grad_norm": 3.1227166652679443, "learning_rate": 9.78726709861319e-07, "loss": 0.0613, "step": 181720 }, { "epoch": 1.9416635504033335, "grad_norm": 3.370293617248535, "learning_rate": 9.787218610671835e-07, "loss": 0.0272, "step": 181730 }, { "epoch": 1.9417703937176132, "grad_norm": 0.0074178921058773994, "learning_rate": 9.787170117325349e-07, "loss": 0.018, "step": 181740 }, { "epoch": 1.9418772370318926, "grad_norm": 0.757361888885498, "learning_rate": 9.787121618573786e-07, "loss": 0.0631, "step": 181750 }, { "epoch": 1.9419840803461723, "grad_norm": 4.018048286437988, "learning_rate": 9.7870731144172e-07, "loss": 0.0471, "step": 181760 }, { "epoch": 1.942090923660452, "grad_norm": 3.153831958770752, "learning_rate": 9.78702460485565e-07, "loss": 0.048, "step": 181770 }, { "epoch": 1.9421977669747315, "grad_norm": 7.7894392013549805, "learning_rate": 9.786976089889186e-07, "loss": 0.0221, "step": 181780 }, { "epoch": 1.9423046102890111, "grad_norm": 10.289647102355957, "learning_rate": 9.786927569517866e-07, "loss": 0.0419, "step": 181790 }, { "epoch": 1.9424114536032908, "grad_norm": 4.706515789031982, "learning_rate": 9.786879043741745e-07, "loss": 0.0214, "step": 181800 }, { "epoch": 1.9425182969175703, "grad_norm": 3.5804238319396973, "learning_rate": 9.786830512560874e-07, "loss": 0.0171, "step": 181810 }, { "epoch": 1.94262514023185, "grad_norm": 0.12391587346792221, "learning_rate": 9.786781975975312e-07, "loss": 0.0597, "step": 181820 }, { "epoch": 1.9427319835461296, "grad_norm": 0.904918909072876, "learning_rate": 9.786733433985108e-07, "loss": 0.0164, "step": 181830 }, { "epoch": 1.942838826860409, "grad_norm": 3.431889533996582, "learning_rate": 9.786684886590324e-07, "loss": 0.013, "step": 181840 }, { "epoch": 1.9429456701746888, "grad_norm": 0.708551824092865, "learning_rate": 9.78663633379101e-07, "loss": 0.0444, "step": 181850 }, { "epoch": 1.9430525134889685, "grad_norm": 3.4031667709350586, "learning_rate": 9.786587775587223e-07, "loss": 0.0341, "step": 181860 }, { "epoch": 1.943159356803248, "grad_norm": 3.0835728645324707, "learning_rate": 9.786539211979017e-07, "loss": 0.0313, "step": 181870 }, { "epoch": 1.9432662001175276, "grad_norm": 2.1847832202911377, "learning_rate": 9.786490642966446e-07, "loss": 0.0155, "step": 181880 }, { "epoch": 1.9433730434318073, "grad_norm": 3.8580100536346436, "learning_rate": 9.786442068549568e-07, "loss": 0.0675, "step": 181890 }, { "epoch": 1.9434798867460867, "grad_norm": 0.07531657069921494, "learning_rate": 9.786393488728436e-07, "loss": 0.0081, "step": 181900 }, { "epoch": 1.9435867300603664, "grad_norm": 0.2736961543560028, "learning_rate": 9.786344903503102e-07, "loss": 0.0065, "step": 181910 }, { "epoch": 1.943693573374646, "grad_norm": 5.53554630279541, "learning_rate": 9.786296312873623e-07, "loss": 0.0446, "step": 181920 }, { "epoch": 1.9438004166889256, "grad_norm": 6.379689693450928, "learning_rate": 9.786247716840056e-07, "loss": 0.0504, "step": 181930 }, { "epoch": 1.9439072600032055, "grad_norm": 0.006978116929531097, "learning_rate": 9.786199115402454e-07, "loss": 0.002, "step": 181940 }, { "epoch": 1.944014103317485, "grad_norm": 0.013869463466107845, "learning_rate": 9.78615050856087e-07, "loss": 0.0415, "step": 181950 }, { "epoch": 1.9441209466317644, "grad_norm": 0.039603933691978455, "learning_rate": 9.786101896315364e-07, "loss": 0.0355, "step": 181960 }, { "epoch": 1.9442277899460443, "grad_norm": 15.01176929473877, "learning_rate": 9.786053278665988e-07, "loss": 0.0478, "step": 181970 }, { "epoch": 1.9443346332603237, "grad_norm": 1.8527514934539795, "learning_rate": 9.786004655612793e-07, "loss": 0.0111, "step": 181980 }, { "epoch": 1.9444414765746032, "grad_norm": 5.2635273933410645, "learning_rate": 9.78595602715584e-07, "loss": 0.0322, "step": 181990 }, { "epoch": 1.944548319888883, "grad_norm": 3.048733711242676, "learning_rate": 9.78590739329518e-07, "loss": 0.0409, "step": 182000 }, { "epoch": 1.9446551632031626, "grad_norm": 0.19115686416625977, "learning_rate": 9.78585875403087e-07, "loss": 0.0346, "step": 182010 }, { "epoch": 1.944762006517442, "grad_norm": 0.6044521331787109, "learning_rate": 9.785810109362964e-07, "loss": 0.0052, "step": 182020 }, { "epoch": 1.944868849831722, "grad_norm": 0.00906977429986, "learning_rate": 9.785761459291517e-07, "loss": 0.0014, "step": 182030 }, { "epoch": 1.9449756931460014, "grad_norm": 3.2854764461517334, "learning_rate": 9.785712803816587e-07, "loss": 0.0291, "step": 182040 }, { "epoch": 1.9450825364602808, "grad_norm": 0.10120216757059097, "learning_rate": 9.785664142938221e-07, "loss": 0.0253, "step": 182050 }, { "epoch": 1.9451893797745607, "grad_norm": 6.639087200164795, "learning_rate": 9.785615476656485e-07, "loss": 0.0488, "step": 182060 }, { "epoch": 1.9452962230888402, "grad_norm": 0.008592353202402592, "learning_rate": 9.785566804971423e-07, "loss": 0.0382, "step": 182070 }, { "epoch": 1.9454030664031197, "grad_norm": 3.988820791244507, "learning_rate": 9.785518127883096e-07, "loss": 0.0418, "step": 182080 }, { "epoch": 1.9455099097173996, "grad_norm": 6.601411819458008, "learning_rate": 9.785469445391558e-07, "loss": 0.0144, "step": 182090 }, { "epoch": 1.945616753031679, "grad_norm": 0.285704106092453, "learning_rate": 9.785420757496865e-07, "loss": 0.0074, "step": 182100 }, { "epoch": 1.9457235963459587, "grad_norm": 1.527991533279419, "learning_rate": 9.78537206419907e-07, "loss": 0.0182, "step": 182110 }, { "epoch": 1.9458304396602384, "grad_norm": 3.9372737407684326, "learning_rate": 9.785323365498229e-07, "loss": 0.03, "step": 182120 }, { "epoch": 1.9459372829745178, "grad_norm": 5.451982021331787, "learning_rate": 9.785274661394396e-07, "loss": 0.0399, "step": 182130 }, { "epoch": 1.9460441262887975, "grad_norm": 5.620050430297852, "learning_rate": 9.785225951887627e-07, "loss": 0.0281, "step": 182140 }, { "epoch": 1.9461509696030772, "grad_norm": 0.07980910688638687, "learning_rate": 9.785177236977977e-07, "loss": 0.0317, "step": 182150 }, { "epoch": 1.9462578129173567, "grad_norm": 0.5298581123352051, "learning_rate": 9.7851285166655e-07, "loss": 0.0178, "step": 182160 }, { "epoch": 1.9463646562316363, "grad_norm": 0.04706253856420517, "learning_rate": 9.785079790950255e-07, "loss": 0.0321, "step": 182170 }, { "epoch": 1.946471499545916, "grad_norm": 1.429337978363037, "learning_rate": 9.78503105983229e-07, "loss": 0.0431, "step": 182180 }, { "epoch": 1.9465783428601955, "grad_norm": 0.20966245234012604, "learning_rate": 9.784982323311666e-07, "loss": 0.0287, "step": 182190 }, { "epoch": 1.9466851861744752, "grad_norm": 2.5260894298553467, "learning_rate": 9.784933581388434e-07, "loss": 0.0299, "step": 182200 }, { "epoch": 1.9467920294887548, "grad_norm": 3.337010622024536, "learning_rate": 9.784884834062652e-07, "loss": 0.0787, "step": 182210 }, { "epoch": 1.9468988728030343, "grad_norm": 0.04540617763996124, "learning_rate": 9.784836081334374e-07, "loss": 0.0169, "step": 182220 }, { "epoch": 1.947005716117314, "grad_norm": 2.3069920539855957, "learning_rate": 9.784787323203654e-07, "loss": 0.0091, "step": 182230 }, { "epoch": 1.9471125594315937, "grad_norm": 2.8266677856445312, "learning_rate": 9.78473855967055e-07, "loss": 0.0256, "step": 182240 }, { "epoch": 1.9472194027458731, "grad_norm": 2.9941298961639404, "learning_rate": 9.784689790735113e-07, "loss": 0.0157, "step": 182250 }, { "epoch": 1.9473262460601528, "grad_norm": 1.0010885000228882, "learning_rate": 9.7846410163974e-07, "loss": 0.0186, "step": 182260 }, { "epoch": 1.9474330893744325, "grad_norm": 1.0282505750656128, "learning_rate": 9.784592236657467e-07, "loss": 0.0132, "step": 182270 }, { "epoch": 1.947539932688712, "grad_norm": 0.9885755181312561, "learning_rate": 9.784543451515369e-07, "loss": 0.0131, "step": 182280 }, { "epoch": 1.9476467760029916, "grad_norm": 0.03333611041307449, "learning_rate": 9.784494660971158e-07, "loss": 0.0098, "step": 182290 }, { "epoch": 1.9477536193172713, "grad_norm": 0.4820663630962372, "learning_rate": 9.784445865024894e-07, "loss": 0.0076, "step": 182300 }, { "epoch": 1.9478604626315508, "grad_norm": 6.672426223754883, "learning_rate": 9.784397063676628e-07, "loss": 0.0237, "step": 182310 }, { "epoch": 1.9479673059458305, "grad_norm": 12.609639167785645, "learning_rate": 9.784348256926418e-07, "loss": 0.0592, "step": 182320 }, { "epoch": 1.9480741492601101, "grad_norm": 7.263059139251709, "learning_rate": 9.784299444774316e-07, "loss": 0.0523, "step": 182330 }, { "epoch": 1.9481809925743896, "grad_norm": 0.13379426300525665, "learning_rate": 9.784250627220379e-07, "loss": 0.0204, "step": 182340 }, { "epoch": 1.9482878358886693, "grad_norm": 0.09984780848026276, "learning_rate": 9.784201804264662e-07, "loss": 0.0288, "step": 182350 }, { "epoch": 1.948394679202949, "grad_norm": 0.0941508561372757, "learning_rate": 9.784152975907222e-07, "loss": 0.0277, "step": 182360 }, { "epoch": 1.9485015225172284, "grad_norm": 4.619146823883057, "learning_rate": 9.78410414214811e-07, "loss": 0.0111, "step": 182370 }, { "epoch": 1.948608365831508, "grad_norm": 0.35716211795806885, "learning_rate": 9.784055302987385e-07, "loss": 0.0397, "step": 182380 }, { "epoch": 1.9487152091457878, "grad_norm": 0.04251391068100929, "learning_rate": 9.7840064584251e-07, "loss": 0.0396, "step": 182390 }, { "epoch": 1.9488220524600672, "grad_norm": 0.12847748398780823, "learning_rate": 9.783957608461312e-07, "loss": 0.0102, "step": 182400 }, { "epoch": 1.948928895774347, "grad_norm": 5.315543174743652, "learning_rate": 9.783908753096071e-07, "loss": 0.0219, "step": 182410 }, { "epoch": 1.9490357390886266, "grad_norm": 0.07171373814344406, "learning_rate": 9.783859892329437e-07, "loss": 0.065, "step": 182420 }, { "epoch": 1.949142582402906, "grad_norm": 9.079568862915039, "learning_rate": 9.783811026161467e-07, "loss": 0.0107, "step": 182430 }, { "epoch": 1.9492494257171857, "grad_norm": 1.265060544013977, "learning_rate": 9.78376215459221e-07, "loss": 0.0209, "step": 182440 }, { "epoch": 1.9493562690314654, "grad_norm": 0.24195370078086853, "learning_rate": 9.783713277621728e-07, "loss": 0.0052, "step": 182450 }, { "epoch": 1.9494631123457449, "grad_norm": 0.0051080225966870785, "learning_rate": 9.78366439525007e-07, "loss": 0.0033, "step": 182460 }, { "epoch": 1.9495699556600246, "grad_norm": 2.5636935234069824, "learning_rate": 9.783615507477294e-07, "loss": 0.0391, "step": 182470 }, { "epoch": 1.9496767989743042, "grad_norm": 0.09857511520385742, "learning_rate": 9.783566614303455e-07, "loss": 0.0254, "step": 182480 }, { "epoch": 1.9497836422885837, "grad_norm": 5.840692043304443, "learning_rate": 9.783517715728612e-07, "loss": 0.0388, "step": 182490 }, { "epoch": 1.9498904856028634, "grad_norm": 0.05652835965156555, "learning_rate": 9.783468811752811e-07, "loss": 0.0474, "step": 182500 }, { "epoch": 1.949997328917143, "grad_norm": 0.3848605453968048, "learning_rate": 9.783419902376115e-07, "loss": 0.0131, "step": 182510 }, { "epoch": 1.9501041722314225, "grad_norm": 1.2266948223114014, "learning_rate": 9.783370987598577e-07, "loss": 0.0237, "step": 182520 }, { "epoch": 1.9502110155457022, "grad_norm": 0.15150673687458038, "learning_rate": 9.783322067420252e-07, "loss": 0.0188, "step": 182530 }, { "epoch": 1.9503178588599819, "grad_norm": 0.05765366554260254, "learning_rate": 9.783273141841195e-07, "loss": 0.0083, "step": 182540 }, { "epoch": 1.9504247021742613, "grad_norm": 0.8957608938217163, "learning_rate": 9.783224210861464e-07, "loss": 0.0268, "step": 182550 }, { "epoch": 1.950531545488541, "grad_norm": 4.791810989379883, "learning_rate": 9.783175274481109e-07, "loss": 0.0661, "step": 182560 }, { "epoch": 1.9506383888028207, "grad_norm": 2.220837354660034, "learning_rate": 9.783126332700189e-07, "loss": 0.0382, "step": 182570 }, { "epoch": 1.9507452321171002, "grad_norm": 2.887737512588501, "learning_rate": 9.783077385518758e-07, "loss": 0.0117, "step": 182580 }, { "epoch": 1.9508520754313798, "grad_norm": 0.12260593473911285, "learning_rate": 9.783028432936873e-07, "loss": 0.0652, "step": 182590 }, { "epoch": 1.9509589187456595, "grad_norm": 0.467483252286911, "learning_rate": 9.782979474954587e-07, "loss": 0.0193, "step": 182600 }, { "epoch": 1.951065762059939, "grad_norm": 1.1045650243759155, "learning_rate": 9.782930511571957e-07, "loss": 0.0365, "step": 182610 }, { "epoch": 1.9511726053742187, "grad_norm": 1.0725029706954956, "learning_rate": 9.782881542789036e-07, "loss": 0.03, "step": 182620 }, { "epoch": 1.9512794486884983, "grad_norm": 0.7093938589096069, "learning_rate": 9.78283256860588e-07, "loss": 0.0097, "step": 182630 }, { "epoch": 1.9513862920027778, "grad_norm": 6.860363006591797, "learning_rate": 9.782783589022547e-07, "loss": 0.0268, "step": 182640 }, { "epoch": 1.9514931353170575, "grad_norm": 25.030961990356445, "learning_rate": 9.782734604039088e-07, "loss": 0.0095, "step": 182650 }, { "epoch": 1.9515999786313372, "grad_norm": 7.301031112670898, "learning_rate": 9.782685613655563e-07, "loss": 0.0299, "step": 182660 }, { "epoch": 1.9517068219456166, "grad_norm": 6.653041362762451, "learning_rate": 9.782636617872026e-07, "loss": 0.0302, "step": 182670 }, { "epoch": 1.9518136652598965, "grad_norm": 1.473650336265564, "learning_rate": 9.782587616688528e-07, "loss": 0.0262, "step": 182680 }, { "epoch": 1.951920508574176, "grad_norm": 7.894556999206543, "learning_rate": 9.782538610105132e-07, "loss": 0.0238, "step": 182690 }, { "epoch": 1.9520273518884554, "grad_norm": 0.0688566118478775, "learning_rate": 9.782489598121886e-07, "loss": 0.0101, "step": 182700 }, { "epoch": 1.9521341952027353, "grad_norm": 2.1348705291748047, "learning_rate": 9.782440580738847e-07, "loss": 0.0416, "step": 182710 }, { "epoch": 1.9522410385170148, "grad_norm": 0.27800843119621277, "learning_rate": 9.782391557956073e-07, "loss": 0.0416, "step": 182720 }, { "epoch": 1.9523478818312943, "grad_norm": 0.09773819148540497, "learning_rate": 9.78234252977362e-07, "loss": 0.0081, "step": 182730 }, { "epoch": 1.9524547251455742, "grad_norm": 1.1946754455566406, "learning_rate": 9.782293496191538e-07, "loss": 0.0279, "step": 182740 }, { "epoch": 1.9525615684598536, "grad_norm": 0.032216865569353104, "learning_rate": 9.782244457209888e-07, "loss": 0.0256, "step": 182750 }, { "epoch": 1.952668411774133, "grad_norm": 0.15022602677345276, "learning_rate": 9.782195412828722e-07, "loss": 0.028, "step": 182760 }, { "epoch": 1.952775255088413, "grad_norm": 0.16856655478477478, "learning_rate": 9.782146363048096e-07, "loss": 0.021, "step": 182770 }, { "epoch": 1.9528820984026924, "grad_norm": 0.7727605104446411, "learning_rate": 9.782097307868067e-07, "loss": 0.0081, "step": 182780 }, { "epoch": 1.952988941716972, "grad_norm": 5.558460712432861, "learning_rate": 9.78204824728869e-07, "loss": 0.0149, "step": 182790 }, { "epoch": 1.9530957850312518, "grad_norm": 4.111578464508057, "learning_rate": 9.781999181310018e-07, "loss": 0.0238, "step": 182800 }, { "epoch": 1.9532026283455313, "grad_norm": 3.2773044109344482, "learning_rate": 9.781950109932108e-07, "loss": 0.0568, "step": 182810 }, { "epoch": 1.9533094716598107, "grad_norm": 0.0285065658390522, "learning_rate": 9.781901033155016e-07, "loss": 0.0191, "step": 182820 }, { "epoch": 1.9534163149740906, "grad_norm": 3.6922223567962646, "learning_rate": 9.781851950978797e-07, "loss": 0.0307, "step": 182830 }, { "epoch": 1.95352315828837, "grad_norm": 1.4375847578048706, "learning_rate": 9.781802863403506e-07, "loss": 0.015, "step": 182840 }, { "epoch": 1.9536300016026498, "grad_norm": 9.231898307800293, "learning_rate": 9.7817537704292e-07, "loss": 0.0165, "step": 182850 }, { "epoch": 1.9537368449169294, "grad_norm": 0.38379061222076416, "learning_rate": 9.78170467205593e-07, "loss": 0.0136, "step": 182860 }, { "epoch": 1.953843688231209, "grad_norm": 0.07753756642341614, "learning_rate": 9.781655568283756e-07, "loss": 0.0292, "step": 182870 }, { "epoch": 1.9539505315454886, "grad_norm": 3.4373044967651367, "learning_rate": 9.781606459112733e-07, "loss": 0.0124, "step": 182880 }, { "epoch": 1.9540573748597683, "grad_norm": 20.43303108215332, "learning_rate": 9.781557344542915e-07, "loss": 0.0442, "step": 182890 }, { "epoch": 1.9541642181740477, "grad_norm": 1.2200839519500732, "learning_rate": 9.781508224574358e-07, "loss": 0.0366, "step": 182900 }, { "epoch": 1.9542710614883274, "grad_norm": 2.719905376434326, "learning_rate": 9.781459099207115e-07, "loss": 0.0129, "step": 182910 }, { "epoch": 1.954377904802607, "grad_norm": 1.4604626893997192, "learning_rate": 9.781409968441247e-07, "loss": 0.0324, "step": 182920 }, { "epoch": 1.9544847481168865, "grad_norm": 3.8924763202667236, "learning_rate": 9.781360832276805e-07, "loss": 0.0176, "step": 182930 }, { "epoch": 1.9545915914311662, "grad_norm": 0.27341222763061523, "learning_rate": 9.781311690713844e-07, "loss": 0.0029, "step": 182940 }, { "epoch": 1.954698434745446, "grad_norm": 2.450075387954712, "learning_rate": 9.781262543752425e-07, "loss": 0.0223, "step": 182950 }, { "epoch": 1.9548052780597254, "grad_norm": 0.0038516083732247353, "learning_rate": 9.781213391392596e-07, "loss": 0.0244, "step": 182960 }, { "epoch": 1.954912121374005, "grad_norm": 1.7589702606201172, "learning_rate": 9.78116423363442e-07, "loss": 0.0278, "step": 182970 }, { "epoch": 1.9550189646882847, "grad_norm": 2.5820508003234863, "learning_rate": 9.781115070477946e-07, "loss": 0.0149, "step": 182980 }, { "epoch": 1.9551258080025642, "grad_norm": 7.026737689971924, "learning_rate": 9.781065901923233e-07, "loss": 0.059, "step": 182990 }, { "epoch": 1.9552326513168439, "grad_norm": 0.48146456480026245, "learning_rate": 9.781016727970336e-07, "loss": 0.0383, "step": 183000 }, { "epoch": 1.9553394946311236, "grad_norm": 1.999314546585083, "learning_rate": 9.780967548619309e-07, "loss": 0.0346, "step": 183010 }, { "epoch": 1.955446337945403, "grad_norm": 2.8721306324005127, "learning_rate": 9.78091836387021e-07, "loss": 0.0104, "step": 183020 }, { "epoch": 1.9555531812596827, "grad_norm": 5.881402969360352, "learning_rate": 9.780869173723092e-07, "loss": 0.0272, "step": 183030 }, { "epoch": 1.9556600245739624, "grad_norm": 3.930760383605957, "learning_rate": 9.780819978178012e-07, "loss": 0.045, "step": 183040 }, { "epoch": 1.9557668678882418, "grad_norm": 2.4640378952026367, "learning_rate": 9.780770777235027e-07, "loss": 0.0172, "step": 183050 }, { "epoch": 1.9558737112025215, "grad_norm": 0.010455131530761719, "learning_rate": 9.78072157089419e-07, "loss": 0.0258, "step": 183060 }, { "epoch": 1.9559805545168012, "grad_norm": 6.236278057098389, "learning_rate": 9.780672359155558e-07, "loss": 0.0434, "step": 183070 }, { "epoch": 1.9560873978310807, "grad_norm": 0.014256356284022331, "learning_rate": 9.780623142019186e-07, "loss": 0.0457, "step": 183080 }, { "epoch": 1.9561942411453603, "grad_norm": 0.4905718266963959, "learning_rate": 9.78057391948513e-07, "loss": 0.0055, "step": 183090 }, { "epoch": 1.95630108445964, "grad_norm": 0.928746223449707, "learning_rate": 9.780524691553444e-07, "loss": 0.0098, "step": 183100 }, { "epoch": 1.9564079277739195, "grad_norm": 2.7394626140594482, "learning_rate": 9.780475458224186e-07, "loss": 0.0791, "step": 183110 }, { "epoch": 1.9565147710881992, "grad_norm": 0.27408212423324585, "learning_rate": 9.780426219497411e-07, "loss": 0.0197, "step": 183120 }, { "epoch": 1.9566216144024788, "grad_norm": 0.06527665257453918, "learning_rate": 9.780376975373173e-07, "loss": 0.0237, "step": 183130 }, { "epoch": 1.9567284577167583, "grad_norm": 7.158664703369141, "learning_rate": 9.780327725851529e-07, "loss": 0.0342, "step": 183140 }, { "epoch": 1.956835301031038, "grad_norm": 3.8658199310302734, "learning_rate": 9.780278470932534e-07, "loss": 0.0268, "step": 183150 }, { "epoch": 1.9569421443453177, "grad_norm": 0.3567892909049988, "learning_rate": 9.780229210616242e-07, "loss": 0.0633, "step": 183160 }, { "epoch": 1.9570489876595971, "grad_norm": 0.0338921993970871, "learning_rate": 9.780179944902714e-07, "loss": 0.0168, "step": 183170 }, { "epoch": 1.9571558309738768, "grad_norm": 10.113848686218262, "learning_rate": 9.780130673792e-07, "loss": 0.0061, "step": 183180 }, { "epoch": 1.9572626742881565, "grad_norm": 5.745436191558838, "learning_rate": 9.780081397284157e-07, "loss": 0.0546, "step": 183190 }, { "epoch": 1.957369517602436, "grad_norm": 8.515291213989258, "learning_rate": 9.780032115379243e-07, "loss": 0.0581, "step": 183200 }, { "epoch": 1.9574763609167156, "grad_norm": 2.764681100845337, "learning_rate": 9.779982828077312e-07, "loss": 0.0503, "step": 183210 }, { "epoch": 1.9575832042309953, "grad_norm": 0.6831129789352417, "learning_rate": 9.779933535378417e-07, "loss": 0.0395, "step": 183220 }, { "epoch": 1.9576900475452748, "grad_norm": 0.06577148288488388, "learning_rate": 9.779884237282618e-07, "loss": 0.0321, "step": 183230 }, { "epoch": 1.9577968908595544, "grad_norm": 4.239411354064941, "learning_rate": 9.77983493378997e-07, "loss": 0.083, "step": 183240 }, { "epoch": 1.9579037341738341, "grad_norm": 0.12896910309791565, "learning_rate": 9.779785624900526e-07, "loss": 0.0419, "step": 183250 }, { "epoch": 1.9580105774881136, "grad_norm": 1.7705802917480469, "learning_rate": 9.779736310614344e-07, "loss": 0.0297, "step": 183260 }, { "epoch": 1.9581174208023933, "grad_norm": 4.700441837310791, "learning_rate": 9.779686990931478e-07, "loss": 0.0308, "step": 183270 }, { "epoch": 1.958224264116673, "grad_norm": 6.63718843460083, "learning_rate": 9.779637665851986e-07, "loss": 0.0315, "step": 183280 }, { "epoch": 1.9583311074309524, "grad_norm": 5.587235450744629, "learning_rate": 9.779588335375922e-07, "loss": 0.0226, "step": 183290 }, { "epoch": 1.958437950745232, "grad_norm": 0.09374252706766129, "learning_rate": 9.779538999503343e-07, "loss": 0.0376, "step": 183300 }, { "epoch": 1.9585447940595118, "grad_norm": 9.93613052368164, "learning_rate": 9.779489658234302e-07, "loss": 0.0652, "step": 183310 }, { "epoch": 1.9586516373737912, "grad_norm": 0.9761397242546082, "learning_rate": 9.779440311568857e-07, "loss": 0.0221, "step": 183320 }, { "epoch": 1.958758480688071, "grad_norm": 4.988736152648926, "learning_rate": 9.779390959507064e-07, "loss": 0.0368, "step": 183330 }, { "epoch": 1.9588653240023506, "grad_norm": 18.072607040405273, "learning_rate": 9.779341602048975e-07, "loss": 0.0528, "step": 183340 }, { "epoch": 1.95897216731663, "grad_norm": 0.1630956530570984, "learning_rate": 9.779292239194653e-07, "loss": 0.0674, "step": 183350 }, { "epoch": 1.9590790106309097, "grad_norm": 1.7195929288864136, "learning_rate": 9.779242870944146e-07, "loss": 0.0221, "step": 183360 }, { "epoch": 1.9591858539451894, "grad_norm": 0.24341511726379395, "learning_rate": 9.779193497297515e-07, "loss": 0.04, "step": 183370 }, { "epoch": 1.9592926972594689, "grad_norm": 14.661134719848633, "learning_rate": 9.779144118254813e-07, "loss": 0.0873, "step": 183380 }, { "epoch": 1.9593995405737485, "grad_norm": 1.9126920700073242, "learning_rate": 9.779094733816095e-07, "loss": 0.0434, "step": 183390 }, { "epoch": 1.9595063838880282, "grad_norm": 9.153363227844238, "learning_rate": 9.77904534398142e-07, "loss": 0.0698, "step": 183400 }, { "epoch": 1.9596132272023077, "grad_norm": 0.2211712747812271, "learning_rate": 9.778995948750843e-07, "loss": 0.027, "step": 183410 }, { "epoch": 1.9597200705165876, "grad_norm": 7.322558879852295, "learning_rate": 9.778946548124419e-07, "loss": 0.0303, "step": 183420 }, { "epoch": 1.959826913830867, "grad_norm": 9.877617835998535, "learning_rate": 9.778897142102202e-07, "loss": 0.0167, "step": 183430 }, { "epoch": 1.9599337571451465, "grad_norm": 6.241664886474609, "learning_rate": 9.77884773068425e-07, "loss": 0.0447, "step": 183440 }, { "epoch": 1.9600406004594264, "grad_norm": 0.21556031703948975, "learning_rate": 9.778798313870619e-07, "loss": 0.0107, "step": 183450 }, { "epoch": 1.9601474437737059, "grad_norm": 10.519680976867676, "learning_rate": 9.778748891661365e-07, "loss": 0.018, "step": 183460 }, { "epoch": 1.9602542870879853, "grad_norm": 6.080667495727539, "learning_rate": 9.778699464056538e-07, "loss": 0.0407, "step": 183470 }, { "epoch": 1.9603611304022652, "grad_norm": 0.5795713067054749, "learning_rate": 9.778650031056203e-07, "loss": 0.0117, "step": 183480 }, { "epoch": 1.9604679737165447, "grad_norm": 4.7708330154418945, "learning_rate": 9.77860059266041e-07, "loss": 0.0303, "step": 183490 }, { "epoch": 1.9605748170308241, "grad_norm": 2.401763916015625, "learning_rate": 9.778551148869216e-07, "loss": 0.0079, "step": 183500 }, { "epoch": 1.960681660345104, "grad_norm": 7.119528770446777, "learning_rate": 9.778501699682679e-07, "loss": 0.0248, "step": 183510 }, { "epoch": 1.9607885036593835, "grad_norm": 1.7304168939590454, "learning_rate": 9.778452245100853e-07, "loss": 0.0183, "step": 183520 }, { "epoch": 1.960895346973663, "grad_norm": 1.856920838356018, "learning_rate": 9.77840278512379e-07, "loss": 0.0301, "step": 183530 }, { "epoch": 1.9610021902879429, "grad_norm": 5.97990083694458, "learning_rate": 9.778353319751553e-07, "loss": 0.0268, "step": 183540 }, { "epoch": 1.9611090336022223, "grad_norm": 0.010485771112143993, "learning_rate": 9.778303848984193e-07, "loss": 0.0211, "step": 183550 }, { "epoch": 1.9612158769165018, "grad_norm": 0.5791366696357727, "learning_rate": 9.778254372821768e-07, "loss": 0.0223, "step": 183560 }, { "epoch": 1.9613227202307817, "grad_norm": 8.606202125549316, "learning_rate": 9.778204891264333e-07, "loss": 0.0532, "step": 183570 }, { "epoch": 1.9614295635450611, "grad_norm": 1.5887449979782104, "learning_rate": 9.778155404311944e-07, "loss": 0.0373, "step": 183580 }, { "epoch": 1.9615364068593408, "grad_norm": 4.156156063079834, "learning_rate": 9.778105911964657e-07, "loss": 0.0672, "step": 183590 }, { "epoch": 1.9616432501736205, "grad_norm": 0.1902705430984497, "learning_rate": 9.77805641422253e-07, "loss": 0.0821, "step": 183600 }, { "epoch": 1.9617500934879, "grad_norm": 26.38780975341797, "learning_rate": 9.778006911085613e-07, "loss": 0.0374, "step": 183610 }, { "epoch": 1.9618569368021797, "grad_norm": 0.005968786776065826, "learning_rate": 9.777957402553967e-07, "loss": 0.1071, "step": 183620 }, { "epoch": 1.9619637801164593, "grad_norm": 3.688809871673584, "learning_rate": 9.777907888627645e-07, "loss": 0.0757, "step": 183630 }, { "epoch": 1.9620706234307388, "grad_norm": 4.783370494842529, "learning_rate": 9.777858369306705e-07, "loss": 0.0248, "step": 183640 }, { "epoch": 1.9621774667450185, "grad_norm": 0.8417586088180542, "learning_rate": 9.777808844591205e-07, "loss": 0.0209, "step": 183650 }, { "epoch": 1.9622843100592982, "grad_norm": 0.011524905450642109, "learning_rate": 9.777759314481196e-07, "loss": 0.0115, "step": 183660 }, { "epoch": 1.9623911533735776, "grad_norm": 1.4807628393173218, "learning_rate": 9.777709778976737e-07, "loss": 0.067, "step": 183670 }, { "epoch": 1.9624979966878573, "grad_norm": 3.1547579765319824, "learning_rate": 9.777660238077883e-07, "loss": 0.0516, "step": 183680 }, { "epoch": 1.962604840002137, "grad_norm": 0.00724170682951808, "learning_rate": 9.777610691784689e-07, "loss": 0.0407, "step": 183690 }, { "epoch": 1.9627116833164164, "grad_norm": 6.3120036125183105, "learning_rate": 9.777561140097212e-07, "loss": 0.0329, "step": 183700 }, { "epoch": 1.9628185266306961, "grad_norm": 2.0086305141448975, "learning_rate": 9.77751158301551e-07, "loss": 0.017, "step": 183710 }, { "epoch": 1.9629253699449758, "grad_norm": 0.03965884447097778, "learning_rate": 9.777462020539635e-07, "loss": 0.036, "step": 183720 }, { "epoch": 1.9630322132592553, "grad_norm": 6.894154071807861, "learning_rate": 9.777412452669644e-07, "loss": 0.0195, "step": 183730 }, { "epoch": 1.963139056573535, "grad_norm": 0.1472960263490677, "learning_rate": 9.777362879405596e-07, "loss": 0.0252, "step": 183740 }, { "epoch": 1.9632458998878146, "grad_norm": 7.861111640930176, "learning_rate": 9.777313300747544e-07, "loss": 0.025, "step": 183750 }, { "epoch": 1.963352743202094, "grad_norm": 4.968877792358398, "learning_rate": 9.777263716695544e-07, "loss": 0.0242, "step": 183760 }, { "epoch": 1.9634595865163738, "grad_norm": 0.120195671916008, "learning_rate": 9.777214127249653e-07, "loss": 0.0111, "step": 183770 }, { "epoch": 1.9635664298306534, "grad_norm": 5.6714558601379395, "learning_rate": 9.777164532409928e-07, "loss": 0.0614, "step": 183780 }, { "epoch": 1.963673273144933, "grad_norm": 2.813997507095337, "learning_rate": 9.777114932176422e-07, "loss": 0.0506, "step": 183790 }, { "epoch": 1.9637801164592126, "grad_norm": 2.569612741470337, "learning_rate": 9.777065326549193e-07, "loss": 0.0569, "step": 183800 }, { "epoch": 1.9638869597734923, "grad_norm": 1.9035941362380981, "learning_rate": 9.777015715528297e-07, "loss": 0.0308, "step": 183810 }, { "epoch": 1.9639938030877717, "grad_norm": 0.038135066628456116, "learning_rate": 9.776966099113787e-07, "loss": 0.0157, "step": 183820 }, { "epoch": 1.9641006464020514, "grad_norm": 5.118245601654053, "learning_rate": 9.776916477305725e-07, "loss": 0.0342, "step": 183830 }, { "epoch": 1.964207489716331, "grad_norm": 2.775135040283203, "learning_rate": 9.776866850104164e-07, "loss": 0.0549, "step": 183840 }, { "epoch": 1.9643143330306105, "grad_norm": 0.14590570330619812, "learning_rate": 9.77681721750916e-07, "loss": 0.0073, "step": 183850 }, { "epoch": 1.9644211763448902, "grad_norm": 0.1675775945186615, "learning_rate": 9.776767579520765e-07, "loss": 0.01, "step": 183860 }, { "epoch": 1.96452801965917, "grad_norm": 2.9623169898986816, "learning_rate": 9.776717936139043e-07, "loss": 0.009, "step": 183870 }, { "epoch": 1.9646348629734494, "grad_norm": 0.04558799788355827, "learning_rate": 9.776668287364044e-07, "loss": 0.0548, "step": 183880 }, { "epoch": 1.964741706287729, "grad_norm": 1.7136659622192383, "learning_rate": 9.776618633195828e-07, "loss": 0.0309, "step": 183890 }, { "epoch": 1.9648485496020087, "grad_norm": 0.11441165953874588, "learning_rate": 9.776568973634447e-07, "loss": 0.015, "step": 183900 }, { "epoch": 1.9649553929162882, "grad_norm": 1.6753779649734497, "learning_rate": 9.77651930867996e-07, "loss": 0.0261, "step": 183910 }, { "epoch": 1.9650622362305679, "grad_norm": 6.364049911499023, "learning_rate": 9.776469638332422e-07, "loss": 0.0339, "step": 183920 }, { "epoch": 1.9651690795448475, "grad_norm": 1.8788552284240723, "learning_rate": 9.77641996259189e-07, "loss": 0.0661, "step": 183930 }, { "epoch": 1.965275922859127, "grad_norm": 0.011129583232104778, "learning_rate": 9.776370281458417e-07, "loss": 0.0259, "step": 183940 }, { "epoch": 1.9653827661734067, "grad_norm": 4.833388328552246, "learning_rate": 9.776320594932064e-07, "loss": 0.0408, "step": 183950 }, { "epoch": 1.9654896094876864, "grad_norm": 19.741500854492188, "learning_rate": 9.776270903012883e-07, "loss": 0.0924, "step": 183960 }, { "epoch": 1.9655964528019658, "grad_norm": 1.2694969177246094, "learning_rate": 9.776221205700931e-07, "loss": 0.0283, "step": 183970 }, { "epoch": 1.9657032961162455, "grad_norm": 12.196684837341309, "learning_rate": 9.776171502996268e-07, "loss": 0.0365, "step": 183980 }, { "epoch": 1.9658101394305252, "grad_norm": 2.1852240562438965, "learning_rate": 9.776121794898944e-07, "loss": 0.0163, "step": 183990 }, { "epoch": 1.9659169827448046, "grad_norm": 1.3168962001800537, "learning_rate": 9.776072081409019e-07, "loss": 0.06, "step": 184000 }, { "epoch": 1.9660238260590843, "grad_norm": 0.014584938995540142, "learning_rate": 9.776022362526547e-07, "loss": 0.0567, "step": 184010 }, { "epoch": 1.966130669373364, "grad_norm": 0.24807919561862946, "learning_rate": 9.775972638251588e-07, "loss": 0.021, "step": 184020 }, { "epoch": 1.9662375126876435, "grad_norm": 0.029502270743250847, "learning_rate": 9.775922908584193e-07, "loss": 0.0177, "step": 184030 }, { "epoch": 1.9663443560019231, "grad_norm": 0.6384543180465698, "learning_rate": 9.775873173524422e-07, "loss": 0.0047, "step": 184040 }, { "epoch": 1.9664511993162028, "grad_norm": 6.2102742195129395, "learning_rate": 9.77582343307233e-07, "loss": 0.0164, "step": 184050 }, { "epoch": 1.9665580426304823, "grad_norm": 4.7185378074646, "learning_rate": 9.775773687227972e-07, "loss": 0.0512, "step": 184060 }, { "epoch": 1.966664885944762, "grad_norm": 1.0768074989318848, "learning_rate": 9.775723935991404e-07, "loss": 0.0396, "step": 184070 }, { "epoch": 1.9667717292590416, "grad_norm": 0.01465953141450882, "learning_rate": 9.775674179362685e-07, "loss": 0.0169, "step": 184080 }, { "epoch": 1.966878572573321, "grad_norm": 0.056843239814043045, "learning_rate": 9.77562441734187e-07, "loss": 0.0313, "step": 184090 }, { "epoch": 1.9669854158876008, "grad_norm": 0.0284636989235878, "learning_rate": 9.775574649929012e-07, "loss": 0.0391, "step": 184100 }, { "epoch": 1.9670922592018805, "grad_norm": 0.036387838423252106, "learning_rate": 9.775524877124173e-07, "loss": 0.0014, "step": 184110 }, { "epoch": 1.96719910251616, "grad_norm": 7.07052755355835, "learning_rate": 9.775475098927404e-07, "loss": 0.0292, "step": 184120 }, { "epoch": 1.9673059458304396, "grad_norm": 3.2293589115142822, "learning_rate": 9.775425315338762e-07, "loss": 0.0426, "step": 184130 }, { "epoch": 1.9674127891447193, "grad_norm": 0.48308277130126953, "learning_rate": 9.775375526358306e-07, "loss": 0.0212, "step": 184140 }, { "epoch": 1.9675196324589987, "grad_norm": 2.7764973640441895, "learning_rate": 9.77532573198609e-07, "loss": 0.0685, "step": 184150 }, { "epoch": 1.9676264757732786, "grad_norm": 0.02061922289431095, "learning_rate": 9.775275932222172e-07, "loss": 0.0178, "step": 184160 }, { "epoch": 1.967733319087558, "grad_norm": 0.18675591051578522, "learning_rate": 9.775226127066606e-07, "loss": 0.0317, "step": 184170 }, { "epoch": 1.9678401624018376, "grad_norm": 0.01996121183037758, "learning_rate": 9.775176316519448e-07, "loss": 0.028, "step": 184180 }, { "epoch": 1.9679470057161175, "grad_norm": 10.261297225952148, "learning_rate": 9.775126500580759e-07, "loss": 0.1024, "step": 184190 }, { "epoch": 1.968053849030397, "grad_norm": 0.043442074209451675, "learning_rate": 9.775076679250588e-07, "loss": 0.0134, "step": 184200 }, { "epoch": 1.9681606923446764, "grad_norm": 5.233799934387207, "learning_rate": 9.775026852528997e-07, "loss": 0.0112, "step": 184210 }, { "epoch": 1.9682675356589563, "grad_norm": 3.8468658924102783, "learning_rate": 9.774977020416042e-07, "loss": 0.0248, "step": 184220 }, { "epoch": 1.9683743789732357, "grad_norm": 2.8865737915039062, "learning_rate": 9.774927182911775e-07, "loss": 0.0554, "step": 184230 }, { "epoch": 1.9684812222875152, "grad_norm": 8.027981758117676, "learning_rate": 9.774877340016256e-07, "loss": 0.0704, "step": 184240 }, { "epoch": 1.9685880656017951, "grad_norm": 16.277690887451172, "learning_rate": 9.774827491729541e-07, "loss": 0.0124, "step": 184250 }, { "epoch": 1.9686949089160746, "grad_norm": 2.1084697246551514, "learning_rate": 9.774777638051684e-07, "loss": 0.0292, "step": 184260 }, { "epoch": 1.968801752230354, "grad_norm": 0.046439558267593384, "learning_rate": 9.774727778982742e-07, "loss": 0.0243, "step": 184270 }, { "epoch": 1.968908595544634, "grad_norm": 0.5664853453636169, "learning_rate": 9.774677914522772e-07, "loss": 0.0264, "step": 184280 }, { "epoch": 1.9690154388589134, "grad_norm": 0.11471963673830032, "learning_rate": 9.77462804467183e-07, "loss": 0.0188, "step": 184290 }, { "epoch": 1.9691222821731929, "grad_norm": 1.9410778284072876, "learning_rate": 9.774578169429974e-07, "loss": 0.0421, "step": 184300 }, { "epoch": 1.9692291254874728, "grad_norm": 0.12070613354444504, "learning_rate": 9.774528288797258e-07, "loss": 0.0671, "step": 184310 }, { "epoch": 1.9693359688017522, "grad_norm": 2.5540759563446045, "learning_rate": 9.774478402773741e-07, "loss": 0.0148, "step": 184320 }, { "epoch": 1.969442812116032, "grad_norm": 7.748943328857422, "learning_rate": 9.774428511359476e-07, "loss": 0.0473, "step": 184330 }, { "epoch": 1.9695496554303116, "grad_norm": 3.646615743637085, "learning_rate": 9.774378614554521e-07, "loss": 0.0288, "step": 184340 }, { "epoch": 1.969656498744591, "grad_norm": 1.1621874570846558, "learning_rate": 9.774328712358932e-07, "loss": 0.0123, "step": 184350 }, { "epoch": 1.9697633420588707, "grad_norm": 2.8999338150024414, "learning_rate": 9.774278804772767e-07, "loss": 0.0655, "step": 184360 }, { "epoch": 1.9698701853731504, "grad_norm": 7.987000942230225, "learning_rate": 9.77422889179608e-07, "loss": 0.0204, "step": 184370 }, { "epoch": 1.9699770286874299, "grad_norm": 1.9491662979125977, "learning_rate": 9.774178973428929e-07, "loss": 0.0479, "step": 184380 }, { "epoch": 1.9700838720017095, "grad_norm": 3.5377860069274902, "learning_rate": 9.774129049671367e-07, "loss": 0.0276, "step": 184390 }, { "epoch": 1.9701907153159892, "grad_norm": 3.3163094520568848, "learning_rate": 9.774079120523454e-07, "loss": 0.037, "step": 184400 }, { "epoch": 1.9702975586302687, "grad_norm": 2.1769344806671143, "learning_rate": 9.774029185985247e-07, "loss": 0.0375, "step": 184410 }, { "epoch": 1.9704044019445484, "grad_norm": 0.7431842684745789, "learning_rate": 9.7739792460568e-07, "loss": 0.0115, "step": 184420 }, { "epoch": 1.970511245258828, "grad_norm": 2.940936326980591, "learning_rate": 9.77392930073817e-07, "loss": 0.0154, "step": 184430 }, { "epoch": 1.9706180885731075, "grad_norm": 12.638821601867676, "learning_rate": 9.773879350029412e-07, "loss": 0.0416, "step": 184440 }, { "epoch": 1.9707249318873872, "grad_norm": 8.69166088104248, "learning_rate": 9.773829393930587e-07, "loss": 0.0649, "step": 184450 }, { "epoch": 1.9708317752016669, "grad_norm": 0.23709039390087128, "learning_rate": 9.773779432441747e-07, "loss": 0.012, "step": 184460 }, { "epoch": 1.9709386185159463, "grad_norm": 0.12085362523794174, "learning_rate": 9.773729465562947e-07, "loss": 0.0483, "step": 184470 }, { "epoch": 1.971045461830226, "grad_norm": 0.10122990608215332, "learning_rate": 9.77367949329425e-07, "loss": 0.0223, "step": 184480 }, { "epoch": 1.9711523051445057, "grad_norm": 1.623081088066101, "learning_rate": 9.773629515635705e-07, "loss": 0.1374, "step": 184490 }, { "epoch": 1.9712591484587851, "grad_norm": 1.9243437051773071, "learning_rate": 9.773579532587374e-07, "loss": 0.0705, "step": 184500 }, { "epoch": 1.9713659917730648, "grad_norm": 0.17215745151042938, "learning_rate": 9.773529544149313e-07, "loss": 0.0484, "step": 184510 }, { "epoch": 1.9714728350873445, "grad_norm": 4.95714807510376, "learning_rate": 9.773479550321575e-07, "loss": 0.0404, "step": 184520 }, { "epoch": 1.971579678401624, "grad_norm": 0.05205480009317398, "learning_rate": 9.77342955110422e-07, "loss": 0.0408, "step": 184530 }, { "epoch": 1.9716865217159036, "grad_norm": 2.664011240005493, "learning_rate": 9.7733795464973e-07, "loss": 0.0056, "step": 184540 }, { "epoch": 1.9717933650301833, "grad_norm": 5.612734794616699, "learning_rate": 9.773329536500876e-07, "loss": 0.0103, "step": 184550 }, { "epoch": 1.9719002083444628, "grad_norm": 4.11163330078125, "learning_rate": 9.773279521115004e-07, "loss": 0.0188, "step": 184560 }, { "epoch": 1.9720070516587425, "grad_norm": 0.42038771510124207, "learning_rate": 9.773229500339736e-07, "loss": 0.03, "step": 184570 }, { "epoch": 1.9721138949730221, "grad_norm": 3.1377758979797363, "learning_rate": 9.773179474175133e-07, "loss": 0.0286, "step": 184580 }, { "epoch": 1.9722207382873016, "grad_norm": 0.455293744802475, "learning_rate": 9.77312944262125e-07, "loss": 0.011, "step": 184590 }, { "epoch": 1.9723275816015813, "grad_norm": 0.4151361584663391, "learning_rate": 9.773079405678144e-07, "loss": 0.0208, "step": 184600 }, { "epoch": 1.972434424915861, "grad_norm": 0.4606892168521881, "learning_rate": 9.773029363345871e-07, "loss": 0.0553, "step": 184610 }, { "epoch": 1.9725412682301404, "grad_norm": 0.08541157096624374, "learning_rate": 9.772979315624487e-07, "loss": 0.0107, "step": 184620 }, { "epoch": 1.97264811154442, "grad_norm": 0.059791449457407, "learning_rate": 9.772929262514052e-07, "loss": 0.0031, "step": 184630 }, { "epoch": 1.9727549548586998, "grad_norm": 0.6048275232315063, "learning_rate": 9.772879204014616e-07, "loss": 0.0289, "step": 184640 }, { "epoch": 1.9728617981729792, "grad_norm": 2.060394525527954, "learning_rate": 9.77282914012624e-07, "loss": 0.0397, "step": 184650 }, { "epoch": 1.972968641487259, "grad_norm": 0.5773442387580872, "learning_rate": 9.77277907084898e-07, "loss": 0.0217, "step": 184660 }, { "epoch": 1.9730754848015386, "grad_norm": 2.929436206817627, "learning_rate": 9.772728996182894e-07, "loss": 0.0358, "step": 184670 }, { "epoch": 1.973182328115818, "grad_norm": 4.643178939819336, "learning_rate": 9.772678916128034e-07, "loss": 0.0675, "step": 184680 }, { "epoch": 1.9732891714300977, "grad_norm": 3.651331663131714, "learning_rate": 9.77262883068446e-07, "loss": 0.0464, "step": 184690 }, { "epoch": 1.9733960147443774, "grad_norm": 0.3044804036617279, "learning_rate": 9.77257873985223e-07, "loss": 0.0157, "step": 184700 }, { "epoch": 1.9735028580586569, "grad_norm": 0.009193786419928074, "learning_rate": 9.772528643631396e-07, "loss": 0.0369, "step": 184710 }, { "epoch": 1.9736097013729366, "grad_norm": 0.04972764104604721, "learning_rate": 9.77247854202202e-07, "loss": 0.0223, "step": 184720 }, { "epoch": 1.9737165446872162, "grad_norm": 0.3736989200115204, "learning_rate": 9.77242843502415e-07, "loss": 0.0183, "step": 184730 }, { "epoch": 1.9738233880014957, "grad_norm": 6.7130842208862305, "learning_rate": 9.772378322637853e-07, "loss": 0.0647, "step": 184740 }, { "epoch": 1.9739302313157754, "grad_norm": 0.9953386783599854, "learning_rate": 9.772328204863178e-07, "loss": 0.0187, "step": 184750 }, { "epoch": 1.974037074630055, "grad_norm": 1.1066219806671143, "learning_rate": 9.772278081700186e-07, "loss": 0.0271, "step": 184760 }, { "epoch": 1.9741439179443345, "grad_norm": 11.625590324401855, "learning_rate": 9.772227953148931e-07, "loss": 0.0381, "step": 184770 }, { "epoch": 1.9742507612586142, "grad_norm": 12.180294036865234, "learning_rate": 9.772177819209471e-07, "loss": 0.1335, "step": 184780 }, { "epoch": 1.9743576045728939, "grad_norm": 2.512070417404175, "learning_rate": 9.77212767988186e-07, "loss": 0.0434, "step": 184790 }, { "epoch": 1.9744644478871733, "grad_norm": 4.213789939880371, "learning_rate": 9.77207753516616e-07, "loss": 0.1079, "step": 184800 }, { "epoch": 1.974571291201453, "grad_norm": 1.5455018281936646, "learning_rate": 9.77202738506242e-07, "loss": 0.0462, "step": 184810 }, { "epoch": 1.9746781345157327, "grad_norm": 4.17586612701416, "learning_rate": 9.771977229570705e-07, "loss": 0.029, "step": 184820 }, { "epoch": 1.9747849778300122, "grad_norm": 3.825385570526123, "learning_rate": 9.771927068691066e-07, "loss": 0.0123, "step": 184830 }, { "epoch": 1.9748918211442918, "grad_norm": 0.600098192691803, "learning_rate": 9.771876902423562e-07, "loss": 0.019, "step": 184840 }, { "epoch": 1.9749986644585715, "grad_norm": 1.8570973873138428, "learning_rate": 9.771826730768247e-07, "loss": 0.0196, "step": 184850 }, { "epoch": 1.975105507772851, "grad_norm": 8.400601387023926, "learning_rate": 9.771776553725182e-07, "loss": 0.034, "step": 184860 }, { "epoch": 1.9752123510871307, "grad_norm": 2.2560243606567383, "learning_rate": 9.771726371294418e-07, "loss": 0.0102, "step": 184870 }, { "epoch": 1.9753191944014103, "grad_norm": 0.3747252821922302, "learning_rate": 9.771676183476018e-07, "loss": 0.0082, "step": 184880 }, { "epoch": 1.9754260377156898, "grad_norm": 3.1236298084259033, "learning_rate": 9.771625990270033e-07, "loss": 0.0181, "step": 184890 }, { "epoch": 1.9755328810299697, "grad_norm": 0.39229756593704224, "learning_rate": 9.771575791676523e-07, "loss": 0.0155, "step": 184900 }, { "epoch": 1.9756397243442492, "grad_norm": 2.4318606853485107, "learning_rate": 9.771525587695544e-07, "loss": 0.0594, "step": 184910 }, { "epoch": 1.9757465676585286, "grad_norm": 3.444209337234497, "learning_rate": 9.771475378327152e-07, "loss": 0.0327, "step": 184920 }, { "epoch": 1.9758534109728085, "grad_norm": 3.43306303024292, "learning_rate": 9.771425163571404e-07, "loss": 0.012, "step": 184930 }, { "epoch": 1.975960254287088, "grad_norm": 0.04376877844333649, "learning_rate": 9.77137494342836e-07, "loss": 0.0331, "step": 184940 }, { "epoch": 1.9760670976013675, "grad_norm": 0.04865517467260361, "learning_rate": 9.771324717898069e-07, "loss": 0.0489, "step": 184950 }, { "epoch": 1.9761739409156474, "grad_norm": 2.9800949096679688, "learning_rate": 9.771274486980595e-07, "loss": 0.0305, "step": 184960 }, { "epoch": 1.9762807842299268, "grad_norm": 0.16157639026641846, "learning_rate": 9.771224250675992e-07, "loss": 0.0111, "step": 184970 }, { "epoch": 1.9763876275442063, "grad_norm": 0.031119003891944885, "learning_rate": 9.771174008984315e-07, "loss": 0.0502, "step": 184980 }, { "epoch": 1.9764944708584862, "grad_norm": 1.749793529510498, "learning_rate": 9.771123761905624e-07, "loss": 0.0063, "step": 184990 }, { "epoch": 1.9766013141727656, "grad_norm": 0.2768367826938629, "learning_rate": 9.771073509439974e-07, "loss": 0.0234, "step": 185000 }, { "epoch": 1.976708157487045, "grad_norm": 9.17227554321289, "learning_rate": 9.77102325158742e-07, "loss": 0.0322, "step": 185010 }, { "epoch": 1.976815000801325, "grad_norm": 8.13254451751709, "learning_rate": 9.770972988348023e-07, "loss": 0.0104, "step": 185020 }, { "epoch": 1.9769218441156045, "grad_norm": 0.006664397194981575, "learning_rate": 9.770922719721837e-07, "loss": 0.031, "step": 185030 }, { "epoch": 1.977028687429884, "grad_norm": 0.048440635204315186, "learning_rate": 9.770872445708918e-07, "loss": 0.0123, "step": 185040 }, { "epoch": 1.9771355307441638, "grad_norm": 0.5889743566513062, "learning_rate": 9.770822166309324e-07, "loss": 0.0066, "step": 185050 }, { "epoch": 1.9772423740584433, "grad_norm": 4.731179714202881, "learning_rate": 9.770771881523115e-07, "loss": 0.0307, "step": 185060 }, { "epoch": 1.977349217372723, "grad_norm": 4.275470733642578, "learning_rate": 9.770721591350343e-07, "loss": 0.0385, "step": 185070 }, { "epoch": 1.9774560606870026, "grad_norm": 3.0267386436462402, "learning_rate": 9.770671295791063e-07, "loss": 0.028, "step": 185080 }, { "epoch": 1.977562904001282, "grad_norm": 0.14538227021694183, "learning_rate": 9.770620994845337e-07, "loss": 0.0239, "step": 185090 }, { "epoch": 1.9776697473155618, "grad_norm": 0.02528388798236847, "learning_rate": 9.77057068851322e-07, "loss": 0.0081, "step": 185100 }, { "epoch": 1.9777765906298415, "grad_norm": 0.7744336128234863, "learning_rate": 9.770520376794768e-07, "loss": 0.0519, "step": 185110 }, { "epoch": 1.977883433944121, "grad_norm": 3.024815082550049, "learning_rate": 9.77047005969004e-07, "loss": 0.0091, "step": 185120 }, { "epoch": 1.9779902772584006, "grad_norm": 1.9892878532409668, "learning_rate": 9.77041973719909e-07, "loss": 0.0129, "step": 185130 }, { "epoch": 1.9780971205726803, "grad_norm": 0.010904625058174133, "learning_rate": 9.770369409321978e-07, "loss": 0.0144, "step": 185140 }, { "epoch": 1.9782039638869597, "grad_norm": 0.014361020177602768, "learning_rate": 9.770319076058758e-07, "loss": 0.013, "step": 185150 }, { "epoch": 1.9783108072012394, "grad_norm": 3.8653769493103027, "learning_rate": 9.770268737409485e-07, "loss": 0.0269, "step": 185160 }, { "epoch": 1.978417650515519, "grad_norm": 0.09300687164068222, "learning_rate": 9.770218393374222e-07, "loss": 0.0359, "step": 185170 }, { "epoch": 1.9785244938297986, "grad_norm": 4.211813449859619, "learning_rate": 9.77016804395302e-07, "loss": 0.0251, "step": 185180 }, { "epoch": 1.9786313371440782, "grad_norm": 5.0264811515808105, "learning_rate": 9.77011768914594e-07, "loss": 0.0239, "step": 185190 }, { "epoch": 1.978738180458358, "grad_norm": 0.011296468786895275, "learning_rate": 9.770067328953036e-07, "loss": 0.0083, "step": 185200 }, { "epoch": 1.9788450237726374, "grad_norm": 7.059968948364258, "learning_rate": 9.770016963374367e-07, "loss": 0.1034, "step": 185210 }, { "epoch": 1.978951867086917, "grad_norm": 6.791957378387451, "learning_rate": 9.769966592409989e-07, "loss": 0.089, "step": 185220 }, { "epoch": 1.9790587104011967, "grad_norm": 1.4380242824554443, "learning_rate": 9.769916216059957e-07, "loss": 0.0041, "step": 185230 }, { "epoch": 1.9791655537154762, "grad_norm": 2.6992435455322266, "learning_rate": 9.76986583432433e-07, "loss": 0.024, "step": 185240 }, { "epoch": 1.9792723970297559, "grad_norm": 0.004973426461219788, "learning_rate": 9.769815447203165e-07, "loss": 0.031, "step": 185250 }, { "epoch": 1.9793792403440356, "grad_norm": 2.9389567375183105, "learning_rate": 9.76976505469652e-07, "loss": 0.0309, "step": 185260 }, { "epoch": 1.979486083658315, "grad_norm": 0.18137697875499725, "learning_rate": 9.76971465680445e-07, "loss": 0.061, "step": 185270 }, { "epoch": 1.9795929269725947, "grad_norm": 11.338617324829102, "learning_rate": 9.769664253527007e-07, "loss": 0.0341, "step": 185280 }, { "epoch": 1.9796997702868744, "grad_norm": 0.0057105692103505135, "learning_rate": 9.769613844864258e-07, "loss": 0.0347, "step": 185290 }, { "epoch": 1.9798066136011538, "grad_norm": 1.3330910205841064, "learning_rate": 9.769563430816254e-07, "loss": 0.0727, "step": 185300 }, { "epoch": 1.9799134569154335, "grad_norm": 0.9581973552703857, "learning_rate": 9.769513011383054e-07, "loss": 0.0509, "step": 185310 }, { "epoch": 1.9800203002297132, "grad_norm": 2.7536232471466064, "learning_rate": 9.76946258656471e-07, "loss": 0.0288, "step": 185320 }, { "epoch": 1.9801271435439927, "grad_norm": 0.17017590999603271, "learning_rate": 9.769412156361288e-07, "loss": 0.0589, "step": 185330 }, { "epoch": 1.9802339868582723, "grad_norm": 0.03710692748427391, "learning_rate": 9.769361720772838e-07, "loss": 0.0227, "step": 185340 }, { "epoch": 1.980340830172552, "grad_norm": 2.076799154281616, "learning_rate": 9.769311279799418e-07, "loss": 0.0231, "step": 185350 }, { "epoch": 1.9804476734868315, "grad_norm": 4.733725070953369, "learning_rate": 9.769260833441084e-07, "loss": 0.0178, "step": 185360 }, { "epoch": 1.9805545168011112, "grad_norm": 9.487074851989746, "learning_rate": 9.769210381697897e-07, "loss": 0.0283, "step": 185370 }, { "epoch": 1.9806613601153908, "grad_norm": 4.466848850250244, "learning_rate": 9.76915992456991e-07, "loss": 0.0329, "step": 185380 }, { "epoch": 1.9807682034296703, "grad_norm": 0.5661792159080505, "learning_rate": 9.76910946205718e-07, "loss": 0.036, "step": 185390 }, { "epoch": 1.98087504674395, "grad_norm": 1.400010585784912, "learning_rate": 9.769058994159769e-07, "loss": 0.0128, "step": 185400 }, { "epoch": 1.9809818900582297, "grad_norm": 4.0747599601745605, "learning_rate": 9.769008520877728e-07, "loss": 0.015, "step": 185410 }, { "epoch": 1.9810887333725091, "grad_norm": 2.768388509750366, "learning_rate": 9.768958042211117e-07, "loss": 0.0323, "step": 185420 }, { "epoch": 1.9811955766867888, "grad_norm": 3.1827149391174316, "learning_rate": 9.768907558159995e-07, "loss": 0.014, "step": 185430 }, { "epoch": 1.9813024200010685, "grad_norm": 4.628352642059326, "learning_rate": 9.768857068724413e-07, "loss": 0.0411, "step": 185440 }, { "epoch": 1.981409263315348, "grad_norm": 6.267886161804199, "learning_rate": 9.768806573904434e-07, "loss": 0.0365, "step": 185450 }, { "epoch": 1.9815161066296276, "grad_norm": 3.60695219039917, "learning_rate": 9.768756073700111e-07, "loss": 0.0465, "step": 185460 }, { "epoch": 1.9816229499439073, "grad_norm": 0.40469732880592346, "learning_rate": 9.768705568111501e-07, "loss": 0.0117, "step": 185470 }, { "epoch": 1.9817297932581868, "grad_norm": 4.445911884307861, "learning_rate": 9.768655057138667e-07, "loss": 0.0439, "step": 185480 }, { "epoch": 1.9818366365724664, "grad_norm": 1.3768250942230225, "learning_rate": 9.768604540781657e-07, "loss": 0.0711, "step": 185490 }, { "epoch": 1.9819434798867461, "grad_norm": 0.09789372235536575, "learning_rate": 9.768554019040533e-07, "loss": 0.0583, "step": 185500 }, { "epoch": 1.9820503232010256, "grad_norm": 0.06387807428836823, "learning_rate": 9.768503491915354e-07, "loss": 0.0137, "step": 185510 }, { "epoch": 1.9821571665153053, "grad_norm": 2.9397947788238525, "learning_rate": 9.768452959406174e-07, "loss": 0.0129, "step": 185520 }, { "epoch": 1.982264009829585, "grad_norm": 4.772812366485596, "learning_rate": 9.76840242151305e-07, "loss": 0.0315, "step": 185530 }, { "epoch": 1.9823708531438644, "grad_norm": 5.309377193450928, "learning_rate": 9.76835187823604e-07, "loss": 0.0168, "step": 185540 }, { "epoch": 1.982477696458144, "grad_norm": 0.7096826434135437, "learning_rate": 9.7683013295752e-07, "loss": 0.0922, "step": 185550 }, { "epoch": 1.9825845397724238, "grad_norm": 6.121733665466309, "learning_rate": 9.76825077553059e-07, "loss": 0.0595, "step": 185560 }, { "epoch": 1.9826913830867032, "grad_norm": 0.9265655279159546, "learning_rate": 9.768200216102265e-07, "loss": 0.0429, "step": 185570 }, { "epoch": 1.982798226400983, "grad_norm": 0.06031697615981102, "learning_rate": 9.76814965129028e-07, "loss": 0.0212, "step": 185580 }, { "epoch": 1.9829050697152626, "grad_norm": 0.3384903371334076, "learning_rate": 9.768099081094693e-07, "loss": 0.0324, "step": 185590 }, { "epoch": 1.983011913029542, "grad_norm": 0.01355689950287342, "learning_rate": 9.768048505515567e-07, "loss": 0.0174, "step": 185600 }, { "epoch": 1.9831187563438217, "grad_norm": 0.32186630368232727, "learning_rate": 9.76799792455295e-07, "loss": 0.0266, "step": 185610 }, { "epoch": 1.9832255996581014, "grad_norm": 11.95117473602295, "learning_rate": 9.767947338206906e-07, "loss": 0.0087, "step": 185620 }, { "epoch": 1.9833324429723809, "grad_norm": 0.2023293823003769, "learning_rate": 9.76789674647749e-07, "loss": 0.0236, "step": 185630 }, { "epoch": 1.9834392862866608, "grad_norm": 1.6054012775421143, "learning_rate": 9.767846149364756e-07, "loss": 0.0109, "step": 185640 }, { "epoch": 1.9835461296009402, "grad_norm": 5.382664680480957, "learning_rate": 9.767795546868767e-07, "loss": 0.0241, "step": 185650 }, { "epoch": 1.9836529729152197, "grad_norm": 0.006842493079602718, "learning_rate": 9.767744938989574e-07, "loss": 0.0306, "step": 185660 }, { "epoch": 1.9837598162294996, "grad_norm": 2.6948657035827637, "learning_rate": 9.767694325727239e-07, "loss": 0.0631, "step": 185670 }, { "epoch": 1.983866659543779, "grad_norm": 5.5406293869018555, "learning_rate": 9.767643707081816e-07, "loss": 0.0564, "step": 185680 }, { "epoch": 1.9839735028580585, "grad_norm": 0.08207765221595764, "learning_rate": 9.767593083053364e-07, "loss": 0.0046, "step": 185690 }, { "epoch": 1.9840803461723384, "grad_norm": 0.04101138189435005, "learning_rate": 9.76754245364194e-07, "loss": 0.0198, "step": 185700 }, { "epoch": 1.9841871894866179, "grad_norm": 0.038734737783670425, "learning_rate": 9.767491818847601e-07, "loss": 0.0248, "step": 185710 }, { "epoch": 1.9842940328008973, "grad_norm": 0.35972121357917786, "learning_rate": 9.767441178670404e-07, "loss": 0.0351, "step": 185720 }, { "epoch": 1.9844008761151772, "grad_norm": 14.235587120056152, "learning_rate": 9.767390533110405e-07, "loss": 0.0268, "step": 185730 }, { "epoch": 1.9845077194294567, "grad_norm": 4.567072868347168, "learning_rate": 9.767339882167663e-07, "loss": 0.0534, "step": 185740 }, { "epoch": 1.9846145627437362, "grad_norm": 11.332208633422852, "learning_rate": 9.767289225842234e-07, "loss": 0.071, "step": 185750 }, { "epoch": 1.984721406058016, "grad_norm": 1.7039258480072021, "learning_rate": 9.767238564134175e-07, "loss": 0.0154, "step": 185760 }, { "epoch": 1.9848282493722955, "grad_norm": 0.573682963848114, "learning_rate": 9.767187897043545e-07, "loss": 0.0107, "step": 185770 }, { "epoch": 1.984935092686575, "grad_norm": 0.4714828431606293, "learning_rate": 9.7671372245704e-07, "loss": 0.0331, "step": 185780 }, { "epoch": 1.9850419360008549, "grad_norm": 0.804185152053833, "learning_rate": 9.767086546714798e-07, "loss": 0.0284, "step": 185790 }, { "epoch": 1.9851487793151343, "grad_norm": 0.028458289802074432, "learning_rate": 9.767035863476793e-07, "loss": 0.0128, "step": 185800 }, { "epoch": 1.985255622629414, "grad_norm": 9.048696517944336, "learning_rate": 9.766985174856448e-07, "loss": 0.0597, "step": 185810 }, { "epoch": 1.9853624659436937, "grad_norm": 4.630152702331543, "learning_rate": 9.766934480853814e-07, "loss": 0.005, "step": 185820 }, { "epoch": 1.9854693092579732, "grad_norm": 0.04150477051734924, "learning_rate": 9.766883781468953e-07, "loss": 0.03, "step": 185830 }, { "epoch": 1.9855761525722528, "grad_norm": 5.316910266876221, "learning_rate": 9.76683307670192e-07, "loss": 0.0623, "step": 185840 }, { "epoch": 1.9856829958865325, "grad_norm": 0.5206994414329529, "learning_rate": 9.766782366552773e-07, "loss": 0.0609, "step": 185850 }, { "epoch": 1.985789839200812, "grad_norm": 6.506099700927734, "learning_rate": 9.76673165102157e-07, "loss": 0.035, "step": 185860 }, { "epoch": 1.9858966825150917, "grad_norm": 0.16517028212547302, "learning_rate": 9.766680930108364e-07, "loss": 0.0449, "step": 185870 }, { "epoch": 1.9860035258293713, "grad_norm": 4.181560516357422, "learning_rate": 9.766630203813217e-07, "loss": 0.0458, "step": 185880 }, { "epoch": 1.9861103691436508, "grad_norm": 0.02432440221309662, "learning_rate": 9.766579472136187e-07, "loss": 0.0034, "step": 185890 }, { "epoch": 1.9862172124579305, "grad_norm": 0.34737029671669006, "learning_rate": 9.766528735077327e-07, "loss": 0.0457, "step": 185900 }, { "epoch": 1.9863240557722102, "grad_norm": 4.116959571838379, "learning_rate": 9.766477992636695e-07, "loss": 0.0372, "step": 185910 }, { "epoch": 1.9864308990864896, "grad_norm": 3.413177967071533, "learning_rate": 9.76642724481435e-07, "loss": 0.0135, "step": 185920 }, { "epoch": 1.9865377424007693, "grad_norm": 0.6846715807914734, "learning_rate": 9.766376491610351e-07, "loss": 0.0689, "step": 185930 }, { "epoch": 1.986644585715049, "grad_norm": 0.10473242402076721, "learning_rate": 9.766325733024752e-07, "loss": 0.0858, "step": 185940 }, { "epoch": 1.9867514290293284, "grad_norm": 7.536953926086426, "learning_rate": 9.766274969057612e-07, "loss": 0.0587, "step": 185950 }, { "epoch": 1.9868582723436081, "grad_norm": 0.06546405702829361, "learning_rate": 9.766224199708988e-07, "loss": 0.0293, "step": 185960 }, { "epoch": 1.9869651156578878, "grad_norm": 0.0053379652090370655, "learning_rate": 9.766173424978937e-07, "loss": 0.0108, "step": 185970 }, { "epoch": 1.9870719589721673, "grad_norm": 0.67039954662323, "learning_rate": 9.766122644867514e-07, "loss": 0.0147, "step": 185980 }, { "epoch": 1.987178802286447, "grad_norm": 1.7370829582214355, "learning_rate": 9.76607185937478e-07, "loss": 0.0221, "step": 185990 }, { "epoch": 1.9872856456007266, "grad_norm": 14.986270904541016, "learning_rate": 9.766021068500794e-07, "loss": 0.0328, "step": 186000 }, { "epoch": 1.987392488915006, "grad_norm": 0.4107620418071747, "learning_rate": 9.765970272245607e-07, "loss": 0.0425, "step": 186010 }, { "epoch": 1.9874993322292858, "grad_norm": 2.372715950012207, "learning_rate": 9.765919470609284e-07, "loss": 0.1121, "step": 186020 }, { "epoch": 1.9876061755435654, "grad_norm": 0.025095591321587563, "learning_rate": 9.765868663591874e-07, "loss": 0.0282, "step": 186030 }, { "epoch": 1.987713018857845, "grad_norm": 1.087218999862671, "learning_rate": 9.765817851193442e-07, "loss": 0.0101, "step": 186040 }, { "epoch": 1.9878198621721246, "grad_norm": 0.04977428913116455, "learning_rate": 9.765767033414038e-07, "loss": 0.0074, "step": 186050 }, { "epoch": 1.9879267054864043, "grad_norm": 0.07008741050958633, "learning_rate": 9.765716210253726e-07, "loss": 0.0263, "step": 186060 }, { "epoch": 1.9880335488006837, "grad_norm": 0.3600543141365051, "learning_rate": 9.76566538171256e-07, "loss": 0.0126, "step": 186070 }, { "epoch": 1.9881403921149634, "grad_norm": 0.5002744197845459, "learning_rate": 9.765614547790599e-07, "loss": 0.0234, "step": 186080 }, { "epoch": 1.988247235429243, "grad_norm": 0.03615069389343262, "learning_rate": 9.7655637084879e-07, "loss": 0.0211, "step": 186090 }, { "epoch": 1.9883540787435225, "grad_norm": 0.0027734171599149704, "learning_rate": 9.765512863804516e-07, "loss": 0.0261, "step": 186100 }, { "epoch": 1.9884609220578022, "grad_norm": 4.593358993530273, "learning_rate": 9.76546201374051e-07, "loss": 0.0201, "step": 186110 }, { "epoch": 1.988567765372082, "grad_norm": 0.0068587264977395535, "learning_rate": 9.76541115829594e-07, "loss": 0.0507, "step": 186120 }, { "epoch": 1.9886746086863614, "grad_norm": 1.2889018058776855, "learning_rate": 9.76536029747086e-07, "loss": 0.0313, "step": 186130 }, { "epoch": 1.988781452000641, "grad_norm": 0.0823214054107666, "learning_rate": 9.76530943126533e-07, "loss": 0.0475, "step": 186140 }, { "epoch": 1.9888882953149207, "grad_norm": 0.8607093095779419, "learning_rate": 9.765258559679403e-07, "loss": 0.0204, "step": 186150 }, { "epoch": 1.9889951386292002, "grad_norm": 8.277623176574707, "learning_rate": 9.765207682713142e-07, "loss": 0.0329, "step": 186160 }, { "epoch": 1.9891019819434799, "grad_norm": 0.012321342714130878, "learning_rate": 9.7651568003666e-07, "loss": 0.0272, "step": 186170 }, { "epoch": 1.9892088252577595, "grad_norm": 0.6223199367523193, "learning_rate": 9.765105912639836e-07, "loss": 0.0619, "step": 186180 }, { "epoch": 1.989315668572039, "grad_norm": 11.153881072998047, "learning_rate": 9.765055019532912e-07, "loss": 0.0524, "step": 186190 }, { "epoch": 1.9894225118863187, "grad_norm": 0.3923879563808441, "learning_rate": 9.765004121045877e-07, "loss": 0.0102, "step": 186200 }, { "epoch": 1.9895293552005984, "grad_norm": 2.5760440826416016, "learning_rate": 9.764953217178792e-07, "loss": 0.0427, "step": 186210 }, { "epoch": 1.9896361985148778, "grad_norm": 1.831283688545227, "learning_rate": 9.764902307931716e-07, "loss": 0.0742, "step": 186220 }, { "epoch": 1.9897430418291575, "grad_norm": 4.653927326202393, "learning_rate": 9.76485139330471e-07, "loss": 0.0554, "step": 186230 }, { "epoch": 1.9898498851434372, "grad_norm": 4.600353717803955, "learning_rate": 9.764800473297824e-07, "loss": 0.0207, "step": 186240 }, { "epoch": 1.9899567284577167, "grad_norm": 6.960427761077881, "learning_rate": 9.764749547911117e-07, "loss": 0.0303, "step": 186250 }, { "epoch": 1.9900635717719963, "grad_norm": 0.07099489122629166, "learning_rate": 9.764698617144648e-07, "loss": 0.0176, "step": 186260 }, { "epoch": 1.990170415086276, "grad_norm": 0.30738940834999084, "learning_rate": 9.764647680998478e-07, "loss": 0.0386, "step": 186270 }, { "epoch": 1.9902772584005555, "grad_norm": 0.1770353466272354, "learning_rate": 9.76459673947266e-07, "loss": 0.0158, "step": 186280 }, { "epoch": 1.9903841017148352, "grad_norm": 0.15679676830768585, "learning_rate": 9.76454579256725e-07, "loss": 0.0307, "step": 186290 }, { "epoch": 1.9904909450291148, "grad_norm": 11.379257202148438, "learning_rate": 9.764494840282312e-07, "loss": 0.0526, "step": 186300 }, { "epoch": 1.9905977883433943, "grad_norm": 1.382215976715088, "learning_rate": 9.764443882617897e-07, "loss": 0.0372, "step": 186310 }, { "epoch": 1.990704631657674, "grad_norm": 3.4352262020111084, "learning_rate": 9.764392919574066e-07, "loss": 0.0567, "step": 186320 }, { "epoch": 1.9908114749719537, "grad_norm": 1.50668466091156, "learning_rate": 9.764341951150878e-07, "loss": 0.0198, "step": 186330 }, { "epoch": 1.9909183182862331, "grad_norm": 0.05636603385210037, "learning_rate": 9.764290977348386e-07, "loss": 0.0426, "step": 186340 }, { "epoch": 1.9910251616005128, "grad_norm": 0.6941210627555847, "learning_rate": 9.76423999816665e-07, "loss": 0.0179, "step": 186350 }, { "epoch": 1.9911320049147925, "grad_norm": 0.06258221715688705, "learning_rate": 9.764189013605728e-07, "loss": 0.0312, "step": 186360 }, { "epoch": 1.991238848229072, "grad_norm": 1.2289693355560303, "learning_rate": 9.764138023665679e-07, "loss": 0.0151, "step": 186370 }, { "epoch": 1.9913456915433518, "grad_norm": 0.010299974121153355, "learning_rate": 9.764087028346556e-07, "loss": 0.0377, "step": 186380 }, { "epoch": 1.9914525348576313, "grad_norm": 2.3303964138031006, "learning_rate": 9.76403602764842e-07, "loss": 0.0243, "step": 186390 }, { "epoch": 1.9915593781719108, "grad_norm": 5.432335376739502, "learning_rate": 9.763985021571326e-07, "loss": 0.0061, "step": 186400 }, { "epoch": 1.9916662214861907, "grad_norm": 0.24332156777381897, "learning_rate": 9.763934010115334e-07, "loss": 0.0362, "step": 186410 }, { "epoch": 1.9917730648004701, "grad_norm": 0.2199290692806244, "learning_rate": 9.763882993280502e-07, "loss": 0.1202, "step": 186420 }, { "epoch": 1.9918799081147496, "grad_norm": 0.01796945184469223, "learning_rate": 9.763831971066887e-07, "loss": 0.0585, "step": 186430 }, { "epoch": 1.9919867514290295, "grad_norm": 6.298449993133545, "learning_rate": 9.763780943474546e-07, "loss": 0.0473, "step": 186440 }, { "epoch": 1.992093594743309, "grad_norm": 18.401994705200195, "learning_rate": 9.763729910503536e-07, "loss": 0.052, "step": 186450 }, { "epoch": 1.9922004380575884, "grad_norm": 0.26433467864990234, "learning_rate": 9.763678872153915e-07, "loss": 0.0119, "step": 186460 }, { "epoch": 1.9923072813718683, "grad_norm": 0.13997772336006165, "learning_rate": 9.763627828425741e-07, "loss": 0.0236, "step": 186470 }, { "epoch": 1.9924141246861478, "grad_norm": 0.7969319820404053, "learning_rate": 9.763576779319073e-07, "loss": 0.0471, "step": 186480 }, { "epoch": 1.9925209680004272, "grad_norm": 0.8288024663925171, "learning_rate": 9.763525724833968e-07, "loss": 0.0134, "step": 186490 }, { "epoch": 1.9926278113147071, "grad_norm": 0.044126685708761215, "learning_rate": 9.763474664970481e-07, "loss": 0.0181, "step": 186500 }, { "epoch": 1.9927346546289866, "grad_norm": 0.512326717376709, "learning_rate": 9.763423599728672e-07, "loss": 0.0071, "step": 186510 }, { "epoch": 1.992841497943266, "grad_norm": 0.036041613668203354, "learning_rate": 9.763372529108597e-07, "loss": 0.0626, "step": 186520 }, { "epoch": 1.992948341257546, "grad_norm": 10.27156925201416, "learning_rate": 9.763321453110319e-07, "loss": 0.0844, "step": 186530 }, { "epoch": 1.9930551845718254, "grad_norm": 4.800241470336914, "learning_rate": 9.763270371733887e-07, "loss": 0.0429, "step": 186540 }, { "epoch": 1.993162027886105, "grad_norm": 0.017580799758434296, "learning_rate": 9.763219284979366e-07, "loss": 0.0668, "step": 186550 }, { "epoch": 1.9932688712003848, "grad_norm": 2.359060525894165, "learning_rate": 9.76316819284681e-07, "loss": 0.005, "step": 186560 }, { "epoch": 1.9933757145146642, "grad_norm": 0.4362071752548218, "learning_rate": 9.763117095336277e-07, "loss": 0.0538, "step": 186570 }, { "epoch": 1.993482557828944, "grad_norm": 5.251672267913818, "learning_rate": 9.763065992447826e-07, "loss": 0.0122, "step": 186580 }, { "epoch": 1.9935894011432236, "grad_norm": 6.31906795501709, "learning_rate": 9.763014884181513e-07, "loss": 0.0319, "step": 186590 }, { "epoch": 1.993696244457503, "grad_norm": 0.11998886615037918, "learning_rate": 9.7629637705374e-07, "loss": 0.0285, "step": 186600 }, { "epoch": 1.9938030877717827, "grad_norm": 3.7175261974334717, "learning_rate": 9.762912651515537e-07, "loss": 0.0224, "step": 186610 }, { "epoch": 1.9939099310860624, "grad_norm": 2.7017133235931396, "learning_rate": 9.762861527115988e-07, "loss": 0.0111, "step": 186620 }, { "epoch": 1.9940167744003419, "grad_norm": 0.12826785445213318, "learning_rate": 9.76281039733881e-07, "loss": 0.0143, "step": 186630 }, { "epoch": 1.9941236177146215, "grad_norm": 0.047618553042411804, "learning_rate": 9.76275926218406e-07, "loss": 0.0648, "step": 186640 }, { "epoch": 1.9942304610289012, "grad_norm": 0.11286717653274536, "learning_rate": 9.762708121651793e-07, "loss": 0.0135, "step": 186650 }, { "epoch": 1.9943373043431807, "grad_norm": 0.1934901922941208, "learning_rate": 9.76265697574207e-07, "loss": 0.0351, "step": 186660 }, { "epoch": 1.9944441476574604, "grad_norm": 0.15342026948928833, "learning_rate": 9.762605824454949e-07, "loss": 0.0152, "step": 186670 }, { "epoch": 1.99455099097174, "grad_norm": 2.1104419231414795, "learning_rate": 9.762554667790485e-07, "loss": 0.0166, "step": 186680 }, { "epoch": 1.9946578342860195, "grad_norm": 0.05753396824002266, "learning_rate": 9.762503505748738e-07, "loss": 0.011, "step": 186690 }, { "epoch": 1.9947646776002992, "grad_norm": 0.6873295307159424, "learning_rate": 9.762452338329766e-07, "loss": 0.042, "step": 186700 }, { "epoch": 1.9948715209145789, "grad_norm": 5.410828113555908, "learning_rate": 9.762401165533623e-07, "loss": 0.0331, "step": 186710 }, { "epoch": 1.9949783642288583, "grad_norm": 8.488456726074219, "learning_rate": 9.762349987360373e-07, "loss": 0.0203, "step": 186720 }, { "epoch": 1.995085207543138, "grad_norm": 0.020594004541635513, "learning_rate": 9.76229880381007e-07, "loss": 0.0419, "step": 186730 }, { "epoch": 1.9951920508574177, "grad_norm": 0.024342557415366173, "learning_rate": 9.76224761488277e-07, "loss": 0.0412, "step": 186740 }, { "epoch": 1.9952988941716971, "grad_norm": 5.464087963104248, "learning_rate": 9.762196420578534e-07, "loss": 0.0519, "step": 186750 }, { "epoch": 1.9954057374859768, "grad_norm": 1.3404759168624878, "learning_rate": 9.762145220897419e-07, "loss": 0.0207, "step": 186760 }, { "epoch": 1.9955125808002565, "grad_norm": 2.718543767929077, "learning_rate": 9.76209401583948e-07, "loss": 0.0084, "step": 186770 }, { "epoch": 1.995619424114536, "grad_norm": 0.05055958777666092, "learning_rate": 9.762042805404783e-07, "loss": 0.0096, "step": 186780 }, { "epoch": 1.9957262674288156, "grad_norm": 9.376276016235352, "learning_rate": 9.761991589593376e-07, "loss": 0.0539, "step": 186790 }, { "epoch": 1.9958331107430953, "grad_norm": 1.2933673858642578, "learning_rate": 9.761940368405324e-07, "loss": 0.018, "step": 186800 }, { "epoch": 1.9959399540573748, "grad_norm": 0.017964858561754227, "learning_rate": 9.76188914184068e-07, "loss": 0.0091, "step": 186810 }, { "epoch": 1.9960467973716545, "grad_norm": 2.166929244995117, "learning_rate": 9.761837909899504e-07, "loss": 0.0422, "step": 186820 }, { "epoch": 1.9961536406859341, "grad_norm": 0.10056240111589432, "learning_rate": 9.761786672581853e-07, "loss": 0.0106, "step": 186830 }, { "epoch": 1.9962604840002136, "grad_norm": 3.9815268516540527, "learning_rate": 9.761735429887786e-07, "loss": 0.0149, "step": 186840 }, { "epoch": 1.9963673273144933, "grad_norm": 0.04850626736879349, "learning_rate": 9.76168418181736e-07, "loss": 0.0442, "step": 186850 }, { "epoch": 1.996474170628773, "grad_norm": 1.0243769884109497, "learning_rate": 9.761632928370635e-07, "loss": 0.026, "step": 186860 }, { "epoch": 1.9965810139430524, "grad_norm": 13.360466957092285, "learning_rate": 9.761581669547666e-07, "loss": 0.0513, "step": 186870 }, { "epoch": 1.996687857257332, "grad_norm": 0.12202583253383636, "learning_rate": 9.76153040534851e-07, "loss": 0.024, "step": 186880 }, { "epoch": 1.9967947005716118, "grad_norm": 0.03828754648566246, "learning_rate": 9.76147913577323e-07, "loss": 0.0086, "step": 186890 }, { "epoch": 1.9969015438858913, "grad_norm": 0.3192754089832306, "learning_rate": 9.761427860821879e-07, "loss": 0.0281, "step": 186900 }, { "epoch": 1.997008387200171, "grad_norm": 0.11663549393415451, "learning_rate": 9.761376580494516e-07, "loss": 0.0406, "step": 186910 }, { "epoch": 1.9971152305144506, "grad_norm": 6.988661289215088, "learning_rate": 9.7613252947912e-07, "loss": 0.0151, "step": 186920 }, { "epoch": 1.99722207382873, "grad_norm": 2.383894920349121, "learning_rate": 9.76127400371199e-07, "loss": 0.0333, "step": 186930 }, { "epoch": 1.9973289171430098, "grad_norm": 0.06636889278888702, "learning_rate": 9.76122270725694e-07, "loss": 0.0132, "step": 186940 }, { "epoch": 1.9974357604572894, "grad_norm": 0.36135023832321167, "learning_rate": 9.761171405426112e-07, "loss": 0.0603, "step": 186950 }, { "epoch": 1.997542603771569, "grad_norm": 0.05479707196354866, "learning_rate": 9.76112009821956e-07, "loss": 0.0376, "step": 186960 }, { "epoch": 1.9976494470858486, "grad_norm": 6.82030725479126, "learning_rate": 9.761068785637345e-07, "loss": 0.0189, "step": 186970 }, { "epoch": 1.9977562904001283, "grad_norm": 0.008775711990892887, "learning_rate": 9.761017467679525e-07, "loss": 0.088, "step": 186980 }, { "epoch": 1.9978631337144077, "grad_norm": 2.133345603942871, "learning_rate": 9.760966144346156e-07, "loss": 0.0735, "step": 186990 }, { "epoch": 1.9979699770286874, "grad_norm": 3.7285475730895996, "learning_rate": 9.760914815637298e-07, "loss": 0.0513, "step": 187000 }, { "epoch": 1.998076820342967, "grad_norm": 9.679656982421875, "learning_rate": 9.760863481553006e-07, "loss": 0.0396, "step": 187010 }, { "epoch": 1.9981836636572465, "grad_norm": 0.044297944754362106, "learning_rate": 9.760812142093341e-07, "loss": 0.0502, "step": 187020 }, { "epoch": 1.9982905069715262, "grad_norm": 0.026646813377738, "learning_rate": 9.76076079725836e-07, "loss": 0.0391, "step": 187030 }, { "epoch": 1.998397350285806, "grad_norm": 0.007171465549618006, "learning_rate": 9.76070944704812e-07, "loss": 0.0027, "step": 187040 }, { "epoch": 1.9985041936000854, "grad_norm": 0.7779175043106079, "learning_rate": 9.760658091462682e-07, "loss": 0.0428, "step": 187050 }, { "epoch": 1.998611036914365, "grad_norm": 4.38879919052124, "learning_rate": 9.760606730502098e-07, "loss": 0.0109, "step": 187060 }, { "epoch": 1.9987178802286447, "grad_norm": 3.2720510959625244, "learning_rate": 9.760555364166431e-07, "loss": 0.0043, "step": 187070 }, { "epoch": 1.9988247235429242, "grad_norm": 2.7954955101013184, "learning_rate": 9.76050399245574e-07, "loss": 0.053, "step": 187080 }, { "epoch": 1.9989315668572039, "grad_norm": 0.15401536226272583, "learning_rate": 9.76045261537008e-07, "loss": 0.0027, "step": 187090 }, { "epoch": 1.9990384101714835, "grad_norm": 0.01612074486911297, "learning_rate": 9.760401232909508e-07, "loss": 0.0161, "step": 187100 }, { "epoch": 1.999145253485763, "grad_norm": 15.877272605895996, "learning_rate": 9.760349845074082e-07, "loss": 0.0117, "step": 187110 }, { "epoch": 1.999252096800043, "grad_norm": 11.367136001586914, "learning_rate": 9.760298451863866e-07, "loss": 0.1296, "step": 187120 }, { "epoch": 1.9993589401143224, "grad_norm": 5.395510673522949, "learning_rate": 9.760247053278911e-07, "loss": 0.0124, "step": 187130 }, { "epoch": 1.9994657834286018, "grad_norm": 0.2496350258588791, "learning_rate": 9.760195649319278e-07, "loss": 0.0127, "step": 187140 }, { "epoch": 1.9995726267428817, "grad_norm": 3.988593816757202, "learning_rate": 9.760144239985027e-07, "loss": 0.0163, "step": 187150 }, { "epoch": 1.9996794700571612, "grad_norm": 7.2628655433654785, "learning_rate": 9.760092825276211e-07, "loss": 0.0446, "step": 187160 }, { "epoch": 1.9997863133714406, "grad_norm": 2.9490747451782227, "learning_rate": 9.760041405192895e-07, "loss": 0.035, "step": 187170 }, { "epoch": 1.9998931566857205, "grad_norm": 7.175581932067871, "learning_rate": 9.759989979735129e-07, "loss": 0.0259, "step": 187180 }, { "epoch": 2.0, "grad_norm": 3.6754863262176514, "learning_rate": 9.759938548902975e-07, "loss": 0.0663, "step": 187190 }, { "epoch": 2.0, "eval_accuracy": 0.6834387867831676, "eval_cer": 0.0491700287618655, "eval_loss": 0.033885374665260315, "eval_runtime": 18460.7608, "eval_samples_per_second": 0.539, "eval_steps_per_second": 0.27, "eval_wer": 0.12636372113262673, "step": 187190 }, { "epoch": 2.0001068433142795, "grad_norm": 0.05296379700303078, "learning_rate": 9.759887112696494e-07, "loss": 0.0153, "step": 187200 }, { "epoch": 2.0002136866285594, "grad_norm": 1.5957491397857666, "learning_rate": 9.75983567111574e-07, "loss": 0.0192, "step": 187210 }, { "epoch": 2.000320529942839, "grad_norm": 0.25244155526161194, "learning_rate": 9.759784224160771e-07, "loss": 0.0316, "step": 187220 }, { "epoch": 2.0004273732571183, "grad_norm": 0.007182394620031118, "learning_rate": 9.759732771831647e-07, "loss": 0.0078, "step": 187230 }, { "epoch": 2.000534216571398, "grad_norm": 9.026947975158691, "learning_rate": 9.759681314128426e-07, "loss": 0.0237, "step": 187240 }, { "epoch": 2.0006410598856776, "grad_norm": 0.005559640936553478, "learning_rate": 9.759629851051165e-07, "loss": 0.025, "step": 187250 }, { "epoch": 2.000747903199957, "grad_norm": 0.15255296230316162, "learning_rate": 9.759578382599925e-07, "loss": 0.0335, "step": 187260 }, { "epoch": 2.000854746514237, "grad_norm": 0.27824127674102783, "learning_rate": 9.75952690877476e-07, "loss": 0.0107, "step": 187270 }, { "epoch": 2.0009615898285165, "grad_norm": 0.9163720607757568, "learning_rate": 9.759475429575728e-07, "loss": 0.0163, "step": 187280 }, { "epoch": 2.001068433142796, "grad_norm": 0.4151090383529663, "learning_rate": 9.75942394500289e-07, "loss": 0.0268, "step": 187290 }, { "epoch": 2.001175276457076, "grad_norm": 1.9406390190124512, "learning_rate": 9.759372455056305e-07, "loss": 0.0108, "step": 187300 }, { "epoch": 2.0012821197713553, "grad_norm": 0.08567538112401962, "learning_rate": 9.759320959736029e-07, "loss": 0.0382, "step": 187310 }, { "epoch": 2.0013889630856347, "grad_norm": 3.4908640384674072, "learning_rate": 9.75926945904212e-07, "loss": 0.0123, "step": 187320 }, { "epoch": 2.0014958063999146, "grad_norm": 0.0391976498067379, "learning_rate": 9.759217952974636e-07, "loss": 0.0191, "step": 187330 }, { "epoch": 2.001602649714194, "grad_norm": 1.5291895866394043, "learning_rate": 9.759166441533634e-07, "loss": 0.0181, "step": 187340 }, { "epoch": 2.0017094930284736, "grad_norm": 2.219987392425537, "learning_rate": 9.759114924719177e-07, "loss": 0.0147, "step": 187350 }, { "epoch": 2.0018163363427535, "grad_norm": 0.009379987604916096, "learning_rate": 9.75906340253132e-07, "loss": 0.0225, "step": 187360 }, { "epoch": 2.001923179657033, "grad_norm": 0.12422584742307663, "learning_rate": 9.75901187497012e-07, "loss": 0.0236, "step": 187370 }, { "epoch": 2.0020300229713124, "grad_norm": 4.8682942390441895, "learning_rate": 9.758960342035637e-07, "loss": 0.0407, "step": 187380 }, { "epoch": 2.0021368662855923, "grad_norm": 0.03586578369140625, "learning_rate": 9.758908803727928e-07, "loss": 0.0108, "step": 187390 }, { "epoch": 2.0022437095998717, "grad_norm": 1.082175374031067, "learning_rate": 9.75885726004705e-07, "loss": 0.0859, "step": 187400 }, { "epoch": 2.002350552914151, "grad_norm": 0.01748386025428772, "learning_rate": 9.758805710993068e-07, "loss": 0.004, "step": 187410 }, { "epoch": 2.002457396228431, "grad_norm": 0.28747987747192383, "learning_rate": 9.758754156566031e-07, "loss": 0.0147, "step": 187420 }, { "epoch": 2.0025642395427106, "grad_norm": 1.1241129636764526, "learning_rate": 9.758702596766002e-07, "loss": 0.0159, "step": 187430 }, { "epoch": 2.00267108285699, "grad_norm": 2.0485622882843018, "learning_rate": 9.75865103159304e-07, "loss": 0.039, "step": 187440 }, { "epoch": 2.00277792617127, "grad_norm": 1.0693395137786865, "learning_rate": 9.758599461047202e-07, "loss": 0.009, "step": 187450 }, { "epoch": 2.0028847694855494, "grad_norm": 0.09863696992397308, "learning_rate": 9.758547885128543e-07, "loss": 0.0097, "step": 187460 }, { "epoch": 2.002991612799829, "grad_norm": 4.227912902832031, "learning_rate": 9.758496303837128e-07, "loss": 0.0342, "step": 187470 }, { "epoch": 2.0030984561141087, "grad_norm": 1.2389914989471436, "learning_rate": 9.758444717173009e-07, "loss": 0.0083, "step": 187480 }, { "epoch": 2.003205299428388, "grad_norm": 0.05616339296102524, "learning_rate": 9.758393125136248e-07, "loss": 0.0102, "step": 187490 }, { "epoch": 2.0033121427426677, "grad_norm": 5.823843002319336, "learning_rate": 9.758341527726902e-07, "loss": 0.0172, "step": 187500 }, { "epoch": 2.0034189860569476, "grad_norm": 3.7733354568481445, "learning_rate": 9.758289924945029e-07, "loss": 0.0123, "step": 187510 }, { "epoch": 2.003525829371227, "grad_norm": 0.5681725144386292, "learning_rate": 9.758238316790686e-07, "loss": 0.0075, "step": 187520 }, { "epoch": 2.0036326726855065, "grad_norm": 0.3457607328891754, "learning_rate": 9.758186703263936e-07, "loss": 0.0111, "step": 187530 }, { "epoch": 2.0037395159997864, "grad_norm": 4.765748977661133, "learning_rate": 9.758135084364832e-07, "loss": 0.0156, "step": 187540 }, { "epoch": 2.003846359314066, "grad_norm": 0.17281801998615265, "learning_rate": 9.758083460093434e-07, "loss": 0.0067, "step": 187550 }, { "epoch": 2.0039532026283453, "grad_norm": 17.13390350341797, "learning_rate": 9.758031830449803e-07, "loss": 0.047, "step": 187560 }, { "epoch": 2.004060045942625, "grad_norm": 0.14952455461025238, "learning_rate": 9.757980195433993e-07, "loss": 0.0225, "step": 187570 }, { "epoch": 2.0041668892569047, "grad_norm": 5.290740013122559, "learning_rate": 9.757928555046064e-07, "loss": 0.013, "step": 187580 }, { "epoch": 2.0042737325711846, "grad_norm": 0.028153765946626663, "learning_rate": 9.757876909286076e-07, "loss": 0.0197, "step": 187590 }, { "epoch": 2.004380575885464, "grad_norm": 0.02196674235165119, "learning_rate": 9.757825258154084e-07, "loss": 0.0441, "step": 187600 }, { "epoch": 2.0044874191997435, "grad_norm": 0.03637133538722992, "learning_rate": 9.75777360165015e-07, "loss": 0.007, "step": 187610 }, { "epoch": 2.0045942625140234, "grad_norm": 1.517747163772583, "learning_rate": 9.757721939774331e-07, "loss": 0.0262, "step": 187620 }, { "epoch": 2.004701105828303, "grad_norm": 0.005098934751003981, "learning_rate": 9.757670272526684e-07, "loss": 0.0371, "step": 187630 }, { "epoch": 2.0048079491425823, "grad_norm": 0.7424383163452148, "learning_rate": 9.757618599907267e-07, "loss": 0.047, "step": 187640 }, { "epoch": 2.004914792456862, "grad_norm": 2.0646181106567383, "learning_rate": 9.75756692191614e-07, "loss": 0.0225, "step": 187650 }, { "epoch": 2.0050216357711417, "grad_norm": 0.011539360508322716, "learning_rate": 9.757515238553363e-07, "loss": 0.0427, "step": 187660 }, { "epoch": 2.005128479085421, "grad_norm": 3.2019779682159424, "learning_rate": 9.757463549818989e-07, "loss": 0.0134, "step": 187670 }, { "epoch": 2.005235322399701, "grad_norm": 1.477708101272583, "learning_rate": 9.75741185571308e-07, "loss": 0.0111, "step": 187680 }, { "epoch": 2.0053421657139805, "grad_norm": 0.022639598697423935, "learning_rate": 9.757360156235697e-07, "loss": 0.0045, "step": 187690 }, { "epoch": 2.00544900902826, "grad_norm": 0.005459820386022329, "learning_rate": 9.757308451386894e-07, "loss": 0.0196, "step": 187700 }, { "epoch": 2.00555585234254, "grad_norm": 0.029531963169574738, "learning_rate": 9.757256741166729e-07, "loss": 0.006, "step": 187710 }, { "epoch": 2.0056626956568193, "grad_norm": 3.113970994949341, "learning_rate": 9.757205025575263e-07, "loss": 0.0155, "step": 187720 }, { "epoch": 2.0057695389710988, "grad_norm": 0.22839544713497162, "learning_rate": 9.757153304612553e-07, "loss": 0.0159, "step": 187730 }, { "epoch": 2.0058763822853787, "grad_norm": 4.319962978363037, "learning_rate": 9.75710157827866e-07, "loss": 0.0124, "step": 187740 }, { "epoch": 2.005983225599658, "grad_norm": 10.639663696289062, "learning_rate": 9.757049846573638e-07, "loss": 0.0264, "step": 187750 }, { "epoch": 2.0060900689139376, "grad_norm": 0.03207510709762573, "learning_rate": 9.75699810949755e-07, "loss": 0.0079, "step": 187760 }, { "epoch": 2.0061969122282175, "grad_norm": 0.32166969776153564, "learning_rate": 9.756946367050449e-07, "loss": 0.0064, "step": 187770 }, { "epoch": 2.006303755542497, "grad_norm": 0.036293018609285355, "learning_rate": 9.756894619232399e-07, "loss": 0.0224, "step": 187780 }, { "epoch": 2.0064105988567764, "grad_norm": 0.1955225169658661, "learning_rate": 9.756842866043455e-07, "loss": 0.034, "step": 187790 }, { "epoch": 2.0065174421710563, "grad_norm": 0.10752801597118378, "learning_rate": 9.756791107483677e-07, "loss": 0.022, "step": 187800 }, { "epoch": 2.006624285485336, "grad_norm": 0.0135535579174757, "learning_rate": 9.756739343553122e-07, "loss": 0.0611, "step": 187810 }, { "epoch": 2.0067311287996152, "grad_norm": 0.27516382932662964, "learning_rate": 9.75668757425185e-07, "loss": 0.0189, "step": 187820 }, { "epoch": 2.006837972113895, "grad_norm": 1.94895601272583, "learning_rate": 9.756635799579917e-07, "loss": 0.029, "step": 187830 }, { "epoch": 2.0069448154281746, "grad_norm": 0.035486675798892975, "learning_rate": 9.756584019537386e-07, "loss": 0.0086, "step": 187840 }, { "epoch": 2.007051658742454, "grad_norm": 0.17159289121627808, "learning_rate": 9.75653223412431e-07, "loss": 0.0845, "step": 187850 }, { "epoch": 2.007158502056734, "grad_norm": 3.471665382385254, "learning_rate": 9.756480443340751e-07, "loss": 0.0141, "step": 187860 }, { "epoch": 2.0072653453710134, "grad_norm": 3.624195098876953, "learning_rate": 9.756428647186768e-07, "loss": 0.0572, "step": 187870 }, { "epoch": 2.007372188685293, "grad_norm": 0.04030173644423485, "learning_rate": 9.756376845662417e-07, "loss": 0.0034, "step": 187880 }, { "epoch": 2.007479031999573, "grad_norm": 3.3540072441101074, "learning_rate": 9.756325038767758e-07, "loss": 0.0262, "step": 187890 }, { "epoch": 2.0075858753138522, "grad_norm": 1.6336733102798462, "learning_rate": 9.756273226502848e-07, "loss": 0.0132, "step": 187900 }, { "epoch": 2.0076927186281317, "grad_norm": 0.15273340046405792, "learning_rate": 9.756221408867748e-07, "loss": 0.0109, "step": 187910 }, { "epoch": 2.0077995619424116, "grad_norm": 0.3125429153442383, "learning_rate": 9.756169585862513e-07, "loss": 0.0061, "step": 187920 }, { "epoch": 2.007906405256691, "grad_norm": 0.03172014281153679, "learning_rate": 9.756117757487207e-07, "loss": 0.0093, "step": 187930 }, { "epoch": 2.0080132485709705, "grad_norm": 0.17898428440093994, "learning_rate": 9.75606592374188e-07, "loss": 0.0255, "step": 187940 }, { "epoch": 2.0081200918852504, "grad_norm": 0.6970845460891724, "learning_rate": 9.7560140846266e-07, "loss": 0.0431, "step": 187950 }, { "epoch": 2.00822693519953, "grad_norm": 3.5464329719543457, "learning_rate": 9.75596224014142e-07, "loss": 0.1357, "step": 187960 }, { "epoch": 2.0083337785138093, "grad_norm": 1.313442349433899, "learning_rate": 9.7559103902864e-07, "loss": 0.0239, "step": 187970 }, { "epoch": 2.0084406218280892, "grad_norm": 0.006575257517397404, "learning_rate": 9.755858535061596e-07, "loss": 0.0082, "step": 187980 }, { "epoch": 2.0085474651423687, "grad_norm": 0.1333712935447693, "learning_rate": 9.75580667446707e-07, "loss": 0.0137, "step": 187990 }, { "epoch": 2.008654308456648, "grad_norm": 3.7963998317718506, "learning_rate": 9.755754808502881e-07, "loss": 0.0175, "step": 188000 }, { "epoch": 2.008761151770928, "grad_norm": 0.08940255641937256, "learning_rate": 9.755702937169083e-07, "loss": 0.0181, "step": 188010 }, { "epoch": 2.0088679950852075, "grad_norm": 1.9081058502197266, "learning_rate": 9.75565106046574e-07, "loss": 0.033, "step": 188020 }, { "epoch": 2.008974838399487, "grad_norm": 1.8333697319030762, "learning_rate": 9.755599178392906e-07, "loss": 0.0341, "step": 188030 }, { "epoch": 2.009081681713767, "grad_norm": 5.44492244720459, "learning_rate": 9.755547290950642e-07, "loss": 0.008, "step": 188040 }, { "epoch": 2.0091885250280463, "grad_norm": 13.136791229248047, "learning_rate": 9.755495398139009e-07, "loss": 0.068, "step": 188050 }, { "epoch": 2.009295368342326, "grad_norm": 0.18592198193073273, "learning_rate": 9.75544349995806e-07, "loss": 0.0116, "step": 188060 }, { "epoch": 2.0094022116566057, "grad_norm": 0.2635652422904968, "learning_rate": 9.755391596407858e-07, "loss": 0.0454, "step": 188070 }, { "epoch": 2.009509054970885, "grad_norm": 0.11531748622655869, "learning_rate": 9.755339687488457e-07, "loss": 0.0094, "step": 188080 }, { "epoch": 2.0096158982851646, "grad_norm": 0.9740133881568909, "learning_rate": 9.75528777319992e-07, "loss": 0.0245, "step": 188090 }, { "epoch": 2.0097227415994445, "grad_norm": 0.042500171810388565, "learning_rate": 9.755235853542306e-07, "loss": 0.0122, "step": 188100 }, { "epoch": 2.009829584913724, "grad_norm": 0.2013498693704605, "learning_rate": 9.755183928515671e-07, "loss": 0.0156, "step": 188110 }, { "epoch": 2.0099364282280034, "grad_norm": 0.07149732112884521, "learning_rate": 9.755131998120074e-07, "loss": 0.0111, "step": 188120 }, { "epoch": 2.0100432715422833, "grad_norm": 0.13608770072460175, "learning_rate": 9.755080062355574e-07, "loss": 0.0136, "step": 188130 }, { "epoch": 2.010150114856563, "grad_norm": 0.023776212707161903, "learning_rate": 9.755028121222229e-07, "loss": 0.0206, "step": 188140 }, { "epoch": 2.0102569581708423, "grad_norm": 0.019302213564515114, "learning_rate": 9.7549761747201e-07, "loss": 0.0066, "step": 188150 }, { "epoch": 2.010363801485122, "grad_norm": 0.04696355015039444, "learning_rate": 9.754924222849244e-07, "loss": 0.017, "step": 188160 }, { "epoch": 2.0104706447994016, "grad_norm": 0.05482511967420578, "learning_rate": 9.754872265609718e-07, "loss": 0.0067, "step": 188170 }, { "epoch": 2.010577488113681, "grad_norm": 0.27499717473983765, "learning_rate": 9.754820303001583e-07, "loss": 0.0062, "step": 188180 }, { "epoch": 2.010684331427961, "grad_norm": 0.017997711896896362, "learning_rate": 9.754768335024897e-07, "loss": 0.0087, "step": 188190 }, { "epoch": 2.0107911747422405, "grad_norm": 1.578450083732605, "learning_rate": 9.75471636167972e-07, "loss": 0.0301, "step": 188200 }, { "epoch": 2.01089801805652, "grad_norm": 0.025516238063573837, "learning_rate": 9.754664382966109e-07, "loss": 0.0074, "step": 188210 }, { "epoch": 2.0110048613708, "grad_norm": 0.7061652541160583, "learning_rate": 9.754612398884121e-07, "loss": 0.0146, "step": 188220 }, { "epoch": 2.0111117046850793, "grad_norm": 0.022458810359239578, "learning_rate": 9.75456040943382e-07, "loss": 0.0066, "step": 188230 }, { "epoch": 2.0112185479993587, "grad_norm": 0.21589557826519012, "learning_rate": 9.75450841461526e-07, "loss": 0.0295, "step": 188240 }, { "epoch": 2.0113253913136386, "grad_norm": 0.0521242693066597, "learning_rate": 9.754456414428499e-07, "loss": 0.0329, "step": 188250 }, { "epoch": 2.011432234627918, "grad_norm": 0.16736388206481934, "learning_rate": 9.7544044088736e-07, "loss": 0.0085, "step": 188260 }, { "epoch": 2.0115390779421976, "grad_norm": 0.043489258736371994, "learning_rate": 9.75435239795062e-07, "loss": 0.0153, "step": 188270 }, { "epoch": 2.0116459212564775, "grad_norm": 4.349844932556152, "learning_rate": 9.754300381659616e-07, "loss": 0.003, "step": 188280 }, { "epoch": 2.011752764570757, "grad_norm": 1.571914792060852, "learning_rate": 9.754248360000647e-07, "loss": 0.0217, "step": 188290 }, { "epoch": 2.011859607885037, "grad_norm": 0.05033176392316818, "learning_rate": 9.754196332973776e-07, "loss": 0.0125, "step": 188300 }, { "epoch": 2.0119664511993163, "grad_norm": 0.04112158343195915, "learning_rate": 9.754144300579058e-07, "loss": 0.0207, "step": 188310 }, { "epoch": 2.0120732945135957, "grad_norm": 0.7369222640991211, "learning_rate": 9.75409226281655e-07, "loss": 0.0092, "step": 188320 }, { "epoch": 2.0121801378278756, "grad_norm": 2.3107218742370605, "learning_rate": 9.754040219686314e-07, "loss": 0.0154, "step": 188330 }, { "epoch": 2.012286981142155, "grad_norm": 0.6343516707420349, "learning_rate": 9.753988171188407e-07, "loss": 0.0378, "step": 188340 }, { "epoch": 2.0123938244564346, "grad_norm": 0.2018258273601532, "learning_rate": 9.75393611732289e-07, "loss": 0.0095, "step": 188350 }, { "epoch": 2.0125006677707145, "grad_norm": 5.172398090362549, "learning_rate": 9.75388405808982e-07, "loss": 0.0364, "step": 188360 }, { "epoch": 2.012607511084994, "grad_norm": 0.022768516093492508, "learning_rate": 9.753831993489256e-07, "loss": 0.0044, "step": 188370 }, { "epoch": 2.0127143543992734, "grad_norm": 4.921689510345459, "learning_rate": 9.753779923521258e-07, "loss": 0.0318, "step": 188380 }, { "epoch": 2.0128211977135533, "grad_norm": 1.2171156406402588, "learning_rate": 9.753727848185883e-07, "loss": 0.0177, "step": 188390 }, { "epoch": 2.0129280410278327, "grad_norm": 0.10490363836288452, "learning_rate": 9.75367576748319e-07, "loss": 0.0086, "step": 188400 }, { "epoch": 2.013034884342112, "grad_norm": 17.053936004638672, "learning_rate": 9.753623681413237e-07, "loss": 0.0219, "step": 188410 }, { "epoch": 2.013141727656392, "grad_norm": 0.7955083847045898, "learning_rate": 9.753571589976085e-07, "loss": 0.0049, "step": 188420 }, { "epoch": 2.0132485709706716, "grad_norm": 4.793671607971191, "learning_rate": 9.753519493171794e-07, "loss": 0.0303, "step": 188430 }, { "epoch": 2.013355414284951, "grad_norm": 7.609452724456787, "learning_rate": 9.753467391000418e-07, "loss": 0.0452, "step": 188440 }, { "epoch": 2.013462257599231, "grad_norm": 5.7213311195373535, "learning_rate": 9.75341528346202e-07, "loss": 0.0315, "step": 188450 }, { "epoch": 2.0135691009135104, "grad_norm": 0.02409893460571766, "learning_rate": 9.753363170556657e-07, "loss": 0.02, "step": 188460 }, { "epoch": 2.01367594422779, "grad_norm": 1.389972448348999, "learning_rate": 9.753311052284387e-07, "loss": 0.0331, "step": 188470 }, { "epoch": 2.0137827875420697, "grad_norm": 7.954373359680176, "learning_rate": 9.753258928645273e-07, "loss": 0.046, "step": 188480 }, { "epoch": 2.013889630856349, "grad_norm": 0.017565257847309113, "learning_rate": 9.753206799639368e-07, "loss": 0.0063, "step": 188490 }, { "epoch": 2.0139964741706287, "grad_norm": 0.06815307587385178, "learning_rate": 9.753154665266736e-07, "loss": 0.0323, "step": 188500 }, { "epoch": 2.0141033174849086, "grad_norm": 0.390889048576355, "learning_rate": 9.753102525527433e-07, "loss": 0.0188, "step": 188510 }, { "epoch": 2.014210160799188, "grad_norm": 0.24049659073352814, "learning_rate": 9.753050380421516e-07, "loss": 0.0109, "step": 188520 }, { "epoch": 2.0143170041134675, "grad_norm": 2.0879361629486084, "learning_rate": 9.75299822994905e-07, "loss": 0.0401, "step": 188530 }, { "epoch": 2.0144238474277474, "grad_norm": 3.163430690765381, "learning_rate": 9.752946074110088e-07, "loss": 0.0707, "step": 188540 }, { "epoch": 2.014530690742027, "grad_norm": 0.2863597571849823, "learning_rate": 9.752893912904694e-07, "loss": 0.0036, "step": 188550 }, { "epoch": 2.0146375340563063, "grad_norm": 3.570688009262085, "learning_rate": 9.75284174633292e-07, "loss": 0.0075, "step": 188560 }, { "epoch": 2.014744377370586, "grad_norm": 0.01503780297935009, "learning_rate": 9.752789574394833e-07, "loss": 0.0038, "step": 188570 }, { "epoch": 2.0148512206848657, "grad_norm": 0.021503308787941933, "learning_rate": 9.752737397090485e-07, "loss": 0.0106, "step": 188580 }, { "epoch": 2.014958063999145, "grad_norm": 6.406716823577881, "learning_rate": 9.75268521441994e-07, "loss": 0.0236, "step": 188590 }, { "epoch": 2.015064907313425, "grad_norm": 5.129262924194336, "learning_rate": 9.752633026383252e-07, "loss": 0.0513, "step": 188600 }, { "epoch": 2.0151717506277045, "grad_norm": 1.1874043941497803, "learning_rate": 9.752580832980483e-07, "loss": 0.0257, "step": 188610 }, { "epoch": 2.015278593941984, "grad_norm": 0.30296429991722107, "learning_rate": 9.752528634211694e-07, "loss": 0.0257, "step": 188620 }, { "epoch": 2.015385437256264, "grad_norm": 0.034660302102565765, "learning_rate": 9.75247643007694e-07, "loss": 0.027, "step": 188630 }, { "epoch": 2.0154922805705433, "grad_norm": 0.5849926471710205, "learning_rate": 9.75242422057628e-07, "loss": 0.0037, "step": 188640 }, { "epoch": 2.0155991238848228, "grad_norm": 0.11192986369132996, "learning_rate": 9.752372005709778e-07, "loss": 0.0245, "step": 188650 }, { "epoch": 2.0157059671991027, "grad_norm": 1.2271733283996582, "learning_rate": 9.752319785477484e-07, "loss": 0.0044, "step": 188660 }, { "epoch": 2.015812810513382, "grad_norm": 0.06771783530712128, "learning_rate": 9.752267559879468e-07, "loss": 0.0011, "step": 188670 }, { "epoch": 2.0159196538276616, "grad_norm": 2.5330677032470703, "learning_rate": 9.75221532891578e-07, "loss": 0.0079, "step": 188680 }, { "epoch": 2.0160264971419415, "grad_norm": 0.9317694306373596, "learning_rate": 9.752163092586483e-07, "loss": 0.0073, "step": 188690 }, { "epoch": 2.016133340456221, "grad_norm": 2.9538168907165527, "learning_rate": 9.752110850891634e-07, "loss": 0.0215, "step": 188700 }, { "epoch": 2.0162401837705004, "grad_norm": 0.0014286406803876162, "learning_rate": 9.752058603831294e-07, "loss": 0.0043, "step": 188710 }, { "epoch": 2.0163470270847803, "grad_norm": 9.178149223327637, "learning_rate": 9.752006351405523e-07, "loss": 0.0436, "step": 188720 }, { "epoch": 2.0164538703990598, "grad_norm": 0.08133888989686966, "learning_rate": 9.751954093614376e-07, "loss": 0.0096, "step": 188730 }, { "epoch": 2.0165607137133392, "grad_norm": 0.0166123379021883, "learning_rate": 9.751901830457915e-07, "loss": 0.0407, "step": 188740 }, { "epoch": 2.016667557027619, "grad_norm": 0.14550670981407166, "learning_rate": 9.7518495619362e-07, "loss": 0.0114, "step": 188750 }, { "epoch": 2.0167744003418986, "grad_norm": 2.385474443435669, "learning_rate": 9.751797288049284e-07, "loss": 0.0074, "step": 188760 }, { "epoch": 2.016881243656178, "grad_norm": 0.23453277349472046, "learning_rate": 9.751745008797234e-07, "loss": 0.0143, "step": 188770 }, { "epoch": 2.016988086970458, "grad_norm": 0.7560654878616333, "learning_rate": 9.751692724180103e-07, "loss": 0.0024, "step": 188780 }, { "epoch": 2.0170949302847374, "grad_norm": 6.493438720703125, "learning_rate": 9.751640434197951e-07, "loss": 0.0288, "step": 188790 }, { "epoch": 2.017201773599017, "grad_norm": 6.129617691040039, "learning_rate": 9.751588138850842e-07, "loss": 0.0396, "step": 188800 }, { "epoch": 2.0173086169132968, "grad_norm": 1.2486521005630493, "learning_rate": 9.75153583813883e-07, "loss": 0.0259, "step": 188810 }, { "epoch": 2.0174154602275762, "grad_norm": 2.9873201847076416, "learning_rate": 9.751483532061974e-07, "loss": 0.0255, "step": 188820 }, { "epoch": 2.0175223035418557, "grad_norm": 5.750517845153809, "learning_rate": 9.751431220620337e-07, "loss": 0.0526, "step": 188830 }, { "epoch": 2.0176291468561356, "grad_norm": 1.605205774307251, "learning_rate": 9.751378903813973e-07, "loss": 0.072, "step": 188840 }, { "epoch": 2.017735990170415, "grad_norm": 0.6271908283233643, "learning_rate": 9.751326581642944e-07, "loss": 0.0083, "step": 188850 }, { "epoch": 2.0178428334846945, "grad_norm": 1.518989086151123, "learning_rate": 9.751274254107312e-07, "loss": 0.0501, "step": 188860 }, { "epoch": 2.0179496767989744, "grad_norm": 0.05181301385164261, "learning_rate": 9.751221921207128e-07, "loss": 0.0278, "step": 188870 }, { "epoch": 2.018056520113254, "grad_norm": 1.3921315670013428, "learning_rate": 9.75116958294246e-07, "loss": 0.0294, "step": 188880 }, { "epoch": 2.0181633634275333, "grad_norm": 1.9450013637542725, "learning_rate": 9.75111723931336e-07, "loss": 0.0272, "step": 188890 }, { "epoch": 2.0182702067418132, "grad_norm": 0.11733580380678177, "learning_rate": 9.751064890319892e-07, "loss": 0.0065, "step": 188900 }, { "epoch": 2.0183770500560927, "grad_norm": 0.08588738739490509, "learning_rate": 9.75101253596211e-07, "loss": 0.0021, "step": 188910 }, { "epoch": 2.018483893370372, "grad_norm": 0.011117788031697273, "learning_rate": 9.75096017624008e-07, "loss": 0.0152, "step": 188920 }, { "epoch": 2.018590736684652, "grad_norm": 0.18483474850654602, "learning_rate": 9.750907811153855e-07, "loss": 0.014, "step": 188930 }, { "epoch": 2.0186975799989315, "grad_norm": 3.320528507232666, "learning_rate": 9.750855440703498e-07, "loss": 0.0241, "step": 188940 }, { "epoch": 2.018804423313211, "grad_norm": 0.012901505455374718, "learning_rate": 9.750803064889067e-07, "loss": 0.0025, "step": 188950 }, { "epoch": 2.018911266627491, "grad_norm": 0.2929063141345978, "learning_rate": 9.750750683710618e-07, "loss": 0.009, "step": 188960 }, { "epoch": 2.0190181099417703, "grad_norm": 2.397252082824707, "learning_rate": 9.750698297168217e-07, "loss": 0.0127, "step": 188970 }, { "epoch": 2.01912495325605, "grad_norm": 7.372202396392822, "learning_rate": 9.750645905261916e-07, "loss": 0.0342, "step": 188980 }, { "epoch": 2.0192317965703297, "grad_norm": 2.0268566608428955, "learning_rate": 9.750593507991778e-07, "loss": 0.0437, "step": 188990 }, { "epoch": 2.019338639884609, "grad_norm": 0.7222005128860474, "learning_rate": 9.750541105357862e-07, "loss": 0.0501, "step": 189000 }, { "epoch": 2.0194454831988886, "grad_norm": 0.11938589811325073, "learning_rate": 9.750488697360226e-07, "loss": 0.0093, "step": 189010 }, { "epoch": 2.0195523265131685, "grad_norm": 1.301039695739746, "learning_rate": 9.750436283998928e-07, "loss": 0.0058, "step": 189020 }, { "epoch": 2.019659169827448, "grad_norm": 4.209596633911133, "learning_rate": 9.750383865274033e-07, "loss": 0.0183, "step": 189030 }, { "epoch": 2.0197660131417274, "grad_norm": 0.24566663801670074, "learning_rate": 9.750331441185593e-07, "loss": 0.0164, "step": 189040 }, { "epoch": 2.0198728564560073, "grad_norm": 0.3152802288532257, "learning_rate": 9.750279011733672e-07, "loss": 0.0036, "step": 189050 }, { "epoch": 2.019979699770287, "grad_norm": 2.2278428077697754, "learning_rate": 9.750226576918325e-07, "loss": 0.0319, "step": 189060 }, { "epoch": 2.0200865430845667, "grad_norm": 13.552234649658203, "learning_rate": 9.750174136739616e-07, "loss": 0.0251, "step": 189070 }, { "epoch": 2.020193386398846, "grad_norm": 2.9552361965179443, "learning_rate": 9.7501216911976e-07, "loss": 0.0292, "step": 189080 }, { "epoch": 2.0203002297131256, "grad_norm": 0.8597739338874817, "learning_rate": 9.75006924029234e-07, "loss": 0.0239, "step": 189090 }, { "epoch": 2.0204070730274055, "grad_norm": 0.18039433658123016, "learning_rate": 9.75001678402389e-07, "loss": 0.0113, "step": 189100 }, { "epoch": 2.020513916341685, "grad_norm": 3.5329883098602295, "learning_rate": 9.749964322392317e-07, "loss": 0.015, "step": 189110 }, { "epoch": 2.0206207596559644, "grad_norm": 0.9281449913978577, "learning_rate": 9.749911855397674e-07, "loss": 0.0182, "step": 189120 }, { "epoch": 2.0207276029702443, "grad_norm": 0.023187503218650818, "learning_rate": 9.749859383040022e-07, "loss": 0.0315, "step": 189130 }, { "epoch": 2.020834446284524, "grad_norm": 0.02891002595424652, "learning_rate": 9.74980690531942e-07, "loss": 0.027, "step": 189140 }, { "epoch": 2.0209412895988033, "grad_norm": 0.292044460773468, "learning_rate": 9.749754422235928e-07, "loss": 0.0205, "step": 189150 }, { "epoch": 2.021048132913083, "grad_norm": 0.002194416243582964, "learning_rate": 9.749701933789604e-07, "loss": 0.0091, "step": 189160 }, { "epoch": 2.0211549762273626, "grad_norm": 8.354019165039062, "learning_rate": 9.749649439980506e-07, "loss": 0.0375, "step": 189170 }, { "epoch": 2.021261819541642, "grad_norm": 10.101054191589355, "learning_rate": 9.7495969408087e-07, "loss": 0.0135, "step": 189180 }, { "epoch": 2.021368662855922, "grad_norm": 2.3272974491119385, "learning_rate": 9.749544436274238e-07, "loss": 0.018, "step": 189190 }, { "epoch": 2.0214755061702014, "grad_norm": 3.260211229324341, "learning_rate": 9.749491926377183e-07, "loss": 0.0144, "step": 189200 }, { "epoch": 2.021582349484481, "grad_norm": 4.889997482299805, "learning_rate": 9.749439411117592e-07, "loss": 0.0103, "step": 189210 }, { "epoch": 2.021689192798761, "grad_norm": 2.4609153270721436, "learning_rate": 9.749386890495525e-07, "loss": 0.0268, "step": 189220 }, { "epoch": 2.0217960361130403, "grad_norm": 0.15323029458522797, "learning_rate": 9.749334364511045e-07, "loss": 0.0098, "step": 189230 }, { "epoch": 2.0219028794273197, "grad_norm": 0.006049387156963348, "learning_rate": 9.749281833164204e-07, "loss": 0.0232, "step": 189240 }, { "epoch": 2.0220097227415996, "grad_norm": 4.2017292976379395, "learning_rate": 9.749229296455067e-07, "loss": 0.0304, "step": 189250 }, { "epoch": 2.022116566055879, "grad_norm": 0.45160067081451416, "learning_rate": 9.74917675438369e-07, "loss": 0.0123, "step": 189260 }, { "epoch": 2.0222234093701585, "grad_norm": 1.8975334167480469, "learning_rate": 9.749124206950138e-07, "loss": 0.0301, "step": 189270 }, { "epoch": 2.0223302526844384, "grad_norm": 9.516152381896973, "learning_rate": 9.749071654154462e-07, "loss": 0.0123, "step": 189280 }, { "epoch": 2.022437095998718, "grad_norm": 0.17532752454280853, "learning_rate": 9.749019095996729e-07, "loss": 0.0156, "step": 189290 }, { "epoch": 2.0225439393129974, "grad_norm": 0.3027781844139099, "learning_rate": 9.748966532476993e-07, "loss": 0.0392, "step": 189300 }, { "epoch": 2.0226507826272773, "grad_norm": 0.7577374577522278, "learning_rate": 9.748913963595315e-07, "loss": 0.0118, "step": 189310 }, { "epoch": 2.0227576259415567, "grad_norm": 1.5085387229919434, "learning_rate": 9.748861389351757e-07, "loss": 0.0052, "step": 189320 }, { "epoch": 2.022864469255836, "grad_norm": 0.8848700523376465, "learning_rate": 9.748808809746375e-07, "loss": 0.0392, "step": 189330 }, { "epoch": 2.022971312570116, "grad_norm": 0.0011688570957630873, "learning_rate": 9.74875622477923e-07, "loss": 0.0245, "step": 189340 }, { "epoch": 2.0230781558843955, "grad_norm": 0.012034442275762558, "learning_rate": 9.748703634450378e-07, "loss": 0.0052, "step": 189350 }, { "epoch": 2.023184999198675, "grad_norm": 7.992674350738525, "learning_rate": 9.748651038759885e-07, "loss": 0.0343, "step": 189360 }, { "epoch": 2.023291842512955, "grad_norm": 0.26979875564575195, "learning_rate": 9.748598437707804e-07, "loss": 0.0175, "step": 189370 }, { "epoch": 2.0233986858272344, "grad_norm": 2.4683494567871094, "learning_rate": 9.748545831294197e-07, "loss": 0.0352, "step": 189380 }, { "epoch": 2.023505529141514, "grad_norm": 0.08442126214504242, "learning_rate": 9.748493219519124e-07, "loss": 0.0117, "step": 189390 }, { "epoch": 2.0236123724557937, "grad_norm": 1.0888289213180542, "learning_rate": 9.748440602382646e-07, "loss": 0.0147, "step": 189400 }, { "epoch": 2.023719215770073, "grad_norm": 0.035838328301906586, "learning_rate": 9.748387979884817e-07, "loss": 0.0302, "step": 189410 }, { "epoch": 2.0238260590843526, "grad_norm": 0.00228365371003747, "learning_rate": 9.748335352025702e-07, "loss": 0.0099, "step": 189420 }, { "epoch": 2.0239329023986325, "grad_norm": 8.715761184692383, "learning_rate": 9.748282718805356e-07, "loss": 0.0244, "step": 189430 }, { "epoch": 2.024039745712912, "grad_norm": 0.36739715933799744, "learning_rate": 9.748230080223841e-07, "loss": 0.0143, "step": 189440 }, { "epoch": 2.0241465890271915, "grad_norm": 0.5935021638870239, "learning_rate": 9.748177436281218e-07, "loss": 0.0162, "step": 189450 }, { "epoch": 2.0242534323414714, "grad_norm": 5.9515838623046875, "learning_rate": 9.74812478697754e-07, "loss": 0.0134, "step": 189460 }, { "epoch": 2.024360275655751, "grad_norm": 0.005586820654571056, "learning_rate": 9.748072132312874e-07, "loss": 0.0297, "step": 189470 }, { "epoch": 2.0244671189700303, "grad_norm": 1.9203180074691772, "learning_rate": 9.748019472287276e-07, "loss": 0.0459, "step": 189480 }, { "epoch": 2.02457396228431, "grad_norm": 0.12369812279939651, "learning_rate": 9.747966806900805e-07, "loss": 0.0147, "step": 189490 }, { "epoch": 2.0246808055985897, "grad_norm": 0.014560568146407604, "learning_rate": 9.747914136153521e-07, "loss": 0.0182, "step": 189500 }, { "epoch": 2.024787648912869, "grad_norm": 0.6226978898048401, "learning_rate": 9.747861460045483e-07, "loss": 0.0183, "step": 189510 }, { "epoch": 2.024894492227149, "grad_norm": 3.306987762451172, "learning_rate": 9.747808778576752e-07, "loss": 0.013, "step": 189520 }, { "epoch": 2.0250013355414285, "grad_norm": 0.7571744322776794, "learning_rate": 9.747756091747388e-07, "loss": 0.0177, "step": 189530 }, { "epoch": 2.025108178855708, "grad_norm": 1.5620774030685425, "learning_rate": 9.747703399557446e-07, "loss": 0.0034, "step": 189540 }, { "epoch": 2.025215022169988, "grad_norm": 0.00563656585291028, "learning_rate": 9.74765070200699e-07, "loss": 0.0141, "step": 189550 }, { "epoch": 2.0253218654842673, "grad_norm": 0.08034458756446838, "learning_rate": 9.747597999096079e-07, "loss": 0.0431, "step": 189560 }, { "epoch": 2.0254287087985468, "grad_norm": 1.5744433403015137, "learning_rate": 9.74754529082477e-07, "loss": 0.0157, "step": 189570 }, { "epoch": 2.0255355521128267, "grad_norm": 9.055218696594238, "learning_rate": 9.747492577193125e-07, "loss": 0.0129, "step": 189580 }, { "epoch": 2.025642395427106, "grad_norm": 0.015254507772624493, "learning_rate": 9.747439858201201e-07, "loss": 0.0021, "step": 189590 }, { "epoch": 2.0257492387413856, "grad_norm": 7.01866340637207, "learning_rate": 9.74738713384906e-07, "loss": 0.0339, "step": 189600 }, { "epoch": 2.0258560820556655, "grad_norm": 0.16136790812015533, "learning_rate": 9.747334404136762e-07, "loss": 0.058, "step": 189610 }, { "epoch": 2.025962925369945, "grad_norm": 6.628762722015381, "learning_rate": 9.747281669064365e-07, "loss": 0.0275, "step": 189620 }, { "epoch": 2.0260697686842244, "grad_norm": 1.3786485195159912, "learning_rate": 9.747228928631928e-07, "loss": 0.0052, "step": 189630 }, { "epoch": 2.0261766119985043, "grad_norm": 0.40736883878707886, "learning_rate": 9.74717618283951e-07, "loss": 0.0102, "step": 189640 }, { "epoch": 2.0262834553127838, "grad_norm": 0.20871850848197937, "learning_rate": 9.747123431687173e-07, "loss": 0.0026, "step": 189650 }, { "epoch": 2.026390298627063, "grad_norm": 0.2855357825756073, "learning_rate": 9.747070675174976e-07, "loss": 0.0218, "step": 189660 }, { "epoch": 2.026497141941343, "grad_norm": 10.799728393554688, "learning_rate": 9.747017913302975e-07, "loss": 0.0149, "step": 189670 }, { "epoch": 2.0266039852556226, "grad_norm": 0.00774731021374464, "learning_rate": 9.746965146071236e-07, "loss": 0.0244, "step": 189680 }, { "epoch": 2.026710828569902, "grad_norm": 5.760664463043213, "learning_rate": 9.746912373479813e-07, "loss": 0.0443, "step": 189690 }, { "epoch": 2.026817671884182, "grad_norm": 3.296161413192749, "learning_rate": 9.74685959552877e-07, "loss": 0.0114, "step": 189700 }, { "epoch": 2.0269245151984614, "grad_norm": 0.03531546890735626, "learning_rate": 9.746806812218162e-07, "loss": 0.0227, "step": 189710 }, { "epoch": 2.027031358512741, "grad_norm": 0.08584939688444138, "learning_rate": 9.74675402354805e-07, "loss": 0.092, "step": 189720 }, { "epoch": 2.0271382018270208, "grad_norm": 0.07050026953220367, "learning_rate": 9.746701229518497e-07, "loss": 0.0207, "step": 189730 }, { "epoch": 2.0272450451413, "grad_norm": 2.2559845447540283, "learning_rate": 9.746648430129558e-07, "loss": 0.0132, "step": 189740 }, { "epoch": 2.0273518884555797, "grad_norm": 2.0850934982299805, "learning_rate": 9.746595625381296e-07, "loss": 0.0094, "step": 189750 }, { "epoch": 2.0274587317698596, "grad_norm": 0.0824747309088707, "learning_rate": 9.74654281527377e-07, "loss": 0.0207, "step": 189760 }, { "epoch": 2.027565575084139, "grad_norm": 0.005096794571727514, "learning_rate": 9.746489999807038e-07, "loss": 0.026, "step": 189770 }, { "epoch": 2.027672418398419, "grad_norm": 6.722715377807617, "learning_rate": 9.746437178981158e-07, "loss": 0.0345, "step": 189780 }, { "epoch": 2.0277792617126984, "grad_norm": 7.2346882820129395, "learning_rate": 9.746384352796197e-07, "loss": 0.0352, "step": 189790 }, { "epoch": 2.027886105026978, "grad_norm": 3.662536382675171, "learning_rate": 9.746331521252206e-07, "loss": 0.0468, "step": 189800 }, { "epoch": 2.0279929483412578, "grad_norm": 2.5733041763305664, "learning_rate": 9.74627868434925e-07, "loss": 0.0281, "step": 189810 }, { "epoch": 2.028099791655537, "grad_norm": 2.3417797088623047, "learning_rate": 9.746225842087387e-07, "loss": 0.0198, "step": 189820 }, { "epoch": 2.0282066349698167, "grad_norm": 0.10397521406412125, "learning_rate": 9.746172994466677e-07, "loss": 0.0685, "step": 189830 }, { "epoch": 2.0283134782840966, "grad_norm": 0.010875658132135868, "learning_rate": 9.74612014148718e-07, "loss": 0.0257, "step": 189840 }, { "epoch": 2.028420321598376, "grad_norm": 0.012980656698346138, "learning_rate": 9.746067283148955e-07, "loss": 0.0056, "step": 189850 }, { "epoch": 2.0285271649126555, "grad_norm": 10.306748390197754, "learning_rate": 9.746014419452062e-07, "loss": 0.0392, "step": 189860 }, { "epoch": 2.0286340082269354, "grad_norm": 5.802779197692871, "learning_rate": 9.745961550396558e-07, "loss": 0.0084, "step": 189870 }, { "epoch": 2.028740851541215, "grad_norm": 1.6381253004074097, "learning_rate": 9.745908675982509e-07, "loss": 0.039, "step": 189880 }, { "epoch": 2.0288476948554943, "grad_norm": 4.113234043121338, "learning_rate": 9.745855796209967e-07, "loss": 0.0969, "step": 189890 }, { "epoch": 2.0289545381697742, "grad_norm": 2.481558084487915, "learning_rate": 9.745802911078999e-07, "loss": 0.0126, "step": 189900 }, { "epoch": 2.0290613814840537, "grad_norm": 0.24296212196350098, "learning_rate": 9.74575002058966e-07, "loss": 0.0404, "step": 189910 }, { "epoch": 2.029168224798333, "grad_norm": 0.2924489974975586, "learning_rate": 9.745697124742012e-07, "loss": 0.0577, "step": 189920 }, { "epoch": 2.029275068112613, "grad_norm": 0.15467993915081024, "learning_rate": 9.745644223536111e-07, "loss": 0.0103, "step": 189930 }, { "epoch": 2.0293819114268925, "grad_norm": 4.771414756774902, "learning_rate": 9.745591316972022e-07, "loss": 0.0175, "step": 189940 }, { "epoch": 2.029488754741172, "grad_norm": 0.019879525527358055, "learning_rate": 9.7455384050498e-07, "loss": 0.0193, "step": 189950 }, { "epoch": 2.029595598055452, "grad_norm": 2.3923802375793457, "learning_rate": 9.74548548776951e-07, "loss": 0.0127, "step": 189960 }, { "epoch": 2.0297024413697313, "grad_norm": 0.023532001301646233, "learning_rate": 9.745432565131206e-07, "loss": 0.0366, "step": 189970 }, { "epoch": 2.029809284684011, "grad_norm": 1.821995496749878, "learning_rate": 9.745379637134954e-07, "loss": 0.0084, "step": 189980 }, { "epoch": 2.0299161279982907, "grad_norm": 5.0312275886535645, "learning_rate": 9.745326703780806e-07, "loss": 0.0393, "step": 189990 }, { "epoch": 2.03002297131257, "grad_norm": 0.11062425374984741, "learning_rate": 9.745273765068829e-07, "loss": 0.0177, "step": 190000 }, { "epoch": 2.0301298146268496, "grad_norm": 1.1668672561645508, "learning_rate": 9.745220820999078e-07, "loss": 0.0026, "step": 190010 }, { "epoch": 2.0302366579411295, "grad_norm": 0.07042538374662399, "learning_rate": 9.745167871571616e-07, "loss": 0.012, "step": 190020 }, { "epoch": 2.030343501255409, "grad_norm": 0.004058329854160547, "learning_rate": 9.7451149167865e-07, "loss": 0.017, "step": 190030 }, { "epoch": 2.0304503445696884, "grad_norm": 0.023906860500574112, "learning_rate": 9.745061956643793e-07, "loss": 0.0196, "step": 190040 }, { "epoch": 2.0305571878839683, "grad_norm": 1.6441938877105713, "learning_rate": 9.74500899114355e-07, "loss": 0.0052, "step": 190050 }, { "epoch": 2.030664031198248, "grad_norm": 1.853363037109375, "learning_rate": 9.744956020285836e-07, "loss": 0.0168, "step": 190060 }, { "epoch": 2.0307708745125272, "grad_norm": 15.74358081817627, "learning_rate": 9.744903044070708e-07, "loss": 0.0682, "step": 190070 }, { "epoch": 2.030877717826807, "grad_norm": 4.54096794128418, "learning_rate": 9.744850062498226e-07, "loss": 0.023, "step": 190080 }, { "epoch": 2.0309845611410866, "grad_norm": 4.397576808929443, "learning_rate": 9.74479707556845e-07, "loss": 0.0091, "step": 190090 }, { "epoch": 2.031091404455366, "grad_norm": 1.9486196041107178, "learning_rate": 9.744744083281442e-07, "loss": 0.0176, "step": 190100 }, { "epoch": 2.031198247769646, "grad_norm": 0.41023972630500793, "learning_rate": 9.744691085637257e-07, "loss": 0.0027, "step": 190110 }, { "epoch": 2.0313050910839254, "grad_norm": 7.610008239746094, "learning_rate": 9.744638082635962e-07, "loss": 0.0179, "step": 190120 }, { "epoch": 2.031411934398205, "grad_norm": 18.710819244384766, "learning_rate": 9.744585074277606e-07, "loss": 0.0341, "step": 190130 }, { "epoch": 2.031518777712485, "grad_norm": 0.10615267604589462, "learning_rate": 9.74453206056226e-07, "loss": 0.0356, "step": 190140 }, { "epoch": 2.0316256210267643, "grad_norm": 9.184849739074707, "learning_rate": 9.744479041489978e-07, "loss": 0.0279, "step": 190150 }, { "epoch": 2.0317324643410437, "grad_norm": 0.2469688206911087, "learning_rate": 9.744426017060822e-07, "loss": 0.0422, "step": 190160 }, { "epoch": 2.0318393076553236, "grad_norm": 2.458224058151245, "learning_rate": 9.74437298727485e-07, "loss": 0.0045, "step": 190170 }, { "epoch": 2.031946150969603, "grad_norm": 0.008792664855718613, "learning_rate": 9.744319952132122e-07, "loss": 0.0093, "step": 190180 }, { "epoch": 2.0320529942838825, "grad_norm": 0.14175373315811157, "learning_rate": 9.7442669116327e-07, "loss": 0.0555, "step": 190190 }, { "epoch": 2.0321598375981624, "grad_norm": 0.015589501708745956, "learning_rate": 9.744213865776642e-07, "loss": 0.0125, "step": 190200 }, { "epoch": 2.032266680912442, "grad_norm": 0.008032470941543579, "learning_rate": 9.74416081456401e-07, "loss": 0.014, "step": 190210 }, { "epoch": 2.0323735242267214, "grad_norm": 0.09475857764482498, "learning_rate": 9.74410775799486e-07, "loss": 0.0216, "step": 190220 }, { "epoch": 2.0324803675410013, "grad_norm": 0.048736583441495895, "learning_rate": 9.744054696069256e-07, "loss": 0.0119, "step": 190230 }, { "epoch": 2.0325872108552807, "grad_norm": 0.032489657402038574, "learning_rate": 9.744001628787254e-07, "loss": 0.029, "step": 190240 }, { "epoch": 2.03269405416956, "grad_norm": 6.443805694580078, "learning_rate": 9.74394855614892e-07, "loss": 0.0265, "step": 190250 }, { "epoch": 2.03280089748384, "grad_norm": 5.495859146118164, "learning_rate": 9.743895478154306e-07, "loss": 0.009, "step": 190260 }, { "epoch": 2.0329077407981195, "grad_norm": 5.7355852127075195, "learning_rate": 9.743842394803478e-07, "loss": 0.0381, "step": 190270 }, { "epoch": 2.033014584112399, "grad_norm": 1.5462567806243896, "learning_rate": 9.743789306096496e-07, "loss": 0.0408, "step": 190280 }, { "epoch": 2.033121427426679, "grad_norm": 0.3295968174934387, "learning_rate": 9.743736212033413e-07, "loss": 0.0109, "step": 190290 }, { "epoch": 2.0332282707409584, "grad_norm": 0.16270451247692108, "learning_rate": 9.743683112614298e-07, "loss": 0.0193, "step": 190300 }, { "epoch": 2.033335114055238, "grad_norm": 0.20229406654834747, "learning_rate": 9.743630007839203e-07, "loss": 0.0423, "step": 190310 }, { "epoch": 2.0334419573695177, "grad_norm": 0.0070878625847399235, "learning_rate": 9.743576897708195e-07, "loss": 0.057, "step": 190320 }, { "epoch": 2.033548800683797, "grad_norm": 5.512731552124023, "learning_rate": 9.743523782221329e-07, "loss": 0.0118, "step": 190330 }, { "epoch": 2.0336556439980766, "grad_norm": 1.233515739440918, "learning_rate": 9.743470661378667e-07, "loss": 0.0168, "step": 190340 }, { "epoch": 2.0337624873123565, "grad_norm": 1.0592796802520752, "learning_rate": 9.743417535180268e-07, "loss": 0.0122, "step": 190350 }, { "epoch": 2.033869330626636, "grad_norm": 3.8785605430603027, "learning_rate": 9.743364403626192e-07, "loss": 0.0108, "step": 190360 }, { "epoch": 2.0339761739409155, "grad_norm": 1.76935875415802, "learning_rate": 9.743311266716503e-07, "loss": 0.0196, "step": 190370 }, { "epoch": 2.0340830172551954, "grad_norm": 6.3691277503967285, "learning_rate": 9.743258124451255e-07, "loss": 0.0392, "step": 190380 }, { "epoch": 2.034189860569475, "grad_norm": 0.44880205392837524, "learning_rate": 9.74320497683051e-07, "loss": 0.0136, "step": 190390 }, { "epoch": 2.0342967038837543, "grad_norm": 0.7745261192321777, "learning_rate": 9.74315182385433e-07, "loss": 0.0222, "step": 190400 }, { "epoch": 2.034403547198034, "grad_norm": 0.18291987478733063, "learning_rate": 9.743098665522773e-07, "loss": 0.0057, "step": 190410 }, { "epoch": 2.0345103905123136, "grad_norm": 0.00809663999825716, "learning_rate": 9.7430455018359e-07, "loss": 0.0158, "step": 190420 }, { "epoch": 2.034617233826593, "grad_norm": 0.0045448183082044125, "learning_rate": 9.74299233279377e-07, "loss": 0.0083, "step": 190430 }, { "epoch": 2.034724077140873, "grad_norm": 4.225246429443359, "learning_rate": 9.742939158396445e-07, "loss": 0.0129, "step": 190440 }, { "epoch": 2.0348309204551525, "grad_norm": 0.791000247001648, "learning_rate": 9.742885978643981e-07, "loss": 0.0407, "step": 190450 }, { "epoch": 2.034937763769432, "grad_norm": 2.0258467197418213, "learning_rate": 9.742832793536445e-07, "loss": 0.0179, "step": 190460 }, { "epoch": 2.035044607083712, "grad_norm": 0.560756266117096, "learning_rate": 9.74277960307389e-07, "loss": 0.0181, "step": 190470 }, { "epoch": 2.0351514503979913, "grad_norm": 3.6164088249206543, "learning_rate": 9.742726407256379e-07, "loss": 0.0204, "step": 190480 }, { "epoch": 2.0352582937122707, "grad_norm": 0.038522008806467056, "learning_rate": 9.742673206083973e-07, "loss": 0.0144, "step": 190490 }, { "epoch": 2.0353651370265506, "grad_norm": 0.02346799336373806, "learning_rate": 9.74261999955673e-07, "loss": 0.0155, "step": 190500 }, { "epoch": 2.03547198034083, "grad_norm": 6.929590225219727, "learning_rate": 9.74256678767471e-07, "loss": 0.0074, "step": 190510 }, { "epoch": 2.0355788236551096, "grad_norm": 3.4014720916748047, "learning_rate": 9.742513570437975e-07, "loss": 0.0433, "step": 190520 }, { "epoch": 2.0356856669693895, "grad_norm": 0.5759037137031555, "learning_rate": 9.742460347846585e-07, "loss": 0.0154, "step": 190530 }, { "epoch": 2.035792510283669, "grad_norm": 0.2697739601135254, "learning_rate": 9.742407119900599e-07, "loss": 0.0083, "step": 190540 }, { "epoch": 2.035899353597949, "grad_norm": 0.5934727191925049, "learning_rate": 9.742353886600078e-07, "loss": 0.0136, "step": 190550 }, { "epoch": 2.0360061969122283, "grad_norm": 2.378248453140259, "learning_rate": 9.742300647945081e-07, "loss": 0.0039, "step": 190560 }, { "epoch": 2.0361130402265077, "grad_norm": 0.0035979433450847864, "learning_rate": 9.74224740393567e-07, "loss": 0.0028, "step": 190570 }, { "epoch": 2.0362198835407876, "grad_norm": 0.7187454700469971, "learning_rate": 9.742194154571902e-07, "loss": 0.0117, "step": 190580 }, { "epoch": 2.036326726855067, "grad_norm": 2.622920274734497, "learning_rate": 9.742140899853838e-07, "loss": 0.0065, "step": 190590 }, { "epoch": 2.0364335701693466, "grad_norm": 2.9474589824676514, "learning_rate": 9.742087639781541e-07, "loss": 0.0051, "step": 190600 }, { "epoch": 2.0365404134836265, "grad_norm": 1.0010986328125, "learning_rate": 9.742034374355069e-07, "loss": 0.0389, "step": 190610 }, { "epoch": 2.036647256797906, "grad_norm": 0.033199574798345566, "learning_rate": 9.741981103574482e-07, "loss": 0.0296, "step": 190620 }, { "epoch": 2.0367541001121854, "grad_norm": 3.0676817893981934, "learning_rate": 9.74192782743984e-07, "loss": 0.0283, "step": 190630 }, { "epoch": 2.0368609434264653, "grad_norm": 0.07367925345897675, "learning_rate": 9.741874545951204e-07, "loss": 0.0114, "step": 190640 }, { "epoch": 2.0369677867407447, "grad_norm": 0.016811544075608253, "learning_rate": 9.741821259108634e-07, "loss": 0.0288, "step": 190650 }, { "epoch": 2.037074630055024, "grad_norm": 0.02037765271961689, "learning_rate": 9.74176796691219e-07, "loss": 0.0223, "step": 190660 }, { "epoch": 2.037181473369304, "grad_norm": 6.372669696807861, "learning_rate": 9.74171466936193e-07, "loss": 0.0182, "step": 190670 }, { "epoch": 2.0372883166835836, "grad_norm": 1.913279414176941, "learning_rate": 9.741661366457918e-07, "loss": 0.0099, "step": 190680 }, { "epoch": 2.037395159997863, "grad_norm": 0.22420275211334229, "learning_rate": 9.741608058200214e-07, "loss": 0.0195, "step": 190690 }, { "epoch": 2.037502003312143, "grad_norm": 0.20494475960731506, "learning_rate": 9.741554744588875e-07, "loss": 0.0284, "step": 190700 }, { "epoch": 2.0376088466264224, "grad_norm": 0.8167266845703125, "learning_rate": 9.741501425623963e-07, "loss": 0.0021, "step": 190710 }, { "epoch": 2.037715689940702, "grad_norm": 3.7057483196258545, "learning_rate": 9.741448101305537e-07, "loss": 0.0397, "step": 190720 }, { "epoch": 2.0378225332549817, "grad_norm": 1.5781633853912354, "learning_rate": 9.74139477163366e-07, "loss": 0.017, "step": 190730 }, { "epoch": 2.037929376569261, "grad_norm": 0.18208736181259155, "learning_rate": 9.741341436608388e-07, "loss": 0.0262, "step": 190740 }, { "epoch": 2.0380362198835407, "grad_norm": 0.0024286687839776278, "learning_rate": 9.741288096229787e-07, "loss": 0.0124, "step": 190750 }, { "epoch": 2.0381430631978206, "grad_norm": 1.0373624563217163, "learning_rate": 9.741234750497912e-07, "loss": 0.0236, "step": 190760 }, { "epoch": 2.0382499065121, "grad_norm": 0.051234170794487, "learning_rate": 9.741181399412825e-07, "loss": 0.0028, "step": 190770 }, { "epoch": 2.0383567498263795, "grad_norm": 0.3293147385120392, "learning_rate": 9.741128042974585e-07, "loss": 0.0026, "step": 190780 }, { "epoch": 2.0384635931406594, "grad_norm": 0.036514293402433395, "learning_rate": 9.741074681183257e-07, "loss": 0.0014, "step": 190790 }, { "epoch": 2.038570436454939, "grad_norm": 0.4647432565689087, "learning_rate": 9.741021314038896e-07, "loss": 0.0185, "step": 190800 }, { "epoch": 2.0386772797692183, "grad_norm": 1.1027389764785767, "learning_rate": 9.740967941541563e-07, "loss": 0.0081, "step": 190810 }, { "epoch": 2.038784123083498, "grad_norm": 0.1162114068865776, "learning_rate": 9.740914563691321e-07, "loss": 0.0005, "step": 190820 }, { "epoch": 2.0388909663977777, "grad_norm": 2.989058494567871, "learning_rate": 9.740861180488227e-07, "loss": 0.0119, "step": 190830 }, { "epoch": 2.038997809712057, "grad_norm": 7.958639621734619, "learning_rate": 9.740807791932344e-07, "loss": 0.0209, "step": 190840 }, { "epoch": 2.039104653026337, "grad_norm": 0.23064696788787842, "learning_rate": 9.740754398023732e-07, "loss": 0.0181, "step": 190850 }, { "epoch": 2.0392114963406165, "grad_norm": 2.8486721515655518, "learning_rate": 9.74070099876245e-07, "loss": 0.0227, "step": 190860 }, { "epoch": 2.039318339654896, "grad_norm": 6.20932149887085, "learning_rate": 9.740647594148559e-07, "loss": 0.0237, "step": 190870 }, { "epoch": 2.039425182969176, "grad_norm": 7.360678195953369, "learning_rate": 9.740594184182118e-07, "loss": 0.0223, "step": 190880 }, { "epoch": 2.0395320262834553, "grad_norm": 0.012272299267351627, "learning_rate": 9.74054076886319e-07, "loss": 0.0339, "step": 190890 }, { "epoch": 2.0396388695977348, "grad_norm": 0.006904464680701494, "learning_rate": 9.740487348191832e-07, "loss": 0.0312, "step": 190900 }, { "epoch": 2.0397457129120147, "grad_norm": 0.35029110312461853, "learning_rate": 9.740433922168108e-07, "loss": 0.0026, "step": 190910 }, { "epoch": 2.039852556226294, "grad_norm": 0.4451943337917328, "learning_rate": 9.740380490792073e-07, "loss": 0.0154, "step": 190920 }, { "epoch": 2.0399593995405736, "grad_norm": 0.014920894987881184, "learning_rate": 9.740327054063794e-07, "loss": 0.0105, "step": 190930 }, { "epoch": 2.0400662428548535, "grad_norm": 3.6947827339172363, "learning_rate": 9.740273611983327e-07, "loss": 0.0159, "step": 190940 }, { "epoch": 2.040173086169133, "grad_norm": 0.4266299903392792, "learning_rate": 9.740220164550733e-07, "loss": 0.0366, "step": 190950 }, { "epoch": 2.0402799294834124, "grad_norm": 0.015272571705281734, "learning_rate": 9.740166711766071e-07, "loss": 0.018, "step": 190960 }, { "epoch": 2.0403867727976923, "grad_norm": 0.16133694350719452, "learning_rate": 9.740113253629405e-07, "loss": 0.0035, "step": 190970 }, { "epoch": 2.0404936161119718, "grad_norm": 0.21080000698566437, "learning_rate": 9.740059790140793e-07, "loss": 0.0286, "step": 190980 }, { "epoch": 2.0406004594262512, "grad_norm": 1.11931574344635, "learning_rate": 9.740006321300296e-07, "loss": 0.0393, "step": 190990 }, { "epoch": 2.040707302740531, "grad_norm": 2.0016703605651855, "learning_rate": 9.739952847107976e-07, "loss": 0.0091, "step": 191000 }, { "epoch": 2.0408141460548106, "grad_norm": 0.3447625935077667, "learning_rate": 9.739899367563888e-07, "loss": 0.0028, "step": 191010 }, { "epoch": 2.04092098936909, "grad_norm": 0.8350096940994263, "learning_rate": 9.739845882668098e-07, "loss": 0.0056, "step": 191020 }, { "epoch": 2.04102783268337, "grad_norm": 0.03917749226093292, "learning_rate": 9.739792392420662e-07, "loss": 0.0138, "step": 191030 }, { "epoch": 2.0411346759976494, "grad_norm": 0.0035454456228762865, "learning_rate": 9.739738896821645e-07, "loss": 0.0125, "step": 191040 }, { "epoch": 2.041241519311929, "grad_norm": 0.0496826209127903, "learning_rate": 9.739685395871105e-07, "loss": 0.0623, "step": 191050 }, { "epoch": 2.041348362626209, "grad_norm": 0.02260914072394371, "learning_rate": 9.739631889569103e-07, "loss": 0.0227, "step": 191060 }, { "epoch": 2.0414552059404882, "grad_norm": 0.3592069149017334, "learning_rate": 9.739578377915696e-07, "loss": 0.0615, "step": 191070 }, { "epoch": 2.0415620492547677, "grad_norm": 0.00850637350231409, "learning_rate": 9.73952486091095e-07, "loss": 0.0227, "step": 191080 }, { "epoch": 2.0416688925690476, "grad_norm": 1.3038713932037354, "learning_rate": 9.739471338554921e-07, "loss": 0.0357, "step": 191090 }, { "epoch": 2.041775735883327, "grad_norm": 5.32649040222168, "learning_rate": 9.739417810847672e-07, "loss": 0.02, "step": 191100 }, { "epoch": 2.0418825791976065, "grad_norm": 0.009221487678587437, "learning_rate": 9.739364277789262e-07, "loss": 0.0835, "step": 191110 }, { "epoch": 2.0419894225118864, "grad_norm": 7.88355827331543, "learning_rate": 9.739310739379753e-07, "loss": 0.0387, "step": 191120 }, { "epoch": 2.042096265826166, "grad_norm": 0.0012214072048664093, "learning_rate": 9.739257195619203e-07, "loss": 0.0269, "step": 191130 }, { "epoch": 2.0422031091404453, "grad_norm": 4.112195014953613, "learning_rate": 9.739203646507674e-07, "loss": 0.0157, "step": 191140 }, { "epoch": 2.0423099524547252, "grad_norm": 0.04948938265442848, "learning_rate": 9.73915009204523e-07, "loss": 0.0108, "step": 191150 }, { "epoch": 2.0424167957690047, "grad_norm": 0.19848698377609253, "learning_rate": 9.739096532231924e-07, "loss": 0.0367, "step": 191160 }, { "epoch": 2.042523639083284, "grad_norm": 4.176001071929932, "learning_rate": 9.73904296706782e-07, "loss": 0.0124, "step": 191170 }, { "epoch": 2.042630482397564, "grad_norm": 6.9938788414001465, "learning_rate": 9.738989396552981e-07, "loss": 0.0063, "step": 191180 }, { "epoch": 2.0427373257118435, "grad_norm": 4.465886116027832, "learning_rate": 9.738935820687466e-07, "loss": 0.0198, "step": 191190 }, { "epoch": 2.042844169026123, "grad_norm": 3.785771608352661, "learning_rate": 9.738882239471332e-07, "loss": 0.0258, "step": 191200 }, { "epoch": 2.042951012340403, "grad_norm": 4.431594371795654, "learning_rate": 9.738828652904644e-07, "loss": 0.0596, "step": 191210 }, { "epoch": 2.0430578556546823, "grad_norm": 10.268925666809082, "learning_rate": 9.73877506098746e-07, "loss": 0.056, "step": 191220 }, { "epoch": 2.043164698968962, "grad_norm": 0.05332295969128609, "learning_rate": 9.738721463719843e-07, "loss": 0.0053, "step": 191230 }, { "epoch": 2.0432715422832417, "grad_norm": 0.31371766328811646, "learning_rate": 9.738667861101851e-07, "loss": 0.016, "step": 191240 }, { "epoch": 2.043378385597521, "grad_norm": 24.095746994018555, "learning_rate": 9.738614253133544e-07, "loss": 0.0418, "step": 191250 }, { "epoch": 2.043485228911801, "grad_norm": 1.1296205520629883, "learning_rate": 9.738560639814987e-07, "loss": 0.0079, "step": 191260 }, { "epoch": 2.0435920722260805, "grad_norm": 0.4291163682937622, "learning_rate": 9.738507021146235e-07, "loss": 0.051, "step": 191270 }, { "epoch": 2.04369891554036, "grad_norm": 6.633768558502197, "learning_rate": 9.73845339712735e-07, "loss": 0.0495, "step": 191280 }, { "epoch": 2.04380575885464, "grad_norm": 0.48371487855911255, "learning_rate": 9.738399767758395e-07, "loss": 0.0123, "step": 191290 }, { "epoch": 2.0439126021689193, "grad_norm": 24.175203323364258, "learning_rate": 9.738346133039427e-07, "loss": 0.0464, "step": 191300 }, { "epoch": 2.044019445483199, "grad_norm": 1.1361713409423828, "learning_rate": 9.738292492970513e-07, "loss": 0.0071, "step": 191310 }, { "epoch": 2.0441262887974787, "grad_norm": 2.316612958908081, "learning_rate": 9.738238847551705e-07, "loss": 0.0157, "step": 191320 }, { "epoch": 2.044233132111758, "grad_norm": 0.826107382774353, "learning_rate": 9.738185196783068e-07, "loss": 0.0137, "step": 191330 }, { "epoch": 2.0443399754260376, "grad_norm": 0.004830342251807451, "learning_rate": 9.738131540664664e-07, "loss": 0.0346, "step": 191340 }, { "epoch": 2.0444468187403175, "grad_norm": 2.3498175144195557, "learning_rate": 9.738077879196553e-07, "loss": 0.0147, "step": 191350 }, { "epoch": 2.044553662054597, "grad_norm": 0.3064940869808197, "learning_rate": 9.738024212378793e-07, "loss": 0.0371, "step": 191360 }, { "epoch": 2.0446605053688764, "grad_norm": 0.021218569949269295, "learning_rate": 9.737970540211445e-07, "loss": 0.0573, "step": 191370 }, { "epoch": 2.0447673486831563, "grad_norm": 0.0048552923835814, "learning_rate": 9.73791686269457e-07, "loss": 0.0033, "step": 191380 }, { "epoch": 2.044874191997436, "grad_norm": 1.7000709772109985, "learning_rate": 9.737863179828233e-07, "loss": 0.0242, "step": 191390 }, { "epoch": 2.0449810353117153, "grad_norm": 0.03873850032687187, "learning_rate": 9.737809491612487e-07, "loss": 0.0221, "step": 191400 }, { "epoch": 2.045087878625995, "grad_norm": 0.08024241775274277, "learning_rate": 9.7377557980474e-07, "loss": 0.0052, "step": 191410 }, { "epoch": 2.0451947219402746, "grad_norm": 0.02242441102862358, "learning_rate": 9.737702099133026e-07, "loss": 0.0121, "step": 191420 }, { "epoch": 2.045301565254554, "grad_norm": 0.05096247047185898, "learning_rate": 9.73764839486943e-07, "loss": 0.0126, "step": 191430 }, { "epoch": 2.045408408568834, "grad_norm": 0.024891212582588196, "learning_rate": 9.737594685256672e-07, "loss": 0.0137, "step": 191440 }, { "epoch": 2.0455152518831135, "grad_norm": 3.442462921142578, "learning_rate": 9.737540970294813e-07, "loss": 0.0209, "step": 191450 }, { "epoch": 2.045622095197393, "grad_norm": 0.02340526320040226, "learning_rate": 9.737487249983912e-07, "loss": 0.0392, "step": 191460 }, { "epoch": 2.045728938511673, "grad_norm": 1.5687752962112427, "learning_rate": 9.737433524324027e-07, "loss": 0.0042, "step": 191470 }, { "epoch": 2.0458357818259523, "grad_norm": 2.5033671855926514, "learning_rate": 9.737379793315227e-07, "loss": 0.0145, "step": 191480 }, { "epoch": 2.0459426251402317, "grad_norm": 3.419290065765381, "learning_rate": 9.737326056957564e-07, "loss": 0.0514, "step": 191490 }, { "epoch": 2.0460494684545116, "grad_norm": 0.9146761298179626, "learning_rate": 9.737272315251103e-07, "loss": 0.0035, "step": 191500 }, { "epoch": 2.046156311768791, "grad_norm": 3.0581955909729004, "learning_rate": 9.737218568195905e-07, "loss": 0.0103, "step": 191510 }, { "epoch": 2.0462631550830706, "grad_norm": 3.9694929122924805, "learning_rate": 9.737164815792029e-07, "loss": 0.0111, "step": 191520 }, { "epoch": 2.0463699983973505, "grad_norm": 5.408381462097168, "learning_rate": 9.737111058039537e-07, "loss": 0.067, "step": 191530 }, { "epoch": 2.04647684171163, "grad_norm": 0.3849254250526428, "learning_rate": 9.737057294938488e-07, "loss": 0.0072, "step": 191540 }, { "epoch": 2.0465836850259094, "grad_norm": 6.77045202255249, "learning_rate": 9.737003526488947e-07, "loss": 0.0819, "step": 191550 }, { "epoch": 2.0466905283401893, "grad_norm": 0.46802377700805664, "learning_rate": 9.736949752690968e-07, "loss": 0.0367, "step": 191560 }, { "epoch": 2.0467973716544687, "grad_norm": 0.8577516674995422, "learning_rate": 9.736895973544616e-07, "loss": 0.0123, "step": 191570 }, { "epoch": 2.046904214968748, "grad_norm": 0.058845482766628265, "learning_rate": 9.736842189049952e-07, "loss": 0.0432, "step": 191580 }, { "epoch": 2.047011058283028, "grad_norm": 1.48625648021698, "learning_rate": 9.736788399207035e-07, "loss": 0.0191, "step": 191590 }, { "epoch": 2.0471179015973076, "grad_norm": 0.07184404134750366, "learning_rate": 9.736734604015927e-07, "loss": 0.0128, "step": 191600 }, { "epoch": 2.047224744911587, "grad_norm": 3.504636764526367, "learning_rate": 9.736680803476686e-07, "loss": 0.0136, "step": 191610 }, { "epoch": 2.047331588225867, "grad_norm": 0.04214360564947128, "learning_rate": 9.736626997589377e-07, "loss": 0.0486, "step": 191620 }, { "epoch": 2.0474384315401464, "grad_norm": 0.42600035667419434, "learning_rate": 9.736573186354059e-07, "loss": 0.0022, "step": 191630 }, { "epoch": 2.047545274854426, "grad_norm": 5.714620113372803, "learning_rate": 9.73651936977079e-07, "loss": 0.0288, "step": 191640 }, { "epoch": 2.0476521181687057, "grad_norm": 2.791020631790161, "learning_rate": 9.736465547839635e-07, "loss": 0.03, "step": 191650 }, { "epoch": 2.047758961482985, "grad_norm": 0.016225840896368027, "learning_rate": 9.736411720560653e-07, "loss": 0.049, "step": 191660 }, { "epoch": 2.0478658047972647, "grad_norm": 0.15907147526741028, "learning_rate": 9.736357887933905e-07, "loss": 0.0245, "step": 191670 }, { "epoch": 2.0479726481115446, "grad_norm": 0.015601299703121185, "learning_rate": 9.73630404995945e-07, "loss": 0.0197, "step": 191680 }, { "epoch": 2.048079491425824, "grad_norm": 4.821752071380615, "learning_rate": 9.73625020663735e-07, "loss": 0.1212, "step": 191690 }, { "epoch": 2.0481863347401035, "grad_norm": 0.1990462988615036, "learning_rate": 9.736196357967667e-07, "loss": 0.0199, "step": 191700 }, { "epoch": 2.0482931780543834, "grad_norm": 0.3273429274559021, "learning_rate": 9.736142503950461e-07, "loss": 0.0402, "step": 191710 }, { "epoch": 2.048400021368663, "grad_norm": 5.723769187927246, "learning_rate": 9.736088644585792e-07, "loss": 0.0153, "step": 191720 }, { "epoch": 2.0485068646829423, "grad_norm": 0.3238557279109955, "learning_rate": 9.736034779873722e-07, "loss": 0.0071, "step": 191730 }, { "epoch": 2.048613707997222, "grad_norm": 0.03129177913069725, "learning_rate": 9.735980909814312e-07, "loss": 0.0192, "step": 191740 }, { "epoch": 2.0487205513115017, "grad_norm": 7.185776710510254, "learning_rate": 9.73592703440762e-07, "loss": 0.0236, "step": 191750 }, { "epoch": 2.048827394625781, "grad_norm": 0.0486329048871994, "learning_rate": 9.735873153653712e-07, "loss": 0.0592, "step": 191760 }, { "epoch": 2.048934237940061, "grad_norm": 15.48909854888916, "learning_rate": 9.735819267552643e-07, "loss": 0.0841, "step": 191770 }, { "epoch": 2.0490410812543405, "grad_norm": 11.925735473632812, "learning_rate": 9.735765376104477e-07, "loss": 0.0446, "step": 191780 }, { "epoch": 2.04914792456862, "grad_norm": 4.590785980224609, "learning_rate": 9.735711479309275e-07, "loss": 0.0328, "step": 191790 }, { "epoch": 2.0492547678829, "grad_norm": 0.777230978012085, "learning_rate": 9.735657577167098e-07, "loss": 0.0098, "step": 191800 }, { "epoch": 2.0493616111971793, "grad_norm": 0.02123839035630226, "learning_rate": 9.735603669678005e-07, "loss": 0.0105, "step": 191810 }, { "epoch": 2.0494684545114588, "grad_norm": 0.00935088749974966, "learning_rate": 9.735549756842058e-07, "loss": 0.0282, "step": 191820 }, { "epoch": 2.0495752978257387, "grad_norm": 8.117043495178223, "learning_rate": 9.73549583865932e-07, "loss": 0.0118, "step": 191830 }, { "epoch": 2.049682141140018, "grad_norm": 0.019047215580940247, "learning_rate": 9.73544191512985e-07, "loss": 0.0287, "step": 191840 }, { "epoch": 2.0497889844542976, "grad_norm": 0.9415434002876282, "learning_rate": 9.735387986253706e-07, "loss": 0.0165, "step": 191850 }, { "epoch": 2.0498958277685775, "grad_norm": 14.375890731811523, "learning_rate": 9.735334052030952e-07, "loss": 0.0269, "step": 191860 }, { "epoch": 2.050002671082857, "grad_norm": 0.02938777022063732, "learning_rate": 9.735280112461649e-07, "loss": 0.0211, "step": 191870 }, { "epoch": 2.0501095143971364, "grad_norm": 2.7934768199920654, "learning_rate": 9.735226167545858e-07, "loss": 0.0186, "step": 191880 }, { "epoch": 2.0502163577114163, "grad_norm": 0.23876917362213135, "learning_rate": 9.735172217283638e-07, "loss": 0.0107, "step": 191890 }, { "epoch": 2.0503232010256958, "grad_norm": 0.01383913028985262, "learning_rate": 9.735118261675052e-07, "loss": 0.0051, "step": 191900 }, { "epoch": 2.050430044339975, "grad_norm": 0.002773761283606291, "learning_rate": 9.73506430072016e-07, "loss": 0.0129, "step": 191910 }, { "epoch": 2.050536887654255, "grad_norm": 1.984955906867981, "learning_rate": 9.735010334419023e-07, "loss": 0.0419, "step": 191920 }, { "epoch": 2.0506437309685346, "grad_norm": 0.008318969048559666, "learning_rate": 9.734956362771703e-07, "loss": 0.0324, "step": 191930 }, { "epoch": 2.050750574282814, "grad_norm": 2.5597763061523438, "learning_rate": 9.734902385778258e-07, "loss": 0.0252, "step": 191940 }, { "epoch": 2.050857417597094, "grad_norm": 9.733330726623535, "learning_rate": 9.734848403438752e-07, "loss": 0.0137, "step": 191950 }, { "epoch": 2.0509642609113734, "grad_norm": 0.004474669694900513, "learning_rate": 9.734794415753244e-07, "loss": 0.0218, "step": 191960 }, { "epoch": 2.051071104225653, "grad_norm": 0.05217568948864937, "learning_rate": 9.734740422721796e-07, "loss": 0.0286, "step": 191970 }, { "epoch": 2.0511779475399328, "grad_norm": 5.319882869720459, "learning_rate": 9.734686424344468e-07, "loss": 0.1248, "step": 191980 }, { "epoch": 2.0512847908542122, "grad_norm": 1.330074429512024, "learning_rate": 9.734632420621326e-07, "loss": 0.0384, "step": 191990 }, { "epoch": 2.0513916341684917, "grad_norm": 0.06973837316036224, "learning_rate": 9.73457841155242e-07, "loss": 0.0214, "step": 192000 }, { "epoch": 2.0514984774827716, "grad_norm": 0.018077921122312546, "learning_rate": 9.734524397137822e-07, "loss": 0.0013, "step": 192010 }, { "epoch": 2.051605320797051, "grad_norm": 3.080498218536377, "learning_rate": 9.734470377377588e-07, "loss": 0.0286, "step": 192020 }, { "epoch": 2.051712164111331, "grad_norm": 2.2140238285064697, "learning_rate": 9.734416352271779e-07, "loss": 0.0321, "step": 192030 }, { "epoch": 2.0518190074256104, "grad_norm": 4.708532810211182, "learning_rate": 9.734362321820458e-07, "loss": 0.0363, "step": 192040 }, { "epoch": 2.05192585073989, "grad_norm": 6.057391166687012, "learning_rate": 9.734308286023682e-07, "loss": 0.0411, "step": 192050 }, { "epoch": 2.0520326940541698, "grad_norm": 2.1229381561279297, "learning_rate": 9.734254244881516e-07, "loss": 0.0977, "step": 192060 }, { "epoch": 2.0521395373684492, "grad_norm": 6.398151397705078, "learning_rate": 9.73420019839402e-07, "loss": 0.0056, "step": 192070 }, { "epoch": 2.0522463806827287, "grad_norm": 2.1378841400146484, "learning_rate": 9.734146146561254e-07, "loss": 0.0171, "step": 192080 }, { "epoch": 2.0523532239970086, "grad_norm": 0.08295085281133652, "learning_rate": 9.73409208938328e-07, "loss": 0.0226, "step": 192090 }, { "epoch": 2.052460067311288, "grad_norm": 0.12036512792110443, "learning_rate": 9.734038026860161e-07, "loss": 0.0259, "step": 192100 }, { "epoch": 2.0525669106255675, "grad_norm": 0.005035326816141605, "learning_rate": 9.733983958991954e-07, "loss": 0.0231, "step": 192110 }, { "epoch": 2.0526737539398474, "grad_norm": 0.7945499420166016, "learning_rate": 9.73392988577872e-07, "loss": 0.0392, "step": 192120 }, { "epoch": 2.052780597254127, "grad_norm": 1.2957780361175537, "learning_rate": 9.733875807220523e-07, "loss": 0.0276, "step": 192130 }, { "epoch": 2.0528874405684063, "grad_norm": 4.410783290863037, "learning_rate": 9.733821723317423e-07, "loss": 0.063, "step": 192140 }, { "epoch": 2.0529942838826862, "grad_norm": 0.00629213685169816, "learning_rate": 9.733767634069483e-07, "loss": 0.0297, "step": 192150 }, { "epoch": 2.0531011271969657, "grad_norm": 1.2255374193191528, "learning_rate": 9.73371353947676e-07, "loss": 0.0267, "step": 192160 }, { "epoch": 2.053207970511245, "grad_norm": 1.1183198690414429, "learning_rate": 9.733659439539318e-07, "loss": 0.0085, "step": 192170 }, { "epoch": 2.053314813825525, "grad_norm": 0.19934403896331787, "learning_rate": 9.733605334257215e-07, "loss": 0.0129, "step": 192180 }, { "epoch": 2.0534216571398045, "grad_norm": 8.154755592346191, "learning_rate": 9.733551223630516e-07, "loss": 0.0358, "step": 192190 }, { "epoch": 2.053528500454084, "grad_norm": 0.10702615976333618, "learning_rate": 9.73349710765928e-07, "loss": 0.0016, "step": 192200 }, { "epoch": 2.053635343768364, "grad_norm": 1.1894633769989014, "learning_rate": 9.73344298634357e-07, "loss": 0.0391, "step": 192210 }, { "epoch": 2.0537421870826433, "grad_norm": 0.047018423676490784, "learning_rate": 9.733388859683444e-07, "loss": 0.0307, "step": 192220 }, { "epoch": 2.053849030396923, "grad_norm": 0.19394274055957794, "learning_rate": 9.733334727678965e-07, "loss": 0.029, "step": 192230 }, { "epoch": 2.0539558737112027, "grad_norm": 8.952291488647461, "learning_rate": 9.733280590330196e-07, "loss": 0.0388, "step": 192240 }, { "epoch": 2.054062717025482, "grad_norm": 0.003755630226805806, "learning_rate": 9.733226447637193e-07, "loss": 0.0165, "step": 192250 }, { "epoch": 2.0541695603397616, "grad_norm": 0.8160892128944397, "learning_rate": 9.733172299600021e-07, "loss": 0.0098, "step": 192260 }, { "epoch": 2.0542764036540415, "grad_norm": 6.401304721832275, "learning_rate": 9.733118146218742e-07, "loss": 0.0191, "step": 192270 }, { "epoch": 2.054383246968321, "grad_norm": 0.6909072995185852, "learning_rate": 9.733063987493412e-07, "loss": 0.0184, "step": 192280 }, { "epoch": 2.0544900902826004, "grad_norm": 0.3617362082004547, "learning_rate": 9.7330098234241e-07, "loss": 0.0221, "step": 192290 }, { "epoch": 2.0545969335968803, "grad_norm": 0.01898297108709812, "learning_rate": 9.732955654010857e-07, "loss": 0.0052, "step": 192300 }, { "epoch": 2.05470377691116, "grad_norm": 0.010727672837674618, "learning_rate": 9.732901479253755e-07, "loss": 0.0033, "step": 192310 }, { "epoch": 2.0548106202254393, "grad_norm": 0.013442670926451683, "learning_rate": 9.732847299152846e-07, "loss": 0.0014, "step": 192320 }, { "epoch": 2.054917463539719, "grad_norm": 3.8252501487731934, "learning_rate": 9.732793113708199e-07, "loss": 0.0119, "step": 192330 }, { "epoch": 2.0550243068539986, "grad_norm": 0.016424980014562607, "learning_rate": 9.732738922919869e-07, "loss": 0.0266, "step": 192340 }, { "epoch": 2.055131150168278, "grad_norm": 1.9061858654022217, "learning_rate": 9.732684726787921e-07, "loss": 0.0166, "step": 192350 }, { "epoch": 2.055237993482558, "grad_norm": 2.2583162784576416, "learning_rate": 9.732630525312414e-07, "loss": 0.0603, "step": 192360 }, { "epoch": 2.0553448367968374, "grad_norm": 1.0995022058486938, "learning_rate": 9.73257631849341e-07, "loss": 0.0316, "step": 192370 }, { "epoch": 2.055451680111117, "grad_norm": 0.5956302881240845, "learning_rate": 9.73252210633097e-07, "loss": 0.0229, "step": 192380 }, { "epoch": 2.055558523425397, "grad_norm": 0.27843230962753296, "learning_rate": 9.732467888825153e-07, "loss": 0.0824, "step": 192390 }, { "epoch": 2.0556653667396763, "grad_norm": 0.03762291744351387, "learning_rate": 9.732413665976027e-07, "loss": 0.0404, "step": 192400 }, { "epoch": 2.0557722100539557, "grad_norm": 4.4642229080200195, "learning_rate": 9.732359437783645e-07, "loss": 0.0233, "step": 192410 }, { "epoch": 2.0558790533682356, "grad_norm": 0.0031504640355706215, "learning_rate": 9.732305204248074e-07, "loss": 0.03, "step": 192420 }, { "epoch": 2.055985896682515, "grad_norm": 0.0459294356405735, "learning_rate": 9.732250965369374e-07, "loss": 0.0073, "step": 192430 }, { "epoch": 2.0560927399967945, "grad_norm": 2.278778553009033, "learning_rate": 9.732196721147603e-07, "loss": 0.0208, "step": 192440 }, { "epoch": 2.0561995833110744, "grad_norm": 0.2873276472091675, "learning_rate": 9.732142471582827e-07, "loss": 0.0098, "step": 192450 }, { "epoch": 2.056306426625354, "grad_norm": 5.943755626678467, "learning_rate": 9.732088216675104e-07, "loss": 0.0175, "step": 192460 }, { "epoch": 2.0564132699396334, "grad_norm": 4.442708969116211, "learning_rate": 9.732033956424497e-07, "loss": 0.0215, "step": 192470 }, { "epoch": 2.0565201132539133, "grad_norm": 0.10103501379489899, "learning_rate": 9.731979690831066e-07, "loss": 0.0306, "step": 192480 }, { "epoch": 2.0566269565681927, "grad_norm": 5.5528564453125, "learning_rate": 9.731925419894874e-07, "loss": 0.0123, "step": 192490 }, { "epoch": 2.056733799882472, "grad_norm": 2.774155616760254, "learning_rate": 9.731871143615978e-07, "loss": 0.0147, "step": 192500 }, { "epoch": 2.056840643196752, "grad_norm": 0.017662595957517624, "learning_rate": 9.731816861994444e-07, "loss": 0.0372, "step": 192510 }, { "epoch": 2.0569474865110315, "grad_norm": 4.049291610717773, "learning_rate": 9.731762575030332e-07, "loss": 0.0095, "step": 192520 }, { "epoch": 2.057054329825311, "grad_norm": 7.755100727081299, "learning_rate": 9.731708282723703e-07, "loss": 0.0304, "step": 192530 }, { "epoch": 2.057161173139591, "grad_norm": 0.02946353703737259, "learning_rate": 9.731653985074618e-07, "loss": 0.014, "step": 192540 }, { "epoch": 2.0572680164538704, "grad_norm": 0.09261011332273483, "learning_rate": 9.731599682083139e-07, "loss": 0.0115, "step": 192550 }, { "epoch": 2.05737485976815, "grad_norm": 0.9195680618286133, "learning_rate": 9.731545373749326e-07, "loss": 0.0108, "step": 192560 }, { "epoch": 2.0574817030824297, "grad_norm": 1.2845969200134277, "learning_rate": 9.731491060073242e-07, "loss": 0.054, "step": 192570 }, { "epoch": 2.057588546396709, "grad_norm": 0.00431371945887804, "learning_rate": 9.731436741054946e-07, "loss": 0.0163, "step": 192580 }, { "epoch": 2.0576953897109886, "grad_norm": 2.204960346221924, "learning_rate": 9.731382416694503e-07, "loss": 0.0082, "step": 192590 }, { "epoch": 2.0578022330252685, "grad_norm": 0.14825090765953064, "learning_rate": 9.73132808699197e-07, "loss": 0.0077, "step": 192600 }, { "epoch": 2.057909076339548, "grad_norm": 3.05385422706604, "learning_rate": 9.731273751947413e-07, "loss": 0.0309, "step": 192610 }, { "epoch": 2.0580159196538275, "grad_norm": 0.5702316761016846, "learning_rate": 9.731219411560888e-07, "loss": 0.0289, "step": 192620 }, { "epoch": 2.0581227629681074, "grad_norm": 0.5935114622116089, "learning_rate": 9.731165065832461e-07, "loss": 0.0272, "step": 192630 }, { "epoch": 2.058229606282387, "grad_norm": 5.857339382171631, "learning_rate": 9.731110714762192e-07, "loss": 0.0117, "step": 192640 }, { "epoch": 2.0583364495966663, "grad_norm": 0.05531521141529083, "learning_rate": 9.73105635835014e-07, "loss": 0.0206, "step": 192650 }, { "epoch": 2.058443292910946, "grad_norm": 0.11363612860441208, "learning_rate": 9.731001996596369e-07, "loss": 0.0049, "step": 192660 }, { "epoch": 2.0585501362252256, "grad_norm": 0.0482889749109745, "learning_rate": 9.73094762950094e-07, "loss": 0.0337, "step": 192670 }, { "epoch": 2.058656979539505, "grad_norm": 0.031419266015291214, "learning_rate": 9.730893257063914e-07, "loss": 0.0237, "step": 192680 }, { "epoch": 2.058763822853785, "grad_norm": 3.022923707962036, "learning_rate": 9.730838879285355e-07, "loss": 0.0302, "step": 192690 }, { "epoch": 2.0588706661680645, "grad_norm": 0.9241568446159363, "learning_rate": 9.73078449616532e-07, "loss": 0.0013, "step": 192700 }, { "epoch": 2.058977509482344, "grad_norm": 0.9148889780044556, "learning_rate": 9.73073010770387e-07, "loss": 0.001, "step": 192710 }, { "epoch": 2.059084352796624, "grad_norm": 0.0380178801715374, "learning_rate": 9.73067571390107e-07, "loss": 0.0624, "step": 192720 }, { "epoch": 2.0591911961109033, "grad_norm": 0.014204135164618492, "learning_rate": 9.73062131475698e-07, "loss": 0.0125, "step": 192730 }, { "epoch": 2.059298039425183, "grad_norm": 0.04129617661237717, "learning_rate": 9.730566910271662e-07, "loss": 0.0047, "step": 192740 }, { "epoch": 2.0594048827394626, "grad_norm": 11.022957801818848, "learning_rate": 9.730512500445178e-07, "loss": 0.0232, "step": 192750 }, { "epoch": 2.059511726053742, "grad_norm": 0.09350870549678802, "learning_rate": 9.730458085277587e-07, "loss": 0.0019, "step": 192760 }, { "epoch": 2.059618569368022, "grad_norm": 0.0411008782684803, "learning_rate": 9.730403664768952e-07, "loss": 0.0152, "step": 192770 }, { "epoch": 2.0597254126823015, "grad_norm": 0.04168745130300522, "learning_rate": 9.730349238919333e-07, "loss": 0.0178, "step": 192780 }, { "epoch": 2.059832255996581, "grad_norm": 0.8193928003311157, "learning_rate": 9.730294807728793e-07, "loss": 0.0119, "step": 192790 }, { "epoch": 2.059939099310861, "grad_norm": 3.1844985485076904, "learning_rate": 9.730240371197396e-07, "loss": 0.0144, "step": 192800 }, { "epoch": 2.0600459426251403, "grad_norm": 6.190174102783203, "learning_rate": 9.7301859293252e-07, "loss": 0.0323, "step": 192810 }, { "epoch": 2.0601527859394198, "grad_norm": 2.5770633220672607, "learning_rate": 9.730131482112264e-07, "loss": 0.0128, "step": 192820 }, { "epoch": 2.0602596292536997, "grad_norm": 0.009524567984044552, "learning_rate": 9.730077029558655e-07, "loss": 0.0009, "step": 192830 }, { "epoch": 2.060366472567979, "grad_norm": 2.996612787246704, "learning_rate": 9.730022571664431e-07, "loss": 0.0459, "step": 192840 }, { "epoch": 2.0604733158822586, "grad_norm": 7.95712423324585, "learning_rate": 9.729968108429654e-07, "loss": 0.0953, "step": 192850 }, { "epoch": 2.0605801591965385, "grad_norm": 6.858149528503418, "learning_rate": 9.729913639854387e-07, "loss": 0.0127, "step": 192860 }, { "epoch": 2.060687002510818, "grad_norm": 3.5021045207977295, "learning_rate": 9.729859165938692e-07, "loss": 0.0146, "step": 192870 }, { "epoch": 2.0607938458250974, "grad_norm": 0.3437783718109131, "learning_rate": 9.729804686682628e-07, "loss": 0.0121, "step": 192880 }, { "epoch": 2.0609006891393773, "grad_norm": 0.12238489091396332, "learning_rate": 9.729750202086255e-07, "loss": 0.0333, "step": 192890 }, { "epoch": 2.0610075324536568, "grad_norm": 0.08604729920625687, "learning_rate": 9.72969571214964e-07, "loss": 0.0177, "step": 192900 }, { "epoch": 2.061114375767936, "grad_norm": 1.1403287649154663, "learning_rate": 9.72964121687284e-07, "loss": 0.0055, "step": 192910 }, { "epoch": 2.061221219082216, "grad_norm": 3.5843617916107178, "learning_rate": 9.72958671625592e-07, "loss": 0.0222, "step": 192920 }, { "epoch": 2.0613280623964956, "grad_norm": 0.11295577883720398, "learning_rate": 9.729532210298938e-07, "loss": 0.0387, "step": 192930 }, { "epoch": 2.061434905710775, "grad_norm": 18.225889205932617, "learning_rate": 9.729477699001957e-07, "loss": 0.0388, "step": 192940 }, { "epoch": 2.061541749025055, "grad_norm": 0.9823988080024719, "learning_rate": 9.72942318236504e-07, "loss": 0.0285, "step": 192950 }, { "epoch": 2.0616485923393344, "grad_norm": 0.3889632225036621, "learning_rate": 9.729368660388245e-07, "loss": 0.0428, "step": 192960 }, { "epoch": 2.061755435653614, "grad_norm": 0.8434495329856873, "learning_rate": 9.729314133071637e-07, "loss": 0.0039, "step": 192970 }, { "epoch": 2.0618622789678938, "grad_norm": 0.003153598867356777, "learning_rate": 9.729259600415279e-07, "loss": 0.0218, "step": 192980 }, { "epoch": 2.061969122282173, "grad_norm": 0.04468594491481781, "learning_rate": 9.729205062419229e-07, "loss": 0.028, "step": 192990 }, { "epoch": 2.0620759655964527, "grad_norm": 1.4486020803451538, "learning_rate": 9.729150519083546e-07, "loss": 0.0108, "step": 193000 }, { "epoch": 2.0621828089107326, "grad_norm": 3.63419508934021, "learning_rate": 9.7290959704083e-07, "loss": 0.0137, "step": 193010 }, { "epoch": 2.062289652225012, "grad_norm": 0.23437561094760895, "learning_rate": 9.729041416393544e-07, "loss": 0.0083, "step": 193020 }, { "epoch": 2.0623964955392915, "grad_norm": 0.008633475750684738, "learning_rate": 9.728986857039344e-07, "loss": 0.0324, "step": 193030 }, { "epoch": 2.0625033388535714, "grad_norm": 0.011634652502834797, "learning_rate": 9.728932292345762e-07, "loss": 0.0143, "step": 193040 }, { "epoch": 2.062610182167851, "grad_norm": 0.26117977499961853, "learning_rate": 9.728877722312857e-07, "loss": 0.0092, "step": 193050 }, { "epoch": 2.0627170254821303, "grad_norm": 4.156944274902344, "learning_rate": 9.728823146940694e-07, "loss": 0.0205, "step": 193060 }, { "epoch": 2.06282386879641, "grad_norm": 0.4572817087173462, "learning_rate": 9.728768566229332e-07, "loss": 0.0144, "step": 193070 }, { "epoch": 2.0629307121106897, "grad_norm": 1.8116090297698975, "learning_rate": 9.728713980178832e-07, "loss": 0.0107, "step": 193080 }, { "epoch": 2.063037555424969, "grad_norm": 1.9831949472427368, "learning_rate": 9.728659388789258e-07, "loss": 0.0237, "step": 193090 }, { "epoch": 2.063144398739249, "grad_norm": 0.9352046847343445, "learning_rate": 9.728604792060672e-07, "loss": 0.0126, "step": 193100 }, { "epoch": 2.0632512420535285, "grad_norm": 2.1719298362731934, "learning_rate": 9.728550189993133e-07, "loss": 0.0233, "step": 193110 }, { "epoch": 2.063358085367808, "grad_norm": 4.956671237945557, "learning_rate": 9.728495582586705e-07, "loss": 0.0126, "step": 193120 }, { "epoch": 2.063464928682088, "grad_norm": 0.005270041525363922, "learning_rate": 9.728440969841446e-07, "loss": 0.0537, "step": 193130 }, { "epoch": 2.0635717719963673, "grad_norm": 6.322688102722168, "learning_rate": 9.728386351757423e-07, "loss": 0.0129, "step": 193140 }, { "epoch": 2.063678615310647, "grad_norm": 2.077216863632202, "learning_rate": 9.728331728334695e-07, "loss": 0.0708, "step": 193150 }, { "epoch": 2.0637854586249267, "grad_norm": 0.04441613703966141, "learning_rate": 9.728277099573322e-07, "loss": 0.0223, "step": 193160 }, { "epoch": 2.063892301939206, "grad_norm": 0.14128278195858002, "learning_rate": 9.728222465473369e-07, "loss": 0.012, "step": 193170 }, { "epoch": 2.0639991452534856, "grad_norm": 5.578207969665527, "learning_rate": 9.728167826034895e-07, "loss": 0.0198, "step": 193180 }, { "epoch": 2.0641059885677655, "grad_norm": 1.1553078889846802, "learning_rate": 9.728113181257963e-07, "loss": 0.0054, "step": 193190 }, { "epoch": 2.064212831882045, "grad_norm": 0.31101351976394653, "learning_rate": 9.728058531142632e-07, "loss": 0.0073, "step": 193200 }, { "epoch": 2.0643196751963244, "grad_norm": 3.94582200050354, "learning_rate": 9.728003875688968e-07, "loss": 0.0347, "step": 193210 }, { "epoch": 2.0644265185106043, "grad_norm": 0.17017416656017303, "learning_rate": 9.727949214897031e-07, "loss": 0.0113, "step": 193220 }, { "epoch": 2.064533361824884, "grad_norm": 2.5909626483917236, "learning_rate": 9.727894548766883e-07, "loss": 0.0428, "step": 193230 }, { "epoch": 2.0646402051391632, "grad_norm": 1.2661458253860474, "learning_rate": 9.727839877298585e-07, "loss": 0.1059, "step": 193240 }, { "epoch": 2.064747048453443, "grad_norm": 8.397700309753418, "learning_rate": 9.727785200492199e-07, "loss": 0.0253, "step": 193250 }, { "epoch": 2.0648538917677226, "grad_norm": 0.8591384291648865, "learning_rate": 9.727730518347785e-07, "loss": 0.0344, "step": 193260 }, { "epoch": 2.064960735082002, "grad_norm": 0.2234523743391037, "learning_rate": 9.727675830865408e-07, "loss": 0.0311, "step": 193270 }, { "epoch": 2.065067578396282, "grad_norm": 0.1420518457889557, "learning_rate": 9.727621138045129e-07, "loss": 0.0215, "step": 193280 }, { "epoch": 2.0651744217105614, "grad_norm": 2.536402702331543, "learning_rate": 9.727566439887007e-07, "loss": 0.0172, "step": 193290 }, { "epoch": 2.065281265024841, "grad_norm": 6.2401123046875, "learning_rate": 9.727511736391105e-07, "loss": 0.0357, "step": 193300 }, { "epoch": 2.065388108339121, "grad_norm": 1.6425706148147583, "learning_rate": 9.727457027557488e-07, "loss": 0.0377, "step": 193310 }, { "epoch": 2.0654949516534002, "grad_norm": 6.260555267333984, "learning_rate": 9.727402313386215e-07, "loss": 0.0156, "step": 193320 }, { "epoch": 2.0656017949676797, "grad_norm": 0.034468717873096466, "learning_rate": 9.727347593877346e-07, "loss": 0.0057, "step": 193330 }, { "epoch": 2.0657086382819596, "grad_norm": 4.495180130004883, "learning_rate": 9.727292869030946e-07, "loss": 0.02, "step": 193340 }, { "epoch": 2.065815481596239, "grad_norm": 0.003759959479793906, "learning_rate": 9.727238138847076e-07, "loss": 0.056, "step": 193350 }, { "epoch": 2.0659223249105185, "grad_norm": 3.2345950603485107, "learning_rate": 9.727183403325795e-07, "loss": 0.0213, "step": 193360 }, { "epoch": 2.0660291682247984, "grad_norm": 4.890099048614502, "learning_rate": 9.72712866246717e-07, "loss": 0.0152, "step": 193370 }, { "epoch": 2.066136011539078, "grad_norm": 3.7959048748016357, "learning_rate": 9.72707391627126e-07, "loss": 0.0163, "step": 193380 }, { "epoch": 2.0662428548533573, "grad_norm": 5.170938968658447, "learning_rate": 9.727019164738124e-07, "loss": 0.0235, "step": 193390 }, { "epoch": 2.0663496981676372, "grad_norm": 1.302182912826538, "learning_rate": 9.72696440786783e-07, "loss": 0.0742, "step": 193400 }, { "epoch": 2.0664565414819167, "grad_norm": 2.532764196395874, "learning_rate": 9.726909645660436e-07, "loss": 0.05, "step": 193410 }, { "epoch": 2.066563384796196, "grad_norm": 2.8164072036743164, "learning_rate": 9.726854878116003e-07, "loss": 0.0181, "step": 193420 }, { "epoch": 2.066670228110476, "grad_norm": 0.2054387927055359, "learning_rate": 9.72680010523459e-07, "loss": 0.0416, "step": 193430 }, { "epoch": 2.0667770714247555, "grad_norm": 0.05931783840060234, "learning_rate": 9.72674532701627e-07, "loss": 0.0227, "step": 193440 }, { "epoch": 2.0668839147390354, "grad_norm": 0.7459396123886108, "learning_rate": 9.726690543461093e-07, "loss": 0.0338, "step": 193450 }, { "epoch": 2.066990758053315, "grad_norm": 2.769293785095215, "learning_rate": 9.726635754569128e-07, "loss": 0.0295, "step": 193460 }, { "epoch": 2.0670976013675944, "grad_norm": 4.08472204208374, "learning_rate": 9.726580960340434e-07, "loss": 0.0658, "step": 193470 }, { "epoch": 2.067204444681874, "grad_norm": 0.21749287843704224, "learning_rate": 9.726526160775074e-07, "loss": 0.0142, "step": 193480 }, { "epoch": 2.0673112879961537, "grad_norm": 17.200607299804688, "learning_rate": 9.726471355873108e-07, "loss": 0.1095, "step": 193490 }, { "epoch": 2.067418131310433, "grad_norm": 0.04278034344315529, "learning_rate": 9.7264165456346e-07, "loss": 0.0135, "step": 193500 }, { "epoch": 2.067524974624713, "grad_norm": 0.04658526927232742, "learning_rate": 9.72636173005961e-07, "loss": 0.1198, "step": 193510 }, { "epoch": 2.0676318179389925, "grad_norm": 1.210744023323059, "learning_rate": 9.726306909148203e-07, "loss": 0.0542, "step": 193520 }, { "epoch": 2.067738661253272, "grad_norm": 4.386528015136719, "learning_rate": 9.726252082900438e-07, "loss": 0.0236, "step": 193530 }, { "epoch": 2.067845504567552, "grad_norm": 0.02301100082695484, "learning_rate": 9.726197251316375e-07, "loss": 0.0267, "step": 193540 }, { "epoch": 2.0679523478818314, "grad_norm": 0.060817901045084, "learning_rate": 9.72614241439608e-07, "loss": 0.0503, "step": 193550 }, { "epoch": 2.068059191196111, "grad_norm": 0.8272992968559265, "learning_rate": 9.726087572139617e-07, "loss": 0.0301, "step": 193560 }, { "epoch": 2.0681660345103907, "grad_norm": 0.06651187688112259, "learning_rate": 9.72603272454704e-07, "loss": 0.0277, "step": 193570 }, { "epoch": 2.06827287782467, "grad_norm": 0.04917449504137039, "learning_rate": 9.725977871618417e-07, "loss": 0.0114, "step": 193580 }, { "epoch": 2.0683797211389496, "grad_norm": 2.4002983570098877, "learning_rate": 9.72592301335381e-07, "loss": 0.0046, "step": 193590 }, { "epoch": 2.0684865644532295, "grad_norm": 1.9656182527542114, "learning_rate": 9.725868149753276e-07, "loss": 0.0199, "step": 193600 }, { "epoch": 2.068593407767509, "grad_norm": 1.7481138706207275, "learning_rate": 9.725813280816881e-07, "loss": 0.0075, "step": 193610 }, { "epoch": 2.0687002510817885, "grad_norm": 3.4129133224487305, "learning_rate": 9.725758406544687e-07, "loss": 0.065, "step": 193620 }, { "epoch": 2.0688070943960684, "grad_norm": 0.02820591814815998, "learning_rate": 9.725703526936756e-07, "loss": 0.0069, "step": 193630 }, { "epoch": 2.068913937710348, "grad_norm": 0.6142308712005615, "learning_rate": 9.725648641993147e-07, "loss": 0.0093, "step": 193640 }, { "epoch": 2.0690207810246273, "grad_norm": 0.16722846031188965, "learning_rate": 9.725593751713926e-07, "loss": 0.0379, "step": 193650 }, { "epoch": 2.069127624338907, "grad_norm": 0.009912253357470036, "learning_rate": 9.72553885609915e-07, "loss": 0.0105, "step": 193660 }, { "epoch": 2.0692344676531866, "grad_norm": 3.100351333618164, "learning_rate": 9.725483955148888e-07, "loss": 0.032, "step": 193670 }, { "epoch": 2.069341310967466, "grad_norm": 1.0250707864761353, "learning_rate": 9.725429048863196e-07, "loss": 0.0222, "step": 193680 }, { "epoch": 2.069448154281746, "grad_norm": 0.2956053912639618, "learning_rate": 9.725374137242137e-07, "loss": 0.024, "step": 193690 }, { "epoch": 2.0695549975960255, "grad_norm": 0.028331981971859932, "learning_rate": 9.725319220285774e-07, "loss": 0.0025, "step": 193700 }, { "epoch": 2.069661840910305, "grad_norm": 1.9225906133651733, "learning_rate": 9.72526429799417e-07, "loss": 0.0237, "step": 193710 }, { "epoch": 2.069768684224585, "grad_norm": 7.909395217895508, "learning_rate": 9.725209370367386e-07, "loss": 0.008, "step": 193720 }, { "epoch": 2.0698755275388643, "grad_norm": 0.8271335959434509, "learning_rate": 9.725154437405484e-07, "loss": 0.0256, "step": 193730 }, { "epoch": 2.0699823708531437, "grad_norm": 0.045384082943201065, "learning_rate": 9.725099499108526e-07, "loss": 0.0154, "step": 193740 }, { "epoch": 2.0700892141674236, "grad_norm": 7.554134368896484, "learning_rate": 9.725044555476574e-07, "loss": 0.0232, "step": 193750 }, { "epoch": 2.070196057481703, "grad_norm": 0.22741654515266418, "learning_rate": 9.72498960650969e-07, "loss": 0.0337, "step": 193760 }, { "epoch": 2.0703029007959826, "grad_norm": 0.0011679305462166667, "learning_rate": 9.724934652207937e-07, "loss": 0.0078, "step": 193770 }, { "epoch": 2.0704097441102625, "grad_norm": 0.20248715579509735, "learning_rate": 9.724879692571375e-07, "loss": 0.016, "step": 193780 }, { "epoch": 2.070516587424542, "grad_norm": 0.050173889845609665, "learning_rate": 9.724824727600066e-07, "loss": 0.0179, "step": 193790 }, { "epoch": 2.0706234307388214, "grad_norm": 3.0684773921966553, "learning_rate": 9.724769757294076e-07, "loss": 0.0084, "step": 193800 }, { "epoch": 2.0707302740531013, "grad_norm": 5.601446151733398, "learning_rate": 9.724714781653462e-07, "loss": 0.0544, "step": 193810 }, { "epoch": 2.0708371173673807, "grad_norm": 0.29004237055778503, "learning_rate": 9.72465980067829e-07, "loss": 0.0324, "step": 193820 }, { "epoch": 2.07094396068166, "grad_norm": 8.237303733825684, "learning_rate": 9.72460481436862e-07, "loss": 0.0217, "step": 193830 }, { "epoch": 2.07105080399594, "grad_norm": 0.01444829348474741, "learning_rate": 9.724549822724513e-07, "loss": 0.0352, "step": 193840 }, { "epoch": 2.0711576473102196, "grad_norm": 0.06597516685724258, "learning_rate": 9.724494825746034e-07, "loss": 0.041, "step": 193850 }, { "epoch": 2.071264490624499, "grad_norm": 1.114066243171692, "learning_rate": 9.724439823433246e-07, "loss": 0.0114, "step": 193860 }, { "epoch": 2.071371333938779, "grad_norm": 0.4371824562549591, "learning_rate": 9.724384815786206e-07, "loss": 0.0121, "step": 193870 }, { "epoch": 2.0714781772530584, "grad_norm": 0.3958548903465271, "learning_rate": 9.724329802804981e-07, "loss": 0.0265, "step": 193880 }, { "epoch": 2.071585020567338, "grad_norm": 1.5989500284194946, "learning_rate": 9.72427478448963e-07, "loss": 0.0113, "step": 193890 }, { "epoch": 2.0716918638816177, "grad_norm": 4.55867862701416, "learning_rate": 9.724219760840215e-07, "loss": 0.0108, "step": 193900 }, { "epoch": 2.071798707195897, "grad_norm": 7.728413105010986, "learning_rate": 9.724164731856801e-07, "loss": 0.0316, "step": 193910 }, { "epoch": 2.0719055505101767, "grad_norm": 0.041772134602069855, "learning_rate": 9.724109697539447e-07, "loss": 0.0138, "step": 193920 }, { "epoch": 2.0720123938244566, "grad_norm": 0.6652511954307556, "learning_rate": 9.72405465788822e-07, "loss": 0.0131, "step": 193930 }, { "epoch": 2.072119237138736, "grad_norm": 0.004353099036961794, "learning_rate": 9.723999612903173e-07, "loss": 0.0075, "step": 193940 }, { "epoch": 2.0722260804530155, "grad_norm": 21.625019073486328, "learning_rate": 9.723944562584378e-07, "loss": 0.0175, "step": 193950 }, { "epoch": 2.0723329237672954, "grad_norm": 7.848995208740234, "learning_rate": 9.723889506931893e-07, "loss": 0.0196, "step": 193960 }, { "epoch": 2.072439767081575, "grad_norm": 0.5771744847297668, "learning_rate": 9.72383444594578e-07, "loss": 0.0486, "step": 193970 }, { "epoch": 2.0725466103958543, "grad_norm": 1.371201753616333, "learning_rate": 9.7237793796261e-07, "loss": 0.0741, "step": 193980 }, { "epoch": 2.072653453710134, "grad_norm": 0.6872391700744629, "learning_rate": 9.723724307972917e-07, "loss": 0.0131, "step": 193990 }, { "epoch": 2.0727602970244137, "grad_norm": 0.13173064589500427, "learning_rate": 9.723669230986293e-07, "loss": 0.0548, "step": 194000 }, { "epoch": 2.072867140338693, "grad_norm": 5.781692028045654, "learning_rate": 9.72361414866629e-07, "loss": 0.0254, "step": 194010 }, { "epoch": 2.072973983652973, "grad_norm": 0.00928471703082323, "learning_rate": 9.723559061012972e-07, "loss": 0.0261, "step": 194020 }, { "epoch": 2.0730808269672525, "grad_norm": 6.497700214385986, "learning_rate": 9.723503968026396e-07, "loss": 0.0364, "step": 194030 }, { "epoch": 2.073187670281532, "grad_norm": 3.5822858810424805, "learning_rate": 9.723448869706629e-07, "loss": 0.0175, "step": 194040 }, { "epoch": 2.073294513595812, "grad_norm": 0.3266986906528473, "learning_rate": 9.723393766053733e-07, "loss": 0.0142, "step": 194050 }, { "epoch": 2.0734013569100913, "grad_norm": 3.951113700866699, "learning_rate": 9.723338657067767e-07, "loss": 0.0286, "step": 194060 }, { "epoch": 2.0735082002243708, "grad_norm": 6.196670055389404, "learning_rate": 9.723283542748797e-07, "loss": 0.0186, "step": 194070 }, { "epoch": 2.0736150435386507, "grad_norm": 11.03388786315918, "learning_rate": 9.72322842309688e-07, "loss": 0.0293, "step": 194080 }, { "epoch": 2.07372188685293, "grad_norm": 0.21879823505878448, "learning_rate": 9.723173298112086e-07, "loss": 0.0204, "step": 194090 }, { "epoch": 2.0738287301672096, "grad_norm": 0.15375885367393494, "learning_rate": 9.72311816779447e-07, "loss": 0.0107, "step": 194100 }, { "epoch": 2.0739355734814895, "grad_norm": 0.583535373210907, "learning_rate": 9.7230630321441e-07, "loss": 0.0285, "step": 194110 }, { "epoch": 2.074042416795769, "grad_norm": 0.0039006974548101425, "learning_rate": 9.723007891161032e-07, "loss": 0.0182, "step": 194120 }, { "epoch": 2.0741492601100484, "grad_norm": 0.26002296805381775, "learning_rate": 9.722952744845331e-07, "loss": 0.008, "step": 194130 }, { "epoch": 2.0742561034243283, "grad_norm": 0.34367817640304565, "learning_rate": 9.722897593197065e-07, "loss": 0.0077, "step": 194140 }, { "epoch": 2.0743629467386078, "grad_norm": 2.650592088699341, "learning_rate": 9.722842436216285e-07, "loss": 0.0031, "step": 194150 }, { "epoch": 2.0744697900528872, "grad_norm": 11.808355331420898, "learning_rate": 9.722787273903064e-07, "loss": 0.0659, "step": 194160 }, { "epoch": 2.074576633367167, "grad_norm": 2.5945348739624023, "learning_rate": 9.722732106257458e-07, "loss": 0.0416, "step": 194170 }, { "epoch": 2.0746834766814466, "grad_norm": 1.684296727180481, "learning_rate": 9.722676933279532e-07, "loss": 0.0255, "step": 194180 }, { "epoch": 2.074790319995726, "grad_norm": 5.651858806610107, "learning_rate": 9.722621754969346e-07, "loss": 0.0521, "step": 194190 }, { "epoch": 2.074897163310006, "grad_norm": 0.06781172007322311, "learning_rate": 9.722566571326964e-07, "loss": 0.0072, "step": 194200 }, { "epoch": 2.0750040066242854, "grad_norm": 0.056191038340330124, "learning_rate": 9.722511382352448e-07, "loss": 0.0025, "step": 194210 }, { "epoch": 2.0751108499385653, "grad_norm": 0.1499217003583908, "learning_rate": 9.72245618804586e-07, "loss": 0.012, "step": 194220 }, { "epoch": 2.0752176932528448, "grad_norm": 4.467488765716553, "learning_rate": 9.722400988407263e-07, "loss": 0.0139, "step": 194230 }, { "epoch": 2.0753245365671242, "grad_norm": 3.2402734756469727, "learning_rate": 9.722345783436718e-07, "loss": 0.0164, "step": 194240 }, { "epoch": 2.0754313798814037, "grad_norm": 0.1761067658662796, "learning_rate": 9.72229057313429e-07, "loss": 0.0095, "step": 194250 }, { "epoch": 2.0755382231956836, "grad_norm": 0.015675118193030357, "learning_rate": 9.722235357500038e-07, "loss": 0.0301, "step": 194260 }, { "epoch": 2.075645066509963, "grad_norm": 0.229217529296875, "learning_rate": 9.722180136534027e-07, "loss": 0.004, "step": 194270 }, { "epoch": 2.075751909824243, "grad_norm": 1.3573122024536133, "learning_rate": 9.722124910236317e-07, "loss": 0.0195, "step": 194280 }, { "epoch": 2.0758587531385224, "grad_norm": 5.5053277015686035, "learning_rate": 9.72206967860697e-07, "loss": 0.0063, "step": 194290 }, { "epoch": 2.075965596452802, "grad_norm": 0.00947502814233303, "learning_rate": 9.722014441646053e-07, "loss": 0.0242, "step": 194300 }, { "epoch": 2.076072439767082, "grad_norm": 2.7494349479675293, "learning_rate": 9.721959199353625e-07, "loss": 0.004, "step": 194310 }, { "epoch": 2.0761792830813612, "grad_norm": 0.004833376035094261, "learning_rate": 9.721903951729747e-07, "loss": 0.0108, "step": 194320 }, { "epoch": 2.0762861263956407, "grad_norm": 11.527798652648926, "learning_rate": 9.721848698774485e-07, "loss": 0.0228, "step": 194330 }, { "epoch": 2.0763929697099206, "grad_norm": 0.003758261911571026, "learning_rate": 9.721793440487897e-07, "loss": 0.0234, "step": 194340 }, { "epoch": 2.0764998130242, "grad_norm": 0.018203994259238243, "learning_rate": 9.721738176870051e-07, "loss": 0.0006, "step": 194350 }, { "epoch": 2.0766066563384795, "grad_norm": 8.874881744384766, "learning_rate": 9.721682907921003e-07, "loss": 0.0191, "step": 194360 }, { "epoch": 2.0767134996527594, "grad_norm": 0.007756393402814865, "learning_rate": 9.721627633640822e-07, "loss": 0.0172, "step": 194370 }, { "epoch": 2.076820342967039, "grad_norm": 0.711412250995636, "learning_rate": 9.721572354029564e-07, "loss": 0.0329, "step": 194380 }, { "epoch": 2.0769271862813183, "grad_norm": 4.983201503753662, "learning_rate": 9.721517069087295e-07, "loss": 0.0098, "step": 194390 }, { "epoch": 2.0770340295955982, "grad_norm": 1.786695122718811, "learning_rate": 9.721461778814077e-07, "loss": 0.0072, "step": 194400 }, { "epoch": 2.0771408729098777, "grad_norm": 0.13377493619918823, "learning_rate": 9.721406483209974e-07, "loss": 0.0169, "step": 194410 }, { "epoch": 2.077247716224157, "grad_norm": 2.0910887718200684, "learning_rate": 9.721351182275047e-07, "loss": 0.0143, "step": 194420 }, { "epoch": 2.077354559538437, "grad_norm": 4.618590354919434, "learning_rate": 9.721295876009356e-07, "loss": 0.0269, "step": 194430 }, { "epoch": 2.0774614028527165, "grad_norm": 1.5517674684524536, "learning_rate": 9.721240564412965e-07, "loss": 0.022, "step": 194440 }, { "epoch": 2.077568246166996, "grad_norm": 13.026262283325195, "learning_rate": 9.72118524748594e-07, "loss": 0.0148, "step": 194450 }, { "epoch": 2.077675089481276, "grad_norm": 1.2437472343444824, "learning_rate": 9.72112992522834e-07, "loss": 0.0106, "step": 194460 }, { "epoch": 2.0777819327955553, "grad_norm": 0.8890549540519714, "learning_rate": 9.721074597640227e-07, "loss": 0.011, "step": 194470 }, { "epoch": 2.077888776109835, "grad_norm": 0.003392321290448308, "learning_rate": 9.721019264721664e-07, "loss": 0.0262, "step": 194480 }, { "epoch": 2.0779956194241147, "grad_norm": 0.004114204086363316, "learning_rate": 9.720963926472715e-07, "loss": 0.011, "step": 194490 }, { "epoch": 2.078102462738394, "grad_norm": 0.5850096940994263, "learning_rate": 9.72090858289344e-07, "loss": 0.0103, "step": 194500 }, { "epoch": 2.0782093060526736, "grad_norm": 7.078842639923096, "learning_rate": 9.720853233983903e-07, "loss": 0.0082, "step": 194510 }, { "epoch": 2.0783161493669535, "grad_norm": 2.841559648513794, "learning_rate": 9.720797879744168e-07, "loss": 0.0171, "step": 194520 }, { "epoch": 2.078422992681233, "grad_norm": 0.2038360983133316, "learning_rate": 9.720742520174294e-07, "loss": 0.0078, "step": 194530 }, { "epoch": 2.0785298359955124, "grad_norm": 4.379631042480469, "learning_rate": 9.720687155274348e-07, "loss": 0.0092, "step": 194540 }, { "epoch": 2.0786366793097923, "grad_norm": 1.6732937097549438, "learning_rate": 9.720631785044386e-07, "loss": 0.0175, "step": 194550 }, { "epoch": 2.078743522624072, "grad_norm": 3.7043957710266113, "learning_rate": 9.720576409484478e-07, "loss": 0.0321, "step": 194560 }, { "epoch": 2.0788503659383513, "grad_norm": 12.25668716430664, "learning_rate": 9.720521028594681e-07, "loss": 0.0251, "step": 194570 }, { "epoch": 2.078957209252631, "grad_norm": 11.64278793334961, "learning_rate": 9.72046564237506e-07, "loss": 0.0587, "step": 194580 }, { "epoch": 2.0790640525669106, "grad_norm": 1.8023287057876587, "learning_rate": 9.720410250825677e-07, "loss": 0.0335, "step": 194590 }, { "epoch": 2.07917089588119, "grad_norm": 0.14958229660987854, "learning_rate": 9.720354853946596e-07, "loss": 0.0525, "step": 194600 }, { "epoch": 2.07927773919547, "grad_norm": 5.445193767547607, "learning_rate": 9.720299451737877e-07, "loss": 0.0234, "step": 194610 }, { "epoch": 2.0793845825097494, "grad_norm": 3.170722246170044, "learning_rate": 9.720244044199583e-07, "loss": 0.0397, "step": 194620 }, { "epoch": 2.079491425824029, "grad_norm": 0.7268801331520081, "learning_rate": 9.720188631331778e-07, "loss": 0.0151, "step": 194630 }, { "epoch": 2.079598269138309, "grad_norm": 8.625925064086914, "learning_rate": 9.720133213134523e-07, "loss": 0.0477, "step": 194640 }, { "epoch": 2.0797051124525883, "grad_norm": 0.008574665524065495, "learning_rate": 9.720077789607884e-07, "loss": 0.066, "step": 194650 }, { "epoch": 2.0798119557668677, "grad_norm": 0.8918961882591248, "learning_rate": 9.720022360751918e-07, "loss": 0.0331, "step": 194660 }, { "epoch": 2.0799187990811476, "grad_norm": 0.257975310087204, "learning_rate": 9.719966926566691e-07, "loss": 0.009, "step": 194670 }, { "epoch": 2.080025642395427, "grad_norm": 0.0562380813062191, "learning_rate": 9.719911487052265e-07, "loss": 0.0213, "step": 194680 }, { "epoch": 2.0801324857097065, "grad_norm": 9.845410346984863, "learning_rate": 9.719856042208705e-07, "loss": 0.0208, "step": 194690 }, { "epoch": 2.0802393290239864, "grad_norm": 0.11362313479185104, "learning_rate": 9.719800592036068e-07, "loss": 0.0396, "step": 194700 }, { "epoch": 2.080346172338266, "grad_norm": 0.24089555442333221, "learning_rate": 9.719745136534422e-07, "loss": 0.0097, "step": 194710 }, { "epoch": 2.0804530156525454, "grad_norm": 0.09076370298862457, "learning_rate": 9.719689675703826e-07, "loss": 0.0056, "step": 194720 }, { "epoch": 2.0805598589668253, "grad_norm": 2.976280450820923, "learning_rate": 9.719634209544347e-07, "loss": 0.0111, "step": 194730 }, { "epoch": 2.0806667022811047, "grad_norm": 2.0965113639831543, "learning_rate": 9.719578738056043e-07, "loss": 0.0103, "step": 194740 }, { "epoch": 2.080773545595384, "grad_norm": 4.059394836425781, "learning_rate": 9.719523261238977e-07, "loss": 0.0232, "step": 194750 }, { "epoch": 2.080880388909664, "grad_norm": 9.074726104736328, "learning_rate": 9.719467779093216e-07, "loss": 0.0056, "step": 194760 }, { "epoch": 2.0809872322239436, "grad_norm": 0.07062669843435287, "learning_rate": 9.719412291618817e-07, "loss": 0.0202, "step": 194770 }, { "epoch": 2.081094075538223, "grad_norm": 1.4991556406021118, "learning_rate": 9.719356798815846e-07, "loss": 0.0225, "step": 194780 }, { "epoch": 2.081200918852503, "grad_norm": 0.411877304315567, "learning_rate": 9.719301300684364e-07, "loss": 0.0122, "step": 194790 }, { "epoch": 2.0813077621667824, "grad_norm": 0.6243487000465393, "learning_rate": 9.719245797224436e-07, "loss": 0.0693, "step": 194800 }, { "epoch": 2.081414605481062, "grad_norm": 0.08764342963695526, "learning_rate": 9.719190288436124e-07, "loss": 0.0457, "step": 194810 }, { "epoch": 2.0815214487953417, "grad_norm": 9.46738052368164, "learning_rate": 9.71913477431949e-07, "loss": 0.0555, "step": 194820 }, { "epoch": 2.081628292109621, "grad_norm": 1.2642573118209839, "learning_rate": 9.719079254874596e-07, "loss": 0.0098, "step": 194830 }, { "epoch": 2.0817351354239007, "grad_norm": 15.952704429626465, "learning_rate": 9.719023730101506e-07, "loss": 0.1172, "step": 194840 }, { "epoch": 2.0818419787381806, "grad_norm": 13.366103172302246, "learning_rate": 9.71896820000028e-07, "loss": 0.0768, "step": 194850 }, { "epoch": 2.08194882205246, "grad_norm": 0.30531278252601624, "learning_rate": 9.718912664570984e-07, "loss": 0.0145, "step": 194860 }, { "epoch": 2.0820556653667395, "grad_norm": 0.1988750696182251, "learning_rate": 9.71885712381368e-07, "loss": 0.0162, "step": 194870 }, { "epoch": 2.0821625086810194, "grad_norm": 2.545501232147217, "learning_rate": 9.718801577728431e-07, "loss": 0.0051, "step": 194880 }, { "epoch": 2.082269351995299, "grad_norm": 6.615373611450195, "learning_rate": 9.718746026315297e-07, "loss": 0.039, "step": 194890 }, { "epoch": 2.0823761953095783, "grad_norm": 8.730880737304688, "learning_rate": 9.718690469574343e-07, "loss": 0.0202, "step": 194900 }, { "epoch": 2.082483038623858, "grad_norm": 0.415210098028183, "learning_rate": 9.71863490750563e-07, "loss": 0.0181, "step": 194910 }, { "epoch": 2.0825898819381377, "grad_norm": 3.369817018508911, "learning_rate": 9.718579340109223e-07, "loss": 0.0233, "step": 194920 }, { "epoch": 2.0826967252524176, "grad_norm": 0.031290553510189056, "learning_rate": 9.718523767385185e-07, "loss": 0.0088, "step": 194930 }, { "epoch": 2.082803568566697, "grad_norm": 0.13510830700397491, "learning_rate": 9.718468189333578e-07, "loss": 0.0051, "step": 194940 }, { "epoch": 2.0829104118809765, "grad_norm": 10.958857536315918, "learning_rate": 9.718412605954463e-07, "loss": 0.0267, "step": 194950 }, { "epoch": 2.083017255195256, "grad_norm": 0.04125111922621727, "learning_rate": 9.718357017247904e-07, "loss": 0.0079, "step": 194960 }, { "epoch": 2.083124098509536, "grad_norm": 1.2428498268127441, "learning_rate": 9.718301423213963e-07, "loss": 0.0484, "step": 194970 }, { "epoch": 2.0832309418238153, "grad_norm": 0.09797652065753937, "learning_rate": 9.718245823852706e-07, "loss": 0.0083, "step": 194980 }, { "epoch": 2.083337785138095, "grad_norm": 0.030481642112135887, "learning_rate": 9.718190219164192e-07, "loss": 0.003, "step": 194990 }, { "epoch": 2.0834446284523747, "grad_norm": 0.03244541212916374, "learning_rate": 9.718134609148486e-07, "loss": 0.0053, "step": 195000 }, { "epoch": 2.083551471766654, "grad_norm": 0.27561306953430176, "learning_rate": 9.718078993805648e-07, "loss": 0.0309, "step": 195010 }, { "epoch": 2.083658315080934, "grad_norm": 1.3033196926116943, "learning_rate": 9.718023373135744e-07, "loss": 0.018, "step": 195020 }, { "epoch": 2.0837651583952135, "grad_norm": 0.6648383736610413, "learning_rate": 9.717967747138835e-07, "loss": 0.0878, "step": 195030 }, { "epoch": 2.083872001709493, "grad_norm": 0.2067103534936905, "learning_rate": 9.717912115814986e-07, "loss": 0.0297, "step": 195040 }, { "epoch": 2.083978845023773, "grad_norm": 0.01529950276017189, "learning_rate": 9.717856479164256e-07, "loss": 0.0104, "step": 195050 }, { "epoch": 2.0840856883380523, "grad_norm": 0.3921493887901306, "learning_rate": 9.71780083718671e-07, "loss": 0.0047, "step": 195060 }, { "epoch": 2.0841925316523318, "grad_norm": 0.1328357309103012, "learning_rate": 9.717745189882412e-07, "loss": 0.0078, "step": 195070 }, { "epoch": 2.0842993749666117, "grad_norm": 4.564439296722412, "learning_rate": 9.717689537251422e-07, "loss": 0.0163, "step": 195080 }, { "epoch": 2.084406218280891, "grad_norm": 5.690310478210449, "learning_rate": 9.717633879293807e-07, "loss": 0.013, "step": 195090 }, { "epoch": 2.0845130615951706, "grad_norm": 2.698542356491089, "learning_rate": 9.717578216009626e-07, "loss": 0.0187, "step": 195100 }, { "epoch": 2.0846199049094505, "grad_norm": 0.012935279868543148, "learning_rate": 9.717522547398943e-07, "loss": 0.0205, "step": 195110 }, { "epoch": 2.08472674822373, "grad_norm": 0.005194897763431072, "learning_rate": 9.717466873461822e-07, "loss": 0.0288, "step": 195120 }, { "epoch": 2.0848335915380094, "grad_norm": 2.640977621078491, "learning_rate": 9.717411194198324e-07, "loss": 0.0092, "step": 195130 }, { "epoch": 2.0849404348522893, "grad_norm": 7.815374374389648, "learning_rate": 9.71735550960851e-07, "loss": 0.0335, "step": 195140 }, { "epoch": 2.0850472781665688, "grad_norm": 1.406785488128662, "learning_rate": 9.717299819692452e-07, "loss": 0.0241, "step": 195150 }, { "epoch": 2.085154121480848, "grad_norm": 0.10745742172002792, "learning_rate": 9.7172441244502e-07, "loss": 0.0099, "step": 195160 }, { "epoch": 2.085260964795128, "grad_norm": 0.37206965684890747, "learning_rate": 9.717188423881826e-07, "loss": 0.0126, "step": 195170 }, { "epoch": 2.0853678081094076, "grad_norm": 0.03853815048933029, "learning_rate": 9.71713271798739e-07, "loss": 0.0235, "step": 195180 }, { "epoch": 2.085474651423687, "grad_norm": 0.019737057387828827, "learning_rate": 9.717077006766956e-07, "loss": 0.0155, "step": 195190 }, { "epoch": 2.085581494737967, "grad_norm": 0.08842750638723373, "learning_rate": 9.717021290220586e-07, "loss": 0.0061, "step": 195200 }, { "epoch": 2.0856883380522464, "grad_norm": 0.2606051564216614, "learning_rate": 9.716965568348343e-07, "loss": 0.0256, "step": 195210 }, { "epoch": 2.085795181366526, "grad_norm": 1.5654654502868652, "learning_rate": 9.716909841150288e-07, "loss": 0.0205, "step": 195220 }, { "epoch": 2.0859020246808058, "grad_norm": 1.6143388748168945, "learning_rate": 9.716854108626488e-07, "loss": 0.019, "step": 195230 }, { "epoch": 2.0860088679950852, "grad_norm": 0.1779712587594986, "learning_rate": 9.716798370777002e-07, "loss": 0.1034, "step": 195240 }, { "epoch": 2.0861157113093647, "grad_norm": 1.1247234344482422, "learning_rate": 9.716742627601898e-07, "loss": 0.0271, "step": 195250 }, { "epoch": 2.0862225546236446, "grad_norm": 0.07473494112491608, "learning_rate": 9.71668687910123e-07, "loss": 0.0339, "step": 195260 }, { "epoch": 2.086329397937924, "grad_norm": 1.7696151733398438, "learning_rate": 9.71663112527507e-07, "loss": 0.0254, "step": 195270 }, { "epoch": 2.0864362412522035, "grad_norm": 0.03395712003111839, "learning_rate": 9.71657536612348e-07, "loss": 0.0052, "step": 195280 }, { "epoch": 2.0865430845664834, "grad_norm": 0.032778192311525345, "learning_rate": 9.716519601646515e-07, "loss": 0.0111, "step": 195290 }, { "epoch": 2.086649927880763, "grad_norm": 10.064840316772461, "learning_rate": 9.716463831844246e-07, "loss": 0.0285, "step": 195300 }, { "epoch": 2.0867567711950423, "grad_norm": 0.377414733171463, "learning_rate": 9.716408056716735e-07, "loss": 0.0294, "step": 195310 }, { "epoch": 2.0868636145093222, "grad_norm": 0.09705977141857147, "learning_rate": 9.71635227626404e-07, "loss": 0.0127, "step": 195320 }, { "epoch": 2.0869704578236017, "grad_norm": 0.5020758509635925, "learning_rate": 9.716296490486229e-07, "loss": 0.0305, "step": 195330 }, { "epoch": 2.087077301137881, "grad_norm": 0.04716639965772629, "learning_rate": 9.716240699383364e-07, "loss": 0.0204, "step": 195340 }, { "epoch": 2.087184144452161, "grad_norm": 7.1395158767700195, "learning_rate": 9.716184902955506e-07, "loss": 0.0185, "step": 195350 }, { "epoch": 2.0872909877664405, "grad_norm": 1.3659707307815552, "learning_rate": 9.71612910120272e-07, "loss": 0.0051, "step": 195360 }, { "epoch": 2.08739783108072, "grad_norm": 0.028271617367863655, "learning_rate": 9.71607329412507e-07, "loss": 0.0073, "step": 195370 }, { "epoch": 2.087504674395, "grad_norm": 3.548414707183838, "learning_rate": 9.716017481722614e-07, "loss": 0.0039, "step": 195380 }, { "epoch": 2.0876115177092793, "grad_norm": 0.0029623848386108875, "learning_rate": 9.71596166399542e-07, "loss": 0.0324, "step": 195390 }, { "epoch": 2.087718361023559, "grad_norm": 0.007214424200356007, "learning_rate": 9.715905840943547e-07, "loss": 0.0111, "step": 195400 }, { "epoch": 2.0878252043378387, "grad_norm": 6.275242805480957, "learning_rate": 9.715850012567063e-07, "loss": 0.0261, "step": 195410 }, { "epoch": 2.087932047652118, "grad_norm": 15.929028511047363, "learning_rate": 9.715794178866029e-07, "loss": 0.0363, "step": 195420 }, { "epoch": 2.0880388909663976, "grad_norm": 0.01907062530517578, "learning_rate": 9.715738339840507e-07, "loss": 0.0102, "step": 195430 }, { "epoch": 2.0881457342806775, "grad_norm": 0.06756541132926941, "learning_rate": 9.71568249549056e-07, "loss": 0.0506, "step": 195440 }, { "epoch": 2.088252577594957, "grad_norm": 0.26650139689445496, "learning_rate": 9.715626645816252e-07, "loss": 0.025, "step": 195450 }, { "epoch": 2.0883594209092364, "grad_norm": 0.5167204141616821, "learning_rate": 9.715570790817645e-07, "loss": 0.0095, "step": 195460 }, { "epoch": 2.0884662642235163, "grad_norm": 0.49057459831237793, "learning_rate": 9.715514930494801e-07, "loss": 0.0151, "step": 195470 }, { "epoch": 2.088573107537796, "grad_norm": 2.326233148574829, "learning_rate": 9.715459064847786e-07, "loss": 0.0073, "step": 195480 }, { "epoch": 2.0886799508520753, "grad_norm": 0.25458866357803345, "learning_rate": 9.715403193876664e-07, "loss": 0.0059, "step": 195490 }, { "epoch": 2.088786794166355, "grad_norm": 0.2459399402141571, "learning_rate": 9.715347317581494e-07, "loss": 0.0237, "step": 195500 }, { "epoch": 2.0888936374806346, "grad_norm": 0.10852843523025513, "learning_rate": 9.71529143596234e-07, "loss": 0.0047, "step": 195510 }, { "epoch": 2.089000480794914, "grad_norm": 0.006083607207983732, "learning_rate": 9.715235549019269e-07, "loss": 0.035, "step": 195520 }, { "epoch": 2.089107324109194, "grad_norm": 1.454377293586731, "learning_rate": 9.71517965675234e-07, "loss": 0.0119, "step": 195530 }, { "epoch": 2.0892141674234734, "grad_norm": 1.6339356899261475, "learning_rate": 9.715123759161615e-07, "loss": 0.0136, "step": 195540 }, { "epoch": 2.089321010737753, "grad_norm": 2.394566059112549, "learning_rate": 9.715067856247161e-07, "loss": 0.0155, "step": 195550 }, { "epoch": 2.089427854052033, "grad_norm": 4.374111652374268, "learning_rate": 9.71501194800904e-07, "loss": 0.0403, "step": 195560 }, { "epoch": 2.0895346973663123, "grad_norm": 4.235976696014404, "learning_rate": 9.714956034447314e-07, "loss": 0.0102, "step": 195570 }, { "epoch": 2.0896415406805917, "grad_norm": 0.024144276976585388, "learning_rate": 9.714900115562048e-07, "loss": 0.0118, "step": 195580 }, { "epoch": 2.0897483839948716, "grad_norm": 13.640806198120117, "learning_rate": 9.714844191353303e-07, "loss": 0.0654, "step": 195590 }, { "epoch": 2.089855227309151, "grad_norm": 1.5865795612335205, "learning_rate": 9.714788261821142e-07, "loss": 0.0275, "step": 195600 }, { "epoch": 2.0899620706234305, "grad_norm": 10.90941333770752, "learning_rate": 9.71473232696563e-07, "loss": 0.0372, "step": 195610 }, { "epoch": 2.0900689139377104, "grad_norm": 0.16114704310894012, "learning_rate": 9.71467638678683e-07, "loss": 0.0299, "step": 195620 }, { "epoch": 2.09017575725199, "grad_norm": 0.6876720190048218, "learning_rate": 9.714620441284806e-07, "loss": 0.013, "step": 195630 }, { "epoch": 2.0902826005662694, "grad_norm": 5.481914520263672, "learning_rate": 9.714564490459617e-07, "loss": 0.0131, "step": 195640 }, { "epoch": 2.0903894438805493, "grad_norm": 1.138022780418396, "learning_rate": 9.71450853431133e-07, "loss": 0.0273, "step": 195650 }, { "epoch": 2.0904962871948287, "grad_norm": 0.10103143751621246, "learning_rate": 9.714452572840007e-07, "loss": 0.0255, "step": 195660 }, { "epoch": 2.090603130509108, "grad_norm": 1.8443495035171509, "learning_rate": 9.714396606045713e-07, "loss": 0.0364, "step": 195670 }, { "epoch": 2.090709973823388, "grad_norm": 7.345730781555176, "learning_rate": 9.714340633928506e-07, "loss": 0.0423, "step": 195680 }, { "epoch": 2.0908168171376675, "grad_norm": 0.03877220302820206, "learning_rate": 9.714284656488454e-07, "loss": 0.0218, "step": 195690 }, { "epoch": 2.0909236604519474, "grad_norm": 2.0657215118408203, "learning_rate": 9.71422867372562e-07, "loss": 0.0109, "step": 195700 }, { "epoch": 2.091030503766227, "grad_norm": 0.038039300590753555, "learning_rate": 9.714172685640063e-07, "loss": 0.0208, "step": 195710 }, { "epoch": 2.0911373470805064, "grad_norm": 0.041681673377752304, "learning_rate": 9.714116692231851e-07, "loss": 0.0026, "step": 195720 }, { "epoch": 2.091244190394786, "grad_norm": 3.1952626705169678, "learning_rate": 9.714060693501048e-07, "loss": 0.0151, "step": 195730 }, { "epoch": 2.0913510337090657, "grad_norm": 0.20054568350315094, "learning_rate": 9.71400468944771e-07, "loss": 0.0147, "step": 195740 }, { "epoch": 2.091457877023345, "grad_norm": 0.21837256848812103, "learning_rate": 9.713948680071908e-07, "loss": 0.0061, "step": 195750 }, { "epoch": 2.091564720337625, "grad_norm": 2.1210954189300537, "learning_rate": 9.713892665373701e-07, "loss": 0.0211, "step": 195760 }, { "epoch": 2.0916715636519045, "grad_norm": 4.041661739349365, "learning_rate": 9.713836645353155e-07, "loss": 0.0151, "step": 195770 }, { "epoch": 2.091778406966184, "grad_norm": 0.0651310458779335, "learning_rate": 9.71378062001033e-07, "loss": 0.0009, "step": 195780 }, { "epoch": 2.091885250280464, "grad_norm": 6.779961585998535, "learning_rate": 9.71372458934529e-07, "loss": 0.0155, "step": 195790 }, { "epoch": 2.0919920935947434, "grad_norm": 0.0012225706595927477, "learning_rate": 9.7136685533581e-07, "loss": 0.0089, "step": 195800 }, { "epoch": 2.092098936909023, "grad_norm": 0.17800749838352203, "learning_rate": 9.713612512048825e-07, "loss": 0.012, "step": 195810 }, { "epoch": 2.0922057802233027, "grad_norm": 2.008549928665161, "learning_rate": 9.713556465417525e-07, "loss": 0.0407, "step": 195820 }, { "epoch": 2.092312623537582, "grad_norm": 4.449791431427002, "learning_rate": 9.713500413464262e-07, "loss": 0.0179, "step": 195830 }, { "epoch": 2.0924194668518616, "grad_norm": 0.1116226315498352, "learning_rate": 9.713444356189103e-07, "loss": 0.0183, "step": 195840 }, { "epoch": 2.0925263101661415, "grad_norm": 1.183433175086975, "learning_rate": 9.713388293592108e-07, "loss": 0.0226, "step": 195850 }, { "epoch": 2.092633153480421, "grad_norm": 2.1439836025238037, "learning_rate": 9.713332225673341e-07, "loss": 0.0215, "step": 195860 }, { "epoch": 2.0927399967947005, "grad_norm": 1.0289485454559326, "learning_rate": 9.713276152432867e-07, "loss": 0.0264, "step": 195870 }, { "epoch": 2.0928468401089804, "grad_norm": 7.7143073081970215, "learning_rate": 9.713220073870752e-07, "loss": 0.0304, "step": 195880 }, { "epoch": 2.09295368342326, "grad_norm": 0.24700546264648438, "learning_rate": 9.713163989987053e-07, "loss": 0.0362, "step": 195890 }, { "epoch": 2.0930605267375393, "grad_norm": 0.0056617469526827335, "learning_rate": 9.713107900781836e-07, "loss": 0.0444, "step": 195900 }, { "epoch": 2.093167370051819, "grad_norm": 0.012155266478657722, "learning_rate": 9.713051806255163e-07, "loss": 0.0341, "step": 195910 }, { "epoch": 2.0932742133660986, "grad_norm": 2.4126992225646973, "learning_rate": 9.712995706407102e-07, "loss": 0.0739, "step": 195920 }, { "epoch": 2.093381056680378, "grad_norm": 1.7558499574661255, "learning_rate": 9.71293960123771e-07, "loss": 0.0146, "step": 195930 }, { "epoch": 2.093487899994658, "grad_norm": 0.6549064517021179, "learning_rate": 9.712883490747057e-07, "loss": 0.0531, "step": 195940 }, { "epoch": 2.0935947433089375, "grad_norm": 0.008360839448869228, "learning_rate": 9.7128273749352e-07, "loss": 0.0169, "step": 195950 }, { "epoch": 2.093701586623217, "grad_norm": 0.5868673324584961, "learning_rate": 9.712771253802206e-07, "loss": 0.0109, "step": 195960 }, { "epoch": 2.093808429937497, "grad_norm": 0.014350784942507744, "learning_rate": 9.712715127348137e-07, "loss": 0.0005, "step": 195970 }, { "epoch": 2.0939152732517763, "grad_norm": 8.221288681030273, "learning_rate": 9.712658995573057e-07, "loss": 0.0198, "step": 195980 }, { "epoch": 2.0940221165660557, "grad_norm": 3.306173086166382, "learning_rate": 9.712602858477031e-07, "loss": 0.0092, "step": 195990 }, { "epoch": 2.0941289598803356, "grad_norm": 0.32843562960624695, "learning_rate": 9.71254671606012e-07, "loss": 0.0156, "step": 196000 }, { "epoch": 2.094235803194615, "grad_norm": 0.02883586473762989, "learning_rate": 9.712490568322387e-07, "loss": 0.0301, "step": 196010 }, { "epoch": 2.0943426465088946, "grad_norm": 3.4436724185943604, "learning_rate": 9.712434415263897e-07, "loss": 0.0235, "step": 196020 }, { "epoch": 2.0944494898231745, "grad_norm": 9.66778564453125, "learning_rate": 9.712378256884713e-07, "loss": 0.0207, "step": 196030 }, { "epoch": 2.094556333137454, "grad_norm": 0.04733042046427727, "learning_rate": 9.712322093184899e-07, "loss": 0.0155, "step": 196040 }, { "epoch": 2.0946631764517334, "grad_norm": 0.02612987719476223, "learning_rate": 9.712265924164516e-07, "loss": 0.0218, "step": 196050 }, { "epoch": 2.0947700197660133, "grad_norm": 0.005861944053322077, "learning_rate": 9.712209749823632e-07, "loss": 0.0108, "step": 196060 }, { "epoch": 2.0948768630802928, "grad_norm": 2.6667404174804688, "learning_rate": 9.712153570162305e-07, "loss": 0.0081, "step": 196070 }, { "epoch": 2.094983706394572, "grad_norm": 0.5953145623207092, "learning_rate": 9.712097385180603e-07, "loss": 0.0213, "step": 196080 }, { "epoch": 2.095090549708852, "grad_norm": 2.226611614227295, "learning_rate": 9.712041194878585e-07, "loss": 0.0156, "step": 196090 }, { "epoch": 2.0951973930231316, "grad_norm": 0.03194788843393326, "learning_rate": 9.711984999256318e-07, "loss": 0.0111, "step": 196100 }, { "epoch": 2.095304236337411, "grad_norm": 2.4010653495788574, "learning_rate": 9.711928798313864e-07, "loss": 0.019, "step": 196110 }, { "epoch": 2.095411079651691, "grad_norm": 0.08255940675735474, "learning_rate": 9.711872592051287e-07, "loss": 0.0092, "step": 196120 }, { "epoch": 2.0955179229659704, "grad_norm": 3.3411684036254883, "learning_rate": 9.711816380468649e-07, "loss": 0.0377, "step": 196130 }, { "epoch": 2.09562476628025, "grad_norm": 3.6785531044006348, "learning_rate": 9.711760163566017e-07, "loss": 0.0182, "step": 196140 }, { "epoch": 2.0957316095945298, "grad_norm": 8.092032432556152, "learning_rate": 9.71170394134345e-07, "loss": 0.0109, "step": 196150 }, { "epoch": 2.095838452908809, "grad_norm": 1.8058756589889526, "learning_rate": 9.711647713801015e-07, "loss": 0.0276, "step": 196160 }, { "epoch": 2.0959452962230887, "grad_norm": 1.1421265602111816, "learning_rate": 9.711591480938775e-07, "loss": 0.0253, "step": 196170 }, { "epoch": 2.0960521395373686, "grad_norm": 2.0107645988464355, "learning_rate": 9.71153524275679e-07, "loss": 0.0159, "step": 196180 }, { "epoch": 2.096158982851648, "grad_norm": 0.3340972363948822, "learning_rate": 9.71147899925513e-07, "loss": 0.025, "step": 196190 }, { "epoch": 2.0962658261659275, "grad_norm": 0.010560154914855957, "learning_rate": 9.71142275043385e-07, "loss": 0.0168, "step": 196200 }, { "epoch": 2.0963726694802074, "grad_norm": 0.0374472439289093, "learning_rate": 9.71136649629302e-07, "loss": 0.0446, "step": 196210 }, { "epoch": 2.096479512794487, "grad_norm": 4.77492094039917, "learning_rate": 9.711310236832704e-07, "loss": 0.0649, "step": 196220 }, { "epoch": 2.0965863561087663, "grad_norm": 0.3224993646144867, "learning_rate": 9.71125397205296e-07, "loss": 0.0486, "step": 196230 }, { "epoch": 2.096693199423046, "grad_norm": 0.08860503137111664, "learning_rate": 9.711197701953856e-07, "loss": 0.037, "step": 196240 }, { "epoch": 2.0968000427373257, "grad_norm": 0.11862192302942276, "learning_rate": 9.711141426535454e-07, "loss": 0.0395, "step": 196250 }, { "epoch": 2.096906886051605, "grad_norm": 2.974318742752075, "learning_rate": 9.711085145797819e-07, "loss": 0.0165, "step": 196260 }, { "epoch": 2.097013729365885, "grad_norm": 10.628368377685547, "learning_rate": 9.711028859741011e-07, "loss": 0.0324, "step": 196270 }, { "epoch": 2.0971205726801645, "grad_norm": 0.15950754284858704, "learning_rate": 9.710972568365097e-07, "loss": 0.013, "step": 196280 }, { "epoch": 2.097227415994444, "grad_norm": 0.01563325710594654, "learning_rate": 9.71091627167014e-07, "loss": 0.0058, "step": 196290 }, { "epoch": 2.097334259308724, "grad_norm": 2.319060802459717, "learning_rate": 9.710859969656202e-07, "loss": 0.0157, "step": 196300 }, { "epoch": 2.0974411026230033, "grad_norm": 2.0032923221588135, "learning_rate": 9.710803662323348e-07, "loss": 0.0403, "step": 196310 }, { "epoch": 2.0975479459372828, "grad_norm": 5.248470306396484, "learning_rate": 9.710747349671642e-07, "loss": 0.0135, "step": 196320 }, { "epoch": 2.0976547892515627, "grad_norm": 0.1003139317035675, "learning_rate": 9.710691031701144e-07, "loss": 0.0132, "step": 196330 }, { "epoch": 2.097761632565842, "grad_norm": 9.092061042785645, "learning_rate": 9.710634708411924e-07, "loss": 0.0326, "step": 196340 }, { "epoch": 2.0978684758801216, "grad_norm": 14.556147575378418, "learning_rate": 9.71057837980404e-07, "loss": 0.1204, "step": 196350 }, { "epoch": 2.0979753191944015, "grad_norm": 0.02058768831193447, "learning_rate": 9.710522045877557e-07, "loss": 0.0174, "step": 196360 }, { "epoch": 2.098082162508681, "grad_norm": 0.1952555775642395, "learning_rate": 9.71046570663254e-07, "loss": 0.0172, "step": 196370 }, { "epoch": 2.0981890058229604, "grad_norm": 0.025426549836993217, "learning_rate": 9.71040936206905e-07, "loss": 0.0143, "step": 196380 }, { "epoch": 2.0982958491372403, "grad_norm": 0.415935218334198, "learning_rate": 9.710353012187153e-07, "loss": 0.0086, "step": 196390 }, { "epoch": 2.09840269245152, "grad_norm": 4.019481658935547, "learning_rate": 9.710296656986915e-07, "loss": 0.0137, "step": 196400 }, { "epoch": 2.0985095357657997, "grad_norm": 0.012219533324241638, "learning_rate": 9.710240296468392e-07, "loss": 0.0138, "step": 196410 }, { "epoch": 2.098616379080079, "grad_norm": 0.09756229817867279, "learning_rate": 9.710183930631654e-07, "loss": 0.0122, "step": 196420 }, { "epoch": 2.0987232223943586, "grad_norm": 4.765023231506348, "learning_rate": 9.710127559476764e-07, "loss": 0.0283, "step": 196430 }, { "epoch": 2.098830065708638, "grad_norm": 2.4250991344451904, "learning_rate": 9.710071183003783e-07, "loss": 0.0333, "step": 196440 }, { "epoch": 2.098936909022918, "grad_norm": 0.26994627714157104, "learning_rate": 9.710014801212777e-07, "loss": 0.0277, "step": 196450 }, { "epoch": 2.0990437523371974, "grad_norm": 0.04979449883103371, "learning_rate": 9.709958414103808e-07, "loss": 0.0381, "step": 196460 }, { "epoch": 2.0991505956514773, "grad_norm": 0.7331324815750122, "learning_rate": 9.70990202167694e-07, "loss": 0.018, "step": 196470 }, { "epoch": 2.099257438965757, "grad_norm": 7.022921562194824, "learning_rate": 9.70984562393224e-07, "loss": 0.0322, "step": 196480 }, { "epoch": 2.0993642822800362, "grad_norm": 0.04657026380300522, "learning_rate": 9.709789220869768e-07, "loss": 0.0203, "step": 196490 }, { "epoch": 2.099471125594316, "grad_norm": 0.06419666856527328, "learning_rate": 9.709732812489585e-07, "loss": 0.0105, "step": 196500 }, { "epoch": 2.0995779689085956, "grad_norm": 0.32311609387397766, "learning_rate": 9.70967639879176e-07, "loss": 0.0189, "step": 196510 }, { "epoch": 2.099684812222875, "grad_norm": 0.006708178669214249, "learning_rate": 9.709619979776357e-07, "loss": 0.0083, "step": 196520 }, { "epoch": 2.099791655537155, "grad_norm": 3.441465377807617, "learning_rate": 9.709563555443437e-07, "loss": 0.0154, "step": 196530 }, { "epoch": 2.0998984988514344, "grad_norm": 0.7157078385353088, "learning_rate": 9.709507125793064e-07, "loss": 0.0127, "step": 196540 }, { "epoch": 2.100005342165714, "grad_norm": 0.6304893493652344, "learning_rate": 9.709450690825302e-07, "loss": 0.0214, "step": 196550 }, { "epoch": 2.100112185479994, "grad_norm": 0.2783287465572357, "learning_rate": 9.709394250540213e-07, "loss": 0.0093, "step": 196560 }, { "epoch": 2.1002190287942732, "grad_norm": 0.5078169703483582, "learning_rate": 9.709337804937866e-07, "loss": 0.0079, "step": 196570 }, { "epoch": 2.1003258721085527, "grad_norm": 8.57306957244873, "learning_rate": 9.709281354018318e-07, "loss": 0.0506, "step": 196580 }, { "epoch": 2.1004327154228326, "grad_norm": 1.4307615756988525, "learning_rate": 9.709224897781635e-07, "loss": 0.0104, "step": 196590 }, { "epoch": 2.100539558737112, "grad_norm": 0.07725732773542404, "learning_rate": 9.709168436227885e-07, "loss": 0.0209, "step": 196600 }, { "epoch": 2.1006464020513915, "grad_norm": 0.3333362638950348, "learning_rate": 9.709111969357127e-07, "loss": 0.0104, "step": 196610 }, { "epoch": 2.1007532453656714, "grad_norm": 2.898761510848999, "learning_rate": 9.709055497169425e-07, "loss": 0.0177, "step": 196620 }, { "epoch": 2.100860088679951, "grad_norm": 3.5948667526245117, "learning_rate": 9.708999019664846e-07, "loss": 0.0306, "step": 196630 }, { "epoch": 2.1009669319942303, "grad_norm": 0.026904543861746788, "learning_rate": 9.70894253684345e-07, "loss": 0.0169, "step": 196640 }, { "epoch": 2.1010737753085102, "grad_norm": 0.4509771764278412, "learning_rate": 9.708886048705303e-07, "loss": 0.0112, "step": 196650 }, { "epoch": 2.1011806186227897, "grad_norm": 0.013719380833208561, "learning_rate": 9.70882955525047e-07, "loss": 0.0051, "step": 196660 }, { "epoch": 2.101287461937069, "grad_norm": 1.3397884368896484, "learning_rate": 9.70877305647901e-07, "loss": 0.0051, "step": 196670 }, { "epoch": 2.101394305251349, "grad_norm": 6.514270305633545, "learning_rate": 9.708716552390993e-07, "loss": 0.0194, "step": 196680 }, { "epoch": 2.1015011485656285, "grad_norm": 0.6914120316505432, "learning_rate": 9.708660042986475e-07, "loss": 0.0422, "step": 196690 }, { "epoch": 2.101607991879908, "grad_norm": 0.6464161276817322, "learning_rate": 9.708603528265528e-07, "loss": 0.0175, "step": 196700 }, { "epoch": 2.101714835194188, "grad_norm": 0.9534569978713989, "learning_rate": 9.708547008228212e-07, "loss": 0.0071, "step": 196710 }, { "epoch": 2.1018216785084674, "grad_norm": 3.469679117202759, "learning_rate": 9.708490482874592e-07, "loss": 0.0136, "step": 196720 }, { "epoch": 2.101928521822747, "grad_norm": 1.5490045547485352, "learning_rate": 9.708433952204728e-07, "loss": 0.0224, "step": 196730 }, { "epoch": 2.1020353651370267, "grad_norm": 2.9192049503326416, "learning_rate": 9.70837741621869e-07, "loss": 0.0478, "step": 196740 }, { "epoch": 2.102142208451306, "grad_norm": 1.041251540184021, "learning_rate": 9.708320874916536e-07, "loss": 0.0481, "step": 196750 }, { "epoch": 2.1022490517655856, "grad_norm": 0.06579424440860748, "learning_rate": 9.708264328298334e-07, "loss": 0.0428, "step": 196760 }, { "epoch": 2.1023558950798655, "grad_norm": 5.358817100524902, "learning_rate": 9.708207776364144e-07, "loss": 0.0313, "step": 196770 }, { "epoch": 2.102462738394145, "grad_norm": 1.4642493724822998, "learning_rate": 9.708151219114034e-07, "loss": 0.0341, "step": 196780 }, { "epoch": 2.1025695817084245, "grad_norm": 0.43148404359817505, "learning_rate": 9.708094656548067e-07, "loss": 0.0498, "step": 196790 }, { "epoch": 2.1026764250227044, "grad_norm": 0.3118983209133148, "learning_rate": 9.708038088666303e-07, "loss": 0.033, "step": 196800 }, { "epoch": 2.102783268336984, "grad_norm": 0.005035653244704008, "learning_rate": 9.70798151546881e-07, "loss": 0.0118, "step": 196810 }, { "epoch": 2.1028901116512633, "grad_norm": 0.011926887556910515, "learning_rate": 9.70792493695565e-07, "loss": 0.0173, "step": 196820 }, { "epoch": 2.102996954965543, "grad_norm": 0.07942261546850204, "learning_rate": 9.707868353126889e-07, "loss": 0.0045, "step": 196830 }, { "epoch": 2.1031037982798226, "grad_norm": 4.922708511352539, "learning_rate": 9.707811763982588e-07, "loss": 0.0169, "step": 196840 }, { "epoch": 2.103210641594102, "grad_norm": 3.6547722816467285, "learning_rate": 9.70775516952281e-07, "loss": 0.0143, "step": 196850 }, { "epoch": 2.103317484908382, "grad_norm": 2.2299795150756836, "learning_rate": 9.707698569747623e-07, "loss": 0.0098, "step": 196860 }, { "epoch": 2.1034243282226615, "grad_norm": 2.336540460586548, "learning_rate": 9.707641964657091e-07, "loss": 0.0284, "step": 196870 }, { "epoch": 2.103531171536941, "grad_norm": 0.9088403582572937, "learning_rate": 9.707585354251274e-07, "loss": 0.0153, "step": 196880 }, { "epoch": 2.103638014851221, "grad_norm": 0.3774225413799286, "learning_rate": 9.70752873853024e-07, "loss": 0.0496, "step": 196890 }, { "epoch": 2.1037448581655003, "grad_norm": 0.02192762866616249, "learning_rate": 9.70747211749405e-07, "loss": 0.0088, "step": 196900 }, { "epoch": 2.1038517014797797, "grad_norm": 1.771467685699463, "learning_rate": 9.707415491142765e-07, "loss": 0.0213, "step": 196910 }, { "epoch": 2.1039585447940596, "grad_norm": 0.18311834335327148, "learning_rate": 9.707358859476456e-07, "loss": 0.0212, "step": 196920 }, { "epoch": 2.104065388108339, "grad_norm": 1.9564286470413208, "learning_rate": 9.707302222495184e-07, "loss": 0.0101, "step": 196930 }, { "epoch": 2.1041722314226186, "grad_norm": 0.13955768942832947, "learning_rate": 9.70724558019901e-07, "loss": 0.0132, "step": 196940 }, { "epoch": 2.1042790747368985, "grad_norm": 0.0070711043663322926, "learning_rate": 9.707188932588002e-07, "loss": 0.0081, "step": 196950 }, { "epoch": 2.104385918051178, "grad_norm": 0.5640378594398499, "learning_rate": 9.707132279662222e-07, "loss": 0.0122, "step": 196960 }, { "epoch": 2.1044927613654574, "grad_norm": 1.9352630376815796, "learning_rate": 9.707075621421736e-07, "loss": 0.0393, "step": 196970 }, { "epoch": 2.1045996046797373, "grad_norm": 0.07717771083116531, "learning_rate": 9.707018957866603e-07, "loss": 0.0228, "step": 196980 }, { "epoch": 2.1047064479940167, "grad_norm": 0.041159939020872116, "learning_rate": 9.706962288996893e-07, "loss": 0.0076, "step": 196990 }, { "epoch": 2.104813291308296, "grad_norm": 0.19006237387657166, "learning_rate": 9.706905614812668e-07, "loss": 0.0434, "step": 197000 }, { "epoch": 2.104920134622576, "grad_norm": 17.191680908203125, "learning_rate": 9.706848935313988e-07, "loss": 0.045, "step": 197010 }, { "epoch": 2.1050269779368556, "grad_norm": 0.011108157224953175, "learning_rate": 9.706792250500923e-07, "loss": 0.0099, "step": 197020 }, { "epoch": 2.105133821251135, "grad_norm": 4.922554969787598, "learning_rate": 9.706735560373534e-07, "loss": 0.018, "step": 197030 }, { "epoch": 2.105240664565415, "grad_norm": 5.475356578826904, "learning_rate": 9.706678864931886e-07, "loss": 0.0277, "step": 197040 }, { "epoch": 2.1053475078796944, "grad_norm": 0.01264705415815115, "learning_rate": 9.706622164176042e-07, "loss": 0.0099, "step": 197050 }, { "epoch": 2.105454351193974, "grad_norm": 5.483910083770752, "learning_rate": 9.706565458106064e-07, "loss": 0.0512, "step": 197060 }, { "epoch": 2.1055611945082537, "grad_norm": 0.3382386267185211, "learning_rate": 9.706508746722021e-07, "loss": 0.0229, "step": 197070 }, { "epoch": 2.105668037822533, "grad_norm": 0.7713136076927185, "learning_rate": 9.706452030023974e-07, "loss": 0.0123, "step": 197080 }, { "epoch": 2.1057748811368127, "grad_norm": 0.888425886631012, "learning_rate": 9.706395308011986e-07, "loss": 0.0218, "step": 197090 }, { "epoch": 2.1058817244510926, "grad_norm": 0.015490268357098103, "learning_rate": 9.706338580686123e-07, "loss": 0.0317, "step": 197100 }, { "epoch": 2.105988567765372, "grad_norm": 2.897413969039917, "learning_rate": 9.70628184804645e-07, "loss": 0.0114, "step": 197110 }, { "epoch": 2.1060954110796515, "grad_norm": 2.143793821334839, "learning_rate": 9.706225110093026e-07, "loss": 0.0062, "step": 197120 }, { "epoch": 2.1062022543939314, "grad_norm": 0.14254677295684814, "learning_rate": 9.706168366825922e-07, "loss": 0.0132, "step": 197130 }, { "epoch": 2.106309097708211, "grad_norm": 0.6093930006027222, "learning_rate": 9.706111618245199e-07, "loss": 0.0071, "step": 197140 }, { "epoch": 2.1064159410224903, "grad_norm": 0.004726390354335308, "learning_rate": 9.706054864350919e-07, "loss": 0.0171, "step": 197150 }, { "epoch": 2.10652278433677, "grad_norm": 1.2265563011169434, "learning_rate": 9.70599810514315e-07, "loss": 0.0071, "step": 197160 }, { "epoch": 2.1066296276510497, "grad_norm": 0.04722324013710022, "learning_rate": 9.70594134062195e-07, "loss": 0.004, "step": 197170 }, { "epoch": 2.1067364709653296, "grad_norm": 0.04894256964325905, "learning_rate": 9.70588457078739e-07, "loss": 0.0188, "step": 197180 }, { "epoch": 2.106843314279609, "grad_norm": 4.144283294677734, "learning_rate": 9.705827795639532e-07, "loss": 0.0116, "step": 197190 }, { "epoch": 2.1069501575938885, "grad_norm": 4.390615463256836, "learning_rate": 9.705771015178439e-07, "loss": 0.0145, "step": 197200 }, { "epoch": 2.107057000908168, "grad_norm": 0.0798768550157547, "learning_rate": 9.705714229404174e-07, "loss": 0.0109, "step": 197210 }, { "epoch": 2.107163844222448, "grad_norm": 2.6290180683135986, "learning_rate": 9.705657438316803e-07, "loss": 0.0087, "step": 197220 }, { "epoch": 2.1072706875367273, "grad_norm": 2.7816200256347656, "learning_rate": 9.70560064191639e-07, "loss": 0.0243, "step": 197230 }, { "epoch": 2.107377530851007, "grad_norm": 0.014083260670304298, "learning_rate": 9.705543840202996e-07, "loss": 0.0144, "step": 197240 }, { "epoch": 2.1074843741652867, "grad_norm": 0.04444844275712967, "learning_rate": 9.705487033176691e-07, "loss": 0.0161, "step": 197250 }, { "epoch": 2.107591217479566, "grad_norm": 2.457669973373413, "learning_rate": 9.705430220837534e-07, "loss": 0.0247, "step": 197260 }, { "epoch": 2.107698060793846, "grad_norm": 1.9150625467300415, "learning_rate": 9.705373403185593e-07, "loss": 0.0235, "step": 197270 }, { "epoch": 2.1078049041081255, "grad_norm": 0.11073911190032959, "learning_rate": 9.705316580220929e-07, "loss": 0.0087, "step": 197280 }, { "epoch": 2.107911747422405, "grad_norm": 2.8636796474456787, "learning_rate": 9.705259751943608e-07, "loss": 0.0297, "step": 197290 }, { "epoch": 2.108018590736685, "grad_norm": 0.26038214564323425, "learning_rate": 9.705202918353694e-07, "loss": 0.0116, "step": 197300 }, { "epoch": 2.1081254340509643, "grad_norm": 5.076454162597656, "learning_rate": 9.705146079451249e-07, "loss": 0.0088, "step": 197310 }, { "epoch": 2.1082322773652438, "grad_norm": 1.9667762517929077, "learning_rate": 9.705089235236342e-07, "loss": 0.0219, "step": 197320 }, { "epoch": 2.1083391206795237, "grad_norm": 0.11311789602041245, "learning_rate": 9.705032385709031e-07, "loss": 0.0177, "step": 197330 }, { "epoch": 2.108445963993803, "grad_norm": 0.5541589856147766, "learning_rate": 9.704975530869386e-07, "loss": 0.07, "step": 197340 }, { "epoch": 2.1085528073080826, "grad_norm": 0.786144495010376, "learning_rate": 9.704918670717468e-07, "loss": 0.014, "step": 197350 }, { "epoch": 2.1086596506223625, "grad_norm": 12.184639930725098, "learning_rate": 9.70486180525334e-07, "loss": 0.0221, "step": 197360 }, { "epoch": 2.108766493936642, "grad_norm": 1.2041399478912354, "learning_rate": 9.704804934477068e-07, "loss": 0.0415, "step": 197370 }, { "epoch": 2.1088733372509214, "grad_norm": 0.516754150390625, "learning_rate": 9.704748058388718e-07, "loss": 0.0306, "step": 197380 }, { "epoch": 2.1089801805652013, "grad_norm": 0.0035742861218750477, "learning_rate": 9.704691176988352e-07, "loss": 0.0124, "step": 197390 }, { "epoch": 2.1090870238794808, "grad_norm": 0.01794566586613655, "learning_rate": 9.704634290276032e-07, "loss": 0.0503, "step": 197400 }, { "epoch": 2.1091938671937602, "grad_norm": 0.03831808269023895, "learning_rate": 9.704577398251829e-07, "loss": 0.0039, "step": 197410 }, { "epoch": 2.10930071050804, "grad_norm": 13.566018104553223, "learning_rate": 9.704520500915801e-07, "loss": 0.0569, "step": 197420 }, { "epoch": 2.1094075538223196, "grad_norm": 1.1908960342407227, "learning_rate": 9.704463598268013e-07, "loss": 0.0149, "step": 197430 }, { "epoch": 2.109514397136599, "grad_norm": 1.2867767810821533, "learning_rate": 9.704406690308533e-07, "loss": 0.0043, "step": 197440 }, { "epoch": 2.109621240450879, "grad_norm": 2.7570180892944336, "learning_rate": 9.70434977703742e-07, "loss": 0.0198, "step": 197450 }, { "epoch": 2.1097280837651584, "grad_norm": 2.3331544399261475, "learning_rate": 9.704292858454743e-07, "loss": 0.005, "step": 197460 }, { "epoch": 2.109834927079438, "grad_norm": 0.8715096116065979, "learning_rate": 9.704235934560563e-07, "loss": 0.0231, "step": 197470 }, { "epoch": 2.1099417703937178, "grad_norm": 0.4439932703971863, "learning_rate": 9.704179005354946e-07, "loss": 0.0045, "step": 197480 }, { "epoch": 2.1100486137079972, "grad_norm": 0.2283935248851776, "learning_rate": 9.704122070837957e-07, "loss": 0.0077, "step": 197490 }, { "epoch": 2.1101554570222767, "grad_norm": 7.862255573272705, "learning_rate": 9.70406513100966e-07, "loss": 0.0284, "step": 197500 }, { "epoch": 2.1102623003365566, "grad_norm": 0.03308764100074768, "learning_rate": 9.704008185870117e-07, "loss": 0.0093, "step": 197510 }, { "epoch": 2.110369143650836, "grad_norm": 0.006662700325250626, "learning_rate": 9.703951235419391e-07, "loss": 0.036, "step": 197520 }, { "epoch": 2.1104759869651155, "grad_norm": 0.6076493263244629, "learning_rate": 9.703894279657553e-07, "loss": 0.008, "step": 197530 }, { "epoch": 2.1105828302793954, "grad_norm": 0.027924582362174988, "learning_rate": 9.703837318584662e-07, "loss": 0.0344, "step": 197540 }, { "epoch": 2.110689673593675, "grad_norm": 0.04880299046635628, "learning_rate": 9.703780352200783e-07, "loss": 0.0135, "step": 197550 }, { "epoch": 2.1107965169079543, "grad_norm": 0.20315580070018768, "learning_rate": 9.703723380505982e-07, "loss": 0.0092, "step": 197560 }, { "epoch": 2.1109033602222342, "grad_norm": 45.194366455078125, "learning_rate": 9.703666403500323e-07, "loss": 0.0766, "step": 197570 }, { "epoch": 2.1110102035365137, "grad_norm": 0.008351386524736881, "learning_rate": 9.703609421183867e-07, "loss": 0.0131, "step": 197580 }, { "epoch": 2.111117046850793, "grad_norm": 3.302034616470337, "learning_rate": 9.703552433556683e-07, "loss": 0.0111, "step": 197590 }, { "epoch": 2.111223890165073, "grad_norm": 0.06719456613063812, "learning_rate": 9.703495440618834e-07, "loss": 0.0117, "step": 197600 }, { "epoch": 2.1113307334793525, "grad_norm": 0.8145293593406677, "learning_rate": 9.703438442370382e-07, "loss": 0.0206, "step": 197610 }, { "epoch": 2.111437576793632, "grad_norm": 1.2387380599975586, "learning_rate": 9.703381438811394e-07, "loss": 0.0622, "step": 197620 }, { "epoch": 2.111544420107912, "grad_norm": 0.01728706993162632, "learning_rate": 9.703324429941933e-07, "loss": 0.013, "step": 197630 }, { "epoch": 2.1116512634221913, "grad_norm": 0.1586998552083969, "learning_rate": 9.703267415762062e-07, "loss": 0.0052, "step": 197640 }, { "epoch": 2.111758106736471, "grad_norm": 5.632178783416748, "learning_rate": 9.70321039627185e-07, "loss": 0.0451, "step": 197650 }, { "epoch": 2.1118649500507507, "grad_norm": 2.637908697128296, "learning_rate": 9.703153371471357e-07, "loss": 0.0456, "step": 197660 }, { "epoch": 2.11197179336503, "grad_norm": 5.754884243011475, "learning_rate": 9.703096341360649e-07, "loss": 0.0068, "step": 197670 }, { "epoch": 2.1120786366793096, "grad_norm": 2.002145767211914, "learning_rate": 9.70303930593979e-07, "loss": 0.0343, "step": 197680 }, { "epoch": 2.1121854799935895, "grad_norm": 1.9593446254730225, "learning_rate": 9.702982265208846e-07, "loss": 0.0442, "step": 197690 }, { "epoch": 2.112292323307869, "grad_norm": 11.197585105895996, "learning_rate": 9.702925219167877e-07, "loss": 0.0281, "step": 197700 }, { "epoch": 2.1123991666221484, "grad_norm": 11.700369834899902, "learning_rate": 9.702868167816952e-07, "loss": 0.0191, "step": 197710 }, { "epoch": 2.1125060099364283, "grad_norm": 0.538049042224884, "learning_rate": 9.702811111156137e-07, "loss": 0.0162, "step": 197720 }, { "epoch": 2.112612853250708, "grad_norm": 0.00894258078187704, "learning_rate": 9.70275404918549e-07, "loss": 0.0077, "step": 197730 }, { "epoch": 2.1127196965649873, "grad_norm": 2.065020799636841, "learning_rate": 9.70269698190508e-07, "loss": 0.0057, "step": 197740 }, { "epoch": 2.112826539879267, "grad_norm": 0.039338141679763794, "learning_rate": 9.702639909314968e-07, "loss": 0.0232, "step": 197750 }, { "epoch": 2.1129333831935466, "grad_norm": 2.2663018703460693, "learning_rate": 9.702582831415223e-07, "loss": 0.0378, "step": 197760 }, { "epoch": 2.113040226507826, "grad_norm": 1.405385136604309, "learning_rate": 9.702525748205907e-07, "loss": 0.0155, "step": 197770 }, { "epoch": 2.113147069822106, "grad_norm": 0.13465146720409393, "learning_rate": 9.702468659687085e-07, "loss": 0.0102, "step": 197780 }, { "epoch": 2.1132539131363854, "grad_norm": 0.1362859159708023, "learning_rate": 9.70241156585882e-07, "loss": 0.0041, "step": 197790 }, { "epoch": 2.113360756450665, "grad_norm": 0.08494555205106735, "learning_rate": 9.702354466721176e-07, "loss": 0.0603, "step": 197800 }, { "epoch": 2.113467599764945, "grad_norm": 0.025495443493127823, "learning_rate": 9.70229736227422e-07, "loss": 0.0549, "step": 197810 }, { "epoch": 2.1135744430792243, "grad_norm": 0.003791405586525798, "learning_rate": 9.702240252518016e-07, "loss": 0.0022, "step": 197820 }, { "epoch": 2.1136812863935037, "grad_norm": 2.2246334552764893, "learning_rate": 9.702183137452627e-07, "loss": 0.0229, "step": 197830 }, { "epoch": 2.1137881297077836, "grad_norm": 2.101219654083252, "learning_rate": 9.702126017078118e-07, "loss": 0.0221, "step": 197840 }, { "epoch": 2.113894973022063, "grad_norm": 0.15553587675094604, "learning_rate": 9.702068891394557e-07, "loss": 0.0237, "step": 197850 }, { "epoch": 2.1140018163363425, "grad_norm": 0.16755110025405884, "learning_rate": 9.702011760402003e-07, "loss": 0.0218, "step": 197860 }, { "epoch": 2.1141086596506224, "grad_norm": 0.0825350433588028, "learning_rate": 9.701954624100521e-07, "loss": 0.0286, "step": 197870 }, { "epoch": 2.114215502964902, "grad_norm": 0.1460513323545456, "learning_rate": 9.70189748249018e-07, "loss": 0.0352, "step": 197880 }, { "epoch": 2.114322346279182, "grad_norm": 7.758638858795166, "learning_rate": 9.701840335571042e-07, "loss": 0.0207, "step": 197890 }, { "epoch": 2.1144291895934613, "grad_norm": 0.09723776578903198, "learning_rate": 9.701783183343169e-07, "loss": 0.0022, "step": 197900 }, { "epoch": 2.1145360329077407, "grad_norm": 0.00755043001845479, "learning_rate": 9.701726025806629e-07, "loss": 0.0064, "step": 197910 }, { "epoch": 2.11464287622202, "grad_norm": 0.007149643264710903, "learning_rate": 9.701668862961488e-07, "loss": 0.0135, "step": 197920 }, { "epoch": 2.1147497195363, "grad_norm": 0.014504976570606232, "learning_rate": 9.701611694807805e-07, "loss": 0.0363, "step": 197930 }, { "epoch": 2.1148565628505795, "grad_norm": 0.1662335842847824, "learning_rate": 9.701554521345648e-07, "loss": 0.014, "step": 197940 }, { "epoch": 2.1149634061648594, "grad_norm": 0.14315371215343475, "learning_rate": 9.701497342575082e-07, "loss": 0.0248, "step": 197950 }, { "epoch": 2.115070249479139, "grad_norm": 0.9322882890701294, "learning_rate": 9.70144015849617e-07, "loss": 0.0139, "step": 197960 }, { "epoch": 2.1151770927934184, "grad_norm": 0.030196750536561012, "learning_rate": 9.701382969108978e-07, "loss": 0.0131, "step": 197970 }, { "epoch": 2.1152839361076983, "grad_norm": 0.12249495089054108, "learning_rate": 9.701325774413568e-07, "loss": 0.0076, "step": 197980 }, { "epoch": 2.1153907794219777, "grad_norm": 1.961241364479065, "learning_rate": 9.701268574410007e-07, "loss": 0.02, "step": 197990 }, { "epoch": 2.115497622736257, "grad_norm": 0.2557215690612793, "learning_rate": 9.70121136909836e-07, "loss": 0.0056, "step": 198000 }, { "epoch": 2.115604466050537, "grad_norm": 2.1943020820617676, "learning_rate": 9.701154158478691e-07, "loss": 0.0274, "step": 198010 }, { "epoch": 2.1157113093648166, "grad_norm": 4.195426940917969, "learning_rate": 9.701096942551063e-07, "loss": 0.0068, "step": 198020 }, { "epoch": 2.115818152679096, "grad_norm": 0.011759525164961815, "learning_rate": 9.701039721315542e-07, "loss": 0.0182, "step": 198030 }, { "epoch": 2.115924995993376, "grad_norm": 1.0002129077911377, "learning_rate": 9.700982494772191e-07, "loss": 0.0217, "step": 198040 }, { "epoch": 2.1160318393076554, "grad_norm": 2.897775173187256, "learning_rate": 9.700925262921078e-07, "loss": 0.0191, "step": 198050 }, { "epoch": 2.116138682621935, "grad_norm": 5.03663969039917, "learning_rate": 9.700868025762266e-07, "loss": 0.019, "step": 198060 }, { "epoch": 2.1162455259362147, "grad_norm": 5.024557113647461, "learning_rate": 9.700810783295816e-07, "loss": 0.0586, "step": 198070 }, { "epoch": 2.116352369250494, "grad_norm": 2.1125004291534424, "learning_rate": 9.7007535355218e-07, "loss": 0.013, "step": 198080 }, { "epoch": 2.1164592125647737, "grad_norm": 0.30509963631629944, "learning_rate": 9.700696282440274e-07, "loss": 0.0098, "step": 198090 }, { "epoch": 2.1165660558790536, "grad_norm": 0.06669032573699951, "learning_rate": 9.700639024051312e-07, "loss": 0.0307, "step": 198100 }, { "epoch": 2.116672899193333, "grad_norm": 0.011803404428064823, "learning_rate": 9.700581760354972e-07, "loss": 0.0458, "step": 198110 }, { "epoch": 2.1167797425076125, "grad_norm": 0.052342839539051056, "learning_rate": 9.700524491351318e-07, "loss": 0.0204, "step": 198120 }, { "epoch": 2.1168865858218924, "grad_norm": 3.578965187072754, "learning_rate": 9.70046721704042e-07, "loss": 0.0444, "step": 198130 }, { "epoch": 2.116993429136172, "grad_norm": 0.410147488117218, "learning_rate": 9.700409937422338e-07, "loss": 0.0139, "step": 198140 }, { "epoch": 2.1171002724504513, "grad_norm": 7.11027193069458, "learning_rate": 9.70035265249714e-07, "loss": 0.0356, "step": 198150 }, { "epoch": 2.117207115764731, "grad_norm": 0.03209836408495903, "learning_rate": 9.700295362264886e-07, "loss": 0.0212, "step": 198160 }, { "epoch": 2.1173139590790107, "grad_norm": 0.689879298210144, "learning_rate": 9.700238066725645e-07, "loss": 0.0135, "step": 198170 }, { "epoch": 2.11742080239329, "grad_norm": 3.2265822887420654, "learning_rate": 9.700180765879482e-07, "loss": 0.0065, "step": 198180 }, { "epoch": 2.11752764570757, "grad_norm": 0.09585506469011307, "learning_rate": 9.70012345972646e-07, "loss": 0.0231, "step": 198190 }, { "epoch": 2.1176344890218495, "grad_norm": 0.04101140424609184, "learning_rate": 9.700066148266644e-07, "loss": 0.0113, "step": 198200 }, { "epoch": 2.117741332336129, "grad_norm": 0.047642070800065994, "learning_rate": 9.700008831500097e-07, "loss": 0.0258, "step": 198210 }, { "epoch": 2.117848175650409, "grad_norm": 0.12222032994031906, "learning_rate": 9.699951509426886e-07, "loss": 0.0172, "step": 198220 }, { "epoch": 2.1179550189646883, "grad_norm": 0.1543753445148468, "learning_rate": 9.699894182047077e-07, "loss": 0.0095, "step": 198230 }, { "epoch": 2.1180618622789678, "grad_norm": 1.448559045791626, "learning_rate": 9.69983684936073e-07, "loss": 0.0054, "step": 198240 }, { "epoch": 2.1181687055932477, "grad_norm": 0.05653860419988632, "learning_rate": 9.699779511367914e-07, "loss": 0.0296, "step": 198250 }, { "epoch": 2.118275548907527, "grad_norm": 0.5936263799667358, "learning_rate": 9.699722168068692e-07, "loss": 0.0539, "step": 198260 }, { "epoch": 2.1183823922218066, "grad_norm": 9.577733993530273, "learning_rate": 9.699664819463127e-07, "loss": 0.0261, "step": 198270 }, { "epoch": 2.1184892355360865, "grad_norm": 4.232828617095947, "learning_rate": 9.69960746555129e-07, "loss": 0.0178, "step": 198280 }, { "epoch": 2.118596078850366, "grad_norm": 0.2888416349887848, "learning_rate": 9.699550106333237e-07, "loss": 0.0173, "step": 198290 }, { "epoch": 2.1187029221646454, "grad_norm": 1.4876863956451416, "learning_rate": 9.69949274180904e-07, "loss": 0.0054, "step": 198300 }, { "epoch": 2.1188097654789253, "grad_norm": 5.759042739868164, "learning_rate": 9.699435371978759e-07, "loss": 0.0179, "step": 198310 }, { "epoch": 2.1189166087932048, "grad_norm": 0.4894270896911621, "learning_rate": 9.699377996842464e-07, "loss": 0.0174, "step": 198320 }, { "epoch": 2.119023452107484, "grad_norm": 2.5548508167266846, "learning_rate": 9.699320616400212e-07, "loss": 0.0688, "step": 198330 }, { "epoch": 2.119130295421764, "grad_norm": 0.009878339245915413, "learning_rate": 9.699263230652077e-07, "loss": 0.0114, "step": 198340 }, { "epoch": 2.1192371387360436, "grad_norm": 0.15241657197475433, "learning_rate": 9.699205839598116e-07, "loss": 0.0106, "step": 198350 }, { "epoch": 2.119343982050323, "grad_norm": 11.283933639526367, "learning_rate": 9.699148443238398e-07, "loss": 0.0565, "step": 198360 }, { "epoch": 2.119450825364603, "grad_norm": 0.6719502806663513, "learning_rate": 9.699091041572987e-07, "loss": 0.005, "step": 198370 }, { "epoch": 2.1195576686788824, "grad_norm": 0.3782861828804016, "learning_rate": 9.699033634601947e-07, "loss": 0.0264, "step": 198380 }, { "epoch": 2.119664511993162, "grad_norm": 6.032032489776611, "learning_rate": 9.698976222325343e-07, "loss": 0.0152, "step": 198390 }, { "epoch": 2.1197713553074418, "grad_norm": 3.487675666809082, "learning_rate": 9.69891880474324e-07, "loss": 0.0131, "step": 198400 }, { "epoch": 2.119878198621721, "grad_norm": 0.03988201543688774, "learning_rate": 9.698861381855704e-07, "loss": 0.0178, "step": 198410 }, { "epoch": 2.1199850419360007, "grad_norm": 3.129930257797241, "learning_rate": 9.698803953662799e-07, "loss": 0.0355, "step": 198420 }, { "epoch": 2.1200918852502806, "grad_norm": 1.0562658309936523, "learning_rate": 9.69874652016459e-07, "loss": 0.0246, "step": 198430 }, { "epoch": 2.12019872856456, "grad_norm": 1.9010720252990723, "learning_rate": 9.698689081361143e-07, "loss": 0.0419, "step": 198440 }, { "epoch": 2.1203055718788395, "grad_norm": 0.02867995947599411, "learning_rate": 9.69863163725252e-07, "loss": 0.0204, "step": 198450 }, { "epoch": 2.1204124151931194, "grad_norm": 0.22885020077228546, "learning_rate": 9.698574187838785e-07, "loss": 0.0367, "step": 198460 }, { "epoch": 2.120519258507399, "grad_norm": 0.012491014786064625, "learning_rate": 9.698516733120007e-07, "loss": 0.0242, "step": 198470 }, { "epoch": 2.1206261018216783, "grad_norm": 5.753483295440674, "learning_rate": 9.69845927309625e-07, "loss": 0.0082, "step": 198480 }, { "epoch": 2.1207329451359582, "grad_norm": 0.11167402565479279, "learning_rate": 9.698401807767575e-07, "loss": 0.0224, "step": 198490 }, { "epoch": 2.1208397884502377, "grad_norm": 3.5581483840942383, "learning_rate": 9.698344337134053e-07, "loss": 0.039, "step": 198500 }, { "epoch": 2.120946631764517, "grad_norm": 2.9988462924957275, "learning_rate": 9.698286861195743e-07, "loss": 0.0201, "step": 198510 }, { "epoch": 2.121053475078797, "grad_norm": 0.08829861134290695, "learning_rate": 9.698229379952714e-07, "loss": 0.0233, "step": 198520 }, { "epoch": 2.1211603183930765, "grad_norm": 2.6510493755340576, "learning_rate": 9.698171893405028e-07, "loss": 0.0043, "step": 198530 }, { "epoch": 2.121267161707356, "grad_norm": 0.01828150451183319, "learning_rate": 9.698114401552753e-07, "loss": 0.0093, "step": 198540 }, { "epoch": 2.121374005021636, "grad_norm": 0.016145268455147743, "learning_rate": 9.69805690439595e-07, "loss": 0.0157, "step": 198550 }, { "epoch": 2.1214808483359153, "grad_norm": 1.2469773292541504, "learning_rate": 9.69799940193469e-07, "loss": 0.0023, "step": 198560 }, { "epoch": 2.121587691650195, "grad_norm": 3.59529447555542, "learning_rate": 9.69794189416903e-07, "loss": 0.038, "step": 198570 }, { "epoch": 2.1216945349644747, "grad_norm": 2.1287662982940674, "learning_rate": 9.697884381099042e-07, "loss": 0.0356, "step": 198580 }, { "epoch": 2.121801378278754, "grad_norm": 3.3024632930755615, "learning_rate": 9.697826862724787e-07, "loss": 0.0279, "step": 198590 }, { "epoch": 2.1219082215930336, "grad_norm": 0.15997157990932465, "learning_rate": 9.697769339046332e-07, "loss": 0.0138, "step": 198600 }, { "epoch": 2.1220150649073135, "grad_norm": 0.1387026607990265, "learning_rate": 9.697711810063737e-07, "loss": 0.023, "step": 198610 }, { "epoch": 2.122121908221593, "grad_norm": 0.6316974759101868, "learning_rate": 9.697654275777075e-07, "loss": 0.0294, "step": 198620 }, { "epoch": 2.1222287515358724, "grad_norm": 1.172878384590149, "learning_rate": 9.697596736186404e-07, "loss": 0.0195, "step": 198630 }, { "epoch": 2.1223355948501523, "grad_norm": 0.56382155418396, "learning_rate": 9.697539191291793e-07, "loss": 0.0029, "step": 198640 }, { "epoch": 2.122442438164432, "grad_norm": 2.967015504837036, "learning_rate": 9.697481641093306e-07, "loss": 0.0057, "step": 198650 }, { "epoch": 2.1225492814787117, "grad_norm": 0.15179306268692017, "learning_rate": 9.69742408559101e-07, "loss": 0.0077, "step": 198660 }, { "epoch": 2.122656124792991, "grad_norm": 0.020151114091277122, "learning_rate": 9.697366524784963e-07, "loss": 0.0242, "step": 198670 }, { "epoch": 2.1227629681072706, "grad_norm": 0.02243920974433422, "learning_rate": 9.697308958675237e-07, "loss": 0.0038, "step": 198680 }, { "epoch": 2.12286981142155, "grad_norm": 0.08435487002134323, "learning_rate": 9.697251387261892e-07, "loss": 0.0167, "step": 198690 }, { "epoch": 2.12297665473583, "grad_norm": 2.435211658477783, "learning_rate": 9.697193810545001e-07, "loss": 0.0162, "step": 198700 }, { "epoch": 2.1230834980501094, "grad_norm": 1.5471124649047852, "learning_rate": 9.697136228524619e-07, "loss": 0.0105, "step": 198710 }, { "epoch": 2.1231903413643893, "grad_norm": 0.1773337721824646, "learning_rate": 9.697078641200817e-07, "loss": 0.0118, "step": 198720 }, { "epoch": 2.123297184678669, "grad_norm": 0.12663692235946655, "learning_rate": 9.697021048573661e-07, "loss": 0.0399, "step": 198730 }, { "epoch": 2.1234040279929483, "grad_norm": 12.207744598388672, "learning_rate": 9.696963450643211e-07, "loss": 0.0235, "step": 198740 }, { "epoch": 2.123510871307228, "grad_norm": 0.3506438732147217, "learning_rate": 9.696905847409534e-07, "loss": 0.0091, "step": 198750 }, { "epoch": 2.1236177146215076, "grad_norm": 0.013582007959485054, "learning_rate": 9.696848238872699e-07, "loss": 0.0101, "step": 198760 }, { "epoch": 2.123724557935787, "grad_norm": 6.216243743896484, "learning_rate": 9.696790625032768e-07, "loss": 0.0545, "step": 198770 }, { "epoch": 2.123831401250067, "grad_norm": 4.6876726150512695, "learning_rate": 9.696733005889803e-07, "loss": 0.0096, "step": 198780 }, { "epoch": 2.1239382445643464, "grad_norm": 7.763055801391602, "learning_rate": 9.696675381443873e-07, "loss": 0.013, "step": 198790 }, { "epoch": 2.124045087878626, "grad_norm": 0.013551943004131317, "learning_rate": 9.696617751695042e-07, "loss": 0.0125, "step": 198800 }, { "epoch": 2.124151931192906, "grad_norm": 0.8090474605560303, "learning_rate": 9.696560116643375e-07, "loss": 0.0093, "step": 198810 }, { "epoch": 2.1242587745071853, "grad_norm": 5.034274101257324, "learning_rate": 9.696502476288939e-07, "loss": 0.098, "step": 198820 }, { "epoch": 2.1243656178214647, "grad_norm": 1.3704769611358643, "learning_rate": 9.696444830631794e-07, "loss": 0.0347, "step": 198830 }, { "epoch": 2.1244724611357446, "grad_norm": 0.022617245092988014, "learning_rate": 9.69638717967201e-07, "loss": 0.0172, "step": 198840 }, { "epoch": 2.124579304450024, "grad_norm": 0.016907015815377235, "learning_rate": 9.696329523409652e-07, "loss": 0.0123, "step": 198850 }, { "epoch": 2.1246861477643035, "grad_norm": 0.06439784169197083, "learning_rate": 9.69627186184478e-07, "loss": 0.0353, "step": 198860 }, { "epoch": 2.1247929910785834, "grad_norm": 4.085498332977295, "learning_rate": 9.696214194977465e-07, "loss": 0.031, "step": 198870 }, { "epoch": 2.124899834392863, "grad_norm": 0.020362475886940956, "learning_rate": 9.69615652280777e-07, "loss": 0.01, "step": 198880 }, { "epoch": 2.1250066777071424, "grad_norm": 0.21564190089702606, "learning_rate": 9.696098845335758e-07, "loss": 0.01, "step": 198890 }, { "epoch": 2.1251135210214223, "grad_norm": 0.03756698593497276, "learning_rate": 9.696041162561499e-07, "loss": 0.0029, "step": 198900 }, { "epoch": 2.1252203643357017, "grad_norm": 12.651147842407227, "learning_rate": 9.695983474485053e-07, "loss": 0.0699, "step": 198910 }, { "epoch": 2.125327207649981, "grad_norm": 0.01780010387301445, "learning_rate": 9.695925781106487e-07, "loss": 0.0133, "step": 198920 }, { "epoch": 2.125434050964261, "grad_norm": 1.134806752204895, "learning_rate": 9.695868082425866e-07, "loss": 0.0071, "step": 198930 }, { "epoch": 2.1255408942785405, "grad_norm": 2.1614532470703125, "learning_rate": 9.695810378443258e-07, "loss": 0.0094, "step": 198940 }, { "epoch": 2.12564773759282, "grad_norm": 0.5877073407173157, "learning_rate": 9.695752669158722e-07, "loss": 0.0609, "step": 198950 }, { "epoch": 2.1257545809071, "grad_norm": 0.06885991990566254, "learning_rate": 9.69569495457233e-07, "loss": 0.0099, "step": 198960 }, { "epoch": 2.1258614242213794, "grad_norm": 9.565800666809082, "learning_rate": 9.69563723468414e-07, "loss": 0.0125, "step": 198970 }, { "epoch": 2.125968267535659, "grad_norm": 1.3749059438705444, "learning_rate": 9.695579509494226e-07, "loss": 0.0136, "step": 198980 }, { "epoch": 2.1260751108499387, "grad_norm": 2.294448137283325, "learning_rate": 9.695521779002646e-07, "loss": 0.0224, "step": 198990 }, { "epoch": 2.126181954164218, "grad_norm": 4.671028137207031, "learning_rate": 9.69546404320947e-07, "loss": 0.0347, "step": 199000 }, { "epoch": 2.1262887974784976, "grad_norm": 1.1603747606277466, "learning_rate": 9.695406302114757e-07, "loss": 0.0222, "step": 199010 }, { "epoch": 2.1263956407927775, "grad_norm": 4.988871097564697, "learning_rate": 9.695348555718579e-07, "loss": 0.039, "step": 199020 }, { "epoch": 2.126502484107057, "grad_norm": 0.5023306608200073, "learning_rate": 9.695290804020995e-07, "loss": 0.0049, "step": 199030 }, { "epoch": 2.1266093274213365, "grad_norm": 0.049689002335071564, "learning_rate": 9.695233047022075e-07, "loss": 0.0118, "step": 199040 }, { "epoch": 2.1267161707356164, "grad_norm": 0.07033957540988922, "learning_rate": 9.695175284721883e-07, "loss": 0.0036, "step": 199050 }, { "epoch": 2.126823014049896, "grad_norm": 1.508363962173462, "learning_rate": 9.695117517120485e-07, "loss": 0.0193, "step": 199060 }, { "epoch": 2.1269298573641753, "grad_norm": 2.6367011070251465, "learning_rate": 9.69505974421794e-07, "loss": 0.0037, "step": 199070 }, { "epoch": 2.127036700678455, "grad_norm": 0.018882956355810165, "learning_rate": 9.695001966014322e-07, "loss": 0.0361, "step": 199080 }, { "epoch": 2.1271435439927346, "grad_norm": 4.358461856842041, "learning_rate": 9.694944182509692e-07, "loss": 0.0142, "step": 199090 }, { "epoch": 2.127250387307014, "grad_norm": 0.015232333913445473, "learning_rate": 9.694886393704115e-07, "loss": 0.0204, "step": 199100 }, { "epoch": 2.127357230621294, "grad_norm": 7.451228618621826, "learning_rate": 9.694828599597658e-07, "loss": 0.0173, "step": 199110 }, { "epoch": 2.1274640739355735, "grad_norm": 0.00960763730108738, "learning_rate": 9.694770800190384e-07, "loss": 0.0843, "step": 199120 }, { "epoch": 2.127570917249853, "grad_norm": 0.7758190035820007, "learning_rate": 9.69471299548236e-07, "loss": 0.0188, "step": 199130 }, { "epoch": 2.127677760564133, "grad_norm": 8.826940536499023, "learning_rate": 9.694655185473652e-07, "loss": 0.0342, "step": 199140 }, { "epoch": 2.1277846038784123, "grad_norm": 2.433943271636963, "learning_rate": 9.694597370164322e-07, "loss": 0.0339, "step": 199150 }, { "epoch": 2.1278914471926917, "grad_norm": 0.2879118323326111, "learning_rate": 9.694539549554439e-07, "loss": 0.0072, "step": 199160 }, { "epoch": 2.1279982905069716, "grad_norm": 0.2777959406375885, "learning_rate": 9.694481723644065e-07, "loss": 0.0363, "step": 199170 }, { "epoch": 2.128105133821251, "grad_norm": 0.0025696945376694202, "learning_rate": 9.694423892433269e-07, "loss": 0.0094, "step": 199180 }, { "epoch": 2.1282119771355306, "grad_norm": 0.007484883069992065, "learning_rate": 9.69436605592211e-07, "loss": 0.0102, "step": 199190 }, { "epoch": 2.1283188204498105, "grad_norm": 0.926750659942627, "learning_rate": 9.69430821411066e-07, "loss": 0.0129, "step": 199200 }, { "epoch": 2.12842566376409, "grad_norm": 2.398834705352783, "learning_rate": 9.694250366998982e-07, "loss": 0.0089, "step": 199210 }, { "epoch": 2.1285325070783694, "grad_norm": 0.01024044118821621, "learning_rate": 9.694192514587141e-07, "loss": 0.0346, "step": 199220 }, { "epoch": 2.1286393503926493, "grad_norm": 0.023405808955430984, "learning_rate": 9.694134656875201e-07, "loss": 0.0156, "step": 199230 }, { "epoch": 2.1287461937069287, "grad_norm": 0.18549633026123047, "learning_rate": 9.69407679386323e-07, "loss": 0.0128, "step": 199240 }, { "epoch": 2.128853037021208, "grad_norm": 1.9029935598373413, "learning_rate": 9.694018925551294e-07, "loss": 0.0385, "step": 199250 }, { "epoch": 2.128959880335488, "grad_norm": 1.1782382726669312, "learning_rate": 9.693961051939453e-07, "loss": 0.0065, "step": 199260 }, { "epoch": 2.1290667236497676, "grad_norm": 1.980250597000122, "learning_rate": 9.693903173027776e-07, "loss": 0.0291, "step": 199270 }, { "epoch": 2.129173566964047, "grad_norm": 5.762753009796143, "learning_rate": 9.69384528881633e-07, "loss": 0.0166, "step": 199280 }, { "epoch": 2.129280410278327, "grad_norm": 1.9402705430984497, "learning_rate": 9.693787399305176e-07, "loss": 0.0158, "step": 199290 }, { "epoch": 2.1293872535926064, "grad_norm": 0.3484758734703064, "learning_rate": 9.693729504494383e-07, "loss": 0.0579, "step": 199300 }, { "epoch": 2.129494096906886, "grad_norm": 0.022526809945702553, "learning_rate": 9.693671604384015e-07, "loss": 0.013, "step": 199310 }, { "epoch": 2.1296009402211658, "grad_norm": 0.14220315217971802, "learning_rate": 9.693613698974138e-07, "loss": 0.0022, "step": 199320 }, { "epoch": 2.129707783535445, "grad_norm": 2.906566858291626, "learning_rate": 9.693555788264816e-07, "loss": 0.005, "step": 199330 }, { "epoch": 2.1298146268497247, "grad_norm": 0.3120823800563812, "learning_rate": 9.693497872256115e-07, "loss": 0.0119, "step": 199340 }, { "epoch": 2.1299214701640046, "grad_norm": 2.3067290782928467, "learning_rate": 9.693439950948103e-07, "loss": 0.0223, "step": 199350 }, { "epoch": 2.130028313478284, "grad_norm": 0.09554214775562286, "learning_rate": 9.693382024340841e-07, "loss": 0.0161, "step": 199360 }, { "epoch": 2.130135156792564, "grad_norm": 24.19317626953125, "learning_rate": 9.693324092434397e-07, "loss": 0.0382, "step": 199370 }, { "epoch": 2.1302420001068434, "grad_norm": 4.468721866607666, "learning_rate": 9.693266155228837e-07, "loss": 0.0173, "step": 199380 }, { "epoch": 2.130348843421123, "grad_norm": 0.34414416551589966, "learning_rate": 9.693208212724223e-07, "loss": 0.0369, "step": 199390 }, { "epoch": 2.1304556867354023, "grad_norm": 3.235671281814575, "learning_rate": 9.693150264920625e-07, "loss": 0.0199, "step": 199400 }, { "epoch": 2.130562530049682, "grad_norm": 6.931190490722656, "learning_rate": 9.693092311818104e-07, "loss": 0.039, "step": 199410 }, { "epoch": 2.1306693733639617, "grad_norm": 1.6008384227752686, "learning_rate": 9.693034353416728e-07, "loss": 0.0026, "step": 199420 }, { "epoch": 2.1307762166782416, "grad_norm": 4.104604721069336, "learning_rate": 9.692976389716564e-07, "loss": 0.036, "step": 199430 }, { "epoch": 2.130883059992521, "grad_norm": 0.10773412138223648, "learning_rate": 9.692918420717672e-07, "loss": 0.0319, "step": 199440 }, { "epoch": 2.1309899033068005, "grad_norm": 3.076909303665161, "learning_rate": 9.692860446420124e-07, "loss": 0.0278, "step": 199450 }, { "epoch": 2.13109674662108, "grad_norm": 3.881990909576416, "learning_rate": 9.692802466823982e-07, "loss": 0.0146, "step": 199460 }, { "epoch": 2.13120358993536, "grad_norm": 14.838436126708984, "learning_rate": 9.69274448192931e-07, "loss": 0.0621, "step": 199470 }, { "epoch": 2.1313104332496393, "grad_norm": 0.018828894942998886, "learning_rate": 9.692686491736177e-07, "loss": 0.02, "step": 199480 }, { "epoch": 2.131417276563919, "grad_norm": 7.866130352020264, "learning_rate": 9.692628496244647e-07, "loss": 0.0246, "step": 199490 }, { "epoch": 2.1315241198781987, "grad_norm": 16.7627010345459, "learning_rate": 9.692570495454783e-07, "loss": 0.0808, "step": 199500 }, { "epoch": 2.131630963192478, "grad_norm": 0.0033026812598109245, "learning_rate": 9.692512489366655e-07, "loss": 0.0299, "step": 199510 }, { "epoch": 2.131737806506758, "grad_norm": 3.6614935398101807, "learning_rate": 9.692454477980325e-07, "loss": 0.0164, "step": 199520 }, { "epoch": 2.1318446498210375, "grad_norm": 13.501663208007812, "learning_rate": 9.692396461295863e-07, "loss": 0.0685, "step": 199530 }, { "epoch": 2.131951493135317, "grad_norm": 0.35777169466018677, "learning_rate": 9.692338439313327e-07, "loss": 0.0346, "step": 199540 }, { "epoch": 2.132058336449597, "grad_norm": 0.7613833546638489, "learning_rate": 9.692280412032787e-07, "loss": 0.0138, "step": 199550 }, { "epoch": 2.1321651797638763, "grad_norm": 1.4473695755004883, "learning_rate": 9.692222379454311e-07, "loss": 0.0204, "step": 199560 }, { "epoch": 2.1322720230781558, "grad_norm": 3.506988525390625, "learning_rate": 9.69216434157796e-07, "loss": 0.0388, "step": 199570 }, { "epoch": 2.1323788663924357, "grad_norm": 13.208428382873535, "learning_rate": 9.692106298403802e-07, "loss": 0.0142, "step": 199580 }, { "epoch": 2.132485709706715, "grad_norm": 3.9983770847320557, "learning_rate": 9.6920482499319e-07, "loss": 0.0136, "step": 199590 }, { "epoch": 2.1325925530209946, "grad_norm": 1.1062885522842407, "learning_rate": 9.691990196162325e-07, "loss": 0.002, "step": 199600 }, { "epoch": 2.1326993963352745, "grad_norm": 0.003586223116144538, "learning_rate": 9.691932137095136e-07, "loss": 0.0121, "step": 199610 }, { "epoch": 2.132806239649554, "grad_norm": 5.23397159576416, "learning_rate": 9.691874072730401e-07, "loss": 0.0088, "step": 199620 }, { "epoch": 2.1329130829638334, "grad_norm": 0.31647270917892456, "learning_rate": 9.691816003068188e-07, "loss": 0.0169, "step": 199630 }, { "epoch": 2.1330199262781133, "grad_norm": 3.590515613555908, "learning_rate": 9.69175792810856e-07, "loss": 0.0131, "step": 199640 }, { "epoch": 2.133126769592393, "grad_norm": 0.009840057231485844, "learning_rate": 9.691699847851584e-07, "loss": 0.0366, "step": 199650 }, { "epoch": 2.1332336129066722, "grad_norm": 0.02094334550201893, "learning_rate": 9.691641762297325e-07, "loss": 0.0167, "step": 199660 }, { "epoch": 2.133340456220952, "grad_norm": 3.5194218158721924, "learning_rate": 9.691583671445847e-07, "loss": 0.0254, "step": 199670 }, { "epoch": 2.1334472995352316, "grad_norm": 0.4123958647251129, "learning_rate": 9.691525575297216e-07, "loss": 0.016, "step": 199680 }, { "epoch": 2.133554142849511, "grad_norm": 0.6982262134552002, "learning_rate": 9.6914674738515e-07, "loss": 0.0244, "step": 199690 }, { "epoch": 2.133660986163791, "grad_norm": 2.549440860748291, "learning_rate": 9.691409367108764e-07, "loss": 0.0553, "step": 199700 }, { "epoch": 2.1337678294780704, "grad_norm": 0.9713107347488403, "learning_rate": 9.69135125506907e-07, "loss": 0.0164, "step": 199710 }, { "epoch": 2.13387467279235, "grad_norm": 3.4202661514282227, "learning_rate": 9.691293137732489e-07, "loss": 0.0111, "step": 199720 }, { "epoch": 2.13398151610663, "grad_norm": 0.7290264964103699, "learning_rate": 9.691235015099083e-07, "loss": 0.0022, "step": 199730 }, { "epoch": 2.1340883594209092, "grad_norm": 0.02566678822040558, "learning_rate": 9.69117688716892e-07, "loss": 0.0118, "step": 199740 }, { "epoch": 2.1341952027351887, "grad_norm": 0.012180199846625328, "learning_rate": 9.691118753942063e-07, "loss": 0.0134, "step": 199750 }, { "epoch": 2.1343020460494686, "grad_norm": 0.09604836255311966, "learning_rate": 9.691060615418578e-07, "loss": 0.0053, "step": 199760 }, { "epoch": 2.134408889363748, "grad_norm": 0.09169311821460724, "learning_rate": 9.691002471598532e-07, "loss": 0.0135, "step": 199770 }, { "epoch": 2.1345157326780275, "grad_norm": 0.040528640151023865, "learning_rate": 9.690944322481992e-07, "loss": 0.0133, "step": 199780 }, { "epoch": 2.1346225759923074, "grad_norm": 0.02474852278828621, "learning_rate": 9.69088616806902e-07, "loss": 0.0095, "step": 199790 }, { "epoch": 2.134729419306587, "grad_norm": 0.4388122856616974, "learning_rate": 9.690828008359682e-07, "loss": 0.0175, "step": 199800 }, { "epoch": 2.1348362626208663, "grad_norm": 0.01908799260854721, "learning_rate": 9.690769843354048e-07, "loss": 0.0138, "step": 199810 }, { "epoch": 2.1349431059351462, "grad_norm": 7.362630844116211, "learning_rate": 9.690711673052178e-07, "loss": 0.0098, "step": 199820 }, { "epoch": 2.1350499492494257, "grad_norm": 0.009679175913333893, "learning_rate": 9.690653497454143e-07, "loss": 0.009, "step": 199830 }, { "epoch": 2.135156792563705, "grad_norm": 0.1053680032491684, "learning_rate": 9.690595316560006e-07, "loss": 0.0081, "step": 199840 }, { "epoch": 2.135263635877985, "grad_norm": 3.451951026916504, "learning_rate": 9.690537130369832e-07, "loss": 0.0104, "step": 199850 }, { "epoch": 2.1353704791922645, "grad_norm": 5.6537981033325195, "learning_rate": 9.690478938883687e-07, "loss": 0.035, "step": 199860 }, { "epoch": 2.135477322506544, "grad_norm": 6.029015064239502, "learning_rate": 9.69042074210164e-07, "loss": 0.0149, "step": 199870 }, { "epoch": 2.135584165820824, "grad_norm": 4.901967525482178, "learning_rate": 9.69036254002375e-07, "loss": 0.0093, "step": 199880 }, { "epoch": 2.1356910091351033, "grad_norm": 6.0465521812438965, "learning_rate": 9.69030433265009e-07, "loss": 0.0433, "step": 199890 }, { "epoch": 2.135797852449383, "grad_norm": 4.116558074951172, "learning_rate": 9.690246119980722e-07, "loss": 0.0069, "step": 199900 }, { "epoch": 2.1359046957636627, "grad_norm": 1.966113805770874, "learning_rate": 9.690187902015711e-07, "loss": 0.0058, "step": 199910 }, { "epoch": 2.136011539077942, "grad_norm": 1.9631973505020142, "learning_rate": 9.690129678755124e-07, "loss": 0.0299, "step": 199920 }, { "epoch": 2.1361183823922216, "grad_norm": 0.009256092831492424, "learning_rate": 9.690071450199027e-07, "loss": 0.0516, "step": 199930 }, { "epoch": 2.1362252257065015, "grad_norm": 3.6276164054870605, "learning_rate": 9.690013216347485e-07, "loss": 0.0183, "step": 199940 }, { "epoch": 2.136332069020781, "grad_norm": 4.614847183227539, "learning_rate": 9.689954977200566e-07, "loss": 0.0289, "step": 199950 }, { "epoch": 2.1364389123350604, "grad_norm": 0.017887532711029053, "learning_rate": 9.689896732758332e-07, "loss": 0.0302, "step": 199960 }, { "epoch": 2.1365457556493404, "grad_norm": 0.002318717073649168, "learning_rate": 9.68983848302085e-07, "loss": 0.0274, "step": 199970 }, { "epoch": 2.13665259896362, "grad_norm": 7.302360534667969, "learning_rate": 9.689780227988186e-07, "loss": 0.0307, "step": 199980 }, { "epoch": 2.1367594422778993, "grad_norm": 0.7452970147132874, "learning_rate": 9.689721967660408e-07, "loss": 0.0134, "step": 199990 }, { "epoch": 2.136866285592179, "grad_norm": 2.9196293354034424, "learning_rate": 9.68966370203758e-07, "loss": 0.1033, "step": 200000 }, { "epoch": 2.1369731289064586, "grad_norm": 2.817309856414795, "learning_rate": 9.689605431119768e-07, "loss": 0.0352, "step": 200010 }, { "epoch": 2.137079972220738, "grad_norm": 0.4517475366592407, "learning_rate": 9.689547154907035e-07, "loss": 0.0314, "step": 200020 }, { "epoch": 2.137186815535018, "grad_norm": 5.045202732086182, "learning_rate": 9.689488873399452e-07, "loss": 0.0073, "step": 200030 }, { "epoch": 2.1372936588492975, "grad_norm": 0.4530412554740906, "learning_rate": 9.68943058659708e-07, "loss": 0.0061, "step": 200040 }, { "epoch": 2.137400502163577, "grad_norm": 2.7024476528167725, "learning_rate": 9.689372294499987e-07, "loss": 0.0201, "step": 200050 }, { "epoch": 2.137507345477857, "grad_norm": 7.259101867675781, "learning_rate": 9.689313997108241e-07, "loss": 0.0331, "step": 200060 }, { "epoch": 2.1376141887921363, "grad_norm": 3.8591928482055664, "learning_rate": 9.689255694421904e-07, "loss": 0.0371, "step": 200070 }, { "epoch": 2.137721032106416, "grad_norm": 0.030900197103619576, "learning_rate": 9.689197386441044e-07, "loss": 0.0069, "step": 200080 }, { "epoch": 2.1378278754206956, "grad_norm": 0.024694060906767845, "learning_rate": 9.689139073165727e-07, "loss": 0.0385, "step": 200090 }, { "epoch": 2.137934718734975, "grad_norm": 0.3169623613357544, "learning_rate": 9.689080754596015e-07, "loss": 0.0262, "step": 200100 }, { "epoch": 2.1380415620492546, "grad_norm": 0.5946534276008606, "learning_rate": 9.68902243073198e-07, "loss": 0.0189, "step": 200110 }, { "epoch": 2.1381484053635345, "grad_norm": 0.03328108787536621, "learning_rate": 9.688964101573684e-07, "loss": 0.0206, "step": 200120 }, { "epoch": 2.138255248677814, "grad_norm": 0.005592901725322008, "learning_rate": 9.688905767121193e-07, "loss": 0.027, "step": 200130 }, { "epoch": 2.138362091992094, "grad_norm": 5.454172611236572, "learning_rate": 9.688847427374572e-07, "loss": 0.009, "step": 200140 }, { "epoch": 2.1384689353063733, "grad_norm": 0.5594373345375061, "learning_rate": 9.688789082333892e-07, "loss": 0.0117, "step": 200150 }, { "epoch": 2.1385757786206527, "grad_norm": 6.353387832641602, "learning_rate": 9.688730731999214e-07, "loss": 0.0556, "step": 200160 }, { "epoch": 2.138682621934932, "grad_norm": 1.699358582496643, "learning_rate": 9.688672376370603e-07, "loss": 0.0062, "step": 200170 }, { "epoch": 2.138789465249212, "grad_norm": 7.223453044891357, "learning_rate": 9.68861401544813e-07, "loss": 0.0409, "step": 200180 }, { "epoch": 2.1388963085634916, "grad_norm": 0.2907606065273285, "learning_rate": 9.688555649231856e-07, "loss": 0.009, "step": 200190 }, { "epoch": 2.1390031518777715, "grad_norm": 1.4209868907928467, "learning_rate": 9.688497277721849e-07, "loss": 0.0012, "step": 200200 }, { "epoch": 2.139109995192051, "grad_norm": 1.148705005645752, "learning_rate": 9.688438900918176e-07, "loss": 0.0257, "step": 200210 }, { "epoch": 2.1392168385063304, "grad_norm": 7.874576568603516, "learning_rate": 9.6883805188209e-07, "loss": 0.0238, "step": 200220 }, { "epoch": 2.1393236818206103, "grad_norm": 0.4564075171947479, "learning_rate": 9.68832213143009e-07, "loss": 0.0424, "step": 200230 }, { "epoch": 2.1394305251348897, "grad_norm": 2.8270938396453857, "learning_rate": 9.68826373874581e-07, "loss": 0.0908, "step": 200240 }, { "epoch": 2.139537368449169, "grad_norm": 1.253867268562317, "learning_rate": 9.688205340768124e-07, "loss": 0.0233, "step": 200250 }, { "epoch": 2.139644211763449, "grad_norm": 0.05054820328950882, "learning_rate": 9.688146937497103e-07, "loss": 0.0059, "step": 200260 }, { "epoch": 2.1397510550777286, "grad_norm": 0.036499056965112686, "learning_rate": 9.68808852893281e-07, "loss": 0.0095, "step": 200270 }, { "epoch": 2.139857898392008, "grad_norm": 10.486312866210938, "learning_rate": 9.68803011507531e-07, "loss": 0.0259, "step": 200280 }, { "epoch": 2.139964741706288, "grad_norm": 0.5512500405311584, "learning_rate": 9.687971695924673e-07, "loss": 0.0147, "step": 200290 }, { "epoch": 2.1400715850205674, "grad_norm": 3.811119318008423, "learning_rate": 9.68791327148096e-07, "loss": 0.0495, "step": 200300 }, { "epoch": 2.140178428334847, "grad_norm": 0.49182039499282837, "learning_rate": 9.68785484174424e-07, "loss": 0.0083, "step": 200310 }, { "epoch": 2.1402852716491267, "grad_norm": 0.0190680380910635, "learning_rate": 9.687796406714576e-07, "loss": 0.0147, "step": 200320 }, { "epoch": 2.140392114963406, "grad_norm": 3.345521926879883, "learning_rate": 9.687737966392038e-07, "loss": 0.0034, "step": 200330 }, { "epoch": 2.1404989582776857, "grad_norm": 0.007917245849967003, "learning_rate": 9.68767952077669e-07, "loss": 0.0203, "step": 200340 }, { "epoch": 2.1406058015919656, "grad_norm": 18.303407669067383, "learning_rate": 9.687621069868598e-07, "loss": 0.0032, "step": 200350 }, { "epoch": 2.140712644906245, "grad_norm": 4.16246223449707, "learning_rate": 9.687562613667828e-07, "loss": 0.0701, "step": 200360 }, { "epoch": 2.1408194882205245, "grad_norm": 0.01192100252956152, "learning_rate": 9.687504152174445e-07, "loss": 0.0318, "step": 200370 }, { "epoch": 2.1409263315348044, "grad_norm": 2.3569540977478027, "learning_rate": 9.687445685388517e-07, "loss": 0.0132, "step": 200380 }, { "epoch": 2.141033174849084, "grad_norm": 0.017903586849570274, "learning_rate": 9.687387213310108e-07, "loss": 0.0237, "step": 200390 }, { "epoch": 2.1411400181633633, "grad_norm": 0.013992909342050552, "learning_rate": 9.687328735939286e-07, "loss": 0.0184, "step": 200400 }, { "epoch": 2.141246861477643, "grad_norm": 4.6269659996032715, "learning_rate": 9.687270253276116e-07, "loss": 0.0548, "step": 200410 }, { "epoch": 2.1413537047919227, "grad_norm": 0.07640999555587769, "learning_rate": 9.687211765320664e-07, "loss": 0.0553, "step": 200420 }, { "epoch": 2.141460548106202, "grad_norm": 0.004298207815736532, "learning_rate": 9.687153272072996e-07, "loss": 0.0338, "step": 200430 }, { "epoch": 2.141567391420482, "grad_norm": 0.04247600585222244, "learning_rate": 9.687094773533179e-07, "loss": 0.0045, "step": 200440 }, { "epoch": 2.1416742347347615, "grad_norm": 0.014626727439463139, "learning_rate": 9.687036269701275e-07, "loss": 0.0883, "step": 200450 }, { "epoch": 2.141781078049041, "grad_norm": 0.020649516955018044, "learning_rate": 9.686977760577358e-07, "loss": 0.0183, "step": 200460 }, { "epoch": 2.141887921363321, "grad_norm": 2.334507703781128, "learning_rate": 9.686919246161488e-07, "loss": 0.0207, "step": 200470 }, { "epoch": 2.1419947646776003, "grad_norm": 7.643110752105713, "learning_rate": 9.686860726453733e-07, "loss": 0.0396, "step": 200480 }, { "epoch": 2.1421016079918798, "grad_norm": 1.54106867313385, "learning_rate": 9.686802201454154e-07, "loss": 0.0715, "step": 200490 }, { "epoch": 2.1422084513061597, "grad_norm": 0.08850497752428055, "learning_rate": 9.686743671162826e-07, "loss": 0.0149, "step": 200500 }, { "epoch": 2.142315294620439, "grad_norm": 0.024316390976309776, "learning_rate": 9.68668513557981e-07, "loss": 0.0253, "step": 200510 }, { "epoch": 2.1424221379347186, "grad_norm": 1.953811526298523, "learning_rate": 9.686626594705172e-07, "loss": 0.0272, "step": 200520 }, { "epoch": 2.1425289812489985, "grad_norm": 0.10788178443908691, "learning_rate": 9.686568048538979e-07, "loss": 0.0251, "step": 200530 }, { "epoch": 2.142635824563278, "grad_norm": 0.042636021971702576, "learning_rate": 9.686509497081297e-07, "loss": 0.0327, "step": 200540 }, { "epoch": 2.1427426678775574, "grad_norm": 7.51356315612793, "learning_rate": 9.686450940332192e-07, "loss": 0.0138, "step": 200550 }, { "epoch": 2.1428495111918373, "grad_norm": 0.11660837382078171, "learning_rate": 9.686392378291731e-07, "loss": 0.0483, "step": 200560 }, { "epoch": 2.1429563545061168, "grad_norm": 3.592768430709839, "learning_rate": 9.686333810959978e-07, "loss": 0.0271, "step": 200570 }, { "epoch": 2.1430631978203962, "grad_norm": 2.336277484893799, "learning_rate": 9.686275238337001e-07, "loss": 0.0093, "step": 200580 }, { "epoch": 2.143170041134676, "grad_norm": 3.9745864868164062, "learning_rate": 9.686216660422866e-07, "loss": 0.0218, "step": 200590 }, { "epoch": 2.1432768844489556, "grad_norm": 3.984382152557373, "learning_rate": 9.686158077217637e-07, "loss": 0.0217, "step": 200600 }, { "epoch": 2.143383727763235, "grad_norm": 2.169386386871338, "learning_rate": 9.686099488721384e-07, "loss": 0.0404, "step": 200610 }, { "epoch": 2.143490571077515, "grad_norm": 2.058817148208618, "learning_rate": 9.68604089493417e-07, "loss": 0.0146, "step": 200620 }, { "epoch": 2.1435974143917944, "grad_norm": 2.156285047531128, "learning_rate": 9.685982295856061e-07, "loss": 0.018, "step": 200630 }, { "epoch": 2.143704257706074, "grad_norm": 0.6621580123901367, "learning_rate": 9.685923691487126e-07, "loss": 0.0073, "step": 200640 }, { "epoch": 2.1438111010203538, "grad_norm": 0.0783940777182579, "learning_rate": 9.68586508182743e-07, "loss": 0.0163, "step": 200650 }, { "epoch": 2.1439179443346332, "grad_norm": 0.020709441974759102, "learning_rate": 9.685806466877037e-07, "loss": 0.0256, "step": 200660 }, { "epoch": 2.1440247876489127, "grad_norm": 0.7497791647911072, "learning_rate": 9.685747846636018e-07, "loss": 0.0184, "step": 200670 }, { "epoch": 2.1441316309631926, "grad_norm": 4.319302558898926, "learning_rate": 9.685689221104432e-07, "loss": 0.0644, "step": 200680 }, { "epoch": 2.144238474277472, "grad_norm": 8.69049072265625, "learning_rate": 9.68563059028235e-07, "loss": 0.0123, "step": 200690 }, { "epoch": 2.1443453175917515, "grad_norm": 0.4460519254207611, "learning_rate": 9.68557195416984e-07, "loss": 0.0408, "step": 200700 }, { "epoch": 2.1444521609060314, "grad_norm": 0.01439871545881033, "learning_rate": 9.685513312766963e-07, "loss": 0.0216, "step": 200710 }, { "epoch": 2.144559004220311, "grad_norm": 0.016618385910987854, "learning_rate": 9.685454666073788e-07, "loss": 0.01, "step": 200720 }, { "epoch": 2.1446658475345903, "grad_norm": 0.08866384625434875, "learning_rate": 9.685396014090382e-07, "loss": 0.0151, "step": 200730 }, { "epoch": 2.1447726908488702, "grad_norm": 0.004233045503497124, "learning_rate": 9.68533735681681e-07, "loss": 0.0161, "step": 200740 }, { "epoch": 2.1448795341631497, "grad_norm": 8.85575008392334, "learning_rate": 9.68527869425314e-07, "loss": 0.0727, "step": 200750 }, { "epoch": 2.144986377477429, "grad_norm": 23.959402084350586, "learning_rate": 9.685220026399435e-07, "loss": 0.0552, "step": 200760 }, { "epoch": 2.145093220791709, "grad_norm": 0.014688492752611637, "learning_rate": 9.685161353255763e-07, "loss": 0.0276, "step": 200770 }, { "epoch": 2.1452000641059885, "grad_norm": 0.600459098815918, "learning_rate": 9.68510267482219e-07, "loss": 0.0166, "step": 200780 }, { "epoch": 2.145306907420268, "grad_norm": 0.8102995157241821, "learning_rate": 9.685043991098783e-07, "loss": 0.0921, "step": 200790 }, { "epoch": 2.145413750734548, "grad_norm": 0.1994428187608719, "learning_rate": 9.684985302085608e-07, "loss": 0.0469, "step": 200800 }, { "epoch": 2.1455205940488273, "grad_norm": 4.418558120727539, "learning_rate": 9.684926607782731e-07, "loss": 0.0089, "step": 200810 }, { "epoch": 2.145627437363107, "grad_norm": 4.895878791809082, "learning_rate": 9.684867908190218e-07, "loss": 0.0305, "step": 200820 }, { "epoch": 2.1457342806773867, "grad_norm": 0.5368816256523132, "learning_rate": 9.684809203308135e-07, "loss": 0.0127, "step": 200830 }, { "epoch": 2.145841123991666, "grad_norm": 0.136461079120636, "learning_rate": 9.684750493136552e-07, "loss": 0.0155, "step": 200840 }, { "epoch": 2.145947967305946, "grad_norm": 11.772421836853027, "learning_rate": 9.684691777675527e-07, "loss": 0.015, "step": 200850 }, { "epoch": 2.1460548106202255, "grad_norm": 4.306985855102539, "learning_rate": 9.684633056925135e-07, "loss": 0.0193, "step": 200860 }, { "epoch": 2.146161653934505, "grad_norm": 0.28718140721321106, "learning_rate": 9.684574330885437e-07, "loss": 0.0328, "step": 200870 }, { "epoch": 2.1462684972487844, "grad_norm": 0.029878580942749977, "learning_rate": 9.684515599556502e-07, "loss": 0.0031, "step": 200880 }, { "epoch": 2.1463753405630643, "grad_norm": 0.019329650327563286, "learning_rate": 9.684456862938396e-07, "loss": 0.0024, "step": 200890 }, { "epoch": 2.146482183877344, "grad_norm": 2.759483814239502, "learning_rate": 9.684398121031183e-07, "loss": 0.0191, "step": 200900 }, { "epoch": 2.1465890271916237, "grad_norm": 0.030775154009461403, "learning_rate": 9.684339373834932e-07, "loss": 0.0251, "step": 200910 }, { "epoch": 2.146695870505903, "grad_norm": 7.3244805335998535, "learning_rate": 9.684280621349707e-07, "loss": 0.0176, "step": 200920 }, { "epoch": 2.1468027138201826, "grad_norm": 1.0882140398025513, "learning_rate": 9.684221863575577e-07, "loss": 0.01, "step": 200930 }, { "epoch": 2.146909557134462, "grad_norm": 0.46030393242836, "learning_rate": 9.684163100512607e-07, "loss": 0.0067, "step": 200940 }, { "epoch": 2.147016400448742, "grad_norm": 0.504723310470581, "learning_rate": 9.684104332160863e-07, "loss": 0.0338, "step": 200950 }, { "epoch": 2.1471232437630214, "grad_norm": 1.7634239196777344, "learning_rate": 9.68404555852041e-07, "loss": 0.0137, "step": 200960 }, { "epoch": 2.1472300870773013, "grad_norm": 1.5357401371002197, "learning_rate": 9.68398677959132e-07, "loss": 0.0166, "step": 200970 }, { "epoch": 2.147336930391581, "grad_norm": 0.019183892756700516, "learning_rate": 9.683927995373653e-07, "loss": 0.0225, "step": 200980 }, { "epoch": 2.1474437737058603, "grad_norm": 3.6792945861816406, "learning_rate": 9.683869205867477e-07, "loss": 0.054, "step": 200990 }, { "epoch": 2.14755061702014, "grad_norm": 0.8862267732620239, "learning_rate": 9.683810411072859e-07, "loss": 0.0728, "step": 201000 }, { "epoch": 2.1476574603344196, "grad_norm": 0.0648302286863327, "learning_rate": 9.683751610989867e-07, "loss": 0.0094, "step": 201010 }, { "epoch": 2.147764303648699, "grad_norm": 1.978084921836853, "learning_rate": 9.683692805618567e-07, "loss": 0.0221, "step": 201020 }, { "epoch": 2.147871146962979, "grad_norm": 0.6934068202972412, "learning_rate": 9.683633994959022e-07, "loss": 0.0116, "step": 201030 }, { "epoch": 2.1479779902772584, "grad_norm": 9.489751815795898, "learning_rate": 9.6835751790113e-07, "loss": 0.0145, "step": 201040 }, { "epoch": 2.148084833591538, "grad_norm": 2.0935099124908447, "learning_rate": 9.683516357775471e-07, "loss": 0.0146, "step": 201050 }, { "epoch": 2.148191676905818, "grad_norm": 0.0032647778280079365, "learning_rate": 9.683457531251597e-07, "loss": 0.0134, "step": 201060 }, { "epoch": 2.1482985202200973, "grad_norm": 1.6631848812103271, "learning_rate": 9.683398699439746e-07, "loss": 0.0094, "step": 201070 }, { "epoch": 2.1484053635343767, "grad_norm": 0.01221893448382616, "learning_rate": 9.683339862339985e-07, "loss": 0.0311, "step": 201080 }, { "epoch": 2.1485122068486566, "grad_norm": 0.9344327449798584, "learning_rate": 9.683281019952378e-07, "loss": 0.0071, "step": 201090 }, { "epoch": 2.148619050162936, "grad_norm": 3.079939126968384, "learning_rate": 9.683222172276995e-07, "loss": 0.0052, "step": 201100 }, { "epoch": 2.1487258934772155, "grad_norm": 6.228010177612305, "learning_rate": 9.6831633193139e-07, "loss": 0.0065, "step": 201110 }, { "epoch": 2.1488327367914954, "grad_norm": 0.0790795162320137, "learning_rate": 9.68310446106316e-07, "loss": 0.0103, "step": 201120 }, { "epoch": 2.148939580105775, "grad_norm": 1.9222683906555176, "learning_rate": 9.683045597524844e-07, "loss": 0.0442, "step": 201130 }, { "epoch": 2.1490464234200544, "grad_norm": 0.053840696811676025, "learning_rate": 9.682986728699013e-07, "loss": 0.0162, "step": 201140 }, { "epoch": 2.1491532667343343, "grad_norm": 0.08733763545751572, "learning_rate": 9.682927854585738e-07, "loss": 0.0785, "step": 201150 }, { "epoch": 2.1492601100486137, "grad_norm": 0.44014036655426025, "learning_rate": 9.682868975185084e-07, "loss": 0.0323, "step": 201160 }, { "epoch": 2.149366953362893, "grad_norm": 2.96830415725708, "learning_rate": 9.682810090497117e-07, "loss": 0.0162, "step": 201170 }, { "epoch": 2.149473796677173, "grad_norm": 16.4320011138916, "learning_rate": 9.682751200521904e-07, "loss": 0.0187, "step": 201180 }, { "epoch": 2.1495806399914525, "grad_norm": 4.626596450805664, "learning_rate": 9.682692305259512e-07, "loss": 0.0301, "step": 201190 }, { "epoch": 2.149687483305732, "grad_norm": 1.8265810012817383, "learning_rate": 9.682633404710008e-07, "loss": 0.0273, "step": 201200 }, { "epoch": 2.149794326620012, "grad_norm": 1.8891655206680298, "learning_rate": 9.682574498873454e-07, "loss": 0.0941, "step": 201210 }, { "epoch": 2.1499011699342914, "grad_norm": 16.521713256835938, "learning_rate": 9.682515587749925e-07, "loss": 0.0282, "step": 201220 }, { "epoch": 2.150008013248571, "grad_norm": 0.05097396671772003, "learning_rate": 9.682456671339477e-07, "loss": 0.0493, "step": 201230 }, { "epoch": 2.1501148565628507, "grad_norm": 12.081090927124023, "learning_rate": 9.682397749642187e-07, "loss": 0.0255, "step": 201240 }, { "epoch": 2.15022169987713, "grad_norm": 9.716758728027344, "learning_rate": 9.682338822658114e-07, "loss": 0.0123, "step": 201250 }, { "epoch": 2.1503285431914096, "grad_norm": 0.0064305332489311695, "learning_rate": 9.682279890387326e-07, "loss": 0.0325, "step": 201260 }, { "epoch": 2.1504353865056896, "grad_norm": 0.027699386700987816, "learning_rate": 9.682220952829892e-07, "loss": 0.0223, "step": 201270 }, { "epoch": 2.150542229819969, "grad_norm": 0.5151089429855347, "learning_rate": 9.682162009985877e-07, "loss": 0.0096, "step": 201280 }, { "epoch": 2.1506490731342485, "grad_norm": 0.02017178386449814, "learning_rate": 9.68210306185535e-07, "loss": 0.008, "step": 201290 }, { "epoch": 2.1507559164485284, "grad_norm": 0.09417805820703506, "learning_rate": 9.68204410843837e-07, "loss": 0.0174, "step": 201300 }, { "epoch": 2.150862759762808, "grad_norm": 5.978053092956543, "learning_rate": 9.681985149735013e-07, "loss": 0.0194, "step": 201310 }, { "epoch": 2.1509696030770873, "grad_norm": 0.09940841794013977, "learning_rate": 9.68192618574534e-07, "loss": 0.0395, "step": 201320 }, { "epoch": 2.151076446391367, "grad_norm": 6.902057647705078, "learning_rate": 9.681867216469419e-07, "loss": 0.0084, "step": 201330 }, { "epoch": 2.1511832897056467, "grad_norm": 3.743690252304077, "learning_rate": 9.681808241907317e-07, "loss": 0.0309, "step": 201340 }, { "epoch": 2.151290133019926, "grad_norm": 0.2240813970565796, "learning_rate": 9.6817492620591e-07, "loss": 0.0092, "step": 201350 }, { "epoch": 2.151396976334206, "grad_norm": 1.0459171533584595, "learning_rate": 9.681690276924834e-07, "loss": 0.0092, "step": 201360 }, { "epoch": 2.1515038196484855, "grad_norm": 0.43280714750289917, "learning_rate": 9.681631286504588e-07, "loss": 0.0241, "step": 201370 }, { "epoch": 2.151610662962765, "grad_norm": 0.15436296164989471, "learning_rate": 9.681572290798425e-07, "loss": 0.0171, "step": 201380 }, { "epoch": 2.151717506277045, "grad_norm": 8.530786514282227, "learning_rate": 9.681513289806415e-07, "loss": 0.0273, "step": 201390 }, { "epoch": 2.1518243495913243, "grad_norm": 0.0019056331366300583, "learning_rate": 9.681454283528622e-07, "loss": 0.0025, "step": 201400 }, { "epoch": 2.1519311929056038, "grad_norm": 0.9432648420333862, "learning_rate": 9.681395271965117e-07, "loss": 0.0114, "step": 201410 }, { "epoch": 2.1520380362198837, "grad_norm": 0.05108952894806862, "learning_rate": 9.68133625511596e-07, "loss": 0.0247, "step": 201420 }, { "epoch": 2.152144879534163, "grad_norm": 0.005814412608742714, "learning_rate": 9.681277232981221e-07, "loss": 0.0168, "step": 201430 }, { "epoch": 2.1522517228484426, "grad_norm": 0.007444374263286591, "learning_rate": 9.681218205560969e-07, "loss": 0.0039, "step": 201440 }, { "epoch": 2.1523585661627225, "grad_norm": 9.807991027832031, "learning_rate": 9.681159172855267e-07, "loss": 0.0467, "step": 201450 }, { "epoch": 2.152465409477002, "grad_norm": 0.0885695293545723, "learning_rate": 9.68110013486418e-07, "loss": 0.033, "step": 201460 }, { "epoch": 2.1525722527912814, "grad_norm": 0.671933650970459, "learning_rate": 9.681041091587783e-07, "loss": 0.0752, "step": 201470 }, { "epoch": 2.1526790961055613, "grad_norm": 2.164194107055664, "learning_rate": 9.680982043026135e-07, "loss": 0.048, "step": 201480 }, { "epoch": 2.1527859394198408, "grad_norm": 16.374879837036133, "learning_rate": 9.680922989179304e-07, "loss": 0.0363, "step": 201490 }, { "epoch": 2.15289278273412, "grad_norm": 9.57033634185791, "learning_rate": 9.680863930047357e-07, "loss": 0.0107, "step": 201500 }, { "epoch": 2.1529996260484, "grad_norm": 0.03500427305698395, "learning_rate": 9.680804865630366e-07, "loss": 0.0166, "step": 201510 }, { "epoch": 2.1531064693626796, "grad_norm": 0.4711797535419464, "learning_rate": 9.680745795928389e-07, "loss": 0.0383, "step": 201520 }, { "epoch": 2.153213312676959, "grad_norm": 1.0041437149047852, "learning_rate": 9.680686720941498e-07, "loss": 0.0385, "step": 201530 }, { "epoch": 2.153320155991239, "grad_norm": 0.08053600788116455, "learning_rate": 9.680627640669757e-07, "loss": 0.0498, "step": 201540 }, { "epoch": 2.1534269993055184, "grad_norm": 1.7664955854415894, "learning_rate": 9.680568555113235e-07, "loss": 0.0061, "step": 201550 }, { "epoch": 2.1535338426197983, "grad_norm": 1.9279834032058716, "learning_rate": 9.680509464271998e-07, "loss": 0.0248, "step": 201560 }, { "epoch": 2.1536406859340778, "grad_norm": 0.03404030203819275, "learning_rate": 9.680450368146113e-07, "loss": 0.0101, "step": 201570 }, { "epoch": 2.153747529248357, "grad_norm": 6.390783786773682, "learning_rate": 9.680391266735646e-07, "loss": 0.0444, "step": 201580 }, { "epoch": 2.1538543725626367, "grad_norm": 2.737797498703003, "learning_rate": 9.680332160040665e-07, "loss": 0.0325, "step": 201590 }, { "epoch": 2.1539612158769166, "grad_norm": 0.015859104692935944, "learning_rate": 9.680273048061234e-07, "loss": 0.0252, "step": 201600 }, { "epoch": 2.154068059191196, "grad_norm": 0.2883327007293701, "learning_rate": 9.68021393079742e-07, "loss": 0.0033, "step": 201610 }, { "epoch": 2.154174902505476, "grad_norm": 5.050470352172852, "learning_rate": 9.680154808249295e-07, "loss": 0.0255, "step": 201620 }, { "epoch": 2.1542817458197554, "grad_norm": 0.005785909481346607, "learning_rate": 9.680095680416919e-07, "loss": 0.0187, "step": 201630 }, { "epoch": 2.154388589134035, "grad_norm": 0.45764684677124023, "learning_rate": 9.680036547300365e-07, "loss": 0.0214, "step": 201640 }, { "epoch": 2.1544954324483143, "grad_norm": 1.5404943227767944, "learning_rate": 9.679977408899694e-07, "loss": 0.0141, "step": 201650 }, { "epoch": 2.154602275762594, "grad_norm": 0.3761979639530182, "learning_rate": 9.679918265214977e-07, "loss": 0.0638, "step": 201660 }, { "epoch": 2.1547091190768737, "grad_norm": 0.003979252651333809, "learning_rate": 9.679859116246276e-07, "loss": 0.0497, "step": 201670 }, { "epoch": 2.1548159623911536, "grad_norm": 8.754319190979004, "learning_rate": 9.679799961993665e-07, "loss": 0.0311, "step": 201680 }, { "epoch": 2.154922805705433, "grad_norm": 0.013388252817094326, "learning_rate": 9.679740802457203e-07, "loss": 0.0207, "step": 201690 }, { "epoch": 2.1550296490197125, "grad_norm": 2.9752066135406494, "learning_rate": 9.679681637636962e-07, "loss": 0.029, "step": 201700 }, { "epoch": 2.1551364923339924, "grad_norm": 0.008361243642866611, "learning_rate": 9.679622467533009e-07, "loss": 0.033, "step": 201710 }, { "epoch": 2.155243335648272, "grad_norm": 1.161012053489685, "learning_rate": 9.679563292145407e-07, "loss": 0.0101, "step": 201720 }, { "epoch": 2.1553501789625513, "grad_norm": 0.13396354019641876, "learning_rate": 9.679504111474224e-07, "loss": 0.0162, "step": 201730 }, { "epoch": 2.1554570222768312, "grad_norm": 9.025493621826172, "learning_rate": 9.679444925519528e-07, "loss": 0.0071, "step": 201740 }, { "epoch": 2.1555638655911107, "grad_norm": 1.7078876495361328, "learning_rate": 9.679385734281387e-07, "loss": 0.0046, "step": 201750 }, { "epoch": 2.15567070890539, "grad_norm": 2.1889617443084717, "learning_rate": 9.679326537759866e-07, "loss": 0.0101, "step": 201760 }, { "epoch": 2.15577755221967, "grad_norm": 0.5744286179542542, "learning_rate": 9.679267335955033e-07, "loss": 0.0071, "step": 201770 }, { "epoch": 2.1558843955339495, "grad_norm": 0.24932919442653656, "learning_rate": 9.67920812886695e-07, "loss": 0.0111, "step": 201780 }, { "epoch": 2.155991238848229, "grad_norm": 4.377585411071777, "learning_rate": 9.679148916495692e-07, "loss": 0.0315, "step": 201790 }, { "epoch": 2.156098082162509, "grad_norm": 0.04732663929462433, "learning_rate": 9.67908969884132e-07, "loss": 0.005, "step": 201800 }, { "epoch": 2.1562049254767883, "grad_norm": 1.4999550580978394, "learning_rate": 9.679030475903905e-07, "loss": 0.0057, "step": 201810 }, { "epoch": 2.156311768791068, "grad_norm": 0.4488964378833771, "learning_rate": 9.678971247683508e-07, "loss": 0.0057, "step": 201820 }, { "epoch": 2.1564186121053477, "grad_norm": 0.006049044895917177, "learning_rate": 9.678912014180201e-07, "loss": 0.0054, "step": 201830 }, { "epoch": 2.156525455419627, "grad_norm": 1.945737600326538, "learning_rate": 9.67885277539405e-07, "loss": 0.0076, "step": 201840 }, { "epoch": 2.1566322987339066, "grad_norm": 1.1839102506637573, "learning_rate": 9.67879353132512e-07, "loss": 0.0303, "step": 201850 }, { "epoch": 2.1567391420481865, "grad_norm": 0.5682085752487183, "learning_rate": 9.678734281973477e-07, "loss": 0.0839, "step": 201860 }, { "epoch": 2.156845985362466, "grad_norm": 0.226124107837677, "learning_rate": 9.678675027339193e-07, "loss": 0.0203, "step": 201870 }, { "epoch": 2.1569528286767454, "grad_norm": 1.491916537284851, "learning_rate": 9.678615767422332e-07, "loss": 0.0093, "step": 201880 }, { "epoch": 2.1570596719910253, "grad_norm": 2.5395984649658203, "learning_rate": 9.678556502222957e-07, "loss": 0.0089, "step": 201890 }, { "epoch": 2.157166515305305, "grad_norm": 0.3100202977657318, "learning_rate": 9.678497231741141e-07, "loss": 0.0246, "step": 201900 }, { "epoch": 2.1572733586195842, "grad_norm": 0.05636386573314667, "learning_rate": 9.678437955976948e-07, "loss": 0.0291, "step": 201910 }, { "epoch": 2.157380201933864, "grad_norm": 0.004775163717567921, "learning_rate": 9.678378674930447e-07, "loss": 0.0334, "step": 201920 }, { "epoch": 2.1574870452481436, "grad_norm": 5.916611194610596, "learning_rate": 9.678319388601703e-07, "loss": 0.0285, "step": 201930 }, { "epoch": 2.157593888562423, "grad_norm": 4.996068954467773, "learning_rate": 9.678260096990782e-07, "loss": 0.0491, "step": 201940 }, { "epoch": 2.157700731876703, "grad_norm": 0.3146972954273224, "learning_rate": 9.678200800097753e-07, "loss": 0.0245, "step": 201950 }, { "epoch": 2.1578075751909824, "grad_norm": 0.03444405272603035, "learning_rate": 9.678141497922683e-07, "loss": 0.0386, "step": 201960 }, { "epoch": 2.157914418505262, "grad_norm": 1.8998842239379883, "learning_rate": 9.678082190465637e-07, "loss": 0.0371, "step": 201970 }, { "epoch": 2.158021261819542, "grad_norm": 2.7866604328155518, "learning_rate": 9.678022877726683e-07, "loss": 0.0384, "step": 201980 }, { "epoch": 2.1581281051338213, "grad_norm": 0.5628986358642578, "learning_rate": 9.677963559705889e-07, "loss": 0.0297, "step": 201990 }, { "epoch": 2.1582349484481007, "grad_norm": 2.3741681575775146, "learning_rate": 9.67790423640332e-07, "loss": 0.0455, "step": 202000 }, { "epoch": 2.1583417917623806, "grad_norm": 4.151631832122803, "learning_rate": 9.677844907819045e-07, "loss": 0.0359, "step": 202010 }, { "epoch": 2.15844863507666, "grad_norm": 0.0703442320227623, "learning_rate": 9.67778557395313e-07, "loss": 0.0182, "step": 202020 }, { "epoch": 2.1585554783909395, "grad_norm": 1.9070415496826172, "learning_rate": 9.677726234805641e-07, "loss": 0.0101, "step": 202030 }, { "epoch": 2.1586623217052194, "grad_norm": 0.2559269666671753, "learning_rate": 9.677666890376649e-07, "loss": 0.0059, "step": 202040 }, { "epoch": 2.158769165019499, "grad_norm": 0.1584189236164093, "learning_rate": 9.677607540666215e-07, "loss": 0.0296, "step": 202050 }, { "epoch": 2.1588760083337784, "grad_norm": 13.698139190673828, "learning_rate": 9.67754818567441e-07, "loss": 0.016, "step": 202060 }, { "epoch": 2.1589828516480583, "grad_norm": 1.10794198513031, "learning_rate": 9.6774888254013e-07, "loss": 0.0029, "step": 202070 }, { "epoch": 2.1590896949623377, "grad_norm": 3.668867349624634, "learning_rate": 9.677429459846952e-07, "loss": 0.0287, "step": 202080 }, { "epoch": 2.159196538276617, "grad_norm": 0.007634392008185387, "learning_rate": 9.677370089011432e-07, "loss": 0.0337, "step": 202090 }, { "epoch": 2.159303381590897, "grad_norm": 4.918166637420654, "learning_rate": 9.67731071289481e-07, "loss": 0.0113, "step": 202100 }, { "epoch": 2.1594102249051765, "grad_norm": 1.3860663175582886, "learning_rate": 9.677251331497148e-07, "loss": 0.0255, "step": 202110 }, { "epoch": 2.159517068219456, "grad_norm": 0.19134272634983063, "learning_rate": 9.67719194481852e-07, "loss": 0.0047, "step": 202120 }, { "epoch": 2.159623911533736, "grad_norm": 3.134423017501831, "learning_rate": 9.677132552858988e-07, "loss": 0.0133, "step": 202130 }, { "epoch": 2.1597307548480154, "grad_norm": 0.18792352080345154, "learning_rate": 9.67707315561862e-07, "loss": 0.0109, "step": 202140 }, { "epoch": 2.159837598162295, "grad_norm": 6.290213584899902, "learning_rate": 9.677013753097486e-07, "loss": 0.06, "step": 202150 }, { "epoch": 2.1599444414765747, "grad_norm": 0.03744719922542572, "learning_rate": 9.676954345295646e-07, "loss": 0.0189, "step": 202160 }, { "epoch": 2.160051284790854, "grad_norm": 0.9874456524848938, "learning_rate": 9.676894932213176e-07, "loss": 0.0047, "step": 202170 }, { "epoch": 2.1601581281051336, "grad_norm": 6.247106552124023, "learning_rate": 9.676835513850135e-07, "loss": 0.0397, "step": 202180 }, { "epoch": 2.1602649714194135, "grad_norm": 3.1896562576293945, "learning_rate": 9.676776090206596e-07, "loss": 0.0278, "step": 202190 }, { "epoch": 2.160371814733693, "grad_norm": 0.18508653342723846, "learning_rate": 9.676716661282622e-07, "loss": 0.1013, "step": 202200 }, { "epoch": 2.1604786580479725, "grad_norm": 7.687909126281738, "learning_rate": 9.676657227078284e-07, "loss": 0.0352, "step": 202210 }, { "epoch": 2.1605855013622524, "grad_norm": 4.497132778167725, "learning_rate": 9.676597787593645e-07, "loss": 0.0276, "step": 202220 }, { "epoch": 2.160692344676532, "grad_norm": 0.00501038134098053, "learning_rate": 9.676538342828776e-07, "loss": 0.0241, "step": 202230 }, { "epoch": 2.1607991879908113, "grad_norm": 0.28287824988365173, "learning_rate": 9.676478892783742e-07, "loss": 0.0122, "step": 202240 }, { "epoch": 2.160906031305091, "grad_norm": 10.59033203125, "learning_rate": 9.676419437458609e-07, "loss": 0.0435, "step": 202250 }, { "epoch": 2.1610128746193706, "grad_norm": 0.5297582745552063, "learning_rate": 9.676359976853446e-07, "loss": 0.007, "step": 202260 }, { "epoch": 2.16111971793365, "grad_norm": 1.4715543985366821, "learning_rate": 9.67630051096832e-07, "loss": 0.0259, "step": 202270 }, { "epoch": 2.16122656124793, "grad_norm": 4.993304252624512, "learning_rate": 9.676241039803298e-07, "loss": 0.0098, "step": 202280 }, { "epoch": 2.1613334045622095, "grad_norm": 0.02436385303735733, "learning_rate": 9.676181563358446e-07, "loss": 0.0221, "step": 202290 }, { "epoch": 2.161440247876489, "grad_norm": 5.908884525299072, "learning_rate": 9.676122081633832e-07, "loss": 0.0115, "step": 202300 }, { "epoch": 2.161547091190769, "grad_norm": 2.308619976043701, "learning_rate": 9.676062594629526e-07, "loss": 0.0179, "step": 202310 }, { "epoch": 2.1616539345050483, "grad_norm": 0.35666853189468384, "learning_rate": 9.67600310234559e-07, "loss": 0.0112, "step": 202320 }, { "epoch": 2.161760777819328, "grad_norm": 0.7296382188796997, "learning_rate": 9.675943604782093e-07, "loss": 0.0195, "step": 202330 }, { "epoch": 2.1618676211336076, "grad_norm": 0.02444591373205185, "learning_rate": 9.675884101939106e-07, "loss": 0.0278, "step": 202340 }, { "epoch": 2.161974464447887, "grad_norm": 0.03070937842130661, "learning_rate": 9.675824593816689e-07, "loss": 0.0165, "step": 202350 }, { "epoch": 2.1620813077621666, "grad_norm": 0.5289450883865356, "learning_rate": 9.675765080414915e-07, "loss": 0.0095, "step": 202360 }, { "epoch": 2.1621881510764465, "grad_norm": 0.9655237793922424, "learning_rate": 9.675705561733849e-07, "loss": 0.0327, "step": 202370 }, { "epoch": 2.162294994390726, "grad_norm": 5.430336952209473, "learning_rate": 9.67564603777356e-07, "loss": 0.0329, "step": 202380 }, { "epoch": 2.162401837705006, "grad_norm": 3.5816774368286133, "learning_rate": 9.67558650853411e-07, "loss": 0.0552, "step": 202390 }, { "epoch": 2.1625086810192853, "grad_norm": 0.3011612296104431, "learning_rate": 9.675526974015572e-07, "loss": 0.0102, "step": 202400 }, { "epoch": 2.1626155243335647, "grad_norm": 0.06257861852645874, "learning_rate": 9.675467434218013e-07, "loss": 0.0105, "step": 202410 }, { "epoch": 2.162722367647844, "grad_norm": 0.014810357242822647, "learning_rate": 9.675407889141497e-07, "loss": 0.0109, "step": 202420 }, { "epoch": 2.162829210962124, "grad_norm": 1.4393243789672852, "learning_rate": 9.675348338786092e-07, "loss": 0.0121, "step": 202430 }, { "epoch": 2.1629360542764036, "grad_norm": 0.7110271453857422, "learning_rate": 9.675288783151867e-07, "loss": 0.0123, "step": 202440 }, { "epoch": 2.1630428975906835, "grad_norm": 2.049834728240967, "learning_rate": 9.675229222238888e-07, "loss": 0.0362, "step": 202450 }, { "epoch": 2.163149740904963, "grad_norm": 0.009597101248800755, "learning_rate": 9.67516965604722e-07, "loss": 0.0192, "step": 202460 }, { "epoch": 2.1632565842192424, "grad_norm": 0.05401695519685745, "learning_rate": 9.675110084576936e-07, "loss": 0.0121, "step": 202470 }, { "epoch": 2.1633634275335223, "grad_norm": 0.7854247689247131, "learning_rate": 9.675050507828098e-07, "loss": 0.0232, "step": 202480 }, { "epoch": 2.1634702708478017, "grad_norm": 0.6451717615127563, "learning_rate": 9.674990925800775e-07, "loss": 0.0139, "step": 202490 }, { "epoch": 2.163577114162081, "grad_norm": 0.4357949495315552, "learning_rate": 9.674931338495036e-07, "loss": 0.0248, "step": 202500 }, { "epoch": 2.163683957476361, "grad_norm": 2.919635057449341, "learning_rate": 9.674871745910944e-07, "loss": 0.0508, "step": 202510 }, { "epoch": 2.1637908007906406, "grad_norm": 0.02113940380513668, "learning_rate": 9.674812148048572e-07, "loss": 0.0203, "step": 202520 }, { "epoch": 2.16389764410492, "grad_norm": 7.885838985443115, "learning_rate": 9.674752544907983e-07, "loss": 0.0187, "step": 202530 }, { "epoch": 2.1640044874192, "grad_norm": 0.36138486862182617, "learning_rate": 9.674692936489244e-07, "loss": 0.0314, "step": 202540 }, { "epoch": 2.1641113307334794, "grad_norm": 4.726189136505127, "learning_rate": 9.674633322792426e-07, "loss": 0.04, "step": 202550 }, { "epoch": 2.164218174047759, "grad_norm": 9.259119033813477, "learning_rate": 9.674573703817592e-07, "loss": 0.0343, "step": 202560 }, { "epoch": 2.1643250173620387, "grad_norm": 0.36117225885391235, "learning_rate": 9.674514079564813e-07, "loss": 0.013, "step": 202570 }, { "epoch": 2.164431860676318, "grad_norm": 0.8413044810295105, "learning_rate": 9.674454450034155e-07, "loss": 0.0272, "step": 202580 }, { "epoch": 2.1645387039905977, "grad_norm": 0.21955636143684387, "learning_rate": 9.674394815225685e-07, "loss": 0.0636, "step": 202590 }, { "epoch": 2.1646455473048776, "grad_norm": 2.7413816452026367, "learning_rate": 9.674335175139472e-07, "loss": 0.0148, "step": 202600 }, { "epoch": 2.164752390619157, "grad_norm": 1.1936225891113281, "learning_rate": 9.674275529775578e-07, "loss": 0.0076, "step": 202610 }, { "epoch": 2.1648592339334365, "grad_norm": 6.463752269744873, "learning_rate": 9.674215879134076e-07, "loss": 0.0315, "step": 202620 }, { "epoch": 2.1649660772477164, "grad_norm": 0.04766058176755905, "learning_rate": 9.674156223215034e-07, "loss": 0.0082, "step": 202630 }, { "epoch": 2.165072920561996, "grad_norm": 7.877416133880615, "learning_rate": 9.674096562018513e-07, "loss": 0.0158, "step": 202640 }, { "epoch": 2.1651797638762753, "grad_norm": 2.8701059818267822, "learning_rate": 9.674036895544586e-07, "loss": 0.0082, "step": 202650 }, { "epoch": 2.165286607190555, "grad_norm": 5.293376445770264, "learning_rate": 9.673977223793319e-07, "loss": 0.0171, "step": 202660 }, { "epoch": 2.1653934505048347, "grad_norm": 0.06624692678451538, "learning_rate": 9.673917546764778e-07, "loss": 0.0294, "step": 202670 }, { "epoch": 2.165500293819114, "grad_norm": 0.037742480635643005, "learning_rate": 9.673857864459033e-07, "loss": 0.0169, "step": 202680 }, { "epoch": 2.165607137133394, "grad_norm": 14.107505798339844, "learning_rate": 9.673798176876148e-07, "loss": 0.0197, "step": 202690 }, { "epoch": 2.1657139804476735, "grad_norm": 14.77293872833252, "learning_rate": 9.67373848401619e-07, "loss": 0.0429, "step": 202700 }, { "epoch": 2.165820823761953, "grad_norm": 2.846623182296753, "learning_rate": 9.673678785879232e-07, "loss": 0.0172, "step": 202710 }, { "epoch": 2.165927667076233, "grad_norm": 1.7568762302398682, "learning_rate": 9.673619082465335e-07, "loss": 0.0152, "step": 202720 }, { "epoch": 2.1660345103905123, "grad_norm": 11.055209159851074, "learning_rate": 9.673559373774573e-07, "loss": 0.0394, "step": 202730 }, { "epoch": 2.1661413537047918, "grad_norm": 1.7566105127334595, "learning_rate": 9.673499659807008e-07, "loss": 0.0193, "step": 202740 }, { "epoch": 2.1662481970190717, "grad_norm": 0.4259304106235504, "learning_rate": 9.67343994056271e-07, "loss": 0.0129, "step": 202750 }, { "epoch": 2.166355040333351, "grad_norm": 0.031443990767002106, "learning_rate": 9.673380216041744e-07, "loss": 0.005, "step": 202760 }, { "epoch": 2.1664618836476306, "grad_norm": 1.8372225761413574, "learning_rate": 9.67332048624418e-07, "loss": 0.0184, "step": 202770 }, { "epoch": 2.1665687269619105, "grad_norm": 0.0026253792457282543, "learning_rate": 9.673260751170084e-07, "loss": 0.0142, "step": 202780 }, { "epoch": 2.16667557027619, "grad_norm": 0.016989298164844513, "learning_rate": 9.673201010819523e-07, "loss": 0.0239, "step": 202790 }, { "epoch": 2.1667824135904694, "grad_norm": 1.2133045196533203, "learning_rate": 9.673141265192566e-07, "loss": 0.0114, "step": 202800 }, { "epoch": 2.1668892569047493, "grad_norm": 0.14043556153774261, "learning_rate": 9.67308151428928e-07, "loss": 0.0056, "step": 202810 }, { "epoch": 2.1669961002190288, "grad_norm": 0.6379592418670654, "learning_rate": 9.673021758109735e-07, "loss": 0.0472, "step": 202820 }, { "epoch": 2.1671029435333082, "grad_norm": 0.005686446558684111, "learning_rate": 9.672961996653991e-07, "loss": 0.0089, "step": 202830 }, { "epoch": 2.167209786847588, "grad_norm": 0.2188168466091156, "learning_rate": 9.672902229922122e-07, "loss": 0.0238, "step": 202840 }, { "epoch": 2.1673166301618676, "grad_norm": 0.008232681080698967, "learning_rate": 9.672842457914195e-07, "loss": 0.0109, "step": 202850 }, { "epoch": 2.167423473476147, "grad_norm": 2.6374828815460205, "learning_rate": 9.672782680630275e-07, "loss": 0.0147, "step": 202860 }, { "epoch": 2.167530316790427, "grad_norm": 0.060573235154151917, "learning_rate": 9.67272289807043e-07, "loss": 0.0181, "step": 202870 }, { "epoch": 2.1676371601047064, "grad_norm": 0.01827872358262539, "learning_rate": 9.67266311023473e-07, "loss": 0.0145, "step": 202880 }, { "epoch": 2.167744003418986, "grad_norm": 0.0013199789682403207, "learning_rate": 9.67260331712324e-07, "loss": 0.0153, "step": 202890 }, { "epoch": 2.167850846733266, "grad_norm": 3.04325532913208, "learning_rate": 9.67254351873603e-07, "loss": 0.0144, "step": 202900 }, { "epoch": 2.1679576900475452, "grad_norm": 0.0422050915658474, "learning_rate": 9.672483715073163e-07, "loss": 0.0245, "step": 202910 }, { "epoch": 2.1680645333618247, "grad_norm": 0.06721236556768417, "learning_rate": 9.67242390613471e-07, "loss": 0.0178, "step": 202920 }, { "epoch": 2.1681713766761046, "grad_norm": 0.006644582841545343, "learning_rate": 9.672364091920737e-07, "loss": 0.0092, "step": 202930 }, { "epoch": 2.168278219990384, "grad_norm": 1.396809458732605, "learning_rate": 9.672304272431313e-07, "loss": 0.0065, "step": 202940 }, { "epoch": 2.1683850633046635, "grad_norm": 11.846839904785156, "learning_rate": 9.672244447666506e-07, "loss": 0.0913, "step": 202950 }, { "epoch": 2.1684919066189434, "grad_norm": 1.4890649318695068, "learning_rate": 9.672184617626382e-07, "loss": 0.0017, "step": 202960 }, { "epoch": 2.168598749933223, "grad_norm": 2.8363163471221924, "learning_rate": 9.672124782311008e-07, "loss": 0.0403, "step": 202970 }, { "epoch": 2.1687055932475023, "grad_norm": 7.277448654174805, "learning_rate": 9.672064941720452e-07, "loss": 0.0167, "step": 202980 }, { "epoch": 2.1688124365617822, "grad_norm": 1.7199153900146484, "learning_rate": 9.672005095854784e-07, "loss": 0.0316, "step": 202990 }, { "epoch": 2.1689192798760617, "grad_norm": 0.5332976579666138, "learning_rate": 9.671945244714067e-07, "loss": 0.036, "step": 203000 }, { "epoch": 2.169026123190341, "grad_norm": 18.7060604095459, "learning_rate": 9.671885388298373e-07, "loss": 0.0299, "step": 203010 }, { "epoch": 2.169132966504621, "grad_norm": 0.5904439091682434, "learning_rate": 9.671825526607768e-07, "loss": 0.0121, "step": 203020 }, { "epoch": 2.1692398098189005, "grad_norm": 2.027812957763672, "learning_rate": 9.671765659642319e-07, "loss": 0.0298, "step": 203030 }, { "epoch": 2.1693466531331804, "grad_norm": 0.08369199186563492, "learning_rate": 9.671705787402094e-07, "loss": 0.014, "step": 203040 }, { "epoch": 2.16945349644746, "grad_norm": 1.5514308214187622, "learning_rate": 9.67164590988716e-07, "loss": 0.0129, "step": 203050 }, { "epoch": 2.1695603397617393, "grad_norm": 0.038170915096998215, "learning_rate": 9.671586027097585e-07, "loss": 0.0128, "step": 203060 }, { "epoch": 2.169667183076019, "grad_norm": 6.385754585266113, "learning_rate": 9.671526139033439e-07, "loss": 0.0636, "step": 203070 }, { "epoch": 2.1697740263902987, "grad_norm": 0.04492780566215515, "learning_rate": 9.671466245694784e-07, "loss": 0.009, "step": 203080 }, { "epoch": 2.169880869704578, "grad_norm": 0.04111868888139725, "learning_rate": 9.671406347081693e-07, "loss": 0.0204, "step": 203090 }, { "epoch": 2.169987713018858, "grad_norm": 0.12591487169265747, "learning_rate": 9.671346443194232e-07, "loss": 0.0012, "step": 203100 }, { "epoch": 2.1700945563331375, "grad_norm": 0.3128232955932617, "learning_rate": 9.671286534032469e-07, "loss": 0.0254, "step": 203110 }, { "epoch": 2.170201399647417, "grad_norm": 1.2043044567108154, "learning_rate": 9.671226619596468e-07, "loss": 0.0709, "step": 203120 }, { "epoch": 2.1703082429616964, "grad_norm": 5.9026970863342285, "learning_rate": 9.671166699886302e-07, "loss": 0.0212, "step": 203130 }, { "epoch": 2.1704150862759763, "grad_norm": 0.3489992022514343, "learning_rate": 9.671106774902035e-07, "loss": 0.0061, "step": 203140 }, { "epoch": 2.170521929590256, "grad_norm": 4.657905101776123, "learning_rate": 9.671046844643738e-07, "loss": 0.0495, "step": 203150 }, { "epoch": 2.1706287729045357, "grad_norm": 1.291644811630249, "learning_rate": 9.670986909111476e-07, "loss": 0.0241, "step": 203160 }, { "epoch": 2.170735616218815, "grad_norm": 1.1541283130645752, "learning_rate": 9.670926968305315e-07, "loss": 0.0111, "step": 203170 }, { "epoch": 2.1708424595330946, "grad_norm": 0.3175380229949951, "learning_rate": 9.670867022225326e-07, "loss": 0.0209, "step": 203180 }, { "epoch": 2.1709493028473745, "grad_norm": 0.20085005462169647, "learning_rate": 9.670807070871576e-07, "loss": 0.0221, "step": 203190 }, { "epoch": 2.171056146161654, "grad_norm": 1.8277175426483154, "learning_rate": 9.670747114244131e-07, "loss": 0.0263, "step": 203200 }, { "epoch": 2.1711629894759334, "grad_norm": 0.13326385617256165, "learning_rate": 9.670687152343062e-07, "loss": 0.0311, "step": 203210 }, { "epoch": 2.1712698327902133, "grad_norm": 4.865155220031738, "learning_rate": 9.670627185168433e-07, "loss": 0.0786, "step": 203220 }, { "epoch": 2.171376676104493, "grad_norm": 0.6080626845359802, "learning_rate": 9.670567212720315e-07, "loss": 0.0213, "step": 203230 }, { "epoch": 2.1714835194187723, "grad_norm": 2.1136634349823, "learning_rate": 9.670507234998774e-07, "loss": 0.028, "step": 203240 }, { "epoch": 2.171590362733052, "grad_norm": 0.15315909683704376, "learning_rate": 9.670447252003876e-07, "loss": 0.0069, "step": 203250 }, { "epoch": 2.1716972060473316, "grad_norm": 0.4197728931903839, "learning_rate": 9.670387263735694e-07, "loss": 0.042, "step": 203260 }, { "epoch": 2.171804049361611, "grad_norm": 0.05627797171473503, "learning_rate": 9.670327270194289e-07, "loss": 0.013, "step": 203270 }, { "epoch": 2.171910892675891, "grad_norm": 0.20802783966064453, "learning_rate": 9.670267271379733e-07, "loss": 0.0208, "step": 203280 }, { "epoch": 2.1720177359901705, "grad_norm": 1.3965948820114136, "learning_rate": 9.670207267292092e-07, "loss": 0.0135, "step": 203290 }, { "epoch": 2.17212457930445, "grad_norm": 2.995227813720703, "learning_rate": 9.670147257931436e-07, "loss": 0.0298, "step": 203300 }, { "epoch": 2.17223142261873, "grad_norm": 2.312278985977173, "learning_rate": 9.67008724329783e-07, "loss": 0.0099, "step": 203310 }, { "epoch": 2.1723382659330093, "grad_norm": 0.267485111951828, "learning_rate": 9.670027223391344e-07, "loss": 0.0138, "step": 203320 }, { "epoch": 2.1724451092472887, "grad_norm": 2.3769242763519287, "learning_rate": 9.669967198212047e-07, "loss": 0.0038, "step": 203330 }, { "epoch": 2.1725519525615686, "grad_norm": 0.5483447909355164, "learning_rate": 9.66990716776e-07, "loss": 0.0194, "step": 203340 }, { "epoch": 2.172658795875848, "grad_norm": 3.759084939956665, "learning_rate": 9.669847132035278e-07, "loss": 0.0092, "step": 203350 }, { "epoch": 2.1727656391901276, "grad_norm": 0.007117969449609518, "learning_rate": 9.669787091037943e-07, "loss": 0.0132, "step": 203360 }, { "epoch": 2.1728724825044075, "grad_norm": 2.018287181854248, "learning_rate": 9.66972704476807e-07, "loss": 0.0694, "step": 203370 }, { "epoch": 2.172979325818687, "grad_norm": 0.03700920566916466, "learning_rate": 9.669666993225722e-07, "loss": 0.0074, "step": 203380 }, { "epoch": 2.1730861691329664, "grad_norm": 0.01029734406620264, "learning_rate": 9.669606936410966e-07, "loss": 0.0378, "step": 203390 }, { "epoch": 2.1731930124472463, "grad_norm": 17.470413208007812, "learning_rate": 9.669546874323873e-07, "loss": 0.0262, "step": 203400 }, { "epoch": 2.1732998557615257, "grad_norm": 0.1567293107509613, "learning_rate": 9.669486806964508e-07, "loss": 0.011, "step": 203410 }, { "epoch": 2.173406699075805, "grad_norm": 5.598500728607178, "learning_rate": 9.669426734332939e-07, "loss": 0.0543, "step": 203420 }, { "epoch": 2.173513542390085, "grad_norm": 0.6758747100830078, "learning_rate": 9.669366656429236e-07, "loss": 0.0132, "step": 203430 }, { "epoch": 2.1736203857043646, "grad_norm": 4.843995094299316, "learning_rate": 9.669306573253466e-07, "loss": 0.0226, "step": 203440 }, { "epoch": 2.173727229018644, "grad_norm": 0.06714886426925659, "learning_rate": 9.669246484805695e-07, "loss": 0.0443, "step": 203450 }, { "epoch": 2.173834072332924, "grad_norm": 5.698840141296387, "learning_rate": 9.669186391085995e-07, "loss": 0.0592, "step": 203460 }, { "epoch": 2.1739409156472034, "grad_norm": 0.7462876439094543, "learning_rate": 9.669126292094428e-07, "loss": 0.0092, "step": 203470 }, { "epoch": 2.174047758961483, "grad_norm": 0.5390975475311279, "learning_rate": 9.669066187831068e-07, "loss": 0.0516, "step": 203480 }, { "epoch": 2.1741546022757627, "grad_norm": 11.455648422241211, "learning_rate": 9.669006078295978e-07, "loss": 0.029, "step": 203490 }, { "epoch": 2.174261445590042, "grad_norm": 0.924746572971344, "learning_rate": 9.668945963489228e-07, "loss": 0.0204, "step": 203500 }, { "epoch": 2.1743682889043217, "grad_norm": 0.029058154672384262, "learning_rate": 9.668885843410886e-07, "loss": 0.0117, "step": 203510 }, { "epoch": 2.1744751322186016, "grad_norm": 2.875042200088501, "learning_rate": 9.66882571806102e-07, "loss": 0.0113, "step": 203520 }, { "epoch": 2.174581975532881, "grad_norm": 0.04025736451148987, "learning_rate": 9.668765587439695e-07, "loss": 0.0209, "step": 203530 }, { "epoch": 2.1746888188471605, "grad_norm": 2.556925058364868, "learning_rate": 9.668705451546985e-07, "loss": 0.0244, "step": 203540 }, { "epoch": 2.1747956621614404, "grad_norm": 3.128833770751953, "learning_rate": 9.668645310382952e-07, "loss": 0.0097, "step": 203550 }, { "epoch": 2.17490250547572, "grad_norm": 0.5269479751586914, "learning_rate": 9.668585163947666e-07, "loss": 0.0131, "step": 203560 }, { "epoch": 2.1750093487899993, "grad_norm": 0.05011589452624321, "learning_rate": 9.668525012241196e-07, "loss": 0.012, "step": 203570 }, { "epoch": 2.175116192104279, "grad_norm": 11.951075553894043, "learning_rate": 9.668464855263606e-07, "loss": 0.0241, "step": 203580 }, { "epoch": 2.1752230354185587, "grad_norm": 1.0842574834823608, "learning_rate": 9.66840469301497e-07, "loss": 0.0103, "step": 203590 }, { "epoch": 2.175329878732838, "grad_norm": 0.8469862341880798, "learning_rate": 9.668344525495353e-07, "loss": 0.0121, "step": 203600 }, { "epoch": 2.175436722047118, "grad_norm": 0.24082495272159576, "learning_rate": 9.66828435270482e-07, "loss": 0.0139, "step": 203610 }, { "epoch": 2.1755435653613975, "grad_norm": 10.670576095581055, "learning_rate": 9.668224174643444e-07, "loss": 0.0068, "step": 203620 }, { "epoch": 2.175650408675677, "grad_norm": 0.3339749276638031, "learning_rate": 9.668163991311288e-07, "loss": 0.0154, "step": 203630 }, { "epoch": 2.175757251989957, "grad_norm": 8.453226089477539, "learning_rate": 9.668103802708426e-07, "loss": 0.0101, "step": 203640 }, { "epoch": 2.1758640953042363, "grad_norm": 3.443345308303833, "learning_rate": 9.668043608834921e-07, "loss": 0.016, "step": 203650 }, { "epoch": 2.1759709386185158, "grad_norm": 0.17327651381492615, "learning_rate": 9.667983409690842e-07, "loss": 0.041, "step": 203660 }, { "epoch": 2.1760777819327957, "grad_norm": 0.04011811688542366, "learning_rate": 9.667923205276258e-07, "loss": 0.0288, "step": 203670 }, { "epoch": 2.176184625247075, "grad_norm": 0.05967625975608826, "learning_rate": 9.667862995591234e-07, "loss": 0.0085, "step": 203680 }, { "epoch": 2.1762914685613546, "grad_norm": 1.2565248012542725, "learning_rate": 9.667802780635843e-07, "loss": 0.0268, "step": 203690 }, { "epoch": 2.1763983118756345, "grad_norm": 0.12929615378379822, "learning_rate": 9.66774256041015e-07, "loss": 0.0157, "step": 203700 }, { "epoch": 2.176505155189914, "grad_norm": 0.1111375093460083, "learning_rate": 9.667682334914223e-07, "loss": 0.0412, "step": 203710 }, { "epoch": 2.1766119985041934, "grad_norm": 1.779339075088501, "learning_rate": 9.667622104148131e-07, "loss": 0.0318, "step": 203720 }, { "epoch": 2.1767188418184733, "grad_norm": 3.908226490020752, "learning_rate": 9.66756186811194e-07, "loss": 0.0183, "step": 203730 }, { "epoch": 2.1768256851327528, "grad_norm": 0.04711160808801651, "learning_rate": 9.667501626805722e-07, "loss": 0.0321, "step": 203740 }, { "epoch": 2.1769325284470322, "grad_norm": 0.05076555907726288, "learning_rate": 9.66744138022954e-07, "loss": 0.01, "step": 203750 }, { "epoch": 2.177039371761312, "grad_norm": 0.006750790867954493, "learning_rate": 9.667381128383466e-07, "loss": 0.0162, "step": 203760 }, { "epoch": 2.1771462150755916, "grad_norm": 1.0989545583724976, "learning_rate": 9.667320871267564e-07, "loss": 0.0267, "step": 203770 }, { "epoch": 2.177253058389871, "grad_norm": 0.19182856380939484, "learning_rate": 9.667260608881906e-07, "loss": 0.0304, "step": 203780 }, { "epoch": 2.177359901704151, "grad_norm": 0.05399809777736664, "learning_rate": 9.667200341226557e-07, "loss": 0.018, "step": 203790 }, { "epoch": 2.1774667450184304, "grad_norm": 0.06535276770591736, "learning_rate": 9.66714006830159e-07, "loss": 0.0653, "step": 203800 }, { "epoch": 2.1775735883327103, "grad_norm": 3.2968015670776367, "learning_rate": 9.667079790107067e-07, "loss": 0.0107, "step": 203810 }, { "epoch": 2.1776804316469898, "grad_norm": 0.014403137378394604, "learning_rate": 9.66701950664306e-07, "loss": 0.0597, "step": 203820 }, { "epoch": 2.1777872749612692, "grad_norm": 10.381800651550293, "learning_rate": 9.666959217909633e-07, "loss": 0.0318, "step": 203830 }, { "epoch": 2.1778941182755487, "grad_norm": 0.017940498888492584, "learning_rate": 9.66689892390686e-07, "loss": 0.0051, "step": 203840 }, { "epoch": 2.1780009615898286, "grad_norm": 0.21584977209568024, "learning_rate": 9.666838624634804e-07, "loss": 0.016, "step": 203850 }, { "epoch": 2.178107804904108, "grad_norm": 0.33771276473999023, "learning_rate": 9.666778320093536e-07, "loss": 0.0202, "step": 203860 }, { "epoch": 2.178214648218388, "grad_norm": 3.977731704711914, "learning_rate": 9.666718010283124e-07, "loss": 0.003, "step": 203870 }, { "epoch": 2.1783214915326674, "grad_norm": 6.168358325958252, "learning_rate": 9.666657695203633e-07, "loss": 0.0167, "step": 203880 }, { "epoch": 2.178428334846947, "grad_norm": 0.014830450527369976, "learning_rate": 9.666597374855135e-07, "loss": 0.0206, "step": 203890 }, { "epoch": 2.1785351781612263, "grad_norm": 0.06914016604423523, "learning_rate": 9.666537049237695e-07, "loss": 0.0631, "step": 203900 }, { "epoch": 2.1786420214755062, "grad_norm": 0.5693246126174927, "learning_rate": 9.666476718351383e-07, "loss": 0.0231, "step": 203910 }, { "epoch": 2.1787488647897857, "grad_norm": 0.0793742761015892, "learning_rate": 9.666416382196266e-07, "loss": 0.0286, "step": 203920 }, { "epoch": 2.1788557081040656, "grad_norm": 0.1544119268655777, "learning_rate": 9.666356040772413e-07, "loss": 0.0173, "step": 203930 }, { "epoch": 2.178962551418345, "grad_norm": 1.0351048707962036, "learning_rate": 9.666295694079893e-07, "loss": 0.0062, "step": 203940 }, { "epoch": 2.1790693947326245, "grad_norm": 14.587648391723633, "learning_rate": 9.66623534211877e-07, "loss": 0.0682, "step": 203950 }, { "epoch": 2.1791762380469044, "grad_norm": 0.020510278642177582, "learning_rate": 9.66617498488912e-07, "loss": 0.0175, "step": 203960 }, { "epoch": 2.179283081361184, "grad_norm": 5.560774326324463, "learning_rate": 9.666114622391003e-07, "loss": 0.0123, "step": 203970 }, { "epoch": 2.1793899246754633, "grad_norm": 0.06218360364437103, "learning_rate": 9.666054254624492e-07, "loss": 0.0135, "step": 203980 }, { "epoch": 2.1794967679897432, "grad_norm": 1.560194969177246, "learning_rate": 9.66599388158965e-07, "loss": 0.0485, "step": 203990 }, { "epoch": 2.1796036113040227, "grad_norm": 2.091587543487549, "learning_rate": 9.665933503286551e-07, "loss": 0.0325, "step": 204000 }, { "epoch": 2.179710454618302, "grad_norm": 0.5802640914916992, "learning_rate": 9.665873119715263e-07, "loss": 0.0337, "step": 204010 }, { "epoch": 2.179817297932582, "grad_norm": 0.771645724773407, "learning_rate": 9.66581273087585e-07, "loss": 0.0758, "step": 204020 }, { "epoch": 2.1799241412468615, "grad_norm": 1.1846835613250732, "learning_rate": 9.665752336768382e-07, "loss": 0.0099, "step": 204030 }, { "epoch": 2.180030984561141, "grad_norm": 3.109971523284912, "learning_rate": 9.665691937392928e-07, "loss": 0.0181, "step": 204040 }, { "epoch": 2.180137827875421, "grad_norm": 0.3775724768638611, "learning_rate": 9.665631532749556e-07, "loss": 0.0162, "step": 204050 }, { "epoch": 2.1802446711897003, "grad_norm": 0.01447288691997528, "learning_rate": 9.665571122838334e-07, "loss": 0.0163, "step": 204060 }, { "epoch": 2.18035151450398, "grad_norm": 8.896489143371582, "learning_rate": 9.66551070765933e-07, "loss": 0.0509, "step": 204070 }, { "epoch": 2.1804583578182597, "grad_norm": 0.05788833647966385, "learning_rate": 9.66545028721261e-07, "loss": 0.0058, "step": 204080 }, { "epoch": 2.180565201132539, "grad_norm": 0.013971090316772461, "learning_rate": 9.665389861498248e-07, "loss": 0.0064, "step": 204090 }, { "epoch": 2.1806720444468186, "grad_norm": 1.7343591451644897, "learning_rate": 9.665329430516306e-07, "loss": 0.0881, "step": 204100 }, { "epoch": 2.1807788877610985, "grad_norm": 6.518662452697754, "learning_rate": 9.665268994266857e-07, "loss": 0.0183, "step": 204110 }, { "epoch": 2.180885731075378, "grad_norm": 0.020203394815325737, "learning_rate": 9.665208552749968e-07, "loss": 0.0388, "step": 204120 }, { "epoch": 2.1809925743896574, "grad_norm": 4.094673156738281, "learning_rate": 9.665148105965703e-07, "loss": 0.0323, "step": 204130 }, { "epoch": 2.1810994177039373, "grad_norm": 4.139031887054443, "learning_rate": 9.665087653914136e-07, "loss": 0.0092, "step": 204140 }, { "epoch": 2.181206261018217, "grad_norm": 0.9892898797988892, "learning_rate": 9.665027196595333e-07, "loss": 0.007, "step": 204150 }, { "epoch": 2.1813131043324963, "grad_norm": 3.550312042236328, "learning_rate": 9.664966734009361e-07, "loss": 0.061, "step": 204160 }, { "epoch": 2.181419947646776, "grad_norm": 0.08673392236232758, "learning_rate": 9.66490626615629e-07, "loss": 0.0073, "step": 204170 }, { "epoch": 2.1815267909610556, "grad_norm": 0.12748681008815765, "learning_rate": 9.664845793036188e-07, "loss": 0.0145, "step": 204180 }, { "epoch": 2.181633634275335, "grad_norm": 0.04610591381788254, "learning_rate": 9.664785314649124e-07, "loss": 0.0389, "step": 204190 }, { "epoch": 2.181740477589615, "grad_norm": 0.7449523210525513, "learning_rate": 9.664724830995164e-07, "loss": 0.0093, "step": 204200 }, { "epoch": 2.1818473209038944, "grad_norm": 4.525869369506836, "learning_rate": 9.664664342074377e-07, "loss": 0.0073, "step": 204210 }, { "epoch": 2.181954164218174, "grad_norm": 0.053314320743083954, "learning_rate": 9.664603847886832e-07, "loss": 0.0528, "step": 204220 }, { "epoch": 2.182061007532454, "grad_norm": 2.7480461597442627, "learning_rate": 9.6645433484326e-07, "loss": 0.0109, "step": 204230 }, { "epoch": 2.1821678508467333, "grad_norm": 1.3833808898925781, "learning_rate": 9.664482843711743e-07, "loss": 0.0113, "step": 204240 }, { "epoch": 2.1822746941610127, "grad_norm": 0.19107453525066376, "learning_rate": 9.664422333724334e-07, "loss": 0.0132, "step": 204250 }, { "epoch": 2.1823815374752926, "grad_norm": 0.002469993196427822, "learning_rate": 9.664361818470442e-07, "loss": 0.0252, "step": 204260 }, { "epoch": 2.182488380789572, "grad_norm": 0.03007636032998562, "learning_rate": 9.66430129795013e-07, "loss": 0.0374, "step": 204270 }, { "epoch": 2.1825952241038515, "grad_norm": 0.4332156181335449, "learning_rate": 9.664240772163472e-07, "loss": 0.0109, "step": 204280 }, { "epoch": 2.1827020674181314, "grad_norm": 3.812575340270996, "learning_rate": 9.664180241110534e-07, "loss": 0.0152, "step": 204290 }, { "epoch": 2.182808910732411, "grad_norm": 2.9117090702056885, "learning_rate": 9.664119704791386e-07, "loss": 0.0221, "step": 204300 }, { "epoch": 2.1829157540466904, "grad_norm": 7.864366054534912, "learning_rate": 9.664059163206092e-07, "loss": 0.0222, "step": 204310 }, { "epoch": 2.1830225973609703, "grad_norm": 2.684561252593994, "learning_rate": 9.663998616354725e-07, "loss": 0.011, "step": 204320 }, { "epoch": 2.1831294406752497, "grad_norm": 2.3214173316955566, "learning_rate": 9.66393806423735e-07, "loss": 0.0164, "step": 204330 }, { "epoch": 2.183236283989529, "grad_norm": 1.662083625793457, "learning_rate": 9.663877506854038e-07, "loss": 0.0186, "step": 204340 }, { "epoch": 2.183343127303809, "grad_norm": 0.08824532479047775, "learning_rate": 9.663816944204856e-07, "loss": 0.0548, "step": 204350 }, { "epoch": 2.1834499706180885, "grad_norm": 0.10108719021081924, "learning_rate": 9.663756376289874e-07, "loss": 0.0311, "step": 204360 }, { "epoch": 2.183556813932368, "grad_norm": 0.10891927778720856, "learning_rate": 9.663695803109157e-07, "loss": 0.0819, "step": 204370 }, { "epoch": 2.183663657246648, "grad_norm": 0.8474712371826172, "learning_rate": 9.663635224662777e-07, "loss": 0.011, "step": 204380 }, { "epoch": 2.1837705005609274, "grad_norm": 10.111412048339844, "learning_rate": 9.6635746409508e-07, "loss": 0.0164, "step": 204390 }, { "epoch": 2.183877343875207, "grad_norm": 1.077014684677124, "learning_rate": 9.663514051973294e-07, "loss": 0.0212, "step": 204400 }, { "epoch": 2.1839841871894867, "grad_norm": 2.629082441329956, "learning_rate": 9.66345345773033e-07, "loss": 0.0331, "step": 204410 }, { "epoch": 2.184091030503766, "grad_norm": 1.0670796632766724, "learning_rate": 9.663392858221976e-07, "loss": 0.013, "step": 204420 }, { "epoch": 2.1841978738180456, "grad_norm": 0.1539374589920044, "learning_rate": 9.663332253448298e-07, "loss": 0.0031, "step": 204430 }, { "epoch": 2.1843047171323255, "grad_norm": 2.0961508750915527, "learning_rate": 9.663271643409367e-07, "loss": 0.0241, "step": 204440 }, { "epoch": 2.184411560446605, "grad_norm": 0.011120741255581379, "learning_rate": 9.663211028105248e-07, "loss": 0.0003, "step": 204450 }, { "epoch": 2.1845184037608845, "grad_norm": 7.654678821563721, "learning_rate": 9.663150407536015e-07, "loss": 0.0465, "step": 204460 }, { "epoch": 2.1846252470751644, "grad_norm": 3.8888819217681885, "learning_rate": 9.663089781701733e-07, "loss": 0.0169, "step": 204470 }, { "epoch": 2.184732090389444, "grad_norm": 0.011438916437327862, "learning_rate": 9.66302915060247e-07, "loss": 0.0035, "step": 204480 }, { "epoch": 2.1848389337037233, "grad_norm": 2.6654059886932373, "learning_rate": 9.662968514238295e-07, "loss": 0.0314, "step": 204490 }, { "epoch": 2.184945777018003, "grad_norm": 1.6150166988372803, "learning_rate": 9.662907872609275e-07, "loss": 0.0176, "step": 204500 }, { "epoch": 2.1850526203322826, "grad_norm": 4.23583459854126, "learning_rate": 9.662847225715482e-07, "loss": 0.0303, "step": 204510 }, { "epoch": 2.1851594636465625, "grad_norm": 0.20497551560401917, "learning_rate": 9.662786573556983e-07, "loss": 0.02, "step": 204520 }, { "epoch": 2.185266306960842, "grad_norm": 0.06259983777999878, "learning_rate": 9.662725916133846e-07, "loss": 0.0288, "step": 204530 }, { "epoch": 2.1853731502751215, "grad_norm": 8.537296295166016, "learning_rate": 9.662665253446136e-07, "loss": 0.053, "step": 204540 }, { "epoch": 2.185479993589401, "grad_norm": 0.4553370177745819, "learning_rate": 9.66260458549393e-07, "loss": 0.0116, "step": 204550 }, { "epoch": 2.185586836903681, "grad_norm": 5.182250022888184, "learning_rate": 9.662543912277288e-07, "loss": 0.0604, "step": 204560 }, { "epoch": 2.1856936802179603, "grad_norm": 10.841985702514648, "learning_rate": 9.662483233796283e-07, "loss": 0.0187, "step": 204570 }, { "epoch": 2.18580052353224, "grad_norm": 0.02220628224313259, "learning_rate": 9.662422550050983e-07, "loss": 0.0135, "step": 204580 }, { "epoch": 2.1859073668465197, "grad_norm": 15.354776382446289, "learning_rate": 9.662361861041456e-07, "loss": 0.0057, "step": 204590 }, { "epoch": 2.186014210160799, "grad_norm": 0.4841306805610657, "learning_rate": 9.66230116676777e-07, "loss": 0.0113, "step": 204600 }, { "epoch": 2.1861210534750786, "grad_norm": 2.3268957138061523, "learning_rate": 9.662240467229995e-07, "loss": 0.0077, "step": 204610 }, { "epoch": 2.1862278967893585, "grad_norm": 0.14289188385009766, "learning_rate": 9.662179762428197e-07, "loss": 0.0162, "step": 204620 }, { "epoch": 2.186334740103638, "grad_norm": 0.015142466872930527, "learning_rate": 9.66211905236245e-07, "loss": 0.0031, "step": 204630 }, { "epoch": 2.186441583417918, "grad_norm": 2.1016290187835693, "learning_rate": 9.662058337032815e-07, "loss": 0.0408, "step": 204640 }, { "epoch": 2.1865484267321973, "grad_norm": 0.019356800243258476, "learning_rate": 9.661997616439365e-07, "loss": 0.0116, "step": 204650 }, { "epoch": 2.1866552700464768, "grad_norm": 1.9956369400024414, "learning_rate": 9.661936890582168e-07, "loss": 0.0109, "step": 204660 }, { "epoch": 2.1867621133607567, "grad_norm": 0.12004072219133377, "learning_rate": 9.661876159461293e-07, "loss": 0.0156, "step": 204670 }, { "epoch": 2.186868956675036, "grad_norm": 1.225018858909607, "learning_rate": 9.661815423076806e-07, "loss": 0.0278, "step": 204680 }, { "epoch": 2.1869757999893156, "grad_norm": 0.16195110976696014, "learning_rate": 9.66175468142878e-07, "loss": 0.0076, "step": 204690 }, { "epoch": 2.1870826433035955, "grad_norm": 4.318630695343018, "learning_rate": 9.661693934517282e-07, "loss": 0.0288, "step": 204700 }, { "epoch": 2.187189486617875, "grad_norm": 0.009686402976512909, "learning_rate": 9.661633182342377e-07, "loss": 0.012, "step": 204710 }, { "epoch": 2.1872963299321544, "grad_norm": 0.032908741384744644, "learning_rate": 9.661572424904137e-07, "loss": 0.017, "step": 204720 }, { "epoch": 2.1874031732464343, "grad_norm": 0.027904406189918518, "learning_rate": 9.66151166220263e-07, "loss": 0.01, "step": 204730 }, { "epoch": 2.1875100165607138, "grad_norm": 6.273062229156494, "learning_rate": 9.661450894237923e-07, "loss": 0.0091, "step": 204740 }, { "epoch": 2.187616859874993, "grad_norm": 5.734523296356201, "learning_rate": 9.66139012101009e-07, "loss": 0.0096, "step": 204750 }, { "epoch": 2.187723703189273, "grad_norm": 3.828214168548584, "learning_rate": 9.661329342519192e-07, "loss": 0.0269, "step": 204760 }, { "epoch": 2.1878305465035526, "grad_norm": 4.975100040435791, "learning_rate": 9.661268558765303e-07, "loss": 0.0179, "step": 204770 }, { "epoch": 2.187937389817832, "grad_norm": 7.689139366149902, "learning_rate": 9.661207769748489e-07, "loss": 0.0038, "step": 204780 }, { "epoch": 2.188044233132112, "grad_norm": 0.010255217552185059, "learning_rate": 9.661146975468822e-07, "loss": 0.0221, "step": 204790 }, { "epoch": 2.1881510764463914, "grad_norm": 1.4301270246505737, "learning_rate": 9.661086175926367e-07, "loss": 0.0145, "step": 204800 }, { "epoch": 2.188257919760671, "grad_norm": 0.9313193559646606, "learning_rate": 9.661025371121194e-07, "loss": 0.0422, "step": 204810 }, { "epoch": 2.1883647630749508, "grad_norm": 0.0038841860368847847, "learning_rate": 9.66096456105337e-07, "loss": 0.0108, "step": 204820 }, { "epoch": 2.18847160638923, "grad_norm": 1.9999055862426758, "learning_rate": 9.660903745722967e-07, "loss": 0.0098, "step": 204830 }, { "epoch": 2.1885784497035097, "grad_norm": 17.299388885498047, "learning_rate": 9.660842925130052e-07, "loss": 0.0125, "step": 204840 }, { "epoch": 2.1886852930177896, "grad_norm": 0.008184300735592842, "learning_rate": 9.660782099274693e-07, "loss": 0.0124, "step": 204850 }, { "epoch": 2.188792136332069, "grad_norm": 1.7739444971084595, "learning_rate": 9.660721268156957e-07, "loss": 0.0278, "step": 204860 }, { "epoch": 2.1888989796463485, "grad_norm": 4.280880451202393, "learning_rate": 9.66066043177692e-07, "loss": 0.0341, "step": 204870 }, { "epoch": 2.1890058229606284, "grad_norm": 1.8001458644866943, "learning_rate": 9.66059959013464e-07, "loss": 0.0223, "step": 204880 }, { "epoch": 2.189112666274908, "grad_norm": 0.4063577353954315, "learning_rate": 9.660538743230195e-07, "loss": 0.0416, "step": 204890 }, { "epoch": 2.1892195095891873, "grad_norm": 0.810424268245697, "learning_rate": 9.66047789106365e-07, "loss": 0.0189, "step": 204900 }, { "epoch": 2.189326352903467, "grad_norm": 0.5429697036743164, "learning_rate": 9.660417033635073e-07, "loss": 0.0521, "step": 204910 }, { "epoch": 2.1894331962177467, "grad_norm": 0.055071812123060226, "learning_rate": 9.660356170944531e-07, "loss": 0.0405, "step": 204920 }, { "epoch": 2.189540039532026, "grad_norm": 0.19007200002670288, "learning_rate": 9.660295302992099e-07, "loss": 0.0324, "step": 204930 }, { "epoch": 2.189646882846306, "grad_norm": 0.025980761274695396, "learning_rate": 9.66023442977784e-07, "loss": 0.0617, "step": 204940 }, { "epoch": 2.1897537261605855, "grad_norm": 0.8510847687721252, "learning_rate": 9.660173551301825e-07, "loss": 0.0074, "step": 204950 }, { "epoch": 2.189860569474865, "grad_norm": 3.4262278079986572, "learning_rate": 9.660112667564122e-07, "loss": 0.0097, "step": 204960 }, { "epoch": 2.189967412789145, "grad_norm": 0.02328510954976082, "learning_rate": 9.6600517785648e-07, "loss": 0.0006, "step": 204970 }, { "epoch": 2.1900742561034243, "grad_norm": 8.20703125, "learning_rate": 9.659990884303927e-07, "loss": 0.0254, "step": 204980 }, { "epoch": 2.190181099417704, "grad_norm": 6.821194648742676, "learning_rate": 9.659929984781575e-07, "loss": 0.0174, "step": 204990 }, { "epoch": 2.1902879427319837, "grad_norm": 0.37849119305610657, "learning_rate": 9.65986907999781e-07, "loss": 0.0334, "step": 205000 }, { "epoch": 2.190394786046263, "grad_norm": 4.816410541534424, "learning_rate": 9.659808169952699e-07, "loss": 0.0176, "step": 205010 }, { "epoch": 2.1905016293605426, "grad_norm": 0.1877060830593109, "learning_rate": 9.659747254646315e-07, "loss": 0.0167, "step": 205020 }, { "epoch": 2.1906084726748225, "grad_norm": 1.0657061338424683, "learning_rate": 9.659686334078722e-07, "loss": 0.0264, "step": 205030 }, { "epoch": 2.190715315989102, "grad_norm": 0.2812320590019226, "learning_rate": 9.659625408249993e-07, "loss": 0.034, "step": 205040 }, { "epoch": 2.1908221593033814, "grad_norm": 20.822799682617188, "learning_rate": 9.659564477160196e-07, "loss": 0.0177, "step": 205050 }, { "epoch": 2.1909290026176613, "grad_norm": 0.681887149810791, "learning_rate": 9.6595035408094e-07, "loss": 0.1064, "step": 205060 }, { "epoch": 2.191035845931941, "grad_norm": 4.778679847717285, "learning_rate": 9.659442599197671e-07, "loss": 0.025, "step": 205070 }, { "epoch": 2.1911426892462202, "grad_norm": 7.632136344909668, "learning_rate": 9.65938165232508e-07, "loss": 0.0501, "step": 205080 }, { "epoch": 2.1912495325605, "grad_norm": 0.006438404321670532, "learning_rate": 9.659320700191694e-07, "loss": 0.0386, "step": 205090 }, { "epoch": 2.1913563758747796, "grad_norm": 0.20211265981197357, "learning_rate": 9.659259742797586e-07, "loss": 0.0167, "step": 205100 }, { "epoch": 2.191463219189059, "grad_norm": 3.773141384124756, "learning_rate": 9.65919878014282e-07, "loss": 0.0118, "step": 205110 }, { "epoch": 2.191570062503339, "grad_norm": 0.09566370397806168, "learning_rate": 9.659137812227466e-07, "loss": 0.0124, "step": 205120 }, { "epoch": 2.1916769058176184, "grad_norm": 3.517155885696411, "learning_rate": 9.659076839051597e-07, "loss": 0.0416, "step": 205130 }, { "epoch": 2.191783749131898, "grad_norm": 5.295639991760254, "learning_rate": 9.659015860615278e-07, "loss": 0.0353, "step": 205140 }, { "epoch": 2.191890592446178, "grad_norm": 5.984766960144043, "learning_rate": 9.658954876918578e-07, "loss": 0.0155, "step": 205150 }, { "epoch": 2.1919974357604572, "grad_norm": 9.76119327545166, "learning_rate": 9.658893887961565e-07, "loss": 0.0245, "step": 205160 }, { "epoch": 2.1921042790747367, "grad_norm": 0.4215441346168518, "learning_rate": 9.65883289374431e-07, "loss": 0.0198, "step": 205170 }, { "epoch": 2.1922111223890166, "grad_norm": 3.081841230392456, "learning_rate": 9.658771894266879e-07, "loss": 0.0025, "step": 205180 }, { "epoch": 2.192317965703296, "grad_norm": 2.0094916820526123, "learning_rate": 9.658710889529346e-07, "loss": 0.0126, "step": 205190 }, { "epoch": 2.1924248090175755, "grad_norm": 0.005980458110570908, "learning_rate": 9.658649879531777e-07, "loss": 0.0083, "step": 205200 }, { "epoch": 2.1925316523318554, "grad_norm": 1.4117131233215332, "learning_rate": 9.65858886427424e-07, "loss": 0.0126, "step": 205210 }, { "epoch": 2.192638495646135, "grad_norm": 0.04053819924592972, "learning_rate": 9.658527843756803e-07, "loss": 0.0227, "step": 205220 }, { "epoch": 2.1927453389604143, "grad_norm": 0.2812683582305908, "learning_rate": 9.658466817979537e-07, "loss": 0.0228, "step": 205230 }, { "epoch": 2.1928521822746943, "grad_norm": 2.241278886795044, "learning_rate": 9.65840578694251e-07, "loss": 0.023, "step": 205240 }, { "epoch": 2.1929590255889737, "grad_norm": 0.19129899144172668, "learning_rate": 9.658344750645792e-07, "loss": 0.0202, "step": 205250 }, { "epoch": 2.193065868903253, "grad_norm": 0.06440296769142151, "learning_rate": 9.658283709089453e-07, "loss": 0.0255, "step": 205260 }, { "epoch": 2.193172712217533, "grad_norm": 5.1885552406311035, "learning_rate": 9.658222662273557e-07, "loss": 0.0316, "step": 205270 }, { "epoch": 2.1932795555318125, "grad_norm": 0.3140719532966614, "learning_rate": 9.658161610198178e-07, "loss": 0.0094, "step": 205280 }, { "epoch": 2.1933863988460924, "grad_norm": 4.295383930206299, "learning_rate": 9.65810055286338e-07, "loss": 0.0089, "step": 205290 }, { "epoch": 2.193493242160372, "grad_norm": 2.1346068382263184, "learning_rate": 9.65803949026924e-07, "loss": 0.0258, "step": 205300 }, { "epoch": 2.1936000854746514, "grad_norm": 0.001314812689088285, "learning_rate": 9.65797842241582e-07, "loss": 0.0152, "step": 205310 }, { "epoch": 2.193706928788931, "grad_norm": 0.4225142300128937, "learning_rate": 9.657917349303187e-07, "loss": 0.0078, "step": 205320 }, { "epoch": 2.1938137721032107, "grad_norm": 2.0897936820983887, "learning_rate": 9.657856270931418e-07, "loss": 0.0085, "step": 205330 }, { "epoch": 2.19392061541749, "grad_norm": 0.1259153038263321, "learning_rate": 9.657795187300577e-07, "loss": 0.0138, "step": 205340 }, { "epoch": 2.19402745873177, "grad_norm": 3.3786251544952393, "learning_rate": 9.657734098410734e-07, "loss": 0.008, "step": 205350 }, { "epoch": 2.1941343020460495, "grad_norm": 0.07965221256017685, "learning_rate": 9.657673004261957e-07, "loss": 0.0388, "step": 205360 }, { "epoch": 2.194241145360329, "grad_norm": 0.1214897558093071, "learning_rate": 9.657611904854316e-07, "loss": 0.0035, "step": 205370 }, { "epoch": 2.1943479886746085, "grad_norm": 1.3090888261795044, "learning_rate": 9.657550800187879e-07, "loss": 0.0081, "step": 205380 }, { "epoch": 2.1944548319888884, "grad_norm": 5.991797924041748, "learning_rate": 9.657489690262714e-07, "loss": 0.0203, "step": 205390 }, { "epoch": 2.194561675303168, "grad_norm": 1.2255202531814575, "learning_rate": 9.657428575078895e-07, "loss": 0.0176, "step": 205400 }, { "epoch": 2.1946685186174477, "grad_norm": 0.03978894650936127, "learning_rate": 9.657367454636484e-07, "loss": 0.0371, "step": 205410 }, { "epoch": 2.194775361931727, "grad_norm": 0.0072929589077830315, "learning_rate": 9.657306328935557e-07, "loss": 0.0254, "step": 205420 }, { "epoch": 2.1948822052460066, "grad_norm": 8.475281715393066, "learning_rate": 9.65724519797618e-07, "loss": 0.0366, "step": 205430 }, { "epoch": 2.1949890485602865, "grad_norm": 1.4365055561065674, "learning_rate": 9.65718406175842e-07, "loss": 0.0068, "step": 205440 }, { "epoch": 2.195095891874566, "grad_norm": 0.10802216827869415, "learning_rate": 9.657122920282346e-07, "loss": 0.0119, "step": 205450 }, { "epoch": 2.1952027351888455, "grad_norm": 0.004854192491620779, "learning_rate": 9.65706177354803e-07, "loss": 0.0117, "step": 205460 }, { "epoch": 2.1953095785031254, "grad_norm": 0.053380925208330154, "learning_rate": 9.657000621555541e-07, "loss": 0.0517, "step": 205470 }, { "epoch": 2.195416421817405, "grad_norm": 0.2725308835506439, "learning_rate": 9.656939464304945e-07, "loss": 0.0123, "step": 205480 }, { "epoch": 2.1955232651316843, "grad_norm": 0.1418466717004776, "learning_rate": 9.656878301796314e-07, "loss": 0.0173, "step": 205490 }, { "epoch": 2.195630108445964, "grad_norm": 0.6257781982421875, "learning_rate": 9.656817134029717e-07, "loss": 0.0102, "step": 205500 }, { "epoch": 2.1957369517602436, "grad_norm": 0.1219504326581955, "learning_rate": 9.65675596100522e-07, "loss": 0.0057, "step": 205510 }, { "epoch": 2.195843795074523, "grad_norm": 8.870397567749023, "learning_rate": 9.656694782722895e-07, "loss": 0.0314, "step": 205520 }, { "epoch": 2.195950638388803, "grad_norm": 4.438493728637695, "learning_rate": 9.65663359918281e-07, "loss": 0.019, "step": 205530 }, { "epoch": 2.1960574817030825, "grad_norm": 1.0535725355148315, "learning_rate": 9.656572410385035e-07, "loss": 0.0398, "step": 205540 }, { "epoch": 2.196164325017362, "grad_norm": 0.6446707248687744, "learning_rate": 9.656511216329638e-07, "loss": 0.024, "step": 205550 }, { "epoch": 2.196271168331642, "grad_norm": 0.022366493940353394, "learning_rate": 9.656450017016685e-07, "loss": 0.0286, "step": 205560 }, { "epoch": 2.1963780116459213, "grad_norm": 3.972712755203247, "learning_rate": 9.656388812446253e-07, "loss": 0.0305, "step": 205570 }, { "epoch": 2.1964848549602007, "grad_norm": 0.2582326829433441, "learning_rate": 9.656327602618405e-07, "loss": 0.0154, "step": 205580 }, { "epoch": 2.1965916982744806, "grad_norm": 0.027233367785811424, "learning_rate": 9.656266387533212e-07, "loss": 0.0059, "step": 205590 }, { "epoch": 2.19669854158876, "grad_norm": 5.566064834594727, "learning_rate": 9.656205167190742e-07, "loss": 0.007, "step": 205600 }, { "epoch": 2.1968053849030396, "grad_norm": 0.08837717771530151, "learning_rate": 9.656143941591067e-07, "loss": 0.0072, "step": 205610 }, { "epoch": 2.1969122282173195, "grad_norm": 4.417654514312744, "learning_rate": 9.656082710734252e-07, "loss": 0.009, "step": 205620 }, { "epoch": 2.197019071531599, "grad_norm": 3.783963918685913, "learning_rate": 9.656021474620367e-07, "loss": 0.0232, "step": 205630 }, { "epoch": 2.1971259148458784, "grad_norm": 0.4821104407310486, "learning_rate": 9.655960233249485e-07, "loss": 0.018, "step": 205640 }, { "epoch": 2.1972327581601583, "grad_norm": 0.08751983940601349, "learning_rate": 9.65589898662167e-07, "loss": 0.0011, "step": 205650 }, { "epoch": 2.1973396014744377, "grad_norm": 0.004262574482709169, "learning_rate": 9.655837734736998e-07, "loss": 0.0094, "step": 205660 }, { "epoch": 2.197446444788717, "grad_norm": 1.515623688697815, "learning_rate": 9.65577647759553e-07, "loss": 0.0511, "step": 205670 }, { "epoch": 2.197553288102997, "grad_norm": 1.3749340772628784, "learning_rate": 9.65571521519734e-07, "loss": 0.0122, "step": 205680 }, { "epoch": 2.1976601314172766, "grad_norm": 3.8056342601776123, "learning_rate": 9.655653947542498e-07, "loss": 0.0183, "step": 205690 }, { "epoch": 2.197766974731556, "grad_norm": 10.055511474609375, "learning_rate": 9.655592674631069e-07, "loss": 0.0335, "step": 205700 }, { "epoch": 2.197873818045836, "grad_norm": 0.8592578172683716, "learning_rate": 9.655531396463123e-07, "loss": 0.0209, "step": 205710 }, { "epoch": 2.1979806613601154, "grad_norm": 0.5318590402603149, "learning_rate": 9.655470113038734e-07, "loss": 0.0353, "step": 205720 }, { "epoch": 2.198087504674395, "grad_norm": 4.188952922821045, "learning_rate": 9.655408824357967e-07, "loss": 0.0253, "step": 205730 }, { "epoch": 2.1981943479886747, "grad_norm": 0.02913578972220421, "learning_rate": 9.655347530420891e-07, "loss": 0.0017, "step": 205740 }, { "epoch": 2.198301191302954, "grad_norm": 0.10429098457098007, "learning_rate": 9.655286231227578e-07, "loss": 0.0569, "step": 205750 }, { "epoch": 2.1984080346172337, "grad_norm": 0.6439080834388733, "learning_rate": 9.655224926778093e-07, "loss": 0.021, "step": 205760 }, { "epoch": 2.1985148779315136, "grad_norm": 6.314185619354248, "learning_rate": 9.65516361707251e-07, "loss": 0.013, "step": 205770 }, { "epoch": 2.198621721245793, "grad_norm": 0.12136362493038177, "learning_rate": 9.655102302110892e-07, "loss": 0.0015, "step": 205780 }, { "epoch": 2.1987285645600725, "grad_norm": 0.34709399938583374, "learning_rate": 9.655040981893317e-07, "loss": 0.0208, "step": 205790 }, { "epoch": 2.1988354078743524, "grad_norm": 0.044128771871328354, "learning_rate": 9.654979656419847e-07, "loss": 0.0058, "step": 205800 }, { "epoch": 2.198942251188632, "grad_norm": 0.6367528438568115, "learning_rate": 9.654918325690555e-07, "loss": 0.0185, "step": 205810 }, { "epoch": 2.1990490945029113, "grad_norm": 0.2861098647117615, "learning_rate": 9.654856989705508e-07, "loss": 0.0043, "step": 205820 }, { "epoch": 2.199155937817191, "grad_norm": 0.20049388706684113, "learning_rate": 9.654795648464778e-07, "loss": 0.0113, "step": 205830 }, { "epoch": 2.1992627811314707, "grad_norm": 0.4364691972732544, "learning_rate": 9.65473430196843e-07, "loss": 0.014, "step": 205840 }, { "epoch": 2.19936962444575, "grad_norm": 0.033577460795640945, "learning_rate": 9.654672950216535e-07, "loss": 0.014, "step": 205850 }, { "epoch": 2.19947646776003, "grad_norm": 0.0073521020822227, "learning_rate": 9.654611593209166e-07, "loss": 0.0285, "step": 205860 }, { "epoch": 2.1995833110743095, "grad_norm": 0.2682701051235199, "learning_rate": 9.654550230946387e-07, "loss": 0.0206, "step": 205870 }, { "epoch": 2.199690154388589, "grad_norm": 0.591145932674408, "learning_rate": 9.65448886342827e-07, "loss": 0.0131, "step": 205880 }, { "epoch": 2.199796997702869, "grad_norm": 4.283697605133057, "learning_rate": 9.654427490654884e-07, "loss": 0.0285, "step": 205890 }, { "epoch": 2.1999038410171483, "grad_norm": 0.29845455288887024, "learning_rate": 9.654366112626298e-07, "loss": 0.0274, "step": 205900 }, { "epoch": 2.2000106843314278, "grad_norm": 2.1317501068115234, "learning_rate": 9.65430472934258e-07, "loss": 0.0676, "step": 205910 }, { "epoch": 2.2001175276457077, "grad_norm": 4.138710021972656, "learning_rate": 9.654243340803803e-07, "loss": 0.0218, "step": 205920 }, { "epoch": 2.200224370959987, "grad_norm": 2.560802459716797, "learning_rate": 9.654181947010032e-07, "loss": 0.0332, "step": 205930 }, { "epoch": 2.2003312142742666, "grad_norm": 6.67229700088501, "learning_rate": 9.654120547961339e-07, "loss": 0.0295, "step": 205940 }, { "epoch": 2.2004380575885465, "grad_norm": 0.016676779836416245, "learning_rate": 9.654059143657793e-07, "loss": 0.046, "step": 205950 }, { "epoch": 2.200544900902826, "grad_norm": 4.9180426597595215, "learning_rate": 9.653997734099464e-07, "loss": 0.0206, "step": 205960 }, { "epoch": 2.2006517442171054, "grad_norm": 0.45524731278419495, "learning_rate": 9.653936319286418e-07, "loss": 0.0257, "step": 205970 }, { "epoch": 2.2007585875313853, "grad_norm": 4.214721202850342, "learning_rate": 9.65387489921873e-07, "loss": 0.0086, "step": 205980 }, { "epoch": 2.2008654308456648, "grad_norm": 0.010768942534923553, "learning_rate": 9.65381347389646e-07, "loss": 0.0066, "step": 205990 }, { "epoch": 2.2009722741599447, "grad_norm": 0.01156021561473608, "learning_rate": 9.653752043319689e-07, "loss": 0.0041, "step": 206000 }, { "epoch": 2.201079117474224, "grad_norm": 0.02094375155866146, "learning_rate": 9.653690607488477e-07, "loss": 0.0289, "step": 206010 }, { "epoch": 2.2011859607885036, "grad_norm": 0.30961522459983826, "learning_rate": 9.6536291664029e-07, "loss": 0.0209, "step": 206020 }, { "epoch": 2.201292804102783, "grad_norm": 0.026096003130078316, "learning_rate": 9.653567720063023e-07, "loss": 0.0408, "step": 206030 }, { "epoch": 2.201399647417063, "grad_norm": 0.021784517914056778, "learning_rate": 9.653506268468919e-07, "loss": 0.0059, "step": 206040 }, { "epoch": 2.2015064907313424, "grad_norm": 1.2088373899459839, "learning_rate": 9.653444811620651e-07, "loss": 0.0226, "step": 206050 }, { "epoch": 2.2016133340456223, "grad_norm": 0.030337510630488396, "learning_rate": 9.653383349518296e-07, "loss": 0.004, "step": 206060 }, { "epoch": 2.2017201773599018, "grad_norm": 0.33544740080833435, "learning_rate": 9.65332188216192e-07, "loss": 0.0194, "step": 206070 }, { "epoch": 2.2018270206741812, "grad_norm": 0.624999463558197, "learning_rate": 9.653260409551591e-07, "loss": 0.0191, "step": 206080 }, { "epoch": 2.2019338639884607, "grad_norm": 0.5347193479537964, "learning_rate": 9.65319893168738e-07, "loss": 0.0083, "step": 206090 }, { "epoch": 2.2020407073027406, "grad_norm": 0.09870634227991104, "learning_rate": 9.653137448569356e-07, "loss": 0.0102, "step": 206100 }, { "epoch": 2.20214755061702, "grad_norm": 0.19019150733947754, "learning_rate": 9.65307596019759e-07, "loss": 0.0083, "step": 206110 }, { "epoch": 2.2022543939313, "grad_norm": 2.8868277072906494, "learning_rate": 9.653014466572148e-07, "loss": 0.0174, "step": 206120 }, { "epoch": 2.2023612372455794, "grad_norm": 0.4751929044723511, "learning_rate": 9.652952967693102e-07, "loss": 0.0313, "step": 206130 }, { "epoch": 2.202468080559859, "grad_norm": 2.1321489810943604, "learning_rate": 9.652891463560521e-07, "loss": 0.0608, "step": 206140 }, { "epoch": 2.202574923874139, "grad_norm": 0.4984939396381378, "learning_rate": 9.652829954174476e-07, "loss": 0.0124, "step": 206150 }, { "epoch": 2.2026817671884182, "grad_norm": 0.006221098825335503, "learning_rate": 9.652768439535034e-07, "loss": 0.0127, "step": 206160 }, { "epoch": 2.2027886105026977, "grad_norm": 7.281771659851074, "learning_rate": 9.652706919642265e-07, "loss": 0.1339, "step": 206170 }, { "epoch": 2.2028954538169776, "grad_norm": 1.3869662284851074, "learning_rate": 9.652645394496238e-07, "loss": 0.0189, "step": 206180 }, { "epoch": 2.203002297131257, "grad_norm": 18.322757720947266, "learning_rate": 9.652583864097026e-07, "loss": 0.0242, "step": 206190 }, { "epoch": 2.2031091404455365, "grad_norm": 0.048532236367464066, "learning_rate": 9.652522328444692e-07, "loss": 0.0219, "step": 206200 }, { "epoch": 2.2032159837598164, "grad_norm": 5.134415149688721, "learning_rate": 9.65246078753931e-07, "loss": 0.0275, "step": 206210 }, { "epoch": 2.203322827074096, "grad_norm": 0.03493592515587807, "learning_rate": 9.65239924138095e-07, "loss": 0.017, "step": 206220 }, { "epoch": 2.2034296703883753, "grad_norm": 3.685225009918213, "learning_rate": 9.65233768996968e-07, "loss": 0.038, "step": 206230 }, { "epoch": 2.2035365137026552, "grad_norm": 14.572596549987793, "learning_rate": 9.652276133305567e-07, "loss": 0.0516, "step": 206240 }, { "epoch": 2.2036433570169347, "grad_norm": 0.9515160918235779, "learning_rate": 9.652214571388686e-07, "loss": 0.0106, "step": 206250 }, { "epoch": 2.203750200331214, "grad_norm": 0.3071950674057007, "learning_rate": 9.652153004219104e-07, "loss": 0.0257, "step": 206260 }, { "epoch": 2.203857043645494, "grad_norm": 0.11097290366888046, "learning_rate": 9.652091431796888e-07, "loss": 0.0488, "step": 206270 }, { "epoch": 2.2039638869597735, "grad_norm": 0.6819820404052734, "learning_rate": 9.652029854122111e-07, "loss": 0.0082, "step": 206280 }, { "epoch": 2.204070730274053, "grad_norm": 15.804754257202148, "learning_rate": 9.65196827119484e-07, "loss": 0.0273, "step": 206290 }, { "epoch": 2.204177573588333, "grad_norm": 1.2229342460632324, "learning_rate": 9.651906683015148e-07, "loss": 0.0057, "step": 206300 }, { "epoch": 2.2042844169026123, "grad_norm": 0.10960877686738968, "learning_rate": 9.6518450895831e-07, "loss": 0.0097, "step": 206310 }, { "epoch": 2.204391260216892, "grad_norm": 0.015099316835403442, "learning_rate": 9.651783490898768e-07, "loss": 0.0063, "step": 206320 }, { "epoch": 2.2044981035311717, "grad_norm": 0.02252659760415554, "learning_rate": 9.651721886962223e-07, "loss": 0.032, "step": 206330 }, { "epoch": 2.204604946845451, "grad_norm": 0.21364659070968628, "learning_rate": 9.651660277773533e-07, "loss": 0.01, "step": 206340 }, { "epoch": 2.2047117901597306, "grad_norm": 0.1894702911376953, "learning_rate": 9.651598663332765e-07, "loss": 0.0155, "step": 206350 }, { "epoch": 2.2048186334740105, "grad_norm": 0.7773433923721313, "learning_rate": 9.651537043639991e-07, "loss": 0.0033, "step": 206360 }, { "epoch": 2.20492547678829, "grad_norm": 0.009521054103970528, "learning_rate": 9.651475418695283e-07, "loss": 0.0305, "step": 206370 }, { "epoch": 2.2050323201025694, "grad_norm": 0.06135273352265358, "learning_rate": 9.651413788498708e-07, "loss": 0.0093, "step": 206380 }, { "epoch": 2.2051391634168493, "grad_norm": 0.05318214371800423, "learning_rate": 9.651352153050333e-07, "loss": 0.0129, "step": 206390 }, { "epoch": 2.205246006731129, "grad_norm": 13.046764373779297, "learning_rate": 9.651290512350234e-07, "loss": 0.0609, "step": 206400 }, { "epoch": 2.2053528500454083, "grad_norm": 1.4904162883758545, "learning_rate": 9.651228866398475e-07, "loss": 0.0057, "step": 206410 }, { "epoch": 2.205459693359688, "grad_norm": 6.288191795349121, "learning_rate": 9.65116721519513e-07, "loss": 0.017, "step": 206420 }, { "epoch": 2.2055665366739676, "grad_norm": 0.5531454682350159, "learning_rate": 9.651105558740261e-07, "loss": 0.0613, "step": 206430 }, { "epoch": 2.205673379988247, "grad_norm": 1.6683439016342163, "learning_rate": 9.651043897033946e-07, "loss": 0.0079, "step": 206440 }, { "epoch": 2.205780223302527, "grad_norm": 0.015922795981168747, "learning_rate": 9.650982230076253e-07, "loss": 0.017, "step": 206450 }, { "epoch": 2.2058870666168064, "grad_norm": 0.9944292902946472, "learning_rate": 9.650920557867248e-07, "loss": 0.005, "step": 206460 }, { "epoch": 2.205993909931086, "grad_norm": 0.04428086429834366, "learning_rate": 9.650858880407003e-07, "loss": 0.0273, "step": 206470 }, { "epoch": 2.206100753245366, "grad_norm": 1.6832342147827148, "learning_rate": 9.650797197695585e-07, "loss": 0.006, "step": 206480 }, { "epoch": 2.2062075965596453, "grad_norm": 0.005065307021141052, "learning_rate": 9.65073550973307e-07, "loss": 0.0146, "step": 206490 }, { "epoch": 2.2063144398739247, "grad_norm": 0.1811951845884323, "learning_rate": 9.650673816519522e-07, "loss": 0.0311, "step": 206500 }, { "epoch": 2.2064212831882046, "grad_norm": 9.29746150970459, "learning_rate": 9.650612118055011e-07, "loss": 0.0569, "step": 206510 }, { "epoch": 2.206528126502484, "grad_norm": 0.5406718254089355, "learning_rate": 9.65055041433961e-07, "loss": 0.0119, "step": 206520 }, { "epoch": 2.2066349698167635, "grad_norm": 6.161470890045166, "learning_rate": 9.650488705373385e-07, "loss": 0.0121, "step": 206530 }, { "epoch": 2.2067418131310435, "grad_norm": 5.742910861968994, "learning_rate": 9.650426991156408e-07, "loss": 0.0219, "step": 206540 }, { "epoch": 2.206848656445323, "grad_norm": 0.1705969125032425, "learning_rate": 9.650365271688748e-07, "loss": 0.0062, "step": 206550 }, { "epoch": 2.2069554997596024, "grad_norm": 1.948228359222412, "learning_rate": 9.650303546970472e-07, "loss": 0.0497, "step": 206560 }, { "epoch": 2.2070623430738823, "grad_norm": 0.004615118261426687, "learning_rate": 9.650241817001657e-07, "loss": 0.0161, "step": 206570 }, { "epoch": 2.2071691863881617, "grad_norm": 6.836610317230225, "learning_rate": 9.650180081782365e-07, "loss": 0.0443, "step": 206580 }, { "epoch": 2.207276029702441, "grad_norm": 0.9659750461578369, "learning_rate": 9.65011834131267e-07, "loss": 0.034, "step": 206590 }, { "epoch": 2.207382873016721, "grad_norm": 1.8378504514694214, "learning_rate": 9.650056595592638e-07, "loss": 0.0038, "step": 206600 }, { "epoch": 2.2074897163310006, "grad_norm": 4.4006500244140625, "learning_rate": 9.649994844622345e-07, "loss": 0.0261, "step": 206610 }, { "epoch": 2.20759655964528, "grad_norm": 20.788686752319336, "learning_rate": 9.649933088401855e-07, "loss": 0.0367, "step": 206620 }, { "epoch": 2.20770340295956, "grad_norm": 0.047571148723363876, "learning_rate": 9.649871326931239e-07, "loss": 0.0321, "step": 206630 }, { "epoch": 2.2078102462738394, "grad_norm": 7.837447166442871, "learning_rate": 9.649809560210568e-07, "loss": 0.0241, "step": 206640 }, { "epoch": 2.207917089588119, "grad_norm": 0.02436145208775997, "learning_rate": 9.64974778823991e-07, "loss": 0.0109, "step": 206650 }, { "epoch": 2.2080239329023987, "grad_norm": 2.6086292266845703, "learning_rate": 9.649686011019338e-07, "loss": 0.0031, "step": 206660 }, { "epoch": 2.208130776216678, "grad_norm": 3.7265117168426514, "learning_rate": 9.649624228548919e-07, "loss": 0.063, "step": 206670 }, { "epoch": 2.2082376195309577, "grad_norm": 3.0355958938598633, "learning_rate": 9.649562440828722e-07, "loss": 0.0124, "step": 206680 }, { "epoch": 2.2083444628452376, "grad_norm": 0.17937278747558594, "learning_rate": 9.649500647858818e-07, "loss": 0.0187, "step": 206690 }, { "epoch": 2.208451306159517, "grad_norm": 0.021312477067112923, "learning_rate": 9.649438849639277e-07, "loss": 0.0097, "step": 206700 }, { "epoch": 2.2085581494737965, "grad_norm": 11.801962852478027, "learning_rate": 9.64937704617017e-07, "loss": 0.0399, "step": 206710 }, { "epoch": 2.2086649927880764, "grad_norm": 0.8293471932411194, "learning_rate": 9.649315237451564e-07, "loss": 0.0037, "step": 206720 }, { "epoch": 2.208771836102356, "grad_norm": 4.705063343048096, "learning_rate": 9.649253423483532e-07, "loss": 0.0145, "step": 206730 }, { "epoch": 2.2088786794166353, "grad_norm": 3.903498649597168, "learning_rate": 9.64919160426614e-07, "loss": 0.0145, "step": 206740 }, { "epoch": 2.208985522730915, "grad_norm": 0.0070709348656237125, "learning_rate": 9.64912977979946e-07, "loss": 0.018, "step": 206750 }, { "epoch": 2.2090923660451947, "grad_norm": 2.475431442260742, "learning_rate": 9.649067950083562e-07, "loss": 0.024, "step": 206760 }, { "epoch": 2.2091992093594746, "grad_norm": 1.756748914718628, "learning_rate": 9.649006115118517e-07, "loss": 0.0129, "step": 206770 }, { "epoch": 2.209306052673754, "grad_norm": 0.05181143805384636, "learning_rate": 9.648944274904391e-07, "loss": 0.0077, "step": 206780 }, { "epoch": 2.2094128959880335, "grad_norm": 0.0651858001947403, "learning_rate": 9.648882429441256e-07, "loss": 0.0189, "step": 206790 }, { "epoch": 2.209519739302313, "grad_norm": 0.010457177646458149, "learning_rate": 9.648820578729184e-07, "loss": 0.006, "step": 206800 }, { "epoch": 2.209626582616593, "grad_norm": 0.30067381262779236, "learning_rate": 9.648758722768241e-07, "loss": 0.0097, "step": 206810 }, { "epoch": 2.2097334259308723, "grad_norm": 0.5502318739891052, "learning_rate": 9.648696861558498e-07, "loss": 0.0552, "step": 206820 }, { "epoch": 2.209840269245152, "grad_norm": 4.899520397186279, "learning_rate": 9.648634995100027e-07, "loss": 0.0466, "step": 206830 }, { "epoch": 2.2099471125594317, "grad_norm": 2.681344985961914, "learning_rate": 9.648573123392897e-07, "loss": 0.0127, "step": 206840 }, { "epoch": 2.210053955873711, "grad_norm": 3.2762279510498047, "learning_rate": 9.648511246437175e-07, "loss": 0.0325, "step": 206850 }, { "epoch": 2.2101607991879906, "grad_norm": 0.12081529945135117, "learning_rate": 9.648449364232933e-07, "loss": 0.0106, "step": 206860 }, { "epoch": 2.2102676425022705, "grad_norm": 4.7312140464782715, "learning_rate": 9.648387476780242e-07, "loss": 0.045, "step": 206870 }, { "epoch": 2.21037448581655, "grad_norm": 0.022337650880217552, "learning_rate": 9.648325584079169e-07, "loss": 0.0171, "step": 206880 }, { "epoch": 2.21048132913083, "grad_norm": 0.007875834591686726, "learning_rate": 9.648263686129787e-07, "loss": 0.007, "step": 206890 }, { "epoch": 2.2105881724451093, "grad_norm": 0.8892719745635986, "learning_rate": 9.648201782932163e-07, "loss": 0.0163, "step": 206900 }, { "epoch": 2.2106950157593888, "grad_norm": 10.211291313171387, "learning_rate": 9.648139874486372e-07, "loss": 0.0352, "step": 206910 }, { "epoch": 2.2108018590736687, "grad_norm": 0.3779078722000122, "learning_rate": 9.648077960792477e-07, "loss": 0.0051, "step": 206920 }, { "epoch": 2.210908702387948, "grad_norm": 8.572918891906738, "learning_rate": 9.648016041850553e-07, "loss": 0.0305, "step": 206930 }, { "epoch": 2.2110155457022276, "grad_norm": 0.3881100118160248, "learning_rate": 9.647954117660666e-07, "loss": 0.0204, "step": 206940 }, { "epoch": 2.2111223890165075, "grad_norm": 0.1286141723394394, "learning_rate": 9.64789218822289e-07, "loss": 0.0146, "step": 206950 }, { "epoch": 2.211229232330787, "grad_norm": 0.10641879588365555, "learning_rate": 9.647830253537292e-07, "loss": 0.0141, "step": 206960 }, { "epoch": 2.2113360756450664, "grad_norm": 0.09550134837627411, "learning_rate": 9.647768313603943e-07, "loss": 0.0136, "step": 206970 }, { "epoch": 2.2114429189593463, "grad_norm": 4.547279357910156, "learning_rate": 9.64770636842291e-07, "loss": 0.0471, "step": 206980 }, { "epoch": 2.2115497622736258, "grad_norm": 1.1545296907424927, "learning_rate": 9.64764441799427e-07, "loss": 0.009, "step": 206990 }, { "epoch": 2.2116566055879052, "grad_norm": 0.943576991558075, "learning_rate": 9.647582462318088e-07, "loss": 0.0218, "step": 207000 }, { "epoch": 2.211763448902185, "grad_norm": 0.17038971185684204, "learning_rate": 9.647520501394432e-07, "loss": 0.0048, "step": 207010 }, { "epoch": 2.2118702922164646, "grad_norm": 14.475176811218262, "learning_rate": 9.647458535223377e-07, "loss": 0.047, "step": 207020 }, { "epoch": 2.211977135530744, "grad_norm": 5.401876926422119, "learning_rate": 9.64739656380499e-07, "loss": 0.0185, "step": 207030 }, { "epoch": 2.212083978845024, "grad_norm": 30.8574161529541, "learning_rate": 9.64733458713934e-07, "loss": 0.0338, "step": 207040 }, { "epoch": 2.2121908221593034, "grad_norm": 0.36713138222694397, "learning_rate": 9.6472726052265e-07, "loss": 0.0044, "step": 207050 }, { "epoch": 2.212297665473583, "grad_norm": 3.76920747756958, "learning_rate": 9.64721061806654e-07, "loss": 0.0146, "step": 207060 }, { "epoch": 2.2124045087878628, "grad_norm": 5.65406608581543, "learning_rate": 9.647148625659527e-07, "loss": 0.0161, "step": 207070 }, { "epoch": 2.2125113521021422, "grad_norm": 0.6374912858009338, "learning_rate": 9.647086628005532e-07, "loss": 0.0509, "step": 207080 }, { "epoch": 2.2126181954164217, "grad_norm": 0.15943285822868347, "learning_rate": 9.647024625104628e-07, "loss": 0.0138, "step": 207090 }, { "epoch": 2.2127250387307016, "grad_norm": 0.9791572093963623, "learning_rate": 9.646962616956878e-07, "loss": 0.0132, "step": 207100 }, { "epoch": 2.212831882044981, "grad_norm": 0.12117250263690948, "learning_rate": 9.64690060356236e-07, "loss": 0.0043, "step": 207110 }, { "epoch": 2.2129387253592605, "grad_norm": 2.1757099628448486, "learning_rate": 9.646838584921138e-07, "loss": 0.014, "step": 207120 }, { "epoch": 2.2130455686735404, "grad_norm": 0.07004749774932861, "learning_rate": 9.646776561033286e-07, "loss": 0.0184, "step": 207130 }, { "epoch": 2.21315241198782, "grad_norm": 3.6731858253479004, "learning_rate": 9.646714531898874e-07, "loss": 0.0186, "step": 207140 }, { "epoch": 2.2132592553020993, "grad_norm": 1.1596450805664062, "learning_rate": 9.646652497517967e-07, "loss": 0.0132, "step": 207150 }, { "epoch": 2.2133660986163792, "grad_norm": 7.296895980834961, "learning_rate": 9.646590457890642e-07, "loss": 0.0199, "step": 207160 }, { "epoch": 2.2134729419306587, "grad_norm": 0.05409175530076027, "learning_rate": 9.646528413016964e-07, "loss": 0.0254, "step": 207170 }, { "epoch": 2.213579785244938, "grad_norm": 0.2864610552787781, "learning_rate": 9.646466362897005e-07, "loss": 0.0266, "step": 207180 }, { "epoch": 2.213686628559218, "grad_norm": 0.16329489648342133, "learning_rate": 9.646404307530834e-07, "loss": 0.0252, "step": 207190 }, { "epoch": 2.2137934718734975, "grad_norm": 0.5737830996513367, "learning_rate": 9.646342246918524e-07, "loss": 0.0046, "step": 207200 }, { "epoch": 2.213900315187777, "grad_norm": 0.06828021258115768, "learning_rate": 9.64628018106014e-07, "loss": 0.0009, "step": 207210 }, { "epoch": 2.214007158502057, "grad_norm": 4.889925479888916, "learning_rate": 9.646218109955758e-07, "loss": 0.0312, "step": 207220 }, { "epoch": 2.2141140018163363, "grad_norm": 0.01377216074615717, "learning_rate": 9.646156033605442e-07, "loss": 0.0153, "step": 207230 }, { "epoch": 2.214220845130616, "grad_norm": 3.9700980186462402, "learning_rate": 9.646093952009267e-07, "loss": 0.0543, "step": 207240 }, { "epoch": 2.2143276884448957, "grad_norm": 0.05888981744647026, "learning_rate": 9.6460318651673e-07, "loss": 0.0165, "step": 207250 }, { "epoch": 2.214434531759175, "grad_norm": 0.3814946413040161, "learning_rate": 9.645969773079615e-07, "loss": 0.0255, "step": 207260 }, { "epoch": 2.2145413750734546, "grad_norm": 0.0687619298696518, "learning_rate": 9.64590767574628e-07, "loss": 0.0042, "step": 207270 }, { "epoch": 2.2146482183877345, "grad_norm": 5.010005474090576, "learning_rate": 9.645845573167362e-07, "loss": 0.0223, "step": 207280 }, { "epoch": 2.214755061702014, "grad_norm": 0.005906049627810717, "learning_rate": 9.645783465342934e-07, "loss": 0.0142, "step": 207290 }, { "epoch": 2.2148619050162934, "grad_norm": 0.040727660059928894, "learning_rate": 9.645721352273065e-07, "loss": 0.0059, "step": 207300 }, { "epoch": 2.2149687483305733, "grad_norm": 0.03142968565225601, "learning_rate": 9.645659233957827e-07, "loss": 0.0179, "step": 207310 }, { "epoch": 2.215075591644853, "grad_norm": 0.09857139736413956, "learning_rate": 9.64559711039729e-07, "loss": 0.0048, "step": 207320 }, { "epoch": 2.2151824349591323, "grad_norm": 0.06127935275435448, "learning_rate": 9.645534981591521e-07, "loss": 0.0142, "step": 207330 }, { "epoch": 2.215289278273412, "grad_norm": 0.004465722944587469, "learning_rate": 9.645472847540595e-07, "loss": 0.0376, "step": 207340 }, { "epoch": 2.2153961215876916, "grad_norm": 4.689080715179443, "learning_rate": 9.645410708244578e-07, "loss": 0.0212, "step": 207350 }, { "epoch": 2.215502964901971, "grad_norm": 24.789819717407227, "learning_rate": 9.645348563703542e-07, "loss": 0.0602, "step": 207360 }, { "epoch": 2.215609808216251, "grad_norm": 0.03714069724082947, "learning_rate": 9.645286413917558e-07, "loss": 0.0344, "step": 207370 }, { "epoch": 2.2157166515305304, "grad_norm": 1.3981914520263672, "learning_rate": 9.645224258886692e-07, "loss": 0.025, "step": 207380 }, { "epoch": 2.21582349484481, "grad_norm": 0.02294536866247654, "learning_rate": 9.64516209861102e-07, "loss": 0.0031, "step": 207390 }, { "epoch": 2.21593033815909, "grad_norm": 1.5886276960372925, "learning_rate": 9.645099933090607e-07, "loss": 0.0288, "step": 207400 }, { "epoch": 2.2160371814733693, "grad_norm": 0.5323336720466614, "learning_rate": 9.645037762325529e-07, "loss": 0.0089, "step": 207410 }, { "epoch": 2.2161440247876487, "grad_norm": 0.4741121530532837, "learning_rate": 9.644975586315851e-07, "loss": 0.0106, "step": 207420 }, { "epoch": 2.2162508681019286, "grad_norm": 0.010630978271365166, "learning_rate": 9.644913405061646e-07, "loss": 0.0639, "step": 207430 }, { "epoch": 2.216357711416208, "grad_norm": 0.007681247778236866, "learning_rate": 9.644851218562982e-07, "loss": 0.0754, "step": 207440 }, { "epoch": 2.2164645547304875, "grad_norm": 0.13534876704216003, "learning_rate": 9.64478902681993e-07, "loss": 0.0051, "step": 207450 }, { "epoch": 2.2165713980447674, "grad_norm": 0.0365937240421772, "learning_rate": 9.644726829832562e-07, "loss": 0.035, "step": 207460 }, { "epoch": 2.216678241359047, "grad_norm": 0.002994624199345708, "learning_rate": 9.644664627600945e-07, "loss": 0.0062, "step": 207470 }, { "epoch": 2.216785084673327, "grad_norm": 4.090217590332031, "learning_rate": 9.644602420125151e-07, "loss": 0.031, "step": 207480 }, { "epoch": 2.2168919279876063, "grad_norm": 0.06775679439306259, "learning_rate": 9.644540207405252e-07, "loss": 0.0247, "step": 207490 }, { "epoch": 2.2169987713018857, "grad_norm": 6.9303483963012695, "learning_rate": 9.644477989441317e-07, "loss": 0.0222, "step": 207500 }, { "epoch": 2.217105614616165, "grad_norm": 0.017215263098478317, "learning_rate": 9.644415766233414e-07, "loss": 0.0141, "step": 207510 }, { "epoch": 2.217212457930445, "grad_norm": 10.933478355407715, "learning_rate": 9.644353537781616e-07, "loss": 0.0177, "step": 207520 }, { "epoch": 2.2173193012447245, "grad_norm": 2.560854196548462, "learning_rate": 9.644291304085992e-07, "loss": 0.0389, "step": 207530 }, { "epoch": 2.2174261445590044, "grad_norm": 4.33193826675415, "learning_rate": 9.644229065146614e-07, "loss": 0.0159, "step": 207540 }, { "epoch": 2.217532987873284, "grad_norm": 0.12647736072540283, "learning_rate": 9.644166820963549e-07, "loss": 0.0041, "step": 207550 }, { "epoch": 2.2176398311875634, "grad_norm": 0.9103200435638428, "learning_rate": 9.644104571536869e-07, "loss": 0.0171, "step": 207560 }, { "epoch": 2.217746674501843, "grad_norm": 6.2287068367004395, "learning_rate": 9.644042316866644e-07, "loss": 0.0443, "step": 207570 }, { "epoch": 2.2178535178161227, "grad_norm": 0.05265987664461136, "learning_rate": 9.643980056952947e-07, "loss": 0.0042, "step": 207580 }, { "epoch": 2.217960361130402, "grad_norm": 0.6959840059280396, "learning_rate": 9.643917791795845e-07, "loss": 0.0181, "step": 207590 }, { "epoch": 2.218067204444682, "grad_norm": 3.2717204093933105, "learning_rate": 9.64385552139541e-07, "loss": 0.0139, "step": 207600 }, { "epoch": 2.2181740477589615, "grad_norm": 0.025141267105937004, "learning_rate": 9.643793245751712e-07, "loss": 0.0136, "step": 207610 }, { "epoch": 2.218280891073241, "grad_norm": 5.303186893463135, "learning_rate": 9.64373096486482e-07, "loss": 0.0424, "step": 207620 }, { "epoch": 2.218387734387521, "grad_norm": 0.020030584186315536, "learning_rate": 9.643668678734805e-07, "loss": 0.0249, "step": 207630 }, { "epoch": 2.2184945777018004, "grad_norm": 3.8599958419799805, "learning_rate": 9.643606387361739e-07, "loss": 0.0211, "step": 207640 }, { "epoch": 2.21860142101608, "grad_norm": 3.5264205932617188, "learning_rate": 9.643544090745687e-07, "loss": 0.0281, "step": 207650 }, { "epoch": 2.2187082643303597, "grad_norm": 0.08459669351577759, "learning_rate": 9.643481788886728e-07, "loss": 0.031, "step": 207660 }, { "epoch": 2.218815107644639, "grad_norm": 1.2674663066864014, "learning_rate": 9.643419481784924e-07, "loss": 0.0208, "step": 207670 }, { "epoch": 2.2189219509589186, "grad_norm": 0.04626055061817169, "learning_rate": 9.643357169440351e-07, "loss": 0.0237, "step": 207680 }, { "epoch": 2.2190287942731985, "grad_norm": 1.02820885181427, "learning_rate": 9.643294851853077e-07, "loss": 0.0281, "step": 207690 }, { "epoch": 2.219135637587478, "grad_norm": 2.0066967010498047, "learning_rate": 9.643232529023172e-07, "loss": 0.0062, "step": 207700 }, { "epoch": 2.2192424809017575, "grad_norm": 5.894909858703613, "learning_rate": 9.643170200950707e-07, "loss": 0.0114, "step": 207710 }, { "epoch": 2.2193493242160374, "grad_norm": 0.0030180844478309155, "learning_rate": 9.643107867635754e-07, "loss": 0.0069, "step": 207720 }, { "epoch": 2.219456167530317, "grad_norm": 0.03976743295788765, "learning_rate": 9.643045529078379e-07, "loss": 0.019, "step": 207730 }, { "epoch": 2.2195630108445963, "grad_norm": 0.4481874108314514, "learning_rate": 9.642983185278657e-07, "loss": 0.0162, "step": 207740 }, { "epoch": 2.219669854158876, "grad_norm": 0.027221135795116425, "learning_rate": 9.642920836236657e-07, "loss": 0.0093, "step": 207750 }, { "epoch": 2.2197766974731556, "grad_norm": 1.7743405103683472, "learning_rate": 9.642858481952448e-07, "loss": 0.0089, "step": 207760 }, { "epoch": 2.219883540787435, "grad_norm": 0.2474023997783661, "learning_rate": 9.642796122426103e-07, "loss": 0.0339, "step": 207770 }, { "epoch": 2.219990384101715, "grad_norm": 0.027428189292550087, "learning_rate": 9.64273375765769e-07, "loss": 0.0069, "step": 207780 }, { "epoch": 2.2200972274159945, "grad_norm": 0.24862079322338104, "learning_rate": 9.642671387647279e-07, "loss": 0.0105, "step": 207790 }, { "epoch": 2.220204070730274, "grad_norm": 5.4597272872924805, "learning_rate": 9.642609012394943e-07, "loss": 0.031, "step": 207800 }, { "epoch": 2.220310914044554, "grad_norm": 0.006843823008239269, "learning_rate": 9.64254663190075e-07, "loss": 0.0321, "step": 207810 }, { "epoch": 2.2204177573588333, "grad_norm": 3.2139382362365723, "learning_rate": 9.64248424616477e-07, "loss": 0.0364, "step": 207820 }, { "epoch": 2.2205246006731127, "grad_norm": 1.9371546506881714, "learning_rate": 9.642421855187078e-07, "loss": 0.0136, "step": 207830 }, { "epoch": 2.2206314439873927, "grad_norm": 0.006027833558619022, "learning_rate": 9.64235945896774e-07, "loss": 0.0119, "step": 207840 }, { "epoch": 2.220738287301672, "grad_norm": 0.010787061415612698, "learning_rate": 9.642297057506828e-07, "loss": 0.0374, "step": 207850 }, { "epoch": 2.2208451306159516, "grad_norm": 2.6328766345977783, "learning_rate": 9.642234650804414e-07, "loss": 0.0159, "step": 207860 }, { "epoch": 2.2209519739302315, "grad_norm": 9.635895729064941, "learning_rate": 9.642172238860563e-07, "loss": 0.0127, "step": 207870 }, { "epoch": 2.221058817244511, "grad_norm": 6.424381732940674, "learning_rate": 9.642109821675351e-07, "loss": 0.0071, "step": 207880 }, { "epoch": 2.2211656605587904, "grad_norm": 0.5729005336761475, "learning_rate": 9.642047399248848e-07, "loss": 0.0044, "step": 207890 }, { "epoch": 2.2212725038730703, "grad_norm": 0.692304253578186, "learning_rate": 9.64198497158112e-07, "loss": 0.0091, "step": 207900 }, { "epoch": 2.2213793471873498, "grad_norm": 0.0052219876088202, "learning_rate": 9.641922538672243e-07, "loss": 0.0225, "step": 207910 }, { "epoch": 2.221486190501629, "grad_norm": 0.00315782125107944, "learning_rate": 9.641860100522284e-07, "loss": 0.0105, "step": 207920 }, { "epoch": 2.221593033815909, "grad_norm": 1.9935576915740967, "learning_rate": 9.641797657131314e-07, "loss": 0.0046, "step": 207930 }, { "epoch": 2.2216998771301886, "grad_norm": 0.13375170528888702, "learning_rate": 9.641735208499407e-07, "loss": 0.0109, "step": 207940 }, { "epoch": 2.221806720444468, "grad_norm": 0.14328117668628693, "learning_rate": 9.64167275462663e-07, "loss": 0.0798, "step": 207950 }, { "epoch": 2.221913563758748, "grad_norm": 10.09636116027832, "learning_rate": 9.641610295513052e-07, "loss": 0.0952, "step": 207960 }, { "epoch": 2.2220204070730274, "grad_norm": 0.03300139680504799, "learning_rate": 9.641547831158745e-07, "loss": 0.0095, "step": 207970 }, { "epoch": 2.222127250387307, "grad_norm": 0.0045446534641087055, "learning_rate": 9.641485361563783e-07, "loss": 0.0138, "step": 207980 }, { "epoch": 2.2222340937015868, "grad_norm": 2.6941819190979004, "learning_rate": 9.641422886728231e-07, "loss": 0.0159, "step": 207990 }, { "epoch": 2.222340937015866, "grad_norm": 4.232516765594482, "learning_rate": 9.641360406652166e-07, "loss": 0.0084, "step": 208000 }, { "epoch": 2.2224477803301457, "grad_norm": 1.7789437770843506, "learning_rate": 9.641297921335651e-07, "loss": 0.0172, "step": 208010 }, { "epoch": 2.2225546236444256, "grad_norm": 0.2176295965909958, "learning_rate": 9.641235430778762e-07, "loss": 0.0077, "step": 208020 }, { "epoch": 2.222661466958705, "grad_norm": 1.3085678815841675, "learning_rate": 9.64117293498157e-07, "loss": 0.0259, "step": 208030 }, { "epoch": 2.2227683102729845, "grad_norm": 0.027920987457036972, "learning_rate": 9.64111043394414e-07, "loss": 0.0305, "step": 208040 }, { "epoch": 2.2228751535872644, "grad_norm": 0.542834997177124, "learning_rate": 9.641047927666548e-07, "loss": 0.0013, "step": 208050 }, { "epoch": 2.222981996901544, "grad_norm": 6.4773945808410645, "learning_rate": 9.640985416148861e-07, "loss": 0.0657, "step": 208060 }, { "epoch": 2.2230888402158233, "grad_norm": 0.006416281219571829, "learning_rate": 9.640922899391153e-07, "loss": 0.0205, "step": 208070 }, { "epoch": 2.223195683530103, "grad_norm": 0.309678852558136, "learning_rate": 9.64086037739349e-07, "loss": 0.0165, "step": 208080 }, { "epoch": 2.2233025268443827, "grad_norm": 3.262923240661621, "learning_rate": 9.64079785015595e-07, "loss": 0.0372, "step": 208090 }, { "epoch": 2.223409370158662, "grad_norm": 1.6192978620529175, "learning_rate": 9.640735317678595e-07, "loss": 0.0064, "step": 208100 }, { "epoch": 2.223516213472942, "grad_norm": 2.417452812194824, "learning_rate": 9.640672779961501e-07, "loss": 0.0335, "step": 208110 }, { "epoch": 2.2236230567872215, "grad_norm": 1.892331838607788, "learning_rate": 9.640610237004738e-07, "loss": 0.0286, "step": 208120 }, { "epoch": 2.223729900101501, "grad_norm": 8.941045761108398, "learning_rate": 9.640547688808374e-07, "loss": 0.0299, "step": 208130 }, { "epoch": 2.223836743415781, "grad_norm": 0.2881567180156708, "learning_rate": 9.640485135372483e-07, "loss": 0.0182, "step": 208140 }, { "epoch": 2.2239435867300603, "grad_norm": 8.998589515686035, "learning_rate": 9.640422576697135e-07, "loss": 0.0201, "step": 208150 }, { "epoch": 2.2240504300443398, "grad_norm": 0.02081034891307354, "learning_rate": 9.640360012782396e-07, "loss": 0.021, "step": 208160 }, { "epoch": 2.2241572733586197, "grad_norm": 0.07841132581233978, "learning_rate": 9.640297443628343e-07, "loss": 0.0185, "step": 208170 }, { "epoch": 2.224264116672899, "grad_norm": 6.138374328613281, "learning_rate": 9.640234869235042e-07, "loss": 0.0449, "step": 208180 }, { "epoch": 2.2243709599871786, "grad_norm": 7.427937984466553, "learning_rate": 9.64017228960257e-07, "loss": 0.0155, "step": 208190 }, { "epoch": 2.2244778033014585, "grad_norm": 1.0514658689498901, "learning_rate": 9.640109704730988e-07, "loss": 0.0089, "step": 208200 }, { "epoch": 2.224584646615738, "grad_norm": 0.0014069897588342428, "learning_rate": 9.640047114620375e-07, "loss": 0.0069, "step": 208210 }, { "epoch": 2.2246914899300174, "grad_norm": 0.021702250465750694, "learning_rate": 9.639984519270797e-07, "loss": 0.0237, "step": 208220 }, { "epoch": 2.2247983332442973, "grad_norm": 0.1399032473564148, "learning_rate": 9.639921918682329e-07, "loss": 0.0535, "step": 208230 }, { "epoch": 2.224905176558577, "grad_norm": 0.030883001163601875, "learning_rate": 9.639859312855034e-07, "loss": 0.0033, "step": 208240 }, { "epoch": 2.2250120198728567, "grad_norm": 0.06394381821155548, "learning_rate": 9.639796701788992e-07, "loss": 0.0109, "step": 208250 }, { "epoch": 2.225118863187136, "grad_norm": 1.4011269807815552, "learning_rate": 9.639734085484268e-07, "loss": 0.0935, "step": 208260 }, { "epoch": 2.2252257065014156, "grad_norm": 2.4302093982696533, "learning_rate": 9.639671463940933e-07, "loss": 0.0536, "step": 208270 }, { "epoch": 2.225332549815695, "grad_norm": 0.05724595487117767, "learning_rate": 9.639608837159062e-07, "loss": 0.0624, "step": 208280 }, { "epoch": 2.225439393129975, "grad_norm": 0.03729674592614174, "learning_rate": 9.63954620513872e-07, "loss": 0.0135, "step": 208290 }, { "epoch": 2.2255462364442544, "grad_norm": 0.20366287231445312, "learning_rate": 9.63948356787998e-07, "loss": 0.0091, "step": 208300 }, { "epoch": 2.2256530797585343, "grad_norm": 1.3141226768493652, "learning_rate": 9.639420925382914e-07, "loss": 0.0215, "step": 208310 }, { "epoch": 2.225759923072814, "grad_norm": 0.14273673295974731, "learning_rate": 9.63935827764759e-07, "loss": 0.0244, "step": 208320 }, { "epoch": 2.2258667663870932, "grad_norm": 0.011307538487017155, "learning_rate": 9.639295624674081e-07, "loss": 0.0147, "step": 208330 }, { "epoch": 2.2259736097013727, "grad_norm": 0.3510979115962982, "learning_rate": 9.639232966462457e-07, "loss": 0.033, "step": 208340 }, { "epoch": 2.2260804530156526, "grad_norm": 1.4740750789642334, "learning_rate": 9.63917030301279e-07, "loss": 0.0151, "step": 208350 }, { "epoch": 2.226187296329932, "grad_norm": 8.453495025634766, "learning_rate": 9.639107634325147e-07, "loss": 0.021, "step": 208360 }, { "epoch": 2.226294139644212, "grad_norm": 0.2519092857837677, "learning_rate": 9.639044960399605e-07, "loss": 0.0373, "step": 208370 }, { "epoch": 2.2264009829584914, "grad_norm": 5.3289794921875, "learning_rate": 9.63898228123623e-07, "loss": 0.0134, "step": 208380 }, { "epoch": 2.226507826272771, "grad_norm": 0.18730700016021729, "learning_rate": 9.638919596835092e-07, "loss": 0.0709, "step": 208390 }, { "epoch": 2.226614669587051, "grad_norm": 3.8306846618652344, "learning_rate": 9.638856907196265e-07, "loss": 0.0353, "step": 208400 }, { "epoch": 2.2267215129013302, "grad_norm": 5.213666915893555, "learning_rate": 9.638794212319818e-07, "loss": 0.0268, "step": 208410 }, { "epoch": 2.2268283562156097, "grad_norm": 0.051094770431518555, "learning_rate": 9.638731512205823e-07, "loss": 0.0103, "step": 208420 }, { "epoch": 2.2269351995298896, "grad_norm": 0.8149859309196472, "learning_rate": 9.638668806854349e-07, "loss": 0.004, "step": 208430 }, { "epoch": 2.227042042844169, "grad_norm": 0.043950408697128296, "learning_rate": 9.638606096265468e-07, "loss": 0.0023, "step": 208440 }, { "epoch": 2.2271488861584485, "grad_norm": 0.7685580849647522, "learning_rate": 9.638543380439252e-07, "loss": 0.0239, "step": 208450 }, { "epoch": 2.2272557294727284, "grad_norm": 0.9740827083587646, "learning_rate": 9.63848065937577e-07, "loss": 0.0113, "step": 208460 }, { "epoch": 2.227362572787008, "grad_norm": 2.024693250656128, "learning_rate": 9.638417933075092e-07, "loss": 0.023, "step": 208470 }, { "epoch": 2.2274694161012873, "grad_norm": 0.45518505573272705, "learning_rate": 9.63835520153729e-07, "loss": 0.0177, "step": 208480 }, { "epoch": 2.2275762594155673, "grad_norm": 0.013358831405639648, "learning_rate": 9.638292464762436e-07, "loss": 0.0018, "step": 208490 }, { "epoch": 2.2276831027298467, "grad_norm": 0.007509387098252773, "learning_rate": 9.638229722750599e-07, "loss": 0.0374, "step": 208500 }, { "epoch": 2.227789946044126, "grad_norm": 9.16109848022461, "learning_rate": 9.63816697550185e-07, "loss": 0.0383, "step": 208510 }, { "epoch": 2.227896789358406, "grad_norm": 0.04676859825849533, "learning_rate": 9.638104223016262e-07, "loss": 0.0114, "step": 208520 }, { "epoch": 2.2280036326726855, "grad_norm": 0.3626362383365631, "learning_rate": 9.638041465293905e-07, "loss": 0.0216, "step": 208530 }, { "epoch": 2.228110475986965, "grad_norm": 4.984079360961914, "learning_rate": 9.637978702334847e-07, "loss": 0.0424, "step": 208540 }, { "epoch": 2.228217319301245, "grad_norm": 1.376797080039978, "learning_rate": 9.637915934139163e-07, "loss": 0.03, "step": 208550 }, { "epoch": 2.2283241626155244, "grad_norm": 0.05173114687204361, "learning_rate": 9.63785316070692e-07, "loss": 0.0137, "step": 208560 }, { "epoch": 2.228431005929804, "grad_norm": 6.311662197113037, "learning_rate": 9.637790382038192e-07, "loss": 0.0347, "step": 208570 }, { "epoch": 2.2285378492440837, "grad_norm": 0.03444019705057144, "learning_rate": 9.637727598133047e-07, "loss": 0.0136, "step": 208580 }, { "epoch": 2.228644692558363, "grad_norm": 0.014092833735048771, "learning_rate": 9.63766480899156e-07, "loss": 0.0164, "step": 208590 }, { "epoch": 2.2287515358726426, "grad_norm": 0.032968536019325256, "learning_rate": 9.637602014613796e-07, "loss": 0.0138, "step": 208600 }, { "epoch": 2.2288583791869225, "grad_norm": 0.1103011816740036, "learning_rate": 9.637539214999832e-07, "loss": 0.0163, "step": 208610 }, { "epoch": 2.228965222501202, "grad_norm": 0.03319596126675606, "learning_rate": 9.637476410149735e-07, "loss": 0.0054, "step": 208620 }, { "epoch": 2.2290720658154815, "grad_norm": 0.32985296845436096, "learning_rate": 9.637413600063577e-07, "loss": 0.0033, "step": 208630 }, { "epoch": 2.2291789091297614, "grad_norm": 0.02215680107474327, "learning_rate": 9.637350784741432e-07, "loss": 0.0231, "step": 208640 }, { "epoch": 2.229285752444041, "grad_norm": 0.05986388027667999, "learning_rate": 9.637287964183365e-07, "loss": 0.042, "step": 208650 }, { "epoch": 2.2293925957583203, "grad_norm": 0.1686805784702301, "learning_rate": 9.63722513838945e-07, "loss": 0.0152, "step": 208660 }, { "epoch": 2.2294994390726, "grad_norm": 2.3396899700164795, "learning_rate": 9.63716230735976e-07, "loss": 0.0354, "step": 208670 }, { "epoch": 2.2296062823868796, "grad_norm": 2.747816324234009, "learning_rate": 9.637099471094362e-07, "loss": 0.0166, "step": 208680 }, { "epoch": 2.229713125701159, "grad_norm": 0.287515252828598, "learning_rate": 9.637036629593327e-07, "loss": 0.0159, "step": 208690 }, { "epoch": 2.229819969015439, "grad_norm": 2.6385629177093506, "learning_rate": 9.63697378285673e-07, "loss": 0.0184, "step": 208700 }, { "epoch": 2.2299268123297185, "grad_norm": 2.484349489212036, "learning_rate": 9.636910930884639e-07, "loss": 0.0114, "step": 208710 }, { "epoch": 2.230033655643998, "grad_norm": 1.9435333013534546, "learning_rate": 9.636848073677127e-07, "loss": 0.0203, "step": 208720 }, { "epoch": 2.230140498958278, "grad_norm": 0.3024607300758362, "learning_rate": 9.636785211234261e-07, "loss": 0.0358, "step": 208730 }, { "epoch": 2.2302473422725573, "grad_norm": 12.872685432434082, "learning_rate": 9.636722343556116e-07, "loss": 0.0215, "step": 208740 }, { "epoch": 2.2303541855868367, "grad_norm": 0.4166623651981354, "learning_rate": 9.63665947064276e-07, "loss": 0.0194, "step": 208750 }, { "epoch": 2.2304610289011166, "grad_norm": 0.00760870473459363, "learning_rate": 9.63659659249427e-07, "loss": 0.0016, "step": 208760 }, { "epoch": 2.230567872215396, "grad_norm": 9.102611541748047, "learning_rate": 9.636533709110708e-07, "loss": 0.026, "step": 208770 }, { "epoch": 2.2306747155296756, "grad_norm": 0.01356864906847477, "learning_rate": 9.63647082049215e-07, "loss": 0.0313, "step": 208780 }, { "epoch": 2.2307815588439555, "grad_norm": 0.24324695765972137, "learning_rate": 9.636407926638669e-07, "loss": 0.0306, "step": 208790 }, { "epoch": 2.230888402158235, "grad_norm": 1.917881965637207, "learning_rate": 9.63634502755033e-07, "loss": 0.0241, "step": 208800 }, { "epoch": 2.2309952454725144, "grad_norm": 2.2252368927001953, "learning_rate": 9.63628212322721e-07, "loss": 0.0057, "step": 208810 }, { "epoch": 2.2311020887867943, "grad_norm": 5.642115116119385, "learning_rate": 9.636219213669378e-07, "loss": 0.018, "step": 208820 }, { "epoch": 2.2312089321010737, "grad_norm": 10.54084587097168, "learning_rate": 9.636156298876903e-07, "loss": 0.0319, "step": 208830 }, { "epoch": 2.231315775415353, "grad_norm": 0.6493381857872009, "learning_rate": 9.636093378849858e-07, "loss": 0.0119, "step": 208840 }, { "epoch": 2.231422618729633, "grad_norm": 1.4608501195907593, "learning_rate": 9.636030453588314e-07, "loss": 0.0262, "step": 208850 }, { "epoch": 2.2315294620439126, "grad_norm": 6.1902313232421875, "learning_rate": 9.635967523092342e-07, "loss": 0.0299, "step": 208860 }, { "epoch": 2.231636305358192, "grad_norm": 1.4423956871032715, "learning_rate": 9.635904587362013e-07, "loss": 0.0225, "step": 208870 }, { "epoch": 2.231743148672472, "grad_norm": 0.07709724456071854, "learning_rate": 9.635841646397398e-07, "loss": 0.0169, "step": 208880 }, { "epoch": 2.2318499919867514, "grad_norm": 2.7000176906585693, "learning_rate": 9.635778700198567e-07, "loss": 0.015, "step": 208890 }, { "epoch": 2.231956835301031, "grad_norm": 0.612628161907196, "learning_rate": 9.635715748765592e-07, "loss": 0.0056, "step": 208900 }, { "epoch": 2.2320636786153107, "grad_norm": 0.010038844309747219, "learning_rate": 9.635652792098542e-07, "loss": 0.0082, "step": 208910 }, { "epoch": 2.23217052192959, "grad_norm": 0.01338698249310255, "learning_rate": 9.635589830197494e-07, "loss": 0.0235, "step": 208920 }, { "epoch": 2.2322773652438697, "grad_norm": 0.017454881221055984, "learning_rate": 9.635526863062514e-07, "loss": 0.0337, "step": 208930 }, { "epoch": 2.2323842085581496, "grad_norm": 0.006720224395394325, "learning_rate": 9.635463890693673e-07, "loss": 0.0137, "step": 208940 }, { "epoch": 2.232491051872429, "grad_norm": 0.18460409343242645, "learning_rate": 9.635400913091046e-07, "loss": 0.0147, "step": 208950 }, { "epoch": 2.232597895186709, "grad_norm": 1.9935111999511719, "learning_rate": 9.6353379302547e-07, "loss": 0.0234, "step": 208960 }, { "epoch": 2.2327047385009884, "grad_norm": 1.2938926219940186, "learning_rate": 9.635274942184705e-07, "loss": 0.0044, "step": 208970 }, { "epoch": 2.232811581815268, "grad_norm": 3.4045791625976562, "learning_rate": 9.635211948881138e-07, "loss": 0.017, "step": 208980 }, { "epoch": 2.2329184251295473, "grad_norm": 0.021865416318178177, "learning_rate": 9.635148950344066e-07, "loss": 0.0212, "step": 208990 }, { "epoch": 2.233025268443827, "grad_norm": 6.07166051864624, "learning_rate": 9.63508594657356e-07, "loss": 0.0243, "step": 209000 }, { "epoch": 2.2331321117581067, "grad_norm": 6.030951976776123, "learning_rate": 9.635022937569695e-07, "loss": 0.0307, "step": 209010 }, { "epoch": 2.2332389550723866, "grad_norm": 1.2245934009552002, "learning_rate": 9.634959923332537e-07, "loss": 0.0763, "step": 209020 }, { "epoch": 2.233345798386666, "grad_norm": 1.9166481494903564, "learning_rate": 9.63489690386216e-07, "loss": 0.0082, "step": 209030 }, { "epoch": 2.2334526417009455, "grad_norm": 2.0691637992858887, "learning_rate": 9.634833879158634e-07, "loss": 0.0233, "step": 209040 }, { "epoch": 2.233559485015225, "grad_norm": 0.016636136919260025, "learning_rate": 9.634770849222032e-07, "loss": 0.0267, "step": 209050 }, { "epoch": 2.233666328329505, "grad_norm": 5.443820476531982, "learning_rate": 9.634707814052423e-07, "loss": 0.0358, "step": 209060 }, { "epoch": 2.2337731716437843, "grad_norm": 0.3732474446296692, "learning_rate": 9.634644773649877e-07, "loss": 0.0264, "step": 209070 }, { "epoch": 2.233880014958064, "grad_norm": 5.038765907287598, "learning_rate": 9.634581728014472e-07, "loss": 0.0358, "step": 209080 }, { "epoch": 2.2339868582723437, "grad_norm": 4.756392955780029, "learning_rate": 9.634518677146273e-07, "loss": 0.0704, "step": 209090 }, { "epoch": 2.234093701586623, "grad_norm": 0.10956195741891861, "learning_rate": 9.63445562104535e-07, "loss": 0.0041, "step": 209100 }, { "epoch": 2.234200544900903, "grad_norm": 0.033972807228565216, "learning_rate": 9.634392559711779e-07, "loss": 0.0204, "step": 209110 }, { "epoch": 2.2343073882151825, "grad_norm": 0.05994270741939545, "learning_rate": 9.634329493145628e-07, "loss": 0.0077, "step": 209120 }, { "epoch": 2.234414231529462, "grad_norm": 12.629034996032715, "learning_rate": 9.63426642134697e-07, "loss": 0.0137, "step": 209130 }, { "epoch": 2.234521074843742, "grad_norm": 2.201098918914795, "learning_rate": 9.634203344315875e-07, "loss": 0.0083, "step": 209140 }, { "epoch": 2.2346279181580213, "grad_norm": 0.2148178368806839, "learning_rate": 9.634140262052414e-07, "loss": 0.0327, "step": 209150 }, { "epoch": 2.2347347614723008, "grad_norm": 0.010290635749697685, "learning_rate": 9.634077174556661e-07, "loss": 0.0154, "step": 209160 }, { "epoch": 2.2348416047865807, "grad_norm": 16.142568588256836, "learning_rate": 9.634014081828683e-07, "loss": 0.0576, "step": 209170 }, { "epoch": 2.23494844810086, "grad_norm": 0.3190980553627014, "learning_rate": 9.633950983868554e-07, "loss": 0.0242, "step": 209180 }, { "epoch": 2.2350552914151396, "grad_norm": 0.020334482192993164, "learning_rate": 9.633887880676345e-07, "loss": 0.0275, "step": 209190 }, { "epoch": 2.2351621347294195, "grad_norm": 1.447375774383545, "learning_rate": 9.633824772252125e-07, "loss": 0.0185, "step": 209200 }, { "epoch": 2.235268978043699, "grad_norm": 0.04403034597635269, "learning_rate": 9.63376165859597e-07, "loss": 0.0051, "step": 209210 }, { "epoch": 2.2353758213579784, "grad_norm": 0.03670162707567215, "learning_rate": 9.633698539707946e-07, "loss": 0.0151, "step": 209220 }, { "epoch": 2.2354826646722583, "grad_norm": 4.175106525421143, "learning_rate": 9.633635415588128e-07, "loss": 0.0212, "step": 209230 }, { "epoch": 2.2355895079865378, "grad_norm": 0.008315392769873142, "learning_rate": 9.633572286236587e-07, "loss": 0.0266, "step": 209240 }, { "epoch": 2.2356963513008172, "grad_norm": 1.3694344758987427, "learning_rate": 9.633509151653392e-07, "loss": 0.0075, "step": 209250 }, { "epoch": 2.235803194615097, "grad_norm": 4.736929416656494, "learning_rate": 9.633446011838615e-07, "loss": 0.0066, "step": 209260 }, { "epoch": 2.2359100379293766, "grad_norm": 2.827976942062378, "learning_rate": 9.633382866792329e-07, "loss": 0.0928, "step": 209270 }, { "epoch": 2.236016881243656, "grad_norm": 5.183816909790039, "learning_rate": 9.6333197165146e-07, "loss": 0.0251, "step": 209280 }, { "epoch": 2.236123724557936, "grad_norm": 1.5928312540054321, "learning_rate": 9.633256561005507e-07, "loss": 0.0138, "step": 209290 }, { "epoch": 2.2362305678722154, "grad_norm": 2.3499257564544678, "learning_rate": 9.633193400265117e-07, "loss": 0.0144, "step": 209300 }, { "epoch": 2.236337411186495, "grad_norm": 0.4086611568927765, "learning_rate": 9.633130234293503e-07, "loss": 0.007, "step": 209310 }, { "epoch": 2.2364442545007748, "grad_norm": 1.7197610139846802, "learning_rate": 9.633067063090734e-07, "loss": 0.0067, "step": 209320 }, { "epoch": 2.2365510978150542, "grad_norm": 2.697462797164917, "learning_rate": 9.633003886656884e-07, "loss": 0.022, "step": 209330 }, { "epoch": 2.2366579411293337, "grad_norm": 0.4015027582645416, "learning_rate": 9.63294070499202e-07, "loss": 0.0151, "step": 209340 }, { "epoch": 2.2367647844436136, "grad_norm": 3.5759127140045166, "learning_rate": 9.63287751809622e-07, "loss": 0.0096, "step": 209350 }, { "epoch": 2.236871627757893, "grad_norm": 2.556321144104004, "learning_rate": 9.632814325969549e-07, "loss": 0.0095, "step": 209360 }, { "epoch": 2.2369784710721725, "grad_norm": 1.1384309530258179, "learning_rate": 9.632751128612081e-07, "loss": 0.0274, "step": 209370 }, { "epoch": 2.2370853143864524, "grad_norm": 3.1460955142974854, "learning_rate": 9.63268792602389e-07, "loss": 0.025, "step": 209380 }, { "epoch": 2.237192157700732, "grad_norm": 2.452418804168701, "learning_rate": 9.63262471820504e-07, "loss": 0.0182, "step": 209390 }, { "epoch": 2.2372990010150113, "grad_norm": 4.735293388366699, "learning_rate": 9.63256150515561e-07, "loss": 0.0101, "step": 209400 }, { "epoch": 2.2374058443292912, "grad_norm": 0.6567951440811157, "learning_rate": 9.632498286875668e-07, "loss": 0.0148, "step": 209410 }, { "epoch": 2.2375126876435707, "grad_norm": 1.545608639717102, "learning_rate": 9.632435063365287e-07, "loss": 0.0257, "step": 209420 }, { "epoch": 2.23761953095785, "grad_norm": 0.006144016981124878, "learning_rate": 9.632371834624536e-07, "loss": 0.009, "step": 209430 }, { "epoch": 2.23772637427213, "grad_norm": 2.5256688594818115, "learning_rate": 9.632308600653487e-07, "loss": 0.0177, "step": 209440 }, { "epoch": 2.2378332175864095, "grad_norm": 2.239241600036621, "learning_rate": 9.632245361452213e-07, "loss": 0.0352, "step": 209450 }, { "epoch": 2.237940060900689, "grad_norm": 3.006830930709839, "learning_rate": 9.632182117020783e-07, "loss": 0.0269, "step": 209460 }, { "epoch": 2.238046904214969, "grad_norm": 2.421323776245117, "learning_rate": 9.632118867359273e-07, "loss": 0.0161, "step": 209470 }, { "epoch": 2.2381537475292483, "grad_norm": 1.6134464740753174, "learning_rate": 9.63205561246775e-07, "loss": 0.0438, "step": 209480 }, { "epoch": 2.238260590843528, "grad_norm": 0.5907060503959656, "learning_rate": 9.631992352346284e-07, "loss": 0.0134, "step": 209490 }, { "epoch": 2.2383674341578077, "grad_norm": 4.713442802429199, "learning_rate": 9.63192908699495e-07, "loss": 0.0189, "step": 209500 }, { "epoch": 2.238474277472087, "grad_norm": 0.6729912161827087, "learning_rate": 9.63186581641382e-07, "loss": 0.0277, "step": 209510 }, { "epoch": 2.2385811207863666, "grad_norm": 6.741358757019043, "learning_rate": 9.631802540602961e-07, "loss": 0.0542, "step": 209520 }, { "epoch": 2.2386879641006465, "grad_norm": 0.6163010597229004, "learning_rate": 9.63173925956245e-07, "loss": 0.0286, "step": 209530 }, { "epoch": 2.238794807414926, "grad_norm": 0.08916883170604706, "learning_rate": 9.631675973292356e-07, "loss": 0.0378, "step": 209540 }, { "epoch": 2.2389016507292054, "grad_norm": 2.247217893600464, "learning_rate": 9.631612681792752e-07, "loss": 0.0156, "step": 209550 }, { "epoch": 2.2390084940434853, "grad_norm": 0.23744609951972961, "learning_rate": 9.631549385063704e-07, "loss": 0.0265, "step": 209560 }, { "epoch": 2.239115337357765, "grad_norm": 2.5113654136657715, "learning_rate": 9.631486083105287e-07, "loss": 0.0045, "step": 209570 }, { "epoch": 2.2392221806720443, "grad_norm": 1.7340891361236572, "learning_rate": 9.631422775917574e-07, "loss": 0.0238, "step": 209580 }, { "epoch": 2.239329023986324, "grad_norm": 0.017968522384762764, "learning_rate": 9.631359463500638e-07, "loss": 0.0051, "step": 209590 }, { "epoch": 2.2394358673006036, "grad_norm": 0.010826779529452324, "learning_rate": 9.631296145854543e-07, "loss": 0.0171, "step": 209600 }, { "epoch": 2.239542710614883, "grad_norm": 0.23324455320835114, "learning_rate": 9.631232822979368e-07, "loss": 0.0215, "step": 209610 }, { "epoch": 2.239649553929163, "grad_norm": 8.914133071899414, "learning_rate": 9.63116949487518e-07, "loss": 0.0242, "step": 209620 }, { "epoch": 2.2397563972434424, "grad_norm": 1.555834412574768, "learning_rate": 9.631106161542052e-07, "loss": 0.0142, "step": 209630 }, { "epoch": 2.239863240557722, "grad_norm": 0.7540472149848938, "learning_rate": 9.631042822980057e-07, "loss": 0.0169, "step": 209640 }, { "epoch": 2.239970083872002, "grad_norm": 0.045828938484191895, "learning_rate": 9.630979479189266e-07, "loss": 0.0073, "step": 209650 }, { "epoch": 2.2400769271862813, "grad_norm": 3.9662892818450928, "learning_rate": 9.630916130169747e-07, "loss": 0.0339, "step": 209660 }, { "epoch": 2.2401837705005607, "grad_norm": 0.5134311318397522, "learning_rate": 9.630852775921573e-07, "loss": 0.0123, "step": 209670 }, { "epoch": 2.2402906138148406, "grad_norm": 0.023376930505037308, "learning_rate": 9.63078941644482e-07, "loss": 0.0144, "step": 209680 }, { "epoch": 2.24039745712912, "grad_norm": 9.148862838745117, "learning_rate": 9.630726051739556e-07, "loss": 0.0195, "step": 209690 }, { "epoch": 2.2405043004433995, "grad_norm": 1.0474332571029663, "learning_rate": 9.630662681805853e-07, "loss": 0.0213, "step": 209700 }, { "epoch": 2.2406111437576794, "grad_norm": 0.07803256809711456, "learning_rate": 9.63059930664378e-07, "loss": 0.0247, "step": 209710 }, { "epoch": 2.240717987071959, "grad_norm": 0.007739347405731678, "learning_rate": 9.630535926253413e-07, "loss": 0.0111, "step": 209720 }, { "epoch": 2.240824830386239, "grad_norm": 2.1823973655700684, "learning_rate": 9.63047254063482e-07, "loss": 0.032, "step": 209730 }, { "epoch": 2.2409316737005183, "grad_norm": 0.02744092233479023, "learning_rate": 9.630409149788075e-07, "loss": 0.0254, "step": 209740 }, { "epoch": 2.2410385170147977, "grad_norm": 1.764233112335205, "learning_rate": 9.630345753713247e-07, "loss": 0.0299, "step": 209750 }, { "epoch": 2.241145360329077, "grad_norm": 0.6820236444473267, "learning_rate": 9.630282352410412e-07, "loss": 0.0199, "step": 209760 }, { "epoch": 2.241252203643357, "grad_norm": 3.753079652786255, "learning_rate": 9.630218945879636e-07, "loss": 0.0207, "step": 209770 }, { "epoch": 2.2413590469576365, "grad_norm": 0.11115878075361252, "learning_rate": 9.630155534120997e-07, "loss": 0.044, "step": 209780 }, { "epoch": 2.2414658902719165, "grad_norm": 4.920185089111328, "learning_rate": 9.63009211713456e-07, "loss": 0.0374, "step": 209790 }, { "epoch": 2.241572733586196, "grad_norm": 3.4594035148620605, "learning_rate": 9.6300286949204e-07, "loss": 0.0388, "step": 209800 }, { "epoch": 2.2416795769004754, "grad_norm": 3.3327596187591553, "learning_rate": 9.629965267478588e-07, "loss": 0.0177, "step": 209810 }, { "epoch": 2.241786420214755, "grad_norm": 2.07753849029541, "learning_rate": 9.629901834809195e-07, "loss": 0.0317, "step": 209820 }, { "epoch": 2.2418932635290347, "grad_norm": 3.947556734085083, "learning_rate": 9.629838396912296e-07, "loss": 0.0315, "step": 209830 }, { "epoch": 2.242000106843314, "grad_norm": 3.7955267429351807, "learning_rate": 9.629774953787956e-07, "loss": 0.0218, "step": 209840 }, { "epoch": 2.242106950157594, "grad_norm": 0.11687445640563965, "learning_rate": 9.629711505436256e-07, "loss": 0.0198, "step": 209850 }, { "epoch": 2.2422137934718736, "grad_norm": 0.03436530381441116, "learning_rate": 9.629648051857258e-07, "loss": 0.0463, "step": 209860 }, { "epoch": 2.242320636786153, "grad_norm": 3.918623924255371, "learning_rate": 9.62958459305104e-07, "loss": 0.0079, "step": 209870 }, { "epoch": 2.242427480100433, "grad_norm": 0.07930151373147964, "learning_rate": 9.62952112901767e-07, "loss": 0.0101, "step": 209880 }, { "epoch": 2.2425343234147124, "grad_norm": 20.609663009643555, "learning_rate": 9.629457659757223e-07, "loss": 0.057, "step": 209890 }, { "epoch": 2.242641166728992, "grad_norm": 0.010964548215270042, "learning_rate": 9.629394185269769e-07, "loss": 0.0097, "step": 209900 }, { "epoch": 2.2427480100432717, "grad_norm": 0.05259124934673309, "learning_rate": 9.629330705555378e-07, "loss": 0.0099, "step": 209910 }, { "epoch": 2.242854853357551, "grad_norm": 0.6201950311660767, "learning_rate": 9.629267220614123e-07, "loss": 0.0149, "step": 209920 }, { "epoch": 2.2429616966718307, "grad_norm": 0.020613158121705055, "learning_rate": 9.629203730446077e-07, "loss": 0.0034, "step": 209930 }, { "epoch": 2.2430685399861106, "grad_norm": 0.039230119436979294, "learning_rate": 9.62914023505131e-07, "loss": 0.011, "step": 209940 }, { "epoch": 2.24317538330039, "grad_norm": 10.870420455932617, "learning_rate": 9.629076734429895e-07, "loss": 0.0323, "step": 209950 }, { "epoch": 2.2432822266146695, "grad_norm": 0.7899675369262695, "learning_rate": 9.629013228581903e-07, "loss": 0.0378, "step": 209960 }, { "epoch": 2.2433890699289494, "grad_norm": 3.6468887329101562, "learning_rate": 9.628949717507405e-07, "loss": 0.0118, "step": 209970 }, { "epoch": 2.243495913243229, "grad_norm": 0.29533007740974426, "learning_rate": 9.628886201206475e-07, "loss": 0.0072, "step": 209980 }, { "epoch": 2.2436027565575083, "grad_norm": 10.999608039855957, "learning_rate": 9.62882267967918e-07, "loss": 0.067, "step": 209990 }, { "epoch": 2.243709599871788, "grad_norm": 0.03239969536662102, "learning_rate": 9.628759152925598e-07, "loss": 0.0259, "step": 210000 }, { "epoch": 2.2438164431860677, "grad_norm": 4.948847770690918, "learning_rate": 9.628695620945795e-07, "loss": 0.0331, "step": 210010 }, { "epoch": 2.243923286500347, "grad_norm": 3.8938848972320557, "learning_rate": 9.628632083739848e-07, "loss": 0.0165, "step": 210020 }, { "epoch": 2.244030129814627, "grad_norm": 2.548999309539795, "learning_rate": 9.628568541307824e-07, "loss": 0.0408, "step": 210030 }, { "epoch": 2.2441369731289065, "grad_norm": 0.03800579905509949, "learning_rate": 9.628504993649798e-07, "loss": 0.0205, "step": 210040 }, { "epoch": 2.244243816443186, "grad_norm": 5.170707702636719, "learning_rate": 9.62844144076584e-07, "loss": 0.0337, "step": 210050 }, { "epoch": 2.244350659757466, "grad_norm": 0.006062495056539774, "learning_rate": 9.628377882656022e-07, "loss": 0.0222, "step": 210060 }, { "epoch": 2.2444575030717453, "grad_norm": 0.005487512797117233, "learning_rate": 9.628314319320417e-07, "loss": 0.0087, "step": 210070 }, { "epoch": 2.2445643463860248, "grad_norm": 0.0028142461087554693, "learning_rate": 9.628250750759095e-07, "loss": 0.0246, "step": 210080 }, { "epoch": 2.2446711897003047, "grad_norm": 0.010485020466148853, "learning_rate": 9.628187176972129e-07, "loss": 0.0068, "step": 210090 }, { "epoch": 2.244778033014584, "grad_norm": 0.001968878088518977, "learning_rate": 9.62812359795959e-07, "loss": 0.0254, "step": 210100 }, { "epoch": 2.2448848763288636, "grad_norm": 0.0039048020262271166, "learning_rate": 9.62806001372155e-07, "loss": 0.0212, "step": 210110 }, { "epoch": 2.2449917196431435, "grad_norm": 2.957446336746216, "learning_rate": 9.62799642425808e-07, "loss": 0.0037, "step": 210120 }, { "epoch": 2.245098562957423, "grad_norm": 2.643467664718628, "learning_rate": 9.627932829569254e-07, "loss": 0.0246, "step": 210130 }, { "epoch": 2.2452054062717024, "grad_norm": 3.6973073482513428, "learning_rate": 9.627869229655143e-07, "loss": 0.016, "step": 210140 }, { "epoch": 2.2453122495859823, "grad_norm": 3.228977680206299, "learning_rate": 9.62780562451582e-07, "loss": 0.0417, "step": 210150 }, { "epoch": 2.2454190929002618, "grad_norm": 2.418058395385742, "learning_rate": 9.627742014151351e-07, "loss": 0.0643, "step": 210160 }, { "epoch": 2.245525936214541, "grad_norm": 1.8501516580581665, "learning_rate": 9.627678398561814e-07, "loss": 0.0092, "step": 210170 }, { "epoch": 2.245632779528821, "grad_norm": 6.525606155395508, "learning_rate": 9.62761477774728e-07, "loss": 0.0383, "step": 210180 }, { "epoch": 2.2457396228431006, "grad_norm": 0.054400667548179626, "learning_rate": 9.627551151707818e-07, "loss": 0.0028, "step": 210190 }, { "epoch": 2.24584646615738, "grad_norm": 2.6821296215057373, "learning_rate": 9.627487520443502e-07, "loss": 0.0213, "step": 210200 }, { "epoch": 2.24595330947166, "grad_norm": 3.375438928604126, "learning_rate": 9.627423883954402e-07, "loss": 0.0185, "step": 210210 }, { "epoch": 2.2460601527859394, "grad_norm": 5.9174580574035645, "learning_rate": 9.627360242240594e-07, "loss": 0.019, "step": 210220 }, { "epoch": 2.246166996100219, "grad_norm": 0.029397249221801758, "learning_rate": 9.627296595302145e-07, "loss": 0.0133, "step": 210230 }, { "epoch": 2.2462738394144988, "grad_norm": 2.034390687942505, "learning_rate": 9.62723294313913e-07, "loss": 0.0044, "step": 210240 }, { "epoch": 2.246380682728778, "grad_norm": 0.05227840691804886, "learning_rate": 9.62716928575162e-07, "loss": 0.0135, "step": 210250 }, { "epoch": 2.2464875260430577, "grad_norm": 4.331500053405762, "learning_rate": 9.627105623139683e-07, "loss": 0.011, "step": 210260 }, { "epoch": 2.2465943693573376, "grad_norm": 2.6381964683532715, "learning_rate": 9.627041955303398e-07, "loss": 0.0239, "step": 210270 }, { "epoch": 2.246701212671617, "grad_norm": 0.002456540474668145, "learning_rate": 9.626978282242834e-07, "loss": 0.0162, "step": 210280 }, { "epoch": 2.2468080559858965, "grad_norm": 1.2089221477508545, "learning_rate": 9.62691460395806e-07, "loss": 0.0544, "step": 210290 }, { "epoch": 2.2469148993001764, "grad_norm": 0.014245356433093548, "learning_rate": 9.626850920449152e-07, "loss": 0.0247, "step": 210300 }, { "epoch": 2.247021742614456, "grad_norm": 0.5916736125946045, "learning_rate": 9.62678723171618e-07, "loss": 0.0448, "step": 210310 }, { "epoch": 2.2471285859287353, "grad_norm": 10.565431594848633, "learning_rate": 9.626723537759217e-07, "loss": 0.012, "step": 210320 }, { "epoch": 2.2472354292430152, "grad_norm": 2.246777057647705, "learning_rate": 9.626659838578331e-07, "loss": 0.0174, "step": 210330 }, { "epoch": 2.2473422725572947, "grad_norm": 5.708272457122803, "learning_rate": 9.626596134173599e-07, "loss": 0.0196, "step": 210340 }, { "epoch": 2.247449115871574, "grad_norm": 0.13263748586177826, "learning_rate": 9.62653242454509e-07, "loss": 0.0248, "step": 210350 }, { "epoch": 2.247555959185854, "grad_norm": 1.1856123208999634, "learning_rate": 9.626468709692876e-07, "loss": 0.014, "step": 210360 }, { "epoch": 2.2476628025001335, "grad_norm": 0.014046701602637768, "learning_rate": 9.62640498961703e-07, "loss": 0.0439, "step": 210370 }, { "epoch": 2.247769645814413, "grad_norm": 3.4760971069335938, "learning_rate": 9.626341264317624e-07, "loss": 0.018, "step": 210380 }, { "epoch": 2.247876489128693, "grad_norm": 18.113697052001953, "learning_rate": 9.62627753379473e-07, "loss": 0.028, "step": 210390 }, { "epoch": 2.2479833324429723, "grad_norm": 5.2768354415893555, "learning_rate": 9.626213798048419e-07, "loss": 0.0162, "step": 210400 }, { "epoch": 2.248090175757252, "grad_norm": 3.816188335418701, "learning_rate": 9.626150057078762e-07, "loss": 0.0301, "step": 210410 }, { "epoch": 2.2481970190715317, "grad_norm": 0.010205226019024849, "learning_rate": 9.626086310885835e-07, "loss": 0.0097, "step": 210420 }, { "epoch": 2.248303862385811, "grad_norm": 0.2528286576271057, "learning_rate": 9.626022559469707e-07, "loss": 0.0958, "step": 210430 }, { "epoch": 2.248410705700091, "grad_norm": 0.5830657482147217, "learning_rate": 9.62595880283045e-07, "loss": 0.0202, "step": 210440 }, { "epoch": 2.2485175490143705, "grad_norm": 2.789090394973755, "learning_rate": 9.625895040968135e-07, "loss": 0.0235, "step": 210450 }, { "epoch": 2.24862439232865, "grad_norm": 2.1336417198181152, "learning_rate": 9.625831273882837e-07, "loss": 0.1033, "step": 210460 }, { "epoch": 2.2487312356429294, "grad_norm": 2.719658851623535, "learning_rate": 9.625767501574626e-07, "loss": 0.0032, "step": 210470 }, { "epoch": 2.2488380789572093, "grad_norm": 1.7647286653518677, "learning_rate": 9.625703724043574e-07, "loss": 0.0149, "step": 210480 }, { "epoch": 2.248944922271489, "grad_norm": 0.20604781806468964, "learning_rate": 9.625639941289753e-07, "loss": 0.019, "step": 210490 }, { "epoch": 2.2490517655857687, "grad_norm": 1.2352553606033325, "learning_rate": 9.625576153313237e-07, "loss": 0.011, "step": 210500 }, { "epoch": 2.249158608900048, "grad_norm": 0.06370618939399719, "learning_rate": 9.625512360114096e-07, "loss": 0.0127, "step": 210510 }, { "epoch": 2.2492654522143276, "grad_norm": 0.034608274698257446, "learning_rate": 9.625448561692401e-07, "loss": 0.0247, "step": 210520 }, { "epoch": 2.249372295528607, "grad_norm": 0.02843157947063446, "learning_rate": 9.625384758048228e-07, "loss": 0.0149, "step": 210530 }, { "epoch": 2.249479138842887, "grad_norm": 2.5506250858306885, "learning_rate": 9.625320949181643e-07, "loss": 0.0141, "step": 210540 }, { "epoch": 2.2495859821571664, "grad_norm": 4.172931671142578, "learning_rate": 9.625257135092723e-07, "loss": 0.0048, "step": 210550 }, { "epoch": 2.2496928254714463, "grad_norm": 1.1722129583358765, "learning_rate": 9.625193315781538e-07, "loss": 0.0114, "step": 210560 }, { "epoch": 2.249799668785726, "grad_norm": 0.0207559484988451, "learning_rate": 9.625129491248162e-07, "loss": 0.035, "step": 210570 }, { "epoch": 2.2499065121000053, "grad_norm": 6.393143653869629, "learning_rate": 9.625065661492667e-07, "loss": 0.0271, "step": 210580 }, { "epoch": 2.2500133554142847, "grad_norm": 1.7279077768325806, "learning_rate": 9.625001826515122e-07, "loss": 0.0393, "step": 210590 }, { "epoch": 2.2501201987285646, "grad_norm": 0.038873907178640366, "learning_rate": 9.6249379863156e-07, "loss": 0.023, "step": 210600 }, { "epoch": 2.250227042042844, "grad_norm": 8.836970329284668, "learning_rate": 9.624874140894176e-07, "loss": 0.014, "step": 210610 }, { "epoch": 2.250333885357124, "grad_norm": 0.043575968593358994, "learning_rate": 9.62481029025092e-07, "loss": 0.0046, "step": 210620 }, { "epoch": 2.2504407286714034, "grad_norm": 9.718302726745605, "learning_rate": 9.624746434385902e-07, "loss": 0.0794, "step": 210630 }, { "epoch": 2.250547571985683, "grad_norm": 0.005818994715809822, "learning_rate": 9.624682573299198e-07, "loss": 0.047, "step": 210640 }, { "epoch": 2.250654415299963, "grad_norm": 0.1329222172498703, "learning_rate": 9.624618706990877e-07, "loss": 0.0266, "step": 210650 }, { "epoch": 2.2507612586142423, "grad_norm": 10.054908752441406, "learning_rate": 9.624554835461015e-07, "loss": 0.0419, "step": 210660 }, { "epoch": 2.2508681019285217, "grad_norm": 8.328943252563477, "learning_rate": 9.62449095870968e-07, "loss": 0.0254, "step": 210670 }, { "epoch": 2.2509749452428016, "grad_norm": 18.348115921020508, "learning_rate": 9.624427076736943e-07, "loss": 0.0987, "step": 210680 }, { "epoch": 2.251081788557081, "grad_norm": 0.005794440396130085, "learning_rate": 9.624363189542882e-07, "loss": 0.0549, "step": 210690 }, { "epoch": 2.2511886318713605, "grad_norm": 1.8120384216308594, "learning_rate": 9.624299297127565e-07, "loss": 0.0062, "step": 210700 }, { "epoch": 2.2512954751856404, "grad_norm": 8.36453628540039, "learning_rate": 9.624235399491066e-07, "loss": 0.0364, "step": 210710 }, { "epoch": 2.25140231849992, "grad_norm": 2.6359658241271973, "learning_rate": 9.624171496633455e-07, "loss": 0.0147, "step": 210720 }, { "epoch": 2.2515091618141994, "grad_norm": 0.1841914802789688, "learning_rate": 9.624107588554806e-07, "loss": 0.0088, "step": 210730 }, { "epoch": 2.2516160051284793, "grad_norm": 6.607679843902588, "learning_rate": 9.62404367525519e-07, "loss": 0.0307, "step": 210740 }, { "epoch": 2.2517228484427587, "grad_norm": 3.319491147994995, "learning_rate": 9.623979756734681e-07, "loss": 0.0098, "step": 210750 }, { "epoch": 2.251829691757038, "grad_norm": 5.2352614402771, "learning_rate": 9.623915832993348e-07, "loss": 0.0176, "step": 210760 }, { "epoch": 2.251936535071318, "grad_norm": 0.1484731137752533, "learning_rate": 9.623851904031266e-07, "loss": 0.0094, "step": 210770 }, { "epoch": 2.2520433783855975, "grad_norm": 3.262903928756714, "learning_rate": 9.623787969848508e-07, "loss": 0.0393, "step": 210780 }, { "epoch": 2.252150221699877, "grad_norm": 0.0018101583700627089, "learning_rate": 9.623724030445142e-07, "loss": 0.0047, "step": 210790 }, { "epoch": 2.252257065014157, "grad_norm": 0.0366935096681118, "learning_rate": 9.623660085821243e-07, "loss": 0.0126, "step": 210800 }, { "epoch": 2.2523639083284364, "grad_norm": 0.015358896926045418, "learning_rate": 9.623596135976883e-07, "loss": 0.0118, "step": 210810 }, { "epoch": 2.252470751642716, "grad_norm": 13.874421119689941, "learning_rate": 9.623532180912136e-07, "loss": 0.0548, "step": 210820 }, { "epoch": 2.2525775949569957, "grad_norm": 3.4327430725097656, "learning_rate": 9.623468220627068e-07, "loss": 0.0186, "step": 210830 }, { "epoch": 2.252684438271275, "grad_norm": 0.10235510021448135, "learning_rate": 9.62340425512176e-07, "loss": 0.0125, "step": 210840 }, { "epoch": 2.2527912815855546, "grad_norm": 1.956844449043274, "learning_rate": 9.623340284396275e-07, "loss": 0.0082, "step": 210850 }, { "epoch": 2.2528981248998345, "grad_norm": 6.623736381530762, "learning_rate": 9.623276308450693e-07, "loss": 0.0357, "step": 210860 }, { "epoch": 2.253004968214114, "grad_norm": 1.1351462602615356, "learning_rate": 9.62321232728508e-07, "loss": 0.0139, "step": 210870 }, { "epoch": 2.2531118115283935, "grad_norm": 1.2687742710113525, "learning_rate": 9.623148340899516e-07, "loss": 0.0059, "step": 210880 }, { "epoch": 2.2532186548426734, "grad_norm": 0.011104441247880459, "learning_rate": 9.623084349294067e-07, "loss": 0.0079, "step": 210890 }, { "epoch": 2.253325498156953, "grad_norm": 0.5515437722206116, "learning_rate": 9.623020352468804e-07, "loss": 0.011, "step": 210900 }, { "epoch": 2.2534323414712323, "grad_norm": 2.0055642127990723, "learning_rate": 9.622956350423805e-07, "loss": 0.0329, "step": 210910 }, { "epoch": 2.253539184785512, "grad_norm": 0.2914815843105316, "learning_rate": 9.622892343159139e-07, "loss": 0.0343, "step": 210920 }, { "epoch": 2.2536460280997916, "grad_norm": 2.5939841270446777, "learning_rate": 9.62282833067488e-07, "loss": 0.012, "step": 210930 }, { "epoch": 2.253752871414071, "grad_norm": 1.1957648992538452, "learning_rate": 9.622764312971095e-07, "loss": 0.0117, "step": 210940 }, { "epoch": 2.253859714728351, "grad_norm": 4.485699653625488, "learning_rate": 9.622700290047863e-07, "loss": 0.0242, "step": 210950 }, { "epoch": 2.2539665580426305, "grad_norm": 0.020443731918931007, "learning_rate": 9.622636261905253e-07, "loss": 0.0453, "step": 210960 }, { "epoch": 2.25407340135691, "grad_norm": 0.017656465992331505, "learning_rate": 9.622572228543337e-07, "loss": 0.0145, "step": 210970 }, { "epoch": 2.25418024467119, "grad_norm": 2.3444483280181885, "learning_rate": 9.62250818996219e-07, "loss": 0.0163, "step": 210980 }, { "epoch": 2.2542870879854693, "grad_norm": 6.477766513824463, "learning_rate": 9.622444146161881e-07, "loss": 0.0041, "step": 210990 }, { "epoch": 2.2543939312997487, "grad_norm": 1.3481088876724243, "learning_rate": 9.622380097142485e-07, "loss": 0.0097, "step": 211000 }, { "epoch": 2.2545007746140286, "grad_norm": 0.3035403788089752, "learning_rate": 9.62231604290407e-07, "loss": 0.0278, "step": 211010 }, { "epoch": 2.254607617928308, "grad_norm": 0.027295513078570366, "learning_rate": 9.622251983446713e-07, "loss": 0.0034, "step": 211020 }, { "epoch": 2.2547144612425876, "grad_norm": 3.3080015182495117, "learning_rate": 9.622187918770484e-07, "loss": 0.0143, "step": 211030 }, { "epoch": 2.2548213045568675, "grad_norm": 4.244287014007568, "learning_rate": 9.622123848875458e-07, "loss": 0.0122, "step": 211040 }, { "epoch": 2.254928147871147, "grad_norm": 0.007904991507530212, "learning_rate": 9.622059773761704e-07, "loss": 0.0639, "step": 211050 }, { "epoch": 2.2550349911854264, "grad_norm": 0.018472017720341682, "learning_rate": 9.621995693429297e-07, "loss": 0.0269, "step": 211060 }, { "epoch": 2.2551418344997063, "grad_norm": 4.271749496459961, "learning_rate": 9.621931607878305e-07, "loss": 0.0605, "step": 211070 }, { "epoch": 2.2552486778139857, "grad_norm": 5.451249122619629, "learning_rate": 9.621867517108806e-07, "loss": 0.0211, "step": 211080 }, { "epoch": 2.255355521128265, "grad_norm": 0.07674120366573334, "learning_rate": 9.62180342112087e-07, "loss": 0.0371, "step": 211090 }, { "epoch": 2.255462364442545, "grad_norm": 0.5691611766815186, "learning_rate": 9.621739319914566e-07, "loss": 0.0138, "step": 211100 }, { "epoch": 2.2555692077568246, "grad_norm": 3.4952359199523926, "learning_rate": 9.621675213489972e-07, "loss": 0.0268, "step": 211110 }, { "epoch": 2.255676051071104, "grad_norm": 0.03319196030497551, "learning_rate": 9.62161110184716e-07, "loss": 0.0229, "step": 211120 }, { "epoch": 2.255782894385384, "grad_norm": Infinity, "learning_rate": 9.621546984986196e-07, "loss": 0.0705, "step": 211130 }, { "epoch": 2.2558897376996634, "grad_norm": 0.35943347215652466, "learning_rate": 9.621482862907158e-07, "loss": 0.0221, "step": 211140 }, { "epoch": 2.2559965810139433, "grad_norm": 0.18433372676372528, "learning_rate": 9.621418735610115e-07, "loss": 0.0259, "step": 211150 }, { "epoch": 2.2561034243282228, "grad_norm": 0.0907726064324379, "learning_rate": 9.621354603095145e-07, "loss": 0.0168, "step": 211160 }, { "epoch": 2.256210267642502, "grad_norm": 12.078113555908203, "learning_rate": 9.621290465362316e-07, "loss": 0.0114, "step": 211170 }, { "epoch": 2.2563171109567817, "grad_norm": 0.0734587237238884, "learning_rate": 9.6212263224117e-07, "loss": 0.01, "step": 211180 }, { "epoch": 2.2564239542710616, "grad_norm": 0.9538047909736633, "learning_rate": 9.621162174243372e-07, "loss": 0.0104, "step": 211190 }, { "epoch": 2.256530797585341, "grad_norm": 0.6964641809463501, "learning_rate": 9.621098020857403e-07, "loss": 0.0121, "step": 211200 }, { "epoch": 2.256637640899621, "grad_norm": 0.05530741438269615, "learning_rate": 9.621033862253866e-07, "loss": 0.0187, "step": 211210 }, { "epoch": 2.2567444842139004, "grad_norm": 5.099841117858887, "learning_rate": 9.620969698432833e-07, "loss": 0.0121, "step": 211220 }, { "epoch": 2.25685132752818, "grad_norm": 0.09300296008586884, "learning_rate": 9.620905529394374e-07, "loss": 0.0216, "step": 211230 }, { "epoch": 2.2569581708424593, "grad_norm": 1.8525575399398804, "learning_rate": 9.620841355138568e-07, "loss": 0.0068, "step": 211240 }, { "epoch": 2.257065014156739, "grad_norm": 0.015233947895467281, "learning_rate": 9.62077717566548e-07, "loss": 0.0104, "step": 211250 }, { "epoch": 2.2571718574710187, "grad_norm": 0.12517811357975006, "learning_rate": 9.620712990975185e-07, "loss": 0.0303, "step": 211260 }, { "epoch": 2.2572787007852986, "grad_norm": 1.1837071180343628, "learning_rate": 9.620648801067759e-07, "loss": 0.0359, "step": 211270 }, { "epoch": 2.257385544099578, "grad_norm": 3.968627452850342, "learning_rate": 9.620584605943272e-07, "loss": 0.0317, "step": 211280 }, { "epoch": 2.2574923874138575, "grad_norm": 0.10306432098150253, "learning_rate": 9.620520405601795e-07, "loss": 0.0133, "step": 211290 }, { "epoch": 2.257599230728137, "grad_norm": 0.6597827076911926, "learning_rate": 9.620456200043402e-07, "loss": 0.0458, "step": 211300 }, { "epoch": 2.257706074042417, "grad_norm": 0.24406535923480988, "learning_rate": 9.620391989268164e-07, "loss": 0.0084, "step": 211310 }, { "epoch": 2.2578129173566963, "grad_norm": 0.063412144780159, "learning_rate": 9.620327773276155e-07, "loss": 0.0284, "step": 211320 }, { "epoch": 2.257919760670976, "grad_norm": 1.9127107858657837, "learning_rate": 9.62026355206745e-07, "loss": 0.0242, "step": 211330 }, { "epoch": 2.2580266039852557, "grad_norm": 2.2807579040527344, "learning_rate": 9.620199325642116e-07, "loss": 0.0727, "step": 211340 }, { "epoch": 2.258133447299535, "grad_norm": 0.002948402427136898, "learning_rate": 9.62013509400023e-07, "loss": 0.026, "step": 211350 }, { "epoch": 2.2582402906138146, "grad_norm": 7.613994121551514, "learning_rate": 9.620070857141862e-07, "loss": 0.005, "step": 211360 }, { "epoch": 2.2583471339280945, "grad_norm": 0.13626383244991302, "learning_rate": 9.620006615067085e-07, "loss": 0.0533, "step": 211370 }, { "epoch": 2.258453977242374, "grad_norm": 0.016707943752408028, "learning_rate": 9.619942367775971e-07, "loss": 0.0139, "step": 211380 }, { "epoch": 2.258560820556654, "grad_norm": 8.450111389160156, "learning_rate": 9.619878115268596e-07, "loss": 0.0244, "step": 211390 }, { "epoch": 2.2586676638709333, "grad_norm": 0.20152337849140167, "learning_rate": 9.619813857545026e-07, "loss": 0.0305, "step": 211400 }, { "epoch": 2.2587745071852128, "grad_norm": 0.21630559861660004, "learning_rate": 9.619749594605341e-07, "loss": 0.0169, "step": 211410 }, { "epoch": 2.2588813504994927, "grad_norm": 0.6006214618682861, "learning_rate": 9.61968532644961e-07, "loss": 0.0334, "step": 211420 }, { "epoch": 2.258988193813772, "grad_norm": 0.2777787744998932, "learning_rate": 9.619621053077903e-07, "loss": 0.0364, "step": 211430 }, { "epoch": 2.2590950371280516, "grad_norm": 0.005854864604771137, "learning_rate": 9.619556774490296e-07, "loss": 0.0036, "step": 211440 }, { "epoch": 2.2592018804423315, "grad_norm": 0.016482366248965263, "learning_rate": 9.619492490686862e-07, "loss": 0.0147, "step": 211450 }, { "epoch": 2.259308723756611, "grad_norm": 0.3232046663761139, "learning_rate": 9.619428201667672e-07, "loss": 0.0115, "step": 211460 }, { "epoch": 2.2594155670708904, "grad_norm": 6.388408660888672, "learning_rate": 9.619363907432798e-07, "loss": 0.0078, "step": 211470 }, { "epoch": 2.2595224103851703, "grad_norm": 0.004227284342050552, "learning_rate": 9.619299607982315e-07, "loss": 0.0167, "step": 211480 }, { "epoch": 2.25962925369945, "grad_norm": 1.7500039339065552, "learning_rate": 9.619235303316294e-07, "loss": 0.0311, "step": 211490 }, { "epoch": 2.2597360970137292, "grad_norm": 0.00822531245648861, "learning_rate": 9.619170993434807e-07, "loss": 0.02, "step": 211500 }, { "epoch": 2.259842940328009, "grad_norm": 3.5072569847106934, "learning_rate": 9.619106678337927e-07, "loss": 0.0564, "step": 211510 }, { "epoch": 2.2599497836422886, "grad_norm": 0.2954476773738861, "learning_rate": 9.619042358025727e-07, "loss": 0.0119, "step": 211520 }, { "epoch": 2.260056626956568, "grad_norm": 5.526194095611572, "learning_rate": 9.61897803249828e-07, "loss": 0.0198, "step": 211530 }, { "epoch": 2.260163470270848, "grad_norm": 4.260819435119629, "learning_rate": 9.618913701755657e-07, "loss": 0.0525, "step": 211540 }, { "epoch": 2.2602703135851274, "grad_norm": 3.969322919845581, "learning_rate": 9.618849365797933e-07, "loss": 0.0407, "step": 211550 }, { "epoch": 2.260377156899407, "grad_norm": 0.02785828523337841, "learning_rate": 9.61878502462518e-07, "loss": 0.0074, "step": 211560 }, { "epoch": 2.260484000213687, "grad_norm": 0.408430278301239, "learning_rate": 9.618720678237468e-07, "loss": 0.028, "step": 211570 }, { "epoch": 2.2605908435279662, "grad_norm": 0.01611296832561493, "learning_rate": 9.618656326634874e-07, "loss": 0.02, "step": 211580 }, { "epoch": 2.2606976868422457, "grad_norm": 0.8990684151649475, "learning_rate": 9.618591969817468e-07, "loss": 0.0156, "step": 211590 }, { "epoch": 2.2608045301565256, "grad_norm": 2.0692458152770996, "learning_rate": 9.618527607785323e-07, "loss": 0.0364, "step": 211600 }, { "epoch": 2.260911373470805, "grad_norm": 6.374268054962158, "learning_rate": 9.61846324053851e-07, "loss": 0.0247, "step": 211610 }, { "epoch": 2.2610182167850845, "grad_norm": 0.00033178823650814593, "learning_rate": 9.618398868077106e-07, "loss": 0.0223, "step": 211620 }, { "epoch": 2.2611250600993644, "grad_norm": 0.7135095000267029, "learning_rate": 9.61833449040118e-07, "loss": 0.0102, "step": 211630 }, { "epoch": 2.261231903413644, "grad_norm": 0.028220240026712418, "learning_rate": 9.618270107510806e-07, "loss": 0.002, "step": 211640 }, { "epoch": 2.2613387467279233, "grad_norm": 5.420426845550537, "learning_rate": 9.618205719406055e-07, "loss": 0.0126, "step": 211650 }, { "epoch": 2.2614455900422032, "grad_norm": 0.015008696354925632, "learning_rate": 9.618141326087004e-07, "loss": 0.0192, "step": 211660 }, { "epoch": 2.2615524333564827, "grad_norm": 3.7817153930664062, "learning_rate": 9.61807692755372e-07, "loss": 0.0098, "step": 211670 }, { "epoch": 2.261659276670762, "grad_norm": 9.034635543823242, "learning_rate": 9.61801252380628e-07, "loss": 0.0253, "step": 211680 }, { "epoch": 2.261766119985042, "grad_norm": 15.278450012207031, "learning_rate": 9.617948114844757e-07, "loss": 0.0477, "step": 211690 }, { "epoch": 2.2618729632993215, "grad_norm": 8.431427955627441, "learning_rate": 9.617883700669217e-07, "loss": 0.0762, "step": 211700 }, { "epoch": 2.261979806613601, "grad_norm": 0.06399817019701004, "learning_rate": 9.617819281279743e-07, "loss": 0.0176, "step": 211710 }, { "epoch": 2.262086649927881, "grad_norm": 4.24769926071167, "learning_rate": 9.6177548566764e-07, "loss": 0.0597, "step": 211720 }, { "epoch": 2.2621934932421603, "grad_norm": 0.18164017796516418, "learning_rate": 9.617690426859264e-07, "loss": 0.0143, "step": 211730 }, { "epoch": 2.26230033655644, "grad_norm": 0.017599835991859436, "learning_rate": 9.617625991828405e-07, "loss": 0.0103, "step": 211740 }, { "epoch": 2.2624071798707197, "grad_norm": 0.7875942587852478, "learning_rate": 9.617561551583902e-07, "loss": 0.0315, "step": 211750 }, { "epoch": 2.262514023184999, "grad_norm": 0.36739957332611084, "learning_rate": 9.61749710612582e-07, "loss": 0.0048, "step": 211760 }, { "epoch": 2.2626208664992786, "grad_norm": 0.20319531857967377, "learning_rate": 9.617432655454238e-07, "loss": 0.0035, "step": 211770 }, { "epoch": 2.2627277098135585, "grad_norm": 3.2933919429779053, "learning_rate": 9.617368199569224e-07, "loss": 0.0231, "step": 211780 }, { "epoch": 2.262834553127838, "grad_norm": 1.5840562582015991, "learning_rate": 9.617303738470853e-07, "loss": 0.0657, "step": 211790 }, { "epoch": 2.2629413964421174, "grad_norm": 1.438596248626709, "learning_rate": 9.617239272159198e-07, "loss": 0.0131, "step": 211800 }, { "epoch": 2.2630482397563974, "grad_norm": 0.09036491066217422, "learning_rate": 9.617174800634332e-07, "loss": 0.0122, "step": 211810 }, { "epoch": 2.263155083070677, "grad_norm": 4.546907901763916, "learning_rate": 9.617110323896324e-07, "loss": 0.0335, "step": 211820 }, { "epoch": 2.2632619263849563, "grad_norm": 7.290389060974121, "learning_rate": 9.617045841945253e-07, "loss": 0.0222, "step": 211830 }, { "epoch": 2.263368769699236, "grad_norm": 0.2320820838212967, "learning_rate": 9.61698135478119e-07, "loss": 0.0168, "step": 211840 }, { "epoch": 2.2634756130135156, "grad_norm": 4.838858604431152, "learning_rate": 9.616916862404203e-07, "loss": 0.0105, "step": 211850 }, { "epoch": 2.2635824563277955, "grad_norm": 0.047902725636959076, "learning_rate": 9.616852364814369e-07, "loss": 0.0213, "step": 211860 }, { "epoch": 2.263689299642075, "grad_norm": 0.11070827394723892, "learning_rate": 9.616787862011762e-07, "loss": 0.0111, "step": 211870 }, { "epoch": 2.2637961429563545, "grad_norm": 2.0389275550842285, "learning_rate": 9.616723353996453e-07, "loss": 0.0142, "step": 211880 }, { "epoch": 2.263902986270634, "grad_norm": 2.5393004417419434, "learning_rate": 9.616658840768515e-07, "loss": 0.0225, "step": 211890 }, { "epoch": 2.264009829584914, "grad_norm": 0.24934028089046478, "learning_rate": 9.61659432232802e-07, "loss": 0.0118, "step": 211900 }, { "epoch": 2.2641166728991933, "grad_norm": 1.215223789215088, "learning_rate": 9.61652979867504e-07, "loss": 0.004, "step": 211910 }, { "epoch": 2.264223516213473, "grad_norm": 1.8438218832015991, "learning_rate": 9.616465269809652e-07, "loss": 0.022, "step": 211920 }, { "epoch": 2.2643303595277526, "grad_norm": 6.161953926086426, "learning_rate": 9.616400735731925e-07, "loss": 0.0286, "step": 211930 }, { "epoch": 2.264437202842032, "grad_norm": 1.9379305839538574, "learning_rate": 9.616336196441935e-07, "loss": 0.0544, "step": 211940 }, { "epoch": 2.2645440461563116, "grad_norm": 3.373119831085205, "learning_rate": 9.616271651939752e-07, "loss": 0.0662, "step": 211950 }, { "epoch": 2.2646508894705915, "grad_norm": 4.710548400878906, "learning_rate": 9.61620710222545e-07, "loss": 0.0365, "step": 211960 }, { "epoch": 2.264757732784871, "grad_norm": 4.577484607696533, "learning_rate": 9.6161425472991e-07, "loss": 0.0524, "step": 211970 }, { "epoch": 2.264864576099151, "grad_norm": 7.191612243652344, "learning_rate": 9.616077987160778e-07, "loss": 0.0605, "step": 211980 }, { "epoch": 2.2649714194134303, "grad_norm": 4.181514263153076, "learning_rate": 9.616013421810554e-07, "loss": 0.009, "step": 211990 }, { "epoch": 2.2650782627277097, "grad_norm": 3.9482972621917725, "learning_rate": 9.615948851248503e-07, "loss": 0.0132, "step": 212000 }, { "epoch": 2.265185106041989, "grad_norm": 1.4893810749053955, "learning_rate": 9.615884275474699e-07, "loss": 0.0272, "step": 212010 }, { "epoch": 2.265291949356269, "grad_norm": 3.4610588550567627, "learning_rate": 9.615819694489213e-07, "loss": 0.0309, "step": 212020 }, { "epoch": 2.2653987926705486, "grad_norm": 0.6950125694274902, "learning_rate": 9.61575510829212e-07, "loss": 0.0041, "step": 212030 }, { "epoch": 2.2655056359848285, "grad_norm": 0.40734076499938965, "learning_rate": 9.615690516883486e-07, "loss": 0.0567, "step": 212040 }, { "epoch": 2.265612479299108, "grad_norm": 7.322071552276611, "learning_rate": 9.615625920263392e-07, "loss": 0.0202, "step": 212050 }, { "epoch": 2.2657193226133874, "grad_norm": 13.067667007446289, "learning_rate": 9.615561318431908e-07, "loss": 0.0298, "step": 212060 }, { "epoch": 2.265826165927667, "grad_norm": 0.9856511950492859, "learning_rate": 9.615496711389108e-07, "loss": 0.0124, "step": 212070 }, { "epoch": 2.2659330092419467, "grad_norm": 0.04003490135073662, "learning_rate": 9.615432099135061e-07, "loss": 0.01, "step": 212080 }, { "epoch": 2.266039852556226, "grad_norm": 0.5278744101524353, "learning_rate": 9.615367481669844e-07, "loss": 0.0014, "step": 212090 }, { "epoch": 2.266146695870506, "grad_norm": 0.02167997509241104, "learning_rate": 9.61530285899353e-07, "loss": 0.0034, "step": 212100 }, { "epoch": 2.2662535391847856, "grad_norm": 0.06360359489917755, "learning_rate": 9.61523823110619e-07, "loss": 0.0298, "step": 212110 }, { "epoch": 2.266360382499065, "grad_norm": 4.176634311676025, "learning_rate": 9.615173598007898e-07, "loss": 0.043, "step": 212120 }, { "epoch": 2.266467225813345, "grad_norm": 8.52575969696045, "learning_rate": 9.615108959698727e-07, "loss": 0.0429, "step": 212130 }, { "epoch": 2.2665740691276244, "grad_norm": 10.682332038879395, "learning_rate": 9.615044316178749e-07, "loss": 0.0121, "step": 212140 }, { "epoch": 2.266680912441904, "grad_norm": 0.07894191145896912, "learning_rate": 9.614979667448038e-07, "loss": 0.0343, "step": 212150 }, { "epoch": 2.2667877557561837, "grad_norm": 0.264602929353714, "learning_rate": 9.614915013506666e-07, "loss": 0.0243, "step": 212160 }, { "epoch": 2.266894599070463, "grad_norm": 1.5236172676086426, "learning_rate": 9.614850354354707e-07, "loss": 0.0229, "step": 212170 }, { "epoch": 2.2670014423847427, "grad_norm": 1.7279911041259766, "learning_rate": 9.614785689992234e-07, "loss": 0.0573, "step": 212180 }, { "epoch": 2.2671082856990226, "grad_norm": 0.14168119430541992, "learning_rate": 9.614721020419318e-07, "loss": 0.0166, "step": 212190 }, { "epoch": 2.267215129013302, "grad_norm": 4.758691310882568, "learning_rate": 9.614656345636035e-07, "loss": 0.031, "step": 212200 }, { "epoch": 2.2673219723275815, "grad_norm": 1.4741966724395752, "learning_rate": 9.614591665642458e-07, "loss": 0.0441, "step": 212210 }, { "epoch": 2.2674288156418614, "grad_norm": 0.4226584732532501, "learning_rate": 9.614526980438657e-07, "loss": 0.0087, "step": 212220 }, { "epoch": 2.267535658956141, "grad_norm": 4.801668643951416, "learning_rate": 9.614462290024707e-07, "loss": 0.0136, "step": 212230 }, { "epoch": 2.2676425022704203, "grad_norm": 0.7392174005508423, "learning_rate": 9.61439759440068e-07, "loss": 0.0115, "step": 212240 }, { "epoch": 2.2677493455847, "grad_norm": 0.6967692375183105, "learning_rate": 9.614332893566652e-07, "loss": 0.0246, "step": 212250 }, { "epoch": 2.2678561888989797, "grad_norm": 4.833302021026611, "learning_rate": 9.614268187522691e-07, "loss": 0.0265, "step": 212260 }, { "epoch": 2.267963032213259, "grad_norm": 0.006591287441551685, "learning_rate": 9.614203476268876e-07, "loss": 0.0165, "step": 212270 }, { "epoch": 2.268069875527539, "grad_norm": 0.6534944772720337, "learning_rate": 9.614138759805275e-07, "loss": 0.0118, "step": 212280 }, { "epoch": 2.2681767188418185, "grad_norm": 0.007889114320278168, "learning_rate": 9.614074038131966e-07, "loss": 0.0207, "step": 212290 }, { "epoch": 2.268283562156098, "grad_norm": 0.10690093785524368, "learning_rate": 9.614009311249017e-07, "loss": 0.0222, "step": 212300 }, { "epoch": 2.268390405470378, "grad_norm": 1.2019466161727905, "learning_rate": 9.613944579156502e-07, "loss": 0.015, "step": 212310 }, { "epoch": 2.2684972487846573, "grad_norm": 3.5139691829681396, "learning_rate": 9.613879841854497e-07, "loss": 0.0143, "step": 212320 }, { "epoch": 2.2686040920989368, "grad_norm": 5.039405345916748, "learning_rate": 9.613815099343072e-07, "loss": 0.0077, "step": 212330 }, { "epoch": 2.2687109354132167, "grad_norm": 1.3507276773452759, "learning_rate": 9.613750351622303e-07, "loss": 0.034, "step": 212340 }, { "epoch": 2.268817778727496, "grad_norm": 3.136566638946533, "learning_rate": 9.61368559869226e-07, "loss": 0.012, "step": 212350 }, { "epoch": 2.2689246220417756, "grad_norm": 1.1600724458694458, "learning_rate": 9.61362084055302e-07, "loss": 0.0108, "step": 212360 }, { "epoch": 2.2690314653560555, "grad_norm": 0.01691259630024433, "learning_rate": 9.613556077204653e-07, "loss": 0.0267, "step": 212370 }, { "epoch": 2.269138308670335, "grad_norm": 8.643974304199219, "learning_rate": 9.613491308647232e-07, "loss": 0.054, "step": 212380 }, { "epoch": 2.2692451519846144, "grad_norm": 0.005011638160794973, "learning_rate": 9.613426534880833e-07, "loss": 0.0509, "step": 212390 }, { "epoch": 2.2693519952988943, "grad_norm": 4.806710243225098, "learning_rate": 9.613361755905525e-07, "loss": 0.0248, "step": 212400 }, { "epoch": 2.2694588386131738, "grad_norm": 0.011647643521428108, "learning_rate": 9.613296971721385e-07, "loss": 0.0107, "step": 212410 }, { "epoch": 2.2695656819274532, "grad_norm": 0.21798765659332275, "learning_rate": 9.613232182328485e-07, "loss": 0.0138, "step": 212420 }, { "epoch": 2.269672525241733, "grad_norm": 1.7950469255447388, "learning_rate": 9.613167387726897e-07, "loss": 0.0103, "step": 212430 }, { "epoch": 2.2697793685560126, "grad_norm": 0.004936015233397484, "learning_rate": 9.613102587916693e-07, "loss": 0.0178, "step": 212440 }, { "epoch": 2.269886211870292, "grad_norm": 1.1788873672485352, "learning_rate": 9.613037782897951e-07, "loss": 0.0113, "step": 212450 }, { "epoch": 2.269993055184572, "grad_norm": 2.876889705657959, "learning_rate": 9.612972972670738e-07, "loss": 0.0133, "step": 212460 }, { "epoch": 2.2700998984988514, "grad_norm": 0.003056551329791546, "learning_rate": 9.612908157235134e-07, "loss": 0.0059, "step": 212470 }, { "epoch": 2.270206741813131, "grad_norm": 0.11253000050783157, "learning_rate": 9.612843336591206e-07, "loss": 0.0166, "step": 212480 }, { "epoch": 2.2703135851274108, "grad_norm": 0.1333116888999939, "learning_rate": 9.61277851073903e-07, "loss": 0.0158, "step": 212490 }, { "epoch": 2.2704204284416902, "grad_norm": 2.9246156215667725, "learning_rate": 9.61271367967868e-07, "loss": 0.0136, "step": 212500 }, { "epoch": 2.2705272717559697, "grad_norm": 0.27467358112335205, "learning_rate": 9.612648843410228e-07, "loss": 0.0165, "step": 212510 }, { "epoch": 2.2706341150702496, "grad_norm": 3.7993481159210205, "learning_rate": 9.612584001933745e-07, "loss": 0.0083, "step": 212520 }, { "epoch": 2.270740958384529, "grad_norm": 0.2843189537525177, "learning_rate": 9.61251915524931e-07, "loss": 0.0127, "step": 212530 }, { "epoch": 2.2708478016988085, "grad_norm": 0.07344411313533783, "learning_rate": 9.612454303356991e-07, "loss": 0.0169, "step": 212540 }, { "epoch": 2.2709546450130884, "grad_norm": 0.006383343134075403, "learning_rate": 9.612389446256863e-07, "loss": 0.009, "step": 212550 }, { "epoch": 2.271061488327368, "grad_norm": 0.020862499251961708, "learning_rate": 9.612324583949e-07, "loss": 0.0359, "step": 212560 }, { "epoch": 2.2711683316416473, "grad_norm": 8.86499309539795, "learning_rate": 9.612259716433472e-07, "loss": 0.0169, "step": 212570 }, { "epoch": 2.2712751749559272, "grad_norm": 1.193141222000122, "learning_rate": 9.612194843710357e-07, "loss": 0.0123, "step": 212580 }, { "epoch": 2.2713820182702067, "grad_norm": 2.2874772548675537, "learning_rate": 9.612129965779724e-07, "loss": 0.0311, "step": 212590 }, { "epoch": 2.271488861584486, "grad_norm": 9.453557968139648, "learning_rate": 9.612065082641651e-07, "loss": 0.0404, "step": 212600 }, { "epoch": 2.271595704898766, "grad_norm": 0.04759948328137398, "learning_rate": 9.612000194296206e-07, "loss": 0.0224, "step": 212610 }, { "epoch": 2.2717025482130455, "grad_norm": 0.02940394915640354, "learning_rate": 9.611935300743467e-07, "loss": 0.0165, "step": 212620 }, { "epoch": 2.2718093915273254, "grad_norm": 2.2729992866516113, "learning_rate": 9.611870401983503e-07, "loss": 0.0388, "step": 212630 }, { "epoch": 2.271916234841605, "grad_norm": 0.2587908208370209, "learning_rate": 9.61180549801639e-07, "loss": 0.0024, "step": 212640 }, { "epoch": 2.2720230781558843, "grad_norm": 0.9820407629013062, "learning_rate": 9.6117405888422e-07, "loss": 0.025, "step": 212650 }, { "epoch": 2.272129921470164, "grad_norm": 0.03944559395313263, "learning_rate": 9.611675674461007e-07, "loss": 0.0344, "step": 212660 }, { "epoch": 2.2722367647844437, "grad_norm": 0.004671594128012657, "learning_rate": 9.611610754872885e-07, "loss": 0.0201, "step": 212670 }, { "epoch": 2.272343608098723, "grad_norm": 1.7070029973983765, "learning_rate": 9.611545830077905e-07, "loss": 0.008, "step": 212680 }, { "epoch": 2.272450451413003, "grad_norm": 1.7648249864578247, "learning_rate": 9.611480900076141e-07, "loss": 0.0388, "step": 212690 }, { "epoch": 2.2725572947272825, "grad_norm": 0.9824864864349365, "learning_rate": 9.611415964867669e-07, "loss": 0.0331, "step": 212700 }, { "epoch": 2.272664138041562, "grad_norm": 0.035073596984148026, "learning_rate": 9.61135102445256e-07, "loss": 0.0029, "step": 212710 }, { "epoch": 2.2727709813558414, "grad_norm": 0.06434813141822815, "learning_rate": 9.611286078830887e-07, "loss": 0.0432, "step": 212720 }, { "epoch": 2.2728778246701213, "grad_norm": 2.039083957672119, "learning_rate": 9.611221128002724e-07, "loss": 0.0211, "step": 212730 }, { "epoch": 2.272984667984401, "grad_norm": 2.2115302085876465, "learning_rate": 9.611156171968145e-07, "loss": 0.0134, "step": 212740 }, { "epoch": 2.2730915112986807, "grad_norm": 0.3772108852863312, "learning_rate": 9.611091210727222e-07, "loss": 0.0292, "step": 212750 }, { "epoch": 2.27319835461296, "grad_norm": 0.7141196131706238, "learning_rate": 9.61102624428003e-07, "loss": 0.0186, "step": 212760 }, { "epoch": 2.2733051979272396, "grad_norm": 1.9549939632415771, "learning_rate": 9.61096127262664e-07, "loss": 0.0092, "step": 212770 }, { "epoch": 2.273412041241519, "grad_norm": 5.200072765350342, "learning_rate": 9.610896295767127e-07, "loss": 0.0148, "step": 212780 }, { "epoch": 2.273518884555799, "grad_norm": 3.214855432510376, "learning_rate": 9.610831313701567e-07, "loss": 0.0299, "step": 212790 }, { "epoch": 2.2736257278700784, "grad_norm": 0.008155489340424538, "learning_rate": 9.610766326430025e-07, "loss": 0.0253, "step": 212800 }, { "epoch": 2.2737325711843583, "grad_norm": 4.760327339172363, "learning_rate": 9.610701333952584e-07, "loss": 0.0239, "step": 212810 }, { "epoch": 2.273839414498638, "grad_norm": 0.004975419957190752, "learning_rate": 9.610636336269313e-07, "loss": 0.0047, "step": 212820 }, { "epoch": 2.2739462578129173, "grad_norm": 3.2213680744171143, "learning_rate": 9.610571333380284e-07, "loss": 0.0196, "step": 212830 }, { "epoch": 2.2740531011271967, "grad_norm": 6.590444087982178, "learning_rate": 9.610506325285573e-07, "loss": 0.0224, "step": 212840 }, { "epoch": 2.2741599444414766, "grad_norm": 0.016771968454122543, "learning_rate": 9.610441311985251e-07, "loss": 0.0154, "step": 212850 }, { "epoch": 2.274266787755756, "grad_norm": 0.01435143407434225, "learning_rate": 9.610376293479393e-07, "loss": 0.0336, "step": 212860 }, { "epoch": 2.274373631070036, "grad_norm": 4.3224053382873535, "learning_rate": 9.610311269768075e-07, "loss": 0.0456, "step": 212870 }, { "epoch": 2.2744804743843154, "grad_norm": 4.935907363891602, "learning_rate": 9.610246240851365e-07, "loss": 0.0132, "step": 212880 }, { "epoch": 2.274587317698595, "grad_norm": 0.02635687030851841, "learning_rate": 9.610181206729339e-07, "loss": 0.0097, "step": 212890 }, { "epoch": 2.274694161012875, "grad_norm": 4.065795421600342, "learning_rate": 9.61011616740207e-07, "loss": 0.0109, "step": 212900 }, { "epoch": 2.2748010043271543, "grad_norm": 0.12556268274784088, "learning_rate": 9.610051122869636e-07, "loss": 0.0622, "step": 212910 }, { "epoch": 2.2749078476414337, "grad_norm": 0.03660067915916443, "learning_rate": 9.609986073132103e-07, "loss": 0.0097, "step": 212920 }, { "epoch": 2.2750146909557136, "grad_norm": 0.022847089916467667, "learning_rate": 9.609921018189549e-07, "loss": 0.0454, "step": 212930 }, { "epoch": 2.275121534269993, "grad_norm": 0.18808531761169434, "learning_rate": 9.609855958042044e-07, "loss": 0.0108, "step": 212940 }, { "epoch": 2.2752283775842725, "grad_norm": 1.5022554397583008, "learning_rate": 9.609790892689665e-07, "loss": 0.0199, "step": 212950 }, { "epoch": 2.2753352208985524, "grad_norm": 3.3377878665924072, "learning_rate": 9.609725822132484e-07, "loss": 0.0027, "step": 212960 }, { "epoch": 2.275442064212832, "grad_norm": 0.038349732756614685, "learning_rate": 9.609660746370577e-07, "loss": 0.0158, "step": 212970 }, { "epoch": 2.2755489075271114, "grad_norm": 0.29390841722488403, "learning_rate": 9.609595665404012e-07, "loss": 0.0231, "step": 212980 }, { "epoch": 2.2756557508413913, "grad_norm": 1.3288438320159912, "learning_rate": 9.609530579232867e-07, "loss": 0.027, "step": 212990 }, { "epoch": 2.2757625941556707, "grad_norm": 1.0126140117645264, "learning_rate": 9.609465487857214e-07, "loss": 0.0139, "step": 213000 }, { "epoch": 2.27586943746995, "grad_norm": 0.07810687273740768, "learning_rate": 9.609400391277125e-07, "loss": 0.0081, "step": 213010 }, { "epoch": 2.27597628078423, "grad_norm": 2.101858377456665, "learning_rate": 9.609335289492675e-07, "loss": 0.0103, "step": 213020 }, { "epoch": 2.2760831240985095, "grad_norm": 0.3134792149066925, "learning_rate": 9.609270182503941e-07, "loss": 0.0143, "step": 213030 }, { "epoch": 2.276189967412789, "grad_norm": 0.16001266241073608, "learning_rate": 9.60920507031099e-07, "loss": 0.0508, "step": 213040 }, { "epoch": 2.276296810727069, "grad_norm": 0.10057146102190018, "learning_rate": 9.6091399529139e-07, "loss": 0.0156, "step": 213050 }, { "epoch": 2.2764036540413484, "grad_norm": 0.42060375213623047, "learning_rate": 9.609074830312743e-07, "loss": 0.0072, "step": 213060 }, { "epoch": 2.276510497355628, "grad_norm": 0.5203761458396912, "learning_rate": 9.609009702507591e-07, "loss": 0.0062, "step": 213070 }, { "epoch": 2.2766173406699077, "grad_norm": 11.634937286376953, "learning_rate": 9.608944569498521e-07, "loss": 0.0163, "step": 213080 }, { "epoch": 2.276724183984187, "grad_norm": 0.05688592791557312, "learning_rate": 9.608879431285605e-07, "loss": 0.0563, "step": 213090 }, { "epoch": 2.2768310272984666, "grad_norm": 0.5378732085227966, "learning_rate": 9.608814287868917e-07, "loss": 0.0237, "step": 213100 }, { "epoch": 2.2769378706127466, "grad_norm": 0.1359959840774536, "learning_rate": 9.608749139248525e-07, "loss": 0.0145, "step": 213110 }, { "epoch": 2.277044713927026, "grad_norm": 0.014703486114740372, "learning_rate": 9.608683985424512e-07, "loss": 0.0265, "step": 213120 }, { "epoch": 2.2771515572413055, "grad_norm": 0.7200607061386108, "learning_rate": 9.608618826396945e-07, "loss": 0.0637, "step": 213130 }, { "epoch": 2.2772584005555854, "grad_norm": 0.008460757322609425, "learning_rate": 9.608553662165902e-07, "loss": 0.0374, "step": 213140 }, { "epoch": 2.277365243869865, "grad_norm": 7.540617942810059, "learning_rate": 9.60848849273145e-07, "loss": 0.0143, "step": 213150 }, { "epoch": 2.2774720871841443, "grad_norm": 0.027804236859083176, "learning_rate": 9.60842331809367e-07, "loss": 0.0125, "step": 213160 }, { "epoch": 2.277578930498424, "grad_norm": 0.20848523080348969, "learning_rate": 9.608358138252633e-07, "loss": 0.0334, "step": 213170 }, { "epoch": 2.2776857738127037, "grad_norm": 0.5098409056663513, "learning_rate": 9.608292953208409e-07, "loss": 0.0093, "step": 213180 }, { "epoch": 2.277792617126983, "grad_norm": 2.7059426307678223, "learning_rate": 9.608227762961075e-07, "loss": 0.0046, "step": 213190 }, { "epoch": 2.277899460441263, "grad_norm": 2.333099603652954, "learning_rate": 9.608162567510705e-07, "loss": 0.009, "step": 213200 }, { "epoch": 2.2780063037555425, "grad_norm": 5.9346818923950195, "learning_rate": 9.608097366857373e-07, "loss": 0.0295, "step": 213210 }, { "epoch": 2.278113147069822, "grad_norm": 4.289660930633545, "learning_rate": 9.608032161001148e-07, "loss": 0.0074, "step": 213220 }, { "epoch": 2.278219990384102, "grad_norm": 5.942991733551025, "learning_rate": 9.607966949942107e-07, "loss": 0.0171, "step": 213230 }, { "epoch": 2.2783268336983813, "grad_norm": 10.456878662109375, "learning_rate": 9.607901733680326e-07, "loss": 0.0345, "step": 213240 }, { "epoch": 2.2784336770126608, "grad_norm": 0.12649935483932495, "learning_rate": 9.607836512215875e-07, "loss": 0.0471, "step": 213250 }, { "epoch": 2.2785405203269407, "grad_norm": 4.8076491355896, "learning_rate": 9.607771285548828e-07, "loss": 0.039, "step": 213260 }, { "epoch": 2.27864736364122, "grad_norm": 2.966848850250244, "learning_rate": 9.607706053679263e-07, "loss": 0.006, "step": 213270 }, { "epoch": 2.2787542069554996, "grad_norm": 2.009148359298706, "learning_rate": 9.607640816607247e-07, "loss": 0.0167, "step": 213280 }, { "epoch": 2.2788610502697795, "grad_norm": 0.6821647882461548, "learning_rate": 9.607575574332856e-07, "loss": 0.0051, "step": 213290 }, { "epoch": 2.278967893584059, "grad_norm": 5.771602153778076, "learning_rate": 9.607510326856169e-07, "loss": 0.0506, "step": 213300 }, { "epoch": 2.2790747368983384, "grad_norm": 0.029339885339140892, "learning_rate": 9.60744507417725e-07, "loss": 0.0056, "step": 213310 }, { "epoch": 2.2791815802126183, "grad_norm": 0.5661563277244568, "learning_rate": 9.60737981629618e-07, "loss": 0.0113, "step": 213320 }, { "epoch": 2.2792884235268978, "grad_norm": 0.07402901351451874, "learning_rate": 9.60731455321303e-07, "loss": 0.0015, "step": 213330 }, { "epoch": 2.2793952668411777, "grad_norm": 0.009540189057588577, "learning_rate": 9.607249284927874e-07, "loss": 0.0282, "step": 213340 }, { "epoch": 2.279502110155457, "grad_norm": 0.0058596269227564335, "learning_rate": 9.607184011440785e-07, "loss": 0.041, "step": 213350 }, { "epoch": 2.2796089534697366, "grad_norm": 2.8498127460479736, "learning_rate": 9.607118732751838e-07, "loss": 0.011, "step": 213360 }, { "epoch": 2.279715796784016, "grad_norm": 1.0866936445236206, "learning_rate": 9.607053448861107e-07, "loss": 0.0325, "step": 213370 }, { "epoch": 2.279822640098296, "grad_norm": 4.893067359924316, "learning_rate": 9.606988159768666e-07, "loss": 0.023, "step": 213380 }, { "epoch": 2.2799294834125754, "grad_norm": 3.0094211101531982, "learning_rate": 9.606922865474585e-07, "loss": 0.0337, "step": 213390 }, { "epoch": 2.2800363267268553, "grad_norm": 0.03195378929376602, "learning_rate": 9.606857565978943e-07, "loss": 0.0469, "step": 213400 }, { "epoch": 2.2801431700411348, "grad_norm": 4.844923973083496, "learning_rate": 9.60679226128181e-07, "loss": 0.0204, "step": 213410 }, { "epoch": 2.280250013355414, "grad_norm": 1.9310442209243774, "learning_rate": 9.606726951383261e-07, "loss": 0.0303, "step": 213420 }, { "epoch": 2.2803568566696937, "grad_norm": 2.0048277378082275, "learning_rate": 9.606661636283368e-07, "loss": 0.0581, "step": 213430 }, { "epoch": 2.2804636999839736, "grad_norm": 0.035750776529312134, "learning_rate": 9.60659631598221e-07, "loss": 0.0132, "step": 213440 }, { "epoch": 2.280570543298253, "grad_norm": 1.1361112594604492, "learning_rate": 9.606530990479852e-07, "loss": 0.0187, "step": 213450 }, { "epoch": 2.280677386612533, "grad_norm": 3.944103717803955, "learning_rate": 9.606465659776378e-07, "loss": 0.0241, "step": 213460 }, { "epoch": 2.2807842299268124, "grad_norm": 0.00910758413374424, "learning_rate": 9.606400323871852e-07, "loss": 0.0131, "step": 213470 }, { "epoch": 2.280891073241092, "grad_norm": 0.5811815857887268, "learning_rate": 9.606334982766356e-07, "loss": 0.0102, "step": 213480 }, { "epoch": 2.2809979165553713, "grad_norm": 2.302551031112671, "learning_rate": 9.606269636459959e-07, "loss": 0.0232, "step": 213490 }, { "epoch": 2.281104759869651, "grad_norm": 2.1992015838623047, "learning_rate": 9.606204284952735e-07, "loss": 0.0021, "step": 213500 }, { "epoch": 2.2812116031839307, "grad_norm": 0.03317378833889961, "learning_rate": 9.60613892824476e-07, "loss": 0.003, "step": 213510 }, { "epoch": 2.2813184464982106, "grad_norm": 0.0038037626072764397, "learning_rate": 9.606073566336107e-07, "loss": 0.0016, "step": 213520 }, { "epoch": 2.28142528981249, "grad_norm": 0.022724928334355354, "learning_rate": 9.606008199226847e-07, "loss": 0.0585, "step": 213530 }, { "epoch": 2.2815321331267695, "grad_norm": 0.6524899005889893, "learning_rate": 9.605942826917057e-07, "loss": 0.0049, "step": 213540 }, { "epoch": 2.281638976441049, "grad_norm": 4.666316986083984, "learning_rate": 9.605877449406809e-07, "loss": 0.0052, "step": 213550 }, { "epoch": 2.281745819755329, "grad_norm": 2.4247219562530518, "learning_rate": 9.60581206669618e-07, "loss": 0.0236, "step": 213560 }, { "epoch": 2.2818526630696083, "grad_norm": 0.04679551348090172, "learning_rate": 9.60574667878524e-07, "loss": 0.056, "step": 213570 }, { "epoch": 2.2819595063838882, "grad_norm": 5.969161033630371, "learning_rate": 9.605681285674066e-07, "loss": 0.0184, "step": 213580 }, { "epoch": 2.2820663496981677, "grad_norm": 1.8389453887939453, "learning_rate": 9.605615887362728e-07, "loss": 0.0151, "step": 213590 }, { "epoch": 2.282173193012447, "grad_norm": 6.112318992614746, "learning_rate": 9.605550483851304e-07, "loss": 0.0149, "step": 213600 }, { "epoch": 2.282280036326727, "grad_norm": 0.5700797438621521, "learning_rate": 9.605485075139866e-07, "loss": 0.0229, "step": 213610 }, { "epoch": 2.2823868796410065, "grad_norm": 0.3380616009235382, "learning_rate": 9.605419661228485e-07, "loss": 0.0177, "step": 213620 }, { "epoch": 2.282493722955286, "grad_norm": 0.006355973891913891, "learning_rate": 9.60535424211724e-07, "loss": 0.0244, "step": 213630 }, { "epoch": 2.282600566269566, "grad_norm": 0.09514078497886658, "learning_rate": 9.605288817806203e-07, "loss": 0.0211, "step": 213640 }, { "epoch": 2.2827074095838453, "grad_norm": 0.0034327181056141853, "learning_rate": 9.605223388295446e-07, "loss": 0.0124, "step": 213650 }, { "epoch": 2.282814252898125, "grad_norm": 0.7597508430480957, "learning_rate": 9.605157953585043e-07, "loss": 0.0224, "step": 213660 }, { "epoch": 2.2829210962124047, "grad_norm": 3.1867241859436035, "learning_rate": 9.605092513675072e-07, "loss": 0.0386, "step": 213670 }, { "epoch": 2.283027939526684, "grad_norm": 0.9066434502601624, "learning_rate": 9.605027068565602e-07, "loss": 0.0067, "step": 213680 }, { "epoch": 2.2831347828409636, "grad_norm": 0.011067341081798077, "learning_rate": 9.604961618256709e-07, "loss": 0.0288, "step": 213690 }, { "epoch": 2.2832416261552435, "grad_norm": 2.474360704421997, "learning_rate": 9.604896162748467e-07, "loss": 0.0282, "step": 213700 }, { "epoch": 2.283348469469523, "grad_norm": 3.0584664344787598, "learning_rate": 9.60483070204095e-07, "loss": 0.0113, "step": 213710 }, { "epoch": 2.2834553127838024, "grad_norm": 2.7930197715759277, "learning_rate": 9.604765236134232e-07, "loss": 0.0386, "step": 213720 }, { "epoch": 2.2835621560980823, "grad_norm": 0.23235389590263367, "learning_rate": 9.604699765028387e-07, "loss": 0.0429, "step": 213730 }, { "epoch": 2.283668999412362, "grad_norm": 4.086788654327393, "learning_rate": 9.604634288723487e-07, "loss": 0.0161, "step": 213740 }, { "epoch": 2.2837758427266412, "grad_norm": 0.9390701055526733, "learning_rate": 9.60456880721961e-07, "loss": 0.0116, "step": 213750 }, { "epoch": 2.283882686040921, "grad_norm": 0.01487099938094616, "learning_rate": 9.604503320516823e-07, "loss": 0.0266, "step": 213760 }, { "epoch": 2.2839895293552006, "grad_norm": 1.861199975013733, "learning_rate": 9.604437828615205e-07, "loss": 0.0259, "step": 213770 }, { "epoch": 2.28409637266948, "grad_norm": 7.710620403289795, "learning_rate": 9.604372331514832e-07, "loss": 0.0403, "step": 213780 }, { "epoch": 2.28420321598376, "grad_norm": 3.0859591960906982, "learning_rate": 9.604306829215774e-07, "loss": 0.0244, "step": 213790 }, { "epoch": 2.2843100592980394, "grad_norm": 3.3813858032226562, "learning_rate": 9.604241321718105e-07, "loss": 0.0098, "step": 213800 }, { "epoch": 2.284416902612319, "grad_norm": 0.21984264254570007, "learning_rate": 9.604175809021902e-07, "loss": 0.0106, "step": 213810 }, { "epoch": 2.284523745926599, "grad_norm": 0.003908668179064989, "learning_rate": 9.604110291127236e-07, "loss": 0.0194, "step": 213820 }, { "epoch": 2.2846305892408783, "grad_norm": 3.526726245880127, "learning_rate": 9.60404476803418e-07, "loss": 0.0471, "step": 213830 }, { "epoch": 2.2847374325551577, "grad_norm": 1.84290611743927, "learning_rate": 9.603979239742812e-07, "loss": 0.0164, "step": 213840 }, { "epoch": 2.2848442758694376, "grad_norm": 0.038785070180892944, "learning_rate": 9.603913706253204e-07, "loss": 0.0254, "step": 213850 }, { "epoch": 2.284951119183717, "grad_norm": 0.025006946176290512, "learning_rate": 9.60384816756543e-07, "loss": 0.0107, "step": 213860 }, { "epoch": 2.2850579624979965, "grad_norm": 0.005185556598007679, "learning_rate": 9.603782623679565e-07, "loss": 0.0125, "step": 213870 }, { "epoch": 2.2851648058122764, "grad_norm": 20.687118530273438, "learning_rate": 9.60371707459568e-07, "loss": 0.0524, "step": 213880 }, { "epoch": 2.285271649126556, "grad_norm": 0.6180779337882996, "learning_rate": 9.60365152031385e-07, "loss": 0.0119, "step": 213890 }, { "epoch": 2.2853784924408354, "grad_norm": 0.03836646303534508, "learning_rate": 9.603585960834153e-07, "loss": 0.02, "step": 213900 }, { "epoch": 2.2854853357551153, "grad_norm": 0.02986825257539749, "learning_rate": 9.603520396156658e-07, "loss": 0.0126, "step": 213910 }, { "epoch": 2.2855921790693947, "grad_norm": 1.1270804405212402, "learning_rate": 9.603454826281441e-07, "loss": 0.0069, "step": 213920 }, { "epoch": 2.285699022383674, "grad_norm": 13.840919494628906, "learning_rate": 9.603389251208578e-07, "loss": 0.0479, "step": 213930 }, { "epoch": 2.285805865697954, "grad_norm": 9.37332820892334, "learning_rate": 9.60332367093814e-07, "loss": 0.0329, "step": 213940 }, { "epoch": 2.2859127090122335, "grad_norm": 0.0315619520843029, "learning_rate": 9.603258085470202e-07, "loss": 0.0162, "step": 213950 }, { "epoch": 2.286019552326513, "grad_norm": 12.312701225280762, "learning_rate": 9.603192494804837e-07, "loss": 0.0243, "step": 213960 }, { "epoch": 2.286126395640793, "grad_norm": 0.7446256279945374, "learning_rate": 9.603126898942122e-07, "loss": 0.0101, "step": 213970 }, { "epoch": 2.2862332389550724, "grad_norm": 0.08666103333234787, "learning_rate": 9.60306129788213e-07, "loss": 0.0365, "step": 213980 }, { "epoch": 2.286340082269352, "grad_norm": 0.5257670879364014, "learning_rate": 9.60299569162493e-07, "loss": 0.0099, "step": 213990 }, { "epoch": 2.2864469255836317, "grad_norm": 0.04937078431248665, "learning_rate": 9.602930080170605e-07, "loss": 0.0138, "step": 214000 }, { "epoch": 2.286553768897911, "grad_norm": 0.6052361726760864, "learning_rate": 9.602864463519224e-07, "loss": 0.0171, "step": 214010 }, { "epoch": 2.2866606122121906, "grad_norm": 0.053872108459472656, "learning_rate": 9.60279884167086e-07, "loss": 0.0052, "step": 214020 }, { "epoch": 2.2867674555264705, "grad_norm": 0.002822848269715905, "learning_rate": 9.602733214625588e-07, "loss": 0.0079, "step": 214030 }, { "epoch": 2.28687429884075, "grad_norm": 0.004906040150672197, "learning_rate": 9.602667582383486e-07, "loss": 0.004, "step": 214040 }, { "epoch": 2.2869811421550295, "grad_norm": 1.4905794858932495, "learning_rate": 9.602601944944622e-07, "loss": 0.0393, "step": 214050 }, { "epoch": 2.2870879854693094, "grad_norm": 2.002164125442505, "learning_rate": 9.602536302309074e-07, "loss": 0.0602, "step": 214060 }, { "epoch": 2.287194828783589, "grad_norm": 4.188579559326172, "learning_rate": 9.602470654476917e-07, "loss": 0.0207, "step": 214070 }, { "epoch": 2.2873016720978683, "grad_norm": 5.103126049041748, "learning_rate": 9.602405001448222e-07, "loss": 0.0269, "step": 214080 }, { "epoch": 2.287408515412148, "grad_norm": 0.3563220798969269, "learning_rate": 9.602339343223062e-07, "loss": 0.0123, "step": 214090 }, { "epoch": 2.2875153587264276, "grad_norm": 0.09084846079349518, "learning_rate": 9.602273679801518e-07, "loss": 0.0242, "step": 214100 }, { "epoch": 2.2876222020407075, "grad_norm": 0.4253036081790924, "learning_rate": 9.602208011183655e-07, "loss": 0.0016, "step": 214110 }, { "epoch": 2.287729045354987, "grad_norm": 2.374218463897705, "learning_rate": 9.602142337369555e-07, "loss": 0.0345, "step": 214120 }, { "epoch": 2.2878358886692665, "grad_norm": 0.01510443165898323, "learning_rate": 9.602076658359289e-07, "loss": 0.0108, "step": 214130 }, { "epoch": 2.287942731983546, "grad_norm": 2.1099119186401367, "learning_rate": 9.602010974152928e-07, "loss": 0.0249, "step": 214140 }, { "epoch": 2.288049575297826, "grad_norm": 0.017145346850156784, "learning_rate": 9.601945284750552e-07, "loss": 0.0007, "step": 214150 }, { "epoch": 2.2881564186121053, "grad_norm": 17.703052520751953, "learning_rate": 9.601879590152233e-07, "loss": 0.0806, "step": 214160 }, { "epoch": 2.288263261926385, "grad_norm": 0.06942452490329742, "learning_rate": 9.601813890358042e-07, "loss": 0.0101, "step": 214170 }, { "epoch": 2.2883701052406646, "grad_norm": 0.008579371497035027, "learning_rate": 9.601748185368057e-07, "loss": 0.0227, "step": 214180 }, { "epoch": 2.288476948554944, "grad_norm": 1.8593530654907227, "learning_rate": 9.601682475182351e-07, "loss": 0.0294, "step": 214190 }, { "epoch": 2.2885837918692236, "grad_norm": 7.9347004890441895, "learning_rate": 9.601616759800998e-07, "loss": 0.0544, "step": 214200 }, { "epoch": 2.2886906351835035, "grad_norm": 0.7776468992233276, "learning_rate": 9.601551039224075e-07, "loss": 0.0166, "step": 214210 }, { "epoch": 2.288797478497783, "grad_norm": 0.1297139823436737, "learning_rate": 9.60148531345165e-07, "loss": 0.0069, "step": 214220 }, { "epoch": 2.288904321812063, "grad_norm": 0.026352595537900925, "learning_rate": 9.601419582483802e-07, "loss": 0.0406, "step": 214230 }, { "epoch": 2.2890111651263423, "grad_norm": 0.4039421081542969, "learning_rate": 9.601353846320606e-07, "loss": 0.0078, "step": 214240 }, { "epoch": 2.2891180084406217, "grad_norm": 0.8132044672966003, "learning_rate": 9.601288104962131e-07, "loss": 0.0095, "step": 214250 }, { "epoch": 2.289224851754901, "grad_norm": 0.017404425889253616, "learning_rate": 9.601222358408456e-07, "loss": 0.0269, "step": 214260 }, { "epoch": 2.289331695069181, "grad_norm": 1.779185175895691, "learning_rate": 9.601156606659654e-07, "loss": 0.0037, "step": 214270 }, { "epoch": 2.2894385383834606, "grad_norm": 0.2644217908382416, "learning_rate": 9.601090849715799e-07, "loss": 0.0245, "step": 214280 }, { "epoch": 2.2895453816977405, "grad_norm": 0.01252738293260336, "learning_rate": 9.601025087576963e-07, "loss": 0.0107, "step": 214290 }, { "epoch": 2.28965222501202, "grad_norm": 0.010560503229498863, "learning_rate": 9.600959320243225e-07, "loss": 0.0398, "step": 214300 }, { "epoch": 2.2897590683262994, "grad_norm": 1.5464588403701782, "learning_rate": 9.600893547714656e-07, "loss": 0.0336, "step": 214310 }, { "epoch": 2.289865911640579, "grad_norm": 0.6913240551948547, "learning_rate": 9.60082776999133e-07, "loss": 0.0295, "step": 214320 }, { "epoch": 2.2899727549548587, "grad_norm": 7.423491954803467, "learning_rate": 9.600761987073322e-07, "loss": 0.047, "step": 214330 }, { "epoch": 2.290079598269138, "grad_norm": 12.882340431213379, "learning_rate": 9.60069619896071e-07, "loss": 0.0475, "step": 214340 }, { "epoch": 2.290186441583418, "grad_norm": 2.523871898651123, "learning_rate": 9.60063040565356e-07, "loss": 0.0167, "step": 214350 }, { "epoch": 2.2902932848976976, "grad_norm": 1.368471384048462, "learning_rate": 9.600564607151954e-07, "loss": 0.0153, "step": 214360 }, { "epoch": 2.290400128211977, "grad_norm": 1.26011323928833, "learning_rate": 9.600498803455962e-07, "loss": 0.0124, "step": 214370 }, { "epoch": 2.290506971526257, "grad_norm": 0.1384008377790451, "learning_rate": 9.60043299456566e-07, "loss": 0.0128, "step": 214380 }, { "epoch": 2.2906138148405364, "grad_norm": 0.009909585118293762, "learning_rate": 9.600367180481122e-07, "loss": 0.0085, "step": 214390 }, { "epoch": 2.290720658154816, "grad_norm": 4.658443927764893, "learning_rate": 9.600301361202422e-07, "loss": 0.0555, "step": 214400 }, { "epoch": 2.2908275014690958, "grad_norm": 0.07019582390785217, "learning_rate": 9.600235536729636e-07, "loss": 0.0194, "step": 214410 }, { "epoch": 2.290934344783375, "grad_norm": 15.760472297668457, "learning_rate": 9.600169707062835e-07, "loss": 0.0441, "step": 214420 }, { "epoch": 2.2910411880976547, "grad_norm": 0.0026485954876989126, "learning_rate": 9.600103872202097e-07, "loss": 0.0103, "step": 214430 }, { "epoch": 2.2911480314119346, "grad_norm": 2.402651071548462, "learning_rate": 9.600038032147494e-07, "loss": 0.0206, "step": 214440 }, { "epoch": 2.291254874726214, "grad_norm": 0.4073468744754791, "learning_rate": 9.599972186899098e-07, "loss": 0.0146, "step": 214450 }, { "epoch": 2.2913617180404935, "grad_norm": 2.6153628826141357, "learning_rate": 9.59990633645699e-07, "loss": 0.024, "step": 214460 }, { "epoch": 2.2914685613547734, "grad_norm": 0.16070882976055145, "learning_rate": 9.59984048082124e-07, "loss": 0.0142, "step": 214470 }, { "epoch": 2.291575404669053, "grad_norm": 0.9483824372291565, "learning_rate": 9.599774619991923e-07, "loss": 0.0522, "step": 214480 }, { "epoch": 2.2916822479833323, "grad_norm": 0.014933295547962189, "learning_rate": 9.599708753969112e-07, "loss": 0.0138, "step": 214490 }, { "epoch": 2.291789091297612, "grad_norm": 0.5059328079223633, "learning_rate": 9.599642882752882e-07, "loss": 0.0078, "step": 214500 }, { "epoch": 2.2918959346118917, "grad_norm": 0.7323455214500427, "learning_rate": 9.59957700634331e-07, "loss": 0.0149, "step": 214510 }, { "epoch": 2.292002777926171, "grad_norm": 0.7731952667236328, "learning_rate": 9.599511124740467e-07, "loss": 0.0162, "step": 214520 }, { "epoch": 2.292109621240451, "grad_norm": 5.524588108062744, "learning_rate": 9.59944523794443e-07, "loss": 0.027, "step": 214530 }, { "epoch": 2.2922164645547305, "grad_norm": 4.3240814208984375, "learning_rate": 9.599379345955271e-07, "loss": 0.0188, "step": 214540 }, { "epoch": 2.29232330786901, "grad_norm": 5.281196117401123, "learning_rate": 9.599313448773064e-07, "loss": 0.0377, "step": 214550 }, { "epoch": 2.29243015118329, "grad_norm": 2.4937782287597656, "learning_rate": 9.599247546397889e-07, "loss": 0.0136, "step": 214560 }, { "epoch": 2.2925369944975693, "grad_norm": 6.372690200805664, "learning_rate": 9.599181638829815e-07, "loss": 0.0071, "step": 214570 }, { "epoch": 2.2926438378118488, "grad_norm": 12.931123733520508, "learning_rate": 9.599115726068917e-07, "loss": 0.0116, "step": 214580 }, { "epoch": 2.2927506811261287, "grad_norm": 0.03898759186267853, "learning_rate": 9.59904980811527e-07, "loss": 0.0244, "step": 214590 }, { "epoch": 2.292857524440408, "grad_norm": 3.3040072917938232, "learning_rate": 9.59898388496895e-07, "loss": 0.0478, "step": 214600 }, { "epoch": 2.2929643677546876, "grad_norm": 0.29464489221572876, "learning_rate": 9.598917956630029e-07, "loss": 0.0077, "step": 214610 }, { "epoch": 2.2930712110689675, "grad_norm": 7.468996524810791, "learning_rate": 9.598852023098582e-07, "loss": 0.0521, "step": 214620 }, { "epoch": 2.293178054383247, "grad_norm": 0.03940349817276001, "learning_rate": 9.598786084374686e-07, "loss": 0.0099, "step": 214630 }, { "epoch": 2.2932848976975264, "grad_norm": 0.06967653334140778, "learning_rate": 9.598720140458412e-07, "loss": 0.0086, "step": 214640 }, { "epoch": 2.2933917410118063, "grad_norm": 0.4514053165912628, "learning_rate": 9.598654191349835e-07, "loss": 0.0016, "step": 214650 }, { "epoch": 2.2934985843260858, "grad_norm": 1.9727716445922852, "learning_rate": 9.598588237049033e-07, "loss": 0.0403, "step": 214660 }, { "epoch": 2.2936054276403652, "grad_norm": 1.3384097814559937, "learning_rate": 9.598522277556075e-07, "loss": 0.0015, "step": 214670 }, { "epoch": 2.293712270954645, "grad_norm": 4.465604305267334, "learning_rate": 9.598456312871039e-07, "loss": 0.0129, "step": 214680 }, { "epoch": 2.2938191142689246, "grad_norm": 0.2974383234977722, "learning_rate": 9.598390342994001e-07, "loss": 0.0166, "step": 214690 }, { "epoch": 2.293925957583204, "grad_norm": 4.869774341583252, "learning_rate": 9.598324367925031e-07, "loss": 0.0118, "step": 214700 }, { "epoch": 2.294032800897484, "grad_norm": 3.613770008087158, "learning_rate": 9.598258387664206e-07, "loss": 0.0192, "step": 214710 }, { "epoch": 2.2941396442117634, "grad_norm": 6.976055145263672, "learning_rate": 9.598192402211601e-07, "loss": 0.0993, "step": 214720 }, { "epoch": 2.294246487526043, "grad_norm": 7.487908840179443, "learning_rate": 9.59812641156729e-07, "loss": 0.0607, "step": 214730 }, { "epoch": 2.294353330840323, "grad_norm": 1.4962584972381592, "learning_rate": 9.598060415731346e-07, "loss": 0.0041, "step": 214740 }, { "epoch": 2.2944601741546022, "grad_norm": 0.20372626185417175, "learning_rate": 9.597994414703846e-07, "loss": 0.0053, "step": 214750 }, { "epoch": 2.2945670174688817, "grad_norm": 0.047538600862026215, "learning_rate": 9.59792840848486e-07, "loss": 0.0127, "step": 214760 }, { "epoch": 2.2946738607831616, "grad_norm": 11.662003517150879, "learning_rate": 9.59786239707447e-07, "loss": 0.0179, "step": 214770 }, { "epoch": 2.294780704097441, "grad_norm": 2.230403423309326, "learning_rate": 9.597796380472745e-07, "loss": 0.0127, "step": 214780 }, { "epoch": 2.2948875474117205, "grad_norm": 4.714676856994629, "learning_rate": 9.59773035867976e-07, "loss": 0.01, "step": 214790 }, { "epoch": 2.2949943907260004, "grad_norm": 3.463040351867676, "learning_rate": 9.597664331695593e-07, "loss": 0.0051, "step": 214800 }, { "epoch": 2.29510123404028, "grad_norm": 4.562132835388184, "learning_rate": 9.597598299520311e-07, "loss": 0.0214, "step": 214810 }, { "epoch": 2.29520807735456, "grad_norm": 0.05666958540678024, "learning_rate": 9.597532262153997e-07, "loss": 0.037, "step": 214820 }, { "epoch": 2.2953149206688392, "grad_norm": 0.020918026566505432, "learning_rate": 9.597466219596722e-07, "loss": 0.0454, "step": 214830 }, { "epoch": 2.2954217639831187, "grad_norm": 0.07046028226613998, "learning_rate": 9.59740017184856e-07, "loss": 0.0169, "step": 214840 }, { "epoch": 2.295528607297398, "grad_norm": 0.00820162333548069, "learning_rate": 9.597334118909587e-07, "loss": 0.0392, "step": 214850 }, { "epoch": 2.295635450611678, "grad_norm": 0.027839865535497665, "learning_rate": 9.597268060779875e-07, "loss": 0.0311, "step": 214860 }, { "epoch": 2.2957422939259575, "grad_norm": 0.34927311539649963, "learning_rate": 9.597201997459504e-07, "loss": 0.0162, "step": 214870 }, { "epoch": 2.2958491372402374, "grad_norm": 3.632558584213257, "learning_rate": 9.597135928948542e-07, "loss": 0.0095, "step": 214880 }, { "epoch": 2.295955980554517, "grad_norm": 4.2594380378723145, "learning_rate": 9.597069855247066e-07, "loss": 0.0701, "step": 214890 }, { "epoch": 2.2960628238687963, "grad_norm": 0.039879173040390015, "learning_rate": 9.597003776355154e-07, "loss": 0.0353, "step": 214900 }, { "epoch": 2.296169667183076, "grad_norm": 0.7540717720985413, "learning_rate": 9.596937692272876e-07, "loss": 0.0169, "step": 214910 }, { "epoch": 2.2962765104973557, "grad_norm": 6.352027416229248, "learning_rate": 9.596871603000306e-07, "loss": 0.0314, "step": 214920 }, { "epoch": 2.296383353811635, "grad_norm": 0.620574951171875, "learning_rate": 9.596805508537525e-07, "loss": 0.0254, "step": 214930 }, { "epoch": 2.296490197125915, "grad_norm": 3.9279730319976807, "learning_rate": 9.596739408884603e-07, "loss": 0.0313, "step": 214940 }, { "epoch": 2.2965970404401945, "grad_norm": 0.14591781795024872, "learning_rate": 9.596673304041613e-07, "loss": 0.0496, "step": 214950 }, { "epoch": 2.296703883754474, "grad_norm": 0.2859460711479187, "learning_rate": 9.596607194008634e-07, "loss": 0.0037, "step": 214960 }, { "epoch": 2.2968107270687534, "grad_norm": 0.002041004365310073, "learning_rate": 9.596541078785739e-07, "loss": 0.0512, "step": 214970 }, { "epoch": 2.2969175703830333, "grad_norm": 0.24773553013801575, "learning_rate": 9.596474958373e-07, "loss": 0.0223, "step": 214980 }, { "epoch": 2.297024413697313, "grad_norm": 1.6924152374267578, "learning_rate": 9.596408832770495e-07, "loss": 0.0227, "step": 214990 }, { "epoch": 2.2971312570115927, "grad_norm": 2.0440454483032227, "learning_rate": 9.596342701978297e-07, "loss": 0.0117, "step": 215000 }, { "epoch": 2.297238100325872, "grad_norm": 0.34588173031806946, "learning_rate": 9.596276565996483e-07, "loss": 0.0157, "step": 215010 }, { "epoch": 2.2973449436401516, "grad_norm": 0.03253989666700363, "learning_rate": 9.596210424825125e-07, "loss": 0.0089, "step": 215020 }, { "epoch": 2.297451786954431, "grad_norm": 2.6083788871765137, "learning_rate": 9.596144278464297e-07, "loss": 0.0238, "step": 215030 }, { "epoch": 2.297558630268711, "grad_norm": 2.5435690879821777, "learning_rate": 9.596078126914076e-07, "loss": 0.027, "step": 215040 }, { "epoch": 2.2976654735829904, "grad_norm": 1.9979801177978516, "learning_rate": 9.596011970174538e-07, "loss": 0.0883, "step": 215050 }, { "epoch": 2.2977723168972704, "grad_norm": 3.5659213066101074, "learning_rate": 9.595945808245753e-07, "loss": 0.0083, "step": 215060 }, { "epoch": 2.29787916021155, "grad_norm": 0.004261282738298178, "learning_rate": 9.5958796411278e-07, "loss": 0.0058, "step": 215070 }, { "epoch": 2.2979860035258293, "grad_norm": 0.008346719667315483, "learning_rate": 9.59581346882075e-07, "loss": 0.0288, "step": 215080 }, { "epoch": 2.298092846840109, "grad_norm": 10.104520797729492, "learning_rate": 9.595747291324681e-07, "loss": 0.0292, "step": 215090 }, { "epoch": 2.2981996901543886, "grad_norm": 0.2551591992378235, "learning_rate": 9.595681108639667e-07, "loss": 0.0211, "step": 215100 }, { "epoch": 2.298306533468668, "grad_norm": 0.5120007395744324, "learning_rate": 9.59561492076578e-07, "loss": 0.0142, "step": 215110 }, { "epoch": 2.298413376782948, "grad_norm": 1.8031553030014038, "learning_rate": 9.5955487277031e-07, "loss": 0.0334, "step": 215120 }, { "epoch": 2.2985202200972275, "grad_norm": 0.04814540222287178, "learning_rate": 9.595482529451698e-07, "loss": 0.0014, "step": 215130 }, { "epoch": 2.298627063411507, "grad_norm": 0.12299525737762451, "learning_rate": 9.595416326011648e-07, "loss": 0.0125, "step": 215140 }, { "epoch": 2.298733906725787, "grad_norm": 0.99820476770401, "learning_rate": 9.595350117383024e-07, "loss": 0.0093, "step": 215150 }, { "epoch": 2.2988407500400663, "grad_norm": 0.9959071278572083, "learning_rate": 9.595283903565907e-07, "loss": 0.0196, "step": 215160 }, { "epoch": 2.2989475933543457, "grad_norm": 1.286523699760437, "learning_rate": 9.595217684560367e-07, "loss": 0.0065, "step": 215170 }, { "epoch": 2.2990544366686256, "grad_norm": 3.207439661026001, "learning_rate": 9.59515146036648e-07, "loss": 0.0427, "step": 215180 }, { "epoch": 2.299161279982905, "grad_norm": 0.6504543423652649, "learning_rate": 9.595085230984316e-07, "loss": 0.0384, "step": 215190 }, { "epoch": 2.2992681232971846, "grad_norm": 0.06270986050367355, "learning_rate": 9.595018996413957e-07, "loss": 0.0022, "step": 215200 }, { "epoch": 2.2993749666114645, "grad_norm": 6.409342288970947, "learning_rate": 9.594952756655475e-07, "loss": 0.0434, "step": 215210 }, { "epoch": 2.299481809925744, "grad_norm": 0.23539286851882935, "learning_rate": 9.594886511708943e-07, "loss": 0.0139, "step": 215220 }, { "epoch": 2.2995886532400234, "grad_norm": 3.4588968753814697, "learning_rate": 9.59482026157444e-07, "loss": 0.0121, "step": 215230 }, { "epoch": 2.2996954965543033, "grad_norm": 0.04686254635453224, "learning_rate": 9.594754006252037e-07, "loss": 0.0012, "step": 215240 }, { "epoch": 2.2998023398685827, "grad_norm": 0.38135576248168945, "learning_rate": 9.594687745741809e-07, "loss": 0.0271, "step": 215250 }, { "epoch": 2.299909183182862, "grad_norm": 3.077481985092163, "learning_rate": 9.594621480043832e-07, "loss": 0.0179, "step": 215260 }, { "epoch": 2.300016026497142, "grad_norm": 0.005166003480553627, "learning_rate": 9.594555209158183e-07, "loss": 0.0119, "step": 215270 }, { "epoch": 2.3001228698114216, "grad_norm": 0.2641201913356781, "learning_rate": 9.59448893308493e-07, "loss": 0.021, "step": 215280 }, { "epoch": 2.300229713125701, "grad_norm": 0.9742857217788696, "learning_rate": 9.594422651824155e-07, "loss": 0.0323, "step": 215290 }, { "epoch": 2.300336556439981, "grad_norm": 0.18596936762332916, "learning_rate": 9.59435636537593e-07, "loss": 0.0248, "step": 215300 }, { "epoch": 2.3004433997542604, "grad_norm": 9.27375316619873, "learning_rate": 9.59429007374033e-07, "loss": 0.0564, "step": 215310 }, { "epoch": 2.30055024306854, "grad_norm": 0.03765955939888954, "learning_rate": 9.594223776917426e-07, "loss": 0.0085, "step": 215320 }, { "epoch": 2.3006570863828197, "grad_norm": 0.1797892302274704, "learning_rate": 9.594157474907302e-07, "loss": 0.0145, "step": 215330 }, { "epoch": 2.300763929697099, "grad_norm": 0.023342272266745567, "learning_rate": 9.594091167710025e-07, "loss": 0.0508, "step": 215340 }, { "epoch": 2.3008707730113787, "grad_norm": 2.2199037075042725, "learning_rate": 9.594024855325674e-07, "loss": 0.0042, "step": 215350 }, { "epoch": 2.3009776163256586, "grad_norm": 3.0707268714904785, "learning_rate": 9.59395853775432e-07, "loss": 0.0232, "step": 215360 }, { "epoch": 2.301084459639938, "grad_norm": 0.31177857518196106, "learning_rate": 9.59389221499604e-07, "loss": 0.0146, "step": 215370 }, { "epoch": 2.3011913029542175, "grad_norm": 3.767570734024048, "learning_rate": 9.59382588705091e-07, "loss": 0.0261, "step": 215380 }, { "epoch": 2.3012981462684974, "grad_norm": 0.6881078481674194, "learning_rate": 9.593759553919003e-07, "loss": 0.0107, "step": 215390 }, { "epoch": 2.301404989582777, "grad_norm": 0.19900549948215485, "learning_rate": 9.593693215600396e-07, "loss": 0.017, "step": 215400 }, { "epoch": 2.3015118328970563, "grad_norm": 0.011122725903987885, "learning_rate": 9.593626872095163e-07, "loss": 0.0158, "step": 215410 }, { "epoch": 2.301618676211336, "grad_norm": 3.566580057144165, "learning_rate": 9.593560523403377e-07, "loss": 0.0072, "step": 215420 }, { "epoch": 2.3017255195256157, "grad_norm": 0.7161718606948853, "learning_rate": 9.593494169525116e-07, "loss": 0.0073, "step": 215430 }, { "epoch": 2.301832362839895, "grad_norm": 6.860276699066162, "learning_rate": 9.593427810460453e-07, "loss": 0.0274, "step": 215440 }, { "epoch": 2.301939206154175, "grad_norm": 1.562483787536621, "learning_rate": 9.593361446209462e-07, "loss": 0.0196, "step": 215450 }, { "epoch": 2.3020460494684545, "grad_norm": 3.498645067214966, "learning_rate": 9.59329507677222e-07, "loss": 0.0127, "step": 215460 }, { "epoch": 2.302152892782734, "grad_norm": 6.344115734100342, "learning_rate": 9.593228702148802e-07, "loss": 0.0157, "step": 215470 }, { "epoch": 2.302259736097014, "grad_norm": 14.270906448364258, "learning_rate": 9.593162322339281e-07, "loss": 0.0105, "step": 215480 }, { "epoch": 2.3023665794112933, "grad_norm": 0.0706709623336792, "learning_rate": 9.593095937343734e-07, "loss": 0.0124, "step": 215490 }, { "epoch": 2.3024734227255728, "grad_norm": 0.004300371278077364, "learning_rate": 9.593029547162235e-07, "loss": 0.0124, "step": 215500 }, { "epoch": 2.3025802660398527, "grad_norm": 9.223244667053223, "learning_rate": 9.592963151794858e-07, "loss": 0.0375, "step": 215510 }, { "epoch": 2.302687109354132, "grad_norm": 0.148484006524086, "learning_rate": 9.592896751241678e-07, "loss": 0.0214, "step": 215520 }, { "epoch": 2.3027939526684116, "grad_norm": 0.02644466981291771, "learning_rate": 9.592830345502774e-07, "loss": 0.0417, "step": 215530 }, { "epoch": 2.3029007959826915, "grad_norm": 0.41912010312080383, "learning_rate": 9.592763934578216e-07, "loss": 0.0166, "step": 215540 }, { "epoch": 2.303007639296971, "grad_norm": 11.633810997009277, "learning_rate": 9.592697518468082e-07, "loss": 0.0424, "step": 215550 }, { "epoch": 2.3031144826112504, "grad_norm": 0.4327494502067566, "learning_rate": 9.592631097172444e-07, "loss": 0.0394, "step": 215560 }, { "epoch": 2.3032213259255303, "grad_norm": 0.48949235677719116, "learning_rate": 9.59256467069138e-07, "loss": 0.0107, "step": 215570 }, { "epoch": 2.3033281692398098, "grad_norm": 4.019737243652344, "learning_rate": 9.592498239024965e-07, "loss": 0.0191, "step": 215580 }, { "epoch": 2.3034350125540897, "grad_norm": 6.876791477203369, "learning_rate": 9.59243180217327e-07, "loss": 0.0467, "step": 215590 }, { "epoch": 2.303541855868369, "grad_norm": 2.1012625694274902, "learning_rate": 9.592365360136377e-07, "loss": 0.0054, "step": 215600 }, { "epoch": 2.3036486991826486, "grad_norm": 2.299325466156006, "learning_rate": 9.592298912914354e-07, "loss": 0.0214, "step": 215610 }, { "epoch": 2.303755542496928, "grad_norm": 0.050321366637945175, "learning_rate": 9.592232460507279e-07, "loss": 0.0246, "step": 215620 }, { "epoch": 2.303862385811208, "grad_norm": 4.56259298324585, "learning_rate": 9.592166002915228e-07, "loss": 0.0136, "step": 215630 }, { "epoch": 2.3039692291254874, "grad_norm": 0.3548952341079712, "learning_rate": 9.592099540138274e-07, "loss": 0.0361, "step": 215640 }, { "epoch": 2.3040760724397673, "grad_norm": 5.402568340301514, "learning_rate": 9.592033072176492e-07, "loss": 0.063, "step": 215650 }, { "epoch": 2.3041829157540468, "grad_norm": 2.116722583770752, "learning_rate": 9.591966599029961e-07, "loss": 0.0551, "step": 215660 }, { "epoch": 2.3042897590683262, "grad_norm": 0.330761581659317, "learning_rate": 9.59190012069875e-07, "loss": 0.0619, "step": 215670 }, { "epoch": 2.3043966023826057, "grad_norm": 0.009703024290502071, "learning_rate": 9.59183363718294e-07, "loss": 0.0465, "step": 215680 }, { "epoch": 2.3045034456968856, "grad_norm": 0.8346431851387024, "learning_rate": 9.591767148482602e-07, "loss": 0.0322, "step": 215690 }, { "epoch": 2.304610289011165, "grad_norm": 4.662460803985596, "learning_rate": 9.591700654597811e-07, "loss": 0.0097, "step": 215700 }, { "epoch": 2.304717132325445, "grad_norm": 0.02055681124329567, "learning_rate": 9.591634155528648e-07, "loss": 0.0217, "step": 215710 }, { "epoch": 2.3048239756397244, "grad_norm": 0.014519618824124336, "learning_rate": 9.591567651275179e-07, "loss": 0.0317, "step": 215720 }, { "epoch": 2.304930818954004, "grad_norm": 5.72626256942749, "learning_rate": 9.591501141837485e-07, "loss": 0.0493, "step": 215730 }, { "epoch": 2.3050376622682833, "grad_norm": 3.60546612739563, "learning_rate": 9.59143462721564e-07, "loss": 0.0073, "step": 215740 }, { "epoch": 2.3051445055825632, "grad_norm": 0.6258503198623657, "learning_rate": 9.59136810740972e-07, "loss": 0.0202, "step": 215750 }, { "epoch": 2.3052513488968427, "grad_norm": 2.38657546043396, "learning_rate": 9.591301582419795e-07, "loss": 0.0342, "step": 215760 }, { "epoch": 2.3053581922111226, "grad_norm": 0.3791584074497223, "learning_rate": 9.591235052245947e-07, "loss": 0.0143, "step": 215770 }, { "epoch": 2.305465035525402, "grad_norm": 0.4865868091583252, "learning_rate": 9.591168516888246e-07, "loss": 0.007, "step": 215780 }, { "epoch": 2.3055718788396815, "grad_norm": 0.012198585085570812, "learning_rate": 9.591101976346772e-07, "loss": 0.0225, "step": 215790 }, { "epoch": 2.305678722153961, "grad_norm": 0.0918658971786499, "learning_rate": 9.591035430621596e-07, "loss": 0.0091, "step": 215800 }, { "epoch": 2.305785565468241, "grad_norm": 1.4399515390396118, "learning_rate": 9.590968879712794e-07, "loss": 0.0157, "step": 215810 }, { "epoch": 2.3058924087825203, "grad_norm": 0.0033456578385084867, "learning_rate": 9.590902323620443e-07, "loss": 0.0162, "step": 215820 }, { "epoch": 2.3059992520968002, "grad_norm": 5.270191669464111, "learning_rate": 9.590835762344617e-07, "loss": 0.033, "step": 215830 }, { "epoch": 2.3061060954110797, "grad_norm": 4.158712863922119, "learning_rate": 9.59076919588539e-07, "loss": 0.0474, "step": 215840 }, { "epoch": 2.306212938725359, "grad_norm": 2.126509189605713, "learning_rate": 9.590702624242836e-07, "loss": 0.0094, "step": 215850 }, { "epoch": 2.306319782039639, "grad_norm": 1.2637584209442139, "learning_rate": 9.590636047417033e-07, "loss": 0.0593, "step": 215860 }, { "epoch": 2.3064266253539185, "grad_norm": 0.036787308752536774, "learning_rate": 9.59056946540806e-07, "loss": 0.0095, "step": 215870 }, { "epoch": 2.306533468668198, "grad_norm": 0.010050511918962002, "learning_rate": 9.590502878215983e-07, "loss": 0.0112, "step": 215880 }, { "epoch": 2.306640311982478, "grad_norm": 0.004281788133084774, "learning_rate": 9.590436285840882e-07, "loss": 0.0414, "step": 215890 }, { "epoch": 2.3067471552967573, "grad_norm": 3.60073184967041, "learning_rate": 9.590369688282833e-07, "loss": 0.0334, "step": 215900 }, { "epoch": 2.306853998611037, "grad_norm": 10.575663566589355, "learning_rate": 9.59030308554191e-07, "loss": 0.0146, "step": 215910 }, { "epoch": 2.3069608419253167, "grad_norm": 0.021096862852573395, "learning_rate": 9.590236477618189e-07, "loss": 0.0154, "step": 215920 }, { "epoch": 2.307067685239596, "grad_norm": 0.003639624221250415, "learning_rate": 9.590169864511743e-07, "loss": 0.0205, "step": 215930 }, { "epoch": 2.3071745285538756, "grad_norm": 1.3103333711624146, "learning_rate": 9.590103246222652e-07, "loss": 0.0368, "step": 215940 }, { "epoch": 2.3072813718681555, "grad_norm": 0.19389145076274872, "learning_rate": 9.590036622750985e-07, "loss": 0.0223, "step": 215950 }, { "epoch": 2.307388215182435, "grad_norm": 4.121248245239258, "learning_rate": 9.58996999409682e-07, "loss": 0.0229, "step": 215960 }, { "epoch": 2.3074950584967144, "grad_norm": 2.9233171939849854, "learning_rate": 9.589903360260233e-07, "loss": 0.0055, "step": 215970 }, { "epoch": 2.3076019018109943, "grad_norm": 9.586801528930664, "learning_rate": 9.5898367212413e-07, "loss": 0.0415, "step": 215980 }, { "epoch": 2.307708745125274, "grad_norm": 3.253523826599121, "learning_rate": 9.589770077040095e-07, "loss": 0.0172, "step": 215990 }, { "epoch": 2.3078155884395533, "grad_norm": 10.420507431030273, "learning_rate": 9.589703427656693e-07, "loss": 0.0751, "step": 216000 }, { "epoch": 2.307922431753833, "grad_norm": 0.016158364713191986, "learning_rate": 9.589636773091167e-07, "loss": 0.0805, "step": 216010 }, { "epoch": 2.3080292750681126, "grad_norm": 1.4943206310272217, "learning_rate": 9.589570113343598e-07, "loss": 0.023, "step": 216020 }, { "epoch": 2.308136118382392, "grad_norm": 0.00732771772891283, "learning_rate": 9.589503448414056e-07, "loss": 0.007, "step": 216030 }, { "epoch": 2.308242961696672, "grad_norm": 0.2396685779094696, "learning_rate": 9.589436778302619e-07, "loss": 0.0062, "step": 216040 }, { "epoch": 2.3083498050109514, "grad_norm": 0.44534340500831604, "learning_rate": 9.58937010300936e-07, "loss": 0.0175, "step": 216050 }, { "epoch": 2.308456648325231, "grad_norm": 8.312079429626465, "learning_rate": 9.589303422534359e-07, "loss": 0.0155, "step": 216060 }, { "epoch": 2.308563491639511, "grad_norm": 0.08613086491823196, "learning_rate": 9.589236736877686e-07, "loss": 0.017, "step": 216070 }, { "epoch": 2.3086703349537903, "grad_norm": 3.7549240589141846, "learning_rate": 9.589170046039418e-07, "loss": 0.0255, "step": 216080 }, { "epoch": 2.3087771782680697, "grad_norm": 0.03196745738387108, "learning_rate": 9.589103350019632e-07, "loss": 0.0234, "step": 216090 }, { "epoch": 2.3088840215823496, "grad_norm": 0.2355024218559265, "learning_rate": 9.589036648818399e-07, "loss": 0.0061, "step": 216100 }, { "epoch": 2.308990864896629, "grad_norm": 2.838085889816284, "learning_rate": 9.588969942435801e-07, "loss": 0.0221, "step": 216110 }, { "epoch": 2.3090977082109085, "grad_norm": 0.5867419838905334, "learning_rate": 9.588903230871908e-07, "loss": 0.0167, "step": 216120 }, { "epoch": 2.3092045515251884, "grad_norm": 0.007706296164542437, "learning_rate": 9.588836514126797e-07, "loss": 0.0097, "step": 216130 }, { "epoch": 2.309311394839468, "grad_norm": 0.09992541372776031, "learning_rate": 9.588769792200542e-07, "loss": 0.0197, "step": 216140 }, { "epoch": 2.3094182381537474, "grad_norm": 0.011210000142455101, "learning_rate": 9.58870306509322e-07, "loss": 0.0255, "step": 216150 }, { "epoch": 2.3095250814680273, "grad_norm": 0.008640773594379425, "learning_rate": 9.588636332804907e-07, "loss": 0.0047, "step": 216160 }, { "epoch": 2.3096319247823067, "grad_norm": 1.4895867109298706, "learning_rate": 9.588569595335675e-07, "loss": 0.0094, "step": 216170 }, { "epoch": 2.309738768096586, "grad_norm": 0.038690607994794846, "learning_rate": 9.588502852685605e-07, "loss": 0.0817, "step": 216180 }, { "epoch": 2.309845611410866, "grad_norm": 3.360840320587158, "learning_rate": 9.588436104854768e-07, "loss": 0.0142, "step": 216190 }, { "epoch": 2.3099524547251455, "grad_norm": 6.815606117248535, "learning_rate": 9.58836935184324e-07, "loss": 0.0376, "step": 216200 }, { "epoch": 2.310059298039425, "grad_norm": 0.12096275389194489, "learning_rate": 9.588302593651095e-07, "loss": 0.0085, "step": 216210 }, { "epoch": 2.310166141353705, "grad_norm": 5.435888767242432, "learning_rate": 9.588235830278413e-07, "loss": 0.01, "step": 216220 }, { "epoch": 2.3102729846679844, "grad_norm": 0.11638814210891724, "learning_rate": 9.588169061725264e-07, "loss": 0.0136, "step": 216230 }, { "epoch": 2.310379827982264, "grad_norm": 0.49156877398490906, "learning_rate": 9.588102287991727e-07, "loss": 0.0106, "step": 216240 }, { "epoch": 2.3104866712965437, "grad_norm": 0.06127845495939255, "learning_rate": 9.588035509077877e-07, "loss": 0.0224, "step": 216250 }, { "epoch": 2.310593514610823, "grad_norm": 0.007863687351346016, "learning_rate": 9.587968724983786e-07, "loss": 0.0314, "step": 216260 }, { "epoch": 2.3107003579251026, "grad_norm": 0.734758734703064, "learning_rate": 9.587901935709533e-07, "loss": 0.0331, "step": 216270 }, { "epoch": 2.3108072012393825, "grad_norm": 0.022463420405983925, "learning_rate": 9.587835141255194e-07, "loss": 0.011, "step": 216280 }, { "epoch": 2.310914044553662, "grad_norm": 1.1482011079788208, "learning_rate": 9.587768341620843e-07, "loss": 0.0485, "step": 216290 }, { "epoch": 2.311020887867942, "grad_norm": 7.3436174392700195, "learning_rate": 9.587701536806554e-07, "loss": 0.0059, "step": 216300 }, { "epoch": 2.3111277311822214, "grad_norm": 0.16887368261814117, "learning_rate": 9.587634726812403e-07, "loss": 0.0049, "step": 216310 }, { "epoch": 2.311234574496501, "grad_norm": 0.14167943596839905, "learning_rate": 9.587567911638468e-07, "loss": 0.0438, "step": 216320 }, { "epoch": 2.3113414178107803, "grad_norm": 1.2991234064102173, "learning_rate": 9.587501091284822e-07, "loss": 0.0104, "step": 216330 }, { "epoch": 2.31144826112506, "grad_norm": 8.27585506439209, "learning_rate": 9.58743426575154e-07, "loss": 0.0455, "step": 216340 }, { "epoch": 2.3115551044393396, "grad_norm": 3.828817844390869, "learning_rate": 9.5873674350387e-07, "loss": 0.0231, "step": 216350 }, { "epoch": 2.3116619477536196, "grad_norm": 3.095900058746338, "learning_rate": 9.587300599146375e-07, "loss": 0.0224, "step": 216360 }, { "epoch": 2.311768791067899, "grad_norm": 2.4271926879882812, "learning_rate": 9.58723375807464e-07, "loss": 0.0249, "step": 216370 }, { "epoch": 2.3118756343821785, "grad_norm": 13.899408340454102, "learning_rate": 9.587166911823574e-07, "loss": 0.0534, "step": 216380 }, { "epoch": 2.311982477696458, "grad_norm": 0.03505375236272812, "learning_rate": 9.58710006039325e-07, "loss": 0.0203, "step": 216390 }, { "epoch": 2.312089321010738, "grad_norm": 0.051713768392801285, "learning_rate": 9.587033203783743e-07, "loss": 0.0147, "step": 216400 }, { "epoch": 2.3121961643250173, "grad_norm": 1.453436255455017, "learning_rate": 9.58696634199513e-07, "loss": 0.018, "step": 216410 }, { "epoch": 2.312303007639297, "grad_norm": 2.380263566970825, "learning_rate": 9.586899475027487e-07, "loss": 0.0045, "step": 216420 }, { "epoch": 2.3124098509535767, "grad_norm": 1.6115823984146118, "learning_rate": 9.586832602880887e-07, "loss": 0.0296, "step": 216430 }, { "epoch": 2.312516694267856, "grad_norm": 0.3389602303504944, "learning_rate": 9.586765725555407e-07, "loss": 0.0359, "step": 216440 }, { "epoch": 2.3126235375821356, "grad_norm": 4.450758457183838, "learning_rate": 9.586698843051121e-07, "loss": 0.0256, "step": 216450 }, { "epoch": 2.3127303808964155, "grad_norm": 0.9181351661682129, "learning_rate": 9.586631955368108e-07, "loss": 0.0412, "step": 216460 }, { "epoch": 2.312837224210695, "grad_norm": 0.9006718993186951, "learning_rate": 9.58656506250644e-07, "loss": 0.0135, "step": 216470 }, { "epoch": 2.312944067524975, "grad_norm": 0.5560184121131897, "learning_rate": 9.586498164466194e-07, "loss": 0.0218, "step": 216480 }, { "epoch": 2.3130509108392543, "grad_norm": 0.12439830601215363, "learning_rate": 9.586431261247446e-07, "loss": 0.025, "step": 216490 }, { "epoch": 2.3131577541535338, "grad_norm": 0.16207343339920044, "learning_rate": 9.586364352850272e-07, "loss": 0.0209, "step": 216500 }, { "epoch": 2.313264597467813, "grad_norm": 12.172348022460938, "learning_rate": 9.586297439274744e-07, "loss": 0.0314, "step": 216510 }, { "epoch": 2.313371440782093, "grad_norm": 1.533550500869751, "learning_rate": 9.586230520520942e-07, "loss": 0.0036, "step": 216520 }, { "epoch": 2.3134782840963726, "grad_norm": 0.016714882105588913, "learning_rate": 9.586163596588938e-07, "loss": 0.0065, "step": 216530 }, { "epoch": 2.3135851274106525, "grad_norm": 0.07313881069421768, "learning_rate": 9.586096667478811e-07, "loss": 0.0202, "step": 216540 }, { "epoch": 2.313691970724932, "grad_norm": 0.06242689862847328, "learning_rate": 9.586029733190632e-07, "loss": 0.0092, "step": 216550 }, { "epoch": 2.3137988140392114, "grad_norm": 0.05480843782424927, "learning_rate": 9.585962793724482e-07, "loss": 0.0375, "step": 216560 }, { "epoch": 2.3139056573534913, "grad_norm": 0.9981322288513184, "learning_rate": 9.585895849080432e-07, "loss": 0.0059, "step": 216570 }, { "epoch": 2.3140125006677708, "grad_norm": 0.1621183604001999, "learning_rate": 9.58582889925856e-07, "loss": 0.0167, "step": 216580 }, { "epoch": 2.31411934398205, "grad_norm": 1.0159733295440674, "learning_rate": 9.585761944258942e-07, "loss": 0.0175, "step": 216590 }, { "epoch": 2.31422618729633, "grad_norm": 0.07394930720329285, "learning_rate": 9.585694984081652e-07, "loss": 0.066, "step": 216600 }, { "epoch": 2.3143330306106096, "grad_norm": 1.6856722831726074, "learning_rate": 9.585628018726767e-07, "loss": 0.0364, "step": 216610 }, { "epoch": 2.314439873924889, "grad_norm": 0.07131604105234146, "learning_rate": 9.585561048194362e-07, "loss": 0.0386, "step": 216620 }, { "epoch": 2.314546717239169, "grad_norm": 0.16246533393859863, "learning_rate": 9.58549407248451e-07, "loss": 0.0082, "step": 216630 }, { "epoch": 2.3146535605534484, "grad_norm": 0.054181016981601715, "learning_rate": 9.58542709159729e-07, "loss": 0.0057, "step": 216640 }, { "epoch": 2.314760403867728, "grad_norm": 0.0971221998333931, "learning_rate": 9.585360105532779e-07, "loss": 0.0011, "step": 216650 }, { "epoch": 2.3148672471820078, "grad_norm": 3.4739034175872803, "learning_rate": 9.58529311429105e-07, "loss": 0.0141, "step": 216660 }, { "epoch": 2.314974090496287, "grad_norm": 0.05443647503852844, "learning_rate": 9.585226117872176e-07, "loss": 0.0327, "step": 216670 }, { "epoch": 2.3150809338105667, "grad_norm": 2.2792088985443115, "learning_rate": 9.585159116276237e-07, "loss": 0.0242, "step": 216680 }, { "epoch": 2.3151877771248466, "grad_norm": 1.154192566871643, "learning_rate": 9.58509210950331e-07, "loss": 0.0598, "step": 216690 }, { "epoch": 2.315294620439126, "grad_norm": 5.781087875366211, "learning_rate": 9.585025097553466e-07, "loss": 0.0068, "step": 216700 }, { "epoch": 2.3154014637534055, "grad_norm": 0.007148489821702242, "learning_rate": 9.584958080426782e-07, "loss": 0.0144, "step": 216710 }, { "epoch": 2.3155083070676854, "grad_norm": 5.399107933044434, "learning_rate": 9.584891058123334e-07, "loss": 0.0087, "step": 216720 }, { "epoch": 2.315615150381965, "grad_norm": 1.2917346954345703, "learning_rate": 9.5848240306432e-07, "loss": 0.0467, "step": 216730 }, { "epoch": 2.3157219936962443, "grad_norm": 0.0021370332688093185, "learning_rate": 9.584756997986452e-07, "loss": 0.0061, "step": 216740 }, { "epoch": 2.315828837010524, "grad_norm": 0.0008975094533525407, "learning_rate": 9.584689960153167e-07, "loss": 0.0584, "step": 216750 }, { "epoch": 2.3159356803248037, "grad_norm": 4.105907440185547, "learning_rate": 9.584622917143422e-07, "loss": 0.0327, "step": 216760 }, { "epoch": 2.316042523639083, "grad_norm": 0.4861965775489807, "learning_rate": 9.584555868957291e-07, "loss": 0.0151, "step": 216770 }, { "epoch": 2.316149366953363, "grad_norm": 5.695103168487549, "learning_rate": 9.584488815594851e-07, "loss": 0.0289, "step": 216780 }, { "epoch": 2.3162562102676425, "grad_norm": 0.016263915225863457, "learning_rate": 9.584421757056176e-07, "loss": 0.0073, "step": 216790 }, { "epoch": 2.316363053581922, "grad_norm": 1.0426074266433716, "learning_rate": 9.584354693341344e-07, "loss": 0.0072, "step": 216800 }, { "epoch": 2.316469896896202, "grad_norm": 0.3028102517127991, "learning_rate": 9.58428762445043e-07, "loss": 0.0212, "step": 216810 }, { "epoch": 2.3165767402104813, "grad_norm": 0.044999655336141586, "learning_rate": 9.584220550383508e-07, "loss": 0.0219, "step": 216820 }, { "epoch": 2.316683583524761, "grad_norm": 0.031204991042613983, "learning_rate": 9.584153471140656e-07, "loss": 0.0343, "step": 216830 }, { "epoch": 2.3167904268390407, "grad_norm": 1.5655746459960938, "learning_rate": 9.584086386721948e-07, "loss": 0.0138, "step": 216840 }, { "epoch": 2.31689727015332, "grad_norm": 1.4454798698425293, "learning_rate": 9.584019297127462e-07, "loss": 0.0071, "step": 216850 }, { "epoch": 2.3170041134675996, "grad_norm": 2.2450578212738037, "learning_rate": 9.58395220235727e-07, "loss": 0.0038, "step": 216860 }, { "epoch": 2.3171109567818795, "grad_norm": 0.1526404321193695, "learning_rate": 9.58388510241145e-07, "loss": 0.0254, "step": 216870 }, { "epoch": 2.317217800096159, "grad_norm": 0.1744176149368286, "learning_rate": 9.58381799729008e-07, "loss": 0.0318, "step": 216880 }, { "epoch": 2.3173246434104384, "grad_norm": 0.005647390615195036, "learning_rate": 9.583750886993232e-07, "loss": 0.0238, "step": 216890 }, { "epoch": 2.3174314867247183, "grad_norm": 4.921208381652832, "learning_rate": 9.583683771520985e-07, "loss": 0.0608, "step": 216900 }, { "epoch": 2.317538330038998, "grad_norm": 4.504777908325195, "learning_rate": 9.583616650873411e-07, "loss": 0.0295, "step": 216910 }, { "epoch": 2.3176451733532772, "grad_norm": 0.5487567782402039, "learning_rate": 9.58354952505059e-07, "loss": 0.0036, "step": 216920 }, { "epoch": 2.317752016667557, "grad_norm": 0.10780606418848038, "learning_rate": 9.583482394052593e-07, "loss": 0.0063, "step": 216930 }, { "epoch": 2.3178588599818366, "grad_norm": 0.021997617557644844, "learning_rate": 9.583415257879498e-07, "loss": 0.0178, "step": 216940 }, { "epoch": 2.317965703296116, "grad_norm": 0.04902965947985649, "learning_rate": 9.583348116531384e-07, "loss": 0.0072, "step": 216950 }, { "epoch": 2.318072546610396, "grad_norm": 1.4081752300262451, "learning_rate": 9.583280970008324e-07, "loss": 0.0146, "step": 216960 }, { "epoch": 2.3181793899246754, "grad_norm": 2.316603422164917, "learning_rate": 9.583213818310392e-07, "loss": 0.0037, "step": 216970 }, { "epoch": 2.318286233238955, "grad_norm": 0.3388117849826813, "learning_rate": 9.583146661437667e-07, "loss": 0.0079, "step": 216980 }, { "epoch": 2.318393076553235, "grad_norm": 0.018899904564023018, "learning_rate": 9.583079499390223e-07, "loss": 0.0262, "step": 216990 }, { "epoch": 2.3184999198675142, "grad_norm": 2.2534749507904053, "learning_rate": 9.583012332168137e-07, "loss": 0.0709, "step": 217000 }, { "epoch": 2.3186067631817937, "grad_norm": 1.1881581544876099, "learning_rate": 9.582945159771483e-07, "loss": 0.017, "step": 217010 }, { "epoch": 2.3187136064960736, "grad_norm": 7.247790336608887, "learning_rate": 9.582877982200337e-07, "loss": 0.0306, "step": 217020 }, { "epoch": 2.318820449810353, "grad_norm": 0.04838595166802406, "learning_rate": 9.58281079945478e-07, "loss": 0.0162, "step": 217030 }, { "epoch": 2.3189272931246325, "grad_norm": 0.004322635009884834, "learning_rate": 9.58274361153488e-07, "loss": 0.004, "step": 217040 }, { "epoch": 2.3190341364389124, "grad_norm": 0.007233687676489353, "learning_rate": 9.582676418440718e-07, "loss": 0.0291, "step": 217050 }, { "epoch": 2.319140979753192, "grad_norm": 0.007514414843171835, "learning_rate": 9.582609220172368e-07, "loss": 0.0224, "step": 217060 }, { "epoch": 2.319247823067472, "grad_norm": 0.043538935482501984, "learning_rate": 9.582542016729907e-07, "loss": 0.0099, "step": 217070 }, { "epoch": 2.3193546663817513, "grad_norm": 1.5675547122955322, "learning_rate": 9.58247480811341e-07, "loss": 0.0097, "step": 217080 }, { "epoch": 2.3194615096960307, "grad_norm": 1.4931095838546753, "learning_rate": 9.582407594322953e-07, "loss": 0.0205, "step": 217090 }, { "epoch": 2.31956835301031, "grad_norm": 0.037844762206077576, "learning_rate": 9.58234037535861e-07, "loss": 0.0118, "step": 217100 }, { "epoch": 2.31967519632459, "grad_norm": 2.840205430984497, "learning_rate": 9.58227315122046e-07, "loss": 0.0555, "step": 217110 }, { "epoch": 2.3197820396388695, "grad_norm": 0.03506486862897873, "learning_rate": 9.582205921908578e-07, "loss": 0.0063, "step": 217120 }, { "epoch": 2.3198888829531494, "grad_norm": 0.18206992745399475, "learning_rate": 9.582138687423042e-07, "loss": 0.0432, "step": 217130 }, { "epoch": 2.319995726267429, "grad_norm": 1.0406125783920288, "learning_rate": 9.582071447763924e-07, "loss": 0.0192, "step": 217140 }, { "epoch": 2.3201025695817084, "grad_norm": 6.021522521972656, "learning_rate": 9.582004202931302e-07, "loss": 0.0079, "step": 217150 }, { "epoch": 2.320209412895988, "grad_norm": 4.496947765350342, "learning_rate": 9.581936952925248e-07, "loss": 0.016, "step": 217160 }, { "epoch": 2.3203162562102677, "grad_norm": 2.3188185691833496, "learning_rate": 9.581869697745846e-07, "loss": 0.0129, "step": 217170 }, { "epoch": 2.320423099524547, "grad_norm": 0.8207492232322693, "learning_rate": 9.581802437393164e-07, "loss": 0.0186, "step": 217180 }, { "epoch": 2.320529942838827, "grad_norm": 0.07711431384086609, "learning_rate": 9.581735171867284e-07, "loss": 0.0129, "step": 217190 }, { "epoch": 2.3206367861531065, "grad_norm": 0.03477950021624565, "learning_rate": 9.581667901168277e-07, "loss": 0.0257, "step": 217200 }, { "epoch": 2.320743629467386, "grad_norm": 17.46845817565918, "learning_rate": 9.581600625296221e-07, "loss": 0.0222, "step": 217210 }, { "epoch": 2.3208504727816655, "grad_norm": 0.013785652816295624, "learning_rate": 9.581533344251194e-07, "loss": 0.0102, "step": 217220 }, { "epoch": 2.3209573160959454, "grad_norm": 0.041357602924108505, "learning_rate": 9.58146605803327e-07, "loss": 0.0013, "step": 217230 }, { "epoch": 2.321064159410225, "grad_norm": 9.454115867614746, "learning_rate": 9.581398766642523e-07, "loss": 0.0251, "step": 217240 }, { "epoch": 2.3211710027245047, "grad_norm": 4.368180274963379, "learning_rate": 9.581331470079033e-07, "loss": 0.0344, "step": 217250 }, { "epoch": 2.321277846038784, "grad_norm": 1.9176220893859863, "learning_rate": 9.581264168342873e-07, "loss": 0.0089, "step": 217260 }, { "epoch": 2.3213846893530636, "grad_norm": 3.859058141708374, "learning_rate": 9.58119686143412e-07, "loss": 0.0232, "step": 217270 }, { "epoch": 2.321491532667343, "grad_norm": 0.04376664012670517, "learning_rate": 9.58112954935285e-07, "loss": 0.0267, "step": 217280 }, { "epoch": 2.321598375981623, "grad_norm": 0.09563615918159485, "learning_rate": 9.58106223209914e-07, "loss": 0.0088, "step": 217290 }, { "epoch": 2.3217052192959025, "grad_norm": 5.389126300811768, "learning_rate": 9.580994909673064e-07, "loss": 0.0494, "step": 217300 }, { "epoch": 2.3218120626101824, "grad_norm": 7.6360015869140625, "learning_rate": 9.580927582074699e-07, "loss": 0.0253, "step": 217310 }, { "epoch": 2.321918905924462, "grad_norm": 1.766417145729065, "learning_rate": 9.580860249304122e-07, "loss": 0.0188, "step": 217320 }, { "epoch": 2.3220257492387413, "grad_norm": 0.18474312126636505, "learning_rate": 9.580792911361406e-07, "loss": 0.0342, "step": 217330 }, { "epoch": 2.322132592553021, "grad_norm": 0.01947515271604061, "learning_rate": 9.58072556824663e-07, "loss": 0.0039, "step": 217340 }, { "epoch": 2.3222394358673006, "grad_norm": 1.6148093938827515, "learning_rate": 9.580658219959869e-07, "loss": 0.0126, "step": 217350 }, { "epoch": 2.32234627918158, "grad_norm": 4.451333045959473, "learning_rate": 9.580590866501202e-07, "loss": 0.0512, "step": 217360 }, { "epoch": 2.32245312249586, "grad_norm": 0.2623223066329956, "learning_rate": 9.5805235078707e-07, "loss": 0.0244, "step": 217370 }, { "epoch": 2.3225599658101395, "grad_norm": 0.048161692917346954, "learning_rate": 9.58045614406844e-07, "loss": 0.0406, "step": 217380 }, { "epoch": 2.322666809124419, "grad_norm": 0.048126161098480225, "learning_rate": 9.5803887750945e-07, "loss": 0.101, "step": 217390 }, { "epoch": 2.322773652438699, "grad_norm": 1.0635737180709839, "learning_rate": 9.580321400948955e-07, "loss": 0.0129, "step": 217400 }, { "epoch": 2.3228804957529783, "grad_norm": 1.0987036228179932, "learning_rate": 9.580254021631883e-07, "loss": 0.0368, "step": 217410 }, { "epoch": 2.3229873390672577, "grad_norm": 8.275036811828613, "learning_rate": 9.580186637143357e-07, "loss": 0.0232, "step": 217420 }, { "epoch": 2.3230941823815376, "grad_norm": 0.09214580804109573, "learning_rate": 9.580119247483456e-07, "loss": 0.0194, "step": 217430 }, { "epoch": 2.323201025695817, "grad_norm": 0.560463547706604, "learning_rate": 9.580051852652253e-07, "loss": 0.0083, "step": 217440 }, { "epoch": 2.3233078690100966, "grad_norm": 0.016845202073454857, "learning_rate": 9.579984452649828e-07, "loss": 0.0085, "step": 217450 }, { "epoch": 2.3234147123243765, "grad_norm": 1.1304961442947388, "learning_rate": 9.579917047476253e-07, "loss": 0.0264, "step": 217460 }, { "epoch": 2.323521555638656, "grad_norm": 4.365717887878418, "learning_rate": 9.579849637131606e-07, "loss": 0.0171, "step": 217470 }, { "epoch": 2.3236283989529354, "grad_norm": 4.21537971496582, "learning_rate": 9.579782221615965e-07, "loss": 0.0304, "step": 217480 }, { "epoch": 2.3237352422672153, "grad_norm": 0.4779389202594757, "learning_rate": 9.579714800929401e-07, "loss": 0.0949, "step": 217490 }, { "epoch": 2.3238420855814947, "grad_norm": 0.21259015798568726, "learning_rate": 9.579647375071996e-07, "loss": 0.0258, "step": 217500 }, { "epoch": 2.323948928895774, "grad_norm": 0.00582478241994977, "learning_rate": 9.579579944043823e-07, "loss": 0.0201, "step": 217510 }, { "epoch": 2.324055772210054, "grad_norm": 7.311122417449951, "learning_rate": 9.579512507844958e-07, "loss": 0.0078, "step": 217520 }, { "epoch": 2.3241626155243336, "grad_norm": 0.0013514877064153552, "learning_rate": 9.579445066475478e-07, "loss": 0.024, "step": 217530 }, { "epoch": 2.324269458838613, "grad_norm": 0.011100404895842075, "learning_rate": 9.57937761993546e-07, "loss": 0.0209, "step": 217540 }, { "epoch": 2.324376302152893, "grad_norm": 6.435516834259033, "learning_rate": 9.579310168224976e-07, "loss": 0.0379, "step": 217550 }, { "epoch": 2.3244831454671724, "grad_norm": 3.678412437438965, "learning_rate": 9.579242711344108e-07, "loss": 0.0161, "step": 217560 }, { "epoch": 2.324589988781452, "grad_norm": 3.8545546531677246, "learning_rate": 9.57917524929293e-07, "loss": 0.0048, "step": 217570 }, { "epoch": 2.3246968320957317, "grad_norm": 4.177701950073242, "learning_rate": 9.579107782071515e-07, "loss": 0.0223, "step": 217580 }, { "epoch": 2.324803675410011, "grad_norm": 0.1133614033460617, "learning_rate": 9.579040309679943e-07, "loss": 0.0575, "step": 217590 }, { "epoch": 2.3249105187242907, "grad_norm": 1.6542046070098877, "learning_rate": 9.57897283211829e-07, "loss": 0.0168, "step": 217600 }, { "epoch": 2.3250173620385706, "grad_norm": 0.9729034304618835, "learning_rate": 9.57890534938663e-07, "loss": 0.0139, "step": 217610 }, { "epoch": 2.32512420535285, "grad_norm": 5.28851842880249, "learning_rate": 9.578837861485037e-07, "loss": 0.0546, "step": 217620 }, { "epoch": 2.3252310486671295, "grad_norm": 1.3084707260131836, "learning_rate": 9.578770368413596e-07, "loss": 0.0102, "step": 217630 }, { "epoch": 2.3253378919814094, "grad_norm": 0.012602847069501877, "learning_rate": 9.578702870172374e-07, "loss": 0.0122, "step": 217640 }, { "epoch": 2.325444735295689, "grad_norm": 1.1273225545883179, "learning_rate": 9.578635366761454e-07, "loss": 0.0169, "step": 217650 }, { "epoch": 2.3255515786099683, "grad_norm": 5.95058012008667, "learning_rate": 9.578567858180907e-07, "loss": 0.0173, "step": 217660 }, { "epoch": 2.325658421924248, "grad_norm": 0.00217883987352252, "learning_rate": 9.57850034443081e-07, "loss": 0.0179, "step": 217670 }, { "epoch": 2.3257652652385277, "grad_norm": 0.007036405615508556, "learning_rate": 9.578432825511242e-07, "loss": 0.0115, "step": 217680 }, { "epoch": 2.325872108552807, "grad_norm": 3.493814706802368, "learning_rate": 9.57836530142228e-07, "loss": 0.0132, "step": 217690 }, { "epoch": 2.325978951867087, "grad_norm": 0.5136175751686096, "learning_rate": 9.578297772163994e-07, "loss": 0.0127, "step": 217700 }, { "epoch": 2.3260857951813665, "grad_norm": 0.002379374811425805, "learning_rate": 9.578230237736466e-07, "loss": 0.0014, "step": 217710 }, { "epoch": 2.326192638495646, "grad_norm": 1.6447255611419678, "learning_rate": 9.57816269813977e-07, "loss": 0.0209, "step": 217720 }, { "epoch": 2.326299481809926, "grad_norm": 1.0650988817214966, "learning_rate": 9.578095153373985e-07, "loss": 0.0221, "step": 217730 }, { "epoch": 2.3264063251242053, "grad_norm": 0.010391500778496265, "learning_rate": 9.578027603439185e-07, "loss": 0.0321, "step": 217740 }, { "epoch": 2.3265131684384848, "grad_norm": 5.492410659790039, "learning_rate": 9.577960048335444e-07, "loss": 0.0931, "step": 217750 }, { "epoch": 2.3266200117527647, "grad_norm": 0.24883997440338135, "learning_rate": 9.577892488062842e-07, "loss": 0.0222, "step": 217760 }, { "epoch": 2.326726855067044, "grad_norm": 0.018101368099451065, "learning_rate": 9.577824922621452e-07, "loss": 0.0077, "step": 217770 }, { "epoch": 2.326833698381324, "grad_norm": 0.0096522755920887, "learning_rate": 9.577757352011353e-07, "loss": 0.0112, "step": 217780 }, { "epoch": 2.3269405416956035, "grad_norm": 0.13171517848968506, "learning_rate": 9.577689776232623e-07, "loss": 0.0097, "step": 217790 }, { "epoch": 2.327047385009883, "grad_norm": 4.369137763977051, "learning_rate": 9.577622195285333e-07, "loss": 0.0361, "step": 217800 }, { "epoch": 2.3271542283241624, "grad_norm": 1.2104427814483643, "learning_rate": 9.577554609169563e-07, "loss": 0.018, "step": 217810 }, { "epoch": 2.3272610716384423, "grad_norm": 10.612444877624512, "learning_rate": 9.577487017885389e-07, "loss": 0.0362, "step": 217820 }, { "epoch": 2.3273679149527218, "grad_norm": 5.290512561798096, "learning_rate": 9.577419421432884e-07, "loss": 0.0222, "step": 217830 }, { "epoch": 2.3274747582670017, "grad_norm": 0.7973664402961731, "learning_rate": 9.57735181981213e-07, "loss": 0.0215, "step": 217840 }, { "epoch": 2.327581601581281, "grad_norm": 0.011625285260379314, "learning_rate": 9.577284213023199e-07, "loss": 0.0161, "step": 217850 }, { "epoch": 2.3276884448955606, "grad_norm": 1.3919553756713867, "learning_rate": 9.577216601066168e-07, "loss": 0.0354, "step": 217860 }, { "epoch": 2.32779528820984, "grad_norm": 0.002825165167450905, "learning_rate": 9.577148983941115e-07, "loss": 0.0296, "step": 217870 }, { "epoch": 2.32790213152412, "grad_norm": 0.2377827912569046, "learning_rate": 9.577081361648117e-07, "loss": 0.0045, "step": 217880 }, { "epoch": 2.3280089748383994, "grad_norm": 9.39128303527832, "learning_rate": 9.577013734187246e-07, "loss": 0.0149, "step": 217890 }, { "epoch": 2.3281158181526793, "grad_norm": 0.026179352775216103, "learning_rate": 9.57694610155858e-07, "loss": 0.0849, "step": 217900 }, { "epoch": 2.3282226614669588, "grad_norm": 3.9708242416381836, "learning_rate": 9.5768784637622e-07, "loss": 0.0369, "step": 217910 }, { "epoch": 2.3283295047812382, "grad_norm": 2.865104913711548, "learning_rate": 9.576810820798178e-07, "loss": 0.0186, "step": 217920 }, { "epoch": 2.3284363480955177, "grad_norm": 10.614137649536133, "learning_rate": 9.576743172666587e-07, "loss": 0.0117, "step": 217930 }, { "epoch": 2.3285431914097976, "grad_norm": 5.482859134674072, "learning_rate": 9.576675519367512e-07, "loss": 0.0335, "step": 217940 }, { "epoch": 2.328650034724077, "grad_norm": 0.9044740200042725, "learning_rate": 9.576607860901025e-07, "loss": 0.0152, "step": 217950 }, { "epoch": 2.328756878038357, "grad_norm": 1.5556122064590454, "learning_rate": 9.5765401972672e-07, "loss": 0.075, "step": 217960 }, { "epoch": 2.3288637213526364, "grad_norm": 1.5537805557250977, "learning_rate": 9.576472528466118e-07, "loss": 0.0215, "step": 217970 }, { "epoch": 2.328970564666916, "grad_norm": 11.882776260375977, "learning_rate": 9.57640485449785e-07, "loss": 0.011, "step": 217980 }, { "epoch": 2.3290774079811953, "grad_norm": 4.053191184997559, "learning_rate": 9.576337175362478e-07, "loss": 0.015, "step": 217990 }, { "epoch": 2.3291842512954752, "grad_norm": 0.35840970277786255, "learning_rate": 9.576269491060076e-07, "loss": 0.077, "step": 218000 }, { "epoch": 2.3292910946097547, "grad_norm": 1.5389235019683838, "learning_rate": 9.57620180159072e-07, "loss": 0.0296, "step": 218010 }, { "epoch": 2.3293979379240346, "grad_norm": 2.3880863189697266, "learning_rate": 9.576134106954485e-07, "loss": 0.0133, "step": 218020 }, { "epoch": 2.329504781238314, "grad_norm": 0.02123398706316948, "learning_rate": 9.576066407151452e-07, "loss": 0.0756, "step": 218030 }, { "epoch": 2.3296116245525935, "grad_norm": 1.9613829851150513, "learning_rate": 9.575998702181693e-07, "loss": 0.0093, "step": 218040 }, { "epoch": 2.3297184678668734, "grad_norm": 0.05407722294330597, "learning_rate": 9.575930992045286e-07, "loss": 0.0325, "step": 218050 }, { "epoch": 2.329825311181153, "grad_norm": 6.066671371459961, "learning_rate": 9.575863276742308e-07, "loss": 0.0297, "step": 218060 }, { "epoch": 2.3299321544954323, "grad_norm": 0.008595261722803116, "learning_rate": 9.575795556272834e-07, "loss": 0.013, "step": 218070 }, { "epoch": 2.3300389978097122, "grad_norm": 0.9574616551399231, "learning_rate": 9.575727830636943e-07, "loss": 0.0072, "step": 218080 }, { "epoch": 2.3301458411239917, "grad_norm": 0.7468269467353821, "learning_rate": 9.57566009983471e-07, "loss": 0.0239, "step": 218090 }, { "epoch": 2.330252684438271, "grad_norm": 1.971395492553711, "learning_rate": 9.57559236386621e-07, "loss": 0.0732, "step": 218100 }, { "epoch": 2.330359527752551, "grad_norm": 7.291614532470703, "learning_rate": 9.575524622731523e-07, "loss": 0.0414, "step": 218110 }, { "epoch": 2.3304663710668305, "grad_norm": 0.39533430337905884, "learning_rate": 9.57545687643072e-07, "loss": 0.0063, "step": 218120 }, { "epoch": 2.33057321438111, "grad_norm": 0.00929923728108406, "learning_rate": 9.575389124963885e-07, "loss": 0.0394, "step": 218130 }, { "epoch": 2.33068005769539, "grad_norm": 0.28333303332328796, "learning_rate": 9.575321368331088e-07, "loss": 0.0171, "step": 218140 }, { "epoch": 2.3307869010096693, "grad_norm": 0.011854429729282856, "learning_rate": 9.575253606532407e-07, "loss": 0.0073, "step": 218150 }, { "epoch": 2.330893744323949, "grad_norm": 0.0027883986476808786, "learning_rate": 9.57518583956792e-07, "loss": 0.0186, "step": 218160 }, { "epoch": 2.3310005876382287, "grad_norm": 0.6631758213043213, "learning_rate": 9.575118067437705e-07, "loss": 0.0072, "step": 218170 }, { "epoch": 2.331107430952508, "grad_norm": 6.193909645080566, "learning_rate": 9.575050290141834e-07, "loss": 0.0683, "step": 218180 }, { "epoch": 2.3312142742667876, "grad_norm": 1.2395168542861938, "learning_rate": 9.574982507680386e-07, "loss": 0.0692, "step": 218190 }, { "epoch": 2.3313211175810675, "grad_norm": 0.0770149677991867, "learning_rate": 9.57491472005344e-07, "loss": 0.0143, "step": 218200 }, { "epoch": 2.331427960895347, "grad_norm": 8.347848892211914, "learning_rate": 9.574846927261066e-07, "loss": 0.0123, "step": 218210 }, { "epoch": 2.3315348042096264, "grad_norm": 0.7436471581459045, "learning_rate": 9.574779129303347e-07, "loss": 0.0044, "step": 218220 }, { "epoch": 2.3316416475239063, "grad_norm": 0.2403782308101654, "learning_rate": 9.574711326180357e-07, "loss": 0.0057, "step": 218230 }, { "epoch": 2.331748490838186, "grad_norm": 5.783169746398926, "learning_rate": 9.574643517892173e-07, "loss": 0.0408, "step": 218240 }, { "epoch": 2.3318553341524653, "grad_norm": 2.164612293243408, "learning_rate": 9.574575704438872e-07, "loss": 0.0258, "step": 218250 }, { "epoch": 2.331962177466745, "grad_norm": 3.263446569442749, "learning_rate": 9.574507885820526e-07, "loss": 0.0183, "step": 218260 }, { "epoch": 2.3320690207810246, "grad_norm": 0.10293017327785492, "learning_rate": 9.57444006203722e-07, "loss": 0.0092, "step": 218270 }, { "epoch": 2.332175864095304, "grad_norm": 0.8887375593185425, "learning_rate": 9.574372233089023e-07, "loss": 0.0084, "step": 218280 }, { "epoch": 2.332282707409584, "grad_norm": 7.753722667694092, "learning_rate": 9.574304398976014e-07, "loss": 0.0142, "step": 218290 }, { "epoch": 2.3323895507238634, "grad_norm": 0.16686974465847015, "learning_rate": 9.574236559698272e-07, "loss": 0.0103, "step": 218300 }, { "epoch": 2.332496394038143, "grad_norm": 13.92437744140625, "learning_rate": 9.57416871525587e-07, "loss": 0.0345, "step": 218310 }, { "epoch": 2.332603237352423, "grad_norm": 0.006336853373795748, "learning_rate": 9.57410086564889e-07, "loss": 0.035, "step": 218320 }, { "epoch": 2.3327100806667023, "grad_norm": 5.322472095489502, "learning_rate": 9.5740330108774e-07, "loss": 0.0335, "step": 218330 }, { "epoch": 2.3328169239809817, "grad_norm": 3.1564908027648926, "learning_rate": 9.573965150941482e-07, "loss": 0.0058, "step": 218340 }, { "epoch": 2.3329237672952616, "grad_norm": 5.407134532928467, "learning_rate": 9.573897285841217e-07, "loss": 0.0429, "step": 218350 }, { "epoch": 2.333030610609541, "grad_norm": 2.2193305492401123, "learning_rate": 9.573829415576672e-07, "loss": 0.023, "step": 218360 }, { "epoch": 2.3331374539238205, "grad_norm": 0.8193129301071167, "learning_rate": 9.57376154014793e-07, "loss": 0.0127, "step": 218370 }, { "epoch": 2.3332442972381005, "grad_norm": 0.2591486871242523, "learning_rate": 9.573693659555066e-07, "loss": 0.0155, "step": 218380 }, { "epoch": 2.33335114055238, "grad_norm": 0.8901447653770447, "learning_rate": 9.573625773798157e-07, "loss": 0.0051, "step": 218390 }, { "epoch": 2.3334579838666594, "grad_norm": 0.18262094259262085, "learning_rate": 9.573557882877278e-07, "loss": 0.0069, "step": 218400 }, { "epoch": 2.3335648271809393, "grad_norm": 0.08880139887332916, "learning_rate": 9.57348998679251e-07, "loss": 0.0213, "step": 218410 }, { "epoch": 2.3336716704952187, "grad_norm": 2.2455050945281982, "learning_rate": 9.573422085543923e-07, "loss": 0.0352, "step": 218420 }, { "epoch": 2.333778513809498, "grad_norm": 0.20595285296440125, "learning_rate": 9.5733541791316e-07, "loss": 0.0478, "step": 218430 }, { "epoch": 2.333885357123778, "grad_norm": 2.757847309112549, "learning_rate": 9.573286267555612e-07, "loss": 0.0356, "step": 218440 }, { "epoch": 2.3339922004380576, "grad_norm": 0.005914987530559301, "learning_rate": 9.57321835081604e-07, "loss": 0.0732, "step": 218450 }, { "epoch": 2.334099043752337, "grad_norm": 0.3109213709831238, "learning_rate": 9.573150428912962e-07, "loss": 0.0144, "step": 218460 }, { "epoch": 2.334205887066617, "grad_norm": 0.002125062048435211, "learning_rate": 9.57308250184645e-07, "loss": 0.0385, "step": 218470 }, { "epoch": 2.3343127303808964, "grad_norm": 3.144822359085083, "learning_rate": 9.573014569616582e-07, "loss": 0.0234, "step": 218480 }, { "epoch": 2.334419573695176, "grad_norm": 0.3175201714038849, "learning_rate": 9.572946632223435e-07, "loss": 0.0254, "step": 218490 }, { "epoch": 2.3345264170094557, "grad_norm": 0.804304301738739, "learning_rate": 9.572878689667087e-07, "loss": 0.0215, "step": 218500 }, { "epoch": 2.334633260323735, "grad_norm": 1.3753198385238647, "learning_rate": 9.572810741947616e-07, "loss": 0.012, "step": 218510 }, { "epoch": 2.3347401036380147, "grad_norm": 0.3023674190044403, "learning_rate": 9.572742789065093e-07, "loss": 0.0268, "step": 218520 }, { "epoch": 2.3348469469522946, "grad_norm": 2.1968235969543457, "learning_rate": 9.5726748310196e-07, "loss": 0.0139, "step": 218530 }, { "epoch": 2.334953790266574, "grad_norm": 0.16635382175445557, "learning_rate": 9.572606867811211e-07, "loss": 0.0119, "step": 218540 }, { "epoch": 2.335060633580854, "grad_norm": 3.683896541595459, "learning_rate": 9.572538899440003e-07, "loss": 0.016, "step": 218550 }, { "epoch": 2.3351674768951334, "grad_norm": 1.6533499956130981, "learning_rate": 9.572470925906056e-07, "loss": 0.013, "step": 218560 }, { "epoch": 2.335274320209413, "grad_norm": 5.278632640838623, "learning_rate": 9.572402947209444e-07, "loss": 0.0579, "step": 218570 }, { "epoch": 2.3353811635236923, "grad_norm": 0.8657525777816772, "learning_rate": 9.57233496335024e-07, "loss": 0.0446, "step": 218580 }, { "epoch": 2.335488006837972, "grad_norm": 0.8606446385383606, "learning_rate": 9.57226697432853e-07, "loss": 0.0163, "step": 218590 }, { "epoch": 2.3355948501522517, "grad_norm": 4.62619686126709, "learning_rate": 9.572198980144384e-07, "loss": 0.0169, "step": 218600 }, { "epoch": 2.3357016934665316, "grad_norm": 0.05252974480390549, "learning_rate": 9.57213098079788e-07, "loss": 0.0438, "step": 218610 }, { "epoch": 2.335808536780811, "grad_norm": 2.337630271911621, "learning_rate": 9.572062976289095e-07, "loss": 0.0412, "step": 218620 }, { "epoch": 2.3359153800950905, "grad_norm": 1.2329119443893433, "learning_rate": 9.571994966618103e-07, "loss": 0.0365, "step": 218630 }, { "epoch": 2.33602222340937, "grad_norm": 2.3714261054992676, "learning_rate": 9.571926951784988e-07, "loss": 0.0095, "step": 218640 }, { "epoch": 2.33612906672365, "grad_norm": 0.13058483600616455, "learning_rate": 9.57185893178982e-07, "loss": 0.0152, "step": 218650 }, { "epoch": 2.3362359100379293, "grad_norm": 0.0503411665558815, "learning_rate": 9.571790906632678e-07, "loss": 0.0027, "step": 218660 }, { "epoch": 2.336342753352209, "grad_norm": 1.1887582540512085, "learning_rate": 9.571722876313638e-07, "loss": 0.0192, "step": 218670 }, { "epoch": 2.3364495966664887, "grad_norm": 0.7538201808929443, "learning_rate": 9.571654840832782e-07, "loss": 0.024, "step": 218680 }, { "epoch": 2.336556439980768, "grad_norm": 0.0075110928155481815, "learning_rate": 9.57158680019018e-07, "loss": 0.0487, "step": 218690 }, { "epoch": 2.3366632832950476, "grad_norm": 0.11760012060403824, "learning_rate": 9.57151875438591e-07, "loss": 0.0475, "step": 218700 }, { "epoch": 2.3367701266093275, "grad_norm": 0.2017575353384018, "learning_rate": 9.571450703420054e-07, "loss": 0.0188, "step": 218710 }, { "epoch": 2.336876969923607, "grad_norm": 0.6363433003425598, "learning_rate": 9.571382647292683e-07, "loss": 0.0148, "step": 218720 }, { "epoch": 2.336983813237887, "grad_norm": 0.057579632848501205, "learning_rate": 9.571314586003876e-07, "loss": 0.0262, "step": 218730 }, { "epoch": 2.3370906565521663, "grad_norm": 0.005270285066217184, "learning_rate": 9.571246519553709e-07, "loss": 0.0304, "step": 218740 }, { "epoch": 2.3371974998664458, "grad_norm": 0.025558853521943092, "learning_rate": 9.57117844794226e-07, "loss": 0.0047, "step": 218750 }, { "epoch": 2.337304343180725, "grad_norm": 0.005016464740037918, "learning_rate": 9.571110371169607e-07, "loss": 0.0155, "step": 218760 }, { "epoch": 2.337411186495005, "grad_norm": 0.020143885165452957, "learning_rate": 9.571042289235825e-07, "loss": 0.0165, "step": 218770 }, { "epoch": 2.3375180298092846, "grad_norm": 9.726903915405273, "learning_rate": 9.57097420214099e-07, "loss": 0.0463, "step": 218780 }, { "epoch": 2.3376248731235645, "grad_norm": 0.7487569451332092, "learning_rate": 9.57090610988518e-07, "loss": 0.1101, "step": 218790 }, { "epoch": 2.337731716437844, "grad_norm": 26.24043083190918, "learning_rate": 9.570838012468473e-07, "loss": 0.091, "step": 218800 }, { "epoch": 2.3378385597521234, "grad_norm": 0.7484629154205322, "learning_rate": 9.570769909890946e-07, "loss": 0.0455, "step": 218810 }, { "epoch": 2.3379454030664033, "grad_norm": 2.159980535507202, "learning_rate": 9.570701802152672e-07, "loss": 0.0059, "step": 218820 }, { "epoch": 2.3380522463806828, "grad_norm": 0.03234149515628815, "learning_rate": 9.570633689253733e-07, "loss": 0.005, "step": 218830 }, { "epoch": 2.3381590896949622, "grad_norm": 0.061267655342817307, "learning_rate": 9.570565571194204e-07, "loss": 0.0016, "step": 218840 }, { "epoch": 2.338265933009242, "grad_norm": 3.6010334491729736, "learning_rate": 9.57049744797416e-07, "loss": 0.0117, "step": 218850 }, { "epoch": 2.3383727763235216, "grad_norm": 12.810566902160645, "learning_rate": 9.57042931959368e-07, "loss": 0.0472, "step": 218860 }, { "epoch": 2.338479619637801, "grad_norm": 2.5275187492370605, "learning_rate": 9.57036118605284e-07, "loss": 0.0034, "step": 218870 }, { "epoch": 2.338586462952081, "grad_norm": 2.8068456649780273, "learning_rate": 9.570293047351718e-07, "loss": 0.0395, "step": 218880 }, { "epoch": 2.3386933062663604, "grad_norm": 7.379872798919678, "learning_rate": 9.57022490349039e-07, "loss": 0.0181, "step": 218890 }, { "epoch": 2.33880014958064, "grad_norm": 7.4026594161987305, "learning_rate": 9.570156754468932e-07, "loss": 0.0435, "step": 218900 }, { "epoch": 2.3389069928949198, "grad_norm": 3.4339101314544678, "learning_rate": 9.570088600287422e-07, "loss": 0.0378, "step": 218910 }, { "epoch": 2.3390138362091992, "grad_norm": 4.869071006774902, "learning_rate": 9.570020440945938e-07, "loss": 0.0276, "step": 218920 }, { "epoch": 2.3391206795234787, "grad_norm": 2.4511868953704834, "learning_rate": 9.569952276444558e-07, "loss": 0.0136, "step": 218930 }, { "epoch": 2.3392275228377586, "grad_norm": 0.004009815864264965, "learning_rate": 9.569884106783355e-07, "loss": 0.0076, "step": 218940 }, { "epoch": 2.339334366152038, "grad_norm": 1.275151014328003, "learning_rate": 9.569815931962407e-07, "loss": 0.0244, "step": 218950 }, { "epoch": 2.3394412094663175, "grad_norm": 2.7813737392425537, "learning_rate": 9.569747751981792e-07, "loss": 0.0225, "step": 218960 }, { "epoch": 2.3395480527805974, "grad_norm": 3.3561196327209473, "learning_rate": 9.569679566841588e-07, "loss": 0.0231, "step": 218970 }, { "epoch": 2.339654896094877, "grad_norm": 0.05165785551071167, "learning_rate": 9.569611376541871e-07, "loss": 0.0076, "step": 218980 }, { "epoch": 2.3397617394091563, "grad_norm": 2.569967031478882, "learning_rate": 9.569543181082718e-07, "loss": 0.044, "step": 218990 }, { "epoch": 2.3398685827234362, "grad_norm": 0.0027438742108643055, "learning_rate": 9.569474980464204e-07, "loss": 0.0056, "step": 219000 }, { "epoch": 2.3399754260377157, "grad_norm": 4.291506767272949, "learning_rate": 9.56940677468641e-07, "loss": 0.016, "step": 219010 }, { "epoch": 2.340082269351995, "grad_norm": 0.25482437014579773, "learning_rate": 9.569338563749411e-07, "loss": 0.0185, "step": 219020 }, { "epoch": 2.340189112666275, "grad_norm": 2.0006725788116455, "learning_rate": 9.569270347653283e-07, "loss": 0.0295, "step": 219030 }, { "epoch": 2.3402959559805545, "grad_norm": 0.008676279336214066, "learning_rate": 9.569202126398103e-07, "loss": 0.0142, "step": 219040 }, { "epoch": 2.340402799294834, "grad_norm": 21.65690040588379, "learning_rate": 9.56913389998395e-07, "loss": 0.036, "step": 219050 }, { "epoch": 2.340509642609114, "grad_norm": 0.00879440363496542, "learning_rate": 9.569065668410902e-07, "loss": 0.0159, "step": 219060 }, { "epoch": 2.3406164859233933, "grad_norm": 3.0508954524993896, "learning_rate": 9.568997431679028e-07, "loss": 0.0146, "step": 219070 }, { "epoch": 2.340723329237673, "grad_norm": 2.711639642715454, "learning_rate": 9.568929189788417e-07, "loss": 0.0153, "step": 219080 }, { "epoch": 2.3408301725519527, "grad_norm": 3.6867337226867676, "learning_rate": 9.568860942739138e-07, "loss": 0.0405, "step": 219090 }, { "epoch": 2.340937015866232, "grad_norm": 0.09703464806079865, "learning_rate": 9.56879269053127e-07, "loss": 0.019, "step": 219100 }, { "epoch": 2.3410438591805116, "grad_norm": 0.12344211339950562, "learning_rate": 9.56872443316489e-07, "loss": 0.0074, "step": 219110 }, { "epoch": 2.3411507024947915, "grad_norm": 0.2791358232498169, "learning_rate": 9.568656170640077e-07, "loss": 0.0292, "step": 219120 }, { "epoch": 2.341257545809071, "grad_norm": 4.783178329467773, "learning_rate": 9.568587902956904e-07, "loss": 0.0104, "step": 219130 }, { "epoch": 2.3413643891233504, "grad_norm": 1.514475703239441, "learning_rate": 9.568519630115453e-07, "loss": 0.0137, "step": 219140 }, { "epoch": 2.3414712324376303, "grad_norm": 1.8436938524246216, "learning_rate": 9.568451352115795e-07, "loss": 0.061, "step": 219150 }, { "epoch": 2.34157807575191, "grad_norm": 2.8190391063690186, "learning_rate": 9.568383068958014e-07, "loss": 0.01, "step": 219160 }, { "epoch": 2.3416849190661893, "grad_norm": 0.06865212321281433, "learning_rate": 9.568314780642182e-07, "loss": 0.0117, "step": 219170 }, { "epoch": 2.341791762380469, "grad_norm": 12.079792022705078, "learning_rate": 9.568246487168378e-07, "loss": 0.0247, "step": 219180 }, { "epoch": 2.3418986056947486, "grad_norm": 0.13375264406204224, "learning_rate": 9.56817818853668e-07, "loss": 0.0124, "step": 219190 }, { "epoch": 2.342005449009028, "grad_norm": 8.44662094116211, "learning_rate": 9.568109884747164e-07, "loss": 0.0758, "step": 219200 }, { "epoch": 2.342112292323308, "grad_norm": 3.271585702896118, "learning_rate": 9.568041575799906e-07, "loss": 0.0088, "step": 219210 }, { "epoch": 2.3422191356375874, "grad_norm": 0.14908064901828766, "learning_rate": 9.567973261694984e-07, "loss": 0.0167, "step": 219220 }, { "epoch": 2.342325978951867, "grad_norm": 0.03797181323170662, "learning_rate": 9.567904942432477e-07, "loss": 0.0012, "step": 219230 }, { "epoch": 2.342432822266147, "grad_norm": 2.641188144683838, "learning_rate": 9.56783661801246e-07, "loss": 0.008, "step": 219240 }, { "epoch": 2.3425396655804263, "grad_norm": 5.152378559112549, "learning_rate": 9.56776828843501e-07, "loss": 0.0082, "step": 219250 }, { "epoch": 2.342646508894706, "grad_norm": 0.0016113589517772198, "learning_rate": 9.567699953700204e-07, "loss": 0.007, "step": 219260 }, { "epoch": 2.3427533522089856, "grad_norm": 2.578739643096924, "learning_rate": 9.567631613808122e-07, "loss": 0.0233, "step": 219270 }, { "epoch": 2.342860195523265, "grad_norm": 0.026169462129473686, "learning_rate": 9.567563268758838e-07, "loss": 0.0206, "step": 219280 }, { "epoch": 2.3429670388375445, "grad_norm": 0.025714242830872536, "learning_rate": 9.567494918552432e-07, "loss": 0.0468, "step": 219290 }, { "epoch": 2.3430738821518244, "grad_norm": 1.1815916299819946, "learning_rate": 9.567426563188978e-07, "loss": 0.013, "step": 219300 }, { "epoch": 2.343180725466104, "grad_norm": 6.694552898406982, "learning_rate": 9.567358202668554e-07, "loss": 0.0445, "step": 219310 }, { "epoch": 2.343287568780384, "grad_norm": 6.976526737213135, "learning_rate": 9.56728983699124e-07, "loss": 0.0113, "step": 219320 }, { "epoch": 2.3433944120946633, "grad_norm": 0.3615739941596985, "learning_rate": 9.56722146615711e-07, "loss": 0.0065, "step": 219330 }, { "epoch": 2.3435012554089427, "grad_norm": 3.2510714530944824, "learning_rate": 9.567153090166241e-07, "loss": 0.0354, "step": 219340 }, { "epoch": 2.343608098723222, "grad_norm": 4.478160381317139, "learning_rate": 9.567084709018712e-07, "loss": 0.017, "step": 219350 }, { "epoch": 2.343714942037502, "grad_norm": 6.811233043670654, "learning_rate": 9.567016322714602e-07, "loss": 0.0287, "step": 219360 }, { "epoch": 2.3438217853517815, "grad_norm": 8.698575973510742, "learning_rate": 9.566947931253983e-07, "loss": 0.0375, "step": 219370 }, { "epoch": 2.3439286286660614, "grad_norm": 6.774029731750488, "learning_rate": 9.566879534636937e-07, "loss": 0.0424, "step": 219380 }, { "epoch": 2.344035471980341, "grad_norm": 21.62520408630371, "learning_rate": 9.56681113286354e-07, "loss": 0.0838, "step": 219390 }, { "epoch": 2.3441423152946204, "grad_norm": 0.024218518286943436, "learning_rate": 9.566742725933865e-07, "loss": 0.0175, "step": 219400 }, { "epoch": 2.3442491586089, "grad_norm": 4.053225517272949, "learning_rate": 9.566674313847994e-07, "loss": 0.0226, "step": 219410 }, { "epoch": 2.3443560019231797, "grad_norm": 0.024331549182534218, "learning_rate": 9.566605896606005e-07, "loss": 0.0223, "step": 219420 }, { "epoch": 2.344462845237459, "grad_norm": 0.14137208461761475, "learning_rate": 9.566537474207972e-07, "loss": 0.0173, "step": 219430 }, { "epoch": 2.344569688551739, "grad_norm": 1.9081015586853027, "learning_rate": 9.566469046653975e-07, "loss": 0.0772, "step": 219440 }, { "epoch": 2.3446765318660185, "grad_norm": 0.024647792801260948, "learning_rate": 9.566400613944089e-07, "loss": 0.02, "step": 219450 }, { "epoch": 2.344783375180298, "grad_norm": 2.181056022644043, "learning_rate": 9.56633217607839e-07, "loss": 0.0137, "step": 219460 }, { "epoch": 2.3448902184945775, "grad_norm": 5.749191761016846, "learning_rate": 9.56626373305696e-07, "loss": 0.021, "step": 219470 }, { "epoch": 2.3449970618088574, "grad_norm": 3.078190565109253, "learning_rate": 9.566195284879873e-07, "loss": 0.0134, "step": 219480 }, { "epoch": 2.345103905123137, "grad_norm": 4.631232738494873, "learning_rate": 9.566126831547206e-07, "loss": 0.0241, "step": 219490 }, { "epoch": 2.3452107484374167, "grad_norm": 2.381746530532837, "learning_rate": 9.566058373059038e-07, "loss": 0.0266, "step": 219500 }, { "epoch": 2.345317591751696, "grad_norm": 0.10504064708948135, "learning_rate": 9.565989909415447e-07, "loss": 0.0416, "step": 219510 }, { "epoch": 2.3454244350659756, "grad_norm": 0.07121653854846954, "learning_rate": 9.565921440616506e-07, "loss": 0.0308, "step": 219520 }, { "epoch": 2.3455312783802555, "grad_norm": 5.683238506317139, "learning_rate": 9.565852966662296e-07, "loss": 0.0399, "step": 219530 }, { "epoch": 2.345638121694535, "grad_norm": 2.491041660308838, "learning_rate": 9.565784487552894e-07, "loss": 0.0599, "step": 219540 }, { "epoch": 2.3457449650088145, "grad_norm": 4.225241661071777, "learning_rate": 9.565716003288376e-07, "loss": 0.0227, "step": 219550 }, { "epoch": 2.3458518083230944, "grad_norm": 5.759631156921387, "learning_rate": 9.565647513868819e-07, "loss": 0.0246, "step": 219560 }, { "epoch": 2.345958651637374, "grad_norm": 17.98137855529785, "learning_rate": 9.565579019294303e-07, "loss": 0.0392, "step": 219570 }, { "epoch": 2.3460654949516533, "grad_norm": 2.280043125152588, "learning_rate": 9.565510519564902e-07, "loss": 0.0138, "step": 219580 }, { "epoch": 2.346172338265933, "grad_norm": 0.29881471395492554, "learning_rate": 9.565442014680698e-07, "loss": 0.0208, "step": 219590 }, { "epoch": 2.3462791815802126, "grad_norm": 3.6754629611968994, "learning_rate": 9.565373504641764e-07, "loss": 0.0399, "step": 219600 }, { "epoch": 2.346386024894492, "grad_norm": 0.1514732390642166, "learning_rate": 9.565304989448178e-07, "loss": 0.0198, "step": 219610 }, { "epoch": 2.346492868208772, "grad_norm": 0.1403367817401886, "learning_rate": 9.565236469100019e-07, "loss": 0.0068, "step": 219620 }, { "epoch": 2.3465997115230515, "grad_norm": 0.5254405736923218, "learning_rate": 9.565167943597364e-07, "loss": 0.0186, "step": 219630 }, { "epoch": 2.346706554837331, "grad_norm": 0.004049330949783325, "learning_rate": 9.565099412940289e-07, "loss": 0.0268, "step": 219640 }, { "epoch": 2.346813398151611, "grad_norm": 3.7371819019317627, "learning_rate": 9.565030877128872e-07, "loss": 0.0154, "step": 219650 }, { "epoch": 2.3469202414658903, "grad_norm": 0.017434794455766678, "learning_rate": 9.56496233616319e-07, "loss": 0.0085, "step": 219660 }, { "epoch": 2.3470270847801697, "grad_norm": 6.5588507652282715, "learning_rate": 9.564893790043323e-07, "loss": 0.0244, "step": 219670 }, { "epoch": 2.3471339280944497, "grad_norm": 0.8650670051574707, "learning_rate": 9.564825238769344e-07, "loss": 0.0257, "step": 219680 }, { "epoch": 2.347240771408729, "grad_norm": 0.25996845960617065, "learning_rate": 9.564756682341336e-07, "loss": 0.0096, "step": 219690 }, { "epoch": 2.3473476147230086, "grad_norm": 19.8823184967041, "learning_rate": 9.56468812075937e-07, "loss": 0.0134, "step": 219700 }, { "epoch": 2.3474544580372885, "grad_norm": 0.0065216482616961, "learning_rate": 9.564619554023527e-07, "loss": 0.0226, "step": 219710 }, { "epoch": 2.347561301351568, "grad_norm": 0.00804337952286005, "learning_rate": 9.564550982133886e-07, "loss": 0.0007, "step": 219720 }, { "epoch": 2.3476681446658474, "grad_norm": 0.009557994082570076, "learning_rate": 9.56448240509052e-07, "loss": 0.0051, "step": 219730 }, { "epoch": 2.3477749879801273, "grad_norm": 2.150853157043457, "learning_rate": 9.56441382289351e-07, "loss": 0.0104, "step": 219740 }, { "epoch": 2.3478818312944068, "grad_norm": 1.349149227142334, "learning_rate": 9.564345235542931e-07, "loss": 0.0183, "step": 219750 }, { "epoch": 2.347988674608686, "grad_norm": 0.01514436211436987, "learning_rate": 9.564276643038865e-07, "loss": 0.0149, "step": 219760 }, { "epoch": 2.348095517922966, "grad_norm": 0.010699008591473103, "learning_rate": 9.564208045381384e-07, "loss": 0.0147, "step": 219770 }, { "epoch": 2.3482023612372456, "grad_norm": 0.26950573921203613, "learning_rate": 9.564139442570568e-07, "loss": 0.0271, "step": 219780 }, { "epoch": 2.348309204551525, "grad_norm": 15.32193660736084, "learning_rate": 9.564070834606494e-07, "loss": 0.0305, "step": 219790 }, { "epoch": 2.348416047865805, "grad_norm": 4.525417327880859, "learning_rate": 9.56400222148924e-07, "loss": 0.0756, "step": 219800 }, { "epoch": 2.3485228911800844, "grad_norm": 3.6589372158050537, "learning_rate": 9.563933603218883e-07, "loss": 0.0206, "step": 219810 }, { "epoch": 2.348629734494364, "grad_norm": 0.015370671637356281, "learning_rate": 9.563864979795499e-07, "loss": 0.0153, "step": 219820 }, { "epoch": 2.3487365778086438, "grad_norm": 0.5525261759757996, "learning_rate": 9.563796351219168e-07, "loss": 0.0359, "step": 219830 }, { "epoch": 2.348843421122923, "grad_norm": 1.905739665031433, "learning_rate": 9.563727717489966e-07, "loss": 0.0251, "step": 219840 }, { "epoch": 2.3489502644372027, "grad_norm": 0.4059370458126068, "learning_rate": 9.563659078607972e-07, "loss": 0.0012, "step": 219850 }, { "epoch": 2.3490571077514826, "grad_norm": 0.009021857753396034, "learning_rate": 9.563590434573263e-07, "loss": 0.0226, "step": 219860 }, { "epoch": 2.349163951065762, "grad_norm": 2.2551090717315674, "learning_rate": 9.563521785385914e-07, "loss": 0.0249, "step": 219870 }, { "epoch": 2.3492707943800415, "grad_norm": 0.7807332873344421, "learning_rate": 9.563453131046007e-07, "loss": 0.0067, "step": 219880 }, { "epoch": 2.3493776376943214, "grad_norm": 0.2919410169124603, "learning_rate": 9.563384471553616e-07, "loss": 0.082, "step": 219890 }, { "epoch": 2.349484481008601, "grad_norm": 1.1549785137176514, "learning_rate": 9.563315806908816e-07, "loss": 0.0057, "step": 219900 }, { "epoch": 2.3495913243228803, "grad_norm": 0.023710986599326134, "learning_rate": 9.56324713711169e-07, "loss": 0.0237, "step": 219910 }, { "epoch": 2.34969816763716, "grad_norm": 18.99116325378418, "learning_rate": 9.563178462162316e-07, "loss": 0.0194, "step": 219920 }, { "epoch": 2.3498050109514397, "grad_norm": 1.9781616926193237, "learning_rate": 9.563109782060768e-07, "loss": 0.0195, "step": 219930 }, { "epoch": 2.349911854265719, "grad_norm": 0.12920022010803223, "learning_rate": 9.563041096807126e-07, "loss": 0.0237, "step": 219940 }, { "epoch": 2.350018697579999, "grad_norm": 1.482977271080017, "learning_rate": 9.562972406401464e-07, "loss": 0.0343, "step": 219950 }, { "epoch": 2.3501255408942785, "grad_norm": 4.098820209503174, "learning_rate": 9.56290371084386e-07, "loss": 0.0229, "step": 219960 }, { "epoch": 2.350232384208558, "grad_norm": 0.23614604771137238, "learning_rate": 9.562835010134397e-07, "loss": 0.0915, "step": 219970 }, { "epoch": 2.350339227522838, "grad_norm": 7.362311840057373, "learning_rate": 9.56276630427315e-07, "loss": 0.0403, "step": 219980 }, { "epoch": 2.3504460708371173, "grad_norm": 2.1668028831481934, "learning_rate": 9.562697593260191e-07, "loss": 0.004, "step": 219990 }, { "epoch": 2.3505529141513968, "grad_norm": 0.026490770280361176, "learning_rate": 9.562628877095604e-07, "loss": 0.0076, "step": 220000 }, { "epoch": 2.3506597574656767, "grad_norm": 0.11304429918527603, "learning_rate": 9.562560155779465e-07, "loss": 0.0212, "step": 220010 }, { "epoch": 2.350766600779956, "grad_norm": 0.04888260364532471, "learning_rate": 9.562491429311853e-07, "loss": 0.0351, "step": 220020 }, { "epoch": 2.350873444094236, "grad_norm": 14.56126880645752, "learning_rate": 9.56242269769284e-07, "loss": 0.0097, "step": 220030 }, { "epoch": 2.3509802874085155, "grad_norm": 1.2570838928222656, "learning_rate": 9.56235396092251e-07, "loss": 0.0343, "step": 220040 }, { "epoch": 2.351087130722795, "grad_norm": 0.25381606817245483, "learning_rate": 9.562285219000937e-07, "loss": 0.0369, "step": 220050 }, { "epoch": 2.3511939740370744, "grad_norm": 0.08272074908018112, "learning_rate": 9.5622164719282e-07, "loss": 0.0057, "step": 220060 }, { "epoch": 2.3513008173513543, "grad_norm": 1.5646710395812988, "learning_rate": 9.562147719704376e-07, "loss": 0.0198, "step": 220070 }, { "epoch": 2.351407660665634, "grad_norm": 1.9888852834701538, "learning_rate": 9.562078962329543e-07, "loss": 0.0139, "step": 220080 }, { "epoch": 2.3515145039799137, "grad_norm": 0.14665086567401886, "learning_rate": 9.562010199803778e-07, "loss": 0.0301, "step": 220090 }, { "epoch": 2.351621347294193, "grad_norm": 0.842113733291626, "learning_rate": 9.561941432127161e-07, "loss": 0.0044, "step": 220100 }, { "epoch": 2.3517281906084726, "grad_norm": 5.355189323425293, "learning_rate": 9.561872659299767e-07, "loss": 0.0304, "step": 220110 }, { "epoch": 2.351835033922752, "grad_norm": 0.045289743691682816, "learning_rate": 9.561803881321673e-07, "loss": 0.0086, "step": 220120 }, { "epoch": 2.351941877237032, "grad_norm": 0.047185514122247696, "learning_rate": 9.56173509819296e-07, "loss": 0.0107, "step": 220130 }, { "epoch": 2.3520487205513114, "grad_norm": 0.04956859350204468, "learning_rate": 9.561666309913702e-07, "loss": 0.0092, "step": 220140 }, { "epoch": 2.3521555638655913, "grad_norm": 3.7127718925476074, "learning_rate": 9.56159751648398e-07, "loss": 0.0437, "step": 220150 }, { "epoch": 2.352262407179871, "grad_norm": 1.1454105377197266, "learning_rate": 9.561528717903869e-07, "loss": 0.0369, "step": 220160 }, { "epoch": 2.3523692504941502, "grad_norm": 0.03286421298980713, "learning_rate": 9.56145991417345e-07, "loss": 0.0112, "step": 220170 }, { "epoch": 2.3524760938084297, "grad_norm": 3.4385135173797607, "learning_rate": 9.561391105292796e-07, "loss": 0.0129, "step": 220180 }, { "epoch": 2.3525829371227096, "grad_norm": 0.029084792360663414, "learning_rate": 9.561322291261988e-07, "loss": 0.0026, "step": 220190 }, { "epoch": 2.352689780436989, "grad_norm": 0.09338255226612091, "learning_rate": 9.561253472081103e-07, "loss": 0.028, "step": 220200 }, { "epoch": 2.352796623751269, "grad_norm": 0.05134345218539238, "learning_rate": 9.561184647750217e-07, "loss": 0.0061, "step": 220210 }, { "epoch": 2.3529034670655484, "grad_norm": 0.009909610264003277, "learning_rate": 9.561115818269411e-07, "loss": 0.0075, "step": 220220 }, { "epoch": 2.353010310379828, "grad_norm": 4.5544867515563965, "learning_rate": 9.561046983638764e-07, "loss": 0.0308, "step": 220230 }, { "epoch": 2.3531171536941073, "grad_norm": 3.824420690536499, "learning_rate": 9.560978143858347e-07, "loss": 0.0324, "step": 220240 }, { "epoch": 2.3532239970083872, "grad_norm": 2.9344656467437744, "learning_rate": 9.56090929892824e-07, "loss": 0.0301, "step": 220250 }, { "epoch": 2.3533308403226667, "grad_norm": 0.01600729674100876, "learning_rate": 9.560840448848525e-07, "loss": 0.0094, "step": 220260 }, { "epoch": 2.3534376836369466, "grad_norm": 0.4069918394088745, "learning_rate": 9.560771593619277e-07, "loss": 0.0082, "step": 220270 }, { "epoch": 2.353544526951226, "grad_norm": 0.43224114179611206, "learning_rate": 9.560702733240571e-07, "loss": 0.0158, "step": 220280 }, { "epoch": 2.3536513702655055, "grad_norm": 2.5313830375671387, "learning_rate": 9.56063386771249e-07, "loss": 0.0113, "step": 220290 }, { "epoch": 2.3537582135797854, "grad_norm": 3.896496295928955, "learning_rate": 9.560564997035107e-07, "loss": 0.0087, "step": 220300 }, { "epoch": 2.353865056894065, "grad_norm": 0.7447588443756104, "learning_rate": 9.560496121208504e-07, "loss": 0.0124, "step": 220310 }, { "epoch": 2.3539719002083443, "grad_norm": 10.682368278503418, "learning_rate": 9.560427240232756e-07, "loss": 0.0458, "step": 220320 }, { "epoch": 2.3540787435226243, "grad_norm": 0.6563338041305542, "learning_rate": 9.560358354107942e-07, "loss": 0.0055, "step": 220330 }, { "epoch": 2.3541855868369037, "grad_norm": 1.596841812133789, "learning_rate": 9.560289462834138e-07, "loss": 0.0154, "step": 220340 }, { "epoch": 2.354292430151183, "grad_norm": 6.541121482849121, "learning_rate": 9.560220566411423e-07, "loss": 0.0279, "step": 220350 }, { "epoch": 2.354399273465463, "grad_norm": 9.037979125976562, "learning_rate": 9.560151664839876e-07, "loss": 0.0121, "step": 220360 }, { "epoch": 2.3545061167797425, "grad_norm": 0.002539279405027628, "learning_rate": 9.560082758119574e-07, "loss": 0.0018, "step": 220370 }, { "epoch": 2.354612960094022, "grad_norm": 3.996645212173462, "learning_rate": 9.560013846250593e-07, "loss": 0.0072, "step": 220380 }, { "epoch": 2.354719803408302, "grad_norm": 0.011749651283025742, "learning_rate": 9.559944929233012e-07, "loss": 0.0375, "step": 220390 }, { "epoch": 2.3548266467225814, "grad_norm": 1.2564916610717773, "learning_rate": 9.55987600706691e-07, "loss": 0.0258, "step": 220400 }, { "epoch": 2.354933490036861, "grad_norm": 2.3371706008911133, "learning_rate": 9.559807079752363e-07, "loss": 0.0114, "step": 220410 }, { "epoch": 2.3550403333511407, "grad_norm": 0.6751762628555298, "learning_rate": 9.55973814728945e-07, "loss": 0.0462, "step": 220420 }, { "epoch": 2.35514717666542, "grad_norm": 0.7242804169654846, "learning_rate": 9.55966920967825e-07, "loss": 0.0046, "step": 220430 }, { "epoch": 2.3552540199796996, "grad_norm": 1.749434232711792, "learning_rate": 9.559600266918837e-07, "loss": 0.032, "step": 220440 }, { "epoch": 2.3553608632939795, "grad_norm": 3.7993078231811523, "learning_rate": 9.55953131901129e-07, "loss": 0.0139, "step": 220450 }, { "epoch": 2.355467706608259, "grad_norm": 7.067577838897705, "learning_rate": 9.559462365955692e-07, "loss": 0.0268, "step": 220460 }, { "epoch": 2.3555745499225385, "grad_norm": 0.8116975426673889, "learning_rate": 9.559393407752116e-07, "loss": 0.0077, "step": 220470 }, { "epoch": 2.3556813932368184, "grad_norm": 0.4104655683040619, "learning_rate": 9.559324444400639e-07, "loss": 0.0252, "step": 220480 }, { "epoch": 2.355788236551098, "grad_norm": 3.13931941986084, "learning_rate": 9.55925547590134e-07, "loss": 0.0469, "step": 220490 }, { "epoch": 2.3558950798653773, "grad_norm": 0.13037963211536407, "learning_rate": 9.5591865022543e-07, "loss": 0.0149, "step": 220500 }, { "epoch": 2.356001923179657, "grad_norm": 3.3260958194732666, "learning_rate": 9.559117523459593e-07, "loss": 0.0194, "step": 220510 }, { "epoch": 2.3561087664939366, "grad_norm": 0.183258518576622, "learning_rate": 9.559048539517298e-07, "loss": 0.0117, "step": 220520 }, { "epoch": 2.356215609808216, "grad_norm": 1.1904881000518799, "learning_rate": 9.558979550427495e-07, "loss": 0.0199, "step": 220530 }, { "epoch": 2.356322453122496, "grad_norm": 0.011293825693428516, "learning_rate": 9.558910556190257e-07, "loss": 0.0263, "step": 220540 }, { "epoch": 2.3564292964367755, "grad_norm": 1.3614656925201416, "learning_rate": 9.558841556805667e-07, "loss": 0.0294, "step": 220550 }, { "epoch": 2.356536139751055, "grad_norm": 0.01720549166202545, "learning_rate": 9.558772552273799e-07, "loss": 0.0115, "step": 220560 }, { "epoch": 2.356642983065335, "grad_norm": 0.5591751933097839, "learning_rate": 9.558703542594736e-07, "loss": 0.0516, "step": 220570 }, { "epoch": 2.3567498263796143, "grad_norm": 0.017131395637989044, "learning_rate": 9.55863452776855e-07, "loss": 0.0124, "step": 220580 }, { "epoch": 2.3568566696938937, "grad_norm": 0.002059295540675521, "learning_rate": 9.558565507795322e-07, "loss": 0.0154, "step": 220590 }, { "epoch": 2.3569635130081736, "grad_norm": 0.00576005969196558, "learning_rate": 9.558496482675128e-07, "loss": 0.0305, "step": 220600 }, { "epoch": 2.357070356322453, "grad_norm": 1.7602697610855103, "learning_rate": 9.55842745240805e-07, "loss": 0.0056, "step": 220610 }, { "epoch": 2.3571771996367326, "grad_norm": 0.016540633514523506, "learning_rate": 9.558358416994164e-07, "loss": 0.0621, "step": 220620 }, { "epoch": 2.3572840429510125, "grad_norm": 0.49245548248291016, "learning_rate": 9.558289376433544e-07, "loss": 0.0012, "step": 220630 }, { "epoch": 2.357390886265292, "grad_norm": 0.1531422883272171, "learning_rate": 9.558220330726274e-07, "loss": 0.0219, "step": 220640 }, { "epoch": 2.3574977295795714, "grad_norm": 3.9578332901000977, "learning_rate": 9.558151279872427e-07, "loss": 0.0301, "step": 220650 }, { "epoch": 2.3576045728938513, "grad_norm": 0.5989718437194824, "learning_rate": 9.558082223872084e-07, "loss": 0.0142, "step": 220660 }, { "epoch": 2.3577114162081307, "grad_norm": 0.7786870002746582, "learning_rate": 9.558013162725324e-07, "loss": 0.012, "step": 220670 }, { "epoch": 2.35781825952241, "grad_norm": 0.3217974901199341, "learning_rate": 9.557944096432222e-07, "loss": 0.0004, "step": 220680 }, { "epoch": 2.35792510283669, "grad_norm": 0.0693332701921463, "learning_rate": 9.557875024992857e-07, "loss": 0.0115, "step": 220690 }, { "epoch": 2.3580319461509696, "grad_norm": 8.221353530883789, "learning_rate": 9.557805948407305e-07, "loss": 0.0266, "step": 220700 }, { "epoch": 2.358138789465249, "grad_norm": 2.839829206466675, "learning_rate": 9.55773686667565e-07, "loss": 0.0401, "step": 220710 }, { "epoch": 2.358245632779529, "grad_norm": 1.330359935760498, "learning_rate": 9.557667779797961e-07, "loss": 0.0081, "step": 220720 }, { "epoch": 2.3583524760938084, "grad_norm": 3.080583095550537, "learning_rate": 9.557598687774326e-07, "loss": 0.0243, "step": 220730 }, { "epoch": 2.3584593194080883, "grad_norm": 0.1900734156370163, "learning_rate": 9.557529590604815e-07, "loss": 0.0274, "step": 220740 }, { "epoch": 2.3585661627223677, "grad_norm": 2.9908320903778076, "learning_rate": 9.55746048828951e-07, "loss": 0.0209, "step": 220750 }, { "epoch": 2.358673006036647, "grad_norm": 0.01032230257987976, "learning_rate": 9.55739138082849e-07, "loss": 0.0121, "step": 220760 }, { "epoch": 2.3587798493509267, "grad_norm": 5.022513389587402, "learning_rate": 9.55732226822183e-07, "loss": 0.0146, "step": 220770 }, { "epoch": 2.3588866926652066, "grad_norm": 0.013192223384976387, "learning_rate": 9.557253150469608e-07, "loss": 0.0713, "step": 220780 }, { "epoch": 2.358993535979486, "grad_norm": 0.02437596581876278, "learning_rate": 9.557184027571906e-07, "loss": 0.008, "step": 220790 }, { "epoch": 2.359100379293766, "grad_norm": 0.05735119432210922, "learning_rate": 9.557114899528795e-07, "loss": 0.0025, "step": 220800 }, { "epoch": 2.3592072226080454, "grad_norm": 1.7879023551940918, "learning_rate": 9.557045766340362e-07, "loss": 0.0281, "step": 220810 }, { "epoch": 2.359314065922325, "grad_norm": 3.744539260864258, "learning_rate": 9.556976628006676e-07, "loss": 0.0051, "step": 220820 }, { "epoch": 2.3594209092366043, "grad_norm": 1.096780776977539, "learning_rate": 9.556907484527822e-07, "loss": 0.0379, "step": 220830 }, { "epoch": 2.359527752550884, "grad_norm": 0.15199746191501617, "learning_rate": 9.556838335903876e-07, "loss": 0.0244, "step": 220840 }, { "epoch": 2.3596345958651637, "grad_norm": 0.5392199158668518, "learning_rate": 9.556769182134914e-07, "loss": 0.028, "step": 220850 }, { "epoch": 2.3597414391794436, "grad_norm": 0.03528445214033127, "learning_rate": 9.556700023221017e-07, "loss": 0.0251, "step": 220860 }, { "epoch": 2.359848282493723, "grad_norm": 0.11028214544057846, "learning_rate": 9.55663085916226e-07, "loss": 0.0107, "step": 220870 }, { "epoch": 2.3599551258080025, "grad_norm": 0.13294392824172974, "learning_rate": 9.556561689958726e-07, "loss": 0.045, "step": 220880 }, { "epoch": 2.360061969122282, "grad_norm": 3.339301347732544, "learning_rate": 9.556492515610486e-07, "loss": 0.0419, "step": 220890 }, { "epoch": 2.360168812436562, "grad_norm": 0.008891255594789982, "learning_rate": 9.556423336117625e-07, "loss": 0.0091, "step": 220900 }, { "epoch": 2.3602756557508413, "grad_norm": 0.5156798362731934, "learning_rate": 9.556354151480218e-07, "loss": 0.0068, "step": 220910 }, { "epoch": 2.360382499065121, "grad_norm": 0.007903509773314, "learning_rate": 9.556284961698342e-07, "loss": 0.0067, "step": 220920 }, { "epoch": 2.3604893423794007, "grad_norm": 1.8526095151901245, "learning_rate": 9.556215766772076e-07, "loss": 0.0115, "step": 220930 }, { "epoch": 2.36059618569368, "grad_norm": 2.314577102661133, "learning_rate": 9.5561465667015e-07, "loss": 0.0095, "step": 220940 }, { "epoch": 2.3607030290079596, "grad_norm": 3.177788496017456, "learning_rate": 9.55607736148669e-07, "loss": 0.0255, "step": 220950 }, { "epoch": 2.3608098723222395, "grad_norm": 0.002707656240090728, "learning_rate": 9.556008151127726e-07, "loss": 0.0036, "step": 220960 }, { "epoch": 2.360916715636519, "grad_norm": 0.9193438291549683, "learning_rate": 9.555938935624684e-07, "loss": 0.0337, "step": 220970 }, { "epoch": 2.361023558950799, "grad_norm": 5.723071575164795, "learning_rate": 9.555869714977643e-07, "loss": 0.0086, "step": 220980 }, { "epoch": 2.3611304022650783, "grad_norm": 2.9741151332855225, "learning_rate": 9.55580048918668e-07, "loss": 0.009, "step": 220990 }, { "epoch": 2.3612372455793578, "grad_norm": 5.552261829376221, "learning_rate": 9.555731258251876e-07, "loss": 0.0129, "step": 221000 }, { "epoch": 2.3613440888936377, "grad_norm": 0.3937667906284332, "learning_rate": 9.555662022173307e-07, "loss": 0.0123, "step": 221010 }, { "epoch": 2.361450932207917, "grad_norm": 6.324759483337402, "learning_rate": 9.555592780951052e-07, "loss": 0.0244, "step": 221020 }, { "epoch": 2.3615577755221966, "grad_norm": 3.3105790615081787, "learning_rate": 9.55552353458519e-07, "loss": 0.0102, "step": 221030 }, { "epoch": 2.3616646188364765, "grad_norm": 0.25549858808517456, "learning_rate": 9.555454283075795e-07, "loss": 0.0114, "step": 221040 }, { "epoch": 2.361771462150756, "grad_norm": 0.036810193210840225, "learning_rate": 9.55538502642295e-07, "loss": 0.0135, "step": 221050 }, { "epoch": 2.3618783054650354, "grad_norm": 0.02845049649477005, "learning_rate": 9.555315764626733e-07, "loss": 0.0057, "step": 221060 }, { "epoch": 2.3619851487793153, "grad_norm": 0.04975132644176483, "learning_rate": 9.555246497687217e-07, "loss": 0.0185, "step": 221070 }, { "epoch": 2.3620919920935948, "grad_norm": 1.0842853784561157, "learning_rate": 9.555177225604488e-07, "loss": 0.0258, "step": 221080 }, { "epoch": 2.3621988354078742, "grad_norm": 0.019933443516492844, "learning_rate": 9.555107948378619e-07, "loss": 0.0167, "step": 221090 }, { "epoch": 2.362305678722154, "grad_norm": 0.025491956621408463, "learning_rate": 9.555038666009687e-07, "loss": 0.0045, "step": 221100 }, { "epoch": 2.3624125220364336, "grad_norm": 0.1517152190208435, "learning_rate": 9.554969378497776e-07, "loss": 0.0473, "step": 221110 }, { "epoch": 2.362519365350713, "grad_norm": 1.1724284887313843, "learning_rate": 9.554900085842958e-07, "loss": 0.006, "step": 221120 }, { "epoch": 2.362626208664993, "grad_norm": 2.818671464920044, "learning_rate": 9.554830788045316e-07, "loss": 0.0431, "step": 221130 }, { "epoch": 2.3627330519792724, "grad_norm": 6.938202381134033, "learning_rate": 9.554761485104924e-07, "loss": 0.0713, "step": 221140 }, { "epoch": 2.362839895293552, "grad_norm": 0.12474139779806137, "learning_rate": 9.554692177021865e-07, "loss": 0.0045, "step": 221150 }, { "epoch": 2.3629467386078318, "grad_norm": 1.4960989952087402, "learning_rate": 9.554622863796213e-07, "loss": 0.0383, "step": 221160 }, { "epoch": 2.3630535819221112, "grad_norm": 1.3043113946914673, "learning_rate": 9.554553545428048e-07, "loss": 0.0642, "step": 221170 }, { "epoch": 2.3631604252363907, "grad_norm": 0.06588584184646606, "learning_rate": 9.554484221917448e-07, "loss": 0.0153, "step": 221180 }, { "epoch": 2.3632672685506706, "grad_norm": 2.3733649253845215, "learning_rate": 9.554414893264495e-07, "loss": 0.0324, "step": 221190 }, { "epoch": 2.36337411186495, "grad_norm": 0.4879249334335327, "learning_rate": 9.554345559469262e-07, "loss": 0.0032, "step": 221200 }, { "epoch": 2.3634809551792295, "grad_norm": 0.3009794056415558, "learning_rate": 9.554276220531827e-07, "loss": 0.024, "step": 221210 }, { "epoch": 2.3635877984935094, "grad_norm": 0.02419714257121086, "learning_rate": 9.554206876452273e-07, "loss": 0.003, "step": 221220 }, { "epoch": 2.363694641807789, "grad_norm": 2.1678264141082764, "learning_rate": 9.554137527230675e-07, "loss": 0.0189, "step": 221230 }, { "epoch": 2.3638014851220683, "grad_norm": 0.0417688712477684, "learning_rate": 9.554068172867112e-07, "loss": 0.026, "step": 221240 }, { "epoch": 2.3639083284363482, "grad_norm": 6.714487075805664, "learning_rate": 9.55399881336166e-07, "loss": 0.0588, "step": 221250 }, { "epoch": 2.3640151717506277, "grad_norm": 0.032711710780858994, "learning_rate": 9.553929448714402e-07, "loss": 0.0019, "step": 221260 }, { "epoch": 2.364122015064907, "grad_norm": 6.043415546417236, "learning_rate": 9.553860078925414e-07, "loss": 0.0346, "step": 221270 }, { "epoch": 2.364228858379187, "grad_norm": 0.040340591222047806, "learning_rate": 9.553790703994773e-07, "loss": 0.0093, "step": 221280 }, { "epoch": 2.3643357016934665, "grad_norm": 1.6870697736740112, "learning_rate": 9.553721323922562e-07, "loss": 0.0654, "step": 221290 }, { "epoch": 2.364442545007746, "grad_norm": 0.5526773929595947, "learning_rate": 9.553651938708852e-07, "loss": 0.0239, "step": 221300 }, { "epoch": 2.364549388322026, "grad_norm": 1.6915483474731445, "learning_rate": 9.553582548353727e-07, "loss": 0.0169, "step": 221310 }, { "epoch": 2.3646562316363053, "grad_norm": 0.37784069776535034, "learning_rate": 9.553513152857263e-07, "loss": 0.0263, "step": 221320 }, { "epoch": 2.364763074950585, "grad_norm": 1.4729728698730469, "learning_rate": 9.55344375221954e-07, "loss": 0.0138, "step": 221330 }, { "epoch": 2.3648699182648647, "grad_norm": 0.024668190628290176, "learning_rate": 9.553374346440636e-07, "loss": 0.0556, "step": 221340 }, { "epoch": 2.364976761579144, "grad_norm": 0.2832886278629303, "learning_rate": 9.553304935520626e-07, "loss": 0.0121, "step": 221350 }, { "epoch": 2.3650836048934236, "grad_norm": 0.03106030449271202, "learning_rate": 9.553235519459592e-07, "loss": 0.0065, "step": 221360 }, { "epoch": 2.3651904482077035, "grad_norm": 0.07696413993835449, "learning_rate": 9.553166098257612e-07, "loss": 0.0452, "step": 221370 }, { "epoch": 2.365297291521983, "grad_norm": 0.0071031139232218266, "learning_rate": 9.553096671914765e-07, "loss": 0.0288, "step": 221380 }, { "epoch": 2.3654041348362624, "grad_norm": 0.12114384770393372, "learning_rate": 9.553027240431126e-07, "loss": 0.0092, "step": 221390 }, { "epoch": 2.3655109781505423, "grad_norm": 0.28175827860832214, "learning_rate": 9.552957803806775e-07, "loss": 0.0149, "step": 221400 }, { "epoch": 2.365617821464822, "grad_norm": 0.10735032707452774, "learning_rate": 9.552888362041793e-07, "loss": 0.0121, "step": 221410 }, { "epoch": 2.3657246647791013, "grad_norm": 2.547173500061035, "learning_rate": 9.552818915136258e-07, "loss": 0.0133, "step": 221420 }, { "epoch": 2.365831508093381, "grad_norm": 5.330565452575684, "learning_rate": 9.552749463090245e-07, "loss": 0.0386, "step": 221430 }, { "epoch": 2.3659383514076606, "grad_norm": 1.3641782999038696, "learning_rate": 9.552680005903832e-07, "loss": 0.0106, "step": 221440 }, { "epoch": 2.36604519472194, "grad_norm": 0.010044483467936516, "learning_rate": 9.552610543577102e-07, "loss": 0.0066, "step": 221450 }, { "epoch": 2.36615203803622, "grad_norm": 1.4761395454406738, "learning_rate": 9.552541076110132e-07, "loss": 0.0071, "step": 221460 }, { "epoch": 2.3662588813504994, "grad_norm": 4.232999324798584, "learning_rate": 9.552471603502997e-07, "loss": 0.0194, "step": 221470 }, { "epoch": 2.366365724664779, "grad_norm": 0.7769683003425598, "learning_rate": 9.55240212575578e-07, "loss": 0.0133, "step": 221480 }, { "epoch": 2.366472567979059, "grad_norm": 0.4842395484447479, "learning_rate": 9.552332642868557e-07, "loss": 0.0217, "step": 221490 }, { "epoch": 2.3665794112933383, "grad_norm": 0.36626142263412476, "learning_rate": 9.552263154841408e-07, "loss": 0.0212, "step": 221500 }, { "epoch": 2.366686254607618, "grad_norm": 3.573448419570923, "learning_rate": 9.552193661674408e-07, "loss": 0.0174, "step": 221510 }, { "epoch": 2.3667930979218976, "grad_norm": 3.209015369415283, "learning_rate": 9.55212416336764e-07, "loss": 0.056, "step": 221520 }, { "epoch": 2.366899941236177, "grad_norm": 4.275626182556152, "learning_rate": 9.552054659921176e-07, "loss": 0.0037, "step": 221530 }, { "epoch": 2.3670067845504565, "grad_norm": 0.23517517745494843, "learning_rate": 9.551985151335102e-07, "loss": 0.0182, "step": 221540 }, { "epoch": 2.3671136278647364, "grad_norm": 1.7451809644699097, "learning_rate": 9.551915637609494e-07, "loss": 0.0342, "step": 221550 }, { "epoch": 2.367220471179016, "grad_norm": 3.3742516040802, "learning_rate": 9.551846118744429e-07, "loss": 0.0168, "step": 221560 }, { "epoch": 2.367327314493296, "grad_norm": 0.19247186183929443, "learning_rate": 9.551776594739985e-07, "loss": 0.0346, "step": 221570 }, { "epoch": 2.3674341578075753, "grad_norm": 0.05326090380549431, "learning_rate": 9.551707065596243e-07, "loss": 0.0018, "step": 221580 }, { "epoch": 2.3675410011218547, "grad_norm": 0.008222309872508049, "learning_rate": 9.55163753131328e-07, "loss": 0.0192, "step": 221590 }, { "epoch": 2.367647844436134, "grad_norm": 5.487696170806885, "learning_rate": 9.551567991891173e-07, "loss": 0.0346, "step": 221600 }, { "epoch": 2.367754687750414, "grad_norm": 0.04345608875155449, "learning_rate": 9.551498447330004e-07, "loss": 0.022, "step": 221610 }, { "epoch": 2.3678615310646935, "grad_norm": 0.47467178106307983, "learning_rate": 9.55142889762985e-07, "loss": 0.0058, "step": 221620 }, { "epoch": 2.3679683743789735, "grad_norm": 1.1535838842391968, "learning_rate": 9.551359342790788e-07, "loss": 0.0084, "step": 221630 }, { "epoch": 2.368075217693253, "grad_norm": 1.2724674940109253, "learning_rate": 9.5512897828129e-07, "loss": 0.029, "step": 221640 }, { "epoch": 2.3681820610075324, "grad_norm": 0.01701412722468376, "learning_rate": 9.55122021769626e-07, "loss": 0.0109, "step": 221650 }, { "epoch": 2.368288904321812, "grad_norm": 0.9860835075378418, "learning_rate": 9.55115064744095e-07, "loss": 0.0117, "step": 221660 }, { "epoch": 2.3683957476360917, "grad_norm": 5.345808506011963, "learning_rate": 9.551081072047046e-07, "loss": 0.0207, "step": 221670 }, { "epoch": 2.368502590950371, "grad_norm": 0.5387328863143921, "learning_rate": 9.551011491514628e-07, "loss": 0.0248, "step": 221680 }, { "epoch": 2.368609434264651, "grad_norm": 4.9810333251953125, "learning_rate": 9.550941905843776e-07, "loss": 0.0252, "step": 221690 }, { "epoch": 2.3687162775789306, "grad_norm": 5.527661323547363, "learning_rate": 9.550872315034566e-07, "loss": 0.0237, "step": 221700 }, { "epoch": 2.36882312089321, "grad_norm": 0.14288318157196045, "learning_rate": 9.550802719087078e-07, "loss": 0.0199, "step": 221710 }, { "epoch": 2.3689299642074895, "grad_norm": 0.3659951090812683, "learning_rate": 9.550733118001393e-07, "loss": 0.015, "step": 221720 }, { "epoch": 2.3690368075217694, "grad_norm": 0.08529560267925262, "learning_rate": 9.550663511777583e-07, "loss": 0.0077, "step": 221730 }, { "epoch": 2.369143650836049, "grad_norm": 0.009398705326020718, "learning_rate": 9.550593900415733e-07, "loss": 0.0031, "step": 221740 }, { "epoch": 2.3692504941503287, "grad_norm": 0.004243825562298298, "learning_rate": 9.550524283915918e-07, "loss": 0.0043, "step": 221750 }, { "epoch": 2.369357337464608, "grad_norm": 2.181845188140869, "learning_rate": 9.550454662278217e-07, "loss": 0.0023, "step": 221760 }, { "epoch": 2.3694641807788877, "grad_norm": 1.4134918451309204, "learning_rate": 9.550385035502711e-07, "loss": 0.0169, "step": 221770 }, { "epoch": 2.3695710240931676, "grad_norm": 0.4276426136493683, "learning_rate": 9.550315403589476e-07, "loss": 0.0289, "step": 221780 }, { "epoch": 2.369677867407447, "grad_norm": 1.4916106462478638, "learning_rate": 9.55024576653859e-07, "loss": 0.0329, "step": 221790 }, { "epoch": 2.3697847107217265, "grad_norm": 0.028069650754332542, "learning_rate": 9.550176124350134e-07, "loss": 0.027, "step": 221800 }, { "epoch": 2.3698915540360064, "grad_norm": 0.46197834610939026, "learning_rate": 9.550106477024187e-07, "loss": 0.0018, "step": 221810 }, { "epoch": 2.369998397350286, "grad_norm": 0.034419067203998566, "learning_rate": 9.550036824560825e-07, "loss": 0.0204, "step": 221820 }, { "epoch": 2.3701052406645653, "grad_norm": 0.9914424419403076, "learning_rate": 9.549967166960128e-07, "loss": 0.0479, "step": 221830 }, { "epoch": 2.370212083978845, "grad_norm": 0.02263730764389038, "learning_rate": 9.549897504222176e-07, "loss": 0.0035, "step": 221840 }, { "epoch": 2.3703189272931247, "grad_norm": 1.8407953977584839, "learning_rate": 9.549827836347043e-07, "loss": 0.0179, "step": 221850 }, { "epoch": 2.370425770607404, "grad_norm": 0.0013409045059233904, "learning_rate": 9.549758163334816e-07, "loss": 0.0133, "step": 221860 }, { "epoch": 2.370532613921684, "grad_norm": 2.179979085922241, "learning_rate": 9.549688485185566e-07, "loss": 0.036, "step": 221870 }, { "epoch": 2.3706394572359635, "grad_norm": 0.4470939338207245, "learning_rate": 9.549618801899374e-07, "loss": 0.0111, "step": 221880 }, { "epoch": 2.370746300550243, "grad_norm": 1.7784955501556396, "learning_rate": 9.549549113476318e-07, "loss": 0.0045, "step": 221890 }, { "epoch": 2.370853143864523, "grad_norm": 0.010882227681577206, "learning_rate": 9.54947941991648e-07, "loss": 0.0117, "step": 221900 }, { "epoch": 2.3709599871788023, "grad_norm": 4.1648478507995605, "learning_rate": 9.549409721219936e-07, "loss": 0.0206, "step": 221910 }, { "epoch": 2.3710668304930818, "grad_norm": 0.0037484385538846254, "learning_rate": 9.549340017386764e-07, "loss": 0.0061, "step": 221920 }, { "epoch": 2.3711736738073617, "grad_norm": 14.008883476257324, "learning_rate": 9.549270308417044e-07, "loss": 0.0704, "step": 221930 }, { "epoch": 2.371280517121641, "grad_norm": 4.964456558227539, "learning_rate": 9.549200594310855e-07, "loss": 0.0065, "step": 221940 }, { "epoch": 2.3713873604359206, "grad_norm": 0.7025011777877808, "learning_rate": 9.549130875068273e-07, "loss": 0.0109, "step": 221950 }, { "epoch": 2.3714942037502005, "grad_norm": 4.153311729431152, "learning_rate": 9.549061150689383e-07, "loss": 0.0308, "step": 221960 }, { "epoch": 2.37160104706448, "grad_norm": 0.05250072479248047, "learning_rate": 9.548991421174257e-07, "loss": 0.0194, "step": 221970 }, { "epoch": 2.3717078903787594, "grad_norm": 3.210761308670044, "learning_rate": 9.548921686522977e-07, "loss": 0.0188, "step": 221980 }, { "epoch": 2.3718147336930393, "grad_norm": 2.243150472640991, "learning_rate": 9.548851946735621e-07, "loss": 0.0102, "step": 221990 }, { "epoch": 2.3719215770073188, "grad_norm": 0.05663586035370827, "learning_rate": 9.548782201812267e-07, "loss": 0.0331, "step": 222000 }, { "epoch": 2.372028420321598, "grad_norm": 0.12624184787273407, "learning_rate": 9.548712451752995e-07, "loss": 0.0215, "step": 222010 }, { "epoch": 2.372135263635878, "grad_norm": 0.2526925802230835, "learning_rate": 9.548642696557884e-07, "loss": 0.0189, "step": 222020 }, { "epoch": 2.3722421069501576, "grad_norm": 3.826070547103882, "learning_rate": 9.54857293622701e-07, "loss": 0.0226, "step": 222030 }, { "epoch": 2.372348950264437, "grad_norm": 0.09657701849937439, "learning_rate": 9.548503170760458e-07, "loss": 0.0292, "step": 222040 }, { "epoch": 2.372455793578717, "grad_norm": 0.13921144604682922, "learning_rate": 9.5484334001583e-07, "loss": 0.0222, "step": 222050 }, { "epoch": 2.3725626368929964, "grad_norm": 0.16220717132091522, "learning_rate": 9.548363624420618e-07, "loss": 0.0082, "step": 222060 }, { "epoch": 2.372669480207276, "grad_norm": 1.385046124458313, "learning_rate": 9.548293843547489e-07, "loss": 0.0145, "step": 222070 }, { "epoch": 2.3727763235215558, "grad_norm": 8.975683212280273, "learning_rate": 9.548224057538994e-07, "loss": 0.021, "step": 222080 }, { "epoch": 2.3728831668358352, "grad_norm": 0.03891769424080849, "learning_rate": 9.548154266395209e-07, "loss": 0.0245, "step": 222090 }, { "epoch": 2.3729900101501147, "grad_norm": 10.051462173461914, "learning_rate": 9.548084470116217e-07, "loss": 0.0237, "step": 222100 }, { "epoch": 2.3730968534643946, "grad_norm": 0.7086196541786194, "learning_rate": 9.548014668702094e-07, "loss": 0.0251, "step": 222110 }, { "epoch": 2.373203696778674, "grad_norm": 0.07166681438684464, "learning_rate": 9.54794486215292e-07, "loss": 0.0201, "step": 222120 }, { "epoch": 2.3733105400929535, "grad_norm": 0.008219127543270588, "learning_rate": 9.54787505046877e-07, "loss": 0.0317, "step": 222130 }, { "epoch": 2.3734173834072334, "grad_norm": 0.11557473987340927, "learning_rate": 9.547805233649729e-07, "loss": 0.0005, "step": 222140 }, { "epoch": 2.373524226721513, "grad_norm": 0.20759828388690948, "learning_rate": 9.54773541169587e-07, "loss": 0.0158, "step": 222150 }, { "epoch": 2.3736310700357923, "grad_norm": 0.04025823250412941, "learning_rate": 9.547665584607277e-07, "loss": 0.0407, "step": 222160 }, { "epoch": 2.3737379133500722, "grad_norm": 3.2094578742980957, "learning_rate": 9.547595752384022e-07, "loss": 0.0129, "step": 222170 }, { "epoch": 2.3738447566643517, "grad_norm": 0.02788723073899746, "learning_rate": 9.547525915026194e-07, "loss": 0.0109, "step": 222180 }, { "epoch": 2.373951599978631, "grad_norm": 0.03442298248410225, "learning_rate": 9.547456072533862e-07, "loss": 0.0106, "step": 222190 }, { "epoch": 2.374058443292911, "grad_norm": 0.6933099031448364, "learning_rate": 9.54738622490711e-07, "loss": 0.0305, "step": 222200 }, { "epoch": 2.3741652866071905, "grad_norm": 0.1453939527273178, "learning_rate": 9.547316372146016e-07, "loss": 0.0067, "step": 222210 }, { "epoch": 2.3742721299214704, "grad_norm": 2.57157826423645, "learning_rate": 9.547246514250658e-07, "loss": 0.0196, "step": 222220 }, { "epoch": 2.37437897323575, "grad_norm": 0.4130864441394806, "learning_rate": 9.547176651221116e-07, "loss": 0.007, "step": 222230 }, { "epoch": 2.3744858165500293, "grad_norm": 0.001053337473422289, "learning_rate": 9.547106783057468e-07, "loss": 0.0092, "step": 222240 }, { "epoch": 2.374592659864309, "grad_norm": 0.06219961866736412, "learning_rate": 9.547036909759793e-07, "loss": 0.0193, "step": 222250 }, { "epoch": 2.3746995031785887, "grad_norm": 2.5961756706237793, "learning_rate": 9.546967031328173e-07, "loss": 0.0192, "step": 222260 }, { "epoch": 2.374806346492868, "grad_norm": 0.01130862720310688, "learning_rate": 9.546897147762681e-07, "loss": 0.0112, "step": 222270 }, { "epoch": 2.374913189807148, "grad_norm": 5.773796558380127, "learning_rate": 9.5468272590634e-07, "loss": 0.0175, "step": 222280 }, { "epoch": 2.3750200331214275, "grad_norm": 2.83972430229187, "learning_rate": 9.546757365230407e-07, "loss": 0.0197, "step": 222290 }, { "epoch": 2.375126876435707, "grad_norm": 0.7050483822822571, "learning_rate": 9.546687466263782e-07, "loss": 0.0434, "step": 222300 }, { "epoch": 2.3752337197499864, "grad_norm": 0.013330229558050632, "learning_rate": 9.546617562163603e-07, "loss": 0.0104, "step": 222310 }, { "epoch": 2.3753405630642663, "grad_norm": 2.24025821685791, "learning_rate": 9.546547652929951e-07, "loss": 0.0091, "step": 222320 }, { "epoch": 2.375447406378546, "grad_norm": 1.4797016382217407, "learning_rate": 9.546477738562903e-07, "loss": 0.0338, "step": 222330 }, { "epoch": 2.3755542496928257, "grad_norm": 0.9732669591903687, "learning_rate": 9.546407819062538e-07, "loss": 0.0257, "step": 222340 }, { "epoch": 2.375661093007105, "grad_norm": 0.1936020404100418, "learning_rate": 9.546337894428938e-07, "loss": 0.0028, "step": 222350 }, { "epoch": 2.3757679363213846, "grad_norm": 2.4962730407714844, "learning_rate": 9.546267964662177e-07, "loss": 0.0054, "step": 222360 }, { "epoch": 2.375874779635664, "grad_norm": 0.03617436811327934, "learning_rate": 9.546198029762336e-07, "loss": 0.0237, "step": 222370 }, { "epoch": 2.375981622949944, "grad_norm": 2.390399932861328, "learning_rate": 9.546128089729497e-07, "loss": 0.0104, "step": 222380 }, { "epoch": 2.3760884662642234, "grad_norm": 0.05808643996715546, "learning_rate": 9.546058144563734e-07, "loss": 0.0391, "step": 222390 }, { "epoch": 2.3761953095785033, "grad_norm": 2.4397599697113037, "learning_rate": 9.545988194265128e-07, "loss": 0.0184, "step": 222400 }, { "epoch": 2.376302152892783, "grad_norm": 0.016666606068611145, "learning_rate": 9.54591823883376e-07, "loss": 0.0142, "step": 222410 }, { "epoch": 2.3764089962070623, "grad_norm": 3.010389566421509, "learning_rate": 9.545848278269706e-07, "loss": 0.0128, "step": 222420 }, { "epoch": 2.3765158395213417, "grad_norm": 0.5575446486473083, "learning_rate": 9.545778312573046e-07, "loss": 0.0316, "step": 222430 }, { "epoch": 2.3766226828356216, "grad_norm": 3.8651387691497803, "learning_rate": 9.545708341743859e-07, "loss": 0.0188, "step": 222440 }, { "epoch": 2.376729526149901, "grad_norm": 0.010785650461912155, "learning_rate": 9.545638365782224e-07, "loss": 0.0127, "step": 222450 }, { "epoch": 2.376836369464181, "grad_norm": 1.1989142894744873, "learning_rate": 9.545568384688222e-07, "loss": 0.1256, "step": 222460 }, { "epoch": 2.3769432127784604, "grad_norm": 4.179763317108154, "learning_rate": 9.545498398461929e-07, "loss": 0.003, "step": 222470 }, { "epoch": 2.37705005609274, "grad_norm": 0.05913218855857849, "learning_rate": 9.545428407103425e-07, "loss": 0.0773, "step": 222480 }, { "epoch": 2.37715689940702, "grad_norm": 10.623109817504883, "learning_rate": 9.545358410612788e-07, "loss": 0.0433, "step": 222490 }, { "epoch": 2.3772637427212993, "grad_norm": 0.05388617888092995, "learning_rate": 9.545288408990102e-07, "loss": 0.0192, "step": 222500 }, { "epoch": 2.3773705860355787, "grad_norm": 4.735682964324951, "learning_rate": 9.54521840223544e-07, "loss": 0.0725, "step": 222510 }, { "epoch": 2.3774774293498586, "grad_norm": 3.481515407562256, "learning_rate": 9.545148390348883e-07, "loss": 0.0388, "step": 222520 }, { "epoch": 2.377584272664138, "grad_norm": 1.4052447080612183, "learning_rate": 9.545078373330511e-07, "loss": 0.0285, "step": 222530 }, { "epoch": 2.3776911159784175, "grad_norm": 4.992589473724365, "learning_rate": 9.545008351180403e-07, "loss": 0.0295, "step": 222540 }, { "epoch": 2.3777979592926974, "grad_norm": 0.7282933592796326, "learning_rate": 9.544938323898636e-07, "loss": 0.0157, "step": 222550 }, { "epoch": 2.377904802606977, "grad_norm": 0.40078699588775635, "learning_rate": 9.544868291485292e-07, "loss": 0.0057, "step": 222560 }, { "epoch": 2.3780116459212564, "grad_norm": 1.6939659118652344, "learning_rate": 9.54479825394045e-07, "loss": 0.0366, "step": 222570 }, { "epoch": 2.3781184892355363, "grad_norm": 7.2212958335876465, "learning_rate": 9.544728211264185e-07, "loss": 0.0327, "step": 222580 }, { "epoch": 2.3782253325498157, "grad_norm": 7.717949390411377, "learning_rate": 9.54465816345658e-07, "loss": 0.0208, "step": 222590 }, { "epoch": 2.378332175864095, "grad_norm": 8.652482986450195, "learning_rate": 9.544588110517714e-07, "loss": 0.0263, "step": 222600 }, { "epoch": 2.378439019178375, "grad_norm": 0.015359957702457905, "learning_rate": 9.544518052447664e-07, "loss": 0.0041, "step": 222610 }, { "epoch": 2.3785458624926545, "grad_norm": 0.00307118589989841, "learning_rate": 9.54444798924651e-07, "loss": 0.0045, "step": 222620 }, { "epoch": 2.378652705806934, "grad_norm": 2.091360330581665, "learning_rate": 9.544377920914332e-07, "loss": 0.0038, "step": 222630 }, { "epoch": 2.378759549121214, "grad_norm": 7.9678850173950195, "learning_rate": 9.544307847451207e-07, "loss": 0.0492, "step": 222640 }, { "epoch": 2.3788663924354934, "grad_norm": 0.004844872280955315, "learning_rate": 9.544237768857218e-07, "loss": 0.0259, "step": 222650 }, { "epoch": 2.378973235749773, "grad_norm": 3.9159610271453857, "learning_rate": 9.54416768513244e-07, "loss": 0.0202, "step": 222660 }, { "epoch": 2.3790800790640527, "grad_norm": 4.412619590759277, "learning_rate": 9.544097596276953e-07, "loss": 0.0343, "step": 222670 }, { "epoch": 2.379186922378332, "grad_norm": 4.13296365737915, "learning_rate": 9.544027502290836e-07, "loss": 0.0235, "step": 222680 }, { "epoch": 2.3792937656926116, "grad_norm": 0.11077473312616348, "learning_rate": 9.54395740317417e-07, "loss": 0.0088, "step": 222690 }, { "epoch": 2.3794006090068915, "grad_norm": 15.250276565551758, "learning_rate": 9.543887298927033e-07, "loss": 0.0363, "step": 222700 }, { "epoch": 2.379507452321171, "grad_norm": 0.01229096669703722, "learning_rate": 9.543817189549505e-07, "loss": 0.0146, "step": 222710 }, { "epoch": 2.3796142956354505, "grad_norm": 4.100192546844482, "learning_rate": 9.543747075041665e-07, "loss": 0.0572, "step": 222720 }, { "epoch": 2.3797211389497304, "grad_norm": 0.014526845887303352, "learning_rate": 9.54367695540359e-07, "loss": 0.0275, "step": 222730 }, { "epoch": 2.37982798226401, "grad_norm": 4.743135929107666, "learning_rate": 9.543606830635363e-07, "loss": 0.008, "step": 222740 }, { "epoch": 2.3799348255782893, "grad_norm": 1.8092645406723022, "learning_rate": 9.543536700737058e-07, "loss": 0.0234, "step": 222750 }, { "epoch": 2.380041668892569, "grad_norm": 0.03047831542789936, "learning_rate": 9.54346656570876e-07, "loss": 0.0012, "step": 222760 }, { "epoch": 2.3801485122068486, "grad_norm": 10.427936553955078, "learning_rate": 9.543396425550545e-07, "loss": 0.0892, "step": 222770 }, { "epoch": 2.380255355521128, "grad_norm": 11.040499687194824, "learning_rate": 9.543326280262492e-07, "loss": 0.0081, "step": 222780 }, { "epoch": 2.380362198835408, "grad_norm": 0.2066071480512619, "learning_rate": 9.54325612984468e-07, "loss": 0.0133, "step": 222790 }, { "epoch": 2.3804690421496875, "grad_norm": 3.266552448272705, "learning_rate": 9.543185974297188e-07, "loss": 0.019, "step": 222800 }, { "epoch": 2.380575885463967, "grad_norm": 0.14922353625297546, "learning_rate": 9.543115813620097e-07, "loss": 0.0174, "step": 222810 }, { "epoch": 2.380682728778247, "grad_norm": 0.05535823106765747, "learning_rate": 9.543045647813487e-07, "loss": 0.0171, "step": 222820 }, { "epoch": 2.3807895720925263, "grad_norm": 0.9313980937004089, "learning_rate": 9.542975476877433e-07, "loss": 0.0202, "step": 222830 }, { "epoch": 2.3808964154068057, "grad_norm": 1.4067115783691406, "learning_rate": 9.54290530081202e-07, "loss": 0.0326, "step": 222840 }, { "epoch": 2.3810032587210856, "grad_norm": 10.497939109802246, "learning_rate": 9.542835119617321e-07, "loss": 0.0277, "step": 222850 }, { "epoch": 2.381110102035365, "grad_norm": 1.6694531440734863, "learning_rate": 9.54276493329342e-07, "loss": 0.0248, "step": 222860 }, { "epoch": 2.3812169453496446, "grad_norm": 0.0317678339779377, "learning_rate": 9.542694741840394e-07, "loss": 0.0217, "step": 222870 }, { "epoch": 2.3813237886639245, "grad_norm": 0.4646080732345581, "learning_rate": 9.542624545258324e-07, "loss": 0.0067, "step": 222880 }, { "epoch": 2.381430631978204, "grad_norm": 0.02711561881005764, "learning_rate": 9.542554343547285e-07, "loss": 0.0341, "step": 222890 }, { "epoch": 2.3815374752924834, "grad_norm": 3.2340049743652344, "learning_rate": 9.542484136707362e-07, "loss": 0.0118, "step": 222900 }, { "epoch": 2.3816443186067633, "grad_norm": 0.691987931728363, "learning_rate": 9.54241392473863e-07, "loss": 0.0068, "step": 222910 }, { "epoch": 2.3817511619210427, "grad_norm": 7.170839786529541, "learning_rate": 9.542343707641172e-07, "loss": 0.0255, "step": 222920 }, { "epoch": 2.381858005235322, "grad_norm": 6.1058430671691895, "learning_rate": 9.542273485415063e-07, "loss": 0.0312, "step": 222930 }, { "epoch": 2.381964848549602, "grad_norm": 0.09458296746015549, "learning_rate": 9.542203258060385e-07, "loss": 0.0199, "step": 222940 }, { "epoch": 2.3820716918638816, "grad_norm": 0.766958475112915, "learning_rate": 9.542133025577217e-07, "loss": 0.0242, "step": 222950 }, { "epoch": 2.382178535178161, "grad_norm": 0.131726935505867, "learning_rate": 9.542062787965638e-07, "loss": 0.0199, "step": 222960 }, { "epoch": 2.382285378492441, "grad_norm": 0.030789805576205254, "learning_rate": 9.541992545225727e-07, "loss": 0.0089, "step": 222970 }, { "epoch": 2.3823922218067204, "grad_norm": 0.13766534626483917, "learning_rate": 9.541922297357565e-07, "loss": 0.0132, "step": 222980 }, { "epoch": 2.3824990651210003, "grad_norm": 0.38978803157806396, "learning_rate": 9.541852044361227e-07, "loss": 0.0377, "step": 222990 }, { "epoch": 2.3826059084352798, "grad_norm": 0.24693676829338074, "learning_rate": 9.541781786236798e-07, "loss": 0.0636, "step": 223000 }, { "epoch": 2.382712751749559, "grad_norm": 0.2838206887245178, "learning_rate": 9.541711522984352e-07, "loss": 0.0143, "step": 223010 }, { "epoch": 2.3828195950638387, "grad_norm": 0.004579327069222927, "learning_rate": 9.541641254603974e-07, "loss": 0.0232, "step": 223020 }, { "epoch": 2.3829264383781186, "grad_norm": 8.099272727966309, "learning_rate": 9.541570981095738e-07, "loss": 0.0396, "step": 223030 }, { "epoch": 2.383033281692398, "grad_norm": 6.499846458435059, "learning_rate": 9.541500702459727e-07, "loss": 0.1007, "step": 223040 }, { "epoch": 2.383140125006678, "grad_norm": 2.5241661071777344, "learning_rate": 9.541430418696018e-07, "loss": 0.0331, "step": 223050 }, { "epoch": 2.3832469683209574, "grad_norm": 2.496610164642334, "learning_rate": 9.54136012980469e-07, "loss": 0.0092, "step": 223060 }, { "epoch": 2.383353811635237, "grad_norm": 0.2739003002643585, "learning_rate": 9.541289835785827e-07, "loss": 0.113, "step": 223070 }, { "epoch": 2.3834606549495163, "grad_norm": 4.718664646148682, "learning_rate": 9.541219536639504e-07, "loss": 0.0411, "step": 223080 }, { "epoch": 2.383567498263796, "grad_norm": 2.598205804824829, "learning_rate": 9.5411492323658e-07, "loss": 0.0095, "step": 223090 }, { "epoch": 2.3836743415780757, "grad_norm": 0.05503077432513237, "learning_rate": 9.541078922964797e-07, "loss": 0.0318, "step": 223100 }, { "epoch": 2.3837811848923556, "grad_norm": 0.2947927415370941, "learning_rate": 9.541008608436571e-07, "loss": 0.0047, "step": 223110 }, { "epoch": 2.383888028206635, "grad_norm": 7.10610294342041, "learning_rate": 9.540938288781207e-07, "loss": 0.0389, "step": 223120 }, { "epoch": 2.3839948715209145, "grad_norm": 0.7095021605491638, "learning_rate": 9.540867963998778e-07, "loss": 0.0068, "step": 223130 }, { "epoch": 2.384101714835194, "grad_norm": 0.009284589439630508, "learning_rate": 9.540797634089368e-07, "loss": 0.0108, "step": 223140 }, { "epoch": 2.384208558149474, "grad_norm": 0.38566598296165466, "learning_rate": 9.540727299053054e-07, "loss": 0.0248, "step": 223150 }, { "epoch": 2.3843154014637533, "grad_norm": 3.7931010723114014, "learning_rate": 9.540656958889916e-07, "loss": 0.0197, "step": 223160 }, { "epoch": 2.384422244778033, "grad_norm": 1.3442034721374512, "learning_rate": 9.540586613600034e-07, "loss": 0.0169, "step": 223170 }, { "epoch": 2.3845290880923127, "grad_norm": 0.7235444188117981, "learning_rate": 9.540516263183486e-07, "loss": 0.0131, "step": 223180 }, { "epoch": 2.384635931406592, "grad_norm": 1.5616397857666016, "learning_rate": 9.540445907640354e-07, "loss": 0.0548, "step": 223190 }, { "epoch": 2.3847427747208716, "grad_norm": 1.6152496337890625, "learning_rate": 9.540375546970716e-07, "loss": 0.0248, "step": 223200 }, { "epoch": 2.3848496180351515, "grad_norm": 3.3496716022491455, "learning_rate": 9.54030518117465e-07, "loss": 0.0167, "step": 223210 }, { "epoch": 2.384956461349431, "grad_norm": 1.9034827947616577, "learning_rate": 9.540234810252238e-07, "loss": 0.021, "step": 223220 }, { "epoch": 2.385063304663711, "grad_norm": 0.016671184450387955, "learning_rate": 9.540164434203557e-07, "loss": 0.0458, "step": 223230 }, { "epoch": 2.3851701479779903, "grad_norm": 0.012357054278254509, "learning_rate": 9.54009405302869e-07, "loss": 0.0166, "step": 223240 }, { "epoch": 2.3852769912922698, "grad_norm": 0.49083203077316284, "learning_rate": 9.540023666727712e-07, "loss": 0.0127, "step": 223250 }, { "epoch": 2.3853838346065497, "grad_norm": 0.043352726846933365, "learning_rate": 9.539953275300704e-07, "loss": 0.0181, "step": 223260 }, { "epoch": 2.385490677920829, "grad_norm": 3.4357917308807373, "learning_rate": 9.539882878747748e-07, "loss": 0.0489, "step": 223270 }, { "epoch": 2.3855975212351086, "grad_norm": 0.9957913160324097, "learning_rate": 9.539812477068921e-07, "loss": 0.0102, "step": 223280 }, { "epoch": 2.3857043645493885, "grad_norm": 1.6910693645477295, "learning_rate": 9.539742070264302e-07, "loss": 0.0221, "step": 223290 }, { "epoch": 2.385811207863668, "grad_norm": 1.6001219749450684, "learning_rate": 9.539671658333975e-07, "loss": 0.0027, "step": 223300 }, { "epoch": 2.3859180511779474, "grad_norm": 0.002142613520845771, "learning_rate": 9.539601241278013e-07, "loss": 0.0023, "step": 223310 }, { "epoch": 2.3860248944922273, "grad_norm": 0.12118703126907349, "learning_rate": 9.539530819096497e-07, "loss": 0.0214, "step": 223320 }, { "epoch": 2.386131737806507, "grad_norm": 0.43533286452293396, "learning_rate": 9.53946039178951e-07, "loss": 0.0183, "step": 223330 }, { "epoch": 2.3862385811207862, "grad_norm": 1.4081388711929321, "learning_rate": 9.53938995935713e-07, "loss": 0.0096, "step": 223340 }, { "epoch": 2.386345424435066, "grad_norm": 0.48395007848739624, "learning_rate": 9.539319521799436e-07, "loss": 0.0405, "step": 223350 }, { "epoch": 2.3864522677493456, "grad_norm": 0.019922848790884018, "learning_rate": 9.539249079116509e-07, "loss": 0.0155, "step": 223360 }, { "epoch": 2.386559111063625, "grad_norm": 2.0180392265319824, "learning_rate": 9.539178631308426e-07, "loss": 0.0242, "step": 223370 }, { "epoch": 2.386665954377905, "grad_norm": 9.509872436523438, "learning_rate": 9.539108178375267e-07, "loss": 0.0155, "step": 223380 }, { "epoch": 2.3867727976921844, "grad_norm": 1.1034404039382935, "learning_rate": 9.539037720317114e-07, "loss": 0.0665, "step": 223390 }, { "epoch": 2.386879641006464, "grad_norm": 0.004670687019824982, "learning_rate": 9.538967257134044e-07, "loss": 0.0089, "step": 223400 }, { "epoch": 2.386986484320744, "grad_norm": 0.015533635392785072, "learning_rate": 9.538896788826135e-07, "loss": 0.0155, "step": 223410 }, { "epoch": 2.3870933276350232, "grad_norm": 4.200006008148193, "learning_rate": 9.538826315393473e-07, "loss": 0.0109, "step": 223420 }, { "epoch": 2.3872001709493027, "grad_norm": 0.021104605868458748, "learning_rate": 9.538755836836131e-07, "loss": 0.0055, "step": 223430 }, { "epoch": 2.3873070142635826, "grad_norm": 8.144283294677734, "learning_rate": 9.538685353154192e-07, "loss": 0.0028, "step": 223440 }, { "epoch": 2.387413857577862, "grad_norm": 1.7030999660491943, "learning_rate": 9.538614864347735e-07, "loss": 0.0154, "step": 223450 }, { "epoch": 2.3875207008921415, "grad_norm": 5.755122661590576, "learning_rate": 9.538544370416836e-07, "loss": 0.026, "step": 223460 }, { "epoch": 2.3876275442064214, "grad_norm": 0.2613069415092468, "learning_rate": 9.538473871361581e-07, "loss": 0.0153, "step": 223470 }, { "epoch": 2.387734387520701, "grad_norm": 1.0209534168243408, "learning_rate": 9.538403367182046e-07, "loss": 0.0037, "step": 223480 }, { "epoch": 2.3878412308349803, "grad_norm": 0.0014624171890318394, "learning_rate": 9.538332857878313e-07, "loss": 0.0046, "step": 223490 }, { "epoch": 2.3879480741492602, "grad_norm": 0.00820097979158163, "learning_rate": 9.538262343450456e-07, "loss": 0.0253, "step": 223500 }, { "epoch": 2.3880549174635397, "grad_norm": 0.05092862620949745, "learning_rate": 9.53819182389856e-07, "loss": 0.008, "step": 223510 }, { "epoch": 2.388161760777819, "grad_norm": 0.14107970893383026, "learning_rate": 9.538121299222704e-07, "loss": 0.0273, "step": 223520 }, { "epoch": 2.388268604092099, "grad_norm": 7.859539031982422, "learning_rate": 9.538050769422965e-07, "loss": 0.0167, "step": 223530 }, { "epoch": 2.3883754474063785, "grad_norm": 5.945033073425293, "learning_rate": 9.537980234499426e-07, "loss": 0.0475, "step": 223540 }, { "epoch": 2.388482290720658, "grad_norm": 0.2843545973300934, "learning_rate": 9.537909694452162e-07, "loss": 0.0171, "step": 223550 }, { "epoch": 2.388589134034938, "grad_norm": 1.3124157190322876, "learning_rate": 9.537839149281257e-07, "loss": 0.002, "step": 223560 }, { "epoch": 2.3886959773492173, "grad_norm": 0.04985637590289116, "learning_rate": 9.53776859898679e-07, "loss": 0.0084, "step": 223570 }, { "epoch": 2.388802820663497, "grad_norm": 1.7399059534072876, "learning_rate": 9.537698043568838e-07, "loss": 0.0123, "step": 223580 }, { "epoch": 2.3889096639777767, "grad_norm": 15.605218887329102, "learning_rate": 9.537627483027485e-07, "loss": 0.0613, "step": 223590 }, { "epoch": 2.389016507292056, "grad_norm": 5.347483158111572, "learning_rate": 9.537556917362806e-07, "loss": 0.0478, "step": 223600 }, { "epoch": 2.3891233506063356, "grad_norm": 0.023063689470291138, "learning_rate": 9.537486346574883e-07, "loss": 0.0124, "step": 223610 }, { "epoch": 2.3892301939206155, "grad_norm": 1.4632282257080078, "learning_rate": 9.537415770663794e-07, "loss": 0.005, "step": 223620 }, { "epoch": 2.389337037234895, "grad_norm": 0.005757145583629608, "learning_rate": 9.537345189629623e-07, "loss": 0.028, "step": 223630 }, { "epoch": 2.3894438805491744, "grad_norm": 4.1931633949279785, "learning_rate": 9.537274603472445e-07, "loss": 0.0023, "step": 223640 }, { "epoch": 2.3895507238634544, "grad_norm": 2.9779727458953857, "learning_rate": 9.537204012192342e-07, "loss": 0.03, "step": 223650 }, { "epoch": 2.389657567177734, "grad_norm": 0.6808603405952454, "learning_rate": 9.537133415789393e-07, "loss": 0.0056, "step": 223660 }, { "epoch": 2.3897644104920133, "grad_norm": 0.04360727220773697, "learning_rate": 9.53706281426368e-07, "loss": 0.0198, "step": 223670 }, { "epoch": 2.389871253806293, "grad_norm": 0.1385156214237213, "learning_rate": 9.536992207615277e-07, "loss": 0.0141, "step": 223680 }, { "epoch": 2.3899780971205726, "grad_norm": 1.6483023166656494, "learning_rate": 9.53692159584427e-07, "loss": 0.006, "step": 223690 }, { "epoch": 2.3900849404348525, "grad_norm": 5.22450590133667, "learning_rate": 9.536850978950736e-07, "loss": 0.0151, "step": 223700 }, { "epoch": 2.390191783749132, "grad_norm": 0.007510367315262556, "learning_rate": 9.536780356934753e-07, "loss": 0.0074, "step": 223710 }, { "epoch": 2.3902986270634115, "grad_norm": 7.060265064239502, "learning_rate": 9.536709729796405e-07, "loss": 0.0193, "step": 223720 }, { "epoch": 2.390405470377691, "grad_norm": 4.863270282745361, "learning_rate": 9.536639097535768e-07, "loss": 0.0096, "step": 223730 }, { "epoch": 2.390512313691971, "grad_norm": 4.2106032371521, "learning_rate": 9.536568460152923e-07, "loss": 0.0427, "step": 223740 }, { "epoch": 2.3906191570062503, "grad_norm": 4.0728254318237305, "learning_rate": 9.536497817647951e-07, "loss": 0.0102, "step": 223750 }, { "epoch": 2.39072600032053, "grad_norm": 0.01328237447887659, "learning_rate": 9.536427170020928e-07, "loss": 0.0106, "step": 223760 }, { "epoch": 2.3908328436348096, "grad_norm": 4.2764129638671875, "learning_rate": 9.53635651727194e-07, "loss": 0.0171, "step": 223770 }, { "epoch": 2.390939686949089, "grad_norm": 0.008329237811267376, "learning_rate": 9.53628585940106e-07, "loss": 0.0205, "step": 223780 }, { "epoch": 2.3910465302633686, "grad_norm": 0.019603105261921883, "learning_rate": 9.536215196408372e-07, "loss": 0.0089, "step": 223790 }, { "epoch": 2.3911533735776485, "grad_norm": 0.2007289081811905, "learning_rate": 9.536144528293955e-07, "loss": 0.0788, "step": 223800 }, { "epoch": 2.391260216891928, "grad_norm": 0.14694738388061523, "learning_rate": 9.536073855057886e-07, "loss": 0.0058, "step": 223810 }, { "epoch": 2.391367060206208, "grad_norm": 4.195113658905029, "learning_rate": 9.536003176700251e-07, "loss": 0.0419, "step": 223820 }, { "epoch": 2.3914739035204873, "grad_norm": 0.8645177483558655, "learning_rate": 9.535932493221124e-07, "loss": 0.0107, "step": 223830 }, { "epoch": 2.3915807468347667, "grad_norm": 0.2216021865606308, "learning_rate": 9.535861804620587e-07, "loss": 0.007, "step": 223840 }, { "epoch": 2.391687590149046, "grad_norm": 0.050961341708898544, "learning_rate": 9.535791110898721e-07, "loss": 0.0083, "step": 223850 }, { "epoch": 2.391794433463326, "grad_norm": 0.057983044534921646, "learning_rate": 9.535720412055604e-07, "loss": 0.0261, "step": 223860 }, { "epoch": 2.3919012767776056, "grad_norm": 0.021333253011107445, "learning_rate": 9.535649708091316e-07, "loss": 0.0266, "step": 223870 }, { "epoch": 2.3920081200918855, "grad_norm": 8.819424629211426, "learning_rate": 9.535578999005935e-07, "loss": 0.0386, "step": 223880 }, { "epoch": 2.392114963406165, "grad_norm": 5.191520690917969, "learning_rate": 9.535508284799545e-07, "loss": 0.0304, "step": 223890 }, { "epoch": 2.3922218067204444, "grad_norm": 0.030741408467292786, "learning_rate": 9.535437565472224e-07, "loss": 0.0463, "step": 223900 }, { "epoch": 2.392328650034724, "grad_norm": 0.7928058505058289, "learning_rate": 9.535366841024053e-07, "loss": 0.0398, "step": 223910 }, { "epoch": 2.3924354933490037, "grad_norm": 2.0183565616607666, "learning_rate": 9.535296111455108e-07, "loss": 0.035, "step": 223920 }, { "epoch": 2.392542336663283, "grad_norm": 0.2688988149166107, "learning_rate": 9.535225376765472e-07, "loss": 0.0255, "step": 223930 }, { "epoch": 2.392649179977563, "grad_norm": 0.05924579128623009, "learning_rate": 9.535154636955225e-07, "loss": 0.076, "step": 223940 }, { "epoch": 2.3927560232918426, "grad_norm": 0.023667501285672188, "learning_rate": 9.535083892024447e-07, "loss": 0.0374, "step": 223950 }, { "epoch": 2.392862866606122, "grad_norm": 1.470007061958313, "learning_rate": 9.535013141973215e-07, "loss": 0.0189, "step": 223960 }, { "epoch": 2.392969709920402, "grad_norm": 0.034980956465005875, "learning_rate": 9.534942386801612e-07, "loss": 0.0098, "step": 223970 }, { "epoch": 2.3930765532346814, "grad_norm": 0.032809048891067505, "learning_rate": 9.534871626509716e-07, "loss": 0.0051, "step": 223980 }, { "epoch": 2.393183396548961, "grad_norm": 0.22404907643795013, "learning_rate": 9.534800861097608e-07, "loss": 0.0014, "step": 223990 }, { "epoch": 2.3932902398632407, "grad_norm": 2.8729324340820312, "learning_rate": 9.534730090565368e-07, "loss": 0.0094, "step": 224000 }, { "epoch": 2.39339708317752, "grad_norm": 2.712332248687744, "learning_rate": 9.534659314913076e-07, "loss": 0.0275, "step": 224010 }, { "epoch": 2.3935039264917997, "grad_norm": 1.5250593423843384, "learning_rate": 9.53458853414081e-07, "loss": 0.0604, "step": 224020 }, { "epoch": 2.3936107698060796, "grad_norm": 3.56437349319458, "learning_rate": 9.534517748248652e-07, "loss": 0.062, "step": 224030 }, { "epoch": 2.393717613120359, "grad_norm": 9.690217971801758, "learning_rate": 9.534446957236682e-07, "loss": 0.0229, "step": 224040 }, { "epoch": 2.3938244564346385, "grad_norm": 0.17706623673439026, "learning_rate": 9.534376161104978e-07, "loss": 0.0237, "step": 224050 }, { "epoch": 2.3939312997489184, "grad_norm": 1.4462223052978516, "learning_rate": 9.534305359853621e-07, "loss": 0.017, "step": 224060 }, { "epoch": 2.394038143063198, "grad_norm": 0.08094929903745651, "learning_rate": 9.534234553482691e-07, "loss": 0.0183, "step": 224070 }, { "epoch": 2.3941449863774773, "grad_norm": 2.091947078704834, "learning_rate": 9.53416374199227e-07, "loss": 0.0063, "step": 224080 }, { "epoch": 2.394251829691757, "grad_norm": 1.375024676322937, "learning_rate": 9.534092925382434e-07, "loss": 0.0135, "step": 224090 }, { "epoch": 2.3943586730060367, "grad_norm": 1.8173513412475586, "learning_rate": 9.534022103653264e-07, "loss": 0.0222, "step": 224100 }, { "epoch": 2.394465516320316, "grad_norm": 0.02182506024837494, "learning_rate": 9.533951276804841e-07, "loss": 0.0038, "step": 224110 }, { "epoch": 2.394572359634596, "grad_norm": 2.323928117752075, "learning_rate": 9.533880444837246e-07, "loss": 0.0098, "step": 224120 }, { "epoch": 2.3946792029488755, "grad_norm": 0.09544741362333298, "learning_rate": 9.533809607750558e-07, "loss": 0.0166, "step": 224130 }, { "epoch": 2.394786046263155, "grad_norm": 0.011414342559874058, "learning_rate": 9.533738765544856e-07, "loss": 0.0039, "step": 224140 }, { "epoch": 2.394892889577435, "grad_norm": 0.004174227360635996, "learning_rate": 9.533667918220223e-07, "loss": 0.0154, "step": 224150 }, { "epoch": 2.3949997328917143, "grad_norm": 4.030078411102295, "learning_rate": 9.533597065776734e-07, "loss": 0.0267, "step": 224160 }, { "epoch": 2.3951065762059938, "grad_norm": 0.08349737524986267, "learning_rate": 9.533526208214473e-07, "loss": 0.0236, "step": 224170 }, { "epoch": 2.3952134195202737, "grad_norm": 0.490947425365448, "learning_rate": 9.533455345533519e-07, "loss": 0.0087, "step": 224180 }, { "epoch": 2.395320262834553, "grad_norm": 1.061915636062622, "learning_rate": 9.53338447773395e-07, "loss": 0.0218, "step": 224190 }, { "epoch": 2.3954271061488326, "grad_norm": 0.06769036501646042, "learning_rate": 9.533313604815849e-07, "loss": 0.0175, "step": 224200 }, { "epoch": 2.3955339494631125, "grad_norm": 0.10435974597930908, "learning_rate": 9.533242726779296e-07, "loss": 0.0164, "step": 224210 }, { "epoch": 2.395640792777392, "grad_norm": 7.106018543243408, "learning_rate": 9.533171843624368e-07, "loss": 0.0287, "step": 224220 }, { "epoch": 2.3957476360916714, "grad_norm": 0.17497913539409637, "learning_rate": 9.533100955351148e-07, "loss": 0.0261, "step": 224230 }, { "epoch": 2.3958544794059513, "grad_norm": 0.1463472545146942, "learning_rate": 9.533030061959714e-07, "loss": 0.0318, "step": 224240 }, { "epoch": 2.3959613227202308, "grad_norm": 0.008891443721950054, "learning_rate": 9.532959163450147e-07, "loss": 0.0178, "step": 224250 }, { "epoch": 2.3960681660345102, "grad_norm": 0.49509599804878235, "learning_rate": 9.532888259822527e-07, "loss": 0.022, "step": 224260 }, { "epoch": 2.39617500934879, "grad_norm": 3.455909490585327, "learning_rate": 9.532817351076934e-07, "loss": 0.0207, "step": 224270 }, { "epoch": 2.3962818526630696, "grad_norm": 0.4738111197948456, "learning_rate": 9.532746437213448e-07, "loss": 0.0324, "step": 224280 }, { "epoch": 2.396388695977349, "grad_norm": 2.091639518737793, "learning_rate": 9.53267551823215e-07, "loss": 0.0573, "step": 224290 }, { "epoch": 2.396495539291629, "grad_norm": 0.0023566288873553276, "learning_rate": 9.532604594133119e-07, "loss": 0.031, "step": 224300 }, { "epoch": 2.3966023826059084, "grad_norm": 0.02096790075302124, "learning_rate": 9.532533664916435e-07, "loss": 0.0526, "step": 224310 }, { "epoch": 2.396709225920188, "grad_norm": 0.004902345594018698, "learning_rate": 9.532462730582178e-07, "loss": 0.0148, "step": 224320 }, { "epoch": 2.3968160692344678, "grad_norm": 2.1072657108306885, "learning_rate": 9.53239179113043e-07, "loss": 0.007, "step": 224330 }, { "epoch": 2.3969229125487472, "grad_norm": 0.04826418310403824, "learning_rate": 9.532320846561268e-07, "loss": 0.0579, "step": 224340 }, { "epoch": 2.3970297558630267, "grad_norm": 0.08820709586143494, "learning_rate": 9.532249896874774e-07, "loss": 0.0195, "step": 224350 }, { "epoch": 2.3971365991773066, "grad_norm": 7.058499813079834, "learning_rate": 9.532178942071029e-07, "loss": 0.0149, "step": 224360 }, { "epoch": 2.397243442491586, "grad_norm": 23.193113327026367, "learning_rate": 9.532107982150111e-07, "loss": 0.0218, "step": 224370 }, { "epoch": 2.3973502858058655, "grad_norm": 0.08800958096981049, "learning_rate": 9.5320370171121e-07, "loss": 0.008, "step": 224380 }, { "epoch": 2.3974571291201454, "grad_norm": 0.02052025869488716, "learning_rate": 9.531966046957079e-07, "loss": 0.0124, "step": 224390 }, { "epoch": 2.397563972434425, "grad_norm": 0.2594488263130188, "learning_rate": 9.531895071685125e-07, "loss": 0.0066, "step": 224400 }, { "epoch": 2.3976708157487043, "grad_norm": 0.0026079758536070585, "learning_rate": 9.531824091296321e-07, "loss": 0.0323, "step": 224410 }, { "epoch": 2.3977776590629842, "grad_norm": 0.004301188979297876, "learning_rate": 9.531753105790744e-07, "loss": 0.0633, "step": 224420 }, { "epoch": 2.3978845023772637, "grad_norm": 0.8082401156425476, "learning_rate": 9.531682115168476e-07, "loss": 0.0142, "step": 224430 }, { "epoch": 2.397991345691543, "grad_norm": 0.015117825008928776, "learning_rate": 9.531611119429596e-07, "loss": 0.0111, "step": 224440 }, { "epoch": 2.398098189005823, "grad_norm": 1.2553173303604126, "learning_rate": 9.531540118574187e-07, "loss": 0.0174, "step": 224450 }, { "epoch": 2.3982050323201025, "grad_norm": 0.04108016565442085, "learning_rate": 9.531469112602326e-07, "loss": 0.0144, "step": 224460 }, { "epoch": 2.3983118756343824, "grad_norm": 3.5741117000579834, "learning_rate": 9.531398101514095e-07, "loss": 0.0188, "step": 224470 }, { "epoch": 2.398418718948662, "grad_norm": 0.2217983454465866, "learning_rate": 9.531327085309574e-07, "loss": 0.022, "step": 224480 }, { "epoch": 2.3985255622629413, "grad_norm": 0.20710332691669464, "learning_rate": 9.531256063988842e-07, "loss": 0.0205, "step": 224490 }, { "epoch": 2.398632405577221, "grad_norm": 2.566617727279663, "learning_rate": 9.53118503755198e-07, "loss": 0.0342, "step": 224500 }, { "epoch": 2.3987392488915007, "grad_norm": 0.9367498159408569, "learning_rate": 9.531114005999068e-07, "loss": 0.0393, "step": 224510 }, { "epoch": 2.39884609220578, "grad_norm": 0.03848293051123619, "learning_rate": 9.531042969330185e-07, "loss": 0.0039, "step": 224520 }, { "epoch": 2.39895293552006, "grad_norm": 0.03950820490717888, "learning_rate": 9.530971927545415e-07, "loss": 0.018, "step": 224530 }, { "epoch": 2.3990597788343395, "grad_norm": 0.8979688286781311, "learning_rate": 9.530900880644834e-07, "loss": 0.0257, "step": 224540 }, { "epoch": 2.399166622148619, "grad_norm": 3.8749027252197266, "learning_rate": 9.530829828628525e-07, "loss": 0.0182, "step": 224550 }, { "epoch": 2.3992734654628984, "grad_norm": 3.7946953773498535, "learning_rate": 9.530758771496566e-07, "loss": 0.0495, "step": 224560 }, { "epoch": 2.3993803087771783, "grad_norm": 0.2912254333496094, "learning_rate": 9.530687709249039e-07, "loss": 0.0472, "step": 224570 }, { "epoch": 2.399487152091458, "grad_norm": 0.264329731464386, "learning_rate": 9.530616641886025e-07, "loss": 0.0406, "step": 224580 }, { "epoch": 2.3995939954057377, "grad_norm": 2.600067615509033, "learning_rate": 9.530545569407599e-07, "loss": 0.0089, "step": 224590 }, { "epoch": 2.399700838720017, "grad_norm": 0.007305834908038378, "learning_rate": 9.530474491813849e-07, "loss": 0.0072, "step": 224600 }, { "epoch": 2.3998076820342966, "grad_norm": 4.264058589935303, "learning_rate": 9.530403409104851e-07, "loss": 0.0144, "step": 224610 }, { "epoch": 2.399914525348576, "grad_norm": 1.0330144166946411, "learning_rate": 9.530332321280684e-07, "loss": 0.0209, "step": 224620 }, { "epoch": 2.400021368662856, "grad_norm": 0.010960794985294342, "learning_rate": 9.530261228341429e-07, "loss": 0.0043, "step": 224630 }, { "epoch": 2.4001282119771354, "grad_norm": 0.071515753865242, "learning_rate": 9.53019013028717e-07, "loss": 0.0452, "step": 224640 }, { "epoch": 2.4002350552914153, "grad_norm": 0.04675108194351196, "learning_rate": 9.530119027117983e-07, "loss": 0.0251, "step": 224650 }, { "epoch": 2.400341898605695, "grad_norm": 0.38118889927864075, "learning_rate": 9.530047918833949e-07, "loss": 0.0279, "step": 224660 }, { "epoch": 2.4004487419199743, "grad_norm": 1.4313050508499146, "learning_rate": 9.529976805435152e-07, "loss": 0.0096, "step": 224670 }, { "epoch": 2.4005555852342537, "grad_norm": 0.009561914019286633, "learning_rate": 9.529905686921666e-07, "loss": 0.0134, "step": 224680 }, { "epoch": 2.4006624285485336, "grad_norm": 6.556144714355469, "learning_rate": 9.529834563293575e-07, "loss": 0.0156, "step": 224690 }, { "epoch": 2.400769271862813, "grad_norm": 1.8106850385665894, "learning_rate": 9.529763434550961e-07, "loss": 0.0115, "step": 224700 }, { "epoch": 2.400876115177093, "grad_norm": 0.16718411445617676, "learning_rate": 9.529692300693901e-07, "loss": 0.006, "step": 224710 }, { "epoch": 2.4009829584913724, "grad_norm": 0.007837670855224133, "learning_rate": 9.529621161722477e-07, "loss": 0.0103, "step": 224720 }, { "epoch": 2.401089801805652, "grad_norm": 0.3911326825618744, "learning_rate": 9.529550017636767e-07, "loss": 0.0115, "step": 224730 }, { "epoch": 2.401196645119932, "grad_norm": 0.03660787642002106, "learning_rate": 9.529478868436854e-07, "loss": 0.0074, "step": 224740 }, { "epoch": 2.4013034884342113, "grad_norm": 5.737010478973389, "learning_rate": 9.529407714122817e-07, "loss": 0.0573, "step": 224750 }, { "epoch": 2.4014103317484907, "grad_norm": 4.020005226135254, "learning_rate": 9.529336554694737e-07, "loss": 0.0377, "step": 224760 }, { "epoch": 2.4015171750627706, "grad_norm": 0.004229446407407522, "learning_rate": 9.529265390152696e-07, "loss": 0.0253, "step": 224770 }, { "epoch": 2.40162401837705, "grad_norm": 1.5045418739318848, "learning_rate": 9.529194220496773e-07, "loss": 0.0222, "step": 224780 }, { "epoch": 2.4017308616913295, "grad_norm": 4.343575477600098, "learning_rate": 9.529123045727046e-07, "loss": 0.0103, "step": 224790 }, { "epoch": 2.4018377050056094, "grad_norm": 1.1049987077713013, "learning_rate": 9.529051865843596e-07, "loss": 0.0078, "step": 224800 }, { "epoch": 2.401944548319889, "grad_norm": 0.026444850489497185, "learning_rate": 9.528980680846508e-07, "loss": 0.0205, "step": 224810 }, { "epoch": 2.4020513916341684, "grad_norm": 0.002398791955783963, "learning_rate": 9.528909490735857e-07, "loss": 0.0049, "step": 224820 }, { "epoch": 2.4021582349484483, "grad_norm": 5.259599208831787, "learning_rate": 9.528838295511725e-07, "loss": 0.0362, "step": 224830 }, { "epoch": 2.4022650782627277, "grad_norm": 2.474388360977173, "learning_rate": 9.528767095174193e-07, "loss": 0.0081, "step": 224840 }, { "epoch": 2.402371921577007, "grad_norm": 0.06595975905656815, "learning_rate": 9.528695889723341e-07, "loss": 0.0447, "step": 224850 }, { "epoch": 2.402478764891287, "grad_norm": 0.012238554656505585, "learning_rate": 9.52862467915925e-07, "loss": 0.0089, "step": 224860 }, { "epoch": 2.4025856082055665, "grad_norm": 5.566346168518066, "learning_rate": 9.528553463482001e-07, "loss": 0.0423, "step": 224870 }, { "epoch": 2.402692451519846, "grad_norm": 0.05331557244062424, "learning_rate": 9.528482242691672e-07, "loss": 0.0491, "step": 224880 }, { "epoch": 2.402799294834126, "grad_norm": 1.5696282386779785, "learning_rate": 9.528411016788345e-07, "loss": 0.0047, "step": 224890 }, { "epoch": 2.4029061381484054, "grad_norm": 1.499660611152649, "learning_rate": 9.528339785772101e-07, "loss": 0.0458, "step": 224900 }, { "epoch": 2.403012981462685, "grad_norm": 0.04842585325241089, "learning_rate": 9.528268549643018e-07, "loss": 0.0149, "step": 224910 }, { "epoch": 2.4031198247769647, "grad_norm": 3.3235273361206055, "learning_rate": 9.528197308401179e-07, "loss": 0.0173, "step": 224920 }, { "epoch": 2.403226668091244, "grad_norm": 15.470207214355469, "learning_rate": 9.528126062046665e-07, "loss": 0.0207, "step": 224930 }, { "epoch": 2.4033335114055236, "grad_norm": 0.1754010170698166, "learning_rate": 9.528054810579553e-07, "loss": 0.009, "step": 224940 }, { "epoch": 2.4034403547198036, "grad_norm": 1.9830782413482666, "learning_rate": 9.527983553999925e-07, "loss": 0.0112, "step": 224950 }, { "epoch": 2.403547198034083, "grad_norm": 0.4663829505443573, "learning_rate": 9.527912292307863e-07, "loss": 0.0197, "step": 224960 }, { "epoch": 2.4036540413483625, "grad_norm": 6.270527362823486, "learning_rate": 9.527841025503445e-07, "loss": 0.0171, "step": 224970 }, { "epoch": 2.4037608846626424, "grad_norm": 1.303240180015564, "learning_rate": 9.527769753586754e-07, "loss": 0.0137, "step": 224980 }, { "epoch": 2.403867727976922, "grad_norm": 0.43907278776168823, "learning_rate": 9.527698476557868e-07, "loss": 0.0307, "step": 224990 }, { "epoch": 2.4039745712912013, "grad_norm": 0.23410271108150482, "learning_rate": 9.52762719441687e-07, "loss": 0.0513, "step": 225000 }, { "epoch": 2.404081414605481, "grad_norm": 0.014585052616894245, "learning_rate": 9.527555907163838e-07, "loss": 0.0101, "step": 225010 }, { "epoch": 2.4041882579197607, "grad_norm": 5.104319095611572, "learning_rate": 9.527484614798855e-07, "loss": 0.0295, "step": 225020 }, { "epoch": 2.40429510123404, "grad_norm": 0.5159698724746704, "learning_rate": 9.527413317322e-07, "loss": 0.0187, "step": 225030 }, { "epoch": 2.40440194454832, "grad_norm": 0.030961133539676666, "learning_rate": 9.527342014733353e-07, "loss": 0.025, "step": 225040 }, { "epoch": 2.4045087878625995, "grad_norm": 1.556226134300232, "learning_rate": 9.527270707032994e-07, "loss": 0.0206, "step": 225050 }, { "epoch": 2.404615631176879, "grad_norm": 0.0017743374919518828, "learning_rate": 9.527199394221006e-07, "loss": 0.0099, "step": 225060 }, { "epoch": 2.404722474491159, "grad_norm": 1.0826551914215088, "learning_rate": 9.52712807629747e-07, "loss": 0.0166, "step": 225070 }, { "epoch": 2.4048293178054383, "grad_norm": 0.24368473887443542, "learning_rate": 9.527056753262461e-07, "loss": 0.0066, "step": 225080 }, { "epoch": 2.4049361611197178, "grad_norm": 4.633533954620361, "learning_rate": 9.526985425116065e-07, "loss": 0.0275, "step": 225090 }, { "epoch": 2.4050430044339977, "grad_norm": 2.4309849739074707, "learning_rate": 9.526914091858362e-07, "loss": 0.0372, "step": 225100 }, { "epoch": 2.405149847748277, "grad_norm": 11.365994453430176, "learning_rate": 9.52684275348943e-07, "loss": 0.032, "step": 225110 }, { "epoch": 2.4052566910625566, "grad_norm": 0.002448328072205186, "learning_rate": 9.526771410009351e-07, "loss": 0.0129, "step": 225120 }, { "epoch": 2.4053635343768365, "grad_norm": 2.5721442699432373, "learning_rate": 9.526700061418207e-07, "loss": 0.0204, "step": 225130 }, { "epoch": 2.405470377691116, "grad_norm": 0.001432144083082676, "learning_rate": 9.526628707716075e-07, "loss": 0.0014, "step": 225140 }, { "epoch": 2.4055772210053954, "grad_norm": 0.025164252147078514, "learning_rate": 9.526557348903038e-07, "loss": 0.0155, "step": 225150 }, { "epoch": 2.4056840643196753, "grad_norm": 0.035836003720760345, "learning_rate": 9.526485984979176e-07, "loss": 0.0199, "step": 225160 }, { "epoch": 2.4057909076339548, "grad_norm": 5.9335784912109375, "learning_rate": 9.52641461594457e-07, "loss": 0.0575, "step": 225170 }, { "epoch": 2.4058977509482347, "grad_norm": 3.4531359672546387, "learning_rate": 9.5263432417993e-07, "loss": 0.0362, "step": 225180 }, { "epoch": 2.406004594262514, "grad_norm": 0.1466318815946579, "learning_rate": 9.526271862543447e-07, "loss": 0.0118, "step": 225190 }, { "epoch": 2.4061114375767936, "grad_norm": 8.53603744506836, "learning_rate": 9.526200478177092e-07, "loss": 0.0261, "step": 225200 }, { "epoch": 2.406218280891073, "grad_norm": 5.4222941398620605, "learning_rate": 9.526129088700314e-07, "loss": 0.0211, "step": 225210 }, { "epoch": 2.406325124205353, "grad_norm": 0.010899540036916733, "learning_rate": 9.526057694113197e-07, "loss": 0.0116, "step": 225220 }, { "epoch": 2.4064319675196324, "grad_norm": 0.252562940120697, "learning_rate": 9.525986294415817e-07, "loss": 0.0105, "step": 225230 }, { "epoch": 2.4065388108339123, "grad_norm": 8.417353630065918, "learning_rate": 9.525914889608257e-07, "loss": 0.0342, "step": 225240 }, { "epoch": 2.4066456541481918, "grad_norm": 8.708538055419922, "learning_rate": 9.525843479690599e-07, "loss": 0.0162, "step": 225250 }, { "epoch": 2.406752497462471, "grad_norm": 2.3269264698028564, "learning_rate": 9.525772064662921e-07, "loss": 0.0089, "step": 225260 }, { "epoch": 2.4068593407767507, "grad_norm": 0.4610118865966797, "learning_rate": 9.525700644525303e-07, "loss": 0.0414, "step": 225270 }, { "epoch": 2.4069661840910306, "grad_norm": 0.027562344446778297, "learning_rate": 9.52562921927783e-07, "loss": 0.0058, "step": 225280 }, { "epoch": 2.40707302740531, "grad_norm": 3.7801971435546875, "learning_rate": 9.525557788920579e-07, "loss": 0.0194, "step": 225290 }, { "epoch": 2.40717987071959, "grad_norm": 0.7907775640487671, "learning_rate": 9.525486353453632e-07, "loss": 0.0204, "step": 225300 }, { "epoch": 2.4072867140338694, "grad_norm": 4.3814287185668945, "learning_rate": 9.52541491287707e-07, "loss": 0.0066, "step": 225310 }, { "epoch": 2.407393557348149, "grad_norm": 2.6406123638153076, "learning_rate": 9.525343467190972e-07, "loss": 0.0152, "step": 225320 }, { "epoch": 2.4075004006624283, "grad_norm": 0.030136652290821075, "learning_rate": 9.52527201639542e-07, "loss": 0.0155, "step": 225330 }, { "epoch": 2.4076072439767082, "grad_norm": 1.4729204177856445, "learning_rate": 9.525200560490493e-07, "loss": 0.0102, "step": 225340 }, { "epoch": 2.4077140872909877, "grad_norm": 3.114656686782837, "learning_rate": 9.525129099476275e-07, "loss": 0.0379, "step": 225350 }, { "epoch": 2.4078209306052676, "grad_norm": 5.7445807456970215, "learning_rate": 9.525057633352845e-07, "loss": 0.0186, "step": 225360 }, { "epoch": 2.407927773919547, "grad_norm": 0.002980317221954465, "learning_rate": 9.524986162120282e-07, "loss": 0.0195, "step": 225370 }, { "epoch": 2.4080346172338265, "grad_norm": 6.689818859100342, "learning_rate": 9.524914685778669e-07, "loss": 0.0036, "step": 225380 }, { "epoch": 2.408141460548106, "grad_norm": 0.2949300706386566, "learning_rate": 9.524843204328086e-07, "loss": 0.0086, "step": 225390 }, { "epoch": 2.408248303862386, "grad_norm": 0.26256778836250305, "learning_rate": 9.524771717768611e-07, "loss": 0.0229, "step": 225400 }, { "epoch": 2.4083551471766653, "grad_norm": 0.015119446441531181, "learning_rate": 9.524700226100331e-07, "loss": 0.0545, "step": 225410 }, { "epoch": 2.4084619904909452, "grad_norm": 1.3748078346252441, "learning_rate": 9.524628729323321e-07, "loss": 0.0091, "step": 225420 }, { "epoch": 2.4085688338052247, "grad_norm": 0.39674869179725647, "learning_rate": 9.524557227437664e-07, "loss": 0.0085, "step": 225430 }, { "epoch": 2.408675677119504, "grad_norm": 0.004416313488036394, "learning_rate": 9.524485720443441e-07, "loss": 0.0341, "step": 225440 }, { "epoch": 2.408782520433784, "grad_norm": 0.06535670906305313, "learning_rate": 9.52441420834073e-07, "loss": 0.0891, "step": 225450 }, { "epoch": 2.4088893637480635, "grad_norm": 3.458238124847412, "learning_rate": 9.524342691129615e-07, "loss": 0.0062, "step": 225460 }, { "epoch": 2.408996207062343, "grad_norm": 0.09520624577999115, "learning_rate": 9.524271168810176e-07, "loss": 0.0263, "step": 225470 }, { "epoch": 2.409103050376623, "grad_norm": 0.42811721563339233, "learning_rate": 9.524199641382493e-07, "loss": 0.0326, "step": 225480 }, { "epoch": 2.4092098936909023, "grad_norm": 3.5714340209960938, "learning_rate": 9.524128108846648e-07, "loss": 0.0301, "step": 225490 }, { "epoch": 2.409316737005182, "grad_norm": 4.408203125, "learning_rate": 9.52405657120272e-07, "loss": 0.0341, "step": 225500 }, { "epoch": 2.4094235803194617, "grad_norm": 0.005820097867399454, "learning_rate": 9.523985028450792e-07, "loss": 0.0331, "step": 225510 }, { "epoch": 2.409530423633741, "grad_norm": 1.373923420906067, "learning_rate": 9.523913480590941e-07, "loss": 0.0119, "step": 225520 }, { "epoch": 2.4096372669480206, "grad_norm": 3.368591070175171, "learning_rate": 9.523841927623253e-07, "loss": 0.0421, "step": 225530 }, { "epoch": 2.4097441102623005, "grad_norm": 0.015431574545800686, "learning_rate": 9.523770369547803e-07, "loss": 0.0109, "step": 225540 }, { "epoch": 2.40985095357658, "grad_norm": 0.9046123027801514, "learning_rate": 9.523698806364677e-07, "loss": 0.02, "step": 225550 }, { "epoch": 2.4099577968908594, "grad_norm": 0.715919554233551, "learning_rate": 9.523627238073954e-07, "loss": 0.0268, "step": 225560 }, { "epoch": 2.4100646402051393, "grad_norm": 2.869086742401123, "learning_rate": 9.523555664675713e-07, "loss": 0.0538, "step": 225570 }, { "epoch": 2.410171483519419, "grad_norm": 2.972080945968628, "learning_rate": 9.523484086170037e-07, "loss": 0.0311, "step": 225580 }, { "epoch": 2.4102783268336982, "grad_norm": 0.1597108542919159, "learning_rate": 9.523412502557007e-07, "loss": 0.0159, "step": 225590 }, { "epoch": 2.410385170147978, "grad_norm": 0.03337567299604416, "learning_rate": 9.523340913836701e-07, "loss": 0.0088, "step": 225600 }, { "epoch": 2.4104920134622576, "grad_norm": 1.8246006965637207, "learning_rate": 9.523269320009202e-07, "loss": 0.0061, "step": 225610 }, { "epoch": 2.410598856776537, "grad_norm": 0.022577181458473206, "learning_rate": 9.523197721074592e-07, "loss": 0.0137, "step": 225620 }, { "epoch": 2.410705700090817, "grad_norm": 1.1465604305267334, "learning_rate": 9.523126117032949e-07, "loss": 0.0044, "step": 225630 }, { "epoch": 2.4108125434050964, "grad_norm": 3.256389856338501, "learning_rate": 9.523054507884356e-07, "loss": 0.0349, "step": 225640 }, { "epoch": 2.410919386719376, "grad_norm": 0.8864904046058655, "learning_rate": 9.522982893628893e-07, "loss": 0.0421, "step": 225650 }, { "epoch": 2.411026230033656, "grad_norm": 1.4045003652572632, "learning_rate": 9.52291127426664e-07, "loss": 0.0102, "step": 225660 }, { "epoch": 2.4111330733479353, "grad_norm": 4.4717278480529785, "learning_rate": 9.52283964979768e-07, "loss": 0.0134, "step": 225670 }, { "epoch": 2.4112399166622147, "grad_norm": 0.10573919862508774, "learning_rate": 9.522768020222093e-07, "loss": 0.0153, "step": 225680 }, { "epoch": 2.4113467599764946, "grad_norm": 0.7563915252685547, "learning_rate": 9.522696385539959e-07, "loss": 0.0152, "step": 225690 }, { "epoch": 2.411453603290774, "grad_norm": 6.362081527709961, "learning_rate": 9.522624745751359e-07, "loss": 0.0234, "step": 225700 }, { "epoch": 2.4115604466050535, "grad_norm": 3.6859734058380127, "learning_rate": 9.522553100856374e-07, "loss": 0.013, "step": 225710 }, { "epoch": 2.4116672899193334, "grad_norm": 0.6886235475540161, "learning_rate": 9.522481450855086e-07, "loss": 0.0276, "step": 225720 }, { "epoch": 2.411774133233613, "grad_norm": 0.0358993262052536, "learning_rate": 9.522409795747574e-07, "loss": 0.001, "step": 225730 }, { "epoch": 2.4118809765478924, "grad_norm": 0.5364248156547546, "learning_rate": 9.522338135533923e-07, "loss": 0.0209, "step": 225740 }, { "epoch": 2.4119878198621723, "grad_norm": 0.006217010319232941, "learning_rate": 9.522266470214209e-07, "loss": 0.0254, "step": 225750 }, { "epoch": 2.4120946631764517, "grad_norm": 8.313939094543457, "learning_rate": 9.522194799788513e-07, "loss": 0.034, "step": 225760 }, { "epoch": 2.412201506490731, "grad_norm": 0.5330656170845032, "learning_rate": 9.52212312425692e-07, "loss": 0.0115, "step": 225770 }, { "epoch": 2.412308349805011, "grad_norm": 5.867103576660156, "learning_rate": 9.522051443619507e-07, "loss": 0.0138, "step": 225780 }, { "epoch": 2.4124151931192905, "grad_norm": 0.06617660820484161, "learning_rate": 9.52197975787636e-07, "loss": 0.0093, "step": 225790 }, { "epoch": 2.41252203643357, "grad_norm": 0.7392690777778625, "learning_rate": 9.521908067027554e-07, "loss": 0.0633, "step": 225800 }, { "epoch": 2.41262887974785, "grad_norm": 0.06302689760923386, "learning_rate": 9.521836371073172e-07, "loss": 0.0141, "step": 225810 }, { "epoch": 2.4127357230621294, "grad_norm": 2.891535997390747, "learning_rate": 9.521764670013297e-07, "loss": 0.0123, "step": 225820 }, { "epoch": 2.412842566376409, "grad_norm": 0.026960669085383415, "learning_rate": 9.521692963848007e-07, "loss": 0.0143, "step": 225830 }, { "epoch": 2.4129494096906887, "grad_norm": 0.10420190542936325, "learning_rate": 9.521621252577386e-07, "loss": 0.0421, "step": 225840 }, { "epoch": 2.413056253004968, "grad_norm": 2.2724316120147705, "learning_rate": 9.521549536201512e-07, "loss": 0.0291, "step": 225850 }, { "epoch": 2.4131630963192476, "grad_norm": 4.6026530265808105, "learning_rate": 9.521477814720468e-07, "loss": 0.0225, "step": 225860 }, { "epoch": 2.4132699396335275, "grad_norm": 0.008833358064293861, "learning_rate": 9.521406088134334e-07, "loss": 0.0252, "step": 225870 }, { "epoch": 2.413376782947807, "grad_norm": 0.001896668691188097, "learning_rate": 9.521334356443191e-07, "loss": 0.0395, "step": 225880 }, { "epoch": 2.4134836262620865, "grad_norm": 3.6496264934539795, "learning_rate": 9.521262619647121e-07, "loss": 0.0294, "step": 225890 }, { "epoch": 2.4135904695763664, "grad_norm": 2.876581907272339, "learning_rate": 9.521190877746204e-07, "loss": 0.025, "step": 225900 }, { "epoch": 2.413697312890646, "grad_norm": 2.007195472717285, "learning_rate": 9.52111913074052e-07, "loss": 0.0163, "step": 225910 }, { "epoch": 2.4138041562049253, "grad_norm": 0.06236710026860237, "learning_rate": 9.521047378630154e-07, "loss": 0.0155, "step": 225920 }, { "epoch": 2.413910999519205, "grad_norm": 9.10039234161377, "learning_rate": 9.520975621415181e-07, "loss": 0.023, "step": 225930 }, { "epoch": 2.4140178428334846, "grad_norm": 6.4869208335876465, "learning_rate": 9.520903859095688e-07, "loss": 0.0196, "step": 225940 }, { "epoch": 2.4141246861477645, "grad_norm": 4.294290542602539, "learning_rate": 9.520832091671752e-07, "loss": 0.0077, "step": 225950 }, { "epoch": 2.414231529462044, "grad_norm": 0.22576364874839783, "learning_rate": 9.520760319143456e-07, "loss": 0.0419, "step": 225960 }, { "epoch": 2.4143383727763235, "grad_norm": 0.19599416851997375, "learning_rate": 9.52068854151088e-07, "loss": 0.0191, "step": 225970 }, { "epoch": 2.414445216090603, "grad_norm": 0.6662065386772156, "learning_rate": 9.520616758774106e-07, "loss": 0.0235, "step": 225980 }, { "epoch": 2.414552059404883, "grad_norm": 0.09378449618816376, "learning_rate": 9.520544970933213e-07, "loss": 0.0244, "step": 225990 }, { "epoch": 2.4146589027191623, "grad_norm": 0.11809073388576508, "learning_rate": 9.520473177988285e-07, "loss": 0.0236, "step": 226000 }, { "epoch": 2.414765746033442, "grad_norm": 2.7667601108551025, "learning_rate": 9.520401379939402e-07, "loss": 0.0463, "step": 226010 }, { "epoch": 2.4148725893477216, "grad_norm": 1.502357006072998, "learning_rate": 9.520329576786644e-07, "loss": 0.0245, "step": 226020 }, { "epoch": 2.414979432662001, "grad_norm": 0.27855345606803894, "learning_rate": 9.520257768530093e-07, "loss": 0.0321, "step": 226030 }, { "epoch": 2.4150862759762806, "grad_norm": 1.9431195259094238, "learning_rate": 9.520185955169828e-07, "loss": 0.0064, "step": 226040 }, { "epoch": 2.4151931192905605, "grad_norm": 9.022250175476074, "learning_rate": 9.520114136705935e-07, "loss": 0.0542, "step": 226050 }, { "epoch": 2.41529996260484, "grad_norm": 0.06399679183959961, "learning_rate": 9.520042313138489e-07, "loss": 0.0085, "step": 226060 }, { "epoch": 2.41540680591912, "grad_norm": 3.397035598754883, "learning_rate": 9.519970484467576e-07, "loss": 0.0317, "step": 226070 }, { "epoch": 2.4155136492333993, "grad_norm": 1.0346744060516357, "learning_rate": 9.519898650693275e-07, "loss": 0.0328, "step": 226080 }, { "epoch": 2.4156204925476787, "grad_norm": 0.3530949652194977, "learning_rate": 9.519826811815666e-07, "loss": 0.0033, "step": 226090 }, { "epoch": 2.415727335861958, "grad_norm": 0.04078957438468933, "learning_rate": 9.519754967834832e-07, "loss": 0.0098, "step": 226100 }, { "epoch": 2.415834179176238, "grad_norm": 0.02513510175049305, "learning_rate": 9.519683118750854e-07, "loss": 0.0137, "step": 226110 }, { "epoch": 2.4159410224905176, "grad_norm": 1.5255792140960693, "learning_rate": 9.519611264563814e-07, "loss": 0.0179, "step": 226120 }, { "epoch": 2.4160478658047975, "grad_norm": 6.082350254058838, "learning_rate": 9.519539405273789e-07, "loss": 0.042, "step": 226130 }, { "epoch": 2.416154709119077, "grad_norm": 2.555814743041992, "learning_rate": 9.519467540880866e-07, "loss": 0.0652, "step": 226140 }, { "epoch": 2.4162615524333564, "grad_norm": 0.3261623680591583, "learning_rate": 9.51939567138512e-07, "loss": 0.0111, "step": 226150 }, { "epoch": 2.416368395747636, "grad_norm": 5.6466064453125, "learning_rate": 9.519323796786635e-07, "loss": 0.017, "step": 226160 }, { "epoch": 2.4164752390619157, "grad_norm": 0.0781799703836441, "learning_rate": 9.519251917085496e-07, "loss": 0.022, "step": 226170 }, { "epoch": 2.416582082376195, "grad_norm": 1.7440450191497803, "learning_rate": 9.519180032281777e-07, "loss": 0.0332, "step": 226180 }, { "epoch": 2.416688925690475, "grad_norm": 0.04141314700245857, "learning_rate": 9.519108142375565e-07, "loss": 0.0373, "step": 226190 }, { "epoch": 2.4167957690047546, "grad_norm": 1.1044011116027832, "learning_rate": 9.519036247366936e-07, "loss": 0.0274, "step": 226200 }, { "epoch": 2.416902612319034, "grad_norm": 1.9479902982711792, "learning_rate": 9.518964347255977e-07, "loss": 0.0133, "step": 226210 }, { "epoch": 2.417009455633314, "grad_norm": 3.668276309967041, "learning_rate": 9.518892442042765e-07, "loss": 0.0323, "step": 226220 }, { "epoch": 2.4171162989475934, "grad_norm": 5.622251510620117, "learning_rate": 9.518820531727381e-07, "loss": 0.0186, "step": 226230 }, { "epoch": 2.417223142261873, "grad_norm": 2.1498265266418457, "learning_rate": 9.518748616309911e-07, "loss": 0.0208, "step": 226240 }, { "epoch": 2.4173299855761528, "grad_norm": 4.592495441436768, "learning_rate": 9.518676695790429e-07, "loss": 0.0238, "step": 226250 }, { "epoch": 2.417436828890432, "grad_norm": 1.7237614393234253, "learning_rate": 9.518604770169022e-07, "loss": 0.0036, "step": 226260 }, { "epoch": 2.4175436722047117, "grad_norm": 4.229793548583984, "learning_rate": 9.518532839445769e-07, "loss": 0.024, "step": 226270 }, { "epoch": 2.4176505155189916, "grad_norm": 3.314390182495117, "learning_rate": 9.518460903620751e-07, "loss": 0.0224, "step": 226280 }, { "epoch": 2.417757358833271, "grad_norm": 1.9611451625823975, "learning_rate": 9.518388962694051e-07, "loss": 0.0291, "step": 226290 }, { "epoch": 2.4178642021475505, "grad_norm": 0.0076842461712658405, "learning_rate": 9.518317016665746e-07, "loss": 0.0175, "step": 226300 }, { "epoch": 2.4179710454618304, "grad_norm": 0.03643602877855301, "learning_rate": 9.518245065535922e-07, "loss": 0.028, "step": 226310 }, { "epoch": 2.41807788877611, "grad_norm": 0.806961178779602, "learning_rate": 9.51817310930466e-07, "loss": 0.0069, "step": 226320 }, { "epoch": 2.4181847320903893, "grad_norm": 0.04543523117899895, "learning_rate": 9.518101147972037e-07, "loss": 0.0049, "step": 226330 }, { "epoch": 2.418291575404669, "grad_norm": 0.09739165753126144, "learning_rate": 9.518029181538136e-07, "loss": 0.0766, "step": 226340 }, { "epoch": 2.4183984187189487, "grad_norm": 8.313915252685547, "learning_rate": 9.517957210003042e-07, "loss": 0.0253, "step": 226350 }, { "epoch": 2.418505262033228, "grad_norm": 0.08234359323978424, "learning_rate": 9.517885233366831e-07, "loss": 0.0214, "step": 226360 }, { "epoch": 2.418612105347508, "grad_norm": 0.3969503343105316, "learning_rate": 9.517813251629587e-07, "loss": 0.0099, "step": 226370 }, { "epoch": 2.4187189486617875, "grad_norm": 5.607693672180176, "learning_rate": 9.517741264791392e-07, "loss": 0.0071, "step": 226380 }, { "epoch": 2.418825791976067, "grad_norm": 1.6311419010162354, "learning_rate": 9.517669272852324e-07, "loss": 0.0164, "step": 226390 }, { "epoch": 2.418932635290347, "grad_norm": 0.015991969034075737, "learning_rate": 9.517597275812469e-07, "loss": 0.0129, "step": 226400 }, { "epoch": 2.4190394786046263, "grad_norm": 1.4264978170394897, "learning_rate": 9.517525273671905e-07, "loss": 0.0232, "step": 226410 }, { "epoch": 2.4191463219189058, "grad_norm": 0.7325435280799866, "learning_rate": 9.517453266430712e-07, "loss": 0.0335, "step": 226420 }, { "epoch": 2.4192531652331857, "grad_norm": 16.983600616455078, "learning_rate": 9.517381254088977e-07, "loss": 0.0124, "step": 226430 }, { "epoch": 2.419360008547465, "grad_norm": 1.0434893369674683, "learning_rate": 9.517309236646774e-07, "loss": 0.0087, "step": 226440 }, { "epoch": 2.4194668518617446, "grad_norm": 3.8781144618988037, "learning_rate": 9.51723721410419e-07, "loss": 0.0051, "step": 226450 }, { "epoch": 2.4195736951760245, "grad_norm": 1.0196200609207153, "learning_rate": 9.517165186461303e-07, "loss": 0.015, "step": 226460 }, { "epoch": 2.419680538490304, "grad_norm": 5.645401477813721, "learning_rate": 9.517093153718197e-07, "loss": 0.0145, "step": 226470 }, { "epoch": 2.4197873818045834, "grad_norm": 11.917189598083496, "learning_rate": 9.51702111587495e-07, "loss": 0.0409, "step": 226480 }, { "epoch": 2.4198942251188633, "grad_norm": 0.021778197959065437, "learning_rate": 9.516949072931649e-07, "loss": 0.0124, "step": 226490 }, { "epoch": 2.4200010684331428, "grad_norm": 2.2752280235290527, "learning_rate": 9.516877024888368e-07, "loss": 0.0309, "step": 226500 }, { "epoch": 2.4201079117474222, "grad_norm": 0.00740179605782032, "learning_rate": 9.516804971745193e-07, "loss": 0.0258, "step": 226510 }, { "epoch": 2.420214755061702, "grad_norm": 5.800781726837158, "learning_rate": 9.516732913502203e-07, "loss": 0.0124, "step": 226520 }, { "epoch": 2.4203215983759816, "grad_norm": 1.8500120639801025, "learning_rate": 9.516660850159483e-07, "loss": 0.0146, "step": 226530 }, { "epoch": 2.420428441690261, "grad_norm": 0.43737179040908813, "learning_rate": 9.51658878171711e-07, "loss": 0.0135, "step": 226540 }, { "epoch": 2.420535285004541, "grad_norm": 0.08325042575597763, "learning_rate": 9.516516708175169e-07, "loss": 0.0079, "step": 226550 }, { "epoch": 2.4206421283188204, "grad_norm": 2.671626329421997, "learning_rate": 9.516444629533739e-07, "loss": 0.0057, "step": 226560 }, { "epoch": 2.4207489716331, "grad_norm": 0.019695254042744637, "learning_rate": 9.516372545792902e-07, "loss": 0.0149, "step": 226570 }, { "epoch": 2.42085581494738, "grad_norm": 7.82423210144043, "learning_rate": 9.516300456952739e-07, "loss": 0.0267, "step": 226580 }, { "epoch": 2.4209626582616592, "grad_norm": 2.4402096271514893, "learning_rate": 9.516228363013333e-07, "loss": 0.0739, "step": 226590 }, { "epoch": 2.4210695015759387, "grad_norm": 1.496488332748413, "learning_rate": 9.516156263974764e-07, "loss": 0.0205, "step": 226600 }, { "epoch": 2.4211763448902186, "grad_norm": 2.1214396953582764, "learning_rate": 9.516084159837114e-07, "loss": 0.0168, "step": 226610 }, { "epoch": 2.421283188204498, "grad_norm": 1.78675377368927, "learning_rate": 9.516012050600463e-07, "loss": 0.0172, "step": 226620 }, { "epoch": 2.4213900315187775, "grad_norm": 0.9377111196517944, "learning_rate": 9.515939936264895e-07, "loss": 0.0109, "step": 226630 }, { "epoch": 2.4214968748330574, "grad_norm": 0.013609977439045906, "learning_rate": 9.51586781683049e-07, "loss": 0.0171, "step": 226640 }, { "epoch": 2.421603718147337, "grad_norm": 0.604960560798645, "learning_rate": 9.515795692297328e-07, "loss": 0.0371, "step": 226650 }, { "epoch": 2.421710561461617, "grad_norm": 5.794261455535889, "learning_rate": 9.515723562665493e-07, "loss": 0.0838, "step": 226660 }, { "epoch": 2.4218174047758962, "grad_norm": 3.194347620010376, "learning_rate": 9.515651427935065e-07, "loss": 0.0494, "step": 226670 }, { "epoch": 2.4219242480901757, "grad_norm": 0.658395528793335, "learning_rate": 9.515579288106126e-07, "loss": 0.0176, "step": 226680 }, { "epoch": 2.422031091404455, "grad_norm": 3.060154914855957, "learning_rate": 9.515507143178755e-07, "loss": 0.0196, "step": 226690 }, { "epoch": 2.422137934718735, "grad_norm": 6.062475681304932, "learning_rate": 9.515434993153038e-07, "loss": 0.0497, "step": 226700 }, { "epoch": 2.4222447780330145, "grad_norm": 1.7262930870056152, "learning_rate": 9.515362838029054e-07, "loss": 0.033, "step": 226710 }, { "epoch": 2.4223516213472944, "grad_norm": 9.692654609680176, "learning_rate": 9.515290677806885e-07, "loss": 0.0078, "step": 226720 }, { "epoch": 2.422458464661574, "grad_norm": 0.4098844826221466, "learning_rate": 9.51521851248661e-07, "loss": 0.0257, "step": 226730 }, { "epoch": 2.4225653079758533, "grad_norm": 0.6970310807228088, "learning_rate": 9.515146342068314e-07, "loss": 0.0081, "step": 226740 }, { "epoch": 2.422672151290133, "grad_norm": 0.8288788795471191, "learning_rate": 9.515074166552077e-07, "loss": 0.033, "step": 226750 }, { "epoch": 2.4227789946044127, "grad_norm": 1.1022424697875977, "learning_rate": 9.51500198593798e-07, "loss": 0.0193, "step": 226760 }, { "epoch": 2.422885837918692, "grad_norm": 0.004389375913888216, "learning_rate": 9.514929800226104e-07, "loss": 0.0388, "step": 226770 }, { "epoch": 2.422992681232972, "grad_norm": 0.19502337276935577, "learning_rate": 9.514857609416533e-07, "loss": 0.0356, "step": 226780 }, { "epoch": 2.4230995245472515, "grad_norm": 0.728417158126831, "learning_rate": 9.514785413509347e-07, "loss": 0.0053, "step": 226790 }, { "epoch": 2.423206367861531, "grad_norm": 4.262511253356934, "learning_rate": 9.514713212504628e-07, "loss": 0.0222, "step": 226800 }, { "epoch": 2.4233132111758104, "grad_norm": 0.008049288764595985, "learning_rate": 9.514641006402455e-07, "loss": 0.0114, "step": 226810 }, { "epoch": 2.4234200544900903, "grad_norm": 0.012185588479042053, "learning_rate": 9.514568795202913e-07, "loss": 0.0041, "step": 226820 }, { "epoch": 2.42352689780437, "grad_norm": 7.019885063171387, "learning_rate": 9.514496578906082e-07, "loss": 0.038, "step": 226830 }, { "epoch": 2.4236337411186497, "grad_norm": 9.968548774719238, "learning_rate": 9.514424357512044e-07, "loss": 0.0235, "step": 226840 }, { "epoch": 2.423740584432929, "grad_norm": 4.37864875793457, "learning_rate": 9.51435213102088e-07, "loss": 0.0093, "step": 226850 }, { "epoch": 2.4238474277472086, "grad_norm": 0.07486262172460556, "learning_rate": 9.514279899432671e-07, "loss": 0.0005, "step": 226860 }, { "epoch": 2.423954271061488, "grad_norm": 6.469515800476074, "learning_rate": 9.5142076627475e-07, "loss": 0.0114, "step": 226870 }, { "epoch": 2.424061114375768, "grad_norm": 3.189246892929077, "learning_rate": 9.514135420965448e-07, "loss": 0.0132, "step": 226880 }, { "epoch": 2.4241679576900474, "grad_norm": 0.0911894142627716, "learning_rate": 9.514063174086595e-07, "loss": 0.0108, "step": 226890 }, { "epoch": 2.4242748010043274, "grad_norm": 0.30009543895721436, "learning_rate": 9.513990922111026e-07, "loss": 0.0203, "step": 226900 }, { "epoch": 2.424381644318607, "grad_norm": 2.32619571685791, "learning_rate": 9.513918665038818e-07, "loss": 0.0124, "step": 226910 }, { "epoch": 2.4244884876328863, "grad_norm": 0.20957106351852417, "learning_rate": 9.513846402870059e-07, "loss": 0.0076, "step": 226920 }, { "epoch": 2.424595330947166, "grad_norm": 0.015844672918319702, "learning_rate": 9.513774135604823e-07, "loss": 0.0117, "step": 226930 }, { "epoch": 2.4247021742614456, "grad_norm": 10.825356483459473, "learning_rate": 9.513701863243197e-07, "loss": 0.0299, "step": 226940 }, { "epoch": 2.424809017575725, "grad_norm": 0.0547446645796299, "learning_rate": 9.513629585785262e-07, "loss": 0.0204, "step": 226950 }, { "epoch": 2.424915860890005, "grad_norm": 0.04277721419930458, "learning_rate": 9.513557303231095e-07, "loss": 0.0598, "step": 226960 }, { "epoch": 2.4250227042042845, "grad_norm": 3.167022466659546, "learning_rate": 9.513485015580784e-07, "loss": 0.0143, "step": 226970 }, { "epoch": 2.425129547518564, "grad_norm": 2.1981279850006104, "learning_rate": 9.513412722834406e-07, "loss": 0.0216, "step": 226980 }, { "epoch": 2.425236390832844, "grad_norm": 0.1919453740119934, "learning_rate": 9.513340424992047e-07, "loss": 0.0068, "step": 226990 }, { "epoch": 2.4253432341471233, "grad_norm": 0.018940702080726624, "learning_rate": 9.513268122053783e-07, "loss": 0.0482, "step": 227000 }, { "epoch": 2.4254500774614027, "grad_norm": 4.537855625152588, "learning_rate": 9.513195814019701e-07, "loss": 0.0419, "step": 227010 }, { "epoch": 2.4255569207756826, "grad_norm": 0.19308723509311676, "learning_rate": 9.513123500889879e-07, "loss": 0.0102, "step": 227020 }, { "epoch": 2.425663764089962, "grad_norm": 0.02962362952530384, "learning_rate": 9.513051182664401e-07, "loss": 0.0032, "step": 227030 }, { "epoch": 2.4257706074042416, "grad_norm": 0.01874076947569847, "learning_rate": 9.512978859343348e-07, "loss": 0.0239, "step": 227040 }, { "epoch": 2.4258774507185215, "grad_norm": 0.004322522785514593, "learning_rate": 9.5129065309268e-07, "loss": 0.0145, "step": 227050 }, { "epoch": 2.425984294032801, "grad_norm": 0.05596292018890381, "learning_rate": 9.51283419741484e-07, "loss": 0.0143, "step": 227060 }, { "epoch": 2.4260911373470804, "grad_norm": 1.2294776439666748, "learning_rate": 9.512761858807549e-07, "loss": 0.0049, "step": 227070 }, { "epoch": 2.4261979806613603, "grad_norm": 5.1255879402160645, "learning_rate": 9.512689515105011e-07, "loss": 0.0173, "step": 227080 }, { "epoch": 2.4263048239756397, "grad_norm": 0.5774049758911133, "learning_rate": 9.512617166307305e-07, "loss": 0.0511, "step": 227090 }, { "epoch": 2.426411667289919, "grad_norm": 0.640116274356842, "learning_rate": 9.512544812414513e-07, "loss": 0.0554, "step": 227100 }, { "epoch": 2.426518510604199, "grad_norm": 2.12404465675354, "learning_rate": 9.512472453426719e-07, "loss": 0.0262, "step": 227110 }, { "epoch": 2.4266253539184786, "grad_norm": 0.32638779282569885, "learning_rate": 9.512400089344001e-07, "loss": 0.0246, "step": 227120 }, { "epoch": 2.426732197232758, "grad_norm": 0.10640493780374527, "learning_rate": 9.512327720166444e-07, "loss": 0.003, "step": 227130 }, { "epoch": 2.426839040547038, "grad_norm": 1.7819420099258423, "learning_rate": 9.512255345894129e-07, "loss": 0.004, "step": 227140 }, { "epoch": 2.4269458838613174, "grad_norm": 0.5335313677787781, "learning_rate": 9.512182966527136e-07, "loss": 0.0119, "step": 227150 }, { "epoch": 2.427052727175597, "grad_norm": 0.44260361790657043, "learning_rate": 9.512110582065549e-07, "loss": 0.0026, "step": 227160 }, { "epoch": 2.4271595704898767, "grad_norm": 0.03301488235592842, "learning_rate": 9.512038192509447e-07, "loss": 0.0244, "step": 227170 }, { "epoch": 2.427266413804156, "grad_norm": 0.29453736543655396, "learning_rate": 9.511965797858914e-07, "loss": 0.0311, "step": 227180 }, { "epoch": 2.4273732571184357, "grad_norm": 5.208499908447266, "learning_rate": 9.511893398114031e-07, "loss": 0.0402, "step": 227190 }, { "epoch": 2.4274801004327156, "grad_norm": 0.027794888243079185, "learning_rate": 9.511820993274881e-07, "loss": 0.0161, "step": 227200 }, { "epoch": 2.427586943746995, "grad_norm": 3.2979440689086914, "learning_rate": 9.511748583341543e-07, "loss": 0.0467, "step": 227210 }, { "epoch": 2.4276937870612745, "grad_norm": 0.05208363011479378, "learning_rate": 9.511676168314101e-07, "loss": 0.0042, "step": 227220 }, { "epoch": 2.4278006303755544, "grad_norm": 0.012783712707459927, "learning_rate": 9.511603748192635e-07, "loss": 0.0099, "step": 227230 }, { "epoch": 2.427907473689834, "grad_norm": 0.022180289030075073, "learning_rate": 9.511531322977231e-07, "loss": 0.0176, "step": 227240 }, { "epoch": 2.4280143170041133, "grad_norm": 1.3582344055175781, "learning_rate": 9.511458892667965e-07, "loss": 0.023, "step": 227250 }, { "epoch": 2.428121160318393, "grad_norm": 0.0512576550245285, "learning_rate": 9.511386457264923e-07, "loss": 0.0222, "step": 227260 }, { "epoch": 2.4282280036326727, "grad_norm": 0.010298728942871094, "learning_rate": 9.511314016768184e-07, "loss": 0.04, "step": 227270 }, { "epoch": 2.428334846946952, "grad_norm": 0.07519174367189407, "learning_rate": 9.511241571177831e-07, "loss": 0.0213, "step": 227280 }, { "epoch": 2.428441690261232, "grad_norm": 2.6995859146118164, "learning_rate": 9.511169120493944e-07, "loss": 0.0424, "step": 227290 }, { "epoch": 2.4285485335755115, "grad_norm": 3.717824935913086, "learning_rate": 9.51109666471661e-07, "loss": 0.0556, "step": 227300 }, { "epoch": 2.428655376889791, "grad_norm": 6.67446756362915, "learning_rate": 9.511024203845905e-07, "loss": 0.0348, "step": 227310 }, { "epoch": 2.428762220204071, "grad_norm": 0.1335745006799698, "learning_rate": 9.510951737881916e-07, "loss": 0.0624, "step": 227320 }, { "epoch": 2.4288690635183503, "grad_norm": 0.4914153814315796, "learning_rate": 9.510879266824719e-07, "loss": 0.0115, "step": 227330 }, { "epoch": 2.4289759068326298, "grad_norm": 0.1714273989200592, "learning_rate": 9.5108067906744e-07, "loss": 0.0159, "step": 227340 }, { "epoch": 2.4290827501469097, "grad_norm": 0.006835554726421833, "learning_rate": 9.510734309431041e-07, "loss": 0.0115, "step": 227350 }, { "epoch": 2.429189593461189, "grad_norm": 0.05605943128466606, "learning_rate": 9.510661823094721e-07, "loss": 0.0917, "step": 227360 }, { "epoch": 2.4292964367754686, "grad_norm": 0.007985160686075687, "learning_rate": 9.510589331665524e-07, "loss": 0.0085, "step": 227370 }, { "epoch": 2.4294032800897485, "grad_norm": 0.0027075037360191345, "learning_rate": 9.510516835143531e-07, "loss": 0.0147, "step": 227380 }, { "epoch": 2.429510123404028, "grad_norm": 0.0920063704252243, "learning_rate": 9.510444333528823e-07, "loss": 0.0428, "step": 227390 }, { "epoch": 2.4296169667183074, "grad_norm": 0.04218891263008118, "learning_rate": 9.510371826821484e-07, "loss": 0.0075, "step": 227400 }, { "epoch": 2.4297238100325873, "grad_norm": 3.315761089324951, "learning_rate": 9.510299315021595e-07, "loss": 0.0201, "step": 227410 }, { "epoch": 2.4298306533468668, "grad_norm": 6.476018905639648, "learning_rate": 9.510226798129238e-07, "loss": 0.0109, "step": 227420 }, { "epoch": 2.4299374966611467, "grad_norm": 0.07862637937068939, "learning_rate": 9.510154276144493e-07, "loss": 0.007, "step": 227430 }, { "epoch": 2.430044339975426, "grad_norm": 0.0016718332190066576, "learning_rate": 9.510081749067444e-07, "loss": 0.0129, "step": 227440 }, { "epoch": 2.4301511832897056, "grad_norm": 0.8488121628761292, "learning_rate": 9.510009216898174e-07, "loss": 0.0114, "step": 227450 }, { "epoch": 2.430258026603985, "grad_norm": 0.011539983563125134, "learning_rate": 9.50993667963676e-07, "loss": 0.1006, "step": 227460 }, { "epoch": 2.430364869918265, "grad_norm": 6.543353080749512, "learning_rate": 9.50986413728329e-07, "loss": 0.0233, "step": 227470 }, { "epoch": 2.4304717132325444, "grad_norm": 0.00714621227234602, "learning_rate": 9.509791589837842e-07, "loss": 0.0196, "step": 227480 }, { "epoch": 2.4305785565468243, "grad_norm": 8.580784797668457, "learning_rate": 9.509719037300499e-07, "loss": 0.0347, "step": 227490 }, { "epoch": 2.4306853998611038, "grad_norm": 14.040778160095215, "learning_rate": 9.509646479671342e-07, "loss": 0.0099, "step": 227500 }, { "epoch": 2.4307922431753832, "grad_norm": 2.566316604614258, "learning_rate": 9.509573916950455e-07, "loss": 0.0234, "step": 227510 }, { "epoch": 2.4308990864896627, "grad_norm": 0.0852309837937355, "learning_rate": 9.509501349137919e-07, "loss": 0.0217, "step": 227520 }, { "epoch": 2.4310059298039426, "grad_norm": 2.016584873199463, "learning_rate": 9.509428776233814e-07, "loss": 0.0232, "step": 227530 }, { "epoch": 2.431112773118222, "grad_norm": 2.530475378036499, "learning_rate": 9.509356198238225e-07, "loss": 0.0072, "step": 227540 }, { "epoch": 2.431219616432502, "grad_norm": 0.05114633962512016, "learning_rate": 9.509283615151231e-07, "loss": 0.0089, "step": 227550 }, { "epoch": 2.4313264597467814, "grad_norm": 0.5622248649597168, "learning_rate": 9.509211026972917e-07, "loss": 0.0205, "step": 227560 }, { "epoch": 2.431433303061061, "grad_norm": 0.16019058227539062, "learning_rate": 9.509138433703363e-07, "loss": 0.0174, "step": 227570 }, { "epoch": 2.4315401463753403, "grad_norm": 3.2195284366607666, "learning_rate": 9.509065835342651e-07, "loss": 0.0219, "step": 227580 }, { "epoch": 2.4316469896896202, "grad_norm": 3.49588680267334, "learning_rate": 9.508993231890863e-07, "loss": 0.0369, "step": 227590 }, { "epoch": 2.4317538330038997, "grad_norm": 0.04043430835008621, "learning_rate": 9.508920623348083e-07, "loss": 0.0303, "step": 227600 }, { "epoch": 2.4318606763181796, "grad_norm": 0.4603695273399353, "learning_rate": 9.50884800971439e-07, "loss": 0.0178, "step": 227610 }, { "epoch": 2.431967519632459, "grad_norm": 0.3354533016681671, "learning_rate": 9.508775390989867e-07, "loss": 0.088, "step": 227620 }, { "epoch": 2.4320743629467385, "grad_norm": 4.870230197906494, "learning_rate": 9.508702767174597e-07, "loss": 0.0596, "step": 227630 }, { "epoch": 2.432181206261018, "grad_norm": 0.005085037089884281, "learning_rate": 9.508630138268661e-07, "loss": 0.0071, "step": 227640 }, { "epoch": 2.432288049575298, "grad_norm": 0.90326327085495, "learning_rate": 9.508557504272141e-07, "loss": 0.0269, "step": 227650 }, { "epoch": 2.4323948928895773, "grad_norm": 4.559906959533691, "learning_rate": 9.50848486518512e-07, "loss": 0.0145, "step": 227660 }, { "epoch": 2.4325017362038572, "grad_norm": 0.019112246111035347, "learning_rate": 9.508412221007678e-07, "loss": 0.0088, "step": 227670 }, { "epoch": 2.4326085795181367, "grad_norm": 7.338944911956787, "learning_rate": 9.5083395717399e-07, "loss": 0.0239, "step": 227680 }, { "epoch": 2.432715422832416, "grad_norm": 4.398833751678467, "learning_rate": 9.508266917381866e-07, "loss": 0.0505, "step": 227690 }, { "epoch": 2.432822266146696, "grad_norm": 0.011880903504788876, "learning_rate": 9.508194257933657e-07, "loss": 0.0202, "step": 227700 }, { "epoch": 2.4329291094609755, "grad_norm": 4.3151421546936035, "learning_rate": 9.508121593395357e-07, "loss": 0.0078, "step": 227710 }, { "epoch": 2.433035952775255, "grad_norm": 1.7503958940505981, "learning_rate": 9.508048923767049e-07, "loss": 0.0031, "step": 227720 }, { "epoch": 2.433142796089535, "grad_norm": 0.15808114409446716, "learning_rate": 9.507976249048812e-07, "loss": 0.0215, "step": 227730 }, { "epoch": 2.4332496394038143, "grad_norm": 0.014765392057597637, "learning_rate": 9.507903569240731e-07, "loss": 0.0156, "step": 227740 }, { "epoch": 2.433356482718094, "grad_norm": 0.07493390142917633, "learning_rate": 9.507830884342884e-07, "loss": 0.0354, "step": 227750 }, { "epoch": 2.4334633260323737, "grad_norm": 6.28954553604126, "learning_rate": 9.507758194355359e-07, "loss": 0.0238, "step": 227760 }, { "epoch": 2.433570169346653, "grad_norm": 0.0026595722883939743, "learning_rate": 9.507685499278233e-07, "loss": 0.0262, "step": 227770 }, { "epoch": 2.4336770126609326, "grad_norm": 0.007796272169798613, "learning_rate": 9.507612799111591e-07, "loss": 0.0385, "step": 227780 }, { "epoch": 2.4337838559752125, "grad_norm": 6.285617828369141, "learning_rate": 9.507540093855512e-07, "loss": 0.0296, "step": 227790 }, { "epoch": 2.433890699289492, "grad_norm": 1.3481065034866333, "learning_rate": 9.507467383510081e-07, "loss": 0.0161, "step": 227800 }, { "epoch": 2.4339975426037714, "grad_norm": 2.906832456588745, "learning_rate": 9.507394668075379e-07, "loss": 0.0249, "step": 227810 }, { "epoch": 2.4341043859180513, "grad_norm": 0.39193248748779297, "learning_rate": 9.507321947551489e-07, "loss": 0.0581, "step": 227820 }, { "epoch": 2.434211229232331, "grad_norm": 0.924116849899292, "learning_rate": 9.507249221938491e-07, "loss": 0.0188, "step": 227830 }, { "epoch": 2.4343180725466103, "grad_norm": 3.6000592708587646, "learning_rate": 9.50717649123647e-07, "loss": 0.0095, "step": 227840 }, { "epoch": 2.43442491586089, "grad_norm": 0.009920635260641575, "learning_rate": 9.507103755445505e-07, "loss": 0.0281, "step": 227850 }, { "epoch": 2.4345317591751696, "grad_norm": 28.123178482055664, "learning_rate": 9.507031014565681e-07, "loss": 0.0278, "step": 227860 }, { "epoch": 2.434638602489449, "grad_norm": 0.1490749567747116, "learning_rate": 9.506958268597078e-07, "loss": 0.0479, "step": 227870 }, { "epoch": 2.434745445803729, "grad_norm": 2.733607053756714, "learning_rate": 9.506885517539779e-07, "loss": 0.004, "step": 227880 }, { "epoch": 2.4348522891180084, "grad_norm": 0.416156530380249, "learning_rate": 9.506812761393868e-07, "loss": 0.0389, "step": 227890 }, { "epoch": 2.434959132432288, "grad_norm": 0.07481574267148972, "learning_rate": 9.506740000159422e-07, "loss": 0.0055, "step": 227900 }, { "epoch": 2.435065975746568, "grad_norm": 0.033779632300138474, "learning_rate": 9.506667233836528e-07, "loss": 0.0058, "step": 227910 }, { "epoch": 2.4351728190608473, "grad_norm": 0.635235607624054, "learning_rate": 9.506594462425267e-07, "loss": 0.0046, "step": 227920 }, { "epoch": 2.4352796623751267, "grad_norm": 7.172227382659912, "learning_rate": 9.506521685925721e-07, "loss": 0.0182, "step": 227930 }, { "epoch": 2.4353865056894066, "grad_norm": 0.715286374092102, "learning_rate": 9.506448904337971e-07, "loss": 0.0042, "step": 227940 }, { "epoch": 2.435493349003686, "grad_norm": 4.8395466804504395, "learning_rate": 9.5063761176621e-07, "loss": 0.0915, "step": 227950 }, { "epoch": 2.4356001923179655, "grad_norm": 0.40961387753486633, "learning_rate": 9.506303325898191e-07, "loss": 0.0287, "step": 227960 }, { "epoch": 2.4357070356322454, "grad_norm": 0.013117078691720963, "learning_rate": 9.506230529046324e-07, "loss": 0.017, "step": 227970 }, { "epoch": 2.435813878946525, "grad_norm": 0.07384829968214035, "learning_rate": 9.506157727106583e-07, "loss": 0.0017, "step": 227980 }, { "epoch": 2.4359207222608044, "grad_norm": 0.005662424955517054, "learning_rate": 9.506084920079052e-07, "loss": 0.0111, "step": 227990 }, { "epoch": 2.4360275655750843, "grad_norm": 15.053369522094727, "learning_rate": 9.506012107963808e-07, "loss": 0.0411, "step": 228000 }, { "epoch": 2.4361344088893637, "grad_norm": 0.01408544834703207, "learning_rate": 9.505939290760938e-07, "loss": 0.0057, "step": 228010 }, { "epoch": 2.436241252203643, "grad_norm": 0.017641581594944, "learning_rate": 9.505866468470522e-07, "loss": 0.0112, "step": 228020 }, { "epoch": 2.436348095517923, "grad_norm": 0.021420082077383995, "learning_rate": 9.505793641092643e-07, "loss": 0.0098, "step": 228030 }, { "epoch": 2.4364549388322025, "grad_norm": 0.7603364586830139, "learning_rate": 9.505720808627382e-07, "loss": 0.0027, "step": 228040 }, { "epoch": 2.436561782146482, "grad_norm": 9.640793800354004, "learning_rate": 9.505647971074824e-07, "loss": 0.0252, "step": 228050 }, { "epoch": 2.436668625460762, "grad_norm": 0.01489943079650402, "learning_rate": 9.505575128435048e-07, "loss": 0.0266, "step": 228060 }, { "epoch": 2.4367754687750414, "grad_norm": 2.6536622047424316, "learning_rate": 9.505502280708138e-07, "loss": 0.0473, "step": 228070 }, { "epoch": 2.436882312089321, "grad_norm": 3.332239866256714, "learning_rate": 9.505429427894176e-07, "loss": 0.0072, "step": 228080 }, { "epoch": 2.4369891554036007, "grad_norm": 6.913987159729004, "learning_rate": 9.505356569993244e-07, "loss": 0.1038, "step": 228090 }, { "epoch": 2.43709599871788, "grad_norm": 2.1666088104248047, "learning_rate": 9.505283707005425e-07, "loss": 0.0414, "step": 228100 }, { "epoch": 2.4372028420321596, "grad_norm": 0.02544574998319149, "learning_rate": 9.5052108389308e-07, "loss": 0.0184, "step": 228110 }, { "epoch": 2.4373096853464395, "grad_norm": 1.8938941955566406, "learning_rate": 9.505137965769451e-07, "loss": 0.0115, "step": 228120 }, { "epoch": 2.437416528660719, "grad_norm": 0.043081942945718765, "learning_rate": 9.505065087521462e-07, "loss": 0.0135, "step": 228130 }, { "epoch": 2.437523371974999, "grad_norm": 0.005951525177806616, "learning_rate": 9.504992204186914e-07, "loss": 0.0524, "step": 228140 }, { "epoch": 2.4376302152892784, "grad_norm": 0.03668392077088356, "learning_rate": 9.504919315765891e-07, "loss": 0.0076, "step": 228150 }, { "epoch": 2.437737058603558, "grad_norm": 2.1755475997924805, "learning_rate": 9.504846422258475e-07, "loss": 0.0377, "step": 228160 }, { "epoch": 2.4378439019178373, "grad_norm": 0.03734998404979706, "learning_rate": 9.504773523664744e-07, "loss": 0.0104, "step": 228170 }, { "epoch": 2.437950745232117, "grad_norm": 16.652864456176758, "learning_rate": 9.504700619984787e-07, "loss": 0.0087, "step": 228180 }, { "epoch": 2.4380575885463966, "grad_norm": 5.949414253234863, "learning_rate": 9.504627711218681e-07, "loss": 0.0148, "step": 228190 }, { "epoch": 2.4381644318606766, "grad_norm": 2.997635841369629, "learning_rate": 9.504554797366511e-07, "loss": 0.0135, "step": 228200 }, { "epoch": 2.438271275174956, "grad_norm": 0.019121967256069183, "learning_rate": 9.504481878428358e-07, "loss": 0.0105, "step": 228210 }, { "epoch": 2.4383781184892355, "grad_norm": 5.141668319702148, "learning_rate": 9.504408954404306e-07, "loss": 0.0128, "step": 228220 }, { "epoch": 2.438484961803515, "grad_norm": 0.08169066160917282, "learning_rate": 9.504336025294436e-07, "loss": 0.0165, "step": 228230 }, { "epoch": 2.438591805117795, "grad_norm": 1.9028279781341553, "learning_rate": 9.504263091098831e-07, "loss": 0.0173, "step": 228240 }, { "epoch": 2.4386986484320743, "grad_norm": 0.6214151978492737, "learning_rate": 9.504190151817573e-07, "loss": 0.0232, "step": 228250 }, { "epoch": 2.438805491746354, "grad_norm": 3.038144826889038, "learning_rate": 9.504117207450743e-07, "loss": 0.0073, "step": 228260 }, { "epoch": 2.4389123350606337, "grad_norm": 0.3729651868343353, "learning_rate": 9.504044257998426e-07, "loss": 0.0147, "step": 228270 }, { "epoch": 2.439019178374913, "grad_norm": 0.09016840904951096, "learning_rate": 9.503971303460702e-07, "loss": 0.0211, "step": 228280 }, { "epoch": 2.4391260216891926, "grad_norm": 0.22222648561000824, "learning_rate": 9.503898343837656e-07, "loss": 0.0285, "step": 228290 }, { "epoch": 2.4392328650034725, "grad_norm": 0.037194788455963135, "learning_rate": 9.503825379129369e-07, "loss": 0.0204, "step": 228300 }, { "epoch": 2.439339708317752, "grad_norm": 4.271047115325928, "learning_rate": 9.503752409335921e-07, "loss": 0.0205, "step": 228310 }, { "epoch": 2.439446551632032, "grad_norm": 0.008526756428182125, "learning_rate": 9.503679434457399e-07, "loss": 0.103, "step": 228320 }, { "epoch": 2.4395533949463113, "grad_norm": 0.6832947731018066, "learning_rate": 9.503606454493883e-07, "loss": 0.0127, "step": 228330 }, { "epoch": 2.4396602382605908, "grad_norm": 7.666962623596191, "learning_rate": 9.503533469445453e-07, "loss": 0.0201, "step": 228340 }, { "epoch": 2.43976708157487, "grad_norm": 0.2586851119995117, "learning_rate": 9.503460479312195e-07, "loss": 0.0155, "step": 228350 }, { "epoch": 2.43987392488915, "grad_norm": 0.25198888778686523, "learning_rate": 9.503387484094191e-07, "loss": 0.0094, "step": 228360 }, { "epoch": 2.4399807682034296, "grad_norm": 0.9580896496772766, "learning_rate": 9.503314483791522e-07, "loss": 0.0124, "step": 228370 }, { "epoch": 2.4400876115177095, "grad_norm": 2.7848236560821533, "learning_rate": 9.503241478404271e-07, "loss": 0.007, "step": 228380 }, { "epoch": 2.440194454831989, "grad_norm": 0.004355730954557657, "learning_rate": 9.503168467932521e-07, "loss": 0.0341, "step": 228390 }, { "epoch": 2.4403012981462684, "grad_norm": 0.007770901545882225, "learning_rate": 9.503095452376353e-07, "loss": 0.009, "step": 228400 }, { "epoch": 2.4404081414605483, "grad_norm": 9.33690357208252, "learning_rate": 9.503022431735852e-07, "loss": 0.0289, "step": 228410 }, { "epoch": 2.4405149847748278, "grad_norm": 10.10218334197998, "learning_rate": 9.502949406011097e-07, "loss": 0.1037, "step": 228420 }, { "epoch": 2.440621828089107, "grad_norm": 0.02205755189061165, "learning_rate": 9.502876375202173e-07, "loss": 0.0148, "step": 228430 }, { "epoch": 2.440728671403387, "grad_norm": 0.010767042636871338, "learning_rate": 9.502803339309163e-07, "loss": 0.0225, "step": 228440 }, { "epoch": 2.4408355147176666, "grad_norm": 0.11035019159317017, "learning_rate": 9.502730298332147e-07, "loss": 0.0257, "step": 228450 }, { "epoch": 2.440942358031946, "grad_norm": 0.04929396137595177, "learning_rate": 9.502657252271208e-07, "loss": 0.0103, "step": 228460 }, { "epoch": 2.441049201346226, "grad_norm": 0.5508080720901489, "learning_rate": 9.502584201126429e-07, "loss": 0.008, "step": 228470 }, { "epoch": 2.4411560446605054, "grad_norm": 5.987053871154785, "learning_rate": 9.502511144897893e-07, "loss": 0.0145, "step": 228480 }, { "epoch": 2.441262887974785, "grad_norm": 6.327090263366699, "learning_rate": 9.502438083585683e-07, "loss": 0.0164, "step": 228490 }, { "epoch": 2.4413697312890648, "grad_norm": 5.266936779022217, "learning_rate": 9.502365017189879e-07, "loss": 0.0224, "step": 228500 }, { "epoch": 2.441476574603344, "grad_norm": 8.1462984085083, "learning_rate": 9.502291945710567e-07, "loss": 0.029, "step": 228510 }, { "epoch": 2.4415834179176237, "grad_norm": 2.420987844467163, "learning_rate": 9.502218869147827e-07, "loss": 0.0265, "step": 228520 }, { "epoch": 2.4416902612319036, "grad_norm": 1.3939234018325806, "learning_rate": 9.502145787501742e-07, "loss": 0.0216, "step": 228530 }, { "epoch": 2.441797104546183, "grad_norm": 5.699890613555908, "learning_rate": 9.502072700772395e-07, "loss": 0.0414, "step": 228540 }, { "epoch": 2.4419039478604625, "grad_norm": 0.06373502314090729, "learning_rate": 9.501999608959868e-07, "loss": 0.0103, "step": 228550 }, { "epoch": 2.4420107911747424, "grad_norm": 1.1692181825637817, "learning_rate": 9.501926512064242e-07, "loss": 0.0178, "step": 228560 }, { "epoch": 2.442117634489022, "grad_norm": 0.013694890774786472, "learning_rate": 9.501853410085603e-07, "loss": 0.0277, "step": 228570 }, { "epoch": 2.4422244778033013, "grad_norm": 0.9085254669189453, "learning_rate": 9.501780303024032e-07, "loss": 0.0443, "step": 228580 }, { "epoch": 2.4423313211175812, "grad_norm": 6.719690799713135, "learning_rate": 9.501707190879609e-07, "loss": 0.0174, "step": 228590 }, { "epoch": 2.4424381644318607, "grad_norm": 2.7682547569274902, "learning_rate": 9.50163407365242e-07, "loss": 0.0264, "step": 228600 }, { "epoch": 2.44254500774614, "grad_norm": 0.438113272190094, "learning_rate": 9.501560951342546e-07, "loss": 0.0097, "step": 228610 }, { "epoch": 2.44265185106042, "grad_norm": 0.06585446745157242, "learning_rate": 9.501487823950072e-07, "loss": 0.0034, "step": 228620 }, { "epoch": 2.4427586943746995, "grad_norm": 2.530404806137085, "learning_rate": 9.501414691475077e-07, "loss": 0.063, "step": 228630 }, { "epoch": 2.442865537688979, "grad_norm": 4.959028244018555, "learning_rate": 9.501341553917645e-07, "loss": 0.0191, "step": 228640 }, { "epoch": 2.442972381003259, "grad_norm": 9.892982482910156, "learning_rate": 9.501268411277858e-07, "loss": 0.0303, "step": 228650 }, { "epoch": 2.4430792243175383, "grad_norm": 8.491512298583984, "learning_rate": 9.501195263555801e-07, "loss": 0.0562, "step": 228660 }, { "epoch": 2.443186067631818, "grad_norm": 0.04214049130678177, "learning_rate": 9.501122110751554e-07, "loss": 0.0254, "step": 228670 }, { "epoch": 2.4432929109460977, "grad_norm": 4.197152614593506, "learning_rate": 9.5010489528652e-07, "loss": 0.0702, "step": 228680 }, { "epoch": 2.443399754260377, "grad_norm": 1.7851123809814453, "learning_rate": 9.500975789896822e-07, "loss": 0.03, "step": 228690 }, { "epoch": 2.4435065975746566, "grad_norm": 1.8310902118682861, "learning_rate": 9.500902621846503e-07, "loss": 0.0062, "step": 228700 }, { "epoch": 2.4436134408889365, "grad_norm": 1.7774523496627808, "learning_rate": 9.500829448714325e-07, "loss": 0.0135, "step": 228710 }, { "epoch": 2.443720284203216, "grad_norm": 3.284594774246216, "learning_rate": 9.500756270500372e-07, "loss": 0.0316, "step": 228720 }, { "epoch": 2.4438271275174954, "grad_norm": 0.039834339171648026, "learning_rate": 9.500683087204724e-07, "loss": 0.0167, "step": 228730 }, { "epoch": 2.4439339708317753, "grad_norm": 0.10670096427202225, "learning_rate": 9.500609898827466e-07, "loss": 0.0341, "step": 228740 }, { "epoch": 2.444040814146055, "grad_norm": 2.731555700302124, "learning_rate": 9.500536705368679e-07, "loss": 0.0554, "step": 228750 }, { "epoch": 2.4441476574603342, "grad_norm": 0.3981081545352936, "learning_rate": 9.500463506828446e-07, "loss": 0.0084, "step": 228760 }, { "epoch": 2.444254500774614, "grad_norm": 2.846743583679199, "learning_rate": 9.500390303206851e-07, "loss": 0.0116, "step": 228770 }, { "epoch": 2.4443613440888936, "grad_norm": 0.09528260678052902, "learning_rate": 9.500317094503975e-07, "loss": 0.034, "step": 228780 }, { "epoch": 2.444468187403173, "grad_norm": 1.3673732280731201, "learning_rate": 9.500243880719902e-07, "loss": 0.0278, "step": 228790 }, { "epoch": 2.444575030717453, "grad_norm": 5.823822975158691, "learning_rate": 9.500170661854714e-07, "loss": 0.0126, "step": 228800 }, { "epoch": 2.4446818740317324, "grad_norm": 0.004005414433777332, "learning_rate": 9.500097437908495e-07, "loss": 0.0116, "step": 228810 }, { "epoch": 2.444788717346012, "grad_norm": 10.126110076904297, "learning_rate": 9.500024208881325e-07, "loss": 0.0118, "step": 228820 }, { "epoch": 2.444895560660292, "grad_norm": 0.5720105171203613, "learning_rate": 9.499950974773288e-07, "loss": 0.0174, "step": 228830 }, { "epoch": 2.4450024039745712, "grad_norm": 7.079211235046387, "learning_rate": 9.499877735584465e-07, "loss": 0.0219, "step": 228840 }, { "epoch": 2.4451092472888507, "grad_norm": 7.135664939880371, "learning_rate": 9.499804491314943e-07, "loss": 0.0236, "step": 228850 }, { "epoch": 2.4452160906031306, "grad_norm": 1.3862336874008179, "learning_rate": 9.499731241964801e-07, "loss": 0.043, "step": 228860 }, { "epoch": 2.44532293391741, "grad_norm": 4.819483280181885, "learning_rate": 9.499657987534123e-07, "loss": 0.0183, "step": 228870 }, { "epoch": 2.4454297772316895, "grad_norm": 0.050312455743551254, "learning_rate": 9.499584728022992e-07, "loss": 0.0036, "step": 228880 }, { "epoch": 2.4455366205459694, "grad_norm": 0.03616158291697502, "learning_rate": 9.499511463431489e-07, "loss": 0.017, "step": 228890 }, { "epoch": 2.445643463860249, "grad_norm": 4.261142253875732, "learning_rate": 9.499438193759699e-07, "loss": 0.0107, "step": 228900 }, { "epoch": 2.445750307174529, "grad_norm": 0.05668116733431816, "learning_rate": 9.499364919007704e-07, "loss": 0.0184, "step": 228910 }, { "epoch": 2.4458571504888083, "grad_norm": 2.1776254177093506, "learning_rate": 9.499291639175585e-07, "loss": 0.0161, "step": 228920 }, { "epoch": 2.4459639938030877, "grad_norm": 6.080855846405029, "learning_rate": 9.499218354263427e-07, "loss": 0.0438, "step": 228930 }, { "epoch": 2.446070837117367, "grad_norm": 2.9324557781219482, "learning_rate": 9.499145064271312e-07, "loss": 0.0278, "step": 228940 }, { "epoch": 2.446177680431647, "grad_norm": 6.248112678527832, "learning_rate": 9.499071769199323e-07, "loss": 0.0049, "step": 228950 }, { "epoch": 2.4462845237459265, "grad_norm": 0.17964990437030792, "learning_rate": 9.498998469047541e-07, "loss": 0.0181, "step": 228960 }, { "epoch": 2.4463913670602064, "grad_norm": Infinity, "learning_rate": 9.498925163816051e-07, "loss": 0.1199, "step": 228970 }, { "epoch": 2.446498210374486, "grad_norm": 5.125726699829102, "learning_rate": 9.498851853504937e-07, "loss": 0.1432, "step": 228980 }, { "epoch": 2.4466050536887654, "grad_norm": 2.958779811859131, "learning_rate": 9.498778538114277e-07, "loss": 0.078, "step": 228990 }, { "epoch": 2.446711897003045, "grad_norm": 3.55779767036438, "learning_rate": 9.498705217644157e-07, "loss": 0.0145, "step": 229000 }, { "epoch": 2.4468187403173247, "grad_norm": 0.6804548501968384, "learning_rate": 9.498631892094659e-07, "loss": 0.0182, "step": 229010 }, { "epoch": 2.446925583631604, "grad_norm": 5.061739921569824, "learning_rate": 9.498558561465868e-07, "loss": 0.0224, "step": 229020 }, { "epoch": 2.447032426945884, "grad_norm": 0.16642530262470245, "learning_rate": 9.498485225757862e-07, "loss": 0.0492, "step": 229030 }, { "epoch": 2.4471392702601635, "grad_norm": 0.17204399406909943, "learning_rate": 9.498411884970729e-07, "loss": 0.0591, "step": 229040 }, { "epoch": 2.447246113574443, "grad_norm": 9.270258903503418, "learning_rate": 9.498338539104549e-07, "loss": 0.0151, "step": 229050 }, { "epoch": 2.4473529568887225, "grad_norm": 0.15853504836559296, "learning_rate": 9.498265188159404e-07, "loss": 0.0063, "step": 229060 }, { "epoch": 2.4474598002030024, "grad_norm": 2.7498462200164795, "learning_rate": 9.498191832135381e-07, "loss": 0.0377, "step": 229070 }, { "epoch": 2.447566643517282, "grad_norm": 4.339678764343262, "learning_rate": 9.498118471032556e-07, "loss": 0.0576, "step": 229080 }, { "epoch": 2.4476734868315617, "grad_norm": 0.16311152279376984, "learning_rate": 9.498045104851017e-07, "loss": 0.032, "step": 229090 }, { "epoch": 2.447780330145841, "grad_norm": 0.788221538066864, "learning_rate": 9.497971733590848e-07, "loss": 0.0163, "step": 229100 }, { "epoch": 2.4478871734601206, "grad_norm": 0.03777696564793587, "learning_rate": 9.497898357252127e-07, "loss": 0.0335, "step": 229110 }, { "epoch": 2.4479940167744, "grad_norm": 2.113039255142212, "learning_rate": 9.49782497583494e-07, "loss": 0.01, "step": 229120 }, { "epoch": 2.44810086008868, "grad_norm": 0.9804072380065918, "learning_rate": 9.497751589339369e-07, "loss": 0.0134, "step": 229130 }, { "epoch": 2.4482077034029595, "grad_norm": 1.3165428638458252, "learning_rate": 9.497678197765498e-07, "loss": 0.0332, "step": 229140 }, { "epoch": 2.4483145467172394, "grad_norm": 0.6497477889060974, "learning_rate": 9.497604801113408e-07, "loss": 0.0296, "step": 229150 }, { "epoch": 2.448421390031519, "grad_norm": 1.5610325336456299, "learning_rate": 9.497531399383183e-07, "loss": 0.0085, "step": 229160 }, { "epoch": 2.4485282333457983, "grad_norm": 1.152310848236084, "learning_rate": 9.497457992574904e-07, "loss": 0.0164, "step": 229170 }, { "epoch": 2.448635076660078, "grad_norm": 7.697388172149658, "learning_rate": 9.497384580688657e-07, "loss": 0.0395, "step": 229180 }, { "epoch": 2.4487419199743576, "grad_norm": 0.8148209452629089, "learning_rate": 9.497311163724525e-07, "loss": 0.0129, "step": 229190 }, { "epoch": 2.448848763288637, "grad_norm": 14.153014183044434, "learning_rate": 9.497237741682587e-07, "loss": 0.038, "step": 229200 }, { "epoch": 2.448955606602917, "grad_norm": 0.052157673984766006, "learning_rate": 9.497164314562929e-07, "loss": 0.0124, "step": 229210 }, { "epoch": 2.4490624499171965, "grad_norm": 0.12733273208141327, "learning_rate": 9.497090882365633e-07, "loss": 0.0299, "step": 229220 }, { "epoch": 2.449169293231476, "grad_norm": 0.5342348217964172, "learning_rate": 9.49701744509078e-07, "loss": 0.0039, "step": 229230 }, { "epoch": 2.449276136545756, "grad_norm": 3.561803102493286, "learning_rate": 9.496944002738458e-07, "loss": 0.0109, "step": 229240 }, { "epoch": 2.4493829798600353, "grad_norm": 4.4341583251953125, "learning_rate": 9.496870555308744e-07, "loss": 0.0285, "step": 229250 }, { "epoch": 2.4494898231743147, "grad_norm": 1.1124024391174316, "learning_rate": 9.496797102801725e-07, "loss": 0.005, "step": 229260 }, { "epoch": 2.4495966664885946, "grad_norm": 5.119101524353027, "learning_rate": 9.496723645217484e-07, "loss": 0.0376, "step": 229270 }, { "epoch": 2.449703509802874, "grad_norm": 0.08322864025831223, "learning_rate": 9.496650182556101e-07, "loss": 0.0008, "step": 229280 }, { "epoch": 2.4498103531171536, "grad_norm": 0.10593587905168533, "learning_rate": 9.496576714817662e-07, "loss": 0.0162, "step": 229290 }, { "epoch": 2.4499171964314335, "grad_norm": 0.010752161033451557, "learning_rate": 9.496503242002247e-07, "loss": 0.0252, "step": 229300 }, { "epoch": 2.450024039745713, "grad_norm": 5.316885471343994, "learning_rate": 9.496429764109941e-07, "loss": 0.02, "step": 229310 }, { "epoch": 2.4501308830599924, "grad_norm": 0.2784944176673889, "learning_rate": 9.496356281140826e-07, "loss": 0.0062, "step": 229320 }, { "epoch": 2.4502377263742723, "grad_norm": 0.024107320234179497, "learning_rate": 9.496282793094986e-07, "loss": 0.057, "step": 229330 }, { "epoch": 2.4503445696885517, "grad_norm": 0.17472411692142487, "learning_rate": 9.496209299972503e-07, "loss": 0.0111, "step": 229340 }, { "epoch": 2.450451413002831, "grad_norm": 1.1305344104766846, "learning_rate": 9.496135801773462e-07, "loss": 0.0632, "step": 229350 }, { "epoch": 2.450558256317111, "grad_norm": 1.6416642665863037, "learning_rate": 9.496062298497942e-07, "loss": 0.0427, "step": 229360 }, { "epoch": 2.4506650996313906, "grad_norm": 4.3096113204956055, "learning_rate": 9.495988790146029e-07, "loss": 0.0393, "step": 229370 }, { "epoch": 2.45077194294567, "grad_norm": 4.037638187408447, "learning_rate": 9.495915276717805e-07, "loss": 0.0376, "step": 229380 }, { "epoch": 2.45087878625995, "grad_norm": 0.07420237362384796, "learning_rate": 9.495841758213354e-07, "loss": 0.0033, "step": 229390 }, { "epoch": 2.4509856295742294, "grad_norm": 15.317639350891113, "learning_rate": 9.49576823463276e-07, "loss": 0.0433, "step": 229400 }, { "epoch": 2.451092472888509, "grad_norm": 0.43546026945114136, "learning_rate": 9.495694705976102e-07, "loss": 0.0083, "step": 229410 }, { "epoch": 2.4511993162027887, "grad_norm": 2.0498766899108887, "learning_rate": 9.495621172243466e-07, "loss": 0.0013, "step": 229420 }, { "epoch": 2.451306159517068, "grad_norm": 3.45298171043396, "learning_rate": 9.495547633434933e-07, "loss": 0.0077, "step": 229430 }, { "epoch": 2.4514130028313477, "grad_norm": 1.110865831375122, "learning_rate": 9.49547408955059e-07, "loss": 0.003, "step": 229440 }, { "epoch": 2.4515198461456276, "grad_norm": 5.6537017822265625, "learning_rate": 9.495400540590516e-07, "loss": 0.0462, "step": 229450 }, { "epoch": 2.451626689459907, "grad_norm": 0.4411069452762604, "learning_rate": 9.495326986554796e-07, "loss": 0.0157, "step": 229460 }, { "epoch": 2.4517335327741865, "grad_norm": 0.08989591896533966, "learning_rate": 9.495253427443513e-07, "loss": 0.0327, "step": 229470 }, { "epoch": 2.4518403760884664, "grad_norm": 2.7677009105682373, "learning_rate": 9.495179863256748e-07, "loss": 0.033, "step": 229480 }, { "epoch": 2.451947219402746, "grad_norm": 0.5994551777839661, "learning_rate": 9.495106293994587e-07, "loss": 0.0218, "step": 229490 }, { "epoch": 2.4520540627170253, "grad_norm": 0.02660396695137024, "learning_rate": 9.495032719657111e-07, "loss": 0.0126, "step": 229500 }, { "epoch": 2.452160906031305, "grad_norm": 0.05216841772198677, "learning_rate": 9.494959140244404e-07, "loss": 0.0308, "step": 229510 }, { "epoch": 2.4522677493455847, "grad_norm": 1.073715329170227, "learning_rate": 9.494885555756549e-07, "loss": 0.0103, "step": 229520 }, { "epoch": 2.452374592659864, "grad_norm": 0.011635726317763329, "learning_rate": 9.49481196619363e-07, "loss": 0.0106, "step": 229530 }, { "epoch": 2.452481435974144, "grad_norm": 0.0450996533036232, "learning_rate": 9.494738371555727e-07, "loss": 0.0635, "step": 229540 }, { "epoch": 2.4525882792884235, "grad_norm": 5.034013748168945, "learning_rate": 9.494664771842926e-07, "loss": 0.0121, "step": 229550 }, { "epoch": 2.452695122602703, "grad_norm": 1.3519227504730225, "learning_rate": 9.49459116705531e-07, "loss": 0.0337, "step": 229560 }, { "epoch": 2.452801965916983, "grad_norm": 0.11351342499256134, "learning_rate": 9.49451755719296e-07, "loss": 0.0186, "step": 229570 }, { "epoch": 2.4529088092312623, "grad_norm": 0.18041890859603882, "learning_rate": 9.494443942255963e-07, "loss": 0.0045, "step": 229580 }, { "epoch": 2.4530156525455418, "grad_norm": 1.899229884147644, "learning_rate": 9.494370322244397e-07, "loss": 0.0129, "step": 229590 }, { "epoch": 2.4531224958598217, "grad_norm": 2.8879740238189697, "learning_rate": 9.494296697158349e-07, "loss": 0.0103, "step": 229600 }, { "epoch": 2.453229339174101, "grad_norm": 3.5393521785736084, "learning_rate": 9.494223066997899e-07, "loss": 0.0154, "step": 229610 }, { "epoch": 2.453336182488381, "grad_norm": 2.0112690925598145, "learning_rate": 9.494149431763135e-07, "loss": 0.0196, "step": 229620 }, { "epoch": 2.4534430258026605, "grad_norm": 0.04419073462486267, "learning_rate": 9.494075791454135e-07, "loss": 0.0141, "step": 229630 }, { "epoch": 2.45354986911694, "grad_norm": 0.03913341462612152, "learning_rate": 9.494002146070985e-07, "loss": 0.0246, "step": 229640 }, { "epoch": 2.4536567124312194, "grad_norm": 0.053083132952451706, "learning_rate": 9.493928495613767e-07, "loss": 0.0103, "step": 229650 }, { "epoch": 2.4537635557454993, "grad_norm": 2.578371286392212, "learning_rate": 9.493854840082565e-07, "loss": 0.0068, "step": 229660 }, { "epoch": 2.4538703990597788, "grad_norm": 3.7577784061431885, "learning_rate": 9.49378117947746e-07, "loss": 0.028, "step": 229670 }, { "epoch": 2.4539772423740587, "grad_norm": 0.8636646866798401, "learning_rate": 9.493707513798539e-07, "loss": 0.0129, "step": 229680 }, { "epoch": 2.454084085688338, "grad_norm": 1.279294490814209, "learning_rate": 9.493633843045883e-07, "loss": 0.0198, "step": 229690 }, { "epoch": 2.4541909290026176, "grad_norm": 3.349653959274292, "learning_rate": 9.493560167219574e-07, "loss": 0.0169, "step": 229700 }, { "epoch": 2.454297772316897, "grad_norm": 0.2642076909542084, "learning_rate": 9.493486486319697e-07, "loss": 0.01, "step": 229710 }, { "epoch": 2.454404615631177, "grad_norm": 0.2607129216194153, "learning_rate": 9.493412800346335e-07, "loss": 0.0311, "step": 229720 }, { "epoch": 2.4545114589454564, "grad_norm": 0.6156010627746582, "learning_rate": 9.49333910929957e-07, "loss": 0.0418, "step": 229730 }, { "epoch": 2.4546183022597363, "grad_norm": 7.804010391235352, "learning_rate": 9.493265413179485e-07, "loss": 0.0139, "step": 229740 }, { "epoch": 2.4547251455740158, "grad_norm": 0.13914644718170166, "learning_rate": 9.493191711986166e-07, "loss": 0.015, "step": 229750 }, { "epoch": 2.4548319888882952, "grad_norm": 11.259211540222168, "learning_rate": 9.493118005719695e-07, "loss": 0.0127, "step": 229760 }, { "epoch": 2.4549388322025747, "grad_norm": 0.23608125746250153, "learning_rate": 9.493044294380154e-07, "loss": 0.0179, "step": 229770 }, { "epoch": 2.4550456755168546, "grad_norm": 20.565624237060547, "learning_rate": 9.492970577967626e-07, "loss": 0.0519, "step": 229780 }, { "epoch": 2.455152518831134, "grad_norm": 2.5050313472747803, "learning_rate": 9.492896856482195e-07, "loss": 0.0203, "step": 229790 }, { "epoch": 2.455259362145414, "grad_norm": 1.17856764793396, "learning_rate": 9.492823129923946e-07, "loss": 0.0347, "step": 229800 }, { "epoch": 2.4553662054596934, "grad_norm": 0.19497987627983093, "learning_rate": 9.49274939829296e-07, "loss": 0.0093, "step": 229810 }, { "epoch": 2.455473048773973, "grad_norm": 0.11885940283536911, "learning_rate": 9.49267566158932e-07, "loss": 0.0365, "step": 229820 }, { "epoch": 2.4555798920882523, "grad_norm": 0.8106920719146729, "learning_rate": 9.49260191981311e-07, "loss": 0.0115, "step": 229830 }, { "epoch": 2.4556867354025322, "grad_norm": 2.3634796142578125, "learning_rate": 9.492528172964415e-07, "loss": 0.0237, "step": 229840 }, { "epoch": 2.4557935787168117, "grad_norm": 0.045274797827005386, "learning_rate": 9.492454421043316e-07, "loss": 0.0594, "step": 229850 }, { "epoch": 2.4559004220310916, "grad_norm": 1.080910086631775, "learning_rate": 9.492380664049896e-07, "loss": 0.0037, "step": 229860 }, { "epoch": 2.456007265345371, "grad_norm": 0.045769933611154556, "learning_rate": 9.492306901984239e-07, "loss": 0.0119, "step": 229870 }, { "epoch": 2.4561141086596505, "grad_norm": 0.16553011536598206, "learning_rate": 9.492233134846431e-07, "loss": 0.004, "step": 229880 }, { "epoch": 2.4562209519739304, "grad_norm": 4.1647725105285645, "learning_rate": 9.492159362636549e-07, "loss": 0.0218, "step": 229890 }, { "epoch": 2.45632779528821, "grad_norm": 0.15109668672084808, "learning_rate": 9.492085585354684e-07, "loss": 0.0014, "step": 229900 }, { "epoch": 2.4564346386024893, "grad_norm": 0.016614820808172226, "learning_rate": 9.492011803000912e-07, "loss": 0.0322, "step": 229910 }, { "epoch": 2.4565414819167692, "grad_norm": 0.03280014544725418, "learning_rate": 9.491938015575321e-07, "loss": 0.0131, "step": 229920 }, { "epoch": 2.4566483252310487, "grad_norm": 0.15252235531806946, "learning_rate": 9.491864223077993e-07, "loss": 0.0287, "step": 229930 }, { "epoch": 2.456755168545328, "grad_norm": 0.08097919076681137, "learning_rate": 9.491790425509012e-07, "loss": 0.0174, "step": 229940 }, { "epoch": 2.456862011859608, "grad_norm": 11.570655822753906, "learning_rate": 9.49171662286846e-07, "loss": 0.0201, "step": 229950 }, { "epoch": 2.4569688551738875, "grad_norm": 0.03582790493965149, "learning_rate": 9.49164281515642e-07, "loss": 0.0282, "step": 229960 }, { "epoch": 2.457075698488167, "grad_norm": 0.5837926268577576, "learning_rate": 9.491569002372976e-07, "loss": 0.035, "step": 229970 }, { "epoch": 2.457182541802447, "grad_norm": 0.1277204304933548, "learning_rate": 9.491495184518213e-07, "loss": 0.0291, "step": 229980 }, { "epoch": 2.4572893851167263, "grad_norm": 0.01731119677424431, "learning_rate": 9.491421361592213e-07, "loss": 0.0097, "step": 229990 }, { "epoch": 2.457396228431006, "grad_norm": 0.011976604349911213, "learning_rate": 9.491347533595059e-07, "loss": 0.0387, "step": 230000 }, { "epoch": 2.4575030717452857, "grad_norm": 2.941734552383423, "learning_rate": 9.491273700526833e-07, "loss": 0.0521, "step": 230010 }, { "epoch": 2.457609915059565, "grad_norm": 2.2258434295654297, "learning_rate": 9.491199862387622e-07, "loss": 0.0116, "step": 230020 }, { "epoch": 2.4577167583738446, "grad_norm": 1.1047608852386475, "learning_rate": 9.491126019177508e-07, "loss": 0.0025, "step": 230030 }, { "epoch": 2.4578236016881245, "grad_norm": 0.020682726055383682, "learning_rate": 9.491052170896572e-07, "loss": 0.0189, "step": 230040 }, { "epoch": 2.457930445002404, "grad_norm": 4.971097946166992, "learning_rate": 9.4909783175449e-07, "loss": 0.0182, "step": 230050 }, { "epoch": 2.4580372883166834, "grad_norm": 0.12249960750341415, "learning_rate": 9.490904459122575e-07, "loss": 0.0151, "step": 230060 }, { "epoch": 2.4581441316309633, "grad_norm": 0.04032537713646889, "learning_rate": 9.490830595629679e-07, "loss": 0.0087, "step": 230070 }, { "epoch": 2.458250974945243, "grad_norm": 0.08898001164197922, "learning_rate": 9.490756727066296e-07, "loss": 0.0167, "step": 230080 }, { "epoch": 2.4583578182595223, "grad_norm": 1.3127753734588623, "learning_rate": 9.49068285343251e-07, "loss": 0.0219, "step": 230090 }, { "epoch": 2.458464661573802, "grad_norm": 8.169836044311523, "learning_rate": 9.490608974728404e-07, "loss": 0.0553, "step": 230100 }, { "epoch": 2.4585715048880816, "grad_norm": 0.06350936740636826, "learning_rate": 9.490535090954063e-07, "loss": 0.0059, "step": 230110 }, { "epoch": 2.458678348202361, "grad_norm": 0.6570018529891968, "learning_rate": 9.490461202109568e-07, "loss": 0.0544, "step": 230120 }, { "epoch": 2.458785191516641, "grad_norm": 4.494363784790039, "learning_rate": 9.490387308195003e-07, "loss": 0.0248, "step": 230130 }, { "epoch": 2.4588920348309204, "grad_norm": 0.6208198666572571, "learning_rate": 9.490313409210452e-07, "loss": 0.0013, "step": 230140 }, { "epoch": 2.4589988781452, "grad_norm": 0.41330236196517944, "learning_rate": 9.490239505156e-07, "loss": 0.0167, "step": 230150 }, { "epoch": 2.45910572145948, "grad_norm": 0.02248486876487732, "learning_rate": 9.490165596031726e-07, "loss": 0.0038, "step": 230160 }, { "epoch": 2.4592125647737593, "grad_norm": 0.4569975733757019, "learning_rate": 9.490091681837718e-07, "loss": 0.0394, "step": 230170 }, { "epoch": 2.4593194080880387, "grad_norm": 0.014162273146212101, "learning_rate": 9.490017762574056e-07, "loss": 0.013, "step": 230180 }, { "epoch": 2.4594262514023186, "grad_norm": 4.717972278594971, "learning_rate": 9.489943838240826e-07, "loss": 0.0134, "step": 230190 }, { "epoch": 2.459533094716598, "grad_norm": 0.8993427753448486, "learning_rate": 9.489869908838111e-07, "loss": 0.0215, "step": 230200 }, { "epoch": 2.4596399380308775, "grad_norm": 0.03859778866171837, "learning_rate": 9.489795974365993e-07, "loss": 0.0141, "step": 230210 }, { "epoch": 2.4597467813451575, "grad_norm": 0.005001387558877468, "learning_rate": 9.489722034824557e-07, "loss": 0.0569, "step": 230220 }, { "epoch": 2.459853624659437, "grad_norm": 4.290029525756836, "learning_rate": 9.489648090213886e-07, "loss": 0.0368, "step": 230230 }, { "epoch": 2.4599604679737164, "grad_norm": 0.0526747852563858, "learning_rate": 9.489574140534063e-07, "loss": 0.0574, "step": 230240 }, { "epoch": 2.4600673112879963, "grad_norm": 4.866199493408203, "learning_rate": 9.489500185785174e-07, "loss": 0.0373, "step": 230250 }, { "epoch": 2.4601741546022757, "grad_norm": 4.97966194152832, "learning_rate": 9.489426225967298e-07, "loss": 0.0395, "step": 230260 }, { "epoch": 2.460280997916555, "grad_norm": 0.1701948493719101, "learning_rate": 9.489352261080521e-07, "loss": 0.0185, "step": 230270 }, { "epoch": 2.460387841230835, "grad_norm": 0.01804962009191513, "learning_rate": 9.489278291124927e-07, "loss": 0.0246, "step": 230280 }, { "epoch": 2.4604946845451146, "grad_norm": 2.3861300945281982, "learning_rate": 9.4892043161006e-07, "loss": 0.012, "step": 230290 }, { "epoch": 2.460601527859394, "grad_norm": 1.1956902742385864, "learning_rate": 9.489130336007621e-07, "loss": 0.0082, "step": 230300 }, { "epoch": 2.460708371173674, "grad_norm": 0.13268205523490906, "learning_rate": 9.489056350846076e-07, "loss": 0.0148, "step": 230310 }, { "epoch": 2.4608152144879534, "grad_norm": 0.02380097284913063, "learning_rate": 9.488982360616047e-07, "loss": 0.0449, "step": 230320 }, { "epoch": 2.460922057802233, "grad_norm": 0.0747743770480156, "learning_rate": 9.488908365317618e-07, "loss": 0.0083, "step": 230330 }, { "epoch": 2.4610289011165127, "grad_norm": 0.072537362575531, "learning_rate": 9.488834364950874e-07, "loss": 0.0077, "step": 230340 }, { "epoch": 2.461135744430792, "grad_norm": 0.41550347208976746, "learning_rate": 9.488760359515897e-07, "loss": 0.0212, "step": 230350 }, { "epoch": 2.4612425877450717, "grad_norm": 0.10514933615922928, "learning_rate": 9.488686349012769e-07, "loss": 0.0215, "step": 230360 }, { "epoch": 2.4613494310593516, "grad_norm": 1.6739171743392944, "learning_rate": 9.488612333441577e-07, "loss": 0.0379, "step": 230370 }, { "epoch": 2.461456274373631, "grad_norm": 0.01728512905538082, "learning_rate": 9.488538312802402e-07, "loss": 0.009, "step": 230380 }, { "epoch": 2.461563117687911, "grad_norm": 0.12688124179840088, "learning_rate": 9.488464287095329e-07, "loss": 0.0127, "step": 230390 }, { "epoch": 2.4616699610021904, "grad_norm": 6.814452171325684, "learning_rate": 9.48839025632044e-07, "loss": 0.025, "step": 230400 }, { "epoch": 2.46177680431647, "grad_norm": 0.035937368869781494, "learning_rate": 9.488316220477821e-07, "loss": 0.0072, "step": 230410 }, { "epoch": 2.4618836476307493, "grad_norm": 0.02574995905160904, "learning_rate": 9.488242179567554e-07, "loss": 0.0038, "step": 230420 }, { "epoch": 2.461990490945029, "grad_norm": 0.8298740386962891, "learning_rate": 9.488168133589723e-07, "loss": 0.0179, "step": 230430 }, { "epoch": 2.4620973342593087, "grad_norm": 0.4095936119556427, "learning_rate": 9.488094082544411e-07, "loss": 0.0057, "step": 230440 }, { "epoch": 2.4622041775735886, "grad_norm": 7.6313862800598145, "learning_rate": 9.488020026431702e-07, "loss": 0.0478, "step": 230450 }, { "epoch": 2.462311020887868, "grad_norm": 0.3268878757953644, "learning_rate": 9.487945965251681e-07, "loss": 0.0078, "step": 230460 }, { "epoch": 2.4624178642021475, "grad_norm": 8.887945175170898, "learning_rate": 9.487871899004429e-07, "loss": 0.0934, "step": 230470 }, { "epoch": 2.462524707516427, "grad_norm": 0.00839976966381073, "learning_rate": 9.48779782769003e-07, "loss": 0.0088, "step": 230480 }, { "epoch": 2.462631550830707, "grad_norm": 0.1666235625743866, "learning_rate": 9.487723751308569e-07, "loss": 0.0114, "step": 230490 }, { "epoch": 2.4627383941449863, "grad_norm": 0.009350227192044258, "learning_rate": 9.48764966986013e-07, "loss": 0.0128, "step": 230500 }, { "epoch": 2.462845237459266, "grad_norm": 0.1728488951921463, "learning_rate": 9.487575583344796e-07, "loss": 0.0405, "step": 230510 }, { "epoch": 2.4629520807735457, "grad_norm": 4.56616735458374, "learning_rate": 9.487501491762649e-07, "loss": 0.0388, "step": 230520 }, { "epoch": 2.463058924087825, "grad_norm": 0.1741347461938858, "learning_rate": 9.487427395113777e-07, "loss": 0.0304, "step": 230530 }, { "epoch": 2.4631657674021046, "grad_norm": 3.1169533729553223, "learning_rate": 9.487353293398257e-07, "loss": 0.0173, "step": 230540 }, { "epoch": 2.4632726107163845, "grad_norm": 1.379151463508606, "learning_rate": 9.487279186616179e-07, "loss": 0.0043, "step": 230550 }, { "epoch": 2.463379454030664, "grad_norm": 0.45655736327171326, "learning_rate": 9.487205074767623e-07, "loss": 0.0109, "step": 230560 }, { "epoch": 2.463486297344944, "grad_norm": 0.01409733947366476, "learning_rate": 9.487130957852675e-07, "loss": 0.0075, "step": 230570 }, { "epoch": 2.4635931406592233, "grad_norm": 0.3873476982116699, "learning_rate": 9.487056835871416e-07, "loss": 0.0068, "step": 230580 }, { "epoch": 2.4636999839735028, "grad_norm": 1.3064908981323242, "learning_rate": 9.486982708823932e-07, "loss": 0.0203, "step": 230590 }, { "epoch": 2.463806827287782, "grad_norm": 3.8844592571258545, "learning_rate": 9.486908576710306e-07, "loss": 0.0117, "step": 230600 }, { "epoch": 2.463913670602062, "grad_norm": 0.8947761654853821, "learning_rate": 9.486834439530622e-07, "loss": 0.0343, "step": 230610 }, { "epoch": 2.4640205139163416, "grad_norm": 10.22057056427002, "learning_rate": 9.486760297284963e-07, "loss": 0.0152, "step": 230620 }, { "epoch": 2.4641273572306215, "grad_norm": 0.30337947607040405, "learning_rate": 9.486686149973413e-07, "loss": 0.0071, "step": 230630 }, { "epoch": 2.464234200544901, "grad_norm": 5.7407732009887695, "learning_rate": 9.486611997596055e-07, "loss": 0.0288, "step": 230640 }, { "epoch": 2.4643410438591804, "grad_norm": 1.2286159992218018, "learning_rate": 9.486537840152973e-07, "loss": 0.0823, "step": 230650 }, { "epoch": 2.4644478871734603, "grad_norm": 0.257526695728302, "learning_rate": 9.486463677644252e-07, "loss": 0.0251, "step": 230660 }, { "epoch": 2.4645547304877398, "grad_norm": 0.4241483807563782, "learning_rate": 9.486389510069975e-07, "loss": 0.0101, "step": 230670 }, { "epoch": 2.4646615738020192, "grad_norm": 1.1728167533874512, "learning_rate": 9.486315337430226e-07, "loss": 0.0055, "step": 230680 }, { "epoch": 2.464768417116299, "grad_norm": 0.009661451913416386, "learning_rate": 9.486241159725088e-07, "loss": 0.0127, "step": 230690 }, { "epoch": 2.4648752604305786, "grad_norm": 0.07737911492586136, "learning_rate": 9.486166976954645e-07, "loss": 0.0151, "step": 230700 }, { "epoch": 2.464982103744858, "grad_norm": 4.280019283294678, "learning_rate": 9.48609278911898e-07, "loss": 0.0105, "step": 230710 }, { "epoch": 2.465088947059138, "grad_norm": 2.6955597400665283, "learning_rate": 9.48601859621818e-07, "loss": 0.0153, "step": 230720 }, { "epoch": 2.4651957903734174, "grad_norm": 0.04466328024864197, "learning_rate": 9.485944398252325e-07, "loss": 0.0192, "step": 230730 }, { "epoch": 2.465302633687697, "grad_norm": 0.6453031301498413, "learning_rate": 9.485870195221499e-07, "loss": 0.013, "step": 230740 }, { "epoch": 2.4654094770019768, "grad_norm": 4.8559112548828125, "learning_rate": 9.485795987125789e-07, "loss": 0.0354, "step": 230750 }, { "epoch": 2.4655163203162562, "grad_norm": 0.28157874941825867, "learning_rate": 9.485721773965277e-07, "loss": 0.0056, "step": 230760 }, { "epoch": 2.4656231636305357, "grad_norm": 0.527329683303833, "learning_rate": 9.485647555740044e-07, "loss": 0.0157, "step": 230770 }, { "epoch": 2.4657300069448156, "grad_norm": 3.221973419189453, "learning_rate": 9.485573332450179e-07, "loss": 0.0294, "step": 230780 }, { "epoch": 2.465836850259095, "grad_norm": 0.042586278170347214, "learning_rate": 9.485499104095762e-07, "loss": 0.0325, "step": 230790 }, { "epoch": 2.4659436935733745, "grad_norm": 6.0210981369018555, "learning_rate": 9.485424870676877e-07, "loss": 0.0193, "step": 230800 }, { "epoch": 2.4660505368876544, "grad_norm": 0.23872287571430206, "learning_rate": 9.48535063219361e-07, "loss": 0.0107, "step": 230810 }, { "epoch": 2.466157380201934, "grad_norm": 0.11936844140291214, "learning_rate": 9.485276388646044e-07, "loss": 0.0085, "step": 230820 }, { "epoch": 2.4662642235162133, "grad_norm": 0.029885364696383476, "learning_rate": 9.485202140034262e-07, "loss": 0.0489, "step": 230830 }, { "epoch": 2.4663710668304932, "grad_norm": 0.10664529353380203, "learning_rate": 9.485127886358348e-07, "loss": 0.01, "step": 230840 }, { "epoch": 2.4664779101447727, "grad_norm": 0.2225908786058426, "learning_rate": 9.485053627618387e-07, "loss": 0.0039, "step": 230850 }, { "epoch": 2.466584753459052, "grad_norm": 2.1063265800476074, "learning_rate": 9.484979363814461e-07, "loss": 0.0006, "step": 230860 }, { "epoch": 2.466691596773332, "grad_norm": 0.04630584642291069, "learning_rate": 9.484905094946654e-07, "loss": 0.0207, "step": 230870 }, { "epoch": 2.4667984400876115, "grad_norm": 0.004127265885472298, "learning_rate": 9.484830821015053e-07, "loss": 0.0041, "step": 230880 }, { "epoch": 2.466905283401891, "grad_norm": 1.3160231113433838, "learning_rate": 9.484756542019738e-07, "loss": 0.0308, "step": 230890 }, { "epoch": 2.467012126716171, "grad_norm": 0.38954320549964905, "learning_rate": 9.484682257960794e-07, "loss": 0.0035, "step": 230900 }, { "epoch": 2.4671189700304503, "grad_norm": 0.012420608662068844, "learning_rate": 9.484607968838306e-07, "loss": 0.0071, "step": 230910 }, { "epoch": 2.46722581334473, "grad_norm": 41.26258850097656, "learning_rate": 9.484533674652357e-07, "loss": 0.0221, "step": 230920 }, { "epoch": 2.4673326566590097, "grad_norm": 2.7428414821624756, "learning_rate": 9.484459375403031e-07, "loss": 0.0552, "step": 230930 }, { "epoch": 2.467439499973289, "grad_norm": 0.006666451692581177, "learning_rate": 9.484385071090412e-07, "loss": 0.0219, "step": 230940 }, { "epoch": 2.4675463432875686, "grad_norm": 0.8506773710250854, "learning_rate": 9.484310761714585e-07, "loss": 0.0129, "step": 230950 }, { "epoch": 2.4676531866018485, "grad_norm": 0.11509530246257782, "learning_rate": 9.484236447275631e-07, "loss": 0.0267, "step": 230960 }, { "epoch": 2.467760029916128, "grad_norm": 0.7974492907524109, "learning_rate": 9.484162127773636e-07, "loss": 0.0029, "step": 230970 }, { "epoch": 2.4678668732304074, "grad_norm": 7.679954528808594, "learning_rate": 9.484087803208682e-07, "loss": 0.0463, "step": 230980 }, { "epoch": 2.4679737165446873, "grad_norm": 0.3389882445335388, "learning_rate": 9.484013473580856e-07, "loss": 0.0637, "step": 230990 }, { "epoch": 2.468080559858967, "grad_norm": 0.009480161592364311, "learning_rate": 9.483939138890241e-07, "loss": 0.0112, "step": 231000 }, { "epoch": 2.4681874031732463, "grad_norm": 0.02410971373319626, "learning_rate": 9.48386479913692e-07, "loss": 0.006, "step": 231010 }, { "epoch": 2.468294246487526, "grad_norm": 15.317904472351074, "learning_rate": 9.483790454320977e-07, "loss": 0.0399, "step": 231020 }, { "epoch": 2.4684010898018056, "grad_norm": 0.02775505743920803, "learning_rate": 9.483716104442496e-07, "loss": 0.0063, "step": 231030 }, { "epoch": 2.468507933116085, "grad_norm": 0.03966113552451134, "learning_rate": 9.483641749501561e-07, "loss": 0.0512, "step": 231040 }, { "epoch": 2.468614776430365, "grad_norm": 1.0142699480056763, "learning_rate": 9.483567389498257e-07, "loss": 0.0047, "step": 231050 }, { "epoch": 2.4687216197446444, "grad_norm": 0.1863483041524887, "learning_rate": 9.483493024432665e-07, "loss": 0.024, "step": 231060 }, { "epoch": 2.468828463058924, "grad_norm": 0.007359848823398352, "learning_rate": 9.483418654304874e-07, "loss": 0.014, "step": 231070 }, { "epoch": 2.468935306373204, "grad_norm": 9.170425415039062, "learning_rate": 9.483344279114962e-07, "loss": 0.0414, "step": 231080 }, { "epoch": 2.4690421496874833, "grad_norm": 9.277295112609863, "learning_rate": 9.483269898863018e-07, "loss": 0.0039, "step": 231090 }, { "epoch": 2.469148993001763, "grad_norm": 0.5590654611587524, "learning_rate": 9.483195513549123e-07, "loss": 0.0256, "step": 231100 }, { "epoch": 2.4692558363160426, "grad_norm": 3.5961837768554688, "learning_rate": 9.483121123173361e-07, "loss": 0.0063, "step": 231110 }, { "epoch": 2.469362679630322, "grad_norm": 1.0325648784637451, "learning_rate": 9.483046727735818e-07, "loss": 0.0127, "step": 231120 }, { "epoch": 2.4694695229446015, "grad_norm": 3.037137031555176, "learning_rate": 9.482972327236577e-07, "loss": 0.0175, "step": 231130 }, { "epoch": 2.4695763662588814, "grad_norm": 11.864073753356934, "learning_rate": 9.482897921675721e-07, "loss": 0.0217, "step": 231140 }, { "epoch": 2.469683209573161, "grad_norm": 2.2546935081481934, "learning_rate": 9.482823511053337e-07, "loss": 0.0417, "step": 231150 }, { "epoch": 2.469790052887441, "grad_norm": 0.30855029821395874, "learning_rate": 9.482749095369504e-07, "loss": 0.0154, "step": 231160 }, { "epoch": 2.4698968962017203, "grad_norm": 0.6274252533912659, "learning_rate": 9.482674674624311e-07, "loss": 0.005, "step": 231170 }, { "epoch": 2.4700037395159997, "grad_norm": 0.007066136226058006, "learning_rate": 9.482600248817839e-07, "loss": 0.0203, "step": 231180 }, { "epoch": 2.470110582830279, "grad_norm": 1.5354342460632324, "learning_rate": 9.482525817950173e-07, "loss": 0.0106, "step": 231190 }, { "epoch": 2.470217426144559, "grad_norm": 0.01365737710148096, "learning_rate": 9.482451382021399e-07, "loss": 0.0072, "step": 231200 }, { "epoch": 2.4703242694588385, "grad_norm": 0.13890385627746582, "learning_rate": 9.482376941031596e-07, "loss": 0.01, "step": 231210 }, { "epoch": 2.4704311127731184, "grad_norm": 0.020814968273043633, "learning_rate": 9.482302494980854e-07, "loss": 0.0187, "step": 231220 }, { "epoch": 2.470537956087398, "grad_norm": 9.45882511138916, "learning_rate": 9.482228043869253e-07, "loss": 0.0392, "step": 231230 }, { "epoch": 2.4706447994016774, "grad_norm": 4.183202266693115, "learning_rate": 9.482153587696877e-07, "loss": 0.0113, "step": 231240 }, { "epoch": 2.470751642715957, "grad_norm": 4.963732719421387, "learning_rate": 9.482079126463813e-07, "loss": 0.0205, "step": 231250 }, { "epoch": 2.4708584860302367, "grad_norm": 1.1178195476531982, "learning_rate": 9.482004660170143e-07, "loss": 0.0218, "step": 231260 }, { "epoch": 2.470965329344516, "grad_norm": 3.676675319671631, "learning_rate": 9.481930188815951e-07, "loss": 0.0186, "step": 231270 }, { "epoch": 2.471072172658796, "grad_norm": 4.798023700714111, "learning_rate": 9.481855712401322e-07, "loss": 0.0203, "step": 231280 }, { "epoch": 2.4711790159730755, "grad_norm": 0.3594841957092285, "learning_rate": 9.48178123092634e-07, "loss": 0.0284, "step": 231290 }, { "epoch": 2.471285859287355, "grad_norm": 2.5121848583221436, "learning_rate": 9.481706744391087e-07, "loss": 0.0287, "step": 231300 }, { "epoch": 2.4713927026016345, "grad_norm": 6.070255279541016, "learning_rate": 9.48163225279565e-07, "loss": 0.0103, "step": 231310 }, { "epoch": 2.4714995459159144, "grad_norm": 0.03854256868362427, "learning_rate": 9.481557756140113e-07, "loss": 0.0209, "step": 231320 }, { "epoch": 2.471606389230194, "grad_norm": 0.2434472292661667, "learning_rate": 9.481483254424559e-07, "loss": 0.0227, "step": 231330 }, { "epoch": 2.4717132325444737, "grad_norm": 3.6454317569732666, "learning_rate": 9.48140874764907e-07, "loss": 0.0213, "step": 231340 }, { "epoch": 2.471820075858753, "grad_norm": 0.04028378799557686, "learning_rate": 9.481334235813734e-07, "loss": 0.019, "step": 231350 }, { "epoch": 2.4719269191730326, "grad_norm": 0.4612652659416199, "learning_rate": 9.481259718918633e-07, "loss": 0.0064, "step": 231360 }, { "epoch": 2.4720337624873125, "grad_norm": 0.01447259820997715, "learning_rate": 9.48118519696385e-07, "loss": 0.0395, "step": 231370 }, { "epoch": 2.472140605801592, "grad_norm": 0.009025774896144867, "learning_rate": 9.481110669949472e-07, "loss": 0.0313, "step": 231380 }, { "epoch": 2.4722474491158715, "grad_norm": 0.12914270162582397, "learning_rate": 9.481036137875584e-07, "loss": 0.0341, "step": 231390 }, { "epoch": 2.4723542924301514, "grad_norm": 0.19180411100387573, "learning_rate": 9.480961600742266e-07, "loss": 0.0584, "step": 231400 }, { "epoch": 2.472461135744431, "grad_norm": 0.307959645986557, "learning_rate": 9.480887058549603e-07, "loss": 0.0147, "step": 231410 }, { "epoch": 2.4725679790587103, "grad_norm": 1.9808977842330933, "learning_rate": 9.480812511297682e-07, "loss": 0.0176, "step": 231420 }, { "epoch": 2.47267482237299, "grad_norm": 0.018746137619018555, "learning_rate": 9.480737958986584e-07, "loss": 0.006, "step": 231430 }, { "epoch": 2.4727816656872696, "grad_norm": 0.00630542729049921, "learning_rate": 9.480663401616397e-07, "loss": 0.0192, "step": 231440 }, { "epoch": 2.472888509001549, "grad_norm": 4.095978260040283, "learning_rate": 9.4805888391872e-07, "loss": 0.0118, "step": 231450 }, { "epoch": 2.472995352315829, "grad_norm": 0.04423408582806587, "learning_rate": 9.480514271699083e-07, "loss": 0.0386, "step": 231460 }, { "epoch": 2.4731021956301085, "grad_norm": 0.03260200098156929, "learning_rate": 9.480439699152125e-07, "loss": 0.0217, "step": 231470 }, { "epoch": 2.473209038944388, "grad_norm": 0.01921841688454151, "learning_rate": 9.480365121546414e-07, "loss": 0.0156, "step": 231480 }, { "epoch": 2.473315882258668, "grad_norm": 3.111355781555176, "learning_rate": 9.480290538882032e-07, "loss": 0.0018, "step": 231490 }, { "epoch": 2.4734227255729473, "grad_norm": 9.36738395690918, "learning_rate": 9.480215951159062e-07, "loss": 0.0351, "step": 231500 }, { "epoch": 2.4735295688872267, "grad_norm": 0.04241233691573143, "learning_rate": 9.480141358377593e-07, "loss": 0.0324, "step": 231510 }, { "epoch": 2.4736364122015067, "grad_norm": 0.6762331128120422, "learning_rate": 9.480066760537705e-07, "loss": 0.019, "step": 231520 }, { "epoch": 2.473743255515786, "grad_norm": 5.346261978149414, "learning_rate": 9.479992157639484e-07, "loss": 0.0173, "step": 231530 }, { "epoch": 2.4738500988300656, "grad_norm": 6.561379909515381, "learning_rate": 9.479917549683012e-07, "loss": 0.0291, "step": 231540 }, { "epoch": 2.4739569421443455, "grad_norm": 0.3316793143749237, "learning_rate": 9.479842936668377e-07, "loss": 0.0147, "step": 231550 }, { "epoch": 2.474063785458625, "grad_norm": 4.934474945068359, "learning_rate": 9.479768318595661e-07, "loss": 0.0584, "step": 231560 }, { "epoch": 2.4741706287729044, "grad_norm": 1.7150617837905884, "learning_rate": 9.479693695464948e-07, "loss": 0.0515, "step": 231570 }, { "epoch": 2.4742774720871843, "grad_norm": 3.3483164310455322, "learning_rate": 9.479619067276322e-07, "loss": 0.0107, "step": 231580 }, { "epoch": 2.4743843154014638, "grad_norm": 0.24562427401542664, "learning_rate": 9.479544434029869e-07, "loss": 0.0084, "step": 231590 }, { "epoch": 2.474491158715743, "grad_norm": 0.31466910243034363, "learning_rate": 9.479469795725672e-07, "loss": 0.0186, "step": 231600 }, { "epoch": 2.474598002030023, "grad_norm": 2.7042627334594727, "learning_rate": 9.479395152363815e-07, "loss": 0.0106, "step": 231610 }, { "epoch": 2.4747048453443026, "grad_norm": 2.2363016605377197, "learning_rate": 9.479320503944383e-07, "loss": 0.0216, "step": 231620 }, { "epoch": 2.474811688658582, "grad_norm": 1.379371166229248, "learning_rate": 9.47924585046746e-07, "loss": 0.0205, "step": 231630 }, { "epoch": 2.474918531972862, "grad_norm": 4.955206871032715, "learning_rate": 9.47917119193313e-07, "loss": 0.0195, "step": 231640 }, { "epoch": 2.4750253752871414, "grad_norm": 4.85837984085083, "learning_rate": 9.479096528341479e-07, "loss": 0.0059, "step": 231650 }, { "epoch": 2.475132218601421, "grad_norm": 16.07514762878418, "learning_rate": 9.47902185969259e-07, "loss": 0.0425, "step": 231660 }, { "epoch": 2.4752390619157008, "grad_norm": 1.1488958597183228, "learning_rate": 9.478947185986546e-07, "loss": 0.0277, "step": 231670 }, { "epoch": 2.47534590522998, "grad_norm": 0.021386150270700455, "learning_rate": 9.478872507223433e-07, "loss": 0.0077, "step": 231680 }, { "epoch": 2.4754527485442597, "grad_norm": 0.045998554676771164, "learning_rate": 9.478797823403335e-07, "loss": 0.0091, "step": 231690 }, { "epoch": 2.4755595918585396, "grad_norm": 2.3802661895751953, "learning_rate": 9.478723134526337e-07, "loss": 0.0323, "step": 231700 }, { "epoch": 2.475666435172819, "grad_norm": 3.2818071842193604, "learning_rate": 9.478648440592521e-07, "loss": 0.0362, "step": 231710 }, { "epoch": 2.4757732784870985, "grad_norm": 4.2187089920043945, "learning_rate": 9.478573741601974e-07, "loss": 0.0247, "step": 231720 }, { "epoch": 2.4758801218013784, "grad_norm": 0.03278633952140808, "learning_rate": 9.478499037554779e-07, "loss": 0.0077, "step": 231730 }, { "epoch": 2.475986965115658, "grad_norm": 0.0035421866923570633, "learning_rate": 9.478424328451021e-07, "loss": 0.0133, "step": 231740 }, { "epoch": 2.4760938084299373, "grad_norm": 6.997837543487549, "learning_rate": 9.478349614290782e-07, "loss": 0.0273, "step": 231750 }, { "epoch": 2.476200651744217, "grad_norm": 0.009165139868855476, "learning_rate": 9.47827489507415e-07, "loss": 0.0303, "step": 231760 }, { "epoch": 2.4763074950584967, "grad_norm": 10.991759300231934, "learning_rate": 9.478200170801207e-07, "loss": 0.0231, "step": 231770 }, { "epoch": 2.476414338372776, "grad_norm": 2.3269941806793213, "learning_rate": 9.478125441472039e-07, "loss": 0.0085, "step": 231780 }, { "epoch": 2.476521181687056, "grad_norm": 10.453505516052246, "learning_rate": 9.478050707086729e-07, "loss": 0.0308, "step": 231790 }, { "epoch": 2.4766280250013355, "grad_norm": 2.6506714820861816, "learning_rate": 9.477975967645362e-07, "loss": 0.0227, "step": 231800 }, { "epoch": 2.476734868315615, "grad_norm": 1.6856210231781006, "learning_rate": 9.477901223148021e-07, "loss": 0.0055, "step": 231810 }, { "epoch": 2.476841711629895, "grad_norm": 6.635643005371094, "learning_rate": 9.477826473594792e-07, "loss": 0.0129, "step": 231820 }, { "epoch": 2.4769485549441743, "grad_norm": 0.061591316014528275, "learning_rate": 9.477751718985761e-07, "loss": 0.0435, "step": 231830 }, { "epoch": 2.4770553982584538, "grad_norm": 0.04486184939742088, "learning_rate": 9.477676959321009e-07, "loss": 0.0207, "step": 231840 }, { "epoch": 2.4771622415727337, "grad_norm": 1.5244646072387695, "learning_rate": 9.47760219460062e-07, "loss": 0.0151, "step": 231850 }, { "epoch": 2.477269084887013, "grad_norm": 0.008536658249795437, "learning_rate": 9.477527424824682e-07, "loss": 0.0047, "step": 231860 }, { "epoch": 2.477375928201293, "grad_norm": 2.165257692337036, "learning_rate": 9.477452649993276e-07, "loss": 0.0301, "step": 231870 }, { "epoch": 2.4774827715155725, "grad_norm": 6.021294593811035, "learning_rate": 9.47737787010649e-07, "loss": 0.0406, "step": 231880 }, { "epoch": 2.477589614829852, "grad_norm": 0.17970213294029236, "learning_rate": 9.477303085164405e-07, "loss": 0.0174, "step": 231890 }, { "epoch": 2.4776964581441314, "grad_norm": 0.13599930703639984, "learning_rate": 9.477228295167108e-07, "loss": 0.0045, "step": 231900 }, { "epoch": 2.4778033014584113, "grad_norm": 0.00660953251644969, "learning_rate": 9.477153500114682e-07, "loss": 0.0052, "step": 231910 }, { "epoch": 2.477910144772691, "grad_norm": 5.356842517852783, "learning_rate": 9.477078700007212e-07, "loss": 0.0546, "step": 231920 }, { "epoch": 2.4780169880869707, "grad_norm": 0.04898419603705406, "learning_rate": 9.477003894844782e-07, "loss": 0.0187, "step": 231930 }, { "epoch": 2.47812383140125, "grad_norm": 0.05576515570282936, "learning_rate": 9.476929084627476e-07, "loss": 0.0091, "step": 231940 }, { "epoch": 2.4782306747155296, "grad_norm": 8.930834770202637, "learning_rate": 9.47685426935538e-07, "loss": 0.0146, "step": 231950 }, { "epoch": 2.478337518029809, "grad_norm": 4.602077484130859, "learning_rate": 9.476779449028578e-07, "loss": 0.0191, "step": 231960 }, { "epoch": 2.478444361344089, "grad_norm": 3.4016737937927246, "learning_rate": 9.476704623647154e-07, "loss": 0.0831, "step": 231970 }, { "epoch": 2.4785512046583684, "grad_norm": 9.006026268005371, "learning_rate": 9.476629793211191e-07, "loss": 0.0207, "step": 231980 }, { "epoch": 2.4786580479726483, "grad_norm": 0.1562473475933075, "learning_rate": 9.476554957720777e-07, "loss": 0.0112, "step": 231990 }, { "epoch": 2.478764891286928, "grad_norm": 32.08793258666992, "learning_rate": 9.476480117175994e-07, "loss": 0.0518, "step": 232000 }, { "epoch": 2.4788717346012072, "grad_norm": 0.005486227106302977, "learning_rate": 9.476405271576926e-07, "loss": 0.0129, "step": 232010 }, { "epoch": 2.4789785779154867, "grad_norm": 3.5484728813171387, "learning_rate": 9.476330420923659e-07, "loss": 0.0216, "step": 232020 }, { "epoch": 2.4790854212297666, "grad_norm": 1.6875708103179932, "learning_rate": 9.476255565216278e-07, "loss": 0.0073, "step": 232030 }, { "epoch": 2.479192264544046, "grad_norm": 3.194272994995117, "learning_rate": 9.476180704454867e-07, "loss": 0.025, "step": 232040 }, { "epoch": 2.479299107858326, "grad_norm": 0.10156755894422531, "learning_rate": 9.476105838639509e-07, "loss": 0.0018, "step": 232050 }, { "epoch": 2.4794059511726054, "grad_norm": 0.2906360328197479, "learning_rate": 9.476030967770291e-07, "loss": 0.0115, "step": 232060 }, { "epoch": 2.479512794486885, "grad_norm": 0.13946029543876648, "learning_rate": 9.475956091847295e-07, "loss": 0.0137, "step": 232070 }, { "epoch": 2.4796196378011643, "grad_norm": 0.002833141479641199, "learning_rate": 9.475881210870606e-07, "loss": 0.0476, "step": 232080 }, { "epoch": 2.4797264811154442, "grad_norm": 0.014194140210747719, "learning_rate": 9.475806324840312e-07, "loss": 0.01, "step": 232090 }, { "epoch": 2.4798333244297237, "grad_norm": 8.1066255569458, "learning_rate": 9.475731433756493e-07, "loss": 0.036, "step": 232100 }, { "epoch": 2.4799401677440036, "grad_norm": 2.6464216709136963, "learning_rate": 9.475656537619236e-07, "loss": 0.0177, "step": 232110 }, { "epoch": 2.480047011058283, "grad_norm": 6.039304733276367, "learning_rate": 9.475581636428624e-07, "loss": 0.0531, "step": 232120 }, { "epoch": 2.4801538543725625, "grad_norm": 16.436084747314453, "learning_rate": 9.475506730184744e-07, "loss": 0.0349, "step": 232130 }, { "epoch": 2.4802606976868424, "grad_norm": 0.9006975293159485, "learning_rate": 9.475431818887678e-07, "loss": 0.0183, "step": 232140 }, { "epoch": 2.480367541001122, "grad_norm": 1.5565229654312134, "learning_rate": 9.475356902537513e-07, "loss": 0.029, "step": 232150 }, { "epoch": 2.4804743843154013, "grad_norm": 0.43609103560447693, "learning_rate": 9.47528198113433e-07, "loss": 0.0306, "step": 232160 }, { "epoch": 2.4805812276296813, "grad_norm": 0.011255869641900063, "learning_rate": 9.475207054678217e-07, "loss": 0.0097, "step": 232170 }, { "epoch": 2.4806880709439607, "grad_norm": 0.008374499157071114, "learning_rate": 9.475132123169258e-07, "loss": 0.0048, "step": 232180 }, { "epoch": 2.48079491425824, "grad_norm": 0.11576429754495621, "learning_rate": 9.475057186607537e-07, "loss": 0.0053, "step": 232190 }, { "epoch": 2.48090175757252, "grad_norm": 0.5378453135490417, "learning_rate": 9.474982244993138e-07, "loss": 0.0305, "step": 232200 }, { "epoch": 2.4810086008867995, "grad_norm": 0.157740980386734, "learning_rate": 9.474907298326147e-07, "loss": 0.0097, "step": 232210 }, { "epoch": 2.481115444201079, "grad_norm": 0.3408629298210144, "learning_rate": 9.474832346606647e-07, "loss": 0.0393, "step": 232220 }, { "epoch": 2.481222287515359, "grad_norm": 3.164654016494751, "learning_rate": 9.474757389834724e-07, "loss": 0.0088, "step": 232230 }, { "epoch": 2.4813291308296384, "grad_norm": 0.8707985877990723, "learning_rate": 9.474682428010463e-07, "loss": 0.024, "step": 232240 }, { "epoch": 2.481435974143918, "grad_norm": 1.5118072032928467, "learning_rate": 9.474607461133948e-07, "loss": 0.0255, "step": 232250 }, { "epoch": 2.4815428174581977, "grad_norm": 0.13375414907932281, "learning_rate": 9.474532489205262e-07, "loss": 0.0052, "step": 232260 }, { "epoch": 2.481649660772477, "grad_norm": 2.2650270462036133, "learning_rate": 9.474457512224491e-07, "loss": 0.0421, "step": 232270 }, { "epoch": 2.4817565040867566, "grad_norm": 0.028917282819747925, "learning_rate": 9.474382530191721e-07, "loss": 0.0202, "step": 232280 }, { "epoch": 2.4818633474010365, "grad_norm": 3.343965530395508, "learning_rate": 9.474307543107036e-07, "loss": 0.032, "step": 232290 }, { "epoch": 2.481970190715316, "grad_norm": 0.006646553985774517, "learning_rate": 9.474232550970518e-07, "loss": 0.0134, "step": 232300 }, { "epoch": 2.4820770340295955, "grad_norm": 4.1199870109558105, "learning_rate": 9.474157553782256e-07, "loss": 0.0236, "step": 232310 }, { "epoch": 2.4821838773438754, "grad_norm": 0.08386755734682083, "learning_rate": 9.474082551542331e-07, "loss": 0.005, "step": 232320 }, { "epoch": 2.482290720658155, "grad_norm": 2.729567050933838, "learning_rate": 9.474007544250829e-07, "loss": 0.0174, "step": 232330 }, { "epoch": 2.4823975639724343, "grad_norm": 4.425327777862549, "learning_rate": 9.473932531907836e-07, "loss": 0.0173, "step": 232340 }, { "epoch": 2.482504407286714, "grad_norm": 0.06967085599899292, "learning_rate": 9.473857514513435e-07, "loss": 0.0148, "step": 232350 }, { "epoch": 2.4826112506009936, "grad_norm": 5.154468059539795, "learning_rate": 9.47378249206771e-07, "loss": 0.0201, "step": 232360 }, { "epoch": 2.482718093915273, "grad_norm": 0.2904447019100189, "learning_rate": 9.473707464570749e-07, "loss": 0.0079, "step": 232370 }, { "epoch": 2.482824937229553, "grad_norm": 1.4686540365219116, "learning_rate": 9.473632432022632e-07, "loss": 0.0242, "step": 232380 }, { "epoch": 2.4829317805438325, "grad_norm": 3.215782880783081, "learning_rate": 9.473557394423448e-07, "loss": 0.0073, "step": 232390 }, { "epoch": 2.483038623858112, "grad_norm": 7.992362976074219, "learning_rate": 9.47348235177328e-07, "loss": 0.005, "step": 232400 }, { "epoch": 2.483145467172392, "grad_norm": 5.590008735656738, "learning_rate": 9.473407304072213e-07, "loss": 0.0428, "step": 232410 }, { "epoch": 2.4832523104866713, "grad_norm": 2.0813889503479004, "learning_rate": 9.473332251320332e-07, "loss": 0.0033, "step": 232420 }, { "epoch": 2.4833591538009507, "grad_norm": 0.00627253670245409, "learning_rate": 9.47325719351772e-07, "loss": 0.048, "step": 232430 }, { "epoch": 2.4834659971152306, "grad_norm": 2.399111270904541, "learning_rate": 9.473182130664464e-07, "loss": 0.0236, "step": 232440 }, { "epoch": 2.48357284042951, "grad_norm": 0.028316400945186615, "learning_rate": 9.473107062760646e-07, "loss": 0.0511, "step": 232450 }, { "epoch": 2.4836796837437896, "grad_norm": 9.317902565002441, "learning_rate": 9.473031989806355e-07, "loss": 0.0155, "step": 232460 }, { "epoch": 2.4837865270580695, "grad_norm": 0.02867133915424347, "learning_rate": 9.47295691180167e-07, "loss": 0.0197, "step": 232470 }, { "epoch": 2.483893370372349, "grad_norm": 0.058575283735990524, "learning_rate": 9.472881828746682e-07, "loss": 0.0335, "step": 232480 }, { "epoch": 2.4840002136866284, "grad_norm": 0.0077477116137743, "learning_rate": 9.472806740641473e-07, "loss": 0.0145, "step": 232490 }, { "epoch": 2.4841070570009083, "grad_norm": 0.0157153382897377, "learning_rate": 9.472731647486126e-07, "loss": 0.0413, "step": 232500 }, { "epoch": 2.4842139003151877, "grad_norm": 0.2191181629896164, "learning_rate": 9.472656549280728e-07, "loss": 0.0214, "step": 232510 }, { "epoch": 2.484320743629467, "grad_norm": 0.11046349257230759, "learning_rate": 9.472581446025363e-07, "loss": 0.0366, "step": 232520 }, { "epoch": 2.484427586943747, "grad_norm": 0.00449961656704545, "learning_rate": 9.472506337720116e-07, "loss": 0.0646, "step": 232530 }, { "epoch": 2.4845344302580266, "grad_norm": 0.30070313811302185, "learning_rate": 9.472431224365072e-07, "loss": 0.0069, "step": 232540 }, { "epoch": 2.484641273572306, "grad_norm": 0.19180786609649658, "learning_rate": 9.472356105960315e-07, "loss": 0.0415, "step": 232550 }, { "epoch": 2.484748116886586, "grad_norm": 0.011744852177798748, "learning_rate": 9.472280982505931e-07, "loss": 0.043, "step": 232560 }, { "epoch": 2.4848549602008654, "grad_norm": 0.06623683869838715, "learning_rate": 9.472205854002004e-07, "loss": 0.0173, "step": 232570 }, { "epoch": 2.4849618035151453, "grad_norm": 0.00879050325602293, "learning_rate": 9.47213072044862e-07, "loss": 0.0129, "step": 232580 }, { "epoch": 2.4850686468294247, "grad_norm": 2.665163278579712, "learning_rate": 9.472055581845862e-07, "loss": 0.0232, "step": 232590 }, { "epoch": 2.485175490143704, "grad_norm": 9.115557670593262, "learning_rate": 9.471980438193817e-07, "loss": 0.0509, "step": 232600 }, { "epoch": 2.4852823334579837, "grad_norm": 0.0248323455452919, "learning_rate": 9.471905289492567e-07, "loss": 0.013, "step": 232610 }, { "epoch": 2.4853891767722636, "grad_norm": 0.823630690574646, "learning_rate": 9.471830135742198e-07, "loss": 0.0069, "step": 232620 }, { "epoch": 2.485496020086543, "grad_norm": 1.726946234703064, "learning_rate": 9.471754976942796e-07, "loss": 0.0246, "step": 232630 }, { "epoch": 2.485602863400823, "grad_norm": 0.216291144490242, "learning_rate": 9.471679813094447e-07, "loss": 0.009, "step": 232640 }, { "epoch": 2.4857097067151024, "grad_norm": 0.8589335083961487, "learning_rate": 9.471604644197231e-07, "loss": 0.0307, "step": 232650 }, { "epoch": 2.485816550029382, "grad_norm": 1.2624521255493164, "learning_rate": 9.471529470251237e-07, "loss": 0.0043, "step": 232660 }, { "epoch": 2.4859233933436613, "grad_norm": 1.8952851295471191, "learning_rate": 9.471454291256548e-07, "loss": 0.0049, "step": 232670 }, { "epoch": 2.486030236657941, "grad_norm": 2.6286234855651855, "learning_rate": 9.471379107213251e-07, "loss": 0.0258, "step": 232680 }, { "epoch": 2.4861370799722207, "grad_norm": 0.5426098704338074, "learning_rate": 9.471303918121429e-07, "loss": 0.0403, "step": 232690 }, { "epoch": 2.4862439232865006, "grad_norm": 9.982234001159668, "learning_rate": 9.471228723981169e-07, "loss": 0.0223, "step": 232700 }, { "epoch": 2.48635076660078, "grad_norm": 0.7282431125640869, "learning_rate": 9.471153524792553e-07, "loss": 0.0155, "step": 232710 }, { "epoch": 2.4864576099150595, "grad_norm": 0.02025822550058365, "learning_rate": 9.471078320555666e-07, "loss": 0.0595, "step": 232720 }, { "epoch": 2.486564453229339, "grad_norm": 0.5064917206764221, "learning_rate": 9.471003111270595e-07, "loss": 0.0087, "step": 232730 }, { "epoch": 2.486671296543619, "grad_norm": 0.007467489689588547, "learning_rate": 9.470927896937424e-07, "loss": 0.023, "step": 232740 }, { "epoch": 2.4867781398578983, "grad_norm": 0.0057795667089521885, "learning_rate": 9.470852677556238e-07, "loss": 0.007, "step": 232750 }, { "epoch": 2.486884983172178, "grad_norm": 21.167789459228516, "learning_rate": 9.470777453127122e-07, "loss": 0.098, "step": 232760 }, { "epoch": 2.4869918264864577, "grad_norm": 0.12370893359184265, "learning_rate": 9.470702223650161e-07, "loss": 0.0259, "step": 232770 }, { "epoch": 2.487098669800737, "grad_norm": 2.9358441829681396, "learning_rate": 9.47062698912544e-07, "loss": 0.015, "step": 232780 }, { "epoch": 2.4872055131150166, "grad_norm": 1.3189761638641357, "learning_rate": 9.470551749553043e-07, "loss": 0.0494, "step": 232790 }, { "epoch": 2.4873123564292965, "grad_norm": 0.00948302261531353, "learning_rate": 9.470476504933056e-07, "loss": 0.0363, "step": 232800 }, { "epoch": 2.487419199743576, "grad_norm": 0.0566461943089962, "learning_rate": 9.470401255265563e-07, "loss": 0.0059, "step": 232810 }, { "epoch": 2.487526043057856, "grad_norm": 0.15435892343521118, "learning_rate": 9.470326000550651e-07, "loss": 0.0312, "step": 232820 }, { "epoch": 2.4876328863721353, "grad_norm": 3.2142887115478516, "learning_rate": 9.470250740788402e-07, "loss": 0.0204, "step": 232830 }, { "epoch": 2.4877397296864148, "grad_norm": 1.8967071771621704, "learning_rate": 9.470175475978903e-07, "loss": 0.017, "step": 232840 }, { "epoch": 2.4878465730006947, "grad_norm": 0.03517443686723709, "learning_rate": 9.47010020612224e-07, "loss": 0.0126, "step": 232850 }, { "epoch": 2.487953416314974, "grad_norm": 0.10763753205537796, "learning_rate": 9.470024931218494e-07, "loss": 0.0894, "step": 232860 }, { "epoch": 2.4880602596292536, "grad_norm": 3.9071571826934814, "learning_rate": 9.469949651267755e-07, "loss": 0.0261, "step": 232870 }, { "epoch": 2.4881671029435335, "grad_norm": 2.7056992053985596, "learning_rate": 9.469874366270103e-07, "loss": 0.0119, "step": 232880 }, { "epoch": 2.488273946257813, "grad_norm": 6.844607830047607, "learning_rate": 9.469799076225627e-07, "loss": 0.0305, "step": 232890 }, { "epoch": 2.4883807895720924, "grad_norm": 3.2812681198120117, "learning_rate": 9.469723781134411e-07, "loss": 0.0091, "step": 232900 }, { "epoch": 2.4884876328863723, "grad_norm": 0.33633819222450256, "learning_rate": 9.469648480996538e-07, "loss": 0.062, "step": 232910 }, { "epoch": 2.4885944762006518, "grad_norm": 0.016687355935573578, "learning_rate": 9.469573175812095e-07, "loss": 0.0074, "step": 232920 }, { "epoch": 2.4887013195149312, "grad_norm": 0.23459479212760925, "learning_rate": 9.469497865581167e-07, "loss": 0.0029, "step": 232930 }, { "epoch": 2.488808162829211, "grad_norm": 0.07727042585611343, "learning_rate": 9.469422550303839e-07, "loss": 0.0221, "step": 232940 }, { "epoch": 2.4889150061434906, "grad_norm": 4.346688270568848, "learning_rate": 9.469347229980195e-07, "loss": 0.0288, "step": 232950 }, { "epoch": 2.48902184945777, "grad_norm": 3.7763874530792236, "learning_rate": 9.469271904610321e-07, "loss": 0.0121, "step": 232960 }, { "epoch": 2.48912869277205, "grad_norm": 0.16534000635147095, "learning_rate": 9.4691965741943e-07, "loss": 0.0087, "step": 232970 }, { "epoch": 2.4892355360863294, "grad_norm": 0.11773817986249924, "learning_rate": 9.469121238732221e-07, "loss": 0.0159, "step": 232980 }, { "epoch": 2.489342379400609, "grad_norm": 0.007719547487795353, "learning_rate": 9.469045898224166e-07, "loss": 0.0275, "step": 232990 }, { "epoch": 2.4894492227148888, "grad_norm": 2.1680963039398193, "learning_rate": 9.468970552670222e-07, "loss": 0.0263, "step": 233000 }, { "epoch": 2.4895560660291682, "grad_norm": 0.35548150539398193, "learning_rate": 9.46889520207047e-07, "loss": 0.0209, "step": 233010 }, { "epoch": 2.4896629093434477, "grad_norm": 0.06647888571023941, "learning_rate": 9.468819846425e-07, "loss": 0.0078, "step": 233020 }, { "epoch": 2.4897697526577276, "grad_norm": 0.02725832164287567, "learning_rate": 9.468744485733895e-07, "loss": 0.0152, "step": 233030 }, { "epoch": 2.489876595972007, "grad_norm": 0.23996275663375854, "learning_rate": 9.468669119997241e-07, "loss": 0.0139, "step": 233040 }, { "epoch": 2.4899834392862865, "grad_norm": 1.4140292406082153, "learning_rate": 9.468593749215121e-07, "loss": 0.0064, "step": 233050 }, { "epoch": 2.4900902826005664, "grad_norm": 2.6121938228607178, "learning_rate": 9.468518373387621e-07, "loss": 0.0111, "step": 233060 }, { "epoch": 2.490197125914846, "grad_norm": 0.11977053433656693, "learning_rate": 9.468442992514829e-07, "loss": 0.0514, "step": 233070 }, { "epoch": 2.4903039692291253, "grad_norm": 0.37555593252182007, "learning_rate": 9.468367606596825e-07, "loss": 0.0159, "step": 233080 }, { "epoch": 2.4904108125434052, "grad_norm": 0.0021930166985839605, "learning_rate": 9.468292215633698e-07, "loss": 0.0134, "step": 233090 }, { "epoch": 2.4905176558576847, "grad_norm": 0.08413878828287125, "learning_rate": 9.468216819625532e-07, "loss": 0.0205, "step": 233100 }, { "epoch": 2.490624499171964, "grad_norm": 0.3228785991668701, "learning_rate": 9.468141418572411e-07, "loss": 0.0087, "step": 233110 }, { "epoch": 2.490731342486244, "grad_norm": 4.997663974761963, "learning_rate": 9.468066012474421e-07, "loss": 0.0166, "step": 233120 }, { "epoch": 2.4908381858005235, "grad_norm": 1.1632658243179321, "learning_rate": 9.467990601331648e-07, "loss": 0.0158, "step": 233130 }, { "epoch": 2.490945029114803, "grad_norm": 3.949568510055542, "learning_rate": 9.467915185144177e-07, "loss": 0.0169, "step": 233140 }, { "epoch": 2.491051872429083, "grad_norm": 3.2266921997070312, "learning_rate": 9.467839763912091e-07, "loss": 0.0384, "step": 233150 }, { "epoch": 2.4911587157433623, "grad_norm": 0.012298008427023888, "learning_rate": 9.467764337635477e-07, "loss": 0.0166, "step": 233160 }, { "epoch": 2.491265559057642, "grad_norm": 5.442584037780762, "learning_rate": 9.467688906314422e-07, "loss": 0.0099, "step": 233170 }, { "epoch": 2.4913724023719217, "grad_norm": 2.061863660812378, "learning_rate": 9.467613469949007e-07, "loss": 0.0377, "step": 233180 }, { "epoch": 2.491479245686201, "grad_norm": 3.435880184173584, "learning_rate": 9.467538028539319e-07, "loss": 0.0067, "step": 233190 }, { "epoch": 2.4915860890004806, "grad_norm": 0.03882995992898941, "learning_rate": 9.467462582085444e-07, "loss": 0.0109, "step": 233200 }, { "epoch": 2.4916929323147605, "grad_norm": 0.010867184959352016, "learning_rate": 9.467387130587467e-07, "loss": 0.0264, "step": 233210 }, { "epoch": 2.49179977562904, "grad_norm": 1.1963825225830078, "learning_rate": 9.467311674045472e-07, "loss": 0.0371, "step": 233220 }, { "epoch": 2.4919066189433194, "grad_norm": 0.007429894525557756, "learning_rate": 9.467236212459547e-07, "loss": 0.0043, "step": 233230 }, { "epoch": 2.4920134622575993, "grad_norm": 0.16847507655620575, "learning_rate": 9.467160745829771e-07, "loss": 0.0204, "step": 233240 }, { "epoch": 2.492120305571879, "grad_norm": 9.785183906555176, "learning_rate": 9.467085274156237e-07, "loss": 0.0217, "step": 233250 }, { "epoch": 2.4922271488861583, "grad_norm": 0.01770373061299324, "learning_rate": 9.467009797439025e-07, "loss": 0.0145, "step": 233260 }, { "epoch": 2.492333992200438, "grad_norm": 0.8105974793434143, "learning_rate": 9.466934315678224e-07, "loss": 0.0121, "step": 233270 }, { "epoch": 2.4924408355147176, "grad_norm": 14.0453462600708, "learning_rate": 9.466858828873914e-07, "loss": 0.0177, "step": 233280 }, { "epoch": 2.492547678828997, "grad_norm": 0.018993591889739037, "learning_rate": 9.466783337026185e-07, "loss": 0.0076, "step": 233290 }, { "epoch": 2.492654522143277, "grad_norm": 3.2412571907043457, "learning_rate": 9.46670784013512e-07, "loss": 0.0088, "step": 233300 }, { "epoch": 2.4927613654575564, "grad_norm": 1.6191072463989258, "learning_rate": 9.466632338200804e-07, "loss": 0.0244, "step": 233310 }, { "epoch": 2.492868208771836, "grad_norm": 0.03089406155049801, "learning_rate": 9.466556831223325e-07, "loss": 0.0168, "step": 233320 }, { "epoch": 2.492975052086116, "grad_norm": 4.4766716957092285, "learning_rate": 9.466481319202764e-07, "loss": 0.0396, "step": 233330 }, { "epoch": 2.4930818954003953, "grad_norm": 7.979977130889893, "learning_rate": 9.46640580213921e-07, "loss": 0.0255, "step": 233340 }, { "epoch": 2.493188738714675, "grad_norm": 0.24664951860904694, "learning_rate": 9.466330280032747e-07, "loss": 0.0219, "step": 233350 }, { "epoch": 2.4932955820289546, "grad_norm": 1.4793778657913208, "learning_rate": 9.466254752883458e-07, "loss": 0.0207, "step": 233360 }, { "epoch": 2.493402425343234, "grad_norm": 4.156654357910156, "learning_rate": 9.466179220691432e-07, "loss": 0.0285, "step": 233370 }, { "epoch": 2.4935092686575135, "grad_norm": 3.5719354152679443, "learning_rate": 9.466103683456752e-07, "loss": 0.0181, "step": 233380 }, { "epoch": 2.4936161119717934, "grad_norm": 2.939455270767212, "learning_rate": 9.466028141179503e-07, "loss": 0.0227, "step": 233390 }, { "epoch": 2.493722955286073, "grad_norm": 0.005576573312282562, "learning_rate": 9.465952593859772e-07, "loss": 0.0239, "step": 233400 }, { "epoch": 2.493829798600353, "grad_norm": 3.8412766456604004, "learning_rate": 9.465877041497644e-07, "loss": 0.0492, "step": 233410 }, { "epoch": 2.4939366419146323, "grad_norm": 2.382986545562744, "learning_rate": 9.465801484093204e-07, "loss": 0.0043, "step": 233420 }, { "epoch": 2.4940434852289117, "grad_norm": 5.834466457366943, "learning_rate": 9.465725921646535e-07, "loss": 0.0324, "step": 233430 }, { "epoch": 2.494150328543191, "grad_norm": 0.032325390726327896, "learning_rate": 9.465650354157726e-07, "loss": 0.0104, "step": 233440 }, { "epoch": 2.494257171857471, "grad_norm": 1.0583924055099487, "learning_rate": 9.465574781626862e-07, "loss": 0.0107, "step": 233450 }, { "epoch": 2.4943640151717505, "grad_norm": 0.007032512221485376, "learning_rate": 9.465499204054025e-07, "loss": 0.0232, "step": 233460 }, { "epoch": 2.4944708584860305, "grad_norm": 4.984414577484131, "learning_rate": 9.465423621439303e-07, "loss": 0.0264, "step": 233470 }, { "epoch": 2.49457770180031, "grad_norm": 4.630026817321777, "learning_rate": 9.46534803378278e-07, "loss": 0.0228, "step": 233480 }, { "epoch": 2.4946845451145894, "grad_norm": 1.2230758666992188, "learning_rate": 9.465272441084543e-07, "loss": 0.0378, "step": 233490 }, { "epoch": 2.494791388428869, "grad_norm": 2.844574213027954, "learning_rate": 9.465196843344676e-07, "loss": 0.0197, "step": 233500 }, { "epoch": 2.4948982317431487, "grad_norm": 0.01811579056084156, "learning_rate": 9.465121240563265e-07, "loss": 0.0344, "step": 233510 }, { "epoch": 2.495005075057428, "grad_norm": 4.563631534576416, "learning_rate": 9.465045632740394e-07, "loss": 0.046, "step": 233520 }, { "epoch": 2.495111918371708, "grad_norm": 2.8572001457214355, "learning_rate": 9.464970019876151e-07, "loss": 0.0155, "step": 233530 }, { "epoch": 2.4952187616859876, "grad_norm": 0.0608614906668663, "learning_rate": 9.46489440197062e-07, "loss": 0.0048, "step": 233540 }, { "epoch": 2.495325605000267, "grad_norm": 0.014526531100273132, "learning_rate": 9.464818779023884e-07, "loss": 0.0152, "step": 233550 }, { "epoch": 2.4954324483145465, "grad_norm": 0.22703994810581207, "learning_rate": 9.464743151036033e-07, "loss": 0.025, "step": 233560 }, { "epoch": 2.4955392916288264, "grad_norm": 3.516301393508911, "learning_rate": 9.46466751800715e-07, "loss": 0.0264, "step": 233570 }, { "epoch": 2.495646134943106, "grad_norm": 3.754700183868408, "learning_rate": 9.464591879937319e-07, "loss": 0.0299, "step": 233580 }, { "epoch": 2.4957529782573857, "grad_norm": 0.001890483545139432, "learning_rate": 9.464516236826627e-07, "loss": 0.017, "step": 233590 }, { "epoch": 2.495859821571665, "grad_norm": 0.002154765883460641, "learning_rate": 9.46444058867516e-07, "loss": 0.1032, "step": 233600 }, { "epoch": 2.4959666648859447, "grad_norm": 0.0025802289601415396, "learning_rate": 9.464364935483003e-07, "loss": 0.0084, "step": 233610 }, { "epoch": 2.4960735082002246, "grad_norm": 5.942816257476807, "learning_rate": 9.46428927725024e-07, "loss": 0.013, "step": 233620 }, { "epoch": 2.496180351514504, "grad_norm": 0.13126151263713837, "learning_rate": 9.464213613976957e-07, "loss": 0.0087, "step": 233630 }, { "epoch": 2.4962871948287835, "grad_norm": 0.26450687646865845, "learning_rate": 9.464137945663242e-07, "loss": 0.0171, "step": 233640 }, { "epoch": 2.4963940381430634, "grad_norm": 22.739198684692383, "learning_rate": 9.464062272309177e-07, "loss": 0.0175, "step": 233650 }, { "epoch": 2.496500881457343, "grad_norm": 0.05612277239561081, "learning_rate": 9.463986593914848e-07, "loss": 0.0216, "step": 233660 }, { "epoch": 2.4966077247716223, "grad_norm": 0.08234979957342148, "learning_rate": 9.463910910480342e-07, "loss": 0.054, "step": 233670 }, { "epoch": 2.496714568085902, "grad_norm": 3.426654100418091, "learning_rate": 9.463835222005743e-07, "loss": 0.0412, "step": 233680 }, { "epoch": 2.4968214114001817, "grad_norm": 1.0184624195098877, "learning_rate": 9.463759528491138e-07, "loss": 0.0137, "step": 233690 }, { "epoch": 2.496928254714461, "grad_norm": 4.506532192230225, "learning_rate": 9.463683829936612e-07, "loss": 0.0293, "step": 233700 }, { "epoch": 2.497035098028741, "grad_norm": 0.09964542090892792, "learning_rate": 9.463608126342249e-07, "loss": 0.008, "step": 233710 }, { "epoch": 2.4971419413430205, "grad_norm": 0.009088246151804924, "learning_rate": 9.463532417708137e-07, "loss": 0.0146, "step": 233720 }, { "epoch": 2.4972487846573, "grad_norm": 0.024709166958928108, "learning_rate": 9.463456704034358e-07, "loss": 0.0201, "step": 233730 }, { "epoch": 2.49735562797158, "grad_norm": 4.934550762176514, "learning_rate": 9.463380985321001e-07, "loss": 0.0159, "step": 233740 }, { "epoch": 2.4974624712858593, "grad_norm": 8.710344314575195, "learning_rate": 9.46330526156815e-07, "loss": 0.0141, "step": 233750 }, { "epoch": 2.4975693146001388, "grad_norm": 0.04217221587896347, "learning_rate": 9.463229532775889e-07, "loss": 0.0923, "step": 233760 }, { "epoch": 2.4976761579144187, "grad_norm": 1.5565073490142822, "learning_rate": 9.463153798944306e-07, "loss": 0.007, "step": 233770 }, { "epoch": 2.497783001228698, "grad_norm": 0.07265625894069672, "learning_rate": 9.463078060073486e-07, "loss": 0.0138, "step": 233780 }, { "epoch": 2.4978898445429776, "grad_norm": 0.027228351682424545, "learning_rate": 9.463002316163513e-07, "loss": 0.0206, "step": 233790 }, { "epoch": 2.4979966878572575, "grad_norm": 8.169910430908203, "learning_rate": 9.462926567214475e-07, "loss": 0.0475, "step": 233800 }, { "epoch": 2.498103531171537, "grad_norm": 4.706918716430664, "learning_rate": 9.462850813226455e-07, "loss": 0.0124, "step": 233810 }, { "epoch": 2.4982103744858164, "grad_norm": 0.00910640973597765, "learning_rate": 9.46277505419954e-07, "loss": 0.0044, "step": 233820 }, { "epoch": 2.4983172178000963, "grad_norm": 0.004312749020755291, "learning_rate": 9.462699290133816e-07, "loss": 0.011, "step": 233830 }, { "epoch": 2.4984240611143758, "grad_norm": 0.036415304988622665, "learning_rate": 9.462623521029366e-07, "loss": 0.0109, "step": 233840 }, { "epoch": 2.498530904428655, "grad_norm": 5.005606174468994, "learning_rate": 9.462547746886278e-07, "loss": 0.0441, "step": 233850 }, { "epoch": 2.498637747742935, "grad_norm": 0.11815783381462097, "learning_rate": 9.462471967704636e-07, "loss": 0.0291, "step": 233860 }, { "epoch": 2.4987445910572146, "grad_norm": 0.4668998718261719, "learning_rate": 9.462396183484527e-07, "loss": 0.0268, "step": 233870 }, { "epoch": 2.498851434371494, "grad_norm": 0.005776820704340935, "learning_rate": 9.462320394226036e-07, "loss": 0.0209, "step": 233880 }, { "epoch": 2.498958277685774, "grad_norm": 2.934906482696533, "learning_rate": 9.462244599929249e-07, "loss": 0.0259, "step": 233890 }, { "epoch": 2.4990651210000534, "grad_norm": 0.25090405344963074, "learning_rate": 9.46216880059425e-07, "loss": 0.0138, "step": 233900 }, { "epoch": 2.499171964314333, "grad_norm": 0.016857434064149857, "learning_rate": 9.462092996221126e-07, "loss": 0.0338, "step": 233910 }, { "epoch": 2.4992788076286128, "grad_norm": 29.722423553466797, "learning_rate": 9.462017186809963e-07, "loss": 0.0258, "step": 233920 }, { "epoch": 2.4993856509428922, "grad_norm": 4.606966018676758, "learning_rate": 9.461941372360844e-07, "loss": 0.0606, "step": 233930 }, { "epoch": 2.4994924942571717, "grad_norm": 3.307188034057617, "learning_rate": 9.461865552873858e-07, "loss": 0.0226, "step": 233940 }, { "epoch": 2.4995993375714516, "grad_norm": 9.119769096374512, "learning_rate": 9.46178972834909e-07, "loss": 0.03, "step": 233950 }, { "epoch": 2.499706180885731, "grad_norm": 1.9668670892715454, "learning_rate": 9.461713898786623e-07, "loss": 0.0286, "step": 233960 }, { "epoch": 2.4998130242000105, "grad_norm": 0.050506703555583954, "learning_rate": 9.461638064186544e-07, "loss": 0.01, "step": 233970 }, { "epoch": 2.4999198675142904, "grad_norm": 6.519766330718994, "learning_rate": 9.46156222454894e-07, "loss": 0.0212, "step": 233980 }, { "epoch": 2.50002671082857, "grad_norm": 5.1631622314453125, "learning_rate": 9.461486379873896e-07, "loss": 0.0623, "step": 233990 }, { "epoch": 2.5001335541428498, "grad_norm": 4.916151523590088, "learning_rate": 9.461410530161495e-07, "loss": 0.0291, "step": 234000 }, { "epoch": 2.5002403974571292, "grad_norm": 1.418350100517273, "learning_rate": 9.461334675411825e-07, "loss": 0.0077, "step": 234010 }, { "epoch": 2.5003472407714087, "grad_norm": 0.25080588459968567, "learning_rate": 9.461258815624973e-07, "loss": 0.009, "step": 234020 }, { "epoch": 2.500454084085688, "grad_norm": 2.080124855041504, "learning_rate": 9.461182950801023e-07, "loss": 0.0216, "step": 234030 }, { "epoch": 2.500560927399968, "grad_norm": 6.149075984954834, "learning_rate": 9.461107080940061e-07, "loss": 0.043, "step": 234040 }, { "epoch": 2.5006677707142475, "grad_norm": 0.02185787260532379, "learning_rate": 9.46103120604217e-07, "loss": 0.0358, "step": 234050 }, { "epoch": 2.5007746140285274, "grad_norm": 3.065865993499756, "learning_rate": 9.46095532610744e-07, "loss": 0.0193, "step": 234060 }, { "epoch": 2.500881457342807, "grad_norm": 0.06741898506879807, "learning_rate": 9.460879441135955e-07, "loss": 0.0169, "step": 234070 }, { "epoch": 2.5009883006570863, "grad_norm": 0.051629964262247086, "learning_rate": 9.460803551127799e-07, "loss": 0.0408, "step": 234080 }, { "epoch": 2.501095143971366, "grad_norm": 0.09726636111736298, "learning_rate": 9.460727656083061e-07, "loss": 0.0036, "step": 234090 }, { "epoch": 2.5012019872856457, "grad_norm": 0.10187281668186188, "learning_rate": 9.460651756001824e-07, "loss": 0.0169, "step": 234100 }, { "epoch": 2.501308830599925, "grad_norm": 0.5339271426200867, "learning_rate": 9.460575850884173e-07, "loss": 0.0642, "step": 234110 }, { "epoch": 2.501415673914205, "grad_norm": 3.358944892883301, "learning_rate": 9.460499940730198e-07, "loss": 0.0067, "step": 234120 }, { "epoch": 2.5015225172284845, "grad_norm": 0.08234067261219025, "learning_rate": 9.46042402553998e-07, "loss": 0.0216, "step": 234130 }, { "epoch": 2.501629360542764, "grad_norm": 6.835063934326172, "learning_rate": 9.460348105313607e-07, "loss": 0.0078, "step": 234140 }, { "epoch": 2.5017362038570434, "grad_norm": 0.09548915177583694, "learning_rate": 9.460272180051166e-07, "loss": 0.023, "step": 234150 }, { "epoch": 2.5018430471713233, "grad_norm": 0.21851158142089844, "learning_rate": 9.460196249752739e-07, "loss": 0.0198, "step": 234160 }, { "epoch": 2.501949890485603, "grad_norm": 0.3876449465751648, "learning_rate": 9.460120314418413e-07, "loss": 0.0089, "step": 234170 }, { "epoch": 2.5020567337998827, "grad_norm": 0.5128745436668396, "learning_rate": 9.460044374048277e-07, "loss": 0.0315, "step": 234180 }, { "epoch": 2.502163577114162, "grad_norm": 0.08783433586359024, "learning_rate": 9.459968428642413e-07, "loss": 0.0244, "step": 234190 }, { "epoch": 2.5022704204284416, "grad_norm": 1.278627872467041, "learning_rate": 9.459892478200908e-07, "loss": 0.0374, "step": 234200 }, { "epoch": 2.502377263742721, "grad_norm": 3.9445013999938965, "learning_rate": 9.459816522723848e-07, "loss": 0.0377, "step": 234210 }, { "epoch": 2.502484107057001, "grad_norm": 0.6105213165283203, "learning_rate": 9.459740562211318e-07, "loss": 0.0189, "step": 234220 }, { "epoch": 2.5025909503712804, "grad_norm": 0.04504821076989174, "learning_rate": 9.459664596663406e-07, "loss": 0.0375, "step": 234230 }, { "epoch": 2.5026977936855603, "grad_norm": 0.6483319401741028, "learning_rate": 9.459588626080195e-07, "loss": 0.0329, "step": 234240 }, { "epoch": 2.50280463699984, "grad_norm": 0.006832584273070097, "learning_rate": 9.459512650461771e-07, "loss": 0.0301, "step": 234250 }, { "epoch": 2.5029114803141193, "grad_norm": 2.446944236755371, "learning_rate": 9.459436669808221e-07, "loss": 0.0239, "step": 234260 }, { "epoch": 2.5030183236283987, "grad_norm": 0.07680569589138031, "learning_rate": 9.459360684119631e-07, "loss": 0.0146, "step": 234270 }, { "epoch": 2.5031251669426786, "grad_norm": 0.489663302898407, "learning_rate": 9.459284693396086e-07, "loss": 0.0126, "step": 234280 }, { "epoch": 2.503232010256958, "grad_norm": 4.101572036743164, "learning_rate": 9.459208697637673e-07, "loss": 0.0072, "step": 234290 }, { "epoch": 2.503338853571238, "grad_norm": 1.854857325553894, "learning_rate": 9.459132696844476e-07, "loss": 0.0107, "step": 234300 }, { "epoch": 2.5034456968855174, "grad_norm": 0.11137973517179489, "learning_rate": 9.459056691016581e-07, "loss": 0.0139, "step": 234310 }, { "epoch": 2.503552540199797, "grad_norm": 1.4219390153884888, "learning_rate": 9.458980680154075e-07, "loss": 0.022, "step": 234320 }, { "epoch": 2.5036593835140764, "grad_norm": 1.068397879600525, "learning_rate": 9.458904664257043e-07, "loss": 0.0268, "step": 234330 }, { "epoch": 2.5037662268283563, "grad_norm": 11.186566352844238, "learning_rate": 9.458828643325571e-07, "loss": 0.0122, "step": 234340 }, { "epoch": 2.5038730701426357, "grad_norm": 0.017793243750929832, "learning_rate": 9.458752617359746e-07, "loss": 0.0239, "step": 234350 }, { "epoch": 2.5039799134569156, "grad_norm": 0.005529765971004963, "learning_rate": 9.458676586359653e-07, "loss": 0.0063, "step": 234360 }, { "epoch": 2.504086756771195, "grad_norm": 2.1411352157592773, "learning_rate": 9.458600550325377e-07, "loss": 0.0221, "step": 234370 }, { "epoch": 2.5041936000854745, "grad_norm": 0.0063051641918718815, "learning_rate": 9.458524509257004e-07, "loss": 0.0691, "step": 234380 }, { "epoch": 2.504300443399754, "grad_norm": 0.932300865650177, "learning_rate": 9.45844846315462e-07, "loss": 0.0089, "step": 234390 }, { "epoch": 2.504407286714034, "grad_norm": 0.013246694579720497, "learning_rate": 9.458372412018311e-07, "loss": 0.0423, "step": 234400 }, { "epoch": 2.5045141300283134, "grad_norm": 0.6673170924186707, "learning_rate": 9.458296355848166e-07, "loss": 0.0161, "step": 234410 }, { "epoch": 2.5046209733425933, "grad_norm": 0.593355119228363, "learning_rate": 9.458220294644265e-07, "loss": 0.0178, "step": 234420 }, { "epoch": 2.5047278166568727, "grad_norm": 1.6788586378097534, "learning_rate": 9.458144228406697e-07, "loss": 0.0102, "step": 234430 }, { "epoch": 2.504834659971152, "grad_norm": 1.175784707069397, "learning_rate": 9.458068157135548e-07, "loss": 0.0084, "step": 234440 }, { "epoch": 2.504941503285432, "grad_norm": 0.02054646797478199, "learning_rate": 9.457992080830904e-07, "loss": 0.0369, "step": 234450 }, { "epoch": 2.5050483465997115, "grad_norm": 0.673698365688324, "learning_rate": 9.45791599949285e-07, "loss": 0.0185, "step": 234460 }, { "epoch": 2.505155189913991, "grad_norm": 2.2670934200286865, "learning_rate": 9.457839913121473e-07, "loss": 0.0267, "step": 234470 }, { "epoch": 2.505262033228271, "grad_norm": 8.826643943786621, "learning_rate": 9.457763821716857e-07, "loss": 0.0302, "step": 234480 }, { "epoch": 2.5053688765425504, "grad_norm": 3.862931966781616, "learning_rate": 9.457687725279091e-07, "loss": 0.0172, "step": 234490 }, { "epoch": 2.50547571985683, "grad_norm": 0.9349189400672913, "learning_rate": 9.45761162380826e-07, "loss": 0.0066, "step": 234500 }, { "epoch": 2.5055825631711097, "grad_norm": 0.1260734498500824, "learning_rate": 9.457535517304447e-07, "loss": 0.0287, "step": 234510 }, { "epoch": 2.505689406485389, "grad_norm": 0.03673408553004265, "learning_rate": 9.45745940576774e-07, "loss": 0.0151, "step": 234520 }, { "epoch": 2.5057962497996686, "grad_norm": 2.5838236808776855, "learning_rate": 9.457383289198224e-07, "loss": 0.0207, "step": 234530 }, { "epoch": 2.5059030931139485, "grad_norm": 0.014231217093765736, "learning_rate": 9.457307167595986e-07, "loss": 0.0334, "step": 234540 }, { "epoch": 2.506009936428228, "grad_norm": 1.8608112335205078, "learning_rate": 9.457231040961114e-07, "loss": 0.0227, "step": 234550 }, { "epoch": 2.5061167797425075, "grad_norm": 0.006358959712088108, "learning_rate": 9.457154909293692e-07, "loss": 0.0214, "step": 234560 }, { "epoch": 2.5062236230567874, "grad_norm": 4.0525360107421875, "learning_rate": 9.457078772593804e-07, "loss": 0.0245, "step": 234570 }, { "epoch": 2.506330466371067, "grad_norm": 0.17923112213611603, "learning_rate": 9.457002630861538e-07, "loss": 0.032, "step": 234580 }, { "epoch": 2.5064373096853463, "grad_norm": 11.15268611907959, "learning_rate": 9.456926484096979e-07, "loss": 0.0293, "step": 234590 }, { "epoch": 2.506544152999626, "grad_norm": 3.189229726791382, "learning_rate": 9.456850332300215e-07, "loss": 0.0086, "step": 234600 }, { "epoch": 2.5066509963139056, "grad_norm": 0.006638993509113789, "learning_rate": 9.456774175471329e-07, "loss": 0.0348, "step": 234610 }, { "epoch": 2.506757839628185, "grad_norm": 1.3045756816864014, "learning_rate": 9.456698013610411e-07, "loss": 0.0178, "step": 234620 }, { "epoch": 2.506864682942465, "grad_norm": 10.165098190307617, "learning_rate": 9.456621846717542e-07, "loss": 0.0474, "step": 234630 }, { "epoch": 2.5069715262567445, "grad_norm": 2.1818597316741943, "learning_rate": 9.456545674792811e-07, "loss": 0.051, "step": 234640 }, { "epoch": 2.5070783695710244, "grad_norm": 12.512842178344727, "learning_rate": 9.456469497836306e-07, "loss": 0.0351, "step": 234650 }, { "epoch": 2.507185212885304, "grad_norm": 0.05265781655907631, "learning_rate": 9.456393315848108e-07, "loss": 0.0119, "step": 234660 }, { "epoch": 2.5072920561995833, "grad_norm": 0.689631998538971, "learning_rate": 9.456317128828306e-07, "loss": 0.0217, "step": 234670 }, { "epoch": 2.5073988995138627, "grad_norm": 0.1897204965353012, "learning_rate": 9.456240936776987e-07, "loss": 0.0034, "step": 234680 }, { "epoch": 2.5075057428281426, "grad_norm": 4.260437488555908, "learning_rate": 9.456164739694234e-07, "loss": 0.0118, "step": 234690 }, { "epoch": 2.507612586142422, "grad_norm": 0.004078199155628681, "learning_rate": 9.456088537580136e-07, "loss": 0.009, "step": 234700 }, { "epoch": 2.507719429456702, "grad_norm": 4.404082298278809, "learning_rate": 9.456012330434777e-07, "loss": 0.0417, "step": 234710 }, { "epoch": 2.5078262727709815, "grad_norm": 0.555768609046936, "learning_rate": 9.455936118258243e-07, "loss": 0.0074, "step": 234720 }, { "epoch": 2.507933116085261, "grad_norm": 0.06036396697163582, "learning_rate": 9.455859901050623e-07, "loss": 0.018, "step": 234730 }, { "epoch": 2.5080399593995404, "grad_norm": 0.21444427967071533, "learning_rate": 9.455783678811999e-07, "loss": 0.0177, "step": 234740 }, { "epoch": 2.5081468027138203, "grad_norm": 0.5272598266601562, "learning_rate": 9.455707451542459e-07, "loss": 0.0431, "step": 234750 }, { "epoch": 2.5082536460280997, "grad_norm": 0.037509735673666, "learning_rate": 9.45563121924209e-07, "loss": 0.0023, "step": 234760 }, { "epoch": 2.5083604893423797, "grad_norm": 0.01488131657242775, "learning_rate": 9.455554981910977e-07, "loss": 0.0574, "step": 234770 }, { "epoch": 2.508467332656659, "grad_norm": 0.005132158752530813, "learning_rate": 9.455478739549205e-07, "loss": 0.0153, "step": 234780 }, { "epoch": 2.5085741759709386, "grad_norm": 1.435056209564209, "learning_rate": 9.455402492156863e-07, "loss": 0.0198, "step": 234790 }, { "epoch": 2.508681019285218, "grad_norm": 0.007038650568574667, "learning_rate": 9.455326239734033e-07, "loss": 0.0098, "step": 234800 }, { "epoch": 2.508787862599498, "grad_norm": 1.4195895195007324, "learning_rate": 9.455249982280806e-07, "loss": 0.0121, "step": 234810 }, { "epoch": 2.5088947059137774, "grad_norm": 7.127680778503418, "learning_rate": 9.455173719797264e-07, "loss": 0.0194, "step": 234820 }, { "epoch": 2.5090015492280573, "grad_norm": 1.892098307609558, "learning_rate": 9.455097452283495e-07, "loss": 0.014, "step": 234830 }, { "epoch": 2.5091083925423368, "grad_norm": 0.45701831579208374, "learning_rate": 9.455021179739584e-07, "loss": 0.0072, "step": 234840 }, { "epoch": 2.509215235856616, "grad_norm": 1.6909236907958984, "learning_rate": 9.454944902165618e-07, "loss": 0.0196, "step": 234850 }, { "epoch": 2.5093220791708957, "grad_norm": 0.14348158240318298, "learning_rate": 9.454868619561685e-07, "loss": 0.0057, "step": 234860 }, { "epoch": 2.5094289224851756, "grad_norm": 9.016190528869629, "learning_rate": 9.454792331927866e-07, "loss": 0.0375, "step": 234870 }, { "epoch": 2.509535765799455, "grad_norm": 0.5693758726119995, "learning_rate": 9.454716039264252e-07, "loss": 0.0101, "step": 234880 }, { "epoch": 2.509642609113735, "grad_norm": 1.8513538837432861, "learning_rate": 9.454639741570927e-07, "loss": 0.0333, "step": 234890 }, { "epoch": 2.5097494524280144, "grad_norm": 0.0014490849571302533, "learning_rate": 9.454563438847978e-07, "loss": 0.0028, "step": 234900 }, { "epoch": 2.509856295742294, "grad_norm": 0.2516493499279022, "learning_rate": 9.45448713109549e-07, "loss": 0.0026, "step": 234910 }, { "epoch": 2.5099631390565733, "grad_norm": 0.5357956886291504, "learning_rate": 9.45441081831355e-07, "loss": 0.0439, "step": 234920 }, { "epoch": 2.510069982370853, "grad_norm": 2.793962240219116, "learning_rate": 9.454334500502246e-07, "loss": 0.0232, "step": 234930 }, { "epoch": 2.5101768256851327, "grad_norm": 0.011043095029890537, "learning_rate": 9.454258177661659e-07, "loss": 0.0134, "step": 234940 }, { "epoch": 2.5102836689994126, "grad_norm": 0.22239916026592255, "learning_rate": 9.454181849791881e-07, "loss": 0.0165, "step": 234950 }, { "epoch": 2.510390512313692, "grad_norm": 10.375266075134277, "learning_rate": 9.454105516892992e-07, "loss": 0.041, "step": 234960 }, { "epoch": 2.5104973556279715, "grad_norm": 0.9275983572006226, "learning_rate": 9.454029178965085e-07, "loss": 0.0308, "step": 234970 }, { "epoch": 2.510604198942251, "grad_norm": 3.130357265472412, "learning_rate": 9.453952836008242e-07, "loss": 0.0328, "step": 234980 }, { "epoch": 2.510711042256531, "grad_norm": 0.1697731763124466, "learning_rate": 9.453876488022549e-07, "loss": 0.0075, "step": 234990 }, { "epoch": 2.5108178855708103, "grad_norm": 11.077468872070312, "learning_rate": 9.453800135008094e-07, "loss": 0.0916, "step": 235000 }, { "epoch": 2.51092472888509, "grad_norm": 0.09578295052051544, "learning_rate": 9.453723776964964e-07, "loss": 0.0337, "step": 235010 }, { "epoch": 2.5110315721993697, "grad_norm": 3.358672857284546, "learning_rate": 9.453647413893242e-07, "loss": 0.0257, "step": 235020 }, { "epoch": 2.511138415513649, "grad_norm": 4.151402473449707, "learning_rate": 9.453571045793016e-07, "loss": 0.0108, "step": 235030 }, { "epoch": 2.5112452588279286, "grad_norm": 0.09583741426467896, "learning_rate": 9.453494672664372e-07, "loss": 0.0504, "step": 235040 }, { "epoch": 2.5113521021422085, "grad_norm": 1.213057041168213, "learning_rate": 9.453418294507398e-07, "loss": 0.0101, "step": 235050 }, { "epoch": 2.511458945456488, "grad_norm": 0.012905220501124859, "learning_rate": 9.453341911322177e-07, "loss": 0.0085, "step": 235060 }, { "epoch": 2.511565788770768, "grad_norm": 3.8732059001922607, "learning_rate": 9.453265523108798e-07, "loss": 0.0182, "step": 235070 }, { "epoch": 2.5116726320850473, "grad_norm": 0.6243835687637329, "learning_rate": 9.453189129867345e-07, "loss": 0.0076, "step": 235080 }, { "epoch": 2.5117794753993268, "grad_norm": 0.32230308651924133, "learning_rate": 9.453112731597906e-07, "loss": 0.007, "step": 235090 }, { "epoch": 2.5118863187136062, "grad_norm": 0.47781452536582947, "learning_rate": 9.453036328300567e-07, "loss": 0.0726, "step": 235100 }, { "epoch": 2.511993162027886, "grad_norm": 0.017917349934577942, "learning_rate": 9.452959919975414e-07, "loss": 0.0219, "step": 235110 }, { "epoch": 2.5121000053421656, "grad_norm": 0.06387851387262344, "learning_rate": 9.452883506622531e-07, "loss": 0.008, "step": 235120 }, { "epoch": 2.5122068486564455, "grad_norm": 0.1132003515958786, "learning_rate": 9.452807088242009e-07, "loss": 0.0364, "step": 235130 }, { "epoch": 2.512313691970725, "grad_norm": 2.208660840988159, "learning_rate": 9.45273066483393e-07, "loss": 0.0085, "step": 235140 }, { "epoch": 2.5124205352850044, "grad_norm": 0.04034672677516937, "learning_rate": 9.452654236398383e-07, "loss": 0.0154, "step": 235150 }, { "epoch": 2.512527378599284, "grad_norm": 3.5624005794525146, "learning_rate": 9.452577802935453e-07, "loss": 0.0359, "step": 235160 }, { "epoch": 2.512634221913564, "grad_norm": 7.704615116119385, "learning_rate": 9.452501364445227e-07, "loss": 0.0277, "step": 235170 }, { "epoch": 2.5127410652278432, "grad_norm": 0.5174757838249207, "learning_rate": 9.452424920927791e-07, "loss": 0.0082, "step": 235180 }, { "epoch": 2.512847908542123, "grad_norm": 0.02247021533548832, "learning_rate": 9.452348472383232e-07, "loss": 0.0201, "step": 235190 }, { "epoch": 2.5129547518564026, "grad_norm": 0.12252944707870483, "learning_rate": 9.452272018811634e-07, "loss": 0.0092, "step": 235200 }, { "epoch": 2.513061595170682, "grad_norm": 6.299504280090332, "learning_rate": 9.452195560213085e-07, "loss": 0.022, "step": 235210 }, { "epoch": 2.513168438484962, "grad_norm": 6.296100616455078, "learning_rate": 9.45211909658767e-07, "loss": 0.0181, "step": 235220 }, { "epoch": 2.5132752817992414, "grad_norm": 0.1219937652349472, "learning_rate": 9.45204262793548e-07, "loss": 0.0506, "step": 235230 }, { "epoch": 2.513382125113521, "grad_norm": 0.632858395576477, "learning_rate": 9.451966154256594e-07, "loss": 0.1017, "step": 235240 }, { "epoch": 2.513488968427801, "grad_norm": 1.6701411008834839, "learning_rate": 9.451889675551104e-07, "loss": 0.0305, "step": 235250 }, { "epoch": 2.5135958117420802, "grad_norm": 9.043282508850098, "learning_rate": 9.451813191819096e-07, "loss": 0.063, "step": 235260 }, { "epoch": 2.5137026550563597, "grad_norm": 3.266406297683716, "learning_rate": 9.451736703060654e-07, "loss": 0.0237, "step": 235270 }, { "epoch": 2.5138094983706396, "grad_norm": 0.0057306839153170586, "learning_rate": 9.451660209275864e-07, "loss": 0.047, "step": 235280 }, { "epoch": 2.513916341684919, "grad_norm": 0.3654005229473114, "learning_rate": 9.451583710464814e-07, "loss": 0.0238, "step": 235290 }, { "epoch": 2.5140231849991985, "grad_norm": 0.08683683723211288, "learning_rate": 9.451507206627592e-07, "loss": 0.0051, "step": 235300 }, { "epoch": 2.5141300283134784, "grad_norm": 4.694196701049805, "learning_rate": 9.45143069776428e-07, "loss": 0.0162, "step": 235310 }, { "epoch": 2.514236871627758, "grad_norm": 0.1604374349117279, "learning_rate": 9.451354183874966e-07, "loss": 0.0379, "step": 235320 }, { "epoch": 2.5143437149420373, "grad_norm": 0.8008809089660645, "learning_rate": 9.45127766495974e-07, "loss": 0.0208, "step": 235330 }, { "epoch": 2.5144505582563172, "grad_norm": 0.013912019319832325, "learning_rate": 9.451201141018684e-07, "loss": 0.0088, "step": 235340 }, { "epoch": 2.5145574015705967, "grad_norm": 0.25465962290763855, "learning_rate": 9.451124612051886e-07, "loss": 0.0092, "step": 235350 }, { "epoch": 2.514664244884876, "grad_norm": 0.20376937091350555, "learning_rate": 9.451048078059434e-07, "loss": 0.0225, "step": 235360 }, { "epoch": 2.514771088199156, "grad_norm": 16.80590057373047, "learning_rate": 9.45097153904141e-07, "loss": 0.0272, "step": 235370 }, { "epoch": 2.5148779315134355, "grad_norm": 1.2103171348571777, "learning_rate": 9.450894994997905e-07, "loss": 0.0178, "step": 235380 }, { "epoch": 2.514984774827715, "grad_norm": 0.6155234575271606, "learning_rate": 9.450818445929003e-07, "loss": 0.0189, "step": 235390 }, { "epoch": 2.515091618141995, "grad_norm": 3.9861369132995605, "learning_rate": 9.450741891834792e-07, "loss": 0.0092, "step": 235400 }, { "epoch": 2.5151984614562743, "grad_norm": 2.0978751182556152, "learning_rate": 9.450665332715358e-07, "loss": 0.0503, "step": 235410 }, { "epoch": 2.5153053047705543, "grad_norm": 0.028300968930125237, "learning_rate": 9.450588768570784e-07, "loss": 0.0076, "step": 235420 }, { "epoch": 2.5154121480848337, "grad_norm": 6.352682590484619, "learning_rate": 9.450512199401162e-07, "loss": 0.0183, "step": 235430 }, { "epoch": 2.515518991399113, "grad_norm": 0.61345374584198, "learning_rate": 9.450435625206574e-07, "loss": 0.0129, "step": 235440 }, { "epoch": 2.5156258347133926, "grad_norm": 0.024744046851992607, "learning_rate": 9.450359045987111e-07, "loss": 0.0011, "step": 235450 }, { "epoch": 2.5157326780276725, "grad_norm": 0.03139181062579155, "learning_rate": 9.450282461742853e-07, "loss": 0.0325, "step": 235460 }, { "epoch": 2.515839521341952, "grad_norm": 0.04515877738595009, "learning_rate": 9.450205872473893e-07, "loss": 0.0039, "step": 235470 }, { "epoch": 2.515946364656232, "grad_norm": 2.980269193649292, "learning_rate": 9.450129278180314e-07, "loss": 0.0138, "step": 235480 }, { "epoch": 2.5160532079705114, "grad_norm": 0.02510470524430275, "learning_rate": 9.450052678862204e-07, "loss": 0.0027, "step": 235490 }, { "epoch": 2.516160051284791, "grad_norm": 2.6356325149536133, "learning_rate": 9.449976074519648e-07, "loss": 0.0269, "step": 235500 }, { "epoch": 2.5162668945990703, "grad_norm": 0.016468677669763565, "learning_rate": 9.449899465152733e-07, "loss": 0.0138, "step": 235510 }, { "epoch": 2.51637373791335, "grad_norm": 0.08721014112234116, "learning_rate": 9.449822850761544e-07, "loss": 0.0323, "step": 235520 }, { "epoch": 2.5164805812276296, "grad_norm": 1.44024658203125, "learning_rate": 9.449746231346172e-07, "loss": 0.0361, "step": 235530 }, { "epoch": 2.5165874245419095, "grad_norm": 0.45625996589660645, "learning_rate": 9.449669606906699e-07, "loss": 0.0073, "step": 235540 }, { "epoch": 2.516694267856189, "grad_norm": 0.06665897369384766, "learning_rate": 9.449592977443214e-07, "loss": 0.0041, "step": 235550 }, { "epoch": 2.5168011111704685, "grad_norm": 3.557105302810669, "learning_rate": 9.449516342955801e-07, "loss": 0.0142, "step": 235560 }, { "epoch": 2.516907954484748, "grad_norm": 1.2334753274917603, "learning_rate": 9.44943970344455e-07, "loss": 0.0156, "step": 235570 }, { "epoch": 2.517014797799028, "grad_norm": 5.409976959228516, "learning_rate": 9.449363058909545e-07, "loss": 0.0392, "step": 235580 }, { "epoch": 2.5171216411133073, "grad_norm": 0.03248439356684685, "learning_rate": 9.449286409350874e-07, "loss": 0.0244, "step": 235590 }, { "epoch": 2.517228484427587, "grad_norm": 4.27787446975708, "learning_rate": 9.449209754768623e-07, "loss": 0.009, "step": 235600 }, { "epoch": 2.5173353277418666, "grad_norm": 0.1619221419095993, "learning_rate": 9.449133095162878e-07, "loss": 0.0119, "step": 235610 }, { "epoch": 2.517442171056146, "grad_norm": 0.29186496138572693, "learning_rate": 9.449056430533726e-07, "loss": 0.0186, "step": 235620 }, { "epoch": 2.5175490143704256, "grad_norm": 3.9269566535949707, "learning_rate": 9.448979760881253e-07, "loss": 0.0103, "step": 235630 }, { "epoch": 2.5176558576847055, "grad_norm": 1.397598147392273, "learning_rate": 9.448903086205546e-07, "loss": 0.011, "step": 235640 }, { "epoch": 2.517762700998985, "grad_norm": 0.004178424831479788, "learning_rate": 9.448826406506692e-07, "loss": 0.0177, "step": 235650 }, { "epoch": 2.517869544313265, "grad_norm": 0.15742966532707214, "learning_rate": 9.448749721784777e-07, "loss": 0.0243, "step": 235660 }, { "epoch": 2.5179763876275443, "grad_norm": 9.39297103881836, "learning_rate": 9.448673032039889e-07, "loss": 0.0213, "step": 235670 }, { "epoch": 2.5180832309418237, "grad_norm": 0.002895353129133582, "learning_rate": 9.448596337272112e-07, "loss": 0.0157, "step": 235680 }, { "epoch": 2.518190074256103, "grad_norm": 6.095892906188965, "learning_rate": 9.448519637481534e-07, "loss": 0.0205, "step": 235690 }, { "epoch": 2.518296917570383, "grad_norm": 0.08612102270126343, "learning_rate": 9.448442932668243e-07, "loss": 0.0191, "step": 235700 }, { "epoch": 2.5184037608846626, "grad_norm": 0.006425095722079277, "learning_rate": 9.448366222832323e-07, "loss": 0.0057, "step": 235710 }, { "epoch": 2.5185106041989425, "grad_norm": 0.046331942081451416, "learning_rate": 9.448289507973861e-07, "loss": 0.0182, "step": 235720 }, { "epoch": 2.518617447513222, "grad_norm": 1.584289789199829, "learning_rate": 9.448212788092946e-07, "loss": 0.0321, "step": 235730 }, { "epoch": 2.5187242908275014, "grad_norm": 0.432670921087265, "learning_rate": 9.448136063189662e-07, "loss": 0.009, "step": 235740 }, { "epoch": 2.518831134141781, "grad_norm": 0.16674557328224182, "learning_rate": 9.448059333264098e-07, "loss": 0.0572, "step": 235750 }, { "epoch": 2.5189379774560607, "grad_norm": 6.81001615524292, "learning_rate": 9.447982598316338e-07, "loss": 0.0112, "step": 235760 }, { "epoch": 2.51904482077034, "grad_norm": 0.025942422449588776, "learning_rate": 9.44790585834647e-07, "loss": 0.0058, "step": 235770 }, { "epoch": 2.51915166408462, "grad_norm": 2.2424445152282715, "learning_rate": 9.447829113354581e-07, "loss": 0.0162, "step": 235780 }, { "epoch": 2.5192585073988996, "grad_norm": 14.57855224609375, "learning_rate": 9.447752363340757e-07, "loss": 0.0354, "step": 235790 }, { "epoch": 2.519365350713179, "grad_norm": 0.025064080953598022, "learning_rate": 9.447675608305086e-07, "loss": 0.0302, "step": 235800 }, { "epoch": 2.5194721940274585, "grad_norm": 11.298145294189453, "learning_rate": 9.447598848247651e-07, "loss": 0.0359, "step": 235810 }, { "epoch": 2.5195790373417384, "grad_norm": 6.331684112548828, "learning_rate": 9.447522083168544e-07, "loss": 0.0224, "step": 235820 }, { "epoch": 2.519685880656018, "grad_norm": 7.791663646697998, "learning_rate": 9.447445313067847e-07, "loss": 0.0234, "step": 235830 }, { "epoch": 2.5197927239702977, "grad_norm": 1.4027783870697021, "learning_rate": 9.447368537945651e-07, "loss": 0.0464, "step": 235840 }, { "epoch": 2.519899567284577, "grad_norm": 0.8609793782234192, "learning_rate": 9.447291757802037e-07, "loss": 0.0387, "step": 235850 }, { "epoch": 2.5200064105988567, "grad_norm": 0.5652676820755005, "learning_rate": 9.447214972637096e-07, "loss": 0.0343, "step": 235860 }, { "epoch": 2.520113253913136, "grad_norm": 0.014746545813977718, "learning_rate": 9.447138182450916e-07, "loss": 0.024, "step": 235870 }, { "epoch": 2.520220097227416, "grad_norm": 0.005240409169346094, "learning_rate": 9.447061387243578e-07, "loss": 0.0132, "step": 235880 }, { "epoch": 2.5203269405416955, "grad_norm": 12.61351203918457, "learning_rate": 9.446984587015174e-07, "loss": 0.0368, "step": 235890 }, { "epoch": 2.5204337838559754, "grad_norm": 0.15417592227458954, "learning_rate": 9.446907781765789e-07, "loss": 0.0078, "step": 235900 }, { "epoch": 2.520540627170255, "grad_norm": 5.489170551300049, "learning_rate": 9.446830971495508e-07, "loss": 0.0135, "step": 235910 }, { "epoch": 2.5206474704845343, "grad_norm": 3.8211896419525146, "learning_rate": 9.44675415620442e-07, "loss": 0.0168, "step": 235920 }, { "epoch": 2.520754313798814, "grad_norm": 6.364001750946045, "learning_rate": 9.446677335892611e-07, "loss": 0.0189, "step": 235930 }, { "epoch": 2.5208611571130937, "grad_norm": 0.0037913003470748663, "learning_rate": 9.446600510560168e-07, "loss": 0.0138, "step": 235940 }, { "epoch": 2.520968000427373, "grad_norm": 4.9516921043396, "learning_rate": 9.446523680207177e-07, "loss": 0.0232, "step": 235950 }, { "epoch": 2.521074843741653, "grad_norm": 0.9245138764381409, "learning_rate": 9.446446844833725e-07, "loss": 0.0255, "step": 235960 }, { "epoch": 2.5211816870559325, "grad_norm": 0.2213858664035797, "learning_rate": 9.4463700044399e-07, "loss": 0.0237, "step": 235970 }, { "epoch": 2.521288530370212, "grad_norm": 0.2850162088871002, "learning_rate": 9.446293159025787e-07, "loss": 0.0606, "step": 235980 }, { "epoch": 2.521395373684492, "grad_norm": 4.5513916015625, "learning_rate": 9.446216308591472e-07, "loss": 0.0127, "step": 235990 }, { "epoch": 2.5215022169987713, "grad_norm": 0.08598380535840988, "learning_rate": 9.446139453137045e-07, "loss": 0.0102, "step": 236000 }, { "epoch": 2.5216090603130508, "grad_norm": 1.0035704374313354, "learning_rate": 9.446062592662592e-07, "loss": 0.0088, "step": 236010 }, { "epoch": 2.5217159036273307, "grad_norm": 0.2080966979265213, "learning_rate": 9.445985727168197e-07, "loss": 0.0152, "step": 236020 }, { "epoch": 2.52182274694161, "grad_norm": 6.944969654083252, "learning_rate": 9.445908856653948e-07, "loss": 0.0165, "step": 236030 }, { "epoch": 2.5219295902558896, "grad_norm": 1.8465008735656738, "learning_rate": 9.445831981119934e-07, "loss": 0.0171, "step": 236040 }, { "epoch": 2.5220364335701695, "grad_norm": 1.8960391283035278, "learning_rate": 9.44575510056624e-07, "loss": 0.0287, "step": 236050 }, { "epoch": 2.522143276884449, "grad_norm": 4.16273832321167, "learning_rate": 9.445678214992952e-07, "loss": 0.0277, "step": 236060 }, { "epoch": 2.5222501201987284, "grad_norm": 3.8713507652282715, "learning_rate": 9.445601324400158e-07, "loss": 0.0172, "step": 236070 }, { "epoch": 2.5223569635130083, "grad_norm": 0.7100048065185547, "learning_rate": 9.445524428787943e-07, "loss": 0.0836, "step": 236080 }, { "epoch": 2.5224638068272878, "grad_norm": 0.6187082529067993, "learning_rate": 9.445447528156398e-07, "loss": 0.0186, "step": 236090 }, { "epoch": 2.5225706501415672, "grad_norm": 0.008848600089550018, "learning_rate": 9.445370622505606e-07, "loss": 0.0126, "step": 236100 }, { "epoch": 2.522677493455847, "grad_norm": 1.204738974571228, "learning_rate": 9.445293711835655e-07, "loss": 0.0221, "step": 236110 }, { "epoch": 2.5227843367701266, "grad_norm": 0.006903516128659248, "learning_rate": 9.445216796146633e-07, "loss": 0.0229, "step": 236120 }, { "epoch": 2.5228911800844065, "grad_norm": 7.997220039367676, "learning_rate": 9.445139875438624e-07, "loss": 0.0203, "step": 236130 }, { "epoch": 2.522998023398686, "grad_norm": 0.011043567210435867, "learning_rate": 9.445062949711719e-07, "loss": 0.0078, "step": 236140 }, { "epoch": 2.5231048667129654, "grad_norm": 3.280261754989624, "learning_rate": 9.444986018966002e-07, "loss": 0.0048, "step": 236150 }, { "epoch": 2.523211710027245, "grad_norm": 0.054612137377262115, "learning_rate": 9.444909083201558e-07, "loss": 0.0361, "step": 236160 }, { "epoch": 2.5233185533415248, "grad_norm": 2.48575496673584, "learning_rate": 9.444832142418477e-07, "loss": 0.0286, "step": 236170 }, { "epoch": 2.5234253966558042, "grad_norm": 8.469164848327637, "learning_rate": 9.444755196616847e-07, "loss": 0.028, "step": 236180 }, { "epoch": 2.523532239970084, "grad_norm": 0.7944983243942261, "learning_rate": 9.444678245796751e-07, "loss": 0.0276, "step": 236190 }, { "epoch": 2.5236390832843636, "grad_norm": 0.22628319263458252, "learning_rate": 9.444601289958279e-07, "loss": 0.0048, "step": 236200 }, { "epoch": 2.523745926598643, "grad_norm": 0.35706183314323425, "learning_rate": 9.444524329101516e-07, "loss": 0.0336, "step": 236210 }, { "epoch": 2.5238527699129225, "grad_norm": 0.042984697967767715, "learning_rate": 9.444447363226548e-07, "loss": 0.0106, "step": 236220 }, { "epoch": 2.5239596132272024, "grad_norm": 0.3694119453430176, "learning_rate": 9.444370392333465e-07, "loss": 0.0112, "step": 236230 }, { "epoch": 2.524066456541482, "grad_norm": 0.057429030537605286, "learning_rate": 9.444293416422353e-07, "loss": 0.0188, "step": 236240 }, { "epoch": 2.5241732998557618, "grad_norm": 2.1985087394714355, "learning_rate": 9.444216435493297e-07, "loss": 0.0192, "step": 236250 }, { "epoch": 2.5242801431700412, "grad_norm": 6.055042743682861, "learning_rate": 9.444139449546386e-07, "loss": 0.0615, "step": 236260 }, { "epoch": 2.5243869864843207, "grad_norm": 0.015649935230612755, "learning_rate": 9.444062458581707e-07, "loss": 0.0182, "step": 236270 }, { "epoch": 2.5244938297986, "grad_norm": 0.043346814811229706, "learning_rate": 9.443985462599345e-07, "loss": 0.0248, "step": 236280 }, { "epoch": 2.52460067311288, "grad_norm": 0.33278077840805054, "learning_rate": 9.443908461599388e-07, "loss": 0.0138, "step": 236290 }, { "epoch": 2.5247075164271595, "grad_norm": 1.3740761280059814, "learning_rate": 9.443831455581923e-07, "loss": 0.0107, "step": 236300 }, { "epoch": 2.5248143597414394, "grad_norm": 0.01528778113424778, "learning_rate": 9.443754444547039e-07, "loss": 0.0161, "step": 236310 }, { "epoch": 2.524921203055719, "grad_norm": 3.8655953407287598, "learning_rate": 9.443677428494818e-07, "loss": 0.0386, "step": 236320 }, { "epoch": 2.5250280463699983, "grad_norm": 3.6805689334869385, "learning_rate": 9.44360040742535e-07, "loss": 0.0085, "step": 236330 }, { "epoch": 2.525134889684278, "grad_norm": 10.422203063964844, "learning_rate": 9.443523381338723e-07, "loss": 0.0454, "step": 236340 }, { "epoch": 2.5252417329985577, "grad_norm": 0.33842918276786804, "learning_rate": 9.443446350235022e-07, "loss": 0.0083, "step": 236350 }, { "epoch": 2.525348576312837, "grad_norm": 0.031512320041656494, "learning_rate": 9.443369314114336e-07, "loss": 0.0091, "step": 236360 }, { "epoch": 2.525455419627117, "grad_norm": 0.026850949972867966, "learning_rate": 9.443292272976748e-07, "loss": 0.006, "step": 236370 }, { "epoch": 2.5255622629413965, "grad_norm": 0.42574936151504517, "learning_rate": 9.44321522682235e-07, "loss": 0.0085, "step": 236380 }, { "epoch": 2.525669106255676, "grad_norm": 0.08057013154029846, "learning_rate": 9.443138175651226e-07, "loss": 0.0827, "step": 236390 }, { "epoch": 2.5257759495699554, "grad_norm": 0.06634358316659927, "learning_rate": 9.443061119463464e-07, "loss": 0.0162, "step": 236400 }, { "epoch": 2.5258827928842353, "grad_norm": 6.762988090515137, "learning_rate": 9.44298405825915e-07, "loss": 0.0359, "step": 236410 }, { "epoch": 2.525989636198515, "grad_norm": 0.02048821933567524, "learning_rate": 9.442906992038371e-07, "loss": 0.0074, "step": 236420 }, { "epoch": 2.5260964795127947, "grad_norm": 0.12028255313634872, "learning_rate": 9.442829920801216e-07, "loss": 0.06, "step": 236430 }, { "epoch": 2.526203322827074, "grad_norm": 0.3292372226715088, "learning_rate": 9.442752844547771e-07, "loss": 0.0095, "step": 236440 }, { "epoch": 2.5263101661413536, "grad_norm": 0.11886248737573624, "learning_rate": 9.442675763278121e-07, "loss": 0.0154, "step": 236450 }, { "epoch": 2.526417009455633, "grad_norm": 8.612286567687988, "learning_rate": 9.442598676992358e-07, "loss": 0.0244, "step": 236460 }, { "epoch": 2.526523852769913, "grad_norm": 0.02544366382062435, "learning_rate": 9.442521585690563e-07, "loss": 0.0192, "step": 236470 }, { "epoch": 2.5266306960841924, "grad_norm": 1.633692979812622, "learning_rate": 9.442444489372826e-07, "loss": 0.0274, "step": 236480 }, { "epoch": 2.5267375393984723, "grad_norm": 9.278841018676758, "learning_rate": 9.442367388039234e-07, "loss": 0.0195, "step": 236490 }, { "epoch": 2.526844382712752, "grad_norm": 4.3901824951171875, "learning_rate": 9.442290281689876e-07, "loss": 0.01, "step": 236500 }, { "epoch": 2.5269512260270313, "grad_norm": 2.787447690963745, "learning_rate": 9.442213170324833e-07, "loss": 0.0171, "step": 236510 }, { "epoch": 2.5270580693413107, "grad_norm": 0.15593503415584564, "learning_rate": 9.442136053944198e-07, "loss": 0.0185, "step": 236520 }, { "epoch": 2.5271649126555906, "grad_norm": 0.07974820584058762, "learning_rate": 9.442058932548058e-07, "loss": 0.0563, "step": 236530 }, { "epoch": 2.52727175596987, "grad_norm": 3.925384044647217, "learning_rate": 9.441981806136496e-07, "loss": 0.0231, "step": 236540 }, { "epoch": 2.52737859928415, "grad_norm": 0.005029725376516581, "learning_rate": 9.441904674709602e-07, "loss": 0.0337, "step": 236550 }, { "epoch": 2.5274854425984294, "grad_norm": 0.43284451961517334, "learning_rate": 9.441827538267463e-07, "loss": 0.0016, "step": 236560 }, { "epoch": 2.527592285912709, "grad_norm": 0.8922970294952393, "learning_rate": 9.441750396810164e-07, "loss": 0.0355, "step": 236570 }, { "epoch": 2.5276991292269884, "grad_norm": 0.05575479567050934, "learning_rate": 9.441673250337795e-07, "loss": 0.0056, "step": 236580 }, { "epoch": 2.5278059725412683, "grad_norm": 0.05014204606413841, "learning_rate": 9.441596098850442e-07, "loss": 0.0315, "step": 236590 }, { "epoch": 2.5279128158555477, "grad_norm": 5.5703582763671875, "learning_rate": 9.44151894234819e-07, "loss": 0.0549, "step": 236600 }, { "epoch": 2.5280196591698276, "grad_norm": 0.7095020413398743, "learning_rate": 9.441441780831128e-07, "loss": 0.0251, "step": 236610 }, { "epoch": 2.528126502484107, "grad_norm": 2.344505548477173, "learning_rate": 9.441364614299345e-07, "loss": 0.0138, "step": 236620 }, { "epoch": 2.5282333457983865, "grad_norm": 3.1499440670013428, "learning_rate": 9.441287442752925e-07, "loss": 0.0435, "step": 236630 }, { "epoch": 2.528340189112666, "grad_norm": 2.182600975036621, "learning_rate": 9.441210266191956e-07, "loss": 0.0125, "step": 236640 }, { "epoch": 2.528447032426946, "grad_norm": 0.024516401812434196, "learning_rate": 9.441133084616526e-07, "loss": 0.0213, "step": 236650 }, { "epoch": 2.5285538757412254, "grad_norm": 4.941661357879639, "learning_rate": 9.44105589802672e-07, "loss": 0.0086, "step": 236660 }, { "epoch": 2.5286607190555053, "grad_norm": 0.5049799680709839, "learning_rate": 9.440978706422628e-07, "loss": 0.0113, "step": 236670 }, { "epoch": 2.5287675623697847, "grad_norm": 3.962204694747925, "learning_rate": 9.440901509804336e-07, "loss": 0.0233, "step": 236680 }, { "epoch": 2.528874405684064, "grad_norm": 3.218444585800171, "learning_rate": 9.44082430817193e-07, "loss": 0.0336, "step": 236690 }, { "epoch": 2.528981248998344, "grad_norm": 1.2418497800827026, "learning_rate": 9.440747101525499e-07, "loss": 0.0095, "step": 236700 }, { "epoch": 2.5290880923126235, "grad_norm": 0.4957294166088104, "learning_rate": 9.440669889865129e-07, "loss": 0.1062, "step": 236710 }, { "epoch": 2.529194935626903, "grad_norm": 1.0003578662872314, "learning_rate": 9.440592673190909e-07, "loss": 0.0316, "step": 236720 }, { "epoch": 2.529301778941183, "grad_norm": 11.494799613952637, "learning_rate": 9.440515451502924e-07, "loss": 0.0263, "step": 236730 }, { "epoch": 2.5294086222554624, "grad_norm": 0.2828013598918915, "learning_rate": 9.440438224801261e-07, "loss": 0.0029, "step": 236740 }, { "epoch": 2.529515465569742, "grad_norm": 0.16678640246391296, "learning_rate": 9.440360993086008e-07, "loss": 0.0084, "step": 236750 }, { "epoch": 2.5296223088840217, "grad_norm": 0.3777911961078644, "learning_rate": 9.440283756357253e-07, "loss": 0.0068, "step": 236760 }, { "epoch": 2.529729152198301, "grad_norm": 0.7170318961143494, "learning_rate": 9.440206514615084e-07, "loss": 0.0146, "step": 236770 }, { "epoch": 2.5298359955125806, "grad_norm": 0.0028014176059514284, "learning_rate": 9.440129267859585e-07, "loss": 0.001, "step": 236780 }, { "epoch": 2.5299428388268606, "grad_norm": 10.883164405822754, "learning_rate": 9.440052016090845e-07, "loss": 0.009, "step": 236790 }, { "epoch": 2.53004968214114, "grad_norm": 5.058009624481201, "learning_rate": 9.439974759308951e-07, "loss": 0.0072, "step": 236800 }, { "epoch": 2.5301565254554195, "grad_norm": 3.0981342792510986, "learning_rate": 9.43989749751399e-07, "loss": 0.0356, "step": 236810 }, { "epoch": 2.5302633687696994, "grad_norm": 0.0021569710224866867, "learning_rate": 9.439820230706051e-07, "loss": 0.0234, "step": 236820 }, { "epoch": 2.530370212083979, "grad_norm": 3.403367042541504, "learning_rate": 9.439742958885218e-07, "loss": 0.0138, "step": 236830 }, { "epoch": 2.5304770553982583, "grad_norm": 4.424094200134277, "learning_rate": 9.439665682051582e-07, "loss": 0.0382, "step": 236840 }, { "epoch": 2.530583898712538, "grad_norm": 0.040321968495845795, "learning_rate": 9.439588400205227e-07, "loss": 0.0093, "step": 236850 }, { "epoch": 2.5306907420268177, "grad_norm": 0.4213539958000183, "learning_rate": 9.439511113346244e-07, "loss": 0.0066, "step": 236860 }, { "epoch": 2.530797585341097, "grad_norm": 0.0637042447924614, "learning_rate": 9.439433821474716e-07, "loss": 0.02, "step": 236870 }, { "epoch": 2.530904428655377, "grad_norm": 0.022993413731455803, "learning_rate": 9.439356524590731e-07, "loss": 0.0162, "step": 236880 }, { "epoch": 2.5310112719696565, "grad_norm": 6.168317794799805, "learning_rate": 9.439279222694378e-07, "loss": 0.0292, "step": 236890 }, { "epoch": 2.5311181152839364, "grad_norm": 6.940155029296875, "learning_rate": 9.439201915785746e-07, "loss": 0.0237, "step": 236900 }, { "epoch": 2.531224958598216, "grad_norm": 0.026519285514950752, "learning_rate": 9.439124603864919e-07, "loss": 0.0206, "step": 236910 }, { "epoch": 2.5313318019124953, "grad_norm": 0.6789231896400452, "learning_rate": 9.439047286931984e-07, "loss": 0.0047, "step": 236920 }, { "epoch": 2.5314386452267748, "grad_norm": 0.8399509787559509, "learning_rate": 9.43896996498703e-07, "loss": 0.017, "step": 236930 }, { "epoch": 2.5315454885410547, "grad_norm": 3.329465389251709, "learning_rate": 9.438892638030144e-07, "loss": 0.0198, "step": 236940 }, { "epoch": 2.531652331855334, "grad_norm": 0.024766726419329643, "learning_rate": 9.438815306061414e-07, "loss": 0.0166, "step": 236950 }, { "epoch": 2.531759175169614, "grad_norm": 9.045711517333984, "learning_rate": 9.438737969080926e-07, "loss": 0.0075, "step": 236960 }, { "epoch": 2.5318660184838935, "grad_norm": 0.4079569876194, "learning_rate": 9.438660627088768e-07, "loss": 0.0136, "step": 236970 }, { "epoch": 2.531972861798173, "grad_norm": 0.011945555917918682, "learning_rate": 9.438583280085026e-07, "loss": 0.0064, "step": 236980 }, { "epoch": 2.5320797051124524, "grad_norm": 0.30118030309677124, "learning_rate": 9.43850592806979e-07, "loss": 0.0618, "step": 236990 }, { "epoch": 2.5321865484267323, "grad_norm": 1.722577691078186, "learning_rate": 9.438428571043144e-07, "loss": 0.0596, "step": 237000 }, { "epoch": 2.5322933917410118, "grad_norm": 0.06905678659677505, "learning_rate": 9.43835120900518e-07, "loss": 0.0221, "step": 237010 }, { "epoch": 2.5324002350552917, "grad_norm": 0.39822226762771606, "learning_rate": 9.43827384195598e-07, "loss": 0.0213, "step": 237020 }, { "epoch": 2.532507078369571, "grad_norm": 0.00445139454677701, "learning_rate": 9.438196469895635e-07, "loss": 0.0568, "step": 237030 }, { "epoch": 2.5326139216838506, "grad_norm": 0.006268475204706192, "learning_rate": 9.438119092824231e-07, "loss": 0.0134, "step": 237040 }, { "epoch": 2.53272076499813, "grad_norm": 0.007090792525559664, "learning_rate": 9.438041710741855e-07, "loss": 0.0121, "step": 237050 }, { "epoch": 2.53282760831241, "grad_norm": 16.918140411376953, "learning_rate": 9.437964323648596e-07, "loss": 0.0231, "step": 237060 }, { "epoch": 2.5329344516266894, "grad_norm": 0.94507896900177, "learning_rate": 9.43788693154454e-07, "loss": 0.022, "step": 237070 }, { "epoch": 2.5330412949409693, "grad_norm": 14.01130199432373, "learning_rate": 9.437809534429775e-07, "loss": 0.0278, "step": 237080 }, { "epoch": 2.5331481382552488, "grad_norm": 2.024148941040039, "learning_rate": 9.437732132304387e-07, "loss": 0.0067, "step": 237090 }, { "epoch": 2.533254981569528, "grad_norm": 0.035085905343294144, "learning_rate": 9.437654725168465e-07, "loss": 0.0152, "step": 237100 }, { "epoch": 2.5333618248838077, "grad_norm": 0.07362879812717438, "learning_rate": 9.437577313022096e-07, "loss": 0.0125, "step": 237110 }, { "epoch": 2.5334686681980876, "grad_norm": 5.7380876541137695, "learning_rate": 9.437499895865368e-07, "loss": 0.0183, "step": 237120 }, { "epoch": 2.533575511512367, "grad_norm": 0.06632976979017258, "learning_rate": 9.437422473698366e-07, "loss": 0.0089, "step": 237130 }, { "epoch": 2.533682354826647, "grad_norm": 0.029466675594449043, "learning_rate": 9.437345046521181e-07, "loss": 0.0181, "step": 237140 }, { "epoch": 2.5337891981409264, "grad_norm": 5.276520729064941, "learning_rate": 9.437267614333896e-07, "loss": 0.0124, "step": 237150 }, { "epoch": 2.533896041455206, "grad_norm": 1.0172924995422363, "learning_rate": 9.437190177136603e-07, "loss": 0.0248, "step": 237160 }, { "epoch": 2.5340028847694853, "grad_norm": 5.3869404792785645, "learning_rate": 9.437112734929387e-07, "loss": 0.0375, "step": 237170 }, { "epoch": 2.5341097280837652, "grad_norm": 0.0481596514582634, "learning_rate": 9.437035287712336e-07, "loss": 0.0175, "step": 237180 }, { "epoch": 2.5342165713980447, "grad_norm": 0.0061542741023004055, "learning_rate": 9.436957835485537e-07, "loss": 0.0118, "step": 237190 }, { "epoch": 2.5343234147123246, "grad_norm": 0.021104533225297928, "learning_rate": 9.436880378249079e-07, "loss": 0.0128, "step": 237200 }, { "epoch": 2.534430258026604, "grad_norm": 0.02572178654372692, "learning_rate": 9.436802916003046e-07, "loss": 0.0081, "step": 237210 }, { "epoch": 2.5345371013408835, "grad_norm": 0.6618082523345947, "learning_rate": 9.436725448747529e-07, "loss": 0.0113, "step": 237220 }, { "epoch": 2.534643944655163, "grad_norm": 11.638785362243652, "learning_rate": 9.436647976482615e-07, "loss": 0.0245, "step": 237230 }, { "epoch": 2.534750787969443, "grad_norm": 2.8848485946655273, "learning_rate": 9.43657049920839e-07, "loss": 0.0195, "step": 237240 }, { "epoch": 2.5348576312837223, "grad_norm": 1.0340889692306519, "learning_rate": 9.43649301692494e-07, "loss": 0.018, "step": 237250 }, { "epoch": 2.5349644745980022, "grad_norm": 2.265752077102661, "learning_rate": 9.436415529632355e-07, "loss": 0.0134, "step": 237260 }, { "epoch": 2.5350713179122817, "grad_norm": 0.406557559967041, "learning_rate": 9.436338037330725e-07, "loss": 0.0327, "step": 237270 }, { "epoch": 2.535178161226561, "grad_norm": 3.1046664714813232, "learning_rate": 9.436260540020132e-07, "loss": 0.0134, "step": 237280 }, { "epoch": 2.5352850045408406, "grad_norm": 0.0310902651399374, "learning_rate": 9.436183037700667e-07, "loss": 0.0112, "step": 237290 }, { "epoch": 2.5353918478551205, "grad_norm": 3.35394024848938, "learning_rate": 9.436105530372416e-07, "loss": 0.0207, "step": 237300 }, { "epoch": 2.5354986911694, "grad_norm": 0.03976821154356003, "learning_rate": 9.436028018035466e-07, "loss": 0.0986, "step": 237310 }, { "epoch": 2.53560553448368, "grad_norm": 0.004146192222833633, "learning_rate": 9.435950500689907e-07, "loss": 0.0239, "step": 237320 }, { "epoch": 2.5357123777979593, "grad_norm": 6.515666484832764, "learning_rate": 9.435872978335826e-07, "loss": 0.0145, "step": 237330 }, { "epoch": 2.535819221112239, "grad_norm": 0.004235090222209692, "learning_rate": 9.435795450973308e-07, "loss": 0.0318, "step": 237340 }, { "epoch": 2.5359260644265182, "grad_norm": 0.13466010987758636, "learning_rate": 9.435717918602442e-07, "loss": 0.0196, "step": 237350 }, { "epoch": 2.536032907740798, "grad_norm": 0.24992705881595612, "learning_rate": 9.435640381223317e-07, "loss": 0.0155, "step": 237360 }, { "epoch": 2.5361397510550776, "grad_norm": 0.06732325255870819, "learning_rate": 9.435562838836019e-07, "loss": 0.0255, "step": 237370 }, { "epoch": 2.5362465943693575, "grad_norm": 0.16707782447338104, "learning_rate": 9.435485291440635e-07, "loss": 0.0045, "step": 237380 }, { "epoch": 2.536353437683637, "grad_norm": 0.35874858498573303, "learning_rate": 9.435407739037253e-07, "loss": 0.0221, "step": 237390 }, { "epoch": 2.5364602809979164, "grad_norm": 0.007676547393202782, "learning_rate": 9.435330181625962e-07, "loss": 0.0062, "step": 237400 }, { "epoch": 2.5365671243121963, "grad_norm": 0.012103806249797344, "learning_rate": 9.435252619206848e-07, "loss": 0.0118, "step": 237410 }, { "epoch": 2.536673967626476, "grad_norm": 2.9426956176757812, "learning_rate": 9.43517505178e-07, "loss": 0.0163, "step": 237420 }, { "epoch": 2.5367808109407552, "grad_norm": 0.003293189685791731, "learning_rate": 9.435097479345503e-07, "loss": 0.0244, "step": 237430 }, { "epoch": 2.536887654255035, "grad_norm": 7.089386940002441, "learning_rate": 9.435019901903447e-07, "loss": 0.0295, "step": 237440 }, { "epoch": 2.5369944975693146, "grad_norm": 0.09802243858575821, "learning_rate": 9.434942319453919e-07, "loss": 0.0271, "step": 237450 }, { "epoch": 2.537101340883594, "grad_norm": 0.9186884760856628, "learning_rate": 9.434864731997005e-07, "loss": 0.0377, "step": 237460 }, { "epoch": 2.537208184197874, "grad_norm": 0.03202872723340988, "learning_rate": 9.434787139532796e-07, "loss": 0.052, "step": 237470 }, { "epoch": 2.5373150275121534, "grad_norm": 6.217782020568848, "learning_rate": 9.434709542061377e-07, "loss": 0.0146, "step": 237480 }, { "epoch": 2.537421870826433, "grad_norm": 0.2575911283493042, "learning_rate": 9.434631939582836e-07, "loss": 0.0108, "step": 237490 }, { "epoch": 2.537528714140713, "grad_norm": 2.033280611038208, "learning_rate": 9.434554332097261e-07, "loss": 0.0152, "step": 237500 }, { "epoch": 2.5376355574549923, "grad_norm": 0.5934579372406006, "learning_rate": 9.434476719604739e-07, "loss": 0.0196, "step": 237510 }, { "epoch": 2.5377424007692717, "grad_norm": 0.10144898295402527, "learning_rate": 9.434399102105359e-07, "loss": 0.0227, "step": 237520 }, { "epoch": 2.5378492440835516, "grad_norm": 0.014120534993708134, "learning_rate": 9.434321479599206e-07, "loss": 0.004, "step": 237530 }, { "epoch": 2.537956087397831, "grad_norm": 1.466816782951355, "learning_rate": 9.434243852086372e-07, "loss": 0.0116, "step": 237540 }, { "epoch": 2.5380629307121105, "grad_norm": 1.1578086614608765, "learning_rate": 9.434166219566939e-07, "loss": 0.0055, "step": 237550 }, { "epoch": 2.5381697740263904, "grad_norm": 0.2740781903266907, "learning_rate": 9.434088582041e-07, "loss": 0.0198, "step": 237560 }, { "epoch": 2.53827661734067, "grad_norm": 0.010749978944659233, "learning_rate": 9.43401093950864e-07, "loss": 0.0325, "step": 237570 }, { "epoch": 2.5383834606549494, "grad_norm": 4.397646903991699, "learning_rate": 9.433933291969946e-07, "loss": 0.0329, "step": 237580 }, { "epoch": 2.5384903039692293, "grad_norm": 0.03695265203714371, "learning_rate": 9.433855639425008e-07, "loss": 0.0187, "step": 237590 }, { "epoch": 2.5385971472835087, "grad_norm": 1.0789872407913208, "learning_rate": 9.433777981873912e-07, "loss": 0.0233, "step": 237600 }, { "epoch": 2.5387039905977886, "grad_norm": 1.6974623203277588, "learning_rate": 9.433700319316744e-07, "loss": 0.0107, "step": 237610 }, { "epoch": 2.538810833912068, "grad_norm": 2.1254308223724365, "learning_rate": 9.433622651753597e-07, "loss": 0.0444, "step": 237620 }, { "epoch": 2.5389176772263475, "grad_norm": 0.03268251195549965, "learning_rate": 9.433544979184553e-07, "loss": 0.0144, "step": 237630 }, { "epoch": 2.539024520540627, "grad_norm": 2.9711790084838867, "learning_rate": 9.433467301609703e-07, "loss": 0.0858, "step": 237640 }, { "epoch": 2.539131363854907, "grad_norm": 1.4455991983413696, "learning_rate": 9.433389619029133e-07, "loss": 0.0094, "step": 237650 }, { "epoch": 2.5392382071691864, "grad_norm": 2.748491048812866, "learning_rate": 9.433311931442932e-07, "loss": 0.0297, "step": 237660 }, { "epoch": 2.5393450504834663, "grad_norm": 1.0136735439300537, "learning_rate": 9.433234238851189e-07, "loss": 0.0415, "step": 237670 }, { "epoch": 2.5394518937977457, "grad_norm": 5.194852352142334, "learning_rate": 9.433156541253987e-07, "loss": 0.0063, "step": 237680 }, { "epoch": 2.539558737112025, "grad_norm": 2.824963331222534, "learning_rate": 9.433078838651418e-07, "loss": 0.0245, "step": 237690 }, { "epoch": 2.5396655804263046, "grad_norm": 1.0165367126464844, "learning_rate": 9.433001131043569e-07, "loss": 0.0238, "step": 237700 }, { "epoch": 2.5397724237405845, "grad_norm": 0.13857252895832062, "learning_rate": 9.432923418430526e-07, "loss": 0.0566, "step": 237710 }, { "epoch": 2.539879267054864, "grad_norm": 0.4549964964389801, "learning_rate": 9.432845700812378e-07, "loss": 0.0085, "step": 237720 }, { "epoch": 2.539986110369144, "grad_norm": 0.6240588426589966, "learning_rate": 9.432767978189213e-07, "loss": 0.0061, "step": 237730 }, { "epoch": 2.5400929536834234, "grad_norm": 0.22464905679225922, "learning_rate": 9.432690250561119e-07, "loss": 0.0045, "step": 237740 }, { "epoch": 2.540199796997703, "grad_norm": 0.13559958338737488, "learning_rate": 9.432612517928183e-07, "loss": 0.0164, "step": 237750 }, { "epoch": 2.5403066403119823, "grad_norm": 0.9734987020492554, "learning_rate": 9.432534780290494e-07, "loss": 0.0136, "step": 237760 }, { "epoch": 2.540413483626262, "grad_norm": 0.05509772151708603, "learning_rate": 9.432457037648136e-07, "loss": 0.0065, "step": 237770 }, { "epoch": 2.5405203269405416, "grad_norm": 0.03543141111731529, "learning_rate": 9.4323792900012e-07, "loss": 0.065, "step": 237780 }, { "epoch": 2.5406271702548215, "grad_norm": 0.19115592539310455, "learning_rate": 9.432301537349774e-07, "loss": 0.0064, "step": 237790 }, { "epoch": 2.540734013569101, "grad_norm": 2.2129507064819336, "learning_rate": 9.432223779693945e-07, "loss": 0.0076, "step": 237800 }, { "epoch": 2.5408408568833805, "grad_norm": 0.010573280043900013, "learning_rate": 9.432146017033802e-07, "loss": 0.0047, "step": 237810 }, { "epoch": 2.54094770019766, "grad_norm": 0.021111609414219856, "learning_rate": 9.43206824936943e-07, "loss": 0.0044, "step": 237820 }, { "epoch": 2.54105454351194, "grad_norm": 0.018749132752418518, "learning_rate": 9.43199047670092e-07, "loss": 0.014, "step": 237830 }, { "epoch": 2.5411613868262193, "grad_norm": 0.011824547313153744, "learning_rate": 9.431912699028356e-07, "loss": 0.0085, "step": 237840 }, { "epoch": 2.541268230140499, "grad_norm": 12.701922416687012, "learning_rate": 9.43183491635183e-07, "loss": 0.013, "step": 237850 }, { "epoch": 2.5413750734547786, "grad_norm": 0.02166152372956276, "learning_rate": 9.431757128671427e-07, "loss": 0.0229, "step": 237860 }, { "epoch": 2.541481916769058, "grad_norm": 0.35414761304855347, "learning_rate": 9.431679335987236e-07, "loss": 0.0183, "step": 237870 }, { "epoch": 2.5415887600833376, "grad_norm": 0.09052597731351852, "learning_rate": 9.431601538299344e-07, "loss": 0.0686, "step": 237880 }, { "epoch": 2.5416956033976175, "grad_norm": 0.044819317758083344, "learning_rate": 9.43152373560784e-07, "loss": 0.057, "step": 237890 }, { "epoch": 2.541802446711897, "grad_norm": 0.32325443625450134, "learning_rate": 9.431445927912811e-07, "loss": 0.0237, "step": 237900 }, { "epoch": 2.541909290026177, "grad_norm": 2.1863088607788086, "learning_rate": 9.431368115214346e-07, "loss": 0.0134, "step": 237910 }, { "epoch": 2.5420161333404563, "grad_norm": 0.8839656114578247, "learning_rate": 9.431290297512531e-07, "loss": 0.017, "step": 237920 }, { "epoch": 2.5421229766547357, "grad_norm": 5.94533634185791, "learning_rate": 9.431212474807455e-07, "loss": 0.0441, "step": 237930 }, { "epoch": 2.542229819969015, "grad_norm": 0.004427980165928602, "learning_rate": 9.431134647099205e-07, "loss": 0.0012, "step": 237940 }, { "epoch": 2.542336663283295, "grad_norm": 2.551846742630005, "learning_rate": 9.43105681438787e-07, "loss": 0.0077, "step": 237950 }, { "epoch": 2.5424435065975746, "grad_norm": 4.192008018493652, "learning_rate": 9.430978976673538e-07, "loss": 0.0078, "step": 237960 }, { "epoch": 2.5425503499118545, "grad_norm": 0.14502641558647156, "learning_rate": 9.430901133956297e-07, "loss": 0.0073, "step": 237970 }, { "epoch": 2.542657193226134, "grad_norm": 0.3743112087249756, "learning_rate": 9.430823286236231e-07, "loss": 0.0098, "step": 237980 }, { "epoch": 2.5427640365404134, "grad_norm": 0.6386351585388184, "learning_rate": 9.430745433513434e-07, "loss": 0.0128, "step": 237990 }, { "epoch": 2.542870879854693, "grad_norm": 0.27213671803474426, "learning_rate": 9.430667575787989e-07, "loss": 0.0336, "step": 238000 }, { "epoch": 2.5429777231689727, "grad_norm": 0.11187353730201721, "learning_rate": 9.430589713059989e-07, "loss": 0.0315, "step": 238010 }, { "epoch": 2.543084566483252, "grad_norm": 2.195160388946533, "learning_rate": 9.430511845329516e-07, "loss": 0.009, "step": 238020 }, { "epoch": 2.543191409797532, "grad_norm": 2.5628833770751953, "learning_rate": 9.430433972596662e-07, "loss": 0.0096, "step": 238030 }, { "epoch": 2.5432982531118116, "grad_norm": 7.053081512451172, "learning_rate": 9.430356094861514e-07, "loss": 0.0234, "step": 238040 }, { "epoch": 2.543405096426091, "grad_norm": 0.28258374333381653, "learning_rate": 9.430278212124159e-07, "loss": 0.0133, "step": 238050 }, { "epoch": 2.5435119397403705, "grad_norm": 0.44658970832824707, "learning_rate": 9.430200324384686e-07, "loss": 0.023, "step": 238060 }, { "epoch": 2.5436187830546504, "grad_norm": 0.002372093265876174, "learning_rate": 9.430122431643182e-07, "loss": 0.0287, "step": 238070 }, { "epoch": 2.54372562636893, "grad_norm": 3.946655511856079, "learning_rate": 9.430044533899735e-07, "loss": 0.0384, "step": 238080 }, { "epoch": 2.5438324696832098, "grad_norm": 0.009266957640647888, "learning_rate": 9.429966631154434e-07, "loss": 0.0231, "step": 238090 }, { "epoch": 2.543939312997489, "grad_norm": 0.02648342214524746, "learning_rate": 9.429888723407365e-07, "loss": 0.0093, "step": 238100 }, { "epoch": 2.5440461563117687, "grad_norm": 12.216052055358887, "learning_rate": 9.429810810658618e-07, "loss": 0.0315, "step": 238110 }, { "epoch": 2.544152999626048, "grad_norm": 6.568530559539795, "learning_rate": 9.429732892908281e-07, "loss": 0.0304, "step": 238120 }, { "epoch": 2.544259842940328, "grad_norm": 1.022326946258545, "learning_rate": 9.429654970156441e-07, "loss": 0.0242, "step": 238130 }, { "epoch": 2.5443666862546075, "grad_norm": 0.3483631908893585, "learning_rate": 9.429577042403186e-07, "loss": 0.0138, "step": 238140 }, { "epoch": 2.5444735295688874, "grad_norm": 1.628676176071167, "learning_rate": 9.429499109648604e-07, "loss": 0.0196, "step": 238150 }, { "epoch": 2.544580372883167, "grad_norm": 0.060589469969272614, "learning_rate": 9.429421171892784e-07, "loss": 0.0033, "step": 238160 }, { "epoch": 2.5446872161974463, "grad_norm": 0.051182445138692856, "learning_rate": 9.429343229135814e-07, "loss": 0.0387, "step": 238170 }, { "epoch": 2.544794059511726, "grad_norm": 1.689327359199524, "learning_rate": 9.429265281377778e-07, "loss": 0.017, "step": 238180 }, { "epoch": 2.5449009028260057, "grad_norm": 2.2585558891296387, "learning_rate": 9.42918732861877e-07, "loss": 0.0324, "step": 238190 }, { "epoch": 2.545007746140285, "grad_norm": 0.43312868475914, "learning_rate": 9.429109370858874e-07, "loss": 0.0077, "step": 238200 }, { "epoch": 2.545114589454565, "grad_norm": 3.6365509033203125, "learning_rate": 9.429031408098179e-07, "loss": 0.0105, "step": 238210 }, { "epoch": 2.5452214327688445, "grad_norm": 0.005286649335175753, "learning_rate": 9.428953440336775e-07, "loss": 0.0145, "step": 238220 }, { "epoch": 2.545328276083124, "grad_norm": 12.756890296936035, "learning_rate": 9.428875467574746e-07, "loss": 0.0223, "step": 238230 }, { "epoch": 2.545435119397404, "grad_norm": 0.005405094008892775, "learning_rate": 9.428797489812183e-07, "loss": 0.0028, "step": 238240 }, { "epoch": 2.5455419627116833, "grad_norm": 1.5398277044296265, "learning_rate": 9.428719507049174e-07, "loss": 0.0606, "step": 238250 }, { "epoch": 2.5456488060259628, "grad_norm": 0.022347327321767807, "learning_rate": 9.428641519285807e-07, "loss": 0.0223, "step": 238260 }, { "epoch": 2.5457556493402427, "grad_norm": 0.5380733013153076, "learning_rate": 9.428563526522168e-07, "loss": 0.0054, "step": 238270 }, { "epoch": 2.545862492654522, "grad_norm": 1.5332306623458862, "learning_rate": 9.428485528758348e-07, "loss": 0.0203, "step": 238280 }, { "epoch": 2.5459693359688016, "grad_norm": 5.177090644836426, "learning_rate": 9.428407525994433e-07, "loss": 0.015, "step": 238290 }, { "epoch": 2.5460761792830815, "grad_norm": 2.5180840492248535, "learning_rate": 9.428329518230511e-07, "loss": 0.0192, "step": 238300 }, { "epoch": 2.546183022597361, "grad_norm": 0.014750567264854908, "learning_rate": 9.428251505466671e-07, "loss": 0.015, "step": 238310 }, { "epoch": 2.5462898659116404, "grad_norm": 27.06715202331543, "learning_rate": 9.428173487703002e-07, "loss": 0.0099, "step": 238320 }, { "epoch": 2.5463967092259203, "grad_norm": 0.06891218572854996, "learning_rate": 9.428095464939589e-07, "loss": 0.0057, "step": 238330 }, { "epoch": 2.5465035525401998, "grad_norm": 1.2699133157730103, "learning_rate": 9.428017437176523e-07, "loss": 0.0167, "step": 238340 }, { "epoch": 2.5466103958544792, "grad_norm": 2.1719961166381836, "learning_rate": 9.427939404413891e-07, "loss": 0.0165, "step": 238350 }, { "epoch": 2.546717239168759, "grad_norm": 0.07032135128974915, "learning_rate": 9.427861366651783e-07, "loss": 0.02, "step": 238360 }, { "epoch": 2.5468240824830386, "grad_norm": 14.800796508789062, "learning_rate": 9.427783323890283e-07, "loss": 0.0207, "step": 238370 }, { "epoch": 2.5469309257973185, "grad_norm": Infinity, "learning_rate": 9.427705276129483e-07, "loss": 0.029, "step": 238380 }, { "epoch": 2.547037769111598, "grad_norm": 0.5180774331092834, "learning_rate": 9.427627223369467e-07, "loss": 0.0198, "step": 238390 }, { "epoch": 2.5471446124258774, "grad_norm": 0.0575508177280426, "learning_rate": 9.42754916561033e-07, "loss": 0.0236, "step": 238400 }, { "epoch": 2.547251455740157, "grad_norm": 0.06577832251787186, "learning_rate": 9.427471102852152e-07, "loss": 0.0441, "step": 238410 }, { "epoch": 2.547358299054437, "grad_norm": 9.25125503540039, "learning_rate": 9.427393035095025e-07, "loss": 0.0058, "step": 238420 }, { "epoch": 2.5474651423687162, "grad_norm": 0.6449125409126282, "learning_rate": 9.427314962339039e-07, "loss": 0.0103, "step": 238430 }, { "epoch": 2.547571985682996, "grad_norm": 5.879789352416992, "learning_rate": 9.427236884584279e-07, "loss": 0.0191, "step": 238440 }, { "epoch": 2.5476788289972756, "grad_norm": 2.12819766998291, "learning_rate": 9.427158801830836e-07, "loss": 0.0317, "step": 238450 }, { "epoch": 2.547785672311555, "grad_norm": 0.5281773805618286, "learning_rate": 9.427080714078796e-07, "loss": 0.1087, "step": 238460 }, { "epoch": 2.5478925156258345, "grad_norm": 0.22407376766204834, "learning_rate": 9.427002621328247e-07, "loss": 0.0112, "step": 238470 }, { "epoch": 2.5479993589401144, "grad_norm": 0.040089163929224014, "learning_rate": 9.426924523579278e-07, "loss": 0.0167, "step": 238480 }, { "epoch": 2.548106202254394, "grad_norm": 0.15749803185462952, "learning_rate": 9.426846420831978e-07, "loss": 0.0295, "step": 238490 }, { "epoch": 2.548213045568674, "grad_norm": 2.482205390930176, "learning_rate": 9.426768313086433e-07, "loss": 0.0186, "step": 238500 }, { "epoch": 2.5483198888829532, "grad_norm": 0.2133847177028656, "learning_rate": 9.426690200342734e-07, "loss": 0.0245, "step": 238510 }, { "epoch": 2.5484267321972327, "grad_norm": 0.012598227709531784, "learning_rate": 9.426612082600967e-07, "loss": 0.0284, "step": 238520 }, { "epoch": 2.548533575511512, "grad_norm": 0.013989229686558247, "learning_rate": 9.42653395986122e-07, "loss": 0.0109, "step": 238530 }, { "epoch": 2.548640418825792, "grad_norm": 0.6650708317756653, "learning_rate": 9.426455832123583e-07, "loss": 0.002, "step": 238540 }, { "epoch": 2.5487472621400715, "grad_norm": 0.2676447927951813, "learning_rate": 9.426377699388142e-07, "loss": 0.0176, "step": 238550 }, { "epoch": 2.5488541054543514, "grad_norm": 0.07354437559843063, "learning_rate": 9.426299561654989e-07, "loss": 0.0241, "step": 238560 }, { "epoch": 2.548960948768631, "grad_norm": 0.5798287391662598, "learning_rate": 9.426221418924207e-07, "loss": 0.0103, "step": 238570 }, { "epoch": 2.5490677920829103, "grad_norm": 0.04701188579201698, "learning_rate": 9.426143271195888e-07, "loss": 0.033, "step": 238580 }, { "epoch": 2.54917463539719, "grad_norm": 0.0021945631597191095, "learning_rate": 9.42606511847012e-07, "loss": 0.0175, "step": 238590 }, { "epoch": 2.5492814787114697, "grad_norm": 0.007002157624810934, "learning_rate": 9.42598696074699e-07, "loss": 0.0964, "step": 238600 }, { "epoch": 2.549388322025749, "grad_norm": 1.3334813117980957, "learning_rate": 9.425908798026587e-07, "loss": 0.0161, "step": 238610 }, { "epoch": 2.549495165340029, "grad_norm": 0.004707669839262962, "learning_rate": 9.425830630308998e-07, "loss": 0.011, "step": 238620 }, { "epoch": 2.5496020086543085, "grad_norm": 0.006358094047755003, "learning_rate": 9.425752457594313e-07, "loss": 0.0067, "step": 238630 }, { "epoch": 2.549708851968588, "grad_norm": 0.010845842771232128, "learning_rate": 9.425674279882618e-07, "loss": 0.0327, "step": 238640 }, { "epoch": 2.5498156952828674, "grad_norm": 2.301133632659912, "learning_rate": 9.425596097174005e-07, "loss": 0.0511, "step": 238650 }, { "epoch": 2.5499225385971473, "grad_norm": 0.028229404240846634, "learning_rate": 9.425517909468559e-07, "loss": 0.0242, "step": 238660 }, { "epoch": 2.550029381911427, "grad_norm": 0.002209343248978257, "learning_rate": 9.425439716766368e-07, "loss": 0.0267, "step": 238670 }, { "epoch": 2.5501362252257067, "grad_norm": 1.826054573059082, "learning_rate": 9.425361519067523e-07, "loss": 0.0096, "step": 238680 }, { "epoch": 2.550243068539986, "grad_norm": 5.847806930541992, "learning_rate": 9.42528331637211e-07, "loss": 0.0253, "step": 238690 }, { "epoch": 2.5503499118542656, "grad_norm": 2.2621724605560303, "learning_rate": 9.425205108680218e-07, "loss": 0.0264, "step": 238700 }, { "epoch": 2.550456755168545, "grad_norm": 0.9580999612808228, "learning_rate": 9.425126895991937e-07, "loss": 0.0106, "step": 238710 }, { "epoch": 2.550563598482825, "grad_norm": 0.26583606004714966, "learning_rate": 9.425048678307352e-07, "loss": 0.0129, "step": 238720 }, { "epoch": 2.5506704417971044, "grad_norm": 0.6897248029708862, "learning_rate": 9.424970455626556e-07, "loss": 0.0072, "step": 238730 }, { "epoch": 2.5507772851113844, "grad_norm": 1.1152596473693848, "learning_rate": 9.424892227949631e-07, "loss": 0.0227, "step": 238740 }, { "epoch": 2.550884128425664, "grad_norm": 0.5152246952056885, "learning_rate": 9.424813995276671e-07, "loss": 0.0216, "step": 238750 }, { "epoch": 2.5509909717399433, "grad_norm": 1.3580445051193237, "learning_rate": 9.42473575760776e-07, "loss": 0.0112, "step": 238760 }, { "epoch": 2.5510978150542227, "grad_norm": 0.45976316928863525, "learning_rate": 9.424657514942989e-07, "loss": 0.0044, "step": 238770 }, { "epoch": 2.5512046583685026, "grad_norm": 3.632462501525879, "learning_rate": 9.424579267282447e-07, "loss": 0.0067, "step": 238780 }, { "epoch": 2.551311501682782, "grad_norm": 0.08557932078838348, "learning_rate": 9.424501014626221e-07, "loss": 0.0374, "step": 238790 }, { "epoch": 2.551418344997062, "grad_norm": 0.1002858579158783, "learning_rate": 9.424422756974397e-07, "loss": 0.0003, "step": 238800 }, { "epoch": 2.5515251883113415, "grad_norm": 0.001975587336346507, "learning_rate": 9.424344494327068e-07, "loss": 0.0201, "step": 238810 }, { "epoch": 2.551632031625621, "grad_norm": 3.7568862438201904, "learning_rate": 9.42426622668432e-07, "loss": 0.0105, "step": 238820 }, { "epoch": 2.5517388749399004, "grad_norm": 0.01061240304261446, "learning_rate": 9.424187954046241e-07, "loss": 0.0132, "step": 238830 }, { "epoch": 2.5518457182541803, "grad_norm": 2.255399465560913, "learning_rate": 9.424109676412919e-07, "loss": 0.0067, "step": 238840 }, { "epoch": 2.5519525615684597, "grad_norm": 8.124661445617676, "learning_rate": 9.424031393784445e-07, "loss": 0.0286, "step": 238850 }, { "epoch": 2.5520594048827396, "grad_norm": 0.0025319044943898916, "learning_rate": 9.423953106160905e-07, "loss": 0.018, "step": 238860 }, { "epoch": 2.552166248197019, "grad_norm": 0.8721495270729065, "learning_rate": 9.423874813542387e-07, "loss": 0.0122, "step": 238870 }, { "epoch": 2.5522730915112986, "grad_norm": 7.83573579788208, "learning_rate": 9.423796515928982e-07, "loss": 0.019, "step": 238880 }, { "epoch": 2.5523799348255785, "grad_norm": 0.0660708099603653, "learning_rate": 9.423718213320777e-07, "loss": 0.0266, "step": 238890 }, { "epoch": 2.552486778139858, "grad_norm": 0.004486721009016037, "learning_rate": 9.42363990571786e-07, "loss": 0.018, "step": 238900 }, { "epoch": 2.5525936214541374, "grad_norm": 2.9895882606506348, "learning_rate": 9.423561593120319e-07, "loss": 0.0311, "step": 238910 }, { "epoch": 2.5527004647684173, "grad_norm": 5.504727363586426, "learning_rate": 9.423483275528242e-07, "loss": 0.0731, "step": 238920 }, { "epoch": 2.5528073080826967, "grad_norm": 1.9293280839920044, "learning_rate": 9.42340495294172e-07, "loss": 0.0169, "step": 238930 }, { "epoch": 2.552914151396976, "grad_norm": 0.580986738204956, "learning_rate": 9.42332662536084e-07, "loss": 0.0646, "step": 238940 }, { "epoch": 2.553020994711256, "grad_norm": 0.5447623133659363, "learning_rate": 9.42324829278569e-07, "loss": 0.0183, "step": 238950 }, { "epoch": 2.5531278380255356, "grad_norm": 0.041092853993177414, "learning_rate": 9.423169955216359e-07, "loss": 0.0141, "step": 238960 }, { "epoch": 2.553234681339815, "grad_norm": 6.278682231903076, "learning_rate": 9.423091612652936e-07, "loss": 0.0464, "step": 238970 }, { "epoch": 2.553341524654095, "grad_norm": 0.025402117520570755, "learning_rate": 9.423013265095508e-07, "loss": 0.0082, "step": 238980 }, { "epoch": 2.5534483679683744, "grad_norm": 2.872825860977173, "learning_rate": 9.422934912544164e-07, "loss": 0.0276, "step": 238990 }, { "epoch": 2.553555211282654, "grad_norm": 2.644028663635254, "learning_rate": 9.422856554998993e-07, "loss": 0.0144, "step": 239000 }, { "epoch": 2.5536620545969337, "grad_norm": 10.419341087341309, "learning_rate": 9.422778192460084e-07, "loss": 0.06, "step": 239010 }, { "epoch": 2.553768897911213, "grad_norm": 3.233736753463745, "learning_rate": 9.422699824927522e-07, "loss": 0.0168, "step": 239020 }, { "epoch": 2.5538757412254927, "grad_norm": 0.029627788811922073, "learning_rate": 9.422621452401401e-07, "loss": 0.0195, "step": 239030 }, { "epoch": 2.5539825845397726, "grad_norm": 0.03435899317264557, "learning_rate": 9.422543074881805e-07, "loss": 0.0052, "step": 239040 }, { "epoch": 2.554089427854052, "grad_norm": 0.03934904560446739, "learning_rate": 9.422464692368824e-07, "loss": 0.0024, "step": 239050 }, { "epoch": 2.5541962711683315, "grad_norm": 1.6619431972503662, "learning_rate": 9.422386304862546e-07, "loss": 0.0371, "step": 239060 }, { "epoch": 2.5543031144826114, "grad_norm": 0.004160845186561346, "learning_rate": 9.422307912363063e-07, "loss": 0.0173, "step": 239070 }, { "epoch": 2.554409957796891, "grad_norm": 12.94628620147705, "learning_rate": 9.422229514870457e-07, "loss": 0.0844, "step": 239080 }, { "epoch": 2.5545168011111707, "grad_norm": 2.8003995418548584, "learning_rate": 9.422151112384823e-07, "loss": 0.0395, "step": 239090 }, { "epoch": 2.55462364442545, "grad_norm": 0.08635596185922623, "learning_rate": 9.422072704906245e-07, "loss": 0.0051, "step": 239100 }, { "epoch": 2.5547304877397297, "grad_norm": 2.905215263366699, "learning_rate": 9.421994292434814e-07, "loss": 0.0071, "step": 239110 }, { "epoch": 2.554837331054009, "grad_norm": 0.0017288465751335025, "learning_rate": 9.421915874970617e-07, "loss": 0.0413, "step": 239120 }, { "epoch": 2.554944174368289, "grad_norm": 0.9877526760101318, "learning_rate": 9.421837452513744e-07, "loss": 0.0148, "step": 239130 }, { "epoch": 2.5550510176825685, "grad_norm": 2.0377442836761475, "learning_rate": 9.421759025064282e-07, "loss": 0.0137, "step": 239140 }, { "epoch": 2.5551578609968484, "grad_norm": 4.836214542388916, "learning_rate": 9.421680592622321e-07, "loss": 0.0436, "step": 239150 }, { "epoch": 2.555264704311128, "grad_norm": 1.0546956062316895, "learning_rate": 9.421602155187949e-07, "loss": 0.0135, "step": 239160 }, { "epoch": 2.5553715476254073, "grad_norm": 6.637853145599365, "learning_rate": 9.421523712761253e-07, "loss": 0.0173, "step": 239170 }, { "epoch": 2.5554783909396868, "grad_norm": 4.6591105461120605, "learning_rate": 9.421445265342325e-07, "loss": 0.0237, "step": 239180 }, { "epoch": 2.5555852342539667, "grad_norm": 0.05009268596768379, "learning_rate": 9.421366812931251e-07, "loss": 0.0038, "step": 239190 }, { "epoch": 2.555692077568246, "grad_norm": 1.922531008720398, "learning_rate": 9.421288355528119e-07, "loss": 0.0236, "step": 239200 }, { "epoch": 2.555798920882526, "grad_norm": 1.3242132663726807, "learning_rate": 9.42120989313302e-07, "loss": 0.0183, "step": 239210 }, { "epoch": 2.5559057641968055, "grad_norm": 0.048493094742298126, "learning_rate": 9.42113142574604e-07, "loss": 0.0053, "step": 239220 }, { "epoch": 2.556012607511085, "grad_norm": 7.521878719329834, "learning_rate": 9.42105295336727e-07, "loss": 0.0937, "step": 239230 }, { "epoch": 2.5561194508253644, "grad_norm": 0.015198007225990295, "learning_rate": 9.420974475996798e-07, "loss": 0.0264, "step": 239240 }, { "epoch": 2.5562262941396443, "grad_norm": 10.671058654785156, "learning_rate": 9.420895993634712e-07, "loss": 0.0343, "step": 239250 }, { "epoch": 2.5563331374539238, "grad_norm": 11.837261199951172, "learning_rate": 9.4208175062811e-07, "loss": 0.0513, "step": 239260 }, { "epoch": 2.5564399807682037, "grad_norm": 5.343117713928223, "learning_rate": 9.420739013936052e-07, "loss": 0.0407, "step": 239270 }, { "epoch": 2.556546824082483, "grad_norm": 0.2785421311855316, "learning_rate": 9.420660516599655e-07, "loss": 0.0091, "step": 239280 }, { "epoch": 2.5566536673967626, "grad_norm": 4.360348224639893, "learning_rate": 9.420582014272001e-07, "loss": 0.0151, "step": 239290 }, { "epoch": 2.556760510711042, "grad_norm": 3.1599390506744385, "learning_rate": 9.420503506953173e-07, "loss": 0.0107, "step": 239300 }, { "epoch": 2.556867354025322, "grad_norm": 0.06908292323350906, "learning_rate": 9.420424994643266e-07, "loss": 0.0025, "step": 239310 }, { "epoch": 2.5569741973396014, "grad_norm": 4.8755035400390625, "learning_rate": 9.420346477342363e-07, "loss": 0.012, "step": 239320 }, { "epoch": 2.5570810406538813, "grad_norm": 0.09563210606575012, "learning_rate": 9.420267955050556e-07, "loss": 0.0141, "step": 239330 }, { "epoch": 2.5571878839681608, "grad_norm": 3.4168901443481445, "learning_rate": 9.420189427767934e-07, "loss": 0.009, "step": 239340 }, { "epoch": 2.5572947272824402, "grad_norm": 5.0224080085754395, "learning_rate": 9.420110895494584e-07, "loss": 0.0405, "step": 239350 }, { "epoch": 2.5574015705967197, "grad_norm": 0.012020950205624104, "learning_rate": 9.420032358230594e-07, "loss": 0.0307, "step": 239360 }, { "epoch": 2.5575084139109996, "grad_norm": 2.3490781784057617, "learning_rate": 9.419953815976055e-07, "loss": 0.0245, "step": 239370 }, { "epoch": 2.557615257225279, "grad_norm": 2.252100706100464, "learning_rate": 9.419875268731055e-07, "loss": 0.0083, "step": 239380 }, { "epoch": 2.557722100539559, "grad_norm": 0.024549677968025208, "learning_rate": 9.419796716495681e-07, "loss": 0.0235, "step": 239390 }, { "epoch": 2.5578289438538384, "grad_norm": 0.11247485131025314, "learning_rate": 9.419718159270023e-07, "loss": 0.0076, "step": 239400 }, { "epoch": 2.557935787168118, "grad_norm": 11.353897094726562, "learning_rate": 9.41963959705417e-07, "loss": 0.0109, "step": 239410 }, { "epoch": 2.5580426304823973, "grad_norm": 2.431055784225464, "learning_rate": 9.419561029848211e-07, "loss": 0.0097, "step": 239420 }, { "epoch": 2.5581494737966772, "grad_norm": 5.5203046798706055, "learning_rate": 9.419482457652234e-07, "loss": 0.0089, "step": 239430 }, { "epoch": 2.5582563171109567, "grad_norm": 3.0989627838134766, "learning_rate": 9.419403880466327e-07, "loss": 0.017, "step": 239440 }, { "epoch": 2.5583631604252366, "grad_norm": 0.09182930737733841, "learning_rate": 9.419325298290579e-07, "loss": 0.0246, "step": 239450 }, { "epoch": 2.558470003739516, "grad_norm": 4.035529136657715, "learning_rate": 9.41924671112508e-07, "loss": 0.0041, "step": 239460 }, { "epoch": 2.5585768470537955, "grad_norm": 1.2376124858856201, "learning_rate": 9.419168118969918e-07, "loss": 0.0478, "step": 239470 }, { "epoch": 2.558683690368075, "grad_norm": 5.37655782699585, "learning_rate": 9.419089521825182e-07, "loss": 0.0326, "step": 239480 }, { "epoch": 2.558790533682355, "grad_norm": 1.3001720905303955, "learning_rate": 9.41901091969096e-07, "loss": 0.0225, "step": 239490 }, { "epoch": 2.5588973769966343, "grad_norm": 0.07207342982292175, "learning_rate": 9.41893231256734e-07, "loss": 0.0151, "step": 239500 }, { "epoch": 2.5590042203109142, "grad_norm": 0.010444306768476963, "learning_rate": 9.418853700454414e-07, "loss": 0.0372, "step": 239510 }, { "epoch": 2.5591110636251937, "grad_norm": 0.5985032916069031, "learning_rate": 9.418775083352267e-07, "loss": 0.0335, "step": 239520 }, { "epoch": 2.559217906939473, "grad_norm": 2.7196919918060303, "learning_rate": 9.418696461260991e-07, "loss": 0.0506, "step": 239530 }, { "epoch": 2.5593247502537526, "grad_norm": 0.22207412123680115, "learning_rate": 9.418617834180671e-07, "loss": 0.0243, "step": 239540 }, { "epoch": 2.5594315935680325, "grad_norm": 1.6555989980697632, "learning_rate": 9.4185392021114e-07, "loss": 0.0212, "step": 239550 }, { "epoch": 2.559538436882312, "grad_norm": 8.021865844726562, "learning_rate": 9.418460565053263e-07, "loss": 0.0225, "step": 239560 }, { "epoch": 2.559645280196592, "grad_norm": 0.01420747209340334, "learning_rate": 9.418381923006353e-07, "loss": 0.0033, "step": 239570 }, { "epoch": 2.5597521235108713, "grad_norm": 2.177849054336548, "learning_rate": 9.418303275970754e-07, "loss": 0.0154, "step": 239580 }, { "epoch": 2.559858966825151, "grad_norm": 0.07252150028944016, "learning_rate": 9.418224623946558e-07, "loss": 0.0148, "step": 239590 }, { "epoch": 2.5599658101394303, "grad_norm": 9.360027313232422, "learning_rate": 9.418145966933852e-07, "loss": 0.0115, "step": 239600 }, { "epoch": 2.56007265345371, "grad_norm": 4.2838029861450195, "learning_rate": 9.418067304932729e-07, "loss": 0.0113, "step": 239610 }, { "epoch": 2.5601794967679896, "grad_norm": 2.863646984100342, "learning_rate": 9.417988637943271e-07, "loss": 0.0093, "step": 239620 }, { "epoch": 2.5602863400822695, "grad_norm": 14.896434783935547, "learning_rate": 9.417909965965571e-07, "loss": 0.1075, "step": 239630 }, { "epoch": 2.560393183396549, "grad_norm": 0.024021891877055168, "learning_rate": 9.417831288999718e-07, "loss": 0.0053, "step": 239640 }, { "epoch": 2.5605000267108284, "grad_norm": 0.04950876161456108, "learning_rate": 9.417752607045802e-07, "loss": 0.0147, "step": 239650 }, { "epoch": 2.5606068700251083, "grad_norm": 2.2285966873168945, "learning_rate": 9.417673920103907e-07, "loss": 0.0151, "step": 239660 }, { "epoch": 2.560713713339388, "grad_norm": 2.6199588775634766, "learning_rate": 9.417595228174125e-07, "loss": 0.0119, "step": 239670 }, { "epoch": 2.5608205566536673, "grad_norm": 0.12058898061513901, "learning_rate": 9.417516531256546e-07, "loss": 0.0208, "step": 239680 }, { "epoch": 2.560927399967947, "grad_norm": 3.415874719619751, "learning_rate": 9.417437829351256e-07, "loss": 0.0152, "step": 239690 }, { "epoch": 2.5610342432822266, "grad_norm": 2.8447258472442627, "learning_rate": 9.417359122458346e-07, "loss": 0.0222, "step": 239700 }, { "epoch": 2.561141086596506, "grad_norm": 1.393979787826538, "learning_rate": 9.417280410577904e-07, "loss": 0.0158, "step": 239710 }, { "epoch": 2.561247929910786, "grad_norm": 3.782693862915039, "learning_rate": 9.417201693710018e-07, "loss": 0.0105, "step": 239720 }, { "epoch": 2.5613547732250654, "grad_norm": 0.16170358657836914, "learning_rate": 9.417122971854779e-07, "loss": 0.0188, "step": 239730 }, { "epoch": 2.561461616539345, "grad_norm": 1.9781832695007324, "learning_rate": 9.417044245012276e-07, "loss": 0.0146, "step": 239740 }, { "epoch": 2.561568459853625, "grad_norm": 0.06594615429639816, "learning_rate": 9.416965513182595e-07, "loss": 0.0151, "step": 239750 }, { "epoch": 2.5616753031679043, "grad_norm": 0.17401252686977386, "learning_rate": 9.416886776365827e-07, "loss": 0.0149, "step": 239760 }, { "epoch": 2.5617821464821837, "grad_norm": 3.6310417652130127, "learning_rate": 9.416808034562061e-07, "loss": 0.0276, "step": 239770 }, { "epoch": 2.5618889897964636, "grad_norm": 0.026093874126672745, "learning_rate": 9.416729287771383e-07, "loss": 0.0334, "step": 239780 }, { "epoch": 2.561995833110743, "grad_norm": 0.06087953597307205, "learning_rate": 9.416650535993887e-07, "loss": 0.0331, "step": 239790 }, { "epoch": 2.5621026764250225, "grad_norm": 11.680102348327637, "learning_rate": 9.416571779229659e-07, "loss": 0.0219, "step": 239800 }, { "epoch": 2.5622095197393024, "grad_norm": 0.06434990465641022, "learning_rate": 9.416493017478787e-07, "loss": 0.0079, "step": 239810 }, { "epoch": 2.562316363053582, "grad_norm": 0.04051065444946289, "learning_rate": 9.416414250741362e-07, "loss": 0.0489, "step": 239820 }, { "epoch": 2.5624232063678614, "grad_norm": 1.4023358821868896, "learning_rate": 9.416335479017471e-07, "loss": 0.0374, "step": 239830 }, { "epoch": 2.5625300496821413, "grad_norm": 0.09247536957263947, "learning_rate": 9.416256702307204e-07, "loss": 0.0312, "step": 239840 }, { "epoch": 2.5626368929964207, "grad_norm": 2.5870931148529053, "learning_rate": 9.416177920610651e-07, "loss": 0.0835, "step": 239850 }, { "epoch": 2.5627437363107006, "grad_norm": 0.13878189027309418, "learning_rate": 9.416099133927898e-07, "loss": 0.0054, "step": 239860 }, { "epoch": 2.56285057962498, "grad_norm": 0.02442718669772148, "learning_rate": 9.416020342259037e-07, "loss": 0.0599, "step": 239870 }, { "epoch": 2.5629574229392595, "grad_norm": 0.007678444497287273, "learning_rate": 9.415941545604156e-07, "loss": 0.0299, "step": 239880 }, { "epoch": 2.563064266253539, "grad_norm": 2.1122424602508545, "learning_rate": 9.415862743963343e-07, "loss": 0.0126, "step": 239890 }, { "epoch": 2.563171109567819, "grad_norm": 0.16598516702651978, "learning_rate": 9.415783937336689e-07, "loss": 0.0196, "step": 239900 }, { "epoch": 2.5632779528820984, "grad_norm": 0.3383828401565552, "learning_rate": 9.41570512572428e-07, "loss": 0.0173, "step": 239910 }, { "epoch": 2.5633847961963783, "grad_norm": 1.0451514720916748, "learning_rate": 9.415626309126208e-07, "loss": 0.0237, "step": 239920 }, { "epoch": 2.5634916395106577, "grad_norm": 0.5145205855369568, "learning_rate": 9.415547487542559e-07, "loss": 0.0196, "step": 239930 }, { "epoch": 2.563598482824937, "grad_norm": 4.251934051513672, "learning_rate": 9.415468660973425e-07, "loss": 0.0245, "step": 239940 }, { "epoch": 2.5637053261392166, "grad_norm": 0.48565492033958435, "learning_rate": 9.415389829418894e-07, "loss": 0.037, "step": 239950 }, { "epoch": 2.5638121694534965, "grad_norm": 3.0643117427825928, "learning_rate": 9.415310992879053e-07, "loss": 0.0201, "step": 239960 }, { "epoch": 2.563919012767776, "grad_norm": 0.08390882611274719, "learning_rate": 9.415232151353993e-07, "loss": 0.0422, "step": 239970 }, { "epoch": 2.564025856082056, "grad_norm": 0.042846791446208954, "learning_rate": 9.415153304843804e-07, "loss": 0.0124, "step": 239980 }, { "epoch": 2.5641326993963354, "grad_norm": 0.031516339629888535, "learning_rate": 9.415074453348572e-07, "loss": 0.0169, "step": 239990 }, { "epoch": 2.564239542710615, "grad_norm": 0.004187255632132292, "learning_rate": 9.414995596868389e-07, "loss": 0.0192, "step": 240000 }, { "epoch": 2.5643463860248943, "grad_norm": 2.8309881687164307, "learning_rate": 9.414916735403343e-07, "loss": 0.0135, "step": 240010 }, { "epoch": 2.564453229339174, "grad_norm": 7.35617208480835, "learning_rate": 9.414837868953522e-07, "loss": 0.0222, "step": 240020 }, { "epoch": 2.5645600726534536, "grad_norm": 0.3507235646247864, "learning_rate": 9.414758997519015e-07, "loss": 0.0124, "step": 240030 }, { "epoch": 2.5646669159677336, "grad_norm": 0.09694667160511017, "learning_rate": 9.414680121099914e-07, "loss": 0.0334, "step": 240040 }, { "epoch": 2.564773759282013, "grad_norm": 0.5529466867446899, "learning_rate": 9.414601239696305e-07, "loss": 0.0382, "step": 240050 }, { "epoch": 2.5648806025962925, "grad_norm": 1.0939185619354248, "learning_rate": 9.414522353308277e-07, "loss": 0.0129, "step": 240060 }, { "epoch": 2.564987445910572, "grad_norm": 0.009394058957695961, "learning_rate": 9.414443461935922e-07, "loss": 0.0141, "step": 240070 }, { "epoch": 2.565094289224852, "grad_norm": 0.19845157861709595, "learning_rate": 9.414364565579326e-07, "loss": 0.0378, "step": 240080 }, { "epoch": 2.5652011325391313, "grad_norm": 0.18243703246116638, "learning_rate": 9.414285664238579e-07, "loss": 0.012, "step": 240090 }, { "epoch": 2.565307975853411, "grad_norm": 0.665447473526001, "learning_rate": 9.41420675791377e-07, "loss": 0.0075, "step": 240100 }, { "epoch": 2.5654148191676907, "grad_norm": 0.009938916191458702, "learning_rate": 9.414127846604989e-07, "loss": 0.0315, "step": 240110 }, { "epoch": 2.56552166248197, "grad_norm": 5.276920795440674, "learning_rate": 9.414048930312325e-07, "loss": 0.0566, "step": 240120 }, { "epoch": 2.5656285057962496, "grad_norm": 0.041297636926174164, "learning_rate": 9.413970009035866e-07, "loss": 0.0703, "step": 240130 }, { "epoch": 2.5657353491105295, "grad_norm": 0.02028486505150795, "learning_rate": 9.413891082775702e-07, "loss": 0.0089, "step": 240140 }, { "epoch": 2.565842192424809, "grad_norm": 0.4429769217967987, "learning_rate": 9.413812151531921e-07, "loss": 0.0085, "step": 240150 }, { "epoch": 2.565949035739089, "grad_norm": 0.8412480354309082, "learning_rate": 9.413733215304615e-07, "loss": 0.033, "step": 240160 }, { "epoch": 2.5660558790533683, "grad_norm": 0.39304912090301514, "learning_rate": 9.41365427409387e-07, "loss": 0.0084, "step": 240170 }, { "epoch": 2.5661627223676478, "grad_norm": 2.0257833003997803, "learning_rate": 9.413575327899775e-07, "loss": 0.0151, "step": 240180 }, { "epoch": 2.566269565681927, "grad_norm": 5.8536272048950195, "learning_rate": 9.413496376722421e-07, "loss": 0.0221, "step": 240190 }, { "epoch": 2.566376408996207, "grad_norm": 0.11902879923582077, "learning_rate": 9.413417420561897e-07, "loss": 0.0121, "step": 240200 }, { "epoch": 2.5664832523104866, "grad_norm": 1.7940174341201782, "learning_rate": 9.413338459418292e-07, "loss": 0.0546, "step": 240210 }, { "epoch": 2.5665900956247665, "grad_norm": 0.25500401854515076, "learning_rate": 9.413259493291694e-07, "loss": 0.0195, "step": 240220 }, { "epoch": 2.566696938939046, "grad_norm": 0.10200216621160507, "learning_rate": 9.413180522182193e-07, "loss": 0.0217, "step": 240230 }, { "epoch": 2.5668037822533254, "grad_norm": 0.308775931596756, "learning_rate": 9.413101546089879e-07, "loss": 0.0232, "step": 240240 }, { "epoch": 2.566910625567605, "grad_norm": 4.228860378265381, "learning_rate": 9.41302256501484e-07, "loss": 0.0261, "step": 240250 }, { "epoch": 2.5670174688818848, "grad_norm": 0.28024813532829285, "learning_rate": 9.412943578957164e-07, "loss": 0.0259, "step": 240260 }, { "epoch": 2.567124312196164, "grad_norm": 0.5087554454803467, "learning_rate": 9.412864587916944e-07, "loss": 0.0146, "step": 240270 }, { "epoch": 2.567231155510444, "grad_norm": 0.13119611144065857, "learning_rate": 9.412785591894266e-07, "loss": 0.0122, "step": 240280 }, { "epoch": 2.5673379988247236, "grad_norm": 0.010966172441840172, "learning_rate": 9.412706590889219e-07, "loss": 0.0087, "step": 240290 }, { "epoch": 2.567444842139003, "grad_norm": 1.1056791543960571, "learning_rate": 9.412627584901893e-07, "loss": 0.023, "step": 240300 }, { "epoch": 2.5675516854532825, "grad_norm": 6.787789344787598, "learning_rate": 9.412548573932378e-07, "loss": 0.0312, "step": 240310 }, { "epoch": 2.5676585287675624, "grad_norm": 1.2168711423873901, "learning_rate": 9.412469557980763e-07, "loss": 0.0283, "step": 240320 }, { "epoch": 2.567765372081842, "grad_norm": 4.984117031097412, "learning_rate": 9.412390537047137e-07, "loss": 0.0193, "step": 240330 }, { "epoch": 2.5678722153961218, "grad_norm": 0.057901691645383835, "learning_rate": 9.412311511131588e-07, "loss": 0.0301, "step": 240340 }, { "epoch": 2.567979058710401, "grad_norm": 0.6006415486335754, "learning_rate": 9.412232480234208e-07, "loss": 0.0077, "step": 240350 }, { "epoch": 2.5680859020246807, "grad_norm": 0.7079229950904846, "learning_rate": 9.412153444355083e-07, "loss": 0.0361, "step": 240360 }, { "epoch": 2.5681927453389606, "grad_norm": 0.01416904665529728, "learning_rate": 9.412074403494305e-07, "loss": 0.0226, "step": 240370 }, { "epoch": 2.56829958865324, "grad_norm": 4.219393730163574, "learning_rate": 9.411995357651961e-07, "loss": 0.0057, "step": 240380 }, { "epoch": 2.5684064319675195, "grad_norm": 0.5491004586219788, "learning_rate": 9.411916306828141e-07, "loss": 0.0392, "step": 240390 }, { "epoch": 2.5685132752817994, "grad_norm": 0.30253487825393677, "learning_rate": 9.411837251022935e-07, "loss": 0.0304, "step": 240400 }, { "epoch": 2.568620118596079, "grad_norm": 0.04639891907572746, "learning_rate": 9.411758190236432e-07, "loss": 0.0192, "step": 240410 }, { "epoch": 2.5687269619103583, "grad_norm": 0.43644604086875916, "learning_rate": 9.411679124468721e-07, "loss": 0.0016, "step": 240420 }, { "epoch": 2.5688338052246382, "grad_norm": 0.5657911896705627, "learning_rate": 9.411600053719892e-07, "loss": 0.0539, "step": 240430 }, { "epoch": 2.5689406485389177, "grad_norm": 0.013547070324420929, "learning_rate": 9.411520977990033e-07, "loss": 0.0115, "step": 240440 }, { "epoch": 2.569047491853197, "grad_norm": 1.9480000734329224, "learning_rate": 9.411441897279234e-07, "loss": 0.0041, "step": 240450 }, { "epoch": 2.569154335167477, "grad_norm": 1.526011347770691, "learning_rate": 9.411362811587582e-07, "loss": 0.0022, "step": 240460 }, { "epoch": 2.5692611784817565, "grad_norm": 0.0091930590569973, "learning_rate": 9.411283720915171e-07, "loss": 0.0078, "step": 240470 }, { "epoch": 2.569368021796036, "grad_norm": 0.037882644683122635, "learning_rate": 9.411204625262087e-07, "loss": 0.0284, "step": 240480 }, { "epoch": 2.569474865110316, "grad_norm": 0.0037232839968055487, "learning_rate": 9.411125524628421e-07, "loss": 0.0272, "step": 240490 }, { "epoch": 2.5695817084245953, "grad_norm": 4.997801303863525, "learning_rate": 9.411046419014259e-07, "loss": 0.0082, "step": 240500 }, { "epoch": 2.569688551738875, "grad_norm": 11.724811553955078, "learning_rate": 9.410967308419695e-07, "loss": 0.0429, "step": 240510 }, { "epoch": 2.5697953950531547, "grad_norm": 0.009558841586112976, "learning_rate": 9.410888192844815e-07, "loss": 0.0008, "step": 240520 }, { "epoch": 2.569902238367434, "grad_norm": 1.5644508600234985, "learning_rate": 9.41080907228971e-07, "loss": 0.0165, "step": 240530 }, { "epoch": 2.5700090816817136, "grad_norm": 0.040920551866292953, "learning_rate": 9.410729946754467e-07, "loss": 0.0014, "step": 240540 }, { "epoch": 2.5701159249959935, "grad_norm": 0.22082524001598358, "learning_rate": 9.410650816239178e-07, "loss": 0.011, "step": 240550 }, { "epoch": 2.570222768310273, "grad_norm": 0.7563406229019165, "learning_rate": 9.41057168074393e-07, "loss": 0.0809, "step": 240560 }, { "epoch": 2.570329611624553, "grad_norm": 0.10140050202608109, "learning_rate": 9.410492540268816e-07, "loss": 0.0216, "step": 240570 }, { "epoch": 2.5704364549388323, "grad_norm": 0.04569326713681221, "learning_rate": 9.410413394813923e-07, "loss": 0.0174, "step": 240580 }, { "epoch": 2.570543298253112, "grad_norm": 10.111861228942871, "learning_rate": 9.410334244379339e-07, "loss": 0.0465, "step": 240590 }, { "epoch": 2.5706501415673912, "grad_norm": 1.2424472570419312, "learning_rate": 9.410255088965155e-07, "loss": 0.0053, "step": 240600 }, { "epoch": 2.570756984881671, "grad_norm": 0.05671709403395653, "learning_rate": 9.41017592857146e-07, "loss": 0.0203, "step": 240610 }, { "epoch": 2.5708638281959506, "grad_norm": 0.001732325297780335, "learning_rate": 9.410096763198344e-07, "loss": 0.1027, "step": 240620 }, { "epoch": 2.5709706715102305, "grad_norm": 0.14987275004386902, "learning_rate": 9.410017592845896e-07, "loss": 0.0205, "step": 240630 }, { "epoch": 2.57107751482451, "grad_norm": 1.1234973669052124, "learning_rate": 9.409938417514205e-07, "loss": 0.0123, "step": 240640 }, { "epoch": 2.5711843581387894, "grad_norm": 4.4044084548950195, "learning_rate": 9.409859237203361e-07, "loss": 0.0166, "step": 240650 }, { "epoch": 2.571291201453069, "grad_norm": 0.05699971318244934, "learning_rate": 9.409780051913454e-07, "loss": 0.0082, "step": 240660 }, { "epoch": 2.571398044767349, "grad_norm": 7.692248344421387, "learning_rate": 9.40970086164457e-07, "loss": 0.0237, "step": 240670 }, { "epoch": 2.5715048880816282, "grad_norm": 0.2385103553533554, "learning_rate": 9.409621666396803e-07, "loss": 0.0115, "step": 240680 }, { "epoch": 2.571611731395908, "grad_norm": 0.5401301980018616, "learning_rate": 9.409542466170239e-07, "loss": 0.0096, "step": 240690 }, { "epoch": 2.5717185747101876, "grad_norm": 4.77712345123291, "learning_rate": 9.409463260964969e-07, "loss": 0.0648, "step": 240700 }, { "epoch": 2.571825418024467, "grad_norm": 1.0135767459869385, "learning_rate": 9.409384050781084e-07, "loss": 0.0332, "step": 240710 }, { "epoch": 2.5719322613387465, "grad_norm": 8.024518966674805, "learning_rate": 9.40930483561867e-07, "loss": 0.0323, "step": 240720 }, { "epoch": 2.5720391046530264, "grad_norm": 3.578974962234497, "learning_rate": 9.409225615477818e-07, "loss": 0.0145, "step": 240730 }, { "epoch": 2.572145947967306, "grad_norm": 4.156579971313477, "learning_rate": 9.409146390358618e-07, "loss": 0.0268, "step": 240740 }, { "epoch": 2.572252791281586, "grad_norm": 3.192453384399414, "learning_rate": 9.409067160261159e-07, "loss": 0.0163, "step": 240750 }, { "epoch": 2.5723596345958653, "grad_norm": 0.05255264788866043, "learning_rate": 9.40898792518553e-07, "loss": 0.0127, "step": 240760 }, { "epoch": 2.5724664779101447, "grad_norm": 0.005750866606831551, "learning_rate": 9.408908685131821e-07, "loss": 0.0095, "step": 240770 }, { "epoch": 2.572573321224424, "grad_norm": 0.011302097700536251, "learning_rate": 9.408829440100121e-07, "loss": 0.0092, "step": 240780 }, { "epoch": 2.572680164538704, "grad_norm": 0.02420133352279663, "learning_rate": 9.408750190090521e-07, "loss": 0.0134, "step": 240790 }, { "epoch": 2.5727870078529835, "grad_norm": 5.163872241973877, "learning_rate": 9.408670935103108e-07, "loss": 0.0106, "step": 240800 }, { "epoch": 2.5728938511672634, "grad_norm": 0.03839902952313423, "learning_rate": 9.408591675137973e-07, "loss": 0.0016, "step": 240810 }, { "epoch": 2.573000694481543, "grad_norm": 6.215089797973633, "learning_rate": 9.408512410195206e-07, "loss": 0.0136, "step": 240820 }, { "epoch": 2.5731075377958224, "grad_norm": 0.002678454155102372, "learning_rate": 9.408433140274896e-07, "loss": 0.0774, "step": 240830 }, { "epoch": 2.573214381110102, "grad_norm": 0.6069174408912659, "learning_rate": 9.40835386537713e-07, "loss": 0.0078, "step": 240840 }, { "epoch": 2.5733212244243817, "grad_norm": 1.0688680410385132, "learning_rate": 9.408274585502003e-07, "loss": 0.0089, "step": 240850 }, { "epoch": 2.573428067738661, "grad_norm": 1.9951763153076172, "learning_rate": 9.408195300649599e-07, "loss": 0.0605, "step": 240860 }, { "epoch": 2.573534911052941, "grad_norm": 6.000300884246826, "learning_rate": 9.40811601082001e-07, "loss": 0.0108, "step": 240870 }, { "epoch": 2.5736417543672205, "grad_norm": 0.823533833026886, "learning_rate": 9.408036716013325e-07, "loss": 0.0069, "step": 240880 }, { "epoch": 2.5737485976815, "grad_norm": 0.05734236165881157, "learning_rate": 9.407957416229635e-07, "loss": 0.0222, "step": 240890 }, { "epoch": 2.5738554409957795, "grad_norm": 0.0015302682295441628, "learning_rate": 9.407878111469028e-07, "loss": 0.0141, "step": 240900 }, { "epoch": 2.5739622843100594, "grad_norm": 0.09545212239027023, "learning_rate": 9.407798801731595e-07, "loss": 0.0302, "step": 240910 }, { "epoch": 2.574069127624339, "grad_norm": 0.021119728684425354, "learning_rate": 9.407719487017423e-07, "loss": 0.0185, "step": 240920 }, { "epoch": 2.5741759709386187, "grad_norm": 1.9676419496536255, "learning_rate": 9.407640167326603e-07, "loss": 0.017, "step": 240930 }, { "epoch": 2.574282814252898, "grad_norm": 16.518259048461914, "learning_rate": 9.407560842659226e-07, "loss": 0.0071, "step": 240940 }, { "epoch": 2.5743896575671776, "grad_norm": 8.948973655700684, "learning_rate": 9.407481513015378e-07, "loss": 0.0562, "step": 240950 }, { "epoch": 2.574496500881457, "grad_norm": 0.37511196732521057, "learning_rate": 9.407402178395151e-07, "loss": 0.0034, "step": 240960 }, { "epoch": 2.574603344195737, "grad_norm": 0.1396317034959793, "learning_rate": 9.407322838798637e-07, "loss": 0.0182, "step": 240970 }, { "epoch": 2.5747101875100165, "grad_norm": 0.019360845908522606, "learning_rate": 9.40724349422592e-07, "loss": 0.0271, "step": 240980 }, { "epoch": 2.5748170308242964, "grad_norm": 0.04745377600193024, "learning_rate": 9.407164144677094e-07, "loss": 0.0162, "step": 240990 }, { "epoch": 2.574923874138576, "grad_norm": 0.009648305363953114, "learning_rate": 9.407084790152247e-07, "loss": 0.0245, "step": 241000 }, { "epoch": 2.5750307174528553, "grad_norm": 3.8373799324035645, "learning_rate": 9.407005430651468e-07, "loss": 0.0325, "step": 241010 }, { "epoch": 2.5751375607671347, "grad_norm": 0.6314331889152527, "learning_rate": 9.406926066174847e-07, "loss": 0.0145, "step": 241020 }, { "epoch": 2.5752444040814146, "grad_norm": 0.1292194128036499, "learning_rate": 9.406846696722475e-07, "loss": 0.0477, "step": 241030 }, { "epoch": 2.575351247395694, "grad_norm": 3.5930209159851074, "learning_rate": 9.40676732229444e-07, "loss": 0.0223, "step": 241040 }, { "epoch": 2.575458090709974, "grad_norm": 0.02201196923851967, "learning_rate": 9.406687942890832e-07, "loss": 0.0285, "step": 241050 }, { "epoch": 2.5755649340242535, "grad_norm": 6.956140041351318, "learning_rate": 9.40660855851174e-07, "loss": 0.0295, "step": 241060 }, { "epoch": 2.575671777338533, "grad_norm": 8.443140029907227, "learning_rate": 9.406529169157256e-07, "loss": 0.0334, "step": 241070 }, { "epoch": 2.5757786206528124, "grad_norm": 10.456459999084473, "learning_rate": 9.406449774827468e-07, "loss": 0.0145, "step": 241080 }, { "epoch": 2.5758854639670923, "grad_norm": 0.9063838720321655, "learning_rate": 9.406370375522464e-07, "loss": 0.0048, "step": 241090 }, { "epoch": 2.5759923072813717, "grad_norm": 0.8521929979324341, "learning_rate": 9.406290971242334e-07, "loss": 0.0153, "step": 241100 }, { "epoch": 2.5760991505956516, "grad_norm": 0.060288358479738235, "learning_rate": 9.406211561987171e-07, "loss": 0.0272, "step": 241110 }, { "epoch": 2.576205993909931, "grad_norm": 2.3440685272216797, "learning_rate": 9.406132147757064e-07, "loss": 0.0327, "step": 241120 }, { "epoch": 2.5763128372242106, "grad_norm": 0.12754829227924347, "learning_rate": 9.4060527285521e-07, "loss": 0.0129, "step": 241130 }, { "epoch": 2.5764196805384905, "grad_norm": 4.489189147949219, "learning_rate": 9.405973304372369e-07, "loss": 0.0195, "step": 241140 }, { "epoch": 2.57652652385277, "grad_norm": 2.7731072902679443, "learning_rate": 9.405893875217963e-07, "loss": 0.005, "step": 241150 }, { "epoch": 2.5766333671670494, "grad_norm": 10.169672012329102, "learning_rate": 9.405814441088969e-07, "loss": 0.0217, "step": 241160 }, { "epoch": 2.5767402104813293, "grad_norm": 1.7556737661361694, "learning_rate": 9.405735001985479e-07, "loss": 0.0094, "step": 241170 }, { "epoch": 2.5768470537956087, "grad_norm": 0.0034852928947657347, "learning_rate": 9.405655557907581e-07, "loss": 0.0032, "step": 241180 }, { "epoch": 2.576953897109888, "grad_norm": 2.2150356769561768, "learning_rate": 9.405576108855365e-07, "loss": 0.0132, "step": 241190 }, { "epoch": 2.577060740424168, "grad_norm": 0.1606329381465912, "learning_rate": 9.405496654828922e-07, "loss": 0.019, "step": 241200 }, { "epoch": 2.5771675837384476, "grad_norm": 1.1512476205825806, "learning_rate": 9.40541719582834e-07, "loss": 0.023, "step": 241210 }, { "epoch": 2.577274427052727, "grad_norm": 2.752943992614746, "learning_rate": 9.405337731853711e-07, "loss": 0.0063, "step": 241220 }, { "epoch": 2.577381270367007, "grad_norm": 0.5486137270927429, "learning_rate": 9.405258262905121e-07, "loss": 0.0141, "step": 241230 }, { "epoch": 2.5774881136812864, "grad_norm": 0.04090217500925064, "learning_rate": 9.405178788982664e-07, "loss": 0.0069, "step": 241240 }, { "epoch": 2.577594956995566, "grad_norm": 2.298710346221924, "learning_rate": 9.405099310086427e-07, "loss": 0.0064, "step": 241250 }, { "epoch": 2.5777018003098457, "grad_norm": 3.361201763153076, "learning_rate": 9.405019826216499e-07, "loss": 0.0337, "step": 241260 }, { "epoch": 2.577808643624125, "grad_norm": 0.10584039986133575, "learning_rate": 9.404940337372973e-07, "loss": 0.0122, "step": 241270 }, { "epoch": 2.5779154869384047, "grad_norm": 3.9826157093048096, "learning_rate": 9.404860843555936e-07, "loss": 0.0104, "step": 241280 }, { "epoch": 2.5780223302526846, "grad_norm": 0.010113978758454323, "learning_rate": 9.404781344765479e-07, "loss": 0.0071, "step": 241290 }, { "epoch": 2.578129173566964, "grad_norm": 2.435821533203125, "learning_rate": 9.404701841001691e-07, "loss": 0.0103, "step": 241300 }, { "epoch": 2.5782360168812435, "grad_norm": 2.3856139183044434, "learning_rate": 9.404622332264663e-07, "loss": 0.0169, "step": 241310 }, { "epoch": 2.5783428601955234, "grad_norm": 2.0423614978790283, "learning_rate": 9.404542818554483e-07, "loss": 0.0105, "step": 241320 }, { "epoch": 2.578449703509803, "grad_norm": 0.04722389951348305, "learning_rate": 9.404463299871241e-07, "loss": 0.0014, "step": 241330 }, { "epoch": 2.5785565468240828, "grad_norm": 0.01869099959731102, "learning_rate": 9.40438377621503e-07, "loss": 0.0103, "step": 241340 }, { "epoch": 2.578663390138362, "grad_norm": 1.2887400388717651, "learning_rate": 9.404304247585936e-07, "loss": 0.0452, "step": 241350 }, { "epoch": 2.5787702334526417, "grad_norm": 0.017558854073286057, "learning_rate": 9.40422471398405e-07, "loss": 0.0084, "step": 241360 }, { "epoch": 2.578877076766921, "grad_norm": 9.544000625610352, "learning_rate": 9.404145175409461e-07, "loss": 0.0279, "step": 241370 }, { "epoch": 2.578983920081201, "grad_norm": 0.03539164364337921, "learning_rate": 9.404065631862261e-07, "loss": 0.0157, "step": 241380 }, { "epoch": 2.5790907633954805, "grad_norm": 0.11329390108585358, "learning_rate": 9.403986083342539e-07, "loss": 0.0132, "step": 241390 }, { "epoch": 2.5791976067097604, "grad_norm": 1.7811940908432007, "learning_rate": 9.403906529850382e-07, "loss": 0.0104, "step": 241400 }, { "epoch": 2.57930445002404, "grad_norm": 1.4957056045532227, "learning_rate": 9.403826971385884e-07, "loss": 0.0219, "step": 241410 }, { "epoch": 2.5794112933383193, "grad_norm": 1.927249789237976, "learning_rate": 9.403747407949134e-07, "loss": 0.0301, "step": 241420 }, { "epoch": 2.5795181366525988, "grad_norm": 0.08137834817171097, "learning_rate": 9.403667839540219e-07, "loss": 0.0199, "step": 241430 }, { "epoch": 2.5796249799668787, "grad_norm": 2.3762271404266357, "learning_rate": 9.40358826615923e-07, "loss": 0.0042, "step": 241440 }, { "epoch": 2.579731823281158, "grad_norm": 0.029956702142953873, "learning_rate": 9.40350868780626e-07, "loss": 0.016, "step": 241450 }, { "epoch": 2.579838666595438, "grad_norm": 0.18827712535858154, "learning_rate": 9.403429104481395e-07, "loss": 0.0097, "step": 241460 }, { "epoch": 2.5799455099097175, "grad_norm": 0.2649833559989929, "learning_rate": 9.403349516184726e-07, "loss": 0.0074, "step": 241470 }, { "epoch": 2.580052353223997, "grad_norm": 0.1425907015800476, "learning_rate": 9.403269922916344e-07, "loss": 0.0416, "step": 241480 }, { "epoch": 2.5801591965382764, "grad_norm": 0.0356704443693161, "learning_rate": 9.403190324676336e-07, "loss": 0.0219, "step": 241490 }, { "epoch": 2.5802660398525563, "grad_norm": 0.29311084747314453, "learning_rate": 9.403110721464796e-07, "loss": 0.0575, "step": 241500 }, { "epoch": 2.5803728831668358, "grad_norm": 0.008988665416836739, "learning_rate": 9.40303111328181e-07, "loss": 0.0143, "step": 241510 }, { "epoch": 2.5804797264811157, "grad_norm": 0.13632887601852417, "learning_rate": 9.40295150012747e-07, "loss": 0.0193, "step": 241520 }, { "epoch": 2.580586569795395, "grad_norm": 4.852469444274902, "learning_rate": 9.402871882001866e-07, "loss": 0.0204, "step": 241530 }, { "epoch": 2.5806934131096746, "grad_norm": 5.203205108642578, "learning_rate": 9.402792258905085e-07, "loss": 0.0546, "step": 241540 }, { "epoch": 2.580800256423954, "grad_norm": 3.305314064025879, "learning_rate": 9.402712630837222e-07, "loss": 0.0174, "step": 241550 }, { "epoch": 2.580907099738234, "grad_norm": 3.7776143550872803, "learning_rate": 9.402632997798363e-07, "loss": 0.055, "step": 241560 }, { "epoch": 2.5810139430525134, "grad_norm": 6.06695032119751, "learning_rate": 9.402553359788601e-07, "loss": 0.0394, "step": 241570 }, { "epoch": 2.5811207863667933, "grad_norm": 2.4063174724578857, "learning_rate": 9.402473716808021e-07, "loss": 0.0143, "step": 241580 }, { "epoch": 2.5812276296810728, "grad_norm": 0.017667410895228386, "learning_rate": 9.402394068856719e-07, "loss": 0.0231, "step": 241590 }, { "epoch": 2.5813344729953522, "grad_norm": 0.9095833897590637, "learning_rate": 9.402314415934779e-07, "loss": 0.0209, "step": 241600 }, { "epoch": 2.5814413163096317, "grad_norm": 0.05479699745774269, "learning_rate": 9.402234758042296e-07, "loss": 0.0276, "step": 241610 }, { "epoch": 2.5815481596239116, "grad_norm": 0.00884794071316719, "learning_rate": 9.402155095179356e-07, "loss": 0.0132, "step": 241620 }, { "epoch": 2.581655002938191, "grad_norm": 0.046845607459545135, "learning_rate": 9.402075427346052e-07, "loss": 0.0136, "step": 241630 }, { "epoch": 2.581761846252471, "grad_norm": 1.2371456623077393, "learning_rate": 9.401995754542471e-07, "loss": 0.0238, "step": 241640 }, { "epoch": 2.5818686895667504, "grad_norm": 0.26393210887908936, "learning_rate": 9.401916076768705e-07, "loss": 0.0106, "step": 241650 }, { "epoch": 2.58197553288103, "grad_norm": 1.8449174165725708, "learning_rate": 9.401836394024845e-07, "loss": 0.0039, "step": 241660 }, { "epoch": 2.5820823761953093, "grad_norm": 0.7418817281723022, "learning_rate": 9.401756706310977e-07, "loss": 0.0574, "step": 241670 }, { "epoch": 2.5821892195095892, "grad_norm": 0.2828271985054016, "learning_rate": 9.401677013627196e-07, "loss": 0.0013, "step": 241680 }, { "epoch": 2.5822960628238687, "grad_norm": 0.17728324234485626, "learning_rate": 9.401597315973588e-07, "loss": 0.0092, "step": 241690 }, { "epoch": 2.5824029061381486, "grad_norm": 0.29886406660079956, "learning_rate": 9.401517613350244e-07, "loss": 0.0046, "step": 241700 }, { "epoch": 2.582509749452428, "grad_norm": 0.01585766300559044, "learning_rate": 9.401437905757256e-07, "loss": 0.0337, "step": 241710 }, { "epoch": 2.5826165927667075, "grad_norm": 0.09047003090381622, "learning_rate": 9.401358193194712e-07, "loss": 0.0083, "step": 241720 }, { "epoch": 2.582723436080987, "grad_norm": 1.4051258563995361, "learning_rate": 9.401278475662701e-07, "loss": 0.0095, "step": 241730 }, { "epoch": 2.582830279395267, "grad_norm": 1.5883454084396362, "learning_rate": 9.401198753161315e-07, "loss": 0.034, "step": 241740 }, { "epoch": 2.5829371227095463, "grad_norm": 2.6627564430236816, "learning_rate": 9.401119025690645e-07, "loss": 0.0055, "step": 241750 }, { "epoch": 2.5830439660238262, "grad_norm": 1.3266749382019043, "learning_rate": 9.401039293250779e-07, "loss": 0.0132, "step": 241760 }, { "epoch": 2.5831508093381057, "grad_norm": 0.04918624460697174, "learning_rate": 9.400959555841806e-07, "loss": 0.0041, "step": 241770 }, { "epoch": 2.583257652652385, "grad_norm": 0.06601772457361221, "learning_rate": 9.400879813463818e-07, "loss": 0.0114, "step": 241780 }, { "epoch": 2.5833644959666646, "grad_norm": 6.530571460723877, "learning_rate": 9.400800066116905e-07, "loss": 0.0114, "step": 241790 }, { "epoch": 2.5834713392809445, "grad_norm": 0.060011543333530426, "learning_rate": 9.400720313801155e-07, "loss": 0.0497, "step": 241800 }, { "epoch": 2.583578182595224, "grad_norm": 0.23010815680027008, "learning_rate": 9.400640556516662e-07, "loss": 0.0296, "step": 241810 }, { "epoch": 2.583685025909504, "grad_norm": 0.10477279126644135, "learning_rate": 9.400560794263513e-07, "loss": 0.0097, "step": 241820 }, { "epoch": 2.5837918692237833, "grad_norm": 0.8968015313148499, "learning_rate": 9.400481027041799e-07, "loss": 0.0011, "step": 241830 }, { "epoch": 2.583898712538063, "grad_norm": 0.023298470303416252, "learning_rate": 9.400401254851609e-07, "loss": 0.0093, "step": 241840 }, { "epoch": 2.5840055558523427, "grad_norm": 6.9421305656433105, "learning_rate": 9.400321477693034e-07, "loss": 0.0131, "step": 241850 }, { "epoch": 2.584112399166622, "grad_norm": 0.492195188999176, "learning_rate": 9.400241695566165e-07, "loss": 0.006, "step": 241860 }, { "epoch": 2.5842192424809016, "grad_norm": 2.555194139480591, "learning_rate": 9.400161908471089e-07, "loss": 0.0068, "step": 241870 }, { "epoch": 2.5843260857951815, "grad_norm": 6.857988357543945, "learning_rate": 9.400082116407899e-07, "loss": 0.0379, "step": 241880 }, { "epoch": 2.584432929109461, "grad_norm": 0.010516217909753323, "learning_rate": 9.400002319376685e-07, "loss": 0.004, "step": 241890 }, { "epoch": 2.5845397724237404, "grad_norm": 7.791797161102295, "learning_rate": 9.399922517377536e-07, "loss": 0.0319, "step": 241900 }, { "epoch": 2.5846466157380203, "grad_norm": 5.457148551940918, "learning_rate": 9.399842710410541e-07, "loss": 0.0103, "step": 241910 }, { "epoch": 2.5847534590523, "grad_norm": 1.253098726272583, "learning_rate": 9.399762898475794e-07, "loss": 0.0604, "step": 241920 }, { "epoch": 2.5848603023665793, "grad_norm": 0.016932550817728043, "learning_rate": 9.399683081573381e-07, "loss": 0.0056, "step": 241930 }, { "epoch": 2.584967145680859, "grad_norm": 0.4539615511894226, "learning_rate": 9.399603259703393e-07, "loss": 0.0232, "step": 241940 }, { "epoch": 2.5850739889951386, "grad_norm": 0.010428385809063911, "learning_rate": 9.399523432865922e-07, "loss": 0.0163, "step": 241950 }, { "epoch": 2.585180832309418, "grad_norm": 1.3219244480133057, "learning_rate": 9.399443601061057e-07, "loss": 0.0049, "step": 241960 }, { "epoch": 2.585287675623698, "grad_norm": 0.02663244679570198, "learning_rate": 9.399363764288888e-07, "loss": 0.0118, "step": 241970 }, { "epoch": 2.5853945189379774, "grad_norm": 3.508148670196533, "learning_rate": 9.399283922549505e-07, "loss": 0.0186, "step": 241980 }, { "epoch": 2.585501362252257, "grad_norm": 0.044976215809583664, "learning_rate": 9.399204075843e-07, "loss": 0.0356, "step": 241990 }, { "epoch": 2.585608205566537, "grad_norm": 2.814467191696167, "learning_rate": 9.399124224169459e-07, "loss": 0.0048, "step": 242000 }, { "epoch": 2.5857150488808163, "grad_norm": 0.009532896801829338, "learning_rate": 9.399044367528976e-07, "loss": 0.0152, "step": 242010 }, { "epoch": 2.5858218921950957, "grad_norm": 0.24343818426132202, "learning_rate": 9.39896450592164e-07, "loss": 0.0162, "step": 242020 }, { "epoch": 2.5859287355093756, "grad_norm": 0.023264490067958832, "learning_rate": 9.398884639347542e-07, "loss": 0.0363, "step": 242030 }, { "epoch": 2.586035578823655, "grad_norm": 0.15776093304157257, "learning_rate": 9.398804767806769e-07, "loss": 0.0604, "step": 242040 }, { "epoch": 2.586142422137935, "grad_norm": 4.320496559143066, "learning_rate": 9.398724891299416e-07, "loss": 0.0215, "step": 242050 }, { "epoch": 2.5862492654522145, "grad_norm": 0.24939587712287903, "learning_rate": 9.398645009825569e-07, "loss": 0.0207, "step": 242060 }, { "epoch": 2.586356108766494, "grad_norm": 0.06295314431190491, "learning_rate": 9.398565123385321e-07, "loss": 0.0024, "step": 242070 }, { "epoch": 2.5864629520807734, "grad_norm": 12.477818489074707, "learning_rate": 9.398485231978759e-07, "loss": 0.0071, "step": 242080 }, { "epoch": 2.5865697953950533, "grad_norm": 0.06069445610046387, "learning_rate": 9.398405335605977e-07, "loss": 0.0504, "step": 242090 }, { "epoch": 2.5866766387093327, "grad_norm": 2.664064645767212, "learning_rate": 9.398325434267062e-07, "loss": 0.0043, "step": 242100 }, { "epoch": 2.5867834820236126, "grad_norm": 0.021703612059354782, "learning_rate": 9.398245527962105e-07, "loss": 0.0227, "step": 242110 }, { "epoch": 2.586890325337892, "grad_norm": 0.09894070029258728, "learning_rate": 9.398165616691198e-07, "loss": 0.0083, "step": 242120 }, { "epoch": 2.5869971686521716, "grad_norm": 0.22483669221401215, "learning_rate": 9.398085700454432e-07, "loss": 0.0069, "step": 242130 }, { "epoch": 2.587104011966451, "grad_norm": 0.018164459615945816, "learning_rate": 9.398005779251892e-07, "loss": 0.0057, "step": 242140 }, { "epoch": 2.587210855280731, "grad_norm": 0.31941142678260803, "learning_rate": 9.397925853083675e-07, "loss": 0.0283, "step": 242150 }, { "epoch": 2.5873176985950104, "grad_norm": 0.25176575779914856, "learning_rate": 9.397845921949866e-07, "loss": 0.017, "step": 242160 }, { "epoch": 2.5874245419092903, "grad_norm": 2.1113216876983643, "learning_rate": 9.397765985850555e-07, "loss": 0.0146, "step": 242170 }, { "epoch": 2.5875313852235697, "grad_norm": 3.4159276485443115, "learning_rate": 9.397686044785836e-07, "loss": 0.0142, "step": 242180 }, { "epoch": 2.587638228537849, "grad_norm": 0.2182878702878952, "learning_rate": 9.397606098755799e-07, "loss": 0.0054, "step": 242190 }, { "epoch": 2.5877450718521287, "grad_norm": 0.04629959538578987, "learning_rate": 9.39752614776053e-07, "loss": 0.0046, "step": 242200 }, { "epoch": 2.5878519151664086, "grad_norm": 5.427464008331299, "learning_rate": 9.397446191800124e-07, "loss": 0.0341, "step": 242210 }, { "epoch": 2.587958758480688, "grad_norm": 1.693634033203125, "learning_rate": 9.39736623087467e-07, "loss": 0.0139, "step": 242220 }, { "epoch": 2.588065601794968, "grad_norm": 0.006010825280100107, "learning_rate": 9.397286264984257e-07, "loss": 0.0034, "step": 242230 }, { "epoch": 2.5881724451092474, "grad_norm": 5.163661956787109, "learning_rate": 9.397206294128974e-07, "loss": 0.0092, "step": 242240 }, { "epoch": 2.588279288423527, "grad_norm": 8.246200561523438, "learning_rate": 9.397126318308915e-07, "loss": 0.006, "step": 242250 }, { "epoch": 2.5883861317378063, "grad_norm": 2.0804972648620605, "learning_rate": 9.397046337524168e-07, "loss": 0.0123, "step": 242260 }, { "epoch": 2.588492975052086, "grad_norm": 0.010092797689139843, "learning_rate": 9.396966351774825e-07, "loss": 0.0238, "step": 242270 }, { "epoch": 2.5885998183663657, "grad_norm": 4.959643363952637, "learning_rate": 9.396886361060974e-07, "loss": 0.0126, "step": 242280 }, { "epoch": 2.5887066616806456, "grad_norm": 0.09426121413707733, "learning_rate": 9.396806365382706e-07, "loss": 0.0339, "step": 242290 }, { "epoch": 2.588813504994925, "grad_norm": 0.21807444095611572, "learning_rate": 9.396726364740113e-07, "loss": 0.0113, "step": 242300 }, { "epoch": 2.5889203483092045, "grad_norm": 1.994582176208496, "learning_rate": 9.396646359133284e-07, "loss": 0.0044, "step": 242310 }, { "epoch": 2.589027191623484, "grad_norm": 1.0133229494094849, "learning_rate": 9.396566348562309e-07, "loss": 0.0154, "step": 242320 }, { "epoch": 2.589134034937764, "grad_norm": 0.17659211158752441, "learning_rate": 9.396486333027278e-07, "loss": 0.0417, "step": 242330 }, { "epoch": 2.5892408782520433, "grad_norm": 0.33513912558555603, "learning_rate": 9.396406312528283e-07, "loss": 0.0042, "step": 242340 }, { "epoch": 2.589347721566323, "grad_norm": 5.524789333343506, "learning_rate": 9.396326287065414e-07, "loss": 0.0262, "step": 242350 }, { "epoch": 2.5894545648806027, "grad_norm": 7.577959060668945, "learning_rate": 9.39624625663876e-07, "loss": 0.0556, "step": 242360 }, { "epoch": 2.589561408194882, "grad_norm": 0.5170308947563171, "learning_rate": 9.396166221248411e-07, "loss": 0.0398, "step": 242370 }, { "epoch": 2.5896682515091616, "grad_norm": 0.09874162077903748, "learning_rate": 9.39608618089446e-07, "loss": 0.0104, "step": 242380 }, { "epoch": 2.5897750948234415, "grad_norm": 0.021229803562164307, "learning_rate": 9.396006135576997e-07, "loss": 0.0044, "step": 242390 }, { "epoch": 2.589881938137721, "grad_norm": 0.004847529344260693, "learning_rate": 9.395926085296111e-07, "loss": 0.0347, "step": 242400 }, { "epoch": 2.589988781452001, "grad_norm": 1.7894704341888428, "learning_rate": 9.39584603005189e-07, "loss": 0.03, "step": 242410 }, { "epoch": 2.5900956247662803, "grad_norm": 2.6617438793182373, "learning_rate": 9.39576596984443e-07, "loss": 0.0338, "step": 242420 }, { "epoch": 2.5902024680805598, "grad_norm": 0.006455882918089628, "learning_rate": 9.395685904673817e-07, "loss": 0.0154, "step": 242430 }, { "epoch": 2.590309311394839, "grad_norm": 4.595372676849365, "learning_rate": 9.395605834540143e-07, "loss": 0.0935, "step": 242440 }, { "epoch": 2.590416154709119, "grad_norm": 0.010427404195070267, "learning_rate": 9.395525759443499e-07, "loss": 0.0228, "step": 242450 }, { "epoch": 2.5905229980233986, "grad_norm": 0.5802022814750671, "learning_rate": 9.395445679383974e-07, "loss": 0.0307, "step": 242460 }, { "epoch": 2.5906298413376785, "grad_norm": 4.465529441833496, "learning_rate": 9.395365594361659e-07, "loss": 0.0414, "step": 242470 }, { "epoch": 2.590736684651958, "grad_norm": 1.0637813806533813, "learning_rate": 9.395285504376644e-07, "loss": 0.0452, "step": 242480 }, { "epoch": 2.5908435279662374, "grad_norm": 1.3515708446502686, "learning_rate": 9.395205409429021e-07, "loss": 0.0106, "step": 242490 }, { "epoch": 2.590950371280517, "grad_norm": 0.9482366442680359, "learning_rate": 9.395125309518879e-07, "loss": 0.0308, "step": 242500 }, { "epoch": 2.5910572145947968, "grad_norm": 2.5396270751953125, "learning_rate": 9.395045204646308e-07, "loss": 0.0134, "step": 242510 }, { "epoch": 2.5911640579090762, "grad_norm": 0.18017438054084778, "learning_rate": 9.3949650948114e-07, "loss": 0.007, "step": 242520 }, { "epoch": 2.591270901223356, "grad_norm": 1.4773896932601929, "learning_rate": 9.394884980014246e-07, "loss": 0.0044, "step": 242530 }, { "epoch": 2.5913777445376356, "grad_norm": 0.794508159160614, "learning_rate": 9.394804860254934e-07, "loss": 0.01, "step": 242540 }, { "epoch": 2.591484587851915, "grad_norm": 1.8526432514190674, "learning_rate": 9.394724735533555e-07, "loss": 0.0253, "step": 242550 }, { "epoch": 2.5915914311661945, "grad_norm": 0.46923601627349854, "learning_rate": 9.3946446058502e-07, "loss": 0.0447, "step": 242560 }, { "epoch": 2.5916982744804744, "grad_norm": 2.6618800163269043, "learning_rate": 9.39456447120496e-07, "loss": 0.0306, "step": 242570 }, { "epoch": 2.591805117794754, "grad_norm": 0.3432285189628601, "learning_rate": 9.394484331597925e-07, "loss": 0.0139, "step": 242580 }, { "epoch": 2.5919119611090338, "grad_norm": 0.035222578793764114, "learning_rate": 9.394404187029187e-07, "loss": 0.0211, "step": 242590 }, { "epoch": 2.5920188044233132, "grad_norm": 1.6431630849838257, "learning_rate": 9.394324037498833e-07, "loss": 0.0316, "step": 242600 }, { "epoch": 2.5921256477375927, "grad_norm": 0.17985616624355316, "learning_rate": 9.394243883006955e-07, "loss": 0.0171, "step": 242610 }, { "epoch": 2.5922324910518726, "grad_norm": 8.89207649230957, "learning_rate": 9.394163723553648e-07, "loss": 0.0105, "step": 242620 }, { "epoch": 2.592339334366152, "grad_norm": 2.3159663677215576, "learning_rate": 9.394083559138994e-07, "loss": 0.0354, "step": 242630 }, { "epoch": 2.5924461776804315, "grad_norm": 0.002194585744291544, "learning_rate": 9.39400338976309e-07, "loss": 0.0118, "step": 242640 }, { "epoch": 2.5925530209947114, "grad_norm": 17.284574508666992, "learning_rate": 9.393923215426025e-07, "loss": 0.0876, "step": 242650 }, { "epoch": 2.592659864308991, "grad_norm": 0.12231031060218811, "learning_rate": 9.393843036127888e-07, "loss": 0.0179, "step": 242660 }, { "epoch": 2.5927667076232703, "grad_norm": 4.428038120269775, "learning_rate": 9.393762851868772e-07, "loss": 0.0465, "step": 242670 }, { "epoch": 2.5928735509375502, "grad_norm": 22.340181350708008, "learning_rate": 9.393682662648765e-07, "loss": 0.046, "step": 242680 }, { "epoch": 2.5929803942518297, "grad_norm": 0.1357419341802597, "learning_rate": 9.393602468467958e-07, "loss": 0.004, "step": 242690 }, { "epoch": 2.593087237566109, "grad_norm": 1.485163927078247, "learning_rate": 9.393522269326443e-07, "loss": 0.0115, "step": 242700 }, { "epoch": 2.593194080880389, "grad_norm": 0.3410763442516327, "learning_rate": 9.39344206522431e-07, "loss": 0.0081, "step": 242710 }, { "epoch": 2.5933009241946685, "grad_norm": 0.24793551862239838, "learning_rate": 9.393361856161649e-07, "loss": 0.0036, "step": 242720 }, { "epoch": 2.593407767508948, "grad_norm": 0.09731072187423706, "learning_rate": 9.393281642138551e-07, "loss": 0.0026, "step": 242730 }, { "epoch": 2.593514610823228, "grad_norm": 0.029960177838802338, "learning_rate": 9.393201423155106e-07, "loss": 0.0177, "step": 242740 }, { "epoch": 2.5936214541375073, "grad_norm": 0.03337879106402397, "learning_rate": 9.393121199211406e-07, "loss": 0.0061, "step": 242750 }, { "epoch": 2.593728297451787, "grad_norm": 0.3140611946582794, "learning_rate": 9.39304097030754e-07, "loss": 0.0096, "step": 242760 }, { "epoch": 2.5938351407660667, "grad_norm": 1.1647684574127197, "learning_rate": 9.392960736443599e-07, "loss": 0.0098, "step": 242770 }, { "epoch": 2.593941984080346, "grad_norm": 1.9983608722686768, "learning_rate": 9.392880497619674e-07, "loss": 0.0109, "step": 242780 }, { "epoch": 2.5940488273946256, "grad_norm": 0.12784603238105774, "learning_rate": 9.392800253835856e-07, "loss": 0.0309, "step": 242790 }, { "epoch": 2.5941556707089055, "grad_norm": 4.719385623931885, "learning_rate": 9.392720005092234e-07, "loss": 0.0206, "step": 242800 }, { "epoch": 2.594262514023185, "grad_norm": 14.042909622192383, "learning_rate": 9.3926397513889e-07, "loss": 0.0197, "step": 242810 }, { "epoch": 2.594369357337465, "grad_norm": 1.5840290784835815, "learning_rate": 9.392559492725944e-07, "loss": 0.0061, "step": 242820 }, { "epoch": 2.5944762006517443, "grad_norm": 0.012323597446084023, "learning_rate": 9.392479229103456e-07, "loss": 0.0161, "step": 242830 }, { "epoch": 2.594583043966024, "grad_norm": 2.90804123878479, "learning_rate": 9.392398960521529e-07, "loss": 0.0195, "step": 242840 }, { "epoch": 2.5946898872803033, "grad_norm": 0.8222326636314392, "learning_rate": 9.392318686980252e-07, "loss": 0.0169, "step": 242850 }, { "epoch": 2.594796730594583, "grad_norm": 0.4905856251716614, "learning_rate": 9.392238408479715e-07, "loss": 0.0592, "step": 242860 }, { "epoch": 2.5949035739088626, "grad_norm": 2.158308744430542, "learning_rate": 9.392158125020009e-07, "loss": 0.0366, "step": 242870 }, { "epoch": 2.5950104172231425, "grad_norm": 7.4717183113098145, "learning_rate": 9.392077836601225e-07, "loss": 0.0111, "step": 242880 }, { "epoch": 2.595117260537422, "grad_norm": 6.99534797668457, "learning_rate": 9.391997543223456e-07, "loss": 0.0169, "step": 242890 }, { "epoch": 2.5952241038517014, "grad_norm": 0.13333788514137268, "learning_rate": 9.391917244886787e-07, "loss": 0.0345, "step": 242900 }, { "epoch": 2.595330947165981, "grad_norm": 0.11614978313446045, "learning_rate": 9.391836941591313e-07, "loss": 0.0391, "step": 242910 }, { "epoch": 2.595437790480261, "grad_norm": 4.421674728393555, "learning_rate": 9.391756633337124e-07, "loss": 0.0175, "step": 242920 }, { "epoch": 2.5955446337945403, "grad_norm": 0.14166954159736633, "learning_rate": 9.391676320124311e-07, "loss": 0.0141, "step": 242930 }, { "epoch": 2.59565147710882, "grad_norm": 0.08122938871383667, "learning_rate": 9.391596001952964e-07, "loss": 0.0099, "step": 242940 }, { "epoch": 2.5957583204230996, "grad_norm": 0.07347507029771805, "learning_rate": 9.391515678823172e-07, "loss": 0.0048, "step": 242950 }, { "epoch": 2.595865163737379, "grad_norm": 0.019408253952860832, "learning_rate": 9.391435350735029e-07, "loss": 0.0188, "step": 242960 }, { "epoch": 2.5959720070516585, "grad_norm": 4.633045673370361, "learning_rate": 9.391355017688623e-07, "loss": 0.0428, "step": 242970 }, { "epoch": 2.5960788503659384, "grad_norm": 0.5545952916145325, "learning_rate": 9.391274679684046e-07, "loss": 0.0158, "step": 242980 }, { "epoch": 2.596185693680218, "grad_norm": 0.020906096324324608, "learning_rate": 9.391194336721389e-07, "loss": 0.0316, "step": 242990 }, { "epoch": 2.596292536994498, "grad_norm": 0.014843067154288292, "learning_rate": 9.391113988800741e-07, "loss": 0.0117, "step": 243000 }, { "epoch": 2.5963993803087773, "grad_norm": 4.316193580627441, "learning_rate": 9.391033635922195e-07, "loss": 0.0886, "step": 243010 }, { "epoch": 2.5965062236230567, "grad_norm": 0.02663903310894966, "learning_rate": 9.39095327808584e-07, "loss": 0.0096, "step": 243020 }, { "epoch": 2.596613066937336, "grad_norm": 3.9127957820892334, "learning_rate": 9.390872915291767e-07, "loss": 0.0378, "step": 243030 }, { "epoch": 2.596719910251616, "grad_norm": Infinity, "learning_rate": 9.390792547540069e-07, "loss": 0.0274, "step": 243040 }, { "epoch": 2.5968267535658955, "grad_norm": 0.007269850466400385, "learning_rate": 9.390712174830832e-07, "loss": 0.0652, "step": 243050 }, { "epoch": 2.5969335968801754, "grad_norm": 8.659309387207031, "learning_rate": 9.390631797164153e-07, "loss": 0.0369, "step": 243060 }, { "epoch": 2.597040440194455, "grad_norm": 0.3036767840385437, "learning_rate": 9.390551414540115e-07, "loss": 0.0156, "step": 243070 }, { "epoch": 2.5971472835087344, "grad_norm": 4.977985382080078, "learning_rate": 9.390471026958817e-07, "loss": 0.0065, "step": 243080 }, { "epoch": 2.597254126823014, "grad_norm": 0.604482889175415, "learning_rate": 9.390390634420345e-07, "loss": 0.0118, "step": 243090 }, { "epoch": 2.5973609701372937, "grad_norm": 0.2668057382106781, "learning_rate": 9.390310236924789e-07, "loss": 0.004, "step": 243100 }, { "epoch": 2.597467813451573, "grad_norm": 0.021048450842499733, "learning_rate": 9.390229834472243e-07, "loss": 0.0234, "step": 243110 }, { "epoch": 2.597574656765853, "grad_norm": 0.010234790854156017, "learning_rate": 9.390149427062795e-07, "loss": 0.0304, "step": 243120 }, { "epoch": 2.5976815000801325, "grad_norm": 2.9671812057495117, "learning_rate": 9.390069014696538e-07, "loss": 0.0085, "step": 243130 }, { "epoch": 2.597788343394412, "grad_norm": 0.4973754286766052, "learning_rate": 9.389988597373561e-07, "loss": 0.0192, "step": 243140 }, { "epoch": 2.5978951867086915, "grad_norm": 0.6932148337364197, "learning_rate": 9.389908175093954e-07, "loss": 0.0036, "step": 243150 }, { "epoch": 2.5980020300229714, "grad_norm": 0.015026289969682693, "learning_rate": 9.389827747857813e-07, "loss": 0.0254, "step": 243160 }, { "epoch": 2.598108873337251, "grad_norm": 2.0203678607940674, "learning_rate": 9.389747315665222e-07, "loss": 0.002, "step": 243170 }, { "epoch": 2.5982157166515307, "grad_norm": 0.011959841474890709, "learning_rate": 9.389666878516277e-07, "loss": 0.0019, "step": 243180 }, { "epoch": 2.59832255996581, "grad_norm": 5.783129692077637, "learning_rate": 9.389586436411066e-07, "loss": 0.0127, "step": 243190 }, { "epoch": 2.5984294032800896, "grad_norm": 0.5098528265953064, "learning_rate": 9.389505989349681e-07, "loss": 0.0075, "step": 243200 }, { "epoch": 2.598536246594369, "grad_norm": 0.14461471140384674, "learning_rate": 9.389425537332212e-07, "loss": 0.0066, "step": 243210 }, { "epoch": 2.598643089908649, "grad_norm": 0.146475687623024, "learning_rate": 9.389345080358751e-07, "loss": 0.0113, "step": 243220 }, { "epoch": 2.5987499332229285, "grad_norm": 0.031217265874147415, "learning_rate": 9.389264618429387e-07, "loss": 0.0229, "step": 243230 }, { "epoch": 2.5988567765372084, "grad_norm": 0.034828588366508484, "learning_rate": 9.389184151544212e-07, "loss": 0.001, "step": 243240 }, { "epoch": 2.598963619851488, "grad_norm": 0.5071229934692383, "learning_rate": 9.389103679703318e-07, "loss": 0.0092, "step": 243250 }, { "epoch": 2.5990704631657673, "grad_norm": 0.06280206888914108, "learning_rate": 9.389023202906794e-07, "loss": 0.0075, "step": 243260 }, { "epoch": 2.5991773064800467, "grad_norm": 0.054059263318777084, "learning_rate": 9.388942721154731e-07, "loss": 0.0182, "step": 243270 }, { "epoch": 2.5992841497943266, "grad_norm": 0.06805893033742905, "learning_rate": 9.388862234447222e-07, "loss": 0.0139, "step": 243280 }, { "epoch": 2.599390993108606, "grad_norm": 1.2420666217803955, "learning_rate": 9.388781742784354e-07, "loss": 0.0139, "step": 243290 }, { "epoch": 2.599497836422886, "grad_norm": 1.5935535430908203, "learning_rate": 9.388701246166222e-07, "loss": 0.0364, "step": 243300 }, { "epoch": 2.5996046797371655, "grad_norm": 0.018368562683463097, "learning_rate": 9.388620744592914e-07, "loss": 0.008, "step": 243310 }, { "epoch": 2.599711523051445, "grad_norm": 0.5776685476303101, "learning_rate": 9.388540238064521e-07, "loss": 0.0784, "step": 243320 }, { "epoch": 2.599818366365725, "grad_norm": 0.21048372983932495, "learning_rate": 9.388459726581137e-07, "loss": 0.0244, "step": 243330 }, { "epoch": 2.5999252096800043, "grad_norm": 0.08066340535879135, "learning_rate": 9.38837921014285e-07, "loss": 0.0231, "step": 243340 }, { "epoch": 2.6000320529942837, "grad_norm": 2.239011764526367, "learning_rate": 9.388298688749751e-07, "loss": 0.0135, "step": 243350 }, { "epoch": 2.6001388963085637, "grad_norm": 4.326001167297363, "learning_rate": 9.388218162401934e-07, "loss": 0.0189, "step": 243360 }, { "epoch": 2.600245739622843, "grad_norm": 0.09362470358610153, "learning_rate": 9.388137631099485e-07, "loss": 0.0159, "step": 243370 }, { "epoch": 2.6003525829371226, "grad_norm": 0.8214297294616699, "learning_rate": 9.388057094842499e-07, "loss": 0.045, "step": 243380 }, { "epoch": 2.6004594262514025, "grad_norm": 4.9556450843811035, "learning_rate": 9.387976553631065e-07, "loss": 0.0153, "step": 243390 }, { "epoch": 2.600566269565682, "grad_norm": 2.8962292671203613, "learning_rate": 9.387896007465273e-07, "loss": 0.0101, "step": 243400 }, { "epoch": 2.6006731128799614, "grad_norm": 0.1305907815694809, "learning_rate": 9.387815456345216e-07, "loss": 0.0086, "step": 243410 }, { "epoch": 2.6007799561942413, "grad_norm": 0.0944305807352066, "learning_rate": 9.387734900270985e-07, "loss": 0.002, "step": 243420 }, { "epoch": 2.6008867995085208, "grad_norm": 0.8159964084625244, "learning_rate": 9.38765433924267e-07, "loss": 0.0249, "step": 243430 }, { "epoch": 2.6009936428228, "grad_norm": 0.24028779566287994, "learning_rate": 9.38757377326036e-07, "loss": 0.0101, "step": 243440 }, { "epoch": 2.60110048613708, "grad_norm": 5.29927921295166, "learning_rate": 9.38749320232415e-07, "loss": 0.0315, "step": 243450 }, { "epoch": 2.6012073294513596, "grad_norm": 0.005396178457885981, "learning_rate": 9.387412626434129e-07, "loss": 0.0491, "step": 243460 }, { "epoch": 2.601314172765639, "grad_norm": 0.010047635063529015, "learning_rate": 9.387332045590389e-07, "loss": 0.0904, "step": 243470 }, { "epoch": 2.601421016079919, "grad_norm": 0.0748230367898941, "learning_rate": 9.387251459793018e-07, "loss": 0.032, "step": 243480 }, { "epoch": 2.6015278593941984, "grad_norm": 3.0168962478637695, "learning_rate": 9.387170869042109e-07, "loss": 0.018, "step": 243490 }, { "epoch": 2.601634702708478, "grad_norm": 1.7646552324295044, "learning_rate": 9.387090273337753e-07, "loss": 0.0187, "step": 243500 }, { "epoch": 2.6017415460227578, "grad_norm": 2.9430699348449707, "learning_rate": 9.387009672680042e-07, "loss": 0.0154, "step": 243510 }, { "epoch": 2.601848389337037, "grad_norm": 0.7822921872138977, "learning_rate": 9.386929067069065e-07, "loss": 0.0151, "step": 243520 }, { "epoch": 2.601955232651317, "grad_norm": 0.015676969662308693, "learning_rate": 9.386848456504914e-07, "loss": 0.0011, "step": 243530 }, { "epoch": 2.6020620759655966, "grad_norm": 0.1359696090221405, "learning_rate": 9.386767840987681e-07, "loss": 0.0087, "step": 243540 }, { "epoch": 2.602168919279876, "grad_norm": 0.6042238473892212, "learning_rate": 9.386687220517455e-07, "loss": 0.0172, "step": 243550 }, { "epoch": 2.6022757625941555, "grad_norm": 2.0745227336883545, "learning_rate": 9.38660659509433e-07, "loss": 0.0427, "step": 243560 }, { "epoch": 2.6023826059084354, "grad_norm": 2.070425033569336, "learning_rate": 9.386525964718394e-07, "loss": 0.0106, "step": 243570 }, { "epoch": 2.602489449222715, "grad_norm": 4.958557605743408, "learning_rate": 9.386445329389738e-07, "loss": 0.0255, "step": 243580 }, { "epoch": 2.6025962925369948, "grad_norm": 0.1033666580915451, "learning_rate": 9.386364689108455e-07, "loss": 0.0228, "step": 243590 }, { "epoch": 2.602703135851274, "grad_norm": 0.014407440088689327, "learning_rate": 9.386284043874635e-07, "loss": 0.0068, "step": 243600 }, { "epoch": 2.6028099791655537, "grad_norm": 0.007949822582304478, "learning_rate": 9.38620339368837e-07, "loss": 0.0031, "step": 243610 }, { "epoch": 2.602916822479833, "grad_norm": 0.16081169247627258, "learning_rate": 9.38612273854975e-07, "loss": 0.0082, "step": 243620 }, { "epoch": 2.603023665794113, "grad_norm": 0.009072012268006802, "learning_rate": 9.386042078458867e-07, "loss": 0.0383, "step": 243630 }, { "epoch": 2.6031305091083925, "grad_norm": 0.08268606662750244, "learning_rate": 9.385961413415809e-07, "loss": 0.031, "step": 243640 }, { "epoch": 2.6032373524226724, "grad_norm": 0.3571035861968994, "learning_rate": 9.385880743420671e-07, "loss": 0.0419, "step": 243650 }, { "epoch": 2.603344195736952, "grad_norm": 0.4077659845352173, "learning_rate": 9.385800068473544e-07, "loss": 0.0243, "step": 243660 }, { "epoch": 2.6034510390512313, "grad_norm": 3.2760255336761475, "learning_rate": 9.385719388574515e-07, "loss": 0.013, "step": 243670 }, { "epoch": 2.603557882365511, "grad_norm": 0.01530925091356039, "learning_rate": 9.38563870372368e-07, "loss": 0.0011, "step": 243680 }, { "epoch": 2.6036647256797907, "grad_norm": 0.00947509240359068, "learning_rate": 9.385558013921128e-07, "loss": 0.0075, "step": 243690 }, { "epoch": 2.60377156899407, "grad_norm": 0.02931319549679756, "learning_rate": 9.385477319166949e-07, "loss": 0.0242, "step": 243700 }, { "epoch": 2.60387841230835, "grad_norm": 2.213557481765747, "learning_rate": 9.385396619461234e-07, "loss": 0.0356, "step": 243710 }, { "epoch": 2.6039852556226295, "grad_norm": 0.09744618088006973, "learning_rate": 9.385315914804077e-07, "loss": 0.0013, "step": 243720 }, { "epoch": 2.604092098936909, "grad_norm": 2.8863956928253174, "learning_rate": 9.385235205195567e-07, "loss": 0.0527, "step": 243730 }, { "epoch": 2.6041989422511884, "grad_norm": 0.5815045237541199, "learning_rate": 9.385154490635796e-07, "loss": 0.0311, "step": 243740 }, { "epoch": 2.6043057855654683, "grad_norm": 0.003291183849796653, "learning_rate": 9.385073771124854e-07, "loss": 0.0044, "step": 243750 }, { "epoch": 2.604412628879748, "grad_norm": 6.772679328918457, "learning_rate": 9.384993046662832e-07, "loss": 0.0132, "step": 243760 }, { "epoch": 2.6045194721940277, "grad_norm": 0.02510887198150158, "learning_rate": 9.384912317249824e-07, "loss": 0.0243, "step": 243770 }, { "epoch": 2.604626315508307, "grad_norm": 0.02707112580537796, "learning_rate": 9.384831582885917e-07, "loss": 0.0088, "step": 243780 }, { "epoch": 2.6047331588225866, "grad_norm": 4.04617166519165, "learning_rate": 9.384750843571206e-07, "loss": 0.0251, "step": 243790 }, { "epoch": 2.604840002136866, "grad_norm": 0.2856518626213074, "learning_rate": 9.384670099305778e-07, "loss": 0.0088, "step": 243800 }, { "epoch": 2.604946845451146, "grad_norm": 2.3379552364349365, "learning_rate": 9.384589350089728e-07, "loss": 0.027, "step": 243810 }, { "epoch": 2.6050536887654254, "grad_norm": 9.209086418151855, "learning_rate": 9.384508595923146e-07, "loss": 0.0532, "step": 243820 }, { "epoch": 2.6051605320797053, "grad_norm": 0.668328046798706, "learning_rate": 9.384427836806122e-07, "loss": 0.026, "step": 243830 }, { "epoch": 2.605267375393985, "grad_norm": 3.404134750366211, "learning_rate": 9.38434707273875e-07, "loss": 0.0448, "step": 243840 }, { "epoch": 2.6053742187082642, "grad_norm": 8.01849365234375, "learning_rate": 9.384266303721117e-07, "loss": 0.0362, "step": 243850 }, { "epoch": 2.6054810620225437, "grad_norm": 0.556185245513916, "learning_rate": 9.384185529753317e-07, "loss": 0.0145, "step": 243860 }, { "epoch": 2.6055879053368236, "grad_norm": 0.012530270032584667, "learning_rate": 9.384104750835441e-07, "loss": 0.0269, "step": 243870 }, { "epoch": 2.605694748651103, "grad_norm": 0.012625477276742458, "learning_rate": 9.384023966967581e-07, "loss": 0.0176, "step": 243880 }, { "epoch": 2.605801591965383, "grad_norm": 1.8124302625656128, "learning_rate": 9.383943178149825e-07, "loss": 0.0281, "step": 243890 }, { "epoch": 2.6059084352796624, "grad_norm": 0.7489787340164185, "learning_rate": 9.383862384382266e-07, "loss": 0.0173, "step": 243900 }, { "epoch": 2.606015278593942, "grad_norm": 0.0029427632689476013, "learning_rate": 9.383781585664997e-07, "loss": 0.0132, "step": 243910 }, { "epoch": 2.6061221219082213, "grad_norm": 0.0069187418557703495, "learning_rate": 9.383700781998107e-07, "loss": 0.038, "step": 243920 }, { "epoch": 2.6062289652225012, "grad_norm": 0.3136788308620453, "learning_rate": 9.383619973381689e-07, "loss": 0.013, "step": 243930 }, { "epoch": 2.6063358085367807, "grad_norm": 0.005897199735045433, "learning_rate": 9.383539159815832e-07, "loss": 0.0157, "step": 243940 }, { "epoch": 2.6064426518510606, "grad_norm": 4.080827236175537, "learning_rate": 9.38345834130063e-07, "loss": 0.0098, "step": 243950 }, { "epoch": 2.60654949516534, "grad_norm": 2.0038864612579346, "learning_rate": 9.383377517836171e-07, "loss": 0.0382, "step": 243960 }, { "epoch": 2.6066563384796195, "grad_norm": 0.038669511675834656, "learning_rate": 9.383296689422548e-07, "loss": 0.0298, "step": 243970 }, { "epoch": 2.606763181793899, "grad_norm": 20.733543395996094, "learning_rate": 9.383215856059853e-07, "loss": 0.0555, "step": 243980 }, { "epoch": 2.606870025108179, "grad_norm": 2.8330178260803223, "learning_rate": 9.383135017748176e-07, "loss": 0.0143, "step": 243990 }, { "epoch": 2.6069768684224583, "grad_norm": 13.0026216506958, "learning_rate": 9.38305417448761e-07, "loss": 0.0112, "step": 244000 }, { "epoch": 2.6070837117367383, "grad_norm": 0.12437213212251663, "learning_rate": 9.382973326278242e-07, "loss": 0.0132, "step": 244010 }, { "epoch": 2.6071905550510177, "grad_norm": 1.7579342126846313, "learning_rate": 9.38289247312017e-07, "loss": 0.0206, "step": 244020 }, { "epoch": 2.607297398365297, "grad_norm": 0.0960218757390976, "learning_rate": 9.382811615013479e-07, "loss": 0.0046, "step": 244030 }, { "epoch": 2.6074042416795766, "grad_norm": 0.06354231387376785, "learning_rate": 9.382730751958264e-07, "loss": 0.0104, "step": 244040 }, { "epoch": 2.6075110849938565, "grad_norm": 0.5768094658851624, "learning_rate": 9.382649883954615e-07, "loss": 0.0068, "step": 244050 }, { "epoch": 2.607617928308136, "grad_norm": 7.929896831512451, "learning_rate": 9.382569011002624e-07, "loss": 0.0162, "step": 244060 }, { "epoch": 2.607724771622416, "grad_norm": 0.13332971930503845, "learning_rate": 9.382488133102382e-07, "loss": 0.0291, "step": 244070 }, { "epoch": 2.6078316149366954, "grad_norm": 2.134152412414551, "learning_rate": 9.382407250253979e-07, "loss": 0.0161, "step": 244080 }, { "epoch": 2.607938458250975, "grad_norm": 0.07227112352848053, "learning_rate": 9.382326362457509e-07, "loss": 0.0202, "step": 244090 }, { "epoch": 2.6080453015652547, "grad_norm": 0.13796548545360565, "learning_rate": 9.382245469713059e-07, "loss": 0.0318, "step": 244100 }, { "epoch": 2.608152144879534, "grad_norm": 1.4546886682510376, "learning_rate": 9.382164572020726e-07, "loss": 0.0261, "step": 244110 }, { "epoch": 2.6082589881938136, "grad_norm": 1.451809048652649, "learning_rate": 9.382083669380597e-07, "loss": 0.0487, "step": 244120 }, { "epoch": 2.6083658315080935, "grad_norm": 0.013505426235496998, "learning_rate": 9.382002761792764e-07, "loss": 0.0224, "step": 244130 }, { "epoch": 2.608472674822373, "grad_norm": 0.08107121288776398, "learning_rate": 9.381921849257321e-07, "loss": 0.0029, "step": 244140 }, { "epoch": 2.6085795181366525, "grad_norm": 0.014010913670063019, "learning_rate": 9.381840931774356e-07, "loss": 0.0355, "step": 244150 }, { "epoch": 2.6086863614509324, "grad_norm": 0.2002473771572113, "learning_rate": 9.381760009343964e-07, "loss": 0.0028, "step": 244160 }, { "epoch": 2.608793204765212, "grad_norm": 0.08309534192085266, "learning_rate": 9.381679081966233e-07, "loss": 0.0096, "step": 244170 }, { "epoch": 2.6089000480794913, "grad_norm": 2.234551191329956, "learning_rate": 9.381598149641255e-07, "loss": 0.0415, "step": 244180 }, { "epoch": 2.609006891393771, "grad_norm": 0.2162056267261505, "learning_rate": 9.381517212369123e-07, "loss": 0.0085, "step": 244190 }, { "epoch": 2.6091137347080506, "grad_norm": 0.458779901266098, "learning_rate": 9.381436270149927e-07, "loss": 0.0121, "step": 244200 }, { "epoch": 2.60922057802233, "grad_norm": 0.10468979924917221, "learning_rate": 9.381355322983759e-07, "loss": 0.021, "step": 244210 }, { "epoch": 2.60932742133661, "grad_norm": 1.053514838218689, "learning_rate": 9.381274370870708e-07, "loss": 0.0497, "step": 244220 }, { "epoch": 2.6094342646508895, "grad_norm": 0.0274296123534441, "learning_rate": 9.38119341381087e-07, "loss": 0.0393, "step": 244230 }, { "epoch": 2.609541107965169, "grad_norm": 0.46036869287490845, "learning_rate": 9.381112451804334e-07, "loss": 0.0155, "step": 244240 }, { "epoch": 2.609647951279449, "grad_norm": 4.971604347229004, "learning_rate": 9.381031484851192e-07, "loss": 0.0443, "step": 244250 }, { "epoch": 2.6097547945937283, "grad_norm": 0.2121044546365738, "learning_rate": 9.380950512951533e-07, "loss": 0.0215, "step": 244260 }, { "epoch": 2.6098616379080077, "grad_norm": 0.18188337981700897, "learning_rate": 9.380869536105451e-07, "loss": 0.031, "step": 244270 }, { "epoch": 2.6099684812222876, "grad_norm": 0.028089752420783043, "learning_rate": 9.380788554313036e-07, "loss": 0.037, "step": 244280 }, { "epoch": 2.610075324536567, "grad_norm": 0.25438591837882996, "learning_rate": 9.380707567574381e-07, "loss": 0.0132, "step": 244290 }, { "epoch": 2.610182167850847, "grad_norm": 0.016272377222776413, "learning_rate": 9.380626575889576e-07, "loss": 0.0163, "step": 244300 }, { "epoch": 2.6102890111651265, "grad_norm": 1.7711321115493774, "learning_rate": 9.380545579258713e-07, "loss": 0.0043, "step": 244310 }, { "epoch": 2.610395854479406, "grad_norm": 1.9722386598587036, "learning_rate": 9.380464577681884e-07, "loss": 0.0445, "step": 244320 }, { "epoch": 2.6105026977936854, "grad_norm": 1.2594507932662964, "learning_rate": 9.380383571159178e-07, "loss": 0.0093, "step": 244330 }, { "epoch": 2.6106095411079653, "grad_norm": 0.16492748260498047, "learning_rate": 9.38030255969069e-07, "loss": 0.0274, "step": 244340 }, { "epoch": 2.6107163844222447, "grad_norm": 0.04903491958975792, "learning_rate": 9.380221543276509e-07, "loss": 0.0176, "step": 244350 }, { "epoch": 2.6108232277365246, "grad_norm": 0.06231115385890007, "learning_rate": 9.380140521916729e-07, "loss": 0.0104, "step": 244360 }, { "epoch": 2.610930071050804, "grad_norm": 0.05166873335838318, "learning_rate": 9.380059495611439e-07, "loss": 0.0098, "step": 244370 }, { "epoch": 2.6110369143650836, "grad_norm": 0.9221269488334656, "learning_rate": 9.379978464360731e-07, "loss": 0.0079, "step": 244380 }, { "epoch": 2.611143757679363, "grad_norm": 0.15906032919883728, "learning_rate": 9.379897428164696e-07, "loss": 0.0288, "step": 244390 }, { "epoch": 2.611250600993643, "grad_norm": 0.007618540897965431, "learning_rate": 9.379816387023427e-07, "loss": 0.0112, "step": 244400 }, { "epoch": 2.6113574443079224, "grad_norm": 0.4739300310611725, "learning_rate": 9.379735340937014e-07, "loss": 0.0062, "step": 244410 }, { "epoch": 2.6114642876222023, "grad_norm": 0.15146702527999878, "learning_rate": 9.37965428990555e-07, "loss": 0.0215, "step": 244420 }, { "epoch": 2.6115711309364817, "grad_norm": 0.02757488377392292, "learning_rate": 9.379573233929126e-07, "loss": 0.0165, "step": 244430 }, { "epoch": 2.611677974250761, "grad_norm": 0.007633943110704422, "learning_rate": 9.379492173007833e-07, "loss": 0.013, "step": 244440 }, { "epoch": 2.6117848175650407, "grad_norm": 0.0065559144131839275, "learning_rate": 9.379411107141761e-07, "loss": 0.0052, "step": 244450 }, { "epoch": 2.6118916608793206, "grad_norm": 0.7880085110664368, "learning_rate": 9.379330036331006e-07, "loss": 0.005, "step": 244460 }, { "epoch": 2.6119985041936, "grad_norm": 1.3599352836608887, "learning_rate": 9.379248960575657e-07, "loss": 0.0136, "step": 244470 }, { "epoch": 2.61210534750788, "grad_norm": 0.05845795199275017, "learning_rate": 9.379167879875803e-07, "loss": 0.0039, "step": 244480 }, { "epoch": 2.6122121908221594, "grad_norm": 7.055955410003662, "learning_rate": 9.379086794231539e-07, "loss": 0.0249, "step": 244490 }, { "epoch": 2.612319034136439, "grad_norm": 0.8816732168197632, "learning_rate": 9.379005703642956e-07, "loss": 0.0197, "step": 244500 }, { "epoch": 2.6124258774507183, "grad_norm": 0.00980933103710413, "learning_rate": 9.378924608110145e-07, "loss": 0.0204, "step": 244510 }, { "epoch": 2.612532720764998, "grad_norm": 0.0037241994868963957, "learning_rate": 9.378843507633198e-07, "loss": 0.0109, "step": 244520 }, { "epoch": 2.6126395640792777, "grad_norm": 0.006384738255292177, "learning_rate": 9.378762402212206e-07, "loss": 0.0327, "step": 244530 }, { "epoch": 2.6127464073935576, "grad_norm": 1.901995062828064, "learning_rate": 9.378681291847259e-07, "loss": 0.0047, "step": 244540 }, { "epoch": 2.612853250707837, "grad_norm": 1.5923100709915161, "learning_rate": 9.378600176538451e-07, "loss": 0.0078, "step": 244550 }, { "epoch": 2.6129600940221165, "grad_norm": 6.063998699188232, "learning_rate": 9.378519056285875e-07, "loss": 0.0135, "step": 244560 }, { "epoch": 2.613066937336396, "grad_norm": 4.974520206451416, "learning_rate": 9.378437931089619e-07, "loss": 0.0109, "step": 244570 }, { "epoch": 2.613173780650676, "grad_norm": 0.0986376628279686, "learning_rate": 9.378356800949776e-07, "loss": 0.0056, "step": 244580 }, { "epoch": 2.6132806239649553, "grad_norm": 4.5377960205078125, "learning_rate": 9.378275665866438e-07, "loss": 0.0409, "step": 244590 }, { "epoch": 2.613387467279235, "grad_norm": 3.008840560913086, "learning_rate": 9.378194525839697e-07, "loss": 0.0268, "step": 244600 }, { "epoch": 2.6134943105935147, "grad_norm": 0.07736983150243759, "learning_rate": 9.378113380869643e-07, "loss": 0.015, "step": 244610 }, { "epoch": 2.613601153907794, "grad_norm": 0.4711942672729492, "learning_rate": 9.378032230956369e-07, "loss": 0.0036, "step": 244620 }, { "epoch": 2.6137079972220736, "grad_norm": 0.015002185478806496, "learning_rate": 9.377951076099966e-07, "loss": 0.0015, "step": 244630 }, { "epoch": 2.6138148405363535, "grad_norm": 1.5233813524246216, "learning_rate": 9.377869916300525e-07, "loss": 0.0395, "step": 244640 }, { "epoch": 2.613921683850633, "grad_norm": 1.8254671096801758, "learning_rate": 9.37778875155814e-07, "loss": 0.0044, "step": 244650 }, { "epoch": 2.614028527164913, "grad_norm": 0.44185367226600647, "learning_rate": 9.377707581872901e-07, "loss": 0.01, "step": 244660 }, { "epoch": 2.6141353704791923, "grad_norm": 3.64111328125, "learning_rate": 9.377626407244899e-07, "loss": 0.0231, "step": 244670 }, { "epoch": 2.6142422137934718, "grad_norm": 1.7232003211975098, "learning_rate": 9.377545227674226e-07, "loss": 0.0086, "step": 244680 }, { "epoch": 2.6143490571077512, "grad_norm": 0.7407872080802917, "learning_rate": 9.377464043160974e-07, "loss": 0.0031, "step": 244690 }, { "epoch": 2.614455900422031, "grad_norm": 0.011837596073746681, "learning_rate": 9.377382853705236e-07, "loss": 0.0086, "step": 244700 }, { "epoch": 2.6145627437363106, "grad_norm": 4.346612453460693, "learning_rate": 9.377301659307102e-07, "loss": 0.0094, "step": 244710 }, { "epoch": 2.6146695870505905, "grad_norm": 0.005243323277682066, "learning_rate": 9.377220459966664e-07, "loss": 0.0177, "step": 244720 }, { "epoch": 2.61477643036487, "grad_norm": 0.033235371112823486, "learning_rate": 9.377139255684012e-07, "loss": 0.027, "step": 244730 }, { "epoch": 2.6148832736791494, "grad_norm": 0.09891178458929062, "learning_rate": 9.377058046459241e-07, "loss": 0.0084, "step": 244740 }, { "epoch": 2.614990116993429, "grad_norm": 2.0087673664093018, "learning_rate": 9.376976832292442e-07, "loss": 0.0092, "step": 244750 }, { "epoch": 2.6150969603077088, "grad_norm": 0.0061793155036866665, "learning_rate": 9.376895613183704e-07, "loss": 0.0026, "step": 244760 }, { "epoch": 2.6152038036219882, "grad_norm": 0.00261727930046618, "learning_rate": 9.37681438913312e-07, "loss": 0.0067, "step": 244770 }, { "epoch": 2.615310646936268, "grad_norm": 0.050760962069034576, "learning_rate": 9.376733160140786e-07, "loss": 0.0072, "step": 244780 }, { "epoch": 2.6154174902505476, "grad_norm": 0.005401285365223885, "learning_rate": 9.376651926206786e-07, "loss": 0.0228, "step": 244790 }, { "epoch": 2.615524333564827, "grad_norm": 8.36196517944336, "learning_rate": 9.376570687331218e-07, "loss": 0.0105, "step": 244800 }, { "epoch": 2.615631176879107, "grad_norm": 3.22562575340271, "learning_rate": 9.37648944351417e-07, "loss": 0.0043, "step": 244810 }, { "epoch": 2.6157380201933864, "grad_norm": 4.983692169189453, "learning_rate": 9.376408194755736e-07, "loss": 0.0134, "step": 244820 }, { "epoch": 2.615844863507666, "grad_norm": 7.626765251159668, "learning_rate": 9.376326941056006e-07, "loss": 0.0163, "step": 244830 }, { "epoch": 2.6159517068219458, "grad_norm": 4.387684345245361, "learning_rate": 9.376245682415074e-07, "loss": 0.031, "step": 244840 }, { "epoch": 2.6160585501362252, "grad_norm": 0.09119927883148193, "learning_rate": 9.376164418833027e-07, "loss": 0.0237, "step": 244850 }, { "epoch": 2.6161653934505047, "grad_norm": 1.1708121299743652, "learning_rate": 9.376083150309963e-07, "loss": 0.0145, "step": 244860 }, { "epoch": 2.6162722367647846, "grad_norm": 0.024255158379673958, "learning_rate": 9.376001876845972e-07, "loss": 0.0089, "step": 244870 }, { "epoch": 2.616379080079064, "grad_norm": 6.614399433135986, "learning_rate": 9.375920598441143e-07, "loss": 0.0035, "step": 244880 }, { "epoch": 2.6164859233933435, "grad_norm": 0.2614109218120575, "learning_rate": 9.375839315095568e-07, "loss": 0.0697, "step": 244890 }, { "epoch": 2.6165927667076234, "grad_norm": 0.04110592603683472, "learning_rate": 9.375758026809342e-07, "loss": 0.0311, "step": 244900 }, { "epoch": 2.616699610021903, "grad_norm": 0.19896340370178223, "learning_rate": 9.375676733582555e-07, "loss": 0.0167, "step": 244910 }, { "epoch": 2.6168064533361823, "grad_norm": 12.314108848571777, "learning_rate": 9.375595435415298e-07, "loss": 0.038, "step": 244920 }, { "epoch": 2.6169132966504622, "grad_norm": 5.924069404602051, "learning_rate": 9.375514132307663e-07, "loss": 0.0593, "step": 244930 }, { "epoch": 2.6170201399647417, "grad_norm": 0.1282740980386734, "learning_rate": 9.375432824259744e-07, "loss": 0.0459, "step": 244940 }, { "epoch": 2.617126983279021, "grad_norm": 1.561620831489563, "learning_rate": 9.37535151127163e-07, "loss": 0.0122, "step": 244950 }, { "epoch": 2.617233826593301, "grad_norm": 2.5209004878997803, "learning_rate": 9.375270193343414e-07, "loss": 0.0243, "step": 244960 }, { "epoch": 2.6173406699075805, "grad_norm": 1.257545828819275, "learning_rate": 9.375188870475187e-07, "loss": 0.0145, "step": 244970 }, { "epoch": 2.61744751322186, "grad_norm": 0.07023217529058456, "learning_rate": 9.375107542667043e-07, "loss": 0.0024, "step": 244980 }, { "epoch": 2.61755435653614, "grad_norm": 0.04298580437898636, "learning_rate": 9.37502620991907e-07, "loss": 0.0379, "step": 244990 }, { "epoch": 2.6176611998504193, "grad_norm": 0.042854372411966324, "learning_rate": 9.374944872231365e-07, "loss": 0.0055, "step": 245000 }, { "epoch": 2.6177680431646992, "grad_norm": 0.25810033082962036, "learning_rate": 9.374863529604016e-07, "loss": 0.0242, "step": 245010 }, { "epoch": 2.6178748864789787, "grad_norm": 0.11845063418149948, "learning_rate": 9.374782182037115e-07, "loss": 0.0161, "step": 245020 }, { "epoch": 2.617981729793258, "grad_norm": 0.024613581597805023, "learning_rate": 9.374700829530756e-07, "loss": 0.0148, "step": 245030 }, { "epoch": 2.6180885731075376, "grad_norm": 1.12986159324646, "learning_rate": 9.374619472085029e-07, "loss": 0.0422, "step": 245040 }, { "epoch": 2.6181954164218175, "grad_norm": 1.9414799213409424, "learning_rate": 9.374538109700026e-07, "loss": 0.0043, "step": 245050 }, { "epoch": 2.618302259736097, "grad_norm": 0.024121452122926712, "learning_rate": 9.374456742375841e-07, "loss": 0.0251, "step": 245060 }, { "epoch": 2.618409103050377, "grad_norm": 0.8079806566238403, "learning_rate": 9.374375370112562e-07, "loss": 0.0119, "step": 245070 }, { "epoch": 2.6185159463646563, "grad_norm": 0.14733606576919556, "learning_rate": 9.374293992910285e-07, "loss": 0.0099, "step": 245080 }, { "epoch": 2.618622789678936, "grad_norm": 0.03766651824116707, "learning_rate": 9.374212610769099e-07, "loss": 0.0112, "step": 245090 }, { "epoch": 2.6187296329932153, "grad_norm": 0.09948946535587311, "learning_rate": 9.374131223689097e-07, "loss": 0.0103, "step": 245100 }, { "epoch": 2.618836476307495, "grad_norm": 1.8599011898040771, "learning_rate": 9.37404983167037e-07, "loss": 0.0335, "step": 245110 }, { "epoch": 2.6189433196217746, "grad_norm": 4.523409843444824, "learning_rate": 9.373968434713011e-07, "loss": 0.0131, "step": 245120 }, { "epoch": 2.6190501629360545, "grad_norm": 2.036837577819824, "learning_rate": 9.373887032817112e-07, "loss": 0.1312, "step": 245130 }, { "epoch": 2.619157006250334, "grad_norm": 3.7120859622955322, "learning_rate": 9.373805625982764e-07, "loss": 0.0339, "step": 245140 }, { "epoch": 2.6192638495646134, "grad_norm": 0.12166161835193634, "learning_rate": 9.373724214210059e-07, "loss": 0.0129, "step": 245150 }, { "epoch": 2.619370692878893, "grad_norm": 3.8167483806610107, "learning_rate": 9.37364279749909e-07, "loss": 0.0293, "step": 245160 }, { "epoch": 2.619477536193173, "grad_norm": 0.035137154161930084, "learning_rate": 9.373561375849949e-07, "loss": 0.0091, "step": 245170 }, { "epoch": 2.6195843795074523, "grad_norm": 2.1641619205474854, "learning_rate": 9.373479949262726e-07, "loss": 0.0191, "step": 245180 }, { "epoch": 2.619691222821732, "grad_norm": 6.816671371459961, "learning_rate": 9.373398517737514e-07, "loss": 0.0173, "step": 245190 }, { "epoch": 2.6197980661360116, "grad_norm": 1.0367991924285889, "learning_rate": 9.373317081274406e-07, "loss": 0.0218, "step": 245200 }, { "epoch": 2.619904909450291, "grad_norm": 1.3091844320297241, "learning_rate": 9.373235639873492e-07, "loss": 0.0312, "step": 245210 }, { "epoch": 2.6200117527645705, "grad_norm": 0.35665225982666016, "learning_rate": 9.373154193534865e-07, "loss": 0.0025, "step": 245220 }, { "epoch": 2.6201185960788504, "grad_norm": 3.73140811920166, "learning_rate": 9.373072742258617e-07, "loss": 0.0083, "step": 245230 }, { "epoch": 2.62022543939313, "grad_norm": 0.12725722789764404, "learning_rate": 9.372991286044838e-07, "loss": 0.0456, "step": 245240 }, { "epoch": 2.62033228270741, "grad_norm": 13.121856689453125, "learning_rate": 9.372909824893625e-07, "loss": 0.0386, "step": 245250 }, { "epoch": 2.6204391260216893, "grad_norm": 0.2528246343135834, "learning_rate": 9.372828358805065e-07, "loss": 0.0064, "step": 245260 }, { "epoch": 2.6205459693359687, "grad_norm": 1.2647031545639038, "learning_rate": 9.37274688777925e-07, "loss": 0.0047, "step": 245270 }, { "epoch": 2.620652812650248, "grad_norm": 7.45784854888916, "learning_rate": 9.372665411816277e-07, "loss": 0.0313, "step": 245280 }, { "epoch": 2.620759655964528, "grad_norm": 3.5131707191467285, "learning_rate": 9.372583930916234e-07, "loss": 0.009, "step": 245290 }, { "epoch": 2.6208664992788075, "grad_norm": 0.007295339368283749, "learning_rate": 9.372502445079212e-07, "loss": 0.0042, "step": 245300 }, { "epoch": 2.6209733425930875, "grad_norm": 0.7884068489074707, "learning_rate": 9.372420954305307e-07, "loss": 0.0601, "step": 245310 }, { "epoch": 2.621080185907367, "grad_norm": 3.6158478260040283, "learning_rate": 9.372339458594607e-07, "loss": 0.0069, "step": 245320 }, { "epoch": 2.6211870292216464, "grad_norm": 12.917964935302734, "learning_rate": 9.372257957947207e-07, "loss": 0.1063, "step": 245330 }, { "epoch": 2.621293872535926, "grad_norm": 3.4071054458618164, "learning_rate": 9.372176452363196e-07, "loss": 0.0412, "step": 245340 }, { "epoch": 2.6214007158502057, "grad_norm": 0.06915412843227386, "learning_rate": 9.37209494184267e-07, "loss": 0.0239, "step": 245350 }, { "epoch": 2.621507559164485, "grad_norm": 0.36143916845321655, "learning_rate": 9.372013426385718e-07, "loss": 0.0222, "step": 245360 }, { "epoch": 2.621614402478765, "grad_norm": 0.3735795021057129, "learning_rate": 9.371931905992432e-07, "loss": 0.034, "step": 245370 }, { "epoch": 2.6217212457930446, "grad_norm": 0.0078117516823112965, "learning_rate": 9.371850380662905e-07, "loss": 0.009, "step": 245380 }, { "epoch": 2.621828089107324, "grad_norm": 0.8251190781593323, "learning_rate": 9.37176885039723e-07, "loss": 0.0158, "step": 245390 }, { "epoch": 2.6219349324216035, "grad_norm": 0.4519728720188141, "learning_rate": 9.371687315195498e-07, "loss": 0.028, "step": 245400 }, { "epoch": 2.6220417757358834, "grad_norm": 5.919281482696533, "learning_rate": 9.3716057750578e-07, "loss": 0.0383, "step": 245410 }, { "epoch": 2.622148619050163, "grad_norm": 6.2836713790893555, "learning_rate": 9.37152422998423e-07, "loss": 0.0305, "step": 245420 }, { "epoch": 2.6222554623644427, "grad_norm": 0.17069955170154572, "learning_rate": 9.371442679974879e-07, "loss": 0.0359, "step": 245430 }, { "epoch": 2.622362305678722, "grad_norm": 0.011758624576032162, "learning_rate": 9.37136112502984e-07, "loss": 0.0337, "step": 245440 }, { "epoch": 2.6224691489930017, "grad_norm": 0.9676700234413147, "learning_rate": 9.371279565149203e-07, "loss": 0.0152, "step": 245450 }, { "epoch": 2.622575992307281, "grad_norm": 0.034345462918281555, "learning_rate": 9.371198000333062e-07, "loss": 0.0034, "step": 245460 }, { "epoch": 2.622682835621561, "grad_norm": 0.015477346256375313, "learning_rate": 9.371116430581509e-07, "loss": 0.0067, "step": 245470 }, { "epoch": 2.6227896789358405, "grad_norm": 2.3654778003692627, "learning_rate": 9.371034855894635e-07, "loss": 0.0269, "step": 245480 }, { "epoch": 2.6228965222501204, "grad_norm": 3.5339910984039307, "learning_rate": 9.370953276272533e-07, "loss": 0.0471, "step": 245490 }, { "epoch": 2.6230033655644, "grad_norm": 0.21702240407466888, "learning_rate": 9.370871691715295e-07, "loss": 0.0145, "step": 245500 }, { "epoch": 2.6231102088786793, "grad_norm": 0.5095475912094116, "learning_rate": 9.370790102223013e-07, "loss": 0.0061, "step": 245510 }, { "epoch": 2.6232170521929588, "grad_norm": 0.004976659081876278, "learning_rate": 9.370708507795779e-07, "loss": 0.1033, "step": 245520 }, { "epoch": 2.6233238955072387, "grad_norm": 0.2764444947242737, "learning_rate": 9.370626908433685e-07, "loss": 0.0357, "step": 245530 }, { "epoch": 2.623430738821518, "grad_norm": 0.004008674528449774, "learning_rate": 9.370545304136823e-07, "loss": 0.0247, "step": 245540 }, { "epoch": 2.623537582135798, "grad_norm": 0.1702091544866562, "learning_rate": 9.370463694905286e-07, "loss": 0.0183, "step": 245550 }, { "epoch": 2.6236444254500775, "grad_norm": 3.054949998855591, "learning_rate": 9.370382080739166e-07, "loss": 0.0201, "step": 245560 }, { "epoch": 2.623751268764357, "grad_norm": 0.3919515311717987, "learning_rate": 9.370300461638554e-07, "loss": 0.0313, "step": 245570 }, { "epoch": 2.623858112078637, "grad_norm": 1.2580766677856445, "learning_rate": 9.370218837603544e-07, "loss": 0.0088, "step": 245580 }, { "epoch": 2.6239649553929163, "grad_norm": 0.7889396548271179, "learning_rate": 9.370137208634227e-07, "loss": 0.0633, "step": 245590 }, { "epoch": 2.6240717987071958, "grad_norm": 5.096667766571045, "learning_rate": 9.370055574730694e-07, "loss": 0.0112, "step": 245600 }, { "epoch": 2.6241786420214757, "grad_norm": 0.21776440739631653, "learning_rate": 9.36997393589304e-07, "loss": 0.0034, "step": 245610 }, { "epoch": 2.624285485335755, "grad_norm": 0.06468655914068222, "learning_rate": 9.369892292121354e-07, "loss": 0.0417, "step": 245620 }, { "epoch": 2.6243923286500346, "grad_norm": 0.58452969789505, "learning_rate": 9.369810643415731e-07, "loss": 0.0177, "step": 245630 }, { "epoch": 2.6244991719643145, "grad_norm": 0.07601296156644821, "learning_rate": 9.369728989776262e-07, "loss": 0.0027, "step": 245640 }, { "epoch": 2.624606015278594, "grad_norm": 5.15266752243042, "learning_rate": 9.369647331203038e-07, "loss": 0.0057, "step": 245650 }, { "epoch": 2.6247128585928734, "grad_norm": 0.33561310172080994, "learning_rate": 9.369565667696153e-07, "loss": 0.0051, "step": 245660 }, { "epoch": 2.6248197019071533, "grad_norm": 0.002018360188230872, "learning_rate": 9.369483999255699e-07, "loss": 0.0142, "step": 245670 }, { "epoch": 2.6249265452214328, "grad_norm": 1.7878620624542236, "learning_rate": 9.369402325881767e-07, "loss": 0.0261, "step": 245680 }, { "epoch": 2.625033388535712, "grad_norm": 3.259478807449341, "learning_rate": 9.369320647574452e-07, "loss": 0.0073, "step": 245690 }, { "epoch": 2.625140231849992, "grad_norm": 0.018752148374915123, "learning_rate": 9.369238964333841e-07, "loss": 0.0258, "step": 245700 }, { "epoch": 2.6252470751642716, "grad_norm": 0.01943514123558998, "learning_rate": 9.369157276160031e-07, "loss": 0.0664, "step": 245710 }, { "epoch": 2.625353918478551, "grad_norm": 0.42762407660484314, "learning_rate": 9.369075583053114e-07, "loss": 0.014, "step": 245720 }, { "epoch": 2.625460761792831, "grad_norm": 0.18608570098876953, "learning_rate": 9.368993885013179e-07, "loss": 0.0054, "step": 245730 }, { "epoch": 2.6255676051071104, "grad_norm": 9.820305824279785, "learning_rate": 9.368912182040321e-07, "loss": 0.0298, "step": 245740 }, { "epoch": 2.62567444842139, "grad_norm": 6.975128650665283, "learning_rate": 9.368830474134632e-07, "loss": 0.0548, "step": 245750 }, { "epoch": 2.6257812917356698, "grad_norm": 0.039137423038482666, "learning_rate": 9.368748761296204e-07, "loss": 0.0033, "step": 245760 }, { "epoch": 2.6258881350499492, "grad_norm": 2.175065040588379, "learning_rate": 9.368667043525127e-07, "loss": 0.0071, "step": 245770 }, { "epoch": 2.625994978364229, "grad_norm": 0.06571055203676224, "learning_rate": 9.368585320821496e-07, "loss": 0.0077, "step": 245780 }, { "epoch": 2.6261018216785086, "grad_norm": 1.664047360420227, "learning_rate": 9.368503593185402e-07, "loss": 0.0321, "step": 245790 }, { "epoch": 2.626208664992788, "grad_norm": 0.42087510228157043, "learning_rate": 9.368421860616939e-07, "loss": 0.0187, "step": 245800 }, { "epoch": 2.6263155083070675, "grad_norm": 0.2103063464164734, "learning_rate": 9.368340123116196e-07, "loss": 0.0264, "step": 245810 }, { "epoch": 2.6264223516213474, "grad_norm": 0.24023443460464478, "learning_rate": 9.36825838068327e-07, "loss": 0.0149, "step": 245820 }, { "epoch": 2.626529194935627, "grad_norm": 7.035926342010498, "learning_rate": 9.368176633318249e-07, "loss": 0.0397, "step": 245830 }, { "epoch": 2.6266360382499068, "grad_norm": 0.042157288640737534, "learning_rate": 9.368094881021227e-07, "loss": 0.0146, "step": 245840 }, { "epoch": 2.6267428815641862, "grad_norm": 1.2928136587142944, "learning_rate": 9.368013123792295e-07, "loss": 0.0198, "step": 245850 }, { "epoch": 2.6268497248784657, "grad_norm": 0.3378508985042572, "learning_rate": 9.367931361631549e-07, "loss": 0.0115, "step": 245860 }, { "epoch": 2.626956568192745, "grad_norm": 0.003564115846529603, "learning_rate": 9.367849594539077e-07, "loss": 0.0344, "step": 245870 }, { "epoch": 2.627063411507025, "grad_norm": 1.110304832458496, "learning_rate": 9.367767822514973e-07, "loss": 0.0057, "step": 245880 }, { "epoch": 2.6271702548213045, "grad_norm": 0.013193180784583092, "learning_rate": 9.367686045559331e-07, "loss": 0.0258, "step": 245890 }, { "epoch": 2.6272770981355844, "grad_norm": 0.3778177797794342, "learning_rate": 9.367604263672242e-07, "loss": 0.0227, "step": 245900 }, { "epoch": 2.627383941449864, "grad_norm": 0.030588964000344276, "learning_rate": 9.367522476853797e-07, "loss": 0.0058, "step": 245910 }, { "epoch": 2.6274907847641433, "grad_norm": 0.6216283440589905, "learning_rate": 9.36744068510409e-07, "loss": 0.0182, "step": 245920 }, { "epoch": 2.627597628078423, "grad_norm": 1.7848587036132812, "learning_rate": 9.367358888423212e-07, "loss": 0.0134, "step": 245930 }, { "epoch": 2.6277044713927027, "grad_norm": 0.03492826595902443, "learning_rate": 9.367277086811258e-07, "loss": 0.0092, "step": 245940 }, { "epoch": 2.627811314706982, "grad_norm": 5.467145919799805, "learning_rate": 9.367195280268316e-07, "loss": 0.0099, "step": 245950 }, { "epoch": 2.627918158021262, "grad_norm": 1.927686095237732, "learning_rate": 9.367113468794483e-07, "loss": 0.007, "step": 245960 }, { "epoch": 2.6280250013355415, "grad_norm": 2.029134511947632, "learning_rate": 9.367031652389849e-07, "loss": 0.0162, "step": 245970 }, { "epoch": 2.628131844649821, "grad_norm": 0.007937675341963768, "learning_rate": 9.366949831054506e-07, "loss": 0.0057, "step": 245980 }, { "epoch": 2.6282386879641004, "grad_norm": 3.306845188140869, "learning_rate": 9.366868004788547e-07, "loss": 0.0054, "step": 245990 }, { "epoch": 2.6283455312783803, "grad_norm": 4.857974529266357, "learning_rate": 9.366786173592065e-07, "loss": 0.0205, "step": 246000 }, { "epoch": 2.62845237459266, "grad_norm": 13.657488822937012, "learning_rate": 9.366704337465151e-07, "loss": 0.0383, "step": 246010 }, { "epoch": 2.6285592179069397, "grad_norm": 0.03479475900530815, "learning_rate": 9.366622496407899e-07, "loss": 0.0605, "step": 246020 }, { "epoch": 2.628666061221219, "grad_norm": 0.545676589012146, "learning_rate": 9.3665406504204e-07, "loss": 0.006, "step": 246030 }, { "epoch": 2.6287729045354986, "grad_norm": 0.020702572539448738, "learning_rate": 9.366458799502748e-07, "loss": 0.0047, "step": 246040 }, { "epoch": 2.628879747849778, "grad_norm": 2.3247528076171875, "learning_rate": 9.366376943655034e-07, "loss": 0.0224, "step": 246050 }, { "epoch": 2.628986591164058, "grad_norm": 0.7007853984832764, "learning_rate": 9.366295082877351e-07, "loss": 0.0167, "step": 246060 }, { "epoch": 2.6290934344783374, "grad_norm": 0.333658367395401, "learning_rate": 9.366213217169791e-07, "loss": 0.0594, "step": 246070 }, { "epoch": 2.6292002777926173, "grad_norm": 0.002973301336169243, "learning_rate": 9.366131346532447e-07, "loss": 0.0206, "step": 246080 }, { "epoch": 2.629307121106897, "grad_norm": 5.112250804901123, "learning_rate": 9.36604947096541e-07, "loss": 0.0352, "step": 246090 }, { "epoch": 2.6294139644211763, "grad_norm": 8.036201477050781, "learning_rate": 9.365967590468775e-07, "loss": 0.023, "step": 246100 }, { "epoch": 2.6295208077354557, "grad_norm": 0.008502143435180187, "learning_rate": 9.365885705042633e-07, "loss": 0.0032, "step": 246110 }, { "epoch": 2.6296276510497356, "grad_norm": 1.720937728881836, "learning_rate": 9.365803814687076e-07, "loss": 0.0213, "step": 246120 }, { "epoch": 2.629734494364015, "grad_norm": 9.832714080810547, "learning_rate": 9.365721919402199e-07, "loss": 0.0421, "step": 246130 }, { "epoch": 2.629841337678295, "grad_norm": 5.60292387008667, "learning_rate": 9.365640019188091e-07, "loss": 0.0197, "step": 246140 }, { "epoch": 2.6299481809925744, "grad_norm": 0.07070393860340118, "learning_rate": 9.365558114044845e-07, "loss": 0.0118, "step": 246150 }, { "epoch": 2.630055024306854, "grad_norm": 10.53717041015625, "learning_rate": 9.365476203972554e-07, "loss": 0.0088, "step": 246160 }, { "epoch": 2.6301618676211334, "grad_norm": 5.97097110748291, "learning_rate": 9.365394288971312e-07, "loss": 0.0205, "step": 246170 }, { "epoch": 2.6302687109354133, "grad_norm": 1.3094534873962402, "learning_rate": 9.365312369041209e-07, "loss": 0.0249, "step": 246180 }, { "epoch": 2.6303755542496927, "grad_norm": 2.7903482913970947, "learning_rate": 9.365230444182342e-07, "loss": 0.0268, "step": 246190 }, { "epoch": 2.6304823975639726, "grad_norm": 0.3003943860530853, "learning_rate": 9.365148514394798e-07, "loss": 0.0235, "step": 246200 }, { "epoch": 2.630589240878252, "grad_norm": 0.07856674492359161, "learning_rate": 9.365066579678673e-07, "loss": 0.0109, "step": 246210 }, { "epoch": 2.6306960841925315, "grad_norm": 0.032582905143499374, "learning_rate": 9.364984640034057e-07, "loss": 0.0225, "step": 246220 }, { "epoch": 2.630802927506811, "grad_norm": 1.573488712310791, "learning_rate": 9.364902695461046e-07, "loss": 0.0218, "step": 246230 }, { "epoch": 2.630909770821091, "grad_norm": 0.15455590188503265, "learning_rate": 9.364820745959728e-07, "loss": 0.0339, "step": 246240 }, { "epoch": 2.6310166141353704, "grad_norm": 0.01913018897175789, "learning_rate": 9.364738791530201e-07, "loss": 0.0252, "step": 246250 }, { "epoch": 2.6311234574496503, "grad_norm": 0.6520862579345703, "learning_rate": 9.36465683217255e-07, "loss": 0.0044, "step": 246260 }, { "epoch": 2.6312303007639297, "grad_norm": 1.8651058673858643, "learning_rate": 9.364574867886876e-07, "loss": 0.0262, "step": 246270 }, { "epoch": 2.631337144078209, "grad_norm": 0.04898238927125931, "learning_rate": 9.364492898673266e-07, "loss": 0.0052, "step": 246280 }, { "epoch": 2.631443987392489, "grad_norm": 0.007503949571400881, "learning_rate": 9.364410924531814e-07, "loss": 0.0239, "step": 246290 }, { "epoch": 2.6315508307067685, "grad_norm": 0.9644738435745239, "learning_rate": 9.364328945462614e-07, "loss": 0.0232, "step": 246300 }, { "epoch": 2.631657674021048, "grad_norm": 1.3507335186004639, "learning_rate": 9.364246961465756e-07, "loss": 0.0256, "step": 246310 }, { "epoch": 2.631764517335328, "grad_norm": 0.04165215417742729, "learning_rate": 9.364164972541332e-07, "loss": 0.0068, "step": 246320 }, { "epoch": 2.6318713606496074, "grad_norm": 0.08468633890151978, "learning_rate": 9.364082978689441e-07, "loss": 0.0078, "step": 246330 }, { "epoch": 2.631978203963887, "grad_norm": 4.0047526359558105, "learning_rate": 9.364000979910167e-07, "loss": 0.006, "step": 246340 }, { "epoch": 2.6320850472781667, "grad_norm": 0.7341569662094116, "learning_rate": 9.363918976203608e-07, "loss": 0.0198, "step": 246350 }, { "epoch": 2.632191890592446, "grad_norm": 0.40532007813453674, "learning_rate": 9.363836967569855e-07, "loss": 0.005, "step": 246360 }, { "epoch": 2.6322987339067256, "grad_norm": 0.02471858635544777, "learning_rate": 9.363754954009e-07, "loss": 0.0207, "step": 246370 }, { "epoch": 2.6324055772210055, "grad_norm": 0.2926136553287506, "learning_rate": 9.363672935521137e-07, "loss": 0.006, "step": 246380 }, { "epoch": 2.632512420535285, "grad_norm": 3.923783540725708, "learning_rate": 9.363590912106358e-07, "loss": 0.0465, "step": 246390 }, { "epoch": 2.6326192638495645, "grad_norm": 1.9094314575195312, "learning_rate": 9.363508883764755e-07, "loss": 0.015, "step": 246400 }, { "epoch": 2.6327261071638444, "grad_norm": 8.670889854431152, "learning_rate": 9.363426850496422e-07, "loss": 0.0093, "step": 246410 }, { "epoch": 2.632832950478124, "grad_norm": 0.061848949640989304, "learning_rate": 9.363344812301451e-07, "loss": 0.0422, "step": 246420 }, { "epoch": 2.6329397937924033, "grad_norm": 1.157334327697754, "learning_rate": 9.363262769179934e-07, "loss": 0.0082, "step": 246430 }, { "epoch": 2.633046637106683, "grad_norm": 14.285404205322266, "learning_rate": 9.363180721131963e-07, "loss": 0.0163, "step": 246440 }, { "epoch": 2.6331534804209626, "grad_norm": 3.3629000186920166, "learning_rate": 9.363098668157632e-07, "loss": 0.0422, "step": 246450 }, { "epoch": 2.633260323735242, "grad_norm": 1.306181788444519, "learning_rate": 9.363016610257035e-07, "loss": 0.0095, "step": 246460 }, { "epoch": 2.633367167049522, "grad_norm": 1.7619367837905884, "learning_rate": 9.362934547430261e-07, "loss": 0.0055, "step": 246470 }, { "epoch": 2.6334740103638015, "grad_norm": 0.012426390312612057, "learning_rate": 9.362852479677405e-07, "loss": 0.0101, "step": 246480 }, { "epoch": 2.6335808536780814, "grad_norm": 0.0061051445081830025, "learning_rate": 9.362770406998559e-07, "loss": 0.0069, "step": 246490 }, { "epoch": 2.633687696992361, "grad_norm": 11.602360725402832, "learning_rate": 9.362688329393817e-07, "loss": 0.0305, "step": 246500 }, { "epoch": 2.6337945403066403, "grad_norm": 10.885615348815918, "learning_rate": 9.362606246863269e-07, "loss": 0.0443, "step": 246510 }, { "epoch": 2.6339013836209197, "grad_norm": 0.06837518513202667, "learning_rate": 9.36252415940701e-07, "loss": 0.0307, "step": 246520 }, { "epoch": 2.6340082269351996, "grad_norm": 0.05903751030564308, "learning_rate": 9.362442067025133e-07, "loss": 0.0375, "step": 246530 }, { "epoch": 2.634115070249479, "grad_norm": 1.5855622291564941, "learning_rate": 9.36235996971773e-07, "loss": 0.0136, "step": 246540 }, { "epoch": 2.634221913563759, "grad_norm": 1.897806167602539, "learning_rate": 9.362277867484892e-07, "loss": 0.019, "step": 246550 }, { "epoch": 2.6343287568780385, "grad_norm": 1.756333589553833, "learning_rate": 9.362195760326713e-07, "loss": 0.0133, "step": 246560 }, { "epoch": 2.634435600192318, "grad_norm": 0.03990575671195984, "learning_rate": 9.362113648243286e-07, "loss": 0.0121, "step": 246570 }, { "epoch": 2.6345424435065974, "grad_norm": 0.07589491456747055, "learning_rate": 9.362031531234704e-07, "loss": 0.0071, "step": 246580 }, { "epoch": 2.6346492868208773, "grad_norm": 0.6061801910400391, "learning_rate": 9.361949409301059e-07, "loss": 0.0159, "step": 246590 }, { "epoch": 2.6347561301351567, "grad_norm": 0.11980878561735153, "learning_rate": 9.361867282442443e-07, "loss": 0.0171, "step": 246600 }, { "epoch": 2.6348629734494367, "grad_norm": 12.19171142578125, "learning_rate": 9.361785150658952e-07, "loss": 0.0469, "step": 246610 }, { "epoch": 2.634969816763716, "grad_norm": 0.017988568171858788, "learning_rate": 9.361703013950674e-07, "loss": 0.0162, "step": 246620 }, { "epoch": 2.6350766600779956, "grad_norm": 0.0017649948131293058, "learning_rate": 9.361620872317705e-07, "loss": 0.0124, "step": 246630 }, { "epoch": 2.635183503392275, "grad_norm": 0.3865393102169037, "learning_rate": 9.361538725760136e-07, "loss": 0.0276, "step": 246640 }, { "epoch": 2.635290346706555, "grad_norm": 0.5711120367050171, "learning_rate": 9.361456574278061e-07, "loss": 0.0235, "step": 246650 }, { "epoch": 2.6353971900208344, "grad_norm": 5.1166768074035645, "learning_rate": 9.361374417871572e-07, "loss": 0.0119, "step": 246660 }, { "epoch": 2.6355040333351143, "grad_norm": 3.880864381790161, "learning_rate": 9.361292256540764e-07, "loss": 0.037, "step": 246670 }, { "epoch": 2.6356108766493938, "grad_norm": 17.742372512817383, "learning_rate": 9.361210090285726e-07, "loss": 0.0259, "step": 246680 }, { "epoch": 2.635717719963673, "grad_norm": 0.04015440121293068, "learning_rate": 9.361127919106553e-07, "loss": 0.0262, "step": 246690 }, { "epoch": 2.6358245632779527, "grad_norm": 0.1101013645529747, "learning_rate": 9.361045743003338e-07, "loss": 0.0284, "step": 246700 }, { "epoch": 2.6359314065922326, "grad_norm": 0.06341677159070969, "learning_rate": 9.360963561976173e-07, "loss": 0.0095, "step": 246710 }, { "epoch": 2.636038249906512, "grad_norm": 0.005532062612473965, "learning_rate": 9.360881376025152e-07, "loss": 0.0086, "step": 246720 }, { "epoch": 2.636145093220792, "grad_norm": 5.882631301879883, "learning_rate": 9.360799185150365e-07, "loss": 0.0396, "step": 246730 }, { "epoch": 2.6362519365350714, "grad_norm": 2.5722367763519287, "learning_rate": 9.360716989351907e-07, "loss": 0.0304, "step": 246740 }, { "epoch": 2.636358779849351, "grad_norm": 1.4064152240753174, "learning_rate": 9.360634788629871e-07, "loss": 0.0116, "step": 246750 }, { "epoch": 2.6364656231636303, "grad_norm": 11.76010799407959, "learning_rate": 9.360552582984349e-07, "loss": 0.0069, "step": 246760 }, { "epoch": 2.63657246647791, "grad_norm": 0.7539443373680115, "learning_rate": 9.360470372415435e-07, "loss": 0.0121, "step": 246770 }, { "epoch": 2.6366793097921897, "grad_norm": 0.036762721836566925, "learning_rate": 9.360388156923217e-07, "loss": 0.0092, "step": 246780 }, { "epoch": 2.6367861531064696, "grad_norm": 3.5486645698547363, "learning_rate": 9.360305936507795e-07, "loss": 0.002, "step": 246790 }, { "epoch": 2.636892996420749, "grad_norm": 0.0006429117638617754, "learning_rate": 9.36022371116926e-07, "loss": 0.0225, "step": 246800 }, { "epoch": 2.6369998397350285, "grad_norm": 2.8514933586120605, "learning_rate": 9.360141480907699e-07, "loss": 0.0112, "step": 246810 }, { "epoch": 2.637106683049308, "grad_norm": 0.274380624294281, "learning_rate": 9.360059245723213e-07, "loss": 0.0131, "step": 246820 }, { "epoch": 2.637213526363588, "grad_norm": 0.2058081030845642, "learning_rate": 9.359977005615889e-07, "loss": 0.0078, "step": 246830 }, { "epoch": 2.6373203696778673, "grad_norm": 0.011834830977022648, "learning_rate": 9.359894760585822e-07, "loss": 0.0057, "step": 246840 }, { "epoch": 2.637427212992147, "grad_norm": 2.9959261417388916, "learning_rate": 9.359812510633105e-07, "loss": 0.007, "step": 246850 }, { "epoch": 2.6375340563064267, "grad_norm": 0.06603314727544785, "learning_rate": 9.359730255757831e-07, "loss": 0.0013, "step": 246860 }, { "epoch": 2.637640899620706, "grad_norm": 1.906250238418579, "learning_rate": 9.359647995960092e-07, "loss": 0.024, "step": 246870 }, { "epoch": 2.6377477429349856, "grad_norm": 3.6074864864349365, "learning_rate": 9.359565731239982e-07, "loss": 0.0064, "step": 246880 }, { "epoch": 2.6378545862492655, "grad_norm": 0.009176385588943958, "learning_rate": 9.359483461597594e-07, "loss": 0.007, "step": 246890 }, { "epoch": 2.637961429563545, "grad_norm": 1.0045279264450073, "learning_rate": 9.359401187033018e-07, "loss": 0.0013, "step": 246900 }, { "epoch": 2.638068272877825, "grad_norm": 0.9161716103553772, "learning_rate": 9.359318907546351e-07, "loss": 0.0093, "step": 246910 }, { "epoch": 2.6381751161921043, "grad_norm": 0.46726953983306885, "learning_rate": 9.359236623137683e-07, "loss": 0.0191, "step": 246920 }, { "epoch": 2.638281959506384, "grad_norm": 0.03979460150003433, "learning_rate": 9.359154333807109e-07, "loss": 0.0175, "step": 246930 }, { "epoch": 2.6383888028206632, "grad_norm": 0.010567820630967617, "learning_rate": 9.359072039554719e-07, "loss": 0.0179, "step": 246940 }, { "epoch": 2.638495646134943, "grad_norm": 0.9375137090682983, "learning_rate": 9.358989740380607e-07, "loss": 0.0378, "step": 246950 }, { "epoch": 2.6386024894492226, "grad_norm": 0.7767987847328186, "learning_rate": 9.358907436284869e-07, "loss": 0.0214, "step": 246960 }, { "epoch": 2.6387093327635025, "grad_norm": 0.235316663980484, "learning_rate": 9.358825127267595e-07, "loss": 0.013, "step": 246970 }, { "epoch": 2.638816176077782, "grad_norm": 0.024185681715607643, "learning_rate": 9.358742813328878e-07, "loss": 0.0217, "step": 246980 }, { "epoch": 2.6389230193920614, "grad_norm": 0.1834208220243454, "learning_rate": 9.358660494468811e-07, "loss": 0.0033, "step": 246990 }, { "epoch": 2.639029862706341, "grad_norm": 0.0021956576965749264, "learning_rate": 9.358578170687488e-07, "loss": 0.0035, "step": 247000 }, { "epoch": 2.639136706020621, "grad_norm": 17.892080307006836, "learning_rate": 9.358495841985e-07, "loss": 0.0448, "step": 247010 }, { "epoch": 2.6392435493349002, "grad_norm": 0.17332564294338226, "learning_rate": 9.358413508361442e-07, "loss": 0.0546, "step": 247020 }, { "epoch": 2.63935039264918, "grad_norm": 2.076192617416382, "learning_rate": 9.358331169816906e-07, "loss": 0.0184, "step": 247030 }, { "epoch": 2.6394572359634596, "grad_norm": 0.09394465386867523, "learning_rate": 9.358248826351487e-07, "loss": 0.0108, "step": 247040 }, { "epoch": 2.639564079277739, "grad_norm": 1.4982045888900757, "learning_rate": 9.358166477965275e-07, "loss": 0.0129, "step": 247050 }, { "epoch": 2.639670922592019, "grad_norm": 6.287858963012695, "learning_rate": 9.358084124658363e-07, "loss": 0.0315, "step": 247060 }, { "epoch": 2.6397777659062984, "grad_norm": 0.08498654514551163, "learning_rate": 9.358001766430847e-07, "loss": 0.0293, "step": 247070 }, { "epoch": 2.639884609220578, "grad_norm": 1.017566442489624, "learning_rate": 9.357919403282817e-07, "loss": 0.0371, "step": 247080 }, { "epoch": 2.639991452534858, "grad_norm": 0.6407480239868164, "learning_rate": 9.357837035214366e-07, "loss": 0.0073, "step": 247090 }, { "epoch": 2.6400982958491372, "grad_norm": 0.016905637457966805, "learning_rate": 9.35775466222559e-07, "loss": 0.0065, "step": 247100 }, { "epoch": 2.6402051391634167, "grad_norm": 0.03338641673326492, "learning_rate": 9.357672284316579e-07, "loss": 0.0197, "step": 247110 }, { "epoch": 2.6403119824776966, "grad_norm": 0.07984866946935654, "learning_rate": 9.357589901487427e-07, "loss": 0.02, "step": 247120 }, { "epoch": 2.640418825791976, "grad_norm": 1.5863590240478516, "learning_rate": 9.357507513738227e-07, "loss": 0.0127, "step": 247130 }, { "epoch": 2.6405256691062555, "grad_norm": 1.305799961090088, "learning_rate": 9.357425121069074e-07, "loss": 0.0227, "step": 247140 }, { "epoch": 2.6406325124205354, "grad_norm": 0.4965030252933502, "learning_rate": 9.357342723480057e-07, "loss": 0.0463, "step": 247150 }, { "epoch": 2.640739355734815, "grad_norm": 0.18569304049015045, "learning_rate": 9.357260320971272e-07, "loss": 0.0288, "step": 247160 }, { "epoch": 2.6408461990490943, "grad_norm": 1.7166675329208374, "learning_rate": 9.357177913542811e-07, "loss": 0.009, "step": 247170 }, { "epoch": 2.6409530423633742, "grad_norm": 0.015663789585232735, "learning_rate": 9.357095501194769e-07, "loss": 0.0444, "step": 247180 }, { "epoch": 2.6410598856776537, "grad_norm": 2.84372878074646, "learning_rate": 9.357013083927234e-07, "loss": 0.0084, "step": 247190 }, { "epoch": 2.641166728991933, "grad_norm": 0.7558908462524414, "learning_rate": 9.356930661740306e-07, "loss": 0.0041, "step": 247200 }, { "epoch": 2.641273572306213, "grad_norm": 3.574406147003174, "learning_rate": 9.356848234634072e-07, "loss": 0.0164, "step": 247210 }, { "epoch": 2.6413804156204925, "grad_norm": 0.12518903613090515, "learning_rate": 9.356765802608628e-07, "loss": 0.0533, "step": 247220 }, { "epoch": 2.641487258934772, "grad_norm": 0.08936551213264465, "learning_rate": 9.356683365664067e-07, "loss": 0.0225, "step": 247230 }, { "epoch": 2.641594102249052, "grad_norm": 2.906240463256836, "learning_rate": 9.356600923800482e-07, "loss": 0.0109, "step": 247240 }, { "epoch": 2.6417009455633313, "grad_norm": 0.06976192444562912, "learning_rate": 9.356518477017965e-07, "loss": 0.011, "step": 247250 }, { "epoch": 2.6418077888776113, "grad_norm": 0.029216667637228966, "learning_rate": 9.35643602531661e-07, "loss": 0.0052, "step": 247260 }, { "epoch": 2.6419146321918907, "grad_norm": 0.03582625091075897, "learning_rate": 9.35635356869651e-07, "loss": 0.0061, "step": 247270 }, { "epoch": 2.64202147550617, "grad_norm": 1.597381353378296, "learning_rate": 9.356271107157758e-07, "loss": 0.0673, "step": 247280 }, { "epoch": 2.6421283188204496, "grad_norm": 4.8639349937438965, "learning_rate": 9.356188640700447e-07, "loss": 0.0466, "step": 247290 }, { "epoch": 2.6422351621347295, "grad_norm": 2.3793492317199707, "learning_rate": 9.356106169324671e-07, "loss": 0.0196, "step": 247300 }, { "epoch": 2.642342005449009, "grad_norm": 0.5451313853263855, "learning_rate": 9.356023693030522e-07, "loss": 0.013, "step": 247310 }, { "epoch": 2.642448848763289, "grad_norm": 2.108057975769043, "learning_rate": 9.355941211818092e-07, "loss": 0.0071, "step": 247320 }, { "epoch": 2.6425556920775684, "grad_norm": 1.4364063739776611, "learning_rate": 9.355858725687478e-07, "loss": 0.0156, "step": 247330 }, { "epoch": 2.642662535391848, "grad_norm": 0.1288233995437622, "learning_rate": 9.35577623463877e-07, "loss": 0.0264, "step": 247340 }, { "epoch": 2.6427693787061273, "grad_norm": 2.248786211013794, "learning_rate": 9.355693738672062e-07, "loss": 0.034, "step": 247350 }, { "epoch": 2.642876222020407, "grad_norm": 0.03351324424147606, "learning_rate": 9.355611237787446e-07, "loss": 0.0514, "step": 247360 }, { "epoch": 2.6429830653346866, "grad_norm": 0.240199014544487, "learning_rate": 9.355528731985017e-07, "loss": 0.0449, "step": 247370 }, { "epoch": 2.6430899086489665, "grad_norm": 1.019105315208435, "learning_rate": 9.355446221264866e-07, "loss": 0.0089, "step": 247380 }, { "epoch": 2.643196751963246, "grad_norm": 0.2832081615924835, "learning_rate": 9.355363705627089e-07, "loss": 0.0216, "step": 247390 }, { "epoch": 2.6433035952775255, "grad_norm": 0.013864845968782902, "learning_rate": 9.355281185071777e-07, "loss": 0.0029, "step": 247400 }, { "epoch": 2.643410438591805, "grad_norm": 1.9164342880249023, "learning_rate": 9.355198659599024e-07, "loss": 0.0214, "step": 247410 }, { "epoch": 2.643517281906085, "grad_norm": 1.033423900604248, "learning_rate": 9.355116129208924e-07, "loss": 0.0044, "step": 247420 }, { "epoch": 2.6436241252203643, "grad_norm": 1.100607991218567, "learning_rate": 9.355033593901568e-07, "loss": 0.0166, "step": 247430 }, { "epoch": 2.643730968534644, "grad_norm": 3.142869234085083, "learning_rate": 9.354951053677051e-07, "loss": 0.0315, "step": 247440 }, { "epoch": 2.6438378118489236, "grad_norm": 0.010303900577127934, "learning_rate": 9.354868508535466e-07, "loss": 0.0175, "step": 247450 }, { "epoch": 2.643944655163203, "grad_norm": 4.526243209838867, "learning_rate": 9.354785958476903e-07, "loss": 0.018, "step": 247460 }, { "epoch": 2.6440514984774826, "grad_norm": 0.1353505700826645, "learning_rate": 9.35470340350146e-07, "loss": 0.0245, "step": 247470 }, { "epoch": 2.6441583417917625, "grad_norm": 0.036844126880168915, "learning_rate": 9.354620843609229e-07, "loss": 0.0199, "step": 247480 }, { "epoch": 2.644265185106042, "grad_norm": 0.03540235385298729, "learning_rate": 9.354538278800302e-07, "loss": 0.0022, "step": 247490 }, { "epoch": 2.644372028420322, "grad_norm": 0.057748544961214066, "learning_rate": 9.354455709074772e-07, "loss": 0.0159, "step": 247500 }, { "epoch": 2.6444788717346013, "grad_norm": 0.01978328265249729, "learning_rate": 9.354373134432733e-07, "loss": 0.0046, "step": 247510 }, { "epoch": 2.6445857150488807, "grad_norm": 0.1695004105567932, "learning_rate": 9.354290554874278e-07, "loss": 0.0232, "step": 247520 }, { "epoch": 2.64469255836316, "grad_norm": 6.270340919494629, "learning_rate": 9.354207970399501e-07, "loss": 0.0418, "step": 247530 }, { "epoch": 2.64479940167744, "grad_norm": 1.851527214050293, "learning_rate": 9.354125381008494e-07, "loss": 0.0184, "step": 247540 }, { "epoch": 2.6449062449917196, "grad_norm": 0.028582042083144188, "learning_rate": 9.35404278670135e-07, "loss": 0.0287, "step": 247550 }, { "epoch": 2.6450130883059995, "grad_norm": 0.007669192273169756, "learning_rate": 9.353960187478165e-07, "loss": 0.0313, "step": 247560 }, { "epoch": 2.645119931620279, "grad_norm": 0.08667688816785812, "learning_rate": 9.35387758333903e-07, "loss": 0.0292, "step": 247570 }, { "epoch": 2.6452267749345584, "grad_norm": 0.9839016199111938, "learning_rate": 9.353794974284037e-07, "loss": 0.0261, "step": 247580 }, { "epoch": 2.645333618248838, "grad_norm": 6.660000324249268, "learning_rate": 9.353712360313282e-07, "loss": 0.0111, "step": 247590 }, { "epoch": 2.6454404615631177, "grad_norm": 0.5330143570899963, "learning_rate": 9.353629741426856e-07, "loss": 0.0187, "step": 247600 }, { "epoch": 2.645547304877397, "grad_norm": 8.386309623718262, "learning_rate": 9.353547117624856e-07, "loss": 0.0121, "step": 247610 }, { "epoch": 2.645654148191677, "grad_norm": 1.262622594833374, "learning_rate": 9.35346448890737e-07, "loss": 0.0192, "step": 247620 }, { "epoch": 2.6457609915059566, "grad_norm": 0.4754849672317505, "learning_rate": 9.353381855274496e-07, "loss": 0.031, "step": 247630 }, { "epoch": 2.645867834820236, "grad_norm": 1.4914053678512573, "learning_rate": 9.353299216726324e-07, "loss": 0.0275, "step": 247640 }, { "epoch": 2.6459746781345155, "grad_norm": 0.7662565112113953, "learning_rate": 9.35321657326295e-07, "loss": 0.0159, "step": 247650 }, { "epoch": 2.6460815214487954, "grad_norm": 0.05711150914430618, "learning_rate": 9.353133924884465e-07, "loss": 0.0189, "step": 247660 }, { "epoch": 2.646188364763075, "grad_norm": 0.050833359360694885, "learning_rate": 9.353051271590962e-07, "loss": 0.0739, "step": 247670 }, { "epoch": 2.6462952080773547, "grad_norm": 1.4188902378082275, "learning_rate": 9.352968613382537e-07, "loss": 0.0187, "step": 247680 }, { "epoch": 2.646402051391634, "grad_norm": 3.799555540084839, "learning_rate": 9.352885950259282e-07, "loss": 0.0175, "step": 247690 }, { "epoch": 2.6465088947059137, "grad_norm": 0.22469627857208252, "learning_rate": 9.352803282221291e-07, "loss": 0.0096, "step": 247700 }, { "epoch": 2.646615738020193, "grad_norm": 0.8048427104949951, "learning_rate": 9.352720609268655e-07, "loss": 0.0121, "step": 247710 }, { "epoch": 2.646722581334473, "grad_norm": 0.11855729669332504, "learning_rate": 9.35263793140147e-07, "loss": 0.0081, "step": 247720 }, { "epoch": 2.6468294246487525, "grad_norm": 3.46170711517334, "learning_rate": 9.352555248619826e-07, "loss": 0.0324, "step": 247730 }, { "epoch": 2.6469362679630324, "grad_norm": 6.292686462402344, "learning_rate": 9.352472560923822e-07, "loss": 0.0147, "step": 247740 }, { "epoch": 2.647043111277312, "grad_norm": 1.5765037536621094, "learning_rate": 9.352389868313546e-07, "loss": 0.0349, "step": 247750 }, { "epoch": 2.6471499545915913, "grad_norm": 0.3128759264945984, "learning_rate": 9.352307170789094e-07, "loss": 0.0557, "step": 247760 }, { "epoch": 2.647256797905871, "grad_norm": 3.788804054260254, "learning_rate": 9.352224468350558e-07, "loss": 0.0323, "step": 247770 }, { "epoch": 2.6473636412201507, "grad_norm": 8.82220458984375, "learning_rate": 9.352141760998033e-07, "loss": 0.0237, "step": 247780 }, { "epoch": 2.64747048453443, "grad_norm": 1.8596659898757935, "learning_rate": 9.35205904873161e-07, "loss": 0.0144, "step": 247790 }, { "epoch": 2.64757732784871, "grad_norm": 6.467607498168945, "learning_rate": 9.351976331551386e-07, "loss": 0.0237, "step": 247800 }, { "epoch": 2.6476841711629895, "grad_norm": 0.02899838425219059, "learning_rate": 9.351893609457451e-07, "loss": 0.0009, "step": 247810 }, { "epoch": 2.647791014477269, "grad_norm": 0.06582693010568619, "learning_rate": 9.3518108824499e-07, "loss": 0.0263, "step": 247820 }, { "epoch": 2.647897857791549, "grad_norm": 0.03520350158214569, "learning_rate": 9.351728150528826e-07, "loss": 0.0158, "step": 247830 }, { "epoch": 2.6480047011058283, "grad_norm": 0.04920204356312752, "learning_rate": 9.351645413694322e-07, "loss": 0.0868, "step": 247840 }, { "epoch": 2.6481115444201078, "grad_norm": 10.998684883117676, "learning_rate": 9.351562671946482e-07, "loss": 0.0411, "step": 247850 }, { "epoch": 2.6482183877343877, "grad_norm": 0.5624253749847412, "learning_rate": 9.3514799252854e-07, "loss": 0.0223, "step": 247860 }, { "epoch": 2.648325231048667, "grad_norm": 0.011065009981393814, "learning_rate": 9.351397173711168e-07, "loss": 0.0063, "step": 247870 }, { "epoch": 2.6484320743629466, "grad_norm": 7.074224948883057, "learning_rate": 9.351314417223881e-07, "loss": 0.0322, "step": 247880 }, { "epoch": 2.6485389176772265, "grad_norm": 0.2378264218568802, "learning_rate": 9.35123165582363e-07, "loss": 0.0137, "step": 247890 }, { "epoch": 2.648645760991506, "grad_norm": 4.778249740600586, "learning_rate": 9.351148889510512e-07, "loss": 0.0115, "step": 247900 }, { "epoch": 2.6487526043057854, "grad_norm": 1.2201157808303833, "learning_rate": 9.351066118284618e-07, "loss": 0.0147, "step": 247910 }, { "epoch": 2.6488594476200653, "grad_norm": 0.015249351039528847, "learning_rate": 9.350983342146042e-07, "loss": 0.022, "step": 247920 }, { "epoch": 2.6489662909343448, "grad_norm": 0.230631485581398, "learning_rate": 9.350900561094877e-07, "loss": 0.0204, "step": 247930 }, { "epoch": 2.6490731342486242, "grad_norm": 13.302116394042969, "learning_rate": 9.350817775131217e-07, "loss": 0.0168, "step": 247940 }, { "epoch": 2.649179977562904, "grad_norm": 2.894578456878662, "learning_rate": 9.350734984255155e-07, "loss": 0.0181, "step": 247950 }, { "epoch": 2.6492868208771836, "grad_norm": 2.688429355621338, "learning_rate": 9.350652188466784e-07, "loss": 0.0247, "step": 247960 }, { "epoch": 2.6493936641914635, "grad_norm": 0.02373996004462242, "learning_rate": 9.350569387766201e-07, "loss": 0.0043, "step": 247970 }, { "epoch": 2.649500507505743, "grad_norm": 0.007809671573340893, "learning_rate": 9.350486582153494e-07, "loss": 0.0204, "step": 247980 }, { "epoch": 2.6496073508200224, "grad_norm": 1.5909223556518555, "learning_rate": 9.35040377162876e-07, "loss": 0.0331, "step": 247990 }, { "epoch": 2.649714194134302, "grad_norm": 0.008108924143016338, "learning_rate": 9.350320956192094e-07, "loss": 0.0231, "step": 248000 }, { "epoch": 2.6498210374485818, "grad_norm": 1.7785364389419556, "learning_rate": 9.350238135843584e-07, "loss": 0.0165, "step": 248010 }, { "epoch": 2.6499278807628612, "grad_norm": 0.04558759927749634, "learning_rate": 9.35015531058333e-07, "loss": 0.0331, "step": 248020 }, { "epoch": 2.650034724077141, "grad_norm": 2.9459681510925293, "learning_rate": 9.350072480411421e-07, "loss": 0.0166, "step": 248030 }, { "epoch": 2.6501415673914206, "grad_norm": 0.08811793476343155, "learning_rate": 9.349989645327952e-07, "loss": 0.0214, "step": 248040 }, { "epoch": 2.6502484107057, "grad_norm": 0.003959442023187876, "learning_rate": 9.349906805333016e-07, "loss": 0.0057, "step": 248050 }, { "epoch": 2.6503552540199795, "grad_norm": 0.29497501254081726, "learning_rate": 9.349823960426706e-07, "loss": 0.0112, "step": 248060 }, { "epoch": 2.6504620973342594, "grad_norm": 9.745079040527344, "learning_rate": 9.349741110609118e-07, "loss": 0.0354, "step": 248070 }, { "epoch": 2.650568940648539, "grad_norm": 0.043071627616882324, "learning_rate": 9.349658255880344e-07, "loss": 0.0068, "step": 248080 }, { "epoch": 2.6506757839628188, "grad_norm": 0.23894616961479187, "learning_rate": 9.349575396240477e-07, "loss": 0.0133, "step": 248090 }, { "epoch": 2.6507826272770982, "grad_norm": 0.0062718866392970085, "learning_rate": 9.349492531689611e-07, "loss": 0.0074, "step": 248100 }, { "epoch": 2.6508894705913777, "grad_norm": 2.012295722961426, "learning_rate": 9.349409662227838e-07, "loss": 0.0141, "step": 248110 }, { "epoch": 2.650996313905657, "grad_norm": 0.032669030129909515, "learning_rate": 9.349326787855256e-07, "loss": 0.0076, "step": 248120 }, { "epoch": 2.651103157219937, "grad_norm": 2.476240634918213, "learning_rate": 9.349243908571955e-07, "loss": 0.0241, "step": 248130 }, { "epoch": 2.6512100005342165, "grad_norm": 0.0019759114366024733, "learning_rate": 9.34916102437803e-07, "loss": 0.0149, "step": 248140 }, { "epoch": 2.6513168438484964, "grad_norm": 3.299898386001587, "learning_rate": 9.349078135273571e-07, "loss": 0.0184, "step": 248150 }, { "epoch": 2.651423687162776, "grad_norm": 3.9440395832061768, "learning_rate": 9.348995241258677e-07, "loss": 0.0079, "step": 248160 }, { "epoch": 2.6515305304770553, "grad_norm": 5.231354713439941, "learning_rate": 9.348912342333439e-07, "loss": 0.0171, "step": 248170 }, { "epoch": 2.651637373791335, "grad_norm": 4.426970958709717, "learning_rate": 9.348829438497951e-07, "loss": 0.0141, "step": 248180 }, { "epoch": 2.6517442171056147, "grad_norm": 0.010545840486884117, "learning_rate": 9.348746529752306e-07, "loss": 0.0096, "step": 248190 }, { "epoch": 2.651851060419894, "grad_norm": 0.03400365635752678, "learning_rate": 9.348663616096598e-07, "loss": 0.0057, "step": 248200 }, { "epoch": 2.651957903734174, "grad_norm": 1.638381004333496, "learning_rate": 9.348580697530921e-07, "loss": 0.0086, "step": 248210 }, { "epoch": 2.6520647470484535, "grad_norm": 1.7960678339004517, "learning_rate": 9.348497774055366e-07, "loss": 0.0021, "step": 248220 }, { "epoch": 2.652171590362733, "grad_norm": 9.050923347473145, "learning_rate": 9.348414845670032e-07, "loss": 0.0273, "step": 248230 }, { "epoch": 2.6522784336770124, "grad_norm": 21.84465217590332, "learning_rate": 9.348331912375007e-07, "loss": 0.0377, "step": 248240 }, { "epoch": 2.6523852769912923, "grad_norm": 2.815014600753784, "learning_rate": 9.348248974170387e-07, "loss": 0.0226, "step": 248250 }, { "epoch": 2.652492120305572, "grad_norm": 1.6032283306121826, "learning_rate": 9.348166031056268e-07, "loss": 0.0227, "step": 248260 }, { "epoch": 2.6525989636198517, "grad_norm": 2.5064239501953125, "learning_rate": 9.34808308303274e-07, "loss": 0.0374, "step": 248270 }, { "epoch": 2.652705806934131, "grad_norm": 0.5800508260726929, "learning_rate": 9.348000130099898e-07, "loss": 0.0079, "step": 248280 }, { "epoch": 2.6528126502484106, "grad_norm": 0.4293152987957001, "learning_rate": 9.347917172257836e-07, "loss": 0.0176, "step": 248290 }, { "epoch": 2.65291949356269, "grad_norm": 0.9242621660232544, "learning_rate": 9.347834209506646e-07, "loss": 0.0184, "step": 248300 }, { "epoch": 2.65302633687697, "grad_norm": 2.464254140853882, "learning_rate": 9.347751241846424e-07, "loss": 0.0098, "step": 248310 }, { "epoch": 2.6531331801912494, "grad_norm": 2.1916592121124268, "learning_rate": 9.347668269277265e-07, "loss": 0.0223, "step": 248320 }, { "epoch": 2.6532400235055293, "grad_norm": 0.01862068846821785, "learning_rate": 9.347585291799257e-07, "loss": 0.041, "step": 248330 }, { "epoch": 2.653346866819809, "grad_norm": 8.010272026062012, "learning_rate": 9.347502309412498e-07, "loss": 0.1005, "step": 248340 }, { "epoch": 2.6534537101340883, "grad_norm": 0.5134797096252441, "learning_rate": 9.347419322117081e-07, "loss": 0.0029, "step": 248350 }, { "epoch": 2.6535605534483677, "grad_norm": 2.372370481491089, "learning_rate": 9.3473363299131e-07, "loss": 0.0289, "step": 248360 }, { "epoch": 2.6536673967626476, "grad_norm": 0.1062031015753746, "learning_rate": 9.347253332800649e-07, "loss": 0.025, "step": 248370 }, { "epoch": 2.653774240076927, "grad_norm": 2.286257028579712, "learning_rate": 9.347170330779819e-07, "loss": 0.0231, "step": 248380 }, { "epoch": 2.653881083391207, "grad_norm": 8.427274703979492, "learning_rate": 9.347087323850706e-07, "loss": 0.0098, "step": 248390 }, { "epoch": 2.6539879267054864, "grad_norm": 0.1292635202407837, "learning_rate": 9.347004312013404e-07, "loss": 0.0232, "step": 248400 }, { "epoch": 2.654094770019766, "grad_norm": 0.8718423247337341, "learning_rate": 9.346921295268004e-07, "loss": 0.0138, "step": 248410 }, { "epoch": 2.6542016133340454, "grad_norm": 0.5961568355560303, "learning_rate": 9.346838273614604e-07, "loss": 0.0482, "step": 248420 }, { "epoch": 2.6543084566483253, "grad_norm": 0.0935550183057785, "learning_rate": 9.346755247053295e-07, "loss": 0.0107, "step": 248430 }, { "epoch": 2.6544152999626047, "grad_norm": 3.4546000957489014, "learning_rate": 9.346672215584172e-07, "loss": 0.0286, "step": 248440 }, { "epoch": 2.6545221432768846, "grad_norm": 0.6201836466789246, "learning_rate": 9.346589179207327e-07, "loss": 0.0073, "step": 248450 }, { "epoch": 2.654628986591164, "grad_norm": 2.132192611694336, "learning_rate": 9.346506137922855e-07, "loss": 0.0378, "step": 248460 }, { "epoch": 2.6547358299054435, "grad_norm": 0.006849516183137894, "learning_rate": 9.346423091730849e-07, "loss": 0.0219, "step": 248470 }, { "epoch": 2.654842673219723, "grad_norm": 1.9757943153381348, "learning_rate": 9.346340040631404e-07, "loss": 0.0117, "step": 248480 }, { "epoch": 2.654949516534003, "grad_norm": 6.223348617553711, "learning_rate": 9.346256984624613e-07, "loss": 0.0218, "step": 248490 }, { "epoch": 2.6550563598482824, "grad_norm": 10.46886157989502, "learning_rate": 9.34617392371057e-07, "loss": 0.0261, "step": 248500 }, { "epoch": 2.6551632031625623, "grad_norm": 0.8122024536132812, "learning_rate": 9.346090857889368e-07, "loss": 0.0121, "step": 248510 }, { "epoch": 2.6552700464768417, "grad_norm": 0.06358936429023743, "learning_rate": 9.346007787161101e-07, "loss": 0.0069, "step": 248520 }, { "epoch": 2.655376889791121, "grad_norm": 0.10477663576602936, "learning_rate": 9.345924711525864e-07, "loss": 0.0018, "step": 248530 }, { "epoch": 2.655483733105401, "grad_norm": 0.04716244339942932, "learning_rate": 9.345841630983751e-07, "loss": 0.0103, "step": 248540 }, { "epoch": 2.6555905764196805, "grad_norm": 0.028052587062120438, "learning_rate": 9.345758545534853e-07, "loss": 0.0125, "step": 248550 }, { "epoch": 2.65569741973396, "grad_norm": 5.707311630249023, "learning_rate": 9.345675455179266e-07, "loss": 0.0351, "step": 248560 }, { "epoch": 2.65580426304824, "grad_norm": 1.5349348783493042, "learning_rate": 9.345592359917083e-07, "loss": 0.0245, "step": 248570 }, { "epoch": 2.6559111063625194, "grad_norm": 6.367799282073975, "learning_rate": 9.345509259748399e-07, "loss": 0.0077, "step": 248580 }, { "epoch": 2.656017949676799, "grad_norm": 1.045929193496704, "learning_rate": 9.345426154673309e-07, "loss": 0.01, "step": 248590 }, { "epoch": 2.6561247929910787, "grad_norm": 0.0020507194567471743, "learning_rate": 9.345343044691902e-07, "loss": 0.0222, "step": 248600 }, { "epoch": 2.656231636305358, "grad_norm": 0.023134218528866768, "learning_rate": 9.345259929804277e-07, "loss": 0.0134, "step": 248610 }, { "epoch": 2.6563384796196376, "grad_norm": 0.003201768035069108, "learning_rate": 9.345176810010523e-07, "loss": 0.0046, "step": 248620 }, { "epoch": 2.6564453229339176, "grad_norm": 0.04175017401576042, "learning_rate": 9.345093685310738e-07, "loss": 0.0452, "step": 248630 }, { "epoch": 2.656552166248197, "grad_norm": 0.02420915849506855, "learning_rate": 9.345010555705014e-07, "loss": 0.0378, "step": 248640 }, { "epoch": 2.6566590095624765, "grad_norm": 6.562735557556152, "learning_rate": 9.344927421193444e-07, "loss": 0.0178, "step": 248650 }, { "epoch": 2.6567658528767564, "grad_norm": 0.24001763761043549, "learning_rate": 9.344844281776125e-07, "loss": 0.0336, "step": 248660 }, { "epoch": 2.656872696191036, "grad_norm": 0.5956305861473083, "learning_rate": 9.344761137453148e-07, "loss": 0.0073, "step": 248670 }, { "epoch": 2.6569795395053153, "grad_norm": 0.1314712017774582, "learning_rate": 9.344677988224608e-07, "loss": 0.0349, "step": 248680 }, { "epoch": 2.657086382819595, "grad_norm": 0.48654091358184814, "learning_rate": 9.344594834090598e-07, "loss": 0.0039, "step": 248690 }, { "epoch": 2.6571932261338747, "grad_norm": 2.726417064666748, "learning_rate": 9.344511675051214e-07, "loss": 0.0066, "step": 248700 }, { "epoch": 2.657300069448154, "grad_norm": 0.6390000581741333, "learning_rate": 9.344428511106548e-07, "loss": 0.0356, "step": 248710 }, { "epoch": 2.657406912762434, "grad_norm": 1.3287346363067627, "learning_rate": 9.344345342256693e-07, "loss": 0.0066, "step": 248720 }, { "epoch": 2.6575137560767135, "grad_norm": 2.2807562351226807, "learning_rate": 9.344262168501744e-07, "loss": 0.0564, "step": 248730 }, { "epoch": 2.6576205993909934, "grad_norm": 0.0066589172929525375, "learning_rate": 9.344178989841798e-07, "loss": 0.0056, "step": 248740 }, { "epoch": 2.657727442705273, "grad_norm": 3.035681962966919, "learning_rate": 9.344095806276943e-07, "loss": 0.0071, "step": 248750 }, { "epoch": 2.6578342860195523, "grad_norm": 0.015009100548923016, "learning_rate": 9.344012617807278e-07, "loss": 0.0266, "step": 248760 }, { "epoch": 2.6579411293338318, "grad_norm": 1.1373610496520996, "learning_rate": 9.343929424432893e-07, "loss": 0.019, "step": 248770 }, { "epoch": 2.6580479726481117, "grad_norm": 4.332866191864014, "learning_rate": 9.343846226153885e-07, "loss": 0.0193, "step": 248780 }, { "epoch": 2.658154815962391, "grad_norm": 2.357758045196533, "learning_rate": 9.343763022970347e-07, "loss": 0.0251, "step": 248790 }, { "epoch": 2.658261659276671, "grad_norm": 3.9250857830047607, "learning_rate": 9.343679814882372e-07, "loss": 0.02, "step": 248800 }, { "epoch": 2.6583685025909505, "grad_norm": 0.10178295522928238, "learning_rate": 9.343596601890055e-07, "loss": 0.0108, "step": 248810 }, { "epoch": 2.65847534590523, "grad_norm": 0.014607559889554977, "learning_rate": 9.34351338399349e-07, "loss": 0.0162, "step": 248820 }, { "epoch": 2.6585821892195094, "grad_norm": 0.0084752906113863, "learning_rate": 9.34343016119277e-07, "loss": 0.0426, "step": 248830 }, { "epoch": 2.6586890325337893, "grad_norm": 1.768457293510437, "learning_rate": 9.34334693348799e-07, "loss": 0.0226, "step": 248840 }, { "epoch": 2.6587958758480688, "grad_norm": 0.027700524777173996, "learning_rate": 9.343263700879243e-07, "loss": 0.0038, "step": 248850 }, { "epoch": 2.6589027191623487, "grad_norm": 0.0548275001347065, "learning_rate": 9.343180463366625e-07, "loss": 0.0183, "step": 248860 }, { "epoch": 2.659009562476628, "grad_norm": 5.6124982833862305, "learning_rate": 9.343097220950227e-07, "loss": 0.0194, "step": 248870 }, { "epoch": 2.6591164057909076, "grad_norm": 1.7227554321289062, "learning_rate": 9.343013973630143e-07, "loss": 0.0134, "step": 248880 }, { "epoch": 2.659223249105187, "grad_norm": 9.057748794555664, "learning_rate": 9.34293072140647e-07, "loss": 0.0408, "step": 248890 }, { "epoch": 2.659330092419467, "grad_norm": 2.3187196254730225, "learning_rate": 9.342847464279301e-07, "loss": 0.0291, "step": 248900 }, { "epoch": 2.6594369357337464, "grad_norm": 0.11424830555915833, "learning_rate": 9.34276420224873e-07, "loss": 0.0023, "step": 248910 }, { "epoch": 2.6595437790480263, "grad_norm": 14.941239356994629, "learning_rate": 9.342680935314849e-07, "loss": 0.0391, "step": 248920 }, { "epoch": 2.6596506223623058, "grad_norm": 5.093940734863281, "learning_rate": 9.342597663477754e-07, "loss": 0.0405, "step": 248930 }, { "epoch": 2.659757465676585, "grad_norm": 2.610433340072632, "learning_rate": 9.342514386737537e-07, "loss": 0.0532, "step": 248940 }, { "epoch": 2.6598643089908647, "grad_norm": 5.835207462310791, "learning_rate": 9.342431105094295e-07, "loss": 0.005, "step": 248950 }, { "epoch": 2.6599711523051446, "grad_norm": 0.0068199001252651215, "learning_rate": 9.34234781854812e-07, "loss": 0.0145, "step": 248960 }, { "epoch": 2.660077995619424, "grad_norm": 0.0062223998829722404, "learning_rate": 9.342264527099108e-07, "loss": 0.0036, "step": 248970 }, { "epoch": 2.660184838933704, "grad_norm": 11.165605545043945, "learning_rate": 9.342181230747351e-07, "loss": 0.0101, "step": 248980 }, { "epoch": 2.6602916822479834, "grad_norm": 1.098618745803833, "learning_rate": 9.342097929492942e-07, "loss": 0.0193, "step": 248990 }, { "epoch": 2.660398525562263, "grad_norm": 2.3265533447265625, "learning_rate": 9.342014623335979e-07, "loss": 0.0288, "step": 249000 }, { "epoch": 2.6605053688765423, "grad_norm": 0.2825097143650055, "learning_rate": 9.341931312276552e-07, "loss": 0.0433, "step": 249010 }, { "epoch": 2.6606122121908222, "grad_norm": 6.188366889953613, "learning_rate": 9.341847996314758e-07, "loss": 0.0077, "step": 249020 }, { "epoch": 2.6607190555051017, "grad_norm": 0.0990578904747963, "learning_rate": 9.341764675450689e-07, "loss": 0.0082, "step": 249030 }, { "epoch": 2.6608258988193816, "grad_norm": 0.0014386574039235711, "learning_rate": 9.341681349684441e-07, "loss": 0.0257, "step": 249040 }, { "epoch": 2.660932742133661, "grad_norm": 0.10828103125095367, "learning_rate": 9.341598019016107e-07, "loss": 0.022, "step": 249050 }, { "epoch": 2.6610395854479405, "grad_norm": 0.23332929611206055, "learning_rate": 9.341514683445781e-07, "loss": 0.0752, "step": 249060 }, { "epoch": 2.66114642876222, "grad_norm": 6.129086971282959, "learning_rate": 9.341431342973557e-07, "loss": 0.0492, "step": 249070 }, { "epoch": 2.6612532720765, "grad_norm": 3.0774266719818115, "learning_rate": 9.341347997599529e-07, "loss": 0.0666, "step": 249080 }, { "epoch": 2.6613601153907793, "grad_norm": 0.11748180538415909, "learning_rate": 9.341264647323793e-07, "loss": 0.0639, "step": 249090 }, { "epoch": 2.6614669587050592, "grad_norm": 0.1041119173169136, "learning_rate": 9.34118129214644e-07, "loss": 0.0136, "step": 249100 }, { "epoch": 2.6615738020193387, "grad_norm": 0.29363155364990234, "learning_rate": 9.341097932067565e-07, "loss": 0.0124, "step": 249110 }, { "epoch": 2.661680645333618, "grad_norm": 0.9637498259544373, "learning_rate": 9.341014567087264e-07, "loss": 0.0196, "step": 249120 }, { "epoch": 2.6617874886478976, "grad_norm": 11.273065567016602, "learning_rate": 9.34093119720563e-07, "loss": 0.0243, "step": 249130 }, { "epoch": 2.6618943319621775, "grad_norm": 6.105127811431885, "learning_rate": 9.340847822422757e-07, "loss": 0.0073, "step": 249140 }, { "epoch": 2.662001175276457, "grad_norm": 1.0372612476348877, "learning_rate": 9.34076444273874e-07, "loss": 0.0271, "step": 249150 }, { "epoch": 2.662108018590737, "grad_norm": 0.39422065019607544, "learning_rate": 9.340681058153669e-07, "loss": 0.0126, "step": 249160 }, { "epoch": 2.6622148619050163, "grad_norm": 2.234006881713867, "learning_rate": 9.340597668667645e-07, "loss": 0.045, "step": 249170 }, { "epoch": 2.662321705219296, "grad_norm": 0.3579615652561188, "learning_rate": 9.340514274280757e-07, "loss": 0.0177, "step": 249180 }, { "epoch": 2.6624285485335752, "grad_norm": 2.1538233757019043, "learning_rate": 9.340430874993101e-07, "loss": 0.009, "step": 249190 }, { "epoch": 2.662535391847855, "grad_norm": 0.08145066350698471, "learning_rate": 9.340347470804773e-07, "loss": 0.0023, "step": 249200 }, { "epoch": 2.6626422351621346, "grad_norm": 1.6940011978149414, "learning_rate": 9.340264061715864e-07, "loss": 0.0387, "step": 249210 }, { "epoch": 2.6627490784764145, "grad_norm": 0.09155435860157013, "learning_rate": 9.340180647726468e-07, "loss": 0.0213, "step": 249220 }, { "epoch": 2.662855921790694, "grad_norm": 2.492326498031616, "learning_rate": 9.340097228836681e-07, "loss": 0.0104, "step": 249230 }, { "epoch": 2.6629627651049734, "grad_norm": 3.125274658203125, "learning_rate": 9.340013805046596e-07, "loss": 0.068, "step": 249240 }, { "epoch": 2.6630696084192533, "grad_norm": 4.84928035736084, "learning_rate": 9.339930376356309e-07, "loss": 0.0058, "step": 249250 }, { "epoch": 2.663176451733533, "grad_norm": 0.6574319005012512, "learning_rate": 9.339846942765912e-07, "loss": 0.0178, "step": 249260 }, { "epoch": 2.6632832950478122, "grad_norm": 1.8865265846252441, "learning_rate": 9.339763504275502e-07, "loss": 0.0137, "step": 249270 }, { "epoch": 2.663390138362092, "grad_norm": 0.023909917101264, "learning_rate": 9.339680060885171e-07, "loss": 0.0571, "step": 249280 }, { "epoch": 2.6634969816763716, "grad_norm": 0.026739761233329773, "learning_rate": 9.339596612595013e-07, "loss": 0.0172, "step": 249290 }, { "epoch": 2.663603824990651, "grad_norm": 3.7117130756378174, "learning_rate": 9.339513159405122e-07, "loss": 0.0504, "step": 249300 }, { "epoch": 2.663710668304931, "grad_norm": 0.32993409037590027, "learning_rate": 9.339429701315596e-07, "loss": 0.0382, "step": 249310 }, { "epoch": 2.6638175116192104, "grad_norm": 2.706939220428467, "learning_rate": 9.339346238326523e-07, "loss": 0.0105, "step": 249320 }, { "epoch": 2.66392435493349, "grad_norm": 0.24725845456123352, "learning_rate": 9.339262770438002e-07, "loss": 0.0227, "step": 249330 }, { "epoch": 2.66403119824777, "grad_norm": 1.9651511907577515, "learning_rate": 9.339179297650126e-07, "loss": 0.0087, "step": 249340 }, { "epoch": 2.6641380415620493, "grad_norm": 0.08423938602209091, "learning_rate": 9.33909581996299e-07, "loss": 0.0036, "step": 249350 }, { "epoch": 2.6642448848763287, "grad_norm": 0.16443568468093872, "learning_rate": 9.339012337376686e-07, "loss": 0.0064, "step": 249360 }, { "epoch": 2.6643517281906086, "grad_norm": 0.07966087013483047, "learning_rate": 9.33892884989131e-07, "loss": 0.0379, "step": 249370 }, { "epoch": 2.664458571504888, "grad_norm": 0.02544892206788063, "learning_rate": 9.338845357506955e-07, "loss": 0.0102, "step": 249380 }, { "epoch": 2.6645654148191675, "grad_norm": 0.0017498820088803768, "learning_rate": 9.338761860223717e-07, "loss": 0.017, "step": 249390 }, { "epoch": 2.6646722581334474, "grad_norm": 1.2162697315216064, "learning_rate": 9.338678358041689e-07, "loss": 0.0643, "step": 249400 }, { "epoch": 2.664779101447727, "grad_norm": 3.033092975616455, "learning_rate": 9.338594850960966e-07, "loss": 0.0151, "step": 249410 }, { "epoch": 2.6648859447620064, "grad_norm": 7.581259250640869, "learning_rate": 9.338511338981642e-07, "loss": 0.0464, "step": 249420 }, { "epoch": 2.6649927880762863, "grad_norm": 4.029736042022705, "learning_rate": 9.338427822103811e-07, "loss": 0.0248, "step": 249430 }, { "epoch": 2.6650996313905657, "grad_norm": 2.216933012008667, "learning_rate": 9.338344300327569e-07, "loss": 0.056, "step": 249440 }, { "epoch": 2.6652064747048456, "grad_norm": 0.09707985818386078, "learning_rate": 9.338260773653007e-07, "loss": 0.0081, "step": 249450 }, { "epoch": 2.665313318019125, "grad_norm": 0.07443436980247498, "learning_rate": 9.338177242080222e-07, "loss": 0.0108, "step": 249460 }, { "epoch": 2.6654201613334045, "grad_norm": 0.0558951199054718, "learning_rate": 9.338093705609307e-07, "loss": 0.0038, "step": 249470 }, { "epoch": 2.665527004647684, "grad_norm": 0.057188984006643295, "learning_rate": 9.338010164240357e-07, "loss": 0.008, "step": 249480 }, { "epoch": 2.665633847961964, "grad_norm": 0.544728696346283, "learning_rate": 9.337926617973466e-07, "loss": 0.1101, "step": 249490 }, { "epoch": 2.6657406912762434, "grad_norm": 0.3092341721057892, "learning_rate": 9.337843066808728e-07, "loss": 0.0215, "step": 249500 }, { "epoch": 2.6658475345905233, "grad_norm": 0.029557785019278526, "learning_rate": 9.337759510746239e-07, "loss": 0.0212, "step": 249510 }, { "epoch": 2.6659543779048027, "grad_norm": 0.02010948956012726, "learning_rate": 9.337675949786093e-07, "loss": 0.0199, "step": 249520 }, { "epoch": 2.666061221219082, "grad_norm": 8.702815055847168, "learning_rate": 9.337592383928381e-07, "loss": 0.0189, "step": 249530 }, { "epoch": 2.6661680645333616, "grad_norm": 0.017861254513263702, "learning_rate": 9.3375088131732e-07, "loss": 0.0266, "step": 249540 }, { "epoch": 2.6662749078476415, "grad_norm": 0.09948264807462692, "learning_rate": 9.337425237520646e-07, "loss": 0.0076, "step": 249550 }, { "epoch": 2.666381751161921, "grad_norm": 0.14302457869052887, "learning_rate": 9.33734165697081e-07, "loss": 0.0114, "step": 249560 }, { "epoch": 2.666488594476201, "grad_norm": 0.8918399810791016, "learning_rate": 9.33725807152379e-07, "loss": 0.096, "step": 249570 }, { "epoch": 2.6665954377904804, "grad_norm": 3.91259503364563, "learning_rate": 9.337174481179676e-07, "loss": 0.0142, "step": 249580 }, { "epoch": 2.66670228110476, "grad_norm": 10.701458930969238, "learning_rate": 9.337090885938566e-07, "loss": 0.0381, "step": 249590 }, { "epoch": 2.6668091244190393, "grad_norm": 0.14054882526397705, "learning_rate": 9.337007285800551e-07, "loss": 0.0203, "step": 249600 }, { "epoch": 2.666915967733319, "grad_norm": 1.7213070392608643, "learning_rate": 9.33692368076573e-07, "loss": 0.0174, "step": 249610 }, { "epoch": 2.6670228110475986, "grad_norm": 0.01352574024349451, "learning_rate": 9.336840070834194e-07, "loss": 0.037, "step": 249620 }, { "epoch": 2.6671296543618785, "grad_norm": 0.04255128651857376, "learning_rate": 9.336756456006038e-07, "loss": 0.0237, "step": 249630 }, { "epoch": 2.667236497676158, "grad_norm": 0.038754116743803024, "learning_rate": 9.336672836281357e-07, "loss": 0.0251, "step": 249640 }, { "epoch": 2.6673433409904375, "grad_norm": 4.16121244430542, "learning_rate": 9.336589211660244e-07, "loss": 0.0169, "step": 249650 }, { "epoch": 2.667450184304717, "grad_norm": 0.9288153648376465, "learning_rate": 9.336505582142795e-07, "loss": 0.0869, "step": 249660 }, { "epoch": 2.667557027618997, "grad_norm": 0.06159320846199989, "learning_rate": 9.336421947729104e-07, "loss": 0.0557, "step": 249670 }, { "epoch": 2.6676638709332763, "grad_norm": 1.0578819513320923, "learning_rate": 9.336338308419264e-07, "loss": 0.0289, "step": 249680 }, { "epoch": 2.667770714247556, "grad_norm": 7.107741832733154, "learning_rate": 9.336254664213375e-07, "loss": 0.0447, "step": 249690 }, { "epoch": 2.6678775575618356, "grad_norm": 3.8294219970703125, "learning_rate": 9.336171015111522e-07, "loss": 0.0191, "step": 249700 }, { "epoch": 2.667984400876115, "grad_norm": 4.379251480102539, "learning_rate": 9.336087361113809e-07, "loss": 0.0289, "step": 249710 }, { "epoch": 2.6680912441903946, "grad_norm": 0.01483079418540001, "learning_rate": 9.336003702220323e-07, "loss": 0.0508, "step": 249720 }, { "epoch": 2.6681980875046745, "grad_norm": 3.0260915756225586, "learning_rate": 9.335920038431163e-07, "loss": 0.0144, "step": 249730 }, { "epoch": 2.668304930818954, "grad_norm": 2.3039510250091553, "learning_rate": 9.335836369746423e-07, "loss": 0.011, "step": 249740 }, { "epoch": 2.668411774133234, "grad_norm": 1.327857494354248, "learning_rate": 9.335752696166195e-07, "loss": 0.007, "step": 249750 }, { "epoch": 2.6685186174475133, "grad_norm": 0.02881241776049137, "learning_rate": 9.335669017690575e-07, "loss": 0.0022, "step": 249760 }, { "epoch": 2.6686254607617927, "grad_norm": 0.44265058636665344, "learning_rate": 9.335585334319658e-07, "loss": 0.0122, "step": 249770 }, { "epoch": 2.668732304076072, "grad_norm": 0.11352971941232681, "learning_rate": 9.335501646053537e-07, "loss": 0.064, "step": 249780 }, { "epoch": 2.668839147390352, "grad_norm": 11.828596115112305, "learning_rate": 9.335417952892309e-07, "loss": 0.049, "step": 249790 }, { "epoch": 2.6689459907046316, "grad_norm": 1.7159866094589233, "learning_rate": 9.335334254836064e-07, "loss": 0.0171, "step": 249800 }, { "epoch": 2.6690528340189115, "grad_norm": 5.679811954498291, "learning_rate": 9.335250551884903e-07, "loss": 0.0064, "step": 249810 }, { "epoch": 2.669159677333191, "grad_norm": 0.004567219875752926, "learning_rate": 9.335166844038916e-07, "loss": 0.002, "step": 249820 }, { "epoch": 2.6692665206474704, "grad_norm": 0.39254164695739746, "learning_rate": 9.335083131298197e-07, "loss": 0.02, "step": 249830 }, { "epoch": 2.66937336396175, "grad_norm": 0.008527782745659351, "learning_rate": 9.334999413662843e-07, "loss": 0.0063, "step": 249840 }, { "epoch": 2.6694802072760297, "grad_norm": 9.146171569824219, "learning_rate": 9.334915691132947e-07, "loss": 0.0192, "step": 249850 }, { "epoch": 2.669587050590309, "grad_norm": 0.11875934898853302, "learning_rate": 9.334831963708605e-07, "loss": 0.0076, "step": 249860 }, { "epoch": 2.669693893904589, "grad_norm": 0.008528086356818676, "learning_rate": 9.334748231389909e-07, "loss": 0.0222, "step": 249870 }, { "epoch": 2.6698007372188686, "grad_norm": 0.003283130470663309, "learning_rate": 9.334664494176955e-07, "loss": 0.019, "step": 249880 }, { "epoch": 2.669907580533148, "grad_norm": 0.0868050679564476, "learning_rate": 9.33458075206984e-07, "loss": 0.0141, "step": 249890 }, { "epoch": 2.6700144238474275, "grad_norm": 3.492994785308838, "learning_rate": 9.334497005068655e-07, "loss": 0.0115, "step": 249900 }, { "epoch": 2.6701212671617074, "grad_norm": 11.219536781311035, "learning_rate": 9.334413253173495e-07, "loss": 0.0166, "step": 249910 }, { "epoch": 2.670228110475987, "grad_norm": 3.28271222114563, "learning_rate": 9.334329496384457e-07, "loss": 0.0544, "step": 249920 }, { "epoch": 2.6703349537902668, "grad_norm": 6.167888641357422, "learning_rate": 9.334245734701631e-07, "loss": 0.0105, "step": 249930 }, { "epoch": 2.670441797104546, "grad_norm": 0.003440273692831397, "learning_rate": 9.334161968125116e-07, "loss": 0.0032, "step": 249940 }, { "epoch": 2.6705486404188257, "grad_norm": 0.0946715846657753, "learning_rate": 9.334078196655007e-07, "loss": 0.0085, "step": 249950 }, { "epoch": 2.670655483733105, "grad_norm": 0.43203872442245483, "learning_rate": 9.333994420291393e-07, "loss": 0.0193, "step": 249960 }, { "epoch": 2.670762327047385, "grad_norm": 4.814957618713379, "learning_rate": 9.333910639034375e-07, "loss": 0.0133, "step": 249970 }, { "epoch": 2.6708691703616645, "grad_norm": 5.002927303314209, "learning_rate": 9.333826852884044e-07, "loss": 0.0109, "step": 249980 }, { "epoch": 2.6709760136759444, "grad_norm": 5.803261756896973, "learning_rate": 9.333743061840494e-07, "loss": 0.053, "step": 249990 }, { "epoch": 2.671082856990224, "grad_norm": 0.9808123707771301, "learning_rate": 9.333659265903822e-07, "loss": 0.0161, "step": 250000 }, { "epoch": 2.6711897003045033, "grad_norm": 0.002289771568030119, "learning_rate": 9.333575465074122e-07, "loss": 0.0126, "step": 250010 }, { "epoch": 2.671296543618783, "grad_norm": 0.7058002948760986, "learning_rate": 9.333491659351488e-07, "loss": 0.0015, "step": 250020 }, { "epoch": 2.6714033869330627, "grad_norm": 3.963514566421509, "learning_rate": 9.333407848736015e-07, "loss": 0.0168, "step": 250030 }, { "epoch": 2.671510230247342, "grad_norm": 0.4908968210220337, "learning_rate": 9.333324033227797e-07, "loss": 0.0078, "step": 250040 }, { "epoch": 2.671617073561622, "grad_norm": 0.018218865618109703, "learning_rate": 9.333240212826929e-07, "loss": 0.0013, "step": 250050 }, { "epoch": 2.6717239168759015, "grad_norm": 0.13316139578819275, "learning_rate": 9.333156387533507e-07, "loss": 0.0127, "step": 250060 }, { "epoch": 2.671830760190181, "grad_norm": 1.3985410928726196, "learning_rate": 9.333072557347623e-07, "loss": 0.0183, "step": 250070 }, { "epoch": 2.671937603504461, "grad_norm": 0.03231707960367203, "learning_rate": 9.332988722269374e-07, "loss": 0.0081, "step": 250080 }, { "epoch": 2.6720444468187403, "grad_norm": 0.27558284997940063, "learning_rate": 9.332904882298853e-07, "loss": 0.0399, "step": 250090 }, { "epoch": 2.6721512901330198, "grad_norm": 1.1671912670135498, "learning_rate": 9.332821037436156e-07, "loss": 0.0041, "step": 250100 }, { "epoch": 2.6722581334472997, "grad_norm": 5.158736228942871, "learning_rate": 9.332737187681376e-07, "loss": 0.0112, "step": 250110 }, { "epoch": 2.672364976761579, "grad_norm": 6.138617038726807, "learning_rate": 9.33265333303461e-07, "loss": 0.0134, "step": 250120 }, { "epoch": 2.6724718200758586, "grad_norm": 1.0613555908203125, "learning_rate": 9.332569473495951e-07, "loss": 0.0269, "step": 250130 }, { "epoch": 2.6725786633901385, "grad_norm": 0.34350091218948364, "learning_rate": 9.332485609065495e-07, "loss": 0.0272, "step": 250140 }, { "epoch": 2.672685506704418, "grad_norm": 0.018722988665103912, "learning_rate": 9.332401739743334e-07, "loss": 0.0156, "step": 250150 }, { "epoch": 2.6727923500186974, "grad_norm": 6.111017227172852, "learning_rate": 9.332317865529565e-07, "loss": 0.0174, "step": 250160 }, { "epoch": 2.6728991933329773, "grad_norm": 8.081673622131348, "learning_rate": 9.332233986424282e-07, "loss": 0.0104, "step": 250170 }, { "epoch": 2.6730060366472568, "grad_norm": 1.3764876127243042, "learning_rate": 9.332150102427581e-07, "loss": 0.1211, "step": 250180 }, { "epoch": 2.6731128799615362, "grad_norm": 1.5191643238067627, "learning_rate": 9.332066213539554e-07, "loss": 0.0089, "step": 250190 }, { "epoch": 2.673219723275816, "grad_norm": 1.5962269306182861, "learning_rate": 9.331982319760298e-07, "loss": 0.01, "step": 250200 }, { "epoch": 2.6733265665900956, "grad_norm": 7.6627044677734375, "learning_rate": 9.331898421089908e-07, "loss": 0.0345, "step": 250210 }, { "epoch": 2.6734334099043755, "grad_norm": 0.011395803652703762, "learning_rate": 9.331814517528476e-07, "loss": 0.0141, "step": 250220 }, { "epoch": 2.673540253218655, "grad_norm": 0.0022558895871043205, "learning_rate": 9.331730609076098e-07, "loss": 0.0136, "step": 250230 }, { "epoch": 2.6736470965329344, "grad_norm": 2.9529080390930176, "learning_rate": 9.331646695732871e-07, "loss": 0.0165, "step": 250240 }, { "epoch": 2.673753939847214, "grad_norm": 3.7957918643951416, "learning_rate": 9.331562777498887e-07, "loss": 0.0157, "step": 250250 }, { "epoch": 2.673860783161494, "grad_norm": 0.009974533692002296, "learning_rate": 9.331478854374243e-07, "loss": 0.0074, "step": 250260 }, { "epoch": 2.6739676264757732, "grad_norm": 1.9446768760681152, "learning_rate": 9.331394926359031e-07, "loss": 0.0211, "step": 250270 }, { "epoch": 2.674074469790053, "grad_norm": 2.9600155353546143, "learning_rate": 9.331310993453348e-07, "loss": 0.0132, "step": 250280 }, { "epoch": 2.6741813131043326, "grad_norm": 9.173775672912598, "learning_rate": 9.331227055657286e-07, "loss": 0.032, "step": 250290 }, { "epoch": 2.674288156418612, "grad_norm": 0.01111186109483242, "learning_rate": 9.331143112970945e-07, "loss": 0.0145, "step": 250300 }, { "epoch": 2.6743949997328915, "grad_norm": 2.3317368030548096, "learning_rate": 9.331059165394415e-07, "loss": 0.0029, "step": 250310 }, { "epoch": 2.6745018430471714, "grad_norm": 0.0024844491854310036, "learning_rate": 9.330975212927793e-07, "loss": 0.0238, "step": 250320 }, { "epoch": 2.674608686361451, "grad_norm": 0.45327988266944885, "learning_rate": 9.330891255571171e-07, "loss": 0.0262, "step": 250330 }, { "epoch": 2.674715529675731, "grad_norm": 0.005238200072199106, "learning_rate": 9.33080729332465e-07, "loss": 0.02, "step": 250340 }, { "epoch": 2.6748223729900102, "grad_norm": 7.107531547546387, "learning_rate": 9.330723326188317e-07, "loss": 0.0111, "step": 250350 }, { "epoch": 2.6749292163042897, "grad_norm": 2.1550097465515137, "learning_rate": 9.330639354162271e-07, "loss": 0.0198, "step": 250360 }, { "epoch": 2.675036059618569, "grad_norm": 6.278749942779541, "learning_rate": 9.330555377246607e-07, "loss": 0.0248, "step": 250370 }, { "epoch": 2.675142902932849, "grad_norm": 4.885226726531982, "learning_rate": 9.330471395441419e-07, "loss": 0.0136, "step": 250380 }, { "epoch": 2.6752497462471285, "grad_norm": 0.1580263078212738, "learning_rate": 9.330387408746802e-07, "loss": 0.0231, "step": 250390 }, { "epoch": 2.6753565895614084, "grad_norm": 0.004890522453933954, "learning_rate": 9.330303417162851e-07, "loss": 0.017, "step": 250400 }, { "epoch": 2.675463432875688, "grad_norm": 7.161579132080078, "learning_rate": 9.330219420689661e-07, "loss": 0.0356, "step": 250410 }, { "epoch": 2.6755702761899673, "grad_norm": 3.930164098739624, "learning_rate": 9.330135419327326e-07, "loss": 0.0748, "step": 250420 }, { "epoch": 2.675677119504247, "grad_norm": 0.015272132121026516, "learning_rate": 9.330051413075941e-07, "loss": 0.0161, "step": 250430 }, { "epoch": 2.6757839628185267, "grad_norm": 0.26809611916542053, "learning_rate": 9.329967401935601e-07, "loss": 0.0147, "step": 250440 }, { "epoch": 2.675890806132806, "grad_norm": 0.9170752763748169, "learning_rate": 9.3298833859064e-07, "loss": 0.0144, "step": 250450 }, { "epoch": 2.675997649447086, "grad_norm": 3.8191189765930176, "learning_rate": 9.329799364988436e-07, "loss": 0.0344, "step": 250460 }, { "epoch": 2.6761044927613655, "grad_norm": 15.45034122467041, "learning_rate": 9.329715339181802e-07, "loss": 0.0197, "step": 250470 }, { "epoch": 2.676211336075645, "grad_norm": 0.010424109175801277, "learning_rate": 9.329631308486592e-07, "loss": 0.026, "step": 250480 }, { "epoch": 2.6763181793899244, "grad_norm": 0.3260170519351959, "learning_rate": 9.3295472729029e-07, "loss": 0.0112, "step": 250490 }, { "epoch": 2.6764250227042043, "grad_norm": 0.025754433125257492, "learning_rate": 9.329463232430824e-07, "loss": 0.0146, "step": 250500 }, { "epoch": 2.676531866018484, "grad_norm": 0.006560744717717171, "learning_rate": 9.329379187070456e-07, "loss": 0.0112, "step": 250510 }, { "epoch": 2.6766387093327637, "grad_norm": 6.2168965339660645, "learning_rate": 9.329295136821893e-07, "loss": 0.0265, "step": 250520 }, { "epoch": 2.676745552647043, "grad_norm": 0.3592265546321869, "learning_rate": 9.329211081685228e-07, "loss": 0.0046, "step": 250530 }, { "epoch": 2.6768523959613226, "grad_norm": 1.9421738386154175, "learning_rate": 9.329127021660558e-07, "loss": 0.019, "step": 250540 }, { "epoch": 2.676959239275602, "grad_norm": 0.012928209267556667, "learning_rate": 9.329042956747978e-07, "loss": 0.0241, "step": 250550 }, { "epoch": 2.677066082589882, "grad_norm": 3.997107744216919, "learning_rate": 9.32895888694758e-07, "loss": 0.038, "step": 250560 }, { "epoch": 2.6771729259041614, "grad_norm": 2.4691014289855957, "learning_rate": 9.328874812259461e-07, "loss": 0.0473, "step": 250570 }, { "epoch": 2.6772797692184414, "grad_norm": 1.568617582321167, "learning_rate": 9.328790732683716e-07, "loss": 0.05, "step": 250580 }, { "epoch": 2.677386612532721, "grad_norm": 0.010768377222120762, "learning_rate": 9.328706648220439e-07, "loss": 0.0078, "step": 250590 }, { "epoch": 2.6774934558470003, "grad_norm": 0.014610244892537594, "learning_rate": 9.328622558869725e-07, "loss": 0.0128, "step": 250600 }, { "epoch": 2.6776002991612797, "grad_norm": 1.2427921295166016, "learning_rate": 9.328538464631671e-07, "loss": 0.0393, "step": 250610 }, { "epoch": 2.6777071424755596, "grad_norm": 5.795893669128418, "learning_rate": 9.32845436550637e-07, "loss": 0.0037, "step": 250620 }, { "epoch": 2.677813985789839, "grad_norm": 1.940445899963379, "learning_rate": 9.328370261493917e-07, "loss": 0.0195, "step": 250630 }, { "epoch": 2.677920829104119, "grad_norm": 0.13240382075309753, "learning_rate": 9.328286152594407e-07, "loss": 0.0189, "step": 250640 }, { "epoch": 2.6780276724183985, "grad_norm": 1.541028618812561, "learning_rate": 9.328202038807936e-07, "loss": 0.0625, "step": 250650 }, { "epoch": 2.678134515732678, "grad_norm": 0.17301161587238312, "learning_rate": 9.328117920134598e-07, "loss": 0.0078, "step": 250660 }, { "epoch": 2.6782413590469574, "grad_norm": 0.10464197397232056, "learning_rate": 9.328033796574487e-07, "loss": 0.0381, "step": 250670 }, { "epoch": 2.6783482023612373, "grad_norm": 4.19761848449707, "learning_rate": 9.327949668127701e-07, "loss": 0.0249, "step": 250680 }, { "epoch": 2.6784550456755167, "grad_norm": 0.004514679778367281, "learning_rate": 9.327865534794333e-07, "loss": 0.0265, "step": 250690 }, { "epoch": 2.6785618889897966, "grad_norm": 0.03293513134121895, "learning_rate": 9.327781396574477e-07, "loss": 0.0331, "step": 250700 }, { "epoch": 2.678668732304076, "grad_norm": 1.6234090328216553, "learning_rate": 9.32769725346823e-07, "loss": 0.0026, "step": 250710 }, { "epoch": 2.6787755756183556, "grad_norm": 3.030942440032959, "learning_rate": 9.327613105475685e-07, "loss": 0.0133, "step": 250720 }, { "epoch": 2.6788824189326355, "grad_norm": 0.12560586631298065, "learning_rate": 9.32752895259694e-07, "loss": 0.0077, "step": 250730 }, { "epoch": 2.678989262246915, "grad_norm": 3.361567974090576, "learning_rate": 9.327444794832087e-07, "loss": 0.0067, "step": 250740 }, { "epoch": 2.6790961055611944, "grad_norm": 3.688239336013794, "learning_rate": 9.327360632181221e-07, "loss": 0.0165, "step": 250750 }, { "epoch": 2.6792029488754743, "grad_norm": 0.8572626709938049, "learning_rate": 9.32727646464444e-07, "loss": 0.0589, "step": 250760 }, { "epoch": 2.6793097921897537, "grad_norm": 0.02078227512538433, "learning_rate": 9.327192292221837e-07, "loss": 0.015, "step": 250770 }, { "epoch": 2.679416635504033, "grad_norm": 2.807527780532837, "learning_rate": 9.327108114913507e-07, "loss": 0.02, "step": 250780 }, { "epoch": 2.679523478818313, "grad_norm": 0.04649348929524422, "learning_rate": 9.327023932719546e-07, "loss": 0.0166, "step": 250790 }, { "epoch": 2.6796303221325926, "grad_norm": 3.672675848007202, "learning_rate": 9.326939745640048e-07, "loss": 0.0152, "step": 250800 }, { "epoch": 2.679737165446872, "grad_norm": 0.008840524591505527, "learning_rate": 9.326855553675109e-07, "loss": 0.005, "step": 250810 }, { "epoch": 2.679844008761152, "grad_norm": 3.040595293045044, "learning_rate": 9.326771356824823e-07, "loss": 0.0086, "step": 250820 }, { "epoch": 2.6799508520754314, "grad_norm": 0.07039975374937057, "learning_rate": 9.326687155089285e-07, "loss": 0.0182, "step": 250830 }, { "epoch": 2.680057695389711, "grad_norm": 1.7930742502212524, "learning_rate": 9.326602948468591e-07, "loss": 0.0134, "step": 250840 }, { "epoch": 2.6801645387039907, "grad_norm": 12.11963939666748, "learning_rate": 9.326518736962836e-07, "loss": 0.0482, "step": 250850 }, { "epoch": 2.68027138201827, "grad_norm": 1.3348795175552368, "learning_rate": 9.326434520572115e-07, "loss": 0.0459, "step": 250860 }, { "epoch": 2.6803782253325497, "grad_norm": 0.34588485956192017, "learning_rate": 9.326350299296522e-07, "loss": 0.037, "step": 250870 }, { "epoch": 2.6804850686468296, "grad_norm": 0.18368588387966156, "learning_rate": 9.326266073136156e-07, "loss": 0.0038, "step": 250880 }, { "epoch": 2.680591911961109, "grad_norm": 3.8823864459991455, "learning_rate": 9.326181842091105e-07, "loss": 0.0281, "step": 250890 }, { "epoch": 2.6806987552753885, "grad_norm": 0.2887268662452698, "learning_rate": 9.32609760616147e-07, "loss": 0.0185, "step": 250900 }, { "epoch": 2.6808055985896684, "grad_norm": 0.008570265024900436, "learning_rate": 9.326013365347345e-07, "loss": 0.0243, "step": 250910 }, { "epoch": 2.680912441903948, "grad_norm": 1.8721626996994019, "learning_rate": 9.325929119648823e-07, "loss": 0.0342, "step": 250920 }, { "epoch": 2.6810192852182277, "grad_norm": 2.791609287261963, "learning_rate": 9.325844869066001e-07, "loss": 0.0217, "step": 250930 }, { "epoch": 2.681126128532507, "grad_norm": 0.22657495737075806, "learning_rate": 9.325760613598974e-07, "loss": 0.0052, "step": 250940 }, { "epoch": 2.6812329718467867, "grad_norm": 1.2983931303024292, "learning_rate": 9.325676353247837e-07, "loss": 0.0103, "step": 250950 }, { "epoch": 2.681339815161066, "grad_norm": 4.224930763244629, "learning_rate": 9.325592088012684e-07, "loss": 0.0061, "step": 250960 }, { "epoch": 2.681446658475346, "grad_norm": 0.006110031623393297, "learning_rate": 9.325507817893612e-07, "loss": 0.0231, "step": 250970 }, { "epoch": 2.6815535017896255, "grad_norm": 0.04221898689866066, "learning_rate": 9.325423542890715e-07, "loss": 0.0177, "step": 250980 }, { "epoch": 2.6816603451039054, "grad_norm": 0.07498538494110107, "learning_rate": 9.325339263004088e-07, "loss": 0.0167, "step": 250990 }, { "epoch": 2.681767188418185, "grad_norm": 0.5064336061477661, "learning_rate": 9.325254978233826e-07, "loss": 0.0142, "step": 251000 }, { "epoch": 2.6818740317324643, "grad_norm": 0.2937970459461212, "learning_rate": 9.325170688580026e-07, "loss": 0.0082, "step": 251010 }, { "epoch": 2.6819808750467438, "grad_norm": 0.7546499967575073, "learning_rate": 9.325086394042782e-07, "loss": 0.0158, "step": 251020 }, { "epoch": 2.6820877183610237, "grad_norm": 0.17807155847549438, "learning_rate": 9.325002094622188e-07, "loss": 0.0091, "step": 251030 }, { "epoch": 2.682194561675303, "grad_norm": 1.2962501049041748, "learning_rate": 9.32491779031834e-07, "loss": 0.0146, "step": 251040 }, { "epoch": 2.682301404989583, "grad_norm": 0.07015882432460785, "learning_rate": 9.324833481131333e-07, "loss": 0.0153, "step": 251050 }, { "epoch": 2.6824082483038625, "grad_norm": 0.03442327305674553, "learning_rate": 9.324749167061265e-07, "loss": 0.0093, "step": 251060 }, { "epoch": 2.682515091618142, "grad_norm": 1.6693397760391235, "learning_rate": 9.324664848108226e-07, "loss": 0.0024, "step": 251070 }, { "epoch": 2.6826219349324214, "grad_norm": 0.8124015927314758, "learning_rate": 9.324580524272316e-07, "loss": 0.0274, "step": 251080 }, { "epoch": 2.6827287782467013, "grad_norm": 0.009822681546211243, "learning_rate": 9.324496195553627e-07, "loss": 0.0174, "step": 251090 }, { "epoch": 2.6828356215609808, "grad_norm": 0.38007253408432007, "learning_rate": 9.324411861952256e-07, "loss": 0.0263, "step": 251100 }, { "epoch": 2.6829424648752607, "grad_norm": 0.07793211936950684, "learning_rate": 9.324327523468298e-07, "loss": 0.0286, "step": 251110 }, { "epoch": 2.68304930818954, "grad_norm": 0.03419046476483345, "learning_rate": 9.324243180101847e-07, "loss": 0.0047, "step": 251120 }, { "epoch": 2.6831561515038196, "grad_norm": 0.07505630701780319, "learning_rate": 9.324158831853e-07, "loss": 0.043, "step": 251130 }, { "epoch": 2.683262994818099, "grad_norm": 0.5580716729164124, "learning_rate": 9.32407447872185e-07, "loss": 0.0073, "step": 251140 }, { "epoch": 2.683369838132379, "grad_norm": 7.1951704025268555, "learning_rate": 9.323990120708495e-07, "loss": 0.0129, "step": 251150 }, { "epoch": 2.6834766814466584, "grad_norm": 6.615041255950928, "learning_rate": 9.323905757813028e-07, "loss": 0.0652, "step": 251160 }, { "epoch": 2.6835835247609383, "grad_norm": 1.4143192768096924, "learning_rate": 9.323821390035544e-07, "loss": 0.0309, "step": 251170 }, { "epoch": 2.6836903680752178, "grad_norm": 3.399953603744507, "learning_rate": 9.323737017376142e-07, "loss": 0.0487, "step": 251180 }, { "epoch": 2.6837972113894972, "grad_norm": 1.6035550832748413, "learning_rate": 9.323652639834914e-07, "loss": 0.0154, "step": 251190 }, { "epoch": 2.6839040547037767, "grad_norm": 2.6033473014831543, "learning_rate": 9.323568257411954e-07, "loss": 0.0125, "step": 251200 }, { "epoch": 2.6840108980180566, "grad_norm": 0.012660306878387928, "learning_rate": 9.323483870107361e-07, "loss": 0.0148, "step": 251210 }, { "epoch": 2.684117741332336, "grad_norm": 2.8105275630950928, "learning_rate": 9.323399477921227e-07, "loss": 0.0472, "step": 251220 }, { "epoch": 2.684224584646616, "grad_norm": 18.23502540588379, "learning_rate": 9.323315080853648e-07, "loss": 0.016, "step": 251230 }, { "epoch": 2.6843314279608954, "grad_norm": 1.4122165441513062, "learning_rate": 9.323230678904721e-07, "loss": 0.0252, "step": 251240 }, { "epoch": 2.684438271275175, "grad_norm": 1.1048833131790161, "learning_rate": 9.32314627207454e-07, "loss": 0.0215, "step": 251250 }, { "epoch": 2.6845451145894543, "grad_norm": 2.4121692180633545, "learning_rate": 9.323061860363202e-07, "loss": 0.0527, "step": 251260 }, { "epoch": 2.6846519579037342, "grad_norm": 0.013515514321625233, "learning_rate": 9.322977443770799e-07, "loss": 0.0249, "step": 251270 }, { "epoch": 2.6847588012180137, "grad_norm": 0.03668958693742752, "learning_rate": 9.32289302229743e-07, "loss": 0.0171, "step": 251280 }, { "epoch": 2.6848656445322936, "grad_norm": 9.413822174072266, "learning_rate": 9.322808595943188e-07, "loss": 0.0517, "step": 251290 }, { "epoch": 2.684972487846573, "grad_norm": 2.0188798904418945, "learning_rate": 9.322724164708168e-07, "loss": 0.0272, "step": 251300 }, { "epoch": 2.6850793311608525, "grad_norm": 12.277444839477539, "learning_rate": 9.322639728592468e-07, "loss": 0.0197, "step": 251310 }, { "epoch": 2.685186174475132, "grad_norm": 10.515887260437012, "learning_rate": 9.322555287596179e-07, "loss": 0.0209, "step": 251320 }, { "epoch": 2.685293017789412, "grad_norm": 0.002126601990312338, "learning_rate": 9.3224708417194e-07, "loss": 0.0312, "step": 251330 }, { "epoch": 2.6853998611036913, "grad_norm": 0.08046742528676987, "learning_rate": 9.322386390962224e-07, "loss": 0.0113, "step": 251340 }, { "epoch": 2.6855067044179712, "grad_norm": 2.4366798400878906, "learning_rate": 9.32230193532475e-07, "loss": 0.0017, "step": 251350 }, { "epoch": 2.6856135477322507, "grad_norm": 0.041441794484853745, "learning_rate": 9.322217474807069e-07, "loss": 0.0207, "step": 251360 }, { "epoch": 2.68572039104653, "grad_norm": 1.929476261138916, "learning_rate": 9.322133009409278e-07, "loss": 0.0221, "step": 251370 }, { "epoch": 2.6858272343608096, "grad_norm": 3.2640576362609863, "learning_rate": 9.322048539131474e-07, "loss": 0.0188, "step": 251380 }, { "epoch": 2.6859340776750895, "grad_norm": 0.04571322724223137, "learning_rate": 9.321964063973751e-07, "loss": 0.019, "step": 251390 }, { "epoch": 2.686040920989369, "grad_norm": 5.099975109100342, "learning_rate": 9.321879583936203e-07, "loss": 0.0119, "step": 251400 }, { "epoch": 2.686147764303649, "grad_norm": 0.5997906923294067, "learning_rate": 9.321795099018928e-07, "loss": 0.0332, "step": 251410 }, { "epoch": 2.6862546076179283, "grad_norm": 0.08171288669109344, "learning_rate": 9.32171060922202e-07, "loss": 0.0287, "step": 251420 }, { "epoch": 2.686361450932208, "grad_norm": 0.0016680730041116476, "learning_rate": 9.321626114545573e-07, "loss": 0.0136, "step": 251430 }, { "epoch": 2.6864682942464873, "grad_norm": 0.025075597688555717, "learning_rate": 9.321541614989686e-07, "loss": 0.0126, "step": 251440 }, { "epoch": 2.686575137560767, "grad_norm": 1.5182560682296753, "learning_rate": 9.321457110554451e-07, "loss": 0.0197, "step": 251450 }, { "epoch": 2.6866819808750466, "grad_norm": 1.644978642463684, "learning_rate": 9.321372601239966e-07, "loss": 0.0181, "step": 251460 }, { "epoch": 2.6867888241893265, "grad_norm": 8.156055450439453, "learning_rate": 9.321288087046323e-07, "loss": 0.069, "step": 251470 }, { "epoch": 2.686895667503606, "grad_norm": 0.13088352978229523, "learning_rate": 9.321203567973624e-07, "loss": 0.0025, "step": 251480 }, { "epoch": 2.6870025108178854, "grad_norm": 3.9878265857696533, "learning_rate": 9.321119044021956e-07, "loss": 0.0107, "step": 251490 }, { "epoch": 2.6871093541321653, "grad_norm": 0.00884808599948883, "learning_rate": 9.321034515191419e-07, "loss": 0.0284, "step": 251500 }, { "epoch": 2.687216197446445, "grad_norm": 18.347007751464844, "learning_rate": 9.320949981482109e-07, "loss": 0.0261, "step": 251510 }, { "epoch": 2.6873230407607243, "grad_norm": 0.1025727167725563, "learning_rate": 9.32086544289412e-07, "loss": 0.0073, "step": 251520 }, { "epoch": 2.687429884075004, "grad_norm": 0.017296181991696358, "learning_rate": 9.320780899427547e-07, "loss": 0.0272, "step": 251530 }, { "epoch": 2.6875367273892836, "grad_norm": 5.025025367736816, "learning_rate": 9.320696351082488e-07, "loss": 0.0303, "step": 251540 }, { "epoch": 2.687643570703563, "grad_norm": 2.873986005783081, "learning_rate": 9.320611797859035e-07, "loss": 0.0299, "step": 251550 }, { "epoch": 2.687750414017843, "grad_norm": 1.4184083938598633, "learning_rate": 9.320527239757287e-07, "loss": 0.002, "step": 251560 }, { "epoch": 2.6878572573321224, "grad_norm": 3.8501904010772705, "learning_rate": 9.320442676777336e-07, "loss": 0.0331, "step": 251570 }, { "epoch": 2.687964100646402, "grad_norm": 0.22913512587547302, "learning_rate": 9.320358108919279e-07, "loss": 0.0163, "step": 251580 }, { "epoch": 2.688070943960682, "grad_norm": 0.24928687512874603, "learning_rate": 9.320273536183213e-07, "loss": 0.013, "step": 251590 }, { "epoch": 2.6881777872749613, "grad_norm": 0.10731416940689087, "learning_rate": 9.320188958569231e-07, "loss": 0.0235, "step": 251600 }, { "epoch": 2.6882846305892407, "grad_norm": 0.008757011033594608, "learning_rate": 9.320104376077432e-07, "loss": 0.0087, "step": 251610 }, { "epoch": 2.6883914739035206, "grad_norm": 0.43950945138931274, "learning_rate": 9.320019788707905e-07, "loss": 0.0155, "step": 251620 }, { "epoch": 2.6884983172178, "grad_norm": 5.571757793426514, "learning_rate": 9.319935196460752e-07, "loss": 0.0481, "step": 251630 }, { "epoch": 2.6886051605320795, "grad_norm": 0.4385623335838318, "learning_rate": 9.319850599336065e-07, "loss": 0.0092, "step": 251640 }, { "epoch": 2.6887120038463594, "grad_norm": 0.13346974551677704, "learning_rate": 9.319765997333942e-07, "loss": 0.0121, "step": 251650 }, { "epoch": 2.688818847160639, "grad_norm": 0.04953332617878914, "learning_rate": 9.319681390454477e-07, "loss": 0.0124, "step": 251660 }, { "epoch": 2.6889256904749184, "grad_norm": 15.112855911254883, "learning_rate": 9.319596778697766e-07, "loss": 0.0147, "step": 251670 }, { "epoch": 2.6890325337891983, "grad_norm": 10.947136878967285, "learning_rate": 9.319512162063903e-07, "loss": 0.063, "step": 251680 }, { "epoch": 2.6891393771034777, "grad_norm": 0.0144267613068223, "learning_rate": 9.319427540552987e-07, "loss": 0.0134, "step": 251690 }, { "epoch": 2.6892462204177576, "grad_norm": 2.6474533081054688, "learning_rate": 9.319342914165108e-07, "loss": 0.0459, "step": 251700 }, { "epoch": 2.689353063732037, "grad_norm": 3.1389079093933105, "learning_rate": 9.319258282900366e-07, "loss": 0.0228, "step": 251710 }, { "epoch": 2.6894599070463165, "grad_norm": 0.14433586597442627, "learning_rate": 9.319173646758856e-07, "loss": 0.0234, "step": 251720 }, { "epoch": 2.689566750360596, "grad_norm": 0.30184051394462585, "learning_rate": 9.319089005740674e-07, "loss": 0.018, "step": 251730 }, { "epoch": 2.689673593674876, "grad_norm": 0.00293197063729167, "learning_rate": 9.319004359845912e-07, "loss": 0.0232, "step": 251740 }, { "epoch": 2.6897804369891554, "grad_norm": 0.0240489412099123, "learning_rate": 9.318919709074671e-07, "loss": 0.0043, "step": 251750 }, { "epoch": 2.6898872803034353, "grad_norm": 0.13149403035640717, "learning_rate": 9.318835053427041e-07, "loss": 0.0244, "step": 251760 }, { "epoch": 2.6899941236177147, "grad_norm": 4.781937599182129, "learning_rate": 9.318750392903122e-07, "loss": 0.0583, "step": 251770 }, { "epoch": 2.690100966931994, "grad_norm": 3.5179519653320312, "learning_rate": 9.318665727503008e-07, "loss": 0.0545, "step": 251780 }, { "epoch": 2.6902078102462736, "grad_norm": 0.008028236217796803, "learning_rate": 9.318581057226793e-07, "loss": 0.0261, "step": 251790 }, { "epoch": 2.6903146535605535, "grad_norm": 0.03354616463184357, "learning_rate": 9.318496382074574e-07, "loss": 0.0221, "step": 251800 }, { "epoch": 2.690421496874833, "grad_norm": 0.5758676528930664, "learning_rate": 9.318411702046449e-07, "loss": 0.0184, "step": 251810 }, { "epoch": 2.690528340189113, "grad_norm": 0.08369853347539902, "learning_rate": 9.318327017142508e-07, "loss": 0.0403, "step": 251820 }, { "epoch": 2.6906351835033924, "grad_norm": 4.048457622528076, "learning_rate": 9.318242327362853e-07, "loss": 0.0451, "step": 251830 }, { "epoch": 2.690742026817672, "grad_norm": 5.2403693199157715, "learning_rate": 9.318157632707575e-07, "loss": 0.0319, "step": 251840 }, { "epoch": 2.6908488701319513, "grad_norm": 14.79159927368164, "learning_rate": 9.318072933176771e-07, "loss": 0.0633, "step": 251850 }, { "epoch": 2.690955713446231, "grad_norm": 1.4735283851623535, "learning_rate": 9.317988228770537e-07, "loss": 0.0066, "step": 251860 }, { "epoch": 2.6910625567605106, "grad_norm": 1.867992639541626, "learning_rate": 9.317903519488969e-07, "loss": 0.0415, "step": 251870 }, { "epoch": 2.6911694000747906, "grad_norm": 11.309349060058594, "learning_rate": 9.317818805332161e-07, "loss": 0.0273, "step": 251880 }, { "epoch": 2.69127624338907, "grad_norm": 0.06175416335463524, "learning_rate": 9.317734086300211e-07, "loss": 0.0433, "step": 251890 }, { "epoch": 2.6913830867033495, "grad_norm": 6.371642112731934, "learning_rate": 9.317649362393212e-07, "loss": 0.0829, "step": 251900 }, { "epoch": 2.691489930017629, "grad_norm": 0.27579036355018616, "learning_rate": 9.317564633611261e-07, "loss": 0.0055, "step": 251910 }, { "epoch": 2.691596773331909, "grad_norm": 0.15990479290485382, "learning_rate": 9.317479899954454e-07, "loss": 0.0139, "step": 251920 }, { "epoch": 2.6917036166461883, "grad_norm": 5.619837760925293, "learning_rate": 9.317395161422887e-07, "loss": 0.0076, "step": 251930 }, { "epoch": 2.691810459960468, "grad_norm": 9.262138366699219, "learning_rate": 9.317310418016655e-07, "loss": 0.023, "step": 251940 }, { "epoch": 2.6919173032747477, "grad_norm": 0.016149692237377167, "learning_rate": 9.317225669735853e-07, "loss": 0.0089, "step": 251950 }, { "epoch": 2.692024146589027, "grad_norm": 0.33681005239486694, "learning_rate": 9.317140916580577e-07, "loss": 0.0064, "step": 251960 }, { "epoch": 2.6921309899033066, "grad_norm": 0.5224860310554504, "learning_rate": 9.317056158550924e-07, "loss": 0.0356, "step": 251970 }, { "epoch": 2.6922378332175865, "grad_norm": 0.019148744642734528, "learning_rate": 9.316971395646989e-07, "loss": 0.0309, "step": 251980 }, { "epoch": 2.692344676531866, "grad_norm": 0.30261802673339844, "learning_rate": 9.316886627868866e-07, "loss": 0.0188, "step": 251990 }, { "epoch": 2.692451519846146, "grad_norm": 0.013643934391438961, "learning_rate": 9.316801855216654e-07, "loss": 0.0103, "step": 252000 }, { "epoch": 2.6925583631604253, "grad_norm": 0.2746233642101288, "learning_rate": 9.316717077690446e-07, "loss": 0.0109, "step": 252010 }, { "epoch": 2.6926652064747048, "grad_norm": 0.029575632885098457, "learning_rate": 9.316632295290337e-07, "loss": 0.0036, "step": 252020 }, { "epoch": 2.692772049788984, "grad_norm": 0.06641560792922974, "learning_rate": 9.316547508016427e-07, "loss": 0.0163, "step": 252030 }, { "epoch": 2.692878893103264, "grad_norm": 0.01334225945174694, "learning_rate": 9.316462715868807e-07, "loss": 0.0163, "step": 252040 }, { "epoch": 2.6929857364175436, "grad_norm": 0.0029339687898755074, "learning_rate": 9.316377918847577e-07, "loss": 0.018, "step": 252050 }, { "epoch": 2.6930925797318235, "grad_norm": 1.6335755586624146, "learning_rate": 9.316293116952829e-07, "loss": 0.0127, "step": 252060 }, { "epoch": 2.693199423046103, "grad_norm": 1.879401445388794, "learning_rate": 9.316208310184661e-07, "loss": 0.0124, "step": 252070 }, { "epoch": 2.6933062663603824, "grad_norm": 0.045666467398405075, "learning_rate": 9.316123498543167e-07, "loss": 0.0154, "step": 252080 }, { "epoch": 2.693413109674662, "grad_norm": 0.005679248366504908, "learning_rate": 9.316038682028445e-07, "loss": 0.0348, "step": 252090 }, { "epoch": 2.6935199529889418, "grad_norm": 1.4215786457061768, "learning_rate": 9.31595386064059e-07, "loss": 0.0074, "step": 252100 }, { "epoch": 2.693626796303221, "grad_norm": 0.06766782701015472, "learning_rate": 9.315869034379695e-07, "loss": 0.0075, "step": 252110 }, { "epoch": 2.693733639617501, "grad_norm": 1.0088257789611816, "learning_rate": 9.31578420324586e-07, "loss": 0.03, "step": 252120 }, { "epoch": 2.6938404829317806, "grad_norm": 0.05367656797170639, "learning_rate": 9.315699367239179e-07, "loss": 0.0161, "step": 252130 }, { "epoch": 2.69394732624606, "grad_norm": 0.4335740804672241, "learning_rate": 9.315614526359747e-07, "loss": 0.0159, "step": 252140 }, { "epoch": 2.6940541695603395, "grad_norm": 10.234955787658691, "learning_rate": 9.315529680607661e-07, "loss": 0.0633, "step": 252150 }, { "epoch": 2.6941610128746194, "grad_norm": 3.262680768966675, "learning_rate": 9.315444829983017e-07, "loss": 0.0038, "step": 252160 }, { "epoch": 2.694267856188899, "grad_norm": 0.1460685431957245, "learning_rate": 9.315359974485908e-07, "loss": 0.0231, "step": 252170 }, { "epoch": 2.6943746995031788, "grad_norm": 3.641735315322876, "learning_rate": 9.315275114116433e-07, "loss": 0.0092, "step": 252180 }, { "epoch": 2.694481542817458, "grad_norm": 2.9878487586975098, "learning_rate": 9.315190248874687e-07, "loss": 0.0062, "step": 252190 }, { "epoch": 2.6945883861317377, "grad_norm": 0.675679624080658, "learning_rate": 9.315105378760765e-07, "loss": 0.0011, "step": 252200 }, { "epoch": 2.6946952294460176, "grad_norm": 1.4549129009246826, "learning_rate": 9.315020503774764e-07, "loss": 0.0089, "step": 252210 }, { "epoch": 2.694802072760297, "grad_norm": 0.026051612570881844, "learning_rate": 9.314935623916778e-07, "loss": 0.0126, "step": 252220 }, { "epoch": 2.6949089160745765, "grad_norm": 4.736518383026123, "learning_rate": 9.314850739186906e-07, "loss": 0.0464, "step": 252230 }, { "epoch": 2.6950157593888564, "grad_norm": 0.003038126975297928, "learning_rate": 9.314765849585239e-07, "loss": 0.0149, "step": 252240 }, { "epoch": 2.695122602703136, "grad_norm": 0.0066827088594436646, "learning_rate": 9.314680955111879e-07, "loss": 0.0045, "step": 252250 }, { "epoch": 2.6952294460174153, "grad_norm": 0.429961621761322, "learning_rate": 9.314596055766917e-07, "loss": 0.0062, "step": 252260 }, { "epoch": 2.6953362893316952, "grad_norm": 5.701121807098389, "learning_rate": 9.314511151550451e-07, "loss": 0.0477, "step": 252270 }, { "epoch": 2.6954431326459747, "grad_norm": 0.47198158502578735, "learning_rate": 9.314426242462574e-07, "loss": 0.0317, "step": 252280 }, { "epoch": 2.695549975960254, "grad_norm": 0.010531012900173664, "learning_rate": 9.314341328503385e-07, "loss": 0.0102, "step": 252290 }, { "epoch": 2.695656819274534, "grad_norm": 0.0067125908099114895, "learning_rate": 9.31425640967298e-07, "loss": 0.0883, "step": 252300 }, { "epoch": 2.6957636625888135, "grad_norm": 0.16591966152191162, "learning_rate": 9.314171485971454e-07, "loss": 0.0144, "step": 252310 }, { "epoch": 2.695870505903093, "grad_norm": 0.010640808381140232, "learning_rate": 9.314086557398903e-07, "loss": 0.0045, "step": 252320 }, { "epoch": 2.695977349217373, "grad_norm": 0.005589188542217016, "learning_rate": 9.314001623955422e-07, "loss": 0.0093, "step": 252330 }, { "epoch": 2.6960841925316523, "grad_norm": 1.555187463760376, "learning_rate": 9.313916685641108e-07, "loss": 0.0494, "step": 252340 }, { "epoch": 2.696191035845932, "grad_norm": 0.018548976629972458, "learning_rate": 9.313831742456055e-07, "loss": 0.0093, "step": 252350 }, { "epoch": 2.6962978791602117, "grad_norm": 0.00591783132404089, "learning_rate": 9.313746794400362e-07, "loss": 0.0163, "step": 252360 }, { "epoch": 2.696404722474491, "grad_norm": 0.08930188417434692, "learning_rate": 9.313661841474124e-07, "loss": 0.0051, "step": 252370 }, { "epoch": 2.6965115657887706, "grad_norm": 0.07564949989318848, "learning_rate": 9.313576883677434e-07, "loss": 0.0096, "step": 252380 }, { "epoch": 2.6966184091030505, "grad_norm": 5.4488301277160645, "learning_rate": 9.313491921010392e-07, "loss": 0.0437, "step": 252390 }, { "epoch": 2.69672525241733, "grad_norm": 0.3138065040111542, "learning_rate": 9.31340695347309e-07, "loss": 0.0153, "step": 252400 }, { "epoch": 2.69683209573161, "grad_norm": 0.7480343580245972, "learning_rate": 9.313321981065628e-07, "loss": 0.0011, "step": 252410 }, { "epoch": 2.6969389390458893, "grad_norm": 0.0027497473638504744, "learning_rate": 9.3132370037881e-07, "loss": 0.0063, "step": 252420 }, { "epoch": 2.697045782360169, "grad_norm": 0.026956088840961456, "learning_rate": 9.3131520216406e-07, "loss": 0.0595, "step": 252430 }, { "epoch": 2.6971526256744482, "grad_norm": 0.7584787011146545, "learning_rate": 9.313067034623228e-07, "loss": 0.0489, "step": 252440 }, { "epoch": 2.697259468988728, "grad_norm": 0.0023868258576840162, "learning_rate": 9.312982042736078e-07, "loss": 0.0207, "step": 252450 }, { "epoch": 2.6973663123030076, "grad_norm": 0.3955593407154083, "learning_rate": 9.312897045979244e-07, "loss": 0.005, "step": 252460 }, { "epoch": 2.6974731556172875, "grad_norm": 0.008477653376758099, "learning_rate": 9.312812044352824e-07, "loss": 0.0152, "step": 252470 }, { "epoch": 2.697579998931567, "grad_norm": 4.948469161987305, "learning_rate": 9.312727037856913e-07, "loss": 0.0199, "step": 252480 }, { "epoch": 2.6976868422458464, "grad_norm": 6.918609142303467, "learning_rate": 9.312642026491608e-07, "loss": 0.0068, "step": 252490 }, { "epoch": 2.697793685560126, "grad_norm": 0.9016776084899902, "learning_rate": 9.312557010257006e-07, "loss": 0.0162, "step": 252500 }, { "epoch": 2.697900528874406, "grad_norm": 1.457783818244934, "learning_rate": 9.312471989153201e-07, "loss": 0.0087, "step": 252510 }, { "epoch": 2.6980073721886852, "grad_norm": 4.322230815887451, "learning_rate": 9.31238696318029e-07, "loss": 0.062, "step": 252520 }, { "epoch": 2.698114215502965, "grad_norm": 7.935399055480957, "learning_rate": 9.312301932338368e-07, "loss": 0.0203, "step": 252530 }, { "epoch": 2.6982210588172446, "grad_norm": 1.3181304931640625, "learning_rate": 9.312216896627532e-07, "loss": 0.0542, "step": 252540 }, { "epoch": 2.698327902131524, "grad_norm": 0.0180352833122015, "learning_rate": 9.312131856047878e-07, "loss": 0.0086, "step": 252550 }, { "epoch": 2.6984347454458035, "grad_norm": 0.1735195815563202, "learning_rate": 9.3120468105995e-07, "loss": 0.0145, "step": 252560 }, { "epoch": 2.6985415887600834, "grad_norm": 1.3812819719314575, "learning_rate": 9.311961760282497e-07, "loss": 0.0359, "step": 252570 }, { "epoch": 2.698648432074363, "grad_norm": 0.10280618071556091, "learning_rate": 9.311876705096964e-07, "loss": 0.0177, "step": 252580 }, { "epoch": 2.698755275388643, "grad_norm": 4.6775689125061035, "learning_rate": 9.311791645042997e-07, "loss": 0.017, "step": 252590 }, { "epoch": 2.6988621187029223, "grad_norm": 0.014028603211045265, "learning_rate": 9.311706580120691e-07, "loss": 0.0167, "step": 252600 }, { "epoch": 2.6989689620172017, "grad_norm": 0.45406419038772583, "learning_rate": 9.311621510330143e-07, "loss": 0.0193, "step": 252610 }, { "epoch": 2.699075805331481, "grad_norm": 0.0029793502762913704, "learning_rate": 9.311536435671449e-07, "loss": 0.0211, "step": 252620 }, { "epoch": 2.699182648645761, "grad_norm": 1.7392055988311768, "learning_rate": 9.311451356144705e-07, "loss": 0.0335, "step": 252630 }, { "epoch": 2.6992894919600405, "grad_norm": 2.925997257232666, "learning_rate": 9.311366271750008e-07, "loss": 0.013, "step": 252640 }, { "epoch": 2.6993963352743204, "grad_norm": 0.023966273292899132, "learning_rate": 9.311281182487452e-07, "loss": 0.0367, "step": 252650 }, { "epoch": 2.6995031785886, "grad_norm": 3.6852712631225586, "learning_rate": 9.311196088357134e-07, "loss": 0.0194, "step": 252660 }, { "epoch": 2.6996100219028794, "grad_norm": 0.0566670261323452, "learning_rate": 9.31111098935915e-07, "loss": 0.0109, "step": 252670 }, { "epoch": 2.699716865217159, "grad_norm": 1.282576560974121, "learning_rate": 9.311025885493597e-07, "loss": 0.0122, "step": 252680 }, { "epoch": 2.6998237085314387, "grad_norm": 0.752130389213562, "learning_rate": 9.31094077676057e-07, "loss": 0.0196, "step": 252690 }, { "epoch": 2.699930551845718, "grad_norm": 0.025943342596292496, "learning_rate": 9.310855663160166e-07, "loss": 0.0011, "step": 252700 }, { "epoch": 2.700037395159998, "grad_norm": 0.10344500839710236, "learning_rate": 9.310770544692478e-07, "loss": 0.0099, "step": 252710 }, { "epoch": 2.7001442384742775, "grad_norm": 0.01053309440612793, "learning_rate": 9.31068542135761e-07, "loss": 0.0092, "step": 252720 }, { "epoch": 2.700251081788557, "grad_norm": 8.610591888427734, "learning_rate": 9.310600293155648e-07, "loss": 0.0344, "step": 252730 }, { "epoch": 2.7003579251028365, "grad_norm": 0.05211038514971733, "learning_rate": 9.310515160086694e-07, "loss": 0.0244, "step": 252740 }, { "epoch": 2.7004647684171164, "grad_norm": 2.3545002937316895, "learning_rate": 9.310430022150846e-07, "loss": 0.017, "step": 252750 }, { "epoch": 2.700571611731396, "grad_norm": 0.6521009802818298, "learning_rate": 9.310344879348195e-07, "loss": 0.0298, "step": 252760 }, { "epoch": 2.7006784550456757, "grad_norm": 0.4063272774219513, "learning_rate": 9.310259731678838e-07, "loss": 0.0152, "step": 252770 }, { "epoch": 2.700785298359955, "grad_norm": 4.169251441955566, "learning_rate": 9.310174579142874e-07, "loss": 0.0348, "step": 252780 }, { "epoch": 2.7008921416742346, "grad_norm": 4.707153797149658, "learning_rate": 9.310089421740396e-07, "loss": 0.0072, "step": 252790 }, { "epoch": 2.700998984988514, "grad_norm": 3.023503541946411, "learning_rate": 9.310004259471505e-07, "loss": 0.0255, "step": 252800 }, { "epoch": 2.701105828302794, "grad_norm": 0.027982158586382866, "learning_rate": 9.309919092336291e-07, "loss": 0.0279, "step": 252810 }, { "epoch": 2.7012126716170735, "grad_norm": 4.255123138427734, "learning_rate": 9.309833920334854e-07, "loss": 0.0922, "step": 252820 }, { "epoch": 2.7013195149313534, "grad_norm": 2.3288166522979736, "learning_rate": 9.30974874346729e-07, "loss": 0.0377, "step": 252830 }, { "epoch": 2.701426358245633, "grad_norm": 10.47774887084961, "learning_rate": 9.309663561733694e-07, "loss": 0.0454, "step": 252840 }, { "epoch": 2.7015332015599123, "grad_norm": 9.973814010620117, "learning_rate": 9.309578375134162e-07, "loss": 0.0302, "step": 252850 }, { "epoch": 2.7016400448741917, "grad_norm": 0.029857220128178596, "learning_rate": 9.309493183668791e-07, "loss": 0.0153, "step": 252860 }, { "epoch": 2.7017468881884716, "grad_norm": 2.846083641052246, "learning_rate": 9.309407987337676e-07, "loss": 0.0187, "step": 252870 }, { "epoch": 2.701853731502751, "grad_norm": 11.502699851989746, "learning_rate": 9.309322786140916e-07, "loss": 0.0077, "step": 252880 }, { "epoch": 2.701960574817031, "grad_norm": 0.23859338462352753, "learning_rate": 9.309237580078604e-07, "loss": 0.005, "step": 252890 }, { "epoch": 2.7020674181313105, "grad_norm": 0.15443475544452667, "learning_rate": 9.309152369150837e-07, "loss": 0.0163, "step": 252900 }, { "epoch": 2.70217426144559, "grad_norm": 0.009597049094736576, "learning_rate": 9.309067153357713e-07, "loss": 0.0044, "step": 252910 }, { "epoch": 2.7022811047598694, "grad_norm": 10.717503547668457, "learning_rate": 9.308981932699327e-07, "loss": 0.0142, "step": 252920 }, { "epoch": 2.7023879480741493, "grad_norm": 0.07754898816347122, "learning_rate": 9.308896707175775e-07, "loss": 0.0188, "step": 252930 }, { "epoch": 2.7024947913884287, "grad_norm": 0.007905426435172558, "learning_rate": 9.308811476787153e-07, "loss": 0.0621, "step": 252940 }, { "epoch": 2.7026016347027086, "grad_norm": 0.9160811305046082, "learning_rate": 9.308726241533558e-07, "loss": 0.0094, "step": 252950 }, { "epoch": 2.702708478016988, "grad_norm": 1.9770711660385132, "learning_rate": 9.308641001415087e-07, "loss": 0.0099, "step": 252960 }, { "epoch": 2.7028153213312676, "grad_norm": 20.497777938842773, "learning_rate": 9.308555756431833e-07, "loss": 0.0052, "step": 252970 }, { "epoch": 2.7029221646455475, "grad_norm": 6.228860855102539, "learning_rate": 9.308470506583895e-07, "loss": 0.0108, "step": 252980 }, { "epoch": 2.703029007959827, "grad_norm": 5.9502177238464355, "learning_rate": 9.308385251871369e-07, "loss": 0.0108, "step": 252990 }, { "epoch": 2.7031358512741064, "grad_norm": 0.011623360216617584, "learning_rate": 9.308299992294351e-07, "loss": 0.0038, "step": 253000 }, { "epoch": 2.7032426945883863, "grad_norm": 0.11477022618055344, "learning_rate": 9.308214727852937e-07, "loss": 0.0423, "step": 253010 }, { "epoch": 2.7033495379026657, "grad_norm": 0.23748093843460083, "learning_rate": 9.308129458547224e-07, "loss": 0.0246, "step": 253020 }, { "epoch": 2.703456381216945, "grad_norm": 0.07933231443166733, "learning_rate": 9.308044184377306e-07, "loss": 0.0679, "step": 253030 }, { "epoch": 2.703563224531225, "grad_norm": 2.5685784816741943, "learning_rate": 9.307958905343283e-07, "loss": 0.0067, "step": 253040 }, { "epoch": 2.7036700678455046, "grad_norm": 0.7513266205787659, "learning_rate": 9.307873621445248e-07, "loss": 0.0353, "step": 253050 }, { "epoch": 2.703776911159784, "grad_norm": 1.3417407274246216, "learning_rate": 9.307788332683298e-07, "loss": 0.0208, "step": 253060 }, { "epoch": 2.703883754474064, "grad_norm": 0.20797070860862732, "learning_rate": 9.307703039057531e-07, "loss": 0.0287, "step": 253070 }, { "epoch": 2.7039905977883434, "grad_norm": 8.689962387084961, "learning_rate": 9.307617740568042e-07, "loss": 0.0351, "step": 253080 }, { "epoch": 2.704097441102623, "grad_norm": 0.5549323558807373, "learning_rate": 9.307532437214928e-07, "loss": 0.0456, "step": 253090 }, { "epoch": 2.7042042844169027, "grad_norm": 0.11223696917295456, "learning_rate": 9.307447128998283e-07, "loss": 0.0163, "step": 253100 }, { "epoch": 2.704311127731182, "grad_norm": 0.022247156128287315, "learning_rate": 9.307361815918206e-07, "loss": 0.0133, "step": 253110 }, { "epoch": 2.7044179710454617, "grad_norm": 0.1370660662651062, "learning_rate": 9.307276497974793e-07, "loss": 0.0199, "step": 253120 }, { "epoch": 2.7045248143597416, "grad_norm": 0.006864581722766161, "learning_rate": 9.307191175168139e-07, "loss": 0.0466, "step": 253130 }, { "epoch": 2.704631657674021, "grad_norm": 0.013280356302857399, "learning_rate": 9.307105847498342e-07, "loss": 0.0309, "step": 253140 }, { "epoch": 2.7047385009883005, "grad_norm": 1.9391498565673828, "learning_rate": 9.307020514965496e-07, "loss": 0.0086, "step": 253150 }, { "epoch": 2.7048453443025804, "grad_norm": 0.14935895800590515, "learning_rate": 9.3069351775697e-07, "loss": 0.0093, "step": 253160 }, { "epoch": 2.70495218761686, "grad_norm": 1.6711223125457764, "learning_rate": 9.306849835311049e-07, "loss": 0.0029, "step": 253170 }, { "epoch": 2.7050590309311398, "grad_norm": 5.340949058532715, "learning_rate": 9.306764488189639e-07, "loss": 0.0101, "step": 253180 }, { "epoch": 2.705165874245419, "grad_norm": 0.004627961665391922, "learning_rate": 9.306679136205568e-07, "loss": 0.0049, "step": 253190 }, { "epoch": 2.7052727175596987, "grad_norm": 0.35380256175994873, "learning_rate": 9.306593779358929e-07, "loss": 0.0038, "step": 253200 }, { "epoch": 2.705379560873978, "grad_norm": 0.010074623860418797, "learning_rate": 9.306508417649823e-07, "loss": 0.0504, "step": 253210 }, { "epoch": 2.705486404188258, "grad_norm": 0.04808947071433067, "learning_rate": 9.306423051078343e-07, "loss": 0.0136, "step": 253220 }, { "epoch": 2.7055932475025375, "grad_norm": 5.0255279541015625, "learning_rate": 9.306337679644585e-07, "loss": 0.0581, "step": 253230 }, { "epoch": 2.7057000908168174, "grad_norm": 4.077236175537109, "learning_rate": 9.306252303348648e-07, "loss": 0.0328, "step": 253240 }, { "epoch": 2.705806934131097, "grad_norm": 0.02247629128396511, "learning_rate": 9.306166922190628e-07, "loss": 0.0253, "step": 253250 }, { "epoch": 2.7059137774453763, "grad_norm": 0.06241516023874283, "learning_rate": 9.306081536170619e-07, "loss": 0.0045, "step": 253260 }, { "epoch": 2.7060206207596558, "grad_norm": 0.2054673582315445, "learning_rate": 9.305996145288719e-07, "loss": 0.0287, "step": 253270 }, { "epoch": 2.7061274640739357, "grad_norm": 3.837723970413208, "learning_rate": 9.305910749545025e-07, "loss": 0.0137, "step": 253280 }, { "epoch": 2.706234307388215, "grad_norm": 12.269326210021973, "learning_rate": 9.305825348939633e-07, "loss": 0.0082, "step": 253290 }, { "epoch": 2.706341150702495, "grad_norm": 0.14217868447303772, "learning_rate": 9.30573994347264e-07, "loss": 0.0166, "step": 253300 }, { "epoch": 2.7064479940167745, "grad_norm": 0.849828839302063, "learning_rate": 9.305654533144139e-07, "loss": 0.0528, "step": 253310 }, { "epoch": 2.706554837331054, "grad_norm": 0.7353847026824951, "learning_rate": 9.305569117954231e-07, "loss": 0.0154, "step": 253320 }, { "epoch": 2.7066616806453334, "grad_norm": 2.7707462310791016, "learning_rate": 9.305483697903011e-07, "loss": 0.0037, "step": 253330 }, { "epoch": 2.7067685239596133, "grad_norm": 9.213728904724121, "learning_rate": 9.305398272990573e-07, "loss": 0.0193, "step": 253340 }, { "epoch": 2.7068753672738928, "grad_norm": 0.04402207210659981, "learning_rate": 9.305312843217017e-07, "loss": 0.0224, "step": 253350 }, { "epoch": 2.7069822105881727, "grad_norm": 0.028311988338828087, "learning_rate": 9.305227408582438e-07, "loss": 0.0117, "step": 253360 }, { "epoch": 2.707089053902452, "grad_norm": 1.1303726434707642, "learning_rate": 9.305141969086932e-07, "loss": 0.0331, "step": 253370 }, { "epoch": 2.7071958972167316, "grad_norm": 0.12463017553091049, "learning_rate": 9.305056524730594e-07, "loss": 0.0319, "step": 253380 }, { "epoch": 2.707302740531011, "grad_norm": 0.2618143558502197, "learning_rate": 9.304971075513525e-07, "loss": 0.0175, "step": 253390 }, { "epoch": 2.707409583845291, "grad_norm": 6.751891136169434, "learning_rate": 9.304885621435817e-07, "loss": 0.0063, "step": 253400 }, { "epoch": 2.7075164271595704, "grad_norm": 0.667292058467865, "learning_rate": 9.304800162497568e-07, "loss": 0.0404, "step": 253410 }, { "epoch": 2.7076232704738503, "grad_norm": 1.9872910976409912, "learning_rate": 9.304714698698877e-07, "loss": 0.018, "step": 253420 }, { "epoch": 2.7077301137881298, "grad_norm": 0.015905937179923058, "learning_rate": 9.304629230039836e-07, "loss": 0.0098, "step": 253430 }, { "epoch": 2.7078369571024092, "grad_norm": 0.1393580436706543, "learning_rate": 9.304543756520544e-07, "loss": 0.0201, "step": 253440 }, { "epoch": 2.7079438004166887, "grad_norm": 3.2914235591888428, "learning_rate": 9.304458278141099e-07, "loss": 0.0287, "step": 253450 }, { "epoch": 2.7080506437309686, "grad_norm": 13.535929679870605, "learning_rate": 9.304372794901594e-07, "loss": 0.0166, "step": 253460 }, { "epoch": 2.708157487045248, "grad_norm": 3.555330276489258, "learning_rate": 9.304287306802126e-07, "loss": 0.022, "step": 253470 }, { "epoch": 2.708264330359528, "grad_norm": 0.017951227724552155, "learning_rate": 9.304201813842794e-07, "loss": 0.0178, "step": 253480 }, { "epoch": 2.7083711736738074, "grad_norm": 8.087503433227539, "learning_rate": 9.304116316023695e-07, "loss": 0.0149, "step": 253490 }, { "epoch": 2.708478016988087, "grad_norm": 2.1071717739105225, "learning_rate": 9.304030813344922e-07, "loss": 0.0413, "step": 253500 }, { "epoch": 2.7085848603023663, "grad_norm": 0.02456638403236866, "learning_rate": 9.303945305806573e-07, "loss": 0.0232, "step": 253510 }, { "epoch": 2.7086917036166462, "grad_norm": 0.5371128916740417, "learning_rate": 9.303859793408746e-07, "loss": 0.0146, "step": 253520 }, { "epoch": 2.7087985469309257, "grad_norm": 0.033315662294626236, "learning_rate": 9.303774276151537e-07, "loss": 0.0108, "step": 253530 }, { "epoch": 2.7089053902452056, "grad_norm": 1.9723416566848755, "learning_rate": 9.303688754035041e-07, "loss": 0.0392, "step": 253540 }, { "epoch": 2.709012233559485, "grad_norm": 11.424308776855469, "learning_rate": 9.303603227059356e-07, "loss": 0.0193, "step": 253550 }, { "epoch": 2.7091190768737645, "grad_norm": 0.23425602912902832, "learning_rate": 9.303517695224578e-07, "loss": 0.096, "step": 253560 }, { "epoch": 2.709225920188044, "grad_norm": 0.13352471590042114, "learning_rate": 9.303432158530804e-07, "loss": 0.0379, "step": 253570 }, { "epoch": 2.709332763502324, "grad_norm": 1.0379339456558228, "learning_rate": 9.303346616978129e-07, "loss": 0.0137, "step": 253580 }, { "epoch": 2.7094396068166033, "grad_norm": 0.017370114102959633, "learning_rate": 9.303261070566653e-07, "loss": 0.0281, "step": 253590 }, { "epoch": 2.7095464501308832, "grad_norm": 10.318358421325684, "learning_rate": 9.303175519296469e-07, "loss": 0.016, "step": 253600 }, { "epoch": 2.7096532934451627, "grad_norm": 0.02461881749331951, "learning_rate": 9.303089963167675e-07, "loss": 0.0028, "step": 253610 }, { "epoch": 2.709760136759442, "grad_norm": 3.8072867393493652, "learning_rate": 9.303004402180368e-07, "loss": 0.0103, "step": 253620 }, { "epoch": 2.7098669800737216, "grad_norm": 0.3243841826915741, "learning_rate": 9.302918836334645e-07, "loss": 0.0021, "step": 253630 }, { "epoch": 2.7099738233880015, "grad_norm": 0.2832408845424652, "learning_rate": 9.302833265630601e-07, "loss": 0.0226, "step": 253640 }, { "epoch": 2.710080666702281, "grad_norm": 0.05643295869231224, "learning_rate": 9.302747690068333e-07, "loss": 0.0084, "step": 253650 }, { "epoch": 2.710187510016561, "grad_norm": 2.161660671234131, "learning_rate": 9.30266210964794e-07, "loss": 0.0071, "step": 253660 }, { "epoch": 2.7102943533308403, "grad_norm": 3.6956722736358643, "learning_rate": 9.302576524369514e-07, "loss": 0.0157, "step": 253670 }, { "epoch": 2.71040119664512, "grad_norm": 1.017552375793457, "learning_rate": 9.302490934233157e-07, "loss": 0.029, "step": 253680 }, { "epoch": 2.7105080399593997, "grad_norm": 4.289062023162842, "learning_rate": 9.302405339238961e-07, "loss": 0.02, "step": 253690 }, { "epoch": 2.710614883273679, "grad_norm": 0.007753532379865646, "learning_rate": 9.302319739387027e-07, "loss": 0.0139, "step": 253700 }, { "epoch": 2.7107217265879586, "grad_norm": 8.086509704589844, "learning_rate": 9.302234134677446e-07, "loss": 0.0464, "step": 253710 }, { "epoch": 2.7108285699022385, "grad_norm": 0.021522624418139458, "learning_rate": 9.302148525110321e-07, "loss": 0.0343, "step": 253720 }, { "epoch": 2.710935413216518, "grad_norm": 1.703066349029541, "learning_rate": 9.302062910685745e-07, "loss": 0.0843, "step": 253730 }, { "epoch": 2.7110422565307974, "grad_norm": 0.22992579638957977, "learning_rate": 9.301977291403815e-07, "loss": 0.0147, "step": 253740 }, { "epoch": 2.7111490998450773, "grad_norm": 2.4142229557037354, "learning_rate": 9.301891667264628e-07, "loss": 0.0221, "step": 253750 }, { "epoch": 2.711255943159357, "grad_norm": 0.02666638419032097, "learning_rate": 9.30180603826828e-07, "loss": 0.013, "step": 253760 }, { "epoch": 2.7113627864736363, "grad_norm": 1.6474080085754395, "learning_rate": 9.301720404414868e-07, "loss": 0.003, "step": 253770 }, { "epoch": 2.711469629787916, "grad_norm": 0.5989476442337036, "learning_rate": 9.301634765704489e-07, "loss": 0.0092, "step": 253780 }, { "epoch": 2.7115764731021956, "grad_norm": 8.859630584716797, "learning_rate": 9.301549122137241e-07, "loss": 0.0039, "step": 253790 }, { "epoch": 2.711683316416475, "grad_norm": 0.017865410074591637, "learning_rate": 9.301463473713219e-07, "loss": 0.0151, "step": 253800 }, { "epoch": 2.711790159730755, "grad_norm": 0.18253299593925476, "learning_rate": 9.30137782043252e-07, "loss": 0.013, "step": 253810 }, { "epoch": 2.7118970030450344, "grad_norm": 0.05915931984782219, "learning_rate": 9.30129216229524e-07, "loss": 0.054, "step": 253820 }, { "epoch": 2.712003846359314, "grad_norm": 1.0492647886276245, "learning_rate": 9.301206499301477e-07, "loss": 0.0034, "step": 253830 }, { "epoch": 2.712110689673594, "grad_norm": 0.20739145576953888, "learning_rate": 9.301120831451327e-07, "loss": 0.0598, "step": 253840 }, { "epoch": 2.7122175329878733, "grad_norm": 0.2692627012729645, "learning_rate": 9.301035158744889e-07, "loss": 0.0392, "step": 253850 }, { "epoch": 2.7123243763021527, "grad_norm": 4.239377021789551, "learning_rate": 9.300949481182256e-07, "loss": 0.0101, "step": 253860 }, { "epoch": 2.7124312196164326, "grad_norm": 0.0038511513266712427, "learning_rate": 9.300863798763525e-07, "loss": 0.0426, "step": 253870 }, { "epoch": 2.712538062930712, "grad_norm": 0.0075101424008607864, "learning_rate": 9.300778111488796e-07, "loss": 0.0557, "step": 253880 }, { "epoch": 2.712644906244992, "grad_norm": 3.0831594467163086, "learning_rate": 9.300692419358165e-07, "loss": 0.009, "step": 253890 }, { "epoch": 2.7127517495592715, "grad_norm": 3.297703266143799, "learning_rate": 9.300606722371726e-07, "loss": 0.057, "step": 253900 }, { "epoch": 2.712858592873551, "grad_norm": 1.9712374210357666, "learning_rate": 9.300521020529577e-07, "loss": 0.0664, "step": 253910 }, { "epoch": 2.7129654361878304, "grad_norm": 0.10069791227579117, "learning_rate": 9.300435313831815e-07, "loss": 0.0212, "step": 253920 }, { "epoch": 2.7130722795021103, "grad_norm": 14.020626068115234, "learning_rate": 9.300349602278538e-07, "loss": 0.0059, "step": 253930 }, { "epoch": 2.7131791228163897, "grad_norm": 4.850493431091309, "learning_rate": 9.300263885869841e-07, "loss": 0.0085, "step": 253940 }, { "epoch": 2.7132859661306696, "grad_norm": 0.061322685331106186, "learning_rate": 9.300178164605823e-07, "loss": 0.014, "step": 253950 }, { "epoch": 2.713392809444949, "grad_norm": 0.01535781193524599, "learning_rate": 9.300092438486578e-07, "loss": 0.0344, "step": 253960 }, { "epoch": 2.7134996527592286, "grad_norm": 0.03246350586414337, "learning_rate": 9.300006707512205e-07, "loss": 0.0229, "step": 253970 }, { "epoch": 2.713606496073508, "grad_norm": 0.007937599904835224, "learning_rate": 9.299920971682799e-07, "loss": 0.0306, "step": 253980 }, { "epoch": 2.713713339387788, "grad_norm": 2.7557668685913086, "learning_rate": 9.299835230998458e-07, "loss": 0.0116, "step": 253990 }, { "epoch": 2.7138201827020674, "grad_norm": 0.2092251032590866, "learning_rate": 9.299749485459278e-07, "loss": 0.0258, "step": 254000 }, { "epoch": 2.7139270260163473, "grad_norm": 1.1744790077209473, "learning_rate": 9.299663735065357e-07, "loss": 0.0046, "step": 254010 }, { "epoch": 2.7140338693306267, "grad_norm": 0.006386518944054842, "learning_rate": 9.299577979816793e-07, "loss": 0.0133, "step": 254020 }, { "epoch": 2.714140712644906, "grad_norm": 1.584254503250122, "learning_rate": 9.299492219713679e-07, "loss": 0.0119, "step": 254030 }, { "epoch": 2.7142475559591857, "grad_norm": 2.8691720962524414, "learning_rate": 9.299406454756113e-07, "loss": 0.008, "step": 254040 }, { "epoch": 2.7143543992734656, "grad_norm": 8.091835021972656, "learning_rate": 9.299320684944193e-07, "loss": 0.0147, "step": 254050 }, { "epoch": 2.714461242587745, "grad_norm": 0.9759195446968079, "learning_rate": 9.299234910278016e-07, "loss": 0.0067, "step": 254060 }, { "epoch": 2.714568085902025, "grad_norm": 0.13198387622833252, "learning_rate": 9.299149130757677e-07, "loss": 0.0133, "step": 254070 }, { "epoch": 2.7146749292163044, "grad_norm": 0.994999885559082, "learning_rate": 9.299063346383276e-07, "loss": 0.0324, "step": 254080 }, { "epoch": 2.714781772530584, "grad_norm": 0.22724701464176178, "learning_rate": 9.298977557154907e-07, "loss": 0.0075, "step": 254090 }, { "epoch": 2.7148886158448633, "grad_norm": 0.029071735218167305, "learning_rate": 9.298891763072669e-07, "loss": 0.009, "step": 254100 }, { "epoch": 2.714995459159143, "grad_norm": 2.053499698638916, "learning_rate": 9.298805964136656e-07, "loss": 0.0037, "step": 254110 }, { "epoch": 2.7151023024734227, "grad_norm": 4.210749626159668, "learning_rate": 9.298720160346969e-07, "loss": 0.044, "step": 254120 }, { "epoch": 2.7152091457877026, "grad_norm": 0.03557974472641945, "learning_rate": 9.2986343517037e-07, "loss": 0.0422, "step": 254130 }, { "epoch": 2.715315989101982, "grad_norm": 1.5863546133041382, "learning_rate": 9.29854853820695e-07, "loss": 0.0264, "step": 254140 }, { "epoch": 2.7154228324162615, "grad_norm": 0.019593140110373497, "learning_rate": 9.298462719856813e-07, "loss": 0.0104, "step": 254150 }, { "epoch": 2.715529675730541, "grad_norm": 0.16019728779792786, "learning_rate": 9.298376896653388e-07, "loss": 0.0055, "step": 254160 }, { "epoch": 2.715636519044821, "grad_norm": 0.6580671072006226, "learning_rate": 9.298291068596771e-07, "loss": 0.017, "step": 254170 }, { "epoch": 2.7157433623591003, "grad_norm": 4.3600053787231445, "learning_rate": 9.298205235687059e-07, "loss": 0.0126, "step": 254180 }, { "epoch": 2.71585020567338, "grad_norm": 0.26969078183174133, "learning_rate": 9.298119397924349e-07, "loss": 0.0286, "step": 254190 }, { "epoch": 2.7159570489876597, "grad_norm": 0.12037378549575806, "learning_rate": 9.298033555308737e-07, "loss": 0.0098, "step": 254200 }, { "epoch": 2.716063892301939, "grad_norm": 0.02893097512423992, "learning_rate": 9.297947707840321e-07, "loss": 0.0255, "step": 254210 }, { "epoch": 2.7161707356162186, "grad_norm": 0.44442638754844666, "learning_rate": 9.297861855519199e-07, "loss": 0.0101, "step": 254220 }, { "epoch": 2.7162775789304985, "grad_norm": 0.2231091856956482, "learning_rate": 9.297775998345465e-07, "loss": 0.0025, "step": 254230 }, { "epoch": 2.716384422244778, "grad_norm": 0.014515819028019905, "learning_rate": 9.297690136319218e-07, "loss": 0.0086, "step": 254240 }, { "epoch": 2.716491265559058, "grad_norm": 0.6925739645957947, "learning_rate": 9.297604269440556e-07, "loss": 0.0092, "step": 254250 }, { "epoch": 2.7165981088733373, "grad_norm": 3.8016531467437744, "learning_rate": 9.297518397709572e-07, "loss": 0.0121, "step": 254260 }, { "epoch": 2.7167049521876168, "grad_norm": 0.065559983253479, "learning_rate": 9.297432521126367e-07, "loss": 0.0033, "step": 254270 }, { "epoch": 2.716811795501896, "grad_norm": 0.01779475249350071, "learning_rate": 9.297346639691034e-07, "loss": 0.009, "step": 254280 }, { "epoch": 2.716918638816176, "grad_norm": 0.003428239608183503, "learning_rate": 9.297260753403675e-07, "loss": 0.0065, "step": 254290 }, { "epoch": 2.7170254821304556, "grad_norm": 0.001389765995554626, "learning_rate": 9.297174862264384e-07, "loss": 0.0451, "step": 254300 }, { "epoch": 2.7171323254447355, "grad_norm": 3.590909481048584, "learning_rate": 9.297088966273257e-07, "loss": 0.0189, "step": 254310 }, { "epoch": 2.717239168759015, "grad_norm": 4.673320293426514, "learning_rate": 9.297003065430393e-07, "loss": 0.0314, "step": 254320 }, { "epoch": 2.7173460120732944, "grad_norm": 0.008771821856498718, "learning_rate": 9.296917159735889e-07, "loss": 0.0047, "step": 254330 }, { "epoch": 2.717452855387574, "grad_norm": 0.0015522140311077237, "learning_rate": 9.29683124918984e-07, "loss": 0.0097, "step": 254340 }, { "epoch": 2.7175596987018538, "grad_norm": 0.010078336112201214, "learning_rate": 9.296745333792345e-07, "loss": 0.0168, "step": 254350 }, { "epoch": 2.7176665420161332, "grad_norm": 14.892067909240723, "learning_rate": 9.2966594135435e-07, "loss": 0.0067, "step": 254360 }, { "epoch": 2.717773385330413, "grad_norm": 0.0026963793206959963, "learning_rate": 9.296573488443401e-07, "loss": 0.0114, "step": 254370 }, { "epoch": 2.7178802286446926, "grad_norm": 0.5952342748641968, "learning_rate": 9.296487558492148e-07, "loss": 0.0107, "step": 254380 }, { "epoch": 2.717987071958972, "grad_norm": 0.3002203702926636, "learning_rate": 9.296401623689835e-07, "loss": 0.0645, "step": 254390 }, { "epoch": 2.7180939152732515, "grad_norm": 0.39999744296073914, "learning_rate": 9.29631568403656e-07, "loss": 0.0013, "step": 254400 }, { "epoch": 2.7182007585875314, "grad_norm": 0.006362252403050661, "learning_rate": 9.296229739532422e-07, "loss": 0.0109, "step": 254410 }, { "epoch": 2.718307601901811, "grad_norm": 1.1168763637542725, "learning_rate": 9.296143790177515e-07, "loss": 0.0164, "step": 254420 }, { "epoch": 2.7184144452160908, "grad_norm": 3.725696563720703, "learning_rate": 9.296057835971938e-07, "loss": 0.0402, "step": 254430 }, { "epoch": 2.7185212885303702, "grad_norm": 0.054144229739904404, "learning_rate": 9.295971876915787e-07, "loss": 0.0105, "step": 254440 }, { "epoch": 2.7186281318446497, "grad_norm": 4.832393646240234, "learning_rate": 9.29588591300916e-07, "loss": 0.0084, "step": 254450 }, { "epoch": 2.7187349751589296, "grad_norm": 1.3158713579177856, "learning_rate": 9.295799944252154e-07, "loss": 0.0145, "step": 254460 }, { "epoch": 2.718841818473209, "grad_norm": 0.10124983638525009, "learning_rate": 9.295713970644865e-07, "loss": 0.0366, "step": 254470 }, { "epoch": 2.7189486617874885, "grad_norm": 2.333742618560791, "learning_rate": 9.295627992187392e-07, "loss": 0.0248, "step": 254480 }, { "epoch": 2.7190555051017684, "grad_norm": 0.21139755845069885, "learning_rate": 9.295542008879829e-07, "loss": 0.0199, "step": 254490 }, { "epoch": 2.719162348416048, "grad_norm": 0.047180477529764175, "learning_rate": 9.295456020722274e-07, "loss": 0.0101, "step": 254500 }, { "epoch": 2.7192691917303273, "grad_norm": 1.802689790725708, "learning_rate": 9.295370027714827e-07, "loss": 0.0784, "step": 254510 }, { "epoch": 2.7193760350446072, "grad_norm": 17.589149475097656, "learning_rate": 9.295284029857584e-07, "loss": 0.0287, "step": 254520 }, { "epoch": 2.7194828783588867, "grad_norm": 1.511986255645752, "learning_rate": 9.295198027150639e-07, "loss": 0.0076, "step": 254530 }, { "epoch": 2.719589721673166, "grad_norm": 3.4304416179656982, "learning_rate": 9.295112019594092e-07, "loss": 0.0203, "step": 254540 }, { "epoch": 2.719696564987446, "grad_norm": 12.850870132446289, "learning_rate": 9.295026007188039e-07, "loss": 0.0396, "step": 254550 }, { "epoch": 2.7198034083017255, "grad_norm": 2.6596570014953613, "learning_rate": 9.294939989932577e-07, "loss": 0.0074, "step": 254560 }, { "epoch": 2.719910251616005, "grad_norm": 0.3839894235134125, "learning_rate": 9.294853967827804e-07, "loss": 0.0156, "step": 254570 }, { "epoch": 2.720017094930285, "grad_norm": 6.079463005065918, "learning_rate": 9.294767940873818e-07, "loss": 0.009, "step": 254580 }, { "epoch": 2.7201239382445643, "grad_norm": 2.736672878265381, "learning_rate": 9.294681909070714e-07, "loss": 0.0209, "step": 254590 }, { "epoch": 2.720230781558844, "grad_norm": 1.311111569404602, "learning_rate": 9.29459587241859e-07, "loss": 0.0039, "step": 254600 }, { "epoch": 2.7203376248731237, "grad_norm": 0.016469771042466164, "learning_rate": 9.294509830917543e-07, "loss": 0.0273, "step": 254610 }, { "epoch": 2.720444468187403, "grad_norm": 0.7430171370506287, "learning_rate": 9.294423784567671e-07, "loss": 0.032, "step": 254620 }, { "epoch": 2.7205513115016826, "grad_norm": 0.04114759713411331, "learning_rate": 9.294337733369069e-07, "loss": 0.0117, "step": 254630 }, { "epoch": 2.7206581548159625, "grad_norm": 0.23057322204113007, "learning_rate": 9.294251677321836e-07, "loss": 0.0272, "step": 254640 }, { "epoch": 2.720764998130242, "grad_norm": 14.218572616577148, "learning_rate": 9.29416561642607e-07, "loss": 0.0271, "step": 254650 }, { "epoch": 2.720871841444522, "grad_norm": 0.41000762581825256, "learning_rate": 9.294079550681866e-07, "loss": 0.0162, "step": 254660 }, { "epoch": 2.7209786847588013, "grad_norm": 0.015651816502213478, "learning_rate": 9.293993480089322e-07, "loss": 0.0376, "step": 254670 }, { "epoch": 2.721085528073081, "grad_norm": 0.04211104288697243, "learning_rate": 9.293907404648535e-07, "loss": 0.0314, "step": 254680 }, { "epoch": 2.7211923713873603, "grad_norm": 0.02919856645166874, "learning_rate": 9.293821324359603e-07, "loss": 0.0202, "step": 254690 }, { "epoch": 2.72129921470164, "grad_norm": 0.03353783115744591, "learning_rate": 9.293735239222623e-07, "loss": 0.0049, "step": 254700 }, { "epoch": 2.7214060580159196, "grad_norm": 0.02303912676870823, "learning_rate": 9.293649149237691e-07, "loss": 0.0072, "step": 254710 }, { "epoch": 2.7215129013301995, "grad_norm": 6.934154987335205, "learning_rate": 9.293563054404906e-07, "loss": 0.0117, "step": 254720 }, { "epoch": 2.721619744644479, "grad_norm": 0.022300072014331818, "learning_rate": 9.293476954724364e-07, "loss": 0.0259, "step": 254730 }, { "epoch": 2.7217265879587584, "grad_norm": 9.254915237426758, "learning_rate": 9.293390850196161e-07, "loss": 0.0208, "step": 254740 }, { "epoch": 2.721833431273038, "grad_norm": 0.03169119358062744, "learning_rate": 9.293304740820397e-07, "loss": 0.0103, "step": 254750 }, { "epoch": 2.721940274587318, "grad_norm": 0.022996699437499046, "learning_rate": 9.293218626597168e-07, "loss": 0.0589, "step": 254760 }, { "epoch": 2.7220471179015973, "grad_norm": 0.06787881255149841, "learning_rate": 9.293132507526571e-07, "loss": 0.0303, "step": 254770 }, { "epoch": 2.722153961215877, "grad_norm": 0.03025108389556408, "learning_rate": 9.293046383608704e-07, "loss": 0.0189, "step": 254780 }, { "epoch": 2.7222608045301566, "grad_norm": 0.0480954572558403, "learning_rate": 9.292960254843663e-07, "loss": 0.0216, "step": 254790 }, { "epoch": 2.722367647844436, "grad_norm": 0.10531765967607498, "learning_rate": 9.292874121231547e-07, "loss": 0.016, "step": 254800 }, { "epoch": 2.7224744911587155, "grad_norm": 0.38663941621780396, "learning_rate": 9.29278798277245e-07, "loss": 0.0154, "step": 254810 }, { "epoch": 2.7225813344729954, "grad_norm": 1.3355096578598022, "learning_rate": 9.292701839466473e-07, "loss": 0.0366, "step": 254820 }, { "epoch": 2.722688177787275, "grad_norm": 0.7157315611839294, "learning_rate": 9.29261569131371e-07, "loss": 0.0129, "step": 254830 }, { "epoch": 2.722795021101555, "grad_norm": 3.186534881591797, "learning_rate": 9.292529538314261e-07, "loss": 0.006, "step": 254840 }, { "epoch": 2.7229018644158343, "grad_norm": 0.02739562653005123, "learning_rate": 9.292443380468222e-07, "loss": 0.0197, "step": 254850 }, { "epoch": 2.7230087077301137, "grad_norm": 0.035941869020462036, "learning_rate": 9.292357217775691e-07, "loss": 0.0164, "step": 254860 }, { "epoch": 2.723115551044393, "grad_norm": 0.5537163019180298, "learning_rate": 9.292271050236764e-07, "loss": 0.0039, "step": 254870 }, { "epoch": 2.723222394358673, "grad_norm": 0.00984513945877552, "learning_rate": 9.292184877851539e-07, "loss": 0.0125, "step": 254880 }, { "epoch": 2.7233292376729525, "grad_norm": 0.006022542715072632, "learning_rate": 9.292098700620113e-07, "loss": 0.0036, "step": 254890 }, { "epoch": 2.7234360809872324, "grad_norm": 0.00037286485894583166, "learning_rate": 9.292012518542585e-07, "loss": 0.0081, "step": 254900 }, { "epoch": 2.723542924301512, "grad_norm": 3.3083763122558594, "learning_rate": 9.291926331619049e-07, "loss": 0.0258, "step": 254910 }, { "epoch": 2.7236497676157914, "grad_norm": 0.12020944058895111, "learning_rate": 9.291840139849607e-07, "loss": 0.0182, "step": 254920 }, { "epoch": 2.723756610930071, "grad_norm": 0.02170472964644432, "learning_rate": 9.29175394323435e-07, "loss": 0.0292, "step": 254930 }, { "epoch": 2.7238634542443507, "grad_norm": 0.009809538722038269, "learning_rate": 9.291667741773381e-07, "loss": 0.0058, "step": 254940 }, { "epoch": 2.72397029755863, "grad_norm": 0.014597582630813122, "learning_rate": 9.291581535466796e-07, "loss": 0.01, "step": 254950 }, { "epoch": 2.72407714087291, "grad_norm": 1.99214506149292, "learning_rate": 9.291495324314691e-07, "loss": 0.0707, "step": 254960 }, { "epoch": 2.7241839841871895, "grad_norm": 0.04379594698548317, "learning_rate": 9.291409108317164e-07, "loss": 0.0122, "step": 254970 }, { "epoch": 2.724290827501469, "grad_norm": 4.151421070098877, "learning_rate": 9.291322887474311e-07, "loss": 0.0165, "step": 254980 }, { "epoch": 2.7243976708157485, "grad_norm": 1.0897581577301025, "learning_rate": 9.291236661786232e-07, "loss": 0.0122, "step": 254990 }, { "epoch": 2.7245045141300284, "grad_norm": 4.328856945037842, "learning_rate": 9.291150431253021e-07, "loss": 0.026, "step": 255000 }, { "epoch": 2.724611357444308, "grad_norm": 2.8956336975097656, "learning_rate": 9.291064195874779e-07, "loss": 0.0087, "step": 255010 }, { "epoch": 2.7247182007585877, "grad_norm": 0.518282413482666, "learning_rate": 9.290977955651603e-07, "loss": 0.034, "step": 255020 }, { "epoch": 2.724825044072867, "grad_norm": 4.733404159545898, "learning_rate": 9.290891710583586e-07, "loss": 0.042, "step": 255030 }, { "epoch": 2.7249318873871466, "grad_norm": 0.9405770897865295, "learning_rate": 9.29080546067083e-07, "loss": 0.0122, "step": 255040 }, { "epoch": 2.725038730701426, "grad_norm": 0.017367824912071228, "learning_rate": 9.290719205913428e-07, "loss": 0.0237, "step": 255050 }, { "epoch": 2.725145574015706, "grad_norm": 0.09654609858989716, "learning_rate": 9.290632946311484e-07, "loss": 0.0336, "step": 255060 }, { "epoch": 2.7252524173299855, "grad_norm": 5.969744682312012, "learning_rate": 9.29054668186509e-07, "loss": 0.0127, "step": 255070 }, { "epoch": 2.7253592606442654, "grad_norm": 1.1091325283050537, "learning_rate": 9.290460412574346e-07, "loss": 0.0082, "step": 255080 }, { "epoch": 2.725466103958545, "grad_norm": 0.046935223042964935, "learning_rate": 9.290374138439347e-07, "loss": 0.0255, "step": 255090 }, { "epoch": 2.7255729472728243, "grad_norm": 0.11967835575342178, "learning_rate": 9.290287859460193e-07, "loss": 0.0015, "step": 255100 }, { "epoch": 2.7256797905871037, "grad_norm": 5.732570171356201, "learning_rate": 9.290201575636979e-07, "loss": 0.0183, "step": 255110 }, { "epoch": 2.7257866339013836, "grad_norm": 1.9356184005737305, "learning_rate": 9.290115286969804e-07, "loss": 0.0085, "step": 255120 }, { "epoch": 2.725893477215663, "grad_norm": 0.06444182991981506, "learning_rate": 9.290028993458765e-07, "loss": 0.0271, "step": 255130 }, { "epoch": 2.726000320529943, "grad_norm": 0.010133195668458939, "learning_rate": 9.289942695103961e-07, "loss": 0.01, "step": 255140 }, { "epoch": 2.7261071638442225, "grad_norm": 12.593856811523438, "learning_rate": 9.289856391905487e-07, "loss": 0.0401, "step": 255150 }, { "epoch": 2.726214007158502, "grad_norm": 0.014818085357546806, "learning_rate": 9.28977008386344e-07, "loss": 0.0217, "step": 255160 }, { "epoch": 2.726320850472782, "grad_norm": 0.019626153632998466, "learning_rate": 9.289683770977919e-07, "loss": 0.0176, "step": 255170 }, { "epoch": 2.7264276937870613, "grad_norm": 1.000736951828003, "learning_rate": 9.289597453249023e-07, "loss": 0.0111, "step": 255180 }, { "epoch": 2.7265345371013407, "grad_norm": 5.772706508636475, "learning_rate": 9.289511130676846e-07, "loss": 0.0255, "step": 255190 }, { "epoch": 2.7266413804156207, "grad_norm": 1.7554436922073364, "learning_rate": 9.289424803261488e-07, "loss": 0.0241, "step": 255200 }, { "epoch": 2.7267482237299, "grad_norm": 0.14966635406017303, "learning_rate": 9.289338471003046e-07, "loss": 0.0015, "step": 255210 }, { "epoch": 2.7268550670441796, "grad_norm": 4.714855194091797, "learning_rate": 9.289252133901617e-07, "loss": 0.0117, "step": 255220 }, { "epoch": 2.7269619103584595, "grad_norm": 0.00628645159304142, "learning_rate": 9.289165791957298e-07, "loss": 0.0029, "step": 255230 }, { "epoch": 2.727068753672739, "grad_norm": 0.05216134339570999, "learning_rate": 9.289079445170187e-07, "loss": 0.0133, "step": 255240 }, { "epoch": 2.7271755969870184, "grad_norm": 1.5054633617401123, "learning_rate": 9.288993093540382e-07, "loss": 0.022, "step": 255250 }, { "epoch": 2.7272824403012983, "grad_norm": 0.38517341017723083, "learning_rate": 9.288906737067981e-07, "loss": 0.0262, "step": 255260 }, { "epoch": 2.7273892836155778, "grad_norm": 2.07494854927063, "learning_rate": 9.288820375753079e-07, "loss": 0.0186, "step": 255270 }, { "epoch": 2.727496126929857, "grad_norm": 2.4468512535095215, "learning_rate": 9.288734009595777e-07, "loss": 0.011, "step": 255280 }, { "epoch": 2.727602970244137, "grad_norm": 0.15567485988140106, "learning_rate": 9.28864763859617e-07, "loss": 0.0171, "step": 255290 }, { "epoch": 2.7277098135584166, "grad_norm": 0.027263334020972252, "learning_rate": 9.288561262754355e-07, "loss": 0.0228, "step": 255300 }, { "epoch": 2.727816656872696, "grad_norm": 0.06923104077577591, "learning_rate": 9.288474882070431e-07, "loss": 0.0165, "step": 255310 }, { "epoch": 2.727923500186976, "grad_norm": 1.9620628356933594, "learning_rate": 9.288388496544494e-07, "loss": 0.0111, "step": 255320 }, { "epoch": 2.7280303435012554, "grad_norm": 0.26368796825408936, "learning_rate": 9.288302106176647e-07, "loss": 0.0262, "step": 255330 }, { "epoch": 2.728137186815535, "grad_norm": 0.06189774349331856, "learning_rate": 9.288215710966979e-07, "loss": 0.0159, "step": 255340 }, { "epoch": 2.7282440301298148, "grad_norm": 1.6811245679855347, "learning_rate": 9.288129310915593e-07, "loss": 0.0172, "step": 255350 }, { "epoch": 2.728350873444094, "grad_norm": 0.020046943798661232, "learning_rate": 9.288042906022586e-07, "loss": 0.0164, "step": 255360 }, { "epoch": 2.728457716758374, "grad_norm": 2.387362241744995, "learning_rate": 9.287956496288054e-07, "loss": 0.0327, "step": 255370 }, { "epoch": 2.7285645600726536, "grad_norm": 0.11857602745294571, "learning_rate": 9.287870081712097e-07, "loss": 0.0214, "step": 255380 }, { "epoch": 2.728671403386933, "grad_norm": 0.08386588841676712, "learning_rate": 9.28778366229481e-07, "loss": 0.0369, "step": 255390 }, { "epoch": 2.7287782467012125, "grad_norm": 0.703993022441864, "learning_rate": 9.287697238036292e-07, "loss": 0.018, "step": 255400 }, { "epoch": 2.7288850900154924, "grad_norm": 2.509847402572632, "learning_rate": 9.287610808936639e-07, "loss": 0.0122, "step": 255410 }, { "epoch": 2.728991933329772, "grad_norm": 2.2460269927978516, "learning_rate": 9.287524374995952e-07, "loss": 0.006, "step": 255420 }, { "epoch": 2.7290987766440518, "grad_norm": 0.02565721608698368, "learning_rate": 9.287437936214324e-07, "loss": 0.0108, "step": 255430 }, { "epoch": 2.729205619958331, "grad_norm": 1.767083764076233, "learning_rate": 9.287351492591858e-07, "loss": 0.0075, "step": 255440 }, { "epoch": 2.7293124632726107, "grad_norm": 3.7889771461486816, "learning_rate": 9.287265044128645e-07, "loss": 0.0291, "step": 255450 }, { "epoch": 2.72941930658689, "grad_norm": 0.012287404388189316, "learning_rate": 9.287178590824789e-07, "loss": 0.0574, "step": 255460 }, { "epoch": 2.72952614990117, "grad_norm": 5.776226997375488, "learning_rate": 9.287092132680383e-07, "loss": 0.0127, "step": 255470 }, { "epoch": 2.7296329932154495, "grad_norm": 0.11993003636598587, "learning_rate": 9.287005669695527e-07, "loss": 0.0171, "step": 255480 }, { "epoch": 2.7297398365297294, "grad_norm": 0.02977876178920269, "learning_rate": 9.286919201870318e-07, "loss": 0.0384, "step": 255490 }, { "epoch": 2.729846679844009, "grad_norm": 6.856686592102051, "learning_rate": 9.286832729204854e-07, "loss": 0.0431, "step": 255500 }, { "epoch": 2.7299535231582883, "grad_norm": 0.011740168556571007, "learning_rate": 9.286746251699232e-07, "loss": 0.0269, "step": 255510 }, { "epoch": 2.730060366472568, "grad_norm": 4.901212692260742, "learning_rate": 9.286659769353551e-07, "loss": 0.0073, "step": 255520 }, { "epoch": 2.7301672097868477, "grad_norm": 0.006236713379621506, "learning_rate": 9.286573282167907e-07, "loss": 0.008, "step": 255530 }, { "epoch": 2.730274053101127, "grad_norm": 1.2483155727386475, "learning_rate": 9.286486790142398e-07, "loss": 0.0839, "step": 255540 }, { "epoch": 2.730380896415407, "grad_norm": 3.1229443550109863, "learning_rate": 9.286400293277121e-07, "loss": 0.0071, "step": 255550 }, { "epoch": 2.7304877397296865, "grad_norm": 4.580199718475342, "learning_rate": 9.286313791572176e-07, "loss": 0.0306, "step": 255560 }, { "epoch": 2.730594583043966, "grad_norm": 0.005632362328469753, "learning_rate": 9.286227285027659e-07, "loss": 0.0044, "step": 255570 }, { "epoch": 2.7307014263582454, "grad_norm": 0.33970651030540466, "learning_rate": 9.286140773643667e-07, "loss": 0.0034, "step": 255580 }, { "epoch": 2.7308082696725253, "grad_norm": 1.3949517011642456, "learning_rate": 9.2860542574203e-07, "loss": 0.0328, "step": 255590 }, { "epoch": 2.730915112986805, "grad_norm": 2.1665828227996826, "learning_rate": 9.285967736357652e-07, "loss": 0.0346, "step": 255600 }, { "epoch": 2.7310219563010847, "grad_norm": 0.10455181449651718, "learning_rate": 9.285881210455824e-07, "loss": 0.0234, "step": 255610 }, { "epoch": 2.731128799615364, "grad_norm": 0.039476700127124786, "learning_rate": 9.285794679714914e-07, "loss": 0.0305, "step": 255620 }, { "epoch": 2.7312356429296436, "grad_norm": 1.1492252349853516, "learning_rate": 9.285708144135017e-07, "loss": 0.0156, "step": 255630 }, { "epoch": 2.731342486243923, "grad_norm": 0.10187678784132004, "learning_rate": 9.285621603716232e-07, "loss": 0.0364, "step": 255640 }, { "epoch": 2.731449329558203, "grad_norm": 1.5518964529037476, "learning_rate": 9.285535058458657e-07, "loss": 0.0057, "step": 255650 }, { "epoch": 2.7315561728724824, "grad_norm": 0.5368057489395142, "learning_rate": 9.28544850836239e-07, "loss": 0.0167, "step": 255660 }, { "epoch": 2.7316630161867623, "grad_norm": 10.81374454498291, "learning_rate": 9.285361953427527e-07, "loss": 0.008, "step": 255670 }, { "epoch": 2.731769859501042, "grad_norm": 0.8451001048088074, "learning_rate": 9.285275393654168e-07, "loss": 0.0174, "step": 255680 }, { "epoch": 2.7318767028153212, "grad_norm": 0.7502939701080322, "learning_rate": 9.28518882904241e-07, "loss": 0.0185, "step": 255690 }, { "epoch": 2.7319835461296007, "grad_norm": 2.9169762134552, "learning_rate": 9.28510225959235e-07, "loss": 0.0215, "step": 255700 }, { "epoch": 2.7320903894438806, "grad_norm": 0.9698954224586487, "learning_rate": 9.285015685304085e-07, "loss": 0.0194, "step": 255710 }, { "epoch": 2.73219723275816, "grad_norm": 0.24995343387126923, "learning_rate": 9.284929106177715e-07, "loss": 0.0342, "step": 255720 }, { "epoch": 2.73230407607244, "grad_norm": 0.03998713940382004, "learning_rate": 9.284842522213337e-07, "loss": 0.007, "step": 255730 }, { "epoch": 2.7324109193867194, "grad_norm": 4.238652229309082, "learning_rate": 9.284755933411048e-07, "loss": 0.049, "step": 255740 }, { "epoch": 2.732517762700999, "grad_norm": 2.959836959838867, "learning_rate": 9.284669339770945e-07, "loss": 0.0073, "step": 255750 }, { "epoch": 2.7326246060152783, "grad_norm": 3.618013858795166, "learning_rate": 9.284582741293128e-07, "loss": 0.0185, "step": 255760 }, { "epoch": 2.7327314493295582, "grad_norm": 0.04005608335137367, "learning_rate": 9.284496137977695e-07, "loss": 0.0302, "step": 255770 }, { "epoch": 2.7328382926438377, "grad_norm": 1.1392594575881958, "learning_rate": 9.28440952982474e-07, "loss": 0.0024, "step": 255780 }, { "epoch": 2.7329451359581176, "grad_norm": 2.0816893577575684, "learning_rate": 9.284322916834365e-07, "loss": 0.0137, "step": 255790 }, { "epoch": 2.733051979272397, "grad_norm": 9.78332233428955, "learning_rate": 9.284236299006665e-07, "loss": 0.021, "step": 255800 }, { "epoch": 2.7331588225866765, "grad_norm": 0.07154491543769836, "learning_rate": 9.284149676341739e-07, "loss": 0.0295, "step": 255810 }, { "epoch": 2.733265665900956, "grad_norm": 0.1438000202178955, "learning_rate": 9.284063048839685e-07, "loss": 0.0709, "step": 255820 }, { "epoch": 2.733372509215236, "grad_norm": 3.557239532470703, "learning_rate": 9.283976416500601e-07, "loss": 0.0193, "step": 255830 }, { "epoch": 2.7334793525295153, "grad_norm": 0.11507921665906906, "learning_rate": 9.283889779324582e-07, "loss": 0.0209, "step": 255840 }, { "epoch": 2.7335861958437953, "grad_norm": 0.17163394391536713, "learning_rate": 9.283803137311732e-07, "loss": 0.0021, "step": 255850 }, { "epoch": 2.7336930391580747, "grad_norm": 1.3455896377563477, "learning_rate": 9.283716490462144e-07, "loss": 0.0371, "step": 255860 }, { "epoch": 2.733799882472354, "grad_norm": 0.00333088799379766, "learning_rate": 9.283629838775915e-07, "loss": 0.0168, "step": 255870 }, { "epoch": 2.7339067257866336, "grad_norm": 0.07676228880882263, "learning_rate": 9.283543182253144e-07, "loss": 0.0275, "step": 255880 }, { "epoch": 2.7340135691009135, "grad_norm": 1.3915741443634033, "learning_rate": 9.283456520893931e-07, "loss": 0.0356, "step": 255890 }, { "epoch": 2.734120412415193, "grad_norm": 4.3364667892456055, "learning_rate": 9.283369854698373e-07, "loss": 0.0099, "step": 255900 }, { "epoch": 2.734227255729473, "grad_norm": 0.01885530725121498, "learning_rate": 9.283283183666566e-07, "loss": 0.0412, "step": 255910 }, { "epoch": 2.7343340990437524, "grad_norm": 1.512695074081421, "learning_rate": 9.283196507798609e-07, "loss": 0.0196, "step": 255920 }, { "epoch": 2.734440942358032, "grad_norm": 2.9650702476501465, "learning_rate": 9.2831098270946e-07, "loss": 0.0076, "step": 255930 }, { "epoch": 2.7345477856723117, "grad_norm": 1.8111436367034912, "learning_rate": 9.283023141554637e-07, "loss": 0.0131, "step": 255940 }, { "epoch": 2.734654628986591, "grad_norm": 0.00981524121016264, "learning_rate": 9.282936451178818e-07, "loss": 0.0044, "step": 255950 }, { "epoch": 2.7347614723008706, "grad_norm": 1.0048397779464722, "learning_rate": 9.282849755967239e-07, "loss": 0.0192, "step": 255960 }, { "epoch": 2.7348683156151505, "grad_norm": 10.098158836364746, "learning_rate": 9.28276305592e-07, "loss": 0.0549, "step": 255970 }, { "epoch": 2.73497515892943, "grad_norm": 0.02525593712925911, "learning_rate": 9.282676351037199e-07, "loss": 0.1129, "step": 255980 }, { "epoch": 2.7350820022437095, "grad_norm": 0.03115512616932392, "learning_rate": 9.282589641318932e-07, "loss": 0.0169, "step": 255990 }, { "epoch": 2.7351888455579894, "grad_norm": 0.007332407869398594, "learning_rate": 9.282502926765298e-07, "loss": 0.013, "step": 256000 }, { "epoch": 2.735295688872269, "grad_norm": 3.767353057861328, "learning_rate": 9.282416207376398e-07, "loss": 0.0158, "step": 256010 }, { "epoch": 2.7354025321865483, "grad_norm": 0.00659846281632781, "learning_rate": 9.282329483152325e-07, "loss": 0.0105, "step": 256020 }, { "epoch": 2.735509375500828, "grad_norm": 0.0059966943226754665, "learning_rate": 9.282242754093177e-07, "loss": 0.0441, "step": 256030 }, { "epoch": 2.7356162188151076, "grad_norm": 6.203979969024658, "learning_rate": 9.282156020199056e-07, "loss": 0.0108, "step": 256040 }, { "epoch": 2.735723062129387, "grad_norm": 0.005508792120963335, "learning_rate": 9.282069281470057e-07, "loss": 0.0281, "step": 256050 }, { "epoch": 2.735829905443667, "grad_norm": 6.943076133728027, "learning_rate": 9.281982537906278e-07, "loss": 0.0172, "step": 256060 }, { "epoch": 2.7359367487579465, "grad_norm": 1.7600709199905396, "learning_rate": 9.281895789507818e-07, "loss": 0.0223, "step": 256070 }, { "epoch": 2.736043592072226, "grad_norm": 1.206598162651062, "learning_rate": 9.281809036274775e-07, "loss": 0.0281, "step": 256080 }, { "epoch": 2.736150435386506, "grad_norm": 0.005473126657307148, "learning_rate": 9.281722278207246e-07, "loss": 0.0121, "step": 256090 }, { "epoch": 2.7362572787007853, "grad_norm": 0.026123708114027977, "learning_rate": 9.281635515305329e-07, "loss": 0.0256, "step": 256100 }, { "epoch": 2.7363641220150647, "grad_norm": 2.283958911895752, "learning_rate": 9.281548747569123e-07, "loss": 0.0043, "step": 256110 }, { "epoch": 2.7364709653293446, "grad_norm": 0.2515127956867218, "learning_rate": 9.281461974998725e-07, "loss": 0.0047, "step": 256120 }, { "epoch": 2.736577808643624, "grad_norm": 9.522139549255371, "learning_rate": 9.281375197594234e-07, "loss": 0.0494, "step": 256130 }, { "epoch": 2.736684651957904, "grad_norm": 8.837517738342285, "learning_rate": 9.281288415355746e-07, "loss": 0.0338, "step": 256140 }, { "epoch": 2.7367914952721835, "grad_norm": 3.2916131019592285, "learning_rate": 9.28120162828336e-07, "loss": 0.0162, "step": 256150 }, { "epoch": 2.736898338586463, "grad_norm": 1.6904481649398804, "learning_rate": 9.281114836377176e-07, "loss": 0.0129, "step": 256160 }, { "epoch": 2.7370051819007424, "grad_norm": 2.949293851852417, "learning_rate": 9.281028039637288e-07, "loss": 0.011, "step": 256170 }, { "epoch": 2.7371120252150223, "grad_norm": 0.3140125572681427, "learning_rate": 9.280941238063798e-07, "loss": 0.009, "step": 256180 }, { "epoch": 2.7372188685293017, "grad_norm": 0.04196232184767723, "learning_rate": 9.280854431656803e-07, "loss": 0.0054, "step": 256190 }, { "epoch": 2.7373257118435816, "grad_norm": 3.8782410621643066, "learning_rate": 9.280767620416399e-07, "loss": 0.0176, "step": 256200 }, { "epoch": 2.737432555157861, "grad_norm": 13.96318531036377, "learning_rate": 9.280680804342684e-07, "loss": 0.0438, "step": 256210 }, { "epoch": 2.7375393984721406, "grad_norm": 8.420744895935059, "learning_rate": 9.280593983435759e-07, "loss": 0.0217, "step": 256220 }, { "epoch": 2.73764624178642, "grad_norm": 0.2909145951271057, "learning_rate": 9.280507157695721e-07, "loss": 0.0146, "step": 256230 }, { "epoch": 2.7377530851007, "grad_norm": 0.11928075551986694, "learning_rate": 9.280420327122667e-07, "loss": 0.0222, "step": 256240 }, { "epoch": 2.7378599284149794, "grad_norm": 9.132601737976074, "learning_rate": 9.280333491716694e-07, "loss": 0.0328, "step": 256250 }, { "epoch": 2.7379667717292593, "grad_norm": 1.5160819292068481, "learning_rate": 9.280246651477903e-07, "loss": 0.0118, "step": 256260 }, { "epoch": 2.7380736150435387, "grad_norm": 0.9221649169921875, "learning_rate": 9.280159806406389e-07, "loss": 0.0103, "step": 256270 }, { "epoch": 2.738180458357818, "grad_norm": 0.03443606570363045, "learning_rate": 9.280072956502252e-07, "loss": 0.0055, "step": 256280 }, { "epoch": 2.7382873016720977, "grad_norm": 0.1591072380542755, "learning_rate": 9.279986101765589e-07, "loss": 0.0157, "step": 256290 }, { "epoch": 2.7383941449863776, "grad_norm": 2.3319051265716553, "learning_rate": 9.279899242196501e-07, "loss": 0.006, "step": 256300 }, { "epoch": 2.738500988300657, "grad_norm": 5.401128768920898, "learning_rate": 9.279812377795081e-07, "loss": 0.01, "step": 256310 }, { "epoch": 2.738607831614937, "grad_norm": 7.46482515335083, "learning_rate": 9.279725508561431e-07, "loss": 0.0358, "step": 256320 }, { "epoch": 2.7387146749292164, "grad_norm": 0.05901936814188957, "learning_rate": 9.279638634495648e-07, "loss": 0.0024, "step": 256330 }, { "epoch": 2.738821518243496, "grad_norm": 1.2295851707458496, "learning_rate": 9.27955175559783e-07, "loss": 0.017, "step": 256340 }, { "epoch": 2.7389283615577753, "grad_norm": 0.0176599882543087, "learning_rate": 9.279464871868074e-07, "loss": 0.0093, "step": 256350 }, { "epoch": 2.739035204872055, "grad_norm": 2.7709317207336426, "learning_rate": 9.279377983306481e-07, "loss": 0.003, "step": 256360 }, { "epoch": 2.7391420481863347, "grad_norm": 0.9487290382385254, "learning_rate": 9.279291089913146e-07, "loss": 0.0065, "step": 256370 }, { "epoch": 2.7392488915006146, "grad_norm": 0.017144251614809036, "learning_rate": 9.279204191688169e-07, "loss": 0.0408, "step": 256380 }, { "epoch": 2.739355734814894, "grad_norm": 0.6656867265701294, "learning_rate": 9.279117288631648e-07, "loss": 0.002, "step": 256390 }, { "epoch": 2.7394625781291735, "grad_norm": 0.0031194635666906834, "learning_rate": 9.27903038074368e-07, "loss": 0.0044, "step": 256400 }, { "epoch": 2.739569421443453, "grad_norm": 4.333354949951172, "learning_rate": 9.278943468024363e-07, "loss": 0.159, "step": 256410 }, { "epoch": 2.739676264757733, "grad_norm": 3.5993664264678955, "learning_rate": 9.278856550473797e-07, "loss": 0.0125, "step": 256420 }, { "epoch": 2.7397831080720123, "grad_norm": 1.4660463333129883, "learning_rate": 9.278769628092078e-07, "loss": 0.0386, "step": 256430 }, { "epoch": 2.739889951386292, "grad_norm": 0.02573668584227562, "learning_rate": 9.278682700879305e-07, "loss": 0.0567, "step": 256440 }, { "epoch": 2.7399967947005717, "grad_norm": 0.21587364375591278, "learning_rate": 9.278595768835578e-07, "loss": 0.0154, "step": 256450 }, { "epoch": 2.740103638014851, "grad_norm": 0.5497379302978516, "learning_rate": 9.278508831960992e-07, "loss": 0.0213, "step": 256460 }, { "epoch": 2.7402104813291306, "grad_norm": 10.097922325134277, "learning_rate": 9.278421890255648e-07, "loss": 0.0459, "step": 256470 }, { "epoch": 2.7403173246434105, "grad_norm": 0.6809942126274109, "learning_rate": 9.278334943719641e-07, "loss": 0.0077, "step": 256480 }, { "epoch": 2.74042416795769, "grad_norm": 0.8793399930000305, "learning_rate": 9.278247992353072e-07, "loss": 0.024, "step": 256490 }, { "epoch": 2.74053101127197, "grad_norm": 0.008935417979955673, "learning_rate": 9.278161036156039e-07, "loss": 0.003, "step": 256500 }, { "epoch": 2.7406378545862493, "grad_norm": 0.005819293670356274, "learning_rate": 9.278074075128636e-07, "loss": 0.0617, "step": 256510 }, { "epoch": 2.7407446979005288, "grad_norm": 0.9224862456321716, "learning_rate": 9.277987109270968e-07, "loss": 0.0143, "step": 256520 }, { "epoch": 2.7408515412148082, "grad_norm": 0.006077094003558159, "learning_rate": 9.277900138583128e-07, "loss": 0.0026, "step": 256530 }, { "epoch": 2.740958384529088, "grad_norm": 0.4698629081249237, "learning_rate": 9.277813163065217e-07, "loss": 0.0414, "step": 256540 }, { "epoch": 2.7410652278433676, "grad_norm": 0.13912039995193481, "learning_rate": 9.277726182717332e-07, "loss": 0.0395, "step": 256550 }, { "epoch": 2.7411720711576475, "grad_norm": 1.4248796701431274, "learning_rate": 9.277639197539571e-07, "loss": 0.0228, "step": 256560 }, { "epoch": 2.741278914471927, "grad_norm": 3.1764869689941406, "learning_rate": 9.277552207532031e-07, "loss": 0.0257, "step": 256570 }, { "epoch": 2.7413857577862064, "grad_norm": 2.8507871627807617, "learning_rate": 9.277465212694813e-07, "loss": 0.0218, "step": 256580 }, { "epoch": 2.741492601100486, "grad_norm": 3.5862860679626465, "learning_rate": 9.277378213028014e-07, "loss": 0.0226, "step": 256590 }, { "epoch": 2.7415994444147658, "grad_norm": 0.6835296750068665, "learning_rate": 9.277291208531732e-07, "loss": 0.0056, "step": 256600 }, { "epoch": 2.7417062877290452, "grad_norm": 2.7523231506347656, "learning_rate": 9.277204199206065e-07, "loss": 0.0187, "step": 256610 }, { "epoch": 2.741813131043325, "grad_norm": 0.08683809638023376, "learning_rate": 9.277117185051112e-07, "loss": 0.0065, "step": 256620 }, { "epoch": 2.7419199743576046, "grad_norm": 0.06293359398841858, "learning_rate": 9.27703016606697e-07, "loss": 0.017, "step": 256630 }, { "epoch": 2.742026817671884, "grad_norm": 0.23468351364135742, "learning_rate": 9.276943142253739e-07, "loss": 0.0177, "step": 256640 }, { "epoch": 2.742133660986164, "grad_norm": 0.007485068403184414, "learning_rate": 9.276856113611518e-07, "loss": 0.01, "step": 256650 }, { "epoch": 2.7422405043004434, "grad_norm": 0.03026716597378254, "learning_rate": 9.276769080140401e-07, "loss": 0.0111, "step": 256660 }, { "epoch": 2.742347347614723, "grad_norm": 3.5711417198181152, "learning_rate": 9.276682041840491e-07, "loss": 0.0174, "step": 256670 }, { "epoch": 2.7424541909290028, "grad_norm": 10.880255699157715, "learning_rate": 9.276594998711883e-07, "loss": 0.0203, "step": 256680 }, { "epoch": 2.7425610342432822, "grad_norm": 0.0804465264081955, "learning_rate": 9.276507950754676e-07, "loss": 0.014, "step": 256690 }, { "epoch": 2.7426678775575617, "grad_norm": 0.3349185585975647, "learning_rate": 9.276420897968971e-07, "loss": 0.0612, "step": 256700 }, { "epoch": 2.7427747208718416, "grad_norm": 7.925215721130371, "learning_rate": 9.276333840354862e-07, "loss": 0.0561, "step": 256710 }, { "epoch": 2.742881564186121, "grad_norm": 0.08158557862043381, "learning_rate": 9.276246777912449e-07, "loss": 0.0056, "step": 256720 }, { "epoch": 2.7429884075004005, "grad_norm": 0.04979771375656128, "learning_rate": 9.276159710641832e-07, "loss": 0.0168, "step": 256730 }, { "epoch": 2.7430952508146804, "grad_norm": 4.782672882080078, "learning_rate": 9.276072638543108e-07, "loss": 0.0274, "step": 256740 }, { "epoch": 2.74320209412896, "grad_norm": 4.032263278961182, "learning_rate": 9.275985561616374e-07, "loss": 0.023, "step": 256750 }, { "epoch": 2.7433089374432393, "grad_norm": 3.1557493209838867, "learning_rate": 9.27589847986173e-07, "loss": 0.0501, "step": 256760 }, { "epoch": 2.7434157807575192, "grad_norm": 0.03346238657832146, "learning_rate": 9.275811393279273e-07, "loss": 0.0366, "step": 256770 }, { "epoch": 2.7435226240717987, "grad_norm": 0.008565881289541721, "learning_rate": 9.275724301869106e-07, "loss": 0.0489, "step": 256780 }, { "epoch": 2.743629467386078, "grad_norm": 0.5179883241653442, "learning_rate": 9.275637205631319e-07, "loss": 0.0032, "step": 256790 }, { "epoch": 2.743736310700358, "grad_norm": 4.6458539962768555, "learning_rate": 9.275550104566018e-07, "loss": 0.0239, "step": 256800 }, { "epoch": 2.7438431540146375, "grad_norm": 4.626293659210205, "learning_rate": 9.275462998673297e-07, "loss": 0.0847, "step": 256810 }, { "epoch": 2.743949997328917, "grad_norm": 2.6386663913726807, "learning_rate": 9.275375887953256e-07, "loss": 0.0528, "step": 256820 }, { "epoch": 2.744056840643197, "grad_norm": 7.4476141929626465, "learning_rate": 9.275288772405992e-07, "loss": 0.1055, "step": 256830 }, { "epoch": 2.7441636839574763, "grad_norm": 1.5583381652832031, "learning_rate": 9.275201652031605e-07, "loss": 0.0176, "step": 256840 }, { "epoch": 2.7442705272717562, "grad_norm": 1.6556954383850098, "learning_rate": 9.275114526830193e-07, "loss": 0.0146, "step": 256850 }, { "epoch": 2.7443773705860357, "grad_norm": 5.983797550201416, "learning_rate": 9.275027396801853e-07, "loss": 0.0276, "step": 256860 }, { "epoch": 2.744484213900315, "grad_norm": 2.450878381729126, "learning_rate": 9.274940261946687e-07, "loss": 0.0171, "step": 256870 }, { "epoch": 2.7445910572145946, "grad_norm": 0.1781909316778183, "learning_rate": 9.274853122264787e-07, "loss": 0.0172, "step": 256880 }, { "epoch": 2.7446979005288745, "grad_norm": 0.4131181836128235, "learning_rate": 9.274765977756258e-07, "loss": 0.0077, "step": 256890 }, { "epoch": 2.744804743843154, "grad_norm": 0.14639337360858917, "learning_rate": 9.274678828421194e-07, "loss": 0.0251, "step": 256900 }, { "epoch": 2.744911587157434, "grad_norm": 3.080475330352783, "learning_rate": 9.274591674259696e-07, "loss": 0.0122, "step": 256910 }, { "epoch": 2.7450184304717133, "grad_norm": 0.7421445846557617, "learning_rate": 9.274504515271862e-07, "loss": 0.0035, "step": 256920 }, { "epoch": 2.745125273785993, "grad_norm": 0.027414832264184952, "learning_rate": 9.274417351457788e-07, "loss": 0.0167, "step": 256930 }, { "epoch": 2.7452321171002723, "grad_norm": 0.04707739129662514, "learning_rate": 9.274330182817575e-07, "loss": 0.0276, "step": 256940 }, { "epoch": 2.745338960414552, "grad_norm": 0.037787873297929764, "learning_rate": 9.274243009351321e-07, "loss": 0.0297, "step": 256950 }, { "epoch": 2.7454458037288316, "grad_norm": 4.3218841552734375, "learning_rate": 9.274155831059123e-07, "loss": 0.0162, "step": 256960 }, { "epoch": 2.7455526470431115, "grad_norm": 4.194112300872803, "learning_rate": 9.274068647941081e-07, "loss": 0.0105, "step": 256970 }, { "epoch": 2.745659490357391, "grad_norm": 0.00811974797397852, "learning_rate": 9.273981459997293e-07, "loss": 0.0079, "step": 256980 }, { "epoch": 2.7457663336716704, "grad_norm": 1.124265432357788, "learning_rate": 9.273894267227858e-07, "loss": 0.0325, "step": 256990 }, { "epoch": 2.74587317698595, "grad_norm": 2.6800894737243652, "learning_rate": 9.273807069632873e-07, "loss": 0.0348, "step": 257000 }, { "epoch": 2.74598002030023, "grad_norm": 0.47729602456092834, "learning_rate": 9.273719867212437e-07, "loss": 0.0098, "step": 257010 }, { "epoch": 2.7460868636145093, "grad_norm": 0.0032476335763931274, "learning_rate": 9.273632659966651e-07, "loss": 0.0621, "step": 257020 }, { "epoch": 2.746193706928789, "grad_norm": 0.03892176225781441, "learning_rate": 9.273545447895608e-07, "loss": 0.0259, "step": 257030 }, { "epoch": 2.7463005502430686, "grad_norm": 0.00935368798673153, "learning_rate": 9.273458230999411e-07, "loss": 0.0263, "step": 257040 }, { "epoch": 2.746407393557348, "grad_norm": 0.010878683999180794, "learning_rate": 9.273371009278157e-07, "loss": 0.0039, "step": 257050 }, { "epoch": 2.7465142368716275, "grad_norm": 0.032831769436597824, "learning_rate": 9.273283782731945e-07, "loss": 0.0655, "step": 257060 }, { "epoch": 2.7466210801859074, "grad_norm": 0.030165523290634155, "learning_rate": 9.273196551360873e-07, "loss": 0.031, "step": 257070 }, { "epoch": 2.746727923500187, "grad_norm": 0.05291905999183655, "learning_rate": 9.27310931516504e-07, "loss": 0.0233, "step": 257080 }, { "epoch": 2.746834766814467, "grad_norm": 0.46457675099372864, "learning_rate": 9.273022074144543e-07, "loss": 0.0039, "step": 257090 }, { "epoch": 2.7469416101287463, "grad_norm": 7.615629196166992, "learning_rate": 9.272934828299483e-07, "loss": 0.0132, "step": 257100 }, { "epoch": 2.7470484534430257, "grad_norm": 0.284561425447464, "learning_rate": 9.272847577629955e-07, "loss": 0.0175, "step": 257110 }, { "epoch": 2.747155296757305, "grad_norm": 0.494416743516922, "learning_rate": 9.272760322136061e-07, "loss": 0.0072, "step": 257120 }, { "epoch": 2.747262140071585, "grad_norm": 0.012783023528754711, "learning_rate": 9.272673061817899e-07, "loss": 0.0317, "step": 257130 }, { "epoch": 2.7473689833858645, "grad_norm": 1.0762810707092285, "learning_rate": 9.272585796675565e-07, "loss": 0.0039, "step": 257140 }, { "epoch": 2.7474758267001445, "grad_norm": 0.17822757363319397, "learning_rate": 9.272498526709159e-07, "loss": 0.0067, "step": 257150 }, { "epoch": 2.747582670014424, "grad_norm": 2.720524787902832, "learning_rate": 9.272411251918781e-07, "loss": 0.0256, "step": 257160 }, { "epoch": 2.7476895133287034, "grad_norm": 0.0008162642479874194, "learning_rate": 9.272323972304528e-07, "loss": 0.0111, "step": 257170 }, { "epoch": 2.747796356642983, "grad_norm": 2.8754851818084717, "learning_rate": 9.272236687866499e-07, "loss": 0.0153, "step": 257180 }, { "epoch": 2.7479031999572627, "grad_norm": 0.043070968240499496, "learning_rate": 9.272149398604791e-07, "loss": 0.0311, "step": 257190 }, { "epoch": 2.748010043271542, "grad_norm": 5.600130081176758, "learning_rate": 9.272062104519505e-07, "loss": 0.0111, "step": 257200 }, { "epoch": 2.748116886585822, "grad_norm": 3.2673568725585938, "learning_rate": 9.271974805610738e-07, "loss": 0.0386, "step": 257210 }, { "epoch": 2.7482237299001016, "grad_norm": 0.05606125295162201, "learning_rate": 9.271887501878589e-07, "loss": 0.0392, "step": 257220 }, { "epoch": 2.748330573214381, "grad_norm": 1.93303644657135, "learning_rate": 9.271800193323156e-07, "loss": 0.0104, "step": 257230 }, { "epoch": 2.7484374165286605, "grad_norm": 0.026623018085956573, "learning_rate": 9.27171287994454e-07, "loss": 0.0034, "step": 257240 }, { "epoch": 2.7485442598429404, "grad_norm": 0.02134730853140354, "learning_rate": 9.271625561742836e-07, "loss": 0.0093, "step": 257250 }, { "epoch": 2.74865110315722, "grad_norm": 0.5359598994255066, "learning_rate": 9.271538238718146e-07, "loss": 0.0161, "step": 257260 }, { "epoch": 2.7487579464714997, "grad_norm": 0.04212715104222298, "learning_rate": 9.271450910870566e-07, "loss": 0.0329, "step": 257270 }, { "epoch": 2.748864789785779, "grad_norm": 2.0268735885620117, "learning_rate": 9.271363578200195e-07, "loss": 0.0641, "step": 257280 }, { "epoch": 2.7489716331000587, "grad_norm": 1.3651889562606812, "learning_rate": 9.271276240707133e-07, "loss": 0.042, "step": 257290 }, { "epoch": 2.749078476414338, "grad_norm": 5.206341743469238, "learning_rate": 9.271188898391478e-07, "loss": 0.0089, "step": 257300 }, { "epoch": 2.749185319728618, "grad_norm": 0.007867860607802868, "learning_rate": 9.271101551253327e-07, "loss": 0.0071, "step": 257310 }, { "epoch": 2.7492921630428975, "grad_norm": 5.384372711181641, "learning_rate": 9.271014199292782e-07, "loss": 0.0138, "step": 257320 }, { "epoch": 2.7493990063571774, "grad_norm": 0.011399087496101856, "learning_rate": 9.270926842509938e-07, "loss": 0.0221, "step": 257330 }, { "epoch": 2.749505849671457, "grad_norm": 2.9615867137908936, "learning_rate": 9.270839480904895e-07, "loss": 0.0171, "step": 257340 }, { "epoch": 2.7496126929857363, "grad_norm": 2.9288253784179688, "learning_rate": 9.270752114477752e-07, "loss": 0.0343, "step": 257350 }, { "epoch": 2.7497195363000158, "grad_norm": 0.11169116944074631, "learning_rate": 9.270664743228609e-07, "loss": 0.0148, "step": 257360 }, { "epoch": 2.7498263796142957, "grad_norm": 4.322469711303711, "learning_rate": 9.270577367157562e-07, "loss": 0.0503, "step": 257370 }, { "epoch": 2.749933222928575, "grad_norm": 3.783358573913574, "learning_rate": 9.270489986264711e-07, "loss": 0.0071, "step": 257380 }, { "epoch": 2.750040066242855, "grad_norm": 0.026229610666632652, "learning_rate": 9.270402600550154e-07, "loss": 0.0323, "step": 257390 }, { "epoch": 2.7501469095571345, "grad_norm": 0.004077670630067587, "learning_rate": 9.270315210013992e-07, "loss": 0.0243, "step": 257400 }, { "epoch": 2.750253752871414, "grad_norm": 0.015214706771075726, "learning_rate": 9.270227814656319e-07, "loss": 0.0242, "step": 257410 }, { "epoch": 2.750360596185694, "grad_norm": 7.033626079559326, "learning_rate": 9.270140414477239e-07, "loss": 0.0065, "step": 257420 }, { "epoch": 2.7504674394999733, "grad_norm": 0.4243435561656952, "learning_rate": 9.270053009476849e-07, "loss": 0.0045, "step": 257430 }, { "epoch": 2.7505742828142528, "grad_norm": 0.034530382603406906, "learning_rate": 9.269965599655244e-07, "loss": 0.0053, "step": 257440 }, { "epoch": 2.7506811261285327, "grad_norm": 4.5911149978637695, "learning_rate": 9.269878185012526e-07, "loss": 0.0194, "step": 257450 }, { "epoch": 2.750787969442812, "grad_norm": 3.044347047805786, "learning_rate": 9.269790765548795e-07, "loss": 0.0286, "step": 257460 }, { "epoch": 2.7508948127570916, "grad_norm": 0.09487994760274887, "learning_rate": 9.269703341264146e-07, "loss": 0.027, "step": 257470 }, { "epoch": 2.7510016560713715, "grad_norm": 0.009983171708881855, "learning_rate": 9.269615912158681e-07, "loss": 0.0092, "step": 257480 }, { "epoch": 2.751108499385651, "grad_norm": 0.15930470824241638, "learning_rate": 9.269528478232498e-07, "loss": 0.0136, "step": 257490 }, { "epoch": 2.7512153426999304, "grad_norm": 5.679133892059326, "learning_rate": 9.269441039485694e-07, "loss": 0.0181, "step": 257500 }, { "epoch": 2.7513221860142103, "grad_norm": 9.084026336669922, "learning_rate": 9.269353595918368e-07, "loss": 0.0258, "step": 257510 }, { "epoch": 2.7514290293284898, "grad_norm": 0.4451678395271301, "learning_rate": 9.269266147530621e-07, "loss": 0.0085, "step": 257520 }, { "epoch": 2.751535872642769, "grad_norm": 2.2255825996398926, "learning_rate": 9.26917869432255e-07, "loss": 0.0146, "step": 257530 }, { "epoch": 2.751642715957049, "grad_norm": 0.030828461050987244, "learning_rate": 9.269091236294253e-07, "loss": 0.0565, "step": 257540 }, { "epoch": 2.7517495592713286, "grad_norm": 0.15971603989601135, "learning_rate": 9.269003773445831e-07, "loss": 0.002, "step": 257550 }, { "epoch": 2.751856402585608, "grad_norm": 0.19496001303195953, "learning_rate": 9.268916305777381e-07, "loss": 0.0209, "step": 257560 }, { "epoch": 2.751963245899888, "grad_norm": 11.559840202331543, "learning_rate": 9.268828833289004e-07, "loss": 0.0233, "step": 257570 }, { "epoch": 2.7520700892141674, "grad_norm": 5.2703752517700195, "learning_rate": 9.268741355980794e-07, "loss": 0.0253, "step": 257580 }, { "epoch": 2.752176932528447, "grad_norm": 0.33617955446243286, "learning_rate": 9.268653873852855e-07, "loss": 0.0519, "step": 257590 }, { "epoch": 2.7522837758427268, "grad_norm": 1.0443814992904663, "learning_rate": 9.268566386905284e-07, "loss": 0.025, "step": 257600 }, { "epoch": 2.7523906191570062, "grad_norm": 4.862279891967773, "learning_rate": 9.268478895138178e-07, "loss": 0.0459, "step": 257610 }, { "epoch": 2.752497462471286, "grad_norm": 0.8825281262397766, "learning_rate": 9.268391398551638e-07, "loss": 0.0179, "step": 257620 }, { "epoch": 2.7526043057855656, "grad_norm": 0.004281891975551844, "learning_rate": 9.268303897145761e-07, "loss": 0.0033, "step": 257630 }, { "epoch": 2.752711149099845, "grad_norm": 0.05122033879160881, "learning_rate": 9.268216390920648e-07, "loss": 0.0099, "step": 257640 }, { "epoch": 2.7528179924141245, "grad_norm": 7.887514591217041, "learning_rate": 9.268128879876396e-07, "loss": 0.0319, "step": 257650 }, { "epoch": 2.7529248357284044, "grad_norm": 0.1132357269525528, "learning_rate": 9.268041364013104e-07, "loss": 0.0392, "step": 257660 }, { "epoch": 2.753031679042684, "grad_norm": 0.009436349384486675, "learning_rate": 9.267953843330871e-07, "loss": 0.011, "step": 257670 }, { "epoch": 2.7531385223569638, "grad_norm": 0.12295554578304291, "learning_rate": 9.267866317829796e-07, "loss": 0.0288, "step": 257680 }, { "epoch": 2.7532453656712432, "grad_norm": 6.6141839027404785, "learning_rate": 9.267778787509979e-07, "loss": 0.0114, "step": 257690 }, { "epoch": 2.7533522089855227, "grad_norm": 0.04295692220330238, "learning_rate": 9.267691252371516e-07, "loss": 0.0077, "step": 257700 }, { "epoch": 2.753459052299802, "grad_norm": 10.992130279541016, "learning_rate": 9.267603712414509e-07, "loss": 0.0623, "step": 257710 }, { "epoch": 2.753565895614082, "grad_norm": 0.02015044540166855, "learning_rate": 9.267516167639056e-07, "loss": 0.0031, "step": 257720 }, { "epoch": 2.7536727389283615, "grad_norm": 0.035230282694101334, "learning_rate": 9.267428618045252e-07, "loss": 0.0129, "step": 257730 }, { "epoch": 2.7537795822426414, "grad_norm": 2.4201438426971436, "learning_rate": 9.267341063633202e-07, "loss": 0.0147, "step": 257740 }, { "epoch": 2.753886425556921, "grad_norm": 0.5594243407249451, "learning_rate": 9.267253504403001e-07, "loss": 0.0124, "step": 257750 }, { "epoch": 2.7539932688712003, "grad_norm": 10.37584400177002, "learning_rate": 9.267165940354747e-07, "loss": 0.0206, "step": 257760 }, { "epoch": 2.75410011218548, "grad_norm": 1.1405775547027588, "learning_rate": 9.267078371488544e-07, "loss": 0.0378, "step": 257770 }, { "epoch": 2.7542069554997597, "grad_norm": 3.4146370887756348, "learning_rate": 9.266990797804484e-07, "loss": 0.0198, "step": 257780 }, { "epoch": 2.754313798814039, "grad_norm": 3.020484447479248, "learning_rate": 9.266903219302671e-07, "loss": 0.0179, "step": 257790 }, { "epoch": 2.754420642128319, "grad_norm": 0.005552599672228098, "learning_rate": 9.266815635983202e-07, "loss": 0.0208, "step": 257800 }, { "epoch": 2.7545274854425985, "grad_norm": 0.1688249111175537, "learning_rate": 9.266728047846177e-07, "loss": 0.0069, "step": 257810 }, { "epoch": 2.754634328756878, "grad_norm": 3.6871538162231445, "learning_rate": 9.266640454891693e-07, "loss": 0.0207, "step": 257820 }, { "epoch": 2.7547411720711574, "grad_norm": 0.7988042235374451, "learning_rate": 9.26655285711985e-07, "loss": 0.0279, "step": 257830 }, { "epoch": 2.7548480153854373, "grad_norm": 0.5939759612083435, "learning_rate": 9.266465254530748e-07, "loss": 0.0177, "step": 257840 }, { "epoch": 2.754954858699717, "grad_norm": 4.579522132873535, "learning_rate": 9.266377647124483e-07, "loss": 0.0468, "step": 257850 }, { "epoch": 2.7550617020139967, "grad_norm": 4.5225419998168945, "learning_rate": 9.266290034901156e-07, "loss": 0.0322, "step": 257860 }, { "epoch": 2.755168545328276, "grad_norm": 3.00506854057312, "learning_rate": 9.266202417860867e-07, "loss": 0.0279, "step": 257870 }, { "epoch": 2.7552753886425556, "grad_norm": 4.955045223236084, "learning_rate": 9.266114796003711e-07, "loss": 0.0286, "step": 257880 }, { "epoch": 2.755382231956835, "grad_norm": 0.044265106320381165, "learning_rate": 9.266027169329791e-07, "loss": 0.0376, "step": 257890 }, { "epoch": 2.755489075271115, "grad_norm": 0.010030604898929596, "learning_rate": 9.265939537839204e-07, "loss": 0.0084, "step": 257900 }, { "epoch": 2.7555959185853944, "grad_norm": 0.07262347638607025, "learning_rate": 9.265851901532049e-07, "loss": 0.0081, "step": 257910 }, { "epoch": 2.7557027618996743, "grad_norm": 0.009853383526206017, "learning_rate": 9.265764260408426e-07, "loss": 0.0225, "step": 257920 }, { "epoch": 2.755809605213954, "grad_norm": 1.7083677053451538, "learning_rate": 9.265676614468434e-07, "loss": 0.0088, "step": 257930 }, { "epoch": 2.7559164485282333, "grad_norm": 4.098267555236816, "learning_rate": 9.26558896371217e-07, "loss": 0.0119, "step": 257940 }, { "epoch": 2.7560232918425127, "grad_norm": 3.133084535598755, "learning_rate": 9.265501308139733e-07, "loss": 0.0646, "step": 257950 }, { "epoch": 2.7561301351567926, "grad_norm": 12.941669464111328, "learning_rate": 9.265413647751225e-07, "loss": 0.0463, "step": 257960 }, { "epoch": 2.756236978471072, "grad_norm": 1.8113256692886353, "learning_rate": 9.265325982546742e-07, "loss": 0.0166, "step": 257970 }, { "epoch": 2.756343821785352, "grad_norm": 0.4838618338108063, "learning_rate": 9.265238312526385e-07, "loss": 0.0231, "step": 257980 }, { "epoch": 2.7564506650996314, "grad_norm": 2.2595627307891846, "learning_rate": 9.265150637690251e-07, "loss": 0.0077, "step": 257990 }, { "epoch": 2.756557508413911, "grad_norm": 0.3296695649623871, "learning_rate": 9.265062958038441e-07, "loss": 0.0035, "step": 258000 }, { "epoch": 2.7566643517281904, "grad_norm": 1.8252243995666504, "learning_rate": 9.264975273571051e-07, "loss": 0.0192, "step": 258010 }, { "epoch": 2.7567711950424703, "grad_norm": 0.4115789830684662, "learning_rate": 9.264887584288184e-07, "loss": 0.0181, "step": 258020 }, { "epoch": 2.7568780383567497, "grad_norm": 0.8708360195159912, "learning_rate": 9.264799890189936e-07, "loss": 0.0069, "step": 258030 }, { "epoch": 2.7569848816710296, "grad_norm": 0.09384617954492569, "learning_rate": 9.264712191276408e-07, "loss": 0.0196, "step": 258040 }, { "epoch": 2.757091724985309, "grad_norm": 0.6137325763702393, "learning_rate": 9.264624487547698e-07, "loss": 0.0383, "step": 258050 }, { "epoch": 2.7571985682995885, "grad_norm": 0.35293275117874146, "learning_rate": 9.264536779003904e-07, "loss": 0.0034, "step": 258060 }, { "epoch": 2.757305411613868, "grad_norm": 0.6212672591209412, "learning_rate": 9.264449065645128e-07, "loss": 0.042, "step": 258070 }, { "epoch": 2.757412254928148, "grad_norm": 7.840836048126221, "learning_rate": 9.264361347471465e-07, "loss": 0.0491, "step": 258080 }, { "epoch": 2.7575190982424274, "grad_norm": 0.015357895754277706, "learning_rate": 9.264273624483017e-07, "loss": 0.0293, "step": 258090 }, { "epoch": 2.7576259415567073, "grad_norm": 0.006452657748013735, "learning_rate": 9.264185896679883e-07, "loss": 0.0373, "step": 258100 }, { "epoch": 2.7577327848709867, "grad_norm": 0.013168507255613804, "learning_rate": 9.26409816406216e-07, "loss": 0.0389, "step": 258110 }, { "epoch": 2.757839628185266, "grad_norm": 1.0414321422576904, "learning_rate": 9.264010426629949e-07, "loss": 0.0048, "step": 258120 }, { "epoch": 2.757946471499546, "grad_norm": 0.0721273347735405, "learning_rate": 9.263922684383349e-07, "loss": 0.008, "step": 258130 }, { "epoch": 2.7580533148138255, "grad_norm": 1.628984808921814, "learning_rate": 9.263834937322458e-07, "loss": 0.0436, "step": 258140 }, { "epoch": 2.758160158128105, "grad_norm": 4.51588773727417, "learning_rate": 9.263747185447375e-07, "loss": 0.0114, "step": 258150 }, { "epoch": 2.758267001442385, "grad_norm": 0.3555428087711334, "learning_rate": 9.2636594287582e-07, "loss": 0.0305, "step": 258160 }, { "epoch": 2.7583738447566644, "grad_norm": 2.0610132217407227, "learning_rate": 9.263571667255033e-07, "loss": 0.0205, "step": 258170 }, { "epoch": 2.758480688070944, "grad_norm": 0.8278557658195496, "learning_rate": 9.26348390093797e-07, "loss": 0.0409, "step": 258180 }, { "epoch": 2.7585875313852237, "grad_norm": 0.047161176800727844, "learning_rate": 9.263396129807112e-07, "loss": 0.0146, "step": 258190 }, { "epoch": 2.758694374699503, "grad_norm": 0.02674327790737152, "learning_rate": 9.263308353862559e-07, "loss": 0.0051, "step": 258200 }, { "epoch": 2.7588012180137826, "grad_norm": 2.129105806350708, "learning_rate": 9.263220573104409e-07, "loss": 0.0227, "step": 258210 }, { "epoch": 2.7589080613280625, "grad_norm": 0.7780489325523376, "learning_rate": 9.26313278753276e-07, "loss": 0.0056, "step": 258220 }, { "epoch": 2.759014904642342, "grad_norm": 0.0966767817735672, "learning_rate": 9.263044997147713e-07, "loss": 0.0149, "step": 258230 }, { "epoch": 2.7591217479566215, "grad_norm": 1.528571605682373, "learning_rate": 9.262957201949367e-07, "loss": 0.0115, "step": 258240 }, { "epoch": 2.7592285912709014, "grad_norm": 0.005399285349994898, "learning_rate": 9.262869401937821e-07, "loss": 0.0037, "step": 258250 }, { "epoch": 2.759335434585181, "grad_norm": 0.0024486654438078403, "learning_rate": 9.262781597113172e-07, "loss": 0.0323, "step": 258260 }, { "epoch": 2.7594422778994603, "grad_norm": 0.003518105484545231, "learning_rate": 9.262693787475522e-07, "loss": 0.0447, "step": 258270 }, { "epoch": 2.75954912121374, "grad_norm": 0.4163758456707001, "learning_rate": 9.262605973024969e-07, "loss": 0.0372, "step": 258280 }, { "epoch": 2.7596559645280196, "grad_norm": 0.3142605125904083, "learning_rate": 9.262518153761611e-07, "loss": 0.0071, "step": 258290 }, { "epoch": 2.759762807842299, "grad_norm": 0.4090256094932556, "learning_rate": 9.262430329685549e-07, "loss": 0.0139, "step": 258300 }, { "epoch": 2.759869651156579, "grad_norm": 0.07315514981746674, "learning_rate": 9.262342500796883e-07, "loss": 0.0046, "step": 258310 }, { "epoch": 2.7599764944708585, "grad_norm": 3.376758575439453, "learning_rate": 9.262254667095708e-07, "loss": 0.029, "step": 258320 }, { "epoch": 2.7600833377851384, "grad_norm": 2.4489262104034424, "learning_rate": 9.262166828582128e-07, "loss": 0.0173, "step": 258330 }, { "epoch": 2.760190181099418, "grad_norm": 0.0184724573045969, "learning_rate": 9.262078985256238e-07, "loss": 0.0229, "step": 258340 }, { "epoch": 2.7602970244136973, "grad_norm": 0.006683523766696453, "learning_rate": 9.261991137118141e-07, "loss": 0.0773, "step": 258350 }, { "epoch": 2.7604038677279767, "grad_norm": 0.23512998223304749, "learning_rate": 9.261903284167933e-07, "loss": 0.0273, "step": 258360 }, { "epoch": 2.7605107110422566, "grad_norm": 0.6380130648612976, "learning_rate": 9.261815426405715e-07, "loss": 0.003, "step": 258370 }, { "epoch": 2.760617554356536, "grad_norm": 0.18000133335590363, "learning_rate": 9.261727563831585e-07, "loss": 0.0072, "step": 258380 }, { "epoch": 2.760724397670816, "grad_norm": 0.004207144491374493, "learning_rate": 9.261639696445644e-07, "loss": 0.0713, "step": 258390 }, { "epoch": 2.7608312409850955, "grad_norm": 0.006257807835936546, "learning_rate": 9.261551824247991e-07, "loss": 0.0183, "step": 258400 }, { "epoch": 2.760938084299375, "grad_norm": 2.6708500385284424, "learning_rate": 9.261463947238722e-07, "loss": 0.0264, "step": 258410 }, { "epoch": 2.7610449276136544, "grad_norm": 0.5323806405067444, "learning_rate": 9.261376065417941e-07, "loss": 0.0217, "step": 258420 }, { "epoch": 2.7611517709279343, "grad_norm": 2.0376384258270264, "learning_rate": 9.261288178785742e-07, "loss": 0.0145, "step": 258430 }, { "epoch": 2.7612586142422137, "grad_norm": 0.028052620589733124, "learning_rate": 9.26120028734223e-07, "loss": 0.0092, "step": 258440 }, { "epoch": 2.7613654575564937, "grad_norm": 1.0473079681396484, "learning_rate": 9.261112391087499e-07, "loss": 0.0354, "step": 258450 }, { "epoch": 2.761472300870773, "grad_norm": 0.15348531305789948, "learning_rate": 9.261024490021651e-07, "loss": 0.0203, "step": 258460 }, { "epoch": 2.7615791441850526, "grad_norm": 7.4964518547058105, "learning_rate": 9.260936584144786e-07, "loss": 0.0203, "step": 258470 }, { "epoch": 2.761685987499332, "grad_norm": 0.0412253737449646, "learning_rate": 9.260848673457002e-07, "loss": 0.0263, "step": 258480 }, { "epoch": 2.761792830813612, "grad_norm": 1.1908568143844604, "learning_rate": 9.260760757958397e-07, "loss": 0.0516, "step": 258490 }, { "epoch": 2.7618996741278914, "grad_norm": 0.8882921934127808, "learning_rate": 9.260672837649071e-07, "loss": 0.0075, "step": 258500 }, { "epoch": 2.7620065174421713, "grad_norm": 7.612504959106445, "learning_rate": 9.260584912529126e-07, "loss": 0.0299, "step": 258510 }, { "epoch": 2.7621133607564508, "grad_norm": 0.019772280007600784, "learning_rate": 9.260496982598657e-07, "loss": 0.0172, "step": 258520 }, { "epoch": 2.76222020407073, "grad_norm": 0.029689155519008636, "learning_rate": 9.260409047857766e-07, "loss": 0.0311, "step": 258530 }, { "epoch": 2.7623270473850097, "grad_norm": 5.818167686462402, "learning_rate": 9.260321108306553e-07, "loss": 0.0197, "step": 258540 }, { "epoch": 2.7624338906992896, "grad_norm": 5.09254789352417, "learning_rate": 9.260233163945114e-07, "loss": 0.0043, "step": 258550 }, { "epoch": 2.762540734013569, "grad_norm": 0.05852853134274483, "learning_rate": 9.26014521477355e-07, "loss": 0.0082, "step": 258560 }, { "epoch": 2.762647577327849, "grad_norm": 0.08846063911914825, "learning_rate": 9.260057260791962e-07, "loss": 0.0208, "step": 258570 }, { "epoch": 2.7627544206421284, "grad_norm": 0.07596049457788467, "learning_rate": 9.259969302000448e-07, "loss": 0.021, "step": 258580 }, { "epoch": 2.762861263956408, "grad_norm": 1.256211280822754, "learning_rate": 9.259881338399106e-07, "loss": 0.0348, "step": 258590 }, { "epoch": 2.7629681072706873, "grad_norm": 1.4969096183776855, "learning_rate": 9.259793369988037e-07, "loss": 0.0005, "step": 258600 }, { "epoch": 2.763074950584967, "grad_norm": 3.593498945236206, "learning_rate": 9.259705396767339e-07, "loss": 0.0184, "step": 258610 }, { "epoch": 2.7631817938992467, "grad_norm": 0.6404493451118469, "learning_rate": 9.259617418737113e-07, "loss": 0.0192, "step": 258620 }, { "epoch": 2.7632886372135266, "grad_norm": 0.02844996377825737, "learning_rate": 9.259529435897457e-07, "loss": 0.0573, "step": 258630 }, { "epoch": 2.763395480527806, "grad_norm": 3.806586503982544, "learning_rate": 9.259441448248472e-07, "loss": 0.0152, "step": 258640 }, { "epoch": 2.7635023238420855, "grad_norm": 0.15161234140396118, "learning_rate": 9.259353455790256e-07, "loss": 0.0185, "step": 258650 }, { "epoch": 2.763609167156365, "grad_norm": 7.887052536010742, "learning_rate": 9.259265458522906e-07, "loss": 0.0318, "step": 258660 }, { "epoch": 2.763716010470645, "grad_norm": 0.009100823663175106, "learning_rate": 9.259177456446525e-07, "loss": 0.0157, "step": 258670 }, { "epoch": 2.7638228537849243, "grad_norm": 9.350861549377441, "learning_rate": 9.259089449561213e-07, "loss": 0.0502, "step": 258680 }, { "epoch": 2.763929697099204, "grad_norm": 0.008788594976067543, "learning_rate": 9.259001437867065e-07, "loss": 0.0037, "step": 258690 }, { "epoch": 2.7640365404134837, "grad_norm": 1.331889271736145, "learning_rate": 9.258913421364185e-07, "loss": 0.0098, "step": 258700 }, { "epoch": 2.764143383727763, "grad_norm": 1.7229771614074707, "learning_rate": 9.25882540005267e-07, "loss": 0.0072, "step": 258710 }, { "epoch": 2.7642502270420426, "grad_norm": 0.0891827866435051, "learning_rate": 9.25873737393262e-07, "loss": 0.0185, "step": 258720 }, { "epoch": 2.7643570703563225, "grad_norm": 0.012034865096211433, "learning_rate": 9.258649343004132e-07, "loss": 0.0117, "step": 258730 }, { "epoch": 2.764463913670602, "grad_norm": 2.267382860183716, "learning_rate": 9.25856130726731e-07, "loss": 0.0072, "step": 258740 }, { "epoch": 2.764570756984882, "grad_norm": 8.013178825378418, "learning_rate": 9.258473266722249e-07, "loss": 0.0325, "step": 258750 }, { "epoch": 2.7646776002991613, "grad_norm": 0.21039260923862457, "learning_rate": 9.258385221369051e-07, "loss": 0.022, "step": 258760 }, { "epoch": 2.764784443613441, "grad_norm": 0.007120670285075903, "learning_rate": 9.258297171207814e-07, "loss": 0.0309, "step": 258770 }, { "epoch": 2.7648912869277202, "grad_norm": 1.1229358911514282, "learning_rate": 9.258209116238638e-07, "loss": 0.0077, "step": 258780 }, { "epoch": 2.764998130242, "grad_norm": 0.040313102304935455, "learning_rate": 9.258121056461624e-07, "loss": 0.0147, "step": 258790 }, { "epoch": 2.7651049735562796, "grad_norm": 0.7632755637168884, "learning_rate": 9.258032991876869e-07, "loss": 0.023, "step": 258800 }, { "epoch": 2.7652118168705595, "grad_norm": 6.110448360443115, "learning_rate": 9.257944922484473e-07, "loss": 0.0362, "step": 258810 }, { "epoch": 2.765318660184839, "grad_norm": 0.0164102204144001, "learning_rate": 9.257856848284538e-07, "loss": 0.005, "step": 258820 }, { "epoch": 2.7654255034991184, "grad_norm": 0.06457924097776413, "learning_rate": 9.257768769277159e-07, "loss": 0.0857, "step": 258830 }, { "epoch": 2.765532346813398, "grad_norm": 0.5706505179405212, "learning_rate": 9.257680685462438e-07, "loss": 0.0182, "step": 258840 }, { "epoch": 2.765639190127678, "grad_norm": 0.06091541424393654, "learning_rate": 9.257592596840474e-07, "loss": 0.0284, "step": 258850 }, { "epoch": 2.7657460334419572, "grad_norm": 2.6875922679901123, "learning_rate": 9.257504503411367e-07, "loss": 0.0216, "step": 258860 }, { "epoch": 2.765852876756237, "grad_norm": 0.42166808247566223, "learning_rate": 9.257416405175217e-07, "loss": 0.0268, "step": 258870 }, { "epoch": 2.7659597200705166, "grad_norm": 18.316116333007812, "learning_rate": 9.257328302132121e-07, "loss": 0.0399, "step": 258880 }, { "epoch": 2.766066563384796, "grad_norm": 0.016259025782346725, "learning_rate": 9.257240194282181e-07, "loss": 0.0077, "step": 258890 }, { "epoch": 2.766173406699076, "grad_norm": 1.0142245292663574, "learning_rate": 9.257152081625496e-07, "loss": 0.0128, "step": 258900 }, { "epoch": 2.7662802500133554, "grad_norm": 2.77510929107666, "learning_rate": 9.257063964162164e-07, "loss": 0.0035, "step": 258910 }, { "epoch": 2.766387093327635, "grad_norm": 0.9895524978637695, "learning_rate": 9.256975841892287e-07, "loss": 0.0196, "step": 258920 }, { "epoch": 2.766493936641915, "grad_norm": 0.6039266586303711, "learning_rate": 9.256887714815961e-07, "loss": 0.0109, "step": 258930 }, { "epoch": 2.7666007799561942, "grad_norm": 0.628420889377594, "learning_rate": 9.256799582933288e-07, "loss": 0.0345, "step": 258940 }, { "epoch": 2.7667076232704737, "grad_norm": 6.177915573120117, "learning_rate": 9.256711446244367e-07, "loss": 0.0441, "step": 258950 }, { "epoch": 2.7668144665847536, "grad_norm": 0.3075812757015228, "learning_rate": 9.256623304749298e-07, "loss": 0.0369, "step": 258960 }, { "epoch": 2.766921309899033, "grad_norm": 0.027351675555109978, "learning_rate": 9.25653515844818e-07, "loss": 0.0084, "step": 258970 }, { "epoch": 2.7670281532133125, "grad_norm": 0.03827826306223869, "learning_rate": 9.256447007341113e-07, "loss": 0.0321, "step": 258980 }, { "epoch": 2.7671349965275924, "grad_norm": 1.6479579210281372, "learning_rate": 9.256358851428194e-07, "loss": 0.012, "step": 258990 }, { "epoch": 2.767241839841872, "grad_norm": 0.19646869599819183, "learning_rate": 9.256270690709527e-07, "loss": 0.0225, "step": 259000 }, { "epoch": 2.7673486831561513, "grad_norm": 0.004533296450972557, "learning_rate": 9.256182525185208e-07, "loss": 0.011, "step": 259010 }, { "epoch": 2.7674555264704312, "grad_norm": 8.071949005126953, "learning_rate": 9.256094354855338e-07, "loss": 0.0548, "step": 259020 }, { "epoch": 2.7675623697847107, "grad_norm": 0.21517539024353027, "learning_rate": 9.256006179720016e-07, "loss": 0.0375, "step": 259030 }, { "epoch": 2.76766921309899, "grad_norm": 0.27731549739837646, "learning_rate": 9.255917999779342e-07, "loss": 0.0079, "step": 259040 }, { "epoch": 2.76777605641327, "grad_norm": 0.5148182511329651, "learning_rate": 9.255829815033416e-07, "loss": 0.0094, "step": 259050 }, { "epoch": 2.7678828997275495, "grad_norm": 0.030037060379981995, "learning_rate": 9.255741625482337e-07, "loss": 0.0276, "step": 259060 }, { "epoch": 2.767989743041829, "grad_norm": 0.45168137550354004, "learning_rate": 9.255653431126203e-07, "loss": 0.0067, "step": 259070 }, { "epoch": 2.768096586356109, "grad_norm": 0.01024780236184597, "learning_rate": 9.255565231965117e-07, "loss": 0.0116, "step": 259080 }, { "epoch": 2.7682034296703883, "grad_norm": 3.57051420211792, "learning_rate": 9.255477027999175e-07, "loss": 0.0301, "step": 259090 }, { "epoch": 2.7683102729846683, "grad_norm": 0.7480422258377075, "learning_rate": 9.255388819228478e-07, "loss": 0.0137, "step": 259100 }, { "epoch": 2.7684171162989477, "grad_norm": 13.595536231994629, "learning_rate": 9.255300605653127e-07, "loss": 0.0319, "step": 259110 }, { "epoch": 2.768523959613227, "grad_norm": 0.08903449773788452, "learning_rate": 9.25521238727322e-07, "loss": 0.0171, "step": 259120 }, { "epoch": 2.7686308029275066, "grad_norm": 3.188652276992798, "learning_rate": 9.255124164088857e-07, "loss": 0.0155, "step": 259130 }, { "epoch": 2.7687376462417865, "grad_norm": 4.252842903137207, "learning_rate": 9.255035936100137e-07, "loss": 0.0061, "step": 259140 }, { "epoch": 2.768844489556066, "grad_norm": 0.025224903598427773, "learning_rate": 9.254947703307162e-07, "loss": 0.0023, "step": 259150 }, { "epoch": 2.768951332870346, "grad_norm": 0.11228165775537491, "learning_rate": 9.25485946571003e-07, "loss": 0.0869, "step": 259160 }, { "epoch": 2.7690581761846254, "grad_norm": 0.030185913667082787, "learning_rate": 9.254771223308839e-07, "loss": 0.0198, "step": 259170 }, { "epoch": 2.769165019498905, "grad_norm": 3.0989484786987305, "learning_rate": 9.254682976103691e-07, "loss": 0.0155, "step": 259180 }, { "epoch": 2.7692718628131843, "grad_norm": 2.6628127098083496, "learning_rate": 9.254594724094685e-07, "loss": 0.0161, "step": 259190 }, { "epoch": 2.769378706127464, "grad_norm": 2.248032331466675, "learning_rate": 9.25450646728192e-07, "loss": 0.0088, "step": 259200 }, { "epoch": 2.7694855494417436, "grad_norm": 0.40344417095184326, "learning_rate": 9.254418205665495e-07, "loss": 0.0196, "step": 259210 }, { "epoch": 2.7695923927560235, "grad_norm": 0.10068827867507935, "learning_rate": 9.254329939245512e-07, "loss": 0.0222, "step": 259220 }, { "epoch": 2.769699236070303, "grad_norm": 0.13399192690849304, "learning_rate": 9.25424166802207e-07, "loss": 0.0127, "step": 259230 }, { "epoch": 2.7698060793845825, "grad_norm": 5.106140613555908, "learning_rate": 9.254153391995268e-07, "loss": 0.012, "step": 259240 }, { "epoch": 2.769912922698862, "grad_norm": 6.401418209075928, "learning_rate": 9.254065111165204e-07, "loss": 0.0548, "step": 259250 }, { "epoch": 2.770019766013142, "grad_norm": 5.245116233825684, "learning_rate": 9.253976825531981e-07, "loss": 0.0224, "step": 259260 }, { "epoch": 2.7701266093274213, "grad_norm": 0.028342876583337784, "learning_rate": 9.253888535095696e-07, "loss": 0.0323, "step": 259270 }, { "epoch": 2.770233452641701, "grad_norm": 0.13044272363185883, "learning_rate": 9.25380023985645e-07, "loss": 0.0293, "step": 259280 }, { "epoch": 2.7703402959559806, "grad_norm": 4.798940658569336, "learning_rate": 9.253711939814344e-07, "loss": 0.0128, "step": 259290 }, { "epoch": 2.77044713927026, "grad_norm": 1.4087612628936768, "learning_rate": 9.253623634969474e-07, "loss": 0.0106, "step": 259300 }, { "epoch": 2.7705539825845396, "grad_norm": 0.5072075128555298, "learning_rate": 9.253535325321944e-07, "loss": 0.0321, "step": 259310 }, { "epoch": 2.7706608258988195, "grad_norm": 7.274178981781006, "learning_rate": 9.25344701087185e-07, "loss": 0.0152, "step": 259320 }, { "epoch": 2.770767669213099, "grad_norm": 16.37944984436035, "learning_rate": 9.253358691619292e-07, "loss": 0.0099, "step": 259330 }, { "epoch": 2.770874512527379, "grad_norm": 0.028763610869646072, "learning_rate": 9.253270367564375e-07, "loss": 0.0083, "step": 259340 }, { "epoch": 2.7709813558416583, "grad_norm": 0.01104763150215149, "learning_rate": 9.253182038707192e-07, "loss": 0.0075, "step": 259350 }, { "epoch": 2.7710881991559377, "grad_norm": 0.004128789994865656, "learning_rate": 9.253093705047847e-07, "loss": 0.0172, "step": 259360 }, { "epoch": 2.771195042470217, "grad_norm": 2.7661869525909424, "learning_rate": 9.253005366586437e-07, "loss": 0.0243, "step": 259370 }, { "epoch": 2.771301885784497, "grad_norm": 0.9648990035057068, "learning_rate": 9.252917023323064e-07, "loss": 0.0082, "step": 259380 }, { "epoch": 2.7714087290987766, "grad_norm": 0.959663987159729, "learning_rate": 9.252828675257824e-07, "loss": 0.0234, "step": 259390 }, { "epoch": 2.7715155724130565, "grad_norm": 6.777709007263184, "learning_rate": 9.252740322390821e-07, "loss": 0.0204, "step": 259400 }, { "epoch": 2.771622415727336, "grad_norm": 0.28223735094070435, "learning_rate": 9.252651964722156e-07, "loss": 0.0125, "step": 259410 }, { "epoch": 2.7717292590416154, "grad_norm": 0.017819371074438095, "learning_rate": 9.252563602251922e-07, "loss": 0.0271, "step": 259420 }, { "epoch": 2.771836102355895, "grad_norm": 0.03951555863022804, "learning_rate": 9.252475234980225e-07, "loss": 0.009, "step": 259430 }, { "epoch": 2.7719429456701747, "grad_norm": 0.013195028528571129, "learning_rate": 9.252386862907162e-07, "loss": 0.0098, "step": 259440 }, { "epoch": 2.772049788984454, "grad_norm": 4.679219722747803, "learning_rate": 9.252298486032832e-07, "loss": 0.0239, "step": 259450 }, { "epoch": 2.772156632298734, "grad_norm": 1.3724515438079834, "learning_rate": 9.252210104357338e-07, "loss": 0.0079, "step": 259460 }, { "epoch": 2.7722634756130136, "grad_norm": 0.03005288727581501, "learning_rate": 9.252121717880776e-07, "loss": 0.001, "step": 259470 }, { "epoch": 2.772370318927293, "grad_norm": 0.043225038796663284, "learning_rate": 9.25203332660325e-07, "loss": 0.0051, "step": 259480 }, { "epoch": 2.7724771622415725, "grad_norm": 1.6792207956314087, "learning_rate": 9.251944930524855e-07, "loss": 0.035, "step": 259490 }, { "epoch": 2.7725840055558524, "grad_norm": 0.0839015319943428, "learning_rate": 9.251856529645696e-07, "loss": 0.0318, "step": 259500 }, { "epoch": 2.772690848870132, "grad_norm": 0.06520790606737137, "learning_rate": 9.251768123965868e-07, "loss": 0.0053, "step": 259510 }, { "epoch": 2.7727976921844117, "grad_norm": 0.39323413372039795, "learning_rate": 9.251679713485473e-07, "loss": 0.0024, "step": 259520 }, { "epoch": 2.772904535498691, "grad_norm": 0.021034155040979385, "learning_rate": 9.25159129820461e-07, "loss": 0.0228, "step": 259530 }, { "epoch": 2.7730113788129707, "grad_norm": 0.011324018239974976, "learning_rate": 9.251502878123382e-07, "loss": 0.0182, "step": 259540 }, { "epoch": 2.77311822212725, "grad_norm": 1.259305715560913, "learning_rate": 9.251414453241885e-07, "loss": 0.0025, "step": 259550 }, { "epoch": 2.77322506544153, "grad_norm": 1.8680775165557861, "learning_rate": 9.25132602356022e-07, "loss": 0.0248, "step": 259560 }, { "epoch": 2.7733319087558095, "grad_norm": 8.92673397064209, "learning_rate": 9.251237589078488e-07, "loss": 0.0099, "step": 259570 }, { "epoch": 2.7734387520700894, "grad_norm": 0.242730513215065, "learning_rate": 9.251149149796787e-07, "loss": 0.0034, "step": 259580 }, { "epoch": 2.773545595384369, "grad_norm": 0.008680983446538448, "learning_rate": 9.251060705715217e-07, "loss": 0.0065, "step": 259590 }, { "epoch": 2.7736524386986483, "grad_norm": 0.058094292879104614, "learning_rate": 9.25097225683388e-07, "loss": 0.008, "step": 259600 }, { "epoch": 2.773759282012928, "grad_norm": 6.314585208892822, "learning_rate": 9.250883803152874e-07, "loss": 0.0243, "step": 259610 }, { "epoch": 2.7738661253272077, "grad_norm": 0.024406012147665024, "learning_rate": 9.2507953446723e-07, "loss": 0.009, "step": 259620 }, { "epoch": 2.773972968641487, "grad_norm": 11.242889404296875, "learning_rate": 9.250706881392257e-07, "loss": 0.0074, "step": 259630 }, { "epoch": 2.774079811955767, "grad_norm": 0.03634453937411308, "learning_rate": 9.250618413312844e-07, "loss": 0.0499, "step": 259640 }, { "epoch": 2.7741866552700465, "grad_norm": 0.0676276907324791, "learning_rate": 9.250529940434162e-07, "loss": 0.0179, "step": 259650 }, { "epoch": 2.774293498584326, "grad_norm": 1.0554828643798828, "learning_rate": 9.250441462756312e-07, "loss": 0.0437, "step": 259660 }, { "epoch": 2.774400341898606, "grad_norm": 0.058911845088005066, "learning_rate": 9.250352980279392e-07, "loss": 0.0441, "step": 259670 }, { "epoch": 2.7745071852128853, "grad_norm": 0.010531281121075153, "learning_rate": 9.250264493003503e-07, "loss": 0.0313, "step": 259680 }, { "epoch": 2.7746140285271648, "grad_norm": 0.31452563405036926, "learning_rate": 9.250176000928747e-07, "loss": 0.0085, "step": 259690 }, { "epoch": 2.7747208718414447, "grad_norm": 0.05498553812503815, "learning_rate": 9.250087504055218e-07, "loss": 0.013, "step": 259700 }, { "epoch": 2.774827715155724, "grad_norm": 0.24063128232955933, "learning_rate": 9.249999002383021e-07, "loss": 0.0109, "step": 259710 }, { "epoch": 2.7749345584700036, "grad_norm": 0.05007495731115341, "learning_rate": 9.249910495912255e-07, "loss": 0.0316, "step": 259720 }, { "epoch": 2.7750414017842835, "grad_norm": 1.2798609733581543, "learning_rate": 9.249821984643018e-07, "loss": 0.0311, "step": 259730 }, { "epoch": 2.775148245098563, "grad_norm": 0.030079888179898262, "learning_rate": 9.249733468575412e-07, "loss": 0.0098, "step": 259740 }, { "epoch": 2.7752550884128424, "grad_norm": 0.08552251756191254, "learning_rate": 9.249644947709535e-07, "loss": 0.0207, "step": 259750 }, { "epoch": 2.7753619317271223, "grad_norm": 12.49109172821045, "learning_rate": 9.249556422045488e-07, "loss": 0.0181, "step": 259760 }, { "epoch": 2.7754687750414018, "grad_norm": 0.20468339323997498, "learning_rate": 9.249467891583373e-07, "loss": 0.0477, "step": 259770 }, { "epoch": 2.7755756183556812, "grad_norm": 24.75645637512207, "learning_rate": 9.249379356323286e-07, "loss": 0.0076, "step": 259780 }, { "epoch": 2.775682461669961, "grad_norm": 0.004090324975550175, "learning_rate": 9.249290816265331e-07, "loss": 0.0121, "step": 259790 }, { "epoch": 2.7757893049842406, "grad_norm": 0.8869280219078064, "learning_rate": 9.249202271409604e-07, "loss": 0.0255, "step": 259800 }, { "epoch": 2.7758961482985205, "grad_norm": 2.572281837463379, "learning_rate": 9.249113721756208e-07, "loss": 0.0863, "step": 259810 }, { "epoch": 2.7760029916128, "grad_norm": 0.016247622668743134, "learning_rate": 9.249025167305243e-07, "loss": 0.0052, "step": 259820 }, { "epoch": 2.7761098349270794, "grad_norm": 0.021471621468663216, "learning_rate": 9.248936608056806e-07, "loss": 0.0221, "step": 259830 }, { "epoch": 2.776216678241359, "grad_norm": 2.1755154132843018, "learning_rate": 9.248848044010999e-07, "loss": 0.0075, "step": 259840 }, { "epoch": 2.7763235215556388, "grad_norm": 0.8780444860458374, "learning_rate": 9.248759475167923e-07, "loss": 0.0271, "step": 259850 }, { "epoch": 2.7764303648699182, "grad_norm": 12.839385986328125, "learning_rate": 9.248670901527677e-07, "loss": 0.0318, "step": 259860 }, { "epoch": 2.776537208184198, "grad_norm": 0.02607141062617302, "learning_rate": 9.248582323090359e-07, "loss": 0.0195, "step": 259870 }, { "epoch": 2.7766440514984776, "grad_norm": 1.2281458377838135, "learning_rate": 9.248493739856074e-07, "loss": 0.022, "step": 259880 }, { "epoch": 2.776750894812757, "grad_norm": 8.240059852600098, "learning_rate": 9.248405151824916e-07, "loss": 0.022, "step": 259890 }, { "epoch": 2.7768577381270365, "grad_norm": 5.580545425415039, "learning_rate": 9.248316558996989e-07, "loss": 0.0347, "step": 259900 }, { "epoch": 2.7769645814413164, "grad_norm": 0.06671871244907379, "learning_rate": 9.248227961372393e-07, "loss": 0.036, "step": 259910 }, { "epoch": 2.777071424755596, "grad_norm": 0.013494741171598434, "learning_rate": 9.248139358951225e-07, "loss": 0.0266, "step": 259920 }, { "epoch": 2.7771782680698758, "grad_norm": 4.566006660461426, "learning_rate": 9.248050751733587e-07, "loss": 0.0145, "step": 259930 }, { "epoch": 2.7772851113841552, "grad_norm": 5.473848342895508, "learning_rate": 9.247962139719581e-07, "loss": 0.0124, "step": 259940 }, { "epoch": 2.7773919546984347, "grad_norm": 1.4581791162490845, "learning_rate": 9.247873522909304e-07, "loss": 0.0251, "step": 259950 }, { "epoch": 2.777498798012714, "grad_norm": 10.317145347595215, "learning_rate": 9.247784901302858e-07, "loss": 0.0032, "step": 259960 }, { "epoch": 2.777605641326994, "grad_norm": 0.3802628815174103, "learning_rate": 9.24769627490034e-07, "loss": 0.0324, "step": 259970 }, { "epoch": 2.7777124846412735, "grad_norm": 1.3756057024002075, "learning_rate": 9.247607643701856e-07, "loss": 0.004, "step": 259980 }, { "epoch": 2.7778193279555534, "grad_norm": 0.018812008202075958, "learning_rate": 9.2475190077075e-07, "loss": 0.0075, "step": 259990 }, { "epoch": 2.777926171269833, "grad_norm": 4.189800262451172, "learning_rate": 9.247430366917375e-07, "loss": 0.0213, "step": 260000 }, { "epoch": 2.7780330145841123, "grad_norm": 3.550785779953003, "learning_rate": 9.247341721331581e-07, "loss": 0.0114, "step": 260010 }, { "epoch": 2.778139857898392, "grad_norm": 0.03643719106912613, "learning_rate": 9.247253070950217e-07, "loss": 0.0355, "step": 260020 }, { "epoch": 2.7782467012126717, "grad_norm": 0.8521414995193481, "learning_rate": 9.247164415773383e-07, "loss": 0.0257, "step": 260030 }, { "epoch": 2.778353544526951, "grad_norm": 10.351914405822754, "learning_rate": 9.247075755801181e-07, "loss": 0.0418, "step": 260040 }, { "epoch": 2.778460387841231, "grad_norm": 0.07486503571271896, "learning_rate": 9.246987091033709e-07, "loss": 0.0143, "step": 260050 }, { "epoch": 2.7785672311555105, "grad_norm": 0.03961972892284393, "learning_rate": 9.246898421471069e-07, "loss": 0.0017, "step": 260060 }, { "epoch": 2.77867407446979, "grad_norm": 1.4496276378631592, "learning_rate": 9.24680974711336e-07, "loss": 0.0071, "step": 260070 }, { "epoch": 2.7787809177840694, "grad_norm": 8.504711151123047, "learning_rate": 9.246721067960681e-07, "loss": 0.0466, "step": 260080 }, { "epoch": 2.7788877610983493, "grad_norm": 0.8826647996902466, "learning_rate": 9.246632384013135e-07, "loss": 0.0214, "step": 260090 }, { "epoch": 2.778994604412629, "grad_norm": 4.437893390655518, "learning_rate": 9.246543695270819e-07, "loss": 0.0169, "step": 260100 }, { "epoch": 2.7791014477269087, "grad_norm": 0.6510013937950134, "learning_rate": 9.246455001733836e-07, "loss": 0.0811, "step": 260110 }, { "epoch": 2.779208291041188, "grad_norm": 2.025613307952881, "learning_rate": 9.246366303402285e-07, "loss": 0.0041, "step": 260120 }, { "epoch": 2.7793151343554676, "grad_norm": 15.882559776306152, "learning_rate": 9.246277600276264e-07, "loss": 0.0286, "step": 260130 }, { "epoch": 2.779421977669747, "grad_norm": 3.2121479511260986, "learning_rate": 9.246188892355877e-07, "loss": 0.0408, "step": 260140 }, { "epoch": 2.779528820984027, "grad_norm": 5.989817142486572, "learning_rate": 9.246100179641223e-07, "loss": 0.0243, "step": 260150 }, { "epoch": 2.7796356642983064, "grad_norm": 0.047884199768304825, "learning_rate": 9.246011462132399e-07, "loss": 0.0415, "step": 260160 }, { "epoch": 2.7797425076125863, "grad_norm": 0.03243410959839821, "learning_rate": 9.24592273982951e-07, "loss": 0.0062, "step": 260170 }, { "epoch": 2.779849350926866, "grad_norm": 0.07725472748279572, "learning_rate": 9.245834012732652e-07, "loss": 0.0566, "step": 260180 }, { "epoch": 2.7799561942411453, "grad_norm": 9.459736824035645, "learning_rate": 9.245745280841928e-07, "loss": 0.0465, "step": 260190 }, { "epoch": 2.7800630375554247, "grad_norm": 2.6927945613861084, "learning_rate": 9.245656544157436e-07, "loss": 0.0104, "step": 260200 }, { "epoch": 2.7801698808697046, "grad_norm": 3.209857225418091, "learning_rate": 9.245567802679277e-07, "loss": 0.0185, "step": 260210 }, { "epoch": 2.780276724183984, "grad_norm": 0.1419273316860199, "learning_rate": 9.245479056407554e-07, "loss": 0.0343, "step": 260220 }, { "epoch": 2.780383567498264, "grad_norm": 5.656788349151611, "learning_rate": 9.245390305342362e-07, "loss": 0.0235, "step": 260230 }, { "epoch": 2.7804904108125434, "grad_norm": 0.04059527814388275, "learning_rate": 9.245301549483806e-07, "loss": 0.0144, "step": 260240 }, { "epoch": 2.780597254126823, "grad_norm": 0.06628451496362686, "learning_rate": 9.245212788831982e-07, "loss": 0.0038, "step": 260250 }, { "epoch": 2.7807040974411024, "grad_norm": 0.19719786942005157, "learning_rate": 9.245124023386995e-07, "loss": 0.0049, "step": 260260 }, { "epoch": 2.7808109407553823, "grad_norm": 0.3745902180671692, "learning_rate": 9.245035253148941e-07, "loss": 0.0749, "step": 260270 }, { "epoch": 2.7809177840696617, "grad_norm": 15.350775718688965, "learning_rate": 9.244946478117921e-07, "loss": 0.0309, "step": 260280 }, { "epoch": 2.7810246273839416, "grad_norm": 2.254563570022583, "learning_rate": 9.244857698294037e-07, "loss": 0.004, "step": 260290 }, { "epoch": 2.781131470698221, "grad_norm": 4.362329959869385, "learning_rate": 9.24476891367739e-07, "loss": 0.0069, "step": 260300 }, { "epoch": 2.7812383140125005, "grad_norm": 0.0070912581868469715, "learning_rate": 9.244680124268076e-07, "loss": 0.0191, "step": 260310 }, { "epoch": 2.78134515732678, "grad_norm": 0.08612658083438873, "learning_rate": 9.244591330066199e-07, "loss": 0.0135, "step": 260320 }, { "epoch": 2.78145200064106, "grad_norm": 5.931192874908447, "learning_rate": 9.244502531071857e-07, "loss": 0.0279, "step": 260330 }, { "epoch": 2.7815588439553394, "grad_norm": 0.20613612234592438, "learning_rate": 9.244413727285152e-07, "loss": 0.0489, "step": 260340 }, { "epoch": 2.7816656872696193, "grad_norm": 4.98455810546875, "learning_rate": 9.244324918706184e-07, "loss": 0.0675, "step": 260350 }, { "epoch": 2.7817725305838987, "grad_norm": 11.99093246459961, "learning_rate": 9.244236105335053e-07, "loss": 0.0336, "step": 260360 }, { "epoch": 2.781879373898178, "grad_norm": 1.1913965940475464, "learning_rate": 9.244147287171857e-07, "loss": 0.0143, "step": 260370 }, { "epoch": 2.781986217212458, "grad_norm": 2.323423385620117, "learning_rate": 9.244058464216701e-07, "loss": 0.0234, "step": 260380 }, { "epoch": 2.7820930605267375, "grad_norm": 0.9635887145996094, "learning_rate": 9.243969636469682e-07, "loss": 0.0275, "step": 260390 }, { "epoch": 2.782199903841017, "grad_norm": 6.260709285736084, "learning_rate": 9.243880803930899e-07, "loss": 0.026, "step": 260400 }, { "epoch": 2.782306747155297, "grad_norm": 2.1000845432281494, "learning_rate": 9.243791966600456e-07, "loss": 0.0308, "step": 260410 }, { "epoch": 2.7824135904695764, "grad_norm": 3.3796682357788086, "learning_rate": 9.243703124478453e-07, "loss": 0.0698, "step": 260420 }, { "epoch": 2.782520433783856, "grad_norm": 0.970820426940918, "learning_rate": 9.243614277564986e-07, "loss": 0.0111, "step": 260430 }, { "epoch": 2.7826272770981357, "grad_norm": 1.4933701753616333, "learning_rate": 9.24352542586016e-07, "loss": 0.0462, "step": 260440 }, { "epoch": 2.782734120412415, "grad_norm": 4.967287540435791, "learning_rate": 9.243436569364074e-07, "loss": 0.019, "step": 260450 }, { "epoch": 2.7828409637266946, "grad_norm": 3.5890655517578125, "learning_rate": 9.243347708076826e-07, "loss": 0.0813, "step": 260460 }, { "epoch": 2.7829478070409746, "grad_norm": 0.012889890931546688, "learning_rate": 9.243258841998519e-07, "loss": 0.0008, "step": 260470 }, { "epoch": 2.783054650355254, "grad_norm": 7.931897163391113, "learning_rate": 9.243169971129252e-07, "loss": 0.0166, "step": 260480 }, { "epoch": 2.7831614936695335, "grad_norm": 3.3881149291992188, "learning_rate": 9.243081095469127e-07, "loss": 0.008, "step": 260490 }, { "epoch": 2.7832683369838134, "grad_norm": 4.635459899902344, "learning_rate": 9.242992215018242e-07, "loss": 0.021, "step": 260500 }, { "epoch": 2.783375180298093, "grad_norm": 0.28773337602615356, "learning_rate": 9.242903329776698e-07, "loss": 0.052, "step": 260510 }, { "epoch": 2.7834820236123723, "grad_norm": 2.483729839324951, "learning_rate": 9.2428144397446e-07, "loss": 0.0133, "step": 260520 }, { "epoch": 2.783588866926652, "grad_norm": 1.4761230945587158, "learning_rate": 9.24272554492204e-07, "loss": 0.0129, "step": 260530 }, { "epoch": 2.7836957102409317, "grad_norm": 0.006615947932004929, "learning_rate": 9.242636645309123e-07, "loss": 0.0046, "step": 260540 }, { "epoch": 2.783802553555211, "grad_norm": 0.006947237532585859, "learning_rate": 9.242547740905951e-07, "loss": 0.0197, "step": 260550 }, { "epoch": 2.783909396869491, "grad_norm": 0.020125705748796463, "learning_rate": 9.242458831712621e-07, "loss": 0.0289, "step": 260560 }, { "epoch": 2.7840162401837705, "grad_norm": 1.7643018960952759, "learning_rate": 9.242369917729236e-07, "loss": 0.0712, "step": 260570 }, { "epoch": 2.7841230834980504, "grad_norm": 9.029645919799805, "learning_rate": 9.242280998955894e-07, "loss": 0.0378, "step": 260580 }, { "epoch": 2.78422992681233, "grad_norm": 0.39270031452178955, "learning_rate": 9.242192075392696e-07, "loss": 0.0132, "step": 260590 }, { "epoch": 2.7843367701266093, "grad_norm": 15.988826751708984, "learning_rate": 9.242103147039745e-07, "loss": 0.0412, "step": 260600 }, { "epoch": 2.7844436134408888, "grad_norm": 3.1762781143188477, "learning_rate": 9.242014213897138e-07, "loss": 0.0047, "step": 260610 }, { "epoch": 2.7845504567551687, "grad_norm": 1.768754005432129, "learning_rate": 9.241925275964977e-07, "loss": 0.0227, "step": 260620 }, { "epoch": 2.784657300069448, "grad_norm": 0.2593245506286621, "learning_rate": 9.241836333243362e-07, "loss": 0.0064, "step": 260630 }, { "epoch": 2.784764143383728, "grad_norm": 1.8909682035446167, "learning_rate": 9.241747385732394e-07, "loss": 0.0196, "step": 260640 }, { "epoch": 2.7848709866980075, "grad_norm": 0.9394558668136597, "learning_rate": 9.241658433432174e-07, "loss": 0.0129, "step": 260650 }, { "epoch": 2.784977830012287, "grad_norm": 0.026232978329062462, "learning_rate": 9.241569476342801e-07, "loss": 0.0196, "step": 260660 }, { "epoch": 2.7850846733265664, "grad_norm": 5.947890758514404, "learning_rate": 9.241480514464375e-07, "loss": 0.0163, "step": 260670 }, { "epoch": 2.7851915166408463, "grad_norm": 0.47990596294403076, "learning_rate": 9.241391547796998e-07, "loss": 0.0159, "step": 260680 }, { "epoch": 2.7852983599551258, "grad_norm": 4.443371772766113, "learning_rate": 9.241302576340769e-07, "loss": 0.048, "step": 260690 }, { "epoch": 2.7854052032694057, "grad_norm": 0.005782567895948887, "learning_rate": 9.241213600095789e-07, "loss": 0.0043, "step": 260700 }, { "epoch": 2.785512046583685, "grad_norm": 2.1641297340393066, "learning_rate": 9.241124619062161e-07, "loss": 0.0148, "step": 260710 }, { "epoch": 2.7856188898979646, "grad_norm": 0.4694742262363434, "learning_rate": 9.241035633239982e-07, "loss": 0.0111, "step": 260720 }, { "epoch": 2.785725733212244, "grad_norm": 0.8115990161895752, "learning_rate": 9.240946642629352e-07, "loss": 0.0149, "step": 260730 }, { "epoch": 2.785832576526524, "grad_norm": 0.010051430203020573, "learning_rate": 9.240857647230376e-07, "loss": 0.0041, "step": 260740 }, { "epoch": 2.7859394198408034, "grad_norm": 0.22809430956840515, "learning_rate": 9.24076864704315e-07, "loss": 0.0157, "step": 260750 }, { "epoch": 2.7860462631550833, "grad_norm": 2.9740166664123535, "learning_rate": 9.240679642067777e-07, "loss": 0.0162, "step": 260760 }, { "epoch": 2.7861531064693628, "grad_norm": 10.6953706741333, "learning_rate": 9.240590632304355e-07, "loss": 0.0831, "step": 260770 }, { "epoch": 2.786259949783642, "grad_norm": 0.022607360035181046, "learning_rate": 9.240501617752988e-07, "loss": 0.012, "step": 260780 }, { "epoch": 2.7863667930979217, "grad_norm": 0.9872385859489441, "learning_rate": 9.240412598413773e-07, "loss": 0.0122, "step": 260790 }, { "epoch": 2.7864736364122016, "grad_norm": 2.1607751846313477, "learning_rate": 9.240323574286815e-07, "loss": 0.0232, "step": 260800 }, { "epoch": 2.786580479726481, "grad_norm": 0.026575453579425812, "learning_rate": 9.240234545372209e-07, "loss": 0.0322, "step": 260810 }, { "epoch": 2.786687323040761, "grad_norm": 2.5571341514587402, "learning_rate": 9.240145511670059e-07, "loss": 0.0351, "step": 260820 }, { "epoch": 2.7867941663550404, "grad_norm": 0.3512347638607025, "learning_rate": 9.240056473180464e-07, "loss": 0.013, "step": 260830 }, { "epoch": 2.78690100966932, "grad_norm": 2.0228912830352783, "learning_rate": 9.239967429903526e-07, "loss": 0.0506, "step": 260840 }, { "epoch": 2.7870078529835993, "grad_norm": 1.2871512174606323, "learning_rate": 9.239878381839346e-07, "loss": 0.005, "step": 260850 }, { "epoch": 2.7871146962978792, "grad_norm": 0.013325114734470844, "learning_rate": 9.239789328988021e-07, "loss": 0.0137, "step": 260860 }, { "epoch": 2.7872215396121587, "grad_norm": 1.2806018590927124, "learning_rate": 9.239700271349655e-07, "loss": 0.0066, "step": 260870 }, { "epoch": 2.7873283829264386, "grad_norm": 0.010952013544738293, "learning_rate": 9.239611208924346e-07, "loss": 0.0418, "step": 260880 }, { "epoch": 2.787435226240718, "grad_norm": 1.1594613790512085, "learning_rate": 9.239522141712197e-07, "loss": 0.011, "step": 260890 }, { "epoch": 2.7875420695549975, "grad_norm": 0.007029864937067032, "learning_rate": 9.239433069713307e-07, "loss": 0.0087, "step": 260900 }, { "epoch": 2.787648912869277, "grad_norm": 0.6010913252830505, "learning_rate": 9.239343992927779e-07, "loss": 0.0531, "step": 260910 }, { "epoch": 2.787755756183557, "grad_norm": 0.018123554065823555, "learning_rate": 9.239254911355709e-07, "loss": 0.0509, "step": 260920 }, { "epoch": 2.7878625994978363, "grad_norm": 0.015834033489227295, "learning_rate": 9.239165824997203e-07, "loss": 0.0479, "step": 260930 }, { "epoch": 2.7879694428121162, "grad_norm": 0.0032147085294127464, "learning_rate": 9.239076733852358e-07, "loss": 0.0308, "step": 260940 }, { "epoch": 2.7880762861263957, "grad_norm": 0.10549471527338028, "learning_rate": 9.238987637921273e-07, "loss": 0.004, "step": 260950 }, { "epoch": 2.788183129440675, "grad_norm": 3.460785150527954, "learning_rate": 9.238898537204054e-07, "loss": 0.0132, "step": 260960 }, { "epoch": 2.7882899727549546, "grad_norm": 0.023669622838497162, "learning_rate": 9.238809431700798e-07, "loss": 0.034, "step": 260970 }, { "epoch": 2.7883968160692345, "grad_norm": 2.6436076164245605, "learning_rate": 9.238720321411606e-07, "loss": 0.0018, "step": 260980 }, { "epoch": 2.788503659383514, "grad_norm": 3.5698227882385254, "learning_rate": 9.238631206336578e-07, "loss": 0.021, "step": 260990 }, { "epoch": 2.788610502697794, "grad_norm": 4.371289253234863, "learning_rate": 9.238542086475815e-07, "loss": 0.0419, "step": 261000 }, { "epoch": 2.7887173460120733, "grad_norm": 1.7923444509506226, "learning_rate": 9.23845296182942e-07, "loss": 0.0147, "step": 261010 }, { "epoch": 2.788824189326353, "grad_norm": 0.11543766409158707, "learning_rate": 9.238363832397491e-07, "loss": 0.0251, "step": 261020 }, { "epoch": 2.7889310326406322, "grad_norm": 0.016558051109313965, "learning_rate": 9.238274698180129e-07, "loss": 0.0139, "step": 261030 }, { "epoch": 2.789037875954912, "grad_norm": 2.0509626865386963, "learning_rate": 9.238185559177436e-07, "loss": 0.0429, "step": 261040 }, { "epoch": 2.7891447192691916, "grad_norm": 0.8701993227005005, "learning_rate": 9.238096415389512e-07, "loss": 0.0039, "step": 261050 }, { "epoch": 2.7892515625834715, "grad_norm": 0.007671006489545107, "learning_rate": 9.238007266816455e-07, "loss": 0.0177, "step": 261060 }, { "epoch": 2.789358405897751, "grad_norm": 0.09163789451122284, "learning_rate": 9.23791811345837e-07, "loss": 0.0073, "step": 261070 }, { "epoch": 2.7894652492120304, "grad_norm": 1.3461675643920898, "learning_rate": 9.237828955315355e-07, "loss": 0.0157, "step": 261080 }, { "epoch": 2.7895720925263103, "grad_norm": 7.337329387664795, "learning_rate": 9.23773979238751e-07, "loss": 0.013, "step": 261090 }, { "epoch": 2.78967893584059, "grad_norm": 0.01781875267624855, "learning_rate": 9.237650624674937e-07, "loss": 0.0322, "step": 261100 }, { "epoch": 2.7897857791548692, "grad_norm": 0.09712681919336319, "learning_rate": 9.237561452177738e-07, "loss": 0.0268, "step": 261110 }, { "epoch": 2.789892622469149, "grad_norm": 2.779757261276245, "learning_rate": 9.237472274896012e-07, "loss": 0.0131, "step": 261120 }, { "epoch": 2.7899994657834286, "grad_norm": 0.9126444458961487, "learning_rate": 9.23738309282986e-07, "loss": 0.0273, "step": 261130 }, { "epoch": 2.790106309097708, "grad_norm": 0.12230870127677917, "learning_rate": 9.237293905979383e-07, "loss": 0.0057, "step": 261140 }, { "epoch": 2.790213152411988, "grad_norm": 8.509175300598145, "learning_rate": 9.237204714344681e-07, "loss": 0.0392, "step": 261150 }, { "epoch": 2.7903199957262674, "grad_norm": 2.3910794258117676, "learning_rate": 9.237115517925856e-07, "loss": 0.0196, "step": 261160 }, { "epoch": 2.790426839040547, "grad_norm": 2.1404545307159424, "learning_rate": 9.237026316723006e-07, "loss": 0.0104, "step": 261170 }, { "epoch": 2.790533682354827, "grad_norm": 0.03333612531423569, "learning_rate": 9.236937110736234e-07, "loss": 0.0354, "step": 261180 }, { "epoch": 2.7906405256691063, "grad_norm": 0.02364525943994522, "learning_rate": 9.23684789996564e-07, "loss": 0.0152, "step": 261190 }, { "epoch": 2.7907473689833857, "grad_norm": 0.31891509890556335, "learning_rate": 9.236758684411325e-07, "loss": 0.0124, "step": 261200 }, { "epoch": 2.7908542122976656, "grad_norm": 0.1909443438053131, "learning_rate": 9.23666946407339e-07, "loss": 0.0431, "step": 261210 }, { "epoch": 2.790961055611945, "grad_norm": 0.09967947006225586, "learning_rate": 9.236580238951937e-07, "loss": 0.041, "step": 261220 }, { "epoch": 2.7910678989262245, "grad_norm": 0.41866862773895264, "learning_rate": 9.236491009047063e-07, "loss": 0.0408, "step": 261230 }, { "epoch": 2.7911747422405044, "grad_norm": 0.021780021488666534, "learning_rate": 9.236401774358872e-07, "loss": 0.005, "step": 261240 }, { "epoch": 2.791281585554784, "grad_norm": 2.1867988109588623, "learning_rate": 9.236312534887463e-07, "loss": 0.0275, "step": 261250 }, { "epoch": 2.7913884288690634, "grad_norm": 0.3846036195755005, "learning_rate": 9.236223290632937e-07, "loss": 0.0241, "step": 261260 }, { "epoch": 2.7914952721833433, "grad_norm": 0.007831034250557423, "learning_rate": 9.236134041595396e-07, "loss": 0.0622, "step": 261270 }, { "epoch": 2.7916021154976227, "grad_norm": 0.2983624041080475, "learning_rate": 9.236044787774941e-07, "loss": 0.0119, "step": 261280 }, { "epoch": 2.7917089588119026, "grad_norm": 0.15547992289066315, "learning_rate": 9.23595552917167e-07, "loss": 0.0028, "step": 261290 }, { "epoch": 2.791815802126182, "grad_norm": 0.5887545943260193, "learning_rate": 9.235866265785685e-07, "loss": 0.0336, "step": 261300 }, { "epoch": 2.7919226454404615, "grad_norm": 0.00286358711309731, "learning_rate": 9.235776997617088e-07, "loss": 0.0215, "step": 261310 }, { "epoch": 2.792029488754741, "grad_norm": 0.7717170715332031, "learning_rate": 9.23568772466598e-07, "loss": 0.0219, "step": 261320 }, { "epoch": 2.792136332069021, "grad_norm": 0.12688887119293213, "learning_rate": 9.235598446932461e-07, "loss": 0.0036, "step": 261330 }, { "epoch": 2.7922431753833004, "grad_norm": 7.170870304107666, "learning_rate": 9.235509164416631e-07, "loss": 0.0183, "step": 261340 }, { "epoch": 2.7923500186975803, "grad_norm": 5.389111518859863, "learning_rate": 9.235419877118591e-07, "loss": 0.0208, "step": 261350 }, { "epoch": 2.7924568620118597, "grad_norm": 7.151939392089844, "learning_rate": 9.235330585038443e-07, "loss": 0.0279, "step": 261360 }, { "epoch": 2.792563705326139, "grad_norm": 3.1203362941741943, "learning_rate": 9.235241288176286e-07, "loss": 0.0456, "step": 261370 }, { "epoch": 2.7926705486404186, "grad_norm": 0.013661476783454418, "learning_rate": 9.235151986532224e-07, "loss": 0.0301, "step": 261380 }, { "epoch": 2.7927773919546985, "grad_norm": 4.985635280609131, "learning_rate": 9.235062680106353e-07, "loss": 0.0114, "step": 261390 }, { "epoch": 2.792884235268978, "grad_norm": 6.602495193481445, "learning_rate": 9.23497336889878e-07, "loss": 0.0671, "step": 261400 }, { "epoch": 2.792991078583258, "grad_norm": 0.07455126196146011, "learning_rate": 9.2348840529096e-07, "loss": 0.014, "step": 261410 }, { "epoch": 2.7930979218975374, "grad_norm": 0.022236501798033714, "learning_rate": 9.234794732138917e-07, "loss": 0.0052, "step": 261420 }, { "epoch": 2.793204765211817, "grad_norm": 0.17215920984745026, "learning_rate": 9.234705406586831e-07, "loss": 0.0199, "step": 261430 }, { "epoch": 2.7933116085260963, "grad_norm": 1.8021700382232666, "learning_rate": 9.234616076253443e-07, "loss": 0.0273, "step": 261440 }, { "epoch": 2.793418451840376, "grad_norm": 6.380979537963867, "learning_rate": 9.234526741138854e-07, "loss": 0.0301, "step": 261450 }, { "epoch": 2.7935252951546556, "grad_norm": 0.025857606902718544, "learning_rate": 9.234437401243165e-07, "loss": 0.0253, "step": 261460 }, { "epoch": 2.7936321384689355, "grad_norm": 0.5848756432533264, "learning_rate": 9.234348056566475e-07, "loss": 0.0091, "step": 261470 }, { "epoch": 2.793738981783215, "grad_norm": 5.64178466796875, "learning_rate": 9.234258707108889e-07, "loss": 0.0232, "step": 261480 }, { "epoch": 2.7938458250974945, "grad_norm": 3.8461732864379883, "learning_rate": 9.234169352870503e-07, "loss": 0.0143, "step": 261490 }, { "epoch": 2.793952668411774, "grad_norm": 28.485353469848633, "learning_rate": 9.234079993851421e-07, "loss": 0.0251, "step": 261500 }, { "epoch": 2.794059511726054, "grad_norm": 3.319598436355591, "learning_rate": 9.233990630051744e-07, "loss": 0.0332, "step": 261510 }, { "epoch": 2.7941663550403333, "grad_norm": 3.532989501953125, "learning_rate": 9.23390126147157e-07, "loss": 0.0184, "step": 261520 }, { "epoch": 2.794273198354613, "grad_norm": 10.283411026000977, "learning_rate": 9.233811888111005e-07, "loss": 0.0512, "step": 261530 }, { "epoch": 2.7943800416688926, "grad_norm": 0.003576799761503935, "learning_rate": 9.233722509970146e-07, "loss": 0.0296, "step": 261540 }, { "epoch": 2.794486884983172, "grad_norm": 3.991729259490967, "learning_rate": 9.233633127049092e-07, "loss": 0.0113, "step": 261550 }, { "epoch": 2.7945937282974516, "grad_norm": 4.310905933380127, "learning_rate": 9.233543739347949e-07, "loss": 0.0078, "step": 261560 }, { "epoch": 2.7947005716117315, "grad_norm": 0.03922383487224579, "learning_rate": 9.233454346866815e-07, "loss": 0.0555, "step": 261570 }, { "epoch": 2.794807414926011, "grad_norm": 10.172019004821777, "learning_rate": 9.233364949605792e-07, "loss": 0.0158, "step": 261580 }, { "epoch": 2.794914258240291, "grad_norm": 0.32279980182647705, "learning_rate": 9.233275547564981e-07, "loss": 0.028, "step": 261590 }, { "epoch": 2.7950211015545703, "grad_norm": 0.051511816680431366, "learning_rate": 9.23318614074448e-07, "loss": 0.0186, "step": 261600 }, { "epoch": 2.7951279448688497, "grad_norm": 0.00974658690392971, "learning_rate": 9.233096729144394e-07, "loss": 0.0175, "step": 261610 }, { "epoch": 2.795234788183129, "grad_norm": 20.150110244750977, "learning_rate": 9.233007312764823e-07, "loss": 0.015, "step": 261620 }, { "epoch": 2.795341631497409, "grad_norm": 1.0921533107757568, "learning_rate": 9.232917891605867e-07, "loss": 0.0244, "step": 261630 }, { "epoch": 2.7954484748116886, "grad_norm": 0.007477722596377134, "learning_rate": 9.232828465667626e-07, "loss": 0.0052, "step": 261640 }, { "epoch": 2.7955553181259685, "grad_norm": 4.562743186950684, "learning_rate": 9.232739034950204e-07, "loss": 0.046, "step": 261650 }, { "epoch": 2.795662161440248, "grad_norm": 0.07452191412448883, "learning_rate": 9.232649599453698e-07, "loss": 0.0009, "step": 261660 }, { "epoch": 2.7957690047545274, "grad_norm": 0.02570599503815174, "learning_rate": 9.232560159178212e-07, "loss": 0.049, "step": 261670 }, { "epoch": 2.795875848068807, "grad_norm": 1.3133535385131836, "learning_rate": 9.232470714123846e-07, "loss": 0.0041, "step": 261680 }, { "epoch": 2.7959826913830867, "grad_norm": 6.998848915100098, "learning_rate": 9.232381264290702e-07, "loss": 0.0343, "step": 261690 }, { "epoch": 2.796089534697366, "grad_norm": 0.5654125213623047, "learning_rate": 9.232291809678879e-07, "loss": 0.0267, "step": 261700 }, { "epoch": 2.796196378011646, "grad_norm": 0.01146535575389862, "learning_rate": 9.232202350288481e-07, "loss": 0.0104, "step": 261710 }, { "epoch": 2.7963032213259256, "grad_norm": 0.02421414852142334, "learning_rate": 9.232112886119605e-07, "loss": 0.0084, "step": 261720 }, { "epoch": 2.796410064640205, "grad_norm": 4.158084869384766, "learning_rate": 9.232023417172354e-07, "loss": 0.0312, "step": 261730 }, { "epoch": 2.7965169079544845, "grad_norm": 0.1600112020969391, "learning_rate": 9.231933943446831e-07, "loss": 0.0017, "step": 261740 }, { "epoch": 2.7966237512687644, "grad_norm": 0.27463653683662415, "learning_rate": 9.231844464943135e-07, "loss": 0.0783, "step": 261750 }, { "epoch": 2.796730594583044, "grad_norm": 6.874787330627441, "learning_rate": 9.231754981661366e-07, "loss": 0.03, "step": 261760 }, { "epoch": 2.7968374378973238, "grad_norm": 1.731367826461792, "learning_rate": 9.231665493601626e-07, "loss": 0.0133, "step": 261770 }, { "epoch": 2.796944281211603, "grad_norm": 0.028598863631486893, "learning_rate": 9.231576000764019e-07, "loss": 0.0203, "step": 261780 }, { "epoch": 2.7970511245258827, "grad_norm": 0.5698127150535583, "learning_rate": 9.23148650314864e-07, "loss": 0.0247, "step": 261790 }, { "epoch": 2.797157967840162, "grad_norm": 16.467327117919922, "learning_rate": 9.231397000755595e-07, "loss": 0.0528, "step": 261800 }, { "epoch": 2.797264811154442, "grad_norm": 0.03079056739807129, "learning_rate": 9.231307493584983e-07, "loss": 0.0081, "step": 261810 }, { "epoch": 2.7973716544687215, "grad_norm": 5.153083801269531, "learning_rate": 9.231217981636905e-07, "loss": 0.0166, "step": 261820 }, { "epoch": 2.7974784977830014, "grad_norm": 0.024247797206044197, "learning_rate": 9.231128464911463e-07, "loss": 0.0075, "step": 261830 }, { "epoch": 2.797585341097281, "grad_norm": 3.881633996963501, "learning_rate": 9.23103894340876e-07, "loss": 0.0133, "step": 261840 }, { "epoch": 2.7976921844115603, "grad_norm": 0.3072225749492645, "learning_rate": 9.230949417128892e-07, "loss": 0.0108, "step": 261850 }, { "epoch": 2.79779902772584, "grad_norm": 0.32513928413391113, "learning_rate": 9.230859886071962e-07, "loss": 0.0419, "step": 261860 }, { "epoch": 2.7979058710401197, "grad_norm": 0.6504904627799988, "learning_rate": 9.230770350238074e-07, "loss": 0.0785, "step": 261870 }, { "epoch": 2.798012714354399, "grad_norm": 0.02871616557240486, "learning_rate": 9.230680809627327e-07, "loss": 0.0967, "step": 261880 }, { "epoch": 2.798119557668679, "grad_norm": 9.69101619720459, "learning_rate": 9.23059126423982e-07, "loss": 0.027, "step": 261890 }, { "epoch": 2.7982264009829585, "grad_norm": 3.9503440856933594, "learning_rate": 9.230501714075658e-07, "loss": 0.0186, "step": 261900 }, { "epoch": 2.798333244297238, "grad_norm": 1.6457459926605225, "learning_rate": 9.230412159134939e-07, "loss": 0.0234, "step": 261910 }, { "epoch": 2.798440087611518, "grad_norm": 0.006087033543735743, "learning_rate": 9.230322599417767e-07, "loss": 0.0538, "step": 261920 }, { "epoch": 2.7985469309257973, "grad_norm": 7.074734687805176, "learning_rate": 9.23023303492424e-07, "loss": 0.0681, "step": 261930 }, { "epoch": 2.7986537742400768, "grad_norm": 2.9007697105407715, "learning_rate": 9.230143465654462e-07, "loss": 0.0165, "step": 261940 }, { "epoch": 2.7987606175543567, "grad_norm": 0.4197002351284027, "learning_rate": 9.230053891608531e-07, "loss": 0.046, "step": 261950 }, { "epoch": 2.798867460868636, "grad_norm": 0.004032061900943518, "learning_rate": 9.229964312786552e-07, "loss": 0.0197, "step": 261960 }, { "epoch": 2.7989743041829156, "grad_norm": 0.029878713190555573, "learning_rate": 9.229874729188623e-07, "loss": 0.0346, "step": 261970 }, { "epoch": 2.7990811474971955, "grad_norm": 5.545931339263916, "learning_rate": 9.229785140814845e-07, "loss": 0.0172, "step": 261980 }, { "epoch": 2.799187990811475, "grad_norm": 2.714421033859253, "learning_rate": 9.229695547665322e-07, "loss": 0.0182, "step": 261990 }, { "epoch": 2.7992948341257544, "grad_norm": 0.1277311146259308, "learning_rate": 9.229605949740152e-07, "loss": 0.0135, "step": 262000 }, { "epoch": 2.7994016774400343, "grad_norm": 7.043609142303467, "learning_rate": 9.229516347039439e-07, "loss": 0.019, "step": 262010 }, { "epoch": 2.799508520754314, "grad_norm": 0.8452364802360535, "learning_rate": 9.229426739563281e-07, "loss": 0.0226, "step": 262020 }, { "epoch": 2.7996153640685932, "grad_norm": 0.022127222269773483, "learning_rate": 9.229337127311785e-07, "loss": 0.033, "step": 262030 }, { "epoch": 2.799722207382873, "grad_norm": 0.11829546093940735, "learning_rate": 9.229247510285045e-07, "loss": 0.0507, "step": 262040 }, { "epoch": 2.7998290506971526, "grad_norm": 0.03884848579764366, "learning_rate": 9.229157888483166e-07, "loss": 0.0283, "step": 262050 }, { "epoch": 2.7999358940114325, "grad_norm": 0.006025348789989948, "learning_rate": 9.229068261906247e-07, "loss": 0.0216, "step": 262060 }, { "epoch": 2.800042737325712, "grad_norm": 8.525394439697266, "learning_rate": 9.228978630554393e-07, "loss": 0.0212, "step": 262070 }, { "epoch": 2.8001495806399914, "grad_norm": 0.02059238962829113, "learning_rate": 9.228888994427702e-07, "loss": 0.004, "step": 262080 }, { "epoch": 2.800256423954271, "grad_norm": 3.9502756595611572, "learning_rate": 9.228799353526278e-07, "loss": 0.0175, "step": 262090 }, { "epoch": 2.800363267268551, "grad_norm": 1.7590067386627197, "learning_rate": 9.228709707850217e-07, "loss": 0.0268, "step": 262100 }, { "epoch": 2.8004701105828302, "grad_norm": 0.01137739047408104, "learning_rate": 9.228620057399625e-07, "loss": 0.0443, "step": 262110 }, { "epoch": 2.80057695389711, "grad_norm": 0.008422496728599072, "learning_rate": 9.228530402174604e-07, "loss": 0.0136, "step": 262120 }, { "epoch": 2.8006837972113896, "grad_norm": 2.1845147609710693, "learning_rate": 9.228440742175249e-07, "loss": 0.0721, "step": 262130 }, { "epoch": 2.800790640525669, "grad_norm": 0.01835463009774685, "learning_rate": 9.228351077401668e-07, "loss": 0.0133, "step": 262140 }, { "epoch": 2.8008974838399485, "grad_norm": 1.9205126762390137, "learning_rate": 9.22826140785396e-07, "loss": 0.0062, "step": 262150 }, { "epoch": 2.8010043271542284, "grad_norm": 0.21523381769657135, "learning_rate": 9.228171733532224e-07, "loss": 0.0473, "step": 262160 }, { "epoch": 2.801111170468508, "grad_norm": 0.015455586835741997, "learning_rate": 9.228082054436564e-07, "loss": 0.0012, "step": 262170 }, { "epoch": 2.801218013782788, "grad_norm": 3.2143163681030273, "learning_rate": 9.227992370567081e-07, "loss": 0.0152, "step": 262180 }, { "epoch": 2.8013248570970672, "grad_norm": 0.022690769284963608, "learning_rate": 9.227902681923874e-07, "loss": 0.0473, "step": 262190 }, { "epoch": 2.8014317004113467, "grad_norm": 2.980635404586792, "learning_rate": 9.227812988507046e-07, "loss": 0.01, "step": 262200 }, { "epoch": 2.801538543725626, "grad_norm": 4.939157485961914, "learning_rate": 9.227723290316698e-07, "loss": 0.0077, "step": 262210 }, { "epoch": 2.801645387039906, "grad_norm": 0.3241196572780609, "learning_rate": 9.227633587352933e-07, "loss": 0.0333, "step": 262220 }, { "epoch": 2.8017522303541855, "grad_norm": 0.012178272940218449, "learning_rate": 9.227543879615847e-07, "loss": 0.0677, "step": 262230 }, { "epoch": 2.8018590736684654, "grad_norm": 0.01760992221534252, "learning_rate": 9.227454167105548e-07, "loss": 0.0268, "step": 262240 }, { "epoch": 2.801965916982745, "grad_norm": 0.23654808104038239, "learning_rate": 9.227364449822134e-07, "loss": 0.0158, "step": 262250 }, { "epoch": 2.8020727602970243, "grad_norm": 0.44048210978507996, "learning_rate": 9.227274727765706e-07, "loss": 0.0169, "step": 262260 }, { "epoch": 2.802179603611304, "grad_norm": 0.6676025390625, "learning_rate": 9.227185000936365e-07, "loss": 0.0449, "step": 262270 }, { "epoch": 2.8022864469255837, "grad_norm": 0.28486359119415283, "learning_rate": 9.227095269334213e-07, "loss": 0.0097, "step": 262280 }, { "epoch": 2.802393290239863, "grad_norm": 0.06817780435085297, "learning_rate": 9.227005532959352e-07, "loss": 0.0012, "step": 262290 }, { "epoch": 2.802500133554143, "grad_norm": 0.0035388998221606016, "learning_rate": 9.226915791811882e-07, "loss": 0.0136, "step": 262300 }, { "epoch": 2.8026069768684225, "grad_norm": 0.06787978857755661, "learning_rate": 9.226826045891907e-07, "loss": 0.0259, "step": 262310 }, { "epoch": 2.802713820182702, "grad_norm": 6.187131404876709, "learning_rate": 9.226736295199526e-07, "loss": 0.0476, "step": 262320 }, { "epoch": 2.8028206634969814, "grad_norm": 0.018041806295514107, "learning_rate": 9.226646539734839e-07, "loss": 0.003, "step": 262330 }, { "epoch": 2.8029275068112613, "grad_norm": 0.0031771170906722546, "learning_rate": 9.226556779497949e-07, "loss": 0.01, "step": 262340 }, { "epoch": 2.803034350125541, "grad_norm": 0.029879644513130188, "learning_rate": 9.226467014488959e-07, "loss": 0.0176, "step": 262350 }, { "epoch": 2.8031411934398207, "grad_norm": 0.023751212283968925, "learning_rate": 9.226377244707968e-07, "loss": 0.0072, "step": 262360 }, { "epoch": 2.8032480367541, "grad_norm": 0.9718239903450012, "learning_rate": 9.226287470155079e-07, "loss": 0.0028, "step": 262370 }, { "epoch": 2.8033548800683796, "grad_norm": 0.33670592308044434, "learning_rate": 9.226197690830392e-07, "loss": 0.018, "step": 262380 }, { "epoch": 2.803461723382659, "grad_norm": 1.5100520849227905, "learning_rate": 9.226107906734009e-07, "loss": 0.0389, "step": 262390 }, { "epoch": 2.803568566696939, "grad_norm": 1.8620750904083252, "learning_rate": 9.22601811786603e-07, "loss": 0.008, "step": 262400 }, { "epoch": 2.8036754100112184, "grad_norm": 0.0031858484726399183, "learning_rate": 9.22592832422656e-07, "loss": 0.0302, "step": 262410 }, { "epoch": 2.8037822533254984, "grad_norm": 0.008871602825820446, "learning_rate": 9.225838525815695e-07, "loss": 0.0106, "step": 262420 }, { "epoch": 2.803889096639778, "grad_norm": 4.598839282989502, "learning_rate": 9.225748722633541e-07, "loss": 0.0811, "step": 262430 }, { "epoch": 2.8039959399540573, "grad_norm": 0.06667143106460571, "learning_rate": 9.225658914680197e-07, "loss": 0.0191, "step": 262440 }, { "epoch": 2.8041027832683367, "grad_norm": 0.002513021696358919, "learning_rate": 9.225569101955766e-07, "loss": 0.0091, "step": 262450 }, { "epoch": 2.8042096265826166, "grad_norm": 0.003175792284309864, "learning_rate": 9.225479284460348e-07, "loss": 0.0055, "step": 262460 }, { "epoch": 2.804316469896896, "grad_norm": 0.0013482382055372, "learning_rate": 9.225389462194044e-07, "loss": 0.0074, "step": 262470 }, { "epoch": 2.804423313211176, "grad_norm": 5.489420413970947, "learning_rate": 9.225299635156959e-07, "loss": 0.0171, "step": 262480 }, { "epoch": 2.8045301565254555, "grad_norm": 1.581788420677185, "learning_rate": 9.22520980334919e-07, "loss": 0.0212, "step": 262490 }, { "epoch": 2.804636999839735, "grad_norm": 0.04530942812561989, "learning_rate": 9.22511996677084e-07, "loss": 0.027, "step": 262500 }, { "epoch": 2.8047438431540144, "grad_norm": 0.003035434987396002, "learning_rate": 9.225030125422013e-07, "loss": 0.028, "step": 262510 }, { "epoch": 2.8048506864682943, "grad_norm": 0.10317334532737732, "learning_rate": 9.224940279302806e-07, "loss": 0.0336, "step": 262520 }, { "epoch": 2.8049575297825737, "grad_norm": 3.556722402572632, "learning_rate": 9.224850428413322e-07, "loss": 0.0049, "step": 262530 }, { "epoch": 2.8050643730968536, "grad_norm": 3.7025647163391113, "learning_rate": 9.224760572753665e-07, "loss": 0.0264, "step": 262540 }, { "epoch": 2.805171216411133, "grad_norm": 7.722219467163086, "learning_rate": 9.224670712323933e-07, "loss": 0.0344, "step": 262550 }, { "epoch": 2.8052780597254126, "grad_norm": 0.26923051476478577, "learning_rate": 9.22458084712423e-07, "loss": 0.0007, "step": 262560 }, { "epoch": 2.8053849030396925, "grad_norm": 0.12152132391929626, "learning_rate": 9.224490977154655e-07, "loss": 0.0061, "step": 262570 }, { "epoch": 2.805491746353972, "grad_norm": 0.006978623103350401, "learning_rate": 9.224401102415311e-07, "loss": 0.049, "step": 262580 }, { "epoch": 2.8055985896682514, "grad_norm": 0.07218992710113525, "learning_rate": 9.2243112229063e-07, "loss": 0.0042, "step": 262590 }, { "epoch": 2.8057054329825313, "grad_norm": 0.020504839718341827, "learning_rate": 9.224221338627724e-07, "loss": 0.0463, "step": 262600 }, { "epoch": 2.8058122762968107, "grad_norm": 0.001884076977148652, "learning_rate": 9.224131449579682e-07, "loss": 0.0215, "step": 262610 }, { "epoch": 2.80591911961109, "grad_norm": 2.597379207611084, "learning_rate": 9.224041555762276e-07, "loss": 0.0353, "step": 262620 }, { "epoch": 2.80602596292537, "grad_norm": 0.878383219242096, "learning_rate": 9.22395165717561e-07, "loss": 0.0219, "step": 262630 }, { "epoch": 2.8061328062396496, "grad_norm": 10.940079689025879, "learning_rate": 9.223861753819782e-07, "loss": 0.0144, "step": 262640 }, { "epoch": 2.806239649553929, "grad_norm": 0.19961200654506683, "learning_rate": 9.223771845694897e-07, "loss": 0.0145, "step": 262650 }, { "epoch": 2.806346492868209, "grad_norm": 1.0884307622909546, "learning_rate": 9.223681932801053e-07, "loss": 0.017, "step": 262660 }, { "epoch": 2.8064533361824884, "grad_norm": 0.08085225522518158, "learning_rate": 9.223592015138355e-07, "loss": 0.0117, "step": 262670 }, { "epoch": 2.806560179496768, "grad_norm": 4.280893325805664, "learning_rate": 9.223502092706902e-07, "loss": 0.0106, "step": 262680 }, { "epoch": 2.8066670228110477, "grad_norm": 0.0434749573469162, "learning_rate": 9.223412165506797e-07, "loss": 0.0108, "step": 262690 }, { "epoch": 2.806773866125327, "grad_norm": 0.024924013763666153, "learning_rate": 9.22332223353814e-07, "loss": 0.0129, "step": 262700 }, { "epoch": 2.8068807094396067, "grad_norm": 0.8163968324661255, "learning_rate": 9.223232296801033e-07, "loss": 0.0032, "step": 262710 }, { "epoch": 2.8069875527538866, "grad_norm": 0.6863424181938171, "learning_rate": 9.223142355295579e-07, "loss": 0.0183, "step": 262720 }, { "epoch": 2.807094396068166, "grad_norm": 0.2556941509246826, "learning_rate": 9.223052409021878e-07, "loss": 0.0496, "step": 262730 }, { "epoch": 2.8072012393824455, "grad_norm": 7.562548637390137, "learning_rate": 9.222962457980033e-07, "loss": 0.0225, "step": 262740 }, { "epoch": 2.8073080826967254, "grad_norm": 0.3612746000289917, "learning_rate": 9.222872502170145e-07, "loss": 0.0121, "step": 262750 }, { "epoch": 2.807414926011005, "grad_norm": 0.39873433113098145, "learning_rate": 9.222782541592313e-07, "loss": 0.0679, "step": 262760 }, { "epoch": 2.8075217693252847, "grad_norm": 0.18528003990650177, "learning_rate": 9.222692576246643e-07, "loss": 0.0245, "step": 262770 }, { "epoch": 2.807628612639564, "grad_norm": 7.977923393249512, "learning_rate": 9.222602606133234e-07, "loss": 0.03, "step": 262780 }, { "epoch": 2.8077354559538437, "grad_norm": 3.470254421234131, "learning_rate": 9.222512631252186e-07, "loss": 0.0905, "step": 262790 }, { "epoch": 2.807842299268123, "grad_norm": 0.009509583935141563, "learning_rate": 9.222422651603605e-07, "loss": 0.0286, "step": 262800 }, { "epoch": 2.807949142582403, "grad_norm": 0.032259780913591385, "learning_rate": 9.222332667187589e-07, "loss": 0.0175, "step": 262810 }, { "epoch": 2.8080559858966825, "grad_norm": 0.028150901198387146, "learning_rate": 9.222242678004242e-07, "loss": 0.0197, "step": 262820 }, { "epoch": 2.8081628292109624, "grad_norm": 1.0035325288772583, "learning_rate": 9.222152684053662e-07, "loss": 0.0331, "step": 262830 }, { "epoch": 2.808269672525242, "grad_norm": 0.019612004980444908, "learning_rate": 9.222062685335954e-07, "loss": 0.025, "step": 262840 }, { "epoch": 2.8083765158395213, "grad_norm": 0.0030388622544705868, "learning_rate": 9.22197268185122e-07, "loss": 0.0167, "step": 262850 }, { "epoch": 2.8084833591538008, "grad_norm": 3.0246617794036865, "learning_rate": 9.221882673599559e-07, "loss": 0.0172, "step": 262860 }, { "epoch": 2.8085902024680807, "grad_norm": 5.795866012573242, "learning_rate": 9.221792660581074e-07, "loss": 0.0237, "step": 262870 }, { "epoch": 2.80869704578236, "grad_norm": 1.187658429145813, "learning_rate": 9.221702642795866e-07, "loss": 0.026, "step": 262880 }, { "epoch": 2.80880388909664, "grad_norm": 2.248363733291626, "learning_rate": 9.221612620244037e-07, "loss": 0.0759, "step": 262890 }, { "epoch": 2.8089107324109195, "grad_norm": 4.58198881149292, "learning_rate": 9.22152259292569e-07, "loss": 0.0392, "step": 262900 }, { "epoch": 2.809017575725199, "grad_norm": 0.48667168617248535, "learning_rate": 9.221432560840924e-07, "loss": 0.0119, "step": 262910 }, { "epoch": 2.8091244190394784, "grad_norm": 0.026097506284713745, "learning_rate": 9.221342523989843e-07, "loss": 0.0288, "step": 262920 }, { "epoch": 2.8092312623537583, "grad_norm": 6.781896114349365, "learning_rate": 9.221252482372547e-07, "loss": 0.0189, "step": 262930 }, { "epoch": 2.8093381056680378, "grad_norm": 0.041731055825948715, "learning_rate": 9.221162435989138e-07, "loss": 0.0067, "step": 262940 }, { "epoch": 2.8094449489823177, "grad_norm": 0.39161890745162964, "learning_rate": 9.221072384839719e-07, "loss": 0.01, "step": 262950 }, { "epoch": 2.809551792296597, "grad_norm": 0.9611359238624573, "learning_rate": 9.220982328924391e-07, "loss": 0.005, "step": 262960 }, { "epoch": 2.8096586356108766, "grad_norm": 10.662861824035645, "learning_rate": 9.220892268243254e-07, "loss": 0.0266, "step": 262970 }, { "epoch": 2.809765478925156, "grad_norm": 0.0022851971443742514, "learning_rate": 9.220802202796411e-07, "loss": 0.0339, "step": 262980 }, { "epoch": 2.809872322239436, "grad_norm": 0.014758029952645302, "learning_rate": 9.220712132583965e-07, "loss": 0.0282, "step": 262990 }, { "epoch": 2.8099791655537154, "grad_norm": 0.3577154278755188, "learning_rate": 9.220622057606015e-07, "loss": 0.0323, "step": 263000 }, { "epoch": 2.8100860088679953, "grad_norm": 10.53026008605957, "learning_rate": 9.220531977862667e-07, "loss": 0.0491, "step": 263010 }, { "epoch": 2.8101928521822748, "grad_norm": 4.901169300079346, "learning_rate": 9.220441893354019e-07, "loss": 0.0076, "step": 263020 }, { "epoch": 2.8102996954965542, "grad_norm": 0.38106289505958557, "learning_rate": 9.220351804080171e-07, "loss": 0.0235, "step": 263030 }, { "epoch": 2.8104065388108337, "grad_norm": 0.16921594738960266, "learning_rate": 9.220261710041228e-07, "loss": 0.0113, "step": 263040 }, { "epoch": 2.8105133821251136, "grad_norm": 0.0836668387055397, "learning_rate": 9.220171611237293e-07, "loss": 0.0163, "step": 263050 }, { "epoch": 2.810620225439393, "grad_norm": 0.10910090804100037, "learning_rate": 9.220081507668465e-07, "loss": 0.0097, "step": 263060 }, { "epoch": 2.810727068753673, "grad_norm": 0.3588876724243164, "learning_rate": 9.219991399334846e-07, "loss": 0.0127, "step": 263070 }, { "epoch": 2.8108339120679524, "grad_norm": 4.865545272827148, "learning_rate": 9.219901286236537e-07, "loss": 0.0502, "step": 263080 }, { "epoch": 2.810940755382232, "grad_norm": 0.29455384612083435, "learning_rate": 9.219811168373643e-07, "loss": 0.0266, "step": 263090 }, { "epoch": 2.8110475986965113, "grad_norm": 0.009555251337587833, "learning_rate": 9.219721045746263e-07, "loss": 0.0067, "step": 263100 }, { "epoch": 2.8111544420107912, "grad_norm": 1.121725082397461, "learning_rate": 9.2196309183545e-07, "loss": 0.0553, "step": 263110 }, { "epoch": 2.8112612853250707, "grad_norm": 1.1756452322006226, "learning_rate": 9.219540786198455e-07, "loss": 0.0204, "step": 263120 }, { "epoch": 2.8113681286393506, "grad_norm": 1.2081509828567505, "learning_rate": 9.219450649278229e-07, "loss": 0.0304, "step": 263130 }, { "epoch": 2.81147497195363, "grad_norm": 0.03352100029587746, "learning_rate": 9.219360507593925e-07, "loss": 0.0409, "step": 263140 }, { "epoch": 2.8115818152679095, "grad_norm": 2.683060884475708, "learning_rate": 9.219270361145645e-07, "loss": 0.0073, "step": 263150 }, { "epoch": 2.811688658582189, "grad_norm": 0.00490630604326725, "learning_rate": 9.21918020993349e-07, "loss": 0.0252, "step": 263160 }, { "epoch": 2.811795501896469, "grad_norm": 4.763671875, "learning_rate": 9.219090053957563e-07, "loss": 0.0101, "step": 263170 }, { "epoch": 2.8119023452107483, "grad_norm": 2.592362642288208, "learning_rate": 9.218999893217965e-07, "loss": 0.0207, "step": 263180 }, { "epoch": 2.8120091885250282, "grad_norm": 0.2849423885345459, "learning_rate": 9.218909727714796e-07, "loss": 0.0076, "step": 263190 }, { "epoch": 2.8121160318393077, "grad_norm": 16.020292282104492, "learning_rate": 9.218819557448161e-07, "loss": 0.0518, "step": 263200 }, { "epoch": 2.812222875153587, "grad_norm": 0.05479271337389946, "learning_rate": 9.218729382418159e-07, "loss": 0.045, "step": 263210 }, { "epoch": 2.8123297184678666, "grad_norm": 0.07436899095773697, "learning_rate": 9.218639202624894e-07, "loss": 0.0091, "step": 263220 }, { "epoch": 2.8124365617821465, "grad_norm": 17.001998901367188, "learning_rate": 9.218549018068466e-07, "loss": 0.0426, "step": 263230 }, { "epoch": 2.812543405096426, "grad_norm": 0.007911217398941517, "learning_rate": 9.218458828748978e-07, "loss": 0.0273, "step": 263240 }, { "epoch": 2.812650248410706, "grad_norm": 0.10376347601413727, "learning_rate": 9.218368634666533e-07, "loss": 0.0501, "step": 263250 }, { "epoch": 2.8127570917249853, "grad_norm": 3.998178482055664, "learning_rate": 9.218278435821232e-07, "loss": 0.0095, "step": 263260 }, { "epoch": 2.812863935039265, "grad_norm": 0.006538147572427988, "learning_rate": 9.218188232213174e-07, "loss": 0.0287, "step": 263270 }, { "epoch": 2.8129707783535443, "grad_norm": 0.19689121842384338, "learning_rate": 9.218098023842464e-07, "loss": 0.0599, "step": 263280 }, { "epoch": 2.813077621667824, "grad_norm": 0.22282271087169647, "learning_rate": 9.218007810709203e-07, "loss": 0.0229, "step": 263290 }, { "epoch": 2.8131844649821036, "grad_norm": 4.591761589050293, "learning_rate": 9.217917592813494e-07, "loss": 0.0247, "step": 263300 }, { "epoch": 2.8132913082963835, "grad_norm": 1.9874213933944702, "learning_rate": 9.217827370155435e-07, "loss": 0.0337, "step": 263310 }, { "epoch": 2.813398151610663, "grad_norm": 0.11571915447711945, "learning_rate": 9.217737142735133e-07, "loss": 0.0083, "step": 263320 }, { "epoch": 2.8135049949249424, "grad_norm": 0.0467141754925251, "learning_rate": 9.217646910552686e-07, "loss": 0.0331, "step": 263330 }, { "epoch": 2.8136118382392223, "grad_norm": 0.26727423071861267, "learning_rate": 9.217556673608198e-07, "loss": 0.0067, "step": 263340 }, { "epoch": 2.813718681553502, "grad_norm": 0.007909661158919334, "learning_rate": 9.21746643190177e-07, "loss": 0.0264, "step": 263350 }, { "epoch": 2.8138255248677813, "grad_norm": 0.07982594519853592, "learning_rate": 9.217376185433505e-07, "loss": 0.0129, "step": 263360 }, { "epoch": 2.813932368182061, "grad_norm": 0.09152168780565262, "learning_rate": 9.217285934203502e-07, "loss": 0.0226, "step": 263370 }, { "epoch": 2.8140392114963406, "grad_norm": 0.0016693200450390577, "learning_rate": 9.217195678211866e-07, "loss": 0.0274, "step": 263380 }, { "epoch": 2.81414605481062, "grad_norm": 0.004339467268437147, "learning_rate": 9.217105417458699e-07, "loss": 0.0321, "step": 263390 }, { "epoch": 2.8142528981249, "grad_norm": 8.419398307800293, "learning_rate": 9.217015151944099e-07, "loss": 0.0676, "step": 263400 }, { "epoch": 2.8143597414391794, "grad_norm": 6.009547710418701, "learning_rate": 9.216924881668173e-07, "loss": 0.0194, "step": 263410 }, { "epoch": 2.814466584753459, "grad_norm": 3.404386043548584, "learning_rate": 9.21683460663102e-07, "loss": 0.0043, "step": 263420 }, { "epoch": 2.814573428067739, "grad_norm": 0.014337733387947083, "learning_rate": 9.216744326832742e-07, "loss": 0.0221, "step": 263430 }, { "epoch": 2.8146802713820183, "grad_norm": 0.0846586599946022, "learning_rate": 9.216654042273441e-07, "loss": 0.0126, "step": 263440 }, { "epoch": 2.8147871146962977, "grad_norm": 0.1970701664686203, "learning_rate": 9.216563752953221e-07, "loss": 0.0033, "step": 263450 }, { "epoch": 2.8148939580105776, "grad_norm": 23.134441375732422, "learning_rate": 9.21647345887218e-07, "loss": 0.0768, "step": 263460 }, { "epoch": 2.815000801324857, "grad_norm": 3.9182467460632324, "learning_rate": 9.216383160030424e-07, "loss": 0.0041, "step": 263470 }, { "epoch": 2.8151076446391365, "grad_norm": 3.357327938079834, "learning_rate": 9.216292856428054e-07, "loss": 0.023, "step": 263480 }, { "epoch": 2.8152144879534164, "grad_norm": 2.590442180633545, "learning_rate": 9.216202548065168e-07, "loss": 0.0183, "step": 263490 }, { "epoch": 2.815321331267696, "grad_norm": 0.13439305126667023, "learning_rate": 9.216112234941873e-07, "loss": 0.0037, "step": 263500 }, { "epoch": 2.8154281745819754, "grad_norm": 0.008458991535007954, "learning_rate": 9.216021917058269e-07, "loss": 0.0519, "step": 263510 }, { "epoch": 2.8155350178962553, "grad_norm": 0.0038123640697449446, "learning_rate": 9.215931594414459e-07, "loss": 0.0353, "step": 263520 }, { "epoch": 2.8156418612105347, "grad_norm": 0.05047411099076271, "learning_rate": 9.215841267010542e-07, "loss": 0.0015, "step": 263530 }, { "epoch": 2.8157487045248146, "grad_norm": 5.975201606750488, "learning_rate": 9.215750934846625e-07, "loss": 0.021, "step": 263540 }, { "epoch": 2.815855547839094, "grad_norm": 2.293449878692627, "learning_rate": 9.215660597922804e-07, "loss": 0.0102, "step": 263550 }, { "epoch": 2.8159623911533735, "grad_norm": 0.022818632423877716, "learning_rate": 9.215570256239185e-07, "loss": 0.0152, "step": 263560 }, { "epoch": 2.816069234467653, "grad_norm": 0.021748607978224754, "learning_rate": 9.215479909795869e-07, "loss": 0.0259, "step": 263570 }, { "epoch": 2.816176077781933, "grad_norm": 0.024977298453450203, "learning_rate": 9.215389558592958e-07, "loss": 0.0064, "step": 263580 }, { "epoch": 2.8162829210962124, "grad_norm": 6.363614082336426, "learning_rate": 9.215299202630555e-07, "loss": 0.0165, "step": 263590 }, { "epoch": 2.8163897644104923, "grad_norm": 0.002041666768491268, "learning_rate": 9.21520884190876e-07, "loss": 0.0305, "step": 263600 }, { "epoch": 2.8164966077247717, "grad_norm": 1.2626147270202637, "learning_rate": 9.215118476427676e-07, "loss": 0.0048, "step": 263610 }, { "epoch": 2.816603451039051, "grad_norm": 8.178502082824707, "learning_rate": 9.215028106187405e-07, "loss": 0.1054, "step": 263620 }, { "epoch": 2.8167102943533306, "grad_norm": 0.11886833608150482, "learning_rate": 9.21493773118805e-07, "loss": 0.03, "step": 263630 }, { "epoch": 2.8168171376676105, "grad_norm": 0.024746736511588097, "learning_rate": 9.214847351429712e-07, "loss": 0.0006, "step": 263640 }, { "epoch": 2.81692398098189, "grad_norm": 3.000404119491577, "learning_rate": 9.214756966912493e-07, "loss": 0.0286, "step": 263650 }, { "epoch": 2.81703082429617, "grad_norm": 0.007784009911119938, "learning_rate": 9.214666577636496e-07, "loss": 0.0623, "step": 263660 }, { "epoch": 2.8171376676104494, "grad_norm": 0.006391025613993406, "learning_rate": 9.214576183601821e-07, "loss": 0.0079, "step": 263670 }, { "epoch": 2.817244510924729, "grad_norm": 6.65347957611084, "learning_rate": 9.214485784808572e-07, "loss": 0.0317, "step": 263680 }, { "epoch": 2.8173513542390083, "grad_norm": 0.17840468883514404, "learning_rate": 9.214395381256851e-07, "loss": 0.0042, "step": 263690 }, { "epoch": 2.817458197553288, "grad_norm": 0.15670715272426605, "learning_rate": 9.214304972946758e-07, "loss": 0.0161, "step": 263700 }, { "epoch": 2.8175650408675676, "grad_norm": 0.01447019912302494, "learning_rate": 9.214214559878398e-07, "loss": 0.0183, "step": 263710 }, { "epoch": 2.8176718841818476, "grad_norm": 2.0436060428619385, "learning_rate": 9.214124142051871e-07, "loss": 0.0655, "step": 263720 }, { "epoch": 2.817778727496127, "grad_norm": 0.5239045023918152, "learning_rate": 9.21403371946728e-07, "loss": 0.0217, "step": 263730 }, { "epoch": 2.8178855708104065, "grad_norm": 0.9757863879203796, "learning_rate": 9.213943292124729e-07, "loss": 0.0078, "step": 263740 }, { "epoch": 2.817992414124686, "grad_norm": 5.297442436218262, "learning_rate": 9.213852860024315e-07, "loss": 0.0579, "step": 263750 }, { "epoch": 2.818099257438966, "grad_norm": 4.065976619720459, "learning_rate": 9.213762423166143e-07, "loss": 0.0098, "step": 263760 }, { "epoch": 2.8182061007532453, "grad_norm": 3.7337515354156494, "learning_rate": 9.213671981550317e-07, "loss": 0.0338, "step": 263770 }, { "epoch": 2.818312944067525, "grad_norm": 2.7165017127990723, "learning_rate": 9.213581535176937e-07, "loss": 0.0182, "step": 263780 }, { "epoch": 2.8184197873818047, "grad_norm": 0.3426561951637268, "learning_rate": 9.213491084046105e-07, "loss": 0.0209, "step": 263790 }, { "epoch": 2.818526630696084, "grad_norm": 0.3804744780063629, "learning_rate": 9.213400628157924e-07, "loss": 0.0025, "step": 263800 }, { "epoch": 2.8186334740103636, "grad_norm": 10.082978248596191, "learning_rate": 9.213310167512494e-07, "loss": 0.0403, "step": 263810 }, { "epoch": 2.8187403173246435, "grad_norm": 2.561750650405884, "learning_rate": 9.213219702109921e-07, "loss": 0.0285, "step": 263820 }, { "epoch": 2.818847160638923, "grad_norm": 6.397312164306641, "learning_rate": 9.213129231950304e-07, "loss": 0.0069, "step": 263830 }, { "epoch": 2.818954003953203, "grad_norm": 0.025108452886343002, "learning_rate": 9.213038757033746e-07, "loss": 0.0189, "step": 263840 }, { "epoch": 2.8190608472674823, "grad_norm": 6.860453128814697, "learning_rate": 9.212948277360349e-07, "loss": 0.0412, "step": 263850 }, { "epoch": 2.8191676905817618, "grad_norm": 1.2753198146820068, "learning_rate": 9.212857792930217e-07, "loss": 0.067, "step": 263860 }, { "epoch": 2.819274533896041, "grad_norm": 3.3402304649353027, "learning_rate": 9.21276730374345e-07, "loss": 0.0128, "step": 263870 }, { "epoch": 2.819381377210321, "grad_norm": 4.812805652618408, "learning_rate": 9.212676809800149e-07, "loss": 0.0277, "step": 263880 }, { "epoch": 2.8194882205246006, "grad_norm": 5.881824970245361, "learning_rate": 9.21258631110042e-07, "loss": 0.0071, "step": 263890 }, { "epoch": 2.8195950638388805, "grad_norm": 0.033340536057949066, "learning_rate": 9.212495807644363e-07, "loss": 0.0006, "step": 263900 }, { "epoch": 2.81970190715316, "grad_norm": 0.8847204446792603, "learning_rate": 9.212405299432077e-07, "loss": 0.0345, "step": 263910 }, { "epoch": 2.8198087504674394, "grad_norm": 1.8880199193954468, "learning_rate": 9.212314786463671e-07, "loss": 0.0233, "step": 263920 }, { "epoch": 2.819915593781719, "grad_norm": 1.3944100141525269, "learning_rate": 9.212224268739243e-07, "loss": 0.0438, "step": 263930 }, { "epoch": 2.8200224370959988, "grad_norm": 0.052479252219200134, "learning_rate": 9.212133746258897e-07, "loss": 0.024, "step": 263940 }, { "epoch": 2.820129280410278, "grad_norm": 41.05778884887695, "learning_rate": 9.212043219022731e-07, "loss": 0.0271, "step": 263950 }, { "epoch": 2.820236123724558, "grad_norm": 0.10112976282835007, "learning_rate": 9.211952687030853e-07, "loss": 0.0142, "step": 263960 }, { "epoch": 2.8203429670388376, "grad_norm": 2.5468499660491943, "learning_rate": 9.211862150283362e-07, "loss": 0.0185, "step": 263970 }, { "epoch": 2.820449810353117, "grad_norm": 0.004894251935184002, "learning_rate": 9.211771608780361e-07, "loss": 0.0218, "step": 263980 }, { "epoch": 2.8205566536673965, "grad_norm": 4.7139387130737305, "learning_rate": 9.21168106252195e-07, "loss": 0.0074, "step": 263990 }, { "epoch": 2.8206634969816764, "grad_norm": 0.06628893315792084, "learning_rate": 9.211590511508235e-07, "loss": 0.0076, "step": 264000 }, { "epoch": 2.820770340295956, "grad_norm": 0.9176662564277649, "learning_rate": 9.211499955739316e-07, "loss": 0.0086, "step": 264010 }, { "epoch": 2.8208771836102358, "grad_norm": 6.556332111358643, "learning_rate": 9.211409395215295e-07, "loss": 0.0246, "step": 264020 }, { "epoch": 2.820984026924515, "grad_norm": 2.349424362182617, "learning_rate": 9.211318829936277e-07, "loss": 0.0068, "step": 264030 }, { "epoch": 2.8210908702387947, "grad_norm": 0.016553187742829323, "learning_rate": 9.211228259902359e-07, "loss": 0.0232, "step": 264040 }, { "epoch": 2.8211977135530746, "grad_norm": 0.10258686542510986, "learning_rate": 9.21113768511365e-07, "loss": 0.0103, "step": 264050 }, { "epoch": 2.821304556867354, "grad_norm": 0.0762430727481842, "learning_rate": 9.211047105570246e-07, "loss": 0.0616, "step": 264060 }, { "epoch": 2.8214114001816335, "grad_norm": 0.020791688933968544, "learning_rate": 9.210956521272253e-07, "loss": 0.0057, "step": 264070 }, { "epoch": 2.8215182434959134, "grad_norm": 0.09655673056840897, "learning_rate": 9.210865932219774e-07, "loss": 0.0175, "step": 264080 }, { "epoch": 2.821625086810193, "grad_norm": 0.3368992507457733, "learning_rate": 9.210775338412906e-07, "loss": 0.0082, "step": 264090 }, { "epoch": 2.8217319301244723, "grad_norm": 1.3948931694030762, "learning_rate": 9.210684739851757e-07, "loss": 0.0091, "step": 264100 }, { "epoch": 2.8218387734387522, "grad_norm": 2.1950550079345703, "learning_rate": 9.210594136536427e-07, "loss": 0.0044, "step": 264110 }, { "epoch": 2.8219456167530317, "grad_norm": 0.062446024268865585, "learning_rate": 9.210503528467018e-07, "loss": 0.0263, "step": 264120 }, { "epoch": 2.822052460067311, "grad_norm": 3.6163644790649414, "learning_rate": 9.210412915643632e-07, "loss": 0.0082, "step": 264130 }, { "epoch": 2.822159303381591, "grad_norm": 0.8732558488845825, "learning_rate": 9.210322298066375e-07, "loss": 0.0096, "step": 264140 }, { "epoch": 2.8222661466958705, "grad_norm": 1.5732247829437256, "learning_rate": 9.210231675735345e-07, "loss": 0.0378, "step": 264150 }, { "epoch": 2.82237299001015, "grad_norm": 0.006753734778612852, "learning_rate": 9.210141048650643e-07, "loss": 0.0292, "step": 264160 }, { "epoch": 2.82247983332443, "grad_norm": 0.030922044068574905, "learning_rate": 9.210050416812377e-07, "loss": 0.0165, "step": 264170 }, { "epoch": 2.8225866766387093, "grad_norm": 0.6634824275970459, "learning_rate": 9.209959780220646e-07, "loss": 0.0212, "step": 264180 }, { "epoch": 2.822693519952989, "grad_norm": 1.1888933181762695, "learning_rate": 9.209869138875552e-07, "loss": 0.013, "step": 264190 }, { "epoch": 2.8228003632672687, "grad_norm": 0.008666608482599258, "learning_rate": 9.209778492777198e-07, "loss": 0.0354, "step": 264200 }, { "epoch": 2.822907206581548, "grad_norm": 7.538045883178711, "learning_rate": 9.209687841925686e-07, "loss": 0.0091, "step": 264210 }, { "epoch": 2.8230140498958276, "grad_norm": 3.2001163959503174, "learning_rate": 9.209597186321118e-07, "loss": 0.0094, "step": 264220 }, { "epoch": 2.8231208932101075, "grad_norm": 5.586474418640137, "learning_rate": 9.2095065259636e-07, "loss": 0.0182, "step": 264230 }, { "epoch": 2.823227736524387, "grad_norm": 0.014453733339905739, "learning_rate": 9.209415860853229e-07, "loss": 0.0162, "step": 264240 }, { "epoch": 2.823334579838667, "grad_norm": 0.2862597107887268, "learning_rate": 9.20932519099011e-07, "loss": 0.0202, "step": 264250 }, { "epoch": 2.8234414231529463, "grad_norm": 0.016612211242318153, "learning_rate": 9.209234516374346e-07, "loss": 0.004, "step": 264260 }, { "epoch": 2.823548266467226, "grad_norm": 0.21248146891593933, "learning_rate": 9.209143837006038e-07, "loss": 0.004, "step": 264270 }, { "epoch": 2.8236551097815052, "grad_norm": 0.060958754271268845, "learning_rate": 9.209053152885289e-07, "loss": 0.0211, "step": 264280 }, { "epoch": 2.823761953095785, "grad_norm": 0.02083384245634079, "learning_rate": 9.208962464012202e-07, "loss": 0.0453, "step": 264290 }, { "epoch": 2.8238687964100646, "grad_norm": 0.002096601529046893, "learning_rate": 9.208871770386878e-07, "loss": 0.0185, "step": 264300 }, { "epoch": 2.8239756397243445, "grad_norm": 0.028012890368700027, "learning_rate": 9.20878107200942e-07, "loss": 0.006, "step": 264310 }, { "epoch": 2.824082483038624, "grad_norm": 0.1098373755812645, "learning_rate": 9.208690368879931e-07, "loss": 0.0878, "step": 264320 }, { "epoch": 2.8241893263529034, "grad_norm": 0.07425826787948608, "learning_rate": 9.208599660998513e-07, "loss": 0.0065, "step": 264330 }, { "epoch": 2.824296169667183, "grad_norm": 5.844177722930908, "learning_rate": 9.208508948365268e-07, "loss": 0.0121, "step": 264340 }, { "epoch": 2.824403012981463, "grad_norm": 0.015022272244095802, "learning_rate": 9.208418230980298e-07, "loss": 0.0324, "step": 264350 }, { "epoch": 2.8245098562957422, "grad_norm": 0.619287371635437, "learning_rate": 9.208327508843707e-07, "loss": 0.0183, "step": 264360 }, { "epoch": 2.824616699610022, "grad_norm": 0.15812654793262482, "learning_rate": 9.208236781955596e-07, "loss": 0.0165, "step": 264370 }, { "epoch": 2.8247235429243016, "grad_norm": 0.23307329416275024, "learning_rate": 9.20814605031607e-07, "loss": 0.004, "step": 264380 }, { "epoch": 2.824830386238581, "grad_norm": 7.065685749053955, "learning_rate": 9.208055313925227e-07, "loss": 0.0378, "step": 264390 }, { "epoch": 2.8249372295528605, "grad_norm": 0.01347317360341549, "learning_rate": 9.207964572783173e-07, "loss": 0.0038, "step": 264400 }, { "epoch": 2.8250440728671404, "grad_norm": 0.0165148563683033, "learning_rate": 9.207873826890011e-07, "loss": 0.0297, "step": 264410 }, { "epoch": 2.82515091618142, "grad_norm": 7.795791149139404, "learning_rate": 9.20778307624584e-07, "loss": 0.0264, "step": 264420 }, { "epoch": 2.8252577594957, "grad_norm": 0.059933073818683624, "learning_rate": 9.207692320850764e-07, "loss": 0.0117, "step": 264430 }, { "epoch": 2.8253646028099793, "grad_norm": 0.020095430314540863, "learning_rate": 9.207601560704887e-07, "loss": 0.0105, "step": 264440 }, { "epoch": 2.8254714461242587, "grad_norm": 0.17966419458389282, "learning_rate": 9.20751079580831e-07, "loss": 0.0328, "step": 264450 }, { "epoch": 2.825578289438538, "grad_norm": 0.6463167667388916, "learning_rate": 9.207420026161136e-07, "loss": 0.0054, "step": 264460 }, { "epoch": 2.825685132752818, "grad_norm": 0.019774969667196274, "learning_rate": 9.207329251763466e-07, "loss": 0.0107, "step": 264470 }, { "epoch": 2.8257919760670975, "grad_norm": 0.01766042411327362, "learning_rate": 9.207238472615404e-07, "loss": 0.0017, "step": 264480 }, { "epoch": 2.8258988193813774, "grad_norm": 5.895252227783203, "learning_rate": 9.207147688717055e-07, "loss": 0.0263, "step": 264490 }, { "epoch": 2.826005662695657, "grad_norm": 14.997499465942383, "learning_rate": 9.207056900068515e-07, "loss": 0.0201, "step": 264500 }, { "epoch": 2.8261125060099364, "grad_norm": 0.013195483945310116, "learning_rate": 9.206966106669891e-07, "loss": 0.003, "step": 264510 }, { "epoch": 2.826219349324216, "grad_norm": 0.025145865976810455, "learning_rate": 9.206875308521287e-07, "loss": 0.0223, "step": 264520 }, { "epoch": 2.8263261926384957, "grad_norm": 2.6803596019744873, "learning_rate": 9.206784505622801e-07, "loss": 0.0391, "step": 264530 }, { "epoch": 2.826433035952775, "grad_norm": 1.525248408317566, "learning_rate": 9.206693697974539e-07, "loss": 0.0289, "step": 264540 }, { "epoch": 2.826539879267055, "grad_norm": 0.8975122570991516, "learning_rate": 9.206602885576602e-07, "loss": 0.0424, "step": 264550 }, { "epoch": 2.8266467225813345, "grad_norm": 7.974057197570801, "learning_rate": 9.206512068429092e-07, "loss": 0.0367, "step": 264560 }, { "epoch": 2.826753565895614, "grad_norm": 0.13322673738002777, "learning_rate": 9.206421246532113e-07, "loss": 0.0163, "step": 264570 }, { "epoch": 2.8268604092098935, "grad_norm": 0.10899840295314789, "learning_rate": 9.206330419885768e-07, "loss": 0.0117, "step": 264580 }, { "epoch": 2.8269672525241734, "grad_norm": 0.004201047122478485, "learning_rate": 9.206239588490156e-07, "loss": 0.0018, "step": 264590 }, { "epoch": 2.827074095838453, "grad_norm": 0.038205914199352264, "learning_rate": 9.206148752345384e-07, "loss": 0.0181, "step": 264600 }, { "epoch": 2.8271809391527327, "grad_norm": 0.11223766952753067, "learning_rate": 9.206057911451553e-07, "loss": 0.0384, "step": 264610 }, { "epoch": 2.827287782467012, "grad_norm": 6.03231954574585, "learning_rate": 9.205967065808764e-07, "loss": 0.0317, "step": 264620 }, { "epoch": 2.8273946257812916, "grad_norm": 0.08512056618928909, "learning_rate": 9.205876215417121e-07, "loss": 0.0173, "step": 264630 }, { "epoch": 2.827501469095571, "grad_norm": 0.001605318277142942, "learning_rate": 9.205785360276727e-07, "loss": 0.0219, "step": 264640 }, { "epoch": 2.827608312409851, "grad_norm": 4.650577545166016, "learning_rate": 9.205694500387683e-07, "loss": 0.0264, "step": 264650 }, { "epoch": 2.8277151557241305, "grad_norm": 0.016307145357131958, "learning_rate": 9.205603635750092e-07, "loss": 0.0142, "step": 264660 }, { "epoch": 2.8278219990384104, "grad_norm": 15.732779502868652, "learning_rate": 9.205512766364058e-07, "loss": 0.021, "step": 264670 }, { "epoch": 2.82792884235269, "grad_norm": 4.883881568908691, "learning_rate": 9.205421892229684e-07, "loss": 0.0393, "step": 264680 }, { "epoch": 2.8280356856669693, "grad_norm": 0.0041521391831338406, "learning_rate": 9.205331013347069e-07, "loss": 0.0359, "step": 264690 }, { "epoch": 2.8281425289812487, "grad_norm": 6.75524377822876, "learning_rate": 9.205240129716319e-07, "loss": 0.0257, "step": 264700 }, { "epoch": 2.8282493722955286, "grad_norm": 0.08382914215326309, "learning_rate": 9.205149241337535e-07, "loss": 0.0236, "step": 264710 }, { "epoch": 2.828356215609808, "grad_norm": 0.21591514348983765, "learning_rate": 9.205058348210821e-07, "loss": 0.0057, "step": 264720 }, { "epoch": 2.828463058924088, "grad_norm": 2.6876327991485596, "learning_rate": 9.204967450336278e-07, "loss": 0.0168, "step": 264730 }, { "epoch": 2.8285699022383675, "grad_norm": 0.4324152171611786, "learning_rate": 9.204876547714009e-07, "loss": 0.0193, "step": 264740 }, { "epoch": 2.828676745552647, "grad_norm": 1.568401575088501, "learning_rate": 9.204785640344119e-07, "loss": 0.0101, "step": 264750 }, { "epoch": 2.8287835888669264, "grad_norm": 0.30450528860092163, "learning_rate": 9.204694728226706e-07, "loss": 0.0387, "step": 264760 }, { "epoch": 2.8288904321812063, "grad_norm": 2.8922321796417236, "learning_rate": 9.204603811361877e-07, "loss": 0.0041, "step": 264770 }, { "epoch": 2.8289972754954857, "grad_norm": 0.0022917292080819607, "learning_rate": 9.204512889749732e-07, "loss": 0.0172, "step": 264780 }, { "epoch": 2.8291041188097656, "grad_norm": 0.028038611635565758, "learning_rate": 9.204421963390376e-07, "loss": 0.0251, "step": 264790 }, { "epoch": 2.829210962124045, "grad_norm": 0.004662091378122568, "learning_rate": 9.204331032283909e-07, "loss": 0.0085, "step": 264800 }, { "epoch": 2.8293178054383246, "grad_norm": 0.034912340342998505, "learning_rate": 9.204240096430436e-07, "loss": 0.0258, "step": 264810 }, { "epoch": 2.8294246487526045, "grad_norm": 3.728623628616333, "learning_rate": 9.204149155830057e-07, "loss": 0.023, "step": 264820 }, { "epoch": 2.829531492066884, "grad_norm": 0.04541440308094025, "learning_rate": 9.204058210482877e-07, "loss": 0.0438, "step": 264830 }, { "epoch": 2.8296383353811634, "grad_norm": 0.031324610114097595, "learning_rate": 9.203967260388999e-07, "loss": 0.0149, "step": 264840 }, { "epoch": 2.8297451786954433, "grad_norm": 0.6492143869400024, "learning_rate": 9.203876305548524e-07, "loss": 0.0136, "step": 264850 }, { "epoch": 2.8298520220097227, "grad_norm": 0.007539290934801102, "learning_rate": 9.203785345961554e-07, "loss": 0.0075, "step": 264860 }, { "epoch": 2.829958865324002, "grad_norm": 0.04157935082912445, "learning_rate": 9.203694381628195e-07, "loss": 0.0348, "step": 264870 }, { "epoch": 2.830065708638282, "grad_norm": 0.06092757731676102, "learning_rate": 9.203603412548548e-07, "loss": 0.0114, "step": 264880 }, { "epoch": 2.8301725519525616, "grad_norm": 0.14643354713916779, "learning_rate": 9.203512438722713e-07, "loss": 0.0085, "step": 264890 }, { "epoch": 2.830279395266841, "grad_norm": 0.010650320909917355, "learning_rate": 9.203421460150797e-07, "loss": 0.0023, "step": 264900 }, { "epoch": 2.830386238581121, "grad_norm": 0.1381627768278122, "learning_rate": 9.203330476832903e-07, "loss": 0.0138, "step": 264910 }, { "epoch": 2.8304930818954004, "grad_norm": 4.546048164367676, "learning_rate": 9.203239488769128e-07, "loss": 0.0199, "step": 264920 }, { "epoch": 2.83059992520968, "grad_norm": 0.0606643483042717, "learning_rate": 9.203148495959581e-07, "loss": 0.0078, "step": 264930 }, { "epoch": 2.8307067685239597, "grad_norm": 0.4598364531993866, "learning_rate": 9.20305749840436e-07, "loss": 0.0168, "step": 264940 }, { "epoch": 2.830813611838239, "grad_norm": 0.06049613654613495, "learning_rate": 9.20296649610357e-07, "loss": 0.0083, "step": 264950 }, { "epoch": 2.8309204551525187, "grad_norm": 0.0658947303891182, "learning_rate": 9.202875489057315e-07, "loss": 0.0236, "step": 264960 }, { "epoch": 2.8310272984667986, "grad_norm": 6.365312576293945, "learning_rate": 9.202784477265696e-07, "loss": 0.0527, "step": 264970 }, { "epoch": 2.831134141781078, "grad_norm": 0.007352527696639299, "learning_rate": 9.202693460728817e-07, "loss": 0.0259, "step": 264980 }, { "epoch": 2.8312409850953575, "grad_norm": 0.004044210072606802, "learning_rate": 9.202602439446778e-07, "loss": 0.0098, "step": 264990 }, { "epoch": 2.8313478284096374, "grad_norm": 3.7780697345733643, "learning_rate": 9.202511413419685e-07, "loss": 0.0363, "step": 265000 }, { "epoch": 2.831454671723917, "grad_norm": 1.0781958103179932, "learning_rate": 9.202420382647638e-07, "loss": 0.0193, "step": 265010 }, { "epoch": 2.8315615150381968, "grad_norm": 0.05955149233341217, "learning_rate": 9.202329347130742e-07, "loss": 0.0607, "step": 265020 }, { "epoch": 2.831668358352476, "grad_norm": 0.011458665132522583, "learning_rate": 9.202238306869099e-07, "loss": 0.0411, "step": 265030 }, { "epoch": 2.8317752016667557, "grad_norm": 0.010815360583364964, "learning_rate": 9.202147261862811e-07, "loss": 0.0158, "step": 265040 }, { "epoch": 2.831882044981035, "grad_norm": 0.9451083540916443, "learning_rate": 9.202056212111984e-07, "loss": 0.0533, "step": 265050 }, { "epoch": 2.831988888295315, "grad_norm": 4.211323261260986, "learning_rate": 9.201965157616717e-07, "loss": 0.0062, "step": 265060 }, { "epoch": 2.8320957316095945, "grad_norm": 2.5272209644317627, "learning_rate": 9.201874098377114e-07, "loss": 0.0123, "step": 265070 }, { "epoch": 2.8322025749238744, "grad_norm": 0.04275641590356827, "learning_rate": 9.201783034393278e-07, "loss": 0.0117, "step": 265080 }, { "epoch": 2.832309418238154, "grad_norm": 1.7898756265640259, "learning_rate": 9.201691965665313e-07, "loss": 0.029, "step": 265090 }, { "epoch": 2.8324162615524333, "grad_norm": 8.282583236694336, "learning_rate": 9.201600892193318e-07, "loss": 0.0513, "step": 265100 }, { "epoch": 2.8325231048667128, "grad_norm": 0.9774449467658997, "learning_rate": 9.2015098139774e-07, "loss": 0.006, "step": 265110 }, { "epoch": 2.8326299481809927, "grad_norm": 0.06062214821577072, "learning_rate": 9.201418731017661e-07, "loss": 0.0222, "step": 265120 }, { "epoch": 2.832736791495272, "grad_norm": 2.084055185317993, "learning_rate": 9.201327643314203e-07, "loss": 0.0462, "step": 265130 }, { "epoch": 2.832843634809552, "grad_norm": 10.996123313903809, "learning_rate": 9.201236550867128e-07, "loss": 0.0374, "step": 265140 }, { "epoch": 2.8329504781238315, "grad_norm": 0.02363451197743416, "learning_rate": 9.201145453676539e-07, "loss": 0.0162, "step": 265150 }, { "epoch": 2.833057321438111, "grad_norm": 1.1921685934066772, "learning_rate": 9.201054351742543e-07, "loss": 0.0111, "step": 265160 }, { "epoch": 2.8331641647523904, "grad_norm": 18.438232421875, "learning_rate": 9.200963245065238e-07, "loss": 0.0317, "step": 265170 }, { "epoch": 2.8332710080666703, "grad_norm": 0.7354297637939453, "learning_rate": 9.200872133644729e-07, "loss": 0.0714, "step": 265180 }, { "epoch": 2.8333778513809498, "grad_norm": 0.010644212365150452, "learning_rate": 9.200781017481116e-07, "loss": 0.0065, "step": 265190 }, { "epoch": 2.8334846946952297, "grad_norm": 1.4709632396697998, "learning_rate": 9.200689896574505e-07, "loss": 0.0039, "step": 265200 }, { "epoch": 2.833591538009509, "grad_norm": 1.224200963973999, "learning_rate": 9.200598770924998e-07, "loss": 0.0109, "step": 265210 }, { "epoch": 2.8336983813237886, "grad_norm": 0.01762722246348858, "learning_rate": 9.2005076405327e-07, "loss": 0.008, "step": 265220 }, { "epoch": 2.833805224638068, "grad_norm": 0.010277247987687588, "learning_rate": 9.20041650539771e-07, "loss": 0.0017, "step": 265230 }, { "epoch": 2.833912067952348, "grad_norm": 2.5026907920837402, "learning_rate": 9.200325365520134e-07, "loss": 0.0078, "step": 265240 }, { "epoch": 2.8340189112666274, "grad_norm": 0.3842138946056366, "learning_rate": 9.200234220900072e-07, "loss": 0.0045, "step": 265250 }, { "epoch": 2.8341257545809073, "grad_norm": 0.1374453753232956, "learning_rate": 9.200143071537629e-07, "loss": 0.0233, "step": 265260 }, { "epoch": 2.834232597895187, "grad_norm": 8.559671401977539, "learning_rate": 9.20005191743291e-07, "loss": 0.0354, "step": 265270 }, { "epoch": 2.8343394412094662, "grad_norm": 2.1781482696533203, "learning_rate": 9.199960758586011e-07, "loss": 0.0164, "step": 265280 }, { "epoch": 2.8344462845237457, "grad_norm": 0.003467065282166004, "learning_rate": 9.199869594997043e-07, "loss": 0.0097, "step": 265290 }, { "epoch": 2.8345531278380256, "grad_norm": 1.9502400159835815, "learning_rate": 9.199778426666102e-07, "loss": 0.0146, "step": 265300 }, { "epoch": 2.834659971152305, "grad_norm": 1.2991186380386353, "learning_rate": 9.199687253593298e-07, "loss": 0.0048, "step": 265310 }, { "epoch": 2.834766814466585, "grad_norm": 0.8601128458976746, "learning_rate": 9.199596075778729e-07, "loss": 0.0355, "step": 265320 }, { "epoch": 2.8348736577808644, "grad_norm": 3.7943220138549805, "learning_rate": 9.199504893222497e-07, "loss": 0.0245, "step": 265330 }, { "epoch": 2.834980501095144, "grad_norm": 0.010448391549289227, "learning_rate": 9.199413705924708e-07, "loss": 0.0069, "step": 265340 }, { "epoch": 2.8350873444094233, "grad_norm": 5.676609039306641, "learning_rate": 9.199322513885465e-07, "loss": 0.0271, "step": 265350 }, { "epoch": 2.8351941877237032, "grad_norm": 0.020203972235322, "learning_rate": 9.199231317104869e-07, "loss": 0.0274, "step": 265360 }, { "epoch": 2.8353010310379827, "grad_norm": 0.07036501169204712, "learning_rate": 9.199140115583024e-07, "loss": 0.0099, "step": 265370 }, { "epoch": 2.8354078743522626, "grad_norm": 4.0089945793151855, "learning_rate": 9.199048909320033e-07, "loss": 0.0083, "step": 265380 }, { "epoch": 2.835514717666542, "grad_norm": 0.7376008033752441, "learning_rate": 9.198957698315999e-07, "loss": 0.0041, "step": 265390 }, { "epoch": 2.8356215609808215, "grad_norm": 2.4191620349884033, "learning_rate": 9.198866482571024e-07, "loss": 0.007, "step": 265400 }, { "epoch": 2.835728404295101, "grad_norm": 0.025891201570630074, "learning_rate": 9.198775262085213e-07, "loss": 0.0779, "step": 265410 }, { "epoch": 2.835835247609381, "grad_norm": 1.4552184343338013, "learning_rate": 9.198684036858668e-07, "loss": 0.0216, "step": 265420 }, { "epoch": 2.8359420909236603, "grad_norm": 0.007999852299690247, "learning_rate": 9.19859280689149e-07, "loss": 0.096, "step": 265430 }, { "epoch": 2.8360489342379402, "grad_norm": 0.01087996270507574, "learning_rate": 9.198501572183785e-07, "loss": 0.0202, "step": 265440 }, { "epoch": 2.8361557775522197, "grad_norm": 0.004298684187233448, "learning_rate": 9.198410332735655e-07, "loss": 0.0182, "step": 265450 }, { "epoch": 2.836262620866499, "grad_norm": 2.814091920852661, "learning_rate": 9.198319088547204e-07, "loss": 0.0176, "step": 265460 }, { "epoch": 2.8363694641807786, "grad_norm": 5.453623294830322, "learning_rate": 9.198227839618532e-07, "loss": 0.0245, "step": 265470 }, { "epoch": 2.8364763074950585, "grad_norm": 0.0716967061161995, "learning_rate": 9.198136585949745e-07, "loss": 0.0213, "step": 265480 }, { "epoch": 2.836583150809338, "grad_norm": 0.7060986161231995, "learning_rate": 9.198045327540944e-07, "loss": 0.0111, "step": 265490 }, { "epoch": 2.836689994123618, "grad_norm": 0.9899969100952148, "learning_rate": 9.197954064392234e-07, "loss": 0.0229, "step": 265500 }, { "epoch": 2.8367968374378973, "grad_norm": 2.333634853363037, "learning_rate": 9.197862796503717e-07, "loss": 0.0091, "step": 265510 }, { "epoch": 2.836903680752177, "grad_norm": 0.23361621797084808, "learning_rate": 9.197771523875495e-07, "loss": 0.0159, "step": 265520 }, { "epoch": 2.8370105240664567, "grad_norm": 0.02388833649456501, "learning_rate": 9.197680246507673e-07, "loss": 0.0156, "step": 265530 }, { "epoch": 2.837117367380736, "grad_norm": 10.154720306396484, "learning_rate": 9.197588964400352e-07, "loss": 0.0136, "step": 265540 }, { "epoch": 2.8372242106950156, "grad_norm": 0.3111923336982727, "learning_rate": 9.197497677553638e-07, "loss": 0.0101, "step": 265550 }, { "epoch": 2.8373310540092955, "grad_norm": 0.23209862411022186, "learning_rate": 9.197406385967632e-07, "loss": 0.0285, "step": 265560 }, { "epoch": 2.837437897323575, "grad_norm": 0.00929584912955761, "learning_rate": 9.197315089642437e-07, "loss": 0.0081, "step": 265570 }, { "epoch": 2.8375447406378544, "grad_norm": 1.1760274171829224, "learning_rate": 9.197223788578156e-07, "loss": 0.0111, "step": 265580 }, { "epoch": 2.8376515839521343, "grad_norm": 4.435914993286133, "learning_rate": 9.197132482774892e-07, "loss": 0.029, "step": 265590 }, { "epoch": 2.837758427266414, "grad_norm": 0.10029776394367218, "learning_rate": 9.19704117223275e-07, "loss": 0.0158, "step": 265600 }, { "epoch": 2.8378652705806933, "grad_norm": 0.0029416026081889868, "learning_rate": 9.196949856951831e-07, "loss": 0.0034, "step": 265610 }, { "epoch": 2.837972113894973, "grad_norm": 0.00503644859418273, "learning_rate": 9.196858536932239e-07, "loss": 0.0113, "step": 265620 }, { "epoch": 2.8380789572092526, "grad_norm": 9.834891319274902, "learning_rate": 9.196767212174077e-07, "loss": 0.0601, "step": 265630 }, { "epoch": 2.838185800523532, "grad_norm": 4.155580997467041, "learning_rate": 9.196675882677449e-07, "loss": 0.0075, "step": 265640 }, { "epoch": 2.838292643837812, "grad_norm": 0.2707178592681885, "learning_rate": 9.196584548442456e-07, "loss": 0.0299, "step": 265650 }, { "epoch": 2.8383994871520914, "grad_norm": 0.16410578787326813, "learning_rate": 9.196493209469203e-07, "loss": 0.017, "step": 265660 }, { "epoch": 2.838506330466371, "grad_norm": 0.03192546218633652, "learning_rate": 9.196401865757793e-07, "loss": 0.0267, "step": 265670 }, { "epoch": 2.838613173780651, "grad_norm": 4.433718681335449, "learning_rate": 9.196310517308328e-07, "loss": 0.0238, "step": 265680 }, { "epoch": 2.8387200170949303, "grad_norm": 0.004457760602235794, "learning_rate": 9.196219164120911e-07, "loss": 0.0184, "step": 265690 }, { "epoch": 2.8388268604092097, "grad_norm": 0.031766392290592194, "learning_rate": 9.196127806195646e-07, "loss": 0.006, "step": 265700 }, { "epoch": 2.8389337037234896, "grad_norm": 0.049056313931941986, "learning_rate": 9.196036443532636e-07, "loss": 0.0061, "step": 265710 }, { "epoch": 2.839040547037769, "grad_norm": 2.5597493648529053, "learning_rate": 9.195945076131985e-07, "loss": 0.0145, "step": 265720 }, { "epoch": 2.839147390352049, "grad_norm": 0.2063245177268982, "learning_rate": 9.195853703993795e-07, "loss": 0.0027, "step": 265730 }, { "epoch": 2.8392542336663285, "grad_norm": 0.047619983553886414, "learning_rate": 9.195762327118169e-07, "loss": 0.0446, "step": 265740 }, { "epoch": 2.839361076980608, "grad_norm": 6.300936698913574, "learning_rate": 9.195670945505211e-07, "loss": 0.0424, "step": 265750 }, { "epoch": 2.8394679202948874, "grad_norm": 0.2820863723754883, "learning_rate": 9.195579559155024e-07, "loss": 0.01, "step": 265760 }, { "epoch": 2.8395747636091673, "grad_norm": 0.5856720209121704, "learning_rate": 9.195488168067711e-07, "loss": 0.0049, "step": 265770 }, { "epoch": 2.8396816069234467, "grad_norm": 1.7600067853927612, "learning_rate": 9.195396772243375e-07, "loss": 0.0182, "step": 265780 }, { "epoch": 2.8397884502377266, "grad_norm": 7.515708923339844, "learning_rate": 9.195305371682119e-07, "loss": 0.0195, "step": 265790 }, { "epoch": 2.839895293552006, "grad_norm": 0.06703133136034012, "learning_rate": 9.195213966384048e-07, "loss": 0.0355, "step": 265800 }, { "epoch": 2.8400021368662856, "grad_norm": 0.01195936743170023, "learning_rate": 9.195122556349262e-07, "loss": 0.0215, "step": 265810 }, { "epoch": 2.840108980180565, "grad_norm": 1.059744954109192, "learning_rate": 9.195031141577867e-07, "loss": 0.0111, "step": 265820 }, { "epoch": 2.840215823494845, "grad_norm": 3.4044644832611084, "learning_rate": 9.194939722069965e-07, "loss": 0.0203, "step": 265830 }, { "epoch": 2.8403226668091244, "grad_norm": 11.983123779296875, "learning_rate": 9.194848297825659e-07, "loss": 0.0281, "step": 265840 }, { "epoch": 2.8404295101234043, "grad_norm": 9.243792533874512, "learning_rate": 9.194756868845054e-07, "loss": 0.0311, "step": 265850 }, { "epoch": 2.8405363534376837, "grad_norm": 4.855871677398682, "learning_rate": 9.194665435128251e-07, "loss": 0.0224, "step": 265860 }, { "epoch": 2.840643196751963, "grad_norm": 3.2824692726135254, "learning_rate": 9.194573996675354e-07, "loss": 0.027, "step": 265870 }, { "epoch": 2.8407500400662427, "grad_norm": 0.534671425819397, "learning_rate": 9.194482553486467e-07, "loss": 0.0247, "step": 265880 }, { "epoch": 2.8408568833805226, "grad_norm": 3.0221924781799316, "learning_rate": 9.194391105561693e-07, "loss": 0.0175, "step": 265890 }, { "epoch": 2.840963726694802, "grad_norm": 0.0015956975985318422, "learning_rate": 9.194299652901134e-07, "loss": 0.0022, "step": 265900 }, { "epoch": 2.841070570009082, "grad_norm": 0.05460043251514435, "learning_rate": 9.194208195504893e-07, "loss": 0.0013, "step": 265910 }, { "epoch": 2.8411774133233614, "grad_norm": 0.9536168575286865, "learning_rate": 9.194116733373076e-07, "loss": 0.0258, "step": 265920 }, { "epoch": 2.841284256637641, "grad_norm": 0.0085265152156353, "learning_rate": 9.194025266505785e-07, "loss": 0.0266, "step": 265930 }, { "epoch": 2.8413910999519203, "grad_norm": 0.009533200412988663, "learning_rate": 9.193933794903122e-07, "loss": 0.0007, "step": 265940 }, { "epoch": 2.8414979432662, "grad_norm": 1.2236279249191284, "learning_rate": 9.193842318565191e-07, "loss": 0.0419, "step": 265950 }, { "epoch": 2.8416047865804797, "grad_norm": 15.841219902038574, "learning_rate": 9.193750837492097e-07, "loss": 0.0853, "step": 265960 }, { "epoch": 2.8417116298947596, "grad_norm": 0.02885134145617485, "learning_rate": 9.19365935168394e-07, "loss": 0.0325, "step": 265970 }, { "epoch": 2.841818473209039, "grad_norm": 1.5357288122177124, "learning_rate": 9.193567861140826e-07, "loss": 0.0275, "step": 265980 }, { "epoch": 2.8419253165233185, "grad_norm": 0.7888440489768982, "learning_rate": 9.193476365862858e-07, "loss": 0.0056, "step": 265990 }, { "epoch": 2.842032159837598, "grad_norm": 0.4040510952472687, "learning_rate": 9.193384865850138e-07, "loss": 0.0544, "step": 266000 }, { "epoch": 2.842139003151878, "grad_norm": 0.021401209756731987, "learning_rate": 9.19329336110277e-07, "loss": 0.0304, "step": 266010 }, { "epoch": 2.8422458464661573, "grad_norm": 0.1711689531803131, "learning_rate": 9.193201851620858e-07, "loss": 0.0167, "step": 266020 }, { "epoch": 2.842352689780437, "grad_norm": 5.463259696960449, "learning_rate": 9.193110337404503e-07, "loss": 0.0122, "step": 266030 }, { "epoch": 2.8424595330947167, "grad_norm": 5.193680286407471, "learning_rate": 9.19301881845381e-07, "loss": 0.0529, "step": 266040 }, { "epoch": 2.842566376408996, "grad_norm": 3.597489595413208, "learning_rate": 9.192927294768884e-07, "loss": 0.0331, "step": 266050 }, { "epoch": 2.8426732197232756, "grad_norm": 0.2486630380153656, "learning_rate": 9.192835766349825e-07, "loss": 0.0104, "step": 266060 }, { "epoch": 2.8427800630375555, "grad_norm": 0.08315718173980713, "learning_rate": 9.192744233196738e-07, "loss": 0.0062, "step": 266070 }, { "epoch": 2.842886906351835, "grad_norm": 0.054444752633571625, "learning_rate": 9.192652695309728e-07, "loss": 0.0111, "step": 266080 }, { "epoch": 2.842993749666115, "grad_norm": 0.1364593356847763, "learning_rate": 9.192561152688895e-07, "loss": 0.031, "step": 266090 }, { "epoch": 2.8431005929803943, "grad_norm": 0.47913143038749695, "learning_rate": 9.192469605334346e-07, "loss": 0.0157, "step": 266100 }, { "epoch": 2.8432074362946738, "grad_norm": 0.021160749718546867, "learning_rate": 9.192378053246181e-07, "loss": 0.007, "step": 266110 }, { "epoch": 2.843314279608953, "grad_norm": 0.0747896060347557, "learning_rate": 9.192286496424505e-07, "loss": 0.0199, "step": 266120 }, { "epoch": 2.843421122923233, "grad_norm": 0.10321278870105743, "learning_rate": 9.192194934869422e-07, "loss": 0.0333, "step": 266130 }, { "epoch": 2.8435279662375126, "grad_norm": 10.951765060424805, "learning_rate": 9.192103368581033e-07, "loss": 0.0264, "step": 266140 }, { "epoch": 2.8436348095517925, "grad_norm": 1.6283677816390991, "learning_rate": 9.192011797559443e-07, "loss": 0.0834, "step": 266150 }, { "epoch": 2.843741652866072, "grad_norm": 0.07642924040555954, "learning_rate": 9.191920221804756e-07, "loss": 0.0228, "step": 266160 }, { "epoch": 2.8438484961803514, "grad_norm": 0.0023460243828594685, "learning_rate": 9.191828641317074e-07, "loss": 0.0049, "step": 266170 }, { "epoch": 2.843955339494631, "grad_norm": 2.033723831176758, "learning_rate": 9.191737056096502e-07, "loss": 0.0079, "step": 266180 }, { "epoch": 2.8440621828089108, "grad_norm": 0.006526040378957987, "learning_rate": 9.191645466143143e-07, "loss": 0.0034, "step": 266190 }, { "epoch": 2.8441690261231902, "grad_norm": 1.870765209197998, "learning_rate": 9.191553871457099e-07, "loss": 0.0142, "step": 266200 }, { "epoch": 2.84427586943747, "grad_norm": 0.024296896532177925, "learning_rate": 9.191462272038473e-07, "loss": 0.0193, "step": 266210 }, { "epoch": 2.8443827127517496, "grad_norm": 0.02477370575070381, "learning_rate": 9.191370667887372e-07, "loss": 0.0155, "step": 266220 }, { "epoch": 2.844489556066029, "grad_norm": 3.1909327507019043, "learning_rate": 9.191279059003896e-07, "loss": 0.0338, "step": 266230 }, { "epoch": 2.8445963993803085, "grad_norm": 0.005058652255684137, "learning_rate": 9.191187445388148e-07, "loss": 0.0128, "step": 266240 }, { "epoch": 2.8447032426945884, "grad_norm": 7.213764190673828, "learning_rate": 9.191095827040237e-07, "loss": 0.0172, "step": 266250 }, { "epoch": 2.844810086008868, "grad_norm": 2.82578706741333, "learning_rate": 9.19100420396026e-07, "loss": 0.0509, "step": 266260 }, { "epoch": 2.8449169293231478, "grad_norm": 0.004162937868386507, "learning_rate": 9.190912576148323e-07, "loss": 0.0012, "step": 266270 }, { "epoch": 2.8450237726374272, "grad_norm": 0.009212831035256386, "learning_rate": 9.190820943604529e-07, "loss": 0.0285, "step": 266280 }, { "epoch": 2.8451306159517067, "grad_norm": 0.9194322228431702, "learning_rate": 9.190729306328983e-07, "loss": 0.0253, "step": 266290 }, { "epoch": 2.8452374592659866, "grad_norm": 0.15583324432373047, "learning_rate": 9.190637664321787e-07, "loss": 0.0149, "step": 266300 }, { "epoch": 2.845344302580266, "grad_norm": 0.24472403526306152, "learning_rate": 9.190546017583045e-07, "loss": 0.0179, "step": 266310 }, { "epoch": 2.8454511458945455, "grad_norm": 0.1312311589717865, "learning_rate": 9.190454366112861e-07, "loss": 0.0118, "step": 266320 }, { "epoch": 2.8455579892088254, "grad_norm": 0.09324485063552856, "learning_rate": 9.190362709911337e-07, "loss": 0.0247, "step": 266330 }, { "epoch": 2.845664832523105, "grad_norm": 0.07824989408254623, "learning_rate": 9.190271048978576e-07, "loss": 0.0092, "step": 266340 }, { "epoch": 2.8457716758373843, "grad_norm": 0.06427504867315292, "learning_rate": 9.190179383314683e-07, "loss": 0.0703, "step": 266350 }, { "epoch": 2.8458785191516642, "grad_norm": 0.3279811441898346, "learning_rate": 9.190087712919763e-07, "loss": 0.0339, "step": 266360 }, { "epoch": 2.8459853624659437, "grad_norm": 0.011810055933892727, "learning_rate": 9.189996037793917e-07, "loss": 0.0043, "step": 266370 }, { "epoch": 2.846092205780223, "grad_norm": 0.06762789189815521, "learning_rate": 9.189904357937249e-07, "loss": 0.0225, "step": 266380 }, { "epoch": 2.846199049094503, "grad_norm": 3.8705813884735107, "learning_rate": 9.189812673349861e-07, "loss": 0.0361, "step": 266390 }, { "epoch": 2.8463058924087825, "grad_norm": 10.968814849853516, "learning_rate": 9.189720984031861e-07, "loss": 0.0682, "step": 266400 }, { "epoch": 2.846412735723062, "grad_norm": 2.8756725788116455, "learning_rate": 9.189629289983348e-07, "loss": 0.0318, "step": 266410 }, { "epoch": 2.846519579037342, "grad_norm": 22.119659423828125, "learning_rate": 9.189537591204428e-07, "loss": 0.0151, "step": 266420 }, { "epoch": 2.8466264223516213, "grad_norm": 3.2305150032043457, "learning_rate": 9.189445887695204e-07, "loss": 0.021, "step": 266430 }, { "epoch": 2.846733265665901, "grad_norm": 7.474452018737793, "learning_rate": 9.18935417945578e-07, "loss": 0.0283, "step": 266440 }, { "epoch": 2.8468401089801807, "grad_norm": 0.34162622690200806, "learning_rate": 9.189262466486259e-07, "loss": 0.0119, "step": 266450 }, { "epoch": 2.84694695229446, "grad_norm": 1.0210504531860352, "learning_rate": 9.189170748786743e-07, "loss": 0.0125, "step": 266460 }, { "epoch": 2.8470537956087396, "grad_norm": 2.0634236335754395, "learning_rate": 9.189079026357338e-07, "loss": 0.0068, "step": 266470 }, { "epoch": 2.8471606389230195, "grad_norm": 0.01857874169945717, "learning_rate": 9.188987299198145e-07, "loss": 0.0171, "step": 266480 }, { "epoch": 2.847267482237299, "grad_norm": 0.21998795866966248, "learning_rate": 9.188895567309272e-07, "loss": 0.0219, "step": 266490 }, { "epoch": 2.847374325551579, "grad_norm": 5.309637546539307, "learning_rate": 9.188803830690819e-07, "loss": 0.0147, "step": 266500 }, { "epoch": 2.8474811688658583, "grad_norm": 7.4822306632995605, "learning_rate": 9.188712089342889e-07, "loss": 0.0266, "step": 266510 }, { "epoch": 2.847588012180138, "grad_norm": 2.403080940246582, "learning_rate": 9.188620343265588e-07, "loss": 0.0184, "step": 266520 }, { "epoch": 2.8476948554944173, "grad_norm": 0.47017085552215576, "learning_rate": 9.188528592459017e-07, "loss": 0.0019, "step": 266530 }, { "epoch": 2.847801698808697, "grad_norm": 0.32556235790252686, "learning_rate": 9.188436836923284e-07, "loss": 0.034, "step": 266540 }, { "epoch": 2.8479085421229766, "grad_norm": 0.7579368948936462, "learning_rate": 9.188345076658487e-07, "loss": 0.0256, "step": 266550 }, { "epoch": 2.8480153854372565, "grad_norm": 1.5744115114212036, "learning_rate": 9.188253311664733e-07, "loss": 0.0236, "step": 266560 }, { "epoch": 2.848122228751536, "grad_norm": 0.1680876463651657, "learning_rate": 9.188161541942125e-07, "loss": 0.0255, "step": 266570 }, { "epoch": 2.8482290720658154, "grad_norm": 0.36163097620010376, "learning_rate": 9.188069767490767e-07, "loss": 0.0145, "step": 266580 }, { "epoch": 2.848335915380095, "grad_norm": 0.016034340485930443, "learning_rate": 9.187977988310762e-07, "loss": 0.0191, "step": 266590 }, { "epoch": 2.848442758694375, "grad_norm": 2.405897617340088, "learning_rate": 9.187886204402213e-07, "loss": 0.0048, "step": 266600 }, { "epoch": 2.8485496020086543, "grad_norm": 0.5121414065361023, "learning_rate": 9.187794415765225e-07, "loss": 0.0388, "step": 266610 }, { "epoch": 2.848656445322934, "grad_norm": 14.16551685333252, "learning_rate": 9.187702622399901e-07, "loss": 0.0359, "step": 266620 }, { "epoch": 2.8487632886372136, "grad_norm": 1.725954294204712, "learning_rate": 9.187610824306345e-07, "loss": 0.0134, "step": 266630 }, { "epoch": 2.848870131951493, "grad_norm": 0.014363855123519897, "learning_rate": 9.187519021484659e-07, "loss": 0.0011, "step": 266640 }, { "epoch": 2.8489769752657725, "grad_norm": 0.9186080694198608, "learning_rate": 9.187427213934949e-07, "loss": 0.0128, "step": 266650 }, { "epoch": 2.8490838185800524, "grad_norm": 5.136234283447266, "learning_rate": 9.18733540165732e-07, "loss": 0.023, "step": 266660 }, { "epoch": 2.849190661894332, "grad_norm": 3.0356760025024414, "learning_rate": 9.187243584651869e-07, "loss": 0.0224, "step": 266670 }, { "epoch": 2.849297505208612, "grad_norm": 0.06247281655669212, "learning_rate": 9.187151762918706e-07, "loss": 0.0278, "step": 266680 }, { "epoch": 2.8494043485228913, "grad_norm": 1.3589845895767212, "learning_rate": 9.187059936457933e-07, "loss": 0.0284, "step": 266690 }, { "epoch": 2.8495111918371707, "grad_norm": 3.6185522079467773, "learning_rate": 9.186968105269652e-07, "loss": 0.019, "step": 266700 }, { "epoch": 2.84961803515145, "grad_norm": 0.0037828164640814066, "learning_rate": 9.186876269353969e-07, "loss": 0.0094, "step": 266710 }, { "epoch": 2.84972487846573, "grad_norm": 1.4814772605895996, "learning_rate": 9.186784428710988e-07, "loss": 0.0033, "step": 266720 }, { "epoch": 2.8498317217800095, "grad_norm": 0.0854448452591896, "learning_rate": 9.18669258334081e-07, "loss": 0.0133, "step": 266730 }, { "epoch": 2.8499385650942894, "grad_norm": 0.12529051303863525, "learning_rate": 9.186600733243539e-07, "loss": 0.036, "step": 266740 }, { "epoch": 2.850045408408569, "grad_norm": 1.7446502447128296, "learning_rate": 9.186508878419281e-07, "loss": 0.0219, "step": 266750 }, { "epoch": 2.8501522517228484, "grad_norm": 0.02363983355462551, "learning_rate": 9.186417018868138e-07, "loss": 0.0103, "step": 266760 }, { "epoch": 2.850259095037128, "grad_norm": 0.1851990818977356, "learning_rate": 9.186325154590215e-07, "loss": 0.0017, "step": 266770 }, { "epoch": 2.8503659383514077, "grad_norm": 0.03780609741806984, "learning_rate": 9.186233285585614e-07, "loss": 0.0234, "step": 266780 }, { "epoch": 2.850472781665687, "grad_norm": 0.10138601064682007, "learning_rate": 9.18614141185444e-07, "loss": 0.0248, "step": 266790 }, { "epoch": 2.850579624979967, "grad_norm": 4.359605312347412, "learning_rate": 9.186049533396796e-07, "loss": 0.0113, "step": 266800 }, { "epoch": 2.8506864682942465, "grad_norm": 2.726926326751709, "learning_rate": 9.185957650212788e-07, "loss": 0.0101, "step": 266810 }, { "epoch": 2.850793311608526, "grad_norm": 17.91309356689453, "learning_rate": 9.185865762302516e-07, "loss": 0.0678, "step": 266820 }, { "epoch": 2.8509001549228055, "grad_norm": 3.077357053756714, "learning_rate": 9.185773869666087e-07, "loss": 0.02, "step": 266830 }, { "epoch": 2.8510069982370854, "grad_norm": 1.9869048595428467, "learning_rate": 9.185681972303603e-07, "loss": 0.0196, "step": 266840 }, { "epoch": 2.851113841551365, "grad_norm": 0.0025949813425540924, "learning_rate": 9.185590070215167e-07, "loss": 0.0305, "step": 266850 }, { "epoch": 2.8512206848656447, "grad_norm": 0.007140116300433874, "learning_rate": 9.185498163400884e-07, "loss": 0.0157, "step": 266860 }, { "epoch": 2.851327528179924, "grad_norm": 0.004815845284610987, "learning_rate": 9.185406251860859e-07, "loss": 0.0104, "step": 266870 }, { "epoch": 2.8514343714942036, "grad_norm": 0.009734271094202995, "learning_rate": 9.185314335595193e-07, "loss": 0.0598, "step": 266880 }, { "epoch": 2.851541214808483, "grad_norm": 0.14287900924682617, "learning_rate": 9.185222414603992e-07, "loss": 0.0379, "step": 266890 }, { "epoch": 2.851648058122763, "grad_norm": 0.007328766398131847, "learning_rate": 9.185130488887361e-07, "loss": 0.0299, "step": 266900 }, { "epoch": 2.8517549014370425, "grad_norm": 0.9503894448280334, "learning_rate": 9.185038558445399e-07, "loss": 0.005, "step": 266910 }, { "epoch": 2.8518617447513224, "grad_norm": 1.332891821861267, "learning_rate": 9.184946623278213e-07, "loss": 0.0057, "step": 266920 }, { "epoch": 2.851968588065602, "grad_norm": 2.1285531520843506, "learning_rate": 9.184854683385907e-07, "loss": 0.0215, "step": 266930 }, { "epoch": 2.8520754313798813, "grad_norm": 0.1451699584722519, "learning_rate": 9.184762738768584e-07, "loss": 0.0022, "step": 266940 }, { "epoch": 2.8521822746941607, "grad_norm": 10.873467445373535, "learning_rate": 9.184670789426348e-07, "loss": 0.0392, "step": 266950 }, { "epoch": 2.8522891180084406, "grad_norm": 3.6891989707946777, "learning_rate": 9.184578835359303e-07, "loss": 0.0407, "step": 266960 }, { "epoch": 2.85239596132272, "grad_norm": 1.124185562133789, "learning_rate": 9.184486876567553e-07, "loss": 0.0066, "step": 266970 }, { "epoch": 2.852502804637, "grad_norm": 0.1449582874774933, "learning_rate": 9.184394913051201e-07, "loss": 0.0097, "step": 266980 }, { "epoch": 2.8526096479512795, "grad_norm": 0.0019025667570531368, "learning_rate": 9.184302944810352e-07, "loss": 0.021, "step": 266990 }, { "epoch": 2.852716491265559, "grad_norm": 4.649994373321533, "learning_rate": 9.184210971845108e-07, "loss": 0.0102, "step": 267000 }, { "epoch": 2.852823334579839, "grad_norm": 1.3088252544403076, "learning_rate": 9.184118994155575e-07, "loss": 0.0219, "step": 267010 }, { "epoch": 2.8529301778941183, "grad_norm": 11.734204292297363, "learning_rate": 9.184027011741855e-07, "loss": 0.043, "step": 267020 }, { "epoch": 2.8530370212083977, "grad_norm": 0.003741268767043948, "learning_rate": 9.183935024604054e-07, "loss": 0.055, "step": 267030 }, { "epoch": 2.8531438645226777, "grad_norm": 0.10815398395061493, "learning_rate": 9.183843032742273e-07, "loss": 0.0235, "step": 267040 }, { "epoch": 2.853250707836957, "grad_norm": 0.016623567789793015, "learning_rate": 9.183751036156619e-07, "loss": 0.0114, "step": 267050 }, { "epoch": 2.8533575511512366, "grad_norm": 8.01998233795166, "learning_rate": 9.183659034847193e-07, "loss": 0.01, "step": 267060 }, { "epoch": 2.8534643944655165, "grad_norm": 5.161258220672607, "learning_rate": 9.183567028814101e-07, "loss": 0.0386, "step": 267070 }, { "epoch": 2.853571237779796, "grad_norm": 0.11544031649827957, "learning_rate": 9.183475018057446e-07, "loss": 0.0191, "step": 267080 }, { "epoch": 2.8536780810940754, "grad_norm": 1.5689202547073364, "learning_rate": 9.183383002577332e-07, "loss": 0.0033, "step": 267090 }, { "epoch": 2.8537849244083553, "grad_norm": 2.0220329761505127, "learning_rate": 9.183290982373863e-07, "loss": 0.0144, "step": 267100 }, { "epoch": 2.8538917677226348, "grad_norm": 0.009892236441373825, "learning_rate": 9.183198957447142e-07, "loss": 0.0139, "step": 267110 }, { "epoch": 2.853998611036914, "grad_norm": 1.4892226457595825, "learning_rate": 9.183106927797275e-07, "loss": 0.0107, "step": 267120 }, { "epoch": 2.854105454351194, "grad_norm": 0.06169498339295387, "learning_rate": 9.183014893424363e-07, "loss": 0.0363, "step": 267130 }, { "epoch": 2.8542122976654736, "grad_norm": 4.23927640914917, "learning_rate": 9.182922854328512e-07, "loss": 0.0131, "step": 267140 }, { "epoch": 2.854319140979753, "grad_norm": 0.30029967427253723, "learning_rate": 9.182830810509825e-07, "loss": 0.0099, "step": 267150 }, { "epoch": 2.854425984294033, "grad_norm": 0.08454670757055283, "learning_rate": 9.182738761968407e-07, "loss": 0.0337, "step": 267160 }, { "epoch": 2.8545328276083124, "grad_norm": 0.28463509678840637, "learning_rate": 9.182646708704363e-07, "loss": 0.0181, "step": 267170 }, { "epoch": 2.854639670922592, "grad_norm": 5.0841875076293945, "learning_rate": 9.182554650717792e-07, "loss": 0.0374, "step": 267180 }, { "epoch": 2.8547465142368718, "grad_norm": 1.4053868055343628, "learning_rate": 9.182462588008803e-07, "loss": 0.0027, "step": 267190 }, { "epoch": 2.854853357551151, "grad_norm": 0.23577633500099182, "learning_rate": 9.182370520577497e-07, "loss": 0.0076, "step": 267200 }, { "epoch": 2.854960200865431, "grad_norm": 0.06315644830465317, "learning_rate": 9.182278448423979e-07, "loss": 0.0155, "step": 267210 }, { "epoch": 2.8550670441797106, "grad_norm": 0.7745181322097778, "learning_rate": 9.182186371548354e-07, "loss": 0.0186, "step": 267220 }, { "epoch": 2.85517388749399, "grad_norm": 0.4344121813774109, "learning_rate": 9.182094289950724e-07, "loss": 0.0076, "step": 267230 }, { "epoch": 2.8552807308082695, "grad_norm": 2.1362924575805664, "learning_rate": 9.182002203631195e-07, "loss": 0.0082, "step": 267240 }, { "epoch": 2.8553875741225494, "grad_norm": 0.008501308038830757, "learning_rate": 9.18191011258987e-07, "loss": 0.017, "step": 267250 }, { "epoch": 2.855494417436829, "grad_norm": 0.16100206971168518, "learning_rate": 9.181818016826853e-07, "loss": 0.0329, "step": 267260 }, { "epoch": 2.8556012607511088, "grad_norm": 0.8742165565490723, "learning_rate": 9.181725916342244e-07, "loss": 0.012, "step": 267270 }, { "epoch": 2.855708104065388, "grad_norm": 0.01758500002324581, "learning_rate": 9.181633811136156e-07, "loss": 0.0291, "step": 267280 }, { "epoch": 2.8558149473796677, "grad_norm": 1.9926921129226685, "learning_rate": 9.181541701208685e-07, "loss": 0.0143, "step": 267290 }, { "epoch": 2.855921790693947, "grad_norm": 0.4345094561576843, "learning_rate": 9.181449586559939e-07, "loss": 0.0262, "step": 267300 }, { "epoch": 2.856028634008227, "grad_norm": 6.682521343231201, "learning_rate": 9.181357467190019e-07, "loss": 0.0324, "step": 267310 }, { "epoch": 2.8561354773225065, "grad_norm": 14.628985404968262, "learning_rate": 9.181265343099032e-07, "loss": 0.0093, "step": 267320 }, { "epoch": 2.8562423206367864, "grad_norm": 0.32046863436698914, "learning_rate": 9.181173214287081e-07, "loss": 0.0094, "step": 267330 }, { "epoch": 2.856349163951066, "grad_norm": 11.233223915100098, "learning_rate": 9.181081080754269e-07, "loss": 0.0348, "step": 267340 }, { "epoch": 2.8564560072653453, "grad_norm": 0.016082117334008217, "learning_rate": 9.180988942500702e-07, "loss": 0.0193, "step": 267350 }, { "epoch": 2.856562850579625, "grad_norm": 0.4363986849784851, "learning_rate": 9.180896799526483e-07, "loss": 0.0036, "step": 267360 }, { "epoch": 2.8566696938939047, "grad_norm": 0.019858626648783684, "learning_rate": 9.180804651831716e-07, "loss": 0.017, "step": 267370 }, { "epoch": 2.856776537208184, "grad_norm": 0.8078503608703613, "learning_rate": 9.180712499416505e-07, "loss": 0.0257, "step": 267380 }, { "epoch": 2.856883380522464, "grad_norm": 0.024792177602648735, "learning_rate": 9.180620342280954e-07, "loss": 0.0542, "step": 267390 }, { "epoch": 2.8569902238367435, "grad_norm": 7.747437477111816, "learning_rate": 9.180528180425167e-07, "loss": 0.0062, "step": 267400 }, { "epoch": 2.857097067151023, "grad_norm": 0.05639301612973213, "learning_rate": 9.180436013849246e-07, "loss": 0.0348, "step": 267410 }, { "epoch": 2.8572039104653024, "grad_norm": 3.705247402191162, "learning_rate": 9.1803438425533e-07, "loss": 0.0175, "step": 267420 }, { "epoch": 2.8573107537795823, "grad_norm": 0.01616322435438633, "learning_rate": 9.180251666537431e-07, "loss": 0.0296, "step": 267430 }, { "epoch": 2.857417597093862, "grad_norm": 5.599289417266846, "learning_rate": 9.180159485801739e-07, "loss": 0.0221, "step": 267440 }, { "epoch": 2.8575244404081417, "grad_norm": 0.02590056136250496, "learning_rate": 9.180067300346335e-07, "loss": 0.0199, "step": 267450 }, { "epoch": 2.857631283722421, "grad_norm": 3.3674144744873047, "learning_rate": 9.179975110171318e-07, "loss": 0.0559, "step": 267460 }, { "epoch": 2.8577381270367006, "grad_norm": 0.047124993056058884, "learning_rate": 9.179882915276793e-07, "loss": 0.0006, "step": 267470 }, { "epoch": 2.85784497035098, "grad_norm": 1.5231878757476807, "learning_rate": 9.179790715662866e-07, "loss": 0.0398, "step": 267480 }, { "epoch": 2.85795181366526, "grad_norm": 1.5857654809951782, "learning_rate": 9.179698511329638e-07, "loss": 0.0317, "step": 267490 }, { "epoch": 2.8580586569795394, "grad_norm": 3.4353811740875244, "learning_rate": 9.179606302277216e-07, "loss": 0.0367, "step": 267500 }, { "epoch": 2.8581655002938193, "grad_norm": 0.9068902730941772, "learning_rate": 9.179514088505702e-07, "loss": 0.0077, "step": 267510 }, { "epoch": 2.858272343608099, "grad_norm": 1.4002348184585571, "learning_rate": 9.179421870015203e-07, "loss": 0.0045, "step": 267520 }, { "epoch": 2.8583791869223782, "grad_norm": 0.12046616524457932, "learning_rate": 9.17932964680582e-07, "loss": 0.0475, "step": 267530 }, { "epoch": 2.8584860302366577, "grad_norm": 7.509730339050293, "learning_rate": 9.179237418877659e-07, "loss": 0.0167, "step": 267540 }, { "epoch": 2.8585928735509376, "grad_norm": 10.402724266052246, "learning_rate": 9.179145186230824e-07, "loss": 0.0307, "step": 267550 }, { "epoch": 2.858699716865217, "grad_norm": 0.02556798793375492, "learning_rate": 9.179052948865418e-07, "loss": 0.0447, "step": 267560 }, { "epoch": 2.858806560179497, "grad_norm": 2.329986333847046, "learning_rate": 9.178960706781544e-07, "loss": 0.005, "step": 267570 }, { "epoch": 2.8589134034937764, "grad_norm": 0.21922935545444489, "learning_rate": 9.178868459979311e-07, "loss": 0.0308, "step": 267580 }, { "epoch": 2.859020246808056, "grad_norm": 0.07089840620756149, "learning_rate": 9.178776208458818e-07, "loss": 0.0325, "step": 267590 }, { "epoch": 2.8591270901223353, "grad_norm": 3.1358697414398193, "learning_rate": 9.178683952220173e-07, "loss": 0.0158, "step": 267600 }, { "epoch": 2.8592339334366152, "grad_norm": 2.1208653450012207, "learning_rate": 9.178591691263477e-07, "loss": 0.0155, "step": 267610 }, { "epoch": 2.8593407767508947, "grad_norm": 0.2510460913181305, "learning_rate": 9.178499425588837e-07, "loss": 0.0305, "step": 267620 }, { "epoch": 2.8594476200651746, "grad_norm": 10.972426414489746, "learning_rate": 9.178407155196355e-07, "loss": 0.0255, "step": 267630 }, { "epoch": 2.859554463379454, "grad_norm": 9.224032402038574, "learning_rate": 9.178314880086136e-07, "loss": 0.0176, "step": 267640 }, { "epoch": 2.8596613066937335, "grad_norm": 0.005700002424418926, "learning_rate": 9.178222600258282e-07, "loss": 0.0075, "step": 267650 }, { "epoch": 2.859768150008013, "grad_norm": 1.1156845092773438, "learning_rate": 9.178130315712903e-07, "loss": 0.0108, "step": 267660 }, { "epoch": 2.859874993322293, "grad_norm": 0.0346871018409729, "learning_rate": 9.178038026450098e-07, "loss": 0.0094, "step": 267670 }, { "epoch": 2.8599818366365723, "grad_norm": 0.013374019414186478, "learning_rate": 9.177945732469973e-07, "loss": 0.0044, "step": 267680 }, { "epoch": 2.8600886799508523, "grad_norm": 2.18537974357605, "learning_rate": 9.177853433772631e-07, "loss": 0.0426, "step": 267690 }, { "epoch": 2.8601955232651317, "grad_norm": 0.055362895131111145, "learning_rate": 9.177761130358178e-07, "loss": 0.0083, "step": 267700 }, { "epoch": 2.860302366579411, "grad_norm": 0.0214017853140831, "learning_rate": 9.177668822226717e-07, "loss": 0.0293, "step": 267710 }, { "epoch": 2.8604092098936906, "grad_norm": 4.567144870758057, "learning_rate": 9.177576509378353e-07, "loss": 0.0249, "step": 267720 }, { "epoch": 2.8605160532079705, "grad_norm": 0.015831628814339638, "learning_rate": 9.17748419181319e-07, "loss": 0.0174, "step": 267730 }, { "epoch": 2.86062289652225, "grad_norm": 4.14380407333374, "learning_rate": 9.177391869531332e-07, "loss": 0.0094, "step": 267740 }, { "epoch": 2.86072973983653, "grad_norm": 1.377341866493225, "learning_rate": 9.177299542532882e-07, "loss": 0.0099, "step": 267750 }, { "epoch": 2.8608365831508094, "grad_norm": 0.22773419320583344, "learning_rate": 9.177207210817948e-07, "loss": 0.023, "step": 267760 }, { "epoch": 2.860943426465089, "grad_norm": 3.0487051010131836, "learning_rate": 9.177114874386629e-07, "loss": 0.013, "step": 267770 }, { "epoch": 2.8610502697793687, "grad_norm": 0.06658158451318741, "learning_rate": 9.177022533239034e-07, "loss": 0.0402, "step": 267780 }, { "epoch": 2.861157113093648, "grad_norm": 0.013204329647123814, "learning_rate": 9.176930187375265e-07, "loss": 0.0231, "step": 267790 }, { "epoch": 2.8612639564079276, "grad_norm": 0.452410489320755, "learning_rate": 9.176837836795426e-07, "loss": 0.0025, "step": 267800 }, { "epoch": 2.8613707997222075, "grad_norm": 0.31680670380592346, "learning_rate": 9.176745481499621e-07, "loss": 0.0037, "step": 267810 }, { "epoch": 2.861477643036487, "grad_norm": 2.628282308578491, "learning_rate": 9.176653121487956e-07, "loss": 0.0124, "step": 267820 }, { "epoch": 2.8615844863507665, "grad_norm": 0.018671976402401924, "learning_rate": 9.176560756760536e-07, "loss": 0.0124, "step": 267830 }, { "epoch": 2.8616913296650464, "grad_norm": 0.017628047615289688, "learning_rate": 9.176468387317462e-07, "loss": 0.0295, "step": 267840 }, { "epoch": 2.861798172979326, "grad_norm": 0.0051545267924666405, "learning_rate": 9.176376013158839e-07, "loss": 0.016, "step": 267850 }, { "epoch": 2.8619050162936053, "grad_norm": 0.008218123577535152, "learning_rate": 9.176283634284774e-07, "loss": 0.0037, "step": 267860 }, { "epoch": 2.862011859607885, "grad_norm": 1.4917880296707153, "learning_rate": 9.176191250695369e-07, "loss": 0.0137, "step": 267870 }, { "epoch": 2.8621187029221646, "grad_norm": 0.0006315649370662868, "learning_rate": 9.176098862390729e-07, "loss": 0.0116, "step": 267880 }, { "epoch": 2.862225546236444, "grad_norm": 6.637043476104736, "learning_rate": 9.176006469370959e-07, "loss": 0.0956, "step": 267890 }, { "epoch": 2.862332389550724, "grad_norm": 0.04333608224987984, "learning_rate": 9.175914071636161e-07, "loss": 0.003, "step": 267900 }, { "epoch": 2.8624392328650035, "grad_norm": 0.11500997841358185, "learning_rate": 9.175821669186442e-07, "loss": 0.0108, "step": 267910 }, { "epoch": 2.862546076179283, "grad_norm": 2.5240895748138428, "learning_rate": 9.175729262021905e-07, "loss": 0.0185, "step": 267920 }, { "epoch": 2.862652919493563, "grad_norm": 0.00641010794788599, "learning_rate": 9.175636850142654e-07, "loss": 0.0171, "step": 267930 }, { "epoch": 2.8627597628078423, "grad_norm": 0.05881907045841217, "learning_rate": 9.175544433548794e-07, "loss": 0.0214, "step": 267940 }, { "epoch": 2.8628666061221217, "grad_norm": 0.001985004171729088, "learning_rate": 9.17545201224043e-07, "loss": 0.0105, "step": 267950 }, { "epoch": 2.8629734494364016, "grad_norm": 0.16717633605003357, "learning_rate": 9.175359586217663e-07, "loss": 0.0164, "step": 267960 }, { "epoch": 2.863080292750681, "grad_norm": 0.007981163449585438, "learning_rate": 9.175267155480603e-07, "loss": 0.0357, "step": 267970 }, { "epoch": 2.863187136064961, "grad_norm": 4.082672595977783, "learning_rate": 9.175174720029349e-07, "loss": 0.0241, "step": 267980 }, { "epoch": 2.8632939793792405, "grad_norm": 7.855713367462158, "learning_rate": 9.175082279864009e-07, "loss": 0.0292, "step": 267990 }, { "epoch": 2.86340082269352, "grad_norm": 0.033038895577192307, "learning_rate": 9.174989834984685e-07, "loss": 0.0433, "step": 268000 }, { "epoch": 2.8635076660077994, "grad_norm": 3.6863198280334473, "learning_rate": 9.174897385391484e-07, "loss": 0.0048, "step": 268010 }, { "epoch": 2.8636145093220793, "grad_norm": 0.0011607499327510595, "learning_rate": 9.174804931084508e-07, "loss": 0.0128, "step": 268020 }, { "epoch": 2.8637213526363587, "grad_norm": 4.582544326782227, "learning_rate": 9.174712472063861e-07, "loss": 0.0064, "step": 268030 }, { "epoch": 2.8638281959506386, "grad_norm": 3.267218589782715, "learning_rate": 9.17462000832965e-07, "loss": 0.0074, "step": 268040 }, { "epoch": 2.863935039264918, "grad_norm": 2.1084792613983154, "learning_rate": 9.174527539881978e-07, "loss": 0.0249, "step": 268050 }, { "epoch": 2.8640418825791976, "grad_norm": 2.2316994667053223, "learning_rate": 9.174435066720948e-07, "loss": 0.019, "step": 268060 }, { "epoch": 2.864148725893477, "grad_norm": 0.027262696996331215, "learning_rate": 9.174342588846666e-07, "loss": 0.0269, "step": 268070 }, { "epoch": 2.864255569207757, "grad_norm": 3.1391029357910156, "learning_rate": 9.174250106259237e-07, "loss": 0.0268, "step": 268080 }, { "epoch": 2.8643624125220364, "grad_norm": 2.759087085723877, "learning_rate": 9.174157618958765e-07, "loss": 0.0077, "step": 268090 }, { "epoch": 2.8644692558363163, "grad_norm": 2.4750101566314697, "learning_rate": 9.174065126945352e-07, "loss": 0.0293, "step": 268100 }, { "epoch": 2.8645760991505957, "grad_norm": 3.3791732788085938, "learning_rate": 9.173972630219106e-07, "loss": 0.0278, "step": 268110 }, { "epoch": 2.864682942464875, "grad_norm": 4.714632511138916, "learning_rate": 9.173880128780129e-07, "loss": 0.0188, "step": 268120 }, { "epoch": 2.8647897857791547, "grad_norm": 0.38507363200187683, "learning_rate": 9.173787622628527e-07, "loss": 0.0155, "step": 268130 }, { "epoch": 2.8648966290934346, "grad_norm": 1.8707445859909058, "learning_rate": 9.173695111764404e-07, "loss": 0.0678, "step": 268140 }, { "epoch": 2.865003472407714, "grad_norm": 4.262751579284668, "learning_rate": 9.173602596187864e-07, "loss": 0.0361, "step": 268150 }, { "epoch": 2.865110315721994, "grad_norm": 3.654620409011841, "learning_rate": 9.173510075899011e-07, "loss": 0.0234, "step": 268160 }, { "epoch": 2.8652171590362734, "grad_norm": 9.937176704406738, "learning_rate": 9.173417550897951e-07, "loss": 0.0349, "step": 268170 }, { "epoch": 2.865324002350553, "grad_norm": 0.010770535096526146, "learning_rate": 9.173325021184787e-07, "loss": 0.0042, "step": 268180 }, { "epoch": 2.8654308456648323, "grad_norm": 12.34342098236084, "learning_rate": 9.173232486759623e-07, "loss": 0.0623, "step": 268190 }, { "epoch": 2.865537688979112, "grad_norm": 0.11497271060943604, "learning_rate": 9.173139947622568e-07, "loss": 0.0378, "step": 268200 }, { "epoch": 2.8656445322933917, "grad_norm": 3.921259641647339, "learning_rate": 9.17304740377372e-07, "loss": 0.0147, "step": 268210 }, { "epoch": 2.8657513756076716, "grad_norm": 0.8050026297569275, "learning_rate": 9.172954855213187e-07, "loss": 0.0025, "step": 268220 }, { "epoch": 2.865858218921951, "grad_norm": 0.023751193657517433, "learning_rate": 9.172862301941074e-07, "loss": 0.0022, "step": 268230 }, { "epoch": 2.8659650622362305, "grad_norm": 8.35076904296875, "learning_rate": 9.172769743957485e-07, "loss": 0.0298, "step": 268240 }, { "epoch": 2.86607190555051, "grad_norm": 5.149313926696777, "learning_rate": 9.172677181262523e-07, "loss": 0.0137, "step": 268250 }, { "epoch": 2.86617874886479, "grad_norm": 6.480467796325684, "learning_rate": 9.172584613856293e-07, "loss": 0.0237, "step": 268260 }, { "epoch": 2.8662855921790693, "grad_norm": 8.420735359191895, "learning_rate": 9.1724920417389e-07, "loss": 0.0183, "step": 268270 }, { "epoch": 2.866392435493349, "grad_norm": 2.9831295013427734, "learning_rate": 9.172399464910449e-07, "loss": 0.0177, "step": 268280 }, { "epoch": 2.8664992788076287, "grad_norm": 1.371498703956604, "learning_rate": 9.172306883371044e-07, "loss": 0.032, "step": 268290 }, { "epoch": 2.866606122121908, "grad_norm": 0.18263040482997894, "learning_rate": 9.172214297120791e-07, "loss": 0.0146, "step": 268300 }, { "epoch": 2.8667129654361876, "grad_norm": 4.047589302062988, "learning_rate": 9.172121706159791e-07, "loss": 0.027, "step": 268310 }, { "epoch": 2.8668198087504675, "grad_norm": 0.025985557585954666, "learning_rate": 9.172029110488152e-07, "loss": 0.0111, "step": 268320 }, { "epoch": 2.866926652064747, "grad_norm": 0.12052682787179947, "learning_rate": 9.171936510105978e-07, "loss": 0.0152, "step": 268330 }, { "epoch": 2.867033495379027, "grad_norm": 1.8754396438598633, "learning_rate": 9.171843905013371e-07, "loss": 0.0181, "step": 268340 }, { "epoch": 2.8671403386933063, "grad_norm": 4.889826774597168, "learning_rate": 9.171751295210438e-07, "loss": 0.0274, "step": 268350 }, { "epoch": 2.8672471820075858, "grad_norm": 16.174116134643555, "learning_rate": 9.171658680697283e-07, "loss": 0.01, "step": 268360 }, { "epoch": 2.8673540253218652, "grad_norm": 3.3686459064483643, "learning_rate": 9.171566061474011e-07, "loss": 0.0078, "step": 268370 }, { "epoch": 2.867460868636145, "grad_norm": 0.020072078332304955, "learning_rate": 9.171473437540725e-07, "loss": 0.0132, "step": 268380 }, { "epoch": 2.8675677119504246, "grad_norm": 3.164868116378784, "learning_rate": 9.171380808897532e-07, "loss": 0.0132, "step": 268390 }, { "epoch": 2.8676745552647045, "grad_norm": 0.2814939022064209, "learning_rate": 9.171288175544535e-07, "loss": 0.0102, "step": 268400 }, { "epoch": 2.867781398578984, "grad_norm": 0.013465678319334984, "learning_rate": 9.171195537481837e-07, "loss": 0.0264, "step": 268410 }, { "epoch": 2.8678882418932634, "grad_norm": 0.003089424455538392, "learning_rate": 9.171102894709546e-07, "loss": 0.0299, "step": 268420 }, { "epoch": 2.867995085207543, "grad_norm": 6.9477858543396, "learning_rate": 9.171010247227766e-07, "loss": 0.0257, "step": 268430 }, { "epoch": 2.8681019285218228, "grad_norm": 0.27522122859954834, "learning_rate": 9.170917595036599e-07, "loss": 0.0408, "step": 268440 }, { "epoch": 2.8682087718361022, "grad_norm": 0.10329511761665344, "learning_rate": 9.170824938136153e-07, "loss": 0.0262, "step": 268450 }, { "epoch": 2.868315615150382, "grad_norm": 0.025115450844168663, "learning_rate": 9.170732276526528e-07, "loss": 0.0159, "step": 268460 }, { "epoch": 2.8684224584646616, "grad_norm": 0.064693883061409, "learning_rate": 9.170639610207833e-07, "loss": 0.0543, "step": 268470 }, { "epoch": 2.868529301778941, "grad_norm": 7.479794979095459, "learning_rate": 9.170546939180171e-07, "loss": 0.0456, "step": 268480 }, { "epoch": 2.868636145093221, "grad_norm": 9.470650672912598, "learning_rate": 9.170454263443647e-07, "loss": 0.014, "step": 268490 }, { "epoch": 2.8687429884075004, "grad_norm": 0.6007576584815979, "learning_rate": 9.170361582998366e-07, "loss": 0.0101, "step": 268500 }, { "epoch": 2.86884983172178, "grad_norm": 0.5517707467079163, "learning_rate": 9.170268897844431e-07, "loss": 0.05, "step": 268510 }, { "epoch": 2.86895667503606, "grad_norm": 6.204091548919678, "learning_rate": 9.170176207981948e-07, "loss": 0.0156, "step": 268520 }, { "epoch": 2.8690635183503392, "grad_norm": 8.142436027526855, "learning_rate": 9.170083513411023e-07, "loss": 0.0248, "step": 268530 }, { "epoch": 2.8691703616646187, "grad_norm": 6.92369270324707, "learning_rate": 9.169990814131756e-07, "loss": 0.0696, "step": 268540 }, { "epoch": 2.8692772049788986, "grad_norm": 0.007697337307035923, "learning_rate": 9.169898110144257e-07, "loss": 0.0165, "step": 268550 }, { "epoch": 2.869384048293178, "grad_norm": 0.19646956026554108, "learning_rate": 9.169805401448627e-07, "loss": 0.095, "step": 268560 }, { "epoch": 2.8694908916074575, "grad_norm": 4.626890659332275, "learning_rate": 9.169712688044972e-07, "loss": 0.032, "step": 268570 }, { "epoch": 2.8695977349217374, "grad_norm": 5.889101028442383, "learning_rate": 9.169619969933396e-07, "loss": 0.0231, "step": 268580 }, { "epoch": 2.869704578236017, "grad_norm": 5.526412487030029, "learning_rate": 9.169527247114006e-07, "loss": 0.0229, "step": 268590 }, { "epoch": 2.8698114215502963, "grad_norm": 0.00497098034247756, "learning_rate": 9.169434519586903e-07, "loss": 0.0212, "step": 268600 }, { "epoch": 2.8699182648645762, "grad_norm": 1.4773164987564087, "learning_rate": 9.169341787352196e-07, "loss": 0.0438, "step": 268610 }, { "epoch": 2.8700251081788557, "grad_norm": 0.0049138618633151054, "learning_rate": 9.169249050409986e-07, "loss": 0.0276, "step": 268620 }, { "epoch": 2.870131951493135, "grad_norm": 2.4497673511505127, "learning_rate": 9.169156308760379e-07, "loss": 0.0201, "step": 268630 }, { "epoch": 2.870238794807415, "grad_norm": 0.38778597116470337, "learning_rate": 9.169063562403482e-07, "loss": 0.0275, "step": 268640 }, { "epoch": 2.8703456381216945, "grad_norm": 0.40641745924949646, "learning_rate": 9.168970811339396e-07, "loss": 0.0073, "step": 268650 }, { "epoch": 2.870452481435974, "grad_norm": 3.5325238704681396, "learning_rate": 9.168878055568228e-07, "loss": 0.0123, "step": 268660 }, { "epoch": 2.870559324750254, "grad_norm": 0.6814423203468323, "learning_rate": 9.16878529509008e-07, "loss": 0.0274, "step": 268670 }, { "epoch": 2.8706661680645333, "grad_norm": 0.10913258045911789, "learning_rate": 9.16869252990506e-07, "loss": 0.0063, "step": 268680 }, { "epoch": 2.8707730113788132, "grad_norm": 0.47758349776268005, "learning_rate": 9.168599760013272e-07, "loss": 0.0045, "step": 268690 }, { "epoch": 2.8708798546930927, "grad_norm": 3.6889235973358154, "learning_rate": 9.168506985414821e-07, "loss": 0.0066, "step": 268700 }, { "epoch": 2.870986698007372, "grad_norm": 0.004564064089208841, "learning_rate": 9.168414206109809e-07, "loss": 0.0072, "step": 268710 }, { "epoch": 2.8710935413216516, "grad_norm": 4.596253871917725, "learning_rate": 9.168321422098344e-07, "loss": 0.0182, "step": 268720 }, { "epoch": 2.8712003846359315, "grad_norm": 0.11786938458681107, "learning_rate": 9.168228633380531e-07, "loss": 0.0266, "step": 268730 }, { "epoch": 2.871307227950211, "grad_norm": 0.20556388795375824, "learning_rate": 9.16813583995647e-07, "loss": 0.0223, "step": 268740 }, { "epoch": 2.871414071264491, "grad_norm": 3.698861598968506, "learning_rate": 9.168043041826271e-07, "loss": 0.0301, "step": 268750 }, { "epoch": 2.8715209145787703, "grad_norm": 4.215373516082764, "learning_rate": 9.167950238990036e-07, "loss": 0.0102, "step": 268760 }, { "epoch": 2.87162775789305, "grad_norm": 0.12486628443002701, "learning_rate": 9.167857431447872e-07, "loss": 0.0051, "step": 268770 }, { "epoch": 2.8717346012073293, "grad_norm": 0.027052801102399826, "learning_rate": 9.16776461919988e-07, "loss": 0.0201, "step": 268780 }, { "epoch": 2.871841444521609, "grad_norm": 0.20592765510082245, "learning_rate": 9.16767180224617e-07, "loss": 0.0059, "step": 268790 }, { "epoch": 2.8719482878358886, "grad_norm": 0.025022635236382484, "learning_rate": 9.167578980586844e-07, "loss": 0.0108, "step": 268800 }, { "epoch": 2.8720551311501685, "grad_norm": 0.016712505370378494, "learning_rate": 9.167486154222004e-07, "loss": 0.0235, "step": 268810 }, { "epoch": 2.872161974464448, "grad_norm": 7.777166843414307, "learning_rate": 9.167393323151759e-07, "loss": 0.0621, "step": 268820 }, { "epoch": 2.8722688177787274, "grad_norm": 1.4567874670028687, "learning_rate": 9.167300487376213e-07, "loss": 0.0474, "step": 268830 }, { "epoch": 2.872375661093007, "grad_norm": 1.461620569229126, "learning_rate": 9.16720764689547e-07, "loss": 0.0195, "step": 268840 }, { "epoch": 2.872482504407287, "grad_norm": 2.2965478897094727, "learning_rate": 9.167114801709633e-07, "loss": 0.0072, "step": 268850 }, { "epoch": 2.8725893477215663, "grad_norm": 0.07652458548545837, "learning_rate": 9.167021951818812e-07, "loss": 0.0349, "step": 268860 }, { "epoch": 2.872696191035846, "grad_norm": 1.5941909551620483, "learning_rate": 9.166929097223106e-07, "loss": 0.0064, "step": 268870 }, { "epoch": 2.8728030343501256, "grad_norm": 3.8921968936920166, "learning_rate": 9.166836237922624e-07, "loss": 0.018, "step": 268880 }, { "epoch": 2.872909877664405, "grad_norm": 0.011287735775113106, "learning_rate": 9.166743373917469e-07, "loss": 0.0182, "step": 268890 }, { "epoch": 2.8730167209786845, "grad_norm": 0.005835611838847399, "learning_rate": 9.166650505207747e-07, "loss": 0.0104, "step": 268900 }, { "epoch": 2.8731235642929644, "grad_norm": 4.9963297843933105, "learning_rate": 9.166557631793561e-07, "loss": 0.0186, "step": 268910 }, { "epoch": 2.873230407607244, "grad_norm": 7.2592315673828125, "learning_rate": 9.166464753675018e-07, "loss": 0.0374, "step": 268920 }, { "epoch": 2.873337250921524, "grad_norm": 0.29390397667884827, "learning_rate": 9.166371870852221e-07, "loss": 0.0272, "step": 268930 }, { "epoch": 2.8734440942358033, "grad_norm": 0.27404505014419556, "learning_rate": 9.166278983325277e-07, "loss": 0.0078, "step": 268940 }, { "epoch": 2.8735509375500827, "grad_norm": 2.7865195274353027, "learning_rate": 9.166186091094288e-07, "loss": 0.0079, "step": 268950 }, { "epoch": 2.873657780864362, "grad_norm": 0.02046905644237995, "learning_rate": 9.166093194159361e-07, "loss": 0.0079, "step": 268960 }, { "epoch": 2.873764624178642, "grad_norm": 0.006339934188872576, "learning_rate": 9.166000292520601e-07, "loss": 0.0053, "step": 268970 }, { "epoch": 2.8738714674929215, "grad_norm": 2.3199856281280518, "learning_rate": 9.165907386178112e-07, "loss": 0.0181, "step": 268980 }, { "epoch": 2.8739783108072015, "grad_norm": 0.0020475960336625576, "learning_rate": 9.165814475131999e-07, "loss": 0.0133, "step": 268990 }, { "epoch": 2.874085154121481, "grad_norm": 1.6406044960021973, "learning_rate": 9.165721559382366e-07, "loss": 0.007, "step": 269000 }, { "epoch": 2.8741919974357604, "grad_norm": 0.2633119523525238, "learning_rate": 9.165628638929322e-07, "loss": 0.0185, "step": 269010 }, { "epoch": 2.87429884075004, "grad_norm": 1.7235455513000488, "learning_rate": 9.165535713772966e-07, "loss": 0.0675, "step": 269020 }, { "epoch": 2.8744056840643197, "grad_norm": 1.2055422067642212, "learning_rate": 9.165442783913406e-07, "loss": 0.0044, "step": 269030 }, { "epoch": 2.874512527378599, "grad_norm": 0.2991643249988556, "learning_rate": 9.165349849350747e-07, "loss": 0.0542, "step": 269040 }, { "epoch": 2.874619370692879, "grad_norm": 0.007503631059080362, "learning_rate": 9.165256910085096e-07, "loss": 0.0294, "step": 269050 }, { "epoch": 2.8747262140071586, "grad_norm": 2.7447171211242676, "learning_rate": 9.165163966116552e-07, "loss": 0.0126, "step": 269060 }, { "epoch": 2.874833057321438, "grad_norm": 0.16800455749034882, "learning_rate": 9.165071017445225e-07, "loss": 0.0286, "step": 269070 }, { "epoch": 2.8749399006357175, "grad_norm": 0.03888422250747681, "learning_rate": 9.164978064071218e-07, "loss": 0.0167, "step": 269080 }, { "epoch": 2.8750467439499974, "grad_norm": 0.014758294448256493, "learning_rate": 9.164885105994637e-07, "loss": 0.0116, "step": 269090 }, { "epoch": 2.875153587264277, "grad_norm": 0.11838437616825104, "learning_rate": 9.164792143215587e-07, "loss": 0.0236, "step": 269100 }, { "epoch": 2.8752604305785567, "grad_norm": 0.24657705426216125, "learning_rate": 9.164699175734172e-07, "loss": 0.0185, "step": 269110 }, { "epoch": 2.875367273892836, "grad_norm": 0.02383991703391075, "learning_rate": 9.164606203550497e-07, "loss": 0.0057, "step": 269120 }, { "epoch": 2.8754741172071157, "grad_norm": 0.05628569424152374, "learning_rate": 9.164513226664668e-07, "loss": 0.0071, "step": 269130 }, { "epoch": 2.875580960521395, "grad_norm": 0.06942214071750641, "learning_rate": 9.164420245076787e-07, "loss": 0.0214, "step": 269140 }, { "epoch": 2.875687803835675, "grad_norm": 2.014728307723999, "learning_rate": 9.164327258786964e-07, "loss": 0.0269, "step": 269150 }, { "epoch": 2.8757946471499545, "grad_norm": 0.018260164186358452, "learning_rate": 9.1642342677953e-07, "loss": 0.0056, "step": 269160 }, { "epoch": 2.8759014904642344, "grad_norm": 0.1494249552488327, "learning_rate": 9.164141272101902e-07, "loss": 0.0308, "step": 269170 }, { "epoch": 2.876008333778514, "grad_norm": 0.03099021688103676, "learning_rate": 9.164048271706873e-07, "loss": 0.0094, "step": 269180 }, { "epoch": 2.8761151770927933, "grad_norm": 0.008908221498131752, "learning_rate": 9.163955266610319e-07, "loss": 0.0044, "step": 269190 }, { "epoch": 2.8762220204070728, "grad_norm": 8.57209300994873, "learning_rate": 9.163862256812346e-07, "loss": 0.0227, "step": 269200 }, { "epoch": 2.8763288637213527, "grad_norm": 0.2725757956504822, "learning_rate": 9.163769242313057e-07, "loss": 0.029, "step": 269210 }, { "epoch": 2.876435707035632, "grad_norm": 0.003737921128049493, "learning_rate": 9.163676223112561e-07, "loss": 0.021, "step": 269220 }, { "epoch": 2.876542550349912, "grad_norm": 0.014503341168165207, "learning_rate": 9.163583199210958e-07, "loss": 0.007, "step": 269230 }, { "epoch": 2.8766493936641915, "grad_norm": 0.5917634963989258, "learning_rate": 9.163490170608357e-07, "loss": 0.0404, "step": 269240 }, { "epoch": 2.876756236978471, "grad_norm": 0.6010072231292725, "learning_rate": 9.163397137304861e-07, "loss": 0.0095, "step": 269250 }, { "epoch": 2.876863080292751, "grad_norm": 6.599720001220703, "learning_rate": 9.163304099300573e-07, "loss": 0.0195, "step": 269260 }, { "epoch": 2.8769699236070303, "grad_norm": 3.0975704193115234, "learning_rate": 9.163211056595603e-07, "loss": 0.0252, "step": 269270 }, { "epoch": 2.8770767669213098, "grad_norm": 1.4635062217712402, "learning_rate": 9.163118009190052e-07, "loss": 0.0216, "step": 269280 }, { "epoch": 2.8771836102355897, "grad_norm": 5.497108459472656, "learning_rate": 9.163024957084028e-07, "loss": 0.02, "step": 269290 }, { "epoch": 2.877290453549869, "grad_norm": 3.3745925426483154, "learning_rate": 9.162931900277634e-07, "loss": 0.0073, "step": 269300 }, { "epoch": 2.8773972968641486, "grad_norm": 0.23026075959205627, "learning_rate": 9.162838838770975e-07, "loss": 0.0134, "step": 269310 }, { "epoch": 2.8775041401784285, "grad_norm": 0.9924108386039734, "learning_rate": 9.162745772564157e-07, "loss": 0.0169, "step": 269320 }, { "epoch": 2.877610983492708, "grad_norm": 1.374422550201416, "learning_rate": 9.162652701657286e-07, "loss": 0.0291, "step": 269330 }, { "epoch": 2.8777178268069874, "grad_norm": 2.144223928451538, "learning_rate": 9.162559626050466e-07, "loss": 0.0232, "step": 269340 }, { "epoch": 2.8778246701212673, "grad_norm": 2.0301427841186523, "learning_rate": 9.162466545743801e-07, "loss": 0.0078, "step": 269350 }, { "epoch": 2.8779315134355468, "grad_norm": 6.568715572357178, "learning_rate": 9.162373460737397e-07, "loss": 0.0091, "step": 269360 }, { "epoch": 2.878038356749826, "grad_norm": 1.224122405052185, "learning_rate": 9.16228037103136e-07, "loss": 0.0109, "step": 269370 }, { "epoch": 2.878145200064106, "grad_norm": 4.720966339111328, "learning_rate": 9.162187276625793e-07, "loss": 0.0228, "step": 269380 }, { "epoch": 2.8782520433783856, "grad_norm": 0.5325936675071716, "learning_rate": 9.162094177520804e-07, "loss": 0.019, "step": 269390 }, { "epoch": 2.878358886692665, "grad_norm": 1.901160717010498, "learning_rate": 9.162001073716497e-07, "loss": 0.0101, "step": 269400 }, { "epoch": 2.878465730006945, "grad_norm": 0.006862569600343704, "learning_rate": 9.161907965212974e-07, "loss": 0.0399, "step": 269410 }, { "epoch": 2.8785725733212244, "grad_norm": 0.009247812442481518, "learning_rate": 9.161814852010345e-07, "loss": 0.0005, "step": 269420 }, { "epoch": 2.878679416635504, "grad_norm": 4.389361381530762, "learning_rate": 9.161721734108712e-07, "loss": 0.0492, "step": 269430 }, { "epoch": 2.8787862599497838, "grad_norm": 0.1375570148229599, "learning_rate": 9.161628611508181e-07, "loss": 0.0043, "step": 269440 }, { "epoch": 2.8788931032640632, "grad_norm": 0.010325858369469643, "learning_rate": 9.161535484208858e-07, "loss": 0.0117, "step": 269450 }, { "epoch": 2.878999946578343, "grad_norm": 0.22298943996429443, "learning_rate": 9.161442352210847e-07, "loss": 0.0056, "step": 269460 }, { "epoch": 2.8791067898926226, "grad_norm": 0.5181754231452942, "learning_rate": 9.161349215514254e-07, "loss": 0.0356, "step": 269470 }, { "epoch": 2.879213633206902, "grad_norm": 1.4080175161361694, "learning_rate": 9.161256074119181e-07, "loss": 0.0686, "step": 269480 }, { "epoch": 2.8793204765211815, "grad_norm": 3.164370536804199, "learning_rate": 9.161162928025739e-07, "loss": 0.0183, "step": 269490 }, { "epoch": 2.8794273198354614, "grad_norm": 0.011506300419569016, "learning_rate": 9.16106977723403e-07, "loss": 0.0177, "step": 269500 }, { "epoch": 2.879534163149741, "grad_norm": 0.05778677016496658, "learning_rate": 9.160976621744156e-07, "loss": 0.0286, "step": 269510 }, { "epoch": 2.8796410064640208, "grad_norm": 0.3044873774051666, "learning_rate": 9.160883461556228e-07, "loss": 0.0043, "step": 269520 }, { "epoch": 2.8797478497783002, "grad_norm": 3.1202876567840576, "learning_rate": 9.160790296670349e-07, "loss": 0.0278, "step": 269530 }, { "epoch": 2.8798546930925797, "grad_norm": 5.422648906707764, "learning_rate": 9.160697127086622e-07, "loss": 0.0266, "step": 269540 }, { "epoch": 2.879961536406859, "grad_norm": 1.1739178895950317, "learning_rate": 9.160603952805155e-07, "loss": 0.008, "step": 269550 }, { "epoch": 2.880068379721139, "grad_norm": 0.0032976465299725533, "learning_rate": 9.160510773826051e-07, "loss": 0.0146, "step": 269560 }, { "epoch": 2.8801752230354185, "grad_norm": 0.0035504098050296307, "learning_rate": 9.160417590149416e-07, "loss": 0.0159, "step": 269570 }, { "epoch": 2.8802820663496984, "grad_norm": 3.1946051120758057, "learning_rate": 9.160324401775357e-07, "loss": 0.0175, "step": 269580 }, { "epoch": 2.880388909663978, "grad_norm": 0.01403007935732603, "learning_rate": 9.160231208703978e-07, "loss": 0.0088, "step": 269590 }, { "epoch": 2.8804957529782573, "grad_norm": 2.146315097808838, "learning_rate": 9.160138010935384e-07, "loss": 0.0362, "step": 269600 }, { "epoch": 2.880602596292537, "grad_norm": 2.4362735748291016, "learning_rate": 9.160044808469678e-07, "loss": 0.1165, "step": 269610 }, { "epoch": 2.8807094396068167, "grad_norm": 0.11818450689315796, "learning_rate": 9.15995160130697e-07, "loss": 0.0117, "step": 269620 }, { "epoch": 2.880816282921096, "grad_norm": 0.008277122862637043, "learning_rate": 9.159858389447362e-07, "loss": 0.0491, "step": 269630 }, { "epoch": 2.880923126235376, "grad_norm": 0.3307254910469055, "learning_rate": 9.159765172890959e-07, "loss": 0.0137, "step": 269640 }, { "epoch": 2.8810299695496555, "grad_norm": 0.2571972906589508, "learning_rate": 9.159671951637867e-07, "loss": 0.0017, "step": 269650 }, { "epoch": 2.881136812863935, "grad_norm": 9.334465980529785, "learning_rate": 9.159578725688193e-07, "loss": 0.0395, "step": 269660 }, { "epoch": 2.8812436561782144, "grad_norm": 0.20591944456100464, "learning_rate": 9.15948549504204e-07, "loss": 0.0076, "step": 269670 }, { "epoch": 2.8813504994924943, "grad_norm": 0.740111231803894, "learning_rate": 9.159392259699513e-07, "loss": 0.0116, "step": 269680 }, { "epoch": 2.881457342806774, "grad_norm": 5.491433620452881, "learning_rate": 9.159299019660719e-07, "loss": 0.0056, "step": 269690 }, { "epoch": 2.8815641861210537, "grad_norm": 0.7398541569709778, "learning_rate": 9.159205774925762e-07, "loss": 0.0041, "step": 269700 }, { "epoch": 2.881671029435333, "grad_norm": 3.6091208457946777, "learning_rate": 9.159112525494749e-07, "loss": 0.0236, "step": 269710 }, { "epoch": 2.8817778727496126, "grad_norm": 1.6094988584518433, "learning_rate": 9.159019271367782e-07, "loss": 0.0111, "step": 269720 }, { "epoch": 2.881884716063892, "grad_norm": 0.36233583092689514, "learning_rate": 9.158926012544971e-07, "loss": 0.0243, "step": 269730 }, { "epoch": 2.881991559378172, "grad_norm": 0.016990801319479942, "learning_rate": 9.158832749026417e-07, "loss": 0.0177, "step": 269740 }, { "epoch": 2.8820984026924514, "grad_norm": 8.31378173828125, "learning_rate": 9.158739480812228e-07, "loss": 0.0159, "step": 269750 }, { "epoch": 2.8822052460067313, "grad_norm": 0.004794543609023094, "learning_rate": 9.158646207902506e-07, "loss": 0.0173, "step": 269760 }, { "epoch": 2.882312089321011, "grad_norm": 0.6278284192085266, "learning_rate": 9.158552930297361e-07, "loss": 0.0302, "step": 269770 }, { "epoch": 2.8824189326352903, "grad_norm": 0.02899344451725483, "learning_rate": 9.158459647996895e-07, "loss": 0.011, "step": 269780 }, { "epoch": 2.8825257759495697, "grad_norm": 1.7449471950531006, "learning_rate": 9.158366361001213e-07, "loss": 0.0163, "step": 269790 }, { "epoch": 2.8826326192638496, "grad_norm": 1.178373098373413, "learning_rate": 9.158273069310422e-07, "loss": 0.0293, "step": 269800 }, { "epoch": 2.882739462578129, "grad_norm": 8.449617385864258, "learning_rate": 9.158179772924629e-07, "loss": 0.0576, "step": 269810 }, { "epoch": 2.882846305892409, "grad_norm": 0.2663003206253052, "learning_rate": 9.158086471843936e-07, "loss": 0.041, "step": 269820 }, { "epoch": 2.8829531492066884, "grad_norm": 0.011145390570163727, "learning_rate": 9.157993166068449e-07, "loss": 0.0146, "step": 269830 }, { "epoch": 2.883059992520968, "grad_norm": 2.61365008354187, "learning_rate": 9.157899855598274e-07, "loss": 0.0303, "step": 269840 }, { "epoch": 2.8831668358352474, "grad_norm": 0.007745651062577963, "learning_rate": 9.157806540433517e-07, "loss": 0.0101, "step": 269850 }, { "epoch": 2.8832736791495273, "grad_norm": 0.0954371988773346, "learning_rate": 9.157713220574281e-07, "loss": 0.0155, "step": 269860 }, { "epoch": 2.8833805224638067, "grad_norm": 0.414119154214859, "learning_rate": 9.157619896020674e-07, "loss": 0.0033, "step": 269870 }, { "epoch": 2.8834873657780866, "grad_norm": 0.0021298001520335674, "learning_rate": 9.157526566772801e-07, "loss": 0.0189, "step": 269880 }, { "epoch": 2.883594209092366, "grad_norm": 2.6638011932373047, "learning_rate": 9.157433232830766e-07, "loss": 0.0223, "step": 269890 }, { "epoch": 2.8837010524066455, "grad_norm": 0.19161559641361237, "learning_rate": 9.157339894194676e-07, "loss": 0.0065, "step": 269900 }, { "epoch": 2.883807895720925, "grad_norm": 0.005225103814154863, "learning_rate": 9.157246550864634e-07, "loss": 0.0071, "step": 269910 }, { "epoch": 2.883914739035205, "grad_norm": 0.023102914914488792, "learning_rate": 9.157153202840747e-07, "loss": 0.0064, "step": 269920 }, { "epoch": 2.8840215823494844, "grad_norm": 1.7560579776763916, "learning_rate": 9.157059850123121e-07, "loss": 0.0024, "step": 269930 }, { "epoch": 2.8841284256637643, "grad_norm": 1.796044945716858, "learning_rate": 9.156966492711861e-07, "loss": 0.0022, "step": 269940 }, { "epoch": 2.8842352689780437, "grad_norm": 0.1458408385515213, "learning_rate": 9.156873130607071e-07, "loss": 0.0069, "step": 269950 }, { "epoch": 2.884342112292323, "grad_norm": 5.255474090576172, "learning_rate": 9.156779763808859e-07, "loss": 0.0256, "step": 269960 }, { "epoch": 2.884448955606603, "grad_norm": 0.10639743506908417, "learning_rate": 9.156686392317328e-07, "loss": 0.0049, "step": 269970 }, { "epoch": 2.8845557989208825, "grad_norm": 0.34299036860466003, "learning_rate": 9.156593016132584e-07, "loss": 0.0016, "step": 269980 }, { "epoch": 2.884662642235162, "grad_norm": 0.004547883756458759, "learning_rate": 9.156499635254733e-07, "loss": 0.0206, "step": 269990 }, { "epoch": 2.884769485549442, "grad_norm": 0.019999386742711067, "learning_rate": 9.156406249683882e-07, "loss": 0.0186, "step": 270000 }, { "epoch": 2.8848763288637214, "grad_norm": 0.5382857322692871, "learning_rate": 9.156312859420132e-07, "loss": 0.0232, "step": 270010 }, { "epoch": 2.884983172178001, "grad_norm": 0.76312655210495, "learning_rate": 9.156219464463591e-07, "loss": 0.0283, "step": 270020 }, { "epoch": 2.8850900154922807, "grad_norm": 7.398309707641602, "learning_rate": 9.156126064814366e-07, "loss": 0.0304, "step": 270030 }, { "epoch": 2.88519685880656, "grad_norm": 1.0113552808761597, "learning_rate": 9.156032660472561e-07, "loss": 0.0099, "step": 270040 }, { "epoch": 2.8853037021208396, "grad_norm": 0.2938332259654999, "learning_rate": 9.15593925143828e-07, "loss": 0.0186, "step": 270050 }, { "epoch": 2.8854105454351195, "grad_norm": 4.577097415924072, "learning_rate": 9.155845837711631e-07, "loss": 0.0685, "step": 270060 }, { "epoch": 2.885517388749399, "grad_norm": 0.011908534914255142, "learning_rate": 9.15575241929272e-07, "loss": 0.0122, "step": 270070 }, { "epoch": 2.8856242320636785, "grad_norm": 0.11568307131528854, "learning_rate": 9.155658996181648e-07, "loss": 0.0109, "step": 270080 }, { "epoch": 2.8857310753779584, "grad_norm": 0.8528985977172852, "learning_rate": 9.155565568378525e-07, "loss": 0.0103, "step": 270090 }, { "epoch": 2.885837918692238, "grad_norm": 0.5183994770050049, "learning_rate": 9.155472135883455e-07, "loss": 0.0293, "step": 270100 }, { "epoch": 2.8859447620065173, "grad_norm": 0.6443018913269043, "learning_rate": 9.155378698696543e-07, "loss": 0.0387, "step": 270110 }, { "epoch": 2.886051605320797, "grad_norm": 0.008751691319048405, "learning_rate": 9.155285256817894e-07, "loss": 0.009, "step": 270120 }, { "epoch": 2.8861584486350766, "grad_norm": 2.7344603538513184, "learning_rate": 9.155191810247616e-07, "loss": 0.0439, "step": 270130 }, { "epoch": 2.886265291949356, "grad_norm": 0.023942679166793823, "learning_rate": 9.155098358985811e-07, "loss": 0.0168, "step": 270140 }, { "epoch": 2.886372135263636, "grad_norm": 2.0334367752075195, "learning_rate": 9.155004903032586e-07, "loss": 0.0089, "step": 270150 }, { "epoch": 2.8864789785779155, "grad_norm": 0.00428652623668313, "learning_rate": 9.154911442388049e-07, "loss": 0.0165, "step": 270160 }, { "epoch": 2.8865858218921954, "grad_norm": 0.14624318480491638, "learning_rate": 9.154817977052303e-07, "loss": 0.0072, "step": 270170 }, { "epoch": 2.886692665206475, "grad_norm": 0.26044145226478577, "learning_rate": 9.154724507025453e-07, "loss": 0.0269, "step": 270180 }, { "epoch": 2.8867995085207543, "grad_norm": 0.008965441957116127, "learning_rate": 9.154631032307605e-07, "loss": 0.0481, "step": 270190 }, { "epoch": 2.8869063518350337, "grad_norm": 2.9188992977142334, "learning_rate": 9.154537552898867e-07, "loss": 0.0146, "step": 270200 }, { "epoch": 2.8870131951493136, "grad_norm": 1.0596002340316772, "learning_rate": 9.15444406879934e-07, "loss": 0.0043, "step": 270210 }, { "epoch": 2.887120038463593, "grad_norm": 1.90605890750885, "learning_rate": 9.154350580009134e-07, "loss": 0.0255, "step": 270220 }, { "epoch": 2.887226881777873, "grad_norm": 10.425973892211914, "learning_rate": 9.154257086528352e-07, "loss": 0.0214, "step": 270230 }, { "epoch": 2.8873337250921525, "grad_norm": 6.303284645080566, "learning_rate": 9.1541635883571e-07, "loss": 0.0232, "step": 270240 }, { "epoch": 2.887440568406432, "grad_norm": 0.030127357691526413, "learning_rate": 9.154070085495485e-07, "loss": 0.0359, "step": 270250 }, { "epoch": 2.8875474117207114, "grad_norm": 0.16756179928779602, "learning_rate": 9.15397657794361e-07, "loss": 0.0113, "step": 270260 }, { "epoch": 2.8876542550349913, "grad_norm": 0.03592168912291527, "learning_rate": 9.153883065701582e-07, "loss": 0.0181, "step": 270270 }, { "epoch": 2.8877610983492707, "grad_norm": 0.9051671028137207, "learning_rate": 9.153789548769507e-07, "loss": 0.0055, "step": 270280 }, { "epoch": 2.8878679416635507, "grad_norm": 0.08087154477834702, "learning_rate": 9.153696027147491e-07, "loss": 0.0034, "step": 270290 }, { "epoch": 2.88797478497783, "grad_norm": 0.4298799932003021, "learning_rate": 9.153602500835638e-07, "loss": 0.0359, "step": 270300 }, { "epoch": 2.8880816282921096, "grad_norm": 0.0772179439663887, "learning_rate": 9.153508969834053e-07, "loss": 0.0381, "step": 270310 }, { "epoch": 2.888188471606389, "grad_norm": 0.30792126059532166, "learning_rate": 9.153415434142844e-07, "loss": 0.0152, "step": 270320 }, { "epoch": 2.888295314920669, "grad_norm": 0.02241467498242855, "learning_rate": 9.153321893762115e-07, "loss": 0.0188, "step": 270330 }, { "epoch": 2.8884021582349484, "grad_norm": 0.030985919758677483, "learning_rate": 9.153228348691972e-07, "loss": 0.0156, "step": 270340 }, { "epoch": 2.8885090015492283, "grad_norm": 0.03915226459503174, "learning_rate": 9.153134798932522e-07, "loss": 0.0275, "step": 270350 }, { "epoch": 2.8886158448635078, "grad_norm": 0.05729928985238075, "learning_rate": 9.153041244483868e-07, "loss": 0.0162, "step": 270360 }, { "epoch": 2.888722688177787, "grad_norm": 0.2131100445985794, "learning_rate": 9.152947685346118e-07, "loss": 0.0557, "step": 270370 }, { "epoch": 2.8888295314920667, "grad_norm": 7.372620105743408, "learning_rate": 9.152854121519376e-07, "loss": 0.027, "step": 270380 }, { "epoch": 2.8889363748063466, "grad_norm": 3.8259317874908447, "learning_rate": 9.152760553003748e-07, "loss": 0.0325, "step": 270390 }, { "epoch": 2.889043218120626, "grad_norm": 0.15264898538589478, "learning_rate": 9.152666979799341e-07, "loss": 0.0004, "step": 270400 }, { "epoch": 2.889150061434906, "grad_norm": 0.11343566328287125, "learning_rate": 9.152573401906258e-07, "loss": 0.0081, "step": 270410 }, { "epoch": 2.8892569047491854, "grad_norm": 1.319406509399414, "learning_rate": 9.152479819324608e-07, "loss": 0.0192, "step": 270420 }, { "epoch": 2.889363748063465, "grad_norm": 0.007054642308503389, "learning_rate": 9.152386232054494e-07, "loss": 0.041, "step": 270430 }, { "epoch": 2.8894705913777443, "grad_norm": 2.404744863510132, "learning_rate": 9.152292640096022e-07, "loss": 0.0482, "step": 270440 }, { "epoch": 2.889577434692024, "grad_norm": 0.01642846316099167, "learning_rate": 9.1521990434493e-07, "loss": 0.009, "step": 270450 }, { "epoch": 2.8896842780063037, "grad_norm": 0.04537908732891083, "learning_rate": 9.15210544211443e-07, "loss": 0.0154, "step": 270460 }, { "epoch": 2.8897911213205836, "grad_norm": 1.3995939493179321, "learning_rate": 9.15201183609152e-07, "loss": 0.0159, "step": 270470 }, { "epoch": 2.889897964634863, "grad_norm": 14.817498207092285, "learning_rate": 9.151918225380676e-07, "loss": 0.0474, "step": 270480 }, { "epoch": 2.8900048079491425, "grad_norm": 0.0012230738066136837, "learning_rate": 9.151824609982002e-07, "loss": 0.0279, "step": 270490 }, { "epoch": 2.890111651263422, "grad_norm": 1.256994605064392, "learning_rate": 9.151730989895605e-07, "loss": 0.0311, "step": 270500 }, { "epoch": 2.890218494577702, "grad_norm": 2.917841911315918, "learning_rate": 9.151637365121591e-07, "loss": 0.0358, "step": 270510 }, { "epoch": 2.8903253378919813, "grad_norm": 2.640087366104126, "learning_rate": 9.151543735660065e-07, "loss": 0.0091, "step": 270520 }, { "epoch": 2.890432181206261, "grad_norm": 5.1495442390441895, "learning_rate": 9.151450101511132e-07, "loss": 0.0309, "step": 270530 }, { "epoch": 2.8905390245205407, "grad_norm": 0.4859122633934021, "learning_rate": 9.1513564626749e-07, "loss": 0.0367, "step": 270540 }, { "epoch": 2.89064586783482, "grad_norm": 0.18966497480869293, "learning_rate": 9.151262819151471e-07, "loss": 0.0419, "step": 270550 }, { "epoch": 2.8907527111490996, "grad_norm": 0.9372024536132812, "learning_rate": 9.151169170940953e-07, "loss": 0.0781, "step": 270560 }, { "epoch": 2.8908595544633795, "grad_norm": 0.08206797391176224, "learning_rate": 9.151075518043453e-07, "loss": 0.0021, "step": 270570 }, { "epoch": 2.890966397777659, "grad_norm": 0.15563389658927917, "learning_rate": 9.150981860459076e-07, "loss": 0.0149, "step": 270580 }, { "epoch": 2.891073241091939, "grad_norm": 8.937077522277832, "learning_rate": 9.150888198187927e-07, "loss": 0.0196, "step": 270590 }, { "epoch": 2.8911800844062183, "grad_norm": 0.029826445505023003, "learning_rate": 9.15079453123011e-07, "loss": 0.0197, "step": 270600 }, { "epoch": 2.891286927720498, "grad_norm": 7.654165267944336, "learning_rate": 9.150700859585736e-07, "loss": 0.0446, "step": 270610 }, { "epoch": 2.8913937710347772, "grad_norm": 5.551252841949463, "learning_rate": 9.150607183254905e-07, "loss": 0.0127, "step": 270620 }, { "epoch": 2.891500614349057, "grad_norm": 0.018064074218273163, "learning_rate": 9.150513502237725e-07, "loss": 0.0181, "step": 270630 }, { "epoch": 2.8916074576633366, "grad_norm": 0.5961391925811768, "learning_rate": 9.150419816534303e-07, "loss": 0.014, "step": 270640 }, { "epoch": 2.8917143009776165, "grad_norm": 0.0038689449429512024, "learning_rate": 9.150326126144743e-07, "loss": 0.0091, "step": 270650 }, { "epoch": 2.891821144291896, "grad_norm": 0.5487142205238342, "learning_rate": 9.150232431069152e-07, "loss": 0.0342, "step": 270660 }, { "epoch": 2.8919279876061754, "grad_norm": 2.2031466960906982, "learning_rate": 9.150138731307637e-07, "loss": 0.0085, "step": 270670 }, { "epoch": 2.892034830920455, "grad_norm": 1.5600866079330444, "learning_rate": 9.1500450268603e-07, "loss": 0.0225, "step": 270680 }, { "epoch": 2.892141674234735, "grad_norm": 0.7810661792755127, "learning_rate": 9.149951317727249e-07, "loss": 0.0162, "step": 270690 }, { "epoch": 2.8922485175490142, "grad_norm": 0.5757932662963867, "learning_rate": 9.149857603908591e-07, "loss": 0.0243, "step": 270700 }, { "epoch": 2.892355360863294, "grad_norm": 0.17713262140750885, "learning_rate": 9.14976388540443e-07, "loss": 0.0088, "step": 270710 }, { "epoch": 2.8924622041775736, "grad_norm": 0.21582235395908356, "learning_rate": 9.149670162214872e-07, "loss": 0.0225, "step": 270720 }, { "epoch": 2.892569047491853, "grad_norm": 8.9518404006958, "learning_rate": 9.149576434340023e-07, "loss": 0.013, "step": 270730 }, { "epoch": 2.892675890806133, "grad_norm": 0.09584242105484009, "learning_rate": 9.14948270177999e-07, "loss": 0.0013, "step": 270740 }, { "epoch": 2.8927827341204124, "grad_norm": 0.4582808017730713, "learning_rate": 9.149388964534878e-07, "loss": 0.009, "step": 270750 }, { "epoch": 2.892889577434692, "grad_norm": 0.4020620286464691, "learning_rate": 9.149295222604791e-07, "loss": 0.0094, "step": 270760 }, { "epoch": 2.892996420748972, "grad_norm": 2.666111946105957, "learning_rate": 9.149201475989838e-07, "loss": 0.0455, "step": 270770 }, { "epoch": 2.8931032640632512, "grad_norm": 0.038527049124240875, "learning_rate": 9.149107724690123e-07, "loss": 0.0161, "step": 270780 }, { "epoch": 2.8932101073775307, "grad_norm": 0.011104869656264782, "learning_rate": 9.149013968705752e-07, "loss": 0.0023, "step": 270790 }, { "epoch": 2.8933169506918106, "grad_norm": 0.4755825996398926, "learning_rate": 9.148920208036833e-07, "loss": 0.0051, "step": 270800 }, { "epoch": 2.89342379400609, "grad_norm": 0.6472517848014832, "learning_rate": 9.148826442683466e-07, "loss": 0.0161, "step": 270810 }, { "epoch": 2.8935306373203695, "grad_norm": 0.1136465072631836, "learning_rate": 9.148732672645764e-07, "loss": 0.0141, "step": 270820 }, { "epoch": 2.8936374806346494, "grad_norm": 0.3387502431869507, "learning_rate": 9.148638897923828e-07, "loss": 0.0039, "step": 270830 }, { "epoch": 2.893744323948929, "grad_norm": 9.076726913452148, "learning_rate": 9.148545118517767e-07, "loss": 0.0311, "step": 270840 }, { "epoch": 2.8938511672632083, "grad_norm": 1.4689427614212036, "learning_rate": 9.148451334427685e-07, "loss": 0.0097, "step": 270850 }, { "epoch": 2.8939580105774882, "grad_norm": 0.8982245922088623, "learning_rate": 9.148357545653688e-07, "loss": 0.0041, "step": 270860 }, { "epoch": 2.8940648538917677, "grad_norm": 2.158216714859009, "learning_rate": 9.148263752195882e-07, "loss": 0.0264, "step": 270870 }, { "epoch": 2.894171697206047, "grad_norm": 0.4183065891265869, "learning_rate": 9.148169954054374e-07, "loss": 0.0307, "step": 270880 }, { "epoch": 2.894278540520327, "grad_norm": 1.63201105594635, "learning_rate": 9.148076151229268e-07, "loss": 0.0008, "step": 270890 }, { "epoch": 2.8943853838346065, "grad_norm": 3.0764544010162354, "learning_rate": 9.147982343720672e-07, "loss": 0.0663, "step": 270900 }, { "epoch": 2.894492227148886, "grad_norm": 0.021838147193193436, "learning_rate": 9.14788853152869e-07, "loss": 0.0273, "step": 270910 }, { "epoch": 2.894599070463166, "grad_norm": 0.7563716769218445, "learning_rate": 9.147794714653429e-07, "loss": 0.0162, "step": 270920 }, { "epoch": 2.8947059137774453, "grad_norm": 0.8447108864784241, "learning_rate": 9.147700893094995e-07, "loss": 0.0133, "step": 270930 }, { "epoch": 2.8948127570917253, "grad_norm": 2.9290244579315186, "learning_rate": 9.147607066853493e-07, "loss": 0.0221, "step": 270940 }, { "epoch": 2.8949196004060047, "grad_norm": 0.14340735971927643, "learning_rate": 9.147513235929031e-07, "loss": 0.0419, "step": 270950 }, { "epoch": 2.895026443720284, "grad_norm": 12.348372459411621, "learning_rate": 9.147419400321712e-07, "loss": 0.0456, "step": 270960 }, { "epoch": 2.8951332870345636, "grad_norm": 0.07637178897857666, "learning_rate": 9.147325560031645e-07, "loss": 0.0125, "step": 270970 }, { "epoch": 2.8952401303488435, "grad_norm": 4.730197906494141, "learning_rate": 9.147231715058933e-07, "loss": 0.0328, "step": 270980 }, { "epoch": 2.895346973663123, "grad_norm": 0.8189948797225952, "learning_rate": 9.147137865403684e-07, "loss": 0.0826, "step": 270990 }, { "epoch": 2.895453816977403, "grad_norm": 5.106998920440674, "learning_rate": 9.147044011066003e-07, "loss": 0.0172, "step": 271000 }, { "epoch": 2.8955606602916824, "grad_norm": 0.026212479919195175, "learning_rate": 9.146950152045997e-07, "loss": 0.0529, "step": 271010 }, { "epoch": 2.895667503605962, "grad_norm": 0.09669984132051468, "learning_rate": 9.146856288343772e-07, "loss": 0.0326, "step": 271020 }, { "epoch": 2.8957743469202413, "grad_norm": 0.014451435767114162, "learning_rate": 9.14676241995943e-07, "loss": 0.0635, "step": 271030 }, { "epoch": 2.895881190234521, "grad_norm": 0.025515250861644745, "learning_rate": 9.146668546893084e-07, "loss": 0.0046, "step": 271040 }, { "epoch": 2.8959880335488006, "grad_norm": 0.0029517735820263624, "learning_rate": 9.146574669144834e-07, "loss": 0.0389, "step": 271050 }, { "epoch": 2.8960948768630805, "grad_norm": 0.06882438063621521, "learning_rate": 9.146480786714789e-07, "loss": 0.0075, "step": 271060 }, { "epoch": 2.89620172017736, "grad_norm": 0.23206081986427307, "learning_rate": 9.146386899603054e-07, "loss": 0.0198, "step": 271070 }, { "epoch": 2.8963085634916395, "grad_norm": 0.6794330477714539, "learning_rate": 9.146293007809736e-07, "loss": 0.0098, "step": 271080 }, { "epoch": 2.896415406805919, "grad_norm": 0.47282129526138306, "learning_rate": 9.14619911133494e-07, "loss": 0.0249, "step": 271090 }, { "epoch": 2.896522250120199, "grad_norm": 11.840490341186523, "learning_rate": 9.146105210178772e-07, "loss": 0.0232, "step": 271100 }, { "epoch": 2.8966290934344783, "grad_norm": 10.31275463104248, "learning_rate": 9.146011304341338e-07, "loss": 0.0397, "step": 271110 }, { "epoch": 2.896735936748758, "grad_norm": 0.04463418945670128, "learning_rate": 9.145917393822744e-07, "loss": 0.0126, "step": 271120 }, { "epoch": 2.8968427800630376, "grad_norm": 0.3842203617095947, "learning_rate": 9.145823478623097e-07, "loss": 0.0127, "step": 271130 }, { "epoch": 2.896949623377317, "grad_norm": 5.390582084655762, "learning_rate": 9.145729558742503e-07, "loss": 0.0377, "step": 271140 }, { "epoch": 2.8970564666915966, "grad_norm": 0.14718195796012878, "learning_rate": 9.145635634181065e-07, "loss": 0.0196, "step": 271150 }, { "epoch": 2.8971633100058765, "grad_norm": 2.050157070159912, "learning_rate": 9.145541704938895e-07, "loss": 0.0094, "step": 271160 }, { "epoch": 2.897270153320156, "grad_norm": 0.02890545129776001, "learning_rate": 9.145447771016092e-07, "loss": 0.0235, "step": 271170 }, { "epoch": 2.897376996634436, "grad_norm": 0.45716941356658936, "learning_rate": 9.145353832412769e-07, "loss": 0.0079, "step": 271180 }, { "epoch": 2.8974838399487153, "grad_norm": 0.006727840285748243, "learning_rate": 9.145259889129027e-07, "loss": 0.0097, "step": 271190 }, { "epoch": 2.8975906832629947, "grad_norm": 3.190382480621338, "learning_rate": 9.145165941164974e-07, "loss": 0.0133, "step": 271200 }, { "epoch": 2.897697526577274, "grad_norm": 0.08826690167188644, "learning_rate": 9.145071988520715e-07, "loss": 0.0087, "step": 271210 }, { "epoch": 2.897804369891554, "grad_norm": 12.134481430053711, "learning_rate": 9.144978031196357e-07, "loss": 0.0189, "step": 271220 }, { "epoch": 2.8979112132058336, "grad_norm": 5.168839454650879, "learning_rate": 9.144884069192008e-07, "loss": 0.0061, "step": 271230 }, { "epoch": 2.8980180565201135, "grad_norm": 0.1277056485414505, "learning_rate": 9.144790102507769e-07, "loss": 0.0244, "step": 271240 }, { "epoch": 2.898124899834393, "grad_norm": 0.5063913464546204, "learning_rate": 9.14469613114375e-07, "loss": 0.0193, "step": 271250 }, { "epoch": 2.8982317431486724, "grad_norm": 0.09649612754583359, "learning_rate": 9.144602155100058e-07, "loss": 0.0089, "step": 271260 }, { "epoch": 2.898338586462952, "grad_norm": 1.3276432752609253, "learning_rate": 9.144508174376796e-07, "loss": 0.0244, "step": 271270 }, { "epoch": 2.8984454297772317, "grad_norm": 0.12074323743581772, "learning_rate": 9.144414188974073e-07, "loss": 0.0105, "step": 271280 }, { "epoch": 2.898552273091511, "grad_norm": 0.07024073600769043, "learning_rate": 9.144320198891991e-07, "loss": 0.0347, "step": 271290 }, { "epoch": 2.898659116405791, "grad_norm": 1.6198487281799316, "learning_rate": 9.144226204130661e-07, "loss": 0.0094, "step": 271300 }, { "epoch": 2.8987659597200706, "grad_norm": 6.028568267822266, "learning_rate": 9.144132204690185e-07, "loss": 0.0322, "step": 271310 }, { "epoch": 2.89887280303435, "grad_norm": 5.625850677490234, "learning_rate": 9.144038200570672e-07, "loss": 0.0174, "step": 271320 }, { "epoch": 2.8989796463486295, "grad_norm": 0.06819720566272736, "learning_rate": 9.143944191772225e-07, "loss": 0.0074, "step": 271330 }, { "epoch": 2.8990864896629094, "grad_norm": 0.10978884249925613, "learning_rate": 9.143850178294956e-07, "loss": 0.0105, "step": 271340 }, { "epoch": 2.899193332977189, "grad_norm": 7.001487731933594, "learning_rate": 9.143756160138966e-07, "loss": 0.0157, "step": 271350 }, { "epoch": 2.8993001762914687, "grad_norm": 0.007380975876003504, "learning_rate": 9.143662137304362e-07, "loss": 0.0298, "step": 271360 }, { "epoch": 2.899407019605748, "grad_norm": 0.8156784772872925, "learning_rate": 9.143568109791251e-07, "loss": 0.0188, "step": 271370 }, { "epoch": 2.8995138629200277, "grad_norm": 0.039417166262865067, "learning_rate": 9.143474077599737e-07, "loss": 0.0464, "step": 271380 }, { "epoch": 2.899620706234307, "grad_norm": 0.4951551556587219, "learning_rate": 9.143380040729931e-07, "loss": 0.0378, "step": 271390 }, { "epoch": 2.899727549548587, "grad_norm": 0.03923792019486427, "learning_rate": 9.143285999181935e-07, "loss": 0.0129, "step": 271400 }, { "epoch": 2.8998343928628665, "grad_norm": 0.025177856907248497, "learning_rate": 9.143191952955856e-07, "loss": 0.0792, "step": 271410 }, { "epoch": 2.8999412361771464, "grad_norm": 20.267202377319336, "learning_rate": 9.143097902051802e-07, "loss": 0.0217, "step": 271420 }, { "epoch": 2.900048079491426, "grad_norm": 1.1980637311935425, "learning_rate": 9.143003846469877e-07, "loss": 0.0079, "step": 271430 }, { "epoch": 2.9001549228057053, "grad_norm": 2.290729522705078, "learning_rate": 9.142909786210187e-07, "loss": 0.0087, "step": 271440 }, { "epoch": 2.900261766119985, "grad_norm": 0.02025510184466839, "learning_rate": 9.142815721272839e-07, "loss": 0.0102, "step": 271450 }, { "epoch": 2.9003686094342647, "grad_norm": 0.23053143918514252, "learning_rate": 9.142721651657941e-07, "loss": 0.042, "step": 271460 }, { "epoch": 2.900475452748544, "grad_norm": 0.0075422897934913635, "learning_rate": 9.142627577365596e-07, "loss": 0.0096, "step": 271470 }, { "epoch": 2.900582296062824, "grad_norm": 0.01285579428076744, "learning_rate": 9.142533498395913e-07, "loss": 0.0158, "step": 271480 }, { "epoch": 2.9006891393771035, "grad_norm": 0.31946054100990295, "learning_rate": 9.142439414748997e-07, "loss": 0.0022, "step": 271490 }, { "epoch": 2.900795982691383, "grad_norm": 0.04131520912051201, "learning_rate": 9.142345326424954e-07, "loss": 0.0325, "step": 271500 }, { "epoch": 2.900902826005663, "grad_norm": 0.9049184918403625, "learning_rate": 9.14225123342389e-07, "loss": 0.0134, "step": 271510 }, { "epoch": 2.9010096693199423, "grad_norm": 0.014300349168479443, "learning_rate": 9.142157135745912e-07, "loss": 0.0226, "step": 271520 }, { "epoch": 2.9011165126342218, "grad_norm": 2.061699867248535, "learning_rate": 9.142063033391127e-07, "loss": 0.0068, "step": 271530 }, { "epoch": 2.9012233559485017, "grad_norm": 0.5467656850814819, "learning_rate": 9.141968926359639e-07, "loss": 0.0224, "step": 271540 }, { "epoch": 2.901330199262781, "grad_norm": 0.29539892077445984, "learning_rate": 9.141874814651555e-07, "loss": 0.0154, "step": 271550 }, { "epoch": 2.9014370425770606, "grad_norm": 14.254948616027832, "learning_rate": 9.141780698266984e-07, "loss": 0.034, "step": 271560 }, { "epoch": 2.9015438858913405, "grad_norm": 0.4721939265727997, "learning_rate": 9.141686577206028e-07, "loss": 0.0088, "step": 271570 }, { "epoch": 2.90165072920562, "grad_norm": 4.855579376220703, "learning_rate": 9.141592451468796e-07, "loss": 0.0184, "step": 271580 }, { "epoch": 2.9017575725198994, "grad_norm": 10.989609718322754, "learning_rate": 9.141498321055393e-07, "loss": 0.0348, "step": 271590 }, { "epoch": 2.9018644158341793, "grad_norm": 0.019860388711094856, "learning_rate": 9.141404185965927e-07, "loss": 0.0086, "step": 271600 }, { "epoch": 2.9019712591484588, "grad_norm": 9.710022926330566, "learning_rate": 9.141310046200501e-07, "loss": 0.0346, "step": 271610 }, { "epoch": 2.9020781024627382, "grad_norm": 0.079966239631176, "learning_rate": 9.141215901759225e-07, "loss": 0.011, "step": 271620 }, { "epoch": 2.902184945777018, "grad_norm": 5.592724800109863, "learning_rate": 9.141121752642203e-07, "loss": 0.0808, "step": 271630 }, { "epoch": 2.9022917890912976, "grad_norm": 0.15109162032604218, "learning_rate": 9.141027598849542e-07, "loss": 0.0094, "step": 271640 }, { "epoch": 2.9023986324055775, "grad_norm": 4.396624565124512, "learning_rate": 9.140933440381349e-07, "loss": 0.0616, "step": 271650 }, { "epoch": 2.902505475719857, "grad_norm": 0.019257621839642525, "learning_rate": 9.140839277237729e-07, "loss": 0.0093, "step": 271660 }, { "epoch": 2.9026123190341364, "grad_norm": 0.5805124044418335, "learning_rate": 9.140745109418791e-07, "loss": 0.029, "step": 271670 }, { "epoch": 2.902719162348416, "grad_norm": 0.0876573696732521, "learning_rate": 9.140650936924637e-07, "loss": 0.0282, "step": 271680 }, { "epoch": 2.9028260056626958, "grad_norm": 4.043865203857422, "learning_rate": 9.140556759755376e-07, "loss": 0.0028, "step": 271690 }, { "epoch": 2.9029328489769752, "grad_norm": 0.005633936729282141, "learning_rate": 9.140462577911113e-07, "loss": 0.0095, "step": 271700 }, { "epoch": 2.903039692291255, "grad_norm": 2.3917458057403564, "learning_rate": 9.140368391391957e-07, "loss": 0.0015, "step": 271710 }, { "epoch": 2.9031465356055346, "grad_norm": 0.04293200746178627, "learning_rate": 9.140274200198012e-07, "loss": 0.0136, "step": 271720 }, { "epoch": 2.903253378919814, "grad_norm": 1.655134677886963, "learning_rate": 9.140180004329385e-07, "loss": 0.0266, "step": 271730 }, { "epoch": 2.9033602222340935, "grad_norm": 2.0598113536834717, "learning_rate": 9.140085803786182e-07, "loss": 0.0061, "step": 271740 }, { "epoch": 2.9034670655483734, "grad_norm": 4.935462951660156, "learning_rate": 9.139991598568509e-07, "loss": 0.0241, "step": 271750 }, { "epoch": 2.903573908862653, "grad_norm": 12.932764053344727, "learning_rate": 9.139897388676474e-07, "loss": 0.0273, "step": 271760 }, { "epoch": 2.9036807521769328, "grad_norm": 0.04797063767910004, "learning_rate": 9.139803174110182e-07, "loss": 0.0223, "step": 271770 }, { "epoch": 2.9037875954912122, "grad_norm": 0.07083436846733093, "learning_rate": 9.13970895486974e-07, "loss": 0.0289, "step": 271780 }, { "epoch": 2.9038944388054917, "grad_norm": 6.628864765167236, "learning_rate": 9.139614730955254e-07, "loss": 0.0136, "step": 271790 }, { "epoch": 2.904001282119771, "grad_norm": 9.228484153747559, "learning_rate": 9.139520502366831e-07, "loss": 0.0085, "step": 271800 }, { "epoch": 2.904108125434051, "grad_norm": 2.485518217086792, "learning_rate": 9.139426269104575e-07, "loss": 0.0164, "step": 271810 }, { "epoch": 2.9042149687483305, "grad_norm": 0.023213326930999756, "learning_rate": 9.139332031168596e-07, "loss": 0.0219, "step": 271820 }, { "epoch": 2.9043218120626104, "grad_norm": 3.2117159366607666, "learning_rate": 9.139237788559e-07, "loss": 0.0216, "step": 271830 }, { "epoch": 2.90442865537689, "grad_norm": 0.16339026391506195, "learning_rate": 9.139143541275891e-07, "loss": 0.017, "step": 271840 }, { "epoch": 2.9045354986911693, "grad_norm": 0.030668038874864578, "learning_rate": 9.139049289319376e-07, "loss": 0.0211, "step": 271850 }, { "epoch": 2.904642342005449, "grad_norm": 0.024785717949271202, "learning_rate": 9.138955032689562e-07, "loss": 0.0177, "step": 271860 }, { "epoch": 2.9047491853197287, "grad_norm": 0.13533832132816315, "learning_rate": 9.138860771386556e-07, "loss": 0.0116, "step": 271870 }, { "epoch": 2.904856028634008, "grad_norm": 0.015818700194358826, "learning_rate": 9.138766505410463e-07, "loss": 0.0269, "step": 271880 }, { "epoch": 2.904962871948288, "grad_norm": 0.22029520571231842, "learning_rate": 9.138672234761391e-07, "loss": 0.0037, "step": 271890 }, { "epoch": 2.9050697152625675, "grad_norm": 0.004002165514975786, "learning_rate": 9.138577959439445e-07, "loss": 0.01, "step": 271900 }, { "epoch": 2.905176558576847, "grad_norm": 0.13961009681224823, "learning_rate": 9.138483679444732e-07, "loss": 0.0246, "step": 271910 }, { "epoch": 2.9052834018911264, "grad_norm": 9.089493751525879, "learning_rate": 9.13838939477736e-07, "loss": 0.013, "step": 271920 }, { "epoch": 2.9053902452054063, "grad_norm": 2.850583076477051, "learning_rate": 9.138295105437432e-07, "loss": 0.0276, "step": 271930 }, { "epoch": 2.905497088519686, "grad_norm": 0.0052102901972830296, "learning_rate": 9.138200811425058e-07, "loss": 0.0272, "step": 271940 }, { "epoch": 2.9056039318339657, "grad_norm": 0.0072927228175103664, "learning_rate": 9.138106512740343e-07, "loss": 0.0239, "step": 271950 }, { "epoch": 2.905710775148245, "grad_norm": 11.828205108642578, "learning_rate": 9.138012209383392e-07, "loss": 0.0193, "step": 271960 }, { "epoch": 2.9058176184625246, "grad_norm": 12.167250633239746, "learning_rate": 9.137917901354315e-07, "loss": 0.0347, "step": 271970 }, { "epoch": 2.905924461776804, "grad_norm": 0.009215231984853745, "learning_rate": 9.137823588653216e-07, "loss": 0.0219, "step": 271980 }, { "epoch": 2.906031305091084, "grad_norm": 1.1640779972076416, "learning_rate": 9.137729271280201e-07, "loss": 0.0145, "step": 271990 }, { "epoch": 2.9061381484053634, "grad_norm": 0.8397724628448486, "learning_rate": 9.137634949235378e-07, "loss": 0.0105, "step": 272000 }, { "epoch": 2.9062449917196433, "grad_norm": 0.046228740364313126, "learning_rate": 9.137540622518851e-07, "loss": 0.0589, "step": 272010 }, { "epoch": 2.906351835033923, "grad_norm": 1.870835542678833, "learning_rate": 9.13744629113073e-07, "loss": 0.0389, "step": 272020 }, { "epoch": 2.9064586783482023, "grad_norm": 1.7566314935684204, "learning_rate": 9.13735195507112e-07, "loss": 0.0427, "step": 272030 }, { "epoch": 2.9065655216624817, "grad_norm": 0.03551419451832771, "learning_rate": 9.137257614340127e-07, "loss": 0.0157, "step": 272040 }, { "epoch": 2.9066723649767616, "grad_norm": 0.098723866045475, "learning_rate": 9.137163268937857e-07, "loss": 0.033, "step": 272050 }, { "epoch": 2.906779208291041, "grad_norm": 0.012799086049199104, "learning_rate": 9.137068918864419e-07, "loss": 0.0086, "step": 272060 }, { "epoch": 2.906886051605321, "grad_norm": 0.043183647096157074, "learning_rate": 9.136974564119917e-07, "loss": 0.0197, "step": 272070 }, { "epoch": 2.9069928949196004, "grad_norm": 0.010836923494935036, "learning_rate": 9.136880204704459e-07, "loss": 0.0251, "step": 272080 }, { "epoch": 2.90709973823388, "grad_norm": 5.327620506286621, "learning_rate": 9.13678584061815e-07, "loss": 0.0334, "step": 272090 }, { "epoch": 2.9072065815481594, "grad_norm": 0.029889684170484543, "learning_rate": 9.1366914718611e-07, "loss": 0.0103, "step": 272100 }, { "epoch": 2.9073134248624393, "grad_norm": 0.015740856528282166, "learning_rate": 9.136597098433411e-07, "loss": 0.0229, "step": 272110 }, { "epoch": 2.9074202681767187, "grad_norm": 0.14161564409732819, "learning_rate": 9.136502720335192e-07, "loss": 0.0238, "step": 272120 }, { "epoch": 2.9075271114909986, "grad_norm": 4.213068008422852, "learning_rate": 9.13640833756655e-07, "loss": 0.0123, "step": 272130 }, { "epoch": 2.907633954805278, "grad_norm": 0.28401416540145874, "learning_rate": 9.13631395012759e-07, "loss": 0.0741, "step": 272140 }, { "epoch": 2.9077407981195575, "grad_norm": 0.9206047654151917, "learning_rate": 9.136219558018419e-07, "loss": 0.0034, "step": 272150 }, { "epoch": 2.907847641433837, "grad_norm": 0.005660200957208872, "learning_rate": 9.136125161239146e-07, "loss": 0.0089, "step": 272160 }, { "epoch": 2.907954484748117, "grad_norm": 0.3207308053970337, "learning_rate": 9.136030759789872e-07, "loss": 0.0265, "step": 272170 }, { "epoch": 2.9080613280623964, "grad_norm": 0.020646683871746063, "learning_rate": 9.135936353670711e-07, "loss": 0.0579, "step": 272180 }, { "epoch": 2.9081681713766763, "grad_norm": 0.978223443031311, "learning_rate": 9.135841942881764e-07, "loss": 0.002, "step": 272190 }, { "epoch": 2.9082750146909557, "grad_norm": 18.365140914916992, "learning_rate": 9.135747527423139e-07, "loss": 0.0147, "step": 272200 }, { "epoch": 2.908381858005235, "grad_norm": 0.535772442817688, "learning_rate": 9.135653107294943e-07, "loss": 0.0303, "step": 272210 }, { "epoch": 2.908488701319515, "grad_norm": 27.898019790649414, "learning_rate": 9.135558682497282e-07, "loss": 0.0366, "step": 272220 }, { "epoch": 2.9085955446337945, "grad_norm": 0.004519200883805752, "learning_rate": 9.135464253030263e-07, "loss": 0.0198, "step": 272230 }, { "epoch": 2.908702387948074, "grad_norm": 0.7938026785850525, "learning_rate": 9.135369818893994e-07, "loss": 0.0139, "step": 272240 }, { "epoch": 2.908809231262354, "grad_norm": 4.03827428817749, "learning_rate": 9.135275380088579e-07, "loss": 0.0147, "step": 272250 }, { "epoch": 2.9089160745766334, "grad_norm": 0.04605906829237938, "learning_rate": 9.135180936614127e-07, "loss": 0.0024, "step": 272260 }, { "epoch": 2.909022917890913, "grad_norm": 0.00815244484692812, "learning_rate": 9.135086488470745e-07, "loss": 0.0205, "step": 272270 }, { "epoch": 2.9091297612051927, "grad_norm": 0.028781762346625328, "learning_rate": 9.134992035658536e-07, "loss": 0.0067, "step": 272280 }, { "epoch": 2.909236604519472, "grad_norm": 0.010523336008191109, "learning_rate": 9.134897578177609e-07, "loss": 0.0102, "step": 272290 }, { "epoch": 2.9093434478337517, "grad_norm": 0.006785623729228973, "learning_rate": 9.134803116028072e-07, "loss": 0.0117, "step": 272300 }, { "epoch": 2.9094502911480316, "grad_norm": 0.005356082692742348, "learning_rate": 9.134708649210027e-07, "loss": 0.0247, "step": 272310 }, { "epoch": 2.909557134462311, "grad_norm": 0.21221201121807098, "learning_rate": 9.134614177723588e-07, "loss": 0.0162, "step": 272320 }, { "epoch": 2.9096639777765905, "grad_norm": 0.006942042149603367, "learning_rate": 9.134519701568856e-07, "loss": 0.0425, "step": 272330 }, { "epoch": 2.9097708210908704, "grad_norm": 0.285332590341568, "learning_rate": 9.134425220745938e-07, "loss": 0.027, "step": 272340 }, { "epoch": 2.90987766440515, "grad_norm": 2.0859086513519287, "learning_rate": 9.134330735254944e-07, "loss": 0.0119, "step": 272350 }, { "epoch": 2.9099845077194293, "grad_norm": 3.815985679626465, "learning_rate": 9.134236245095976e-07, "loss": 0.0205, "step": 272360 }, { "epoch": 2.910091351033709, "grad_norm": 5.962447643280029, "learning_rate": 9.134141750269146e-07, "loss": 0.0127, "step": 272370 }, { "epoch": 2.9101981943479887, "grad_norm": 0.01116207055747509, "learning_rate": 9.134047250774556e-07, "loss": 0.0398, "step": 272380 }, { "epoch": 2.910305037662268, "grad_norm": 0.2944406569004059, "learning_rate": 9.133952746612314e-07, "loss": 0.007, "step": 272390 }, { "epoch": 2.910411880976548, "grad_norm": 0.006370876915752888, "learning_rate": 9.133858237782531e-07, "loss": 0.0316, "step": 272400 }, { "epoch": 2.9105187242908275, "grad_norm": 0.9463651776313782, "learning_rate": 9.133763724285307e-07, "loss": 0.0272, "step": 272410 }, { "epoch": 2.9106255676051074, "grad_norm": 1.699941635131836, "learning_rate": 9.133669206120752e-07, "loss": 0.0119, "step": 272420 }, { "epoch": 2.910732410919387, "grad_norm": 0.07804373651742935, "learning_rate": 9.133574683288974e-07, "loss": 0.0273, "step": 272430 }, { "epoch": 2.9108392542336663, "grad_norm": 0.015273891389369965, "learning_rate": 9.133480155790077e-07, "loss": 0.0341, "step": 272440 }, { "epoch": 2.9109460975479458, "grad_norm": 0.3549876809120178, "learning_rate": 9.133385623624168e-07, "loss": 0.0117, "step": 272450 }, { "epoch": 2.9110529408622257, "grad_norm": 0.021434025838971138, "learning_rate": 9.133291086791356e-07, "loss": 0.0038, "step": 272460 }, { "epoch": 2.911159784176505, "grad_norm": 0.05844550579786301, "learning_rate": 9.133196545291746e-07, "loss": 0.0683, "step": 272470 }, { "epoch": 2.911266627490785, "grad_norm": 8.459214210510254, "learning_rate": 9.133101999125445e-07, "loss": 0.0362, "step": 272480 }, { "epoch": 2.9113734708050645, "grad_norm": 0.006209578365087509, "learning_rate": 9.13300744829256e-07, "loss": 0.0329, "step": 272490 }, { "epoch": 2.911480314119344, "grad_norm": 3.4084696769714355, "learning_rate": 9.132912892793199e-07, "loss": 0.0089, "step": 272500 }, { "epoch": 2.9115871574336234, "grad_norm": 8.726533889770508, "learning_rate": 9.132818332627465e-07, "loss": 0.0218, "step": 272510 }, { "epoch": 2.9116940007479033, "grad_norm": 0.8189821839332581, "learning_rate": 9.132723767795469e-07, "loss": 0.0132, "step": 272520 }, { "epoch": 2.9118008440621828, "grad_norm": 7.463510990142822, "learning_rate": 9.132629198297316e-07, "loss": 0.0849, "step": 272530 }, { "epoch": 2.9119076873764627, "grad_norm": 0.4494050145149231, "learning_rate": 9.132534624133113e-07, "loss": 0.007, "step": 272540 }, { "epoch": 2.912014530690742, "grad_norm": 0.01649939827620983, "learning_rate": 9.132440045302965e-07, "loss": 0.0108, "step": 272550 }, { "epoch": 2.9121213740050216, "grad_norm": 0.7615046501159668, "learning_rate": 9.13234546180698e-07, "loss": 0.0378, "step": 272560 }, { "epoch": 2.912228217319301, "grad_norm": 0.09058505296707153, "learning_rate": 9.132250873645266e-07, "loss": 0.0098, "step": 272570 }, { "epoch": 2.912335060633581, "grad_norm": 0.16531433165073395, "learning_rate": 9.132156280817929e-07, "loss": 0.008, "step": 272580 }, { "epoch": 2.9124419039478604, "grad_norm": 0.29771628975868225, "learning_rate": 9.132061683325076e-07, "loss": 0.0124, "step": 272590 }, { "epoch": 2.9125487472621403, "grad_norm": 0.2071717083454132, "learning_rate": 9.131967081166814e-07, "loss": 0.0139, "step": 272600 }, { "epoch": 2.9126555905764198, "grad_norm": 1.8220731019973755, "learning_rate": 9.131872474343247e-07, "loss": 0.0044, "step": 272610 }, { "epoch": 2.912762433890699, "grad_norm": 5.335940837860107, "learning_rate": 9.131777862854486e-07, "loss": 0.0123, "step": 272620 }, { "epoch": 2.9128692772049787, "grad_norm": 0.026114683598279953, "learning_rate": 9.131683246700637e-07, "loss": 0.007, "step": 272630 }, { "epoch": 2.9129761205192586, "grad_norm": 0.08653575927019119, "learning_rate": 9.131588625881803e-07, "loss": 0.06, "step": 272640 }, { "epoch": 2.913082963833538, "grad_norm": 3.612372875213623, "learning_rate": 9.131494000398095e-07, "loss": 0.0127, "step": 272650 }, { "epoch": 2.913189807147818, "grad_norm": 1.8414496183395386, "learning_rate": 9.13139937024962e-07, "loss": 0.0692, "step": 272660 }, { "epoch": 2.9132966504620974, "grad_norm": 0.16172441840171814, "learning_rate": 9.131304735436481e-07, "loss": 0.0055, "step": 272670 }, { "epoch": 2.913403493776377, "grad_norm": 0.36795660853385925, "learning_rate": 9.131210095958788e-07, "loss": 0.0346, "step": 272680 }, { "epoch": 2.9135103370906563, "grad_norm": 0.11027007550001144, "learning_rate": 9.131115451816647e-07, "loss": 0.0248, "step": 272690 }, { "epoch": 2.9136171804049362, "grad_norm": 0.191825270652771, "learning_rate": 9.131020803010164e-07, "loss": 0.0118, "step": 272700 }, { "epoch": 2.9137240237192157, "grad_norm": 0.0009438187116757035, "learning_rate": 9.130926149539447e-07, "loss": 0.013, "step": 272710 }, { "epoch": 2.9138308670334956, "grad_norm": 5.0765485763549805, "learning_rate": 9.130831491404603e-07, "loss": 0.1109, "step": 272720 }, { "epoch": 2.913937710347775, "grad_norm": 0.1496310979127884, "learning_rate": 9.130736828605739e-07, "loss": 0.0015, "step": 272730 }, { "epoch": 2.9140445536620545, "grad_norm": 10.265356063842773, "learning_rate": 9.130642161142961e-07, "loss": 0.018, "step": 272740 }, { "epoch": 2.914151396976334, "grad_norm": 2.6823103427886963, "learning_rate": 9.130547489016376e-07, "loss": 0.0076, "step": 272750 }, { "epoch": 2.914258240290614, "grad_norm": 1.1407887935638428, "learning_rate": 9.130452812226091e-07, "loss": 0.0158, "step": 272760 }, { "epoch": 2.9143650836048933, "grad_norm": 0.04463572800159454, "learning_rate": 9.130358130772213e-07, "loss": 0.0168, "step": 272770 }, { "epoch": 2.9144719269191732, "grad_norm": 1.8633002042770386, "learning_rate": 9.13026344465485e-07, "loss": 0.0203, "step": 272780 }, { "epoch": 2.9145787702334527, "grad_norm": 8.641117095947266, "learning_rate": 9.130168753874106e-07, "loss": 0.0076, "step": 272790 }, { "epoch": 2.914685613547732, "grad_norm": 3.1792356967926025, "learning_rate": 9.130074058430092e-07, "loss": 0.0046, "step": 272800 }, { "epoch": 2.9147924568620116, "grad_norm": 0.03309167921543121, "learning_rate": 9.129979358322911e-07, "loss": 0.0244, "step": 272810 }, { "epoch": 2.9148993001762915, "grad_norm": 0.04177938774228096, "learning_rate": 9.129884653552673e-07, "loss": 0.0034, "step": 272820 }, { "epoch": 2.915006143490571, "grad_norm": 2.161418914794922, "learning_rate": 9.129789944119481e-07, "loss": 0.0071, "step": 272830 }, { "epoch": 2.915112986804851, "grad_norm": 0.12324348092079163, "learning_rate": 9.129695230023448e-07, "loss": 0.014, "step": 272840 }, { "epoch": 2.9152198301191303, "grad_norm": 0.3285072147846222, "learning_rate": 9.129600511264674e-07, "loss": 0.0061, "step": 272850 }, { "epoch": 2.91532667343341, "grad_norm": 9.988182067871094, "learning_rate": 9.129505787843273e-07, "loss": 0.0501, "step": 272860 }, { "epoch": 2.9154335167476892, "grad_norm": 0.11503765732049942, "learning_rate": 9.129411059759344e-07, "loss": 0.0152, "step": 272870 }, { "epoch": 2.915540360061969, "grad_norm": 0.0033509330824017525, "learning_rate": 9.129316327013001e-07, "loss": 0.004, "step": 272880 }, { "epoch": 2.9156472033762486, "grad_norm": 2.1810426712036133, "learning_rate": 9.129221589604348e-07, "loss": 0.0231, "step": 272890 }, { "epoch": 2.9157540466905285, "grad_norm": 0.011384916491806507, "learning_rate": 9.129126847533491e-07, "loss": 0.0171, "step": 272900 }, { "epoch": 2.915860890004808, "grad_norm": 0.26380378007888794, "learning_rate": 9.12903210080054e-07, "loss": 0.0276, "step": 272910 }, { "epoch": 2.9159677333190874, "grad_norm": 1.969176173210144, "learning_rate": 9.128937349405599e-07, "loss": 0.0547, "step": 272920 }, { "epoch": 2.9160745766333673, "grad_norm": 0.11916883289813995, "learning_rate": 9.128842593348776e-07, "loss": 0.0145, "step": 272930 }, { "epoch": 2.916181419947647, "grad_norm": 0.05878976732492447, "learning_rate": 9.128747832630178e-07, "loss": 0.0154, "step": 272940 }, { "epoch": 2.9162882632619263, "grad_norm": 0.6925950050354004, "learning_rate": 9.128653067249912e-07, "loss": 0.0359, "step": 272950 }, { "epoch": 2.916395106576206, "grad_norm": 25.655988693237305, "learning_rate": 9.128558297208087e-07, "loss": 0.0507, "step": 272960 }, { "epoch": 2.9165019498904856, "grad_norm": 4.453115463256836, "learning_rate": 9.128463522504805e-07, "loss": 0.0232, "step": 272970 }, { "epoch": 2.916608793204765, "grad_norm": 0.5693838596343994, "learning_rate": 9.128368743140178e-07, "loss": 0.0067, "step": 272980 }, { "epoch": 2.916715636519045, "grad_norm": 2.570460796356201, "learning_rate": 9.128273959114311e-07, "loss": 0.0096, "step": 272990 }, { "epoch": 2.9168224798333244, "grad_norm": 8.364049911499023, "learning_rate": 9.128179170427311e-07, "loss": 0.0086, "step": 273000 }, { "epoch": 2.916929323147604, "grad_norm": 6.6825995445251465, "learning_rate": 9.128084377079285e-07, "loss": 0.0163, "step": 273010 }, { "epoch": 2.917036166461884, "grad_norm": 0.0032977478113025427, "learning_rate": 9.12798957907034e-07, "loss": 0.0325, "step": 273020 }, { "epoch": 2.9171430097761633, "grad_norm": 1.9066691398620605, "learning_rate": 9.127894776400583e-07, "loss": 0.0046, "step": 273030 }, { "epoch": 2.9172498530904427, "grad_norm": 0.010763661004602909, "learning_rate": 9.127799969070122e-07, "loss": 0.0187, "step": 273040 }, { "epoch": 2.9173566964047226, "grad_norm": 8.704292297363281, "learning_rate": 9.127705157079064e-07, "loss": 0.0159, "step": 273050 }, { "epoch": 2.917463539719002, "grad_norm": 0.24445606768131256, "learning_rate": 9.127610340427514e-07, "loss": 0.0208, "step": 273060 }, { "epoch": 2.9175703830332815, "grad_norm": 0.7400761246681213, "learning_rate": 9.127515519115581e-07, "loss": 0.0115, "step": 273070 }, { "epoch": 2.9176772263475614, "grad_norm": 0.005312115885317326, "learning_rate": 9.127420693143371e-07, "loss": 0.0116, "step": 273080 }, { "epoch": 2.917784069661841, "grad_norm": 0.12008768320083618, "learning_rate": 9.12732586251099e-07, "loss": 0.0228, "step": 273090 }, { "epoch": 2.9178909129761204, "grad_norm": 7.905004978179932, "learning_rate": 9.127231027218549e-07, "loss": 0.0399, "step": 273100 }, { "epoch": 2.9179977562904003, "grad_norm": 0.17840330302715302, "learning_rate": 9.127136187266153e-07, "loss": 0.002, "step": 273110 }, { "epoch": 2.9181045996046797, "grad_norm": 0.007469196803867817, "learning_rate": 9.127041342653907e-07, "loss": 0.0304, "step": 273120 }, { "epoch": 2.9182114429189596, "grad_norm": 0.7859047055244446, "learning_rate": 9.12694649338192e-07, "loss": 0.0158, "step": 273130 }, { "epoch": 2.918318286233239, "grad_norm": 0.06497423350811005, "learning_rate": 9.1268516394503e-07, "loss": 0.0066, "step": 273140 }, { "epoch": 2.9184251295475185, "grad_norm": 0.009677649475634098, "learning_rate": 9.126756780859153e-07, "loss": 0.0187, "step": 273150 }, { "epoch": 2.918531972861798, "grad_norm": 0.11217798292636871, "learning_rate": 9.126661917608585e-07, "loss": 0.0137, "step": 273160 }, { "epoch": 2.918638816176078, "grad_norm": 1.2423923015594482, "learning_rate": 9.126567049698705e-07, "loss": 0.0136, "step": 273170 }, { "epoch": 2.9187456594903574, "grad_norm": 0.003727623028680682, "learning_rate": 9.12647217712962e-07, "loss": 0.0044, "step": 273180 }, { "epoch": 2.9188525028046373, "grad_norm": 1.0330873727798462, "learning_rate": 9.126377299901435e-07, "loss": 0.0059, "step": 273190 }, { "epoch": 2.9189593461189167, "grad_norm": 3.7156856060028076, "learning_rate": 9.12628241801426e-07, "loss": 0.0055, "step": 273200 }, { "epoch": 2.919066189433196, "grad_norm": 0.014597756788134575, "learning_rate": 9.126187531468199e-07, "loss": 0.022, "step": 273210 }, { "epoch": 2.9191730327474756, "grad_norm": 0.12103672325611115, "learning_rate": 9.126092640263362e-07, "loss": 0.0216, "step": 273220 }, { "epoch": 2.9192798760617555, "grad_norm": 1.0465654134750366, "learning_rate": 9.125997744399856e-07, "loss": 0.0293, "step": 273230 }, { "epoch": 2.919386719376035, "grad_norm": 1.2343244552612305, "learning_rate": 9.125902843877785e-07, "loss": 0.0122, "step": 273240 }, { "epoch": 2.919493562690315, "grad_norm": 0.012423859909176826, "learning_rate": 9.12580793869726e-07, "loss": 0.0254, "step": 273250 }, { "epoch": 2.9196004060045944, "grad_norm": 2.7735249996185303, "learning_rate": 9.125713028858386e-07, "loss": 0.0141, "step": 273260 }, { "epoch": 2.919707249318874, "grad_norm": 1.3407793045043945, "learning_rate": 9.12561811436127e-07, "loss": 0.0167, "step": 273270 }, { "epoch": 2.9198140926331533, "grad_norm": 6.2734246253967285, "learning_rate": 9.12552319520602e-07, "loss": 0.0257, "step": 273280 }, { "epoch": 2.919920935947433, "grad_norm": 0.07256768643856049, "learning_rate": 9.125428271392743e-07, "loss": 0.032, "step": 273290 }, { "epoch": 2.9200277792617126, "grad_norm": 2.007996082305908, "learning_rate": 9.125333342921546e-07, "loss": 0.0379, "step": 273300 }, { "epoch": 2.9201346225759925, "grad_norm": 0.9638180136680603, "learning_rate": 9.125238409792538e-07, "loss": 0.0153, "step": 273310 }, { "epoch": 2.920241465890272, "grad_norm": 0.009189043194055557, "learning_rate": 9.125143472005823e-07, "loss": 0.0294, "step": 273320 }, { "epoch": 2.9203483092045515, "grad_norm": 0.0805351585149765, "learning_rate": 9.125048529561509e-07, "loss": 0.0086, "step": 273330 }, { "epoch": 2.920455152518831, "grad_norm": 0.003597499104216695, "learning_rate": 9.124953582459705e-07, "loss": 0.029, "step": 273340 }, { "epoch": 2.920561995833111, "grad_norm": 1.2138763666152954, "learning_rate": 9.124858630700515e-07, "loss": 0.0154, "step": 273350 }, { "epoch": 2.9206688391473903, "grad_norm": 0.011269780807197094, "learning_rate": 9.124763674284051e-07, "loss": 0.0208, "step": 273360 }, { "epoch": 2.92077568246167, "grad_norm": 2.234171152114868, "learning_rate": 9.124668713210416e-07, "loss": 0.0105, "step": 273370 }, { "epoch": 2.9208825257759496, "grad_norm": 0.14187012612819672, "learning_rate": 9.12457374747972e-07, "loss": 0.0147, "step": 273380 }, { "epoch": 2.920989369090229, "grad_norm": 4.159200668334961, "learning_rate": 9.124478777092068e-07, "loss": 0.02, "step": 273390 }, { "epoch": 2.9210962124045086, "grad_norm": 0.11083360016345978, "learning_rate": 9.124383802047568e-07, "loss": 0.0281, "step": 273400 }, { "epoch": 2.9212030557187885, "grad_norm": 0.9156175851821899, "learning_rate": 9.124288822346328e-07, "loss": 0.0109, "step": 273410 }, { "epoch": 2.921309899033068, "grad_norm": 4.992588520050049, "learning_rate": 9.124193837988454e-07, "loss": 0.0335, "step": 273420 }, { "epoch": 2.921416742347348, "grad_norm": 0.8367701172828674, "learning_rate": 9.124098848974055e-07, "loss": 0.0325, "step": 273430 }, { "epoch": 2.9215235856616273, "grad_norm": 0.03970695659518242, "learning_rate": 9.124003855303236e-07, "loss": 0.0244, "step": 273440 }, { "epoch": 2.9216304289759067, "grad_norm": 0.9792876243591309, "learning_rate": 9.123908856976106e-07, "loss": 0.026, "step": 273450 }, { "epoch": 2.921737272290186, "grad_norm": 0.009210307151079178, "learning_rate": 9.123813853992771e-07, "loss": 0.0111, "step": 273460 }, { "epoch": 2.921844115604466, "grad_norm": 0.014617310836911201, "learning_rate": 9.123718846353339e-07, "loss": 0.0289, "step": 273470 }, { "epoch": 2.9219509589187456, "grad_norm": 2.0374515056610107, "learning_rate": 9.123623834057918e-07, "loss": 0.0217, "step": 273480 }, { "epoch": 2.9220578022330255, "grad_norm": 2.599815607070923, "learning_rate": 9.123528817106614e-07, "loss": 0.043, "step": 273490 }, { "epoch": 2.922164645547305, "grad_norm": 0.22809730470180511, "learning_rate": 9.123433795499535e-07, "loss": 0.0474, "step": 273500 }, { "epoch": 2.9222714888615844, "grad_norm": 0.32515645027160645, "learning_rate": 9.123338769236788e-07, "loss": 0.0194, "step": 273510 }, { "epoch": 2.922378332175864, "grad_norm": 0.006285787560045719, "learning_rate": 9.12324373831848e-07, "loss": 0.1053, "step": 273520 }, { "epoch": 2.9224851754901437, "grad_norm": 0.8874306678771973, "learning_rate": 9.123148702744719e-07, "loss": 0.0199, "step": 273530 }, { "epoch": 2.922592018804423, "grad_norm": 0.23071256279945374, "learning_rate": 9.123053662515612e-07, "loss": 0.012, "step": 273540 }, { "epoch": 2.922698862118703, "grad_norm": 0.016027454286813736, "learning_rate": 9.122958617631264e-07, "loss": 0.0129, "step": 273550 }, { "epoch": 2.9228057054329826, "grad_norm": 1.2457714080810547, "learning_rate": 9.122863568091787e-07, "loss": 0.0138, "step": 273560 }, { "epoch": 2.922912548747262, "grad_norm": 0.07543580234050751, "learning_rate": 9.122768513897286e-07, "loss": 0.0362, "step": 273570 }, { "epoch": 2.9230193920615415, "grad_norm": 0.01622350513935089, "learning_rate": 9.122673455047867e-07, "loss": 0.0139, "step": 273580 }, { "epoch": 2.9231262353758214, "grad_norm": 0.10802626609802246, "learning_rate": 9.122578391543639e-07, "loss": 0.0269, "step": 273590 }, { "epoch": 2.923233078690101, "grad_norm": 0.048693303018808365, "learning_rate": 9.122483323384709e-07, "loss": 0.0288, "step": 273600 }, { "epoch": 2.9233399220043808, "grad_norm": 0.016163384541869164, "learning_rate": 9.122388250571184e-07, "loss": 0.0016, "step": 273610 }, { "epoch": 2.92344676531866, "grad_norm": 6.373363971710205, "learning_rate": 9.122293173103173e-07, "loss": 0.0158, "step": 273620 }, { "epoch": 2.9235536086329397, "grad_norm": 0.13153758645057678, "learning_rate": 9.122198090980779e-07, "loss": 0.0224, "step": 273630 }, { "epoch": 2.923660451947219, "grad_norm": 0.12766531109809875, "learning_rate": 9.122103004204114e-07, "loss": 0.0072, "step": 273640 }, { "epoch": 2.923767295261499, "grad_norm": 0.01469174399971962, "learning_rate": 9.122007912773284e-07, "loss": 0.0123, "step": 273650 }, { "epoch": 2.9238741385757785, "grad_norm": 3.591205596923828, "learning_rate": 9.121912816688395e-07, "loss": 0.0449, "step": 273660 }, { "epoch": 2.9239809818900584, "grad_norm": 4.174437999725342, "learning_rate": 9.121817715949556e-07, "loss": 0.0364, "step": 273670 }, { "epoch": 2.924087825204338, "grad_norm": 0.013422450982034206, "learning_rate": 9.121722610556872e-07, "loss": 0.0105, "step": 273680 }, { "epoch": 2.9241946685186173, "grad_norm": 6.1056928634643555, "learning_rate": 9.121627500510454e-07, "loss": 0.0179, "step": 273690 }, { "epoch": 2.924301511832897, "grad_norm": 0.012688417918980122, "learning_rate": 9.121532385810407e-07, "loss": 0.001, "step": 273700 }, { "epoch": 2.9244083551471767, "grad_norm": 0.003970898687839508, "learning_rate": 9.121437266456839e-07, "loss": 0.001, "step": 273710 }, { "epoch": 2.924515198461456, "grad_norm": 1.499708890914917, "learning_rate": 9.121342142449857e-07, "loss": 0.0064, "step": 273720 }, { "epoch": 2.924622041775736, "grad_norm": 1.5529332160949707, "learning_rate": 9.121247013789568e-07, "loss": 0.024, "step": 273730 }, { "epoch": 2.9247288850900155, "grad_norm": 0.04042727127671242, "learning_rate": 9.121151880476081e-07, "loss": 0.0168, "step": 273740 }, { "epoch": 2.924835728404295, "grad_norm": 2.660567045211792, "learning_rate": 9.121056742509503e-07, "loss": 0.0223, "step": 273750 }, { "epoch": 2.924942571718575, "grad_norm": 2.5734782218933105, "learning_rate": 9.12096159988994e-07, "loss": 0.0102, "step": 273760 }, { "epoch": 2.9250494150328543, "grad_norm": 0.0032820887863636017, "learning_rate": 9.1208664526175e-07, "loss": 0.0347, "step": 273770 }, { "epoch": 2.9251562583471338, "grad_norm": 0.005310379434376955, "learning_rate": 9.120771300692291e-07, "loss": 0.0058, "step": 273780 }, { "epoch": 2.9252631016614137, "grad_norm": 0.00928408931940794, "learning_rate": 9.120676144114421e-07, "loss": 0.0133, "step": 273790 }, { "epoch": 2.925369944975693, "grad_norm": 0.6729149222373962, "learning_rate": 9.120580982883996e-07, "loss": 0.01, "step": 273800 }, { "epoch": 2.9254767882899726, "grad_norm": 0.008716132491827011, "learning_rate": 9.120485817001125e-07, "loss": 0.0204, "step": 273810 }, { "epoch": 2.9255836316042525, "grad_norm": 0.10847533494234085, "learning_rate": 9.120390646465913e-07, "loss": 0.0356, "step": 273820 }, { "epoch": 2.925690474918532, "grad_norm": 0.021982086822390556, "learning_rate": 9.12029547127847e-07, "loss": 0.0058, "step": 273830 }, { "epoch": 2.9257973182328114, "grad_norm": 0.35319289565086365, "learning_rate": 9.120200291438902e-07, "loss": 0.0083, "step": 273840 }, { "epoch": 2.9259041615470913, "grad_norm": 0.008285549469292164, "learning_rate": 9.120105106947317e-07, "loss": 0.0146, "step": 273850 }, { "epoch": 2.926011004861371, "grad_norm": 1.2622942924499512, "learning_rate": 9.120009917803822e-07, "loss": 0.023, "step": 273860 }, { "epoch": 2.9261178481756502, "grad_norm": 3.2743988037109375, "learning_rate": 9.119914724008526e-07, "loss": 0.0206, "step": 273870 }, { "epoch": 2.92622469148993, "grad_norm": 0.12450157105922699, "learning_rate": 9.119819525561534e-07, "loss": 0.0065, "step": 273880 }, { "epoch": 2.9263315348042096, "grad_norm": 0.010571924038231373, "learning_rate": 9.119724322462956e-07, "loss": 0.0191, "step": 273890 }, { "epoch": 2.9264383781184895, "grad_norm": 3.3816733360290527, "learning_rate": 9.119629114712897e-07, "loss": 0.0683, "step": 273900 }, { "epoch": 2.926545221432769, "grad_norm": 0.7218390107154846, "learning_rate": 9.119533902311467e-07, "loss": 0.0135, "step": 273910 }, { "epoch": 2.9266520647470484, "grad_norm": 9.686360359191895, "learning_rate": 9.119438685258771e-07, "loss": 0.0077, "step": 273920 }, { "epoch": 2.926758908061328, "grad_norm": 8.067962646484375, "learning_rate": 9.11934346355492e-07, "loss": 0.0541, "step": 273930 }, { "epoch": 2.926865751375608, "grad_norm": 1.948123574256897, "learning_rate": 9.119248237200017e-07, "loss": 0.0111, "step": 273940 }, { "epoch": 2.9269725946898872, "grad_norm": 0.5398308634757996, "learning_rate": 9.119153006194173e-07, "loss": 0.0354, "step": 273950 }, { "epoch": 2.927079438004167, "grad_norm": 7.50641393661499, "learning_rate": 9.119057770537495e-07, "loss": 0.0596, "step": 273960 }, { "epoch": 2.9271862813184466, "grad_norm": 1.7646777629852295, "learning_rate": 9.118962530230088e-07, "loss": 0.0135, "step": 273970 }, { "epoch": 2.927293124632726, "grad_norm": 0.14760448038578033, "learning_rate": 9.118867285272063e-07, "loss": 0.0219, "step": 273980 }, { "epoch": 2.9273999679470055, "grad_norm": 1.1465129852294922, "learning_rate": 9.118772035663525e-07, "loss": 0.0116, "step": 273990 }, { "epoch": 2.9275068112612854, "grad_norm": 1.734561800956726, "learning_rate": 9.118676781404584e-07, "loss": 0.0187, "step": 274000 }, { "epoch": 2.927613654575565, "grad_norm": 0.19090834259986877, "learning_rate": 9.118581522495345e-07, "loss": 0.0644, "step": 274010 }, { "epoch": 2.927720497889845, "grad_norm": 0.06287848204374313, "learning_rate": 9.118486258935916e-07, "loss": 0.0198, "step": 274020 }, { "epoch": 2.9278273412041242, "grad_norm": 0.2793514132499695, "learning_rate": 9.118390990726405e-07, "loss": 0.0445, "step": 274030 }, { "epoch": 2.9279341845184037, "grad_norm": 1.486549735069275, "learning_rate": 9.118295717866921e-07, "loss": 0.0012, "step": 274040 }, { "epoch": 2.928041027832683, "grad_norm": 1.4175609350204468, "learning_rate": 9.11820044035757e-07, "loss": 0.0214, "step": 274050 }, { "epoch": 2.928147871146963, "grad_norm": 0.7475242018699646, "learning_rate": 9.118105158198461e-07, "loss": 0.0032, "step": 274060 }, { "epoch": 2.9282547144612425, "grad_norm": 1.059970736503601, "learning_rate": 9.118009871389698e-07, "loss": 0.0169, "step": 274070 }, { "epoch": 2.9283615577755224, "grad_norm": 1.0090465545654297, "learning_rate": 9.117914579931394e-07, "loss": 0.0161, "step": 274080 }, { "epoch": 2.928468401089802, "grad_norm": 4.631870746612549, "learning_rate": 9.117819283823652e-07, "loss": 0.0158, "step": 274090 }, { "epoch": 2.9285752444040813, "grad_norm": 3.611982822418213, "learning_rate": 9.11772398306658e-07, "loss": 0.0222, "step": 274100 }, { "epoch": 2.928682087718361, "grad_norm": 0.5012236833572388, "learning_rate": 9.117628677660288e-07, "loss": 0.0097, "step": 274110 }, { "epoch": 2.9287889310326407, "grad_norm": 0.8891732096672058, "learning_rate": 9.117533367604882e-07, "loss": 0.0098, "step": 274120 }, { "epoch": 2.92889577434692, "grad_norm": 0.022392800077795982, "learning_rate": 9.117438052900471e-07, "loss": 0.0021, "step": 274130 }, { "epoch": 2.9290026176612, "grad_norm": 0.0022345797624439, "learning_rate": 9.117342733547162e-07, "loss": 0.0377, "step": 274140 }, { "epoch": 2.9291094609754795, "grad_norm": 3.3686959743499756, "learning_rate": 9.117247409545061e-07, "loss": 0.0098, "step": 274150 }, { "epoch": 2.929216304289759, "grad_norm": 0.20381391048431396, "learning_rate": 9.117152080894277e-07, "loss": 0.0183, "step": 274160 }, { "epoch": 2.9293231476040384, "grad_norm": 0.005525112152099609, "learning_rate": 9.117056747594918e-07, "loss": 0.0046, "step": 274170 }, { "epoch": 2.9294299909183183, "grad_norm": 0.009063336066901684, "learning_rate": 9.116961409647093e-07, "loss": 0.018, "step": 274180 }, { "epoch": 2.929536834232598, "grad_norm": 0.006978529971092939, "learning_rate": 9.116866067050906e-07, "loss": 0.0419, "step": 274190 }, { "epoch": 2.9296436775468777, "grad_norm": 13.392451286315918, "learning_rate": 9.116770719806466e-07, "loss": 0.0293, "step": 274200 }, { "epoch": 2.929750520861157, "grad_norm": 0.5265982747077942, "learning_rate": 9.116675367913881e-07, "loss": 0.014, "step": 274210 }, { "epoch": 2.9298573641754366, "grad_norm": 0.018459394574165344, "learning_rate": 9.116580011373261e-07, "loss": 0.0112, "step": 274220 }, { "epoch": 2.929964207489716, "grad_norm": 0.994774580001831, "learning_rate": 9.11648465018471e-07, "loss": 0.0182, "step": 274230 }, { "epoch": 2.930071050803996, "grad_norm": 1.4989678859710693, "learning_rate": 9.116389284348336e-07, "loss": 0.0067, "step": 274240 }, { "epoch": 2.9301778941182755, "grad_norm": 0.4290367364883423, "learning_rate": 9.116293913864249e-07, "loss": 0.033, "step": 274250 }, { "epoch": 2.9302847374325554, "grad_norm": 0.002657797187566757, "learning_rate": 9.116198538732556e-07, "loss": 0.0109, "step": 274260 }, { "epoch": 2.930391580746835, "grad_norm": 0.11407045274972916, "learning_rate": 9.116103158953363e-07, "loss": 0.0333, "step": 274270 }, { "epoch": 2.9304984240611143, "grad_norm": 0.0488315150141716, "learning_rate": 9.11600777452678e-07, "loss": 0.0059, "step": 274280 }, { "epoch": 2.9306052673753937, "grad_norm": 1.8024437427520752, "learning_rate": 9.115912385452913e-07, "loss": 0.0255, "step": 274290 }, { "epoch": 2.9307121106896736, "grad_norm": 0.12857522070407867, "learning_rate": 9.115816991731871e-07, "loss": 0.0059, "step": 274300 }, { "epoch": 2.930818954003953, "grad_norm": 0.06695455312728882, "learning_rate": 9.115721593363759e-07, "loss": 0.0121, "step": 274310 }, { "epoch": 2.930925797318233, "grad_norm": 0.07126667350530624, "learning_rate": 9.115626190348689e-07, "loss": 0.001, "step": 274320 }, { "epoch": 2.9310326406325125, "grad_norm": 0.03783467411994934, "learning_rate": 9.115530782686765e-07, "loss": 0.0041, "step": 274330 }, { "epoch": 2.931139483946792, "grad_norm": 3.2795217037200928, "learning_rate": 9.115435370378097e-07, "loss": 0.0165, "step": 274340 }, { "epoch": 2.9312463272610714, "grad_norm": 0.9865552186965942, "learning_rate": 9.115339953422792e-07, "loss": 0.009, "step": 274350 }, { "epoch": 2.9313531705753513, "grad_norm": 0.10267970710992813, "learning_rate": 9.115244531820957e-07, "loss": 0.0269, "step": 274360 }, { "epoch": 2.9314600138896307, "grad_norm": 2.4103121757507324, "learning_rate": 9.1151491055727e-07, "loss": 0.0651, "step": 274370 }, { "epoch": 2.9315668572039106, "grad_norm": 0.16660115122795105, "learning_rate": 9.115053674678131e-07, "loss": 0.0067, "step": 274380 }, { "epoch": 2.93167370051819, "grad_norm": 0.2639649212360382, "learning_rate": 9.114958239137354e-07, "loss": 0.0151, "step": 274390 }, { "epoch": 2.9317805438324696, "grad_norm": 0.790490448474884, "learning_rate": 9.114862798950478e-07, "loss": 0.0094, "step": 274400 }, { "epoch": 2.9318873871467495, "grad_norm": 0.017606712877750397, "learning_rate": 9.114767354117612e-07, "loss": 0.0201, "step": 274410 }, { "epoch": 2.931994230461029, "grad_norm": 3.5646848678588867, "learning_rate": 9.114671904638865e-07, "loss": 0.0156, "step": 274420 }, { "epoch": 2.9321010737753084, "grad_norm": 4.721549987792969, "learning_rate": 9.114576450514339e-07, "loss": 0.0494, "step": 274430 }, { "epoch": 2.9322079170895883, "grad_norm": 0.019195755943655968, "learning_rate": 9.11448099174415e-07, "loss": 0.0097, "step": 274440 }, { "epoch": 2.9323147604038677, "grad_norm": 5.443733215332031, "learning_rate": 9.114385528328399e-07, "loss": 0.0072, "step": 274450 }, { "epoch": 2.932421603718147, "grad_norm": 0.1919090747833252, "learning_rate": 9.114290060267196e-07, "loss": 0.0036, "step": 274460 }, { "epoch": 2.932528447032427, "grad_norm": 0.1271485835313797, "learning_rate": 9.114194587560648e-07, "loss": 0.0226, "step": 274470 }, { "epoch": 2.9326352903467066, "grad_norm": 0.10227085649967194, "learning_rate": 9.114099110208866e-07, "loss": 0.0068, "step": 274480 }, { "epoch": 2.932742133660986, "grad_norm": 0.2730763852596283, "learning_rate": 9.114003628211955e-07, "loss": 0.0192, "step": 274490 }, { "epoch": 2.932848976975266, "grad_norm": 0.1958918422460556, "learning_rate": 9.113908141570023e-07, "loss": 0.0175, "step": 274500 }, { "epoch": 2.9329558202895454, "grad_norm": 0.1424742043018341, "learning_rate": 9.113812650283179e-07, "loss": 0.0396, "step": 274510 }, { "epoch": 2.933062663603825, "grad_norm": 2.800740957260132, "learning_rate": 9.11371715435153e-07, "loss": 0.0159, "step": 274520 }, { "epoch": 2.9331695069181047, "grad_norm": 0.031204242259263992, "learning_rate": 9.113621653775183e-07, "loss": 0.016, "step": 274530 }, { "epoch": 2.933276350232384, "grad_norm": 0.018166372552514076, "learning_rate": 9.113526148554247e-07, "loss": 0.0015, "step": 274540 }, { "epoch": 2.9333831935466637, "grad_norm": 0.023344915360212326, "learning_rate": 9.11343063868883e-07, "loss": 0.0253, "step": 274550 }, { "epoch": 2.9334900368609436, "grad_norm": 0.9742194414138794, "learning_rate": 9.113335124179039e-07, "loss": 0.0141, "step": 274560 }, { "epoch": 2.933596880175223, "grad_norm": 6.447198867797852, "learning_rate": 9.113239605024982e-07, "loss": 0.0207, "step": 274570 }, { "epoch": 2.9337037234895025, "grad_norm": 0.4284498393535614, "learning_rate": 9.113144081226767e-07, "loss": 0.0022, "step": 274580 }, { "epoch": 2.9338105668037824, "grad_norm": 4.267665386199951, "learning_rate": 9.113048552784501e-07, "loss": 0.0167, "step": 274590 }, { "epoch": 2.933917410118062, "grad_norm": 0.15635085105895996, "learning_rate": 9.112953019698295e-07, "loss": 0.0126, "step": 274600 }, { "epoch": 2.9340242534323417, "grad_norm": 0.48467186093330383, "learning_rate": 9.112857481968254e-07, "loss": 0.0092, "step": 274610 }, { "epoch": 2.934131096746621, "grad_norm": 0.04735614359378815, "learning_rate": 9.112761939594485e-07, "loss": 0.017, "step": 274620 }, { "epoch": 2.9342379400609007, "grad_norm": 0.1537816971540451, "learning_rate": 9.112666392577098e-07, "loss": 0.0214, "step": 274630 }, { "epoch": 2.93434478337518, "grad_norm": 0.33721765875816345, "learning_rate": 9.112570840916201e-07, "loss": 0.0225, "step": 274640 }, { "epoch": 2.93445162668946, "grad_norm": 0.07369235903024673, "learning_rate": 9.112475284611902e-07, "loss": 0.0078, "step": 274650 }, { "epoch": 2.9345584700037395, "grad_norm": 0.6921921372413635, "learning_rate": 9.112379723664305e-07, "loss": 0.012, "step": 274660 }, { "epoch": 2.9346653133180194, "grad_norm": 9.865527153015137, "learning_rate": 9.112284158073524e-07, "loss": 0.0153, "step": 274670 }, { "epoch": 2.934772156632299, "grad_norm": 2.991037368774414, "learning_rate": 9.112188587839662e-07, "loss": 0.0362, "step": 274680 }, { "epoch": 2.9348789999465783, "grad_norm": 2.971431016921997, "learning_rate": 9.112093012962829e-07, "loss": 0.0311, "step": 274690 }, { "epoch": 2.9349858432608578, "grad_norm": 3.7306602001190186, "learning_rate": 9.111997433443133e-07, "loss": 0.0409, "step": 274700 }, { "epoch": 2.9350926865751377, "grad_norm": 0.512882649898529, "learning_rate": 9.111901849280682e-07, "loss": 0.0014, "step": 274710 }, { "epoch": 2.935199529889417, "grad_norm": 0.61895352602005, "learning_rate": 9.111806260475583e-07, "loss": 0.039, "step": 274720 }, { "epoch": 2.935306373203697, "grad_norm": 0.10441520810127258, "learning_rate": 9.111710667027943e-07, "loss": 0.0045, "step": 274730 }, { "epoch": 2.9354132165179765, "grad_norm": 0.048897236585617065, "learning_rate": 9.111615068937873e-07, "loss": 0.0059, "step": 274740 }, { "epoch": 2.935520059832256, "grad_norm": 0.2887868881225586, "learning_rate": 9.111519466205479e-07, "loss": 0.0114, "step": 274750 }, { "epoch": 2.9356269031465354, "grad_norm": 0.10143163055181503, "learning_rate": 9.111423858830869e-07, "loss": 0.0054, "step": 274760 }, { "epoch": 2.9357337464608153, "grad_norm": 3.272047519683838, "learning_rate": 9.111328246814151e-07, "loss": 0.0292, "step": 274770 }, { "epoch": 2.9358405897750948, "grad_norm": 0.32273292541503906, "learning_rate": 9.111232630155434e-07, "loss": 0.0206, "step": 274780 }, { "epoch": 2.9359474330893747, "grad_norm": 0.04094173014163971, "learning_rate": 9.111137008854823e-07, "loss": 0.0052, "step": 274790 }, { "epoch": 2.936054276403654, "grad_norm": 0.006388641428202391, "learning_rate": 9.11104138291243e-07, "loss": 0.0144, "step": 274800 }, { "epoch": 2.9361611197179336, "grad_norm": 5.870082855224609, "learning_rate": 9.110945752328361e-07, "loss": 0.0081, "step": 274810 }, { "epoch": 2.936267963032213, "grad_norm": 1.506740689277649, "learning_rate": 9.110850117102724e-07, "loss": 0.03, "step": 274820 }, { "epoch": 2.936374806346493, "grad_norm": 0.09079303592443466, "learning_rate": 9.110754477235626e-07, "loss": 0.0028, "step": 274830 }, { "epoch": 2.9364816496607724, "grad_norm": 0.006268829107284546, "learning_rate": 9.110658832727176e-07, "loss": 0.0054, "step": 274840 }, { "epoch": 2.9365884929750523, "grad_norm": 0.017797499895095825, "learning_rate": 9.110563183577482e-07, "loss": 0.0038, "step": 274850 }, { "epoch": 2.9366953362893318, "grad_norm": 7.288347244262695, "learning_rate": 9.110467529786653e-07, "loss": 0.0468, "step": 274860 }, { "epoch": 2.9368021796036112, "grad_norm": 6.823060512542725, "learning_rate": 9.110371871354795e-07, "loss": 0.0219, "step": 274870 }, { "epoch": 2.9369090229178907, "grad_norm": 0.009141789749264717, "learning_rate": 9.110276208282017e-07, "loss": 0.0655, "step": 274880 }, { "epoch": 2.9370158662321706, "grad_norm": 0.0057282354682683945, "learning_rate": 9.110180540568427e-07, "loss": 0.0857, "step": 274890 }, { "epoch": 2.93712270954645, "grad_norm": 1.291892170906067, "learning_rate": 9.110084868214133e-07, "loss": 0.0094, "step": 274900 }, { "epoch": 2.93722955286073, "grad_norm": 0.08554849028587341, "learning_rate": 9.109989191219243e-07, "loss": 0.0124, "step": 274910 }, { "epoch": 2.9373363961750094, "grad_norm": 0.04155882075428963, "learning_rate": 9.109893509583865e-07, "loss": 0.0212, "step": 274920 }, { "epoch": 2.937443239489289, "grad_norm": 0.006703122053295374, "learning_rate": 9.109797823308107e-07, "loss": 0.0096, "step": 274930 }, { "epoch": 2.9375500828035683, "grad_norm": 0.0030004261061549187, "learning_rate": 9.109702132392076e-07, "loss": 0.0606, "step": 274940 }, { "epoch": 2.9376569261178482, "grad_norm": 14.348271369934082, "learning_rate": 9.109606436835884e-07, "loss": 0.0546, "step": 274950 }, { "epoch": 2.9377637694321277, "grad_norm": 0.009315880946815014, "learning_rate": 9.109510736639634e-07, "loss": 0.0278, "step": 274960 }, { "epoch": 2.9378706127464076, "grad_norm": 1.5866804122924805, "learning_rate": 9.109415031803436e-07, "loss": 0.0124, "step": 274970 }, { "epoch": 2.937977456060687, "grad_norm": 3.6725800037384033, "learning_rate": 9.109319322327399e-07, "loss": 0.0158, "step": 274980 }, { "epoch": 2.9380842993749665, "grad_norm": 0.07993404567241669, "learning_rate": 9.10922360821163e-07, "loss": 0.0043, "step": 274990 }, { "epoch": 2.938191142689246, "grad_norm": 0.3010275363922119, "learning_rate": 9.109127889456238e-07, "loss": 0.0059, "step": 275000 }, { "epoch": 2.938297986003526, "grad_norm": 1.8970720767974854, "learning_rate": 9.10903216606133e-07, "loss": 0.0086, "step": 275010 }, { "epoch": 2.9384048293178053, "grad_norm": 9.821846961975098, "learning_rate": 9.108936438027015e-07, "loss": 0.0637, "step": 275020 }, { "epoch": 2.9385116726320852, "grad_norm": 8.155255317687988, "learning_rate": 9.1088407053534e-07, "loss": 0.0262, "step": 275030 }, { "epoch": 2.9386185159463647, "grad_norm": 0.0251576267182827, "learning_rate": 9.108744968040593e-07, "loss": 0.0422, "step": 275040 }, { "epoch": 2.938725359260644, "grad_norm": 3.5059051513671875, "learning_rate": 9.108649226088705e-07, "loss": 0.0217, "step": 275050 }, { "epoch": 2.9388322025749236, "grad_norm": 13.768280982971191, "learning_rate": 9.10855347949784e-07, "loss": 0.0533, "step": 275060 }, { "epoch": 2.9389390458892035, "grad_norm": 0.008575798943638802, "learning_rate": 9.108457728268109e-07, "loss": 0.0083, "step": 275070 }, { "epoch": 2.939045889203483, "grad_norm": 0.6275701522827148, "learning_rate": 9.108361972399618e-07, "loss": 0.0155, "step": 275080 }, { "epoch": 2.939152732517763, "grad_norm": 0.21077579259872437, "learning_rate": 9.10826621189248e-07, "loss": 0.0471, "step": 275090 }, { "epoch": 2.9392595758320423, "grad_norm": 0.11629699915647507, "learning_rate": 9.108170446746794e-07, "loss": 0.0187, "step": 275100 }, { "epoch": 2.939366419146322, "grad_norm": 0.10389529168605804, "learning_rate": 9.108074676962677e-07, "loss": 0.0109, "step": 275110 }, { "epoch": 2.9394732624606013, "grad_norm": 0.7838039398193359, "learning_rate": 9.107978902540233e-07, "loss": 0.0077, "step": 275120 }, { "epoch": 2.939580105774881, "grad_norm": 3.6001806259155273, "learning_rate": 9.107883123479571e-07, "loss": 0.0425, "step": 275130 }, { "epoch": 2.9396869490891606, "grad_norm": 10.090742111206055, "learning_rate": 9.107787339780798e-07, "loss": 0.0391, "step": 275140 }, { "epoch": 2.9397937924034405, "grad_norm": 0.16939789056777954, "learning_rate": 9.107691551444023e-07, "loss": 0.0072, "step": 275150 }, { "epoch": 2.93990063571772, "grad_norm": 0.15383295714855194, "learning_rate": 9.107595758469356e-07, "loss": 0.0198, "step": 275160 }, { "epoch": 2.9400074790319994, "grad_norm": 4.571253299713135, "learning_rate": 9.107499960856901e-07, "loss": 0.0096, "step": 275170 }, { "epoch": 2.9401143223462793, "grad_norm": 4.590834617614746, "learning_rate": 9.10740415860677e-07, "loss": 0.0201, "step": 275180 }, { "epoch": 2.940221165660559, "grad_norm": 0.006972495000809431, "learning_rate": 9.10730835171907e-07, "loss": 0.0162, "step": 275190 }, { "epoch": 2.9403280089748383, "grad_norm": 0.5747405886650085, "learning_rate": 9.107212540193907e-07, "loss": 0.0779, "step": 275200 }, { "epoch": 2.940434852289118, "grad_norm": 7.014262676239014, "learning_rate": 9.107116724031393e-07, "loss": 0.0236, "step": 275210 }, { "epoch": 2.9405416956033976, "grad_norm": 0.0004993276088498533, "learning_rate": 9.107020903231633e-07, "loss": 0.0331, "step": 275220 }, { "epoch": 2.940648538917677, "grad_norm": 0.0031431233510375023, "learning_rate": 9.106925077794738e-07, "loss": 0.004, "step": 275230 }, { "epoch": 2.940755382231957, "grad_norm": 1.2924543619155884, "learning_rate": 9.106829247720813e-07, "loss": 0.0525, "step": 275240 }, { "epoch": 2.9408622255462364, "grad_norm": 0.14394932985305786, "learning_rate": 9.106733413009968e-07, "loss": 0.0273, "step": 275250 }, { "epoch": 2.940969068860516, "grad_norm": 1.9914578199386597, "learning_rate": 9.106637573662313e-07, "loss": 0.0317, "step": 275260 }, { "epoch": 2.941075912174796, "grad_norm": 0.17956258356571198, "learning_rate": 9.106541729677951e-07, "loss": 0.051, "step": 275270 }, { "epoch": 2.9411827554890753, "grad_norm": 4.231794357299805, "learning_rate": 9.106445881056995e-07, "loss": 0.0174, "step": 275280 }, { "epoch": 2.9412895988033547, "grad_norm": 0.06068814918398857, "learning_rate": 9.106350027799552e-07, "loss": 0.007, "step": 275290 }, { "epoch": 2.9413964421176346, "grad_norm": 10.596293449401855, "learning_rate": 9.106254169905729e-07, "loss": 0.0263, "step": 275300 }, { "epoch": 2.941503285431914, "grad_norm": 0.14668430387973785, "learning_rate": 9.106158307375637e-07, "loss": 0.0028, "step": 275310 }, { "epoch": 2.9416101287461935, "grad_norm": 3.7634809017181396, "learning_rate": 9.106062440209381e-07, "loss": 0.048, "step": 275320 }, { "epoch": 2.9417169720604734, "grad_norm": 0.534494161605835, "learning_rate": 9.10596656840707e-07, "loss": 0.0719, "step": 275330 }, { "epoch": 2.941823815374753, "grad_norm": 3.0744802951812744, "learning_rate": 9.105870691968815e-07, "loss": 0.0106, "step": 275340 }, { "epoch": 2.9419306586890324, "grad_norm": 4.587790489196777, "learning_rate": 9.10577481089472e-07, "loss": 0.0074, "step": 275350 }, { "epoch": 2.9420375020033123, "grad_norm": 0.006596102844923735, "learning_rate": 9.105678925184896e-07, "loss": 0.0045, "step": 275360 }, { "epoch": 2.9421443453175917, "grad_norm": 0.08434629440307617, "learning_rate": 9.105583034839451e-07, "loss": 0.0279, "step": 275370 }, { "epoch": 2.9422511886318716, "grad_norm": 0.24482667446136475, "learning_rate": 9.105487139858492e-07, "loss": 0.0015, "step": 275380 }, { "epoch": 2.942358031946151, "grad_norm": 3.204300880432129, "learning_rate": 9.105391240242129e-07, "loss": 0.0093, "step": 275390 }, { "epoch": 2.9424648752604305, "grad_norm": 2.534273386001587, "learning_rate": 9.105295335990469e-07, "loss": 0.0071, "step": 275400 }, { "epoch": 2.94257171857471, "grad_norm": 2.864555835723877, "learning_rate": 9.105199427103621e-07, "loss": 0.0075, "step": 275410 }, { "epoch": 2.94267856188899, "grad_norm": 0.029693234711885452, "learning_rate": 9.105103513581693e-07, "loss": 0.0013, "step": 275420 }, { "epoch": 2.9427854052032694, "grad_norm": 2.091434955596924, "learning_rate": 9.105007595424794e-07, "loss": 0.0231, "step": 275430 }, { "epoch": 2.9428922485175493, "grad_norm": 0.013090578839182854, "learning_rate": 9.104911672633031e-07, "loss": 0.0235, "step": 275440 }, { "epoch": 2.9429990918318287, "grad_norm": 0.07618828862905502, "learning_rate": 9.104815745206513e-07, "loss": 0.0126, "step": 275450 }, { "epoch": 2.943105935146108, "grad_norm": 0.4976131319999695, "learning_rate": 9.104719813145347e-07, "loss": 0.0029, "step": 275460 }, { "epoch": 2.9432127784603876, "grad_norm": 0.07194098830223083, "learning_rate": 9.104623876449644e-07, "loss": 0.0146, "step": 275470 }, { "epoch": 2.9433196217746675, "grad_norm": 0.008774728514254093, "learning_rate": 9.10452793511951e-07, "loss": 0.0099, "step": 275480 }, { "epoch": 2.943426465088947, "grad_norm": 0.031010620296001434, "learning_rate": 9.104431989155055e-07, "loss": 0.0229, "step": 275490 }, { "epoch": 2.943533308403227, "grad_norm": 0.038208797574043274, "learning_rate": 9.104336038556386e-07, "loss": 0.0085, "step": 275500 }, { "epoch": 2.9436401517175064, "grad_norm": 0.6256581544876099, "learning_rate": 9.104240083323612e-07, "loss": 0.0217, "step": 275510 }, { "epoch": 2.943746995031786, "grad_norm": 0.6745672225952148, "learning_rate": 9.104144123456842e-07, "loss": 0.0128, "step": 275520 }, { "epoch": 2.9438538383460653, "grad_norm": 0.04060734063386917, "learning_rate": 9.104048158956182e-07, "loss": 0.0038, "step": 275530 }, { "epoch": 2.943960681660345, "grad_norm": 0.008032881654798985, "learning_rate": 9.103952189821742e-07, "loss": 0.0126, "step": 275540 }, { "epoch": 2.9440675249746247, "grad_norm": 1.5219024419784546, "learning_rate": 9.103856216053632e-07, "loss": 0.0144, "step": 275550 }, { "epoch": 2.9441743682889046, "grad_norm": 1.091068148612976, "learning_rate": 9.103760237651956e-07, "loss": 0.0392, "step": 275560 }, { "epoch": 2.944281211603184, "grad_norm": 27.704538345336914, "learning_rate": 9.103664254616828e-07, "loss": 0.0184, "step": 275570 }, { "epoch": 2.9443880549174635, "grad_norm": 4.690637111663818, "learning_rate": 9.103568266948352e-07, "loss": 0.0147, "step": 275580 }, { "epoch": 2.944494898231743, "grad_norm": 0.021768921986222267, "learning_rate": 9.103472274646636e-07, "loss": 0.0139, "step": 275590 }, { "epoch": 2.944601741546023, "grad_norm": 1.6565619707107544, "learning_rate": 9.103376277711791e-07, "loss": 0.0388, "step": 275600 }, { "epoch": 2.9447085848603023, "grad_norm": 5.044412612915039, "learning_rate": 9.103280276143925e-07, "loss": 0.0239, "step": 275610 }, { "epoch": 2.944815428174582, "grad_norm": 0.03901562839746475, "learning_rate": 9.103184269943147e-07, "loss": 0.0659, "step": 275620 }, { "epoch": 2.9449222714888617, "grad_norm": 0.4424895942211151, "learning_rate": 9.103088259109562e-07, "loss": 0.008, "step": 275630 }, { "epoch": 2.945029114803141, "grad_norm": 1.0579657554626465, "learning_rate": 9.102992243643282e-07, "loss": 0.0433, "step": 275640 }, { "epoch": 2.9451359581174206, "grad_norm": 2.438549041748047, "learning_rate": 9.102896223544414e-07, "loss": 0.0277, "step": 275650 }, { "epoch": 2.9452428014317005, "grad_norm": 1.322609305381775, "learning_rate": 9.102800198813068e-07, "loss": 0.0088, "step": 275660 }, { "epoch": 2.94534964474598, "grad_norm": 0.022920146584510803, "learning_rate": 9.102704169449349e-07, "loss": 0.0363, "step": 275670 }, { "epoch": 2.94545648806026, "grad_norm": 0.012634013779461384, "learning_rate": 9.102608135453367e-07, "loss": 0.0854, "step": 275680 }, { "epoch": 2.9455633313745393, "grad_norm": 0.057138592004776, "learning_rate": 9.102512096825232e-07, "loss": 0.0257, "step": 275690 }, { "epoch": 2.9456701746888188, "grad_norm": 4.49506950378418, "learning_rate": 9.10241605356505e-07, "loss": 0.0503, "step": 275700 }, { "epoch": 2.945777018003098, "grad_norm": 0.030530188232660294, "learning_rate": 9.102320005672931e-07, "loss": 0.0269, "step": 275710 }, { "epoch": 2.945883861317378, "grad_norm": 0.08426302671432495, "learning_rate": 9.102223953148984e-07, "loss": 0.0092, "step": 275720 }, { "epoch": 2.9459907046316576, "grad_norm": 1.54286789894104, "learning_rate": 9.102127895993316e-07, "loss": 0.0369, "step": 275730 }, { "epoch": 2.9460975479459375, "grad_norm": 8.222992897033691, "learning_rate": 9.102031834206035e-07, "loss": 0.0098, "step": 275740 }, { "epoch": 2.946204391260217, "grad_norm": 0.34098178148269653, "learning_rate": 9.101935767787252e-07, "loss": 0.0185, "step": 275750 }, { "epoch": 2.9463112345744964, "grad_norm": 0.16650021076202393, "learning_rate": 9.101839696737074e-07, "loss": 0.0047, "step": 275760 }, { "epoch": 2.946418077888776, "grad_norm": 0.33716079592704773, "learning_rate": 9.101743621055609e-07, "loss": 0.02, "step": 275770 }, { "epoch": 2.9465249212030558, "grad_norm": 0.005948718637228012, "learning_rate": 9.101647540742965e-07, "loss": 0.0119, "step": 275780 }, { "epoch": 2.946631764517335, "grad_norm": 5.073652267456055, "learning_rate": 9.101551455799252e-07, "loss": 0.048, "step": 275790 }, { "epoch": 2.946738607831615, "grad_norm": 0.056241463869810104, "learning_rate": 9.101455366224577e-07, "loss": 0.0162, "step": 275800 }, { "epoch": 2.9468454511458946, "grad_norm": 4.689908027648926, "learning_rate": 9.101359272019051e-07, "loss": 0.0161, "step": 275810 }, { "epoch": 2.946952294460174, "grad_norm": 0.5769286751747131, "learning_rate": 9.101263173182781e-07, "loss": 0.0216, "step": 275820 }, { "epoch": 2.9470591377744535, "grad_norm": 0.004417846444994211, "learning_rate": 9.101167069715874e-07, "loss": 0.0334, "step": 275830 }, { "epoch": 2.9471659810887334, "grad_norm": 0.21150146424770355, "learning_rate": 9.10107096161844e-07, "loss": 0.0151, "step": 275840 }, { "epoch": 2.947272824403013, "grad_norm": 2.2111411094665527, "learning_rate": 9.100974848890587e-07, "loss": 0.0175, "step": 275850 }, { "epoch": 2.9473796677172928, "grad_norm": 0.02033247798681259, "learning_rate": 9.100878731532424e-07, "loss": 0.0066, "step": 275860 }, { "epoch": 2.947486511031572, "grad_norm": 5.036607265472412, "learning_rate": 9.10078260954406e-07, "loss": 0.0417, "step": 275870 }, { "epoch": 2.9475933543458517, "grad_norm": 0.10936613380908966, "learning_rate": 9.100686482925604e-07, "loss": 0.0139, "step": 275880 }, { "epoch": 2.9477001976601316, "grad_norm": 0.054012320935726166, "learning_rate": 9.100590351677162e-07, "loss": 0.0228, "step": 275890 }, { "epoch": 2.947807040974411, "grad_norm": 1.3369964361190796, "learning_rate": 9.100494215798844e-07, "loss": 0.0138, "step": 275900 }, { "epoch": 2.9479138842886905, "grad_norm": 5.588523864746094, "learning_rate": 9.100398075290758e-07, "loss": 0.009, "step": 275910 }, { "epoch": 2.9480207276029704, "grad_norm": 0.1283961683511734, "learning_rate": 9.100301930153015e-07, "loss": 0.0302, "step": 275920 }, { "epoch": 2.94812757091725, "grad_norm": 8.951491355895996, "learning_rate": 9.10020578038572e-07, "loss": 0.0579, "step": 275930 }, { "epoch": 2.9482344142315293, "grad_norm": 0.508840799331665, "learning_rate": 9.100109625988983e-07, "loss": 0.0022, "step": 275940 }, { "epoch": 2.9483412575458092, "grad_norm": 0.05144788697361946, "learning_rate": 9.100013466962915e-07, "loss": 0.0027, "step": 275950 }, { "epoch": 2.9484481008600887, "grad_norm": 10.294477462768555, "learning_rate": 9.099917303307621e-07, "loss": 0.0209, "step": 275960 }, { "epoch": 2.948554944174368, "grad_norm": 0.781559407711029, "learning_rate": 9.099821135023211e-07, "loss": 0.0126, "step": 275970 }, { "epoch": 2.948661787488648, "grad_norm": 3.667005777359009, "learning_rate": 9.099724962109793e-07, "loss": 0.0047, "step": 275980 }, { "epoch": 2.9487686308029275, "grad_norm": 1.6841644048690796, "learning_rate": 9.099628784567477e-07, "loss": 0.0113, "step": 275990 }, { "epoch": 2.948875474117207, "grad_norm": 0.04483111575245857, "learning_rate": 9.099532602396369e-07, "loss": 0.0266, "step": 276000 }, { "epoch": 2.948982317431487, "grad_norm": 0.04763980582356453, "learning_rate": 9.09943641559658e-07, "loss": 0.0503, "step": 276010 }, { "epoch": 2.9490891607457663, "grad_norm": 4.698731422424316, "learning_rate": 9.09934022416822e-07, "loss": 0.0081, "step": 276020 }, { "epoch": 2.949196004060046, "grad_norm": 5.709558486938477, "learning_rate": 9.099244028111393e-07, "loss": 0.0266, "step": 276030 }, { "epoch": 2.9493028473743257, "grad_norm": 2.194427967071533, "learning_rate": 9.09914782742621e-07, "loss": 0.0087, "step": 276040 }, { "epoch": 2.949409690688605, "grad_norm": 1.8861836194992065, "learning_rate": 9.099051622112781e-07, "loss": 0.016, "step": 276050 }, { "epoch": 2.9495165340028846, "grad_norm": 4.407150745391846, "learning_rate": 9.098955412171214e-07, "loss": 0.0445, "step": 276060 }, { "epoch": 2.9496233773171645, "grad_norm": 1.6561437845230103, "learning_rate": 9.098859197601616e-07, "loss": 0.0338, "step": 276070 }, { "epoch": 2.949730220631444, "grad_norm": 5.57005500793457, "learning_rate": 9.098762978404096e-07, "loss": 0.0079, "step": 276080 }, { "epoch": 2.949837063945724, "grad_norm": 2.3911983966827393, "learning_rate": 9.098666754578765e-07, "loss": 0.034, "step": 276090 }, { "epoch": 2.9499439072600033, "grad_norm": 0.010634645819664001, "learning_rate": 9.098570526125729e-07, "loss": 0.0208, "step": 276100 }, { "epoch": 2.950050750574283, "grad_norm": 0.010526864789426327, "learning_rate": 9.098474293045097e-07, "loss": 0.0149, "step": 276110 }, { "epoch": 2.9501575938885622, "grad_norm": 0.04046901315450668, "learning_rate": 9.098378055336979e-07, "loss": 0.0184, "step": 276120 }, { "epoch": 2.950264437202842, "grad_norm": 0.1622716188430786, "learning_rate": 9.098281813001481e-07, "loss": 0.0135, "step": 276130 }, { "epoch": 2.9503712805171216, "grad_norm": 2.3017349243164062, "learning_rate": 9.098185566038715e-07, "loss": 0.0184, "step": 276140 }, { "epoch": 2.9504781238314015, "grad_norm": 12.363489151000977, "learning_rate": 9.098089314448789e-07, "loss": 0.0458, "step": 276150 }, { "epoch": 2.950584967145681, "grad_norm": 0.0125358197838068, "learning_rate": 9.09799305823181e-07, "loss": 0.0189, "step": 276160 }, { "epoch": 2.9506918104599604, "grad_norm": 2.112116813659668, "learning_rate": 9.097896797387887e-07, "loss": 0.0296, "step": 276170 }, { "epoch": 2.95079865377424, "grad_norm": 9.004016876220703, "learning_rate": 9.097800531917129e-07, "loss": 0.0256, "step": 276180 }, { "epoch": 2.95090549708852, "grad_norm": 0.06075510382652283, "learning_rate": 9.097704261819645e-07, "loss": 0.0009, "step": 276190 }, { "epoch": 2.9510123404027992, "grad_norm": 0.04129989817738533, "learning_rate": 9.097607987095546e-07, "loss": 0.0133, "step": 276200 }, { "epoch": 2.951119183717079, "grad_norm": 2.0251755714416504, "learning_rate": 9.097511707744936e-07, "loss": 0.0196, "step": 276210 }, { "epoch": 2.9512260270313586, "grad_norm": 3.164808988571167, "learning_rate": 9.097415423767926e-07, "loss": 0.01, "step": 276220 }, { "epoch": 2.951332870345638, "grad_norm": 0.842750072479248, "learning_rate": 9.097319135164627e-07, "loss": 0.0033, "step": 276230 }, { "epoch": 2.9514397136599175, "grad_norm": 0.18181642889976501, "learning_rate": 9.097222841935143e-07, "loss": 0.0094, "step": 276240 }, { "epoch": 2.9515465569741974, "grad_norm": 0.0038804346695542336, "learning_rate": 9.097126544079585e-07, "loss": 0.0059, "step": 276250 }, { "epoch": 2.951653400288477, "grad_norm": 3.0652554035186768, "learning_rate": 9.097030241598063e-07, "loss": 0.008, "step": 276260 }, { "epoch": 2.951760243602757, "grad_norm": 1.9567524194717407, "learning_rate": 9.096933934490685e-07, "loss": 0.0128, "step": 276270 }, { "epoch": 2.9518670869170363, "grad_norm": 4.309112071990967, "learning_rate": 9.096837622757558e-07, "loss": 0.0188, "step": 276280 }, { "epoch": 2.9519739302313157, "grad_norm": 0.06714647263288498, "learning_rate": 9.096741306398794e-07, "loss": 0.0385, "step": 276290 }, { "epoch": 2.952080773545595, "grad_norm": 4.219645977020264, "learning_rate": 9.096644985414498e-07, "loss": 0.0212, "step": 276300 }, { "epoch": 2.952187616859875, "grad_norm": 0.15363657474517822, "learning_rate": 9.096548659804782e-07, "loss": 0.039, "step": 276310 }, { "epoch": 2.9522944601741545, "grad_norm": 0.1063198447227478, "learning_rate": 9.096452329569753e-07, "loss": 0.075, "step": 276320 }, { "epoch": 2.9524013034884344, "grad_norm": 0.06979494541883469, "learning_rate": 9.09635599470952e-07, "loss": 0.0528, "step": 276330 }, { "epoch": 2.952508146802714, "grad_norm": 1.9798604249954224, "learning_rate": 9.096259655224192e-07, "loss": 0.0109, "step": 276340 }, { "epoch": 2.9526149901169934, "grad_norm": 0.2819571793079376, "learning_rate": 9.096163311113878e-07, "loss": 0.0077, "step": 276350 }, { "epoch": 2.952721833431273, "grad_norm": 7.973641395568848, "learning_rate": 9.096066962378685e-07, "loss": 0.0339, "step": 276360 }, { "epoch": 2.9528286767455527, "grad_norm": 0.10259748995304108, "learning_rate": 9.095970609018726e-07, "loss": 0.0287, "step": 276370 }, { "epoch": 2.952935520059832, "grad_norm": 0.38044893741607666, "learning_rate": 9.095874251034106e-07, "loss": 0.0223, "step": 276380 }, { "epoch": 2.953042363374112, "grad_norm": 0.009141369722783566, "learning_rate": 9.095777888424933e-07, "loss": 0.0068, "step": 276390 }, { "epoch": 2.9531492066883915, "grad_norm": 0.005856877192854881, "learning_rate": 9.095681521191319e-07, "loss": 0.0007, "step": 276400 }, { "epoch": 2.953256050002671, "grad_norm": 0.1109856590628624, "learning_rate": 9.095585149333371e-07, "loss": 0.052, "step": 276410 }, { "epoch": 2.9533628933169505, "grad_norm": 0.007191935554146767, "learning_rate": 9.0954887728512e-07, "loss": 0.0053, "step": 276420 }, { "epoch": 2.9534697366312304, "grad_norm": 0.021820399910211563, "learning_rate": 9.095392391744912e-07, "loss": 0.0158, "step": 276430 }, { "epoch": 2.95357657994551, "grad_norm": 0.005459866486489773, "learning_rate": 9.095296006014615e-07, "loss": 0.0053, "step": 276440 }, { "epoch": 2.9536834232597897, "grad_norm": 0.014668137766420841, "learning_rate": 9.095199615660423e-07, "loss": 0.01, "step": 276450 }, { "epoch": 2.953790266574069, "grad_norm": 0.020661132410168648, "learning_rate": 9.095103220682439e-07, "loss": 0.032, "step": 276460 }, { "epoch": 2.9538971098883486, "grad_norm": 7.302680015563965, "learning_rate": 9.095006821080776e-07, "loss": 0.0316, "step": 276470 }, { "epoch": 2.954003953202628, "grad_norm": 2.5852999687194824, "learning_rate": 9.094910416855539e-07, "loss": 0.0277, "step": 276480 }, { "epoch": 2.954110796516908, "grad_norm": 0.020829016342759132, "learning_rate": 9.094814008006842e-07, "loss": 0.028, "step": 276490 }, { "epoch": 2.9542176398311875, "grad_norm": 0.036796312779188156, "learning_rate": 9.094717594534789e-07, "loss": 0.0393, "step": 276500 }, { "epoch": 2.9543244831454674, "grad_norm": 0.2763957977294922, "learning_rate": 9.094621176439491e-07, "loss": 0.0372, "step": 276510 }, { "epoch": 2.954431326459747, "grad_norm": 0.009748556651175022, "learning_rate": 9.094524753721058e-07, "loss": 0.0136, "step": 276520 }, { "epoch": 2.9545381697740263, "grad_norm": 0.02287333644926548, "learning_rate": 9.094428326379595e-07, "loss": 0.0135, "step": 276530 }, { "epoch": 2.9546450130883057, "grad_norm": 1.6555001735687256, "learning_rate": 9.094331894415215e-07, "loss": 0.0343, "step": 276540 }, { "epoch": 2.9547518564025856, "grad_norm": 22.780820846557617, "learning_rate": 9.094235457828025e-07, "loss": 0.0236, "step": 276550 }, { "epoch": 2.954858699716865, "grad_norm": 0.48169437050819397, "learning_rate": 9.094139016618136e-07, "loss": 0.0125, "step": 276560 }, { "epoch": 2.954965543031145, "grad_norm": 0.09433642029762268, "learning_rate": 9.094042570785653e-07, "loss": 0.0229, "step": 276570 }, { "epoch": 2.9550723863454245, "grad_norm": 0.016285626217722893, "learning_rate": 9.093946120330687e-07, "loss": 0.0199, "step": 276580 }, { "epoch": 2.955179229659704, "grad_norm": 0.36534571647644043, "learning_rate": 9.093849665253347e-07, "loss": 0.0343, "step": 276590 }, { "epoch": 2.9552860729739834, "grad_norm": 0.38172152638435364, "learning_rate": 9.093753205553743e-07, "loss": 0.034, "step": 276600 }, { "epoch": 2.9553929162882633, "grad_norm": 2.9634287357330322, "learning_rate": 9.093656741231981e-07, "loss": 0.0233, "step": 276610 }, { "epoch": 2.9554997596025427, "grad_norm": 0.026870252564549446, "learning_rate": 9.093560272288172e-07, "loss": 0.0184, "step": 276620 }, { "epoch": 2.9556066029168226, "grad_norm": 6.005640506744385, "learning_rate": 9.093463798722425e-07, "loss": 0.0386, "step": 276630 }, { "epoch": 2.955713446231102, "grad_norm": 2.2804808616638184, "learning_rate": 9.093367320534848e-07, "loss": 0.0184, "step": 276640 }, { "epoch": 2.9558202895453816, "grad_norm": 1.5937613248825073, "learning_rate": 9.09327083772555e-07, "loss": 0.0261, "step": 276650 }, { "epoch": 2.9559271328596615, "grad_norm": 0.8346267342567444, "learning_rate": 9.093174350294641e-07, "loss": 0.0103, "step": 276660 }, { "epoch": 2.956033976173941, "grad_norm": 1.7508156299591064, "learning_rate": 9.093077858242229e-07, "loss": 0.0209, "step": 276670 }, { "epoch": 2.9561408194882204, "grad_norm": 0.6019958257675171, "learning_rate": 9.092981361568423e-07, "loss": 0.0283, "step": 276680 }, { "epoch": 2.9562476628025003, "grad_norm": 0.01128485519438982, "learning_rate": 9.092884860273332e-07, "loss": 0.0182, "step": 276690 }, { "epoch": 2.9563545061167797, "grad_norm": 0.0022954836022108793, "learning_rate": 9.092788354357066e-07, "loss": 0.0153, "step": 276700 }, { "epoch": 2.956461349431059, "grad_norm": 0.007012500427663326, "learning_rate": 9.092691843819732e-07, "loss": 0.0056, "step": 276710 }, { "epoch": 2.956568192745339, "grad_norm": 0.005492207128554583, "learning_rate": 9.092595328661441e-07, "loss": 0.0144, "step": 276720 }, { "epoch": 2.9566750360596186, "grad_norm": 0.003594089997932315, "learning_rate": 9.092498808882301e-07, "loss": 0.0043, "step": 276730 }, { "epoch": 2.956781879373898, "grad_norm": 3.851166009902954, "learning_rate": 9.092402284482419e-07, "loss": 0.0231, "step": 276740 }, { "epoch": 2.956888722688178, "grad_norm": 0.20169058442115784, "learning_rate": 9.092305755461907e-07, "loss": 0.0012, "step": 276750 }, { "epoch": 2.9569955660024574, "grad_norm": 0.08555558323860168, "learning_rate": 9.092209221820874e-07, "loss": 0.0454, "step": 276760 }, { "epoch": 2.957102409316737, "grad_norm": 0.0216375719755888, "learning_rate": 9.092112683559427e-07, "loss": 0.0083, "step": 276770 }, { "epoch": 2.9572092526310167, "grad_norm": 1.2380731105804443, "learning_rate": 9.092016140677677e-07, "loss": 0.0079, "step": 276780 }, { "epoch": 2.957316095945296, "grad_norm": 2.5772318840026855, "learning_rate": 9.09191959317573e-07, "loss": 0.0433, "step": 276790 }, { "epoch": 2.9574229392595757, "grad_norm": 0.04959842562675476, "learning_rate": 9.091823041053699e-07, "loss": 0.0126, "step": 276800 }, { "epoch": 2.9575297825738556, "grad_norm": 20.903383255004883, "learning_rate": 9.09172648431169e-07, "loss": 0.0505, "step": 276810 }, { "epoch": 2.957636625888135, "grad_norm": 14.859759330749512, "learning_rate": 9.091629922949813e-07, "loss": 0.0421, "step": 276820 }, { "epoch": 2.9577434692024145, "grad_norm": 0.015310619957745075, "learning_rate": 9.091533356968176e-07, "loss": 0.0088, "step": 276830 }, { "epoch": 2.9578503125166944, "grad_norm": 0.03190189227461815, "learning_rate": 9.091436786366891e-07, "loss": 0.0197, "step": 276840 }, { "epoch": 2.957957155830974, "grad_norm": 0.02294522151350975, "learning_rate": 9.091340211146064e-07, "loss": 0.0199, "step": 276850 }, { "epoch": 2.9580639991452538, "grad_norm": 0.03144100308418274, "learning_rate": 9.091243631305805e-07, "loss": 0.013, "step": 276860 }, { "epoch": 2.958170842459533, "grad_norm": 13.233512878417969, "learning_rate": 9.091147046846224e-07, "loss": 0.0269, "step": 276870 }, { "epoch": 2.9582776857738127, "grad_norm": 0.9797987937927246, "learning_rate": 9.091050457767429e-07, "loss": 0.0062, "step": 276880 }, { "epoch": 2.958384529088092, "grad_norm": 1.1837389469146729, "learning_rate": 9.090953864069528e-07, "loss": 0.0146, "step": 276890 }, { "epoch": 2.958491372402372, "grad_norm": 4.656287670135498, "learning_rate": 9.090857265752632e-07, "loss": 0.021, "step": 276900 }, { "epoch": 2.9585982157166515, "grad_norm": 0.4812678098678589, "learning_rate": 9.090760662816849e-07, "loss": 0.0253, "step": 276910 }, { "epoch": 2.9587050590309314, "grad_norm": 0.08317924290895462, "learning_rate": 9.09066405526229e-07, "loss": 0.0294, "step": 276920 }, { "epoch": 2.958811902345211, "grad_norm": 0.629188060760498, "learning_rate": 9.090567443089061e-07, "loss": 0.0033, "step": 276930 }, { "epoch": 2.9589187456594903, "grad_norm": 0.08035589754581451, "learning_rate": 9.090470826297274e-07, "loss": 0.0148, "step": 276940 }, { "epoch": 2.9590255889737698, "grad_norm": 0.0049407645128667355, "learning_rate": 9.090374204887036e-07, "loss": 0.0075, "step": 276950 }, { "epoch": 2.9591324322880497, "grad_norm": 0.04333241283893585, "learning_rate": 9.090277578858456e-07, "loss": 0.0125, "step": 276960 }, { "epoch": 2.959239275602329, "grad_norm": 0.06810536980628967, "learning_rate": 9.090180948211646e-07, "loss": 0.0432, "step": 276970 }, { "epoch": 2.959346118916609, "grad_norm": 0.010867525823414326, "learning_rate": 9.090084312946711e-07, "loss": 0.0121, "step": 276980 }, { "epoch": 2.9594529622308885, "grad_norm": 1.1065549850463867, "learning_rate": 9.089987673063762e-07, "loss": 0.0033, "step": 276990 }, { "epoch": 2.959559805545168, "grad_norm": 0.02319733239710331, "learning_rate": 9.08989102856291e-07, "loss": 0.0071, "step": 277000 }, { "epoch": 2.9596666488594474, "grad_norm": 0.13863776624202728, "learning_rate": 9.08979437944426e-07, "loss": 0.0024, "step": 277010 }, { "epoch": 2.9597734921737273, "grad_norm": 7.110928535461426, "learning_rate": 9.089697725707926e-07, "loss": 0.0272, "step": 277020 }, { "epoch": 2.9598803354880068, "grad_norm": 4.372225284576416, "learning_rate": 9.089601067354013e-07, "loss": 0.0434, "step": 277030 }, { "epoch": 2.9599871788022867, "grad_norm": 2.2016758918762207, "learning_rate": 9.089504404382633e-07, "loss": 0.0853, "step": 277040 }, { "epoch": 2.960094022116566, "grad_norm": 0.28870004415512085, "learning_rate": 9.089407736793892e-07, "loss": 0.0202, "step": 277050 }, { "epoch": 2.9602008654308456, "grad_norm": 6.383993148803711, "learning_rate": 9.089311064587904e-07, "loss": 0.027, "step": 277060 }, { "epoch": 2.960307708745125, "grad_norm": 0.009154744446277618, "learning_rate": 9.089214387764772e-07, "loss": 0.0275, "step": 277070 }, { "epoch": 2.960414552059405, "grad_norm": 3.1778175830841064, "learning_rate": 9.08911770632461e-07, "loss": 0.0284, "step": 277080 }, { "epoch": 2.9605213953736844, "grad_norm": 0.2366405427455902, "learning_rate": 9.089021020267525e-07, "loss": 0.0017, "step": 277090 }, { "epoch": 2.9606282386879643, "grad_norm": 0.8485251069068909, "learning_rate": 9.088924329593626e-07, "loss": 0.0194, "step": 277100 }, { "epoch": 2.960735082002244, "grad_norm": 0.06648855656385422, "learning_rate": 9.088827634303024e-07, "loss": 0.0124, "step": 277110 }, { "epoch": 2.9608419253165232, "grad_norm": 0.4096498191356659, "learning_rate": 9.088730934395826e-07, "loss": 0.0054, "step": 277120 }, { "epoch": 2.9609487686308027, "grad_norm": 6.9297919273376465, "learning_rate": 9.088634229872143e-07, "loss": 0.0232, "step": 277130 }, { "epoch": 2.9610556119450826, "grad_norm": 3.5230095386505127, "learning_rate": 9.088537520732083e-07, "loss": 0.0208, "step": 277140 }, { "epoch": 2.961162455259362, "grad_norm": 6.241237163543701, "learning_rate": 9.088440806975757e-07, "loss": 0.0174, "step": 277150 }, { "epoch": 2.961269298573642, "grad_norm": 0.8921003937721252, "learning_rate": 9.08834408860327e-07, "loss": 0.0236, "step": 277160 }, { "epoch": 2.9613761418879214, "grad_norm": 0.0038647418841719627, "learning_rate": 9.088247365614736e-07, "loss": 0.007, "step": 277170 }, { "epoch": 2.961482985202201, "grad_norm": 0.5148898363113403, "learning_rate": 9.088150638010261e-07, "loss": 0.0098, "step": 277180 }, { "epoch": 2.9615898285164803, "grad_norm": 0.019884997978806496, "learning_rate": 9.088053905789957e-07, "loss": 0.0029, "step": 277190 }, { "epoch": 2.9616966718307602, "grad_norm": 13.265888214111328, "learning_rate": 9.08795716895393e-07, "loss": 0.0381, "step": 277200 }, { "epoch": 2.9618035151450397, "grad_norm": 1.5809249877929688, "learning_rate": 9.087860427502292e-07, "loss": 0.031, "step": 277210 }, { "epoch": 2.9619103584593196, "grad_norm": 0.26141971349716187, "learning_rate": 9.08776368143515e-07, "loss": 0.0174, "step": 277220 }, { "epoch": 2.962017201773599, "grad_norm": 0.27945777773857117, "learning_rate": 9.087666930752615e-07, "loss": 0.0059, "step": 277230 }, { "epoch": 2.9621240450878785, "grad_norm": 0.025173308327794075, "learning_rate": 9.087570175454795e-07, "loss": 0.0059, "step": 277240 }, { "epoch": 2.962230888402158, "grad_norm": 1.517702579498291, "learning_rate": 9.0874734155418e-07, "loss": 0.0106, "step": 277250 }, { "epoch": 2.962337731716438, "grad_norm": 4.623987674713135, "learning_rate": 9.08737665101374e-07, "loss": 0.0145, "step": 277260 }, { "epoch": 2.9624445750307173, "grad_norm": 0.2532327473163605, "learning_rate": 9.087279881870722e-07, "loss": 0.0048, "step": 277270 }, { "epoch": 2.9625514183449972, "grad_norm": 0.01940632052719593, "learning_rate": 9.087183108112857e-07, "loss": 0.0015, "step": 277280 }, { "epoch": 2.9626582616592767, "grad_norm": 4.269472599029541, "learning_rate": 9.087086329740253e-07, "loss": 0.0179, "step": 277290 }, { "epoch": 2.962765104973556, "grad_norm": 0.1843091994524002, "learning_rate": 9.086989546753021e-07, "loss": 0.0178, "step": 277300 }, { "epoch": 2.9628719482878356, "grad_norm": 0.009082249365746975, "learning_rate": 9.086892759151268e-07, "loss": 0.018, "step": 277310 }, { "epoch": 2.9629787916021155, "grad_norm": 0.46256643533706665, "learning_rate": 9.086795966935105e-07, "loss": 0.0604, "step": 277320 }, { "epoch": 2.963085634916395, "grad_norm": 0.4294343590736389, "learning_rate": 9.086699170104644e-07, "loss": 0.0045, "step": 277330 }, { "epoch": 2.963192478230675, "grad_norm": 0.36460092663764954, "learning_rate": 9.086602368659988e-07, "loss": 0.0118, "step": 277340 }, { "epoch": 2.9632993215449543, "grad_norm": 1.703596591949463, "learning_rate": 9.08650556260125e-07, "loss": 0.0251, "step": 277350 }, { "epoch": 2.963406164859234, "grad_norm": 0.2748279571533203, "learning_rate": 9.08640875192854e-07, "loss": 0.0231, "step": 277360 }, { "epoch": 2.9635130081735137, "grad_norm": 0.07003770023584366, "learning_rate": 9.086311936641964e-07, "loss": 0.0213, "step": 277370 }, { "epoch": 2.963619851487793, "grad_norm": 10.848738670349121, "learning_rate": 9.086215116741635e-07, "loss": 0.0385, "step": 277380 }, { "epoch": 2.9637266948020726, "grad_norm": 0.053892090916633606, "learning_rate": 9.086118292227661e-07, "loss": 0.0113, "step": 277390 }, { "epoch": 2.9638335381163525, "grad_norm": 7.116072654724121, "learning_rate": 9.086021463100149e-07, "loss": 0.0359, "step": 277400 }, { "epoch": 2.963940381430632, "grad_norm": 0.00648476043716073, "learning_rate": 9.085924629359212e-07, "loss": 0.0301, "step": 277410 }, { "epoch": 2.9640472247449114, "grad_norm": 0.004062776919454336, "learning_rate": 9.085827791004959e-07, "loss": 0.0063, "step": 277420 }, { "epoch": 2.9641540680591913, "grad_norm": 2.210893154144287, "learning_rate": 9.085730948037496e-07, "loss": 0.0028, "step": 277430 }, { "epoch": 2.964260911373471, "grad_norm": 23.393095016479492, "learning_rate": 9.085634100456934e-07, "loss": 0.0146, "step": 277440 }, { "epoch": 2.9643677546877503, "grad_norm": 0.019984373822808266, "learning_rate": 9.085537248263384e-07, "loss": 0.011, "step": 277450 }, { "epoch": 2.96447459800203, "grad_norm": 2.2633872032165527, "learning_rate": 9.085440391456953e-07, "loss": 0.0291, "step": 277460 }, { "epoch": 2.9645814413163096, "grad_norm": 0.38188472390174866, "learning_rate": 9.085343530037752e-07, "loss": 0.0016, "step": 277470 }, { "epoch": 2.964688284630589, "grad_norm": 5.634415149688721, "learning_rate": 9.085246664005891e-07, "loss": 0.0216, "step": 277480 }, { "epoch": 2.964795127944869, "grad_norm": 0.005472233053296804, "learning_rate": 9.085149793361478e-07, "loss": 0.0108, "step": 277490 }, { "epoch": 2.9649019712591484, "grad_norm": 11.09212589263916, "learning_rate": 9.085052918104621e-07, "loss": 0.0131, "step": 277500 }, { "epoch": 2.965008814573428, "grad_norm": 0.641287088394165, "learning_rate": 9.084956038235431e-07, "loss": 0.0514, "step": 277510 }, { "epoch": 2.965115657887708, "grad_norm": 0.10679496079683304, "learning_rate": 9.084859153754018e-07, "loss": 0.0225, "step": 277520 }, { "epoch": 2.9652225012019873, "grad_norm": 3.4699623584747314, "learning_rate": 9.084762264660493e-07, "loss": 0.0233, "step": 277530 }, { "epoch": 2.9653293445162667, "grad_norm": 0.6130220890045166, "learning_rate": 9.08466537095496e-07, "loss": 0.0211, "step": 277540 }, { "epoch": 2.9654361878305466, "grad_norm": 2.7336208820343018, "learning_rate": 9.084568472637533e-07, "loss": 0.0423, "step": 277550 }, { "epoch": 2.965543031144826, "grad_norm": 2.9368667602539062, "learning_rate": 9.084471569708318e-07, "loss": 0.0125, "step": 277560 }, { "epoch": 2.965649874459106, "grad_norm": 0.6240830421447754, "learning_rate": 9.084374662167429e-07, "loss": 0.0073, "step": 277570 }, { "epoch": 2.9657567177733855, "grad_norm": 1.7178046703338623, "learning_rate": 9.08427775001497e-07, "loss": 0.0267, "step": 277580 }, { "epoch": 2.965863561087665, "grad_norm": 0.008225172758102417, "learning_rate": 9.084180833251056e-07, "loss": 0.0163, "step": 277590 }, { "epoch": 2.9659704044019444, "grad_norm": 4.342083930969238, "learning_rate": 9.08408391187579e-07, "loss": 0.0281, "step": 277600 }, { "epoch": 2.9660772477162243, "grad_norm": 1.2645199298858643, "learning_rate": 9.083986985889289e-07, "loss": 0.0093, "step": 277610 }, { "epoch": 2.9661840910305037, "grad_norm": 0.6539841890335083, "learning_rate": 9.083890055291656e-07, "loss": 0.0154, "step": 277620 }, { "epoch": 2.9662909343447836, "grad_norm": 3.8486809730529785, "learning_rate": 9.083793120083003e-07, "loss": 0.0222, "step": 277630 }, { "epoch": 2.966397777659063, "grad_norm": 2.695917844772339, "learning_rate": 9.083696180263441e-07, "loss": 0.0256, "step": 277640 }, { "epoch": 2.9665046209733426, "grad_norm": 0.07192978262901306, "learning_rate": 9.083599235833077e-07, "loss": 0.0242, "step": 277650 }, { "epoch": 2.966611464287622, "grad_norm": 5.818296909332275, "learning_rate": 9.08350228679202e-07, "loss": 0.0931, "step": 277660 }, { "epoch": 2.966718307601902, "grad_norm": 0.100155308842659, "learning_rate": 9.083405333140381e-07, "loss": 0.005, "step": 277670 }, { "epoch": 2.9668251509161814, "grad_norm": 0.022867834195494652, "learning_rate": 9.083308374878271e-07, "loss": 0.0147, "step": 277680 }, { "epoch": 2.9669319942304613, "grad_norm": 0.42335259914398193, "learning_rate": 9.083211412005797e-07, "loss": 0.0195, "step": 277690 }, { "epoch": 2.9670388375447407, "grad_norm": 0.36082151532173157, "learning_rate": 9.083114444523068e-07, "loss": 0.0278, "step": 277700 }, { "epoch": 2.96714568085902, "grad_norm": 0.011110280640423298, "learning_rate": 9.083017472430195e-07, "loss": 0.0086, "step": 277710 }, { "epoch": 2.9672525241732997, "grad_norm": 1.3342758417129517, "learning_rate": 9.082920495727289e-07, "loss": 0.0052, "step": 277720 }, { "epoch": 2.9673593674875796, "grad_norm": 0.025686558336019516, "learning_rate": 9.082823514414455e-07, "loss": 0.0061, "step": 277730 }, { "epoch": 2.967466210801859, "grad_norm": 0.01786375604569912, "learning_rate": 9.082726528491807e-07, "loss": 0.0137, "step": 277740 }, { "epoch": 2.967573054116139, "grad_norm": 0.048736367374658585, "learning_rate": 9.082629537959451e-07, "loss": 0.0014, "step": 277750 }, { "epoch": 2.9676798974304184, "grad_norm": 2.9065020084381104, "learning_rate": 9.082532542817498e-07, "loss": 0.0041, "step": 277760 }, { "epoch": 2.967786740744698, "grad_norm": 0.010572594590485096, "learning_rate": 9.08243554306606e-07, "loss": 0.0225, "step": 277770 }, { "epoch": 2.9678935840589773, "grad_norm": 0.09587029367685318, "learning_rate": 9.082338538705242e-07, "loss": 0.0132, "step": 277780 }, { "epoch": 2.968000427373257, "grad_norm": 0.03864752873778343, "learning_rate": 9.082241529735156e-07, "loss": 0.0032, "step": 277790 }, { "epoch": 2.9681072706875367, "grad_norm": 0.005389614496380091, "learning_rate": 9.082144516155912e-07, "loss": 0.0134, "step": 277800 }, { "epoch": 2.9682141140018166, "grad_norm": 10.001999855041504, "learning_rate": 9.082047497967618e-07, "loss": 0.0066, "step": 277810 }, { "epoch": 2.968320957316096, "grad_norm": 1.3604413270950317, "learning_rate": 9.081950475170385e-07, "loss": 0.0785, "step": 277820 }, { "epoch": 2.9684278006303755, "grad_norm": 0.049353402107954025, "learning_rate": 9.08185344776432e-07, "loss": 0.0104, "step": 277830 }, { "epoch": 2.968534643944655, "grad_norm": 0.04855019226670265, "learning_rate": 9.081756415749538e-07, "loss": 0.0013, "step": 277840 }, { "epoch": 2.968641487258935, "grad_norm": 1.5433909893035889, "learning_rate": 9.081659379126142e-07, "loss": 0.0112, "step": 277850 }, { "epoch": 2.9687483305732143, "grad_norm": 0.03871539980173111, "learning_rate": 9.081562337894245e-07, "loss": 0.0231, "step": 277860 }, { "epoch": 2.968855173887494, "grad_norm": 14.077162742614746, "learning_rate": 9.081465292053956e-07, "loss": 0.0483, "step": 277870 }, { "epoch": 2.9689620172017737, "grad_norm": 5.762475967407227, "learning_rate": 9.081368241605385e-07, "loss": 0.0183, "step": 277880 }, { "epoch": 2.969068860516053, "grad_norm": 0.8010231256484985, "learning_rate": 9.081271186548642e-07, "loss": 0.0129, "step": 277890 }, { "epoch": 2.9691757038303326, "grad_norm": 3.0621416568756104, "learning_rate": 9.081174126883835e-07, "loss": 0.0311, "step": 277900 }, { "epoch": 2.9692825471446125, "grad_norm": 0.010513224638998508, "learning_rate": 9.081077062611074e-07, "loss": 0.0165, "step": 277910 }, { "epoch": 2.969389390458892, "grad_norm": 3.983375072479248, "learning_rate": 9.080979993730468e-07, "loss": 0.0245, "step": 277920 }, { "epoch": 2.969496233773172, "grad_norm": 0.7197707891464233, "learning_rate": 9.08088292024213e-07, "loss": 0.0072, "step": 277930 }, { "epoch": 2.9696030770874513, "grad_norm": 0.039813872426748276, "learning_rate": 9.080785842146166e-07, "loss": 0.0209, "step": 277940 }, { "epoch": 2.9697099204017308, "grad_norm": 0.5226789116859436, "learning_rate": 9.080688759442687e-07, "loss": 0.0062, "step": 277950 }, { "epoch": 2.96981676371601, "grad_norm": 12.427626609802246, "learning_rate": 9.080591672131803e-07, "loss": 0.0364, "step": 277960 }, { "epoch": 2.96992360703029, "grad_norm": 0.09555310755968094, "learning_rate": 9.080494580213623e-07, "loss": 0.0003, "step": 277970 }, { "epoch": 2.9700304503445696, "grad_norm": 0.03798714280128479, "learning_rate": 9.080397483688256e-07, "loss": 0.0314, "step": 277980 }, { "epoch": 2.9701372936588495, "grad_norm": 1.0111674070358276, "learning_rate": 9.080300382555812e-07, "loss": 0.0845, "step": 277990 }, { "epoch": 2.970244136973129, "grad_norm": 0.13215115666389465, "learning_rate": 9.080203276816401e-07, "loss": 0.0193, "step": 278000 }, { "epoch": 2.9703509802874084, "grad_norm": 6.974827766418457, "learning_rate": 9.080106166470132e-07, "loss": 0.0137, "step": 278010 }, { "epoch": 2.970457823601688, "grad_norm": 0.0019313190132379532, "learning_rate": 9.080009051517117e-07, "loss": 0.0091, "step": 278020 }, { "epoch": 2.9705646669159678, "grad_norm": 0.21251071989536285, "learning_rate": 9.079911931957462e-07, "loss": 0.0797, "step": 278030 }, { "epoch": 2.9706715102302472, "grad_norm": 1.236763596534729, "learning_rate": 9.07981480779128e-07, "loss": 0.0231, "step": 278040 }, { "epoch": 2.970778353544527, "grad_norm": 1.135968565940857, "learning_rate": 9.079717679018678e-07, "loss": 0.0116, "step": 278050 }, { "epoch": 2.9708851968588066, "grad_norm": 1.6263597011566162, "learning_rate": 9.079620545639768e-07, "loss": 0.0027, "step": 278060 }, { "epoch": 2.970992040173086, "grad_norm": 1.0062288045883179, "learning_rate": 9.079523407654656e-07, "loss": 0.0163, "step": 278070 }, { "epoch": 2.9710988834873655, "grad_norm": 0.33847153186798096, "learning_rate": 9.079426265063457e-07, "loss": 0.0009, "step": 278080 }, { "epoch": 2.9712057268016454, "grad_norm": 0.05385810136795044, "learning_rate": 9.079329117866277e-07, "loss": 0.0189, "step": 278090 }, { "epoch": 2.971312570115925, "grad_norm": 0.018655885010957718, "learning_rate": 9.079231966063227e-07, "loss": 0.0051, "step": 278100 }, { "epoch": 2.9714194134302048, "grad_norm": 4.799254417419434, "learning_rate": 9.079134809654415e-07, "loss": 0.0106, "step": 278110 }, { "epoch": 2.9715262567444842, "grad_norm": 2.600471258163452, "learning_rate": 9.079037648639953e-07, "loss": 0.0345, "step": 278120 }, { "epoch": 2.9716331000587637, "grad_norm": 0.341880738735199, "learning_rate": 9.078940483019949e-07, "loss": 0.0454, "step": 278130 }, { "epoch": 2.9717399433730436, "grad_norm": 0.34331104159355164, "learning_rate": 9.078843312794514e-07, "loss": 0.0594, "step": 278140 }, { "epoch": 2.971846786687323, "grad_norm": 0.06262654066085815, "learning_rate": 9.078746137963756e-07, "loss": 0.0096, "step": 278150 }, { "epoch": 2.9719536300016025, "grad_norm": 2.381701707839966, "learning_rate": 9.078648958527786e-07, "loss": 0.0049, "step": 278160 }, { "epoch": 2.9720604733158824, "grad_norm": 1.0778346061706543, "learning_rate": 9.078551774486714e-07, "loss": 0.0095, "step": 278170 }, { "epoch": 2.972167316630162, "grad_norm": 1.2500252723693848, "learning_rate": 9.07845458584065e-07, "loss": 0.0144, "step": 278180 }, { "epoch": 2.9722741599444413, "grad_norm": 10.345999717712402, "learning_rate": 9.078357392589701e-07, "loss": 0.0602, "step": 278190 }, { "epoch": 2.9723810032587212, "grad_norm": 11.063722610473633, "learning_rate": 9.07826019473398e-07, "loss": 0.0242, "step": 278200 }, { "epoch": 2.9724878465730007, "grad_norm": 0.8031502366065979, "learning_rate": 9.078162992273595e-07, "loss": 0.0027, "step": 278210 }, { "epoch": 2.97259468988728, "grad_norm": 0.41703325510025024, "learning_rate": 9.078065785208657e-07, "loss": 0.0166, "step": 278220 }, { "epoch": 2.97270153320156, "grad_norm": 0.005782099440693855, "learning_rate": 9.077968573539274e-07, "loss": 0.0087, "step": 278230 }, { "epoch": 2.9728083765158395, "grad_norm": 5.956601619720459, "learning_rate": 9.077871357265557e-07, "loss": 0.011, "step": 278240 }, { "epoch": 2.972915219830119, "grad_norm": 0.3413960635662079, "learning_rate": 9.077774136387614e-07, "loss": 0.023, "step": 278250 }, { "epoch": 2.973022063144399, "grad_norm": 3.0517001152038574, "learning_rate": 9.077676910905559e-07, "loss": 0.0039, "step": 278260 }, { "epoch": 2.9731289064586783, "grad_norm": 0.00688014505431056, "learning_rate": 9.077579680819498e-07, "loss": 0.0045, "step": 278270 }, { "epoch": 2.973235749772958, "grad_norm": 4.428066253662109, "learning_rate": 9.077482446129541e-07, "loss": 0.0184, "step": 278280 }, { "epoch": 2.9733425930872377, "grad_norm": 0.024440880864858627, "learning_rate": 9.077385206835799e-07, "loss": 0.0198, "step": 278290 }, { "epoch": 2.973449436401517, "grad_norm": 9.763175010681152, "learning_rate": 9.077287962938382e-07, "loss": 0.041, "step": 278300 }, { "epoch": 2.9735562797157966, "grad_norm": 0.3293345868587494, "learning_rate": 9.0771907144374e-07, "loss": 0.0639, "step": 278310 }, { "epoch": 2.9736631230300765, "grad_norm": 0.0824885368347168, "learning_rate": 9.07709346133296e-07, "loss": 0.0217, "step": 278320 }, { "epoch": 2.973769966344356, "grad_norm": 3.56492280960083, "learning_rate": 9.076996203625173e-07, "loss": 0.0273, "step": 278330 }, { "epoch": 2.973876809658636, "grad_norm": 0.010907024145126343, "learning_rate": 9.076898941314151e-07, "loss": 0.0093, "step": 278340 }, { "epoch": 2.9739836529729153, "grad_norm": 2.154733896255493, "learning_rate": 9.076801674400003e-07, "loss": 0.0038, "step": 278350 }, { "epoch": 2.974090496287195, "grad_norm": 0.01608157344162464, "learning_rate": 9.076704402882838e-07, "loss": 0.0304, "step": 278360 }, { "epoch": 2.9741973396014743, "grad_norm": 5.338021755218506, "learning_rate": 9.076607126762765e-07, "loss": 0.0518, "step": 278370 }, { "epoch": 2.974304182915754, "grad_norm": 0.022220922634005547, "learning_rate": 9.076509846039896e-07, "loss": 0.0187, "step": 278380 }, { "epoch": 2.9744110262300336, "grad_norm": 1.1065090894699097, "learning_rate": 9.076412560714339e-07, "loss": 0.0024, "step": 278390 }, { "epoch": 2.9745178695443135, "grad_norm": 0.009959663264453411, "learning_rate": 9.076315270786204e-07, "loss": 0.037, "step": 278400 }, { "epoch": 2.974624712858593, "grad_norm": 0.8507682681083679, "learning_rate": 9.076217976255603e-07, "loss": 0.0095, "step": 278410 }, { "epoch": 2.9747315561728724, "grad_norm": 0.30267322063446045, "learning_rate": 9.076120677122643e-07, "loss": 0.0197, "step": 278420 }, { "epoch": 2.974838399487152, "grad_norm": 0.005138069856911898, "learning_rate": 9.076023373387436e-07, "loss": 0.0258, "step": 278430 }, { "epoch": 2.974945242801432, "grad_norm": 0.020721688866615295, "learning_rate": 9.075926065050091e-07, "loss": 0.038, "step": 278440 }, { "epoch": 2.9750520861157113, "grad_norm": 0.3200223445892334, "learning_rate": 9.075828752110716e-07, "loss": 0.0103, "step": 278450 }, { "epoch": 2.975158929429991, "grad_norm": 4.234971046447754, "learning_rate": 9.075731434569425e-07, "loss": 0.0107, "step": 278460 }, { "epoch": 2.9752657727442706, "grad_norm": 18.33229637145996, "learning_rate": 9.075634112426324e-07, "loss": 0.0072, "step": 278470 }, { "epoch": 2.97537261605855, "grad_norm": 0.08970072120428085, "learning_rate": 9.075536785681525e-07, "loss": 0.0238, "step": 278480 }, { "epoch": 2.9754794593728295, "grad_norm": 0.047799691557884216, "learning_rate": 9.075439454335138e-07, "loss": 0.0084, "step": 278490 }, { "epoch": 2.9755863026871094, "grad_norm": 3.6003644466400146, "learning_rate": 9.075342118387273e-07, "loss": 0.0191, "step": 278500 }, { "epoch": 2.975693146001389, "grad_norm": 0.3223845958709717, "learning_rate": 9.075244777838037e-07, "loss": 0.023, "step": 278510 }, { "epoch": 2.975799989315669, "grad_norm": 0.060560740530490875, "learning_rate": 9.075147432687543e-07, "loss": 0.0175, "step": 278520 }, { "epoch": 2.9759068326299483, "grad_norm": 0.004676620475947857, "learning_rate": 9.0750500829359e-07, "loss": 0.0007, "step": 278530 }, { "epoch": 2.9760136759442277, "grad_norm": 0.01811300590634346, "learning_rate": 9.074952728583219e-07, "loss": 0.0348, "step": 278540 }, { "epoch": 2.976120519258507, "grad_norm": 0.4789608120918274, "learning_rate": 9.074855369629607e-07, "loss": 0.0085, "step": 278550 }, { "epoch": 2.976227362572787, "grad_norm": 0.5959742665290833, "learning_rate": 9.074758006075176e-07, "loss": 0.0262, "step": 278560 }, { "epoch": 2.9763342058870665, "grad_norm": 0.036644987761974335, "learning_rate": 9.074660637920037e-07, "loss": 0.0034, "step": 278570 }, { "epoch": 2.9764410492013464, "grad_norm": 2.3779056072235107, "learning_rate": 9.074563265164297e-07, "loss": 0.0036, "step": 278580 }, { "epoch": 2.976547892515626, "grad_norm": 0.003315238282084465, "learning_rate": 9.074465887808069e-07, "loss": 0.0288, "step": 278590 }, { "epoch": 2.9766547358299054, "grad_norm": 1.615998387336731, "learning_rate": 9.074368505851461e-07, "loss": 0.0083, "step": 278600 }, { "epoch": 2.976761579144185, "grad_norm": 1.8948335647583008, "learning_rate": 9.074271119294583e-07, "loss": 0.0344, "step": 278610 }, { "epoch": 2.9768684224584647, "grad_norm": 0.170615091919899, "learning_rate": 9.074173728137545e-07, "loss": 0.0183, "step": 278620 }, { "epoch": 2.976975265772744, "grad_norm": 0.1806531399488449, "learning_rate": 9.074076332380458e-07, "loss": 0.007, "step": 278630 }, { "epoch": 2.977082109087024, "grad_norm": 12.310685157775879, "learning_rate": 9.073978932023432e-07, "loss": 0.0307, "step": 278640 }, { "epoch": 2.9771889524013035, "grad_norm": 2.5129141807556152, "learning_rate": 9.073881527066576e-07, "loss": 0.0033, "step": 278650 }, { "epoch": 2.977295795715583, "grad_norm": 3.758876323699951, "learning_rate": 9.07378411751e-07, "loss": 0.0301, "step": 278660 }, { "epoch": 2.9774026390298625, "grad_norm": 0.016878657042980194, "learning_rate": 9.073686703353815e-07, "loss": 0.0416, "step": 278670 }, { "epoch": 2.9775094823441424, "grad_norm": 0.019125817343592644, "learning_rate": 9.073589284598129e-07, "loss": 0.0026, "step": 278680 }, { "epoch": 2.977616325658422, "grad_norm": 0.0033334174659103155, "learning_rate": 9.073491861243054e-07, "loss": 0.0069, "step": 278690 }, { "epoch": 2.9777231689727017, "grad_norm": 0.14893648028373718, "learning_rate": 9.073394433288699e-07, "loss": 0.0073, "step": 278700 }, { "epoch": 2.977830012286981, "grad_norm": 3.996830940246582, "learning_rate": 9.073297000735174e-07, "loss": 0.0159, "step": 278710 }, { "epoch": 2.9779368556012606, "grad_norm": 0.012845487333834171, "learning_rate": 9.07319956358259e-07, "loss": 0.0267, "step": 278720 }, { "epoch": 2.97804369891554, "grad_norm": 2.2777671813964844, "learning_rate": 9.073102121831055e-07, "loss": 0.0089, "step": 278730 }, { "epoch": 2.97815054222982, "grad_norm": 0.6459659337997437, "learning_rate": 9.073004675480681e-07, "loss": 0.0154, "step": 278740 }, { "epoch": 2.9782573855440995, "grad_norm": 0.027515340596437454, "learning_rate": 9.072907224531577e-07, "loss": 0.0052, "step": 278750 }, { "epoch": 2.9783642288583794, "grad_norm": 0.006440355442464352, "learning_rate": 9.072809768983855e-07, "loss": 0.0177, "step": 278760 }, { "epoch": 2.978471072172659, "grad_norm": 0.2223922163248062, "learning_rate": 9.072712308837622e-07, "loss": 0.0218, "step": 278770 }, { "epoch": 2.9785779154869383, "grad_norm": 2.401869773864746, "learning_rate": 9.072614844092989e-07, "loss": 0.015, "step": 278780 }, { "epoch": 2.9786847588012177, "grad_norm": 0.015598147176206112, "learning_rate": 9.072517374750067e-07, "loss": 0.0295, "step": 278790 }, { "epoch": 2.9787916021154976, "grad_norm": 0.004607262555509806, "learning_rate": 9.072419900808966e-07, "loss": 0.0093, "step": 278800 }, { "epoch": 2.978898445429777, "grad_norm": 4.885426044464111, "learning_rate": 9.072322422269796e-07, "loss": 0.0115, "step": 278810 }, { "epoch": 2.979005288744057, "grad_norm": 4.814350605010986, "learning_rate": 9.072224939132664e-07, "loss": 0.0238, "step": 278820 }, { "epoch": 2.9791121320583365, "grad_norm": 1.2981162071228027, "learning_rate": 9.072127451397685e-07, "loss": 0.0061, "step": 278830 }, { "epoch": 2.979218975372616, "grad_norm": 2.637119770050049, "learning_rate": 9.072029959064966e-07, "loss": 0.0122, "step": 278840 }, { "epoch": 2.979325818686896, "grad_norm": 0.16033363342285156, "learning_rate": 9.07193246213462e-07, "loss": 0.009, "step": 278850 }, { "epoch": 2.9794326620011753, "grad_norm": 1.8616209030151367, "learning_rate": 9.071834960606752e-07, "loss": 0.029, "step": 278860 }, { "epoch": 2.9795395053154548, "grad_norm": 0.5338512063026428, "learning_rate": 9.071737454481476e-07, "loss": 0.0087, "step": 278870 }, { "epoch": 2.9796463486297347, "grad_norm": 0.4714813232421875, "learning_rate": 9.071639943758899e-07, "loss": 0.0552, "step": 278880 }, { "epoch": 2.979753191944014, "grad_norm": 0.023156654089689255, "learning_rate": 9.071542428439138e-07, "loss": 0.0035, "step": 278890 }, { "epoch": 2.9798600352582936, "grad_norm": 11.054829597473145, "learning_rate": 9.071444908522295e-07, "loss": 0.0364, "step": 278900 }, { "epoch": 2.9799668785725735, "grad_norm": 0.019734898582100868, "learning_rate": 9.071347384008483e-07, "loss": 0.0035, "step": 278910 }, { "epoch": 2.980073721886853, "grad_norm": 13.367408752441406, "learning_rate": 9.071249854897813e-07, "loss": 0.0214, "step": 278920 }, { "epoch": 2.9801805652011324, "grad_norm": 3.227543354034424, "learning_rate": 9.071152321190396e-07, "loss": 0.0272, "step": 278930 }, { "epoch": 2.9802874085154123, "grad_norm": 0.005086403340101242, "learning_rate": 9.07105478288634e-07, "loss": 0.0093, "step": 278940 }, { "epoch": 2.9803942518296918, "grad_norm": 0.004472505766898394, "learning_rate": 9.070957239985757e-07, "loss": 0.0079, "step": 278950 }, { "epoch": 2.980501095143971, "grad_norm": 0.03721965104341507, "learning_rate": 9.070859692488755e-07, "loss": 0.01, "step": 278960 }, { "epoch": 2.980607938458251, "grad_norm": 2.3909037113189697, "learning_rate": 9.070762140395444e-07, "loss": 0.0476, "step": 278970 }, { "epoch": 2.9807147817725306, "grad_norm": 0.9416837692260742, "learning_rate": 9.070664583705937e-07, "loss": 0.0195, "step": 278980 }, { "epoch": 2.98082162508681, "grad_norm": 0.42827722430229187, "learning_rate": 9.070567022420341e-07, "loss": 0.0125, "step": 278990 }, { "epoch": 2.98092846840109, "grad_norm": 0.011276781558990479, "learning_rate": 9.070469456538769e-07, "loss": 0.0203, "step": 279000 }, { "epoch": 2.9810353117153694, "grad_norm": 8.854873657226562, "learning_rate": 9.07037188606133e-07, "loss": 0.0496, "step": 279010 }, { "epoch": 2.981142155029649, "grad_norm": 6.031835556030273, "learning_rate": 9.070274310988133e-07, "loss": 0.0191, "step": 279020 }, { "epoch": 2.9812489983439288, "grad_norm": 0.007314626593142748, "learning_rate": 9.070176731319289e-07, "loss": 0.0079, "step": 279030 }, { "epoch": 2.981355841658208, "grad_norm": 3.242692470550537, "learning_rate": 9.070079147054907e-07, "loss": 0.0273, "step": 279040 }, { "epoch": 2.981462684972488, "grad_norm": 0.004988437052816153, "learning_rate": 9.069981558195101e-07, "loss": 0.0152, "step": 279050 }, { "epoch": 2.9815695282867676, "grad_norm": 0.04794856905937195, "learning_rate": 9.069883964739977e-07, "loss": 0.0029, "step": 279060 }, { "epoch": 2.981676371601047, "grad_norm": 0.005508420057594776, "learning_rate": 9.069786366689649e-07, "loss": 0.0156, "step": 279070 }, { "epoch": 2.9817832149153265, "grad_norm": 1.8807011842727661, "learning_rate": 9.069688764044224e-07, "loss": 0.0067, "step": 279080 }, { "epoch": 2.9818900582296064, "grad_norm": 0.1445707380771637, "learning_rate": 9.069591156803812e-07, "loss": 0.0299, "step": 279090 }, { "epoch": 2.981996901543886, "grad_norm": 0.5945954322814941, "learning_rate": 9.069493544968525e-07, "loss": 0.0409, "step": 279100 }, { "epoch": 2.9821037448581658, "grad_norm": 0.007718935608863831, "learning_rate": 9.069395928538472e-07, "loss": 0.0011, "step": 279110 }, { "epoch": 2.982210588172445, "grad_norm": 2.553358316421509, "learning_rate": 9.069298307513765e-07, "loss": 0.0123, "step": 279120 }, { "epoch": 2.9823174314867247, "grad_norm": 0.6616153717041016, "learning_rate": 9.069200681894513e-07, "loss": 0.0027, "step": 279130 }, { "epoch": 2.982424274801004, "grad_norm": 0.12520655989646912, "learning_rate": 9.069103051680826e-07, "loss": 0.0164, "step": 279140 }, { "epoch": 2.982531118115284, "grad_norm": 0.48187926411628723, "learning_rate": 9.069005416872815e-07, "loss": 0.0672, "step": 279150 }, { "epoch": 2.9826379614295635, "grad_norm": 0.030578430742025375, "learning_rate": 9.068907777470589e-07, "loss": 0.0124, "step": 279160 }, { "epoch": 2.9827448047438434, "grad_norm": 1.8119391202926636, "learning_rate": 9.068810133474259e-07, "loss": 0.0018, "step": 279170 }, { "epoch": 2.982851648058123, "grad_norm": 12.28676986694336, "learning_rate": 9.068712484883936e-07, "loss": 0.0492, "step": 279180 }, { "epoch": 2.9829584913724023, "grad_norm": 5.614538669586182, "learning_rate": 9.06861483169973e-07, "loss": 0.021, "step": 279190 }, { "epoch": 2.983065334686682, "grad_norm": 0.05911632627248764, "learning_rate": 9.06851717392175e-07, "loss": 0.0013, "step": 279200 }, { "epoch": 2.9831721780009617, "grad_norm": 0.33865320682525635, "learning_rate": 9.068419511550107e-07, "loss": 0.0239, "step": 279210 }, { "epoch": 2.983279021315241, "grad_norm": 0.1688212752342224, "learning_rate": 9.068321844584912e-07, "loss": 0.0296, "step": 279220 }, { "epoch": 2.983385864629521, "grad_norm": 0.017560692504048347, "learning_rate": 9.068224173026273e-07, "loss": 0.016, "step": 279230 }, { "epoch": 2.9834927079438005, "grad_norm": 2.1381008625030518, "learning_rate": 9.068126496874303e-07, "loss": 0.0194, "step": 279240 }, { "epoch": 2.98359955125808, "grad_norm": 0.2124214470386505, "learning_rate": 9.068028816129112e-07, "loss": 0.0031, "step": 279250 }, { "epoch": 2.9837063945723594, "grad_norm": 0.024160707369446754, "learning_rate": 9.067931130790808e-07, "loss": 0.0066, "step": 279260 }, { "epoch": 2.9838132378866393, "grad_norm": 0.09854929894208908, "learning_rate": 9.067833440859502e-07, "loss": 0.0401, "step": 279270 }, { "epoch": 2.983920081200919, "grad_norm": 0.20022690296173096, "learning_rate": 9.067735746335308e-07, "loss": 0.0015, "step": 279280 }, { "epoch": 2.9840269245151987, "grad_norm": 0.10009658336639404, "learning_rate": 9.067638047218331e-07, "loss": 0.0102, "step": 279290 }, { "epoch": 2.984133767829478, "grad_norm": 0.4985046088695526, "learning_rate": 9.067540343508684e-07, "loss": 0.0052, "step": 279300 }, { "epoch": 2.9842406111437576, "grad_norm": 6.1431565284729, "learning_rate": 9.067442635206477e-07, "loss": 0.0018, "step": 279310 }, { "epoch": 2.984347454458037, "grad_norm": 2.384570598602295, "learning_rate": 9.067344922311821e-07, "loss": 0.0096, "step": 279320 }, { "epoch": 2.984454297772317, "grad_norm": 0.027054328471422195, "learning_rate": 9.067247204824824e-07, "loss": 0.0437, "step": 279330 }, { "epoch": 2.9845611410865964, "grad_norm": 0.14623451232910156, "learning_rate": 9.0671494827456e-07, "loss": 0.0107, "step": 279340 }, { "epoch": 2.9846679844008763, "grad_norm": 0.022085342556238174, "learning_rate": 9.067051756074255e-07, "loss": 0.0352, "step": 279350 }, { "epoch": 2.984774827715156, "grad_norm": 0.010971354320645332, "learning_rate": 9.066954024810903e-07, "loss": 0.0218, "step": 279360 }, { "epoch": 2.9848816710294352, "grad_norm": 0.9628807306289673, "learning_rate": 9.066856288955653e-07, "loss": 0.0243, "step": 279370 }, { "epoch": 2.9849885143437147, "grad_norm": 0.6737129092216492, "learning_rate": 9.066758548508615e-07, "loss": 0.0079, "step": 279380 }, { "epoch": 2.9850953576579946, "grad_norm": 4.120754241943359, "learning_rate": 9.066660803469901e-07, "loss": 0.0127, "step": 279390 }, { "epoch": 2.985202200972274, "grad_norm": 11.39114761352539, "learning_rate": 9.066563053839618e-07, "loss": 0.0831, "step": 279400 }, { "epoch": 2.985309044286554, "grad_norm": 1.759607195854187, "learning_rate": 9.06646529961788e-07, "loss": 0.0036, "step": 279410 }, { "epoch": 2.9854158876008334, "grad_norm": 1.015891194343567, "learning_rate": 9.066367540804793e-07, "loss": 0.0022, "step": 279420 }, { "epoch": 2.985522730915113, "grad_norm": 9.483081817626953, "learning_rate": 9.066269777400473e-07, "loss": 0.037, "step": 279430 }, { "epoch": 2.9856295742293923, "grad_norm": 4.033876895904541, "learning_rate": 9.066172009405027e-07, "loss": 0.0179, "step": 279440 }, { "epoch": 2.9857364175436722, "grad_norm": 1.9521989822387695, "learning_rate": 9.066074236818566e-07, "loss": 0.0122, "step": 279450 }, { "epoch": 2.9858432608579517, "grad_norm": 0.015476323664188385, "learning_rate": 9.065976459641201e-07, "loss": 0.0425, "step": 279460 }, { "epoch": 2.9859501041722316, "grad_norm": 0.11637362092733383, "learning_rate": 9.065878677873038e-07, "loss": 0.0368, "step": 279470 }, { "epoch": 2.986056947486511, "grad_norm": 1.9760546684265137, "learning_rate": 9.065780891514194e-07, "loss": 0.0616, "step": 279480 }, { "epoch": 2.9861637908007905, "grad_norm": 0.030917614698410034, "learning_rate": 9.065683100564777e-07, "loss": 0.0417, "step": 279490 }, { "epoch": 2.98627063411507, "grad_norm": 0.057867541909217834, "learning_rate": 9.065585305024896e-07, "loss": 0.0062, "step": 279500 }, { "epoch": 2.98637747742935, "grad_norm": 3.3858044147491455, "learning_rate": 9.065487504894662e-07, "loss": 0.0423, "step": 279510 }, { "epoch": 2.9864843207436294, "grad_norm": 0.033083729445934296, "learning_rate": 9.065389700174186e-07, "loss": 0.0088, "step": 279520 }, { "epoch": 2.9865911640579093, "grad_norm": 10.5086669921875, "learning_rate": 9.065291890863579e-07, "loss": 0.0513, "step": 279530 }, { "epoch": 2.9866980073721887, "grad_norm": 1.0307973623275757, "learning_rate": 9.06519407696295e-07, "loss": 0.0415, "step": 279540 }, { "epoch": 2.986804850686468, "grad_norm": 17.273603439331055, "learning_rate": 9.06509625847241e-07, "loss": 0.035, "step": 279550 }, { "epoch": 2.9869116940007476, "grad_norm": 20.82929039001465, "learning_rate": 9.064998435392071e-07, "loss": 0.0409, "step": 279560 }, { "epoch": 2.9870185373150275, "grad_norm": 0.05462595075368881, "learning_rate": 9.064900607722039e-07, "loss": 0.0163, "step": 279570 }, { "epoch": 2.987125380629307, "grad_norm": 0.0072007556445896626, "learning_rate": 9.06480277546243e-07, "loss": 0.0108, "step": 279580 }, { "epoch": 2.987232223943587, "grad_norm": 1.6262741088867188, "learning_rate": 9.06470493861335e-07, "loss": 0.0208, "step": 279590 }, { "epoch": 2.9873390672578664, "grad_norm": 0.5031917095184326, "learning_rate": 9.064607097174913e-07, "loss": 0.0162, "step": 279600 }, { "epoch": 2.987445910572146, "grad_norm": 0.9635461568832397, "learning_rate": 9.064509251147228e-07, "loss": 0.0158, "step": 279610 }, { "epoch": 2.9875527538864257, "grad_norm": 0.7660543322563171, "learning_rate": 9.064411400530403e-07, "loss": 0.0602, "step": 279620 }, { "epoch": 2.987659597200705, "grad_norm": 13.830294609069824, "learning_rate": 9.064313545324553e-07, "loss": 0.0413, "step": 279630 }, { "epoch": 2.9877664405149846, "grad_norm": 0.01939110830426216, "learning_rate": 9.064215685529786e-07, "loss": 0.0189, "step": 279640 }, { "epoch": 2.9878732838292645, "grad_norm": 3.46016263961792, "learning_rate": 9.064117821146211e-07, "loss": 0.0182, "step": 279650 }, { "epoch": 2.987980127143544, "grad_norm": 0.0307064950466156, "learning_rate": 9.064019952173942e-07, "loss": 0.0429, "step": 279660 }, { "epoch": 2.9880869704578235, "grad_norm": 11.241999626159668, "learning_rate": 9.063922078613086e-07, "loss": 0.0232, "step": 279670 }, { "epoch": 2.9881938137721034, "grad_norm": 0.08798125386238098, "learning_rate": 9.063824200463757e-07, "loss": 0.0128, "step": 279680 }, { "epoch": 2.988300657086383, "grad_norm": 0.007773762568831444, "learning_rate": 9.063726317726063e-07, "loss": 0.0246, "step": 279690 }, { "epoch": 2.9884075004006623, "grad_norm": 0.972990870475769, "learning_rate": 9.063628430400116e-07, "loss": 0.0096, "step": 279700 }, { "epoch": 2.988514343714942, "grad_norm": 4.242774486541748, "learning_rate": 9.063530538486025e-07, "loss": 0.0119, "step": 279710 }, { "epoch": 2.9886211870292216, "grad_norm": 6.447386741638184, "learning_rate": 9.063432641983901e-07, "loss": 0.0247, "step": 279720 }, { "epoch": 2.988728030343501, "grad_norm": 0.04024576395750046, "learning_rate": 9.063334740893857e-07, "loss": 0.0031, "step": 279730 }, { "epoch": 2.988834873657781, "grad_norm": 0.011946821585297585, "learning_rate": 9.063236835216e-07, "loss": 0.0018, "step": 279740 }, { "epoch": 2.9889417169720605, "grad_norm": 0.39973118901252747, "learning_rate": 9.063138924950441e-07, "loss": 0.0254, "step": 279750 }, { "epoch": 2.98904856028634, "grad_norm": 0.007518724072724581, "learning_rate": 9.063041010097292e-07, "loss": 0.0131, "step": 279760 }, { "epoch": 2.98915540360062, "grad_norm": 0.9344631433486938, "learning_rate": 9.062943090656663e-07, "loss": 0.0026, "step": 279770 }, { "epoch": 2.9892622469148993, "grad_norm": 0.004595933016389608, "learning_rate": 9.062845166628666e-07, "loss": 0.0109, "step": 279780 }, { "epoch": 2.9893690902291787, "grad_norm": 3.0175721645355225, "learning_rate": 9.062747238013408e-07, "loss": 0.0087, "step": 279790 }, { "epoch": 2.9894759335434586, "grad_norm": 1.3346664905548096, "learning_rate": 9.062649304811003e-07, "loss": 0.0903, "step": 279800 }, { "epoch": 2.989582776857738, "grad_norm": 0.021459950134158134, "learning_rate": 9.06255136702156e-07, "loss": 0.0209, "step": 279810 }, { "epoch": 2.989689620172018, "grad_norm": 0.0022677259985357523, "learning_rate": 9.062453424645192e-07, "loss": 0.0137, "step": 279820 }, { "epoch": 2.9897964634862975, "grad_norm": 0.05865113064646721, "learning_rate": 9.062355477682004e-07, "loss": 0.0252, "step": 279830 }, { "epoch": 2.989903306800577, "grad_norm": 0.006883314810693264, "learning_rate": 9.062257526132112e-07, "loss": 0.0209, "step": 279840 }, { "epoch": 2.9900101501148564, "grad_norm": 2.2654385566711426, "learning_rate": 9.062159569995625e-07, "loss": 0.0174, "step": 279850 }, { "epoch": 2.9901169934291363, "grad_norm": 5.845516681671143, "learning_rate": 9.062061609272652e-07, "loss": 0.0163, "step": 279860 }, { "epoch": 2.9902238367434157, "grad_norm": 8.2831392288208, "learning_rate": 9.061963643963306e-07, "loss": 0.0248, "step": 279870 }, { "epoch": 2.9903306800576956, "grad_norm": 2.758810520172119, "learning_rate": 9.061865674067696e-07, "loss": 0.0074, "step": 279880 }, { "epoch": 2.990437523371975, "grad_norm": 4.069849491119385, "learning_rate": 9.061767699585932e-07, "loss": 0.0532, "step": 279890 }, { "epoch": 2.9905443666862546, "grad_norm": 3.9332756996154785, "learning_rate": 9.061669720518128e-07, "loss": 0.0557, "step": 279900 }, { "epoch": 2.990651210000534, "grad_norm": 10.534653663635254, "learning_rate": 9.061571736864391e-07, "loss": 0.0385, "step": 279910 }, { "epoch": 2.990758053314814, "grad_norm": 4.600903511047363, "learning_rate": 9.061473748624833e-07, "loss": 0.0029, "step": 279920 }, { "epoch": 2.9908648966290934, "grad_norm": 3.9548802375793457, "learning_rate": 9.061375755799564e-07, "loss": 0.0346, "step": 279930 }, { "epoch": 2.9909717399433733, "grad_norm": 0.00686303386464715, "learning_rate": 9.061277758388696e-07, "loss": 0.0076, "step": 279940 }, { "epoch": 2.9910785832576527, "grad_norm": 1.1162570714950562, "learning_rate": 9.061179756392338e-07, "loss": 0.0185, "step": 279950 }, { "epoch": 2.991185426571932, "grad_norm": 3.171703338623047, "learning_rate": 9.061081749810602e-07, "loss": 0.0079, "step": 279960 }, { "epoch": 2.9912922698862117, "grad_norm": 0.0831613838672638, "learning_rate": 9.0609837386436e-07, "loss": 0.0361, "step": 279970 }, { "epoch": 2.9913991132004916, "grad_norm": 0.9297705292701721, "learning_rate": 9.060885722891438e-07, "loss": 0.0069, "step": 279980 }, { "epoch": 2.991505956514771, "grad_norm": 0.2923714816570282, "learning_rate": 9.060787702554232e-07, "loss": 0.0155, "step": 279990 }, { "epoch": 2.991612799829051, "grad_norm": 0.06300383061170578, "learning_rate": 9.060689677632088e-07, "loss": 0.0197, "step": 280000 }, { "epoch": 2.9917196431433304, "grad_norm": 3.677781820297241, "learning_rate": 9.06059164812512e-07, "loss": 0.0212, "step": 280010 }, { "epoch": 2.99182648645761, "grad_norm": 1.1936665773391724, "learning_rate": 9.060493614033438e-07, "loss": 0.0068, "step": 280020 }, { "epoch": 2.9919333297718893, "grad_norm": 5.409682273864746, "learning_rate": 9.060395575357151e-07, "loss": 0.0542, "step": 280030 }, { "epoch": 2.992040173086169, "grad_norm": 0.08951244503259659, "learning_rate": 9.060297532096373e-07, "loss": 0.0079, "step": 280040 }, { "epoch": 2.9921470164004487, "grad_norm": 0.03511780872941017, "learning_rate": 9.060199484251211e-07, "loss": 0.0344, "step": 280050 }, { "epoch": 2.9922538597147286, "grad_norm": 0.5819935202598572, "learning_rate": 9.060101431821779e-07, "loss": 0.0332, "step": 280060 }, { "epoch": 2.992360703029008, "grad_norm": 0.030592070892453194, "learning_rate": 9.060003374808184e-07, "loss": 0.0313, "step": 280070 }, { "epoch": 2.9924675463432875, "grad_norm": 1.4581938982009888, "learning_rate": 9.059905313210538e-07, "loss": 0.0293, "step": 280080 }, { "epoch": 2.992574389657567, "grad_norm": 0.009861896745860577, "learning_rate": 9.059807247028954e-07, "loss": 0.0079, "step": 280090 }, { "epoch": 2.992681232971847, "grad_norm": 8.425775527954102, "learning_rate": 9.059709176263542e-07, "loss": 0.0061, "step": 280100 }, { "epoch": 2.9927880762861263, "grad_norm": 5.318071365356445, "learning_rate": 9.059611100914412e-07, "loss": 0.0094, "step": 280110 }, { "epoch": 2.992894919600406, "grad_norm": 4.827929496765137, "learning_rate": 9.059513020981672e-07, "loss": 0.0133, "step": 280120 }, { "epoch": 2.9930017629146857, "grad_norm": 6.732865333557129, "learning_rate": 9.059414936465439e-07, "loss": 0.0324, "step": 280130 }, { "epoch": 2.993108606228965, "grad_norm": 0.0168343223631382, "learning_rate": 9.059316847365819e-07, "loss": 0.0021, "step": 280140 }, { "epoch": 2.9932154495432446, "grad_norm": 0.08453420549631119, "learning_rate": 9.059218753682923e-07, "loss": 0.0167, "step": 280150 }, { "epoch": 2.9933222928575245, "grad_norm": 0.06132259592413902, "learning_rate": 9.059120655416863e-07, "loss": 0.0158, "step": 280160 }, { "epoch": 2.993429136171804, "grad_norm": 0.00428013876080513, "learning_rate": 9.059022552567749e-07, "loss": 0.0172, "step": 280170 }, { "epoch": 2.993535979486084, "grad_norm": 0.03368993103504181, "learning_rate": 9.058924445135692e-07, "loss": 0.0041, "step": 280180 }, { "epoch": 2.9936428228003633, "grad_norm": 0.025187727063894272, "learning_rate": 9.058826333120804e-07, "loss": 0.0078, "step": 280190 }, { "epoch": 2.9937496661146428, "grad_norm": 14.912322044372559, "learning_rate": 9.058728216523195e-07, "loss": 0.0484, "step": 280200 }, { "epoch": 2.9938565094289222, "grad_norm": 0.29217085242271423, "learning_rate": 9.058630095342974e-07, "loss": 0.0165, "step": 280210 }, { "epoch": 2.993963352743202, "grad_norm": 1.2920925617218018, "learning_rate": 9.058531969580254e-07, "loss": 0.0305, "step": 280220 }, { "epoch": 2.9940701960574816, "grad_norm": 0.46567654609680176, "learning_rate": 9.058433839235145e-07, "loss": 0.0046, "step": 280230 }, { "epoch": 2.9941770393717615, "grad_norm": 0.19041897356510162, "learning_rate": 9.058335704307757e-07, "loss": 0.0452, "step": 280240 }, { "epoch": 2.994283882686041, "grad_norm": 0.04166441410779953, "learning_rate": 9.058237564798204e-07, "loss": 0.0306, "step": 280250 }, { "epoch": 2.9943907260003204, "grad_norm": 0.01359672099351883, "learning_rate": 9.058139420706594e-07, "loss": 0.0035, "step": 280260 }, { "epoch": 2.9944975693146, "grad_norm": 0.18481948971748352, "learning_rate": 9.058041272033037e-07, "loss": 0.0054, "step": 280270 }, { "epoch": 2.9946044126288798, "grad_norm": 0.0038432274013757706, "learning_rate": 9.057943118777647e-07, "loss": 0.0378, "step": 280280 }, { "epoch": 2.9947112559431592, "grad_norm": 0.36989089846611023, "learning_rate": 9.05784496094053e-07, "loss": 0.0084, "step": 280290 }, { "epoch": 2.994818099257439, "grad_norm": 0.041896432638168335, "learning_rate": 9.057746798521803e-07, "loss": 0.0114, "step": 280300 }, { "epoch": 2.9949249425717186, "grad_norm": 0.028254257515072823, "learning_rate": 9.057648631521571e-07, "loss": 0.0085, "step": 280310 }, { "epoch": 2.995031785885998, "grad_norm": 5.665797710418701, "learning_rate": 9.057550459939949e-07, "loss": 0.02, "step": 280320 }, { "epoch": 2.995138629200278, "grad_norm": 0.007773092482239008, "learning_rate": 9.057452283777045e-07, "loss": 0.0487, "step": 280330 }, { "epoch": 2.9952454725145574, "grad_norm": 0.8979864120483398, "learning_rate": 9.057354103032973e-07, "loss": 0.0048, "step": 280340 }, { "epoch": 2.995352315828837, "grad_norm": 0.32195812463760376, "learning_rate": 9.05725591770784e-07, "loss": 0.0326, "step": 280350 }, { "epoch": 2.995459159143117, "grad_norm": 2.229708194732666, "learning_rate": 9.057157727801761e-07, "loss": 0.0567, "step": 280360 }, { "epoch": 2.9955660024573962, "grad_norm": 5.549741744995117, "learning_rate": 9.057059533314844e-07, "loss": 0.0152, "step": 280370 }, { "epoch": 2.9956728457716757, "grad_norm": 0.25215500593185425, "learning_rate": 9.056961334247199e-07, "loss": 0.0006, "step": 280380 }, { "epoch": 2.9957796890859556, "grad_norm": 0.7322531342506409, "learning_rate": 9.05686313059894e-07, "loss": 0.0073, "step": 280390 }, { "epoch": 2.995886532400235, "grad_norm": 3.290523052215576, "learning_rate": 9.056764922370176e-07, "loss": 0.0213, "step": 280400 }, { "epoch": 2.9959933757145145, "grad_norm": 0.26445314288139343, "learning_rate": 9.056666709561019e-07, "loss": 0.0031, "step": 280410 }, { "epoch": 2.9961002190287944, "grad_norm": 1.6132988929748535, "learning_rate": 9.056568492171577e-07, "loss": 0.03, "step": 280420 }, { "epoch": 2.996207062343074, "grad_norm": 0.0680522471666336, "learning_rate": 9.056470270201964e-07, "loss": 0.0896, "step": 280430 }, { "epoch": 2.9963139056573533, "grad_norm": 0.6036205291748047, "learning_rate": 9.056372043652291e-07, "loss": 0.006, "step": 280440 }, { "epoch": 2.9964207489716332, "grad_norm": 1.5895601511001587, "learning_rate": 9.056273812522667e-07, "loss": 0.0644, "step": 280450 }, { "epoch": 2.9965275922859127, "grad_norm": 2.4889326095581055, "learning_rate": 9.056175576813203e-07, "loss": 0.0037, "step": 280460 }, { "epoch": 2.996634435600192, "grad_norm": 2.9282283782958984, "learning_rate": 9.056077336524011e-07, "loss": 0.0202, "step": 280470 }, { "epoch": 2.996741278914472, "grad_norm": 0.1286046802997589, "learning_rate": 9.055979091655203e-07, "loss": 0.031, "step": 280480 }, { "epoch": 2.9968481222287515, "grad_norm": 0.2778920531272888, "learning_rate": 9.055880842206886e-07, "loss": 0.0003, "step": 280490 }, { "epoch": 2.996954965543031, "grad_norm": 0.00630049267783761, "learning_rate": 9.055782588179176e-07, "loss": 0.0296, "step": 280500 }, { "epoch": 2.997061808857311, "grad_norm": 7.432650089263916, "learning_rate": 9.05568432957218e-07, "loss": 0.027, "step": 280510 }, { "epoch": 2.9971686521715903, "grad_norm": 2.699702739715576, "learning_rate": 9.05558606638601e-07, "loss": 0.0225, "step": 280520 }, { "epoch": 2.9972754954858702, "grad_norm": 0.013324524275958538, "learning_rate": 9.055487798620778e-07, "loss": 0.0099, "step": 280530 }, { "epoch": 2.9973823388001497, "grad_norm": 1.8076677322387695, "learning_rate": 9.055389526276595e-07, "loss": 0.0133, "step": 280540 }, { "epoch": 2.997489182114429, "grad_norm": 3.3534576892852783, "learning_rate": 9.055291249353569e-07, "loss": 0.0216, "step": 280550 }, { "epoch": 2.9975960254287086, "grad_norm": 0.07875841110944748, "learning_rate": 9.055192967851815e-07, "loss": 0.004, "step": 280560 }, { "epoch": 2.9977028687429885, "grad_norm": 2.9617671966552734, "learning_rate": 9.055094681771441e-07, "loss": 0.0269, "step": 280570 }, { "epoch": 2.997809712057268, "grad_norm": 0.10113312304019928, "learning_rate": 9.054996391112559e-07, "loss": 0.0133, "step": 280580 }, { "epoch": 2.997916555371548, "grad_norm": 0.39191749691963196, "learning_rate": 9.05489809587528e-07, "loss": 0.0082, "step": 280590 }, { "epoch": 2.9980233986858273, "grad_norm": 1.072638750076294, "learning_rate": 9.054799796059716e-07, "loss": 0.0298, "step": 280600 }, { "epoch": 2.998130242000107, "grad_norm": 0.2443642020225525, "learning_rate": 9.054701491665977e-07, "loss": 0.0162, "step": 280610 }, { "epoch": 2.9982370853143863, "grad_norm": 1.5601791143417358, "learning_rate": 9.054603182694173e-07, "loss": 0.01, "step": 280620 }, { "epoch": 2.998343928628666, "grad_norm": 3.422903299331665, "learning_rate": 9.054504869144416e-07, "loss": 0.028, "step": 280630 }, { "epoch": 2.9984507719429456, "grad_norm": 0.08050748705863953, "learning_rate": 9.054406551016818e-07, "loss": 0.0267, "step": 280640 }, { "epoch": 2.9985576152572255, "grad_norm": 0.0869431346654892, "learning_rate": 9.054308228311487e-07, "loss": 0.0082, "step": 280650 }, { "epoch": 2.998664458571505, "grad_norm": 0.004454249981790781, "learning_rate": 9.054209901028538e-07, "loss": 0.0072, "step": 280660 }, { "epoch": 2.9987713018857844, "grad_norm": 38.04841613769531, "learning_rate": 9.054111569168079e-07, "loss": 0.1262, "step": 280670 }, { "epoch": 2.998878145200064, "grad_norm": 2.587938070297241, "learning_rate": 9.054013232730222e-07, "loss": 0.004, "step": 280680 }, { "epoch": 2.998984988514344, "grad_norm": 3.878908634185791, "learning_rate": 9.05391489171508e-07, "loss": 0.0083, "step": 280690 }, { "epoch": 2.9990918318286233, "grad_norm": 0.19892723858356476, "learning_rate": 9.053816546122761e-07, "loss": 0.0344, "step": 280700 }, { "epoch": 2.999198675142903, "grad_norm": 0.03135424107313156, "learning_rate": 9.053718195953376e-07, "loss": 0.0113, "step": 280710 }, { "epoch": 2.9993055184571826, "grad_norm": 6.692549228668213, "learning_rate": 9.053619841207039e-07, "loss": 0.0094, "step": 280720 }, { "epoch": 2.999412361771462, "grad_norm": 1.1105057001113892, "learning_rate": 9.053521481883857e-07, "loss": 0.0167, "step": 280730 }, { "epoch": 2.9995192050857415, "grad_norm": 0.8300196528434753, "learning_rate": 9.053423117983945e-07, "loss": 0.0344, "step": 280740 }, { "epoch": 2.9996260484000214, "grad_norm": 0.013495228253304958, "learning_rate": 9.053324749507411e-07, "loss": 0.0167, "step": 280750 }, { "epoch": 2.999732891714301, "grad_norm": 1.8839147090911865, "learning_rate": 9.053226376454369e-07, "loss": 0.0245, "step": 280760 }, { "epoch": 2.999839735028581, "grad_norm": 0.004439129494130611, "learning_rate": 9.053127998824928e-07, "loss": 0.022, "step": 280770 }, { "epoch": 2.9999465783428603, "grad_norm": 4.419670104980469, "learning_rate": 9.0530296166192e-07, "loss": 0.0153, "step": 280780 }, { "epoch": 3.0, "eval_accuracy": 0.7505272672491714, "eval_cer": 0.03758920595655289, "eval_loss": 0.03024059347808361, "eval_runtime": 17455.2838, "eval_samples_per_second": 0.57, "eval_steps_per_second": 0.285, "eval_wer": 0.09742641345227365, "step": 280785 }, { "epoch": 3.0000534216571397, "grad_norm": 0.01892361417412758, "learning_rate": 9.052931229837295e-07, "loss": 0.0134, "step": 280790 }, { "epoch": 3.0001602649714196, "grad_norm": 1.0940908193588257, "learning_rate": 9.052832838479325e-07, "loss": 0.0178, "step": 280800 }, { "epoch": 3.000267108285699, "grad_norm": 0.02846485935151577, "learning_rate": 9.052734442545401e-07, "loss": 0.002, "step": 280810 }, { "epoch": 3.0003739515999786, "grad_norm": 1.434548258781433, "learning_rate": 9.052636042035633e-07, "loss": 0.0031, "step": 280820 }, { "epoch": 3.0004807949142585, "grad_norm": 4.3753838539123535, "learning_rate": 9.052537636950135e-07, "loss": 0.0156, "step": 280830 }, { "epoch": 3.000587638228538, "grad_norm": 4.289593696594238, "learning_rate": 9.052439227289015e-07, "loss": 0.0213, "step": 280840 }, { "epoch": 3.0006944815428174, "grad_norm": 0.03361397236585617, "learning_rate": 9.052340813052384e-07, "loss": 0.0053, "step": 280850 }, { "epoch": 3.0008013248570973, "grad_norm": 0.072966068983078, "learning_rate": 9.052242394240356e-07, "loss": 0.0125, "step": 280860 }, { "epoch": 3.0009081681713767, "grad_norm": 0.16071383655071259, "learning_rate": 9.05214397085304e-07, "loss": 0.0056, "step": 280870 }, { "epoch": 3.001015011485656, "grad_norm": 0.7331271767616272, "learning_rate": 9.052045542890547e-07, "loss": 0.0106, "step": 280880 }, { "epoch": 3.001121854799936, "grad_norm": 0.01932944543659687, "learning_rate": 9.05194711035299e-07, "loss": 0.0247, "step": 280890 }, { "epoch": 3.0012286981142156, "grad_norm": 3.2867484092712402, "learning_rate": 9.051848673240479e-07, "loss": 0.0174, "step": 280900 }, { "epoch": 3.001335541428495, "grad_norm": 0.05067328363656998, "learning_rate": 9.051750231553123e-07, "loss": 0.0034, "step": 280910 }, { "epoch": 3.001442384742775, "grad_norm": 0.7287457585334778, "learning_rate": 9.051651785291037e-07, "loss": 0.0102, "step": 280920 }, { "epoch": 3.0015492280570544, "grad_norm": 4.005728244781494, "learning_rate": 9.05155333445433e-07, "loss": 0.0167, "step": 280930 }, { "epoch": 3.001656071371334, "grad_norm": 1.9665412902832031, "learning_rate": 9.051454879043114e-07, "loss": 0.0042, "step": 280940 }, { "epoch": 3.0017629146856137, "grad_norm": 1.7852572202682495, "learning_rate": 9.051356419057498e-07, "loss": 0.015, "step": 280950 }, { "epoch": 3.001869757999893, "grad_norm": 1.0153439044952393, "learning_rate": 9.051257954497598e-07, "loss": 0.0053, "step": 280960 }, { "epoch": 3.0019766013141727, "grad_norm": 1.5592776536941528, "learning_rate": 9.05115948536352e-07, "loss": 0.0124, "step": 280970 }, { "epoch": 3.0020834446284526, "grad_norm": 0.5097546577453613, "learning_rate": 9.051061011655377e-07, "loss": 0.0075, "step": 280980 }, { "epoch": 3.002190287942732, "grad_norm": 0.9605875015258789, "learning_rate": 9.050962533373279e-07, "loss": 0.0029, "step": 280990 }, { "epoch": 3.0022971312570115, "grad_norm": 0.039134565740823746, "learning_rate": 9.050864050517341e-07, "loss": 0.008, "step": 281000 }, { "epoch": 3.0024039745712914, "grad_norm": 0.004178895615041256, "learning_rate": 9.05076556308767e-07, "loss": 0.0587, "step": 281010 }, { "epoch": 3.002510817885571, "grad_norm": 0.16132132709026337, "learning_rate": 9.050667071084381e-07, "loss": 0.0091, "step": 281020 }, { "epoch": 3.0026176611998503, "grad_norm": 2.7758522033691406, "learning_rate": 9.050568574507581e-07, "loss": 0.0079, "step": 281030 }, { "epoch": 3.00272450451413, "grad_norm": 2.031524896621704, "learning_rate": 9.050470073357385e-07, "loss": 0.0146, "step": 281040 }, { "epoch": 3.0028313478284097, "grad_norm": 0.008258320391178131, "learning_rate": 9.0503715676339e-07, "loss": 0.0048, "step": 281050 }, { "epoch": 3.002938191142689, "grad_norm": 0.7040026783943176, "learning_rate": 9.050273057337244e-07, "loss": 0.0014, "step": 281060 }, { "epoch": 3.003045034456969, "grad_norm": 4.319912433624268, "learning_rate": 9.050174542467521e-07, "loss": 0.0085, "step": 281070 }, { "epoch": 3.0031518777712485, "grad_norm": 0.11892691254615784, "learning_rate": 9.050076023024845e-07, "loss": 0.0042, "step": 281080 }, { "epoch": 3.003258721085528, "grad_norm": 0.0023421747609972954, "learning_rate": 9.049977499009329e-07, "loss": 0.0483, "step": 281090 }, { "epoch": 3.003365564399808, "grad_norm": 0.010048966854810715, "learning_rate": 9.049878970421082e-07, "loss": 0.0064, "step": 281100 }, { "epoch": 3.0034724077140873, "grad_norm": 0.003653669962659478, "learning_rate": 9.049780437260217e-07, "loss": 0.0078, "step": 281110 }, { "epoch": 3.0035792510283668, "grad_norm": 0.06041260436177254, "learning_rate": 9.049681899526842e-07, "loss": 0.0032, "step": 281120 }, { "epoch": 3.0036860943426467, "grad_norm": 0.014594941399991512, "learning_rate": 9.049583357221073e-07, "loss": 0.0021, "step": 281130 }, { "epoch": 3.003792937656926, "grad_norm": 0.0023307253140956163, "learning_rate": 9.049484810343018e-07, "loss": 0.0215, "step": 281140 }, { "epoch": 3.0038997809712056, "grad_norm": 0.3854399621486664, "learning_rate": 9.049386258892789e-07, "loss": 0.0309, "step": 281150 }, { "epoch": 3.0040066242854855, "grad_norm": 0.009958175010979176, "learning_rate": 9.049287702870496e-07, "loss": 0.0049, "step": 281160 }, { "epoch": 3.004113467599765, "grad_norm": 1.437942385673523, "learning_rate": 9.049189142276252e-07, "loss": 0.033, "step": 281170 }, { "epoch": 3.0042203109140444, "grad_norm": 0.21835479140281677, "learning_rate": 9.049090577110169e-07, "loss": 0.0116, "step": 281180 }, { "epoch": 3.0043271542283243, "grad_norm": 8.987920761108398, "learning_rate": 9.048992007372355e-07, "loss": 0.0112, "step": 281190 }, { "epoch": 3.0044339975426038, "grad_norm": 1.8197060823440552, "learning_rate": 9.048893433062927e-07, "loss": 0.0079, "step": 281200 }, { "epoch": 3.004540840856883, "grad_norm": 0.012672916986048222, "learning_rate": 9.04879485418199e-07, "loss": 0.0237, "step": 281210 }, { "epoch": 3.004647684171163, "grad_norm": 1.7372664213180542, "learning_rate": 9.048696270729659e-07, "loss": 0.0041, "step": 281220 }, { "epoch": 3.0047545274854426, "grad_norm": 0.2817116379737854, "learning_rate": 9.048597682706044e-07, "loss": 0.0067, "step": 281230 }, { "epoch": 3.004861370799722, "grad_norm": 0.004763118922710419, "learning_rate": 9.048499090111257e-07, "loss": 0.0048, "step": 281240 }, { "epoch": 3.004968214114002, "grad_norm": 0.3276385962963104, "learning_rate": 9.048400492945408e-07, "loss": 0.0313, "step": 281250 }, { "epoch": 3.0050750574282814, "grad_norm": 2.191354513168335, "learning_rate": 9.048301891208609e-07, "loss": 0.005, "step": 281260 }, { "epoch": 3.005181900742561, "grad_norm": 0.3237674832344055, "learning_rate": 9.048203284900974e-07, "loss": 0.0082, "step": 281270 }, { "epoch": 3.0052887440568408, "grad_norm": 4.773035526275635, "learning_rate": 9.048104674022611e-07, "loss": 0.0229, "step": 281280 }, { "epoch": 3.0053955873711202, "grad_norm": 0.0034761843271553516, "learning_rate": 9.048006058573631e-07, "loss": 0.0163, "step": 281290 }, { "epoch": 3.0055024306853997, "grad_norm": 0.0833156406879425, "learning_rate": 9.047907438554147e-07, "loss": 0.0139, "step": 281300 }, { "epoch": 3.0056092739996796, "grad_norm": 0.030167914927005768, "learning_rate": 9.047808813964272e-07, "loss": 0.0053, "step": 281310 }, { "epoch": 3.005716117313959, "grad_norm": 9.73298454284668, "learning_rate": 9.047710184804114e-07, "loss": 0.016, "step": 281320 }, { "epoch": 3.0058229606282385, "grad_norm": 10.84473705291748, "learning_rate": 9.047611551073786e-07, "loss": 0.0064, "step": 281330 }, { "epoch": 3.0059298039425184, "grad_norm": 3.714226245880127, "learning_rate": 9.047512912773399e-07, "loss": 0.012, "step": 281340 }, { "epoch": 3.006036647256798, "grad_norm": 0.014407298527657986, "learning_rate": 9.047414269903063e-07, "loss": 0.0062, "step": 281350 }, { "epoch": 3.0061434905710773, "grad_norm": 0.0257791206240654, "learning_rate": 9.047315622462893e-07, "loss": 0.011, "step": 281360 }, { "epoch": 3.0062503338853572, "grad_norm": 0.25161322951316833, "learning_rate": 9.047216970452996e-07, "loss": 0.0212, "step": 281370 }, { "epoch": 3.0063571771996367, "grad_norm": 0.07671412825584412, "learning_rate": 9.047118313873487e-07, "loss": 0.0075, "step": 281380 }, { "epoch": 3.006464020513916, "grad_norm": 0.020627470687031746, "learning_rate": 9.047019652724474e-07, "loss": 0.0382, "step": 281390 }, { "epoch": 3.006570863828196, "grad_norm": 0.004207510035485029, "learning_rate": 9.046920987006072e-07, "loss": 0.0154, "step": 281400 }, { "epoch": 3.0066777071424755, "grad_norm": 0.09526628255844116, "learning_rate": 9.046822316718392e-07, "loss": 0.0088, "step": 281410 }, { "epoch": 3.006784550456755, "grad_norm": 7.631413459777832, "learning_rate": 9.046723641861542e-07, "loss": 0.0259, "step": 281420 }, { "epoch": 3.006891393771035, "grad_norm": 2.6857821941375732, "learning_rate": 9.046624962435638e-07, "loss": 0.0281, "step": 281430 }, { "epoch": 3.0069982370853143, "grad_norm": 2.1953248977661133, "learning_rate": 9.046526278440787e-07, "loss": 0.0129, "step": 281440 }, { "epoch": 3.007105080399594, "grad_norm": 0.010833998210728168, "learning_rate": 9.046427589877104e-07, "loss": 0.0038, "step": 281450 }, { "epoch": 3.0072119237138737, "grad_norm": 0.03072296269237995, "learning_rate": 9.046328896744697e-07, "loss": 0.0258, "step": 281460 }, { "epoch": 3.007318767028153, "grad_norm": 0.04548598825931549, "learning_rate": 9.04623019904368e-07, "loss": 0.008, "step": 281470 }, { "epoch": 3.0074256103424326, "grad_norm": 0.007816596888005733, "learning_rate": 9.046131496774164e-07, "loss": 0.026, "step": 281480 }, { "epoch": 3.0075324536567125, "grad_norm": 0.25106096267700195, "learning_rate": 9.04603278993626e-07, "loss": 0.0114, "step": 281490 }, { "epoch": 3.007639296970992, "grad_norm": 11.626754760742188, "learning_rate": 9.04593407853008e-07, "loss": 0.0997, "step": 281500 }, { "epoch": 3.0077461402852714, "grad_norm": 5.701542854309082, "learning_rate": 9.045835362555733e-07, "loss": 0.0069, "step": 281510 }, { "epoch": 3.0078529835995513, "grad_norm": 2.3657028675079346, "learning_rate": 9.045736642013335e-07, "loss": 0.019, "step": 281520 }, { "epoch": 3.007959826913831, "grad_norm": 0.005108025390654802, "learning_rate": 9.045637916902995e-07, "loss": 0.0026, "step": 281530 }, { "epoch": 3.0080666702281107, "grad_norm": 0.002371289301663637, "learning_rate": 9.045539187224824e-07, "loss": 0.0101, "step": 281540 }, { "epoch": 3.00817351354239, "grad_norm": 0.19360822439193726, "learning_rate": 9.045440452978933e-07, "loss": 0.0211, "step": 281550 }, { "epoch": 3.0082803568566696, "grad_norm": 0.011597366072237492, "learning_rate": 9.045341714165435e-07, "loss": 0.0407, "step": 281560 }, { "epoch": 3.0083872001709495, "grad_norm": 0.2332189530134201, "learning_rate": 9.045242970784442e-07, "loss": 0.0046, "step": 281570 }, { "epoch": 3.008494043485229, "grad_norm": 1.96413254737854, "learning_rate": 9.045144222836062e-07, "loss": 0.0279, "step": 281580 }, { "epoch": 3.0086008867995084, "grad_norm": 0.009138290770351887, "learning_rate": 9.045045470320411e-07, "loss": 0.0113, "step": 281590 }, { "epoch": 3.0087077301137883, "grad_norm": 5.130658149719238, "learning_rate": 9.044946713237596e-07, "loss": 0.0245, "step": 281600 }, { "epoch": 3.008814573428068, "grad_norm": 0.011966109275817871, "learning_rate": 9.044847951587732e-07, "loss": 0.0138, "step": 281610 }, { "epoch": 3.0089214167423473, "grad_norm": 0.042460039258003235, "learning_rate": 9.044749185370932e-07, "loss": 0.004, "step": 281620 }, { "epoch": 3.009028260056627, "grad_norm": 3.6345651149749756, "learning_rate": 9.044650414587303e-07, "loss": 0.0164, "step": 281630 }, { "epoch": 3.0091351033709066, "grad_norm": 0.013270656578242779, "learning_rate": 9.044551639236957e-07, "loss": 0.0308, "step": 281640 }, { "epoch": 3.009241946685186, "grad_norm": 0.47792327404022217, "learning_rate": 9.044452859320008e-07, "loss": 0.0132, "step": 281650 }, { "epoch": 3.009348789999466, "grad_norm": 0.5366161465644836, "learning_rate": 9.044354074836566e-07, "loss": 0.0195, "step": 281660 }, { "epoch": 3.0094556333137454, "grad_norm": 0.004842331632971764, "learning_rate": 9.044255285786744e-07, "loss": 0.0275, "step": 281670 }, { "epoch": 3.009562476628025, "grad_norm": 0.021457882598042488, "learning_rate": 9.044156492170651e-07, "loss": 0.0022, "step": 281680 }, { "epoch": 3.009669319942305, "grad_norm": 1.4193826913833618, "learning_rate": 9.044057693988403e-07, "loss": 0.0084, "step": 281690 }, { "epoch": 3.0097761632565843, "grad_norm": 0.003498716978356242, "learning_rate": 9.043958891240107e-07, "loss": 0.0252, "step": 281700 }, { "epoch": 3.0098830065708637, "grad_norm": 0.04015824571251869, "learning_rate": 9.043860083925875e-07, "loss": 0.0016, "step": 281710 }, { "epoch": 3.0099898498851436, "grad_norm": 0.10184461623430252, "learning_rate": 9.04376127204582e-07, "loss": 0.0069, "step": 281720 }, { "epoch": 3.010096693199423, "grad_norm": 0.44490933418273926, "learning_rate": 9.043662455600054e-07, "loss": 0.0175, "step": 281730 }, { "epoch": 3.0102035365137025, "grad_norm": 0.1465674489736557, "learning_rate": 9.04356363458869e-07, "loss": 0.017, "step": 281740 }, { "epoch": 3.0103103798279824, "grad_norm": 0.36013904213905334, "learning_rate": 9.043464809011834e-07, "loss": 0.0129, "step": 281750 }, { "epoch": 3.010417223142262, "grad_norm": 0.058771658688783646, "learning_rate": 9.043365978869602e-07, "loss": 0.0044, "step": 281760 }, { "epoch": 3.0105240664565414, "grad_norm": 0.022522324696183205, "learning_rate": 9.043267144162106e-07, "loss": 0.0044, "step": 281770 }, { "epoch": 3.0106309097708213, "grad_norm": 2.2040417194366455, "learning_rate": 9.043168304889455e-07, "loss": 0.0062, "step": 281780 }, { "epoch": 3.0107377530851007, "grad_norm": 0.25420135259628296, "learning_rate": 9.043069461051763e-07, "loss": 0.0137, "step": 281790 }, { "epoch": 3.01084459639938, "grad_norm": 0.09218699485063553, "learning_rate": 9.042970612649139e-07, "loss": 0.012, "step": 281800 }, { "epoch": 3.01095143971366, "grad_norm": 0.42425546050071716, "learning_rate": 9.042871759681696e-07, "loss": 0.0145, "step": 281810 }, { "epoch": 3.0110582830279395, "grad_norm": 4.111822128295898, "learning_rate": 9.042772902149547e-07, "loss": 0.0576, "step": 281820 }, { "epoch": 3.011165126342219, "grad_norm": 1.824288249015808, "learning_rate": 9.042674040052802e-07, "loss": 0.0402, "step": 281830 }, { "epoch": 3.011271969656499, "grad_norm": 2.4629735946655273, "learning_rate": 9.042575173391572e-07, "loss": 0.017, "step": 281840 }, { "epoch": 3.0113788129707784, "grad_norm": 0.22073447704315186, "learning_rate": 9.042476302165971e-07, "loss": 0.0089, "step": 281850 }, { "epoch": 3.011485656285058, "grad_norm": 0.024528231471776962, "learning_rate": 9.042377426376108e-07, "loss": 0.0025, "step": 281860 }, { "epoch": 3.0115924995993377, "grad_norm": 0.9464972019195557, "learning_rate": 9.042278546022095e-07, "loss": 0.0225, "step": 281870 }, { "epoch": 3.011699342913617, "grad_norm": 0.2632816433906555, "learning_rate": 9.042179661104046e-07, "loss": 0.001, "step": 281880 }, { "epoch": 3.0118061862278966, "grad_norm": 2.4989588260650635, "learning_rate": 9.042080771622071e-07, "loss": 0.0091, "step": 281890 }, { "epoch": 3.0119130295421765, "grad_norm": 1.4437421560287476, "learning_rate": 9.041981877576281e-07, "loss": 0.0092, "step": 281900 }, { "epoch": 3.012019872856456, "grad_norm": 0.003156710881739855, "learning_rate": 9.041882978966788e-07, "loss": 0.0199, "step": 281910 }, { "epoch": 3.0121267161707355, "grad_norm": 0.5922430157661438, "learning_rate": 9.041784075793705e-07, "loss": 0.0295, "step": 281920 }, { "epoch": 3.0122335594850154, "grad_norm": 0.032730937004089355, "learning_rate": 9.041685168057142e-07, "loss": 0.0224, "step": 281930 }, { "epoch": 3.012340402799295, "grad_norm": 0.024818003177642822, "learning_rate": 9.041586255757212e-07, "loss": 0.0125, "step": 281940 }, { "epoch": 3.0124472461135743, "grad_norm": 0.1551956832408905, "learning_rate": 9.041487338894026e-07, "loss": 0.0049, "step": 281950 }, { "epoch": 3.012554089427854, "grad_norm": 0.08626765757799149, "learning_rate": 9.041388417467696e-07, "loss": 0.0065, "step": 281960 }, { "epoch": 3.0126609327421336, "grad_norm": 0.03976147621870041, "learning_rate": 9.041289491478333e-07, "loss": 0.0215, "step": 281970 }, { "epoch": 3.012767776056413, "grad_norm": 8.483651161193848, "learning_rate": 9.04119056092605e-07, "loss": 0.0174, "step": 281980 }, { "epoch": 3.012874619370693, "grad_norm": 0.025542685762047768, "learning_rate": 9.041091625810957e-07, "loss": 0.0049, "step": 281990 }, { "epoch": 3.0129814626849725, "grad_norm": 0.02512527070939541, "learning_rate": 9.040992686133166e-07, "loss": 0.0069, "step": 282000 }, { "epoch": 3.013088305999252, "grad_norm": 0.004970494657754898, "learning_rate": 9.04089374189279e-07, "loss": 0.0159, "step": 282010 }, { "epoch": 3.013195149313532, "grad_norm": 3.41817307472229, "learning_rate": 9.040794793089941e-07, "loss": 0.0294, "step": 282020 }, { "epoch": 3.0133019926278113, "grad_norm": 0.31879493594169617, "learning_rate": 9.040695839724727e-07, "loss": 0.0299, "step": 282030 }, { "epoch": 3.0134088359420907, "grad_norm": 2.64449405670166, "learning_rate": 9.040596881797265e-07, "loss": 0.0016, "step": 282040 }, { "epoch": 3.0135156792563706, "grad_norm": 3.804189443588257, "learning_rate": 9.040497919307663e-07, "loss": 0.0053, "step": 282050 }, { "epoch": 3.01362252257065, "grad_norm": 0.005264141131192446, "learning_rate": 9.040398952256034e-07, "loss": 0.0243, "step": 282060 }, { "epoch": 3.0137293658849296, "grad_norm": 0.04219600930809975, "learning_rate": 9.040299980642491e-07, "loss": 0.0036, "step": 282070 }, { "epoch": 3.0138362091992095, "grad_norm": 0.09398376941680908, "learning_rate": 9.040201004467143e-07, "loss": 0.0137, "step": 282080 }, { "epoch": 3.013943052513489, "grad_norm": 0.06641270965337753, "learning_rate": 9.040102023730103e-07, "loss": 0.0154, "step": 282090 }, { "epoch": 3.0140498958277684, "grad_norm": 0.002951889531686902, "learning_rate": 9.040003038431483e-07, "loss": 0.0066, "step": 282100 }, { "epoch": 3.0141567391420483, "grad_norm": 1.4870573282241821, "learning_rate": 9.039904048571395e-07, "loss": 0.0283, "step": 282110 }, { "epoch": 3.0142635824563278, "grad_norm": 1.9872747659683228, "learning_rate": 9.039805054149951e-07, "loss": 0.0017, "step": 282120 }, { "epoch": 3.014370425770607, "grad_norm": 2.608152151107788, "learning_rate": 9.039706055167261e-07, "loss": 0.0174, "step": 282130 }, { "epoch": 3.014477269084887, "grad_norm": 0.033370595425367355, "learning_rate": 9.039607051623439e-07, "loss": 0.0041, "step": 282140 }, { "epoch": 3.0145841123991666, "grad_norm": 2.2390689849853516, "learning_rate": 9.039508043518596e-07, "loss": 0.0021, "step": 282150 }, { "epoch": 3.014690955713446, "grad_norm": 0.018020527437329292, "learning_rate": 9.039409030852841e-07, "loss": 0.029, "step": 282160 }, { "epoch": 3.014797799027726, "grad_norm": 0.0167448278516531, "learning_rate": 9.039310013626292e-07, "loss": 0.0223, "step": 282170 }, { "epoch": 3.0149046423420054, "grad_norm": 0.006848437245935202, "learning_rate": 9.039210991839054e-07, "loss": 0.0004, "step": 282180 }, { "epoch": 3.015011485656285, "grad_norm": 1.8706510066986084, "learning_rate": 9.039111965491243e-07, "loss": 0.0146, "step": 282190 }, { "epoch": 3.0151183289705648, "grad_norm": 0.15656623244285583, "learning_rate": 9.03901293458297e-07, "loss": 0.0063, "step": 282200 }, { "epoch": 3.015225172284844, "grad_norm": 0.06987379491329193, "learning_rate": 9.038913899114347e-07, "loss": 0.0333, "step": 282210 }, { "epoch": 3.0153320155991237, "grad_norm": 1.4039709568023682, "learning_rate": 9.038814859085484e-07, "loss": 0.0091, "step": 282220 }, { "epoch": 3.0154388589134036, "grad_norm": 0.5078092813491821, "learning_rate": 9.038715814496496e-07, "loss": 0.0418, "step": 282230 }, { "epoch": 3.015545702227683, "grad_norm": 0.0024204275105148554, "learning_rate": 9.038616765347491e-07, "loss": 0.0086, "step": 282240 }, { "epoch": 3.0156525455419625, "grad_norm": 1.3013142347335815, "learning_rate": 9.038517711638584e-07, "loss": 0.031, "step": 282250 }, { "epoch": 3.0157593888562424, "grad_norm": 1.9225274324417114, "learning_rate": 9.038418653369886e-07, "loss": 0.0038, "step": 282260 }, { "epoch": 3.015866232170522, "grad_norm": 5.066603183746338, "learning_rate": 9.038319590541508e-07, "loss": 0.039, "step": 282270 }, { "epoch": 3.0159730754848018, "grad_norm": 2.97127628326416, "learning_rate": 9.038220523153564e-07, "loss": 0.0009, "step": 282280 }, { "epoch": 3.016079918799081, "grad_norm": 0.030417123809456825, "learning_rate": 9.038121451206162e-07, "loss": 0.0055, "step": 282290 }, { "epoch": 3.0161867621133607, "grad_norm": 0.0023068361915647984, "learning_rate": 9.038022374699417e-07, "loss": 0.0058, "step": 282300 }, { "epoch": 3.0162936054276406, "grad_norm": 0.0017082897247746587, "learning_rate": 9.03792329363344e-07, "loss": 0.0119, "step": 282310 }, { "epoch": 3.01640044874192, "grad_norm": 0.11532402783632278, "learning_rate": 9.037824208008342e-07, "loss": 0.0644, "step": 282320 }, { "epoch": 3.0165072920561995, "grad_norm": 0.9802526831626892, "learning_rate": 9.037725117824238e-07, "loss": 0.0068, "step": 282330 }, { "epoch": 3.0166141353704794, "grad_norm": 4.74855375289917, "learning_rate": 9.037626023081235e-07, "loss": 0.0022, "step": 282340 }, { "epoch": 3.016720978684759, "grad_norm": 0.057455919682979584, "learning_rate": 9.037526923779448e-07, "loss": 0.0042, "step": 282350 }, { "epoch": 3.0168278219990383, "grad_norm": 0.6073872447013855, "learning_rate": 9.03742781991899e-07, "loss": 0.0267, "step": 282360 }, { "epoch": 3.016934665313318, "grad_norm": 0.029213419184088707, "learning_rate": 9.03732871149997e-07, "loss": 0.0089, "step": 282370 }, { "epoch": 3.0170415086275977, "grad_norm": 0.021809495985507965, "learning_rate": 9.037229598522502e-07, "loss": 0.0198, "step": 282380 }, { "epoch": 3.017148351941877, "grad_norm": 0.41657698154449463, "learning_rate": 9.037130480986696e-07, "loss": 0.0045, "step": 282390 }, { "epoch": 3.017255195256157, "grad_norm": 7.45395565032959, "learning_rate": 9.037031358892666e-07, "loss": 0.0179, "step": 282400 }, { "epoch": 3.0173620385704365, "grad_norm": 1.7392499446868896, "learning_rate": 9.036932232240522e-07, "loss": 0.0236, "step": 282410 }, { "epoch": 3.017468881884716, "grad_norm": 3.901343822479248, "learning_rate": 9.036833101030377e-07, "loss": 0.0295, "step": 282420 }, { "epoch": 3.017575725198996, "grad_norm": 18.049070358276367, "learning_rate": 9.036733965262343e-07, "loss": 0.0301, "step": 282430 }, { "epoch": 3.0176825685132753, "grad_norm": 0.04961355775594711, "learning_rate": 9.036634824936532e-07, "loss": 0.0442, "step": 282440 }, { "epoch": 3.017789411827555, "grad_norm": 8.685062408447266, "learning_rate": 9.036535680053055e-07, "loss": 0.0177, "step": 282450 }, { "epoch": 3.0178962551418347, "grad_norm": 1.019801378250122, "learning_rate": 9.036436530612024e-07, "loss": 0.0606, "step": 282460 }, { "epoch": 3.018003098456114, "grad_norm": 0.3833109140396118, "learning_rate": 9.036337376613554e-07, "loss": 0.0004, "step": 282470 }, { "epoch": 3.0181099417703936, "grad_norm": 0.018836064264178276, "learning_rate": 9.036238218057753e-07, "loss": 0.003, "step": 282480 }, { "epoch": 3.0182167850846735, "grad_norm": 0.0028851099777966738, "learning_rate": 9.036139054944733e-07, "loss": 0.0656, "step": 282490 }, { "epoch": 3.018323628398953, "grad_norm": 1.316776156425476, "learning_rate": 9.036039887274609e-07, "loss": 0.0022, "step": 282500 }, { "epoch": 3.0184304717132324, "grad_norm": 0.03890436887741089, "learning_rate": 9.035940715047492e-07, "loss": 0.0173, "step": 282510 }, { "epoch": 3.0185373150275123, "grad_norm": 5.338818550109863, "learning_rate": 9.035841538263491e-07, "loss": 0.0051, "step": 282520 }, { "epoch": 3.018644158341792, "grad_norm": 0.04732402041554451, "learning_rate": 9.035742356922722e-07, "loss": 0.0036, "step": 282530 }, { "epoch": 3.0187510016560712, "grad_norm": 14.710929870605469, "learning_rate": 9.035643171025296e-07, "loss": 0.0151, "step": 282540 }, { "epoch": 3.018857844970351, "grad_norm": 0.7121937274932861, "learning_rate": 9.035543980571322e-07, "loss": 0.0012, "step": 282550 }, { "epoch": 3.0189646882846306, "grad_norm": 0.00740481074899435, "learning_rate": 9.035444785560916e-07, "loss": 0.0534, "step": 282560 }, { "epoch": 3.01907153159891, "grad_norm": 1.5134276151657104, "learning_rate": 9.035345585994188e-07, "loss": 0.0117, "step": 282570 }, { "epoch": 3.01917837491319, "grad_norm": 3.6485061645507812, "learning_rate": 9.03524638187125e-07, "loss": 0.0175, "step": 282580 }, { "epoch": 3.0192852182274694, "grad_norm": 10.564924240112305, "learning_rate": 9.035147173192214e-07, "loss": 0.0184, "step": 282590 }, { "epoch": 3.019392061541749, "grad_norm": 0.008015470579266548, "learning_rate": 9.035047959957192e-07, "loss": 0.0063, "step": 282600 }, { "epoch": 3.019498904856029, "grad_norm": 2.318472146987915, "learning_rate": 9.034948742166297e-07, "loss": 0.0259, "step": 282610 }, { "epoch": 3.0196057481703082, "grad_norm": 0.17598824203014374, "learning_rate": 9.034849519819641e-07, "loss": 0.0088, "step": 282620 }, { "epoch": 3.0197125914845877, "grad_norm": 0.14093369245529175, "learning_rate": 9.034750292917335e-07, "loss": 0.0149, "step": 282630 }, { "epoch": 3.0198194347988676, "grad_norm": 0.2767926752567291, "learning_rate": 9.03465106145949e-07, "loss": 0.02, "step": 282640 }, { "epoch": 3.019926278113147, "grad_norm": 4.71560525894165, "learning_rate": 9.034551825446221e-07, "loss": 0.0125, "step": 282650 }, { "epoch": 3.0200331214274265, "grad_norm": 0.0015142073389142752, "learning_rate": 9.034452584877638e-07, "loss": 0.0146, "step": 282660 }, { "epoch": 3.0201399647417064, "grad_norm": 0.5743493437767029, "learning_rate": 9.034353339753853e-07, "loss": 0.0065, "step": 282670 }, { "epoch": 3.020246808055986, "grad_norm": 2.658141851425171, "learning_rate": 9.03425409007498e-07, "loss": 0.0145, "step": 282680 }, { "epoch": 3.0203536513702653, "grad_norm": 0.6567137241363525, "learning_rate": 9.034154835841128e-07, "loss": 0.0147, "step": 282690 }, { "epoch": 3.0204604946845452, "grad_norm": 3.076399326324463, "learning_rate": 9.034055577052412e-07, "loss": 0.0058, "step": 282700 }, { "epoch": 3.0205673379988247, "grad_norm": 1.657131314277649, "learning_rate": 9.033956313708942e-07, "loss": 0.0072, "step": 282710 }, { "epoch": 3.020674181313104, "grad_norm": 7.811465740203857, "learning_rate": 9.03385704581083e-07, "loss": 0.0422, "step": 282720 }, { "epoch": 3.020781024627384, "grad_norm": 0.0018271071603521705, "learning_rate": 9.03375777335819e-07, "loss": 0.0077, "step": 282730 }, { "epoch": 3.0208878679416635, "grad_norm": 0.0075262850150465965, "learning_rate": 9.033658496351135e-07, "loss": 0.0175, "step": 282740 }, { "epoch": 3.020994711255943, "grad_norm": 4.347872734069824, "learning_rate": 9.033559214789772e-07, "loss": 0.0253, "step": 282750 }, { "epoch": 3.021101554570223, "grad_norm": 0.08938982337713242, "learning_rate": 9.033459928674218e-07, "loss": 0.0073, "step": 282760 }, { "epoch": 3.0212083978845024, "grad_norm": 0.0034442092292010784, "learning_rate": 9.033360638004584e-07, "loss": 0.0055, "step": 282770 }, { "epoch": 3.021315241198782, "grad_norm": 0.16346609592437744, "learning_rate": 9.033261342780979e-07, "loss": 0.0014, "step": 282780 }, { "epoch": 3.0214220845130617, "grad_norm": 5.468307018280029, "learning_rate": 9.03316204300352e-07, "loss": 0.0101, "step": 282790 }, { "epoch": 3.021528927827341, "grad_norm": 0.04281293973326683, "learning_rate": 9.033062738672315e-07, "loss": 0.0118, "step": 282800 }, { "epoch": 3.0216357711416206, "grad_norm": 0.1923932284116745, "learning_rate": 9.032963429787477e-07, "loss": 0.0081, "step": 282810 }, { "epoch": 3.0217426144559005, "grad_norm": 0.6857026815414429, "learning_rate": 9.032864116349119e-07, "loss": 0.0208, "step": 282820 }, { "epoch": 3.02184945777018, "grad_norm": 0.01734752580523491, "learning_rate": 9.032764798357356e-07, "loss": 0.0009, "step": 282830 }, { "epoch": 3.0219563010844595, "grad_norm": 1.4973433017730713, "learning_rate": 9.032665475812294e-07, "loss": 0.0118, "step": 282840 }, { "epoch": 3.0220631443987394, "grad_norm": 0.020690204575657845, "learning_rate": 9.032566148714049e-07, "loss": 0.0063, "step": 282850 }, { "epoch": 3.022169987713019, "grad_norm": 0.4664931297302246, "learning_rate": 9.032466817062734e-07, "loss": 0.0041, "step": 282860 }, { "epoch": 3.0222768310272983, "grad_norm": 0.5986654162406921, "learning_rate": 9.032367480858458e-07, "loss": 0.001, "step": 282870 }, { "epoch": 3.022383674341578, "grad_norm": 0.0017139684641733766, "learning_rate": 9.032268140101335e-07, "loss": 0.0023, "step": 282880 }, { "epoch": 3.0224905176558576, "grad_norm": 0.0653252974152565, "learning_rate": 9.032168794791479e-07, "loss": 0.0063, "step": 282890 }, { "epoch": 3.022597360970137, "grad_norm": 4.549435615539551, "learning_rate": 9.032069444928998e-07, "loss": 0.0232, "step": 282900 }, { "epoch": 3.022704204284417, "grad_norm": 0.26861289143562317, "learning_rate": 9.031970090514008e-07, "loss": 0.0228, "step": 282910 }, { "epoch": 3.0228110475986965, "grad_norm": 1.3098033666610718, "learning_rate": 9.031870731546618e-07, "loss": 0.0046, "step": 282920 }, { "epoch": 3.022917890912976, "grad_norm": 1.317244291305542, "learning_rate": 9.031771368026941e-07, "loss": 0.0243, "step": 282930 }, { "epoch": 3.023024734227256, "grad_norm": 0.9052402973175049, "learning_rate": 9.03167199995509e-07, "loss": 0.0012, "step": 282940 }, { "epoch": 3.0231315775415353, "grad_norm": 0.007581938523799181, "learning_rate": 9.031572627331179e-07, "loss": 0.0058, "step": 282950 }, { "epoch": 3.0232384208558147, "grad_norm": 0.06966890394687653, "learning_rate": 9.031473250155317e-07, "loss": 0.0118, "step": 282960 }, { "epoch": 3.0233452641700946, "grad_norm": 2.9463272094726562, "learning_rate": 9.031373868427616e-07, "loss": 0.008, "step": 282970 }, { "epoch": 3.023452107484374, "grad_norm": 0.1573205590248108, "learning_rate": 9.031274482148192e-07, "loss": 0.0065, "step": 282980 }, { "epoch": 3.0235589507986536, "grad_norm": 1.6691524982452393, "learning_rate": 9.031175091317154e-07, "loss": 0.0013, "step": 282990 }, { "epoch": 3.0236657941129335, "grad_norm": 0.07238197326660156, "learning_rate": 9.031075695934615e-07, "loss": 0.0147, "step": 283000 }, { "epoch": 3.023772637427213, "grad_norm": 4.6256794929504395, "learning_rate": 9.030976296000687e-07, "loss": 0.013, "step": 283010 }, { "epoch": 3.023879480741493, "grad_norm": 0.046496231108903885, "learning_rate": 9.030876891515483e-07, "loss": 0.0068, "step": 283020 }, { "epoch": 3.0239863240557723, "grad_norm": 0.04613519087433815, "learning_rate": 9.030777482479115e-07, "loss": 0.0208, "step": 283030 }, { "epoch": 3.0240931673700517, "grad_norm": 3.94875168800354, "learning_rate": 9.030678068891695e-07, "loss": 0.0141, "step": 283040 }, { "epoch": 3.0242000106843316, "grad_norm": 0.8972745537757874, "learning_rate": 9.030578650753334e-07, "loss": 0.0072, "step": 283050 }, { "epoch": 3.024306853998611, "grad_norm": 0.3622606098651886, "learning_rate": 9.030479228064147e-07, "loss": 0.0026, "step": 283060 }, { "epoch": 3.0244136973128906, "grad_norm": 0.0035471064038574696, "learning_rate": 9.030379800824243e-07, "loss": 0.0003, "step": 283070 }, { "epoch": 3.0245205406271705, "grad_norm": 0.3952771723270416, "learning_rate": 9.030280369033738e-07, "loss": 0.0086, "step": 283080 }, { "epoch": 3.02462738394145, "grad_norm": 1.0852599143981934, "learning_rate": 9.030180932692742e-07, "loss": 0.0087, "step": 283090 }, { "epoch": 3.0247342272557294, "grad_norm": 0.011357782408595085, "learning_rate": 9.030081491801367e-07, "loss": 0.0071, "step": 283100 }, { "epoch": 3.0248410705700093, "grad_norm": 0.02321632206439972, "learning_rate": 9.029982046359726e-07, "loss": 0.0061, "step": 283110 }, { "epoch": 3.0249479138842887, "grad_norm": 0.0009614788577891886, "learning_rate": 9.029882596367931e-07, "loss": 0.0046, "step": 283120 }, { "epoch": 3.025054757198568, "grad_norm": 0.015435240231454372, "learning_rate": 9.029783141826093e-07, "loss": 0.0251, "step": 283130 }, { "epoch": 3.025161600512848, "grad_norm": 0.5394574403762817, "learning_rate": 9.029683682734327e-07, "loss": 0.0163, "step": 283140 }, { "epoch": 3.0252684438271276, "grad_norm": 0.036110952496528625, "learning_rate": 9.029584219092744e-07, "loss": 0.0152, "step": 283150 }, { "epoch": 3.025375287141407, "grad_norm": 0.0020206535700708628, "learning_rate": 9.029484750901456e-07, "loss": 0.0246, "step": 283160 }, { "epoch": 3.025482130455687, "grad_norm": 0.009290345013141632, "learning_rate": 9.029385278160577e-07, "loss": 0.0185, "step": 283170 }, { "epoch": 3.0255889737699664, "grad_norm": 0.11318786442279816, "learning_rate": 9.029285800870217e-07, "loss": 0.0056, "step": 283180 }, { "epoch": 3.025695817084246, "grad_norm": 0.0687485858798027, "learning_rate": 9.029186319030488e-07, "loss": 0.0111, "step": 283190 }, { "epoch": 3.0258026603985257, "grad_norm": 0.13688205182552338, "learning_rate": 9.029086832641504e-07, "loss": 0.004, "step": 283200 }, { "epoch": 3.025909503712805, "grad_norm": 4.627528190612793, "learning_rate": 9.028987341703378e-07, "loss": 0.0128, "step": 283210 }, { "epoch": 3.0260163470270847, "grad_norm": 0.4701802730560303, "learning_rate": 9.028887846216221e-07, "loss": 0.0163, "step": 283220 }, { "epoch": 3.0261231903413646, "grad_norm": 2.0537126064300537, "learning_rate": 9.028788346180145e-07, "loss": 0.0122, "step": 283230 }, { "epoch": 3.026230033655644, "grad_norm": 5.170984268188477, "learning_rate": 9.028688841595263e-07, "loss": 0.018, "step": 283240 }, { "epoch": 3.0263368769699235, "grad_norm": 0.0015687869163230062, "learning_rate": 9.028589332461687e-07, "loss": 0.0067, "step": 283250 }, { "epoch": 3.0264437202842034, "grad_norm": 1.9715869426727295, "learning_rate": 9.02848981877953e-07, "loss": 0.0148, "step": 283260 }, { "epoch": 3.026550563598483, "grad_norm": 0.0009266304550692439, "learning_rate": 9.028390300548905e-07, "loss": 0.0028, "step": 283270 }, { "epoch": 3.0266574069127623, "grad_norm": 5.834992408752441, "learning_rate": 9.028290777769921e-07, "loss": 0.0071, "step": 283280 }, { "epoch": 3.026764250227042, "grad_norm": 5.762646198272705, "learning_rate": 9.028191250442694e-07, "loss": 0.0171, "step": 283290 }, { "epoch": 3.0268710935413217, "grad_norm": 0.0038259609136730433, "learning_rate": 9.028091718567335e-07, "loss": 0.0005, "step": 283300 }, { "epoch": 3.026977936855601, "grad_norm": 0.10898549109697342, "learning_rate": 9.027992182143957e-07, "loss": 0.0101, "step": 283310 }, { "epoch": 3.027084780169881, "grad_norm": 0.01565239205956459, "learning_rate": 9.02789264117267e-07, "loss": 0.0217, "step": 283320 }, { "epoch": 3.0271916234841605, "grad_norm": 0.1780504286289215, "learning_rate": 9.02779309565359e-07, "loss": 0.0049, "step": 283330 }, { "epoch": 3.02729846679844, "grad_norm": 0.13107486069202423, "learning_rate": 9.027693545586828e-07, "loss": 0.0109, "step": 283340 }, { "epoch": 3.02740531011272, "grad_norm": 0.001203926862217486, "learning_rate": 9.027593990972494e-07, "loss": 0.0118, "step": 283350 }, { "epoch": 3.0275121534269993, "grad_norm": 0.0689578726887703, "learning_rate": 9.027494431810703e-07, "loss": 0.0059, "step": 283360 }, { "epoch": 3.0276189967412788, "grad_norm": 1.2481034994125366, "learning_rate": 9.027394868101568e-07, "loss": 0.0118, "step": 283370 }, { "epoch": 3.0277258400555587, "grad_norm": 0.0014017015928402543, "learning_rate": 9.027295299845199e-07, "loss": 0.0031, "step": 283380 }, { "epoch": 3.027832683369838, "grad_norm": 0.005912642925977707, "learning_rate": 9.02719572704171e-07, "loss": 0.0264, "step": 283390 }, { "epoch": 3.0279395266841176, "grad_norm": 0.021724920719861984, "learning_rate": 9.027096149691213e-07, "loss": 0.022, "step": 283400 }, { "epoch": 3.0280463699983975, "grad_norm": 2.3633463382720947, "learning_rate": 9.026996567793821e-07, "loss": 0.0042, "step": 283410 }, { "epoch": 3.028153213312677, "grad_norm": 0.006715310737490654, "learning_rate": 9.026896981349645e-07, "loss": 0.0023, "step": 283420 }, { "epoch": 3.0282600566269564, "grad_norm": 4.783882141113281, "learning_rate": 9.0267973903588e-07, "loss": 0.04, "step": 283430 }, { "epoch": 3.0283668999412363, "grad_norm": 0.00574773782864213, "learning_rate": 9.026697794821396e-07, "loss": 0.014, "step": 283440 }, { "epoch": 3.0284737432555158, "grad_norm": 0.31631430983543396, "learning_rate": 9.026598194737546e-07, "loss": 0.0098, "step": 283450 }, { "epoch": 3.0285805865697952, "grad_norm": 5.587789058685303, "learning_rate": 9.026498590107363e-07, "loss": 0.0121, "step": 283460 }, { "epoch": 3.028687429884075, "grad_norm": 0.23910915851593018, "learning_rate": 9.026398980930958e-07, "loss": 0.0035, "step": 283470 }, { "epoch": 3.0287942731983546, "grad_norm": 3.892258405685425, "learning_rate": 9.026299367208446e-07, "loss": 0.0265, "step": 283480 }, { "epoch": 3.028901116512634, "grad_norm": 5.8716888427734375, "learning_rate": 9.026199748939938e-07, "loss": 0.0071, "step": 283490 }, { "epoch": 3.029007959826914, "grad_norm": 0.057880550622940063, "learning_rate": 9.026100126125547e-07, "loss": 0.0096, "step": 283500 }, { "epoch": 3.0291148031411934, "grad_norm": 0.02773292362689972, "learning_rate": 9.026000498765385e-07, "loss": 0.0022, "step": 283510 }, { "epoch": 3.029221646455473, "grad_norm": 0.028956305235624313, "learning_rate": 9.025900866859564e-07, "loss": 0.0155, "step": 283520 }, { "epoch": 3.0293284897697528, "grad_norm": 0.4024941623210907, "learning_rate": 9.025801230408196e-07, "loss": 0.0305, "step": 283530 }, { "epoch": 3.0294353330840322, "grad_norm": 0.0015661531360819936, "learning_rate": 9.025701589411396e-07, "loss": 0.0085, "step": 283540 }, { "epoch": 3.0295421763983117, "grad_norm": 1.269514799118042, "learning_rate": 9.025601943869276e-07, "loss": 0.0142, "step": 283550 }, { "epoch": 3.0296490197125916, "grad_norm": 3.046292781829834, "learning_rate": 9.025502293781946e-07, "loss": 0.0426, "step": 283560 }, { "epoch": 3.029755863026871, "grad_norm": 0.3001405894756317, "learning_rate": 9.025402639149521e-07, "loss": 0.0246, "step": 283570 }, { "epoch": 3.0298627063411505, "grad_norm": 4.087014675140381, "learning_rate": 9.025302979972112e-07, "loss": 0.0105, "step": 283580 }, { "epoch": 3.0299695496554304, "grad_norm": 3.316544532775879, "learning_rate": 9.025203316249831e-07, "loss": 0.0202, "step": 283590 }, { "epoch": 3.03007639296971, "grad_norm": 0.004918423015624285, "learning_rate": 9.025103647982794e-07, "loss": 0.0079, "step": 283600 }, { "epoch": 3.0301832362839893, "grad_norm": 5.485698699951172, "learning_rate": 9.025003975171111e-07, "loss": 0.0037, "step": 283610 }, { "epoch": 3.0302900795982692, "grad_norm": 1.9257011413574219, "learning_rate": 9.024904297814893e-07, "loss": 0.0283, "step": 283620 }, { "epoch": 3.0303969229125487, "grad_norm": 0.10202497988939285, "learning_rate": 9.024804615914254e-07, "loss": 0.0083, "step": 283630 }, { "epoch": 3.030503766226828, "grad_norm": 0.005414809565991163, "learning_rate": 9.024704929469309e-07, "loss": 0.0175, "step": 283640 }, { "epoch": 3.030610609541108, "grad_norm": 0.01797172985970974, "learning_rate": 9.024605238480168e-07, "loss": 0.0023, "step": 283650 }, { "epoch": 3.0307174528553875, "grad_norm": 0.02205348201096058, "learning_rate": 9.024505542946943e-07, "loss": 0.0069, "step": 283660 }, { "epoch": 3.030824296169667, "grad_norm": 0.4101298749446869, "learning_rate": 9.024405842869746e-07, "loss": 0.0075, "step": 283670 }, { "epoch": 3.030931139483947, "grad_norm": 0.8506816029548645, "learning_rate": 9.024306138248694e-07, "loss": 0.0159, "step": 283680 }, { "epoch": 3.0310379827982263, "grad_norm": 0.12158533930778503, "learning_rate": 9.024206429083896e-07, "loss": 0.0066, "step": 283690 }, { "epoch": 3.031144826112506, "grad_norm": 1.3437446355819702, "learning_rate": 9.024106715375464e-07, "loss": 0.0485, "step": 283700 }, { "epoch": 3.0312516694267857, "grad_norm": 0.025531379505991936, "learning_rate": 9.024006997123512e-07, "loss": 0.0169, "step": 283710 }, { "epoch": 3.031358512741065, "grad_norm": 0.6563299894332886, "learning_rate": 9.023907274328153e-07, "loss": 0.0101, "step": 283720 }, { "epoch": 3.031465356055345, "grad_norm": 2.8695192337036133, "learning_rate": 9.023807546989499e-07, "loss": 0.0365, "step": 283730 }, { "epoch": 3.0315721993696245, "grad_norm": 0.015880340710282326, "learning_rate": 9.023707815107663e-07, "loss": 0.004, "step": 283740 }, { "epoch": 3.031679042683904, "grad_norm": 0.03849449008703232, "learning_rate": 9.023608078682756e-07, "loss": 0.0093, "step": 283750 }, { "epoch": 3.0317858859981834, "grad_norm": 0.018389558419585228, "learning_rate": 9.023508337714893e-07, "loss": 0.0085, "step": 283760 }, { "epoch": 3.0318927293124633, "grad_norm": 2.1771488189697266, "learning_rate": 9.023408592204184e-07, "loss": 0.0062, "step": 283770 }, { "epoch": 3.031999572626743, "grad_norm": 0.00508448202162981, "learning_rate": 9.023308842150745e-07, "loss": 0.0122, "step": 283780 }, { "epoch": 3.0321064159410227, "grad_norm": 0.599176287651062, "learning_rate": 9.023209087554686e-07, "loss": 0.013, "step": 283790 }, { "epoch": 3.032213259255302, "grad_norm": 0.00891541875898838, "learning_rate": 9.023109328416118e-07, "loss": 0.026, "step": 283800 }, { "epoch": 3.0323201025695816, "grad_norm": 1.4905250072479248, "learning_rate": 9.02300956473516e-07, "loss": 0.0034, "step": 283810 }, { "epoch": 3.0324269458838615, "grad_norm": 0.016459589824080467, "learning_rate": 9.022909796511917e-07, "loss": 0.019, "step": 283820 }, { "epoch": 3.032533789198141, "grad_norm": 4.372298240661621, "learning_rate": 9.022810023746508e-07, "loss": 0.0174, "step": 283830 }, { "epoch": 3.0326406325124204, "grad_norm": 0.22186461091041565, "learning_rate": 9.02271024643904e-07, "loss": 0.0225, "step": 283840 }, { "epoch": 3.0327474758267003, "grad_norm": 4.727686882019043, "learning_rate": 9.022610464589631e-07, "loss": 0.0132, "step": 283850 }, { "epoch": 3.03285431914098, "grad_norm": 3.296778917312622, "learning_rate": 9.022510678198389e-07, "loss": 0.0221, "step": 283860 }, { "epoch": 3.0329611624552593, "grad_norm": 3.907567024230957, "learning_rate": 9.022410887265431e-07, "loss": 0.008, "step": 283870 }, { "epoch": 3.033068005769539, "grad_norm": 0.06679347157478333, "learning_rate": 9.022311091790866e-07, "loss": 0.0084, "step": 283880 }, { "epoch": 3.0331748490838186, "grad_norm": 9.887298583984375, "learning_rate": 9.022211291774809e-07, "loss": 0.0054, "step": 283890 }, { "epoch": 3.033281692398098, "grad_norm": 0.005497615784406662, "learning_rate": 9.022111487217372e-07, "loss": 0.0282, "step": 283900 }, { "epoch": 3.033388535712378, "grad_norm": 0.07596951723098755, "learning_rate": 9.022011678118667e-07, "loss": 0.0028, "step": 283910 }, { "epoch": 3.0334953790266574, "grad_norm": 0.020540626719594002, "learning_rate": 9.021911864478808e-07, "loss": 0.0094, "step": 283920 }, { "epoch": 3.033602222340937, "grad_norm": 2.2976272106170654, "learning_rate": 9.021812046297905e-07, "loss": 0.0336, "step": 283930 }, { "epoch": 3.033709065655217, "grad_norm": 2.0640320777893066, "learning_rate": 9.021712223576075e-07, "loss": 0.0187, "step": 283940 }, { "epoch": 3.0338159089694963, "grad_norm": 0.03007822483778, "learning_rate": 9.021612396313427e-07, "loss": 0.0088, "step": 283950 }, { "epoch": 3.0339227522837757, "grad_norm": 0.2474757730960846, "learning_rate": 9.021512564510077e-07, "loss": 0.002, "step": 283960 }, { "epoch": 3.0340295955980556, "grad_norm": 2.137953519821167, "learning_rate": 9.021412728166134e-07, "loss": 0.0039, "step": 283970 }, { "epoch": 3.034136438912335, "grad_norm": 0.04303497076034546, "learning_rate": 9.021312887281713e-07, "loss": 0.0797, "step": 283980 }, { "epoch": 3.0342432822266145, "grad_norm": 0.02480679750442505, "learning_rate": 9.021213041856925e-07, "loss": 0.0142, "step": 283990 }, { "epoch": 3.0343501255408944, "grad_norm": 0.10984881967306137, "learning_rate": 9.021113191891885e-07, "loss": 0.0263, "step": 284000 }, { "epoch": 3.034456968855174, "grad_norm": 0.12146657705307007, "learning_rate": 9.021013337386705e-07, "loss": 0.0131, "step": 284010 }, { "epoch": 3.0345638121694534, "grad_norm": 8.87179946899414, "learning_rate": 9.020913478341498e-07, "loss": 0.0023, "step": 284020 }, { "epoch": 3.0346706554837333, "grad_norm": 2.7153615951538086, "learning_rate": 9.020813614756375e-07, "loss": 0.0063, "step": 284030 }, { "epoch": 3.0347774987980127, "grad_norm": 0.8964529633522034, "learning_rate": 9.020713746631451e-07, "loss": 0.0063, "step": 284040 }, { "epoch": 3.034884342112292, "grad_norm": 0.08800384402275085, "learning_rate": 9.020613873966837e-07, "loss": 0.0017, "step": 284050 }, { "epoch": 3.034991185426572, "grad_norm": 1.8510408401489258, "learning_rate": 9.020513996762646e-07, "loss": 0.0157, "step": 284060 }, { "epoch": 3.0350980287408516, "grad_norm": 0.004236580338329077, "learning_rate": 9.020414115018993e-07, "loss": 0.0155, "step": 284070 }, { "epoch": 3.035204872055131, "grad_norm": 1.0227311849594116, "learning_rate": 9.020314228735988e-07, "loss": 0.0162, "step": 284080 }, { "epoch": 3.035311715369411, "grad_norm": 2.4684250354766846, "learning_rate": 9.020214337913744e-07, "loss": 0.0381, "step": 284090 }, { "epoch": 3.0354185586836904, "grad_norm": 8.899738311767578, "learning_rate": 9.020114442552376e-07, "loss": 0.0468, "step": 284100 }, { "epoch": 3.03552540199797, "grad_norm": 0.0037544830702245235, "learning_rate": 9.020014542651994e-07, "loss": 0.0048, "step": 284110 }, { "epoch": 3.0356322453122497, "grad_norm": 0.0664132684469223, "learning_rate": 9.019914638212713e-07, "loss": 0.0174, "step": 284120 }, { "epoch": 3.035739088626529, "grad_norm": 4.642306804656982, "learning_rate": 9.019814729234645e-07, "loss": 0.0067, "step": 284130 }, { "epoch": 3.0358459319408087, "grad_norm": 0.14002786576747894, "learning_rate": 9.019714815717902e-07, "loss": 0.0092, "step": 284140 }, { "epoch": 3.0359527752550886, "grad_norm": 0.046695198863744736, "learning_rate": 9.019614897662598e-07, "loss": 0.0198, "step": 284150 }, { "epoch": 3.036059618569368, "grad_norm": 0.033194877207279205, "learning_rate": 9.019514975068847e-07, "loss": 0.0086, "step": 284160 }, { "epoch": 3.0361664618836475, "grad_norm": 0.18652424216270447, "learning_rate": 9.019415047936758e-07, "loss": 0.0101, "step": 284170 }, { "epoch": 3.0362733051979274, "grad_norm": 0.3762880265712738, "learning_rate": 9.019315116266447e-07, "loss": 0.0114, "step": 284180 }, { "epoch": 3.036380148512207, "grad_norm": 0.004568303935229778, "learning_rate": 9.019215180058026e-07, "loss": 0.0218, "step": 284190 }, { "epoch": 3.0364869918264863, "grad_norm": 0.012553388252854347, "learning_rate": 9.019115239311608e-07, "loss": 0.0136, "step": 284200 }, { "epoch": 3.036593835140766, "grad_norm": 0.003499511396512389, "learning_rate": 9.019015294027305e-07, "loss": 0.0012, "step": 284210 }, { "epoch": 3.0367006784550457, "grad_norm": 2.2932379245758057, "learning_rate": 9.01891534420523e-07, "loss": 0.0188, "step": 284220 }, { "epoch": 3.036807521769325, "grad_norm": 1.6887141466140747, "learning_rate": 9.018815389845497e-07, "loss": 0.0193, "step": 284230 }, { "epoch": 3.036914365083605, "grad_norm": 0.02026868611574173, "learning_rate": 9.018715430948218e-07, "loss": 0.0041, "step": 284240 }, { "epoch": 3.0370212083978845, "grad_norm": 0.4612298905849457, "learning_rate": 9.018615467513506e-07, "loss": 0.0215, "step": 284250 }, { "epoch": 3.037128051712164, "grad_norm": 0.002362418919801712, "learning_rate": 9.018515499541473e-07, "loss": 0.0017, "step": 284260 }, { "epoch": 3.037234895026444, "grad_norm": 2.694798707962036, "learning_rate": 9.018415527032233e-07, "loss": 0.0069, "step": 284270 }, { "epoch": 3.0373417383407233, "grad_norm": 0.7905395030975342, "learning_rate": 9.0183155499859e-07, "loss": 0.0027, "step": 284280 }, { "epoch": 3.0374485816550028, "grad_norm": 0.14603137969970703, "learning_rate": 9.018215568402584e-07, "loss": 0.0138, "step": 284290 }, { "epoch": 3.0375554249692827, "grad_norm": 0.04492723196744919, "learning_rate": 9.0181155822824e-07, "loss": 0.0075, "step": 284300 }, { "epoch": 3.037662268283562, "grad_norm": 0.005725169088691473, "learning_rate": 9.018015591625461e-07, "loss": 0.0546, "step": 284310 }, { "epoch": 3.0377691115978416, "grad_norm": 0.1911066323518753, "learning_rate": 9.017915596431878e-07, "loss": 0.0027, "step": 284320 }, { "epoch": 3.0378759549121215, "grad_norm": 0.10885901004076004, "learning_rate": 9.017815596701764e-07, "loss": 0.0303, "step": 284330 }, { "epoch": 3.037982798226401, "grad_norm": 0.33897146582603455, "learning_rate": 9.017715592435235e-07, "loss": 0.0321, "step": 284340 }, { "epoch": 3.0380896415406804, "grad_norm": 0.01454151887446642, "learning_rate": 9.017615583632401e-07, "loss": 0.001, "step": 284350 }, { "epoch": 3.0381964848549603, "grad_norm": 4.031505584716797, "learning_rate": 9.017515570293374e-07, "loss": 0.0055, "step": 284360 }, { "epoch": 3.0383033281692398, "grad_norm": 0.12691444158554077, "learning_rate": 9.017415552418272e-07, "loss": 0.0074, "step": 284370 }, { "epoch": 3.038410171483519, "grad_norm": 0.048848509788513184, "learning_rate": 9.017315530007204e-07, "loss": 0.0085, "step": 284380 }, { "epoch": 3.038517014797799, "grad_norm": 0.002088892040774226, "learning_rate": 9.017215503060281e-07, "loss": 0.0004, "step": 284390 }, { "epoch": 3.0386238581120786, "grad_norm": 0.09394362568855286, "learning_rate": 9.01711547157762e-07, "loss": 0.0023, "step": 284400 }, { "epoch": 3.038730701426358, "grad_norm": 0.039811551570892334, "learning_rate": 9.017015435559334e-07, "loss": 0.0244, "step": 284410 }, { "epoch": 3.038837544740638, "grad_norm": 0.20325960218906403, "learning_rate": 9.016915395005534e-07, "loss": 0.0101, "step": 284420 }, { "epoch": 3.0389443880549174, "grad_norm": 0.06741735339164734, "learning_rate": 9.016815349916331e-07, "loss": 0.0159, "step": 284430 }, { "epoch": 3.039051231369197, "grad_norm": 0.003968468401581049, "learning_rate": 9.016715300291841e-07, "loss": 0.0164, "step": 284440 }, { "epoch": 3.0391580746834768, "grad_norm": 0.20609544217586517, "learning_rate": 9.016615246132177e-07, "loss": 0.0075, "step": 284450 }, { "epoch": 3.039264917997756, "grad_norm": 0.1279819905757904, "learning_rate": 9.016515187437452e-07, "loss": 0.0068, "step": 284460 }, { "epoch": 3.0393717613120357, "grad_norm": 0.006475269794464111, "learning_rate": 9.016415124207777e-07, "loss": 0.0152, "step": 284470 }, { "epoch": 3.0394786046263156, "grad_norm": 0.7442572712898254, "learning_rate": 9.016315056443268e-07, "loss": 0.004, "step": 284480 }, { "epoch": 3.039585447940595, "grad_norm": 8.904276847839355, "learning_rate": 9.016214984144035e-07, "loss": 0.007, "step": 284490 }, { "epoch": 3.039692291254875, "grad_norm": 0.00315952324308455, "learning_rate": 9.01611490731019e-07, "loss": 0.0022, "step": 284500 }, { "epoch": 3.0397991345691544, "grad_norm": 0.05280321091413498, "learning_rate": 9.016014825941851e-07, "loss": 0.0122, "step": 284510 }, { "epoch": 3.039905977883434, "grad_norm": 0.018390517681837082, "learning_rate": 9.015914740039127e-07, "loss": 0.0153, "step": 284520 }, { "epoch": 3.0400128211977138, "grad_norm": 0.9813328385353088, "learning_rate": 9.015814649602134e-07, "loss": 0.0185, "step": 284530 }, { "epoch": 3.0401196645119932, "grad_norm": 0.029556503519415855, "learning_rate": 9.015714554630981e-07, "loss": 0.0047, "step": 284540 }, { "epoch": 3.0402265078262727, "grad_norm": 0.07589881867170334, "learning_rate": 9.015614455125784e-07, "loss": 0.0126, "step": 284550 }, { "epoch": 3.0403333511405526, "grad_norm": 3.615973472595215, "learning_rate": 9.015514351086655e-07, "loss": 0.0168, "step": 284560 }, { "epoch": 3.040440194454832, "grad_norm": 0.021130172535777092, "learning_rate": 9.015414242513707e-07, "loss": 0.0006, "step": 284570 }, { "epoch": 3.0405470377691115, "grad_norm": 1.971722960472107, "learning_rate": 9.015314129407054e-07, "loss": 0.0054, "step": 284580 }, { "epoch": 3.0406538810833914, "grad_norm": 0.00975479744374752, "learning_rate": 9.015214011766808e-07, "loss": 0.0356, "step": 284590 }, { "epoch": 3.040760724397671, "grad_norm": 0.09086334705352783, "learning_rate": 9.015113889593082e-07, "loss": 0.0027, "step": 284600 }, { "epoch": 3.0408675677119503, "grad_norm": 0.0025545640382915735, "learning_rate": 9.015013762885989e-07, "loss": 0.0218, "step": 284610 }, { "epoch": 3.0409744110262302, "grad_norm": 0.0007496527978219092, "learning_rate": 9.014913631645643e-07, "loss": 0.0046, "step": 284620 }, { "epoch": 3.0410812543405097, "grad_norm": 0.01878693513572216, "learning_rate": 9.014813495872156e-07, "loss": 0.009, "step": 284630 }, { "epoch": 3.041188097654789, "grad_norm": 5.456911563873291, "learning_rate": 9.014713355565643e-07, "loss": 0.0109, "step": 284640 }, { "epoch": 3.041294940969069, "grad_norm": 0.004693977069109678, "learning_rate": 9.014613210726214e-07, "loss": 0.0153, "step": 284650 }, { "epoch": 3.0414017842833485, "grad_norm": 3.1371395587921143, "learning_rate": 9.014513061353986e-07, "loss": 0.0238, "step": 284660 }, { "epoch": 3.041508627597628, "grad_norm": 1.123915433883667, "learning_rate": 9.014412907449067e-07, "loss": 0.0163, "step": 284670 }, { "epoch": 3.041615470911908, "grad_norm": 0.021100372076034546, "learning_rate": 9.014312749011574e-07, "loss": 0.0024, "step": 284680 }, { "epoch": 3.0417223142261873, "grad_norm": 0.9774813055992126, "learning_rate": 9.014212586041618e-07, "loss": 0.015, "step": 284690 }, { "epoch": 3.041829157540467, "grad_norm": 0.02971084788441658, "learning_rate": 9.014112418539315e-07, "loss": 0.0064, "step": 284700 }, { "epoch": 3.0419360008547467, "grad_norm": 0.2105483114719391, "learning_rate": 9.014012246504774e-07, "loss": 0.0077, "step": 284710 }, { "epoch": 3.042042844169026, "grad_norm": 0.7461782693862915, "learning_rate": 9.013912069938112e-07, "loss": 0.0166, "step": 284720 }, { "epoch": 3.0421496874833056, "grad_norm": 0.6241260170936584, "learning_rate": 9.01381188883944e-07, "loss": 0.0116, "step": 284730 }, { "epoch": 3.0422565307975855, "grad_norm": 0.7283463478088379, "learning_rate": 9.013711703208871e-07, "loss": 0.0078, "step": 284740 }, { "epoch": 3.042363374111865, "grad_norm": 0.07889768481254578, "learning_rate": 9.013611513046518e-07, "loss": 0.0109, "step": 284750 }, { "epoch": 3.0424702174261444, "grad_norm": 0.015606781467795372, "learning_rate": 9.013511318352496e-07, "loss": 0.0236, "step": 284760 }, { "epoch": 3.0425770607404243, "grad_norm": 0.3904402554035187, "learning_rate": 9.013411119126915e-07, "loss": 0.0184, "step": 284770 }, { "epoch": 3.042683904054704, "grad_norm": 0.03356660157442093, "learning_rate": 9.013310915369892e-07, "loss": 0.0009, "step": 284780 }, { "epoch": 3.0427907473689833, "grad_norm": 0.0023465240374207497, "learning_rate": 9.013210707081537e-07, "loss": 0.0406, "step": 284790 }, { "epoch": 3.042897590683263, "grad_norm": 0.021664194762706757, "learning_rate": 9.013110494261964e-07, "loss": 0.0292, "step": 284800 }, { "epoch": 3.0430044339975426, "grad_norm": 3.302377939224243, "learning_rate": 9.013010276911287e-07, "loss": 0.0078, "step": 284810 }, { "epoch": 3.043111277311822, "grad_norm": 0.9256083369255066, "learning_rate": 9.012910055029617e-07, "loss": 0.0228, "step": 284820 }, { "epoch": 3.043218120626102, "grad_norm": 0.11973349750041962, "learning_rate": 9.01280982861707e-07, "loss": 0.003, "step": 284830 }, { "epoch": 3.0433249639403814, "grad_norm": 0.5285499095916748, "learning_rate": 9.012709597673758e-07, "loss": 0.0095, "step": 284840 }, { "epoch": 3.043431807254661, "grad_norm": 0.012540717609226704, "learning_rate": 9.012609362199795e-07, "loss": 0.0047, "step": 284850 }, { "epoch": 3.043538650568941, "grad_norm": 0.20960938930511475, "learning_rate": 9.012509122195291e-07, "loss": 0.0127, "step": 284860 }, { "epoch": 3.0436454938832203, "grad_norm": 0.003001872915774584, "learning_rate": 9.012408877660362e-07, "loss": 0.0109, "step": 284870 }, { "epoch": 3.0437523371974997, "grad_norm": 0.4721144139766693, "learning_rate": 9.012308628595122e-07, "loss": 0.0687, "step": 284880 }, { "epoch": 3.0438591805117796, "grad_norm": 0.008353786543011665, "learning_rate": 9.012208374999681e-07, "loss": 0.0127, "step": 284890 }, { "epoch": 3.043966023826059, "grad_norm": 8.806726455688477, "learning_rate": 9.012108116874154e-07, "loss": 0.0207, "step": 284900 }, { "epoch": 3.0440728671403385, "grad_norm": 4.632471561431885, "learning_rate": 9.012007854218655e-07, "loss": 0.0162, "step": 284910 }, { "epoch": 3.0441797104546184, "grad_norm": 1.6114568710327148, "learning_rate": 9.011907587033297e-07, "loss": 0.0057, "step": 284920 }, { "epoch": 3.044286553768898, "grad_norm": 0.02287711389362812, "learning_rate": 9.01180731531819e-07, "loss": 0.0014, "step": 284930 }, { "epoch": 3.0443933970831774, "grad_norm": 0.27750203013420105, "learning_rate": 9.011707039073452e-07, "loss": 0.0081, "step": 284940 }, { "epoch": 3.0445002403974573, "grad_norm": 0.9894943237304688, "learning_rate": 9.011606758299193e-07, "loss": 0.0083, "step": 284950 }, { "epoch": 3.0446070837117367, "grad_norm": 0.10920028388500214, "learning_rate": 9.011506472995527e-07, "loss": 0.0055, "step": 284960 }, { "epoch": 3.044713927026016, "grad_norm": 0.681084930896759, "learning_rate": 9.011406183162569e-07, "loss": 0.045, "step": 284970 }, { "epoch": 3.044820770340296, "grad_norm": 25.77818489074707, "learning_rate": 9.011305888800429e-07, "loss": 0.0265, "step": 284980 }, { "epoch": 3.0449276136545755, "grad_norm": 0.004823360592126846, "learning_rate": 9.011205589909222e-07, "loss": 0.0011, "step": 284990 }, { "epoch": 3.045034456968855, "grad_norm": 0.09348815679550171, "learning_rate": 9.011105286489062e-07, "loss": 0.0085, "step": 285000 }, { "epoch": 3.045141300283135, "grad_norm": 0.01936863176524639, "learning_rate": 9.01100497854006e-07, "loss": 0.0079, "step": 285010 }, { "epoch": 3.0452481435974144, "grad_norm": 0.010028081014752388, "learning_rate": 9.010904666062332e-07, "loss": 0.0032, "step": 285020 }, { "epoch": 3.045354986911694, "grad_norm": 3.393805980682373, "learning_rate": 9.010804349055989e-07, "loss": 0.0147, "step": 285030 }, { "epoch": 3.0454618302259737, "grad_norm": 3.8033411502838135, "learning_rate": 9.010704027521147e-07, "loss": 0.0252, "step": 285040 }, { "epoch": 3.045568673540253, "grad_norm": 0.412720650434494, "learning_rate": 9.010603701457915e-07, "loss": 0.0312, "step": 285050 }, { "epoch": 3.0456755168545326, "grad_norm": 3.090193271636963, "learning_rate": 9.010503370866411e-07, "loss": 0.0077, "step": 285060 }, { "epoch": 3.0457823601688125, "grad_norm": 1.342240571975708, "learning_rate": 9.010403035746745e-07, "loss": 0.0176, "step": 285070 }, { "epoch": 3.045889203483092, "grad_norm": 1.8985401391983032, "learning_rate": 9.01030269609903e-07, "loss": 0.0111, "step": 285080 }, { "epoch": 3.0459960467973715, "grad_norm": 4.970050811767578, "learning_rate": 9.010202351923383e-07, "loss": 0.0092, "step": 285090 }, { "epoch": 3.0461028901116514, "grad_norm": 1.0411425828933716, "learning_rate": 9.010102003219913e-07, "loss": 0.0116, "step": 285100 }, { "epoch": 3.046209733425931, "grad_norm": 0.7724099159240723, "learning_rate": 9.010001649988737e-07, "loss": 0.0078, "step": 285110 }, { "epoch": 3.0463165767402103, "grad_norm": 2.46523380279541, "learning_rate": 9.009901292229965e-07, "loss": 0.0039, "step": 285120 }, { "epoch": 3.04642342005449, "grad_norm": 3.970669984817505, "learning_rate": 9.009800929943712e-07, "loss": 0.0158, "step": 285130 }, { "epoch": 3.0465302633687696, "grad_norm": 0.005508200731128454, "learning_rate": 9.009700563130091e-07, "loss": 0.0313, "step": 285140 }, { "epoch": 3.046637106683049, "grad_norm": 9.309269905090332, "learning_rate": 9.009600191789216e-07, "loss": 0.0065, "step": 285150 }, { "epoch": 3.046743949997329, "grad_norm": 0.0066746557131409645, "learning_rate": 9.0094998159212e-07, "loss": 0.0054, "step": 285160 }, { "epoch": 3.0468507933116085, "grad_norm": 3.7893831729888916, "learning_rate": 9.009399435526155e-07, "loss": 0.0322, "step": 285170 }, { "epoch": 3.046957636625888, "grad_norm": 2.255556106567383, "learning_rate": 9.009299050604196e-07, "loss": 0.0132, "step": 285180 }, { "epoch": 3.047064479940168, "grad_norm": 0.003001868724822998, "learning_rate": 9.009198661155435e-07, "loss": 0.0417, "step": 285190 }, { "epoch": 3.0471713232544473, "grad_norm": 0.23139019310474396, "learning_rate": 9.009098267179988e-07, "loss": 0.0046, "step": 285200 }, { "epoch": 3.047278166568727, "grad_norm": 0.028496280312538147, "learning_rate": 9.008997868677966e-07, "loss": 0.0002, "step": 285210 }, { "epoch": 3.0473850098830066, "grad_norm": 1.6521930694580078, "learning_rate": 9.008897465649481e-07, "loss": 0.0062, "step": 285220 }, { "epoch": 3.047491853197286, "grad_norm": 0.014059014618396759, "learning_rate": 9.008797058094649e-07, "loss": 0.0141, "step": 285230 }, { "epoch": 3.0475986965115656, "grad_norm": 0.005546532105654478, "learning_rate": 9.008696646013583e-07, "loss": 0.0066, "step": 285240 }, { "epoch": 3.0477055398258455, "grad_norm": 14.053506851196289, "learning_rate": 9.008596229406397e-07, "loss": 0.0109, "step": 285250 }, { "epoch": 3.047812383140125, "grad_norm": 0.004314870107918978, "learning_rate": 9.008495808273202e-07, "loss": 0.0047, "step": 285260 }, { "epoch": 3.047919226454405, "grad_norm": 1.4720540046691895, "learning_rate": 9.008395382614113e-07, "loss": 0.0287, "step": 285270 }, { "epoch": 3.0480260697686843, "grad_norm": 0.02258892171084881, "learning_rate": 9.008294952429243e-07, "loss": 0.0331, "step": 285280 }, { "epoch": 3.0481329130829637, "grad_norm": 0.4225265383720398, "learning_rate": 9.008194517718706e-07, "loss": 0.0075, "step": 285290 }, { "epoch": 3.0482397563972436, "grad_norm": 0.21475717425346375, "learning_rate": 9.008094078482613e-07, "loss": 0.0497, "step": 285300 }, { "epoch": 3.048346599711523, "grad_norm": 0.007523311302065849, "learning_rate": 9.00799363472108e-07, "loss": 0.0005, "step": 285310 }, { "epoch": 3.0484534430258026, "grad_norm": 2.148153781890869, "learning_rate": 9.007893186434221e-07, "loss": 0.0314, "step": 285320 }, { "epoch": 3.0485602863400825, "grad_norm": 2.9408318996429443, "learning_rate": 9.007792733622148e-07, "loss": 0.0084, "step": 285330 }, { "epoch": 3.048667129654362, "grad_norm": 0.0478624626994133, "learning_rate": 9.007692276284974e-07, "loss": 0.0107, "step": 285340 }, { "epoch": 3.0487739729686414, "grad_norm": 0.005038102623075247, "learning_rate": 9.007591814422813e-07, "loss": 0.0047, "step": 285350 }, { "epoch": 3.0488808162829213, "grad_norm": 0.002955802483484149, "learning_rate": 9.007491348035779e-07, "loss": 0.0124, "step": 285360 }, { "epoch": 3.0489876595972008, "grad_norm": 0.5410979390144348, "learning_rate": 9.007390877123983e-07, "loss": 0.0136, "step": 285370 }, { "epoch": 3.04909450291148, "grad_norm": 12.755936622619629, "learning_rate": 9.007290401687542e-07, "loss": 0.0178, "step": 285380 }, { "epoch": 3.04920134622576, "grad_norm": 7.234495162963867, "learning_rate": 9.007189921726565e-07, "loss": 0.0455, "step": 285390 }, { "epoch": 3.0493081895400396, "grad_norm": 0.798153817653656, "learning_rate": 9.007089437241171e-07, "loss": 0.002, "step": 285400 }, { "epoch": 3.049415032854319, "grad_norm": 0.00966833345592022, "learning_rate": 9.006988948231471e-07, "loss": 0.0249, "step": 285410 }, { "epoch": 3.049521876168599, "grad_norm": 0.03282495588064194, "learning_rate": 9.006888454697577e-07, "loss": 0.025, "step": 285420 }, { "epoch": 3.0496287194828784, "grad_norm": 0.727428674697876, "learning_rate": 9.006787956639603e-07, "loss": 0.0391, "step": 285430 }, { "epoch": 3.049735562797158, "grad_norm": 0.12248986959457397, "learning_rate": 9.006687454057662e-07, "loss": 0.0123, "step": 285440 }, { "epoch": 3.0498424061114378, "grad_norm": 3.821916341781616, "learning_rate": 9.00658694695187e-07, "loss": 0.0116, "step": 285450 }, { "epoch": 3.049949249425717, "grad_norm": 0.09817683696746826, "learning_rate": 9.006486435322341e-07, "loss": 0.0176, "step": 285460 }, { "epoch": 3.0500560927399967, "grad_norm": 0.6525037884712219, "learning_rate": 9.006385919169183e-07, "loss": 0.0043, "step": 285470 }, { "epoch": 3.0501629360542766, "grad_norm": 0.040877312421798706, "learning_rate": 9.006285398492513e-07, "loss": 0.0002, "step": 285480 }, { "epoch": 3.050269779368556, "grad_norm": 3.9653992652893066, "learning_rate": 9.006184873292446e-07, "loss": 0.0192, "step": 285490 }, { "epoch": 3.0503766226828355, "grad_norm": 0.2674960494041443, "learning_rate": 9.006084343569094e-07, "loss": 0.0186, "step": 285500 }, { "epoch": 3.0504834659971154, "grad_norm": 3.696648597717285, "learning_rate": 9.00598380932257e-07, "loss": 0.0187, "step": 285510 }, { "epoch": 3.050590309311395, "grad_norm": 3.3508548736572266, "learning_rate": 9.005883270552988e-07, "loss": 0.0259, "step": 285520 }, { "epoch": 3.0506971526256743, "grad_norm": 8.090646743774414, "learning_rate": 9.00578272726046e-07, "loss": 0.0072, "step": 285530 }, { "epoch": 3.050803995939954, "grad_norm": 0.016928354278206825, "learning_rate": 9.005682179445103e-07, "loss": 0.0143, "step": 285540 }, { "epoch": 3.0509108392542337, "grad_norm": 0.46535810828208923, "learning_rate": 9.005581627107029e-07, "loss": 0.0074, "step": 285550 }, { "epoch": 3.051017682568513, "grad_norm": 0.00648987852036953, "learning_rate": 9.005481070246348e-07, "loss": 0.0101, "step": 285560 }, { "epoch": 3.051124525882793, "grad_norm": 0.10294903814792633, "learning_rate": 9.005380508863179e-07, "loss": 0.0039, "step": 285570 }, { "epoch": 3.0512313691970725, "grad_norm": 0.0010750334477052093, "learning_rate": 9.005279942957632e-07, "loss": 0.0141, "step": 285580 }, { "epoch": 3.051338212511352, "grad_norm": 0.05780306085944176, "learning_rate": 9.005179372529822e-07, "loss": 0.0144, "step": 285590 }, { "epoch": 3.051445055825632, "grad_norm": 0.001143069937825203, "learning_rate": 9.005078797579863e-07, "loss": 0.0028, "step": 285600 }, { "epoch": 3.0515518991399113, "grad_norm": 1.8995509147644043, "learning_rate": 9.004978218107866e-07, "loss": 0.0063, "step": 285610 }, { "epoch": 3.0516587424541908, "grad_norm": 0.004515939392149448, "learning_rate": 9.004877634113948e-07, "loss": 0.0092, "step": 285620 }, { "epoch": 3.0517655857684707, "grad_norm": 0.808073878288269, "learning_rate": 9.00477704559822e-07, "loss": 0.0028, "step": 285630 }, { "epoch": 3.05187242908275, "grad_norm": 0.2959290146827698, "learning_rate": 9.004676452560796e-07, "loss": 0.0044, "step": 285640 }, { "epoch": 3.0519792723970296, "grad_norm": 0.01133474987000227, "learning_rate": 9.004575855001792e-07, "loss": 0.0198, "step": 285650 }, { "epoch": 3.0520861157113095, "grad_norm": 0.17075036466121674, "learning_rate": 9.004475252921317e-07, "loss": 0.0341, "step": 285660 }, { "epoch": 3.052192959025589, "grad_norm": 0.2601425051689148, "learning_rate": 9.004374646319489e-07, "loss": 0.0045, "step": 285670 }, { "epoch": 3.0522998023398684, "grad_norm": 0.0177635345607996, "learning_rate": 9.00427403519642e-07, "loss": 0.0131, "step": 285680 }, { "epoch": 3.0524066456541483, "grad_norm": 0.006158242002129555, "learning_rate": 9.004173419552222e-07, "loss": 0.0075, "step": 285690 }, { "epoch": 3.052513488968428, "grad_norm": 0.008014896884560585, "learning_rate": 9.00407279938701e-07, "loss": 0.029, "step": 285700 }, { "epoch": 3.0526203322827072, "grad_norm": 0.7153975367546082, "learning_rate": 9.0039721747009e-07, "loss": 0.0136, "step": 285710 }, { "epoch": 3.052727175596987, "grad_norm": 7.962524890899658, "learning_rate": 9.003871545494002e-07, "loss": 0.0196, "step": 285720 }, { "epoch": 3.0528340189112666, "grad_norm": 7.270054817199707, "learning_rate": 9.00377091176643e-07, "loss": 0.007, "step": 285730 }, { "epoch": 3.052940862225546, "grad_norm": 0.03239040821790695, "learning_rate": 9.003670273518298e-07, "loss": 0.0056, "step": 285740 }, { "epoch": 3.053047705539826, "grad_norm": 0.07304366677999496, "learning_rate": 9.003569630749721e-07, "loss": 0.0215, "step": 285750 }, { "epoch": 3.0531545488541054, "grad_norm": 0.019375186413526535, "learning_rate": 9.003468983460812e-07, "loss": 0.023, "step": 285760 }, { "epoch": 3.053261392168385, "grad_norm": 0.010850983671844006, "learning_rate": 9.003368331651684e-07, "loss": 0.005, "step": 285770 }, { "epoch": 3.053368235482665, "grad_norm": 1.630640983581543, "learning_rate": 9.003267675322452e-07, "loss": 0.0129, "step": 285780 }, { "epoch": 3.0534750787969442, "grad_norm": 8.828265190124512, "learning_rate": 9.003167014473228e-07, "loss": 0.0143, "step": 285790 }, { "epoch": 3.0535819221112237, "grad_norm": 13.187758445739746, "learning_rate": 9.003066349104125e-07, "loss": 0.0441, "step": 285800 }, { "epoch": 3.0536887654255036, "grad_norm": 2.827502489089966, "learning_rate": 9.00296567921526e-07, "loss": 0.0088, "step": 285810 }, { "epoch": 3.053795608739783, "grad_norm": 0.03795710206031799, "learning_rate": 9.002865004806743e-07, "loss": 0.047, "step": 285820 }, { "epoch": 3.0539024520540625, "grad_norm": 4.1390814781188965, "learning_rate": 9.002764325878689e-07, "loss": 0.0099, "step": 285830 }, { "epoch": 3.0540092953683424, "grad_norm": 0.0021677797194570303, "learning_rate": 9.002663642431214e-07, "loss": 0.0196, "step": 285840 }, { "epoch": 3.054116138682622, "grad_norm": 7.202689170837402, "learning_rate": 9.002562954464427e-07, "loss": 0.0328, "step": 285850 }, { "epoch": 3.0542229819969013, "grad_norm": 3.0745129585266113, "learning_rate": 9.002462261978447e-07, "loss": 0.0059, "step": 285860 }, { "epoch": 3.0543298253111812, "grad_norm": 0.014130890369415283, "learning_rate": 9.002361564973384e-07, "loss": 0.0042, "step": 285870 }, { "epoch": 3.0544366686254607, "grad_norm": 0.02422512322664261, "learning_rate": 9.002260863449354e-07, "loss": 0.0189, "step": 285880 }, { "epoch": 3.05454351193974, "grad_norm": 0.007784637156873941, "learning_rate": 9.002160157406466e-07, "loss": 0.0061, "step": 285890 }, { "epoch": 3.05465035525402, "grad_norm": 1.7778452634811401, "learning_rate": 9.002059446844841e-07, "loss": 0.0046, "step": 285900 }, { "epoch": 3.0547571985682995, "grad_norm": 0.04634976014494896, "learning_rate": 9.001958731764586e-07, "loss": 0.0083, "step": 285910 }, { "epoch": 3.054864041882579, "grad_norm": 7.233450412750244, "learning_rate": 9.001858012165819e-07, "loss": 0.0156, "step": 285920 }, { "epoch": 3.054970885196859, "grad_norm": 0.08463232964277267, "learning_rate": 9.001757288048651e-07, "loss": 0.0166, "step": 285930 }, { "epoch": 3.0550777285111383, "grad_norm": 6.2433295249938965, "learning_rate": 9.001656559413198e-07, "loss": 0.0135, "step": 285940 }, { "epoch": 3.055184571825418, "grad_norm": 8.625012397766113, "learning_rate": 9.001555826259572e-07, "loss": 0.0169, "step": 285950 }, { "epoch": 3.0552914151396977, "grad_norm": 0.09593033790588379, "learning_rate": 9.001455088587889e-07, "loss": 0.0067, "step": 285960 }, { "epoch": 3.055398258453977, "grad_norm": 0.05227860435843468, "learning_rate": 9.00135434639826e-07, "loss": 0.0034, "step": 285970 }, { "epoch": 3.055505101768257, "grad_norm": 0.3172350525856018, "learning_rate": 9.0012535996908e-07, "loss": 0.008, "step": 285980 }, { "epoch": 3.0556119450825365, "grad_norm": 5.149871349334717, "learning_rate": 9.001152848465622e-07, "loss": 0.0427, "step": 285990 }, { "epoch": 3.055718788396816, "grad_norm": 0.006407859269529581, "learning_rate": 9.001052092722843e-07, "loss": 0.0249, "step": 286000 }, { "epoch": 3.055825631711096, "grad_norm": 0.2768677771091461, "learning_rate": 9.000951332462571e-07, "loss": 0.0081, "step": 286010 }, { "epoch": 3.0559324750253753, "grad_norm": 3.0176467895507812, "learning_rate": 9.000850567684925e-07, "loss": 0.0084, "step": 286020 }, { "epoch": 3.056039318339655, "grad_norm": 1.1951473951339722, "learning_rate": 9.000749798390017e-07, "loss": 0.0035, "step": 286030 }, { "epoch": 3.0561461616539347, "grad_norm": 0.005647758953273296, "learning_rate": 9.00064902457796e-07, "loss": 0.0145, "step": 286040 }, { "epoch": 3.056253004968214, "grad_norm": 0.49023082852363586, "learning_rate": 9.000548246248868e-07, "loss": 0.0546, "step": 286050 }, { "epoch": 3.0563598482824936, "grad_norm": 2.710033416748047, "learning_rate": 9.000447463402855e-07, "loss": 0.0047, "step": 286060 }, { "epoch": 3.0564666915967735, "grad_norm": 0.7563155293464661, "learning_rate": 9.000346676040036e-07, "loss": 0.0048, "step": 286070 }, { "epoch": 3.056573534911053, "grad_norm": 0.8636828064918518, "learning_rate": 9.000245884160522e-07, "loss": 0.0059, "step": 286080 }, { "epoch": 3.0566803782253325, "grad_norm": 0.042310405522584915, "learning_rate": 9.000145087764429e-07, "loss": 0.0096, "step": 286090 }, { "epoch": 3.0567872215396124, "grad_norm": 2.4720327854156494, "learning_rate": 9.000044286851871e-07, "loss": 0.0111, "step": 286100 }, { "epoch": 3.056894064853892, "grad_norm": 0.04553321748971939, "learning_rate": 8.99994348142296e-07, "loss": 0.0161, "step": 286110 }, { "epoch": 3.0570009081681713, "grad_norm": 3.373913526535034, "learning_rate": 8.999842671477812e-07, "loss": 0.0136, "step": 286120 }, { "epoch": 3.057107751482451, "grad_norm": 4.4129486083984375, "learning_rate": 8.999741857016538e-07, "loss": 0.007, "step": 286130 }, { "epoch": 3.0572145947967306, "grad_norm": 0.02213446982204914, "learning_rate": 8.999641038039256e-07, "loss": 0.0553, "step": 286140 }, { "epoch": 3.05732143811101, "grad_norm": 4.328195095062256, "learning_rate": 8.999540214546076e-07, "loss": 0.0123, "step": 286150 }, { "epoch": 3.05742828142529, "grad_norm": 1.8463510274887085, "learning_rate": 8.999439386537113e-07, "loss": 0.0123, "step": 286160 }, { "epoch": 3.0575351247395695, "grad_norm": 2.247934341430664, "learning_rate": 8.999338554012483e-07, "loss": 0.01, "step": 286170 }, { "epoch": 3.057641968053849, "grad_norm": 5.909366130828857, "learning_rate": 8.999237716972297e-07, "loss": 0.0256, "step": 286180 }, { "epoch": 3.057748811368129, "grad_norm": 0.6906375885009766, "learning_rate": 8.999136875416668e-07, "loss": 0.0058, "step": 286190 }, { "epoch": 3.0578556546824083, "grad_norm": 0.011872632429003716, "learning_rate": 8.999036029345714e-07, "loss": 0.0078, "step": 286200 }, { "epoch": 3.0579624979966877, "grad_norm": 0.3789070248603821, "learning_rate": 8.998935178759546e-07, "loss": 0.0013, "step": 286210 }, { "epoch": 3.0580693413109676, "grad_norm": 12.794279098510742, "learning_rate": 8.998834323658278e-07, "loss": 0.0095, "step": 286220 }, { "epoch": 3.058176184625247, "grad_norm": 0.6453625559806824, "learning_rate": 8.998733464042024e-07, "loss": 0.0074, "step": 286230 }, { "epoch": 3.0582830279395266, "grad_norm": 0.10632640868425369, "learning_rate": 8.998632599910897e-07, "loss": 0.0063, "step": 286240 }, { "epoch": 3.0583898712538065, "grad_norm": 2.3628227710723877, "learning_rate": 8.998531731265013e-07, "loss": 0.0149, "step": 286250 }, { "epoch": 3.058496714568086, "grad_norm": 3.3165225982666016, "learning_rate": 8.998430858104485e-07, "loss": 0.0091, "step": 286260 }, { "epoch": 3.0586035578823654, "grad_norm": 5.119683265686035, "learning_rate": 8.998329980429427e-07, "loss": 0.0134, "step": 286270 }, { "epoch": 3.0587104011966453, "grad_norm": 0.3047536611557007, "learning_rate": 8.998229098239954e-07, "loss": 0.0184, "step": 286280 }, { "epoch": 3.0588172445109247, "grad_norm": 3.668264389038086, "learning_rate": 8.998128211536175e-07, "loss": 0.0252, "step": 286290 }, { "epoch": 3.058924087825204, "grad_norm": 4.741170883178711, "learning_rate": 8.998027320318211e-07, "loss": 0.0074, "step": 286300 }, { "epoch": 3.059030931139484, "grad_norm": 0.5461117029190063, "learning_rate": 8.99792642458617e-07, "loss": 0.0059, "step": 286310 }, { "epoch": 3.0591377744537636, "grad_norm": 2.3578457832336426, "learning_rate": 8.99782552434017e-07, "loss": 0.0027, "step": 286320 }, { "epoch": 3.059244617768043, "grad_norm": 0.3185534179210663, "learning_rate": 8.997724619580323e-07, "loss": 0.0178, "step": 286330 }, { "epoch": 3.059351461082323, "grad_norm": 1.8531501293182373, "learning_rate": 8.997623710306743e-07, "loss": 0.0261, "step": 286340 }, { "epoch": 3.0594583043966024, "grad_norm": 0.2893276810646057, "learning_rate": 8.997522796519543e-07, "loss": 0.0161, "step": 286350 }, { "epoch": 3.059565147710882, "grad_norm": 0.0032813819125294685, "learning_rate": 8.99742187821884e-07, "loss": 0.0045, "step": 286360 }, { "epoch": 3.0596719910251617, "grad_norm": 0.3917746841907501, "learning_rate": 8.997320955404745e-07, "loss": 0.0089, "step": 286370 }, { "epoch": 3.059778834339441, "grad_norm": 0.012341590598225594, "learning_rate": 8.997220028077372e-07, "loss": 0.0014, "step": 286380 }, { "epoch": 3.0598856776537207, "grad_norm": 0.040950626134872437, "learning_rate": 8.997119096236838e-07, "loss": 0.0106, "step": 286390 }, { "epoch": 3.0599925209680006, "grad_norm": 0.04521610960364342, "learning_rate": 8.997018159883253e-07, "loss": 0.0175, "step": 286400 }, { "epoch": 3.06009936428228, "grad_norm": 0.08463875204324722, "learning_rate": 8.996917219016733e-07, "loss": 0.0045, "step": 286410 }, { "epoch": 3.0602062075965595, "grad_norm": 0.2988336980342865, "learning_rate": 8.996816273637393e-07, "loss": 0.0155, "step": 286420 }, { "epoch": 3.0603130509108394, "grad_norm": 0.16303463280200958, "learning_rate": 8.996715323745346e-07, "loss": 0.0051, "step": 286430 }, { "epoch": 3.060419894225119, "grad_norm": 1.9044909477233887, "learning_rate": 8.996614369340704e-07, "loss": 0.0128, "step": 286440 }, { "epoch": 3.0605267375393983, "grad_norm": 0.007469142787158489, "learning_rate": 8.996513410423582e-07, "loss": 0.0025, "step": 286450 }, { "epoch": 3.060633580853678, "grad_norm": 20.47751235961914, "learning_rate": 8.996412446994098e-07, "loss": 0.0356, "step": 286460 }, { "epoch": 3.0607404241679577, "grad_norm": 5.844295501708984, "learning_rate": 8.996311479052361e-07, "loss": 0.012, "step": 286470 }, { "epoch": 3.060847267482237, "grad_norm": 0.45548805594444275, "learning_rate": 8.996210506598487e-07, "loss": 0.0207, "step": 286480 }, { "epoch": 3.060954110796517, "grad_norm": 4.581808090209961, "learning_rate": 8.99610952963259e-07, "loss": 0.0197, "step": 286490 }, { "epoch": 3.0610609541107965, "grad_norm": 2.720767021179199, "learning_rate": 8.996008548154783e-07, "loss": 0.0223, "step": 286500 }, { "epoch": 3.061167797425076, "grad_norm": 0.037896040827035904, "learning_rate": 8.99590756216518e-07, "loss": 0.0115, "step": 286510 }, { "epoch": 3.061274640739356, "grad_norm": 0.008174874819815159, "learning_rate": 8.995806571663897e-07, "loss": 0.0183, "step": 286520 }, { "epoch": 3.0613814840536353, "grad_norm": 20.770139694213867, "learning_rate": 8.995705576651047e-07, "loss": 0.0073, "step": 286530 }, { "epoch": 3.0614883273679148, "grad_norm": 0.050160523504018784, "learning_rate": 8.995604577126743e-07, "loss": 0.0065, "step": 286540 }, { "epoch": 3.0615951706821947, "grad_norm": 0.10645368695259094, "learning_rate": 8.995503573091101e-07, "loss": 0.0058, "step": 286550 }, { "epoch": 3.061702013996474, "grad_norm": 7.229362487792969, "learning_rate": 8.995402564544232e-07, "loss": 0.0338, "step": 286560 }, { "epoch": 3.0618088573107536, "grad_norm": 0.9559691548347473, "learning_rate": 8.995301551486254e-07, "loss": 0.0475, "step": 286570 }, { "epoch": 3.0619157006250335, "grad_norm": 0.2259722650051117, "learning_rate": 8.995200533917278e-07, "loss": 0.0064, "step": 286580 }, { "epoch": 3.062022543939313, "grad_norm": 0.011851001530885696, "learning_rate": 8.99509951183742e-07, "loss": 0.0037, "step": 286590 }, { "epoch": 3.0621293872535924, "grad_norm": 0.21905338764190674, "learning_rate": 8.994998485246792e-07, "loss": 0.0263, "step": 286600 }, { "epoch": 3.0622362305678723, "grad_norm": 2.1811180114746094, "learning_rate": 8.994897454145509e-07, "loss": 0.0051, "step": 286610 }, { "epoch": 3.0623430738821518, "grad_norm": 3.7143197059631348, "learning_rate": 8.994796418533685e-07, "loss": 0.0084, "step": 286620 }, { "epoch": 3.0624499171964312, "grad_norm": 0.22864720225334167, "learning_rate": 8.994695378411437e-07, "loss": 0.0068, "step": 286630 }, { "epoch": 3.062556760510711, "grad_norm": 0.5864136815071106, "learning_rate": 8.994594333778873e-07, "loss": 0.0232, "step": 286640 }, { "epoch": 3.0626636038249906, "grad_norm": 0.10920059680938721, "learning_rate": 8.994493284636113e-07, "loss": 0.0011, "step": 286650 }, { "epoch": 3.06277044713927, "grad_norm": 4.621748924255371, "learning_rate": 8.994392230983268e-07, "loss": 0.0076, "step": 286660 }, { "epoch": 3.06287729045355, "grad_norm": 6.671399116516113, "learning_rate": 8.994291172820452e-07, "loss": 0.0187, "step": 286670 }, { "epoch": 3.0629841337678294, "grad_norm": 0.008153365924954414, "learning_rate": 8.994190110147781e-07, "loss": 0.0112, "step": 286680 }, { "epoch": 3.0630909770821093, "grad_norm": 0.02699410170316696, "learning_rate": 8.994089042965367e-07, "loss": 0.0051, "step": 286690 }, { "epoch": 3.0631978203963888, "grad_norm": 0.17472617328166962, "learning_rate": 8.993987971273324e-07, "loss": 0.0061, "step": 286700 }, { "epoch": 3.0633046637106682, "grad_norm": 0.7161499857902527, "learning_rate": 8.993886895071769e-07, "loss": 0.0011, "step": 286710 }, { "epoch": 3.0634115070249477, "grad_norm": 0.002248281380161643, "learning_rate": 8.993785814360814e-07, "loss": 0.0031, "step": 286720 }, { "epoch": 3.0635183503392276, "grad_norm": 9.484549522399902, "learning_rate": 8.993684729140573e-07, "loss": 0.0225, "step": 286730 }, { "epoch": 3.063625193653507, "grad_norm": 0.01450434047728777, "learning_rate": 8.993583639411159e-07, "loss": 0.0124, "step": 286740 }, { "epoch": 3.063732036967787, "grad_norm": 0.04050641134381294, "learning_rate": 8.993482545172691e-07, "loss": 0.003, "step": 286750 }, { "epoch": 3.0638388802820664, "grad_norm": 0.010346002876758575, "learning_rate": 8.993381446425276e-07, "loss": 0.0422, "step": 286760 }, { "epoch": 3.063945723596346, "grad_norm": 0.06361015141010284, "learning_rate": 8.993280343169034e-07, "loss": 0.0015, "step": 286770 }, { "epoch": 3.0640525669106258, "grad_norm": 0.007702544331550598, "learning_rate": 8.993179235404078e-07, "loss": 0.0024, "step": 286780 }, { "epoch": 3.0641594102249052, "grad_norm": 0.13599319756031036, "learning_rate": 8.99307812313052e-07, "loss": 0.0064, "step": 286790 }, { "epoch": 3.0642662535391847, "grad_norm": 0.06454098224639893, "learning_rate": 8.992977006348476e-07, "loss": 0.0036, "step": 286800 }, { "epoch": 3.0643730968534646, "grad_norm": 0.06808869540691376, "learning_rate": 8.992875885058059e-07, "loss": 0.0027, "step": 286810 }, { "epoch": 3.064479940167744, "grad_norm": 3.533156394958496, "learning_rate": 8.992774759259384e-07, "loss": 0.021, "step": 286820 }, { "epoch": 3.0645867834820235, "grad_norm": 0.07890372723340988, "learning_rate": 8.992673628952565e-07, "loss": 0.0039, "step": 286830 }, { "epoch": 3.0646936267963034, "grad_norm": 0.010676441714167595, "learning_rate": 8.992572494137716e-07, "loss": 0.0087, "step": 286840 }, { "epoch": 3.064800470110583, "grad_norm": 0.5872857570648193, "learning_rate": 8.992471354814952e-07, "loss": 0.0056, "step": 286850 }, { "epoch": 3.0649073134248623, "grad_norm": 3.4722936153411865, "learning_rate": 8.992370210984386e-07, "loss": 0.004, "step": 286860 }, { "epoch": 3.0650141567391422, "grad_norm": 4.609463691711426, "learning_rate": 8.992269062646133e-07, "loss": 0.0467, "step": 286870 }, { "epoch": 3.0651210000534217, "grad_norm": 0.10743837058544159, "learning_rate": 8.992167909800307e-07, "loss": 0.0093, "step": 286880 }, { "epoch": 3.065227843367701, "grad_norm": 0.03756268694996834, "learning_rate": 8.992066752447021e-07, "loss": 0.0141, "step": 286890 }, { "epoch": 3.065334686681981, "grad_norm": 0.012949065305292606, "learning_rate": 8.991965590586392e-07, "loss": 0.0044, "step": 286900 }, { "epoch": 3.0654415299962605, "grad_norm": 0.11277077347040176, "learning_rate": 8.991864424218531e-07, "loss": 0.0472, "step": 286910 }, { "epoch": 3.06554837331054, "grad_norm": 0.07936055958271027, "learning_rate": 8.991763253343553e-07, "loss": 0.0239, "step": 286920 }, { "epoch": 3.06565521662482, "grad_norm": 0.21032151579856873, "learning_rate": 8.991662077961575e-07, "loss": 0.0019, "step": 286930 }, { "epoch": 3.0657620599390993, "grad_norm": 3.9776320457458496, "learning_rate": 8.99156089807271e-07, "loss": 0.0247, "step": 286940 }, { "epoch": 3.065868903253379, "grad_norm": 4.5298614501953125, "learning_rate": 8.99145971367707e-07, "loss": 0.0122, "step": 286950 }, { "epoch": 3.0659757465676587, "grad_norm": 0.3128644526004791, "learning_rate": 8.991358524774771e-07, "loss": 0.0079, "step": 286960 }, { "epoch": 3.066082589881938, "grad_norm": 0.6323238611221313, "learning_rate": 8.991257331365927e-07, "loss": 0.0409, "step": 286970 }, { "epoch": 3.0661894331962176, "grad_norm": 1.909227967262268, "learning_rate": 8.991156133450651e-07, "loss": 0.04, "step": 286980 }, { "epoch": 3.0662962765104975, "grad_norm": 0.20620636641979218, "learning_rate": 8.99105493102906e-07, "loss": 0.0166, "step": 286990 }, { "epoch": 3.066403119824777, "grad_norm": 7.206198215484619, "learning_rate": 8.990953724101266e-07, "loss": 0.0108, "step": 287000 }, { "epoch": 3.0665099631390564, "grad_norm": 0.07551629096269608, "learning_rate": 8.990852512667384e-07, "loss": 0.0034, "step": 287010 }, { "epoch": 3.0666168064533363, "grad_norm": 0.12467922270298004, "learning_rate": 8.99075129672753e-07, "loss": 0.038, "step": 287020 }, { "epoch": 3.066723649767616, "grad_norm": 0.09622941166162491, "learning_rate": 8.990650076281815e-07, "loss": 0.0009, "step": 287030 }, { "epoch": 3.0668304930818953, "grad_norm": 0.04226723313331604, "learning_rate": 8.990548851330356e-07, "loss": 0.0058, "step": 287040 }, { "epoch": 3.066937336396175, "grad_norm": 5.724898815155029, "learning_rate": 8.990447621873265e-07, "loss": 0.0158, "step": 287050 }, { "epoch": 3.0670441797104546, "grad_norm": 0.007091996259987354, "learning_rate": 8.990346387910656e-07, "loss": 0.0096, "step": 287060 }, { "epoch": 3.067151023024734, "grad_norm": 0.32402747869491577, "learning_rate": 8.990245149442648e-07, "loss": 0.0092, "step": 287070 }, { "epoch": 3.067257866339014, "grad_norm": 0.744736909866333, "learning_rate": 8.990143906469351e-07, "loss": 0.0272, "step": 287080 }, { "epoch": 3.0673647096532934, "grad_norm": 0.010196869261562824, "learning_rate": 8.99004265899088e-07, "loss": 0.0042, "step": 287090 }, { "epoch": 3.067471552967573, "grad_norm": 0.05276670679450035, "learning_rate": 8.989941407007349e-07, "loss": 0.0078, "step": 287100 }, { "epoch": 3.067578396281853, "grad_norm": 13.172696113586426, "learning_rate": 8.989840150518874e-07, "loss": 0.016, "step": 287110 }, { "epoch": 3.0676852395961323, "grad_norm": 0.009900526143610477, "learning_rate": 8.989738889525569e-07, "loss": 0.0332, "step": 287120 }, { "epoch": 3.0677920829104117, "grad_norm": 0.05675039067864418, "learning_rate": 8.989637624027546e-07, "loss": 0.0139, "step": 287130 }, { "epoch": 3.0678989262246916, "grad_norm": 0.09659626334905624, "learning_rate": 8.989536354024921e-07, "loss": 0.0109, "step": 287140 }, { "epoch": 3.068005769538971, "grad_norm": 0.01577080972492695, "learning_rate": 8.98943507951781e-07, "loss": 0.0112, "step": 287150 }, { "epoch": 3.0681126128532505, "grad_norm": 0.06689988076686859, "learning_rate": 8.989333800506325e-07, "loss": 0.0051, "step": 287160 }, { "epoch": 3.0682194561675304, "grad_norm": 5.474936008453369, "learning_rate": 8.989232516990581e-07, "loss": 0.0066, "step": 287170 }, { "epoch": 3.06832629948181, "grad_norm": 3.971508502960205, "learning_rate": 8.989131228970692e-07, "loss": 0.0355, "step": 287180 }, { "epoch": 3.0684331427960894, "grad_norm": 0.19984090328216553, "learning_rate": 8.989029936446772e-07, "loss": 0.0285, "step": 287190 }, { "epoch": 3.0685399861103693, "grad_norm": 0.07734503597021103, "learning_rate": 8.988928639418938e-07, "loss": 0.0031, "step": 287200 }, { "epoch": 3.0686468294246487, "grad_norm": 3.663752794265747, "learning_rate": 8.988827337887301e-07, "loss": 0.0297, "step": 287210 }, { "epoch": 3.068753672738928, "grad_norm": 4.387180328369141, "learning_rate": 8.988726031851978e-07, "loss": 0.0198, "step": 287220 }, { "epoch": 3.068860516053208, "grad_norm": 0.6259650588035583, "learning_rate": 8.988624721313083e-07, "loss": 0.0157, "step": 287230 }, { "epoch": 3.0689673593674875, "grad_norm": 0.028452899307012558, "learning_rate": 8.988523406270727e-07, "loss": 0.0091, "step": 287240 }, { "epoch": 3.069074202681767, "grad_norm": 1.8334612846374512, "learning_rate": 8.98842208672503e-07, "loss": 0.0061, "step": 287250 }, { "epoch": 3.069181045996047, "grad_norm": 0.03243383765220642, "learning_rate": 8.988320762676102e-07, "loss": 0.0158, "step": 287260 }, { "epoch": 3.0692878893103264, "grad_norm": 2.2855288982391357, "learning_rate": 8.988219434124058e-07, "loss": 0.0253, "step": 287270 }, { "epoch": 3.069394732624606, "grad_norm": 1.6001839637756348, "learning_rate": 8.988118101069014e-07, "loss": 0.0116, "step": 287280 }, { "epoch": 3.0695015759388857, "grad_norm": 0.030215151607990265, "learning_rate": 8.988016763511084e-07, "loss": 0.0395, "step": 287290 }, { "epoch": 3.069608419253165, "grad_norm": 1.9356627464294434, "learning_rate": 8.987915421450383e-07, "loss": 0.0043, "step": 287300 }, { "epoch": 3.0697152625674446, "grad_norm": 0.20959097146987915, "learning_rate": 8.987814074887022e-07, "loss": 0.0062, "step": 287310 }, { "epoch": 3.0698221058817245, "grad_norm": 5.469913482666016, "learning_rate": 8.98771272382112e-07, "loss": 0.0258, "step": 287320 }, { "epoch": 3.069928949196004, "grad_norm": 0.014564276672899723, "learning_rate": 8.987611368252787e-07, "loss": 0.0005, "step": 287330 }, { "epoch": 3.0700357925102835, "grad_norm": 0.3717736303806305, "learning_rate": 8.987510008182142e-07, "loss": 0.013, "step": 287340 }, { "epoch": 3.0701426358245634, "grad_norm": 0.13113367557525635, "learning_rate": 8.987408643609298e-07, "loss": 0.0137, "step": 287350 }, { "epoch": 3.070249479138843, "grad_norm": 1.5498117208480835, "learning_rate": 8.987307274534367e-07, "loss": 0.0105, "step": 287360 }, { "epoch": 3.0703563224531223, "grad_norm": 21.873559951782227, "learning_rate": 8.987205900957466e-07, "loss": 0.0485, "step": 287370 }, { "epoch": 3.070463165767402, "grad_norm": 0.04974561929702759, "learning_rate": 8.987104522878708e-07, "loss": 0.0067, "step": 287380 }, { "epoch": 3.0705700090816817, "grad_norm": 0.005156701896339655, "learning_rate": 8.987003140298208e-07, "loss": 0.0066, "step": 287390 }, { "epoch": 3.070676852395961, "grad_norm": 0.0019491728162392974, "learning_rate": 8.98690175321608e-07, "loss": 0.0029, "step": 287400 }, { "epoch": 3.070783695710241, "grad_norm": 0.14803113043308258, "learning_rate": 8.986800361632441e-07, "loss": 0.0141, "step": 287410 }, { "epoch": 3.0708905390245205, "grad_norm": 0.03446358069777489, "learning_rate": 8.986698965547401e-07, "loss": 0.0011, "step": 287420 }, { "epoch": 3.0709973823388, "grad_norm": 0.009231239557266235, "learning_rate": 8.986597564961079e-07, "loss": 0.0143, "step": 287430 }, { "epoch": 3.07110422565308, "grad_norm": 0.00546377245336771, "learning_rate": 8.986496159873586e-07, "loss": 0.0045, "step": 287440 }, { "epoch": 3.0712110689673593, "grad_norm": 0.0060086939483881, "learning_rate": 8.98639475028504e-07, "loss": 0.0096, "step": 287450 }, { "epoch": 3.071317912281639, "grad_norm": 1.971923589706421, "learning_rate": 8.986293336195552e-07, "loss": 0.0083, "step": 287460 }, { "epoch": 3.0714247555959187, "grad_norm": 0.5156643390655518, "learning_rate": 8.986191917605238e-07, "loss": 0.0022, "step": 287470 }, { "epoch": 3.071531598910198, "grad_norm": 0.12333449721336365, "learning_rate": 8.986090494514213e-07, "loss": 0.0009, "step": 287480 }, { "epoch": 3.071638442224478, "grad_norm": 0.20639106631278992, "learning_rate": 8.98598906692259e-07, "loss": 0.0034, "step": 287490 }, { "epoch": 3.0717452855387575, "grad_norm": 13.916122436523438, "learning_rate": 8.985887634830485e-07, "loss": 0.017, "step": 287500 }, { "epoch": 3.071852128853037, "grad_norm": 3.578695297241211, "learning_rate": 8.985786198238013e-07, "loss": 0.0052, "step": 287510 }, { "epoch": 3.071958972167317, "grad_norm": 0.17105518281459808, "learning_rate": 8.985684757145287e-07, "loss": 0.0218, "step": 287520 }, { "epoch": 3.0720658154815963, "grad_norm": 2.289881944656372, "learning_rate": 8.985583311552421e-07, "loss": 0.0143, "step": 287530 }, { "epoch": 3.0721726587958758, "grad_norm": 3.273681879043579, "learning_rate": 8.985481861459531e-07, "loss": 0.0341, "step": 287540 }, { "epoch": 3.0722795021101557, "grad_norm": 0.4767160713672638, "learning_rate": 8.985380406866732e-07, "loss": 0.0026, "step": 287550 }, { "epoch": 3.072386345424435, "grad_norm": 4.038381099700928, "learning_rate": 8.985278947774137e-07, "loss": 0.0128, "step": 287560 }, { "epoch": 3.0724931887387146, "grad_norm": 0.010647904127836227, "learning_rate": 8.985177484181862e-07, "loss": 0.0073, "step": 287570 }, { "epoch": 3.0726000320529945, "grad_norm": 1.8885297775268555, "learning_rate": 8.985076016090021e-07, "loss": 0.0021, "step": 287580 }, { "epoch": 3.072706875367274, "grad_norm": 0.007208671886473894, "learning_rate": 8.984974543498727e-07, "loss": 0.0152, "step": 287590 }, { "epoch": 3.0728137186815534, "grad_norm": 0.005143657326698303, "learning_rate": 8.984873066408097e-07, "loss": 0.0038, "step": 287600 }, { "epoch": 3.0729205619958333, "grad_norm": 0.2802124321460724, "learning_rate": 8.984771584818245e-07, "loss": 0.0091, "step": 287610 }, { "epoch": 3.0730274053101128, "grad_norm": 0.0025107681285589933, "learning_rate": 8.984670098729285e-07, "loss": 0.015, "step": 287620 }, { "epoch": 3.073134248624392, "grad_norm": 1.936548113822937, "learning_rate": 8.984568608141331e-07, "loss": 0.0413, "step": 287630 }, { "epoch": 3.073241091938672, "grad_norm": 1.256515622138977, "learning_rate": 8.984467113054498e-07, "loss": 0.0129, "step": 287640 }, { "epoch": 3.0733479352529516, "grad_norm": 0.016065560281276703, "learning_rate": 8.984365613468902e-07, "loss": 0.0122, "step": 287650 }, { "epoch": 3.073454778567231, "grad_norm": 2.9974958896636963, "learning_rate": 8.984264109384656e-07, "loss": 0.0069, "step": 287660 }, { "epoch": 3.073561621881511, "grad_norm": 1.7623926401138306, "learning_rate": 8.984162600801874e-07, "loss": 0.0054, "step": 287670 }, { "epoch": 3.0736684651957904, "grad_norm": 0.08079835027456284, "learning_rate": 8.984061087720673e-07, "loss": 0.0032, "step": 287680 }, { "epoch": 3.07377530851007, "grad_norm": 0.016222702339291573, "learning_rate": 8.983959570141166e-07, "loss": 0.0014, "step": 287690 }, { "epoch": 3.0738821518243498, "grad_norm": 0.23589643836021423, "learning_rate": 8.983858048063469e-07, "loss": 0.0145, "step": 287700 }, { "epoch": 3.073988995138629, "grad_norm": 0.22076210379600525, "learning_rate": 8.983756521487694e-07, "loss": 0.0313, "step": 287710 }, { "epoch": 3.0740958384529087, "grad_norm": 0.01869497075676918, "learning_rate": 8.983654990413958e-07, "loss": 0.009, "step": 287720 }, { "epoch": 3.0742026817671886, "grad_norm": 0.04840269312262535, "learning_rate": 8.983553454842376e-07, "loss": 0.0136, "step": 287730 }, { "epoch": 3.074309525081468, "grad_norm": 0.04987619072198868, "learning_rate": 8.983451914773059e-07, "loss": 0.0066, "step": 287740 }, { "epoch": 3.0744163683957475, "grad_norm": 6.367135524749756, "learning_rate": 8.983350370206126e-07, "loss": 0.0322, "step": 287750 }, { "epoch": 3.0745232117100274, "grad_norm": 0.0978918969631195, "learning_rate": 8.983248821141689e-07, "loss": 0.013, "step": 287760 }, { "epoch": 3.074630055024307, "grad_norm": 0.4790203273296356, "learning_rate": 8.983147267579865e-07, "loss": 0.0074, "step": 287770 }, { "epoch": 3.0747368983385863, "grad_norm": 0.009550894610583782, "learning_rate": 8.983045709520765e-07, "loss": 0.0061, "step": 287780 }, { "epoch": 3.0748437416528662, "grad_norm": 0.009742163121700287, "learning_rate": 8.982944146964506e-07, "loss": 0.013, "step": 287790 }, { "epoch": 3.0749505849671457, "grad_norm": 0.05326569452881813, "learning_rate": 8.982842579911204e-07, "loss": 0.006, "step": 287800 }, { "epoch": 3.075057428281425, "grad_norm": 5.676773548126221, "learning_rate": 8.98274100836097e-07, "loss": 0.003, "step": 287810 }, { "epoch": 3.075164271595705, "grad_norm": 1.0527995824813843, "learning_rate": 8.982639432313924e-07, "loss": 0.0121, "step": 287820 }, { "epoch": 3.0752711149099845, "grad_norm": 6.556388854980469, "learning_rate": 8.982537851770174e-07, "loss": 0.0062, "step": 287830 }, { "epoch": 3.075377958224264, "grad_norm": 1.131380558013916, "learning_rate": 8.98243626672984e-07, "loss": 0.0292, "step": 287840 }, { "epoch": 3.075484801538544, "grad_norm": 0.004150205757468939, "learning_rate": 8.982334677193035e-07, "loss": 0.0085, "step": 287850 }, { "epoch": 3.0755916448528233, "grad_norm": 0.0998334214091301, "learning_rate": 8.982233083159872e-07, "loss": 0.0014, "step": 287860 }, { "epoch": 3.075698488167103, "grad_norm": 7.961594104766846, "learning_rate": 8.982131484630468e-07, "loss": 0.0419, "step": 287870 }, { "epoch": 3.0758053314813827, "grad_norm": 0.01069530751556158, "learning_rate": 8.982029881604938e-07, "loss": 0.0228, "step": 287880 }, { "epoch": 3.075912174795662, "grad_norm": 4.0458221435546875, "learning_rate": 8.981928274083396e-07, "loss": 0.0119, "step": 287890 }, { "epoch": 3.0760190181099416, "grad_norm": 0.009603946469724178, "learning_rate": 8.981826662065955e-07, "loss": 0.001, "step": 287900 }, { "epoch": 3.0761258614242215, "grad_norm": 1.507097601890564, "learning_rate": 8.981725045552732e-07, "loss": 0.0145, "step": 287910 }, { "epoch": 3.076232704738501, "grad_norm": 8.227815628051758, "learning_rate": 8.981623424543841e-07, "loss": 0.0227, "step": 287920 }, { "epoch": 3.0763395480527804, "grad_norm": 0.8137621879577637, "learning_rate": 8.981521799039397e-07, "loss": 0.0058, "step": 287930 }, { "epoch": 3.0764463913670603, "grad_norm": 6.102063179016113, "learning_rate": 8.981420169039513e-07, "loss": 0.0295, "step": 287940 }, { "epoch": 3.07655323468134, "grad_norm": 1.3942686319351196, "learning_rate": 8.981318534544307e-07, "loss": 0.0057, "step": 287950 }, { "epoch": 3.0766600779956192, "grad_norm": 3.6894147396087646, "learning_rate": 8.98121689555389e-07, "loss": 0.0198, "step": 287960 }, { "epoch": 3.076766921309899, "grad_norm": 0.024929940700531006, "learning_rate": 8.981115252068378e-07, "loss": 0.0103, "step": 287970 }, { "epoch": 3.0768737646241786, "grad_norm": 0.09285345673561096, "learning_rate": 8.981013604087889e-07, "loss": 0.0029, "step": 287980 }, { "epoch": 3.076980607938458, "grad_norm": 0.3052099347114563, "learning_rate": 8.980911951612534e-07, "loss": 0.018, "step": 287990 }, { "epoch": 3.077087451252738, "grad_norm": 4.521749973297119, "learning_rate": 8.980810294642429e-07, "loss": 0.0303, "step": 288000 }, { "epoch": 3.0771942945670174, "grad_norm": 0.16977907717227936, "learning_rate": 8.98070863317769e-07, "loss": 0.0221, "step": 288010 }, { "epoch": 3.077301137881297, "grad_norm": 0.003094214014708996, "learning_rate": 8.980606967218428e-07, "loss": 0.008, "step": 288020 }, { "epoch": 3.077407981195577, "grad_norm": 0.0035938431974500418, "learning_rate": 8.980505296764762e-07, "loss": 0.0083, "step": 288030 }, { "epoch": 3.0775148245098563, "grad_norm": 0.2242884635925293, "learning_rate": 8.980403621816803e-07, "loss": 0.0022, "step": 288040 }, { "epoch": 3.0776216678241357, "grad_norm": 5.435515403747559, "learning_rate": 8.98030194237467e-07, "loss": 0.0246, "step": 288050 }, { "epoch": 3.0777285111384156, "grad_norm": 0.05793789029121399, "learning_rate": 8.980200258438476e-07, "loss": 0.0007, "step": 288060 }, { "epoch": 3.077835354452695, "grad_norm": 8.555563926696777, "learning_rate": 8.980098570008335e-07, "loss": 0.0517, "step": 288070 }, { "epoch": 3.0779421977669745, "grad_norm": 0.1632397472858429, "learning_rate": 8.979996877084362e-07, "loss": 0.0075, "step": 288080 }, { "epoch": 3.0780490410812544, "grad_norm": 5.024763584136963, "learning_rate": 8.979895179666672e-07, "loss": 0.0073, "step": 288090 }, { "epoch": 3.078155884395534, "grad_norm": 6.50036096572876, "learning_rate": 8.979793477755379e-07, "loss": 0.0266, "step": 288100 }, { "epoch": 3.0782627277098134, "grad_norm": 4.6602582931518555, "learning_rate": 8.979691771350602e-07, "loss": 0.0045, "step": 288110 }, { "epoch": 3.0783695710240933, "grad_norm": 1.2541704177856445, "learning_rate": 8.97959006045245e-07, "loss": 0.0525, "step": 288120 }, { "epoch": 3.0784764143383727, "grad_norm": 0.002017536200582981, "learning_rate": 8.979488345061041e-07, "loss": 0.0467, "step": 288130 }, { "epoch": 3.078583257652652, "grad_norm": 0.025471854954957962, "learning_rate": 8.979386625176488e-07, "loss": 0.0001, "step": 288140 }, { "epoch": 3.078690100966932, "grad_norm": 2.5659308433532715, "learning_rate": 8.979284900798909e-07, "loss": 0.0041, "step": 288150 }, { "epoch": 3.0787969442812115, "grad_norm": 3.491387128829956, "learning_rate": 8.979183171928417e-07, "loss": 0.0055, "step": 288160 }, { "epoch": 3.0789037875954914, "grad_norm": 0.9214094281196594, "learning_rate": 8.979081438565126e-07, "loss": 0.0256, "step": 288170 }, { "epoch": 3.079010630909771, "grad_norm": 0.6588960886001587, "learning_rate": 8.978979700709152e-07, "loss": 0.0232, "step": 288180 }, { "epoch": 3.0791174742240504, "grad_norm": 4.457760810852051, "learning_rate": 8.978877958360611e-07, "loss": 0.0136, "step": 288190 }, { "epoch": 3.07922431753833, "grad_norm": 0.14312665164470673, "learning_rate": 8.978776211519615e-07, "loss": 0.0177, "step": 288200 }, { "epoch": 3.0793311608526097, "grad_norm": 1.4599733352661133, "learning_rate": 8.978674460186279e-07, "loss": 0.0263, "step": 288210 }, { "epoch": 3.079438004166889, "grad_norm": 0.07336804270744324, "learning_rate": 8.978572704360723e-07, "loss": 0.0063, "step": 288220 }, { "epoch": 3.079544847481169, "grad_norm": 7.191471576690674, "learning_rate": 8.978470944043055e-07, "loss": 0.0241, "step": 288230 }, { "epoch": 3.0796516907954485, "grad_norm": 4.39608097076416, "learning_rate": 8.978369179233394e-07, "loss": 0.011, "step": 288240 }, { "epoch": 3.079758534109728, "grad_norm": 5.276379585266113, "learning_rate": 8.978267409931854e-07, "loss": 0.0146, "step": 288250 }, { "epoch": 3.079865377424008, "grad_norm": 0.01570814661681652, "learning_rate": 8.978165636138551e-07, "loss": 0.0035, "step": 288260 }, { "epoch": 3.0799722207382874, "grad_norm": 0.16205544769763947, "learning_rate": 8.978063857853597e-07, "loss": 0.0059, "step": 288270 }, { "epoch": 3.080079064052567, "grad_norm": 3.239436626434326, "learning_rate": 8.977962075077109e-07, "loss": 0.0116, "step": 288280 }, { "epoch": 3.0801859073668467, "grad_norm": 0.805493950843811, "learning_rate": 8.977860287809203e-07, "loss": 0.0018, "step": 288290 }, { "epoch": 3.080292750681126, "grad_norm": 0.004756065551191568, "learning_rate": 8.97775849604999e-07, "loss": 0.0008, "step": 288300 }, { "epoch": 3.0803995939954056, "grad_norm": 5.922921180725098, "learning_rate": 8.97765669979959e-07, "loss": 0.0341, "step": 288310 }, { "epoch": 3.0805064373096855, "grad_norm": 0.0006591762648895383, "learning_rate": 8.977554899058113e-07, "loss": 0.0272, "step": 288320 }, { "epoch": 3.080613280623965, "grad_norm": 0.006355200428515673, "learning_rate": 8.977453093825678e-07, "loss": 0.0057, "step": 288330 }, { "epoch": 3.0807201239382445, "grad_norm": 1.393497109413147, "learning_rate": 8.977351284102397e-07, "loss": 0.0525, "step": 288340 }, { "epoch": 3.0808269672525244, "grad_norm": 1.1981161832809448, "learning_rate": 8.977249469888387e-07, "loss": 0.0121, "step": 288350 }, { "epoch": 3.080933810566804, "grad_norm": 3.9807283878326416, "learning_rate": 8.977147651183763e-07, "loss": 0.0206, "step": 288360 }, { "epoch": 3.0810406538810833, "grad_norm": 2.749159812927246, "learning_rate": 8.977045827988638e-07, "loss": 0.0069, "step": 288370 }, { "epoch": 3.081147497195363, "grad_norm": 0.06938575208187103, "learning_rate": 8.976944000303128e-07, "loss": 0.0099, "step": 288380 }, { "epoch": 3.0812543405096426, "grad_norm": 0.008061693981289864, "learning_rate": 8.976842168127349e-07, "loss": 0.0153, "step": 288390 }, { "epoch": 3.081361183823922, "grad_norm": 0.025623144581913948, "learning_rate": 8.976740331461414e-07, "loss": 0.0023, "step": 288400 }, { "epoch": 3.081468027138202, "grad_norm": 5.528353691101074, "learning_rate": 8.97663849030544e-07, "loss": 0.0115, "step": 288410 }, { "epoch": 3.0815748704524815, "grad_norm": 0.35534247756004333, "learning_rate": 8.97653664465954e-07, "loss": 0.0155, "step": 288420 }, { "epoch": 3.081681713766761, "grad_norm": 0.11456657946109772, "learning_rate": 8.976434794523831e-07, "loss": 0.011, "step": 288430 }, { "epoch": 3.081788557081041, "grad_norm": 0.7135921716690063, "learning_rate": 8.976332939898425e-07, "loss": 0.0021, "step": 288440 }, { "epoch": 3.0818954003953203, "grad_norm": 0.9309313893318176, "learning_rate": 8.976231080783442e-07, "loss": 0.0147, "step": 288450 }, { "epoch": 3.0820022437095997, "grad_norm": 0.055882345885038376, "learning_rate": 8.976129217178992e-07, "loss": 0.0012, "step": 288460 }, { "epoch": 3.0821090870238796, "grad_norm": 0.034800250083208084, "learning_rate": 8.976027349085193e-07, "loss": 0.0045, "step": 288470 }, { "epoch": 3.082215930338159, "grad_norm": 3.859835147857666, "learning_rate": 8.975925476502159e-07, "loss": 0.0236, "step": 288480 }, { "epoch": 3.0823227736524386, "grad_norm": 1.232467532157898, "learning_rate": 8.975823599430004e-07, "loss": 0.0071, "step": 288490 }, { "epoch": 3.0824296169667185, "grad_norm": 0.1037399098277092, "learning_rate": 8.975721717868844e-07, "loss": 0.0214, "step": 288500 }, { "epoch": 3.082536460280998, "grad_norm": 0.017066963016986847, "learning_rate": 8.975619831818795e-07, "loss": 0.0077, "step": 288510 }, { "epoch": 3.0826433035952774, "grad_norm": 0.019341209903359413, "learning_rate": 8.975517941279972e-07, "loss": 0.0077, "step": 288520 }, { "epoch": 3.0827501469095573, "grad_norm": 0.4767899811267853, "learning_rate": 8.975416046252488e-07, "loss": 0.0031, "step": 288530 }, { "epoch": 3.0828569902238367, "grad_norm": 0.02285408414900303, "learning_rate": 8.97531414673646e-07, "loss": 0.0843, "step": 288540 }, { "epoch": 3.082963833538116, "grad_norm": 0.02407090924680233, "learning_rate": 8.975212242732002e-07, "loss": 0.0057, "step": 288550 }, { "epoch": 3.083070676852396, "grad_norm": 0.02534579299390316, "learning_rate": 8.975110334239228e-07, "loss": 0.0122, "step": 288560 }, { "epoch": 3.0831775201666756, "grad_norm": 0.016323082149028778, "learning_rate": 8.975008421258257e-07, "loss": 0.0859, "step": 288570 }, { "epoch": 3.083284363480955, "grad_norm": 0.865791916847229, "learning_rate": 8.974906503789199e-07, "loss": 0.0087, "step": 288580 }, { "epoch": 3.083391206795235, "grad_norm": 0.4939447045326233, "learning_rate": 8.974804581832173e-07, "loss": 0.0111, "step": 288590 }, { "epoch": 3.0834980501095144, "grad_norm": 0.011997891589999199, "learning_rate": 8.974702655387292e-07, "loss": 0.0057, "step": 288600 }, { "epoch": 3.083604893423794, "grad_norm": 0.41028323769569397, "learning_rate": 8.974600724454672e-07, "loss": 0.0084, "step": 288610 }, { "epoch": 3.0837117367380737, "grad_norm": 0.11802465468645096, "learning_rate": 8.974498789034429e-07, "loss": 0.005, "step": 288620 }, { "epoch": 3.083818580052353, "grad_norm": 0.06025872007012367, "learning_rate": 8.974396849126675e-07, "loss": 0.0029, "step": 288630 }, { "epoch": 3.0839254233666327, "grad_norm": 0.11478210240602493, "learning_rate": 8.974294904731527e-07, "loss": 0.0221, "step": 288640 }, { "epoch": 3.0840322666809126, "grad_norm": 6.4884538650512695, "learning_rate": 8.9741929558491e-07, "loss": 0.0351, "step": 288650 }, { "epoch": 3.084139109995192, "grad_norm": 0.007959363982081413, "learning_rate": 8.974091002479511e-07, "loss": 0.0128, "step": 288660 }, { "epoch": 3.0842459533094715, "grad_norm": 0.22747036814689636, "learning_rate": 8.973989044622872e-07, "loss": 0.0106, "step": 288670 }, { "epoch": 3.0843527966237514, "grad_norm": 4.57974100112915, "learning_rate": 8.973887082279299e-07, "loss": 0.0085, "step": 288680 }, { "epoch": 3.084459639938031, "grad_norm": 0.7863629460334778, "learning_rate": 8.973785115448908e-07, "loss": 0.0619, "step": 288690 }, { "epoch": 3.0845664832523103, "grad_norm": 0.47008073329925537, "learning_rate": 8.973683144131814e-07, "loss": 0.0153, "step": 288700 }, { "epoch": 3.08467332656659, "grad_norm": 0.02861318551003933, "learning_rate": 8.973581168328133e-07, "loss": 0.0337, "step": 288710 }, { "epoch": 3.0847801698808697, "grad_norm": 1.2174080610275269, "learning_rate": 8.973479188037978e-07, "loss": 0.0176, "step": 288720 }, { "epoch": 3.084887013195149, "grad_norm": 0.006522917188704014, "learning_rate": 8.973377203261464e-07, "loss": 0.1067, "step": 288730 }, { "epoch": 3.084993856509429, "grad_norm": 2.0071210861206055, "learning_rate": 8.973275213998708e-07, "loss": 0.0103, "step": 288740 }, { "epoch": 3.0851006998237085, "grad_norm": 0.4698339104652405, "learning_rate": 8.973173220249824e-07, "loss": 0.0286, "step": 288750 }, { "epoch": 3.085207543137988, "grad_norm": 0.05152482911944389, "learning_rate": 8.97307122201493e-07, "loss": 0.0283, "step": 288760 }, { "epoch": 3.085314386452268, "grad_norm": 0.6989242434501648, "learning_rate": 8.972969219294135e-07, "loss": 0.0195, "step": 288770 }, { "epoch": 3.0854212297665473, "grad_norm": 0.06999785453081131, "learning_rate": 8.97286721208756e-07, "loss": 0.0064, "step": 288780 }, { "epoch": 3.0855280730808268, "grad_norm": 0.01733580231666565, "learning_rate": 8.972765200395318e-07, "loss": 0.0192, "step": 288790 }, { "epoch": 3.0856349163951067, "grad_norm": 1.6391243934631348, "learning_rate": 8.972663184217524e-07, "loss": 0.0227, "step": 288800 }, { "epoch": 3.085741759709386, "grad_norm": 0.4379412531852722, "learning_rate": 8.972561163554293e-07, "loss": 0.0057, "step": 288810 }, { "epoch": 3.0858486030236656, "grad_norm": 0.04313064366579056, "learning_rate": 8.972459138405742e-07, "loss": 0.0305, "step": 288820 }, { "epoch": 3.0859554463379455, "grad_norm": 0.039444468915462494, "learning_rate": 8.972357108771984e-07, "loss": 0.0178, "step": 288830 }, { "epoch": 3.086062289652225, "grad_norm": 6.522125244140625, "learning_rate": 8.972255074653135e-07, "loss": 0.0068, "step": 288840 }, { "epoch": 3.0861691329665044, "grad_norm": 7.351005554199219, "learning_rate": 8.972153036049309e-07, "loss": 0.0239, "step": 288850 }, { "epoch": 3.0862759762807843, "grad_norm": 0.004561757203191519, "learning_rate": 8.972050992960625e-07, "loss": 0.0105, "step": 288860 }, { "epoch": 3.0863828195950638, "grad_norm": 23.902734756469727, "learning_rate": 8.971948945387193e-07, "loss": 0.0457, "step": 288870 }, { "epoch": 3.0864896629093432, "grad_norm": 3.6546456813812256, "learning_rate": 8.971846893329132e-07, "loss": 0.0425, "step": 288880 }, { "epoch": 3.086596506223623, "grad_norm": 0.02668107859790325, "learning_rate": 8.971744836786557e-07, "loss": 0.0105, "step": 288890 }, { "epoch": 3.0867033495379026, "grad_norm": 1.5704796314239502, "learning_rate": 8.97164277575958e-07, "loss": 0.0057, "step": 288900 }, { "epoch": 3.086810192852182, "grad_norm": 0.4644375145435333, "learning_rate": 8.97154071024832e-07, "loss": 0.0706, "step": 288910 }, { "epoch": 3.086917036166462, "grad_norm": 0.1664733588695526, "learning_rate": 8.971438640252892e-07, "loss": 0.0192, "step": 288920 }, { "epoch": 3.0870238794807414, "grad_norm": 0.00806945376098156, "learning_rate": 8.971336565773409e-07, "loss": 0.0166, "step": 288930 }, { "epoch": 3.0871307227950213, "grad_norm": 2.8827013969421387, "learning_rate": 8.971234486809987e-07, "loss": 0.0099, "step": 288940 }, { "epoch": 3.087237566109301, "grad_norm": 0.01962532475590706, "learning_rate": 8.971132403362742e-07, "loss": 0.0235, "step": 288950 }, { "epoch": 3.0873444094235802, "grad_norm": 0.02007615566253662, "learning_rate": 8.971030315431788e-07, "loss": 0.0037, "step": 288960 }, { "epoch": 3.08745125273786, "grad_norm": 3.300337791442871, "learning_rate": 8.970928223017241e-07, "loss": 0.0079, "step": 288970 }, { "epoch": 3.0875580960521396, "grad_norm": 4.111143589019775, "learning_rate": 8.970826126119218e-07, "loss": 0.0099, "step": 288980 }, { "epoch": 3.087664939366419, "grad_norm": 2.703155040740967, "learning_rate": 8.97072402473783e-07, "loss": 0.0052, "step": 288990 }, { "epoch": 3.087771782680699, "grad_norm": 8.363953590393066, "learning_rate": 8.970621918873198e-07, "loss": 0.0177, "step": 289000 }, { "epoch": 3.0878786259949784, "grad_norm": 0.05223860591650009, "learning_rate": 8.970519808525431e-07, "loss": 0.0014, "step": 289010 }, { "epoch": 3.087985469309258, "grad_norm": 0.007391206920146942, "learning_rate": 8.970417693694649e-07, "loss": 0.0069, "step": 289020 }, { "epoch": 3.088092312623538, "grad_norm": 1.9142258167266846, "learning_rate": 8.970315574380966e-07, "loss": 0.0117, "step": 289030 }, { "epoch": 3.0881991559378172, "grad_norm": 0.020993541926145554, "learning_rate": 8.970213450584497e-07, "loss": 0.0067, "step": 289040 }, { "epoch": 3.0883059992520967, "grad_norm": 0.03084452822804451, "learning_rate": 8.970111322305358e-07, "loss": 0.0194, "step": 289050 }, { "epoch": 3.0884128425663766, "grad_norm": 3.7939255237579346, "learning_rate": 8.970009189543663e-07, "loss": 0.0324, "step": 289060 }, { "epoch": 3.088519685880656, "grad_norm": 0.008920831605792046, "learning_rate": 8.969907052299528e-07, "loss": 0.0177, "step": 289070 }, { "epoch": 3.0886265291949355, "grad_norm": 0.14009076356887817, "learning_rate": 8.969804910573068e-07, "loss": 0.0059, "step": 289080 }, { "epoch": 3.0887333725092154, "grad_norm": 15.531664848327637, "learning_rate": 8.9697027643644e-07, "loss": 0.0203, "step": 289090 }, { "epoch": 3.088840215823495, "grad_norm": 1.3494700193405151, "learning_rate": 8.969600613673636e-07, "loss": 0.0106, "step": 289100 }, { "epoch": 3.0889470591377743, "grad_norm": 0.7979017496109009, "learning_rate": 8.969498458500894e-07, "loss": 0.0136, "step": 289110 }, { "epoch": 3.0890539024520542, "grad_norm": 0.001607669168151915, "learning_rate": 8.969396298846288e-07, "loss": 0.0114, "step": 289120 }, { "epoch": 3.0891607457663337, "grad_norm": 0.25337108969688416, "learning_rate": 8.969294134709935e-07, "loss": 0.0074, "step": 289130 }, { "epoch": 3.089267589080613, "grad_norm": 0.022123850882053375, "learning_rate": 8.969191966091949e-07, "loss": 0.0243, "step": 289140 }, { "epoch": 3.089374432394893, "grad_norm": 0.3727828562259674, "learning_rate": 8.969089792992446e-07, "loss": 0.0055, "step": 289150 }, { "epoch": 3.0894812757091725, "grad_norm": 0.6727485656738281, "learning_rate": 8.968987615411541e-07, "loss": 0.008, "step": 289160 }, { "epoch": 3.089588119023452, "grad_norm": 0.30458512902259827, "learning_rate": 8.968885433349349e-07, "loss": 0.0122, "step": 289170 }, { "epoch": 3.089694962337732, "grad_norm": 0.0330205000936985, "learning_rate": 8.968783246805987e-07, "loss": 0.0279, "step": 289180 }, { "epoch": 3.0898018056520113, "grad_norm": 0.04416801035404205, "learning_rate": 8.968681055781568e-07, "loss": 0.0158, "step": 289190 }, { "epoch": 3.089908648966291, "grad_norm": 0.02665114775300026, "learning_rate": 8.96857886027621e-07, "loss": 0.0022, "step": 289200 }, { "epoch": 3.0900154922805707, "grad_norm": 0.04796106740832329, "learning_rate": 8.968476660290026e-07, "loss": 0.0461, "step": 289210 }, { "epoch": 3.09012233559485, "grad_norm": 0.10278326272964478, "learning_rate": 8.968374455823132e-07, "loss": 0.014, "step": 289220 }, { "epoch": 3.0902291789091296, "grad_norm": 0.005074453540146351, "learning_rate": 8.968272246875643e-07, "loss": 0.0146, "step": 289230 }, { "epoch": 3.0903360222234095, "grad_norm": 0.06192314997315407, "learning_rate": 8.968170033447675e-07, "loss": 0.009, "step": 289240 }, { "epoch": 3.090442865537689, "grad_norm": 4.953819751739502, "learning_rate": 8.968067815539345e-07, "loss": 0.0053, "step": 289250 }, { "epoch": 3.0905497088519684, "grad_norm": 1.5714901685714722, "learning_rate": 8.967965593150767e-07, "loss": 0.0058, "step": 289260 }, { "epoch": 3.0906565521662483, "grad_norm": 5.253169059753418, "learning_rate": 8.967863366282056e-07, "loss": 0.0105, "step": 289270 }, { "epoch": 3.090763395480528, "grad_norm": 0.009534655138850212, "learning_rate": 8.967761134933327e-07, "loss": 0.0143, "step": 289280 }, { "epoch": 3.0908702387948073, "grad_norm": 0.11287958174943924, "learning_rate": 8.967658899104698e-07, "loss": 0.002, "step": 289290 }, { "epoch": 3.090977082109087, "grad_norm": 0.051237016916275024, "learning_rate": 8.96755665879628e-07, "loss": 0.0135, "step": 289300 }, { "epoch": 3.0910839254233666, "grad_norm": 8.020614624023438, "learning_rate": 8.967454414008193e-07, "loss": 0.0086, "step": 289310 }, { "epoch": 3.091190768737646, "grad_norm": 3.093043804168701, "learning_rate": 8.96735216474055e-07, "loss": 0.0104, "step": 289320 }, { "epoch": 3.091297612051926, "grad_norm": 0.6227348446846008, "learning_rate": 8.967249910993467e-07, "loss": 0.0232, "step": 289330 }, { "epoch": 3.0914044553662055, "grad_norm": 0.026723597198724747, "learning_rate": 8.96714765276706e-07, "loss": 0.0146, "step": 289340 }, { "epoch": 3.091511298680485, "grad_norm": 9.891785621643066, "learning_rate": 8.967045390061442e-07, "loss": 0.0098, "step": 289350 }, { "epoch": 3.091618141994765, "grad_norm": 0.1547866314649582, "learning_rate": 8.966943122876733e-07, "loss": 0.0157, "step": 289360 }, { "epoch": 3.0917249853090443, "grad_norm": 0.15272648632526398, "learning_rate": 8.966840851213044e-07, "loss": 0.0102, "step": 289370 }, { "epoch": 3.0918318286233237, "grad_norm": 6.8179931640625, "learning_rate": 8.966738575070493e-07, "loss": 0.0226, "step": 289380 }, { "epoch": 3.0919386719376036, "grad_norm": 15.660589218139648, "learning_rate": 8.966636294449195e-07, "loss": 0.0661, "step": 289390 }, { "epoch": 3.092045515251883, "grad_norm": 0.37836965918540955, "learning_rate": 8.966534009349266e-07, "loss": 0.0033, "step": 289400 }, { "epoch": 3.0921523585661626, "grad_norm": 0.4688059091567993, "learning_rate": 8.96643171977082e-07, "loss": 0.0172, "step": 289410 }, { "epoch": 3.0922592018804425, "grad_norm": 6.017228603363037, "learning_rate": 8.966329425713972e-07, "loss": 0.0108, "step": 289420 }, { "epoch": 3.092366045194722, "grad_norm": 0.002414014423266053, "learning_rate": 8.96622712717884e-07, "loss": 0.0112, "step": 289430 }, { "epoch": 3.0924728885090014, "grad_norm": 9.192256927490234, "learning_rate": 8.966124824165537e-07, "loss": 0.0245, "step": 289440 }, { "epoch": 3.0925797318232813, "grad_norm": 0.6641727089881897, "learning_rate": 8.966022516674181e-07, "loss": 0.0034, "step": 289450 }, { "epoch": 3.0926865751375607, "grad_norm": 0.21739321947097778, "learning_rate": 8.965920204704887e-07, "loss": 0.0051, "step": 289460 }, { "epoch": 3.09279341845184, "grad_norm": 2.837951898574829, "learning_rate": 8.965817888257767e-07, "loss": 0.0219, "step": 289470 }, { "epoch": 3.09290026176612, "grad_norm": 0.2208791822195053, "learning_rate": 8.965715567332941e-07, "loss": 0.0033, "step": 289480 }, { "epoch": 3.0930071050803996, "grad_norm": 2.098116159439087, "learning_rate": 8.965613241930523e-07, "loss": 0.0127, "step": 289490 }, { "epoch": 3.093113948394679, "grad_norm": 3.874149799346924, "learning_rate": 8.965510912050628e-07, "loss": 0.013, "step": 289500 }, { "epoch": 3.093220791708959, "grad_norm": 0.7203812599182129, "learning_rate": 8.965408577693371e-07, "loss": 0.002, "step": 289510 }, { "epoch": 3.0933276350232384, "grad_norm": 0.06007407605648041, "learning_rate": 8.965306238858871e-07, "loss": 0.0251, "step": 289520 }, { "epoch": 3.093434478337518, "grad_norm": 5.374155521392822, "learning_rate": 8.965203895547239e-07, "loss": 0.0307, "step": 289530 }, { "epoch": 3.0935413216517977, "grad_norm": 0.04881800338625908, "learning_rate": 8.965101547758592e-07, "loss": 0.0043, "step": 289540 }, { "epoch": 3.093648164966077, "grad_norm": 2.1419501304626465, "learning_rate": 8.964999195493048e-07, "loss": 0.0718, "step": 289550 }, { "epoch": 3.0937550082803567, "grad_norm": 13.529706001281738, "learning_rate": 8.964896838750719e-07, "loss": 0.0066, "step": 289560 }, { "epoch": 3.0938618515946366, "grad_norm": 0.023125186562538147, "learning_rate": 8.964794477531723e-07, "loss": 0.0125, "step": 289570 }, { "epoch": 3.093968694908916, "grad_norm": 0.04993920400738716, "learning_rate": 8.964692111836174e-07, "loss": 0.0027, "step": 289580 }, { "epoch": 3.0940755382231955, "grad_norm": 0.08909013122320175, "learning_rate": 8.964589741664191e-07, "loss": 0.0126, "step": 289590 }, { "epoch": 3.0941823815374754, "grad_norm": 0.012480133213102818, "learning_rate": 8.964487367015883e-07, "loss": 0.0036, "step": 289600 }, { "epoch": 3.094289224851755, "grad_norm": 0.2733028829097748, "learning_rate": 8.964384987891371e-07, "loss": 0.0005, "step": 289610 }, { "epoch": 3.0943960681660343, "grad_norm": 5.148642063140869, "learning_rate": 8.964282604290772e-07, "loss": 0.0024, "step": 289620 }, { "epoch": 3.094502911480314, "grad_norm": 0.011641845107078552, "learning_rate": 8.964180216214195e-07, "loss": 0.0139, "step": 289630 }, { "epoch": 3.0946097547945937, "grad_norm": 0.015336097218096256, "learning_rate": 8.964077823661761e-07, "loss": 0.0022, "step": 289640 }, { "epoch": 3.0947165981088736, "grad_norm": 0.014513913542032242, "learning_rate": 8.963975426633583e-07, "loss": 0.0184, "step": 289650 }, { "epoch": 3.094823441423153, "grad_norm": 1.675203561782837, "learning_rate": 8.963873025129779e-07, "loss": 0.0014, "step": 289660 }, { "epoch": 3.0949302847374325, "grad_norm": 0.1135515421628952, "learning_rate": 8.963770619150462e-07, "loss": 0.0063, "step": 289670 }, { "epoch": 3.095037128051712, "grad_norm": 4.575657844543457, "learning_rate": 8.96366820869575e-07, "loss": 0.0447, "step": 289680 }, { "epoch": 3.095143971365992, "grad_norm": 0.0037318074610084295, "learning_rate": 8.963565793765758e-07, "loss": 0.0222, "step": 289690 }, { "epoch": 3.0952508146802713, "grad_norm": 0.06272434443235397, "learning_rate": 8.963463374360598e-07, "loss": 0.025, "step": 289700 }, { "epoch": 3.095357657994551, "grad_norm": 0.005240156315267086, "learning_rate": 8.963360950480391e-07, "loss": 0.0056, "step": 289710 }, { "epoch": 3.0954645013088307, "grad_norm": 0.34303340315818787, "learning_rate": 8.96325852212525e-07, "loss": 0.0023, "step": 289720 }, { "epoch": 3.09557134462311, "grad_norm": 0.05458780750632286, "learning_rate": 8.963156089295291e-07, "loss": 0.0011, "step": 289730 }, { "epoch": 3.09567818793739, "grad_norm": 0.004934715572744608, "learning_rate": 8.963053651990629e-07, "loss": 0.042, "step": 289740 }, { "epoch": 3.0957850312516695, "grad_norm": 0.30926570296287537, "learning_rate": 8.962951210211381e-07, "loss": 0.0192, "step": 289750 }, { "epoch": 3.095891874565949, "grad_norm": 0.03612813726067543, "learning_rate": 8.962848763957662e-07, "loss": 0.0099, "step": 289760 }, { "epoch": 3.095998717880229, "grad_norm": 3.6345834732055664, "learning_rate": 8.962746313229588e-07, "loss": 0.0208, "step": 289770 }, { "epoch": 3.0961055611945083, "grad_norm": 0.015372942201793194, "learning_rate": 8.962643858027273e-07, "loss": 0.0087, "step": 289780 }, { "epoch": 3.0962124045087878, "grad_norm": 4.883588790893555, "learning_rate": 8.962541398350836e-07, "loss": 0.0134, "step": 289790 }, { "epoch": 3.0963192478230677, "grad_norm": 5.472483158111572, "learning_rate": 8.96243893420039e-07, "loss": 0.0111, "step": 289800 }, { "epoch": 3.096426091137347, "grad_norm": 0.08183109760284424, "learning_rate": 8.96233646557605e-07, "loss": 0.0075, "step": 289810 }, { "epoch": 3.0965329344516266, "grad_norm": 0.10583381354808807, "learning_rate": 8.962233992477934e-07, "loss": 0.0042, "step": 289820 }, { "epoch": 3.0966397777659065, "grad_norm": 0.015199028886854649, "learning_rate": 8.962131514906157e-07, "loss": 0.0026, "step": 289830 }, { "epoch": 3.096746621080186, "grad_norm": 0.0033270264975726604, "learning_rate": 8.962029032860834e-07, "loss": 0.0013, "step": 289840 }, { "epoch": 3.0968534643944654, "grad_norm": 8.13010311126709, "learning_rate": 8.961926546342081e-07, "loss": 0.0029, "step": 289850 }, { "epoch": 3.0969603077087453, "grad_norm": 5.484902858734131, "learning_rate": 8.961824055350015e-07, "loss": 0.0071, "step": 289860 }, { "epoch": 3.0970671510230248, "grad_norm": 0.02962307445704937, "learning_rate": 8.96172155988475e-07, "loss": 0.009, "step": 289870 }, { "epoch": 3.0971739943373042, "grad_norm": 0.002519832691177726, "learning_rate": 8.961619059946402e-07, "loss": 0.0061, "step": 289880 }, { "epoch": 3.097280837651584, "grad_norm": 0.0005457537481561303, "learning_rate": 8.961516555535087e-07, "loss": 0.0017, "step": 289890 }, { "epoch": 3.0973876809658636, "grad_norm": 0.03920717537403107, "learning_rate": 8.961414046650921e-07, "loss": 0.0035, "step": 289900 }, { "epoch": 3.097494524280143, "grad_norm": 0.0028724896255880594, "learning_rate": 8.96131153329402e-07, "loss": 0.0036, "step": 289910 }, { "epoch": 3.097601367594423, "grad_norm": 0.040280915796756744, "learning_rate": 8.9612090154645e-07, "loss": 0.0154, "step": 289920 }, { "epoch": 3.0977082109087024, "grad_norm": 0.9355299472808838, "learning_rate": 8.961106493162474e-07, "loss": 0.0207, "step": 289930 }, { "epoch": 3.097815054222982, "grad_norm": 6.915284156799316, "learning_rate": 8.961003966388061e-07, "loss": 0.0118, "step": 289940 }, { "epoch": 3.0979218975372618, "grad_norm": 1.3601645231246948, "learning_rate": 8.960901435141376e-07, "loss": 0.0474, "step": 289950 }, { "epoch": 3.0980287408515412, "grad_norm": 0.004345803055912256, "learning_rate": 8.960798899422535e-07, "loss": 0.0158, "step": 289960 }, { "epoch": 3.0981355841658207, "grad_norm": 2.834118127822876, "learning_rate": 8.960696359231652e-07, "loss": 0.0109, "step": 289970 }, { "epoch": 3.0982424274801006, "grad_norm": 0.006793100852519274, "learning_rate": 8.960593814568843e-07, "loss": 0.0213, "step": 289980 }, { "epoch": 3.09834927079438, "grad_norm": 0.5129048824310303, "learning_rate": 8.960491265434225e-07, "loss": 0.0302, "step": 289990 }, { "epoch": 3.0984561141086595, "grad_norm": 0.011336195282638073, "learning_rate": 8.960388711827916e-07, "loss": 0.0066, "step": 290000 }, { "epoch": 3.0985629574229394, "grad_norm": 0.0116352504119277, "learning_rate": 8.960286153750025e-07, "loss": 0.0079, "step": 290010 }, { "epoch": 3.098669800737219, "grad_norm": 4.378405570983887, "learning_rate": 8.960183591200675e-07, "loss": 0.0013, "step": 290020 }, { "epoch": 3.0987766440514983, "grad_norm": 1.791035771369934, "learning_rate": 8.960081024179979e-07, "loss": 0.0205, "step": 290030 }, { "epoch": 3.0988834873657782, "grad_norm": 0.007175422739237547, "learning_rate": 8.959978452688051e-07, "loss": 0.0112, "step": 290040 }, { "epoch": 3.0989903306800577, "grad_norm": 0.08293453603982925, "learning_rate": 8.959875876725009e-07, "loss": 0.0037, "step": 290050 }, { "epoch": 3.099097173994337, "grad_norm": 0.046543389558792114, "learning_rate": 8.959773296290969e-07, "loss": 0.031, "step": 290060 }, { "epoch": 3.099204017308617, "grad_norm": 0.0031225152779370546, "learning_rate": 8.959670711386045e-07, "loss": 0.0316, "step": 290070 }, { "epoch": 3.0993108606228965, "grad_norm": 3.0445303916931152, "learning_rate": 8.959568122010354e-07, "loss": 0.0102, "step": 290080 }, { "epoch": 3.099417703937176, "grad_norm": 0.6905665993690491, "learning_rate": 8.959465528164013e-07, "loss": 0.0372, "step": 290090 }, { "epoch": 3.099524547251456, "grad_norm": 0.004371850285679102, "learning_rate": 8.959362929847136e-07, "loss": 0.0356, "step": 290100 }, { "epoch": 3.0996313905657353, "grad_norm": 0.009848100133240223, "learning_rate": 8.959260327059839e-07, "loss": 0.0372, "step": 290110 }, { "epoch": 3.099738233880015, "grad_norm": 0.008685525506734848, "learning_rate": 8.959157719802238e-07, "loss": 0.0075, "step": 290120 }, { "epoch": 3.0998450771942947, "grad_norm": 0.009194089099764824, "learning_rate": 8.95905510807445e-07, "loss": 0.0026, "step": 290130 }, { "epoch": 3.099951920508574, "grad_norm": 0.2667299211025238, "learning_rate": 8.95895249187659e-07, "loss": 0.0217, "step": 290140 }, { "epoch": 3.1000587638228536, "grad_norm": 3.7833476066589355, "learning_rate": 8.958849871208774e-07, "loss": 0.0147, "step": 290150 }, { "epoch": 3.1001656071371335, "grad_norm": 1.1263315677642822, "learning_rate": 8.958747246071117e-07, "loss": 0.0111, "step": 290160 }, { "epoch": 3.100272450451413, "grad_norm": 2.1761698722839355, "learning_rate": 8.958644616463735e-07, "loss": 0.027, "step": 290170 }, { "epoch": 3.1003792937656924, "grad_norm": 0.7633721232414246, "learning_rate": 8.958541982386746e-07, "loss": 0.0035, "step": 290180 }, { "epoch": 3.1004861370799723, "grad_norm": 3.8969783782958984, "learning_rate": 8.958439343840263e-07, "loss": 0.0234, "step": 290190 }, { "epoch": 3.100592980394252, "grad_norm": 2.3423118591308594, "learning_rate": 8.958336700824405e-07, "loss": 0.0236, "step": 290200 }, { "epoch": 3.1006998237085313, "grad_norm": 0.1934957653284073, "learning_rate": 8.958234053339284e-07, "loss": 0.0115, "step": 290210 }, { "epoch": 3.100806667022811, "grad_norm": 0.008425193838775158, "learning_rate": 8.958131401385018e-07, "loss": 0.002, "step": 290220 }, { "epoch": 3.1009135103370906, "grad_norm": 0.07148576527833939, "learning_rate": 8.958028744961724e-07, "loss": 0.0094, "step": 290230 }, { "epoch": 3.10102035365137, "grad_norm": 0.04431510716676712, "learning_rate": 8.957926084069517e-07, "loss": 0.0017, "step": 290240 }, { "epoch": 3.10112719696565, "grad_norm": 2.6036362648010254, "learning_rate": 8.957823418708513e-07, "loss": 0.0135, "step": 290250 }, { "epoch": 3.1012340402799294, "grad_norm": 0.0056181116960942745, "learning_rate": 8.957720748878827e-07, "loss": 0.0114, "step": 290260 }, { "epoch": 3.101340883594209, "grad_norm": 7.225697040557861, "learning_rate": 8.957618074580575e-07, "loss": 0.0435, "step": 290270 }, { "epoch": 3.101447726908489, "grad_norm": 0.9370347857475281, "learning_rate": 8.957515395813874e-07, "loss": 0.0064, "step": 290280 }, { "epoch": 3.1015545702227683, "grad_norm": 2.0913779735565186, "learning_rate": 8.95741271257884e-07, "loss": 0.0129, "step": 290290 }, { "epoch": 3.1016614135370477, "grad_norm": 0.41927725076675415, "learning_rate": 8.957310024875588e-07, "loss": 0.0197, "step": 290300 }, { "epoch": 3.1017682568513276, "grad_norm": 1.3349697589874268, "learning_rate": 8.957207332704233e-07, "loss": 0.0026, "step": 290310 }, { "epoch": 3.101875100165607, "grad_norm": 0.19184669852256775, "learning_rate": 8.957104636064894e-07, "loss": 0.0752, "step": 290320 }, { "epoch": 3.1019819434798865, "grad_norm": 4.333034038543701, "learning_rate": 8.957001934957685e-07, "loss": 0.0202, "step": 290330 }, { "epoch": 3.1020887867941664, "grad_norm": 0.009077738039195538, "learning_rate": 8.956899229382722e-07, "loss": 0.0061, "step": 290340 }, { "epoch": 3.102195630108446, "grad_norm": 0.04798835515975952, "learning_rate": 8.95679651934012e-07, "loss": 0.0123, "step": 290350 }, { "epoch": 3.1023024734227254, "grad_norm": 0.030784007161855698, "learning_rate": 8.956693804829997e-07, "loss": 0.0196, "step": 290360 }, { "epoch": 3.1024093167370053, "grad_norm": 0.7655044198036194, "learning_rate": 8.95659108585247e-07, "loss": 0.0073, "step": 290370 }, { "epoch": 3.1025161600512847, "grad_norm": 0.5173418521881104, "learning_rate": 8.956488362407651e-07, "loss": 0.0055, "step": 290380 }, { "epoch": 3.102623003365564, "grad_norm": 0.06397125124931335, "learning_rate": 8.956385634495658e-07, "loss": 0.0202, "step": 290390 }, { "epoch": 3.102729846679844, "grad_norm": 4.059074878692627, "learning_rate": 8.956282902116607e-07, "loss": 0.014, "step": 290400 }, { "epoch": 3.1028366899941235, "grad_norm": 0.0857081338763237, "learning_rate": 8.956180165270614e-07, "loss": 0.0263, "step": 290410 }, { "epoch": 3.1029435333084034, "grad_norm": 3.696028232574463, "learning_rate": 8.956077423957795e-07, "loss": 0.0109, "step": 290420 }, { "epoch": 3.103050376622683, "grad_norm": 0.014052468352019787, "learning_rate": 8.955974678178266e-07, "loss": 0.0039, "step": 290430 }, { "epoch": 3.1031572199369624, "grad_norm": 1.792181134223938, "learning_rate": 8.955871927932143e-07, "loss": 0.0129, "step": 290440 }, { "epoch": 3.1032640632512423, "grad_norm": 0.011857847683131695, "learning_rate": 8.955769173219543e-07, "loss": 0.0079, "step": 290450 }, { "epoch": 3.1033709065655217, "grad_norm": 1.3030167818069458, "learning_rate": 8.955666414040581e-07, "loss": 0.0071, "step": 290460 }, { "epoch": 3.103477749879801, "grad_norm": 0.22708304226398468, "learning_rate": 8.955563650395372e-07, "loss": 0.0057, "step": 290470 }, { "epoch": 3.103584593194081, "grad_norm": 0.0020025211852043867, "learning_rate": 8.955460882284034e-07, "loss": 0.0105, "step": 290480 }, { "epoch": 3.1036914365083605, "grad_norm": 3.2083640098571777, "learning_rate": 8.955358109706681e-07, "loss": 0.0143, "step": 290490 }, { "epoch": 3.10379827982264, "grad_norm": 0.02424485608935356, "learning_rate": 8.955255332663431e-07, "loss": 0.0062, "step": 290500 }, { "epoch": 3.10390512313692, "grad_norm": 0.013028123416006565, "learning_rate": 8.955152551154399e-07, "loss": 0.0246, "step": 290510 }, { "epoch": 3.1040119664511994, "grad_norm": 2.1878368854522705, "learning_rate": 8.955049765179701e-07, "loss": 0.0286, "step": 290520 }, { "epoch": 3.104118809765479, "grad_norm": 0.005580794997513294, "learning_rate": 8.954946974739454e-07, "loss": 0.012, "step": 290530 }, { "epoch": 3.1042256530797587, "grad_norm": 0.3908529281616211, "learning_rate": 8.954844179833771e-07, "loss": 0.0121, "step": 290540 }, { "epoch": 3.104332496394038, "grad_norm": 0.30584871768951416, "learning_rate": 8.954741380462774e-07, "loss": 0.0121, "step": 290550 }, { "epoch": 3.1044393397083176, "grad_norm": 0.39367246627807617, "learning_rate": 8.954638576626574e-07, "loss": 0.0037, "step": 290560 }, { "epoch": 3.1045461830225975, "grad_norm": 0.693075954914093, "learning_rate": 8.954535768325289e-07, "loss": 0.0159, "step": 290570 }, { "epoch": 3.104653026336877, "grad_norm": 0.2152898609638214, "learning_rate": 8.954432955559032e-07, "loss": 0.015, "step": 290580 }, { "epoch": 3.1047598696511565, "grad_norm": 7.345605850219727, "learning_rate": 8.954330138327926e-07, "loss": 0.0139, "step": 290590 }, { "epoch": 3.1048667129654364, "grad_norm": 0.052309323102235794, "learning_rate": 8.95422731663208e-07, "loss": 0.0104, "step": 290600 }, { "epoch": 3.104973556279716, "grad_norm": 0.002588909352198243, "learning_rate": 8.954124490471613e-07, "loss": 0.0245, "step": 290610 }, { "epoch": 3.1050803995939953, "grad_norm": 0.015769707038998604, "learning_rate": 8.954021659846642e-07, "loss": 0.016, "step": 290620 }, { "epoch": 3.105187242908275, "grad_norm": 0.2704181671142578, "learning_rate": 8.953918824757282e-07, "loss": 0.0049, "step": 290630 }, { "epoch": 3.1052940862225547, "grad_norm": 2.9146318435668945, "learning_rate": 8.953815985203648e-07, "loss": 0.0223, "step": 290640 }, { "epoch": 3.105400929536834, "grad_norm": 0.019543426111340523, "learning_rate": 8.953713141185859e-07, "loss": 0.0058, "step": 290650 }, { "epoch": 3.105507772851114, "grad_norm": 0.010555942542850971, "learning_rate": 8.953610292704029e-07, "loss": 0.008, "step": 290660 }, { "epoch": 3.1056146161653935, "grad_norm": 0.5303777456283569, "learning_rate": 8.953507439758275e-07, "loss": 0.0292, "step": 290670 }, { "epoch": 3.105721459479673, "grad_norm": 0.1167251467704773, "learning_rate": 8.953404582348712e-07, "loss": 0.001, "step": 290680 }, { "epoch": 3.105828302793953, "grad_norm": 7.7211198806762695, "learning_rate": 8.953301720475457e-07, "loss": 0.0175, "step": 290690 }, { "epoch": 3.1059351461082323, "grad_norm": 0.012392446398735046, "learning_rate": 8.953198854138626e-07, "loss": 0.0016, "step": 290700 }, { "epoch": 3.1060419894225118, "grad_norm": 1.8480695486068726, "learning_rate": 8.953095983338335e-07, "loss": 0.0056, "step": 290710 }, { "epoch": 3.1061488327367917, "grad_norm": 0.017802121117711067, "learning_rate": 8.9529931080747e-07, "loss": 0.0088, "step": 290720 }, { "epoch": 3.106255676051071, "grad_norm": 2.533099412918091, "learning_rate": 8.952890228347838e-07, "loss": 0.0117, "step": 290730 }, { "epoch": 3.1063625193653506, "grad_norm": 6.681350231170654, "learning_rate": 8.952787344157864e-07, "loss": 0.0225, "step": 290740 }, { "epoch": 3.1064693626796305, "grad_norm": 5.335112571716309, "learning_rate": 8.952684455504895e-07, "loss": 0.0152, "step": 290750 }, { "epoch": 3.10657620599391, "grad_norm": 7.45695161819458, "learning_rate": 8.952581562389048e-07, "loss": 0.0162, "step": 290760 }, { "epoch": 3.1066830493081894, "grad_norm": 0.10956228524446487, "learning_rate": 8.952478664810437e-07, "loss": 0.0171, "step": 290770 }, { "epoch": 3.1067898926224693, "grad_norm": 0.005139369051903486, "learning_rate": 8.95237576276918e-07, "loss": 0.0108, "step": 290780 }, { "epoch": 3.1068967359367488, "grad_norm": 0.0497858002781868, "learning_rate": 8.95227285626539e-07, "loss": 0.0052, "step": 290790 }, { "epoch": 3.107003579251028, "grad_norm": 0.021357884630560875, "learning_rate": 8.952169945299188e-07, "loss": 0.0013, "step": 290800 }, { "epoch": 3.107110422565308, "grad_norm": 0.0020950378384441137, "learning_rate": 8.952067029870688e-07, "loss": 0.0295, "step": 290810 }, { "epoch": 3.1072172658795876, "grad_norm": 0.048927102237939835, "learning_rate": 8.951964109980006e-07, "loss": 0.0064, "step": 290820 }, { "epoch": 3.107324109193867, "grad_norm": 2.2075858116149902, "learning_rate": 8.951861185627258e-07, "loss": 0.0095, "step": 290830 }, { "epoch": 3.107430952508147, "grad_norm": 8.398982048034668, "learning_rate": 8.951758256812559e-07, "loss": 0.0154, "step": 290840 }, { "epoch": 3.1075377958224264, "grad_norm": 0.889539361000061, "learning_rate": 8.951655323536028e-07, "loss": 0.0306, "step": 290850 }, { "epoch": 3.107644639136706, "grad_norm": 3.3286869525909424, "learning_rate": 8.95155238579778e-07, "loss": 0.0073, "step": 290860 }, { "epoch": 3.1077514824509858, "grad_norm": 4.195065975189209, "learning_rate": 8.951449443597931e-07, "loss": 0.0095, "step": 290870 }, { "epoch": 3.107858325765265, "grad_norm": 0.2213076800107956, "learning_rate": 8.951346496936597e-07, "loss": 0.0073, "step": 290880 }, { "epoch": 3.1079651690795447, "grad_norm": 4.003480911254883, "learning_rate": 8.951243545813896e-07, "loss": 0.0153, "step": 290890 }, { "epoch": 3.1080720123938246, "grad_norm": 0.002938247751444578, "learning_rate": 8.95114059022994e-07, "loss": 0.0153, "step": 290900 }, { "epoch": 3.108178855708104, "grad_norm": 0.9864130616188049, "learning_rate": 8.95103763018485e-07, "loss": 0.01, "step": 290910 }, { "epoch": 3.1082856990223835, "grad_norm": 0.03908872604370117, "learning_rate": 8.95093466567874e-07, "loss": 0.0082, "step": 290920 }, { "epoch": 3.1083925423366634, "grad_norm": 0.17683075368404388, "learning_rate": 8.950831696711727e-07, "loss": 0.0315, "step": 290930 }, { "epoch": 3.108499385650943, "grad_norm": 0.012913565151393414, "learning_rate": 8.950728723283926e-07, "loss": 0.0449, "step": 290940 }, { "epoch": 3.1086062289652223, "grad_norm": 0.009517484344542027, "learning_rate": 8.950625745395455e-07, "loss": 0.02, "step": 290950 }, { "epoch": 3.108713072279502, "grad_norm": 0.08431044965982437, "learning_rate": 8.950522763046428e-07, "loss": 0.0136, "step": 290960 }, { "epoch": 3.1088199155937817, "grad_norm": 1.0805773735046387, "learning_rate": 8.950419776236963e-07, "loss": 0.0258, "step": 290970 }, { "epoch": 3.108926758908061, "grad_norm": 2.021653652191162, "learning_rate": 8.950316784967178e-07, "loss": 0.0097, "step": 290980 }, { "epoch": 3.109033602222341, "grad_norm": 5.209935665130615, "learning_rate": 8.950213789237185e-07, "loss": 0.0087, "step": 290990 }, { "epoch": 3.1091404455366205, "grad_norm": 0.001204638509079814, "learning_rate": 8.950110789047102e-07, "loss": 0.0067, "step": 291000 }, { "epoch": 3.1092472888509, "grad_norm": 0.7285249829292297, "learning_rate": 8.950007784397049e-07, "loss": 0.0101, "step": 291010 }, { "epoch": 3.10935413216518, "grad_norm": 0.8208093643188477, "learning_rate": 8.949904775287136e-07, "loss": 0.0306, "step": 291020 }, { "epoch": 3.1094609754794593, "grad_norm": 9.102008819580078, "learning_rate": 8.949801761717484e-07, "loss": 0.0073, "step": 291030 }, { "epoch": 3.109567818793739, "grad_norm": 5.459765434265137, "learning_rate": 8.949698743688207e-07, "loss": 0.0069, "step": 291040 }, { "epoch": 3.1096746621080187, "grad_norm": 0.03872176259756088, "learning_rate": 8.949595721199421e-07, "loss": 0.001, "step": 291050 }, { "epoch": 3.109781505422298, "grad_norm": 2.7468948364257812, "learning_rate": 8.949492694251246e-07, "loss": 0.0144, "step": 291060 }, { "epoch": 3.1098883487365776, "grad_norm": 0.04847289249300957, "learning_rate": 8.949389662843793e-07, "loss": 0.0048, "step": 291070 }, { "epoch": 3.1099951920508575, "grad_norm": 0.0514877587556839, "learning_rate": 8.949286626977183e-07, "loss": 0.0136, "step": 291080 }, { "epoch": 3.110102035365137, "grad_norm": 0.12408732622861862, "learning_rate": 8.949183586651529e-07, "loss": 0.0055, "step": 291090 }, { "epoch": 3.1102088786794164, "grad_norm": 0.004684088751673698, "learning_rate": 8.94908054186695e-07, "loss": 0.0117, "step": 291100 }, { "epoch": 3.1103157219936963, "grad_norm": 0.006328969728201628, "learning_rate": 8.94897749262356e-07, "loss": 0.0205, "step": 291110 }, { "epoch": 3.110422565307976, "grad_norm": 1.5646435022354126, "learning_rate": 8.948874438921477e-07, "loss": 0.0015, "step": 291120 }, { "epoch": 3.1105294086222557, "grad_norm": 1.2029461860656738, "learning_rate": 8.948771380760815e-07, "loss": 0.0076, "step": 291130 }, { "epoch": 3.110636251936535, "grad_norm": 0.018725911155343056, "learning_rate": 8.948668318141694e-07, "loss": 0.0103, "step": 291140 }, { "epoch": 3.1107430952508146, "grad_norm": 4.968008995056152, "learning_rate": 8.948565251064229e-07, "loss": 0.0148, "step": 291150 }, { "epoch": 3.110849938565094, "grad_norm": 0.0012134575517848134, "learning_rate": 8.948462179528533e-07, "loss": 0.0138, "step": 291160 }, { "epoch": 3.110956781879374, "grad_norm": 0.4186737537384033, "learning_rate": 8.948359103534727e-07, "loss": 0.0158, "step": 291170 }, { "epoch": 3.1110636251936534, "grad_norm": 0.1794246882200241, "learning_rate": 8.948256023082925e-07, "loss": 0.0013, "step": 291180 }, { "epoch": 3.1111704685079333, "grad_norm": 3.5510921478271484, "learning_rate": 8.948152938173244e-07, "loss": 0.0291, "step": 291190 }, { "epoch": 3.111277311822213, "grad_norm": 0.04686098173260689, "learning_rate": 8.948049848805801e-07, "loss": 0.0081, "step": 291200 }, { "epoch": 3.1113841551364922, "grad_norm": 2.8433830738067627, "learning_rate": 8.947946754980712e-07, "loss": 0.0049, "step": 291210 }, { "epoch": 3.111490998450772, "grad_norm": 0.3295225203037262, "learning_rate": 8.947843656698093e-07, "loss": 0.0185, "step": 291220 }, { "epoch": 3.1115978417650516, "grad_norm": 0.003676158143207431, "learning_rate": 8.94774055395806e-07, "loss": 0.0057, "step": 291230 }, { "epoch": 3.111704685079331, "grad_norm": 0.5549528002738953, "learning_rate": 8.94763744676073e-07, "loss": 0.0091, "step": 291240 }, { "epoch": 3.111811528393611, "grad_norm": 5.897006511688232, "learning_rate": 8.947534335106219e-07, "loss": 0.0093, "step": 291250 }, { "epoch": 3.1119183717078904, "grad_norm": 0.985499918460846, "learning_rate": 8.947431218994645e-07, "loss": 0.0056, "step": 291260 }, { "epoch": 3.11202521502217, "grad_norm": 0.24664975702762604, "learning_rate": 8.947328098426123e-07, "loss": 0.0126, "step": 291270 }, { "epoch": 3.11213205833645, "grad_norm": 1.4594017267227173, "learning_rate": 8.947224973400769e-07, "loss": 0.0038, "step": 291280 }, { "epoch": 3.1122389016507293, "grad_norm": 0.004333178047090769, "learning_rate": 8.947121843918699e-07, "loss": 0.0081, "step": 291290 }, { "epoch": 3.1123457449650087, "grad_norm": 0.01616876944899559, "learning_rate": 8.947018709980032e-07, "loss": 0.0272, "step": 291300 }, { "epoch": 3.1124525882792886, "grad_norm": 3.4358489513397217, "learning_rate": 8.946915571584883e-07, "loss": 0.0213, "step": 291310 }, { "epoch": 3.112559431593568, "grad_norm": 0.0409247949719429, "learning_rate": 8.946812428733368e-07, "loss": 0.0013, "step": 291320 }, { "epoch": 3.1126662749078475, "grad_norm": 0.08095210790634155, "learning_rate": 8.946709281425604e-07, "loss": 0.0231, "step": 291330 }, { "epoch": 3.1127731182221274, "grad_norm": 2.3131937980651855, "learning_rate": 8.946606129661708e-07, "loss": 0.0052, "step": 291340 }, { "epoch": 3.112879961536407, "grad_norm": 7.245918273925781, "learning_rate": 8.946502973441796e-07, "loss": 0.01, "step": 291350 }, { "epoch": 3.1129868048506864, "grad_norm": 0.05287664756178856, "learning_rate": 8.946399812765984e-07, "loss": 0.0351, "step": 291360 }, { "epoch": 3.1130936481649663, "grad_norm": 0.3268967866897583, "learning_rate": 8.946296647634387e-07, "loss": 0.0212, "step": 291370 }, { "epoch": 3.1132004914792457, "grad_norm": 0.038932915776968, "learning_rate": 8.946193478047125e-07, "loss": 0.0077, "step": 291380 }, { "epoch": 3.113307334793525, "grad_norm": 4.024435043334961, "learning_rate": 8.946090304004312e-07, "loss": 0.005, "step": 291390 }, { "epoch": 3.113414178107805, "grad_norm": 2.675130844116211, "learning_rate": 8.945987125506066e-07, "loss": 0.0011, "step": 291400 }, { "epoch": 3.1135210214220845, "grad_norm": 0.0036217374727129936, "learning_rate": 8.945883942552501e-07, "loss": 0.0129, "step": 291410 }, { "epoch": 3.113627864736364, "grad_norm": 2.499162197113037, "learning_rate": 8.945780755143737e-07, "loss": 0.0207, "step": 291420 }, { "epoch": 3.113734708050644, "grad_norm": 0.007256343029439449, "learning_rate": 8.945677563279888e-07, "loss": 0.0068, "step": 291430 }, { "epoch": 3.1138415513649234, "grad_norm": 0.06091804429888725, "learning_rate": 8.945574366961073e-07, "loss": 0.0219, "step": 291440 }, { "epoch": 3.113948394679203, "grad_norm": 0.26161783933639526, "learning_rate": 8.945471166187404e-07, "loss": 0.0141, "step": 291450 }, { "epoch": 3.1140552379934827, "grad_norm": 3.5340781211853027, "learning_rate": 8.945367960959003e-07, "loss": 0.0221, "step": 291460 }, { "epoch": 3.114162081307762, "grad_norm": 0.3374185264110565, "learning_rate": 8.945264751275981e-07, "loss": 0.0184, "step": 291470 }, { "epoch": 3.1142689246220416, "grad_norm": 2.8217403888702393, "learning_rate": 8.945161537138459e-07, "loss": 0.0309, "step": 291480 }, { "epoch": 3.1143757679363215, "grad_norm": 0.025182418525218964, "learning_rate": 8.945058318546551e-07, "loss": 0.0121, "step": 291490 }, { "epoch": 3.114482611250601, "grad_norm": 1.7532856464385986, "learning_rate": 8.944955095500375e-07, "loss": 0.0263, "step": 291500 }, { "epoch": 3.1145894545648805, "grad_norm": 0.05438223481178284, "learning_rate": 8.944851868000047e-07, "loss": 0.0048, "step": 291510 }, { "epoch": 3.1146962978791604, "grad_norm": 7.634440898895264, "learning_rate": 8.944748636045682e-07, "loss": 0.0024, "step": 291520 }, { "epoch": 3.11480314119344, "grad_norm": 0.7546830773353577, "learning_rate": 8.944645399637401e-07, "loss": 0.0024, "step": 291530 }, { "epoch": 3.1149099845077193, "grad_norm": 0.011482198722660542, "learning_rate": 8.944542158775315e-07, "loss": 0.0077, "step": 291540 }, { "epoch": 3.115016827821999, "grad_norm": 0.33919501304626465, "learning_rate": 8.944438913459544e-07, "loss": 0.0025, "step": 291550 }, { "epoch": 3.1151236711362786, "grad_norm": 1.656197190284729, "learning_rate": 8.944335663690206e-07, "loss": 0.002, "step": 291560 }, { "epoch": 3.115230514450558, "grad_norm": 0.6318984627723694, "learning_rate": 8.944232409467412e-07, "loss": 0.018, "step": 291570 }, { "epoch": 3.115337357764838, "grad_norm": 0.9025794863700867, "learning_rate": 8.944129150791285e-07, "loss": 0.0031, "step": 291580 }, { "epoch": 3.1154442010791175, "grad_norm": 0.09996262937784195, "learning_rate": 8.944025887661934e-07, "loss": 0.0196, "step": 291590 }, { "epoch": 3.115551044393397, "grad_norm": 3.3291661739349365, "learning_rate": 8.943922620079485e-07, "loss": 0.012, "step": 291600 }, { "epoch": 3.115657887707677, "grad_norm": 0.005546253640204668, "learning_rate": 8.943819348044046e-07, "loss": 0.041, "step": 291610 }, { "epoch": 3.1157647310219563, "grad_norm": 0.002452854998409748, "learning_rate": 8.94371607155574e-07, "loss": 0.0108, "step": 291620 }, { "epoch": 3.1158715743362357, "grad_norm": 4.634953498840332, "learning_rate": 8.943612790614679e-07, "loss": 0.0057, "step": 291630 }, { "epoch": 3.1159784176505156, "grad_norm": 1.3008216619491577, "learning_rate": 8.943509505220983e-07, "loss": 0.0081, "step": 291640 }, { "epoch": 3.116085260964795, "grad_norm": 0.004338080529123545, "learning_rate": 8.943406215374767e-07, "loss": 0.005, "step": 291650 }, { "epoch": 3.1161921042790746, "grad_norm": 0.09112589061260223, "learning_rate": 8.943302921076145e-07, "loss": 0.0136, "step": 291660 }, { "epoch": 3.1162989475933545, "grad_norm": 0.02739926055073738, "learning_rate": 8.943199622325239e-07, "loss": 0.011, "step": 291670 }, { "epoch": 3.116405790907634, "grad_norm": 12.446465492248535, "learning_rate": 8.943096319122163e-07, "loss": 0.0208, "step": 291680 }, { "epoch": 3.1165126342219134, "grad_norm": 1.3698135614395142, "learning_rate": 8.942993011467034e-07, "loss": 0.0293, "step": 291690 }, { "epoch": 3.1166194775361933, "grad_norm": 0.4577450454235077, "learning_rate": 8.942889699359968e-07, "loss": 0.0161, "step": 291700 }, { "epoch": 3.1167263208504727, "grad_norm": 1.2938220500946045, "learning_rate": 8.94278638280108e-07, "loss": 0.0069, "step": 291710 }, { "epoch": 3.116833164164752, "grad_norm": 0.007239334750920534, "learning_rate": 8.942683061790491e-07, "loss": 0.0008, "step": 291720 }, { "epoch": 3.116940007479032, "grad_norm": 3.7444353103637695, "learning_rate": 8.942579736328315e-07, "loss": 0.0601, "step": 291730 }, { "epoch": 3.1170468507933116, "grad_norm": 0.022403856739401817, "learning_rate": 8.942476406414667e-07, "loss": 0.0012, "step": 291740 }, { "epoch": 3.117153694107591, "grad_norm": 5.752355575561523, "learning_rate": 8.942373072049667e-07, "loss": 0.0062, "step": 291750 }, { "epoch": 3.117260537421871, "grad_norm": 1.461930751800537, "learning_rate": 8.942269733233431e-07, "loss": 0.005, "step": 291760 }, { "epoch": 3.1173673807361504, "grad_norm": 0.005308732856065035, "learning_rate": 8.942166389966075e-07, "loss": 0.0471, "step": 291770 }, { "epoch": 3.11747422405043, "grad_norm": 0.48959511518478394, "learning_rate": 8.942063042247714e-07, "loss": 0.0398, "step": 291780 }, { "epoch": 3.1175810673647097, "grad_norm": 4.433967590332031, "learning_rate": 8.941959690078466e-07, "loss": 0.018, "step": 291790 }, { "epoch": 3.117687910678989, "grad_norm": 0.08470825105905533, "learning_rate": 8.94185633345845e-07, "loss": 0.0128, "step": 291800 }, { "epoch": 3.1177947539932687, "grad_norm": 0.021186664700508118, "learning_rate": 8.94175297238778e-07, "loss": 0.005, "step": 291810 }, { "epoch": 3.1179015973075486, "grad_norm": 0.5582250952720642, "learning_rate": 8.941649606866572e-07, "loss": 0.0348, "step": 291820 }, { "epoch": 3.118008440621828, "grad_norm": 0.09922780841588974, "learning_rate": 8.941546236894945e-07, "loss": 0.0516, "step": 291830 }, { "epoch": 3.1181152839361075, "grad_norm": 0.00372793385758996, "learning_rate": 8.941442862473015e-07, "loss": 0.0008, "step": 291840 }, { "epoch": 3.1182221272503874, "grad_norm": 0.020878518000245094, "learning_rate": 8.941339483600899e-07, "loss": 0.0094, "step": 291850 }, { "epoch": 3.118328970564667, "grad_norm": 1.1531908512115479, "learning_rate": 8.941236100278713e-07, "loss": 0.0119, "step": 291860 }, { "epoch": 3.1184358138789463, "grad_norm": 0.23230527341365814, "learning_rate": 8.941132712506574e-07, "loss": 0.025, "step": 291870 }, { "epoch": 3.118542657193226, "grad_norm": 8.075288772583008, "learning_rate": 8.941029320284599e-07, "loss": 0.009, "step": 291880 }, { "epoch": 3.1186495005075057, "grad_norm": 1.3648937940597534, "learning_rate": 8.940925923612903e-07, "loss": 0.049, "step": 291890 }, { "epoch": 3.1187563438217856, "grad_norm": 1.1372456550598145, "learning_rate": 8.940822522491606e-07, "loss": 0.0017, "step": 291900 }, { "epoch": 3.118863187136065, "grad_norm": 7.8469061851501465, "learning_rate": 8.94071911692082e-07, "loss": 0.0284, "step": 291910 }, { "epoch": 3.1189700304503445, "grad_norm": 1.000313401222229, "learning_rate": 8.940615706900668e-07, "loss": 0.0157, "step": 291920 }, { "epoch": 3.1190768737646244, "grad_norm": 0.012802892364561558, "learning_rate": 8.940512292431262e-07, "loss": 0.0219, "step": 291930 }, { "epoch": 3.119183717078904, "grad_norm": 0.36819082498550415, "learning_rate": 8.940408873512721e-07, "loss": 0.008, "step": 291940 }, { "epoch": 3.1192905603931833, "grad_norm": 1.1392837762832642, "learning_rate": 8.94030545014516e-07, "loss": 0.0294, "step": 291950 }, { "epoch": 3.119397403707463, "grad_norm": 0.023887090384960175, "learning_rate": 8.940202022328697e-07, "loss": 0.0184, "step": 291960 }, { "epoch": 3.1195042470217427, "grad_norm": 0.006647028028964996, "learning_rate": 8.940098590063449e-07, "loss": 0.0103, "step": 291970 }, { "epoch": 3.119611090336022, "grad_norm": 0.008960659615695477, "learning_rate": 8.939995153349532e-07, "loss": 0.0096, "step": 291980 }, { "epoch": 3.119717933650302, "grad_norm": 1.9749902486801147, "learning_rate": 8.939891712187063e-07, "loss": 0.0108, "step": 291990 }, { "epoch": 3.1198247769645815, "grad_norm": 0.19716739654541016, "learning_rate": 8.93978826657616e-07, "loss": 0.0057, "step": 292000 }, { "epoch": 3.119931620278861, "grad_norm": 3.1990456581115723, "learning_rate": 8.939684816516936e-07, "loss": 0.0101, "step": 292010 }, { "epoch": 3.120038463593141, "grad_norm": 0.012124178931117058, "learning_rate": 8.939581362009514e-07, "loss": 0.0345, "step": 292020 }, { "epoch": 3.1201453069074203, "grad_norm": 2.639033555984497, "learning_rate": 8.939477903054004e-07, "loss": 0.007, "step": 292030 }, { "epoch": 3.1202521502216998, "grad_norm": 0.006519745569676161, "learning_rate": 8.939374439650529e-07, "loss": 0.0126, "step": 292040 }, { "epoch": 3.1203589935359797, "grad_norm": 0.8225006461143494, "learning_rate": 8.939270971799202e-07, "loss": 0.0098, "step": 292050 }, { "epoch": 3.120465836850259, "grad_norm": 2.3315296173095703, "learning_rate": 8.93916749950014e-07, "loss": 0.0068, "step": 292060 }, { "epoch": 3.1205726801645386, "grad_norm": 0.0099024074152112, "learning_rate": 8.939064022753462e-07, "loss": 0.0028, "step": 292070 }, { "epoch": 3.1206795234788185, "grad_norm": 0.04318564385175705, "learning_rate": 8.938960541559282e-07, "loss": 0.0257, "step": 292080 }, { "epoch": 3.120786366793098, "grad_norm": 1.6937214136123657, "learning_rate": 8.938857055917718e-07, "loss": 0.0709, "step": 292090 }, { "epoch": 3.1208932101073774, "grad_norm": 0.0020763895008713007, "learning_rate": 8.938753565828888e-07, "loss": 0.0278, "step": 292100 }, { "epoch": 3.1210000534216573, "grad_norm": 0.017592037096619606, "learning_rate": 8.938650071292908e-07, "loss": 0.0017, "step": 292110 }, { "epoch": 3.1211068967359368, "grad_norm": 0.3107459545135498, "learning_rate": 8.938546572309894e-07, "loss": 0.0148, "step": 292120 }, { "epoch": 3.1212137400502162, "grad_norm": 0.03533507138490677, "learning_rate": 8.938443068879964e-07, "loss": 0.006, "step": 292130 }, { "epoch": 3.121320583364496, "grad_norm": 0.11316752433776855, "learning_rate": 8.938339561003234e-07, "loss": 0.0111, "step": 292140 }, { "epoch": 3.1214274266787756, "grad_norm": 0.8716039657592773, "learning_rate": 8.938236048679824e-07, "loss": 0.0051, "step": 292150 }, { "epoch": 3.121534269993055, "grad_norm": 2.5606212615966797, "learning_rate": 8.938132531909846e-07, "loss": 0.024, "step": 292160 }, { "epoch": 3.121641113307335, "grad_norm": 0.005273580085486174, "learning_rate": 8.93802901069342e-07, "loss": 0.0093, "step": 292170 }, { "epoch": 3.1217479566216144, "grad_norm": 0.002402434591203928, "learning_rate": 8.937925485030661e-07, "loss": 0.0079, "step": 292180 }, { "epoch": 3.121854799935894, "grad_norm": 5.148256301879883, "learning_rate": 8.937821954921687e-07, "loss": 0.0474, "step": 292190 }, { "epoch": 3.121961643250174, "grad_norm": 4.666153907775879, "learning_rate": 8.937718420366615e-07, "loss": 0.0141, "step": 292200 }, { "epoch": 3.1220684865644532, "grad_norm": 1.9600127935409546, "learning_rate": 8.937614881365563e-07, "loss": 0.0011, "step": 292210 }, { "epoch": 3.1221753298787327, "grad_norm": 3.433729887008667, "learning_rate": 8.937511337918645e-07, "loss": 0.013, "step": 292220 }, { "epoch": 3.1222821731930126, "grad_norm": 0.1411525458097458, "learning_rate": 8.93740779002598e-07, "loss": 0.0083, "step": 292230 }, { "epoch": 3.122389016507292, "grad_norm": 4.805629253387451, "learning_rate": 8.937304237687685e-07, "loss": 0.0192, "step": 292240 }, { "epoch": 3.1224958598215715, "grad_norm": 1.5639622211456299, "learning_rate": 8.937200680903876e-07, "loss": 0.026, "step": 292250 }, { "epoch": 3.1226027031358514, "grad_norm": 0.09754890203475952, "learning_rate": 8.93709711967467e-07, "loss": 0.0021, "step": 292260 }, { "epoch": 3.122709546450131, "grad_norm": 4.995089054107666, "learning_rate": 8.936993554000185e-07, "loss": 0.0236, "step": 292270 }, { "epoch": 3.1228163897644103, "grad_norm": 0.005247778259217739, "learning_rate": 8.936889983880536e-07, "loss": 0.0065, "step": 292280 }, { "epoch": 3.1229232330786902, "grad_norm": 6.785050392150879, "learning_rate": 8.936786409315842e-07, "loss": 0.0253, "step": 292290 }, { "epoch": 3.1230300763929697, "grad_norm": 0.6057172417640686, "learning_rate": 8.936682830306219e-07, "loss": 0.007, "step": 292300 }, { "epoch": 3.123136919707249, "grad_norm": 2.6249537467956543, "learning_rate": 8.936579246851784e-07, "loss": 0.0127, "step": 292310 }, { "epoch": 3.123243763021529, "grad_norm": 0.06090352311730385, "learning_rate": 8.936475658952652e-07, "loss": 0.0292, "step": 292320 }, { "epoch": 3.1233506063358085, "grad_norm": 4.497857093811035, "learning_rate": 8.936372066608944e-07, "loss": 0.0179, "step": 292330 }, { "epoch": 3.123457449650088, "grad_norm": 0.03829653933644295, "learning_rate": 8.936268469820774e-07, "loss": 0.0051, "step": 292340 }, { "epoch": 3.123564292964368, "grad_norm": 8.233136177062988, "learning_rate": 8.936164868588261e-07, "loss": 0.0117, "step": 292350 }, { "epoch": 3.1236711362786473, "grad_norm": 1.2897018194198608, "learning_rate": 8.936061262911518e-07, "loss": 0.0126, "step": 292360 }, { "epoch": 3.123777979592927, "grad_norm": 0.05541149899363518, "learning_rate": 8.935957652790669e-07, "loss": 0.0172, "step": 292370 }, { "epoch": 3.1238848229072067, "grad_norm": 2.7690412998199463, "learning_rate": 8.935854038225823e-07, "loss": 0.0022, "step": 292380 }, { "epoch": 3.123991666221486, "grad_norm": 2.331223249435425, "learning_rate": 8.935750419217103e-07, "loss": 0.0284, "step": 292390 }, { "epoch": 3.1240985095357656, "grad_norm": 0.005534003954380751, "learning_rate": 8.935646795764622e-07, "loss": 0.0175, "step": 292400 }, { "epoch": 3.1242053528500455, "grad_norm": 0.05865248292684555, "learning_rate": 8.9355431678685e-07, "loss": 0.009, "step": 292410 }, { "epoch": 3.124312196164325, "grad_norm": 0.004940271843224764, "learning_rate": 8.935439535528853e-07, "loss": 0.0161, "step": 292420 }, { "epoch": 3.1244190394786044, "grad_norm": 0.09107404202222824, "learning_rate": 8.935335898745796e-07, "loss": 0.0093, "step": 292430 }, { "epoch": 3.1245258827928843, "grad_norm": 0.04915958270430565, "learning_rate": 8.935232257519449e-07, "loss": 0.0115, "step": 292440 }, { "epoch": 3.124632726107164, "grad_norm": 0.8145080804824829, "learning_rate": 8.935128611849927e-07, "loss": 0.0664, "step": 292450 }, { "epoch": 3.1247395694214433, "grad_norm": 0.5335443019866943, "learning_rate": 8.93502496173735e-07, "loss": 0.0154, "step": 292460 }, { "epoch": 3.124846412735723, "grad_norm": 0.02431928552687168, "learning_rate": 8.934921307181831e-07, "loss": 0.0024, "step": 292470 }, { "epoch": 3.1249532560500026, "grad_norm": 0.01164420135319233, "learning_rate": 8.934817648183488e-07, "loss": 0.0006, "step": 292480 }, { "epoch": 3.125060099364282, "grad_norm": 1.9847040176391602, "learning_rate": 8.93471398474244e-07, "loss": 0.0337, "step": 292490 }, { "epoch": 3.125166942678562, "grad_norm": 0.010133499279618263, "learning_rate": 8.934610316858804e-07, "loss": 0.0036, "step": 292500 }, { "epoch": 3.1252737859928414, "grad_norm": 7.487995624542236, "learning_rate": 8.934506644532694e-07, "loss": 0.0403, "step": 292510 }, { "epoch": 3.125380629307121, "grad_norm": 0.011433923617005348, "learning_rate": 8.93440296776423e-07, "loss": 0.0077, "step": 292520 }, { "epoch": 3.125487472621401, "grad_norm": 3.031303882598877, "learning_rate": 8.934299286553528e-07, "loss": 0.0239, "step": 292530 }, { "epoch": 3.1255943159356803, "grad_norm": 0.07055051624774933, "learning_rate": 8.934195600900705e-07, "loss": 0.0028, "step": 292540 }, { "epoch": 3.1257011592499597, "grad_norm": 4.783420085906982, "learning_rate": 8.934091910805879e-07, "loss": 0.0155, "step": 292550 }, { "epoch": 3.1258080025642396, "grad_norm": 0.027974380180239677, "learning_rate": 8.933988216269166e-07, "loss": 0.0021, "step": 292560 }, { "epoch": 3.125914845878519, "grad_norm": 0.0018536816351115704, "learning_rate": 8.933884517290684e-07, "loss": 0.0034, "step": 292570 }, { "epoch": 3.1260216891927985, "grad_norm": 0.08783231675624847, "learning_rate": 8.933780813870549e-07, "loss": 0.0058, "step": 292580 }, { "epoch": 3.1261285325070785, "grad_norm": 0.020534826442599297, "learning_rate": 8.933677106008877e-07, "loss": 0.0441, "step": 292590 }, { "epoch": 3.126235375821358, "grad_norm": 1.3031896352767944, "learning_rate": 8.933573393705789e-07, "loss": 0.0384, "step": 292600 }, { "epoch": 3.126342219135638, "grad_norm": 0.1392466425895691, "learning_rate": 8.933469676961398e-07, "loss": 0.0064, "step": 292610 }, { "epoch": 3.1264490624499173, "grad_norm": 0.07212315499782562, "learning_rate": 8.933365955775823e-07, "loss": 0.0558, "step": 292620 }, { "epoch": 3.1265559057641967, "grad_norm": 0.06535202264785767, "learning_rate": 8.933262230149181e-07, "loss": 0.0116, "step": 292630 }, { "epoch": 3.126662749078476, "grad_norm": 0.0032982644625008106, "learning_rate": 8.93315850008159e-07, "loss": 0.0501, "step": 292640 }, { "epoch": 3.126769592392756, "grad_norm": 0.02581280656158924, "learning_rate": 8.933054765573167e-07, "loss": 0.0294, "step": 292650 }, { "epoch": 3.1268764357070356, "grad_norm": 2.8780035972595215, "learning_rate": 8.932951026624026e-07, "loss": 0.0123, "step": 292660 }, { "epoch": 3.1269832790213155, "grad_norm": 0.009678002446889877, "learning_rate": 8.932847283234288e-07, "loss": 0.0038, "step": 292670 }, { "epoch": 3.127090122335595, "grad_norm": 0.21836940944194794, "learning_rate": 8.932743535404068e-07, "loss": 0.0187, "step": 292680 }, { "epoch": 3.1271969656498744, "grad_norm": 0.5386033654212952, "learning_rate": 8.932639783133485e-07, "loss": 0.0029, "step": 292690 }, { "epoch": 3.1273038089641543, "grad_norm": 2.1529293060302734, "learning_rate": 8.932536026422652e-07, "loss": 0.0044, "step": 292700 }, { "epoch": 3.1274106522784337, "grad_norm": 0.0779857411980629, "learning_rate": 8.932432265271692e-07, "loss": 0.0217, "step": 292710 }, { "epoch": 3.127517495592713, "grad_norm": 10.976751327514648, "learning_rate": 8.932328499680716e-07, "loss": 0.0044, "step": 292720 }, { "epoch": 3.127624338906993, "grad_norm": 0.5344165563583374, "learning_rate": 8.932224729649849e-07, "loss": 0.0156, "step": 292730 }, { "epoch": 3.1277311822212726, "grad_norm": 0.09023413807153702, "learning_rate": 8.9321209551792e-07, "loss": 0.0167, "step": 292740 }, { "epoch": 3.127838025535552, "grad_norm": 0.04074104502797127, "learning_rate": 8.932017176268891e-07, "loss": 0.0121, "step": 292750 }, { "epoch": 3.127944868849832, "grad_norm": 0.04992997646331787, "learning_rate": 8.931913392919038e-07, "loss": 0.0038, "step": 292760 }, { "epoch": 3.1280517121641114, "grad_norm": 0.18897131085395813, "learning_rate": 8.931809605129757e-07, "loss": 0.0018, "step": 292770 }, { "epoch": 3.128158555478391, "grad_norm": 4.543428897857666, "learning_rate": 8.931705812901165e-07, "loss": 0.0059, "step": 292780 }, { "epoch": 3.1282653987926707, "grad_norm": 0.13300055265426636, "learning_rate": 8.931602016233381e-07, "loss": 0.0074, "step": 292790 }, { "epoch": 3.12837224210695, "grad_norm": 0.5385547280311584, "learning_rate": 8.931498215126524e-07, "loss": 0.0165, "step": 292800 }, { "epoch": 3.1284790854212297, "grad_norm": 0.053159959614276886, "learning_rate": 8.931394409580708e-07, "loss": 0.0291, "step": 292810 }, { "epoch": 3.1285859287355096, "grad_norm": 2.1192612648010254, "learning_rate": 8.931290599596049e-07, "loss": 0.007, "step": 292820 }, { "epoch": 3.128692772049789, "grad_norm": 1.0512646436691284, "learning_rate": 8.931186785172669e-07, "loss": 0.0333, "step": 292830 }, { "epoch": 3.1287996153640685, "grad_norm": 0.1811729073524475, "learning_rate": 8.93108296631068e-07, "loss": 0.0101, "step": 292840 }, { "epoch": 3.1289064586783484, "grad_norm": 0.0023867180570960045, "learning_rate": 8.930979143010202e-07, "loss": 0.0081, "step": 292850 }, { "epoch": 3.129013301992628, "grad_norm": 11.81506633758545, "learning_rate": 8.930875315271353e-07, "loss": 0.0398, "step": 292860 }, { "epoch": 3.1291201453069073, "grad_norm": 0.040582045912742615, "learning_rate": 8.930771483094249e-07, "loss": 0.0134, "step": 292870 }, { "epoch": 3.129226988621187, "grad_norm": 0.2513819634914398, "learning_rate": 8.930667646479007e-07, "loss": 0.02, "step": 292880 }, { "epoch": 3.1293338319354667, "grad_norm": 0.20265991985797882, "learning_rate": 8.930563805425744e-07, "loss": 0.003, "step": 292890 }, { "epoch": 3.129440675249746, "grad_norm": 0.13879452645778656, "learning_rate": 8.930459959934579e-07, "loss": 0.0011, "step": 292900 }, { "epoch": 3.129547518564026, "grad_norm": 0.11294293403625488, "learning_rate": 8.930356110005627e-07, "loss": 0.0033, "step": 292910 }, { "epoch": 3.1296543618783055, "grad_norm": 0.0020918166264891624, "learning_rate": 8.930252255639006e-07, "loss": 0.0185, "step": 292920 }, { "epoch": 3.129761205192585, "grad_norm": 2.7725346088409424, "learning_rate": 8.930148396834835e-07, "loss": 0.0145, "step": 292930 }, { "epoch": 3.129868048506865, "grad_norm": 0.02117268182337284, "learning_rate": 8.930044533593228e-07, "loss": 0.0182, "step": 292940 }, { "epoch": 3.1299748918211443, "grad_norm": 0.024518078193068504, "learning_rate": 8.929940665914306e-07, "loss": 0.0306, "step": 292950 }, { "epoch": 3.1300817351354238, "grad_norm": 5.004830360412598, "learning_rate": 8.929836793798183e-07, "loss": 0.024, "step": 292960 }, { "epoch": 3.1301885784497037, "grad_norm": 0.013183907605707645, "learning_rate": 8.929732917244978e-07, "loss": 0.0041, "step": 292970 }, { "epoch": 3.130295421763983, "grad_norm": 0.011600000783801079, "learning_rate": 8.929629036254809e-07, "loss": 0.0053, "step": 292980 }, { "epoch": 3.1304022650782626, "grad_norm": 0.3258976638317108, "learning_rate": 8.929525150827791e-07, "loss": 0.0367, "step": 292990 }, { "epoch": 3.1305091083925425, "grad_norm": 0.01969987340271473, "learning_rate": 8.929421260964043e-07, "loss": 0.0265, "step": 293000 }, { "epoch": 3.130615951706822, "grad_norm": 0.29433003067970276, "learning_rate": 8.929317366663682e-07, "loss": 0.0056, "step": 293010 }, { "epoch": 3.1307227950211014, "grad_norm": 10.60433292388916, "learning_rate": 8.929213467926825e-07, "loss": 0.0413, "step": 293020 }, { "epoch": 3.1308296383353813, "grad_norm": 0.0061609018594026566, "learning_rate": 8.92910956475359e-07, "loss": 0.0152, "step": 293030 }, { "epoch": 3.1309364816496608, "grad_norm": 1.1162246465682983, "learning_rate": 8.929005657144093e-07, "loss": 0.0074, "step": 293040 }, { "epoch": 3.13104332496394, "grad_norm": 0.21274083852767944, "learning_rate": 8.928901745098451e-07, "loss": 0.0138, "step": 293050 }, { "epoch": 3.13115016827822, "grad_norm": 4.396613597869873, "learning_rate": 8.928797828616783e-07, "loss": 0.0199, "step": 293060 }, { "epoch": 3.1312570115924996, "grad_norm": 0.0024505462497472763, "learning_rate": 8.928693907699207e-07, "loss": 0.0016, "step": 293070 }, { "epoch": 3.131363854906779, "grad_norm": 5.596904754638672, "learning_rate": 8.928589982345837e-07, "loss": 0.0103, "step": 293080 }, { "epoch": 3.131470698221059, "grad_norm": 0.3130602240562439, "learning_rate": 8.928486052556795e-07, "loss": 0.0284, "step": 293090 }, { "epoch": 3.1315775415353384, "grad_norm": 5.43716287612915, "learning_rate": 8.928382118332195e-07, "loss": 0.0187, "step": 293100 }, { "epoch": 3.131684384849618, "grad_norm": 0.020462414249777794, "learning_rate": 8.928278179672155e-07, "loss": 0.0101, "step": 293110 }, { "epoch": 3.1317912281638978, "grad_norm": 0.1706189513206482, "learning_rate": 8.928174236576791e-07, "loss": 0.007, "step": 293120 }, { "epoch": 3.1318980714781772, "grad_norm": 0.08563800156116486, "learning_rate": 8.928070289046223e-07, "loss": 0.0099, "step": 293130 }, { "epoch": 3.1320049147924567, "grad_norm": 5.531103134155273, "learning_rate": 8.927966337080568e-07, "loss": 0.029, "step": 293140 }, { "epoch": 3.1321117581067366, "grad_norm": 0.0012485700426623225, "learning_rate": 8.927862380679941e-07, "loss": 0.0078, "step": 293150 }, { "epoch": 3.132218601421016, "grad_norm": 9.697972297668457, "learning_rate": 8.927758419844462e-07, "loss": 0.0009, "step": 293160 }, { "epoch": 3.1323254447352955, "grad_norm": 1.3754351139068604, "learning_rate": 8.927654454574248e-07, "loss": 0.0288, "step": 293170 }, { "epoch": 3.1324322880495754, "grad_norm": 0.10766052454710007, "learning_rate": 8.927550484869413e-07, "loss": 0.0094, "step": 293180 }, { "epoch": 3.132539131363855, "grad_norm": 0.012842809781432152, "learning_rate": 8.927446510730079e-07, "loss": 0.0072, "step": 293190 }, { "epoch": 3.1326459746781343, "grad_norm": 5.174279689788818, "learning_rate": 8.927342532156362e-07, "loss": 0.0285, "step": 293200 }, { "epoch": 3.1327528179924142, "grad_norm": 0.14059634506702423, "learning_rate": 8.927238549148377e-07, "loss": 0.0026, "step": 293210 }, { "epoch": 3.1328596613066937, "grad_norm": 3.0066397190093994, "learning_rate": 8.927134561706245e-07, "loss": 0.0044, "step": 293220 }, { "epoch": 3.132966504620973, "grad_norm": 0.047655899077653885, "learning_rate": 8.92703056983008e-07, "loss": 0.0037, "step": 293230 }, { "epoch": 3.133073347935253, "grad_norm": 0.0006249983562156558, "learning_rate": 8.926926573520002e-07, "loss": 0.0082, "step": 293240 }, { "epoch": 3.1331801912495325, "grad_norm": 0.016959622502326965, "learning_rate": 8.926822572776127e-07, "loss": 0.0492, "step": 293250 }, { "epoch": 3.133287034563812, "grad_norm": 3.435138702392578, "learning_rate": 8.926718567598574e-07, "loss": 0.0386, "step": 293260 }, { "epoch": 3.133393877878092, "grad_norm": 8.039435386657715, "learning_rate": 8.926614557987458e-07, "loss": 0.0207, "step": 293270 }, { "epoch": 3.1335007211923713, "grad_norm": 0.008831524290144444, "learning_rate": 8.9265105439429e-07, "loss": 0.0064, "step": 293280 }, { "epoch": 3.133607564506651, "grad_norm": 0.010976847261190414, "learning_rate": 8.926406525465013e-07, "loss": 0.0109, "step": 293290 }, { "epoch": 3.1337144078209307, "grad_norm": 0.006831159349530935, "learning_rate": 8.926302502553917e-07, "loss": 0.0164, "step": 293300 }, { "epoch": 3.13382125113521, "grad_norm": 0.16902460157871246, "learning_rate": 8.92619847520973e-07, "loss": 0.0071, "step": 293310 }, { "epoch": 3.13392809444949, "grad_norm": 1.1360257863998413, "learning_rate": 8.926094443432568e-07, "loss": 0.0026, "step": 293320 }, { "epoch": 3.1340349377637695, "grad_norm": 0.05257898569107056, "learning_rate": 8.92599040722255e-07, "loss": 0.0015, "step": 293330 }, { "epoch": 3.134141781078049, "grad_norm": 0.06330487132072449, "learning_rate": 8.925886366579792e-07, "loss": 0.0044, "step": 293340 }, { "epoch": 3.1342486243923284, "grad_norm": 0.005671756342053413, "learning_rate": 8.925782321504412e-07, "loss": 0.0328, "step": 293350 }, { "epoch": 3.1343554677066083, "grad_norm": 12.929462432861328, "learning_rate": 8.925678271996528e-07, "loss": 0.0252, "step": 293360 }, { "epoch": 3.134462311020888, "grad_norm": 3.591740369796753, "learning_rate": 8.925574218056256e-07, "loss": 0.0096, "step": 293370 }, { "epoch": 3.1345691543351677, "grad_norm": 0.043314605951309204, "learning_rate": 8.925470159683715e-07, "loss": 0.0409, "step": 293380 }, { "epoch": 3.134675997649447, "grad_norm": 6.295755386352539, "learning_rate": 8.925366096879021e-07, "loss": 0.0156, "step": 293390 }, { "epoch": 3.1347828409637266, "grad_norm": 0.009148801676928997, "learning_rate": 8.925262029642293e-07, "loss": 0.0226, "step": 293400 }, { "epoch": 3.134889684278006, "grad_norm": 0.18112976849079132, "learning_rate": 8.925157957973648e-07, "loss": 0.0005, "step": 293410 }, { "epoch": 3.134996527592286, "grad_norm": 0.03437769040465355, "learning_rate": 8.925053881873204e-07, "loss": 0.041, "step": 293420 }, { "epoch": 3.1351033709065654, "grad_norm": 0.0854749009013176, "learning_rate": 8.924949801341077e-07, "loss": 0.0035, "step": 293430 }, { "epoch": 3.1352102142208453, "grad_norm": 0.025006119161844254, "learning_rate": 8.924845716377386e-07, "loss": 0.0305, "step": 293440 }, { "epoch": 3.135317057535125, "grad_norm": 0.013777045533061028, "learning_rate": 8.924741626982248e-07, "loss": 0.0029, "step": 293450 }, { "epoch": 3.1354239008494043, "grad_norm": 0.5552327632904053, "learning_rate": 8.92463753315578e-07, "loss": 0.0039, "step": 293460 }, { "epoch": 3.135530744163684, "grad_norm": 4.61231803894043, "learning_rate": 8.9245334348981e-07, "loss": 0.015, "step": 293470 }, { "epoch": 3.1356375874779636, "grad_norm": 0.10594826936721802, "learning_rate": 8.924429332209327e-07, "loss": 0.0029, "step": 293480 }, { "epoch": 3.135744430792243, "grad_norm": 3.7480084896087646, "learning_rate": 8.924325225089575e-07, "loss": 0.0078, "step": 293490 }, { "epoch": 3.135851274106523, "grad_norm": 2.447190523147583, "learning_rate": 8.924221113538966e-07, "loss": 0.0195, "step": 293500 }, { "epoch": 3.1359581174208024, "grad_norm": 0.02641177736222744, "learning_rate": 8.924116997557612e-07, "loss": 0.0082, "step": 293510 }, { "epoch": 3.136064960735082, "grad_norm": 0.6115413904190063, "learning_rate": 8.924012877145636e-07, "loss": 0.0108, "step": 293520 }, { "epoch": 3.136171804049362, "grad_norm": 0.008898969739675522, "learning_rate": 8.923908752303153e-07, "loss": 0.0363, "step": 293530 }, { "epoch": 3.1362786473636413, "grad_norm": 3.443429470062256, "learning_rate": 8.92380462303028e-07, "loss": 0.0106, "step": 293540 }, { "epoch": 3.1363854906779207, "grad_norm": 0.8264336585998535, "learning_rate": 8.923700489327136e-07, "loss": 0.0093, "step": 293550 }, { "epoch": 3.1364923339922006, "grad_norm": 1.8139910697937012, "learning_rate": 8.923596351193838e-07, "loss": 0.0037, "step": 293560 }, { "epoch": 3.13659917730648, "grad_norm": 0.003170425770804286, "learning_rate": 8.923492208630504e-07, "loss": 0.0083, "step": 293570 }, { "epoch": 3.1367060206207595, "grad_norm": 3.902529239654541, "learning_rate": 8.923388061637252e-07, "loss": 0.0267, "step": 293580 }, { "epoch": 3.1368128639350394, "grad_norm": 0.0016257567331194878, "learning_rate": 8.923283910214196e-07, "loss": 0.0276, "step": 293590 }, { "epoch": 3.136919707249319, "grad_norm": 0.6738623380661011, "learning_rate": 8.923179754361458e-07, "loss": 0.0168, "step": 293600 }, { "epoch": 3.1370265505635984, "grad_norm": 0.010940908454358578, "learning_rate": 8.923075594079155e-07, "loss": 0.002, "step": 293610 }, { "epoch": 3.1371333938778783, "grad_norm": 0.9161951541900635, "learning_rate": 8.922971429367403e-07, "loss": 0.0015, "step": 293620 }, { "epoch": 3.1372402371921577, "grad_norm": 0.044623225927352905, "learning_rate": 8.922867260226319e-07, "loss": 0.0041, "step": 293630 }, { "epoch": 3.137347080506437, "grad_norm": 0.1840122938156128, "learning_rate": 8.922763086656022e-07, "loss": 0.0156, "step": 293640 }, { "epoch": 3.137453923820717, "grad_norm": 0.45643991231918335, "learning_rate": 8.922658908656631e-07, "loss": 0.0006, "step": 293650 }, { "epoch": 3.1375607671349965, "grad_norm": 0.07044228166341782, "learning_rate": 8.922554726228259e-07, "loss": 0.0178, "step": 293660 }, { "epoch": 3.137667610449276, "grad_norm": 0.09029608219861984, "learning_rate": 8.92245053937103e-07, "loss": 0.0235, "step": 293670 }, { "epoch": 3.137774453763556, "grad_norm": 0.0029910574667155743, "learning_rate": 8.922346348085057e-07, "loss": 0.0452, "step": 293680 }, { "epoch": 3.1378812970778354, "grad_norm": 1.4163920879364014, "learning_rate": 8.922242152370458e-07, "loss": 0.0055, "step": 293690 }, { "epoch": 3.137988140392115, "grad_norm": 0.018107105046510696, "learning_rate": 8.922137952227353e-07, "loss": 0.0089, "step": 293700 }, { "epoch": 3.1380949837063947, "grad_norm": 0.014623202383518219, "learning_rate": 8.922033747655857e-07, "loss": 0.0012, "step": 293710 }, { "epoch": 3.138201827020674, "grad_norm": 4.295476913452148, "learning_rate": 8.92192953865609e-07, "loss": 0.0086, "step": 293720 }, { "epoch": 3.1383086703349536, "grad_norm": 0.22772014141082764, "learning_rate": 8.921825325228169e-07, "loss": 0.0032, "step": 293730 }, { "epoch": 3.1384155136492335, "grad_norm": 0.7206023335456848, "learning_rate": 8.92172110737221e-07, "loss": 0.0092, "step": 293740 }, { "epoch": 3.138522356963513, "grad_norm": 0.010426721535623074, "learning_rate": 8.921616885088332e-07, "loss": 0.0155, "step": 293750 }, { "epoch": 3.1386292002777925, "grad_norm": 3.60090970993042, "learning_rate": 8.921512658376653e-07, "loss": 0.0105, "step": 293760 }, { "epoch": 3.1387360435920724, "grad_norm": 0.045137371867895126, "learning_rate": 8.92140842723729e-07, "loss": 0.0073, "step": 293770 }, { "epoch": 3.138842886906352, "grad_norm": 4.099654674530029, "learning_rate": 8.92130419167036e-07, "loss": 0.0137, "step": 293780 }, { "epoch": 3.1389497302206313, "grad_norm": 0.5847029089927673, "learning_rate": 8.921199951675984e-07, "loss": 0.011, "step": 293790 }, { "epoch": 3.139056573534911, "grad_norm": 0.0231386236846447, "learning_rate": 8.921095707254276e-07, "loss": 0.0002, "step": 293800 }, { "epoch": 3.1391634168491906, "grad_norm": 7.159772872924805, "learning_rate": 8.920991458405353e-07, "loss": 0.0605, "step": 293810 }, { "epoch": 3.13927026016347, "grad_norm": 0.023974860087037086, "learning_rate": 8.920887205129337e-07, "loss": 0.0138, "step": 293820 }, { "epoch": 3.13937710347775, "grad_norm": 0.11973292380571365, "learning_rate": 8.920782947426344e-07, "loss": 0.0052, "step": 293830 }, { "epoch": 3.1394839467920295, "grad_norm": 18.281959533691406, "learning_rate": 8.92067868529649e-07, "loss": 0.0171, "step": 293840 }, { "epoch": 3.139590790106309, "grad_norm": 0.010188727639615536, "learning_rate": 8.920574418739894e-07, "loss": 0.0126, "step": 293850 }, { "epoch": 3.139697633420589, "grad_norm": 0.014901824295520782, "learning_rate": 8.920470147756673e-07, "loss": 0.0058, "step": 293860 }, { "epoch": 3.1398044767348683, "grad_norm": 0.21367467939853668, "learning_rate": 8.920365872346947e-07, "loss": 0.0331, "step": 293870 }, { "epoch": 3.1399113200491477, "grad_norm": 0.5202910900115967, "learning_rate": 8.92026159251083e-07, "loss": 0.0021, "step": 293880 }, { "epoch": 3.1400181633634277, "grad_norm": 0.007865375839173794, "learning_rate": 8.920157308248443e-07, "loss": 0.0041, "step": 293890 }, { "epoch": 3.140125006677707, "grad_norm": 4.498775959014893, "learning_rate": 8.920053019559902e-07, "loss": 0.0026, "step": 293900 }, { "epoch": 3.1402318499919866, "grad_norm": 0.04652932658791542, "learning_rate": 8.919948726445326e-07, "loss": 0.0197, "step": 293910 }, { "epoch": 3.1403386933062665, "grad_norm": 0.2515597641468048, "learning_rate": 8.919844428904832e-07, "loss": 0.0137, "step": 293920 }, { "epoch": 3.140445536620546, "grad_norm": 0.747157096862793, "learning_rate": 8.919740126938537e-07, "loss": 0.0334, "step": 293930 }, { "epoch": 3.1405523799348254, "grad_norm": 0.013101160526275635, "learning_rate": 8.919635820546561e-07, "loss": 0.0042, "step": 293940 }, { "epoch": 3.1406592232491053, "grad_norm": 3.034580945968628, "learning_rate": 8.919531509729018e-07, "loss": 0.0289, "step": 293950 }, { "epoch": 3.1407660665633848, "grad_norm": 0.04121922701597214, "learning_rate": 8.919427194486029e-07, "loss": 0.0057, "step": 293960 }, { "epoch": 3.140872909877664, "grad_norm": 3.124835968017578, "learning_rate": 8.919322874817712e-07, "loss": 0.0108, "step": 293970 }, { "epoch": 3.140979753191944, "grad_norm": 0.14504316449165344, "learning_rate": 8.919218550724182e-07, "loss": 0.0124, "step": 293980 }, { "epoch": 3.1410865965062236, "grad_norm": 6.0694451332092285, "learning_rate": 8.91911422220556e-07, "loss": 0.0184, "step": 293990 }, { "epoch": 3.141193439820503, "grad_norm": 0.029360543936491013, "learning_rate": 8.919009889261962e-07, "loss": 0.0088, "step": 294000 }, { "epoch": 3.141300283134783, "grad_norm": 0.3473817706108093, "learning_rate": 8.918905551893505e-07, "loss": 0.036, "step": 294010 }, { "epoch": 3.1414071264490624, "grad_norm": 0.03171202912926674, "learning_rate": 8.918801210100309e-07, "loss": 0.0263, "step": 294020 }, { "epoch": 3.141513969763342, "grad_norm": 0.49026551842689514, "learning_rate": 8.918696863882489e-07, "loss": 0.0025, "step": 294030 }, { "epoch": 3.1416208130776218, "grad_norm": 0.03566035255789757, "learning_rate": 8.918592513240167e-07, "loss": 0.0264, "step": 294040 }, { "epoch": 3.141727656391901, "grad_norm": 0.15541943907737732, "learning_rate": 8.918488158173456e-07, "loss": 0.009, "step": 294050 }, { "epoch": 3.1418344997061807, "grad_norm": 3.419985294342041, "learning_rate": 8.918383798682478e-07, "loss": 0.017, "step": 294060 }, { "epoch": 3.1419413430204606, "grad_norm": 0.01879841834306717, "learning_rate": 8.918279434767348e-07, "loss": 0.0121, "step": 294070 }, { "epoch": 3.14204818633474, "grad_norm": 1.9170584678649902, "learning_rate": 8.918175066428184e-07, "loss": 0.0248, "step": 294080 }, { "epoch": 3.14215502964902, "grad_norm": 1.487863302230835, "learning_rate": 8.918070693665106e-07, "loss": 0.018, "step": 294090 }, { "epoch": 3.1422618729632994, "grad_norm": 9.071137428283691, "learning_rate": 8.91796631647823e-07, "loss": 0.0105, "step": 294100 }, { "epoch": 3.142368716277579, "grad_norm": 4.478607654571533, "learning_rate": 8.917861934867675e-07, "loss": 0.0094, "step": 294110 }, { "epoch": 3.1424755595918583, "grad_norm": 2.2147936820983887, "learning_rate": 8.917757548833557e-07, "loss": 0.0158, "step": 294120 }, { "epoch": 3.142582402906138, "grad_norm": 11.058795928955078, "learning_rate": 8.917653158375995e-07, "loss": 0.0074, "step": 294130 }, { "epoch": 3.1426892462204177, "grad_norm": 0.7018547654151917, "learning_rate": 8.917548763495109e-07, "loss": 0.0492, "step": 294140 }, { "epoch": 3.1427960895346976, "grad_norm": 2.79292893409729, "learning_rate": 8.917444364191012e-07, "loss": 0.0303, "step": 294150 }, { "epoch": 3.142902932848977, "grad_norm": 1.459637999534607, "learning_rate": 8.917339960463825e-07, "loss": 0.0033, "step": 294160 }, { "epoch": 3.1430097761632565, "grad_norm": 0.018831491470336914, "learning_rate": 8.917235552313668e-07, "loss": 0.0154, "step": 294170 }, { "epoch": 3.1431166194775364, "grad_norm": 0.5228995680809021, "learning_rate": 8.917131139740654e-07, "loss": 0.0084, "step": 294180 }, { "epoch": 3.143223462791816, "grad_norm": 3.105557918548584, "learning_rate": 8.917026722744905e-07, "loss": 0.0071, "step": 294190 }, { "epoch": 3.1433303061060953, "grad_norm": 0.41758501529693604, "learning_rate": 8.916922301326536e-07, "loss": 0.0024, "step": 294200 }, { "epoch": 3.143437149420375, "grad_norm": 0.13217155635356903, "learning_rate": 8.916817875485666e-07, "loss": 0.0043, "step": 294210 }, { "epoch": 3.1435439927346547, "grad_norm": 0.03957467898726463, "learning_rate": 8.916713445222413e-07, "loss": 0.0037, "step": 294220 }, { "epoch": 3.143650836048934, "grad_norm": 2.1169674396514893, "learning_rate": 8.916609010536895e-07, "loss": 0.0053, "step": 294230 }, { "epoch": 3.143757679363214, "grad_norm": 3.2404260635375977, "learning_rate": 8.916504571429231e-07, "loss": 0.0097, "step": 294240 }, { "epoch": 3.1438645226774935, "grad_norm": 2.693948984146118, "learning_rate": 8.916400127899537e-07, "loss": 0.0115, "step": 294250 }, { "epoch": 3.143971365991773, "grad_norm": 0.005036077927798033, "learning_rate": 8.916295679947931e-07, "loss": 0.0003, "step": 294260 }, { "epoch": 3.144078209306053, "grad_norm": 0.03236304223537445, "learning_rate": 8.916191227574533e-07, "loss": 0.0287, "step": 294270 }, { "epoch": 3.1441850526203323, "grad_norm": 1.043536901473999, "learning_rate": 8.916086770779459e-07, "loss": 0.0194, "step": 294280 }, { "epoch": 3.144291895934612, "grad_norm": 0.002663212362676859, "learning_rate": 8.915982309562826e-07, "loss": 0.0043, "step": 294290 }, { "epoch": 3.1443987392488917, "grad_norm": 0.006893872749060392, "learning_rate": 8.915877843924755e-07, "loss": 0.017, "step": 294300 }, { "epoch": 3.144505582563171, "grad_norm": 3.307258129119873, "learning_rate": 8.915773373865363e-07, "loss": 0.0384, "step": 294310 }, { "epoch": 3.1446124258774506, "grad_norm": 0.03009222447872162, "learning_rate": 8.915668899384765e-07, "loss": 0.0016, "step": 294320 }, { "epoch": 3.1447192691917305, "grad_norm": 0.055351756513118744, "learning_rate": 8.915564420483083e-07, "loss": 0.0229, "step": 294330 }, { "epoch": 3.14482611250601, "grad_norm": 0.0024014415685087442, "learning_rate": 8.915459937160433e-07, "loss": 0.0034, "step": 294340 }, { "epoch": 3.1449329558202894, "grad_norm": 0.007312334142625332, "learning_rate": 8.915355449416933e-07, "loss": 0.0335, "step": 294350 }, { "epoch": 3.1450397991345693, "grad_norm": 0.18739303946495056, "learning_rate": 8.9152509572527e-07, "loss": 0.0008, "step": 294360 }, { "epoch": 3.145146642448849, "grad_norm": 0.016223816201090813, "learning_rate": 8.915146460667855e-07, "loss": 0.0015, "step": 294370 }, { "epoch": 3.1452534857631282, "grad_norm": 0.7063618898391724, "learning_rate": 8.915041959662515e-07, "loss": 0.0096, "step": 294380 }, { "epoch": 3.145360329077408, "grad_norm": 0.711421549320221, "learning_rate": 8.914937454236795e-07, "loss": 0.0245, "step": 294390 }, { "epoch": 3.1454671723916876, "grad_norm": 0.6580107808113098, "learning_rate": 8.914832944390816e-07, "loss": 0.0116, "step": 294400 }, { "epoch": 3.145574015705967, "grad_norm": 2.989194869995117, "learning_rate": 8.914728430124695e-07, "loss": 0.0321, "step": 294410 }, { "epoch": 3.145680859020247, "grad_norm": 3.154118299484253, "learning_rate": 8.91462391143855e-07, "loss": 0.0168, "step": 294420 }, { "epoch": 3.1457877023345264, "grad_norm": 0.12783630192279816, "learning_rate": 8.9145193883325e-07, "loss": 0.0329, "step": 294430 }, { "epoch": 3.145894545648806, "grad_norm": 4.065679550170898, "learning_rate": 8.91441486080666e-07, "loss": 0.0036, "step": 294440 }, { "epoch": 3.146001388963086, "grad_norm": 3.3246572017669678, "learning_rate": 8.914310328861152e-07, "loss": 0.028, "step": 294450 }, { "epoch": 3.1461082322773652, "grad_norm": 0.11807362735271454, "learning_rate": 8.914205792496092e-07, "loss": 0.0093, "step": 294460 }, { "epoch": 3.1462150755916447, "grad_norm": 0.46615052223205566, "learning_rate": 8.914101251711599e-07, "loss": 0.0044, "step": 294470 }, { "epoch": 3.1463219189059246, "grad_norm": 0.015198142267763615, "learning_rate": 8.913996706507789e-07, "loss": 0.0105, "step": 294480 }, { "epoch": 3.146428762220204, "grad_norm": 0.13045278191566467, "learning_rate": 8.913892156884781e-07, "loss": 0.019, "step": 294490 }, { "epoch": 3.1465356055344835, "grad_norm": 0.0012728514848276973, "learning_rate": 8.913787602842695e-07, "loss": 0.0134, "step": 294500 }, { "epoch": 3.1466424488487634, "grad_norm": 0.009186080656945705, "learning_rate": 8.913683044381646e-07, "loss": 0.0152, "step": 294510 }, { "epoch": 3.146749292163043, "grad_norm": 5.704399108886719, "learning_rate": 8.913578481501754e-07, "loss": 0.0244, "step": 294520 }, { "epoch": 3.1468561354773223, "grad_norm": 0.014029363170266151, "learning_rate": 8.913473914203135e-07, "loss": 0.0156, "step": 294530 }, { "epoch": 3.1469629787916023, "grad_norm": 1.421788215637207, "learning_rate": 8.913369342485911e-07, "loss": 0.0068, "step": 294540 }, { "epoch": 3.1470698221058817, "grad_norm": 0.08275074511766434, "learning_rate": 8.913264766350197e-07, "loss": 0.0008, "step": 294550 }, { "epoch": 3.147176665420161, "grad_norm": 0.010933312587440014, "learning_rate": 8.913160185796111e-07, "loss": 0.026, "step": 294560 }, { "epoch": 3.147283508734441, "grad_norm": 2.0046446323394775, "learning_rate": 8.913055600823773e-07, "loss": 0.0169, "step": 294570 }, { "epoch": 3.1473903520487205, "grad_norm": 0.06453917920589447, "learning_rate": 8.912951011433298e-07, "loss": 0.0031, "step": 294580 }, { "epoch": 3.147497195363, "grad_norm": 3.7265379428863525, "learning_rate": 8.912846417624809e-07, "loss": 0.0186, "step": 294590 }, { "epoch": 3.14760403867728, "grad_norm": 0.9563275575637817, "learning_rate": 8.912741819398417e-07, "loss": 0.0128, "step": 294600 }, { "epoch": 3.1477108819915594, "grad_norm": 3.4828603267669678, "learning_rate": 8.912637216754248e-07, "loss": 0.0461, "step": 294610 }, { "epoch": 3.147817725305839, "grad_norm": 0.02927403151988983, "learning_rate": 8.912532609692414e-07, "loss": 0.005, "step": 294620 }, { "epoch": 3.1479245686201187, "grad_norm": 0.06831587105989456, "learning_rate": 8.912427998213037e-07, "loss": 0.0196, "step": 294630 }, { "epoch": 3.148031411934398, "grad_norm": 0.007574752904474735, "learning_rate": 8.912323382316231e-07, "loss": 0.0122, "step": 294640 }, { "epoch": 3.1481382552486776, "grad_norm": 2.4149069786071777, "learning_rate": 8.912218762002119e-07, "loss": 0.0439, "step": 294650 }, { "epoch": 3.1482450985629575, "grad_norm": 0.014585087075829506, "learning_rate": 8.912114137270817e-07, "loss": 0.0241, "step": 294660 }, { "epoch": 3.148351941877237, "grad_norm": 0.09728923439979553, "learning_rate": 8.91200950812244e-07, "loss": 0.0202, "step": 294670 }, { "epoch": 3.1484587851915165, "grad_norm": 4.453793525695801, "learning_rate": 8.911904874557111e-07, "loss": 0.0146, "step": 294680 }, { "epoch": 3.1485656285057964, "grad_norm": 0.011241080239415169, "learning_rate": 8.911800236574947e-07, "loss": 0.0026, "step": 294690 }, { "epoch": 3.148672471820076, "grad_norm": 1.2530603408813477, "learning_rate": 8.911695594176063e-07, "loss": 0.0141, "step": 294700 }, { "epoch": 3.1487793151343553, "grad_norm": 0.022570693865418434, "learning_rate": 8.911590947360581e-07, "loss": 0.0023, "step": 294710 }, { "epoch": 3.148886158448635, "grad_norm": 0.9429023861885071, "learning_rate": 8.911486296128617e-07, "loss": 0.0069, "step": 294720 }, { "epoch": 3.1489930017629146, "grad_norm": 1.4632959365844727, "learning_rate": 8.911381640480291e-07, "loss": 0.0054, "step": 294730 }, { "epoch": 3.149099845077194, "grad_norm": 1.733056664466858, "learning_rate": 8.911276980415719e-07, "loss": 0.0677, "step": 294740 }, { "epoch": 3.149206688391474, "grad_norm": 0.010955496691167355, "learning_rate": 8.911172315935021e-07, "loss": 0.0014, "step": 294750 }, { "epoch": 3.1493135317057535, "grad_norm": 16.0246639251709, "learning_rate": 8.911067647038312e-07, "loss": 0.0212, "step": 294760 }, { "epoch": 3.149420375020033, "grad_norm": 0.0018606481608003378, "learning_rate": 8.910962973725714e-07, "loss": 0.0047, "step": 294770 }, { "epoch": 3.149527218334313, "grad_norm": 0.24112209677696228, "learning_rate": 8.910858295997344e-07, "loss": 0.0089, "step": 294780 }, { "epoch": 3.1496340616485923, "grad_norm": 2.303602457046509, "learning_rate": 8.910753613853319e-07, "loss": 0.0152, "step": 294790 }, { "epoch": 3.149740904962872, "grad_norm": 0.5486566424369812, "learning_rate": 8.910648927293758e-07, "loss": 0.0061, "step": 294800 }, { "epoch": 3.1498477482771516, "grad_norm": 4.219135761260986, "learning_rate": 8.91054423631878e-07, "loss": 0.0056, "step": 294810 }, { "epoch": 3.149954591591431, "grad_norm": 3.645115852355957, "learning_rate": 8.910439540928501e-07, "loss": 0.0039, "step": 294820 }, { "epoch": 3.1500614349057106, "grad_norm": 0.12115495651960373, "learning_rate": 8.910334841123041e-07, "loss": 0.0112, "step": 294830 }, { "epoch": 3.1501682782199905, "grad_norm": 4.826955318450928, "learning_rate": 8.910230136902519e-07, "loss": 0.0151, "step": 294840 }, { "epoch": 3.15027512153427, "grad_norm": 0.0018678550841286778, "learning_rate": 8.910125428267051e-07, "loss": 0.0259, "step": 294850 }, { "epoch": 3.15038196484855, "grad_norm": 0.0405513234436512, "learning_rate": 8.910020715216758e-07, "loss": 0.0152, "step": 294860 }, { "epoch": 3.1504888081628293, "grad_norm": 7.977911472320557, "learning_rate": 8.909915997751755e-07, "loss": 0.0172, "step": 294870 }, { "epoch": 3.1505956514771087, "grad_norm": 0.00206627044826746, "learning_rate": 8.909811275872161e-07, "loss": 0.0158, "step": 294880 }, { "epoch": 3.150702494791388, "grad_norm": 9.437459945678711, "learning_rate": 8.909706549578096e-07, "loss": 0.0096, "step": 294890 }, { "epoch": 3.150809338105668, "grad_norm": 0.23896202445030212, "learning_rate": 8.909601818869677e-07, "loss": 0.004, "step": 294900 }, { "epoch": 3.1509161814199476, "grad_norm": 0.015417888760566711, "learning_rate": 8.909497083747023e-07, "loss": 0.0174, "step": 294910 }, { "epoch": 3.1510230247342275, "grad_norm": 0.05040288716554642, "learning_rate": 8.909392344210252e-07, "loss": 0.0055, "step": 294920 }, { "epoch": 3.151129868048507, "grad_norm": 0.6190884113311768, "learning_rate": 8.909287600259481e-07, "loss": 0.0034, "step": 294930 }, { "epoch": 3.1512367113627864, "grad_norm": 0.16706496477127075, "learning_rate": 8.909182851894829e-07, "loss": 0.011, "step": 294940 }, { "epoch": 3.1513435546770663, "grad_norm": 0.036451078951358795, "learning_rate": 8.909078099116416e-07, "loss": 0.0169, "step": 294950 }, { "epoch": 3.1514503979913457, "grad_norm": 0.0029430249705910683, "learning_rate": 8.908973341924357e-07, "loss": 0.0012, "step": 294960 }, { "epoch": 3.151557241305625, "grad_norm": 2.239901065826416, "learning_rate": 8.908868580318773e-07, "loss": 0.0511, "step": 294970 }, { "epoch": 3.151664084619905, "grad_norm": 0.6633850336074829, "learning_rate": 8.908763814299783e-07, "loss": 0.0003, "step": 294980 }, { "epoch": 3.1517709279341846, "grad_norm": 0.16202551126480103, "learning_rate": 8.908659043867501e-07, "loss": 0.0074, "step": 294990 }, { "epoch": 3.151877771248464, "grad_norm": 0.05573219433426857, "learning_rate": 8.908554269022049e-07, "loss": 0.0395, "step": 295000 }, { "epoch": 3.151984614562744, "grad_norm": 0.7526668310165405, "learning_rate": 8.908449489763543e-07, "loss": 0.0118, "step": 295010 }, { "epoch": 3.1520914578770234, "grad_norm": 0.0037033301778137684, "learning_rate": 8.908344706092103e-07, "loss": 0.0237, "step": 295020 }, { "epoch": 3.152198301191303, "grad_norm": 0.007316594943404198, "learning_rate": 8.90823991800785e-07, "loss": 0.0152, "step": 295030 }, { "epoch": 3.1523051445055827, "grad_norm": 0.04825609177350998, "learning_rate": 8.908135125510896e-07, "loss": 0.0078, "step": 295040 }, { "epoch": 3.152411987819862, "grad_norm": 0.04062670096755028, "learning_rate": 8.908030328601363e-07, "loss": 0.0042, "step": 295050 }, { "epoch": 3.1525188311341417, "grad_norm": 0.0062722330912947655, "learning_rate": 8.907925527279368e-07, "loss": 0.0228, "step": 295060 }, { "epoch": 3.1526256744484216, "grad_norm": 0.014267317950725555, "learning_rate": 8.907820721545031e-07, "loss": 0.0025, "step": 295070 }, { "epoch": 3.152732517762701, "grad_norm": 0.3217197060585022, "learning_rate": 8.90771591139847e-07, "loss": 0.0382, "step": 295080 }, { "epoch": 3.1528393610769805, "grad_norm": 0.028481435030698776, "learning_rate": 8.907611096839803e-07, "loss": 0.0155, "step": 295090 }, { "epoch": 3.1529462043912604, "grad_norm": 1.547723650932312, "learning_rate": 8.907506277869148e-07, "loss": 0.0042, "step": 295100 }, { "epoch": 3.15305304770554, "grad_norm": 0.38706839084625244, "learning_rate": 8.907401454486623e-07, "loss": 0.007, "step": 295110 }, { "epoch": 3.1531598910198193, "grad_norm": 11.372932434082031, "learning_rate": 8.907296626692346e-07, "loss": 0.0148, "step": 295120 }, { "epoch": 3.153266734334099, "grad_norm": 1.7917550802230835, "learning_rate": 8.907191794486437e-07, "loss": 0.0196, "step": 295130 }, { "epoch": 3.1533735776483787, "grad_norm": 6.842540264129639, "learning_rate": 8.907086957869014e-07, "loss": 0.0395, "step": 295140 }, { "epoch": 3.153480420962658, "grad_norm": 1.5409451723098755, "learning_rate": 8.906982116840195e-07, "loss": 0.0153, "step": 295150 }, { "epoch": 3.153587264276938, "grad_norm": 3.5129921436309814, "learning_rate": 8.906877271400097e-07, "loss": 0.0155, "step": 295160 }, { "epoch": 3.1536941075912175, "grad_norm": 3.644974946975708, "learning_rate": 8.906772421548841e-07, "loss": 0.013, "step": 295170 }, { "epoch": 3.153800950905497, "grad_norm": 0.018879394978284836, "learning_rate": 8.906667567286544e-07, "loss": 0.0202, "step": 295180 }, { "epoch": 3.153907794219777, "grad_norm": 1.1868959665298462, "learning_rate": 8.906562708613324e-07, "loss": 0.0091, "step": 295190 }, { "epoch": 3.1540146375340563, "grad_norm": 0.7267057299613953, "learning_rate": 8.9064578455293e-07, "loss": 0.0062, "step": 295200 }, { "epoch": 3.1541214808483358, "grad_norm": 5.3752217292785645, "learning_rate": 8.906352978034591e-07, "loss": 0.0038, "step": 295210 }, { "epoch": 3.1542283241626157, "grad_norm": 0.01434649620205164, "learning_rate": 8.906248106129314e-07, "loss": 0.0029, "step": 295220 }, { "epoch": 3.154335167476895, "grad_norm": 0.24831439554691315, "learning_rate": 8.906143229813588e-07, "loss": 0.0131, "step": 295230 }, { "epoch": 3.1544420107911746, "grad_norm": 0.13694755733013153, "learning_rate": 8.906038349087532e-07, "loss": 0.0046, "step": 295240 }, { "epoch": 3.1545488541054545, "grad_norm": 3.66987681388855, "learning_rate": 8.905933463951263e-07, "loss": 0.0124, "step": 295250 }, { "epoch": 3.154655697419734, "grad_norm": 0.2213127315044403, "learning_rate": 8.905828574404901e-07, "loss": 0.0099, "step": 295260 }, { "epoch": 3.1547625407340134, "grad_norm": 0.13764525949954987, "learning_rate": 8.905723680448562e-07, "loss": 0.0064, "step": 295270 }, { "epoch": 3.1548693840482933, "grad_norm": 3.285768985748291, "learning_rate": 8.905618782082369e-07, "loss": 0.026, "step": 295280 }, { "epoch": 3.1549762273625728, "grad_norm": 0.0034494702704250813, "learning_rate": 8.905513879306436e-07, "loss": 0.0134, "step": 295290 }, { "epoch": 3.1550830706768522, "grad_norm": 7.607657432556152, "learning_rate": 8.905408972120884e-07, "loss": 0.0294, "step": 295300 }, { "epoch": 3.155189913991132, "grad_norm": 1.3988419771194458, "learning_rate": 8.905304060525829e-07, "loss": 0.019, "step": 295310 }, { "epoch": 3.1552967573054116, "grad_norm": 0.11394374072551727, "learning_rate": 8.905199144521392e-07, "loss": 0.0157, "step": 295320 }, { "epoch": 3.155403600619691, "grad_norm": 0.5015942454338074, "learning_rate": 8.90509422410769e-07, "loss": 0.0182, "step": 295330 }, { "epoch": 3.155510443933971, "grad_norm": 3.394484519958496, "learning_rate": 8.904989299284843e-07, "loss": 0.0156, "step": 295340 }, { "epoch": 3.1556172872482504, "grad_norm": 0.023867487907409668, "learning_rate": 8.904884370052968e-07, "loss": 0.0049, "step": 295350 }, { "epoch": 3.15572413056253, "grad_norm": 0.02351437136530876, "learning_rate": 8.904779436412184e-07, "loss": 0.0169, "step": 295360 }, { "epoch": 3.1558309738768098, "grad_norm": 2.1740634441375732, "learning_rate": 8.904674498362608e-07, "loss": 0.0199, "step": 295370 }, { "epoch": 3.1559378171910892, "grad_norm": 8.344620704650879, "learning_rate": 8.90456955590436e-07, "loss": 0.0238, "step": 295380 }, { "epoch": 3.1560446605053687, "grad_norm": 0.07551630586385727, "learning_rate": 8.904464609037558e-07, "loss": 0.0132, "step": 295390 }, { "epoch": 3.1561515038196486, "grad_norm": 0.01673765853047371, "learning_rate": 8.904359657762322e-07, "loss": 0.0188, "step": 295400 }, { "epoch": 3.156258347133928, "grad_norm": 0.04972385987639427, "learning_rate": 8.90425470207877e-07, "loss": 0.003, "step": 295410 }, { "epoch": 3.1563651904482075, "grad_norm": 0.5122891664505005, "learning_rate": 8.904149741987018e-07, "loss": 0.006, "step": 295420 }, { "epoch": 3.1564720337624874, "grad_norm": 0.00649246945977211, "learning_rate": 8.904044777487187e-07, "loss": 0.0244, "step": 295430 }, { "epoch": 3.156578877076767, "grad_norm": 0.013148634694516659, "learning_rate": 8.903939808579395e-07, "loss": 0.0042, "step": 295440 }, { "epoch": 3.1566857203910463, "grad_norm": 1.192333698272705, "learning_rate": 8.903834835263759e-07, "loss": 0.0076, "step": 295450 }, { "epoch": 3.1567925637053262, "grad_norm": 0.32421788573265076, "learning_rate": 8.903729857540401e-07, "loss": 0.0044, "step": 295460 }, { "epoch": 3.1568994070196057, "grad_norm": 0.002441275166347623, "learning_rate": 8.903624875409436e-07, "loss": 0.0232, "step": 295470 }, { "epoch": 3.157006250333885, "grad_norm": 1.773270845413208, "learning_rate": 8.903519888870983e-07, "loss": 0.0287, "step": 295480 }, { "epoch": 3.157113093648165, "grad_norm": 0.03860479220747948, "learning_rate": 8.903414897925163e-07, "loss": 0.023, "step": 295490 }, { "epoch": 3.1572199369624445, "grad_norm": 0.017173418775200844, "learning_rate": 8.903309902572091e-07, "loss": 0.0149, "step": 295500 }, { "epoch": 3.157326780276724, "grad_norm": 0.7230818271636963, "learning_rate": 8.903204902811891e-07, "loss": 0.0116, "step": 295510 }, { "epoch": 3.157433623591004, "grad_norm": 0.03152329474687576, "learning_rate": 8.903099898644675e-07, "loss": 0.0174, "step": 295520 }, { "epoch": 3.1575404669052833, "grad_norm": 0.001511601498350501, "learning_rate": 8.902994890070567e-07, "loss": 0.0111, "step": 295530 }, { "epoch": 3.157647310219563, "grad_norm": 0.005661048460751772, "learning_rate": 8.902889877089681e-07, "loss": 0.0153, "step": 295540 }, { "epoch": 3.1577541535338427, "grad_norm": 0.06088028848171234, "learning_rate": 8.902784859702139e-07, "loss": 0.0023, "step": 295550 }, { "epoch": 3.157860996848122, "grad_norm": 1.651491403579712, "learning_rate": 8.902679837908058e-07, "loss": 0.0135, "step": 295560 }, { "epoch": 3.157967840162402, "grad_norm": 0.0666641891002655, "learning_rate": 8.902574811707557e-07, "loss": 0.0199, "step": 295570 }, { "epoch": 3.1580746834766815, "grad_norm": 4.933617115020752, "learning_rate": 8.902469781100754e-07, "loss": 0.0157, "step": 295580 }, { "epoch": 3.158181526790961, "grad_norm": 5.970544815063477, "learning_rate": 8.902364746087769e-07, "loss": 0.0335, "step": 295590 }, { "epoch": 3.1582883701052404, "grad_norm": 0.023512691259384155, "learning_rate": 8.902259706668718e-07, "loss": 0.0329, "step": 295600 }, { "epoch": 3.1583952134195203, "grad_norm": 0.008782641962170601, "learning_rate": 8.902154662843724e-07, "loss": 0.0137, "step": 295610 }, { "epoch": 3.1585020567338, "grad_norm": 0.5880289673805237, "learning_rate": 8.902049614612901e-07, "loss": 0.0165, "step": 295620 }, { "epoch": 3.1586089000480797, "grad_norm": 0.027206668630242348, "learning_rate": 8.901944561976369e-07, "loss": 0.0121, "step": 295630 }, { "epoch": 3.158715743362359, "grad_norm": 0.2137679159641266, "learning_rate": 8.901839504934248e-07, "loss": 0.0192, "step": 295640 }, { "epoch": 3.1588225866766386, "grad_norm": 0.0023714350536465645, "learning_rate": 8.901734443486656e-07, "loss": 0.0005, "step": 295650 }, { "epoch": 3.1589294299909185, "grad_norm": 0.0357276052236557, "learning_rate": 8.901629377633711e-07, "loss": 0.0048, "step": 295660 }, { "epoch": 3.159036273305198, "grad_norm": 0.3546106517314911, "learning_rate": 8.901524307375532e-07, "loss": 0.0046, "step": 295670 }, { "epoch": 3.1591431166194774, "grad_norm": 11.993453979492188, "learning_rate": 8.901419232712238e-07, "loss": 0.0216, "step": 295680 }, { "epoch": 3.1592499599337573, "grad_norm": 0.004194020293653011, "learning_rate": 8.901314153643946e-07, "loss": 0.0129, "step": 295690 }, { "epoch": 3.159356803248037, "grad_norm": 3.953347682952881, "learning_rate": 8.901209070170777e-07, "loss": 0.0081, "step": 295700 }, { "epoch": 3.1594636465623163, "grad_norm": 0.40863409638404846, "learning_rate": 8.901103982292848e-07, "loss": 0.0224, "step": 295710 }, { "epoch": 3.159570489876596, "grad_norm": 0.007941840216517448, "learning_rate": 8.900998890010279e-07, "loss": 0.0054, "step": 295720 }, { "epoch": 3.1596773331908756, "grad_norm": 0.8341427445411682, "learning_rate": 8.900893793323186e-07, "loss": 0.0444, "step": 295730 }, { "epoch": 3.159784176505155, "grad_norm": 4.200644016265869, "learning_rate": 8.900788692231691e-07, "loss": 0.0184, "step": 295740 }, { "epoch": 3.159891019819435, "grad_norm": 0.41613003611564636, "learning_rate": 8.90068358673591e-07, "loss": 0.0343, "step": 295750 }, { "epoch": 3.1599978631337144, "grad_norm": 0.02557021751999855, "learning_rate": 8.900578476835965e-07, "loss": 0.0097, "step": 295760 }, { "epoch": 3.160104706447994, "grad_norm": 5.326498031616211, "learning_rate": 8.90047336253197e-07, "loss": 0.0226, "step": 295770 }, { "epoch": 3.160211549762274, "grad_norm": 5.509068012237549, "learning_rate": 8.900368243824048e-07, "loss": 0.0317, "step": 295780 }, { "epoch": 3.1603183930765533, "grad_norm": 0.13511496782302856, "learning_rate": 8.900263120712314e-07, "loss": 0.0304, "step": 295790 }, { "epoch": 3.1604252363908327, "grad_norm": 6.17053747177124, "learning_rate": 8.900157993196891e-07, "loss": 0.0127, "step": 295800 }, { "epoch": 3.1605320797051126, "grad_norm": 1.357810378074646, "learning_rate": 8.900052861277895e-07, "loss": 0.0245, "step": 295810 }, { "epoch": 3.160638923019392, "grad_norm": 3.515691041946411, "learning_rate": 8.899947724955443e-07, "loss": 0.0356, "step": 295820 }, { "epoch": 3.1607457663336715, "grad_norm": 7.7948832511901855, "learning_rate": 8.899842584229657e-07, "loss": 0.0123, "step": 295830 }, { "epoch": 3.1608526096479515, "grad_norm": 1.3254289627075195, "learning_rate": 8.899737439100654e-07, "loss": 0.0046, "step": 295840 }, { "epoch": 3.160959452962231, "grad_norm": 0.052830323576927185, "learning_rate": 8.899632289568553e-07, "loss": 0.0065, "step": 295850 }, { "epoch": 3.1610662962765104, "grad_norm": 0.00661913026124239, "learning_rate": 8.899527135633474e-07, "loss": 0.0645, "step": 295860 }, { "epoch": 3.1611731395907903, "grad_norm": 0.38549694418907166, "learning_rate": 8.899421977295534e-07, "loss": 0.0778, "step": 295870 }, { "epoch": 3.1612799829050697, "grad_norm": 0.09143351763486862, "learning_rate": 8.899316814554852e-07, "loss": 0.0062, "step": 295880 }, { "epoch": 3.161386826219349, "grad_norm": 0.16206754744052887, "learning_rate": 8.899211647411546e-07, "loss": 0.0007, "step": 295890 }, { "epoch": 3.161493669533629, "grad_norm": 0.007854137569665909, "learning_rate": 8.899106475865736e-07, "loss": 0.0375, "step": 295900 }, { "epoch": 3.1616005128479086, "grad_norm": 0.24324089288711548, "learning_rate": 8.899001299917541e-07, "loss": 0.0071, "step": 295910 }, { "epoch": 3.161707356162188, "grad_norm": 1.047135353088379, "learning_rate": 8.89889611956708e-07, "loss": 0.0341, "step": 295920 }, { "epoch": 3.161814199476468, "grad_norm": 0.008336668834090233, "learning_rate": 8.898790934814471e-07, "loss": 0.0084, "step": 295930 }, { "epoch": 3.1619210427907474, "grad_norm": 2.459967613220215, "learning_rate": 8.898685745659832e-07, "loss": 0.015, "step": 295940 }, { "epoch": 3.162027886105027, "grad_norm": 0.013482337817549706, "learning_rate": 8.898580552103283e-07, "loss": 0.0142, "step": 295950 }, { "epoch": 3.1621347294193067, "grad_norm": 5.932334899902344, "learning_rate": 8.898475354144941e-07, "loss": 0.0357, "step": 295960 }, { "epoch": 3.162241572733586, "grad_norm": 0.018443875014781952, "learning_rate": 8.898370151784928e-07, "loss": 0.0198, "step": 295970 }, { "epoch": 3.1623484160478657, "grad_norm": 0.007006501313298941, "learning_rate": 8.89826494502336e-07, "loss": 0.0091, "step": 295980 }, { "epoch": 3.1624552593621456, "grad_norm": 1.0733935832977295, "learning_rate": 8.898159733860356e-07, "loss": 0.027, "step": 295990 }, { "epoch": 3.162562102676425, "grad_norm": 0.0069170487113296986, "learning_rate": 8.898054518296037e-07, "loss": 0.0171, "step": 296000 }, { "epoch": 3.1626689459907045, "grad_norm": 0.06195974722504616, "learning_rate": 8.897949298330519e-07, "loss": 0.0119, "step": 296010 }, { "epoch": 3.1627757893049844, "grad_norm": 4.619551181793213, "learning_rate": 8.897844073963923e-07, "loss": 0.047, "step": 296020 }, { "epoch": 3.162882632619264, "grad_norm": 0.7491724491119385, "learning_rate": 8.897738845196366e-07, "loss": 0.0122, "step": 296030 }, { "epoch": 3.1629894759335433, "grad_norm": 0.025018850341439247, "learning_rate": 8.897633612027969e-07, "loss": 0.0111, "step": 296040 }, { "epoch": 3.163096319247823, "grad_norm": 0.007365900091826916, "learning_rate": 8.897528374458847e-07, "loss": 0.025, "step": 296050 }, { "epoch": 3.1632031625621027, "grad_norm": 0.02357877604663372, "learning_rate": 8.897423132489124e-07, "loss": 0.0471, "step": 296060 }, { "epoch": 3.163310005876382, "grad_norm": 0.02592512220144272, "learning_rate": 8.897317886118913e-07, "loss": 0.0267, "step": 296070 }, { "epoch": 3.163416849190662, "grad_norm": 12.702316284179688, "learning_rate": 8.897212635348339e-07, "loss": 0.0194, "step": 296080 }, { "epoch": 3.1635236925049415, "grad_norm": 0.5542886257171631, "learning_rate": 8.897107380177516e-07, "loss": 0.0331, "step": 296090 }, { "epoch": 3.163630535819221, "grad_norm": 3.9349796772003174, "learning_rate": 8.897002120606566e-07, "loss": 0.0239, "step": 296100 }, { "epoch": 3.163737379133501, "grad_norm": 5.314216613769531, "learning_rate": 8.896896856635605e-07, "loss": 0.0404, "step": 296110 }, { "epoch": 3.1638442224477803, "grad_norm": 0.8400192260742188, "learning_rate": 8.896791588264755e-07, "loss": 0.005, "step": 296120 }, { "epoch": 3.1639510657620598, "grad_norm": 8.773622512817383, "learning_rate": 8.896686315494133e-07, "loss": 0.0152, "step": 296130 }, { "epoch": 3.1640579090763397, "grad_norm": 0.006849279161542654, "learning_rate": 8.896581038323857e-07, "loss": 0.0157, "step": 296140 }, { "epoch": 3.164164752390619, "grad_norm": 0.01092339213937521, "learning_rate": 8.896475756754047e-07, "loss": 0.0185, "step": 296150 }, { "epoch": 3.1642715957048986, "grad_norm": 0.9485163688659668, "learning_rate": 8.896370470784822e-07, "loss": 0.0412, "step": 296160 }, { "epoch": 3.1643784390191785, "grad_norm": 2.62648868560791, "learning_rate": 8.896265180416301e-07, "loss": 0.0391, "step": 296170 }, { "epoch": 3.164485282333458, "grad_norm": 0.01553122978657484, "learning_rate": 8.896159885648602e-07, "loss": 0.0078, "step": 296180 }, { "epoch": 3.1645921256477374, "grad_norm": 0.839794397354126, "learning_rate": 8.896054586481845e-07, "loss": 0.0067, "step": 296190 }, { "epoch": 3.1646989689620173, "grad_norm": 0.05278493091464043, "learning_rate": 8.895949282916149e-07, "loss": 0.0252, "step": 296200 }, { "epoch": 3.1648058122762968, "grad_norm": 2.403224229812622, "learning_rate": 8.89584397495163e-07, "loss": 0.0214, "step": 296210 }, { "epoch": 3.164912655590576, "grad_norm": 0.040656641125679016, "learning_rate": 8.89573866258841e-07, "loss": 0.007, "step": 296220 }, { "epoch": 3.165019498904856, "grad_norm": 0.0007422955241054296, "learning_rate": 8.895633345826607e-07, "loss": 0.0029, "step": 296230 }, { "epoch": 3.1651263422191356, "grad_norm": 0.013589450158178806, "learning_rate": 8.895528024666341e-07, "loss": 0.0086, "step": 296240 }, { "epoch": 3.165233185533415, "grad_norm": 0.19090014696121216, "learning_rate": 8.89542269910773e-07, "loss": 0.0062, "step": 296250 }, { "epoch": 3.165340028847695, "grad_norm": 0.8961675763130188, "learning_rate": 8.895317369150892e-07, "loss": 0.0325, "step": 296260 }, { "epoch": 3.1654468721619744, "grad_norm": 0.06794624030590057, "learning_rate": 8.895212034795947e-07, "loss": 0.0123, "step": 296270 }, { "epoch": 3.1655537154762543, "grad_norm": 0.7040970325469971, "learning_rate": 8.895106696043013e-07, "loss": 0.0122, "step": 296280 }, { "epoch": 3.1656605587905338, "grad_norm": 1.3793846368789673, "learning_rate": 8.89500135289221e-07, "loss": 0.0112, "step": 296290 }, { "epoch": 3.165767402104813, "grad_norm": 0.08326174318790436, "learning_rate": 8.894896005343655e-07, "loss": 0.0032, "step": 296300 }, { "epoch": 3.1658742454190927, "grad_norm": 0.6660880446434021, "learning_rate": 8.89479065339747e-07, "loss": 0.0019, "step": 296310 }, { "epoch": 3.1659810887333726, "grad_norm": 6.138039588928223, "learning_rate": 8.894685297053773e-07, "loss": 0.0257, "step": 296320 }, { "epoch": 3.166087932047652, "grad_norm": 5.080819606781006, "learning_rate": 8.894579936312681e-07, "loss": 0.013, "step": 296330 }, { "epoch": 3.166194775361932, "grad_norm": 0.07516966015100479, "learning_rate": 8.894474571174315e-07, "loss": 0.0117, "step": 296340 }, { "epoch": 3.1663016186762114, "grad_norm": 0.45385390520095825, "learning_rate": 8.894369201638794e-07, "loss": 0.0081, "step": 296350 }, { "epoch": 3.166408461990491, "grad_norm": 0.013699796050786972, "learning_rate": 8.894263827706236e-07, "loss": 0.033, "step": 296360 }, { "epoch": 3.1665153053047703, "grad_norm": 1.9600448608398438, "learning_rate": 8.894158449376758e-07, "loss": 0.0044, "step": 296370 }, { "epoch": 3.1666221486190502, "grad_norm": 2.4159014225006104, "learning_rate": 8.894053066650484e-07, "loss": 0.0112, "step": 296380 }, { "epoch": 3.1667289919333297, "grad_norm": 10.600863456726074, "learning_rate": 8.893947679527529e-07, "loss": 0.0106, "step": 296390 }, { "epoch": 3.1668358352476096, "grad_norm": 0.049152277410030365, "learning_rate": 8.893842288008013e-07, "loss": 0.0024, "step": 296400 }, { "epoch": 3.166942678561889, "grad_norm": 0.015093071386218071, "learning_rate": 8.893736892092056e-07, "loss": 0.0132, "step": 296410 }, { "epoch": 3.1670495218761685, "grad_norm": 0.8201039433479309, "learning_rate": 8.893631491779775e-07, "loss": 0.0046, "step": 296420 }, { "epoch": 3.1671563651904484, "grad_norm": 0.08299709856510162, "learning_rate": 8.89352608707129e-07, "loss": 0.0107, "step": 296430 }, { "epoch": 3.167263208504728, "grad_norm": 0.036629099398851395, "learning_rate": 8.893420677966721e-07, "loss": 0.0106, "step": 296440 }, { "epoch": 3.1673700518190073, "grad_norm": 0.2187691032886505, "learning_rate": 8.893315264466187e-07, "loss": 0.0114, "step": 296450 }, { "epoch": 3.1674768951332872, "grad_norm": 5.9825520515441895, "learning_rate": 8.893209846569805e-07, "loss": 0.0173, "step": 296460 }, { "epoch": 3.1675837384475667, "grad_norm": 2.6458914279937744, "learning_rate": 8.893104424277695e-07, "loss": 0.0416, "step": 296470 }, { "epoch": 3.167690581761846, "grad_norm": 3.5970280170440674, "learning_rate": 8.892998997589978e-07, "loss": 0.005, "step": 296480 }, { "epoch": 3.167797425076126, "grad_norm": 3.953333854675293, "learning_rate": 8.89289356650677e-07, "loss": 0.0298, "step": 296490 }, { "epoch": 3.1679042683904055, "grad_norm": 1.8571338653564453, "learning_rate": 8.892788131028191e-07, "loss": 0.0136, "step": 296500 }, { "epoch": 3.168011111704685, "grad_norm": 2.9189414978027344, "learning_rate": 8.892682691154362e-07, "loss": 0.0623, "step": 296510 }, { "epoch": 3.168117955018965, "grad_norm": 0.11805316060781479, "learning_rate": 8.892577246885399e-07, "loss": 0.0137, "step": 296520 }, { "epoch": 3.1682247983332443, "grad_norm": 0.4134765565395355, "learning_rate": 8.892471798221423e-07, "loss": 0.0084, "step": 296530 }, { "epoch": 3.168331641647524, "grad_norm": 0.0017460845410823822, "learning_rate": 8.892366345162552e-07, "loss": 0.0043, "step": 296540 }, { "epoch": 3.1684384849618037, "grad_norm": 0.35280841588974, "learning_rate": 8.892260887708907e-07, "loss": 0.0247, "step": 296550 }, { "epoch": 3.168545328276083, "grad_norm": 0.14422699809074402, "learning_rate": 8.892155425860603e-07, "loss": 0.0075, "step": 296560 }, { "epoch": 3.1686521715903626, "grad_norm": 0.8054600954055786, "learning_rate": 8.892049959617765e-07, "loss": 0.0209, "step": 296570 }, { "epoch": 3.1687590149046425, "grad_norm": 2.6451058387756348, "learning_rate": 8.891944488980508e-07, "loss": 0.0117, "step": 296580 }, { "epoch": 3.168865858218922, "grad_norm": 1.0371263027191162, "learning_rate": 8.89183901394895e-07, "loss": 0.0018, "step": 296590 }, { "epoch": 3.1689727015332014, "grad_norm": 8.388519287109375, "learning_rate": 8.891733534523213e-07, "loss": 0.0201, "step": 296600 }, { "epoch": 3.1690795448474813, "grad_norm": 0.029249204322695732, "learning_rate": 8.891628050703417e-07, "loss": 0.0284, "step": 296610 }, { "epoch": 3.169186388161761, "grad_norm": 5.343644142150879, "learning_rate": 8.891522562489677e-07, "loss": 0.0148, "step": 296620 }, { "epoch": 3.1692932314760403, "grad_norm": 0.0010184624698013067, "learning_rate": 8.891417069882114e-07, "loss": 0.0078, "step": 296630 }, { "epoch": 3.16940007479032, "grad_norm": 3.4625279903411865, "learning_rate": 8.891311572880849e-07, "loss": 0.0218, "step": 296640 }, { "epoch": 3.1695069181045996, "grad_norm": 0.00592421879991889, "learning_rate": 8.891206071486e-07, "loss": 0.0127, "step": 296650 }, { "epoch": 3.169613761418879, "grad_norm": 0.06769514083862305, "learning_rate": 8.891100565697684e-07, "loss": 0.0061, "step": 296660 }, { "epoch": 3.169720604733159, "grad_norm": 0.06584326922893524, "learning_rate": 8.890995055516022e-07, "loss": 0.0401, "step": 296670 }, { "epoch": 3.1698274480474384, "grad_norm": 0.1139291301369667, "learning_rate": 8.890889540941135e-07, "loss": 0.0195, "step": 296680 }, { "epoch": 3.169934291361718, "grad_norm": 0.043584857136011124, "learning_rate": 8.890784021973138e-07, "loss": 0.0084, "step": 296690 }, { "epoch": 3.170041134675998, "grad_norm": 3.327284336090088, "learning_rate": 8.890678498612152e-07, "loss": 0.0061, "step": 296700 }, { "epoch": 3.1701479779902773, "grad_norm": 0.9811274409294128, "learning_rate": 8.890572970858296e-07, "loss": 0.0037, "step": 296710 }, { "epoch": 3.1702548213045567, "grad_norm": 1.3680140972137451, "learning_rate": 8.890467438711691e-07, "loss": 0.0141, "step": 296720 }, { "epoch": 3.1703616646188366, "grad_norm": 0.0029700000304728746, "learning_rate": 8.890361902172454e-07, "loss": 0.0069, "step": 296730 }, { "epoch": 3.170468507933116, "grad_norm": 10.2293701171875, "learning_rate": 8.890256361240704e-07, "loss": 0.0319, "step": 296740 }, { "epoch": 3.1705753512473955, "grad_norm": 0.31222546100616455, "learning_rate": 8.890150815916564e-07, "loss": 0.0052, "step": 296750 }, { "epoch": 3.1706821945616754, "grad_norm": 0.001475243247114122, "learning_rate": 8.890045266200148e-07, "loss": 0.0157, "step": 296760 }, { "epoch": 3.170789037875955, "grad_norm": 7.427028656005859, "learning_rate": 8.889939712091577e-07, "loss": 0.0302, "step": 296770 }, { "epoch": 3.1708958811902344, "grad_norm": 1.3336503505706787, "learning_rate": 8.88983415359097e-07, "loss": 0.0135, "step": 296780 }, { "epoch": 3.1710027245045143, "grad_norm": 0.08768267929553986, "learning_rate": 8.889728590698447e-07, "loss": 0.0014, "step": 296790 }, { "epoch": 3.1711095678187937, "grad_norm": 0.0917697548866272, "learning_rate": 8.889623023414127e-07, "loss": 0.0128, "step": 296800 }, { "epoch": 3.171216411133073, "grad_norm": 4.804887294769287, "learning_rate": 8.88951745173813e-07, "loss": 0.0124, "step": 296810 }, { "epoch": 3.171323254447353, "grad_norm": 11.08500862121582, "learning_rate": 8.889411875670574e-07, "loss": 0.0036, "step": 296820 }, { "epoch": 3.1714300977616325, "grad_norm": 2.609651803970337, "learning_rate": 8.889306295211578e-07, "loss": 0.0062, "step": 296830 }, { "epoch": 3.171536941075912, "grad_norm": 4.2666096687316895, "learning_rate": 8.889200710361261e-07, "loss": 0.006, "step": 296840 }, { "epoch": 3.171643784390192, "grad_norm": 0.024553067982196808, "learning_rate": 8.889095121119743e-07, "loss": 0.0156, "step": 296850 }, { "epoch": 3.1717506277044714, "grad_norm": 0.0030293178278952837, "learning_rate": 8.888989527487144e-07, "loss": 0.0187, "step": 296860 }, { "epoch": 3.171857471018751, "grad_norm": 0.017735153436660767, "learning_rate": 8.888883929463582e-07, "loss": 0.0023, "step": 296870 }, { "epoch": 3.1719643143330307, "grad_norm": 4.610318183898926, "learning_rate": 8.888778327049176e-07, "loss": 0.0117, "step": 296880 }, { "epoch": 3.17207115764731, "grad_norm": 0.20388902723789215, "learning_rate": 8.888672720244045e-07, "loss": 0.0088, "step": 296890 }, { "epoch": 3.1721780009615896, "grad_norm": 3.9112117290496826, "learning_rate": 8.888567109048312e-07, "loss": 0.0129, "step": 296900 }, { "epoch": 3.1722848442758695, "grad_norm": 0.028030388057231903, "learning_rate": 8.888461493462091e-07, "loss": 0.0356, "step": 296910 }, { "epoch": 3.172391687590149, "grad_norm": 11.953275680541992, "learning_rate": 8.888355873485504e-07, "loss": 0.021, "step": 296920 }, { "epoch": 3.1724985309044285, "grad_norm": 0.005930894520133734, "learning_rate": 8.88825024911867e-07, "loss": 0.0018, "step": 296930 }, { "epoch": 3.1726053742187084, "grad_norm": 0.18103055655956268, "learning_rate": 8.888144620361708e-07, "loss": 0.0084, "step": 296940 }, { "epoch": 3.172712217532988, "grad_norm": 0.043317895382642746, "learning_rate": 8.888038987214737e-07, "loss": 0.0138, "step": 296950 }, { "epoch": 3.1728190608472673, "grad_norm": 0.017438501119613647, "learning_rate": 8.887933349677876e-07, "loss": 0.0012, "step": 296960 }, { "epoch": 3.172925904161547, "grad_norm": 6.853452205657959, "learning_rate": 8.887827707751245e-07, "loss": 0.0098, "step": 296970 }, { "epoch": 3.1730327474758266, "grad_norm": 0.006560845300555229, "learning_rate": 8.887722061434963e-07, "loss": 0.0117, "step": 296980 }, { "epoch": 3.173139590790106, "grad_norm": 0.014780115336179733, "learning_rate": 8.887616410729151e-07, "loss": 0.0239, "step": 296990 }, { "epoch": 3.173246434104386, "grad_norm": 3.598031759262085, "learning_rate": 8.887510755633925e-07, "loss": 0.0269, "step": 297000 }, { "epoch": 3.1733532774186655, "grad_norm": 2.05110239982605, "learning_rate": 8.887405096149407e-07, "loss": 0.0091, "step": 297010 }, { "epoch": 3.173460120732945, "grad_norm": 3.208465814590454, "learning_rate": 8.887299432275714e-07, "loss": 0.0093, "step": 297020 }, { "epoch": 3.173566964047225, "grad_norm": 0.05074300989508629, "learning_rate": 8.887193764012967e-07, "loss": 0.007, "step": 297030 }, { "epoch": 3.1736738073615043, "grad_norm": 0.3507620692253113, "learning_rate": 8.887088091361285e-07, "loss": 0.0011, "step": 297040 }, { "epoch": 3.173780650675784, "grad_norm": 0.04239741340279579, "learning_rate": 8.886982414320787e-07, "loss": 0.0031, "step": 297050 }, { "epoch": 3.1738874939900636, "grad_norm": 13.58259105682373, "learning_rate": 8.886876732891593e-07, "loss": 0.0061, "step": 297060 }, { "epoch": 3.173994337304343, "grad_norm": 2.444779634475708, "learning_rate": 8.886771047073822e-07, "loss": 0.0157, "step": 297070 }, { "epoch": 3.1741011806186226, "grad_norm": 0.042702462524175644, "learning_rate": 8.886665356867593e-07, "loss": 0.0082, "step": 297080 }, { "epoch": 3.1742080239329025, "grad_norm": 0.0063073947094380856, "learning_rate": 8.886559662273025e-07, "loss": 0.0303, "step": 297090 }, { "epoch": 3.174314867247182, "grad_norm": 0.025687776505947113, "learning_rate": 8.886453963290237e-07, "loss": 0.0056, "step": 297100 }, { "epoch": 3.174421710561462, "grad_norm": 9.27529525756836, "learning_rate": 8.88634825991935e-07, "loss": 0.0365, "step": 297110 }, { "epoch": 3.1745285538757413, "grad_norm": 0.21447378396987915, "learning_rate": 8.886242552160483e-07, "loss": 0.0007, "step": 297120 }, { "epoch": 3.1746353971900207, "grad_norm": 5.5091938972473145, "learning_rate": 8.886136840013755e-07, "loss": 0.0089, "step": 297130 }, { "epoch": 3.1747422405043006, "grad_norm": 0.019457660615444183, "learning_rate": 8.886031123479284e-07, "loss": 0.0156, "step": 297140 }, { "epoch": 3.17484908381858, "grad_norm": 0.245543971657753, "learning_rate": 8.885925402557191e-07, "loss": 0.0046, "step": 297150 }, { "epoch": 3.1749559271328596, "grad_norm": 0.02451292797923088, "learning_rate": 8.885819677247596e-07, "loss": 0.0112, "step": 297160 }, { "epoch": 3.1750627704471395, "grad_norm": 0.011268402449786663, "learning_rate": 8.885713947550616e-07, "loss": 0.0133, "step": 297170 }, { "epoch": 3.175169613761419, "grad_norm": 4.726138591766357, "learning_rate": 8.885608213466372e-07, "loss": 0.0245, "step": 297180 }, { "epoch": 3.1752764570756984, "grad_norm": 0.273330420255661, "learning_rate": 8.885502474994984e-07, "loss": 0.0199, "step": 297190 }, { "epoch": 3.1753833003899783, "grad_norm": 1.627867579460144, "learning_rate": 8.885396732136569e-07, "loss": 0.0245, "step": 297200 }, { "epoch": 3.1754901437042578, "grad_norm": 14.579185485839844, "learning_rate": 8.885290984891249e-07, "loss": 0.0135, "step": 297210 }, { "epoch": 3.175596987018537, "grad_norm": 0.5415955185890198, "learning_rate": 8.885185233259141e-07, "loss": 0.0183, "step": 297220 }, { "epoch": 3.175703830332817, "grad_norm": 0.14389465749263763, "learning_rate": 8.885079477240367e-07, "loss": 0.0065, "step": 297230 }, { "epoch": 3.1758106736470966, "grad_norm": 0.6835441589355469, "learning_rate": 8.884973716835045e-07, "loss": 0.0061, "step": 297240 }, { "epoch": 3.175917516961376, "grad_norm": 3.712919235229492, "learning_rate": 8.884867952043293e-07, "loss": 0.0202, "step": 297250 }, { "epoch": 3.176024360275656, "grad_norm": 0.05829767882823944, "learning_rate": 8.884762182865233e-07, "loss": 0.0113, "step": 297260 }, { "epoch": 3.1761312035899354, "grad_norm": 2.592698335647583, "learning_rate": 8.884656409300983e-07, "loss": 0.0189, "step": 297270 }, { "epoch": 3.176238046904215, "grad_norm": 1.2498018741607666, "learning_rate": 8.884550631350663e-07, "loss": 0.0103, "step": 297280 }, { "epoch": 3.1763448902184948, "grad_norm": 0.09688157588243484, "learning_rate": 8.884444849014392e-07, "loss": 0.0225, "step": 297290 }, { "epoch": 3.176451733532774, "grad_norm": 11.007613182067871, "learning_rate": 8.884339062292289e-07, "loss": 0.0235, "step": 297300 }, { "epoch": 3.1765585768470537, "grad_norm": 0.5115709900856018, "learning_rate": 8.884233271184476e-07, "loss": 0.0027, "step": 297310 }, { "epoch": 3.1766654201613336, "grad_norm": 0.022555973380804062, "learning_rate": 8.884127475691067e-07, "loss": 0.0508, "step": 297320 }, { "epoch": 3.176772263475613, "grad_norm": 2.876448631286621, "learning_rate": 8.884021675812188e-07, "loss": 0.0067, "step": 297330 }, { "epoch": 3.1768791067898925, "grad_norm": 0.1789504438638687, "learning_rate": 8.883915871547955e-07, "loss": 0.0075, "step": 297340 }, { "epoch": 3.1769859501041724, "grad_norm": 0.014633753336966038, "learning_rate": 8.883810062898487e-07, "loss": 0.0077, "step": 297350 }, { "epoch": 3.177092793418452, "grad_norm": 3.350158214569092, "learning_rate": 8.883704249863904e-07, "loss": 0.0092, "step": 297360 }, { "epoch": 3.1771996367327313, "grad_norm": 0.6113935708999634, "learning_rate": 8.883598432444327e-07, "loss": 0.0069, "step": 297370 }, { "epoch": 3.177306480047011, "grad_norm": 0.7601990699768066, "learning_rate": 8.883492610639873e-07, "loss": 0.0092, "step": 297380 }, { "epoch": 3.1774133233612907, "grad_norm": 0.2554892897605896, "learning_rate": 8.883386784450664e-07, "loss": 0.0269, "step": 297390 }, { "epoch": 3.17752016667557, "grad_norm": 0.42037761211395264, "learning_rate": 8.883280953876817e-07, "loss": 0.0156, "step": 297400 }, { "epoch": 3.17762700998985, "grad_norm": 0.13930845260620117, "learning_rate": 8.883175118918454e-07, "loss": 0.0054, "step": 297410 }, { "epoch": 3.1777338533041295, "grad_norm": 2.1955318450927734, "learning_rate": 8.883069279575692e-07, "loss": 0.0183, "step": 297420 }, { "epoch": 3.177840696618409, "grad_norm": 0.2523554265499115, "learning_rate": 8.882963435848652e-07, "loss": 0.0061, "step": 297430 }, { "epoch": 3.177947539932689, "grad_norm": 0.26770588755607605, "learning_rate": 8.882857587737454e-07, "loss": 0.0068, "step": 297440 }, { "epoch": 3.1780543832469683, "grad_norm": 6.0978684425354, "learning_rate": 8.882751735242217e-07, "loss": 0.0244, "step": 297450 }, { "epoch": 3.1781612265612478, "grad_norm": 0.007398989051580429, "learning_rate": 8.882645878363059e-07, "loss": 0.0239, "step": 297460 }, { "epoch": 3.1782680698755277, "grad_norm": 4.093593597412109, "learning_rate": 8.882540017100101e-07, "loss": 0.0101, "step": 297470 }, { "epoch": 3.178374913189807, "grad_norm": 0.006514119915664196, "learning_rate": 8.882434151453462e-07, "loss": 0.0424, "step": 297480 }, { "epoch": 3.1784817565040866, "grad_norm": 0.7304531931877136, "learning_rate": 8.882328281423262e-07, "loss": 0.0072, "step": 297490 }, { "epoch": 3.1785885998183665, "grad_norm": 2.1531691551208496, "learning_rate": 8.88222240700962e-07, "loss": 0.0084, "step": 297500 }, { "epoch": 3.178695443132646, "grad_norm": 0.0059014493599534035, "learning_rate": 8.882116528212657e-07, "loss": 0.0027, "step": 297510 }, { "epoch": 3.1788022864469254, "grad_norm": 0.010551120154559612, "learning_rate": 8.882010645032491e-07, "loss": 0.0042, "step": 297520 }, { "epoch": 3.1789091297612053, "grad_norm": 0.22449426352977753, "learning_rate": 8.881904757469243e-07, "loss": 0.0121, "step": 297530 }, { "epoch": 3.179015973075485, "grad_norm": 0.0043723490089178085, "learning_rate": 8.88179886552303e-07, "loss": 0.0092, "step": 297540 }, { "epoch": 3.1791228163897642, "grad_norm": 0.03286205977201462, "learning_rate": 8.881692969193974e-07, "loss": 0.0023, "step": 297550 }, { "epoch": 3.179229659704044, "grad_norm": 0.3616700768470764, "learning_rate": 8.881587068482194e-07, "loss": 0.0116, "step": 297560 }, { "epoch": 3.1793365030183236, "grad_norm": 1.8167377710342407, "learning_rate": 8.881481163387807e-07, "loss": 0.0247, "step": 297570 }, { "epoch": 3.179443346332603, "grad_norm": 0.013258421793580055, "learning_rate": 8.881375253910937e-07, "loss": 0.0334, "step": 297580 }, { "epoch": 3.179550189646883, "grad_norm": 0.06325770914554596, "learning_rate": 8.881269340051702e-07, "loss": 0.0094, "step": 297590 }, { "epoch": 3.1796570329611624, "grad_norm": 6.1006903648376465, "learning_rate": 8.881163421810219e-07, "loss": 0.0203, "step": 297600 }, { "epoch": 3.179763876275442, "grad_norm": 0.11989228427410126, "learning_rate": 8.88105749918661e-07, "loss": 0.0482, "step": 297610 }, { "epoch": 3.179870719589722, "grad_norm": 0.030284253880381584, "learning_rate": 8.880951572180995e-07, "loss": 0.017, "step": 297620 }, { "epoch": 3.1799775629040012, "grad_norm": 0.02525227516889572, "learning_rate": 8.880845640793492e-07, "loss": 0.0045, "step": 297630 }, { "epoch": 3.1800844062182807, "grad_norm": 0.07005149871110916, "learning_rate": 8.880739705024222e-07, "loss": 0.0035, "step": 297640 }, { "epoch": 3.1801912495325606, "grad_norm": 0.22373715043067932, "learning_rate": 8.880633764873304e-07, "loss": 0.0399, "step": 297650 }, { "epoch": 3.18029809284684, "grad_norm": 1.7004480361938477, "learning_rate": 8.880527820340857e-07, "loss": 0.0202, "step": 297660 }, { "epoch": 3.1804049361611195, "grad_norm": 1.5163097381591797, "learning_rate": 8.880421871427001e-07, "loss": 0.0168, "step": 297670 }, { "epoch": 3.1805117794753994, "grad_norm": 0.008625570684671402, "learning_rate": 8.880315918131857e-07, "loss": 0.0045, "step": 297680 }, { "epoch": 3.180618622789679, "grad_norm": 2.069089889526367, "learning_rate": 8.880209960455542e-07, "loss": 0.0288, "step": 297690 }, { "epoch": 3.1807254661039583, "grad_norm": 1.9301366806030273, "learning_rate": 8.880103998398179e-07, "loss": 0.0025, "step": 297700 }, { "epoch": 3.1808323094182382, "grad_norm": 0.004358304664492607, "learning_rate": 8.879998031959883e-07, "loss": 0.0034, "step": 297710 }, { "epoch": 3.1809391527325177, "grad_norm": 4.354500770568848, "learning_rate": 8.879892061140779e-07, "loss": 0.0298, "step": 297720 }, { "epoch": 3.181045996046797, "grad_norm": 4.520585536956787, "learning_rate": 8.879786085940983e-07, "loss": 0.0208, "step": 297730 }, { "epoch": 3.181152839361077, "grad_norm": 0.05417543649673462, "learning_rate": 8.879680106360615e-07, "loss": 0.0081, "step": 297740 }, { "epoch": 3.1812596826753565, "grad_norm": 0.020569760352373123, "learning_rate": 8.879574122399797e-07, "loss": 0.0192, "step": 297750 }, { "epoch": 3.1813665259896364, "grad_norm": 2.067270278930664, "learning_rate": 8.879468134058645e-07, "loss": 0.0081, "step": 297760 }, { "epoch": 3.181473369303916, "grad_norm": 0.1055399626493454, "learning_rate": 8.879362141337283e-07, "loss": 0.0043, "step": 297770 }, { "epoch": 3.1815802126181953, "grad_norm": 0.34430912137031555, "learning_rate": 8.879256144235827e-07, "loss": 0.0098, "step": 297780 }, { "epoch": 3.181687055932475, "grad_norm": 8.487727165222168, "learning_rate": 8.879150142754397e-07, "loss": 0.0205, "step": 297790 }, { "epoch": 3.1817938992467547, "grad_norm": 0.01773684471845627, "learning_rate": 8.879044136893114e-07, "loss": 0.0048, "step": 297800 }, { "epoch": 3.181900742561034, "grad_norm": 9.319286346435547, "learning_rate": 8.8789381266521e-07, "loss": 0.0152, "step": 297810 }, { "epoch": 3.182007585875314, "grad_norm": 0.05395697429776192, "learning_rate": 8.878832112031469e-07, "loss": 0.0163, "step": 297820 }, { "epoch": 3.1821144291895935, "grad_norm": 2.2461483478546143, "learning_rate": 8.878726093031345e-07, "loss": 0.0105, "step": 297830 }, { "epoch": 3.182221272503873, "grad_norm": 0.9701303839683533, "learning_rate": 8.878620069651848e-07, "loss": 0.011, "step": 297840 }, { "epoch": 3.1823281158181524, "grad_norm": 6.545009613037109, "learning_rate": 8.878514041893094e-07, "loss": 0.0399, "step": 297850 }, { "epoch": 3.1824349591324324, "grad_norm": 2.239994525909424, "learning_rate": 8.878408009755206e-07, "loss": 0.006, "step": 297860 }, { "epoch": 3.182541802446712, "grad_norm": 6.0165557861328125, "learning_rate": 8.878301973238303e-07, "loss": 0.02, "step": 297870 }, { "epoch": 3.1826486457609917, "grad_norm": 0.2554191052913666, "learning_rate": 8.878195932342504e-07, "loss": 0.0062, "step": 297880 }, { "epoch": 3.182755489075271, "grad_norm": 0.11109232902526855, "learning_rate": 8.878089887067928e-07, "loss": 0.0137, "step": 297890 }, { "epoch": 3.1828623323895506, "grad_norm": 0.8089449405670166, "learning_rate": 8.877983837414697e-07, "loss": 0.0166, "step": 297900 }, { "epoch": 3.1829691757038305, "grad_norm": 0.01623786985874176, "learning_rate": 8.87787778338293e-07, "loss": 0.0052, "step": 297910 }, { "epoch": 3.18307601901811, "grad_norm": 0.29492515325546265, "learning_rate": 8.877771724972745e-07, "loss": 0.0403, "step": 297920 }, { "epoch": 3.1831828623323895, "grad_norm": 0.004064826760441065, "learning_rate": 8.877665662184263e-07, "loss": 0.0024, "step": 297930 }, { "epoch": 3.1832897056466694, "grad_norm": 0.03147357702255249, "learning_rate": 8.877559595017605e-07, "loss": 0.1051, "step": 297940 }, { "epoch": 3.183396548960949, "grad_norm": 0.011099165305495262, "learning_rate": 8.877453523472889e-07, "loss": 0.0189, "step": 297950 }, { "epoch": 3.1835033922752283, "grad_norm": 1.840438961982727, "learning_rate": 8.877347447550235e-07, "loss": 0.0074, "step": 297960 }, { "epoch": 3.183610235589508, "grad_norm": 0.08305409550666809, "learning_rate": 8.877241367249763e-07, "loss": 0.0029, "step": 297970 }, { "epoch": 3.1837170789037876, "grad_norm": 4.401040077209473, "learning_rate": 8.877135282571593e-07, "loss": 0.0217, "step": 297980 }, { "epoch": 3.183823922218067, "grad_norm": 0.18843212723731995, "learning_rate": 8.877029193515846e-07, "loss": 0.0084, "step": 297990 }, { "epoch": 3.183930765532347, "grad_norm": 0.008232141844928265, "learning_rate": 8.876923100082638e-07, "loss": 0.0119, "step": 298000 }, { "epoch": 3.1840376088466265, "grad_norm": 2.3166279792785645, "learning_rate": 8.876817002272094e-07, "loss": 0.0278, "step": 298010 }, { "epoch": 3.184144452160906, "grad_norm": 0.02433694899082184, "learning_rate": 8.87671090008433e-07, "loss": 0.0075, "step": 298020 }, { "epoch": 3.184251295475186, "grad_norm": 0.009232745505869389, "learning_rate": 8.876604793519465e-07, "loss": 0.0421, "step": 298030 }, { "epoch": 3.1843581387894653, "grad_norm": 2.69619083404541, "learning_rate": 8.876498682577622e-07, "loss": 0.0155, "step": 298040 }, { "epoch": 3.1844649821037447, "grad_norm": 0.0015707103302702308, "learning_rate": 8.87639256725892e-07, "loss": 0.0355, "step": 298050 }, { "epoch": 3.1845718254180246, "grad_norm": 0.0041231876239180565, "learning_rate": 8.876286447563478e-07, "loss": 0.0067, "step": 298060 }, { "epoch": 3.184678668732304, "grad_norm": 0.0013245182344689965, "learning_rate": 8.876180323491415e-07, "loss": 0.0051, "step": 298070 }, { "epoch": 3.1847855120465836, "grad_norm": 0.029179181903600693, "learning_rate": 8.876074195042853e-07, "loss": 0.0392, "step": 298080 }, { "epoch": 3.1848923553608635, "grad_norm": 0.011003472842276096, "learning_rate": 8.87596806221791e-07, "loss": 0.011, "step": 298090 }, { "epoch": 3.184999198675143, "grad_norm": 0.4831421375274658, "learning_rate": 8.875861925016708e-07, "loss": 0.008, "step": 298100 }, { "epoch": 3.1851060419894224, "grad_norm": 18.1853084564209, "learning_rate": 8.875755783439363e-07, "loss": 0.0276, "step": 298110 }, { "epoch": 3.1852128853037023, "grad_norm": 2.1752121448516846, "learning_rate": 8.875649637485998e-07, "loss": 0.0023, "step": 298120 }, { "epoch": 3.1853197286179817, "grad_norm": 4.8594841957092285, "learning_rate": 8.875543487156733e-07, "loss": 0.0322, "step": 298130 }, { "epoch": 3.185426571932261, "grad_norm": 0.02650556154549122, "learning_rate": 8.875437332451685e-07, "loss": 0.0049, "step": 298140 }, { "epoch": 3.185533415246541, "grad_norm": 0.028469745069742203, "learning_rate": 8.875331173370978e-07, "loss": 0.0045, "step": 298150 }, { "epoch": 3.1856402585608206, "grad_norm": 1.530532956123352, "learning_rate": 8.875225009914729e-07, "loss": 0.0152, "step": 298160 }, { "epoch": 3.1857471018751, "grad_norm": 6.387135982513428, "learning_rate": 8.875118842083058e-07, "loss": 0.0684, "step": 298170 }, { "epoch": 3.18585394518938, "grad_norm": 4.18467903137207, "learning_rate": 8.875012669876086e-07, "loss": 0.0035, "step": 298180 }, { "epoch": 3.1859607885036594, "grad_norm": 0.04222308099269867, "learning_rate": 8.874906493293932e-07, "loss": 0.0033, "step": 298190 }, { "epoch": 3.186067631817939, "grad_norm": 1.1947600841522217, "learning_rate": 8.874800312336715e-07, "loss": 0.0032, "step": 298200 }, { "epoch": 3.1861744751322187, "grad_norm": 0.019334258511662483, "learning_rate": 8.874694127004556e-07, "loss": 0.0019, "step": 298210 }, { "epoch": 3.186281318446498, "grad_norm": 0.4407312870025635, "learning_rate": 8.874587937297577e-07, "loss": 0.0121, "step": 298220 }, { "epoch": 3.1863881617607777, "grad_norm": 1.8994288444519043, "learning_rate": 8.874481743215893e-07, "loss": 0.0398, "step": 298230 }, { "epoch": 3.1864950050750576, "grad_norm": 0.010951842181384563, "learning_rate": 8.874375544759629e-07, "loss": 0.0078, "step": 298240 }, { "epoch": 3.186601848389337, "grad_norm": 0.017134038731455803, "learning_rate": 8.874269341928902e-07, "loss": 0.0181, "step": 298250 }, { "epoch": 3.1867086917036165, "grad_norm": 0.7576490640640259, "learning_rate": 8.874163134723833e-07, "loss": 0.0068, "step": 298260 }, { "epoch": 3.1868155350178964, "grad_norm": 0.01707497239112854, "learning_rate": 8.874056923144541e-07, "loss": 0.0083, "step": 298270 }, { "epoch": 3.186922378332176, "grad_norm": 7.55242919921875, "learning_rate": 8.873950707191144e-07, "loss": 0.0152, "step": 298280 }, { "epoch": 3.1870292216464553, "grad_norm": 0.03610995039343834, "learning_rate": 8.873844486863767e-07, "loss": 0.0067, "step": 298290 }, { "epoch": 3.187136064960735, "grad_norm": 1.8743926286697388, "learning_rate": 8.873738262162527e-07, "loss": 0.0081, "step": 298300 }, { "epoch": 3.1872429082750147, "grad_norm": 0.3029257655143738, "learning_rate": 8.873632033087543e-07, "loss": 0.0042, "step": 298310 }, { "epoch": 3.187349751589294, "grad_norm": 0.029813067987561226, "learning_rate": 8.873525799638937e-07, "loss": 0.0154, "step": 298320 }, { "epoch": 3.187456594903574, "grad_norm": 0.5645762085914612, "learning_rate": 8.873419561816829e-07, "loss": 0.0121, "step": 298330 }, { "epoch": 3.1875634382178535, "grad_norm": 0.0072067733854055405, "learning_rate": 8.873313319621336e-07, "loss": 0.0126, "step": 298340 }, { "epoch": 3.187670281532133, "grad_norm": 3.6300957202911377, "learning_rate": 8.873207073052582e-07, "loss": 0.0153, "step": 298350 }, { "epoch": 3.187777124846413, "grad_norm": 0.013788907788693905, "learning_rate": 8.873100822110683e-07, "loss": 0.0014, "step": 298360 }, { "epoch": 3.1878839681606923, "grad_norm": 0.18493469059467316, "learning_rate": 8.872994566795762e-07, "loss": 0.0033, "step": 298370 }, { "epoch": 3.1879908114749718, "grad_norm": 0.558851420879364, "learning_rate": 8.872888307107937e-07, "loss": 0.0211, "step": 298380 }, { "epoch": 3.1880976547892517, "grad_norm": 0.005675602238625288, "learning_rate": 8.872782043047329e-07, "loss": 0.0016, "step": 298390 }, { "epoch": 3.188204498103531, "grad_norm": 8.962231636047363, "learning_rate": 8.872675774614058e-07, "loss": 0.0167, "step": 298400 }, { "epoch": 3.1883113414178106, "grad_norm": 3.1770737171173096, "learning_rate": 8.872569501808242e-07, "loss": 0.0161, "step": 298410 }, { "epoch": 3.1884181847320905, "grad_norm": 0.6590239405632019, "learning_rate": 8.872463224630005e-07, "loss": 0.0108, "step": 298420 }, { "epoch": 3.18852502804637, "grad_norm": 0.5593117475509644, "learning_rate": 8.872356943079464e-07, "loss": 0.005, "step": 298430 }, { "epoch": 3.1886318713606494, "grad_norm": 6.318560600280762, "learning_rate": 8.872250657156742e-07, "loss": 0.0292, "step": 298440 }, { "epoch": 3.1887387146749293, "grad_norm": 0.009582744911313057, "learning_rate": 8.872144366861954e-07, "loss": 0.0038, "step": 298450 }, { "epoch": 3.1888455579892088, "grad_norm": 0.26544854044914246, "learning_rate": 8.872038072195222e-07, "loss": 0.0246, "step": 298460 }, { "epoch": 3.1889524013034882, "grad_norm": 1.7086313962936401, "learning_rate": 8.871931773156669e-07, "loss": 0.0218, "step": 298470 }, { "epoch": 3.189059244617768, "grad_norm": 3.946469783782959, "learning_rate": 8.871825469746413e-07, "loss": 0.0257, "step": 298480 }, { "epoch": 3.1891660879320476, "grad_norm": 0.022574229165911674, "learning_rate": 8.871719161964573e-07, "loss": 0.0038, "step": 298490 }, { "epoch": 3.189272931246327, "grad_norm": 4.980598449707031, "learning_rate": 8.871612849811268e-07, "loss": 0.0323, "step": 298500 }, { "epoch": 3.189379774560607, "grad_norm": 2.92386531829834, "learning_rate": 8.871506533286621e-07, "loss": 0.0067, "step": 298510 }, { "epoch": 3.1894866178748864, "grad_norm": 0.005334442015737295, "learning_rate": 8.871400212390753e-07, "loss": 0.0079, "step": 298520 }, { "epoch": 3.1895934611891663, "grad_norm": 0.01545634027570486, "learning_rate": 8.87129388712378e-07, "loss": 0.0254, "step": 298530 }, { "epoch": 3.1897003045034458, "grad_norm": 0.03697977215051651, "learning_rate": 8.871187557485824e-07, "loss": 0.0061, "step": 298540 }, { "epoch": 3.1898071478177252, "grad_norm": 0.0069575123488903046, "learning_rate": 8.871081223477006e-07, "loss": 0.0036, "step": 298550 }, { "epoch": 3.1899139911320047, "grad_norm": 0.709297776222229, "learning_rate": 8.870974885097446e-07, "loss": 0.0114, "step": 298560 }, { "epoch": 3.1900208344462846, "grad_norm": 0.34363624453544617, "learning_rate": 8.870868542347261e-07, "loss": 0.0027, "step": 298570 }, { "epoch": 3.190127677760564, "grad_norm": 2.3051230907440186, "learning_rate": 8.870762195226573e-07, "loss": 0.0165, "step": 298580 }, { "epoch": 3.190234521074844, "grad_norm": 9.07102108001709, "learning_rate": 8.870655843735504e-07, "loss": 0.0132, "step": 298590 }, { "epoch": 3.1903413643891234, "grad_norm": 1.7927312850952148, "learning_rate": 8.870549487874173e-07, "loss": 0.0134, "step": 298600 }, { "epoch": 3.190448207703403, "grad_norm": 0.10501141101121902, "learning_rate": 8.870443127642697e-07, "loss": 0.0088, "step": 298610 }, { "epoch": 3.1905550510176828, "grad_norm": 3.452136516571045, "learning_rate": 8.870336763041201e-07, "loss": 0.0137, "step": 298620 }, { "epoch": 3.1906618943319622, "grad_norm": 0.0970567837357521, "learning_rate": 8.870230394069802e-07, "loss": 0.0062, "step": 298630 }, { "epoch": 3.1907687376462417, "grad_norm": 0.10027281939983368, "learning_rate": 8.87012402072862e-07, "loss": 0.0452, "step": 298640 }, { "epoch": 3.1908755809605216, "grad_norm": 13.798942565917969, "learning_rate": 8.870017643017776e-07, "loss": 0.0235, "step": 298650 }, { "epoch": 3.190982424274801, "grad_norm": 0.011585607193410397, "learning_rate": 8.869911260937391e-07, "loss": 0.0145, "step": 298660 }, { "epoch": 3.1910892675890805, "grad_norm": 4.064939498901367, "learning_rate": 8.869804874487584e-07, "loss": 0.0164, "step": 298670 }, { "epoch": 3.1911961109033604, "grad_norm": 0.8405415415763855, "learning_rate": 8.869698483668475e-07, "loss": 0.0142, "step": 298680 }, { "epoch": 3.19130295421764, "grad_norm": 0.07069191336631775, "learning_rate": 8.869592088480183e-07, "loss": 0.0036, "step": 298690 }, { "epoch": 3.1914097975319193, "grad_norm": 0.06002272292971611, "learning_rate": 8.869485688922831e-07, "loss": 0.0013, "step": 298700 }, { "epoch": 3.1915166408461992, "grad_norm": 0.5604901909828186, "learning_rate": 8.869379284996537e-07, "loss": 0.005, "step": 298710 }, { "epoch": 3.1916234841604787, "grad_norm": 0.9130943417549133, "learning_rate": 8.869272876701422e-07, "loss": 0.0407, "step": 298720 }, { "epoch": 3.191730327474758, "grad_norm": 2.7710351943969727, "learning_rate": 8.869166464037606e-07, "loss": 0.008, "step": 298730 }, { "epoch": 3.191837170789038, "grad_norm": 3.2621214389801025, "learning_rate": 8.869060047005208e-07, "loss": 0.0121, "step": 298740 }, { "epoch": 3.1919440141033175, "grad_norm": 3.9156455993652344, "learning_rate": 8.86895362560435e-07, "loss": 0.0214, "step": 298750 }, { "epoch": 3.192050857417597, "grad_norm": 3.319204092025757, "learning_rate": 8.868847199835152e-07, "loss": 0.0089, "step": 298760 }, { "epoch": 3.192157700731877, "grad_norm": 0.005056839436292648, "learning_rate": 8.868740769697733e-07, "loss": 0.0337, "step": 298770 }, { "epoch": 3.1922645440461563, "grad_norm": 7.380889415740967, "learning_rate": 8.868634335192213e-07, "loss": 0.0267, "step": 298780 }, { "epoch": 3.192371387360436, "grad_norm": 3.7538838386535645, "learning_rate": 8.868527896318714e-07, "loss": 0.0079, "step": 298790 }, { "epoch": 3.1924782306747157, "grad_norm": 0.012605990283191204, "learning_rate": 8.868421453077355e-07, "loss": 0.036, "step": 298800 }, { "epoch": 3.192585073988995, "grad_norm": 3.5261974334716797, "learning_rate": 8.868315005468256e-07, "loss": 0.0052, "step": 298810 }, { "epoch": 3.1926919173032746, "grad_norm": 0.12322310358285904, "learning_rate": 8.868208553491536e-07, "loss": 0.037, "step": 298820 }, { "epoch": 3.1927987606175545, "grad_norm": 0.012291469611227512, "learning_rate": 8.868102097147319e-07, "loss": 0.0296, "step": 298830 }, { "epoch": 3.192905603931834, "grad_norm": 5.221111297607422, "learning_rate": 8.867995636435721e-07, "loss": 0.0551, "step": 298840 }, { "epoch": 3.1930124472461134, "grad_norm": 0.33640480041503906, "learning_rate": 8.867889171356864e-07, "loss": 0.0099, "step": 298850 }, { "epoch": 3.1931192905603933, "grad_norm": 0.517480731010437, "learning_rate": 8.867782701910869e-07, "loss": 0.0167, "step": 298860 }, { "epoch": 3.193226133874673, "grad_norm": 7.01088285446167, "learning_rate": 8.867676228097855e-07, "loss": 0.035, "step": 298870 }, { "epoch": 3.1933329771889523, "grad_norm": 0.08705668896436691, "learning_rate": 8.867569749917943e-07, "loss": 0.016, "step": 298880 }, { "epoch": 3.193439820503232, "grad_norm": 1.256104588508606, "learning_rate": 8.867463267371254e-07, "loss": 0.0025, "step": 298890 }, { "epoch": 3.1935466638175116, "grad_norm": 6.91632080078125, "learning_rate": 8.867356780457904e-07, "loss": 0.0114, "step": 298900 }, { "epoch": 3.193653507131791, "grad_norm": 0.045102111995220184, "learning_rate": 8.867250289178019e-07, "loss": 0.0246, "step": 298910 }, { "epoch": 3.193760350446071, "grad_norm": 0.678135097026825, "learning_rate": 8.867143793531715e-07, "loss": 0.0214, "step": 298920 }, { "epoch": 3.1938671937603504, "grad_norm": 0.06447245925664902, "learning_rate": 8.867037293519114e-07, "loss": 0.0123, "step": 298930 }, { "epoch": 3.19397403707463, "grad_norm": 1.5716426372528076, "learning_rate": 8.866930789140337e-07, "loss": 0.0244, "step": 298940 }, { "epoch": 3.19408088038891, "grad_norm": 0.22284410893917084, "learning_rate": 8.866824280395502e-07, "loss": 0.0087, "step": 298950 }, { "epoch": 3.1941877237031893, "grad_norm": 0.2165631800889969, "learning_rate": 8.866717767284731e-07, "loss": 0.0031, "step": 298960 }, { "epoch": 3.1942945670174687, "grad_norm": 0.035008784383535385, "learning_rate": 8.866611249808144e-07, "loss": 0.0126, "step": 298970 }, { "epoch": 3.1944014103317486, "grad_norm": 3.1491000652313232, "learning_rate": 8.866504727965862e-07, "loss": 0.0161, "step": 298980 }, { "epoch": 3.194508253646028, "grad_norm": 1.1459310054779053, "learning_rate": 8.866398201758004e-07, "loss": 0.068, "step": 298990 }, { "epoch": 3.1946150969603075, "grad_norm": 2.6755828857421875, "learning_rate": 8.866291671184689e-07, "loss": 0.0042, "step": 299000 }, { "epoch": 3.1947219402745874, "grad_norm": 0.047344546765089035, "learning_rate": 8.86618513624604e-07, "loss": 0.0558, "step": 299010 }, { "epoch": 3.194828783588867, "grad_norm": 6.333035945892334, "learning_rate": 8.866078596942177e-07, "loss": 0.0255, "step": 299020 }, { "epoch": 3.1949356269031464, "grad_norm": 0.005615685600787401, "learning_rate": 8.865972053273218e-07, "loss": 0.0116, "step": 299030 }, { "epoch": 3.1950424702174263, "grad_norm": 0.08981719613075256, "learning_rate": 8.865865505239284e-07, "loss": 0.0037, "step": 299040 }, { "epoch": 3.1951493135317057, "grad_norm": 0.013366466388106346, "learning_rate": 8.865758952840499e-07, "loss": 0.0073, "step": 299050 }, { "epoch": 3.195256156845985, "grad_norm": 0.011394876055419445, "learning_rate": 8.865652396076979e-07, "loss": 0.0112, "step": 299060 }, { "epoch": 3.195363000160265, "grad_norm": 0.4098605811595917, "learning_rate": 8.865545834948844e-07, "loss": 0.0017, "step": 299070 }, { "epoch": 3.1954698434745445, "grad_norm": 28.996519088745117, "learning_rate": 8.865439269456219e-07, "loss": 0.0107, "step": 299080 }, { "epoch": 3.195576686788824, "grad_norm": 0.07163488119840622, "learning_rate": 8.86533269959922e-07, "loss": 0.0082, "step": 299090 }, { "epoch": 3.195683530103104, "grad_norm": 1.640834093093872, "learning_rate": 8.865226125377969e-07, "loss": 0.0021, "step": 299100 }, { "epoch": 3.1957903734173834, "grad_norm": 0.9561014175415039, "learning_rate": 8.865119546792584e-07, "loss": 0.0046, "step": 299110 }, { "epoch": 3.195897216731663, "grad_norm": 2.673654794692993, "learning_rate": 8.86501296384319e-07, "loss": 0.0056, "step": 299120 }, { "epoch": 3.1960040600459427, "grad_norm": 2.4226181507110596, "learning_rate": 8.864906376529903e-07, "loss": 0.0066, "step": 299130 }, { "epoch": 3.196110903360222, "grad_norm": 0.048104189336299896, "learning_rate": 8.864799784852846e-07, "loss": 0.026, "step": 299140 }, { "epoch": 3.1962177466745016, "grad_norm": 0.0024550436064600945, "learning_rate": 8.864693188812138e-07, "loss": 0.0388, "step": 299150 }, { "epoch": 3.1963245899887816, "grad_norm": 3.3342673778533936, "learning_rate": 8.864586588407899e-07, "loss": 0.0408, "step": 299160 }, { "epoch": 3.196431433303061, "grad_norm": 0.21243345737457275, "learning_rate": 8.86447998364025e-07, "loss": 0.0067, "step": 299170 }, { "epoch": 3.1965382766173405, "grad_norm": 0.08030597865581512, "learning_rate": 8.864373374509313e-07, "loss": 0.0017, "step": 299180 }, { "epoch": 3.1966451199316204, "grad_norm": 2.0880041122436523, "learning_rate": 8.864266761015206e-07, "loss": 0.0195, "step": 299190 }, { "epoch": 3.1967519632459, "grad_norm": 0.5595965385437012, "learning_rate": 8.864160143158051e-07, "loss": 0.0052, "step": 299200 }, { "epoch": 3.1968588065601793, "grad_norm": 0.21287238597869873, "learning_rate": 8.864053520937965e-07, "loss": 0.0572, "step": 299210 }, { "epoch": 3.196965649874459, "grad_norm": 6.727243900299072, "learning_rate": 8.863946894355072e-07, "loss": 0.0519, "step": 299220 }, { "epoch": 3.1970724931887387, "grad_norm": 0.02117636799812317, "learning_rate": 8.863840263409493e-07, "loss": 0.0125, "step": 299230 }, { "epoch": 3.1971793365030186, "grad_norm": 7.304812431335449, "learning_rate": 8.863733628101345e-07, "loss": 0.0085, "step": 299240 }, { "epoch": 3.197286179817298, "grad_norm": 0.7602930068969727, "learning_rate": 8.863626988430751e-07, "loss": 0.0726, "step": 299250 }, { "epoch": 3.1973930231315775, "grad_norm": 0.478510320186615, "learning_rate": 8.863520344397831e-07, "loss": 0.0166, "step": 299260 }, { "epoch": 3.197499866445857, "grad_norm": 0.009973565116524696, "learning_rate": 8.863413696002704e-07, "loss": 0.0118, "step": 299270 }, { "epoch": 3.197606709760137, "grad_norm": 2.9832489490509033, "learning_rate": 8.863307043245491e-07, "loss": 0.0024, "step": 299280 }, { "epoch": 3.1977135530744163, "grad_norm": 2.000671148300171, "learning_rate": 8.863200386126312e-07, "loss": 0.0187, "step": 299290 }, { "epoch": 3.197820396388696, "grad_norm": 0.42058226466178894, "learning_rate": 8.863093724645291e-07, "loss": 0.01, "step": 299300 }, { "epoch": 3.1979272397029757, "grad_norm": 0.01644168235361576, "learning_rate": 8.862987058802545e-07, "loss": 0.0095, "step": 299310 }, { "epoch": 3.198034083017255, "grad_norm": 3.9113378524780273, "learning_rate": 8.862880388598192e-07, "loss": 0.0167, "step": 299320 }, { "epoch": 3.1981409263315346, "grad_norm": 0.07331540435552597, "learning_rate": 8.862773714032359e-07, "loss": 0.0229, "step": 299330 }, { "epoch": 3.1982477696458145, "grad_norm": 0.37569767236709595, "learning_rate": 8.862667035105161e-07, "loss": 0.0299, "step": 299340 }, { "epoch": 3.198354612960094, "grad_norm": 2.788985013961792, "learning_rate": 8.862560351816721e-07, "loss": 0.0052, "step": 299350 }, { "epoch": 3.198461456274374, "grad_norm": 0.13004066050052643, "learning_rate": 8.86245366416716e-07, "loss": 0.0064, "step": 299360 }, { "epoch": 3.1985682995886533, "grad_norm": 0.02577783912420273, "learning_rate": 8.862346972156596e-07, "loss": 0.0281, "step": 299370 }, { "epoch": 3.1986751429029328, "grad_norm": 12.2827730178833, "learning_rate": 8.862240275785151e-07, "loss": 0.0331, "step": 299380 }, { "epoch": 3.1987819862172127, "grad_norm": 1.4500771760940552, "learning_rate": 8.862133575052945e-07, "loss": 0.0152, "step": 299390 }, { "epoch": 3.198888829531492, "grad_norm": 3.623002767562866, "learning_rate": 8.862026869960097e-07, "loss": 0.0376, "step": 299400 }, { "epoch": 3.1989956728457716, "grad_norm": 0.6766688823699951, "learning_rate": 8.861920160506732e-07, "loss": 0.0037, "step": 299410 }, { "epoch": 3.1991025161600515, "grad_norm": 5.132050037384033, "learning_rate": 8.861813446692967e-07, "loss": 0.0045, "step": 299420 }, { "epoch": 3.199209359474331, "grad_norm": 0.006208642385900021, "learning_rate": 8.861706728518923e-07, "loss": 0.0182, "step": 299430 }, { "epoch": 3.1993162027886104, "grad_norm": 0.4532202184200287, "learning_rate": 8.861600005984719e-07, "loss": 0.0013, "step": 299440 }, { "epoch": 3.1994230461028903, "grad_norm": 5.028118133544922, "learning_rate": 8.861493279090479e-07, "loss": 0.0153, "step": 299450 }, { "epoch": 3.1995298894171698, "grad_norm": 0.4941104054450989, "learning_rate": 8.861386547836322e-07, "loss": 0.0421, "step": 299460 }, { "epoch": 3.199636732731449, "grad_norm": 0.09852264076471329, "learning_rate": 8.861279812222368e-07, "loss": 0.0053, "step": 299470 }, { "epoch": 3.199743576045729, "grad_norm": 0.5214390754699707, "learning_rate": 8.861173072248738e-07, "loss": 0.0342, "step": 299480 }, { "epoch": 3.1998504193600086, "grad_norm": 0.025048881769180298, "learning_rate": 8.86106632791555e-07, "loss": 0.0528, "step": 299490 }, { "epoch": 3.199957262674288, "grad_norm": 0.28085219860076904, "learning_rate": 8.860959579222929e-07, "loss": 0.0211, "step": 299500 }, { "epoch": 3.200064105988568, "grad_norm": 0.10351713746786118, "learning_rate": 8.860852826170991e-07, "loss": 0.0211, "step": 299510 }, { "epoch": 3.2001709493028474, "grad_norm": 0.892869770526886, "learning_rate": 8.860746068759861e-07, "loss": 0.0052, "step": 299520 }, { "epoch": 3.200277792617127, "grad_norm": 0.01359325647354126, "learning_rate": 8.860639306989657e-07, "loss": 0.0175, "step": 299530 }, { "epoch": 3.2003846359314068, "grad_norm": 0.0023011271841824055, "learning_rate": 8.860532540860498e-07, "loss": 0.01, "step": 299540 }, { "epoch": 3.200491479245686, "grad_norm": 0.3484875559806824, "learning_rate": 8.860425770372509e-07, "loss": 0.0132, "step": 299550 }, { "epoch": 3.2005983225599657, "grad_norm": 0.10169333219528198, "learning_rate": 8.860318995525805e-07, "loss": 0.015, "step": 299560 }, { "epoch": 3.2007051658742456, "grad_norm": 0.4626605212688446, "learning_rate": 8.860212216320511e-07, "loss": 0.0174, "step": 299570 }, { "epoch": 3.200812009188525, "grad_norm": 0.39375296235084534, "learning_rate": 8.860105432756746e-07, "loss": 0.0382, "step": 299580 }, { "epoch": 3.2009188525028045, "grad_norm": 3.828537940979004, "learning_rate": 8.859998644834631e-07, "loss": 0.0147, "step": 299590 }, { "epoch": 3.2010256958170844, "grad_norm": 1.4351751804351807, "learning_rate": 8.859891852554286e-07, "loss": 0.0098, "step": 299600 }, { "epoch": 3.201132539131364, "grad_norm": 1.5848488807678223, "learning_rate": 8.859785055915832e-07, "loss": 0.01, "step": 299610 }, { "epoch": 3.2012393824456433, "grad_norm": 0.2946617603302002, "learning_rate": 8.85967825491939e-07, "loss": 0.0072, "step": 299620 }, { "epoch": 3.2013462257599232, "grad_norm": 3.606551170349121, "learning_rate": 8.859571449565078e-07, "loss": 0.0172, "step": 299630 }, { "epoch": 3.2014530690742027, "grad_norm": 10.1597318649292, "learning_rate": 8.859464639853019e-07, "loss": 0.0119, "step": 299640 }, { "epoch": 3.201559912388482, "grad_norm": 0.6957383155822754, "learning_rate": 8.859357825783333e-07, "loss": 0.0068, "step": 299650 }, { "epoch": 3.201666755702762, "grad_norm": 0.001786920242011547, "learning_rate": 8.859251007356143e-07, "loss": 0.0299, "step": 299660 }, { "epoch": 3.2017735990170415, "grad_norm": 3.1608729362487793, "learning_rate": 8.859144184571566e-07, "loss": 0.0501, "step": 299670 }, { "epoch": 3.201880442331321, "grad_norm": 0.05862145870923996, "learning_rate": 8.859037357429723e-07, "loss": 0.0095, "step": 299680 }, { "epoch": 3.201987285645601, "grad_norm": 0.7499125599861145, "learning_rate": 8.858930525930737e-07, "loss": 0.0057, "step": 299690 }, { "epoch": 3.2020941289598803, "grad_norm": 0.02814139425754547, "learning_rate": 8.858823690074726e-07, "loss": 0.0138, "step": 299700 }, { "epoch": 3.20220097227416, "grad_norm": 0.0747821256518364, "learning_rate": 8.858716849861811e-07, "loss": 0.0097, "step": 299710 }, { "epoch": 3.2023078155884397, "grad_norm": 0.06050274148583412, "learning_rate": 8.858610005292115e-07, "loss": 0.004, "step": 299720 }, { "epoch": 3.202414658902719, "grad_norm": 0.0063382298685610294, "learning_rate": 8.858503156365758e-07, "loss": 0.0235, "step": 299730 }, { "epoch": 3.2025215022169986, "grad_norm": 0.03596946969628334, "learning_rate": 8.858396303082858e-07, "loss": 0.0214, "step": 299740 }, { "epoch": 3.2026283455312785, "grad_norm": 0.3214932680130005, "learning_rate": 8.858289445443539e-07, "loss": 0.0037, "step": 299750 }, { "epoch": 3.202735188845558, "grad_norm": 0.014710353687405586, "learning_rate": 8.858182583447918e-07, "loss": 0.0071, "step": 299760 }, { "epoch": 3.2028420321598374, "grad_norm": 0.14128397405147552, "learning_rate": 8.858075717096118e-07, "loss": 0.0011, "step": 299770 }, { "epoch": 3.2029488754741173, "grad_norm": 0.0026470141019672155, "learning_rate": 8.85796884638826e-07, "loss": 0.0156, "step": 299780 }, { "epoch": 3.203055718788397, "grad_norm": 0.5751643776893616, "learning_rate": 8.857861971324465e-07, "loss": 0.0112, "step": 299790 }, { "epoch": 3.2031625621026762, "grad_norm": 0.047217827290296555, "learning_rate": 8.857755091904851e-07, "loss": 0.0116, "step": 299800 }, { "epoch": 3.203269405416956, "grad_norm": 0.7608290910720825, "learning_rate": 8.85764820812954e-07, "loss": 0.0079, "step": 299810 }, { "epoch": 3.2033762487312356, "grad_norm": 0.8614196181297302, "learning_rate": 8.857541319998654e-07, "loss": 0.0091, "step": 299820 }, { "epoch": 3.203483092045515, "grad_norm": 2.6244089603424072, "learning_rate": 8.857434427512314e-07, "loss": 0.0128, "step": 299830 }, { "epoch": 3.203589935359795, "grad_norm": 4.331000328063965, "learning_rate": 8.857327530670638e-07, "loss": 0.0725, "step": 299840 }, { "epoch": 3.2036967786740744, "grad_norm": 1.7738429307937622, "learning_rate": 8.857220629473747e-07, "loss": 0.0035, "step": 299850 }, { "epoch": 3.203803621988354, "grad_norm": 0.3048139810562134, "learning_rate": 8.857113723921764e-07, "loss": 0.0032, "step": 299860 }, { "epoch": 3.203910465302634, "grad_norm": 4.505713939666748, "learning_rate": 8.857006814014809e-07, "loss": 0.0296, "step": 299870 }, { "epoch": 3.2040173086169133, "grad_norm": 0.09937207400798798, "learning_rate": 8.856899899753e-07, "loss": 0.0134, "step": 299880 }, { "epoch": 3.2041241519311927, "grad_norm": 0.1204710379242897, "learning_rate": 8.856792981136463e-07, "loss": 0.0275, "step": 299890 }, { "epoch": 3.2042309952454726, "grad_norm": 4.164456844329834, "learning_rate": 8.856686058165315e-07, "loss": 0.0359, "step": 299900 }, { "epoch": 3.204337838559752, "grad_norm": 4.749305248260498, "learning_rate": 8.856579130839676e-07, "loss": 0.014, "step": 299910 }, { "epoch": 3.2044446818740315, "grad_norm": 0.010691250674426556, "learning_rate": 8.856472199159668e-07, "loss": 0.0016, "step": 299920 }, { "epoch": 3.2045515251883114, "grad_norm": 0.4487859308719635, "learning_rate": 8.856365263125414e-07, "loss": 0.0315, "step": 299930 }, { "epoch": 3.204658368502591, "grad_norm": 0.25799980759620667, "learning_rate": 8.856258322737031e-07, "loss": 0.0143, "step": 299940 }, { "epoch": 3.2047652118168704, "grad_norm": 0.4674554169178009, "learning_rate": 8.856151377994641e-07, "loss": 0.0162, "step": 299950 }, { "epoch": 3.2048720551311503, "grad_norm": 0.05531914159655571, "learning_rate": 8.856044428898364e-07, "loss": 0.0098, "step": 299960 }, { "epoch": 3.2049788984454297, "grad_norm": 3.518676280975342, "learning_rate": 8.855937475448324e-07, "loss": 0.0112, "step": 299970 }, { "epoch": 3.205085741759709, "grad_norm": 0.7353549003601074, "learning_rate": 8.855830517644639e-07, "loss": 0.0147, "step": 299980 }, { "epoch": 3.205192585073989, "grad_norm": 2.9134654998779297, "learning_rate": 8.855723555487431e-07, "loss": 0.0153, "step": 299990 }, { "epoch": 3.2052994283882685, "grad_norm": 1.612684726715088, "learning_rate": 8.855616588976819e-07, "loss": 0.0039, "step": 300000 }, { "epoch": 3.2054062717025484, "grad_norm": 0.011241618543863297, "learning_rate": 8.855509618112924e-07, "loss": 0.0267, "step": 300010 }, { "epoch": 3.205513115016828, "grad_norm": 2.219163417816162, "learning_rate": 8.855402642895869e-07, "loss": 0.0033, "step": 300020 }, { "epoch": 3.2056199583311074, "grad_norm": 0.5304633378982544, "learning_rate": 8.855295663325772e-07, "loss": 0.0077, "step": 300030 }, { "epoch": 3.205726801645387, "grad_norm": 7.341452121734619, "learning_rate": 8.855188679402758e-07, "loss": 0.0089, "step": 300040 }, { "epoch": 3.2058336449596667, "grad_norm": 0.017929092049598694, "learning_rate": 8.855081691126944e-07, "loss": 0.0366, "step": 300050 }, { "epoch": 3.205940488273946, "grad_norm": 4.024376392364502, "learning_rate": 8.85497469849845e-07, "loss": 0.0066, "step": 300060 }, { "epoch": 3.206047331588226, "grad_norm": 4.762130260467529, "learning_rate": 8.8548677015174e-07, "loss": 0.0137, "step": 300070 }, { "epoch": 3.2061541749025055, "grad_norm": 0.013839605264365673, "learning_rate": 8.854760700183914e-07, "loss": 0.0249, "step": 300080 }, { "epoch": 3.206261018216785, "grad_norm": 0.0012657620245590806, "learning_rate": 8.854653694498111e-07, "loss": 0.009, "step": 300090 }, { "epoch": 3.206367861531065, "grad_norm": 0.008105024695396423, "learning_rate": 8.854546684460114e-07, "loss": 0.0087, "step": 300100 }, { "epoch": 3.2064747048453444, "grad_norm": 0.26150715351104736, "learning_rate": 8.854439670070042e-07, "loss": 0.0101, "step": 300110 }, { "epoch": 3.206581548159624, "grad_norm": 0.039730917662382126, "learning_rate": 8.854332651328018e-07, "loss": 0.0133, "step": 300120 }, { "epoch": 3.2066883914739037, "grad_norm": 0.7024260759353638, "learning_rate": 8.85422562823416e-07, "loss": 0.0478, "step": 300130 }, { "epoch": 3.206795234788183, "grad_norm": 0.005768665578216314, "learning_rate": 8.854118600788591e-07, "loss": 0.0008, "step": 300140 }, { "epoch": 3.2069020781024626, "grad_norm": 0.06729079782962799, "learning_rate": 8.854011568991431e-07, "loss": 0.0093, "step": 300150 }, { "epoch": 3.2070089214167425, "grad_norm": 0.015293668955564499, "learning_rate": 8.853904532842802e-07, "loss": 0.0153, "step": 300160 }, { "epoch": 3.207115764731022, "grad_norm": 0.10209428519010544, "learning_rate": 8.853797492342823e-07, "loss": 0.0333, "step": 300170 }, { "epoch": 3.2072226080453015, "grad_norm": 4.471214294433594, "learning_rate": 8.853690447491615e-07, "loss": 0.015, "step": 300180 }, { "epoch": 3.2073294513595814, "grad_norm": 0.014747442677617073, "learning_rate": 8.853583398289302e-07, "loss": 0.0212, "step": 300190 }, { "epoch": 3.207436294673861, "grad_norm": 0.0019416534341871738, "learning_rate": 8.853476344736001e-07, "loss": 0.0136, "step": 300200 }, { "epoch": 3.2075431379881403, "grad_norm": 2.019078254699707, "learning_rate": 8.853369286831835e-07, "loss": 0.016, "step": 300210 }, { "epoch": 3.20764998130242, "grad_norm": 0.0047736456617712975, "learning_rate": 8.853262224576923e-07, "loss": 0.02, "step": 300220 }, { "epoch": 3.2077568246166996, "grad_norm": 0.034761372953653336, "learning_rate": 8.853155157971388e-07, "loss": 0.0018, "step": 300230 }, { "epoch": 3.207863667930979, "grad_norm": 2.218235731124878, "learning_rate": 8.85304808701535e-07, "loss": 0.0069, "step": 300240 }, { "epoch": 3.207970511245259, "grad_norm": 0.0015940711600705981, "learning_rate": 8.852941011708929e-07, "loss": 0.0068, "step": 300250 }, { "epoch": 3.2080773545595385, "grad_norm": 2.7334518432617188, "learning_rate": 8.852833932052248e-07, "loss": 0.0145, "step": 300260 }, { "epoch": 3.208184197873818, "grad_norm": 0.02837267890572548, "learning_rate": 8.852726848045426e-07, "loss": 0.006, "step": 300270 }, { "epoch": 3.208291041188098, "grad_norm": 0.07695135474205017, "learning_rate": 8.852619759688587e-07, "loss": 0.067, "step": 300280 }, { "epoch": 3.2083978845023773, "grad_norm": 1.409945011138916, "learning_rate": 8.852512666981846e-07, "loss": 0.0107, "step": 300290 }, { "epoch": 3.2085047278166567, "grad_norm": 0.007631469517946243, "learning_rate": 8.852405569925329e-07, "loss": 0.0331, "step": 300300 }, { "epoch": 3.2086115711309366, "grad_norm": 0.024862362071871758, "learning_rate": 8.852298468519155e-07, "loss": 0.0091, "step": 300310 }, { "epoch": 3.208718414445216, "grad_norm": 0.7754925489425659, "learning_rate": 8.852191362763446e-07, "loss": 0.0167, "step": 300320 }, { "epoch": 3.2088252577594956, "grad_norm": 0.12075917422771454, "learning_rate": 8.852084252658322e-07, "loss": 0.0133, "step": 300330 }, { "epoch": 3.2089321010737755, "grad_norm": 0.44822001457214355, "learning_rate": 8.851977138203903e-07, "loss": 0.0162, "step": 300340 }, { "epoch": 3.209038944388055, "grad_norm": 0.21204465627670288, "learning_rate": 8.851870019400313e-07, "loss": 0.0113, "step": 300350 }, { "epoch": 3.2091457877023344, "grad_norm": 0.18884141743183136, "learning_rate": 8.85176289624767e-07, "loss": 0.0073, "step": 300360 }, { "epoch": 3.2092526310166143, "grad_norm": 0.029511021450161934, "learning_rate": 8.851655768746096e-07, "loss": 0.0109, "step": 300370 }, { "epoch": 3.2093594743308937, "grad_norm": 0.6491981744766235, "learning_rate": 8.851548636895712e-07, "loss": 0.0058, "step": 300380 }, { "epoch": 3.209466317645173, "grad_norm": 6.114368438720703, "learning_rate": 8.851441500696639e-07, "loss": 0.0122, "step": 300390 }, { "epoch": 3.209573160959453, "grad_norm": 1.7594612836837769, "learning_rate": 8.851334360148998e-07, "loss": 0.005, "step": 300400 }, { "epoch": 3.2096800042737326, "grad_norm": 11.622239112854004, "learning_rate": 8.851227215252909e-07, "loss": 0.0165, "step": 300410 }, { "epoch": 3.209786847588012, "grad_norm": 11.028026580810547, "learning_rate": 8.851120066008495e-07, "loss": 0.017, "step": 300420 }, { "epoch": 3.209893690902292, "grad_norm": 2.505115270614624, "learning_rate": 8.851012912415874e-07, "loss": 0.0035, "step": 300430 }, { "epoch": 3.2100005342165714, "grad_norm": 0.04895031079649925, "learning_rate": 8.850905754475171e-07, "loss": 0.0052, "step": 300440 }, { "epoch": 3.210107377530851, "grad_norm": 0.04327540472149849, "learning_rate": 8.850798592186503e-07, "loss": 0.0113, "step": 300450 }, { "epoch": 3.2102142208451308, "grad_norm": 0.010878254659473896, "learning_rate": 8.850691425549995e-07, "loss": 0.0057, "step": 300460 }, { "epoch": 3.21032106415941, "grad_norm": 0.03677501901984215, "learning_rate": 8.850584254565762e-07, "loss": 0.0034, "step": 300470 }, { "epoch": 3.2104279074736897, "grad_norm": 3.6731338500976562, "learning_rate": 8.850477079233931e-07, "loss": 0.0428, "step": 300480 }, { "epoch": 3.2105347507879696, "grad_norm": 0.016879551112651825, "learning_rate": 8.850369899554622e-07, "loss": 0.008, "step": 300490 }, { "epoch": 3.210641594102249, "grad_norm": 2.2524642944335938, "learning_rate": 8.850262715527953e-07, "loss": 0.0023, "step": 300500 }, { "epoch": 3.2107484374165285, "grad_norm": 0.0011225523194298148, "learning_rate": 8.850155527154047e-07, "loss": 0.0051, "step": 300510 }, { "epoch": 3.2108552807308084, "grad_norm": 6.431787014007568, "learning_rate": 8.850048334433026e-07, "loss": 0.0622, "step": 300520 }, { "epoch": 3.210962124045088, "grad_norm": 15.365324974060059, "learning_rate": 8.849941137365009e-07, "loss": 0.0109, "step": 300530 }, { "epoch": 3.2110689673593673, "grad_norm": 0.007479738909751177, "learning_rate": 8.849833935950117e-07, "loss": 0.0126, "step": 300540 }, { "epoch": 3.211175810673647, "grad_norm": 0.10431574285030365, "learning_rate": 8.849726730188474e-07, "loss": 0.0076, "step": 300550 }, { "epoch": 3.2112826539879267, "grad_norm": 0.002965611172839999, "learning_rate": 8.849619520080198e-07, "loss": 0.0113, "step": 300560 }, { "epoch": 3.211389497302206, "grad_norm": 0.6427611708641052, "learning_rate": 8.849512305625411e-07, "loss": 0.0176, "step": 300570 }, { "epoch": 3.211496340616486, "grad_norm": 0.08026816695928574, "learning_rate": 8.849405086824233e-07, "loss": 0.0136, "step": 300580 }, { "epoch": 3.2116031839307655, "grad_norm": 0.09647738188505173, "learning_rate": 8.849297863676787e-07, "loss": 0.0117, "step": 300590 }, { "epoch": 3.211710027245045, "grad_norm": 0.04995892196893692, "learning_rate": 8.849190636183192e-07, "loss": 0.0093, "step": 300600 }, { "epoch": 3.211816870559325, "grad_norm": 0.025392884388566017, "learning_rate": 8.849083404343573e-07, "loss": 0.002, "step": 300610 }, { "epoch": 3.2119237138736043, "grad_norm": 0.001366319484077394, "learning_rate": 8.848976168158047e-07, "loss": 0.0288, "step": 300620 }, { "epoch": 3.2120305571878838, "grad_norm": 0.02556002512574196, "learning_rate": 8.848868927626736e-07, "loss": 0.0169, "step": 300630 }, { "epoch": 3.2121374005021637, "grad_norm": 0.005174416117370129, "learning_rate": 8.848761682749762e-07, "loss": 0.0115, "step": 300640 }, { "epoch": 3.212244243816443, "grad_norm": 5.928799152374268, "learning_rate": 8.848654433527245e-07, "loss": 0.0109, "step": 300650 }, { "epoch": 3.2123510871307226, "grad_norm": 0.06668820977210999, "learning_rate": 8.848547179959307e-07, "loss": 0.0061, "step": 300660 }, { "epoch": 3.2124579304450025, "grad_norm": 0.5992546677589417, "learning_rate": 8.848439922046068e-07, "loss": 0.0094, "step": 300670 }, { "epoch": 3.212564773759282, "grad_norm": 0.07112009823322296, "learning_rate": 8.84833265978765e-07, "loss": 0.006, "step": 300680 }, { "epoch": 3.2126716170735614, "grad_norm": 0.0009989157551899552, "learning_rate": 8.848225393184176e-07, "loss": 0.007, "step": 300690 }, { "epoch": 3.2127784603878413, "grad_norm": 0.9130605459213257, "learning_rate": 8.848118122235764e-07, "loss": 0.0143, "step": 300700 }, { "epoch": 3.2128853037021208, "grad_norm": 3.7762720584869385, "learning_rate": 8.848010846942535e-07, "loss": 0.0164, "step": 300710 }, { "epoch": 3.2129921470164007, "grad_norm": 3.9063644409179688, "learning_rate": 8.847903567304612e-07, "loss": 0.0089, "step": 300720 }, { "epoch": 3.21309899033068, "grad_norm": 0.3305489420890808, "learning_rate": 8.847796283322115e-07, "loss": 0.0011, "step": 300730 }, { "epoch": 3.2132058336449596, "grad_norm": 0.019832514226436615, "learning_rate": 8.847688994995167e-07, "loss": 0.0132, "step": 300740 }, { "epoch": 3.213312676959239, "grad_norm": 0.05061482638120651, "learning_rate": 8.847581702323888e-07, "loss": 0.0114, "step": 300750 }, { "epoch": 3.213419520273519, "grad_norm": 0.010235218331217766, "learning_rate": 8.847474405308397e-07, "loss": 0.0085, "step": 300760 }, { "epoch": 3.2135263635877984, "grad_norm": 1.6422430276870728, "learning_rate": 8.847367103948817e-07, "loss": 0.0301, "step": 300770 }, { "epoch": 3.2136332069020783, "grad_norm": 0.3156137466430664, "learning_rate": 8.84725979824527e-07, "loss": 0.0007, "step": 300780 }, { "epoch": 3.213740050216358, "grad_norm": 0.037885382771492004, "learning_rate": 8.847152488197877e-07, "loss": 0.0622, "step": 300790 }, { "epoch": 3.2138468935306372, "grad_norm": 2.44826078414917, "learning_rate": 8.847045173806757e-07, "loss": 0.0187, "step": 300800 }, { "epoch": 3.2139537368449167, "grad_norm": 1.910702109336853, "learning_rate": 8.846937855072034e-07, "loss": 0.0179, "step": 300810 }, { "epoch": 3.2140605801591966, "grad_norm": 0.11036498844623566, "learning_rate": 8.846830531993828e-07, "loss": 0.0128, "step": 300820 }, { "epoch": 3.214167423473476, "grad_norm": 0.015086531639099121, "learning_rate": 8.84672320457226e-07, "loss": 0.0396, "step": 300830 }, { "epoch": 3.214274266787756, "grad_norm": 5.614279270172119, "learning_rate": 8.846615872807449e-07, "loss": 0.0358, "step": 300840 }, { "epoch": 3.2143811101020354, "grad_norm": 2.5919344425201416, "learning_rate": 8.846508536699519e-07, "loss": 0.0122, "step": 300850 }, { "epoch": 3.214487953416315, "grad_norm": 1.0616086721420288, "learning_rate": 8.846401196248593e-07, "loss": 0.0175, "step": 300860 }, { "epoch": 3.214594796730595, "grad_norm": 1.0518858432769775, "learning_rate": 8.846293851454788e-07, "loss": 0.0314, "step": 300870 }, { "epoch": 3.2147016400448742, "grad_norm": 0.00830224622040987, "learning_rate": 8.846186502318227e-07, "loss": 0.0048, "step": 300880 }, { "epoch": 3.2148084833591537, "grad_norm": 8.235623359680176, "learning_rate": 8.846079148839032e-07, "loss": 0.0424, "step": 300890 }, { "epoch": 3.2149153266734336, "grad_norm": 0.22398222982883453, "learning_rate": 8.845971791017322e-07, "loss": 0.0079, "step": 300900 }, { "epoch": 3.215022169987713, "grad_norm": 3.7422194480895996, "learning_rate": 8.84586442885322e-07, "loss": 0.0062, "step": 300910 }, { "epoch": 3.2151290133019925, "grad_norm": 0.01819879375398159, "learning_rate": 8.845757062346848e-07, "loss": 0.026, "step": 300920 }, { "epoch": 3.2152358566162724, "grad_norm": 0.05332760512828827, "learning_rate": 8.845649691498325e-07, "loss": 0.0092, "step": 300930 }, { "epoch": 3.215342699930552, "grad_norm": 0.002240596804767847, "learning_rate": 8.845542316307772e-07, "loss": 0.0128, "step": 300940 }, { "epoch": 3.2154495432448313, "grad_norm": 0.0032382486388087273, "learning_rate": 8.845434936775314e-07, "loss": 0.0085, "step": 300950 }, { "epoch": 3.2155563865591112, "grad_norm": 5.864820957183838, "learning_rate": 8.845327552901068e-07, "loss": 0.0109, "step": 300960 }, { "epoch": 3.2156632298733907, "grad_norm": 0.006263475399464369, "learning_rate": 8.845220164685156e-07, "loss": 0.0099, "step": 300970 }, { "epoch": 3.21577007318767, "grad_norm": 6.175029277801514, "learning_rate": 8.845112772127703e-07, "loss": 0.0219, "step": 300980 }, { "epoch": 3.21587691650195, "grad_norm": 2.927651882171631, "learning_rate": 8.845005375228825e-07, "loss": 0.0082, "step": 300990 }, { "epoch": 3.2159837598162295, "grad_norm": 0.012106569483876228, "learning_rate": 8.844897973988648e-07, "loss": 0.0038, "step": 301000 }, { "epoch": 3.216090603130509, "grad_norm": 0.010332495905458927, "learning_rate": 8.84479056840729e-07, "loss": 0.004, "step": 301010 }, { "epoch": 3.216197446444789, "grad_norm": 0.9382083415985107, "learning_rate": 8.844683158484873e-07, "loss": 0.0028, "step": 301020 }, { "epoch": 3.2163042897590683, "grad_norm": 0.5796183943748474, "learning_rate": 8.844575744221518e-07, "loss": 0.0901, "step": 301030 }, { "epoch": 3.216411133073348, "grad_norm": 5.707672119140625, "learning_rate": 8.844468325617348e-07, "loss": 0.0055, "step": 301040 }, { "epoch": 3.2165179763876277, "grad_norm": 6.799531936645508, "learning_rate": 8.844360902672483e-07, "loss": 0.0148, "step": 301050 }, { "epoch": 3.216624819701907, "grad_norm": 0.12709912657737732, "learning_rate": 8.844253475387045e-07, "loss": 0.01, "step": 301060 }, { "epoch": 3.2167316630161866, "grad_norm": 0.196421816945076, "learning_rate": 8.844146043761152e-07, "loss": 0.0691, "step": 301070 }, { "epoch": 3.2168385063304665, "grad_norm": 7.4687418937683105, "learning_rate": 8.844038607794931e-07, "loss": 0.0138, "step": 301080 }, { "epoch": 3.216945349644746, "grad_norm": 0.020788947120308876, "learning_rate": 8.843931167488498e-07, "loss": 0.0029, "step": 301090 }, { "epoch": 3.2170521929590254, "grad_norm": 0.4635343551635742, "learning_rate": 8.843823722841977e-07, "loss": 0.0049, "step": 301100 }, { "epoch": 3.2171590362733054, "grad_norm": 0.009037922136485577, "learning_rate": 8.84371627385549e-07, "loss": 0.0321, "step": 301110 }, { "epoch": 3.217265879587585, "grad_norm": 0.04343150183558464, "learning_rate": 8.843608820529157e-07, "loss": 0.0243, "step": 301120 }, { "epoch": 3.2173727229018643, "grad_norm": 0.43180012702941895, "learning_rate": 8.8435013628631e-07, "loss": 0.0371, "step": 301130 }, { "epoch": 3.217479566216144, "grad_norm": 1.1427875757217407, "learning_rate": 8.843393900857439e-07, "loss": 0.0052, "step": 301140 }, { "epoch": 3.2175864095304236, "grad_norm": 0.009943169541656971, "learning_rate": 8.843286434512296e-07, "loss": 0.0188, "step": 301150 }, { "epoch": 3.217693252844703, "grad_norm": 2.94433856010437, "learning_rate": 8.843178963827793e-07, "loss": 0.0098, "step": 301160 }, { "epoch": 3.217800096158983, "grad_norm": 0.6606663465499878, "learning_rate": 8.84307148880405e-07, "loss": 0.005, "step": 301170 }, { "epoch": 3.2179069394732625, "grad_norm": 1.805612325668335, "learning_rate": 8.842964009441189e-07, "loss": 0.017, "step": 301180 }, { "epoch": 3.218013782787542, "grad_norm": 0.035080987960100174, "learning_rate": 8.842856525739334e-07, "loss": 0.0064, "step": 301190 }, { "epoch": 3.218120626101822, "grad_norm": 0.030110280960798264, "learning_rate": 8.842749037698603e-07, "loss": 0.0077, "step": 301200 }, { "epoch": 3.2182274694161013, "grad_norm": 0.005434231832623482, "learning_rate": 8.842641545319117e-07, "loss": 0.0231, "step": 301210 }, { "epoch": 3.2183343127303807, "grad_norm": 0.0032579144462943077, "learning_rate": 8.842534048601e-07, "loss": 0.004, "step": 301220 }, { "epoch": 3.2184411560446606, "grad_norm": 2.0992486476898193, "learning_rate": 8.842426547544372e-07, "loss": 0.0139, "step": 301230 }, { "epoch": 3.21854799935894, "grad_norm": 0.01535392738878727, "learning_rate": 8.842319042149353e-07, "loss": 0.0119, "step": 301240 }, { "epoch": 3.2186548426732196, "grad_norm": 4.977592945098877, "learning_rate": 8.842211532416067e-07, "loss": 0.0303, "step": 301250 }, { "epoch": 3.2187616859874995, "grad_norm": 0.012094353325664997, "learning_rate": 8.842104018344635e-07, "loss": 0.0073, "step": 301260 }, { "epoch": 3.218868529301779, "grad_norm": 7.3876953125, "learning_rate": 8.841996499935176e-07, "loss": 0.0176, "step": 301270 }, { "epoch": 3.2189753726160584, "grad_norm": 2.45208477973938, "learning_rate": 8.841888977187815e-07, "loss": 0.0159, "step": 301280 }, { "epoch": 3.2190822159303383, "grad_norm": 0.03404268994927406, "learning_rate": 8.841781450102668e-07, "loss": 0.035, "step": 301290 }, { "epoch": 3.2191890592446177, "grad_norm": 2.162393808364868, "learning_rate": 8.841673918679862e-07, "loss": 0.0067, "step": 301300 }, { "epoch": 3.219295902558897, "grad_norm": 1.7818772792816162, "learning_rate": 8.841566382919516e-07, "loss": 0.0355, "step": 301310 }, { "epoch": 3.219402745873177, "grad_norm": 0.31165093183517456, "learning_rate": 8.841458842821752e-07, "loss": 0.0084, "step": 301320 }, { "epoch": 3.2195095891874566, "grad_norm": 4.241656303405762, "learning_rate": 8.841351298386691e-07, "loss": 0.014, "step": 301330 }, { "epoch": 3.219616432501736, "grad_norm": 0.22871536016464233, "learning_rate": 8.841243749614453e-07, "loss": 0.0038, "step": 301340 }, { "epoch": 3.219723275816016, "grad_norm": 0.9090155363082886, "learning_rate": 8.841136196505163e-07, "loss": 0.0107, "step": 301350 }, { "epoch": 3.2198301191302954, "grad_norm": 4.551193714141846, "learning_rate": 8.841028639058939e-07, "loss": 0.0076, "step": 301360 }, { "epoch": 3.219936962444575, "grad_norm": 0.05011651664972305, "learning_rate": 8.840921077275905e-07, "loss": 0.0066, "step": 301370 }, { "epoch": 3.2200438057588547, "grad_norm": 0.23614710569381714, "learning_rate": 8.84081351115618e-07, "loss": 0.0056, "step": 301380 }, { "epoch": 3.220150649073134, "grad_norm": 5.349271774291992, "learning_rate": 8.840705940699886e-07, "loss": 0.0013, "step": 301390 }, { "epoch": 3.2202574923874137, "grad_norm": 0.0025267142336815596, "learning_rate": 8.840598365907147e-07, "loss": 0.0051, "step": 301400 }, { "epoch": 3.2203643357016936, "grad_norm": 0.1140044704079628, "learning_rate": 8.840490786778082e-07, "loss": 0.019, "step": 301410 }, { "epoch": 3.220471179015973, "grad_norm": 0.2916529178619385, "learning_rate": 8.840383203312813e-07, "loss": 0.0056, "step": 301420 }, { "epoch": 3.2205780223302525, "grad_norm": 0.018924465402960777, "learning_rate": 8.840275615511461e-07, "loss": 0.0444, "step": 301430 }, { "epoch": 3.2206848656445324, "grad_norm": 0.10901683568954468, "learning_rate": 8.840168023374148e-07, "loss": 0.0046, "step": 301440 }, { "epoch": 3.220791708958812, "grad_norm": 1.4090431928634644, "learning_rate": 8.840060426900995e-07, "loss": 0.0246, "step": 301450 }, { "epoch": 3.2208985522730913, "grad_norm": 0.24944202601909637, "learning_rate": 8.839952826092125e-07, "loss": 0.0007, "step": 301460 }, { "epoch": 3.221005395587371, "grad_norm": 0.006830934900790453, "learning_rate": 8.839845220947658e-07, "loss": 0.001, "step": 301470 }, { "epoch": 3.2211122389016507, "grad_norm": 0.06119733303785324, "learning_rate": 8.839737611467717e-07, "loss": 0.004, "step": 301480 }, { "epoch": 3.2212190822159306, "grad_norm": 0.04261177405714989, "learning_rate": 8.839629997652421e-07, "loss": 0.0252, "step": 301490 }, { "epoch": 3.22132592553021, "grad_norm": 6.750084400177002, "learning_rate": 8.839522379501894e-07, "loss": 0.0092, "step": 301500 }, { "epoch": 3.2214327688444895, "grad_norm": 0.6201605796813965, "learning_rate": 8.839414757016255e-07, "loss": 0.0048, "step": 301510 }, { "epoch": 3.221539612158769, "grad_norm": 0.09516385197639465, "learning_rate": 8.839307130195628e-07, "loss": 0.022, "step": 301520 }, { "epoch": 3.221646455473049, "grad_norm": 0.002486470155417919, "learning_rate": 8.839199499040133e-07, "loss": 0.0025, "step": 301530 }, { "epoch": 3.2217532987873283, "grad_norm": 0.001754692755639553, "learning_rate": 8.839091863549893e-07, "loss": 0.0013, "step": 301540 }, { "epoch": 3.221860142101608, "grad_norm": 0.8229265809059143, "learning_rate": 8.838984223725028e-07, "loss": 0.0339, "step": 301550 }, { "epoch": 3.2219669854158877, "grad_norm": 4.622647285461426, "learning_rate": 8.83887657956566e-07, "loss": 0.0272, "step": 301560 }, { "epoch": 3.222073828730167, "grad_norm": 0.4353656470775604, "learning_rate": 8.83876893107191e-07, "loss": 0.0026, "step": 301570 }, { "epoch": 3.222180672044447, "grad_norm": 0.45520931482315063, "learning_rate": 8.838661278243901e-07, "loss": 0.0014, "step": 301580 }, { "epoch": 3.2222875153587265, "grad_norm": 11.676036834716797, "learning_rate": 8.838553621081753e-07, "loss": 0.0918, "step": 301590 }, { "epoch": 3.222394358673006, "grad_norm": 1.7577102184295654, "learning_rate": 8.83844595958559e-07, "loss": 0.0087, "step": 301600 }, { "epoch": 3.222501201987286, "grad_norm": 1.7585819959640503, "learning_rate": 8.838338293755529e-07, "loss": 0.0047, "step": 301610 }, { "epoch": 3.2226080453015653, "grad_norm": 0.02115175686776638, "learning_rate": 8.838230623591696e-07, "loss": 0.0017, "step": 301620 }, { "epoch": 3.2227148886158448, "grad_norm": 3.750551700592041, "learning_rate": 8.838122949094211e-07, "loss": 0.0028, "step": 301630 }, { "epoch": 3.2228217319301247, "grad_norm": 0.021682152524590492, "learning_rate": 8.838015270263195e-07, "loss": 0.0308, "step": 301640 }, { "epoch": 3.222928575244404, "grad_norm": 0.004210393410176039, "learning_rate": 8.837907587098771e-07, "loss": 0.0422, "step": 301650 }, { "epoch": 3.2230354185586836, "grad_norm": 0.026535814628005028, "learning_rate": 8.83779989960106e-07, "loss": 0.0094, "step": 301660 }, { "epoch": 3.2231422618729635, "grad_norm": 0.04850722849369049, "learning_rate": 8.837692207770182e-07, "loss": 0.0016, "step": 301670 }, { "epoch": 3.223249105187243, "grad_norm": 0.9581080675125122, "learning_rate": 8.83758451160626e-07, "loss": 0.0065, "step": 301680 }, { "epoch": 3.2233559485015224, "grad_norm": 0.156637504696846, "learning_rate": 8.837476811109416e-07, "loss": 0.046, "step": 301690 }, { "epoch": 3.2234627918158023, "grad_norm": 0.23993262648582458, "learning_rate": 8.837369106279771e-07, "loss": 0.0095, "step": 301700 }, { "epoch": 3.2235696351300818, "grad_norm": 0.05020882561802864, "learning_rate": 8.837261397117446e-07, "loss": 0.0057, "step": 301710 }, { "epoch": 3.2236764784443612, "grad_norm": 12.421555519104004, "learning_rate": 8.837153683622563e-07, "loss": 0.0091, "step": 301720 }, { "epoch": 3.223783321758641, "grad_norm": 4.01365852355957, "learning_rate": 8.837045965795244e-07, "loss": 0.0262, "step": 301730 }, { "epoch": 3.2238901650729206, "grad_norm": 1.1560709476470947, "learning_rate": 8.836938243635612e-07, "loss": 0.0066, "step": 301740 }, { "epoch": 3.2239970083872, "grad_norm": 8.634489059448242, "learning_rate": 8.836830517143785e-07, "loss": 0.022, "step": 301750 }, { "epoch": 3.22410385170148, "grad_norm": 0.1261979043483734, "learning_rate": 8.836722786319889e-07, "loss": 0.014, "step": 301760 }, { "epoch": 3.2242106950157594, "grad_norm": 12.077371597290039, "learning_rate": 8.836615051164043e-07, "loss": 0.0257, "step": 301770 }, { "epoch": 3.224317538330039, "grad_norm": 0.025327030569314957, "learning_rate": 8.836507311676366e-07, "loss": 0.0145, "step": 301780 }, { "epoch": 3.2244243816443188, "grad_norm": 0.005308082327246666, "learning_rate": 8.836399567856986e-07, "loss": 0.0004, "step": 301790 }, { "epoch": 3.2245312249585982, "grad_norm": 0.0035371019039303064, "learning_rate": 8.836291819706021e-07, "loss": 0.004, "step": 301800 }, { "epoch": 3.2246380682728777, "grad_norm": 0.3902811110019684, "learning_rate": 8.836184067223592e-07, "loss": 0.025, "step": 301810 }, { "epoch": 3.2247449115871576, "grad_norm": 5.2944655418396, "learning_rate": 8.836076310409823e-07, "loss": 0.0043, "step": 301820 }, { "epoch": 3.224851754901437, "grad_norm": 0.009928354993462563, "learning_rate": 8.835968549264833e-07, "loss": 0.0012, "step": 301830 }, { "epoch": 3.2249585982157165, "grad_norm": 0.3297998905181885, "learning_rate": 8.835860783788746e-07, "loss": 0.0067, "step": 301840 }, { "epoch": 3.2250654415299964, "grad_norm": 3.0734593868255615, "learning_rate": 8.835753013981681e-07, "loss": 0.0062, "step": 301850 }, { "epoch": 3.225172284844276, "grad_norm": 0.0032728181686252356, "learning_rate": 8.835645239843763e-07, "loss": 0.0017, "step": 301860 }, { "epoch": 3.2252791281585553, "grad_norm": 0.08140261471271515, "learning_rate": 8.835537461375111e-07, "loss": 0.0206, "step": 301870 }, { "epoch": 3.2253859714728352, "grad_norm": 0.016090592369437218, "learning_rate": 8.835429678575849e-07, "loss": 0.0006, "step": 301880 }, { "epoch": 3.2254928147871147, "grad_norm": 3.477586030960083, "learning_rate": 8.835321891446096e-07, "loss": 0.0182, "step": 301890 }, { "epoch": 3.225599658101394, "grad_norm": 0.0036629929672926664, "learning_rate": 8.835214099985975e-07, "loss": 0.0303, "step": 301900 }, { "epoch": 3.225706501415674, "grad_norm": 0.09768568724393845, "learning_rate": 8.835106304195609e-07, "loss": 0.0453, "step": 301910 }, { "epoch": 3.2258133447299535, "grad_norm": 0.8719149231910706, "learning_rate": 8.834998504075118e-07, "loss": 0.0295, "step": 301920 }, { "epoch": 3.225920188044233, "grad_norm": 0.001989290351048112, "learning_rate": 8.834890699624624e-07, "loss": 0.0157, "step": 301930 }, { "epoch": 3.226027031358513, "grad_norm": 11.854233741760254, "learning_rate": 8.83478289084425e-07, "loss": 0.0284, "step": 301940 }, { "epoch": 3.2261338746727923, "grad_norm": 0.009187408722937107, "learning_rate": 8.834675077734115e-07, "loss": 0.0006, "step": 301950 }, { "epoch": 3.226240717987072, "grad_norm": 0.6181442737579346, "learning_rate": 8.834567260294344e-07, "loss": 0.007, "step": 301960 }, { "epoch": 3.2263475613013517, "grad_norm": 0.05799372121691704, "learning_rate": 8.834459438525057e-07, "loss": 0.0067, "step": 301970 }, { "epoch": 3.226454404615631, "grad_norm": 0.3951217532157898, "learning_rate": 8.834351612426375e-07, "loss": 0.0031, "step": 301980 }, { "epoch": 3.2265612479299106, "grad_norm": 0.0031615979969501495, "learning_rate": 8.834243781998422e-07, "loss": 0.0045, "step": 301990 }, { "epoch": 3.2266680912441905, "grad_norm": 1.634452223777771, "learning_rate": 8.834135947241317e-07, "loss": 0.0084, "step": 302000 }, { "epoch": 3.22677493455847, "grad_norm": 0.02054476924240589, "learning_rate": 8.834028108155183e-07, "loss": 0.0164, "step": 302010 }, { "epoch": 3.2268817778727494, "grad_norm": 0.09004191309213638, "learning_rate": 8.833920264740142e-07, "loss": 0.0197, "step": 302020 }, { "epoch": 3.2269886211870293, "grad_norm": 0.0028075980953872204, "learning_rate": 8.833812416996318e-07, "loss": 0.0137, "step": 302030 }, { "epoch": 3.227095464501309, "grad_norm": 0.009583825245499611, "learning_rate": 8.833704564923828e-07, "loss": 0.0163, "step": 302040 }, { "epoch": 3.2272023078155883, "grad_norm": 0.49629729986190796, "learning_rate": 8.833596708522797e-07, "loss": 0.0087, "step": 302050 }, { "epoch": 3.227309151129868, "grad_norm": 0.5465192198753357, "learning_rate": 8.833488847793344e-07, "loss": 0.0089, "step": 302060 }, { "epoch": 3.2274159944441476, "grad_norm": 0.2218020111322403, "learning_rate": 8.833380982735597e-07, "loss": 0.0059, "step": 302070 }, { "epoch": 3.227522837758427, "grad_norm": 0.0008812019950710237, "learning_rate": 8.83327311334967e-07, "loss": 0.0145, "step": 302080 }, { "epoch": 3.227629681072707, "grad_norm": 0.17090733349323273, "learning_rate": 8.833165239635689e-07, "loss": 0.0015, "step": 302090 }, { "epoch": 3.2277365243869864, "grad_norm": 2.6746277809143066, "learning_rate": 8.833057361593776e-07, "loss": 0.0175, "step": 302100 }, { "epoch": 3.227843367701266, "grad_norm": 7.051688194274902, "learning_rate": 8.832949479224052e-07, "loss": 0.0076, "step": 302110 }, { "epoch": 3.227950211015546, "grad_norm": 0.011830448172986507, "learning_rate": 8.832841592526638e-07, "loss": 0.0117, "step": 302120 }, { "epoch": 3.2280570543298253, "grad_norm": 1.4635099172592163, "learning_rate": 8.832733701501656e-07, "loss": 0.0368, "step": 302130 }, { "epoch": 3.2281638976441047, "grad_norm": 2.6688506603240967, "learning_rate": 8.83262580614923e-07, "loss": 0.0331, "step": 302140 }, { "epoch": 3.2282707409583846, "grad_norm": 0.010465532541275024, "learning_rate": 8.832517906469479e-07, "loss": 0.0026, "step": 302150 }, { "epoch": 3.228377584272664, "grad_norm": 2.818896770477295, "learning_rate": 8.832410002462527e-07, "loss": 0.0035, "step": 302160 }, { "epoch": 3.2284844275869435, "grad_norm": 0.7822646498680115, "learning_rate": 8.832302094128494e-07, "loss": 0.0048, "step": 302170 }, { "epoch": 3.2285912709012234, "grad_norm": 0.5997185111045837, "learning_rate": 8.832194181467503e-07, "loss": 0.0069, "step": 302180 }, { "epoch": 3.228698114215503, "grad_norm": 2.1790521144866943, "learning_rate": 8.832086264479677e-07, "loss": 0.0061, "step": 302190 }, { "epoch": 3.228804957529783, "grad_norm": 2.7828924655914307, "learning_rate": 8.831978343165134e-07, "loss": 0.0084, "step": 302200 }, { "epoch": 3.2289118008440623, "grad_norm": 0.017954092472791672, "learning_rate": 8.831870417523999e-07, "loss": 0.0061, "step": 302210 }, { "epoch": 3.2290186441583417, "grad_norm": 6.457523345947266, "learning_rate": 8.831762487556394e-07, "loss": 0.0099, "step": 302220 }, { "epoch": 3.229125487472621, "grad_norm": 2.321878433227539, "learning_rate": 8.831654553262438e-07, "loss": 0.0112, "step": 302230 }, { "epoch": 3.229232330786901, "grad_norm": 2.9838578701019287, "learning_rate": 8.831546614642255e-07, "loss": 0.0313, "step": 302240 }, { "epoch": 3.2293391741011805, "grad_norm": 6.067038536071777, "learning_rate": 8.831438671695969e-07, "loss": 0.02, "step": 302250 }, { "epoch": 3.2294460174154604, "grad_norm": 0.031126320362091064, "learning_rate": 8.831330724423698e-07, "loss": 0.0006, "step": 302260 }, { "epoch": 3.22955286072974, "grad_norm": 5.512126922607422, "learning_rate": 8.831222772825566e-07, "loss": 0.0299, "step": 302270 }, { "epoch": 3.2296597040440194, "grad_norm": 0.27093037962913513, "learning_rate": 8.831114816901694e-07, "loss": 0.0025, "step": 302280 }, { "epoch": 3.229766547358299, "grad_norm": 0.007117117289453745, "learning_rate": 8.831006856652205e-07, "loss": 0.0144, "step": 302290 }, { "epoch": 3.2298733906725787, "grad_norm": 11.680438995361328, "learning_rate": 8.830898892077218e-07, "loss": 0.0243, "step": 302300 }, { "epoch": 3.229980233986858, "grad_norm": 0.8631361126899719, "learning_rate": 8.83079092317686e-07, "loss": 0.0232, "step": 302310 }, { "epoch": 3.230087077301138, "grad_norm": 0.32880356907844543, "learning_rate": 8.830682949951248e-07, "loss": 0.0011, "step": 302320 }, { "epoch": 3.2301939206154175, "grad_norm": 3.4495513439178467, "learning_rate": 8.830574972400507e-07, "loss": 0.0148, "step": 302330 }, { "epoch": 3.230300763929697, "grad_norm": 3.207883358001709, "learning_rate": 8.830466990524756e-07, "loss": 0.019, "step": 302340 }, { "epoch": 3.230407607243977, "grad_norm": 1.200675368309021, "learning_rate": 8.83035900432412e-07, "loss": 0.0222, "step": 302350 }, { "epoch": 3.2305144505582564, "grad_norm": 18.66437339782715, "learning_rate": 8.83025101379872e-07, "loss": 0.0197, "step": 302360 }, { "epoch": 3.230621293872536, "grad_norm": 2.7607014179229736, "learning_rate": 8.830143018948676e-07, "loss": 0.0111, "step": 302370 }, { "epoch": 3.2307281371868157, "grad_norm": 0.013966087251901627, "learning_rate": 8.830035019774113e-07, "loss": 0.0063, "step": 302380 }, { "epoch": 3.230834980501095, "grad_norm": 3.740555763244629, "learning_rate": 8.82992701627515e-07, "loss": 0.0182, "step": 302390 }, { "epoch": 3.2309418238153746, "grad_norm": 0.5611720681190491, "learning_rate": 8.829819008451911e-07, "loss": 0.0349, "step": 302400 }, { "epoch": 3.2310486671296546, "grad_norm": 1.003147006034851, "learning_rate": 8.829710996304516e-07, "loss": 0.0205, "step": 302410 }, { "epoch": 3.231155510443934, "grad_norm": 3.3328752517700195, "learning_rate": 8.82960297983309e-07, "loss": 0.0152, "step": 302420 }, { "epoch": 3.2312623537582135, "grad_norm": 1.0664678812026978, "learning_rate": 8.829494959037753e-07, "loss": 0.0144, "step": 302430 }, { "epoch": 3.2313691970724934, "grad_norm": 0.3362010419368744, "learning_rate": 8.829386933918628e-07, "loss": 0.0137, "step": 302440 }, { "epoch": 3.231476040386773, "grad_norm": 0.3999457061290741, "learning_rate": 8.829278904475834e-07, "loss": 0.0047, "step": 302450 }, { "epoch": 3.2315828837010523, "grad_norm": 3.4675891399383545, "learning_rate": 8.829170870709497e-07, "loss": 0.0177, "step": 302460 }, { "epoch": 3.231689727015332, "grad_norm": 0.3330291211605072, "learning_rate": 8.829062832619736e-07, "loss": 0.0683, "step": 302470 }, { "epoch": 3.2317965703296117, "grad_norm": 0.16814292967319489, "learning_rate": 8.828954790206673e-07, "loss": 0.0596, "step": 302480 }, { "epoch": 3.231903413643891, "grad_norm": 4.6103925704956055, "learning_rate": 8.828846743470433e-07, "loss": 0.0056, "step": 302490 }, { "epoch": 3.232010256958171, "grad_norm": 0.005424043163657188, "learning_rate": 8.828738692411136e-07, "loss": 0.0372, "step": 302500 }, { "epoch": 3.2321171002724505, "grad_norm": 9.689148902893066, "learning_rate": 8.828630637028902e-07, "loss": 0.0246, "step": 302510 }, { "epoch": 3.23222394358673, "grad_norm": 4.936007976531982, "learning_rate": 8.828522577323857e-07, "loss": 0.007, "step": 302520 }, { "epoch": 3.23233078690101, "grad_norm": 0.03333968296647072, "learning_rate": 8.82841451329612e-07, "loss": 0.0236, "step": 302530 }, { "epoch": 3.2324376302152893, "grad_norm": 0.04508233815431595, "learning_rate": 8.828306444945814e-07, "loss": 0.0043, "step": 302540 }, { "epoch": 3.2325444735295688, "grad_norm": 0.16791489720344543, "learning_rate": 8.828198372273064e-07, "loss": 0.0057, "step": 302550 }, { "epoch": 3.2326513168438487, "grad_norm": 0.18920636177062988, "learning_rate": 8.828090295277986e-07, "loss": 0.0313, "step": 302560 }, { "epoch": 3.232758160158128, "grad_norm": 2.5740625858306885, "learning_rate": 8.827982213960705e-07, "loss": 0.0043, "step": 302570 }, { "epoch": 3.2328650034724076, "grad_norm": 0.42280006408691406, "learning_rate": 8.827874128321344e-07, "loss": 0.0425, "step": 302580 }, { "epoch": 3.2329718467866875, "grad_norm": 0.022747274488210678, "learning_rate": 8.827766038360025e-07, "loss": 0.0116, "step": 302590 }, { "epoch": 3.233078690100967, "grad_norm": 0.24641647934913635, "learning_rate": 8.82765794407687e-07, "loss": 0.0241, "step": 302600 }, { "epoch": 3.2331855334152464, "grad_norm": 8.133460998535156, "learning_rate": 8.827549845471998e-07, "loss": 0.032, "step": 302610 }, { "epoch": 3.2332923767295263, "grad_norm": 0.8631286025047302, "learning_rate": 8.827441742545534e-07, "loss": 0.0063, "step": 302620 }, { "epoch": 3.2333992200438058, "grad_norm": 1.024514079093933, "learning_rate": 8.827333635297601e-07, "loss": 0.0071, "step": 302630 }, { "epoch": 3.233506063358085, "grad_norm": 0.6242437362670898, "learning_rate": 8.827225523728317e-07, "loss": 0.0052, "step": 302640 }, { "epoch": 3.233612906672365, "grad_norm": 4.849241256713867, "learning_rate": 8.827117407837807e-07, "loss": 0.0146, "step": 302650 }, { "epoch": 3.2337197499866446, "grad_norm": 0.012125304900109768, "learning_rate": 8.827009287626194e-07, "loss": 0.002, "step": 302660 }, { "epoch": 3.233826593300924, "grad_norm": 1.8017191886901855, "learning_rate": 8.826901163093597e-07, "loss": 0.0021, "step": 302670 }, { "epoch": 3.233933436615204, "grad_norm": 0.04598962143063545, "learning_rate": 8.826793034240141e-07, "loss": 0.0047, "step": 302680 }, { "epoch": 3.2340402799294834, "grad_norm": 0.0015068287029862404, "learning_rate": 8.826684901065946e-07, "loss": 0.0231, "step": 302690 }, { "epoch": 3.234147123243763, "grad_norm": 0.058794908225536346, "learning_rate": 8.826576763571137e-07, "loss": 0.0058, "step": 302700 }, { "epoch": 3.2342539665580428, "grad_norm": 10.028218269348145, "learning_rate": 8.826468621755831e-07, "loss": 0.0992, "step": 302710 }, { "epoch": 3.234360809872322, "grad_norm": 5.0052080154418945, "learning_rate": 8.826360475620154e-07, "loss": 0.0104, "step": 302720 }, { "epoch": 3.2344676531866017, "grad_norm": 0.015395752154290676, "learning_rate": 8.826252325164228e-07, "loss": 0.0232, "step": 302730 }, { "epoch": 3.2345744965008816, "grad_norm": 0.596708357334137, "learning_rate": 8.826144170388174e-07, "loss": 0.0073, "step": 302740 }, { "epoch": 3.234681339815161, "grad_norm": 0.007702948525547981, "learning_rate": 8.826036011292116e-07, "loss": 0.008, "step": 302750 }, { "epoch": 3.2347881831294405, "grad_norm": 0.002215348184108734, "learning_rate": 8.825927847876172e-07, "loss": 0.0337, "step": 302760 }, { "epoch": 3.2348950264437204, "grad_norm": 4.0342864990234375, "learning_rate": 8.825819680140468e-07, "loss": 0.0185, "step": 302770 }, { "epoch": 3.235001869758, "grad_norm": 1.0342422723770142, "learning_rate": 8.825711508085125e-07, "loss": 0.0225, "step": 302780 }, { "epoch": 3.2351087130722793, "grad_norm": 4.308403968811035, "learning_rate": 8.825603331710266e-07, "loss": 0.0206, "step": 302790 }, { "epoch": 3.235215556386559, "grad_norm": 0.5398082733154297, "learning_rate": 8.825495151016009e-07, "loss": 0.0124, "step": 302800 }, { "epoch": 3.2353223997008387, "grad_norm": 1.1628693342208862, "learning_rate": 8.825386966002482e-07, "loss": 0.0071, "step": 302810 }, { "epoch": 3.235429243015118, "grad_norm": 0.5562829971313477, "learning_rate": 8.825278776669802e-07, "loss": 0.008, "step": 302820 }, { "epoch": 3.235536086329398, "grad_norm": 2.74226713180542, "learning_rate": 8.825170583018096e-07, "loss": 0.0165, "step": 302830 }, { "epoch": 3.2356429296436775, "grad_norm": 0.0014684917405247688, "learning_rate": 8.825062385047483e-07, "loss": 0.0084, "step": 302840 }, { "epoch": 3.235749772957957, "grad_norm": 0.07509572058916092, "learning_rate": 8.824954182758086e-07, "loss": 0.0022, "step": 302850 }, { "epoch": 3.235856616272237, "grad_norm": 0.008751039393246174, "learning_rate": 8.824845976150026e-07, "loss": 0.0061, "step": 302860 }, { "epoch": 3.2359634595865163, "grad_norm": 0.2923343777656555, "learning_rate": 8.824737765223426e-07, "loss": 0.0207, "step": 302870 }, { "epoch": 3.236070302900796, "grad_norm": 0.009289140813052654, "learning_rate": 8.824629549978409e-07, "loss": 0.0013, "step": 302880 }, { "epoch": 3.2361771462150757, "grad_norm": 0.003291927045211196, "learning_rate": 8.824521330415098e-07, "loss": 0.0029, "step": 302890 }, { "epoch": 3.236283989529355, "grad_norm": 0.2350320667028427, "learning_rate": 8.824413106533612e-07, "loss": 0.0051, "step": 302900 }, { "epoch": 3.2363908328436346, "grad_norm": 4.5843729972839355, "learning_rate": 8.824304878334076e-07, "loss": 0.0147, "step": 302910 }, { "epoch": 3.2364976761579145, "grad_norm": 0.004730159416794777, "learning_rate": 8.824196645816611e-07, "loss": 0.025, "step": 302920 }, { "epoch": 3.236604519472194, "grad_norm": 3.0920207500457764, "learning_rate": 8.824088408981338e-07, "loss": 0.0081, "step": 302930 }, { "epoch": 3.2367113627864734, "grad_norm": 0.19741885364055634, "learning_rate": 8.823980167828382e-07, "loss": 0.0183, "step": 302940 }, { "epoch": 3.2368182061007533, "grad_norm": 0.014528114348649979, "learning_rate": 8.823871922357864e-07, "loss": 0.0401, "step": 302950 }, { "epoch": 3.236925049415033, "grad_norm": 8.434056282043457, "learning_rate": 8.823763672569906e-07, "loss": 0.0074, "step": 302960 }, { "epoch": 3.2370318927293127, "grad_norm": 0.0221570897847414, "learning_rate": 8.823655418464628e-07, "loss": 0.0002, "step": 302970 }, { "epoch": 3.237138736043592, "grad_norm": 5.32542610168457, "learning_rate": 8.823547160042158e-07, "loss": 0.0197, "step": 302980 }, { "epoch": 3.2372455793578716, "grad_norm": 0.10932915657758713, "learning_rate": 8.823438897302612e-07, "loss": 0.0313, "step": 302990 }, { "epoch": 3.237352422672151, "grad_norm": 3.683556318283081, "learning_rate": 8.823330630246115e-07, "loss": 0.01, "step": 303000 }, { "epoch": 3.237459265986431, "grad_norm": 0.13720707595348358, "learning_rate": 8.823222358872791e-07, "loss": 0.0432, "step": 303010 }, { "epoch": 3.2375661093007104, "grad_norm": 0.020624395459890366, "learning_rate": 8.823114083182759e-07, "loss": 0.0111, "step": 303020 }, { "epoch": 3.2376729526149903, "grad_norm": 0.024120517075061798, "learning_rate": 8.823005803176143e-07, "loss": 0.0088, "step": 303030 }, { "epoch": 3.23777979592927, "grad_norm": 0.1875733733177185, "learning_rate": 8.822897518853065e-07, "loss": 0.0039, "step": 303040 }, { "epoch": 3.2378866392435492, "grad_norm": 7.103012561798096, "learning_rate": 8.822789230213646e-07, "loss": 0.0384, "step": 303050 }, { "epoch": 3.237993482557829, "grad_norm": 2.313612937927246, "learning_rate": 8.822680937258011e-07, "loss": 0.0163, "step": 303060 }, { "epoch": 3.2381003258721086, "grad_norm": 0.052539680153131485, "learning_rate": 8.82257263998628e-07, "loss": 0.0168, "step": 303070 }, { "epoch": 3.238207169186388, "grad_norm": 0.6361705660820007, "learning_rate": 8.822464338398576e-07, "loss": 0.0519, "step": 303080 }, { "epoch": 3.238314012500668, "grad_norm": 2.5656511783599854, "learning_rate": 8.822356032495022e-07, "loss": 0.0132, "step": 303090 }, { "epoch": 3.2384208558149474, "grad_norm": 0.052635058760643005, "learning_rate": 8.822247722275739e-07, "loss": 0.0021, "step": 303100 }, { "epoch": 3.238527699129227, "grad_norm": 1.5740036964416504, "learning_rate": 8.822139407740849e-07, "loss": 0.0185, "step": 303110 }, { "epoch": 3.238634542443507, "grad_norm": 0.015300547704100609, "learning_rate": 8.822031088890476e-07, "loss": 0.0153, "step": 303120 }, { "epoch": 3.2387413857577863, "grad_norm": 0.6800093650817871, "learning_rate": 8.821922765724742e-07, "loss": 0.0305, "step": 303130 }, { "epoch": 3.2388482290720657, "grad_norm": 3.310163736343384, "learning_rate": 8.821814438243768e-07, "loss": 0.009, "step": 303140 }, { "epoch": 3.2389550723863456, "grad_norm": 0.30571654438972473, "learning_rate": 8.821706106447677e-07, "loss": 0.0045, "step": 303150 }, { "epoch": 3.239061915700625, "grad_norm": 5.7654595375061035, "learning_rate": 8.821597770336592e-07, "loss": 0.0128, "step": 303160 }, { "epoch": 3.2391687590149045, "grad_norm": 6.8554253578186035, "learning_rate": 8.821489429910634e-07, "loss": 0.0165, "step": 303170 }, { "epoch": 3.2392756023291844, "grad_norm": 3.865330219268799, "learning_rate": 8.821381085169925e-07, "loss": 0.0058, "step": 303180 }, { "epoch": 3.239382445643464, "grad_norm": 0.04443664476275444, "learning_rate": 8.821272736114591e-07, "loss": 0.0111, "step": 303190 }, { "epoch": 3.2394892889577434, "grad_norm": 0.3843431770801544, "learning_rate": 8.82116438274475e-07, "loss": 0.0375, "step": 303200 }, { "epoch": 3.2395961322720233, "grad_norm": 2.778743028640747, "learning_rate": 8.821056025060526e-07, "loss": 0.0356, "step": 303210 }, { "epoch": 3.2397029755863027, "grad_norm": 0.002573399106040597, "learning_rate": 8.820947663062042e-07, "loss": 0.0097, "step": 303220 }, { "epoch": 3.239809818900582, "grad_norm": 0.012297931127250195, "learning_rate": 8.820839296749419e-07, "loss": 0.014, "step": 303230 }, { "epoch": 3.239916662214862, "grad_norm": 2.053650379180908, "learning_rate": 8.820730926122781e-07, "loss": 0.0031, "step": 303240 }, { "epoch": 3.2400235055291415, "grad_norm": 0.007563556544482708, "learning_rate": 8.820622551182248e-07, "loss": 0.0071, "step": 303250 }, { "epoch": 3.240130348843421, "grad_norm": 0.006936016026884317, "learning_rate": 8.820514171927945e-07, "loss": 0.0044, "step": 303260 }, { "epoch": 3.240237192157701, "grad_norm": 5.895776748657227, "learning_rate": 8.820405788359993e-07, "loss": 0.0084, "step": 303270 }, { "epoch": 3.2403440354719804, "grad_norm": 3.2843847274780273, "learning_rate": 8.820297400478514e-07, "loss": 0.0115, "step": 303280 }, { "epoch": 3.24045087878626, "grad_norm": 5.146067142486572, "learning_rate": 8.820189008283632e-07, "loss": 0.0414, "step": 303290 }, { "epoch": 3.2405577221005397, "grad_norm": 0.00398315628990531, "learning_rate": 8.820080611775467e-07, "loss": 0.0171, "step": 303300 }, { "epoch": 3.240664565414819, "grad_norm": 8.720908164978027, "learning_rate": 8.819972210954143e-07, "loss": 0.0133, "step": 303310 }, { "epoch": 3.2407714087290986, "grad_norm": 7.760694980621338, "learning_rate": 8.819863805819783e-07, "loss": 0.0151, "step": 303320 }, { "epoch": 3.2408782520433785, "grad_norm": 0.029783092439174652, "learning_rate": 8.819755396372507e-07, "loss": 0.0033, "step": 303330 }, { "epoch": 3.240985095357658, "grad_norm": 0.02862778678536415, "learning_rate": 8.81964698261244e-07, "loss": 0.0091, "step": 303340 }, { "epoch": 3.2410919386719375, "grad_norm": 0.04377167671918869, "learning_rate": 8.819538564539703e-07, "loss": 0.0033, "step": 303350 }, { "epoch": 3.2411987819862174, "grad_norm": 0.025972602888941765, "learning_rate": 8.819430142154418e-07, "loss": 0.0032, "step": 303360 }, { "epoch": 3.241305625300497, "grad_norm": 0.03526860475540161, "learning_rate": 8.819321715456709e-07, "loss": 0.0046, "step": 303370 }, { "epoch": 3.2414124686147763, "grad_norm": 6.136613845825195, "learning_rate": 8.819213284446696e-07, "loss": 0.0045, "step": 303380 }, { "epoch": 3.241519311929056, "grad_norm": 4.063258171081543, "learning_rate": 8.819104849124505e-07, "loss": 0.0063, "step": 303390 }, { "epoch": 3.2416261552433356, "grad_norm": 0.03372294828295708, "learning_rate": 8.818996409490256e-07, "loss": 0.0052, "step": 303400 }, { "epoch": 3.241732998557615, "grad_norm": 5.203378677368164, "learning_rate": 8.818887965544071e-07, "loss": 0.0246, "step": 303410 }, { "epoch": 3.241839841871895, "grad_norm": 0.0037145845126360655, "learning_rate": 8.818779517286073e-07, "loss": 0.0078, "step": 303420 }, { "epoch": 3.2419466851861745, "grad_norm": 0.06231102719902992, "learning_rate": 8.818671064716385e-07, "loss": 0.0115, "step": 303430 }, { "epoch": 3.242053528500454, "grad_norm": 0.9705178141593933, "learning_rate": 8.818562607835129e-07, "loss": 0.0377, "step": 303440 }, { "epoch": 3.242160371814734, "grad_norm": 0.009143632836639881, "learning_rate": 8.818454146642429e-07, "loss": 0.0019, "step": 303450 }, { "epoch": 3.2422672151290133, "grad_norm": 0.01575995236635208, "learning_rate": 8.818345681138406e-07, "loss": 0.0115, "step": 303460 }, { "epoch": 3.2423740584432927, "grad_norm": 0.31541383266448975, "learning_rate": 8.81823721132318e-07, "loss": 0.0214, "step": 303470 }, { "epoch": 3.2424809017575726, "grad_norm": 0.03924627974629402, "learning_rate": 8.81812873719688e-07, "loss": 0.0069, "step": 303480 }, { "epoch": 3.242587745071852, "grad_norm": 2.5415549278259277, "learning_rate": 8.818020258759621e-07, "loss": 0.0115, "step": 303490 }, { "epoch": 3.2426945883861316, "grad_norm": 1.0731725692749023, "learning_rate": 8.817911776011531e-07, "loss": 0.0064, "step": 303500 }, { "epoch": 3.2428014317004115, "grad_norm": 0.015939444303512573, "learning_rate": 8.817803288952728e-07, "loss": 0.0013, "step": 303510 }, { "epoch": 3.242908275014691, "grad_norm": 0.0017654859693720937, "learning_rate": 8.817694797583339e-07, "loss": 0.0015, "step": 303520 }, { "epoch": 3.2430151183289704, "grad_norm": 7.997246742248535, "learning_rate": 8.817586301903483e-07, "loss": 0.0049, "step": 303530 }, { "epoch": 3.2431219616432503, "grad_norm": 0.016270466148853302, "learning_rate": 8.817477801913287e-07, "loss": 0.0423, "step": 303540 }, { "epoch": 3.2432288049575297, "grad_norm": 2.2969307899475098, "learning_rate": 8.817369297612866e-07, "loss": 0.0015, "step": 303550 }, { "epoch": 3.243335648271809, "grad_norm": 34.10438919067383, "learning_rate": 8.817260789002351e-07, "loss": 0.0177, "step": 303560 }, { "epoch": 3.243442491586089, "grad_norm": 4.510702610015869, "learning_rate": 8.817152276081859e-07, "loss": 0.0159, "step": 303570 }, { "epoch": 3.2435493349003686, "grad_norm": 39.651710510253906, "learning_rate": 8.817043758851514e-07, "loss": 0.0484, "step": 303580 }, { "epoch": 3.243656178214648, "grad_norm": 1.3708844184875488, "learning_rate": 8.816935237311438e-07, "loss": 0.016, "step": 303590 }, { "epoch": 3.243763021528928, "grad_norm": 0.7198604345321655, "learning_rate": 8.816826711461753e-07, "loss": 0.0182, "step": 303600 }, { "epoch": 3.2438698648432074, "grad_norm": 0.025879476219415665, "learning_rate": 8.816718181302584e-07, "loss": 0.0081, "step": 303610 }, { "epoch": 3.243976708157487, "grad_norm": 11.3739652633667, "learning_rate": 8.816609646834053e-07, "loss": 0.0091, "step": 303620 }, { "epoch": 3.2440835514717667, "grad_norm": 0.006400244310498238, "learning_rate": 8.81650110805628e-07, "loss": 0.0071, "step": 303630 }, { "epoch": 3.244190394786046, "grad_norm": 0.416589617729187, "learning_rate": 8.81639256496939e-07, "loss": 0.0323, "step": 303640 }, { "epoch": 3.2442972381003257, "grad_norm": 1.2278069257736206, "learning_rate": 8.816284017573505e-07, "loss": 0.0176, "step": 303650 }, { "epoch": 3.2444040814146056, "grad_norm": 0.016859417781233788, "learning_rate": 8.816175465868747e-07, "loss": 0.0589, "step": 303660 }, { "epoch": 3.244510924728885, "grad_norm": 1.048287034034729, "learning_rate": 8.816066909855239e-07, "loss": 0.0351, "step": 303670 }, { "epoch": 3.244617768043165, "grad_norm": 1.3413506746292114, "learning_rate": 8.815958349533104e-07, "loss": 0.0077, "step": 303680 }, { "epoch": 3.2447246113574444, "grad_norm": 6.718729496002197, "learning_rate": 8.815849784902462e-07, "loss": 0.0395, "step": 303690 }, { "epoch": 3.244831454671724, "grad_norm": 0.007289357017725706, "learning_rate": 8.815741215963439e-07, "loss": 0.0226, "step": 303700 }, { "epoch": 3.2449382979860033, "grad_norm": 0.01123170368373394, "learning_rate": 8.815632642716156e-07, "loss": 0.0156, "step": 303710 }, { "epoch": 3.245045141300283, "grad_norm": 0.05234917998313904, "learning_rate": 8.815524065160736e-07, "loss": 0.0021, "step": 303720 }, { "epoch": 3.2451519846145627, "grad_norm": 0.130755215883255, "learning_rate": 8.815415483297301e-07, "loss": 0.0007, "step": 303730 }, { "epoch": 3.2452588279288426, "grad_norm": 0.03255376219749451, "learning_rate": 8.815306897125975e-07, "loss": 0.0026, "step": 303740 }, { "epoch": 3.245365671243122, "grad_norm": 0.151380255818367, "learning_rate": 8.815198306646878e-07, "loss": 0.0079, "step": 303750 }, { "epoch": 3.2454725145574015, "grad_norm": 8.122535705566406, "learning_rate": 8.815089711860135e-07, "loss": 0.0187, "step": 303760 }, { "epoch": 3.245579357871681, "grad_norm": 0.15590184926986694, "learning_rate": 8.814981112765868e-07, "loss": 0.0136, "step": 303770 }, { "epoch": 3.245686201185961, "grad_norm": 0.3553037643432617, "learning_rate": 8.8148725093642e-07, "loss": 0.0097, "step": 303780 }, { "epoch": 3.2457930445002403, "grad_norm": 0.006733789574354887, "learning_rate": 8.814763901655251e-07, "loss": 0.0255, "step": 303790 }, { "epoch": 3.24589988781452, "grad_norm": 0.74931401014328, "learning_rate": 8.814655289639146e-07, "loss": 0.0018, "step": 303800 }, { "epoch": 3.2460067311287997, "grad_norm": 1.031086802482605, "learning_rate": 8.814546673316008e-07, "loss": 0.0075, "step": 303810 }, { "epoch": 3.246113574443079, "grad_norm": 0.2881578803062439, "learning_rate": 8.81443805268596e-07, "loss": 0.0129, "step": 303820 }, { "epoch": 3.246220417757359, "grad_norm": 2.764465808868408, "learning_rate": 8.814329427749122e-07, "loss": 0.0075, "step": 303830 }, { "epoch": 3.2463272610716385, "grad_norm": 0.3962835967540741, "learning_rate": 8.814220798505618e-07, "loss": 0.0033, "step": 303840 }, { "epoch": 3.246434104385918, "grad_norm": 0.4407840371131897, "learning_rate": 8.814112164955571e-07, "loss": 0.0627, "step": 303850 }, { "epoch": 3.246540947700198, "grad_norm": 9.498409271240234, "learning_rate": 8.814003527099105e-07, "loss": 0.0139, "step": 303860 }, { "epoch": 3.2466477910144773, "grad_norm": 0.014416610822081566, "learning_rate": 8.81389488493634e-07, "loss": 0.0165, "step": 303870 }, { "epoch": 3.2467546343287568, "grad_norm": 0.9992396831512451, "learning_rate": 8.8137862384674e-07, "loss": 0.0059, "step": 303880 }, { "epoch": 3.2468614776430367, "grad_norm": 1.996347188949585, "learning_rate": 8.813677587692408e-07, "loss": 0.0073, "step": 303890 }, { "epoch": 3.246968320957316, "grad_norm": 0.020503871142864227, "learning_rate": 8.813568932611486e-07, "loss": 0.0042, "step": 303900 }, { "epoch": 3.2470751642715956, "grad_norm": 0.007614276371896267, "learning_rate": 8.813460273224755e-07, "loss": 0.0273, "step": 303910 }, { "epoch": 3.2471820075858755, "grad_norm": 0.008936073631048203, "learning_rate": 8.813351609532343e-07, "loss": 0.0043, "step": 303920 }, { "epoch": 3.247288850900155, "grad_norm": 0.03657322749495506, "learning_rate": 8.813242941534367e-07, "loss": 0.0012, "step": 303930 }, { "epoch": 3.2473956942144344, "grad_norm": 0.006324155256152153, "learning_rate": 8.813134269230952e-07, "loss": 0.0072, "step": 303940 }, { "epoch": 3.2475025375287143, "grad_norm": 0.001699622836895287, "learning_rate": 8.81302559262222e-07, "loss": 0.0097, "step": 303950 }, { "epoch": 3.2476093808429938, "grad_norm": 0.004104724153876305, "learning_rate": 8.812916911708295e-07, "loss": 0.0027, "step": 303960 }, { "epoch": 3.2477162241572732, "grad_norm": 15.801799774169922, "learning_rate": 8.812808226489299e-07, "loss": 0.0327, "step": 303970 }, { "epoch": 3.247823067471553, "grad_norm": 0.5296398997306824, "learning_rate": 8.812699536965355e-07, "loss": 0.0022, "step": 303980 }, { "epoch": 3.2479299107858326, "grad_norm": 8.863204956054688, "learning_rate": 8.812590843136585e-07, "loss": 0.0085, "step": 303990 }, { "epoch": 3.248036754100112, "grad_norm": 0.11139437556266785, "learning_rate": 8.812482145003112e-07, "loss": 0.0015, "step": 304000 }, { "epoch": 3.248143597414392, "grad_norm": 0.0011630733497440815, "learning_rate": 8.812373442565061e-07, "loss": 0.0102, "step": 304010 }, { "epoch": 3.2482504407286714, "grad_norm": 1.7711066007614136, "learning_rate": 8.812264735822551e-07, "loss": 0.0033, "step": 304020 }, { "epoch": 3.248357284042951, "grad_norm": 0.20495104789733887, "learning_rate": 8.812156024775706e-07, "loss": 0.0785, "step": 304030 }, { "epoch": 3.248464127357231, "grad_norm": 8.809884071350098, "learning_rate": 8.81204730942465e-07, "loss": 0.0425, "step": 304040 }, { "epoch": 3.2485709706715102, "grad_norm": 2.882730722427368, "learning_rate": 8.811938589769505e-07, "loss": 0.0199, "step": 304050 }, { "epoch": 3.2486778139857897, "grad_norm": 4.134119510650635, "learning_rate": 8.811829865810393e-07, "loss": 0.0074, "step": 304060 }, { "epoch": 3.2487846573000696, "grad_norm": 2.7047722339630127, "learning_rate": 8.811721137547437e-07, "loss": 0.0211, "step": 304070 }, { "epoch": 3.248891500614349, "grad_norm": 7.695643424987793, "learning_rate": 8.811612404980762e-07, "loss": 0.0133, "step": 304080 }, { "epoch": 3.2489983439286285, "grad_norm": 0.0016898246249184012, "learning_rate": 8.811503668110487e-07, "loss": 0.0002, "step": 304090 }, { "epoch": 3.2491051872429084, "grad_norm": 0.8186026811599731, "learning_rate": 8.811394926936737e-07, "loss": 0.0076, "step": 304100 }, { "epoch": 3.249212030557188, "grad_norm": 0.2671959400177002, "learning_rate": 8.811286181459635e-07, "loss": 0.0086, "step": 304110 }, { "epoch": 3.2493188738714673, "grad_norm": 11.529348373413086, "learning_rate": 8.811177431679303e-07, "loss": 0.0126, "step": 304120 }, { "epoch": 3.2494257171857472, "grad_norm": 0.03907846286892891, "learning_rate": 8.811068677595863e-07, "loss": 0.0011, "step": 304130 }, { "epoch": 3.2495325605000267, "grad_norm": 0.0009321739198639989, "learning_rate": 8.810959919209441e-07, "loss": 0.0022, "step": 304140 }, { "epoch": 3.249639403814306, "grad_norm": 0.003255178453400731, "learning_rate": 8.810851156520156e-07, "loss": 0.0047, "step": 304150 }, { "epoch": 3.249746247128586, "grad_norm": 10.65069580078125, "learning_rate": 8.810742389528133e-07, "loss": 0.0095, "step": 304160 }, { "epoch": 3.2498530904428655, "grad_norm": 2.561990737915039, "learning_rate": 8.810633618233494e-07, "loss": 0.0083, "step": 304170 }, { "epoch": 3.249959933757145, "grad_norm": 0.009793837554752827, "learning_rate": 8.810524842636362e-07, "loss": 0.0029, "step": 304180 }, { "epoch": 3.250066777071425, "grad_norm": 0.030019273981451988, "learning_rate": 8.81041606273686e-07, "loss": 0.0151, "step": 304190 }, { "epoch": 3.2501736203857043, "grad_norm": 3.894275665283203, "learning_rate": 8.810307278535111e-07, "loss": 0.0117, "step": 304200 }, { "epoch": 3.250280463699984, "grad_norm": 0.03237202763557434, "learning_rate": 8.810198490031238e-07, "loss": 0.0055, "step": 304210 }, { "epoch": 3.2503873070142637, "grad_norm": 0.06867856532335281, "learning_rate": 8.810089697225362e-07, "loss": 0.0165, "step": 304220 }, { "epoch": 3.250494150328543, "grad_norm": 0.01084213051944971, "learning_rate": 8.809980900117608e-07, "loss": 0.0015, "step": 304230 }, { "epoch": 3.2506009936428226, "grad_norm": 0.15557961165905, "learning_rate": 8.809872098708098e-07, "loss": 0.0184, "step": 304240 }, { "epoch": 3.2507078369571025, "grad_norm": 0.0411607027053833, "learning_rate": 8.809763292996954e-07, "loss": 0.0033, "step": 304250 }, { "epoch": 3.250814680271382, "grad_norm": 13.52313232421875, "learning_rate": 8.809654482984301e-07, "loss": 0.0567, "step": 304260 }, { "epoch": 3.2509215235856614, "grad_norm": 2.2420654296875, "learning_rate": 8.809545668670259e-07, "loss": 0.0207, "step": 304270 }, { "epoch": 3.2510283668999413, "grad_norm": 1.810848355293274, "learning_rate": 8.809436850054954e-07, "loss": 0.0117, "step": 304280 }, { "epoch": 3.251135210214221, "grad_norm": 4.608799934387207, "learning_rate": 8.809328027138508e-07, "loss": 0.005, "step": 304290 }, { "epoch": 3.2512420535285003, "grad_norm": 0.015071946196258068, "learning_rate": 8.809219199921041e-07, "loss": 0.0045, "step": 304300 }, { "epoch": 3.25134889684278, "grad_norm": 0.2477709949016571, "learning_rate": 8.809110368402679e-07, "loss": 0.0013, "step": 304310 }, { "epoch": 3.2514557401570596, "grad_norm": 4.078135967254639, "learning_rate": 8.809001532583543e-07, "loss": 0.0229, "step": 304320 }, { "epoch": 3.2515625834713395, "grad_norm": 0.038005948066711426, "learning_rate": 8.808892692463759e-07, "loss": 0.0004, "step": 304330 }, { "epoch": 3.251669426785619, "grad_norm": 0.006202465854585171, "learning_rate": 8.808783848043444e-07, "loss": 0.0244, "step": 304340 }, { "epoch": 3.2517762700998984, "grad_norm": 0.005998879671096802, "learning_rate": 8.808674999322727e-07, "loss": 0.0055, "step": 304350 }, { "epoch": 3.251883113414178, "grad_norm": 13.460783958435059, "learning_rate": 8.808566146301728e-07, "loss": 0.039, "step": 304360 }, { "epoch": 3.251989956728458, "grad_norm": 0.12036306411027908, "learning_rate": 8.808457288980571e-07, "loss": 0.0209, "step": 304370 }, { "epoch": 3.2520968000427373, "grad_norm": 0.05249585583806038, "learning_rate": 8.808348427359378e-07, "loss": 0.0161, "step": 304380 }, { "epoch": 3.252203643357017, "grad_norm": 0.03239823877811432, "learning_rate": 8.808239561438272e-07, "loss": 0.0159, "step": 304390 }, { "epoch": 3.2523104866712966, "grad_norm": 0.0018553700065240264, "learning_rate": 8.808130691217377e-07, "loss": 0.0346, "step": 304400 }, { "epoch": 3.252417329985576, "grad_norm": 0.0019427182851359248, "learning_rate": 8.808021816696814e-07, "loss": 0.0114, "step": 304410 }, { "epoch": 3.2525241732998555, "grad_norm": 7.242119312286377, "learning_rate": 8.807912937876707e-07, "loss": 0.0178, "step": 304420 }, { "epoch": 3.2526310166141355, "grad_norm": 0.015283233486115932, "learning_rate": 8.80780405475718e-07, "loss": 0.0146, "step": 304430 }, { "epoch": 3.252737859928415, "grad_norm": 0.21623650193214417, "learning_rate": 8.807695167338352e-07, "loss": 0.0038, "step": 304440 }, { "epoch": 3.252844703242695, "grad_norm": 0.23112362623214722, "learning_rate": 8.807586275620351e-07, "loss": 0.0009, "step": 304450 }, { "epoch": 3.2529515465569743, "grad_norm": 2.400766611099243, "learning_rate": 8.807477379603299e-07, "loss": 0.018, "step": 304460 }, { "epoch": 3.2530583898712537, "grad_norm": 0.10063988715410233, "learning_rate": 8.807368479287316e-07, "loss": 0.0011, "step": 304470 }, { "epoch": 3.253165233185533, "grad_norm": 5.182104587554932, "learning_rate": 8.807259574672525e-07, "loss": 0.0315, "step": 304480 }, { "epoch": 3.253272076499813, "grad_norm": 9.373466491699219, "learning_rate": 8.807150665759053e-07, "loss": 0.0335, "step": 304490 }, { "epoch": 3.2533789198140926, "grad_norm": 2.997846841812134, "learning_rate": 8.80704175254702e-07, "loss": 0.0479, "step": 304500 }, { "epoch": 3.2534857631283725, "grad_norm": 0.6586881279945374, "learning_rate": 8.806932835036549e-07, "loss": 0.0292, "step": 304510 }, { "epoch": 3.253592606442652, "grad_norm": 0.39642807841300964, "learning_rate": 8.806823913227765e-07, "loss": 0.0119, "step": 304520 }, { "epoch": 3.2536994497569314, "grad_norm": 4.093774318695068, "learning_rate": 8.806714987120788e-07, "loss": 0.0239, "step": 304530 }, { "epoch": 3.253806293071211, "grad_norm": 0.7361876368522644, "learning_rate": 8.806606056715743e-07, "loss": 0.005, "step": 304540 }, { "epoch": 3.2539131363854907, "grad_norm": 0.04145785793662071, "learning_rate": 8.806497122012753e-07, "loss": 0.0064, "step": 304550 }, { "epoch": 3.25401997969977, "grad_norm": 0.01042922306805849, "learning_rate": 8.806388183011939e-07, "loss": 0.0067, "step": 304560 }, { "epoch": 3.25412682301405, "grad_norm": 1.9954254627227783, "learning_rate": 8.806279239713427e-07, "loss": 0.0146, "step": 304570 }, { "epoch": 3.2542336663283296, "grad_norm": 0.023222435265779495, "learning_rate": 8.806170292117336e-07, "loss": 0.0063, "step": 304580 }, { "epoch": 3.254340509642609, "grad_norm": 0.03981297090649605, "learning_rate": 8.806061340223794e-07, "loss": 0.0804, "step": 304590 }, { "epoch": 3.254447352956889, "grad_norm": 0.2857305407524109, "learning_rate": 8.80595238403292e-07, "loss": 0.0058, "step": 304600 }, { "epoch": 3.2545541962711684, "grad_norm": 2.55623722076416, "learning_rate": 8.80584342354484e-07, "loss": 0.0092, "step": 304610 }, { "epoch": 3.254661039585448, "grad_norm": 7.8103861808776855, "learning_rate": 8.805734458759675e-07, "loss": 0.0157, "step": 304620 }, { "epoch": 3.2547678828997277, "grad_norm": 0.08929162472486496, "learning_rate": 8.805625489677548e-07, "loss": 0.0194, "step": 304630 }, { "epoch": 3.254874726214007, "grad_norm": 0.8131378889083862, "learning_rate": 8.805516516298583e-07, "loss": 0.0066, "step": 304640 }, { "epoch": 3.2549815695282867, "grad_norm": 2.2719027996063232, "learning_rate": 8.805407538622902e-07, "loss": 0.0101, "step": 304650 }, { "epoch": 3.2550884128425666, "grad_norm": 0.0041189175099134445, "learning_rate": 8.80529855665063e-07, "loss": 0.0237, "step": 304660 }, { "epoch": 3.255195256156846, "grad_norm": 0.6221536993980408, "learning_rate": 8.805189570381888e-07, "loss": 0.0071, "step": 304670 }, { "epoch": 3.2553020994711255, "grad_norm": 4.753644943237305, "learning_rate": 8.805080579816799e-07, "loss": 0.0082, "step": 304680 }, { "epoch": 3.2554089427854054, "grad_norm": 0.30062025785446167, "learning_rate": 8.804971584955487e-07, "loss": 0.0141, "step": 304690 }, { "epoch": 3.255515786099685, "grad_norm": 0.0016982393572106957, "learning_rate": 8.804862585798076e-07, "loss": 0.0359, "step": 304700 }, { "epoch": 3.2556226294139643, "grad_norm": 0.2628290355205536, "learning_rate": 8.804753582344688e-07, "loss": 0.0073, "step": 304710 }, { "epoch": 3.255729472728244, "grad_norm": 0.11280176043510437, "learning_rate": 8.804644574595446e-07, "loss": 0.0058, "step": 304720 }, { "epoch": 3.2558363160425237, "grad_norm": 2.1738975048065186, "learning_rate": 8.804535562550473e-07, "loss": 0.0048, "step": 304730 }, { "epoch": 3.255943159356803, "grad_norm": 0.0025408174842596054, "learning_rate": 8.804426546209891e-07, "loss": 0.0111, "step": 304740 }, { "epoch": 3.256050002671083, "grad_norm": 0.010959960520267487, "learning_rate": 8.804317525573826e-07, "loss": 0.0006, "step": 304750 }, { "epoch": 3.2561568459853625, "grad_norm": 0.5123283267021179, "learning_rate": 8.804208500642398e-07, "loss": 0.0066, "step": 304760 }, { "epoch": 3.256263689299642, "grad_norm": 1.8447258472442627, "learning_rate": 8.804099471415731e-07, "loss": 0.0206, "step": 304770 }, { "epoch": 3.256370532613922, "grad_norm": 0.00265688169747591, "learning_rate": 8.803990437893951e-07, "loss": 0.0031, "step": 304780 }, { "epoch": 3.2564773759282013, "grad_norm": 0.005821303464472294, "learning_rate": 8.803881400077177e-07, "loss": 0.0024, "step": 304790 }, { "epoch": 3.2565842192424808, "grad_norm": 1.184456467628479, "learning_rate": 8.803772357965534e-07, "loss": 0.0018, "step": 304800 }, { "epoch": 3.2566910625567607, "grad_norm": 4.659965991973877, "learning_rate": 8.803663311559144e-07, "loss": 0.0098, "step": 304810 }, { "epoch": 3.25679790587104, "grad_norm": 0.7475781440734863, "learning_rate": 8.803554260858133e-07, "loss": 0.0234, "step": 304820 }, { "epoch": 3.2569047491853196, "grad_norm": 1.5468976497650146, "learning_rate": 8.803445205862621e-07, "loss": 0.0058, "step": 304830 }, { "epoch": 3.2570115924995995, "grad_norm": 0.07386660575866699, "learning_rate": 8.803336146572732e-07, "loss": 0.019, "step": 304840 }, { "epoch": 3.257118435813879, "grad_norm": 0.05192870274186134, "learning_rate": 8.803227082988591e-07, "loss": 0.0104, "step": 304850 }, { "epoch": 3.2572252791281584, "grad_norm": 0.014712145552039146, "learning_rate": 8.803118015110318e-07, "loss": 0.006, "step": 304860 }, { "epoch": 3.2573321224424383, "grad_norm": 0.05225295573472977, "learning_rate": 8.80300894293804e-07, "loss": 0.0046, "step": 304870 }, { "epoch": 3.2574389657567178, "grad_norm": 6.4254984855651855, "learning_rate": 8.802899866471875e-07, "loss": 0.0407, "step": 304880 }, { "epoch": 3.2575458090709972, "grad_norm": 0.43094316124916077, "learning_rate": 8.80279078571195e-07, "loss": 0.0124, "step": 304890 }, { "epoch": 3.257652652385277, "grad_norm": 14.111634254455566, "learning_rate": 8.802681700658387e-07, "loss": 0.0081, "step": 304900 }, { "epoch": 3.2577594956995566, "grad_norm": 0.0025085611268877983, "learning_rate": 8.80257261131131e-07, "loss": 0.0124, "step": 304910 }, { "epoch": 3.257866339013836, "grad_norm": 0.08195408433675766, "learning_rate": 8.802463517670842e-07, "loss": 0.1103, "step": 304920 }, { "epoch": 3.257973182328116, "grad_norm": 2.3973371982574463, "learning_rate": 8.802354419737104e-07, "loss": 0.02, "step": 304930 }, { "epoch": 3.2580800256423954, "grad_norm": 8.743247985839844, "learning_rate": 8.802245317510222e-07, "loss": 0.0313, "step": 304940 }, { "epoch": 3.258186868956675, "grad_norm": 10.118202209472656, "learning_rate": 8.802136210990318e-07, "loss": 0.0746, "step": 304950 }, { "epoch": 3.2582937122709548, "grad_norm": 0.1738552749156952, "learning_rate": 8.802027100177516e-07, "loss": 0.0041, "step": 304960 }, { "epoch": 3.2584005555852342, "grad_norm": 0.09690762311220169, "learning_rate": 8.801917985071938e-07, "loss": 0.0188, "step": 304970 }, { "epoch": 3.2585073988995137, "grad_norm": 1.1237865686416626, "learning_rate": 8.801808865673707e-07, "loss": 0.0117, "step": 304980 }, { "epoch": 3.2586142422137936, "grad_norm": 3.100175142288208, "learning_rate": 8.801699741982946e-07, "loss": 0.0185, "step": 304990 }, { "epoch": 3.258721085528073, "grad_norm": 0.21053458750247955, "learning_rate": 8.801590613999781e-07, "loss": 0.0081, "step": 305000 }, { "epoch": 3.2588279288423525, "grad_norm": 8.881954193115234, "learning_rate": 8.801481481724334e-07, "loss": 0.0083, "step": 305010 }, { "epoch": 3.2589347721566324, "grad_norm": 0.04397443309426308, "learning_rate": 8.801372345156724e-07, "loss": 0.0117, "step": 305020 }, { "epoch": 3.259041615470912, "grad_norm": 1.9588139057159424, "learning_rate": 8.80126320429708e-07, "loss": 0.007, "step": 305030 }, { "epoch": 3.2591484587851913, "grad_norm": 0.016426049172878265, "learning_rate": 8.801154059145524e-07, "loss": 0.0002, "step": 305040 }, { "epoch": 3.2592553020994712, "grad_norm": 0.15163543820381165, "learning_rate": 8.801044909702175e-07, "loss": 0.0094, "step": 305050 }, { "epoch": 3.2593621454137507, "grad_norm": 0.05515780672430992, "learning_rate": 8.800935755967162e-07, "loss": 0.0267, "step": 305060 }, { "epoch": 3.25946898872803, "grad_norm": 0.14790821075439453, "learning_rate": 8.800826597940605e-07, "loss": 0.027, "step": 305070 }, { "epoch": 3.25957583204231, "grad_norm": 2.3429906368255615, "learning_rate": 8.800717435622627e-07, "loss": 0.0132, "step": 305080 }, { "epoch": 3.2596826753565895, "grad_norm": 10.581124305725098, "learning_rate": 8.800608269013354e-07, "loss": 0.0171, "step": 305090 }, { "epoch": 3.2597895186708694, "grad_norm": 1.2560547590255737, "learning_rate": 8.800499098112905e-07, "loss": 0.0036, "step": 305100 }, { "epoch": 3.259896361985149, "grad_norm": 0.0016229621833190322, "learning_rate": 8.800389922921409e-07, "loss": 0.0226, "step": 305110 }, { "epoch": 3.2600032052994283, "grad_norm": 0.0034946806263178587, "learning_rate": 8.800280743438984e-07, "loss": 0.0053, "step": 305120 }, { "epoch": 3.260110048613708, "grad_norm": 0.8034050464630127, "learning_rate": 8.800171559665755e-07, "loss": 0.0013, "step": 305130 }, { "epoch": 3.2602168919279877, "grad_norm": 0.16908077895641327, "learning_rate": 8.800062371601846e-07, "loss": 0.0105, "step": 305140 }, { "epoch": 3.260323735242267, "grad_norm": 2.2544286251068115, "learning_rate": 8.79995317924738e-07, "loss": 0.0007, "step": 305150 }, { "epoch": 3.260430578556547, "grad_norm": 0.2663942575454712, "learning_rate": 8.799843982602478e-07, "loss": 0.0278, "step": 305160 }, { "epoch": 3.2605374218708265, "grad_norm": 0.03532358631491661, "learning_rate": 8.799734781667268e-07, "loss": 0.0288, "step": 305170 }, { "epoch": 3.260644265185106, "grad_norm": 0.0005316443275660276, "learning_rate": 8.799625576441869e-07, "loss": 0.0105, "step": 305180 }, { "epoch": 3.2607511084993854, "grad_norm": 0.0067304628901183605, "learning_rate": 8.799516366926407e-07, "loss": 0.0139, "step": 305190 }, { "epoch": 3.2608579518136653, "grad_norm": 0.06680051237344742, "learning_rate": 8.799407153121003e-07, "loss": 0.0003, "step": 305200 }, { "epoch": 3.260964795127945, "grad_norm": 0.1468789428472519, "learning_rate": 8.799297935025783e-07, "loss": 0.0064, "step": 305210 }, { "epoch": 3.2610716384422247, "grad_norm": 3.1760125160217285, "learning_rate": 8.799188712640869e-07, "loss": 0.0107, "step": 305220 }, { "epoch": 3.261178481756504, "grad_norm": 0.0035949957091361284, "learning_rate": 8.799079485966383e-07, "loss": 0.0149, "step": 305230 }, { "epoch": 3.2612853250707836, "grad_norm": 11.21463394165039, "learning_rate": 8.798970255002449e-07, "loss": 0.0601, "step": 305240 }, { "epoch": 3.261392168385063, "grad_norm": 0.2338399440050125, "learning_rate": 8.798861019749193e-07, "loss": 0.0116, "step": 305250 }, { "epoch": 3.261499011699343, "grad_norm": 0.37865108251571655, "learning_rate": 8.798751780206734e-07, "loss": 0.0047, "step": 305260 }, { "epoch": 3.2616058550136224, "grad_norm": 4.337898254394531, "learning_rate": 8.7986425363752e-07, "loss": 0.0256, "step": 305270 }, { "epoch": 3.2617126983279023, "grad_norm": 0.0037221303209662437, "learning_rate": 8.79853328825471e-07, "loss": 0.0001, "step": 305280 }, { "epoch": 3.261819541642182, "grad_norm": 0.013606631197035313, "learning_rate": 8.79842403584539e-07, "loss": 0.016, "step": 305290 }, { "epoch": 3.2619263849564613, "grad_norm": 0.3808547258377075, "learning_rate": 8.798314779147363e-07, "loss": 0.0032, "step": 305300 }, { "epoch": 3.2620332282707407, "grad_norm": 0.01858951523900032, "learning_rate": 8.798205518160751e-07, "loss": 0.023, "step": 305310 }, { "epoch": 3.2621400715850206, "grad_norm": 3.9811577796936035, "learning_rate": 8.798096252885678e-07, "loss": 0.0171, "step": 305320 }, { "epoch": 3.2622469148993, "grad_norm": 1.9855515956878662, "learning_rate": 8.797986983322267e-07, "loss": 0.0199, "step": 305330 }, { "epoch": 3.26235375821358, "grad_norm": 0.28084442019462585, "learning_rate": 8.797877709470645e-07, "loss": 0.0167, "step": 305340 }, { "epoch": 3.2624606015278594, "grad_norm": 0.15387696027755737, "learning_rate": 8.79776843133093e-07, "loss": 0.0338, "step": 305350 }, { "epoch": 3.262567444842139, "grad_norm": 1.1086639165878296, "learning_rate": 8.79765914890325e-07, "loss": 0.035, "step": 305360 }, { "epoch": 3.262674288156419, "grad_norm": 6.162062168121338, "learning_rate": 8.797549862187723e-07, "loss": 0.0042, "step": 305370 }, { "epoch": 3.2627811314706983, "grad_norm": 0.009465966373682022, "learning_rate": 8.797440571184478e-07, "loss": 0.0098, "step": 305380 }, { "epoch": 3.2628879747849777, "grad_norm": 0.10025422275066376, "learning_rate": 8.797331275893634e-07, "loss": 0.0117, "step": 305390 }, { "epoch": 3.2629948180992576, "grad_norm": 3.890465021133423, "learning_rate": 8.797221976315319e-07, "loss": 0.009, "step": 305400 }, { "epoch": 3.263101661413537, "grad_norm": 0.027883127331733704, "learning_rate": 8.797112672449651e-07, "loss": 0.0148, "step": 305410 }, { "epoch": 3.2632085047278165, "grad_norm": 7.521757125854492, "learning_rate": 8.797003364296759e-07, "loss": 0.0199, "step": 305420 }, { "epoch": 3.2633153480420964, "grad_norm": 0.02157873660326004, "learning_rate": 8.796894051856762e-07, "loss": 0.0065, "step": 305430 }, { "epoch": 3.263422191356376, "grad_norm": 0.1233663409948349, "learning_rate": 8.796784735129786e-07, "loss": 0.0448, "step": 305440 }, { "epoch": 3.2635290346706554, "grad_norm": 0.3825376629829407, "learning_rate": 8.796675414115952e-07, "loss": 0.0062, "step": 305450 }, { "epoch": 3.2636358779849353, "grad_norm": 6.199644088745117, "learning_rate": 8.796566088815387e-07, "loss": 0.0262, "step": 305460 }, { "epoch": 3.2637427212992147, "grad_norm": 2.851412773132324, "learning_rate": 8.79645675922821e-07, "loss": 0.0018, "step": 305470 }, { "epoch": 3.263849564613494, "grad_norm": 1.4028031826019287, "learning_rate": 8.796347425354549e-07, "loss": 0.0026, "step": 305480 }, { "epoch": 3.263956407927774, "grad_norm": 0.015507438220083714, "learning_rate": 8.796238087194523e-07, "loss": 0.008, "step": 305490 }, { "epoch": 3.2640632512420535, "grad_norm": 0.003485527355223894, "learning_rate": 8.796128744748259e-07, "loss": 0.0143, "step": 305500 }, { "epoch": 3.264170094556333, "grad_norm": 1.5420957803726196, "learning_rate": 8.796019398015879e-07, "loss": 0.0337, "step": 305510 }, { "epoch": 3.264276937870613, "grad_norm": 0.12100283056497574, "learning_rate": 8.795910046997506e-07, "loss": 0.0195, "step": 305520 }, { "epoch": 3.2643837811848924, "grad_norm": 0.09702523052692413, "learning_rate": 8.795800691693265e-07, "loss": 0.0194, "step": 305530 }, { "epoch": 3.264490624499172, "grad_norm": 0.0028956946916878223, "learning_rate": 8.795691332103279e-07, "loss": 0.0054, "step": 305540 }, { "epoch": 3.2645974678134517, "grad_norm": 0.2302807718515396, "learning_rate": 8.79558196822767e-07, "loss": 0.0006, "step": 305550 }, { "epoch": 3.264704311127731, "grad_norm": 5.889350414276123, "learning_rate": 8.795472600066562e-07, "loss": 0.0137, "step": 305560 }, { "epoch": 3.2648111544420106, "grad_norm": 0.771872341632843, "learning_rate": 8.79536322762008e-07, "loss": 0.0049, "step": 305570 }, { "epoch": 3.2649179977562905, "grad_norm": 0.9334471225738525, "learning_rate": 8.795253850888347e-07, "loss": 0.0062, "step": 305580 }, { "epoch": 3.26502484107057, "grad_norm": 1.2452622652053833, "learning_rate": 8.795144469871485e-07, "loss": 0.0074, "step": 305590 }, { "epoch": 3.2651316843848495, "grad_norm": 1.2028659582138062, "learning_rate": 8.795035084569617e-07, "loss": 0.0094, "step": 305600 }, { "epoch": 3.2652385276991294, "grad_norm": 0.02014222741127014, "learning_rate": 8.794925694982871e-07, "loss": 0.0104, "step": 305610 }, { "epoch": 3.265345371013409, "grad_norm": 0.6612761616706848, "learning_rate": 8.794816301111365e-07, "loss": 0.0019, "step": 305620 }, { "epoch": 3.2654522143276883, "grad_norm": 3.2887864112854004, "learning_rate": 8.794706902955227e-07, "loss": 0.0097, "step": 305630 }, { "epoch": 3.265559057641968, "grad_norm": 0.09168357402086258, "learning_rate": 8.794597500514577e-07, "loss": 0.0181, "step": 305640 }, { "epoch": 3.2656659009562476, "grad_norm": 4.623546600341797, "learning_rate": 8.794488093789541e-07, "loss": 0.0093, "step": 305650 }, { "epoch": 3.265772744270527, "grad_norm": 0.23127557337284088, "learning_rate": 8.79437868278024e-07, "loss": 0.017, "step": 305660 }, { "epoch": 3.265879587584807, "grad_norm": 0.06622392684221268, "learning_rate": 8.7942692674868e-07, "loss": 0.0111, "step": 305670 }, { "epoch": 3.2659864308990865, "grad_norm": 0.47486230731010437, "learning_rate": 8.794159847909343e-07, "loss": 0.0043, "step": 305680 }, { "epoch": 3.266093274213366, "grad_norm": 0.035294175148010254, "learning_rate": 8.794050424047997e-07, "loss": 0.0179, "step": 305690 }, { "epoch": 3.266200117527646, "grad_norm": 0.4460209310054779, "learning_rate": 8.793940995902877e-07, "loss": 0.0154, "step": 305700 }, { "epoch": 3.2663069608419253, "grad_norm": 0.018442004919052124, "learning_rate": 8.793831563474112e-07, "loss": 0.0005, "step": 305710 }, { "epoch": 3.2664138041562047, "grad_norm": 5.088815212249756, "learning_rate": 8.793722126761827e-07, "loss": 0.0115, "step": 305720 }, { "epoch": 3.2665206474704847, "grad_norm": 0.9533485770225525, "learning_rate": 8.793612685766142e-07, "loss": 0.0085, "step": 305730 }, { "epoch": 3.266627490784764, "grad_norm": 0.0017954211216419935, "learning_rate": 8.793503240487183e-07, "loss": 0.0035, "step": 305740 }, { "epoch": 3.2667343340990436, "grad_norm": 4.89414644241333, "learning_rate": 8.79339379092507e-07, "loss": 0.0052, "step": 305750 }, { "epoch": 3.2668411774133235, "grad_norm": 0.182661235332489, "learning_rate": 8.793284337079931e-07, "loss": 0.0124, "step": 305760 }, { "epoch": 3.266948020727603, "grad_norm": 0.9852304458618164, "learning_rate": 8.793174878951888e-07, "loss": 0.0012, "step": 305770 }, { "epoch": 3.2670548640418824, "grad_norm": 0.0028738107066601515, "learning_rate": 8.793065416541064e-07, "loss": 0.0063, "step": 305780 }, { "epoch": 3.2671617073561623, "grad_norm": 0.0027693784795701504, "learning_rate": 8.792955949847583e-07, "loss": 0.0117, "step": 305790 }, { "epoch": 3.2672685506704418, "grad_norm": 10.167947769165039, "learning_rate": 8.792846478871567e-07, "loss": 0.0288, "step": 305800 }, { "epoch": 3.2673753939847217, "grad_norm": 4.9266743659973145, "learning_rate": 8.792737003613142e-07, "loss": 0.0305, "step": 305810 }, { "epoch": 3.267482237299001, "grad_norm": 0.15432476997375488, "learning_rate": 8.792627524072431e-07, "loss": 0.0183, "step": 305820 }, { "epoch": 3.2675890806132806, "grad_norm": 7.044228553771973, "learning_rate": 8.792518040249557e-07, "loss": 0.035, "step": 305830 }, { "epoch": 3.26769592392756, "grad_norm": 0.0010309016797691584, "learning_rate": 8.792408552144646e-07, "loss": 0.0415, "step": 305840 }, { "epoch": 3.26780276724184, "grad_norm": 0.003732437966391444, "learning_rate": 8.792299059757816e-07, "loss": 0.0093, "step": 305850 }, { "epoch": 3.2679096105561194, "grad_norm": 2.851106882095337, "learning_rate": 8.792189563089196e-07, "loss": 0.0114, "step": 305860 }, { "epoch": 3.2680164538703993, "grad_norm": 0.26243695616722107, "learning_rate": 8.792080062138907e-07, "loss": 0.013, "step": 305870 }, { "epoch": 3.2681232971846788, "grad_norm": 0.0057746609672904015, "learning_rate": 8.791970556907073e-07, "loss": 0.0188, "step": 305880 }, { "epoch": 3.268230140498958, "grad_norm": 14.090808868408203, "learning_rate": 8.79186104739382e-07, "loss": 0.0155, "step": 305890 }, { "epoch": 3.2683369838132377, "grad_norm": 0.710395336151123, "learning_rate": 8.791751533599269e-07, "loss": 0.0105, "step": 305900 }, { "epoch": 3.2684438271275176, "grad_norm": 0.24078060686588287, "learning_rate": 8.791642015523544e-07, "loss": 0.0067, "step": 305910 }, { "epoch": 3.268550670441797, "grad_norm": 0.02994675002992153, "learning_rate": 8.791532493166769e-07, "loss": 0.0061, "step": 305920 }, { "epoch": 3.268657513756077, "grad_norm": 0.11459778994321823, "learning_rate": 8.791422966529067e-07, "loss": 0.0002, "step": 305930 }, { "epoch": 3.2687643570703564, "grad_norm": 0.031110763549804688, "learning_rate": 8.791313435610564e-07, "loss": 0.0046, "step": 305940 }, { "epoch": 3.268871200384636, "grad_norm": 0.013248564675450325, "learning_rate": 8.791203900411379e-07, "loss": 0.0066, "step": 305950 }, { "epoch": 3.2689780436989153, "grad_norm": 0.1239834651350975, "learning_rate": 8.791094360931641e-07, "loss": 0.0121, "step": 305960 }, { "epoch": 3.269084887013195, "grad_norm": 0.03329310566186905, "learning_rate": 8.790984817171472e-07, "loss": 0.0083, "step": 305970 }, { "epoch": 3.2691917303274747, "grad_norm": 8.198406219482422, "learning_rate": 8.790875269130993e-07, "loss": 0.0125, "step": 305980 }, { "epoch": 3.2692985736417546, "grad_norm": 0.0173792727291584, "learning_rate": 8.790765716810331e-07, "loss": 0.051, "step": 305990 }, { "epoch": 3.269405416956034, "grad_norm": 0.029593544080853462, "learning_rate": 8.790656160209609e-07, "loss": 0.0068, "step": 306000 }, { "epoch": 3.2695122602703135, "grad_norm": 0.7303255200386047, "learning_rate": 8.790546599328947e-07, "loss": 0.002, "step": 306010 }, { "epoch": 3.269619103584593, "grad_norm": 0.4602638781070709, "learning_rate": 8.790437034168475e-07, "loss": 0.0105, "step": 306020 }, { "epoch": 3.269725946898873, "grad_norm": 1.0067412853240967, "learning_rate": 8.790327464728312e-07, "loss": 0.0201, "step": 306030 }, { "epoch": 3.2698327902131523, "grad_norm": 3.0509450435638428, "learning_rate": 8.790217891008585e-07, "loss": 0.003, "step": 306040 }, { "epoch": 3.269939633527432, "grad_norm": 0.11557509005069733, "learning_rate": 8.790108313009415e-07, "loss": 0.0102, "step": 306050 }, { "epoch": 3.2700464768417117, "grad_norm": 0.039395157247781754, "learning_rate": 8.789998730730925e-07, "loss": 0.0075, "step": 306060 }, { "epoch": 3.270153320155991, "grad_norm": 0.016973352059721947, "learning_rate": 8.789889144173241e-07, "loss": 0.0161, "step": 306070 }, { "epoch": 3.270260163470271, "grad_norm": 2.8573596477508545, "learning_rate": 8.789779553336488e-07, "loss": 0.0013, "step": 306080 }, { "epoch": 3.2703670067845505, "grad_norm": 0.03157415986061096, "learning_rate": 8.789669958220787e-07, "loss": 0.0018, "step": 306090 }, { "epoch": 3.27047385009883, "grad_norm": 0.020268503576517105, "learning_rate": 8.789560358826262e-07, "loss": 0.0098, "step": 306100 }, { "epoch": 3.27058069341311, "grad_norm": 0.3785167634487152, "learning_rate": 8.789450755153037e-07, "loss": 0.0276, "step": 306110 }, { "epoch": 3.2706875367273893, "grad_norm": 0.5484024286270142, "learning_rate": 8.789341147201237e-07, "loss": 0.0262, "step": 306120 }, { "epoch": 3.270794380041669, "grad_norm": 0.009023016318678856, "learning_rate": 8.789231534970984e-07, "loss": 0.016, "step": 306130 }, { "epoch": 3.2709012233559487, "grad_norm": 0.024791764095425606, "learning_rate": 8.789121918462404e-07, "loss": 0.0081, "step": 306140 }, { "epoch": 3.271008066670228, "grad_norm": 0.006149867083877325, "learning_rate": 8.789012297675617e-07, "loss": 0.0177, "step": 306150 }, { "epoch": 3.2711149099845076, "grad_norm": 2.6795265674591064, "learning_rate": 8.788902672610752e-07, "loss": 0.0161, "step": 306160 }, { "epoch": 3.2712217532987875, "grad_norm": 0.31071731448173523, "learning_rate": 8.788793043267928e-07, "loss": 0.0063, "step": 306170 }, { "epoch": 3.271328596613067, "grad_norm": 2.5063343048095703, "learning_rate": 8.788683409647271e-07, "loss": 0.0236, "step": 306180 }, { "epoch": 3.2714354399273464, "grad_norm": 0.034348905086517334, "learning_rate": 8.788573771748905e-07, "loss": 0.0311, "step": 306190 }, { "epoch": 3.2715422832416263, "grad_norm": 15.438183784484863, "learning_rate": 8.788464129572952e-07, "loss": 0.0451, "step": 306200 }, { "epoch": 3.271649126555906, "grad_norm": 0.0033728426788002253, "learning_rate": 8.78835448311954e-07, "loss": 0.0086, "step": 306210 }, { "epoch": 3.2717559698701852, "grad_norm": 0.5203073024749756, "learning_rate": 8.788244832388787e-07, "loss": 0.016, "step": 306220 }, { "epoch": 3.271862813184465, "grad_norm": 2.9004368782043457, "learning_rate": 8.788135177380821e-07, "loss": 0.0081, "step": 306230 }, { "epoch": 3.2719696564987446, "grad_norm": 0.03845730796456337, "learning_rate": 8.788025518095764e-07, "loss": 0.0113, "step": 306240 }, { "epoch": 3.272076499813024, "grad_norm": 0.014143859036266804, "learning_rate": 8.787915854533741e-07, "loss": 0.0036, "step": 306250 }, { "epoch": 3.272183343127304, "grad_norm": 0.29905807971954346, "learning_rate": 8.787806186694874e-07, "loss": 0.0054, "step": 306260 }, { "epoch": 3.2722901864415834, "grad_norm": 3.5203495025634766, "learning_rate": 8.787696514579289e-07, "loss": 0.0412, "step": 306270 }, { "epoch": 3.272397029755863, "grad_norm": 1.2243880033493042, "learning_rate": 8.787586838187109e-07, "loss": 0.0257, "step": 306280 }, { "epoch": 3.272503873070143, "grad_norm": 0.039712123572826385, "learning_rate": 8.787477157518456e-07, "loss": 0.0268, "step": 306290 }, { "epoch": 3.2726107163844222, "grad_norm": 0.037395671010017395, "learning_rate": 8.787367472573456e-07, "loss": 0.0277, "step": 306300 }, { "epoch": 3.2727175596987017, "grad_norm": 0.19015519320964813, "learning_rate": 8.787257783352233e-07, "loss": 0.0118, "step": 306310 }, { "epoch": 3.2728244030129816, "grad_norm": 0.2858496606349945, "learning_rate": 8.787148089854909e-07, "loss": 0.0105, "step": 306320 }, { "epoch": 3.272931246327261, "grad_norm": 0.019584665074944496, "learning_rate": 8.787038392081609e-07, "loss": 0.0245, "step": 306330 }, { "epoch": 3.2730380896415405, "grad_norm": 1.2501327991485596, "learning_rate": 8.786928690032457e-07, "loss": 0.0096, "step": 306340 }, { "epoch": 3.2731449329558204, "grad_norm": 0.004076823592185974, "learning_rate": 8.786818983707578e-07, "loss": 0.0189, "step": 306350 }, { "epoch": 3.2732517762701, "grad_norm": 0.1993260681629181, "learning_rate": 8.786709273107093e-07, "loss": 0.0101, "step": 306360 }, { "epoch": 3.2733586195843793, "grad_norm": 1.6978209018707275, "learning_rate": 8.786599558231129e-07, "loss": 0.011, "step": 306370 }, { "epoch": 3.2734654628986593, "grad_norm": 0.3914799988269806, "learning_rate": 8.786489839079808e-07, "loss": 0.0192, "step": 306380 }, { "epoch": 3.2735723062129387, "grad_norm": 7.2519989013671875, "learning_rate": 8.786380115653253e-07, "loss": 0.0396, "step": 306390 }, { "epoch": 3.273679149527218, "grad_norm": 0.19276000559329987, "learning_rate": 8.786270387951591e-07, "loss": 0.0032, "step": 306400 }, { "epoch": 3.273785992841498, "grad_norm": 7.970150470733643, "learning_rate": 8.786160655974943e-07, "loss": 0.0132, "step": 306410 }, { "epoch": 3.2738928361557775, "grad_norm": 18.514583587646484, "learning_rate": 8.786050919723433e-07, "loss": 0.0376, "step": 306420 }, { "epoch": 3.273999679470057, "grad_norm": 0.009317505173385143, "learning_rate": 8.785941179197186e-07, "loss": 0.0065, "step": 306430 }, { "epoch": 3.274106522784337, "grad_norm": 0.004537144210189581, "learning_rate": 8.785831434396326e-07, "loss": 0.0039, "step": 306440 }, { "epoch": 3.2742133660986164, "grad_norm": 0.10545849800109863, "learning_rate": 8.785721685320977e-07, "loss": 0.0031, "step": 306450 }, { "epoch": 3.274320209412896, "grad_norm": 0.011073783971369267, "learning_rate": 8.785611931971262e-07, "loss": 0.0004, "step": 306460 }, { "epoch": 3.2744270527271757, "grad_norm": 7.557424545288086, "learning_rate": 8.785502174347305e-07, "loss": 0.0264, "step": 306470 }, { "epoch": 3.274533896041455, "grad_norm": 0.0020883858669549227, "learning_rate": 8.78539241244923e-07, "loss": 0.0068, "step": 306480 }, { "epoch": 3.2746407393557346, "grad_norm": 0.0008112448267638683, "learning_rate": 8.785282646277164e-07, "loss": 0.0059, "step": 306490 }, { "epoch": 3.2747475826700145, "grad_norm": 0.09141965955495834, "learning_rate": 8.785172875831226e-07, "loss": 0.033, "step": 306500 }, { "epoch": 3.274854425984294, "grad_norm": 0.054399263113737106, "learning_rate": 8.785063101111543e-07, "loss": 0.0035, "step": 306510 }, { "epoch": 3.2749612692985735, "grad_norm": 0.02709246054291725, "learning_rate": 8.784953322118237e-07, "loss": 0.0051, "step": 306520 }, { "epoch": 3.2750681126128534, "grad_norm": 0.8068149089813232, "learning_rate": 8.784843538851434e-07, "loss": 0.0246, "step": 306530 }, { "epoch": 3.275174955927133, "grad_norm": 5.697800159454346, "learning_rate": 8.784733751311256e-07, "loss": 0.0052, "step": 306540 }, { "epoch": 3.2752817992414123, "grad_norm": 0.018758021295070648, "learning_rate": 8.784623959497829e-07, "loss": 0.0066, "step": 306550 }, { "epoch": 3.275388642555692, "grad_norm": 0.01117190346121788, "learning_rate": 8.784514163411275e-07, "loss": 0.085, "step": 306560 }, { "epoch": 3.2754954858699716, "grad_norm": 6.108309745788574, "learning_rate": 8.78440436305172e-07, "loss": 0.0696, "step": 306570 }, { "epoch": 3.2756023291842515, "grad_norm": 0.007415921427309513, "learning_rate": 8.784294558419286e-07, "loss": 0.0028, "step": 306580 }, { "epoch": 3.275709172498531, "grad_norm": 0.011687024496495724, "learning_rate": 8.784184749514098e-07, "loss": 0.0027, "step": 306590 }, { "epoch": 3.2758160158128105, "grad_norm": 0.10004696995019913, "learning_rate": 8.784074936336279e-07, "loss": 0.0365, "step": 306600 }, { "epoch": 3.27592285912709, "grad_norm": 8.270278930664062, "learning_rate": 8.783965118885956e-07, "loss": 0.0245, "step": 306610 }, { "epoch": 3.27602970244137, "grad_norm": 4.767293453216553, "learning_rate": 8.783855297163249e-07, "loss": 0.0395, "step": 306620 }, { "epoch": 3.2761365457556493, "grad_norm": 0.008577125146985054, "learning_rate": 8.783745471168284e-07, "loss": 0.0035, "step": 306630 }, { "epoch": 3.276243389069929, "grad_norm": 2.1697723865509033, "learning_rate": 8.783635640901185e-07, "loss": 0.0225, "step": 306640 }, { "epoch": 3.2763502323842086, "grad_norm": 0.4503099024295807, "learning_rate": 8.783525806362076e-07, "loss": 0.0094, "step": 306650 }, { "epoch": 3.276457075698488, "grad_norm": 0.023842982947826385, "learning_rate": 8.78341596755108e-07, "loss": 0.0107, "step": 306660 }, { "epoch": 3.2765639190127676, "grad_norm": 7.854767322540283, "learning_rate": 8.783306124468322e-07, "loss": 0.009, "step": 306670 }, { "epoch": 3.2766707623270475, "grad_norm": 1.2740919589996338, "learning_rate": 8.783196277113927e-07, "loss": 0.0424, "step": 306680 }, { "epoch": 3.276777605641327, "grad_norm": 0.756370484828949, "learning_rate": 8.783086425488018e-07, "loss": 0.0045, "step": 306690 }, { "epoch": 3.276884448955607, "grad_norm": 0.0036479386035352945, "learning_rate": 8.782976569590717e-07, "loss": 0.0016, "step": 306700 }, { "epoch": 3.2769912922698863, "grad_norm": 0.004085950553417206, "learning_rate": 8.782866709422152e-07, "loss": 0.0171, "step": 306710 }, { "epoch": 3.2770981355841657, "grad_norm": 0.04769182205200195, "learning_rate": 8.782756844982443e-07, "loss": 0.0122, "step": 306720 }, { "epoch": 3.277204978898445, "grad_norm": 0.006354419980198145, "learning_rate": 8.782646976271718e-07, "loss": 0.0173, "step": 306730 }, { "epoch": 3.277311822212725, "grad_norm": 0.03269410505890846, "learning_rate": 8.782537103290098e-07, "loss": 0.0057, "step": 306740 }, { "epoch": 3.2774186655270046, "grad_norm": 2.513822555541992, "learning_rate": 8.782427226037708e-07, "loss": 0.008, "step": 306750 }, { "epoch": 3.2775255088412845, "grad_norm": 0.0024584014900028706, "learning_rate": 8.782317344514671e-07, "loss": 0.0054, "step": 306760 }, { "epoch": 3.277632352155564, "grad_norm": 5.502831935882568, "learning_rate": 8.782207458721115e-07, "loss": 0.0113, "step": 306770 }, { "epoch": 3.2777391954698434, "grad_norm": 0.002055092016234994, "learning_rate": 8.78209756865716e-07, "loss": 0.0066, "step": 306780 }, { "epoch": 3.277846038784123, "grad_norm": 1.3440543413162231, "learning_rate": 8.78198767432293e-07, "loss": 0.0088, "step": 306790 }, { "epoch": 3.2779528820984027, "grad_norm": 0.05355362966656685, "learning_rate": 8.781877775718554e-07, "loss": 0.0073, "step": 306800 }, { "epoch": 3.278059725412682, "grad_norm": 2.178391695022583, "learning_rate": 8.781767872844149e-07, "loss": 0.0065, "step": 306810 }, { "epoch": 3.278166568726962, "grad_norm": Infinity, "learning_rate": 8.781657965699845e-07, "loss": 0.0101, "step": 306820 }, { "epoch": 3.2782734120412416, "grad_norm": 0.005357615649700165, "learning_rate": 8.781548054285761e-07, "loss": 0.0171, "step": 306830 }, { "epoch": 3.278380255355521, "grad_norm": 3.0715279579162598, "learning_rate": 8.781438138602026e-07, "loss": 0.0082, "step": 306840 }, { "epoch": 3.278487098669801, "grad_norm": 1.7131654024124146, "learning_rate": 8.781328218648763e-07, "loss": 0.0085, "step": 306850 }, { "epoch": 3.2785939419840804, "grad_norm": 0.40167132019996643, "learning_rate": 8.781218294426092e-07, "loss": 0.0373, "step": 306860 }, { "epoch": 3.27870078529836, "grad_norm": 0.006790644954890013, "learning_rate": 8.781108365934143e-07, "loss": 0.0012, "step": 306870 }, { "epoch": 3.2788076286126397, "grad_norm": 0.06418535113334656, "learning_rate": 8.780998433173036e-07, "loss": 0.0066, "step": 306880 }, { "epoch": 3.278914471926919, "grad_norm": 3.802469491958618, "learning_rate": 8.780888496142897e-07, "loss": 0.0225, "step": 306890 }, { "epoch": 3.2790213152411987, "grad_norm": 0.900101900100708, "learning_rate": 8.780778554843848e-07, "loss": 0.0156, "step": 306900 }, { "epoch": 3.2791281585554786, "grad_norm": 4.258310794830322, "learning_rate": 8.780668609276016e-07, "loss": 0.0124, "step": 306910 }, { "epoch": 3.279235001869758, "grad_norm": 3.378772735595703, "learning_rate": 8.780558659439524e-07, "loss": 0.0179, "step": 306920 }, { "epoch": 3.2793418451840375, "grad_norm": 6.18445348739624, "learning_rate": 8.780448705334495e-07, "loss": 0.0197, "step": 306930 }, { "epoch": 3.2794486884983174, "grad_norm": 0.5374414920806885, "learning_rate": 8.780338746961055e-07, "loss": 0.0037, "step": 306940 }, { "epoch": 3.279555531812597, "grad_norm": 13.020998001098633, "learning_rate": 8.780228784319326e-07, "loss": 0.0245, "step": 306950 }, { "epoch": 3.2796623751268763, "grad_norm": 7.377540588378906, "learning_rate": 8.780118817409435e-07, "loss": 0.0106, "step": 306960 }, { "epoch": 3.279769218441156, "grad_norm": 14.509418487548828, "learning_rate": 8.780008846231503e-07, "loss": 0.0601, "step": 306970 }, { "epoch": 3.2798760617554357, "grad_norm": 2.405120372772217, "learning_rate": 8.779898870785655e-07, "loss": 0.0096, "step": 306980 }, { "epoch": 3.279982905069715, "grad_norm": 3.2659215927124023, "learning_rate": 8.779788891072018e-07, "loss": 0.0078, "step": 306990 }, { "epoch": 3.280089748383995, "grad_norm": 3.7760255336761475, "learning_rate": 8.779678907090714e-07, "loss": 0.0131, "step": 307000 }, { "epoch": 3.2801965916982745, "grad_norm": 0.09362725168466568, "learning_rate": 8.779568918841867e-07, "loss": 0.0117, "step": 307010 }, { "epoch": 3.280303435012554, "grad_norm": 0.11366908997297287, "learning_rate": 8.7794589263256e-07, "loss": 0.0115, "step": 307020 }, { "epoch": 3.280410278326834, "grad_norm": 0.09210586547851562, "learning_rate": 8.77934892954204e-07, "loss": 0.0264, "step": 307030 }, { "epoch": 3.2805171216411133, "grad_norm": 0.06897322088479996, "learning_rate": 8.77923892849131e-07, "loss": 0.0152, "step": 307040 }, { "epoch": 3.2806239649553928, "grad_norm": 0.01757775992155075, "learning_rate": 8.779128923173533e-07, "loss": 0.03, "step": 307050 }, { "epoch": 3.2807308082696727, "grad_norm": 7.566927909851074, "learning_rate": 8.779018913588836e-07, "loss": 0.0125, "step": 307060 }, { "epoch": 3.280837651583952, "grad_norm": 0.00335766957141459, "learning_rate": 8.778908899737339e-07, "loss": 0.0135, "step": 307070 }, { "epoch": 3.2809444948982316, "grad_norm": 0.19110526144504547, "learning_rate": 8.77879888161917e-07, "loss": 0.0129, "step": 307080 }, { "epoch": 3.2810513382125115, "grad_norm": 4.9477949142456055, "learning_rate": 8.778688859234453e-07, "loss": 0.0193, "step": 307090 }, { "epoch": 3.281158181526791, "grad_norm": 5.143840312957764, "learning_rate": 8.778578832583309e-07, "loss": 0.0125, "step": 307100 }, { "epoch": 3.2812650248410704, "grad_norm": 0.0008711641421541572, "learning_rate": 8.778468801665865e-07, "loss": 0.015, "step": 307110 }, { "epoch": 3.2813718681553503, "grad_norm": 0.0021557388827204704, "learning_rate": 8.778358766482245e-07, "loss": 0.0043, "step": 307120 }, { "epoch": 3.2814787114696298, "grad_norm": 0.02188028208911419, "learning_rate": 8.778248727032572e-07, "loss": 0.0265, "step": 307130 }, { "epoch": 3.2815855547839092, "grad_norm": 3.58221435546875, "learning_rate": 8.778138683316971e-07, "loss": 0.0266, "step": 307140 }, { "epoch": 3.281692398098189, "grad_norm": 4.054340362548828, "learning_rate": 8.778028635335567e-07, "loss": 0.0207, "step": 307150 }, { "epoch": 3.2817992414124686, "grad_norm": 3.0085487365722656, "learning_rate": 8.777918583088484e-07, "loss": 0.0032, "step": 307160 }, { "epoch": 3.281906084726748, "grad_norm": 0.7518927454948425, "learning_rate": 8.777808526575844e-07, "loss": 0.003, "step": 307170 }, { "epoch": 3.282012928041028, "grad_norm": 0.014069957658648491, "learning_rate": 8.777698465797774e-07, "loss": 0.0113, "step": 307180 }, { "epoch": 3.2821197713553074, "grad_norm": 3.2880678176879883, "learning_rate": 8.777588400754398e-07, "loss": 0.017, "step": 307190 }, { "epoch": 3.282226614669587, "grad_norm": 0.561227560043335, "learning_rate": 8.777478331445839e-07, "loss": 0.0042, "step": 307200 }, { "epoch": 3.2823334579838668, "grad_norm": 0.006621047388762236, "learning_rate": 8.777368257872222e-07, "loss": 0.0121, "step": 307210 }, { "epoch": 3.2824403012981462, "grad_norm": 0.003290644846856594, "learning_rate": 8.777258180033671e-07, "loss": 0.0221, "step": 307220 }, { "epoch": 3.2825471446124257, "grad_norm": 2.5524892807006836, "learning_rate": 8.77714809793031e-07, "loss": 0.02, "step": 307230 }, { "epoch": 3.2826539879267056, "grad_norm": 0.9028486609458923, "learning_rate": 8.777038011562264e-07, "loss": 0.012, "step": 307240 }, { "epoch": 3.282760831240985, "grad_norm": 0.0026860004290938377, "learning_rate": 8.776927920929658e-07, "loss": 0.001, "step": 307250 }, { "epoch": 3.2828676745552645, "grad_norm": 1.9340800046920776, "learning_rate": 8.776817826032614e-07, "loss": 0.0152, "step": 307260 }, { "epoch": 3.2829745178695444, "grad_norm": 0.1731228530406952, "learning_rate": 8.776707726871258e-07, "loss": 0.0132, "step": 307270 }, { "epoch": 3.283081361183824, "grad_norm": 4.468954563140869, "learning_rate": 8.776597623445714e-07, "loss": 0.0768, "step": 307280 }, { "epoch": 3.283188204498104, "grad_norm": 0.013355768285691738, "learning_rate": 8.776487515756106e-07, "loss": 0.0492, "step": 307290 }, { "epoch": 3.2832950478123832, "grad_norm": 0.09221722930669785, "learning_rate": 8.776377403802559e-07, "loss": 0.0021, "step": 307300 }, { "epoch": 3.2834018911266627, "grad_norm": 0.1862376183271408, "learning_rate": 8.776267287585197e-07, "loss": 0.0015, "step": 307310 }, { "epoch": 3.283508734440942, "grad_norm": 0.012560619041323662, "learning_rate": 8.776157167104145e-07, "loss": 0.0218, "step": 307320 }, { "epoch": 3.283615577755222, "grad_norm": 0.02331320196390152, "learning_rate": 8.776047042359525e-07, "loss": 0.009, "step": 307330 }, { "epoch": 3.2837224210695015, "grad_norm": 3.137540340423584, "learning_rate": 8.775936913351462e-07, "loss": 0.0144, "step": 307340 }, { "epoch": 3.2838292643837814, "grad_norm": 1.8988878726959229, "learning_rate": 8.775826780080084e-07, "loss": 0.0286, "step": 307350 }, { "epoch": 3.283936107698061, "grad_norm": 0.00506131025031209, "learning_rate": 8.775716642545512e-07, "loss": 0.0147, "step": 307360 }, { "epoch": 3.2840429510123403, "grad_norm": 0.3865155875682831, "learning_rate": 8.775606500747869e-07, "loss": 0.0154, "step": 307370 }, { "epoch": 3.28414979432662, "grad_norm": 0.5099055767059326, "learning_rate": 8.775496354687284e-07, "loss": 0.0213, "step": 307380 }, { "epoch": 3.2842566376408997, "grad_norm": 0.018711216747760773, "learning_rate": 8.775386204363878e-07, "loss": 0.0021, "step": 307390 }, { "epoch": 3.284363480955179, "grad_norm": 0.005511823110282421, "learning_rate": 8.775276049777773e-07, "loss": 0.045, "step": 307400 }, { "epoch": 3.284470324269459, "grad_norm": 0.05020812898874283, "learning_rate": 8.775165890929099e-07, "loss": 0.0065, "step": 307410 }, { "epoch": 3.2845771675837385, "grad_norm": 1.0309970378875732, "learning_rate": 8.775055727817977e-07, "loss": 0.0149, "step": 307420 }, { "epoch": 3.284684010898018, "grad_norm": 0.0043420083820819855, "learning_rate": 8.774945560444534e-07, "loss": 0.0065, "step": 307430 }, { "epoch": 3.2847908542122974, "grad_norm": 0.14072299003601074, "learning_rate": 8.77483538880889e-07, "loss": 0.0108, "step": 307440 }, { "epoch": 3.2848976975265773, "grad_norm": 0.5839207172393799, "learning_rate": 8.774725212911174e-07, "loss": 0.0049, "step": 307450 }, { "epoch": 3.285004540840857, "grad_norm": 0.0007968161953613162, "learning_rate": 8.774615032751507e-07, "loss": 0.0297, "step": 307460 }, { "epoch": 3.2851113841551367, "grad_norm": 0.14361897110939026, "learning_rate": 8.774504848330015e-07, "loss": 0.0126, "step": 307470 }, { "epoch": 3.285218227469416, "grad_norm": 0.23245710134506226, "learning_rate": 8.774394659646822e-07, "loss": 0.0136, "step": 307480 }, { "epoch": 3.2853250707836956, "grad_norm": 0.03748447820544243, "learning_rate": 8.774284466702053e-07, "loss": 0.0079, "step": 307490 }, { "epoch": 3.285431914097975, "grad_norm": 6.221188068389893, "learning_rate": 8.774174269495832e-07, "loss": 0.0232, "step": 307500 }, { "epoch": 3.285538757412255, "grad_norm": 4.726837635040283, "learning_rate": 8.774064068028284e-07, "loss": 0.024, "step": 307510 }, { "epoch": 3.2856456007265344, "grad_norm": 0.08448518067598343, "learning_rate": 8.773953862299532e-07, "loss": 0.0172, "step": 307520 }, { "epoch": 3.2857524440408143, "grad_norm": 4.123218536376953, "learning_rate": 8.7738436523097e-07, "loss": 0.0136, "step": 307530 }, { "epoch": 3.285859287355094, "grad_norm": 4.663749694824219, "learning_rate": 8.773733438058916e-07, "loss": 0.0228, "step": 307540 }, { "epoch": 3.2859661306693733, "grad_norm": 0.03409739211201668, "learning_rate": 8.773623219547302e-07, "loss": 0.0105, "step": 307550 }, { "epoch": 3.286072973983653, "grad_norm": 4.433861255645752, "learning_rate": 8.773512996774981e-07, "loss": 0.0139, "step": 307560 }, { "epoch": 3.2861798172979326, "grad_norm": 9.365074157714844, "learning_rate": 8.77340276974208e-07, "loss": 0.0627, "step": 307570 }, { "epoch": 3.286286660612212, "grad_norm": 0.5517298579216003, "learning_rate": 8.773292538448722e-07, "loss": 0.0152, "step": 307580 }, { "epoch": 3.286393503926492, "grad_norm": 4.195797920227051, "learning_rate": 8.773182302895033e-07, "loss": 0.0091, "step": 307590 }, { "epoch": 3.2865003472407714, "grad_norm": 0.13169407844543457, "learning_rate": 8.773072063081135e-07, "loss": 0.0235, "step": 307600 }, { "epoch": 3.286607190555051, "grad_norm": 0.002076897770166397, "learning_rate": 8.772961819007154e-07, "loss": 0.0146, "step": 307610 }, { "epoch": 3.286714033869331, "grad_norm": 12.790990829467773, "learning_rate": 8.772851570673215e-07, "loss": 0.0175, "step": 307620 }, { "epoch": 3.2868208771836103, "grad_norm": 0.6564616560935974, "learning_rate": 8.772741318079442e-07, "loss": 0.0094, "step": 307630 }, { "epoch": 3.2869277204978897, "grad_norm": 0.008881148882210255, "learning_rate": 8.772631061225959e-07, "loss": 0.0263, "step": 307640 }, { "epoch": 3.2870345638121696, "grad_norm": 0.01958143152296543, "learning_rate": 8.77252080011289e-07, "loss": 0.0178, "step": 307650 }, { "epoch": 3.287141407126449, "grad_norm": 9.416303634643555, "learning_rate": 8.772410534740362e-07, "loss": 0.0187, "step": 307660 }, { "epoch": 3.2872482504407285, "grad_norm": 3.0683047771453857, "learning_rate": 8.772300265108496e-07, "loss": 0.0127, "step": 307670 }, { "epoch": 3.2873550937550085, "grad_norm": 5.0673651695251465, "learning_rate": 8.772189991217421e-07, "loss": 0.0089, "step": 307680 }, { "epoch": 3.287461937069288, "grad_norm": 0.06126631051301956, "learning_rate": 8.772079713067255e-07, "loss": 0.0231, "step": 307690 }, { "epoch": 3.2875687803835674, "grad_norm": 2.572746515274048, "learning_rate": 8.771969430658129e-07, "loss": 0.0339, "step": 307700 }, { "epoch": 3.2876756236978473, "grad_norm": 0.36460572481155396, "learning_rate": 8.771859143990165e-07, "loss": 0.0166, "step": 307710 }, { "epoch": 3.2877824670121267, "grad_norm": 0.1991042196750641, "learning_rate": 8.771748853063485e-07, "loss": 0.0044, "step": 307720 }, { "epoch": 3.287889310326406, "grad_norm": 2.969939708709717, "learning_rate": 8.771638557878219e-07, "loss": 0.0113, "step": 307730 }, { "epoch": 3.287996153640686, "grad_norm": 0.35042157769203186, "learning_rate": 8.771528258434486e-07, "loss": 0.0041, "step": 307740 }, { "epoch": 3.2881029969549656, "grad_norm": 2.908332347869873, "learning_rate": 8.771417954732414e-07, "loss": 0.0403, "step": 307750 }, { "epoch": 3.288209840269245, "grad_norm": 7.201918601989746, "learning_rate": 8.771307646772126e-07, "loss": 0.0039, "step": 307760 }, { "epoch": 3.288316683583525, "grad_norm": 5.387862205505371, "learning_rate": 8.771197334553746e-07, "loss": 0.0179, "step": 307770 }, { "epoch": 3.2884235268978044, "grad_norm": 0.0457037016749382, "learning_rate": 8.771087018077401e-07, "loss": 0.001, "step": 307780 }, { "epoch": 3.288530370212084, "grad_norm": 5.726756572723389, "learning_rate": 8.770976697343214e-07, "loss": 0.031, "step": 307790 }, { "epoch": 3.2886372135263637, "grad_norm": 0.06153162568807602, "learning_rate": 8.77086637235131e-07, "loss": 0.0064, "step": 307800 }, { "epoch": 3.288744056840643, "grad_norm": 5.0950493812561035, "learning_rate": 8.770756043101812e-07, "loss": 0.021, "step": 307810 }, { "epoch": 3.2888509001549227, "grad_norm": 0.24714557826519012, "learning_rate": 8.770645709594848e-07, "loss": 0.0225, "step": 307820 }, { "epoch": 3.2889577434692026, "grad_norm": 0.02072705142199993, "learning_rate": 8.770535371830538e-07, "loss": 0.0176, "step": 307830 }, { "epoch": 3.289064586783482, "grad_norm": 0.006582657340914011, "learning_rate": 8.770425029809012e-07, "loss": 0.0085, "step": 307840 }, { "epoch": 3.2891714300977615, "grad_norm": 0.6056243181228638, "learning_rate": 8.770314683530388e-07, "loss": 0.0416, "step": 307850 }, { "epoch": 3.2892782734120414, "grad_norm": 3.212707996368408, "learning_rate": 8.770204332994798e-07, "loss": 0.0231, "step": 307860 }, { "epoch": 3.289385116726321, "grad_norm": 0.5458709001541138, "learning_rate": 8.77009397820236e-07, "loss": 0.0021, "step": 307870 }, { "epoch": 3.2894919600406003, "grad_norm": 5.9187188148498535, "learning_rate": 8.7699836191532e-07, "loss": 0.0049, "step": 307880 }, { "epoch": 3.28959880335488, "grad_norm": 0.01675381511449814, "learning_rate": 8.769873255847448e-07, "loss": 0.0049, "step": 307890 }, { "epoch": 3.2897056466691597, "grad_norm": 0.9335574507713318, "learning_rate": 8.769762888285222e-07, "loss": 0.0394, "step": 307900 }, { "epoch": 3.289812489983439, "grad_norm": 3.782625436782837, "learning_rate": 8.76965251646665e-07, "loss": 0.025, "step": 307910 }, { "epoch": 3.289919333297719, "grad_norm": 0.5118594765663147, "learning_rate": 8.769542140391855e-07, "loss": 0.0234, "step": 307920 }, { "epoch": 3.2900261766119985, "grad_norm": 5.654210567474365, "learning_rate": 8.769431760060963e-07, "loss": 0.0109, "step": 307930 }, { "epoch": 3.290133019926278, "grad_norm": 0.011194512248039246, "learning_rate": 8.769321375474098e-07, "loss": 0.0057, "step": 307940 }, { "epoch": 3.290239863240558, "grad_norm": 0.14914312958717346, "learning_rate": 8.769210986631385e-07, "loss": 0.0179, "step": 307950 }, { "epoch": 3.2903467065548373, "grad_norm": 0.21947793662548065, "learning_rate": 8.769100593532948e-07, "loss": 0.0162, "step": 307960 }, { "epoch": 3.2904535498691168, "grad_norm": 3.042609214782715, "learning_rate": 8.768990196178911e-07, "loss": 0.024, "step": 307970 }, { "epoch": 3.2905603931833967, "grad_norm": 0.0063139768317341805, "learning_rate": 8.768879794569401e-07, "loss": 0.0031, "step": 307980 }, { "epoch": 3.290667236497676, "grad_norm": 0.026186751201748848, "learning_rate": 8.76876938870454e-07, "loss": 0.0092, "step": 307990 }, { "epoch": 3.2907740798119556, "grad_norm": 0.004952094051986933, "learning_rate": 8.768658978584455e-07, "loss": 0.0144, "step": 308000 }, { "epoch": 3.2908809231262355, "grad_norm": 1.226128339767456, "learning_rate": 8.76854856420927e-07, "loss": 0.0129, "step": 308010 }, { "epoch": 3.290987766440515, "grad_norm": 0.013864423148334026, "learning_rate": 8.768438145579109e-07, "loss": 0.0017, "step": 308020 }, { "epoch": 3.2910946097547944, "grad_norm": 0.09741400927305222, "learning_rate": 8.768327722694095e-07, "loss": 0.0056, "step": 308030 }, { "epoch": 3.2912014530690743, "grad_norm": 0.017127981409430504, "learning_rate": 8.768217295554357e-07, "loss": 0.0242, "step": 308040 }, { "epoch": 3.2913082963833538, "grad_norm": 0.010707682929933071, "learning_rate": 8.768106864160016e-07, "loss": 0.0071, "step": 308050 }, { "epoch": 3.2914151396976337, "grad_norm": 0.0040498534217476845, "learning_rate": 8.767996428511199e-07, "loss": 0.0092, "step": 308060 }, { "epoch": 3.291521983011913, "grad_norm": 0.0019864372443407774, "learning_rate": 8.767885988608028e-07, "loss": 0.0064, "step": 308070 }, { "epoch": 3.2916288263261926, "grad_norm": 0.16701123118400574, "learning_rate": 8.767775544450631e-07, "loss": 0.0091, "step": 308080 }, { "epoch": 3.291735669640472, "grad_norm": 0.6627028584480286, "learning_rate": 8.76766509603913e-07, "loss": 0.0194, "step": 308090 }, { "epoch": 3.291842512954752, "grad_norm": 0.031039517372846603, "learning_rate": 8.767554643373651e-07, "loss": 0.0127, "step": 308100 }, { "epoch": 3.2919493562690314, "grad_norm": 0.007481019478291273, "learning_rate": 8.767444186454318e-07, "loss": 0.0079, "step": 308110 }, { "epoch": 3.2920561995833113, "grad_norm": 0.005374441854655743, "learning_rate": 8.767333725281257e-07, "loss": 0.0165, "step": 308120 }, { "epoch": 3.2921630428975908, "grad_norm": 6.004302501678467, "learning_rate": 8.767223259854592e-07, "loss": 0.0147, "step": 308130 }, { "epoch": 3.2922698862118702, "grad_norm": 0.02416938543319702, "learning_rate": 8.767112790174447e-07, "loss": 0.0021, "step": 308140 }, { "epoch": 3.2923767295261497, "grad_norm": 0.03010508045554161, "learning_rate": 8.767002316240948e-07, "loss": 0.0127, "step": 308150 }, { "epoch": 3.2924835728404296, "grad_norm": 1.3914570808410645, "learning_rate": 8.766891838054218e-07, "loss": 0.0135, "step": 308160 }, { "epoch": 3.292590416154709, "grad_norm": 7.25799036026001, "learning_rate": 8.766781355614383e-07, "loss": 0.0193, "step": 308170 }, { "epoch": 3.292697259468989, "grad_norm": 4.448193073272705, "learning_rate": 8.766670868921569e-07, "loss": 0.0299, "step": 308180 }, { "epoch": 3.2928041027832684, "grad_norm": 0.02333345077931881, "learning_rate": 8.766560377975898e-07, "loss": 0.0567, "step": 308190 }, { "epoch": 3.292910946097548, "grad_norm": 0.04564771428704262, "learning_rate": 8.766449882777496e-07, "loss": 0.0222, "step": 308200 }, { "epoch": 3.2930177894118273, "grad_norm": 1.4811742305755615, "learning_rate": 8.766339383326489e-07, "loss": 0.0519, "step": 308210 }, { "epoch": 3.2931246327261072, "grad_norm": 1.6074925661087036, "learning_rate": 8.766228879622999e-07, "loss": 0.0063, "step": 308220 }, { "epoch": 3.2932314760403867, "grad_norm": 0.4796830117702484, "learning_rate": 8.766118371667153e-07, "loss": 0.0326, "step": 308230 }, { "epoch": 3.2933383193546666, "grad_norm": 2.359776496887207, "learning_rate": 8.766007859459075e-07, "loss": 0.0315, "step": 308240 }, { "epoch": 3.293445162668946, "grad_norm": 0.027978036552667618, "learning_rate": 8.765897342998892e-07, "loss": 0.018, "step": 308250 }, { "epoch": 3.2935520059832255, "grad_norm": 0.14173103868961334, "learning_rate": 8.765786822286723e-07, "loss": 0.0038, "step": 308260 }, { "epoch": 3.293658849297505, "grad_norm": 0.18333643674850464, "learning_rate": 8.765676297322698e-07, "loss": 0.0112, "step": 308270 }, { "epoch": 3.293765692611785, "grad_norm": 0.01875016652047634, "learning_rate": 8.765565768106942e-07, "loss": 0.0048, "step": 308280 }, { "epoch": 3.2938725359260643, "grad_norm": 0.4348808228969574, "learning_rate": 8.765455234639576e-07, "loss": 0.0375, "step": 308290 }, { "epoch": 3.2939793792403442, "grad_norm": 0.04457710683345795, "learning_rate": 8.765344696920727e-07, "loss": 0.005, "step": 308300 }, { "epoch": 3.2940862225546237, "grad_norm": 0.40957534313201904, "learning_rate": 8.76523415495052e-07, "loss": 0.0079, "step": 308310 }, { "epoch": 3.294193065868903, "grad_norm": 0.9535648822784424, "learning_rate": 8.765123608729079e-07, "loss": 0.0166, "step": 308320 }, { "epoch": 3.294299909183183, "grad_norm": 1.1917903423309326, "learning_rate": 8.765013058256531e-07, "loss": 0.0039, "step": 308330 }, { "epoch": 3.2944067524974625, "grad_norm": 2.629650115966797, "learning_rate": 8.764902503532998e-07, "loss": 0.0026, "step": 308340 }, { "epoch": 3.294513595811742, "grad_norm": 0.012975557707250118, "learning_rate": 8.764791944558605e-07, "loss": 0.0024, "step": 308350 }, { "epoch": 3.294620439126022, "grad_norm": 0.8437017798423767, "learning_rate": 8.764681381333479e-07, "loss": 0.0012, "step": 308360 }, { "epoch": 3.2947272824403013, "grad_norm": 0.495449960231781, "learning_rate": 8.764570813857743e-07, "loss": 0.0036, "step": 308370 }, { "epoch": 3.294834125754581, "grad_norm": 4.61877965927124, "learning_rate": 8.764460242131522e-07, "loss": 0.0248, "step": 308380 }, { "epoch": 3.2949409690688607, "grad_norm": 1.870786428451538, "learning_rate": 8.764349666154942e-07, "loss": 0.0099, "step": 308390 }, { "epoch": 3.29504781238314, "grad_norm": 5.306987285614014, "learning_rate": 8.764239085928126e-07, "loss": 0.0158, "step": 308400 }, { "epoch": 3.2951546556974196, "grad_norm": 0.015618103556334972, "learning_rate": 8.764128501451203e-07, "loss": 0.0475, "step": 308410 }, { "epoch": 3.2952614990116995, "grad_norm": 6.207639217376709, "learning_rate": 8.764017912724292e-07, "loss": 0.0248, "step": 308420 }, { "epoch": 3.295368342325979, "grad_norm": 8.575274467468262, "learning_rate": 8.763907319747521e-07, "loss": 0.0086, "step": 308430 }, { "epoch": 3.2954751856402584, "grad_norm": 0.942266047000885, "learning_rate": 8.763796722521015e-07, "loss": 0.0329, "step": 308440 }, { "epoch": 3.2955820289545383, "grad_norm": 0.07120250165462494, "learning_rate": 8.763686121044899e-07, "loss": 0.0075, "step": 308450 }, { "epoch": 3.295688872268818, "grad_norm": 0.02755412831902504, "learning_rate": 8.763575515319297e-07, "loss": 0.048, "step": 308460 }, { "epoch": 3.2957957155830973, "grad_norm": 0.017547262832522392, "learning_rate": 8.763464905344333e-07, "loss": 0.0299, "step": 308470 }, { "epoch": 3.295902558897377, "grad_norm": 0.005048966966569424, "learning_rate": 8.763354291120135e-07, "loss": 0.008, "step": 308480 }, { "epoch": 3.2960094022116566, "grad_norm": 0.03209132328629494, "learning_rate": 8.763243672646825e-07, "loss": 0.007, "step": 308490 }, { "epoch": 3.296116245525936, "grad_norm": 0.15700122714042664, "learning_rate": 8.763133049924529e-07, "loss": 0.0079, "step": 308500 }, { "epoch": 3.296223088840216, "grad_norm": 9.74370002746582, "learning_rate": 8.763022422953373e-07, "loss": 0.0185, "step": 308510 }, { "epoch": 3.2963299321544954, "grad_norm": 3.822391986846924, "learning_rate": 8.762911791733478e-07, "loss": 0.0298, "step": 308520 }, { "epoch": 3.296436775468775, "grad_norm": 3.279027223587036, "learning_rate": 8.762801156264973e-07, "loss": 0.0124, "step": 308530 }, { "epoch": 3.296543618783055, "grad_norm": 0.018694551661610603, "learning_rate": 8.762690516547981e-07, "loss": 0.0118, "step": 308540 }, { "epoch": 3.2966504620973343, "grad_norm": 0.017098909243941307, "learning_rate": 8.762579872582627e-07, "loss": 0.0353, "step": 308550 }, { "epoch": 3.2967573054116137, "grad_norm": 1.5058413743972778, "learning_rate": 8.762469224369038e-07, "loss": 0.0449, "step": 308560 }, { "epoch": 3.2968641487258936, "grad_norm": 11.311259269714355, "learning_rate": 8.762358571907335e-07, "loss": 0.0222, "step": 308570 }, { "epoch": 3.296970992040173, "grad_norm": 0.003507691901177168, "learning_rate": 8.762247915197648e-07, "loss": 0.0061, "step": 308580 }, { "epoch": 3.2970778353544525, "grad_norm": 7.787378787994385, "learning_rate": 8.762137254240099e-07, "loss": 0.0118, "step": 308590 }, { "epoch": 3.2971846786687324, "grad_norm": 0.11688491702079773, "learning_rate": 8.76202658903481e-07, "loss": 0.0092, "step": 308600 }, { "epoch": 3.297291521983012, "grad_norm": 0.013909179717302322, "learning_rate": 8.761915919581913e-07, "loss": 0.0137, "step": 308610 }, { "epoch": 3.2973983652972914, "grad_norm": 0.12675504386425018, "learning_rate": 8.761805245881526e-07, "loss": 0.0368, "step": 308620 }, { "epoch": 3.2975052086115713, "grad_norm": 0.4025210440158844, "learning_rate": 8.761694567933777e-07, "loss": 0.014, "step": 308630 }, { "epoch": 3.2976120519258507, "grad_norm": 18.810638427734375, "learning_rate": 8.761583885738792e-07, "loss": 0.0364, "step": 308640 }, { "epoch": 3.29771889524013, "grad_norm": 4.446909427642822, "learning_rate": 8.761473199296696e-07, "loss": 0.0057, "step": 308650 }, { "epoch": 3.29782573855441, "grad_norm": 1.840042233467102, "learning_rate": 8.761362508607611e-07, "loss": 0.0309, "step": 308660 }, { "epoch": 3.2979325818686895, "grad_norm": 0.04743586480617523, "learning_rate": 8.761251813671664e-07, "loss": 0.0181, "step": 308670 }, { "epoch": 3.298039425182969, "grad_norm": 0.015894804149866104, "learning_rate": 8.761141114488981e-07, "loss": 0.0154, "step": 308680 }, { "epoch": 3.298146268497249, "grad_norm": 0.1226605698466301, "learning_rate": 8.761030411059685e-07, "loss": 0.0363, "step": 308690 }, { "epoch": 3.2982531118115284, "grad_norm": 0.7888857126235962, "learning_rate": 8.760919703383902e-07, "loss": 0.0052, "step": 308700 }, { "epoch": 3.298359955125808, "grad_norm": 0.0163692869246006, "learning_rate": 8.760808991461757e-07, "loss": 0.0095, "step": 308710 }, { "epoch": 3.2984667984400877, "grad_norm": 0.006156838499009609, "learning_rate": 8.760698275293375e-07, "loss": 0.0031, "step": 308720 }, { "epoch": 3.298573641754367, "grad_norm": 0.007645290344953537, "learning_rate": 8.76058755487888e-07, "loss": 0.0101, "step": 308730 }, { "epoch": 3.2986804850686466, "grad_norm": 3.168701171875, "learning_rate": 8.760476830218397e-07, "loss": 0.0074, "step": 308740 }, { "epoch": 3.2987873283829265, "grad_norm": 0.0035300918389111757, "learning_rate": 8.760366101312053e-07, "loss": 0.0095, "step": 308750 }, { "epoch": 3.298894171697206, "grad_norm": 0.05019085481762886, "learning_rate": 8.760255368159972e-07, "loss": 0.0148, "step": 308760 }, { "epoch": 3.299001015011486, "grad_norm": 0.22817282378673553, "learning_rate": 8.76014463076228e-07, "loss": 0.0017, "step": 308770 }, { "epoch": 3.2991078583257654, "grad_norm": 0.0015572425909340382, "learning_rate": 8.760033889119099e-07, "loss": 0.0178, "step": 308780 }, { "epoch": 3.299214701640045, "grad_norm": 0.05346059426665306, "learning_rate": 8.759923143230556e-07, "loss": 0.0313, "step": 308790 }, { "epoch": 3.2993215449543243, "grad_norm": 0.009777841158211231, "learning_rate": 8.759812393096777e-07, "loss": 0.0292, "step": 308800 }, { "epoch": 3.299428388268604, "grad_norm": 0.4908684492111206, "learning_rate": 8.759701638717887e-07, "loss": 0.0108, "step": 308810 }, { "epoch": 3.2995352315828836, "grad_norm": 0.05705377832055092, "learning_rate": 8.759590880094008e-07, "loss": 0.0081, "step": 308820 }, { "epoch": 3.2996420748971635, "grad_norm": 4.640321254730225, "learning_rate": 8.759480117225269e-07, "loss": 0.0035, "step": 308830 }, { "epoch": 3.299748918211443, "grad_norm": 0.7701987028121948, "learning_rate": 8.759369350111791e-07, "loss": 0.0104, "step": 308840 }, { "epoch": 3.2998557615257225, "grad_norm": 6.436017990112305, "learning_rate": 8.759258578753702e-07, "loss": 0.0198, "step": 308850 }, { "epoch": 3.299962604840002, "grad_norm": 0.6360578536987305, "learning_rate": 8.759147803151127e-07, "loss": 0.0148, "step": 308860 }, { "epoch": 3.300069448154282, "grad_norm": 0.010004386305809021, "learning_rate": 8.75903702330419e-07, "loss": 0.0167, "step": 308870 }, { "epoch": 3.3001762914685613, "grad_norm": 2.7675623893737793, "learning_rate": 8.758926239213017e-07, "loss": 0.0117, "step": 308880 }, { "epoch": 3.300283134782841, "grad_norm": 0.5314527750015259, "learning_rate": 8.758815450877733e-07, "loss": 0.0273, "step": 308890 }, { "epoch": 3.3003899780971206, "grad_norm": 0.0008825500262901187, "learning_rate": 8.758704658298462e-07, "loss": 0.0421, "step": 308900 }, { "epoch": 3.3004968214114, "grad_norm": 0.015904732048511505, "learning_rate": 8.758593861475329e-07, "loss": 0.0064, "step": 308910 }, { "epoch": 3.3006036647256796, "grad_norm": 3.4375038146972656, "learning_rate": 8.75848306040846e-07, "loss": 0.0493, "step": 308920 }, { "epoch": 3.3007105080399595, "grad_norm": 0.36768415570259094, "learning_rate": 8.758372255097981e-07, "loss": 0.0313, "step": 308930 }, { "epoch": 3.300817351354239, "grad_norm": 0.23131422698497772, "learning_rate": 8.758261445544015e-07, "loss": 0.0031, "step": 308940 }, { "epoch": 3.300924194668519, "grad_norm": 0.018907275050878525, "learning_rate": 8.758150631746689e-07, "loss": 0.0098, "step": 308950 }, { "epoch": 3.3010310379827983, "grad_norm": 0.6977236866950989, "learning_rate": 8.758039813706127e-07, "loss": 0.0087, "step": 308960 }, { "epoch": 3.3011378812970777, "grad_norm": 0.41628989577293396, "learning_rate": 8.757928991422454e-07, "loss": 0.0084, "step": 308970 }, { "epoch": 3.301244724611357, "grad_norm": 0.9394819140434265, "learning_rate": 8.757818164895796e-07, "loss": 0.0197, "step": 308980 }, { "epoch": 3.301351567925637, "grad_norm": 1.461273431777954, "learning_rate": 8.757707334126278e-07, "loss": 0.008, "step": 308990 }, { "epoch": 3.3014584112399166, "grad_norm": 0.00823654979467392, "learning_rate": 8.757596499114025e-07, "loss": 0.0116, "step": 309000 }, { "epoch": 3.3015652545541965, "grad_norm": 0.29963213205337524, "learning_rate": 8.757485659859161e-07, "loss": 0.0048, "step": 309010 }, { "epoch": 3.301672097868476, "grad_norm": 4.752403736114502, "learning_rate": 8.757374816361813e-07, "loss": 0.0173, "step": 309020 }, { "epoch": 3.3017789411827554, "grad_norm": 5.980899810791016, "learning_rate": 8.757263968622104e-07, "loss": 0.0269, "step": 309030 }, { "epoch": 3.3018857844970353, "grad_norm": 0.03708808124065399, "learning_rate": 8.757153116640161e-07, "loss": 0.0307, "step": 309040 }, { "epoch": 3.3019926278113148, "grad_norm": 35.27369689941406, "learning_rate": 8.757042260416109e-07, "loss": 0.0536, "step": 309050 }, { "epoch": 3.302099471125594, "grad_norm": 0.0008588512428104877, "learning_rate": 8.756931399950073e-07, "loss": 0.0089, "step": 309060 }, { "epoch": 3.302206314439874, "grad_norm": 0.0020455592311918736, "learning_rate": 8.756820535242177e-07, "loss": 0.0163, "step": 309070 }, { "epoch": 3.3023131577541536, "grad_norm": 3.3407795429229736, "learning_rate": 8.756709666292548e-07, "loss": 0.0187, "step": 309080 }, { "epoch": 3.302420001068433, "grad_norm": 0.37873053550720215, "learning_rate": 8.756598793101309e-07, "loss": 0.0155, "step": 309090 }, { "epoch": 3.302526844382713, "grad_norm": 0.07653240859508514, "learning_rate": 8.756487915668587e-07, "loss": 0.0257, "step": 309100 }, { "epoch": 3.3026336876969924, "grad_norm": 0.013174410909414291, "learning_rate": 8.756377033994508e-07, "loss": 0.0142, "step": 309110 }, { "epoch": 3.302740531011272, "grad_norm": 0.008453162387013435, "learning_rate": 8.756266148079194e-07, "loss": 0.0072, "step": 309120 }, { "epoch": 3.3028473743255518, "grad_norm": 0.011867673136293888, "learning_rate": 8.756155257922773e-07, "loss": 0.0321, "step": 309130 }, { "epoch": 3.302954217639831, "grad_norm": 0.07439319789409637, "learning_rate": 8.756044363525367e-07, "loss": 0.0062, "step": 309140 }, { "epoch": 3.3030610609541107, "grad_norm": 0.7004995942115784, "learning_rate": 8.755933464887106e-07, "loss": 0.0202, "step": 309150 }, { "epoch": 3.3031679042683906, "grad_norm": 0.02172255516052246, "learning_rate": 8.755822562008112e-07, "loss": 0.0091, "step": 309160 }, { "epoch": 3.30327474758267, "grad_norm": 0.05110081657767296, "learning_rate": 8.755711654888511e-07, "loss": 0.0026, "step": 309170 }, { "epoch": 3.3033815908969495, "grad_norm": 0.1635245680809021, "learning_rate": 8.755600743528428e-07, "loss": 0.0319, "step": 309180 }, { "epoch": 3.3034884342112294, "grad_norm": 0.7961272597312927, "learning_rate": 8.755489827927988e-07, "loss": 0.0237, "step": 309190 }, { "epoch": 3.303595277525509, "grad_norm": 0.14100134372711182, "learning_rate": 8.755378908087316e-07, "loss": 0.0101, "step": 309200 }, { "epoch": 3.3037021208397883, "grad_norm": 4.783667087554932, "learning_rate": 8.75526798400654e-07, "loss": 0.0258, "step": 309210 }, { "epoch": 3.303808964154068, "grad_norm": 1.952463150024414, "learning_rate": 8.755157055685781e-07, "loss": 0.0274, "step": 309220 }, { "epoch": 3.3039158074683477, "grad_norm": 0.4530172348022461, "learning_rate": 8.755046123125167e-07, "loss": 0.0073, "step": 309230 }, { "epoch": 3.304022650782627, "grad_norm": 6.007678985595703, "learning_rate": 8.754935186324823e-07, "loss": 0.0093, "step": 309240 }, { "epoch": 3.304129494096907, "grad_norm": 1.2871432304382324, "learning_rate": 8.754824245284873e-07, "loss": 0.0026, "step": 309250 }, { "epoch": 3.3042363374111865, "grad_norm": 0.002845475450158119, "learning_rate": 8.754713300005443e-07, "loss": 0.0014, "step": 309260 }, { "epoch": 3.304343180725466, "grad_norm": 5.622666358947754, "learning_rate": 8.754602350486657e-07, "loss": 0.0095, "step": 309270 }, { "epoch": 3.304450024039746, "grad_norm": 21.763402938842773, "learning_rate": 8.754491396728645e-07, "loss": 0.0297, "step": 309280 }, { "epoch": 3.3045568673540253, "grad_norm": 5.977415084838867, "learning_rate": 8.754380438731525e-07, "loss": 0.0219, "step": 309290 }, { "epoch": 3.3046637106683048, "grad_norm": 0.4482133686542511, "learning_rate": 8.754269476495428e-07, "loss": 0.0171, "step": 309300 }, { "epoch": 3.3047705539825847, "grad_norm": 0.5808486938476562, "learning_rate": 8.754158510020478e-07, "loss": 0.0084, "step": 309310 }, { "epoch": 3.304877397296864, "grad_norm": 0.9831318259239197, "learning_rate": 8.754047539306799e-07, "loss": 0.0203, "step": 309320 }, { "epoch": 3.3049842406111436, "grad_norm": 0.1024012491106987, "learning_rate": 8.753936564354517e-07, "loss": 0.0105, "step": 309330 }, { "epoch": 3.3050910839254235, "grad_norm": 10.357132911682129, "learning_rate": 8.753825585163758e-07, "loss": 0.0642, "step": 309340 }, { "epoch": 3.305197927239703, "grad_norm": 9.670432090759277, "learning_rate": 8.753714601734646e-07, "loss": 0.0141, "step": 309350 }, { "epoch": 3.3053047705539824, "grad_norm": 0.01690671034157276, "learning_rate": 8.753603614067306e-07, "loss": 0.0155, "step": 309360 }, { "epoch": 3.3054116138682623, "grad_norm": 1.0027917623519897, "learning_rate": 8.753492622161866e-07, "loss": 0.0143, "step": 309370 }, { "epoch": 3.305518457182542, "grad_norm": 1.025325894355774, "learning_rate": 8.753381626018449e-07, "loss": 0.024, "step": 309380 }, { "epoch": 3.3056253004968212, "grad_norm": 3.2213873863220215, "learning_rate": 8.75327062563718e-07, "loss": 0.0119, "step": 309390 }, { "epoch": 3.305732143811101, "grad_norm": 3.338256359100342, "learning_rate": 8.753159621018185e-07, "loss": 0.0313, "step": 309400 }, { "epoch": 3.3058389871253806, "grad_norm": 0.2903907001018524, "learning_rate": 8.753048612161591e-07, "loss": 0.0207, "step": 309410 }, { "epoch": 3.30594583043966, "grad_norm": 0.01398796308785677, "learning_rate": 8.752937599067522e-07, "loss": 0.0091, "step": 309420 }, { "epoch": 3.30605267375394, "grad_norm": 0.15650320053100586, "learning_rate": 8.752826581736101e-07, "loss": 0.0042, "step": 309430 }, { "epoch": 3.3061595170682194, "grad_norm": 0.018317900598049164, "learning_rate": 8.752715560167457e-07, "loss": 0.0119, "step": 309440 }, { "epoch": 3.306266360382499, "grad_norm": 3.237419843673706, "learning_rate": 8.752604534361714e-07, "loss": 0.0499, "step": 309450 }, { "epoch": 3.306373203696779, "grad_norm": 0.005962562281638384, "learning_rate": 8.752493504318997e-07, "loss": 0.0164, "step": 309460 }, { "epoch": 3.3064800470110582, "grad_norm": 0.01788727380335331, "learning_rate": 8.752382470039432e-07, "loss": 0.0007, "step": 309470 }, { "epoch": 3.3065868903253377, "grad_norm": 6.511073589324951, "learning_rate": 8.752271431523144e-07, "loss": 0.0046, "step": 309480 }, { "epoch": 3.3066937336396176, "grad_norm": 0.059909429401159286, "learning_rate": 8.752160388770258e-07, "loss": 0.0086, "step": 309490 }, { "epoch": 3.306800576953897, "grad_norm": 1.53544282913208, "learning_rate": 8.752049341780898e-07, "loss": 0.0106, "step": 309500 }, { "epoch": 3.3069074202681765, "grad_norm": 0.007627377286553383, "learning_rate": 8.751938290555194e-07, "loss": 0.0135, "step": 309510 }, { "epoch": 3.3070142635824564, "grad_norm": 0.013477143831551075, "learning_rate": 8.751827235093267e-07, "loss": 0.0146, "step": 309520 }, { "epoch": 3.307121106896736, "grad_norm": 0.01411193236708641, "learning_rate": 8.751716175395245e-07, "loss": 0.027, "step": 309530 }, { "epoch": 3.307227950211016, "grad_norm": 6.597731590270996, "learning_rate": 8.75160511146125e-07, "loss": 0.012, "step": 309540 }, { "epoch": 3.3073347935252952, "grad_norm": 0.005218625999987125, "learning_rate": 8.751494043291412e-07, "loss": 0.0156, "step": 309550 }, { "epoch": 3.3074416368395747, "grad_norm": 1.180428147315979, "learning_rate": 8.751382970885855e-07, "loss": 0.0309, "step": 309560 }, { "epoch": 3.307548480153854, "grad_norm": 0.5980095863342285, "learning_rate": 8.751271894244701e-07, "loss": 0.0194, "step": 309570 }, { "epoch": 3.307655323468134, "grad_norm": 0.7043814063072205, "learning_rate": 8.751160813368078e-07, "loss": 0.0185, "step": 309580 }, { "epoch": 3.3077621667824135, "grad_norm": 0.9538541436195374, "learning_rate": 8.751049728256112e-07, "loss": 0.0263, "step": 309590 }, { "epoch": 3.3078690100966934, "grad_norm": 1.3777469396591187, "learning_rate": 8.750938638908928e-07, "loss": 0.0054, "step": 309600 }, { "epoch": 3.307975853410973, "grad_norm": 0.004061752464622259, "learning_rate": 8.750827545326651e-07, "loss": 0.0085, "step": 309610 }, { "epoch": 3.3080826967252523, "grad_norm": 0.015163708478212357, "learning_rate": 8.750716447509406e-07, "loss": 0.0088, "step": 309620 }, { "epoch": 3.308189540039532, "grad_norm": 1.3417917490005493, "learning_rate": 8.75060534545732e-07, "loss": 0.0071, "step": 309630 }, { "epoch": 3.3082963833538117, "grad_norm": 0.029639916494488716, "learning_rate": 8.750494239170517e-07, "loss": 0.0157, "step": 309640 }, { "epoch": 3.308403226668091, "grad_norm": 0.013665789738297462, "learning_rate": 8.750383128649124e-07, "loss": 0.0395, "step": 309650 }, { "epoch": 3.308510069982371, "grad_norm": 0.004284498281776905, "learning_rate": 8.750272013893263e-07, "loss": 0.0071, "step": 309660 }, { "epoch": 3.3086169132966505, "grad_norm": 0.018380599096417427, "learning_rate": 8.750160894903064e-07, "loss": 0.0002, "step": 309670 }, { "epoch": 3.30872375661093, "grad_norm": 0.01750939153134823, "learning_rate": 8.750049771678649e-07, "loss": 0.0197, "step": 309680 }, { "epoch": 3.3088305999252094, "grad_norm": 0.3945164680480957, "learning_rate": 8.749938644220146e-07, "loss": 0.0085, "step": 309690 }, { "epoch": 3.3089374432394894, "grad_norm": 0.023401202633976936, "learning_rate": 8.749827512527679e-07, "loss": 0.0361, "step": 309700 }, { "epoch": 3.309044286553769, "grad_norm": 2.65608549118042, "learning_rate": 8.749716376601373e-07, "loss": 0.0162, "step": 309710 }, { "epoch": 3.3091511298680487, "grad_norm": 7.440706253051758, "learning_rate": 8.749605236441356e-07, "loss": 0.0316, "step": 309720 }, { "epoch": 3.309257973182328, "grad_norm": 0.07779280841350555, "learning_rate": 8.749494092047749e-07, "loss": 0.0382, "step": 309730 }, { "epoch": 3.3093648164966076, "grad_norm": 30.30609893798828, "learning_rate": 8.749382943420681e-07, "loss": 0.0067, "step": 309740 }, { "epoch": 3.309471659810887, "grad_norm": 7.330585956573486, "learning_rate": 8.749271790560277e-07, "loss": 0.0221, "step": 309750 }, { "epoch": 3.309578503125167, "grad_norm": 0.010268780402839184, "learning_rate": 8.749160633466664e-07, "loss": 0.0036, "step": 309760 }, { "epoch": 3.3096853464394465, "grad_norm": 0.006319454871118069, "learning_rate": 8.749049472139962e-07, "loss": 0.0054, "step": 309770 }, { "epoch": 3.3097921897537264, "grad_norm": 4.7869696617126465, "learning_rate": 8.748938306580302e-07, "loss": 0.0209, "step": 309780 }, { "epoch": 3.309899033068006, "grad_norm": 0.011592788621783257, "learning_rate": 8.748827136787808e-07, "loss": 0.0027, "step": 309790 }, { "epoch": 3.3100058763822853, "grad_norm": 0.39712876081466675, "learning_rate": 8.748715962762605e-07, "loss": 0.0118, "step": 309800 }, { "epoch": 3.310112719696565, "grad_norm": 1.1649320125579834, "learning_rate": 8.74860478450482e-07, "loss": 0.0303, "step": 309810 }, { "epoch": 3.3102195630108446, "grad_norm": 0.012883322313427925, "learning_rate": 8.748493602014575e-07, "loss": 0.0037, "step": 309820 }, { "epoch": 3.310326406325124, "grad_norm": 6.157339096069336, "learning_rate": 8.748382415291999e-07, "loss": 0.0245, "step": 309830 }, { "epoch": 3.310433249639404, "grad_norm": 0.06800708174705505, "learning_rate": 8.748271224337217e-07, "loss": 0.0083, "step": 309840 }, { "epoch": 3.3105400929536835, "grad_norm": 0.2074168622493744, "learning_rate": 8.748160029150353e-07, "loss": 0.0031, "step": 309850 }, { "epoch": 3.310646936267963, "grad_norm": 0.7620933651924133, "learning_rate": 8.748048829731534e-07, "loss": 0.0091, "step": 309860 }, { "epoch": 3.310753779582243, "grad_norm": 4.729238033294678, "learning_rate": 8.747937626080883e-07, "loss": 0.0143, "step": 309870 }, { "epoch": 3.3108606228965223, "grad_norm": 0.0017424405086785555, "learning_rate": 8.74782641819853e-07, "loss": 0.0037, "step": 309880 }, { "epoch": 3.3109674662108017, "grad_norm": 0.010348238050937653, "learning_rate": 8.747715206084599e-07, "loss": 0.0001, "step": 309890 }, { "epoch": 3.3110743095250816, "grad_norm": 1.5172258615493774, "learning_rate": 8.747603989739211e-07, "loss": 0.0134, "step": 309900 }, { "epoch": 3.311181152839361, "grad_norm": 4.980621814727783, "learning_rate": 8.747492769162498e-07, "loss": 0.0129, "step": 309910 }, { "epoch": 3.3112879961536406, "grad_norm": 2.3863463401794434, "learning_rate": 8.747381544354581e-07, "loss": 0.0157, "step": 309920 }, { "epoch": 3.3113948394679205, "grad_norm": 0.1790231615304947, "learning_rate": 8.747270315315589e-07, "loss": 0.0144, "step": 309930 }, { "epoch": 3.3115016827822, "grad_norm": 2.6709561347961426, "learning_rate": 8.747159082045646e-07, "loss": 0.0036, "step": 309940 }, { "epoch": 3.3116085260964794, "grad_norm": 0.004922403488308191, "learning_rate": 8.747047844544877e-07, "loss": 0.0056, "step": 309950 }, { "epoch": 3.3117153694107593, "grad_norm": 0.0016029620310291648, "learning_rate": 8.746936602813409e-07, "loss": 0.0052, "step": 309960 }, { "epoch": 3.3118222127250387, "grad_norm": 2.8755881786346436, "learning_rate": 8.746825356851365e-07, "loss": 0.0201, "step": 309970 }, { "epoch": 3.311929056039318, "grad_norm": 0.036824245005846024, "learning_rate": 8.746714106658873e-07, "loss": 0.0061, "step": 309980 }, { "epoch": 3.312035899353598, "grad_norm": 5.335360050201416, "learning_rate": 8.746602852236059e-07, "loss": 0.0156, "step": 309990 }, { "epoch": 3.3121427426678776, "grad_norm": 3.7445082664489746, "learning_rate": 8.746491593583048e-07, "loss": 0.0291, "step": 310000 }, { "epoch": 3.312249585982157, "grad_norm": 0.007387548219412565, "learning_rate": 8.746380330699963e-07, "loss": 0.0157, "step": 310010 }, { "epoch": 3.312356429296437, "grad_norm": 1.9121524095535278, "learning_rate": 8.746269063586933e-07, "loss": 0.0161, "step": 310020 }, { "epoch": 3.3124632726107164, "grad_norm": 3.5021889209747314, "learning_rate": 8.746157792244082e-07, "loss": 0.0029, "step": 310030 }, { "epoch": 3.312570115924996, "grad_norm": 3.3615450859069824, "learning_rate": 8.746046516671537e-07, "loss": 0.0033, "step": 310040 }, { "epoch": 3.3126769592392757, "grad_norm": 0.010248972102999687, "learning_rate": 8.745935236869423e-07, "loss": 0.0114, "step": 310050 }, { "epoch": 3.312783802553555, "grad_norm": 4.173837661743164, "learning_rate": 8.745823952837865e-07, "loss": 0.0201, "step": 310060 }, { "epoch": 3.3128906458678347, "grad_norm": 0.019894681870937347, "learning_rate": 8.745712664576988e-07, "loss": 0.007, "step": 310070 }, { "epoch": 3.3129974891821146, "grad_norm": 0.6213497519493103, "learning_rate": 8.745601372086921e-07, "loss": 0.0098, "step": 310080 }, { "epoch": 3.313104332496394, "grad_norm": 3.7680251598358154, "learning_rate": 8.745490075367785e-07, "loss": 0.0219, "step": 310090 }, { "epoch": 3.3132111758106735, "grad_norm": 0.16575540602207184, "learning_rate": 8.745378774419709e-07, "loss": 0.0017, "step": 310100 }, { "epoch": 3.3133180191249534, "grad_norm": 0.01253712922334671, "learning_rate": 8.745267469242818e-07, "loss": 0.0188, "step": 310110 }, { "epoch": 3.313424862439233, "grad_norm": 0.018173212185502052, "learning_rate": 8.745156159837237e-07, "loss": 0.0067, "step": 310120 }, { "epoch": 3.3135317057535123, "grad_norm": 0.5287365913391113, "learning_rate": 8.745044846203091e-07, "loss": 0.0098, "step": 310130 }, { "epoch": 3.313638549067792, "grad_norm": 1.4199957847595215, "learning_rate": 8.744933528340508e-07, "loss": 0.0009, "step": 310140 }, { "epoch": 3.3137453923820717, "grad_norm": 1.069738745689392, "learning_rate": 8.744822206249613e-07, "loss": 0.019, "step": 310150 }, { "epoch": 3.313852235696351, "grad_norm": 0.001077249413356185, "learning_rate": 8.744710879930531e-07, "loss": 0.0453, "step": 310160 }, { "epoch": 3.313959079010631, "grad_norm": 0.02466689608991146, "learning_rate": 8.744599549383386e-07, "loss": 0.0095, "step": 310170 }, { "epoch": 3.3140659223249105, "grad_norm": 3.965738534927368, "learning_rate": 8.744488214608306e-07, "loss": 0.0274, "step": 310180 }, { "epoch": 3.31417276563919, "grad_norm": 0.5280628800392151, "learning_rate": 8.744376875605417e-07, "loss": 0.0069, "step": 310190 }, { "epoch": 3.31427960895347, "grad_norm": 4.3586506843566895, "learning_rate": 8.744265532374843e-07, "loss": 0.0246, "step": 310200 }, { "epoch": 3.3143864522677493, "grad_norm": 7.209039211273193, "learning_rate": 8.744154184916713e-07, "loss": 0.0024, "step": 310210 }, { "epoch": 3.3144932955820288, "grad_norm": 1.21564519405365, "learning_rate": 8.744042833231148e-07, "loss": 0.0072, "step": 310220 }, { "epoch": 3.3146001388963087, "grad_norm": 0.008786612190306187, "learning_rate": 8.743931477318276e-07, "loss": 0.019, "step": 310230 }, { "epoch": 3.314706982210588, "grad_norm": 0.41888388991355896, "learning_rate": 8.743820117178224e-07, "loss": 0.0328, "step": 310240 }, { "epoch": 3.314813825524868, "grad_norm": 0.056426674127578735, "learning_rate": 8.743708752811117e-07, "loss": 0.0137, "step": 310250 }, { "epoch": 3.3149206688391475, "grad_norm": 2.837451457977295, "learning_rate": 8.743597384217079e-07, "loss": 0.037, "step": 310260 }, { "epoch": 3.315027512153427, "grad_norm": 15.701525688171387, "learning_rate": 8.743486011396237e-07, "loss": 0.0454, "step": 310270 }, { "epoch": 3.3151343554677064, "grad_norm": 0.32810789346694946, "learning_rate": 8.743374634348717e-07, "loss": 0.0123, "step": 310280 }, { "epoch": 3.3152411987819863, "grad_norm": 0.23087897896766663, "learning_rate": 8.743263253074644e-07, "loss": 0.002, "step": 310290 }, { "epoch": 3.3153480420962658, "grad_norm": 0.630953848361969, "learning_rate": 8.743151867574145e-07, "loss": 0.0042, "step": 310300 }, { "epoch": 3.3154548854105457, "grad_norm": 0.045311689376831055, "learning_rate": 8.743040477847344e-07, "loss": 0.0051, "step": 310310 }, { "epoch": 3.315561728724825, "grad_norm": 1.0946577787399292, "learning_rate": 8.742929083894369e-07, "loss": 0.0113, "step": 310320 }, { "epoch": 3.3156685720391046, "grad_norm": 3.5632259845733643, "learning_rate": 8.742817685715345e-07, "loss": 0.0049, "step": 310330 }, { "epoch": 3.315775415353384, "grad_norm": 0.1515698879957199, "learning_rate": 8.742706283310395e-07, "loss": 0.008, "step": 310340 }, { "epoch": 3.315882258667664, "grad_norm": 2.173914670944214, "learning_rate": 8.742594876679649e-07, "loss": 0.0189, "step": 310350 }, { "epoch": 3.3159891019819434, "grad_norm": 0.12985296547412872, "learning_rate": 8.742483465823232e-07, "loss": 0.0024, "step": 310360 }, { "epoch": 3.3160959452962233, "grad_norm": 0.07234636694192886, "learning_rate": 8.742372050741267e-07, "loss": 0.0017, "step": 310370 }, { "epoch": 3.3162027886105028, "grad_norm": 0.02151496894657612, "learning_rate": 8.742260631433882e-07, "loss": 0.0199, "step": 310380 }, { "epoch": 3.3163096319247822, "grad_norm": 0.0030033730436116457, "learning_rate": 8.742149207901201e-07, "loss": 0.0088, "step": 310390 }, { "epoch": 3.3164164752390617, "grad_norm": 0.3304710388183594, "learning_rate": 8.742037780143352e-07, "loss": 0.0023, "step": 310400 }, { "epoch": 3.3165233185533416, "grad_norm": 0.0018668111879378557, "learning_rate": 8.741926348160461e-07, "loss": 0.0005, "step": 310410 }, { "epoch": 3.316630161867621, "grad_norm": 4.870622634887695, "learning_rate": 8.741814911952651e-07, "loss": 0.0095, "step": 310420 }, { "epoch": 3.316737005181901, "grad_norm": 0.02886475808918476, "learning_rate": 8.741703471520052e-07, "loss": 0.0587, "step": 310430 }, { "epoch": 3.3168438484961804, "grad_norm": 0.05809204652905464, "learning_rate": 8.741592026862785e-07, "loss": 0.0137, "step": 310440 }, { "epoch": 3.31695069181046, "grad_norm": 2.7878146171569824, "learning_rate": 8.74148057798098e-07, "loss": 0.0142, "step": 310450 }, { "epoch": 3.3170575351247393, "grad_norm": 0.0016414953861385584, "learning_rate": 8.741369124874759e-07, "loss": 0.0023, "step": 310460 }, { "epoch": 3.3171643784390192, "grad_norm": 0.05762087181210518, "learning_rate": 8.74125766754425e-07, "loss": 0.0006, "step": 310470 }, { "epoch": 3.3172712217532987, "grad_norm": 0.010882319882512093, "learning_rate": 8.741146205989581e-07, "loss": 0.0134, "step": 310480 }, { "epoch": 3.3173780650675786, "grad_norm": 9.293410301208496, "learning_rate": 8.741034740210874e-07, "loss": 0.0385, "step": 310490 }, { "epoch": 3.317484908381858, "grad_norm": 0.04423287510871887, "learning_rate": 8.740923270208257e-07, "loss": 0.0646, "step": 310500 }, { "epoch": 3.3175917516961375, "grad_norm": 0.29605036973953247, "learning_rate": 8.740811795981855e-07, "loss": 0.0139, "step": 310510 }, { "epoch": 3.3176985950104174, "grad_norm": 0.07409940659999847, "learning_rate": 8.740700317531795e-07, "loss": 0.0021, "step": 310520 }, { "epoch": 3.317805438324697, "grad_norm": 1.3712520599365234, "learning_rate": 8.7405888348582e-07, "loss": 0.0121, "step": 310530 }, { "epoch": 3.3179122816389763, "grad_norm": 0.1493159979581833, "learning_rate": 8.7404773479612e-07, "loss": 0.0005, "step": 310540 }, { "epoch": 3.3180191249532562, "grad_norm": 0.22178484499454498, "learning_rate": 8.740365856840917e-07, "loss": 0.0145, "step": 310550 }, { "epoch": 3.3181259682675357, "grad_norm": 1.3269227743148804, "learning_rate": 8.74025436149748e-07, "loss": 0.0285, "step": 310560 }, { "epoch": 3.318232811581815, "grad_norm": 0.019572505727410316, "learning_rate": 8.740142861931012e-07, "loss": 0.0108, "step": 310570 }, { "epoch": 3.318339654896095, "grad_norm": 0.005263314116746187, "learning_rate": 8.740031358141642e-07, "loss": 0.0049, "step": 310580 }, { "epoch": 3.3184464982103745, "grad_norm": 0.007666571065783501, "learning_rate": 8.739919850129494e-07, "loss": 0.0269, "step": 310590 }, { "epoch": 3.318553341524654, "grad_norm": 0.020704101771116257, "learning_rate": 8.739808337894694e-07, "loss": 0.0243, "step": 310600 }, { "epoch": 3.318660184838934, "grad_norm": 0.10653582215309143, "learning_rate": 8.739696821437368e-07, "loss": 0.0215, "step": 310610 }, { "epoch": 3.3187670281532133, "grad_norm": 0.00892520509660244, "learning_rate": 8.739585300757642e-07, "loss": 0.0055, "step": 310620 }, { "epoch": 3.318873871467493, "grad_norm": 0.19923704862594604, "learning_rate": 8.739473775855642e-07, "loss": 0.0088, "step": 310630 }, { "epoch": 3.3189807147817727, "grad_norm": 1.1387720108032227, "learning_rate": 8.739362246731494e-07, "loss": 0.0695, "step": 310640 }, { "epoch": 3.319087558096052, "grad_norm": 0.018077481538057327, "learning_rate": 8.739250713385324e-07, "loss": 0.0097, "step": 310650 }, { "epoch": 3.3191944014103316, "grad_norm": 4.904196739196777, "learning_rate": 8.739139175817258e-07, "loss": 0.0074, "step": 310660 }, { "epoch": 3.3193012447246115, "grad_norm": 0.13570353388786316, "learning_rate": 8.73902763402742e-07, "loss": 0.0194, "step": 310670 }, { "epoch": 3.319408088038891, "grad_norm": 0.02492889203131199, "learning_rate": 8.738916088015939e-07, "loss": 0.0019, "step": 310680 }, { "epoch": 3.3195149313531704, "grad_norm": 0.07000424712896347, "learning_rate": 8.73880453778294e-07, "loss": 0.0015, "step": 310690 }, { "epoch": 3.3196217746674503, "grad_norm": 0.07230625301599503, "learning_rate": 8.738692983328549e-07, "loss": 0.0046, "step": 310700 }, { "epoch": 3.31972861798173, "grad_norm": 0.6676785945892334, "learning_rate": 8.738581424652889e-07, "loss": 0.015, "step": 310710 }, { "epoch": 3.3198354612960093, "grad_norm": 2.5396506786346436, "learning_rate": 8.738469861756091e-07, "loss": 0.0039, "step": 310720 }, { "epoch": 3.319942304610289, "grad_norm": 2.2242724895477295, "learning_rate": 8.738358294638276e-07, "loss": 0.0137, "step": 310730 }, { "epoch": 3.3200491479245686, "grad_norm": 0.0011977493995800614, "learning_rate": 8.738246723299574e-07, "loss": 0.0004, "step": 310740 }, { "epoch": 3.320155991238848, "grad_norm": 1.022330641746521, "learning_rate": 8.738135147740108e-07, "loss": 0.0308, "step": 310750 }, { "epoch": 3.320262834553128, "grad_norm": 14.811846733093262, "learning_rate": 8.738023567960006e-07, "loss": 0.0253, "step": 310760 }, { "epoch": 3.3203696778674074, "grad_norm": 5.533885478973389, "learning_rate": 8.737911983959392e-07, "loss": 0.0298, "step": 310770 }, { "epoch": 3.320476521181687, "grad_norm": 0.3512280285358429, "learning_rate": 8.737800395738395e-07, "loss": 0.0136, "step": 310780 }, { "epoch": 3.320583364495967, "grad_norm": 2.1972243785858154, "learning_rate": 8.73768880329714e-07, "loss": 0.0328, "step": 310790 }, { "epoch": 3.3206902078102463, "grad_norm": 0.24657323956489563, "learning_rate": 8.737577206635751e-07, "loss": 0.0005, "step": 310800 }, { "epoch": 3.3207970511245257, "grad_norm": 5.49659538269043, "learning_rate": 8.737465605754354e-07, "loss": 0.0066, "step": 310810 }, { "epoch": 3.3209038944388056, "grad_norm": 0.022699004039168358, "learning_rate": 8.737354000653079e-07, "loss": 0.0074, "step": 310820 }, { "epoch": 3.321010737753085, "grad_norm": 3.8026247024536133, "learning_rate": 8.737242391332047e-07, "loss": 0.0092, "step": 310830 }, { "epoch": 3.3211175810673645, "grad_norm": 0.013369033113121986, "learning_rate": 8.737130777791387e-07, "loss": 0.0087, "step": 310840 }, { "epoch": 3.3212244243816444, "grad_norm": 0.08669479936361313, "learning_rate": 8.737019160031224e-07, "loss": 0.0032, "step": 310850 }, { "epoch": 3.321331267695924, "grad_norm": 0.01616005040705204, "learning_rate": 8.736907538051686e-07, "loss": 0.0357, "step": 310860 }, { "epoch": 3.3214381110102034, "grad_norm": 2.122277021408081, "learning_rate": 8.736795911852896e-07, "loss": 0.0317, "step": 310870 }, { "epoch": 3.3215449543244833, "grad_norm": 0.013623259961605072, "learning_rate": 8.736684281434981e-07, "loss": 0.0391, "step": 310880 }, { "epoch": 3.3216517976387627, "grad_norm": 4.24274206161499, "learning_rate": 8.736572646798068e-07, "loss": 0.0357, "step": 310890 }, { "epoch": 3.321758640953042, "grad_norm": 0.03936241194605827, "learning_rate": 8.736461007942282e-07, "loss": 0.0004, "step": 310900 }, { "epoch": 3.321865484267322, "grad_norm": 1.6816710233688354, "learning_rate": 8.73634936486775e-07, "loss": 0.0217, "step": 310910 }, { "epoch": 3.3219723275816015, "grad_norm": 0.24219442903995514, "learning_rate": 8.736237717574597e-07, "loss": 0.0161, "step": 310920 }, { "epoch": 3.322079170895881, "grad_norm": 3.1838879585266113, "learning_rate": 8.736126066062949e-07, "loss": 0.0058, "step": 310930 }, { "epoch": 3.322186014210161, "grad_norm": 0.020894167944788933, "learning_rate": 8.736014410332935e-07, "loss": 0.0122, "step": 310940 }, { "epoch": 3.3222928575244404, "grad_norm": 0.01808660291135311, "learning_rate": 8.735902750384679e-07, "loss": 0.0447, "step": 310950 }, { "epoch": 3.32239970083872, "grad_norm": 0.002490860875695944, "learning_rate": 8.735791086218304e-07, "loss": 0.0038, "step": 310960 }, { "epoch": 3.3225065441529997, "grad_norm": 0.010088564828038216, "learning_rate": 8.73567941783394e-07, "loss": 0.0632, "step": 310970 }, { "epoch": 3.322613387467279, "grad_norm": 0.017912862822413445, "learning_rate": 8.735567745231713e-07, "loss": 0.0195, "step": 310980 }, { "epoch": 3.3227202307815586, "grad_norm": 4.463822841644287, "learning_rate": 8.735456068411746e-07, "loss": 0.0202, "step": 310990 }, { "epoch": 3.3228270740958386, "grad_norm": 0.0033260527998209, "learning_rate": 8.735344387374168e-07, "loss": 0.0068, "step": 311000 }, { "epoch": 3.322933917410118, "grad_norm": 1.5064702033996582, "learning_rate": 8.735232702119106e-07, "loss": 0.0055, "step": 311010 }, { "epoch": 3.323040760724398, "grad_norm": 2.6297333240509033, "learning_rate": 8.735121012646682e-07, "loss": 0.0022, "step": 311020 }, { "epoch": 3.3231476040386774, "grad_norm": 0.13908492028713226, "learning_rate": 8.735009318957027e-07, "loss": 0.0013, "step": 311030 }, { "epoch": 3.323254447352957, "grad_norm": 1.020081877708435, "learning_rate": 8.734897621050264e-07, "loss": 0.0284, "step": 311040 }, { "epoch": 3.3233612906672363, "grad_norm": 1.6113907098770142, "learning_rate": 8.734785918926517e-07, "loss": 0.0043, "step": 311050 }, { "epoch": 3.323468133981516, "grad_norm": 0.9353151321411133, "learning_rate": 8.734674212585917e-07, "loss": 0.0141, "step": 311060 }, { "epoch": 3.3235749772957957, "grad_norm": 0.5264560580253601, "learning_rate": 8.734562502028588e-07, "loss": 0.0496, "step": 311070 }, { "epoch": 3.3236818206100756, "grad_norm": 3.611741065979004, "learning_rate": 8.734450787254654e-07, "loss": 0.0125, "step": 311080 }, { "epoch": 3.323788663924355, "grad_norm": 4.65770149230957, "learning_rate": 8.734339068264247e-07, "loss": 0.019, "step": 311090 }, { "epoch": 3.3238955072386345, "grad_norm": 5.17244815826416, "learning_rate": 8.734227345057486e-07, "loss": 0.0154, "step": 311100 }, { "epoch": 3.324002350552914, "grad_norm": 4.5633368492126465, "learning_rate": 8.734115617634503e-07, "loss": 0.004, "step": 311110 }, { "epoch": 3.324109193867194, "grad_norm": 2.4928159713745117, "learning_rate": 8.73400388599542e-07, "loss": 0.0024, "step": 311120 }, { "epoch": 3.3242160371814733, "grad_norm": 1.3630106449127197, "learning_rate": 8.733892150140365e-07, "loss": 0.0108, "step": 311130 }, { "epoch": 3.324322880495753, "grad_norm": 0.02765115723013878, "learning_rate": 8.733780410069465e-07, "loss": 0.009, "step": 311140 }, { "epoch": 3.3244297238100327, "grad_norm": 1.9985557794570923, "learning_rate": 8.733668665782844e-07, "loss": 0.0114, "step": 311150 }, { "epoch": 3.324536567124312, "grad_norm": 0.0024652329739183187, "learning_rate": 8.733556917280629e-07, "loss": 0.0005, "step": 311160 }, { "epoch": 3.3246434104385916, "grad_norm": 0.1923525035381317, "learning_rate": 8.733445164562947e-07, "loss": 0.0039, "step": 311170 }, { "epoch": 3.3247502537528715, "grad_norm": 4.5381245613098145, "learning_rate": 8.733333407629923e-07, "loss": 0.0198, "step": 311180 }, { "epoch": 3.324857097067151, "grad_norm": 0.0020130483899265528, "learning_rate": 8.733221646481686e-07, "loss": 0.0041, "step": 311190 }, { "epoch": 3.324963940381431, "grad_norm": 0.2785932421684265, "learning_rate": 8.733109881118358e-07, "loss": 0.0029, "step": 311200 }, { "epoch": 3.3250707836957103, "grad_norm": 0.049772948026657104, "learning_rate": 8.732998111540067e-07, "loss": 0.0042, "step": 311210 }, { "epoch": 3.3251776270099898, "grad_norm": 0.7272917628288269, "learning_rate": 8.732886337746941e-07, "loss": 0.003, "step": 311220 }, { "epoch": 3.325284470324269, "grad_norm": 0.790943443775177, "learning_rate": 8.732774559739103e-07, "loss": 0.036, "step": 311230 }, { "epoch": 3.325391313638549, "grad_norm": 0.005809273570775986, "learning_rate": 8.732662777516682e-07, "loss": 0.0205, "step": 311240 }, { "epoch": 3.3254981569528286, "grad_norm": 0.4483451843261719, "learning_rate": 8.732550991079801e-07, "loss": 0.0103, "step": 311250 }, { "epoch": 3.3256050002671085, "grad_norm": 9.348689079284668, "learning_rate": 8.732439200428591e-07, "loss": 0.0187, "step": 311260 }, { "epoch": 3.325711843581388, "grad_norm": 0.07281100749969482, "learning_rate": 8.732327405563174e-07, "loss": 0.0013, "step": 311270 }, { "epoch": 3.3258186868956674, "grad_norm": 0.00366563000716269, "learning_rate": 8.732215606483677e-07, "loss": 0.0056, "step": 311280 }, { "epoch": 3.3259255302099473, "grad_norm": 1.7071462869644165, "learning_rate": 8.732103803190228e-07, "loss": 0.0127, "step": 311290 }, { "epoch": 3.3260323735242268, "grad_norm": 0.14479970932006836, "learning_rate": 8.731991995682953e-07, "loss": 0.0153, "step": 311300 }, { "epoch": 3.326139216838506, "grad_norm": 0.00800272822380066, "learning_rate": 8.731880183961974e-07, "loss": 0.0234, "step": 311310 }, { "epoch": 3.326246060152786, "grad_norm": 0.004028222057968378, "learning_rate": 8.731768368027423e-07, "loss": 0.0063, "step": 311320 }, { "epoch": 3.3263529034670656, "grad_norm": 1.8069133758544922, "learning_rate": 8.731656547879424e-07, "loss": 0.0086, "step": 311330 }, { "epoch": 3.326459746781345, "grad_norm": 7.217584133148193, "learning_rate": 8.731544723518102e-07, "loss": 0.013, "step": 311340 }, { "epoch": 3.326566590095625, "grad_norm": 0.024754196405410767, "learning_rate": 8.731432894943587e-07, "loss": 0.0268, "step": 311350 }, { "epoch": 3.3266734334099044, "grad_norm": 0.3175034821033478, "learning_rate": 8.731321062156001e-07, "loss": 0.005, "step": 311360 }, { "epoch": 3.326780276724184, "grad_norm": 4.7200775146484375, "learning_rate": 8.731209225155472e-07, "loss": 0.0109, "step": 311370 }, { "epoch": 3.3268871200384638, "grad_norm": 0.018267259001731873, "learning_rate": 8.731097383942125e-07, "loss": 0.0074, "step": 311380 }, { "epoch": 3.3269939633527432, "grad_norm": 0.1373153030872345, "learning_rate": 8.730985538516088e-07, "loss": 0.0081, "step": 311390 }, { "epoch": 3.3271008066670227, "grad_norm": 0.002167917089536786, "learning_rate": 8.730873688877487e-07, "loss": 0.0099, "step": 311400 }, { "epoch": 3.3272076499813026, "grad_norm": 0.08851902186870575, "learning_rate": 8.730761835026448e-07, "loss": 0.0115, "step": 311410 }, { "epoch": 3.327314493295582, "grad_norm": 1.3790028095245361, "learning_rate": 8.730649976963098e-07, "loss": 0.0155, "step": 311420 }, { "epoch": 3.3274213366098615, "grad_norm": 0.5546405911445618, "learning_rate": 8.730538114687562e-07, "loss": 0.003, "step": 311430 }, { "epoch": 3.3275281799241414, "grad_norm": 0.0057988581247627735, "learning_rate": 8.730426248199967e-07, "loss": 0.018, "step": 311440 }, { "epoch": 3.327635023238421, "grad_norm": 0.03626482933759689, "learning_rate": 8.730314377500438e-07, "loss": 0.0017, "step": 311450 }, { "epoch": 3.3277418665527003, "grad_norm": 0.006239545065909624, "learning_rate": 8.730202502589105e-07, "loss": 0.0196, "step": 311460 }, { "epoch": 3.3278487098669802, "grad_norm": 0.029828587546944618, "learning_rate": 8.730090623466089e-07, "loss": 0.0051, "step": 311470 }, { "epoch": 3.3279555531812597, "grad_norm": 0.0016458843601867557, "learning_rate": 8.729978740131522e-07, "loss": 0.0072, "step": 311480 }, { "epoch": 3.328062396495539, "grad_norm": 0.003097502514719963, "learning_rate": 8.729866852585526e-07, "loss": 0.0189, "step": 311490 }, { "epoch": 3.328169239809819, "grad_norm": 0.01405293121933937, "learning_rate": 8.729754960828229e-07, "loss": 0.0199, "step": 311500 }, { "epoch": 3.3282760831240985, "grad_norm": 0.026910172775387764, "learning_rate": 8.729643064859758e-07, "loss": 0.0099, "step": 311510 }, { "epoch": 3.328382926438378, "grad_norm": 12.25808048248291, "learning_rate": 8.729531164680237e-07, "loss": 0.0073, "step": 311520 }, { "epoch": 3.328489769752658, "grad_norm": 0.003693831153213978, "learning_rate": 8.729419260289794e-07, "loss": 0.0267, "step": 311530 }, { "epoch": 3.3285966130669373, "grad_norm": 0.272542268037796, "learning_rate": 8.729307351688555e-07, "loss": 0.0182, "step": 311540 }, { "epoch": 3.328703456381217, "grad_norm": 0.26814112067222595, "learning_rate": 8.729195438876648e-07, "loss": 0.0193, "step": 311550 }, { "epoch": 3.3288102996954967, "grad_norm": 1.3240536451339722, "learning_rate": 8.729083521854197e-07, "loss": 0.0019, "step": 311560 }, { "epoch": 3.328917143009776, "grad_norm": 2.4055185317993164, "learning_rate": 8.728971600621328e-07, "loss": 0.0198, "step": 311570 }, { "epoch": 3.3290239863240556, "grad_norm": 4.601426124572754, "learning_rate": 8.72885967517817e-07, "loss": 0.0123, "step": 311580 }, { "epoch": 3.3291308296383355, "grad_norm": 0.0036460664123296738, "learning_rate": 8.728747745524849e-07, "loss": 0.0116, "step": 311590 }, { "epoch": 3.329237672952615, "grad_norm": 0.6907237768173218, "learning_rate": 8.728635811661489e-07, "loss": 0.0205, "step": 311600 }, { "epoch": 3.3293445162668944, "grad_norm": 0.7898962497711182, "learning_rate": 8.728523873588218e-07, "loss": 0.0048, "step": 311610 }, { "epoch": 3.3294513595811743, "grad_norm": 0.01151295192539692, "learning_rate": 8.728411931305163e-07, "loss": 0.0085, "step": 311620 }, { "epoch": 3.329558202895454, "grad_norm": 4.339011192321777, "learning_rate": 8.728299984812448e-07, "loss": 0.0119, "step": 311630 }, { "epoch": 3.3296650462097332, "grad_norm": 0.028153274208307266, "learning_rate": 8.728188034110203e-07, "loss": 0.0144, "step": 311640 }, { "epoch": 3.329771889524013, "grad_norm": 2.1097545623779297, "learning_rate": 8.72807607919855e-07, "loss": 0.0113, "step": 311650 }, { "epoch": 3.3298787328382926, "grad_norm": 0.11922159045934677, "learning_rate": 8.727964120077619e-07, "loss": 0.01, "step": 311660 }, { "epoch": 3.329985576152572, "grad_norm": 0.05387145280838013, "learning_rate": 8.727852156747535e-07, "loss": 0.0052, "step": 311670 }, { "epoch": 3.330092419466852, "grad_norm": 0.5617347359657288, "learning_rate": 8.727740189208424e-07, "loss": 0.0116, "step": 311680 }, { "epoch": 3.3301992627811314, "grad_norm": 0.06932379305362701, "learning_rate": 8.727628217460415e-07, "loss": 0.028, "step": 311690 }, { "epoch": 3.330306106095411, "grad_norm": 0.22768919169902802, "learning_rate": 8.72751624150363e-07, "loss": 0.0115, "step": 311700 }, { "epoch": 3.330412949409691, "grad_norm": 0.010173050686717033, "learning_rate": 8.727404261338198e-07, "loss": 0.0121, "step": 311710 }, { "epoch": 3.3305197927239703, "grad_norm": 19.047189712524414, "learning_rate": 8.727292276964246e-07, "loss": 0.0277, "step": 311720 }, { "epoch": 3.33062663603825, "grad_norm": 0.04687328264117241, "learning_rate": 8.727180288381899e-07, "loss": 0.0063, "step": 311730 }, { "epoch": 3.3307334793525296, "grad_norm": 0.04080335795879364, "learning_rate": 8.727068295591286e-07, "loss": 0.0015, "step": 311740 }, { "epoch": 3.330840322666809, "grad_norm": 0.18589897453784943, "learning_rate": 8.726956298592529e-07, "loss": 0.0074, "step": 311750 }, { "epoch": 3.3309471659810885, "grad_norm": 0.06479848176240921, "learning_rate": 8.72684429738576e-07, "loss": 0.027, "step": 311760 }, { "epoch": 3.3310540092953684, "grad_norm": 1.2764346599578857, "learning_rate": 8.726732291971101e-07, "loss": 0.009, "step": 311770 }, { "epoch": 3.331160852609648, "grad_norm": 0.03165876120328903, "learning_rate": 8.726620282348679e-07, "loss": 0.0286, "step": 311780 }, { "epoch": 3.331267695923928, "grad_norm": 0.011892537586390972, "learning_rate": 8.726508268518624e-07, "loss": 0.0016, "step": 311790 }, { "epoch": 3.3313745392382073, "grad_norm": 13.496129035949707, "learning_rate": 8.726396250481057e-07, "loss": 0.0026, "step": 311800 }, { "epoch": 3.3314813825524867, "grad_norm": 1.740112543106079, "learning_rate": 8.726284228236109e-07, "loss": 0.0122, "step": 311810 }, { "epoch": 3.331588225866766, "grad_norm": 0.007660538423806429, "learning_rate": 8.726172201783904e-07, "loss": 0.0304, "step": 311820 }, { "epoch": 3.331695069181046, "grad_norm": 9.564291954040527, "learning_rate": 8.72606017112457e-07, "loss": 0.0228, "step": 311830 }, { "epoch": 3.3318019124953255, "grad_norm": 0.09504750370979309, "learning_rate": 8.725948136258234e-07, "loss": 0.0242, "step": 311840 }, { "epoch": 3.3319087558096054, "grad_norm": 0.7438428997993469, "learning_rate": 8.72583609718502e-07, "loss": 0.0023, "step": 311850 }, { "epoch": 3.332015599123885, "grad_norm": 8.340149879455566, "learning_rate": 8.725724053905055e-07, "loss": 0.0621, "step": 311860 }, { "epoch": 3.3321224424381644, "grad_norm": 0.08988860249519348, "learning_rate": 8.725612006418467e-07, "loss": 0.0149, "step": 311870 }, { "epoch": 3.332229285752444, "grad_norm": 0.04622315615415573, "learning_rate": 8.725499954725382e-07, "loss": 0.0109, "step": 311880 }, { "epoch": 3.3323361290667237, "grad_norm": 0.04975633695721626, "learning_rate": 8.725387898825927e-07, "loss": 0.0139, "step": 311890 }, { "epoch": 3.332442972381003, "grad_norm": 0.00996659230440855, "learning_rate": 8.725275838720228e-07, "loss": 0.0045, "step": 311900 }, { "epoch": 3.332549815695283, "grad_norm": 0.03688989207148552, "learning_rate": 8.725163774408411e-07, "loss": 0.003, "step": 311910 }, { "epoch": 3.3326566590095625, "grad_norm": 0.37046393752098083, "learning_rate": 8.725051705890603e-07, "loss": 0.007, "step": 311920 }, { "epoch": 3.332763502323842, "grad_norm": 0.42754533886909485, "learning_rate": 8.72493963316693e-07, "loss": 0.0008, "step": 311930 }, { "epoch": 3.3328703456381215, "grad_norm": 2.4104182720184326, "learning_rate": 8.724827556237518e-07, "loss": 0.0033, "step": 311940 }, { "epoch": 3.3329771889524014, "grad_norm": 2.2001025676727295, "learning_rate": 8.724715475102496e-07, "loss": 0.0289, "step": 311950 }, { "epoch": 3.333084032266681, "grad_norm": 0.0011108422186225653, "learning_rate": 8.724603389761989e-07, "loss": 0.0005, "step": 311960 }, { "epoch": 3.3331908755809607, "grad_norm": 4.382129192352295, "learning_rate": 8.724491300216124e-07, "loss": 0.0248, "step": 311970 }, { "epoch": 3.33329771889524, "grad_norm": 0.0733274444937706, "learning_rate": 8.724379206465027e-07, "loss": 0.0087, "step": 311980 }, { "epoch": 3.3334045622095196, "grad_norm": 0.003105254378169775, "learning_rate": 8.724267108508825e-07, "loss": 0.007, "step": 311990 }, { "epoch": 3.3335114055237995, "grad_norm": 8.159392356872559, "learning_rate": 8.724155006347645e-07, "loss": 0.023, "step": 312000 }, { "epoch": 3.333618248838079, "grad_norm": 0.04042347893118858, "learning_rate": 8.724042899981611e-07, "loss": 0.0075, "step": 312010 }, { "epoch": 3.3337250921523585, "grad_norm": 0.4663642346858978, "learning_rate": 8.723930789410851e-07, "loss": 0.0063, "step": 312020 }, { "epoch": 3.3338319354666384, "grad_norm": 2.8820950984954834, "learning_rate": 8.723818674635495e-07, "loss": 0.0248, "step": 312030 }, { "epoch": 3.333938778780918, "grad_norm": 0.48088499903678894, "learning_rate": 8.723706555655664e-07, "loss": 0.0268, "step": 312040 }, { "epoch": 3.3340456220951973, "grad_norm": 2.6934773921966553, "learning_rate": 8.723594432471488e-07, "loss": 0.0133, "step": 312050 }, { "epoch": 3.334152465409477, "grad_norm": 0.8301748633384705, "learning_rate": 8.723482305083094e-07, "loss": 0.0137, "step": 312060 }, { "epoch": 3.3342593087237566, "grad_norm": 0.006975075695663691, "learning_rate": 8.723370173490607e-07, "loss": 0.0269, "step": 312070 }, { "epoch": 3.334366152038036, "grad_norm": 0.033135246485471725, "learning_rate": 8.723258037694154e-07, "loss": 0.0135, "step": 312080 }, { "epoch": 3.334472995352316, "grad_norm": 3.791111707687378, "learning_rate": 8.723145897693863e-07, "loss": 0.0144, "step": 312090 }, { "epoch": 3.3345798386665955, "grad_norm": 0.016122255474328995, "learning_rate": 8.723033753489857e-07, "loss": 0.0065, "step": 312100 }, { "epoch": 3.334686681980875, "grad_norm": 0.1273188441991806, "learning_rate": 8.722921605082265e-07, "loss": 0.0124, "step": 312110 }, { "epoch": 3.334793525295155, "grad_norm": 0.05113933980464935, "learning_rate": 8.722809452471213e-07, "loss": 0.0073, "step": 312120 }, { "epoch": 3.3349003686094343, "grad_norm": 0.025725897401571274, "learning_rate": 8.72269729565683e-07, "loss": 0.0033, "step": 312130 }, { "epoch": 3.3350072119237137, "grad_norm": 4.4304423332214355, "learning_rate": 8.72258513463924e-07, "loss": 0.0369, "step": 312140 }, { "epoch": 3.3351140552379936, "grad_norm": 0.021453790366649628, "learning_rate": 8.72247296941857e-07, "loss": 0.0069, "step": 312150 }, { "epoch": 3.335220898552273, "grad_norm": 0.003463233355432749, "learning_rate": 8.722360799994947e-07, "loss": 0.0035, "step": 312160 }, { "epoch": 3.3353277418665526, "grad_norm": 0.04457809031009674, "learning_rate": 8.7222486263685e-07, "loss": 0.0075, "step": 312170 }, { "epoch": 3.3354345851808325, "grad_norm": 15.856776237487793, "learning_rate": 8.722136448539351e-07, "loss": 0.0274, "step": 312180 }, { "epoch": 3.335541428495112, "grad_norm": 1.16716468334198, "learning_rate": 8.72202426650763e-07, "loss": 0.0685, "step": 312190 }, { "epoch": 3.3356482718093914, "grad_norm": 1.298900842666626, "learning_rate": 8.721912080273462e-07, "loss": 0.0276, "step": 312200 }, { "epoch": 3.3357551151236713, "grad_norm": 2.6256184577941895, "learning_rate": 8.721799889836975e-07, "loss": 0.0269, "step": 312210 }, { "epoch": 3.3358619584379507, "grad_norm": 0.017113210633397102, "learning_rate": 8.721687695198295e-07, "loss": 0.0432, "step": 312220 }, { "epoch": 3.33596880175223, "grad_norm": 0.0076713282614946365, "learning_rate": 8.721575496357549e-07, "loss": 0.0007, "step": 312230 }, { "epoch": 3.33607564506651, "grad_norm": 0.19087013602256775, "learning_rate": 8.721463293314862e-07, "loss": 0.002, "step": 312240 }, { "epoch": 3.3361824883807896, "grad_norm": 0.01822304166853428, "learning_rate": 8.721351086070363e-07, "loss": 0.0191, "step": 312250 }, { "epoch": 3.336289331695069, "grad_norm": 0.0022840758319944143, "learning_rate": 8.721238874624179e-07, "loss": 0.0021, "step": 312260 }, { "epoch": 3.336396175009349, "grad_norm": 0.6988155841827393, "learning_rate": 8.721126658976433e-07, "loss": 0.0038, "step": 312270 }, { "epoch": 3.3365030183236284, "grad_norm": 6.610959529876709, "learning_rate": 8.721014439127256e-07, "loss": 0.0141, "step": 312280 }, { "epoch": 3.336609861637908, "grad_norm": 0.024152835831046104, "learning_rate": 8.720902215076772e-07, "loss": 0.0329, "step": 312290 }, { "epoch": 3.3367167049521878, "grad_norm": 0.08873452991247177, "learning_rate": 8.720789986825109e-07, "loss": 0.0132, "step": 312300 }, { "epoch": 3.336823548266467, "grad_norm": 0.27699777483940125, "learning_rate": 8.720677754372392e-07, "loss": 0.0201, "step": 312310 }, { "epoch": 3.3369303915807467, "grad_norm": 0.8197444677352905, "learning_rate": 8.720565517718751e-07, "loss": 0.0113, "step": 312320 }, { "epoch": 3.3370372348950266, "grad_norm": 0.05527182295918465, "learning_rate": 8.720453276864309e-07, "loss": 0.0459, "step": 312330 }, { "epoch": 3.337144078209306, "grad_norm": 8.722808837890625, "learning_rate": 8.720341031809195e-07, "loss": 0.0044, "step": 312340 }, { "epoch": 3.3372509215235855, "grad_norm": 4.981021881103516, "learning_rate": 8.720228782553536e-07, "loss": 0.0361, "step": 312350 }, { "epoch": 3.3373577648378654, "grad_norm": 0.021104827523231506, "learning_rate": 8.720116529097458e-07, "loss": 0.0105, "step": 312360 }, { "epoch": 3.337464608152145, "grad_norm": 4.857933521270752, "learning_rate": 8.720004271441086e-07, "loss": 0.0155, "step": 312370 }, { "epoch": 3.3375714514664243, "grad_norm": 20.82036018371582, "learning_rate": 8.719892009584549e-07, "loss": 0.0163, "step": 312380 }, { "epoch": 3.337678294780704, "grad_norm": 0.4839460253715515, "learning_rate": 8.719779743527975e-07, "loss": 0.0078, "step": 312390 }, { "epoch": 3.3377851380949837, "grad_norm": 1.2005839347839355, "learning_rate": 8.719667473271487e-07, "loss": 0.0145, "step": 312400 }, { "epoch": 3.337891981409263, "grad_norm": 8.466644287109375, "learning_rate": 8.719555198815214e-07, "loss": 0.0356, "step": 312410 }, { "epoch": 3.337998824723543, "grad_norm": 0.02821066789329052, "learning_rate": 8.719442920159284e-07, "loss": 0.0161, "step": 312420 }, { "epoch": 3.3381056680378225, "grad_norm": 0.021877996623516083, "learning_rate": 8.71933063730382e-07, "loss": 0.0043, "step": 312430 }, { "epoch": 3.338212511352102, "grad_norm": 23.882125854492188, "learning_rate": 8.719218350248951e-07, "loss": 0.0101, "step": 312440 }, { "epoch": 3.338319354666382, "grad_norm": 6.6876912117004395, "learning_rate": 8.719106058994805e-07, "loss": 0.0289, "step": 312450 }, { "epoch": 3.3384261979806613, "grad_norm": 0.06917954236268997, "learning_rate": 8.718993763541508e-07, "loss": 0.0019, "step": 312460 }, { "epoch": 3.3385330412949408, "grad_norm": 0.018654663115739822, "learning_rate": 8.718881463889185e-07, "loss": 0.0041, "step": 312470 }, { "epoch": 3.3386398846092207, "grad_norm": 0.006003331858664751, "learning_rate": 8.718769160037964e-07, "loss": 0.0133, "step": 312480 }, { "epoch": 3.3387467279235, "grad_norm": 0.010872244834899902, "learning_rate": 8.718656851987974e-07, "loss": 0.0109, "step": 312490 }, { "epoch": 3.33885357123778, "grad_norm": 0.06269103288650513, "learning_rate": 8.718544539739338e-07, "loss": 0.0095, "step": 312500 }, { "epoch": 3.3389604145520595, "grad_norm": 0.4896566867828369, "learning_rate": 8.718432223292185e-07, "loss": 0.0128, "step": 312510 }, { "epoch": 3.339067257866339, "grad_norm": 0.04577062278985977, "learning_rate": 8.718319902646642e-07, "loss": 0.0086, "step": 312520 }, { "epoch": 3.3391741011806184, "grad_norm": 0.1335151046514511, "learning_rate": 8.718207577802834e-07, "loss": 0.0679, "step": 312530 }, { "epoch": 3.3392809444948983, "grad_norm": 5.842788219451904, "learning_rate": 8.718095248760889e-07, "loss": 0.0149, "step": 312540 }, { "epoch": 3.3393877878091778, "grad_norm": 6.566046237945557, "learning_rate": 8.717982915520935e-07, "loss": 0.0402, "step": 312550 }, { "epoch": 3.3394946311234577, "grad_norm": 0.07398851215839386, "learning_rate": 8.717870578083098e-07, "loss": 0.0075, "step": 312560 }, { "epoch": 3.339601474437737, "grad_norm": 0.4503138065338135, "learning_rate": 8.717758236447503e-07, "loss": 0.0658, "step": 312570 }, { "epoch": 3.3397083177520166, "grad_norm": 0.005877859890460968, "learning_rate": 8.717645890614278e-07, "loss": 0.0066, "step": 312580 }, { "epoch": 3.339815161066296, "grad_norm": 0.018639417365193367, "learning_rate": 8.717533540583552e-07, "loss": 0.0182, "step": 312590 }, { "epoch": 3.339922004380576, "grad_norm": 1.1933673620224, "learning_rate": 8.71742118635545e-07, "loss": 0.0118, "step": 312600 }, { "epoch": 3.3400288476948554, "grad_norm": 2.203812837600708, "learning_rate": 8.717308827930097e-07, "loss": 0.0059, "step": 312610 }, { "epoch": 3.3401356910091353, "grad_norm": 2.436788320541382, "learning_rate": 8.717196465307623e-07, "loss": 0.0073, "step": 312620 }, { "epoch": 3.340242534323415, "grad_norm": 1.3679248094558716, "learning_rate": 8.717084098488155e-07, "loss": 0.0145, "step": 312630 }, { "epoch": 3.3403493776376942, "grad_norm": 0.0872746929526329, "learning_rate": 8.716971727471816e-07, "loss": 0.0085, "step": 312640 }, { "epoch": 3.3404562209519737, "grad_norm": 3.406285285949707, "learning_rate": 8.716859352258737e-07, "loss": 0.0083, "step": 312650 }, { "epoch": 3.3405630642662536, "grad_norm": 1.2215754985809326, "learning_rate": 8.716746972849043e-07, "loss": 0.0814, "step": 312660 }, { "epoch": 3.340669907580533, "grad_norm": 0.0009532678523100913, "learning_rate": 8.716634589242859e-07, "loss": 0.0148, "step": 312670 }, { "epoch": 3.340776750894813, "grad_norm": 0.20316128432750702, "learning_rate": 8.716522201440318e-07, "loss": 0.0234, "step": 312680 }, { "epoch": 3.3408835942090924, "grad_norm": 0.006763865239918232, "learning_rate": 8.71640980944154e-07, "loss": 0.001, "step": 312690 }, { "epoch": 3.340990437523372, "grad_norm": 0.004727334715425968, "learning_rate": 8.716297413246655e-07, "loss": 0.0182, "step": 312700 }, { "epoch": 3.3410972808376513, "grad_norm": 0.0035571579355746508, "learning_rate": 8.716185012855791e-07, "loss": 0.007, "step": 312710 }, { "epoch": 3.3412041241519312, "grad_norm": 0.6750715374946594, "learning_rate": 8.716072608269074e-07, "loss": 0.004, "step": 312720 }, { "epoch": 3.3413109674662107, "grad_norm": 0.04001491516828537, "learning_rate": 8.715960199486629e-07, "loss": 0.0107, "step": 312730 }, { "epoch": 3.3414178107804906, "grad_norm": 5.455172061920166, "learning_rate": 8.715847786508585e-07, "loss": 0.0023, "step": 312740 }, { "epoch": 3.34152465409477, "grad_norm": 0.024016201496124268, "learning_rate": 8.715735369335068e-07, "loss": 0.0014, "step": 312750 }, { "epoch": 3.3416314974090495, "grad_norm": 0.7779151797294617, "learning_rate": 8.715622947966207e-07, "loss": 0.016, "step": 312760 }, { "epoch": 3.3417383407233294, "grad_norm": 0.02488025464117527, "learning_rate": 8.715510522402126e-07, "loss": 0.0111, "step": 312770 }, { "epoch": 3.341845184037609, "grad_norm": 0.3992653489112854, "learning_rate": 8.715398092642954e-07, "loss": 0.0117, "step": 312780 }, { "epoch": 3.3419520273518883, "grad_norm": 0.1916555017232895, "learning_rate": 8.715285658688815e-07, "loss": 0.0125, "step": 312790 }, { "epoch": 3.3420588706661682, "grad_norm": 0.0016457155579701066, "learning_rate": 8.715173220539839e-07, "loss": 0.0185, "step": 312800 }, { "epoch": 3.3421657139804477, "grad_norm": 0.06648296862840652, "learning_rate": 8.715060778196153e-07, "loss": 0.009, "step": 312810 }, { "epoch": 3.342272557294727, "grad_norm": 0.3204619884490967, "learning_rate": 8.714948331657883e-07, "loss": 0.0101, "step": 312820 }, { "epoch": 3.342379400609007, "grad_norm": 3.388953924179077, "learning_rate": 8.714835880925155e-07, "loss": 0.0183, "step": 312830 }, { "epoch": 3.3424862439232865, "grad_norm": 1.5807225704193115, "learning_rate": 8.714723425998098e-07, "loss": 0.0063, "step": 312840 }, { "epoch": 3.342593087237566, "grad_norm": 0.11403411626815796, "learning_rate": 8.714610966876838e-07, "loss": 0.0117, "step": 312850 }, { "epoch": 3.342699930551846, "grad_norm": 2.5529255867004395, "learning_rate": 8.714498503561501e-07, "loss": 0.0087, "step": 312860 }, { "epoch": 3.3428067738661253, "grad_norm": 0.01425487082451582, "learning_rate": 8.714386036052214e-07, "loss": 0.007, "step": 312870 }, { "epoch": 3.342913617180405, "grad_norm": 8.842536926269531, "learning_rate": 8.714273564349106e-07, "loss": 0.0105, "step": 312880 }, { "epoch": 3.3430204604946847, "grad_norm": 8.764359474182129, "learning_rate": 8.714161088452303e-07, "loss": 0.0115, "step": 312890 }, { "epoch": 3.343127303808964, "grad_norm": 7.051028728485107, "learning_rate": 8.714048608361932e-07, "loss": 0.0115, "step": 312900 }, { "epoch": 3.3432341471232436, "grad_norm": 0.06905669718980789, "learning_rate": 8.71393612407812e-07, "loss": 0.0041, "step": 312910 }, { "epoch": 3.3433409904375235, "grad_norm": 4.313526630401611, "learning_rate": 8.713823635600992e-07, "loss": 0.0135, "step": 312920 }, { "epoch": 3.343447833751803, "grad_norm": 0.004098021425306797, "learning_rate": 8.713711142930679e-07, "loss": 0.0428, "step": 312930 }, { "epoch": 3.3435546770660824, "grad_norm": 0.038809195160865784, "learning_rate": 8.713598646067304e-07, "loss": 0.0099, "step": 312940 }, { "epoch": 3.3436615203803624, "grad_norm": 0.45009395480155945, "learning_rate": 8.713486145010998e-07, "loss": 0.0069, "step": 312950 }, { "epoch": 3.343768363694642, "grad_norm": 0.20222800970077515, "learning_rate": 8.713373639761882e-07, "loss": 0.012, "step": 312960 }, { "epoch": 3.3438752070089213, "grad_norm": 0.05893122777342796, "learning_rate": 8.71326113032009e-07, "loss": 0.0027, "step": 312970 }, { "epoch": 3.343982050323201, "grad_norm": 0.06442319601774216, "learning_rate": 8.713148616685746e-07, "loss": 0.0099, "step": 312980 }, { "epoch": 3.3440888936374806, "grad_norm": 7.171985626220703, "learning_rate": 8.713036098858977e-07, "loss": 0.0065, "step": 312990 }, { "epoch": 3.34419573695176, "grad_norm": 0.03592139855027199, "learning_rate": 8.712923576839908e-07, "loss": 0.0164, "step": 313000 }, { "epoch": 3.34430258026604, "grad_norm": 4.267387866973877, "learning_rate": 8.71281105062867e-07, "loss": 0.0265, "step": 313010 }, { "epoch": 3.3444094235803195, "grad_norm": 0.04582478851079941, "learning_rate": 8.712698520225387e-07, "loss": 0.0072, "step": 313020 }, { "epoch": 3.344516266894599, "grad_norm": 0.1995222121477127, "learning_rate": 8.712585985630188e-07, "loss": 0.0085, "step": 313030 }, { "epoch": 3.344623110208879, "grad_norm": 13.845928192138672, "learning_rate": 8.7124734468432e-07, "loss": 0.0329, "step": 313040 }, { "epoch": 3.3447299535231583, "grad_norm": 2.207103967666626, "learning_rate": 8.712360903864547e-07, "loss": 0.005, "step": 313050 }, { "epoch": 3.3448367968374377, "grad_norm": 1.4834442138671875, "learning_rate": 8.712248356694361e-07, "loss": 0.0018, "step": 313060 }, { "epoch": 3.3449436401517176, "grad_norm": 0.08869294077157974, "learning_rate": 8.712135805332763e-07, "loss": 0.0023, "step": 313070 }, { "epoch": 3.345050483465997, "grad_norm": 5.077438831329346, "learning_rate": 8.712023249779887e-07, "loss": 0.0222, "step": 313080 }, { "epoch": 3.3451573267802766, "grad_norm": 0.03555779904127121, "learning_rate": 8.711910690035855e-07, "loss": 0.0056, "step": 313090 }, { "epoch": 3.3452641700945565, "grad_norm": 3.413799524307251, "learning_rate": 8.711798126100796e-07, "loss": 0.0012, "step": 313100 }, { "epoch": 3.345371013408836, "grad_norm": 0.06322453916072845, "learning_rate": 8.711685557974835e-07, "loss": 0.0071, "step": 313110 }, { "epoch": 3.3454778567231154, "grad_norm": 0.1010260209441185, "learning_rate": 8.711572985658102e-07, "loss": 0.0208, "step": 313120 }, { "epoch": 3.3455847000373953, "grad_norm": 0.9940736293792725, "learning_rate": 8.711460409150724e-07, "loss": 0.0099, "step": 313130 }, { "epoch": 3.3456915433516747, "grad_norm": 5.716855049133301, "learning_rate": 8.711347828452826e-07, "loss": 0.0417, "step": 313140 }, { "epoch": 3.345798386665954, "grad_norm": 0.027566522359848022, "learning_rate": 8.711235243564536e-07, "loss": 0.0054, "step": 313150 }, { "epoch": 3.345905229980234, "grad_norm": 0.0069341170601546764, "learning_rate": 8.711122654485983e-07, "loss": 0.0081, "step": 313160 }, { "epoch": 3.3460120732945136, "grad_norm": 1.5833789110183716, "learning_rate": 8.711010061217292e-07, "loss": 0.0038, "step": 313170 }, { "epoch": 3.346118916608793, "grad_norm": 0.020326755940914154, "learning_rate": 8.710897463758589e-07, "loss": 0.0516, "step": 313180 }, { "epoch": 3.346225759923073, "grad_norm": 1.1166741847991943, "learning_rate": 8.710784862110004e-07, "loss": 0.0188, "step": 313190 }, { "epoch": 3.3463326032373524, "grad_norm": 4.251717567443848, "learning_rate": 8.710672256271663e-07, "loss": 0.0191, "step": 313200 }, { "epoch": 3.3464394465516323, "grad_norm": 2.992722988128662, "learning_rate": 8.710559646243691e-07, "loss": 0.0637, "step": 313210 }, { "epoch": 3.3465462898659117, "grad_norm": 3.5567448139190674, "learning_rate": 8.710447032026219e-07, "loss": 0.0034, "step": 313220 }, { "epoch": 3.346653133180191, "grad_norm": 2.926908254623413, "learning_rate": 8.710334413619372e-07, "loss": 0.0163, "step": 313230 }, { "epoch": 3.3467599764944707, "grad_norm": 1.5998260974884033, "learning_rate": 8.710221791023278e-07, "loss": 0.018, "step": 313240 }, { "epoch": 3.3468668198087506, "grad_norm": 1.3018792867660522, "learning_rate": 8.710109164238062e-07, "loss": 0.0085, "step": 313250 }, { "epoch": 3.34697366312303, "grad_norm": 7.161614418029785, "learning_rate": 8.709996533263853e-07, "loss": 0.0185, "step": 313260 }, { "epoch": 3.34708050643731, "grad_norm": 0.23407094180583954, "learning_rate": 8.709883898100779e-07, "loss": 0.0205, "step": 313270 }, { "epoch": 3.3471873497515894, "grad_norm": 0.06776376068592072, "learning_rate": 8.709771258748966e-07, "loss": 0.018, "step": 313280 }, { "epoch": 3.347294193065869, "grad_norm": 0.1418834626674652, "learning_rate": 8.709658615208539e-07, "loss": 0.013, "step": 313290 }, { "epoch": 3.3474010363801483, "grad_norm": 0.023946568369865417, "learning_rate": 8.70954596747963e-07, "loss": 0.0069, "step": 313300 }, { "epoch": 3.347507879694428, "grad_norm": 0.09031761437654495, "learning_rate": 8.709433315562362e-07, "loss": 0.015, "step": 313310 }, { "epoch": 3.3476147230087077, "grad_norm": 0.005806045141071081, "learning_rate": 8.709320659456865e-07, "loss": 0.0013, "step": 313320 }, { "epoch": 3.3477215663229876, "grad_norm": 0.05967475846409798, "learning_rate": 8.709207999163267e-07, "loss": 0.0259, "step": 313330 }, { "epoch": 3.347828409637267, "grad_norm": 11.0322265625, "learning_rate": 8.709095334681688e-07, "loss": 0.0345, "step": 313340 }, { "epoch": 3.3479352529515465, "grad_norm": 0.02551628090441227, "learning_rate": 8.708982666012265e-07, "loss": 0.0257, "step": 313350 }, { "epoch": 3.348042096265826, "grad_norm": 5.315192699432373, "learning_rate": 8.708869993155119e-07, "loss": 0.0139, "step": 313360 }, { "epoch": 3.348148939580106, "grad_norm": 0.09300068765878677, "learning_rate": 8.708757316110378e-07, "loss": 0.0391, "step": 313370 }, { "epoch": 3.3482557828943853, "grad_norm": 3.2682011127471924, "learning_rate": 8.70864463487817e-07, "loss": 0.0072, "step": 313380 }, { "epoch": 3.348362626208665, "grad_norm": 1.9041305780410767, "learning_rate": 8.708531949458624e-07, "loss": 0.0082, "step": 313390 }, { "epoch": 3.3484694695229447, "grad_norm": 1.4153037071228027, "learning_rate": 8.708419259851865e-07, "loss": 0.0124, "step": 313400 }, { "epoch": 3.348576312837224, "grad_norm": 0.097397081553936, "learning_rate": 8.708306566058021e-07, "loss": 0.0244, "step": 313410 }, { "epoch": 3.3486831561515036, "grad_norm": 0.004273495636880398, "learning_rate": 8.708193868077218e-07, "loss": 0.0287, "step": 313420 }, { "epoch": 3.3487899994657835, "grad_norm": 0.01696518249809742, "learning_rate": 8.708081165909585e-07, "loss": 0.0029, "step": 313430 }, { "epoch": 3.348896842780063, "grad_norm": 0.059357330203056335, "learning_rate": 8.707968459555248e-07, "loss": 0.0107, "step": 313440 }, { "epoch": 3.349003686094343, "grad_norm": 0.4373103678226471, "learning_rate": 8.707855749014335e-07, "loss": 0.0157, "step": 313450 }, { "epoch": 3.3491105294086223, "grad_norm": 0.36374011635780334, "learning_rate": 8.707743034286973e-07, "loss": 0.0134, "step": 313460 }, { "epoch": 3.3492173727229018, "grad_norm": 0.06180768087506294, "learning_rate": 8.707630315373289e-07, "loss": 0.0111, "step": 313470 }, { "epoch": 3.3493242160371817, "grad_norm": 1.5660120248794556, "learning_rate": 8.707517592273411e-07, "loss": 0.071, "step": 313480 }, { "epoch": 3.349431059351461, "grad_norm": 1.052780032157898, "learning_rate": 8.707404864987466e-07, "loss": 0.0415, "step": 313490 }, { "epoch": 3.3495379026657406, "grad_norm": 0.3317275941371918, "learning_rate": 8.70729213351558e-07, "loss": 0.0161, "step": 313500 }, { "epoch": 3.3496447459800205, "grad_norm": 1.7246114015579224, "learning_rate": 8.707179397857883e-07, "loss": 0.0049, "step": 313510 }, { "epoch": 3.3497515892943, "grad_norm": 0.010930591262876987, "learning_rate": 8.707066658014499e-07, "loss": 0.0032, "step": 313520 }, { "epoch": 3.3498584326085794, "grad_norm": 1.768104076385498, "learning_rate": 8.706953913985559e-07, "loss": 0.0268, "step": 313530 }, { "epoch": 3.3499652759228593, "grad_norm": 0.021856850013136864, "learning_rate": 8.706841165771186e-07, "loss": 0.006, "step": 313540 }, { "epoch": 3.3500721192371388, "grad_norm": 0.3015313744544983, "learning_rate": 8.706728413371511e-07, "loss": 0.0093, "step": 313550 }, { "epoch": 3.3501789625514182, "grad_norm": 0.0028176181949675083, "learning_rate": 8.70661565678666e-07, "loss": 0.0017, "step": 313560 }, { "epoch": 3.350285805865698, "grad_norm": 1.4109373092651367, "learning_rate": 8.706502896016758e-07, "loss": 0.0119, "step": 313570 }, { "epoch": 3.3503926491799776, "grad_norm": 0.0024989787489175797, "learning_rate": 8.706390131061937e-07, "loss": 0.0114, "step": 313580 }, { "epoch": 3.350499492494257, "grad_norm": 3.7111246585845947, "learning_rate": 8.706277361922321e-07, "loss": 0.0169, "step": 313590 }, { "epoch": 3.350606335808537, "grad_norm": 0.03147587925195694, "learning_rate": 8.706164588598037e-07, "loss": 0.0096, "step": 313600 }, { "epoch": 3.3507131791228164, "grad_norm": 0.1600300371646881, "learning_rate": 8.706051811089215e-07, "loss": 0.0183, "step": 313610 }, { "epoch": 3.350820022437096, "grad_norm": 3.011127233505249, "learning_rate": 8.705939029395981e-07, "loss": 0.0041, "step": 313620 }, { "epoch": 3.3509268657513758, "grad_norm": 5.641323566436768, "learning_rate": 8.705826243518462e-07, "loss": 0.0138, "step": 313630 }, { "epoch": 3.3510337090656552, "grad_norm": 5.4976959228515625, "learning_rate": 8.705713453456785e-07, "loss": 0.0106, "step": 313640 }, { "epoch": 3.3511405523799347, "grad_norm": 0.2211449295282364, "learning_rate": 8.705600659211076e-07, "loss": 0.0017, "step": 313650 }, { "epoch": 3.3512473956942146, "grad_norm": 0.03079485334455967, "learning_rate": 8.705487860781466e-07, "loss": 0.0183, "step": 313660 }, { "epoch": 3.351354239008494, "grad_norm": 0.008870016783475876, "learning_rate": 8.70537505816808e-07, "loss": 0.0065, "step": 313670 }, { "epoch": 3.3514610823227735, "grad_norm": 0.0060682217590510845, "learning_rate": 8.705262251371048e-07, "loss": 0.0175, "step": 313680 }, { "epoch": 3.3515679256370534, "grad_norm": 0.052510280162096024, "learning_rate": 8.705149440390493e-07, "loss": 0.0076, "step": 313690 }, { "epoch": 3.351674768951333, "grad_norm": 0.19425319135189056, "learning_rate": 8.705036625226545e-07, "loss": 0.0173, "step": 313700 }, { "epoch": 3.3517816122656123, "grad_norm": 4.235297679901123, "learning_rate": 8.704923805879332e-07, "loss": 0.0142, "step": 313710 }, { "epoch": 3.3518884555798922, "grad_norm": 19.775636672973633, "learning_rate": 8.70481098234898e-07, "loss": 0.0165, "step": 313720 }, { "epoch": 3.3519952988941717, "grad_norm": 0.007564072962850332, "learning_rate": 8.704698154635617e-07, "loss": 0.014, "step": 313730 }, { "epoch": 3.352102142208451, "grad_norm": 5.9856696128845215, "learning_rate": 8.704585322739368e-07, "loss": 0.0219, "step": 313740 }, { "epoch": 3.352208985522731, "grad_norm": 0.9922448396682739, "learning_rate": 8.704472486660364e-07, "loss": 0.0038, "step": 313750 }, { "epoch": 3.3523158288370105, "grad_norm": 0.03043757565319538, "learning_rate": 8.704359646398731e-07, "loss": 0.0035, "step": 313760 }, { "epoch": 3.35242267215129, "grad_norm": 2.032677173614502, "learning_rate": 8.704246801954598e-07, "loss": 0.0082, "step": 313770 }, { "epoch": 3.35252951546557, "grad_norm": 24.760629653930664, "learning_rate": 8.704133953328089e-07, "loss": 0.0909, "step": 313780 }, { "epoch": 3.3526363587798493, "grad_norm": 1.6583820581436157, "learning_rate": 8.704021100519334e-07, "loss": 0.0043, "step": 313790 }, { "epoch": 3.352743202094129, "grad_norm": 5.217276096343994, "learning_rate": 8.703908243528459e-07, "loss": 0.0037, "step": 313800 }, { "epoch": 3.3528500454084087, "grad_norm": 5.705081939697266, "learning_rate": 8.703795382355592e-07, "loss": 0.0078, "step": 313810 }, { "epoch": 3.352956888722688, "grad_norm": 0.031157540157437325, "learning_rate": 8.703682517000861e-07, "loss": 0.0111, "step": 313820 }, { "epoch": 3.3530637320369676, "grad_norm": 0.15012983977794647, "learning_rate": 8.703569647464392e-07, "loss": 0.0273, "step": 313830 }, { "epoch": 3.3531705753512475, "grad_norm": 0.04215876758098602, "learning_rate": 8.703456773746314e-07, "loss": 0.039, "step": 313840 }, { "epoch": 3.353277418665527, "grad_norm": 6.166322708129883, "learning_rate": 8.703343895846754e-07, "loss": 0.0215, "step": 313850 }, { "epoch": 3.3533842619798064, "grad_norm": 0.09653881937265396, "learning_rate": 8.70323101376584e-07, "loss": 0.0061, "step": 313860 }, { "epoch": 3.3534911052940863, "grad_norm": 0.1277565211057663, "learning_rate": 8.703118127503697e-07, "loss": 0.0292, "step": 313870 }, { "epoch": 3.353597948608366, "grad_norm": 1.0674811601638794, "learning_rate": 8.703005237060454e-07, "loss": 0.0118, "step": 313880 }, { "epoch": 3.3537047919226453, "grad_norm": 0.5099560618400574, "learning_rate": 8.70289234243624e-07, "loss": 0.0022, "step": 313890 }, { "epoch": 3.353811635236925, "grad_norm": 0.013782506808638573, "learning_rate": 8.70277944363118e-07, "loss": 0.0139, "step": 313900 }, { "epoch": 3.3539184785512046, "grad_norm": 0.010088123381137848, "learning_rate": 8.702666540645403e-07, "loss": 0.0261, "step": 313910 }, { "epoch": 3.354025321865484, "grad_norm": 0.00856473483145237, "learning_rate": 8.702553633479038e-07, "loss": 0.0242, "step": 313920 }, { "epoch": 3.354132165179764, "grad_norm": 3.4385290145874023, "learning_rate": 8.702440722132207e-07, "loss": 0.0179, "step": 313930 }, { "epoch": 3.3542390084940434, "grad_norm": 0.3572603762149811, "learning_rate": 8.702327806605043e-07, "loss": 0.007, "step": 313940 }, { "epoch": 3.354345851808323, "grad_norm": 0.0077431341633200645, "learning_rate": 8.702214886897671e-07, "loss": 0.0156, "step": 313950 }, { "epoch": 3.354452695122603, "grad_norm": 0.002196060260757804, "learning_rate": 8.70210196301022e-07, "loss": 0.017, "step": 313960 }, { "epoch": 3.3545595384368823, "grad_norm": 0.016532110050320625, "learning_rate": 8.701989034942814e-07, "loss": 0.0201, "step": 313970 }, { "epoch": 3.354666381751162, "grad_norm": 0.3562815487384796, "learning_rate": 8.701876102695585e-07, "loss": 0.0137, "step": 313980 }, { "epoch": 3.3547732250654416, "grad_norm": 1.7310622930526733, "learning_rate": 8.701763166268659e-07, "loss": 0.0353, "step": 313990 }, { "epoch": 3.354880068379721, "grad_norm": 0.08208739757537842, "learning_rate": 8.70165022566216e-07, "loss": 0.0014, "step": 314000 }, { "epoch": 3.3549869116940005, "grad_norm": 0.14454255998134613, "learning_rate": 8.701537280876221e-07, "loss": 0.0155, "step": 314010 }, { "epoch": 3.3550937550082804, "grad_norm": 0.0375407412648201, "learning_rate": 8.701424331910967e-07, "loss": 0.0134, "step": 314020 }, { "epoch": 3.35520059832256, "grad_norm": 0.17928364872932434, "learning_rate": 8.701311378766524e-07, "loss": 0.0386, "step": 314030 }, { "epoch": 3.35530744163684, "grad_norm": 0.0009101132745854557, "learning_rate": 8.701198421443024e-07, "loss": 0.0382, "step": 314040 }, { "epoch": 3.3554142849511193, "grad_norm": 3.9877684116363525, "learning_rate": 8.701085459940589e-07, "loss": 0.0135, "step": 314050 }, { "epoch": 3.3555211282653987, "grad_norm": 0.013237769715487957, "learning_rate": 8.700972494259351e-07, "loss": 0.0057, "step": 314060 }, { "epoch": 3.355627971579678, "grad_norm": 1.225310206413269, "learning_rate": 8.700859524399433e-07, "loss": 0.0027, "step": 314070 }, { "epoch": 3.355734814893958, "grad_norm": 4.205779075622559, "learning_rate": 8.700746550360967e-07, "loss": 0.0623, "step": 314080 }, { "epoch": 3.3558416582082375, "grad_norm": 0.06113104522228241, "learning_rate": 8.70063357214408e-07, "loss": 0.0041, "step": 314090 }, { "epoch": 3.3559485015225174, "grad_norm": 1.3922821283340454, "learning_rate": 8.700520589748897e-07, "loss": 0.0176, "step": 314100 }, { "epoch": 3.356055344836797, "grad_norm": 0.05085061863064766, "learning_rate": 8.700407603175547e-07, "loss": 0.0096, "step": 314110 }, { "epoch": 3.3561621881510764, "grad_norm": 0.004486382007598877, "learning_rate": 8.700294612424157e-07, "loss": 0.0086, "step": 314120 }, { "epoch": 3.356269031465356, "grad_norm": 2.204571008682251, "learning_rate": 8.700181617494857e-07, "loss": 0.0044, "step": 314130 }, { "epoch": 3.3563758747796357, "grad_norm": 2.302321434020996, "learning_rate": 8.700068618387772e-07, "loss": 0.013, "step": 314140 }, { "epoch": 3.356482718093915, "grad_norm": 0.47832250595092773, "learning_rate": 8.69995561510303e-07, "loss": 0.0086, "step": 314150 }, { "epoch": 3.356589561408195, "grad_norm": 0.3156604468822479, "learning_rate": 8.699842607640758e-07, "loss": 0.0102, "step": 314160 }, { "epoch": 3.3566964047224745, "grad_norm": 2.767904758453369, "learning_rate": 8.699729596001085e-07, "loss": 0.0118, "step": 314170 }, { "epoch": 3.356803248036754, "grad_norm": 0.1050509661436081, "learning_rate": 8.699616580184139e-07, "loss": 0.0033, "step": 314180 }, { "epoch": 3.3569100913510335, "grad_norm": 0.814237654209137, "learning_rate": 8.699503560190045e-07, "loss": 0.0353, "step": 314190 }, { "epoch": 3.3570169346653134, "grad_norm": 0.001415969803929329, "learning_rate": 8.699390536018934e-07, "loss": 0.0012, "step": 314200 }, { "epoch": 3.357123777979593, "grad_norm": 1.2269147634506226, "learning_rate": 8.69927750767093e-07, "loss": 0.0308, "step": 314210 }, { "epoch": 3.3572306212938727, "grad_norm": 9.267094612121582, "learning_rate": 8.699164475146163e-07, "loss": 0.0044, "step": 314220 }, { "epoch": 3.357337464608152, "grad_norm": 0.03362387791275978, "learning_rate": 8.699051438444762e-07, "loss": 0.0245, "step": 314230 }, { "epoch": 3.3574443079224316, "grad_norm": 0.03560401126742363, "learning_rate": 8.69893839756685e-07, "loss": 0.006, "step": 314240 }, { "epoch": 3.3575511512367116, "grad_norm": 0.004466671496629715, "learning_rate": 8.69882535251256e-07, "loss": 0.0045, "step": 314250 }, { "epoch": 3.357657994550991, "grad_norm": 0.004998002666980028, "learning_rate": 8.698712303282016e-07, "loss": 0.0237, "step": 314260 }, { "epoch": 3.3577648378652705, "grad_norm": 0.5479162335395813, "learning_rate": 8.698599249875346e-07, "loss": 0.0121, "step": 314270 }, { "epoch": 3.3578716811795504, "grad_norm": 0.011967151425778866, "learning_rate": 8.698486192292679e-07, "loss": 0.0085, "step": 314280 }, { "epoch": 3.35797852449383, "grad_norm": 0.2182801514863968, "learning_rate": 8.698373130534142e-07, "loss": 0.0046, "step": 314290 }, { "epoch": 3.3580853678081093, "grad_norm": 0.003303819103166461, "learning_rate": 8.698260064599861e-07, "loss": 0.0095, "step": 314300 }, { "epoch": 3.358192211122389, "grad_norm": 2.8571271896362305, "learning_rate": 8.698146994489968e-07, "loss": 0.0101, "step": 314310 }, { "epoch": 3.3582990544366687, "grad_norm": 0.004033724311739206, "learning_rate": 8.698033920204586e-07, "loss": 0.06, "step": 314320 }, { "epoch": 3.358405897750948, "grad_norm": 7.558463096618652, "learning_rate": 8.697920841743844e-07, "loss": 0.0435, "step": 314330 }, { "epoch": 3.358512741065228, "grad_norm": 0.033661432564258575, "learning_rate": 8.697807759107873e-07, "loss": 0.0072, "step": 314340 }, { "epoch": 3.3586195843795075, "grad_norm": 0.009604252874851227, "learning_rate": 8.697694672296796e-07, "loss": 0.0025, "step": 314350 }, { "epoch": 3.358726427693787, "grad_norm": 5.428918361663818, "learning_rate": 8.697581581310742e-07, "loss": 0.0196, "step": 314360 }, { "epoch": 3.358833271008067, "grad_norm": 7.788860321044922, "learning_rate": 8.697468486149842e-07, "loss": 0.0142, "step": 314370 }, { "epoch": 3.3589401143223463, "grad_norm": 0.38341405987739563, "learning_rate": 8.697355386814218e-07, "loss": 0.0106, "step": 314380 }, { "epoch": 3.3590469576366258, "grad_norm": 0.015044749714434147, "learning_rate": 8.697242283304003e-07, "loss": 0.0094, "step": 314390 }, { "epoch": 3.3591538009509057, "grad_norm": 2.290588140487671, "learning_rate": 8.69712917561932e-07, "loss": 0.0048, "step": 314400 }, { "epoch": 3.359260644265185, "grad_norm": 1.4961354732513428, "learning_rate": 8.6970160637603e-07, "loss": 0.0155, "step": 314410 }, { "epoch": 3.3593674875794646, "grad_norm": 0.9975016117095947, "learning_rate": 8.696902947727071e-07, "loss": 0.0128, "step": 314420 }, { "epoch": 3.3594743308937445, "grad_norm": 0.002486890647560358, "learning_rate": 8.69678982751976e-07, "loss": 0.0083, "step": 314430 }, { "epoch": 3.359581174208024, "grad_norm": 0.20348547399044037, "learning_rate": 8.696676703138493e-07, "loss": 0.0094, "step": 314440 }, { "epoch": 3.3596880175223034, "grad_norm": 0.009635970927774906, "learning_rate": 8.696563574583399e-07, "loss": 0.0268, "step": 314450 }, { "epoch": 3.3597948608365833, "grad_norm": 1.1757391691207886, "learning_rate": 8.696450441854605e-07, "loss": 0.0189, "step": 314460 }, { "epoch": 3.3599017041508628, "grad_norm": 0.0013704084558412433, "learning_rate": 8.696337304952241e-07, "loss": 0.0145, "step": 314470 }, { "epoch": 3.360008547465142, "grad_norm": 1.6248958110809326, "learning_rate": 8.696224163876434e-07, "loss": 0.0031, "step": 314480 }, { "epoch": 3.360115390779422, "grad_norm": 6.221827030181885, "learning_rate": 8.696111018627308e-07, "loss": 0.0322, "step": 314490 }, { "epoch": 3.3602222340937016, "grad_norm": 1.5469049215316772, "learning_rate": 8.695997869204996e-07, "loss": 0.0167, "step": 314500 }, { "epoch": 3.360329077407981, "grad_norm": 0.02021396905183792, "learning_rate": 8.695884715609623e-07, "loss": 0.0118, "step": 314510 }, { "epoch": 3.360435920722261, "grad_norm": 0.01718384027481079, "learning_rate": 8.695771557841316e-07, "loss": 0.0029, "step": 314520 }, { "epoch": 3.3605427640365404, "grad_norm": 0.5306026935577393, "learning_rate": 8.695658395900205e-07, "loss": 0.0152, "step": 314530 }, { "epoch": 3.36064960735082, "grad_norm": 0.023080788552761078, "learning_rate": 8.695545229786416e-07, "loss": 0.0069, "step": 314540 }, { "epoch": 3.3607564506650998, "grad_norm": 5.143548965454102, "learning_rate": 8.695432059500078e-07, "loss": 0.0233, "step": 314550 }, { "epoch": 3.360863293979379, "grad_norm": 0.0034383325837552547, "learning_rate": 8.69531888504132e-07, "loss": 0.0148, "step": 314560 }, { "epoch": 3.3609701372936587, "grad_norm": 0.06783941388130188, "learning_rate": 8.695205706410267e-07, "loss": 0.017, "step": 314570 }, { "epoch": 3.3610769806079386, "grad_norm": 0.5158237218856812, "learning_rate": 8.695092523607046e-07, "loss": 0.0555, "step": 314580 }, { "epoch": 3.361183823922218, "grad_norm": 0.023472439497709274, "learning_rate": 8.69497933663179e-07, "loss": 0.0084, "step": 314590 }, { "epoch": 3.3612906672364975, "grad_norm": 0.0056897434405982494, "learning_rate": 8.69486614548462e-07, "loss": 0.0082, "step": 314600 }, { "epoch": 3.3613975105507774, "grad_norm": 0.03568361699581146, "learning_rate": 8.694752950165669e-07, "loss": 0.0271, "step": 314610 }, { "epoch": 3.361504353865057, "grad_norm": 0.4165922999382019, "learning_rate": 8.694639750675064e-07, "loss": 0.0206, "step": 314620 }, { "epoch": 3.3616111971793363, "grad_norm": 0.18756692111492157, "learning_rate": 8.694526547012931e-07, "loss": 0.0118, "step": 314630 }, { "epoch": 3.361718040493616, "grad_norm": 0.39299365878105164, "learning_rate": 8.694413339179398e-07, "loss": 0.0033, "step": 314640 }, { "epoch": 3.3618248838078957, "grad_norm": 2.4522552490234375, "learning_rate": 8.694300127174594e-07, "loss": 0.0371, "step": 314650 }, { "epoch": 3.361931727122175, "grad_norm": 8.946985244750977, "learning_rate": 8.694186910998647e-07, "loss": 0.0631, "step": 314660 }, { "epoch": 3.362038570436455, "grad_norm": 0.22542552649974823, "learning_rate": 8.694073690651682e-07, "loss": 0.013, "step": 314670 }, { "epoch": 3.3621454137507345, "grad_norm": 2.5211501121520996, "learning_rate": 8.693960466133831e-07, "loss": 0.0088, "step": 314680 }, { "epoch": 3.3622522570650144, "grad_norm": 6.247826099395752, "learning_rate": 8.693847237445219e-07, "loss": 0.0082, "step": 314690 }, { "epoch": 3.362359100379294, "grad_norm": 0.3285328149795532, "learning_rate": 8.693734004585976e-07, "loss": 0.0168, "step": 314700 }, { "epoch": 3.3624659436935733, "grad_norm": 4.045556545257568, "learning_rate": 8.693620767556227e-07, "loss": 0.0454, "step": 314710 }, { "epoch": 3.362572787007853, "grad_norm": 0.3700426518917084, "learning_rate": 8.693507526356101e-07, "loss": 0.0233, "step": 314720 }, { "epoch": 3.3626796303221327, "grad_norm": 0.05693377926945686, "learning_rate": 8.693394280985728e-07, "loss": 0.0278, "step": 314730 }, { "epoch": 3.362786473636412, "grad_norm": 0.026377806439995766, "learning_rate": 8.693281031445232e-07, "loss": 0.0049, "step": 314740 }, { "epoch": 3.362893316950692, "grad_norm": 0.4251120686531067, "learning_rate": 8.693167777734744e-07, "loss": 0.0059, "step": 314750 }, { "epoch": 3.3630001602649715, "grad_norm": 0.5965761542320251, "learning_rate": 8.693054519854393e-07, "loss": 0.0045, "step": 314760 }, { "epoch": 3.363107003579251, "grad_norm": 0.026435377076268196, "learning_rate": 8.692941257804302e-07, "loss": 0.0027, "step": 314770 }, { "epoch": 3.3632138468935304, "grad_norm": 0.7570445537567139, "learning_rate": 8.692827991584601e-07, "loss": 0.0029, "step": 314780 }, { "epoch": 3.3633206902078103, "grad_norm": 11.652240753173828, "learning_rate": 8.692714721195421e-07, "loss": 0.027, "step": 314790 }, { "epoch": 3.36342753352209, "grad_norm": 0.12254491448402405, "learning_rate": 8.692601446636887e-07, "loss": 0.0088, "step": 314800 }, { "epoch": 3.3635343768363697, "grad_norm": 0.09941857308149338, "learning_rate": 8.692488167909126e-07, "loss": 0.027, "step": 314810 }, { "epoch": 3.363641220150649, "grad_norm": 0.003637799294665456, "learning_rate": 8.692374885012268e-07, "loss": 0.0422, "step": 314820 }, { "epoch": 3.3637480634649286, "grad_norm": 3.034480571746826, "learning_rate": 8.69226159794644e-07, "loss": 0.0102, "step": 314830 }, { "epoch": 3.363854906779208, "grad_norm": 0.023278508335351944, "learning_rate": 8.692148306711768e-07, "loss": 0.0574, "step": 314840 }, { "epoch": 3.363961750093488, "grad_norm": 0.6734988689422607, "learning_rate": 8.692035011308385e-07, "loss": 0.0091, "step": 314850 }, { "epoch": 3.3640685934077674, "grad_norm": 1.7554937601089478, "learning_rate": 8.691921711736414e-07, "loss": 0.0369, "step": 314860 }, { "epoch": 3.3641754367220473, "grad_norm": 0.016416160389780998, "learning_rate": 8.691808407995985e-07, "loss": 0.0029, "step": 314870 }, { "epoch": 3.364282280036327, "grad_norm": 0.04268888011574745, "learning_rate": 8.691695100087227e-07, "loss": 0.0104, "step": 314880 }, { "epoch": 3.3643891233506062, "grad_norm": 3.3944003582000732, "learning_rate": 8.691581788010264e-07, "loss": 0.0063, "step": 314890 }, { "epoch": 3.3644959666648857, "grad_norm": 0.4717768430709839, "learning_rate": 8.691468471765229e-07, "loss": 0.0076, "step": 314900 }, { "epoch": 3.3646028099791656, "grad_norm": 0.003780186641961336, "learning_rate": 8.691355151352246e-07, "loss": 0.0015, "step": 314910 }, { "epoch": 3.364709653293445, "grad_norm": 8.120193481445312, "learning_rate": 8.691241826771445e-07, "loss": 0.0074, "step": 314920 }, { "epoch": 3.364816496607725, "grad_norm": 0.1510774791240692, "learning_rate": 8.691128498022954e-07, "loss": 0.0821, "step": 314930 }, { "epoch": 3.3649233399220044, "grad_norm": 0.017884215340018272, "learning_rate": 8.691015165106899e-07, "loss": 0.0271, "step": 314940 }, { "epoch": 3.365030183236284, "grad_norm": 2.6659576892852783, "learning_rate": 8.69090182802341e-07, "loss": 0.0177, "step": 314950 }, { "epoch": 3.365137026550564, "grad_norm": 0.004527208395302296, "learning_rate": 8.690788486772614e-07, "loss": 0.0352, "step": 314960 }, { "epoch": 3.3652438698648433, "grad_norm": 0.04437100142240524, "learning_rate": 8.690675141354639e-07, "loss": 0.0091, "step": 314970 }, { "epoch": 3.3653507131791227, "grad_norm": 0.007651582360267639, "learning_rate": 8.690561791769615e-07, "loss": 0.0013, "step": 314980 }, { "epoch": 3.3654575564934026, "grad_norm": 3.4705538749694824, "learning_rate": 8.690448438017667e-07, "loss": 0.0235, "step": 314990 }, { "epoch": 3.365564399807682, "grad_norm": 8.545866012573242, "learning_rate": 8.690335080098924e-07, "loss": 0.0117, "step": 315000 }, { "epoch": 3.3656712431219615, "grad_norm": 0.34260743856430054, "learning_rate": 8.690221718013514e-07, "loss": 0.0046, "step": 315010 }, { "epoch": 3.3657780864362414, "grad_norm": 0.3647359609603882, "learning_rate": 8.690108351761565e-07, "loss": 0.0214, "step": 315020 }, { "epoch": 3.365884929750521, "grad_norm": 3.4520044326782227, "learning_rate": 8.689994981343206e-07, "loss": 0.0182, "step": 315030 }, { "epoch": 3.3659917730648004, "grad_norm": 0.02431625872850418, "learning_rate": 8.689881606758565e-07, "loss": 0.016, "step": 315040 }, { "epoch": 3.3660986163790803, "grad_norm": 0.02768954448401928, "learning_rate": 8.689768228007766e-07, "loss": 0.0006, "step": 315050 }, { "epoch": 3.3662054596933597, "grad_norm": 0.002283903770148754, "learning_rate": 8.689654845090943e-07, "loss": 0.0012, "step": 315060 }, { "epoch": 3.366312303007639, "grad_norm": 0.5445920825004578, "learning_rate": 8.689541458008221e-07, "loss": 0.0106, "step": 315070 }, { "epoch": 3.366419146321919, "grad_norm": 0.006804106291383505, "learning_rate": 8.689428066759727e-07, "loss": 0.0354, "step": 315080 }, { "epoch": 3.3665259896361985, "grad_norm": 0.012053695507347584, "learning_rate": 8.689314671345591e-07, "loss": 0.0178, "step": 315090 }, { "epoch": 3.366632832950478, "grad_norm": 0.580325186252594, "learning_rate": 8.68920127176594e-07, "loss": 0.0108, "step": 315100 }, { "epoch": 3.366739676264758, "grad_norm": 0.4076729118824005, "learning_rate": 8.689087868020902e-07, "loss": 0.0006, "step": 315110 }, { "epoch": 3.3668465195790374, "grad_norm": 2.3741166591644287, "learning_rate": 8.688974460110607e-07, "loss": 0.0242, "step": 315120 }, { "epoch": 3.366953362893317, "grad_norm": 2.3836729526519775, "learning_rate": 8.68886104803518e-07, "loss": 0.0417, "step": 315130 }, { "epoch": 3.3670602062075967, "grad_norm": 1.3242688179016113, "learning_rate": 8.68874763179475e-07, "loss": 0.0245, "step": 315140 }, { "epoch": 3.367167049521876, "grad_norm": 0.04078410193324089, "learning_rate": 8.688634211389448e-07, "loss": 0.0141, "step": 315150 }, { "epoch": 3.3672738928361556, "grad_norm": 0.021407436579465866, "learning_rate": 8.688520786819396e-07, "loss": 0.0048, "step": 315160 }, { "epoch": 3.3673807361504355, "grad_norm": 1.7284752130508423, "learning_rate": 8.688407358084728e-07, "loss": 0.0024, "step": 315170 }, { "epoch": 3.367487579464715, "grad_norm": 1.4026005268096924, "learning_rate": 8.688293925185569e-07, "loss": 0.0184, "step": 315180 }, { "epoch": 3.3675944227789945, "grad_norm": 0.16534776985645294, "learning_rate": 8.688180488122047e-07, "loss": 0.0081, "step": 315190 }, { "epoch": 3.3677012660932744, "grad_norm": 3.3519246578216553, "learning_rate": 8.688067046894292e-07, "loss": 0.0149, "step": 315200 }, { "epoch": 3.367808109407554, "grad_norm": 0.14129319787025452, "learning_rate": 8.687953601502432e-07, "loss": 0.0302, "step": 315210 }, { "epoch": 3.3679149527218333, "grad_norm": 0.0026463684625923634, "learning_rate": 8.687840151946593e-07, "loss": 0.0011, "step": 315220 }, { "epoch": 3.368021796036113, "grad_norm": 1.2256267070770264, "learning_rate": 8.687726698226904e-07, "loss": 0.021, "step": 315230 }, { "epoch": 3.3681286393503926, "grad_norm": 0.005534951575100422, "learning_rate": 8.687613240343492e-07, "loss": 0.0163, "step": 315240 }, { "epoch": 3.368235482664672, "grad_norm": 0.10543977469205856, "learning_rate": 8.687499778296487e-07, "loss": 0.0139, "step": 315250 }, { "epoch": 3.368342325978952, "grad_norm": 15.809725761413574, "learning_rate": 8.687386312086018e-07, "loss": 0.0137, "step": 315260 }, { "epoch": 3.3684491692932315, "grad_norm": 2.523698091506958, "learning_rate": 8.687272841712211e-07, "loss": 0.0043, "step": 315270 }, { "epoch": 3.368556012607511, "grad_norm": 0.627683699131012, "learning_rate": 8.687159367175194e-07, "loss": 0.0012, "step": 315280 }, { "epoch": 3.368662855921791, "grad_norm": 1.2820221185684204, "learning_rate": 8.687045888475096e-07, "loss": 0.0254, "step": 315290 }, { "epoch": 3.3687696992360703, "grad_norm": 2.6590447425842285, "learning_rate": 8.686932405612045e-07, "loss": 0.0288, "step": 315300 }, { "epoch": 3.3688765425503497, "grad_norm": 0.1471439152956009, "learning_rate": 8.68681891858617e-07, "loss": 0.0038, "step": 315310 }, { "epoch": 3.3689833858646296, "grad_norm": 2.2213594913482666, "learning_rate": 8.686705427397596e-07, "loss": 0.0058, "step": 315320 }, { "epoch": 3.369090229178909, "grad_norm": 3.0162785053253174, "learning_rate": 8.686591932046454e-07, "loss": 0.0043, "step": 315330 }, { "epoch": 3.3691970724931886, "grad_norm": 1.250817060470581, "learning_rate": 8.686478432532874e-07, "loss": 0.0255, "step": 315340 }, { "epoch": 3.3693039158074685, "grad_norm": 1.55420982837677, "learning_rate": 8.686364928856977e-07, "loss": 0.0056, "step": 315350 }, { "epoch": 3.369410759121748, "grad_norm": 0.9896495938301086, "learning_rate": 8.6862514210189e-07, "loss": 0.0255, "step": 315360 }, { "epoch": 3.3695176024360274, "grad_norm": 0.1816326081752777, "learning_rate": 8.686137909018765e-07, "loss": 0.0013, "step": 315370 }, { "epoch": 3.3696244457503073, "grad_norm": 1.0575758218765259, "learning_rate": 8.686024392856701e-07, "loss": 0.0352, "step": 315380 }, { "epoch": 3.3697312890645867, "grad_norm": 3.0428414344787598, "learning_rate": 8.68591087253284e-07, "loss": 0.027, "step": 315390 }, { "epoch": 3.369838132378866, "grad_norm": 0.1048491820693016, "learning_rate": 8.685797348047305e-07, "loss": 0.0189, "step": 315400 }, { "epoch": 3.369944975693146, "grad_norm": 0.005145972594618797, "learning_rate": 8.685683819400229e-07, "loss": 0.0154, "step": 315410 }, { "epoch": 3.3700518190074256, "grad_norm": 6.054591655731201, "learning_rate": 8.685570286591735e-07, "loss": 0.0173, "step": 315420 }, { "epoch": 3.370158662321705, "grad_norm": 0.0024763382971286774, "learning_rate": 8.685456749621957e-07, "loss": 0.0011, "step": 315430 }, { "epoch": 3.370265505635985, "grad_norm": 3.3892860412597656, "learning_rate": 8.685343208491018e-07, "loss": 0.0292, "step": 315440 }, { "epoch": 3.3703723489502644, "grad_norm": 0.18999159336090088, "learning_rate": 8.68522966319905e-07, "loss": 0.0003, "step": 315450 }, { "epoch": 3.3704791922645443, "grad_norm": 0.03084726445376873, "learning_rate": 8.685116113746178e-07, "loss": 0.0016, "step": 315460 }, { "epoch": 3.3705860355788237, "grad_norm": 0.693315327167511, "learning_rate": 8.685002560132533e-07, "loss": 0.0087, "step": 315470 }, { "epoch": 3.370692878893103, "grad_norm": 0.12612277269363403, "learning_rate": 8.68488900235824e-07, "loss": 0.0029, "step": 315480 }, { "epoch": 3.3707997222073827, "grad_norm": 0.0028343060985207558, "learning_rate": 8.684775440423431e-07, "loss": 0.014, "step": 315490 }, { "epoch": 3.3709065655216626, "grad_norm": 0.07030748575925827, "learning_rate": 8.684661874328232e-07, "loss": 0.0361, "step": 315500 }, { "epoch": 3.371013408835942, "grad_norm": 1.9388513565063477, "learning_rate": 8.684548304072772e-07, "loss": 0.0106, "step": 315510 }, { "epoch": 3.371120252150222, "grad_norm": 0.006162944715470076, "learning_rate": 8.684434729657178e-07, "loss": 0.0069, "step": 315520 }, { "epoch": 3.3712270954645014, "grad_norm": 1.2626495361328125, "learning_rate": 8.684321151081579e-07, "loss": 0.0158, "step": 315530 }, { "epoch": 3.371333938778781, "grad_norm": 0.3372544050216675, "learning_rate": 8.684207568346105e-07, "loss": 0.0032, "step": 315540 }, { "epoch": 3.3714407820930603, "grad_norm": 0.0097708310931921, "learning_rate": 8.684093981450882e-07, "loss": 0.0334, "step": 315550 }, { "epoch": 3.37154762540734, "grad_norm": 4.2171711921691895, "learning_rate": 8.683980390396038e-07, "loss": 0.0189, "step": 315560 }, { "epoch": 3.3716544687216197, "grad_norm": 1.904352068901062, "learning_rate": 8.683866795181703e-07, "loss": 0.0097, "step": 315570 }, { "epoch": 3.3717613120358996, "grad_norm": 1.4562489986419678, "learning_rate": 8.683753195808004e-07, "loss": 0.0429, "step": 315580 }, { "epoch": 3.371868155350179, "grad_norm": 0.07529086619615555, "learning_rate": 8.683639592275068e-07, "loss": 0.0278, "step": 315590 }, { "epoch": 3.3719749986644585, "grad_norm": 4.612963676452637, "learning_rate": 8.683525984583027e-07, "loss": 0.0025, "step": 315600 }, { "epoch": 3.372081841978738, "grad_norm": 3.591273069381714, "learning_rate": 8.683412372732006e-07, "loss": 0.0201, "step": 315610 }, { "epoch": 3.372188685293018, "grad_norm": 5.376594066619873, "learning_rate": 8.683298756722135e-07, "loss": 0.01, "step": 315620 }, { "epoch": 3.3722955286072973, "grad_norm": 0.409896582365036, "learning_rate": 8.683185136553541e-07, "loss": 0.0025, "step": 315630 }, { "epoch": 3.372402371921577, "grad_norm": 0.26968201994895935, "learning_rate": 8.683071512226354e-07, "loss": 0.0226, "step": 315640 }, { "epoch": 3.3725092152358567, "grad_norm": 2.7138633728027344, "learning_rate": 8.682957883740702e-07, "loss": 0.0398, "step": 315650 }, { "epoch": 3.372616058550136, "grad_norm": 1.3123258352279663, "learning_rate": 8.68284425109671e-07, "loss": 0.0037, "step": 315660 }, { "epoch": 3.3727229018644156, "grad_norm": 0.6478339433670044, "learning_rate": 8.682730614294511e-07, "loss": 0.0167, "step": 315670 }, { "epoch": 3.3728297451786955, "grad_norm": 7.780293941497803, "learning_rate": 8.682616973334232e-07, "loss": 0.0122, "step": 315680 }, { "epoch": 3.372936588492975, "grad_norm": 0.06006623059511185, "learning_rate": 8.682503328215998e-07, "loss": 0.0042, "step": 315690 }, { "epoch": 3.373043431807255, "grad_norm": 0.22903592884540558, "learning_rate": 8.68238967893994e-07, "loss": 0.0232, "step": 315700 }, { "epoch": 3.3731502751215343, "grad_norm": 0.5324661731719971, "learning_rate": 8.682276025506188e-07, "loss": 0.0034, "step": 315710 }, { "epoch": 3.3732571184358138, "grad_norm": 0.07854156196117401, "learning_rate": 8.682162367914867e-07, "loss": 0.0213, "step": 315720 }, { "epoch": 3.3733639617500937, "grad_norm": 3.626781463623047, "learning_rate": 8.682048706166109e-07, "loss": 0.0078, "step": 315730 }, { "epoch": 3.373470805064373, "grad_norm": 0.08453093469142914, "learning_rate": 8.681935040260038e-07, "loss": 0.003, "step": 315740 }, { "epoch": 3.3735776483786526, "grad_norm": 5.134085655212402, "learning_rate": 8.681821370196785e-07, "loss": 0.014, "step": 315750 }, { "epoch": 3.3736844916929325, "grad_norm": 0.913565993309021, "learning_rate": 8.681707695976477e-07, "loss": 0.0083, "step": 315760 }, { "epoch": 3.373791335007212, "grad_norm": 0.07948865741491318, "learning_rate": 8.681594017599244e-07, "loss": 0.0008, "step": 315770 }, { "epoch": 3.3738981783214914, "grad_norm": 0.2213408648967743, "learning_rate": 8.681480335065213e-07, "loss": 0.0242, "step": 315780 }, { "epoch": 3.3740050216357713, "grad_norm": 0.004380348138511181, "learning_rate": 8.681366648374513e-07, "loss": 0.0324, "step": 315790 }, { "epoch": 3.3741118649500508, "grad_norm": 0.020801354199647903, "learning_rate": 8.681252957527272e-07, "loss": 0.0251, "step": 315800 }, { "epoch": 3.3742187082643302, "grad_norm": 1.5887012481689453, "learning_rate": 8.681139262523619e-07, "loss": 0.0089, "step": 315810 }, { "epoch": 3.37432555157861, "grad_norm": 0.028981147333979607, "learning_rate": 8.681025563363682e-07, "loss": 0.0053, "step": 315820 }, { "epoch": 3.3744323948928896, "grad_norm": 0.5197665691375732, "learning_rate": 8.68091186004759e-07, "loss": 0.0012, "step": 315830 }, { "epoch": 3.374539238207169, "grad_norm": 7.021207809448242, "learning_rate": 8.680798152575469e-07, "loss": 0.02, "step": 315840 }, { "epoch": 3.374646081521449, "grad_norm": 0.5221807956695557, "learning_rate": 8.68068444094745e-07, "loss": 0.0138, "step": 315850 }, { "epoch": 3.3747529248357284, "grad_norm": 0.6806206107139587, "learning_rate": 8.68057072516366e-07, "loss": 0.004, "step": 315860 }, { "epoch": 3.374859768150008, "grad_norm": 1.2779719829559326, "learning_rate": 8.680457005224229e-07, "loss": 0.0204, "step": 315870 }, { "epoch": 3.374966611464288, "grad_norm": 10.546719551086426, "learning_rate": 8.680343281129283e-07, "loss": 0.0375, "step": 315880 }, { "epoch": 3.3750734547785672, "grad_norm": 9.074790954589844, "learning_rate": 8.680229552878953e-07, "loss": 0.0163, "step": 315890 }, { "epoch": 3.3751802980928467, "grad_norm": 0.1802636682987213, "learning_rate": 8.680115820473366e-07, "loss": 0.0304, "step": 315900 }, { "epoch": 3.3752871414071266, "grad_norm": 0.0021717180497944355, "learning_rate": 8.68000208391265e-07, "loss": 0.006, "step": 315910 }, { "epoch": 3.375393984721406, "grad_norm": 0.0053170835599303246, "learning_rate": 8.679888343196932e-07, "loss": 0.0073, "step": 315920 }, { "epoch": 3.3755008280356855, "grad_norm": 0.03611311689019203, "learning_rate": 8.679774598326345e-07, "loss": 0.0112, "step": 315930 }, { "epoch": 3.3756076713499654, "grad_norm": 8.407736778259277, "learning_rate": 8.679660849301014e-07, "loss": 0.0024, "step": 315940 }, { "epoch": 3.375714514664245, "grad_norm": 0.14834634959697723, "learning_rate": 8.679547096121067e-07, "loss": 0.0068, "step": 315950 }, { "epoch": 3.3758213579785243, "grad_norm": 0.46654728055000305, "learning_rate": 8.679433338786634e-07, "loss": 0.0096, "step": 315960 }, { "epoch": 3.3759282012928042, "grad_norm": 0.026870111003518105, "learning_rate": 8.679319577297845e-07, "loss": 0.0399, "step": 315970 }, { "epoch": 3.3760350446070837, "grad_norm": 0.7547118663787842, "learning_rate": 8.679205811654824e-07, "loss": 0.005, "step": 315980 }, { "epoch": 3.376141887921363, "grad_norm": 0.063114695250988, "learning_rate": 8.679092041857703e-07, "loss": 0.0021, "step": 315990 }, { "epoch": 3.376248731235643, "grad_norm": 0.0015412282664328814, "learning_rate": 8.67897826790661e-07, "loss": 0.0193, "step": 316000 }, { "epoch": 3.3763555745499225, "grad_norm": 1.972720742225647, "learning_rate": 8.678864489801673e-07, "loss": 0.014, "step": 316010 }, { "epoch": 3.376462417864202, "grad_norm": 0.019275344908237457, "learning_rate": 8.678750707543019e-07, "loss": 0.0096, "step": 316020 }, { "epoch": 3.376569261178482, "grad_norm": 0.5829370617866516, "learning_rate": 8.678636921130779e-07, "loss": 0.0179, "step": 316030 }, { "epoch": 3.3766761044927613, "grad_norm": 3.50392484664917, "learning_rate": 8.67852313056508e-07, "loss": 0.0022, "step": 316040 }, { "epoch": 3.376782947807041, "grad_norm": 0.16120466589927673, "learning_rate": 8.678409335846051e-07, "loss": 0.0185, "step": 316050 }, { "epoch": 3.3768897911213207, "grad_norm": 0.012936062179505825, "learning_rate": 8.678295536973818e-07, "loss": 0.0023, "step": 316060 }, { "epoch": 3.3769966344356, "grad_norm": 4.630401611328125, "learning_rate": 8.678181733948515e-07, "loss": 0.0087, "step": 316070 }, { "epoch": 3.3771034777498796, "grad_norm": 0.002337117213755846, "learning_rate": 8.678067926770265e-07, "loss": 0.0085, "step": 316080 }, { "epoch": 3.3772103210641595, "grad_norm": 0.34589460492134094, "learning_rate": 8.677954115439199e-07, "loss": 0.0008, "step": 316090 }, { "epoch": 3.377317164378439, "grad_norm": 0.527498185634613, "learning_rate": 8.677840299955447e-07, "loss": 0.0024, "step": 316100 }, { "epoch": 3.3774240076927184, "grad_norm": 0.6523509621620178, "learning_rate": 8.677726480319134e-07, "loss": 0.0191, "step": 316110 }, { "epoch": 3.3775308510069983, "grad_norm": 0.35889482498168945, "learning_rate": 8.677612656530391e-07, "loss": 0.0051, "step": 316120 }, { "epoch": 3.377637694321278, "grad_norm": 3.0672061443328857, "learning_rate": 8.677498828589345e-07, "loss": 0.0788, "step": 316130 }, { "epoch": 3.3777445376355573, "grad_norm": 3.4326984882354736, "learning_rate": 8.677384996496125e-07, "loss": 0.0325, "step": 316140 }, { "epoch": 3.377851380949837, "grad_norm": 0.42179152369499207, "learning_rate": 8.677271160250861e-07, "loss": 0.0875, "step": 316150 }, { "epoch": 3.3779582242641166, "grad_norm": 0.00563333323225379, "learning_rate": 8.677157319853679e-07, "loss": 0.0266, "step": 316160 }, { "epoch": 3.3780650675783965, "grad_norm": 0.0039792051538825035, "learning_rate": 8.677043475304711e-07, "loss": 0.0004, "step": 316170 }, { "epoch": 3.378171910892676, "grad_norm": 0.24488110840320587, "learning_rate": 8.676929626604081e-07, "loss": 0.0427, "step": 316180 }, { "epoch": 3.3782787542069554, "grad_norm": 0.473732054233551, "learning_rate": 8.676815773751921e-07, "loss": 0.0173, "step": 316190 }, { "epoch": 3.378385597521235, "grad_norm": 0.5434901118278503, "learning_rate": 8.67670191674836e-07, "loss": 0.0158, "step": 316200 }, { "epoch": 3.378492440835515, "grad_norm": 0.03697268292307854, "learning_rate": 8.676588055593522e-07, "loss": 0.0201, "step": 316210 }, { "epoch": 3.3785992841497943, "grad_norm": 0.02214227430522442, "learning_rate": 8.676474190287539e-07, "loss": 0.0064, "step": 316220 }, { "epoch": 3.378706127464074, "grad_norm": 0.39560192823410034, "learning_rate": 8.676360320830541e-07, "loss": 0.0104, "step": 316230 }, { "epoch": 3.3788129707783536, "grad_norm": 5.652215957641602, "learning_rate": 8.676246447222655e-07, "loss": 0.0142, "step": 316240 }, { "epoch": 3.378919814092633, "grad_norm": 0.8648483157157898, "learning_rate": 8.676132569464008e-07, "loss": 0.018, "step": 316250 }, { "epoch": 3.3790266574069125, "grad_norm": 0.3191201388835907, "learning_rate": 8.676018687554729e-07, "loss": 0.057, "step": 316260 }, { "epoch": 3.3791335007211925, "grad_norm": 0.006263006944209337, "learning_rate": 8.675904801494949e-07, "loss": 0.0192, "step": 316270 }, { "epoch": 3.379240344035472, "grad_norm": 0.03624230623245239, "learning_rate": 8.675790911284795e-07, "loss": 0.0096, "step": 316280 }, { "epoch": 3.379347187349752, "grad_norm": 0.18596166372299194, "learning_rate": 8.675677016924394e-07, "loss": 0.0096, "step": 316290 }, { "epoch": 3.3794540306640313, "grad_norm": 0.04934215173125267, "learning_rate": 8.675563118413879e-07, "loss": 0.0131, "step": 316300 }, { "epoch": 3.3795608739783107, "grad_norm": 0.19384530186653137, "learning_rate": 8.675449215753374e-07, "loss": 0.0172, "step": 316310 }, { "epoch": 3.37966771729259, "grad_norm": 0.018053622916340828, "learning_rate": 8.67533530894301e-07, "loss": 0.0068, "step": 316320 }, { "epoch": 3.37977456060687, "grad_norm": 0.10210826247930527, "learning_rate": 8.675221397982915e-07, "loss": 0.006, "step": 316330 }, { "epoch": 3.3798814039211496, "grad_norm": 0.1864394098520279, "learning_rate": 8.675107482873217e-07, "loss": 0.0234, "step": 316340 }, { "epoch": 3.3799882472354295, "grad_norm": 0.004787826910614967, "learning_rate": 8.674993563614046e-07, "loss": 0.0022, "step": 316350 }, { "epoch": 3.380095090549709, "grad_norm": 0.8783475160598755, "learning_rate": 8.674879640205531e-07, "loss": 0.02, "step": 316360 }, { "epoch": 3.3802019338639884, "grad_norm": 0.14105725288391113, "learning_rate": 8.674765712647798e-07, "loss": 0.0044, "step": 316370 }, { "epoch": 3.380308777178268, "grad_norm": 0.3120395839214325, "learning_rate": 8.674651780940976e-07, "loss": 0.0071, "step": 316380 }, { "epoch": 3.3804156204925477, "grad_norm": 2.5929551124572754, "learning_rate": 8.674537845085197e-07, "loss": 0.0059, "step": 316390 }, { "epoch": 3.380522463806827, "grad_norm": 0.2534531354904175, "learning_rate": 8.674423905080586e-07, "loss": 0.0166, "step": 316400 }, { "epoch": 3.380629307121107, "grad_norm": 0.016545116901397705, "learning_rate": 8.674309960927274e-07, "loss": 0.0088, "step": 316410 }, { "epoch": 3.3807361504353866, "grad_norm": 0.15495865046977997, "learning_rate": 8.674196012625388e-07, "loss": 0.0644, "step": 316420 }, { "epoch": 3.380842993749666, "grad_norm": 0.028995146974921227, "learning_rate": 8.674082060175059e-07, "loss": 0.008, "step": 316430 }, { "epoch": 3.380949837063946, "grad_norm": 2.05771803855896, "learning_rate": 8.673968103576413e-07, "loss": 0.0526, "step": 316440 }, { "epoch": 3.3810566803782254, "grad_norm": 0.0697968453168869, "learning_rate": 8.673854142829579e-07, "loss": 0.0046, "step": 316450 }, { "epoch": 3.381163523692505, "grad_norm": 0.7215315103530884, "learning_rate": 8.673740177934686e-07, "loss": 0.0218, "step": 316460 }, { "epoch": 3.3812703670067847, "grad_norm": 0.0032371999695897102, "learning_rate": 8.673626208891865e-07, "loss": 0.0085, "step": 316470 }, { "epoch": 3.381377210321064, "grad_norm": 0.958076000213623, "learning_rate": 8.673512235701241e-07, "loss": 0.0351, "step": 316480 }, { "epoch": 3.3814840536353437, "grad_norm": 2.551337957382202, "learning_rate": 8.673398258362946e-07, "loss": 0.0028, "step": 316490 }, { "epoch": 3.3815908969496236, "grad_norm": 0.022174248471856117, "learning_rate": 8.673284276877106e-07, "loss": 0.0105, "step": 316500 }, { "epoch": 3.381697740263903, "grad_norm": 0.014246631413698196, "learning_rate": 8.673170291243852e-07, "loss": 0.0085, "step": 316510 }, { "epoch": 3.3818045835781825, "grad_norm": 0.20874860882759094, "learning_rate": 8.673056301463311e-07, "loss": 0.0144, "step": 316520 }, { "epoch": 3.3819114268924624, "grad_norm": 0.12208519876003265, "learning_rate": 8.672942307535611e-07, "loss": 0.0028, "step": 316530 }, { "epoch": 3.382018270206742, "grad_norm": 0.005250775255262852, "learning_rate": 8.672828309460884e-07, "loss": 0.0041, "step": 316540 }, { "epoch": 3.3821251135210213, "grad_norm": 6.071456432342529, "learning_rate": 8.672714307239255e-07, "loss": 0.0359, "step": 316550 }, { "epoch": 3.382231956835301, "grad_norm": 8.158897399902344, "learning_rate": 8.672600300870855e-07, "loss": 0.0026, "step": 316560 }, { "epoch": 3.3823388001495807, "grad_norm": 0.16005685925483704, "learning_rate": 8.672486290355811e-07, "loss": 0.0093, "step": 316570 }, { "epoch": 3.38244564346386, "grad_norm": 4.248718738555908, "learning_rate": 8.672372275694255e-07, "loss": 0.016, "step": 316580 }, { "epoch": 3.38255248677814, "grad_norm": 0.0050317044369876385, "learning_rate": 8.672258256886311e-07, "loss": 0.0368, "step": 316590 }, { "epoch": 3.3826593300924195, "grad_norm": 0.004055111203342676, "learning_rate": 8.672144233932113e-07, "loss": 0.0213, "step": 316600 }, { "epoch": 3.382766173406699, "grad_norm": 0.020875995978713036, "learning_rate": 8.672030206831784e-07, "loss": 0.0064, "step": 316610 }, { "epoch": 3.382873016720979, "grad_norm": 0.3159206211566925, "learning_rate": 8.671916175585458e-07, "loss": 0.0063, "step": 316620 }, { "epoch": 3.3829798600352583, "grad_norm": 0.45419853925704956, "learning_rate": 8.671802140193261e-07, "loss": 0.0056, "step": 316630 }, { "epoch": 3.3830867033495378, "grad_norm": 0.028766460716724396, "learning_rate": 8.671688100655323e-07, "loss": 0.0125, "step": 316640 }, { "epoch": 3.3831935466638177, "grad_norm": 0.09489642828702927, "learning_rate": 8.67157405697177e-07, "loss": 0.0008, "step": 316650 }, { "epoch": 3.383300389978097, "grad_norm": 0.15984345972537994, "learning_rate": 8.671460009142734e-07, "loss": 0.0027, "step": 316660 }, { "epoch": 3.3834072332923766, "grad_norm": 0.006205124780535698, "learning_rate": 8.671345957168342e-07, "loss": 0.0228, "step": 316670 }, { "epoch": 3.3835140766066565, "grad_norm": 0.37212178111076355, "learning_rate": 8.671231901048723e-07, "loss": 0.0112, "step": 316680 }, { "epoch": 3.383620919920936, "grad_norm": 1.2897361516952515, "learning_rate": 8.671117840784008e-07, "loss": 0.0222, "step": 316690 }, { "epoch": 3.3837277632352154, "grad_norm": 0.015530185773968697, "learning_rate": 8.671003776374323e-07, "loss": 0.0085, "step": 316700 }, { "epoch": 3.3838346065494953, "grad_norm": 0.001399245928041637, "learning_rate": 8.670889707819797e-07, "loss": 0.0178, "step": 316710 }, { "epoch": 3.3839414498637748, "grad_norm": 0.9033007621765137, "learning_rate": 8.670775635120559e-07, "loss": 0.0045, "step": 316720 }, { "epoch": 3.3840482931780542, "grad_norm": 0.009132078848779202, "learning_rate": 8.67066155827674e-07, "loss": 0.0069, "step": 316730 }, { "epoch": 3.384155136492334, "grad_norm": 0.015605500899255276, "learning_rate": 8.670547477288465e-07, "loss": 0.006, "step": 316740 }, { "epoch": 3.3842619798066136, "grad_norm": 0.20262089371681213, "learning_rate": 8.670433392155867e-07, "loss": 0.0122, "step": 316750 }, { "epoch": 3.384368823120893, "grad_norm": 0.9635758996009827, "learning_rate": 8.670319302879073e-07, "loss": 0.0085, "step": 316760 }, { "epoch": 3.384475666435173, "grad_norm": 0.027638975530862808, "learning_rate": 8.67020520945821e-07, "loss": 0.0009, "step": 316770 }, { "epoch": 3.3845825097494524, "grad_norm": 0.031868912279605865, "learning_rate": 8.670091111893407e-07, "loss": 0.0066, "step": 316780 }, { "epoch": 3.384689353063732, "grad_norm": 0.04753919690847397, "learning_rate": 8.669977010184797e-07, "loss": 0.0038, "step": 316790 }, { "epoch": 3.3847961963780118, "grad_norm": 0.00815419852733612, "learning_rate": 8.669862904332504e-07, "loss": 0.003, "step": 316800 }, { "epoch": 3.3849030396922912, "grad_norm": 7.164180278778076, "learning_rate": 8.66974879433666e-07, "loss": 0.0071, "step": 316810 }, { "epoch": 3.3850098830065707, "grad_norm": 0.016531068831682205, "learning_rate": 8.669634680197392e-07, "loss": 0.004, "step": 316820 }, { "epoch": 3.3851167263208506, "grad_norm": 0.0032425548415631056, "learning_rate": 8.66952056191483e-07, "loss": 0.0038, "step": 316830 }, { "epoch": 3.38522356963513, "grad_norm": 0.38893094658851624, "learning_rate": 8.669406439489102e-07, "loss": 0.0015, "step": 316840 }, { "epoch": 3.3853304129494095, "grad_norm": 0.9879447221755981, "learning_rate": 8.669292312920337e-07, "loss": 0.0174, "step": 316850 }, { "epoch": 3.3854372562636894, "grad_norm": 0.07511968910694122, "learning_rate": 8.669178182208664e-07, "loss": 0.0193, "step": 316860 }, { "epoch": 3.385544099577969, "grad_norm": 0.49900445342063904, "learning_rate": 8.669064047354213e-07, "loss": 0.0108, "step": 316870 }, { "epoch": 3.3856509428922483, "grad_norm": 0.01089075580239296, "learning_rate": 8.66894990835711e-07, "loss": 0.0102, "step": 316880 }, { "epoch": 3.3857577862065282, "grad_norm": 0.04501764848828316, "learning_rate": 8.668835765217486e-07, "loss": 0.0196, "step": 316890 }, { "epoch": 3.3858646295208077, "grad_norm": 3.2452914714813232, "learning_rate": 8.668721617935471e-07, "loss": 0.0316, "step": 316900 }, { "epoch": 3.385971472835087, "grad_norm": 0.04511497542262077, "learning_rate": 8.668607466511191e-07, "loss": 0.0057, "step": 316910 }, { "epoch": 3.386078316149367, "grad_norm": 0.34594863653182983, "learning_rate": 8.668493310944776e-07, "loss": 0.0101, "step": 316920 }, { "epoch": 3.3861851594636465, "grad_norm": 0.003402035217732191, "learning_rate": 8.668379151236356e-07, "loss": 0.0028, "step": 316930 }, { "epoch": 3.3862920027779264, "grad_norm": 0.5420166850090027, "learning_rate": 8.66826498738606e-07, "loss": 0.0033, "step": 316940 }, { "epoch": 3.386398846092206, "grad_norm": 0.20409759879112244, "learning_rate": 8.668150819394014e-07, "loss": 0.0179, "step": 316950 }, { "epoch": 3.3865056894064853, "grad_norm": 0.32141077518463135, "learning_rate": 8.668036647260351e-07, "loss": 0.0381, "step": 316960 }, { "epoch": 3.386612532720765, "grad_norm": 0.03863569721579552, "learning_rate": 8.667922470985196e-07, "loss": 0.037, "step": 316970 }, { "epoch": 3.3867193760350447, "grad_norm": 0.36039918661117554, "learning_rate": 8.667808290568681e-07, "loss": 0.0548, "step": 316980 }, { "epoch": 3.386826219349324, "grad_norm": 0.42986589670181274, "learning_rate": 8.667694106010932e-07, "loss": 0.0815, "step": 316990 }, { "epoch": 3.386933062663604, "grad_norm": 6.92306661605835, "learning_rate": 8.667579917312082e-07, "loss": 0.0382, "step": 317000 }, { "epoch": 3.3870399059778835, "grad_norm": 1.7300893068313599, "learning_rate": 8.667465724472255e-07, "loss": 0.0367, "step": 317010 }, { "epoch": 3.387146749292163, "grad_norm": 0.009748358279466629, "learning_rate": 8.667351527491583e-07, "loss": 0.0297, "step": 317020 }, { "epoch": 3.3872535926064424, "grad_norm": 0.04056772217154503, "learning_rate": 8.667237326370195e-07, "loss": 0.0072, "step": 317030 }, { "epoch": 3.3873604359207223, "grad_norm": 0.15562178194522858, "learning_rate": 8.66712312110822e-07, "loss": 0.0046, "step": 317040 }, { "epoch": 3.387467279235002, "grad_norm": 3.019186019897461, "learning_rate": 8.667008911705784e-07, "loss": 0.008, "step": 317050 }, { "epoch": 3.3875741225492817, "grad_norm": 0.03159691020846367, "learning_rate": 8.666894698163019e-07, "loss": 0.0203, "step": 317060 }, { "epoch": 3.387680965863561, "grad_norm": 0.12340690195560455, "learning_rate": 8.666780480480054e-07, "loss": 0.0014, "step": 317070 }, { "epoch": 3.3877878091778406, "grad_norm": 1.0632938146591187, "learning_rate": 8.666666258657016e-07, "loss": 0.0111, "step": 317080 }, { "epoch": 3.38789465249212, "grad_norm": 0.01620357483625412, "learning_rate": 8.666552032694035e-07, "loss": 0.0387, "step": 317090 }, { "epoch": 3.3880014958064, "grad_norm": 0.04415562376379967, "learning_rate": 8.666437802591242e-07, "loss": 0.0064, "step": 317100 }, { "epoch": 3.3881083391206794, "grad_norm": 0.003282043384388089, "learning_rate": 8.666323568348763e-07, "loss": 0.0106, "step": 317110 }, { "epoch": 3.3882151824349593, "grad_norm": 9.53570556640625, "learning_rate": 8.666209329966728e-07, "loss": 0.0756, "step": 317120 }, { "epoch": 3.388322025749239, "grad_norm": 0.012095926329493523, "learning_rate": 8.666095087445265e-07, "loss": 0.0065, "step": 317130 }, { "epoch": 3.3884288690635183, "grad_norm": 7.082210540771484, "learning_rate": 8.665980840784503e-07, "loss": 0.0116, "step": 317140 }, { "epoch": 3.3885357123777977, "grad_norm": 0.8265318274497986, "learning_rate": 8.665866589984574e-07, "loss": 0.0522, "step": 317150 }, { "epoch": 3.3886425556920776, "grad_norm": 1.4963657855987549, "learning_rate": 8.665752335045605e-07, "loss": 0.0116, "step": 317160 }, { "epoch": 3.388749399006357, "grad_norm": 0.7157605290412903, "learning_rate": 8.665638075967725e-07, "loss": 0.0011, "step": 317170 }, { "epoch": 3.388856242320637, "grad_norm": 2.6272315979003906, "learning_rate": 8.665523812751062e-07, "loss": 0.0158, "step": 317180 }, { "epoch": 3.3889630856349164, "grad_norm": 1.5838344097137451, "learning_rate": 8.665409545395747e-07, "loss": 0.0175, "step": 317190 }, { "epoch": 3.389069928949196, "grad_norm": 3.27260684967041, "learning_rate": 8.665295273901906e-07, "loss": 0.0206, "step": 317200 }, { "epoch": 3.389176772263476, "grad_norm": 0.010029171593487263, "learning_rate": 8.665180998269673e-07, "loss": 0.0125, "step": 317210 }, { "epoch": 3.3892836155777553, "grad_norm": 0.01355690322816372, "learning_rate": 8.665066718499172e-07, "loss": 0.0093, "step": 317220 }, { "epoch": 3.3893904588920347, "grad_norm": 3.2865893840789795, "learning_rate": 8.664952434590533e-07, "loss": 0.008, "step": 317230 }, { "epoch": 3.3894973022063146, "grad_norm": 0.00910264253616333, "learning_rate": 8.664838146543888e-07, "loss": 0.0063, "step": 317240 }, { "epoch": 3.389604145520594, "grad_norm": 0.0025323559530079365, "learning_rate": 8.664723854359364e-07, "loss": 0.0144, "step": 317250 }, { "epoch": 3.3897109888348735, "grad_norm": 1.760722041130066, "learning_rate": 8.664609558037089e-07, "loss": 0.013, "step": 317260 }, { "epoch": 3.3898178321491534, "grad_norm": 0.0016840838361531496, "learning_rate": 8.664495257577194e-07, "loss": 0.0216, "step": 317270 }, { "epoch": 3.389924675463433, "grad_norm": 5.990794658660889, "learning_rate": 8.664380952979809e-07, "loss": 0.0175, "step": 317280 }, { "epoch": 3.3900315187777124, "grad_norm": 1.7115188837051392, "learning_rate": 8.664266644245058e-07, "loss": 0.0147, "step": 317290 }, { "epoch": 3.3901383620919923, "grad_norm": 0.12450732290744781, "learning_rate": 8.664152331373076e-07, "loss": 0.0446, "step": 317300 }, { "epoch": 3.3902452054062717, "grad_norm": 16.621225357055664, "learning_rate": 8.664038014363988e-07, "loss": 0.0147, "step": 317310 }, { "epoch": 3.390352048720551, "grad_norm": 0.009210394695401192, "learning_rate": 8.663923693217925e-07, "loss": 0.0033, "step": 317320 }, { "epoch": 3.390458892034831, "grad_norm": 0.03054858185350895, "learning_rate": 8.663809367935016e-07, "loss": 0.0008, "step": 317330 }, { "epoch": 3.3905657353491105, "grad_norm": 0.11261966824531555, "learning_rate": 8.663695038515389e-07, "loss": 0.0049, "step": 317340 }, { "epoch": 3.39067257866339, "grad_norm": 0.001938267145305872, "learning_rate": 8.663580704959175e-07, "loss": 0.0267, "step": 317350 }, { "epoch": 3.39077942197767, "grad_norm": 0.3482469320297241, "learning_rate": 8.663466367266501e-07, "loss": 0.0017, "step": 317360 }, { "epoch": 3.3908862652919494, "grad_norm": 17.40323257446289, "learning_rate": 8.663352025437497e-07, "loss": 0.0024, "step": 317370 }, { "epoch": 3.390993108606229, "grad_norm": 1.2267690896987915, "learning_rate": 8.663237679472292e-07, "loss": 0.0168, "step": 317380 }, { "epoch": 3.3910999519205087, "grad_norm": 2.1465446949005127, "learning_rate": 8.663123329371015e-07, "loss": 0.0104, "step": 317390 }, { "epoch": 3.391206795234788, "grad_norm": 0.1732434630393982, "learning_rate": 8.663008975133795e-07, "loss": 0.0057, "step": 317400 }, { "epoch": 3.3913136385490676, "grad_norm": 0.06690245866775513, "learning_rate": 8.662894616760764e-07, "loss": 0.0162, "step": 317410 }, { "epoch": 3.3914204818633475, "grad_norm": 0.07397876679897308, "learning_rate": 8.662780254252046e-07, "loss": 0.0585, "step": 317420 }, { "epoch": 3.391527325177627, "grad_norm": 0.023191673681139946, "learning_rate": 8.662665887607771e-07, "loss": 0.016, "step": 317430 }, { "epoch": 3.3916341684919065, "grad_norm": 0.899253785610199, "learning_rate": 8.662551516828074e-07, "loss": 0.0074, "step": 317440 }, { "epoch": 3.3917410118061864, "grad_norm": 1.7164394855499268, "learning_rate": 8.662437141913077e-07, "loss": 0.0244, "step": 317450 }, { "epoch": 3.391847855120466, "grad_norm": 1.4350148439407349, "learning_rate": 8.662322762862914e-07, "loss": 0.0167, "step": 317460 }, { "epoch": 3.3919546984347453, "grad_norm": 1.6229890584945679, "learning_rate": 8.662208379677711e-07, "loss": 0.0576, "step": 317470 }, { "epoch": 3.392061541749025, "grad_norm": 0.0020732891280204058, "learning_rate": 8.662093992357597e-07, "loss": 0.0005, "step": 317480 }, { "epoch": 3.3921683850633046, "grad_norm": 0.007035607006400824, "learning_rate": 8.661979600902705e-07, "loss": 0.0139, "step": 317490 }, { "epoch": 3.392275228377584, "grad_norm": 0.009317921474575996, "learning_rate": 8.661865205313161e-07, "loss": 0.0263, "step": 317500 }, { "epoch": 3.392382071691864, "grad_norm": 0.45472943782806396, "learning_rate": 8.661750805589094e-07, "loss": 0.0548, "step": 317510 }, { "epoch": 3.3924889150061435, "grad_norm": 4.932361602783203, "learning_rate": 8.661636401730634e-07, "loss": 0.0159, "step": 317520 }, { "epoch": 3.392595758320423, "grad_norm": 0.03752855584025383, "learning_rate": 8.66152199373791e-07, "loss": 0.0163, "step": 317530 }, { "epoch": 3.392702601634703, "grad_norm": 0.24428613483905792, "learning_rate": 8.661407581611053e-07, "loss": 0.008, "step": 317540 }, { "epoch": 3.3928094449489823, "grad_norm": 0.016273709014058113, "learning_rate": 8.661293165350189e-07, "loss": 0.013, "step": 317550 }, { "epoch": 3.3929162882632617, "grad_norm": 0.2019280344247818, "learning_rate": 8.661178744955449e-07, "loss": 0.008, "step": 317560 }, { "epoch": 3.3930231315775417, "grad_norm": 0.010027660988271236, "learning_rate": 8.661064320426963e-07, "loss": 0.0092, "step": 317570 }, { "epoch": 3.393129974891821, "grad_norm": 0.020289234817028046, "learning_rate": 8.660949891764858e-07, "loss": 0.0004, "step": 317580 }, { "epoch": 3.3932368182061006, "grad_norm": 7.487838268280029, "learning_rate": 8.660835458969263e-07, "loss": 0.0074, "step": 317590 }, { "epoch": 3.3933436615203805, "grad_norm": 0.11074315011501312, "learning_rate": 8.660721022040309e-07, "loss": 0.0064, "step": 317600 }, { "epoch": 3.39345050483466, "grad_norm": 0.08630532771348953, "learning_rate": 8.660606580978127e-07, "loss": 0.0239, "step": 317610 }, { "epoch": 3.3935573481489394, "grad_norm": 1.537743091583252, "learning_rate": 8.660492135782842e-07, "loss": 0.0024, "step": 317620 }, { "epoch": 3.3936641914632193, "grad_norm": 0.042038436979055405, "learning_rate": 8.660377686454585e-07, "loss": 0.0049, "step": 317630 }, { "epoch": 3.3937710347774988, "grad_norm": 0.009828606620430946, "learning_rate": 8.660263232993487e-07, "loss": 0.0145, "step": 317640 }, { "epoch": 3.3938778780917787, "grad_norm": 0.001756287645548582, "learning_rate": 8.660148775399674e-07, "loss": 0.0153, "step": 317650 }, { "epoch": 3.393984721406058, "grad_norm": 0.013517187908291817, "learning_rate": 8.660034313673277e-07, "loss": 0.0184, "step": 317660 }, { "epoch": 3.3940915647203376, "grad_norm": 17.828556060791016, "learning_rate": 8.659919847814425e-07, "loss": 0.0349, "step": 317670 }, { "epoch": 3.394198408034617, "grad_norm": 0.026471657678484917, "learning_rate": 8.659805377823247e-07, "loss": 0.044, "step": 317680 }, { "epoch": 3.394305251348897, "grad_norm": 0.045316845178604126, "learning_rate": 8.659690903699873e-07, "loss": 0.0095, "step": 317690 }, { "epoch": 3.3944120946631764, "grad_norm": 1.5074888467788696, "learning_rate": 8.659576425444432e-07, "loss": 0.0294, "step": 317700 }, { "epoch": 3.3945189379774563, "grad_norm": 0.012292147614061832, "learning_rate": 8.659461943057051e-07, "loss": 0.0032, "step": 317710 }, { "epoch": 3.3946257812917358, "grad_norm": 0.4972517490386963, "learning_rate": 8.659347456537864e-07, "loss": 0.0206, "step": 317720 }, { "epoch": 3.394732624606015, "grad_norm": 0.014470755122601986, "learning_rate": 8.659232965886997e-07, "loss": 0.0399, "step": 317730 }, { "epoch": 3.3948394679202947, "grad_norm": 13.443441390991211, "learning_rate": 8.659118471104578e-07, "loss": 0.0221, "step": 317740 }, { "epoch": 3.3949463112345746, "grad_norm": 4.510245323181152, "learning_rate": 8.659003972190741e-07, "loss": 0.0076, "step": 317750 }, { "epoch": 3.395053154548854, "grad_norm": 2.8485677242279053, "learning_rate": 8.658889469145609e-07, "loss": 0.0259, "step": 317760 }, { "epoch": 3.395159997863134, "grad_norm": 5.242046356201172, "learning_rate": 8.658774961969317e-07, "loss": 0.0199, "step": 317770 }, { "epoch": 3.3952668411774134, "grad_norm": 0.04913583770394325, "learning_rate": 8.658660450661992e-07, "loss": 0.0111, "step": 317780 }, { "epoch": 3.395373684491693, "grad_norm": 1.3337608575820923, "learning_rate": 8.658545935223762e-07, "loss": 0.0715, "step": 317790 }, { "epoch": 3.3954805278059723, "grad_norm": 0.40096503496170044, "learning_rate": 8.658431415654758e-07, "loss": 0.0128, "step": 317800 }, { "epoch": 3.395587371120252, "grad_norm": 3.559685230255127, "learning_rate": 8.658316891955109e-07, "loss": 0.0326, "step": 317810 }, { "epoch": 3.3956942144345317, "grad_norm": 0.44381505250930786, "learning_rate": 8.658202364124944e-07, "loss": 0.0203, "step": 317820 }, { "epoch": 3.3958010577488116, "grad_norm": 0.040840815752744675, "learning_rate": 8.658087832164393e-07, "loss": 0.0144, "step": 317830 }, { "epoch": 3.395907901063091, "grad_norm": 0.04257631301879883, "learning_rate": 8.657973296073584e-07, "loss": 0.0096, "step": 317840 }, { "epoch": 3.3960147443773705, "grad_norm": 0.0009114656131714582, "learning_rate": 8.657858755852648e-07, "loss": 0.0083, "step": 317850 }, { "epoch": 3.39612158769165, "grad_norm": 1.200225591659546, "learning_rate": 8.657744211501712e-07, "loss": 0.0279, "step": 317860 }, { "epoch": 3.39622843100593, "grad_norm": 0.0032451848965138197, "learning_rate": 8.657629663020907e-07, "loss": 0.0058, "step": 317870 }, { "epoch": 3.3963352743202093, "grad_norm": 9.017379760742188, "learning_rate": 8.657515110410363e-07, "loss": 0.0072, "step": 317880 }, { "epoch": 3.396442117634489, "grad_norm": 0.21981656551361084, "learning_rate": 8.657400553670207e-07, "loss": 0.0241, "step": 317890 }, { "epoch": 3.3965489609487687, "grad_norm": 0.011541959829628468, "learning_rate": 8.657285992800571e-07, "loss": 0.0007, "step": 317900 }, { "epoch": 3.396655804263048, "grad_norm": 0.03807682543992996, "learning_rate": 8.657171427801582e-07, "loss": 0.0145, "step": 317910 }, { "epoch": 3.396762647577328, "grad_norm": 17.654726028442383, "learning_rate": 8.657056858673371e-07, "loss": 0.0088, "step": 317920 }, { "epoch": 3.3968694908916075, "grad_norm": 2.066993474960327, "learning_rate": 8.656942285416067e-07, "loss": 0.0216, "step": 317930 }, { "epoch": 3.396976334205887, "grad_norm": 0.0028991559520363808, "learning_rate": 8.656827708029799e-07, "loss": 0.0001, "step": 317940 }, { "epoch": 3.397083177520167, "grad_norm": 2.9170968532562256, "learning_rate": 8.656713126514696e-07, "loss": 0.006, "step": 317950 }, { "epoch": 3.3971900208344463, "grad_norm": 0.7253968119621277, "learning_rate": 8.656598540870889e-07, "loss": 0.0024, "step": 317960 }, { "epoch": 3.397296864148726, "grad_norm": 21.872066497802734, "learning_rate": 8.656483951098506e-07, "loss": 0.0173, "step": 317970 }, { "epoch": 3.3974037074630057, "grad_norm": 0.021502556279301643, "learning_rate": 8.656369357197677e-07, "loss": 0.0052, "step": 317980 }, { "epoch": 3.397510550777285, "grad_norm": 15.768234252929688, "learning_rate": 8.65625475916853e-07, "loss": 0.0152, "step": 317990 }, { "epoch": 3.3976173940915646, "grad_norm": 0.017447732388973236, "learning_rate": 8.656140157011196e-07, "loss": 0.0031, "step": 318000 }, { "epoch": 3.3977242374058445, "grad_norm": 5.28116512298584, "learning_rate": 8.656025550725803e-07, "loss": 0.0168, "step": 318010 }, { "epoch": 3.397831080720124, "grad_norm": 9.41183853149414, "learning_rate": 8.655910940312481e-07, "loss": 0.008, "step": 318020 }, { "epoch": 3.3979379240344034, "grad_norm": 0.07691875100135803, "learning_rate": 8.655796325771361e-07, "loss": 0.0133, "step": 318030 }, { "epoch": 3.3980447673486833, "grad_norm": 5.568333625793457, "learning_rate": 8.655681707102571e-07, "loss": 0.0141, "step": 318040 }, { "epoch": 3.398151610662963, "grad_norm": 0.11120713502168655, "learning_rate": 8.655567084306239e-07, "loss": 0.0142, "step": 318050 }, { "epoch": 3.3982584539772422, "grad_norm": 1.9702174663543701, "learning_rate": 8.655452457382497e-07, "loss": 0.0129, "step": 318060 }, { "epoch": 3.398365297291522, "grad_norm": 0.0017283702036365867, "learning_rate": 8.655337826331473e-07, "loss": 0.0036, "step": 318070 }, { "epoch": 3.3984721406058016, "grad_norm": 0.027542440220713615, "learning_rate": 8.655223191153298e-07, "loss": 0.0313, "step": 318080 }, { "epoch": 3.398578983920081, "grad_norm": 0.011873539537191391, "learning_rate": 8.655108551848098e-07, "loss": 0.0027, "step": 318090 }, { "epoch": 3.398685827234361, "grad_norm": 0.10229819267988205, "learning_rate": 8.654993908416006e-07, "loss": 0.0036, "step": 318100 }, { "epoch": 3.3987926705486404, "grad_norm": 0.08052469044923782, "learning_rate": 8.65487926085715e-07, "loss": 0.0018, "step": 318110 }, { "epoch": 3.39889951386292, "grad_norm": 3.5079095363616943, "learning_rate": 8.654764609171659e-07, "loss": 0.011, "step": 318120 }, { "epoch": 3.3990063571772, "grad_norm": 2.246090888977051, "learning_rate": 8.654649953359663e-07, "loss": 0.0301, "step": 318130 }, { "epoch": 3.3991132004914792, "grad_norm": 0.2604305148124695, "learning_rate": 8.654535293421291e-07, "loss": 0.0178, "step": 318140 }, { "epoch": 3.3992200438057587, "grad_norm": 0.004464598372578621, "learning_rate": 8.654420629356673e-07, "loss": 0.0219, "step": 318150 }, { "epoch": 3.3993268871200386, "grad_norm": 0.4936942458152771, "learning_rate": 8.654305961165939e-07, "loss": 0.0094, "step": 318160 }, { "epoch": 3.399433730434318, "grad_norm": 0.002387280808761716, "learning_rate": 8.654191288849217e-07, "loss": 0.0084, "step": 318170 }, { "epoch": 3.3995405737485975, "grad_norm": 6.056436061859131, "learning_rate": 8.654076612406638e-07, "loss": 0.0101, "step": 318180 }, { "epoch": 3.3996474170628774, "grad_norm": 0.6041319370269775, "learning_rate": 8.653961931838329e-07, "loss": 0.0095, "step": 318190 }, { "epoch": 3.399754260377157, "grad_norm": 0.01933100074529648, "learning_rate": 8.653847247144422e-07, "loss": 0.0138, "step": 318200 }, { "epoch": 3.3998611036914363, "grad_norm": 0.09451152384281158, "learning_rate": 8.653732558325046e-07, "loss": 0.0176, "step": 318210 }, { "epoch": 3.3999679470057163, "grad_norm": 1.250275731086731, "learning_rate": 8.653617865380332e-07, "loss": 0.0108, "step": 318220 }, { "epoch": 3.4000747903199957, "grad_norm": 0.09317398071289062, "learning_rate": 8.653503168310405e-07, "loss": 0.0138, "step": 318230 }, { "epoch": 3.400181633634275, "grad_norm": 0.6071431636810303, "learning_rate": 8.653388467115399e-07, "loss": 0.0084, "step": 318240 }, { "epoch": 3.400288476948555, "grad_norm": 0.003953482490032911, "learning_rate": 8.65327376179544e-07, "loss": 0.0135, "step": 318250 }, { "epoch": 3.4003953202628345, "grad_norm": 0.009720713831484318, "learning_rate": 8.65315905235066e-07, "loss": 0.0075, "step": 318260 }, { "epoch": 3.400502163577114, "grad_norm": 0.001886590849608183, "learning_rate": 8.653044338781188e-07, "loss": 0.0013, "step": 318270 }, { "epoch": 3.400609006891394, "grad_norm": 1.6295615434646606, "learning_rate": 8.652929621087153e-07, "loss": 0.0038, "step": 318280 }, { "epoch": 3.4007158502056734, "grad_norm": 0.46370935440063477, "learning_rate": 8.652814899268684e-07, "loss": 0.0266, "step": 318290 }, { "epoch": 3.400822693519953, "grad_norm": 0.22448070347309113, "learning_rate": 8.652700173325913e-07, "loss": 0.0521, "step": 318300 }, { "epoch": 3.4009295368342327, "grad_norm": 0.2504275441169739, "learning_rate": 8.652585443258966e-07, "loss": 0.0151, "step": 318310 }, { "epoch": 3.401036380148512, "grad_norm": 0.004077857825905085, "learning_rate": 8.652470709067976e-07, "loss": 0.0124, "step": 318320 }, { "epoch": 3.4011432234627916, "grad_norm": 0.010628631338477135, "learning_rate": 8.652355970753069e-07, "loss": 0.0261, "step": 318330 }, { "epoch": 3.4012500667770715, "grad_norm": 3.5187623500823975, "learning_rate": 8.652241228314378e-07, "loss": 0.0087, "step": 318340 }, { "epoch": 3.401356910091351, "grad_norm": 0.04538898169994354, "learning_rate": 8.652126481752031e-07, "loss": 0.0322, "step": 318350 }, { "epoch": 3.4014637534056305, "grad_norm": 0.6223954558372498, "learning_rate": 8.652011731066156e-07, "loss": 0.0076, "step": 318360 }, { "epoch": 3.4015705967199104, "grad_norm": 3.9635722637176514, "learning_rate": 8.651896976256886e-07, "loss": 0.0024, "step": 318370 }, { "epoch": 3.40167744003419, "grad_norm": 0.011762542650103569, "learning_rate": 8.651782217324349e-07, "loss": 0.0009, "step": 318380 }, { "epoch": 3.4017842833484693, "grad_norm": 0.01752917468547821, "learning_rate": 8.651667454268672e-07, "loss": 0.002, "step": 318390 }, { "epoch": 3.401891126662749, "grad_norm": 0.007523019798099995, "learning_rate": 8.651552687089988e-07, "loss": 0.0819, "step": 318400 }, { "epoch": 3.4019979699770286, "grad_norm": 0.007161903660744429, "learning_rate": 8.651437915788425e-07, "loss": 0.0033, "step": 318410 }, { "epoch": 3.4021048132913085, "grad_norm": 0.12200099229812622, "learning_rate": 8.651323140364114e-07, "loss": 0.0032, "step": 318420 }, { "epoch": 3.402211656605588, "grad_norm": 0.00869088713079691, "learning_rate": 8.651208360817184e-07, "loss": 0.001, "step": 318430 }, { "epoch": 3.4023184999198675, "grad_norm": 0.09713578969240189, "learning_rate": 8.651093577147763e-07, "loss": 0.0003, "step": 318440 }, { "epoch": 3.402425343234147, "grad_norm": 0.3970505893230438, "learning_rate": 8.650978789355982e-07, "loss": 0.0066, "step": 318450 }, { "epoch": 3.402532186548427, "grad_norm": 1.9581490755081177, "learning_rate": 8.650863997441971e-07, "loss": 0.0094, "step": 318460 }, { "epoch": 3.4026390298627063, "grad_norm": 0.0026836739853024483, "learning_rate": 8.650749201405857e-07, "loss": 0.0044, "step": 318470 }, { "epoch": 3.402745873176986, "grad_norm": 2.7751317024230957, "learning_rate": 8.650634401247774e-07, "loss": 0.0238, "step": 318480 }, { "epoch": 3.4028527164912656, "grad_norm": 0.0028003451880067587, "learning_rate": 8.650519596967846e-07, "loss": 0.01, "step": 318490 }, { "epoch": 3.402959559805545, "grad_norm": 0.2604607343673706, "learning_rate": 8.65040478856621e-07, "loss": 0.006, "step": 318500 }, { "epoch": 3.4030664031198246, "grad_norm": 0.2831782400608063, "learning_rate": 8.65028997604299e-07, "loss": 0.0035, "step": 318510 }, { "epoch": 3.4031732464341045, "grad_norm": 0.00233267480507493, "learning_rate": 8.650175159398316e-07, "loss": 0.0121, "step": 318520 }, { "epoch": 3.403280089748384, "grad_norm": 1.7718876600265503, "learning_rate": 8.650060338632319e-07, "loss": 0.0257, "step": 318530 }, { "epoch": 3.403386933062664, "grad_norm": 0.022601313889026642, "learning_rate": 8.64994551374513e-07, "loss": 0.0254, "step": 318540 }, { "epoch": 3.4034937763769433, "grad_norm": 1.8779948949813843, "learning_rate": 8.649830684736876e-07, "loss": 0.0097, "step": 318550 }, { "epoch": 3.4036006196912227, "grad_norm": 0.12793469429016113, "learning_rate": 8.649715851607686e-07, "loss": 0.0359, "step": 318560 }, { "epoch": 3.403707463005502, "grad_norm": 1.5558922290802002, "learning_rate": 8.649601014357695e-07, "loss": 0.0249, "step": 318570 }, { "epoch": 3.403814306319782, "grad_norm": 0.07674992829561234, "learning_rate": 8.649486172987026e-07, "loss": 0.0148, "step": 318580 }, { "epoch": 3.4039211496340616, "grad_norm": 0.008012944832444191, "learning_rate": 8.649371327495814e-07, "loss": 0.0054, "step": 318590 }, { "epoch": 3.4040279929483415, "grad_norm": 1.7384024858474731, "learning_rate": 8.649256477884184e-07, "loss": 0.0039, "step": 318600 }, { "epoch": 3.404134836262621, "grad_norm": 0.024700047448277473, "learning_rate": 8.64914162415227e-07, "loss": 0.0133, "step": 318610 }, { "epoch": 3.4042416795769004, "grad_norm": 0.9692399501800537, "learning_rate": 8.649026766300198e-07, "loss": 0.0067, "step": 318620 }, { "epoch": 3.40434852289118, "grad_norm": 0.004799500573426485, "learning_rate": 8.6489119043281e-07, "loss": 0.0079, "step": 318630 }, { "epoch": 3.4044553662054597, "grad_norm": 2.812087297439575, "learning_rate": 8.648797038236104e-07, "loss": 0.0052, "step": 318640 }, { "epoch": 3.404562209519739, "grad_norm": 2.8022234439849854, "learning_rate": 8.648682168024343e-07, "loss": 0.0078, "step": 318650 }, { "epoch": 3.404669052834019, "grad_norm": 1.0634765625, "learning_rate": 8.648567293692943e-07, "loss": 0.0028, "step": 318660 }, { "epoch": 3.4047758961482986, "grad_norm": 0.13484430313110352, "learning_rate": 8.648452415242036e-07, "loss": 0.0076, "step": 318670 }, { "epoch": 3.404882739462578, "grad_norm": 5.017117977142334, "learning_rate": 8.648337532671751e-07, "loss": 0.0111, "step": 318680 }, { "epoch": 3.404989582776858, "grad_norm": 0.0021031068172305822, "learning_rate": 8.648222645982215e-07, "loss": 0.0056, "step": 318690 }, { "epoch": 3.4050964260911374, "grad_norm": 4.388652324676514, "learning_rate": 8.648107755173563e-07, "loss": 0.0018, "step": 318700 }, { "epoch": 3.405203269405417, "grad_norm": 0.0264667309820652, "learning_rate": 8.64799286024592e-07, "loss": 0.0009, "step": 318710 }, { "epoch": 3.4053101127196967, "grad_norm": 17.815950393676758, "learning_rate": 8.64787796119942e-07, "loss": 0.022, "step": 318720 }, { "epoch": 3.405416956033976, "grad_norm": 4.71790075302124, "learning_rate": 8.64776305803419e-07, "loss": 0.0111, "step": 318730 }, { "epoch": 3.4055237993482557, "grad_norm": 0.5801549553871155, "learning_rate": 8.647648150750359e-07, "loss": 0.0091, "step": 318740 }, { "epoch": 3.4056306426625356, "grad_norm": 2.97772216796875, "learning_rate": 8.647533239348059e-07, "loss": 0.012, "step": 318750 }, { "epoch": 3.405737485976815, "grad_norm": 0.0010179654927924275, "learning_rate": 8.647418323827418e-07, "loss": 0.0047, "step": 318760 }, { "epoch": 3.4058443292910945, "grad_norm": 4.447122573852539, "learning_rate": 8.647303404188566e-07, "loss": 0.0088, "step": 318770 }, { "epoch": 3.4059511726053744, "grad_norm": 0.02036832831799984, "learning_rate": 8.647188480431636e-07, "loss": 0.0199, "step": 318780 }, { "epoch": 3.406058015919654, "grad_norm": 0.12535762786865234, "learning_rate": 8.647073552556752e-07, "loss": 0.02, "step": 318790 }, { "epoch": 3.4061648592339333, "grad_norm": 1.371103048324585, "learning_rate": 8.646958620564046e-07, "loss": 0.004, "step": 318800 }, { "epoch": 3.406271702548213, "grad_norm": 0.025795400142669678, "learning_rate": 8.646843684453651e-07, "loss": 0.0075, "step": 318810 }, { "epoch": 3.4063785458624927, "grad_norm": 1.4838223457336426, "learning_rate": 8.646728744225693e-07, "loss": 0.0026, "step": 318820 }, { "epoch": 3.406485389176772, "grad_norm": 0.15737731754779816, "learning_rate": 8.646613799880303e-07, "loss": 0.0004, "step": 318830 }, { "epoch": 3.406592232491052, "grad_norm": 0.012557287700474262, "learning_rate": 8.646498851417612e-07, "loss": 0.0345, "step": 318840 }, { "epoch": 3.4066990758053315, "grad_norm": 0.0054102446883916855, "learning_rate": 8.646383898837748e-07, "loss": 0.004, "step": 318850 }, { "epoch": 3.406805919119611, "grad_norm": 0.4167221486568451, "learning_rate": 8.64626894214084e-07, "loss": 0.0184, "step": 318860 }, { "epoch": 3.406912762433891, "grad_norm": 0.006274552550166845, "learning_rate": 8.64615398132702e-07, "loss": 0.0091, "step": 318870 }, { "epoch": 3.4070196057481703, "grad_norm": 0.015873407945036888, "learning_rate": 8.646039016396418e-07, "loss": 0.009, "step": 318880 }, { "epoch": 3.4071264490624498, "grad_norm": 2.6664865016937256, "learning_rate": 8.645924047349161e-07, "loss": 0.0009, "step": 318890 }, { "epoch": 3.4072332923767297, "grad_norm": 0.20095698535442352, "learning_rate": 8.645809074185382e-07, "loss": 0.0011, "step": 318900 }, { "epoch": 3.407340135691009, "grad_norm": 0.08527667820453644, "learning_rate": 8.64569409690521e-07, "loss": 0.0198, "step": 318910 }, { "epoch": 3.4074469790052886, "grad_norm": 0.12233483791351318, "learning_rate": 8.645579115508772e-07, "loss": 0.0156, "step": 318920 }, { "epoch": 3.4075538223195685, "grad_norm": 0.2270893156528473, "learning_rate": 8.645464129996201e-07, "loss": 0.0026, "step": 318930 }, { "epoch": 3.407660665633848, "grad_norm": 5.3600287437438965, "learning_rate": 8.645349140367626e-07, "loss": 0.0083, "step": 318940 }, { "epoch": 3.4077675089481274, "grad_norm": 0.3016112148761749, "learning_rate": 8.645234146623177e-07, "loss": 0.0237, "step": 318950 }, { "epoch": 3.4078743522624073, "grad_norm": 0.1045146957039833, "learning_rate": 8.645119148762983e-07, "loss": 0.0137, "step": 318960 }, { "epoch": 3.4079811955766868, "grad_norm": 0.11304952949285507, "learning_rate": 8.645004146787174e-07, "loss": 0.002, "step": 318970 }, { "epoch": 3.4080880388909662, "grad_norm": 0.17088429629802704, "learning_rate": 8.644889140695882e-07, "loss": 0.0463, "step": 318980 }, { "epoch": 3.408194882205246, "grad_norm": 0.009544910863041878, "learning_rate": 8.644774130489233e-07, "loss": 0.0416, "step": 318990 }, { "epoch": 3.4083017255195256, "grad_norm": 0.008095482364296913, "learning_rate": 8.64465911616736e-07, "loss": 0.0034, "step": 319000 }, { "epoch": 3.408408568833805, "grad_norm": 0.01623447798192501, "learning_rate": 8.644544097730391e-07, "loss": 0.002, "step": 319010 }, { "epoch": 3.408515412148085, "grad_norm": 4.853875160217285, "learning_rate": 8.644429075178458e-07, "loss": 0.0543, "step": 319020 }, { "epoch": 3.4086222554623644, "grad_norm": 6.437677383422852, "learning_rate": 8.644314048511688e-07, "loss": 0.028, "step": 319030 }, { "epoch": 3.408729098776644, "grad_norm": 2.691300392150879, "learning_rate": 8.644199017730213e-07, "loss": 0.0022, "step": 319040 }, { "epoch": 3.4088359420909238, "grad_norm": 0.1463439017534256, "learning_rate": 8.644083982834163e-07, "loss": 0.0071, "step": 319050 }, { "epoch": 3.4089427854052032, "grad_norm": 0.9512314796447754, "learning_rate": 8.643968943823667e-07, "loss": 0.0165, "step": 319060 }, { "epoch": 3.4090496287194827, "grad_norm": 0.01046688947826624, "learning_rate": 8.643853900698854e-07, "loss": 0.0027, "step": 319070 }, { "epoch": 3.4091564720337626, "grad_norm": 3.450881242752075, "learning_rate": 8.643738853459856e-07, "loss": 0.02, "step": 319080 }, { "epoch": 3.409263315348042, "grad_norm": 0.34148138761520386, "learning_rate": 8.643623802106801e-07, "loss": 0.0115, "step": 319090 }, { "epoch": 3.4093701586623215, "grad_norm": 0.5570899844169617, "learning_rate": 8.643508746639821e-07, "loss": 0.0032, "step": 319100 }, { "epoch": 3.4094770019766014, "grad_norm": 0.01584015041589737, "learning_rate": 8.643393687059042e-07, "loss": 0.0009, "step": 319110 }, { "epoch": 3.409583845290881, "grad_norm": 0.4755003750324249, "learning_rate": 8.643278623364598e-07, "loss": 0.0028, "step": 319120 }, { "epoch": 3.409690688605161, "grad_norm": 0.07641211152076721, "learning_rate": 8.643163555556618e-07, "loss": 0.0152, "step": 319130 }, { "epoch": 3.4097975319194402, "grad_norm": 0.9018752574920654, "learning_rate": 8.643048483635232e-07, "loss": 0.0289, "step": 319140 }, { "epoch": 3.4099043752337197, "grad_norm": 1.7172296047210693, "learning_rate": 8.642933407600568e-07, "loss": 0.0144, "step": 319150 }, { "epoch": 3.410011218547999, "grad_norm": 0.006916776765137911, "learning_rate": 8.642818327452758e-07, "loss": 0.003, "step": 319160 }, { "epoch": 3.410118061862279, "grad_norm": 0.0865246132016182, "learning_rate": 8.642703243191931e-07, "loss": 0.0027, "step": 319170 }, { "epoch": 3.4102249051765585, "grad_norm": 0.013555346056818962, "learning_rate": 8.642588154818217e-07, "loss": 0.0058, "step": 319180 }, { "epoch": 3.4103317484908384, "grad_norm": 1.7506986856460571, "learning_rate": 8.642473062331745e-07, "loss": 0.0209, "step": 319190 }, { "epoch": 3.410438591805118, "grad_norm": 8.141176223754883, "learning_rate": 8.642357965732648e-07, "loss": 0.0342, "step": 319200 }, { "epoch": 3.4105454351193973, "grad_norm": 0.012159882113337517, "learning_rate": 8.642242865021052e-07, "loss": 0.0002, "step": 319210 }, { "epoch": 3.410652278433677, "grad_norm": 0.0024304217658936977, "learning_rate": 8.642127760197091e-07, "loss": 0.0073, "step": 319220 }, { "epoch": 3.4107591217479567, "grad_norm": 29.19202423095703, "learning_rate": 8.642012651260892e-07, "loss": 0.0342, "step": 319230 }, { "epoch": 3.410865965062236, "grad_norm": 6.591669082641602, "learning_rate": 8.641897538212586e-07, "loss": 0.0094, "step": 319240 }, { "epoch": 3.410972808376516, "grad_norm": 0.8606843948364258, "learning_rate": 8.641782421052302e-07, "loss": 0.0287, "step": 319250 }, { "epoch": 3.4110796516907955, "grad_norm": 4.353621959686279, "learning_rate": 8.641667299780171e-07, "loss": 0.0033, "step": 319260 }, { "epoch": 3.411186495005075, "grad_norm": 3.1810736656188965, "learning_rate": 8.641552174396322e-07, "loss": 0.0069, "step": 319270 }, { "epoch": 3.4112933383193544, "grad_norm": 0.0035136353690177202, "learning_rate": 8.641437044900888e-07, "loss": 0.0152, "step": 319280 }, { "epoch": 3.4114001816336343, "grad_norm": 0.07146719843149185, "learning_rate": 8.641321911293995e-07, "loss": 0.0118, "step": 319290 }, { "epoch": 3.411507024947914, "grad_norm": 0.0021513481624424458, "learning_rate": 8.641206773575775e-07, "loss": 0.01, "step": 319300 }, { "epoch": 3.4116138682621937, "grad_norm": 0.09112540632486343, "learning_rate": 8.641091631746358e-07, "loss": 0.003, "step": 319310 }, { "epoch": 3.411720711576473, "grad_norm": 0.06251275539398193, "learning_rate": 8.640976485805874e-07, "loss": 0.0017, "step": 319320 }, { "epoch": 3.4118275548907526, "grad_norm": 3.1803386211395264, "learning_rate": 8.640861335754453e-07, "loss": 0.0045, "step": 319330 }, { "epoch": 3.411934398205032, "grad_norm": 0.018109191209077835, "learning_rate": 8.640746181592223e-07, "loss": 0.0075, "step": 319340 }, { "epoch": 3.412041241519312, "grad_norm": 16.98862648010254, "learning_rate": 8.640631023319317e-07, "loss": 0.0208, "step": 319350 }, { "epoch": 3.4121480848335914, "grad_norm": 1.3871967792510986, "learning_rate": 8.640515860935864e-07, "loss": 0.0135, "step": 319360 }, { "epoch": 3.4122549281478713, "grad_norm": 0.013958766125142574, "learning_rate": 8.640400694441994e-07, "loss": 0.0314, "step": 319370 }, { "epoch": 3.412361771462151, "grad_norm": 0.0015867705224081874, "learning_rate": 8.640285523837836e-07, "loss": 0.0083, "step": 319380 }, { "epoch": 3.4124686147764303, "grad_norm": 5.420103549957275, "learning_rate": 8.640170349123521e-07, "loss": 0.0196, "step": 319390 }, { "epoch": 3.41257545809071, "grad_norm": 9.93449592590332, "learning_rate": 8.640055170299178e-07, "loss": 0.0222, "step": 319400 }, { "epoch": 3.4126823014049896, "grad_norm": 0.8083789944648743, "learning_rate": 8.639939987364941e-07, "loss": 0.0014, "step": 319410 }, { "epoch": 3.412789144719269, "grad_norm": 2.4683609008789062, "learning_rate": 8.639824800320935e-07, "loss": 0.0334, "step": 319420 }, { "epoch": 3.412895988033549, "grad_norm": 0.049704913049936295, "learning_rate": 8.639709609167293e-07, "loss": 0.0058, "step": 319430 }, { "epoch": 3.4130028313478284, "grad_norm": 2.8803842067718506, "learning_rate": 8.639594413904144e-07, "loss": 0.0051, "step": 319440 }, { "epoch": 3.413109674662108, "grad_norm": 0.7220811247825623, "learning_rate": 8.639479214531617e-07, "loss": 0.0079, "step": 319450 }, { "epoch": 3.413216517976388, "grad_norm": 0.004454278387129307, "learning_rate": 8.639364011049843e-07, "loss": 0.0037, "step": 319460 }, { "epoch": 3.4133233612906673, "grad_norm": 0.1307520717382431, "learning_rate": 8.639248803458954e-07, "loss": 0.0042, "step": 319470 }, { "epoch": 3.4134302046049467, "grad_norm": 0.5925087928771973, "learning_rate": 8.639133591759077e-07, "loss": 0.0505, "step": 319480 }, { "epoch": 3.4135370479192266, "grad_norm": 2.0217690467834473, "learning_rate": 8.639018375950345e-07, "loss": 0.0047, "step": 319490 }, { "epoch": 3.413643891233506, "grad_norm": 0.002412592526525259, "learning_rate": 8.638903156032886e-07, "loss": 0.0056, "step": 319500 }, { "epoch": 3.4137507345477855, "grad_norm": 0.0046017407439649105, "learning_rate": 8.638787932006831e-07, "loss": 0.0112, "step": 319510 }, { "epoch": 3.4138575778620655, "grad_norm": 0.08132946491241455, "learning_rate": 8.638672703872309e-07, "loss": 0.0022, "step": 319520 }, { "epoch": 3.413964421176345, "grad_norm": 8.323445320129395, "learning_rate": 8.638557471629452e-07, "loss": 0.0109, "step": 319530 }, { "epoch": 3.4140712644906244, "grad_norm": 6.766885280609131, "learning_rate": 8.63844223527839e-07, "loss": 0.0525, "step": 319540 }, { "epoch": 3.4141781078049043, "grad_norm": 8.894222259521484, "learning_rate": 8.638326994819249e-07, "loss": 0.0037, "step": 319550 }, { "epoch": 3.4142849511191837, "grad_norm": 0.47530245780944824, "learning_rate": 8.638211750252163e-07, "loss": 0.0067, "step": 319560 }, { "epoch": 3.414391794433463, "grad_norm": 0.07356344908475876, "learning_rate": 8.638096501577264e-07, "loss": 0.0044, "step": 319570 }, { "epoch": 3.414498637747743, "grad_norm": 0.013230865821242332, "learning_rate": 8.637981248794676e-07, "loss": 0.0024, "step": 319580 }, { "epoch": 3.4146054810620226, "grad_norm": 4.301679611206055, "learning_rate": 8.637865991904534e-07, "loss": 0.0123, "step": 319590 }, { "epoch": 3.414712324376302, "grad_norm": 0.031839653849601746, "learning_rate": 8.637750730906968e-07, "loss": 0.005, "step": 319600 }, { "epoch": 3.414819167690582, "grad_norm": 0.6719170808792114, "learning_rate": 8.637635465802105e-07, "loss": 0.0165, "step": 319610 }, { "epoch": 3.4149260110048614, "grad_norm": 0.04236897826194763, "learning_rate": 8.637520196590078e-07, "loss": 0.005, "step": 319620 }, { "epoch": 3.415032854319141, "grad_norm": 0.03445569425821304, "learning_rate": 8.637404923271015e-07, "loss": 0.036, "step": 319630 }, { "epoch": 3.4151396976334207, "grad_norm": 0.016813794150948524, "learning_rate": 8.637289645845048e-07, "loss": 0.0133, "step": 319640 }, { "epoch": 3.4152465409477, "grad_norm": 0.08322998881340027, "learning_rate": 8.637174364312307e-07, "loss": 0.0233, "step": 319650 }, { "epoch": 3.4153533842619797, "grad_norm": 4.064055442810059, "learning_rate": 8.637059078672921e-07, "loss": 0.0095, "step": 319660 }, { "epoch": 3.4154602275762596, "grad_norm": 0.046468231827020645, "learning_rate": 8.636943788927021e-07, "loss": 0.0233, "step": 319670 }, { "epoch": 3.415567070890539, "grad_norm": 0.019816769286990166, "learning_rate": 8.636828495074738e-07, "loss": 0.0128, "step": 319680 }, { "epoch": 3.4156739142048185, "grad_norm": 0.006732237990945578, "learning_rate": 8.6367131971162e-07, "loss": 0.0237, "step": 319690 }, { "epoch": 3.4157807575190984, "grad_norm": 0.10406719893217087, "learning_rate": 8.636597895051538e-07, "loss": 0.0055, "step": 319700 }, { "epoch": 3.415887600833378, "grad_norm": 0.09673932194709778, "learning_rate": 8.636482588880884e-07, "loss": 0.0342, "step": 319710 }, { "epoch": 3.4159944441476573, "grad_norm": 2.202763795852661, "learning_rate": 8.636367278604366e-07, "loss": 0.0404, "step": 319720 }, { "epoch": 3.416101287461937, "grad_norm": 0.0052830008789896965, "learning_rate": 8.636251964222114e-07, "loss": 0.0223, "step": 319730 }, { "epoch": 3.4162081307762167, "grad_norm": 0.4445548355579376, "learning_rate": 8.636136645734259e-07, "loss": 0.0233, "step": 319740 }, { "epoch": 3.416314974090496, "grad_norm": 0.3707726299762726, "learning_rate": 8.636021323140931e-07, "loss": 0.0258, "step": 319750 }, { "epoch": 3.416421817404776, "grad_norm": 0.035402290523052216, "learning_rate": 8.635905996442263e-07, "loss": 0.0095, "step": 319760 }, { "epoch": 3.4165286607190555, "grad_norm": 0.0012266759295016527, "learning_rate": 8.63579066563838e-07, "loss": 0.0096, "step": 319770 }, { "epoch": 3.416635504033335, "grad_norm": 0.15259060263633728, "learning_rate": 8.635675330729418e-07, "loss": 0.0167, "step": 319780 }, { "epoch": 3.416742347347615, "grad_norm": 0.0033927736803889275, "learning_rate": 8.635559991715502e-07, "loss": 0.0041, "step": 319790 }, { "epoch": 3.4168491906618943, "grad_norm": 0.013226233422756195, "learning_rate": 8.635444648596765e-07, "loss": 0.0066, "step": 319800 }, { "epoch": 3.4169560339761738, "grad_norm": 3.9641425609588623, "learning_rate": 8.635329301373337e-07, "loss": 0.029, "step": 319810 }, { "epoch": 3.4170628772904537, "grad_norm": 0.31035181879997253, "learning_rate": 8.635213950045346e-07, "loss": 0.0206, "step": 319820 }, { "epoch": 3.417169720604733, "grad_norm": 2.8454809188842773, "learning_rate": 8.635098594612926e-07, "loss": 0.009, "step": 319830 }, { "epoch": 3.4172765639190126, "grad_norm": 0.015613403171300888, "learning_rate": 8.634983235076204e-07, "loss": 0.0146, "step": 319840 }, { "epoch": 3.4173834072332925, "grad_norm": 0.004861853551119566, "learning_rate": 8.634867871435312e-07, "loss": 0.0098, "step": 319850 }, { "epoch": 3.417490250547572, "grad_norm": 0.8208891749382019, "learning_rate": 8.634752503690381e-07, "loss": 0.0155, "step": 319860 }, { "epoch": 3.4175970938618514, "grad_norm": 0.020166367292404175, "learning_rate": 8.634637131841538e-07, "loss": 0.0357, "step": 319870 }, { "epoch": 3.4177039371761313, "grad_norm": 0.7452124953269958, "learning_rate": 8.634521755888917e-07, "loss": 0.0103, "step": 319880 }, { "epoch": 3.4178107804904108, "grad_norm": 0.05547662079334259, "learning_rate": 8.634406375832645e-07, "loss": 0.0148, "step": 319890 }, { "epoch": 3.4179176238046907, "grad_norm": 0.3906770646572113, "learning_rate": 8.634290991672857e-07, "loss": 0.0454, "step": 319900 }, { "epoch": 3.41802446711897, "grad_norm": 0.017601093277335167, "learning_rate": 8.634175603409676e-07, "loss": 0.0017, "step": 319910 }, { "epoch": 3.4181313104332496, "grad_norm": 0.0042296177707612514, "learning_rate": 8.63406021104324e-07, "loss": 0.0087, "step": 319920 }, { "epoch": 3.418238153747529, "grad_norm": 0.03692726045846939, "learning_rate": 8.633944814573675e-07, "loss": 0.0044, "step": 319930 }, { "epoch": 3.418344997061809, "grad_norm": 4.234769344329834, "learning_rate": 8.633829414001111e-07, "loss": 0.0153, "step": 319940 }, { "epoch": 3.4184518403760884, "grad_norm": 5.3187103271484375, "learning_rate": 8.633714009325679e-07, "loss": 0.0181, "step": 319950 }, { "epoch": 3.4185586836903683, "grad_norm": 6.026187419891357, "learning_rate": 8.633598600547512e-07, "loss": 0.0024, "step": 319960 }, { "epoch": 3.4186655270046478, "grad_norm": 0.03817081078886986, "learning_rate": 8.633483187666734e-07, "loss": 0.0011, "step": 319970 }, { "epoch": 3.4187723703189272, "grad_norm": 0.03303069993853569, "learning_rate": 8.633367770683483e-07, "loss": 0.0208, "step": 319980 }, { "epoch": 3.4188792136332067, "grad_norm": 3.0137369632720947, "learning_rate": 8.633252349597884e-07, "loss": 0.0493, "step": 319990 }, { "epoch": 3.4189860569474866, "grad_norm": 5.303791522979736, "learning_rate": 8.633136924410069e-07, "loss": 0.0161, "step": 320000 }, { "epoch": 3.419092900261766, "grad_norm": 1.6055351495742798, "learning_rate": 8.633021495120168e-07, "loss": 0.0241, "step": 320010 }, { "epoch": 3.419199743576046, "grad_norm": 0.003425952512770891, "learning_rate": 8.632906061728312e-07, "loss": 0.0039, "step": 320020 }, { "epoch": 3.4193065868903254, "grad_norm": 0.07861945778131485, "learning_rate": 8.632790624234631e-07, "loss": 0.013, "step": 320030 }, { "epoch": 3.419413430204605, "grad_norm": 4.029375076293945, "learning_rate": 8.632675182639255e-07, "loss": 0.0063, "step": 320040 }, { "epoch": 3.4195202735188843, "grad_norm": 0.002158272312954068, "learning_rate": 8.632559736942314e-07, "loss": 0.0102, "step": 320050 }, { "epoch": 3.4196271168331642, "grad_norm": 0.024658890441060066, "learning_rate": 8.63244428714394e-07, "loss": 0.0049, "step": 320060 }, { "epoch": 3.4197339601474437, "grad_norm": 0.2319907546043396, "learning_rate": 8.632328833244261e-07, "loss": 0.0474, "step": 320070 }, { "epoch": 3.4198408034617236, "grad_norm": 0.00732172466814518, "learning_rate": 8.632213375243409e-07, "loss": 0.0031, "step": 320080 }, { "epoch": 3.419947646776003, "grad_norm": 3.2529029846191406, "learning_rate": 8.632097913141514e-07, "loss": 0.0275, "step": 320090 }, { "epoch": 3.4200544900902825, "grad_norm": 0.00542735168710351, "learning_rate": 8.631982446938706e-07, "loss": 0.0066, "step": 320100 }, { "epoch": 3.420161333404562, "grad_norm": 0.012756133452057838, "learning_rate": 8.631866976635117e-07, "loss": 0.0018, "step": 320110 }, { "epoch": 3.420268176718842, "grad_norm": 0.002580344444140792, "learning_rate": 8.631751502230875e-07, "loss": 0.0085, "step": 320120 }, { "epoch": 3.4203750200331213, "grad_norm": 0.04474975913763046, "learning_rate": 8.631636023726112e-07, "loss": 0.0092, "step": 320130 }, { "epoch": 3.4204818633474012, "grad_norm": 0.034534480422735214, "learning_rate": 8.631520541120957e-07, "loss": 0.0239, "step": 320140 }, { "epoch": 3.4205887066616807, "grad_norm": 17.31915283203125, "learning_rate": 8.631405054415542e-07, "loss": 0.0271, "step": 320150 }, { "epoch": 3.42069554997596, "grad_norm": 1.2388240098953247, "learning_rate": 8.631289563609996e-07, "loss": 0.0033, "step": 320160 }, { "epoch": 3.42080239329024, "grad_norm": 0.008264165371656418, "learning_rate": 8.63117406870445e-07, "loss": 0.0195, "step": 320170 }, { "epoch": 3.4209092366045195, "grad_norm": 0.05864087492227554, "learning_rate": 8.631058569699036e-07, "loss": 0.0104, "step": 320180 }, { "epoch": 3.421016079918799, "grad_norm": 1.3674296140670776, "learning_rate": 8.630943066593881e-07, "loss": 0.0017, "step": 320190 }, { "epoch": 3.421122923233079, "grad_norm": 0.028997935354709625, "learning_rate": 8.630827559389117e-07, "loss": 0.0022, "step": 320200 }, { "epoch": 3.4212297665473583, "grad_norm": 0.43068161606788635, "learning_rate": 8.630712048084876e-07, "loss": 0.0292, "step": 320210 }, { "epoch": 3.421336609861638, "grad_norm": 4.356192588806152, "learning_rate": 8.630596532681287e-07, "loss": 0.0146, "step": 320220 }, { "epoch": 3.4214434531759177, "grad_norm": 0.0003000754804816097, "learning_rate": 8.63048101317848e-07, "loss": 0.0072, "step": 320230 }, { "epoch": 3.421550296490197, "grad_norm": 1.4629842042922974, "learning_rate": 8.630365489576586e-07, "loss": 0.0005, "step": 320240 }, { "epoch": 3.4216571398044766, "grad_norm": 0.17734071612358093, "learning_rate": 8.630249961875735e-07, "loss": 0.007, "step": 320250 }, { "epoch": 3.4217639831187565, "grad_norm": 0.002749011851847172, "learning_rate": 8.630134430076059e-07, "loss": 0.0187, "step": 320260 }, { "epoch": 3.421870826433036, "grad_norm": 1.0818661451339722, "learning_rate": 8.630018894177686e-07, "loss": 0.0106, "step": 320270 }, { "epoch": 3.4219776697473154, "grad_norm": 0.49615275859832764, "learning_rate": 8.629903354180747e-07, "loss": 0.0024, "step": 320280 }, { "epoch": 3.4220845130615953, "grad_norm": 1.9835586547851562, "learning_rate": 8.629787810085376e-07, "loss": 0.0034, "step": 320290 }, { "epoch": 3.422191356375875, "grad_norm": 0.053320515900850296, "learning_rate": 8.629672261891698e-07, "loss": 0.0114, "step": 320300 }, { "epoch": 3.4222981996901543, "grad_norm": 2.445976734161377, "learning_rate": 8.629556709599847e-07, "loss": 0.0246, "step": 320310 }, { "epoch": 3.422405043004434, "grad_norm": 2.142601728439331, "learning_rate": 8.629441153209953e-07, "loss": 0.0083, "step": 320320 }, { "epoch": 3.4225118863187136, "grad_norm": 4.47276496887207, "learning_rate": 8.629325592722146e-07, "loss": 0.0101, "step": 320330 }, { "epoch": 3.422618729632993, "grad_norm": 0.06338124722242355, "learning_rate": 8.629210028136555e-07, "loss": 0.0131, "step": 320340 }, { "epoch": 3.422725572947273, "grad_norm": 0.011357621289789677, "learning_rate": 8.629094459453314e-07, "loss": 0.0088, "step": 320350 }, { "epoch": 3.4228324162615524, "grad_norm": 0.09212006628513336, "learning_rate": 8.62897888667255e-07, "loss": 0.0108, "step": 320360 }, { "epoch": 3.422939259575832, "grad_norm": 0.001378397922962904, "learning_rate": 8.628863309794395e-07, "loss": 0.0146, "step": 320370 }, { "epoch": 3.423046102890112, "grad_norm": 0.28454017639160156, "learning_rate": 8.628747728818979e-07, "loss": 0.0166, "step": 320380 }, { "epoch": 3.4231529462043913, "grad_norm": 0.0037667264696210623, "learning_rate": 8.628632143746435e-07, "loss": 0.0062, "step": 320390 }, { "epoch": 3.4232597895186707, "grad_norm": 5.382299423217773, "learning_rate": 8.628516554576891e-07, "loss": 0.0161, "step": 320400 }, { "epoch": 3.4233666328329506, "grad_norm": 0.547978937625885, "learning_rate": 8.628400961310475e-07, "loss": 0.0415, "step": 320410 }, { "epoch": 3.42347347614723, "grad_norm": 0.6941152215003967, "learning_rate": 8.628285363947323e-07, "loss": 0.0067, "step": 320420 }, { "epoch": 3.4235803194615095, "grad_norm": 2.1905596256256104, "learning_rate": 8.628169762487563e-07, "loss": 0.0116, "step": 320430 }, { "epoch": 3.4236871627757894, "grad_norm": 0.07140480726957321, "learning_rate": 8.628054156931325e-07, "loss": 0.0046, "step": 320440 }, { "epoch": 3.423794006090069, "grad_norm": 1.2849754095077515, "learning_rate": 8.627938547278741e-07, "loss": 0.0083, "step": 320450 }, { "epoch": 3.4239008494043484, "grad_norm": 0.20518723130226135, "learning_rate": 8.627822933529939e-07, "loss": 0.027, "step": 320460 }, { "epoch": 3.4240076927186283, "grad_norm": 4.68231201171875, "learning_rate": 8.627707315685053e-07, "loss": 0.0062, "step": 320470 }, { "epoch": 3.4241145360329077, "grad_norm": 4.792455196380615, "learning_rate": 8.62759169374421e-07, "loss": 0.0586, "step": 320480 }, { "epoch": 3.424221379347187, "grad_norm": 0.019203241914510727, "learning_rate": 8.627476067707542e-07, "loss": 0.0039, "step": 320490 }, { "epoch": 3.424328222661467, "grad_norm": 12.434507369995117, "learning_rate": 8.627360437575181e-07, "loss": 0.0137, "step": 320500 }, { "epoch": 3.4244350659757465, "grad_norm": 3.999931812286377, "learning_rate": 8.627244803347256e-07, "loss": 0.0197, "step": 320510 }, { "epoch": 3.424541909290026, "grad_norm": 0.07058609277009964, "learning_rate": 8.627129165023896e-07, "loss": 0.008, "step": 320520 }, { "epoch": 3.424648752604306, "grad_norm": 0.3945561349391937, "learning_rate": 8.627013522605236e-07, "loss": 0.0049, "step": 320530 }, { "epoch": 3.4247555959185854, "grad_norm": 0.017706509679555893, "learning_rate": 8.626897876091404e-07, "loss": 0.0195, "step": 320540 }, { "epoch": 3.424862439232865, "grad_norm": 0.0035523090045899153, "learning_rate": 8.626782225482529e-07, "loss": 0.0097, "step": 320550 }, { "epoch": 3.4249692825471447, "grad_norm": 3.1875014305114746, "learning_rate": 8.626666570778742e-07, "loss": 0.0174, "step": 320560 }, { "epoch": 3.425076125861424, "grad_norm": 0.09356838464736938, "learning_rate": 8.626550911980177e-07, "loss": 0.0047, "step": 320570 }, { "epoch": 3.4251829691757036, "grad_norm": 2.342543363571167, "learning_rate": 8.626435249086963e-07, "loss": 0.0151, "step": 320580 }, { "epoch": 3.4252898124899835, "grad_norm": 0.03434007614850998, "learning_rate": 8.626319582099228e-07, "loss": 0.0018, "step": 320590 }, { "epoch": 3.425396655804263, "grad_norm": 0.003960306756198406, "learning_rate": 8.626203911017105e-07, "loss": 0.0105, "step": 320600 }, { "epoch": 3.425503499118543, "grad_norm": 0.0019988168496638536, "learning_rate": 8.626088235840725e-07, "loss": 0.0156, "step": 320610 }, { "epoch": 3.4256103424328224, "grad_norm": 0.25051623582839966, "learning_rate": 8.625972556570215e-07, "loss": 0.0052, "step": 320620 }, { "epoch": 3.425717185747102, "grad_norm": 5.245943069458008, "learning_rate": 8.625856873205711e-07, "loss": 0.0522, "step": 320630 }, { "epoch": 3.4258240290613813, "grad_norm": 0.005797236226499081, "learning_rate": 8.62574118574734e-07, "loss": 0.0046, "step": 320640 }, { "epoch": 3.425930872375661, "grad_norm": 4.528082370758057, "learning_rate": 8.625625494195235e-07, "loss": 0.0307, "step": 320650 }, { "epoch": 3.4260377156899406, "grad_norm": 0.08831567317247391, "learning_rate": 8.625509798549524e-07, "loss": 0.0151, "step": 320660 }, { "epoch": 3.4261445590042205, "grad_norm": 0.3233092427253723, "learning_rate": 8.625394098810338e-07, "loss": 0.0237, "step": 320670 }, { "epoch": 3.4262514023185, "grad_norm": 2.5951662063598633, "learning_rate": 8.625278394977808e-07, "loss": 0.0063, "step": 320680 }, { "epoch": 3.4263582456327795, "grad_norm": 8.227651596069336, "learning_rate": 8.625162687052067e-07, "loss": 0.0325, "step": 320690 }, { "epoch": 3.426465088947059, "grad_norm": 0.558244526386261, "learning_rate": 8.625046975033243e-07, "loss": 0.0089, "step": 320700 }, { "epoch": 3.426571932261339, "grad_norm": 0.029209604486823082, "learning_rate": 8.624931258921467e-07, "loss": 0.0679, "step": 320710 }, { "epoch": 3.4266787755756183, "grad_norm": 0.0037190995644778013, "learning_rate": 8.624815538716871e-07, "loss": 0.0066, "step": 320720 }, { "epoch": 3.426785618889898, "grad_norm": 1.310470700263977, "learning_rate": 8.624699814419583e-07, "loss": 0.0097, "step": 320730 }, { "epoch": 3.4268924622041776, "grad_norm": 0.17841599881649017, "learning_rate": 8.624584086029737e-07, "loss": 0.0221, "step": 320740 }, { "epoch": 3.426999305518457, "grad_norm": 0.09909892082214355, "learning_rate": 8.624468353547461e-07, "loss": 0.0037, "step": 320750 }, { "epoch": 3.4271061488327366, "grad_norm": 0.05305128172039986, "learning_rate": 8.624352616972887e-07, "loss": 0.0142, "step": 320760 }, { "epoch": 3.4272129921470165, "grad_norm": 0.09883926808834076, "learning_rate": 8.624236876306146e-07, "loss": 0.0205, "step": 320770 }, { "epoch": 3.427319835461296, "grad_norm": 0.32095545530319214, "learning_rate": 8.624121131547368e-07, "loss": 0.0063, "step": 320780 }, { "epoch": 3.427426678775576, "grad_norm": 0.9716058373451233, "learning_rate": 8.624005382696683e-07, "loss": 0.0067, "step": 320790 }, { "epoch": 3.4275335220898553, "grad_norm": 0.5407600402832031, "learning_rate": 8.623889629754222e-07, "loss": 0.0069, "step": 320800 }, { "epoch": 3.4276403654041347, "grad_norm": 1.4671807289123535, "learning_rate": 8.623773872720117e-07, "loss": 0.001, "step": 320810 }, { "epoch": 3.427747208718414, "grad_norm": 12.255659103393555, "learning_rate": 8.623658111594499e-07, "loss": 0.0351, "step": 320820 }, { "epoch": 3.427854052032694, "grad_norm": 0.015647418797016144, "learning_rate": 8.623542346377495e-07, "loss": 0.0062, "step": 320830 }, { "epoch": 3.4279608953469736, "grad_norm": 2.588700532913208, "learning_rate": 8.623426577069239e-07, "loss": 0.0293, "step": 320840 }, { "epoch": 3.4280677386612535, "grad_norm": 0.1896834820508957, "learning_rate": 8.623310803669861e-07, "loss": 0.0019, "step": 320850 }, { "epoch": 3.428174581975533, "grad_norm": 0.11430488526821136, "learning_rate": 8.623195026179491e-07, "loss": 0.0041, "step": 320860 }, { "epoch": 3.4282814252898124, "grad_norm": 0.0032751192338764668, "learning_rate": 8.623079244598262e-07, "loss": 0.0259, "step": 320870 }, { "epoch": 3.4283882686040923, "grad_norm": 0.3218691945075989, "learning_rate": 8.622963458926302e-07, "loss": 0.036, "step": 320880 }, { "epoch": 3.4284951119183718, "grad_norm": 0.03416343033313751, "learning_rate": 8.622847669163742e-07, "loss": 0.0095, "step": 320890 }, { "epoch": 3.428601955232651, "grad_norm": 0.002393560716882348, "learning_rate": 8.622731875310716e-07, "loss": 0.0127, "step": 320900 }, { "epoch": 3.428708798546931, "grad_norm": 0.20619148015975952, "learning_rate": 8.62261607736735e-07, "loss": 0.014, "step": 320910 }, { "epoch": 3.4288156418612106, "grad_norm": 0.007868512533605099, "learning_rate": 8.622500275333778e-07, "loss": 0.0269, "step": 320920 }, { "epoch": 3.42892248517549, "grad_norm": 0.017538253217935562, "learning_rate": 8.62238446921013e-07, "loss": 0.011, "step": 320930 }, { "epoch": 3.42902932848977, "grad_norm": 0.22649401426315308, "learning_rate": 8.622268658996536e-07, "loss": 0.0521, "step": 320940 }, { "epoch": 3.4291361718040494, "grad_norm": 0.13347172737121582, "learning_rate": 8.622152844693127e-07, "loss": 0.0067, "step": 320950 }, { "epoch": 3.429243015118329, "grad_norm": 1.2643096446990967, "learning_rate": 8.622037026300035e-07, "loss": 0.0109, "step": 320960 }, { "epoch": 3.4293498584326088, "grad_norm": 0.7042809724807739, "learning_rate": 8.62192120381739e-07, "loss": 0.0039, "step": 320970 }, { "epoch": 3.429456701746888, "grad_norm": 6.807766914367676, "learning_rate": 8.621805377245322e-07, "loss": 0.0111, "step": 320980 }, { "epoch": 3.4295635450611677, "grad_norm": 1.504440188407898, "learning_rate": 8.621689546583962e-07, "loss": 0.0112, "step": 320990 }, { "epoch": 3.4296703883754476, "grad_norm": 3.9487664699554443, "learning_rate": 8.621573711833441e-07, "loss": 0.0272, "step": 321000 }, { "epoch": 3.429777231689727, "grad_norm": 0.025109585374593735, "learning_rate": 8.621457872993891e-07, "loss": 0.0063, "step": 321010 }, { "epoch": 3.4298840750040065, "grad_norm": 9.070704460144043, "learning_rate": 8.621342030065441e-07, "loss": 0.0106, "step": 321020 }, { "epoch": 3.4299909183182864, "grad_norm": 0.004799638409167528, "learning_rate": 8.621226183048222e-07, "loss": 0.0029, "step": 321030 }, { "epoch": 3.430097761632566, "grad_norm": 0.07740169018507004, "learning_rate": 8.621110331942367e-07, "loss": 0.0178, "step": 321040 }, { "epoch": 3.4302046049468453, "grad_norm": 0.01356906071305275, "learning_rate": 8.620994476748004e-07, "loss": 0.011, "step": 321050 }, { "epoch": 3.430311448261125, "grad_norm": 0.68154376745224, "learning_rate": 8.620878617465265e-07, "loss": 0.0088, "step": 321060 }, { "epoch": 3.4304182915754047, "grad_norm": 0.02345283515751362, "learning_rate": 8.620762754094282e-07, "loss": 0.0074, "step": 321070 }, { "epoch": 3.430525134889684, "grad_norm": 1.513314127922058, "learning_rate": 8.620646886635184e-07, "loss": 0.0152, "step": 321080 }, { "epoch": 3.430631978203964, "grad_norm": 0.005122085101902485, "learning_rate": 8.620531015088101e-07, "loss": 0.0135, "step": 321090 }, { "epoch": 3.4307388215182435, "grad_norm": 0.014997555874288082, "learning_rate": 8.620415139453165e-07, "loss": 0.1008, "step": 321100 }, { "epoch": 3.430845664832523, "grad_norm": 0.011699516326189041, "learning_rate": 8.620299259730509e-07, "loss": 0.003, "step": 321110 }, { "epoch": 3.430952508146803, "grad_norm": 0.08512422442436218, "learning_rate": 8.620183375920261e-07, "loss": 0.0093, "step": 321120 }, { "epoch": 3.4310593514610823, "grad_norm": 4.947233200073242, "learning_rate": 8.620067488022553e-07, "loss": 0.0095, "step": 321130 }, { "epoch": 3.4311661947753618, "grad_norm": 0.13185343146324158, "learning_rate": 8.619951596037516e-07, "loss": 0.0035, "step": 321140 }, { "epoch": 3.4312730380896417, "grad_norm": 0.20873379707336426, "learning_rate": 8.61983569996528e-07, "loss": 0.0045, "step": 321150 }, { "epoch": 3.431379881403921, "grad_norm": 1.7519396543502808, "learning_rate": 8.619719799805976e-07, "loss": 0.0162, "step": 321160 }, { "epoch": 3.4314867247182006, "grad_norm": 2.884429693222046, "learning_rate": 8.619603895559735e-07, "loss": 0.018, "step": 321170 }, { "epoch": 3.4315935680324805, "grad_norm": 0.013731679879128933, "learning_rate": 8.619487987226688e-07, "loss": 0.0482, "step": 321180 }, { "epoch": 3.43170041134676, "grad_norm": 6.409229278564453, "learning_rate": 8.619372074806966e-07, "loss": 0.0251, "step": 321190 }, { "epoch": 3.4318072546610394, "grad_norm": 6.996452808380127, "learning_rate": 8.619256158300701e-07, "loss": 0.0225, "step": 321200 }, { "epoch": 3.4319140979753193, "grad_norm": 0.030714523047208786, "learning_rate": 8.619140237708021e-07, "loss": 0.0019, "step": 321210 }, { "epoch": 3.432020941289599, "grad_norm": 4.950961112976074, "learning_rate": 8.61902431302906e-07, "loss": 0.0047, "step": 321220 }, { "epoch": 3.4321277846038782, "grad_norm": 2.192924737930298, "learning_rate": 8.618908384263946e-07, "loss": 0.0177, "step": 321230 }, { "epoch": 3.432234627918158, "grad_norm": 0.005805756896734238, "learning_rate": 8.618792451412812e-07, "loss": 0.0066, "step": 321240 }, { "epoch": 3.4323414712324376, "grad_norm": 3.7787599563598633, "learning_rate": 8.618676514475789e-07, "loss": 0.0067, "step": 321250 }, { "epoch": 3.432448314546717, "grad_norm": 0.4825417697429657, "learning_rate": 8.618560573453005e-07, "loss": 0.0024, "step": 321260 }, { "epoch": 3.432555157860997, "grad_norm": 0.0027508987113833427, "learning_rate": 8.618444628344595e-07, "loss": 0.016, "step": 321270 }, { "epoch": 3.4326620011752764, "grad_norm": 0.018461793661117554, "learning_rate": 8.618328679150686e-07, "loss": 0.0114, "step": 321280 }, { "epoch": 3.432768844489556, "grad_norm": 0.0024117096327245235, "learning_rate": 8.618212725871412e-07, "loss": 0.0043, "step": 321290 }, { "epoch": 3.432875687803836, "grad_norm": 0.006498953327536583, "learning_rate": 8.618096768506904e-07, "loss": 0.0067, "step": 321300 }, { "epoch": 3.4329825311181152, "grad_norm": 0.7316944003105164, "learning_rate": 8.61798080705729e-07, "loss": 0.0106, "step": 321310 }, { "epoch": 3.4330893744323947, "grad_norm": 1.92274010181427, "learning_rate": 8.617864841522702e-07, "loss": 0.0134, "step": 321320 }, { "epoch": 3.4331962177466746, "grad_norm": 0.11489575356245041, "learning_rate": 8.617748871903273e-07, "loss": 0.0076, "step": 321330 }, { "epoch": 3.433303061060954, "grad_norm": 1.3913235664367676, "learning_rate": 8.617632898199132e-07, "loss": 0.0847, "step": 321340 }, { "epoch": 3.4334099043752335, "grad_norm": 0.3766508102416992, "learning_rate": 8.61751692041041e-07, "loss": 0.0104, "step": 321350 }, { "epoch": 3.4335167476895134, "grad_norm": 0.8182054758071899, "learning_rate": 8.617400938537239e-07, "loss": 0.0144, "step": 321360 }, { "epoch": 3.433623591003793, "grad_norm": 11.454809188842773, "learning_rate": 8.617284952579749e-07, "loss": 0.0153, "step": 321370 }, { "epoch": 3.433730434318073, "grad_norm": 0.2280055433511734, "learning_rate": 8.617168962538072e-07, "loss": 0.008, "step": 321380 }, { "epoch": 3.4338372776323522, "grad_norm": 0.005577090661972761, "learning_rate": 8.617052968412337e-07, "loss": 0.0041, "step": 321390 }, { "epoch": 3.4339441209466317, "grad_norm": 0.03480809926986694, "learning_rate": 8.616936970202678e-07, "loss": 0.0042, "step": 321400 }, { "epoch": 3.434050964260911, "grad_norm": 0.0789618119597435, "learning_rate": 8.616820967909222e-07, "loss": 0.0296, "step": 321410 }, { "epoch": 3.434157807575191, "grad_norm": 0.006027890834957361, "learning_rate": 8.616704961532104e-07, "loss": 0.0309, "step": 321420 }, { "epoch": 3.4342646508894705, "grad_norm": 3.0067806243896484, "learning_rate": 8.616588951071451e-07, "loss": 0.0067, "step": 321430 }, { "epoch": 3.4343714942037504, "grad_norm": 0.4675605893135071, "learning_rate": 8.616472936527398e-07, "loss": 0.0245, "step": 321440 }, { "epoch": 3.43447833751803, "grad_norm": 0.09110774099826813, "learning_rate": 8.616356917900074e-07, "loss": 0.0017, "step": 321450 }, { "epoch": 3.4345851808323093, "grad_norm": 15.873296737670898, "learning_rate": 8.61624089518961e-07, "loss": 0.0156, "step": 321460 }, { "epoch": 3.434692024146589, "grad_norm": 0.3958844840526581, "learning_rate": 8.616124868396136e-07, "loss": 0.0094, "step": 321470 }, { "epoch": 3.4347988674608687, "grad_norm": 1.1544666290283203, "learning_rate": 8.616008837519784e-07, "loss": 0.0074, "step": 321480 }, { "epoch": 3.434905710775148, "grad_norm": 0.048399291932582855, "learning_rate": 8.615892802560687e-07, "loss": 0.0031, "step": 321490 }, { "epoch": 3.435012554089428, "grad_norm": 0.009783298708498478, "learning_rate": 8.615776763518972e-07, "loss": 0.0429, "step": 321500 }, { "epoch": 3.4351193974037075, "grad_norm": 8.185600280761719, "learning_rate": 8.615660720394774e-07, "loss": 0.0193, "step": 321510 }, { "epoch": 3.435226240717987, "grad_norm": 8.84150505065918, "learning_rate": 8.615544673188222e-07, "loss": 0.0134, "step": 321520 }, { "epoch": 3.4353330840322664, "grad_norm": 0.23617929220199585, "learning_rate": 8.615428621899446e-07, "loss": 0.0017, "step": 321530 }, { "epoch": 3.4354399273465464, "grad_norm": 0.6678572297096252, "learning_rate": 8.615312566528579e-07, "loss": 0.0321, "step": 321540 }, { "epoch": 3.435546770660826, "grad_norm": 0.3055165708065033, "learning_rate": 8.615196507075751e-07, "loss": 0.0249, "step": 321550 }, { "epoch": 3.4356536139751057, "grad_norm": 0.19414429366588593, "learning_rate": 8.615080443541093e-07, "loss": 0.0231, "step": 321560 }, { "epoch": 3.435760457289385, "grad_norm": 1.0671371221542358, "learning_rate": 8.614964375924736e-07, "loss": 0.0282, "step": 321570 }, { "epoch": 3.4358673006036646, "grad_norm": 0.009628665633499622, "learning_rate": 8.614848304226813e-07, "loss": 0.0236, "step": 321580 }, { "epoch": 3.435974143917944, "grad_norm": 2.768822193145752, "learning_rate": 8.614732228447452e-07, "loss": 0.0189, "step": 321590 }, { "epoch": 3.436080987232224, "grad_norm": 3.5492441654205322, "learning_rate": 8.614616148586786e-07, "loss": 0.0175, "step": 321600 }, { "epoch": 3.4361878305465035, "grad_norm": 0.5812772512435913, "learning_rate": 8.614500064644945e-07, "loss": 0.0179, "step": 321610 }, { "epoch": 3.4362946738607834, "grad_norm": 0.009073269553482533, "learning_rate": 8.614383976622062e-07, "loss": 0.0263, "step": 321620 }, { "epoch": 3.436401517175063, "grad_norm": 1.9177851676940918, "learning_rate": 8.614267884518266e-07, "loss": 0.0134, "step": 321630 }, { "epoch": 3.4365083604893423, "grad_norm": 1.649475336074829, "learning_rate": 8.614151788333687e-07, "loss": 0.0143, "step": 321640 }, { "epoch": 3.436615203803622, "grad_norm": 0.014231679029762745, "learning_rate": 8.614035688068462e-07, "loss": 0.0203, "step": 321650 }, { "epoch": 3.4367220471179016, "grad_norm": 0.1433224231004715, "learning_rate": 8.613919583722714e-07, "loss": 0.0166, "step": 321660 }, { "epoch": 3.436828890432181, "grad_norm": 1.8174564838409424, "learning_rate": 8.61380347529658e-07, "loss": 0.0145, "step": 321670 }, { "epoch": 3.436935733746461, "grad_norm": 0.05594955012202263, "learning_rate": 8.613687362790188e-07, "loss": 0.0903, "step": 321680 }, { "epoch": 3.4370425770607405, "grad_norm": 6.416727542877197, "learning_rate": 8.613571246203672e-07, "loss": 0.0127, "step": 321690 }, { "epoch": 3.43714942037502, "grad_norm": 0.025237971916794777, "learning_rate": 8.613455125537159e-07, "loss": 0.0031, "step": 321700 }, { "epoch": 3.4372562636893, "grad_norm": 1.6727290153503418, "learning_rate": 8.613339000790784e-07, "loss": 0.0072, "step": 321710 }, { "epoch": 3.4373631070035793, "grad_norm": 7.795627117156982, "learning_rate": 8.613222871964676e-07, "loss": 0.0469, "step": 321720 }, { "epoch": 3.4374699503178587, "grad_norm": 0.13329480588436127, "learning_rate": 8.613106739058967e-07, "loss": 0.0054, "step": 321730 }, { "epoch": 3.4375767936321386, "grad_norm": 0.002019317587837577, "learning_rate": 8.612990602073788e-07, "loss": 0.0026, "step": 321740 }, { "epoch": 3.437683636946418, "grad_norm": 0.03695536032319069, "learning_rate": 8.612874461009269e-07, "loss": 0.0038, "step": 321750 }, { "epoch": 3.4377904802606976, "grad_norm": 0.0035070034209638834, "learning_rate": 8.612758315865542e-07, "loss": 0.0104, "step": 321760 }, { "epoch": 3.4378973235749775, "grad_norm": 0.0013997521018609405, "learning_rate": 8.61264216664274e-07, "loss": 0.0247, "step": 321770 }, { "epoch": 3.438004166889257, "grad_norm": 1.3558063507080078, "learning_rate": 8.61252601334099e-07, "loss": 0.0118, "step": 321780 }, { "epoch": 3.4381110102035364, "grad_norm": 5.3158183097839355, "learning_rate": 8.612409855960427e-07, "loss": 0.0164, "step": 321790 }, { "epoch": 3.4382178535178163, "grad_norm": 0.011522814631462097, "learning_rate": 8.61229369450118e-07, "loss": 0.0134, "step": 321800 }, { "epoch": 3.4383246968320957, "grad_norm": 0.0028648797888308764, "learning_rate": 8.612177528963382e-07, "loss": 0.0099, "step": 321810 }, { "epoch": 3.438431540146375, "grad_norm": 0.06866048276424408, "learning_rate": 8.612061359347162e-07, "loss": 0.0022, "step": 321820 }, { "epoch": 3.438538383460655, "grad_norm": 0.008218259550631046, "learning_rate": 8.611945185652651e-07, "loss": 0.014, "step": 321830 }, { "epoch": 3.4386452267749346, "grad_norm": 0.03148011863231659, "learning_rate": 8.611829007879983e-07, "loss": 0.0099, "step": 321840 }, { "epoch": 3.438752070089214, "grad_norm": 0.03944655880331993, "learning_rate": 8.611712826029287e-07, "loss": 0.0301, "step": 321850 }, { "epoch": 3.438858913403494, "grad_norm": 4.835958957672119, "learning_rate": 8.611596640100694e-07, "loss": 0.0105, "step": 321860 }, { "epoch": 3.4389657567177734, "grad_norm": 0.0399787612259388, "learning_rate": 8.611480450094337e-07, "loss": 0.0003, "step": 321870 }, { "epoch": 3.439072600032053, "grad_norm": 4.889524936676025, "learning_rate": 8.611364256010345e-07, "loss": 0.0125, "step": 321880 }, { "epoch": 3.4391794433463327, "grad_norm": 0.10802207887172699, "learning_rate": 8.611248057848851e-07, "loss": 0.0021, "step": 321890 }, { "epoch": 3.439286286660612, "grad_norm": 0.7455639243125916, "learning_rate": 8.611131855609986e-07, "loss": 0.0081, "step": 321900 }, { "epoch": 3.4393931299748917, "grad_norm": 0.03252469748258591, "learning_rate": 8.611015649293878e-07, "loss": 0.0074, "step": 321910 }, { "epoch": 3.4394999732891716, "grad_norm": 0.15066340565681458, "learning_rate": 8.610899438900663e-07, "loss": 0.007, "step": 321920 }, { "epoch": 3.439606816603451, "grad_norm": 0.023056579753756523, "learning_rate": 8.610783224430469e-07, "loss": 0.0593, "step": 321930 }, { "epoch": 3.4397136599177305, "grad_norm": 45.16525650024414, "learning_rate": 8.610667005883427e-07, "loss": 0.0062, "step": 321940 }, { "epoch": 3.4398205032320104, "grad_norm": 0.05113964527845383, "learning_rate": 8.610550783259672e-07, "loss": 0.0088, "step": 321950 }, { "epoch": 3.43992734654629, "grad_norm": 0.00725637236610055, "learning_rate": 8.610434556559332e-07, "loss": 0.0056, "step": 321960 }, { "epoch": 3.4400341898605693, "grad_norm": 0.01227058470249176, "learning_rate": 8.610318325782538e-07, "loss": 0.0018, "step": 321970 }, { "epoch": 3.440141033174849, "grad_norm": 0.017434632405638695, "learning_rate": 8.610202090929423e-07, "loss": 0.0119, "step": 321980 }, { "epoch": 3.4402478764891287, "grad_norm": 0.010858207009732723, "learning_rate": 8.610085852000117e-07, "loss": 0.0035, "step": 321990 }, { "epoch": 3.440354719803408, "grad_norm": 0.08963295072317123, "learning_rate": 8.609969608994752e-07, "loss": 0.0213, "step": 322000 }, { "epoch": 3.440461563117688, "grad_norm": 0.0014540978008881211, "learning_rate": 8.609853361913458e-07, "loss": 0.0127, "step": 322010 }, { "epoch": 3.4405684064319675, "grad_norm": 0.019679097458720207, "learning_rate": 8.609737110756368e-07, "loss": 0.0038, "step": 322020 }, { "epoch": 3.440675249746247, "grad_norm": 1.3135085105895996, "learning_rate": 8.609620855523612e-07, "loss": 0.0401, "step": 322030 }, { "epoch": 3.440782093060527, "grad_norm": 3.496610164642334, "learning_rate": 8.60950459621532e-07, "loss": 0.0082, "step": 322040 }, { "epoch": 3.4408889363748063, "grad_norm": 0.0025963187217712402, "learning_rate": 8.609388332831627e-07, "loss": 0.0013, "step": 322050 }, { "epoch": 3.4409957796890858, "grad_norm": 0.003792223986238241, "learning_rate": 8.609272065372661e-07, "loss": 0.0091, "step": 322060 }, { "epoch": 3.4411026230033657, "grad_norm": 3.3030409812927246, "learning_rate": 8.609155793838556e-07, "loss": 0.0057, "step": 322070 }, { "epoch": 3.441209466317645, "grad_norm": 8.983711242675781, "learning_rate": 8.609039518229441e-07, "loss": 0.0169, "step": 322080 }, { "epoch": 3.441316309631925, "grad_norm": 0.00867417361587286, "learning_rate": 8.608923238545447e-07, "loss": 0.0198, "step": 322090 }, { "epoch": 3.4414231529462045, "grad_norm": 0.12521694600582123, "learning_rate": 8.608806954786708e-07, "loss": 0.0076, "step": 322100 }, { "epoch": 3.441529996260484, "grad_norm": 0.2614702582359314, "learning_rate": 8.608690666953352e-07, "loss": 0.0071, "step": 322110 }, { "epoch": 3.4416368395747634, "grad_norm": 0.09946312755346298, "learning_rate": 8.608574375045512e-07, "loss": 0.0072, "step": 322120 }, { "epoch": 3.4417436828890433, "grad_norm": 0.008633377961814404, "learning_rate": 8.608458079063319e-07, "loss": 0.0155, "step": 322130 }, { "epoch": 3.4418505262033228, "grad_norm": 8.51008415222168, "learning_rate": 8.608341779006907e-07, "loss": 0.025, "step": 322140 }, { "epoch": 3.4419573695176027, "grad_norm": 2.6751482486724854, "learning_rate": 8.608225474876403e-07, "loss": 0.0052, "step": 322150 }, { "epoch": 3.442064212831882, "grad_norm": 0.012539146468043327, "learning_rate": 8.60810916667194e-07, "loss": 0.0266, "step": 322160 }, { "epoch": 3.4421710561461616, "grad_norm": 0.001573924790136516, "learning_rate": 8.607992854393649e-07, "loss": 0.0477, "step": 322170 }, { "epoch": 3.442277899460441, "grad_norm": 7.884760856628418, "learning_rate": 8.607876538041662e-07, "loss": 0.0251, "step": 322180 }, { "epoch": 3.442384742774721, "grad_norm": 3.2798821926116943, "learning_rate": 8.60776021761611e-07, "loss": 0.011, "step": 322190 }, { "epoch": 3.4424915860890004, "grad_norm": 0.015865806490182877, "learning_rate": 8.607643893117124e-07, "loss": 0.003, "step": 322200 }, { "epoch": 3.4425984294032803, "grad_norm": 0.08608799427747726, "learning_rate": 8.607527564544837e-07, "loss": 0.0027, "step": 322210 }, { "epoch": 3.4427052727175598, "grad_norm": 1.6163458824157715, "learning_rate": 8.60741123189938e-07, "loss": 0.003, "step": 322220 }, { "epoch": 3.4428121160318392, "grad_norm": 2.170835256576538, "learning_rate": 8.607294895180881e-07, "loss": 0.0151, "step": 322230 }, { "epoch": 3.4429189593461187, "grad_norm": 5.1045684814453125, "learning_rate": 8.607178554389476e-07, "loss": 0.0346, "step": 322240 }, { "epoch": 3.4430258026603986, "grad_norm": 0.21846839785575867, "learning_rate": 8.607062209525293e-07, "loss": 0.0027, "step": 322250 }, { "epoch": 3.443132645974678, "grad_norm": 0.013759594410657883, "learning_rate": 8.606945860588464e-07, "loss": 0.0084, "step": 322260 }, { "epoch": 3.443239489288958, "grad_norm": 3.2347159385681152, "learning_rate": 8.606829507579121e-07, "loss": 0.0157, "step": 322270 }, { "epoch": 3.4433463326032374, "grad_norm": 0.0012742322869598866, "learning_rate": 8.606713150497396e-07, "loss": 0.0065, "step": 322280 }, { "epoch": 3.443453175917517, "grad_norm": 0.004902893211692572, "learning_rate": 8.606596789343418e-07, "loss": 0.0015, "step": 322290 }, { "epoch": 3.4435600192317963, "grad_norm": 0.0030122045427560806, "learning_rate": 8.606480424117323e-07, "loss": 0.0045, "step": 322300 }, { "epoch": 3.4436668625460762, "grad_norm": 2.2276411056518555, "learning_rate": 8.606364054819237e-07, "loss": 0.0152, "step": 322310 }, { "epoch": 3.4437737058603557, "grad_norm": 2.553135633468628, "learning_rate": 8.606247681449295e-07, "loss": 0.0084, "step": 322320 }, { "epoch": 3.4438805491746356, "grad_norm": 2.0879693031311035, "learning_rate": 8.606131304007627e-07, "loss": 0.0321, "step": 322330 }, { "epoch": 3.443987392488915, "grad_norm": 0.05094972997903824, "learning_rate": 8.606014922494365e-07, "loss": 0.0039, "step": 322340 }, { "epoch": 3.4440942358031945, "grad_norm": 0.008311249315738678, "learning_rate": 8.60589853690964e-07, "loss": 0.0232, "step": 322350 }, { "epoch": 3.4442010791174744, "grad_norm": 0.021328352391719818, "learning_rate": 8.605782147253582e-07, "loss": 0.0023, "step": 322360 }, { "epoch": 3.444307922431754, "grad_norm": 0.12528690695762634, "learning_rate": 8.605665753526325e-07, "loss": 0.0378, "step": 322370 }, { "epoch": 3.4444147657460333, "grad_norm": 0.007397412322461605, "learning_rate": 8.605549355727998e-07, "loss": 0.0102, "step": 322380 }, { "epoch": 3.4445216090603132, "grad_norm": 0.12476011365652084, "learning_rate": 8.605432953858736e-07, "loss": 0.0204, "step": 322390 }, { "epoch": 3.4446284523745927, "grad_norm": 1.951204776763916, "learning_rate": 8.605316547918665e-07, "loss": 0.0216, "step": 322400 }, { "epoch": 3.444735295688872, "grad_norm": 0.2560364603996277, "learning_rate": 8.605200137907922e-07, "loss": 0.0224, "step": 322410 }, { "epoch": 3.444842139003152, "grad_norm": 0.9798195958137512, "learning_rate": 8.605083723826634e-07, "loss": 0.0063, "step": 322420 }, { "epoch": 3.4449489823174315, "grad_norm": 0.006460653617978096, "learning_rate": 8.604967305674935e-07, "loss": 0.0188, "step": 322430 }, { "epoch": 3.445055825631711, "grad_norm": 0.3538871109485626, "learning_rate": 8.604850883452957e-07, "loss": 0.0217, "step": 322440 }, { "epoch": 3.445162668945991, "grad_norm": 0.03894947096705437, "learning_rate": 8.604734457160829e-07, "loss": 0.0118, "step": 322450 }, { "epoch": 3.4452695122602703, "grad_norm": 0.3230585753917694, "learning_rate": 8.604618026798684e-07, "loss": 0.0007, "step": 322460 }, { "epoch": 3.44537635557455, "grad_norm": 0.002586835063993931, "learning_rate": 8.604501592366655e-07, "loss": 0.0206, "step": 322470 }, { "epoch": 3.4454831988888297, "grad_norm": 0.05302872508764267, "learning_rate": 8.60438515386487e-07, "loss": 0.0971, "step": 322480 }, { "epoch": 3.445590042203109, "grad_norm": 0.8558192253112793, "learning_rate": 8.604268711293462e-07, "loss": 0.0361, "step": 322490 }, { "epoch": 3.4456968855173886, "grad_norm": 0.11005549877882004, "learning_rate": 8.604152264652563e-07, "loss": 0.0021, "step": 322500 }, { "epoch": 3.4458037288316685, "grad_norm": 3.0031378269195557, "learning_rate": 8.604035813942303e-07, "loss": 0.035, "step": 322510 }, { "epoch": 3.445910572145948, "grad_norm": 0.04469052702188492, "learning_rate": 8.603919359162817e-07, "loss": 0.0123, "step": 322520 }, { "epoch": 3.4460174154602274, "grad_norm": 0.08253640681505203, "learning_rate": 8.603802900314233e-07, "loss": 0.0058, "step": 322530 }, { "epoch": 3.4461242587745073, "grad_norm": 1.1607815027236938, "learning_rate": 8.603686437396683e-07, "loss": 0.013, "step": 322540 }, { "epoch": 3.446231102088787, "grad_norm": 0.00390941696241498, "learning_rate": 8.603569970410298e-07, "loss": 0.0194, "step": 322550 }, { "epoch": 3.4463379454030663, "grad_norm": 6.660854339599609, "learning_rate": 8.603453499355211e-07, "loss": 0.0096, "step": 322560 }, { "epoch": 3.446444788717346, "grad_norm": 0.0017800877103582025, "learning_rate": 8.603337024231554e-07, "loss": 0.0067, "step": 322570 }, { "epoch": 3.4465516320316256, "grad_norm": 0.004554703366011381, "learning_rate": 8.603220545039457e-07, "loss": 0.0326, "step": 322580 }, { "epoch": 3.446658475345905, "grad_norm": 0.6957133412361145, "learning_rate": 8.603104061779052e-07, "loss": 0.0024, "step": 322590 }, { "epoch": 3.446765318660185, "grad_norm": 0.09355680644512177, "learning_rate": 8.602987574450471e-07, "loss": 0.0063, "step": 322600 }, { "epoch": 3.4468721619744644, "grad_norm": 0.0011660484597086906, "learning_rate": 8.602871083053845e-07, "loss": 0.014, "step": 322610 }, { "epoch": 3.446979005288744, "grad_norm": 0.01406838558614254, "learning_rate": 8.602754587589306e-07, "loss": 0.0079, "step": 322620 }, { "epoch": 3.447085848603024, "grad_norm": 0.4687355160713196, "learning_rate": 8.602638088056984e-07, "loss": 0.001, "step": 322630 }, { "epoch": 3.4471926919173033, "grad_norm": 1.2062726020812988, "learning_rate": 8.602521584457013e-07, "loss": 0.0218, "step": 322640 }, { "epoch": 3.4472995352315827, "grad_norm": 4.684798240661621, "learning_rate": 8.602405076789523e-07, "loss": 0.0189, "step": 322650 }, { "epoch": 3.4474063785458626, "grad_norm": 0.31635743379592896, "learning_rate": 8.602288565054645e-07, "loss": 0.0032, "step": 322660 }, { "epoch": 3.447513221860142, "grad_norm": 0.0029175011441111565, "learning_rate": 8.602172049252512e-07, "loss": 0.0058, "step": 322670 }, { "epoch": 3.4476200651744215, "grad_norm": 0.24389004707336426, "learning_rate": 8.602055529383254e-07, "loss": 0.0545, "step": 322680 }, { "epoch": 3.4477269084887014, "grad_norm": 0.0016299077542498708, "learning_rate": 8.601939005447006e-07, "loss": 0.0394, "step": 322690 }, { "epoch": 3.447833751802981, "grad_norm": 0.06244535371661186, "learning_rate": 8.601822477443894e-07, "loss": 0.0031, "step": 322700 }, { "epoch": 3.4479405951172604, "grad_norm": 0.0017148874467238784, "learning_rate": 8.601705945374053e-07, "loss": 0.0076, "step": 322710 }, { "epoch": 3.4480474384315403, "grad_norm": 0.018364302814006805, "learning_rate": 8.601589409237615e-07, "loss": 0.0007, "step": 322720 }, { "epoch": 3.4481542817458197, "grad_norm": 1.6415352821350098, "learning_rate": 8.601472869034711e-07, "loss": 0.0242, "step": 322730 }, { "epoch": 3.448261125060099, "grad_norm": 0.010409750044345856, "learning_rate": 8.601356324765472e-07, "loss": 0.0036, "step": 322740 }, { "epoch": 3.448367968374379, "grad_norm": 0.005497818347066641, "learning_rate": 8.601239776430029e-07, "loss": 0.026, "step": 322750 }, { "epoch": 3.4484748116886585, "grad_norm": 7.231472015380859, "learning_rate": 8.601123224028516e-07, "loss": 0.0367, "step": 322760 }, { "epoch": 3.448581655002938, "grad_norm": 4.6447601318359375, "learning_rate": 8.601006667561062e-07, "loss": 0.0167, "step": 322770 }, { "epoch": 3.448688498317218, "grad_norm": 0.7512819766998291, "learning_rate": 8.6008901070278e-07, "loss": 0.0425, "step": 322780 }, { "epoch": 3.4487953416314974, "grad_norm": 0.012683889828622341, "learning_rate": 8.600773542428862e-07, "loss": 0.0069, "step": 322790 }, { "epoch": 3.448902184945777, "grad_norm": 7.162156581878662, "learning_rate": 8.600656973764378e-07, "loss": 0.0417, "step": 322800 }, { "epoch": 3.4490090282600567, "grad_norm": 2.9541831016540527, "learning_rate": 8.600540401034481e-07, "loss": 0.0173, "step": 322810 }, { "epoch": 3.449115871574336, "grad_norm": 0.0018009532941505313, "learning_rate": 8.600423824239303e-07, "loss": 0.0019, "step": 322820 }, { "epoch": 3.4492227148886156, "grad_norm": 12.434232711791992, "learning_rate": 8.600307243378972e-07, "loss": 0.0259, "step": 322830 }, { "epoch": 3.4493295582028956, "grad_norm": 2.2586355209350586, "learning_rate": 8.600190658453624e-07, "loss": 0.0245, "step": 322840 }, { "epoch": 3.449436401517175, "grad_norm": 0.15356115996837616, "learning_rate": 8.60007406946339e-07, "loss": 0.0111, "step": 322850 }, { "epoch": 3.449543244831455, "grad_norm": 6.177268028259277, "learning_rate": 8.599957476408399e-07, "loss": 0.0105, "step": 322860 }, { "epoch": 3.4496500881457344, "grad_norm": 0.01615072414278984, "learning_rate": 8.599840879288786e-07, "loss": 0.0503, "step": 322870 }, { "epoch": 3.449756931460014, "grad_norm": 2.39717173576355, "learning_rate": 8.59972427810468e-07, "loss": 0.0127, "step": 322880 }, { "epoch": 3.4498637747742933, "grad_norm": 1.4801219701766968, "learning_rate": 8.599607672856213e-07, "loss": 0.0363, "step": 322890 }, { "epoch": 3.449970618088573, "grad_norm": 1.7423638105392456, "learning_rate": 8.599491063543518e-07, "loss": 0.0176, "step": 322900 }, { "epoch": 3.4500774614028527, "grad_norm": 0.0027120402082800865, "learning_rate": 8.599374450166727e-07, "loss": 0.0091, "step": 322910 }, { "epoch": 3.4501843047171326, "grad_norm": 1.6426156759262085, "learning_rate": 8.599257832725967e-07, "loss": 0.001, "step": 322920 }, { "epoch": 3.450291148031412, "grad_norm": 0.003619581228122115, "learning_rate": 8.599141211221377e-07, "loss": 0.0033, "step": 322930 }, { "epoch": 3.4503979913456915, "grad_norm": 0.14342401921749115, "learning_rate": 8.599024585653083e-07, "loss": 0.0058, "step": 322940 }, { "epoch": 3.450504834659971, "grad_norm": 1.5951169729232788, "learning_rate": 8.598907956021219e-07, "loss": 0.0226, "step": 322950 }, { "epoch": 3.450611677974251, "grad_norm": 1.1006823778152466, "learning_rate": 8.598791322325918e-07, "loss": 0.0117, "step": 322960 }, { "epoch": 3.4507185212885303, "grad_norm": 1.8224867582321167, "learning_rate": 8.598674684567308e-07, "loss": 0.0053, "step": 322970 }, { "epoch": 3.45082536460281, "grad_norm": 0.02439402975142002, "learning_rate": 8.598558042745523e-07, "loss": 0.0012, "step": 322980 }, { "epoch": 3.4509322079170897, "grad_norm": 0.005393525585532188, "learning_rate": 8.598441396860695e-07, "loss": 0.0292, "step": 322990 }, { "epoch": 3.451039051231369, "grad_norm": 3.661423921585083, "learning_rate": 8.598324746912955e-07, "loss": 0.0041, "step": 323000 }, { "epoch": 3.4511458945456486, "grad_norm": 3.5841314792633057, "learning_rate": 8.598208092902435e-07, "loss": 0.008, "step": 323010 }, { "epoch": 3.4512527378599285, "grad_norm": 0.004268862772732973, "learning_rate": 8.598091434829266e-07, "loss": 0.0139, "step": 323020 }, { "epoch": 3.451359581174208, "grad_norm": 0.019214123487472534, "learning_rate": 8.597974772693579e-07, "loss": 0.0197, "step": 323030 }, { "epoch": 3.451466424488488, "grad_norm": 2.7005293369293213, "learning_rate": 8.597858106495508e-07, "loss": 0.0269, "step": 323040 }, { "epoch": 3.4515732678027673, "grad_norm": 0.037341516464948654, "learning_rate": 8.597741436235184e-07, "loss": 0.0071, "step": 323050 }, { "epoch": 3.4516801111170468, "grad_norm": 0.5392370223999023, "learning_rate": 8.597624761912739e-07, "loss": 0.0079, "step": 323060 }, { "epoch": 3.451786954431326, "grad_norm": 0.3156052827835083, "learning_rate": 8.597508083528302e-07, "loss": 0.0083, "step": 323070 }, { "epoch": 3.451893797745606, "grad_norm": 0.8112310171127319, "learning_rate": 8.597391401082008e-07, "loss": 0.0013, "step": 323080 }, { "epoch": 3.4520006410598856, "grad_norm": 8.05798053741455, "learning_rate": 8.597274714573988e-07, "loss": 0.0095, "step": 323090 }, { "epoch": 3.4521074843741655, "grad_norm": 4.764862537384033, "learning_rate": 8.597158024004374e-07, "loss": 0.0157, "step": 323100 }, { "epoch": 3.452214327688445, "grad_norm": 0.8190204501152039, "learning_rate": 8.597041329373296e-07, "loss": 0.0132, "step": 323110 }, { "epoch": 3.4523211710027244, "grad_norm": 7.801425457000732, "learning_rate": 8.596924630680886e-07, "loss": 0.0236, "step": 323120 }, { "epoch": 3.4524280143170043, "grad_norm": 9.751791000366211, "learning_rate": 8.596807927927279e-07, "loss": 0.0061, "step": 323130 }, { "epoch": 3.4525348576312838, "grad_norm": 3.2505416870117188, "learning_rate": 8.596691221112602e-07, "loss": 0.0048, "step": 323140 }, { "epoch": 3.452641700945563, "grad_norm": 0.11240246891975403, "learning_rate": 8.596574510236991e-07, "loss": 0.0044, "step": 323150 }, { "epoch": 3.452748544259843, "grad_norm": 6.329623222351074, "learning_rate": 8.596457795300575e-07, "loss": 0.0373, "step": 323160 }, { "epoch": 3.4528553875741226, "grad_norm": 0.013654426671564579, "learning_rate": 8.596341076303486e-07, "loss": 0.0094, "step": 323170 }, { "epoch": 3.452962230888402, "grad_norm": 0.01144347433000803, "learning_rate": 8.596224353245857e-07, "loss": 0.0075, "step": 323180 }, { "epoch": 3.453069074202682, "grad_norm": 0.03389778733253479, "learning_rate": 8.59610762612782e-07, "loss": 0.0021, "step": 323190 }, { "epoch": 3.4531759175169614, "grad_norm": 1.644517421722412, "learning_rate": 8.595990894949505e-07, "loss": 0.009, "step": 323200 }, { "epoch": 3.453282760831241, "grad_norm": 0.0009383511496707797, "learning_rate": 8.595874159711046e-07, "loss": 0.0008, "step": 323210 }, { "epoch": 3.4533896041455208, "grad_norm": 0.6729866862297058, "learning_rate": 8.595757420412573e-07, "loss": 0.0108, "step": 323220 }, { "epoch": 3.4534964474598002, "grad_norm": 15.724401473999023, "learning_rate": 8.595640677054219e-07, "loss": 0.0307, "step": 323230 }, { "epoch": 3.4536032907740797, "grad_norm": 6.494754314422607, "learning_rate": 8.595523929636115e-07, "loss": 0.0659, "step": 323240 }, { "epoch": 3.4537101340883596, "grad_norm": 3.5523736476898193, "learning_rate": 8.595407178158394e-07, "loss": 0.0145, "step": 323250 }, { "epoch": 3.453816977402639, "grad_norm": 0.00182175706140697, "learning_rate": 8.595290422621186e-07, "loss": 0.0103, "step": 323260 }, { "epoch": 3.4539238207169185, "grad_norm": 2.8644566535949707, "learning_rate": 8.595173663024623e-07, "loss": 0.0219, "step": 323270 }, { "epoch": 3.4540306640311984, "grad_norm": 0.27977585792541504, "learning_rate": 8.595056899368837e-07, "loss": 0.0137, "step": 323280 }, { "epoch": 3.454137507345478, "grad_norm": 3.1736326217651367, "learning_rate": 8.594940131653964e-07, "loss": 0.0619, "step": 323290 }, { "epoch": 3.4542443506597573, "grad_norm": 0.14230309426784515, "learning_rate": 8.59482335988013e-07, "loss": 0.0009, "step": 323300 }, { "epoch": 3.4543511939740372, "grad_norm": 6.822082996368408, "learning_rate": 8.594706584047468e-07, "loss": 0.0303, "step": 323310 }, { "epoch": 3.4544580372883167, "grad_norm": 0.12318721413612366, "learning_rate": 8.594589804156114e-07, "loss": 0.0388, "step": 323320 }, { "epoch": 3.454564880602596, "grad_norm": 0.512305736541748, "learning_rate": 8.594473020206195e-07, "loss": 0.0132, "step": 323330 }, { "epoch": 3.454671723916876, "grad_norm": 3.7193591594696045, "learning_rate": 8.594356232197843e-07, "loss": 0.0203, "step": 323340 }, { "epoch": 3.4547785672311555, "grad_norm": 11.570496559143066, "learning_rate": 8.594239440131195e-07, "loss": 0.0037, "step": 323350 }, { "epoch": 3.454885410545435, "grad_norm": 2.6230955123901367, "learning_rate": 8.594122644006376e-07, "loss": 0.0046, "step": 323360 }, { "epoch": 3.454992253859715, "grad_norm": 3.2950358390808105, "learning_rate": 8.594005843823525e-07, "loss": 0.0059, "step": 323370 }, { "epoch": 3.4550990971739943, "grad_norm": 0.614337146282196, "learning_rate": 8.593889039582767e-07, "loss": 0.0209, "step": 323380 }, { "epoch": 3.455205940488274, "grad_norm": 2.715737819671631, "learning_rate": 8.593772231284238e-07, "loss": 0.0242, "step": 323390 }, { "epoch": 3.4553127838025537, "grad_norm": 1.705328106880188, "learning_rate": 8.593655418928068e-07, "loss": 0.0019, "step": 323400 }, { "epoch": 3.455419627116833, "grad_norm": 0.359884649515152, "learning_rate": 8.593538602514392e-07, "loss": 0.001, "step": 323410 }, { "epoch": 3.4555264704311126, "grad_norm": 10.078336715698242, "learning_rate": 8.593421782043338e-07, "loss": 0.0492, "step": 323420 }, { "epoch": 3.4556333137453925, "grad_norm": 0.022352352738380432, "learning_rate": 8.59330495751504e-07, "loss": 0.0119, "step": 323430 }, { "epoch": 3.455740157059672, "grad_norm": 0.6731358766555786, "learning_rate": 8.593188128929629e-07, "loss": 0.0241, "step": 323440 }, { "epoch": 3.4558470003739514, "grad_norm": 0.6354972720146179, "learning_rate": 8.593071296287239e-07, "loss": 0.062, "step": 323450 }, { "epoch": 3.4559538436882313, "grad_norm": 0.06776857376098633, "learning_rate": 8.592954459588e-07, "loss": 0.0021, "step": 323460 }, { "epoch": 3.456060687002511, "grad_norm": 6.135047435760498, "learning_rate": 8.592837618832043e-07, "loss": 0.0316, "step": 323470 }, { "epoch": 3.4561675303167902, "grad_norm": 0.07222539186477661, "learning_rate": 8.592720774019502e-07, "loss": 0.0196, "step": 323480 }, { "epoch": 3.45627437363107, "grad_norm": 1.3400156497955322, "learning_rate": 8.592603925150507e-07, "loss": 0.0134, "step": 323490 }, { "epoch": 3.4563812169453496, "grad_norm": 0.2735404372215271, "learning_rate": 8.592487072225192e-07, "loss": 0.0035, "step": 323500 }, { "epoch": 3.456488060259629, "grad_norm": 0.0023646531626582146, "learning_rate": 8.592370215243688e-07, "loss": 0.0034, "step": 323510 }, { "epoch": 3.456594903573909, "grad_norm": 3.3056442737579346, "learning_rate": 8.592253354206127e-07, "loss": 0.019, "step": 323520 }, { "epoch": 3.4567017468881884, "grad_norm": 9.89886474609375, "learning_rate": 8.59213648911264e-07, "loss": 0.0165, "step": 323530 }, { "epoch": 3.456808590202468, "grad_norm": 7.309861660003662, "learning_rate": 8.59201961996336e-07, "loss": 0.0156, "step": 323540 }, { "epoch": 3.456915433516748, "grad_norm": 0.08905456215143204, "learning_rate": 8.591902746758419e-07, "loss": 0.0015, "step": 323550 }, { "epoch": 3.4570222768310273, "grad_norm": 0.02637641131877899, "learning_rate": 8.59178586949795e-07, "loss": 0.006, "step": 323560 }, { "epoch": 3.457129120145307, "grad_norm": 6.380377769470215, "learning_rate": 8.591668988182082e-07, "loss": 0.0359, "step": 323570 }, { "epoch": 3.4572359634595866, "grad_norm": 0.0024079943541437387, "learning_rate": 8.591552102810949e-07, "loss": 0.0026, "step": 323580 }, { "epoch": 3.457342806773866, "grad_norm": 0.015441316179931164, "learning_rate": 8.591435213384683e-07, "loss": 0.0072, "step": 323590 }, { "epoch": 3.4574496500881455, "grad_norm": 0.8035551309585571, "learning_rate": 8.591318319903415e-07, "loss": 0.0032, "step": 323600 }, { "epoch": 3.4575564934024254, "grad_norm": 0.04984305053949356, "learning_rate": 8.591201422367278e-07, "loss": 0.0076, "step": 323610 }, { "epoch": 3.457663336716705, "grad_norm": 0.2003088891506195, "learning_rate": 8.591084520776403e-07, "loss": 0.0056, "step": 323620 }, { "epoch": 3.457770180030985, "grad_norm": 0.013080809265375137, "learning_rate": 8.590967615130924e-07, "loss": 0.0087, "step": 323630 }, { "epoch": 3.4578770233452643, "grad_norm": 1.9312609434127808, "learning_rate": 8.59085070543097e-07, "loss": 0.0218, "step": 323640 }, { "epoch": 3.4579838666595437, "grad_norm": 1.6172417402267456, "learning_rate": 8.590733791676676e-07, "loss": 0.1087, "step": 323650 }, { "epoch": 3.458090709973823, "grad_norm": 7.371887683868408, "learning_rate": 8.590616873868171e-07, "loss": 0.0171, "step": 323660 }, { "epoch": 3.458197553288103, "grad_norm": 0.010041606612503529, "learning_rate": 8.59049995200559e-07, "loss": 0.005, "step": 323670 }, { "epoch": 3.4583043966023825, "grad_norm": 0.0046146889217197895, "learning_rate": 8.590383026089061e-07, "loss": 0.0218, "step": 323680 }, { "epoch": 3.4584112399166624, "grad_norm": 5.223204612731934, "learning_rate": 8.590266096118722e-07, "loss": 0.0185, "step": 323690 }, { "epoch": 3.458518083230942, "grad_norm": 3.648488998413086, "learning_rate": 8.590149162094698e-07, "loss": 0.008, "step": 323700 }, { "epoch": 3.4586249265452214, "grad_norm": 4.933172702789307, "learning_rate": 8.590032224017128e-07, "loss": 0.0365, "step": 323710 }, { "epoch": 3.458731769859501, "grad_norm": 2.2141103744506836, "learning_rate": 8.589915281886138e-07, "loss": 0.0116, "step": 323720 }, { "epoch": 3.4588386131737807, "grad_norm": 0.02338150143623352, "learning_rate": 8.589798335701864e-07, "loss": 0.0007, "step": 323730 }, { "epoch": 3.45894545648806, "grad_norm": 1.4205079078674316, "learning_rate": 8.589681385464438e-07, "loss": 0.0015, "step": 323740 }, { "epoch": 3.45905229980234, "grad_norm": 3.4674696922302246, "learning_rate": 8.589564431173988e-07, "loss": 0.0292, "step": 323750 }, { "epoch": 3.4591591431166195, "grad_norm": 0.07592450082302094, "learning_rate": 8.589447472830651e-07, "loss": 0.002, "step": 323760 }, { "epoch": 3.459265986430899, "grad_norm": 0.006529886741191149, "learning_rate": 8.589330510434556e-07, "loss": 0.0077, "step": 323770 }, { "epoch": 3.4593728297451785, "grad_norm": 0.01903190277516842, "learning_rate": 8.589213543985836e-07, "loss": 0.0057, "step": 323780 }, { "epoch": 3.4594796730594584, "grad_norm": 0.40412282943725586, "learning_rate": 8.589096573484624e-07, "loss": 0.0035, "step": 323790 }, { "epoch": 3.459586516373738, "grad_norm": 9.5890531539917, "learning_rate": 8.58897959893105e-07, "loss": 0.0315, "step": 323800 }, { "epoch": 3.4596933596880177, "grad_norm": 1.1031694412231445, "learning_rate": 8.588862620325248e-07, "loss": 0.0155, "step": 323810 }, { "epoch": 3.459800203002297, "grad_norm": 0.7006959915161133, "learning_rate": 8.588745637667349e-07, "loss": 0.0025, "step": 323820 }, { "epoch": 3.4599070463165766, "grad_norm": 0.030369114130735397, "learning_rate": 8.588628650957483e-07, "loss": 0.007, "step": 323830 }, { "epoch": 3.4600138896308565, "grad_norm": 0.10794821381568909, "learning_rate": 8.588511660195788e-07, "loss": 0.0063, "step": 323840 }, { "epoch": 3.460120732945136, "grad_norm": 0.0318942628800869, "learning_rate": 8.58839466538239e-07, "loss": 0.0088, "step": 323850 }, { "epoch": 3.4602275762594155, "grad_norm": 2.6036202907562256, "learning_rate": 8.588277666517425e-07, "loss": 0.0317, "step": 323860 }, { "epoch": 3.4603344195736954, "grad_norm": 0.34665775299072266, "learning_rate": 8.588160663601023e-07, "loss": 0.0033, "step": 323870 }, { "epoch": 3.460441262887975, "grad_norm": 0.022770514711737633, "learning_rate": 8.588043656633317e-07, "loss": 0.0269, "step": 323880 }, { "epoch": 3.4605481062022543, "grad_norm": 8.744353294372559, "learning_rate": 8.587926645614438e-07, "loss": 0.0015, "step": 323890 }, { "epoch": 3.460654949516534, "grad_norm": 0.0016545196995139122, "learning_rate": 8.587809630544521e-07, "loss": 0.0031, "step": 323900 }, { "epoch": 3.4607617928308136, "grad_norm": 2.47540283203125, "learning_rate": 8.587692611423694e-07, "loss": 0.0046, "step": 323910 }, { "epoch": 3.460868636145093, "grad_norm": 0.2533636689186096, "learning_rate": 8.587575588252093e-07, "loss": 0.0026, "step": 323920 }, { "epoch": 3.460975479459373, "grad_norm": 0.004960041493177414, "learning_rate": 8.587458561029847e-07, "loss": 0.0635, "step": 323930 }, { "epoch": 3.4610823227736525, "grad_norm": 0.04302383214235306, "learning_rate": 8.58734152975709e-07, "loss": 0.004, "step": 323940 }, { "epoch": 3.461189166087932, "grad_norm": 4.467347621917725, "learning_rate": 8.587224494433954e-07, "loss": 0.0099, "step": 323950 }, { "epoch": 3.461296009402212, "grad_norm": 7.608130931854248, "learning_rate": 8.587107455060571e-07, "loss": 0.0098, "step": 323960 }, { "epoch": 3.4614028527164913, "grad_norm": 1.1060236692428589, "learning_rate": 8.586990411637074e-07, "loss": 0.0112, "step": 323970 }, { "epoch": 3.4615096960307707, "grad_norm": 7.880506992340088, "learning_rate": 8.586873364163592e-07, "loss": 0.0167, "step": 323980 }, { "epoch": 3.4616165393450506, "grad_norm": 0.05210964381694794, "learning_rate": 8.586756312640259e-07, "loss": 0.019, "step": 323990 }, { "epoch": 3.46172338265933, "grad_norm": 0.38768571615219116, "learning_rate": 8.586639257067209e-07, "loss": 0.0039, "step": 324000 }, { "epoch": 3.4618302259736096, "grad_norm": 9.824197769165039, "learning_rate": 8.586522197444572e-07, "loss": 0.0259, "step": 324010 }, { "epoch": 3.4619370692878895, "grad_norm": 0.008253779262304306, "learning_rate": 8.586405133772481e-07, "loss": 0.0076, "step": 324020 }, { "epoch": 3.462043912602169, "grad_norm": 0.041823048144578934, "learning_rate": 8.586288066051068e-07, "loss": 0.0131, "step": 324030 }, { "epoch": 3.4621507559164484, "grad_norm": 0.17917029559612274, "learning_rate": 8.586170994280464e-07, "loss": 0.0028, "step": 324040 }, { "epoch": 3.4622575992307283, "grad_norm": 0.0016052352730184793, "learning_rate": 8.586053918460802e-07, "loss": 0.0072, "step": 324050 }, { "epoch": 3.4623644425450077, "grad_norm": 2.4419054985046387, "learning_rate": 8.585936838592217e-07, "loss": 0.0073, "step": 324060 }, { "epoch": 3.462471285859287, "grad_norm": 1.5893255472183228, "learning_rate": 8.585819754674837e-07, "loss": 0.0077, "step": 324070 }, { "epoch": 3.462578129173567, "grad_norm": 1.178341269493103, "learning_rate": 8.585702666708796e-07, "loss": 0.0193, "step": 324080 }, { "epoch": 3.4626849724878466, "grad_norm": 0.002792303217574954, "learning_rate": 8.585585574694227e-07, "loss": 0.0211, "step": 324090 }, { "epoch": 3.462791815802126, "grad_norm": 0.01967010088264942, "learning_rate": 8.585468478631261e-07, "loss": 0.0014, "step": 324100 }, { "epoch": 3.462898659116406, "grad_norm": 0.03798273578286171, "learning_rate": 8.585351378520029e-07, "loss": 0.0068, "step": 324110 }, { "epoch": 3.4630055024306854, "grad_norm": 0.09321320056915283, "learning_rate": 8.585234274360666e-07, "loss": 0.0009, "step": 324120 }, { "epoch": 3.463112345744965, "grad_norm": 0.028583794832229614, "learning_rate": 8.585117166153302e-07, "loss": 0.0154, "step": 324130 }, { "epoch": 3.4632191890592448, "grad_norm": 0.022407688200473785, "learning_rate": 8.58500005389807e-07, "loss": 0.0315, "step": 324140 }, { "epoch": 3.463326032373524, "grad_norm": 1.7402360439300537, "learning_rate": 8.584882937595104e-07, "loss": 0.0331, "step": 324150 }, { "epoch": 3.4634328756878037, "grad_norm": 0.001154582598246634, "learning_rate": 8.584765817244534e-07, "loss": 0.0063, "step": 324160 }, { "epoch": 3.4635397190020836, "grad_norm": 0.15554729104042053, "learning_rate": 8.584648692846492e-07, "loss": 0.0043, "step": 324170 }, { "epoch": 3.463646562316363, "grad_norm": 0.0024973242543637753, "learning_rate": 8.584531564401111e-07, "loss": 0.0091, "step": 324180 }, { "epoch": 3.4637534056306425, "grad_norm": 0.4529944658279419, "learning_rate": 8.584414431908524e-07, "loss": 0.069, "step": 324190 }, { "epoch": 3.4638602489449224, "grad_norm": 0.6331443190574646, "learning_rate": 8.584297295368863e-07, "loss": 0.0136, "step": 324200 }, { "epoch": 3.463967092259202, "grad_norm": 0.9794985055923462, "learning_rate": 8.584180154782259e-07, "loss": 0.0085, "step": 324210 }, { "epoch": 3.4640739355734813, "grad_norm": 0.10037100315093994, "learning_rate": 8.584063010148846e-07, "loss": 0.0071, "step": 324220 }, { "epoch": 3.464180778887761, "grad_norm": 1.2969833612442017, "learning_rate": 8.583945861468755e-07, "loss": 0.0059, "step": 324230 }, { "epoch": 3.4642876222020407, "grad_norm": 1.9433465003967285, "learning_rate": 8.583828708742118e-07, "loss": 0.0224, "step": 324240 }, { "epoch": 3.46439446551632, "grad_norm": 6.048355579376221, "learning_rate": 8.583711551969069e-07, "loss": 0.0306, "step": 324250 }, { "epoch": 3.4645013088306, "grad_norm": 2.0163235664367676, "learning_rate": 8.583594391149738e-07, "loss": 0.0186, "step": 324260 }, { "epoch": 3.4646081521448795, "grad_norm": 0.28052854537963867, "learning_rate": 8.583477226284258e-07, "loss": 0.0059, "step": 324270 }, { "epoch": 3.464714995459159, "grad_norm": 7.1842756271362305, "learning_rate": 8.583360057372763e-07, "loss": 0.0263, "step": 324280 }, { "epoch": 3.464821838773439, "grad_norm": 3.372450828552246, "learning_rate": 8.583242884415384e-07, "loss": 0.0456, "step": 324290 }, { "epoch": 3.4649286820877183, "grad_norm": 5.365302085876465, "learning_rate": 8.583125707412254e-07, "loss": 0.0291, "step": 324300 }, { "epoch": 3.4650355254019978, "grad_norm": 7.198385715484619, "learning_rate": 8.583008526363503e-07, "loss": 0.0198, "step": 324310 }, { "epoch": 3.4651423687162777, "grad_norm": 5.117124080657959, "learning_rate": 8.582891341269265e-07, "loss": 0.0266, "step": 324320 }, { "epoch": 3.465249212030557, "grad_norm": 0.11213028430938721, "learning_rate": 8.582774152129673e-07, "loss": 0.0027, "step": 324330 }, { "epoch": 3.465356055344837, "grad_norm": 2.6313741207122803, "learning_rate": 8.582656958944858e-07, "loss": 0.0178, "step": 324340 }, { "epoch": 3.4654628986591165, "grad_norm": 0.025469735264778137, "learning_rate": 8.582539761714953e-07, "loss": 0.0028, "step": 324350 }, { "epoch": 3.465569741973396, "grad_norm": 0.15805821120738983, "learning_rate": 8.582422560440092e-07, "loss": 0.0613, "step": 324360 }, { "epoch": 3.4656765852876754, "grad_norm": 0.017367994412779808, "learning_rate": 8.582305355120403e-07, "loss": 0.0115, "step": 324370 }, { "epoch": 3.4657834286019553, "grad_norm": 0.005820738151669502, "learning_rate": 8.582188145756021e-07, "loss": 0.0266, "step": 324380 }, { "epoch": 3.4658902719162348, "grad_norm": 0.018059415742754936, "learning_rate": 8.582070932347081e-07, "loss": 0.0136, "step": 324390 }, { "epoch": 3.4659971152305147, "grad_norm": 0.23318186402320862, "learning_rate": 8.58195371489371e-07, "loss": 0.0035, "step": 324400 }, { "epoch": 3.466103958544794, "grad_norm": 0.27731141448020935, "learning_rate": 8.581836493396044e-07, "loss": 0.0062, "step": 324410 }, { "epoch": 3.4662108018590736, "grad_norm": 0.005866907071322203, "learning_rate": 8.581719267854215e-07, "loss": 0.0064, "step": 324420 }, { "epoch": 3.466317645173353, "grad_norm": 2.5985686779022217, "learning_rate": 8.581602038268354e-07, "loss": 0.0009, "step": 324430 }, { "epoch": 3.466424488487633, "grad_norm": 11.114015579223633, "learning_rate": 8.581484804638593e-07, "loss": 0.0125, "step": 324440 }, { "epoch": 3.4665313318019124, "grad_norm": 0.17789651453495026, "learning_rate": 8.581367566965065e-07, "loss": 0.0125, "step": 324450 }, { "epoch": 3.4666381751161923, "grad_norm": 3.014772653579712, "learning_rate": 8.581250325247904e-07, "loss": 0.0016, "step": 324460 }, { "epoch": 3.466745018430472, "grad_norm": 0.010038481093943119, "learning_rate": 8.581133079487242e-07, "loss": 0.0085, "step": 324470 }, { "epoch": 3.4668518617447512, "grad_norm": 0.003458495484665036, "learning_rate": 8.581015829683209e-07, "loss": 0.0155, "step": 324480 }, { "epoch": 3.4669587050590307, "grad_norm": 0.08086554706096649, "learning_rate": 8.580898575835939e-07, "loss": 0.0092, "step": 324490 }, { "epoch": 3.4670655483733106, "grad_norm": 0.016797002404928207, "learning_rate": 8.580781317945565e-07, "loss": 0.0034, "step": 324500 }, { "epoch": 3.46717239168759, "grad_norm": 0.012468366883695126, "learning_rate": 8.580664056012216e-07, "loss": 0.0155, "step": 324510 }, { "epoch": 3.46727923500187, "grad_norm": 0.02412978932261467, "learning_rate": 8.58054679003603e-07, "loss": 0.0122, "step": 324520 }, { "epoch": 3.4673860783161494, "grad_norm": 1.3669319152832031, "learning_rate": 8.580429520017136e-07, "loss": 0.0077, "step": 324530 }, { "epoch": 3.467492921630429, "grad_norm": 3.449816942214966, "learning_rate": 8.580312245955665e-07, "loss": 0.0121, "step": 324540 }, { "epoch": 3.4675997649447083, "grad_norm": 0.013195902109146118, "learning_rate": 8.580194967851753e-07, "loss": 0.0028, "step": 324550 }, { "epoch": 3.4677066082589882, "grad_norm": 0.0056693339720368385, "learning_rate": 8.580077685705531e-07, "loss": 0.0444, "step": 324560 }, { "epoch": 3.4678134515732677, "grad_norm": 20.086429595947266, "learning_rate": 8.57996039951713e-07, "loss": 0.0557, "step": 324570 }, { "epoch": 3.4679202948875476, "grad_norm": 0.020913301035761833, "learning_rate": 8.579843109286684e-07, "loss": 0.0064, "step": 324580 }, { "epoch": 3.468027138201827, "grad_norm": 0.0038347183726727962, "learning_rate": 8.579725815014324e-07, "loss": 0.0053, "step": 324590 }, { "epoch": 3.4681339815161065, "grad_norm": 0.18035636842250824, "learning_rate": 8.579608516700186e-07, "loss": 0.0036, "step": 324600 }, { "epoch": 3.4682408248303864, "grad_norm": 0.6557418704032898, "learning_rate": 8.579491214344397e-07, "loss": 0.0139, "step": 324610 }, { "epoch": 3.468347668144666, "grad_norm": 0.013858010992407799, "learning_rate": 8.579373907947094e-07, "loss": 0.0036, "step": 324620 }, { "epoch": 3.4684545114589453, "grad_norm": 0.3013671338558197, "learning_rate": 8.579256597508406e-07, "loss": 0.0176, "step": 324630 }, { "epoch": 3.4685613547732252, "grad_norm": 0.5327568054199219, "learning_rate": 8.579139283028469e-07, "loss": 0.0015, "step": 324640 }, { "epoch": 3.4686681980875047, "grad_norm": 0.0189535953104496, "learning_rate": 8.579021964507413e-07, "loss": 0.0009, "step": 324650 }, { "epoch": 3.468775041401784, "grad_norm": 5.906684398651123, "learning_rate": 8.57890464194537e-07, "loss": 0.0455, "step": 324660 }, { "epoch": 3.468881884716064, "grad_norm": 4.7265191078186035, "learning_rate": 8.578787315342475e-07, "loss": 0.0124, "step": 324670 }, { "epoch": 3.4689887280303435, "grad_norm": 0.08029625564813614, "learning_rate": 8.578669984698857e-07, "loss": 0.0143, "step": 324680 }, { "epoch": 3.469095571344623, "grad_norm": 3.3800458908081055, "learning_rate": 8.578552650014652e-07, "loss": 0.0071, "step": 324690 }, { "epoch": 3.469202414658903, "grad_norm": 9.365565299987793, "learning_rate": 8.578435311289991e-07, "loss": 0.0269, "step": 324700 }, { "epoch": 3.4693092579731823, "grad_norm": 0.16459015011787415, "learning_rate": 8.578317968525007e-07, "loss": 0.0008, "step": 324710 }, { "epoch": 3.469416101287462, "grad_norm": 0.15818092226982117, "learning_rate": 8.578200621719831e-07, "loss": 0.0105, "step": 324720 }, { "epoch": 3.4695229446017417, "grad_norm": 3.5740461349487305, "learning_rate": 8.578083270874596e-07, "loss": 0.009, "step": 324730 }, { "epoch": 3.469629787916021, "grad_norm": 0.04846219718456268, "learning_rate": 8.577965915989435e-07, "loss": 0.0023, "step": 324740 }, { "epoch": 3.4697366312303006, "grad_norm": 7.377224922180176, "learning_rate": 8.577848557064482e-07, "loss": 0.0189, "step": 324750 }, { "epoch": 3.4698434745445805, "grad_norm": 0.03195818141102791, "learning_rate": 8.577731194099869e-07, "loss": 0.0282, "step": 324760 }, { "epoch": 3.46995031785886, "grad_norm": 6.219922065734863, "learning_rate": 8.577613827095725e-07, "loss": 0.0265, "step": 324770 }, { "epoch": 3.4700571611731394, "grad_norm": 0.026228180155158043, "learning_rate": 8.577496456052184e-07, "loss": 0.0147, "step": 324780 }, { "epoch": 3.4701640044874194, "grad_norm": 2.9427478313446045, "learning_rate": 8.577379080969381e-07, "loss": 0.0097, "step": 324790 }, { "epoch": 3.470270847801699, "grad_norm": 0.1268390715122223, "learning_rate": 8.577261701847447e-07, "loss": 0.002, "step": 324800 }, { "epoch": 3.4703776911159783, "grad_norm": 0.15991634130477905, "learning_rate": 8.577144318686516e-07, "loss": 0.0166, "step": 324810 }, { "epoch": 3.470484534430258, "grad_norm": 9.180164337158203, "learning_rate": 8.577026931486717e-07, "loss": 0.0341, "step": 324820 }, { "epoch": 3.4705913777445376, "grad_norm": 2.385399580001831, "learning_rate": 8.576909540248185e-07, "loss": 0.0112, "step": 324830 }, { "epoch": 3.470698221058817, "grad_norm": 0.01830143854022026, "learning_rate": 8.576792144971053e-07, "loss": 0.0242, "step": 324840 }, { "epoch": 3.470805064373097, "grad_norm": 0.056968748569488525, "learning_rate": 8.576674745655451e-07, "loss": 0.0091, "step": 324850 }, { "epoch": 3.4709119076873765, "grad_norm": 5.948450088500977, "learning_rate": 8.576557342301515e-07, "loss": 0.0171, "step": 324860 }, { "epoch": 3.471018751001656, "grad_norm": 0.10047503560781479, "learning_rate": 8.576439934909375e-07, "loss": 0.0353, "step": 324870 }, { "epoch": 3.471125594315936, "grad_norm": 0.025992371141910553, "learning_rate": 8.576322523479165e-07, "loss": 0.0107, "step": 324880 }, { "epoch": 3.4712324376302153, "grad_norm": 0.6606817245483398, "learning_rate": 8.576205108011017e-07, "loss": 0.0093, "step": 324890 }, { "epoch": 3.4713392809444947, "grad_norm": 1.6295357942581177, "learning_rate": 8.576087688505063e-07, "loss": 0.0099, "step": 324900 }, { "epoch": 3.4714461242587746, "grad_norm": 0.006413956172764301, "learning_rate": 8.575970264961436e-07, "loss": 0.0117, "step": 324910 }, { "epoch": 3.471552967573054, "grad_norm": 0.06866057217121124, "learning_rate": 8.575852837380268e-07, "loss": 0.0106, "step": 324920 }, { "epoch": 3.4716598108873336, "grad_norm": 0.13208051025867462, "learning_rate": 8.575735405761694e-07, "loss": 0.0122, "step": 324930 }, { "epoch": 3.4717666542016135, "grad_norm": 0.014427666552364826, "learning_rate": 8.575617970105844e-07, "loss": 0.0076, "step": 324940 }, { "epoch": 3.471873497515893, "grad_norm": 0.8330778479576111, "learning_rate": 8.575500530412852e-07, "loss": 0.0039, "step": 324950 }, { "epoch": 3.4719803408301724, "grad_norm": 0.7366318106651306, "learning_rate": 8.575383086682848e-07, "loss": 0.0019, "step": 324960 }, { "epoch": 3.4720871841444523, "grad_norm": 0.29995760321617126, "learning_rate": 8.575265638915968e-07, "loss": 0.0022, "step": 324970 }, { "epoch": 3.4721940274587317, "grad_norm": 0.006194510962814093, "learning_rate": 8.575148187112344e-07, "loss": 0.0339, "step": 324980 }, { "epoch": 3.472300870773011, "grad_norm": 0.024875076487660408, "learning_rate": 8.575030731272107e-07, "loss": 0.0064, "step": 324990 }, { "epoch": 3.472407714087291, "grad_norm": 8.44619083404541, "learning_rate": 8.574913271395391e-07, "loss": 0.0511, "step": 325000 }, { "epoch": 3.4725145574015706, "grad_norm": 9.469884872436523, "learning_rate": 8.574795807482328e-07, "loss": 0.0174, "step": 325010 }, { "epoch": 3.47262140071585, "grad_norm": 9.631103515625, "learning_rate": 8.57467833953305e-07, "loss": 0.01, "step": 325020 }, { "epoch": 3.47272824403013, "grad_norm": 1.9139541387557983, "learning_rate": 8.574560867547691e-07, "loss": 0.0246, "step": 325030 }, { "epoch": 3.4728350873444094, "grad_norm": 5.05626916885376, "learning_rate": 8.574443391526382e-07, "loss": 0.023, "step": 325040 }, { "epoch": 3.4729419306586893, "grad_norm": 0.25780192017555237, "learning_rate": 8.574325911469259e-07, "loss": 0.0097, "step": 325050 }, { "epoch": 3.4730487739729687, "grad_norm": 0.01626742072403431, "learning_rate": 8.57420842737645e-07, "loss": 0.0011, "step": 325060 }, { "epoch": 3.473155617287248, "grad_norm": 0.006325871683657169, "learning_rate": 8.574090939248091e-07, "loss": 0.0045, "step": 325070 }, { "epoch": 3.4732624606015277, "grad_norm": 0.023208696395158768, "learning_rate": 8.573973447084314e-07, "loss": 0.0155, "step": 325080 }, { "epoch": 3.4733693039158076, "grad_norm": 0.01636457070708275, "learning_rate": 8.573855950885252e-07, "loss": 0.0067, "step": 325090 }, { "epoch": 3.473476147230087, "grad_norm": 0.3347795307636261, "learning_rate": 8.573738450651033e-07, "loss": 0.0148, "step": 325100 }, { "epoch": 3.473582990544367, "grad_norm": 7.2826457023620605, "learning_rate": 8.573620946381797e-07, "loss": 0.0405, "step": 325110 }, { "epoch": 3.4736898338586464, "grad_norm": 0.48747968673706055, "learning_rate": 8.573503438077672e-07, "loss": 0.0208, "step": 325120 }, { "epoch": 3.473796677172926, "grad_norm": 0.3467097282409668, "learning_rate": 8.573385925738792e-07, "loss": 0.0034, "step": 325130 }, { "epoch": 3.4739035204872053, "grad_norm": 4.270261764526367, "learning_rate": 8.573268409365291e-07, "loss": 0.0088, "step": 325140 }, { "epoch": 3.474010363801485, "grad_norm": 0.10453406721353531, "learning_rate": 8.573150888957297e-07, "loss": 0.0049, "step": 325150 }, { "epoch": 3.4741172071157647, "grad_norm": 0.02741275355219841, "learning_rate": 8.57303336451495e-07, "loss": 0.0033, "step": 325160 }, { "epoch": 3.4742240504300446, "grad_norm": 0.01349618285894394, "learning_rate": 8.572915836038377e-07, "loss": 0.0052, "step": 325170 }, { "epoch": 3.474330893744324, "grad_norm": 1.9199241399765015, "learning_rate": 8.57279830352771e-07, "loss": 0.0034, "step": 325180 }, { "epoch": 3.4744377370586035, "grad_norm": 0.004674501251429319, "learning_rate": 8.572680766983087e-07, "loss": 0.0065, "step": 325190 }, { "epoch": 3.474544580372883, "grad_norm": 2.9400556087493896, "learning_rate": 8.572563226404636e-07, "loss": 0.0033, "step": 325200 }, { "epoch": 3.474651423687163, "grad_norm": 0.0051224688068032265, "learning_rate": 8.572445681792492e-07, "loss": 0.0031, "step": 325210 }, { "epoch": 3.4747582670014423, "grad_norm": 0.0019312340300530195, "learning_rate": 8.572328133146789e-07, "loss": 0.0017, "step": 325220 }, { "epoch": 3.474865110315722, "grad_norm": 0.00561892194673419, "learning_rate": 8.572210580467657e-07, "loss": 0.0028, "step": 325230 }, { "epoch": 3.4749719536300017, "grad_norm": 0.002248471835628152, "learning_rate": 8.572093023755229e-07, "loss": 0.0062, "step": 325240 }, { "epoch": 3.475078796944281, "grad_norm": 8.844935417175293, "learning_rate": 8.571975463009638e-07, "loss": 0.0159, "step": 325250 }, { "epoch": 3.4751856402585606, "grad_norm": 0.0038585118018090725, "learning_rate": 8.571857898231019e-07, "loss": 0.0072, "step": 325260 }, { "epoch": 3.4752924835728405, "grad_norm": 4.355685234069824, "learning_rate": 8.571740329419501e-07, "loss": 0.007, "step": 325270 }, { "epoch": 3.47539932688712, "grad_norm": 2.680314302444458, "learning_rate": 8.571622756575219e-07, "loss": 0.021, "step": 325280 }, { "epoch": 3.4755061702014, "grad_norm": 0.029595648869872093, "learning_rate": 8.571505179698305e-07, "loss": 0.0518, "step": 325290 }, { "epoch": 3.4756130135156793, "grad_norm": 0.0037877685390412807, "learning_rate": 8.571387598788893e-07, "loss": 0.0112, "step": 325300 }, { "epoch": 3.4757198568299588, "grad_norm": 0.013362741097807884, "learning_rate": 8.571270013847114e-07, "loss": 0.0353, "step": 325310 }, { "epoch": 3.4758267001442387, "grad_norm": 0.018848447129130363, "learning_rate": 8.571152424873103e-07, "loss": 0.0164, "step": 325320 }, { "epoch": 3.475933543458518, "grad_norm": 0.12161921709775925, "learning_rate": 8.57103483186699e-07, "loss": 0.0188, "step": 325330 }, { "epoch": 3.4760403867727976, "grad_norm": 5.342118740081787, "learning_rate": 8.570917234828909e-07, "loss": 0.0175, "step": 325340 }, { "epoch": 3.4761472300870775, "grad_norm": 1.2488093376159668, "learning_rate": 8.570799633758994e-07, "loss": 0.0085, "step": 325350 }, { "epoch": 3.476254073401357, "grad_norm": 0.0017384856473654509, "learning_rate": 8.570682028657376e-07, "loss": 0.0105, "step": 325360 }, { "epoch": 3.4763609167156364, "grad_norm": 2.402144193649292, "learning_rate": 8.570564419524189e-07, "loss": 0.0177, "step": 325370 }, { "epoch": 3.4764677600299163, "grad_norm": 0.13731001317501068, "learning_rate": 8.570446806359565e-07, "loss": 0.018, "step": 325380 }, { "epoch": 3.4765746033441958, "grad_norm": 3.7045202255249023, "learning_rate": 8.570329189163636e-07, "loss": 0.015, "step": 325390 }, { "epoch": 3.4766814466584752, "grad_norm": 0.008511551655828953, "learning_rate": 8.570211567936539e-07, "loss": 0.0072, "step": 325400 }, { "epoch": 3.476788289972755, "grad_norm": 0.10996950417757034, "learning_rate": 8.570093942678401e-07, "loss": 0.0052, "step": 325410 }, { "epoch": 3.4768951332870346, "grad_norm": 1.411926507949829, "learning_rate": 8.569976313389358e-07, "loss": 0.011, "step": 325420 }, { "epoch": 3.477001976601314, "grad_norm": 1.7895305156707764, "learning_rate": 8.569858680069541e-07, "loss": 0.0154, "step": 325430 }, { "epoch": 3.477108819915594, "grad_norm": 9.206497192382812, "learning_rate": 8.569741042719087e-07, "loss": 0.0211, "step": 325440 }, { "epoch": 3.4772156632298734, "grad_norm": 2.914255142211914, "learning_rate": 8.569623401338124e-07, "loss": 0.0038, "step": 325450 }, { "epoch": 3.477322506544153, "grad_norm": 0.38382086157798767, "learning_rate": 8.569505755926787e-07, "loss": 0.0016, "step": 325460 }, { "epoch": 3.4774293498584328, "grad_norm": 0.4623714089393616, "learning_rate": 8.569388106485209e-07, "loss": 0.0086, "step": 325470 }, { "epoch": 3.4775361931727122, "grad_norm": 18.83072280883789, "learning_rate": 8.569270453013522e-07, "loss": 0.0367, "step": 325480 }, { "epoch": 3.4776430364869917, "grad_norm": 13.712745666503906, "learning_rate": 8.56915279551186e-07, "loss": 0.0073, "step": 325490 }, { "epoch": 3.4777498798012716, "grad_norm": 0.5273593068122864, "learning_rate": 8.569035133980354e-07, "loss": 0.0024, "step": 325500 }, { "epoch": 3.477856723115551, "grad_norm": 0.4589732587337494, "learning_rate": 8.568917468419139e-07, "loss": 0.0135, "step": 325510 }, { "epoch": 3.4779635664298305, "grad_norm": 13.31421184539795, "learning_rate": 8.568799798828347e-07, "loss": 0.0242, "step": 325520 }, { "epoch": 3.4780704097441104, "grad_norm": 0.8535768389701843, "learning_rate": 8.568682125208111e-07, "loss": 0.0105, "step": 325530 }, { "epoch": 3.47817725305839, "grad_norm": 0.07584177702665329, "learning_rate": 8.568564447558562e-07, "loss": 0.0396, "step": 325540 }, { "epoch": 3.4782840963726693, "grad_norm": 0.7790386080741882, "learning_rate": 8.568446765879835e-07, "loss": 0.0266, "step": 325550 }, { "epoch": 3.4783909396869492, "grad_norm": 4.193333148956299, "learning_rate": 8.568329080172064e-07, "loss": 0.0199, "step": 325560 }, { "epoch": 3.4784977830012287, "grad_norm": 0.0007843755884096026, "learning_rate": 8.568211390435377e-07, "loss": 0.0119, "step": 325570 }, { "epoch": 3.478604626315508, "grad_norm": 0.29391545057296753, "learning_rate": 8.568093696669911e-07, "loss": 0.0139, "step": 325580 }, { "epoch": 3.478711469629788, "grad_norm": 0.017831696197390556, "learning_rate": 8.567975998875799e-07, "loss": 0.0028, "step": 325590 }, { "epoch": 3.4788183129440675, "grad_norm": 2.3988609313964844, "learning_rate": 8.567858297053172e-07, "loss": 0.011, "step": 325600 }, { "epoch": 3.478925156258347, "grad_norm": 0.0031568666454404593, "learning_rate": 8.567740591202163e-07, "loss": 0.0093, "step": 325610 }, { "epoch": 3.479031999572627, "grad_norm": 0.008118213154375553, "learning_rate": 8.567622881322907e-07, "loss": 0.0024, "step": 325620 }, { "epoch": 3.4791388428869063, "grad_norm": 3.0073904991149902, "learning_rate": 8.567505167415536e-07, "loss": 0.0134, "step": 325630 }, { "epoch": 3.479245686201186, "grad_norm": 5.957433223724365, "learning_rate": 8.567387449480182e-07, "loss": 0.0049, "step": 325640 }, { "epoch": 3.4793525295154657, "grad_norm": 0.044168367981910706, "learning_rate": 8.567269727516977e-07, "loss": 0.0078, "step": 325650 }, { "epoch": 3.479459372829745, "grad_norm": 14.818114280700684, "learning_rate": 8.567152001526057e-07, "loss": 0.0502, "step": 325660 }, { "epoch": 3.4795662161440246, "grad_norm": 1.8454926013946533, "learning_rate": 8.567034271507552e-07, "loss": 0.0115, "step": 325670 }, { "epoch": 3.4796730594583045, "grad_norm": 0.02423747256398201, "learning_rate": 8.566916537461596e-07, "loss": 0.017, "step": 325680 }, { "epoch": 3.479779902772584, "grad_norm": 0.29398778080940247, "learning_rate": 8.566798799388322e-07, "loss": 0.0024, "step": 325690 }, { "epoch": 3.4798867460868634, "grad_norm": 4.647609710693359, "learning_rate": 8.566681057287863e-07, "loss": 0.0017, "step": 325700 }, { "epoch": 3.4799935894011433, "grad_norm": 1.2579452991485596, "learning_rate": 8.566563311160353e-07, "loss": 0.0243, "step": 325710 }, { "epoch": 3.480100432715423, "grad_norm": 12.639379501342773, "learning_rate": 8.566445561005923e-07, "loss": 0.012, "step": 325720 }, { "epoch": 3.4802072760297023, "grad_norm": 2.6180505752563477, "learning_rate": 8.566327806824707e-07, "loss": 0.0371, "step": 325730 }, { "epoch": 3.480314119343982, "grad_norm": 0.011017652228474617, "learning_rate": 8.566210048616838e-07, "loss": 0.0272, "step": 325740 }, { "epoch": 3.4804209626582616, "grad_norm": 7.7419843673706055, "learning_rate": 8.566092286382448e-07, "loss": 0.0208, "step": 325750 }, { "epoch": 3.480527805972541, "grad_norm": 3.0387251377105713, "learning_rate": 8.56597452012167e-07, "loss": 0.0205, "step": 325760 }, { "epoch": 3.480634649286821, "grad_norm": 0.9152665138244629, "learning_rate": 8.565856749834639e-07, "loss": 0.0018, "step": 325770 }, { "epoch": 3.4807414926011004, "grad_norm": 0.08474214375019073, "learning_rate": 8.565738975521486e-07, "loss": 0.0068, "step": 325780 }, { "epoch": 3.48084833591538, "grad_norm": 0.016938993707299232, "learning_rate": 8.565621197182345e-07, "loss": 0.0177, "step": 325790 }, { "epoch": 3.48095517922966, "grad_norm": 0.00605398416519165, "learning_rate": 8.565503414817349e-07, "loss": 0.0239, "step": 325800 }, { "epoch": 3.4810620225439393, "grad_norm": 0.02045242302119732, "learning_rate": 8.565385628426629e-07, "loss": 0.0064, "step": 325810 }, { "epoch": 3.481168865858219, "grad_norm": 0.011833677999675274, "learning_rate": 8.565267838010321e-07, "loss": 0.0276, "step": 325820 }, { "epoch": 3.4812757091724986, "grad_norm": 0.03169165924191475, "learning_rate": 8.565150043568556e-07, "loss": 0.0226, "step": 325830 }, { "epoch": 3.481382552486778, "grad_norm": 17.403202056884766, "learning_rate": 8.565032245101467e-07, "loss": 0.026, "step": 325840 }, { "epoch": 3.4814893958010575, "grad_norm": 42.13918685913086, "learning_rate": 8.564914442609188e-07, "loss": 0.0454, "step": 325850 }, { "epoch": 3.4815962391153374, "grad_norm": 0.0005027982988394797, "learning_rate": 8.564796636091852e-07, "loss": 0.0228, "step": 325860 }, { "epoch": 3.481703082429617, "grad_norm": 0.8400679230690002, "learning_rate": 8.564678825549592e-07, "loss": 0.027, "step": 325870 }, { "epoch": 3.481809925743897, "grad_norm": 1.96372652053833, "learning_rate": 8.564561010982539e-07, "loss": 0.0095, "step": 325880 }, { "epoch": 3.4819167690581763, "grad_norm": 0.5858327150344849, "learning_rate": 8.564443192390828e-07, "loss": 0.0103, "step": 325890 }, { "epoch": 3.4820236123724557, "grad_norm": 0.00347415404394269, "learning_rate": 8.564325369774592e-07, "loss": 0.0101, "step": 325900 }, { "epoch": 3.482130455686735, "grad_norm": 0.01111495029181242, "learning_rate": 8.564207543133963e-07, "loss": 0.0234, "step": 325910 }, { "epoch": 3.482237299001015, "grad_norm": 0.7809340357780457, "learning_rate": 8.564089712469076e-07, "loss": 0.0168, "step": 325920 }, { "epoch": 3.4823441423152945, "grad_norm": 0.5543100833892822, "learning_rate": 8.56397187778006e-07, "loss": 0.0056, "step": 325930 }, { "epoch": 3.4824509856295744, "grad_norm": 2.0506527423858643, "learning_rate": 8.563854039067053e-07, "loss": 0.0435, "step": 325940 }, { "epoch": 3.482557828943854, "grad_norm": 0.01058416161686182, "learning_rate": 8.563736196330187e-07, "loss": 0.0083, "step": 325950 }, { "epoch": 3.4826646722581334, "grad_norm": 1.6539027690887451, "learning_rate": 8.563618349569592e-07, "loss": 0.0118, "step": 325960 }, { "epoch": 3.482771515572413, "grad_norm": 0.6411159634590149, "learning_rate": 8.563500498785403e-07, "loss": 0.0121, "step": 325970 }, { "epoch": 3.4828783588866927, "grad_norm": 0.05487352982163429, "learning_rate": 8.563382643977753e-07, "loss": 0.0642, "step": 325980 }, { "epoch": 3.482985202200972, "grad_norm": 0.010126196779310703, "learning_rate": 8.563264785146775e-07, "loss": 0.0004, "step": 325990 }, { "epoch": 3.483092045515252, "grad_norm": 7.653385162353516, "learning_rate": 8.563146922292604e-07, "loss": 0.042, "step": 326000 }, { "epoch": 3.4831988888295315, "grad_norm": 0.023172559216618538, "learning_rate": 8.56302905541537e-07, "loss": 0.0313, "step": 326010 }, { "epoch": 3.483305732143811, "grad_norm": 0.007363733369857073, "learning_rate": 8.562911184515206e-07, "loss": 0.0466, "step": 326020 }, { "epoch": 3.4834125754580905, "grad_norm": 1.3812134265899658, "learning_rate": 8.562793309592246e-07, "loss": 0.0048, "step": 326030 }, { "epoch": 3.4835194187723704, "grad_norm": 0.15976256132125854, "learning_rate": 8.562675430646627e-07, "loss": 0.0116, "step": 326040 }, { "epoch": 3.48362626208665, "grad_norm": 0.018660377711057663, "learning_rate": 8.562557547678475e-07, "loss": 0.0206, "step": 326050 }, { "epoch": 3.4837331054009297, "grad_norm": 1.8400195837020874, "learning_rate": 8.562439660687928e-07, "loss": 0.0105, "step": 326060 }, { "epoch": 3.483839948715209, "grad_norm": 0.13746297359466553, "learning_rate": 8.562321769675118e-07, "loss": 0.0021, "step": 326070 }, { "epoch": 3.4839467920294886, "grad_norm": 0.3949669897556305, "learning_rate": 8.562203874640177e-07, "loss": 0.0055, "step": 326080 }, { "epoch": 3.4840536353437686, "grad_norm": 1.247578740119934, "learning_rate": 8.56208597558324e-07, "loss": 0.0311, "step": 326090 }, { "epoch": 3.484160478658048, "grad_norm": 0.6058580875396729, "learning_rate": 8.561968072504438e-07, "loss": 0.004, "step": 326100 }, { "epoch": 3.4842673219723275, "grad_norm": 0.04109000787138939, "learning_rate": 8.561850165403906e-07, "loss": 0.0077, "step": 326110 }, { "epoch": 3.4843741652866074, "grad_norm": 5.723123073577881, "learning_rate": 8.561732254281777e-07, "loss": 0.0106, "step": 326120 }, { "epoch": 3.484481008600887, "grad_norm": 0.04741407558321953, "learning_rate": 8.561614339138182e-07, "loss": 0.0051, "step": 326130 }, { "epoch": 3.4845878519151663, "grad_norm": 0.003723272355273366, "learning_rate": 8.561496419973256e-07, "loss": 0.0048, "step": 326140 }, { "epoch": 3.484694695229446, "grad_norm": 0.271821528673172, "learning_rate": 8.561378496787132e-07, "loss": 0.0137, "step": 326150 }, { "epoch": 3.4848015385437257, "grad_norm": 0.03521367534995079, "learning_rate": 8.561260569579945e-07, "loss": 0.0159, "step": 326160 }, { "epoch": 3.484908381858005, "grad_norm": 0.004969694651663303, "learning_rate": 8.561142638351822e-07, "loss": 0.0091, "step": 326170 }, { "epoch": 3.485015225172285, "grad_norm": 0.002068120054900646, "learning_rate": 8.561024703102903e-07, "loss": 0.0007, "step": 326180 }, { "epoch": 3.4851220684865645, "grad_norm": 11.068180084228516, "learning_rate": 8.560906763833318e-07, "loss": 0.0212, "step": 326190 }, { "epoch": 3.485228911800844, "grad_norm": 5.892841815948486, "learning_rate": 8.560788820543199e-07, "loss": 0.0145, "step": 326200 }, { "epoch": 3.485335755115124, "grad_norm": 0.04630771279335022, "learning_rate": 8.560670873232683e-07, "loss": 0.0063, "step": 326210 }, { "epoch": 3.4854425984294033, "grad_norm": 2.4028613567352295, "learning_rate": 8.560552921901898e-07, "loss": 0.0044, "step": 326220 }, { "epoch": 3.4855494417436828, "grad_norm": 0.0058909994550049305, "learning_rate": 8.560434966550982e-07, "loss": 0.0095, "step": 326230 }, { "epoch": 3.4856562850579627, "grad_norm": 0.004938861355185509, "learning_rate": 8.560317007180065e-07, "loss": 0.0041, "step": 326240 }, { "epoch": 3.485763128372242, "grad_norm": 6.393123149871826, "learning_rate": 8.560199043789283e-07, "loss": 0.0315, "step": 326250 }, { "epoch": 3.4858699716865216, "grad_norm": 0.00402448745444417, "learning_rate": 8.560081076378767e-07, "loss": 0.0066, "step": 326260 }, { "epoch": 3.4859768150008015, "grad_norm": 0.0016666370211169124, "learning_rate": 8.55996310494865e-07, "loss": 0.0107, "step": 326270 }, { "epoch": 3.486083658315081, "grad_norm": 0.005215729121118784, "learning_rate": 8.559845129499066e-07, "loss": 0.0177, "step": 326280 }, { "epoch": 3.4861905016293604, "grad_norm": 4.249370574951172, "learning_rate": 8.55972715003015e-07, "loss": 0.0042, "step": 326290 }, { "epoch": 3.4862973449436403, "grad_norm": 0.29045370221138, "learning_rate": 8.559609166542031e-07, "loss": 0.0096, "step": 326300 }, { "epoch": 3.4864041882579198, "grad_norm": 0.07438111305236816, "learning_rate": 8.559491179034845e-07, "loss": 0.0136, "step": 326310 }, { "epoch": 3.486511031572199, "grad_norm": 0.03958044946193695, "learning_rate": 8.559373187508725e-07, "loss": 0.0065, "step": 326320 }, { "epoch": 3.486617874886479, "grad_norm": 0.2775174379348755, "learning_rate": 8.559255191963805e-07, "loss": 0.009, "step": 326330 }, { "epoch": 3.4867247182007586, "grad_norm": 11.192275047302246, "learning_rate": 8.559137192400216e-07, "loss": 0.0215, "step": 326340 }, { "epoch": 3.486831561515038, "grad_norm": 0.5690748691558838, "learning_rate": 8.559019188818094e-07, "loss": 0.0445, "step": 326350 }, { "epoch": 3.486938404829318, "grad_norm": 0.22255341708660126, "learning_rate": 8.55890118121757e-07, "loss": 0.0063, "step": 326360 }, { "epoch": 3.4870452481435974, "grad_norm": 0.635025143623352, "learning_rate": 8.558783169598778e-07, "loss": 0.0093, "step": 326370 }, { "epoch": 3.487152091457877, "grad_norm": 1.073254942893982, "learning_rate": 8.558665153961851e-07, "loss": 0.014, "step": 326380 }, { "epoch": 3.4872589347721568, "grad_norm": 2.367227792739868, "learning_rate": 8.558547134306922e-07, "loss": 0.0194, "step": 326390 }, { "epoch": 3.487365778086436, "grad_norm": 0.2141282707452774, "learning_rate": 8.558429110634126e-07, "loss": 0.0577, "step": 326400 }, { "epoch": 3.4874726214007157, "grad_norm": 3.2197682857513428, "learning_rate": 8.558311082943594e-07, "loss": 0.0434, "step": 326410 }, { "epoch": 3.4875794647149956, "grad_norm": 2.704235315322876, "learning_rate": 8.558193051235461e-07, "loss": 0.014, "step": 326420 }, { "epoch": 3.487686308029275, "grad_norm": 15.183966636657715, "learning_rate": 8.558075015509859e-07, "loss": 0.005, "step": 326430 }, { "epoch": 3.4877931513435545, "grad_norm": 0.008201681077480316, "learning_rate": 8.557956975766921e-07, "loss": 0.0098, "step": 326440 }, { "epoch": 3.4878999946578344, "grad_norm": 7.486539363861084, "learning_rate": 8.557838932006782e-07, "loss": 0.0153, "step": 326450 }, { "epoch": 3.488006837972114, "grad_norm": 0.0009809235343709588, "learning_rate": 8.557720884229574e-07, "loss": 0.0094, "step": 326460 }, { "epoch": 3.4881136812863933, "grad_norm": 8.630739212036133, "learning_rate": 8.557602832435431e-07, "loss": 0.0107, "step": 326470 }, { "epoch": 3.4882205246006732, "grad_norm": 4.96387243270874, "learning_rate": 8.557484776624485e-07, "loss": 0.0456, "step": 326480 }, { "epoch": 3.4883273679149527, "grad_norm": 1.1168957948684692, "learning_rate": 8.557366716796871e-07, "loss": 0.002, "step": 326490 }, { "epoch": 3.488434211229232, "grad_norm": 0.3132171332836151, "learning_rate": 8.557248652952722e-07, "loss": 0.0118, "step": 326500 }, { "epoch": 3.488541054543512, "grad_norm": 0.01582934334874153, "learning_rate": 8.557130585092169e-07, "loss": 0.0187, "step": 326510 }, { "epoch": 3.4886478978577915, "grad_norm": 0.1670914739370346, "learning_rate": 8.557012513215349e-07, "loss": 0.034, "step": 326520 }, { "epoch": 3.4887547411720714, "grad_norm": 2.784339189529419, "learning_rate": 8.556894437322393e-07, "loss": 0.0267, "step": 326530 }, { "epoch": 3.488861584486351, "grad_norm": 0.0012817992828786373, "learning_rate": 8.556776357413433e-07, "loss": 0.0093, "step": 326540 }, { "epoch": 3.4889684278006303, "grad_norm": 0.0014629593351855874, "learning_rate": 8.556658273488606e-07, "loss": 0.0088, "step": 326550 }, { "epoch": 3.48907527111491, "grad_norm": 0.2823401391506195, "learning_rate": 8.556540185548042e-07, "loss": 0.0164, "step": 326560 }, { "epoch": 3.4891821144291897, "grad_norm": 3.198338747024536, "learning_rate": 8.556422093591877e-07, "loss": 0.0037, "step": 326570 }, { "epoch": 3.489288957743469, "grad_norm": 0.11019682139158249, "learning_rate": 8.556303997620243e-07, "loss": 0.0039, "step": 326580 }, { "epoch": 3.489395801057749, "grad_norm": 0.0031396278645843267, "learning_rate": 8.556185897633272e-07, "loss": 0.0056, "step": 326590 }, { "epoch": 3.4895026443720285, "grad_norm": 1.1742217540740967, "learning_rate": 8.5560677936311e-07, "loss": 0.0115, "step": 326600 }, { "epoch": 3.489609487686308, "grad_norm": 0.16903309524059296, "learning_rate": 8.555949685613859e-07, "loss": 0.0028, "step": 326610 }, { "epoch": 3.4897163310005874, "grad_norm": 0.1576705276966095, "learning_rate": 8.555831573581682e-07, "loss": 0.0081, "step": 326620 }, { "epoch": 3.4898231743148673, "grad_norm": 0.013114426285028458, "learning_rate": 8.555713457534704e-07, "loss": 0.0098, "step": 326630 }, { "epoch": 3.489930017629147, "grad_norm": 0.033323485404253006, "learning_rate": 8.555595337473054e-07, "loss": 0.0105, "step": 326640 }, { "epoch": 3.4900368609434267, "grad_norm": 1.2175488471984863, "learning_rate": 8.555477213396871e-07, "loss": 0.0103, "step": 326650 }, { "epoch": 3.490143704257706, "grad_norm": 4.620560169219971, "learning_rate": 8.555359085306285e-07, "loss": 0.004, "step": 326660 }, { "epoch": 3.4902505475719856, "grad_norm": 0.34844323992729187, "learning_rate": 8.555240953201431e-07, "loss": 0.0172, "step": 326670 }, { "epoch": 3.490357390886265, "grad_norm": 4.69167947769165, "learning_rate": 8.55512281708244e-07, "loss": 0.0082, "step": 326680 }, { "epoch": 3.490464234200545, "grad_norm": 0.008752438239753246, "learning_rate": 8.55500467694945e-07, "loss": 0.0113, "step": 326690 }, { "epoch": 3.4905710775148244, "grad_norm": 0.04794682562351227, "learning_rate": 8.55488653280259e-07, "loss": 0.0035, "step": 326700 }, { "epoch": 3.4906779208291043, "grad_norm": 0.0021682805381715298, "learning_rate": 8.554768384641994e-07, "loss": 0.0005, "step": 326710 }, { "epoch": 3.490784764143384, "grad_norm": 1.8175233602523804, "learning_rate": 8.554650232467797e-07, "loss": 0.0061, "step": 326720 }, { "epoch": 3.4908916074576632, "grad_norm": 1.0571376085281372, "learning_rate": 8.554532076280131e-07, "loss": 0.0046, "step": 326730 }, { "epoch": 3.4909984507719427, "grad_norm": 0.0009870963403955102, "learning_rate": 8.554413916079131e-07, "loss": 0.0062, "step": 326740 }, { "epoch": 3.4911052940862226, "grad_norm": 0.010738288052380085, "learning_rate": 8.554295751864929e-07, "loss": 0.0104, "step": 326750 }, { "epoch": 3.491212137400502, "grad_norm": 0.06598683446645737, "learning_rate": 8.554177583637659e-07, "loss": 0.0249, "step": 326760 }, { "epoch": 3.491318980714782, "grad_norm": 0.08280876278877258, "learning_rate": 8.554059411397452e-07, "loss": 0.0089, "step": 326770 }, { "epoch": 3.4914258240290614, "grad_norm": 0.011290650814771652, "learning_rate": 8.553941235144446e-07, "loss": 0.0191, "step": 326780 }, { "epoch": 3.491532667343341, "grad_norm": 0.09934743493795395, "learning_rate": 8.553823054878773e-07, "loss": 0.0147, "step": 326790 }, { "epoch": 3.491639510657621, "grad_norm": 0.05052962154150009, "learning_rate": 8.553704870600563e-07, "loss": 0.0221, "step": 326800 }, { "epoch": 3.4917463539719003, "grad_norm": 1.4107681512832642, "learning_rate": 8.553586682309953e-07, "loss": 0.0043, "step": 326810 }, { "epoch": 3.4918531972861797, "grad_norm": 0.024960899725556374, "learning_rate": 8.553468490007076e-07, "loss": 0.0098, "step": 326820 }, { "epoch": 3.4919600406004596, "grad_norm": 0.02317955158650875, "learning_rate": 8.553350293692065e-07, "loss": 0.0258, "step": 326830 }, { "epoch": 3.492066883914739, "grad_norm": 8.31923770904541, "learning_rate": 8.553232093365052e-07, "loss": 0.0141, "step": 326840 }, { "epoch": 3.4921737272290185, "grad_norm": 0.8643962740898132, "learning_rate": 8.553113889026174e-07, "loss": 0.0016, "step": 326850 }, { "epoch": 3.4922805705432984, "grad_norm": 0.0011690829414874315, "learning_rate": 8.552995680675561e-07, "loss": 0.0043, "step": 326860 }, { "epoch": 3.492387413857578, "grad_norm": 1.5153707265853882, "learning_rate": 8.552877468313347e-07, "loss": 0.008, "step": 326870 }, { "epoch": 3.4924942571718574, "grad_norm": 7.5979180335998535, "learning_rate": 8.552759251939666e-07, "loss": 0.0032, "step": 326880 }, { "epoch": 3.4926011004861373, "grad_norm": 0.021775051951408386, "learning_rate": 8.552641031554653e-07, "loss": 0.0016, "step": 326890 }, { "epoch": 3.4927079438004167, "grad_norm": 3.7428524494171143, "learning_rate": 8.55252280715844e-07, "loss": 0.0117, "step": 326900 }, { "epoch": 3.492814787114696, "grad_norm": 2.1036295890808105, "learning_rate": 8.55240457875116e-07, "loss": 0.004, "step": 326910 }, { "epoch": 3.492921630428976, "grad_norm": 5.831796646118164, "learning_rate": 8.552286346332948e-07, "loss": 0.0268, "step": 326920 }, { "epoch": 3.4930284737432555, "grad_norm": 0.3845426142215729, "learning_rate": 8.552168109903936e-07, "loss": 0.0448, "step": 326930 }, { "epoch": 3.493135317057535, "grad_norm": 0.022409876808524132, "learning_rate": 8.552049869464258e-07, "loss": 0.0083, "step": 326940 }, { "epoch": 3.493242160371815, "grad_norm": 0.10039794445037842, "learning_rate": 8.551931625014047e-07, "loss": 0.0068, "step": 326950 }, { "epoch": 3.4933490036860944, "grad_norm": 0.0339026004076004, "learning_rate": 8.551813376553438e-07, "loss": 0.0218, "step": 326960 }, { "epoch": 3.493455847000374, "grad_norm": 1.9767074584960938, "learning_rate": 8.551695124082564e-07, "loss": 0.0058, "step": 326970 }, { "epoch": 3.4935626903146537, "grad_norm": 0.08898217231035233, "learning_rate": 8.551576867601558e-07, "loss": 0.0068, "step": 326980 }, { "epoch": 3.493669533628933, "grad_norm": 27.30243682861328, "learning_rate": 8.551458607110554e-07, "loss": 0.0122, "step": 326990 }, { "epoch": 3.4937763769432126, "grad_norm": 0.009634013287723064, "learning_rate": 8.551340342609683e-07, "loss": 0.0007, "step": 327000 }, { "epoch": 3.4938832202574925, "grad_norm": 0.01021616905927658, "learning_rate": 8.551222074099082e-07, "loss": 0.0162, "step": 327010 }, { "epoch": 3.493990063571772, "grad_norm": 0.07885957509279251, "learning_rate": 8.551103801578883e-07, "loss": 0.0261, "step": 327020 }, { "epoch": 3.4940969068860515, "grad_norm": 0.1904410868883133, "learning_rate": 8.550985525049222e-07, "loss": 0.0026, "step": 327030 }, { "epoch": 3.4942037502003314, "grad_norm": 0.019963432103395462, "learning_rate": 8.550867244510227e-07, "loss": 0.0241, "step": 327040 }, { "epoch": 3.494310593514611, "grad_norm": 1.956809401512146, "learning_rate": 8.550748959962038e-07, "loss": 0.0182, "step": 327050 }, { "epoch": 3.4944174368288903, "grad_norm": 0.05159853771328926, "learning_rate": 8.550630671404782e-07, "loss": 0.006, "step": 327060 }, { "epoch": 3.49452428014317, "grad_norm": 1.552056908607483, "learning_rate": 8.550512378838599e-07, "loss": 0.016, "step": 327070 }, { "epoch": 3.4946311234574496, "grad_norm": 7.943794250488281, "learning_rate": 8.550394082263619e-07, "loss": 0.0235, "step": 327080 }, { "epoch": 3.494737966771729, "grad_norm": 7.165302753448486, "learning_rate": 8.550275781679974e-07, "loss": 0.0705, "step": 327090 }, { "epoch": 3.494844810086009, "grad_norm": 1.0165199041366577, "learning_rate": 8.550157477087802e-07, "loss": 0.0308, "step": 327100 }, { "epoch": 3.4949516534002885, "grad_norm": 2.917085886001587, "learning_rate": 8.550039168487234e-07, "loss": 0.0277, "step": 327110 }, { "epoch": 3.495058496714568, "grad_norm": 0.744920015335083, "learning_rate": 8.549920855878403e-07, "loss": 0.0015, "step": 327120 }, { "epoch": 3.495165340028848, "grad_norm": 0.10346271842718124, "learning_rate": 8.549802539261444e-07, "loss": 0.0276, "step": 327130 }, { "epoch": 3.4952721833431273, "grad_norm": 0.027344297617673874, "learning_rate": 8.549684218636491e-07, "loss": 0.0288, "step": 327140 }, { "epoch": 3.4953790266574067, "grad_norm": 0.10594501346349716, "learning_rate": 8.549565894003674e-07, "loss": 0.0004, "step": 327150 }, { "epoch": 3.4954858699716866, "grad_norm": 1.5697306394577026, "learning_rate": 8.54944756536313e-07, "loss": 0.0101, "step": 327160 }, { "epoch": 3.495592713285966, "grad_norm": 0.17190097272396088, "learning_rate": 8.549329232714993e-07, "loss": 0.0107, "step": 327170 }, { "epoch": 3.4956995566002456, "grad_norm": 0.009653500281274319, "learning_rate": 8.549210896059395e-07, "loss": 0.012, "step": 327180 }, { "epoch": 3.4958063999145255, "grad_norm": 0.2808026969432831, "learning_rate": 8.54909255539647e-07, "loss": 0.0019, "step": 327190 }, { "epoch": 3.495913243228805, "grad_norm": 0.027579588815569878, "learning_rate": 8.548974210726352e-07, "loss": 0.0108, "step": 327200 }, { "epoch": 3.4960200865430844, "grad_norm": 5.876904487609863, "learning_rate": 8.548855862049174e-07, "loss": 0.0294, "step": 327210 }, { "epoch": 3.4961269298573643, "grad_norm": 3.1831047534942627, "learning_rate": 8.54873750936507e-07, "loss": 0.007, "step": 327220 }, { "epoch": 3.4962337731716437, "grad_norm": 4.566915512084961, "learning_rate": 8.548619152674173e-07, "loss": 0.0169, "step": 327230 }, { "epoch": 3.496340616485923, "grad_norm": 5.591542720794678, "learning_rate": 8.548500791976616e-07, "loss": 0.0078, "step": 327240 }, { "epoch": 3.496447459800203, "grad_norm": 0.01002529077231884, "learning_rate": 8.548382427272536e-07, "loss": 0.01, "step": 327250 }, { "epoch": 3.4965543031144826, "grad_norm": 1.4937323331832886, "learning_rate": 8.548264058562063e-07, "loss": 0.0116, "step": 327260 }, { "epoch": 3.496661146428762, "grad_norm": 0.006265370175242424, "learning_rate": 8.548145685845334e-07, "loss": 0.0219, "step": 327270 }, { "epoch": 3.496767989743042, "grad_norm": 0.0641586184501648, "learning_rate": 8.548027309122478e-07, "loss": 0.004, "step": 327280 }, { "epoch": 3.4968748330573214, "grad_norm": 1.8667387962341309, "learning_rate": 8.547908928393633e-07, "loss": 0.0099, "step": 327290 }, { "epoch": 3.4969816763716013, "grad_norm": 0.0034834728576242924, "learning_rate": 8.547790543658931e-07, "loss": 0.0184, "step": 327300 }, { "epoch": 3.4970885196858807, "grad_norm": 0.003623149823397398, "learning_rate": 8.547672154918506e-07, "loss": 0.0528, "step": 327310 }, { "epoch": 3.49719536300016, "grad_norm": 1.7754571437835693, "learning_rate": 8.54755376217249e-07, "loss": 0.0043, "step": 327320 }, { "epoch": 3.4973022063144397, "grad_norm": 0.048786573112010956, "learning_rate": 8.547435365421019e-07, "loss": 0.0082, "step": 327330 }, { "epoch": 3.4974090496287196, "grad_norm": 1.5965144634246826, "learning_rate": 8.547316964664227e-07, "loss": 0.0052, "step": 327340 }, { "epoch": 3.497515892942999, "grad_norm": 0.01580609194934368, "learning_rate": 8.547198559902245e-07, "loss": 0.029, "step": 327350 }, { "epoch": 3.497622736257279, "grad_norm": 4.269807815551758, "learning_rate": 8.547080151135207e-07, "loss": 0.035, "step": 327360 }, { "epoch": 3.4977295795715584, "grad_norm": 4.119465351104736, "learning_rate": 8.54696173836325e-07, "loss": 0.0033, "step": 327370 }, { "epoch": 3.497836422885838, "grad_norm": 0.06097983196377754, "learning_rate": 8.546843321586504e-07, "loss": 0.0235, "step": 327380 }, { "epoch": 3.4979432662001173, "grad_norm": 3.21297287940979, "learning_rate": 8.546724900805105e-07, "loss": 0.002, "step": 327390 }, { "epoch": 3.498050109514397, "grad_norm": 0.021161120384931564, "learning_rate": 8.546606476019186e-07, "loss": 0.0359, "step": 327400 }, { "epoch": 3.4981569528286767, "grad_norm": 0.027143700048327446, "learning_rate": 8.54648804722888e-07, "loss": 0.0123, "step": 327410 }, { "epoch": 3.4982637961429566, "grad_norm": 2.3161261081695557, "learning_rate": 8.546369614434321e-07, "loss": 0.019, "step": 327420 }, { "epoch": 3.498370639457236, "grad_norm": 0.2951754033565521, "learning_rate": 8.546251177635644e-07, "loss": 0.0021, "step": 327430 }, { "epoch": 3.4984774827715155, "grad_norm": 0.024075154215097427, "learning_rate": 8.546132736832982e-07, "loss": 0.0174, "step": 327440 }, { "epoch": 3.498584326085795, "grad_norm": 0.010202476754784584, "learning_rate": 8.546014292026467e-07, "loss": 0.0381, "step": 327450 }, { "epoch": 3.498691169400075, "grad_norm": 0.11856053024530411, "learning_rate": 8.545895843216235e-07, "loss": 0.0407, "step": 327460 }, { "epoch": 3.4987980127143543, "grad_norm": 0.10843343287706375, "learning_rate": 8.54577739040242e-07, "loss": 0.0129, "step": 327470 }, { "epoch": 3.498904856028634, "grad_norm": 0.2417580634355545, "learning_rate": 8.545658933585154e-07, "loss": 0.0081, "step": 327480 }, { "epoch": 3.4990116993429137, "grad_norm": 0.12138497084379196, "learning_rate": 8.54554047276457e-07, "loss": 0.0084, "step": 327490 }, { "epoch": 3.499118542657193, "grad_norm": 11.32226276397705, "learning_rate": 8.545422007940806e-07, "loss": 0.0076, "step": 327500 }, { "epoch": 3.4992253859714726, "grad_norm": 1.3048985004425049, "learning_rate": 8.54530353911399e-07, "loss": 0.009, "step": 327510 }, { "epoch": 3.4993322292857525, "grad_norm": 0.17656360566616058, "learning_rate": 8.545185066284261e-07, "loss": 0.023, "step": 327520 }, { "epoch": 3.499439072600032, "grad_norm": 0.01328497938811779, "learning_rate": 8.54506658945175e-07, "loss": 0.0137, "step": 327530 }, { "epoch": 3.499545915914312, "grad_norm": 0.6450451612472534, "learning_rate": 8.54494810861659e-07, "loss": 0.0165, "step": 327540 }, { "epoch": 3.4996527592285913, "grad_norm": 2.455695867538452, "learning_rate": 8.544829623778917e-07, "loss": 0.017, "step": 327550 }, { "epoch": 3.4997596025428708, "grad_norm": 0.0432099886238575, "learning_rate": 8.544711134938865e-07, "loss": 0.0047, "step": 327560 }, { "epoch": 3.4998664458571507, "grad_norm": 0.005774850957095623, "learning_rate": 8.544592642096564e-07, "loss": 0.0264, "step": 327570 }, { "epoch": 3.49997328917143, "grad_norm": 0.009143903851509094, "learning_rate": 8.544474145252152e-07, "loss": 0.005, "step": 327580 }, { "epoch": 3.5000801324857096, "grad_norm": 0.297916978597641, "learning_rate": 8.544355644405761e-07, "loss": 0.0145, "step": 327590 }, { "epoch": 3.5001869757999895, "grad_norm": 0.10004457086324692, "learning_rate": 8.544237139557527e-07, "loss": 0.0113, "step": 327600 }, { "epoch": 3.500293819114269, "grad_norm": 0.03742247819900513, "learning_rate": 8.544118630707578e-07, "loss": 0.0205, "step": 327610 }, { "epoch": 3.5004006624285484, "grad_norm": 0.1618938446044922, "learning_rate": 8.544000117856053e-07, "loss": 0.0037, "step": 327620 }, { "epoch": 3.500507505742828, "grad_norm": 0.017157476395368576, "learning_rate": 8.543881601003085e-07, "loss": 0.0048, "step": 327630 }, { "epoch": 3.5006143490571078, "grad_norm": 1.9368150234222412, "learning_rate": 8.543763080148807e-07, "loss": 0.0039, "step": 327640 }, { "epoch": 3.5007211923713872, "grad_norm": 5.945335388183594, "learning_rate": 8.543644555293352e-07, "loss": 0.0197, "step": 327650 }, { "epoch": 3.500828035685667, "grad_norm": 0.00694291153922677, "learning_rate": 8.543526026436856e-07, "loss": 0.0035, "step": 327660 }, { "epoch": 3.5009348789999466, "grad_norm": 1.0083216428756714, "learning_rate": 8.543407493579452e-07, "loss": 0.0117, "step": 327670 }, { "epoch": 3.501041722314226, "grad_norm": 0.31943994760513306, "learning_rate": 8.543288956721273e-07, "loss": 0.0035, "step": 327680 }, { "epoch": 3.501148565628506, "grad_norm": 0.015044127590954304, "learning_rate": 8.543170415862453e-07, "loss": 0.009, "step": 327690 }, { "epoch": 3.5012554089427854, "grad_norm": 0.13508355617523193, "learning_rate": 8.543051871003126e-07, "loss": 0.0035, "step": 327700 }, { "epoch": 3.501362252257065, "grad_norm": 0.44614505767822266, "learning_rate": 8.542933322143427e-07, "loss": 0.014, "step": 327710 }, { "epoch": 3.501469095571345, "grad_norm": 2.7050750255584717, "learning_rate": 8.542814769283488e-07, "loss": 0.0216, "step": 327720 }, { "epoch": 3.5015759388856242, "grad_norm": 0.2590169608592987, "learning_rate": 8.542696212423444e-07, "loss": 0.011, "step": 327730 }, { "epoch": 3.5016827821999037, "grad_norm": 0.009863149374723434, "learning_rate": 8.542577651563429e-07, "loss": 0.0047, "step": 327740 }, { "epoch": 3.5017896255141836, "grad_norm": 0.007966221310198307, "learning_rate": 8.542459086703578e-07, "loss": 0.0513, "step": 327750 }, { "epoch": 3.501896468828463, "grad_norm": 0.8596200942993164, "learning_rate": 8.54234051784402e-07, "loss": 0.012, "step": 327760 }, { "epoch": 3.5020033121427425, "grad_norm": 0.00663897255435586, "learning_rate": 8.542221944984894e-07, "loss": 0.0497, "step": 327770 }, { "epoch": 3.5021101554570224, "grad_norm": 1.0865839719772339, "learning_rate": 8.542103368126332e-07, "loss": 0.013, "step": 327780 }, { "epoch": 3.502216998771302, "grad_norm": 0.07478116452693939, "learning_rate": 8.541984787268468e-07, "loss": 0.0094, "step": 327790 }, { "epoch": 3.5023238420855813, "grad_norm": 0.9894448518753052, "learning_rate": 8.541866202411434e-07, "loss": 0.0513, "step": 327800 }, { "epoch": 3.5024306853998612, "grad_norm": 0.30667951703071594, "learning_rate": 8.541747613555368e-07, "loss": 0.0038, "step": 327810 }, { "epoch": 3.5025375287141407, "grad_norm": 0.10694531351327896, "learning_rate": 8.5416290207004e-07, "loss": 0.0057, "step": 327820 }, { "epoch": 3.50264437202842, "grad_norm": 2.467763662338257, "learning_rate": 8.541510423846668e-07, "loss": 0.0025, "step": 327830 }, { "epoch": 3.5027512153427, "grad_norm": 0.015689220279455185, "learning_rate": 8.541391822994301e-07, "loss": 0.0048, "step": 327840 }, { "epoch": 3.5028580586569795, "grad_norm": 3.8280045986175537, "learning_rate": 8.541273218143436e-07, "loss": 0.0163, "step": 327850 }, { "epoch": 3.502964901971259, "grad_norm": 0.06060177832841873, "learning_rate": 8.541154609294207e-07, "loss": 0.01, "step": 327860 }, { "epoch": 3.503071745285539, "grad_norm": 0.009508500806987286, "learning_rate": 8.541035996446747e-07, "loss": 0.0283, "step": 327870 }, { "epoch": 3.5031785885998183, "grad_norm": 0.8420195579528809, "learning_rate": 8.54091737960119e-07, "loss": 0.0033, "step": 327880 }, { "epoch": 3.5032854319140982, "grad_norm": 0.7576900124549866, "learning_rate": 8.54079875875767e-07, "loss": 0.017, "step": 327890 }, { "epoch": 3.5033922752283777, "grad_norm": 0.014798924326896667, "learning_rate": 8.54068013391632e-07, "loss": 0.0164, "step": 327900 }, { "epoch": 3.503499118542657, "grad_norm": 8.945148468017578, "learning_rate": 8.540561505077277e-07, "loss": 0.0322, "step": 327910 }, { "epoch": 3.5036059618569366, "grad_norm": 0.9453946948051453, "learning_rate": 8.540442872240671e-07, "loss": 0.007, "step": 327920 }, { "epoch": 3.5037128051712165, "grad_norm": 1.1355695724487305, "learning_rate": 8.540324235406639e-07, "loss": 0.0086, "step": 327930 }, { "epoch": 3.503819648485496, "grad_norm": 0.004053099080920219, "learning_rate": 8.540205594575312e-07, "loss": 0.0062, "step": 327940 }, { "epoch": 3.503926491799776, "grad_norm": 0.013644242659211159, "learning_rate": 8.540086949746827e-07, "loss": 0.0193, "step": 327950 }, { "epoch": 3.5040333351140553, "grad_norm": 0.11145058274269104, "learning_rate": 8.539968300921316e-07, "loss": 0.0007, "step": 327960 }, { "epoch": 3.504140178428335, "grad_norm": 0.120626300573349, "learning_rate": 8.539849648098915e-07, "loss": 0.0067, "step": 327970 }, { "epoch": 3.5042470217426143, "grad_norm": 3.7398831844329834, "learning_rate": 8.539730991279756e-07, "loss": 0.0181, "step": 327980 }, { "epoch": 3.504353865056894, "grad_norm": 3.9440815448760986, "learning_rate": 8.539612330463973e-07, "loss": 0.0059, "step": 327990 }, { "epoch": 3.5044607083711736, "grad_norm": 0.14790603518486023, "learning_rate": 8.5394936656517e-07, "loss": 0.017, "step": 328000 }, { "epoch": 3.5045675516854535, "grad_norm": 14.534401893615723, "learning_rate": 8.539374996843073e-07, "loss": 0.0384, "step": 328010 }, { "epoch": 3.504674394999733, "grad_norm": 6.375437259674072, "learning_rate": 8.539256324038223e-07, "loss": 0.0123, "step": 328020 }, { "epoch": 3.5047812383140124, "grad_norm": 1.2672122716903687, "learning_rate": 8.539137647237287e-07, "loss": 0.0037, "step": 328030 }, { "epoch": 3.504888081628292, "grad_norm": 0.02385026030242443, "learning_rate": 8.539018966440397e-07, "loss": 0.0084, "step": 328040 }, { "epoch": 3.504994924942572, "grad_norm": 0.011931980960071087, "learning_rate": 8.538900281647687e-07, "loss": 0.011, "step": 328050 }, { "epoch": 3.5051017682568513, "grad_norm": 0.6701161861419678, "learning_rate": 8.538781592859292e-07, "loss": 0.0146, "step": 328060 }, { "epoch": 3.505208611571131, "grad_norm": 7.236784934997559, "learning_rate": 8.538662900075344e-07, "loss": 0.0538, "step": 328070 }, { "epoch": 3.5053154548854106, "grad_norm": 0.007934730499982834, "learning_rate": 8.538544203295981e-07, "loss": 0.0172, "step": 328080 }, { "epoch": 3.50542229819969, "grad_norm": 3.463939905166626, "learning_rate": 8.538425502521333e-07, "loss": 0.0392, "step": 328090 }, { "epoch": 3.5055291415139695, "grad_norm": 0.5322179794311523, "learning_rate": 8.538306797751535e-07, "loss": 0.0087, "step": 328100 }, { "epoch": 3.5056359848282495, "grad_norm": 2.2042648792266846, "learning_rate": 8.538188088986722e-07, "loss": 0.0151, "step": 328110 }, { "epoch": 3.505742828142529, "grad_norm": 0.857844352722168, "learning_rate": 8.538069376227029e-07, "loss": 0.0181, "step": 328120 }, { "epoch": 3.505849671456809, "grad_norm": 3.5955824851989746, "learning_rate": 8.537950659472587e-07, "loss": 0.0196, "step": 328130 }, { "epoch": 3.5059565147710883, "grad_norm": 0.023754499852657318, "learning_rate": 8.537831938723532e-07, "loss": 0.0054, "step": 328140 }, { "epoch": 3.5060633580853677, "grad_norm": 0.6764436364173889, "learning_rate": 8.537713213979997e-07, "loss": 0.0136, "step": 328150 }, { "epoch": 3.506170201399647, "grad_norm": 0.03448602184653282, "learning_rate": 8.537594485242119e-07, "loss": 0.0059, "step": 328160 }, { "epoch": 3.506277044713927, "grad_norm": 7.018514633178711, "learning_rate": 8.537475752510029e-07, "loss": 0.0264, "step": 328170 }, { "epoch": 3.5063838880282066, "grad_norm": 0.14976391196250916, "learning_rate": 8.537357015783862e-07, "loss": 0.0079, "step": 328180 }, { "epoch": 3.5064907313424865, "grad_norm": 0.025727052241563797, "learning_rate": 8.537238275063752e-07, "loss": 0.0219, "step": 328190 }, { "epoch": 3.506597574656766, "grad_norm": 0.3812929391860962, "learning_rate": 8.537119530349832e-07, "loss": 0.0102, "step": 328200 }, { "epoch": 3.5067044179710454, "grad_norm": 0.04553217440843582, "learning_rate": 8.537000781642237e-07, "loss": 0.0081, "step": 328210 }, { "epoch": 3.506811261285325, "grad_norm": 0.04180474206805229, "learning_rate": 8.536882028941101e-07, "loss": 0.015, "step": 328220 }, { "epoch": 3.5069181045996047, "grad_norm": 0.0029554644133895636, "learning_rate": 8.53676327224656e-07, "loss": 0.0106, "step": 328230 }, { "epoch": 3.507024947913884, "grad_norm": 0.2085556983947754, "learning_rate": 8.536644511558746e-07, "loss": 0.0015, "step": 328240 }, { "epoch": 3.507131791228164, "grad_norm": 0.07679197937250137, "learning_rate": 8.536525746877792e-07, "loss": 0.0042, "step": 328250 }, { "epoch": 3.5072386345424436, "grad_norm": 0.020786229521036148, "learning_rate": 8.536406978203834e-07, "loss": 0.0012, "step": 328260 }, { "epoch": 3.507345477856723, "grad_norm": 0.2340996414422989, "learning_rate": 8.536288205537006e-07, "loss": 0.0145, "step": 328270 }, { "epoch": 3.5074523211710025, "grad_norm": 0.6057754158973694, "learning_rate": 8.536169428877442e-07, "loss": 0.018, "step": 328280 }, { "epoch": 3.5075591644852824, "grad_norm": 0.28595420718193054, "learning_rate": 8.536050648225275e-07, "loss": 0.0159, "step": 328290 }, { "epoch": 3.507666007799562, "grad_norm": 0.025766145437955856, "learning_rate": 8.535931863580641e-07, "loss": 0.0041, "step": 328300 }, { "epoch": 3.5077728511138417, "grad_norm": 0.05861407890915871, "learning_rate": 8.535813074943672e-07, "loss": 0.0015, "step": 328310 }, { "epoch": 3.507879694428121, "grad_norm": 3.8941454887390137, "learning_rate": 8.535694282314504e-07, "loss": 0.0117, "step": 328320 }, { "epoch": 3.5079865377424007, "grad_norm": 1.6297638416290283, "learning_rate": 8.53557548569327e-07, "loss": 0.006, "step": 328330 }, { "epoch": 3.50809338105668, "grad_norm": 0.008849866688251495, "learning_rate": 8.535456685080105e-07, "loss": 0.0022, "step": 328340 }, { "epoch": 3.50820022437096, "grad_norm": 1.8398255109786987, "learning_rate": 8.535337880475142e-07, "loss": 0.0117, "step": 328350 }, { "epoch": 3.5083070676852395, "grad_norm": 3.948235273361206, "learning_rate": 8.535219071878515e-07, "loss": 0.012, "step": 328360 }, { "epoch": 3.5084139109995194, "grad_norm": 0.011897560209035873, "learning_rate": 8.53510025929036e-07, "loss": 0.0559, "step": 328370 }, { "epoch": 3.508520754313799, "grad_norm": 2.0461249351501465, "learning_rate": 8.534981442710809e-07, "loss": 0.0382, "step": 328380 }, { "epoch": 3.5086275976280783, "grad_norm": 0.16855213046073914, "learning_rate": 8.534862622139998e-07, "loss": 0.0033, "step": 328390 }, { "epoch": 3.5087344409423578, "grad_norm": 1.7915730476379395, "learning_rate": 8.534743797578058e-07, "loss": 0.0125, "step": 328400 }, { "epoch": 3.5088412842566377, "grad_norm": 0.02616862580180168, "learning_rate": 8.534624969025129e-07, "loss": 0.0019, "step": 328410 }, { "epoch": 3.508948127570917, "grad_norm": 0.012120961211621761, "learning_rate": 8.534506136481341e-07, "loss": 0.015, "step": 328420 }, { "epoch": 3.509054970885197, "grad_norm": 0.019271522760391235, "learning_rate": 8.534387299946827e-07, "loss": 0.0018, "step": 328430 }, { "epoch": 3.5091618141994765, "grad_norm": 0.018238862976431847, "learning_rate": 8.534268459421724e-07, "loss": 0.0416, "step": 328440 }, { "epoch": 3.509268657513756, "grad_norm": 0.023088086396455765, "learning_rate": 8.534149614906165e-07, "loss": 0.0564, "step": 328450 }, { "epoch": 3.509375500828036, "grad_norm": 0.8788106441497803, "learning_rate": 8.534030766400285e-07, "loss": 0.0109, "step": 328460 }, { "epoch": 3.5094823441423153, "grad_norm": 0.564520537853241, "learning_rate": 8.533911913904217e-07, "loss": 0.0224, "step": 328470 }, { "epoch": 3.5095891874565948, "grad_norm": 0.0016760482685640454, "learning_rate": 8.533793057418095e-07, "loss": 0.0018, "step": 328480 }, { "epoch": 3.5096960307708747, "grad_norm": 0.0069115543738007545, "learning_rate": 8.533674196942055e-07, "loss": 0.0125, "step": 328490 }, { "epoch": 3.509802874085154, "grad_norm": 0.11566724628210068, "learning_rate": 8.53355533247623e-07, "loss": 0.0188, "step": 328500 }, { "epoch": 3.5099097173994336, "grad_norm": 0.014504627324640751, "learning_rate": 8.533436464020753e-07, "loss": 0.0361, "step": 328510 }, { "epoch": 3.5100165607137135, "grad_norm": 0.1358404904603958, "learning_rate": 8.533317591575761e-07, "loss": 0.0195, "step": 328520 }, { "epoch": 3.510123404027993, "grad_norm": 0.0025879479944705963, "learning_rate": 8.533198715141387e-07, "loss": 0.0587, "step": 328530 }, { "epoch": 3.5102302473422724, "grad_norm": 0.9690602421760559, "learning_rate": 8.533079834717764e-07, "loss": 0.0176, "step": 328540 }, { "epoch": 3.5103370906565523, "grad_norm": 0.08844196051359177, "learning_rate": 8.532960950305028e-07, "loss": 0.0122, "step": 328550 }, { "epoch": 3.5104439339708318, "grad_norm": 0.004354556556791067, "learning_rate": 8.532842061903311e-07, "loss": 0.0068, "step": 328560 }, { "epoch": 3.5105507772851112, "grad_norm": 5.645024299621582, "learning_rate": 8.53272316951275e-07, "loss": 0.05, "step": 328570 }, { "epoch": 3.510657620599391, "grad_norm": 0.41364550590515137, "learning_rate": 8.532604273133478e-07, "loss": 0.021, "step": 328580 }, { "epoch": 3.5107644639136706, "grad_norm": 3.923360824584961, "learning_rate": 8.532485372765629e-07, "loss": 0.0079, "step": 328590 }, { "epoch": 3.5108713072279505, "grad_norm": 0.00451322877779603, "learning_rate": 8.532366468409337e-07, "loss": 0.0171, "step": 328600 }, { "epoch": 3.51097815054223, "grad_norm": 15.948646545410156, "learning_rate": 8.532247560064736e-07, "loss": 0.0187, "step": 328610 }, { "epoch": 3.5110849938565094, "grad_norm": 0.0031956976745277643, "learning_rate": 8.532128647731963e-07, "loss": 0.0045, "step": 328620 }, { "epoch": 3.511191837170789, "grad_norm": 6.950582504272461, "learning_rate": 8.532009731411147e-07, "loss": 0.0109, "step": 328630 }, { "epoch": 3.5112986804850688, "grad_norm": 4.278712272644043, "learning_rate": 8.531890811102429e-07, "loss": 0.0034, "step": 328640 }, { "epoch": 3.5114055237993482, "grad_norm": 0.9328448176383972, "learning_rate": 8.531771886805938e-07, "loss": 0.0141, "step": 328650 }, { "epoch": 3.511512367113628, "grad_norm": 4.958578586578369, "learning_rate": 8.531652958521809e-07, "loss": 0.0167, "step": 328660 }, { "epoch": 3.5116192104279076, "grad_norm": 1.035300612449646, "learning_rate": 8.53153402625018e-07, "loss": 0.0105, "step": 328670 }, { "epoch": 3.511726053742187, "grad_norm": 2.290832757949829, "learning_rate": 8.53141508999118e-07, "loss": 0.0157, "step": 328680 }, { "epoch": 3.5118328970564665, "grad_norm": 0.05105782300233841, "learning_rate": 8.531296149744947e-07, "loss": 0.048, "step": 328690 }, { "epoch": 3.5119397403707464, "grad_norm": 0.10296209901571274, "learning_rate": 8.531177205511614e-07, "loss": 0.025, "step": 328700 }, { "epoch": 3.512046583685026, "grad_norm": 1.9357893466949463, "learning_rate": 8.531058257291315e-07, "loss": 0.0108, "step": 328710 }, { "epoch": 3.5121534269993058, "grad_norm": 5.846623420715332, "learning_rate": 8.530939305084185e-07, "loss": 0.0142, "step": 328720 }, { "epoch": 3.5122602703135852, "grad_norm": 1.4711990356445312, "learning_rate": 8.530820348890357e-07, "loss": 0.0007, "step": 328730 }, { "epoch": 3.5123671136278647, "grad_norm": 7.10963249206543, "learning_rate": 8.530701388709969e-07, "loss": 0.0128, "step": 328740 }, { "epoch": 3.512473956942144, "grad_norm": 0.020416375249624252, "learning_rate": 8.53058242454315e-07, "loss": 0.0162, "step": 328750 }, { "epoch": 3.512580800256424, "grad_norm": 0.519775390625, "learning_rate": 8.530463456390038e-07, "loss": 0.0799, "step": 328760 }, { "epoch": 3.5126876435707035, "grad_norm": 0.00208858260884881, "learning_rate": 8.530344484250767e-07, "loss": 0.0147, "step": 328770 }, { "epoch": 3.5127944868849834, "grad_norm": 0.017359405755996704, "learning_rate": 8.530225508125469e-07, "loss": 0.0393, "step": 328780 }, { "epoch": 3.512901330199263, "grad_norm": 0.9229718446731567, "learning_rate": 8.530106528014283e-07, "loss": 0.0224, "step": 328790 }, { "epoch": 3.5130081735135423, "grad_norm": 0.21389061212539673, "learning_rate": 8.529987543917336e-07, "loss": 0.0169, "step": 328800 }, { "epoch": 3.513115016827822, "grad_norm": 8.068526268005371, "learning_rate": 8.529868555834769e-07, "loss": 0.011, "step": 328810 }, { "epoch": 3.5132218601421017, "grad_norm": 3.534963846206665, "learning_rate": 8.529749563766714e-07, "loss": 0.0057, "step": 328820 }, { "epoch": 3.513328703456381, "grad_norm": 3.889085292816162, "learning_rate": 8.529630567713305e-07, "loss": 0.0113, "step": 328830 }, { "epoch": 3.513435546770661, "grad_norm": 0.417082279920578, "learning_rate": 8.529511567674677e-07, "loss": 0.0086, "step": 328840 }, { "epoch": 3.5135423900849405, "grad_norm": 1.9104869365692139, "learning_rate": 8.529392563650963e-07, "loss": 0.0087, "step": 328850 }, { "epoch": 3.51364923339922, "grad_norm": 0.060007352381944656, "learning_rate": 8.5292735556423e-07, "loss": 0.0211, "step": 328860 }, { "epoch": 3.5137560767134994, "grad_norm": 1.5047023296356201, "learning_rate": 8.529154543648821e-07, "loss": 0.0092, "step": 328870 }, { "epoch": 3.5138629200277793, "grad_norm": 0.013037377037107944, "learning_rate": 8.529035527670658e-07, "loss": 0.0063, "step": 328880 }, { "epoch": 3.513969763342059, "grad_norm": 0.08529429882764816, "learning_rate": 8.52891650770795e-07, "loss": 0.0109, "step": 328890 }, { "epoch": 3.5140766066563387, "grad_norm": 16.91651153564453, "learning_rate": 8.528797483760828e-07, "loss": 0.0186, "step": 328900 }, { "epoch": 3.514183449970618, "grad_norm": 0.12925972044467926, "learning_rate": 8.528678455829426e-07, "loss": 0.0015, "step": 328910 }, { "epoch": 3.5142902932848976, "grad_norm": 0.07668166607618332, "learning_rate": 8.528559423913882e-07, "loss": 0.0017, "step": 328920 }, { "epoch": 3.514397136599177, "grad_norm": 0.01694668084383011, "learning_rate": 8.528440388014326e-07, "loss": 0.0102, "step": 328930 }, { "epoch": 3.514503979913457, "grad_norm": 1.1870325803756714, "learning_rate": 8.528321348130896e-07, "loss": 0.03, "step": 328940 }, { "epoch": 3.5146108232277364, "grad_norm": 0.01090221293270588, "learning_rate": 8.528202304263724e-07, "loss": 0.0119, "step": 328950 }, { "epoch": 3.5147176665420163, "grad_norm": 0.0076806084252893925, "learning_rate": 8.528083256412946e-07, "loss": 0.002, "step": 328960 }, { "epoch": 3.514824509856296, "grad_norm": 0.9485207796096802, "learning_rate": 8.527964204578695e-07, "loss": 0.0217, "step": 328970 }, { "epoch": 3.5149313531705753, "grad_norm": 1.8875178098678589, "learning_rate": 8.527845148761106e-07, "loss": 0.0079, "step": 328980 }, { "epoch": 3.5150381964848547, "grad_norm": 0.46067172288894653, "learning_rate": 8.527726088960314e-07, "loss": 0.06, "step": 328990 }, { "epoch": 3.5151450397991346, "grad_norm": 0.0050437841564416885, "learning_rate": 8.527607025176453e-07, "loss": 0.0197, "step": 329000 }, { "epoch": 3.515251883113414, "grad_norm": 0.3971252143383026, "learning_rate": 8.527487957409658e-07, "loss": 0.0121, "step": 329010 }, { "epoch": 3.515358726427694, "grad_norm": 0.0017371611902490258, "learning_rate": 8.527368885660062e-07, "loss": 0.0073, "step": 329020 }, { "epoch": 3.5154655697419734, "grad_norm": 0.008948758244514465, "learning_rate": 8.527249809927801e-07, "loss": 0.0384, "step": 329030 }, { "epoch": 3.515572413056253, "grad_norm": 1.9098206758499146, "learning_rate": 8.527130730213008e-07, "loss": 0.0068, "step": 329040 }, { "epoch": 3.5156792563705324, "grad_norm": 0.07865527272224426, "learning_rate": 8.527011646515818e-07, "loss": 0.013, "step": 329050 }, { "epoch": 3.5157860996848123, "grad_norm": 0.14665935933589935, "learning_rate": 8.526892558836367e-07, "loss": 0.0145, "step": 329060 }, { "epoch": 3.5158929429990917, "grad_norm": 0.21481937170028687, "learning_rate": 8.526773467174788e-07, "loss": 0.0303, "step": 329070 }, { "epoch": 3.5159997863133716, "grad_norm": 4.205007076263428, "learning_rate": 8.526654371531214e-07, "loss": 0.0066, "step": 329080 }, { "epoch": 3.516106629627651, "grad_norm": 0.19565384089946747, "learning_rate": 8.526535271905782e-07, "loss": 0.0014, "step": 329090 }, { "epoch": 3.5162134729419305, "grad_norm": 0.0047015780583024025, "learning_rate": 8.526416168298627e-07, "loss": 0.0006, "step": 329100 }, { "epoch": 3.51632031625621, "grad_norm": 0.1728144735097885, "learning_rate": 8.526297060709879e-07, "loss": 0.0106, "step": 329110 }, { "epoch": 3.51642715957049, "grad_norm": 0.0010702699655666947, "learning_rate": 8.526177949139677e-07, "loss": 0.0274, "step": 329120 }, { "epoch": 3.5165340028847694, "grad_norm": 2.3067970275878906, "learning_rate": 8.526058833588155e-07, "loss": 0.0439, "step": 329130 }, { "epoch": 3.5166408461990493, "grad_norm": 1.7897943258285522, "learning_rate": 8.525939714055445e-07, "loss": 0.0023, "step": 329140 }, { "epoch": 3.5167476895133287, "grad_norm": 0.3728897273540497, "learning_rate": 8.525820590541683e-07, "loss": 0.0148, "step": 329150 }, { "epoch": 3.516854532827608, "grad_norm": 7.103732109069824, "learning_rate": 8.525701463047003e-07, "loss": 0.0133, "step": 329160 }, { "epoch": 3.516961376141888, "grad_norm": 0.0049943276681005955, "learning_rate": 8.525582331571542e-07, "loss": 0.002, "step": 329170 }, { "epoch": 3.5170682194561675, "grad_norm": 2.5538439750671387, "learning_rate": 8.525463196115432e-07, "loss": 0.0031, "step": 329180 }, { "epoch": 3.517175062770447, "grad_norm": 0.05810921639204025, "learning_rate": 8.525344056678806e-07, "loss": 0.0143, "step": 329190 }, { "epoch": 3.517281906084727, "grad_norm": 0.017358653247356415, "learning_rate": 8.525224913261802e-07, "loss": 0.0116, "step": 329200 }, { "epoch": 3.5173887493990064, "grad_norm": 0.006263207644224167, "learning_rate": 8.525105765864551e-07, "loss": 0.0078, "step": 329210 }, { "epoch": 3.517495592713286, "grad_norm": 0.12878966331481934, "learning_rate": 8.524986614487193e-07, "loss": 0.0152, "step": 329220 }, { "epoch": 3.5176024360275657, "grad_norm": 0.0069544147700071335, "learning_rate": 8.524867459129857e-07, "loss": 0.0035, "step": 329230 }, { "epoch": 3.517709279341845, "grad_norm": 0.014145998284220695, "learning_rate": 8.524748299792679e-07, "loss": 0.0063, "step": 329240 }, { "epoch": 3.5178161226561246, "grad_norm": 5.0716118812561035, "learning_rate": 8.524629136475796e-07, "loss": 0.0339, "step": 329250 }, { "epoch": 3.5179229659704045, "grad_norm": 0.6956135630607605, "learning_rate": 8.524509969179338e-07, "loss": 0.0074, "step": 329260 }, { "epoch": 3.518029809284684, "grad_norm": 0.24104683101177216, "learning_rate": 8.524390797903445e-07, "loss": 0.0205, "step": 329270 }, { "epoch": 3.5181366525989635, "grad_norm": 1.6633647680282593, "learning_rate": 8.524271622648247e-07, "loss": 0.0083, "step": 329280 }, { "epoch": 3.5182434959132434, "grad_norm": 0.0060783070512115955, "learning_rate": 8.524152443413882e-07, "loss": 0.0024, "step": 329290 }, { "epoch": 3.518350339227523, "grad_norm": 0.0034803387243300676, "learning_rate": 8.524033260200483e-07, "loss": 0.0115, "step": 329300 }, { "epoch": 3.5184571825418023, "grad_norm": 0.005903689190745354, "learning_rate": 8.523914073008183e-07, "loss": 0.0018, "step": 329310 }, { "epoch": 3.518564025856082, "grad_norm": 0.14329244196414948, "learning_rate": 8.523794881837118e-07, "loss": 0.0165, "step": 329320 }, { "epoch": 3.5186708691703616, "grad_norm": 0.00505103450268507, "learning_rate": 8.523675686687423e-07, "loss": 0.0003, "step": 329330 }, { "epoch": 3.518777712484641, "grad_norm": 0.02673361264169216, "learning_rate": 8.523556487559233e-07, "loss": 0.0002, "step": 329340 }, { "epoch": 3.518884555798921, "grad_norm": 15.583152770996094, "learning_rate": 8.523437284452682e-07, "loss": 0.0107, "step": 329350 }, { "epoch": 3.5189913991132005, "grad_norm": 0.017857640981674194, "learning_rate": 8.523318077367903e-07, "loss": 0.0132, "step": 329360 }, { "epoch": 3.5190982424274804, "grad_norm": 1.061526894569397, "learning_rate": 8.523198866305033e-07, "loss": 0.0125, "step": 329370 }, { "epoch": 3.51920508574176, "grad_norm": 3.423767328262329, "learning_rate": 8.523079651264205e-07, "loss": 0.025, "step": 329380 }, { "epoch": 3.5193119290560393, "grad_norm": 0.003975673113018274, "learning_rate": 8.522960432245554e-07, "loss": 0.0306, "step": 329390 }, { "epoch": 3.5194187723703187, "grad_norm": 0.07422521710395813, "learning_rate": 8.522841209249214e-07, "loss": 0.016, "step": 329400 }, { "epoch": 3.5195256156845987, "grad_norm": 17.763141632080078, "learning_rate": 8.522721982275321e-07, "loss": 0.022, "step": 329410 }, { "epoch": 3.519632458998878, "grad_norm": 0.46552154421806335, "learning_rate": 8.522602751324011e-07, "loss": 0.0114, "step": 329420 }, { "epoch": 3.519739302313158, "grad_norm": 0.25724270939826965, "learning_rate": 8.522483516395413e-07, "loss": 0.0008, "step": 329430 }, { "epoch": 3.5198461456274375, "grad_norm": 1.2936646938323975, "learning_rate": 8.522364277489668e-07, "loss": 0.0182, "step": 329440 }, { "epoch": 3.519952988941717, "grad_norm": 0.05850830301642418, "learning_rate": 8.522245034606908e-07, "loss": 0.0012, "step": 329450 }, { "epoch": 3.5200598322559964, "grad_norm": 0.8994015455245972, "learning_rate": 8.522125787747267e-07, "loss": 0.0052, "step": 329460 }, { "epoch": 3.5201666755702763, "grad_norm": 0.0027306575793772936, "learning_rate": 8.522006536910879e-07, "loss": 0.0222, "step": 329470 }, { "epoch": 3.5202735188845558, "grad_norm": 0.8059225678443909, "learning_rate": 8.521887282097881e-07, "loss": 0.0726, "step": 329480 }, { "epoch": 3.5203803621988357, "grad_norm": 0.02619411051273346, "learning_rate": 8.521768023308407e-07, "loss": 0.0066, "step": 329490 }, { "epoch": 3.520487205513115, "grad_norm": 4.211541175842285, "learning_rate": 8.521648760542591e-07, "loss": 0.01, "step": 329500 }, { "epoch": 3.5205940488273946, "grad_norm": 0.2203625589609146, "learning_rate": 8.521529493800566e-07, "loss": 0.0119, "step": 329510 }, { "epoch": 3.520700892141674, "grad_norm": 0.28912249207496643, "learning_rate": 8.521410223082468e-07, "loss": 0.0148, "step": 329520 }, { "epoch": 3.520807735455954, "grad_norm": 1.0664186477661133, "learning_rate": 8.521290948388435e-07, "loss": 0.0221, "step": 329530 }, { "epoch": 3.5209145787702334, "grad_norm": 0.055685073137283325, "learning_rate": 8.521171669718596e-07, "loss": 0.0097, "step": 329540 }, { "epoch": 3.5210214220845133, "grad_norm": 0.02977677248418331, "learning_rate": 8.521052387073089e-07, "loss": 0.0028, "step": 329550 }, { "epoch": 3.5211282653987928, "grad_norm": 0.17043326795101166, "learning_rate": 8.52093310045205e-07, "loss": 0.0128, "step": 329560 }, { "epoch": 3.521235108713072, "grad_norm": 0.5829868912696838, "learning_rate": 8.52081380985561e-07, "loss": 0.0014, "step": 329570 }, { "epoch": 3.5213419520273517, "grad_norm": 2.8294265270233154, "learning_rate": 8.520694515283906e-07, "loss": 0.0224, "step": 329580 }, { "epoch": 3.5214487953416316, "grad_norm": 0.8087851405143738, "learning_rate": 8.520575216737074e-07, "loss": 0.0101, "step": 329590 }, { "epoch": 3.521555638655911, "grad_norm": 0.15181857347488403, "learning_rate": 8.520455914215243e-07, "loss": 0.0406, "step": 329600 }, { "epoch": 3.521662481970191, "grad_norm": 11.32850456237793, "learning_rate": 8.520336607718555e-07, "loss": 0.014, "step": 329610 }, { "epoch": 3.5217693252844704, "grad_norm": 0.01814712956547737, "learning_rate": 8.52021729724714e-07, "loss": 0.0087, "step": 329620 }, { "epoch": 3.52187616859875, "grad_norm": 2.535308361053467, "learning_rate": 8.520097982801133e-07, "loss": 0.0138, "step": 329630 }, { "epoch": 3.5219830119130293, "grad_norm": 4.281367778778076, "learning_rate": 8.519978664380672e-07, "loss": 0.0052, "step": 329640 }, { "epoch": 3.522089855227309, "grad_norm": 0.09976467490196228, "learning_rate": 8.519859341985886e-07, "loss": 0.017, "step": 329650 }, { "epoch": 3.5221966985415887, "grad_norm": 0.1308826208114624, "learning_rate": 8.519740015616917e-07, "loss": 0.058, "step": 329660 }, { "epoch": 3.5223035418558686, "grad_norm": 11.754205703735352, "learning_rate": 8.519620685273894e-07, "loss": 0.0506, "step": 329670 }, { "epoch": 3.522410385170148, "grad_norm": 0.8236117362976074, "learning_rate": 8.519501350956953e-07, "loss": 0.0073, "step": 329680 }, { "epoch": 3.5225172284844275, "grad_norm": 0.008186738938093185, "learning_rate": 8.519382012666229e-07, "loss": 0.0023, "step": 329690 }, { "epoch": 3.522624071798707, "grad_norm": 0.0021171490661799908, "learning_rate": 8.519262670401859e-07, "loss": 0.0168, "step": 329700 }, { "epoch": 3.522730915112987, "grad_norm": 0.027339421212673187, "learning_rate": 8.519143324163975e-07, "loss": 0.0131, "step": 329710 }, { "epoch": 3.5228377584272663, "grad_norm": 0.005945999175310135, "learning_rate": 8.519023973952713e-07, "loss": 0.0024, "step": 329720 }, { "epoch": 3.5229446017415462, "grad_norm": 0.7474514842033386, "learning_rate": 8.518904619768207e-07, "loss": 0.0028, "step": 329730 }, { "epoch": 3.5230514450558257, "grad_norm": 0.7034999132156372, "learning_rate": 8.518785261610592e-07, "loss": 0.0114, "step": 329740 }, { "epoch": 3.523158288370105, "grad_norm": 0.006510836072266102, "learning_rate": 8.518665899480002e-07, "loss": 0.0119, "step": 329750 }, { "epoch": 3.5232651316843846, "grad_norm": 0.006144089158624411, "learning_rate": 8.518546533376574e-07, "loss": 0.0052, "step": 329760 }, { "epoch": 3.5233719749986645, "grad_norm": 0.03743059188127518, "learning_rate": 8.518427163300442e-07, "loss": 0.0451, "step": 329770 }, { "epoch": 3.523478818312944, "grad_norm": 0.020429063588380814, "learning_rate": 8.518307789251738e-07, "loss": 0.004, "step": 329780 }, { "epoch": 3.523585661627224, "grad_norm": 1.235567569732666, "learning_rate": 8.5181884112306e-07, "loss": 0.0087, "step": 329790 }, { "epoch": 3.5236925049415033, "grad_norm": 2.5009541511535645, "learning_rate": 8.518069029237163e-07, "loss": 0.0038, "step": 329800 }, { "epoch": 3.523799348255783, "grad_norm": 3.4020490646362305, "learning_rate": 8.517949643271558e-07, "loss": 0.0038, "step": 329810 }, { "epoch": 3.5239061915700622, "grad_norm": 9.849763870239258, "learning_rate": 8.517830253333924e-07, "loss": 0.0614, "step": 329820 }, { "epoch": 3.524013034884342, "grad_norm": 0.01680171862244606, "learning_rate": 8.517710859424394e-07, "loss": 0.004, "step": 329830 }, { "epoch": 3.5241198781986216, "grad_norm": 0.18682506680488586, "learning_rate": 8.517591461543103e-07, "loss": 0.0329, "step": 329840 }, { "epoch": 3.5242267215129015, "grad_norm": 0.33655139803886414, "learning_rate": 8.517472059690185e-07, "loss": 0.015, "step": 329850 }, { "epoch": 3.524333564827181, "grad_norm": 0.005184344481676817, "learning_rate": 8.517352653865777e-07, "loss": 0.0037, "step": 329860 }, { "epoch": 3.5244404081414604, "grad_norm": 0.389385461807251, "learning_rate": 8.517233244070011e-07, "loss": 0.0307, "step": 329870 }, { "epoch": 3.52454725145574, "grad_norm": 2.034231662750244, "learning_rate": 8.517113830303023e-07, "loss": 0.0081, "step": 329880 }, { "epoch": 3.52465409477002, "grad_norm": 3.082737922668457, "learning_rate": 8.516994412564949e-07, "loss": 0.0211, "step": 329890 }, { "epoch": 3.5247609380842992, "grad_norm": 0.023363977670669556, "learning_rate": 8.516874990855924e-07, "loss": 0.0043, "step": 329900 }, { "epoch": 3.524867781398579, "grad_norm": 0.5887409448623657, "learning_rate": 8.51675556517608e-07, "loss": 0.0056, "step": 329910 }, { "epoch": 3.5249746247128586, "grad_norm": 0.039525195956230164, "learning_rate": 8.516636135525553e-07, "loss": 0.0095, "step": 329920 }, { "epoch": 3.525081468027138, "grad_norm": 1.5732157230377197, "learning_rate": 8.516516701904479e-07, "loss": 0.0179, "step": 329930 }, { "epoch": 3.525188311341418, "grad_norm": 1.1824698448181152, "learning_rate": 8.516397264312994e-07, "loss": 0.0262, "step": 329940 }, { "epoch": 3.5252951546556974, "grad_norm": 13.674311637878418, "learning_rate": 8.516277822751229e-07, "loss": 0.0089, "step": 329950 }, { "epoch": 3.525401997969977, "grad_norm": 0.5954684019088745, "learning_rate": 8.516158377219322e-07, "loss": 0.0401, "step": 329960 }, { "epoch": 3.525508841284257, "grad_norm": 0.9566748142242432, "learning_rate": 8.516038927717406e-07, "loss": 0.0074, "step": 329970 }, { "epoch": 3.5256156845985362, "grad_norm": 0.581200122833252, "learning_rate": 8.515919474245616e-07, "loss": 0.0359, "step": 329980 }, { "epoch": 3.5257225279128157, "grad_norm": 0.013106011785566807, "learning_rate": 8.515800016804089e-07, "loss": 0.0185, "step": 329990 }, { "epoch": 3.5258293712270956, "grad_norm": 0.042168665677309036, "learning_rate": 8.515680555392959e-07, "loss": 0.0077, "step": 330000 }, { "epoch": 3.525936214541375, "grad_norm": 0.26580095291137695, "learning_rate": 8.515561090012359e-07, "loss": 0.0113, "step": 330010 }, { "epoch": 3.5260430578556545, "grad_norm": 0.9840213656425476, "learning_rate": 8.515441620662424e-07, "loss": 0.005, "step": 330020 }, { "epoch": 3.5261499011699344, "grad_norm": 0.9904587268829346, "learning_rate": 8.515322147343293e-07, "loss": 0.0183, "step": 330030 }, { "epoch": 3.526256744484214, "grad_norm": 0.13757961988449097, "learning_rate": 8.515202670055097e-07, "loss": 0.0176, "step": 330040 }, { "epoch": 3.5263635877984933, "grad_norm": 0.2526257038116455, "learning_rate": 8.515083188797972e-07, "loss": 0.0047, "step": 330050 }, { "epoch": 3.5264704311127733, "grad_norm": 0.0033046125900000334, "learning_rate": 8.514963703572051e-07, "loss": 0.0474, "step": 330060 }, { "epoch": 3.5265772744270527, "grad_norm": 6.713199138641357, "learning_rate": 8.514844214377472e-07, "loss": 0.0281, "step": 330070 }, { "epoch": 3.5266841177413326, "grad_norm": 0.05275135114789009, "learning_rate": 8.514724721214368e-07, "loss": 0.0057, "step": 330080 }, { "epoch": 3.526790961055612, "grad_norm": 0.15988393127918243, "learning_rate": 8.514605224082876e-07, "loss": 0.0017, "step": 330090 }, { "epoch": 3.5268978043698915, "grad_norm": 3.8685905933380127, "learning_rate": 8.514485722983129e-07, "loss": 0.01, "step": 330100 }, { "epoch": 3.527004647684171, "grad_norm": 2.488525867462158, "learning_rate": 8.514366217915261e-07, "loss": 0.0033, "step": 330110 }, { "epoch": 3.527111490998451, "grad_norm": 0.0725025162100792, "learning_rate": 8.514246708879409e-07, "loss": 0.0001, "step": 330120 }, { "epoch": 3.5272183343127304, "grad_norm": 1.5365383625030518, "learning_rate": 8.514127195875709e-07, "loss": 0.0098, "step": 330130 }, { "epoch": 3.5273251776270103, "grad_norm": 0.17053274810314178, "learning_rate": 8.514007678904291e-07, "loss": 0.0042, "step": 330140 }, { "epoch": 3.5274320209412897, "grad_norm": 0.009702717885375023, "learning_rate": 8.513888157965296e-07, "loss": 0.0169, "step": 330150 }, { "epoch": 3.527538864255569, "grad_norm": 0.005229763686656952, "learning_rate": 8.513768633058855e-07, "loss": 0.0074, "step": 330160 }, { "epoch": 3.5276457075698486, "grad_norm": 0.36088109016418457, "learning_rate": 8.513649104185103e-07, "loss": 0.0003, "step": 330170 }, { "epoch": 3.5277525508841285, "grad_norm": 0.004256776068359613, "learning_rate": 8.513529571344177e-07, "loss": 0.0241, "step": 330180 }, { "epoch": 3.527859394198408, "grad_norm": 0.0045082722790539265, "learning_rate": 8.513410034536211e-07, "loss": 0.0282, "step": 330190 }, { "epoch": 3.527966237512688, "grad_norm": 13.745039939880371, "learning_rate": 8.513290493761338e-07, "loss": 0.0103, "step": 330200 }, { "epoch": 3.5280730808269674, "grad_norm": 16.35679054260254, "learning_rate": 8.513170949019697e-07, "loss": 0.0221, "step": 330210 }, { "epoch": 3.528179924141247, "grad_norm": 0.007777760270982981, "learning_rate": 8.51305140031142e-07, "loss": 0.0014, "step": 330220 }, { "epoch": 3.5282867674555263, "grad_norm": 1.1013578176498413, "learning_rate": 8.512931847636644e-07, "loss": 0.0073, "step": 330230 }, { "epoch": 3.528393610769806, "grad_norm": 0.6078638434410095, "learning_rate": 8.512812290995501e-07, "loss": 0.0207, "step": 330240 }, { "epoch": 3.5285004540840856, "grad_norm": 0.014497486874461174, "learning_rate": 8.512692730388129e-07, "loss": 0.0018, "step": 330250 }, { "epoch": 3.5286072973983655, "grad_norm": 9.335460662841797, "learning_rate": 8.512573165814663e-07, "loss": 0.0108, "step": 330260 }, { "epoch": 3.528714140712645, "grad_norm": 2.3683836460113525, "learning_rate": 8.512453597275236e-07, "loss": 0.0083, "step": 330270 }, { "epoch": 3.5288209840269245, "grad_norm": 0.9687711000442505, "learning_rate": 8.512334024769982e-07, "loss": 0.0094, "step": 330280 }, { "epoch": 3.528927827341204, "grad_norm": 3.2718162536621094, "learning_rate": 8.512214448299039e-07, "loss": 0.0084, "step": 330290 }, { "epoch": 3.529034670655484, "grad_norm": 0.027685383334755898, "learning_rate": 8.51209486786254e-07, "loss": 0.0036, "step": 330300 }, { "epoch": 3.5291415139697633, "grad_norm": 3.8794338703155518, "learning_rate": 8.511975283460622e-07, "loss": 0.0168, "step": 330310 }, { "epoch": 3.529248357284043, "grad_norm": 0.4974842667579651, "learning_rate": 8.511855695093419e-07, "loss": 0.001, "step": 330320 }, { "epoch": 3.5293552005983226, "grad_norm": 0.02518913894891739, "learning_rate": 8.511736102761065e-07, "loss": 0.0114, "step": 330330 }, { "epoch": 3.529462043912602, "grad_norm": 0.0025673890486359596, "learning_rate": 8.511616506463697e-07, "loss": 0.0227, "step": 330340 }, { "epoch": 3.5295688872268816, "grad_norm": 0.30348601937294006, "learning_rate": 8.511496906201447e-07, "loss": 0.0098, "step": 330350 }, { "epoch": 3.5296757305411615, "grad_norm": 2.1368091106414795, "learning_rate": 8.511377301974453e-07, "loss": 0.0076, "step": 330360 }, { "epoch": 3.529782573855441, "grad_norm": 0.05253244563937187, "learning_rate": 8.51125769378285e-07, "loss": 0.0231, "step": 330370 }, { "epoch": 3.529889417169721, "grad_norm": 0.16580837965011597, "learning_rate": 8.511138081626771e-07, "loss": 0.0022, "step": 330380 }, { "epoch": 3.5299962604840003, "grad_norm": 6.941075801849365, "learning_rate": 8.511018465506352e-07, "loss": 0.0105, "step": 330390 }, { "epoch": 3.5301031037982797, "grad_norm": 1.156817078590393, "learning_rate": 8.510898845421728e-07, "loss": 0.0231, "step": 330400 }, { "epoch": 3.530209947112559, "grad_norm": 0.42727792263031006, "learning_rate": 8.510779221373035e-07, "loss": 0.0027, "step": 330410 }, { "epoch": 3.530316790426839, "grad_norm": 0.003675983054563403, "learning_rate": 8.510659593360406e-07, "loss": 0.0084, "step": 330420 }, { "epoch": 3.5304236337411186, "grad_norm": 0.01090105902403593, "learning_rate": 8.510539961383979e-07, "loss": 0.0201, "step": 330430 }, { "epoch": 3.5305304770553985, "grad_norm": 1.6167820692062378, "learning_rate": 8.510420325443888e-07, "loss": 0.0214, "step": 330440 }, { "epoch": 3.530637320369678, "grad_norm": 0.0032885540276765823, "learning_rate": 8.510300685540265e-07, "loss": 0.0155, "step": 330450 }, { "epoch": 3.5307441636839574, "grad_norm": 2.404116630554199, "learning_rate": 8.510181041673249e-07, "loss": 0.0051, "step": 330460 }, { "epoch": 3.530851006998237, "grad_norm": 0.00786036066710949, "learning_rate": 8.510061393842973e-07, "loss": 0.0305, "step": 330470 }, { "epoch": 3.5309578503125167, "grad_norm": 0.0207982137799263, "learning_rate": 8.509941742049573e-07, "loss": 0.0152, "step": 330480 }, { "epoch": 3.531064693626796, "grad_norm": 0.047405946999788284, "learning_rate": 8.509822086293185e-07, "loss": 0.0223, "step": 330490 }, { "epoch": 3.531171536941076, "grad_norm": 1.224799394607544, "learning_rate": 8.509702426573942e-07, "loss": 0.0054, "step": 330500 }, { "epoch": 3.5312783802553556, "grad_norm": 17.66049575805664, "learning_rate": 8.50958276289198e-07, "loss": 0.0374, "step": 330510 }, { "epoch": 3.531385223569635, "grad_norm": 0.009020095691084862, "learning_rate": 8.509463095247434e-07, "loss": 0.0018, "step": 330520 }, { "epoch": 3.5314920668839145, "grad_norm": 2.579285144805908, "learning_rate": 8.50934342364044e-07, "loss": 0.0069, "step": 330530 }, { "epoch": 3.5315989101981944, "grad_norm": 0.10709992051124573, "learning_rate": 8.509223748071133e-07, "loss": 0.0027, "step": 330540 }, { "epoch": 3.531705753512474, "grad_norm": 0.2399376928806305, "learning_rate": 8.509104068539646e-07, "loss": 0.0054, "step": 330550 }, { "epoch": 3.5318125968267537, "grad_norm": 0.10106994211673737, "learning_rate": 8.508984385046116e-07, "loss": 0.0236, "step": 330560 }, { "epoch": 3.531919440141033, "grad_norm": 7.735856533050537, "learning_rate": 8.508864697590679e-07, "loss": 0.0179, "step": 330570 }, { "epoch": 3.5320262834553127, "grad_norm": 1.5579661130905151, "learning_rate": 8.508745006173467e-07, "loss": 0.0075, "step": 330580 }, { "epoch": 3.532133126769592, "grad_norm": 3.654888391494751, "learning_rate": 8.50862531079462e-07, "loss": 0.0194, "step": 330590 }, { "epoch": 3.532239970083872, "grad_norm": 0.019938986748456955, "learning_rate": 8.508505611454267e-07, "loss": 0.0201, "step": 330600 }, { "epoch": 3.5323468133981515, "grad_norm": 0.0011857987847179174, "learning_rate": 8.508385908152548e-07, "loss": 0.0146, "step": 330610 }, { "epoch": 3.5324536567124314, "grad_norm": 0.28389886021614075, "learning_rate": 8.508266200889596e-07, "loss": 0.0093, "step": 330620 }, { "epoch": 3.532560500026711, "grad_norm": 0.016890259459614754, "learning_rate": 8.508146489665546e-07, "loss": 0.0068, "step": 330630 }, { "epoch": 3.5326673433409903, "grad_norm": 0.13849225640296936, "learning_rate": 8.508026774480535e-07, "loss": 0.0047, "step": 330640 }, { "epoch": 3.53277418665527, "grad_norm": 11.64388370513916, "learning_rate": 8.507907055334696e-07, "loss": 0.0098, "step": 330650 }, { "epoch": 3.5328810299695497, "grad_norm": 3.1972742080688477, "learning_rate": 8.507787332228167e-07, "loss": 0.0398, "step": 330660 }, { "epoch": 3.532987873283829, "grad_norm": 0.582668125629425, "learning_rate": 8.50766760516108e-07, "loss": 0.0005, "step": 330670 }, { "epoch": 3.533094716598109, "grad_norm": 0.008185753598809242, "learning_rate": 8.507547874133571e-07, "loss": 0.0113, "step": 330680 }, { "epoch": 3.5332015599123885, "grad_norm": 3.7636466026306152, "learning_rate": 8.507428139145777e-07, "loss": 0.0039, "step": 330690 }, { "epoch": 3.533308403226668, "grad_norm": 4.147399425506592, "learning_rate": 8.507308400197832e-07, "loss": 0.0037, "step": 330700 }, { "epoch": 3.533415246540948, "grad_norm": 0.93587726354599, "learning_rate": 8.507188657289869e-07, "loss": 0.0252, "step": 330710 }, { "epoch": 3.5335220898552273, "grad_norm": 10.284276962280273, "learning_rate": 8.507068910422029e-07, "loss": 0.0097, "step": 330720 }, { "epoch": 3.5336289331695068, "grad_norm": 0.014499143697321415, "learning_rate": 8.506949159594441e-07, "loss": 0.0087, "step": 330730 }, { "epoch": 3.5337357764837867, "grad_norm": 2.467242956161499, "learning_rate": 8.506829404807242e-07, "loss": 0.0166, "step": 330740 }, { "epoch": 3.533842619798066, "grad_norm": 8.609241485595703, "learning_rate": 8.506709646060569e-07, "loss": 0.0363, "step": 330750 }, { "epoch": 3.5339494631123456, "grad_norm": 2.1325321197509766, "learning_rate": 8.506589883354557e-07, "loss": 0.0168, "step": 330760 }, { "epoch": 3.5340563064266255, "grad_norm": 3.596226692199707, "learning_rate": 8.506470116689339e-07, "loss": 0.0197, "step": 330770 }, { "epoch": 3.534163149740905, "grad_norm": 0.016445856541395187, "learning_rate": 8.506350346065052e-07, "loss": 0.0122, "step": 330780 }, { "epoch": 3.5342699930551844, "grad_norm": 0.044999681413173676, "learning_rate": 8.506230571481832e-07, "loss": 0.0072, "step": 330790 }, { "epoch": 3.5343768363694643, "grad_norm": 0.005183040164411068, "learning_rate": 8.506110792939811e-07, "loss": 0.0082, "step": 330800 }, { "epoch": 3.5344836796837438, "grad_norm": 4.875414848327637, "learning_rate": 8.505991010439126e-07, "loss": 0.0178, "step": 330810 }, { "epoch": 3.5345905229980232, "grad_norm": 0.1604011505842209, "learning_rate": 8.505871223979915e-07, "loss": 0.0063, "step": 330820 }, { "epoch": 3.534697366312303, "grad_norm": 0.04502356797456741, "learning_rate": 8.505751433562309e-07, "loss": 0.0096, "step": 330830 }, { "epoch": 3.5348042096265826, "grad_norm": 1.68616783618927, "learning_rate": 8.505631639186446e-07, "loss": 0.0055, "step": 330840 }, { "epoch": 3.5349110529408625, "grad_norm": 0.6834351420402527, "learning_rate": 8.505511840852459e-07, "loss": 0.0164, "step": 330850 }, { "epoch": 3.535017896255142, "grad_norm": 0.0019653411582112312, "learning_rate": 8.505392038560486e-07, "loss": 0.0008, "step": 330860 }, { "epoch": 3.5351247395694214, "grad_norm": 0.023359348997473717, "learning_rate": 8.505272232310661e-07, "loss": 0.0005, "step": 330870 }, { "epoch": 3.535231582883701, "grad_norm": 1.0223684310913086, "learning_rate": 8.505152422103118e-07, "loss": 0.0043, "step": 330880 }, { "epoch": 3.5353384261979808, "grad_norm": 1.4914031028747559, "learning_rate": 8.505032607937994e-07, "loss": 0.0015, "step": 330890 }, { "epoch": 3.5354452695122602, "grad_norm": 0.004794260952621698, "learning_rate": 8.504912789815424e-07, "loss": 0.0035, "step": 330900 }, { "epoch": 3.53555211282654, "grad_norm": 0.13851799070835114, "learning_rate": 8.504792967735541e-07, "loss": 0.0089, "step": 330910 }, { "epoch": 3.5356589561408196, "grad_norm": 0.0013597466750070453, "learning_rate": 8.504673141698484e-07, "loss": 0.0043, "step": 330920 }, { "epoch": 3.535765799455099, "grad_norm": 0.003613883862271905, "learning_rate": 8.504553311704387e-07, "loss": 0.0088, "step": 330930 }, { "epoch": 3.5358726427693785, "grad_norm": 2.7118287086486816, "learning_rate": 8.504433477753383e-07, "loss": 0.0049, "step": 330940 }, { "epoch": 3.5359794860836584, "grad_norm": 0.0034193098545074463, "learning_rate": 8.50431363984561e-07, "loss": 0.001, "step": 330950 }, { "epoch": 3.536086329397938, "grad_norm": 0.001091785030439496, "learning_rate": 8.504193797981203e-07, "loss": 0.0082, "step": 330960 }, { "epoch": 3.536193172712218, "grad_norm": 0.002905569737777114, "learning_rate": 8.504073952160296e-07, "loss": 0.0012, "step": 330970 }, { "epoch": 3.5363000160264972, "grad_norm": 0.048887163400650024, "learning_rate": 8.503954102383026e-07, "loss": 0.007, "step": 330980 }, { "epoch": 3.5364068593407767, "grad_norm": 0.0007617601659148932, "learning_rate": 8.503834248649526e-07, "loss": 0.0009, "step": 330990 }, { "epoch": 3.536513702655056, "grad_norm": 0.7482448816299438, "learning_rate": 8.503714390959933e-07, "loss": 0.0212, "step": 331000 }, { "epoch": 3.536620545969336, "grad_norm": 0.5057507753372192, "learning_rate": 8.503594529314382e-07, "loss": 0.0219, "step": 331010 }, { "epoch": 3.5367273892836155, "grad_norm": 0.8415505290031433, "learning_rate": 8.503474663713009e-07, "loss": 0.0211, "step": 331020 }, { "epoch": 3.5368342325978954, "grad_norm": 0.01008906401693821, "learning_rate": 8.503354794155948e-07, "loss": 0.0052, "step": 331030 }, { "epoch": 3.536941075912175, "grad_norm": 1.044079065322876, "learning_rate": 8.503234920643337e-07, "loss": 0.0049, "step": 331040 }, { "epoch": 3.5370479192264543, "grad_norm": 8.326674461364746, "learning_rate": 8.503115043175306e-07, "loss": 0.0233, "step": 331050 }, { "epoch": 3.537154762540734, "grad_norm": 0.034724973142147064, "learning_rate": 8.502995161751995e-07, "loss": 0.0044, "step": 331060 }, { "epoch": 3.5372616058550137, "grad_norm": 10.34634017944336, "learning_rate": 8.50287527637354e-07, "loss": 0.0159, "step": 331070 }, { "epoch": 3.537368449169293, "grad_norm": 6.221568584442139, "learning_rate": 8.502755387040072e-07, "loss": 0.0113, "step": 331080 }, { "epoch": 3.537475292483573, "grad_norm": 0.008751089684665203, "learning_rate": 8.502635493751731e-07, "loss": 0.0051, "step": 331090 }, { "epoch": 3.5375821357978525, "grad_norm": 0.039438396692276, "learning_rate": 8.502515596508649e-07, "loss": 0.03, "step": 331100 }, { "epoch": 3.537688979112132, "grad_norm": 1.5783467292785645, "learning_rate": 8.502395695310962e-07, "loss": 0.0108, "step": 331110 }, { "epoch": 3.5377958224264114, "grad_norm": 0.014615191146731377, "learning_rate": 8.502275790158806e-07, "loss": 0.0264, "step": 331120 }, { "epoch": 3.5379026657406913, "grad_norm": 0.012029525823891163, "learning_rate": 8.502155881052316e-07, "loss": 0.0051, "step": 331130 }, { "epoch": 3.538009509054971, "grad_norm": 5.5655317306518555, "learning_rate": 8.502035967991628e-07, "loss": 0.0091, "step": 331140 }, { "epoch": 3.5381163523692507, "grad_norm": 0.006324101239442825, "learning_rate": 8.501916050976879e-07, "loss": 0.0077, "step": 331150 }, { "epoch": 3.53822319568353, "grad_norm": 0.0742395669221878, "learning_rate": 8.501796130008201e-07, "loss": 0.0059, "step": 331160 }, { "epoch": 3.5383300389978096, "grad_norm": 0.058743588626384735, "learning_rate": 8.501676205085731e-07, "loss": 0.0153, "step": 331170 }, { "epoch": 3.538436882312089, "grad_norm": 0.04248247295618057, "learning_rate": 8.501556276209603e-07, "loss": 0.0332, "step": 331180 }, { "epoch": 3.538543725626369, "grad_norm": 0.24473455548286438, "learning_rate": 8.501436343379955e-07, "loss": 0.0108, "step": 331190 }, { "epoch": 3.5386505689406484, "grad_norm": 0.00292946957051754, "learning_rate": 8.501316406596922e-07, "loss": 0.008, "step": 331200 }, { "epoch": 3.5387574122549283, "grad_norm": 18.906185150146484, "learning_rate": 8.501196465860636e-07, "loss": 0.0229, "step": 331210 }, { "epoch": 3.538864255569208, "grad_norm": 3.2917189598083496, "learning_rate": 8.501076521171236e-07, "loss": 0.0031, "step": 331220 }, { "epoch": 3.5389710988834873, "grad_norm": 1.4939069747924805, "learning_rate": 8.500956572528856e-07, "loss": 0.034, "step": 331230 }, { "epoch": 3.5390779421977667, "grad_norm": 0.018865926191210747, "learning_rate": 8.500836619933633e-07, "loss": 0.0776, "step": 331240 }, { "epoch": 3.5391847855120466, "grad_norm": 0.043457210063934326, "learning_rate": 8.500716663385701e-07, "loss": 0.0041, "step": 331250 }, { "epoch": 3.539291628826326, "grad_norm": 0.07390117645263672, "learning_rate": 8.500596702885197e-07, "loss": 0.0052, "step": 331260 }, { "epoch": 3.539398472140606, "grad_norm": 0.5221887826919556, "learning_rate": 8.500476738432253e-07, "loss": 0.0033, "step": 331270 }, { "epoch": 3.5395053154548854, "grad_norm": 0.010637866333127022, "learning_rate": 8.500356770027006e-07, "loss": 0.031, "step": 331280 }, { "epoch": 3.539612158769165, "grad_norm": 0.015045066364109516, "learning_rate": 8.500236797669594e-07, "loss": 0.0118, "step": 331290 }, { "epoch": 3.5397190020834444, "grad_norm": 0.3439667820930481, "learning_rate": 8.50011682136015e-07, "loss": 0.0053, "step": 331300 }, { "epoch": 3.5398258453977243, "grad_norm": 7.858676433563232, "learning_rate": 8.499996841098809e-07, "loss": 0.0149, "step": 331310 }, { "epoch": 3.5399326887120037, "grad_norm": 18.976842880249023, "learning_rate": 8.499876856885708e-07, "loss": 0.0568, "step": 331320 }, { "epoch": 3.5400395320262836, "grad_norm": 0.10610563308000565, "learning_rate": 8.499756868720981e-07, "loss": 0.053, "step": 331330 }, { "epoch": 3.540146375340563, "grad_norm": 4.262912273406982, "learning_rate": 8.499636876604766e-07, "loss": 0.0377, "step": 331340 }, { "epoch": 3.5402532186548425, "grad_norm": 1.3321329355239868, "learning_rate": 8.499516880537197e-07, "loss": 0.0094, "step": 331350 }, { "epoch": 3.540360061969122, "grad_norm": 12.688474655151367, "learning_rate": 8.499396880518408e-07, "loss": 0.0401, "step": 331360 }, { "epoch": 3.540466905283402, "grad_norm": 0.2995622456073761, "learning_rate": 8.499276876548535e-07, "loss": 0.0197, "step": 331370 }, { "epoch": 3.5405737485976814, "grad_norm": 0.004371998831629753, "learning_rate": 8.499156868627717e-07, "loss": 0.0132, "step": 331380 }, { "epoch": 3.5406805919119613, "grad_norm": 0.1683346927165985, "learning_rate": 8.499036856756084e-07, "loss": 0.0045, "step": 331390 }, { "epoch": 3.5407874352262407, "grad_norm": 0.21414968371391296, "learning_rate": 8.498916840933777e-07, "loss": 0.0091, "step": 331400 }, { "epoch": 3.54089427854052, "grad_norm": 5.870412349700928, "learning_rate": 8.498796821160927e-07, "loss": 0.0413, "step": 331410 }, { "epoch": 3.5410011218548, "grad_norm": 0.8884931802749634, "learning_rate": 8.498676797437672e-07, "loss": 0.0238, "step": 331420 }, { "epoch": 3.5411079651690796, "grad_norm": 2.668534517288208, "learning_rate": 8.498556769764147e-07, "loss": 0.0269, "step": 331430 }, { "epoch": 3.541214808483359, "grad_norm": 1.3342230319976807, "learning_rate": 8.498436738140487e-07, "loss": 0.0175, "step": 331440 }, { "epoch": 3.541321651797639, "grad_norm": 7.043084621429443, "learning_rate": 8.498316702566826e-07, "loss": 0.0222, "step": 331450 }, { "epoch": 3.5414284951119184, "grad_norm": 0.18460427224636078, "learning_rate": 8.498196663043304e-07, "loss": 0.0047, "step": 331460 }, { "epoch": 3.541535338426198, "grad_norm": 4.2460408210754395, "learning_rate": 8.498076619570054e-07, "loss": 0.0038, "step": 331470 }, { "epoch": 3.5416421817404777, "grad_norm": 4.299131393432617, "learning_rate": 8.497956572147209e-07, "loss": 0.0228, "step": 331480 }, { "epoch": 3.541749025054757, "grad_norm": 0.011191298253834248, "learning_rate": 8.49783652077491e-07, "loss": 0.0038, "step": 331490 }, { "epoch": 3.5418558683690367, "grad_norm": 1.7086668014526367, "learning_rate": 8.497716465453289e-07, "loss": 0.004, "step": 331500 }, { "epoch": 3.5419627116833166, "grad_norm": 1.8922555446624756, "learning_rate": 8.49759640618248e-07, "loss": 0.0031, "step": 331510 }, { "epoch": 3.542069554997596, "grad_norm": 0.000566008617170155, "learning_rate": 8.497476342962622e-07, "loss": 0.0456, "step": 331520 }, { "epoch": 3.5421763983118755, "grad_norm": 8.109060287475586, "learning_rate": 8.49735627579385e-07, "loss": 0.0103, "step": 331530 }, { "epoch": 3.5422832416261554, "grad_norm": 6.1684794425964355, "learning_rate": 8.497236204676296e-07, "loss": 0.0155, "step": 331540 }, { "epoch": 3.542390084940435, "grad_norm": 0.006556957960128784, "learning_rate": 8.497116129610102e-07, "loss": 0.0083, "step": 331550 }, { "epoch": 3.5424969282547147, "grad_norm": 0.5768734812736511, "learning_rate": 8.496996050595399e-07, "loss": 0.0052, "step": 331560 }, { "epoch": 3.542603771568994, "grad_norm": 0.0012904007453471422, "learning_rate": 8.496875967632322e-07, "loss": 0.0188, "step": 331570 }, { "epoch": 3.5427106148832737, "grad_norm": 0.027359597384929657, "learning_rate": 8.49675588072101e-07, "loss": 0.0036, "step": 331580 }, { "epoch": 3.542817458197553, "grad_norm": 1.740955114364624, "learning_rate": 8.496635789861595e-07, "loss": 0.0153, "step": 331590 }, { "epoch": 3.542924301511833, "grad_norm": 0.16121360659599304, "learning_rate": 8.496515695054216e-07, "loss": 0.0024, "step": 331600 }, { "epoch": 3.5430311448261125, "grad_norm": 0.013562418520450592, "learning_rate": 8.496395596299006e-07, "loss": 0.0126, "step": 331610 }, { "epoch": 3.5431379881403924, "grad_norm": 0.06023189425468445, "learning_rate": 8.496275493596101e-07, "loss": 0.0082, "step": 331620 }, { "epoch": 3.543244831454672, "grad_norm": 3.1762688159942627, "learning_rate": 8.496155386945639e-07, "loss": 0.0139, "step": 331630 }, { "epoch": 3.5433516747689513, "grad_norm": 0.00993406679481268, "learning_rate": 8.49603527634775e-07, "loss": 0.0043, "step": 331640 }, { "epoch": 3.5434585180832308, "grad_norm": 10.415501594543457, "learning_rate": 8.495915161802576e-07, "loss": 0.0119, "step": 331650 }, { "epoch": 3.5435653613975107, "grad_norm": 5.415933132171631, "learning_rate": 8.49579504331025e-07, "loss": 0.0417, "step": 331660 }, { "epoch": 3.54367220471179, "grad_norm": 0.5741822123527527, "learning_rate": 8.495674920870908e-07, "loss": 0.0333, "step": 331670 }, { "epoch": 3.54377904802607, "grad_norm": 1.9675862789154053, "learning_rate": 8.495554794484683e-07, "loss": 0.0433, "step": 331680 }, { "epoch": 3.5438858913403495, "grad_norm": 0.07234024256467819, "learning_rate": 8.495434664151715e-07, "loss": 0.0075, "step": 331690 }, { "epoch": 3.543992734654629, "grad_norm": 0.001089008990675211, "learning_rate": 8.495314529872137e-07, "loss": 0.0131, "step": 331700 }, { "epoch": 3.5440995779689084, "grad_norm": 2.3912012577056885, "learning_rate": 8.495194391646083e-07, "loss": 0.033, "step": 331710 }, { "epoch": 3.5442064212831883, "grad_norm": 6.024006366729736, "learning_rate": 8.495074249473693e-07, "loss": 0.0092, "step": 331720 }, { "epoch": 3.5443132645974678, "grad_norm": 0.5475868582725525, "learning_rate": 8.494954103355099e-07, "loss": 0.0072, "step": 331730 }, { "epoch": 3.5444201079117477, "grad_norm": 0.005646595265716314, "learning_rate": 8.494833953290439e-07, "loss": 0.0132, "step": 331740 }, { "epoch": 3.544526951226027, "grad_norm": 0.085379958152771, "learning_rate": 8.494713799279846e-07, "loss": 0.0027, "step": 331750 }, { "epoch": 3.5446337945403066, "grad_norm": 0.9565885663032532, "learning_rate": 8.49459364132346e-07, "loss": 0.0642, "step": 331760 }, { "epoch": 3.544740637854586, "grad_norm": 4.963415622711182, "learning_rate": 8.494473479421414e-07, "loss": 0.0247, "step": 331770 }, { "epoch": 3.544847481168866, "grad_norm": 3.1857047080993652, "learning_rate": 8.494353313573841e-07, "loss": 0.0067, "step": 331780 }, { "epoch": 3.5449543244831454, "grad_norm": 12.629142761230469, "learning_rate": 8.494233143780881e-07, "loss": 0.0441, "step": 331790 }, { "epoch": 3.5450611677974253, "grad_norm": 0.21074479818344116, "learning_rate": 8.494112970042668e-07, "loss": 0.0174, "step": 331800 }, { "epoch": 3.5451680111117048, "grad_norm": 2.8629038333892822, "learning_rate": 8.493992792359339e-07, "loss": 0.0141, "step": 331810 }, { "epoch": 3.5452748544259842, "grad_norm": 0.04061363264918327, "learning_rate": 8.493872610731026e-07, "loss": 0.0077, "step": 331820 }, { "epoch": 3.5453816977402637, "grad_norm": 2.2676384449005127, "learning_rate": 8.49375242515787e-07, "loss": 0.0068, "step": 331830 }, { "epoch": 3.5454885410545436, "grad_norm": 0.09594164788722992, "learning_rate": 8.493632235640001e-07, "loss": 0.0058, "step": 331840 }, { "epoch": 3.545595384368823, "grad_norm": 0.006128788460046053, "learning_rate": 8.493512042177559e-07, "loss": 0.0153, "step": 331850 }, { "epoch": 3.545702227683103, "grad_norm": 0.4087032377719879, "learning_rate": 8.493391844770677e-07, "loss": 0.0213, "step": 331860 }, { "epoch": 3.5458090709973824, "grad_norm": 11.330229759216309, "learning_rate": 8.493271643419493e-07, "loss": 0.0032, "step": 331870 }, { "epoch": 3.545915914311662, "grad_norm": 0.11339451372623444, "learning_rate": 8.493151438124142e-07, "loss": 0.0096, "step": 331880 }, { "epoch": 3.5460227576259413, "grad_norm": 0.08382432162761688, "learning_rate": 8.493031228884759e-07, "loss": 0.0334, "step": 331890 }, { "epoch": 3.5461296009402212, "grad_norm": 0.39728015661239624, "learning_rate": 8.49291101570148e-07, "loss": 0.0033, "step": 331900 }, { "epoch": 3.5462364442545007, "grad_norm": 0.026240352541208267, "learning_rate": 8.49279079857444e-07, "loss": 0.0236, "step": 331910 }, { "epoch": 3.5463432875687806, "grad_norm": 0.0015895816031843424, "learning_rate": 8.492670577503778e-07, "loss": 0.0362, "step": 331920 }, { "epoch": 3.54645013088306, "grad_norm": 12.085901260375977, "learning_rate": 8.492550352489625e-07, "loss": 0.0337, "step": 331930 }, { "epoch": 3.5465569741973395, "grad_norm": 8.203062057495117, "learning_rate": 8.492430123532119e-07, "loss": 0.014, "step": 331940 }, { "epoch": 3.546663817511619, "grad_norm": 3.712150812149048, "learning_rate": 8.492309890631398e-07, "loss": 0.0118, "step": 331950 }, { "epoch": 3.546770660825899, "grad_norm": 0.04152575880289078, "learning_rate": 8.492189653787594e-07, "loss": 0.0107, "step": 331960 }, { "epoch": 3.5468775041401783, "grad_norm": 1.4014192819595337, "learning_rate": 8.492069413000844e-07, "loss": 0.007, "step": 331970 }, { "epoch": 3.5469843474544582, "grad_norm": 0.05299722030758858, "learning_rate": 8.491949168271285e-07, "loss": 0.0341, "step": 331980 }, { "epoch": 3.5470911907687377, "grad_norm": 0.008449537679553032, "learning_rate": 8.491828919599051e-07, "loss": 0.0103, "step": 331990 }, { "epoch": 3.547198034083017, "grad_norm": 2.9754292964935303, "learning_rate": 8.491708666984278e-07, "loss": 0.014, "step": 332000 }, { "epoch": 3.5473048773972966, "grad_norm": 1.7542493343353271, "learning_rate": 8.491588410427104e-07, "loss": 0.0426, "step": 332010 }, { "epoch": 3.5474117207115765, "grad_norm": 4.489734172821045, "learning_rate": 8.491468149927663e-07, "loss": 0.01, "step": 332020 }, { "epoch": 3.547518564025856, "grad_norm": 0.013173720799386501, "learning_rate": 8.491347885486091e-07, "loss": 0.0433, "step": 332030 }, { "epoch": 3.547625407340136, "grad_norm": 0.22128833830356598, "learning_rate": 8.491227617102522e-07, "loss": 0.0023, "step": 332040 }, { "epoch": 3.5477322506544153, "grad_norm": 3.74874210357666, "learning_rate": 8.491107344777096e-07, "loss": 0.0093, "step": 332050 }, { "epoch": 3.547839093968695, "grad_norm": 0.7016459703445435, "learning_rate": 8.490987068509945e-07, "loss": 0.0017, "step": 332060 }, { "epoch": 3.5479459372829742, "grad_norm": 0.7421004176139832, "learning_rate": 8.490866788301206e-07, "loss": 0.0214, "step": 332070 }, { "epoch": 3.548052780597254, "grad_norm": 0.8443055152893066, "learning_rate": 8.490746504151014e-07, "loss": 0.036, "step": 332080 }, { "epoch": 3.5481596239115336, "grad_norm": 0.002963122446089983, "learning_rate": 8.490626216059506e-07, "loss": 0.0128, "step": 332090 }, { "epoch": 3.5482664672258135, "grad_norm": 1.4254204034805298, "learning_rate": 8.490505924026821e-07, "loss": 0.009, "step": 332100 }, { "epoch": 3.548373310540093, "grad_norm": 0.004284121561795473, "learning_rate": 8.490385628053087e-07, "loss": 0.0126, "step": 332110 }, { "epoch": 3.5484801538543724, "grad_norm": 0.02082071639597416, "learning_rate": 8.490265328138447e-07, "loss": 0.0111, "step": 332120 }, { "epoch": 3.5485869971686523, "grad_norm": 0.669051468372345, "learning_rate": 8.490145024283033e-07, "loss": 0.0328, "step": 332130 }, { "epoch": 3.548693840482932, "grad_norm": 5.818005561828613, "learning_rate": 8.490024716486981e-07, "loss": 0.0159, "step": 332140 }, { "epoch": 3.5488006837972113, "grad_norm": 0.0034606563858687878, "learning_rate": 8.489904404750429e-07, "loss": 0.0056, "step": 332150 }, { "epoch": 3.548907527111491, "grad_norm": 2.0375118255615234, "learning_rate": 8.489784089073511e-07, "loss": 0.0145, "step": 332160 }, { "epoch": 3.5490143704257706, "grad_norm": 0.10147678107023239, "learning_rate": 8.489663769456363e-07, "loss": 0.0239, "step": 332170 }, { "epoch": 3.54912121374005, "grad_norm": 0.8205528259277344, "learning_rate": 8.489543445899122e-07, "loss": 0.0149, "step": 332180 }, { "epoch": 3.54922805705433, "grad_norm": 0.2749914824962616, "learning_rate": 8.489423118401923e-07, "loss": 0.0101, "step": 332190 }, { "epoch": 3.5493349003686094, "grad_norm": 0.006222825031727552, "learning_rate": 8.489302786964902e-07, "loss": 0.0071, "step": 332200 }, { "epoch": 3.549441743682889, "grad_norm": 0.060899533331394196, "learning_rate": 8.489182451588194e-07, "loss": 0.0263, "step": 332210 }, { "epoch": 3.549548586997169, "grad_norm": 0.7065132260322571, "learning_rate": 8.489062112271936e-07, "loss": 0.0347, "step": 332220 }, { "epoch": 3.5496554303114483, "grad_norm": 0.003445771522819996, "learning_rate": 8.488941769016263e-07, "loss": 0.0053, "step": 332230 }, { "epoch": 3.5497622736257277, "grad_norm": 0.13979405164718628, "learning_rate": 8.488821421821313e-07, "loss": 0.0124, "step": 332240 }, { "epoch": 3.5498691169400076, "grad_norm": 0.002808633027598262, "learning_rate": 8.488701070687218e-07, "loss": 0.0032, "step": 332250 }, { "epoch": 3.549975960254287, "grad_norm": 0.012697666883468628, "learning_rate": 8.488580715614118e-07, "loss": 0.0063, "step": 332260 }, { "epoch": 3.5500828035685665, "grad_norm": 0.019749628379940987, "learning_rate": 8.488460356602147e-07, "loss": 0.0021, "step": 332270 }, { "epoch": 3.5501896468828464, "grad_norm": 0.004476035013794899, "learning_rate": 8.488339993651441e-07, "loss": 0.0636, "step": 332280 }, { "epoch": 3.550296490197126, "grad_norm": 6.33896541595459, "learning_rate": 8.488219626762137e-07, "loss": 0.0071, "step": 332290 }, { "epoch": 3.5504033335114054, "grad_norm": 3.142488956451416, "learning_rate": 8.488099255934367e-07, "loss": 0.0165, "step": 332300 }, { "epoch": 3.5505101768256853, "grad_norm": 13.480342864990234, "learning_rate": 8.487978881168272e-07, "loss": 0.0149, "step": 332310 }, { "epoch": 3.5506170201399647, "grad_norm": 1.5538805723190308, "learning_rate": 8.487858502463983e-07, "loss": 0.0338, "step": 332320 }, { "epoch": 3.5507238634542446, "grad_norm": 4.807985305786133, "learning_rate": 8.487738119821641e-07, "loss": 0.0247, "step": 332330 }, { "epoch": 3.550830706768524, "grad_norm": 3.4775314331054688, "learning_rate": 8.487617733241379e-07, "loss": 0.0066, "step": 332340 }, { "epoch": 3.5509375500828035, "grad_norm": 0.5429568886756897, "learning_rate": 8.487497342723333e-07, "loss": 0.0919, "step": 332350 }, { "epoch": 3.551044393397083, "grad_norm": 1.3514724969863892, "learning_rate": 8.487376948267638e-07, "loss": 0.0099, "step": 332360 }, { "epoch": 3.551151236711363, "grad_norm": 0.0015256265178322792, "learning_rate": 8.487256549874433e-07, "loss": 0.0062, "step": 332370 }, { "epoch": 3.5512580800256424, "grad_norm": 2.828150987625122, "learning_rate": 8.487136147543852e-07, "loss": 0.0151, "step": 332380 }, { "epoch": 3.5513649233399223, "grad_norm": 0.005401941016316414, "learning_rate": 8.48701574127603e-07, "loss": 0.0048, "step": 332390 }, { "epoch": 3.5514717666542017, "grad_norm": 3.2433969974517822, "learning_rate": 8.486895331071105e-07, "loss": 0.0166, "step": 332400 }, { "epoch": 3.551578609968481, "grad_norm": 0.4161908030509949, "learning_rate": 8.486774916929211e-07, "loss": 0.011, "step": 332410 }, { "epoch": 3.5516854532827606, "grad_norm": 0.0520319789648056, "learning_rate": 8.486654498850485e-07, "loss": 0.0535, "step": 332420 }, { "epoch": 3.5517922965970405, "grad_norm": 2.5277419090270996, "learning_rate": 8.486534076835065e-07, "loss": 0.0429, "step": 332430 }, { "epoch": 3.55189913991132, "grad_norm": 0.5964410305023193, "learning_rate": 8.486413650883084e-07, "loss": 0.0073, "step": 332440 }, { "epoch": 3.5520059832256, "grad_norm": 4.318260192871094, "learning_rate": 8.486293220994678e-07, "loss": 0.012, "step": 332450 }, { "epoch": 3.5521128265398794, "grad_norm": 0.0056475019082427025, "learning_rate": 8.486172787169984e-07, "loss": 0.0113, "step": 332460 }, { "epoch": 3.552219669854159, "grad_norm": 0.4813576638698578, "learning_rate": 8.486052349409139e-07, "loss": 0.0209, "step": 332470 }, { "epoch": 3.5523265131684383, "grad_norm": 3.406451463699341, "learning_rate": 8.485931907712276e-07, "loss": 0.0029, "step": 332480 }, { "epoch": 3.552433356482718, "grad_norm": 0.3437250256538391, "learning_rate": 8.485811462079533e-07, "loss": 0.0123, "step": 332490 }, { "epoch": 3.5525401997969976, "grad_norm": 1.382267951965332, "learning_rate": 8.485691012511047e-07, "loss": 0.0163, "step": 332500 }, { "epoch": 3.5526470431112775, "grad_norm": 2.7214317321777344, "learning_rate": 8.485570559006951e-07, "loss": 0.0069, "step": 332510 }, { "epoch": 3.552753886425557, "grad_norm": 1.97209632396698, "learning_rate": 8.485450101567384e-07, "loss": 0.0097, "step": 332520 }, { "epoch": 3.5528607297398365, "grad_norm": 0.03350052982568741, "learning_rate": 8.485329640192481e-07, "loss": 0.0103, "step": 332530 }, { "epoch": 3.552967573054116, "grad_norm": 2.037060260772705, "learning_rate": 8.485209174882377e-07, "loss": 0.0357, "step": 332540 }, { "epoch": 3.553074416368396, "grad_norm": 0.042647238820791245, "learning_rate": 8.485088705637209e-07, "loss": 0.0077, "step": 332550 }, { "epoch": 3.5531812596826753, "grad_norm": 0.0025900949258357286, "learning_rate": 8.484968232457114e-07, "loss": 0.0115, "step": 332560 }, { "epoch": 3.553288102996955, "grad_norm": 0.02096436731517315, "learning_rate": 8.484847755342226e-07, "loss": 0.0108, "step": 332570 }, { "epoch": 3.5533949463112346, "grad_norm": 0.0059823845513165, "learning_rate": 8.484727274292681e-07, "loss": 0.0333, "step": 332580 }, { "epoch": 3.553501789625514, "grad_norm": 0.013747874647378922, "learning_rate": 8.484606789308617e-07, "loss": 0.0106, "step": 332590 }, { "epoch": 3.5536086329397936, "grad_norm": 2.0124993324279785, "learning_rate": 8.484486300390168e-07, "loss": 0.0118, "step": 332600 }, { "epoch": 3.5537154762540735, "grad_norm": 2.05600643157959, "learning_rate": 8.484365807537472e-07, "loss": 0.0173, "step": 332610 }, { "epoch": 3.553822319568353, "grad_norm": 0.07946156710386276, "learning_rate": 8.484245310750664e-07, "loss": 0.0299, "step": 332620 }, { "epoch": 3.553929162882633, "grad_norm": 0.11666931957006454, "learning_rate": 8.48412481002988e-07, "loss": 0.0153, "step": 332630 }, { "epoch": 3.5540360061969123, "grad_norm": 0.5921986699104309, "learning_rate": 8.484004305375256e-07, "loss": 0.0002, "step": 332640 }, { "epoch": 3.5541428495111917, "grad_norm": 3.6904540061950684, "learning_rate": 8.483883796786927e-07, "loss": 0.0115, "step": 332650 }, { "epoch": 3.554249692825471, "grad_norm": 2.5173375606536865, "learning_rate": 8.483763284265032e-07, "loss": 0.0203, "step": 332660 }, { "epoch": 3.554356536139751, "grad_norm": 1.7999473810195923, "learning_rate": 8.483642767809704e-07, "loss": 0.0146, "step": 332670 }, { "epoch": 3.5544633794540306, "grad_norm": 3.1672489643096924, "learning_rate": 8.483522247421081e-07, "loss": 0.0347, "step": 332680 }, { "epoch": 3.5545702227683105, "grad_norm": 2.863774538040161, "learning_rate": 8.483401723099298e-07, "loss": 0.0327, "step": 332690 }, { "epoch": 3.55467706608259, "grad_norm": 0.5241659283638, "learning_rate": 8.483281194844493e-07, "loss": 0.0091, "step": 332700 }, { "epoch": 3.5547839093968694, "grad_norm": 2.5157744884490967, "learning_rate": 8.483160662656799e-07, "loss": 0.0183, "step": 332710 }, { "epoch": 3.554890752711149, "grad_norm": 0.31053999066352844, "learning_rate": 8.483040126536354e-07, "loss": 0.0093, "step": 332720 }, { "epoch": 3.5549975960254288, "grad_norm": 0.26613104343414307, "learning_rate": 8.482919586483294e-07, "loss": 0.0031, "step": 332730 }, { "epoch": 3.555104439339708, "grad_norm": 0.005079508759081364, "learning_rate": 8.482799042497756e-07, "loss": 0.0088, "step": 332740 }, { "epoch": 3.555211282653988, "grad_norm": 2.752427577972412, "learning_rate": 8.482678494579874e-07, "loss": 0.0424, "step": 332750 }, { "epoch": 3.5553181259682676, "grad_norm": 0.10220225900411606, "learning_rate": 8.482557942729784e-07, "loss": 0.0391, "step": 332760 }, { "epoch": 3.555424969282547, "grad_norm": 2.170772075653076, "learning_rate": 8.482437386947625e-07, "loss": 0.0022, "step": 332770 }, { "epoch": 3.5555318125968265, "grad_norm": 0.0028066961094737053, "learning_rate": 8.482316827233531e-07, "loss": 0.0037, "step": 332780 }, { "epoch": 3.5556386559111064, "grad_norm": 0.037070468068122864, "learning_rate": 8.482196263587639e-07, "loss": 0.0118, "step": 332790 }, { "epoch": 3.555745499225386, "grad_norm": 6.438416481018066, "learning_rate": 8.482075696010083e-07, "loss": 0.0116, "step": 332800 }, { "epoch": 3.5558523425396658, "grad_norm": 0.004582736175507307, "learning_rate": 8.481955124501003e-07, "loss": 0.0479, "step": 332810 }, { "epoch": 3.555959185853945, "grad_norm": 0.12815098464488983, "learning_rate": 8.481834549060531e-07, "loss": 0.0295, "step": 332820 }, { "epoch": 3.5560660291682247, "grad_norm": 1.9563263654708862, "learning_rate": 8.481713969688806e-07, "loss": 0.0063, "step": 332830 }, { "epoch": 3.556172872482504, "grad_norm": 0.008930115960538387, "learning_rate": 8.481593386385962e-07, "loss": 0.0005, "step": 332840 }, { "epoch": 3.556279715796784, "grad_norm": 2.433293342590332, "learning_rate": 8.481472799152138e-07, "loss": 0.0127, "step": 332850 }, { "epoch": 3.5563865591110635, "grad_norm": 0.002613270189613104, "learning_rate": 8.481352207987467e-07, "loss": 0.0263, "step": 332860 }, { "epoch": 3.5564934024253434, "grad_norm": 0.0549708753824234, "learning_rate": 8.481231612892087e-07, "loss": 0.0046, "step": 332870 }, { "epoch": 3.556600245739623, "grad_norm": 0.005930247250944376, "learning_rate": 8.481111013866134e-07, "loss": 0.0018, "step": 332880 }, { "epoch": 3.5567070890539023, "grad_norm": 0.7246761918067932, "learning_rate": 8.480990410909743e-07, "loss": 0.0098, "step": 332890 }, { "epoch": 3.556813932368182, "grad_norm": 0.22674989700317383, "learning_rate": 8.480869804023052e-07, "loss": 0.0094, "step": 332900 }, { "epoch": 3.5569207756824617, "grad_norm": 0.0018001951975747943, "learning_rate": 8.480749193206196e-07, "loss": 0.0065, "step": 332910 }, { "epoch": 3.557027618996741, "grad_norm": 2.7158315181732178, "learning_rate": 8.480628578459312e-07, "loss": 0.026, "step": 332920 }, { "epoch": 3.557134462311021, "grad_norm": 1.0103827714920044, "learning_rate": 8.480507959782536e-07, "loss": 0.0334, "step": 332930 }, { "epoch": 3.5572413056253005, "grad_norm": 1.045918583869934, "learning_rate": 8.480387337176003e-07, "loss": 0.0118, "step": 332940 }, { "epoch": 3.55734814893958, "grad_norm": 3.8664772510528564, "learning_rate": 8.480266710639849e-07, "loss": 0.0175, "step": 332950 }, { "epoch": 3.55745499225386, "grad_norm": 0.05976349860429764, "learning_rate": 8.480146080174213e-07, "loss": 0.0012, "step": 332960 }, { "epoch": 3.5575618355681393, "grad_norm": 1.1748706102371216, "learning_rate": 8.480025445779228e-07, "loss": 0.0256, "step": 332970 }, { "epoch": 3.5576686788824188, "grad_norm": 0.17880725860595703, "learning_rate": 8.479904807455034e-07, "loss": 0.0137, "step": 332980 }, { "epoch": 3.5577755221966987, "grad_norm": 0.003911373671144247, "learning_rate": 8.479784165201762e-07, "loss": 0.0005, "step": 332990 }, { "epoch": 3.557882365510978, "grad_norm": 0.4289751946926117, "learning_rate": 8.479663519019553e-07, "loss": 0.0047, "step": 333000 }, { "epoch": 3.5579892088252576, "grad_norm": 1.9463084936141968, "learning_rate": 8.47954286890854e-07, "loss": 0.0232, "step": 333010 }, { "epoch": 3.5580960521395375, "grad_norm": 0.0072296797297894955, "learning_rate": 8.479422214868861e-07, "loss": 0.005, "step": 333020 }, { "epoch": 3.558202895453817, "grad_norm": 0.0058710468001663685, "learning_rate": 8.479301556900651e-07, "loss": 0.0033, "step": 333030 }, { "epoch": 3.558309738768097, "grad_norm": 0.739751935005188, "learning_rate": 8.479180895004048e-07, "loss": 0.02, "step": 333040 }, { "epoch": 3.5584165820823763, "grad_norm": 0.0016675859224051237, "learning_rate": 8.479060229179187e-07, "loss": 0.0008, "step": 333050 }, { "epoch": 3.558523425396656, "grad_norm": 0.07101235538721085, "learning_rate": 8.478939559426204e-07, "loss": 0.0015, "step": 333060 }, { "epoch": 3.5586302687109352, "grad_norm": 0.19425396621227264, "learning_rate": 8.478818885745236e-07, "loss": 0.0044, "step": 333070 }, { "epoch": 3.558737112025215, "grad_norm": 0.020258670672774315, "learning_rate": 8.478698208136419e-07, "loss": 0.0036, "step": 333080 }, { "epoch": 3.5588439553394946, "grad_norm": 0.07949218153953552, "learning_rate": 8.478577526599888e-07, "loss": 0.012, "step": 333090 }, { "epoch": 3.5589507986537745, "grad_norm": 0.17209658026695251, "learning_rate": 8.478456841135783e-07, "loss": 0.004, "step": 333100 }, { "epoch": 3.559057641968054, "grad_norm": 0.0029216292314231396, "learning_rate": 8.478336151744235e-07, "loss": 0.0083, "step": 333110 }, { "epoch": 3.5591644852823334, "grad_norm": 0.016665833070874214, "learning_rate": 8.478215458425383e-07, "loss": 0.0023, "step": 333120 }, { "epoch": 3.559271328596613, "grad_norm": 0.005711726378649473, "learning_rate": 8.478094761179367e-07, "loss": 0.0519, "step": 333130 }, { "epoch": 3.559378171910893, "grad_norm": 0.2355085015296936, "learning_rate": 8.477974060006315e-07, "loss": 0.0191, "step": 333140 }, { "epoch": 3.5594850152251722, "grad_norm": 0.032684825360774994, "learning_rate": 8.477853354906371e-07, "loss": 0.0096, "step": 333150 }, { "epoch": 3.559591858539452, "grad_norm": 0.510286808013916, "learning_rate": 8.477732645879666e-07, "loss": 0.0286, "step": 333160 }, { "epoch": 3.5596987018537316, "grad_norm": 0.0010716236429288983, "learning_rate": 8.477611932926339e-07, "loss": 0.0052, "step": 333170 }, { "epoch": 3.559805545168011, "grad_norm": 0.10152135044336319, "learning_rate": 8.477491216046526e-07, "loss": 0.0063, "step": 333180 }, { "epoch": 3.5599123884822905, "grad_norm": 0.6331458687782288, "learning_rate": 8.477370495240363e-07, "loss": 0.0224, "step": 333190 }, { "epoch": 3.5600192317965704, "grad_norm": 9.668595314025879, "learning_rate": 8.477249770507986e-07, "loss": 0.0275, "step": 333200 }, { "epoch": 3.56012607511085, "grad_norm": 0.6200894117355347, "learning_rate": 8.477129041849531e-07, "loss": 0.0072, "step": 333210 }, { "epoch": 3.56023291842513, "grad_norm": 0.01174934208393097, "learning_rate": 8.477008309265134e-07, "loss": 0.0193, "step": 333220 }, { "epoch": 3.5603397617394092, "grad_norm": 0.2681163251399994, "learning_rate": 8.476887572754935e-07, "loss": 0.0096, "step": 333230 }, { "epoch": 3.5604466050536887, "grad_norm": 0.0683584213256836, "learning_rate": 8.476766832319067e-07, "loss": 0.0033, "step": 333240 }, { "epoch": 3.560553448367968, "grad_norm": 0.0019144786056131124, "learning_rate": 8.476646087957665e-07, "loss": 0.0216, "step": 333250 }, { "epoch": 3.560660291682248, "grad_norm": 8.75137996673584, "learning_rate": 8.476525339670868e-07, "loss": 0.0106, "step": 333260 }, { "epoch": 3.5607671349965275, "grad_norm": 7.9375715255737305, "learning_rate": 8.476404587458812e-07, "loss": 0.0144, "step": 333270 }, { "epoch": 3.5608739783108074, "grad_norm": 0.12003745883703232, "learning_rate": 8.476283831321633e-07, "loss": 0.002, "step": 333280 }, { "epoch": 3.560980821625087, "grad_norm": 0.5033977031707764, "learning_rate": 8.476163071259466e-07, "loss": 0.0191, "step": 333290 }, { "epoch": 3.5610876649393663, "grad_norm": 0.0007612091139890254, "learning_rate": 8.47604230727245e-07, "loss": 0.0501, "step": 333300 }, { "epoch": 3.561194508253646, "grad_norm": 5.425525188446045, "learning_rate": 8.475921539360718e-07, "loss": 0.0112, "step": 333310 }, { "epoch": 3.5613013515679257, "grad_norm": 1.1049076318740845, "learning_rate": 8.475800767524411e-07, "loss": 0.0103, "step": 333320 }, { "epoch": 3.561408194882205, "grad_norm": 0.061932552605867386, "learning_rate": 8.47567999176366e-07, "loss": 0.0203, "step": 333330 }, { "epoch": 3.561515038196485, "grad_norm": 0.059025511145591736, "learning_rate": 8.475559212078604e-07, "loss": 0.0043, "step": 333340 }, { "epoch": 3.5616218815107645, "grad_norm": 0.024242084473371506, "learning_rate": 8.475438428469381e-07, "loss": 0.0117, "step": 333350 }, { "epoch": 3.561728724825044, "grad_norm": 0.12075375020503998, "learning_rate": 8.475317640936123e-07, "loss": 0.031, "step": 333360 }, { "epoch": 3.5618355681393234, "grad_norm": 0.006479570176452398, "learning_rate": 8.475196849478973e-07, "loss": 0.0015, "step": 333370 }, { "epoch": 3.5619424114536034, "grad_norm": 0.008675780147314072, "learning_rate": 8.475076054098062e-07, "loss": 0.0155, "step": 333380 }, { "epoch": 3.562049254767883, "grad_norm": 0.0007682736613787711, "learning_rate": 8.474955254793527e-07, "loss": 0.0007, "step": 333390 }, { "epoch": 3.5621560980821627, "grad_norm": 1.1135611534118652, "learning_rate": 8.474834451565504e-07, "loss": 0.0045, "step": 333400 }, { "epoch": 3.562262941396442, "grad_norm": 8.291624069213867, "learning_rate": 8.474713644414134e-07, "loss": 0.0127, "step": 333410 }, { "epoch": 3.5623697847107216, "grad_norm": 0.3961790204048157, "learning_rate": 8.474592833339547e-07, "loss": 0.0026, "step": 333420 }, { "epoch": 3.562476628025001, "grad_norm": 0.01361246407032013, "learning_rate": 8.474472018341885e-07, "loss": 0.0083, "step": 333430 }, { "epoch": 3.562583471339281, "grad_norm": 0.49407869577407837, "learning_rate": 8.474351199421281e-07, "loss": 0.0173, "step": 333440 }, { "epoch": 3.5626903146535605, "grad_norm": 0.07309576869010925, "learning_rate": 8.474230376577871e-07, "loss": 0.0229, "step": 333450 }, { "epoch": 3.5627971579678404, "grad_norm": 0.01805606484413147, "learning_rate": 8.474109549811794e-07, "loss": 0.0209, "step": 333460 }, { "epoch": 3.56290400128212, "grad_norm": 0.085671566426754, "learning_rate": 8.473988719123184e-07, "loss": 0.0117, "step": 333470 }, { "epoch": 3.5630108445963993, "grad_norm": 0.039723362773656845, "learning_rate": 8.47386788451218e-07, "loss": 0.0094, "step": 333480 }, { "epoch": 3.5631176879106787, "grad_norm": 0.009886988438665867, "learning_rate": 8.473747045978916e-07, "loss": 0.0551, "step": 333490 }, { "epoch": 3.5632245312249586, "grad_norm": 0.010946848429739475, "learning_rate": 8.47362620352353e-07, "loss": 0.004, "step": 333500 }, { "epoch": 3.563331374539238, "grad_norm": 0.014038112945854664, "learning_rate": 8.473505357146157e-07, "loss": 0.0208, "step": 333510 }, { "epoch": 3.563438217853518, "grad_norm": 0.035952597856521606, "learning_rate": 8.473384506846934e-07, "loss": 0.0332, "step": 333520 }, { "epoch": 3.5635450611677975, "grad_norm": 0.018431462347507477, "learning_rate": 8.473263652626e-07, "loss": 0.0051, "step": 333530 }, { "epoch": 3.563651904482077, "grad_norm": 5.662850856781006, "learning_rate": 8.473142794483488e-07, "loss": 0.0115, "step": 333540 }, { "epoch": 3.5637587477963564, "grad_norm": 1.6079812049865723, "learning_rate": 8.473021932419535e-07, "loss": 0.0081, "step": 333550 }, { "epoch": 3.5638655911106363, "grad_norm": 7.611974239349365, "learning_rate": 8.472901066434279e-07, "loss": 0.0492, "step": 333560 }, { "epoch": 3.5639724344249157, "grad_norm": 0.02757982164621353, "learning_rate": 8.472780196527854e-07, "loss": 0.0077, "step": 333570 }, { "epoch": 3.5640792777391956, "grad_norm": 2.5273091793060303, "learning_rate": 8.472659322700399e-07, "loss": 0.0147, "step": 333580 }, { "epoch": 3.564186121053475, "grad_norm": 0.7110534310340881, "learning_rate": 8.47253844495205e-07, "loss": 0.007, "step": 333590 }, { "epoch": 3.5642929643677546, "grad_norm": 4.344749450683594, "learning_rate": 8.472417563282943e-07, "loss": 0.0448, "step": 333600 }, { "epoch": 3.5643998076820345, "grad_norm": 0.002385893603786826, "learning_rate": 8.472296677693213e-07, "loss": 0.0157, "step": 333610 }, { "epoch": 3.564506650996314, "grad_norm": 2.9217474460601807, "learning_rate": 8.472175788183e-07, "loss": 0.0122, "step": 333620 }, { "epoch": 3.5646134943105934, "grad_norm": 4.399343490600586, "learning_rate": 8.472054894752438e-07, "loss": 0.0224, "step": 333630 }, { "epoch": 3.5647203376248733, "grad_norm": 0.27186331152915955, "learning_rate": 8.471933997401664e-07, "loss": 0.0199, "step": 333640 }, { "epoch": 3.5648271809391527, "grad_norm": 0.2009628564119339, "learning_rate": 8.471813096130814e-07, "loss": 0.0141, "step": 333650 }, { "epoch": 3.564934024253432, "grad_norm": 7.40091609954834, "learning_rate": 8.471692190940025e-07, "loss": 0.1135, "step": 333660 }, { "epoch": 3.565040867567712, "grad_norm": 0.4449287950992584, "learning_rate": 8.471571281829433e-07, "loss": 0.0123, "step": 333670 }, { "epoch": 3.5651477108819916, "grad_norm": 1.4915108680725098, "learning_rate": 8.471450368799175e-07, "loss": 0.0015, "step": 333680 }, { "epoch": 3.565254554196271, "grad_norm": 0.2718534469604492, "learning_rate": 8.471329451849389e-07, "loss": 0.0031, "step": 333690 }, { "epoch": 3.565361397510551, "grad_norm": 0.02658064477145672, "learning_rate": 8.471208530980209e-07, "loss": 0.0053, "step": 333700 }, { "epoch": 3.5654682408248304, "grad_norm": 0.29068049788475037, "learning_rate": 8.471087606191773e-07, "loss": 0.028, "step": 333710 }, { "epoch": 3.56557508413911, "grad_norm": 0.028089916333556175, "learning_rate": 8.470966677484217e-07, "loss": 0.0099, "step": 333720 }, { "epoch": 3.5656819274533897, "grad_norm": 4.549022674560547, "learning_rate": 8.470845744857677e-07, "loss": 0.0172, "step": 333730 }, { "epoch": 3.565788770767669, "grad_norm": 0.00783189944922924, "learning_rate": 8.47072480831229e-07, "loss": 0.0017, "step": 333740 }, { "epoch": 3.5658956140819487, "grad_norm": 4.0706658363342285, "learning_rate": 8.470603867848194e-07, "loss": 0.0136, "step": 333750 }, { "epoch": 3.5660024573962286, "grad_norm": 0.04421193152666092, "learning_rate": 8.470482923465524e-07, "loss": 0.002, "step": 333760 }, { "epoch": 3.566109300710508, "grad_norm": 2.093810558319092, "learning_rate": 8.470361975164417e-07, "loss": 0.0139, "step": 333770 }, { "epoch": 3.5662161440247875, "grad_norm": 0.004174421541392803, "learning_rate": 8.470241022945008e-07, "loss": 0.0018, "step": 333780 }, { "epoch": 3.5663229873390674, "grad_norm": 0.007495185825973749, "learning_rate": 8.470120066807437e-07, "loss": 0.0147, "step": 333790 }, { "epoch": 3.566429830653347, "grad_norm": 0.09269756078720093, "learning_rate": 8.469999106751837e-07, "loss": 0.0044, "step": 333800 }, { "epoch": 3.5665366739676267, "grad_norm": 0.022467143833637238, "learning_rate": 8.469878142778347e-07, "loss": 0.0074, "step": 333810 }, { "epoch": 3.566643517281906, "grad_norm": 0.03626053035259247, "learning_rate": 8.469757174887102e-07, "loss": 0.0266, "step": 333820 }, { "epoch": 3.5667503605961857, "grad_norm": 31.909198760986328, "learning_rate": 8.46963620307824e-07, "loss": 0.0102, "step": 333830 }, { "epoch": 3.566857203910465, "grad_norm": 0.009397454559803009, "learning_rate": 8.469515227351897e-07, "loss": 0.02, "step": 333840 }, { "epoch": 3.566964047224745, "grad_norm": 0.489239364862442, "learning_rate": 8.469394247708208e-07, "loss": 0.0185, "step": 333850 }, { "epoch": 3.5670708905390245, "grad_norm": 0.21912884712219238, "learning_rate": 8.469273264147312e-07, "loss": 0.0036, "step": 333860 }, { "epoch": 3.5671777338533044, "grad_norm": 7.075265407562256, "learning_rate": 8.469152276669345e-07, "loss": 0.0366, "step": 333870 }, { "epoch": 3.567284577167584, "grad_norm": 0.20252558588981628, "learning_rate": 8.469031285274443e-07, "loss": 0.0186, "step": 333880 }, { "epoch": 3.5673914204818633, "grad_norm": 0.22146907448768616, "learning_rate": 8.468910289962743e-07, "loss": 0.0145, "step": 333890 }, { "epoch": 3.5674982637961428, "grad_norm": 3.5836827754974365, "learning_rate": 8.468789290734382e-07, "loss": 0.0168, "step": 333900 }, { "epoch": 3.5676051071104227, "grad_norm": 0.029906682670116425, "learning_rate": 8.468668287589494e-07, "loss": 0.0083, "step": 333910 }, { "epoch": 3.567711950424702, "grad_norm": 0.18358413875102997, "learning_rate": 8.46854728052822e-07, "loss": 0.0285, "step": 333920 }, { "epoch": 3.567818793738982, "grad_norm": 5.333399772644043, "learning_rate": 8.468426269550693e-07, "loss": 0.0271, "step": 333930 }, { "epoch": 3.5679256370532615, "grad_norm": 0.003147117793560028, "learning_rate": 8.468305254657051e-07, "loss": 0.0129, "step": 333940 }, { "epoch": 3.568032480367541, "grad_norm": 16.232698440551758, "learning_rate": 8.468184235847431e-07, "loss": 0.0389, "step": 333950 }, { "epoch": 3.5681393236818204, "grad_norm": 0.009182093665003777, "learning_rate": 8.46806321312197e-07, "loss": 0.0031, "step": 333960 }, { "epoch": 3.5682461669961003, "grad_norm": 3.711188793182373, "learning_rate": 8.467942186480802e-07, "loss": 0.0279, "step": 333970 }, { "epoch": 3.5683530103103798, "grad_norm": 20.055635452270508, "learning_rate": 8.467821155924066e-07, "loss": 0.0118, "step": 333980 }, { "epoch": 3.5684598536246597, "grad_norm": 0.018122684210538864, "learning_rate": 8.4677001214519e-07, "loss": 0.0019, "step": 333990 }, { "epoch": 3.568566696938939, "grad_norm": 0.00758406100794673, "learning_rate": 8.467579083064438e-07, "loss": 0.0013, "step": 334000 }, { "epoch": 3.5686735402532186, "grad_norm": 0.6845197081565857, "learning_rate": 8.467458040761817e-07, "loss": 0.0053, "step": 334010 }, { "epoch": 3.568780383567498, "grad_norm": 7.747345447540283, "learning_rate": 8.467336994544174e-07, "loss": 0.0217, "step": 334020 }, { "epoch": 3.568887226881778, "grad_norm": 0.014360702596604824, "learning_rate": 8.467215944411646e-07, "loss": 0.0017, "step": 334030 }, { "epoch": 3.5689940701960574, "grad_norm": 1.6254544258117676, "learning_rate": 8.46709489036437e-07, "loss": 0.0386, "step": 334040 }, { "epoch": 3.5691009135103373, "grad_norm": 0.055280327796936035, "learning_rate": 8.466973832402482e-07, "loss": 0.0157, "step": 334050 }, { "epoch": 3.5692077568246168, "grad_norm": 1.1211838722229004, "learning_rate": 8.466852770526118e-07, "loss": 0.0114, "step": 334060 }, { "epoch": 3.5693146001388962, "grad_norm": 0.4608946442604065, "learning_rate": 8.466731704735417e-07, "loss": 0.0053, "step": 334070 }, { "epoch": 3.5694214434531757, "grad_norm": 0.0015803633723407984, "learning_rate": 8.466610635030514e-07, "loss": 0.0201, "step": 334080 }, { "epoch": 3.5695282867674556, "grad_norm": 0.10265999287366867, "learning_rate": 8.466489561411545e-07, "loss": 0.0627, "step": 334090 }, { "epoch": 3.569635130081735, "grad_norm": 0.0046814000234007835, "learning_rate": 8.466368483878648e-07, "loss": 0.0211, "step": 334100 }, { "epoch": 3.569741973396015, "grad_norm": 0.023084068670868874, "learning_rate": 8.466247402431959e-07, "loss": 0.0079, "step": 334110 }, { "epoch": 3.5698488167102944, "grad_norm": 0.04996216297149658, "learning_rate": 8.466126317071616e-07, "loss": 0.0243, "step": 334120 }, { "epoch": 3.569955660024574, "grad_norm": 0.06158658489584923, "learning_rate": 8.466005227797754e-07, "loss": 0.0088, "step": 334130 }, { "epoch": 3.5700625033388533, "grad_norm": 0.021292662248015404, "learning_rate": 8.46588413461051e-07, "loss": 0.044, "step": 334140 }, { "epoch": 3.5701693466531332, "grad_norm": 5.334392070770264, "learning_rate": 8.465763037510023e-07, "loss": 0.0111, "step": 334150 }, { "epoch": 3.5702761899674127, "grad_norm": 0.3598703444004059, "learning_rate": 8.465641936496427e-07, "loss": 0.0055, "step": 334160 }, { "epoch": 3.5703830332816926, "grad_norm": 7.053494453430176, "learning_rate": 8.46552083156986e-07, "loss": 0.0261, "step": 334170 }, { "epoch": 3.570489876595972, "grad_norm": 0.07551990449428558, "learning_rate": 8.465399722730458e-07, "loss": 0.0118, "step": 334180 }, { "epoch": 3.5705967199102515, "grad_norm": 0.07255978882312775, "learning_rate": 8.46527860997836e-07, "loss": 0.0203, "step": 334190 }, { "epoch": 3.570703563224531, "grad_norm": 0.44152939319610596, "learning_rate": 8.465157493313697e-07, "loss": 0.0117, "step": 334200 }, { "epoch": 3.570810406538811, "grad_norm": 2.0544540882110596, "learning_rate": 8.465036372736613e-07, "loss": 0.012, "step": 334210 }, { "epoch": 3.5709172498530903, "grad_norm": 23.651012420654297, "learning_rate": 8.464915248247239e-07, "loss": 0.0277, "step": 334220 }, { "epoch": 3.5710240931673702, "grad_norm": 0.8116657137870789, "learning_rate": 8.464794119845717e-07, "loss": 0.0171, "step": 334230 }, { "epoch": 3.5711309364816497, "grad_norm": 5.7528767585754395, "learning_rate": 8.464672987532179e-07, "loss": 0.0257, "step": 334240 }, { "epoch": 3.571237779795929, "grad_norm": 0.10251408815383911, "learning_rate": 8.464551851306765e-07, "loss": 0.0037, "step": 334250 }, { "epoch": 3.5713446231102086, "grad_norm": 0.03924279659986496, "learning_rate": 8.46443071116961e-07, "loss": 0.0065, "step": 334260 }, { "epoch": 3.5714514664244885, "grad_norm": 0.043900296092033386, "learning_rate": 8.46430956712085e-07, "loss": 0.03, "step": 334270 }, { "epoch": 3.571558309738768, "grad_norm": 0.03821621090173721, "learning_rate": 8.464188419160624e-07, "loss": 0.0061, "step": 334280 }, { "epoch": 3.571665153053048, "grad_norm": 4.4253153800964355, "learning_rate": 8.464067267289068e-07, "loss": 0.0097, "step": 334290 }, { "epoch": 3.5717719963673273, "grad_norm": 0.006400901824235916, "learning_rate": 8.463946111506318e-07, "loss": 0.0151, "step": 334300 }, { "epoch": 3.571878839681607, "grad_norm": 1.0746076107025146, "learning_rate": 8.463824951812512e-07, "loss": 0.0152, "step": 334310 }, { "epoch": 3.5719856829958863, "grad_norm": 11.233233451843262, "learning_rate": 8.463703788207786e-07, "loss": 0.0245, "step": 334320 }, { "epoch": 3.572092526310166, "grad_norm": 0.14388726651668549, "learning_rate": 8.463582620692276e-07, "loss": 0.0013, "step": 334330 }, { "epoch": 3.5721993696244456, "grad_norm": 0.324379563331604, "learning_rate": 8.463461449266122e-07, "loss": 0.015, "step": 334340 }, { "epoch": 3.5723062129387255, "grad_norm": 1.0158425569534302, "learning_rate": 8.463340273929458e-07, "loss": 0.0066, "step": 334350 }, { "epoch": 3.572413056253005, "grad_norm": 0.006633653771132231, "learning_rate": 8.463219094682418e-07, "loss": 0.0031, "step": 334360 }, { "epoch": 3.5725198995672844, "grad_norm": 1.3384714126586914, "learning_rate": 8.463097911525147e-07, "loss": 0.0153, "step": 334370 }, { "epoch": 3.5726267428815643, "grad_norm": 0.019253067672252655, "learning_rate": 8.462976724457775e-07, "loss": 0.0102, "step": 334380 }, { "epoch": 3.572733586195844, "grad_norm": 0.004811978433281183, "learning_rate": 8.462855533480442e-07, "loss": 0.0094, "step": 334390 }, { "epoch": 3.5728404295101233, "grad_norm": 0.01849338412284851, "learning_rate": 8.462734338593283e-07, "loss": 0.0324, "step": 334400 }, { "epoch": 3.572947272824403, "grad_norm": 0.6207595467567444, "learning_rate": 8.462613139796435e-07, "loss": 0.0058, "step": 334410 }, { "epoch": 3.5730541161386826, "grad_norm": 0.4268416166305542, "learning_rate": 8.462491937090035e-07, "loss": 0.0029, "step": 334420 }, { "epoch": 3.573160959452962, "grad_norm": 0.024232514202594757, "learning_rate": 8.462370730474222e-07, "loss": 0.0255, "step": 334430 }, { "epoch": 3.573267802767242, "grad_norm": 0.01725558005273342, "learning_rate": 8.462249519949131e-07, "loss": 0.0013, "step": 334440 }, { "epoch": 3.5733746460815214, "grad_norm": 0.9012187123298645, "learning_rate": 8.462128305514897e-07, "loss": 0.0085, "step": 334450 }, { "epoch": 3.573481489395801, "grad_norm": 2.3834235668182373, "learning_rate": 8.462007087171661e-07, "loss": 0.0122, "step": 334460 }, { "epoch": 3.573588332710081, "grad_norm": 2.535423755645752, "learning_rate": 8.461885864919557e-07, "loss": 0.0165, "step": 334470 }, { "epoch": 3.5736951760243603, "grad_norm": 3.239001750946045, "learning_rate": 8.461764638758722e-07, "loss": 0.0245, "step": 334480 }, { "epoch": 3.5738020193386397, "grad_norm": 0.2735556960105896, "learning_rate": 8.461643408689294e-07, "loss": 0.0226, "step": 334490 }, { "epoch": 3.5739088626529196, "grad_norm": 0.003400724148377776, "learning_rate": 8.461522174711409e-07, "loss": 0.0183, "step": 334500 }, { "epoch": 3.574015705967199, "grad_norm": 3.171435832977295, "learning_rate": 8.461400936825205e-07, "loss": 0.0097, "step": 334510 }, { "epoch": 3.574122549281479, "grad_norm": 0.05255844071507454, "learning_rate": 8.461279695030817e-07, "loss": 0.0067, "step": 334520 }, { "epoch": 3.5742293925957584, "grad_norm": 0.018854986876249313, "learning_rate": 8.461158449328383e-07, "loss": 0.0021, "step": 334530 }, { "epoch": 3.574336235910038, "grad_norm": 6.189505100250244, "learning_rate": 8.461037199718041e-07, "loss": 0.0126, "step": 334540 }, { "epoch": 3.5744430792243174, "grad_norm": 8.07039737701416, "learning_rate": 8.460915946199927e-07, "loss": 0.0078, "step": 334550 }, { "epoch": 3.5745499225385973, "grad_norm": 0.006155464798212051, "learning_rate": 8.460794688774175e-07, "loss": 0.0073, "step": 334560 }, { "epoch": 3.5746567658528767, "grad_norm": 0.1379580795764923, "learning_rate": 8.460673427440928e-07, "loss": 0.0144, "step": 334570 }, { "epoch": 3.5747636091671566, "grad_norm": 0.4446277618408203, "learning_rate": 8.460552162200317e-07, "loss": 0.0083, "step": 334580 }, { "epoch": 3.574870452481436, "grad_norm": 0.03407901152968407, "learning_rate": 8.460430893052483e-07, "loss": 0.0059, "step": 334590 }, { "epoch": 3.5749772957957155, "grad_norm": 0.10443129390478134, "learning_rate": 8.460309619997561e-07, "loss": 0.0038, "step": 334600 }, { "epoch": 3.575084139109995, "grad_norm": 4.906723976135254, "learning_rate": 8.460188343035688e-07, "loss": 0.0223, "step": 334610 }, { "epoch": 3.575190982424275, "grad_norm": 0.010653170756995678, "learning_rate": 8.460067062167e-07, "loss": 0.0009, "step": 334620 }, { "epoch": 3.5752978257385544, "grad_norm": 0.008211874403059483, "learning_rate": 8.459945777391637e-07, "loss": 0.0084, "step": 334630 }, { "epoch": 3.5754046690528343, "grad_norm": 0.01129136048257351, "learning_rate": 8.459824488709735e-07, "loss": 0.0145, "step": 334640 }, { "epoch": 3.5755115123671137, "grad_norm": 0.04632779583334923, "learning_rate": 8.459703196121427e-07, "loss": 0.0002, "step": 334650 }, { "epoch": 3.575618355681393, "grad_norm": 0.2433008998632431, "learning_rate": 8.459581899626855e-07, "loss": 0.0104, "step": 334660 }, { "epoch": 3.5757251989956726, "grad_norm": 0.25919297337532043, "learning_rate": 8.459460599226153e-07, "loss": 0.0055, "step": 334670 }, { "epoch": 3.5758320423099526, "grad_norm": 12.293753623962402, "learning_rate": 8.459339294919459e-07, "loss": 0.0264, "step": 334680 }, { "epoch": 3.575938885624232, "grad_norm": 11.090291976928711, "learning_rate": 8.459217986706911e-07, "loss": 0.0095, "step": 334690 }, { "epoch": 3.576045728938512, "grad_norm": 1.7519525289535522, "learning_rate": 8.459096674588644e-07, "loss": 0.008, "step": 334700 }, { "epoch": 3.5761525722527914, "grad_norm": 9.42685604095459, "learning_rate": 8.458975358564795e-07, "loss": 0.0633, "step": 334710 }, { "epoch": 3.576259415567071, "grad_norm": 0.33535224199295044, "learning_rate": 8.458854038635503e-07, "loss": 0.0022, "step": 334720 }, { "epoch": 3.5763662588813503, "grad_norm": 0.14277851581573486, "learning_rate": 8.458732714800903e-07, "loss": 0.0046, "step": 334730 }, { "epoch": 3.57647310219563, "grad_norm": 0.09739504009485245, "learning_rate": 8.458611387061133e-07, "loss": 0.0408, "step": 334740 }, { "epoch": 3.5765799455099097, "grad_norm": 0.007928382605314255, "learning_rate": 8.45849005541633e-07, "loss": 0.0016, "step": 334750 }, { "epoch": 3.5766867888241896, "grad_norm": 0.6422723531723022, "learning_rate": 8.45836871986663e-07, "loss": 0.0197, "step": 334760 }, { "epoch": 3.576793632138469, "grad_norm": 1.8800435066223145, "learning_rate": 8.458247380412171e-07, "loss": 0.0047, "step": 334770 }, { "epoch": 3.5769004754527485, "grad_norm": 1.4481158256530762, "learning_rate": 8.458126037053091e-07, "loss": 0.0275, "step": 334780 }, { "epoch": 3.577007318767028, "grad_norm": 4.171554088592529, "learning_rate": 8.458004689789523e-07, "loss": 0.0098, "step": 334790 }, { "epoch": 3.577114162081308, "grad_norm": 3.2881250381469727, "learning_rate": 8.457883338621608e-07, "loss": 0.0063, "step": 334800 }, { "epoch": 3.5772210053955873, "grad_norm": 0.0010705082677304745, "learning_rate": 8.457761983549483e-07, "loss": 0.0035, "step": 334810 }, { "epoch": 3.577327848709867, "grad_norm": 1.6627007722854614, "learning_rate": 8.457640624573281e-07, "loss": 0.0016, "step": 334820 }, { "epoch": 3.5774346920241467, "grad_norm": 0.10796385258436203, "learning_rate": 8.457519261693145e-07, "loss": 0.0102, "step": 334830 }, { "epoch": 3.577541535338426, "grad_norm": 0.027145521715283394, "learning_rate": 8.457397894909207e-07, "loss": 0.0019, "step": 334840 }, { "epoch": 3.5776483786527056, "grad_norm": 0.036590468138456345, "learning_rate": 8.457276524221606e-07, "loss": 0.0139, "step": 334850 }, { "epoch": 3.5777552219669855, "grad_norm": 1.4713866710662842, "learning_rate": 8.457155149630479e-07, "loss": 0.01, "step": 334860 }, { "epoch": 3.577862065281265, "grad_norm": 2.6261754035949707, "learning_rate": 8.457033771135963e-07, "loss": 0.0311, "step": 334870 }, { "epoch": 3.577968908595545, "grad_norm": 0.013646041974425316, "learning_rate": 8.456912388738194e-07, "loss": 0.0293, "step": 334880 }, { "epoch": 3.5780757519098243, "grad_norm": 2.5689260959625244, "learning_rate": 8.456791002437311e-07, "loss": 0.0105, "step": 334890 }, { "epoch": 3.5781825952241038, "grad_norm": 0.03457237780094147, "learning_rate": 8.45666961223345e-07, "loss": 0.0168, "step": 334900 }, { "epoch": 3.578289438538383, "grad_norm": 0.006295023951679468, "learning_rate": 8.456548218126749e-07, "loss": 0.0086, "step": 334910 }, { "epoch": 3.578396281852663, "grad_norm": 0.0685807317495346, "learning_rate": 8.456426820117343e-07, "loss": 0.0019, "step": 334920 }, { "epoch": 3.5785031251669426, "grad_norm": 0.002450222847983241, "learning_rate": 8.45630541820537e-07, "loss": 0.0019, "step": 334930 }, { "epoch": 3.5786099684812225, "grad_norm": 2.8747215270996094, "learning_rate": 8.456184012390968e-07, "loss": 0.0207, "step": 334940 }, { "epoch": 3.578716811795502, "grad_norm": 7.371588706970215, "learning_rate": 8.456062602674273e-07, "loss": 0.0084, "step": 334950 }, { "epoch": 3.5788236551097814, "grad_norm": 1.3382883071899414, "learning_rate": 8.455941189055422e-07, "loss": 0.01, "step": 334960 }, { "epoch": 3.578930498424061, "grad_norm": 0.018973782658576965, "learning_rate": 8.455819771534553e-07, "loss": 0.0215, "step": 334970 }, { "epoch": 3.5790373417383408, "grad_norm": 0.004128709901124239, "learning_rate": 8.455698350111804e-07, "loss": 0.009, "step": 334980 }, { "epoch": 3.57914418505262, "grad_norm": 0.16310742497444153, "learning_rate": 8.455576924787309e-07, "loss": 0.0174, "step": 334990 }, { "epoch": 3.5792510283669, "grad_norm": 14.785248756408691, "learning_rate": 8.455455495561208e-07, "loss": 0.0472, "step": 335000 }, { "epoch": 3.5793578716811796, "grad_norm": 0.01238330639898777, "learning_rate": 8.455334062433636e-07, "loss": 0.013, "step": 335010 }, { "epoch": 3.579464714995459, "grad_norm": 0.020272348076105118, "learning_rate": 8.455212625404731e-07, "loss": 0.0106, "step": 335020 }, { "epoch": 3.5795715583097385, "grad_norm": 5.895577907562256, "learning_rate": 8.455091184474632e-07, "loss": 0.0152, "step": 335030 }, { "epoch": 3.5796784016240184, "grad_norm": 0.6701569557189941, "learning_rate": 8.454969739643472e-07, "loss": 0.0006, "step": 335040 }, { "epoch": 3.579785244938298, "grad_norm": 2.974792003631592, "learning_rate": 8.454848290911392e-07, "loss": 0.0032, "step": 335050 }, { "epoch": 3.5798920882525778, "grad_norm": 0.9063138961791992, "learning_rate": 8.454726838278526e-07, "loss": 0.0123, "step": 335060 }, { "epoch": 3.5799989315668572, "grad_norm": 7.148541450500488, "learning_rate": 8.454605381745015e-07, "loss": 0.0185, "step": 335070 }, { "epoch": 3.5801057748811367, "grad_norm": 0.48282578587532043, "learning_rate": 8.454483921310991e-07, "loss": 0.0177, "step": 335080 }, { "epoch": 3.5802126181954166, "grad_norm": 2.0227062702178955, "learning_rate": 8.454362456976596e-07, "loss": 0.0273, "step": 335090 }, { "epoch": 3.580319461509696, "grad_norm": 7.548609256744385, "learning_rate": 8.454240988741963e-07, "loss": 0.0153, "step": 335100 }, { "epoch": 3.5804263048239755, "grad_norm": 1.288197636604309, "learning_rate": 8.454119516607234e-07, "loss": 0.022, "step": 335110 }, { "epoch": 3.5805331481382554, "grad_norm": 0.270514577627182, "learning_rate": 8.453998040572541e-07, "loss": 0.024, "step": 335120 }, { "epoch": 3.580639991452535, "grad_norm": 5.278041839599609, "learning_rate": 8.453876560638024e-07, "loss": 0.0358, "step": 335130 }, { "epoch": 3.5807468347668143, "grad_norm": 0.28073930740356445, "learning_rate": 8.453755076803822e-07, "loss": 0.0239, "step": 335140 }, { "epoch": 3.5808536780810942, "grad_norm": 0.0019492891151458025, "learning_rate": 8.453633589070068e-07, "loss": 0.0161, "step": 335150 }, { "epoch": 3.5809605213953737, "grad_norm": 0.04858362674713135, "learning_rate": 8.4535120974369e-07, "loss": 0.0196, "step": 335160 }, { "epoch": 3.581067364709653, "grad_norm": 0.004430900327861309, "learning_rate": 8.453390601904457e-07, "loss": 0.0064, "step": 335170 }, { "epoch": 3.581174208023933, "grad_norm": 1.1738382577896118, "learning_rate": 8.453269102472876e-07, "loss": 0.0047, "step": 335180 }, { "epoch": 3.5812810513382125, "grad_norm": 0.01660594344139099, "learning_rate": 8.453147599142293e-07, "loss": 0.0008, "step": 335190 }, { "epoch": 3.581387894652492, "grad_norm": 0.010714331641793251, "learning_rate": 8.453026091912846e-07, "loss": 0.0034, "step": 335200 }, { "epoch": 3.581494737966772, "grad_norm": 2.257988452911377, "learning_rate": 8.452904580784672e-07, "loss": 0.0082, "step": 335210 }, { "epoch": 3.5816015812810513, "grad_norm": 0.0018209534464403987, "learning_rate": 8.452783065757908e-07, "loss": 0.0134, "step": 335220 }, { "epoch": 3.581708424595331, "grad_norm": 0.051360778510570526, "learning_rate": 8.452661546832691e-07, "loss": 0.025, "step": 335230 }, { "epoch": 3.5818152679096107, "grad_norm": 4.3772125244140625, "learning_rate": 8.452540024009159e-07, "loss": 0.0064, "step": 335240 }, { "epoch": 3.58192211122389, "grad_norm": 21.926111221313477, "learning_rate": 8.452418497287447e-07, "loss": 0.0121, "step": 335250 }, { "epoch": 3.5820289545381696, "grad_norm": 0.2508831322193146, "learning_rate": 8.452296966667697e-07, "loss": 0.0168, "step": 335260 }, { "epoch": 3.5821357978524495, "grad_norm": 0.009345593862235546, "learning_rate": 8.452175432150041e-07, "loss": 0.0245, "step": 335270 }, { "epoch": 3.582242641166729, "grad_norm": 2.9580483436584473, "learning_rate": 8.452053893734618e-07, "loss": 0.0112, "step": 335280 }, { "epoch": 3.582349484481009, "grad_norm": 3.8285999298095703, "learning_rate": 8.451932351421569e-07, "loss": 0.0172, "step": 335290 }, { "epoch": 3.5824563277952883, "grad_norm": 0.004985225386917591, "learning_rate": 8.451810805211023e-07, "loss": 0.0051, "step": 335300 }, { "epoch": 3.582563171109568, "grad_norm": 0.09277276694774628, "learning_rate": 8.451689255103126e-07, "loss": 0.0043, "step": 335310 }, { "epoch": 3.5826700144238472, "grad_norm": 0.759833037853241, "learning_rate": 8.451567701098011e-07, "loss": 0.0052, "step": 335320 }, { "epoch": 3.582776857738127, "grad_norm": 0.051408737897872925, "learning_rate": 8.451446143195813e-07, "loss": 0.0427, "step": 335330 }, { "epoch": 3.5828837010524066, "grad_norm": 0.2050894945859909, "learning_rate": 8.451324581396673e-07, "loss": 0.0082, "step": 335340 }, { "epoch": 3.5829905443666865, "grad_norm": 0.1458795815706253, "learning_rate": 8.451203015700729e-07, "loss": 0.0238, "step": 335350 }, { "epoch": 3.583097387680966, "grad_norm": 0.0675010085105896, "learning_rate": 8.451081446108114e-07, "loss": 0.02, "step": 335360 }, { "epoch": 3.5832042309952454, "grad_norm": 0.010888000950217247, "learning_rate": 8.450959872618968e-07, "loss": 0.0139, "step": 335370 }, { "epoch": 3.583311074309525, "grad_norm": 0.1586409956216812, "learning_rate": 8.450838295233429e-07, "loss": 0.0003, "step": 335380 }, { "epoch": 3.583417917623805, "grad_norm": 0.025945808738470078, "learning_rate": 8.450716713951631e-07, "loss": 0.0035, "step": 335390 }, { "epoch": 3.5835247609380843, "grad_norm": 4.920520782470703, "learning_rate": 8.450595128773714e-07, "loss": 0.0088, "step": 335400 }, { "epoch": 3.583631604252364, "grad_norm": 1.9894179105758667, "learning_rate": 8.450473539699816e-07, "loss": 0.0075, "step": 335410 }, { "epoch": 3.5837384475666436, "grad_norm": 0.8182362914085388, "learning_rate": 8.450351946730072e-07, "loss": 0.0169, "step": 335420 }, { "epoch": 3.583845290880923, "grad_norm": 1.349603295326233, "learning_rate": 8.450230349864621e-07, "loss": 0.014, "step": 335430 }, { "epoch": 3.5839521341952025, "grad_norm": 1.320863962173462, "learning_rate": 8.450108749103599e-07, "loss": 0.0263, "step": 335440 }, { "epoch": 3.5840589775094824, "grad_norm": 0.0479404479265213, "learning_rate": 8.449987144447144e-07, "loss": 0.0026, "step": 335450 }, { "epoch": 3.584165820823762, "grad_norm": 1.1701045036315918, "learning_rate": 8.449865535895393e-07, "loss": 0.0005, "step": 335460 }, { "epoch": 3.584272664138042, "grad_norm": 5.219926834106445, "learning_rate": 8.449743923448483e-07, "loss": 0.0292, "step": 335470 }, { "epoch": 3.5843795074523213, "grad_norm": 0.6439716815948486, "learning_rate": 8.449622307106552e-07, "loss": 0.0027, "step": 335480 }, { "epoch": 3.5844863507666007, "grad_norm": 0.024380456656217575, "learning_rate": 8.449500686869737e-07, "loss": 0.017, "step": 335490 }, { "epoch": 3.58459319408088, "grad_norm": 4.144417762756348, "learning_rate": 8.449379062738176e-07, "loss": 0.0294, "step": 335500 }, { "epoch": 3.58470003739516, "grad_norm": 0.002911873860284686, "learning_rate": 8.449257434712005e-07, "loss": 0.014, "step": 335510 }, { "epoch": 3.5848068807094395, "grad_norm": 0.00792126078158617, "learning_rate": 8.449135802791363e-07, "loss": 0.0114, "step": 335520 }, { "epoch": 3.5849137240237194, "grad_norm": 4.844350814819336, "learning_rate": 8.449014166976385e-07, "loss": 0.007, "step": 335530 }, { "epoch": 3.585020567337999, "grad_norm": 0.004870508797466755, "learning_rate": 8.44889252726721e-07, "loss": 0.013, "step": 335540 }, { "epoch": 3.5851274106522784, "grad_norm": 0.041505515575408936, "learning_rate": 8.448770883663976e-07, "loss": 0.0066, "step": 335550 }, { "epoch": 3.585234253966558, "grad_norm": 5.426353931427002, "learning_rate": 8.448649236166818e-07, "loss": 0.0304, "step": 335560 }, { "epoch": 3.5853410972808377, "grad_norm": 0.01132311299443245, "learning_rate": 8.448527584775877e-07, "loss": 0.0025, "step": 335570 }, { "epoch": 3.585447940595117, "grad_norm": 0.042463283985853195, "learning_rate": 8.448405929491287e-07, "loss": 0.0055, "step": 335580 }, { "epoch": 3.585554783909397, "grad_norm": 0.6984323859214783, "learning_rate": 8.448284270313186e-07, "loss": 0.0046, "step": 335590 }, { "epoch": 3.5856616272236765, "grad_norm": 0.29653143882751465, "learning_rate": 8.448162607241711e-07, "loss": 0.0174, "step": 335600 }, { "epoch": 3.585768470537956, "grad_norm": 0.13334110379219055, "learning_rate": 8.448040940277002e-07, "loss": 0.0084, "step": 335610 }, { "epoch": 3.5858753138522355, "grad_norm": 0.010509936138987541, "learning_rate": 8.447919269419193e-07, "loss": 0.0157, "step": 335620 }, { "epoch": 3.5859821571665154, "grad_norm": 0.053285833448171616, "learning_rate": 8.447797594668423e-07, "loss": 0.0148, "step": 335630 }, { "epoch": 3.586089000480795, "grad_norm": 0.4827396273612976, "learning_rate": 8.44767591602483e-07, "loss": 0.0391, "step": 335640 }, { "epoch": 3.5861958437950747, "grad_norm": 2.920215129852295, "learning_rate": 8.44755423348855e-07, "loss": 0.0117, "step": 335650 }, { "epoch": 3.586302687109354, "grad_norm": 0.4440234303474426, "learning_rate": 8.447432547059723e-07, "loss": 0.0009, "step": 335660 }, { "epoch": 3.5864095304236336, "grad_norm": 2.1548283100128174, "learning_rate": 8.447310856738482e-07, "loss": 0.0085, "step": 335670 }, { "epoch": 3.586516373737913, "grad_norm": 9.554893493652344, "learning_rate": 8.447189162524968e-07, "loss": 0.0041, "step": 335680 }, { "epoch": 3.586623217052193, "grad_norm": 4.549046993255615, "learning_rate": 8.447067464419318e-07, "loss": 0.002, "step": 335690 }, { "epoch": 3.5867300603664725, "grad_norm": 0.003714316990226507, "learning_rate": 8.446945762421667e-07, "loss": 0.039, "step": 335700 }, { "epoch": 3.5868369036807524, "grad_norm": 0.8368019461631775, "learning_rate": 8.446824056532155e-07, "loss": 0.0024, "step": 335710 }, { "epoch": 3.586943746995032, "grad_norm": 0.22926439344882965, "learning_rate": 8.446702346750918e-07, "loss": 0.025, "step": 335720 }, { "epoch": 3.5870505903093113, "grad_norm": 0.06446272134780884, "learning_rate": 8.446580633078094e-07, "loss": 0.0172, "step": 335730 }, { "epoch": 3.5871574336235907, "grad_norm": 1.8904564380645752, "learning_rate": 8.446458915513821e-07, "loss": 0.008, "step": 335740 }, { "epoch": 3.5872642769378706, "grad_norm": 1.122188925743103, "learning_rate": 8.446337194058235e-07, "loss": 0.0268, "step": 335750 }, { "epoch": 3.58737112025215, "grad_norm": 0.031375702470541, "learning_rate": 8.446215468711475e-07, "loss": 0.0113, "step": 335760 }, { "epoch": 3.58747796356643, "grad_norm": 0.1786775141954422, "learning_rate": 8.446093739473678e-07, "loss": 0.0027, "step": 335770 }, { "epoch": 3.5875848068807095, "grad_norm": 2.0598366260528564, "learning_rate": 8.44597200634498e-07, "loss": 0.0026, "step": 335780 }, { "epoch": 3.587691650194989, "grad_norm": 0.032147642225027084, "learning_rate": 8.44585026932552e-07, "loss": 0.007, "step": 335790 }, { "epoch": 3.5877984935092684, "grad_norm": 3.3765041828155518, "learning_rate": 8.445728528415435e-07, "loss": 0.0009, "step": 335800 }, { "epoch": 3.5879053368235483, "grad_norm": 0.002712109824642539, "learning_rate": 8.445606783614863e-07, "loss": 0.0035, "step": 335810 }, { "epoch": 3.5880121801378277, "grad_norm": 0.008428399451076984, "learning_rate": 8.445485034923938e-07, "loss": 0.0189, "step": 335820 }, { "epoch": 3.5881190234521076, "grad_norm": 0.7516170144081116, "learning_rate": 8.445363282342803e-07, "loss": 0.0204, "step": 335830 }, { "epoch": 3.588225866766387, "grad_norm": 1.8665621280670166, "learning_rate": 8.445241525871593e-07, "loss": 0.0073, "step": 335840 }, { "epoch": 3.5883327100806666, "grad_norm": 0.010057145729660988, "learning_rate": 8.445119765510444e-07, "loss": 0.0209, "step": 335850 }, { "epoch": 3.5884395533949465, "grad_norm": 0.657711923122406, "learning_rate": 8.444998001259496e-07, "loss": 0.02, "step": 335860 }, { "epoch": 3.588546396709226, "grad_norm": 5.55678129196167, "learning_rate": 8.444876233118884e-07, "loss": 0.0082, "step": 335870 }, { "epoch": 3.5886532400235054, "grad_norm": 0.010999757796525955, "learning_rate": 8.444754461088748e-07, "loss": 0.043, "step": 335880 }, { "epoch": 3.5887600833377853, "grad_norm": 4.944437026977539, "learning_rate": 8.444632685169224e-07, "loss": 0.0208, "step": 335890 }, { "epoch": 3.5888669266520647, "grad_norm": 0.021699799224734306, "learning_rate": 8.444510905360448e-07, "loss": 0.0188, "step": 335900 }, { "epoch": 3.588973769966344, "grad_norm": 0.8646849393844604, "learning_rate": 8.444389121662561e-07, "loss": 0.0124, "step": 335910 }, { "epoch": 3.589080613280624, "grad_norm": 0.004997533280402422, "learning_rate": 8.444267334075699e-07, "loss": 0.0153, "step": 335920 }, { "epoch": 3.5891874565949036, "grad_norm": 1.0738502740859985, "learning_rate": 8.444145542599997e-07, "loss": 0.0029, "step": 335930 }, { "epoch": 3.589294299909183, "grad_norm": 4.1390380859375, "learning_rate": 8.444023747235597e-07, "loss": 0.0115, "step": 335940 }, { "epoch": 3.589401143223463, "grad_norm": 0.3052661418914795, "learning_rate": 8.443901947982635e-07, "loss": 0.0086, "step": 335950 }, { "epoch": 3.5895079865377424, "grad_norm": 0.048651665449142456, "learning_rate": 8.443780144841245e-07, "loss": 0.0128, "step": 335960 }, { "epoch": 3.589614829852022, "grad_norm": 3.0200390815734863, "learning_rate": 8.443658337811568e-07, "loss": 0.0064, "step": 335970 }, { "epoch": 3.5897216731663018, "grad_norm": 3.8496150970458984, "learning_rate": 8.443536526893742e-07, "loss": 0.0046, "step": 335980 }, { "epoch": 3.589828516480581, "grad_norm": 2.1736795902252197, "learning_rate": 8.443414712087902e-07, "loss": 0.0446, "step": 335990 }, { "epoch": 3.589935359794861, "grad_norm": 0.1869257688522339, "learning_rate": 8.443292893394189e-07, "loss": 0.005, "step": 336000 }, { "epoch": 3.5900422031091406, "grad_norm": 0.015191374346613884, "learning_rate": 8.443171070812736e-07, "loss": 0.0019, "step": 336010 }, { "epoch": 3.59014904642342, "grad_norm": 0.13348151743412018, "learning_rate": 8.443049244343685e-07, "loss": 0.0374, "step": 336020 }, { "epoch": 3.5902558897376995, "grad_norm": 0.342617392539978, "learning_rate": 8.442927413987171e-07, "loss": 0.0196, "step": 336030 }, { "epoch": 3.5903627330519794, "grad_norm": 0.9589366316795349, "learning_rate": 8.442805579743331e-07, "loss": 0.01, "step": 336040 }, { "epoch": 3.590469576366259, "grad_norm": 3.034348726272583, "learning_rate": 8.442683741612305e-07, "loss": 0.0382, "step": 336050 }, { "epoch": 3.5905764196805388, "grad_norm": 0.07933594286441803, "learning_rate": 8.442561899594228e-07, "loss": 0.0171, "step": 336060 }, { "epoch": 3.590683262994818, "grad_norm": 5.041698455810547, "learning_rate": 8.442440053689239e-07, "loss": 0.0068, "step": 336070 }, { "epoch": 3.5907901063090977, "grad_norm": 0.7720179557800293, "learning_rate": 8.442318203897476e-07, "loss": 0.0125, "step": 336080 }, { "epoch": 3.590896949623377, "grad_norm": 0.023951232433319092, "learning_rate": 8.442196350219076e-07, "loss": 0.0356, "step": 336090 }, { "epoch": 3.591003792937657, "grad_norm": 1.0750561952590942, "learning_rate": 8.442074492654177e-07, "loss": 0.0239, "step": 336100 }, { "epoch": 3.5911106362519365, "grad_norm": 3.0210022926330566, "learning_rate": 8.441952631202915e-07, "loss": 0.022, "step": 336110 }, { "epoch": 3.5912174795662164, "grad_norm": 0.23100942373275757, "learning_rate": 8.44183076586543e-07, "loss": 0.0135, "step": 336120 }, { "epoch": 3.591324322880496, "grad_norm": 2.5005710124969482, "learning_rate": 8.441708896641856e-07, "loss": 0.017, "step": 336130 }, { "epoch": 3.5914311661947753, "grad_norm": 0.01531070563942194, "learning_rate": 8.441587023532335e-07, "loss": 0.0095, "step": 336140 }, { "epoch": 3.5915380095090548, "grad_norm": 0.49063754081726074, "learning_rate": 8.441465146537002e-07, "loss": 0.044, "step": 336150 }, { "epoch": 3.5916448528233347, "grad_norm": 0.03332628682255745, "learning_rate": 8.441343265655994e-07, "loss": 0.0202, "step": 336160 }, { "epoch": 3.591751696137614, "grad_norm": 0.6061398983001709, "learning_rate": 8.441221380889451e-07, "loss": 0.0217, "step": 336170 }, { "epoch": 3.591858539451894, "grad_norm": 0.5983714461326599, "learning_rate": 8.441099492237509e-07, "loss": 0.0026, "step": 336180 }, { "epoch": 3.5919653827661735, "grad_norm": 6.948173999786377, "learning_rate": 8.440977599700305e-07, "loss": 0.0117, "step": 336190 }, { "epoch": 3.592072226080453, "grad_norm": 5.821483135223389, "learning_rate": 8.440855703277978e-07, "loss": 0.0066, "step": 336200 }, { "epoch": 3.5921790693947324, "grad_norm": 0.013684176839888096, "learning_rate": 8.440733802970667e-07, "loss": 0.0082, "step": 336210 }, { "epoch": 3.5922859127090123, "grad_norm": 6.310060977935791, "learning_rate": 8.440611898778507e-07, "loss": 0.0029, "step": 336220 }, { "epoch": 3.5923927560232918, "grad_norm": 0.04268201068043709, "learning_rate": 8.440489990701636e-07, "loss": 0.0055, "step": 336230 }, { "epoch": 3.5924995993375717, "grad_norm": 0.18869608640670776, "learning_rate": 8.440368078740192e-07, "loss": 0.0066, "step": 336240 }, { "epoch": 3.592606442651851, "grad_norm": 0.19173607230186462, "learning_rate": 8.440246162894312e-07, "loss": 0.0048, "step": 336250 }, { "epoch": 3.5927132859661306, "grad_norm": 3.939056873321533, "learning_rate": 8.440124243164137e-07, "loss": 0.0103, "step": 336260 }, { "epoch": 3.59282012928041, "grad_norm": 4.695708751678467, "learning_rate": 8.4400023195498e-07, "loss": 0.0253, "step": 336270 }, { "epoch": 3.59292697259469, "grad_norm": 12.026704788208008, "learning_rate": 8.439880392051441e-07, "loss": 0.0134, "step": 336280 }, { "epoch": 3.5930338159089694, "grad_norm": 9.880438804626465, "learning_rate": 8.439758460669198e-07, "loss": 0.0257, "step": 336290 }, { "epoch": 3.5931406592232493, "grad_norm": 0.03356475383043289, "learning_rate": 8.439636525403207e-07, "loss": 0.0089, "step": 336300 }, { "epoch": 3.593247502537529, "grad_norm": 0.0012081169988960028, "learning_rate": 8.439514586253608e-07, "loss": 0.0072, "step": 336310 }, { "epoch": 3.5933543458518082, "grad_norm": 0.918609619140625, "learning_rate": 8.439392643220538e-07, "loss": 0.007, "step": 336320 }, { "epoch": 3.5934611891660877, "grad_norm": 0.450054407119751, "learning_rate": 8.439270696304132e-07, "loss": 0.0157, "step": 336330 }, { "epoch": 3.5935680324803676, "grad_norm": 0.0014059606473892927, "learning_rate": 8.439148745504531e-07, "loss": 0.0028, "step": 336340 }, { "epoch": 3.593674875794647, "grad_norm": 0.24454692006111145, "learning_rate": 8.439026790821871e-07, "loss": 0.0036, "step": 336350 }, { "epoch": 3.593781719108927, "grad_norm": 0.45979174971580505, "learning_rate": 8.43890483225629e-07, "loss": 0.0298, "step": 336360 }, { "epoch": 3.5938885624232064, "grad_norm": 0.47859132289886475, "learning_rate": 8.438782869807928e-07, "loss": 0.0059, "step": 336370 }, { "epoch": 3.593995405737486, "grad_norm": 0.5532480478286743, "learning_rate": 8.438660903476919e-07, "loss": 0.009, "step": 336380 }, { "epoch": 3.5941022490517653, "grad_norm": 6.44732141494751, "learning_rate": 8.438538933263402e-07, "loss": 0.0172, "step": 336390 }, { "epoch": 3.5942090923660452, "grad_norm": 0.012071596458554268, "learning_rate": 8.438416959167516e-07, "loss": 0.0031, "step": 336400 }, { "epoch": 3.5943159356803247, "grad_norm": 2.465010404586792, "learning_rate": 8.438294981189397e-07, "loss": 0.0129, "step": 336410 }, { "epoch": 3.5944227789946046, "grad_norm": 0.37290987372398376, "learning_rate": 8.438172999329183e-07, "loss": 0.0126, "step": 336420 }, { "epoch": 3.594529622308884, "grad_norm": 0.018577123060822487, "learning_rate": 8.438051013587014e-07, "loss": 0.0025, "step": 336430 }, { "epoch": 3.5946364656231635, "grad_norm": 2.105865240097046, "learning_rate": 8.437929023963024e-07, "loss": 0.0085, "step": 336440 }, { "epoch": 3.594743308937443, "grad_norm": 7.450569152832031, "learning_rate": 8.437807030457353e-07, "loss": 0.0314, "step": 336450 }, { "epoch": 3.594850152251723, "grad_norm": 0.037347037345170975, "learning_rate": 8.437685033070138e-07, "loss": 0.0296, "step": 336460 }, { "epoch": 3.5949569955660023, "grad_norm": 0.64699786901474, "learning_rate": 8.437563031801519e-07, "loss": 0.0229, "step": 336470 }, { "epoch": 3.5950638388802822, "grad_norm": 2.911426544189453, "learning_rate": 8.437441026651631e-07, "loss": 0.0061, "step": 336480 }, { "epoch": 3.5951706821945617, "grad_norm": 0.05224006250500679, "learning_rate": 8.437319017620612e-07, "loss": 0.0072, "step": 336490 }, { "epoch": 3.595277525508841, "grad_norm": 0.03405383974313736, "learning_rate": 8.437197004708601e-07, "loss": 0.0029, "step": 336500 }, { "epoch": 3.5953843688231206, "grad_norm": 0.15892739593982697, "learning_rate": 8.437074987915735e-07, "loss": 0.0074, "step": 336510 }, { "epoch": 3.5954912121374005, "grad_norm": 0.006461249198764563, "learning_rate": 8.436952967242152e-07, "loss": 0.0029, "step": 336520 }, { "epoch": 3.59559805545168, "grad_norm": 0.021802734583616257, "learning_rate": 8.436830942687989e-07, "loss": 0.0524, "step": 336530 }, { "epoch": 3.59570489876596, "grad_norm": 0.029415670782327652, "learning_rate": 8.436708914253387e-07, "loss": 0.0166, "step": 336540 }, { "epoch": 3.5958117420802393, "grad_norm": 0.15939636528491974, "learning_rate": 8.436586881938479e-07, "loss": 0.0136, "step": 336550 }, { "epoch": 3.595918585394519, "grad_norm": 5.116026878356934, "learning_rate": 8.436464845743406e-07, "loss": 0.006, "step": 336560 }, { "epoch": 3.5960254287087987, "grad_norm": 3.5305352210998535, "learning_rate": 8.436342805668304e-07, "loss": 0.0068, "step": 336570 }, { "epoch": 3.596132272023078, "grad_norm": 0.0036251903511583805, "learning_rate": 8.436220761713311e-07, "loss": 0.0072, "step": 336580 }, { "epoch": 3.5962391153373576, "grad_norm": 0.007543595507740974, "learning_rate": 8.436098713878567e-07, "loss": 0.0257, "step": 336590 }, { "epoch": 3.5963459586516375, "grad_norm": 7.839290142059326, "learning_rate": 8.435976662164208e-07, "loss": 0.018, "step": 336600 }, { "epoch": 3.596452801965917, "grad_norm": 0.010587034747004509, "learning_rate": 8.435854606570372e-07, "loss": 0.0234, "step": 336610 }, { "epoch": 3.5965596452801964, "grad_norm": 0.002842546906322241, "learning_rate": 8.435732547097195e-07, "loss": 0.0128, "step": 336620 }, { "epoch": 3.5966664885944764, "grad_norm": 2.93881893157959, "learning_rate": 8.435610483744818e-07, "loss": 0.039, "step": 336630 }, { "epoch": 3.596773331908756, "grad_norm": 0.007392373401671648, "learning_rate": 8.435488416513379e-07, "loss": 0.0136, "step": 336640 }, { "epoch": 3.5968801752230353, "grad_norm": 6.620675563812256, "learning_rate": 8.435366345403012e-07, "loss": 0.0273, "step": 336650 }, { "epoch": 3.596987018537315, "grad_norm": 0.10989146679639816, "learning_rate": 8.435244270413858e-07, "loss": 0.0235, "step": 336660 }, { "epoch": 3.5970938618515946, "grad_norm": 0.002115691313520074, "learning_rate": 8.435122191546053e-07, "loss": 0.0175, "step": 336670 }, { "epoch": 3.597200705165874, "grad_norm": 1.962785005569458, "learning_rate": 8.435000108799737e-07, "loss": 0.0201, "step": 336680 }, { "epoch": 3.597307548480154, "grad_norm": 0.0026914484333246946, "learning_rate": 8.434878022175047e-07, "loss": 0.024, "step": 336690 }, { "epoch": 3.5974143917944335, "grad_norm": 0.15059664845466614, "learning_rate": 8.43475593167212e-07, "loss": 0.0375, "step": 336700 }, { "epoch": 3.597521235108713, "grad_norm": 0.19781668484210968, "learning_rate": 8.434633837291094e-07, "loss": 0.0012, "step": 336710 }, { "epoch": 3.597628078422993, "grad_norm": 0.009929156862199306, "learning_rate": 8.434511739032107e-07, "loss": 0.0387, "step": 336720 }, { "epoch": 3.5977349217372723, "grad_norm": 0.004875372163951397, "learning_rate": 8.434389636895297e-07, "loss": 0.0158, "step": 336730 }, { "epoch": 3.5978417650515517, "grad_norm": 0.04268556088209152, "learning_rate": 8.434267530880803e-07, "loss": 0.0059, "step": 336740 }, { "epoch": 3.5979486083658316, "grad_norm": 1.0076340436935425, "learning_rate": 8.434145420988761e-07, "loss": 0.0017, "step": 336750 }, { "epoch": 3.598055451680111, "grad_norm": 0.002874209312722087, "learning_rate": 8.434023307219309e-07, "loss": 0.0009, "step": 336760 }, { "epoch": 3.598162294994391, "grad_norm": 0.7353869676589966, "learning_rate": 8.433901189572585e-07, "loss": 0.0131, "step": 336770 }, { "epoch": 3.5982691383086705, "grad_norm": 0.10579051077365875, "learning_rate": 8.43377906804873e-07, "loss": 0.0026, "step": 336780 }, { "epoch": 3.59837598162295, "grad_norm": 17.39164924621582, "learning_rate": 8.433656942647878e-07, "loss": 0.0259, "step": 336790 }, { "epoch": 3.5984828249372294, "grad_norm": 0.10798203945159912, "learning_rate": 8.433534813370167e-07, "loss": 0.0444, "step": 336800 }, { "epoch": 3.5985896682515093, "grad_norm": 0.02259918674826622, "learning_rate": 8.433412680215736e-07, "loss": 0.0141, "step": 336810 }, { "epoch": 3.5986965115657887, "grad_norm": 0.4028463661670685, "learning_rate": 8.433290543184724e-07, "loss": 0.0006, "step": 336820 }, { "epoch": 3.5988033548800686, "grad_norm": 0.911638081073761, "learning_rate": 8.433168402277269e-07, "loss": 0.0078, "step": 336830 }, { "epoch": 3.598910198194348, "grad_norm": 0.0678810179233551, "learning_rate": 8.433046257493507e-07, "loss": 0.012, "step": 336840 }, { "epoch": 3.5990170415086276, "grad_norm": 2.9462647438049316, "learning_rate": 8.432924108833576e-07, "loss": 0.0022, "step": 336850 }, { "epoch": 3.599123884822907, "grad_norm": 0.002894073724746704, "learning_rate": 8.432801956297615e-07, "loss": 0.0148, "step": 336860 }, { "epoch": 3.599230728137187, "grad_norm": 0.056661996990442276, "learning_rate": 8.43267979988576e-07, "loss": 0.0111, "step": 336870 }, { "epoch": 3.5993375714514664, "grad_norm": 1.4661610126495361, "learning_rate": 8.432557639598153e-07, "loss": 0.0145, "step": 336880 }, { "epoch": 3.5994444147657463, "grad_norm": 3.636566400527954, "learning_rate": 8.432435475434928e-07, "loss": 0.0104, "step": 336890 }, { "epoch": 3.5995512580800257, "grad_norm": 0.3081667125225067, "learning_rate": 8.432313307396224e-07, "loss": 0.012, "step": 336900 }, { "epoch": 3.599658101394305, "grad_norm": 0.008225119672715664, "learning_rate": 8.43219113548218e-07, "loss": 0.012, "step": 336910 }, { "epoch": 3.5997649447085847, "grad_norm": 0.1720259189605713, "learning_rate": 8.432068959692933e-07, "loss": 0.0173, "step": 336920 }, { "epoch": 3.5998717880228646, "grad_norm": 1.4606820344924927, "learning_rate": 8.43194678002862e-07, "loss": 0.0056, "step": 336930 }, { "epoch": 3.599978631337144, "grad_norm": 1.699265718460083, "learning_rate": 8.431824596489381e-07, "loss": 0.0244, "step": 336940 }, { "epoch": 3.600085474651424, "grad_norm": 0.07336389273405075, "learning_rate": 8.431702409075354e-07, "loss": 0.0052, "step": 336950 }, { "epoch": 3.6001923179657034, "grad_norm": 0.05014738440513611, "learning_rate": 8.431580217786673e-07, "loss": 0.0063, "step": 336960 }, { "epoch": 3.600299161279983, "grad_norm": 0.031971681863069534, "learning_rate": 8.431458022623482e-07, "loss": 0.0008, "step": 336970 }, { "epoch": 3.6004060045942623, "grad_norm": 0.02048063650727272, "learning_rate": 8.431335823585915e-07, "loss": 0.0216, "step": 336980 }, { "epoch": 3.600512847908542, "grad_norm": 0.4434589147567749, "learning_rate": 8.43121362067411e-07, "loss": 0.0078, "step": 336990 }, { "epoch": 3.6006196912228217, "grad_norm": 0.040314383804798126, "learning_rate": 8.431091413888207e-07, "loss": 0.0252, "step": 337000 }, { "epoch": 3.6007265345371016, "grad_norm": 0.021925009787082672, "learning_rate": 8.430969203228343e-07, "loss": 0.0078, "step": 337010 }, { "epoch": 3.600833377851381, "grad_norm": 4.471268177032471, "learning_rate": 8.430846988694656e-07, "loss": 0.0205, "step": 337020 }, { "epoch": 3.6009402211656605, "grad_norm": 0.009770936332643032, "learning_rate": 8.430724770287282e-07, "loss": 0.0064, "step": 337030 }, { "epoch": 3.60104706447994, "grad_norm": 2.379208564758301, "learning_rate": 8.430602548006361e-07, "loss": 0.0065, "step": 337040 }, { "epoch": 3.60115390779422, "grad_norm": 0.32895010709762573, "learning_rate": 8.430480321852032e-07, "loss": 0.0039, "step": 337050 }, { "epoch": 3.6012607511084993, "grad_norm": 0.014459004625678062, "learning_rate": 8.430358091824432e-07, "loss": 0.0003, "step": 337060 }, { "epoch": 3.601367594422779, "grad_norm": 2.4055216312408447, "learning_rate": 8.430235857923698e-07, "loss": 0.0299, "step": 337070 }, { "epoch": 3.6014744377370587, "grad_norm": 4.036163806915283, "learning_rate": 8.430113620149969e-07, "loss": 0.016, "step": 337080 }, { "epoch": 3.601581281051338, "grad_norm": 8.01198959350586, "learning_rate": 8.429991378503382e-07, "loss": 0.0353, "step": 337090 }, { "epoch": 3.6016881243656176, "grad_norm": 0.014436648227274418, "learning_rate": 8.429869132984078e-07, "loss": 0.0113, "step": 337100 }, { "epoch": 3.6017949676798975, "grad_norm": 6.526733875274658, "learning_rate": 8.429746883592192e-07, "loss": 0.0173, "step": 337110 }, { "epoch": 3.601901810994177, "grad_norm": 2.8294529914855957, "learning_rate": 8.429624630327863e-07, "loss": 0.0393, "step": 337120 }, { "epoch": 3.602008654308457, "grad_norm": 0.008644125424325466, "learning_rate": 8.429502373191228e-07, "loss": 0.0009, "step": 337130 }, { "epoch": 3.6021154976227363, "grad_norm": 8.729286193847656, "learning_rate": 8.429380112182426e-07, "loss": 0.0291, "step": 337140 }, { "epoch": 3.6022223409370158, "grad_norm": 0.033990077674388885, "learning_rate": 8.429257847301596e-07, "loss": 0.004, "step": 337150 }, { "epoch": 3.6023291842512952, "grad_norm": 0.0075820148922502995, "learning_rate": 8.429135578548873e-07, "loss": 0.0144, "step": 337160 }, { "epoch": 3.602436027565575, "grad_norm": 0.3329813778400421, "learning_rate": 8.429013305924399e-07, "loss": 0.0062, "step": 337170 }, { "epoch": 3.6025428708798546, "grad_norm": 0.0008512734202668071, "learning_rate": 8.428891029428311e-07, "loss": 0.0088, "step": 337180 }, { "epoch": 3.6026497141941345, "grad_norm": 1.1456992626190186, "learning_rate": 8.428768749060745e-07, "loss": 0.011, "step": 337190 }, { "epoch": 3.602756557508414, "grad_norm": 2.6337969303131104, "learning_rate": 8.428646464821841e-07, "loss": 0.0034, "step": 337200 }, { "epoch": 3.6028634008226934, "grad_norm": 0.0994100272655487, "learning_rate": 8.428524176711735e-07, "loss": 0.0088, "step": 337210 }, { "epoch": 3.602970244136973, "grad_norm": 0.013305557891726494, "learning_rate": 8.428401884730567e-07, "loss": 0.0011, "step": 337220 }, { "epoch": 3.6030770874512528, "grad_norm": 0.3215150237083435, "learning_rate": 8.428279588878475e-07, "loss": 0.0027, "step": 337230 }, { "epoch": 3.6031839307655322, "grad_norm": 0.021062379702925682, "learning_rate": 8.428157289155596e-07, "loss": 0.0237, "step": 337240 }, { "epoch": 3.603290774079812, "grad_norm": 6.512852668762207, "learning_rate": 8.428034985562069e-07, "loss": 0.0079, "step": 337250 }, { "epoch": 3.6033976173940916, "grad_norm": 1.3843294382095337, "learning_rate": 8.427912678098033e-07, "loss": 0.019, "step": 337260 }, { "epoch": 3.603504460708371, "grad_norm": 4.9198784828186035, "learning_rate": 8.427790366763624e-07, "loss": 0.0106, "step": 337270 }, { "epoch": 3.6036113040226505, "grad_norm": 2.1384150981903076, "learning_rate": 8.427668051558981e-07, "loss": 0.0224, "step": 337280 }, { "epoch": 3.6037181473369304, "grad_norm": 0.008898225612938404, "learning_rate": 8.42754573248424e-07, "loss": 0.0183, "step": 337290 }, { "epoch": 3.60382499065121, "grad_norm": 1.1864389181137085, "learning_rate": 8.427423409539543e-07, "loss": 0.0079, "step": 337300 }, { "epoch": 3.6039318339654898, "grad_norm": 0.15508237481117249, "learning_rate": 8.427301082725027e-07, "loss": 0.0254, "step": 337310 }, { "epoch": 3.6040386772797692, "grad_norm": 0.004861560184508562, "learning_rate": 8.427178752040829e-07, "loss": 0.0018, "step": 337320 }, { "epoch": 3.6041455205940487, "grad_norm": 0.14356635510921478, "learning_rate": 8.427056417487086e-07, "loss": 0.0163, "step": 337330 }, { "epoch": 3.6042523639083286, "grad_norm": 0.006701861508190632, "learning_rate": 8.426934079063939e-07, "loss": 0.0118, "step": 337340 }, { "epoch": 3.604359207222608, "grad_norm": 0.02139304392039776, "learning_rate": 8.426811736771525e-07, "loss": 0.0291, "step": 337350 }, { "epoch": 3.6044660505368875, "grad_norm": 0.0014551974600180984, "learning_rate": 8.426689390609981e-07, "loss": 0.0148, "step": 337360 }, { "epoch": 3.6045728938511674, "grad_norm": 0.10086330771446228, "learning_rate": 8.426567040579447e-07, "loss": 0.0057, "step": 337370 }, { "epoch": 3.604679737165447, "grad_norm": 0.7119269967079163, "learning_rate": 8.426444686680061e-07, "loss": 0.0151, "step": 337380 }, { "epoch": 3.6047865804797263, "grad_norm": 0.012581931427121162, "learning_rate": 8.426322328911958e-07, "loss": 0.0233, "step": 337390 }, { "epoch": 3.6048934237940062, "grad_norm": 0.29863202571868896, "learning_rate": 8.42619996727528e-07, "loss": 0.0082, "step": 337400 }, { "epoch": 3.6050002671082857, "grad_norm": 0.007995853200554848, "learning_rate": 8.426077601770163e-07, "loss": 0.0038, "step": 337410 }, { "epoch": 3.605107110422565, "grad_norm": 10.781137466430664, "learning_rate": 8.425955232396747e-07, "loss": 0.0116, "step": 337420 }, { "epoch": 3.605213953736845, "grad_norm": 0.8546469807624817, "learning_rate": 8.425832859155166e-07, "loss": 0.0095, "step": 337430 }, { "epoch": 3.6053207970511245, "grad_norm": 0.3034161925315857, "learning_rate": 8.425710482045563e-07, "loss": 0.0188, "step": 337440 }, { "epoch": 3.605427640365404, "grad_norm": 0.002508726203814149, "learning_rate": 8.425588101068075e-07, "loss": 0.0022, "step": 337450 }, { "epoch": 3.605534483679684, "grad_norm": 0.0269063301384449, "learning_rate": 8.425465716222839e-07, "loss": 0.0115, "step": 337460 }, { "epoch": 3.6056413269939633, "grad_norm": 5.128323554992676, "learning_rate": 8.425343327509994e-07, "loss": 0.0108, "step": 337470 }, { "epoch": 3.6057481703082432, "grad_norm": 0.17544007301330566, "learning_rate": 8.425220934929677e-07, "loss": 0.0161, "step": 337480 }, { "epoch": 3.6058550136225227, "grad_norm": 2.216493606567383, "learning_rate": 8.425098538482028e-07, "loss": 0.0131, "step": 337490 }, { "epoch": 3.605961856936802, "grad_norm": 1.7035452127456665, "learning_rate": 8.424976138167184e-07, "loss": 0.0287, "step": 337500 }, { "epoch": 3.6060687002510816, "grad_norm": 0.03381510078907013, "learning_rate": 8.424853733985283e-07, "loss": 0.013, "step": 337510 }, { "epoch": 3.6061755435653615, "grad_norm": 0.01338521670550108, "learning_rate": 8.424731325936465e-07, "loss": 0.0149, "step": 337520 }, { "epoch": 3.606282386879641, "grad_norm": 0.20678205788135529, "learning_rate": 8.424608914020865e-07, "loss": 0.0158, "step": 337530 }, { "epoch": 3.606389230193921, "grad_norm": 0.0158351119607687, "learning_rate": 8.424486498238624e-07, "loss": 0.014, "step": 337540 }, { "epoch": 3.6064960735082003, "grad_norm": 1.1229286193847656, "learning_rate": 8.42436407858988e-07, "loss": 0.0126, "step": 337550 }, { "epoch": 3.60660291682248, "grad_norm": 1.5185590982437134, "learning_rate": 8.424241655074769e-07, "loss": 0.0184, "step": 337560 }, { "epoch": 3.6067097601367593, "grad_norm": 0.031072860583662987, "learning_rate": 8.424119227693431e-07, "loss": 0.0176, "step": 337570 }, { "epoch": 3.606816603451039, "grad_norm": 0.05194149538874626, "learning_rate": 8.423996796446004e-07, "loss": 0.0151, "step": 337580 }, { "epoch": 3.6069234467653186, "grad_norm": 4.501471996307373, "learning_rate": 8.423874361332627e-07, "loss": 0.0178, "step": 337590 }, { "epoch": 3.6070302900795985, "grad_norm": 0.7866647839546204, "learning_rate": 8.423751922353437e-07, "loss": 0.0061, "step": 337600 }, { "epoch": 3.607137133393878, "grad_norm": 0.011336115188896656, "learning_rate": 8.423629479508573e-07, "loss": 0.0111, "step": 337610 }, { "epoch": 3.6072439767081574, "grad_norm": 1.8513051271438599, "learning_rate": 8.423507032798172e-07, "loss": 0.0032, "step": 337620 }, { "epoch": 3.607350820022437, "grad_norm": 0.20547811686992645, "learning_rate": 8.423384582222373e-07, "loss": 0.0032, "step": 337630 }, { "epoch": 3.607457663336717, "grad_norm": 0.04175519943237305, "learning_rate": 8.423262127781316e-07, "loss": 0.0065, "step": 337640 }, { "epoch": 3.6075645066509963, "grad_norm": 2.3026580810546875, "learning_rate": 8.423139669475136e-07, "loss": 0.0175, "step": 337650 }, { "epoch": 3.607671349965276, "grad_norm": 0.010620363056659698, "learning_rate": 8.423017207303975e-07, "loss": 0.0022, "step": 337660 }, { "epoch": 3.6077781932795556, "grad_norm": 0.010575699619948864, "learning_rate": 8.422894741267967e-07, "loss": 0.0034, "step": 337670 }, { "epoch": 3.607885036593835, "grad_norm": 0.8096303939819336, "learning_rate": 8.422772271367255e-07, "loss": 0.007, "step": 337680 }, { "epoch": 3.6079918799081145, "grad_norm": 0.18765294551849365, "learning_rate": 8.422649797601972e-07, "loss": 0.0104, "step": 337690 }, { "epoch": 3.6080987232223944, "grad_norm": 8.276522636413574, "learning_rate": 8.422527319972261e-07, "loss": 0.0052, "step": 337700 }, { "epoch": 3.608205566536674, "grad_norm": NaN, "learning_rate": 8.422404838478257e-07, "loss": 0.0464, "step": 337710 }, { "epoch": 3.608312409850954, "grad_norm": 16.057477951049805, "learning_rate": 8.422282353120102e-07, "loss": 0.0394, "step": 337720 }, { "epoch": 3.6084192531652333, "grad_norm": 0.008820949122309685, "learning_rate": 8.422159863897928e-07, "loss": 0.0024, "step": 337730 }, { "epoch": 3.6085260964795127, "grad_norm": 0.006817842833697796, "learning_rate": 8.422037370811881e-07, "loss": 0.0258, "step": 337740 }, { "epoch": 3.608632939793792, "grad_norm": 0.0037355837412178516, "learning_rate": 8.421914873862093e-07, "loss": 0.019, "step": 337750 }, { "epoch": 3.608739783108072, "grad_norm": 0.026903094723820686, "learning_rate": 8.421792373048706e-07, "loss": 0.0077, "step": 337760 }, { "epoch": 3.6088466264223515, "grad_norm": 2.1815083026885986, "learning_rate": 8.421669868371858e-07, "loss": 0.0197, "step": 337770 }, { "epoch": 3.6089534697366314, "grad_norm": 5.212663173675537, "learning_rate": 8.421547359831686e-07, "loss": 0.0089, "step": 337780 }, { "epoch": 3.609060313050911, "grad_norm": 5.269658088684082, "learning_rate": 8.421424847428327e-07, "loss": 0.0071, "step": 337790 }, { "epoch": 3.6091671563651904, "grad_norm": 0.00820829439908266, "learning_rate": 8.421302331161923e-07, "loss": 0.0264, "step": 337800 }, { "epoch": 3.60927399967947, "grad_norm": 0.14354173839092255, "learning_rate": 8.42117981103261e-07, "loss": 0.0372, "step": 337810 }, { "epoch": 3.6093808429937497, "grad_norm": 7.6851935386657715, "learning_rate": 8.421057287040527e-07, "loss": 0.0233, "step": 337820 }, { "epoch": 3.609487686308029, "grad_norm": 0.04569251835346222, "learning_rate": 8.420934759185812e-07, "loss": 0.0042, "step": 337830 }, { "epoch": 3.609594529622309, "grad_norm": 0.9692466259002686, "learning_rate": 8.420812227468603e-07, "loss": 0.0022, "step": 337840 }, { "epoch": 3.6097013729365885, "grad_norm": 0.021979546174407005, "learning_rate": 8.42068969188904e-07, "loss": 0.0115, "step": 337850 }, { "epoch": 3.609808216250868, "grad_norm": 5.171769142150879, "learning_rate": 8.420567152447258e-07, "loss": 0.0291, "step": 337860 }, { "epoch": 3.6099150595651475, "grad_norm": 0.7150658965110779, "learning_rate": 8.420444609143398e-07, "loss": 0.0071, "step": 337870 }, { "epoch": 3.6100219028794274, "grad_norm": 0.013687213882803917, "learning_rate": 8.4203220619776e-07, "loss": 0.0207, "step": 337880 }, { "epoch": 3.610128746193707, "grad_norm": 0.002169041894376278, "learning_rate": 8.420199510949999e-07, "loss": 0.0028, "step": 337890 }, { "epoch": 3.6102355895079867, "grad_norm": 0.6894299387931824, "learning_rate": 8.420076956060734e-07, "loss": 0.0069, "step": 337900 }, { "epoch": 3.610342432822266, "grad_norm": 0.014283183962106705, "learning_rate": 8.419954397309945e-07, "loss": 0.0247, "step": 337910 }, { "epoch": 3.6104492761365456, "grad_norm": 0.18393881618976593, "learning_rate": 8.419831834697768e-07, "loss": 0.0119, "step": 337920 }, { "epoch": 3.610556119450825, "grad_norm": 1.6698098182678223, "learning_rate": 8.419709268224344e-07, "loss": 0.0167, "step": 337930 }, { "epoch": 3.610662962765105, "grad_norm": 0.0030338370706886053, "learning_rate": 8.41958669788981e-07, "loss": 0.0119, "step": 337940 }, { "epoch": 3.6107698060793845, "grad_norm": 0.07925020903348923, "learning_rate": 8.419464123694304e-07, "loss": 0.0177, "step": 337950 }, { "epoch": 3.6108766493936644, "grad_norm": 0.08681642264127731, "learning_rate": 8.419341545637966e-07, "loss": 0.0259, "step": 337960 }, { "epoch": 3.610983492707944, "grad_norm": 2.546010732650757, "learning_rate": 8.419218963720932e-07, "loss": 0.0009, "step": 337970 }, { "epoch": 3.6110903360222233, "grad_norm": 0.1121101826429367, "learning_rate": 8.419096377943342e-07, "loss": 0.0141, "step": 337980 }, { "epoch": 3.6111971793365027, "grad_norm": 0.013474303297698498, "learning_rate": 8.418973788305334e-07, "loss": 0.0253, "step": 337990 }, { "epoch": 3.6113040226507827, "grad_norm": 0.005606786347925663, "learning_rate": 8.418851194807048e-07, "loss": 0.0036, "step": 338000 }, { "epoch": 3.611410865965062, "grad_norm": 2.964354991912842, "learning_rate": 8.41872859744862e-07, "loss": 0.0124, "step": 338010 }, { "epoch": 3.611517709279342, "grad_norm": 0.01713748648762703, "learning_rate": 8.418605996230189e-07, "loss": 0.0269, "step": 338020 }, { "epoch": 3.6116245525936215, "grad_norm": 1.9399213790893555, "learning_rate": 8.418483391151893e-07, "loss": 0.0134, "step": 338030 }, { "epoch": 3.611731395907901, "grad_norm": 0.7529816031455994, "learning_rate": 8.418360782213873e-07, "loss": 0.0093, "step": 338040 }, { "epoch": 3.611838239222181, "grad_norm": 1.8430296182632446, "learning_rate": 8.418238169416265e-07, "loss": 0.0174, "step": 338050 }, { "epoch": 3.6119450825364603, "grad_norm": 1.8048386573791504, "learning_rate": 8.418115552759209e-07, "loss": 0.0046, "step": 338060 }, { "epoch": 3.6120519258507398, "grad_norm": 0.016859347000718117, "learning_rate": 8.417992932242841e-07, "loss": 0.011, "step": 338070 }, { "epoch": 3.6121587691650197, "grad_norm": 1.1422346830368042, "learning_rate": 8.417870307867303e-07, "loss": 0.0083, "step": 338080 }, { "epoch": 3.612265612479299, "grad_norm": 0.13516195118427277, "learning_rate": 8.41774767963273e-07, "loss": 0.0318, "step": 338090 }, { "epoch": 3.6123724557935786, "grad_norm": 0.029720883816480637, "learning_rate": 8.417625047539261e-07, "loss": 0.0109, "step": 338100 }, { "epoch": 3.6124792991078585, "grad_norm": 0.006265914533287287, "learning_rate": 8.417502411587038e-07, "loss": 0.0022, "step": 338110 }, { "epoch": 3.612586142422138, "grad_norm": 0.06085256114602089, "learning_rate": 8.417379771776196e-07, "loss": 0.0089, "step": 338120 }, { "epoch": 3.6126929857364174, "grad_norm": 0.058901406824588776, "learning_rate": 8.417257128106872e-07, "loss": 0.0086, "step": 338130 }, { "epoch": 3.6127998290506973, "grad_norm": 0.10148800909519196, "learning_rate": 8.417134480579209e-07, "loss": 0.0017, "step": 338140 }, { "epoch": 3.6129066723649768, "grad_norm": 0.09898226708173752, "learning_rate": 8.417011829193344e-07, "loss": 0.0065, "step": 338150 }, { "epoch": 3.613013515679256, "grad_norm": 0.00849099550396204, "learning_rate": 8.416889173949412e-07, "loss": 0.0084, "step": 338160 }, { "epoch": 3.613120358993536, "grad_norm": 0.05271126702427864, "learning_rate": 8.416766514847556e-07, "loss": 0.019, "step": 338170 }, { "epoch": 3.6132272023078156, "grad_norm": 0.02463602088391781, "learning_rate": 8.416643851887913e-07, "loss": 0.0262, "step": 338180 }, { "epoch": 3.613334045622095, "grad_norm": 0.0015947520732879639, "learning_rate": 8.41652118507062e-07, "loss": 0.007, "step": 338190 }, { "epoch": 3.613440888936375, "grad_norm": 0.781217634677887, "learning_rate": 8.416398514395817e-07, "loss": 0.001, "step": 338200 }, { "epoch": 3.6135477322506544, "grad_norm": 0.4649629592895508, "learning_rate": 8.416275839863643e-07, "loss": 0.0106, "step": 338210 }, { "epoch": 3.613654575564934, "grad_norm": 0.16491229832172394, "learning_rate": 8.416153161474235e-07, "loss": 0.0194, "step": 338220 }, { "epoch": 3.6137614188792138, "grad_norm": 0.037276189774274826, "learning_rate": 8.416030479227733e-07, "loss": 0.0308, "step": 338230 }, { "epoch": 3.613868262193493, "grad_norm": 8.820722579956055, "learning_rate": 8.415907793124275e-07, "loss": 0.0289, "step": 338240 }, { "epoch": 3.613975105507773, "grad_norm": 0.004936956334859133, "learning_rate": 8.415785103163998e-07, "loss": 0.0029, "step": 338250 }, { "epoch": 3.6140819488220526, "grad_norm": 1.94594407081604, "learning_rate": 8.415662409347044e-07, "loss": 0.0171, "step": 338260 }, { "epoch": 3.614188792136332, "grad_norm": 9.169897079467773, "learning_rate": 8.415539711673547e-07, "loss": 0.0131, "step": 338270 }, { "epoch": 3.6142956354506115, "grad_norm": 0.027621645480394363, "learning_rate": 8.415417010143648e-07, "loss": 0.0186, "step": 338280 }, { "epoch": 3.6144024787648914, "grad_norm": 0.12117017060518265, "learning_rate": 8.415294304757488e-07, "loss": 0.0039, "step": 338290 }, { "epoch": 3.614509322079171, "grad_norm": 0.0257607139647007, "learning_rate": 8.4151715955152e-07, "loss": 0.0171, "step": 338300 }, { "epoch": 3.6146161653934508, "grad_norm": 0.27986055612564087, "learning_rate": 8.415048882416927e-07, "loss": 0.0008, "step": 338310 }, { "epoch": 3.6147230087077302, "grad_norm": 0.15494374930858612, "learning_rate": 8.414926165462806e-07, "loss": 0.0199, "step": 338320 }, { "epoch": 3.6148298520220097, "grad_norm": 4.203098773956299, "learning_rate": 8.414803444652975e-07, "loss": 0.0117, "step": 338330 }, { "epoch": 3.614936695336289, "grad_norm": 0.02925860695540905, "learning_rate": 8.414680719987573e-07, "loss": 0.004, "step": 338340 }, { "epoch": 3.615043538650569, "grad_norm": 0.2547246515750885, "learning_rate": 8.41455799146674e-07, "loss": 0.0044, "step": 338350 }, { "epoch": 3.6151503819648485, "grad_norm": 0.5743333697319031, "learning_rate": 8.414435259090613e-07, "loss": 0.0034, "step": 338360 }, { "epoch": 3.6152572252791284, "grad_norm": 0.0061887833289802074, "learning_rate": 8.41431252285933e-07, "loss": 0.0007, "step": 338370 }, { "epoch": 3.615364068593408, "grad_norm": 3.750859260559082, "learning_rate": 8.414189782773032e-07, "loss": 0.0075, "step": 338380 }, { "epoch": 3.6154709119076873, "grad_norm": 0.0009894849499687552, "learning_rate": 8.414067038831853e-07, "loss": 0.0082, "step": 338390 }, { "epoch": 3.615577755221967, "grad_norm": 1.9395705461502075, "learning_rate": 8.413944291035937e-07, "loss": 0.01, "step": 338400 }, { "epoch": 3.6156845985362467, "grad_norm": 0.035317033529281616, "learning_rate": 8.41382153938542e-07, "loss": 0.0013, "step": 338410 }, { "epoch": 3.615791441850526, "grad_norm": 0.014793016947805882, "learning_rate": 8.413698783880441e-07, "loss": 0.0094, "step": 338420 }, { "epoch": 3.615898285164806, "grad_norm": 0.22910088300704956, "learning_rate": 8.413576024521138e-07, "loss": 0.0024, "step": 338430 }, { "epoch": 3.6160051284790855, "grad_norm": 0.0017526771407574415, "learning_rate": 8.41345326130765e-07, "loss": 0.0006, "step": 338440 }, { "epoch": 3.616111971793365, "grad_norm": 0.03957183286547661, "learning_rate": 8.413330494240116e-07, "loss": 0.0132, "step": 338450 }, { "epoch": 3.6162188151076444, "grad_norm": 7.141918182373047, "learning_rate": 8.413207723318675e-07, "loss": 0.0459, "step": 338460 }, { "epoch": 3.6163256584219243, "grad_norm": 0.0974554643034935, "learning_rate": 8.413084948543463e-07, "loss": 0.0035, "step": 338470 }, { "epoch": 3.616432501736204, "grad_norm": 0.6786885261535645, "learning_rate": 8.412962169914622e-07, "loss": 0.0026, "step": 338480 }, { "epoch": 3.6165393450504837, "grad_norm": 0.025002872571349144, "learning_rate": 8.412839387432289e-07, "loss": 0.0008, "step": 338490 }, { "epoch": 3.616646188364763, "grad_norm": 1.8681575059890747, "learning_rate": 8.412716601096601e-07, "loss": 0.0086, "step": 338500 }, { "epoch": 3.6167530316790426, "grad_norm": 0.027799827978014946, "learning_rate": 8.412593810907701e-07, "loss": 0.0123, "step": 338510 }, { "epoch": 3.616859874993322, "grad_norm": 0.14167198538780212, "learning_rate": 8.412471016865724e-07, "loss": 0.0058, "step": 338520 }, { "epoch": 3.616966718307602, "grad_norm": 0.41415855288505554, "learning_rate": 8.412348218970808e-07, "loss": 0.0215, "step": 338530 }, { "epoch": 3.6170735616218814, "grad_norm": 3.6671242713928223, "learning_rate": 8.412225417223096e-07, "loss": 0.0253, "step": 338540 }, { "epoch": 3.6171804049361613, "grad_norm": 0.01742321439087391, "learning_rate": 8.412102611622722e-07, "loss": 0.0003, "step": 338550 }, { "epoch": 3.617287248250441, "grad_norm": 0.011791029945015907, "learning_rate": 8.411979802169827e-07, "loss": 0.0133, "step": 338560 }, { "epoch": 3.6173940915647202, "grad_norm": 8.049212455749512, "learning_rate": 8.41185698886455e-07, "loss": 0.0159, "step": 338570 }, { "epoch": 3.6175009348789997, "grad_norm": 0.22796779870986938, "learning_rate": 8.41173417170703e-07, "loss": 0.0111, "step": 338580 }, { "epoch": 3.6176077781932796, "grad_norm": 0.03229624032974243, "learning_rate": 8.411611350697402e-07, "loss": 0.0197, "step": 338590 }, { "epoch": 3.617714621507559, "grad_norm": 5.816512584686279, "learning_rate": 8.411488525835808e-07, "loss": 0.0685, "step": 338600 }, { "epoch": 3.617821464821839, "grad_norm": 1.0189265012741089, "learning_rate": 8.411365697122386e-07, "loss": 0.0265, "step": 338610 }, { "epoch": 3.6179283081361184, "grad_norm": 2.844142436981201, "learning_rate": 8.411242864557276e-07, "loss": 0.0224, "step": 338620 }, { "epoch": 3.618035151450398, "grad_norm": 5.512203693389893, "learning_rate": 8.411120028140615e-07, "loss": 0.0111, "step": 338630 }, { "epoch": 3.6181419947646773, "grad_norm": 1.1700242757797241, "learning_rate": 8.410997187872541e-07, "loss": 0.0169, "step": 338640 }, { "epoch": 3.6182488380789573, "grad_norm": 0.14036358892917633, "learning_rate": 8.410874343753195e-07, "loss": 0.0051, "step": 338650 }, { "epoch": 3.6183556813932367, "grad_norm": 3.1335341930389404, "learning_rate": 8.410751495782713e-07, "loss": 0.0114, "step": 338660 }, { "epoch": 3.6184625247075166, "grad_norm": 0.3453764319419861, "learning_rate": 8.410628643961236e-07, "loss": 0.0334, "step": 338670 }, { "epoch": 3.618569368021796, "grad_norm": 0.013431264087557793, "learning_rate": 8.410505788288902e-07, "loss": 0.0165, "step": 338680 }, { "epoch": 3.6186762113360755, "grad_norm": 4.382525444030762, "learning_rate": 8.410382928765849e-07, "loss": 0.007, "step": 338690 }, { "epoch": 3.618783054650355, "grad_norm": 0.18972986936569214, "learning_rate": 8.410260065392216e-07, "loss": 0.0262, "step": 338700 }, { "epoch": 3.618889897964635, "grad_norm": 0.5851914286613464, "learning_rate": 8.410137198168144e-07, "loss": 0.0073, "step": 338710 }, { "epoch": 3.6189967412789144, "grad_norm": 4.184610843658447, "learning_rate": 8.410014327093768e-07, "loss": 0.0082, "step": 338720 }, { "epoch": 3.6191035845931943, "grad_norm": 6.426168918609619, "learning_rate": 8.409891452169229e-07, "loss": 0.0115, "step": 338730 }, { "epoch": 3.6192104279074737, "grad_norm": 0.008702206425368786, "learning_rate": 8.409768573394664e-07, "loss": 0.0022, "step": 338740 }, { "epoch": 3.619317271221753, "grad_norm": 0.025523263961076736, "learning_rate": 8.409645690770215e-07, "loss": 0.0089, "step": 338750 }, { "epoch": 3.6194241145360326, "grad_norm": 1.0444170236587524, "learning_rate": 8.409522804296016e-07, "loss": 0.0103, "step": 338760 }, { "epoch": 3.6195309578503125, "grad_norm": 0.2993129789829254, "learning_rate": 8.409399913972211e-07, "loss": 0.0071, "step": 338770 }, { "epoch": 3.619637801164592, "grad_norm": 0.11352618783712387, "learning_rate": 8.409277019798935e-07, "loss": 0.0027, "step": 338780 }, { "epoch": 3.619744644478872, "grad_norm": 0.03677148371934891, "learning_rate": 8.409154121776327e-07, "loss": 0.0173, "step": 338790 }, { "epoch": 3.6198514877931514, "grad_norm": 0.02576316148042679, "learning_rate": 8.409031219904528e-07, "loss": 0.0175, "step": 338800 }, { "epoch": 3.619958331107431, "grad_norm": 0.10146407783031464, "learning_rate": 8.408908314183676e-07, "loss": 0.0139, "step": 338810 }, { "epoch": 3.6200651744217107, "grad_norm": 2.0713412761688232, "learning_rate": 8.408785404613909e-07, "loss": 0.034, "step": 338820 }, { "epoch": 3.62017201773599, "grad_norm": 0.1351202130317688, "learning_rate": 8.408662491195365e-07, "loss": 0.0313, "step": 338830 }, { "epoch": 3.6202788610502696, "grad_norm": 0.00179920659866184, "learning_rate": 8.408539573928184e-07, "loss": 0.0273, "step": 338840 }, { "epoch": 3.6203857043645495, "grad_norm": 0.004216654226183891, "learning_rate": 8.408416652812504e-07, "loss": 0.046, "step": 338850 }, { "epoch": 3.620492547678829, "grad_norm": 0.010536347515881062, "learning_rate": 8.408293727848465e-07, "loss": 0.0062, "step": 338860 }, { "epoch": 3.6205993909931085, "grad_norm": 9.229366302490234, "learning_rate": 8.408170799036208e-07, "loss": 0.0117, "step": 338870 }, { "epoch": 3.6207062343073884, "grad_norm": 2.6872878074645996, "learning_rate": 8.408047866375864e-07, "loss": 0.0335, "step": 338880 }, { "epoch": 3.620813077621668, "grad_norm": 0.1191900372505188, "learning_rate": 8.407924929867581e-07, "loss": 0.0068, "step": 338890 }, { "epoch": 3.6209199209359473, "grad_norm": 0.0701657384634018, "learning_rate": 8.40780198951149e-07, "loss": 0.0097, "step": 338900 }, { "epoch": 3.621026764250227, "grad_norm": 7.350555896759033, "learning_rate": 8.407679045307735e-07, "loss": 0.0164, "step": 338910 }, { "epoch": 3.6211336075645066, "grad_norm": 0.2132422775030136, "learning_rate": 8.407556097256454e-07, "loss": 0.0189, "step": 338920 }, { "epoch": 3.621240450878786, "grad_norm": 0.8817068934440613, "learning_rate": 8.407433145357783e-07, "loss": 0.0049, "step": 338930 }, { "epoch": 3.621347294193066, "grad_norm": 0.4451538026332855, "learning_rate": 8.407310189611864e-07, "loss": 0.0174, "step": 338940 }, { "epoch": 3.6214541375073455, "grad_norm": 3.7017178535461426, "learning_rate": 8.407187230018835e-07, "loss": 0.0237, "step": 338950 }, { "epoch": 3.6215609808216254, "grad_norm": 1.069815993309021, "learning_rate": 8.407064266578834e-07, "loss": 0.0907, "step": 338960 }, { "epoch": 3.621667824135905, "grad_norm": 0.011885461397469044, "learning_rate": 8.406941299292001e-07, "loss": 0.0433, "step": 338970 }, { "epoch": 3.6217746674501843, "grad_norm": 4.07559871673584, "learning_rate": 8.406818328158472e-07, "loss": 0.0058, "step": 338980 }, { "epoch": 3.6218815107644637, "grad_norm": 0.004859317094087601, "learning_rate": 8.40669535317839e-07, "loss": 0.0135, "step": 338990 }, { "epoch": 3.6219883540787436, "grad_norm": 0.007260957267135382, "learning_rate": 8.406572374351892e-07, "loss": 0.0095, "step": 339000 }, { "epoch": 3.622095197393023, "grad_norm": 0.11953604966402054, "learning_rate": 8.406449391679116e-07, "loss": 0.0348, "step": 339010 }, { "epoch": 3.622202040707303, "grad_norm": 0.006187321152538061, "learning_rate": 8.4063264051602e-07, "loss": 0.0031, "step": 339020 }, { "epoch": 3.6223088840215825, "grad_norm": 0.6813008189201355, "learning_rate": 8.406203414795287e-07, "loss": 0.0022, "step": 339030 }, { "epoch": 3.622415727335862, "grad_norm": 0.0006657252670265734, "learning_rate": 8.406080420584512e-07, "loss": 0.014, "step": 339040 }, { "epoch": 3.6225225706501414, "grad_norm": 0.06723273545503616, "learning_rate": 8.405957422528014e-07, "loss": 0.0186, "step": 339050 }, { "epoch": 3.6226294139644213, "grad_norm": 0.28731751441955566, "learning_rate": 8.405834420625934e-07, "loss": 0.0516, "step": 339060 }, { "epoch": 3.6227362572787007, "grad_norm": 0.04449058324098587, "learning_rate": 8.405711414878412e-07, "loss": 0.0101, "step": 339070 }, { "epoch": 3.6228431005929806, "grad_norm": 1.2530570030212402, "learning_rate": 8.405588405285582e-07, "loss": 0.0043, "step": 339080 }, { "epoch": 3.62294994390726, "grad_norm": 1.7802467346191406, "learning_rate": 8.405465391847587e-07, "loss": 0.0196, "step": 339090 }, { "epoch": 3.6230567872215396, "grad_norm": 0.13024882972240448, "learning_rate": 8.405342374564564e-07, "loss": 0.0058, "step": 339100 }, { "epoch": 3.623163630535819, "grad_norm": 0.024796614423394203, "learning_rate": 8.405219353436652e-07, "loss": 0.0353, "step": 339110 }, { "epoch": 3.623270473850099, "grad_norm": 0.7541718482971191, "learning_rate": 8.405096328463992e-07, "loss": 0.0023, "step": 339120 }, { "epoch": 3.6233773171643784, "grad_norm": 4.252145290374756, "learning_rate": 8.404973299646719e-07, "loss": 0.0196, "step": 339130 }, { "epoch": 3.6234841604786583, "grad_norm": 0.012718021869659424, "learning_rate": 8.404850266984976e-07, "loss": 0.0209, "step": 339140 }, { "epoch": 3.6235910037929377, "grad_norm": 1.1932705640792847, "learning_rate": 8.4047272304789e-07, "loss": 0.0258, "step": 339150 }, { "epoch": 3.623697847107217, "grad_norm": 3.32218337059021, "learning_rate": 8.404604190128629e-07, "loss": 0.0065, "step": 339160 }, { "epoch": 3.6238046904214967, "grad_norm": 0.6926248669624329, "learning_rate": 8.404481145934303e-07, "loss": 0.0036, "step": 339170 }, { "epoch": 3.6239115337357766, "grad_norm": 0.009860565885901451, "learning_rate": 8.40435809789606e-07, "loss": 0.0079, "step": 339180 }, { "epoch": 3.624018377050056, "grad_norm": 3.8278565406799316, "learning_rate": 8.404235046014043e-07, "loss": 0.061, "step": 339190 }, { "epoch": 3.624125220364336, "grad_norm": 0.042145077139139175, "learning_rate": 8.404111990288385e-07, "loss": 0.0026, "step": 339200 }, { "epoch": 3.6242320636786154, "grad_norm": 0.00585302384570241, "learning_rate": 8.403988930719228e-07, "loss": 0.0116, "step": 339210 }, { "epoch": 3.624338906992895, "grad_norm": 9.512568473815918, "learning_rate": 8.403865867306711e-07, "loss": 0.0684, "step": 339220 }, { "epoch": 3.6244457503071743, "grad_norm": 0.018240928649902344, "learning_rate": 8.403742800050972e-07, "loss": 0.0033, "step": 339230 }, { "epoch": 3.624552593621454, "grad_norm": 1.8858726024627686, "learning_rate": 8.403619728952152e-07, "loss": 0.0099, "step": 339240 }, { "epoch": 3.6246594369357337, "grad_norm": 0.027593621984124184, "learning_rate": 8.403496654010387e-07, "loss": 0.014, "step": 339250 }, { "epoch": 3.6247662802500136, "grad_norm": 15.072449684143066, "learning_rate": 8.403373575225819e-07, "loss": 0.0325, "step": 339260 }, { "epoch": 3.624873123564293, "grad_norm": 0.3642801344394684, "learning_rate": 8.403250492598581e-07, "loss": 0.0073, "step": 339270 }, { "epoch": 3.6249799668785725, "grad_norm": 0.2786082625389099, "learning_rate": 8.40312740612882e-07, "loss": 0.0058, "step": 339280 }, { "epoch": 3.625086810192852, "grad_norm": 0.0027964087203145027, "learning_rate": 8.403004315816672e-07, "loss": 0.0104, "step": 339290 }, { "epoch": 3.625193653507132, "grad_norm": 1.0516878366470337, "learning_rate": 8.402881221662274e-07, "loss": 0.0176, "step": 339300 }, { "epoch": 3.6253004968214113, "grad_norm": 1.4613856077194214, "learning_rate": 8.402758123665766e-07, "loss": 0.0056, "step": 339310 }, { "epoch": 3.625407340135691, "grad_norm": 2.7103309631347656, "learning_rate": 8.402635021827288e-07, "loss": 0.0126, "step": 339320 }, { "epoch": 3.6255141834499707, "grad_norm": 0.002253905637189746, "learning_rate": 8.402511916146977e-07, "loss": 0.0039, "step": 339330 }, { "epoch": 3.62562102676425, "grad_norm": 1.2375237941741943, "learning_rate": 8.402388806624974e-07, "loss": 0.0019, "step": 339340 }, { "epoch": 3.6257278700785296, "grad_norm": 5.19178581237793, "learning_rate": 8.402265693261418e-07, "loss": 0.0088, "step": 339350 }, { "epoch": 3.6258347133928095, "grad_norm": 0.029817014932632446, "learning_rate": 8.402142576056446e-07, "loss": 0.0131, "step": 339360 }, { "epoch": 3.625941556707089, "grad_norm": 19.369916915893555, "learning_rate": 8.402019455010199e-07, "loss": 0.0093, "step": 339370 }, { "epoch": 3.626048400021369, "grad_norm": 6.6425557136535645, "learning_rate": 8.401896330122816e-07, "loss": 0.0111, "step": 339380 }, { "epoch": 3.6261552433356483, "grad_norm": 3.674149513244629, "learning_rate": 8.401773201394433e-07, "loss": 0.0058, "step": 339390 }, { "epoch": 3.6262620866499278, "grad_norm": 0.026304617524147034, "learning_rate": 8.401650068825193e-07, "loss": 0.0074, "step": 339400 }, { "epoch": 3.6263689299642072, "grad_norm": 0.3437140882015228, "learning_rate": 8.401526932415232e-07, "loss": 0.0139, "step": 339410 }, { "epoch": 3.626475773278487, "grad_norm": 8.258968353271484, "learning_rate": 8.401403792164693e-07, "loss": 0.0075, "step": 339420 }, { "epoch": 3.6265826165927666, "grad_norm": 6.579827308654785, "learning_rate": 8.401280648073711e-07, "loss": 0.0097, "step": 339430 }, { "epoch": 3.6266894599070465, "grad_norm": 3.497443199157715, "learning_rate": 8.401157500142426e-07, "loss": 0.0053, "step": 339440 }, { "epoch": 3.626796303221326, "grad_norm": 0.07538455724716187, "learning_rate": 8.401034348370978e-07, "loss": 0.0402, "step": 339450 }, { "epoch": 3.6269031465356054, "grad_norm": 0.003787076799198985, "learning_rate": 8.400911192759505e-07, "loss": 0.0366, "step": 339460 }, { "epoch": 3.627009989849885, "grad_norm": 0.006977733690291643, "learning_rate": 8.400788033308146e-07, "loss": 0.0192, "step": 339470 }, { "epoch": 3.6271168331641648, "grad_norm": 0.03670042008161545, "learning_rate": 8.400664870017042e-07, "loss": 0.0305, "step": 339480 }, { "epoch": 3.6272236764784442, "grad_norm": 3.2610251903533936, "learning_rate": 8.40054170288633e-07, "loss": 0.0317, "step": 339490 }, { "epoch": 3.627330519792724, "grad_norm": 4.4344868659973145, "learning_rate": 8.40041853191615e-07, "loss": 0.0244, "step": 339500 }, { "epoch": 3.6274373631070036, "grad_norm": 0.04712601751089096, "learning_rate": 8.400295357106641e-07, "loss": 0.0195, "step": 339510 }, { "epoch": 3.627544206421283, "grad_norm": 0.13225841522216797, "learning_rate": 8.400172178457943e-07, "loss": 0.0015, "step": 339520 }, { "epoch": 3.627651049735563, "grad_norm": 4.506186485290527, "learning_rate": 8.400048995970192e-07, "loss": 0.0148, "step": 339530 }, { "epoch": 3.6277578930498424, "grad_norm": 0.5275906324386597, "learning_rate": 8.399925809643529e-07, "loss": 0.0013, "step": 339540 }, { "epoch": 3.627864736364122, "grad_norm": 0.23536919057369232, "learning_rate": 8.399802619478095e-07, "loss": 0.0264, "step": 339550 }, { "epoch": 3.627971579678402, "grad_norm": 1.6205613613128662, "learning_rate": 8.399679425474025e-07, "loss": 0.0241, "step": 339560 }, { "epoch": 3.6280784229926812, "grad_norm": 2.298257350921631, "learning_rate": 8.399556227631463e-07, "loss": 0.0031, "step": 339570 }, { "epoch": 3.6281852663069607, "grad_norm": 4.4220356941223145, "learning_rate": 8.399433025950543e-07, "loss": 0.0208, "step": 339580 }, { "epoch": 3.6282921096212406, "grad_norm": 11.491787910461426, "learning_rate": 8.399309820431408e-07, "loss": 0.0171, "step": 339590 }, { "epoch": 3.62839895293552, "grad_norm": 0.008267832919955254, "learning_rate": 8.399186611074196e-07, "loss": 0.0278, "step": 339600 }, { "epoch": 3.6285057962497995, "grad_norm": 10.4638090133667, "learning_rate": 8.399063397879043e-07, "loss": 0.0317, "step": 339610 }, { "epoch": 3.6286126395640794, "grad_norm": 6.732821464538574, "learning_rate": 8.398940180846094e-07, "loss": 0.0402, "step": 339620 }, { "epoch": 3.628719482878359, "grad_norm": 2.85668683052063, "learning_rate": 8.398816959975484e-07, "loss": 0.0079, "step": 339630 }, { "epoch": 3.6288263261926383, "grad_norm": 0.12070533633232117, "learning_rate": 8.398693735267352e-07, "loss": 0.0026, "step": 339640 }, { "epoch": 3.6289331695069182, "grad_norm": 0.005034233443439007, "learning_rate": 8.398570506721839e-07, "loss": 0.0005, "step": 339650 }, { "epoch": 3.6290400128211977, "grad_norm": 0.4007781147956848, "learning_rate": 8.398447274339084e-07, "loss": 0.0296, "step": 339660 }, { "epoch": 3.629146856135477, "grad_norm": 14.909785270690918, "learning_rate": 8.398324038119225e-07, "loss": 0.0056, "step": 339670 }, { "epoch": 3.629253699449757, "grad_norm": 0.026855669915676117, "learning_rate": 8.398200798062402e-07, "loss": 0.0078, "step": 339680 }, { "epoch": 3.6293605427640365, "grad_norm": 0.09457054734230042, "learning_rate": 8.398077554168755e-07, "loss": 0.0085, "step": 339690 }, { "epoch": 3.629467386078316, "grad_norm": 0.02605554088950157, "learning_rate": 8.39795430643842e-07, "loss": 0.0053, "step": 339700 }, { "epoch": 3.629574229392596, "grad_norm": 2.6652190685272217, "learning_rate": 8.397831054871539e-07, "loss": 0.0175, "step": 339710 }, { "epoch": 3.6296810727068753, "grad_norm": 4.939078330993652, "learning_rate": 8.397707799468249e-07, "loss": 0.0119, "step": 339720 }, { "epoch": 3.6297879160211552, "grad_norm": 0.8955991268157959, "learning_rate": 8.397584540228692e-07, "loss": 0.0108, "step": 339730 }, { "epoch": 3.6298947593354347, "grad_norm": 0.02604532428085804, "learning_rate": 8.397461277153005e-07, "loss": 0.0231, "step": 339740 }, { "epoch": 3.630001602649714, "grad_norm": 0.004008421208709478, "learning_rate": 8.397338010241328e-07, "loss": 0.0365, "step": 339750 }, { "epoch": 3.6301084459639936, "grad_norm": 0.7118210196495056, "learning_rate": 8.3972147394938e-07, "loss": 0.0244, "step": 339760 }, { "epoch": 3.6302152892782735, "grad_norm": 0.013000926934182644, "learning_rate": 8.397091464910559e-07, "loss": 0.0011, "step": 339770 }, { "epoch": 3.630322132592553, "grad_norm": 0.07584185153245926, "learning_rate": 8.396968186491747e-07, "loss": 0.016, "step": 339780 }, { "epoch": 3.630428975906833, "grad_norm": 0.014554287306964397, "learning_rate": 8.396844904237501e-07, "loss": 0.01, "step": 339790 }, { "epoch": 3.6305358192211123, "grad_norm": 0.02348487079143524, "learning_rate": 8.396721618147961e-07, "loss": 0.0213, "step": 339800 }, { "epoch": 3.630642662535392, "grad_norm": 0.009446858428418636, "learning_rate": 8.396598328223265e-07, "loss": 0.0111, "step": 339810 }, { "epoch": 3.6307495058496713, "grad_norm": 2.234564781188965, "learning_rate": 8.396475034463555e-07, "loss": 0.0558, "step": 339820 }, { "epoch": 3.630856349163951, "grad_norm": 4.617076396942139, "learning_rate": 8.396351736868967e-07, "loss": 0.0279, "step": 339830 }, { "epoch": 3.6309631924782306, "grad_norm": 0.0355057418346405, "learning_rate": 8.396228435439641e-07, "loss": 0.0069, "step": 339840 }, { "epoch": 3.6310700357925105, "grad_norm": 2.3195197582244873, "learning_rate": 8.396105130175717e-07, "loss": 0.0126, "step": 339850 }, { "epoch": 3.63117687910679, "grad_norm": 3.3820059299468994, "learning_rate": 8.395981821077335e-07, "loss": 0.0061, "step": 339860 }, { "epoch": 3.6312837224210694, "grad_norm": 0.2934825122356415, "learning_rate": 8.395858508144634e-07, "loss": 0.0626, "step": 339870 }, { "epoch": 3.631390565735349, "grad_norm": 5.326621055603027, "learning_rate": 8.39573519137775e-07, "loss": 0.0218, "step": 339880 }, { "epoch": 3.631497409049629, "grad_norm": 10.68488597869873, "learning_rate": 8.395611870776827e-07, "loss": 0.0263, "step": 339890 }, { "epoch": 3.6316042523639083, "grad_norm": 0.0037847000639885664, "learning_rate": 8.395488546342e-07, "loss": 0.0209, "step": 339900 }, { "epoch": 3.631711095678188, "grad_norm": 2.1109836101531982, "learning_rate": 8.39536521807341e-07, "loss": 0.0042, "step": 339910 }, { "epoch": 3.6318179389924676, "grad_norm": 0.06913957744836807, "learning_rate": 8.395241885971198e-07, "loss": 0.0251, "step": 339920 }, { "epoch": 3.631924782306747, "grad_norm": 0.09651516377925873, "learning_rate": 8.3951185500355e-07, "loss": 0.0004, "step": 339930 }, { "epoch": 3.6320316256210265, "grad_norm": 0.02878761850297451, "learning_rate": 8.394995210266458e-07, "loss": 0.0033, "step": 339940 }, { "epoch": 3.6321384689353065, "grad_norm": 1.7750753164291382, "learning_rate": 8.39487186666421e-07, "loss": 0.0116, "step": 339950 }, { "epoch": 3.632245312249586, "grad_norm": 0.00957754347473383, "learning_rate": 8.394748519228896e-07, "loss": 0.006, "step": 339960 }, { "epoch": 3.632352155563866, "grad_norm": 4.007977485656738, "learning_rate": 8.394625167960654e-07, "loss": 0.0141, "step": 339970 }, { "epoch": 3.6324589988781453, "grad_norm": 0.023321593180298805, "learning_rate": 8.394501812859624e-07, "loss": 0.0185, "step": 339980 }, { "epoch": 3.6325658421924247, "grad_norm": 1.6162898540496826, "learning_rate": 8.394378453925945e-07, "loss": 0.0099, "step": 339990 }, { "epoch": 3.632672685506704, "grad_norm": 0.010274398140609264, "learning_rate": 8.394255091159758e-07, "loss": 0.007, "step": 340000 }, { "epoch": 3.632779528820984, "grad_norm": 0.008215929381549358, "learning_rate": 8.394131724561198e-07, "loss": 0.0325, "step": 340010 }, { "epoch": 3.6328863721352636, "grad_norm": 3.848008871078491, "learning_rate": 8.39400835413041e-07, "loss": 0.004, "step": 340020 }, { "epoch": 3.6329932154495435, "grad_norm": 0.042077936232089996, "learning_rate": 8.393884979867529e-07, "loss": 0.0261, "step": 340030 }, { "epoch": 3.633100058763823, "grad_norm": 0.004104998428374529, "learning_rate": 8.393761601772696e-07, "loss": 0.0121, "step": 340040 }, { "epoch": 3.6332069020781024, "grad_norm": 2.2962143421173096, "learning_rate": 8.393638219846051e-07, "loss": 0.0431, "step": 340050 }, { "epoch": 3.633313745392382, "grad_norm": 5.217197895050049, "learning_rate": 8.393514834087732e-07, "loss": 0.0167, "step": 340060 }, { "epoch": 3.6334205887066617, "grad_norm": 0.017473280429840088, "learning_rate": 8.393391444497877e-07, "loss": 0.0173, "step": 340070 }, { "epoch": 3.633527432020941, "grad_norm": 0.2655792236328125, "learning_rate": 8.393268051076627e-07, "loss": 0.0006, "step": 340080 }, { "epoch": 3.633634275335221, "grad_norm": 0.4521788954734802, "learning_rate": 8.393144653824122e-07, "loss": 0.008, "step": 340090 }, { "epoch": 3.6337411186495006, "grad_norm": 0.03759729862213135, "learning_rate": 8.393021252740502e-07, "loss": 0.0134, "step": 340100 }, { "epoch": 3.63384796196378, "grad_norm": 5.599462032318115, "learning_rate": 8.392897847825904e-07, "loss": 0.0074, "step": 340110 }, { "epoch": 3.6339548052780595, "grad_norm": 0.6135151386260986, "learning_rate": 8.392774439080468e-07, "loss": 0.0025, "step": 340120 }, { "epoch": 3.6340616485923394, "grad_norm": 0.3659292459487915, "learning_rate": 8.392651026504333e-07, "loss": 0.0086, "step": 340130 }, { "epoch": 3.634168491906619, "grad_norm": 0.01146083977073431, "learning_rate": 8.39252761009764e-07, "loss": 0.0022, "step": 340140 }, { "epoch": 3.6342753352208987, "grad_norm": 2.5684139728546143, "learning_rate": 8.392404189860527e-07, "loss": 0.0192, "step": 340150 }, { "epoch": 3.634382178535178, "grad_norm": 0.32591021060943604, "learning_rate": 8.392280765793132e-07, "loss": 0.0052, "step": 340160 }, { "epoch": 3.6344890218494577, "grad_norm": 0.021223345771431923, "learning_rate": 8.392157337895598e-07, "loss": 0.0292, "step": 340170 }, { "epoch": 3.634595865163737, "grad_norm": 0.011238782666623592, "learning_rate": 8.392033906168061e-07, "loss": 0.0089, "step": 340180 }, { "epoch": 3.634702708478017, "grad_norm": 0.004217170644551516, "learning_rate": 8.391910470610664e-07, "loss": 0.0052, "step": 340190 }, { "epoch": 3.6348095517922965, "grad_norm": 6.893495559692383, "learning_rate": 8.391787031223542e-07, "loss": 0.0114, "step": 340200 }, { "epoch": 3.6349163951065764, "grad_norm": 0.008625268004834652, "learning_rate": 8.391663588006836e-07, "loss": 0.0034, "step": 340210 }, { "epoch": 3.635023238420856, "grad_norm": 0.5140833854675293, "learning_rate": 8.391540140960687e-07, "loss": 0.0011, "step": 340220 }, { "epoch": 3.6351300817351353, "grad_norm": 0.6142094135284424, "learning_rate": 8.391416690085234e-07, "loss": 0.0219, "step": 340230 }, { "epoch": 3.6352369250494148, "grad_norm": 0.0030265043023973703, "learning_rate": 8.391293235380614e-07, "loss": 0.0012, "step": 340240 }, { "epoch": 3.6353437683636947, "grad_norm": 10.993739128112793, "learning_rate": 8.391169776846969e-07, "loss": 0.0113, "step": 340250 }, { "epoch": 3.635450611677974, "grad_norm": 0.04397892951965332, "learning_rate": 8.391046314484437e-07, "loss": 0.0077, "step": 340260 }, { "epoch": 3.635557454992254, "grad_norm": 7.81154727935791, "learning_rate": 8.390922848293157e-07, "loss": 0.0222, "step": 340270 }, { "epoch": 3.6356642983065335, "grad_norm": 0.01274594385176897, "learning_rate": 8.39079937827327e-07, "loss": 0.0024, "step": 340280 }, { "epoch": 3.635771141620813, "grad_norm": 0.0018745771376416087, "learning_rate": 8.390675904424915e-07, "loss": 0.0144, "step": 340290 }, { "epoch": 3.635877984935093, "grad_norm": 1.7706621885299683, "learning_rate": 8.39055242674823e-07, "loss": 0.0026, "step": 340300 }, { "epoch": 3.6359848282493723, "grad_norm": 1.262930989265442, "learning_rate": 8.390428945243356e-07, "loss": 0.0118, "step": 340310 }, { "epoch": 3.6360916715636518, "grad_norm": 2.4164483547210693, "learning_rate": 8.390305459910431e-07, "loss": 0.0057, "step": 340320 }, { "epoch": 3.6361985148779317, "grad_norm": 2.903099298477173, "learning_rate": 8.390181970749596e-07, "loss": 0.0081, "step": 340330 }, { "epoch": 3.636305358192211, "grad_norm": 0.42636582255363464, "learning_rate": 8.390058477760991e-07, "loss": 0.0843, "step": 340340 }, { "epoch": 3.6364122015064906, "grad_norm": 3.604396104812622, "learning_rate": 8.389934980944753e-07, "loss": 0.0452, "step": 340350 }, { "epoch": 3.6365190448207705, "grad_norm": 0.0011320447083562613, "learning_rate": 8.389811480301022e-07, "loss": 0.0177, "step": 340360 }, { "epoch": 3.63662588813505, "grad_norm": 0.01967235468327999, "learning_rate": 8.389687975829939e-07, "loss": 0.0138, "step": 340370 }, { "epoch": 3.6367327314493294, "grad_norm": 0.05081312730908394, "learning_rate": 8.389564467531641e-07, "loss": 0.0267, "step": 340380 }, { "epoch": 3.6368395747636093, "grad_norm": 0.12347801774740219, "learning_rate": 8.38944095540627e-07, "loss": 0.0216, "step": 340390 }, { "epoch": 3.6369464180778888, "grad_norm": 0.0129883186891675, "learning_rate": 8.389317439453964e-07, "loss": 0.0356, "step": 340400 }, { "epoch": 3.6370532613921682, "grad_norm": 0.009837089106440544, "learning_rate": 8.389193919674863e-07, "loss": 0.0441, "step": 340410 }, { "epoch": 3.637160104706448, "grad_norm": 1.2571532726287842, "learning_rate": 8.389070396069107e-07, "loss": 0.0065, "step": 340420 }, { "epoch": 3.6372669480207276, "grad_norm": 0.0644238144159317, "learning_rate": 8.388946868636833e-07, "loss": 0.0082, "step": 340430 }, { "epoch": 3.6373737913350075, "grad_norm": 11.236175537109375, "learning_rate": 8.388823337378183e-07, "loss": 0.0531, "step": 340440 }, { "epoch": 3.637480634649287, "grad_norm": 0.3523910939693451, "learning_rate": 8.388699802293295e-07, "loss": 0.0045, "step": 340450 }, { "epoch": 3.6375874779635664, "grad_norm": 0.06659857928752899, "learning_rate": 8.388576263382311e-07, "loss": 0.0022, "step": 340460 }, { "epoch": 3.637694321277846, "grad_norm": 0.21294337511062622, "learning_rate": 8.388452720645367e-07, "loss": 0.0027, "step": 340470 }, { "epoch": 3.6378011645921258, "grad_norm": 0.054014842957258224, "learning_rate": 8.388329174082605e-07, "loss": 0.0035, "step": 340480 }, { "epoch": 3.6379080079064052, "grad_norm": 0.11807285249233246, "learning_rate": 8.388205623694164e-07, "loss": 0.0081, "step": 340490 }, { "epoch": 3.638014851220685, "grad_norm": 5.451697826385498, "learning_rate": 8.388082069480183e-07, "loss": 0.0191, "step": 340500 }, { "epoch": 3.6381216945349646, "grad_norm": 0.5882720947265625, "learning_rate": 8.3879585114408e-07, "loss": 0.0356, "step": 340510 }, { "epoch": 3.638228537849244, "grad_norm": 0.5189839601516724, "learning_rate": 8.387834949576158e-07, "loss": 0.0422, "step": 340520 }, { "epoch": 3.6383353811635235, "grad_norm": 1.3608460426330566, "learning_rate": 8.387711383886393e-07, "loss": 0.0191, "step": 340530 }, { "epoch": 3.6384422244778034, "grad_norm": 0.01717395707964897, "learning_rate": 8.387587814371648e-07, "loss": 0.0021, "step": 340540 }, { "epoch": 3.638549067792083, "grad_norm": 1.1054128408432007, "learning_rate": 8.387464241032058e-07, "loss": 0.0184, "step": 340550 }, { "epoch": 3.6386559111063628, "grad_norm": 4.043813705444336, "learning_rate": 8.387340663867768e-07, "loss": 0.017, "step": 340560 }, { "epoch": 3.6387627544206422, "grad_norm": 0.00665840320289135, "learning_rate": 8.387217082878914e-07, "loss": 0.0363, "step": 340570 }, { "epoch": 3.6388695977349217, "grad_norm": 0.947651207447052, "learning_rate": 8.387093498065636e-07, "loss": 0.0148, "step": 340580 }, { "epoch": 3.638976441049201, "grad_norm": 0.28090423345565796, "learning_rate": 8.386969909428074e-07, "loss": 0.0066, "step": 340590 }, { "epoch": 3.639083284363481, "grad_norm": 2.8482301235198975, "learning_rate": 8.386846316966367e-07, "loss": 0.0017, "step": 340600 }, { "epoch": 3.6391901276777605, "grad_norm": 0.062264639884233475, "learning_rate": 8.386722720680655e-07, "loss": 0.0025, "step": 340610 }, { "epoch": 3.6392969709920404, "grad_norm": 8.495593070983887, "learning_rate": 8.386599120571078e-07, "loss": 0.0898, "step": 340620 }, { "epoch": 3.63940381430632, "grad_norm": 0.2050941437482834, "learning_rate": 8.386475516637774e-07, "loss": 0.04, "step": 340630 }, { "epoch": 3.6395106576205993, "grad_norm": 0.004411446861922741, "learning_rate": 8.386351908880884e-07, "loss": 0.0114, "step": 340640 }, { "epoch": 3.639617500934879, "grad_norm": 1.8540704250335693, "learning_rate": 8.386228297300548e-07, "loss": 0.0116, "step": 340650 }, { "epoch": 3.6397243442491587, "grad_norm": 0.012943882495164871, "learning_rate": 8.386104681896903e-07, "loss": 0.0358, "step": 340660 }, { "epoch": 3.639831187563438, "grad_norm": 5.969166278839111, "learning_rate": 8.385981062670092e-07, "loss": 0.0019, "step": 340670 }, { "epoch": 3.639938030877718, "grad_norm": 10.560423851013184, "learning_rate": 8.385857439620251e-07, "loss": 0.0335, "step": 340680 }, { "epoch": 3.6400448741919975, "grad_norm": 1.9228371381759644, "learning_rate": 8.385733812747523e-07, "loss": 0.0108, "step": 340690 }, { "epoch": 3.640151717506277, "grad_norm": 0.0033480271231383085, "learning_rate": 8.385610182052045e-07, "loss": 0.0048, "step": 340700 }, { "epoch": 3.6402585608205564, "grad_norm": 1.1606022119522095, "learning_rate": 8.385486547533958e-07, "loss": 0.0143, "step": 340710 }, { "epoch": 3.6403654041348363, "grad_norm": 0.08090738207101822, "learning_rate": 8.3853629091934e-07, "loss": 0.019, "step": 340720 }, { "epoch": 3.640472247449116, "grad_norm": 0.036934562027454376, "learning_rate": 8.385239267030513e-07, "loss": 0.0169, "step": 340730 }, { "epoch": 3.6405790907633957, "grad_norm": 3.184091567993164, "learning_rate": 8.385115621045436e-07, "loss": 0.0041, "step": 340740 }, { "epoch": 3.640685934077675, "grad_norm": 7.200640678405762, "learning_rate": 8.384991971238307e-07, "loss": 0.0128, "step": 340750 }, { "epoch": 3.6407927773919546, "grad_norm": 3.3168365955352783, "learning_rate": 8.384868317609266e-07, "loss": 0.0154, "step": 340760 }, { "epoch": 3.640899620706234, "grad_norm": 0.10202427208423615, "learning_rate": 8.384744660158455e-07, "loss": 0.0224, "step": 340770 }, { "epoch": 3.641006464020514, "grad_norm": 0.007494180928915739, "learning_rate": 8.384620998886009e-07, "loss": 0.013, "step": 340780 }, { "epoch": 3.6411133073347934, "grad_norm": 0.4744216203689575, "learning_rate": 8.384497333792073e-07, "loss": 0.015, "step": 340790 }, { "epoch": 3.6412201506490733, "grad_norm": 0.03407908231019974, "learning_rate": 8.384373664876785e-07, "loss": 0.0092, "step": 340800 }, { "epoch": 3.641326993963353, "grad_norm": 0.09954903274774551, "learning_rate": 8.38424999214028e-07, "loss": 0.0069, "step": 340810 }, { "epoch": 3.6414338372776323, "grad_norm": 3.8808341026306152, "learning_rate": 8.384126315582705e-07, "loss": 0.0156, "step": 340820 }, { "epoch": 3.6415406805919117, "grad_norm": 5.25495719909668, "learning_rate": 8.384002635204194e-07, "loss": 0.0147, "step": 340830 }, { "epoch": 3.6416475239061916, "grad_norm": 0.0442587286233902, "learning_rate": 8.383878951004888e-07, "loss": 0.0316, "step": 340840 }, { "epoch": 3.641754367220471, "grad_norm": 0.09081369638442993, "learning_rate": 8.383755262984929e-07, "loss": 0.0013, "step": 340850 }, { "epoch": 3.641861210534751, "grad_norm": 0.014384896494448185, "learning_rate": 8.383631571144453e-07, "loss": 0.02, "step": 340860 }, { "epoch": 3.6419680538490304, "grad_norm": 0.03722106292843819, "learning_rate": 8.383507875483604e-07, "loss": 0.0044, "step": 340870 }, { "epoch": 3.64207489716331, "grad_norm": 6.963109970092773, "learning_rate": 8.383384176002518e-07, "loss": 0.0114, "step": 340880 }, { "epoch": 3.6421817404775894, "grad_norm": 0.6814204454421997, "learning_rate": 8.383260472701335e-07, "loss": 0.013, "step": 340890 }, { "epoch": 3.6422885837918693, "grad_norm": 1.9624673128128052, "learning_rate": 8.383136765580196e-07, "loss": 0.0275, "step": 340900 }, { "epoch": 3.6423954271061487, "grad_norm": 0.09062615036964417, "learning_rate": 8.383013054639241e-07, "loss": 0.0161, "step": 340910 }, { "epoch": 3.6425022704204286, "grad_norm": 0.03733517602086067, "learning_rate": 8.382889339878609e-07, "loss": 0.0184, "step": 340920 }, { "epoch": 3.642609113734708, "grad_norm": 0.02195853553712368, "learning_rate": 8.38276562129844e-07, "loss": 0.0119, "step": 340930 }, { "epoch": 3.6427159570489875, "grad_norm": 1.4948577880859375, "learning_rate": 8.382641898898872e-07, "loss": 0.0056, "step": 340940 }, { "epoch": 3.642822800363267, "grad_norm": 0.005985416937619448, "learning_rate": 8.382518172680046e-07, "loss": 0.0039, "step": 340950 }, { "epoch": 3.642929643677547, "grad_norm": 0.028686298057436943, "learning_rate": 8.382394442642102e-07, "loss": 0.0172, "step": 340960 }, { "epoch": 3.6430364869918264, "grad_norm": 0.04132157564163208, "learning_rate": 8.38227070878518e-07, "loss": 0.0087, "step": 340970 }, { "epoch": 3.6431433303061063, "grad_norm": 2.305236577987671, "learning_rate": 8.382146971109419e-07, "loss": 0.0431, "step": 340980 }, { "epoch": 3.6432501736203857, "grad_norm": 0.019407961517572403, "learning_rate": 8.382023229614958e-07, "loss": 0.0009, "step": 340990 }, { "epoch": 3.643357016934665, "grad_norm": 12.286003112792969, "learning_rate": 8.38189948430194e-07, "loss": 0.0126, "step": 341000 }, { "epoch": 3.643463860248945, "grad_norm": 0.6543135046958923, "learning_rate": 8.3817757351705e-07, "loss": 0.0026, "step": 341010 }, { "epoch": 3.6435707035632245, "grad_norm": 0.0023879047948867083, "learning_rate": 8.38165198222078e-07, "loss": 0.0073, "step": 341020 }, { "epoch": 3.643677546877504, "grad_norm": 4.358514308929443, "learning_rate": 8.381528225452921e-07, "loss": 0.0195, "step": 341030 }, { "epoch": 3.643784390191784, "grad_norm": 0.009145312942564487, "learning_rate": 8.381404464867059e-07, "loss": 0.0279, "step": 341040 }, { "epoch": 3.6438912335060634, "grad_norm": 0.0029861778020858765, "learning_rate": 8.381280700463338e-07, "loss": 0.013, "step": 341050 }, { "epoch": 3.643998076820343, "grad_norm": 0.011090991087257862, "learning_rate": 8.381156932241895e-07, "loss": 0.0036, "step": 341060 }, { "epoch": 3.6441049201346227, "grad_norm": 3.3669533729553223, "learning_rate": 8.381033160202871e-07, "loss": 0.0075, "step": 341070 }, { "epoch": 3.644211763448902, "grad_norm": 0.00680641969665885, "learning_rate": 8.380909384346405e-07, "loss": 0.0053, "step": 341080 }, { "epoch": 3.6443186067631816, "grad_norm": 0.004961931612342596, "learning_rate": 8.380785604672637e-07, "loss": 0.0042, "step": 341090 }, { "epoch": 3.6444254500774615, "grad_norm": 0.5235845446586609, "learning_rate": 8.380661821181708e-07, "loss": 0.0451, "step": 341100 }, { "epoch": 3.644532293391741, "grad_norm": 3.869060754776001, "learning_rate": 8.380538033873756e-07, "loss": 0.0281, "step": 341110 }, { "epoch": 3.6446391367060205, "grad_norm": 8.855062484741211, "learning_rate": 8.380414242748923e-07, "loss": 0.0559, "step": 341120 }, { "epoch": 3.6447459800203004, "grad_norm": 0.004898264538496733, "learning_rate": 8.380290447807346e-07, "loss": 0.0118, "step": 341130 }, { "epoch": 3.64485282333458, "grad_norm": 0.003426395356655121, "learning_rate": 8.380166649049165e-07, "loss": 0.0141, "step": 341140 }, { "epoch": 3.6449596666488593, "grad_norm": 0.35582971572875977, "learning_rate": 8.380042846474521e-07, "loss": 0.015, "step": 341150 }, { "epoch": 3.645066509963139, "grad_norm": 0.0065821693278849125, "learning_rate": 8.379919040083554e-07, "loss": 0.0014, "step": 341160 }, { "epoch": 3.6451733532774186, "grad_norm": 1.8695833683013916, "learning_rate": 8.379795229876403e-07, "loss": 0.01, "step": 341170 }, { "epoch": 3.645280196591698, "grad_norm": 0.02757352776825428, "learning_rate": 8.379671415853209e-07, "loss": 0.0375, "step": 341180 }, { "epoch": 3.645387039905978, "grad_norm": 4.927894592285156, "learning_rate": 8.37954759801411e-07, "loss": 0.018, "step": 341190 }, { "epoch": 3.6454938832202575, "grad_norm": 0.023855455219745636, "learning_rate": 8.379423776359246e-07, "loss": 0.0245, "step": 341200 }, { "epoch": 3.6456007265345374, "grad_norm": 2.422436475753784, "learning_rate": 8.37929995088876e-07, "loss": 0.0162, "step": 341210 }, { "epoch": 3.645707569848817, "grad_norm": 0.04558458551764488, "learning_rate": 8.379176121602786e-07, "loss": 0.0098, "step": 341220 }, { "epoch": 3.6458144131630963, "grad_norm": 0.04492544010281563, "learning_rate": 8.37905228850147e-07, "loss": 0.0043, "step": 341230 }, { "epoch": 3.6459212564773757, "grad_norm": 0.44149985909461975, "learning_rate": 8.378928451584948e-07, "loss": 0.0029, "step": 341240 }, { "epoch": 3.6460280997916557, "grad_norm": 2.6957526206970215, "learning_rate": 8.37880461085336e-07, "loss": 0.0037, "step": 341250 }, { "epoch": 3.646134943105935, "grad_norm": 0.007201774977147579, "learning_rate": 8.378680766306848e-07, "loss": 0.0153, "step": 341260 }, { "epoch": 3.646241786420215, "grad_norm": 0.003071101615205407, "learning_rate": 8.37855691794555e-07, "loss": 0.0084, "step": 341270 }, { "epoch": 3.6463486297344945, "grad_norm": 0.06241974979639053, "learning_rate": 8.378433065769605e-07, "loss": 0.0177, "step": 341280 }, { "epoch": 3.646455473048774, "grad_norm": 12.95348072052002, "learning_rate": 8.378309209779155e-07, "loss": 0.026, "step": 341290 }, { "epoch": 3.6465623163630534, "grad_norm": 0.007207252085208893, "learning_rate": 8.378185349974339e-07, "loss": 0.0006, "step": 341300 }, { "epoch": 3.6466691596773333, "grad_norm": 0.6120830178260803, "learning_rate": 8.378061486355297e-07, "loss": 0.0091, "step": 341310 }, { "epoch": 3.6467760029916128, "grad_norm": 0.02559070847928524, "learning_rate": 8.377937618922168e-07, "loss": 0.0095, "step": 341320 }, { "epoch": 3.6468828463058927, "grad_norm": 0.019092615693807602, "learning_rate": 8.377813747675093e-07, "loss": 0.0224, "step": 341330 }, { "epoch": 3.646989689620172, "grad_norm": 6.4131574630737305, "learning_rate": 8.37768987261421e-07, "loss": 0.0028, "step": 341340 }, { "epoch": 3.6470965329344516, "grad_norm": 0.58177649974823, "learning_rate": 8.377565993739662e-07, "loss": 0.0063, "step": 341350 }, { "epoch": 3.647203376248731, "grad_norm": 5.800156593322754, "learning_rate": 8.377442111051587e-07, "loss": 0.0044, "step": 341360 }, { "epoch": 3.647310219563011, "grad_norm": 0.03367970883846283, "learning_rate": 8.377318224550124e-07, "loss": 0.0045, "step": 341370 }, { "epoch": 3.6474170628772904, "grad_norm": 0.3231378495693207, "learning_rate": 8.377194334235414e-07, "loss": 0.0108, "step": 341380 }, { "epoch": 3.6475239061915703, "grad_norm": 1.440393328666687, "learning_rate": 8.377070440107596e-07, "loss": 0.0105, "step": 341390 }, { "epoch": 3.6476307495058498, "grad_norm": 0.002641792641952634, "learning_rate": 8.376946542166812e-07, "loss": 0.0099, "step": 341400 }, { "epoch": 3.647737592820129, "grad_norm": 0.003150417236611247, "learning_rate": 8.376822640413199e-07, "loss": 0.0084, "step": 341410 }, { "epoch": 3.6478444361344087, "grad_norm": 0.3014431893825531, "learning_rate": 8.376698734846901e-07, "loss": 0.0817, "step": 341420 }, { "epoch": 3.6479512794486886, "grad_norm": 2.043221950531006, "learning_rate": 8.376574825468054e-07, "loss": 0.0128, "step": 341430 }, { "epoch": 3.648058122762968, "grad_norm": 0.17007571458816528, "learning_rate": 8.376450912276798e-07, "loss": 0.0048, "step": 341440 }, { "epoch": 3.648164966077248, "grad_norm": 1.629915475845337, "learning_rate": 8.376326995273275e-07, "loss": 0.0178, "step": 341450 }, { "epoch": 3.6482718093915274, "grad_norm": 0.05224896967411041, "learning_rate": 8.376203074457624e-07, "loss": 0.0333, "step": 341460 }, { "epoch": 3.648378652705807, "grad_norm": 1.7705376148223877, "learning_rate": 8.376079149829984e-07, "loss": 0.0056, "step": 341470 }, { "epoch": 3.6484854960200863, "grad_norm": 0.11372113227844238, "learning_rate": 8.375955221390498e-07, "loss": 0.0225, "step": 341480 }, { "epoch": 3.648592339334366, "grad_norm": 0.004921318497508764, "learning_rate": 8.375831289139302e-07, "loss": 0.0041, "step": 341490 }, { "epoch": 3.6486991826486457, "grad_norm": 1.9344501495361328, "learning_rate": 8.375707353076538e-07, "loss": 0.0092, "step": 341500 }, { "epoch": 3.6488060259629256, "grad_norm": 0.38929346203804016, "learning_rate": 8.375583413202345e-07, "loss": 0.0224, "step": 341510 }, { "epoch": 3.648912869277205, "grad_norm": 4.402092456817627, "learning_rate": 8.375459469516863e-07, "loss": 0.0127, "step": 341520 }, { "epoch": 3.6490197125914845, "grad_norm": 1.025053858757019, "learning_rate": 8.375335522020235e-07, "loss": 0.0097, "step": 341530 }, { "epoch": 3.649126555905764, "grad_norm": 0.013529406860470772, "learning_rate": 8.375211570712597e-07, "loss": 0.0012, "step": 341540 }, { "epoch": 3.649233399220044, "grad_norm": 0.11753363907337189, "learning_rate": 8.375087615594091e-07, "loss": 0.0065, "step": 341550 }, { "epoch": 3.6493402425343233, "grad_norm": 0.04132862761616707, "learning_rate": 8.374963656664855e-07, "loss": 0.0204, "step": 341560 }, { "epoch": 3.6494470858486032, "grad_norm": 5.581796646118164, "learning_rate": 8.374839693925032e-07, "loss": 0.0178, "step": 341570 }, { "epoch": 3.6495539291628827, "grad_norm": 0.017621638253331184, "learning_rate": 8.374715727374759e-07, "loss": 0.0071, "step": 341580 }, { "epoch": 3.649660772477162, "grad_norm": 0.3845360279083252, "learning_rate": 8.374591757014178e-07, "loss": 0.0109, "step": 341590 }, { "epoch": 3.6497676157914416, "grad_norm": 0.03634076192975044, "learning_rate": 8.374467782843428e-07, "loss": 0.0007, "step": 341600 }, { "epoch": 3.6498744591057215, "grad_norm": 0.11533123254776001, "learning_rate": 8.374343804862649e-07, "loss": 0.0033, "step": 341610 }, { "epoch": 3.649981302420001, "grad_norm": 0.05578342080116272, "learning_rate": 8.374219823071981e-07, "loss": 0.0108, "step": 341620 }, { "epoch": 3.650088145734281, "grad_norm": 0.015593652613461018, "learning_rate": 8.374095837471566e-07, "loss": 0.0123, "step": 341630 }, { "epoch": 3.6501949890485603, "grad_norm": 3.2462403774261475, "learning_rate": 8.373971848061539e-07, "loss": 0.0276, "step": 341640 }, { "epoch": 3.65030183236284, "grad_norm": 0.10006088763475418, "learning_rate": 8.373847854842047e-07, "loss": 0.0033, "step": 341650 }, { "epoch": 3.6504086756771192, "grad_norm": 0.03948106989264488, "learning_rate": 8.373723857813223e-07, "loss": 0.0006, "step": 341660 }, { "epoch": 3.650515518991399, "grad_norm": 12.548115730285645, "learning_rate": 8.373599856975211e-07, "loss": 0.0457, "step": 341670 }, { "epoch": 3.6506223623056786, "grad_norm": 0.030125770717859268, "learning_rate": 8.37347585232815e-07, "loss": 0.0135, "step": 341680 }, { "epoch": 3.6507292056199585, "grad_norm": 0.7192778587341309, "learning_rate": 8.373351843872181e-07, "loss": 0.0221, "step": 341690 }, { "epoch": 3.650836048934238, "grad_norm": 3.365445613861084, "learning_rate": 8.373227831607442e-07, "loss": 0.0225, "step": 341700 }, { "epoch": 3.6509428922485174, "grad_norm": 5.158738613128662, "learning_rate": 8.373103815534075e-07, "loss": 0.0194, "step": 341710 }, { "epoch": 3.651049735562797, "grad_norm": 0.05034922808408737, "learning_rate": 8.372979795652219e-07, "loss": 0.0064, "step": 341720 }, { "epoch": 3.651156578877077, "grad_norm": 0.02056773751974106, "learning_rate": 8.372855771962013e-07, "loss": 0.0021, "step": 341730 }, { "epoch": 3.6512634221913562, "grad_norm": 0.032762560993433, "learning_rate": 8.3727317444636e-07, "loss": 0.0233, "step": 341740 }, { "epoch": 3.651370265505636, "grad_norm": 0.012154258787631989, "learning_rate": 8.372607713157118e-07, "loss": 0.0012, "step": 341750 }, { "epoch": 3.6514771088199156, "grad_norm": 0.0480661615729332, "learning_rate": 8.372483678042706e-07, "loss": 0.0009, "step": 341760 }, { "epoch": 3.651583952134195, "grad_norm": 0.007930770516395569, "learning_rate": 8.372359639120508e-07, "loss": 0.0166, "step": 341770 }, { "epoch": 3.651690795448475, "grad_norm": 9.525384902954102, "learning_rate": 8.372235596390658e-07, "loss": 0.032, "step": 341780 }, { "epoch": 3.6517976387627544, "grad_norm": 0.03169064596295357, "learning_rate": 8.372111549853303e-07, "loss": 0.0045, "step": 341790 }, { "epoch": 3.651904482077034, "grad_norm": 0.06819317489862442, "learning_rate": 8.371987499508578e-07, "loss": 0.006, "step": 341800 }, { "epoch": 3.652011325391314, "grad_norm": 0.010071676224470139, "learning_rate": 8.371863445356624e-07, "loss": 0.0015, "step": 341810 }, { "epoch": 3.6521181687055932, "grad_norm": 3.547678232192993, "learning_rate": 8.371739387397583e-07, "loss": 0.0031, "step": 341820 }, { "epoch": 3.6522250120198727, "grad_norm": 0.002483826596289873, "learning_rate": 8.371615325631594e-07, "loss": 0.0096, "step": 341830 }, { "epoch": 3.6523318553341526, "grad_norm": 0.0033425556030124426, "learning_rate": 8.371491260058794e-07, "loss": 0.0148, "step": 341840 }, { "epoch": 3.652438698648432, "grad_norm": 3.516113758087158, "learning_rate": 8.371367190679328e-07, "loss": 0.0198, "step": 341850 }, { "epoch": 3.6525455419627115, "grad_norm": 0.006522127892822027, "learning_rate": 8.371243117493334e-07, "loss": 0.0039, "step": 341860 }, { "epoch": 3.6526523852769914, "grad_norm": 0.002852779347449541, "learning_rate": 8.371119040500951e-07, "loss": 0.0031, "step": 341870 }, { "epoch": 3.652759228591271, "grad_norm": 1.8257229328155518, "learning_rate": 8.370994959702323e-07, "loss": 0.019, "step": 341880 }, { "epoch": 3.6528660719055503, "grad_norm": 0.11585841327905655, "learning_rate": 8.370870875097585e-07, "loss": 0.0038, "step": 341890 }, { "epoch": 3.6529729152198303, "grad_norm": 0.0026958934031426907, "learning_rate": 8.370746786686879e-07, "loss": 0.0339, "step": 341900 }, { "epoch": 3.6530797585341097, "grad_norm": 0.011945403181016445, "learning_rate": 8.370622694470346e-07, "loss": 0.0034, "step": 341910 }, { "epoch": 3.6531866018483896, "grad_norm": 3.9112420082092285, "learning_rate": 8.370498598448126e-07, "loss": 0.0205, "step": 341920 }, { "epoch": 3.653293445162669, "grad_norm": 0.45837849378585815, "learning_rate": 8.370374498620358e-07, "loss": 0.0259, "step": 341930 }, { "epoch": 3.6534002884769485, "grad_norm": 9.55901050567627, "learning_rate": 8.370250394987184e-07, "loss": 0.0095, "step": 341940 }, { "epoch": 3.653507131791228, "grad_norm": 0.0020764977671205997, "learning_rate": 8.370126287548742e-07, "loss": 0.0064, "step": 341950 }, { "epoch": 3.653613975105508, "grad_norm": 0.03601313754916191, "learning_rate": 8.370002176305173e-07, "loss": 0.0024, "step": 341960 }, { "epoch": 3.6537208184197874, "grad_norm": 0.09701098501682281, "learning_rate": 8.369878061256618e-07, "loss": 0.003, "step": 341970 }, { "epoch": 3.6538276617340673, "grad_norm": 0.8062975406646729, "learning_rate": 8.369753942403214e-07, "loss": 0.0073, "step": 341980 }, { "epoch": 3.6539345050483467, "grad_norm": 0.007834765128791332, "learning_rate": 8.369629819745107e-07, "loss": 0.0118, "step": 341990 }, { "epoch": 3.654041348362626, "grad_norm": 0.02337465062737465, "learning_rate": 8.369505693282432e-07, "loss": 0.0067, "step": 342000 }, { "epoch": 3.6541481916769056, "grad_norm": 1.527640700340271, "learning_rate": 8.369381563015329e-07, "loss": 0.0266, "step": 342010 }, { "epoch": 3.6542550349911855, "grad_norm": 0.13192161917686462, "learning_rate": 8.369257428943943e-07, "loss": 0.0102, "step": 342020 }, { "epoch": 3.654361878305465, "grad_norm": 0.05840706825256348, "learning_rate": 8.36913329106841e-07, "loss": 0.0516, "step": 342030 }, { "epoch": 3.654468721619745, "grad_norm": 0.7239733338356018, "learning_rate": 8.36900914938887e-07, "loss": 0.0142, "step": 342040 }, { "epoch": 3.6545755649340244, "grad_norm": 0.0824262723326683, "learning_rate": 8.368885003905465e-07, "loss": 0.0194, "step": 342050 }, { "epoch": 3.654682408248304, "grad_norm": 0.01818280667066574, "learning_rate": 8.368760854618335e-07, "loss": 0.0057, "step": 342060 }, { "epoch": 3.6547892515625833, "grad_norm": 0.23989157378673553, "learning_rate": 8.368636701527621e-07, "loss": 0.0058, "step": 342070 }, { "epoch": 3.654896094876863, "grad_norm": 0.026242652907967567, "learning_rate": 8.36851254463346e-07, "loss": 0.0129, "step": 342080 }, { "epoch": 3.6550029381911426, "grad_norm": 0.02743224985897541, "learning_rate": 8.368388383935994e-07, "loss": 0.0025, "step": 342090 }, { "epoch": 3.6551097815054225, "grad_norm": 0.007225122768431902, "learning_rate": 8.368264219435364e-07, "loss": 0.0123, "step": 342100 }, { "epoch": 3.655216624819702, "grad_norm": 0.26709529757499695, "learning_rate": 8.368140051131709e-07, "loss": 0.0155, "step": 342110 }, { "epoch": 3.6553234681339815, "grad_norm": 0.023542622104287148, "learning_rate": 8.36801587902517e-07, "loss": 0.0003, "step": 342120 }, { "epoch": 3.655430311448261, "grad_norm": 0.31515824794769287, "learning_rate": 8.367891703115888e-07, "loss": 0.0079, "step": 342130 }, { "epoch": 3.655537154762541, "grad_norm": 0.07369335740804672, "learning_rate": 8.367767523404001e-07, "loss": 0.0064, "step": 342140 }, { "epoch": 3.6556439980768203, "grad_norm": 2.122366428375244, "learning_rate": 8.367643339889649e-07, "loss": 0.0325, "step": 342150 }, { "epoch": 3.6557508413911, "grad_norm": 0.04551929980516434, "learning_rate": 8.367519152572975e-07, "loss": 0.0285, "step": 342160 }, { "epoch": 3.6558576847053796, "grad_norm": 4.05963134765625, "learning_rate": 8.367394961454117e-07, "loss": 0.0056, "step": 342170 }, { "epoch": 3.655964528019659, "grad_norm": 1.4013854265213013, "learning_rate": 8.367270766533217e-07, "loss": 0.0237, "step": 342180 }, { "epoch": 3.6560713713339386, "grad_norm": 0.16327127814292908, "learning_rate": 8.367146567810414e-07, "loss": 0.0005, "step": 342190 }, { "epoch": 3.6561782146482185, "grad_norm": 0.0034915863070636988, "learning_rate": 8.367022365285848e-07, "loss": 0.0127, "step": 342200 }, { "epoch": 3.656285057962498, "grad_norm": 0.0011338569456711411, "learning_rate": 8.366898158959659e-07, "loss": 0.0036, "step": 342210 }, { "epoch": 3.656391901276778, "grad_norm": 0.015162520110607147, "learning_rate": 8.366773948831988e-07, "loss": 0.0025, "step": 342220 }, { "epoch": 3.6564987445910573, "grad_norm": 4.3183393478393555, "learning_rate": 8.366649734902978e-07, "loss": 0.009, "step": 342230 }, { "epoch": 3.6566055879053367, "grad_norm": 0.00556392315775156, "learning_rate": 8.366525517172762e-07, "loss": 0.0142, "step": 342240 }, { "epoch": 3.656712431219616, "grad_norm": 3.15876841545105, "learning_rate": 8.366401295641488e-07, "loss": 0.0082, "step": 342250 }, { "epoch": 3.656819274533896, "grad_norm": 0.014414165169000626, "learning_rate": 8.366277070309291e-07, "loss": 0.0352, "step": 342260 }, { "epoch": 3.6569261178481756, "grad_norm": 2.5543668270111084, "learning_rate": 8.366152841176313e-07, "loss": 0.0091, "step": 342270 }, { "epoch": 3.6570329611624555, "grad_norm": 3.9875540733337402, "learning_rate": 8.366028608242695e-07, "loss": 0.0027, "step": 342280 }, { "epoch": 3.657139804476735, "grad_norm": 0.002045833272859454, "learning_rate": 8.365904371508576e-07, "loss": 0.0073, "step": 342290 }, { "epoch": 3.6572466477910144, "grad_norm": 0.5993421077728271, "learning_rate": 8.365780130974098e-07, "loss": 0.0163, "step": 342300 }, { "epoch": 3.657353491105294, "grad_norm": 0.0033901389688253403, "learning_rate": 8.3656558866394e-07, "loss": 0.0039, "step": 342310 }, { "epoch": 3.6574603344195737, "grad_norm": 0.007230243179947138, "learning_rate": 8.365531638504623e-07, "loss": 0.0025, "step": 342320 }, { "epoch": 3.657567177733853, "grad_norm": 1.504302740097046, "learning_rate": 8.365407386569904e-07, "loss": 0.0122, "step": 342330 }, { "epoch": 3.657674021048133, "grad_norm": 0.8424915671348572, "learning_rate": 8.36528313083539e-07, "loss": 0.0057, "step": 342340 }, { "epoch": 3.6577808643624126, "grad_norm": 0.002178249415010214, "learning_rate": 8.365158871301214e-07, "loss": 0.0003, "step": 342350 }, { "epoch": 3.657887707676692, "grad_norm": 0.4026258885860443, "learning_rate": 8.365034607967522e-07, "loss": 0.024, "step": 342360 }, { "epoch": 3.6579945509909715, "grad_norm": 2.9089102745056152, "learning_rate": 8.364910340834452e-07, "loss": 0.0144, "step": 342370 }, { "epoch": 3.6581013943052514, "grad_norm": 2.364802598953247, "learning_rate": 8.364786069902142e-07, "loss": 0.0082, "step": 342380 }, { "epoch": 3.658208237619531, "grad_norm": 0.052860140800476074, "learning_rate": 8.364661795170737e-07, "loss": 0.0243, "step": 342390 }, { "epoch": 3.6583150809338107, "grad_norm": 0.47782689332962036, "learning_rate": 8.364537516640373e-07, "loss": 0.0146, "step": 342400 }, { "epoch": 3.65842192424809, "grad_norm": 0.04410932585597038, "learning_rate": 8.364413234311193e-07, "loss": 0.01, "step": 342410 }, { "epoch": 3.6585287675623697, "grad_norm": 0.012548874132335186, "learning_rate": 8.364288948183337e-07, "loss": 0.0254, "step": 342420 }, { "epoch": 3.658635610876649, "grad_norm": 0.0020762085914611816, "learning_rate": 8.364164658256944e-07, "loss": 0.001, "step": 342430 }, { "epoch": 3.658742454190929, "grad_norm": 0.07915346324443817, "learning_rate": 8.364040364532155e-07, "loss": 0.0248, "step": 342440 }, { "epoch": 3.6588492975052085, "grad_norm": 0.08996864408254623, "learning_rate": 8.363916067009112e-07, "loss": 0.0107, "step": 342450 }, { "epoch": 3.6589561408194884, "grad_norm": 8.051448822021484, "learning_rate": 8.363791765687953e-07, "loss": 0.0107, "step": 342460 }, { "epoch": 3.659062984133768, "grad_norm": 6.040561676025391, "learning_rate": 8.36366746056882e-07, "loss": 0.0137, "step": 342470 }, { "epoch": 3.6591698274480473, "grad_norm": 1.3944331407546997, "learning_rate": 8.363543151651851e-07, "loss": 0.0151, "step": 342480 }, { "epoch": 3.659276670762327, "grad_norm": 2.15334153175354, "learning_rate": 8.363418838937188e-07, "loss": 0.0166, "step": 342490 }, { "epoch": 3.6593835140766067, "grad_norm": 0.014008336700499058, "learning_rate": 8.363294522424973e-07, "loss": 0.0021, "step": 342500 }, { "epoch": 3.659490357390886, "grad_norm": 0.03915291652083397, "learning_rate": 8.363170202115344e-07, "loss": 0.0068, "step": 342510 }, { "epoch": 3.659597200705166, "grad_norm": 2.7792060375213623, "learning_rate": 8.36304587800844e-07, "loss": 0.016, "step": 342520 }, { "epoch": 3.6597040440194455, "grad_norm": 0.011589801870286465, "learning_rate": 8.362921550104405e-07, "loss": 0.02, "step": 342530 }, { "epoch": 3.659810887333725, "grad_norm": 2.030161142349243, "learning_rate": 8.362797218403379e-07, "loss": 0.0092, "step": 342540 }, { "epoch": 3.659917730648005, "grad_norm": 0.3224301040172577, "learning_rate": 8.3626728829055e-07, "loss": 0.025, "step": 342550 }, { "epoch": 3.6600245739622843, "grad_norm": 0.796485185623169, "learning_rate": 8.362548543610909e-07, "loss": 0.0002, "step": 342560 }, { "epoch": 3.6601314172765638, "grad_norm": 0.0030221864581108093, "learning_rate": 8.362424200519748e-07, "loss": 0.0121, "step": 342570 }, { "epoch": 3.6602382605908437, "grad_norm": 0.06752574443817139, "learning_rate": 8.362299853632155e-07, "loss": 0.0356, "step": 342580 }, { "epoch": 3.660345103905123, "grad_norm": 3.2969255447387695, "learning_rate": 8.362175502948272e-07, "loss": 0.0261, "step": 342590 }, { "epoch": 3.6604519472194026, "grad_norm": 1.9319015741348267, "learning_rate": 8.36205114846824e-07, "loss": 0.0213, "step": 342600 }, { "epoch": 3.6605587905336825, "grad_norm": 0.013506605289876461, "learning_rate": 8.361926790192198e-07, "loss": 0.0304, "step": 342610 }, { "epoch": 3.660665633847962, "grad_norm": 4.696348667144775, "learning_rate": 8.361802428120287e-07, "loss": 0.0124, "step": 342620 }, { "epoch": 3.6607724771622414, "grad_norm": 0.04750688746571541, "learning_rate": 8.361678062252646e-07, "loss": 0.0057, "step": 342630 }, { "epoch": 3.6608793204765213, "grad_norm": 0.02405793033540249, "learning_rate": 8.361553692589418e-07, "loss": 0.039, "step": 342640 }, { "epoch": 3.6609861637908008, "grad_norm": 4.432891368865967, "learning_rate": 8.361429319130743e-07, "loss": 0.0128, "step": 342650 }, { "epoch": 3.6610930071050802, "grad_norm": 9.436429977416992, "learning_rate": 8.36130494187676e-07, "loss": 0.0429, "step": 342660 }, { "epoch": 3.66119985041936, "grad_norm": 0.004760122392326593, "learning_rate": 8.361180560827609e-07, "loss": 0.0042, "step": 342670 }, { "epoch": 3.6613066937336396, "grad_norm": 3.1735219955444336, "learning_rate": 8.361056175983432e-07, "loss": 0.0211, "step": 342680 }, { "epoch": 3.6614135370479195, "grad_norm": 5.129115104675293, "learning_rate": 8.360931787344369e-07, "loss": 0.0022, "step": 342690 }, { "epoch": 3.661520380362199, "grad_norm": 1.6234188079833984, "learning_rate": 8.360807394910561e-07, "loss": 0.0029, "step": 342700 }, { "epoch": 3.6616272236764784, "grad_norm": 1.968698263168335, "learning_rate": 8.360682998682147e-07, "loss": 0.0019, "step": 342710 }, { "epoch": 3.661734066990758, "grad_norm": 0.027353322133421898, "learning_rate": 8.360558598659269e-07, "loss": 0.0498, "step": 342720 }, { "epoch": 3.6618409103050378, "grad_norm": 0.35714787244796753, "learning_rate": 8.360434194842067e-07, "loss": 0.0203, "step": 342730 }, { "epoch": 3.6619477536193172, "grad_norm": 0.06135512515902519, "learning_rate": 8.360309787230679e-07, "loss": 0.0078, "step": 342740 }, { "epoch": 3.662054596933597, "grad_norm": 3.571366786956787, "learning_rate": 8.360185375825249e-07, "loss": 0.0085, "step": 342750 }, { "epoch": 3.6621614402478766, "grad_norm": 0.0050457245670259, "learning_rate": 8.360060960625916e-07, "loss": 0.0536, "step": 342760 }, { "epoch": 3.662268283562156, "grad_norm": 0.1568916290998459, "learning_rate": 8.359936541632822e-07, "loss": 0.0126, "step": 342770 }, { "epoch": 3.6623751268764355, "grad_norm": 0.003848253283649683, "learning_rate": 8.359812118846104e-07, "loss": 0.0529, "step": 342780 }, { "epoch": 3.6624819701907154, "grad_norm": 7.623063564300537, "learning_rate": 8.359687692265907e-07, "loss": 0.0284, "step": 342790 }, { "epoch": 3.662588813504995, "grad_norm": 1.325465440750122, "learning_rate": 8.359563261892366e-07, "loss": 0.0149, "step": 342800 }, { "epoch": 3.662695656819275, "grad_norm": 2.133419990539551, "learning_rate": 8.359438827725627e-07, "loss": 0.0033, "step": 342810 }, { "epoch": 3.6628025001335542, "grad_norm": 2.2369391918182373, "learning_rate": 8.359314389765825e-07, "loss": 0.0103, "step": 342820 }, { "epoch": 3.6629093434478337, "grad_norm": 0.9982377290725708, "learning_rate": 8.359189948013106e-07, "loss": 0.0219, "step": 342830 }, { "epoch": 3.663016186762113, "grad_norm": 3.483652353286743, "learning_rate": 8.359065502467609e-07, "loss": 0.0142, "step": 342840 }, { "epoch": 3.663123030076393, "grad_norm": 1.801913857460022, "learning_rate": 8.358941053129471e-07, "loss": 0.0068, "step": 342850 }, { "epoch": 3.6632298733906725, "grad_norm": 3.7046000957489014, "learning_rate": 8.358816599998837e-07, "loss": 0.0133, "step": 342860 }, { "epoch": 3.6633367167049524, "grad_norm": 1.6302516460418701, "learning_rate": 8.358692143075843e-07, "loss": 0.0093, "step": 342870 }, { "epoch": 3.663443560019232, "grad_norm": 0.0398922860622406, "learning_rate": 8.358567682360634e-07, "loss": 0.0028, "step": 342880 }, { "epoch": 3.6635504033335113, "grad_norm": 0.0027921064756810665, "learning_rate": 8.358443217853348e-07, "loss": 0.019, "step": 342890 }, { "epoch": 3.663657246647791, "grad_norm": 0.003555615432560444, "learning_rate": 8.358318749554127e-07, "loss": 0.0245, "step": 342900 }, { "epoch": 3.6637640899620707, "grad_norm": 0.19530723989009857, "learning_rate": 8.358194277463111e-07, "loss": 0.0016, "step": 342910 }, { "epoch": 3.66387093327635, "grad_norm": 0.012600393034517765, "learning_rate": 8.358069801580439e-07, "loss": 0.0004, "step": 342920 }, { "epoch": 3.66397777659063, "grad_norm": 0.3067203462123871, "learning_rate": 8.357945321906253e-07, "loss": 0.0317, "step": 342930 }, { "epoch": 3.6640846199049095, "grad_norm": 0.9998226761817932, "learning_rate": 8.357820838440692e-07, "loss": 0.0063, "step": 342940 }, { "epoch": 3.664191463219189, "grad_norm": 0.012871540151536465, "learning_rate": 8.3576963511839e-07, "loss": 0.0122, "step": 342950 }, { "epoch": 3.6642983065334684, "grad_norm": 0.017405997961759567, "learning_rate": 8.357571860136015e-07, "loss": 0.0343, "step": 342960 }, { "epoch": 3.6644051498477483, "grad_norm": 0.017619440332055092, "learning_rate": 8.357447365297176e-07, "loss": 0.0077, "step": 342970 }, { "epoch": 3.664511993162028, "grad_norm": 0.01336046401411295, "learning_rate": 8.357322866667527e-07, "loss": 0.0156, "step": 342980 }, { "epoch": 3.6646188364763077, "grad_norm": 0.003557658288627863, "learning_rate": 8.357198364247208e-07, "loss": 0.0062, "step": 342990 }, { "epoch": 3.664725679790587, "grad_norm": 0.003938488662242889, "learning_rate": 8.357073858036359e-07, "loss": 0.0359, "step": 343000 }, { "epoch": 3.6648325231048666, "grad_norm": 0.017493708059191704, "learning_rate": 8.356949348035118e-07, "loss": 0.026, "step": 343010 }, { "epoch": 3.664939366419146, "grad_norm": 0.9784406423568726, "learning_rate": 8.356824834243629e-07, "loss": 0.0033, "step": 343020 }, { "epoch": 3.665046209733426, "grad_norm": 0.04167257994413376, "learning_rate": 8.356700316662031e-07, "loss": 0.0164, "step": 343030 }, { "epoch": 3.6651530530477054, "grad_norm": 0.0032807686366140842, "learning_rate": 8.356575795290466e-07, "loss": 0.0116, "step": 343040 }, { "epoch": 3.6652598963619853, "grad_norm": 1.0065710544586182, "learning_rate": 8.356451270129072e-07, "loss": 0.0008, "step": 343050 }, { "epoch": 3.665366739676265, "grad_norm": 0.06921188533306122, "learning_rate": 8.356326741177992e-07, "loss": 0.0238, "step": 343060 }, { "epoch": 3.6654735829905443, "grad_norm": 1.9351270198822021, "learning_rate": 8.356202208437366e-07, "loss": 0.0074, "step": 343070 }, { "epoch": 3.6655804263048237, "grad_norm": 1.45183527469635, "learning_rate": 8.356077671907334e-07, "loss": 0.0081, "step": 343080 }, { "epoch": 3.6656872696191036, "grad_norm": 0.404710054397583, "learning_rate": 8.355953131588038e-07, "loss": 0.0328, "step": 343090 }, { "epoch": 3.665794112933383, "grad_norm": 6.4260430335998535, "learning_rate": 8.355828587479616e-07, "loss": 0.0083, "step": 343100 }, { "epoch": 3.665900956247663, "grad_norm": 0.09893689304590225, "learning_rate": 8.355704039582212e-07, "loss": 0.006, "step": 343110 }, { "epoch": 3.6660077995619424, "grad_norm": 7.461308002471924, "learning_rate": 8.355579487895963e-07, "loss": 0.0102, "step": 343120 }, { "epoch": 3.666114642876222, "grad_norm": 0.02355004847049713, "learning_rate": 8.355454932421013e-07, "loss": 0.0143, "step": 343130 }, { "epoch": 3.6662214861905014, "grad_norm": 0.11818081140518188, "learning_rate": 8.355330373157501e-07, "loss": 0.0286, "step": 343140 }, { "epoch": 3.6663283295047813, "grad_norm": 1.5409456491470337, "learning_rate": 8.355205810105568e-07, "loss": 0.0215, "step": 343150 }, { "epoch": 3.6664351728190607, "grad_norm": 0.13782554864883423, "learning_rate": 8.355081243265353e-07, "loss": 0.0035, "step": 343160 }, { "epoch": 3.6665420161333406, "grad_norm": 0.7241187691688538, "learning_rate": 8.354956672636999e-07, "loss": 0.0052, "step": 343170 }, { "epoch": 3.66664885944762, "grad_norm": 12.969758033752441, "learning_rate": 8.354832098220646e-07, "loss": 0.0171, "step": 343180 }, { "epoch": 3.6667557027618995, "grad_norm": 0.017064571380615234, "learning_rate": 8.354707520016434e-07, "loss": 0.0012, "step": 343190 }, { "epoch": 3.666862546076179, "grad_norm": 0.7342170476913452, "learning_rate": 8.354582938024503e-07, "loss": 0.0033, "step": 343200 }, { "epoch": 3.666969389390459, "grad_norm": 4.947066783905029, "learning_rate": 8.354458352244996e-07, "loss": 0.005, "step": 343210 }, { "epoch": 3.6670762327047384, "grad_norm": 2.477933883666992, "learning_rate": 8.354333762678053e-07, "loss": 0.0128, "step": 343220 }, { "epoch": 3.6671830760190183, "grad_norm": 0.004449286963790655, "learning_rate": 8.354209169323814e-07, "loss": 0.0405, "step": 343230 }, { "epoch": 3.6672899193332977, "grad_norm": 0.1995815634727478, "learning_rate": 8.354084572182417e-07, "loss": 0.0023, "step": 343240 }, { "epoch": 3.667396762647577, "grad_norm": 0.022616935893893242, "learning_rate": 8.353959971254008e-07, "loss": 0.0029, "step": 343250 }, { "epoch": 3.667503605961857, "grad_norm": 1.65730881690979, "learning_rate": 8.353835366538725e-07, "loss": 0.0147, "step": 343260 }, { "epoch": 3.6676104492761366, "grad_norm": 1.5044811964035034, "learning_rate": 8.353710758036708e-07, "loss": 0.039, "step": 343270 }, { "epoch": 3.667717292590416, "grad_norm": 0.0028057547751814127, "learning_rate": 8.353586145748097e-07, "loss": 0.003, "step": 343280 }, { "epoch": 3.667824135904696, "grad_norm": 0.09819445759057999, "learning_rate": 8.353461529673036e-07, "loss": 0.003, "step": 343290 }, { "epoch": 3.6679309792189754, "grad_norm": 2.7276744842529297, "learning_rate": 8.353336909811662e-07, "loss": 0.0054, "step": 343300 }, { "epoch": 3.668037822533255, "grad_norm": 0.001262002857401967, "learning_rate": 8.353212286164119e-07, "loss": 0.0039, "step": 343310 }, { "epoch": 3.6681446658475347, "grad_norm": 0.07793885469436646, "learning_rate": 8.353087658730546e-07, "loss": 0.0003, "step": 343320 }, { "epoch": 3.668251509161814, "grad_norm": 0.0068117533810436726, "learning_rate": 8.352963027511084e-07, "loss": 0.0101, "step": 343330 }, { "epoch": 3.6683583524760937, "grad_norm": 0.016726389527320862, "learning_rate": 8.352838392505874e-07, "loss": 0.0098, "step": 343340 }, { "epoch": 3.6684651957903736, "grad_norm": 0.002196388551965356, "learning_rate": 8.352713753715057e-07, "loss": 0.0046, "step": 343350 }, { "epoch": 3.668572039104653, "grad_norm": 0.6877710223197937, "learning_rate": 8.352589111138772e-07, "loss": 0.0024, "step": 343360 }, { "epoch": 3.6686788824189325, "grad_norm": 5.940556526184082, "learning_rate": 8.352464464777161e-07, "loss": 0.0067, "step": 343370 }, { "epoch": 3.6687857257332124, "grad_norm": 0.07739396393299103, "learning_rate": 8.352339814630365e-07, "loss": 0.0201, "step": 343380 }, { "epoch": 3.668892569047492, "grad_norm": 0.2575114071369171, "learning_rate": 8.352215160698523e-07, "loss": 0.0024, "step": 343390 }, { "epoch": 3.6689994123617717, "grad_norm": 0.039986930787563324, "learning_rate": 8.352090502981779e-07, "loss": 0.0139, "step": 343400 }, { "epoch": 3.669106255676051, "grad_norm": 5.216710090637207, "learning_rate": 8.351965841480271e-07, "loss": 0.0277, "step": 343410 }, { "epoch": 3.6692130989903307, "grad_norm": 0.005568200256675482, "learning_rate": 8.351841176194141e-07, "loss": 0.0014, "step": 343420 }, { "epoch": 3.66931994230461, "grad_norm": 0.8482977151870728, "learning_rate": 8.351716507123529e-07, "loss": 0.0793, "step": 343430 }, { "epoch": 3.66942678561889, "grad_norm": 0.007893720641732216, "learning_rate": 8.351591834268576e-07, "loss": 0.0028, "step": 343440 }, { "epoch": 3.6695336289331695, "grad_norm": 0.0034546132665127516, "learning_rate": 8.351467157629423e-07, "loss": 0.0148, "step": 343450 }, { "epoch": 3.6696404722474494, "grad_norm": 1.4787752628326416, "learning_rate": 8.351342477206211e-07, "loss": 0.009, "step": 343460 }, { "epoch": 3.669747315561729, "grad_norm": 0.016401972621679306, "learning_rate": 8.351217792999079e-07, "loss": 0.0063, "step": 343470 }, { "epoch": 3.6698541588760083, "grad_norm": 0.30046769976615906, "learning_rate": 8.351093105008171e-07, "loss": 0.0077, "step": 343480 }, { "epoch": 3.6699610021902878, "grad_norm": 2.3073806762695312, "learning_rate": 8.350968413233624e-07, "loss": 0.0172, "step": 343490 }, { "epoch": 3.6700678455045677, "grad_norm": 0.002971240784972906, "learning_rate": 8.350843717675582e-07, "loss": 0.0077, "step": 343500 }, { "epoch": 3.670174688818847, "grad_norm": 0.3369274437427521, "learning_rate": 8.350719018334185e-07, "loss": 0.0057, "step": 343510 }, { "epoch": 3.670281532133127, "grad_norm": 0.846924364566803, "learning_rate": 8.350594315209572e-07, "loss": 0.0098, "step": 343520 }, { "epoch": 3.6703883754474065, "grad_norm": 2.5934150218963623, "learning_rate": 8.350469608301887e-07, "loss": 0.0087, "step": 343530 }, { "epoch": 3.670495218761686, "grad_norm": 3.918180227279663, "learning_rate": 8.350344897611268e-07, "loss": 0.0112, "step": 343540 }, { "epoch": 3.6706020620759654, "grad_norm": 0.038921553641557693, "learning_rate": 8.350220183137855e-07, "loss": 0.0027, "step": 343550 }, { "epoch": 3.6707089053902453, "grad_norm": 0.5782128572463989, "learning_rate": 8.350095464881792e-07, "loss": 0.0082, "step": 343560 }, { "epoch": 3.6708157487045248, "grad_norm": 0.3969770669937134, "learning_rate": 8.34997074284322e-07, "loss": 0.0135, "step": 343570 }, { "epoch": 3.6709225920188047, "grad_norm": 0.04987742006778717, "learning_rate": 8.349846017022275e-07, "loss": 0.0064, "step": 343580 }, { "epoch": 3.671029435333084, "grad_norm": 1.6210191249847412, "learning_rate": 8.349721287419103e-07, "loss": 0.0039, "step": 343590 }, { "epoch": 3.6711362786473636, "grad_norm": 3.9865500926971436, "learning_rate": 8.349596554033843e-07, "loss": 0.0167, "step": 343600 }, { "epoch": 3.671243121961643, "grad_norm": 0.03130808472633362, "learning_rate": 8.349471816866636e-07, "loss": 0.0005, "step": 343610 }, { "epoch": 3.671349965275923, "grad_norm": 0.1567351371049881, "learning_rate": 8.349347075917622e-07, "loss": 0.0152, "step": 343620 }, { "epoch": 3.6714568085902024, "grad_norm": 0.024128641933202744, "learning_rate": 8.349222331186941e-07, "loss": 0.0376, "step": 343630 }, { "epoch": 3.6715636519044823, "grad_norm": 0.5237770676612854, "learning_rate": 8.349097582674736e-07, "loss": 0.0262, "step": 343640 }, { "epoch": 3.6716704952187618, "grad_norm": 3.5556700229644775, "learning_rate": 8.348972830381148e-07, "loss": 0.0102, "step": 343650 }, { "epoch": 3.6717773385330412, "grad_norm": 7.289637088775635, "learning_rate": 8.348848074306316e-07, "loss": 0.0041, "step": 343660 }, { "epoch": 3.6718841818473207, "grad_norm": 0.9993907809257507, "learning_rate": 8.348723314450383e-07, "loss": 0.0044, "step": 343670 }, { "epoch": 3.6719910251616006, "grad_norm": 0.06281479448080063, "learning_rate": 8.348598550813487e-07, "loss": 0.0161, "step": 343680 }, { "epoch": 3.67209786847588, "grad_norm": 2.1024584770202637, "learning_rate": 8.34847378339577e-07, "loss": 0.0185, "step": 343690 }, { "epoch": 3.67220471179016, "grad_norm": 0.02709708921611309, "learning_rate": 8.348349012197375e-07, "loss": 0.002, "step": 343700 }, { "epoch": 3.6723115551044394, "grad_norm": 9.19767951965332, "learning_rate": 8.348224237218442e-07, "loss": 0.0095, "step": 343710 }, { "epoch": 3.672418398418719, "grad_norm": 0.024800898507237434, "learning_rate": 8.34809945845911e-07, "loss": 0.0368, "step": 343720 }, { "epoch": 3.6725252417329983, "grad_norm": 0.004711268004029989, "learning_rate": 8.34797467591952e-07, "loss": 0.0168, "step": 343730 }, { "epoch": 3.6726320850472782, "grad_norm": 0.10966697335243225, "learning_rate": 8.347849889599816e-07, "loss": 0.0535, "step": 343740 }, { "epoch": 3.6727389283615577, "grad_norm": 0.011129233054816723, "learning_rate": 8.347725099500136e-07, "loss": 0.0083, "step": 343750 }, { "epoch": 3.6728457716758376, "grad_norm": 4.676461219787598, "learning_rate": 8.347600305620622e-07, "loss": 0.0142, "step": 343760 }, { "epoch": 3.672952614990117, "grad_norm": 0.07931400835514069, "learning_rate": 8.347475507961412e-07, "loss": 0.0055, "step": 343770 }, { "epoch": 3.6730594583043965, "grad_norm": 13.656905174255371, "learning_rate": 8.347350706522651e-07, "loss": 0.027, "step": 343780 }, { "epoch": 3.673166301618676, "grad_norm": 0.0607827864587307, "learning_rate": 8.34722590130448e-07, "loss": 0.0244, "step": 343790 }, { "epoch": 3.673273144932956, "grad_norm": 0.2731688916683197, "learning_rate": 8.347101092307039e-07, "loss": 0.0267, "step": 343800 }, { "epoch": 3.6733799882472353, "grad_norm": 2.668888568878174, "learning_rate": 8.346976279530464e-07, "loss": 0.007, "step": 343810 }, { "epoch": 3.6734868315615152, "grad_norm": 1.0354480743408203, "learning_rate": 8.346851462974904e-07, "loss": 0.0244, "step": 343820 }, { "epoch": 3.6735936748757947, "grad_norm": 2.1496355533599854, "learning_rate": 8.346726642640495e-07, "loss": 0.0129, "step": 343830 }, { "epoch": 3.673700518190074, "grad_norm": 0.016757706180214882, "learning_rate": 8.346601818527378e-07, "loss": 0.0018, "step": 343840 }, { "epoch": 3.6738073615043536, "grad_norm": 0.00457478454336524, "learning_rate": 8.346476990635695e-07, "loss": 0.0094, "step": 343850 }, { "epoch": 3.6739142048186335, "grad_norm": 0.022585811093449593, "learning_rate": 8.346352158965588e-07, "loss": 0.0039, "step": 343860 }, { "epoch": 3.674021048132913, "grad_norm": 0.1342032551765442, "learning_rate": 8.346227323517196e-07, "loss": 0.0268, "step": 343870 }, { "epoch": 3.674127891447193, "grad_norm": 0.6983518004417419, "learning_rate": 8.346102484290663e-07, "loss": 0.0053, "step": 343880 }, { "epoch": 3.6742347347614723, "grad_norm": 0.07788430154323578, "learning_rate": 8.345977641286126e-07, "loss": 0.0173, "step": 343890 }, { "epoch": 3.674341578075752, "grad_norm": 0.07528499513864517, "learning_rate": 8.345852794503727e-07, "loss": 0.0115, "step": 343900 }, { "epoch": 3.6744484213900312, "grad_norm": 0.04209647700190544, "learning_rate": 8.345727943943609e-07, "loss": 0.0243, "step": 343910 }, { "epoch": 3.674555264704311, "grad_norm": 4.480329990386963, "learning_rate": 8.345603089605911e-07, "loss": 0.0174, "step": 343920 }, { "epoch": 3.6746621080185906, "grad_norm": 0.29009950160980225, "learning_rate": 8.345478231490774e-07, "loss": 0.0079, "step": 343930 }, { "epoch": 3.6747689513328705, "grad_norm": 2.9432621002197266, "learning_rate": 8.345353369598341e-07, "loss": 0.0102, "step": 343940 }, { "epoch": 3.67487579464715, "grad_norm": 0.4197695553302765, "learning_rate": 8.345228503928751e-07, "loss": 0.0021, "step": 343950 }, { "epoch": 3.6749826379614294, "grad_norm": 0.1245364099740982, "learning_rate": 8.345103634482146e-07, "loss": 0.0079, "step": 343960 }, { "epoch": 3.6750894812757093, "grad_norm": 2.3273913860321045, "learning_rate": 8.344978761258665e-07, "loss": 0.0037, "step": 343970 }, { "epoch": 3.675196324589989, "grad_norm": 0.03676379844546318, "learning_rate": 8.344853884258451e-07, "loss": 0.009, "step": 343980 }, { "epoch": 3.6753031679042683, "grad_norm": 0.037668559700250626, "learning_rate": 8.344729003481645e-07, "loss": 0.0057, "step": 343990 }, { "epoch": 3.675410011218548, "grad_norm": 0.029292549937963486, "learning_rate": 8.344604118928389e-07, "loss": 0.0236, "step": 344000 }, { "epoch": 3.6755168545328276, "grad_norm": 0.0852959156036377, "learning_rate": 8.344479230598819e-07, "loss": 0.0138, "step": 344010 }, { "epoch": 3.675623697847107, "grad_norm": 3.814567804336548, "learning_rate": 8.344354338493083e-07, "loss": 0.0202, "step": 344020 }, { "epoch": 3.675730541161387, "grad_norm": 0.0942826122045517, "learning_rate": 8.344229442611317e-07, "loss": 0.0178, "step": 344030 }, { "epoch": 3.6758373844756664, "grad_norm": 1.5324933528900146, "learning_rate": 8.344104542953663e-07, "loss": 0.0147, "step": 344040 }, { "epoch": 3.675944227789946, "grad_norm": 0.135468527674675, "learning_rate": 8.343979639520264e-07, "loss": 0.0266, "step": 344050 }, { "epoch": 3.676051071104226, "grad_norm": 0.5174995064735413, "learning_rate": 8.343854732311259e-07, "loss": 0.0144, "step": 344060 }, { "epoch": 3.6761579144185053, "grad_norm": 0.044881585985422134, "learning_rate": 8.34372982132679e-07, "loss": 0.0327, "step": 344070 }, { "epoch": 3.6762647577327847, "grad_norm": 0.029984651133418083, "learning_rate": 8.343604906566997e-07, "loss": 0.0189, "step": 344080 }, { "epoch": 3.6763716010470646, "grad_norm": 0.005362553987652063, "learning_rate": 8.343479988032021e-07, "loss": 0.0171, "step": 344090 }, { "epoch": 3.676478444361344, "grad_norm": 0.004887613467872143, "learning_rate": 8.343355065722005e-07, "loss": 0.0108, "step": 344100 }, { "epoch": 3.6765852876756235, "grad_norm": 6.982076168060303, "learning_rate": 8.343230139637088e-07, "loss": 0.0081, "step": 344110 }, { "epoch": 3.6766921309899034, "grad_norm": 0.021934520453214645, "learning_rate": 8.343105209777412e-07, "loss": 0.0235, "step": 344120 }, { "epoch": 3.676798974304183, "grad_norm": 0.04840751737356186, "learning_rate": 8.342980276143119e-07, "loss": 0.038, "step": 344130 }, { "epoch": 3.6769058176184624, "grad_norm": 0.7142816185951233, "learning_rate": 8.342855338734348e-07, "loss": 0.0596, "step": 344140 }, { "epoch": 3.6770126609327423, "grad_norm": 0.5174864530563354, "learning_rate": 8.342730397551241e-07, "loss": 0.0216, "step": 344150 }, { "epoch": 3.6771195042470217, "grad_norm": 0.9465133547782898, "learning_rate": 8.342605452593939e-07, "loss": 0.0328, "step": 344160 }, { "epoch": 3.6772263475613016, "grad_norm": 2.344930410385132, "learning_rate": 8.342480503862584e-07, "loss": 0.0315, "step": 344170 }, { "epoch": 3.677333190875581, "grad_norm": 0.04803807660937309, "learning_rate": 8.342355551357315e-07, "loss": 0.0231, "step": 344180 }, { "epoch": 3.6774400341898605, "grad_norm": 0.13865041732788086, "learning_rate": 8.342230595078276e-07, "loss": 0.0128, "step": 344190 }, { "epoch": 3.67754687750414, "grad_norm": 0.027197424322366714, "learning_rate": 8.342105635025605e-07, "loss": 0.0064, "step": 344200 }, { "epoch": 3.67765372081842, "grad_norm": 0.012062394991517067, "learning_rate": 8.341980671199445e-07, "loss": 0.0225, "step": 344210 }, { "epoch": 3.6777605641326994, "grad_norm": 0.5709183812141418, "learning_rate": 8.341855703599936e-07, "loss": 0.0168, "step": 344220 }, { "epoch": 3.6778674074469793, "grad_norm": 0.2027408331632614, "learning_rate": 8.341730732227221e-07, "loss": 0.0045, "step": 344230 }, { "epoch": 3.6779742507612587, "grad_norm": 0.007647581398487091, "learning_rate": 8.341605757081438e-07, "loss": 0.0048, "step": 344240 }, { "epoch": 3.678081094075538, "grad_norm": 5.193947792053223, "learning_rate": 8.34148077816273e-07, "loss": 0.0043, "step": 344250 }, { "epoch": 3.6781879373898176, "grad_norm": 9.931188583374023, "learning_rate": 8.341355795471239e-07, "loss": 0.0431, "step": 344260 }, { "epoch": 3.6782947807040975, "grad_norm": 0.19191038608551025, "learning_rate": 8.341230809007104e-07, "loss": 0.0129, "step": 344270 }, { "epoch": 3.678401624018377, "grad_norm": 0.0968860387802124, "learning_rate": 8.341105818770471e-07, "loss": 0.017, "step": 344280 }, { "epoch": 3.678508467332657, "grad_norm": 0.05182356759905815, "learning_rate": 8.340980824761473e-07, "loss": 0.0036, "step": 344290 }, { "epoch": 3.6786153106469364, "grad_norm": 1.043696641921997, "learning_rate": 8.340855826980258e-07, "loss": 0.0141, "step": 344300 }, { "epoch": 3.678722153961216, "grad_norm": 0.31976744532585144, "learning_rate": 8.340730825426964e-07, "loss": 0.0201, "step": 344310 }, { "epoch": 3.6788289972754953, "grad_norm": 0.025611575692892075, "learning_rate": 8.340605820101731e-07, "loss": 0.0096, "step": 344320 }, { "epoch": 3.678935840589775, "grad_norm": 0.0050929831340909, "learning_rate": 8.340480811004704e-07, "loss": 0.0077, "step": 344330 }, { "epoch": 3.6790426839040546, "grad_norm": 2.9883480072021484, "learning_rate": 8.340355798136022e-07, "loss": 0.0051, "step": 344340 }, { "epoch": 3.6791495272183345, "grad_norm": 0.05530143529176712, "learning_rate": 8.340230781495825e-07, "loss": 0.0041, "step": 344350 }, { "epoch": 3.679256370532614, "grad_norm": 0.8393409848213196, "learning_rate": 8.340105761084256e-07, "loss": 0.0082, "step": 344360 }, { "epoch": 3.6793632138468935, "grad_norm": 0.0020727012306451797, "learning_rate": 8.339980736901455e-07, "loss": 0.0026, "step": 344370 }, { "epoch": 3.679470057161173, "grad_norm": 1.7749054431915283, "learning_rate": 8.339855708947565e-07, "loss": 0.0235, "step": 344380 }, { "epoch": 3.679576900475453, "grad_norm": 0.18422003090381622, "learning_rate": 8.339730677222725e-07, "loss": 0.0087, "step": 344390 }, { "epoch": 3.6796837437897323, "grad_norm": 0.3369273543357849, "learning_rate": 8.339605641727077e-07, "loss": 0.0019, "step": 344400 }, { "epoch": 3.679790587104012, "grad_norm": 0.025194112211465836, "learning_rate": 8.339480602460761e-07, "loss": 0.0133, "step": 344410 }, { "epoch": 3.6798974304182916, "grad_norm": 0.15845449268817902, "learning_rate": 8.339355559423922e-07, "loss": 0.0078, "step": 344420 }, { "epoch": 3.680004273732571, "grad_norm": 0.9545953273773193, "learning_rate": 8.339230512616697e-07, "loss": 0.0038, "step": 344430 }, { "epoch": 3.6801111170468506, "grad_norm": 0.007070471998304129, "learning_rate": 8.339105462039228e-07, "loss": 0.0393, "step": 344440 }, { "epoch": 3.6802179603611305, "grad_norm": 6.692037105560303, "learning_rate": 8.338980407691658e-07, "loss": 0.0037, "step": 344450 }, { "epoch": 3.68032480367541, "grad_norm": 1.7322461605072021, "learning_rate": 8.338855349574126e-07, "loss": 0.0227, "step": 344460 }, { "epoch": 3.68043164698969, "grad_norm": 0.6362977027893066, "learning_rate": 8.338730287686775e-07, "loss": 0.0005, "step": 344470 }, { "epoch": 3.6805384903039693, "grad_norm": 17.47509002685547, "learning_rate": 8.338605222029746e-07, "loss": 0.0201, "step": 344480 }, { "epoch": 3.6806453336182487, "grad_norm": 0.0011386260157451034, "learning_rate": 8.33848015260318e-07, "loss": 0.0093, "step": 344490 }, { "epoch": 3.680752176932528, "grad_norm": 7.900669097900391, "learning_rate": 8.338355079407218e-07, "loss": 0.0192, "step": 344500 }, { "epoch": 3.680859020246808, "grad_norm": 5.1743574142456055, "learning_rate": 8.338230002442e-07, "loss": 0.0101, "step": 344510 }, { "epoch": 3.6809658635610876, "grad_norm": 0.17934399843215942, "learning_rate": 8.338104921707668e-07, "loss": 0.009, "step": 344520 }, { "epoch": 3.6810727068753675, "grad_norm": 7.455629825592041, "learning_rate": 8.337979837204365e-07, "loss": 0.0183, "step": 344530 }, { "epoch": 3.681179550189647, "grad_norm": 1.2060894966125488, "learning_rate": 8.33785474893223e-07, "loss": 0.0099, "step": 344540 }, { "epoch": 3.6812863935039264, "grad_norm": 8.166840553283691, "learning_rate": 8.337729656891405e-07, "loss": 0.0084, "step": 344550 }, { "epoch": 3.681393236818206, "grad_norm": 0.004810381680727005, "learning_rate": 8.337604561082033e-07, "loss": 0.0088, "step": 344560 }, { "epoch": 3.6815000801324858, "grad_norm": 0.15696623921394348, "learning_rate": 8.337479461504251e-07, "loss": 0.0418, "step": 344570 }, { "epoch": 3.681606923446765, "grad_norm": 0.007733238395303488, "learning_rate": 8.337354358158204e-07, "loss": 0.0219, "step": 344580 }, { "epoch": 3.681713766761045, "grad_norm": 0.009839077480137348, "learning_rate": 8.337229251044033e-07, "loss": 0.0087, "step": 344590 }, { "epoch": 3.6818206100753246, "grad_norm": 4.864077091217041, "learning_rate": 8.337104140161877e-07, "loss": 0.0111, "step": 344600 }, { "epoch": 3.681927453389604, "grad_norm": 0.2704966068267822, "learning_rate": 8.336979025511879e-07, "loss": 0.0049, "step": 344610 }, { "epoch": 3.6820342967038835, "grad_norm": 0.019896753132343292, "learning_rate": 8.336853907094179e-07, "loss": 0.0151, "step": 344620 }, { "epoch": 3.6821411400181634, "grad_norm": 9.24759578704834, "learning_rate": 8.33672878490892e-07, "loss": 0.0076, "step": 344630 }, { "epoch": 3.682247983332443, "grad_norm": 10.161632537841797, "learning_rate": 8.336603658956242e-07, "loss": 0.0412, "step": 344640 }, { "epoch": 3.6823548266467228, "grad_norm": 7.5556416511535645, "learning_rate": 8.336478529236289e-07, "loss": 0.009, "step": 344650 }, { "epoch": 3.682461669961002, "grad_norm": 1.0522316694259644, "learning_rate": 8.336353395749196e-07, "loss": 0.0235, "step": 344660 }, { "epoch": 3.6825685132752817, "grad_norm": 0.7795841693878174, "learning_rate": 8.33622825849511e-07, "loss": 0.0251, "step": 344670 }, { "epoch": 3.682675356589561, "grad_norm": 0.010624592192471027, "learning_rate": 8.336103117474172e-07, "loss": 0.0098, "step": 344680 }, { "epoch": 3.682782199903841, "grad_norm": 0.0038747566286474466, "learning_rate": 8.335977972686519e-07, "loss": 0.0024, "step": 344690 }, { "epoch": 3.6828890432181205, "grad_norm": 3.87032413482666, "learning_rate": 8.335852824132297e-07, "loss": 0.0288, "step": 344700 }, { "epoch": 3.6829958865324004, "grad_norm": 2.8239898681640625, "learning_rate": 8.335727671811644e-07, "loss": 0.0273, "step": 344710 }, { "epoch": 3.68310272984668, "grad_norm": 13.459826469421387, "learning_rate": 8.335602515724704e-07, "loss": 0.0337, "step": 344720 }, { "epoch": 3.6832095731609593, "grad_norm": 0.06892231851816177, "learning_rate": 8.335477355871616e-07, "loss": 0.011, "step": 344730 }, { "epoch": 3.683316416475239, "grad_norm": 8.690399169921875, "learning_rate": 8.335352192252523e-07, "loss": 0.0276, "step": 344740 }, { "epoch": 3.6834232597895187, "grad_norm": 2.9502758979797363, "learning_rate": 8.335227024867565e-07, "loss": 0.0304, "step": 344750 }, { "epoch": 3.683530103103798, "grad_norm": 0.3236563205718994, "learning_rate": 8.335101853716885e-07, "loss": 0.0036, "step": 344760 }, { "epoch": 3.683636946418078, "grad_norm": 0.16894051432609558, "learning_rate": 8.334976678800623e-07, "loss": 0.0007, "step": 344770 }, { "epoch": 3.6837437897323575, "grad_norm": 0.003982468042522669, "learning_rate": 8.33485150011892e-07, "loss": 0.0124, "step": 344780 }, { "epoch": 3.683850633046637, "grad_norm": 0.40296927094459534, "learning_rate": 8.334726317671918e-07, "loss": 0.0034, "step": 344790 }, { "epoch": 3.683957476360917, "grad_norm": 0.007890133187174797, "learning_rate": 8.33460113145976e-07, "loss": 0.0167, "step": 344800 }, { "epoch": 3.6840643196751963, "grad_norm": 0.005642310716211796, "learning_rate": 8.334475941482583e-07, "loss": 0.0079, "step": 344810 }, { "epoch": 3.684171162989476, "grad_norm": 0.05708552896976471, "learning_rate": 8.334350747740533e-07, "loss": 0.0084, "step": 344820 }, { "epoch": 3.6842780063037557, "grad_norm": 4.391377925872803, "learning_rate": 8.334225550233748e-07, "loss": 0.0269, "step": 344830 }, { "epoch": 3.684384849618035, "grad_norm": 1.2916616201400757, "learning_rate": 8.334100348962372e-07, "loss": 0.0093, "step": 344840 }, { "epoch": 3.6844916929323146, "grad_norm": 0.2231638878583908, "learning_rate": 8.333975143926544e-07, "loss": 0.0392, "step": 344850 }, { "epoch": 3.6845985362465945, "grad_norm": 2.1162614822387695, "learning_rate": 8.333849935126407e-07, "loss": 0.0198, "step": 344860 }, { "epoch": 3.684705379560874, "grad_norm": 0.004459524992853403, "learning_rate": 8.333724722562101e-07, "loss": 0.0098, "step": 344870 }, { "epoch": 3.684812222875154, "grad_norm": 0.02562924660742283, "learning_rate": 8.333599506233768e-07, "loss": 0.018, "step": 344880 }, { "epoch": 3.6849190661894333, "grad_norm": 0.09766221791505814, "learning_rate": 8.333474286141551e-07, "loss": 0.0085, "step": 344890 }, { "epoch": 3.685025909503713, "grad_norm": 0.26848357915878296, "learning_rate": 8.333349062285589e-07, "loss": 0.012, "step": 344900 }, { "epoch": 3.6851327528179922, "grad_norm": 0.0011226935312151909, "learning_rate": 8.333223834666025e-07, "loss": 0.0064, "step": 344910 }, { "epoch": 3.685239596132272, "grad_norm": 6.878376483917236, "learning_rate": 8.333098603282999e-07, "loss": 0.0188, "step": 344920 }, { "epoch": 3.6853464394465516, "grad_norm": 0.007723651826381683, "learning_rate": 8.332973368136655e-07, "loss": 0.009, "step": 344930 }, { "epoch": 3.6854532827608315, "grad_norm": 0.2372470498085022, "learning_rate": 8.33284812922713e-07, "loss": 0.0226, "step": 344940 }, { "epoch": 3.685560126075111, "grad_norm": 2.418874979019165, "learning_rate": 8.332722886554568e-07, "loss": 0.0036, "step": 344950 }, { "epoch": 3.6856669693893904, "grad_norm": 0.0261982474476099, "learning_rate": 8.332597640119113e-07, "loss": 0.007, "step": 344960 }, { "epoch": 3.68577381270367, "grad_norm": 0.7636143565177917, "learning_rate": 8.332472389920902e-07, "loss": 0.0441, "step": 344970 }, { "epoch": 3.68588065601795, "grad_norm": 11.982987403869629, "learning_rate": 8.332347135960077e-07, "loss": 0.0076, "step": 344980 }, { "epoch": 3.6859874993322292, "grad_norm": 4.2095513343811035, "learning_rate": 8.332221878236783e-07, "loss": 0.014, "step": 344990 }, { "epoch": 3.686094342646509, "grad_norm": 1.1180680990219116, "learning_rate": 8.332096616751157e-07, "loss": 0.0232, "step": 345000 }, { "epoch": 3.6862011859607886, "grad_norm": 4.776474952697754, "learning_rate": 8.331971351503343e-07, "loss": 0.0777, "step": 345010 }, { "epoch": 3.686308029275068, "grad_norm": 0.07081128656864166, "learning_rate": 8.331846082493482e-07, "loss": 0.0611, "step": 345020 }, { "epoch": 3.6864148725893475, "grad_norm": 0.7547569274902344, "learning_rate": 8.331720809721715e-07, "loss": 0.0085, "step": 345030 }, { "epoch": 3.6865217159036274, "grad_norm": 7.855301856994629, "learning_rate": 8.331595533188185e-07, "loss": 0.0058, "step": 345040 }, { "epoch": 3.686628559217907, "grad_norm": 0.012437431141734123, "learning_rate": 8.331470252893032e-07, "loss": 0.0077, "step": 345050 }, { "epoch": 3.686735402532187, "grad_norm": 0.017108650878071785, "learning_rate": 8.331344968836396e-07, "loss": 0.0015, "step": 345060 }, { "epoch": 3.6868422458464662, "grad_norm": 0.07761071622371674, "learning_rate": 8.33121968101842e-07, "loss": 0.0008, "step": 345070 }, { "epoch": 3.6869490891607457, "grad_norm": 0.005321504082530737, "learning_rate": 8.331094389439248e-07, "loss": 0.0161, "step": 345080 }, { "epoch": 3.687055932475025, "grad_norm": 0.1366278976202011, "learning_rate": 8.330969094099016e-07, "loss": 0.0036, "step": 345090 }, { "epoch": 3.687162775789305, "grad_norm": 0.8548314571380615, "learning_rate": 8.33084379499787e-07, "loss": 0.0066, "step": 345100 }, { "epoch": 3.6872696191035845, "grad_norm": 2.53311824798584, "learning_rate": 8.330718492135951e-07, "loss": 0.013, "step": 345110 }, { "epoch": 3.6873764624178644, "grad_norm": 2.4226174354553223, "learning_rate": 8.330593185513399e-07, "loss": 0.0147, "step": 345120 }, { "epoch": 3.687483305732144, "grad_norm": 14.342630386352539, "learning_rate": 8.330467875130356e-07, "loss": 0.0663, "step": 345130 }, { "epoch": 3.6875901490464233, "grad_norm": 0.05227360501885414, "learning_rate": 8.330342560986962e-07, "loss": 0.0148, "step": 345140 }, { "epoch": 3.687696992360703, "grad_norm": 0.002861606189981103, "learning_rate": 8.33021724308336e-07, "loss": 0.0038, "step": 345150 }, { "epoch": 3.6878038356749827, "grad_norm": 5.9165239334106445, "learning_rate": 8.330091921419693e-07, "loss": 0.0134, "step": 345160 }, { "epoch": 3.687910678989262, "grad_norm": 1.3017209768295288, "learning_rate": 8.3299665959961e-07, "loss": 0.0046, "step": 345170 }, { "epoch": 3.688017522303542, "grad_norm": 0.17855222523212433, "learning_rate": 8.329841266812722e-07, "loss": 0.0045, "step": 345180 }, { "epoch": 3.6881243656178215, "grad_norm": 0.00928694847971201, "learning_rate": 8.329715933869706e-07, "loss": 0.01, "step": 345190 }, { "epoch": 3.688231208932101, "grad_norm": 0.004082640167325735, "learning_rate": 8.329590597167187e-07, "loss": 0.0039, "step": 345200 }, { "epoch": 3.6883380522463804, "grad_norm": 0.0650302916765213, "learning_rate": 8.329465256705307e-07, "loss": 0.0082, "step": 345210 }, { "epoch": 3.6884448955606604, "grad_norm": 3.86466121673584, "learning_rate": 8.329339912484212e-07, "loss": 0.0028, "step": 345220 }, { "epoch": 3.68855173887494, "grad_norm": 2.0897903442382812, "learning_rate": 8.329214564504039e-07, "loss": 0.0226, "step": 345230 }, { "epoch": 3.6886585821892197, "grad_norm": 0.14182192087173462, "learning_rate": 8.329089212764932e-07, "loss": 0.0007, "step": 345240 }, { "epoch": 3.688765425503499, "grad_norm": 6.2852983474731445, "learning_rate": 8.328963857267033e-07, "loss": 0.0183, "step": 345250 }, { "epoch": 3.6888722688177786, "grad_norm": 0.13070394098758698, "learning_rate": 8.328838498010482e-07, "loss": 0.0131, "step": 345260 }, { "epoch": 3.688979112132058, "grad_norm": 9.899216651916504, "learning_rate": 8.328713134995423e-07, "loss": 0.0284, "step": 345270 }, { "epoch": 3.689085955446338, "grad_norm": 0.0037706007715314627, "learning_rate": 8.328587768221993e-07, "loss": 0.0127, "step": 345280 }, { "epoch": 3.6891927987606175, "grad_norm": 0.007680767681449652, "learning_rate": 8.328462397690337e-07, "loss": 0.0099, "step": 345290 }, { "epoch": 3.6892996420748974, "grad_norm": 0.2353363335132599, "learning_rate": 8.328337023400595e-07, "loss": 0.0236, "step": 345300 }, { "epoch": 3.689406485389177, "grad_norm": 0.0026788990944623947, "learning_rate": 8.32821164535291e-07, "loss": 0.0132, "step": 345310 }, { "epoch": 3.6895133287034563, "grad_norm": 0.008739362470805645, "learning_rate": 8.328086263547422e-07, "loss": 0.0067, "step": 345320 }, { "epoch": 3.6896201720177357, "grad_norm": 3.7848262786865234, "learning_rate": 8.327960877984274e-07, "loss": 0.0263, "step": 345330 }, { "epoch": 3.6897270153320156, "grad_norm": 0.08215592801570892, "learning_rate": 8.327835488663609e-07, "loss": 0.0095, "step": 345340 }, { "epoch": 3.689833858646295, "grad_norm": 0.01573689468204975, "learning_rate": 8.327710095585565e-07, "loss": 0.0038, "step": 345350 }, { "epoch": 3.689940701960575, "grad_norm": 0.0012354046339169145, "learning_rate": 8.327584698750286e-07, "loss": 0.0371, "step": 345360 }, { "epoch": 3.6900475452748545, "grad_norm": 0.005248107947409153, "learning_rate": 8.32745929815791e-07, "loss": 0.0188, "step": 345370 }, { "epoch": 3.690154388589134, "grad_norm": 1.3066405057907104, "learning_rate": 8.327333893808584e-07, "loss": 0.0116, "step": 345380 }, { "epoch": 3.6902612319034134, "grad_norm": 0.008724544197320938, "learning_rate": 8.327208485702446e-07, "loss": 0.0061, "step": 345390 }, { "epoch": 3.6903680752176933, "grad_norm": 0.013376131653785706, "learning_rate": 8.327083073839639e-07, "loss": 0.0194, "step": 345400 }, { "epoch": 3.6904749185319727, "grad_norm": 4.293062686920166, "learning_rate": 8.326957658220304e-07, "loss": 0.0745, "step": 345410 }, { "epoch": 3.6905817618462526, "grad_norm": 0.4708126485347748, "learning_rate": 8.326832238844582e-07, "loss": 0.0179, "step": 345420 }, { "epoch": 3.690688605160532, "grad_norm": 0.16590160131454468, "learning_rate": 8.326706815712614e-07, "loss": 0.0493, "step": 345430 }, { "epoch": 3.6907954484748116, "grad_norm": 1.3941020965576172, "learning_rate": 8.326581388824546e-07, "loss": 0.0163, "step": 345440 }, { "epoch": 3.6909022917890915, "grad_norm": 0.011642570607364178, "learning_rate": 8.326455958180516e-07, "loss": 0.0111, "step": 345450 }, { "epoch": 3.691009135103371, "grad_norm": 0.18314509093761444, "learning_rate": 8.326330523780665e-07, "loss": 0.0009, "step": 345460 }, { "epoch": 3.6911159784176504, "grad_norm": 0.8011054992675781, "learning_rate": 8.326205085625136e-07, "loss": 0.0056, "step": 345470 }, { "epoch": 3.6912228217319303, "grad_norm": 0.07302051037549973, "learning_rate": 8.32607964371407e-07, "loss": 0.0359, "step": 345480 }, { "epoch": 3.6913296650462097, "grad_norm": 0.012746114283800125, "learning_rate": 8.325954198047609e-07, "loss": 0.0152, "step": 345490 }, { "epoch": 3.691436508360489, "grad_norm": 2.249591112136841, "learning_rate": 8.325828748625896e-07, "loss": 0.0066, "step": 345500 }, { "epoch": 3.691543351674769, "grad_norm": 0.32538992166519165, "learning_rate": 8.32570329544907e-07, "loss": 0.0113, "step": 345510 }, { "epoch": 3.6916501949890486, "grad_norm": 0.012788613326847553, "learning_rate": 8.325577838517275e-07, "loss": 0.014, "step": 345520 }, { "epoch": 3.691757038303328, "grad_norm": 0.003490777686238289, "learning_rate": 8.325452377830651e-07, "loss": 0.0228, "step": 345530 }, { "epoch": 3.691863881617608, "grad_norm": 0.031196903437376022, "learning_rate": 8.325326913389341e-07, "loss": 0.0134, "step": 345540 }, { "epoch": 3.6919707249318874, "grad_norm": 0.03159866854548454, "learning_rate": 8.325201445193485e-07, "loss": 0.0163, "step": 345550 }, { "epoch": 3.692077568246167, "grad_norm": 0.15333542227745056, "learning_rate": 8.325075973243226e-07, "loss": 0.0202, "step": 345560 }, { "epoch": 3.6921844115604467, "grad_norm": 0.044470928609371185, "learning_rate": 8.324950497538706e-07, "loss": 0.0123, "step": 345570 }, { "epoch": 3.692291254874726, "grad_norm": 0.26438528299331665, "learning_rate": 8.324825018080063e-07, "loss": 0.0073, "step": 345580 }, { "epoch": 3.6923980981890057, "grad_norm": 0.25915709137916565, "learning_rate": 8.324699534867445e-07, "loss": 0.0052, "step": 345590 }, { "epoch": 3.6925049415032856, "grad_norm": 0.27402907609939575, "learning_rate": 8.324574047900989e-07, "loss": 0.0048, "step": 345600 }, { "epoch": 3.692611784817565, "grad_norm": 1.6411263942718506, "learning_rate": 8.324448557180839e-07, "loss": 0.0225, "step": 345610 }, { "epoch": 3.6927186281318445, "grad_norm": 0.5464740991592407, "learning_rate": 8.324323062707135e-07, "loss": 0.0198, "step": 345620 }, { "epoch": 3.6928254714461244, "grad_norm": 1.2904717922210693, "learning_rate": 8.324197564480018e-07, "loss": 0.016, "step": 345630 }, { "epoch": 3.692932314760404, "grad_norm": 0.012090733274817467, "learning_rate": 8.324072062499633e-07, "loss": 0.0033, "step": 345640 }, { "epoch": 3.6930391580746837, "grad_norm": 0.00937306322157383, "learning_rate": 8.32394655676612e-07, "loss": 0.0116, "step": 345650 }, { "epoch": 3.693146001388963, "grad_norm": 0.1398981362581253, "learning_rate": 8.323821047279618e-07, "loss": 0.0045, "step": 345660 }, { "epoch": 3.6932528447032427, "grad_norm": 0.6859559416770935, "learning_rate": 8.323695534040273e-07, "loss": 0.0125, "step": 345670 }, { "epoch": 3.693359688017522, "grad_norm": 1.8294825553894043, "learning_rate": 8.323570017048225e-07, "loss": 0.0095, "step": 345680 }, { "epoch": 3.693466531331802, "grad_norm": 0.19294340908527374, "learning_rate": 8.323444496303614e-07, "loss": 0.0213, "step": 345690 }, { "epoch": 3.6935733746460815, "grad_norm": 0.45211881399154663, "learning_rate": 8.323318971806585e-07, "loss": 0.0156, "step": 345700 }, { "epoch": 3.6936802179603614, "grad_norm": 0.35165756940841675, "learning_rate": 8.323193443557277e-07, "loss": 0.0077, "step": 345710 }, { "epoch": 3.693787061274641, "grad_norm": 0.0020949437748640776, "learning_rate": 8.323067911555834e-07, "loss": 0.004, "step": 345720 }, { "epoch": 3.6938939045889203, "grad_norm": 7.87592077255249, "learning_rate": 8.322942375802395e-07, "loss": 0.018, "step": 345730 }, { "epoch": 3.6940007479031998, "grad_norm": 3.6642727851867676, "learning_rate": 8.322816836297106e-07, "loss": 0.0093, "step": 345740 }, { "epoch": 3.6941075912174797, "grad_norm": 0.009644398465752602, "learning_rate": 8.322691293040103e-07, "loss": 0.0009, "step": 345750 }, { "epoch": 3.694214434531759, "grad_norm": 0.032318707555532455, "learning_rate": 8.322565746031531e-07, "loss": 0.0076, "step": 345760 }, { "epoch": 3.694321277846039, "grad_norm": 2.575204849243164, "learning_rate": 8.322440195271534e-07, "loss": 0.0038, "step": 345770 }, { "epoch": 3.6944281211603185, "grad_norm": 1.8726099729537964, "learning_rate": 8.322314640760249e-07, "loss": 0.0034, "step": 345780 }, { "epoch": 3.694534964474598, "grad_norm": 0.025770189240574837, "learning_rate": 8.32218908249782e-07, "loss": 0.0087, "step": 345790 }, { "epoch": 3.6946418077888774, "grad_norm": 0.6562917232513428, "learning_rate": 8.322063520484391e-07, "loss": 0.0079, "step": 345800 }, { "epoch": 3.6947486511031573, "grad_norm": 9.092545509338379, "learning_rate": 8.321937954720098e-07, "loss": 0.0044, "step": 345810 }, { "epoch": 3.6948554944174368, "grad_norm": 0.19088777899742126, "learning_rate": 8.321812385205089e-07, "loss": 0.0297, "step": 345820 }, { "epoch": 3.6949623377317167, "grad_norm": 0.002015000442042947, "learning_rate": 8.321686811939503e-07, "loss": 0.0065, "step": 345830 }, { "epoch": 3.695069181045996, "grad_norm": 9.90038013458252, "learning_rate": 8.321561234923481e-07, "loss": 0.0457, "step": 345840 }, { "epoch": 3.6951760243602756, "grad_norm": 0.008675413206219673, "learning_rate": 8.321435654157166e-07, "loss": 0.0004, "step": 345850 }, { "epoch": 3.695282867674555, "grad_norm": 0.14393673837184906, "learning_rate": 8.321310069640699e-07, "loss": 0.0517, "step": 345860 }, { "epoch": 3.695389710988835, "grad_norm": 0.06383785605430603, "learning_rate": 8.321184481374224e-07, "loss": 0.0045, "step": 345870 }, { "epoch": 3.6954965543031144, "grad_norm": 0.09806878119707108, "learning_rate": 8.321058889357879e-07, "loss": 0.0054, "step": 345880 }, { "epoch": 3.6956033976173943, "grad_norm": 7.273397445678711, "learning_rate": 8.320933293591808e-07, "loss": 0.0023, "step": 345890 }, { "epoch": 3.6957102409316738, "grad_norm": 0.41269728541374207, "learning_rate": 8.320807694076154e-07, "loss": 0.0313, "step": 345900 }, { "epoch": 3.6958170842459532, "grad_norm": 0.1009531170129776, "learning_rate": 8.320682090811058e-07, "loss": 0.0193, "step": 345910 }, { "epoch": 3.6959239275602327, "grad_norm": 2.9603431224823, "learning_rate": 8.320556483796659e-07, "loss": 0.0141, "step": 345920 }, { "epoch": 3.6960307708745126, "grad_norm": 0.007140470203012228, "learning_rate": 8.320430873033102e-07, "loss": 0.0076, "step": 345930 }, { "epoch": 3.696137614188792, "grad_norm": 0.010289625264704227, "learning_rate": 8.320305258520528e-07, "loss": 0.0135, "step": 345940 }, { "epoch": 3.696244457503072, "grad_norm": 0.05991138890385628, "learning_rate": 8.32017964025908e-07, "loss": 0.0046, "step": 345950 }, { "epoch": 3.6963513008173514, "grad_norm": 0.0008517717942595482, "learning_rate": 8.320054018248896e-07, "loss": 0.0101, "step": 345960 }, { "epoch": 3.696458144131631, "grad_norm": 1.577386736869812, "learning_rate": 8.319928392490123e-07, "loss": 0.0116, "step": 345970 }, { "epoch": 3.6965649874459103, "grad_norm": 3.294156312942505, "learning_rate": 8.319802762982899e-07, "loss": 0.016, "step": 345980 }, { "epoch": 3.6966718307601902, "grad_norm": 13.898200035095215, "learning_rate": 8.319677129727367e-07, "loss": 0.0097, "step": 345990 }, { "epoch": 3.6967786740744697, "grad_norm": 0.03376126289367676, "learning_rate": 8.31955149272367e-07, "loss": 0.0139, "step": 346000 }, { "epoch": 3.6968855173887496, "grad_norm": 1.3086885213851929, "learning_rate": 8.319425851971948e-07, "loss": 0.0084, "step": 346010 }, { "epoch": 3.696992360703029, "grad_norm": 10.169231414794922, "learning_rate": 8.319300207472344e-07, "loss": 0.0422, "step": 346020 }, { "epoch": 3.6970992040173085, "grad_norm": 0.00757375406101346, "learning_rate": 8.319174559224998e-07, "loss": 0.0045, "step": 346030 }, { "epoch": 3.697206047331588, "grad_norm": 1.5859097242355347, "learning_rate": 8.319048907230055e-07, "loss": 0.0109, "step": 346040 }, { "epoch": 3.697312890645868, "grad_norm": 1.8397680521011353, "learning_rate": 8.318923251487656e-07, "loss": 0.0075, "step": 346050 }, { "epoch": 3.6974197339601473, "grad_norm": 0.20560473203659058, "learning_rate": 8.318797591997941e-07, "loss": 0.0152, "step": 346060 }, { "epoch": 3.6975265772744272, "grad_norm": 0.0025481232441961765, "learning_rate": 8.318671928761054e-07, "loss": 0.0026, "step": 346070 }, { "epoch": 3.6976334205887067, "grad_norm": 0.004361135419458151, "learning_rate": 8.318546261777135e-07, "loss": 0.0263, "step": 346080 }, { "epoch": 3.697740263902986, "grad_norm": 0.005261909682303667, "learning_rate": 8.318420591046326e-07, "loss": 0.0066, "step": 346090 }, { "epoch": 3.6978471072172656, "grad_norm": 0.007964903488755226, "learning_rate": 8.318294916568771e-07, "loss": 0.0132, "step": 346100 }, { "epoch": 3.6979539505315455, "grad_norm": 14.136005401611328, "learning_rate": 8.318169238344611e-07, "loss": 0.0126, "step": 346110 }, { "epoch": 3.698060793845825, "grad_norm": 4.62959098815918, "learning_rate": 8.318043556373986e-07, "loss": 0.0193, "step": 346120 }, { "epoch": 3.698167637160105, "grad_norm": 5.465734481811523, "learning_rate": 8.317917870657041e-07, "loss": 0.0074, "step": 346130 }, { "epoch": 3.6982744804743843, "grad_norm": 2.6968085765838623, "learning_rate": 8.317792181193915e-07, "loss": 0.0011, "step": 346140 }, { "epoch": 3.698381323788664, "grad_norm": 3.832759380340576, "learning_rate": 8.317666487984751e-07, "loss": 0.0291, "step": 346150 }, { "epoch": 3.6984881671029433, "grad_norm": 0.26317086815834045, "learning_rate": 8.317540791029693e-07, "loss": 0.0097, "step": 346160 }, { "epoch": 3.698595010417223, "grad_norm": 0.5759973526000977, "learning_rate": 8.317415090328881e-07, "loss": 0.0058, "step": 346170 }, { "epoch": 3.6987018537315026, "grad_norm": 0.7237509489059448, "learning_rate": 8.317289385882456e-07, "loss": 0.0123, "step": 346180 }, { "epoch": 3.6988086970457825, "grad_norm": 0.010821064934134483, "learning_rate": 8.317163677690559e-07, "loss": 0.0157, "step": 346190 }, { "epoch": 3.698915540360062, "grad_norm": 0.7483665347099304, "learning_rate": 8.317037965753336e-07, "loss": 0.0075, "step": 346200 }, { "epoch": 3.6990223836743414, "grad_norm": 0.07024837285280228, "learning_rate": 8.316912250070927e-07, "loss": 0.0432, "step": 346210 }, { "epoch": 3.6991292269886213, "grad_norm": 0.09272941946983337, "learning_rate": 8.316786530643474e-07, "loss": 0.0438, "step": 346220 }, { "epoch": 3.699236070302901, "grad_norm": 7.74046516418457, "learning_rate": 8.316660807471116e-07, "loss": 0.0298, "step": 346230 }, { "epoch": 3.6993429136171803, "grad_norm": 0.05655258521437645, "learning_rate": 8.316535080554001e-07, "loss": 0.0266, "step": 346240 }, { "epoch": 3.69944975693146, "grad_norm": 0.00642025750130415, "learning_rate": 8.316409349892266e-07, "loss": 0.0064, "step": 346250 }, { "epoch": 3.6995566002457396, "grad_norm": 0.04822932556271553, "learning_rate": 8.316283615486055e-07, "loss": 0.015, "step": 346260 }, { "epoch": 3.699663443560019, "grad_norm": 0.5586894750595093, "learning_rate": 8.316157877335509e-07, "loss": 0.0146, "step": 346270 }, { "epoch": 3.699770286874299, "grad_norm": 0.024532146751880646, "learning_rate": 8.31603213544077e-07, "loss": 0.0025, "step": 346280 }, { "epoch": 3.6998771301885784, "grad_norm": 0.10348470509052277, "learning_rate": 8.315906389801981e-07, "loss": 0.0137, "step": 346290 }, { "epoch": 3.699983973502858, "grad_norm": 0.01778375543653965, "learning_rate": 8.315780640419283e-07, "loss": 0.0088, "step": 346300 }, { "epoch": 3.700090816817138, "grad_norm": 0.08552351593971252, "learning_rate": 8.31565488729282e-07, "loss": 0.0187, "step": 346310 }, { "epoch": 3.7001976601314173, "grad_norm": 0.001603089040145278, "learning_rate": 8.315529130422732e-07, "loss": 0.0576, "step": 346320 }, { "epoch": 3.7003045034456967, "grad_norm": 0.0031395871192216873, "learning_rate": 8.315403369809159e-07, "loss": 0.0021, "step": 346330 }, { "epoch": 3.7004113467599766, "grad_norm": 0.04067675769329071, "learning_rate": 8.315277605452248e-07, "loss": 0.0095, "step": 346340 }, { "epoch": 3.700518190074256, "grad_norm": 1.863329291343689, "learning_rate": 8.315151837352137e-07, "loss": 0.0029, "step": 346350 }, { "epoch": 3.700625033388536, "grad_norm": 7.886880874633789, "learning_rate": 8.31502606550897e-07, "loss": 0.0271, "step": 346360 }, { "epoch": 3.7007318767028154, "grad_norm": 2.7184345722198486, "learning_rate": 8.314900289922889e-07, "loss": 0.008, "step": 346370 }, { "epoch": 3.700838720017095, "grad_norm": 0.017616145312786102, "learning_rate": 8.314774510594033e-07, "loss": 0.0123, "step": 346380 }, { "epoch": 3.7009455633313744, "grad_norm": 0.21401405334472656, "learning_rate": 8.314648727522549e-07, "loss": 0.0085, "step": 346390 }, { "epoch": 3.7010524066456543, "grad_norm": 0.01435426902025938, "learning_rate": 8.314522940708574e-07, "loss": 0.0022, "step": 346400 }, { "epoch": 3.7011592499599337, "grad_norm": 0.03993474319577217, "learning_rate": 8.314397150152255e-07, "loss": 0.0211, "step": 346410 }, { "epoch": 3.7012660932742136, "grad_norm": 1.450024962425232, "learning_rate": 8.31427135585373e-07, "loss": 0.0227, "step": 346420 }, { "epoch": 3.701372936588493, "grad_norm": 0.05781765282154083, "learning_rate": 8.314145557813143e-07, "loss": 0.0085, "step": 346430 }, { "epoch": 3.7014797799027725, "grad_norm": 0.560369610786438, "learning_rate": 8.314019756030635e-07, "loss": 0.0028, "step": 346440 }, { "epoch": 3.701586623217052, "grad_norm": 2.9751083850860596, "learning_rate": 8.31389395050635e-07, "loss": 0.02, "step": 346450 }, { "epoch": 3.701693466531332, "grad_norm": 0.007651164196431637, "learning_rate": 8.313768141240426e-07, "loss": 0.0043, "step": 346460 }, { "epoch": 3.7018003098456114, "grad_norm": 0.05684572085738182, "learning_rate": 8.31364232823301e-07, "loss": 0.0085, "step": 346470 }, { "epoch": 3.7019071531598913, "grad_norm": 0.010077644139528275, "learning_rate": 8.31351651148424e-07, "loss": 0.014, "step": 346480 }, { "epoch": 3.7020139964741707, "grad_norm": 0.5913486480712891, "learning_rate": 8.31339069099426e-07, "loss": 0.0039, "step": 346490 }, { "epoch": 3.70212083978845, "grad_norm": 1.680336356163025, "learning_rate": 8.313264866763214e-07, "loss": 0.0309, "step": 346500 }, { "epoch": 3.7022276831027296, "grad_norm": 1.8396199941635132, "learning_rate": 8.313139038791241e-07, "loss": 0.0095, "step": 346510 }, { "epoch": 3.7023345264170096, "grad_norm": 0.001671237638220191, "learning_rate": 8.313013207078483e-07, "loss": 0.0094, "step": 346520 }, { "epoch": 3.702441369731289, "grad_norm": 0.00553783867508173, "learning_rate": 8.312887371625084e-07, "loss": 0.0097, "step": 346530 }, { "epoch": 3.702548213045569, "grad_norm": 0.014513718895614147, "learning_rate": 8.312761532431183e-07, "loss": 0.003, "step": 346540 }, { "epoch": 3.7026550563598484, "grad_norm": 11.540081024169922, "learning_rate": 8.312635689496927e-07, "loss": 0.055, "step": 346550 }, { "epoch": 3.702761899674128, "grad_norm": 8.315767288208008, "learning_rate": 8.312509842822454e-07, "loss": 0.014, "step": 346560 }, { "epoch": 3.7028687429884073, "grad_norm": 0.00331215001642704, "learning_rate": 8.312383992407908e-07, "loss": 0.0238, "step": 346570 }, { "epoch": 3.702975586302687, "grad_norm": 0.01740606129169464, "learning_rate": 8.31225813825343e-07, "loss": 0.0046, "step": 346580 }, { "epoch": 3.7030824296169667, "grad_norm": 2.201840400695801, "learning_rate": 8.312132280359161e-07, "loss": 0.0115, "step": 346590 }, { "epoch": 3.7031892729312466, "grad_norm": 1.5680370330810547, "learning_rate": 8.312006418725246e-07, "loss": 0.0188, "step": 346600 }, { "epoch": 3.703296116245526, "grad_norm": 0.5362483859062195, "learning_rate": 8.311880553351826e-07, "loss": 0.0203, "step": 346610 }, { "epoch": 3.7034029595598055, "grad_norm": 0.0036976474802941084, "learning_rate": 8.311754684239043e-07, "loss": 0.0006, "step": 346620 }, { "epoch": 3.703509802874085, "grad_norm": 0.0009513566037639976, "learning_rate": 8.311628811387039e-07, "loss": 0.0104, "step": 346630 }, { "epoch": 3.703616646188365, "grad_norm": 0.01882505975663662, "learning_rate": 8.311502934795955e-07, "loss": 0.005, "step": 346640 }, { "epoch": 3.7037234895026443, "grad_norm": 0.00492193503305316, "learning_rate": 8.311377054465935e-07, "loss": 0.0071, "step": 346650 }, { "epoch": 3.703830332816924, "grad_norm": 0.010115786455571651, "learning_rate": 8.311251170397122e-07, "loss": 0.0071, "step": 346660 }, { "epoch": 3.7039371761312037, "grad_norm": 0.010039796121418476, "learning_rate": 8.311125282589655e-07, "loss": 0.0103, "step": 346670 }, { "epoch": 3.704044019445483, "grad_norm": 5.681280136108398, "learning_rate": 8.310999391043679e-07, "loss": 0.0214, "step": 346680 }, { "epoch": 3.7041508627597626, "grad_norm": 0.34877026081085205, "learning_rate": 8.310873495759332e-07, "loss": 0.0007, "step": 346690 }, { "epoch": 3.7042577060740425, "grad_norm": 1.63068687915802, "learning_rate": 8.31074759673676e-07, "loss": 0.0279, "step": 346700 }, { "epoch": 3.704364549388322, "grad_norm": 3.8832855224609375, "learning_rate": 8.310621693976106e-07, "loss": 0.0038, "step": 346710 }, { "epoch": 3.704471392702602, "grad_norm": 0.009487991221249104, "learning_rate": 8.310495787477509e-07, "loss": 0.0086, "step": 346720 }, { "epoch": 3.7045782360168813, "grad_norm": 2.360724925994873, "learning_rate": 8.310369877241112e-07, "loss": 0.0229, "step": 346730 }, { "epoch": 3.7046850793311608, "grad_norm": 0.769893229007721, "learning_rate": 8.310243963267057e-07, "loss": 0.0007, "step": 346740 }, { "epoch": 3.70479192264544, "grad_norm": 0.00928745698183775, "learning_rate": 8.310118045555488e-07, "loss": 0.0332, "step": 346750 }, { "epoch": 3.70489876595972, "grad_norm": 3.077101230621338, "learning_rate": 8.309992124106546e-07, "loss": 0.002, "step": 346760 }, { "epoch": 3.7050056092739996, "grad_norm": 0.030357085168361664, "learning_rate": 8.309866198920372e-07, "loss": 0.0144, "step": 346770 }, { "epoch": 3.7051124525882795, "grad_norm": 4.278046131134033, "learning_rate": 8.309740269997109e-07, "loss": 0.0112, "step": 346780 }, { "epoch": 3.705219295902559, "grad_norm": 4.485700607299805, "learning_rate": 8.309614337336901e-07, "loss": 0.076, "step": 346790 }, { "epoch": 3.7053261392168384, "grad_norm": 0.002494724467396736, "learning_rate": 8.309488400939888e-07, "loss": 0.007, "step": 346800 }, { "epoch": 3.705432982531118, "grad_norm": 0.5994272828102112, "learning_rate": 8.309362460806213e-07, "loss": 0.0046, "step": 346810 }, { "epoch": 3.7055398258453978, "grad_norm": 0.010662810876965523, "learning_rate": 8.309236516936018e-07, "loss": 0.0081, "step": 346820 }, { "epoch": 3.705646669159677, "grad_norm": 6.675318241119385, "learning_rate": 8.309110569329444e-07, "loss": 0.0326, "step": 346830 }, { "epoch": 3.705753512473957, "grad_norm": 0.02578572742640972, "learning_rate": 8.308984617986635e-07, "loss": 0.0074, "step": 346840 }, { "epoch": 3.7058603557882366, "grad_norm": 1.0893845558166504, "learning_rate": 8.308858662907734e-07, "loss": 0.0128, "step": 346850 }, { "epoch": 3.705967199102516, "grad_norm": 1.277288794517517, "learning_rate": 8.30873270409288e-07, "loss": 0.0113, "step": 346860 }, { "epoch": 3.7060740424167955, "grad_norm": 0.03401033580303192, "learning_rate": 8.308606741542218e-07, "loss": 0.0023, "step": 346870 }, { "epoch": 3.7061808857310754, "grad_norm": 0.1096457913517952, "learning_rate": 8.308480775255888e-07, "loss": 0.0069, "step": 346880 }, { "epoch": 3.706287729045355, "grad_norm": 0.10504449158906937, "learning_rate": 8.308354805234035e-07, "loss": 0.0262, "step": 346890 }, { "epoch": 3.7063945723596348, "grad_norm": 1.9219145774841309, "learning_rate": 8.3082288314768e-07, "loss": 0.011, "step": 346900 }, { "epoch": 3.7065014156739142, "grad_norm": 5.184559345245361, "learning_rate": 8.308102853984324e-07, "loss": 0.0101, "step": 346910 }, { "epoch": 3.7066082589881937, "grad_norm": 0.013134592212736607, "learning_rate": 8.307976872756751e-07, "loss": 0.0296, "step": 346920 }, { "epoch": 3.7067151023024736, "grad_norm": 1.2965381145477295, "learning_rate": 8.307850887794221e-07, "loss": 0.0031, "step": 346930 }, { "epoch": 3.706821945616753, "grad_norm": 0.008661041967570782, "learning_rate": 8.30772489909688e-07, "loss": 0.0038, "step": 346940 }, { "epoch": 3.7069287889310325, "grad_norm": 3.2409183979034424, "learning_rate": 8.307598906664865e-07, "loss": 0.0342, "step": 346950 }, { "epoch": 3.7070356322453124, "grad_norm": 1.4525316953659058, "learning_rate": 8.307472910498323e-07, "loss": 0.0099, "step": 346960 }, { "epoch": 3.707142475559592, "grad_norm": 4.877758502960205, "learning_rate": 8.307346910597394e-07, "loss": 0.0096, "step": 346970 }, { "epoch": 3.7072493188738713, "grad_norm": 0.0147432591766119, "learning_rate": 8.307220906962223e-07, "loss": 0.0002, "step": 346980 }, { "epoch": 3.7073561621881512, "grad_norm": 0.30592697858810425, "learning_rate": 8.307094899592947e-07, "loss": 0.0047, "step": 346990 }, { "epoch": 3.7074630055024307, "grad_norm": 3.6621336936950684, "learning_rate": 8.306968888489712e-07, "loss": 0.0213, "step": 347000 }, { "epoch": 3.70756984881671, "grad_norm": 0.05217092111706734, "learning_rate": 8.306842873652659e-07, "loss": 0.0224, "step": 347010 }, { "epoch": 3.70767669213099, "grad_norm": 1.1499087810516357, "learning_rate": 8.306716855081932e-07, "loss": 0.0065, "step": 347020 }, { "epoch": 3.7077835354452695, "grad_norm": 0.054650429636240005, "learning_rate": 8.306590832777672e-07, "loss": 0.0382, "step": 347030 }, { "epoch": 3.707890378759549, "grad_norm": 0.11940915882587433, "learning_rate": 8.306464806740021e-07, "loss": 0.0231, "step": 347040 }, { "epoch": 3.707997222073829, "grad_norm": 0.0007409439422190189, "learning_rate": 8.306338776969121e-07, "loss": 0.0103, "step": 347050 }, { "epoch": 3.7081040653881083, "grad_norm": 0.0022432839032262564, "learning_rate": 8.306212743465117e-07, "loss": 0.0196, "step": 347060 }, { "epoch": 3.708210908702388, "grad_norm": 0.618817150592804, "learning_rate": 8.306086706228148e-07, "loss": 0.027, "step": 347070 }, { "epoch": 3.7083177520166677, "grad_norm": 15.722004890441895, "learning_rate": 8.305960665258358e-07, "loss": 0.0637, "step": 347080 }, { "epoch": 3.708424595330947, "grad_norm": 1.7196927070617676, "learning_rate": 8.305834620555887e-07, "loss": 0.0207, "step": 347090 }, { "epoch": 3.7085314386452266, "grad_norm": 0.0013059206539765, "learning_rate": 8.305708572120881e-07, "loss": 0.0146, "step": 347100 }, { "epoch": 3.7086382819595065, "grad_norm": 2.8480935096740723, "learning_rate": 8.30558251995348e-07, "loss": 0.0101, "step": 347110 }, { "epoch": 3.708745125273786, "grad_norm": 2.423583984375, "learning_rate": 8.305456464053827e-07, "loss": 0.0068, "step": 347120 }, { "epoch": 3.708851968588066, "grad_norm": 7.475066184997559, "learning_rate": 8.305330404422065e-07, "loss": 0.0283, "step": 347130 }, { "epoch": 3.7089588119023453, "grad_norm": 0.08154669404029846, "learning_rate": 8.305204341058335e-07, "loss": 0.0257, "step": 347140 }, { "epoch": 3.709065655216625, "grad_norm": 0.7969313859939575, "learning_rate": 8.305078273962778e-07, "loss": 0.0057, "step": 347150 }, { "epoch": 3.7091724985309042, "grad_norm": 14.169168472290039, "learning_rate": 8.304952203135541e-07, "loss": 0.0214, "step": 347160 }, { "epoch": 3.709279341845184, "grad_norm": 0.3377029299736023, "learning_rate": 8.304826128576763e-07, "loss": 0.01, "step": 347170 }, { "epoch": 3.7093861851594636, "grad_norm": 0.04073318839073181, "learning_rate": 8.304700050286586e-07, "loss": 0.0008, "step": 347180 }, { "epoch": 3.7094930284737435, "grad_norm": 1.0639188289642334, "learning_rate": 8.304573968265153e-07, "loss": 0.0106, "step": 347190 }, { "epoch": 3.709599871788023, "grad_norm": 0.01924547739326954, "learning_rate": 8.304447882512606e-07, "loss": 0.0005, "step": 347200 }, { "epoch": 3.7097067151023024, "grad_norm": 0.0011759728658944368, "learning_rate": 8.30432179302909e-07, "loss": 0.0096, "step": 347210 }, { "epoch": 3.709813558416582, "grad_norm": 0.0818931832909584, "learning_rate": 8.304195699814744e-07, "loss": 0.0101, "step": 347220 }, { "epoch": 3.709920401730862, "grad_norm": 0.07556819170713425, "learning_rate": 8.304069602869712e-07, "loss": 0.0155, "step": 347230 }, { "epoch": 3.7100272450451413, "grad_norm": 0.13915109634399414, "learning_rate": 8.303943502194136e-07, "loss": 0.0293, "step": 347240 }, { "epoch": 3.710134088359421, "grad_norm": 0.5608089566230774, "learning_rate": 8.303817397788158e-07, "loss": 0.0108, "step": 347250 }, { "epoch": 3.7102409316737006, "grad_norm": 0.1029081717133522, "learning_rate": 8.303691289651922e-07, "loss": 0.0178, "step": 347260 }, { "epoch": 3.71034777498798, "grad_norm": 10.469853401184082, "learning_rate": 8.303565177785567e-07, "loss": 0.0171, "step": 347270 }, { "epoch": 3.7104546183022595, "grad_norm": 0.5953121185302734, "learning_rate": 8.303439062189241e-07, "loss": 0.0413, "step": 347280 }, { "epoch": 3.7105614616165394, "grad_norm": 0.011045542545616627, "learning_rate": 8.30331294286308e-07, "loss": 0.0104, "step": 347290 }, { "epoch": 3.710668304930819, "grad_norm": 0.0009914017282426357, "learning_rate": 8.30318681980723e-07, "loss": 0.0051, "step": 347300 }, { "epoch": 3.710775148245099, "grad_norm": 2.330843210220337, "learning_rate": 8.303060693021834e-07, "loss": 0.0038, "step": 347310 }, { "epoch": 3.7108819915593783, "grad_norm": 5.372509956359863, "learning_rate": 8.302934562507031e-07, "loss": 0.0122, "step": 347320 }, { "epoch": 3.7109888348736577, "grad_norm": 0.041454099118709564, "learning_rate": 8.302808428262968e-07, "loss": 0.0073, "step": 347330 }, { "epoch": 3.711095678187937, "grad_norm": 0.24305881559848785, "learning_rate": 8.302682290289784e-07, "loss": 0.0013, "step": 347340 }, { "epoch": 3.711202521502217, "grad_norm": 0.04282023012638092, "learning_rate": 8.302556148587623e-07, "loss": 0.0353, "step": 347350 }, { "epoch": 3.7113093648164965, "grad_norm": 0.01748938485980034, "learning_rate": 8.302430003156626e-07, "loss": 0.0024, "step": 347360 }, { "epoch": 3.7114162081307764, "grad_norm": 0.2382529377937317, "learning_rate": 8.302303853996935e-07, "loss": 0.0014, "step": 347370 }, { "epoch": 3.711523051445056, "grad_norm": 0.0214398056268692, "learning_rate": 8.302177701108697e-07, "loss": 0.0035, "step": 347380 }, { "epoch": 3.7116298947593354, "grad_norm": 11.737318992614746, "learning_rate": 8.302051544492048e-07, "loss": 0.0137, "step": 347390 }, { "epoch": 3.711736738073615, "grad_norm": 0.015446275472640991, "learning_rate": 8.301925384147135e-07, "loss": 0.0043, "step": 347400 }, { "epoch": 3.7118435813878947, "grad_norm": 0.06776579469442368, "learning_rate": 8.3017992200741e-07, "loss": 0.0249, "step": 347410 }, { "epoch": 3.711950424702174, "grad_norm": 0.006471260916441679, "learning_rate": 8.301673052273083e-07, "loss": 0.0203, "step": 347420 }, { "epoch": 3.712057268016454, "grad_norm": 0.008385295048356056, "learning_rate": 8.301546880744228e-07, "loss": 0.0055, "step": 347430 }, { "epoch": 3.7121641113307335, "grad_norm": 1.0084069967269897, "learning_rate": 8.301420705487678e-07, "loss": 0.014, "step": 347440 }, { "epoch": 3.712270954645013, "grad_norm": 2.9797213077545166, "learning_rate": 8.301294526503575e-07, "loss": 0.0107, "step": 347450 }, { "epoch": 3.7123777979592925, "grad_norm": 0.013642040081322193, "learning_rate": 8.30116834379206e-07, "loss": 0.0006, "step": 347460 }, { "epoch": 3.7124846412735724, "grad_norm": 0.022931596264243126, "learning_rate": 8.301042157353278e-07, "loss": 0.0422, "step": 347470 }, { "epoch": 3.712591484587852, "grad_norm": 0.21619971096515656, "learning_rate": 8.30091596718737e-07, "loss": 0.0232, "step": 347480 }, { "epoch": 3.7126983279021317, "grad_norm": 1.797391653060913, "learning_rate": 8.300789773294478e-07, "loss": 0.0307, "step": 347490 }, { "epoch": 3.712805171216411, "grad_norm": 6.391571044921875, "learning_rate": 8.300663575674746e-07, "loss": 0.0122, "step": 347500 }, { "epoch": 3.7129120145306906, "grad_norm": 0.5050498247146606, "learning_rate": 8.300537374328316e-07, "loss": 0.0035, "step": 347510 }, { "epoch": 3.71301885784497, "grad_norm": 13.104354858398438, "learning_rate": 8.300411169255331e-07, "loss": 0.0397, "step": 347520 }, { "epoch": 3.71312570115925, "grad_norm": 0.0068039861507713795, "learning_rate": 8.30028496045593e-07, "loss": 0.0156, "step": 347530 }, { "epoch": 3.7132325444735295, "grad_norm": 0.002476130146533251, "learning_rate": 8.300158747930261e-07, "loss": 0.013, "step": 347540 }, { "epoch": 3.7133393877878094, "grad_norm": 0.3722478449344635, "learning_rate": 8.300032531678463e-07, "loss": 0.0045, "step": 347550 }, { "epoch": 3.713446231102089, "grad_norm": 0.9687064290046692, "learning_rate": 8.299906311700677e-07, "loss": 0.0023, "step": 347560 }, { "epoch": 3.7135530744163683, "grad_norm": 12.005759239196777, "learning_rate": 8.29978008799705e-07, "loss": 0.0399, "step": 347570 }, { "epoch": 3.7136599177306477, "grad_norm": 1.7948904037475586, "learning_rate": 8.299653860567722e-07, "loss": 0.0097, "step": 347580 }, { "epoch": 3.7137667610449276, "grad_norm": 3.770273447036743, "learning_rate": 8.299527629412834e-07, "loss": 0.0188, "step": 347590 }, { "epoch": 3.713873604359207, "grad_norm": 0.003717664862051606, "learning_rate": 8.299401394532533e-07, "loss": 0.0044, "step": 347600 }, { "epoch": 3.713980447673487, "grad_norm": 3.3951239585876465, "learning_rate": 8.299275155926957e-07, "loss": 0.0327, "step": 347610 }, { "epoch": 3.7140872909877665, "grad_norm": 0.011125771328806877, "learning_rate": 8.299148913596251e-07, "loss": 0.0175, "step": 347620 }, { "epoch": 3.714194134302046, "grad_norm": 12.108652114868164, "learning_rate": 8.299022667540556e-07, "loss": 0.0241, "step": 347630 }, { "epoch": 3.7143009776163254, "grad_norm": 0.1293625682592392, "learning_rate": 8.298896417760017e-07, "loss": 0.0034, "step": 347640 }, { "epoch": 3.7144078209306053, "grad_norm": 3.020639419555664, "learning_rate": 8.298770164254774e-07, "loss": 0.0074, "step": 347650 }, { "epoch": 3.7145146642448847, "grad_norm": 1.5030670166015625, "learning_rate": 8.29864390702497e-07, "loss": 0.0108, "step": 347660 }, { "epoch": 3.7146215075591646, "grad_norm": 0.0448494516313076, "learning_rate": 8.29851764607075e-07, "loss": 0.005, "step": 347670 }, { "epoch": 3.714728350873444, "grad_norm": 13.447779655456543, "learning_rate": 8.298391381392252e-07, "loss": 0.0461, "step": 347680 }, { "epoch": 3.7148351941877236, "grad_norm": 0.09296205639839172, "learning_rate": 8.298265112989623e-07, "loss": 0.019, "step": 347690 }, { "epoch": 3.7149420375020035, "grad_norm": 0.005883307196199894, "learning_rate": 8.298138840863002e-07, "loss": 0.0149, "step": 347700 }, { "epoch": 3.715048880816283, "grad_norm": 2.4212961196899414, "learning_rate": 8.298012565012535e-07, "loss": 0.019, "step": 347710 }, { "epoch": 3.7151557241305624, "grad_norm": 0.11601810902357101, "learning_rate": 8.297886285438363e-07, "loss": 0.0012, "step": 347720 }, { "epoch": 3.7152625674448423, "grad_norm": 1.4494953155517578, "learning_rate": 8.297760002140628e-07, "loss": 0.0081, "step": 347730 }, { "epoch": 3.7153694107591217, "grad_norm": 0.7014479041099548, "learning_rate": 8.297633715119474e-07, "loss": 0.002, "step": 347740 }, { "epoch": 3.715476254073401, "grad_norm": 0.022065751254558563, "learning_rate": 8.297507424375041e-07, "loss": 0.0021, "step": 347750 }, { "epoch": 3.715583097387681, "grad_norm": 0.11075393110513687, "learning_rate": 8.297381129907474e-07, "loss": 0.0206, "step": 347760 }, { "epoch": 3.7156899407019606, "grad_norm": 6.329326152801514, "learning_rate": 8.297254831716913e-07, "loss": 0.0025, "step": 347770 }, { "epoch": 3.71579678401624, "grad_norm": 3.044840097427368, "learning_rate": 8.297128529803505e-07, "loss": 0.0144, "step": 347780 }, { "epoch": 3.71590362733052, "grad_norm": 0.0035648406483232975, "learning_rate": 8.29700222416739e-07, "loss": 0.0006, "step": 347790 }, { "epoch": 3.7160104706447994, "grad_norm": 0.4461616277694702, "learning_rate": 8.296875914808709e-07, "loss": 0.0478, "step": 347800 }, { "epoch": 3.716117313959079, "grad_norm": 1.8865283727645874, "learning_rate": 8.296749601727608e-07, "loss": 0.0049, "step": 347810 }, { "epoch": 3.7162241572733588, "grad_norm": 0.05557958036661148, "learning_rate": 8.296623284924227e-07, "loss": 0.0191, "step": 347820 }, { "epoch": 3.716331000587638, "grad_norm": 0.011473658494651318, "learning_rate": 8.296496964398709e-07, "loss": 0.0195, "step": 347830 }, { "epoch": 3.716437843901918, "grad_norm": 0.016176197677850723, "learning_rate": 8.296370640151198e-07, "loss": 0.0144, "step": 347840 }, { "epoch": 3.7165446872161976, "grad_norm": 0.010572375729680061, "learning_rate": 8.296244312181835e-07, "loss": 0.0005, "step": 347850 }, { "epoch": 3.716651530530477, "grad_norm": 0.01740848645567894, "learning_rate": 8.296117980490764e-07, "loss": 0.0164, "step": 347860 }, { "epoch": 3.7167583738447565, "grad_norm": 0.0419788621366024, "learning_rate": 8.295991645078127e-07, "loss": 0.0022, "step": 347870 }, { "epoch": 3.7168652171590364, "grad_norm": 3.7451155185699463, "learning_rate": 8.295865305944068e-07, "loss": 0.0098, "step": 347880 }, { "epoch": 3.716972060473316, "grad_norm": 0.8581318259239197, "learning_rate": 8.295738963088725e-07, "loss": 0.0089, "step": 347890 }, { "epoch": 3.7170789037875958, "grad_norm": 0.03812224790453911, "learning_rate": 8.295612616512246e-07, "loss": 0.0097, "step": 347900 }, { "epoch": 3.717185747101875, "grad_norm": 0.019168779253959656, "learning_rate": 8.295486266214771e-07, "loss": 0.0083, "step": 347910 }, { "epoch": 3.7172925904161547, "grad_norm": 0.5831024050712585, "learning_rate": 8.295359912196445e-07, "loss": 0.0411, "step": 347920 }, { "epoch": 3.717399433730434, "grad_norm": 11.140621185302734, "learning_rate": 8.295233554457407e-07, "loss": 0.0304, "step": 347930 }, { "epoch": 3.717506277044714, "grad_norm": 3.2333459854125977, "learning_rate": 8.295107192997803e-07, "loss": 0.009, "step": 347940 }, { "epoch": 3.7176131203589935, "grad_norm": 1.5103555917739868, "learning_rate": 8.294980827817772e-07, "loss": 0.0046, "step": 347950 }, { "epoch": 3.7177199636732734, "grad_norm": 0.04555800184607506, "learning_rate": 8.294854458917462e-07, "loss": 0.0123, "step": 347960 }, { "epoch": 3.717826806987553, "grad_norm": 0.037077195942401886, "learning_rate": 8.29472808629701e-07, "loss": 0.0236, "step": 347970 }, { "epoch": 3.7179336503018323, "grad_norm": 0.9980065822601318, "learning_rate": 8.294601709956563e-07, "loss": 0.0005, "step": 347980 }, { "epoch": 3.7180404936161118, "grad_norm": 0.011218562722206116, "learning_rate": 8.294475329896262e-07, "loss": 0.0046, "step": 347990 }, { "epoch": 3.7181473369303917, "grad_norm": 0.018611522391438484, "learning_rate": 8.294348946116248e-07, "loss": 0.0427, "step": 348000 }, { "epoch": 3.718254180244671, "grad_norm": 0.594289243221283, "learning_rate": 8.294222558616667e-07, "loss": 0.0134, "step": 348010 }, { "epoch": 3.718361023558951, "grad_norm": 3.7163608074188232, "learning_rate": 8.29409616739766e-07, "loss": 0.017, "step": 348020 }, { "epoch": 3.7184678668732305, "grad_norm": 1.5469919443130493, "learning_rate": 8.29396977245937e-07, "loss": 0.0057, "step": 348030 }, { "epoch": 3.71857471018751, "grad_norm": 0.2908320128917694, "learning_rate": 8.293843373801939e-07, "loss": 0.0148, "step": 348040 }, { "epoch": 3.7186815535017894, "grad_norm": 0.01983458921313286, "learning_rate": 8.293716971425511e-07, "loss": 0.0007, "step": 348050 }, { "epoch": 3.7187883968160693, "grad_norm": 2.2705886363983154, "learning_rate": 8.293590565330227e-07, "loss": 0.0333, "step": 348060 }, { "epoch": 3.718895240130349, "grad_norm": 0.018348008394241333, "learning_rate": 8.29346415551623e-07, "loss": 0.0022, "step": 348070 }, { "epoch": 3.7190020834446287, "grad_norm": 0.004014434292912483, "learning_rate": 8.293337741983665e-07, "loss": 0.007, "step": 348080 }, { "epoch": 3.719108926758908, "grad_norm": 0.2813454270362854, "learning_rate": 8.293211324732672e-07, "loss": 0.0023, "step": 348090 }, { "epoch": 3.7192157700731876, "grad_norm": 0.016267165541648865, "learning_rate": 8.293084903763395e-07, "loss": 0.0358, "step": 348100 }, { "epoch": 3.719322613387467, "grad_norm": 7.9048638343811035, "learning_rate": 8.292958479075977e-07, "loss": 0.0066, "step": 348110 }, { "epoch": 3.719429456701747, "grad_norm": 0.17209933698177338, "learning_rate": 8.29283205067056e-07, "loss": 0.0082, "step": 348120 }, { "epoch": 3.7195363000160264, "grad_norm": 0.030518202111124992, "learning_rate": 8.292705618547288e-07, "loss": 0.0116, "step": 348130 }, { "epoch": 3.7196431433303063, "grad_norm": 0.00629169587045908, "learning_rate": 8.292579182706302e-07, "loss": 0.0207, "step": 348140 }, { "epoch": 3.719749986644586, "grad_norm": 0.10335104912519455, "learning_rate": 8.292452743147746e-07, "loss": 0.0314, "step": 348150 }, { "epoch": 3.7198568299588652, "grad_norm": 0.008388321846723557, "learning_rate": 8.292326299871762e-07, "loss": 0.0029, "step": 348160 }, { "epoch": 3.7199636732731447, "grad_norm": 0.21680869162082672, "learning_rate": 8.292199852878493e-07, "loss": 0.0396, "step": 348170 }, { "epoch": 3.7200705165874246, "grad_norm": 0.006272049620747566, "learning_rate": 8.292073402168082e-07, "loss": 0.005, "step": 348180 }, { "epoch": 3.720177359901704, "grad_norm": 10.171079635620117, "learning_rate": 8.291946947740673e-07, "loss": 0.0182, "step": 348190 }, { "epoch": 3.720284203215984, "grad_norm": 0.005547335371375084, "learning_rate": 8.291820489596405e-07, "loss": 0.0044, "step": 348200 }, { "epoch": 3.7203910465302634, "grad_norm": 0.01672343723475933, "learning_rate": 8.291694027735426e-07, "loss": 0.0101, "step": 348210 }, { "epoch": 3.720497889844543, "grad_norm": 0.003424419555813074, "learning_rate": 8.291567562157873e-07, "loss": 0.028, "step": 348220 }, { "epoch": 3.7206047331588223, "grad_norm": 0.11579558253288269, "learning_rate": 8.291441092863893e-07, "loss": 0.0167, "step": 348230 }, { "epoch": 3.7207115764731022, "grad_norm": 4.7682785987854, "learning_rate": 8.291314619853629e-07, "loss": 0.0066, "step": 348240 }, { "epoch": 3.7208184197873817, "grad_norm": 0.013270179741084576, "learning_rate": 8.291188143127221e-07, "loss": 0.0099, "step": 348250 }, { "epoch": 3.7209252631016616, "grad_norm": 0.0065519982017576694, "learning_rate": 8.291061662684813e-07, "loss": 0.0024, "step": 348260 }, { "epoch": 3.721032106415941, "grad_norm": 0.019290028139948845, "learning_rate": 8.290935178526548e-07, "loss": 0.0139, "step": 348270 }, { "epoch": 3.7211389497302205, "grad_norm": 10.597886085510254, "learning_rate": 8.290808690652571e-07, "loss": 0.0212, "step": 348280 }, { "epoch": 3.7212457930445, "grad_norm": 0.009418481960892677, "learning_rate": 8.290682199063019e-07, "loss": 0.0055, "step": 348290 }, { "epoch": 3.72135263635878, "grad_norm": 5.950762748718262, "learning_rate": 8.290555703758042e-07, "loss": 0.0049, "step": 348300 }, { "epoch": 3.7214594796730593, "grad_norm": 3.766221523284912, "learning_rate": 8.290429204737777e-07, "loss": 0.0258, "step": 348310 }, { "epoch": 3.7215663229873392, "grad_norm": 0.0165549423545599, "learning_rate": 8.290302702002369e-07, "loss": 0.0144, "step": 348320 }, { "epoch": 3.7216731663016187, "grad_norm": 5.066727161407471, "learning_rate": 8.290176195551961e-07, "loss": 0.0119, "step": 348330 }, { "epoch": 3.721780009615898, "grad_norm": 2.2128219604492188, "learning_rate": 8.290049685386697e-07, "loss": 0.0066, "step": 348340 }, { "epoch": 3.7218868529301776, "grad_norm": 0.8356796503067017, "learning_rate": 8.289923171506718e-07, "loss": 0.0329, "step": 348350 }, { "epoch": 3.7219936962444575, "grad_norm": 0.0032942453399300575, "learning_rate": 8.289796653912167e-07, "loss": 0.0075, "step": 348360 }, { "epoch": 3.722100539558737, "grad_norm": 5.7143025398254395, "learning_rate": 8.289670132603189e-07, "loss": 0.0172, "step": 348370 }, { "epoch": 3.722207382873017, "grad_norm": 0.0695076510310173, "learning_rate": 8.289543607579923e-07, "loss": 0.0008, "step": 348380 }, { "epoch": 3.7223142261872963, "grad_norm": 0.06685487926006317, "learning_rate": 8.289417078842516e-07, "loss": 0.0336, "step": 348390 }, { "epoch": 3.722421069501576, "grad_norm": 4.938605308532715, "learning_rate": 8.289290546391106e-07, "loss": 0.0091, "step": 348400 }, { "epoch": 3.7225279128158557, "grad_norm": 0.002584442961961031, "learning_rate": 8.289164010225841e-07, "loss": 0.0216, "step": 348410 }, { "epoch": 3.722634756130135, "grad_norm": 6.827962875366211, "learning_rate": 8.289037470346862e-07, "loss": 0.0134, "step": 348420 }, { "epoch": 3.7227415994444146, "grad_norm": 0.22216789424419403, "learning_rate": 8.288910926754308e-07, "loss": 0.0367, "step": 348430 }, { "epoch": 3.7228484427586945, "grad_norm": 0.004075887147337198, "learning_rate": 8.288784379448329e-07, "loss": 0.0132, "step": 348440 }, { "epoch": 3.722955286072974, "grad_norm": 0.010296111926436424, "learning_rate": 8.288657828429064e-07, "loss": 0.0104, "step": 348450 }, { "epoch": 3.7230621293872534, "grad_norm": 3.3615949153900146, "learning_rate": 8.288531273696654e-07, "loss": 0.0135, "step": 348460 }, { "epoch": 3.7231689727015334, "grad_norm": 0.08549751341342926, "learning_rate": 8.288404715251245e-07, "loss": 0.0108, "step": 348470 }, { "epoch": 3.723275816015813, "grad_norm": 0.10184509307146072, "learning_rate": 8.288278153092979e-07, "loss": 0.0221, "step": 348480 }, { "epoch": 3.7233826593300923, "grad_norm": 1.1430562734603882, "learning_rate": 8.288151587221999e-07, "loss": 0.0042, "step": 348490 }, { "epoch": 3.723489502644372, "grad_norm": 0.2820937931537628, "learning_rate": 8.288025017638447e-07, "loss": 0.0042, "step": 348500 }, { "epoch": 3.7235963459586516, "grad_norm": 1.0182517766952515, "learning_rate": 8.287898444342467e-07, "loss": 0.0018, "step": 348510 }, { "epoch": 3.723703189272931, "grad_norm": 1.7227462530136108, "learning_rate": 8.287771867334202e-07, "loss": 0.0189, "step": 348520 }, { "epoch": 3.723810032587211, "grad_norm": 2.485494613647461, "learning_rate": 8.287645286613794e-07, "loss": 0.0283, "step": 348530 }, { "epoch": 3.7239168759014905, "grad_norm": 0.012243046425282955, "learning_rate": 8.287518702181386e-07, "loss": 0.011, "step": 348540 }, { "epoch": 3.72402371921577, "grad_norm": 0.14511367678642273, "learning_rate": 8.287392114037121e-07, "loss": 0.0038, "step": 348550 }, { "epoch": 3.72413056253005, "grad_norm": 9.36030101776123, "learning_rate": 8.287265522181143e-07, "loss": 0.0245, "step": 348560 }, { "epoch": 3.7242374058443293, "grad_norm": 5.895375728607178, "learning_rate": 8.287138926613595e-07, "loss": 0.0107, "step": 348570 }, { "epoch": 3.7243442491586087, "grad_norm": 0.003109462559223175, "learning_rate": 8.287012327334617e-07, "loss": 0.0195, "step": 348580 }, { "epoch": 3.7244510924728886, "grad_norm": 4.121935844421387, "learning_rate": 8.286885724344355e-07, "loss": 0.0134, "step": 348590 }, { "epoch": 3.724557935787168, "grad_norm": 0.01521831750869751, "learning_rate": 8.286759117642951e-07, "loss": 0.0068, "step": 348600 }, { "epoch": 3.724664779101448, "grad_norm": 0.003364727133885026, "learning_rate": 8.286632507230548e-07, "loss": 0.0107, "step": 348610 }, { "epoch": 3.7247716224157275, "grad_norm": 0.48982304334640503, "learning_rate": 8.286505893107289e-07, "loss": 0.0064, "step": 348620 }, { "epoch": 3.724878465730007, "grad_norm": 0.903091311454773, "learning_rate": 8.286379275273316e-07, "loss": 0.0109, "step": 348630 }, { "epoch": 3.7249853090442864, "grad_norm": 3.7558388710021973, "learning_rate": 8.286252653728774e-07, "loss": 0.0115, "step": 348640 }, { "epoch": 3.7250921523585663, "grad_norm": 0.025857344269752502, "learning_rate": 8.286126028473803e-07, "loss": 0.016, "step": 348650 }, { "epoch": 3.7251989956728457, "grad_norm": 0.0892999917268753, "learning_rate": 8.285999399508548e-07, "loss": 0.0014, "step": 348660 }, { "epoch": 3.7253058389871256, "grad_norm": 0.004628762137144804, "learning_rate": 8.285872766833152e-07, "loss": 0.0292, "step": 348670 }, { "epoch": 3.725412682301405, "grad_norm": 1.860656976699829, "learning_rate": 8.28574613044776e-07, "loss": 0.0124, "step": 348680 }, { "epoch": 3.7255195256156846, "grad_norm": 0.006144962273538113, "learning_rate": 8.285619490352509e-07, "loss": 0.0192, "step": 348690 }, { "epoch": 3.725626368929964, "grad_norm": 0.14164070785045624, "learning_rate": 8.285492846547548e-07, "loss": 0.0139, "step": 348700 }, { "epoch": 3.725733212244244, "grad_norm": 0.04125818610191345, "learning_rate": 8.285366199033017e-07, "loss": 0.0077, "step": 348710 }, { "epoch": 3.7258400555585234, "grad_norm": 4.749935150146484, "learning_rate": 8.28523954780906e-07, "loss": 0.0129, "step": 348720 }, { "epoch": 3.7259468988728033, "grad_norm": 5.403761863708496, "learning_rate": 8.285112892875818e-07, "loss": 0.0139, "step": 348730 }, { "epoch": 3.7260537421870827, "grad_norm": 0.009031028486788273, "learning_rate": 8.284986234233436e-07, "loss": 0.0127, "step": 348740 }, { "epoch": 3.726160585501362, "grad_norm": 0.0103171830996871, "learning_rate": 8.284859571882058e-07, "loss": 0.0128, "step": 348750 }, { "epoch": 3.7262674288156417, "grad_norm": 0.5204407572746277, "learning_rate": 8.284732905821826e-07, "loss": 0.0254, "step": 348760 }, { "epoch": 3.7263742721299216, "grad_norm": 0.006402760744094849, "learning_rate": 8.28460623605288e-07, "loss": 0.0242, "step": 348770 }, { "epoch": 3.726481115444201, "grad_norm": 1.1143224239349365, "learning_rate": 8.284479562575369e-07, "loss": 0.0099, "step": 348780 }, { "epoch": 3.726587958758481, "grad_norm": 0.07348073273897171, "learning_rate": 8.284352885389429e-07, "loss": 0.0018, "step": 348790 }, { "epoch": 3.7266948020727604, "grad_norm": 0.0064530824311077595, "learning_rate": 8.284226204495208e-07, "loss": 0.0206, "step": 348800 }, { "epoch": 3.72680164538704, "grad_norm": 0.5420495271682739, "learning_rate": 8.284099519892849e-07, "loss": 0.0131, "step": 348810 }, { "epoch": 3.7269084887013193, "grad_norm": 0.044264327734708786, "learning_rate": 8.283972831582493e-07, "loss": 0.0287, "step": 348820 }, { "epoch": 3.727015332015599, "grad_norm": 0.011753110215067863, "learning_rate": 8.283846139564284e-07, "loss": 0.0234, "step": 348830 }, { "epoch": 3.7271221753298787, "grad_norm": 0.011926785111427307, "learning_rate": 8.283719443838365e-07, "loss": 0.0229, "step": 348840 }, { "epoch": 3.7272290186441586, "grad_norm": 1.7249144315719604, "learning_rate": 8.283592744404879e-07, "loss": 0.0157, "step": 348850 }, { "epoch": 3.727335861958438, "grad_norm": 8.131397247314453, "learning_rate": 8.283466041263967e-07, "loss": 0.0187, "step": 348860 }, { "epoch": 3.7274427052727175, "grad_norm": 3.133786201477051, "learning_rate": 8.283339334415778e-07, "loss": 0.0135, "step": 348870 }, { "epoch": 3.727549548586997, "grad_norm": 3.6387906074523926, "learning_rate": 8.283212623860448e-07, "loss": 0.0042, "step": 348880 }, { "epoch": 3.727656391901277, "grad_norm": 0.17724210023880005, "learning_rate": 8.283085909598124e-07, "loss": 0.0021, "step": 348890 }, { "epoch": 3.7277632352155563, "grad_norm": 0.11871287226676941, "learning_rate": 8.282959191628949e-07, "loss": 0.0189, "step": 348900 }, { "epoch": 3.727870078529836, "grad_norm": 0.14386533200740814, "learning_rate": 8.282832469953064e-07, "loss": 0.0084, "step": 348910 }, { "epoch": 3.7279769218441157, "grad_norm": 0.11041449755430222, "learning_rate": 8.282705744570613e-07, "loss": 0.0702, "step": 348920 }, { "epoch": 3.728083765158395, "grad_norm": 0.06290094554424286, "learning_rate": 8.282579015481741e-07, "loss": 0.0027, "step": 348930 }, { "epoch": 3.7281906084726746, "grad_norm": 0.022192085161805153, "learning_rate": 8.282452282686588e-07, "loss": 0.0021, "step": 348940 }, { "epoch": 3.7282974517869545, "grad_norm": 0.0016205162974074483, "learning_rate": 8.2823255461853e-07, "loss": 0.0406, "step": 348950 }, { "epoch": 3.728404295101234, "grad_norm": 1.0011743307113647, "learning_rate": 8.282198805978018e-07, "loss": 0.009, "step": 348960 }, { "epoch": 3.728511138415514, "grad_norm": 2.318955659866333, "learning_rate": 8.282072062064886e-07, "loss": 0.015, "step": 348970 }, { "epoch": 3.7286179817297933, "grad_norm": 0.2157634198665619, "learning_rate": 8.281945314446046e-07, "loss": 0.0209, "step": 348980 }, { "epoch": 3.7287248250440728, "grad_norm": 1.9037960767745972, "learning_rate": 8.281818563121643e-07, "loss": 0.0812, "step": 348990 }, { "epoch": 3.7288316683583522, "grad_norm": 0.009326384402811527, "learning_rate": 8.28169180809182e-07, "loss": 0.0066, "step": 349000 }, { "epoch": 3.728938511672632, "grad_norm": 0.011605153791606426, "learning_rate": 8.281565049356718e-07, "loss": 0.018, "step": 349010 }, { "epoch": 3.7290453549869116, "grad_norm": 0.04078562185168266, "learning_rate": 8.281438286916483e-07, "loss": 0.0058, "step": 349020 }, { "epoch": 3.7291521983011915, "grad_norm": 0.07645019888877869, "learning_rate": 8.281311520771255e-07, "loss": 0.0101, "step": 349030 }, { "epoch": 3.729259041615471, "grad_norm": 0.019065886735916138, "learning_rate": 8.281184750921178e-07, "loss": 0.0025, "step": 349040 }, { "epoch": 3.7293658849297504, "grad_norm": 0.0035067421849817038, "learning_rate": 8.281057977366397e-07, "loss": 0.0027, "step": 349050 }, { "epoch": 3.72947272824403, "grad_norm": 1.1570210456848145, "learning_rate": 8.280931200107053e-07, "loss": 0.062, "step": 349060 }, { "epoch": 3.7295795715583098, "grad_norm": 0.08663249760866165, "learning_rate": 8.280804419143291e-07, "loss": 0.0136, "step": 349070 }, { "epoch": 3.7296864148725892, "grad_norm": 1.172865390777588, "learning_rate": 8.280677634475253e-07, "loss": 0.0019, "step": 349080 }, { "epoch": 3.729793258186869, "grad_norm": 3.043306589126587, "learning_rate": 8.280550846103083e-07, "loss": 0.0094, "step": 349090 }, { "epoch": 3.7299001015011486, "grad_norm": 0.015343977138400078, "learning_rate": 8.280424054026922e-07, "loss": 0.0377, "step": 349100 }, { "epoch": 3.730006944815428, "grad_norm": 4.677877902984619, "learning_rate": 8.280297258246916e-07, "loss": 0.0162, "step": 349110 }, { "epoch": 3.7301137881297075, "grad_norm": 0.006097590550780296, "learning_rate": 8.280170458763204e-07, "loss": 0.0235, "step": 349120 }, { "epoch": 3.7302206314439874, "grad_norm": 1.0602912902832031, "learning_rate": 8.280043655575935e-07, "loss": 0.0173, "step": 349130 }, { "epoch": 3.730327474758267, "grad_norm": 4.435784816741943, "learning_rate": 8.279916848685249e-07, "loss": 0.0232, "step": 349140 }, { "epoch": 3.7304343180725468, "grad_norm": 7.471813201904297, "learning_rate": 8.279790038091288e-07, "loss": 0.0254, "step": 349150 }, { "epoch": 3.7305411613868262, "grad_norm": 0.8222282528877258, "learning_rate": 8.279663223794198e-07, "loss": 0.0033, "step": 349160 }, { "epoch": 3.7306480047011057, "grad_norm": 1.9172921180725098, "learning_rate": 8.279536405794118e-07, "loss": 0.032, "step": 349170 }, { "epoch": 3.7307548480153856, "grad_norm": 3.927708864212036, "learning_rate": 8.279409584091197e-07, "loss": 0.0093, "step": 349180 }, { "epoch": 3.730861691329665, "grad_norm": 0.01322950143367052, "learning_rate": 8.279282758685576e-07, "loss": 0.0055, "step": 349190 }, { "epoch": 3.7309685346439445, "grad_norm": 0.0026186967734247446, "learning_rate": 8.279155929577393e-07, "loss": 0.01, "step": 349200 }, { "epoch": 3.7310753779582244, "grad_norm": 0.2291872203350067, "learning_rate": 8.279029096766798e-07, "loss": 0.016, "step": 349210 }, { "epoch": 3.731182221272504, "grad_norm": 15.054834365844727, "learning_rate": 8.278902260253932e-07, "loss": 0.0133, "step": 349220 }, { "epoch": 3.7312890645867833, "grad_norm": 0.02811352349817753, "learning_rate": 8.278775420038936e-07, "loss": 0.0085, "step": 349230 }, { "epoch": 3.7313959079010632, "grad_norm": 0.1832568198442459, "learning_rate": 8.278648576121956e-07, "loss": 0.0362, "step": 349240 }, { "epoch": 3.7315027512153427, "grad_norm": 8.1603422164917, "learning_rate": 8.278521728503135e-07, "loss": 0.0111, "step": 349250 }, { "epoch": 3.731609594529622, "grad_norm": 0.14052292704582214, "learning_rate": 8.278394877182615e-07, "loss": 0.0232, "step": 349260 }, { "epoch": 3.731716437843902, "grad_norm": 0.09518719464540482, "learning_rate": 8.278268022160539e-07, "loss": 0.0105, "step": 349270 }, { "epoch": 3.7318232811581815, "grad_norm": 0.07498994469642639, "learning_rate": 8.278141163437053e-07, "loss": 0.0165, "step": 349280 }, { "epoch": 3.731930124472461, "grad_norm": 1.243642807006836, "learning_rate": 8.278014301012296e-07, "loss": 0.0238, "step": 349290 }, { "epoch": 3.732036967786741, "grad_norm": 5.383902549743652, "learning_rate": 8.277887434886415e-07, "loss": 0.0172, "step": 349300 }, { "epoch": 3.7321438111010203, "grad_norm": 0.33683493733406067, "learning_rate": 8.27776056505955e-07, "loss": 0.001, "step": 349310 }, { "epoch": 3.7322506544153002, "grad_norm": 0.43711864948272705, "learning_rate": 8.277633691531848e-07, "loss": 0.0111, "step": 349320 }, { "epoch": 3.7323574977295797, "grad_norm": 0.024432361125946045, "learning_rate": 8.27750681430345e-07, "loss": 0.0271, "step": 349330 }, { "epoch": 3.732464341043859, "grad_norm": 0.20246683061122894, "learning_rate": 8.277379933374498e-07, "loss": 0.0196, "step": 349340 }, { "epoch": 3.7325711843581386, "grad_norm": 0.16220548748970032, "learning_rate": 8.277253048745138e-07, "loss": 0.0352, "step": 349350 }, { "epoch": 3.7326780276724185, "grad_norm": 1.745798110961914, "learning_rate": 8.27712616041551e-07, "loss": 0.0049, "step": 349360 }, { "epoch": 3.732784870986698, "grad_norm": 8.094011306762695, "learning_rate": 8.27699926838576e-07, "loss": 0.0036, "step": 349370 }, { "epoch": 3.732891714300978, "grad_norm": 3.2291810512542725, "learning_rate": 8.276872372656032e-07, "loss": 0.0156, "step": 349380 }, { "epoch": 3.7329985576152573, "grad_norm": 0.0040902793407440186, "learning_rate": 8.276745473226467e-07, "loss": 0.0101, "step": 349390 }, { "epoch": 3.733105400929537, "grad_norm": 2.237196922302246, "learning_rate": 8.276618570097209e-07, "loss": 0.0023, "step": 349400 }, { "epoch": 3.7332122442438163, "grad_norm": 0.006302134599536657, "learning_rate": 8.276491663268401e-07, "loss": 0.019, "step": 349410 }, { "epoch": 3.733319087558096, "grad_norm": 0.7089686989784241, "learning_rate": 8.276364752740189e-07, "loss": 0.0017, "step": 349420 }, { "epoch": 3.7334259308723756, "grad_norm": 9.264291763305664, "learning_rate": 8.27623783851271e-07, "loss": 0.0457, "step": 349430 }, { "epoch": 3.7335327741866555, "grad_norm": 2.7012534141540527, "learning_rate": 8.276110920586112e-07, "loss": 0.0549, "step": 349440 }, { "epoch": 3.733639617500935, "grad_norm": 2.7308366298675537, "learning_rate": 8.27598399896054e-07, "loss": 0.0038, "step": 349450 }, { "epoch": 3.7337464608152144, "grad_norm": 3.0156426429748535, "learning_rate": 8.275857073636132e-07, "loss": 0.0083, "step": 349460 }, { "epoch": 3.733853304129494, "grad_norm": 0.002810067730024457, "learning_rate": 8.275730144613035e-07, "loss": 0.0001, "step": 349470 }, { "epoch": 3.733960147443774, "grad_norm": 2.8529367446899414, "learning_rate": 8.275603211891394e-07, "loss": 0.0175, "step": 349480 }, { "epoch": 3.7340669907580533, "grad_norm": 0.13064835965633392, "learning_rate": 8.275476275471347e-07, "loss": 0.0073, "step": 349490 }, { "epoch": 3.734173834072333, "grad_norm": 0.19301946461200714, "learning_rate": 8.275349335353042e-07, "loss": 0.0012, "step": 349500 }, { "epoch": 3.7342806773866126, "grad_norm": 0.9775864481925964, "learning_rate": 8.275222391536617e-07, "loss": 0.0205, "step": 349510 }, { "epoch": 3.734387520700892, "grad_norm": 6.9402875900268555, "learning_rate": 8.275095444022222e-07, "loss": 0.1062, "step": 349520 }, { "epoch": 3.7344943640151715, "grad_norm": 0.027917344123125076, "learning_rate": 8.274968492809995e-07, "loss": 0.015, "step": 349530 }, { "epoch": 3.7346012073294514, "grad_norm": 11.956697463989258, "learning_rate": 8.274841537900083e-07, "loss": 0.0268, "step": 349540 }, { "epoch": 3.734708050643731, "grad_norm": 0.4008175730705261, "learning_rate": 8.274714579292626e-07, "loss": 0.0041, "step": 349550 }, { "epoch": 3.734814893958011, "grad_norm": 0.07588349282741547, "learning_rate": 8.274587616987771e-07, "loss": 0.0021, "step": 349560 }, { "epoch": 3.7349217372722903, "grad_norm": 0.023385990411043167, "learning_rate": 8.274460650985659e-07, "loss": 0.0025, "step": 349570 }, { "epoch": 3.7350285805865697, "grad_norm": 9.151773452758789, "learning_rate": 8.274333681286433e-07, "loss": 0.0649, "step": 349580 }, { "epoch": 3.735135423900849, "grad_norm": 0.08112696558237076, "learning_rate": 8.274206707890237e-07, "loss": 0.0042, "step": 349590 }, { "epoch": 3.735242267215129, "grad_norm": 0.012569296173751354, "learning_rate": 8.274079730797215e-07, "loss": 0.008, "step": 349600 }, { "epoch": 3.7353491105294085, "grad_norm": 0.0013893578434363008, "learning_rate": 8.27395275000751e-07, "loss": 0.0202, "step": 349610 }, { "epoch": 3.7354559538436884, "grad_norm": 0.14068201184272766, "learning_rate": 8.273825765521265e-07, "loss": 0.0532, "step": 349620 }, { "epoch": 3.735562797157968, "grad_norm": 6.0057454109191895, "learning_rate": 8.273698777338624e-07, "loss": 0.072, "step": 349630 }, { "epoch": 3.7356696404722474, "grad_norm": 0.18111170828342438, "learning_rate": 8.27357178545973e-07, "loss": 0.0092, "step": 349640 }, { "epoch": 3.735776483786527, "grad_norm": 0.27823758125305176, "learning_rate": 8.273444789884726e-07, "loss": 0.0098, "step": 349650 }, { "epoch": 3.7358833271008067, "grad_norm": 0.20524385571479797, "learning_rate": 8.273317790613757e-07, "loss": 0.0168, "step": 349660 }, { "epoch": 3.735990170415086, "grad_norm": 0.015714138746261597, "learning_rate": 8.273190787646963e-07, "loss": 0.0085, "step": 349670 }, { "epoch": 3.736097013729366, "grad_norm": 0.003960771486163139, "learning_rate": 8.27306378098449e-07, "loss": 0.0034, "step": 349680 }, { "epoch": 3.7362038570436455, "grad_norm": 3.733013868331909, "learning_rate": 8.272936770626482e-07, "loss": 0.0307, "step": 349690 }, { "epoch": 3.736310700357925, "grad_norm": 6.244950771331787, "learning_rate": 8.272809756573082e-07, "loss": 0.023, "step": 349700 }, { "epoch": 3.7364175436722045, "grad_norm": 0.00913986936211586, "learning_rate": 8.272682738824431e-07, "loss": 0.0148, "step": 349710 }, { "epoch": 3.7365243869864844, "grad_norm": 0.019961940124630928, "learning_rate": 8.272555717380674e-07, "loss": 0.0021, "step": 349720 }, { "epoch": 3.736631230300764, "grad_norm": 0.48384755849838257, "learning_rate": 8.272428692241956e-07, "loss": 0.0113, "step": 349730 }, { "epoch": 3.7367380736150437, "grad_norm": 0.18363149464130402, "learning_rate": 8.272301663408419e-07, "loss": 0.0103, "step": 349740 }, { "epoch": 3.736844916929323, "grad_norm": 2.490968704223633, "learning_rate": 8.272174630880206e-07, "loss": 0.0067, "step": 349750 }, { "epoch": 3.7369517602436026, "grad_norm": 0.08063462376594543, "learning_rate": 8.27204759465746e-07, "loss": 0.0087, "step": 349760 }, { "epoch": 3.737058603557882, "grad_norm": 0.11137719452381134, "learning_rate": 8.271920554740325e-07, "loss": 0.0128, "step": 349770 }, { "epoch": 3.737165446872162, "grad_norm": 2.4089579582214355, "learning_rate": 8.271793511128948e-07, "loss": 0.0351, "step": 349780 }, { "epoch": 3.7372722901864415, "grad_norm": 0.06330502033233643, "learning_rate": 8.271666463823468e-07, "loss": 0.006, "step": 349790 }, { "epoch": 3.7373791335007214, "grad_norm": 0.1361032873392105, "learning_rate": 8.271539412824028e-07, "loss": 0.0002, "step": 349800 }, { "epoch": 3.737485976815001, "grad_norm": 0.07136135548353195, "learning_rate": 8.271412358130774e-07, "loss": 0.0102, "step": 349810 }, { "epoch": 3.7375928201292803, "grad_norm": 0.011848683468997478, "learning_rate": 8.271285299743849e-07, "loss": 0.0012, "step": 349820 }, { "epoch": 3.7376996634435597, "grad_norm": 0.01828075759112835, "learning_rate": 8.271158237663394e-07, "loss": 0.0324, "step": 349830 }, { "epoch": 3.7378065067578397, "grad_norm": 2.082014322280884, "learning_rate": 8.271031171889557e-07, "loss": 0.0069, "step": 349840 }, { "epoch": 3.737913350072119, "grad_norm": 0.024375054985284805, "learning_rate": 8.270904102422478e-07, "loss": 0.0078, "step": 349850 }, { "epoch": 3.738020193386399, "grad_norm": 14.998291969299316, "learning_rate": 8.270777029262301e-07, "loss": 0.03, "step": 349860 }, { "epoch": 3.7381270367006785, "grad_norm": 0.45393651723861694, "learning_rate": 8.27064995240917e-07, "loss": 0.0011, "step": 349870 }, { "epoch": 3.738233880014958, "grad_norm": 0.9036996960639954, "learning_rate": 8.270522871863228e-07, "loss": 0.0087, "step": 349880 }, { "epoch": 3.738340723329238, "grad_norm": 0.06094910204410553, "learning_rate": 8.27039578762462e-07, "loss": 0.0056, "step": 349890 }, { "epoch": 3.7384475666435173, "grad_norm": 1.7845704555511475, "learning_rate": 8.270268699693488e-07, "loss": 0.019, "step": 349900 }, { "epoch": 3.7385544099577968, "grad_norm": 0.060119666159152985, "learning_rate": 8.270141608069974e-07, "loss": 0.0007, "step": 349910 }, { "epoch": 3.7386612532720767, "grad_norm": 0.06550915539264679, "learning_rate": 8.270014512754225e-07, "loss": 0.0577, "step": 349920 }, { "epoch": 3.738768096586356, "grad_norm": 2.916144371032715, "learning_rate": 8.269887413746383e-07, "loss": 0.0068, "step": 349930 }, { "epoch": 3.7388749399006356, "grad_norm": 0.001949293538928032, "learning_rate": 8.269760311046589e-07, "loss": 0.0089, "step": 349940 }, { "epoch": 3.7389817832149155, "grad_norm": 1.6862972974777222, "learning_rate": 8.269633204654993e-07, "loss": 0.0134, "step": 349950 }, { "epoch": 3.739088626529195, "grad_norm": 0.013884947635233402, "learning_rate": 8.269506094571732e-07, "loss": 0.0196, "step": 349960 }, { "epoch": 3.7391954698434744, "grad_norm": 0.003454272635281086, "learning_rate": 8.26937898079695e-07, "loss": 0.0139, "step": 349970 }, { "epoch": 3.7393023131577543, "grad_norm": 0.09113901853561401, "learning_rate": 8.269251863330795e-07, "loss": 0.0176, "step": 349980 }, { "epoch": 3.7394091564720338, "grad_norm": 9.457530975341797, "learning_rate": 8.269124742173408e-07, "loss": 0.053, "step": 349990 }, { "epoch": 3.739515999786313, "grad_norm": 8.317421913146973, "learning_rate": 8.268997617324931e-07, "loss": 0.0183, "step": 350000 }, { "epoch": 3.739622843100593, "grad_norm": 0.10417404025793076, "learning_rate": 8.268870488785511e-07, "loss": 0.0654, "step": 350010 }, { "epoch": 3.7397296864148726, "grad_norm": 0.4961860179901123, "learning_rate": 8.268743356555288e-07, "loss": 0.0048, "step": 350020 }, { "epoch": 3.739836529729152, "grad_norm": 0.5070923566818237, "learning_rate": 8.268616220634405e-07, "loss": 0.0064, "step": 350030 }, { "epoch": 3.739943373043432, "grad_norm": 1.6282570362091064, "learning_rate": 8.268489081023011e-07, "loss": 0.017, "step": 350040 }, { "epoch": 3.7400502163577114, "grad_norm": 2.9630513191223145, "learning_rate": 8.268361937721245e-07, "loss": 0.0176, "step": 350050 }, { "epoch": 3.740157059671991, "grad_norm": 0.012277764268219471, "learning_rate": 8.268234790729252e-07, "loss": 0.028, "step": 350060 }, { "epoch": 3.7402639029862708, "grad_norm": 0.15085381269454956, "learning_rate": 8.268107640047175e-07, "loss": 0.0042, "step": 350070 }, { "epoch": 3.74037074630055, "grad_norm": 2.7791476249694824, "learning_rate": 8.267980485675158e-07, "loss": 0.0049, "step": 350080 }, { "epoch": 3.74047758961483, "grad_norm": 0.022362900897860527, "learning_rate": 8.267853327613343e-07, "loss": 0.0108, "step": 350090 }, { "epoch": 3.7405844329291096, "grad_norm": 0.03181881830096245, "learning_rate": 8.267726165861877e-07, "loss": 0.0063, "step": 350100 }, { "epoch": 3.740691276243389, "grad_norm": 11.440287590026855, "learning_rate": 8.2675990004209e-07, "loss": 0.0196, "step": 350110 }, { "epoch": 3.7407981195576685, "grad_norm": 7.024194240570068, "learning_rate": 8.267471831290558e-07, "loss": 0.0079, "step": 350120 }, { "epoch": 3.7409049628719484, "grad_norm": 0.2512070834636688, "learning_rate": 8.267344658470994e-07, "loss": 0.0085, "step": 350130 }, { "epoch": 3.741011806186228, "grad_norm": 0.9093431234359741, "learning_rate": 8.26721748196235e-07, "loss": 0.0061, "step": 350140 }, { "epoch": 3.7411186495005078, "grad_norm": 0.22857576608657837, "learning_rate": 8.267090301764771e-07, "loss": 0.0025, "step": 350150 }, { "epoch": 3.7412254928147872, "grad_norm": 0.13237567245960236, "learning_rate": 8.266963117878402e-07, "loss": 0.0002, "step": 350160 }, { "epoch": 3.7413323361290667, "grad_norm": 0.0040160370990633965, "learning_rate": 8.266835930303383e-07, "loss": 0.0701, "step": 350170 }, { "epoch": 3.741439179443346, "grad_norm": 0.7067037224769592, "learning_rate": 8.26670873903986e-07, "loss": 0.0081, "step": 350180 }, { "epoch": 3.741546022757626, "grad_norm": 5.314847946166992, "learning_rate": 8.266581544087978e-07, "loss": 0.0318, "step": 350190 }, { "epoch": 3.7416528660719055, "grad_norm": 0.010003003291785717, "learning_rate": 8.266454345447878e-07, "loss": 0.0059, "step": 350200 }, { "epoch": 3.7417597093861854, "grad_norm": 0.15204983949661255, "learning_rate": 8.266327143119704e-07, "loss": 0.0073, "step": 350210 }, { "epoch": 3.741866552700465, "grad_norm": 3.3398077487945557, "learning_rate": 8.266199937103601e-07, "loss": 0.0146, "step": 350220 }, { "epoch": 3.7419733960147443, "grad_norm": 0.002128861378878355, "learning_rate": 8.266072727399711e-07, "loss": 0.0387, "step": 350230 }, { "epoch": 3.742080239329024, "grad_norm": 1.565849781036377, "learning_rate": 8.265945514008179e-07, "loss": 0.0123, "step": 350240 }, { "epoch": 3.7421870826433037, "grad_norm": 2.427804470062256, "learning_rate": 8.265818296929148e-07, "loss": 0.0019, "step": 350250 }, { "epoch": 3.742293925957583, "grad_norm": 0.0018293345347046852, "learning_rate": 8.265691076162761e-07, "loss": 0.0246, "step": 350260 }, { "epoch": 3.742400769271863, "grad_norm": 0.01983707956969738, "learning_rate": 8.265563851709163e-07, "loss": 0.012, "step": 350270 }, { "epoch": 3.7425076125861425, "grad_norm": 11.005522727966309, "learning_rate": 8.265436623568496e-07, "loss": 0.017, "step": 350280 }, { "epoch": 3.742614455900422, "grad_norm": 3.408543825149536, "learning_rate": 8.265309391740905e-07, "loss": 0.0072, "step": 350290 }, { "epoch": 3.7427212992147014, "grad_norm": 4.112806797027588, "learning_rate": 8.265182156226533e-07, "loss": 0.0318, "step": 350300 }, { "epoch": 3.7428281425289813, "grad_norm": 5.7782063484191895, "learning_rate": 8.265054917025525e-07, "loss": 0.0039, "step": 350310 }, { "epoch": 3.742934985843261, "grad_norm": 8.643370628356934, "learning_rate": 8.264927674138023e-07, "loss": 0.0073, "step": 350320 }, { "epoch": 3.7430418291575407, "grad_norm": 0.022902678698301315, "learning_rate": 8.264800427564171e-07, "loss": 0.0066, "step": 350330 }, { "epoch": 3.74314867247182, "grad_norm": 0.015571096912026405, "learning_rate": 8.264673177304113e-07, "loss": 0.0018, "step": 350340 }, { "epoch": 3.7432555157860996, "grad_norm": 0.001917606103233993, "learning_rate": 8.264545923357993e-07, "loss": 0.0036, "step": 350350 }, { "epoch": 3.743362359100379, "grad_norm": 2.136479139328003, "learning_rate": 8.264418665725954e-07, "loss": 0.0232, "step": 350360 }, { "epoch": 3.743469202414659, "grad_norm": 0.7463620901107788, "learning_rate": 8.26429140440814e-07, "loss": 0.0144, "step": 350370 }, { "epoch": 3.7435760457289384, "grad_norm": 0.014911781996488571, "learning_rate": 8.264164139404694e-07, "loss": 0.0094, "step": 350380 }, { "epoch": 3.7436828890432183, "grad_norm": 0.005495735909789801, "learning_rate": 8.264036870715763e-07, "loss": 0.0175, "step": 350390 }, { "epoch": 3.743789732357498, "grad_norm": 0.03661004453897476, "learning_rate": 8.263909598341484e-07, "loss": 0.0181, "step": 350400 }, { "epoch": 3.7438965756717772, "grad_norm": 0.00269675487652421, "learning_rate": 8.263782322282007e-07, "loss": 0.0069, "step": 350410 }, { "epoch": 3.7440034189860567, "grad_norm": 0.08207674324512482, "learning_rate": 8.263655042537474e-07, "loss": 0.0066, "step": 350420 }, { "epoch": 3.7441102623003366, "grad_norm": 0.012430265545845032, "learning_rate": 8.263527759108026e-07, "loss": 0.0022, "step": 350430 }, { "epoch": 3.744217105614616, "grad_norm": 0.37836727499961853, "learning_rate": 8.263400471993811e-07, "loss": 0.0018, "step": 350440 }, { "epoch": 3.744323948928896, "grad_norm": 0.03158506378531456, "learning_rate": 8.26327318119497e-07, "loss": 0.0031, "step": 350450 }, { "epoch": 3.7444307922431754, "grad_norm": 0.0021654744632542133, "learning_rate": 8.263145886711647e-07, "loss": 0.0058, "step": 350460 }, { "epoch": 3.744537635557455, "grad_norm": 2.512688398361206, "learning_rate": 8.263018588543985e-07, "loss": 0.0172, "step": 350470 }, { "epoch": 3.7446444788717343, "grad_norm": 0.009530995972454548, "learning_rate": 8.262891286692131e-07, "loss": 0.0061, "step": 350480 }, { "epoch": 3.7447513221860143, "grad_norm": 0.5274980068206787, "learning_rate": 8.262763981156224e-07, "loss": 0.0081, "step": 350490 }, { "epoch": 3.7448581655002937, "grad_norm": 0.7140235304832458, "learning_rate": 8.262636671936411e-07, "loss": 0.0004, "step": 350500 }, { "epoch": 3.7449650088145736, "grad_norm": 2.7007761001586914, "learning_rate": 8.262509359032836e-07, "loss": 0.0309, "step": 350510 }, { "epoch": 3.745071852128853, "grad_norm": 0.01001090370118618, "learning_rate": 8.262382042445642e-07, "loss": 0.0043, "step": 350520 }, { "epoch": 3.7451786954431325, "grad_norm": 0.4284071922302246, "learning_rate": 8.262254722174971e-07, "loss": 0.009, "step": 350530 }, { "epoch": 3.745285538757412, "grad_norm": 0.06648530811071396, "learning_rate": 8.262127398220968e-07, "loss": 0.0111, "step": 350540 }, { "epoch": 3.745392382071692, "grad_norm": 14.112031936645508, "learning_rate": 8.262000070583777e-07, "loss": 0.0368, "step": 350550 }, { "epoch": 3.7454992253859714, "grad_norm": 0.0793452039361, "learning_rate": 8.261872739263542e-07, "loss": 0.0125, "step": 350560 }, { "epoch": 3.7456060687002513, "grad_norm": 0.024852046743035316, "learning_rate": 8.261745404260407e-07, "loss": 0.0027, "step": 350570 }, { "epoch": 3.7457129120145307, "grad_norm": 0.0007228268077597022, "learning_rate": 8.261618065574516e-07, "loss": 0.0249, "step": 350580 }, { "epoch": 3.74581975532881, "grad_norm": 0.05513739958405495, "learning_rate": 8.261490723206011e-07, "loss": 0.0315, "step": 350590 }, { "epoch": 3.7459265986430896, "grad_norm": 0.06859420239925385, "learning_rate": 8.261363377155038e-07, "loss": 0.0012, "step": 350600 }, { "epoch": 3.7460334419573695, "grad_norm": 0.148202583193779, "learning_rate": 8.261236027421737e-07, "loss": 0.0177, "step": 350610 }, { "epoch": 3.746140285271649, "grad_norm": 1.9086613655090332, "learning_rate": 8.261108674006257e-07, "loss": 0.0532, "step": 350620 }, { "epoch": 3.746247128585929, "grad_norm": 7.614893913269043, "learning_rate": 8.260981316908738e-07, "loss": 0.0117, "step": 350630 }, { "epoch": 3.7463539719002084, "grad_norm": 0.3223746120929718, "learning_rate": 8.260853956129326e-07, "loss": 0.0068, "step": 350640 }, { "epoch": 3.746460815214488, "grad_norm": 0.005204360466450453, "learning_rate": 8.260726591668162e-07, "loss": 0.0144, "step": 350650 }, { "epoch": 3.7465676585287677, "grad_norm": 1.6607093811035156, "learning_rate": 8.260599223525393e-07, "loss": 0.0098, "step": 350660 }, { "epoch": 3.746674501843047, "grad_norm": 0.027541549876332283, "learning_rate": 8.260471851701161e-07, "loss": 0.0142, "step": 350670 }, { "epoch": 3.7467813451573266, "grad_norm": 0.016270380467176437, "learning_rate": 8.260344476195609e-07, "loss": 0.0109, "step": 350680 }, { "epoch": 3.7468881884716065, "grad_norm": 0.0757572278380394, "learning_rate": 8.260217097008884e-07, "loss": 0.0038, "step": 350690 }, { "epoch": 3.746995031785886, "grad_norm": 0.0025647252332419157, "learning_rate": 8.260089714141127e-07, "loss": 0.0125, "step": 350700 }, { "epoch": 3.7471018751001655, "grad_norm": 0.4776207208633423, "learning_rate": 8.259962327592483e-07, "loss": 0.0008, "step": 350710 }, { "epoch": 3.7472087184144454, "grad_norm": 0.020512277260422707, "learning_rate": 8.259834937363094e-07, "loss": 0.0045, "step": 350720 }, { "epoch": 3.747315561728725, "grad_norm": 0.0848805233836174, "learning_rate": 8.259707543453107e-07, "loss": 0.0129, "step": 350730 }, { "epoch": 3.7474224050430043, "grad_norm": 0.00265683326870203, "learning_rate": 8.259580145862662e-07, "loss": 0.0177, "step": 350740 }, { "epoch": 3.747529248357284, "grad_norm": 0.03278700262308121, "learning_rate": 8.259452744591907e-07, "loss": 0.0548, "step": 350750 }, { "epoch": 3.7476360916715636, "grad_norm": 0.0035744598135352135, "learning_rate": 8.259325339640984e-07, "loss": 0.006, "step": 350760 }, { "epoch": 3.747742934985843, "grad_norm": 1.45906400680542, "learning_rate": 8.259197931010036e-07, "loss": 0.0028, "step": 350770 }, { "epoch": 3.747849778300123, "grad_norm": 0.1731289178133011, "learning_rate": 8.259070518699207e-07, "loss": 0.0219, "step": 350780 }, { "epoch": 3.7479566216144025, "grad_norm": 0.45179539918899536, "learning_rate": 8.258943102708643e-07, "loss": 0.0022, "step": 350790 }, { "epoch": 3.7480634649286824, "grad_norm": 22.507610321044922, "learning_rate": 8.258815683038485e-07, "loss": 0.0332, "step": 350800 }, { "epoch": 3.748170308242962, "grad_norm": 0.4513910710811615, "learning_rate": 8.258688259688878e-07, "loss": 0.0117, "step": 350810 }, { "epoch": 3.7482771515572413, "grad_norm": 0.6930527091026306, "learning_rate": 8.258560832659967e-07, "loss": 0.0067, "step": 350820 }, { "epoch": 3.7483839948715207, "grad_norm": 2.4064619541168213, "learning_rate": 8.258433401951894e-07, "loss": 0.0138, "step": 350830 }, { "epoch": 3.7484908381858006, "grad_norm": 0.20538051426410675, "learning_rate": 8.258305967564805e-07, "loss": 0.0099, "step": 350840 }, { "epoch": 3.74859768150008, "grad_norm": 16.94864273071289, "learning_rate": 8.258178529498842e-07, "loss": 0.0515, "step": 350850 }, { "epoch": 3.74870452481436, "grad_norm": 7.256179332733154, "learning_rate": 8.258051087754147e-07, "loss": 0.0067, "step": 350860 }, { "epoch": 3.7488113681286395, "grad_norm": 0.9464201927185059, "learning_rate": 8.257923642330871e-07, "loss": 0.0209, "step": 350870 }, { "epoch": 3.748918211442919, "grad_norm": 17.391563415527344, "learning_rate": 8.257796193229151e-07, "loss": 0.0268, "step": 350880 }, { "epoch": 3.7490250547571984, "grad_norm": 0.0024170144461095333, "learning_rate": 8.257668740449133e-07, "loss": 0.0345, "step": 350890 }, { "epoch": 3.7491318980714783, "grad_norm": 0.07770626246929169, "learning_rate": 8.257541283990961e-07, "loss": 0.0074, "step": 350900 }, { "epoch": 3.7492387413857577, "grad_norm": 0.20112064480781555, "learning_rate": 8.25741382385478e-07, "loss": 0.0335, "step": 350910 }, { "epoch": 3.7493455847000376, "grad_norm": 0.02469014748930931, "learning_rate": 8.257286360040732e-07, "loss": 0.0366, "step": 350920 }, { "epoch": 3.749452428014317, "grad_norm": 0.004468660335987806, "learning_rate": 8.257158892548963e-07, "loss": 0.0251, "step": 350930 }, { "epoch": 3.7495592713285966, "grad_norm": 0.0010291066719219089, "learning_rate": 8.257031421379614e-07, "loss": 0.0188, "step": 350940 }, { "epoch": 3.749666114642876, "grad_norm": 1.2415543794631958, "learning_rate": 8.256903946532831e-07, "loss": 0.0181, "step": 350950 }, { "epoch": 3.749772957957156, "grad_norm": 10.220030784606934, "learning_rate": 8.256776468008759e-07, "loss": 0.0311, "step": 350960 }, { "epoch": 3.7498798012714354, "grad_norm": 2.7720842361450195, "learning_rate": 8.256648985807541e-07, "loss": 0.0103, "step": 350970 }, { "epoch": 3.7499866445857153, "grad_norm": 0.031027885153889656, "learning_rate": 8.256521499929319e-07, "loss": 0.0038, "step": 350980 }, { "epoch": 3.7500934878999947, "grad_norm": 0.277458131313324, "learning_rate": 8.256394010374239e-07, "loss": 0.0144, "step": 350990 }, { "epoch": 3.750200331214274, "grad_norm": 0.035893358290195465, "learning_rate": 8.256266517142444e-07, "loss": 0.002, "step": 351000 }, { "epoch": 3.7503071745285537, "grad_norm": 8.711755752563477, "learning_rate": 8.256139020234079e-07, "loss": 0.0298, "step": 351010 }, { "epoch": 3.7504140178428336, "grad_norm": 2.3191757202148438, "learning_rate": 8.256011519649286e-07, "loss": 0.021, "step": 351020 }, { "epoch": 3.750520861157113, "grad_norm": 1.985477328300476, "learning_rate": 8.255884015388211e-07, "loss": 0.0042, "step": 351030 }, { "epoch": 3.750627704471393, "grad_norm": 2.191725015640259, "learning_rate": 8.255756507450998e-07, "loss": 0.0045, "step": 351040 }, { "epoch": 3.7507345477856724, "grad_norm": 0.0024836212396621704, "learning_rate": 8.25562899583779e-07, "loss": 0.0101, "step": 351050 }, { "epoch": 3.750841391099952, "grad_norm": 1.5737676620483398, "learning_rate": 8.25550148054873e-07, "loss": 0.0064, "step": 351060 }, { "epoch": 3.7509482344142313, "grad_norm": 0.3032407760620117, "learning_rate": 8.255373961583964e-07, "loss": 0.0037, "step": 351070 }, { "epoch": 3.751055077728511, "grad_norm": 0.6592122316360474, "learning_rate": 8.255246438943635e-07, "loss": 0.0059, "step": 351080 }, { "epoch": 3.7511619210427907, "grad_norm": 0.6277505159378052, "learning_rate": 8.255118912627887e-07, "loss": 0.0278, "step": 351090 }, { "epoch": 3.7512687643570706, "grad_norm": 15.191483497619629, "learning_rate": 8.254991382636865e-07, "loss": 0.0092, "step": 351100 }, { "epoch": 3.75137560767135, "grad_norm": 1.5084798336029053, "learning_rate": 8.25486384897071e-07, "loss": 0.034, "step": 351110 }, { "epoch": 3.7514824509856295, "grad_norm": 0.3807224929332733, "learning_rate": 8.25473631162957e-07, "loss": 0.0115, "step": 351120 }, { "epoch": 3.751589294299909, "grad_norm": 0.9630146622657776, "learning_rate": 8.254608770613587e-07, "loss": 0.0146, "step": 351130 }, { "epoch": 3.751696137614189, "grad_norm": 0.7211759686470032, "learning_rate": 8.254481225922904e-07, "loss": 0.0036, "step": 351140 }, { "epoch": 3.7518029809284683, "grad_norm": 0.18361157178878784, "learning_rate": 8.254353677557666e-07, "loss": 0.0139, "step": 351150 }, { "epoch": 3.751909824242748, "grad_norm": 0.021545162424445152, "learning_rate": 8.254226125518018e-07, "loss": 0.023, "step": 351160 }, { "epoch": 3.7520166675570277, "grad_norm": 0.15264686942100525, "learning_rate": 8.254098569804102e-07, "loss": 0.0005, "step": 351170 }, { "epoch": 3.752123510871307, "grad_norm": 0.19014407694339752, "learning_rate": 8.253971010416063e-07, "loss": 0.0224, "step": 351180 }, { "epoch": 3.7522303541855866, "grad_norm": 0.004012912977486849, "learning_rate": 8.253843447354047e-07, "loss": 0.0037, "step": 351190 }, { "epoch": 3.7523371974998665, "grad_norm": 0.0035576322115957737, "learning_rate": 8.253715880618195e-07, "loss": 0.0117, "step": 351200 }, { "epoch": 3.752444040814146, "grad_norm": 0.003330077277496457, "learning_rate": 8.253588310208652e-07, "loss": 0.0068, "step": 351210 }, { "epoch": 3.752550884128426, "grad_norm": 0.003437938168644905, "learning_rate": 8.253460736125562e-07, "loss": 0.0159, "step": 351220 }, { "epoch": 3.7526577274427053, "grad_norm": 0.3398053050041199, "learning_rate": 8.25333315836907e-07, "loss": 0.0123, "step": 351230 }, { "epoch": 3.7527645707569848, "grad_norm": 0.02312190644443035, "learning_rate": 8.253205576939319e-07, "loss": 0.0076, "step": 351240 }, { "epoch": 3.7528714140712642, "grad_norm": 0.011352668516337872, "learning_rate": 8.253077991836455e-07, "loss": 0.0145, "step": 351250 }, { "epoch": 3.752978257385544, "grad_norm": 7.452498435974121, "learning_rate": 8.252950403060617e-07, "loss": 0.0093, "step": 351260 }, { "epoch": 3.7530851006998236, "grad_norm": 0.010784071870148182, "learning_rate": 8.252822810611954e-07, "loss": 0.001, "step": 351270 }, { "epoch": 3.7531919440141035, "grad_norm": 10.927971839904785, "learning_rate": 8.25269521449061e-07, "loss": 0.0377, "step": 351280 }, { "epoch": 3.753298787328383, "grad_norm": 0.05305563285946846, "learning_rate": 8.252567614696726e-07, "loss": 0.0267, "step": 351290 }, { "epoch": 3.7534056306426624, "grad_norm": 0.6610161662101746, "learning_rate": 8.252440011230448e-07, "loss": 0.0382, "step": 351300 }, { "epoch": 3.753512473956942, "grad_norm": 10.999839782714844, "learning_rate": 8.25231240409192e-07, "loss": 0.0557, "step": 351310 }, { "epoch": 3.753619317271222, "grad_norm": 3.2556416988372803, "learning_rate": 8.252184793281287e-07, "loss": 0.0035, "step": 351320 }, { "epoch": 3.7537261605855012, "grad_norm": 3.997044801712036, "learning_rate": 8.252057178798691e-07, "loss": 0.0205, "step": 351330 }, { "epoch": 3.753833003899781, "grad_norm": 1.1795952320098877, "learning_rate": 8.251929560644275e-07, "loss": 0.0048, "step": 351340 }, { "epoch": 3.7539398472140606, "grad_norm": 0.03519109636545181, "learning_rate": 8.251801938818188e-07, "loss": 0.0058, "step": 351350 }, { "epoch": 3.75404669052834, "grad_norm": 0.0028382642194628716, "learning_rate": 8.251674313320569e-07, "loss": 0.0273, "step": 351360 }, { "epoch": 3.75415353384262, "grad_norm": 0.012749968096613884, "learning_rate": 8.251546684151564e-07, "loss": 0.0145, "step": 351370 }, { "epoch": 3.7542603771568994, "grad_norm": 0.020856261253356934, "learning_rate": 8.25141905131132e-07, "loss": 0.0007, "step": 351380 }, { "epoch": 3.754367220471179, "grad_norm": 3.909525156021118, "learning_rate": 8.251291414799978e-07, "loss": 0.0062, "step": 351390 }, { "epoch": 3.754474063785459, "grad_norm": 0.008993285708129406, "learning_rate": 8.251163774617681e-07, "loss": 0.008, "step": 351400 }, { "epoch": 3.7545809070997382, "grad_norm": 2.5943403244018555, "learning_rate": 8.251036130764576e-07, "loss": 0.0264, "step": 351410 }, { "epoch": 3.7546877504140177, "grad_norm": 0.4398215115070343, "learning_rate": 8.250908483240806e-07, "loss": 0.0012, "step": 351420 }, { "epoch": 3.7547945937282976, "grad_norm": 1.358620285987854, "learning_rate": 8.250780832046514e-07, "loss": 0.0235, "step": 351430 }, { "epoch": 3.754901437042577, "grad_norm": 0.006481790915131569, "learning_rate": 8.250653177181845e-07, "loss": 0.0043, "step": 351440 }, { "epoch": 3.7550082803568565, "grad_norm": 0.09904752671718597, "learning_rate": 8.250525518646943e-07, "loss": 0.0043, "step": 351450 }, { "epoch": 3.7551151236711364, "grad_norm": 6.298814296722412, "learning_rate": 8.250397856441953e-07, "loss": 0.0157, "step": 351460 }, { "epoch": 3.755221966985416, "grad_norm": 0.00262182648293674, "learning_rate": 8.250270190567019e-07, "loss": 0.0256, "step": 351470 }, { "epoch": 3.7553288102996953, "grad_norm": 2.5453643798828125, "learning_rate": 8.250142521022283e-07, "loss": 0.005, "step": 351480 }, { "epoch": 3.7554356536139752, "grad_norm": 1.734757661819458, "learning_rate": 8.250014847807893e-07, "loss": 0.0067, "step": 351490 }, { "epoch": 3.7555424969282547, "grad_norm": 0.0063489219173789024, "learning_rate": 8.24988717092399e-07, "loss": 0.0016, "step": 351500 }, { "epoch": 3.755649340242534, "grad_norm": 0.6755011081695557, "learning_rate": 8.249759490370718e-07, "loss": 0.0124, "step": 351510 }, { "epoch": 3.755756183556814, "grad_norm": 0.7871994972229004, "learning_rate": 8.249631806148222e-07, "loss": 0.0085, "step": 351520 }, { "epoch": 3.7558630268710935, "grad_norm": 1.5049145221710205, "learning_rate": 8.249504118256649e-07, "loss": 0.0173, "step": 351530 }, { "epoch": 3.755969870185373, "grad_norm": 0.009326412342488766, "learning_rate": 8.24937642669614e-07, "loss": 0.0071, "step": 351540 }, { "epoch": 3.756076713499653, "grad_norm": 0.6577569842338562, "learning_rate": 8.249248731466838e-07, "loss": 0.0006, "step": 351550 }, { "epoch": 3.7561835568139323, "grad_norm": 6.435199737548828, "learning_rate": 8.249121032568891e-07, "loss": 0.0531, "step": 351560 }, { "epoch": 3.7562904001282122, "grad_norm": 0.15819035470485687, "learning_rate": 8.24899333000244e-07, "loss": 0.0219, "step": 351570 }, { "epoch": 3.7563972434424917, "grad_norm": 0.00740845175459981, "learning_rate": 8.24886562376763e-07, "loss": 0.0108, "step": 351580 }, { "epoch": 3.756504086756771, "grad_norm": 0.0010552232852205634, "learning_rate": 8.248737913864606e-07, "loss": 0.0068, "step": 351590 }, { "epoch": 3.7566109300710506, "grad_norm": 2.084843158721924, "learning_rate": 8.24861020029351e-07, "loss": 0.0113, "step": 351600 }, { "epoch": 3.7567177733853305, "grad_norm": 1.063760757446289, "learning_rate": 8.248482483054491e-07, "loss": 0.0072, "step": 351610 }, { "epoch": 3.75682461669961, "grad_norm": 5.071766376495361, "learning_rate": 8.248354762147689e-07, "loss": 0.0101, "step": 351620 }, { "epoch": 3.75693146001389, "grad_norm": 0.0037085465155541897, "learning_rate": 8.248227037573249e-07, "loss": 0.0102, "step": 351630 }, { "epoch": 3.7570383033281693, "grad_norm": 9.234211921691895, "learning_rate": 8.248099309331316e-07, "loss": 0.0204, "step": 351640 }, { "epoch": 3.757145146642449, "grad_norm": 0.7472911477088928, "learning_rate": 8.247971577422033e-07, "loss": 0.0091, "step": 351650 }, { "epoch": 3.7572519899567283, "grad_norm": 2.4259533882141113, "learning_rate": 8.247843841845544e-07, "loss": 0.002, "step": 351660 }, { "epoch": 3.757358833271008, "grad_norm": 0.24080459773540497, "learning_rate": 8.247716102601996e-07, "loss": 0.0062, "step": 351670 }, { "epoch": 3.7574656765852876, "grad_norm": 1.3260517120361328, "learning_rate": 8.247588359691531e-07, "loss": 0.0179, "step": 351680 }, { "epoch": 3.7575725198995675, "grad_norm": 0.4688178598880768, "learning_rate": 8.247460613114292e-07, "loss": 0.0124, "step": 351690 }, { "epoch": 3.757679363213847, "grad_norm": 2.287529706954956, "learning_rate": 8.247332862870428e-07, "loss": 0.0084, "step": 351700 }, { "epoch": 3.7577862065281264, "grad_norm": 0.008484311401844025, "learning_rate": 8.247205108960077e-07, "loss": 0.0038, "step": 351710 }, { "epoch": 3.757893049842406, "grad_norm": 0.0040349205955863, "learning_rate": 8.247077351383388e-07, "loss": 0.0041, "step": 351720 }, { "epoch": 3.757999893156686, "grad_norm": 0.002650843933224678, "learning_rate": 8.246949590140504e-07, "loss": 0.0571, "step": 351730 }, { "epoch": 3.7581067364709653, "grad_norm": 0.05636115372180939, "learning_rate": 8.246821825231566e-07, "loss": 0.002, "step": 351740 }, { "epoch": 3.758213579785245, "grad_norm": 2.029632806777954, "learning_rate": 8.246694056656725e-07, "loss": 0.0107, "step": 351750 }, { "epoch": 3.7583204230995246, "grad_norm": 0.27452588081359863, "learning_rate": 8.24656628441612e-07, "loss": 0.0124, "step": 351760 }, { "epoch": 3.758427266413804, "grad_norm": 5.2468719482421875, "learning_rate": 8.246438508509896e-07, "loss": 0.0303, "step": 351770 }, { "epoch": 3.7585341097280835, "grad_norm": 0.41917043924331665, "learning_rate": 8.246310728938198e-07, "loss": 0.0071, "step": 351780 }, { "epoch": 3.7586409530423635, "grad_norm": 1.4438509941101074, "learning_rate": 8.24618294570117e-07, "loss": 0.0104, "step": 351790 }, { "epoch": 3.758747796356643, "grad_norm": 0.17778871953487396, "learning_rate": 8.246055158798957e-07, "loss": 0.0103, "step": 351800 }, { "epoch": 3.758854639670923, "grad_norm": 0.1574835330247879, "learning_rate": 8.245927368231703e-07, "loss": 0.0011, "step": 351810 }, { "epoch": 3.7589614829852023, "grad_norm": 0.0028170994482934475, "learning_rate": 8.245799573999552e-07, "loss": 0.0052, "step": 351820 }, { "epoch": 3.7590683262994817, "grad_norm": 7.291999340057373, "learning_rate": 8.245671776102647e-07, "loss": 0.0257, "step": 351830 }, { "epoch": 3.759175169613761, "grad_norm": 3.8884527683258057, "learning_rate": 8.245543974541135e-07, "loss": 0.0028, "step": 351840 }, { "epoch": 3.759282012928041, "grad_norm": 0.15895430743694305, "learning_rate": 8.245416169315158e-07, "loss": 0.0106, "step": 351850 }, { "epoch": 3.7593888562423206, "grad_norm": 0.008851089514791965, "learning_rate": 8.245288360424861e-07, "loss": 0.0028, "step": 351860 }, { "epoch": 3.7594956995566005, "grad_norm": 0.07360263168811798, "learning_rate": 8.24516054787039e-07, "loss": 0.068, "step": 351870 }, { "epoch": 3.75960254287088, "grad_norm": 1.2331833839416504, "learning_rate": 8.245032731651886e-07, "loss": 0.0091, "step": 351880 }, { "epoch": 3.7597093861851594, "grad_norm": 0.950950026512146, "learning_rate": 8.244904911769497e-07, "loss": 0.003, "step": 351890 }, { "epoch": 3.759816229499439, "grad_norm": 0.055577196180820465, "learning_rate": 8.244777088223365e-07, "loss": 0.0001, "step": 351900 }, { "epoch": 3.7599230728137187, "grad_norm": 2.178805112838745, "learning_rate": 8.244649261013633e-07, "loss": 0.0074, "step": 351910 }, { "epoch": 3.760029916127998, "grad_norm": 1.955378532409668, "learning_rate": 8.244521430140449e-07, "loss": 0.0062, "step": 351920 }, { "epoch": 3.760136759442278, "grad_norm": 1.634083867073059, "learning_rate": 8.244393595603956e-07, "loss": 0.009, "step": 351930 }, { "epoch": 3.7602436027565576, "grad_norm": 8.550381660461426, "learning_rate": 8.244265757404296e-07, "loss": 0.0471, "step": 351940 }, { "epoch": 3.760350446070837, "grad_norm": 0.060022782534360886, "learning_rate": 8.244137915541615e-07, "loss": 0.0292, "step": 351950 }, { "epoch": 3.7604572893851165, "grad_norm": 0.22197218239307404, "learning_rate": 8.244010070016058e-07, "loss": 0.0095, "step": 351960 }, { "epoch": 3.7605641326993964, "grad_norm": 3.677860736846924, "learning_rate": 8.243882220827769e-07, "loss": 0.015, "step": 351970 }, { "epoch": 3.760670976013676, "grad_norm": 2.64278244972229, "learning_rate": 8.243754367976892e-07, "loss": 0.0045, "step": 351980 }, { "epoch": 3.7607778193279557, "grad_norm": 0.057706318795681, "learning_rate": 8.243626511463572e-07, "loss": 0.0043, "step": 351990 }, { "epoch": 3.760884662642235, "grad_norm": 4.09398078918457, "learning_rate": 8.243498651287951e-07, "loss": 0.0114, "step": 352000 }, { "epoch": 3.7609915059565147, "grad_norm": 5.81991720199585, "learning_rate": 8.243370787450177e-07, "loss": 0.0147, "step": 352010 }, { "epoch": 3.761098349270794, "grad_norm": 0.04533475264906883, "learning_rate": 8.243242919950393e-07, "loss": 0.0096, "step": 352020 }, { "epoch": 3.761205192585074, "grad_norm": 0.013219049200415611, "learning_rate": 8.243115048788741e-07, "loss": 0.013, "step": 352030 }, { "epoch": 3.7613120358993535, "grad_norm": 0.004152284469455481, "learning_rate": 8.24298717396537e-07, "loss": 0.0063, "step": 352040 }, { "epoch": 3.7614188792136334, "grad_norm": 0.3448496162891388, "learning_rate": 8.242859295480419e-07, "loss": 0.0172, "step": 352050 }, { "epoch": 3.761525722527913, "grad_norm": 7.173663139343262, "learning_rate": 8.242731413334037e-07, "loss": 0.0228, "step": 352060 }, { "epoch": 3.7616325658421923, "grad_norm": 0.0011086505837738514, "learning_rate": 8.242603527526366e-07, "loss": 0.0093, "step": 352070 }, { "epoch": 3.7617394091564718, "grad_norm": 0.10966561734676361, "learning_rate": 8.24247563805755e-07, "loss": 0.0086, "step": 352080 }, { "epoch": 3.7618462524707517, "grad_norm": 0.0017458959482610226, "learning_rate": 8.242347744927736e-07, "loss": 0.0155, "step": 352090 }, { "epoch": 3.761953095785031, "grad_norm": 0.09707751125097275, "learning_rate": 8.242219848137065e-07, "loss": 0.0083, "step": 352100 }, { "epoch": 3.762059939099311, "grad_norm": 0.041476186364889145, "learning_rate": 8.242091947685683e-07, "loss": 0.0047, "step": 352110 }, { "epoch": 3.7621667824135905, "grad_norm": 1.4414347410202026, "learning_rate": 8.241964043573735e-07, "loss": 0.036, "step": 352120 }, { "epoch": 3.76227362572787, "grad_norm": 1.3958221673965454, "learning_rate": 8.241836135801366e-07, "loss": 0.027, "step": 352130 }, { "epoch": 3.76238046904215, "grad_norm": 2.772298812866211, "learning_rate": 8.241708224368719e-07, "loss": 0.0059, "step": 352140 }, { "epoch": 3.7624873123564293, "grad_norm": 0.021442364901304245, "learning_rate": 8.241580309275938e-07, "loss": 0.0093, "step": 352150 }, { "epoch": 3.7625941556707088, "grad_norm": 0.9290063381195068, "learning_rate": 8.24145239052317e-07, "loss": 0.0238, "step": 352160 }, { "epoch": 3.7627009989849887, "grad_norm": 0.03137001022696495, "learning_rate": 8.241324468110556e-07, "loss": 0.0079, "step": 352170 }, { "epoch": 3.762807842299268, "grad_norm": 0.4120149314403534, "learning_rate": 8.241196542038241e-07, "loss": 0.0017, "step": 352180 }, { "epoch": 3.7629146856135476, "grad_norm": 0.40129345655441284, "learning_rate": 8.241068612306372e-07, "loss": 0.0003, "step": 352190 }, { "epoch": 3.7630215289278275, "grad_norm": 0.6370961666107178, "learning_rate": 8.240940678915091e-07, "loss": 0.0034, "step": 352200 }, { "epoch": 3.763128372242107, "grad_norm": 0.02738712728023529, "learning_rate": 8.240812741864544e-07, "loss": 0.0308, "step": 352210 }, { "epoch": 3.7632352155563864, "grad_norm": 0.008540293201804161, "learning_rate": 8.240684801154875e-07, "loss": 0.0145, "step": 352220 }, { "epoch": 3.7633420588706663, "grad_norm": 0.9675621390342712, "learning_rate": 8.240556856786227e-07, "loss": 0.0092, "step": 352230 }, { "epoch": 3.7634489021849458, "grad_norm": 2.7614247798919678, "learning_rate": 8.240428908758748e-07, "loss": 0.0156, "step": 352240 }, { "epoch": 3.7635557454992252, "grad_norm": 0.6857601404190063, "learning_rate": 8.240300957072578e-07, "loss": 0.0061, "step": 352250 }, { "epoch": 3.763662588813505, "grad_norm": 0.6587121486663818, "learning_rate": 8.240173001727865e-07, "loss": 0.0252, "step": 352260 }, { "epoch": 3.7637694321277846, "grad_norm": 3.2090461254119873, "learning_rate": 8.240045042724751e-07, "loss": 0.0189, "step": 352270 }, { "epoch": 3.7638762754420645, "grad_norm": 0.379372239112854, "learning_rate": 8.239917080063383e-07, "loss": 0.0118, "step": 352280 }, { "epoch": 3.763983118756344, "grad_norm": 0.0010458541801199317, "learning_rate": 8.239789113743903e-07, "loss": 0.0217, "step": 352290 }, { "epoch": 3.7640899620706234, "grad_norm": 0.7938485145568848, "learning_rate": 8.239661143766458e-07, "loss": 0.0123, "step": 352300 }, { "epoch": 3.764196805384903, "grad_norm": 0.01232345961034298, "learning_rate": 8.239533170131189e-07, "loss": 0.0155, "step": 352310 }, { "epoch": 3.7643036486991828, "grad_norm": 0.03746985271573067, "learning_rate": 8.239405192838244e-07, "loss": 0.0017, "step": 352320 }, { "epoch": 3.7644104920134622, "grad_norm": 0.01307503692805767, "learning_rate": 8.239277211887765e-07, "loss": 0.0005, "step": 352330 }, { "epoch": 3.764517335327742, "grad_norm": 0.6407424807548523, "learning_rate": 8.239149227279898e-07, "loss": 0.0022, "step": 352340 }, { "epoch": 3.7646241786420216, "grad_norm": 0.2463841736316681, "learning_rate": 8.239021239014788e-07, "loss": 0.0123, "step": 352350 }, { "epoch": 3.764731021956301, "grad_norm": 0.38830265402793884, "learning_rate": 8.238893247092577e-07, "loss": 0.0173, "step": 352360 }, { "epoch": 3.7648378652705805, "grad_norm": 7.171002388000488, "learning_rate": 8.238765251513413e-07, "loss": 0.0072, "step": 352370 }, { "epoch": 3.7649447085848604, "grad_norm": 0.0017535232473164797, "learning_rate": 8.238637252277437e-07, "loss": 0.0099, "step": 352380 }, { "epoch": 3.76505155189914, "grad_norm": 0.0051000663079321384, "learning_rate": 8.238509249384796e-07, "loss": 0.0549, "step": 352390 }, { "epoch": 3.7651583952134198, "grad_norm": 0.1735508292913437, "learning_rate": 8.238381242835632e-07, "loss": 0.0011, "step": 352400 }, { "epoch": 3.7652652385276992, "grad_norm": 1.5602275133132935, "learning_rate": 8.238253232630094e-07, "loss": 0.0055, "step": 352410 }, { "epoch": 3.7653720818419787, "grad_norm": 2.14663028717041, "learning_rate": 8.238125218768322e-07, "loss": 0.0058, "step": 352420 }, { "epoch": 3.765478925156258, "grad_norm": 0.009975738823413849, "learning_rate": 8.237997201250462e-07, "loss": 0.0068, "step": 352430 }, { "epoch": 3.765585768470538, "grad_norm": 0.14796361327171326, "learning_rate": 8.237869180076659e-07, "loss": 0.013, "step": 352440 }, { "epoch": 3.7656926117848175, "grad_norm": 0.01112908124923706, "learning_rate": 8.237741155247058e-07, "loss": 0.0034, "step": 352450 }, { "epoch": 3.7657994550990974, "grad_norm": 0.1336532086133957, "learning_rate": 8.237613126761801e-07, "loss": 0.0011, "step": 352460 }, { "epoch": 3.765906298413377, "grad_norm": 0.1927860528230667, "learning_rate": 8.237485094621036e-07, "loss": 0.0032, "step": 352470 }, { "epoch": 3.7660131417276563, "grad_norm": 0.8957436084747314, "learning_rate": 8.237357058824906e-07, "loss": 0.0068, "step": 352480 }, { "epoch": 3.766119985041936, "grad_norm": 0.006853132974356413, "learning_rate": 8.237229019373555e-07, "loss": 0.006, "step": 352490 }, { "epoch": 3.7662268283562157, "grad_norm": 5.934228420257568, "learning_rate": 8.237100976267128e-07, "loss": 0.0232, "step": 352500 }, { "epoch": 3.766333671670495, "grad_norm": 0.30089810490608215, "learning_rate": 8.23697292950577e-07, "loss": 0.0084, "step": 352510 }, { "epoch": 3.766440514984775, "grad_norm": 0.007722134701907635, "learning_rate": 8.236844879089626e-07, "loss": 0.0276, "step": 352520 }, { "epoch": 3.7665473582990545, "grad_norm": 0.1252378672361374, "learning_rate": 8.236716825018839e-07, "loss": 0.0031, "step": 352530 }, { "epoch": 3.766654201613334, "grad_norm": 1.1801066398620605, "learning_rate": 8.236588767293555e-07, "loss": 0.0075, "step": 352540 }, { "epoch": 3.7667610449276134, "grad_norm": 9.731196403503418, "learning_rate": 8.236460705913917e-07, "loss": 0.0284, "step": 352550 }, { "epoch": 3.7668678882418933, "grad_norm": 0.8423541188240051, "learning_rate": 8.23633264088007e-07, "loss": 0.0113, "step": 352560 }, { "epoch": 3.766974731556173, "grad_norm": 0.19971537590026855, "learning_rate": 8.236204572192162e-07, "loss": 0.0359, "step": 352570 }, { "epoch": 3.7670815748704527, "grad_norm": 0.4743596911430359, "learning_rate": 8.236076499850333e-07, "loss": 0.0447, "step": 352580 }, { "epoch": 3.767188418184732, "grad_norm": 0.024619974195957184, "learning_rate": 8.235948423854729e-07, "loss": 0.0211, "step": 352590 }, { "epoch": 3.7672952614990116, "grad_norm": 0.0026651679072529078, "learning_rate": 8.235820344205495e-07, "loss": 0.0077, "step": 352600 }, { "epoch": 3.767402104813291, "grad_norm": 0.14722906053066254, "learning_rate": 8.235692260902777e-07, "loss": 0.0039, "step": 352610 }, { "epoch": 3.767508948127571, "grad_norm": 5.230077266693115, "learning_rate": 8.235564173946717e-07, "loss": 0.0149, "step": 352620 }, { "epoch": 3.7676157914418504, "grad_norm": 3.917924165725708, "learning_rate": 8.235436083337462e-07, "loss": 0.0457, "step": 352630 }, { "epoch": 3.7677226347561303, "grad_norm": 0.48287925124168396, "learning_rate": 8.235307989075154e-07, "loss": 0.008, "step": 352640 }, { "epoch": 3.76782947807041, "grad_norm": 0.7632799744606018, "learning_rate": 8.23517989115994e-07, "loss": 0.0471, "step": 352650 }, { "epoch": 3.7679363213846893, "grad_norm": 3.9759016036987305, "learning_rate": 8.235051789591964e-07, "loss": 0.0081, "step": 352660 }, { "epoch": 3.7680431646989687, "grad_norm": 0.10125074535608292, "learning_rate": 8.234923684371369e-07, "loss": 0.0022, "step": 352670 }, { "epoch": 3.7681500080132486, "grad_norm": 6.425429344177246, "learning_rate": 8.234795575498301e-07, "loss": 0.0124, "step": 352680 }, { "epoch": 3.768256851327528, "grad_norm": 0.23874537646770477, "learning_rate": 8.234667462972907e-07, "loss": 0.0311, "step": 352690 }, { "epoch": 3.768363694641808, "grad_norm": 0.008962609805166721, "learning_rate": 8.234539346795328e-07, "loss": 0.0023, "step": 352700 }, { "epoch": 3.7684705379560874, "grad_norm": 0.4669743478298187, "learning_rate": 8.23441122696571e-07, "loss": 0.01, "step": 352710 }, { "epoch": 3.768577381270367, "grad_norm": 0.4708346426486969, "learning_rate": 8.234283103484196e-07, "loss": 0.0115, "step": 352720 }, { "epoch": 3.7686842245846464, "grad_norm": 0.0263861995190382, "learning_rate": 8.234154976350935e-07, "loss": 0.0018, "step": 352730 }, { "epoch": 3.7687910678989263, "grad_norm": 0.3988765478134155, "learning_rate": 8.234026845566067e-07, "loss": 0.0284, "step": 352740 }, { "epoch": 3.7688979112132057, "grad_norm": 3.233942985534668, "learning_rate": 8.233898711129739e-07, "loss": 0.0116, "step": 352750 }, { "epoch": 3.7690047545274856, "grad_norm": 13.251293182373047, "learning_rate": 8.233770573042096e-07, "loss": 0.0252, "step": 352760 }, { "epoch": 3.769111597841765, "grad_norm": 0.22736382484436035, "learning_rate": 8.23364243130328e-07, "loss": 0.0178, "step": 352770 }, { "epoch": 3.7692184411560445, "grad_norm": 0.18418915569782257, "learning_rate": 8.233514285913441e-07, "loss": 0.0036, "step": 352780 }, { "epoch": 3.769325284470324, "grad_norm": 0.087820865213871, "learning_rate": 8.233386136872717e-07, "loss": 0.0125, "step": 352790 }, { "epoch": 3.769432127784604, "grad_norm": 0.3072136640548706, "learning_rate": 8.233257984181259e-07, "loss": 0.013, "step": 352800 }, { "epoch": 3.7695389710988834, "grad_norm": 4.299034595489502, "learning_rate": 8.233129827839207e-07, "loss": 0.0131, "step": 352810 }, { "epoch": 3.7696458144131633, "grad_norm": 0.019783994182944298, "learning_rate": 8.233001667846709e-07, "loss": 0.0121, "step": 352820 }, { "epoch": 3.7697526577274427, "grad_norm": 0.030603690072894096, "learning_rate": 8.232873504203906e-07, "loss": 0.0017, "step": 352830 }, { "epoch": 3.769859501041722, "grad_norm": 8.993584632873535, "learning_rate": 8.232745336910945e-07, "loss": 0.0179, "step": 352840 }, { "epoch": 3.769966344356002, "grad_norm": 0.03968283161520958, "learning_rate": 8.23261716596797e-07, "loss": 0.0131, "step": 352850 }, { "epoch": 3.7700731876702815, "grad_norm": 0.27180615067481995, "learning_rate": 8.232488991375129e-07, "loss": 0.022, "step": 352860 }, { "epoch": 3.770180030984561, "grad_norm": 3.7686760425567627, "learning_rate": 8.232360813132562e-07, "loss": 0.0245, "step": 352870 }, { "epoch": 3.770286874298841, "grad_norm": 6.082815170288086, "learning_rate": 8.232232631240414e-07, "loss": 0.0086, "step": 352880 }, { "epoch": 3.7703937176131204, "grad_norm": 0.17241956293582916, "learning_rate": 8.232104445698835e-07, "loss": 0.0051, "step": 352890 }, { "epoch": 3.7705005609274, "grad_norm": 0.001459483988583088, "learning_rate": 8.231976256507965e-07, "loss": 0.0102, "step": 352900 }, { "epoch": 3.7706074042416797, "grad_norm": 0.16083820164203644, "learning_rate": 8.231848063667948e-07, "loss": 0.0087, "step": 352910 }, { "epoch": 3.770714247555959, "grad_norm": 0.0015298613579943776, "learning_rate": 8.231719867178933e-07, "loss": 0.018, "step": 352920 }, { "epoch": 3.7708210908702386, "grad_norm": 0.017872711643576622, "learning_rate": 8.23159166704106e-07, "loss": 0.0152, "step": 352930 }, { "epoch": 3.7709279341845185, "grad_norm": 0.11076421290636063, "learning_rate": 8.231463463254477e-07, "loss": 0.0168, "step": 352940 }, { "epoch": 3.771034777498798, "grad_norm": 0.008141977712512016, "learning_rate": 8.23133525581933e-07, "loss": 0.0043, "step": 352950 }, { "epoch": 3.7711416208130775, "grad_norm": 0.020292755216360092, "learning_rate": 8.23120704473576e-07, "loss": 0.018, "step": 352960 }, { "epoch": 3.7712484641273574, "grad_norm": 2.9024322032928467, "learning_rate": 8.231078830003912e-07, "loss": 0.0309, "step": 352970 }, { "epoch": 3.771355307441637, "grad_norm": 0.05071344971656799, "learning_rate": 8.230950611623935e-07, "loss": 0.0131, "step": 352980 }, { "epoch": 3.7714621507559163, "grad_norm": 1.6553635597229004, "learning_rate": 8.230822389595968e-07, "loss": 0.0219, "step": 352990 }, { "epoch": 3.771568994070196, "grad_norm": 8.854554176330566, "learning_rate": 8.23069416392016e-07, "loss": 0.0165, "step": 353000 }, { "epoch": 3.7716758373844756, "grad_norm": 0.21472786366939545, "learning_rate": 8.230565934596655e-07, "loss": 0.0075, "step": 353010 }, { "epoch": 3.771782680698755, "grad_norm": 2.4782960414886475, "learning_rate": 8.230437701625597e-07, "loss": 0.0035, "step": 353020 }, { "epoch": 3.771889524013035, "grad_norm": 2.273604393005371, "learning_rate": 8.230309465007131e-07, "loss": 0.0045, "step": 353030 }, { "epoch": 3.7719963673273145, "grad_norm": 0.004004464019089937, "learning_rate": 8.230181224741401e-07, "loss": 0.0218, "step": 353040 }, { "epoch": 3.7721032106415944, "grad_norm": 0.0013719259295612574, "learning_rate": 8.230052980828552e-07, "loss": 0.0148, "step": 353050 }, { "epoch": 3.772210053955874, "grad_norm": 0.003026482881978154, "learning_rate": 8.229924733268732e-07, "loss": 0.023, "step": 353060 }, { "epoch": 3.7723168972701533, "grad_norm": 0.04135803505778313, "learning_rate": 8.229796482062081e-07, "loss": 0.0108, "step": 353070 }, { "epoch": 3.7724237405844327, "grad_norm": 6.421661853790283, "learning_rate": 8.229668227208748e-07, "loss": 0.0177, "step": 353080 }, { "epoch": 3.7725305838987127, "grad_norm": 0.003844783641397953, "learning_rate": 8.229539968708874e-07, "loss": 0.0075, "step": 353090 }, { "epoch": 3.772637427212992, "grad_norm": 0.1016845852136612, "learning_rate": 8.229411706562605e-07, "loss": 0.0022, "step": 353100 }, { "epoch": 3.772744270527272, "grad_norm": 2.6167736053466797, "learning_rate": 8.229283440770088e-07, "loss": 0.0158, "step": 353110 }, { "epoch": 3.7728511138415515, "grad_norm": 0.3337019383907318, "learning_rate": 8.229155171331466e-07, "loss": 0.0101, "step": 353120 }, { "epoch": 3.772957957155831, "grad_norm": 0.7907918095588684, "learning_rate": 8.229026898246884e-07, "loss": 0.036, "step": 353130 }, { "epoch": 3.7730648004701104, "grad_norm": 0.006729051936417818, "learning_rate": 8.228898621516487e-07, "loss": 0.0453, "step": 353140 }, { "epoch": 3.7731716437843903, "grad_norm": 0.02368989586830139, "learning_rate": 8.228770341140421e-07, "loss": 0.0289, "step": 353150 }, { "epoch": 3.7732784870986698, "grad_norm": 0.0251250471919775, "learning_rate": 8.228642057118829e-07, "loss": 0.0012, "step": 353160 }, { "epoch": 3.7733853304129497, "grad_norm": 0.016344165429472923, "learning_rate": 8.228513769451854e-07, "loss": 0.0263, "step": 353170 }, { "epoch": 3.773492173727229, "grad_norm": 0.11264964938163757, "learning_rate": 8.228385478139645e-07, "loss": 0.0083, "step": 353180 }, { "epoch": 3.7735990170415086, "grad_norm": 9.297721862792969, "learning_rate": 8.228257183182345e-07, "loss": 0.0167, "step": 353190 }, { "epoch": 3.773705860355788, "grad_norm": 6.487244129180908, "learning_rate": 8.2281288845801e-07, "loss": 0.0172, "step": 353200 }, { "epoch": 3.773812703670068, "grad_norm": 0.2539937198162079, "learning_rate": 8.228000582333054e-07, "loss": 0.0082, "step": 353210 }, { "epoch": 3.7739195469843474, "grad_norm": 0.010971824638545513, "learning_rate": 8.22787227644135e-07, "loss": 0.0113, "step": 353220 }, { "epoch": 3.7740263902986273, "grad_norm": 0.0027329083532094955, "learning_rate": 8.227743966905135e-07, "loss": 0.01, "step": 353230 }, { "epoch": 3.7741332336129068, "grad_norm": 0.00536380335688591, "learning_rate": 8.227615653724553e-07, "loss": 0.0038, "step": 353240 }, { "epoch": 3.774240076927186, "grad_norm": 0.008868241682648659, "learning_rate": 8.22748733689975e-07, "loss": 0.0033, "step": 353250 }, { "epoch": 3.7743469202414657, "grad_norm": 3.234064817428589, "learning_rate": 8.227359016430872e-07, "loss": 0.0068, "step": 353260 }, { "epoch": 3.7744537635557456, "grad_norm": 0.008080000057816505, "learning_rate": 8.22723069231806e-07, "loss": 0.0167, "step": 353270 }, { "epoch": 3.774560606870025, "grad_norm": 1.4713540077209473, "learning_rate": 8.227102364561461e-07, "loss": 0.0026, "step": 353280 }, { "epoch": 3.774667450184305, "grad_norm": 0.011546194553375244, "learning_rate": 8.22697403316122e-07, "loss": 0.0035, "step": 353290 }, { "epoch": 3.7747742934985844, "grad_norm": 0.14642556011676788, "learning_rate": 8.226845698117483e-07, "loss": 0.0089, "step": 353300 }, { "epoch": 3.774881136812864, "grad_norm": 0.054846104234457016, "learning_rate": 8.226717359430393e-07, "loss": 0.0192, "step": 353310 }, { "epoch": 3.7749879801271433, "grad_norm": 0.3418685495853424, "learning_rate": 8.226589017100095e-07, "loss": 0.0638, "step": 353320 }, { "epoch": 3.775094823441423, "grad_norm": 4.300019264221191, "learning_rate": 8.226460671126735e-07, "loss": 0.0415, "step": 353330 }, { "epoch": 3.7752016667557027, "grad_norm": 0.1961643397808075, "learning_rate": 8.226332321510456e-07, "loss": 0.009, "step": 353340 }, { "epoch": 3.7753085100699826, "grad_norm": 5.725978851318359, "learning_rate": 8.226203968251406e-07, "loss": 0.0097, "step": 353350 }, { "epoch": 3.775415353384262, "grad_norm": 11.80708122253418, "learning_rate": 8.226075611349728e-07, "loss": 0.01, "step": 353360 }, { "epoch": 3.7755221966985415, "grad_norm": 0.5839071869850159, "learning_rate": 8.225947250805567e-07, "loss": 0.0087, "step": 353370 }, { "epoch": 3.775629040012821, "grad_norm": 0.019068876281380653, "learning_rate": 8.225818886619067e-07, "loss": 0.008, "step": 353380 }, { "epoch": 3.775735883327101, "grad_norm": 0.8023826479911804, "learning_rate": 8.225690518790376e-07, "loss": 0.0157, "step": 353390 }, { "epoch": 3.7758427266413803, "grad_norm": 0.05057147517800331, "learning_rate": 8.225562147319635e-07, "loss": 0.0221, "step": 353400 }, { "epoch": 3.7759495699556602, "grad_norm": 1.9961528778076172, "learning_rate": 8.225433772206992e-07, "loss": 0.0199, "step": 353410 }, { "epoch": 3.7760564132699397, "grad_norm": 0.04036683216691017, "learning_rate": 8.225305393452591e-07, "loss": 0.0087, "step": 353420 }, { "epoch": 3.776163256584219, "grad_norm": 2.1321473121643066, "learning_rate": 8.225177011056575e-07, "loss": 0.0046, "step": 353430 }, { "epoch": 3.7762700998984986, "grad_norm": 0.00547777721658349, "learning_rate": 8.225048625019093e-07, "loss": 0.0012, "step": 353440 }, { "epoch": 3.7763769432127785, "grad_norm": 7.384347438812256, "learning_rate": 8.224920235340287e-07, "loss": 0.0292, "step": 353450 }, { "epoch": 3.776483786527058, "grad_norm": 0.028714556246995926, "learning_rate": 8.224791842020302e-07, "loss": 0.0061, "step": 353460 }, { "epoch": 3.776590629841338, "grad_norm": 0.631719708442688, "learning_rate": 8.224663445059285e-07, "loss": 0.015, "step": 353470 }, { "epoch": 3.7766974731556173, "grad_norm": 0.1796516478061676, "learning_rate": 8.224535044457378e-07, "loss": 0.0188, "step": 353480 }, { "epoch": 3.776804316469897, "grad_norm": 6.066251754760742, "learning_rate": 8.224406640214728e-07, "loss": 0.0427, "step": 353490 }, { "epoch": 3.7769111597841762, "grad_norm": 0.05659134313464165, "learning_rate": 8.224278232331481e-07, "loss": 0.0019, "step": 353500 }, { "epoch": 3.777018003098456, "grad_norm": 0.09822802990674973, "learning_rate": 8.224149820807779e-07, "loss": 0.0155, "step": 353510 }, { "epoch": 3.7771248464127356, "grad_norm": 0.010061235167086124, "learning_rate": 8.224021405643769e-07, "loss": 0.0439, "step": 353520 }, { "epoch": 3.7772316897270155, "grad_norm": 3.2444710731506348, "learning_rate": 8.223892986839596e-07, "loss": 0.0183, "step": 353530 }, { "epoch": 3.777338533041295, "grad_norm": 2.874119281768799, "learning_rate": 8.223764564395402e-07, "loss": 0.0165, "step": 353540 }, { "epoch": 3.7774453763555744, "grad_norm": 2.2283709049224854, "learning_rate": 8.223636138311336e-07, "loss": 0.005, "step": 353550 }, { "epoch": 3.777552219669854, "grad_norm": 0.5126526355743408, "learning_rate": 8.223507708587543e-07, "loss": 0.0058, "step": 353560 }, { "epoch": 3.777659062984134, "grad_norm": 6.242028713226318, "learning_rate": 8.223379275224165e-07, "loss": 0.043, "step": 353570 }, { "epoch": 3.7777659062984132, "grad_norm": 0.013231308199465275, "learning_rate": 8.223250838221349e-07, "loss": 0.0043, "step": 353580 }, { "epoch": 3.777872749612693, "grad_norm": 0.7749471068382263, "learning_rate": 8.22312239757924e-07, "loss": 0.0045, "step": 353590 }, { "epoch": 3.7779795929269726, "grad_norm": 0.5914238095283508, "learning_rate": 8.222993953297981e-07, "loss": 0.0093, "step": 353600 }, { "epoch": 3.778086436241252, "grad_norm": 0.009045335464179516, "learning_rate": 8.222865505377721e-07, "loss": 0.0144, "step": 353610 }, { "epoch": 3.778193279555532, "grad_norm": 0.10862637311220169, "learning_rate": 8.2227370538186e-07, "loss": 0.0127, "step": 353620 }, { "epoch": 3.7783001228698114, "grad_norm": 5.626058578491211, "learning_rate": 8.222608598620766e-07, "loss": 0.0202, "step": 353630 }, { "epoch": 3.778406966184091, "grad_norm": 15.475691795349121, "learning_rate": 8.222480139784366e-07, "loss": 0.0231, "step": 353640 }, { "epoch": 3.778513809498371, "grad_norm": 2.975325584411621, "learning_rate": 8.22235167730954e-07, "loss": 0.01, "step": 353650 }, { "epoch": 3.7786206528126502, "grad_norm": 0.023100337013602257, "learning_rate": 8.222223211196437e-07, "loss": 0.0014, "step": 353660 }, { "epoch": 3.7787274961269297, "grad_norm": 0.008413130417466164, "learning_rate": 8.222094741445201e-07, "loss": 0.0002, "step": 353670 }, { "epoch": 3.7788343394412096, "grad_norm": 4.551419734954834, "learning_rate": 8.221966268055977e-07, "loss": 0.0057, "step": 353680 }, { "epoch": 3.778941182755489, "grad_norm": 11.924102783203125, "learning_rate": 8.22183779102891e-07, "loss": 0.0117, "step": 353690 }, { "epoch": 3.7790480260697685, "grad_norm": 0.0020202111918479204, "learning_rate": 8.221709310364144e-07, "loss": 0.0065, "step": 353700 }, { "epoch": 3.7791548693840484, "grad_norm": 0.016273485496640205, "learning_rate": 8.221580826061824e-07, "loss": 0.0055, "step": 353710 }, { "epoch": 3.779261712698328, "grad_norm": 0.3709471821784973, "learning_rate": 8.221452338122099e-07, "loss": 0.0091, "step": 353720 }, { "epoch": 3.7793685560126073, "grad_norm": 0.006427701096981764, "learning_rate": 8.221323846545109e-07, "loss": 0.024, "step": 353730 }, { "epoch": 3.7794753993268873, "grad_norm": 0.028867455199360847, "learning_rate": 8.221195351331003e-07, "loss": 0.0041, "step": 353740 }, { "epoch": 3.7795822426411667, "grad_norm": 0.011084318161010742, "learning_rate": 8.221066852479923e-07, "loss": 0.0045, "step": 353750 }, { "epoch": 3.7796890859554466, "grad_norm": 0.0036250227130949497, "learning_rate": 8.220938349992016e-07, "loss": 0.0121, "step": 353760 }, { "epoch": 3.779795929269726, "grad_norm": 5.042433738708496, "learning_rate": 8.220809843867425e-07, "loss": 0.0125, "step": 353770 }, { "epoch": 3.7799027725840055, "grad_norm": 0.5974056720733643, "learning_rate": 8.220681334106299e-07, "loss": 0.0074, "step": 353780 }, { "epoch": 3.780009615898285, "grad_norm": 0.0563737116754055, "learning_rate": 8.220552820708779e-07, "loss": 0.0257, "step": 353790 }, { "epoch": 3.780116459212565, "grad_norm": 0.0037912195548415184, "learning_rate": 8.220424303675013e-07, "loss": 0.0111, "step": 353800 }, { "epoch": 3.7802233025268444, "grad_norm": 0.02079300582408905, "learning_rate": 8.220295783005144e-07, "loss": 0.0123, "step": 353810 }, { "epoch": 3.7803301458411243, "grad_norm": 0.04493821784853935, "learning_rate": 8.220167258699318e-07, "loss": 0.0098, "step": 353820 }, { "epoch": 3.7804369891554037, "grad_norm": 0.0008888725424185395, "learning_rate": 8.220038730757682e-07, "loss": 0.0009, "step": 353830 }, { "epoch": 3.780543832469683, "grad_norm": 3.498185157775879, "learning_rate": 8.219910199180377e-07, "loss": 0.0065, "step": 353840 }, { "epoch": 3.7806506757839626, "grad_norm": 0.012275095097720623, "learning_rate": 8.219781663967551e-07, "loss": 0.0451, "step": 353850 }, { "epoch": 3.7807575190982425, "grad_norm": 0.005402583163231611, "learning_rate": 8.219653125119348e-07, "loss": 0.0082, "step": 353860 }, { "epoch": 3.780864362412522, "grad_norm": 0.008910929784178734, "learning_rate": 8.219524582635915e-07, "loss": 0.0117, "step": 353870 }, { "epoch": 3.780971205726802, "grad_norm": 0.02072528563439846, "learning_rate": 8.219396036517394e-07, "loss": 0.0265, "step": 353880 }, { "epoch": 3.7810780490410814, "grad_norm": 0.214186429977417, "learning_rate": 8.219267486763932e-07, "loss": 0.0041, "step": 353890 }, { "epoch": 3.781184892355361, "grad_norm": 3.10034441947937, "learning_rate": 8.219138933375675e-07, "loss": 0.0139, "step": 353900 }, { "epoch": 3.7812917356696403, "grad_norm": 3.273740291595459, "learning_rate": 8.219010376352767e-07, "loss": 0.0298, "step": 353910 }, { "epoch": 3.78139857898392, "grad_norm": 5.303940296173096, "learning_rate": 8.218881815695355e-07, "loss": 0.0306, "step": 353920 }, { "epoch": 3.7815054222981996, "grad_norm": 0.010156398639082909, "learning_rate": 8.218753251403581e-07, "loss": 0.0051, "step": 353930 }, { "epoch": 3.7816122656124795, "grad_norm": 0.04917420446872711, "learning_rate": 8.21862468347759e-07, "loss": 0.0053, "step": 353940 }, { "epoch": 3.781719108926759, "grad_norm": 2.0025172233581543, "learning_rate": 8.218496111917531e-07, "loss": 0.0109, "step": 353950 }, { "epoch": 3.7818259522410385, "grad_norm": 0.8039235472679138, "learning_rate": 8.218367536723547e-07, "loss": 0.0511, "step": 353960 }, { "epoch": 3.781932795555318, "grad_norm": 44.8192253112793, "learning_rate": 8.218238957895783e-07, "loss": 0.0588, "step": 353970 }, { "epoch": 3.782039638869598, "grad_norm": 0.0006896041450090706, "learning_rate": 8.218110375434383e-07, "loss": 0.028, "step": 353980 }, { "epoch": 3.7821464821838773, "grad_norm": 0.0036492941435426474, "learning_rate": 8.217981789339494e-07, "loss": 0.0241, "step": 353990 }, { "epoch": 3.782253325498157, "grad_norm": 0.3668339252471924, "learning_rate": 8.217853199611261e-07, "loss": 0.0052, "step": 354000 }, { "epoch": 3.7823601688124366, "grad_norm": 0.03137601912021637, "learning_rate": 8.217724606249828e-07, "loss": 0.0181, "step": 354010 }, { "epoch": 3.782467012126716, "grad_norm": 2.02901554107666, "learning_rate": 8.217596009255343e-07, "loss": 0.0049, "step": 354020 }, { "epoch": 3.7825738554409956, "grad_norm": 0.14531990885734558, "learning_rate": 8.217467408627947e-07, "loss": 0.0073, "step": 354030 }, { "epoch": 3.7826806987552755, "grad_norm": 0.01551044825464487, "learning_rate": 8.21733880436779e-07, "loss": 0.0271, "step": 354040 }, { "epoch": 3.782787542069555, "grad_norm": 0.26519477367401123, "learning_rate": 8.217210196475013e-07, "loss": 0.0156, "step": 354050 }, { "epoch": 3.782894385383835, "grad_norm": 0.07411909103393555, "learning_rate": 8.217081584949764e-07, "loss": 0.0425, "step": 354060 }, { "epoch": 3.7830012286981143, "grad_norm": 6.546902179718018, "learning_rate": 8.216952969792186e-07, "loss": 0.0123, "step": 354070 }, { "epoch": 3.7831080720123937, "grad_norm": 1.9215712547302246, "learning_rate": 8.216824351002425e-07, "loss": 0.0141, "step": 354080 }, { "epoch": 3.783214915326673, "grad_norm": 1.512980580329895, "learning_rate": 8.216695728580628e-07, "loss": 0.0057, "step": 354090 }, { "epoch": 3.783321758640953, "grad_norm": 1.5592349767684937, "learning_rate": 8.216567102526936e-07, "loss": 0.0122, "step": 354100 }, { "epoch": 3.7834286019552326, "grad_norm": 0.002457514638081193, "learning_rate": 8.216438472841499e-07, "loss": 0.0105, "step": 354110 }, { "epoch": 3.7835354452695125, "grad_norm": 5.661716461181641, "learning_rate": 8.21630983952446e-07, "loss": 0.009, "step": 354120 }, { "epoch": 3.783642288583792, "grad_norm": 4.817490577697754, "learning_rate": 8.216181202575965e-07, "loss": 0.0136, "step": 354130 }, { "epoch": 3.7837491318980714, "grad_norm": 7.462141513824463, "learning_rate": 8.216052561996158e-07, "loss": 0.0291, "step": 354140 }, { "epoch": 3.783855975212351, "grad_norm": 0.316976398229599, "learning_rate": 8.215923917785186e-07, "loss": 0.007, "step": 354150 }, { "epoch": 3.7839628185266307, "grad_norm": 0.16421742737293243, "learning_rate": 8.215795269943191e-07, "loss": 0.0028, "step": 354160 }, { "epoch": 3.78406966184091, "grad_norm": 0.03835830092430115, "learning_rate": 8.215666618470323e-07, "loss": 0.0078, "step": 354170 }, { "epoch": 3.78417650515519, "grad_norm": 0.011574598960578442, "learning_rate": 8.215537963366723e-07, "loss": 0.0225, "step": 354180 }, { "epoch": 3.7842833484694696, "grad_norm": 2.816863775253296, "learning_rate": 8.215409304632538e-07, "loss": 0.0158, "step": 354190 }, { "epoch": 3.784390191783749, "grad_norm": 0.06127534806728363, "learning_rate": 8.215280642267913e-07, "loss": 0.0042, "step": 354200 }, { "epoch": 3.7844970350980285, "grad_norm": 0.11810635775327682, "learning_rate": 8.215151976272995e-07, "loss": 0.0011, "step": 354210 }, { "epoch": 3.7846038784123084, "grad_norm": 0.0014522015117108822, "learning_rate": 8.215023306647926e-07, "loss": 0.0064, "step": 354220 }, { "epoch": 3.784710721726588, "grad_norm": 0.09290700405836105, "learning_rate": 8.214894633392855e-07, "loss": 0.0053, "step": 354230 }, { "epoch": 3.7848175650408677, "grad_norm": 2.3703243732452393, "learning_rate": 8.214765956507923e-07, "loss": 0.002, "step": 354240 }, { "epoch": 3.784924408355147, "grad_norm": 0.1175801008939743, "learning_rate": 8.214637275993279e-07, "loss": 0.0119, "step": 354250 }, { "epoch": 3.7850312516694267, "grad_norm": 0.0015961288008838892, "learning_rate": 8.214508591849068e-07, "loss": 0.0421, "step": 354260 }, { "epoch": 3.785138094983706, "grad_norm": 3.0840110778808594, "learning_rate": 8.214379904075433e-07, "loss": 0.0125, "step": 354270 }, { "epoch": 3.785244938297986, "grad_norm": 0.4522385001182556, "learning_rate": 8.214251212672519e-07, "loss": 0.0185, "step": 354280 }, { "epoch": 3.7853517816122655, "grad_norm": 0.0012278066715225577, "learning_rate": 8.214122517640474e-07, "loss": 0.0167, "step": 354290 }, { "epoch": 3.7854586249265454, "grad_norm": 0.33474457263946533, "learning_rate": 8.213993818979442e-07, "loss": 0.0062, "step": 354300 }, { "epoch": 3.785565468240825, "grad_norm": 3.760113000869751, "learning_rate": 8.213865116689569e-07, "loss": 0.0566, "step": 354310 }, { "epoch": 3.7856723115551043, "grad_norm": 0.17675665020942688, "learning_rate": 8.213736410771e-07, "loss": 0.0038, "step": 354320 }, { "epoch": 3.785779154869384, "grad_norm": 3.8452632427215576, "learning_rate": 8.213607701223879e-07, "loss": 0.0451, "step": 354330 }, { "epoch": 3.7858859981836637, "grad_norm": 0.38006991147994995, "learning_rate": 8.213478988048352e-07, "loss": 0.006, "step": 354340 }, { "epoch": 3.785992841497943, "grad_norm": 0.31898629665374756, "learning_rate": 8.213350271244565e-07, "loss": 0.0032, "step": 354350 }, { "epoch": 3.786099684812223, "grad_norm": 0.10598685592412949, "learning_rate": 8.213221550812664e-07, "loss": 0.0102, "step": 354360 }, { "epoch": 3.7862065281265025, "grad_norm": 3.023129940032959, "learning_rate": 8.213092826752792e-07, "loss": 0.0158, "step": 354370 }, { "epoch": 3.786313371440782, "grad_norm": 3.450403928756714, "learning_rate": 8.212964099065099e-07, "loss": 0.0051, "step": 354380 }, { "epoch": 3.786420214755062, "grad_norm": 2.6448974609375, "learning_rate": 8.212835367749723e-07, "loss": 0.007, "step": 354390 }, { "epoch": 3.7865270580693413, "grad_norm": 0.039040111005306244, "learning_rate": 8.212706632806815e-07, "loss": 0.0283, "step": 354400 }, { "epoch": 3.7866339013836208, "grad_norm": 0.0507439449429512, "learning_rate": 8.212577894236518e-07, "loss": 0.0346, "step": 354410 }, { "epoch": 3.7867407446979007, "grad_norm": 0.025781074538826942, "learning_rate": 8.212449152038979e-07, "loss": 0.001, "step": 354420 }, { "epoch": 3.78684758801218, "grad_norm": 0.10155629366636276, "learning_rate": 8.212320406214343e-07, "loss": 0.023, "step": 354430 }, { "epoch": 3.7869544313264596, "grad_norm": 0.0026653327513486147, "learning_rate": 8.212191656762755e-07, "loss": 0.0131, "step": 354440 }, { "epoch": 3.7870612746407395, "grad_norm": 0.10039480775594711, "learning_rate": 8.212062903684358e-07, "loss": 0.0002, "step": 354450 }, { "epoch": 3.787168117955019, "grad_norm": 0.9035808444023132, "learning_rate": 8.211934146979302e-07, "loss": 0.0049, "step": 354460 }, { "epoch": 3.7872749612692984, "grad_norm": 5.7452826499938965, "learning_rate": 8.211805386647727e-07, "loss": 0.0036, "step": 354470 }, { "epoch": 3.7873818045835783, "grad_norm": 5.707221508026123, "learning_rate": 8.211676622689784e-07, "loss": 0.0195, "step": 354480 }, { "epoch": 3.7874886478978578, "grad_norm": 0.03432125970721245, "learning_rate": 8.211547855105613e-07, "loss": 0.0355, "step": 354490 }, { "epoch": 3.7875954912121372, "grad_norm": 0.7685706615447998, "learning_rate": 8.211419083895365e-07, "loss": 0.03, "step": 354500 }, { "epoch": 3.787702334526417, "grad_norm": 0.36683401465415955, "learning_rate": 8.211290309059182e-07, "loss": 0.0213, "step": 354510 }, { "epoch": 3.7878091778406966, "grad_norm": 0.24837535619735718, "learning_rate": 8.211161530597208e-07, "loss": 0.0012, "step": 354520 }, { "epoch": 3.7879160211549765, "grad_norm": 0.0023849308490753174, "learning_rate": 8.211032748509592e-07, "loss": 0.0218, "step": 354530 }, { "epoch": 3.788022864469256, "grad_norm": 0.012476174160838127, "learning_rate": 8.210903962796477e-07, "loss": 0.0526, "step": 354540 }, { "epoch": 3.7881297077835354, "grad_norm": 0.512589156627655, "learning_rate": 8.210775173458009e-07, "loss": 0.0035, "step": 354550 }, { "epoch": 3.788236551097815, "grad_norm": 0.0036249763797968626, "learning_rate": 8.210646380494335e-07, "loss": 0.0256, "step": 354560 }, { "epoch": 3.7883433944120948, "grad_norm": 0.6747426986694336, "learning_rate": 8.210517583905597e-07, "loss": 0.0208, "step": 354570 }, { "epoch": 3.7884502377263742, "grad_norm": 0.2301291972398758, "learning_rate": 8.210388783691944e-07, "loss": 0.0085, "step": 354580 }, { "epoch": 3.788557081040654, "grad_norm": 3.6925480365753174, "learning_rate": 8.210259979853518e-07, "loss": 0.0228, "step": 354590 }, { "epoch": 3.7886639243549336, "grad_norm": 0.007694989442825317, "learning_rate": 8.210131172390467e-07, "loss": 0.0148, "step": 354600 }, { "epoch": 3.788770767669213, "grad_norm": 0.47433051466941833, "learning_rate": 8.210002361302935e-07, "loss": 0.0012, "step": 354610 }, { "epoch": 3.7888776109834925, "grad_norm": 0.006300019565969706, "learning_rate": 8.209873546591069e-07, "loss": 0.0059, "step": 354620 }, { "epoch": 3.7889844542977724, "grad_norm": 0.07051138579845428, "learning_rate": 8.209744728255013e-07, "loss": 0.0063, "step": 354630 }, { "epoch": 3.789091297612052, "grad_norm": 0.005356327630579472, "learning_rate": 8.209615906294913e-07, "loss": 0.0073, "step": 354640 }, { "epoch": 3.789198140926332, "grad_norm": 0.06090956926345825, "learning_rate": 8.209487080710914e-07, "loss": 0.0103, "step": 354650 }, { "epoch": 3.7893049842406112, "grad_norm": 0.0016966026742011309, "learning_rate": 8.209358251503163e-07, "loss": 0.0117, "step": 354660 }, { "epoch": 3.7894118275548907, "grad_norm": 1.936383843421936, "learning_rate": 8.209229418671803e-07, "loss": 0.0136, "step": 354670 }, { "epoch": 3.78951867086917, "grad_norm": 8.500685691833496, "learning_rate": 8.209100582216981e-07, "loss": 0.0139, "step": 354680 }, { "epoch": 3.78962551418345, "grad_norm": 1.1516863107681274, "learning_rate": 8.208971742138844e-07, "loss": 0.0034, "step": 354690 }, { "epoch": 3.7897323574977295, "grad_norm": 0.9686474800109863, "learning_rate": 8.208842898437534e-07, "loss": 0.0191, "step": 354700 }, { "epoch": 3.7898392008120094, "grad_norm": 3.9040091037750244, "learning_rate": 8.208714051113198e-07, "loss": 0.0074, "step": 354710 }, { "epoch": 3.789946044126289, "grad_norm": 0.008321236819028854, "learning_rate": 8.208585200165983e-07, "loss": 0.004, "step": 354720 }, { "epoch": 3.7900528874405683, "grad_norm": 7.712172031402588, "learning_rate": 8.208456345596032e-07, "loss": 0.0091, "step": 354730 }, { "epoch": 3.790159730754848, "grad_norm": 3.9187819957733154, "learning_rate": 8.208327487403493e-07, "loss": 0.0147, "step": 354740 }, { "epoch": 3.7902665740691277, "grad_norm": 1.9277554750442505, "learning_rate": 8.208198625588508e-07, "loss": 0.0271, "step": 354750 }, { "epoch": 3.790373417383407, "grad_norm": 0.009404490701854229, "learning_rate": 8.208069760151227e-07, "loss": 0.0091, "step": 354760 }, { "epoch": 3.790480260697687, "grad_norm": 0.1147971823811531, "learning_rate": 8.207940891091791e-07, "loss": 0.0271, "step": 354770 }, { "epoch": 3.7905871040119665, "grad_norm": 2.341273069381714, "learning_rate": 8.207812018410348e-07, "loss": 0.0165, "step": 354780 }, { "epoch": 3.790693947326246, "grad_norm": 3.4379732608795166, "learning_rate": 8.207683142107044e-07, "loss": 0.0146, "step": 354790 }, { "epoch": 3.7908007906405254, "grad_norm": 0.03999214619398117, "learning_rate": 8.207554262182024e-07, "loss": 0.0325, "step": 354800 }, { "epoch": 3.7909076339548053, "grad_norm": 2.2019264698028564, "learning_rate": 8.207425378635431e-07, "loss": 0.0028, "step": 354810 }, { "epoch": 3.791014477269085, "grad_norm": 0.004201882518827915, "learning_rate": 8.207296491467415e-07, "loss": 0.0169, "step": 354820 }, { "epoch": 3.7911213205833647, "grad_norm": 0.013171156868338585, "learning_rate": 8.207167600678117e-07, "loss": 0.0176, "step": 354830 }, { "epoch": 3.791228163897644, "grad_norm": 0.052771300077438354, "learning_rate": 8.207038706267686e-07, "loss": 0.0073, "step": 354840 }, { "epoch": 3.7913350072119236, "grad_norm": 0.004433798138052225, "learning_rate": 8.206909808236266e-07, "loss": 0.0086, "step": 354850 }, { "epoch": 3.791441850526203, "grad_norm": 4.350432872772217, "learning_rate": 8.206780906584004e-07, "loss": 0.0102, "step": 354860 }, { "epoch": 3.791548693840483, "grad_norm": 0.030521554872393608, "learning_rate": 8.206652001311043e-07, "loss": 0.068, "step": 354870 }, { "epoch": 3.7916555371547624, "grad_norm": 2.525869846343994, "learning_rate": 8.20652309241753e-07, "loss": 0.0125, "step": 354880 }, { "epoch": 3.7917623804690423, "grad_norm": 0.029535381123423576, "learning_rate": 8.206394179903611e-07, "loss": 0.0179, "step": 354890 }, { "epoch": 3.791869223783322, "grad_norm": 0.6129951477050781, "learning_rate": 8.206265263769432e-07, "loss": 0.0119, "step": 354900 }, { "epoch": 3.7919760670976013, "grad_norm": 1.0624964237213135, "learning_rate": 8.206136344015136e-07, "loss": 0.0195, "step": 354910 }, { "epoch": 3.7920829104118807, "grad_norm": 0.8025289177894592, "learning_rate": 8.206007420640869e-07, "loss": 0.0111, "step": 354920 }, { "epoch": 3.7921897537261606, "grad_norm": 0.03517412021756172, "learning_rate": 8.205878493646779e-07, "loss": 0.0167, "step": 354930 }, { "epoch": 3.79229659704044, "grad_norm": 0.07622407376766205, "learning_rate": 8.205749563033011e-07, "loss": 0.0022, "step": 354940 }, { "epoch": 3.79240344035472, "grad_norm": 2.5551254749298096, "learning_rate": 8.205620628799707e-07, "loss": 0.0034, "step": 354950 }, { "epoch": 3.7925102836689994, "grad_norm": 6.388823986053467, "learning_rate": 8.205491690947019e-07, "loss": 0.0234, "step": 354960 }, { "epoch": 3.792617126983279, "grad_norm": 0.003912573680281639, "learning_rate": 8.205362749475086e-07, "loss": 0.0105, "step": 354970 }, { "epoch": 3.7927239702975584, "grad_norm": 0.7500984072685242, "learning_rate": 8.205233804384059e-07, "loss": 0.0117, "step": 354980 }, { "epoch": 3.7928308136118383, "grad_norm": 0.906696617603302, "learning_rate": 8.205104855674078e-07, "loss": 0.0063, "step": 354990 }, { "epoch": 3.7929376569261177, "grad_norm": 0.0013263380387797952, "learning_rate": 8.204975903345295e-07, "loss": 0.004, "step": 355000 }, { "epoch": 3.7930445002403976, "grad_norm": 0.09921355545520782, "learning_rate": 8.204846947397851e-07, "loss": 0.0096, "step": 355010 }, { "epoch": 3.793151343554677, "grad_norm": 0.16181211173534393, "learning_rate": 8.204717987831893e-07, "loss": 0.0027, "step": 355020 }, { "epoch": 3.7932581868689565, "grad_norm": 0.6567890048027039, "learning_rate": 8.204589024647566e-07, "loss": 0.0034, "step": 355030 }, { "epoch": 3.793365030183236, "grad_norm": 1.7864867448806763, "learning_rate": 8.204460057845017e-07, "loss": 0.0035, "step": 355040 }, { "epoch": 3.793471873497516, "grad_norm": 0.0070924884639680386, "learning_rate": 8.20433108742439e-07, "loss": 0.0183, "step": 355050 }, { "epoch": 3.7935787168117954, "grad_norm": 0.0643770843744278, "learning_rate": 8.204202113385832e-07, "loss": 0.0103, "step": 355060 }, { "epoch": 3.7936855601260753, "grad_norm": 0.009694725275039673, "learning_rate": 8.204073135729488e-07, "loss": 0.0279, "step": 355070 }, { "epoch": 3.7937924034403547, "grad_norm": 4.397455215454102, "learning_rate": 8.203944154455502e-07, "loss": 0.0022, "step": 355080 }, { "epoch": 3.793899246754634, "grad_norm": 0.01550927385687828, "learning_rate": 8.203815169564024e-07, "loss": 0.0109, "step": 355090 }, { "epoch": 3.794006090068914, "grad_norm": 9.95405101776123, "learning_rate": 8.203686181055193e-07, "loss": 0.0072, "step": 355100 }, { "epoch": 3.7941129333831936, "grad_norm": 4.750870704650879, "learning_rate": 8.203557188929162e-07, "loss": 0.0137, "step": 355110 }, { "epoch": 3.794219776697473, "grad_norm": 0.036925945430994034, "learning_rate": 8.203428193186071e-07, "loss": 0.0305, "step": 355120 }, { "epoch": 3.794326620011753, "grad_norm": 4.037998199462891, "learning_rate": 8.20329919382607e-07, "loss": 0.0259, "step": 355130 }, { "epoch": 3.7944334633260324, "grad_norm": 0.025387365370988846, "learning_rate": 8.2031701908493e-07, "loss": 0.025, "step": 355140 }, { "epoch": 3.794540306640312, "grad_norm": 0.5868324637413025, "learning_rate": 8.203041184255911e-07, "loss": 0.0084, "step": 355150 }, { "epoch": 3.7946471499545917, "grad_norm": 14.425264358520508, "learning_rate": 8.202912174046046e-07, "loss": 0.036, "step": 355160 }, { "epoch": 3.794753993268871, "grad_norm": 8.322751998901367, "learning_rate": 8.202783160219851e-07, "loss": 0.0271, "step": 355170 }, { "epoch": 3.7948608365831507, "grad_norm": 0.12524005770683289, "learning_rate": 8.202654142777473e-07, "loss": 0.0124, "step": 355180 }, { "epoch": 3.7949676798974306, "grad_norm": 1.8325668573379517, "learning_rate": 8.202525121719056e-07, "loss": 0.0122, "step": 355190 }, { "epoch": 3.79507452321171, "grad_norm": 3.162442684173584, "learning_rate": 8.202396097044747e-07, "loss": 0.0335, "step": 355200 }, { "epoch": 3.7951813665259895, "grad_norm": 0.11922046542167664, "learning_rate": 8.202267068754691e-07, "loss": 0.0004, "step": 355210 }, { "epoch": 3.7952882098402694, "grad_norm": 0.6005296111106873, "learning_rate": 8.202138036849033e-07, "loss": 0.0336, "step": 355220 }, { "epoch": 3.795395053154549, "grad_norm": 1.659373164176941, "learning_rate": 8.202009001327921e-07, "loss": 0.0169, "step": 355230 }, { "epoch": 3.7955018964688287, "grad_norm": 1.406683325767517, "learning_rate": 8.201879962191498e-07, "loss": 0.0447, "step": 355240 }, { "epoch": 3.795608739783108, "grad_norm": 3.348237991333008, "learning_rate": 8.201750919439912e-07, "loss": 0.048, "step": 355250 }, { "epoch": 3.7957155830973877, "grad_norm": 0.02623041719198227, "learning_rate": 8.201621873073306e-07, "loss": 0.0156, "step": 355260 }, { "epoch": 3.795822426411667, "grad_norm": 3.0445353984832764, "learning_rate": 8.201492823091828e-07, "loss": 0.0391, "step": 355270 }, { "epoch": 3.795929269725947, "grad_norm": 0.1002543494105339, "learning_rate": 8.201363769495623e-07, "loss": 0.0064, "step": 355280 }, { "epoch": 3.7960361130402265, "grad_norm": 3.1663665771484375, "learning_rate": 8.201234712284836e-07, "loss": 0.031, "step": 355290 }, { "epoch": 3.7961429563545064, "grad_norm": 1.2078899145126343, "learning_rate": 8.201105651459614e-07, "loss": 0.0242, "step": 355300 }, { "epoch": 3.796249799668786, "grad_norm": 0.3014572858810425, "learning_rate": 8.200976587020103e-07, "loss": 0.0219, "step": 355310 }, { "epoch": 3.7963566429830653, "grad_norm": 0.0054815285839140415, "learning_rate": 8.200847518966447e-07, "loss": 0.0523, "step": 355320 }, { "epoch": 3.7964634862973448, "grad_norm": 0.011455448344349861, "learning_rate": 8.200718447298793e-07, "loss": 0.0133, "step": 355330 }, { "epoch": 3.7965703296116247, "grad_norm": 3.8302063941955566, "learning_rate": 8.200589372017286e-07, "loss": 0.0147, "step": 355340 }, { "epoch": 3.796677172925904, "grad_norm": 0.25800082087516785, "learning_rate": 8.200460293122071e-07, "loss": 0.001, "step": 355350 }, { "epoch": 3.796784016240184, "grad_norm": 0.01794770359992981, "learning_rate": 8.200331210613296e-07, "loss": 0.0089, "step": 355360 }, { "epoch": 3.7968908595544635, "grad_norm": 6.820008277893066, "learning_rate": 8.200202124491105e-07, "loss": 0.0254, "step": 355370 }, { "epoch": 3.796997702868743, "grad_norm": 0.012489263899624348, "learning_rate": 8.200073034755645e-07, "loss": 0.0408, "step": 355380 }, { "epoch": 3.7971045461830224, "grad_norm": 0.21966028213500977, "learning_rate": 8.19994394140706e-07, "loss": 0.0029, "step": 355390 }, { "epoch": 3.7972113894973023, "grad_norm": 1.1565229892730713, "learning_rate": 8.199814844445498e-07, "loss": 0.0084, "step": 355400 }, { "epoch": 3.7973182328115818, "grad_norm": 2.1063828468322754, "learning_rate": 8.199685743871103e-07, "loss": 0.0161, "step": 355410 }, { "epoch": 3.7974250761258617, "grad_norm": 4.789454936981201, "learning_rate": 8.199556639684021e-07, "loss": 0.0071, "step": 355420 }, { "epoch": 3.797531919440141, "grad_norm": 0.008425264619290829, "learning_rate": 8.199427531884398e-07, "loss": 0.0046, "step": 355430 }, { "epoch": 3.7976387627544206, "grad_norm": 0.04024236649274826, "learning_rate": 8.19929842047238e-07, "loss": 0.0074, "step": 355440 }, { "epoch": 3.7977456060687, "grad_norm": 0.012082095257937908, "learning_rate": 8.199169305448113e-07, "loss": 0.011, "step": 355450 }, { "epoch": 3.79785244938298, "grad_norm": 0.07535941898822784, "learning_rate": 8.199040186811742e-07, "loss": 0.0058, "step": 355460 }, { "epoch": 3.7979592926972594, "grad_norm": 1.383910059928894, "learning_rate": 8.198911064563414e-07, "loss": 0.0088, "step": 355470 }, { "epoch": 3.7980661360115393, "grad_norm": 0.03956303745508194, "learning_rate": 8.198781938703272e-07, "loss": 0.047, "step": 355480 }, { "epoch": 3.7981729793258188, "grad_norm": 6.141899108886719, "learning_rate": 8.198652809231465e-07, "loss": 0.0076, "step": 355490 }, { "epoch": 3.7982798226400982, "grad_norm": 0.001974760787561536, "learning_rate": 8.198523676148139e-07, "loss": 0.0191, "step": 355500 }, { "epoch": 3.7983866659543777, "grad_norm": 4.69403076171875, "learning_rate": 8.198394539453436e-07, "loss": 0.0147, "step": 355510 }, { "epoch": 3.7984935092686576, "grad_norm": 0.03634508699178696, "learning_rate": 8.198265399147506e-07, "loss": 0.0131, "step": 355520 }, { "epoch": 3.798600352582937, "grad_norm": 0.004286749754101038, "learning_rate": 8.198136255230493e-07, "loss": 0.0119, "step": 355530 }, { "epoch": 3.798707195897217, "grad_norm": 0.059793561697006226, "learning_rate": 8.198007107702541e-07, "loss": 0.0041, "step": 355540 }, { "epoch": 3.7988140392114964, "grad_norm": 2.8586347103118896, "learning_rate": 8.197877956563799e-07, "loss": 0.0207, "step": 355550 }, { "epoch": 3.798920882525776, "grad_norm": 6.308444499969482, "learning_rate": 8.19774880181441e-07, "loss": 0.016, "step": 355560 }, { "epoch": 3.7990277258400553, "grad_norm": 0.024731779471039772, "learning_rate": 8.197619643454523e-07, "loss": 0.0087, "step": 355570 }, { "epoch": 3.7991345691543352, "grad_norm": 1.2903658151626587, "learning_rate": 8.19749048148428e-07, "loss": 0.0091, "step": 355580 }, { "epoch": 3.7992414124686147, "grad_norm": 0.017534365877509117, "learning_rate": 8.19736131590383e-07, "loss": 0.0098, "step": 355590 }, { "epoch": 3.7993482557828946, "grad_norm": 0.7937431931495667, "learning_rate": 8.197232146713317e-07, "loss": 0.0196, "step": 355600 }, { "epoch": 3.799455099097174, "grad_norm": 0.04114055633544922, "learning_rate": 8.19710297391289e-07, "loss": 0.0078, "step": 355610 }, { "epoch": 3.7995619424114535, "grad_norm": 0.09899034351110458, "learning_rate": 8.19697379750269e-07, "loss": 0.0034, "step": 355620 }, { "epoch": 3.799668785725733, "grad_norm": 1.3628053665161133, "learning_rate": 8.196844617482867e-07, "loss": 0.0127, "step": 355630 }, { "epoch": 3.799775629040013, "grad_norm": 0.006011591758579016, "learning_rate": 8.196715433853564e-07, "loss": 0.0213, "step": 355640 }, { "epoch": 3.7998824723542923, "grad_norm": 0.12108390033245087, "learning_rate": 8.196586246614929e-07, "loss": 0.0108, "step": 355650 }, { "epoch": 3.7999893156685722, "grad_norm": 8.270100593566895, "learning_rate": 8.196457055767108e-07, "loss": 0.0155, "step": 355660 }, { "epoch": 3.8000961589828517, "grad_norm": 12.93183708190918, "learning_rate": 8.196327861310243e-07, "loss": 0.0123, "step": 355670 }, { "epoch": 3.800203002297131, "grad_norm": 0.11508048325777054, "learning_rate": 8.196198663244484e-07, "loss": 0.0213, "step": 355680 }, { "epoch": 3.8003098456114106, "grad_norm": 2.612183094024658, "learning_rate": 8.196069461569974e-07, "loss": 0.0091, "step": 355690 }, { "epoch": 3.8004166889256905, "grad_norm": 0.016167420893907547, "learning_rate": 8.195940256286863e-07, "loss": 0.0035, "step": 355700 }, { "epoch": 3.80052353223997, "grad_norm": 0.008253506384789944, "learning_rate": 8.195811047395292e-07, "loss": 0.039, "step": 355710 }, { "epoch": 3.80063037555425, "grad_norm": 0.9653648138046265, "learning_rate": 8.19568183489541e-07, "loss": 0.0002, "step": 355720 }, { "epoch": 3.8007372188685293, "grad_norm": 4.6623334884643555, "learning_rate": 8.195552618787362e-07, "loss": 0.006, "step": 355730 }, { "epoch": 3.800844062182809, "grad_norm": 0.004194939509034157, "learning_rate": 8.195423399071294e-07, "loss": 0.0111, "step": 355740 }, { "epoch": 3.8009509054970883, "grad_norm": 0.0035534005146473646, "learning_rate": 8.195294175747352e-07, "loss": 0.0194, "step": 355750 }, { "epoch": 3.801057748811368, "grad_norm": 0.18836072087287903, "learning_rate": 8.19516494881568e-07, "loss": 0.0099, "step": 355760 }, { "epoch": 3.8011645921256476, "grad_norm": 1.6119189262390137, "learning_rate": 8.195035718276429e-07, "loss": 0.0137, "step": 355770 }, { "epoch": 3.8012714354399275, "grad_norm": 7.742469787597656, "learning_rate": 8.194906484129739e-07, "loss": 0.0245, "step": 355780 }, { "epoch": 3.801378278754207, "grad_norm": 1.32821786403656, "learning_rate": 8.194777246375758e-07, "loss": 0.0112, "step": 355790 }, { "epoch": 3.8014851220684864, "grad_norm": 0.5611532330513, "learning_rate": 8.194648005014634e-07, "loss": 0.0029, "step": 355800 }, { "epoch": 3.8015919653827663, "grad_norm": 0.013199753127992153, "learning_rate": 8.194518760046511e-07, "loss": 0.0276, "step": 355810 }, { "epoch": 3.801698808697046, "grad_norm": 0.4671868681907654, "learning_rate": 8.194389511471533e-07, "loss": 0.0269, "step": 355820 }, { "epoch": 3.8018056520113253, "grad_norm": 0.010834882967174053, "learning_rate": 8.194260259289851e-07, "loss": 0.0336, "step": 355830 }, { "epoch": 3.801912495325605, "grad_norm": 3.552905321121216, "learning_rate": 8.194131003501608e-07, "loss": 0.007, "step": 355840 }, { "epoch": 3.8020193386398846, "grad_norm": 1.0017296075820923, "learning_rate": 8.194001744106949e-07, "loss": 0.0717, "step": 355850 }, { "epoch": 3.802126181954164, "grad_norm": 14.740705490112305, "learning_rate": 8.193872481106021e-07, "loss": 0.0133, "step": 355860 }, { "epoch": 3.802233025268444, "grad_norm": 0.0052759768441319466, "learning_rate": 8.193743214498971e-07, "loss": 0.0067, "step": 355870 }, { "epoch": 3.8023398685827234, "grad_norm": 0.07147011905908585, "learning_rate": 8.193613944285942e-07, "loss": 0.0114, "step": 355880 }, { "epoch": 3.802446711897003, "grad_norm": 5.164864540100098, "learning_rate": 8.193484670467082e-07, "loss": 0.0098, "step": 355890 }, { "epoch": 3.802553555211283, "grad_norm": 1.9087536334991455, "learning_rate": 8.193355393042538e-07, "loss": 0.0058, "step": 355900 }, { "epoch": 3.8026603985255623, "grad_norm": 3.778806447982788, "learning_rate": 8.193226112012455e-07, "loss": 0.0219, "step": 355910 }, { "epoch": 3.8027672418398417, "grad_norm": 0.008024638518691063, "learning_rate": 8.193096827376977e-07, "loss": 0.0064, "step": 355920 }, { "epoch": 3.8028740851541216, "grad_norm": 2.580207347869873, "learning_rate": 8.192967539136253e-07, "loss": 0.0126, "step": 355930 }, { "epoch": 3.802980928468401, "grad_norm": 0.0018586207879707217, "learning_rate": 8.192838247290427e-07, "loss": 0.0018, "step": 355940 }, { "epoch": 3.8030877717826805, "grad_norm": 2.0812788009643555, "learning_rate": 8.192708951839647e-07, "loss": 0.0062, "step": 355950 }, { "epoch": 3.8031946150969604, "grad_norm": 0.002880165819078684, "learning_rate": 8.192579652784056e-07, "loss": 0.0133, "step": 355960 }, { "epoch": 3.80330145841124, "grad_norm": 0.04433143511414528, "learning_rate": 8.1924503501238e-07, "loss": 0.0045, "step": 355970 }, { "epoch": 3.8034083017255194, "grad_norm": 0.006613708566874266, "learning_rate": 8.19232104385903e-07, "loss": 0.0033, "step": 355980 }, { "epoch": 3.8035151450397993, "grad_norm": 5.934203147888184, "learning_rate": 8.192191733989887e-07, "loss": 0.0118, "step": 355990 }, { "epoch": 3.8036219883540787, "grad_norm": 0.05450110882520676, "learning_rate": 8.192062420516519e-07, "loss": 0.0031, "step": 356000 }, { "epoch": 3.8037288316683586, "grad_norm": 1.806374192237854, "learning_rate": 8.191933103439072e-07, "loss": 0.0051, "step": 356010 }, { "epoch": 3.803835674982638, "grad_norm": 0.008019383065402508, "learning_rate": 8.191803782757692e-07, "loss": 0.0094, "step": 356020 }, { "epoch": 3.8039425182969175, "grad_norm": 4.4528985023498535, "learning_rate": 8.191674458472524e-07, "loss": 0.0276, "step": 356030 }, { "epoch": 3.804049361611197, "grad_norm": 0.01060411799699068, "learning_rate": 8.191545130583715e-07, "loss": 0.0048, "step": 356040 }, { "epoch": 3.804156204925477, "grad_norm": 7.353969573974609, "learning_rate": 8.191415799091409e-07, "loss": 0.0356, "step": 356050 }, { "epoch": 3.8042630482397564, "grad_norm": 3.1047651767730713, "learning_rate": 8.191286463995755e-07, "loss": 0.0579, "step": 356060 }, { "epoch": 3.8043698915540363, "grad_norm": 3.387563467025757, "learning_rate": 8.191157125296898e-07, "loss": 0.0154, "step": 356070 }, { "epoch": 3.8044767348683157, "grad_norm": 0.09453634172677994, "learning_rate": 8.191027782994983e-07, "loss": 0.0015, "step": 356080 }, { "epoch": 3.804583578182595, "grad_norm": 4.095574378967285, "learning_rate": 8.190898437090157e-07, "loss": 0.009, "step": 356090 }, { "epoch": 3.8046904214968746, "grad_norm": 1.9530518054962158, "learning_rate": 8.190769087582566e-07, "loss": 0.0271, "step": 356100 }, { "epoch": 3.8047972648111545, "grad_norm": 4.260434150695801, "learning_rate": 8.190639734472355e-07, "loss": 0.0119, "step": 356110 }, { "epoch": 3.804904108125434, "grad_norm": 0.9004327058792114, "learning_rate": 8.190510377759673e-07, "loss": 0.0145, "step": 356120 }, { "epoch": 3.805010951439714, "grad_norm": 4.2262468338012695, "learning_rate": 8.190381017444663e-07, "loss": 0.0264, "step": 356130 }, { "epoch": 3.8051177947539934, "grad_norm": 0.14691604673862457, "learning_rate": 8.190251653527471e-07, "loss": 0.0632, "step": 356140 }, { "epoch": 3.805224638068273, "grad_norm": 1.2228015661239624, "learning_rate": 8.190122286008245e-07, "loss": 0.0174, "step": 356150 }, { "epoch": 3.8053314813825523, "grad_norm": 0.004690867383033037, "learning_rate": 8.18999291488713e-07, "loss": 0.0078, "step": 356160 }, { "epoch": 3.805438324696832, "grad_norm": 1.386114239692688, "learning_rate": 8.189863540164272e-07, "loss": 0.0051, "step": 356170 }, { "epoch": 3.8055451680111116, "grad_norm": 11.17824649810791, "learning_rate": 8.189734161839818e-07, "loss": 0.0215, "step": 356180 }, { "epoch": 3.8056520113253915, "grad_norm": 19.5386905670166, "learning_rate": 8.189604779913913e-07, "loss": 0.0241, "step": 356190 }, { "epoch": 3.805758854639671, "grad_norm": 0.001169638941064477, "learning_rate": 8.189475394386705e-07, "loss": 0.0153, "step": 356200 }, { "epoch": 3.8058656979539505, "grad_norm": 0.08735410869121552, "learning_rate": 8.189346005258337e-07, "loss": 0.0048, "step": 356210 }, { "epoch": 3.80597254126823, "grad_norm": 0.038844745606184006, "learning_rate": 8.189216612528957e-07, "loss": 0.0056, "step": 356220 }, { "epoch": 3.80607938458251, "grad_norm": 2.6375467777252197, "learning_rate": 8.189087216198709e-07, "loss": 0.0109, "step": 356230 }, { "epoch": 3.8061862278967893, "grad_norm": 0.6816983819007874, "learning_rate": 8.188957816267743e-07, "loss": 0.0174, "step": 356240 }, { "epoch": 3.806293071211069, "grad_norm": 0.015151162631809711, "learning_rate": 8.188828412736201e-07, "loss": 0.0148, "step": 356250 }, { "epoch": 3.8063999145253486, "grad_norm": 8.01158332824707, "learning_rate": 8.188699005604233e-07, "loss": 0.0324, "step": 356260 }, { "epoch": 3.806506757839628, "grad_norm": 0.34059372544288635, "learning_rate": 8.188569594871984e-07, "loss": 0.0124, "step": 356270 }, { "epoch": 3.8066136011539076, "grad_norm": 0.07362152636051178, "learning_rate": 8.188440180539596e-07, "loss": 0.0035, "step": 356280 }, { "epoch": 3.8067204444681875, "grad_norm": 9.0426025390625, "learning_rate": 8.188310762607221e-07, "loss": 0.0401, "step": 356290 }, { "epoch": 3.806827287782467, "grad_norm": 0.029738357290625572, "learning_rate": 8.188181341075003e-07, "loss": 0.0018, "step": 356300 }, { "epoch": 3.806934131096747, "grad_norm": 4.80368709564209, "learning_rate": 8.188051915943085e-07, "loss": 0.0065, "step": 356310 }, { "epoch": 3.8070409744110263, "grad_norm": 5.971827030181885, "learning_rate": 8.187922487211617e-07, "loss": 0.0181, "step": 356320 }, { "epoch": 3.8071478177253057, "grad_norm": 0.0028758624102920294, "learning_rate": 8.187793054880745e-07, "loss": 0.0103, "step": 356330 }, { "epoch": 3.807254661039585, "grad_norm": 1.4635965824127197, "learning_rate": 8.187663618950613e-07, "loss": 0.0133, "step": 356340 }, { "epoch": 3.807361504353865, "grad_norm": 0.002481005387380719, "learning_rate": 8.187534179421368e-07, "loss": 0.0474, "step": 356350 }, { "epoch": 3.8074683476681446, "grad_norm": 0.016538001596927643, "learning_rate": 8.187404736293156e-07, "loss": 0.002, "step": 356360 }, { "epoch": 3.8075751909824245, "grad_norm": 0.34481051564216614, "learning_rate": 8.187275289566124e-07, "loss": 0.0264, "step": 356370 }, { "epoch": 3.807682034296704, "grad_norm": 2.5281245708465576, "learning_rate": 8.187145839240416e-07, "loss": 0.003, "step": 356380 }, { "epoch": 3.8077888776109834, "grad_norm": 0.49376028776168823, "learning_rate": 8.187016385316183e-07, "loss": 0.0115, "step": 356390 }, { "epoch": 3.807895720925263, "grad_norm": 0.026478273794054985, "learning_rate": 8.186886927793566e-07, "loss": 0.0084, "step": 356400 }, { "epoch": 3.8080025642395428, "grad_norm": 0.0303809717297554, "learning_rate": 8.186757466672715e-07, "loss": 0.0315, "step": 356410 }, { "epoch": 3.808109407553822, "grad_norm": 0.00989026390016079, "learning_rate": 8.186628001953772e-07, "loss": 0.0053, "step": 356420 }, { "epoch": 3.808216250868102, "grad_norm": 0.007573755923658609, "learning_rate": 8.186498533636886e-07, "loss": 0.0469, "step": 356430 }, { "epoch": 3.8083230941823816, "grad_norm": 0.07119987159967422, "learning_rate": 8.186369061722204e-07, "loss": 0.0461, "step": 356440 }, { "epoch": 3.808429937496661, "grad_norm": 5.850101947784424, "learning_rate": 8.186239586209869e-07, "loss": 0.0119, "step": 356450 }, { "epoch": 3.8085367808109405, "grad_norm": 4.464505672454834, "learning_rate": 8.18611010710003e-07, "loss": 0.0244, "step": 356460 }, { "epoch": 3.8086436241252204, "grad_norm": 2.0101370811462402, "learning_rate": 8.185980624392833e-07, "loss": 0.0258, "step": 356470 }, { "epoch": 3.8087504674395, "grad_norm": 0.0010357197606936097, "learning_rate": 8.185851138088422e-07, "loss": 0.0033, "step": 356480 }, { "epoch": 3.8088573107537798, "grad_norm": 2.147179126739502, "learning_rate": 8.185721648186945e-07, "loss": 0.002, "step": 356490 }, { "epoch": 3.808964154068059, "grad_norm": 0.006770496256649494, "learning_rate": 8.185592154688547e-07, "loss": 0.016, "step": 356500 }, { "epoch": 3.8090709973823387, "grad_norm": 0.6405777335166931, "learning_rate": 8.185462657593377e-07, "loss": 0.0291, "step": 356510 }, { "epoch": 3.809177840696618, "grad_norm": 0.47185230255126953, "learning_rate": 8.185333156901578e-07, "loss": 0.0077, "step": 356520 }, { "epoch": 3.809284684010898, "grad_norm": 0.00436505489051342, "learning_rate": 8.185203652613297e-07, "loss": 0.0051, "step": 356530 }, { "epoch": 3.8093915273251775, "grad_norm": 0.22265279293060303, "learning_rate": 8.185074144728683e-07, "loss": 0.0012, "step": 356540 }, { "epoch": 3.8094983706394574, "grad_norm": 0.17952024936676025, "learning_rate": 8.184944633247878e-07, "loss": 0.0089, "step": 356550 }, { "epoch": 3.809605213953737, "grad_norm": 0.057034607976675034, "learning_rate": 8.18481511817103e-07, "loss": 0.028, "step": 356560 }, { "epoch": 3.8097120572680163, "grad_norm": 4.5494537353515625, "learning_rate": 8.184685599498286e-07, "loss": 0.007, "step": 356570 }, { "epoch": 3.809818900582296, "grad_norm": 0.009437446482479572, "learning_rate": 8.184556077229791e-07, "loss": 0.0063, "step": 356580 }, { "epoch": 3.8099257438965757, "grad_norm": 8.915154457092285, "learning_rate": 8.184426551365693e-07, "loss": 0.0064, "step": 356590 }, { "epoch": 3.810032587210855, "grad_norm": 0.08665383607149124, "learning_rate": 8.184297021906137e-07, "loss": 0.0041, "step": 356600 }, { "epoch": 3.810139430525135, "grad_norm": 0.2840515971183777, "learning_rate": 8.184167488851268e-07, "loss": 0.005, "step": 356610 }, { "epoch": 3.8102462738394145, "grad_norm": 0.005765350069850683, "learning_rate": 8.184037952201234e-07, "loss": 0.0032, "step": 356620 }, { "epoch": 3.810353117153694, "grad_norm": 2.476155996322632, "learning_rate": 8.183908411956182e-07, "loss": 0.0034, "step": 356630 }, { "epoch": 3.810459960467974, "grad_norm": 0.005827957298606634, "learning_rate": 8.183778868116256e-07, "loss": 0.0347, "step": 356640 }, { "epoch": 3.8105668037822533, "grad_norm": 0.020020347088575363, "learning_rate": 8.183649320681602e-07, "loss": 0.02, "step": 356650 }, { "epoch": 3.810673647096533, "grad_norm": 0.01406578067690134, "learning_rate": 8.183519769652371e-07, "loss": 0.0137, "step": 356660 }, { "epoch": 3.8107804904108127, "grad_norm": 0.006285354029387236, "learning_rate": 8.183390215028703e-07, "loss": 0.0162, "step": 356670 }, { "epoch": 3.810887333725092, "grad_norm": 1.3473633527755737, "learning_rate": 8.183260656810749e-07, "loss": 0.0065, "step": 356680 }, { "epoch": 3.8109941770393716, "grad_norm": 0.08792854100465775, "learning_rate": 8.183131094998652e-07, "loss": 0.0287, "step": 356690 }, { "epoch": 3.8111010203536515, "grad_norm": 0.00425153411924839, "learning_rate": 8.183001529592561e-07, "loss": 0.0671, "step": 356700 }, { "epoch": 3.811207863667931, "grad_norm": 0.3171200752258301, "learning_rate": 8.18287196059262e-07, "loss": 0.0158, "step": 356710 }, { "epoch": 3.811314706982211, "grad_norm": 0.009166250005364418, "learning_rate": 8.182742387998978e-07, "loss": 0.0117, "step": 356720 }, { "epoch": 3.8114215502964903, "grad_norm": 5.265223503112793, "learning_rate": 8.182612811811778e-07, "loss": 0.0063, "step": 356730 }, { "epoch": 3.81152839361077, "grad_norm": 0.0019916840828955173, "learning_rate": 8.182483232031167e-07, "loss": 0.0117, "step": 356740 }, { "epoch": 3.8116352369250492, "grad_norm": 14.502946853637695, "learning_rate": 8.182353648657295e-07, "loss": 0.0506, "step": 356750 }, { "epoch": 3.811742080239329, "grad_norm": 0.8102915287017822, "learning_rate": 8.182224061690304e-07, "loss": 0.0339, "step": 356760 }, { "epoch": 3.8118489235536086, "grad_norm": 0.08041281253099442, "learning_rate": 8.182094471130342e-07, "loss": 0.0061, "step": 356770 }, { "epoch": 3.8119557668678885, "grad_norm": 9.326714515686035, "learning_rate": 8.181964876977556e-07, "loss": 0.0091, "step": 356780 }, { "epoch": 3.812062610182168, "grad_norm": 3.7883589267730713, "learning_rate": 8.18183527923209e-07, "loss": 0.0231, "step": 356790 }, { "epoch": 3.8121694534964474, "grad_norm": 1.232503056526184, "learning_rate": 8.181705677894093e-07, "loss": 0.0182, "step": 356800 }, { "epoch": 3.812276296810727, "grad_norm": 0.0049070329405367374, "learning_rate": 8.181576072963709e-07, "loss": 0.0413, "step": 356810 }, { "epoch": 3.812383140125007, "grad_norm": 0.0645565390586853, "learning_rate": 8.181446464441086e-07, "loss": 0.002, "step": 356820 }, { "epoch": 3.8124899834392862, "grad_norm": 1.3028348684310913, "learning_rate": 8.181316852326371e-07, "loss": 0.0057, "step": 356830 }, { "epoch": 3.812596826753566, "grad_norm": 2.945727586746216, "learning_rate": 8.181187236619709e-07, "loss": 0.0225, "step": 356840 }, { "epoch": 3.8127036700678456, "grad_norm": 0.0368657112121582, "learning_rate": 8.181057617321246e-07, "loss": 0.0055, "step": 356850 }, { "epoch": 3.812810513382125, "grad_norm": 1.418043613433838, "learning_rate": 8.180927994431127e-07, "loss": 0.0046, "step": 356860 }, { "epoch": 3.8129173566964045, "grad_norm": 0.011732884682714939, "learning_rate": 8.180798367949502e-07, "loss": 0.0121, "step": 356870 }, { "epoch": 3.8130242000106844, "grad_norm": 0.48767802119255066, "learning_rate": 8.180668737876514e-07, "loss": 0.0086, "step": 356880 }, { "epoch": 3.813131043324964, "grad_norm": 0.7132958769798279, "learning_rate": 8.180539104212314e-07, "loss": 0.0213, "step": 356890 }, { "epoch": 3.813237886639244, "grad_norm": 1.7555021047592163, "learning_rate": 8.180409466957043e-07, "loss": 0.0082, "step": 356900 }, { "epoch": 3.8133447299535232, "grad_norm": 9.733879089355469, "learning_rate": 8.18027982611085e-07, "loss": 0.0372, "step": 356910 }, { "epoch": 3.8134515732678027, "grad_norm": 6.963687896728516, "learning_rate": 8.180150181673883e-07, "loss": 0.0361, "step": 356920 }, { "epoch": 3.813558416582082, "grad_norm": 0.3796863555908203, "learning_rate": 8.180020533646284e-07, "loss": 0.0192, "step": 356930 }, { "epoch": 3.813665259896362, "grad_norm": 0.32017695903778076, "learning_rate": 8.179890882028203e-07, "loss": 0.0198, "step": 356940 }, { "epoch": 3.8137721032106415, "grad_norm": 0.006341747473925352, "learning_rate": 8.179761226819786e-07, "loss": 0.0424, "step": 356950 }, { "epoch": 3.8138789465249214, "grad_norm": 0.144398033618927, "learning_rate": 8.179631568021176e-07, "loss": 0.0155, "step": 356960 }, { "epoch": 3.813985789839201, "grad_norm": 0.013512984849512577, "learning_rate": 8.179501905632524e-07, "loss": 0.0048, "step": 356970 }, { "epoch": 3.8140926331534803, "grad_norm": 3.1674842834472656, "learning_rate": 8.179372239653974e-07, "loss": 0.0095, "step": 356980 }, { "epoch": 3.81419947646776, "grad_norm": 0.014779441058635712, "learning_rate": 8.179242570085671e-07, "loss": 0.0057, "step": 356990 }, { "epoch": 3.8143063197820397, "grad_norm": 0.08065227419137955, "learning_rate": 8.179112896927766e-07, "loss": 0.0143, "step": 357000 }, { "epoch": 3.814413163096319, "grad_norm": 11.434492111206055, "learning_rate": 8.178983220180401e-07, "loss": 0.0206, "step": 357010 }, { "epoch": 3.814520006410599, "grad_norm": 0.0012026192853227258, "learning_rate": 8.178853539843724e-07, "loss": 0.0142, "step": 357020 }, { "epoch": 3.8146268497248785, "grad_norm": 3.97668194770813, "learning_rate": 8.178723855917882e-07, "loss": 0.0069, "step": 357030 }, { "epoch": 3.814733693039158, "grad_norm": 0.0022746059112250805, "learning_rate": 8.178594168403021e-07, "loss": 0.0289, "step": 357040 }, { "epoch": 3.8148405363534375, "grad_norm": 0.009713461622595787, "learning_rate": 8.178464477299286e-07, "loss": 0.0281, "step": 357050 }, { "epoch": 3.8149473796677174, "grad_norm": 0.004421587102115154, "learning_rate": 8.178334782606826e-07, "loss": 0.0162, "step": 357060 }, { "epoch": 3.815054222981997, "grad_norm": 0.015747012570500374, "learning_rate": 8.178205084325787e-07, "loss": 0.0023, "step": 357070 }, { "epoch": 3.8151610662962767, "grad_norm": 8.06563949584961, "learning_rate": 8.178075382456312e-07, "loss": 0.0105, "step": 357080 }, { "epoch": 3.815267909610556, "grad_norm": 0.07584761083126068, "learning_rate": 8.177945676998553e-07, "loss": 0.0326, "step": 357090 }, { "epoch": 3.8153747529248356, "grad_norm": 0.030844490975141525, "learning_rate": 8.177815967952651e-07, "loss": 0.0373, "step": 357100 }, { "epoch": 3.815481596239115, "grad_norm": 4.056656837463379, "learning_rate": 8.177686255318755e-07, "loss": 0.0196, "step": 357110 }, { "epoch": 3.815588439553395, "grad_norm": 2.5164685249328613, "learning_rate": 8.177556539097012e-07, "loss": 0.0176, "step": 357120 }, { "epoch": 3.8156952828676745, "grad_norm": 0.023477111011743546, "learning_rate": 8.177426819287569e-07, "loss": 0.0175, "step": 357130 }, { "epoch": 3.8158021261819544, "grad_norm": 0.03274279832839966, "learning_rate": 8.17729709589057e-07, "loss": 0.0607, "step": 357140 }, { "epoch": 3.815908969496234, "grad_norm": 0.976020336151123, "learning_rate": 8.177167368906164e-07, "loss": 0.053, "step": 357150 }, { "epoch": 3.8160158128105133, "grad_norm": 6.084947109222412, "learning_rate": 8.177037638334494e-07, "loss": 0.0071, "step": 357160 }, { "epoch": 3.8161226561247927, "grad_norm": 0.09224430471658707, "learning_rate": 8.17690790417571e-07, "loss": 0.0042, "step": 357170 }, { "epoch": 3.8162294994390726, "grad_norm": 1.8799853324890137, "learning_rate": 8.176778166429959e-07, "loss": 0.0221, "step": 357180 }, { "epoch": 3.816336342753352, "grad_norm": 2.910353899002075, "learning_rate": 8.176648425097384e-07, "loss": 0.0232, "step": 357190 }, { "epoch": 3.816443186067632, "grad_norm": 0.5135012269020081, "learning_rate": 8.176518680178132e-07, "loss": 0.0099, "step": 357200 }, { "epoch": 3.8165500293819115, "grad_norm": 0.3140869736671448, "learning_rate": 8.176388931672353e-07, "loss": 0.0048, "step": 357210 }, { "epoch": 3.816656872696191, "grad_norm": 5.956277847290039, "learning_rate": 8.176259179580189e-07, "loss": 0.0124, "step": 357220 }, { "epoch": 3.8167637160104704, "grad_norm": 3.933987855911255, "learning_rate": 8.17612942390179e-07, "loss": 0.0115, "step": 357230 }, { "epoch": 3.8168705593247503, "grad_norm": 10.2704439163208, "learning_rate": 8.175999664637301e-07, "loss": 0.0271, "step": 357240 }, { "epoch": 3.8169774026390297, "grad_norm": 5.35495662689209, "learning_rate": 8.175869901786868e-07, "loss": 0.022, "step": 357250 }, { "epoch": 3.8170842459533096, "grad_norm": 4.5148844718933105, "learning_rate": 8.175740135350639e-07, "loss": 0.0143, "step": 357260 }, { "epoch": 3.817191089267589, "grad_norm": 0.02739815041422844, "learning_rate": 8.17561036532876e-07, "loss": 0.0042, "step": 357270 }, { "epoch": 3.8172979325818686, "grad_norm": 0.2539525330066681, "learning_rate": 8.175480591721375e-07, "loss": 0.0257, "step": 357280 }, { "epoch": 3.8174047758961485, "grad_norm": 4.572818279266357, "learning_rate": 8.175350814528636e-07, "loss": 0.0438, "step": 357290 }, { "epoch": 3.817511619210428, "grad_norm": 0.012402869760990143, "learning_rate": 8.175221033750684e-07, "loss": 0.0023, "step": 357300 }, { "epoch": 3.8176184625247074, "grad_norm": 0.014879188500344753, "learning_rate": 8.175091249387669e-07, "loss": 0.0052, "step": 357310 }, { "epoch": 3.8177253058389873, "grad_norm": 1.4517567157745361, "learning_rate": 8.174961461439736e-07, "loss": 0.0136, "step": 357320 }, { "epoch": 3.8178321491532667, "grad_norm": 0.06428807228803635, "learning_rate": 8.174831669907032e-07, "loss": 0.0222, "step": 357330 }, { "epoch": 3.817938992467546, "grad_norm": 7.456155776977539, "learning_rate": 8.174701874789702e-07, "loss": 0.007, "step": 357340 }, { "epoch": 3.818045835781826, "grad_norm": 0.005229075904935598, "learning_rate": 8.174572076087896e-07, "loss": 0.0185, "step": 357350 }, { "epoch": 3.8181526790961056, "grad_norm": 0.2276908904314041, "learning_rate": 8.174442273801756e-07, "loss": 0.0069, "step": 357360 }, { "epoch": 3.818259522410385, "grad_norm": 0.2787143886089325, "learning_rate": 8.174312467931435e-07, "loss": 0.0113, "step": 357370 }, { "epoch": 3.818366365724665, "grad_norm": 6.21035623550415, "learning_rate": 8.174182658477073e-07, "loss": 0.0213, "step": 357380 }, { "epoch": 3.8184732090389444, "grad_norm": 0.061216022819280624, "learning_rate": 8.174052845438819e-07, "loss": 0.0004, "step": 357390 }, { "epoch": 3.818580052353224, "grad_norm": 0.24011223018169403, "learning_rate": 8.17392302881682e-07, "loss": 0.0727, "step": 357400 }, { "epoch": 3.8186868956675037, "grad_norm": 0.002950146794319153, "learning_rate": 8.173793208611223e-07, "loss": 0.0062, "step": 357410 }, { "epoch": 3.818793738981783, "grad_norm": 0.013005979359149933, "learning_rate": 8.173663384822174e-07, "loss": 0.0248, "step": 357420 }, { "epoch": 3.8189005822960627, "grad_norm": 7.266369819641113, "learning_rate": 8.173533557449819e-07, "loss": 0.0132, "step": 357430 }, { "epoch": 3.8190074256103426, "grad_norm": 0.16212007403373718, "learning_rate": 8.173403726494305e-07, "loss": 0.0034, "step": 357440 }, { "epoch": 3.819114268924622, "grad_norm": 0.6446593403816223, "learning_rate": 8.173273891955778e-07, "loss": 0.0016, "step": 357450 }, { "epoch": 3.8192211122389015, "grad_norm": 4.711062431335449, "learning_rate": 8.173144053834387e-07, "loss": 0.0037, "step": 357460 }, { "epoch": 3.8193279555531814, "grad_norm": 0.19776484370231628, "learning_rate": 8.173014212130276e-07, "loss": 0.0015, "step": 357470 }, { "epoch": 3.819434798867461, "grad_norm": 0.10573345422744751, "learning_rate": 8.172884366843593e-07, "loss": 0.0376, "step": 357480 }, { "epoch": 3.8195416421817407, "grad_norm": 0.018095705658197403, "learning_rate": 8.172754517974483e-07, "loss": 0.0213, "step": 357490 }, { "epoch": 3.81964848549602, "grad_norm": 0.009144812822341919, "learning_rate": 8.172624665523095e-07, "loss": 0.0216, "step": 357500 }, { "epoch": 3.8197553288102997, "grad_norm": 0.9378324151039124, "learning_rate": 8.172494809489574e-07, "loss": 0.001, "step": 357510 }, { "epoch": 3.819862172124579, "grad_norm": 0.015184939838945866, "learning_rate": 8.172364949874067e-07, "loss": 0.0013, "step": 357520 }, { "epoch": 3.819969015438859, "grad_norm": 0.005924257915467024, "learning_rate": 8.172235086676718e-07, "loss": 0.0109, "step": 357530 }, { "epoch": 3.8200758587531385, "grad_norm": 0.899232029914856, "learning_rate": 8.172105219897679e-07, "loss": 0.0044, "step": 357540 }, { "epoch": 3.8201827020674184, "grad_norm": 1.222255825996399, "learning_rate": 8.171975349537095e-07, "loss": 0.0143, "step": 357550 }, { "epoch": 3.820289545381698, "grad_norm": 0.049924664199352264, "learning_rate": 8.171845475595107e-07, "loss": 0.0014, "step": 357560 }, { "epoch": 3.8203963886959773, "grad_norm": 0.007355925627052784, "learning_rate": 8.17171559807187e-07, "loss": 0.0171, "step": 357570 }, { "epoch": 3.8205032320102568, "grad_norm": 0.03489163517951965, "learning_rate": 8.171585716967526e-07, "loss": 0.0138, "step": 357580 }, { "epoch": 3.8206100753245367, "grad_norm": 0.2667933702468872, "learning_rate": 8.171455832282221e-07, "loss": 0.0146, "step": 357590 }, { "epoch": 3.820716918638816, "grad_norm": 0.023562267422676086, "learning_rate": 8.171325944016103e-07, "loss": 0.0099, "step": 357600 }, { "epoch": 3.820823761953096, "grad_norm": 0.06619063019752502, "learning_rate": 8.171196052169321e-07, "loss": 0.0158, "step": 357610 }, { "epoch": 3.8209306052673755, "grad_norm": 8.738765716552734, "learning_rate": 8.171066156742017e-07, "loss": 0.0344, "step": 357620 }, { "epoch": 3.821037448581655, "grad_norm": 2.552699089050293, "learning_rate": 8.17093625773434e-07, "loss": 0.0202, "step": 357630 }, { "epoch": 3.8211442918959344, "grad_norm": 0.122035913169384, "learning_rate": 8.170806355146438e-07, "loss": 0.0049, "step": 357640 }, { "epoch": 3.8212511352102143, "grad_norm": 0.0321994312107563, "learning_rate": 8.170676448978455e-07, "loss": 0.032, "step": 357650 }, { "epoch": 3.8213579785244938, "grad_norm": 2.906400680541992, "learning_rate": 8.170546539230539e-07, "loss": 0.0154, "step": 357660 }, { "epoch": 3.8214648218387737, "grad_norm": 0.015443110838532448, "learning_rate": 8.170416625902838e-07, "loss": 0.0022, "step": 357670 }, { "epoch": 3.821571665153053, "grad_norm": 0.11299049854278564, "learning_rate": 8.170286708995495e-07, "loss": 0.0143, "step": 357680 }, { "epoch": 3.8216785084673326, "grad_norm": 0.005410485435277224, "learning_rate": 8.170156788508662e-07, "loss": 0.0101, "step": 357690 }, { "epoch": 3.821785351781612, "grad_norm": 0.16194948554039001, "learning_rate": 8.170026864442481e-07, "loss": 0.0127, "step": 357700 }, { "epoch": 3.821892195095892, "grad_norm": 1.4254568815231323, "learning_rate": 8.1698969367971e-07, "loss": 0.0088, "step": 357710 }, { "epoch": 3.8219990384101714, "grad_norm": 2.1045470237731934, "learning_rate": 8.169767005572667e-07, "loss": 0.0138, "step": 357720 }, { "epoch": 3.8221058817244513, "grad_norm": 0.0018319301307201385, "learning_rate": 8.169637070769329e-07, "loss": 0.012, "step": 357730 }, { "epoch": 3.8222127250387308, "grad_norm": 0.019023224711418152, "learning_rate": 8.16950713238723e-07, "loss": 0.0112, "step": 357740 }, { "epoch": 3.8223195683530102, "grad_norm": 2.307992935180664, "learning_rate": 8.169377190426518e-07, "loss": 0.0033, "step": 357750 }, { "epoch": 3.8224264116672897, "grad_norm": 0.004842530936002731, "learning_rate": 8.169247244887341e-07, "loss": 0.0018, "step": 357760 }, { "epoch": 3.8225332549815696, "grad_norm": 2.71917462348938, "learning_rate": 8.169117295769843e-07, "loss": 0.007, "step": 357770 }, { "epoch": 3.822640098295849, "grad_norm": 0.0065076579339802265, "learning_rate": 8.168987343074174e-07, "loss": 0.0134, "step": 357780 }, { "epoch": 3.822746941610129, "grad_norm": 0.18245422840118408, "learning_rate": 8.168857386800478e-07, "loss": 0.0254, "step": 357790 }, { "epoch": 3.8228537849244084, "grad_norm": 0.07087348401546478, "learning_rate": 8.168727426948902e-07, "loss": 0.0166, "step": 357800 }, { "epoch": 3.822960628238688, "grad_norm": 0.0007599065429531038, "learning_rate": 8.168597463519596e-07, "loss": 0.0379, "step": 357810 }, { "epoch": 3.8230674715529673, "grad_norm": 0.0697811022400856, "learning_rate": 8.168467496512703e-07, "loss": 0.0072, "step": 357820 }, { "epoch": 3.8231743148672472, "grad_norm": 2.9701831340789795, "learning_rate": 8.168337525928373e-07, "loss": 0.0063, "step": 357830 }, { "epoch": 3.8232811581815267, "grad_norm": 3.457780361175537, "learning_rate": 8.168207551766748e-07, "loss": 0.0199, "step": 357840 }, { "epoch": 3.8233880014958066, "grad_norm": 2.45184063911438, "learning_rate": 8.168077574027979e-07, "loss": 0.0192, "step": 357850 }, { "epoch": 3.823494844810086, "grad_norm": 0.011156085878610611, "learning_rate": 8.167947592712211e-07, "loss": 0.025, "step": 357860 }, { "epoch": 3.8236016881243655, "grad_norm": 4.268074989318848, "learning_rate": 8.167817607819592e-07, "loss": 0.0113, "step": 357870 }, { "epoch": 3.823708531438645, "grad_norm": 0.06563865393400192, "learning_rate": 8.167687619350266e-07, "loss": 0.0114, "step": 357880 }, { "epoch": 3.823815374752925, "grad_norm": 7.2154154777526855, "learning_rate": 8.167557627304382e-07, "loss": 0.0282, "step": 357890 }, { "epoch": 3.8239222180672043, "grad_norm": 0.016852468252182007, "learning_rate": 8.167427631682086e-07, "loss": 0.0087, "step": 357900 }, { "epoch": 3.8240290613814842, "grad_norm": 0.15061505138874054, "learning_rate": 8.167297632483527e-07, "loss": 0.0017, "step": 357910 }, { "epoch": 3.8241359046957637, "grad_norm": 1.2299154996871948, "learning_rate": 8.16716762970885e-07, "loss": 0.0123, "step": 357920 }, { "epoch": 3.824242748010043, "grad_norm": 0.04420080780982971, "learning_rate": 8.167037623358199e-07, "loss": 0.0227, "step": 357930 }, { "epoch": 3.8243495913243226, "grad_norm": 0.00546268792822957, "learning_rate": 8.166907613431726e-07, "loss": 0.0118, "step": 357940 }, { "epoch": 3.8244564346386025, "grad_norm": 0.033919740468263626, "learning_rate": 8.166777599929575e-07, "loss": 0.0079, "step": 357950 }, { "epoch": 3.824563277952882, "grad_norm": 6.665051460266113, "learning_rate": 8.166647582851892e-07, "loss": 0.0067, "step": 357960 }, { "epoch": 3.824670121267162, "grad_norm": 0.03996061906218529, "learning_rate": 8.166517562198826e-07, "loss": 0.0297, "step": 357970 }, { "epoch": 3.8247769645814413, "grad_norm": 4.117927551269531, "learning_rate": 8.166387537970523e-07, "loss": 0.0182, "step": 357980 }, { "epoch": 3.824883807895721, "grad_norm": 0.03240252286195755, "learning_rate": 8.166257510167127e-07, "loss": 0.0152, "step": 357990 }, { "epoch": 3.8249906512100003, "grad_norm": 5.635100364685059, "learning_rate": 8.166127478788789e-07, "loss": 0.0122, "step": 358000 }, { "epoch": 3.82509749452428, "grad_norm": 0.047767311334609985, "learning_rate": 8.165997443835655e-07, "loss": 0.0137, "step": 358010 }, { "epoch": 3.8252043378385596, "grad_norm": 0.635478675365448, "learning_rate": 8.165867405307869e-07, "loss": 0.0215, "step": 358020 }, { "epoch": 3.8253111811528395, "grad_norm": 0.041320353746414185, "learning_rate": 8.165737363205582e-07, "loss": 0.0223, "step": 358030 }, { "epoch": 3.825418024467119, "grad_norm": 0.19057434797286987, "learning_rate": 8.165607317528937e-07, "loss": 0.0157, "step": 358040 }, { "epoch": 3.8255248677813984, "grad_norm": 0.054718662053346634, "learning_rate": 8.165477268278082e-07, "loss": 0.0148, "step": 358050 }, { "epoch": 3.8256317110956783, "grad_norm": 0.3871096670627594, "learning_rate": 8.165347215453165e-07, "loss": 0.0003, "step": 358060 }, { "epoch": 3.825738554409958, "grad_norm": 5.795398235321045, "learning_rate": 8.165217159054333e-07, "loss": 0.014, "step": 358070 }, { "epoch": 3.8258453977242373, "grad_norm": 0.030845532193779945, "learning_rate": 8.165087099081731e-07, "loss": 0.0096, "step": 358080 }, { "epoch": 3.825952241038517, "grad_norm": 0.4565992057323456, "learning_rate": 8.164957035535507e-07, "loss": 0.0182, "step": 358090 }, { "epoch": 3.8260590843527966, "grad_norm": 0.009700954891741276, "learning_rate": 8.164826968415807e-07, "loss": 0.0107, "step": 358100 }, { "epoch": 3.826165927667076, "grad_norm": 3.54746675491333, "learning_rate": 8.164696897722778e-07, "loss": 0.0327, "step": 358110 }, { "epoch": 3.826272770981356, "grad_norm": 0.005182669032365084, "learning_rate": 8.164566823456569e-07, "loss": 0.0057, "step": 358120 }, { "epoch": 3.8263796142956354, "grad_norm": 0.001991252414882183, "learning_rate": 8.164436745617325e-07, "loss": 0.0172, "step": 358130 }, { "epoch": 3.826486457609915, "grad_norm": 0.12899501621723175, "learning_rate": 8.164306664205191e-07, "loss": 0.0091, "step": 358140 }, { "epoch": 3.826593300924195, "grad_norm": 0.36469244956970215, "learning_rate": 8.164176579220319e-07, "loss": 0.0105, "step": 358150 }, { "epoch": 3.8267001442384743, "grad_norm": 0.0020536878146231174, "learning_rate": 8.16404649066285e-07, "loss": 0.0107, "step": 358160 }, { "epoch": 3.8268069875527537, "grad_norm": 0.003664251882582903, "learning_rate": 8.163916398532936e-07, "loss": 0.0352, "step": 358170 }, { "epoch": 3.8269138308670336, "grad_norm": 0.40575242042541504, "learning_rate": 8.163786302830721e-07, "loss": 0.003, "step": 358180 }, { "epoch": 3.827020674181313, "grad_norm": 0.008681700564920902, "learning_rate": 8.163656203556352e-07, "loss": 0.0006, "step": 358190 }, { "epoch": 3.827127517495593, "grad_norm": 0.007750886492431164, "learning_rate": 8.163526100709976e-07, "loss": 0.0133, "step": 358200 }, { "epoch": 3.8272343608098724, "grad_norm": 0.0006510589737445116, "learning_rate": 8.163395994291742e-07, "loss": 0.0109, "step": 358210 }, { "epoch": 3.827341204124152, "grad_norm": 0.980802059173584, "learning_rate": 8.163265884301794e-07, "loss": 0.01, "step": 358220 }, { "epoch": 3.8274480474384314, "grad_norm": 3.216827392578125, "learning_rate": 8.16313577074028e-07, "loss": 0.0159, "step": 358230 }, { "epoch": 3.8275548907527113, "grad_norm": 0.004981198348104954, "learning_rate": 8.163005653607347e-07, "loss": 0.0076, "step": 358240 }, { "epoch": 3.8276617340669907, "grad_norm": 0.0009975339053198695, "learning_rate": 8.162875532903142e-07, "loss": 0.0064, "step": 358250 }, { "epoch": 3.8277685773812706, "grad_norm": 3.1870269775390625, "learning_rate": 8.162745408627811e-07, "loss": 0.0088, "step": 358260 }, { "epoch": 3.82787542069555, "grad_norm": 2.4067463874816895, "learning_rate": 8.162615280781501e-07, "loss": 0.033, "step": 358270 }, { "epoch": 3.8279822640098295, "grad_norm": 0.8765311241149902, "learning_rate": 8.162485149364362e-07, "loss": 0.0229, "step": 358280 }, { "epoch": 3.828089107324109, "grad_norm": 1.6826225519180298, "learning_rate": 8.162355014376538e-07, "loss": 0.0189, "step": 358290 }, { "epoch": 3.828195950638389, "grad_norm": 3.863262891769409, "learning_rate": 8.162224875818176e-07, "loss": 0.0123, "step": 358300 }, { "epoch": 3.8283027939526684, "grad_norm": 0.09535742551088333, "learning_rate": 8.162094733689424e-07, "loss": 0.0139, "step": 358310 }, { "epoch": 3.8284096372669483, "grad_norm": 1.7910970449447632, "learning_rate": 8.161964587990429e-07, "loss": 0.0162, "step": 358320 }, { "epoch": 3.8285164805812277, "grad_norm": 0.06771518290042877, "learning_rate": 8.161834438721336e-07, "loss": 0.0176, "step": 358330 }, { "epoch": 3.828623323895507, "grad_norm": 6.842925548553467, "learning_rate": 8.161704285882293e-07, "loss": 0.0162, "step": 358340 }, { "epoch": 3.8287301672097867, "grad_norm": 0.365536093711853, "learning_rate": 8.161574129473448e-07, "loss": 0.0109, "step": 358350 }, { "epoch": 3.8288370105240666, "grad_norm": 0.04582654684782028, "learning_rate": 8.161443969494947e-07, "loss": 0.0356, "step": 358360 }, { "epoch": 3.828943853838346, "grad_norm": 0.4197300970554352, "learning_rate": 8.161313805946938e-07, "loss": 0.0074, "step": 358370 }, { "epoch": 3.829050697152626, "grad_norm": 2.746238946914673, "learning_rate": 8.161183638829566e-07, "loss": 0.0311, "step": 358380 }, { "epoch": 3.8291575404669054, "grad_norm": 0.03579053282737732, "learning_rate": 8.16105346814298e-07, "loss": 0.0068, "step": 358390 }, { "epoch": 3.829264383781185, "grad_norm": 0.14739269018173218, "learning_rate": 8.160923293887326e-07, "loss": 0.0309, "step": 358400 }, { "epoch": 3.8293712270954643, "grad_norm": 0.8579592108726501, "learning_rate": 8.16079311606275e-07, "loss": 0.0381, "step": 358410 }, { "epoch": 3.829478070409744, "grad_norm": 0.1298823058605194, "learning_rate": 8.1606629346694e-07, "loss": 0.0178, "step": 358420 }, { "epoch": 3.8295849137240237, "grad_norm": 0.13082124292850494, "learning_rate": 8.160532749707422e-07, "loss": 0.0293, "step": 358430 }, { "epoch": 3.8296917570383036, "grad_norm": 1.1784889698028564, "learning_rate": 8.160402561176966e-07, "loss": 0.0234, "step": 358440 }, { "epoch": 3.829798600352583, "grad_norm": 0.028363347053527832, "learning_rate": 8.160272369078178e-07, "loss": 0.0161, "step": 358450 }, { "epoch": 3.8299054436668625, "grad_norm": 0.22873088717460632, "learning_rate": 8.160142173411202e-07, "loss": 0.0069, "step": 358460 }, { "epoch": 3.830012286981142, "grad_norm": 6.847295761108398, "learning_rate": 8.160011974176189e-07, "loss": 0.017, "step": 358470 }, { "epoch": 3.830119130295422, "grad_norm": 3.0787718296051025, "learning_rate": 8.159881771373281e-07, "loss": 0.0078, "step": 358480 }, { "epoch": 3.8302259736097013, "grad_norm": 0.1884194314479828, "learning_rate": 8.15975156500263e-07, "loss": 0.0027, "step": 358490 }, { "epoch": 3.830332816923981, "grad_norm": 0.0018877605907619, "learning_rate": 8.15962135506438e-07, "loss": 0.0159, "step": 358500 }, { "epoch": 3.8304396602382607, "grad_norm": 0.10324493795633316, "learning_rate": 8.15949114155868e-07, "loss": 0.0164, "step": 358510 }, { "epoch": 3.83054650355254, "grad_norm": 0.3082469403743744, "learning_rate": 8.159360924485676e-07, "loss": 0.0254, "step": 358520 }, { "epoch": 3.8306533468668196, "grad_norm": 10.840364456176758, "learning_rate": 8.159230703845514e-07, "loss": 0.0258, "step": 358530 }, { "epoch": 3.8307601901810995, "grad_norm": 0.771653950214386, "learning_rate": 8.159100479638344e-07, "loss": 0.0142, "step": 358540 }, { "epoch": 3.830867033495379, "grad_norm": 3.7765016555786133, "learning_rate": 8.158970251864309e-07, "loss": 0.003, "step": 358550 }, { "epoch": 3.830973876809659, "grad_norm": 0.01855531893670559, "learning_rate": 8.158840020523558e-07, "loss": 0.0109, "step": 358560 }, { "epoch": 3.8310807201239383, "grad_norm": 2.3741862773895264, "learning_rate": 8.15870978561624e-07, "loss": 0.0033, "step": 358570 }, { "epoch": 3.8311875634382178, "grad_norm": 4.323362827301025, "learning_rate": 8.158579547142499e-07, "loss": 0.0075, "step": 358580 }, { "epoch": 3.831294406752497, "grad_norm": 0.11342812329530716, "learning_rate": 8.158449305102484e-07, "loss": 0.0002, "step": 358590 }, { "epoch": 3.831401250066777, "grad_norm": 5.579315185546875, "learning_rate": 8.158319059496341e-07, "loss": 0.0232, "step": 358600 }, { "epoch": 3.8315080933810566, "grad_norm": 1.4312251806259155, "learning_rate": 8.158188810324219e-07, "loss": 0.0038, "step": 358610 }, { "epoch": 3.8316149366953365, "grad_norm": 4.147186279296875, "learning_rate": 8.15805855758626e-07, "loss": 0.0092, "step": 358620 }, { "epoch": 3.831721780009616, "grad_norm": 0.009304431267082691, "learning_rate": 8.157928301282617e-07, "loss": 0.0053, "step": 358630 }, { "epoch": 3.8318286233238954, "grad_norm": 3.8644847869873047, "learning_rate": 8.157798041413435e-07, "loss": 0.0198, "step": 358640 }, { "epoch": 3.831935466638175, "grad_norm": 4.927914142608643, "learning_rate": 8.157667777978859e-07, "loss": 0.0039, "step": 358650 }, { "epoch": 3.8320423099524548, "grad_norm": 6.308877468109131, "learning_rate": 8.15753751097904e-07, "loss": 0.0143, "step": 358660 }, { "epoch": 3.832149153266734, "grad_norm": 0.03334502503275871, "learning_rate": 8.157407240414121e-07, "loss": 0.0159, "step": 358670 }, { "epoch": 3.832255996581014, "grad_norm": 0.0036583503242582083, "learning_rate": 8.157276966284251e-07, "loss": 0.0072, "step": 358680 }, { "epoch": 3.8323628398952936, "grad_norm": 7.529560089111328, "learning_rate": 8.157146688589578e-07, "loss": 0.0119, "step": 358690 }, { "epoch": 3.832469683209573, "grad_norm": 2.8991072177886963, "learning_rate": 8.157016407330246e-07, "loss": 0.0112, "step": 358700 }, { "epoch": 3.8325765265238525, "grad_norm": 0.0043287118896842, "learning_rate": 8.156886122506407e-07, "loss": 0.0101, "step": 358710 }, { "epoch": 3.8326833698381324, "grad_norm": 0.007803484331816435, "learning_rate": 8.156755834118205e-07, "loss": 0.0275, "step": 358720 }, { "epoch": 3.832790213152412, "grad_norm": 3.2588140964508057, "learning_rate": 8.156625542165785e-07, "loss": 0.0041, "step": 358730 }, { "epoch": 3.8328970564666918, "grad_norm": 0.009422093629837036, "learning_rate": 8.156495246649298e-07, "loss": 0.0143, "step": 358740 }, { "epoch": 3.8330038997809712, "grad_norm": 0.06560219824314117, "learning_rate": 8.15636494756889e-07, "loss": 0.0157, "step": 358750 }, { "epoch": 3.8331107430952507, "grad_norm": 3.6162242889404297, "learning_rate": 8.156234644924706e-07, "loss": 0.0385, "step": 358760 }, { "epoch": 3.8332175864095306, "grad_norm": 0.016742002218961716, "learning_rate": 8.156104338716897e-07, "loss": 0.0107, "step": 358770 }, { "epoch": 3.83332442972381, "grad_norm": 5.062242031097412, "learning_rate": 8.155974028945607e-07, "loss": 0.0046, "step": 358780 }, { "epoch": 3.8334312730380895, "grad_norm": 4.895390033721924, "learning_rate": 8.155843715610984e-07, "loss": 0.0121, "step": 358790 }, { "epoch": 3.8335381163523694, "grad_norm": 1.623111605644226, "learning_rate": 8.155713398713177e-07, "loss": 0.0019, "step": 358800 }, { "epoch": 3.833644959666649, "grad_norm": 1.0494946241378784, "learning_rate": 8.15558307825233e-07, "loss": 0.0199, "step": 358810 }, { "epoch": 3.8337518029809283, "grad_norm": 1.6145492792129517, "learning_rate": 8.155452754228592e-07, "loss": 0.0205, "step": 358820 }, { "epoch": 3.8338586462952082, "grad_norm": 1.687135934829712, "learning_rate": 8.155322426642109e-07, "loss": 0.0211, "step": 358830 }, { "epoch": 3.8339654896094877, "grad_norm": 3.8981359004974365, "learning_rate": 8.155192095493029e-07, "loss": 0.0033, "step": 358840 }, { "epoch": 3.834072332923767, "grad_norm": 0.5613223314285278, "learning_rate": 8.155061760781498e-07, "loss": 0.0024, "step": 358850 }, { "epoch": 3.834179176238047, "grad_norm": 0.004289558622986078, "learning_rate": 8.154931422507666e-07, "loss": 0.0384, "step": 358860 }, { "epoch": 3.8342860195523265, "grad_norm": 0.31604862213134766, "learning_rate": 8.154801080671676e-07, "loss": 0.0079, "step": 358870 }, { "epoch": 3.834392862866606, "grad_norm": 0.448944628238678, "learning_rate": 8.15467073527368e-07, "loss": 0.0084, "step": 358880 }, { "epoch": 3.834499706180886, "grad_norm": 4.3677978515625, "learning_rate": 8.154540386313822e-07, "loss": 0.0235, "step": 358890 }, { "epoch": 3.8346065494951653, "grad_norm": 0.18300852179527283, "learning_rate": 8.15441003379225e-07, "loss": 0.0066, "step": 358900 }, { "epoch": 3.834713392809445, "grad_norm": 1.3209935426712036, "learning_rate": 8.154279677709109e-07, "loss": 0.0228, "step": 358910 }, { "epoch": 3.8348202361237247, "grad_norm": 0.004288818687200546, "learning_rate": 8.15414931806455e-07, "loss": 0.0142, "step": 358920 }, { "epoch": 3.834927079438004, "grad_norm": 6.046422481536865, "learning_rate": 8.154018954858718e-07, "loss": 0.0537, "step": 358930 }, { "epoch": 3.8350339227522836, "grad_norm": 4.118348598480225, "learning_rate": 8.153888588091761e-07, "loss": 0.0156, "step": 358940 }, { "epoch": 3.8351407660665635, "grad_norm": 0.008057398721575737, "learning_rate": 8.153758217763826e-07, "loss": 0.01, "step": 358950 }, { "epoch": 3.835247609380843, "grad_norm": 0.052820492535829544, "learning_rate": 8.153627843875059e-07, "loss": 0.0119, "step": 358960 }, { "epoch": 3.835354452695123, "grad_norm": 0.07725001126527786, "learning_rate": 8.153497466425608e-07, "loss": 0.0114, "step": 358970 }, { "epoch": 3.8354612960094023, "grad_norm": 0.6323438286781311, "learning_rate": 8.153367085415621e-07, "loss": 0.0065, "step": 358980 }, { "epoch": 3.835568139323682, "grad_norm": 1.52975332736969, "learning_rate": 8.153236700845243e-07, "loss": 0.0176, "step": 358990 }, { "epoch": 3.8356749826379613, "grad_norm": 2.17966628074646, "learning_rate": 8.153106312714625e-07, "loss": 0.0253, "step": 359000 }, { "epoch": 3.835781825952241, "grad_norm": 0.027292799204587936, "learning_rate": 8.152975921023913e-07, "loss": 0.0049, "step": 359010 }, { "epoch": 3.8358886692665206, "grad_norm": 1.725142240524292, "learning_rate": 8.152845525773249e-07, "loss": 0.0153, "step": 359020 }, { "epoch": 3.8359955125808005, "grad_norm": 2.1881260871887207, "learning_rate": 8.152715126962788e-07, "loss": 0.0015, "step": 359030 }, { "epoch": 3.83610235589508, "grad_norm": 0.04803428053855896, "learning_rate": 8.152584724592672e-07, "loss": 0.0123, "step": 359040 }, { "epoch": 3.8362091992093594, "grad_norm": 0.009021668694913387, "learning_rate": 8.152454318663051e-07, "loss": 0.0083, "step": 359050 }, { "epoch": 3.836316042523639, "grad_norm": 0.08764204382896423, "learning_rate": 8.152323909174069e-07, "loss": 0.0157, "step": 359060 }, { "epoch": 3.836422885837919, "grad_norm": 0.7281025052070618, "learning_rate": 8.152193496125876e-07, "loss": 0.005, "step": 359070 }, { "epoch": 3.8365297291521983, "grad_norm": 9.263452529907227, "learning_rate": 8.152063079518621e-07, "loss": 0.004, "step": 359080 }, { "epoch": 3.836636572466478, "grad_norm": 0.0707000344991684, "learning_rate": 8.151932659352447e-07, "loss": 0.0043, "step": 359090 }, { "epoch": 3.8367434157807576, "grad_norm": 0.002596881240606308, "learning_rate": 8.151802235627503e-07, "loss": 0.0038, "step": 359100 }, { "epoch": 3.836850259095037, "grad_norm": 0.6456428170204163, "learning_rate": 8.151671808343938e-07, "loss": 0.0154, "step": 359110 }, { "epoch": 3.8369571024093165, "grad_norm": 0.014315812848508358, "learning_rate": 8.151541377501896e-07, "loss": 0.0221, "step": 359120 }, { "epoch": 3.8370639457235964, "grad_norm": 0.0935131385922432, "learning_rate": 8.151410943101527e-07, "loss": 0.0072, "step": 359130 }, { "epoch": 3.837170789037876, "grad_norm": 0.04913432523608208, "learning_rate": 8.151280505142977e-07, "loss": 0.0011, "step": 359140 }, { "epoch": 3.837277632352156, "grad_norm": 0.8696105480194092, "learning_rate": 8.151150063626393e-07, "loss": 0.0205, "step": 359150 }, { "epoch": 3.8373844756664353, "grad_norm": 0.3977351188659668, "learning_rate": 8.151019618551922e-07, "loss": 0.0101, "step": 359160 }, { "epoch": 3.8374913189807147, "grad_norm": 0.0017576859099790454, "learning_rate": 8.150889169919715e-07, "loss": 0.0314, "step": 359170 }, { "epoch": 3.837598162294994, "grad_norm": 5.028768062591553, "learning_rate": 8.150758717729913e-07, "loss": 0.0231, "step": 359180 }, { "epoch": 3.837705005609274, "grad_norm": 10.637396812438965, "learning_rate": 8.150628261982668e-07, "loss": 0.0227, "step": 359190 }, { "epoch": 3.8378118489235535, "grad_norm": 0.0008258377201855183, "learning_rate": 8.150497802678126e-07, "loss": 0.0297, "step": 359200 }, { "epoch": 3.8379186922378334, "grad_norm": 0.13064120709896088, "learning_rate": 8.150367339816434e-07, "loss": 0.0168, "step": 359210 }, { "epoch": 3.838025535552113, "grad_norm": 6.536688327789307, "learning_rate": 8.15023687339774e-07, "loss": 0.0259, "step": 359220 }, { "epoch": 3.8381323788663924, "grad_norm": 0.4806312620639801, "learning_rate": 8.15010640342219e-07, "loss": 0.0006, "step": 359230 }, { "epoch": 3.838239222180672, "grad_norm": 0.018513968214392662, "learning_rate": 8.149975929889933e-07, "loss": 0.0122, "step": 359240 }, { "epoch": 3.8383460654949517, "grad_norm": 0.009157733991742134, "learning_rate": 8.149845452801114e-07, "loss": 0.0164, "step": 359250 }, { "epoch": 3.838452908809231, "grad_norm": 0.008585245348513126, "learning_rate": 8.149714972155883e-07, "loss": 0.0004, "step": 359260 }, { "epoch": 3.838559752123511, "grad_norm": 4.042717456817627, "learning_rate": 8.149584487954386e-07, "loss": 0.0402, "step": 359270 }, { "epoch": 3.8386665954377905, "grad_norm": 4.867302417755127, "learning_rate": 8.14945400019677e-07, "loss": 0.0126, "step": 359280 }, { "epoch": 3.83877343875207, "grad_norm": 0.45400944352149963, "learning_rate": 8.149323508883183e-07, "loss": 0.0074, "step": 359290 }, { "epoch": 3.8388802820663495, "grad_norm": 3.629645347595215, "learning_rate": 8.149193014013771e-07, "loss": 0.0174, "step": 359300 }, { "epoch": 3.8389871253806294, "grad_norm": 0.2953943908214569, "learning_rate": 8.149062515588684e-07, "loss": 0.0099, "step": 359310 }, { "epoch": 3.839093968694909, "grad_norm": 4.0464324951171875, "learning_rate": 8.148932013608067e-07, "loss": 0.0154, "step": 359320 }, { "epoch": 3.8392008120091887, "grad_norm": 0.7669675946235657, "learning_rate": 8.148801508072067e-07, "loss": 0.0161, "step": 359330 }, { "epoch": 3.839307655323468, "grad_norm": 0.007769215852022171, "learning_rate": 8.148670998980835e-07, "loss": 0.0023, "step": 359340 }, { "epoch": 3.8394144986377476, "grad_norm": 0.9705868363380432, "learning_rate": 8.148540486334515e-07, "loss": 0.0086, "step": 359350 }, { "epoch": 3.839521341952027, "grad_norm": 1.1049095392227173, "learning_rate": 8.148409970133254e-07, "loss": 0.0047, "step": 359360 }, { "epoch": 3.839628185266307, "grad_norm": 0.006728982552886009, "learning_rate": 8.148279450377201e-07, "loss": 0.0253, "step": 359370 }, { "epoch": 3.8397350285805865, "grad_norm": 0.4157352149486542, "learning_rate": 8.148148927066503e-07, "loss": 0.0129, "step": 359380 }, { "epoch": 3.8398418718948664, "grad_norm": 0.02625632844865322, "learning_rate": 8.148018400201307e-07, "loss": 0.007, "step": 359390 }, { "epoch": 3.839948715209146, "grad_norm": 0.01728818751871586, "learning_rate": 8.147887869781761e-07, "loss": 0.0123, "step": 359400 }, { "epoch": 3.8400555585234253, "grad_norm": 0.06338675320148468, "learning_rate": 8.147757335808013e-07, "loss": 0.0066, "step": 359410 }, { "epoch": 3.8401624018377047, "grad_norm": 15.309956550598145, "learning_rate": 8.147626798280207e-07, "loss": 0.0474, "step": 359420 }, { "epoch": 3.8402692451519846, "grad_norm": 5.522961616516113, "learning_rate": 8.147496257198494e-07, "loss": 0.0106, "step": 359430 }, { "epoch": 3.840376088466264, "grad_norm": 0.13755641877651215, "learning_rate": 8.147365712563021e-07, "loss": 0.0489, "step": 359440 }, { "epoch": 3.840482931780544, "grad_norm": 0.0011267077643424273, "learning_rate": 8.147235164373934e-07, "loss": 0.0165, "step": 359450 }, { "epoch": 3.8405897750948235, "grad_norm": 0.005090384744107723, "learning_rate": 8.147104612631381e-07, "loss": 0.005, "step": 359460 }, { "epoch": 3.840696618409103, "grad_norm": 0.11500496417284012, "learning_rate": 8.146974057335509e-07, "loss": 0.0003, "step": 359470 }, { "epoch": 3.8408034617233824, "grad_norm": 0.002013177378103137, "learning_rate": 8.146843498486465e-07, "loss": 0.0071, "step": 359480 }, { "epoch": 3.8409103050376623, "grad_norm": 1.5511540174484253, "learning_rate": 8.146712936084399e-07, "loss": 0.0389, "step": 359490 }, { "epoch": 3.8410171483519417, "grad_norm": 0.004922233521938324, "learning_rate": 8.146582370129455e-07, "loss": 0.0121, "step": 359500 }, { "epoch": 3.8411239916662216, "grad_norm": 0.12214962393045425, "learning_rate": 8.146451800621784e-07, "loss": 0.008, "step": 359510 }, { "epoch": 3.841230834980501, "grad_norm": 10.758748054504395, "learning_rate": 8.14632122756153e-07, "loss": 0.0455, "step": 359520 }, { "epoch": 3.8413376782947806, "grad_norm": 0.03284972161054611, "learning_rate": 8.146190650948841e-07, "loss": 0.0593, "step": 359530 }, { "epoch": 3.8414445216090605, "grad_norm": 0.02838679403066635, "learning_rate": 8.146060070783866e-07, "loss": 0.0121, "step": 359540 }, { "epoch": 3.84155136492334, "grad_norm": 0.0013689654879271984, "learning_rate": 8.145929487066754e-07, "loss": 0.063, "step": 359550 }, { "epoch": 3.8416582082376194, "grad_norm": 0.5436965823173523, "learning_rate": 8.145798899797647e-07, "loss": 0.0032, "step": 359560 }, { "epoch": 3.8417650515518993, "grad_norm": 0.8822665214538574, "learning_rate": 8.145668308976696e-07, "loss": 0.0212, "step": 359570 }, { "epoch": 3.8418718948661787, "grad_norm": 1.0492488145828247, "learning_rate": 8.145537714604051e-07, "loss": 0.0117, "step": 359580 }, { "epoch": 3.841978738180458, "grad_norm": 0.0035453380551189184, "learning_rate": 8.145407116679853e-07, "loss": 0.0041, "step": 359590 }, { "epoch": 3.842085581494738, "grad_norm": 0.05951470881700516, "learning_rate": 8.145276515204255e-07, "loss": 0.0035, "step": 359600 }, { "epoch": 3.8421924248090176, "grad_norm": 0.003156704129651189, "learning_rate": 8.145145910177402e-07, "loss": 0.0162, "step": 359610 }, { "epoch": 3.842299268123297, "grad_norm": 0.26944416761398315, "learning_rate": 8.145015301599442e-07, "loss": 0.0266, "step": 359620 }, { "epoch": 3.842406111437577, "grad_norm": 0.0700756385922432, "learning_rate": 8.144884689470522e-07, "loss": 0.0116, "step": 359630 }, { "epoch": 3.8425129547518564, "grad_norm": 6.910760879516602, "learning_rate": 8.14475407379079e-07, "loss": 0.0261, "step": 359640 }, { "epoch": 3.842619798066136, "grad_norm": 2.9705817699432373, "learning_rate": 8.144623454560393e-07, "loss": 0.0014, "step": 359650 }, { "epoch": 3.8427266413804158, "grad_norm": 3.4638259410858154, "learning_rate": 8.144492831779479e-07, "loss": 0.0072, "step": 359660 }, { "epoch": 3.842833484694695, "grad_norm": 0.12727388739585876, "learning_rate": 8.144362205448195e-07, "loss": 0.012, "step": 359670 }, { "epoch": 3.842940328008975, "grad_norm": 0.015577529557049274, "learning_rate": 8.144231575566689e-07, "loss": 0.0221, "step": 359680 }, { "epoch": 3.8430471713232546, "grad_norm": 0.2613222002983093, "learning_rate": 8.144100942135108e-07, "loss": 0.0341, "step": 359690 }, { "epoch": 3.843154014637534, "grad_norm": 0.011272399686276913, "learning_rate": 8.143970305153601e-07, "loss": 0.009, "step": 359700 }, { "epoch": 3.8432608579518135, "grad_norm": 0.012598466128110886, "learning_rate": 8.143839664622313e-07, "loss": 0.0021, "step": 359710 }, { "epoch": 3.8433677012660934, "grad_norm": 2.8798630237579346, "learning_rate": 8.143709020541394e-07, "loss": 0.0064, "step": 359720 }, { "epoch": 3.843474544580373, "grad_norm": 0.010633867233991623, "learning_rate": 8.143578372910989e-07, "loss": 0.0014, "step": 359730 }, { "epoch": 3.8435813878946528, "grad_norm": 0.006071492563933134, "learning_rate": 8.143447721731247e-07, "loss": 0.0044, "step": 359740 }, { "epoch": 3.843688231208932, "grad_norm": 3.624074697494507, "learning_rate": 8.143317067002316e-07, "loss": 0.0145, "step": 359750 }, { "epoch": 3.8437950745232117, "grad_norm": 0.12058462202548981, "learning_rate": 8.14318640872434e-07, "loss": 0.0037, "step": 359760 }, { "epoch": 3.843901917837491, "grad_norm": 0.000855721125844866, "learning_rate": 8.143055746897473e-07, "loss": 0.0225, "step": 359770 }, { "epoch": 3.844008761151771, "grad_norm": 6.734938621520996, "learning_rate": 8.142925081521857e-07, "loss": 0.0077, "step": 359780 }, { "epoch": 3.8441156044660505, "grad_norm": 0.02774052880704403, "learning_rate": 8.142794412597641e-07, "loss": 0.0047, "step": 359790 }, { "epoch": 3.8442224477803304, "grad_norm": 0.006227980367839336, "learning_rate": 8.142663740124974e-07, "loss": 0.0199, "step": 359800 }, { "epoch": 3.84432929109461, "grad_norm": 0.06178303062915802, "learning_rate": 8.142533064104003e-07, "loss": 0.0001, "step": 359810 }, { "epoch": 3.8444361344088893, "grad_norm": 0.11818468570709229, "learning_rate": 8.142402384534873e-07, "loss": 0.0133, "step": 359820 }, { "epoch": 3.8445429777231688, "grad_norm": 0.056564051657915115, "learning_rate": 8.142271701417736e-07, "loss": 0.0004, "step": 359830 }, { "epoch": 3.8446498210374487, "grad_norm": 2.5172860622406006, "learning_rate": 8.142141014752735e-07, "loss": 0.0117, "step": 359840 }, { "epoch": 3.844756664351728, "grad_norm": 1.7517279386520386, "learning_rate": 8.142010324540019e-07, "loss": 0.0035, "step": 359850 }, { "epoch": 3.844863507666008, "grad_norm": 0.3214399218559265, "learning_rate": 8.141879630779738e-07, "loss": 0.0176, "step": 359860 }, { "epoch": 3.8449703509802875, "grad_norm": 0.33748188614845276, "learning_rate": 8.141748933472037e-07, "loss": 0.0052, "step": 359870 }, { "epoch": 3.845077194294567, "grad_norm": 0.004831909667700529, "learning_rate": 8.141618232617065e-07, "loss": 0.0047, "step": 359880 }, { "epoch": 3.8451840376088464, "grad_norm": 4.762642860412598, "learning_rate": 8.141487528214969e-07, "loss": 0.0191, "step": 359890 }, { "epoch": 3.8452908809231263, "grad_norm": 0.02186443656682968, "learning_rate": 8.141356820265894e-07, "loss": 0.009, "step": 359900 }, { "epoch": 3.845397724237406, "grad_norm": 1.998556137084961, "learning_rate": 8.141226108769994e-07, "loss": 0.015, "step": 359910 }, { "epoch": 3.8455045675516857, "grad_norm": 0.05245199799537659, "learning_rate": 8.14109539372741e-07, "loss": 0.0142, "step": 359920 }, { "epoch": 3.845611410865965, "grad_norm": 0.0024124900810420513, "learning_rate": 8.140964675138293e-07, "loss": 0.0038, "step": 359930 }, { "epoch": 3.8457182541802446, "grad_norm": 0.0062201786786317825, "learning_rate": 8.14083395300279e-07, "loss": 0.0035, "step": 359940 }, { "epoch": 3.845825097494524, "grad_norm": 3.1286520957946777, "learning_rate": 8.140703227321049e-07, "loss": 0.0156, "step": 359950 }, { "epoch": 3.845931940808804, "grad_norm": 0.036005228757858276, "learning_rate": 8.140572498093215e-07, "loss": 0.0479, "step": 359960 }, { "epoch": 3.8460387841230834, "grad_norm": 3.4144489765167236, "learning_rate": 8.140441765319439e-07, "loss": 0.0576, "step": 359970 }, { "epoch": 3.8461456274373633, "grad_norm": 1.8906127214431763, "learning_rate": 8.140311028999867e-07, "loss": 0.0154, "step": 359980 }, { "epoch": 3.846252470751643, "grad_norm": 3.6609628200531006, "learning_rate": 8.140180289134647e-07, "loss": 0.0047, "step": 359990 }, { "epoch": 3.8463593140659222, "grad_norm": 0.05811382830142975, "learning_rate": 8.140049545723926e-07, "loss": 0.0163, "step": 360000 }, { "epoch": 3.8464661573802017, "grad_norm": 4.414154529571533, "learning_rate": 8.139918798767853e-07, "loss": 0.0398, "step": 360010 }, { "epoch": 3.8465730006944816, "grad_norm": 7.8080830574035645, "learning_rate": 8.139788048266574e-07, "loss": 0.0315, "step": 360020 }, { "epoch": 3.846679844008761, "grad_norm": 1.8042362928390503, "learning_rate": 8.139657294220237e-07, "loss": 0.0035, "step": 360030 }, { "epoch": 3.846786687323041, "grad_norm": 0.12698377668857574, "learning_rate": 8.13952653662899e-07, "loss": 0.0073, "step": 360040 }, { "epoch": 3.8468935306373204, "grad_norm": 0.05359359085559845, "learning_rate": 8.139395775492981e-07, "loss": 0.0021, "step": 360050 }, { "epoch": 3.8470003739516, "grad_norm": 0.8984028697013855, "learning_rate": 8.139265010812357e-07, "loss": 0.0014, "step": 360060 }, { "epoch": 3.8471072172658793, "grad_norm": 0.015680082142353058, "learning_rate": 8.139134242587267e-07, "loss": 0.0094, "step": 360070 }, { "epoch": 3.8472140605801592, "grad_norm": 10.797830581665039, "learning_rate": 8.139003470817855e-07, "loss": 0.0477, "step": 360080 }, { "epoch": 3.8473209038944387, "grad_norm": 7.063634872436523, "learning_rate": 8.138872695504275e-07, "loss": 0.0138, "step": 360090 }, { "epoch": 3.8474277472087186, "grad_norm": 0.08561334013938904, "learning_rate": 8.138741916646668e-07, "loss": 0.0118, "step": 360100 }, { "epoch": 3.847534590522998, "grad_norm": 0.36319682002067566, "learning_rate": 8.138611134245184e-07, "loss": 0.0046, "step": 360110 }, { "epoch": 3.8476414338372775, "grad_norm": 2.513350009918213, "learning_rate": 8.138480348299973e-07, "loss": 0.0066, "step": 360120 }, { "epoch": 3.847748277151557, "grad_norm": 5.563040733337402, "learning_rate": 8.13834955881118e-07, "loss": 0.0148, "step": 360130 }, { "epoch": 3.847855120465837, "grad_norm": 0.5045623183250427, "learning_rate": 8.138218765778953e-07, "loss": 0.0085, "step": 360140 }, { "epoch": 3.8479619637801163, "grad_norm": 0.21044261753559113, "learning_rate": 8.13808796920344e-07, "loss": 0.0716, "step": 360150 }, { "epoch": 3.8480688070943962, "grad_norm": 1.8759801387786865, "learning_rate": 8.13795716908479e-07, "loss": 0.0089, "step": 360160 }, { "epoch": 3.8481756504086757, "grad_norm": 1.3472542762756348, "learning_rate": 8.137826365423149e-07, "loss": 0.0102, "step": 360170 }, { "epoch": 3.848282493722955, "grad_norm": 0.003670662874355912, "learning_rate": 8.137695558218667e-07, "loss": 0.0158, "step": 360180 }, { "epoch": 3.8483893370372346, "grad_norm": 0.07919618487358093, "learning_rate": 8.137564747471486e-07, "loss": 0.0156, "step": 360190 }, { "epoch": 3.8484961803515145, "grad_norm": 0.06665794551372528, "learning_rate": 8.137433933181761e-07, "loss": 0.0035, "step": 360200 }, { "epoch": 3.848603023665794, "grad_norm": 0.189207524061203, "learning_rate": 8.137303115349635e-07, "loss": 0.0197, "step": 360210 }, { "epoch": 3.848709866980074, "grad_norm": 0.11046900600194931, "learning_rate": 8.137172293975257e-07, "loss": 0.0024, "step": 360220 }, { "epoch": 3.8488167102943533, "grad_norm": 0.4825862646102905, "learning_rate": 8.137041469058774e-07, "loss": 0.005, "step": 360230 }, { "epoch": 3.848923553608633, "grad_norm": 10.770557403564453, "learning_rate": 8.136910640600335e-07, "loss": 0.0085, "step": 360240 }, { "epoch": 3.8490303969229127, "grad_norm": 0.4771030843257904, "learning_rate": 8.136779808600087e-07, "loss": 0.0075, "step": 360250 }, { "epoch": 3.849137240237192, "grad_norm": 6.188089847564697, "learning_rate": 8.13664897305818e-07, "loss": 0.0132, "step": 360260 }, { "epoch": 3.8492440835514716, "grad_norm": 1.1628416776657104, "learning_rate": 8.136518133974757e-07, "loss": 0.0232, "step": 360270 }, { "epoch": 3.8493509268657515, "grad_norm": 0.01433002483099699, "learning_rate": 8.136387291349968e-07, "loss": 0.0161, "step": 360280 }, { "epoch": 3.849457770180031, "grad_norm": 0.008766911923885345, "learning_rate": 8.136256445183963e-07, "loss": 0.0329, "step": 360290 }, { "epoch": 3.8495646134943104, "grad_norm": 0.059378571808338165, "learning_rate": 8.136125595476886e-07, "loss": 0.0081, "step": 360300 }, { "epoch": 3.8496714568085904, "grad_norm": 0.021304497495293617, "learning_rate": 8.135994742228889e-07, "loss": 0.0115, "step": 360310 }, { "epoch": 3.84977830012287, "grad_norm": 4.022930145263672, "learning_rate": 8.135863885440116e-07, "loss": 0.0152, "step": 360320 }, { "epoch": 3.8498851434371493, "grad_norm": 4.91039514541626, "learning_rate": 8.135733025110714e-07, "loss": 0.0063, "step": 360330 }, { "epoch": 3.849991986751429, "grad_norm": 0.26046687364578247, "learning_rate": 8.135602161240835e-07, "loss": 0.0117, "step": 360340 }, { "epoch": 3.8500988300657086, "grad_norm": 0.004300481174141169, "learning_rate": 8.135471293830624e-07, "loss": 0.0082, "step": 360350 }, { "epoch": 3.850205673379988, "grad_norm": 0.035730890929698944, "learning_rate": 8.135340422880228e-07, "loss": 0.0195, "step": 360360 }, { "epoch": 3.850312516694268, "grad_norm": 0.2747127413749695, "learning_rate": 8.135209548389798e-07, "loss": 0.0083, "step": 360370 }, { "epoch": 3.8504193600085475, "grad_norm": 11.72410774230957, "learning_rate": 8.135078670359478e-07, "loss": 0.0607, "step": 360380 }, { "epoch": 3.850526203322827, "grad_norm": 0.308714359998703, "learning_rate": 8.134947788789419e-07, "loss": 0.0076, "step": 360390 }, { "epoch": 3.850633046637107, "grad_norm": 0.14384540915489197, "learning_rate": 8.134816903679766e-07, "loss": 0.0129, "step": 360400 }, { "epoch": 3.8507398899513863, "grad_norm": 0.09396163374185562, "learning_rate": 8.134686015030671e-07, "loss": 0.0137, "step": 360410 }, { "epoch": 3.8508467332656657, "grad_norm": 0.020512929186224937, "learning_rate": 8.134555122842276e-07, "loss": 0.0099, "step": 360420 }, { "epoch": 3.8509535765799456, "grad_norm": 0.1866561621427536, "learning_rate": 8.134424227114733e-07, "loss": 0.0088, "step": 360430 }, { "epoch": 3.851060419894225, "grad_norm": 0.2520699203014374, "learning_rate": 8.134293327848187e-07, "loss": 0.0075, "step": 360440 }, { "epoch": 3.851167263208505, "grad_norm": 11.312575340270996, "learning_rate": 8.134162425042789e-07, "loss": 0.0329, "step": 360450 }, { "epoch": 3.8512741065227845, "grad_norm": 0.15625722706317902, "learning_rate": 8.134031518698685e-07, "loss": 0.023, "step": 360460 }, { "epoch": 3.851380949837064, "grad_norm": 0.013387762941420078, "learning_rate": 8.133900608816021e-07, "loss": 0.008, "step": 360470 }, { "epoch": 3.8514877931513434, "grad_norm": 0.015630433335900307, "learning_rate": 8.133769695394949e-07, "loss": 0.0338, "step": 360480 }, { "epoch": 3.8515946364656233, "grad_norm": 0.0421258918941021, "learning_rate": 8.133638778435613e-07, "loss": 0.0104, "step": 360490 }, { "epoch": 3.8517014797799027, "grad_norm": 0.10319684445858002, "learning_rate": 8.133507857938162e-07, "loss": 0.0011, "step": 360500 }, { "epoch": 3.8518083230941826, "grad_norm": 0.4776739180088043, "learning_rate": 8.133376933902745e-07, "loss": 0.0091, "step": 360510 }, { "epoch": 3.851915166408462, "grad_norm": 4.17430305480957, "learning_rate": 8.13324600632951e-07, "loss": 0.0038, "step": 360520 }, { "epoch": 3.8520220097227416, "grad_norm": 0.3624030351638794, "learning_rate": 8.133115075218602e-07, "loss": 0.0052, "step": 360530 }, { "epoch": 3.852128853037021, "grad_norm": 0.038620613515377045, "learning_rate": 8.132984140570172e-07, "loss": 0.0035, "step": 360540 }, { "epoch": 3.852235696351301, "grad_norm": 5.1106743812561035, "learning_rate": 8.132853202384365e-07, "loss": 0.0129, "step": 360550 }, { "epoch": 3.8523425396655804, "grad_norm": 0.0013367511564865708, "learning_rate": 8.132722260661331e-07, "loss": 0.0274, "step": 360560 }, { "epoch": 3.8524493829798603, "grad_norm": 0.009151450358331203, "learning_rate": 8.132591315401217e-07, "loss": 0.0321, "step": 360570 }, { "epoch": 3.8525562262941397, "grad_norm": 0.021767141297459602, "learning_rate": 8.132460366604171e-07, "loss": 0.0259, "step": 360580 }, { "epoch": 3.852663069608419, "grad_norm": 6.287912368774414, "learning_rate": 8.132329414270341e-07, "loss": 0.0088, "step": 360590 }, { "epoch": 3.8527699129226987, "grad_norm": 0.6047199368476868, "learning_rate": 8.132198458399875e-07, "loss": 0.0651, "step": 360600 }, { "epoch": 3.8528767562369786, "grad_norm": 11.8608980178833, "learning_rate": 8.13206749899292e-07, "loss": 0.0059, "step": 360610 }, { "epoch": 3.852983599551258, "grad_norm": 3.5697174072265625, "learning_rate": 8.131936536049624e-07, "loss": 0.0151, "step": 360620 }, { "epoch": 3.853090442865538, "grad_norm": 2.0862038135528564, "learning_rate": 8.131805569570136e-07, "loss": 0.0375, "step": 360630 }, { "epoch": 3.8531972861798174, "grad_norm": 0.0055862716399133205, "learning_rate": 8.131674599554603e-07, "loss": 0.0089, "step": 360640 }, { "epoch": 3.853304129494097, "grad_norm": 0.7858822345733643, "learning_rate": 8.131543626003172e-07, "loss": 0.0264, "step": 360650 }, { "epoch": 3.8534109728083763, "grad_norm": 0.002185809426009655, "learning_rate": 8.131412648915994e-07, "loss": 0.0024, "step": 360660 }, { "epoch": 3.853517816122656, "grad_norm": 4.324826717376709, "learning_rate": 8.131281668293213e-07, "loss": 0.0276, "step": 360670 }, { "epoch": 3.8536246594369357, "grad_norm": 1.2006443738937378, "learning_rate": 8.13115068413498e-07, "loss": 0.0342, "step": 360680 }, { "epoch": 3.8537315027512156, "grad_norm": 5.43717622756958, "learning_rate": 8.131019696441441e-07, "loss": 0.0301, "step": 360690 }, { "epoch": 3.853838346065495, "grad_norm": 0.07141640037298203, "learning_rate": 8.130888705212744e-07, "loss": 0.0047, "step": 360700 }, { "epoch": 3.8539451893797745, "grad_norm": 0.9810030460357666, "learning_rate": 8.130757710449037e-07, "loss": 0.001, "step": 360710 }, { "epoch": 3.854052032694054, "grad_norm": 0.66347336769104, "learning_rate": 8.130626712150471e-07, "loss": 0.0012, "step": 360720 }, { "epoch": 3.854158876008334, "grad_norm": 0.14310526847839355, "learning_rate": 8.130495710317188e-07, "loss": 0.0071, "step": 360730 }, { "epoch": 3.8542657193226133, "grad_norm": 0.045954059809446335, "learning_rate": 8.13036470494934e-07, "loss": 0.0201, "step": 360740 }, { "epoch": 3.854372562636893, "grad_norm": 2.9680166244506836, "learning_rate": 8.130233696047076e-07, "loss": 0.0661, "step": 360750 }, { "epoch": 3.8544794059511727, "grad_norm": 1.1455013751983643, "learning_rate": 8.130102683610538e-07, "loss": 0.0067, "step": 360760 }, { "epoch": 3.854586249265452, "grad_norm": 0.017651312053203583, "learning_rate": 8.129971667639881e-07, "loss": 0.0046, "step": 360770 }, { "epoch": 3.8546930925797316, "grad_norm": 0.012175624258816242, "learning_rate": 8.129840648135248e-07, "loss": 0.0612, "step": 360780 }, { "epoch": 3.8547999358940115, "grad_norm": 1.8860770463943481, "learning_rate": 8.12970962509679e-07, "loss": 0.0092, "step": 360790 }, { "epoch": 3.854906779208291, "grad_norm": 2.863307476043701, "learning_rate": 8.129578598524652e-07, "loss": 0.0084, "step": 360800 }, { "epoch": 3.855013622522571, "grad_norm": 0.011851501651108265, "learning_rate": 8.129447568418986e-07, "loss": 0.008, "step": 360810 }, { "epoch": 3.8551204658368503, "grad_norm": 13.278409957885742, "learning_rate": 8.129316534779935e-07, "loss": 0.0093, "step": 360820 }, { "epoch": 3.8552273091511298, "grad_norm": 0.06105523928999901, "learning_rate": 8.129185497607651e-07, "loss": 0.1008, "step": 360830 }, { "epoch": 3.8553341524654092, "grad_norm": 0.030512241646647453, "learning_rate": 8.129054456902279e-07, "loss": 0.0223, "step": 360840 }, { "epoch": 3.855440995779689, "grad_norm": 0.0023184630554169416, "learning_rate": 8.12892341266397e-07, "loss": 0.0029, "step": 360850 }, { "epoch": 3.8555478390939686, "grad_norm": 4.225531101226807, "learning_rate": 8.128792364892869e-07, "loss": 0.0162, "step": 360860 }, { "epoch": 3.8556546824082485, "grad_norm": 0.018429826945066452, "learning_rate": 8.128661313589126e-07, "loss": 0.0102, "step": 360870 }, { "epoch": 3.855761525722528, "grad_norm": 0.012170429341495037, "learning_rate": 8.128530258752888e-07, "loss": 0.0042, "step": 360880 }, { "epoch": 3.8558683690368074, "grad_norm": 1.1867364645004272, "learning_rate": 8.128399200384304e-07, "loss": 0.0214, "step": 360890 }, { "epoch": 3.855975212351087, "grad_norm": 0.07949680835008621, "learning_rate": 8.12826813848352e-07, "loss": 0.0159, "step": 360900 }, { "epoch": 3.8560820556653668, "grad_norm": 0.0034992338623851538, "learning_rate": 8.128137073050686e-07, "loss": 0.0133, "step": 360910 }, { "epoch": 3.8561888989796462, "grad_norm": 0.022705189883708954, "learning_rate": 8.12800600408595e-07, "loss": 0.0221, "step": 360920 }, { "epoch": 3.856295742293926, "grad_norm": 0.012225713580846786, "learning_rate": 8.127874931589457e-07, "loss": 0.0016, "step": 360930 }, { "epoch": 3.8564025856082056, "grad_norm": 0.17073124647140503, "learning_rate": 8.127743855561358e-07, "loss": 0.0073, "step": 360940 }, { "epoch": 3.856509428922485, "grad_norm": 12.80213737487793, "learning_rate": 8.1276127760018e-07, "loss": 0.0099, "step": 360950 }, { "epoch": 3.8566162722367645, "grad_norm": 1.623419165611267, "learning_rate": 8.127481692910931e-07, "loss": 0.002, "step": 360960 }, { "epoch": 3.8567231155510444, "grad_norm": 0.0035717154387384653, "learning_rate": 8.1273506062889e-07, "loss": 0.0099, "step": 360970 }, { "epoch": 3.856829958865324, "grad_norm": 5.744212627410889, "learning_rate": 8.127219516135853e-07, "loss": 0.0267, "step": 360980 }, { "epoch": 3.8569368021796038, "grad_norm": 6.716490268707275, "learning_rate": 8.12708842245194e-07, "loss": 0.0163, "step": 360990 }, { "epoch": 3.8570436454938832, "grad_norm": 0.37349289655685425, "learning_rate": 8.126957325237309e-07, "loss": 0.0031, "step": 361000 }, { "epoch": 3.8571504888081627, "grad_norm": 0.015058046206831932, "learning_rate": 8.126826224492105e-07, "loss": 0.0059, "step": 361010 }, { "epoch": 3.8572573321224426, "grad_norm": 3.682770252227783, "learning_rate": 8.126695120216481e-07, "loss": 0.1026, "step": 361020 }, { "epoch": 3.857364175436722, "grad_norm": 0.028692152351140976, "learning_rate": 8.126564012410579e-07, "loss": 0.012, "step": 361030 }, { "epoch": 3.8574710187510015, "grad_norm": 0.14503389596939087, "learning_rate": 8.126432901074551e-07, "loss": 0.0047, "step": 361040 }, { "epoch": 3.8575778620652814, "grad_norm": 0.0030316233169287443, "learning_rate": 8.126301786208546e-07, "loss": 0.0021, "step": 361050 }, { "epoch": 3.857684705379561, "grad_norm": 0.005053743254393339, "learning_rate": 8.126170667812709e-07, "loss": 0.0157, "step": 361060 }, { "epoch": 3.8577915486938403, "grad_norm": 0.005003733094781637, "learning_rate": 8.12603954588719e-07, "loss": 0.0131, "step": 361070 }, { "epoch": 3.8578983920081202, "grad_norm": 3.3571009635925293, "learning_rate": 8.125908420432137e-07, "loss": 0.0142, "step": 361080 }, { "epoch": 3.8580052353223997, "grad_norm": 0.07369280606508255, "learning_rate": 8.125777291447696e-07, "loss": 0.0279, "step": 361090 }, { "epoch": 3.858112078636679, "grad_norm": 9.710790634155273, "learning_rate": 8.125646158934018e-07, "loss": 0.0092, "step": 361100 }, { "epoch": 3.858218921950959, "grad_norm": 0.0010734471725299954, "learning_rate": 8.125515022891247e-07, "loss": 0.0027, "step": 361110 }, { "epoch": 3.8583257652652385, "grad_norm": 0.05958450585603714, "learning_rate": 8.125383883319537e-07, "loss": 0.0039, "step": 361120 }, { "epoch": 3.858432608579518, "grad_norm": 4.830453872680664, "learning_rate": 8.12525274021903e-07, "loss": 0.0096, "step": 361130 }, { "epoch": 3.858539451893798, "grad_norm": 4.6577372550964355, "learning_rate": 8.125121593589878e-07, "loss": 0.0099, "step": 361140 }, { "epoch": 3.8586462952080773, "grad_norm": 0.04399111121892929, "learning_rate": 8.124990443432229e-07, "loss": 0.0019, "step": 361150 }, { "epoch": 3.8587531385223572, "grad_norm": 0.06068682670593262, "learning_rate": 8.124859289746228e-07, "loss": 0.02, "step": 361160 }, { "epoch": 3.8588599818366367, "grad_norm": 0.19937196373939514, "learning_rate": 8.124728132532026e-07, "loss": 0.0116, "step": 361170 }, { "epoch": 3.858966825150916, "grad_norm": 0.01876167021691799, "learning_rate": 8.12459697178977e-07, "loss": 0.0065, "step": 361180 }, { "epoch": 3.8590736684651956, "grad_norm": 0.10926458984613419, "learning_rate": 8.124465807519607e-07, "loss": 0.0015, "step": 361190 }, { "epoch": 3.8591805117794755, "grad_norm": 0.001048017293214798, "learning_rate": 8.124334639721688e-07, "loss": 0.0105, "step": 361200 }, { "epoch": 3.859287355093755, "grad_norm": 0.13250882923603058, "learning_rate": 8.124203468396157e-07, "loss": 0.0008, "step": 361210 }, { "epoch": 3.859394198408035, "grad_norm": 1.1513651609420776, "learning_rate": 8.124072293543169e-07, "loss": 0.0334, "step": 361220 }, { "epoch": 3.8595010417223143, "grad_norm": 0.11395575106143951, "learning_rate": 8.123941115162865e-07, "loss": 0.0242, "step": 361230 }, { "epoch": 3.859607885036594, "grad_norm": 8.239045143127441, "learning_rate": 8.123809933255395e-07, "loss": 0.0182, "step": 361240 }, { "epoch": 3.8597147283508733, "grad_norm": 4.749405860900879, "learning_rate": 8.123678747820908e-07, "loss": 0.0351, "step": 361250 }, { "epoch": 3.859821571665153, "grad_norm": 0.027132973074913025, "learning_rate": 8.123547558859551e-07, "loss": 0.0304, "step": 361260 }, { "epoch": 3.8599284149794326, "grad_norm": 0.7984430193901062, "learning_rate": 8.123416366371475e-07, "loss": 0.0112, "step": 361270 }, { "epoch": 3.8600352582937125, "grad_norm": 0.036572329699993134, "learning_rate": 8.123285170356826e-07, "loss": 0.0103, "step": 361280 }, { "epoch": 3.860142101607992, "grad_norm": 0.03185510262846947, "learning_rate": 8.123153970815752e-07, "loss": 0.0113, "step": 361290 }, { "epoch": 3.8602489449222714, "grad_norm": 0.05880090966820717, "learning_rate": 8.123022767748399e-07, "loss": 0.0055, "step": 361300 }, { "epoch": 3.860355788236551, "grad_norm": 0.021706663072109222, "learning_rate": 8.122891561154921e-07, "loss": 0.0024, "step": 361310 }, { "epoch": 3.860462631550831, "grad_norm": 0.24272863566875458, "learning_rate": 8.122760351035462e-07, "loss": 0.0325, "step": 361320 }, { "epoch": 3.8605694748651103, "grad_norm": 0.08772286772727966, "learning_rate": 8.122629137390172e-07, "loss": 0.0152, "step": 361330 }, { "epoch": 3.86067631817939, "grad_norm": 3.137465476989746, "learning_rate": 8.122497920219195e-07, "loss": 0.0104, "step": 361340 }, { "epoch": 3.8607831614936696, "grad_norm": 4.085721492767334, "learning_rate": 8.122366699522685e-07, "loss": 0.0157, "step": 361350 }, { "epoch": 3.860890004807949, "grad_norm": 0.36237242817878723, "learning_rate": 8.122235475300785e-07, "loss": 0.0092, "step": 361360 }, { "epoch": 3.8609968481222285, "grad_norm": 0.010678582824766636, "learning_rate": 8.122104247553648e-07, "loss": 0.0175, "step": 361370 }, { "epoch": 3.8611036914365084, "grad_norm": 13.049697875976562, "learning_rate": 8.121973016281418e-07, "loss": 0.0014, "step": 361380 }, { "epoch": 3.861210534750788, "grad_norm": 16.414627075195312, "learning_rate": 8.121841781484244e-07, "loss": 0.0076, "step": 361390 }, { "epoch": 3.861317378065068, "grad_norm": 1.0865284204483032, "learning_rate": 8.121710543162276e-07, "loss": 0.0155, "step": 361400 }, { "epoch": 3.8614242213793473, "grad_norm": 0.1651368886232376, "learning_rate": 8.121579301315661e-07, "loss": 0.0091, "step": 361410 }, { "epoch": 3.8615310646936267, "grad_norm": 0.00880469474941492, "learning_rate": 8.121448055944548e-07, "loss": 0.0091, "step": 361420 }, { "epoch": 3.861637908007906, "grad_norm": 1.7990037202835083, "learning_rate": 8.121316807049083e-07, "loss": 0.0092, "step": 361430 }, { "epoch": 3.861744751322186, "grad_norm": 0.00764109380543232, "learning_rate": 8.121185554629417e-07, "loss": 0.0107, "step": 361440 }, { "epoch": 3.8618515946364655, "grad_norm": 0.01510537602007389, "learning_rate": 8.121054298685697e-07, "loss": 0.0015, "step": 361450 }, { "epoch": 3.8619584379507454, "grad_norm": 0.5442349910736084, "learning_rate": 8.120923039218073e-07, "loss": 0.009, "step": 361460 }, { "epoch": 3.862065281265025, "grad_norm": 0.09551500529050827, "learning_rate": 8.120791776226687e-07, "loss": 0.0032, "step": 361470 }, { "epoch": 3.8621721245793044, "grad_norm": 2.558074474334717, "learning_rate": 8.120660509711694e-07, "loss": 0.0039, "step": 361480 }, { "epoch": 3.862278967893584, "grad_norm": 0.02306140959262848, "learning_rate": 8.120529239673242e-07, "loss": 0.0147, "step": 361490 }, { "epoch": 3.8623858112078637, "grad_norm": 0.13095884025096893, "learning_rate": 8.120397966111473e-07, "loss": 0.071, "step": 361500 }, { "epoch": 3.862492654522143, "grad_norm": 0.04973079636693001, "learning_rate": 8.120266689026542e-07, "loss": 0.0015, "step": 361510 }, { "epoch": 3.862599497836423, "grad_norm": 0.08632423728704453, "learning_rate": 8.120135408418594e-07, "loss": 0.0158, "step": 361520 }, { "epoch": 3.8627063411507025, "grad_norm": 0.6367315649986267, "learning_rate": 8.120004124287776e-07, "loss": 0.0163, "step": 361530 }, { "epoch": 3.862813184464982, "grad_norm": 0.019324196502566338, "learning_rate": 8.119872836634239e-07, "loss": 0.0023, "step": 361540 }, { "epoch": 3.8629200277792615, "grad_norm": 0.004067587666213512, "learning_rate": 8.11974154545813e-07, "loss": 0.0032, "step": 361550 }, { "epoch": 3.8630268710935414, "grad_norm": 5.861937046051025, "learning_rate": 8.119610250759597e-07, "loss": 0.0492, "step": 361560 }, { "epoch": 3.863133714407821, "grad_norm": 0.7596858143806458, "learning_rate": 8.119478952538788e-07, "loss": 0.012, "step": 361570 }, { "epoch": 3.8632405577221007, "grad_norm": 1.8297038078308105, "learning_rate": 8.119347650795852e-07, "loss": 0.0112, "step": 361580 }, { "epoch": 3.86334740103638, "grad_norm": 0.6772879958152771, "learning_rate": 8.119216345530938e-07, "loss": 0.0012, "step": 361590 }, { "epoch": 3.8634542443506596, "grad_norm": 0.03065260499715805, "learning_rate": 8.119085036744193e-07, "loss": 0.0191, "step": 361600 }, { "epoch": 3.863561087664939, "grad_norm": 0.21914303302764893, "learning_rate": 8.118953724435765e-07, "loss": 0.0064, "step": 361610 }, { "epoch": 3.863667930979219, "grad_norm": 2.618813991546631, "learning_rate": 8.118822408605804e-07, "loss": 0.012, "step": 361620 }, { "epoch": 3.8637747742934985, "grad_norm": 0.014245891943573952, "learning_rate": 8.118691089254457e-07, "loss": 0.0136, "step": 361630 }, { "epoch": 3.8638816176077784, "grad_norm": 3.2560606002807617, "learning_rate": 8.118559766381872e-07, "loss": 0.0111, "step": 361640 }, { "epoch": 3.863988460922058, "grad_norm": 3.1524457931518555, "learning_rate": 8.118428439988196e-07, "loss": 0.0101, "step": 361650 }, { "epoch": 3.8640953042363373, "grad_norm": 0.020662594586610794, "learning_rate": 8.118297110073582e-07, "loss": 0.0012, "step": 361660 }, { "epoch": 3.8642021475506168, "grad_norm": 0.25353267788887024, "learning_rate": 8.118165776638173e-07, "loss": 0.0303, "step": 361670 }, { "epoch": 3.8643089908648967, "grad_norm": 0.9254371523857117, "learning_rate": 8.11803443968212e-07, "loss": 0.0057, "step": 361680 }, { "epoch": 3.864415834179176, "grad_norm": 0.026577357202768326, "learning_rate": 8.117903099205572e-07, "loss": 0.0143, "step": 361690 }, { "epoch": 3.864522677493456, "grad_norm": 0.04776570945978165, "learning_rate": 8.117771755208675e-07, "loss": 0.0161, "step": 361700 }, { "epoch": 3.8646295208077355, "grad_norm": 1.1300146579742432, "learning_rate": 8.117640407691578e-07, "loss": 0.0237, "step": 361710 }, { "epoch": 3.864736364122015, "grad_norm": 0.47859838604927063, "learning_rate": 8.117509056654431e-07, "loss": 0.004, "step": 361720 }, { "epoch": 3.864843207436295, "grad_norm": 0.004912799224257469, "learning_rate": 8.117377702097379e-07, "loss": 0.0163, "step": 361730 }, { "epoch": 3.8649500507505743, "grad_norm": 0.003114225808531046, "learning_rate": 8.117246344020574e-07, "loss": 0.0163, "step": 361740 }, { "epoch": 3.8650568940648538, "grad_norm": 0.013277948834002018, "learning_rate": 8.117114982424163e-07, "loss": 0.0111, "step": 361750 }, { "epoch": 3.8651637373791337, "grad_norm": 0.04936772957444191, "learning_rate": 8.116983617308291e-07, "loss": 0.002, "step": 361760 }, { "epoch": 3.865270580693413, "grad_norm": 0.0063744280487298965, "learning_rate": 8.116852248673112e-07, "loss": 0.1333, "step": 361770 }, { "epoch": 3.8653774240076926, "grad_norm": 0.06529989838600159, "learning_rate": 8.11672087651877e-07, "loss": 0.0169, "step": 361780 }, { "epoch": 3.8654842673219725, "grad_norm": 2.3778374195098877, "learning_rate": 8.116589500845417e-07, "loss": 0.0159, "step": 361790 }, { "epoch": 3.865591110636252, "grad_norm": 0.0007286830223165452, "learning_rate": 8.116458121653198e-07, "loss": 0.0151, "step": 361800 }, { "epoch": 3.8656979539505314, "grad_norm": 0.0017959550023078918, "learning_rate": 8.116326738942261e-07, "loss": 0.0167, "step": 361810 }, { "epoch": 3.8658047972648113, "grad_norm": 0.8290449976921082, "learning_rate": 8.116195352712758e-07, "loss": 0.0197, "step": 361820 }, { "epoch": 3.8659116405790908, "grad_norm": 4.366606712341309, "learning_rate": 8.116063962964835e-07, "loss": 0.0171, "step": 361830 }, { "epoch": 3.86601848389337, "grad_norm": 0.23196353018283844, "learning_rate": 8.11593256969864e-07, "loss": 0.0067, "step": 361840 }, { "epoch": 3.86612532720765, "grad_norm": 0.20763155817985535, "learning_rate": 8.115801172914322e-07, "loss": 0.0077, "step": 361850 }, { "epoch": 3.8662321705219296, "grad_norm": 0.04797825217247009, "learning_rate": 8.115669772612031e-07, "loss": 0.037, "step": 361860 }, { "epoch": 3.866339013836209, "grad_norm": 2.088222026824951, "learning_rate": 8.11553836879191e-07, "loss": 0.0041, "step": 361870 }, { "epoch": 3.866445857150489, "grad_norm": 4.624549865722656, "learning_rate": 8.115406961454114e-07, "loss": 0.0402, "step": 361880 }, { "epoch": 3.8665527004647684, "grad_norm": 0.14615651965141296, "learning_rate": 8.115275550598788e-07, "loss": 0.0024, "step": 361890 }, { "epoch": 3.866659543779048, "grad_norm": 0.11796221882104874, "learning_rate": 8.115144136226079e-07, "loss": 0.0012, "step": 361900 }, { "epoch": 3.8667663870933278, "grad_norm": 2.5709798336029053, "learning_rate": 8.11501271833614e-07, "loss": 0.0017, "step": 361910 }, { "epoch": 3.866873230407607, "grad_norm": 0.028800182044506073, "learning_rate": 8.114881296929115e-07, "loss": 0.0102, "step": 361920 }, { "epoch": 3.866980073721887, "grad_norm": 9.421553611755371, "learning_rate": 8.114749872005153e-07, "loss": 0.0286, "step": 361930 }, { "epoch": 3.8670869170361666, "grad_norm": 0.6709650158882141, "learning_rate": 8.114618443564405e-07, "loss": 0.0464, "step": 361940 }, { "epoch": 3.867193760350446, "grad_norm": 0.003943875897675753, "learning_rate": 8.114487011607018e-07, "loss": 0.0007, "step": 361950 }, { "epoch": 3.8673006036647255, "grad_norm": 10.683097839355469, "learning_rate": 8.114355576133138e-07, "loss": 0.0456, "step": 361960 }, { "epoch": 3.8674074469790054, "grad_norm": 3.0885376930236816, "learning_rate": 8.114224137142917e-07, "loss": 0.0299, "step": 361970 }, { "epoch": 3.867514290293285, "grad_norm": 3.7135565280914307, "learning_rate": 8.114092694636503e-07, "loss": 0.0303, "step": 361980 }, { "epoch": 3.8676211336075648, "grad_norm": 0.005079914815723896, "learning_rate": 8.113961248614041e-07, "loss": 0.0086, "step": 361990 }, { "epoch": 3.8677279769218442, "grad_norm": 2.967165231704712, "learning_rate": 8.113829799075682e-07, "loss": 0.0126, "step": 362000 }, { "epoch": 3.8678348202361237, "grad_norm": 0.03996381163597107, "learning_rate": 8.113698346021574e-07, "loss": 0.011, "step": 362010 }, { "epoch": 3.867941663550403, "grad_norm": 0.44851434230804443, "learning_rate": 8.113566889451868e-07, "loss": 0.0037, "step": 362020 }, { "epoch": 3.868048506864683, "grad_norm": 0.0798661932349205, "learning_rate": 8.113435429366709e-07, "loss": 0.0097, "step": 362030 }, { "epoch": 3.8681553501789625, "grad_norm": 0.003960472997277975, "learning_rate": 8.113303965766245e-07, "loss": 0.0043, "step": 362040 }, { "epoch": 3.8682621934932424, "grad_norm": 0.3199580907821655, "learning_rate": 8.113172498650626e-07, "loss": 0.0239, "step": 362050 }, { "epoch": 3.868369036807522, "grad_norm": 0.03239613398909569, "learning_rate": 8.113041028020001e-07, "loss": 0.0257, "step": 362060 }, { "epoch": 3.8684758801218013, "grad_norm": 0.10426649451255798, "learning_rate": 8.112909553874518e-07, "loss": 0.0026, "step": 362070 }, { "epoch": 3.868582723436081, "grad_norm": 0.005290502216666937, "learning_rate": 8.112778076214324e-07, "loss": 0.0179, "step": 362080 }, { "epoch": 3.8686895667503607, "grad_norm": 0.025556545704603195, "learning_rate": 8.11264659503957e-07, "loss": 0.0139, "step": 362090 }, { "epoch": 3.86879641006464, "grad_norm": 0.2958611845970154, "learning_rate": 8.112515110350402e-07, "loss": 0.0107, "step": 362100 }, { "epoch": 3.86890325337892, "grad_norm": 0.021533751860260963, "learning_rate": 8.11238362214697e-07, "loss": 0.0092, "step": 362110 }, { "epoch": 3.8690100966931995, "grad_norm": 1.044959545135498, "learning_rate": 8.112252130429422e-07, "loss": 0.003, "step": 362120 }, { "epoch": 3.869116940007479, "grad_norm": 0.8665051460266113, "learning_rate": 8.112120635197907e-07, "loss": 0.0157, "step": 362130 }, { "epoch": 3.8692237833217584, "grad_norm": 0.0631074458360672, "learning_rate": 8.111989136452572e-07, "loss": 0.0137, "step": 362140 }, { "epoch": 3.8693306266360383, "grad_norm": 0.009673329070210457, "learning_rate": 8.111857634193568e-07, "loss": 0.0401, "step": 362150 }, { "epoch": 3.869437469950318, "grad_norm": 0.07068710774183273, "learning_rate": 8.111726128421039e-07, "loss": 0.0002, "step": 362160 }, { "epoch": 3.8695443132645977, "grad_norm": 3.6479434967041016, "learning_rate": 8.111594619135139e-07, "loss": 0.0047, "step": 362170 }, { "epoch": 3.869651156578877, "grad_norm": 0.07052773237228394, "learning_rate": 8.111463106336012e-07, "loss": 0.0052, "step": 362180 }, { "epoch": 3.8697579998931566, "grad_norm": 0.03210487216711044, "learning_rate": 8.11133159002381e-07, "loss": 0.0058, "step": 362190 }, { "epoch": 3.869864843207436, "grad_norm": 0.2577095627784729, "learning_rate": 8.111200070198678e-07, "loss": 0.0018, "step": 362200 }, { "epoch": 3.869971686521716, "grad_norm": 0.5982418060302734, "learning_rate": 8.111068546860767e-07, "loss": 0.0017, "step": 362210 }, { "epoch": 3.8700785298359954, "grad_norm": 1.1729390621185303, "learning_rate": 8.110937020010227e-07, "loss": 0.0473, "step": 362220 }, { "epoch": 3.8701853731502753, "grad_norm": 2.213934898376465, "learning_rate": 8.110805489647201e-07, "loss": 0.0062, "step": 362230 }, { "epoch": 3.870292216464555, "grad_norm": 11.022835731506348, "learning_rate": 8.110673955771843e-07, "loss": 0.0061, "step": 362240 }, { "epoch": 3.8703990597788342, "grad_norm": 0.1014694944024086, "learning_rate": 8.110542418384298e-07, "loss": 0.0094, "step": 362250 }, { "epoch": 3.8705059030931137, "grad_norm": 0.3837471008300781, "learning_rate": 8.110410877484717e-07, "loss": 0.0138, "step": 362260 }, { "epoch": 3.8706127464073936, "grad_norm": 0.6146189570426941, "learning_rate": 8.110279333073247e-07, "loss": 0.0043, "step": 362270 }, { "epoch": 3.870719589721673, "grad_norm": 4.649687767028809, "learning_rate": 8.110147785150036e-07, "loss": 0.0056, "step": 362280 }, { "epoch": 3.870826433035953, "grad_norm": 0.6103983521461487, "learning_rate": 8.110016233715235e-07, "loss": 0.0136, "step": 362290 }, { "epoch": 3.8709332763502324, "grad_norm": 0.021197784692049026, "learning_rate": 8.109884678768988e-07, "loss": 0.0172, "step": 362300 }, { "epoch": 3.871040119664512, "grad_norm": 2.569680690765381, "learning_rate": 8.109753120311449e-07, "loss": 0.0165, "step": 362310 }, { "epoch": 3.8711469629787914, "grad_norm": 0.48851221799850464, "learning_rate": 8.109621558342764e-07, "loss": 0.0406, "step": 362320 }, { "epoch": 3.8712538062930713, "grad_norm": 0.0038204463198781013, "learning_rate": 8.109489992863082e-07, "loss": 0.0164, "step": 362330 }, { "epoch": 3.8713606496073507, "grad_norm": 0.6353519558906555, "learning_rate": 8.10935842387255e-07, "loss": 0.0465, "step": 362340 }, { "epoch": 3.8714674929216306, "grad_norm": 1.2449846267700195, "learning_rate": 8.109226851371317e-07, "loss": 0.0057, "step": 362350 }, { "epoch": 3.87157433623591, "grad_norm": 0.3112555146217346, "learning_rate": 8.109095275359533e-07, "loss": 0.0168, "step": 362360 }, { "epoch": 3.8716811795501895, "grad_norm": 0.0005009265732951462, "learning_rate": 8.108963695837346e-07, "loss": 0.0064, "step": 362370 }, { "epoch": 3.871788022864469, "grad_norm": 0.004138585180044174, "learning_rate": 8.108832112804904e-07, "loss": 0.002, "step": 362380 }, { "epoch": 3.871894866178749, "grad_norm": 0.9024903774261475, "learning_rate": 8.108700526262355e-07, "loss": 0.0283, "step": 362390 }, { "epoch": 3.8720017094930284, "grad_norm": 0.010646644979715347, "learning_rate": 8.108568936209851e-07, "loss": 0.0102, "step": 362400 }, { "epoch": 3.8721085528073083, "grad_norm": 0.002131747081875801, "learning_rate": 8.108437342647536e-07, "loss": 0.0046, "step": 362410 }, { "epoch": 3.8722153961215877, "grad_norm": 0.004619175102561712, "learning_rate": 8.108305745575561e-07, "loss": 0.0711, "step": 362420 }, { "epoch": 3.872322239435867, "grad_norm": 0.0013245324371382594, "learning_rate": 8.108174144994075e-07, "loss": 0.0035, "step": 362430 }, { "epoch": 3.8724290827501466, "grad_norm": 0.04475061967968941, "learning_rate": 8.108042540903225e-07, "loss": 0.0017, "step": 362440 }, { "epoch": 3.8725359260644265, "grad_norm": 0.3693322241306305, "learning_rate": 8.107910933303162e-07, "loss": 0.0055, "step": 362450 }, { "epoch": 3.872642769378706, "grad_norm": 0.05220958590507507, "learning_rate": 8.107779322194031e-07, "loss": 0.0016, "step": 362460 }, { "epoch": 3.872749612692986, "grad_norm": 0.004937511403113604, "learning_rate": 8.107647707575983e-07, "loss": 0.0498, "step": 362470 }, { "epoch": 3.8728564560072654, "grad_norm": 0.08055312186479568, "learning_rate": 8.107516089449166e-07, "loss": 0.0237, "step": 362480 }, { "epoch": 3.872963299321545, "grad_norm": 0.008791842497885227, "learning_rate": 8.10738446781373e-07, "loss": 0.0707, "step": 362490 }, { "epoch": 3.8730701426358247, "grad_norm": 0.06563791632652283, "learning_rate": 8.107252842669822e-07, "loss": 0.0401, "step": 362500 }, { "epoch": 3.873176985950104, "grad_norm": 0.05374443158507347, "learning_rate": 8.10712121401759e-07, "loss": 0.0186, "step": 362510 }, { "epoch": 3.8732838292643836, "grad_norm": 0.031238339841365814, "learning_rate": 8.106989581857184e-07, "loss": 0.0158, "step": 362520 }, { "epoch": 3.8733906725786635, "grad_norm": 0.04605924338102341, "learning_rate": 8.106857946188754e-07, "loss": 0.0078, "step": 362530 }, { "epoch": 3.873497515892943, "grad_norm": 0.0138299111276865, "learning_rate": 8.106726307012444e-07, "loss": 0.0058, "step": 362540 }, { "epoch": 3.8736043592072225, "grad_norm": 11.394012451171875, "learning_rate": 8.106594664328408e-07, "loss": 0.0176, "step": 362550 }, { "epoch": 3.8737112025215024, "grad_norm": 0.02990744821727276, "learning_rate": 8.106463018136791e-07, "loss": 0.0106, "step": 362560 }, { "epoch": 3.873818045835782, "grad_norm": 2.900541305541992, "learning_rate": 8.106331368437744e-07, "loss": 0.0419, "step": 362570 }, { "epoch": 3.8739248891500613, "grad_norm": 0.5378236770629883, "learning_rate": 8.106199715231412e-07, "loss": 0.015, "step": 362580 }, { "epoch": 3.874031732464341, "grad_norm": 0.0015602950006723404, "learning_rate": 8.106068058517948e-07, "loss": 0.0053, "step": 362590 }, { "epoch": 3.8741385757786206, "grad_norm": 4.699603080749512, "learning_rate": 8.105936398297499e-07, "loss": 0.0079, "step": 362600 }, { "epoch": 3.8742454190929, "grad_norm": 0.008570009842514992, "learning_rate": 8.105804734570212e-07, "loss": 0.015, "step": 362610 }, { "epoch": 3.87435226240718, "grad_norm": 0.24158744513988495, "learning_rate": 8.105673067336238e-07, "loss": 0.0224, "step": 362620 }, { "epoch": 3.8744591057214595, "grad_norm": 1.074540138244629, "learning_rate": 8.105541396595724e-07, "loss": 0.005, "step": 362630 }, { "epoch": 3.8745659490357394, "grad_norm": 3.1507468223571777, "learning_rate": 8.105409722348819e-07, "loss": 0.0262, "step": 362640 }, { "epoch": 3.874672792350019, "grad_norm": 0.016578633338212967, "learning_rate": 8.105278044595673e-07, "loss": 0.01, "step": 362650 }, { "epoch": 3.8747796356642983, "grad_norm": 0.05196499079465866, "learning_rate": 8.105146363336435e-07, "loss": 0.013, "step": 362660 }, { "epoch": 3.8748864789785777, "grad_norm": 0.014520537108182907, "learning_rate": 8.105014678571251e-07, "loss": 0.0369, "step": 362670 }, { "epoch": 3.8749933222928576, "grad_norm": 0.01561678759753704, "learning_rate": 8.10488299030027e-07, "loss": 0.0019, "step": 362680 }, { "epoch": 3.875100165607137, "grad_norm": 2.9691267013549805, "learning_rate": 8.104751298523643e-07, "loss": 0.014, "step": 362690 }, { "epoch": 3.875207008921417, "grad_norm": 0.006118419114500284, "learning_rate": 8.104619603241519e-07, "loss": 0.0154, "step": 362700 }, { "epoch": 3.8753138522356965, "grad_norm": 0.08439341932535172, "learning_rate": 8.104487904454042e-07, "loss": 0.0031, "step": 362710 }, { "epoch": 3.875420695549976, "grad_norm": 2.039348602294922, "learning_rate": 8.104356202161365e-07, "loss": 0.0061, "step": 362720 }, { "epoch": 3.8755275388642554, "grad_norm": 0.1495273858308792, "learning_rate": 8.104224496363636e-07, "loss": 0.001, "step": 362730 }, { "epoch": 3.8756343821785353, "grad_norm": 1.5845571756362915, "learning_rate": 8.104092787061003e-07, "loss": 0.001, "step": 362740 }, { "epoch": 3.8757412254928147, "grad_norm": 0.4829593896865845, "learning_rate": 8.103961074253616e-07, "loss": 0.0127, "step": 362750 }, { "epoch": 3.8758480688070946, "grad_norm": 2.5175158977508545, "learning_rate": 8.10382935794162e-07, "loss": 0.0382, "step": 362760 }, { "epoch": 3.875954912121374, "grad_norm": 4.836911678314209, "learning_rate": 8.103697638125169e-07, "loss": 0.0125, "step": 362770 }, { "epoch": 3.8760617554356536, "grad_norm": 2.3554506301879883, "learning_rate": 8.103565914804408e-07, "loss": 0.0228, "step": 362780 }, { "epoch": 3.876168598749933, "grad_norm": 0.15467298030853271, "learning_rate": 8.103434187979487e-07, "loss": 0.0006, "step": 362790 }, { "epoch": 3.876275442064213, "grad_norm": 0.011024560779333115, "learning_rate": 8.103302457650556e-07, "loss": 0.0027, "step": 362800 }, { "epoch": 3.8763822853784924, "grad_norm": 0.04253631830215454, "learning_rate": 8.103170723817759e-07, "loss": 0.0007, "step": 362810 }, { "epoch": 3.8764891286927723, "grad_norm": 0.008244636468589306, "learning_rate": 8.103038986481251e-07, "loss": 0.0292, "step": 362820 }, { "epoch": 3.8765959720070517, "grad_norm": 0.003886191640049219, "learning_rate": 8.102907245641177e-07, "loss": 0.0293, "step": 362830 }, { "epoch": 3.876702815321331, "grad_norm": 0.10335370898246765, "learning_rate": 8.102775501297685e-07, "loss": 0.0091, "step": 362840 }, { "epoch": 3.8768096586356107, "grad_norm": 0.004275450948625803, "learning_rate": 8.102643753450927e-07, "loss": 0.0093, "step": 362850 }, { "epoch": 3.8769165019498906, "grad_norm": 0.06885010749101639, "learning_rate": 8.102512002101049e-07, "loss": 0.0355, "step": 362860 }, { "epoch": 3.87702334526417, "grad_norm": 0.8463093638420105, "learning_rate": 8.102380247248201e-07, "loss": 0.0136, "step": 362870 }, { "epoch": 3.87713018857845, "grad_norm": 0.1815357357263565, "learning_rate": 8.102248488892533e-07, "loss": 0.0126, "step": 362880 }, { "epoch": 3.8772370318927294, "grad_norm": 3.229987144470215, "learning_rate": 8.102116727034191e-07, "loss": 0.0059, "step": 362890 }, { "epoch": 3.877343875207009, "grad_norm": 0.059742871671915054, "learning_rate": 8.101984961673326e-07, "loss": 0.0033, "step": 362900 }, { "epoch": 3.8774507185212883, "grad_norm": 0.0076430547051131725, "learning_rate": 8.101853192810087e-07, "loss": 0.0135, "step": 362910 }, { "epoch": 3.877557561835568, "grad_norm": 0.09816759079694748, "learning_rate": 8.101721420444619e-07, "loss": 0.0069, "step": 362920 }, { "epoch": 3.8776644051498477, "grad_norm": 39.563541412353516, "learning_rate": 8.101589644577074e-07, "loss": 0.0166, "step": 362930 }, { "epoch": 3.8777712484641276, "grad_norm": 6.706739902496338, "learning_rate": 8.1014578652076e-07, "loss": 0.0161, "step": 362940 }, { "epoch": 3.877878091778407, "grad_norm": 0.0006965695065446198, "learning_rate": 8.101326082336349e-07, "loss": 0.0032, "step": 362950 }, { "epoch": 3.8779849350926865, "grad_norm": 0.012733560986816883, "learning_rate": 8.101194295963463e-07, "loss": 0.0175, "step": 362960 }, { "epoch": 3.878091778406966, "grad_norm": 0.09709378331899643, "learning_rate": 8.101062506089095e-07, "loss": 0.0254, "step": 362970 }, { "epoch": 3.878198621721246, "grad_norm": 3.960888385772705, "learning_rate": 8.100930712713394e-07, "loss": 0.0616, "step": 362980 }, { "epoch": 3.8783054650355253, "grad_norm": 1.6327946186065674, "learning_rate": 8.100798915836509e-07, "loss": 0.0054, "step": 362990 }, { "epoch": 3.878412308349805, "grad_norm": 4.924502372741699, "learning_rate": 8.100667115458587e-07, "loss": 0.0021, "step": 363000 }, { "epoch": 3.8785191516640847, "grad_norm": 4.227581977844238, "learning_rate": 8.10053531157978e-07, "loss": 0.0338, "step": 363010 }, { "epoch": 3.878625994978364, "grad_norm": 1.3884133100509644, "learning_rate": 8.10040350420023e-07, "loss": 0.0145, "step": 363020 }, { "epoch": 3.8787328382926436, "grad_norm": 0.003246522741392255, "learning_rate": 8.100271693320096e-07, "loss": 0.0026, "step": 363030 }, { "epoch": 3.8788396816069235, "grad_norm": 0.004926749039441347, "learning_rate": 8.100139878939517e-07, "loss": 0.0145, "step": 363040 }, { "epoch": 3.878946524921203, "grad_norm": 0.24446281790733337, "learning_rate": 8.100008061058648e-07, "loss": 0.0138, "step": 363050 }, { "epoch": 3.879053368235483, "grad_norm": 0.01717408373951912, "learning_rate": 8.099876239677637e-07, "loss": 0.001, "step": 363060 }, { "epoch": 3.8791602115497623, "grad_norm": 0.05739661306142807, "learning_rate": 8.099744414796629e-07, "loss": 0.0022, "step": 363070 }, { "epoch": 3.8792670548640418, "grad_norm": 0.12769784033298492, "learning_rate": 8.099612586415777e-07, "loss": 0.0028, "step": 363080 }, { "epoch": 3.8793738981783212, "grad_norm": 0.008210589177906513, "learning_rate": 8.099480754535228e-07, "loss": 0.0016, "step": 363090 }, { "epoch": 3.879480741492601, "grad_norm": 0.05292171239852905, "learning_rate": 8.099348919155132e-07, "loss": 0.0064, "step": 363100 }, { "epoch": 3.8795875848068806, "grad_norm": 6.21858024597168, "learning_rate": 8.099217080275638e-07, "loss": 0.0317, "step": 363110 }, { "epoch": 3.8796944281211605, "grad_norm": 2.6052122116088867, "learning_rate": 8.099085237896892e-07, "loss": 0.0101, "step": 363120 }, { "epoch": 3.87980127143544, "grad_norm": 0.02248329669237137, "learning_rate": 8.098953392019046e-07, "loss": 0.0066, "step": 363130 }, { "epoch": 3.8799081147497194, "grad_norm": 0.0020289968233555555, "learning_rate": 8.098821542642248e-07, "loss": 0.0098, "step": 363140 }, { "epoch": 3.880014958063999, "grad_norm": 2.000516653060913, "learning_rate": 8.098689689766646e-07, "loss": 0.006, "step": 363150 }, { "epoch": 3.880121801378279, "grad_norm": 0.1808263510465622, "learning_rate": 8.09855783339239e-07, "loss": 0.0049, "step": 363160 }, { "epoch": 3.8802286446925582, "grad_norm": 2.1303133964538574, "learning_rate": 8.098425973519627e-07, "loss": 0.0204, "step": 363170 }, { "epoch": 3.880335488006838, "grad_norm": 0.026677336543798447, "learning_rate": 8.098294110148508e-07, "loss": 0.0134, "step": 363180 }, { "epoch": 3.8804423313211176, "grad_norm": 0.004562037531286478, "learning_rate": 8.098162243279181e-07, "loss": 0.0207, "step": 363190 }, { "epoch": 3.880549174635397, "grad_norm": 0.01367254089564085, "learning_rate": 8.098030372911796e-07, "loss": 0.0077, "step": 363200 }, { "epoch": 3.880656017949677, "grad_norm": 0.2874550223350525, "learning_rate": 8.097898499046499e-07, "loss": 0.0402, "step": 363210 }, { "epoch": 3.8807628612639564, "grad_norm": 7.057174205780029, "learning_rate": 8.097766621683443e-07, "loss": 0.0111, "step": 363220 }, { "epoch": 3.880869704578236, "grad_norm": 0.875585675239563, "learning_rate": 8.097634740822774e-07, "loss": 0.0029, "step": 363230 }, { "epoch": 3.880976547892516, "grad_norm": 1.8921846151351929, "learning_rate": 8.09750285646464e-07, "loss": 0.0037, "step": 363240 }, { "epoch": 3.8810833912067952, "grad_norm": 0.09431448578834534, "learning_rate": 8.097370968609194e-07, "loss": 0.0103, "step": 363250 }, { "epoch": 3.8811902345210747, "grad_norm": 0.015534774400293827, "learning_rate": 8.097239077256582e-07, "loss": 0.0027, "step": 363260 }, { "epoch": 3.8812970778353546, "grad_norm": 0.002158168936148286, "learning_rate": 8.097107182406951e-07, "loss": 0.0075, "step": 363270 }, { "epoch": 3.881403921149634, "grad_norm": 0.013117166236042976, "learning_rate": 8.096975284060455e-07, "loss": 0.0012, "step": 363280 }, { "epoch": 3.8815107644639135, "grad_norm": 0.005156519822776318, "learning_rate": 8.096843382217238e-07, "loss": 0.0068, "step": 363290 }, { "epoch": 3.8816176077781934, "grad_norm": 0.0004270482459105551, "learning_rate": 8.096711476877451e-07, "loss": 0.0076, "step": 363300 }, { "epoch": 3.881724451092473, "grad_norm": 58.633216857910156, "learning_rate": 8.096579568041244e-07, "loss": 0.0355, "step": 363310 }, { "epoch": 3.8818312944067523, "grad_norm": 0.666732907295227, "learning_rate": 8.096447655708766e-07, "loss": 0.0358, "step": 363320 }, { "epoch": 3.8819381377210322, "grad_norm": 4.402419567108154, "learning_rate": 8.096315739880164e-07, "loss": 0.0129, "step": 363330 }, { "epoch": 3.8820449810353117, "grad_norm": 0.6177998781204224, "learning_rate": 8.096183820555586e-07, "loss": 0.011, "step": 363340 }, { "epoch": 3.882151824349591, "grad_norm": 0.40215668082237244, "learning_rate": 8.096051897735186e-07, "loss": 0.0149, "step": 363350 }, { "epoch": 3.882258667663871, "grad_norm": 0.3551720082759857, "learning_rate": 8.095919971419108e-07, "loss": 0.007, "step": 363360 }, { "epoch": 3.8823655109781505, "grad_norm": 4.412335395812988, "learning_rate": 8.095788041607503e-07, "loss": 0.0111, "step": 363370 }, { "epoch": 3.88247235429243, "grad_norm": 0.0030404317658394575, "learning_rate": 8.09565610830052e-07, "loss": 0.0037, "step": 363380 }, { "epoch": 3.88257919760671, "grad_norm": 0.4308093190193176, "learning_rate": 8.095524171498307e-07, "loss": 0.0076, "step": 363390 }, { "epoch": 3.8826860409209893, "grad_norm": 0.6422379612922668, "learning_rate": 8.095392231201012e-07, "loss": 0.0049, "step": 363400 }, { "epoch": 3.8827928842352692, "grad_norm": 0.1281977742910385, "learning_rate": 8.095260287408788e-07, "loss": 0.0077, "step": 363410 }, { "epoch": 3.8828997275495487, "grad_norm": 1.0835752487182617, "learning_rate": 8.095128340121782e-07, "loss": 0.0079, "step": 363420 }, { "epoch": 3.883006570863828, "grad_norm": 0.18468104302883148, "learning_rate": 8.094996389340141e-07, "loss": 0.0081, "step": 363430 }, { "epoch": 3.8831134141781076, "grad_norm": 2.2602968215942383, "learning_rate": 8.094864435064015e-07, "loss": 0.0176, "step": 363440 }, { "epoch": 3.8832202574923875, "grad_norm": 5.28305721282959, "learning_rate": 8.094732477293554e-07, "loss": 0.0073, "step": 363450 }, { "epoch": 3.883327100806667, "grad_norm": 0.03406871110200882, "learning_rate": 8.094600516028907e-07, "loss": 0.0274, "step": 363460 }, { "epoch": 3.883433944120947, "grad_norm": 0.20869208872318268, "learning_rate": 8.094468551270222e-07, "loss": 0.0162, "step": 363470 }, { "epoch": 3.8835407874352263, "grad_norm": 6.342848777770996, "learning_rate": 8.09433658301765e-07, "loss": 0.0238, "step": 363480 }, { "epoch": 3.883647630749506, "grad_norm": 0.0076366811990737915, "learning_rate": 8.094204611271335e-07, "loss": 0.0147, "step": 363490 }, { "epoch": 3.8837544740637853, "grad_norm": 2.384835958480835, "learning_rate": 8.094072636031433e-07, "loss": 0.0043, "step": 363500 }, { "epoch": 3.883861317378065, "grad_norm": 5.35090970993042, "learning_rate": 8.093940657298089e-07, "loss": 0.0106, "step": 363510 }, { "epoch": 3.8839681606923446, "grad_norm": 1.3939990997314453, "learning_rate": 8.093808675071451e-07, "loss": 0.0131, "step": 363520 }, { "epoch": 3.8840750040066245, "grad_norm": 0.0012560047907754779, "learning_rate": 8.093676689351669e-07, "loss": 0.0448, "step": 363530 }, { "epoch": 3.884181847320904, "grad_norm": 3.0231833457946777, "learning_rate": 8.093544700138895e-07, "loss": 0.005, "step": 363540 }, { "epoch": 3.8842886906351834, "grad_norm": 0.06533394008874893, "learning_rate": 8.093412707433272e-07, "loss": 0.0054, "step": 363550 }, { "epoch": 3.884395533949463, "grad_norm": 0.00531763955950737, "learning_rate": 8.093280711234955e-07, "loss": 0.0089, "step": 363560 }, { "epoch": 3.884502377263743, "grad_norm": 0.02864755131304264, "learning_rate": 8.093148711544091e-07, "loss": 0.0039, "step": 363570 }, { "epoch": 3.8846092205780223, "grad_norm": 2.049900531768799, "learning_rate": 8.093016708360827e-07, "loss": 0.0278, "step": 363580 }, { "epoch": 3.884716063892302, "grad_norm": 5.039288520812988, "learning_rate": 8.092884701685316e-07, "loss": 0.0014, "step": 363590 }, { "epoch": 3.8848229072065816, "grad_norm": 3.6017234325408936, "learning_rate": 8.092752691517704e-07, "loss": 0.029, "step": 363600 }, { "epoch": 3.884929750520861, "grad_norm": 1.457228422164917, "learning_rate": 8.092620677858139e-07, "loss": 0.0207, "step": 363610 }, { "epoch": 3.8850365938351406, "grad_norm": 0.19990843534469604, "learning_rate": 8.092488660706773e-07, "loss": 0.0225, "step": 363620 }, { "epoch": 3.8851434371494205, "grad_norm": 0.4474257230758667, "learning_rate": 8.092356640063754e-07, "loss": 0.0042, "step": 363630 }, { "epoch": 3.8852502804637, "grad_norm": 0.016698967665433884, "learning_rate": 8.09222461592923e-07, "loss": 0.0395, "step": 363640 }, { "epoch": 3.88535712377798, "grad_norm": 0.009024019353091717, "learning_rate": 8.092092588303353e-07, "loss": 0.0284, "step": 363650 }, { "epoch": 3.8854639670922593, "grad_norm": 1.602419376373291, "learning_rate": 8.091960557186269e-07, "loss": 0.0172, "step": 363660 }, { "epoch": 3.8855708104065387, "grad_norm": 0.07739800959825516, "learning_rate": 8.091828522578128e-07, "loss": 0.0037, "step": 363670 }, { "epoch": 3.885677653720818, "grad_norm": 7.4385151863098145, "learning_rate": 8.09169648447908e-07, "loss": 0.0161, "step": 363680 }, { "epoch": 3.885784497035098, "grad_norm": 0.016474314033985138, "learning_rate": 8.091564442889273e-07, "loss": 0.0081, "step": 363690 }, { "epoch": 3.8858913403493776, "grad_norm": 0.13208557665348053, "learning_rate": 8.091432397808856e-07, "loss": 0.0131, "step": 363700 }, { "epoch": 3.8859981836636575, "grad_norm": 7.780792236328125, "learning_rate": 8.091300349237979e-07, "loss": 0.0269, "step": 363710 }, { "epoch": 3.886105026977937, "grad_norm": 0.011368905194103718, "learning_rate": 8.09116829717679e-07, "loss": 0.0045, "step": 363720 }, { "epoch": 3.8862118702922164, "grad_norm": 0.7517860531806946, "learning_rate": 8.091036241625439e-07, "loss": 0.0019, "step": 363730 }, { "epoch": 3.886318713606496, "grad_norm": 0.6829450726509094, "learning_rate": 8.090904182584075e-07, "loss": 0.0103, "step": 363740 }, { "epoch": 3.8864255569207757, "grad_norm": 0.05315549299120903, "learning_rate": 8.090772120052846e-07, "loss": 0.013, "step": 363750 }, { "epoch": 3.886532400235055, "grad_norm": 0.036857906728982925, "learning_rate": 8.090640054031903e-07, "loss": 0.0007, "step": 363760 }, { "epoch": 3.886639243549335, "grad_norm": 5.352806091308594, "learning_rate": 8.090507984521393e-07, "loss": 0.0117, "step": 363770 }, { "epoch": 3.8867460868636146, "grad_norm": 6.399233818054199, "learning_rate": 8.090375911521468e-07, "loss": 0.0107, "step": 363780 }, { "epoch": 3.886852930177894, "grad_norm": 0.009341899305582047, "learning_rate": 8.090243835032273e-07, "loss": 0.0276, "step": 363790 }, { "epoch": 3.8869597734921735, "grad_norm": 0.007305764127522707, "learning_rate": 8.090111755053961e-07, "loss": 0.0097, "step": 363800 }, { "epoch": 3.8870666168064534, "grad_norm": 2.9769723415374756, "learning_rate": 8.08997967158668e-07, "loss": 0.0026, "step": 363810 }, { "epoch": 3.887173460120733, "grad_norm": 0.049769338220357895, "learning_rate": 8.089847584630578e-07, "loss": 0.0228, "step": 363820 }, { "epoch": 3.8872803034350127, "grad_norm": 2.6205132007598877, "learning_rate": 8.089715494185805e-07, "loss": 0.0283, "step": 363830 }, { "epoch": 3.887387146749292, "grad_norm": 0.298898845911026, "learning_rate": 8.089583400252511e-07, "loss": 0.0095, "step": 363840 }, { "epoch": 3.8874939900635717, "grad_norm": 1.0578786134719849, "learning_rate": 8.089451302830844e-07, "loss": 0.0046, "step": 363850 }, { "epoch": 3.887600833377851, "grad_norm": 0.24235039949417114, "learning_rate": 8.089319201920952e-07, "loss": 0.0117, "step": 363860 }, { "epoch": 3.887707676692131, "grad_norm": 0.06194864958524704, "learning_rate": 8.089187097522984e-07, "loss": 0.0016, "step": 363870 }, { "epoch": 3.8878145200064105, "grad_norm": 2.3421473503112793, "learning_rate": 8.089054989637094e-07, "loss": 0.0181, "step": 363880 }, { "epoch": 3.8879213633206904, "grad_norm": 0.02570689097046852, "learning_rate": 8.088922878263427e-07, "loss": 0.0153, "step": 363890 }, { "epoch": 3.88802820663497, "grad_norm": 0.0647350326180458, "learning_rate": 8.088790763402131e-07, "loss": 0.002, "step": 363900 }, { "epoch": 3.8881350499492493, "grad_norm": 0.0006375135853886604, "learning_rate": 8.088658645053359e-07, "loss": 0.0051, "step": 363910 }, { "epoch": 3.8882418932635288, "grad_norm": 0.01740170270204544, "learning_rate": 8.088526523217259e-07, "loss": 0.0303, "step": 363920 }, { "epoch": 3.8883487365778087, "grad_norm": 0.030224552378058434, "learning_rate": 8.088394397893978e-07, "loss": 0.0348, "step": 363930 }, { "epoch": 3.888455579892088, "grad_norm": 0.5002509951591492, "learning_rate": 8.088262269083665e-07, "loss": 0.0145, "step": 363940 }, { "epoch": 3.888562423206368, "grad_norm": 1.4641516208648682, "learning_rate": 8.088130136786473e-07, "loss": 0.0022, "step": 363950 }, { "epoch": 3.8886692665206475, "grad_norm": 0.0018685045652091503, "learning_rate": 8.087998001002549e-07, "loss": 0.0219, "step": 363960 }, { "epoch": 3.888776109834927, "grad_norm": 1.9604873657226562, "learning_rate": 8.087865861732042e-07, "loss": 0.0198, "step": 363970 }, { "epoch": 3.888882953149207, "grad_norm": 3.7637994289398193, "learning_rate": 8.0877337189751e-07, "loss": 0.0101, "step": 363980 }, { "epoch": 3.8889897964634863, "grad_norm": 0.8752497434616089, "learning_rate": 8.087601572731876e-07, "loss": 0.0168, "step": 363990 }, { "epoch": 3.8890966397777658, "grad_norm": 0.8726921677589417, "learning_rate": 8.087469423002515e-07, "loss": 0.0437, "step": 364000 }, { "epoch": 3.8892034830920457, "grad_norm": 0.006630045361816883, "learning_rate": 8.087337269787169e-07, "loss": 0.0169, "step": 364010 }, { "epoch": 3.889310326406325, "grad_norm": 0.004754106048494577, "learning_rate": 8.087205113085986e-07, "loss": 0.0212, "step": 364020 }, { "epoch": 3.8894171697206046, "grad_norm": 0.012475559487938881, "learning_rate": 8.087072952899115e-07, "loss": 0.005, "step": 364030 }, { "epoch": 3.8895240130348845, "grad_norm": 0.4837796986103058, "learning_rate": 8.086940789226707e-07, "loss": 0.0247, "step": 364040 }, { "epoch": 3.889630856349164, "grad_norm": 0.4850535988807678, "learning_rate": 8.086808622068908e-07, "loss": 0.001, "step": 364050 }, { "epoch": 3.8897376996634434, "grad_norm": 1.5356121063232422, "learning_rate": 8.08667645142587e-07, "loss": 0.0183, "step": 364060 }, { "epoch": 3.8898445429777233, "grad_norm": 4.4684343338012695, "learning_rate": 8.086544277297741e-07, "loss": 0.0053, "step": 364070 }, { "epoch": 3.8899513862920028, "grad_norm": 0.009496673941612244, "learning_rate": 8.08641209968467e-07, "loss": 0.0038, "step": 364080 }, { "epoch": 3.8900582296062822, "grad_norm": 0.0012638089247047901, "learning_rate": 8.086279918586809e-07, "loss": 0.0147, "step": 364090 }, { "epoch": 3.890165072920562, "grad_norm": 0.0015049584908410907, "learning_rate": 8.086147734004303e-07, "loss": 0.0185, "step": 364100 }, { "epoch": 3.8902719162348416, "grad_norm": 0.10867786407470703, "learning_rate": 8.086015545937304e-07, "loss": 0.0006, "step": 364110 }, { "epoch": 3.8903787595491215, "grad_norm": 2.7120423316955566, "learning_rate": 8.085883354385961e-07, "loss": 0.0078, "step": 364120 }, { "epoch": 3.890485602863401, "grad_norm": 1.4883977174758911, "learning_rate": 8.08575115935042e-07, "loss": 0.0095, "step": 364130 }, { "epoch": 3.8905924461776804, "grad_norm": 4.4670023918151855, "learning_rate": 8.085618960830837e-07, "loss": 0.005, "step": 364140 }, { "epoch": 3.89069928949196, "grad_norm": 2.3432257175445557, "learning_rate": 8.085486758827354e-07, "loss": 0.0098, "step": 364150 }, { "epoch": 3.8908061328062398, "grad_norm": 10.906573295593262, "learning_rate": 8.085354553340126e-07, "loss": 0.0328, "step": 364160 }, { "epoch": 3.8909129761205192, "grad_norm": 0.0011989009799435735, "learning_rate": 8.085222344369299e-07, "loss": 0.0256, "step": 364170 }, { "epoch": 3.891019819434799, "grad_norm": 0.022911537438631058, "learning_rate": 8.085090131915022e-07, "loss": 0.025, "step": 364180 }, { "epoch": 3.8911266627490786, "grad_norm": 1.2611685991287231, "learning_rate": 8.084957915977448e-07, "loss": 0.0054, "step": 364190 }, { "epoch": 3.891233506063358, "grad_norm": 0.021508552134037018, "learning_rate": 8.084825696556723e-07, "loss": 0.0238, "step": 364200 }, { "epoch": 3.8913403493776375, "grad_norm": 0.49163687229156494, "learning_rate": 8.084693473652995e-07, "loss": 0.0054, "step": 364210 }, { "epoch": 3.8914471926919174, "grad_norm": 5.228787899017334, "learning_rate": 8.084561247266417e-07, "loss": 0.0195, "step": 364220 }, { "epoch": 3.891554036006197, "grad_norm": 1.3793543577194214, "learning_rate": 8.084429017397136e-07, "loss": 0.0135, "step": 364230 }, { "epoch": 3.8916608793204768, "grad_norm": 1.8889440298080444, "learning_rate": 8.084296784045302e-07, "loss": 0.0043, "step": 364240 }, { "epoch": 3.8917677226347562, "grad_norm": 2.278027296066284, "learning_rate": 8.084164547211063e-07, "loss": 0.0221, "step": 364250 }, { "epoch": 3.8918745659490357, "grad_norm": 0.004556711763143539, "learning_rate": 8.08403230689457e-07, "loss": 0.0114, "step": 364260 }, { "epoch": 3.891981409263315, "grad_norm": 6.423713684082031, "learning_rate": 8.083900063095972e-07, "loss": 0.0119, "step": 364270 }, { "epoch": 3.892088252577595, "grad_norm": 9.735675811767578, "learning_rate": 8.083767815815419e-07, "loss": 0.0102, "step": 364280 }, { "epoch": 3.8921950958918745, "grad_norm": 0.10994522273540497, "learning_rate": 8.083635565053059e-07, "loss": 0.0183, "step": 364290 }, { "epoch": 3.8923019392061544, "grad_norm": 1.3105233907699585, "learning_rate": 8.083503310809041e-07, "loss": 0.0561, "step": 364300 }, { "epoch": 3.892408782520434, "grad_norm": 0.01069975271821022, "learning_rate": 8.083371053083515e-07, "loss": 0.0025, "step": 364310 }, { "epoch": 3.8925156258347133, "grad_norm": 0.004470255225896835, "learning_rate": 8.083238791876632e-07, "loss": 0.0029, "step": 364320 }, { "epoch": 3.892622469148993, "grad_norm": 1.544433355331421, "learning_rate": 8.083106527188537e-07, "loss": 0.0049, "step": 364330 }, { "epoch": 3.8927293124632727, "grad_norm": 7.271387577056885, "learning_rate": 8.082974259019384e-07, "loss": 0.0195, "step": 364340 }, { "epoch": 3.892836155777552, "grad_norm": 0.25644269585609436, "learning_rate": 8.08284198736932e-07, "loss": 0.0082, "step": 364350 }, { "epoch": 3.892942999091832, "grad_norm": 0.018861616030335426, "learning_rate": 8.082709712238493e-07, "loss": 0.0012, "step": 364360 }, { "epoch": 3.8930498424061115, "grad_norm": 2.771838426589966, "learning_rate": 8.082577433627057e-07, "loss": 0.0075, "step": 364370 }, { "epoch": 3.893156685720391, "grad_norm": 1.5058573484420776, "learning_rate": 8.082445151535157e-07, "loss": 0.0405, "step": 364380 }, { "epoch": 3.8932635290346704, "grad_norm": 0.15053513646125793, "learning_rate": 8.082312865962944e-07, "loss": 0.0036, "step": 364390 }, { "epoch": 3.8933703723489503, "grad_norm": 0.006327807437628508, "learning_rate": 8.082180576910567e-07, "loss": 0.0146, "step": 364400 }, { "epoch": 3.89347721566323, "grad_norm": 4.144615650177002, "learning_rate": 8.082048284378176e-07, "loss": 0.005, "step": 364410 }, { "epoch": 3.8935840589775097, "grad_norm": 9.519326210021973, "learning_rate": 8.08191598836592e-07, "loss": 0.0039, "step": 364420 }, { "epoch": 3.893690902291789, "grad_norm": 0.019325030967593193, "learning_rate": 8.081783688873946e-07, "loss": 0.0074, "step": 364430 }, { "epoch": 3.8937977456060686, "grad_norm": 0.0005074497312307358, "learning_rate": 8.081651385902408e-07, "loss": 0.0107, "step": 364440 }, { "epoch": 3.893904588920348, "grad_norm": 8.36292552947998, "learning_rate": 8.081519079451451e-07, "loss": 0.0163, "step": 364450 }, { "epoch": 3.894011432234628, "grad_norm": 0.12072502076625824, "learning_rate": 8.081386769521229e-07, "loss": 0.0715, "step": 364460 }, { "epoch": 3.8941182755489074, "grad_norm": 0.0729379802942276, "learning_rate": 8.081254456111887e-07, "loss": 0.0063, "step": 364470 }, { "epoch": 3.8942251188631873, "grad_norm": 1.0010018348693848, "learning_rate": 8.081122139223578e-07, "loss": 0.0074, "step": 364480 }, { "epoch": 3.894331962177467, "grad_norm": 1.7530081272125244, "learning_rate": 8.080989818856448e-07, "loss": 0.0089, "step": 364490 }, { "epoch": 3.8944388054917463, "grad_norm": 2.175654172897339, "learning_rate": 8.080857495010647e-07, "loss": 0.0832, "step": 364500 }, { "epoch": 3.8945456488060257, "grad_norm": 0.03923064097762108, "learning_rate": 8.080725167686326e-07, "loss": 0.0031, "step": 364510 }, { "epoch": 3.8946524921203056, "grad_norm": 0.019728360697627068, "learning_rate": 8.080592836883634e-07, "loss": 0.0193, "step": 364520 }, { "epoch": 3.894759335434585, "grad_norm": 5.680878162384033, "learning_rate": 8.080460502602721e-07, "loss": 0.0139, "step": 364530 }, { "epoch": 3.894866178748865, "grad_norm": 0.22442938387393951, "learning_rate": 8.080328164843735e-07, "loss": 0.0107, "step": 364540 }, { "epoch": 3.8949730220631444, "grad_norm": 1.2555482387542725, "learning_rate": 8.080195823606825e-07, "loss": 0.0069, "step": 364550 }, { "epoch": 3.895079865377424, "grad_norm": 0.01840871572494507, "learning_rate": 8.080063478892143e-07, "loss": 0.0033, "step": 364560 }, { "epoch": 3.8951867086917034, "grad_norm": 0.012107446789741516, "learning_rate": 8.079931130699837e-07, "loss": 0.0021, "step": 364570 }, { "epoch": 3.8952935520059833, "grad_norm": 0.07307077944278717, "learning_rate": 8.079798779030056e-07, "loss": 0.0049, "step": 364580 }, { "epoch": 3.8954003953202627, "grad_norm": 0.0025592970196157694, "learning_rate": 8.079666423882949e-07, "loss": 0.0062, "step": 364590 }, { "epoch": 3.8955072386345426, "grad_norm": 0.03491419181227684, "learning_rate": 8.079534065258666e-07, "loss": 0.0116, "step": 364600 }, { "epoch": 3.895614081948822, "grad_norm": 3.5993645191192627, "learning_rate": 8.079401703157358e-07, "loss": 0.0363, "step": 364610 }, { "epoch": 3.8957209252631015, "grad_norm": 0.015483039431273937, "learning_rate": 8.079269337579172e-07, "loss": 0.0082, "step": 364620 }, { "epoch": 3.895827768577381, "grad_norm": 0.005589538253843784, "learning_rate": 8.079136968524258e-07, "loss": 0.0132, "step": 364630 }, { "epoch": 3.895934611891661, "grad_norm": 0.10969822108745575, "learning_rate": 8.079004595992767e-07, "loss": 0.0086, "step": 364640 }, { "epoch": 3.8960414552059404, "grad_norm": 0.03865592181682587, "learning_rate": 8.078872219984847e-07, "loss": 0.0198, "step": 364650 }, { "epoch": 3.8961482985202203, "grad_norm": 0.5408602356910706, "learning_rate": 8.078739840500649e-07, "loss": 0.0101, "step": 364660 }, { "epoch": 3.8962551418344997, "grad_norm": 3.62308931350708, "learning_rate": 8.078607457540319e-07, "loss": 0.02, "step": 364670 }, { "epoch": 3.896361985148779, "grad_norm": 0.5499892234802246, "learning_rate": 8.078475071104012e-07, "loss": 0.0111, "step": 364680 }, { "epoch": 3.896468828463059, "grad_norm": 0.0012572616105899215, "learning_rate": 8.078342681191872e-07, "loss": 0.0153, "step": 364690 }, { "epoch": 3.8965756717773385, "grad_norm": 0.05350407212972641, "learning_rate": 8.078210287804051e-07, "loss": 0.0028, "step": 364700 }, { "epoch": 3.896682515091618, "grad_norm": 0.054251451045274734, "learning_rate": 8.078077890940699e-07, "loss": 0.0307, "step": 364710 }, { "epoch": 3.896789358405898, "grad_norm": 7.0937652587890625, "learning_rate": 8.077945490601963e-07, "loss": 0.027, "step": 364720 }, { "epoch": 3.8968962017201774, "grad_norm": 0.42956966161727905, "learning_rate": 8.077813086787997e-07, "loss": 0.0057, "step": 364730 }, { "epoch": 3.897003045034457, "grad_norm": 0.002698533935472369, "learning_rate": 8.077680679498946e-07, "loss": 0.0026, "step": 364740 }, { "epoch": 3.8971098883487367, "grad_norm": 0.6421062350273132, "learning_rate": 8.077548268734962e-07, "loss": 0.0131, "step": 364750 }, { "epoch": 3.897216731663016, "grad_norm": 0.4619918167591095, "learning_rate": 8.077415854496193e-07, "loss": 0.009, "step": 364760 }, { "epoch": 3.8973235749772956, "grad_norm": 0.17736682295799255, "learning_rate": 8.077283436782789e-07, "loss": 0.0095, "step": 364770 }, { "epoch": 3.8974304182915755, "grad_norm": 1.1433106660842896, "learning_rate": 8.077151015594901e-07, "loss": 0.0091, "step": 364780 }, { "epoch": 3.897537261605855, "grad_norm": 0.08170025050640106, "learning_rate": 8.077018590932676e-07, "loss": 0.0128, "step": 364790 }, { "epoch": 3.8976441049201345, "grad_norm": 0.08462255448102951, "learning_rate": 8.076886162796266e-07, "loss": 0.0098, "step": 364800 }, { "epoch": 3.8977509482344144, "grad_norm": 19.350500106811523, "learning_rate": 8.076753731185817e-07, "loss": 0.0688, "step": 364810 }, { "epoch": 3.897857791548694, "grad_norm": 0.18780554831027985, "learning_rate": 8.076621296101484e-07, "loss": 0.0148, "step": 364820 }, { "epoch": 3.8979646348629733, "grad_norm": 2.920691967010498, "learning_rate": 8.076488857543411e-07, "loss": 0.0105, "step": 364830 }, { "epoch": 3.898071478177253, "grad_norm": 2.362863302230835, "learning_rate": 8.07635641551175e-07, "loss": 0.0059, "step": 364840 }, { "epoch": 3.8981783214915326, "grad_norm": 0.5877866148948669, "learning_rate": 8.076223970006653e-07, "loss": 0.0006, "step": 364850 }, { "epoch": 3.898285164805812, "grad_norm": 0.003551007714122534, "learning_rate": 8.076091521028265e-07, "loss": 0.0087, "step": 364860 }, { "epoch": 3.898392008120092, "grad_norm": 1.613288164138794, "learning_rate": 8.075959068576737e-07, "loss": 0.0087, "step": 364870 }, { "epoch": 3.8984988514343715, "grad_norm": 0.027941035106778145, "learning_rate": 8.075826612652219e-07, "loss": 0.002, "step": 364880 }, { "epoch": 3.8986056947486514, "grad_norm": 7.459956169128418, "learning_rate": 8.075694153254862e-07, "loss": 0.0733, "step": 364890 }, { "epoch": 3.898712538062931, "grad_norm": 0.9341499209403992, "learning_rate": 8.075561690384813e-07, "loss": 0.0171, "step": 364900 }, { "epoch": 3.8988193813772103, "grad_norm": 10.2493896484375, "learning_rate": 8.075429224042223e-07, "loss": 0.0194, "step": 364910 }, { "epoch": 3.8989262246914898, "grad_norm": 0.9791009426116943, "learning_rate": 8.075296754227241e-07, "loss": 0.0168, "step": 364920 }, { "epoch": 3.8990330680057697, "grad_norm": 0.0027632438577711582, "learning_rate": 8.075164280940017e-07, "loss": 0.0223, "step": 364930 }, { "epoch": 3.899139911320049, "grad_norm": 0.0010289981728419662, "learning_rate": 8.0750318041807e-07, "loss": 0.0113, "step": 364940 }, { "epoch": 3.899246754634329, "grad_norm": 0.7153897881507874, "learning_rate": 8.074899323949442e-07, "loss": 0.0122, "step": 364950 }, { "epoch": 3.8993535979486085, "grad_norm": 10.752147674560547, "learning_rate": 8.074766840246388e-07, "loss": 0.0205, "step": 364960 }, { "epoch": 3.899460441262888, "grad_norm": 3.079721689224243, "learning_rate": 8.074634353071692e-07, "loss": 0.0478, "step": 364970 }, { "epoch": 3.8995672845771674, "grad_norm": 6.198204517364502, "learning_rate": 8.074501862425501e-07, "loss": 0.0125, "step": 364980 }, { "epoch": 3.8996741278914473, "grad_norm": 0.04679170995950699, "learning_rate": 8.074369368307966e-07, "loss": 0.0126, "step": 364990 }, { "epoch": 3.8997809712057268, "grad_norm": 8.2103910446167, "learning_rate": 8.074236870719235e-07, "loss": 0.0387, "step": 365000 }, { "epoch": 3.8998878145200067, "grad_norm": 0.0027607963420450687, "learning_rate": 8.07410436965946e-07, "loss": 0.0425, "step": 365010 }, { "epoch": 3.899994657834286, "grad_norm": 0.013950403779745102, "learning_rate": 8.073971865128788e-07, "loss": 0.0081, "step": 365020 }, { "epoch": 3.9001015011485656, "grad_norm": 1.5745600461959839, "learning_rate": 8.07383935712737e-07, "loss": 0.0022, "step": 365030 }, { "epoch": 3.900208344462845, "grad_norm": 3.4054677486419678, "learning_rate": 8.073706845655355e-07, "loss": 0.0334, "step": 365040 }, { "epoch": 3.900315187777125, "grad_norm": 0.2567071318626404, "learning_rate": 8.073574330712893e-07, "loss": 0.0074, "step": 365050 }, { "epoch": 3.9004220310914044, "grad_norm": 0.018772276118397713, "learning_rate": 8.073441812300134e-07, "loss": 0.0027, "step": 365060 }, { "epoch": 3.9005288744056843, "grad_norm": 0.07479783147573471, "learning_rate": 8.073309290417228e-07, "loss": 0.0003, "step": 365070 }, { "epoch": 3.9006357177199638, "grad_norm": 0.989310085773468, "learning_rate": 8.073176765064323e-07, "loss": 0.0036, "step": 365080 }, { "epoch": 3.900742561034243, "grad_norm": 0.002747161779552698, "learning_rate": 8.07304423624157e-07, "loss": 0.0059, "step": 365090 }, { "epoch": 3.9008494043485227, "grad_norm": 0.02560034580528736, "learning_rate": 8.072911703949117e-07, "loss": 0.0198, "step": 365100 }, { "epoch": 3.9009562476628026, "grad_norm": 5.4706830978393555, "learning_rate": 8.072779168187116e-07, "loss": 0.019, "step": 365110 }, { "epoch": 3.901063090977082, "grad_norm": 2.167634963989258, "learning_rate": 8.072646628955714e-07, "loss": 0.0324, "step": 365120 }, { "epoch": 3.901169934291362, "grad_norm": 6.435479164123535, "learning_rate": 8.072514086255063e-07, "loss": 0.0313, "step": 365130 }, { "epoch": 3.9012767776056414, "grad_norm": 0.026365870609879494, "learning_rate": 8.072381540085312e-07, "loss": 0.003, "step": 365140 }, { "epoch": 3.901383620919921, "grad_norm": 6.188460350036621, "learning_rate": 8.07224899044661e-07, "loss": 0.0155, "step": 365150 }, { "epoch": 3.9014904642342003, "grad_norm": 0.15655583143234253, "learning_rate": 8.072116437339107e-07, "loss": 0.0106, "step": 365160 }, { "epoch": 3.90159730754848, "grad_norm": 0.010292792692780495, "learning_rate": 8.071983880762954e-07, "loss": 0.0073, "step": 365170 }, { "epoch": 3.9017041508627597, "grad_norm": 2.910036087036133, "learning_rate": 8.071851320718297e-07, "loss": 0.0229, "step": 365180 }, { "epoch": 3.9018109941770396, "grad_norm": 3.6452956199645996, "learning_rate": 8.071718757205288e-07, "loss": 0.0111, "step": 365190 }, { "epoch": 3.901917837491319, "grad_norm": 0.38761723041534424, "learning_rate": 8.071586190224078e-07, "loss": 0.0125, "step": 365200 }, { "epoch": 3.9020246808055985, "grad_norm": 0.14491912722587585, "learning_rate": 8.071453619774815e-07, "loss": 0.0131, "step": 365210 }, { "epoch": 3.902131524119878, "grad_norm": 0.5610095262527466, "learning_rate": 8.071321045857649e-07, "loss": 0.0121, "step": 365220 }, { "epoch": 3.902238367434158, "grad_norm": 0.17693422734737396, "learning_rate": 8.07118846847273e-07, "loss": 0.0175, "step": 365230 }, { "epoch": 3.9023452107484373, "grad_norm": 0.16080144047737122, "learning_rate": 8.071055887620207e-07, "loss": 0.0028, "step": 365240 }, { "epoch": 3.9024520540627172, "grad_norm": 0.013961474411189556, "learning_rate": 8.070923303300231e-07, "loss": 0.0028, "step": 365250 }, { "epoch": 3.9025588973769967, "grad_norm": 0.15313738584518433, "learning_rate": 8.070790715512951e-07, "loss": 0.0051, "step": 365260 }, { "epoch": 3.902665740691276, "grad_norm": 1.8639365434646606, "learning_rate": 8.070658124258515e-07, "loss": 0.0099, "step": 365270 }, { "epoch": 3.9027725840055556, "grad_norm": 0.09486625343561172, "learning_rate": 8.070525529537075e-07, "loss": 0.0176, "step": 365280 }, { "epoch": 3.9028794273198355, "grad_norm": 0.8202488422393799, "learning_rate": 8.07039293134878e-07, "loss": 0.0236, "step": 365290 }, { "epoch": 3.902986270634115, "grad_norm": 0.01102752797305584, "learning_rate": 8.070260329693779e-07, "loss": 0.0104, "step": 365300 }, { "epoch": 3.903093113948395, "grad_norm": 2.81451153755188, "learning_rate": 8.070127724572225e-07, "loss": 0.0916, "step": 365310 }, { "epoch": 3.9031999572626743, "grad_norm": 2.9666435718536377, "learning_rate": 8.06999511598426e-07, "loss": 0.0035, "step": 365320 }, { "epoch": 3.903306800576954, "grad_norm": 0.0033227780368179083, "learning_rate": 8.069862503930043e-07, "loss": 0.0123, "step": 365330 }, { "epoch": 3.9034136438912332, "grad_norm": 0.1958385854959488, "learning_rate": 8.069729888409719e-07, "loss": 0.0007, "step": 365340 }, { "epoch": 3.903520487205513, "grad_norm": 3.3137733936309814, "learning_rate": 8.069597269423437e-07, "loss": 0.0286, "step": 365350 }, { "epoch": 3.9036273305197926, "grad_norm": 0.3783917725086212, "learning_rate": 8.06946464697135e-07, "loss": 0.0216, "step": 365360 }, { "epoch": 3.9037341738340725, "grad_norm": 0.018817413598299026, "learning_rate": 8.069332021053603e-07, "loss": 0.0062, "step": 365370 }, { "epoch": 3.903841017148352, "grad_norm": 0.014785563573241234, "learning_rate": 8.06919939167035e-07, "loss": 0.0095, "step": 365380 }, { "epoch": 3.9039478604626314, "grad_norm": 0.10222610086202621, "learning_rate": 8.069066758821738e-07, "loss": 0.0091, "step": 365390 }, { "epoch": 3.904054703776911, "grad_norm": 0.1348387897014618, "learning_rate": 8.06893412250792e-07, "loss": 0.0149, "step": 365400 }, { "epoch": 3.904161547091191, "grad_norm": 0.13254158198833466, "learning_rate": 8.068801482729043e-07, "loss": 0.0657, "step": 365410 }, { "epoch": 3.9042683904054702, "grad_norm": 3.5475056171417236, "learning_rate": 8.068668839485258e-07, "loss": 0.0129, "step": 365420 }, { "epoch": 3.90437523371975, "grad_norm": 0.02298271469771862, "learning_rate": 8.068536192776713e-07, "loss": 0.0028, "step": 365430 }, { "epoch": 3.9044820770340296, "grad_norm": 3.240600824356079, "learning_rate": 8.068403542603559e-07, "loss": 0.0086, "step": 365440 }, { "epoch": 3.904588920348309, "grad_norm": 0.7854270935058594, "learning_rate": 8.068270888965947e-07, "loss": 0.0246, "step": 365450 }, { "epoch": 3.904695763662589, "grad_norm": 0.5671899914741516, "learning_rate": 8.068138231864025e-07, "loss": 0.0469, "step": 365460 }, { "epoch": 3.9048026069768684, "grad_norm": 0.010862329974770546, "learning_rate": 8.068005571297944e-07, "loss": 0.001, "step": 365470 }, { "epoch": 3.904909450291148, "grad_norm": 0.012769022025167942, "learning_rate": 8.067872907267853e-07, "loss": 0.0014, "step": 365480 }, { "epoch": 3.905016293605428, "grad_norm": 0.021807612851262093, "learning_rate": 8.067740239773901e-07, "loss": 0.0101, "step": 365490 }, { "epoch": 3.9051231369197072, "grad_norm": 0.0036216911394149065, "learning_rate": 8.06760756881624e-07, "loss": 0.002, "step": 365500 }, { "epoch": 3.9052299802339867, "grad_norm": 0.0356539748609066, "learning_rate": 8.067474894395017e-07, "loss": 0.0243, "step": 365510 }, { "epoch": 3.9053368235482666, "grad_norm": 3.974276065826416, "learning_rate": 8.067342216510384e-07, "loss": 0.0094, "step": 365520 }, { "epoch": 3.905443666862546, "grad_norm": 14.508406639099121, "learning_rate": 8.067209535162491e-07, "loss": 0.0076, "step": 365530 }, { "epoch": 3.9055505101768255, "grad_norm": 0.02305384911596775, "learning_rate": 8.067076850351487e-07, "loss": 0.0038, "step": 365540 }, { "epoch": 3.9056573534911054, "grad_norm": 0.8470302224159241, "learning_rate": 8.06694416207752e-07, "loss": 0.014, "step": 365550 }, { "epoch": 3.905764196805385, "grad_norm": 0.011826147325336933, "learning_rate": 8.066811470340742e-07, "loss": 0.0105, "step": 365560 }, { "epoch": 3.9058710401196644, "grad_norm": 6.264764785766602, "learning_rate": 8.066678775141305e-07, "loss": 0.0129, "step": 365570 }, { "epoch": 3.9059778834339443, "grad_norm": 0.03880434110760689, "learning_rate": 8.066546076479352e-07, "loss": 0.0078, "step": 365580 }, { "epoch": 3.9060847267482237, "grad_norm": 0.2836901843547821, "learning_rate": 8.066413374355041e-07, "loss": 0.0104, "step": 365590 }, { "epoch": 3.9061915700625036, "grad_norm": 0.0017936876974999905, "learning_rate": 8.066280668768516e-07, "loss": 0.0052, "step": 365600 }, { "epoch": 3.906298413376783, "grad_norm": 0.0007796289282850921, "learning_rate": 8.066147959719928e-07, "loss": 0.0039, "step": 365610 }, { "epoch": 3.9064052566910625, "grad_norm": 0.5922003388404846, "learning_rate": 8.066015247209428e-07, "loss": 0.0039, "step": 365620 }, { "epoch": 3.906512100005342, "grad_norm": 0.005445889197289944, "learning_rate": 8.065882531237167e-07, "loss": 0.0011, "step": 365630 }, { "epoch": 3.906618943319622, "grad_norm": 0.010079785250127316, "learning_rate": 8.065749811803291e-07, "loss": 0.0069, "step": 365640 }, { "epoch": 3.9067257866339014, "grad_norm": 0.04037855193018913, "learning_rate": 8.065617088907954e-07, "loss": 0.0019, "step": 365650 }, { "epoch": 3.9068326299481813, "grad_norm": 0.004423743113875389, "learning_rate": 8.065484362551302e-07, "loss": 0.0013, "step": 365660 }, { "epoch": 3.9069394732624607, "grad_norm": 0.009317786432802677, "learning_rate": 8.065351632733489e-07, "loss": 0.0017, "step": 365670 }, { "epoch": 3.90704631657674, "grad_norm": 0.022521400824189186, "learning_rate": 8.06521889945466e-07, "loss": 0.0016, "step": 365680 }, { "epoch": 3.9071531598910196, "grad_norm": 6.082300186157227, "learning_rate": 8.065086162714969e-07, "loss": 0.0125, "step": 365690 }, { "epoch": 3.9072600032052995, "grad_norm": 5.83912992477417, "learning_rate": 8.064953422514564e-07, "loss": 0.0063, "step": 365700 }, { "epoch": 3.907366846519579, "grad_norm": 0.21354614198207855, "learning_rate": 8.064820678853597e-07, "loss": 0.0337, "step": 365710 }, { "epoch": 3.907473689833859, "grad_norm": 10.954243659973145, "learning_rate": 8.064687931732213e-07, "loss": 0.0024, "step": 365720 }, { "epoch": 3.9075805331481384, "grad_norm": 0.0018875811947509646, "learning_rate": 8.064555181150568e-07, "loss": 0.028, "step": 365730 }, { "epoch": 3.907687376462418, "grad_norm": 0.01677846536040306, "learning_rate": 8.064422427108808e-07, "loss": 0.0068, "step": 365740 }, { "epoch": 3.9077942197766973, "grad_norm": 0.33498597145080566, "learning_rate": 8.064289669607083e-07, "loss": 0.0091, "step": 365750 }, { "epoch": 3.907901063090977, "grad_norm": 0.0076079354621469975, "learning_rate": 8.064156908645544e-07, "loss": 0.0129, "step": 365760 }, { "epoch": 3.9080079064052566, "grad_norm": 0.011583443731069565, "learning_rate": 8.064024144224342e-07, "loss": 0.0011, "step": 365770 }, { "epoch": 3.9081147497195365, "grad_norm": 0.024801211431622505, "learning_rate": 8.063891376343624e-07, "loss": 0.0063, "step": 365780 }, { "epoch": 3.908221593033816, "grad_norm": 6.488670349121094, "learning_rate": 8.063758605003541e-07, "loss": 0.0357, "step": 365790 }, { "epoch": 3.9083284363480955, "grad_norm": 0.7786792516708374, "learning_rate": 8.063625830204243e-07, "loss": 0.0064, "step": 365800 }, { "epoch": 3.908435279662375, "grad_norm": 0.595500111579895, "learning_rate": 8.063493051945882e-07, "loss": 0.0077, "step": 365810 }, { "epoch": 3.908542122976655, "grad_norm": 0.002531976206228137, "learning_rate": 8.063360270228605e-07, "loss": 0.0198, "step": 365820 }, { "epoch": 3.9086489662909343, "grad_norm": 0.004342610482126474, "learning_rate": 8.063227485052564e-07, "loss": 0.0367, "step": 365830 }, { "epoch": 3.908755809605214, "grad_norm": 0.017102552577853203, "learning_rate": 8.063094696417907e-07, "loss": 0.0125, "step": 365840 }, { "epoch": 3.9088626529194936, "grad_norm": 0.02124803327023983, "learning_rate": 8.062961904324785e-07, "loss": 0.0265, "step": 365850 }, { "epoch": 3.908969496233773, "grad_norm": 4.217216968536377, "learning_rate": 8.062829108773348e-07, "loss": 0.0399, "step": 365860 }, { "epoch": 3.9090763395480526, "grad_norm": 12.685261726379395, "learning_rate": 8.062696309763745e-07, "loss": 0.0383, "step": 365870 }, { "epoch": 3.9091831828623325, "grad_norm": 4.915951728820801, "learning_rate": 8.062563507296128e-07, "loss": 0.0084, "step": 365880 }, { "epoch": 3.909290026176612, "grad_norm": 0.0359167754650116, "learning_rate": 8.062430701370644e-07, "loss": 0.0131, "step": 365890 }, { "epoch": 3.909396869490892, "grad_norm": 1.5413734912872314, "learning_rate": 8.062297891987446e-07, "loss": 0.0243, "step": 365900 }, { "epoch": 3.9095037128051713, "grad_norm": 0.016395030543208122, "learning_rate": 8.062165079146684e-07, "loss": 0.012, "step": 365910 }, { "epoch": 3.9096105561194507, "grad_norm": 1.2261887788772583, "learning_rate": 8.062032262848504e-07, "loss": 0.0196, "step": 365920 }, { "epoch": 3.90971739943373, "grad_norm": 12.173843383789062, "learning_rate": 8.061899443093059e-07, "loss": 0.009, "step": 365930 }, { "epoch": 3.90982424274801, "grad_norm": 0.02356172539293766, "learning_rate": 8.061766619880499e-07, "loss": 0.0681, "step": 365940 }, { "epoch": 3.9099310860622896, "grad_norm": 0.04884132742881775, "learning_rate": 8.061633793210973e-07, "loss": 0.0056, "step": 365950 }, { "epoch": 3.9100379293765695, "grad_norm": 0.7720146775245667, "learning_rate": 8.061500963084632e-07, "loss": 0.0025, "step": 365960 }, { "epoch": 3.910144772690849, "grad_norm": 2.0953400135040283, "learning_rate": 8.061368129501624e-07, "loss": 0.0697, "step": 365970 }, { "epoch": 3.9102516160051284, "grad_norm": 0.013254008255898952, "learning_rate": 8.061235292462101e-07, "loss": 0.01, "step": 365980 }, { "epoch": 3.910358459319408, "grad_norm": 0.006800293922424316, "learning_rate": 8.061102451966214e-07, "loss": 0.0095, "step": 365990 }, { "epoch": 3.9104653026336877, "grad_norm": 0.061484649777412415, "learning_rate": 8.06096960801411e-07, "loss": 0.0065, "step": 366000 }, { "epoch": 3.910572145947967, "grad_norm": 0.0029215132817626, "learning_rate": 8.060836760605941e-07, "loss": 0.0003, "step": 366010 }, { "epoch": 3.910678989262247, "grad_norm": 0.6573329567909241, "learning_rate": 8.060703909741854e-07, "loss": 0.0024, "step": 366020 }, { "epoch": 3.9107858325765266, "grad_norm": 0.40828362107276917, "learning_rate": 8.060571055422003e-07, "loss": 0.0747, "step": 366030 }, { "epoch": 3.910892675890806, "grad_norm": 0.0014418390346691012, "learning_rate": 8.060438197646535e-07, "loss": 0.0225, "step": 366040 }, { "epoch": 3.9109995192050855, "grad_norm": 3.51281476020813, "learning_rate": 8.060305336415602e-07, "loss": 0.014, "step": 366050 }, { "epoch": 3.9111063625193654, "grad_norm": 2.0613272190093994, "learning_rate": 8.060172471729355e-07, "loss": 0.0077, "step": 366060 }, { "epoch": 3.911213205833645, "grad_norm": 4.592963695526123, "learning_rate": 8.06003960358794e-07, "loss": 0.0175, "step": 366070 }, { "epoch": 3.9113200491479247, "grad_norm": 0.06278199702501297, "learning_rate": 8.05990673199151e-07, "loss": 0.009, "step": 366080 }, { "epoch": 3.911426892462204, "grad_norm": 4.7998480796813965, "learning_rate": 8.059773856940215e-07, "loss": 0.008, "step": 366090 }, { "epoch": 3.9115337357764837, "grad_norm": 0.005805374588817358, "learning_rate": 8.059640978434203e-07, "loss": 0.0103, "step": 366100 }, { "epoch": 3.911640579090763, "grad_norm": 0.10211342573165894, "learning_rate": 8.059508096473629e-07, "loss": 0.0056, "step": 366110 }, { "epoch": 3.911747422405043, "grad_norm": 1.106709361076355, "learning_rate": 8.059375211058636e-07, "loss": 0.0163, "step": 366120 }, { "epoch": 3.9118542657193225, "grad_norm": 0.0038207790348678827, "learning_rate": 8.059242322189378e-07, "loss": 0.0289, "step": 366130 }, { "epoch": 3.9119611090336024, "grad_norm": 0.0024017991963773966, "learning_rate": 8.059109429866004e-07, "loss": 0.0186, "step": 366140 }, { "epoch": 3.912067952347882, "grad_norm": 0.13403041660785675, "learning_rate": 8.058976534088667e-07, "loss": 0.0015, "step": 366150 }, { "epoch": 3.9121747956621613, "grad_norm": 0.0016453203279525042, "learning_rate": 8.058843634857513e-07, "loss": 0.0126, "step": 366160 }, { "epoch": 3.912281638976441, "grad_norm": 4.029127597808838, "learning_rate": 8.058710732172693e-07, "loss": 0.0182, "step": 366170 }, { "epoch": 3.9123884822907207, "grad_norm": 0.023439431563019753, "learning_rate": 8.058577826034358e-07, "loss": 0.0387, "step": 366180 }, { "epoch": 3.912495325605, "grad_norm": 0.10170883685350418, "learning_rate": 8.058444916442659e-07, "loss": 0.0074, "step": 366190 }, { "epoch": 3.91260216891928, "grad_norm": 0.009230144321918488, "learning_rate": 8.058312003397744e-07, "loss": 0.0069, "step": 366200 }, { "epoch": 3.9127090122335595, "grad_norm": 0.9584755897521973, "learning_rate": 8.058179086899763e-07, "loss": 0.0159, "step": 366210 }, { "epoch": 3.912815855547839, "grad_norm": 0.002799911191686988, "learning_rate": 8.058046166948869e-07, "loss": 0.0059, "step": 366220 }, { "epoch": 3.912922698862119, "grad_norm": 0.7944788336753845, "learning_rate": 8.057913243545209e-07, "loss": 0.016, "step": 366230 }, { "epoch": 3.9130295421763983, "grad_norm": 0.02043609879910946, "learning_rate": 8.057780316688934e-07, "loss": 0.0234, "step": 366240 }, { "epoch": 3.9131363854906778, "grad_norm": 0.06667298078536987, "learning_rate": 8.057647386380195e-07, "loss": 0.0248, "step": 366250 }, { "epoch": 3.9132432288049577, "grad_norm": 6.573270797729492, "learning_rate": 8.05751445261914e-07, "loss": 0.0125, "step": 366260 }, { "epoch": 3.913350072119237, "grad_norm": 0.06225971877574921, "learning_rate": 8.057381515405921e-07, "loss": 0.0016, "step": 366270 }, { "epoch": 3.9134569154335166, "grad_norm": 0.03545449301600456, "learning_rate": 8.057248574740689e-07, "loss": 0.0029, "step": 366280 }, { "epoch": 3.9135637587477965, "grad_norm": 0.03566008806228638, "learning_rate": 8.05711563062359e-07, "loss": 0.0165, "step": 366290 }, { "epoch": 3.913670602062076, "grad_norm": 0.13226188719272614, "learning_rate": 8.056982683054779e-07, "loss": 0.0181, "step": 366300 }, { "epoch": 3.9137774453763554, "grad_norm": 0.0020994325168430805, "learning_rate": 8.056849732034404e-07, "loss": 0.0093, "step": 366310 }, { "epoch": 3.9138842886906353, "grad_norm": 0.07216174900531769, "learning_rate": 8.056716777562613e-07, "loss": 0.0148, "step": 366320 }, { "epoch": 3.9139911320049148, "grad_norm": 0.001536018680781126, "learning_rate": 8.056583819639559e-07, "loss": 0.0059, "step": 366330 }, { "epoch": 3.9140979753191942, "grad_norm": 0.01317405141890049, "learning_rate": 8.056450858265391e-07, "loss": 0.0018, "step": 366340 }, { "epoch": 3.914204818633474, "grad_norm": 8.744312286376953, "learning_rate": 8.056317893440259e-07, "loss": 0.0163, "step": 366350 }, { "epoch": 3.9143116619477536, "grad_norm": 0.008567407727241516, "learning_rate": 8.056184925164315e-07, "loss": 0.0069, "step": 366360 }, { "epoch": 3.9144185052620335, "grad_norm": 1.8323643207550049, "learning_rate": 8.056051953437706e-07, "loss": 0.0274, "step": 366370 }, { "epoch": 3.914525348576313, "grad_norm": 1.06509530544281, "learning_rate": 8.055918978260583e-07, "loss": 0.016, "step": 366380 }, { "epoch": 3.9146321918905924, "grad_norm": 0.12285184115171432, "learning_rate": 8.055785999633098e-07, "loss": 0.002, "step": 366390 }, { "epoch": 3.914739035204872, "grad_norm": 0.030480410903692245, "learning_rate": 8.055653017555401e-07, "loss": 0.0006, "step": 366400 }, { "epoch": 3.914845878519152, "grad_norm": 0.23092509806156158, "learning_rate": 8.055520032027639e-07, "loss": 0.0163, "step": 366410 }, { "epoch": 3.9149527218334312, "grad_norm": 1.2632427215576172, "learning_rate": 8.055387043049966e-07, "loss": 0.0125, "step": 366420 }, { "epoch": 3.915059565147711, "grad_norm": 2.526582956314087, "learning_rate": 8.055254050622531e-07, "loss": 0.0037, "step": 366430 }, { "epoch": 3.9151664084619906, "grad_norm": 0.6651481986045837, "learning_rate": 8.05512105474548e-07, "loss": 0.0591, "step": 366440 }, { "epoch": 3.91527325177627, "grad_norm": 0.03592019900679588, "learning_rate": 8.05498805541897e-07, "loss": 0.0268, "step": 366450 }, { "epoch": 3.9153800950905495, "grad_norm": 6.846329689025879, "learning_rate": 8.054855052643148e-07, "loss": 0.0189, "step": 366460 }, { "epoch": 3.9154869384048294, "grad_norm": 0.41385573148727417, "learning_rate": 8.054722046418162e-07, "loss": 0.0273, "step": 366470 }, { "epoch": 3.915593781719109, "grad_norm": 0.6734862327575684, "learning_rate": 8.054589036744166e-07, "loss": 0.0086, "step": 366480 }, { "epoch": 3.915700625033389, "grad_norm": 3.0470449924468994, "learning_rate": 8.054456023621308e-07, "loss": 0.0152, "step": 366490 }, { "epoch": 3.9158074683476682, "grad_norm": 5.392131328582764, "learning_rate": 8.054323007049737e-07, "loss": 0.0366, "step": 366500 }, { "epoch": 3.9159143116619477, "grad_norm": 0.0029875936452299356, "learning_rate": 8.054189987029607e-07, "loss": 0.0185, "step": 366510 }, { "epoch": 3.916021154976227, "grad_norm": 0.07133418321609497, "learning_rate": 8.054056963561065e-07, "loss": 0.011, "step": 366520 }, { "epoch": 3.916127998290507, "grad_norm": 0.07584457844495773, "learning_rate": 8.053923936644263e-07, "loss": 0.019, "step": 366530 }, { "epoch": 3.9162348416047865, "grad_norm": 0.006963660474866629, "learning_rate": 8.05379090627935e-07, "loss": 0.0094, "step": 366540 }, { "epoch": 3.9163416849190664, "grad_norm": 0.01655266433954239, "learning_rate": 8.053657872466475e-07, "loss": 0.0058, "step": 366550 }, { "epoch": 3.916448528233346, "grad_norm": 0.09718172252178192, "learning_rate": 8.053524835205792e-07, "loss": 0.0221, "step": 366560 }, { "epoch": 3.9165553715476253, "grad_norm": 0.08057495951652527, "learning_rate": 8.053391794497448e-07, "loss": 0.0061, "step": 366570 }, { "epoch": 3.916662214861905, "grad_norm": 0.007873212918639183, "learning_rate": 8.053258750341593e-07, "loss": 0.0535, "step": 366580 }, { "epoch": 3.9167690581761847, "grad_norm": 0.10067518055438995, "learning_rate": 8.053125702738381e-07, "loss": 0.0054, "step": 366590 }, { "epoch": 3.916875901490464, "grad_norm": 0.007084712386131287, "learning_rate": 8.052992651687959e-07, "loss": 0.0036, "step": 366600 }, { "epoch": 3.916982744804744, "grad_norm": 3.153428554534912, "learning_rate": 8.052859597190477e-07, "loss": 0.0101, "step": 366610 }, { "epoch": 3.9170895881190235, "grad_norm": 0.0690499022603035, "learning_rate": 8.052726539246086e-07, "loss": 0.0296, "step": 366620 }, { "epoch": 3.917196431433303, "grad_norm": 10.443632125854492, "learning_rate": 8.052593477854936e-07, "loss": 0.0248, "step": 366630 }, { "epoch": 3.9173032747475824, "grad_norm": 4.05258321762085, "learning_rate": 8.052460413017179e-07, "loss": 0.0056, "step": 366640 }, { "epoch": 3.9174101180618623, "grad_norm": 3.301746129989624, "learning_rate": 8.052327344732963e-07, "loss": 0.0171, "step": 366650 }, { "epoch": 3.917516961376142, "grad_norm": 0.018372444435954094, "learning_rate": 8.05219427300244e-07, "loss": 0.0231, "step": 366660 }, { "epoch": 3.9176238046904217, "grad_norm": 4.647089958190918, "learning_rate": 8.052061197825756e-07, "loss": 0.0036, "step": 366670 }, { "epoch": 3.917730648004701, "grad_norm": 0.09554100036621094, "learning_rate": 8.051928119203068e-07, "loss": 0.0112, "step": 366680 }, { "epoch": 3.9178374913189806, "grad_norm": 0.019620418548583984, "learning_rate": 8.051795037134523e-07, "loss": 0.0321, "step": 366690 }, { "epoch": 3.91794433463326, "grad_norm": 4.797514915466309, "learning_rate": 8.05166195162027e-07, "loss": 0.0095, "step": 366700 }, { "epoch": 3.91805117794754, "grad_norm": 2.190272808074951, "learning_rate": 8.051528862660461e-07, "loss": 0.0048, "step": 366710 }, { "epoch": 3.9181580212618194, "grad_norm": 0.25919613242149353, "learning_rate": 8.051395770255244e-07, "loss": 0.01, "step": 366720 }, { "epoch": 3.9182648645760993, "grad_norm": 0.8182268142700195, "learning_rate": 8.051262674404772e-07, "loss": 0.0242, "step": 366730 }, { "epoch": 3.918371707890379, "grad_norm": 0.010674911551177502, "learning_rate": 8.051129575109195e-07, "loss": 0.0092, "step": 366740 }, { "epoch": 3.9184785512046583, "grad_norm": 5.135932445526123, "learning_rate": 8.05099647236866e-07, "loss": 0.0085, "step": 366750 }, { "epoch": 3.9185853945189377, "grad_norm": 0.026799386367201805, "learning_rate": 8.050863366183322e-07, "loss": 0.0086, "step": 366760 }, { "epoch": 3.9186922378332176, "grad_norm": 1.0216528177261353, "learning_rate": 8.050730256553328e-07, "loss": 0.0317, "step": 366770 }, { "epoch": 3.918799081147497, "grad_norm": 0.21263481676578522, "learning_rate": 8.050597143478828e-07, "loss": 0.012, "step": 366780 }, { "epoch": 3.918905924461777, "grad_norm": 3.2149741649627686, "learning_rate": 8.050464026959975e-07, "loss": 0.0151, "step": 366790 }, { "epoch": 3.9190127677760564, "grad_norm": 2.2208709716796875, "learning_rate": 8.050330906996919e-07, "loss": 0.0041, "step": 366800 }, { "epoch": 3.919119611090336, "grad_norm": 7.6763596534729, "learning_rate": 8.050197783589807e-07, "loss": 0.0053, "step": 366810 }, { "epoch": 3.9192264544046154, "grad_norm": 0.011915912851691246, "learning_rate": 8.050064656738793e-07, "loss": 0.0037, "step": 366820 }, { "epoch": 3.9193332977188953, "grad_norm": 4.985068321228027, "learning_rate": 8.049931526444025e-07, "loss": 0.0353, "step": 366830 }, { "epoch": 3.9194401410331747, "grad_norm": 0.09546194970607758, "learning_rate": 8.049798392705655e-07, "loss": 0.0237, "step": 366840 }, { "epoch": 3.9195469843474546, "grad_norm": 0.21172375977039337, "learning_rate": 8.049665255523833e-07, "loss": 0.0168, "step": 366850 }, { "epoch": 3.919653827661734, "grad_norm": 3.0142836570739746, "learning_rate": 8.049532114898705e-07, "loss": 0.0407, "step": 366860 }, { "epoch": 3.9197606709760136, "grad_norm": 0.2285703867673874, "learning_rate": 8.049398970830429e-07, "loss": 0.0067, "step": 366870 }, { "epoch": 3.919867514290293, "grad_norm": 3.0130772590637207, "learning_rate": 8.04926582331915e-07, "loss": 0.0301, "step": 366880 }, { "epoch": 3.919974357604573, "grad_norm": 0.011412282474339008, "learning_rate": 8.049132672365019e-07, "loss": 0.0108, "step": 366890 }, { "epoch": 3.9200812009188524, "grad_norm": 0.04243014007806778, "learning_rate": 8.048999517968189e-07, "loss": 0.0156, "step": 366900 }, { "epoch": 3.9201880442331323, "grad_norm": 1.228935956954956, "learning_rate": 8.048866360128808e-07, "loss": 0.0477, "step": 366910 }, { "epoch": 3.9202948875474117, "grad_norm": 0.19971096515655518, "learning_rate": 8.048733198847025e-07, "loss": 0.0193, "step": 366920 }, { "epoch": 3.920401730861691, "grad_norm": 2.4571850299835205, "learning_rate": 8.048600034122994e-07, "loss": 0.0083, "step": 366930 }, { "epoch": 3.920508574175971, "grad_norm": 0.08866903930902481, "learning_rate": 8.048466865956862e-07, "loss": 0.0101, "step": 366940 }, { "epoch": 3.9206154174902506, "grad_norm": 0.7110545635223389, "learning_rate": 8.048333694348781e-07, "loss": 0.0017, "step": 366950 }, { "epoch": 3.92072226080453, "grad_norm": 4.760128974914551, "learning_rate": 8.048200519298902e-07, "loss": 0.0132, "step": 366960 }, { "epoch": 3.92082910411881, "grad_norm": 11.315921783447266, "learning_rate": 8.048067340807375e-07, "loss": 0.0142, "step": 366970 }, { "epoch": 3.9209359474330894, "grad_norm": 1.6133453845977783, "learning_rate": 8.047934158874349e-07, "loss": 0.0124, "step": 366980 }, { "epoch": 3.921042790747369, "grad_norm": 0.024151399731636047, "learning_rate": 8.047800973499976e-07, "loss": 0.0131, "step": 366990 }, { "epoch": 3.9211496340616487, "grad_norm": 0.02026292122900486, "learning_rate": 8.047667784684405e-07, "loss": 0.0288, "step": 367000 }, { "epoch": 3.921256477375928, "grad_norm": 2.610891103744507, "learning_rate": 8.047534592427788e-07, "loss": 0.0316, "step": 367010 }, { "epoch": 3.9213633206902077, "grad_norm": 0.4540436863899231, "learning_rate": 8.047401396730273e-07, "loss": 0.0306, "step": 367020 }, { "epoch": 3.9214701640044876, "grad_norm": 0.09451665729284286, "learning_rate": 8.047268197592012e-07, "loss": 0.0014, "step": 367030 }, { "epoch": 3.921577007318767, "grad_norm": 2.1956193447113037, "learning_rate": 8.047134995013157e-07, "loss": 0.0121, "step": 367040 }, { "epoch": 3.9216838506330465, "grad_norm": 0.0059624481946229935, "learning_rate": 8.047001788993854e-07, "loss": 0.0065, "step": 367050 }, { "epoch": 3.9217906939473264, "grad_norm": 9.436086654663086, "learning_rate": 8.046868579534259e-07, "loss": 0.0278, "step": 367060 }, { "epoch": 3.921897537261606, "grad_norm": 0.8380478620529175, "learning_rate": 8.046735366634516e-07, "loss": 0.0027, "step": 367070 }, { "epoch": 3.9220043805758857, "grad_norm": 3.053201675415039, "learning_rate": 8.046602150294781e-07, "loss": 0.0275, "step": 367080 }, { "epoch": 3.922111223890165, "grad_norm": 2.75835919380188, "learning_rate": 8.046468930515202e-07, "loss": 0.0855, "step": 367090 }, { "epoch": 3.9222180672044447, "grad_norm": 0.023915767669677734, "learning_rate": 8.04633570729593e-07, "loss": 0.0034, "step": 367100 }, { "epoch": 3.922324910518724, "grad_norm": 0.03516475111246109, "learning_rate": 8.046202480637115e-07, "loss": 0.0276, "step": 367110 }, { "epoch": 3.922431753833004, "grad_norm": 0.28157004714012146, "learning_rate": 8.046069250538906e-07, "loss": 0.0005, "step": 367120 }, { "epoch": 3.9225385971472835, "grad_norm": 0.0038042012602090836, "learning_rate": 8.045936017001455e-07, "loss": 0.0467, "step": 367130 }, { "epoch": 3.9226454404615634, "grad_norm": 0.018296269699931145, "learning_rate": 8.045802780024915e-07, "loss": 0.0043, "step": 367140 }, { "epoch": 3.922752283775843, "grad_norm": 0.011707053519785404, "learning_rate": 8.045669539609432e-07, "loss": 0.0061, "step": 367150 }, { "epoch": 3.9228591270901223, "grad_norm": 0.8485327959060669, "learning_rate": 8.045536295755157e-07, "loss": 0.002, "step": 367160 }, { "epoch": 3.9229659704044018, "grad_norm": 0.23408399522304535, "learning_rate": 8.045403048462245e-07, "loss": 0.0042, "step": 367170 }, { "epoch": 3.9230728137186817, "grad_norm": 7.277039051055908, "learning_rate": 8.04526979773084e-07, "loss": 0.0423, "step": 367180 }, { "epoch": 3.923179657032961, "grad_norm": 0.4051826596260071, "learning_rate": 8.045136543561098e-07, "loss": 0.0255, "step": 367190 }, { "epoch": 3.923286500347241, "grad_norm": 9.059348106384277, "learning_rate": 8.045003285953165e-07, "loss": 0.01, "step": 367200 }, { "epoch": 3.9233933436615205, "grad_norm": 3.115290403366089, "learning_rate": 8.044870024907193e-07, "loss": 0.0036, "step": 367210 }, { "epoch": 3.9235001869758, "grad_norm": 0.10021759569644928, "learning_rate": 8.044736760423335e-07, "loss": 0.0018, "step": 367220 }, { "epoch": 3.9236070302900794, "grad_norm": 0.001156335580162704, "learning_rate": 8.044603492501738e-07, "loss": 0.0021, "step": 367230 }, { "epoch": 3.9237138736043593, "grad_norm": 3.583287477493286, "learning_rate": 8.044470221142554e-07, "loss": 0.0063, "step": 367240 }, { "epoch": 3.9238207169186388, "grad_norm": 3.6811532974243164, "learning_rate": 8.044336946345935e-07, "loss": 0.0155, "step": 367250 }, { "epoch": 3.9239275602329187, "grad_norm": 0.7634091377258301, "learning_rate": 8.044203668112029e-07, "loss": 0.0193, "step": 367260 }, { "epoch": 3.924034403547198, "grad_norm": 3.580251693725586, "learning_rate": 8.044070386440987e-07, "loss": 0.0091, "step": 367270 }, { "epoch": 3.9241412468614776, "grad_norm": 0.0057084448635578156, "learning_rate": 8.043937101332958e-07, "loss": 0.0061, "step": 367280 }, { "epoch": 3.924248090175757, "grad_norm": 0.20024971663951874, "learning_rate": 8.043803812788096e-07, "loss": 0.0064, "step": 367290 }, { "epoch": 3.924354933490037, "grad_norm": 0.2897290587425232, "learning_rate": 8.043670520806549e-07, "loss": 0.013, "step": 367300 }, { "epoch": 3.9244617768043164, "grad_norm": 1.6287423372268677, "learning_rate": 8.04353722538847e-07, "loss": 0.0019, "step": 367310 }, { "epoch": 3.9245686201185963, "grad_norm": 0.005565658211708069, "learning_rate": 8.043403926534006e-07, "loss": 0.006, "step": 367320 }, { "epoch": 3.9246754634328758, "grad_norm": 0.009398243390023708, "learning_rate": 8.04327062424331e-07, "loss": 0.0645, "step": 367330 }, { "epoch": 3.9247823067471552, "grad_norm": 4.8916192054748535, "learning_rate": 8.043137318516533e-07, "loss": 0.0323, "step": 367340 }, { "epoch": 3.9248891500614347, "grad_norm": 0.0011511268094182014, "learning_rate": 8.043004009353822e-07, "loss": 0.0099, "step": 367350 }, { "epoch": 3.9249959933757146, "grad_norm": 0.00830491166561842, "learning_rate": 8.042870696755331e-07, "loss": 0.0138, "step": 367360 }, { "epoch": 3.925102836689994, "grad_norm": 0.5625120401382446, "learning_rate": 8.042737380721209e-07, "loss": 0.0009, "step": 367370 }, { "epoch": 3.925209680004274, "grad_norm": 1.7756729125976562, "learning_rate": 8.042604061251608e-07, "loss": 0.0074, "step": 367380 }, { "epoch": 3.9253165233185534, "grad_norm": 0.015198753215372562, "learning_rate": 8.042470738346676e-07, "loss": 0.0227, "step": 367390 }, { "epoch": 3.925423366632833, "grad_norm": 13.1239595413208, "learning_rate": 8.042337412006566e-07, "loss": 0.0154, "step": 367400 }, { "epoch": 3.9255302099471123, "grad_norm": 0.003695267951115966, "learning_rate": 8.042204082231426e-07, "loss": 0.0068, "step": 367410 }, { "epoch": 3.9256370532613922, "grad_norm": 2.241060733795166, "learning_rate": 8.042070749021408e-07, "loss": 0.0072, "step": 367420 }, { "epoch": 3.9257438965756717, "grad_norm": 4.539845943450928, "learning_rate": 8.041937412376665e-07, "loss": 0.0171, "step": 367430 }, { "epoch": 3.9258507398899516, "grad_norm": 6.169908046722412, "learning_rate": 8.041804072297342e-07, "loss": 0.0421, "step": 367440 }, { "epoch": 3.925957583204231, "grad_norm": 3.4086997509002686, "learning_rate": 8.041670728783593e-07, "loss": 0.0214, "step": 367450 }, { "epoch": 3.9260644265185105, "grad_norm": 0.0027789270970970392, "learning_rate": 8.04153738183557e-07, "loss": 0.0081, "step": 367460 }, { "epoch": 3.92617126983279, "grad_norm": 0.015286588110029697, "learning_rate": 8.04140403145342e-07, "loss": 0.0257, "step": 367470 }, { "epoch": 3.92627811314707, "grad_norm": 0.2706112265586853, "learning_rate": 8.041270677637298e-07, "loss": 0.0012, "step": 367480 }, { "epoch": 3.9263849564613493, "grad_norm": 1.3263124227523804, "learning_rate": 8.041137320387349e-07, "loss": 0.0073, "step": 367490 }, { "epoch": 3.9264917997756292, "grad_norm": 0.3152675926685333, "learning_rate": 8.041003959703727e-07, "loss": 0.015, "step": 367500 }, { "epoch": 3.9265986430899087, "grad_norm": 0.17431484162807465, "learning_rate": 8.040870595586584e-07, "loss": 0.0341, "step": 367510 }, { "epoch": 3.926705486404188, "grad_norm": 2.617382049560547, "learning_rate": 8.040737228036067e-07, "loss": 0.0227, "step": 367520 }, { "epoch": 3.9268123297184676, "grad_norm": 7.202911376953125, "learning_rate": 8.040603857052328e-07, "loss": 0.0206, "step": 367530 }, { "epoch": 3.9269191730327475, "grad_norm": 0.08473256230354309, "learning_rate": 8.040470482635518e-07, "loss": 0.0119, "step": 367540 }, { "epoch": 3.927026016347027, "grad_norm": 0.18913425505161285, "learning_rate": 8.040337104785787e-07, "loss": 0.0067, "step": 367550 }, { "epoch": 3.927132859661307, "grad_norm": 1.213364601135254, "learning_rate": 8.040203723503287e-07, "loss": 0.0068, "step": 367560 }, { "epoch": 3.9272397029755863, "grad_norm": 0.2749408185482025, "learning_rate": 8.040070338788167e-07, "loss": 0.002, "step": 367570 }, { "epoch": 3.927346546289866, "grad_norm": 0.1370992362499237, "learning_rate": 8.039936950640578e-07, "loss": 0.0085, "step": 367580 }, { "epoch": 3.9274533896041453, "grad_norm": 0.09996630251407623, "learning_rate": 8.039803559060671e-07, "loss": 0.0109, "step": 367590 }, { "epoch": 3.927560232918425, "grad_norm": 6.456485748291016, "learning_rate": 8.039670164048596e-07, "loss": 0.041, "step": 367600 }, { "epoch": 3.9276670762327046, "grad_norm": 1.7279740571975708, "learning_rate": 8.039536765604503e-07, "loss": 0.006, "step": 367610 }, { "epoch": 3.9277739195469845, "grad_norm": 0.19122828543186188, "learning_rate": 8.039403363728545e-07, "loss": 0.01, "step": 367620 }, { "epoch": 3.927880762861264, "grad_norm": 0.9336403012275696, "learning_rate": 8.039269958420872e-07, "loss": 0.0004, "step": 367630 }, { "epoch": 3.9279876061755434, "grad_norm": 0.008521526120603085, "learning_rate": 8.039136549681632e-07, "loss": 0.001, "step": 367640 }, { "epoch": 3.9280944494898233, "grad_norm": 0.06030132621526718, "learning_rate": 8.039003137510978e-07, "loss": 0.0012, "step": 367650 }, { "epoch": 3.928201292804103, "grad_norm": 0.11842281371355057, "learning_rate": 8.038869721909061e-07, "loss": 0.0096, "step": 367660 }, { "epoch": 3.9283081361183823, "grad_norm": 0.009133713319897652, "learning_rate": 8.03873630287603e-07, "loss": 0.0003, "step": 367670 }, { "epoch": 3.928414979432662, "grad_norm": 0.005411113146692514, "learning_rate": 8.038602880412037e-07, "loss": 0.0008, "step": 367680 }, { "epoch": 3.9285218227469416, "grad_norm": 0.03370329365134239, "learning_rate": 8.03846945451723e-07, "loss": 0.0073, "step": 367690 }, { "epoch": 3.928628666061221, "grad_norm": 0.0014785825042054057, "learning_rate": 8.038336025191763e-07, "loss": 0.0023, "step": 367700 }, { "epoch": 3.928735509375501, "grad_norm": 0.00563901336863637, "learning_rate": 8.038202592435786e-07, "loss": 0.0027, "step": 367710 }, { "epoch": 3.9288423526897804, "grad_norm": 0.015186057426035404, "learning_rate": 8.038069156249447e-07, "loss": 0.008, "step": 367720 }, { "epoch": 3.92894919600406, "grad_norm": 1.4565435647964478, "learning_rate": 8.037935716632902e-07, "loss": 0.0015, "step": 367730 }, { "epoch": 3.92905603931834, "grad_norm": 0.00793039333075285, "learning_rate": 8.037802273586296e-07, "loss": 0.0001, "step": 367740 }, { "epoch": 3.9291628826326193, "grad_norm": 0.00959466490894556, "learning_rate": 8.037668827109781e-07, "loss": 0.0197, "step": 367750 }, { "epoch": 3.9292697259468987, "grad_norm": 2.4944825172424316, "learning_rate": 8.037535377203511e-07, "loss": 0.02, "step": 367760 }, { "epoch": 3.9293765692611786, "grad_norm": 1.1073356866836548, "learning_rate": 8.037401923867633e-07, "loss": 0.0027, "step": 367770 }, { "epoch": 3.929483412575458, "grad_norm": 6.451596736907959, "learning_rate": 8.037268467102298e-07, "loss": 0.016, "step": 367780 }, { "epoch": 3.9295902558897375, "grad_norm": 0.10339625924825668, "learning_rate": 8.03713500690766e-07, "loss": 0.0032, "step": 367790 }, { "epoch": 3.9296970992040174, "grad_norm": 0.15285827219486237, "learning_rate": 8.037001543283866e-07, "loss": 0.0045, "step": 367800 }, { "epoch": 3.929803942518297, "grad_norm": 0.15492059290409088, "learning_rate": 8.036868076231068e-07, "loss": 0.0047, "step": 367810 }, { "epoch": 3.9299107858325764, "grad_norm": 0.05127042904496193, "learning_rate": 8.036734605749418e-07, "loss": 0.0021, "step": 367820 }, { "epoch": 3.9300176291468563, "grad_norm": 0.1877875179052353, "learning_rate": 8.036601131839065e-07, "loss": 0.0196, "step": 367830 }, { "epoch": 3.9301244724611357, "grad_norm": 1.039036750793457, "learning_rate": 8.036467654500158e-07, "loss": 0.0036, "step": 367840 }, { "epoch": 3.9302313157754156, "grad_norm": 0.0035731259267777205, "learning_rate": 8.036334173732852e-07, "loss": 0.0008, "step": 367850 }, { "epoch": 3.930338159089695, "grad_norm": 0.0621492825448513, "learning_rate": 8.036200689537293e-07, "loss": 0.0077, "step": 367860 }, { "epoch": 3.9304450024039745, "grad_norm": 5.03513765335083, "learning_rate": 8.036067201913637e-07, "loss": 0.0169, "step": 367870 }, { "epoch": 3.930551845718254, "grad_norm": 0.9471527934074402, "learning_rate": 8.035933710862033e-07, "loss": 0.0291, "step": 367880 }, { "epoch": 3.930658689032534, "grad_norm": 5.035923480987549, "learning_rate": 8.035800216382628e-07, "loss": 0.006, "step": 367890 }, { "epoch": 3.9307655323468134, "grad_norm": 0.46379518508911133, "learning_rate": 8.035666718475575e-07, "loss": 0.0133, "step": 367900 }, { "epoch": 3.9308723756610933, "grad_norm": 0.008634548634290695, "learning_rate": 8.035533217141029e-07, "loss": 0.0038, "step": 367910 }, { "epoch": 3.9309792189753727, "grad_norm": 0.08519498258829117, "learning_rate": 8.035399712379133e-07, "loss": 0.0088, "step": 367920 }, { "epoch": 3.931086062289652, "grad_norm": 0.0748976320028305, "learning_rate": 8.035266204190043e-07, "loss": 0.0089, "step": 367930 }, { "epoch": 3.9311929056039316, "grad_norm": 0.028857694938778877, "learning_rate": 8.035132692573909e-07, "loss": 0.0289, "step": 367940 }, { "epoch": 3.9312997489182115, "grad_norm": 3.5957415103912354, "learning_rate": 8.034999177530879e-07, "loss": 0.0058, "step": 367950 }, { "epoch": 3.931406592232491, "grad_norm": 2.348292589187622, "learning_rate": 8.034865659061108e-07, "loss": 0.0256, "step": 367960 }, { "epoch": 3.931513435546771, "grad_norm": 0.07852475345134735, "learning_rate": 8.034732137164745e-07, "loss": 0.0516, "step": 367970 }, { "epoch": 3.9316202788610504, "grad_norm": 5.660548686981201, "learning_rate": 8.034598611841939e-07, "loss": 0.0177, "step": 367980 }, { "epoch": 3.93172712217533, "grad_norm": 5.605547904968262, "learning_rate": 8.034465083092843e-07, "loss": 0.0177, "step": 367990 }, { "epoch": 3.9318339654896093, "grad_norm": 0.001303846132941544, "learning_rate": 8.034331550917606e-07, "loss": 0.0126, "step": 368000 }, { "epoch": 3.931940808803889, "grad_norm": 0.007409779354929924, "learning_rate": 8.034198015316381e-07, "loss": 0.0119, "step": 368010 }, { "epoch": 3.9320476521181686, "grad_norm": 0.004815347027033567, "learning_rate": 8.034064476289315e-07, "loss": 0.042, "step": 368020 }, { "epoch": 3.9321544954324485, "grad_norm": 0.030533084645867348, "learning_rate": 8.033930933836563e-07, "loss": 0.015, "step": 368030 }, { "epoch": 3.932261338746728, "grad_norm": 0.1253965198993683, "learning_rate": 8.033797387958275e-07, "loss": 0.0074, "step": 368040 }, { "epoch": 3.9323681820610075, "grad_norm": 0.5013329982757568, "learning_rate": 8.033663838654599e-07, "loss": 0.0207, "step": 368050 }, { "epoch": 3.932475025375287, "grad_norm": 3.0387678146362305, "learning_rate": 8.033530285925687e-07, "loss": 0.0096, "step": 368060 }, { "epoch": 3.932581868689567, "grad_norm": 2.6913797855377197, "learning_rate": 8.033396729771691e-07, "loss": 0.012, "step": 368070 }, { "epoch": 3.9326887120038463, "grad_norm": 8.320752143859863, "learning_rate": 8.033263170192762e-07, "loss": 0.0247, "step": 368080 }, { "epoch": 3.932795555318126, "grad_norm": 0.001634744112379849, "learning_rate": 8.033129607189048e-07, "loss": 0.0049, "step": 368090 }, { "epoch": 3.9329023986324056, "grad_norm": 0.6208326816558838, "learning_rate": 8.032996040760705e-07, "loss": 0.0162, "step": 368100 }, { "epoch": 3.933009241946685, "grad_norm": 2.3144280910491943, "learning_rate": 8.032862470907879e-07, "loss": 0.0055, "step": 368110 }, { "epoch": 3.9331160852609646, "grad_norm": 0.05353374034166336, "learning_rate": 8.032728897630722e-07, "loss": 0.0112, "step": 368120 }, { "epoch": 3.9332229285752445, "grad_norm": 18.848453521728516, "learning_rate": 8.032595320929385e-07, "loss": 0.0456, "step": 368130 }, { "epoch": 3.933329771889524, "grad_norm": 0.24894949793815613, "learning_rate": 8.032461740804019e-07, "loss": 0.0023, "step": 368140 }, { "epoch": 3.933436615203804, "grad_norm": 0.01390937902033329, "learning_rate": 8.032328157254776e-07, "loss": 0.0199, "step": 368150 }, { "epoch": 3.9335434585180833, "grad_norm": 0.004462182521820068, "learning_rate": 8.032194570281805e-07, "loss": 0.0022, "step": 368160 }, { "epoch": 3.9336503018323628, "grad_norm": 0.05026274919509888, "learning_rate": 8.032060979885258e-07, "loss": 0.0048, "step": 368170 }, { "epoch": 3.933757145146642, "grad_norm": 2.848635673522949, "learning_rate": 8.031927386065284e-07, "loss": 0.0188, "step": 368180 }, { "epoch": 3.933863988460922, "grad_norm": 0.14824825525283813, "learning_rate": 8.031793788822037e-07, "loss": 0.009, "step": 368190 }, { "epoch": 3.9339708317752016, "grad_norm": 8.275294303894043, "learning_rate": 8.031660188155665e-07, "loss": 0.0097, "step": 368200 }, { "epoch": 3.9340776750894815, "grad_norm": 0.022502290084958076, "learning_rate": 8.031526584066319e-07, "loss": 0.0017, "step": 368210 }, { "epoch": 3.934184518403761, "grad_norm": 5.05816650390625, "learning_rate": 8.031392976554151e-07, "loss": 0.0132, "step": 368220 }, { "epoch": 3.9342913617180404, "grad_norm": 6.37969970703125, "learning_rate": 8.031259365619311e-07, "loss": 0.0822, "step": 368230 }, { "epoch": 3.93439820503232, "grad_norm": 0.015529409982264042, "learning_rate": 8.031125751261952e-07, "loss": 0.0032, "step": 368240 }, { "epoch": 3.9345050483465998, "grad_norm": 0.10141132771968842, "learning_rate": 8.030992133482223e-07, "loss": 0.0182, "step": 368250 }, { "epoch": 3.934611891660879, "grad_norm": 0.023653587326407433, "learning_rate": 8.030858512280274e-07, "loss": 0.0183, "step": 368260 }, { "epoch": 3.934718734975159, "grad_norm": 0.11612729728221893, "learning_rate": 8.030724887656257e-07, "loss": 0.0083, "step": 368270 }, { "epoch": 3.9348255782894386, "grad_norm": 0.0013076600152999163, "learning_rate": 8.030591259610324e-07, "loss": 0.004, "step": 368280 }, { "epoch": 3.934932421603718, "grad_norm": 0.04718434065580368, "learning_rate": 8.030457628142624e-07, "loss": 0.0171, "step": 368290 }, { "epoch": 3.9350392649179975, "grad_norm": 0.4146958589553833, "learning_rate": 8.030323993253309e-07, "loss": 0.0042, "step": 368300 }, { "epoch": 3.9351461082322774, "grad_norm": 11.586915016174316, "learning_rate": 8.03019035494253e-07, "loss": 0.021, "step": 368310 }, { "epoch": 3.935252951546557, "grad_norm": 0.47052982449531555, "learning_rate": 8.030056713210436e-07, "loss": 0.006, "step": 368320 }, { "epoch": 3.9353597948608368, "grad_norm": 26.1198787689209, "learning_rate": 8.029923068057181e-07, "loss": 0.004, "step": 368330 }, { "epoch": 3.935466638175116, "grad_norm": 0.002150685293599963, "learning_rate": 8.029789419482913e-07, "loss": 0.0006, "step": 368340 }, { "epoch": 3.9355734814893957, "grad_norm": 0.3210967481136322, "learning_rate": 8.029655767487784e-07, "loss": 0.0035, "step": 368350 }, { "epoch": 3.935680324803675, "grad_norm": 0.004873065743595362, "learning_rate": 8.029522112071946e-07, "loss": 0.0007, "step": 368360 }, { "epoch": 3.935787168117955, "grad_norm": 6.86335563659668, "learning_rate": 8.029388453235548e-07, "loss": 0.0078, "step": 368370 }, { "epoch": 3.9358940114322345, "grad_norm": 0.005305863451212645, "learning_rate": 8.029254790978741e-07, "loss": 0.0076, "step": 368380 }, { "epoch": 3.9360008547465144, "grad_norm": 0.018882473930716515, "learning_rate": 8.029121125301678e-07, "loss": 0.0193, "step": 368390 }, { "epoch": 3.936107698060794, "grad_norm": 0.0010663216235116124, "learning_rate": 8.028987456204509e-07, "loss": 0.0063, "step": 368400 }, { "epoch": 3.9362145413750733, "grad_norm": 1.8904145956039429, "learning_rate": 8.028853783687384e-07, "loss": 0.0073, "step": 368410 }, { "epoch": 3.936321384689353, "grad_norm": 0.013183023780584335, "learning_rate": 8.028720107750454e-07, "loss": 0.0012, "step": 368420 }, { "epoch": 3.9364282280036327, "grad_norm": 0.02809777483344078, "learning_rate": 8.028586428393871e-07, "loss": 0.0172, "step": 368430 }, { "epoch": 3.936535071317912, "grad_norm": 0.13443975150585175, "learning_rate": 8.028452745617784e-07, "loss": 0.0082, "step": 368440 }, { "epoch": 3.936641914632192, "grad_norm": 2.938194990158081, "learning_rate": 8.028319059422348e-07, "loss": 0.0029, "step": 368450 }, { "epoch": 3.9367487579464715, "grad_norm": 0.05982891470193863, "learning_rate": 8.028185369807708e-07, "loss": 0.0156, "step": 368460 }, { "epoch": 3.936855601260751, "grad_norm": 7.118079662322998, "learning_rate": 8.028051676774021e-07, "loss": 0.0449, "step": 368470 }, { "epoch": 3.936962444575031, "grad_norm": 1.5646926164627075, "learning_rate": 8.027917980321434e-07, "loss": 0.0041, "step": 368480 }, { "epoch": 3.9370692878893103, "grad_norm": 0.001202792627736926, "learning_rate": 8.027784280450098e-07, "loss": 0.0077, "step": 368490 }, { "epoch": 3.93717613120359, "grad_norm": 0.006672783754765987, "learning_rate": 8.027650577160167e-07, "loss": 0.0053, "step": 368500 }, { "epoch": 3.9372829745178697, "grad_norm": 0.06844733655452728, "learning_rate": 8.027516870451788e-07, "loss": 0.0088, "step": 368510 }, { "epoch": 3.937389817832149, "grad_norm": 2.907597780227661, "learning_rate": 8.027383160325115e-07, "loss": 0.011, "step": 368520 }, { "epoch": 3.9374966611464286, "grad_norm": 0.47953593730926514, "learning_rate": 8.027249446780298e-07, "loss": 0.0105, "step": 368530 }, { "epoch": 3.9376035044607085, "grad_norm": 2.4479990005493164, "learning_rate": 8.027115729817489e-07, "loss": 0.0069, "step": 368540 }, { "epoch": 3.937710347774988, "grad_norm": 0.01668553054332733, "learning_rate": 8.026982009436835e-07, "loss": 0.0237, "step": 368550 }, { "epoch": 3.937817191089268, "grad_norm": 3.133794069290161, "learning_rate": 8.026848285638493e-07, "loss": 0.0068, "step": 368560 }, { "epoch": 3.9379240344035473, "grad_norm": 0.1551111489534378, "learning_rate": 8.026714558422609e-07, "loss": 0.0174, "step": 368570 }, { "epoch": 3.938030877717827, "grad_norm": 0.8857914209365845, "learning_rate": 8.026580827789336e-07, "loss": 0.0221, "step": 368580 }, { "epoch": 3.9381377210321062, "grad_norm": 0.002168902661651373, "learning_rate": 8.026447093738824e-07, "loss": 0.0055, "step": 368590 }, { "epoch": 3.938244564346386, "grad_norm": 0.4872230887413025, "learning_rate": 8.026313356271226e-07, "loss": 0.0067, "step": 368600 }, { "epoch": 3.9383514076606656, "grad_norm": 21.19285011291504, "learning_rate": 8.026179615386692e-07, "loss": 0.0618, "step": 368610 }, { "epoch": 3.9384582509749455, "grad_norm": 0.0012050285004079342, "learning_rate": 8.026045871085372e-07, "loss": 0.016, "step": 368620 }, { "epoch": 3.938565094289225, "grad_norm": 0.005024537909775972, "learning_rate": 8.025912123367418e-07, "loss": 0.0044, "step": 368630 }, { "epoch": 3.9386719376035044, "grad_norm": 0.20999492704868317, "learning_rate": 8.025778372232982e-07, "loss": 0.0016, "step": 368640 }, { "epoch": 3.938778780917784, "grad_norm": 0.005662747658789158, "learning_rate": 8.025644617682213e-07, "loss": 0.0045, "step": 368650 }, { "epoch": 3.938885624232064, "grad_norm": 3.5644102096557617, "learning_rate": 8.025510859715262e-07, "loss": 0.0099, "step": 368660 }, { "epoch": 3.9389924675463432, "grad_norm": 0.033809464424848557, "learning_rate": 8.025377098332283e-07, "loss": 0.01, "step": 368670 }, { "epoch": 3.939099310860623, "grad_norm": 0.40506711602211, "learning_rate": 8.025243333533422e-07, "loss": 0.005, "step": 368680 }, { "epoch": 3.9392061541749026, "grad_norm": 3.283790111541748, "learning_rate": 8.025109565318835e-07, "loss": 0.049, "step": 368690 }, { "epoch": 3.939312997489182, "grad_norm": 0.038562022149562836, "learning_rate": 8.024975793688671e-07, "loss": 0.005, "step": 368700 }, { "epoch": 3.9394198408034615, "grad_norm": 0.01883433759212494, "learning_rate": 8.024842018643081e-07, "loss": 0.0238, "step": 368710 }, { "epoch": 3.9395266841177414, "grad_norm": 2.217885971069336, "learning_rate": 8.024708240182215e-07, "loss": 0.014, "step": 368720 }, { "epoch": 3.939633527432021, "grad_norm": 0.016199098899960518, "learning_rate": 8.024574458306226e-07, "loss": 0.0374, "step": 368730 }, { "epoch": 3.939740370746301, "grad_norm": 0.1226370632648468, "learning_rate": 8.024440673015265e-07, "loss": 0.0078, "step": 368740 }, { "epoch": 3.9398472140605802, "grad_norm": 2.8945019245147705, "learning_rate": 8.02430688430948e-07, "loss": 0.0166, "step": 368750 }, { "epoch": 3.9399540573748597, "grad_norm": 0.07064501941204071, "learning_rate": 8.024173092189026e-07, "loss": 0.0104, "step": 368760 }, { "epoch": 3.940060900689139, "grad_norm": 0.085573710501194, "learning_rate": 8.024039296654052e-07, "loss": 0.0208, "step": 368770 }, { "epoch": 3.940167744003419, "grad_norm": 2.9404149055480957, "learning_rate": 8.023905497704709e-07, "loss": 0.0151, "step": 368780 }, { "epoch": 3.9402745873176985, "grad_norm": 0.1598484218120575, "learning_rate": 8.023771695341149e-07, "loss": 0.0018, "step": 368790 }, { "epoch": 3.9403814306319784, "grad_norm": 0.004188793245702982, "learning_rate": 8.023637889563522e-07, "loss": 0.0126, "step": 368800 }, { "epoch": 3.940488273946258, "grad_norm": 0.06112494319677353, "learning_rate": 8.023504080371979e-07, "loss": 0.019, "step": 368810 }, { "epoch": 3.9405951172605374, "grad_norm": 0.005842843558639288, "learning_rate": 8.023370267766673e-07, "loss": 0.0018, "step": 368820 }, { "epoch": 3.940701960574817, "grad_norm": 0.013655022718012333, "learning_rate": 8.023236451747752e-07, "loss": 0.0074, "step": 368830 }, { "epoch": 3.9408088038890967, "grad_norm": 0.2328108847141266, "learning_rate": 8.023102632315371e-07, "loss": 0.0228, "step": 368840 }, { "epoch": 3.940915647203376, "grad_norm": 0.3620436191558838, "learning_rate": 8.022968809469678e-07, "loss": 0.0123, "step": 368850 }, { "epoch": 3.941022490517656, "grad_norm": 0.03825219348073006, "learning_rate": 8.022834983210824e-07, "loss": 0.0029, "step": 368860 }, { "epoch": 3.9411293338319355, "grad_norm": 3.985262632369995, "learning_rate": 8.022701153538963e-07, "loss": 0.002, "step": 368870 }, { "epoch": 3.941236177146215, "grad_norm": 0.06935595721006393, "learning_rate": 8.022567320454244e-07, "loss": 0.0172, "step": 368880 }, { "epoch": 3.9413430204604945, "grad_norm": 0.005600019823759794, "learning_rate": 8.022433483956817e-07, "loss": 0.0164, "step": 368890 }, { "epoch": 3.9414498637747744, "grad_norm": 6.703047752380371, "learning_rate": 8.022299644046836e-07, "loss": 0.007, "step": 368900 }, { "epoch": 3.941556707089054, "grad_norm": 6.5615692138671875, "learning_rate": 8.022165800724451e-07, "loss": 0.0052, "step": 368910 }, { "epoch": 3.9416635504033337, "grad_norm": 0.01802409067749977, "learning_rate": 8.022031953989811e-07, "loss": 0.0023, "step": 368920 }, { "epoch": 3.941770393717613, "grad_norm": 0.20334559679031372, "learning_rate": 8.02189810384307e-07, "loss": 0.0086, "step": 368930 }, { "epoch": 3.9418772370318926, "grad_norm": 5.606261730194092, "learning_rate": 8.021764250284377e-07, "loss": 0.0268, "step": 368940 }, { "epoch": 3.941984080346172, "grad_norm": 11.013589859008789, "learning_rate": 8.021630393313885e-07, "loss": 0.0154, "step": 368950 }, { "epoch": 3.942090923660452, "grad_norm": 1.429214596748352, "learning_rate": 8.021496532931744e-07, "loss": 0.0259, "step": 368960 }, { "epoch": 3.9421977669747315, "grad_norm": 0.9005364775657654, "learning_rate": 8.021362669138106e-07, "loss": 0.0238, "step": 368970 }, { "epoch": 3.9423046102890114, "grad_norm": 0.0033486236352473497, "learning_rate": 8.02122880193312e-07, "loss": 0.0088, "step": 368980 }, { "epoch": 3.942411453603291, "grad_norm": 0.006359434686601162, "learning_rate": 8.02109493131694e-07, "loss": 0.0085, "step": 368990 }, { "epoch": 3.9425182969175703, "grad_norm": 0.02653493918478489, "learning_rate": 8.020961057289715e-07, "loss": 0.0072, "step": 369000 }, { "epoch": 3.9426251402318497, "grad_norm": 0.31420549750328064, "learning_rate": 8.020827179851597e-07, "loss": 0.0085, "step": 369010 }, { "epoch": 3.9427319835461296, "grad_norm": 0.7032207250595093, "learning_rate": 8.020693299002737e-07, "loss": 0.0606, "step": 369020 }, { "epoch": 3.942838826860409, "grad_norm": 1.893609881401062, "learning_rate": 8.020559414743287e-07, "loss": 0.0092, "step": 369030 }, { "epoch": 3.942945670174689, "grad_norm": 0.07354115694761276, "learning_rate": 8.020425527073396e-07, "loss": 0.0092, "step": 369040 }, { "epoch": 3.9430525134889685, "grad_norm": 4.540886878967285, "learning_rate": 8.020291635993219e-07, "loss": 0.013, "step": 369050 }, { "epoch": 3.943159356803248, "grad_norm": 0.2344391942024231, "learning_rate": 8.020157741502902e-07, "loss": 0.0057, "step": 369060 }, { "epoch": 3.9432662001175274, "grad_norm": 0.48384973406791687, "learning_rate": 8.020023843602601e-07, "loss": 0.0537, "step": 369070 }, { "epoch": 3.9433730434318073, "grad_norm": 0.007127587217837572, "learning_rate": 8.019889942292466e-07, "loss": 0.0132, "step": 369080 }, { "epoch": 3.9434798867460867, "grad_norm": 0.030605997890233994, "learning_rate": 8.019756037572645e-07, "loss": 0.0081, "step": 369090 }, { "epoch": 3.9435867300603666, "grad_norm": 0.4231431186199188, "learning_rate": 8.019622129443293e-07, "loss": 0.001, "step": 369100 }, { "epoch": 3.943693573374646, "grad_norm": 9.717931747436523, "learning_rate": 8.019488217904559e-07, "loss": 0.0315, "step": 369110 }, { "epoch": 3.9438004166889256, "grad_norm": 1.7999317646026611, "learning_rate": 8.019354302956595e-07, "loss": 0.0075, "step": 369120 }, { "epoch": 3.9439072600032055, "grad_norm": 0.13803574442863464, "learning_rate": 8.019220384599552e-07, "loss": 0.0053, "step": 369130 }, { "epoch": 3.944014103317485, "grad_norm": 4.407440185546875, "learning_rate": 8.019086462833582e-07, "loss": 0.0106, "step": 369140 }, { "epoch": 3.9441209466317644, "grad_norm": 0.005768971052020788, "learning_rate": 8.018952537658834e-07, "loss": 0.0017, "step": 369150 }, { "epoch": 3.9442277899460443, "grad_norm": 0.10311789810657501, "learning_rate": 8.018818609075462e-07, "loss": 0.0034, "step": 369160 }, { "epoch": 3.9443346332603237, "grad_norm": 0.05949645861983299, "learning_rate": 8.018684677083615e-07, "loss": 0.0114, "step": 369170 }, { "epoch": 3.944441476574603, "grad_norm": 1.8293869495391846, "learning_rate": 8.018550741683446e-07, "loss": 0.0373, "step": 369180 }, { "epoch": 3.944548319888883, "grad_norm": 0.46771642565727234, "learning_rate": 8.018416802875105e-07, "loss": 0.018, "step": 369190 }, { "epoch": 3.9446551632031626, "grad_norm": 0.01605970971286297, "learning_rate": 8.018282860658745e-07, "loss": 0.0037, "step": 369200 }, { "epoch": 3.944762006517442, "grad_norm": 0.07872334122657776, "learning_rate": 8.018148915034515e-07, "loss": 0.0054, "step": 369210 }, { "epoch": 3.944868849831722, "grad_norm": 7.550278186798096, "learning_rate": 8.018014966002567e-07, "loss": 0.0095, "step": 369220 }, { "epoch": 3.9449756931460014, "grad_norm": 0.008147668093442917, "learning_rate": 8.017881013563051e-07, "loss": 0.0075, "step": 369230 }, { "epoch": 3.945082536460281, "grad_norm": 0.002376575255766511, "learning_rate": 8.017747057716121e-07, "loss": 0.0203, "step": 369240 }, { "epoch": 3.9451893797745607, "grad_norm": 0.0032139297109097242, "learning_rate": 8.017613098461927e-07, "loss": 0.01, "step": 369250 }, { "epoch": 3.94529622308884, "grad_norm": 0.6573325991630554, "learning_rate": 8.017479135800619e-07, "loss": 0.0007, "step": 369260 }, { "epoch": 3.9454030664031197, "grad_norm": 2.1831841468811035, "learning_rate": 8.01734516973235e-07, "loss": 0.0487, "step": 369270 }, { "epoch": 3.9455099097173996, "grad_norm": 1.2670564651489258, "learning_rate": 8.017211200257271e-07, "loss": 0.0501, "step": 369280 }, { "epoch": 3.945616753031679, "grad_norm": 0.02716490440070629, "learning_rate": 8.017077227375531e-07, "loss": 0.0033, "step": 369290 }, { "epoch": 3.9457235963459585, "grad_norm": 0.09330707788467407, "learning_rate": 8.016943251087286e-07, "loss": 0.0366, "step": 369300 }, { "epoch": 3.9458304396602384, "grad_norm": 0.02509772963821888, "learning_rate": 8.016809271392681e-07, "loss": 0.0003, "step": 369310 }, { "epoch": 3.945937282974518, "grad_norm": 2.0310652256011963, "learning_rate": 8.016675288291874e-07, "loss": 0.005, "step": 369320 }, { "epoch": 3.9460441262887977, "grad_norm": 0.0076598855666816235, "learning_rate": 8.016541301785011e-07, "loss": 0.006, "step": 369330 }, { "epoch": 3.946150969603077, "grad_norm": 0.48473167419433594, "learning_rate": 8.016407311872246e-07, "loss": 0.0328, "step": 369340 }, { "epoch": 3.9462578129173567, "grad_norm": 2.5117809772491455, "learning_rate": 8.016273318553727e-07, "loss": 0.014, "step": 369350 }, { "epoch": 3.946364656231636, "grad_norm": 2.2698380947113037, "learning_rate": 8.016139321829611e-07, "loss": 0.0243, "step": 369360 }, { "epoch": 3.946471499545916, "grad_norm": 1.5367772579193115, "learning_rate": 8.016005321700046e-07, "loss": 0.0085, "step": 369370 }, { "epoch": 3.9465783428601955, "grad_norm": 1.7204840183258057, "learning_rate": 8.015871318165182e-07, "loss": 0.0103, "step": 369380 }, { "epoch": 3.9466851861744754, "grad_norm": 0.060758452862501144, "learning_rate": 8.015737311225172e-07, "loss": 0.0106, "step": 369390 }, { "epoch": 3.946792029488755, "grad_norm": 0.003520701080560684, "learning_rate": 8.015603300880166e-07, "loss": 0.0114, "step": 369400 }, { "epoch": 3.9468988728030343, "grad_norm": 0.018438350409269333, "learning_rate": 8.015469287130318e-07, "loss": 0.0054, "step": 369410 }, { "epoch": 3.9470057161173138, "grad_norm": 0.12677881121635437, "learning_rate": 8.015335269975776e-07, "loss": 0.0258, "step": 369420 }, { "epoch": 3.9471125594315937, "grad_norm": 0.05938844382762909, "learning_rate": 8.015201249416694e-07, "loss": 0.0256, "step": 369430 }, { "epoch": 3.947219402745873, "grad_norm": 0.05730744078755379, "learning_rate": 8.015067225453222e-07, "loss": 0.0032, "step": 369440 }, { "epoch": 3.947326246060153, "grad_norm": 0.16404280066490173, "learning_rate": 8.014933198085512e-07, "loss": 0.0141, "step": 369450 }, { "epoch": 3.9474330893744325, "grad_norm": 2.1003994941711426, "learning_rate": 8.014799167313713e-07, "loss": 0.0041, "step": 369460 }, { "epoch": 3.947539932688712, "grad_norm": 2.029780626296997, "learning_rate": 8.01466513313798e-07, "loss": 0.0178, "step": 369470 }, { "epoch": 3.9476467760029914, "grad_norm": 2.12260365486145, "learning_rate": 8.014531095558462e-07, "loss": 0.0203, "step": 369480 }, { "epoch": 3.9477536193172713, "grad_norm": 7.813527584075928, "learning_rate": 8.01439705457531e-07, "loss": 0.0218, "step": 369490 }, { "epoch": 3.9478604626315508, "grad_norm": 3.3354647159576416, "learning_rate": 8.014263010188677e-07, "loss": 0.0364, "step": 369500 }, { "epoch": 3.9479673059458307, "grad_norm": 1.0060667991638184, "learning_rate": 8.014128962398714e-07, "loss": 0.0025, "step": 369510 }, { "epoch": 3.94807414926011, "grad_norm": 0.05307864397764206, "learning_rate": 8.013994911205572e-07, "loss": 0.0567, "step": 369520 }, { "epoch": 3.9481809925743896, "grad_norm": 3.5039193630218506, "learning_rate": 8.013860856609402e-07, "loss": 0.0502, "step": 369530 }, { "epoch": 3.948287835888669, "grad_norm": 0.06901229918003082, "learning_rate": 8.013726798610356e-07, "loss": 0.0068, "step": 369540 }, { "epoch": 3.948394679202949, "grad_norm": 2.914647340774536, "learning_rate": 8.013592737208583e-07, "loss": 0.0123, "step": 369550 }, { "epoch": 3.9485015225172284, "grad_norm": 0.03664391487836838, "learning_rate": 8.013458672404238e-07, "loss": 0.0113, "step": 369560 }, { "epoch": 3.9486083658315083, "grad_norm": 0.06940186768770218, "learning_rate": 8.013324604197471e-07, "loss": 0.0344, "step": 369570 }, { "epoch": 3.9487152091457878, "grad_norm": 0.013422785326838493, "learning_rate": 8.013190532588434e-07, "loss": 0.0621, "step": 369580 }, { "epoch": 3.9488220524600672, "grad_norm": 0.006803514901548624, "learning_rate": 8.013056457577275e-07, "loss": 0.001, "step": 369590 }, { "epoch": 3.9489288957743467, "grad_norm": 2.542470932006836, "learning_rate": 8.012922379164149e-07, "loss": 0.0094, "step": 369600 }, { "epoch": 3.9490357390886266, "grad_norm": 1.0879062414169312, "learning_rate": 8.012788297349206e-07, "loss": 0.0018, "step": 369610 }, { "epoch": 3.949142582402906, "grad_norm": 3.702528715133667, "learning_rate": 8.012654212132598e-07, "loss": 0.015, "step": 369620 }, { "epoch": 3.949249425717186, "grad_norm": 0.01306223589926958, "learning_rate": 8.012520123514476e-07, "loss": 0.0056, "step": 369630 }, { "epoch": 3.9493562690314654, "grad_norm": 2.1300652027130127, "learning_rate": 8.012386031494991e-07, "loss": 0.0026, "step": 369640 }, { "epoch": 3.949463112345745, "grad_norm": 0.003782801330089569, "learning_rate": 8.012251936074295e-07, "loss": 0.0411, "step": 369650 }, { "epoch": 3.9495699556600243, "grad_norm": 6.355005741119385, "learning_rate": 8.012117837252539e-07, "loss": 0.0174, "step": 369660 }, { "epoch": 3.9496767989743042, "grad_norm": 5.261641979217529, "learning_rate": 8.011983735029876e-07, "loss": 0.0079, "step": 369670 }, { "epoch": 3.9497836422885837, "grad_norm": 0.13448067009449005, "learning_rate": 8.011849629406456e-07, "loss": 0.013, "step": 369680 }, { "epoch": 3.9498904856028636, "grad_norm": 2.016493797302246, "learning_rate": 8.011715520382427e-07, "loss": 0.0214, "step": 369690 }, { "epoch": 3.949997328917143, "grad_norm": 13.006114959716797, "learning_rate": 8.011581407957948e-07, "loss": 0.0223, "step": 369700 }, { "epoch": 3.9501041722314225, "grad_norm": 1.0340588092803955, "learning_rate": 8.011447292133164e-07, "loss": 0.005, "step": 369710 }, { "epoch": 3.950211015545702, "grad_norm": 1.0095758438110352, "learning_rate": 8.01131317290823e-07, "loss": 0.0069, "step": 369720 }, { "epoch": 3.950317858859982, "grad_norm": 0.9866602420806885, "learning_rate": 8.011179050283294e-07, "loss": 0.0006, "step": 369730 }, { "epoch": 3.9504247021742613, "grad_norm": 1.0872918367385864, "learning_rate": 8.011044924258512e-07, "loss": 0.0042, "step": 369740 }, { "epoch": 3.9505315454885412, "grad_norm": 0.5685601234436035, "learning_rate": 8.010910794834032e-07, "loss": 0.0027, "step": 369750 }, { "epoch": 3.9506383888028207, "grad_norm": 0.09697366505861282, "learning_rate": 8.010776662010006e-07, "loss": 0.0233, "step": 369760 }, { "epoch": 3.9507452321171, "grad_norm": 2.475071430206299, "learning_rate": 8.010642525786587e-07, "loss": 0.0219, "step": 369770 }, { "epoch": 3.9508520754313796, "grad_norm": 3.30146861076355, "learning_rate": 8.010508386163926e-07, "loss": 0.0008, "step": 369780 }, { "epoch": 3.9509589187456595, "grad_norm": 4.157149791717529, "learning_rate": 8.010374243142171e-07, "loss": 0.0138, "step": 369790 }, { "epoch": 3.951065762059939, "grad_norm": 0.019581200554966927, "learning_rate": 8.010240096721478e-07, "loss": 0.0204, "step": 369800 }, { "epoch": 3.951172605374219, "grad_norm": 0.17298534512519836, "learning_rate": 8.010105946901997e-07, "loss": 0.0033, "step": 369810 }, { "epoch": 3.9512794486884983, "grad_norm": 0.5430799126625061, "learning_rate": 8.009971793683878e-07, "loss": 0.0134, "step": 369820 }, { "epoch": 3.951386292002778, "grad_norm": 3.2328131198883057, "learning_rate": 8.009837637067275e-07, "loss": 0.0317, "step": 369830 }, { "epoch": 3.9514931353170573, "grad_norm": 0.0013352351961657405, "learning_rate": 8.009703477052339e-07, "loss": 0.0031, "step": 369840 }, { "epoch": 3.951599978631337, "grad_norm": 0.003818976227194071, "learning_rate": 8.009569313639219e-07, "loss": 0.0097, "step": 369850 }, { "epoch": 3.9517068219456166, "grad_norm": 0.020963594317436218, "learning_rate": 8.009435146828067e-07, "loss": 0.0083, "step": 369860 }, { "epoch": 3.9518136652598965, "grad_norm": 0.016611792147159576, "learning_rate": 8.009300976619038e-07, "loss": 0.0092, "step": 369870 }, { "epoch": 3.951920508574176, "grad_norm": 0.0026836912147700787, "learning_rate": 8.009166803012281e-07, "loss": 0.0288, "step": 369880 }, { "epoch": 3.9520273518884554, "grad_norm": 3.9262535572052, "learning_rate": 8.009032626007945e-07, "loss": 0.0092, "step": 369890 }, { "epoch": 3.9521341952027353, "grad_norm": 0.008853182196617126, "learning_rate": 8.008898445606187e-07, "loss": 0.0044, "step": 369900 }, { "epoch": 3.952241038517015, "grad_norm": 0.003477757563814521, "learning_rate": 8.008764261807154e-07, "loss": 0.0105, "step": 369910 }, { "epoch": 3.9523478818312943, "grad_norm": 0.06358689069747925, "learning_rate": 8.008630074610998e-07, "loss": 0.0084, "step": 369920 }, { "epoch": 3.952454725145574, "grad_norm": 0.12007664889097214, "learning_rate": 8.008495884017874e-07, "loss": 0.0044, "step": 369930 }, { "epoch": 3.9525615684598536, "grad_norm": 0.014875768683850765, "learning_rate": 8.008361690027929e-07, "loss": 0.0187, "step": 369940 }, { "epoch": 3.952668411774133, "grad_norm": 0.00658893259242177, "learning_rate": 8.008227492641318e-07, "loss": 0.005, "step": 369950 }, { "epoch": 3.952775255088413, "grad_norm": 0.026344753801822662, "learning_rate": 8.008093291858191e-07, "loss": 0.0209, "step": 369960 }, { "epoch": 3.9528820984026924, "grad_norm": 2.5517141819000244, "learning_rate": 8.0079590876787e-07, "loss": 0.0124, "step": 369970 }, { "epoch": 3.952988941716972, "grad_norm": 7.86947774887085, "learning_rate": 8.007824880102994e-07, "loss": 0.0302, "step": 369980 }, { "epoch": 3.953095785031252, "grad_norm": 0.0038817315362393856, "learning_rate": 8.00769066913123e-07, "loss": 0.0034, "step": 369990 }, { "epoch": 3.9532026283455313, "grad_norm": 0.21516703069210052, "learning_rate": 8.007556454763553e-07, "loss": 0.0067, "step": 370000 }, { "epoch": 3.9533094716598107, "grad_norm": 0.45728549361228943, "learning_rate": 8.00742223700012e-07, "loss": 0.005, "step": 370010 }, { "epoch": 3.9534163149740906, "grad_norm": 0.9482095837593079, "learning_rate": 8.007288015841081e-07, "loss": 0.0065, "step": 370020 }, { "epoch": 3.95352315828837, "grad_norm": 0.012190411798655987, "learning_rate": 8.007153791286583e-07, "loss": 0.0286, "step": 370030 }, { "epoch": 3.95363000160265, "grad_norm": 0.8194060325622559, "learning_rate": 8.007019563336786e-07, "loss": 0.0416, "step": 370040 }, { "epoch": 3.9537368449169294, "grad_norm": 0.0028916355222463608, "learning_rate": 8.006885331991834e-07, "loss": 0.0218, "step": 370050 }, { "epoch": 3.953843688231209, "grad_norm": 0.002200563671067357, "learning_rate": 8.006751097251881e-07, "loss": 0.0043, "step": 370060 }, { "epoch": 3.9539505315454884, "grad_norm": 0.041031017899513245, "learning_rate": 8.006616859117081e-07, "loss": 0.0112, "step": 370070 }, { "epoch": 3.9540573748597683, "grad_norm": 2.2711422443389893, "learning_rate": 8.006482617587584e-07, "loss": 0.004, "step": 370080 }, { "epoch": 3.9541642181740477, "grad_norm": 0.14071431756019592, "learning_rate": 8.006348372663539e-07, "loss": 0.012, "step": 370090 }, { "epoch": 3.9542710614883276, "grad_norm": 2.1160569190979004, "learning_rate": 8.006214124345102e-07, "loss": 0.0107, "step": 370100 }, { "epoch": 3.954377904802607, "grad_norm": 0.514750599861145, "learning_rate": 8.00607987263242e-07, "loss": 0.0026, "step": 370110 }, { "epoch": 3.9544847481168865, "grad_norm": 0.020260032266378403, "learning_rate": 8.005945617525649e-07, "loss": 0.0201, "step": 370120 }, { "epoch": 3.954591591431166, "grad_norm": 13.211494445800781, "learning_rate": 8.005811359024939e-07, "loss": 0.0432, "step": 370130 }, { "epoch": 3.954698434745446, "grad_norm": 0.8309436440467834, "learning_rate": 8.005677097130439e-07, "loss": 0.0076, "step": 370140 }, { "epoch": 3.9548052780597254, "grad_norm": 0.012718992307782173, "learning_rate": 8.005542831842304e-07, "loss": 0.0108, "step": 370150 }, { "epoch": 3.9549121213740053, "grad_norm": 2.656564235687256, "learning_rate": 8.005408563160684e-07, "loss": 0.0113, "step": 370160 }, { "epoch": 3.9550189646882847, "grad_norm": 0.986168622970581, "learning_rate": 8.005274291085731e-07, "loss": 0.0075, "step": 370170 }, { "epoch": 3.955125808002564, "grad_norm": 0.0792027935385704, "learning_rate": 8.005140015617595e-07, "loss": 0.0046, "step": 370180 }, { "epoch": 3.9552326513168437, "grad_norm": 0.05231604352593422, "learning_rate": 8.005005736756432e-07, "loss": 0.0129, "step": 370190 }, { "epoch": 3.9553394946311236, "grad_norm": 0.009650840424001217, "learning_rate": 8.004871454502388e-07, "loss": 0.0205, "step": 370200 }, { "epoch": 3.955446337945403, "grad_norm": 0.04585554450750351, "learning_rate": 8.00473716885562e-07, "loss": 0.0015, "step": 370210 }, { "epoch": 3.955553181259683, "grad_norm": 0.012413989752531052, "learning_rate": 8.004602879816276e-07, "loss": 0.0116, "step": 370220 }, { "epoch": 3.9556600245739624, "grad_norm": 0.026258595287799835, "learning_rate": 8.004468587384508e-07, "loss": 0.0136, "step": 370230 }, { "epoch": 3.955766867888242, "grad_norm": 0.0021785751450806856, "learning_rate": 8.004334291560468e-07, "loss": 0.0012, "step": 370240 }, { "epoch": 3.9558737112025213, "grad_norm": 0.54682856798172, "learning_rate": 8.004199992344307e-07, "loss": 0.0047, "step": 370250 }, { "epoch": 3.955980554516801, "grad_norm": 16.50916290283203, "learning_rate": 8.004065689736179e-07, "loss": 0.0161, "step": 370260 }, { "epoch": 3.9560873978310807, "grad_norm": 0.26630619168281555, "learning_rate": 8.003931383736234e-07, "loss": 0.0399, "step": 370270 }, { "epoch": 3.9561942411453606, "grad_norm": 6.504971504211426, "learning_rate": 8.003797074344623e-07, "loss": 0.0359, "step": 370280 }, { "epoch": 3.95630108445964, "grad_norm": 3.1377298831939697, "learning_rate": 8.003662761561498e-07, "loss": 0.0119, "step": 370290 }, { "epoch": 3.9564079277739195, "grad_norm": 0.294498473405838, "learning_rate": 8.003528445387013e-07, "loss": 0.0385, "step": 370300 }, { "epoch": 3.956514771088199, "grad_norm": 2.625344753265381, "learning_rate": 8.003394125821317e-07, "loss": 0.0113, "step": 370310 }, { "epoch": 3.956621614402479, "grad_norm": 0.018111495301127434, "learning_rate": 8.003259802864562e-07, "loss": 0.0011, "step": 370320 }, { "epoch": 3.9567284577167583, "grad_norm": 0.013962805271148682, "learning_rate": 8.0031254765169e-07, "loss": 0.0281, "step": 370330 }, { "epoch": 3.956835301031038, "grad_norm": 0.008137472905218601, "learning_rate": 8.002991146778483e-07, "loss": 0.0046, "step": 370340 }, { "epoch": 3.9569421443453177, "grad_norm": 5.995974063873291, "learning_rate": 8.002856813649461e-07, "loss": 0.0048, "step": 370350 }, { "epoch": 3.957048987659597, "grad_norm": 1.2995966672897339, "learning_rate": 8.002722477129989e-07, "loss": 0.0859, "step": 370360 }, { "epoch": 3.9571558309738766, "grad_norm": 0.004252605605870485, "learning_rate": 8.002588137220216e-07, "loss": 0.0862, "step": 370370 }, { "epoch": 3.9572626742881565, "grad_norm": 0.005768732633441687, "learning_rate": 8.002453793920295e-07, "loss": 0.0069, "step": 370380 }, { "epoch": 3.957369517602436, "grad_norm": 1.006812572479248, "learning_rate": 8.002319447230374e-07, "loss": 0.003, "step": 370390 }, { "epoch": 3.957476360916716, "grad_norm": 0.08033005893230438, "learning_rate": 8.002185097150611e-07, "loss": 0.0798, "step": 370400 }, { "epoch": 3.9575832042309953, "grad_norm": 0.027477853000164032, "learning_rate": 8.002050743681154e-07, "loss": 0.0127, "step": 370410 }, { "epoch": 3.9576900475452748, "grad_norm": 0.021320976316928864, "learning_rate": 8.001916386822154e-07, "loss": 0.0223, "step": 370420 }, { "epoch": 3.957796890859554, "grad_norm": 3.0982272624969482, "learning_rate": 8.001782026573764e-07, "loss": 0.0274, "step": 370430 }, { "epoch": 3.957903734173834, "grad_norm": 0.5588467121124268, "learning_rate": 8.001647662936136e-07, "loss": 0.0129, "step": 370440 }, { "epoch": 3.9580105774881136, "grad_norm": 2.0658822059631348, "learning_rate": 8.001513295909421e-07, "loss": 0.011, "step": 370450 }, { "epoch": 3.9581174208023935, "grad_norm": 0.006691649556159973, "learning_rate": 8.001378925493771e-07, "loss": 0.0008, "step": 370460 }, { "epoch": 3.958224264116673, "grad_norm": 0.0014961279230192304, "learning_rate": 8.001244551689338e-07, "loss": 0.0386, "step": 370470 }, { "epoch": 3.9583311074309524, "grad_norm": 1.4167020320892334, "learning_rate": 8.001110174496272e-07, "loss": 0.0024, "step": 370480 }, { "epoch": 3.958437950745232, "grad_norm": 1.4620083570480347, "learning_rate": 8.000975793914728e-07, "loss": 0.0087, "step": 370490 }, { "epoch": 3.9585447940595118, "grad_norm": 0.525549054145813, "learning_rate": 8.000841409944856e-07, "loss": 0.0053, "step": 370500 }, { "epoch": 3.958651637373791, "grad_norm": 0.008493473753333092, "learning_rate": 8.000707022586807e-07, "loss": 0.0001, "step": 370510 }, { "epoch": 3.958758480688071, "grad_norm": 0.17009897530078888, "learning_rate": 8.000572631840732e-07, "loss": 0.02, "step": 370520 }, { "epoch": 3.9588653240023506, "grad_norm": 0.0047189765609800816, "learning_rate": 8.000438237706785e-07, "loss": 0.0102, "step": 370530 }, { "epoch": 3.95897216731663, "grad_norm": 0.395906001329422, "learning_rate": 8.000303840185118e-07, "loss": 0.0011, "step": 370540 }, { "epoch": 3.9590790106309095, "grad_norm": 0.003247196553274989, "learning_rate": 8.000169439275879e-07, "loss": 0.0122, "step": 370550 }, { "epoch": 3.9591858539451894, "grad_norm": 5.27518892288208, "learning_rate": 8.000035034979225e-07, "loss": 0.0096, "step": 370560 }, { "epoch": 3.959292697259469, "grad_norm": 4.411251068115234, "learning_rate": 7.999900627295302e-07, "loss": 0.0154, "step": 370570 }, { "epoch": 3.9593995405737488, "grad_norm": 9.896389961242676, "learning_rate": 7.999766216224267e-07, "loss": 0.0195, "step": 370580 }, { "epoch": 3.9595063838880282, "grad_norm": 0.009160883724689484, "learning_rate": 7.999631801766269e-07, "loss": 0.0021, "step": 370590 }, { "epoch": 3.9596132272023077, "grad_norm": 0.011657480150461197, "learning_rate": 7.999497383921459e-07, "loss": 0.0108, "step": 370600 }, { "epoch": 3.9597200705165876, "grad_norm": 0.255622535943985, "learning_rate": 7.999362962689993e-07, "loss": 0.0241, "step": 370610 }, { "epoch": 3.959826913830867, "grad_norm": 0.8640132546424866, "learning_rate": 7.999228538072018e-07, "loss": 0.0016, "step": 370620 }, { "epoch": 3.9599337571451465, "grad_norm": 0.06535495817661285, "learning_rate": 7.999094110067686e-07, "loss": 0.0141, "step": 370630 }, { "epoch": 3.9600406004594264, "grad_norm": 0.19132351875305176, "learning_rate": 7.998959678677153e-07, "loss": 0.0284, "step": 370640 }, { "epoch": 3.960147443773706, "grad_norm": 0.20828299224376678, "learning_rate": 7.998825243900568e-07, "loss": 0.0052, "step": 370650 }, { "epoch": 3.9602542870879853, "grad_norm": 4.09354829788208, "learning_rate": 7.998690805738082e-07, "loss": 0.0116, "step": 370660 }, { "epoch": 3.9603611304022652, "grad_norm": 0.11826308071613312, "learning_rate": 7.998556364189848e-07, "loss": 0.0134, "step": 370670 }, { "epoch": 3.9604679737165447, "grad_norm": 0.024901309981942177, "learning_rate": 7.998421919256017e-07, "loss": 0.0277, "step": 370680 }, { "epoch": 3.960574817030824, "grad_norm": 3.5837996006011963, "learning_rate": 7.998287470936742e-07, "loss": 0.0057, "step": 370690 }, { "epoch": 3.960681660345104, "grad_norm": 4.0980682373046875, "learning_rate": 7.998153019232174e-07, "loss": 0.0023, "step": 370700 }, { "epoch": 3.9607885036593835, "grad_norm": 0.036163270473480225, "learning_rate": 7.998018564142466e-07, "loss": 0.0133, "step": 370710 }, { "epoch": 3.960895346973663, "grad_norm": 3.2223594188690186, "learning_rate": 7.997884105667767e-07, "loss": 0.0272, "step": 370720 }, { "epoch": 3.961002190287943, "grad_norm": 0.010633482597768307, "learning_rate": 7.997749643808231e-07, "loss": 0.0184, "step": 370730 }, { "epoch": 3.9611090336022223, "grad_norm": 0.9501661062240601, "learning_rate": 7.997615178564011e-07, "loss": 0.0215, "step": 370740 }, { "epoch": 3.961215876916502, "grad_norm": 2.056180953979492, "learning_rate": 7.997480709935256e-07, "loss": 0.0287, "step": 370750 }, { "epoch": 3.9613227202307817, "grad_norm": 1.5442628860473633, "learning_rate": 7.997346237922119e-07, "loss": 0.0491, "step": 370760 }, { "epoch": 3.961429563545061, "grad_norm": 0.023377936333417892, "learning_rate": 7.997211762524751e-07, "loss": 0.0046, "step": 370770 }, { "epoch": 3.9615364068593406, "grad_norm": 0.062167007476091385, "learning_rate": 7.997077283743307e-07, "loss": 0.0437, "step": 370780 }, { "epoch": 3.9616432501736205, "grad_norm": 0.005336873233318329, "learning_rate": 7.996942801577935e-07, "loss": 0.0102, "step": 370790 }, { "epoch": 3.9617500934879, "grad_norm": 0.3620441257953644, "learning_rate": 7.996808316028788e-07, "loss": 0.0093, "step": 370800 }, { "epoch": 3.96185693680218, "grad_norm": 0.685089647769928, "learning_rate": 7.996673827096018e-07, "loss": 0.0368, "step": 370810 }, { "epoch": 3.9619637801164593, "grad_norm": 5.975852012634277, "learning_rate": 7.99653933477978e-07, "loss": 0.0051, "step": 370820 }, { "epoch": 3.962070623430739, "grad_norm": 0.37925985455513, "learning_rate": 7.99640483908022e-07, "loss": 0.0086, "step": 370830 }, { "epoch": 3.9621774667450183, "grad_norm": 1.7167292833328247, "learning_rate": 7.996270339997493e-07, "loss": 0.0036, "step": 370840 }, { "epoch": 3.962284310059298, "grad_norm": 0.12321941554546356, "learning_rate": 7.996135837531752e-07, "loss": 0.0048, "step": 370850 }, { "epoch": 3.9623911533735776, "grad_norm": 0.005926461424678564, "learning_rate": 7.996001331683147e-07, "loss": 0.022, "step": 370860 }, { "epoch": 3.9624979966878575, "grad_norm": 1.9012442827224731, "learning_rate": 7.99586682245183e-07, "loss": 0.0037, "step": 370870 }, { "epoch": 3.962604840002137, "grad_norm": 0.0339696928858757, "learning_rate": 7.995732309837953e-07, "loss": 0.0127, "step": 370880 }, { "epoch": 3.9627116833164164, "grad_norm": 0.12232445180416107, "learning_rate": 7.995597793841669e-07, "loss": 0.0078, "step": 370890 }, { "epoch": 3.962818526630696, "grad_norm": 0.033289216458797455, "learning_rate": 7.995463274463128e-07, "loss": 0.0155, "step": 370900 }, { "epoch": 3.962925369944976, "grad_norm": 2.0740737915039062, "learning_rate": 7.995328751702484e-07, "loss": 0.0046, "step": 370910 }, { "epoch": 3.9630322132592553, "grad_norm": 0.002318003447726369, "learning_rate": 7.995194225559887e-07, "loss": 0.0033, "step": 370920 }, { "epoch": 3.963139056573535, "grad_norm": 0.00821875873953104, "learning_rate": 7.99505969603549e-07, "loss": 0.0068, "step": 370930 }, { "epoch": 3.9632458998878146, "grad_norm": 0.011613689363002777, "learning_rate": 7.994925163129445e-07, "loss": 0.0208, "step": 370940 }, { "epoch": 3.963352743202094, "grad_norm": 3.3262600898742676, "learning_rate": 7.994790626841902e-07, "loss": 0.0123, "step": 370950 }, { "epoch": 3.9634595865163735, "grad_norm": 0.13119356334209442, "learning_rate": 7.994656087173015e-07, "loss": 0.0131, "step": 370960 }, { "epoch": 3.9635664298306534, "grad_norm": 0.003420477034524083, "learning_rate": 7.994521544122936e-07, "loss": 0.0155, "step": 370970 }, { "epoch": 3.963673273144933, "grad_norm": 0.02610025741159916, "learning_rate": 7.994386997691815e-07, "loss": 0.0142, "step": 370980 }, { "epoch": 3.963780116459213, "grad_norm": 1.4566340446472168, "learning_rate": 7.994252447879807e-07, "loss": 0.0082, "step": 370990 }, { "epoch": 3.9638869597734923, "grad_norm": 0.01948370411992073, "learning_rate": 7.99411789468706e-07, "loss": 0.008, "step": 371000 }, { "epoch": 3.9639938030877717, "grad_norm": 0.005457171704620123, "learning_rate": 7.993983338113729e-07, "loss": 0.0008, "step": 371010 }, { "epoch": 3.964100646402051, "grad_norm": 0.02946031093597412, "learning_rate": 7.993848778159965e-07, "loss": 0.0126, "step": 371020 }, { "epoch": 3.964207489716331, "grad_norm": 0.003162166802212596, "learning_rate": 7.993714214825918e-07, "loss": 0.0226, "step": 371030 }, { "epoch": 3.9643143330306105, "grad_norm": 0.017804985865950584, "learning_rate": 7.993579648111743e-07, "loss": 0.0128, "step": 371040 }, { "epoch": 3.9644211763448904, "grad_norm": 0.18292710185050964, "learning_rate": 7.993445078017591e-07, "loss": 0.0185, "step": 371050 }, { "epoch": 3.96452801965917, "grad_norm": 1.5799424648284912, "learning_rate": 7.993310504543614e-07, "loss": 0.0096, "step": 371060 }, { "epoch": 3.9646348629734494, "grad_norm": 2.9576759338378906, "learning_rate": 7.993175927689962e-07, "loss": 0.0044, "step": 371070 }, { "epoch": 3.964741706287729, "grad_norm": 0.6054369807243347, "learning_rate": 7.993041347456791e-07, "loss": 0.0085, "step": 371080 }, { "epoch": 3.9648485496020087, "grad_norm": 0.19765600562095642, "learning_rate": 7.992906763844247e-07, "loss": 0.0207, "step": 371090 }, { "epoch": 3.964955392916288, "grad_norm": 0.00975101999938488, "learning_rate": 7.992772176852487e-07, "loss": 0.0207, "step": 371100 }, { "epoch": 3.965062236230568, "grad_norm": 0.5666170120239258, "learning_rate": 7.992637586481662e-07, "loss": 0.002, "step": 371110 }, { "epoch": 3.9651690795448475, "grad_norm": 0.9140275716781616, "learning_rate": 7.992502992731922e-07, "loss": 0.0039, "step": 371120 }, { "epoch": 3.965275922859127, "grad_norm": 5.266610622406006, "learning_rate": 7.992368395603422e-07, "loss": 0.0366, "step": 371130 }, { "epoch": 3.9653827661734065, "grad_norm": 1.9614639282226562, "learning_rate": 7.99223379509631e-07, "loss": 0.0271, "step": 371140 }, { "epoch": 3.9654896094876864, "grad_norm": 0.6938737034797668, "learning_rate": 7.992099191210741e-07, "loss": 0.0206, "step": 371150 }, { "epoch": 3.965596452801966, "grad_norm": 0.006878295447677374, "learning_rate": 7.991964583946867e-07, "loss": 0.0126, "step": 371160 }, { "epoch": 3.9657032961162457, "grad_norm": 1.445317029953003, "learning_rate": 7.991829973304839e-07, "loss": 0.0054, "step": 371170 }, { "epoch": 3.965810139430525, "grad_norm": 0.026606010273098946, "learning_rate": 7.991695359284809e-07, "loss": 0.0063, "step": 371180 }, { "epoch": 3.9659169827448046, "grad_norm": 1.0005395412445068, "learning_rate": 7.991560741886929e-07, "loss": 0.0026, "step": 371190 }, { "epoch": 3.966023826059084, "grad_norm": 0.07752178609371185, "learning_rate": 7.991426121111351e-07, "loss": 0.0091, "step": 371200 }, { "epoch": 3.966130669373364, "grad_norm": 0.011240619234740734, "learning_rate": 7.991291496958228e-07, "loss": 0.024, "step": 371210 }, { "epoch": 3.9662375126876435, "grad_norm": 2.4660637378692627, "learning_rate": 7.99115686942771e-07, "loss": 0.0052, "step": 371220 }, { "epoch": 3.9663443560019234, "grad_norm": 0.24892206490039825, "learning_rate": 7.991022238519951e-07, "loss": 0.0067, "step": 371230 }, { "epoch": 3.966451199316203, "grad_norm": 0.11115317046642303, "learning_rate": 7.990887604235101e-07, "loss": 0.0145, "step": 371240 }, { "epoch": 3.9665580426304823, "grad_norm": 0.006588745396584272, "learning_rate": 7.990752966573314e-07, "loss": 0.0012, "step": 371250 }, { "epoch": 3.9666648859447617, "grad_norm": 0.01695156842470169, "learning_rate": 7.990618325534741e-07, "loss": 0.0016, "step": 371260 }, { "epoch": 3.9667717292590416, "grad_norm": 0.09543687850236893, "learning_rate": 7.990483681119535e-07, "loss": 0.002, "step": 371270 }, { "epoch": 3.966878572573321, "grad_norm": 0.012592796236276627, "learning_rate": 7.990349033327846e-07, "loss": 0.0198, "step": 371280 }, { "epoch": 3.966985415887601, "grad_norm": 0.006075848359614611, "learning_rate": 7.990214382159828e-07, "loss": 0.0029, "step": 371290 }, { "epoch": 3.9670922592018805, "grad_norm": 2.095233917236328, "learning_rate": 7.990079727615632e-07, "loss": 0.0015, "step": 371300 }, { "epoch": 3.96719910251616, "grad_norm": 0.021622657775878906, "learning_rate": 7.989945069695411e-07, "loss": 0.0329, "step": 371310 }, { "epoch": 3.9673059458304394, "grad_norm": 5.031371116638184, "learning_rate": 7.989810408399316e-07, "loss": 0.02, "step": 371320 }, { "epoch": 3.9674127891447193, "grad_norm": 0.002577729057520628, "learning_rate": 7.989675743727498e-07, "loss": 0.0132, "step": 371330 }, { "epoch": 3.9675196324589987, "grad_norm": 0.3403838872909546, "learning_rate": 7.98954107568011e-07, "loss": 0.0266, "step": 371340 }, { "epoch": 3.9676264757732786, "grad_norm": 10.235919952392578, "learning_rate": 7.989406404257309e-07, "loss": 0.0846, "step": 371350 }, { "epoch": 3.967733319087558, "grad_norm": 0.00947160180658102, "learning_rate": 7.989271729459238e-07, "loss": 0.0028, "step": 371360 }, { "epoch": 3.9678401624018376, "grad_norm": 0.0076149567030370235, "learning_rate": 7.989137051286055e-07, "loss": 0.0056, "step": 371370 }, { "epoch": 3.9679470057161175, "grad_norm": 0.006431680638343096, "learning_rate": 7.989002369737912e-07, "loss": 0.0045, "step": 371380 }, { "epoch": 3.968053849030397, "grad_norm": 1.5903037786483765, "learning_rate": 7.988867684814957e-07, "loss": 0.0112, "step": 371390 }, { "epoch": 3.9681606923446764, "grad_norm": 0.059858229011297226, "learning_rate": 7.988732996517347e-07, "loss": 0.0099, "step": 371400 }, { "epoch": 3.9682675356589563, "grad_norm": 0.09554620832204819, "learning_rate": 7.988598304845231e-07, "loss": 0.0015, "step": 371410 }, { "epoch": 3.9683743789732357, "grad_norm": 1.585785150527954, "learning_rate": 7.988463609798763e-07, "loss": 0.0157, "step": 371420 }, { "epoch": 3.968481222287515, "grad_norm": 0.06678513437509537, "learning_rate": 7.988328911378093e-07, "loss": 0.0077, "step": 371430 }, { "epoch": 3.968588065601795, "grad_norm": 11.53873348236084, "learning_rate": 7.988194209583374e-07, "loss": 0.0046, "step": 371440 }, { "epoch": 3.9686949089160746, "grad_norm": 0.09446784108877182, "learning_rate": 7.988059504414759e-07, "loss": 0.0139, "step": 371450 }, { "epoch": 3.968801752230354, "grad_norm": 1.1542383432388306, "learning_rate": 7.987924795872398e-07, "loss": 0.0398, "step": 371460 }, { "epoch": 3.968908595544634, "grad_norm": 3.958627700805664, "learning_rate": 7.987790083956447e-07, "loss": 0.0257, "step": 371470 }, { "epoch": 3.9690154388589134, "grad_norm": 0.22079522907733917, "learning_rate": 7.987655368667053e-07, "loss": 0.0147, "step": 371480 }, { "epoch": 3.969122282173193, "grad_norm": 6.386573791503906, "learning_rate": 7.987520650004371e-07, "loss": 0.0217, "step": 371490 }, { "epoch": 3.9692291254874728, "grad_norm": 0.015047818422317505, "learning_rate": 7.987385927968554e-07, "loss": 0.0118, "step": 371500 }, { "epoch": 3.969335968801752, "grad_norm": 0.007471300661563873, "learning_rate": 7.987251202559752e-07, "loss": 0.0026, "step": 371510 }, { "epoch": 3.969442812116032, "grad_norm": 0.5542502999305725, "learning_rate": 7.987116473778117e-07, "loss": 0.007, "step": 371520 }, { "epoch": 3.9695496554303116, "grad_norm": 9.759305000305176, "learning_rate": 7.986981741623804e-07, "loss": 0.0279, "step": 371530 }, { "epoch": 3.969656498744591, "grad_norm": 0.5380860567092896, "learning_rate": 7.986847006096962e-07, "loss": 0.0031, "step": 371540 }, { "epoch": 3.9697633420588705, "grad_norm": 0.6265886425971985, "learning_rate": 7.986712267197744e-07, "loss": 0.0133, "step": 371550 }, { "epoch": 3.9698701853731504, "grad_norm": 1.51409912109375, "learning_rate": 7.986577524926303e-07, "loss": 0.0116, "step": 371560 }, { "epoch": 3.96997702868743, "grad_norm": 1.2045989036560059, "learning_rate": 7.986442779282789e-07, "loss": 0.0059, "step": 371570 }, { "epoch": 3.9700838720017098, "grad_norm": 0.0006406034808605909, "learning_rate": 7.986308030267357e-07, "loss": 0.0115, "step": 371580 }, { "epoch": 3.970190715315989, "grad_norm": 0.007852038368582726, "learning_rate": 7.98617327788016e-07, "loss": 0.0019, "step": 371590 }, { "epoch": 3.9702975586302687, "grad_norm": 0.008448483422398567, "learning_rate": 7.986038522121345e-07, "loss": 0.0344, "step": 371600 }, { "epoch": 3.970404401944548, "grad_norm": 0.012569467537105083, "learning_rate": 7.985903762991068e-07, "loss": 0.0226, "step": 371610 }, { "epoch": 3.970511245258828, "grad_norm": 25.75099754333496, "learning_rate": 7.98576900048948e-07, "loss": 0.0514, "step": 371620 }, { "epoch": 3.9706180885731075, "grad_norm": 1.5600571632385254, "learning_rate": 7.985634234616734e-07, "loss": 0.006, "step": 371630 }, { "epoch": 3.9707249318873874, "grad_norm": 4.551693916320801, "learning_rate": 7.985499465372983e-07, "loss": 0.0057, "step": 371640 }, { "epoch": 3.970831775201667, "grad_norm": 0.2138718217611313, "learning_rate": 7.985364692758376e-07, "loss": 0.0015, "step": 371650 }, { "epoch": 3.9709386185159463, "grad_norm": 0.027247421443462372, "learning_rate": 7.985229916773066e-07, "loss": 0.0306, "step": 371660 }, { "epoch": 3.9710454618302258, "grad_norm": 1.7182135581970215, "learning_rate": 7.985095137417209e-07, "loss": 0.0016, "step": 371670 }, { "epoch": 3.9711523051445057, "grad_norm": 0.021971983835101128, "learning_rate": 7.984960354690951e-07, "loss": 0.0091, "step": 371680 }, { "epoch": 3.971259148458785, "grad_norm": 0.03457788750529289, "learning_rate": 7.984825568594448e-07, "loss": 0.0177, "step": 371690 }, { "epoch": 3.971365991773065, "grad_norm": 4.901852607727051, "learning_rate": 7.984690779127854e-07, "loss": 0.0085, "step": 371700 }, { "epoch": 3.9714728350873445, "grad_norm": 0.1282765120267868, "learning_rate": 7.984555986291317e-07, "loss": 0.002, "step": 371710 }, { "epoch": 3.971579678401624, "grad_norm": 4.90720796585083, "learning_rate": 7.984421190084991e-07, "loss": 0.0147, "step": 371720 }, { "epoch": 3.9716865217159034, "grad_norm": 0.03951066732406616, "learning_rate": 7.984286390509029e-07, "loss": 0.0031, "step": 371730 }, { "epoch": 3.9717933650301833, "grad_norm": 0.07430176436901093, "learning_rate": 7.984151587563583e-07, "loss": 0.0041, "step": 371740 }, { "epoch": 3.971900208344463, "grad_norm": 0.03466275334358215, "learning_rate": 7.984016781248804e-07, "loss": 0.0211, "step": 371750 }, { "epoch": 3.9720070516587427, "grad_norm": 0.1657560020685196, "learning_rate": 7.983881971564845e-07, "loss": 0.0057, "step": 371760 }, { "epoch": 3.972113894973022, "grad_norm": 0.0023580831475555897, "learning_rate": 7.983747158511857e-07, "loss": 0.0045, "step": 371770 }, { "epoch": 3.9722207382873016, "grad_norm": 0.3391183018684387, "learning_rate": 7.983612342089994e-07, "loss": 0.0494, "step": 371780 }, { "epoch": 3.972327581601581, "grad_norm": 8.21877384185791, "learning_rate": 7.983477522299408e-07, "loss": 0.0312, "step": 371790 }, { "epoch": 3.972434424915861, "grad_norm": 0.029855171218514442, "learning_rate": 7.983342699140249e-07, "loss": 0.0077, "step": 371800 }, { "epoch": 3.9725412682301404, "grad_norm": 11.579541206359863, "learning_rate": 7.983207872612674e-07, "loss": 0.0117, "step": 371810 }, { "epoch": 3.9726481115444203, "grad_norm": 1.950335144996643, "learning_rate": 7.98307304271683e-07, "loss": 0.0023, "step": 371820 }, { "epoch": 3.9727549548587, "grad_norm": 0.1546390801668167, "learning_rate": 7.98293820945287e-07, "loss": 0.0037, "step": 371830 }, { "epoch": 3.9728617981729792, "grad_norm": 0.002526357537135482, "learning_rate": 7.98280337282095e-07, "loss": 0.0042, "step": 371840 }, { "epoch": 3.9729686414872587, "grad_norm": 1.383715271949768, "learning_rate": 7.98266853282122e-07, "loss": 0.001, "step": 371850 }, { "epoch": 3.9730754848015386, "grad_norm": 0.00041947996942326427, "learning_rate": 7.982533689453831e-07, "loss": 0.0037, "step": 371860 }, { "epoch": 3.973182328115818, "grad_norm": 2.0550363063812256, "learning_rate": 7.982398842718936e-07, "loss": 0.0054, "step": 371870 }, { "epoch": 3.973289171430098, "grad_norm": 4.15688943862915, "learning_rate": 7.982263992616689e-07, "loss": 0.027, "step": 371880 }, { "epoch": 3.9733960147443774, "grad_norm": 3.4763877391815186, "learning_rate": 7.98212913914724e-07, "loss": 0.0095, "step": 371890 }, { "epoch": 3.973502858058657, "grad_norm": 0.055555328726768494, "learning_rate": 7.981994282310743e-07, "loss": 0.0021, "step": 371900 }, { "epoch": 3.9736097013729363, "grad_norm": 0.0038372257258743048, "learning_rate": 7.981859422107349e-07, "loss": 0.0078, "step": 371910 }, { "epoch": 3.9737165446872162, "grad_norm": 6.934371471405029, "learning_rate": 7.981724558537209e-07, "loss": 0.0154, "step": 371920 }, { "epoch": 3.9738233880014957, "grad_norm": 9.291919708251953, "learning_rate": 7.981589691600479e-07, "loss": 0.0132, "step": 371930 }, { "epoch": 3.9739302313157756, "grad_norm": 0.0008693619747646153, "learning_rate": 7.981454821297308e-07, "loss": 0.0123, "step": 371940 }, { "epoch": 3.974037074630055, "grad_norm": 0.0034816330298781395, "learning_rate": 7.98131994762785e-07, "loss": 0.0163, "step": 371950 }, { "epoch": 3.9741439179443345, "grad_norm": 7.255276203155518, "learning_rate": 7.981185070592257e-07, "loss": 0.0143, "step": 371960 }, { "epoch": 3.974250761258614, "grad_norm": 0.8852280378341675, "learning_rate": 7.98105019019068e-07, "loss": 0.012, "step": 371970 }, { "epoch": 3.974357604572894, "grad_norm": 2.525686025619507, "learning_rate": 7.980915306423273e-07, "loss": 0.0065, "step": 371980 }, { "epoch": 3.9744644478871733, "grad_norm": 0.017013337463140488, "learning_rate": 7.980780419290189e-07, "loss": 0.0145, "step": 371990 }, { "epoch": 3.9745712912014532, "grad_norm": 3.259965181350708, "learning_rate": 7.980645528791576e-07, "loss": 0.021, "step": 372000 }, { "epoch": 3.9746781345157327, "grad_norm": 2.63700795173645, "learning_rate": 7.980510634927592e-07, "loss": 0.01, "step": 372010 }, { "epoch": 3.974784977830012, "grad_norm": 0.049010489135980606, "learning_rate": 7.980375737698385e-07, "loss": 0.0219, "step": 372020 }, { "epoch": 3.9748918211442916, "grad_norm": 0.013094399124383926, "learning_rate": 7.98024083710411e-07, "loss": 0.001, "step": 372030 }, { "epoch": 3.9749986644585715, "grad_norm": 3.6651108264923096, "learning_rate": 7.980105933144918e-07, "loss": 0.0092, "step": 372040 }, { "epoch": 3.975105507772851, "grad_norm": 0.0017793395090848207, "learning_rate": 7.979971025820961e-07, "loss": 0.0216, "step": 372050 }, { "epoch": 3.975212351087131, "grad_norm": 0.887422502040863, "learning_rate": 7.979836115132392e-07, "loss": 0.0106, "step": 372060 }, { "epoch": 3.9753191944014103, "grad_norm": 0.16144411265850067, "learning_rate": 7.979701201079363e-07, "loss": 0.0106, "step": 372070 }, { "epoch": 3.97542603771569, "grad_norm": 0.38841480016708374, "learning_rate": 7.979566283662025e-07, "loss": 0.0005, "step": 372080 }, { "epoch": 3.9755328810299697, "grad_norm": 0.15181629359722137, "learning_rate": 7.979431362880534e-07, "loss": 0.0068, "step": 372090 }, { "epoch": 3.975639724344249, "grad_norm": 0.0007952422020025551, "learning_rate": 7.97929643873504e-07, "loss": 0.0013, "step": 372100 }, { "epoch": 3.9757465676585286, "grad_norm": 10.42874526977539, "learning_rate": 7.979161511225694e-07, "loss": 0.0397, "step": 372110 }, { "epoch": 3.9758534109728085, "grad_norm": 8.489489555358887, "learning_rate": 7.97902658035265e-07, "loss": 0.032, "step": 372120 }, { "epoch": 3.975960254287088, "grad_norm": 0.01961805671453476, "learning_rate": 7.978891646116062e-07, "loss": 0.0176, "step": 372130 }, { "epoch": 3.9760670976013675, "grad_norm": 6.384331703186035, "learning_rate": 7.978756708516079e-07, "loss": 0.0054, "step": 372140 }, { "epoch": 3.9761739409156474, "grad_norm": 0.023582616820931435, "learning_rate": 7.978621767552855e-07, "loss": 0.0424, "step": 372150 }, { "epoch": 3.976280784229927, "grad_norm": 0.019225602969527245, "learning_rate": 7.978486823226542e-07, "loss": 0.001, "step": 372160 }, { "epoch": 3.9763876275442063, "grad_norm": 0.2031574547290802, "learning_rate": 7.978351875537292e-07, "loss": 0.0011, "step": 372170 }, { "epoch": 3.976494470858486, "grad_norm": 0.00565972039476037, "learning_rate": 7.978216924485259e-07, "loss": 0.0021, "step": 372180 }, { "epoch": 3.9766013141727656, "grad_norm": 0.04820169135928154, "learning_rate": 7.978081970070595e-07, "loss": 0.021, "step": 372190 }, { "epoch": 3.976708157487045, "grad_norm": 3.09771466255188, "learning_rate": 7.97794701229345e-07, "loss": 0.0112, "step": 372200 }, { "epoch": 3.976815000801325, "grad_norm": 1.726786494255066, "learning_rate": 7.977812051153979e-07, "loss": 0.0019, "step": 372210 }, { "epoch": 3.9769218441156045, "grad_norm": 1.8276331424713135, "learning_rate": 7.977677086652335e-07, "loss": 0.0026, "step": 372220 }, { "epoch": 3.977028687429884, "grad_norm": 0.49554118514060974, "learning_rate": 7.977542118788666e-07, "loss": 0.0088, "step": 372230 }, { "epoch": 3.977135530744164, "grad_norm": 0.0044096726924180984, "learning_rate": 7.977407147563129e-07, "loss": 0.0058, "step": 372240 }, { "epoch": 3.9772423740584433, "grad_norm": 6.1341166496276855, "learning_rate": 7.977272172975874e-07, "loss": 0.0107, "step": 372250 }, { "epoch": 3.9773492173727227, "grad_norm": 0.4028352200984955, "learning_rate": 7.977137195027053e-07, "loss": 0.0008, "step": 372260 }, { "epoch": 3.9774560606870026, "grad_norm": 4.244889259338379, "learning_rate": 7.977002213716821e-07, "loss": 0.0084, "step": 372270 }, { "epoch": 3.977562904001282, "grad_norm": 0.6552578210830688, "learning_rate": 7.976867229045328e-07, "loss": 0.0078, "step": 372280 }, { "epoch": 3.977669747315562, "grad_norm": 0.7000442147254944, "learning_rate": 7.976732241012727e-07, "loss": 0.0032, "step": 372290 }, { "epoch": 3.9777765906298415, "grad_norm": 0.07166323810815811, "learning_rate": 7.976597249619172e-07, "loss": 0.0259, "step": 372300 }, { "epoch": 3.977883433944121, "grad_norm": 0.2265876978635788, "learning_rate": 7.976462254864813e-07, "loss": 0.0125, "step": 372310 }, { "epoch": 3.9779902772584004, "grad_norm": 0.005753743462264538, "learning_rate": 7.976327256749805e-07, "loss": 0.0091, "step": 372320 }, { "epoch": 3.9780971205726803, "grad_norm": 1.5762286186218262, "learning_rate": 7.976192255274297e-07, "loss": 0.0041, "step": 372330 }, { "epoch": 3.9782039638869597, "grad_norm": 0.016146447509527206, "learning_rate": 7.976057250438443e-07, "loss": 0.0118, "step": 372340 }, { "epoch": 3.9783108072012396, "grad_norm": 0.0727873295545578, "learning_rate": 7.975922242242396e-07, "loss": 0.0049, "step": 372350 }, { "epoch": 3.978417650515519, "grad_norm": 0.27216118574142456, "learning_rate": 7.97578723068631e-07, "loss": 0.01, "step": 372360 }, { "epoch": 3.9785244938297986, "grad_norm": 0.0116821164265275, "learning_rate": 7.975652215770334e-07, "loss": 0.0447, "step": 372370 }, { "epoch": 3.978631337144078, "grad_norm": 1.7037261724472046, "learning_rate": 7.975517197494622e-07, "loss": 0.0423, "step": 372380 }, { "epoch": 3.978738180458358, "grad_norm": 7.852747440338135, "learning_rate": 7.975382175859327e-07, "loss": 0.0127, "step": 372390 }, { "epoch": 3.9788450237726374, "grad_norm": 0.013108249753713608, "learning_rate": 7.975247150864601e-07, "loss": 0.0138, "step": 372400 }, { "epoch": 3.9789518670869173, "grad_norm": 0.3664076626300812, "learning_rate": 7.975112122510596e-07, "loss": 0.0027, "step": 372410 }, { "epoch": 3.9790587104011967, "grad_norm": 1.9279297590255737, "learning_rate": 7.974977090797466e-07, "loss": 0.0146, "step": 372420 }, { "epoch": 3.979165553715476, "grad_norm": 0.03405914083123207, "learning_rate": 7.974842055725361e-07, "loss": 0.0138, "step": 372430 }, { "epoch": 3.9792723970297557, "grad_norm": 0.005870543885976076, "learning_rate": 7.974707017294435e-07, "loss": 0.008, "step": 372440 }, { "epoch": 3.9793792403440356, "grad_norm": 0.01198421698063612, "learning_rate": 7.974571975504842e-07, "loss": 0.0007, "step": 372450 }, { "epoch": 3.979486083658315, "grad_norm": 0.01005614921450615, "learning_rate": 7.974436930356732e-07, "loss": 0.0195, "step": 372460 }, { "epoch": 3.979592926972595, "grad_norm": 0.009549286216497421, "learning_rate": 7.974301881850256e-07, "loss": 0.0048, "step": 372470 }, { "epoch": 3.9796997702868744, "grad_norm": 0.00409104535356164, "learning_rate": 7.974166829985571e-07, "loss": 0.0061, "step": 372480 }, { "epoch": 3.979806613601154, "grad_norm": 0.01926555298268795, "learning_rate": 7.974031774762826e-07, "loss": 0.0196, "step": 372490 }, { "epoch": 3.9799134569154333, "grad_norm": 0.1457364559173584, "learning_rate": 7.973896716182176e-07, "loss": 0.0304, "step": 372500 }, { "epoch": 3.980020300229713, "grad_norm": 0.06987638026475906, "learning_rate": 7.97376165424377e-07, "loss": 0.0267, "step": 372510 }, { "epoch": 3.9801271435439927, "grad_norm": 0.020038962364196777, "learning_rate": 7.973626588947764e-07, "loss": 0.0041, "step": 372520 }, { "epoch": 3.9802339868582726, "grad_norm": 0.18539485335350037, "learning_rate": 7.97349152029431e-07, "loss": 0.0217, "step": 372530 }, { "epoch": 3.980340830172552, "grad_norm": 4.435290813446045, "learning_rate": 7.973356448283558e-07, "loss": 0.0037, "step": 372540 }, { "epoch": 3.9804476734868315, "grad_norm": 0.20290639996528625, "learning_rate": 7.973221372915664e-07, "loss": 0.0511, "step": 372550 }, { "epoch": 3.980554516801111, "grad_norm": 0.005260084755718708, "learning_rate": 7.973086294190777e-07, "loss": 0.0288, "step": 372560 }, { "epoch": 3.980661360115391, "grad_norm": 1.9109718799591064, "learning_rate": 7.972951212109051e-07, "loss": 0.0051, "step": 372570 }, { "epoch": 3.9807682034296703, "grad_norm": 0.0025825556367635727, "learning_rate": 7.97281612667064e-07, "loss": 0.0056, "step": 372580 }, { "epoch": 3.98087504674395, "grad_norm": 9.614110946655273, "learning_rate": 7.972681037875694e-07, "loss": 0.0214, "step": 372590 }, { "epoch": 3.9809818900582297, "grad_norm": 1.6524639129638672, "learning_rate": 7.972545945724368e-07, "loss": 0.087, "step": 372600 }, { "epoch": 3.981088733372509, "grad_norm": 0.012259596027433872, "learning_rate": 7.972410850216813e-07, "loss": 0.0212, "step": 372610 }, { "epoch": 3.9811955766867886, "grad_norm": 0.005985552445054054, "learning_rate": 7.972275751353181e-07, "loss": 0.0059, "step": 372620 }, { "epoch": 3.9813024200010685, "grad_norm": 0.02865481749176979, "learning_rate": 7.972140649133627e-07, "loss": 0.0087, "step": 372630 }, { "epoch": 3.981409263315348, "grad_norm": 0.0729357898235321, "learning_rate": 7.9720055435583e-07, "loss": 0.02, "step": 372640 }, { "epoch": 3.981516106629628, "grad_norm": 11.34631061553955, "learning_rate": 7.971870434627356e-07, "loss": 0.0229, "step": 372650 }, { "epoch": 3.9816229499439073, "grad_norm": 3.8372814655303955, "learning_rate": 7.971735322340945e-07, "loss": 0.0069, "step": 372660 }, { "epoch": 3.9817297932581868, "grad_norm": 3.2195184230804443, "learning_rate": 7.971600206699221e-07, "loss": 0.0017, "step": 372670 }, { "epoch": 3.9818366365724662, "grad_norm": 0.0433911569416523, "learning_rate": 7.971465087702336e-07, "loss": 0.0135, "step": 372680 }, { "epoch": 3.981943479886746, "grad_norm": 3.6705362796783447, "learning_rate": 7.971329965350441e-07, "loss": 0.0108, "step": 372690 }, { "epoch": 3.9820503232010256, "grad_norm": 0.0014619100838899612, "learning_rate": 7.971194839643693e-07, "loss": 0.0041, "step": 372700 }, { "epoch": 3.9821571665153055, "grad_norm": 0.009570291265845299, "learning_rate": 7.97105971058224e-07, "loss": 0.022, "step": 372710 }, { "epoch": 3.982264009829585, "grad_norm": 0.0012608481338247657, "learning_rate": 7.970924578166236e-07, "loss": 0.0102, "step": 372720 }, { "epoch": 3.9823708531438644, "grad_norm": 0.007727894000709057, "learning_rate": 7.970789442395835e-07, "loss": 0.0063, "step": 372730 }, { "epoch": 3.982477696458144, "grad_norm": 0.011773844249546528, "learning_rate": 7.970654303271189e-07, "loss": 0.0207, "step": 372740 }, { "epoch": 3.9825845397724238, "grad_norm": 6.204885959625244, "learning_rate": 7.970519160792448e-07, "loss": 0.04, "step": 372750 }, { "epoch": 3.9826913830867032, "grad_norm": 1.7273621559143066, "learning_rate": 7.97038401495977e-07, "loss": 0.0125, "step": 372760 }, { "epoch": 3.982798226400983, "grad_norm": 0.059308651834726334, "learning_rate": 7.970248865773302e-07, "loss": 0.0036, "step": 372770 }, { "epoch": 3.9829050697152626, "grad_norm": 0.019720030948519707, "learning_rate": 7.970113713233199e-07, "loss": 0.0307, "step": 372780 }, { "epoch": 3.983011913029542, "grad_norm": 0.14442633092403412, "learning_rate": 7.969978557339613e-07, "loss": 0.0008, "step": 372790 }, { "epoch": 3.9831187563438215, "grad_norm": 8.011761665344238, "learning_rate": 7.969843398092697e-07, "loss": 0.0073, "step": 372800 }, { "epoch": 3.9832255996581014, "grad_norm": 0.010845642536878586, "learning_rate": 7.969708235492605e-07, "loss": 0.013, "step": 372810 }, { "epoch": 3.983332442972381, "grad_norm": 14.231011390686035, "learning_rate": 7.969573069539488e-07, "loss": 0.0277, "step": 372820 }, { "epoch": 3.9834392862866608, "grad_norm": 0.2713911831378937, "learning_rate": 7.969437900233498e-07, "loss": 0.0086, "step": 372830 }, { "epoch": 3.9835461296009402, "grad_norm": 0.08748748898506165, "learning_rate": 7.96930272757479e-07, "loss": 0.0682, "step": 372840 }, { "epoch": 3.9836529729152197, "grad_norm": 0.1057170107960701, "learning_rate": 7.969167551563513e-07, "loss": 0.023, "step": 372850 }, { "epoch": 3.9837598162294996, "grad_norm": 0.07441629469394684, "learning_rate": 7.969032372199822e-07, "loss": 0.0229, "step": 372860 }, { "epoch": 3.983866659543779, "grad_norm": 0.17921604216098785, "learning_rate": 7.968897189483871e-07, "loss": 0.0084, "step": 372870 }, { "epoch": 3.9839735028580585, "grad_norm": 0.11650118976831436, "learning_rate": 7.96876200341581e-07, "loss": 0.0163, "step": 372880 }, { "epoch": 3.9840803461723384, "grad_norm": 1.0985050201416016, "learning_rate": 7.968626813995792e-07, "loss": 0.003, "step": 372890 }, { "epoch": 3.984187189486618, "grad_norm": 0.010745669715106487, "learning_rate": 7.968491621223972e-07, "loss": 0.0068, "step": 372900 }, { "epoch": 3.9842940328008973, "grad_norm": 1.1721067428588867, "learning_rate": 7.968356425100499e-07, "loss": 0.0151, "step": 372910 }, { "epoch": 3.9844008761151772, "grad_norm": 0.16782023012638092, "learning_rate": 7.968221225625528e-07, "loss": 0.0146, "step": 372920 }, { "epoch": 3.9845077194294567, "grad_norm": 10.704835891723633, "learning_rate": 7.968086022799213e-07, "loss": 0.0365, "step": 372930 }, { "epoch": 3.984614562743736, "grad_norm": 2.5936942100524902, "learning_rate": 7.967950816621703e-07, "loss": 0.0249, "step": 372940 }, { "epoch": 3.984721406058016, "grad_norm": 2.9235734939575195, "learning_rate": 7.967815607093153e-07, "loss": 0.0151, "step": 372950 }, { "epoch": 3.9848282493722955, "grad_norm": 0.018608098849654198, "learning_rate": 7.967680394213716e-07, "loss": 0.0251, "step": 372960 }, { "epoch": 3.984935092686575, "grad_norm": 0.18552233278751373, "learning_rate": 7.967545177983543e-07, "loss": 0.0194, "step": 372970 }, { "epoch": 3.985041936000855, "grad_norm": 0.15514245629310608, "learning_rate": 7.967409958402788e-07, "loss": 0.0109, "step": 372980 }, { "epoch": 3.9851487793151343, "grad_norm": 0.005362231284379959, "learning_rate": 7.967274735471603e-07, "loss": 0.0089, "step": 372990 }, { "epoch": 3.9852556226294142, "grad_norm": 2.3036811351776123, "learning_rate": 7.967139509190139e-07, "loss": 0.0335, "step": 373000 }, { "epoch": 3.9853624659436937, "grad_norm": 5.347237586975098, "learning_rate": 7.967004279558554e-07, "loss": 0.0297, "step": 373010 }, { "epoch": 3.985469309257973, "grad_norm": 2.184784173965454, "learning_rate": 7.966869046576997e-07, "loss": 0.0282, "step": 373020 }, { "epoch": 3.9855761525722526, "grad_norm": 0.008985467255115509, "learning_rate": 7.966733810245619e-07, "loss": 0.0032, "step": 373030 }, { "epoch": 3.9856829958865325, "grad_norm": 0.11222724616527557, "learning_rate": 7.966598570564574e-07, "loss": 0.0313, "step": 373040 }, { "epoch": 3.985789839200812, "grad_norm": 4.686526298522949, "learning_rate": 7.966463327534017e-07, "loss": 0.0025, "step": 373050 }, { "epoch": 3.985896682515092, "grad_norm": 0.5674868822097778, "learning_rate": 7.966328081154098e-07, "loss": 0.0034, "step": 373060 }, { "epoch": 3.9860035258293713, "grad_norm": 0.2549391984939575, "learning_rate": 7.966192831424972e-07, "loss": 0.0134, "step": 373070 }, { "epoch": 3.986110369143651, "grad_norm": 0.04531104490160942, "learning_rate": 7.966057578346789e-07, "loss": 0.0018, "step": 373080 }, { "epoch": 3.9862172124579303, "grad_norm": 0.009621129371225834, "learning_rate": 7.965922321919704e-07, "loss": 0.0103, "step": 373090 }, { "epoch": 3.98632405577221, "grad_norm": 7.630256175994873, "learning_rate": 7.965787062143869e-07, "loss": 0.0107, "step": 373100 }, { "epoch": 3.9864308990864896, "grad_norm": 0.015430133789777756, "learning_rate": 7.965651799019435e-07, "loss": 0.006, "step": 373110 }, { "epoch": 3.9865377424007695, "grad_norm": 10.606886863708496, "learning_rate": 7.965516532546557e-07, "loss": 0.0111, "step": 373120 }, { "epoch": 3.986644585715049, "grad_norm": 0.019467851147055626, "learning_rate": 7.965381262725388e-07, "loss": 0.0283, "step": 373130 }, { "epoch": 3.9867514290293284, "grad_norm": 0.009347810409963131, "learning_rate": 7.965245989556078e-07, "loss": 0.0622, "step": 373140 }, { "epoch": 3.986858272343608, "grad_norm": 5.126534938812256, "learning_rate": 7.965110713038784e-07, "loss": 0.0295, "step": 373150 }, { "epoch": 3.986965115657888, "grad_norm": 0.061957526952028275, "learning_rate": 7.964975433173656e-07, "loss": 0.0032, "step": 373160 }, { "epoch": 3.9870719589721673, "grad_norm": 0.10274741053581238, "learning_rate": 7.964840149960844e-07, "loss": 0.0057, "step": 373170 }, { "epoch": 3.987178802286447, "grad_norm": 11.92505168914795, "learning_rate": 7.964704863400507e-07, "loss": 0.0089, "step": 373180 }, { "epoch": 3.9872856456007266, "grad_norm": 0.027603594586253166, "learning_rate": 7.964569573492792e-07, "loss": 0.0043, "step": 373190 }, { "epoch": 3.987392488915006, "grad_norm": 0.004237608052790165, "learning_rate": 7.964434280237855e-07, "loss": 0.011, "step": 373200 }, { "epoch": 3.9874993322292855, "grad_norm": 0.07023818790912628, "learning_rate": 7.964298983635849e-07, "loss": 0.0063, "step": 373210 }, { "epoch": 3.9876061755435654, "grad_norm": 0.16515220701694489, "learning_rate": 7.964163683686926e-07, "loss": 0.0331, "step": 373220 }, { "epoch": 3.987713018857845, "grad_norm": 0.15616999566555023, "learning_rate": 7.964028380391236e-07, "loss": 0.0178, "step": 373230 }, { "epoch": 3.987819862172125, "grad_norm": 0.4669714868068695, "learning_rate": 7.963893073748936e-07, "loss": 0.0407, "step": 373240 }, { "epoch": 3.9879267054864043, "grad_norm": 7.963461875915527, "learning_rate": 7.963757763760177e-07, "loss": 0.0111, "step": 373250 }, { "epoch": 3.9880335488006837, "grad_norm": 6.594942569732666, "learning_rate": 7.963622450425112e-07, "loss": 0.0075, "step": 373260 }, { "epoch": 3.988140392114963, "grad_norm": 0.013968615792691708, "learning_rate": 7.963487133743891e-07, "loss": 0.0015, "step": 373270 }, { "epoch": 3.988247235429243, "grad_norm": 0.014419890008866787, "learning_rate": 7.963351813716672e-07, "loss": 0.009, "step": 373280 }, { "epoch": 3.9883540787435225, "grad_norm": 0.001149404444731772, "learning_rate": 7.963216490343605e-07, "loss": 0.0508, "step": 373290 }, { "epoch": 3.9884609220578024, "grad_norm": 0.002293242607265711, "learning_rate": 7.963081163624843e-07, "loss": 0.0569, "step": 373300 }, { "epoch": 3.988567765372082, "grad_norm": 3.5271830558776855, "learning_rate": 7.962945833560537e-07, "loss": 0.0072, "step": 373310 }, { "epoch": 3.9886746086863614, "grad_norm": 0.03882487118244171, "learning_rate": 7.962810500150843e-07, "loss": 0.0076, "step": 373320 }, { "epoch": 3.988781452000641, "grad_norm": 8.115560531616211, "learning_rate": 7.962675163395913e-07, "loss": 0.0312, "step": 373330 }, { "epoch": 3.9888882953149207, "grad_norm": 0.3275860846042633, "learning_rate": 7.962539823295898e-07, "loss": 0.0288, "step": 373340 }, { "epoch": 3.9889951386292, "grad_norm": 0.007170557510107756, "learning_rate": 7.962404479850953e-07, "loss": 0.0282, "step": 373350 }, { "epoch": 3.98910198194348, "grad_norm": 0.001830754685215652, "learning_rate": 7.962269133061228e-07, "loss": 0.003, "step": 373360 }, { "epoch": 3.9892088252577595, "grad_norm": 0.5203151702880859, "learning_rate": 7.962133782926878e-07, "loss": 0.0072, "step": 373370 }, { "epoch": 3.989315668572039, "grad_norm": 0.002561242552474141, "learning_rate": 7.961998429448056e-07, "loss": 0.0071, "step": 373380 }, { "epoch": 3.9894225118863185, "grad_norm": 5.11474084854126, "learning_rate": 7.961863072624915e-07, "loss": 0.0064, "step": 373390 }, { "epoch": 3.9895293552005984, "grad_norm": 0.5417771935462952, "learning_rate": 7.961727712457606e-07, "loss": 0.0045, "step": 373400 }, { "epoch": 3.989636198514878, "grad_norm": 0.4883989691734314, "learning_rate": 7.961592348946282e-07, "loss": 0.0075, "step": 373410 }, { "epoch": 3.9897430418291577, "grad_norm": 2.4574499130249023, "learning_rate": 7.961456982091099e-07, "loss": 0.0023, "step": 373420 }, { "epoch": 3.989849885143437, "grad_norm": 0.07255711406469345, "learning_rate": 7.961321611892206e-07, "loss": 0.0081, "step": 373430 }, { "epoch": 3.9899567284577167, "grad_norm": 0.10543420165777206, "learning_rate": 7.961186238349757e-07, "loss": 0.0135, "step": 373440 }, { "epoch": 3.990063571771996, "grad_norm": 0.9046340584754944, "learning_rate": 7.961050861463907e-07, "loss": 0.0111, "step": 373450 }, { "epoch": 3.990170415086276, "grad_norm": 5.330838680267334, "learning_rate": 7.960915481234805e-07, "loss": 0.0205, "step": 373460 }, { "epoch": 3.9902772584005555, "grad_norm": 0.008689317852258682, "learning_rate": 7.960780097662607e-07, "loss": 0.013, "step": 373470 }, { "epoch": 3.9903841017148354, "grad_norm": 0.10007506608963013, "learning_rate": 7.960644710747466e-07, "loss": 0.0054, "step": 373480 }, { "epoch": 3.990490945029115, "grad_norm": 0.10797464847564697, "learning_rate": 7.960509320489532e-07, "loss": 0.0235, "step": 373490 }, { "epoch": 3.9905977883433943, "grad_norm": 0.6592726707458496, "learning_rate": 7.96037392688896e-07, "loss": 0.006, "step": 373500 }, { "epoch": 3.9907046316576738, "grad_norm": 0.14667584002017975, "learning_rate": 7.960238529945902e-07, "loss": 0.0023, "step": 373510 }, { "epoch": 3.9908114749719537, "grad_norm": 14.338623046875, "learning_rate": 7.960103129660511e-07, "loss": 0.0192, "step": 373520 }, { "epoch": 3.990918318286233, "grad_norm": 0.0687936320900917, "learning_rate": 7.959967726032943e-07, "loss": 0.0043, "step": 373530 }, { "epoch": 3.991025161600513, "grad_norm": 1.4027150869369507, "learning_rate": 7.959832319063344e-07, "loss": 0.0061, "step": 373540 }, { "epoch": 3.9911320049147925, "grad_norm": 0.005142421927303076, "learning_rate": 7.959696908751874e-07, "loss": 0.002, "step": 373550 }, { "epoch": 3.991238848229072, "grad_norm": 1.1701648235321045, "learning_rate": 7.959561495098681e-07, "loss": 0.0299, "step": 373560 }, { "epoch": 3.991345691543352, "grad_norm": 0.6653014421463013, "learning_rate": 7.959426078103922e-07, "loss": 0.0028, "step": 373570 }, { "epoch": 3.9914525348576313, "grad_norm": 0.19187913835048676, "learning_rate": 7.959290657767746e-07, "loss": 0.0091, "step": 373580 }, { "epoch": 3.9915593781719108, "grad_norm": 2.8174681663513184, "learning_rate": 7.959155234090307e-07, "loss": 0.0122, "step": 373590 }, { "epoch": 3.9916662214861907, "grad_norm": 0.9824293255805969, "learning_rate": 7.959019807071758e-07, "loss": 0.006, "step": 373600 }, { "epoch": 3.99177306480047, "grad_norm": 0.0004294509708415717, "learning_rate": 7.958884376712254e-07, "loss": 0.0033, "step": 373610 }, { "epoch": 3.9918799081147496, "grad_norm": 3.8195338249206543, "learning_rate": 7.958748943011945e-07, "loss": 0.0117, "step": 373620 }, { "epoch": 3.9919867514290295, "grad_norm": 1.453141212463379, "learning_rate": 7.958613505970985e-07, "loss": 0.0009, "step": 373630 }, { "epoch": 3.992093594743309, "grad_norm": 0.011790020391345024, "learning_rate": 7.958478065589528e-07, "loss": 0.0501, "step": 373640 }, { "epoch": 3.9922004380575884, "grad_norm": 0.08163659274578094, "learning_rate": 7.958342621867726e-07, "loss": 0.06, "step": 373650 }, { "epoch": 3.9923072813718683, "grad_norm": 0.1384180635213852, "learning_rate": 7.958207174805731e-07, "loss": 0.0043, "step": 373660 }, { "epoch": 3.9924141246861478, "grad_norm": 0.08662527799606323, "learning_rate": 7.958071724403696e-07, "loss": 0.0102, "step": 373670 }, { "epoch": 3.992520968000427, "grad_norm": 0.009175445884466171, "learning_rate": 7.957936270661777e-07, "loss": 0.0464, "step": 373680 }, { "epoch": 3.992627811314707, "grad_norm": 0.016937194392085075, "learning_rate": 7.957800813580125e-07, "loss": 0.0084, "step": 373690 }, { "epoch": 3.9927346546289866, "grad_norm": 0.0051607536152005196, "learning_rate": 7.957665353158891e-07, "loss": 0.0104, "step": 373700 }, { "epoch": 3.992841497943266, "grad_norm": 0.5310492515563965, "learning_rate": 7.957529889398229e-07, "loss": 0.0381, "step": 373710 }, { "epoch": 3.992948341257546, "grad_norm": 0.0054750279523432255, "learning_rate": 7.957394422298294e-07, "loss": 0.0052, "step": 373720 }, { "epoch": 3.9930551845718254, "grad_norm": 0.22340279817581177, "learning_rate": 7.957258951859237e-07, "loss": 0.0256, "step": 373730 }, { "epoch": 3.993162027886105, "grad_norm": 0.03136011213064194, "learning_rate": 7.957123478081212e-07, "loss": 0.0112, "step": 373740 }, { "epoch": 3.9932688712003848, "grad_norm": 0.0024671496357768774, "learning_rate": 7.956988000964371e-07, "loss": 0.0027, "step": 373750 }, { "epoch": 3.993375714514664, "grad_norm": 3.565664291381836, "learning_rate": 7.956852520508868e-07, "loss": 0.0151, "step": 373760 }, { "epoch": 3.993482557828944, "grad_norm": 0.7726950645446777, "learning_rate": 7.956717036714853e-07, "loss": 0.018, "step": 373770 }, { "epoch": 3.9935894011432236, "grad_norm": 0.0542936846613884, "learning_rate": 7.956581549582484e-07, "loss": 0.0023, "step": 373780 }, { "epoch": 3.993696244457503, "grad_norm": 2.0108134746551514, "learning_rate": 7.95644605911191e-07, "loss": 0.0025, "step": 373790 }, { "epoch": 3.9938030877717825, "grad_norm": 3.0529379844665527, "learning_rate": 7.956310565303284e-07, "loss": 0.0118, "step": 373800 }, { "epoch": 3.9939099310860624, "grad_norm": 0.10711286962032318, "learning_rate": 7.956175068156763e-07, "loss": 0.0715, "step": 373810 }, { "epoch": 3.994016774400342, "grad_norm": 0.04295147582888603, "learning_rate": 7.956039567672497e-07, "loss": 0.0101, "step": 373820 }, { "epoch": 3.9941236177146218, "grad_norm": 0.10122185200452805, "learning_rate": 7.955904063850637e-07, "loss": 0.0538, "step": 373830 }, { "epoch": 3.9942304610289012, "grad_norm": 0.050686124712228775, "learning_rate": 7.955768556691341e-07, "loss": 0.0103, "step": 373840 }, { "epoch": 3.9943373043431807, "grad_norm": 0.8588396906852722, "learning_rate": 7.955633046194757e-07, "loss": 0.0094, "step": 373850 }, { "epoch": 3.99444414765746, "grad_norm": 0.0031359747517853975, "learning_rate": 7.95549753236104e-07, "loss": 0.0103, "step": 373860 }, { "epoch": 3.99455099097174, "grad_norm": 0.031812265515327454, "learning_rate": 7.955362015190343e-07, "loss": 0.0177, "step": 373870 }, { "epoch": 3.9946578342860195, "grad_norm": 0.00266416952945292, "learning_rate": 7.955226494682821e-07, "loss": 0.0114, "step": 373880 }, { "epoch": 3.9947646776002994, "grad_norm": 0.8339290618896484, "learning_rate": 7.955090970838625e-07, "loss": 0.0088, "step": 373890 }, { "epoch": 3.994871520914579, "grad_norm": 4.743508338928223, "learning_rate": 7.954955443657907e-07, "loss": 0.0148, "step": 373900 }, { "epoch": 3.9949783642288583, "grad_norm": 0.05588427558541298, "learning_rate": 7.954819913140822e-07, "loss": 0.0026, "step": 373910 }, { "epoch": 3.995085207543138, "grad_norm": 0.024534922093153, "learning_rate": 7.954684379287523e-07, "loss": 0.0018, "step": 373920 }, { "epoch": 3.9951920508574177, "grad_norm": 5.329537391662598, "learning_rate": 7.954548842098162e-07, "loss": 0.0123, "step": 373930 }, { "epoch": 3.995298894171697, "grad_norm": 0.00627232575789094, "learning_rate": 7.954413301572891e-07, "loss": 0.0064, "step": 373940 }, { "epoch": 3.995405737485977, "grad_norm": 0.192128986120224, "learning_rate": 7.954277757711867e-07, "loss": 0.0066, "step": 373950 }, { "epoch": 3.9955125808002565, "grad_norm": 0.0051614814437925816, "learning_rate": 7.954142210515238e-07, "loss": 0.0232, "step": 373960 }, { "epoch": 3.995619424114536, "grad_norm": 0.08937592804431915, "learning_rate": 7.954006659983161e-07, "loss": 0.0052, "step": 373970 }, { "epoch": 3.9957262674288154, "grad_norm": 0.3321145474910736, "learning_rate": 7.953871106115787e-07, "loss": 0.0204, "step": 373980 }, { "epoch": 3.9958331107430953, "grad_norm": 2.069509506225586, "learning_rate": 7.95373554891327e-07, "loss": 0.0208, "step": 373990 }, { "epoch": 3.995939954057375, "grad_norm": 0.0013433982385322452, "learning_rate": 7.953599988375761e-07, "loss": 0.0074, "step": 374000 }, { "epoch": 3.9960467973716547, "grad_norm": 0.015535199083387852, "learning_rate": 7.953464424503416e-07, "loss": 0.0147, "step": 374010 }, { "epoch": 3.996153640685934, "grad_norm": 0.0013948326231911778, "learning_rate": 7.953328857296387e-07, "loss": 0.0077, "step": 374020 }, { "epoch": 3.9962604840002136, "grad_norm": 0.8048210740089417, "learning_rate": 7.953193286754827e-07, "loss": 0.0057, "step": 374030 }, { "epoch": 3.996367327314493, "grad_norm": 0.11480043828487396, "learning_rate": 7.953057712878888e-07, "loss": 0.044, "step": 374040 }, { "epoch": 3.996474170628773, "grad_norm": 0.005090615246444941, "learning_rate": 7.952922135668725e-07, "loss": 0.0309, "step": 374050 }, { "epoch": 3.9965810139430524, "grad_norm": 2.2944142818450928, "learning_rate": 7.952786555124489e-07, "loss": 0.0167, "step": 374060 }, { "epoch": 3.9966878572573323, "grad_norm": 0.4140961766242981, "learning_rate": 7.952650971246334e-07, "loss": 0.0091, "step": 374070 }, { "epoch": 3.996794700571612, "grad_norm": 0.004151614382863045, "learning_rate": 7.952515384034414e-07, "loss": 0.0029, "step": 374080 }, { "epoch": 3.9969015438858913, "grad_norm": 0.03666253760457039, "learning_rate": 7.95237979348888e-07, "loss": 0.0196, "step": 374090 }, { "epoch": 3.9970083872001707, "grad_norm": 0.5014813542366028, "learning_rate": 7.952244199609887e-07, "loss": 0.0143, "step": 374100 }, { "epoch": 3.9971152305144506, "grad_norm": 0.012791893444955349, "learning_rate": 7.95210860239759e-07, "loss": 0.0246, "step": 374110 }, { "epoch": 3.99722207382873, "grad_norm": 1.649613857269287, "learning_rate": 7.951973001852137e-07, "loss": 0.0162, "step": 374120 }, { "epoch": 3.99732891714301, "grad_norm": 0.13388141989707947, "learning_rate": 7.951837397973685e-07, "loss": 0.0042, "step": 374130 }, { "epoch": 3.9974357604572894, "grad_norm": 0.004950272850692272, "learning_rate": 7.951701790762384e-07, "loss": 0.0048, "step": 374140 }, { "epoch": 3.997542603771569, "grad_norm": 0.005869624204933643, "learning_rate": 7.951566180218389e-07, "loss": 0.0027, "step": 374150 }, { "epoch": 3.9976494470858484, "grad_norm": 1.9669867753982544, "learning_rate": 7.951430566341855e-07, "loss": 0.0284, "step": 374160 }, { "epoch": 3.9977562904001283, "grad_norm": 0.1426844447851181, "learning_rate": 7.951294949132931e-07, "loss": 0.0084, "step": 374170 }, { "epoch": 3.9978631337144077, "grad_norm": 0.009902242571115494, "learning_rate": 7.951159328591773e-07, "loss": 0.0216, "step": 374180 }, { "epoch": 3.9979699770286876, "grad_norm": 1.088070273399353, "learning_rate": 7.951023704718534e-07, "loss": 0.0277, "step": 374190 }, { "epoch": 3.998076820342967, "grad_norm": 0.07207179814577103, "learning_rate": 7.950888077513367e-07, "loss": 0.0016, "step": 374200 }, { "epoch": 3.9981836636572465, "grad_norm": 3.0637972354888916, "learning_rate": 7.950752446976423e-07, "loss": 0.0278, "step": 374210 }, { "epoch": 3.998290506971526, "grad_norm": 9.924962997436523, "learning_rate": 7.950616813107857e-07, "loss": 0.043, "step": 374220 }, { "epoch": 3.998397350285806, "grad_norm": 0.017956526950001717, "learning_rate": 7.950481175907823e-07, "loss": 0.0037, "step": 374230 }, { "epoch": 3.9985041936000854, "grad_norm": 22.52008056640625, "learning_rate": 7.950345535376473e-07, "loss": 0.034, "step": 374240 }, { "epoch": 3.9986110369143653, "grad_norm": 0.11095909774303436, "learning_rate": 7.950209891513959e-07, "loss": 0.0177, "step": 374250 }, { "epoch": 3.9987178802286447, "grad_norm": 0.004637781996279955, "learning_rate": 7.950074244320436e-07, "loss": 0.017, "step": 374260 }, { "epoch": 3.998824723542924, "grad_norm": 0.6634984016418457, "learning_rate": 7.949938593796057e-07, "loss": 0.0005, "step": 374270 }, { "epoch": 3.9989315668572036, "grad_norm": 0.0009990440448746085, "learning_rate": 7.949802939940973e-07, "loss": 0.0123, "step": 374280 }, { "epoch": 3.9990384101714835, "grad_norm": 0.0019430328393355012, "learning_rate": 7.949667282755341e-07, "loss": 0.0527, "step": 374290 }, { "epoch": 3.999145253485763, "grad_norm": 8.07763385772705, "learning_rate": 7.949531622239312e-07, "loss": 0.0061, "step": 374300 }, { "epoch": 3.999252096800043, "grad_norm": 0.010086324997246265, "learning_rate": 7.949395958393038e-07, "loss": 0.0156, "step": 374310 }, { "epoch": 3.9993589401143224, "grad_norm": 1.9656156301498413, "learning_rate": 7.949260291216674e-07, "loss": 0.0073, "step": 374320 }, { "epoch": 3.999465783428602, "grad_norm": 0.08855067938566208, "learning_rate": 7.949124620710373e-07, "loss": 0.012, "step": 374330 }, { "epoch": 3.9995726267428817, "grad_norm": 0.005329275503754616, "learning_rate": 7.948988946874287e-07, "loss": 0.0127, "step": 374340 }, { "epoch": 3.999679470057161, "grad_norm": 0.9252305030822754, "learning_rate": 7.94885326970857e-07, "loss": 0.005, "step": 374350 }, { "epoch": 3.9997863133714406, "grad_norm": 0.012477869167923927, "learning_rate": 7.948717589213374e-07, "loss": 0.0015, "step": 374360 }, { "epoch": 3.9998931566857205, "grad_norm": 0.13893933594226837, "learning_rate": 7.948581905388855e-07, "loss": 0.0027, "step": 374370 }, { "epoch": 4.0, "grad_norm": 0.038052842020988464, "learning_rate": 7.948446218235163e-07, "loss": 0.0348, "step": 374380 }, { "epoch": 4.0, "eval_accuracy": 0.7743296173546249, "eval_cer": 0.03425901385085058, "eval_loss": 0.02973778359591961, "eval_runtime": 14300.2282, "eval_samples_per_second": 0.696, "eval_steps_per_second": 0.348, "eval_wer": 0.08993566033630684, "step": 374380 }, { "epoch": 4.0001068433142795, "grad_norm": 0.0730554461479187, "learning_rate": 7.948310527752454e-07, "loss": 0.0086, "step": 374390 }, { "epoch": 4.000213686628559, "grad_norm": 0.3486537039279938, "learning_rate": 7.948174833940879e-07, "loss": 0.0037, "step": 374400 }, { "epoch": 4.000320529942839, "grad_norm": 0.20222227275371552, "learning_rate": 7.948039136800593e-07, "loss": 0.0106, "step": 374410 }, { "epoch": 4.000427373257119, "grad_norm": 6.386549949645996, "learning_rate": 7.947903436331749e-07, "loss": 0.0482, "step": 374420 }, { "epoch": 4.000534216571398, "grad_norm": 0.18067413568496704, "learning_rate": 7.947767732534497e-07, "loss": 0.0055, "step": 374430 }, { "epoch": 4.000641059885678, "grad_norm": 0.0013625947758555412, "learning_rate": 7.947632025408995e-07, "loss": 0.0183, "step": 374440 }, { "epoch": 4.000747903199957, "grad_norm": 0.4549087882041931, "learning_rate": 7.947496314955393e-07, "loss": 0.0024, "step": 374450 }, { "epoch": 4.000854746514237, "grad_norm": 0.006542051676660776, "learning_rate": 7.947360601173845e-07, "loss": 0.001, "step": 374460 }, { "epoch": 4.000961589828517, "grad_norm": 4.198208808898926, "learning_rate": 7.947224884064505e-07, "loss": 0.0048, "step": 374470 }, { "epoch": 4.001068433142796, "grad_norm": 0.0022398096043616533, "learning_rate": 7.947089163627525e-07, "loss": 0.0457, "step": 374480 }, { "epoch": 4.001175276457076, "grad_norm": 3.759201765060425, "learning_rate": 7.946953439863058e-07, "loss": 0.0103, "step": 374490 }, { "epoch": 4.001282119771355, "grad_norm": Infinity, "learning_rate": 7.946817712771261e-07, "loss": 0.0118, "step": 374500 }, { "epoch": 4.001388963085635, "grad_norm": 3.0727198123931885, "learning_rate": 7.946681982352283e-07, "loss": 0.0079, "step": 374510 }, { "epoch": 4.001495806399914, "grad_norm": 0.0174302626401186, "learning_rate": 7.946546248606279e-07, "loss": 0.0123, "step": 374520 }, { "epoch": 4.0016026497141945, "grad_norm": 0.14578339457511902, "learning_rate": 7.9464105115334e-07, "loss": 0.0013, "step": 374530 }, { "epoch": 4.001709493028474, "grad_norm": 1.4481539726257324, "learning_rate": 7.946274771133803e-07, "loss": 0.0021, "step": 374540 }, { "epoch": 4.0018163363427535, "grad_norm": 0.8034619092941284, "learning_rate": 7.946139027407639e-07, "loss": 0.0027, "step": 374550 }, { "epoch": 4.001923179657033, "grad_norm": 0.1574598103761673, "learning_rate": 7.946003280355061e-07, "loss": 0.0071, "step": 374560 }, { "epoch": 4.002030022971312, "grad_norm": 1.5296692848205566, "learning_rate": 7.945867529976224e-07, "loss": 0.0093, "step": 374570 }, { "epoch": 4.002136866285592, "grad_norm": 0.5366368293762207, "learning_rate": 7.94573177627128e-07, "loss": 0.0048, "step": 374580 }, { "epoch": 4.002243709599872, "grad_norm": 3.7953784465789795, "learning_rate": 7.945596019240383e-07, "loss": 0.0037, "step": 374590 }, { "epoch": 4.002350552914152, "grad_norm": 6.5740580558776855, "learning_rate": 7.945460258883684e-07, "loss": 0.0222, "step": 374600 }, { "epoch": 4.002457396228431, "grad_norm": 0.4109375774860382, "learning_rate": 7.94532449520134e-07, "loss": 0.0033, "step": 374610 }, { "epoch": 4.002564239542711, "grad_norm": 0.05098872259259224, "learning_rate": 7.945188728193501e-07, "loss": 0.0127, "step": 374620 }, { "epoch": 4.00267108285699, "grad_norm": 0.015151466242969036, "learning_rate": 7.945052957860322e-07, "loss": 0.0043, "step": 374630 }, { "epoch": 4.0027779261712695, "grad_norm": 0.12581989169120789, "learning_rate": 7.944917184201956e-07, "loss": 0.0089, "step": 374640 }, { "epoch": 4.00288476948555, "grad_norm": 0.008281568065285683, "learning_rate": 7.944781407218558e-07, "loss": 0.0046, "step": 374650 }, { "epoch": 4.002991612799829, "grad_norm": 0.005393090192228556, "learning_rate": 7.944645626910275e-07, "loss": 0.0092, "step": 374660 }, { "epoch": 4.003098456114109, "grad_norm": 0.0375700518488884, "learning_rate": 7.944509843277269e-07, "loss": 0.005, "step": 374670 }, { "epoch": 4.003205299428388, "grad_norm": 0.0037193852476775646, "learning_rate": 7.944374056319687e-07, "loss": 0.0004, "step": 374680 }, { "epoch": 4.003312142742668, "grad_norm": 0.00040835756226442754, "learning_rate": 7.944238266037686e-07, "loss": 0.0065, "step": 374690 }, { "epoch": 4.003418986056947, "grad_norm": 0.007609810680150986, "learning_rate": 7.944102472431417e-07, "loss": 0.0207, "step": 374700 }, { "epoch": 4.0035258293712275, "grad_norm": 0.11062272638082504, "learning_rate": 7.943966675501035e-07, "loss": 0.0002, "step": 374710 }, { "epoch": 4.003632672685507, "grad_norm": 0.023929258808493614, "learning_rate": 7.943830875246691e-07, "loss": 0.0061, "step": 374720 }, { "epoch": 4.003739515999786, "grad_norm": 7.141645908355713, "learning_rate": 7.943695071668542e-07, "loss": 0.0416, "step": 374730 }, { "epoch": 4.003846359314066, "grad_norm": 0.002946204971522093, "learning_rate": 7.943559264766736e-07, "loss": 0.0063, "step": 374740 }, { "epoch": 4.003953202628345, "grad_norm": 0.013558580540120602, "learning_rate": 7.943423454541431e-07, "loss": 0.0019, "step": 374750 }, { "epoch": 4.004060045942625, "grad_norm": 0.019723324105143547, "learning_rate": 7.94328764099278e-07, "loss": 0.0077, "step": 374760 }, { "epoch": 4.004166889256905, "grad_norm": 0.000833990634419024, "learning_rate": 7.943151824120933e-07, "loss": 0.0013, "step": 374770 }, { "epoch": 4.004273732571185, "grad_norm": 0.0028435641434043646, "learning_rate": 7.943016003926046e-07, "loss": 0.002, "step": 374780 }, { "epoch": 4.004380575885464, "grad_norm": 0.0008751782588660717, "learning_rate": 7.942880180408273e-07, "loss": 0.0111, "step": 374790 }, { "epoch": 4.0044874191997435, "grad_norm": 0.002798038301989436, "learning_rate": 7.942744353567765e-07, "loss": 0.0026, "step": 374800 }, { "epoch": 4.004594262514023, "grad_norm": 0.16135799884796143, "learning_rate": 7.942608523404677e-07, "loss": 0.001, "step": 374810 }, { "epoch": 4.004701105828302, "grad_norm": 0.005516712088137865, "learning_rate": 7.942472689919162e-07, "loss": 0.0134, "step": 374820 }, { "epoch": 4.004807949142583, "grad_norm": 0.24108123779296875, "learning_rate": 7.942336853111372e-07, "loss": 0.0012, "step": 374830 }, { "epoch": 4.004914792456862, "grad_norm": 0.20045188069343567, "learning_rate": 7.942201012981462e-07, "loss": 0.0076, "step": 374840 }, { "epoch": 4.005021635771142, "grad_norm": 0.14334352314472198, "learning_rate": 7.942065169529586e-07, "loss": 0.006, "step": 374850 }, { "epoch": 4.005128479085421, "grad_norm": 5.5439372062683105, "learning_rate": 7.941929322755896e-07, "loss": 0.0092, "step": 374860 }, { "epoch": 4.005235322399701, "grad_norm": 0.005920218303799629, "learning_rate": 7.941793472660545e-07, "loss": 0.0177, "step": 374870 }, { "epoch": 4.00534216571398, "grad_norm": 0.02944641374051571, "learning_rate": 7.941657619243688e-07, "loss": 0.0195, "step": 374880 }, { "epoch": 4.00544900902826, "grad_norm": 3.213853359222412, "learning_rate": 7.941521762505478e-07, "loss": 0.0118, "step": 374890 }, { "epoch": 4.00555585234254, "grad_norm": 0.008466443046927452, "learning_rate": 7.941385902446068e-07, "loss": 0.0079, "step": 374900 }, { "epoch": 4.005662695656819, "grad_norm": 0.27532362937927246, "learning_rate": 7.941250039065611e-07, "loss": 0.0009, "step": 374910 }, { "epoch": 4.005769538971099, "grad_norm": 0.007275200914591551, "learning_rate": 7.941114172364259e-07, "loss": 0.0228, "step": 374920 }, { "epoch": 4.005876382285378, "grad_norm": 0.031927142292261124, "learning_rate": 7.940978302342169e-07, "loss": 0.0005, "step": 374930 }, { "epoch": 4.005983225599658, "grad_norm": 0.012549172155559063, "learning_rate": 7.940842428999492e-07, "loss": 0.0027, "step": 374940 }, { "epoch": 4.006090068913938, "grad_norm": 0.002732782159000635, "learning_rate": 7.940706552336382e-07, "loss": 0.0083, "step": 374950 }, { "epoch": 4.0061969122282175, "grad_norm": 5.980823993682861, "learning_rate": 7.940570672352994e-07, "loss": 0.0262, "step": 374960 }, { "epoch": 4.006303755542497, "grad_norm": 2.6871020793914795, "learning_rate": 7.940434789049476e-07, "loss": 0.001, "step": 374970 }, { "epoch": 4.006410598856776, "grad_norm": 0.15691380202770233, "learning_rate": 7.940298902425989e-07, "loss": 0.0061, "step": 374980 }, { "epoch": 4.006517442171056, "grad_norm": 0.011339577846229076, "learning_rate": 7.940163012482679e-07, "loss": 0.003, "step": 374990 }, { "epoch": 4.006624285485335, "grad_norm": 0.021359838545322418, "learning_rate": 7.940027119219705e-07, "loss": 0.0137, "step": 375000 }, { "epoch": 4.006731128799616, "grad_norm": 0.05073552578687668, "learning_rate": 7.939891222637219e-07, "loss": 0.0122, "step": 375010 }, { "epoch": 4.006837972113895, "grad_norm": 0.012246057391166687, "learning_rate": 7.939755322735373e-07, "loss": 0.0042, "step": 375020 }, { "epoch": 4.006944815428175, "grad_norm": 0.0009141643531620502, "learning_rate": 7.93961941951432e-07, "loss": 0.0041, "step": 375030 }, { "epoch": 4.007051658742454, "grad_norm": 0.014514957554638386, "learning_rate": 7.939483512974218e-07, "loss": 0.0002, "step": 375040 }, { "epoch": 4.0071585020567335, "grad_norm": 0.024481529369950294, "learning_rate": 7.939347603115215e-07, "loss": 0.0051, "step": 375050 }, { "epoch": 4.007265345371013, "grad_norm": 0.05553179606795311, "learning_rate": 7.939211689937468e-07, "loss": 0.019, "step": 375060 }, { "epoch": 4.007372188685293, "grad_norm": 0.010198624804615974, "learning_rate": 7.939075773441128e-07, "loss": 0.0016, "step": 375070 }, { "epoch": 4.007479031999573, "grad_norm": 0.25146475434303284, "learning_rate": 7.938939853626351e-07, "loss": 0.0124, "step": 375080 }, { "epoch": 4.007585875313852, "grad_norm": 0.01241283304989338, "learning_rate": 7.938803930493288e-07, "loss": 0.0006, "step": 375090 }, { "epoch": 4.007692718628132, "grad_norm": 0.765642523765564, "learning_rate": 7.938668004042095e-07, "loss": 0.0083, "step": 375100 }, { "epoch": 4.007799561942411, "grad_norm": 0.006310704164206982, "learning_rate": 7.938532074272922e-07, "loss": 0.0057, "step": 375110 }, { "epoch": 4.007906405256691, "grad_norm": 0.17216439545154572, "learning_rate": 7.938396141185926e-07, "loss": 0.0034, "step": 375120 }, { "epoch": 4.008013248570971, "grad_norm": 0.5202274918556213, "learning_rate": 7.938260204781259e-07, "loss": 0.0068, "step": 375130 }, { "epoch": 4.00812009188525, "grad_norm": 0.4202207624912262, "learning_rate": 7.938124265059074e-07, "loss": 0.0005, "step": 375140 }, { "epoch": 4.00822693519953, "grad_norm": 0.08793794363737106, "learning_rate": 7.937988322019525e-07, "loss": 0.0077, "step": 375150 }, { "epoch": 4.008333778513809, "grad_norm": 0.024101734161376953, "learning_rate": 7.937852375662766e-07, "loss": 0.028, "step": 375160 }, { "epoch": 4.008440621828089, "grad_norm": 0.11993944644927979, "learning_rate": 7.937716425988948e-07, "loss": 0.0621, "step": 375170 }, { "epoch": 4.008547465142369, "grad_norm": 2.8847365379333496, "learning_rate": 7.937580472998228e-07, "loss": 0.0106, "step": 375180 }, { "epoch": 4.008654308456649, "grad_norm": 0.27029097080230713, "learning_rate": 7.937444516690758e-07, "loss": 0.0127, "step": 375190 }, { "epoch": 4.008761151770928, "grad_norm": 0.003665460040792823, "learning_rate": 7.937308557066691e-07, "loss": 0.0096, "step": 375200 }, { "epoch": 4.0088679950852075, "grad_norm": 0.010199983604252338, "learning_rate": 7.937172594126181e-07, "loss": 0.0076, "step": 375210 }, { "epoch": 4.008974838399487, "grad_norm": 0.00201598578132689, "learning_rate": 7.937036627869383e-07, "loss": 0.025, "step": 375220 }, { "epoch": 4.009081681713766, "grad_norm": 0.00191047927364707, "learning_rate": 7.936900658296446e-07, "loss": 0.032, "step": 375230 }, { "epoch": 4.009188525028047, "grad_norm": 0.09519877284765244, "learning_rate": 7.936764685407529e-07, "loss": 0.0369, "step": 375240 }, { "epoch": 4.009295368342326, "grad_norm": 1.8520091772079468, "learning_rate": 7.93662870920278e-07, "loss": 0.0075, "step": 375250 }, { "epoch": 4.009402211656606, "grad_norm": 0.005031113978475332, "learning_rate": 7.936492729682358e-07, "loss": 0.027, "step": 375260 }, { "epoch": 4.009509054970885, "grad_norm": 0.010672167874872684, "learning_rate": 7.936356746846414e-07, "loss": 0.0062, "step": 375270 }, { "epoch": 4.009615898285165, "grad_norm": 0.1632751226425171, "learning_rate": 7.9362207606951e-07, "loss": 0.0029, "step": 375280 }, { "epoch": 4.009722741599444, "grad_norm": 2.886247396469116, "learning_rate": 7.936084771228574e-07, "loss": 0.0097, "step": 375290 }, { "epoch": 4.009829584913724, "grad_norm": 0.015582781285047531, "learning_rate": 7.935948778446985e-07, "loss": 0.0013, "step": 375300 }, { "epoch": 4.009936428228004, "grad_norm": 0.02052474394440651, "learning_rate": 7.935812782350489e-07, "loss": 0.0036, "step": 375310 }, { "epoch": 4.010043271542283, "grad_norm": 0.007618271745741367, "learning_rate": 7.935676782939238e-07, "loss": 0.0013, "step": 375320 }, { "epoch": 4.010150114856563, "grad_norm": 0.22895768284797668, "learning_rate": 7.935540780213385e-07, "loss": 0.0073, "step": 375330 }, { "epoch": 4.010256958170842, "grad_norm": 11.505072593688965, "learning_rate": 7.935404774173087e-07, "loss": 0.0141, "step": 375340 }, { "epoch": 4.010363801485122, "grad_norm": 0.005049422848969698, "learning_rate": 7.935268764818495e-07, "loss": 0.0027, "step": 375350 }, { "epoch": 4.010470644799402, "grad_norm": 9.185153007507324, "learning_rate": 7.935132752149763e-07, "loss": 0.0156, "step": 375360 }, { "epoch": 4.0105774881136815, "grad_norm": 0.012099497951567173, "learning_rate": 7.934996736167044e-07, "loss": 0.0061, "step": 375370 }, { "epoch": 4.010684331427961, "grad_norm": 0.0449981652200222, "learning_rate": 7.934860716870493e-07, "loss": 0.0251, "step": 375380 }, { "epoch": 4.0107911747422405, "grad_norm": 0.2590709924697876, "learning_rate": 7.934724694260263e-07, "loss": 0.0021, "step": 375390 }, { "epoch": 4.01089801805652, "grad_norm": 4.492883205413818, "learning_rate": 7.934588668336506e-07, "loss": 0.0113, "step": 375400 }, { "epoch": 4.011004861370799, "grad_norm": 0.3028146028518677, "learning_rate": 7.934452639099378e-07, "loss": 0.0084, "step": 375410 }, { "epoch": 4.01111170468508, "grad_norm": 0.17457211017608643, "learning_rate": 7.934316606549032e-07, "loss": 0.0066, "step": 375420 }, { "epoch": 4.011218547999359, "grad_norm": 0.8501081466674805, "learning_rate": 7.93418057068562e-07, "loss": 0.0029, "step": 375430 }, { "epoch": 4.011325391313639, "grad_norm": 2.7935125827789307, "learning_rate": 7.934044531509297e-07, "loss": 0.0024, "step": 375440 }, { "epoch": 4.011432234627918, "grad_norm": 0.028892356902360916, "learning_rate": 7.933908489020215e-07, "loss": 0.0125, "step": 375450 }, { "epoch": 4.0115390779421976, "grad_norm": 8.995394706726074, "learning_rate": 7.933772443218532e-07, "loss": 0.0175, "step": 375460 }, { "epoch": 4.011645921256477, "grad_norm": 3.7492167949676514, "learning_rate": 7.933636394104395e-07, "loss": 0.002, "step": 375470 }, { "epoch": 4.011752764570757, "grad_norm": 0.12985444068908691, "learning_rate": 7.933500341677963e-07, "loss": 0.0011, "step": 375480 }, { "epoch": 4.011859607885037, "grad_norm": 0.005599305033683777, "learning_rate": 7.933364285939388e-07, "loss": 0.0412, "step": 375490 }, { "epoch": 4.011966451199316, "grad_norm": 0.5884566307067871, "learning_rate": 7.933228226888821e-07, "loss": 0.0097, "step": 375500 }, { "epoch": 4.012073294513596, "grad_norm": 2.269155740737915, "learning_rate": 7.933092164526421e-07, "loss": 0.0277, "step": 375510 }, { "epoch": 4.012180137827875, "grad_norm": 0.010772032663226128, "learning_rate": 7.932956098852337e-07, "loss": 0.0084, "step": 375520 }, { "epoch": 4.012286981142155, "grad_norm": 0.04960671439766884, "learning_rate": 7.932820029866725e-07, "loss": 0.0087, "step": 375530 }, { "epoch": 4.012393824456435, "grad_norm": 0.03170328587293625, "learning_rate": 7.932683957569737e-07, "loss": 0.0045, "step": 375540 }, { "epoch": 4.0125006677707145, "grad_norm": 18.338960647583008, "learning_rate": 7.932547881961528e-07, "loss": 0.0408, "step": 375550 }, { "epoch": 4.012607511084994, "grad_norm": 7.864830493927002, "learning_rate": 7.932411803042252e-07, "loss": 0.0089, "step": 375560 }, { "epoch": 4.012714354399273, "grad_norm": 0.13369220495224, "learning_rate": 7.932275720812058e-07, "loss": 0.0074, "step": 375570 }, { "epoch": 4.012821197713553, "grad_norm": 0.03749078884720802, "learning_rate": 7.932139635271106e-07, "loss": 0.0023, "step": 375580 }, { "epoch": 4.012928041027832, "grad_norm": 0.22991539537906647, "learning_rate": 7.932003546419548e-07, "loss": 0.0135, "step": 375590 }, { "epoch": 4.013034884342113, "grad_norm": 0.01745104044675827, "learning_rate": 7.931867454257536e-07, "loss": 0.0069, "step": 375600 }, { "epoch": 4.013141727656392, "grad_norm": 0.001080158050172031, "learning_rate": 7.931731358785224e-07, "loss": 0.0027, "step": 375610 }, { "epoch": 4.013248570970672, "grad_norm": 3.0074172019958496, "learning_rate": 7.931595260002766e-07, "loss": 0.0207, "step": 375620 }, { "epoch": 4.013355414284951, "grad_norm": 2.0183143615722656, "learning_rate": 7.931459157910317e-07, "loss": 0.0187, "step": 375630 }, { "epoch": 4.0134622575992305, "grad_norm": 1.6255278587341309, "learning_rate": 7.931323052508028e-07, "loss": 0.0861, "step": 375640 }, { "epoch": 4.01356910091351, "grad_norm": 0.04084915295243263, "learning_rate": 7.931186943796054e-07, "loss": 0.0024, "step": 375650 }, { "epoch": 4.01367594422779, "grad_norm": 0.01710505411028862, "learning_rate": 7.931050831774549e-07, "loss": 0.0174, "step": 375660 }, { "epoch": 4.01378278754207, "grad_norm": 0.3125007152557373, "learning_rate": 7.930914716443666e-07, "loss": 0.0051, "step": 375670 }, { "epoch": 4.013889630856349, "grad_norm": 0.824171245098114, "learning_rate": 7.930778597803559e-07, "loss": 0.0112, "step": 375680 }, { "epoch": 4.013996474170629, "grad_norm": 0.2910427153110504, "learning_rate": 7.930642475854383e-07, "loss": 0.0027, "step": 375690 }, { "epoch": 4.014103317484908, "grad_norm": 1.5145957469940186, "learning_rate": 7.930506350596288e-07, "loss": 0.0018, "step": 375700 }, { "epoch": 4.014210160799188, "grad_norm": 3.1304373741149902, "learning_rate": 7.930370222029432e-07, "loss": 0.0015, "step": 375710 }, { "epoch": 4.014317004113468, "grad_norm": 0.1435748189687729, "learning_rate": 7.930234090153968e-07, "loss": 0.0111, "step": 375720 }, { "epoch": 4.014423847427747, "grad_norm": 5.934023380279541, "learning_rate": 7.930097954970048e-07, "loss": 0.0075, "step": 375730 }, { "epoch": 4.014530690742027, "grad_norm": 0.6917917132377625, "learning_rate": 7.929961816477825e-07, "loss": 0.0009, "step": 375740 }, { "epoch": 4.014637534056306, "grad_norm": 0.001128568546846509, "learning_rate": 7.929825674677455e-07, "loss": 0.0072, "step": 375750 }, { "epoch": 4.014744377370586, "grad_norm": 7.589812278747559, "learning_rate": 7.92968952956909e-07, "loss": 0.0084, "step": 375760 }, { "epoch": 4.014851220684865, "grad_norm": 0.03141843155026436, "learning_rate": 7.929553381152884e-07, "loss": 0.0046, "step": 375770 }, { "epoch": 4.014958063999146, "grad_norm": 10.983332633972168, "learning_rate": 7.929417229428992e-07, "loss": 0.0474, "step": 375780 }, { "epoch": 4.015064907313425, "grad_norm": 0.023973096162080765, "learning_rate": 7.929281074397567e-07, "loss": 0.0006, "step": 375790 }, { "epoch": 4.0151717506277045, "grad_norm": 0.38254857063293457, "learning_rate": 7.929144916058761e-07, "loss": 0.0119, "step": 375800 }, { "epoch": 4.015278593941984, "grad_norm": 0.02212001010775566, "learning_rate": 7.929008754412732e-07, "loss": 0.0023, "step": 375810 }, { "epoch": 4.015385437256263, "grad_norm": 0.08830282837152481, "learning_rate": 7.928872589459629e-07, "loss": 0.0267, "step": 375820 }, { "epoch": 4.015492280570543, "grad_norm": 0.9693297743797302, "learning_rate": 7.928736421199608e-07, "loss": 0.008, "step": 375830 }, { "epoch": 4.015599123884823, "grad_norm": 0.057602476328611374, "learning_rate": 7.928600249632823e-07, "loss": 0.0143, "step": 375840 }, { "epoch": 4.015705967199103, "grad_norm": 0.8175355195999146, "learning_rate": 7.928464074759427e-07, "loss": 0.0012, "step": 375850 }, { "epoch": 4.015812810513382, "grad_norm": 0.14702393114566803, "learning_rate": 7.928327896579574e-07, "loss": 0.0005, "step": 375860 }, { "epoch": 4.015919653827662, "grad_norm": 1.9811769723892212, "learning_rate": 7.928191715093417e-07, "loss": 0.0023, "step": 375870 }, { "epoch": 4.016026497141941, "grad_norm": 0.27737781405448914, "learning_rate": 7.928055530301113e-07, "loss": 0.0094, "step": 375880 }, { "epoch": 4.016133340456221, "grad_norm": 0.007182569708675146, "learning_rate": 7.927919342202811e-07, "loss": 0.0101, "step": 375890 }, { "epoch": 4.016240183770501, "grad_norm": 7.869556427001953, "learning_rate": 7.927783150798668e-07, "loss": 0.0049, "step": 375900 }, { "epoch": 4.01634702708478, "grad_norm": 12.009636878967285, "learning_rate": 7.927646956088837e-07, "loss": 0.0132, "step": 375910 }, { "epoch": 4.01645387039906, "grad_norm": 0.8204431533813477, "learning_rate": 7.927510758073473e-07, "loss": 0.0091, "step": 375920 }, { "epoch": 4.016560713713339, "grad_norm": 0.37374845147132874, "learning_rate": 7.927374556752726e-07, "loss": 0.0181, "step": 375930 }, { "epoch": 4.016667557027619, "grad_norm": 0.2324793040752411, "learning_rate": 7.927238352126753e-07, "loss": 0.0003, "step": 375940 }, { "epoch": 4.016774400341899, "grad_norm": 0.0052266507409513, "learning_rate": 7.927102144195706e-07, "loss": 0.0113, "step": 375950 }, { "epoch": 4.0168812436561785, "grad_norm": 0.0010605866555124521, "learning_rate": 7.926965932959741e-07, "loss": 0.0109, "step": 375960 }, { "epoch": 4.016988086970458, "grad_norm": 0.0009138078894466162, "learning_rate": 7.92682971841901e-07, "loss": 0.002, "step": 375970 }, { "epoch": 4.017094930284737, "grad_norm": 2.76811146736145, "learning_rate": 7.926693500573668e-07, "loss": 0.0124, "step": 375980 }, { "epoch": 4.017201773599017, "grad_norm": 0.004091734066605568, "learning_rate": 7.926557279423869e-07, "loss": 0.0297, "step": 375990 }, { "epoch": 4.017308616913296, "grad_norm": 0.004940886050462723, "learning_rate": 7.926421054969762e-07, "loss": 0.0087, "step": 376000 }, { "epoch": 4.017415460227577, "grad_norm": 0.005411992780864239, "learning_rate": 7.926284827211509e-07, "loss": 0.0163, "step": 376010 }, { "epoch": 4.017522303541856, "grad_norm": 0.003959535155445337, "learning_rate": 7.926148596149259e-07, "loss": 0.0104, "step": 376020 }, { "epoch": 4.017629146856136, "grad_norm": 0.48157116770744324, "learning_rate": 7.926012361783165e-07, "loss": 0.0053, "step": 376030 }, { "epoch": 4.017735990170415, "grad_norm": 1.0937912464141846, "learning_rate": 7.925876124113382e-07, "loss": 0.0029, "step": 376040 }, { "epoch": 4.0178428334846945, "grad_norm": 0.015872064977884293, "learning_rate": 7.925739883140065e-07, "loss": 0.0054, "step": 376050 }, { "epoch": 4.017949676798974, "grad_norm": 0.7021071314811707, "learning_rate": 7.925603638863367e-07, "loss": 0.0105, "step": 376060 }, { "epoch": 4.018056520113254, "grad_norm": 0.09284189343452454, "learning_rate": 7.925467391283441e-07, "loss": 0.0116, "step": 376070 }, { "epoch": 4.018163363427534, "grad_norm": 0.0004777164722327143, "learning_rate": 7.925331140400442e-07, "loss": 0.0054, "step": 376080 }, { "epoch": 4.018270206741813, "grad_norm": 0.002597535029053688, "learning_rate": 7.925194886214523e-07, "loss": 0.005, "step": 376090 }, { "epoch": 4.018377050056093, "grad_norm": 4.29778528213501, "learning_rate": 7.925058628725838e-07, "loss": 0.0043, "step": 376100 }, { "epoch": 4.018483893370372, "grad_norm": 1.3526583909988403, "learning_rate": 7.924922367934541e-07, "loss": 0.0055, "step": 376110 }, { "epoch": 4.018590736684652, "grad_norm": 1.0038151741027832, "learning_rate": 7.924786103840786e-07, "loss": 0.0016, "step": 376120 }, { "epoch": 4.018697579998932, "grad_norm": 5.80590295791626, "learning_rate": 7.924649836444728e-07, "loss": 0.0095, "step": 376130 }, { "epoch": 4.018804423313211, "grad_norm": 0.005764308851212263, "learning_rate": 7.924513565746518e-07, "loss": 0.0116, "step": 376140 }, { "epoch": 4.018911266627491, "grad_norm": 0.06170855462551117, "learning_rate": 7.924377291746313e-07, "loss": 0.0523, "step": 376150 }, { "epoch": 4.01901810994177, "grad_norm": 0.8810864686965942, "learning_rate": 7.924241014444264e-07, "loss": 0.0102, "step": 376160 }, { "epoch": 4.01912495325605, "grad_norm": 5.42749547958374, "learning_rate": 7.924104733840527e-07, "loss": 0.0221, "step": 376170 }, { "epoch": 4.019231796570329, "grad_norm": 0.04761628061532974, "learning_rate": 7.923968449935254e-07, "loss": 0.0007, "step": 376180 }, { "epoch": 4.01933863988461, "grad_norm": 0.0041945031844079494, "learning_rate": 7.9238321627286e-07, "loss": 0.0033, "step": 376190 }, { "epoch": 4.019445483198889, "grad_norm": 0.014256162568926811, "learning_rate": 7.92369587222072e-07, "loss": 0.0034, "step": 376200 }, { "epoch": 4.0195523265131685, "grad_norm": 0.0004990038578398526, "learning_rate": 7.923559578411767e-07, "loss": 0.001, "step": 376210 }, { "epoch": 4.019659169827448, "grad_norm": 0.06961195915937424, "learning_rate": 7.923423281301893e-07, "loss": 0.014, "step": 376220 }, { "epoch": 4.019766013141727, "grad_norm": 0.010033264756202698, "learning_rate": 7.923286980891254e-07, "loss": 0.007, "step": 376230 }, { "epoch": 4.019872856456007, "grad_norm": 3.4642333984375, "learning_rate": 7.923150677180004e-07, "loss": 0.0044, "step": 376240 }, { "epoch": 4.019979699770287, "grad_norm": 0.9488529562950134, "learning_rate": 7.923014370168295e-07, "loss": 0.0188, "step": 376250 }, { "epoch": 4.020086543084567, "grad_norm": 0.01111795287579298, "learning_rate": 7.922878059856284e-07, "loss": 0.0018, "step": 376260 }, { "epoch": 4.020193386398846, "grad_norm": 2.7709953784942627, "learning_rate": 7.922741746244122e-07, "loss": 0.0079, "step": 376270 }, { "epoch": 4.020300229713126, "grad_norm": 1.054890513420105, "learning_rate": 7.922605429331964e-07, "loss": 0.0052, "step": 376280 }, { "epoch": 4.020407073027405, "grad_norm": 12.171616554260254, "learning_rate": 7.922469109119963e-07, "loss": 0.0206, "step": 376290 }, { "epoch": 4.0205139163416845, "grad_norm": 0.12845100462436676, "learning_rate": 7.922332785608274e-07, "loss": 0.0037, "step": 376300 }, { "epoch": 4.020620759655965, "grad_norm": 9.2022066116333, "learning_rate": 7.922196458797053e-07, "loss": 0.0248, "step": 376310 }, { "epoch": 4.020727602970244, "grad_norm": 0.23915959894657135, "learning_rate": 7.92206012868645e-07, "loss": 0.0149, "step": 376320 }, { "epoch": 4.020834446284524, "grad_norm": 0.5529187321662903, "learning_rate": 7.92192379527662e-07, "loss": 0.0161, "step": 376330 }, { "epoch": 4.020941289598803, "grad_norm": 0.03028581477701664, "learning_rate": 7.921787458567718e-07, "loss": 0.0049, "step": 376340 }, { "epoch": 4.021048132913083, "grad_norm": 0.9364995956420898, "learning_rate": 7.921651118559898e-07, "loss": 0.0037, "step": 376350 }, { "epoch": 4.021154976227362, "grad_norm": 0.05039265751838684, "learning_rate": 7.921514775253313e-07, "loss": 0.0188, "step": 376360 }, { "epoch": 4.0212618195416425, "grad_norm": 0.08030615001916885, "learning_rate": 7.921378428648118e-07, "loss": 0.0078, "step": 376370 }, { "epoch": 4.021368662855922, "grad_norm": 0.6348250508308411, "learning_rate": 7.921242078744466e-07, "loss": 0.0093, "step": 376380 }, { "epoch": 4.021475506170201, "grad_norm": 0.05893181264400482, "learning_rate": 7.92110572554251e-07, "loss": 0.0019, "step": 376390 }, { "epoch": 4.021582349484481, "grad_norm": 0.001486690598540008, "learning_rate": 7.920969369042405e-07, "loss": 0.0096, "step": 376400 }, { "epoch": 4.02168919279876, "grad_norm": 0.0005803264211863279, "learning_rate": 7.920833009244308e-07, "loss": 0.0171, "step": 376410 }, { "epoch": 4.02179603611304, "grad_norm": 0.007856646552681923, "learning_rate": 7.920696646148367e-07, "loss": 0.0121, "step": 376420 }, { "epoch": 4.02190287942732, "grad_norm": 0.15104353427886963, "learning_rate": 7.92056027975474e-07, "loss": 0.0029, "step": 376430 }, { "epoch": 4.0220097227416, "grad_norm": 0.007681945338845253, "learning_rate": 7.920423910063581e-07, "loss": 0.0291, "step": 376440 }, { "epoch": 4.022116566055879, "grad_norm": 0.8181422352790833, "learning_rate": 7.920287537075041e-07, "loss": 0.014, "step": 376450 }, { "epoch": 4.0222234093701585, "grad_norm": 0.06432554125785828, "learning_rate": 7.920151160789278e-07, "loss": 0.0028, "step": 376460 }, { "epoch": 4.022330252684438, "grad_norm": 0.007823158986866474, "learning_rate": 7.920014781206443e-07, "loss": 0.005, "step": 376470 }, { "epoch": 4.0224370959987175, "grad_norm": 0.002499078866094351, "learning_rate": 7.919878398326691e-07, "loss": 0.0035, "step": 376480 }, { "epoch": 4.022543939312998, "grad_norm": 0.051823318004608154, "learning_rate": 7.919742012150176e-07, "loss": 0.0002, "step": 376490 }, { "epoch": 4.022650782627277, "grad_norm": 0.3613858222961426, "learning_rate": 7.919605622677053e-07, "loss": 0.0008, "step": 376500 }, { "epoch": 4.022757625941557, "grad_norm": 1.201934576034546, "learning_rate": 7.919469229907473e-07, "loss": 0.016, "step": 376510 }, { "epoch": 4.022864469255836, "grad_norm": 0.0051697236485779285, "learning_rate": 7.919332833841592e-07, "loss": 0.0064, "step": 376520 }, { "epoch": 4.022971312570116, "grad_norm": 0.07477958500385284, "learning_rate": 7.919196434479565e-07, "loss": 0.0047, "step": 376530 }, { "epoch": 4.023078155884395, "grad_norm": 2.4012880325317383, "learning_rate": 7.919060031821545e-07, "loss": 0.0014, "step": 376540 }, { "epoch": 4.0231849991986754, "grad_norm": 0.8149198889732361, "learning_rate": 7.918923625867687e-07, "loss": 0.0024, "step": 376550 }, { "epoch": 4.023291842512955, "grad_norm": 1.022607684135437, "learning_rate": 7.918787216618141e-07, "loss": 0.0012, "step": 376560 }, { "epoch": 4.023398685827234, "grad_norm": 0.08663234114646912, "learning_rate": 7.918650804073064e-07, "loss": 0.01, "step": 376570 }, { "epoch": 4.023505529141514, "grad_norm": 12.594185829162598, "learning_rate": 7.918514388232612e-07, "loss": 0.0468, "step": 376580 }, { "epoch": 4.023612372455793, "grad_norm": 2.8150718212127686, "learning_rate": 7.918377969096937e-07, "loss": 0.0573, "step": 376590 }, { "epoch": 4.023719215770074, "grad_norm": 0.035428375005722046, "learning_rate": 7.918241546666191e-07, "loss": 0.0116, "step": 376600 }, { "epoch": 4.023826059084353, "grad_norm": 2.027164936065674, "learning_rate": 7.918105120940533e-07, "loss": 0.0068, "step": 376610 }, { "epoch": 4.0239329023986325, "grad_norm": 0.06753139197826385, "learning_rate": 7.917968691920111e-07, "loss": 0.0105, "step": 376620 }, { "epoch": 4.024039745712912, "grad_norm": 0.061589282006025314, "learning_rate": 7.917832259605084e-07, "loss": 0.0114, "step": 376630 }, { "epoch": 4.0241465890271915, "grad_norm": 0.01109487283974886, "learning_rate": 7.917695823995603e-07, "loss": 0.0007, "step": 376640 }, { "epoch": 4.024253432341471, "grad_norm": 1.5486738681793213, "learning_rate": 7.917559385091823e-07, "loss": 0.0089, "step": 376650 }, { "epoch": 4.024360275655751, "grad_norm": 1.2406537532806396, "learning_rate": 7.917422942893901e-07, "loss": 0.0047, "step": 376660 }, { "epoch": 4.024467118970031, "grad_norm": 3.4683594703674316, "learning_rate": 7.917286497401986e-07, "loss": 0.011, "step": 376670 }, { "epoch": 4.02457396228431, "grad_norm": 8.504651069641113, "learning_rate": 7.917150048616235e-07, "loss": 0.0238, "step": 376680 }, { "epoch": 4.02468080559859, "grad_norm": 0.0014673728728666902, "learning_rate": 7.917013596536801e-07, "loss": 0.0044, "step": 376690 }, { "epoch": 4.024787648912869, "grad_norm": 0.10499060153961182, "learning_rate": 7.916877141163839e-07, "loss": 0.003, "step": 376700 }, { "epoch": 4.024894492227149, "grad_norm": 0.2944958508014679, "learning_rate": 7.916740682497502e-07, "loss": 0.0467, "step": 376710 }, { "epoch": 4.025001335541429, "grad_norm": 0.067873015999794, "learning_rate": 7.916604220537946e-07, "loss": 0.0048, "step": 376720 }, { "epoch": 4.025108178855708, "grad_norm": 0.21534624695777893, "learning_rate": 7.916467755285321e-07, "loss": 0.0022, "step": 376730 }, { "epoch": 4.025215022169988, "grad_norm": 0.10218844562768936, "learning_rate": 7.916331286739786e-07, "loss": 0.0035, "step": 376740 }, { "epoch": 4.025321865484267, "grad_norm": 0.00160313886590302, "learning_rate": 7.916194814901491e-07, "loss": 0.0022, "step": 376750 }, { "epoch": 4.025428708798547, "grad_norm": 0.001999623142182827, "learning_rate": 7.916058339770594e-07, "loss": 0.0001, "step": 376760 }, { "epoch": 4.025535552112826, "grad_norm": 0.026164278388023376, "learning_rate": 7.915921861347246e-07, "loss": 0.0004, "step": 376770 }, { "epoch": 4.025642395427107, "grad_norm": 0.002968390239402652, "learning_rate": 7.915785379631604e-07, "loss": 0.0004, "step": 376780 }, { "epoch": 4.025749238741386, "grad_norm": 0.05633998662233353, "learning_rate": 7.915648894623819e-07, "loss": 0.0053, "step": 376790 }, { "epoch": 4.0258560820556655, "grad_norm": 0.008885545656085014, "learning_rate": 7.915512406324046e-07, "loss": 0.0064, "step": 376800 }, { "epoch": 4.025962925369945, "grad_norm": 0.06149378418922424, "learning_rate": 7.915375914732438e-07, "loss": 0.0061, "step": 376810 }, { "epoch": 4.026069768684224, "grad_norm": 3.3241732120513916, "learning_rate": 7.915239419849151e-07, "loss": 0.0096, "step": 376820 }, { "epoch": 4.026176611998504, "grad_norm": 0.0019008188974112272, "learning_rate": 7.915102921674342e-07, "loss": 0.0002, "step": 376830 }, { "epoch": 4.026283455312784, "grad_norm": 0.9462946653366089, "learning_rate": 7.914966420208158e-07, "loss": 0.0084, "step": 376840 }, { "epoch": 4.026390298627064, "grad_norm": 0.0799591913819313, "learning_rate": 7.914829915450759e-07, "loss": 0.0029, "step": 376850 }, { "epoch": 4.026497141941343, "grad_norm": 0.35991647839546204, "learning_rate": 7.914693407402296e-07, "loss": 0.0068, "step": 376860 }, { "epoch": 4.026603985255623, "grad_norm": 0.01557938102632761, "learning_rate": 7.914556896062925e-07, "loss": 0.0032, "step": 376870 }, { "epoch": 4.026710828569902, "grad_norm": 0.5588042140007019, "learning_rate": 7.914420381432798e-07, "loss": 0.0024, "step": 376880 }, { "epoch": 4.0268176718841815, "grad_norm": 0.0007150706951506436, "learning_rate": 7.914283863512072e-07, "loss": 0.0024, "step": 376890 }, { "epoch": 4.026924515198462, "grad_norm": 0.10497275739908218, "learning_rate": 7.914147342300899e-07, "loss": 0.014, "step": 376900 }, { "epoch": 4.027031358512741, "grad_norm": 0.37445271015167236, "learning_rate": 7.914010817799433e-07, "loss": 0.0057, "step": 376910 }, { "epoch": 4.027138201827021, "grad_norm": 11.550128936767578, "learning_rate": 7.91387429000783e-07, "loss": 0.0318, "step": 376920 }, { "epoch": 4.0272450451413, "grad_norm": 7.378941059112549, "learning_rate": 7.913737758926242e-07, "loss": 0.007, "step": 376930 }, { "epoch": 4.02735188845558, "grad_norm": 0.011833020485937595, "learning_rate": 7.913601224554825e-07, "loss": 0.0194, "step": 376940 }, { "epoch": 4.027458731769859, "grad_norm": 2.950618267059326, "learning_rate": 7.913464686893732e-07, "loss": 0.0113, "step": 376950 }, { "epoch": 4.0275655750841395, "grad_norm": 0.000780838483478874, "learning_rate": 7.913328145943117e-07, "loss": 0.0024, "step": 376960 }, { "epoch": 4.027672418398419, "grad_norm": 5.180200099945068, "learning_rate": 7.913191601703135e-07, "loss": 0.002, "step": 376970 }, { "epoch": 4.027779261712698, "grad_norm": 0.001178739476017654, "learning_rate": 7.91305505417394e-07, "loss": 0.0141, "step": 376980 }, { "epoch": 4.027886105026978, "grad_norm": 0.006011591758579016, "learning_rate": 7.912918503355686e-07, "loss": 0.0103, "step": 376990 }, { "epoch": 4.027992948341257, "grad_norm": 0.005088686011731625, "learning_rate": 7.912781949248527e-07, "loss": 0.0039, "step": 377000 }, { "epoch": 4.028099791655537, "grad_norm": 1.7054585218429565, "learning_rate": 7.912645391852617e-07, "loss": 0.0048, "step": 377010 }, { "epoch": 4.028206634969817, "grad_norm": 3.3763983249664307, "learning_rate": 7.912508831168111e-07, "loss": 0.0098, "step": 377020 }, { "epoch": 4.028313478284097, "grad_norm": 0.3949797451496124, "learning_rate": 7.912372267195162e-07, "loss": 0.0072, "step": 377030 }, { "epoch": 4.028420321598376, "grad_norm": 0.6386501789093018, "learning_rate": 7.912235699933927e-07, "loss": 0.0097, "step": 377040 }, { "epoch": 4.0285271649126555, "grad_norm": 1.9721784591674805, "learning_rate": 7.912099129384555e-07, "loss": 0.0005, "step": 377050 }, { "epoch": 4.028634008226935, "grad_norm": 1.1888227462768555, "learning_rate": 7.911962555547205e-07, "loss": 0.0054, "step": 377060 }, { "epoch": 4.028740851541214, "grad_norm": 0.6187689900398254, "learning_rate": 7.911825978422031e-07, "loss": 0.0016, "step": 377070 }, { "epoch": 4.028847694855495, "grad_norm": 0.0004289928765501827, "learning_rate": 7.911689398009183e-07, "loss": 0.0112, "step": 377080 }, { "epoch": 4.028954538169774, "grad_norm": 3.4326047897338867, "learning_rate": 7.91155281430882e-07, "loss": 0.0073, "step": 377090 }, { "epoch": 4.029061381484054, "grad_norm": 0.047330621629953384, "learning_rate": 7.911416227321092e-07, "loss": 0.0013, "step": 377100 }, { "epoch": 4.029168224798333, "grad_norm": 2.379207134246826, "learning_rate": 7.911279637046156e-07, "loss": 0.0079, "step": 377110 }, { "epoch": 4.029275068112613, "grad_norm": 0.005450572818517685, "learning_rate": 7.911143043484168e-07, "loss": 0.0057, "step": 377120 }, { "epoch": 4.029381911426892, "grad_norm": 2.8243567943573, "learning_rate": 7.911006446635277e-07, "loss": 0.0125, "step": 377130 }, { "epoch": 4.029488754741172, "grad_norm": 0.0037902758922427893, "learning_rate": 7.91086984649964e-07, "loss": 0.0046, "step": 377140 }, { "epoch": 4.029595598055452, "grad_norm": 1.63798189163208, "learning_rate": 7.910733243077412e-07, "loss": 0.0025, "step": 377150 }, { "epoch": 4.029702441369731, "grad_norm": 0.044989362359046936, "learning_rate": 7.910596636368747e-07, "loss": 0.0017, "step": 377160 }, { "epoch": 4.029809284684011, "grad_norm": 0.20118296146392822, "learning_rate": 7.910460026373798e-07, "loss": 0.0068, "step": 377170 }, { "epoch": 4.02991612799829, "grad_norm": 20.184598922729492, "learning_rate": 7.910323413092721e-07, "loss": 0.0335, "step": 377180 }, { "epoch": 4.03002297131257, "grad_norm": 0.01213332824409008, "learning_rate": 7.910186796525668e-07, "loss": 0.0052, "step": 377190 }, { "epoch": 4.03012981462685, "grad_norm": 0.004368237219750881, "learning_rate": 7.910050176672795e-07, "loss": 0.0003, "step": 377200 }, { "epoch": 4.0302366579411295, "grad_norm": 6.301258087158203, "learning_rate": 7.909913553534256e-07, "loss": 0.0014, "step": 377210 }, { "epoch": 4.030343501255409, "grad_norm": 0.39300206303596497, "learning_rate": 7.909776927110204e-07, "loss": 0.0098, "step": 377220 }, { "epoch": 4.030450344569688, "grad_norm": 0.20212453603744507, "learning_rate": 7.909640297400795e-07, "loss": 0.0155, "step": 377230 }, { "epoch": 4.030557187883968, "grad_norm": 0.07816339284181595, "learning_rate": 7.909503664406183e-07, "loss": 0.0021, "step": 377240 }, { "epoch": 4.030664031198247, "grad_norm": 6.5672736167907715, "learning_rate": 7.909367028126522e-07, "loss": 0.0283, "step": 377250 }, { "epoch": 4.030770874512528, "grad_norm": 0.13954587280750275, "learning_rate": 7.909230388561965e-07, "loss": 0.0112, "step": 377260 }, { "epoch": 4.030877717826807, "grad_norm": 0.848868191242218, "learning_rate": 7.909093745712667e-07, "loss": 0.0092, "step": 377270 }, { "epoch": 4.030984561141087, "grad_norm": 3.7484006881713867, "learning_rate": 7.908957099578782e-07, "loss": 0.0031, "step": 377280 }, { "epoch": 4.031091404455366, "grad_norm": 2.985405445098877, "learning_rate": 7.908820450160467e-07, "loss": 0.0222, "step": 377290 }, { "epoch": 4.0311982477696455, "grad_norm": 0.0633084774017334, "learning_rate": 7.908683797457874e-07, "loss": 0.0093, "step": 377300 }, { "epoch": 4.031305091083925, "grad_norm": 0.04168546944856644, "learning_rate": 7.908547141471156e-07, "loss": 0.0041, "step": 377310 }, { "epoch": 4.031411934398205, "grad_norm": 0.38539716601371765, "learning_rate": 7.908410482200469e-07, "loss": 0.0053, "step": 377320 }, { "epoch": 4.031518777712485, "grad_norm": 0.4792296886444092, "learning_rate": 7.908273819645968e-07, "loss": 0.01, "step": 377330 }, { "epoch": 4.031625621026764, "grad_norm": 1.8442281484603882, "learning_rate": 7.908137153807806e-07, "loss": 0.0253, "step": 377340 }, { "epoch": 4.031732464341044, "grad_norm": 0.015996474772691727, "learning_rate": 7.908000484686136e-07, "loss": 0.0325, "step": 377350 }, { "epoch": 4.031839307655323, "grad_norm": 0.00685877026990056, "learning_rate": 7.907863812281117e-07, "loss": 0.0188, "step": 377360 }, { "epoch": 4.0319461509696035, "grad_norm": 4.303040981292725, "learning_rate": 7.907727136592899e-07, "loss": 0.019, "step": 377370 }, { "epoch": 4.032052994283883, "grad_norm": 0.006676694843918085, "learning_rate": 7.907590457621636e-07, "loss": 0.0039, "step": 377380 }, { "epoch": 4.032159837598162, "grad_norm": 0.0043745883740484715, "learning_rate": 7.907453775367485e-07, "loss": 0.0027, "step": 377390 }, { "epoch": 4.032266680912442, "grad_norm": 0.006381104700267315, "learning_rate": 7.907317089830601e-07, "loss": 0.0069, "step": 377400 }, { "epoch": 4.032373524226721, "grad_norm": 6.398540496826172, "learning_rate": 7.907180401011135e-07, "loss": 0.0069, "step": 377410 }, { "epoch": 4.032480367541001, "grad_norm": 0.7609046697616577, "learning_rate": 7.907043708909243e-07, "loss": 0.0015, "step": 377420 }, { "epoch": 4.032587210855281, "grad_norm": 0.0189198050647974, "learning_rate": 7.90690701352508e-07, "loss": 0.0181, "step": 377430 }, { "epoch": 4.032694054169561, "grad_norm": 0.03514673188328743, "learning_rate": 7.906770314858798e-07, "loss": 0.0091, "step": 377440 }, { "epoch": 4.03280089748384, "grad_norm": 0.0468255989253521, "learning_rate": 7.906633612910554e-07, "loss": 0.0112, "step": 377450 }, { "epoch": 4.0329077407981195, "grad_norm": 0.007600981742143631, "learning_rate": 7.906496907680501e-07, "loss": 0.0149, "step": 377460 }, { "epoch": 4.033014584112399, "grad_norm": 0.020319443196058273, "learning_rate": 7.906360199168793e-07, "loss": 0.0028, "step": 377470 }, { "epoch": 4.0331214274266785, "grad_norm": 0.14488518238067627, "learning_rate": 7.906223487375587e-07, "loss": 0.0039, "step": 377480 }, { "epoch": 4.033228270740959, "grad_norm": 3.343944787979126, "learning_rate": 7.906086772301034e-07, "loss": 0.0094, "step": 377490 }, { "epoch": 4.033335114055238, "grad_norm": 0.0028408714570105076, "learning_rate": 7.90595005394529e-07, "loss": 0.0217, "step": 377500 }, { "epoch": 4.033441957369518, "grad_norm": 3.0260283946990967, "learning_rate": 7.905813332308508e-07, "loss": 0.0044, "step": 377510 }, { "epoch": 4.033548800683797, "grad_norm": 0.041658125817775726, "learning_rate": 7.905676607390844e-07, "loss": 0.0054, "step": 377520 }, { "epoch": 4.033655643998077, "grad_norm": 2.1776392459869385, "learning_rate": 7.905539879192453e-07, "loss": 0.0134, "step": 377530 }, { "epoch": 4.033762487312356, "grad_norm": 0.08424464613199234, "learning_rate": 7.905403147713487e-07, "loss": 0.0023, "step": 377540 }, { "epoch": 4.033869330626636, "grad_norm": 0.011218075640499592, "learning_rate": 7.905266412954101e-07, "loss": 0.0014, "step": 377550 }, { "epoch": 4.033976173940916, "grad_norm": 0.00731165474280715, "learning_rate": 7.905129674914451e-07, "loss": 0.0008, "step": 377560 }, { "epoch": 4.034083017255195, "grad_norm": 3.3326053619384766, "learning_rate": 7.90499293359469e-07, "loss": 0.0149, "step": 377570 }, { "epoch": 4.034189860569475, "grad_norm": 0.06271887570619583, "learning_rate": 7.904856188994972e-07, "loss": 0.0084, "step": 377580 }, { "epoch": 4.034296703883754, "grad_norm": 0.06140313297510147, "learning_rate": 7.904719441115453e-07, "loss": 0.0053, "step": 377590 }, { "epoch": 4.034403547198034, "grad_norm": 1.9239275455474854, "learning_rate": 7.904582689956287e-07, "loss": 0.011, "step": 377600 }, { "epoch": 4.034510390512314, "grad_norm": 12.60774040222168, "learning_rate": 7.904445935517627e-07, "loss": 0.015, "step": 377610 }, { "epoch": 4.0346172338265935, "grad_norm": 0.006054711062461138, "learning_rate": 7.904309177799629e-07, "loss": 0.0327, "step": 377620 }, { "epoch": 4.034724077140873, "grad_norm": 0.528732180595398, "learning_rate": 7.904172416802447e-07, "loss": 0.0061, "step": 377630 }, { "epoch": 4.0348309204551525, "grad_norm": 0.01390905398875475, "learning_rate": 7.904035652526236e-07, "loss": 0.0011, "step": 377640 }, { "epoch": 4.034937763769432, "grad_norm": 0.05170620605349541, "learning_rate": 7.903898884971148e-07, "loss": 0.0012, "step": 377650 }, { "epoch": 4.035044607083711, "grad_norm": 3.625502824783325, "learning_rate": 7.903762114137338e-07, "loss": 0.0144, "step": 377660 }, { "epoch": 4.035151450397992, "grad_norm": 0.029418012127280235, "learning_rate": 7.903625340024962e-07, "loss": 0.0031, "step": 377670 }, { "epoch": 4.035258293712271, "grad_norm": 0.009680379182100296, "learning_rate": 7.903488562634176e-07, "loss": 0.009, "step": 377680 }, { "epoch": 4.035365137026551, "grad_norm": 0.004427035339176655, "learning_rate": 7.903351781965129e-07, "loss": 0.0104, "step": 377690 }, { "epoch": 4.03547198034083, "grad_norm": 0.05278262868523598, "learning_rate": 7.903214998017981e-07, "loss": 0.0009, "step": 377700 }, { "epoch": 4.03557882365511, "grad_norm": 0.012581069953739643, "learning_rate": 7.903078210792884e-07, "loss": 0.0012, "step": 377710 }, { "epoch": 4.035685666969389, "grad_norm": 0.03559241071343422, "learning_rate": 7.902941420289992e-07, "loss": 0.0017, "step": 377720 }, { "epoch": 4.035792510283669, "grad_norm": 4.047593116760254, "learning_rate": 7.90280462650946e-07, "loss": 0.006, "step": 377730 }, { "epoch": 4.035899353597949, "grad_norm": 0.3528091013431549, "learning_rate": 7.902667829451442e-07, "loss": 0.0433, "step": 377740 }, { "epoch": 4.036006196912228, "grad_norm": 0.0752931609749794, "learning_rate": 7.902531029116095e-07, "loss": 0.0117, "step": 377750 }, { "epoch": 4.036113040226508, "grad_norm": 0.010508408769965172, "learning_rate": 7.90239422550357e-07, "loss": 0.007, "step": 377760 }, { "epoch": 4.036219883540787, "grad_norm": 0.3890647888183594, "learning_rate": 7.902257418614022e-07, "loss": 0.0098, "step": 377770 }, { "epoch": 4.036326726855067, "grad_norm": 0.0177934393286705, "learning_rate": 7.902120608447609e-07, "loss": 0.0029, "step": 377780 }, { "epoch": 4.036433570169347, "grad_norm": 4.588956356048584, "learning_rate": 7.901983795004478e-07, "loss": 0.0108, "step": 377790 }, { "epoch": 4.0365404134836265, "grad_norm": 0.605006992816925, "learning_rate": 7.901846978284793e-07, "loss": 0.001, "step": 377800 }, { "epoch": 4.036647256797906, "grad_norm": 0.01997276209294796, "learning_rate": 7.901710158288702e-07, "loss": 0.0196, "step": 377810 }, { "epoch": 4.036754100112185, "grad_norm": 0.017043203115463257, "learning_rate": 7.901573335016362e-07, "loss": 0.0285, "step": 377820 }, { "epoch": 4.036860943426465, "grad_norm": 6.047502040863037, "learning_rate": 7.901436508467926e-07, "loss": 0.0194, "step": 377830 }, { "epoch": 4.036967786740744, "grad_norm": 0.05069081857800484, "learning_rate": 7.901299678643549e-07, "loss": 0.0082, "step": 377840 }, { "epoch": 4.037074630055025, "grad_norm": 0.5057128071784973, "learning_rate": 7.901162845543385e-07, "loss": 0.0082, "step": 377850 }, { "epoch": 4.037181473369304, "grad_norm": 0.010992907918989658, "learning_rate": 7.901026009167591e-07, "loss": 0.0048, "step": 377860 }, { "epoch": 4.037288316683584, "grad_norm": 0.6040731072425842, "learning_rate": 7.900889169516319e-07, "loss": 0.0112, "step": 377870 }, { "epoch": 4.037395159997863, "grad_norm": 0.001980092842131853, "learning_rate": 7.900752326589724e-07, "loss": 0.0037, "step": 377880 }, { "epoch": 4.0375020033121425, "grad_norm": 2.144115686416626, "learning_rate": 7.90061548038796e-07, "loss": 0.0285, "step": 377890 }, { "epoch": 4.037608846626422, "grad_norm": 0.032661572098731995, "learning_rate": 7.900478630911183e-07, "loss": 0.0021, "step": 377900 }, { "epoch": 4.037715689940702, "grad_norm": 0.00459153251722455, "learning_rate": 7.900341778159547e-07, "loss": 0.0003, "step": 377910 }, { "epoch": 4.037822533254982, "grad_norm": 0.06114991754293442, "learning_rate": 7.900204922133206e-07, "loss": 0.0049, "step": 377920 }, { "epoch": 4.037929376569261, "grad_norm": 0.1640447974205017, "learning_rate": 7.900068062832315e-07, "loss": 0.0021, "step": 377930 }, { "epoch": 4.038036219883541, "grad_norm": 0.008396734483540058, "learning_rate": 7.899931200257027e-07, "loss": 0.0098, "step": 377940 }, { "epoch": 4.03814306319782, "grad_norm": 0.019854815676808357, "learning_rate": 7.899794334407499e-07, "loss": 0.004, "step": 377950 }, { "epoch": 4.0382499065121, "grad_norm": 0.09517404437065125, "learning_rate": 7.899657465283885e-07, "loss": 0.0046, "step": 377960 }, { "epoch": 4.03835674982638, "grad_norm": 0.0033124040346592665, "learning_rate": 7.899520592886337e-07, "loss": 0.0304, "step": 377970 }, { "epoch": 4.038463593140659, "grad_norm": 1.254454493522644, "learning_rate": 7.899383717215012e-07, "loss": 0.0077, "step": 377980 }, { "epoch": 4.038570436454939, "grad_norm": 0.46314868330955505, "learning_rate": 7.899246838270064e-07, "loss": 0.005, "step": 377990 }, { "epoch": 4.038677279769218, "grad_norm": 0.003859918564558029, "learning_rate": 7.899109956051648e-07, "loss": 0.0013, "step": 378000 }, { "epoch": 4.038784123083498, "grad_norm": 0.1258651167154312, "learning_rate": 7.898973070559918e-07, "loss": 0.0032, "step": 378010 }, { "epoch": 4.038890966397777, "grad_norm": 0.007201368920505047, "learning_rate": 7.898836181795028e-07, "loss": 0.0022, "step": 378020 }, { "epoch": 4.038997809712058, "grad_norm": 0.028773250058293343, "learning_rate": 7.898699289757134e-07, "loss": 0.0016, "step": 378030 }, { "epoch": 4.039104653026337, "grad_norm": 0.5838871002197266, "learning_rate": 7.898562394446389e-07, "loss": 0.0067, "step": 378040 }, { "epoch": 4.0392114963406165, "grad_norm": 0.09618598222732544, "learning_rate": 7.898425495862948e-07, "loss": 0.0029, "step": 378050 }, { "epoch": 4.039318339654896, "grad_norm": 0.10701681673526764, "learning_rate": 7.898288594006968e-07, "loss": 0.0029, "step": 378060 }, { "epoch": 4.039425182969175, "grad_norm": 3.8397531509399414, "learning_rate": 7.898151688878599e-07, "loss": 0.0208, "step": 378070 }, { "epoch": 4.039532026283455, "grad_norm": 0.013422368094325066, "learning_rate": 7.898014780477999e-07, "loss": 0.0061, "step": 378080 }, { "epoch": 4.039638869597735, "grad_norm": 0.46874234080314636, "learning_rate": 7.897877868805321e-07, "loss": 0.0149, "step": 378090 }, { "epoch": 4.039745712912015, "grad_norm": 0.003822165774181485, "learning_rate": 7.897740953860721e-07, "loss": 0.0212, "step": 378100 }, { "epoch": 4.039852556226294, "grad_norm": 0.06713055819272995, "learning_rate": 7.897604035644352e-07, "loss": 0.0034, "step": 378110 }, { "epoch": 4.039959399540574, "grad_norm": 0.005095473024994135, "learning_rate": 7.897467114156371e-07, "loss": 0.004, "step": 378120 }, { "epoch": 4.040066242854853, "grad_norm": 0.24967724084854126, "learning_rate": 7.897330189396929e-07, "loss": 0.0063, "step": 378130 }, { "epoch": 4.040173086169133, "grad_norm": 0.11603477597236633, "learning_rate": 7.897193261366183e-07, "loss": 0.0005, "step": 378140 }, { "epoch": 4.040279929483413, "grad_norm": 0.011131703853607178, "learning_rate": 7.897056330064287e-07, "loss": 0.0032, "step": 378150 }, { "epoch": 4.040386772797692, "grad_norm": 3.1246535778045654, "learning_rate": 7.896919395491397e-07, "loss": 0.0021, "step": 378160 }, { "epoch": 4.040493616111972, "grad_norm": 0.018595293164253235, "learning_rate": 7.896782457647665e-07, "loss": 0.0081, "step": 378170 }, { "epoch": 4.040600459426251, "grad_norm": 4.245301723480225, "learning_rate": 7.896645516533248e-07, "loss": 0.0138, "step": 378180 }, { "epoch": 4.040707302740531, "grad_norm": 0.003189784474670887, "learning_rate": 7.8965085721483e-07, "loss": 0.0061, "step": 378190 }, { "epoch": 4.040814146054811, "grad_norm": 0.020032085478305817, "learning_rate": 7.896371624492972e-07, "loss": 0.0003, "step": 378200 }, { "epoch": 4.0409209893690905, "grad_norm": 0.0070371306501328945, "learning_rate": 7.896234673567426e-07, "loss": 0.009, "step": 378210 }, { "epoch": 4.04102783268337, "grad_norm": 0.03476614132523537, "learning_rate": 7.896097719371811e-07, "loss": 0.015, "step": 378220 }, { "epoch": 4.041134675997649, "grad_norm": 0.032310087233781815, "learning_rate": 7.895960761906281e-07, "loss": 0.0098, "step": 378230 }, { "epoch": 4.041241519311929, "grad_norm": 0.17477034032344818, "learning_rate": 7.895823801170995e-07, "loss": 0.0065, "step": 378240 }, { "epoch": 4.041348362626208, "grad_norm": 1.43550705909729, "learning_rate": 7.895686837166104e-07, "loss": 0.0161, "step": 378250 }, { "epoch": 4.041455205940489, "grad_norm": 0.6067577600479126, "learning_rate": 7.895549869891765e-07, "loss": 0.0083, "step": 378260 }, { "epoch": 4.041562049254768, "grad_norm": 0.0010559294605627656, "learning_rate": 7.895412899348133e-07, "loss": 0.0019, "step": 378270 }, { "epoch": 4.041668892569048, "grad_norm": 0.00284542771987617, "learning_rate": 7.895275925535359e-07, "loss": 0.0032, "step": 378280 }, { "epoch": 4.041775735883327, "grad_norm": 0.055381420999765396, "learning_rate": 7.895138948453601e-07, "loss": 0.0006, "step": 378290 }, { "epoch": 4.0418825791976065, "grad_norm": 0.31193453073501587, "learning_rate": 7.895001968103012e-07, "loss": 0.0082, "step": 378300 }, { "epoch": 4.041989422511886, "grad_norm": 0.018825076520442963, "learning_rate": 7.894864984483747e-07, "loss": 0.0006, "step": 378310 }, { "epoch": 4.042096265826166, "grad_norm": 0.004683594685047865, "learning_rate": 7.894727997595962e-07, "loss": 0.002, "step": 378320 }, { "epoch": 4.042203109140446, "grad_norm": 3.613696813583374, "learning_rate": 7.89459100743981e-07, "loss": 0.0105, "step": 378330 }, { "epoch": 4.042309952454725, "grad_norm": 1.5927428007125854, "learning_rate": 7.894454014015447e-07, "loss": 0.006, "step": 378340 }, { "epoch": 4.042416795769005, "grad_norm": 0.0019506950629875064, "learning_rate": 7.894317017323027e-07, "loss": 0.0019, "step": 378350 }, { "epoch": 4.042523639083284, "grad_norm": 5.6847243309021, "learning_rate": 7.894180017362704e-07, "loss": 0.0127, "step": 378360 }, { "epoch": 4.042630482397564, "grad_norm": 0.1004452034831047, "learning_rate": 7.894043014134633e-07, "loss": 0.0072, "step": 378370 }, { "epoch": 4.042737325711844, "grad_norm": 0.006572991143912077, "learning_rate": 7.89390600763897e-07, "loss": 0.008, "step": 378380 }, { "epoch": 4.042844169026123, "grad_norm": 0.005301315803080797, "learning_rate": 7.893768997875867e-07, "loss": 0.0003, "step": 378390 }, { "epoch": 4.042951012340403, "grad_norm": 1.982702374458313, "learning_rate": 7.893631984845482e-07, "loss": 0.0355, "step": 378400 }, { "epoch": 4.043057855654682, "grad_norm": 0.0046010250225663185, "learning_rate": 7.893494968547969e-07, "loss": 0.0091, "step": 378410 }, { "epoch": 4.043164698968962, "grad_norm": 0.004177434369921684, "learning_rate": 7.893357948983479e-07, "loss": 0.0026, "step": 378420 }, { "epoch": 4.043271542283241, "grad_norm": 0.042751509696245193, "learning_rate": 7.893220926152171e-07, "loss": 0.0655, "step": 378430 }, { "epoch": 4.043378385597522, "grad_norm": 0.37238574028015137, "learning_rate": 7.893083900054198e-07, "loss": 0.0035, "step": 378440 }, { "epoch": 4.043485228911801, "grad_norm": 0.2730032503604889, "learning_rate": 7.892946870689717e-07, "loss": 0.0043, "step": 378450 }, { "epoch": 4.0435920722260805, "grad_norm": 9.320733070373535, "learning_rate": 7.892809838058878e-07, "loss": 0.0034, "step": 378460 }, { "epoch": 4.04369891554036, "grad_norm": 0.23014318943023682, "learning_rate": 7.89267280216184e-07, "loss": 0.0006, "step": 378470 }, { "epoch": 4.043805758854639, "grad_norm": 0.00042788550490513444, "learning_rate": 7.892535762998753e-07, "loss": 0.0059, "step": 378480 }, { "epoch": 4.043912602168919, "grad_norm": 0.09604176133871078, "learning_rate": 7.892398720569777e-07, "loss": 0.0019, "step": 378490 }, { "epoch": 4.044019445483199, "grad_norm": 0.04852525144815445, "learning_rate": 7.892261674875065e-07, "loss": 0.0032, "step": 378500 }, { "epoch": 4.044126288797479, "grad_norm": 0.06853361427783966, "learning_rate": 7.892124625914771e-07, "loss": 0.0019, "step": 378510 }, { "epoch": 4.044233132111758, "grad_norm": 2.1812026500701904, "learning_rate": 7.891987573689049e-07, "loss": 0.019, "step": 378520 }, { "epoch": 4.044339975426038, "grad_norm": 9.738032341003418, "learning_rate": 7.891850518198058e-07, "loss": 0.0334, "step": 378530 }, { "epoch": 4.044446818740317, "grad_norm": 0.027708733454346657, "learning_rate": 7.891713459441945e-07, "loss": 0.0039, "step": 378540 }, { "epoch": 4.0445536620545965, "grad_norm": 0.10171892493963242, "learning_rate": 7.891576397420871e-07, "loss": 0.0066, "step": 378550 }, { "epoch": 4.044660505368877, "grad_norm": 0.018085293471813202, "learning_rate": 7.89143933213499e-07, "loss": 0.0044, "step": 378560 }, { "epoch": 4.044767348683156, "grad_norm": 0.5484538078308105, "learning_rate": 7.891302263584457e-07, "loss": 0.0043, "step": 378570 }, { "epoch": 4.044874191997436, "grad_norm": 0.06736228615045547, "learning_rate": 7.891165191769422e-07, "loss": 0.0025, "step": 378580 }, { "epoch": 4.044981035311715, "grad_norm": 1.2890007495880127, "learning_rate": 7.891028116690045e-07, "loss": 0.0016, "step": 378590 }, { "epoch": 4.045087878625995, "grad_norm": 0.004058843944221735, "learning_rate": 7.890891038346479e-07, "loss": 0.004, "step": 378600 }, { "epoch": 4.045194721940274, "grad_norm": 0.46297600865364075, "learning_rate": 7.890753956738881e-07, "loss": 0.0182, "step": 378610 }, { "epoch": 4.0453015652545545, "grad_norm": 0.1563778519630432, "learning_rate": 7.8906168718674e-07, "loss": 0.0012, "step": 378620 }, { "epoch": 4.045408408568834, "grad_norm": 8.303274154663086, "learning_rate": 7.890479783732196e-07, "loss": 0.0127, "step": 378630 }, { "epoch": 4.0455152518831135, "grad_norm": 0.007643744349479675, "learning_rate": 7.890342692333421e-07, "loss": 0.0054, "step": 378640 }, { "epoch": 4.045622095197393, "grad_norm": 0.0032369489781558514, "learning_rate": 7.890205597671232e-07, "loss": 0.0051, "step": 378650 }, { "epoch": 4.045728938511672, "grad_norm": 1.652673363685608, "learning_rate": 7.890068499745784e-07, "loss": 0.0273, "step": 378660 }, { "epoch": 4.045835781825952, "grad_norm": 0.005980499554425478, "learning_rate": 7.889931398557228e-07, "loss": 0.001, "step": 378670 }, { "epoch": 4.045942625140232, "grad_norm": 0.10384989529848099, "learning_rate": 7.889794294105722e-07, "loss": 0.0439, "step": 378680 }, { "epoch": 4.046049468454512, "grad_norm": 0.8409494757652283, "learning_rate": 7.889657186391422e-07, "loss": 0.0237, "step": 378690 }, { "epoch": 4.046156311768791, "grad_norm": 4.249356746673584, "learning_rate": 7.88952007541448e-07, "loss": 0.0058, "step": 378700 }, { "epoch": 4.0462631550830706, "grad_norm": 0.49628013372421265, "learning_rate": 7.88938296117505e-07, "loss": 0.0128, "step": 378710 }, { "epoch": 4.04636999839735, "grad_norm": 1.841793179512024, "learning_rate": 7.889245843673291e-07, "loss": 0.0129, "step": 378720 }, { "epoch": 4.0464768417116295, "grad_norm": 0.1424318253993988, "learning_rate": 7.889108722909354e-07, "loss": 0.0006, "step": 378730 }, { "epoch": 4.04658368502591, "grad_norm": 0.017366988584399223, "learning_rate": 7.888971598883396e-07, "loss": 0.0068, "step": 378740 }, { "epoch": 4.046690528340189, "grad_norm": 0.0025850480888038874, "learning_rate": 7.888834471595571e-07, "loss": 0.0123, "step": 378750 }, { "epoch": 4.046797371654469, "grad_norm": 0.713516354560852, "learning_rate": 7.888697341046034e-07, "loss": 0.0183, "step": 378760 }, { "epoch": 4.046904214968748, "grad_norm": 0.002755118999630213, "learning_rate": 7.888560207234938e-07, "loss": 0.0084, "step": 378770 }, { "epoch": 4.047011058283028, "grad_norm": 6.472047328948975, "learning_rate": 7.888423070162441e-07, "loss": 0.0091, "step": 378780 }, { "epoch": 4.047117901597307, "grad_norm": 0.9066492319107056, "learning_rate": 7.888285929828695e-07, "loss": 0.0027, "step": 378790 }, { "epoch": 4.0472247449115875, "grad_norm": 0.013057207688689232, "learning_rate": 7.888148786233858e-07, "loss": 0.0006, "step": 378800 }, { "epoch": 4.047331588225867, "grad_norm": 0.22997000813484192, "learning_rate": 7.888011639378083e-07, "loss": 0.0077, "step": 378810 }, { "epoch": 4.047438431540146, "grad_norm": 3.671761989593506, "learning_rate": 7.887874489261524e-07, "loss": 0.0124, "step": 378820 }, { "epoch": 4.047545274854426, "grad_norm": 0.10172372311353683, "learning_rate": 7.887737335884337e-07, "loss": 0.0134, "step": 378830 }, { "epoch": 4.047652118168705, "grad_norm": 0.04510209336876869, "learning_rate": 7.887600179246676e-07, "loss": 0.0202, "step": 378840 }, { "epoch": 4.047758961482986, "grad_norm": 0.16323545575141907, "learning_rate": 7.887463019348697e-07, "loss": 0.0127, "step": 378850 }, { "epoch": 4.047865804797265, "grad_norm": 0.0007474052254110575, "learning_rate": 7.887325856190554e-07, "loss": 0.0103, "step": 378860 }, { "epoch": 4.047972648111545, "grad_norm": 0.44587481021881104, "learning_rate": 7.887188689772403e-07, "loss": 0.0013, "step": 378870 }, { "epoch": 4.048079491425824, "grad_norm": 0.026791026815772057, "learning_rate": 7.887051520094397e-07, "loss": 0.0104, "step": 378880 }, { "epoch": 4.0481863347401035, "grad_norm": 0.2703830599784851, "learning_rate": 7.886914347156693e-07, "loss": 0.0095, "step": 378890 }, { "epoch": 4.048293178054383, "grad_norm": 0.2120426446199417, "learning_rate": 7.886777170959444e-07, "loss": 0.0197, "step": 378900 }, { "epoch": 4.048400021368663, "grad_norm": 2.5393612384796143, "learning_rate": 7.886639991502806e-07, "loss": 0.0146, "step": 378910 }, { "epoch": 4.048506864682943, "grad_norm": 0.001437517930753529, "learning_rate": 7.886502808786935e-07, "loss": 0.0022, "step": 378920 }, { "epoch": 4.048613707997222, "grad_norm": 0.0844738632440567, "learning_rate": 7.886365622811981e-07, "loss": 0.0074, "step": 378930 }, { "epoch": 4.048720551311502, "grad_norm": 0.0015464884927496314, "learning_rate": 7.886228433578106e-07, "loss": 0.0025, "step": 378940 }, { "epoch": 4.048827394625781, "grad_norm": 0.008848474361002445, "learning_rate": 7.886091241085458e-07, "loss": 0.01, "step": 378950 }, { "epoch": 4.048934237940061, "grad_norm": 0.7190715074539185, "learning_rate": 7.885954045334198e-07, "loss": 0.0277, "step": 378960 }, { "epoch": 4.049041081254341, "grad_norm": 0.0013981722295284271, "learning_rate": 7.885816846324475e-07, "loss": 0.037, "step": 378970 }, { "epoch": 4.04914792456862, "grad_norm": 0.0035024252720177174, "learning_rate": 7.885679644056451e-07, "loss": 0.0051, "step": 378980 }, { "epoch": 4.0492547678829, "grad_norm": 12.328797340393066, "learning_rate": 7.885542438530274e-07, "loss": 0.021, "step": 378990 }, { "epoch": 4.049361611197179, "grad_norm": 4.014017105102539, "learning_rate": 7.885405229746103e-07, "loss": 0.0043, "step": 379000 }, { "epoch": 4.049468454511459, "grad_norm": 5.866367340087891, "learning_rate": 7.885268017704092e-07, "loss": 0.0232, "step": 379010 }, { "epoch": 4.049575297825738, "grad_norm": 2.5033583641052246, "learning_rate": 7.885130802404394e-07, "loss": 0.028, "step": 379020 }, { "epoch": 4.049682141140019, "grad_norm": 5.221144199371338, "learning_rate": 7.884993583847166e-07, "loss": 0.0079, "step": 379030 }, { "epoch": 4.049788984454298, "grad_norm": 0.3310259282588959, "learning_rate": 7.884856362032564e-07, "loss": 0.0105, "step": 379040 }, { "epoch": 4.0498958277685775, "grad_norm": 0.0035553902853280306, "learning_rate": 7.88471913696074e-07, "loss": 0.008, "step": 379050 }, { "epoch": 4.050002671082857, "grad_norm": 0.05561083182692528, "learning_rate": 7.884581908631851e-07, "loss": 0.0106, "step": 379060 }, { "epoch": 4.050109514397136, "grad_norm": 5.416758060455322, "learning_rate": 7.884444677046052e-07, "loss": 0.0176, "step": 379070 }, { "epoch": 4.050216357711416, "grad_norm": 0.0030651413835585117, "learning_rate": 7.884307442203495e-07, "loss": 0.0016, "step": 379080 }, { "epoch": 4.050323201025696, "grad_norm": 2.0483248233795166, "learning_rate": 7.884170204104339e-07, "loss": 0.014, "step": 379090 }, { "epoch": 4.050430044339976, "grad_norm": 7.957766532897949, "learning_rate": 7.884032962748737e-07, "loss": 0.0329, "step": 379100 }, { "epoch": 4.050536887654255, "grad_norm": 0.0012213074369356036, "learning_rate": 7.883895718136844e-07, "loss": 0.0016, "step": 379110 }, { "epoch": 4.050643730968535, "grad_norm": 0.01583746075630188, "learning_rate": 7.883758470268815e-07, "loss": 0.0068, "step": 379120 }, { "epoch": 4.050750574282814, "grad_norm": 0.1755140721797943, "learning_rate": 7.883621219144805e-07, "loss": 0.0124, "step": 379130 }, { "epoch": 4.0508574175970935, "grad_norm": 0.4642311632633209, "learning_rate": 7.883483964764968e-07, "loss": 0.0015, "step": 379140 }, { "epoch": 4.050964260911374, "grad_norm": 0.15492084622383118, "learning_rate": 7.883346707129463e-07, "loss": 0.0025, "step": 379150 }, { "epoch": 4.051071104225653, "grad_norm": 0.008036399260163307, "learning_rate": 7.883209446238439e-07, "loss": 0.0006, "step": 379160 }, { "epoch": 4.051177947539933, "grad_norm": 0.015380723401904106, "learning_rate": 7.883072182092055e-07, "loss": 0.0064, "step": 379170 }, { "epoch": 4.051284790854212, "grad_norm": 9.37997055053711, "learning_rate": 7.882934914690464e-07, "loss": 0.003, "step": 379180 }, { "epoch": 4.051391634168492, "grad_norm": 7.5218048095703125, "learning_rate": 7.882797644033823e-07, "loss": 0.0088, "step": 379190 }, { "epoch": 4.051498477482771, "grad_norm": 2.2974705696105957, "learning_rate": 7.882660370122286e-07, "loss": 0.0029, "step": 379200 }, { "epoch": 4.0516053207970515, "grad_norm": 1.0827617645263672, "learning_rate": 7.882523092956008e-07, "loss": 0.0048, "step": 379210 }, { "epoch": 4.051712164111331, "grad_norm": 3.0224103927612305, "learning_rate": 7.882385812535143e-07, "loss": 0.0058, "step": 379220 }, { "epoch": 4.05181900742561, "grad_norm": 0.9510831236839294, "learning_rate": 7.882248528859847e-07, "loss": 0.0011, "step": 379230 }, { "epoch": 4.05192585073989, "grad_norm": 0.03227333724498749, "learning_rate": 7.882111241930276e-07, "loss": 0.0018, "step": 379240 }, { "epoch": 4.052032694054169, "grad_norm": 0.6608101725578308, "learning_rate": 7.881973951746582e-07, "loss": 0.0482, "step": 379250 }, { "epoch": 4.052139537368449, "grad_norm": 0.0020379521884024143, "learning_rate": 7.881836658308924e-07, "loss": 0.0067, "step": 379260 }, { "epoch": 4.052246380682729, "grad_norm": 0.0022665208671242, "learning_rate": 7.881699361617453e-07, "loss": 0.007, "step": 379270 }, { "epoch": 4.052353223997009, "grad_norm": 0.6603493094444275, "learning_rate": 7.881562061672327e-07, "loss": 0.0075, "step": 379280 }, { "epoch": 4.052460067311288, "grad_norm": 0.0014048543525859714, "learning_rate": 7.881424758473699e-07, "loss": 0.0012, "step": 379290 }, { "epoch": 4.0525669106255675, "grad_norm": 0.02996223419904709, "learning_rate": 7.881287452021726e-07, "loss": 0.0001, "step": 379300 }, { "epoch": 4.052673753939847, "grad_norm": 0.03011702559888363, "learning_rate": 7.881150142316562e-07, "loss": 0.0015, "step": 379310 }, { "epoch": 4.052780597254126, "grad_norm": 0.005661394447088242, "learning_rate": 7.881012829358361e-07, "loss": 0.0075, "step": 379320 }, { "epoch": 4.052887440568407, "grad_norm": 0.005661677569150925, "learning_rate": 7.88087551314728e-07, "loss": 0.0072, "step": 379330 }, { "epoch": 4.052994283882686, "grad_norm": 0.04900279641151428, "learning_rate": 7.880738193683472e-07, "loss": 0.0134, "step": 379340 }, { "epoch": 4.053101127196966, "grad_norm": 1.4487894773483276, "learning_rate": 7.880600870967095e-07, "loss": 0.0035, "step": 379350 }, { "epoch": 4.053207970511245, "grad_norm": 2.407985210418701, "learning_rate": 7.880463544998301e-07, "loss": 0.0091, "step": 379360 }, { "epoch": 4.053314813825525, "grad_norm": 1.7046165466308594, "learning_rate": 7.880326215777246e-07, "loss": 0.0053, "step": 379370 }, { "epoch": 4.053421657139804, "grad_norm": 0.7251260280609131, "learning_rate": 7.880188883304086e-07, "loss": 0.0053, "step": 379380 }, { "epoch": 4.053528500454084, "grad_norm": 0.005338282790035009, "learning_rate": 7.880051547578975e-07, "loss": 0.0022, "step": 379390 }, { "epoch": 4.053635343768364, "grad_norm": 0.006971965543925762, "learning_rate": 7.87991420860207e-07, "loss": 0.0024, "step": 379400 }, { "epoch": 4.053742187082643, "grad_norm": 0.010600737296044827, "learning_rate": 7.879776866373522e-07, "loss": 0.0069, "step": 379410 }, { "epoch": 4.053849030396923, "grad_norm": 0.3475434184074402, "learning_rate": 7.879639520893491e-07, "loss": 0.0026, "step": 379420 }, { "epoch": 4.053955873711202, "grad_norm": 0.23187048733234406, "learning_rate": 7.879502172162127e-07, "loss": 0.0008, "step": 379430 }, { "epoch": 4.054062717025482, "grad_norm": 0.0357787124812603, "learning_rate": 7.87936482017959e-07, "loss": 0.0053, "step": 379440 }, { "epoch": 4.054169560339762, "grad_norm": 0.9951178431510925, "learning_rate": 7.879227464946031e-07, "loss": 0.0058, "step": 379450 }, { "epoch": 4.0542764036540415, "grad_norm": 0.011574666947126389, "learning_rate": 7.879090106461608e-07, "loss": 0.0034, "step": 379460 }, { "epoch": 4.054383246968321, "grad_norm": 0.3952018916606903, "learning_rate": 7.878952744726475e-07, "loss": 0.0018, "step": 379470 }, { "epoch": 4.0544900902826, "grad_norm": 5.762959003448486, "learning_rate": 7.878815379740786e-07, "loss": 0.0099, "step": 379480 }, { "epoch": 4.05459693359688, "grad_norm": 0.25218963623046875, "learning_rate": 7.878678011504698e-07, "loss": 0.0051, "step": 379490 }, { "epoch": 4.054703776911159, "grad_norm": 0.0046058944426476955, "learning_rate": 7.878540640018366e-07, "loss": 0.014, "step": 379500 }, { "epoch": 4.05481062022544, "grad_norm": 0.001148204319179058, "learning_rate": 7.878403265281944e-07, "loss": 0.0147, "step": 379510 }, { "epoch": 4.054917463539719, "grad_norm": 0.016828343272209167, "learning_rate": 7.878265887295588e-07, "loss": 0.0005, "step": 379520 }, { "epoch": 4.055024306853999, "grad_norm": 0.005977421998977661, "learning_rate": 7.87812850605945e-07, "loss": 0.0032, "step": 379530 }, { "epoch": 4.055131150168278, "grad_norm": 10.6953125, "learning_rate": 7.877991121573689e-07, "loss": 0.0183, "step": 379540 }, { "epoch": 4.0552379934825575, "grad_norm": 1.2038135528564453, "learning_rate": 7.87785373383846e-07, "loss": 0.0354, "step": 379550 }, { "epoch": 4.055344836796838, "grad_norm": 7.63240385055542, "learning_rate": 7.877716342853915e-07, "loss": 0.0086, "step": 379560 }, { "epoch": 4.055451680111117, "grad_norm": 5.745739936828613, "learning_rate": 7.877578948620213e-07, "loss": 0.0092, "step": 379570 }, { "epoch": 4.055558523425397, "grad_norm": 0.1716192364692688, "learning_rate": 7.877441551137507e-07, "loss": 0.0021, "step": 379580 }, { "epoch": 4.055665366739676, "grad_norm": 0.0030204446520656347, "learning_rate": 7.87730415040595e-07, "loss": 0.0023, "step": 379590 }, { "epoch": 4.055772210053956, "grad_norm": 0.00513587798923254, "learning_rate": 7.877166746425702e-07, "loss": 0.0033, "step": 379600 }, { "epoch": 4.055879053368235, "grad_norm": 0.11263515800237656, "learning_rate": 7.877029339196915e-07, "loss": 0.0014, "step": 379610 }, { "epoch": 4.0559858966825155, "grad_norm": 0.004866987932473421, "learning_rate": 7.876891928719744e-07, "loss": 0.0003, "step": 379620 }, { "epoch": 4.056092739996795, "grad_norm": 0.016812587156891823, "learning_rate": 7.876754514994346e-07, "loss": 0.0084, "step": 379630 }, { "epoch": 4.056199583311074, "grad_norm": 0.022266842424869537, "learning_rate": 7.876617098020875e-07, "loss": 0.0046, "step": 379640 }, { "epoch": 4.056306426625354, "grad_norm": 0.009464272297918797, "learning_rate": 7.876479677799484e-07, "loss": 0.0785, "step": 379650 }, { "epoch": 4.056413269939633, "grad_norm": 0.022385166957974434, "learning_rate": 7.876342254330333e-07, "loss": 0.0155, "step": 379660 }, { "epoch": 4.056520113253913, "grad_norm": 0.0036382663529366255, "learning_rate": 7.876204827613573e-07, "loss": 0.0103, "step": 379670 }, { "epoch": 4.056626956568193, "grad_norm": 0.8294512629508972, "learning_rate": 7.87606739764936e-07, "loss": 0.0025, "step": 379680 }, { "epoch": 4.056733799882473, "grad_norm": 8.003650665283203, "learning_rate": 7.875929964437852e-07, "loss": 0.0032, "step": 379690 }, { "epoch": 4.056840643196752, "grad_norm": 0.0906447097659111, "learning_rate": 7.875792527979202e-07, "loss": 0.0015, "step": 379700 }, { "epoch": 4.0569474865110315, "grad_norm": 0.9832265377044678, "learning_rate": 7.875655088273563e-07, "loss": 0.0176, "step": 379710 }, { "epoch": 4.057054329825311, "grad_norm": 0.05655105412006378, "learning_rate": 7.875517645321094e-07, "loss": 0.0065, "step": 379720 }, { "epoch": 4.0571611731395905, "grad_norm": 0.043014854192733765, "learning_rate": 7.875380199121947e-07, "loss": 0.0113, "step": 379730 }, { "epoch": 4.057268016453871, "grad_norm": 1.2733982801437378, "learning_rate": 7.87524274967628e-07, "loss": 0.0122, "step": 379740 }, { "epoch": 4.05737485976815, "grad_norm": 3.171422243118286, "learning_rate": 7.875105296984247e-07, "loss": 0.0076, "step": 379750 }, { "epoch": 4.05748170308243, "grad_norm": 0.3532229959964752, "learning_rate": 7.874967841046003e-07, "loss": 0.0061, "step": 379760 }, { "epoch": 4.057588546396709, "grad_norm": 0.36618301272392273, "learning_rate": 7.874830381861702e-07, "loss": 0.0174, "step": 379770 }, { "epoch": 4.057695389710989, "grad_norm": 0.01321618165820837, "learning_rate": 7.874692919431503e-07, "loss": 0.0096, "step": 379780 }, { "epoch": 4.057802233025268, "grad_norm": 5.392817974090576, "learning_rate": 7.874555453755555e-07, "loss": 0.0097, "step": 379790 }, { "epoch": 4.0579090763395484, "grad_norm": 0.06779855489730835, "learning_rate": 7.874417984834021e-07, "loss": 0.0049, "step": 379800 }, { "epoch": 4.058015919653828, "grad_norm": 0.003964902833104134, "learning_rate": 7.874280512667051e-07, "loss": 0.0045, "step": 379810 }, { "epoch": 4.058122762968107, "grad_norm": 0.21202360093593597, "learning_rate": 7.8741430372548e-07, "loss": 0.0064, "step": 379820 }, { "epoch": 4.058229606282387, "grad_norm": 0.08620621263980865, "learning_rate": 7.874005558597425e-07, "loss": 0.0049, "step": 379830 }, { "epoch": 4.058336449596666, "grad_norm": 0.09952627122402191, "learning_rate": 7.873868076695083e-07, "loss": 0.009, "step": 379840 }, { "epoch": 4.058443292910946, "grad_norm": 0.012145166285336018, "learning_rate": 7.873730591547925e-07, "loss": 0.0003, "step": 379850 }, { "epoch": 4.058550136225226, "grad_norm": 0.8087370991706848, "learning_rate": 7.873593103156109e-07, "loss": 0.002, "step": 379860 }, { "epoch": 4.0586569795395055, "grad_norm": 0.10474852472543716, "learning_rate": 7.873455611519788e-07, "loss": 0.0098, "step": 379870 }, { "epoch": 4.058763822853785, "grad_norm": 4.600540637969971, "learning_rate": 7.873318116639121e-07, "loss": 0.0178, "step": 379880 }, { "epoch": 4.0588706661680645, "grad_norm": 1.067765474319458, "learning_rate": 7.873180618514259e-07, "loss": 0.0016, "step": 379890 }, { "epoch": 4.058977509482344, "grad_norm": 0.09171516448259354, "learning_rate": 7.87304311714536e-07, "loss": 0.006, "step": 379900 }, { "epoch": 4.059084352796623, "grad_norm": 10.672160148620605, "learning_rate": 7.872905612532579e-07, "loss": 0.0248, "step": 379910 }, { "epoch": 4.059191196110904, "grad_norm": 3.278489351272583, "learning_rate": 7.87276810467607e-07, "loss": 0.0095, "step": 379920 }, { "epoch": 4.059298039425183, "grad_norm": 0.13176876306533813, "learning_rate": 7.872630593575989e-07, "loss": 0.0074, "step": 379930 }, { "epoch": 4.059404882739463, "grad_norm": 0.006248040124773979, "learning_rate": 7.872493079232492e-07, "loss": 0.0075, "step": 379940 }, { "epoch": 4.059511726053742, "grad_norm": 0.030720097944140434, "learning_rate": 7.872355561645733e-07, "loss": 0.0179, "step": 379950 }, { "epoch": 4.059618569368022, "grad_norm": 0.007288938853889704, "learning_rate": 7.872218040815867e-07, "loss": 0.0166, "step": 379960 }, { "epoch": 4.059725412682301, "grad_norm": 0.006359360180795193, "learning_rate": 7.872080516743052e-07, "loss": 0.0146, "step": 379970 }, { "epoch": 4.059832255996581, "grad_norm": 0.005733430851250887, "learning_rate": 7.87194298942744e-07, "loss": 0.0126, "step": 379980 }, { "epoch": 4.059939099310861, "grad_norm": 0.13120315968990326, "learning_rate": 7.871805458869188e-07, "loss": 0.0025, "step": 379990 }, { "epoch": 4.06004594262514, "grad_norm": 0.016041167080402374, "learning_rate": 7.87166792506845e-07, "loss": 0.0338, "step": 380000 }, { "epoch": 4.06015278593942, "grad_norm": 1.1963911056518555, "learning_rate": 7.871530388025383e-07, "loss": 0.005, "step": 380010 }, { "epoch": 4.060259629253699, "grad_norm": 0.01583723910152912, "learning_rate": 7.871392847740142e-07, "loss": 0.0135, "step": 380020 }, { "epoch": 4.060366472567979, "grad_norm": 0.07603859156370163, "learning_rate": 7.871255304212881e-07, "loss": 0.0039, "step": 380030 }, { "epoch": 4.060473315882259, "grad_norm": 0.018641943112015724, "learning_rate": 7.871117757443755e-07, "loss": 0.0093, "step": 380040 }, { "epoch": 4.0605801591965385, "grad_norm": 0.030606167390942574, "learning_rate": 7.870980207432922e-07, "loss": 0.0042, "step": 380050 }, { "epoch": 4.060687002510818, "grad_norm": 0.14492471516132355, "learning_rate": 7.870842654180535e-07, "loss": 0.0021, "step": 380060 }, { "epoch": 4.060793845825097, "grad_norm": 12.698592185974121, "learning_rate": 7.870705097686751e-07, "loss": 0.0707, "step": 380070 }, { "epoch": 4.060900689139377, "grad_norm": 0.04789137467741966, "learning_rate": 7.870567537951722e-07, "loss": 0.0037, "step": 380080 }, { "epoch": 4.061007532453656, "grad_norm": 9.502359390258789, "learning_rate": 7.870429974975608e-07, "loss": 0.0198, "step": 380090 }, { "epoch": 4.061114375767937, "grad_norm": 0.36022236943244934, "learning_rate": 7.870292408758561e-07, "loss": 0.0012, "step": 380100 }, { "epoch": 4.061221219082216, "grad_norm": 0.38720741868019104, "learning_rate": 7.870154839300737e-07, "loss": 0.0195, "step": 380110 }, { "epoch": 4.061328062396496, "grad_norm": 9.388972282409668, "learning_rate": 7.870017266602291e-07, "loss": 0.0095, "step": 380120 }, { "epoch": 4.061434905710775, "grad_norm": 0.012931044213473797, "learning_rate": 7.869879690663381e-07, "loss": 0.0037, "step": 380130 }, { "epoch": 4.0615417490250545, "grad_norm": 0.6560770869255066, "learning_rate": 7.869742111484159e-07, "loss": 0.0118, "step": 380140 }, { "epoch": 4.061648592339334, "grad_norm": 0.017635295167565346, "learning_rate": 7.869604529064782e-07, "loss": 0.0011, "step": 380150 }, { "epoch": 4.061755435653614, "grad_norm": 0.09912315756082535, "learning_rate": 7.869466943405403e-07, "loss": 0.0177, "step": 380160 }, { "epoch": 4.061862278967894, "grad_norm": 0.019160395488142967, "learning_rate": 7.86932935450618e-07, "loss": 0.0007, "step": 380170 }, { "epoch": 4.061969122282173, "grad_norm": 0.012841266579926014, "learning_rate": 7.869191762367269e-07, "loss": 0.0004, "step": 380180 }, { "epoch": 4.062075965596453, "grad_norm": 0.6178133487701416, "learning_rate": 7.869054166988822e-07, "loss": 0.001, "step": 380190 }, { "epoch": 4.062182808910732, "grad_norm": 15.74471664428711, "learning_rate": 7.868916568371e-07, "loss": 0.0211, "step": 380200 }, { "epoch": 4.062289652225012, "grad_norm": 0.003988534677773714, "learning_rate": 7.868778966513951e-07, "loss": 0.004, "step": 380210 }, { "epoch": 4.062396495539292, "grad_norm": 12.413084983825684, "learning_rate": 7.868641361417835e-07, "loss": 0.0279, "step": 380220 }, { "epoch": 4.062503338853571, "grad_norm": 0.004405150189995766, "learning_rate": 7.868503753082807e-07, "loss": 0.0001, "step": 380230 }, { "epoch": 4.062610182167851, "grad_norm": 0.5340983867645264, "learning_rate": 7.868366141509022e-07, "loss": 0.002, "step": 380240 }, { "epoch": 4.06271702548213, "grad_norm": 0.024759527295827866, "learning_rate": 7.868228526696633e-07, "loss": 0.0181, "step": 380250 }, { "epoch": 4.06282386879641, "grad_norm": 0.012899251654744148, "learning_rate": 7.868090908645799e-07, "loss": 0.0044, "step": 380260 }, { "epoch": 4.06293071211069, "grad_norm": 0.0018176422454416752, "learning_rate": 7.867953287356674e-07, "loss": 0.0167, "step": 380270 }, { "epoch": 4.06303755542497, "grad_norm": 13.300958633422852, "learning_rate": 7.867815662829414e-07, "loss": 0.0066, "step": 380280 }, { "epoch": 4.063144398739249, "grad_norm": 0.3686738908290863, "learning_rate": 7.867678035064172e-07, "loss": 0.0005, "step": 380290 }, { "epoch": 4.0632512420535285, "grad_norm": 0.0035131347831338644, "learning_rate": 7.867540404061105e-07, "loss": 0.0018, "step": 380300 }, { "epoch": 4.063358085367808, "grad_norm": 8.11133861541748, "learning_rate": 7.867402769820371e-07, "loss": 0.0109, "step": 380310 }, { "epoch": 4.063464928682087, "grad_norm": 0.085118867456913, "learning_rate": 7.867265132342121e-07, "loss": 0.0211, "step": 380320 }, { "epoch": 4.063571771996367, "grad_norm": 0.0022071741987019777, "learning_rate": 7.867127491626513e-07, "loss": 0.0045, "step": 380330 }, { "epoch": 4.063678615310647, "grad_norm": 0.012833448126912117, "learning_rate": 7.866989847673701e-07, "loss": 0.0012, "step": 380340 }, { "epoch": 4.063785458624927, "grad_norm": 0.005251913331449032, "learning_rate": 7.866852200483841e-07, "loss": 0.0073, "step": 380350 }, { "epoch": 4.063892301939206, "grad_norm": 0.06312178075313568, "learning_rate": 7.866714550057088e-07, "loss": 0.0083, "step": 380360 }, { "epoch": 4.063999145253486, "grad_norm": 5.647246360778809, "learning_rate": 7.8665768963936e-07, "loss": 0.0093, "step": 380370 }, { "epoch": 4.064105988567765, "grad_norm": 0.007101124618202448, "learning_rate": 7.866439239493528e-07, "loss": 0.0129, "step": 380380 }, { "epoch": 4.064212831882045, "grad_norm": 0.05176112428307533, "learning_rate": 7.86630157935703e-07, "loss": 0.0046, "step": 380390 }, { "epoch": 4.064319675196325, "grad_norm": 0.0018665401730686426, "learning_rate": 7.866163915984263e-07, "loss": 0.0033, "step": 380400 }, { "epoch": 4.064426518510604, "grad_norm": 0.002200489863753319, "learning_rate": 7.866026249375379e-07, "loss": 0.0124, "step": 380410 }, { "epoch": 4.064533361824884, "grad_norm": 0.015065492130815983, "learning_rate": 7.865888579530536e-07, "loss": 0.0056, "step": 380420 }, { "epoch": 4.064640205139163, "grad_norm": 0.0028126512188464403, "learning_rate": 7.865750906449889e-07, "loss": 0.0027, "step": 380430 }, { "epoch": 4.064747048453443, "grad_norm": 0.9491250514984131, "learning_rate": 7.865613230133592e-07, "loss": 0.041, "step": 380440 }, { "epoch": 4.064853891767723, "grad_norm": 0.32561466097831726, "learning_rate": 7.865475550581801e-07, "loss": 0.0369, "step": 380450 }, { "epoch": 4.0649607350820025, "grad_norm": 0.5302801132202148, "learning_rate": 7.865337867794672e-07, "loss": 0.0002, "step": 380460 }, { "epoch": 4.065067578396282, "grad_norm": 0.005583649035543203, "learning_rate": 7.865200181772361e-07, "loss": 0.0004, "step": 380470 }, { "epoch": 4.065174421710561, "grad_norm": 0.0026600302662700415, "learning_rate": 7.865062492515022e-07, "loss": 0.0112, "step": 380480 }, { "epoch": 4.065281265024841, "grad_norm": 2.73350191116333, "learning_rate": 7.864924800022812e-07, "loss": 0.01, "step": 380490 }, { "epoch": 4.06538810833912, "grad_norm": 0.2142282873392105, "learning_rate": 7.864787104295885e-07, "loss": 0.0067, "step": 380500 }, { "epoch": 4.065494951653401, "grad_norm": 0.05249708890914917, "learning_rate": 7.864649405334397e-07, "loss": 0.0014, "step": 380510 }, { "epoch": 4.06560179496768, "grad_norm": 9.657905578613281, "learning_rate": 7.864511703138503e-07, "loss": 0.0131, "step": 380520 }, { "epoch": 4.06570863828196, "grad_norm": 0.20911118388175964, "learning_rate": 7.86437399770836e-07, "loss": 0.0156, "step": 380530 }, { "epoch": 4.065815481596239, "grad_norm": 0.5160505175590515, "learning_rate": 7.864236289044124e-07, "loss": 0.0152, "step": 380540 }, { "epoch": 4.0659223249105185, "grad_norm": 0.0009701244416646659, "learning_rate": 7.864098577145948e-07, "loss": 0.0079, "step": 380550 }, { "epoch": 4.066029168224798, "grad_norm": 0.004349473863840103, "learning_rate": 7.863960862013987e-07, "loss": 0.0132, "step": 380560 }, { "epoch": 4.066136011539078, "grad_norm": 0.0003817473188973963, "learning_rate": 7.863823143648401e-07, "loss": 0.0011, "step": 380570 }, { "epoch": 4.066242854853358, "grad_norm": 4.350709438323975, "learning_rate": 7.86368542204934e-07, "loss": 0.0006, "step": 380580 }, { "epoch": 4.066349698167637, "grad_norm": 0.007367112208157778, "learning_rate": 7.863547697216962e-07, "loss": 0.0012, "step": 380590 }, { "epoch": 4.066456541481917, "grad_norm": 0.0031214384362101555, "learning_rate": 7.863409969151424e-07, "loss": 0.0029, "step": 380600 }, { "epoch": 4.066563384796196, "grad_norm": 0.04305819794535637, "learning_rate": 7.863272237852879e-07, "loss": 0.0003, "step": 380610 }, { "epoch": 4.066670228110476, "grad_norm": 0.05956088379025459, "learning_rate": 7.863134503321485e-07, "loss": 0.0035, "step": 380620 }, { "epoch": 4.066777071424756, "grad_norm": 0.6701927185058594, "learning_rate": 7.862996765557395e-07, "loss": 0.018, "step": 380630 }, { "epoch": 4.066883914739035, "grad_norm": 0.5798026323318481, "learning_rate": 7.862859024560765e-07, "loss": 0.0118, "step": 380640 }, { "epoch": 4.066990758053315, "grad_norm": 1.1006172895431519, "learning_rate": 7.862721280331751e-07, "loss": 0.0211, "step": 380650 }, { "epoch": 4.067097601367594, "grad_norm": 1.4517033100128174, "learning_rate": 7.86258353287051e-07, "loss": 0.0345, "step": 380660 }, { "epoch": 4.067204444681874, "grad_norm": 0.01772177778184414, "learning_rate": 7.862445782177195e-07, "loss": 0.0053, "step": 380670 }, { "epoch": 4.067311287996153, "grad_norm": 2.53765869140625, "learning_rate": 7.862308028251962e-07, "loss": 0.0272, "step": 380680 }, { "epoch": 4.067418131310434, "grad_norm": 0.02643473818898201, "learning_rate": 7.86217027109497e-07, "loss": 0.0055, "step": 380690 }, { "epoch": 4.067524974624713, "grad_norm": 0.023421766236424446, "learning_rate": 7.862032510706369e-07, "loss": 0.0152, "step": 380700 }, { "epoch": 4.0676318179389925, "grad_norm": 0.8037922382354736, "learning_rate": 7.861894747086318e-07, "loss": 0.0076, "step": 380710 }, { "epoch": 4.067738661253272, "grad_norm": 1.609498143196106, "learning_rate": 7.861756980234973e-07, "loss": 0.008, "step": 380720 }, { "epoch": 4.0678455045675515, "grad_norm": 2.469022512435913, "learning_rate": 7.861619210152487e-07, "loss": 0.0138, "step": 380730 }, { "epoch": 4.067952347881831, "grad_norm": 0.003695062827318907, "learning_rate": 7.861481436839017e-07, "loss": 0.0088, "step": 380740 }, { "epoch": 4.068059191196111, "grad_norm": 1.5821340084075928, "learning_rate": 7.861343660294719e-07, "loss": 0.0371, "step": 380750 }, { "epoch": 4.068166034510391, "grad_norm": 0.07513145357370377, "learning_rate": 7.861205880519748e-07, "loss": 0.0139, "step": 380760 }, { "epoch": 4.06827287782467, "grad_norm": 0.0030445766169577837, "learning_rate": 7.86106809751426e-07, "loss": 0.003, "step": 380770 }, { "epoch": 4.06837972113895, "grad_norm": 0.02440536767244339, "learning_rate": 7.860930311278411e-07, "loss": 0.0058, "step": 380780 }, { "epoch": 4.068486564453229, "grad_norm": 0.00020343590585980564, "learning_rate": 7.860792521812354e-07, "loss": 0.0168, "step": 380790 }, { "epoch": 4.0685934077675086, "grad_norm": 0.7482175230979919, "learning_rate": 7.860654729116247e-07, "loss": 0.0052, "step": 380800 }, { "epoch": 4.068700251081789, "grad_norm": 0.061749380081892014, "learning_rate": 7.860516933190245e-07, "loss": 0.0112, "step": 380810 }, { "epoch": 4.068807094396068, "grad_norm": 0.0022677055094391108, "learning_rate": 7.860379134034502e-07, "loss": 0.0048, "step": 380820 }, { "epoch": 4.068913937710348, "grad_norm": 10.271819114685059, "learning_rate": 7.860241331649178e-07, "loss": 0.0621, "step": 380830 }, { "epoch": 4.069020781024627, "grad_norm": 10.877334594726562, "learning_rate": 7.860103526034423e-07, "loss": 0.0055, "step": 380840 }, { "epoch": 4.069127624338907, "grad_norm": 0.8033804893493652, "learning_rate": 7.859965717190396e-07, "loss": 0.0287, "step": 380850 }, { "epoch": 4.069234467653186, "grad_norm": 0.00748052354902029, "learning_rate": 7.859827905117252e-07, "loss": 0.0113, "step": 380860 }, { "epoch": 4.0693413109674665, "grad_norm": 6.022890090942383, "learning_rate": 7.859690089815148e-07, "loss": 0.0138, "step": 380870 }, { "epoch": 4.069448154281746, "grad_norm": 0.012211974710226059, "learning_rate": 7.859552271284235e-07, "loss": 0.0508, "step": 380880 }, { "epoch": 4.0695549975960255, "grad_norm": 0.019751504063606262, "learning_rate": 7.859414449524674e-07, "loss": 0.0246, "step": 380890 }, { "epoch": 4.069661840910305, "grad_norm": 4.495893955230713, "learning_rate": 7.859276624536617e-07, "loss": 0.0052, "step": 380900 }, { "epoch": 4.069768684224584, "grad_norm": 0.016637742519378662, "learning_rate": 7.859138796320221e-07, "loss": 0.0034, "step": 380910 }, { "epoch": 4.069875527538864, "grad_norm": 0.34373095631599426, "learning_rate": 7.859000964875644e-07, "loss": 0.0032, "step": 380920 }, { "epoch": 4.069982370853144, "grad_norm": 0.01660417579114437, "learning_rate": 7.858863130203035e-07, "loss": 0.0027, "step": 380930 }, { "epoch": 4.070089214167424, "grad_norm": 0.626067042350769, "learning_rate": 7.858725292302557e-07, "loss": 0.0015, "step": 380940 }, { "epoch": 4.070196057481703, "grad_norm": 0.08298629522323608, "learning_rate": 7.858587451174361e-07, "loss": 0.0025, "step": 380950 }, { "epoch": 4.070302900795983, "grad_norm": 0.02518892101943493, "learning_rate": 7.858449606818605e-07, "loss": 0.0072, "step": 380960 }, { "epoch": 4.070409744110262, "grad_norm": 0.0133716044947505, "learning_rate": 7.858311759235443e-07, "loss": 0.005, "step": 380970 }, { "epoch": 4.0705165874245415, "grad_norm": 4.901330471038818, "learning_rate": 7.858173908425031e-07, "loss": 0.0135, "step": 380980 }, { "epoch": 4.070623430738822, "grad_norm": 6.899816513061523, "learning_rate": 7.858036054387525e-07, "loss": 0.0091, "step": 380990 }, { "epoch": 4.070730274053101, "grad_norm": 1.2782185077667236, "learning_rate": 7.857898197123081e-07, "loss": 0.0138, "step": 381000 }, { "epoch": 4.070837117367381, "grad_norm": 0.49077698588371277, "learning_rate": 7.857760336631853e-07, "loss": 0.0557, "step": 381010 }, { "epoch": 4.07094396068166, "grad_norm": 0.461283415555954, "learning_rate": 7.857622472914e-07, "loss": 0.0034, "step": 381020 }, { "epoch": 4.07105080399594, "grad_norm": 2.3451123237609863, "learning_rate": 7.857484605969674e-07, "loss": 0.0116, "step": 381030 }, { "epoch": 4.071157647310219, "grad_norm": 0.005961196962743998, "learning_rate": 7.857346735799033e-07, "loss": 0.0076, "step": 381040 }, { "epoch": 4.0712644906244995, "grad_norm": 0.3979695439338684, "learning_rate": 7.857208862402231e-07, "loss": 0.0052, "step": 381050 }, { "epoch": 4.071371333938779, "grad_norm": 0.31198394298553467, "learning_rate": 7.857070985779426e-07, "loss": 0.0028, "step": 381060 }, { "epoch": 4.071478177253058, "grad_norm": 0.0021270920988172293, "learning_rate": 7.856933105930771e-07, "loss": 0.0025, "step": 381070 }, { "epoch": 4.071585020567338, "grad_norm": 0.1623070389032364, "learning_rate": 7.856795222856423e-07, "loss": 0.001, "step": 381080 }, { "epoch": 4.071691863881617, "grad_norm": 0.3567220866680145, "learning_rate": 7.856657336556538e-07, "loss": 0.0042, "step": 381090 }, { "epoch": 4.071798707195898, "grad_norm": 0.058370910584926605, "learning_rate": 7.856519447031271e-07, "loss": 0.002, "step": 381100 }, { "epoch": 4.071905550510177, "grad_norm": 0.15875931084156036, "learning_rate": 7.856381554280778e-07, "loss": 0.0099, "step": 381110 }, { "epoch": 4.072012393824457, "grad_norm": 0.06015968322753906, "learning_rate": 7.856243658305215e-07, "loss": 0.0009, "step": 381120 }, { "epoch": 4.072119237138736, "grad_norm": 0.03215743601322174, "learning_rate": 7.856105759104737e-07, "loss": 0.0079, "step": 381130 }, { "epoch": 4.0722260804530155, "grad_norm": 0.2167959064245224, "learning_rate": 7.8559678566795e-07, "loss": 0.0045, "step": 381140 }, { "epoch": 4.072332923767295, "grad_norm": 0.0012515849666669965, "learning_rate": 7.855829951029659e-07, "loss": 0.0023, "step": 381150 }, { "epoch": 4.072439767081575, "grad_norm": 5.307242393493652, "learning_rate": 7.855692042155372e-07, "loss": 0.0056, "step": 381160 }, { "epoch": 4.072546610395855, "grad_norm": 0.017120549455285072, "learning_rate": 7.855554130056791e-07, "loss": 0.002, "step": 381170 }, { "epoch": 4.072653453710134, "grad_norm": 0.8435091972351074, "learning_rate": 7.855416214734076e-07, "loss": 0.0144, "step": 381180 }, { "epoch": 4.072760297024414, "grad_norm": 2.296307325363159, "learning_rate": 7.85527829618738e-07, "loss": 0.0022, "step": 381190 }, { "epoch": 4.072867140338693, "grad_norm": 7.749426364898682, "learning_rate": 7.85514037441686e-07, "loss": 0.0066, "step": 381200 }, { "epoch": 4.072973983652973, "grad_norm": 0.24366602301597595, "learning_rate": 7.85500244942267e-07, "loss": 0.0064, "step": 381210 }, { "epoch": 4.073080826967253, "grad_norm": 0.5113434791564941, "learning_rate": 7.854864521204967e-07, "loss": 0.0015, "step": 381220 }, { "epoch": 4.073187670281532, "grad_norm": 0.008641036227345467, "learning_rate": 7.854726589763906e-07, "loss": 0.0007, "step": 381230 }, { "epoch": 4.073294513595812, "grad_norm": 0.019714584574103355, "learning_rate": 7.854588655099645e-07, "loss": 0.0213, "step": 381240 }, { "epoch": 4.073401356910091, "grad_norm": 0.0009606950334273279, "learning_rate": 7.854450717212336e-07, "loss": 0.0101, "step": 381250 }, { "epoch": 4.073508200224371, "grad_norm": 0.051395710557699203, "learning_rate": 7.854312776102136e-07, "loss": 0.0057, "step": 381260 }, { "epoch": 4.07361504353865, "grad_norm": 0.05521904304623604, "learning_rate": 7.854174831769204e-07, "loss": 0.0025, "step": 381270 }, { "epoch": 4.073721886852931, "grad_norm": 0.004308345727622509, "learning_rate": 7.854036884213691e-07, "loss": 0.0027, "step": 381280 }, { "epoch": 4.07382873016721, "grad_norm": 8.540968894958496, "learning_rate": 7.853898933435756e-07, "loss": 0.0079, "step": 381290 }, { "epoch": 4.0739355734814895, "grad_norm": 0.01430437620729208, "learning_rate": 7.853760979435554e-07, "loss": 0.0047, "step": 381300 }, { "epoch": 4.074042416795769, "grad_norm": 0.01263928972184658, "learning_rate": 7.853623022213241e-07, "loss": 0.0059, "step": 381310 }, { "epoch": 4.074149260110048, "grad_norm": 9.328753471374512, "learning_rate": 7.853485061768971e-07, "loss": 0.0089, "step": 381320 }, { "epoch": 4.074256103424328, "grad_norm": 0.13680905103683472, "learning_rate": 7.853347098102901e-07, "loss": 0.0086, "step": 381330 }, { "epoch": 4.074362946738608, "grad_norm": 2.148590087890625, "learning_rate": 7.853209131215186e-07, "loss": 0.0057, "step": 381340 }, { "epoch": 4.074469790052888, "grad_norm": 0.16958662867546082, "learning_rate": 7.853071161105984e-07, "loss": 0.0054, "step": 381350 }, { "epoch": 4.074576633367167, "grad_norm": 0.8180366158485413, "learning_rate": 7.852933187775449e-07, "loss": 0.0021, "step": 381360 }, { "epoch": 4.074683476681447, "grad_norm": 3.3015565872192383, "learning_rate": 7.852795211223738e-07, "loss": 0.0023, "step": 381370 }, { "epoch": 4.074790319995726, "grad_norm": 0.037757840007543564, "learning_rate": 7.852657231451005e-07, "loss": 0.0133, "step": 381380 }, { "epoch": 4.0748971633100055, "grad_norm": 0.02745053730905056, "learning_rate": 7.852519248457405e-07, "loss": 0.0071, "step": 381390 }, { "epoch": 4.075004006624286, "grad_norm": 2.8407094478607178, "learning_rate": 7.852381262243098e-07, "loss": 0.0151, "step": 381400 }, { "epoch": 4.075110849938565, "grad_norm": 0.01785103976726532, "learning_rate": 7.852243272808238e-07, "loss": 0.0017, "step": 381410 }, { "epoch": 4.075217693252845, "grad_norm": 0.37904900312423706, "learning_rate": 7.852105280152977e-07, "loss": 0.0028, "step": 381420 }, { "epoch": 4.075324536567124, "grad_norm": 8.077923774719238, "learning_rate": 7.851967284277476e-07, "loss": 0.0088, "step": 381430 }, { "epoch": 4.075431379881404, "grad_norm": 1.461173415184021, "learning_rate": 7.851829285181888e-07, "loss": 0.0109, "step": 381440 }, { "epoch": 4.075538223195683, "grad_norm": 1.0372040271759033, "learning_rate": 7.85169128286637e-07, "loss": 0.0011, "step": 381450 }, { "epoch": 4.0756450665099635, "grad_norm": 0.04958156868815422, "learning_rate": 7.851553277331078e-07, "loss": 0.0161, "step": 381460 }, { "epoch": 4.075751909824243, "grad_norm": 7.439751148223877, "learning_rate": 7.851415268576165e-07, "loss": 0.0129, "step": 381470 }, { "epoch": 4.075858753138522, "grad_norm": 0.6197264194488525, "learning_rate": 7.85127725660179e-07, "loss": 0.0058, "step": 381480 }, { "epoch": 4.075965596452802, "grad_norm": 0.018463537096977234, "learning_rate": 7.851139241408107e-07, "loss": 0.0076, "step": 381490 }, { "epoch": 4.076072439767081, "grad_norm": 0.14891240000724792, "learning_rate": 7.851001222995274e-07, "loss": 0.015, "step": 381500 }, { "epoch": 4.076179283081361, "grad_norm": 0.0032178335823118687, "learning_rate": 7.850863201363445e-07, "loss": 0.017, "step": 381510 }, { "epoch": 4.076286126395641, "grad_norm": 0.003216054756194353, "learning_rate": 7.850725176512775e-07, "loss": 0.0087, "step": 381520 }, { "epoch": 4.076392969709921, "grad_norm": 2.9206910133361816, "learning_rate": 7.850587148443423e-07, "loss": 0.0419, "step": 381530 }, { "epoch": 4.0764998130242, "grad_norm": 0.04561484605073929, "learning_rate": 7.850449117155542e-07, "loss": 0.0055, "step": 381540 }, { "epoch": 4.0766066563384795, "grad_norm": 0.019990280270576477, "learning_rate": 7.85031108264929e-07, "loss": 0.0141, "step": 381550 }, { "epoch": 4.076713499652759, "grad_norm": 11.13584041595459, "learning_rate": 7.85017304492482e-07, "loss": 0.0126, "step": 381560 }, { "epoch": 4.076820342967038, "grad_norm": 0.017269086092710495, "learning_rate": 7.85003500398229e-07, "loss": 0.0029, "step": 381570 }, { "epoch": 4.076927186281319, "grad_norm": 0.007073657121509314, "learning_rate": 7.849896959821855e-07, "loss": 0.0001, "step": 381580 }, { "epoch": 4.077034029595598, "grad_norm": 0.08888091146945953, "learning_rate": 7.849758912443672e-07, "loss": 0.0035, "step": 381590 }, { "epoch": 4.077140872909878, "grad_norm": 0.6617403030395508, "learning_rate": 7.849620861847897e-07, "loss": 0.0024, "step": 381600 }, { "epoch": 4.077247716224157, "grad_norm": 0.000978930271230638, "learning_rate": 7.849482808034685e-07, "loss": 0.0169, "step": 381610 }, { "epoch": 4.077354559538437, "grad_norm": 0.007939756847918034, "learning_rate": 7.849344751004189e-07, "loss": 0.0038, "step": 381620 }, { "epoch": 4.077461402852716, "grad_norm": 0.037357084453105927, "learning_rate": 7.849206690756569e-07, "loss": 0.0217, "step": 381630 }, { "epoch": 4.077568246166996, "grad_norm": 0.16310030221939087, "learning_rate": 7.849068627291982e-07, "loss": 0.0007, "step": 381640 }, { "epoch": 4.077675089481276, "grad_norm": 0.0036055520176887512, "learning_rate": 7.848930560610578e-07, "loss": 0.0556, "step": 381650 }, { "epoch": 4.077781932795555, "grad_norm": 0.0014099347172304988, "learning_rate": 7.848792490712519e-07, "loss": 0.0203, "step": 381660 }, { "epoch": 4.077888776109835, "grad_norm": 3.5185413360595703, "learning_rate": 7.848654417597955e-07, "loss": 0.0063, "step": 381670 }, { "epoch": 4.077995619424114, "grad_norm": 3.334519863128662, "learning_rate": 7.848516341267049e-07, "loss": 0.0082, "step": 381680 }, { "epoch": 4.078102462738394, "grad_norm": 0.006041206419467926, "learning_rate": 7.848378261719952e-07, "loss": 0.007, "step": 381690 }, { "epoch": 4.078209306052674, "grad_norm": 4.919384002685547, "learning_rate": 7.848240178956819e-07, "loss": 0.0096, "step": 381700 }, { "epoch": 4.0783161493669535, "grad_norm": 0.07578343152999878, "learning_rate": 7.848102092977811e-07, "loss": 0.0129, "step": 381710 }, { "epoch": 4.078422992681233, "grad_norm": 0.008045597933232784, "learning_rate": 7.847964003783078e-07, "loss": 0.0066, "step": 381720 }, { "epoch": 4.078529835995512, "grad_norm": 0.15621215105056763, "learning_rate": 7.847825911372779e-07, "loss": 0.0143, "step": 381730 }, { "epoch": 4.078636679309792, "grad_norm": 0.42677539587020874, "learning_rate": 7.847687815747071e-07, "loss": 0.0007, "step": 381740 }, { "epoch": 4.078743522624071, "grad_norm": 0.026716962456703186, "learning_rate": 7.847549716906109e-07, "loss": 0.0144, "step": 381750 }, { "epoch": 4.078850365938352, "grad_norm": 1.8968249559402466, "learning_rate": 7.847411614850046e-07, "loss": 0.0048, "step": 381760 }, { "epoch": 4.078957209252631, "grad_norm": 0.1406378149986267, "learning_rate": 7.847273509579042e-07, "loss": 0.0007, "step": 381770 }, { "epoch": 4.079064052566911, "grad_norm": 0.5904219150543213, "learning_rate": 7.847135401093252e-07, "loss": 0.0094, "step": 381780 }, { "epoch": 4.07917089588119, "grad_norm": 0.0030222234781831503, "learning_rate": 7.846997289392829e-07, "loss": 0.0063, "step": 381790 }, { "epoch": 4.0792777391954695, "grad_norm": 0.005033873487263918, "learning_rate": 7.846859174477933e-07, "loss": 0.0037, "step": 381800 }, { "epoch": 4.07938458250975, "grad_norm": 0.0019678776152431965, "learning_rate": 7.846721056348717e-07, "loss": 0.0062, "step": 381810 }, { "epoch": 4.079491425824029, "grad_norm": 1.8942705392837524, "learning_rate": 7.846582935005339e-07, "loss": 0.0099, "step": 381820 }, { "epoch": 4.079598269138309, "grad_norm": 3.1250758171081543, "learning_rate": 7.846444810447954e-07, "loss": 0.005, "step": 381830 }, { "epoch": 4.079705112452588, "grad_norm": 1.0426737070083618, "learning_rate": 7.846306682676718e-07, "loss": 0.0088, "step": 381840 }, { "epoch": 4.079811955766868, "grad_norm": 0.05939071625471115, "learning_rate": 7.846168551691787e-07, "loss": 0.0244, "step": 381850 }, { "epoch": 4.079918799081147, "grad_norm": 2.3308634757995605, "learning_rate": 7.846030417493317e-07, "loss": 0.0085, "step": 381860 }, { "epoch": 4.0800256423954275, "grad_norm": 0.0038121433462947607, "learning_rate": 7.845892280081463e-07, "loss": 0.0001, "step": 381870 }, { "epoch": 4.080132485709707, "grad_norm": 0.017554115504026413, "learning_rate": 7.845754139456382e-07, "loss": 0.0007, "step": 381880 }, { "epoch": 4.0802393290239864, "grad_norm": 0.0020304457284510136, "learning_rate": 7.84561599561823e-07, "loss": 0.0021, "step": 381890 }, { "epoch": 4.080346172338266, "grad_norm": 1.8417906761169434, "learning_rate": 7.845477848567162e-07, "loss": 0.0035, "step": 381900 }, { "epoch": 4.080453015652545, "grad_norm": 0.0742780864238739, "learning_rate": 7.845339698303337e-07, "loss": 0.0071, "step": 381910 }, { "epoch": 4.080559858966825, "grad_norm": 6.079065799713135, "learning_rate": 7.845201544826907e-07, "loss": 0.0148, "step": 381920 }, { "epoch": 4.080666702281105, "grad_norm": 0.013658842071890831, "learning_rate": 7.845063388138029e-07, "loss": 0.0178, "step": 381930 }, { "epoch": 4.080773545595385, "grad_norm": 5.7787604331970215, "learning_rate": 7.844925228236861e-07, "loss": 0.015, "step": 381940 }, { "epoch": 4.080880388909664, "grad_norm": 4.890148162841797, "learning_rate": 7.844787065123557e-07, "loss": 0.0221, "step": 381950 }, { "epoch": 4.0809872322239436, "grad_norm": 1.351906418800354, "learning_rate": 7.844648898798274e-07, "loss": 0.0032, "step": 381960 }, { "epoch": 4.081094075538223, "grad_norm": 2.6226677894592285, "learning_rate": 7.844510729261169e-07, "loss": 0.0029, "step": 381970 }, { "epoch": 4.0812009188525025, "grad_norm": 9.22649097442627, "learning_rate": 7.844372556512395e-07, "loss": 0.0259, "step": 381980 }, { "epoch": 4.081307762166783, "grad_norm": 0.04705558344721794, "learning_rate": 7.84423438055211e-07, "loss": 0.0121, "step": 381990 }, { "epoch": 4.081414605481062, "grad_norm": 1.1515671014785767, "learning_rate": 7.84409620138047e-07, "loss": 0.0079, "step": 382000 }, { "epoch": 4.081521448795342, "grad_norm": 1.8948003053665161, "learning_rate": 7.843958018997632e-07, "loss": 0.0046, "step": 382010 }, { "epoch": 4.081628292109621, "grad_norm": 4.760286808013916, "learning_rate": 7.843819833403748e-07, "loss": 0.0109, "step": 382020 }, { "epoch": 4.081735135423901, "grad_norm": 1.0515466928482056, "learning_rate": 7.843681644598978e-07, "loss": 0.0011, "step": 382030 }, { "epoch": 4.08184197873818, "grad_norm": 0.00319391000084579, "learning_rate": 7.843543452583478e-07, "loss": 0.0179, "step": 382040 }, { "epoch": 4.0819488220524605, "grad_norm": 0.01741562783718109, "learning_rate": 7.8434052573574e-07, "loss": 0.0033, "step": 382050 }, { "epoch": 4.08205566536674, "grad_norm": 0.16442309319972992, "learning_rate": 7.843267058920906e-07, "loss": 0.0015, "step": 382060 }, { "epoch": 4.082162508681019, "grad_norm": 0.020850127562880516, "learning_rate": 7.843128857274147e-07, "loss": 0.0357, "step": 382070 }, { "epoch": 4.082269351995299, "grad_norm": 0.01593068428337574, "learning_rate": 7.842990652417283e-07, "loss": 0.0075, "step": 382080 }, { "epoch": 4.082376195309578, "grad_norm": 0.002532905898988247, "learning_rate": 7.842852444350466e-07, "loss": 0.0052, "step": 382090 }, { "epoch": 4.082483038623858, "grad_norm": 0.009800819680094719, "learning_rate": 7.842714233073854e-07, "loss": 0.0171, "step": 382100 }, { "epoch": 4.082589881938138, "grad_norm": 2.11551570892334, "learning_rate": 7.842576018587603e-07, "loss": 0.0024, "step": 382110 }, { "epoch": 4.082696725252418, "grad_norm": 0.7851659655570984, "learning_rate": 7.84243780089187e-07, "loss": 0.0023, "step": 382120 }, { "epoch": 4.082803568566697, "grad_norm": 2.406280040740967, "learning_rate": 7.84229957998681e-07, "loss": 0.002, "step": 382130 }, { "epoch": 4.0829104118809765, "grad_norm": 2.2784719467163086, "learning_rate": 7.842161355872579e-07, "loss": 0.0237, "step": 382140 }, { "epoch": 4.083017255195256, "grad_norm": 0.0050278836861252785, "learning_rate": 7.842023128549334e-07, "loss": 0.0048, "step": 382150 }, { "epoch": 4.083124098509535, "grad_norm": 0.0072937305085361, "learning_rate": 7.84188489801723e-07, "loss": 0.0005, "step": 382160 }, { "epoch": 4.083230941823816, "grad_norm": 2.6095783710479736, "learning_rate": 7.841746664276423e-07, "loss": 0.0356, "step": 382170 }, { "epoch": 4.083337785138095, "grad_norm": 1.0553371906280518, "learning_rate": 7.841608427327071e-07, "loss": 0.0036, "step": 382180 }, { "epoch": 4.083444628452375, "grad_norm": 1.0370299816131592, "learning_rate": 7.841470187169326e-07, "loss": 0.0071, "step": 382190 }, { "epoch": 4.083551471766654, "grad_norm": 2.832839250564575, "learning_rate": 7.84133194380335e-07, "loss": 0.0171, "step": 382200 }, { "epoch": 4.083658315080934, "grad_norm": 0.004879863932728767, "learning_rate": 7.841193697229294e-07, "loss": 0.0113, "step": 382210 }, { "epoch": 4.083765158395213, "grad_norm": 0.12527048587799072, "learning_rate": 7.841055447447315e-07, "loss": 0.0059, "step": 382220 }, { "epoch": 4.083872001709493, "grad_norm": 0.013819067738950253, "learning_rate": 7.840917194457571e-07, "loss": 0.0052, "step": 382230 }, { "epoch": 4.083978845023773, "grad_norm": 0.0038204446900635958, "learning_rate": 7.840778938260218e-07, "loss": 0.0013, "step": 382240 }, { "epoch": 4.084085688338052, "grad_norm": 1.3777565956115723, "learning_rate": 7.84064067885541e-07, "loss": 0.0076, "step": 382250 }, { "epoch": 4.084192531652332, "grad_norm": 0.3849755823612213, "learning_rate": 7.840502416243305e-07, "loss": 0.0069, "step": 382260 }, { "epoch": 4.084299374966611, "grad_norm": 0.045043397694826126, "learning_rate": 7.840364150424057e-07, "loss": 0.007, "step": 382270 }, { "epoch": 4.084406218280891, "grad_norm": 0.005673954263329506, "learning_rate": 7.840225881397824e-07, "loss": 0.0185, "step": 382280 }, { "epoch": 4.084513061595171, "grad_norm": 0.7078444361686707, "learning_rate": 7.840087609164762e-07, "loss": 0.003, "step": 382290 }, { "epoch": 4.0846199049094505, "grad_norm": 0.007896743714809418, "learning_rate": 7.839949333725026e-07, "loss": 0.0144, "step": 382300 }, { "epoch": 4.08472674822373, "grad_norm": 0.004859957844018936, "learning_rate": 7.839811055078773e-07, "loss": 0.0076, "step": 382310 }, { "epoch": 4.084833591538009, "grad_norm": 0.7975068092346191, "learning_rate": 7.83967277322616e-07, "loss": 0.0111, "step": 382320 }, { "epoch": 4.084940434852289, "grad_norm": 0.005514068529009819, "learning_rate": 7.839534488167342e-07, "loss": 0.0008, "step": 382330 }, { "epoch": 4.085047278166568, "grad_norm": 0.16077299416065216, "learning_rate": 7.839396199902475e-07, "loss": 0.0006, "step": 382340 }, { "epoch": 4.085154121480849, "grad_norm": 4.548897743225098, "learning_rate": 7.839257908431715e-07, "loss": 0.0077, "step": 382350 }, { "epoch": 4.085260964795128, "grad_norm": 0.11134056746959686, "learning_rate": 7.839119613755219e-07, "loss": 0.0058, "step": 382360 }, { "epoch": 4.085367808109408, "grad_norm": 0.07391317188739777, "learning_rate": 7.838981315873143e-07, "loss": 0.001, "step": 382370 }, { "epoch": 4.085474651423687, "grad_norm": 0.06500165909528732, "learning_rate": 7.838843014785643e-07, "loss": 0.0045, "step": 382380 }, { "epoch": 4.0855814947379665, "grad_norm": 5.868471622467041, "learning_rate": 7.838704710492874e-07, "loss": 0.0053, "step": 382390 }, { "epoch": 4.085688338052246, "grad_norm": 0.010496861301362514, "learning_rate": 7.838566402994993e-07, "loss": 0.0098, "step": 382400 }, { "epoch": 4.085795181366526, "grad_norm": 1.2433571815490723, "learning_rate": 7.838428092292158e-07, "loss": 0.0018, "step": 382410 }, { "epoch": 4.085902024680806, "grad_norm": 5.040939807891846, "learning_rate": 7.838289778384522e-07, "loss": 0.0083, "step": 382420 }, { "epoch": 4.086008867995085, "grad_norm": 0.035482145845890045, "learning_rate": 7.838151461272243e-07, "loss": 0.0232, "step": 382430 }, { "epoch": 4.086115711309365, "grad_norm": 5.35632848739624, "learning_rate": 7.838013140955475e-07, "loss": 0.0117, "step": 382440 }, { "epoch": 4.086222554623644, "grad_norm": 0.007923091761767864, "learning_rate": 7.837874817434378e-07, "loss": 0.0006, "step": 382450 }, { "epoch": 4.086329397937924, "grad_norm": 1.8073362112045288, "learning_rate": 7.837736490709106e-07, "loss": 0.0054, "step": 382460 }, { "epoch": 4.086436241252204, "grad_norm": 0.012424668297171593, "learning_rate": 7.837598160779815e-07, "loss": 0.0184, "step": 382470 }, { "epoch": 4.086543084566483, "grad_norm": 1.6083239316940308, "learning_rate": 7.837459827646662e-07, "loss": 0.0044, "step": 382480 }, { "epoch": 4.086649927880763, "grad_norm": 3.0090198516845703, "learning_rate": 7.837321491309801e-07, "loss": 0.0121, "step": 382490 }, { "epoch": 4.086756771195042, "grad_norm": 0.018403392285108566, "learning_rate": 7.837183151769391e-07, "loss": 0.0228, "step": 382500 }, { "epoch": 4.086863614509322, "grad_norm": 0.06993668526411057, "learning_rate": 7.837044809025588e-07, "loss": 0.005, "step": 382510 }, { "epoch": 4.086970457823602, "grad_norm": 6.024269104003906, "learning_rate": 7.836906463078547e-07, "loss": 0.0046, "step": 382520 }, { "epoch": 4.087077301137882, "grad_norm": 0.0023325716610997915, "learning_rate": 7.836768113928424e-07, "loss": 0.0024, "step": 382530 }, { "epoch": 4.087184144452161, "grad_norm": 0.06203192099928856, "learning_rate": 7.836629761575375e-07, "loss": 0.0057, "step": 382540 }, { "epoch": 4.0872909877664405, "grad_norm": 0.5204082131385803, "learning_rate": 7.836491406019559e-07, "loss": 0.019, "step": 382550 }, { "epoch": 4.08739783108072, "grad_norm": 11.545696258544922, "learning_rate": 7.836353047261128e-07, "loss": 0.0415, "step": 382560 }, { "epoch": 4.087504674394999, "grad_norm": 0.16497008502483368, "learning_rate": 7.83621468530024e-07, "loss": 0.0205, "step": 382570 }, { "epoch": 4.08761151770928, "grad_norm": 0.0038834388833492994, "learning_rate": 7.836076320137054e-07, "loss": 0.0199, "step": 382580 }, { "epoch": 4.087718361023559, "grad_norm": 0.0044179935939610004, "learning_rate": 7.835937951771722e-07, "loss": 0.0181, "step": 382590 }, { "epoch": 4.087825204337839, "grad_norm": 0.025028377771377563, "learning_rate": 7.835799580204403e-07, "loss": 0.0045, "step": 382600 }, { "epoch": 4.087932047652118, "grad_norm": 0.0060053858906030655, "learning_rate": 7.835661205435251e-07, "loss": 0.0321, "step": 382610 }, { "epoch": 4.088038890966398, "grad_norm": 4.482670783996582, "learning_rate": 7.835522827464425e-07, "loss": 0.0398, "step": 382620 }, { "epoch": 4.088145734280677, "grad_norm": 0.4171208143234253, "learning_rate": 7.835384446292078e-07, "loss": 0.0012, "step": 382630 }, { "epoch": 4.088252577594957, "grad_norm": 16.807228088378906, "learning_rate": 7.83524606191837e-07, "loss": 0.0082, "step": 382640 }, { "epoch": 4.088359420909237, "grad_norm": 4.875577449798584, "learning_rate": 7.835107674343453e-07, "loss": 0.0221, "step": 382650 }, { "epoch": 4.088466264223516, "grad_norm": 0.5853593349456787, "learning_rate": 7.834969283567488e-07, "loss": 0.0008, "step": 382660 }, { "epoch": 4.088573107537796, "grad_norm": 0.0037318281829357147, "learning_rate": 7.834830889590625e-07, "loss": 0.0089, "step": 382670 }, { "epoch": 4.088679950852075, "grad_norm": 3.126837968826294, "learning_rate": 7.834692492413028e-07, "loss": 0.0022, "step": 382680 }, { "epoch": 4.088786794166355, "grad_norm": 1.6509761810302734, "learning_rate": 7.834554092034848e-07, "loss": 0.002, "step": 382690 }, { "epoch": 4.088893637480635, "grad_norm": 0.016055496409535408, "learning_rate": 7.83441568845624e-07, "loss": 0.0048, "step": 382700 }, { "epoch": 4.0890004807949145, "grad_norm": 0.0015371405752375722, "learning_rate": 7.834277281677366e-07, "loss": 0.001, "step": 382710 }, { "epoch": 4.089107324109194, "grad_norm": 5.392501354217529, "learning_rate": 7.834138871698377e-07, "loss": 0.0032, "step": 382720 }, { "epoch": 4.089214167423473, "grad_norm": 2.8498151302337646, "learning_rate": 7.834000458519432e-07, "loss": 0.0132, "step": 382730 }, { "epoch": 4.089321010737753, "grad_norm": 4.517777919769287, "learning_rate": 7.833862042140686e-07, "loss": 0.0032, "step": 382740 }, { "epoch": 4.089427854052032, "grad_norm": 0.8180594444274902, "learning_rate": 7.833723622562298e-07, "loss": 0.0023, "step": 382750 }, { "epoch": 4.089534697366313, "grad_norm": 0.15199966728687286, "learning_rate": 7.83358519978442e-07, "loss": 0.0048, "step": 382760 }, { "epoch": 4.089641540680592, "grad_norm": 0.29864582419395447, "learning_rate": 7.83344677380721e-07, "loss": 0.0124, "step": 382770 }, { "epoch": 4.089748383994872, "grad_norm": 6.845086097717285, "learning_rate": 7.833308344630828e-07, "loss": 0.0168, "step": 382780 }, { "epoch": 4.089855227309151, "grad_norm": 1.1638433933258057, "learning_rate": 7.833169912255424e-07, "loss": 0.0133, "step": 382790 }, { "epoch": 4.0899620706234305, "grad_norm": 0.17372079193592072, "learning_rate": 7.833031476681159e-07, "loss": 0.0208, "step": 382800 }, { "epoch": 4.09006891393771, "grad_norm": 0.003130311146378517, "learning_rate": 7.832893037908187e-07, "loss": 0.003, "step": 382810 }, { "epoch": 4.09017575725199, "grad_norm": 2.1297168731689453, "learning_rate": 7.832754595936663e-07, "loss": 0.0069, "step": 382820 }, { "epoch": 4.09028260056627, "grad_norm": 0.7319245338439941, "learning_rate": 7.832616150766749e-07, "loss": 0.0063, "step": 382830 }, { "epoch": 4.090389443880549, "grad_norm": 0.03949768468737602, "learning_rate": 7.832477702398594e-07, "loss": 0.0029, "step": 382840 }, { "epoch": 4.090496287194829, "grad_norm": 0.025486279278993607, "learning_rate": 7.83233925083236e-07, "loss": 0.0176, "step": 382850 }, { "epoch": 4.090603130509108, "grad_norm": 19.444026947021484, "learning_rate": 7.832200796068202e-07, "loss": 0.0298, "step": 382860 }, { "epoch": 4.090709973823388, "grad_norm": 1.4449864625930786, "learning_rate": 7.832062338106273e-07, "loss": 0.0079, "step": 382870 }, { "epoch": 4.090816817137668, "grad_norm": 0.010965386405587196, "learning_rate": 7.831923876946734e-07, "loss": 0.0027, "step": 382880 }, { "epoch": 4.090923660451947, "grad_norm": 0.0016768536297604442, "learning_rate": 7.831785412589738e-07, "loss": 0.0093, "step": 382890 }, { "epoch": 4.091030503766227, "grad_norm": 0.14009137451648712, "learning_rate": 7.831646945035444e-07, "loss": 0.007, "step": 382900 }, { "epoch": 4.091137347080506, "grad_norm": 0.04574853554368019, "learning_rate": 7.831508474284005e-07, "loss": 0.0205, "step": 382910 }, { "epoch": 4.091244190394786, "grad_norm": 0.0016733105294406414, "learning_rate": 7.831370000335581e-07, "loss": 0.0222, "step": 382920 }, { "epoch": 4.091351033709065, "grad_norm": 3.2681772708892822, "learning_rate": 7.831231523190325e-07, "loss": 0.0016, "step": 382930 }, { "epoch": 4.091457877023346, "grad_norm": 0.9677978157997131, "learning_rate": 7.831093042848395e-07, "loss": 0.0198, "step": 382940 }, { "epoch": 4.091564720337625, "grad_norm": 3.475625991821289, "learning_rate": 7.83095455930995e-07, "loss": 0.0114, "step": 382950 }, { "epoch": 4.0916715636519045, "grad_norm": 0.001590144936926663, "learning_rate": 7.830816072575141e-07, "loss": 0.0086, "step": 382960 }, { "epoch": 4.091778406966184, "grad_norm": 3.2199013233184814, "learning_rate": 7.830677582644128e-07, "loss": 0.0043, "step": 382970 }, { "epoch": 4.0918852502804635, "grad_norm": 0.004023320972919464, "learning_rate": 7.830539089517066e-07, "loss": 0.0047, "step": 382980 }, { "epoch": 4.091992093594743, "grad_norm": 0.0007186440634541214, "learning_rate": 7.83040059319411e-07, "loss": 0.0096, "step": 382990 }, { "epoch": 4.092098936909023, "grad_norm": 0.08063437044620514, "learning_rate": 7.830262093675422e-07, "loss": 0.0361, "step": 383000 }, { "epoch": 4.092205780223303, "grad_norm": 0.005116279702633619, "learning_rate": 7.830123590961151e-07, "loss": 0.0002, "step": 383010 }, { "epoch": 4.092312623537582, "grad_norm": 0.006303291767835617, "learning_rate": 7.829985085051458e-07, "loss": 0.0094, "step": 383020 }, { "epoch": 4.092419466851862, "grad_norm": 1.6800121068954468, "learning_rate": 7.829846575946499e-07, "loss": 0.0025, "step": 383030 }, { "epoch": 4.092526310166141, "grad_norm": 9.102958679199219, "learning_rate": 7.829708063646427e-07, "loss": 0.0198, "step": 383040 }, { "epoch": 4.092633153480421, "grad_norm": 0.00840391032397747, "learning_rate": 7.829569548151404e-07, "loss": 0.0006, "step": 383050 }, { "epoch": 4.092739996794701, "grad_norm": 0.0034215161576867104, "learning_rate": 7.829431029461583e-07, "loss": 0.0153, "step": 383060 }, { "epoch": 4.09284684010898, "grad_norm": 0.4435754120349884, "learning_rate": 7.82929250757712e-07, "loss": 0.0011, "step": 383070 }, { "epoch": 4.09295368342326, "grad_norm": 2.117626428604126, "learning_rate": 7.829153982498172e-07, "loss": 0.0039, "step": 383080 }, { "epoch": 4.093060526737539, "grad_norm": 0.0021441378630697727, "learning_rate": 7.829015454224896e-07, "loss": 0.001, "step": 383090 }, { "epoch": 4.093167370051819, "grad_norm": 3.3594043254852295, "learning_rate": 7.828876922757448e-07, "loss": 0.0037, "step": 383100 }, { "epoch": 4.093274213366098, "grad_norm": 0.07206928730010986, "learning_rate": 7.828738388095986e-07, "loss": 0.0053, "step": 383110 }, { "epoch": 4.0933810566803785, "grad_norm": 4.260519027709961, "learning_rate": 7.828599850240663e-07, "loss": 0.0034, "step": 383120 }, { "epoch": 4.093487899994658, "grad_norm": 1.6804749965667725, "learning_rate": 7.828461309191637e-07, "loss": 0.0246, "step": 383130 }, { "epoch": 4.0935947433089375, "grad_norm": 0.9898325800895691, "learning_rate": 7.828322764949067e-07, "loss": 0.0086, "step": 383140 }, { "epoch": 4.093701586623217, "grad_norm": 0.014632368460297585, "learning_rate": 7.828184217513106e-07, "loss": 0.0318, "step": 383150 }, { "epoch": 4.093808429937496, "grad_norm": 0.02184862457215786, "learning_rate": 7.82804566688391e-07, "loss": 0.0092, "step": 383160 }, { "epoch": 4.093915273251776, "grad_norm": 3.9672324657440186, "learning_rate": 7.827907113061638e-07, "loss": 0.0093, "step": 383170 }, { "epoch": 4.094022116566056, "grad_norm": 0.06452016532421112, "learning_rate": 7.827768556046446e-07, "loss": 0.0102, "step": 383180 }, { "epoch": 4.094128959880336, "grad_norm": 0.21368424594402313, "learning_rate": 7.82762999583849e-07, "loss": 0.0006, "step": 383190 }, { "epoch": 4.094235803194615, "grad_norm": 0.015405161306262016, "learning_rate": 7.827491432437925e-07, "loss": 0.0104, "step": 383200 }, { "epoch": 4.094342646508895, "grad_norm": 0.004488322418183088, "learning_rate": 7.82735286584491e-07, "loss": 0.002, "step": 383210 }, { "epoch": 4.094449489823174, "grad_norm": 0.009673898108303547, "learning_rate": 7.8272142960596e-07, "loss": 0.0033, "step": 383220 }, { "epoch": 4.094556333137454, "grad_norm": 0.001439190236851573, "learning_rate": 7.82707572308215e-07, "loss": 0.0019, "step": 383230 }, { "epoch": 4.094663176451734, "grad_norm": 7.287801265716553, "learning_rate": 7.82693714691272e-07, "loss": 0.0135, "step": 383240 }, { "epoch": 4.094770019766013, "grad_norm": 0.027291428297758102, "learning_rate": 7.826798567551465e-07, "loss": 0.0313, "step": 383250 }, { "epoch": 4.094876863080293, "grad_norm": 0.002817837055772543, "learning_rate": 7.82665998499854e-07, "loss": 0.0024, "step": 383260 }, { "epoch": 4.094983706394572, "grad_norm": 0.29084667563438416, "learning_rate": 7.826521399254102e-07, "loss": 0.0055, "step": 383270 }, { "epoch": 4.095090549708852, "grad_norm": 0.038698725402355194, "learning_rate": 7.826382810318311e-07, "loss": 0.0305, "step": 383280 }, { "epoch": 4.095197393023131, "grad_norm": 0.01646537519991398, "learning_rate": 7.826244218191318e-07, "loss": 0.0382, "step": 383290 }, { "epoch": 4.0953042363374115, "grad_norm": 0.008150813169777393, "learning_rate": 7.826105622873281e-07, "loss": 0.0122, "step": 383300 }, { "epoch": 4.095411079651691, "grad_norm": 0.039520446211099625, "learning_rate": 7.82596702436436e-07, "loss": 0.0041, "step": 383310 }, { "epoch": 4.09551792296597, "grad_norm": 0.02463289350271225, "learning_rate": 7.825828422664708e-07, "loss": 0.0144, "step": 383320 }, { "epoch": 4.09562476628025, "grad_norm": 0.08073738217353821, "learning_rate": 7.825689817774481e-07, "loss": 0.0182, "step": 383330 }, { "epoch": 4.095731609594529, "grad_norm": 0.0003946717770304531, "learning_rate": 7.82555120969384e-07, "loss": 0.0056, "step": 383340 }, { "epoch": 4.09583845290881, "grad_norm": 0.8835169076919556, "learning_rate": 7.825412598422935e-07, "loss": 0.0042, "step": 383350 }, { "epoch": 4.095945296223089, "grad_norm": 0.0037004754412919283, "learning_rate": 7.825273983961928e-07, "loss": 0.0131, "step": 383360 }, { "epoch": 4.096052139537369, "grad_norm": 1.0909945964813232, "learning_rate": 7.825135366310972e-07, "loss": 0.0058, "step": 383370 }, { "epoch": 4.096158982851648, "grad_norm": 1.7187553644180298, "learning_rate": 7.824996745470227e-07, "loss": 0.0047, "step": 383380 }, { "epoch": 4.0962658261659275, "grad_norm": 0.008785001002252102, "learning_rate": 7.824858121439846e-07, "loss": 0.0012, "step": 383390 }, { "epoch": 4.096372669480207, "grad_norm": 0.0034871145617216825, "learning_rate": 7.824719494219987e-07, "loss": 0.0097, "step": 383400 }, { "epoch": 4.096479512794487, "grad_norm": 0.004961478989571333, "learning_rate": 7.824580863810807e-07, "loss": 0.0017, "step": 383410 }, { "epoch": 4.096586356108767, "grad_norm": 0.017483020201325417, "learning_rate": 7.82444223021246e-07, "loss": 0.0384, "step": 383420 }, { "epoch": 4.096693199423046, "grad_norm": 0.9482202529907227, "learning_rate": 7.824303593425108e-07, "loss": 0.0343, "step": 383430 }, { "epoch": 4.096800042737326, "grad_norm": 0.002776111476123333, "learning_rate": 7.824164953448901e-07, "loss": 0.0053, "step": 383440 }, { "epoch": 4.096906886051605, "grad_norm": 0.005963403731584549, "learning_rate": 7.824026310284001e-07, "loss": 0.0012, "step": 383450 }, { "epoch": 4.097013729365885, "grad_norm": 0.0005854249466210604, "learning_rate": 7.823887663930561e-07, "loss": 0.0018, "step": 383460 }, { "epoch": 4.097120572680165, "grad_norm": 0.15983209013938904, "learning_rate": 7.823749014388738e-07, "loss": 0.0052, "step": 383470 }, { "epoch": 4.097227415994444, "grad_norm": 0.02648487500846386, "learning_rate": 7.823610361658691e-07, "loss": 0.0094, "step": 383480 }, { "epoch": 4.097334259308724, "grad_norm": 0.0010722879087552428, "learning_rate": 7.823471705740574e-07, "loss": 0.0098, "step": 383490 }, { "epoch": 4.097441102623003, "grad_norm": 0.043506279587745667, "learning_rate": 7.823333046634543e-07, "loss": 0.0015, "step": 383500 }, { "epoch": 4.097547945937283, "grad_norm": 1.1326417922973633, "learning_rate": 7.823194384340759e-07, "loss": 0.001, "step": 383510 }, { "epoch": 4.097654789251562, "grad_norm": 0.057481441646814346, "learning_rate": 7.823055718859374e-07, "loss": 0.0029, "step": 383520 }, { "epoch": 4.097761632565843, "grad_norm": 0.2190038412809372, "learning_rate": 7.822917050190545e-07, "loss": 0.004, "step": 383530 }, { "epoch": 4.097868475880122, "grad_norm": 0.3875260353088379, "learning_rate": 7.82277837833443e-07, "loss": 0.0191, "step": 383540 }, { "epoch": 4.0979753191944015, "grad_norm": 0.00355202192440629, "learning_rate": 7.822639703291186e-07, "loss": 0.0048, "step": 383550 }, { "epoch": 4.098082162508681, "grad_norm": 4.830140590667725, "learning_rate": 7.822501025060968e-07, "loss": 0.0082, "step": 383560 }, { "epoch": 4.09818900582296, "grad_norm": 4.559521675109863, "learning_rate": 7.822362343643933e-07, "loss": 0.0111, "step": 383570 }, { "epoch": 4.09829584913724, "grad_norm": 0.09204649180173874, "learning_rate": 7.822223659040239e-07, "loss": 0.0068, "step": 383580 }, { "epoch": 4.09840269245152, "grad_norm": 1.0509827136993408, "learning_rate": 7.822084971250041e-07, "loss": 0.0048, "step": 383590 }, { "epoch": 4.0985095357658, "grad_norm": 0.014577340334653854, "learning_rate": 7.821946280273496e-07, "loss": 0.0008, "step": 383600 }, { "epoch": 4.098616379080079, "grad_norm": 4.9042649269104, "learning_rate": 7.82180758611076e-07, "loss": 0.0135, "step": 383610 }, { "epoch": 4.098723222394359, "grad_norm": 0.20484378933906555, "learning_rate": 7.821668888761992e-07, "loss": 0.0014, "step": 383620 }, { "epoch": 4.098830065708638, "grad_norm": 0.6075062155723572, "learning_rate": 7.821530188227346e-07, "loss": 0.003, "step": 383630 }, { "epoch": 4.0989369090229175, "grad_norm": 0.029034346342086792, "learning_rate": 7.821391484506979e-07, "loss": 0.0143, "step": 383640 }, { "epoch": 4.099043752337198, "grad_norm": 0.0018077612621709704, "learning_rate": 7.821252777601048e-07, "loss": 0.005, "step": 383650 }, { "epoch": 4.099150595651477, "grad_norm": 0.5323488712310791, "learning_rate": 7.821114067509711e-07, "loss": 0.0033, "step": 383660 }, { "epoch": 4.099257438965757, "grad_norm": 0.03360952064394951, "learning_rate": 7.820975354233122e-07, "loss": 0.0022, "step": 383670 }, { "epoch": 4.099364282280036, "grad_norm": 3.794733762741089, "learning_rate": 7.820836637771439e-07, "loss": 0.0135, "step": 383680 }, { "epoch": 4.099471125594316, "grad_norm": 0.0025382963940501213, "learning_rate": 7.820697918124818e-07, "loss": 0.0027, "step": 383690 }, { "epoch": 4.099577968908595, "grad_norm": 4.823031902313232, "learning_rate": 7.820559195293417e-07, "loss": 0.0447, "step": 383700 }, { "epoch": 4.0996848122228755, "grad_norm": 1.7902978658676147, "learning_rate": 7.820420469277391e-07, "loss": 0.016, "step": 383710 }, { "epoch": 4.099791655537155, "grad_norm": 0.0016478645848110318, "learning_rate": 7.820281740076899e-07, "loss": 0.0071, "step": 383720 }, { "epoch": 4.099898498851434, "grad_norm": 5.3194074630737305, "learning_rate": 7.820143007692094e-07, "loss": 0.0107, "step": 383730 }, { "epoch": 4.100005342165714, "grad_norm": 5.874472618103027, "learning_rate": 7.820004272123137e-07, "loss": 0.0063, "step": 383740 }, { "epoch": 4.100112185479993, "grad_norm": 2.9653608798980713, "learning_rate": 7.819865533370181e-07, "loss": 0.0053, "step": 383750 }, { "epoch": 4.100219028794273, "grad_norm": 3.5713918209075928, "learning_rate": 7.819726791433384e-07, "loss": 0.0128, "step": 383760 }, { "epoch": 4.100325872108553, "grad_norm": 0.008660294115543365, "learning_rate": 7.819588046312904e-07, "loss": 0.0156, "step": 383770 }, { "epoch": 4.100432715422833, "grad_norm": 0.00430062273517251, "learning_rate": 7.819449298008895e-07, "loss": 0.0102, "step": 383780 }, { "epoch": 4.100539558737112, "grad_norm": 0.00992637313902378, "learning_rate": 7.819310546521514e-07, "loss": 0.0149, "step": 383790 }, { "epoch": 4.1006464020513915, "grad_norm": 0.0009571886039339006, "learning_rate": 7.81917179185092e-07, "loss": 0.0076, "step": 383800 }, { "epoch": 4.100753245365671, "grad_norm": 0.014308253303170204, "learning_rate": 7.819033033997269e-07, "loss": 0.0073, "step": 383810 }, { "epoch": 4.10086008867995, "grad_norm": 4.642439842224121, "learning_rate": 7.818894272960714e-07, "loss": 0.0125, "step": 383820 }, { "epoch": 4.100966931994231, "grad_norm": 3.117769956588745, "learning_rate": 7.818755508741417e-07, "loss": 0.0058, "step": 383830 }, { "epoch": 4.10107377530851, "grad_norm": 0.24807418882846832, "learning_rate": 7.818616741339533e-07, "loss": 0.0002, "step": 383840 }, { "epoch": 4.10118061862279, "grad_norm": 0.016376974061131477, "learning_rate": 7.818477970755216e-07, "loss": 0.0013, "step": 383850 }, { "epoch": 4.101287461937069, "grad_norm": 0.009232855401933193, "learning_rate": 7.818339196988626e-07, "loss": 0.0192, "step": 383860 }, { "epoch": 4.101394305251349, "grad_norm": 0.002478186972439289, "learning_rate": 7.818200420039918e-07, "loss": 0.0045, "step": 383870 }, { "epoch": 4.101501148565628, "grad_norm": 0.001058797468431294, "learning_rate": 7.818061639909248e-07, "loss": 0.0092, "step": 383880 }, { "epoch": 4.101607991879908, "grad_norm": 0.0203099362552166, "learning_rate": 7.817922856596775e-07, "loss": 0.0041, "step": 383890 }, { "epoch": 4.101714835194188, "grad_norm": 0.9943646192550659, "learning_rate": 7.817784070102653e-07, "loss": 0.0101, "step": 383900 }, { "epoch": 4.101821678508467, "grad_norm": 0.03911513090133667, "learning_rate": 7.817645280427041e-07, "loss": 0.0013, "step": 383910 }, { "epoch": 4.101928521822747, "grad_norm": 0.0019430340034887195, "learning_rate": 7.817506487570095e-07, "loss": 0.0022, "step": 383920 }, { "epoch": 4.102035365137026, "grad_norm": 0.0063538807444274426, "learning_rate": 7.817367691531971e-07, "loss": 0.0136, "step": 383930 }, { "epoch": 4.102142208451306, "grad_norm": 0.0033168864902108908, "learning_rate": 7.817228892312829e-07, "loss": 0.0231, "step": 383940 }, { "epoch": 4.102249051765586, "grad_norm": 0.009723827242851257, "learning_rate": 7.817090089912818e-07, "loss": 0.0055, "step": 383950 }, { "epoch": 4.1023558950798655, "grad_norm": 0.005030105821788311, "learning_rate": 7.816951284332103e-07, "loss": 0.0012, "step": 383960 }, { "epoch": 4.102462738394145, "grad_norm": 0.08245593309402466, "learning_rate": 7.816812475570837e-07, "loss": 0.0055, "step": 383970 }, { "epoch": 4.1025695817084245, "grad_norm": 0.002532404847443104, "learning_rate": 7.816673663629177e-07, "loss": 0.0067, "step": 383980 }, { "epoch": 4.102676425022704, "grad_norm": 0.032947275787591934, "learning_rate": 7.816534848507279e-07, "loss": 0.0057, "step": 383990 }, { "epoch": 4.102783268336983, "grad_norm": 0.008290490135550499, "learning_rate": 7.816396030205301e-07, "loss": 0.0024, "step": 384000 }, { "epoch": 4.102890111651264, "grad_norm": 0.06593739986419678, "learning_rate": 7.8162572087234e-07, "loss": 0.0075, "step": 384010 }, { "epoch": 4.102996954965543, "grad_norm": 0.05004118010401726, "learning_rate": 7.816118384061732e-07, "loss": 0.0128, "step": 384020 }, { "epoch": 4.103103798279823, "grad_norm": 0.24702104926109314, "learning_rate": 7.815979556220454e-07, "loss": 0.0138, "step": 384030 }, { "epoch": 4.103210641594102, "grad_norm": 0.020081033930182457, "learning_rate": 7.81584072519972e-07, "loss": 0.0036, "step": 384040 }, { "epoch": 4.1033174849083816, "grad_norm": 0.07603911310434341, "learning_rate": 7.815701890999693e-07, "loss": 0.02, "step": 384050 }, { "epoch": 4.103424328222662, "grad_norm": 0.38905400037765503, "learning_rate": 7.815563053620524e-07, "loss": 0.0314, "step": 384060 }, { "epoch": 4.103531171536941, "grad_norm": 0.0004724573518615216, "learning_rate": 7.815424213062371e-07, "loss": 0.0229, "step": 384070 }, { "epoch": 4.103638014851221, "grad_norm": 0.07533121109008789, "learning_rate": 7.815285369325392e-07, "loss": 0.0005, "step": 384080 }, { "epoch": 4.1037448581655, "grad_norm": 3.174246072769165, "learning_rate": 7.815146522409746e-07, "loss": 0.0306, "step": 384090 }, { "epoch": 4.10385170147978, "grad_norm": 6.533763885498047, "learning_rate": 7.815007672315585e-07, "loss": 0.0306, "step": 384100 }, { "epoch": 4.103958544794059, "grad_norm": 0.013985132798552513, "learning_rate": 7.814868819043068e-07, "loss": 0.0033, "step": 384110 }, { "epoch": 4.1040653881083395, "grad_norm": 0.18181155622005463, "learning_rate": 7.814729962592352e-07, "loss": 0.0121, "step": 384120 }, { "epoch": 4.104172231422619, "grad_norm": 0.0033716573379933834, "learning_rate": 7.814591102963592e-07, "loss": 0.0013, "step": 384130 }, { "epoch": 4.1042790747368985, "grad_norm": 0.0021945752669125795, "learning_rate": 7.814452240156948e-07, "loss": 0.001, "step": 384140 }, { "epoch": 4.104385918051178, "grad_norm": 1.812856674194336, "learning_rate": 7.814313374172574e-07, "loss": 0.0045, "step": 384150 }, { "epoch": 4.104492761365457, "grad_norm": 2.2118067741394043, "learning_rate": 7.814174505010629e-07, "loss": 0.0129, "step": 384160 }, { "epoch": 4.104599604679737, "grad_norm": 0.20882077515125275, "learning_rate": 7.814035632671267e-07, "loss": 0.014, "step": 384170 }, { "epoch": 4.104706447994017, "grad_norm": 0.07495875656604767, "learning_rate": 7.813896757154648e-07, "loss": 0.0511, "step": 384180 }, { "epoch": 4.104813291308297, "grad_norm": 0.03891576826572418, "learning_rate": 7.813757878460925e-07, "loss": 0.0002, "step": 384190 }, { "epoch": 4.104920134622576, "grad_norm": 0.005156619008630514, "learning_rate": 7.813618996590258e-07, "loss": 0.0065, "step": 384200 }, { "epoch": 4.105026977936856, "grad_norm": 0.0059470245614647865, "learning_rate": 7.813480111542803e-07, "loss": 0.0129, "step": 384210 }, { "epoch": 4.105133821251135, "grad_norm": 0.014325524680316448, "learning_rate": 7.813341223318717e-07, "loss": 0.0135, "step": 384220 }, { "epoch": 4.1052406645654145, "grad_norm": 0.00133488979190588, "learning_rate": 7.813202331918156e-07, "loss": 0.0111, "step": 384230 }, { "epoch": 4.105347507879695, "grad_norm": 15.64499568939209, "learning_rate": 7.813063437341279e-07, "loss": 0.0505, "step": 384240 }, { "epoch": 4.105454351193974, "grad_norm": 1.2350506782531738, "learning_rate": 7.812924539588239e-07, "loss": 0.0071, "step": 384250 }, { "epoch": 4.105561194508254, "grad_norm": 0.9206893444061279, "learning_rate": 7.812785638659196e-07, "loss": 0.0107, "step": 384260 }, { "epoch": 4.105668037822533, "grad_norm": 1.3064626455307007, "learning_rate": 7.812646734554304e-07, "loss": 0.0301, "step": 384270 }, { "epoch": 4.105774881136813, "grad_norm": 0.0008967812173068523, "learning_rate": 7.812507827273722e-07, "loss": 0.0014, "step": 384280 }, { "epoch": 4.105881724451092, "grad_norm": 0.3218732476234436, "learning_rate": 7.812368916817607e-07, "loss": 0.0048, "step": 384290 }, { "epoch": 4.1059885677653725, "grad_norm": 0.0016307781916111708, "learning_rate": 7.812230003186116e-07, "loss": 0.0161, "step": 384300 }, { "epoch": 4.106095411079652, "grad_norm": 0.20395709574222565, "learning_rate": 7.812091086379405e-07, "loss": 0.0161, "step": 384310 }, { "epoch": 4.106202254393931, "grad_norm": 0.0016949933487921953, "learning_rate": 7.81195216639763e-07, "loss": 0.009, "step": 384320 }, { "epoch": 4.106309097708211, "grad_norm": 7.205140113830566, "learning_rate": 7.811813243240948e-07, "loss": 0.0176, "step": 384330 }, { "epoch": 4.10641594102249, "grad_norm": 0.008605566807091236, "learning_rate": 7.811674316909518e-07, "loss": 0.0003, "step": 384340 }, { "epoch": 4.10652278433677, "grad_norm": 3.865875244140625, "learning_rate": 7.811535387403495e-07, "loss": 0.0036, "step": 384350 }, { "epoch": 4.10662962765105, "grad_norm": 0.00788232870399952, "learning_rate": 7.811396454723036e-07, "loss": 0.005, "step": 384360 }, { "epoch": 4.10673647096533, "grad_norm": 0.04901326820254326, "learning_rate": 7.811257518868299e-07, "loss": 0.0018, "step": 384370 }, { "epoch": 4.106843314279609, "grad_norm": 1.5320106744766235, "learning_rate": 7.81111857983944e-07, "loss": 0.0066, "step": 384380 }, { "epoch": 4.1069501575938885, "grad_norm": 0.0728643462061882, "learning_rate": 7.810979637636616e-07, "loss": 0.0013, "step": 384390 }, { "epoch": 4.107057000908168, "grad_norm": 1.0457004308700562, "learning_rate": 7.810840692259984e-07, "loss": 0.015, "step": 384400 }, { "epoch": 4.107163844222447, "grad_norm": 0.06798696517944336, "learning_rate": 7.8107017437097e-07, "loss": 0.0395, "step": 384410 }, { "epoch": 4.107270687536728, "grad_norm": 3.530507802963257, "learning_rate": 7.810562791985922e-07, "loss": 0.0352, "step": 384420 }, { "epoch": 4.107377530851007, "grad_norm": 0.03178638219833374, "learning_rate": 7.810423837088806e-07, "loss": 0.0007, "step": 384430 }, { "epoch": 4.107484374165287, "grad_norm": 0.0008145184256136417, "learning_rate": 7.810284879018509e-07, "loss": 0.028, "step": 384440 }, { "epoch": 4.107591217479566, "grad_norm": 0.02643764019012451, "learning_rate": 7.810145917775191e-07, "loss": 0.0066, "step": 384450 }, { "epoch": 4.107698060793846, "grad_norm": 0.013769593089818954, "learning_rate": 7.810006953359003e-07, "loss": 0.009, "step": 384460 }, { "epoch": 4.107804904108125, "grad_norm": 0.00195900141261518, "learning_rate": 7.809867985770107e-07, "loss": 0.0012, "step": 384470 }, { "epoch": 4.107911747422405, "grad_norm": 0.037085533142089844, "learning_rate": 7.809729015008656e-07, "loss": 0.0235, "step": 384480 }, { "epoch": 4.108018590736685, "grad_norm": 0.0033057034015655518, "learning_rate": 7.80959004107481e-07, "loss": 0.02, "step": 384490 }, { "epoch": 4.108125434050964, "grad_norm": 10.417819023132324, "learning_rate": 7.809451063968725e-07, "loss": 0.0148, "step": 384500 }, { "epoch": 4.108232277365244, "grad_norm": 0.3127910792827606, "learning_rate": 7.809312083690558e-07, "loss": 0.0144, "step": 384510 }, { "epoch": 4.108339120679523, "grad_norm": 4.985213279724121, "learning_rate": 7.809173100240464e-07, "loss": 0.0101, "step": 384520 }, { "epoch": 4.108445963993803, "grad_norm": 0.03318864852190018, "learning_rate": 7.809034113618604e-07, "loss": 0.0046, "step": 384530 }, { "epoch": 4.108552807308083, "grad_norm": 0.008113544434309006, "learning_rate": 7.808895123825132e-07, "loss": 0.0041, "step": 384540 }, { "epoch": 4.1086596506223625, "grad_norm": 0.25756731629371643, "learning_rate": 7.808756130860205e-07, "loss": 0.0039, "step": 384550 }, { "epoch": 4.108766493936642, "grad_norm": 0.037852030247449875, "learning_rate": 7.808617134723978e-07, "loss": 0.0006, "step": 384560 }, { "epoch": 4.108873337250921, "grad_norm": 0.3074757158756256, "learning_rate": 7.808478135416614e-07, "loss": 0.01, "step": 384570 }, { "epoch": 4.108980180565201, "grad_norm": 0.6324251294136047, "learning_rate": 7.808339132938264e-07, "loss": 0.0054, "step": 384580 }, { "epoch": 4.10908702387948, "grad_norm": 9.341371536254883, "learning_rate": 7.808200127289088e-07, "loss": 0.0163, "step": 384590 }, { "epoch": 4.109193867193761, "grad_norm": 0.8815248012542725, "learning_rate": 7.808061118469243e-07, "loss": 0.0012, "step": 384600 }, { "epoch": 4.10930071050804, "grad_norm": 0.019413093104958534, "learning_rate": 7.807922106478881e-07, "loss": 0.0793, "step": 384610 }, { "epoch": 4.10940755382232, "grad_norm": 1.492292881011963, "learning_rate": 7.807783091318167e-07, "loss": 0.0088, "step": 384620 }, { "epoch": 4.109514397136599, "grad_norm": 3.2255334854125977, "learning_rate": 7.807644072987253e-07, "loss": 0.0179, "step": 384630 }, { "epoch": 4.1096212404508785, "grad_norm": 1.3821715116500854, "learning_rate": 7.807505051486296e-07, "loss": 0.0006, "step": 384640 }, { "epoch": 4.109728083765158, "grad_norm": 4.778295040130615, "learning_rate": 7.807366026815455e-07, "loss": 0.0268, "step": 384650 }, { "epoch": 4.109834927079438, "grad_norm": 0.0031792158260941505, "learning_rate": 7.807226998974887e-07, "loss": 0.0124, "step": 384660 }, { "epoch": 4.109941770393718, "grad_norm": 0.01840190403163433, "learning_rate": 7.807087967964745e-07, "loss": 0.0159, "step": 384670 }, { "epoch": 4.110048613707997, "grad_norm": 0.11877649277448654, "learning_rate": 7.806948933785191e-07, "loss": 0.0119, "step": 384680 }, { "epoch": 4.110155457022277, "grad_norm": 0.6378280520439148, "learning_rate": 7.80680989643638e-07, "loss": 0.0037, "step": 384690 }, { "epoch": 4.110262300336556, "grad_norm": 0.10896947979927063, "learning_rate": 7.806670855918467e-07, "loss": 0.0045, "step": 384700 }, { "epoch": 4.110369143650836, "grad_norm": 0.03798128291964531, "learning_rate": 7.806531812231612e-07, "loss": 0.0063, "step": 384710 }, { "epoch": 4.110475986965116, "grad_norm": 0.0016849058447405696, "learning_rate": 7.806392765375969e-07, "loss": 0.0029, "step": 384720 }, { "epoch": 4.110582830279395, "grad_norm": 0.0026023867540061474, "learning_rate": 7.806253715351698e-07, "loss": 0.0132, "step": 384730 }, { "epoch": 4.110689673593675, "grad_norm": 0.3791104257106781, "learning_rate": 7.806114662158955e-07, "loss": 0.0117, "step": 384740 }, { "epoch": 4.110796516907954, "grad_norm": 1.7544002532958984, "learning_rate": 7.805975605797897e-07, "loss": 0.007, "step": 384750 }, { "epoch": 4.110903360222234, "grad_norm": 15.548401832580566, "learning_rate": 7.80583654626868e-07, "loss": 0.012, "step": 384760 }, { "epoch": 4.111010203536514, "grad_norm": 0.0030041029676795006, "learning_rate": 7.805697483571463e-07, "loss": 0.0009, "step": 384770 }, { "epoch": 4.111117046850794, "grad_norm": 0.10155997425317764, "learning_rate": 7.805558417706402e-07, "loss": 0.0042, "step": 384780 }, { "epoch": 4.111223890165073, "grad_norm": 0.20657065510749817, "learning_rate": 7.805419348673652e-07, "loss": 0.0022, "step": 384790 }, { "epoch": 4.1113307334793525, "grad_norm": 0.0010260320268571377, "learning_rate": 7.805280276473372e-07, "loss": 0.0112, "step": 384800 }, { "epoch": 4.111437576793632, "grad_norm": 0.00626370869576931, "learning_rate": 7.80514120110572e-07, "loss": 0.0175, "step": 384810 }, { "epoch": 4.111544420107911, "grad_norm": 0.0180235356092453, "learning_rate": 7.805002122570851e-07, "loss": 0.0008, "step": 384820 }, { "epoch": 4.111651263422192, "grad_norm": 2.480435609817505, "learning_rate": 7.804863040868923e-07, "loss": 0.0144, "step": 384830 }, { "epoch": 4.111758106736471, "grad_norm": 10.721665382385254, "learning_rate": 7.804723956000093e-07, "loss": 0.0164, "step": 384840 }, { "epoch": 4.111864950050751, "grad_norm": 0.05387313291430473, "learning_rate": 7.804584867964519e-07, "loss": 0.0198, "step": 384850 }, { "epoch": 4.11197179336503, "grad_norm": 0.1503642052412033, "learning_rate": 7.804445776762356e-07, "loss": 0.0051, "step": 384860 }, { "epoch": 4.11207863667931, "grad_norm": 6.220462799072266, "learning_rate": 7.804306682393762e-07, "loss": 0.0171, "step": 384870 }, { "epoch": 4.112185479993589, "grad_norm": 0.004957862198352814, "learning_rate": 7.804167584858894e-07, "loss": 0.001, "step": 384880 }, { "epoch": 4.112292323307869, "grad_norm": 0.00962867308408022, "learning_rate": 7.804028484157909e-07, "loss": 0.0022, "step": 384890 }, { "epoch": 4.112399166622149, "grad_norm": 0.10230732709169388, "learning_rate": 7.803889380290964e-07, "loss": 0.0031, "step": 384900 }, { "epoch": 4.112506009936428, "grad_norm": 0.06771323084831238, "learning_rate": 7.803750273258218e-07, "loss": 0.0166, "step": 384910 }, { "epoch": 4.112612853250708, "grad_norm": 0.06995119899511337, "learning_rate": 7.803611163059827e-07, "loss": 0.0169, "step": 384920 }, { "epoch": 4.112719696564987, "grad_norm": 0.0052313897758722305, "learning_rate": 7.803472049695944e-07, "loss": 0.0094, "step": 384930 }, { "epoch": 4.112826539879267, "grad_norm": 0.0022046815138310194, "learning_rate": 7.80333293316673e-07, "loss": 0.0021, "step": 384940 }, { "epoch": 4.112933383193547, "grad_norm": 0.008396599441766739, "learning_rate": 7.803193813472345e-07, "loss": 0.0176, "step": 384950 }, { "epoch": 4.1130402265078265, "grad_norm": 2.9988951683044434, "learning_rate": 7.803054690612941e-07, "loss": 0.008, "step": 384960 }, { "epoch": 4.113147069822106, "grad_norm": 0.20057028532028198, "learning_rate": 7.802915564588674e-07, "loss": 0.0012, "step": 384970 }, { "epoch": 4.113253913136385, "grad_norm": 0.2145523577928543, "learning_rate": 7.802776435399708e-07, "loss": 0.0058, "step": 384980 }, { "epoch": 4.113360756450665, "grad_norm": 0.0062043494544923306, "learning_rate": 7.802637303046191e-07, "loss": 0.0083, "step": 384990 }, { "epoch": 4.113467599764944, "grad_norm": 0.4786287844181061, "learning_rate": 7.802498167528288e-07, "loss": 0.0006, "step": 385000 }, { "epoch": 4.113574443079225, "grad_norm": 0.05810309201478958, "learning_rate": 7.802359028846152e-07, "loss": 0.0026, "step": 385010 }, { "epoch": 4.113681286393504, "grad_norm": 0.010718708857893944, "learning_rate": 7.802219886999944e-07, "loss": 0.001, "step": 385020 }, { "epoch": 4.113788129707784, "grad_norm": 1.70552396774292, "learning_rate": 7.802080741989815e-07, "loss": 0.0093, "step": 385030 }, { "epoch": 4.113894973022063, "grad_norm": 0.2641596496105194, "learning_rate": 7.801941593815926e-07, "loss": 0.0068, "step": 385040 }, { "epoch": 4.1140018163363425, "grad_norm": 1.1306172609329224, "learning_rate": 7.801802442478433e-07, "loss": 0.0225, "step": 385050 }, { "epoch": 4.114108659650622, "grad_norm": 0.01644964888691902, "learning_rate": 7.801663287977496e-07, "loss": 0.0042, "step": 385060 }, { "epoch": 4.114215502964902, "grad_norm": 5.943456172943115, "learning_rate": 7.801524130313268e-07, "loss": 0.0133, "step": 385070 }, { "epoch": 4.114322346279182, "grad_norm": 0.008471332490444183, "learning_rate": 7.801384969485908e-07, "loss": 0.0017, "step": 385080 }, { "epoch": 4.114429189593461, "grad_norm": 1.0764740705490112, "learning_rate": 7.801245805495573e-07, "loss": 0.0156, "step": 385090 }, { "epoch": 4.114536032907741, "grad_norm": 0.024843508377671242, "learning_rate": 7.80110663834242e-07, "loss": 0.0293, "step": 385100 }, { "epoch": 4.11464287622202, "grad_norm": 0.006812608800828457, "learning_rate": 7.800967468026606e-07, "loss": 0.0328, "step": 385110 }, { "epoch": 4.1147497195363, "grad_norm": 0.3233536183834076, "learning_rate": 7.800828294548289e-07, "loss": 0.0127, "step": 385120 }, { "epoch": 4.11485656285058, "grad_norm": 0.050063639879226685, "learning_rate": 7.800689117907624e-07, "loss": 0.0167, "step": 385130 }, { "epoch": 4.1149634061648594, "grad_norm": 2.6223032474517822, "learning_rate": 7.800549938104771e-07, "loss": 0.0037, "step": 385140 }, { "epoch": 4.115070249479139, "grad_norm": 0.47619715332984924, "learning_rate": 7.800410755139886e-07, "loss": 0.0169, "step": 385150 }, { "epoch": 4.115177092793418, "grad_norm": 0.0007843461353331804, "learning_rate": 7.800271569013124e-07, "loss": 0.006, "step": 385160 }, { "epoch": 4.115283936107698, "grad_norm": 0.25843459367752075, "learning_rate": 7.800132379724647e-07, "loss": 0.0265, "step": 385170 }, { "epoch": 4.115390779421977, "grad_norm": 0.034243859350681305, "learning_rate": 7.799993187274606e-07, "loss": 0.0012, "step": 385180 }, { "epoch": 4.115497622736258, "grad_norm": 8.410761833190918, "learning_rate": 7.799853991663163e-07, "loss": 0.0479, "step": 385190 }, { "epoch": 4.115604466050537, "grad_norm": 2.07446551322937, "learning_rate": 7.799714792890474e-07, "loss": 0.0177, "step": 385200 }, { "epoch": 4.1157113093648166, "grad_norm": 0.24807867407798767, "learning_rate": 7.799575590956696e-07, "loss": 0.0005, "step": 385210 }, { "epoch": 4.115818152679096, "grad_norm": 0.0020971044432371855, "learning_rate": 7.799436385861984e-07, "loss": 0.0053, "step": 385220 }, { "epoch": 4.1159249959933755, "grad_norm": 0.6281478404998779, "learning_rate": 7.799297177606498e-07, "loss": 0.0026, "step": 385230 }, { "epoch": 4.116031839307655, "grad_norm": 0.004869456868618727, "learning_rate": 7.799157966190394e-07, "loss": 0.0083, "step": 385240 }, { "epoch": 4.116138682621935, "grad_norm": 4.335755348205566, "learning_rate": 7.79901875161383e-07, "loss": 0.0177, "step": 385250 }, { "epoch": 4.116245525936215, "grad_norm": 0.010751049965620041, "learning_rate": 7.798879533876962e-07, "loss": 0.044, "step": 385260 }, { "epoch": 4.116352369250494, "grad_norm": 1.9943653345108032, "learning_rate": 7.798740312979947e-07, "loss": 0.0011, "step": 385270 }, { "epoch": 4.116459212564774, "grad_norm": 13.692712783813477, "learning_rate": 7.798601088922944e-07, "loss": 0.0286, "step": 385280 }, { "epoch": 4.116566055879053, "grad_norm": 0.00926963984966278, "learning_rate": 7.798461861706109e-07, "loss": 0.0128, "step": 385290 }, { "epoch": 4.116672899193333, "grad_norm": 0.034555260092020035, "learning_rate": 7.798322631329599e-07, "loss": 0.0011, "step": 385300 }, { "epoch": 4.116779742507613, "grad_norm": 1.0398858785629272, "learning_rate": 7.798183397793573e-07, "loss": 0.0027, "step": 385310 }, { "epoch": 4.116886585821892, "grad_norm": 0.007627036888152361, "learning_rate": 7.798044161098186e-07, "loss": 0.0026, "step": 385320 }, { "epoch": 4.116993429136172, "grad_norm": 0.010458595119416714, "learning_rate": 7.797904921243595e-07, "loss": 0.0065, "step": 385330 }, { "epoch": 4.117100272450451, "grad_norm": 0.3044760823249817, "learning_rate": 7.797765678229959e-07, "loss": 0.0109, "step": 385340 }, { "epoch": 4.117207115764731, "grad_norm": 5.210391521453857, "learning_rate": 7.797626432057436e-07, "loss": 0.0083, "step": 385350 }, { "epoch": 4.11731395907901, "grad_norm": 0.01567261666059494, "learning_rate": 7.797487182726179e-07, "loss": 0.0128, "step": 385360 }, { "epoch": 4.117420802393291, "grad_norm": 0.001574994414113462, "learning_rate": 7.797347930236349e-07, "loss": 0.0392, "step": 385370 }, { "epoch": 4.11752764570757, "grad_norm": 0.19305403530597687, "learning_rate": 7.7972086745881e-07, "loss": 0.0029, "step": 385380 }, { "epoch": 4.1176344890218495, "grad_norm": 3.2094905376434326, "learning_rate": 7.797069415781594e-07, "loss": 0.0045, "step": 385390 }, { "epoch": 4.117741332336129, "grad_norm": 0.05490877106785774, "learning_rate": 7.796930153816986e-07, "loss": 0.0168, "step": 385400 }, { "epoch": 4.117848175650408, "grad_norm": 0.010183357633650303, "learning_rate": 7.796790888694429e-07, "loss": 0.0017, "step": 385410 }, { "epoch": 4.117955018964688, "grad_norm": 2.6167736053466797, "learning_rate": 7.796651620414088e-07, "loss": 0.0015, "step": 385420 }, { "epoch": 4.118061862278968, "grad_norm": 0.011822136119008064, "learning_rate": 7.796512348976115e-07, "loss": 0.0012, "step": 385430 }, { "epoch": 4.118168705593248, "grad_norm": 0.009413682855665684, "learning_rate": 7.796373074380666e-07, "loss": 0.0059, "step": 385440 }, { "epoch": 4.118275548907527, "grad_norm": 0.005305557046085596, "learning_rate": 7.796233796627904e-07, "loss": 0.0085, "step": 385450 }, { "epoch": 4.118382392221807, "grad_norm": 3.9783542156219482, "learning_rate": 7.796094515717982e-07, "loss": 0.0128, "step": 385460 }, { "epoch": 4.118489235536086, "grad_norm": 0.0027543348260223866, "learning_rate": 7.795955231651057e-07, "loss": 0.0002, "step": 385470 }, { "epoch": 4.118596078850366, "grad_norm": 0.0026604661252349615, "learning_rate": 7.795815944427288e-07, "loss": 0.0105, "step": 385480 }, { "epoch": 4.118702922164646, "grad_norm": 0.03155865892767906, "learning_rate": 7.795676654046835e-07, "loss": 0.0117, "step": 385490 }, { "epoch": 4.118809765478925, "grad_norm": 0.04140916094183922, "learning_rate": 7.795537360509847e-07, "loss": 0.005, "step": 385500 }, { "epoch": 4.118916608793205, "grad_norm": 4.337657451629639, "learning_rate": 7.79539806381649e-07, "loss": 0.015, "step": 385510 }, { "epoch": 4.119023452107484, "grad_norm": 0.8487147092819214, "learning_rate": 7.795258763966916e-07, "loss": 0.0095, "step": 385520 }, { "epoch": 4.119130295421764, "grad_norm": 0.21092021465301514, "learning_rate": 7.795119460961284e-07, "loss": 0.0233, "step": 385530 }, { "epoch": 4.119237138736044, "grad_norm": 0.003938437905162573, "learning_rate": 7.794980154799753e-07, "loss": 0.0005, "step": 385540 }, { "epoch": 4.1193439820503235, "grad_norm": 0.7225725054740906, "learning_rate": 7.794840845482476e-07, "loss": 0.0017, "step": 385550 }, { "epoch": 4.119450825364603, "grad_norm": 0.0038266750052571297, "learning_rate": 7.794701533009613e-07, "loss": 0.005, "step": 385560 }, { "epoch": 4.119557668678882, "grad_norm": 0.008467636071145535, "learning_rate": 7.794562217381322e-07, "loss": 0.0053, "step": 385570 }, { "epoch": 4.119664511993162, "grad_norm": 5.5229668617248535, "learning_rate": 7.794422898597759e-07, "loss": 0.0134, "step": 385580 }, { "epoch": 4.119771355307441, "grad_norm": 1.2157692909240723, "learning_rate": 7.794283576659082e-07, "loss": 0.0033, "step": 385590 }, { "epoch": 4.119878198621722, "grad_norm": 0.2935529947280884, "learning_rate": 7.794144251565447e-07, "loss": 0.0087, "step": 385600 }, { "epoch": 4.119985041936001, "grad_norm": 0.4770961105823517, "learning_rate": 7.794004923317013e-07, "loss": 0.0007, "step": 385610 }, { "epoch": 4.120091885250281, "grad_norm": 3.808708906173706, "learning_rate": 7.793865591913937e-07, "loss": 0.0143, "step": 385620 }, { "epoch": 4.12019872856456, "grad_norm": 0.0027247993275523186, "learning_rate": 7.793726257356375e-07, "loss": 0.0039, "step": 385630 }, { "epoch": 4.1203055718788395, "grad_norm": 0.001403610222041607, "learning_rate": 7.793586919644486e-07, "loss": 0.0116, "step": 385640 }, { "epoch": 4.120412415193119, "grad_norm": 4.181693077087402, "learning_rate": 7.793447578778426e-07, "loss": 0.0209, "step": 385650 }, { "epoch": 4.120519258507399, "grad_norm": 0.008970608003437519, "learning_rate": 7.793308234758354e-07, "loss": 0.005, "step": 385660 }, { "epoch": 4.120626101821679, "grad_norm": 0.039209868758916855, "learning_rate": 7.793168887584424e-07, "loss": 0.0129, "step": 385670 }, { "epoch": 4.120732945135958, "grad_norm": 0.006309219170361757, "learning_rate": 7.793029537256797e-07, "loss": 0.0125, "step": 385680 }, { "epoch": 4.120839788450238, "grad_norm": 0.06383197009563446, "learning_rate": 7.792890183775629e-07, "loss": 0.0019, "step": 385690 }, { "epoch": 4.120946631764517, "grad_norm": 0.0060089388862252235, "learning_rate": 7.792750827141076e-07, "loss": 0.043, "step": 385700 }, { "epoch": 4.121053475078797, "grad_norm": 0.442787766456604, "learning_rate": 7.792611467353297e-07, "loss": 0.0197, "step": 385710 }, { "epoch": 4.121160318393077, "grad_norm": 1.2762975692749023, "learning_rate": 7.79247210441245e-07, "loss": 0.0026, "step": 385720 }, { "epoch": 4.121267161707356, "grad_norm": 0.0017968526808544993, "learning_rate": 7.79233273831869e-07, "loss": 0.0022, "step": 385730 }, { "epoch": 4.121374005021636, "grad_norm": 0.09974294900894165, "learning_rate": 7.792193369072175e-07, "loss": 0.0056, "step": 385740 }, { "epoch": 4.121480848335915, "grad_norm": 0.027029238641262054, "learning_rate": 7.792053996673065e-07, "loss": 0.0018, "step": 385750 }, { "epoch": 4.121587691650195, "grad_norm": 0.0009226236725226045, "learning_rate": 7.791914621121513e-07, "loss": 0.0094, "step": 385760 }, { "epoch": 4.121694534964474, "grad_norm": 0.12386251986026764, "learning_rate": 7.791775242417681e-07, "loss": 0.0054, "step": 385770 }, { "epoch": 4.121801378278755, "grad_norm": 3.735071897506714, "learning_rate": 7.791635860561723e-07, "loss": 0.0272, "step": 385780 }, { "epoch": 4.121908221593034, "grad_norm": 0.001828086213208735, "learning_rate": 7.791496475553796e-07, "loss": 0.0039, "step": 385790 }, { "epoch": 4.1220150649073135, "grad_norm": 0.12304315716028214, "learning_rate": 7.791357087394061e-07, "loss": 0.0002, "step": 385800 }, { "epoch": 4.122121908221593, "grad_norm": 0.0005156444967724383, "learning_rate": 7.791217696082672e-07, "loss": 0.0021, "step": 385810 }, { "epoch": 4.122228751535872, "grad_norm": 0.00788660254329443, "learning_rate": 7.791078301619788e-07, "loss": 0.0065, "step": 385820 }, { "epoch": 4.122335594850152, "grad_norm": 0.018824778497219086, "learning_rate": 7.790938904005566e-07, "loss": 0.0033, "step": 385830 }, { "epoch": 4.122442438164432, "grad_norm": 0.01873435638844967, "learning_rate": 7.790799503240163e-07, "loss": 0.0008, "step": 385840 }, { "epoch": 4.122549281478712, "grad_norm": 0.02214772440493107, "learning_rate": 7.790660099323737e-07, "loss": 0.0054, "step": 385850 }, { "epoch": 4.122656124792991, "grad_norm": 0.015584168024361134, "learning_rate": 7.790520692256446e-07, "loss": 0.0021, "step": 385860 }, { "epoch": 4.122762968107271, "grad_norm": 0.006300072651356459, "learning_rate": 7.790381282038445e-07, "loss": 0.0061, "step": 385870 }, { "epoch": 4.12286981142155, "grad_norm": 0.003927603363990784, "learning_rate": 7.790241868669893e-07, "loss": 0.0004, "step": 385880 }, { "epoch": 4.1229766547358295, "grad_norm": 4.575760841369629, "learning_rate": 7.790102452150949e-07, "loss": 0.0083, "step": 385890 }, { "epoch": 4.12308349805011, "grad_norm": 3.708888530731201, "learning_rate": 7.789963032481765e-07, "loss": 0.0082, "step": 385900 }, { "epoch": 4.123190341364389, "grad_norm": 0.15272921323776245, "learning_rate": 7.789823609662505e-07, "loss": 0.0326, "step": 385910 }, { "epoch": 4.123297184678669, "grad_norm": 10.25727367401123, "learning_rate": 7.789684183693325e-07, "loss": 0.0439, "step": 385920 }, { "epoch": 4.123404027992948, "grad_norm": 0.23678427934646606, "learning_rate": 7.789544754574377e-07, "loss": 0.0193, "step": 385930 }, { "epoch": 4.123510871307228, "grad_norm": 0.5389388799667358, "learning_rate": 7.789405322305826e-07, "loss": 0.0128, "step": 385940 }, { "epoch": 4.123617714621507, "grad_norm": 0.0013546380214393139, "learning_rate": 7.789265886887826e-07, "loss": 0.0082, "step": 385950 }, { "epoch": 4.1237245579357875, "grad_norm": 0.0009212475270032883, "learning_rate": 7.78912644832053e-07, "loss": 0.0301, "step": 385960 }, { "epoch": 4.123831401250067, "grad_norm": 0.2692703902721405, "learning_rate": 7.788987006604104e-07, "loss": 0.0054, "step": 385970 }, { "epoch": 4.123938244564346, "grad_norm": 0.08427071571350098, "learning_rate": 7.788847561738701e-07, "loss": 0.0023, "step": 385980 }, { "epoch": 4.124045087878626, "grad_norm": 0.010831405408680439, "learning_rate": 7.788708113724477e-07, "loss": 0.0161, "step": 385990 }, { "epoch": 4.124151931192905, "grad_norm": 0.45181822776794434, "learning_rate": 7.788568662561592e-07, "loss": 0.0058, "step": 386000 }, { "epoch": 4.124258774507185, "grad_norm": 0.5820674896240234, "learning_rate": 7.788429208250202e-07, "loss": 0.0059, "step": 386010 }, { "epoch": 4.124365617821465, "grad_norm": 0.02898930385708809, "learning_rate": 7.788289750790464e-07, "loss": 0.0068, "step": 386020 }, { "epoch": 4.124472461135745, "grad_norm": 3.2996294498443604, "learning_rate": 7.788150290182538e-07, "loss": 0.0066, "step": 386030 }, { "epoch": 4.124579304450024, "grad_norm": 0.033021070063114166, "learning_rate": 7.788010826426578e-07, "loss": 0.0001, "step": 386040 }, { "epoch": 4.1246861477643035, "grad_norm": 3.9610867500305176, "learning_rate": 7.787871359522747e-07, "loss": 0.0222, "step": 386050 }, { "epoch": 4.124792991078583, "grad_norm": 0.07869032770395279, "learning_rate": 7.787731889471196e-07, "loss": 0.0014, "step": 386060 }, { "epoch": 4.1248998343928625, "grad_norm": 0.0014113504439592361, "learning_rate": 7.787592416272085e-07, "loss": 0.013, "step": 386070 }, { "epoch": 4.125006677707143, "grad_norm": 0.046443499624729156, "learning_rate": 7.787452939925573e-07, "loss": 0.0254, "step": 386080 }, { "epoch": 4.125113521021422, "grad_norm": 0.023213939741253853, "learning_rate": 7.787313460431816e-07, "loss": 0.0018, "step": 386090 }, { "epoch": 4.125220364335702, "grad_norm": 0.03772977739572525, "learning_rate": 7.78717397779097e-07, "loss": 0.0096, "step": 386100 }, { "epoch": 4.125327207649981, "grad_norm": 0.02046746201813221, "learning_rate": 7.787034492003198e-07, "loss": 0.0003, "step": 386110 }, { "epoch": 4.125434050964261, "grad_norm": 0.002380955731496215, "learning_rate": 7.786895003068651e-07, "loss": 0.0537, "step": 386120 }, { "epoch": 4.12554089427854, "grad_norm": 0.011562309227883816, "learning_rate": 7.786755510987488e-07, "loss": 0.0059, "step": 386130 }, { "epoch": 4.12564773759282, "grad_norm": 0.002783501287922263, "learning_rate": 7.786616015759871e-07, "loss": 0.0252, "step": 386140 }, { "epoch": 4.1257545809071, "grad_norm": 0.00065945292590186, "learning_rate": 7.786476517385953e-07, "loss": 0.0059, "step": 386150 }, { "epoch": 4.125861424221379, "grad_norm": 2.50476336479187, "learning_rate": 7.786337015865892e-07, "loss": 0.0017, "step": 386160 }, { "epoch": 4.125968267535659, "grad_norm": 13.827284812927246, "learning_rate": 7.786197511199846e-07, "loss": 0.0256, "step": 386170 }, { "epoch": 4.126075110849938, "grad_norm": 0.001510462025180459, "learning_rate": 7.786058003387973e-07, "loss": 0.0267, "step": 386180 }, { "epoch": 4.126181954164219, "grad_norm": 0.05328488349914551, "learning_rate": 7.785918492430431e-07, "loss": 0.0151, "step": 386190 }, { "epoch": 4.126288797478498, "grad_norm": 0.738712728023529, "learning_rate": 7.785778978327375e-07, "loss": 0.0258, "step": 386200 }, { "epoch": 4.1263956407927775, "grad_norm": 4.286586284637451, "learning_rate": 7.785639461078967e-07, "loss": 0.0067, "step": 386210 }, { "epoch": 4.126502484107057, "grad_norm": 0.01803182251751423, "learning_rate": 7.78549994068536e-07, "loss": 0.002, "step": 386220 }, { "epoch": 4.1266093274213365, "grad_norm": 1.380757451057434, "learning_rate": 7.785360417146714e-07, "loss": 0.0037, "step": 386230 }, { "epoch": 4.126716170735616, "grad_norm": 1.6789186000823975, "learning_rate": 7.785220890463184e-07, "loss": 0.008, "step": 386240 }, { "epoch": 4.126823014049895, "grad_norm": 0.6280413269996643, "learning_rate": 7.785081360634932e-07, "loss": 0.0379, "step": 386250 }, { "epoch": 4.126929857364176, "grad_norm": 0.33580482006073, "learning_rate": 7.784941827662113e-07, "loss": 0.0133, "step": 386260 }, { "epoch": 4.127036700678455, "grad_norm": 1.417659878730774, "learning_rate": 7.784802291544882e-07, "loss": 0.0028, "step": 386270 }, { "epoch": 4.127143543992735, "grad_norm": 0.001436092657968402, "learning_rate": 7.784662752283401e-07, "loss": 0.0006, "step": 386280 }, { "epoch": 4.127250387307014, "grad_norm": 4.734485626220703, "learning_rate": 7.784523209877825e-07, "loss": 0.0053, "step": 386290 }, { "epoch": 4.127357230621294, "grad_norm": 0.01214653067290783, "learning_rate": 7.784383664328312e-07, "loss": 0.0062, "step": 386300 }, { "epoch": 4.127464073935574, "grad_norm": 0.0010932708391919732, "learning_rate": 7.78424411563502e-07, "loss": 0.0047, "step": 386310 }, { "epoch": 4.127570917249853, "grad_norm": 0.24825236201286316, "learning_rate": 7.784104563798106e-07, "loss": 0.0029, "step": 386320 }, { "epoch": 4.127677760564133, "grad_norm": 0.0010920017957687378, "learning_rate": 7.783965008817728e-07, "loss": 0.004, "step": 386330 }, { "epoch": 4.127784603878412, "grad_norm": 1.761924147605896, "learning_rate": 7.783825450694043e-07, "loss": 0.0202, "step": 386340 }, { "epoch": 4.127891447192692, "grad_norm": 0.002558028558269143, "learning_rate": 7.783685889427209e-07, "loss": 0.0069, "step": 386350 }, { "epoch": 4.127998290506971, "grad_norm": 0.014317595399916172, "learning_rate": 7.783546325017385e-07, "loss": 0.0048, "step": 386360 }, { "epoch": 4.1281051338212515, "grad_norm": 0.0060041602700948715, "learning_rate": 7.783406757464725e-07, "loss": 0.0006, "step": 386370 }, { "epoch": 4.128211977135531, "grad_norm": 8.481090545654297, "learning_rate": 7.783267186769388e-07, "loss": 0.0098, "step": 386380 }, { "epoch": 4.1283188204498105, "grad_norm": 0.009464547969400883, "learning_rate": 7.783127612931535e-07, "loss": 0.0194, "step": 386390 }, { "epoch": 4.12842566376409, "grad_norm": 18.21856689453125, "learning_rate": 7.782988035951322e-07, "loss": 0.0043, "step": 386400 }, { "epoch": 4.128532507078369, "grad_norm": 1.6670329570770264, "learning_rate": 7.782848455828901e-07, "loss": 0.0158, "step": 386410 }, { "epoch": 4.128639350392649, "grad_norm": 1.4399296045303345, "learning_rate": 7.782708872564437e-07, "loss": 0.0103, "step": 386420 }, { "epoch": 4.128746193706929, "grad_norm": 0.005855937488377094, "learning_rate": 7.782569286158085e-07, "loss": 0.014, "step": 386430 }, { "epoch": 4.128853037021209, "grad_norm": 0.0016005815705284476, "learning_rate": 7.78242969661e-07, "loss": 0.0107, "step": 386440 }, { "epoch": 4.128959880335488, "grad_norm": 0.0457628034055233, "learning_rate": 7.782290103920344e-07, "loss": 0.0138, "step": 386450 }, { "epoch": 4.129066723649768, "grad_norm": 3.3154611587524414, "learning_rate": 7.782150508089271e-07, "loss": 0.0049, "step": 386460 }, { "epoch": 4.129173566964047, "grad_norm": 0.09829910099506378, "learning_rate": 7.78201090911694e-07, "loss": 0.0268, "step": 386470 }, { "epoch": 4.1292804102783265, "grad_norm": 10.070955276489258, "learning_rate": 7.78187130700351e-07, "loss": 0.0395, "step": 386480 }, { "epoch": 4.129387253592607, "grad_norm": 0.009087820537388325, "learning_rate": 7.781731701749137e-07, "loss": 0.0155, "step": 386490 }, { "epoch": 4.129494096906886, "grad_norm": 0.03685971722006798, "learning_rate": 7.781592093353979e-07, "loss": 0.0043, "step": 386500 }, { "epoch": 4.129600940221166, "grad_norm": 1.5723756551742554, "learning_rate": 7.781452481818194e-07, "loss": 0.0103, "step": 386510 }, { "epoch": 4.129707783535445, "grad_norm": 3.7826030254364014, "learning_rate": 7.781312867141938e-07, "loss": 0.005, "step": 386520 }, { "epoch": 4.129814626849725, "grad_norm": 0.018110966309905052, "learning_rate": 7.78117324932537e-07, "loss": 0.0064, "step": 386530 }, { "epoch": 4.129921470164004, "grad_norm": 6.290506362915039, "learning_rate": 7.781033628368648e-07, "loss": 0.0171, "step": 386540 }, { "epoch": 4.1300283134782845, "grad_norm": 0.10083457827568054, "learning_rate": 7.780894004271928e-07, "loss": 0.0086, "step": 386550 }, { "epoch": 4.130135156792564, "grad_norm": 0.014074505306780338, "learning_rate": 7.78075437703537e-07, "loss": 0.0265, "step": 386560 }, { "epoch": 4.130242000106843, "grad_norm": 7.27513313293457, "learning_rate": 7.780614746659131e-07, "loss": 0.0104, "step": 386570 }, { "epoch": 4.130348843421123, "grad_norm": 0.02041587606072426, "learning_rate": 7.780475113143366e-07, "loss": 0.0089, "step": 386580 }, { "epoch": 4.130455686735402, "grad_norm": 0.0023969411849975586, "learning_rate": 7.780335476488237e-07, "loss": 0.009, "step": 386590 }, { "epoch": 4.130562530049682, "grad_norm": 0.00784003920853138, "learning_rate": 7.780195836693899e-07, "loss": 0.0387, "step": 386600 }, { "epoch": 4.130669373363962, "grad_norm": 4.015779972076416, "learning_rate": 7.780056193760509e-07, "loss": 0.0024, "step": 386610 }, { "epoch": 4.130776216678242, "grad_norm": 0.0009315260103903711, "learning_rate": 7.779916547688226e-07, "loss": 0.0283, "step": 386620 }, { "epoch": 4.130883059992521, "grad_norm": 0.04037370905280113, "learning_rate": 7.779776898477208e-07, "loss": 0.0055, "step": 386630 }, { "epoch": 4.1309899033068005, "grad_norm": 0.02978174015879631, "learning_rate": 7.77963724612761e-07, "loss": 0.0009, "step": 386640 }, { "epoch": 4.13109674662108, "grad_norm": 2.50637149810791, "learning_rate": 7.779497590639595e-07, "loss": 0.0078, "step": 386650 }, { "epoch": 4.131203589935359, "grad_norm": 0.0654897540807724, "learning_rate": 7.779357932013316e-07, "loss": 0.0034, "step": 386660 }, { "epoch": 4.13131043324964, "grad_norm": 0.6538997292518616, "learning_rate": 7.779218270248933e-07, "loss": 0.0008, "step": 386670 }, { "epoch": 4.131417276563919, "grad_norm": 0.000821334368083626, "learning_rate": 7.7790786053466e-07, "loss": 0.0078, "step": 386680 }, { "epoch": 4.131524119878199, "grad_norm": 0.005276641808450222, "learning_rate": 7.77893893730648e-07, "loss": 0.0031, "step": 386690 }, { "epoch": 4.131630963192478, "grad_norm": 0.02622506022453308, "learning_rate": 7.778799266128728e-07, "loss": 0.0043, "step": 386700 }, { "epoch": 4.131737806506758, "grad_norm": 1.6901262998580933, "learning_rate": 7.778659591813501e-07, "loss": 0.0017, "step": 386710 }, { "epoch": 4.131844649821037, "grad_norm": 0.07772038131952286, "learning_rate": 7.778519914360959e-07, "loss": 0.0285, "step": 386720 }, { "epoch": 4.131951493135317, "grad_norm": 0.007473496254533529, "learning_rate": 7.778380233771257e-07, "loss": 0.0002, "step": 386730 }, { "epoch": 4.132058336449597, "grad_norm": 0.0029048402793705463, "learning_rate": 7.778240550044555e-07, "loss": 0.0007, "step": 386740 }, { "epoch": 4.132165179763876, "grad_norm": 0.0013159941881895065, "learning_rate": 7.778100863181008e-07, "loss": 0.0108, "step": 386750 }, { "epoch": 4.132272023078156, "grad_norm": 11.303750038146973, "learning_rate": 7.777961173180778e-07, "loss": 0.0846, "step": 386760 }, { "epoch": 4.132378866392435, "grad_norm": 0.03709554299712181, "learning_rate": 7.777821480044017e-07, "loss": 0.0229, "step": 386770 }, { "epoch": 4.132485709706715, "grad_norm": 1.7899320125579834, "learning_rate": 7.777681783770889e-07, "loss": 0.0038, "step": 386780 }, { "epoch": 4.132592553020995, "grad_norm": 0.05981815978884697, "learning_rate": 7.777542084361548e-07, "loss": 0.0105, "step": 386790 }, { "epoch": 4.1326993963352745, "grad_norm": 0.01189530547708273, "learning_rate": 7.777402381816151e-07, "loss": 0.0026, "step": 386800 }, { "epoch": 4.132806239649554, "grad_norm": 0.0014856199268251657, "learning_rate": 7.777262676134857e-07, "loss": 0.0019, "step": 386810 }, { "epoch": 4.132913082963833, "grad_norm": 0.014844371937215328, "learning_rate": 7.777122967317826e-07, "loss": 0.0068, "step": 386820 }, { "epoch": 4.133019926278113, "grad_norm": 1.7009576559066772, "learning_rate": 7.776983255365212e-07, "loss": 0.006, "step": 386830 }, { "epoch": 4.133126769592392, "grad_norm": 1.1402491331100464, "learning_rate": 7.776843540277173e-07, "loss": 0.0139, "step": 386840 }, { "epoch": 4.133233612906673, "grad_norm": 0.3309377431869507, "learning_rate": 7.776703822053871e-07, "loss": 0.0146, "step": 386850 }, { "epoch": 4.133340456220952, "grad_norm": 0.9210747480392456, "learning_rate": 7.77656410069546e-07, "loss": 0.0103, "step": 386860 }, { "epoch": 4.133447299535232, "grad_norm": 0.31250184774398804, "learning_rate": 7.776424376202097e-07, "loss": 0.0035, "step": 386870 }, { "epoch": 4.133554142849511, "grad_norm": 8.560227394104004, "learning_rate": 7.776284648573943e-07, "loss": 0.0088, "step": 386880 }, { "epoch": 4.1336609861637905, "grad_norm": 0.18628022074699402, "learning_rate": 7.776144917811154e-07, "loss": 0.0017, "step": 386890 }, { "epoch": 4.133767829478071, "grad_norm": 0.9156970381736755, "learning_rate": 7.776005183913886e-07, "loss": 0.0032, "step": 386900 }, { "epoch": 4.13387467279235, "grad_norm": 0.02279200591146946, "learning_rate": 7.775865446882299e-07, "loss": 0.0014, "step": 386910 }, { "epoch": 4.13398151610663, "grad_norm": 0.04803325608372688, "learning_rate": 7.775725706716553e-07, "loss": 0.0099, "step": 386920 }, { "epoch": 4.134088359420909, "grad_norm": 0.13544140756130219, "learning_rate": 7.775585963416801e-07, "loss": 0.0013, "step": 386930 }, { "epoch": 4.134195202735189, "grad_norm": 0.4677833616733551, "learning_rate": 7.775446216983204e-07, "loss": 0.0072, "step": 386940 }, { "epoch": 4.134302046049468, "grad_norm": 0.0035424446687102318, "learning_rate": 7.775306467415916e-07, "loss": 0.0062, "step": 386950 }, { "epoch": 4.134408889363748, "grad_norm": 1.0011217594146729, "learning_rate": 7.775166714715101e-07, "loss": 0.0054, "step": 386960 }, { "epoch": 4.134515732678028, "grad_norm": 0.004261113703250885, "learning_rate": 7.775026958880913e-07, "loss": 0.0, "step": 386970 }, { "epoch": 4.134622575992307, "grad_norm": 3.940168619155884, "learning_rate": 7.774887199913508e-07, "loss": 0.0056, "step": 386980 }, { "epoch": 4.134729419306587, "grad_norm": 0.10138574987649918, "learning_rate": 7.774747437813047e-07, "loss": 0.0021, "step": 386990 }, { "epoch": 4.134836262620866, "grad_norm": 0.02094799093902111, "learning_rate": 7.774607672579687e-07, "loss": 0.0374, "step": 387000 }, { "epoch": 4.134943105935146, "grad_norm": 0.418111115694046, "learning_rate": 7.774467904213586e-07, "loss": 0.0086, "step": 387010 }, { "epoch": 4.135049949249426, "grad_norm": 0.48814743757247925, "learning_rate": 7.7743281327149e-07, "loss": 0.004, "step": 387020 }, { "epoch": 4.135156792563706, "grad_norm": 0.01066665444523096, "learning_rate": 7.774188358083789e-07, "loss": 0.0058, "step": 387030 }, { "epoch": 4.135263635877985, "grad_norm": 2.5276072025299072, "learning_rate": 7.774048580320409e-07, "loss": 0.0073, "step": 387040 }, { "epoch": 4.1353704791922645, "grad_norm": 0.6049832105636597, "learning_rate": 7.77390879942492e-07, "loss": 0.0096, "step": 387050 }, { "epoch": 4.135477322506544, "grad_norm": 0.024218782782554626, "learning_rate": 7.773769015397478e-07, "loss": 0.0057, "step": 387060 }, { "epoch": 4.135584165820823, "grad_norm": 0.011972976848483086, "learning_rate": 7.773629228238242e-07, "loss": 0.0072, "step": 387070 }, { "epoch": 4.135691009135104, "grad_norm": 3.1459434032440186, "learning_rate": 7.77348943794737e-07, "loss": 0.0137, "step": 387080 }, { "epoch": 4.135797852449383, "grad_norm": 0.0026674894616007805, "learning_rate": 7.773349644525018e-07, "loss": 0.0011, "step": 387090 }, { "epoch": 4.135904695763663, "grad_norm": 0.05894700810313225, "learning_rate": 7.773209847971345e-07, "loss": 0.0117, "step": 387100 }, { "epoch": 4.136011539077942, "grad_norm": 8.424833297729492, "learning_rate": 7.773070048286508e-07, "loss": 0.0176, "step": 387110 }, { "epoch": 4.136118382392222, "grad_norm": 0.16135941445827484, "learning_rate": 7.772930245470667e-07, "loss": 0.0227, "step": 387120 }, { "epoch": 4.136225225706501, "grad_norm": 0.017381008714437485, "learning_rate": 7.772790439523978e-07, "loss": 0.0021, "step": 387130 }, { "epoch": 4.136332069020781, "grad_norm": 0.01693885400891304, "learning_rate": 7.772650630446599e-07, "loss": 0.0024, "step": 387140 }, { "epoch": 4.136438912335061, "grad_norm": 0.028421849012374878, "learning_rate": 7.772510818238687e-07, "loss": 0.009, "step": 387150 }, { "epoch": 4.13654575564934, "grad_norm": 0.19189581274986267, "learning_rate": 7.772371002900403e-07, "loss": 0.0012, "step": 387160 }, { "epoch": 4.13665259896362, "grad_norm": 0.038559697568416595, "learning_rate": 7.772231184431902e-07, "loss": 0.0015, "step": 387170 }, { "epoch": 4.136759442277899, "grad_norm": 10.256518363952637, "learning_rate": 7.772091362833343e-07, "loss": 0.0329, "step": 387180 }, { "epoch": 4.136866285592179, "grad_norm": 0.0681692361831665, "learning_rate": 7.771951538104884e-07, "loss": 0.0072, "step": 387190 }, { "epoch": 4.136973128906459, "grad_norm": 0.011291002854704857, "learning_rate": 7.771811710246683e-07, "loss": 0.019, "step": 387200 }, { "epoch": 4.1370799722207385, "grad_norm": 0.3363611698150635, "learning_rate": 7.771671879258895e-07, "loss": 0.0357, "step": 387210 }, { "epoch": 4.137186815535018, "grad_norm": 2.7288973331451416, "learning_rate": 7.771532045141682e-07, "loss": 0.0197, "step": 387220 }, { "epoch": 4.1372936588492975, "grad_norm": 0.061532292515039444, "learning_rate": 7.7713922078952e-07, "loss": 0.0012, "step": 387230 }, { "epoch": 4.137400502163577, "grad_norm": 1.0526201725006104, "learning_rate": 7.771252367519607e-07, "loss": 0.0018, "step": 387240 }, { "epoch": 4.137507345477856, "grad_norm": 0.023189973086118698, "learning_rate": 7.771112524015061e-07, "loss": 0.0147, "step": 387250 }, { "epoch": 4.137614188792137, "grad_norm": 0.2808513939380646, "learning_rate": 7.770972677381719e-07, "loss": 0.0073, "step": 387260 }, { "epoch": 4.137721032106416, "grad_norm": 0.01841166242957115, "learning_rate": 7.770832827619741e-07, "loss": 0.0058, "step": 387270 }, { "epoch": 4.137827875420696, "grad_norm": 28.36276626586914, "learning_rate": 7.770692974729283e-07, "loss": 0.0159, "step": 387280 }, { "epoch": 4.137934718734975, "grad_norm": 0.05374932661652565, "learning_rate": 7.770553118710503e-07, "loss": 0.0002, "step": 387290 }, { "epoch": 4.1380415620492546, "grad_norm": 2.3745193481445312, "learning_rate": 7.770413259563561e-07, "loss": 0.0076, "step": 387300 }, { "epoch": 4.138148405363534, "grad_norm": 0.09448575973510742, "learning_rate": 7.770273397288612e-07, "loss": 0.0103, "step": 387310 }, { "epoch": 4.138255248677814, "grad_norm": 10.556022644042969, "learning_rate": 7.770133531885816e-07, "loss": 0.005, "step": 387320 }, { "epoch": 4.138362091992094, "grad_norm": 0.03123963437974453, "learning_rate": 7.769993663355329e-07, "loss": 0.0205, "step": 387330 }, { "epoch": 4.138468935306373, "grad_norm": 1.0625921487808228, "learning_rate": 7.769853791697312e-07, "loss": 0.0049, "step": 387340 }, { "epoch": 4.138575778620653, "grad_norm": 0.011921669356524944, "learning_rate": 7.769713916911918e-07, "loss": 0.0029, "step": 387350 }, { "epoch": 4.138682621934932, "grad_norm": 0.018860915675759315, "learning_rate": 7.76957403899931e-07, "loss": 0.0036, "step": 387360 }, { "epoch": 4.138789465249212, "grad_norm": 0.8010743856430054, "learning_rate": 7.769434157959645e-07, "loss": 0.0137, "step": 387370 }, { "epoch": 4.138896308563492, "grad_norm": 0.007424372714012861, "learning_rate": 7.769294273793076e-07, "loss": 0.0113, "step": 387380 }, { "epoch": 4.1390031518777715, "grad_norm": 3.771719455718994, "learning_rate": 7.769154386499769e-07, "loss": 0.0027, "step": 387390 }, { "epoch": 4.139109995192051, "grad_norm": 22.02638816833496, "learning_rate": 7.769014496079876e-07, "loss": 0.034, "step": 387400 }, { "epoch": 4.13921683850633, "grad_norm": 0.6680570244789124, "learning_rate": 7.768874602533557e-07, "loss": 0.0073, "step": 387410 }, { "epoch": 4.13932368182061, "grad_norm": 3.200751304626465, "learning_rate": 7.768734705860969e-07, "loss": 0.0042, "step": 387420 }, { "epoch": 4.139430525134889, "grad_norm": 1.7337983846664429, "learning_rate": 7.768594806062271e-07, "loss": 0.0085, "step": 387430 }, { "epoch": 4.13953736844917, "grad_norm": 0.03645022585988045, "learning_rate": 7.76845490313762e-07, "loss": 0.0174, "step": 387440 }, { "epoch": 4.139644211763449, "grad_norm": 3.2678539752960205, "learning_rate": 7.768314997087175e-07, "loss": 0.0147, "step": 387450 }, { "epoch": 4.139751055077729, "grad_norm": 5.348913192749023, "learning_rate": 7.768175087911093e-07, "loss": 0.0102, "step": 387460 }, { "epoch": 4.139857898392008, "grad_norm": 16.290084838867188, "learning_rate": 7.768035175609533e-07, "loss": 0.0194, "step": 387470 }, { "epoch": 4.1399647417062875, "grad_norm": 2.9157419204711914, "learning_rate": 7.76789526018265e-07, "loss": 0.0157, "step": 387480 }, { "epoch": 4.140071585020567, "grad_norm": 0.220615416765213, "learning_rate": 7.767755341630607e-07, "loss": 0.0071, "step": 387490 }, { "epoch": 4.140178428334847, "grad_norm": 1.1891404390335083, "learning_rate": 7.767615419953559e-07, "loss": 0.0065, "step": 387500 }, { "epoch": 4.140285271649127, "grad_norm": 0.0498339906334877, "learning_rate": 7.767475495151664e-07, "loss": 0.0333, "step": 387510 }, { "epoch": 4.140392114963406, "grad_norm": 0.2679671347141266, "learning_rate": 7.76733556722508e-07, "loss": 0.0066, "step": 387520 }, { "epoch": 4.140498958277686, "grad_norm": 0.013672434724867344, "learning_rate": 7.767195636173965e-07, "loss": 0.0029, "step": 387530 }, { "epoch": 4.140605801591965, "grad_norm": 2.8731610774993896, "learning_rate": 7.767055701998478e-07, "loss": 0.0018, "step": 387540 }, { "epoch": 4.140712644906245, "grad_norm": 0.013453196734189987, "learning_rate": 7.766915764698776e-07, "loss": 0.0115, "step": 387550 }, { "epoch": 4.140819488220525, "grad_norm": 6.579577922821045, "learning_rate": 7.766775824275018e-07, "loss": 0.0162, "step": 387560 }, { "epoch": 4.140926331534804, "grad_norm": 0.0056338198482990265, "learning_rate": 7.766635880727362e-07, "loss": 0.0181, "step": 387570 }, { "epoch": 4.141033174849084, "grad_norm": 0.007447484880685806, "learning_rate": 7.766495934055962e-07, "loss": 0.0092, "step": 387580 }, { "epoch": 4.141140018163363, "grad_norm": 3.027860641479492, "learning_rate": 7.766355984260982e-07, "loss": 0.0257, "step": 387590 }, { "epoch": 4.141246861477643, "grad_norm": 0.007616681978106499, "learning_rate": 7.766216031342576e-07, "loss": 0.0177, "step": 387600 }, { "epoch": 4.141353704791922, "grad_norm": 5.410308361053467, "learning_rate": 7.766076075300903e-07, "loss": 0.0212, "step": 387610 }, { "epoch": 4.141460548106203, "grad_norm": 0.009579403325915337, "learning_rate": 7.765936116136123e-07, "loss": 0.0145, "step": 387620 }, { "epoch": 4.141567391420482, "grad_norm": 2.07012939453125, "learning_rate": 7.765796153848392e-07, "loss": 0.0257, "step": 387630 }, { "epoch": 4.1416742347347615, "grad_norm": 0.011953286826610565, "learning_rate": 7.765656188437868e-07, "loss": 0.0118, "step": 387640 }, { "epoch": 4.141781078049041, "grad_norm": 0.01907566748559475, "learning_rate": 7.765516219904708e-07, "loss": 0.006, "step": 387650 }, { "epoch": 4.14188792136332, "grad_norm": 0.7492485046386719, "learning_rate": 7.765376248249074e-07, "loss": 0.0008, "step": 387660 }, { "epoch": 4.1419947646776, "grad_norm": 7.1559672355651855, "learning_rate": 7.765236273471119e-07, "loss": 0.0259, "step": 387670 }, { "epoch": 4.14210160799188, "grad_norm": 0.003405084600672126, "learning_rate": 7.765096295571005e-07, "loss": 0.0114, "step": 387680 }, { "epoch": 4.14220845130616, "grad_norm": 2.8100080490112305, "learning_rate": 7.764956314548889e-07, "loss": 0.0069, "step": 387690 }, { "epoch": 4.142315294620439, "grad_norm": 0.1389266699552536, "learning_rate": 7.764816330404928e-07, "loss": 0.0082, "step": 387700 }, { "epoch": 4.142422137934719, "grad_norm": 2.923758029937744, "learning_rate": 7.76467634313928e-07, "loss": 0.0098, "step": 387710 }, { "epoch": 4.142528981248998, "grad_norm": 1.3290947675704956, "learning_rate": 7.764536352752105e-07, "loss": 0.0037, "step": 387720 }, { "epoch": 4.142635824563278, "grad_norm": 1.9090473651885986, "learning_rate": 7.764396359243559e-07, "loss": 0.0025, "step": 387730 }, { "epoch": 4.142742667877558, "grad_norm": 0.002448759041726589, "learning_rate": 7.764256362613802e-07, "loss": 0.0106, "step": 387740 }, { "epoch": 4.142849511191837, "grad_norm": 0.0018221009522676468, "learning_rate": 7.764116362862989e-07, "loss": 0.0056, "step": 387750 }, { "epoch": 4.142956354506117, "grad_norm": 0.029082048684358597, "learning_rate": 7.763976359991282e-07, "loss": 0.0348, "step": 387760 }, { "epoch": 4.143063197820396, "grad_norm": 0.825065553188324, "learning_rate": 7.763836353998837e-07, "loss": 0.0017, "step": 387770 }, { "epoch": 4.143170041134676, "grad_norm": 0.004250557627528906, "learning_rate": 7.76369634488581e-07, "loss": 0.0111, "step": 387780 }, { "epoch": 4.143276884448956, "grad_norm": 0.034665804356336594, "learning_rate": 7.763556332652364e-07, "loss": 0.0026, "step": 387790 }, { "epoch": 4.1433837277632355, "grad_norm": 5.256945610046387, "learning_rate": 7.763416317298652e-07, "loss": 0.0147, "step": 387800 }, { "epoch": 4.143490571077515, "grad_norm": 10.758284568786621, "learning_rate": 7.763276298824835e-07, "loss": 0.0268, "step": 387810 }, { "epoch": 4.143597414391794, "grad_norm": 4.326493740081787, "learning_rate": 7.763136277231071e-07, "loss": 0.0318, "step": 387820 }, { "epoch": 4.143704257706074, "grad_norm": 5.562508583068848, "learning_rate": 7.762996252517518e-07, "loss": 0.0083, "step": 387830 }, { "epoch": 4.143811101020353, "grad_norm": 0.048902563750743866, "learning_rate": 7.762856224684333e-07, "loss": 0.0035, "step": 387840 }, { "epoch": 4.143917944334634, "grad_norm": 6.369502544403076, "learning_rate": 7.762716193731675e-07, "loss": 0.0114, "step": 387850 }, { "epoch": 4.144024787648913, "grad_norm": 0.5490126609802246, "learning_rate": 7.762576159659703e-07, "loss": 0.0033, "step": 387860 }, { "epoch": 4.144131630963193, "grad_norm": 0.6964976191520691, "learning_rate": 7.762436122468572e-07, "loss": 0.0013, "step": 387870 }, { "epoch": 4.144238474277472, "grad_norm": 0.0013491734862327576, "learning_rate": 7.762296082158443e-07, "loss": 0.0076, "step": 387880 }, { "epoch": 4.1443453175917515, "grad_norm": 0.7598238587379456, "learning_rate": 7.762156038729474e-07, "loss": 0.0034, "step": 387890 }, { "epoch": 4.144452160906031, "grad_norm": 0.004339061211794615, "learning_rate": 7.762015992181821e-07, "loss": 0.0077, "step": 387900 }, { "epoch": 4.144559004220311, "grad_norm": 0.0030431027989834547, "learning_rate": 7.761875942515644e-07, "loss": 0.0025, "step": 387910 }, { "epoch": 4.144665847534591, "grad_norm": 0.06998420506715775, "learning_rate": 7.7617358897311e-07, "loss": 0.0044, "step": 387920 }, { "epoch": 4.14477269084887, "grad_norm": 6.511172294616699, "learning_rate": 7.76159583382835e-07, "loss": 0.0174, "step": 387930 }, { "epoch": 4.14487953416315, "grad_norm": 0.002906889421865344, "learning_rate": 7.761455774807549e-07, "loss": 0.0028, "step": 387940 }, { "epoch": 4.144986377477429, "grad_norm": 0.5613696575164795, "learning_rate": 7.761315712668854e-07, "loss": 0.0009, "step": 387950 }, { "epoch": 4.145093220791709, "grad_norm": 0.00786333717405796, "learning_rate": 7.761175647412426e-07, "loss": 0.0027, "step": 387960 }, { "epoch": 4.145200064105989, "grad_norm": 7.202997207641602, "learning_rate": 7.761035579038424e-07, "loss": 0.014, "step": 387970 }, { "epoch": 4.145306907420268, "grad_norm": 1.6425808668136597, "learning_rate": 7.760895507547003e-07, "loss": 0.0032, "step": 387980 }, { "epoch": 4.145413750734548, "grad_norm": 5.840240478515625, "learning_rate": 7.760755432938322e-07, "loss": 0.0143, "step": 387990 }, { "epoch": 4.145520594048827, "grad_norm": 0.002568264491856098, "learning_rate": 7.760615355212541e-07, "loss": 0.002, "step": 388000 }, { "epoch": 4.145627437363107, "grad_norm": 6.396437168121338, "learning_rate": 7.760475274369816e-07, "loss": 0.0136, "step": 388010 }, { "epoch": 4.145734280677386, "grad_norm": 0.2209566831588745, "learning_rate": 7.760335190410308e-07, "loss": 0.0013, "step": 388020 }, { "epoch": 4.145841123991667, "grad_norm": 5.837679862976074, "learning_rate": 7.760195103334172e-07, "loss": 0.0077, "step": 388030 }, { "epoch": 4.145947967305946, "grad_norm": 0.9468622803688049, "learning_rate": 7.760055013141567e-07, "loss": 0.0019, "step": 388040 }, { "epoch": 4.1460548106202255, "grad_norm": 1.1181635856628418, "learning_rate": 7.759914919832651e-07, "loss": 0.0116, "step": 388050 }, { "epoch": 4.146161653934505, "grad_norm": 0.0011253765551373363, "learning_rate": 7.759774823407584e-07, "loss": 0.0085, "step": 388060 }, { "epoch": 4.146268497248784, "grad_norm": 0.29124143719673157, "learning_rate": 7.759634723866521e-07, "loss": 0.0018, "step": 388070 }, { "epoch": 4.146375340563064, "grad_norm": 0.01073081512004137, "learning_rate": 7.759494621209626e-07, "loss": 0.0005, "step": 388080 }, { "epoch": 4.146482183877344, "grad_norm": 0.0018994045676663518, "learning_rate": 7.759354515437051e-07, "loss": 0.0193, "step": 388090 }, { "epoch": 4.146589027191624, "grad_norm": 14.362333297729492, "learning_rate": 7.759214406548955e-07, "loss": 0.0095, "step": 388100 }, { "epoch": 4.146695870505903, "grad_norm": 0.0028509204275906086, "learning_rate": 7.7590742945455e-07, "loss": 0.0052, "step": 388110 }, { "epoch": 4.146802713820183, "grad_norm": 5.3567681312561035, "learning_rate": 7.75893417942684e-07, "loss": 0.0015, "step": 388120 }, { "epoch": 4.146909557134462, "grad_norm": 1.4662606716156006, "learning_rate": 7.758794061193137e-07, "loss": 0.0012, "step": 388130 }, { "epoch": 4.1470164004487415, "grad_norm": 8.356847763061523, "learning_rate": 7.758653939844546e-07, "loss": 0.0052, "step": 388140 }, { "epoch": 4.147123243763022, "grad_norm": 0.18857154250144958, "learning_rate": 7.758513815381227e-07, "loss": 0.0024, "step": 388150 }, { "epoch": 4.147230087077301, "grad_norm": 1.7914644479751587, "learning_rate": 7.758373687803336e-07, "loss": 0.0055, "step": 388160 }, { "epoch": 4.147336930391581, "grad_norm": 0.0052203889936208725, "learning_rate": 7.758233557111036e-07, "loss": 0.001, "step": 388170 }, { "epoch": 4.14744377370586, "grad_norm": 13.168390274047852, "learning_rate": 7.75809342330448e-07, "loss": 0.0172, "step": 388180 }, { "epoch": 4.14755061702014, "grad_norm": 0.10288525372743607, "learning_rate": 7.757953286383829e-07, "loss": 0.0109, "step": 388190 }, { "epoch": 4.147657460334419, "grad_norm": 0.02068881131708622, "learning_rate": 7.757813146349242e-07, "loss": 0.0094, "step": 388200 }, { "epoch": 4.1477643036486995, "grad_norm": 0.07077136635780334, "learning_rate": 7.757673003200873e-07, "loss": 0.0043, "step": 388210 }, { "epoch": 4.147871146962979, "grad_norm": 0.0032367513049393892, "learning_rate": 7.757532856938885e-07, "loss": 0.0099, "step": 388220 }, { "epoch": 4.147977990277258, "grad_norm": 3.2796473503112793, "learning_rate": 7.757392707563434e-07, "loss": 0.0115, "step": 388230 }, { "epoch": 4.148084833591538, "grad_norm": 12.496603965759277, "learning_rate": 7.757252555074678e-07, "loss": 0.1243, "step": 388240 }, { "epoch": 4.148191676905817, "grad_norm": 3.176250696182251, "learning_rate": 7.757112399472777e-07, "loss": 0.0048, "step": 388250 }, { "epoch": 4.148298520220097, "grad_norm": 0.03296459838747978, "learning_rate": 7.756972240757886e-07, "loss": 0.0011, "step": 388260 }, { "epoch": 4.148405363534377, "grad_norm": 2.5292809009552, "learning_rate": 7.756832078930167e-07, "loss": 0.0032, "step": 388270 }, { "epoch": 4.148512206848657, "grad_norm": 1.1571751832962036, "learning_rate": 7.756691913989775e-07, "loss": 0.0027, "step": 388280 }, { "epoch": 4.148619050162936, "grad_norm": 0.6861971020698547, "learning_rate": 7.756551745936872e-07, "loss": 0.034, "step": 388290 }, { "epoch": 4.1487258934772155, "grad_norm": 0.04398934915661812, "learning_rate": 7.756411574771611e-07, "loss": 0.0215, "step": 388300 }, { "epoch": 4.148832736791495, "grad_norm": 0.0011351876892149448, "learning_rate": 7.756271400494155e-07, "loss": 0.0013, "step": 388310 }, { "epoch": 4.1489395801057745, "grad_norm": 0.013726383447647095, "learning_rate": 7.75613122310466e-07, "loss": 0.0098, "step": 388320 }, { "epoch": 4.149046423420055, "grad_norm": 1.0359381437301636, "learning_rate": 7.755991042603287e-07, "loss": 0.0441, "step": 388330 }, { "epoch": 4.149153266734334, "grad_norm": 2.812675952911377, "learning_rate": 7.755850858990189e-07, "loss": 0.013, "step": 388340 }, { "epoch": 4.149260110048614, "grad_norm": 0.24591632187366486, "learning_rate": 7.755710672265529e-07, "loss": 0.0561, "step": 388350 }, { "epoch": 4.149366953362893, "grad_norm": 0.001153674442321062, "learning_rate": 7.755570482429464e-07, "loss": 0.0102, "step": 388360 }, { "epoch": 4.149473796677173, "grad_norm": 0.19160130620002747, "learning_rate": 7.755430289482152e-07, "loss": 0.0043, "step": 388370 }, { "epoch": 4.149580639991452, "grad_norm": 0.29166141152381897, "learning_rate": 7.75529009342375e-07, "loss": 0.0066, "step": 388380 }, { "epoch": 4.1496874833057324, "grad_norm": 0.00865674577653408, "learning_rate": 7.755149894254418e-07, "loss": 0.0074, "step": 388390 }, { "epoch": 4.149794326620012, "grad_norm": 0.015005115419626236, "learning_rate": 7.755009691974315e-07, "loss": 0.027, "step": 388400 }, { "epoch": 4.149901169934291, "grad_norm": 0.12515130639076233, "learning_rate": 7.754869486583598e-07, "loss": 0.0054, "step": 388410 }, { "epoch": 4.150008013248571, "grad_norm": 0.21904082596302032, "learning_rate": 7.754729278082424e-07, "loss": 0.0144, "step": 388420 }, { "epoch": 4.15011485656285, "grad_norm": 0.12368432432413101, "learning_rate": 7.754589066470953e-07, "loss": 0.0004, "step": 388430 }, { "epoch": 4.150221699877131, "grad_norm": 0.33175820112228394, "learning_rate": 7.754448851749342e-07, "loss": 0.0001, "step": 388440 }, { "epoch": 4.15032854319141, "grad_norm": 1.833932876586914, "learning_rate": 7.754308633917752e-07, "loss": 0.006, "step": 388450 }, { "epoch": 4.1504353865056896, "grad_norm": 12.05770206451416, "learning_rate": 7.754168412976341e-07, "loss": 0.0414, "step": 388460 }, { "epoch": 4.150542229819969, "grad_norm": 0.7143535614013672, "learning_rate": 7.754028188925263e-07, "loss": 0.0136, "step": 388470 }, { "epoch": 4.1506490731342485, "grad_norm": 0.014403700828552246, "learning_rate": 7.753887961764681e-07, "loss": 0.0075, "step": 388480 }, { "epoch": 4.150755916448528, "grad_norm": 0.011280731298029423, "learning_rate": 7.753747731494752e-07, "loss": 0.0099, "step": 388490 }, { "epoch": 4.150862759762807, "grad_norm": 0.008351854048669338, "learning_rate": 7.753607498115633e-07, "loss": 0.0005, "step": 388500 }, { "epoch": 4.150969603077088, "grad_norm": 15.45649242401123, "learning_rate": 7.753467261627484e-07, "loss": 0.0314, "step": 388510 }, { "epoch": 4.151076446391367, "grad_norm": 2.730485677719116, "learning_rate": 7.753327022030461e-07, "loss": 0.0185, "step": 388520 }, { "epoch": 4.151183289705647, "grad_norm": 0.0013943352969363332, "learning_rate": 7.753186779324726e-07, "loss": 0.0013, "step": 388530 }, { "epoch": 4.151290133019926, "grad_norm": 0.020388031378388405, "learning_rate": 7.753046533510435e-07, "loss": 0.0014, "step": 388540 }, { "epoch": 4.151396976334206, "grad_norm": 0.11054588109254837, "learning_rate": 7.752906284587745e-07, "loss": 0.0008, "step": 388550 }, { "epoch": 4.151503819648486, "grad_norm": 0.4982149302959442, "learning_rate": 7.752766032556819e-07, "loss": 0.0016, "step": 388560 }, { "epoch": 4.151610662962765, "grad_norm": 0.027032190933823586, "learning_rate": 7.752625777417811e-07, "loss": 0.0233, "step": 388570 }, { "epoch": 4.151717506277045, "grad_norm": 0.0032112940680235624, "learning_rate": 7.752485519170879e-07, "loss": 0.0019, "step": 388580 }, { "epoch": 4.151824349591324, "grad_norm": 0.4237976670265198, "learning_rate": 7.752345257816185e-07, "loss": 0.0023, "step": 388590 }, { "epoch": 4.151931192905604, "grad_norm": 0.03553835302591324, "learning_rate": 7.752204993353886e-07, "loss": 0.0007, "step": 388600 }, { "epoch": 4.152038036219883, "grad_norm": 0.0022855070419609547, "learning_rate": 7.752064725784138e-07, "loss": 0.0024, "step": 388610 }, { "epoch": 4.152144879534164, "grad_norm": 0.1190270334482193, "learning_rate": 7.751924455107102e-07, "loss": 0.0138, "step": 388620 }, { "epoch": 4.152251722848443, "grad_norm": 0.03522034361958504, "learning_rate": 7.751784181322937e-07, "loss": 0.0029, "step": 388630 }, { "epoch": 4.1523585661627225, "grad_norm": 0.22345119714736938, "learning_rate": 7.751643904431797e-07, "loss": 0.0142, "step": 388640 }, { "epoch": 4.152465409477002, "grad_norm": 0.019552772864699364, "learning_rate": 7.751503624433846e-07, "loss": 0.0279, "step": 388650 }, { "epoch": 4.152572252791281, "grad_norm": 3.6634232997894287, "learning_rate": 7.75136334132924e-07, "loss": 0.0042, "step": 388660 }, { "epoch": 4.152679096105561, "grad_norm": 0.08685941994190216, "learning_rate": 7.751223055118134e-07, "loss": 0.0064, "step": 388670 }, { "epoch": 4.152785939419841, "grad_norm": 0.015430048108100891, "learning_rate": 7.751082765800692e-07, "loss": 0.0012, "step": 388680 }, { "epoch": 4.152892782734121, "grad_norm": 0.021973710507154465, "learning_rate": 7.750942473377069e-07, "loss": 0.0225, "step": 388690 }, { "epoch": 4.1529996260484, "grad_norm": 0.000998105970211327, "learning_rate": 7.750802177847425e-07, "loss": 0.0009, "step": 388700 }, { "epoch": 4.15310646936268, "grad_norm": 1.1075459718704224, "learning_rate": 7.750661879211918e-07, "loss": 0.0033, "step": 388710 }, { "epoch": 4.153213312676959, "grad_norm": 0.08442464470863342, "learning_rate": 7.750521577470706e-07, "loss": 0.0083, "step": 388720 }, { "epoch": 4.1533201559912385, "grad_norm": 0.0011492717312648892, "learning_rate": 7.750381272623947e-07, "loss": 0.006, "step": 388730 }, { "epoch": 4.153426999305519, "grad_norm": 2.86645770072937, "learning_rate": 7.750240964671799e-07, "loss": 0.0108, "step": 388740 }, { "epoch": 4.153533842619798, "grad_norm": 1.4974087476730347, "learning_rate": 7.750100653614423e-07, "loss": 0.0035, "step": 388750 }, { "epoch": 4.153640685934078, "grad_norm": 0.06585683673620224, "learning_rate": 7.749960339451975e-07, "loss": 0.0094, "step": 388760 }, { "epoch": 4.153747529248357, "grad_norm": 0.006994067225605249, "learning_rate": 7.749820022184615e-07, "loss": 0.0271, "step": 388770 }, { "epoch": 4.153854372562637, "grad_norm": 0.0009374776273034513, "learning_rate": 7.7496797018125e-07, "loss": 0.016, "step": 388780 }, { "epoch": 4.153961215876916, "grad_norm": 0.005721129477024078, "learning_rate": 7.74953937833579e-07, "loss": 0.0136, "step": 388790 }, { "epoch": 4.1540680591911965, "grad_norm": 8.005341529846191, "learning_rate": 7.749399051754641e-07, "loss": 0.0149, "step": 388800 }, { "epoch": 4.154174902505476, "grad_norm": 0.18325923383235931, "learning_rate": 7.749258722069214e-07, "loss": 0.0007, "step": 388810 }, { "epoch": 4.154281745819755, "grad_norm": 0.28721413016319275, "learning_rate": 7.749118389279666e-07, "loss": 0.0006, "step": 388820 }, { "epoch": 4.154388589134035, "grad_norm": 0.5233587026596069, "learning_rate": 7.748978053386156e-07, "loss": 0.018, "step": 388830 }, { "epoch": 4.154495432448314, "grad_norm": 0.04257708787918091, "learning_rate": 7.748837714388842e-07, "loss": 0.0064, "step": 388840 }, { "epoch": 4.154602275762594, "grad_norm": 6.544544696807861, "learning_rate": 7.748697372287884e-07, "loss": 0.0099, "step": 388850 }, { "epoch": 4.154709119076874, "grad_norm": 1.7891234159469604, "learning_rate": 7.748557027083438e-07, "loss": 0.0101, "step": 388860 }, { "epoch": 4.154815962391154, "grad_norm": 0.03705986589193344, "learning_rate": 7.748416678775663e-07, "loss": 0.0008, "step": 388870 }, { "epoch": 4.154922805705433, "grad_norm": 0.03253403306007385, "learning_rate": 7.74827632736472e-07, "loss": 0.0002, "step": 388880 }, { "epoch": 4.1550296490197125, "grad_norm": 1.2111743688583374, "learning_rate": 7.748135972850764e-07, "loss": 0.0027, "step": 388890 }, { "epoch": 4.155136492333992, "grad_norm": 0.9086251258850098, "learning_rate": 7.747995615233955e-07, "loss": 0.0143, "step": 388900 }, { "epoch": 4.155243335648271, "grad_norm": 1.1651403903961182, "learning_rate": 7.747855254514453e-07, "loss": 0.0102, "step": 388910 }, { "epoch": 4.155350178962552, "grad_norm": 3.856397867202759, "learning_rate": 7.747714890692413e-07, "loss": 0.0201, "step": 388920 }, { "epoch": 4.155457022276831, "grad_norm": 3.8619837760925293, "learning_rate": 7.747574523767998e-07, "loss": 0.0067, "step": 388930 }, { "epoch": 4.155563865591111, "grad_norm": 0.03558740019798279, "learning_rate": 7.747434153741364e-07, "loss": 0.0075, "step": 388940 }, { "epoch": 4.15567070890539, "grad_norm": 0.730996310710907, "learning_rate": 7.747293780612667e-07, "loss": 0.0139, "step": 388950 }, { "epoch": 4.15577755221967, "grad_norm": 2.030632972717285, "learning_rate": 7.74715340438207e-07, "loss": 0.018, "step": 388960 }, { "epoch": 4.155884395533949, "grad_norm": 0.0053648692555725574, "learning_rate": 7.747013025049729e-07, "loss": 0.0162, "step": 388970 }, { "epoch": 4.155991238848229, "grad_norm": 0.6336309909820557, "learning_rate": 7.746872642615802e-07, "loss": 0.0229, "step": 388980 }, { "epoch": 4.156098082162509, "grad_norm": 5.339346885681152, "learning_rate": 7.746732257080449e-07, "loss": 0.0037, "step": 388990 }, { "epoch": 4.156204925476788, "grad_norm": 0.0241119135171175, "learning_rate": 7.746591868443829e-07, "loss": 0.0078, "step": 389000 }, { "epoch": 4.156311768791068, "grad_norm": 0.004151004366576672, "learning_rate": 7.746451476706097e-07, "loss": 0.0129, "step": 389010 }, { "epoch": 4.156418612105347, "grad_norm": 0.020094454288482666, "learning_rate": 7.746311081867417e-07, "loss": 0.0239, "step": 389020 }, { "epoch": 4.156525455419627, "grad_norm": 0.6110495328903198, "learning_rate": 7.746170683927943e-07, "loss": 0.0162, "step": 389030 }, { "epoch": 4.156632298733907, "grad_norm": 2.435415029525757, "learning_rate": 7.746030282887834e-07, "loss": 0.0099, "step": 389040 }, { "epoch": 4.1567391420481865, "grad_norm": 0.6925145387649536, "learning_rate": 7.745889878747251e-07, "loss": 0.0055, "step": 389050 }, { "epoch": 4.156845985362466, "grad_norm": 4.108834266662598, "learning_rate": 7.745749471506352e-07, "loss": 0.0077, "step": 389060 }, { "epoch": 4.156952828676745, "grad_norm": 0.17533031105995178, "learning_rate": 7.745609061165292e-07, "loss": 0.0262, "step": 389070 }, { "epoch": 4.157059671991025, "grad_norm": 0.007235516794025898, "learning_rate": 7.745468647724234e-07, "loss": 0.0014, "step": 389080 }, { "epoch": 4.157166515305304, "grad_norm": 0.02593069337308407, "learning_rate": 7.745328231183335e-07, "loss": 0.0192, "step": 389090 }, { "epoch": 4.157273358619585, "grad_norm": 0.04013986140489578, "learning_rate": 7.745187811542753e-07, "loss": 0.0047, "step": 389100 }, { "epoch": 4.157380201933864, "grad_norm": 0.014674266800284386, "learning_rate": 7.745047388802646e-07, "loss": 0.0015, "step": 389110 }, { "epoch": 4.157487045248144, "grad_norm": 0.002647968241944909, "learning_rate": 7.744906962963174e-07, "loss": 0.0157, "step": 389120 }, { "epoch": 4.157593888562423, "grad_norm": 0.001666344585828483, "learning_rate": 7.744766534024494e-07, "loss": 0.01, "step": 389130 }, { "epoch": 4.1577007318767025, "grad_norm": 0.181461900472641, "learning_rate": 7.744626101986766e-07, "loss": 0.0012, "step": 389140 }, { "epoch": 4.157807575190983, "grad_norm": 0.1283847540616989, "learning_rate": 7.744485666850146e-07, "loss": 0.0221, "step": 389150 }, { "epoch": 4.157914418505262, "grad_norm": 0.007540966384112835, "learning_rate": 7.7443452286148e-07, "loss": 0.0088, "step": 389160 }, { "epoch": 4.158021261819542, "grad_norm": 0.014302415773272514, "learning_rate": 7.744204787280877e-07, "loss": 0.0004, "step": 389170 }, { "epoch": 4.158128105133821, "grad_norm": 0.011224103160202503, "learning_rate": 7.74406434284854e-07, "loss": 0.0164, "step": 389180 }, { "epoch": 4.158234948448101, "grad_norm": 0.0053152041509747505, "learning_rate": 7.743923895317948e-07, "loss": 0.0107, "step": 389190 }, { "epoch": 4.15834179176238, "grad_norm": 2.5112805366516113, "learning_rate": 7.743783444689256e-07, "loss": 0.0115, "step": 389200 }, { "epoch": 4.15844863507666, "grad_norm": 0.02741727977991104, "learning_rate": 7.743642990962629e-07, "loss": 0.0213, "step": 389210 }, { "epoch": 4.15855547839094, "grad_norm": 0.015428871847689152, "learning_rate": 7.74350253413822e-07, "loss": 0.0059, "step": 389220 }, { "epoch": 4.158662321705219, "grad_norm": 0.04365978017449379, "learning_rate": 7.743362074216191e-07, "loss": 0.004, "step": 389230 }, { "epoch": 4.158769165019499, "grad_norm": 0.00492242444306612, "learning_rate": 7.743221611196697e-07, "loss": 0.037, "step": 389240 }, { "epoch": 4.158876008333778, "grad_norm": 0.009059732779860497, "learning_rate": 7.743081145079902e-07, "loss": 0.0058, "step": 389250 }, { "epoch": 4.158982851648058, "grad_norm": 0.798234760761261, "learning_rate": 7.742940675865958e-07, "loss": 0.0019, "step": 389260 }, { "epoch": 4.159089694962338, "grad_norm": 0.09056656807661057, "learning_rate": 7.742800203555029e-07, "loss": 0.0202, "step": 389270 }, { "epoch": 4.159196538276618, "grad_norm": 0.6084890365600586, "learning_rate": 7.742659728147272e-07, "loss": 0.0009, "step": 389280 }, { "epoch": 4.159303381590897, "grad_norm": 4.033302307128906, "learning_rate": 7.742519249642843e-07, "loss": 0.0043, "step": 389290 }, { "epoch": 4.1594102249051765, "grad_norm": 0.12356974184513092, "learning_rate": 7.742378768041906e-07, "loss": 0.014, "step": 389300 }, { "epoch": 4.159517068219456, "grad_norm": 0.003054507542401552, "learning_rate": 7.742238283344613e-07, "loss": 0.0199, "step": 389310 }, { "epoch": 4.1596239115337355, "grad_norm": 1.590027928352356, "learning_rate": 7.742097795551128e-07, "loss": 0.0092, "step": 389320 }, { "epoch": 4.159730754848016, "grad_norm": 1.3950474262237549, "learning_rate": 7.741957304661607e-07, "loss": 0.0093, "step": 389330 }, { "epoch": 4.159837598162295, "grad_norm": 0.006667537149041891, "learning_rate": 7.741816810676209e-07, "loss": 0.0087, "step": 389340 }, { "epoch": 4.159944441476575, "grad_norm": 0.0014329556142911315, "learning_rate": 7.741676313595093e-07, "loss": 0.0046, "step": 389350 }, { "epoch": 4.160051284790854, "grad_norm": 0.03285893052816391, "learning_rate": 7.741535813418419e-07, "loss": 0.0113, "step": 389360 }, { "epoch": 4.160158128105134, "grad_norm": 0.4821758270263672, "learning_rate": 7.741395310146343e-07, "loss": 0.073, "step": 389370 }, { "epoch": 4.160264971419413, "grad_norm": 2.201500177383423, "learning_rate": 7.741254803779024e-07, "loss": 0.0018, "step": 389380 }, { "epoch": 4.160371814733693, "grad_norm": 0.004054070916026831, "learning_rate": 7.741114294316622e-07, "loss": 0.0173, "step": 389390 }, { "epoch": 4.160478658047973, "grad_norm": 0.7217652201652527, "learning_rate": 7.740973781759295e-07, "loss": 0.0016, "step": 389400 }, { "epoch": 4.160585501362252, "grad_norm": 0.005376295652240515, "learning_rate": 7.740833266107202e-07, "loss": 0.0226, "step": 389410 }, { "epoch": 4.160692344676532, "grad_norm": 5.7794880867004395, "learning_rate": 7.740692747360502e-07, "loss": 0.0237, "step": 389420 }, { "epoch": 4.160799187990811, "grad_norm": 0.7947291731834412, "learning_rate": 7.740552225519353e-07, "loss": 0.0019, "step": 389430 }, { "epoch": 4.160906031305091, "grad_norm": 13.835132598876953, "learning_rate": 7.740411700583912e-07, "loss": 0.0108, "step": 389440 }, { "epoch": 4.161012874619371, "grad_norm": 0.0023551625199615955, "learning_rate": 7.74027117255434e-07, "loss": 0.009, "step": 389450 }, { "epoch": 4.1611197179336505, "grad_norm": 12.296243667602539, "learning_rate": 7.740130641430796e-07, "loss": 0.0058, "step": 389460 }, { "epoch": 4.16122656124793, "grad_norm": 0.03083288110792637, "learning_rate": 7.739990107213437e-07, "loss": 0.0119, "step": 389470 }, { "epoch": 4.1613334045622095, "grad_norm": 7.270852565765381, "learning_rate": 7.739849569902424e-07, "loss": 0.0238, "step": 389480 }, { "epoch": 4.161440247876489, "grad_norm": 0.007546029053628445, "learning_rate": 7.739709029497911e-07, "loss": 0.0105, "step": 389490 }, { "epoch": 4.161547091190768, "grad_norm": 3.0070137977600098, "learning_rate": 7.739568486000061e-07, "loss": 0.0074, "step": 389500 }, { "epoch": 4.161653934505049, "grad_norm": 0.014444150030612946, "learning_rate": 7.739427939409032e-07, "loss": 0.0033, "step": 389510 }, { "epoch": 4.161760777819328, "grad_norm": 1.5912736654281616, "learning_rate": 7.739287389724982e-07, "loss": 0.0156, "step": 389520 }, { "epoch": 4.161867621133608, "grad_norm": 0.0034720315597951412, "learning_rate": 7.73914683694807e-07, "loss": 0.0209, "step": 389530 }, { "epoch": 4.161974464447887, "grad_norm": 0.016454432159662247, "learning_rate": 7.739006281078454e-07, "loss": 0.0035, "step": 389540 }, { "epoch": 4.162081307762167, "grad_norm": 0.005439100321382284, "learning_rate": 7.738865722116292e-07, "loss": 0.0439, "step": 389550 }, { "epoch": 4.162188151076446, "grad_norm": 0.0005242867628112435, "learning_rate": 7.738725160061745e-07, "loss": 0.0068, "step": 389560 }, { "epoch": 4.162294994390726, "grad_norm": 0.009789466857910156, "learning_rate": 7.738584594914972e-07, "loss": 0.0116, "step": 389570 }, { "epoch": 4.162401837705006, "grad_norm": 1.925093650817871, "learning_rate": 7.738444026676129e-07, "loss": 0.0555, "step": 389580 }, { "epoch": 4.162508681019285, "grad_norm": 0.012590419501066208, "learning_rate": 7.738303455345376e-07, "loss": 0.0027, "step": 389590 }, { "epoch": 4.162615524333565, "grad_norm": 5.075921058654785, "learning_rate": 7.738162880922871e-07, "loss": 0.0093, "step": 389600 }, { "epoch": 4.162722367647844, "grad_norm": 0.008471580222249031, "learning_rate": 7.738022303408776e-07, "loss": 0.0091, "step": 389610 }, { "epoch": 4.162829210962124, "grad_norm": 0.010531662032008171, "learning_rate": 7.737881722803246e-07, "loss": 0.0517, "step": 389620 }, { "epoch": 4.162936054276404, "grad_norm": 0.13255958259105682, "learning_rate": 7.73774113910644e-07, "loss": 0.0008, "step": 389630 }, { "epoch": 4.1630428975906835, "grad_norm": 0.2159881591796875, "learning_rate": 7.737600552318517e-07, "loss": 0.0055, "step": 389640 }, { "epoch": 4.163149740904963, "grad_norm": 0.00041685503674671054, "learning_rate": 7.737459962439638e-07, "loss": 0.009, "step": 389650 }, { "epoch": 4.163256584219242, "grad_norm": 0.03395326808094978, "learning_rate": 7.737319369469959e-07, "loss": 0.0139, "step": 389660 }, { "epoch": 4.163363427533522, "grad_norm": 0.004202910698950291, "learning_rate": 7.737178773409641e-07, "loss": 0.0146, "step": 389670 }, { "epoch": 4.163470270847801, "grad_norm": 0.02140391245484352, "learning_rate": 7.73703817425884e-07, "loss": 0.0006, "step": 389680 }, { "epoch": 4.163577114162082, "grad_norm": 0.005537928082048893, "learning_rate": 7.736897572017716e-07, "loss": 0.0024, "step": 389690 }, { "epoch": 4.163683957476361, "grad_norm": 0.06190888583660126, "learning_rate": 7.736756966686431e-07, "loss": 0.0007, "step": 389700 }, { "epoch": 4.163790800790641, "grad_norm": 0.005128059070557356, "learning_rate": 7.736616358265137e-07, "loss": 0.0006, "step": 389710 }, { "epoch": 4.16389764410492, "grad_norm": 11.798887252807617, "learning_rate": 7.736475746753999e-07, "loss": 0.0051, "step": 389720 }, { "epoch": 4.1640044874191995, "grad_norm": 0.007571364287286997, "learning_rate": 7.736335132153174e-07, "loss": 0.01, "step": 389730 }, { "epoch": 4.164111330733479, "grad_norm": 8.780036926269531, "learning_rate": 7.736194514462819e-07, "loss": 0.0094, "step": 389740 }, { "epoch": 4.164218174047759, "grad_norm": 0.03612441569566727, "learning_rate": 7.736053893683095e-07, "loss": 0.0254, "step": 389750 }, { "epoch": 4.164325017362039, "grad_norm": 0.2267012596130371, "learning_rate": 7.735913269814159e-07, "loss": 0.0053, "step": 389760 }, { "epoch": 4.164431860676318, "grad_norm": 0.06677711755037308, "learning_rate": 7.735772642856169e-07, "loss": 0.0036, "step": 389770 }, { "epoch": 4.164538703990598, "grad_norm": 6.428055286407471, "learning_rate": 7.735632012809286e-07, "loss": 0.0061, "step": 389780 }, { "epoch": 4.164645547304877, "grad_norm": 0.018810875713825226, "learning_rate": 7.735491379673668e-07, "loss": 0.0571, "step": 389790 }, { "epoch": 4.164752390619157, "grad_norm": 0.026959672570228577, "learning_rate": 7.735350743449476e-07, "loss": 0.0129, "step": 389800 }, { "epoch": 4.164859233933437, "grad_norm": 4.317539215087891, "learning_rate": 7.735210104136863e-07, "loss": 0.0061, "step": 389810 }, { "epoch": 4.164966077247716, "grad_norm": 0.02492310106754303, "learning_rate": 7.735069461735994e-07, "loss": 0.0128, "step": 389820 }, { "epoch": 4.165072920561996, "grad_norm": 0.04674902558326721, "learning_rate": 7.734928816247024e-07, "loss": 0.0034, "step": 389830 }, { "epoch": 4.165179763876275, "grad_norm": 5.2087788581848145, "learning_rate": 7.734788167670112e-07, "loss": 0.0171, "step": 389840 }, { "epoch": 4.165286607190555, "grad_norm": 7.824026584625244, "learning_rate": 7.734647516005419e-07, "loss": 0.0114, "step": 389850 }, { "epoch": 4.165393450504835, "grad_norm": 0.8840993642807007, "learning_rate": 7.734506861253103e-07, "loss": 0.0052, "step": 389860 }, { "epoch": 4.165500293819115, "grad_norm": 0.0006214127060957253, "learning_rate": 7.734366203413321e-07, "loss": 0.0022, "step": 389870 }, { "epoch": 4.165607137133394, "grad_norm": 0.2949361801147461, "learning_rate": 7.734225542486235e-07, "loss": 0.0129, "step": 389880 }, { "epoch": 4.1657139804476735, "grad_norm": 0.43534380197525024, "learning_rate": 7.734084878472001e-07, "loss": 0.0058, "step": 389890 }, { "epoch": 4.165820823761953, "grad_norm": 0.0004212819621898234, "learning_rate": 7.733944211370777e-07, "loss": 0.0061, "step": 389900 }, { "epoch": 4.165927667076232, "grad_norm": 11.120471000671387, "learning_rate": 7.733803541182727e-07, "loss": 0.0213, "step": 389910 }, { "epoch": 4.166034510390512, "grad_norm": 0.044979970902204514, "learning_rate": 7.733662867908004e-07, "loss": 0.0032, "step": 389920 }, { "epoch": 4.166141353704792, "grad_norm": 0.6740307211875916, "learning_rate": 7.733522191546771e-07, "loss": 0.0017, "step": 389930 }, { "epoch": 4.166248197019072, "grad_norm": 0.0014777191681787372, "learning_rate": 7.733381512099185e-07, "loss": 0.0076, "step": 389940 }, { "epoch": 4.166355040333351, "grad_norm": 0.02702932059764862, "learning_rate": 7.733240829565404e-07, "loss": 0.0045, "step": 389950 }, { "epoch": 4.166461883647631, "grad_norm": 3.8205156326293945, "learning_rate": 7.733100143945588e-07, "loss": 0.0101, "step": 389960 }, { "epoch": 4.16656872696191, "grad_norm": 0.013480841182172298, "learning_rate": 7.732959455239896e-07, "loss": 0.0085, "step": 389970 }, { "epoch": 4.16667557027619, "grad_norm": 6.1093010902404785, "learning_rate": 7.732818763448486e-07, "loss": 0.0039, "step": 389980 }, { "epoch": 4.16678241359047, "grad_norm": 0.07921923696994781, "learning_rate": 7.732678068571518e-07, "loss": 0.0046, "step": 389990 }, { "epoch": 4.166889256904749, "grad_norm": 0.0033662966452538967, "learning_rate": 7.732537370609149e-07, "loss": 0.0206, "step": 390000 }, { "epoch": 4.166996100219029, "grad_norm": 0.01341188233345747, "learning_rate": 7.73239666956154e-07, "loss": 0.0072, "step": 390010 }, { "epoch": 4.167102943533308, "grad_norm": 4.783201217651367, "learning_rate": 7.732255965428849e-07, "loss": 0.0164, "step": 390020 }, { "epoch": 4.167209786847588, "grad_norm": 7.035828113555908, "learning_rate": 7.732115258211236e-07, "loss": 0.0272, "step": 390030 }, { "epoch": 4.167316630161868, "grad_norm": 0.7344533205032349, "learning_rate": 7.731974547908856e-07, "loss": 0.01, "step": 390040 }, { "epoch": 4.1674234734761475, "grad_norm": 0.03878195583820343, "learning_rate": 7.731833834521873e-07, "loss": 0.0155, "step": 390050 }, { "epoch": 4.167530316790427, "grad_norm": 0.0013571757590398192, "learning_rate": 7.731693118050442e-07, "loss": 0.0082, "step": 390060 }, { "epoch": 4.167637160104706, "grad_norm": 0.09795167297124863, "learning_rate": 7.731552398494723e-07, "loss": 0.0004, "step": 390070 }, { "epoch": 4.167744003418986, "grad_norm": 7.204690933227539, "learning_rate": 7.731411675854876e-07, "loss": 0.0176, "step": 390080 }, { "epoch": 4.167850846733265, "grad_norm": 0.6732351779937744, "learning_rate": 7.731270950131059e-07, "loss": 0.0254, "step": 390090 }, { "epoch": 4.167957690047546, "grad_norm": 0.48540374636650085, "learning_rate": 7.73113022132343e-07, "loss": 0.0004, "step": 390100 }, { "epoch": 4.168064533361825, "grad_norm": 0.5312634110450745, "learning_rate": 7.730989489432152e-07, "loss": 0.0015, "step": 390110 }, { "epoch": 4.168171376676105, "grad_norm": 0.06471157819032669, "learning_rate": 7.730848754457378e-07, "loss": 0.0017, "step": 390120 }, { "epoch": 4.168278219990384, "grad_norm": 0.02719508484005928, "learning_rate": 7.73070801639927e-07, "loss": 0.0034, "step": 390130 }, { "epoch": 4.1683850633046635, "grad_norm": 0.2135365754365921, "learning_rate": 7.730567275257987e-07, "loss": 0.0093, "step": 390140 }, { "epoch": 4.168491906618943, "grad_norm": 0.03920285403728485, "learning_rate": 7.730426531033686e-07, "loss": 0.0017, "step": 390150 }, { "epoch": 4.168598749933223, "grad_norm": 0.0036338369827717543, "learning_rate": 7.730285783726527e-07, "loss": 0.0041, "step": 390160 }, { "epoch": 4.168705593247503, "grad_norm": 2.7618353366851807, "learning_rate": 7.730145033336673e-07, "loss": 0.0065, "step": 390170 }, { "epoch": 4.168812436561782, "grad_norm": 0.04944903403520584, "learning_rate": 7.730004279864275e-07, "loss": 0.0001, "step": 390180 }, { "epoch": 4.168919279876062, "grad_norm": 0.0687645897269249, "learning_rate": 7.729863523309498e-07, "loss": 0.0098, "step": 390190 }, { "epoch": 4.169026123190341, "grad_norm": 1.4115095138549805, "learning_rate": 7.7297227636725e-07, "loss": 0.0607, "step": 390200 }, { "epoch": 4.169132966504621, "grad_norm": 0.01953491009771824, "learning_rate": 7.729582000953439e-07, "loss": 0.0058, "step": 390210 }, { "epoch": 4.169239809818901, "grad_norm": 1.5584754943847656, "learning_rate": 7.729441235152472e-07, "loss": 0.0174, "step": 390220 }, { "epoch": 4.16934665313318, "grad_norm": 3.1443166732788086, "learning_rate": 7.729300466269761e-07, "loss": 0.0155, "step": 390230 }, { "epoch": 4.16945349644746, "grad_norm": 5.554842472076416, "learning_rate": 7.729159694305464e-07, "loss": 0.0009, "step": 390240 }, { "epoch": 4.169560339761739, "grad_norm": 0.2580890953540802, "learning_rate": 7.72901891925974e-07, "loss": 0.0273, "step": 390250 }, { "epoch": 4.169667183076019, "grad_norm": 0.45722144842147827, "learning_rate": 7.728878141132747e-07, "loss": 0.0118, "step": 390260 }, { "epoch": 4.169774026390298, "grad_norm": 0.0026973849162459373, "learning_rate": 7.728737359924644e-07, "loss": 0.0006, "step": 390270 }, { "epoch": 4.169880869704579, "grad_norm": 8.852164268493652, "learning_rate": 7.728596575635592e-07, "loss": 0.0137, "step": 390280 }, { "epoch": 4.169987713018858, "grad_norm": 0.1988600194454193, "learning_rate": 7.728455788265747e-07, "loss": 0.0123, "step": 390290 }, { "epoch": 4.1700945563331375, "grad_norm": 0.09328807890415192, "learning_rate": 7.728314997815271e-07, "loss": 0.0042, "step": 390300 }, { "epoch": 4.170201399647417, "grad_norm": 0.15952998399734497, "learning_rate": 7.72817420428432e-07, "loss": 0.0043, "step": 390310 }, { "epoch": 4.170308242961696, "grad_norm": 0.009027010761201382, "learning_rate": 7.728033407673056e-07, "loss": 0.0003, "step": 390320 }, { "epoch": 4.170415086275976, "grad_norm": 0.13553793728351593, "learning_rate": 7.727892607981634e-07, "loss": 0.0004, "step": 390330 }, { "epoch": 4.170521929590256, "grad_norm": 0.0012864383170381188, "learning_rate": 7.727751805210218e-07, "loss": 0.0031, "step": 390340 }, { "epoch": 4.170628772904536, "grad_norm": 0.591895580291748, "learning_rate": 7.727610999358962e-07, "loss": 0.0034, "step": 390350 }, { "epoch": 4.170735616218815, "grad_norm": 0.0009188086842186749, "learning_rate": 7.727470190428029e-07, "loss": 0.0139, "step": 390360 }, { "epoch": 4.170842459533095, "grad_norm": 0.41299137473106384, "learning_rate": 7.727329378417577e-07, "loss": 0.0002, "step": 390370 }, { "epoch": 4.170949302847374, "grad_norm": 0.08339038491249084, "learning_rate": 7.727188563327762e-07, "loss": 0.0061, "step": 390380 }, { "epoch": 4.1710561461616535, "grad_norm": 0.053354062139987946, "learning_rate": 7.727047745158748e-07, "loss": 0.0087, "step": 390390 }, { "epoch": 4.171162989475934, "grad_norm": 0.001736732549034059, "learning_rate": 7.72690692391069e-07, "loss": 0.0247, "step": 390400 }, { "epoch": 4.171269832790213, "grad_norm": 1.4644665718078613, "learning_rate": 7.726766099583747e-07, "loss": 0.0295, "step": 390410 }, { "epoch": 4.171376676104493, "grad_norm": 0.0035512251779437065, "learning_rate": 7.72662527217808e-07, "loss": 0.0049, "step": 390420 }, { "epoch": 4.171483519418772, "grad_norm": 0.003120079170912504, "learning_rate": 7.726484441693848e-07, "loss": 0.0002, "step": 390430 }, { "epoch": 4.171590362733052, "grad_norm": 0.015294804237782955, "learning_rate": 7.726343608131209e-07, "loss": 0.0016, "step": 390440 }, { "epoch": 4.171697206047331, "grad_norm": 0.005121248308569193, "learning_rate": 7.726202771490323e-07, "loss": 0.0032, "step": 390450 }, { "epoch": 4.1718040493616115, "grad_norm": 0.004176002461463213, "learning_rate": 7.726061931771346e-07, "loss": 0.0198, "step": 390460 }, { "epoch": 4.171910892675891, "grad_norm": 0.010209927335381508, "learning_rate": 7.725921088974441e-07, "loss": 0.0145, "step": 390470 }, { "epoch": 4.1720177359901705, "grad_norm": 0.002550755627453327, "learning_rate": 7.725780243099764e-07, "loss": 0.0119, "step": 390480 }, { "epoch": 4.17212457930445, "grad_norm": 0.048990294337272644, "learning_rate": 7.725639394147477e-07, "loss": 0.0024, "step": 390490 }, { "epoch": 4.172231422618729, "grad_norm": 12.464244842529297, "learning_rate": 7.725498542117736e-07, "loss": 0.0257, "step": 390500 }, { "epoch": 4.172338265933009, "grad_norm": 0.007228879723697901, "learning_rate": 7.725357687010704e-07, "loss": 0.008, "step": 390510 }, { "epoch": 4.172445109247289, "grad_norm": 4.775659084320068, "learning_rate": 7.725216828826535e-07, "loss": 0.0037, "step": 390520 }, { "epoch": 4.172551952561569, "grad_norm": 0.005550345405936241, "learning_rate": 7.725075967565392e-07, "loss": 0.0118, "step": 390530 }, { "epoch": 4.172658795875848, "grad_norm": 0.013332412578165531, "learning_rate": 7.724935103227432e-07, "loss": 0.0258, "step": 390540 }, { "epoch": 4.1727656391901276, "grad_norm": 0.0723271518945694, "learning_rate": 7.724794235812815e-07, "loss": 0.001, "step": 390550 }, { "epoch": 4.172872482504407, "grad_norm": 0.06260006129741669, "learning_rate": 7.724653365321698e-07, "loss": 0.0027, "step": 390560 }, { "epoch": 4.1729793258186865, "grad_norm": 0.004368269816040993, "learning_rate": 7.724512491754244e-07, "loss": 0.0273, "step": 390570 }, { "epoch": 4.173086169132967, "grad_norm": 5.030810832977295, "learning_rate": 7.724371615110607e-07, "loss": 0.0317, "step": 390580 }, { "epoch": 4.173193012447246, "grad_norm": 0.0012445300817489624, "learning_rate": 7.724230735390952e-07, "loss": 0.0522, "step": 390590 }, { "epoch": 4.173299855761526, "grad_norm": 0.0019407311920076609, "learning_rate": 7.724089852595435e-07, "loss": 0.0489, "step": 390600 }, { "epoch": 4.173406699075805, "grad_norm": 0.19333134591579437, "learning_rate": 7.723948966724212e-07, "loss": 0.014, "step": 390610 }, { "epoch": 4.173513542390085, "grad_norm": 3.6897897720336914, "learning_rate": 7.723808077777446e-07, "loss": 0.0116, "step": 390620 }, { "epoch": 4.173620385704364, "grad_norm": 0.2084467113018036, "learning_rate": 7.723667185755296e-07, "loss": 0.0003, "step": 390630 }, { "epoch": 4.1737272290186445, "grad_norm": 4.519967079162598, "learning_rate": 7.72352629065792e-07, "loss": 0.0167, "step": 390640 }, { "epoch": 4.173834072332924, "grad_norm": 1.9913769960403442, "learning_rate": 7.723385392485477e-07, "loss": 0.015, "step": 390650 }, { "epoch": 4.173940915647203, "grad_norm": 0.05531201511621475, "learning_rate": 7.723244491238126e-07, "loss": 0.0126, "step": 390660 }, { "epoch": 4.174047758961483, "grad_norm": 0.2479250580072403, "learning_rate": 7.723103586916027e-07, "loss": 0.0026, "step": 390670 }, { "epoch": 4.174154602275762, "grad_norm": 0.030895814299583435, "learning_rate": 7.722962679519339e-07, "loss": 0.0087, "step": 390680 }, { "epoch": 4.174261445590043, "grad_norm": 0.6608592867851257, "learning_rate": 7.722821769048218e-07, "loss": 0.005, "step": 390690 }, { "epoch": 4.174368288904322, "grad_norm": 0.0024480281863361597, "learning_rate": 7.722680855502827e-07, "loss": 0.0601, "step": 390700 }, { "epoch": 4.174475132218602, "grad_norm": 0.005368085112422705, "learning_rate": 7.722539938883326e-07, "loss": 0.0047, "step": 390710 }, { "epoch": 4.174581975532881, "grad_norm": 0.012002583593130112, "learning_rate": 7.722399019189869e-07, "loss": 0.0126, "step": 390720 }, { "epoch": 4.1746888188471605, "grad_norm": 0.0012690761359408498, "learning_rate": 7.72225809642262e-07, "loss": 0.0029, "step": 390730 }, { "epoch": 4.17479566216144, "grad_norm": 0.1001388430595398, "learning_rate": 7.722117170581736e-07, "loss": 0.0064, "step": 390740 }, { "epoch": 4.17490250547572, "grad_norm": 0.0034143116790801287, "learning_rate": 7.721976241667375e-07, "loss": 0.0284, "step": 390750 }, { "epoch": 4.17500934879, "grad_norm": 0.005335073918104172, "learning_rate": 7.721835309679698e-07, "loss": 0.0151, "step": 390760 }, { "epoch": 4.175116192104279, "grad_norm": 0.010715201497077942, "learning_rate": 7.721694374618864e-07, "loss": 0.0011, "step": 390770 }, { "epoch": 4.175223035418559, "grad_norm": 0.03829457610845566, "learning_rate": 7.721553436485031e-07, "loss": 0.0051, "step": 390780 }, { "epoch": 4.175329878732838, "grad_norm": 0.01885087601840496, "learning_rate": 7.72141249527836e-07, "loss": 0.0192, "step": 390790 }, { "epoch": 4.175436722047118, "grad_norm": 5.001884937286377, "learning_rate": 7.721271550999008e-07, "loss": 0.0031, "step": 390800 }, { "epoch": 4.175543565361398, "grad_norm": 0.0042833308689296246, "learning_rate": 7.721130603647135e-07, "loss": 0.0181, "step": 390810 }, { "epoch": 4.175650408675677, "grad_norm": 0.03448356315493584, "learning_rate": 7.7209896532229e-07, "loss": 0.0013, "step": 390820 }, { "epoch": 4.175757251989957, "grad_norm": 0.4471789002418518, "learning_rate": 7.720848699726462e-07, "loss": 0.0417, "step": 390830 }, { "epoch": 4.175864095304236, "grad_norm": 5.6046061515808105, "learning_rate": 7.720707743157982e-07, "loss": 0.0303, "step": 390840 }, { "epoch": 4.175970938618516, "grad_norm": 0.001015896676108241, "learning_rate": 7.720566783517617e-07, "loss": 0.0017, "step": 390850 }, { "epoch": 4.176077781932795, "grad_norm": 0.19187228381633759, "learning_rate": 7.720425820805525e-07, "loss": 0.0038, "step": 390860 }, { "epoch": 4.176184625247076, "grad_norm": 0.009154410101473331, "learning_rate": 7.72028485502187e-07, "loss": 0.0878, "step": 390870 }, { "epoch": 4.176291468561355, "grad_norm": 0.0034441163297742605, "learning_rate": 7.720143886166805e-07, "loss": 0.0022, "step": 390880 }, { "epoch": 4.1763983118756345, "grad_norm": 0.01510805543512106, "learning_rate": 7.720002914240493e-07, "loss": 0.025, "step": 390890 }, { "epoch": 4.176505155189914, "grad_norm": 1.8733770847320557, "learning_rate": 7.719861939243094e-07, "loss": 0.0145, "step": 390900 }, { "epoch": 4.176611998504193, "grad_norm": 0.01862090267241001, "learning_rate": 7.719720961174765e-07, "loss": 0.0057, "step": 390910 }, { "epoch": 4.176718841818473, "grad_norm": 0.0040068244561553, "learning_rate": 7.719579980035665e-07, "loss": 0.0012, "step": 390920 }, { "epoch": 4.176825685132753, "grad_norm": 2.0988309383392334, "learning_rate": 7.719438995825956e-07, "loss": 0.0195, "step": 390930 }, { "epoch": 4.176932528447033, "grad_norm": 1.4127607345581055, "learning_rate": 7.719298008545791e-07, "loss": 0.0008, "step": 390940 }, { "epoch": 4.177039371761312, "grad_norm": 0.545978844165802, "learning_rate": 7.719157018195337e-07, "loss": 0.0081, "step": 390950 }, { "epoch": 4.177146215075592, "grad_norm": 0.023451846092939377, "learning_rate": 7.719016024774749e-07, "loss": 0.0013, "step": 390960 }, { "epoch": 4.177253058389871, "grad_norm": 5.062769412994385, "learning_rate": 7.718875028284186e-07, "loss": 0.0166, "step": 390970 }, { "epoch": 4.1773599017041505, "grad_norm": 0.48170164227485657, "learning_rate": 7.718734028723807e-07, "loss": 0.0078, "step": 390980 }, { "epoch": 4.177466745018431, "grad_norm": 0.0025462149642407894, "learning_rate": 7.718593026093774e-07, "loss": 0.0115, "step": 390990 }, { "epoch": 4.17757358833271, "grad_norm": 1.3381181955337524, "learning_rate": 7.718452020394243e-07, "loss": 0.0037, "step": 391000 }, { "epoch": 4.17768043164699, "grad_norm": 0.5224592685699463, "learning_rate": 7.718311011625374e-07, "loss": 0.0145, "step": 391010 }, { "epoch": 4.177787274961269, "grad_norm": 0.08828017115592957, "learning_rate": 7.718169999787328e-07, "loss": 0.0118, "step": 391020 }, { "epoch": 4.177894118275549, "grad_norm": 7.682739734649658, "learning_rate": 7.718028984880263e-07, "loss": 0.0376, "step": 391030 }, { "epoch": 4.178000961589828, "grad_norm": 0.07912995666265488, "learning_rate": 7.717887966904337e-07, "loss": 0.0291, "step": 391040 }, { "epoch": 4.1781078049041085, "grad_norm": 0.007054655347019434, "learning_rate": 7.717746945859711e-07, "loss": 0.0002, "step": 391050 }, { "epoch": 4.178214648218388, "grad_norm": 0.7042533159255981, "learning_rate": 7.717605921746544e-07, "loss": 0.0063, "step": 391060 }, { "epoch": 4.178321491532667, "grad_norm": 0.004935272037982941, "learning_rate": 7.717464894564993e-07, "loss": 0.0004, "step": 391070 }, { "epoch": 4.178428334846947, "grad_norm": 0.06668385118246078, "learning_rate": 7.717323864315221e-07, "loss": 0.0059, "step": 391080 }, { "epoch": 4.178535178161226, "grad_norm": 0.006698730401694775, "learning_rate": 7.717182830997384e-07, "loss": 0.0007, "step": 391090 }, { "epoch": 4.178642021475506, "grad_norm": 0.0010419099126011133, "learning_rate": 7.717041794611644e-07, "loss": 0.0042, "step": 391100 }, { "epoch": 4.178748864789786, "grad_norm": 0.0026480553206056356, "learning_rate": 7.716900755158158e-07, "loss": 0.0023, "step": 391110 }, { "epoch": 4.178855708104066, "grad_norm": 1.166660189628601, "learning_rate": 7.716759712637085e-07, "loss": 0.0059, "step": 391120 }, { "epoch": 4.178962551418345, "grad_norm": 4.868875980377197, "learning_rate": 7.716618667048587e-07, "loss": 0.007, "step": 391130 }, { "epoch": 4.1790693947326245, "grad_norm": 26.54207420349121, "learning_rate": 7.71647761839282e-07, "loss": 0.0057, "step": 391140 }, { "epoch": 4.179176238046904, "grad_norm": 0.4125518798828125, "learning_rate": 7.716336566669946e-07, "loss": 0.0071, "step": 391150 }, { "epoch": 4.179283081361183, "grad_norm": 0.2244706004858017, "learning_rate": 7.716195511880121e-07, "loss": 0.0039, "step": 391160 }, { "epoch": 4.179389924675464, "grad_norm": 0.003448422532528639, "learning_rate": 7.716054454023508e-07, "loss": 0.0008, "step": 391170 }, { "epoch": 4.179496767989743, "grad_norm": 0.677003026008606, "learning_rate": 7.715913393100264e-07, "loss": 0.0311, "step": 391180 }, { "epoch": 4.179603611304023, "grad_norm": 2.044935703277588, "learning_rate": 7.71577232911055e-07, "loss": 0.0201, "step": 391190 }, { "epoch": 4.179710454618302, "grad_norm": 2.302093029022217, "learning_rate": 7.715631262054523e-07, "loss": 0.0054, "step": 391200 }, { "epoch": 4.179817297932582, "grad_norm": 0.12354074418544769, "learning_rate": 7.715490191932344e-07, "loss": 0.0017, "step": 391210 }, { "epoch": 4.179924141246861, "grad_norm": 0.0069382572546601295, "learning_rate": 7.715349118744169e-07, "loss": 0.0066, "step": 391220 }, { "epoch": 4.180030984561141, "grad_norm": 0.7720291614532471, "learning_rate": 7.715208042490163e-07, "loss": 0.0271, "step": 391230 }, { "epoch": 4.180137827875421, "grad_norm": 0.0005344569217413664, "learning_rate": 7.715066963170481e-07, "loss": 0.0059, "step": 391240 }, { "epoch": 4.1802446711897, "grad_norm": 0.03572958707809448, "learning_rate": 7.714925880785284e-07, "loss": 0.0087, "step": 391250 }, { "epoch": 4.18035151450398, "grad_norm": 0.012756041251122952, "learning_rate": 7.71478479533473e-07, "loss": 0.006, "step": 391260 }, { "epoch": 4.180458357818259, "grad_norm": 0.02971733547747135, "learning_rate": 7.714643706818979e-07, "loss": 0.0044, "step": 391270 }, { "epoch": 4.180565201132539, "grad_norm": 0.014797072857618332, "learning_rate": 7.714502615238192e-07, "loss": 0.0141, "step": 391280 }, { "epoch": 4.180672044446819, "grad_norm": 7.8929033279418945, "learning_rate": 7.714361520592525e-07, "loss": 0.0112, "step": 391290 }, { "epoch": 4.1807788877610985, "grad_norm": 0.12249720841646194, "learning_rate": 7.714220422882139e-07, "loss": 0.0002, "step": 391300 }, { "epoch": 4.180885731075378, "grad_norm": 9.907926559448242, "learning_rate": 7.714079322107195e-07, "loss": 0.0171, "step": 391310 }, { "epoch": 4.180992574389657, "grad_norm": 1.966657280921936, "learning_rate": 7.713938218267848e-07, "loss": 0.0057, "step": 391320 }, { "epoch": 4.181099417703937, "grad_norm": 1.1112154722213745, "learning_rate": 7.713797111364262e-07, "loss": 0.0226, "step": 391330 }, { "epoch": 4.181206261018216, "grad_norm": 1.2322349548339844, "learning_rate": 7.713656001396593e-07, "loss": 0.0052, "step": 391340 }, { "epoch": 4.181313104332497, "grad_norm": 0.0053014857694506645, "learning_rate": 7.713514888365002e-07, "loss": 0.0149, "step": 391350 }, { "epoch": 4.181419947646776, "grad_norm": 0.001988838193938136, "learning_rate": 7.713373772269649e-07, "loss": 0.0113, "step": 391360 }, { "epoch": 4.181526790961056, "grad_norm": 3.835150718688965, "learning_rate": 7.713232653110691e-07, "loss": 0.0143, "step": 391370 }, { "epoch": 4.181633634275335, "grad_norm": 0.009673124179244041, "learning_rate": 7.713091530888289e-07, "loss": 0.0119, "step": 391380 }, { "epoch": 4.1817404775896145, "grad_norm": 3.536379337310791, "learning_rate": 7.7129504056026e-07, "loss": 0.0086, "step": 391390 }, { "epoch": 4.181847320903895, "grad_norm": 0.00792994536459446, "learning_rate": 7.712809277253788e-07, "loss": 0.0028, "step": 391400 }, { "epoch": 4.181954164218174, "grad_norm": 0.006262767594307661, "learning_rate": 7.712668145842009e-07, "loss": 0.0239, "step": 391410 }, { "epoch": 4.182061007532454, "grad_norm": 0.01628418080508709, "learning_rate": 7.712527011367422e-07, "loss": 0.0177, "step": 391420 }, { "epoch": 4.182167850846733, "grad_norm": 6.545781135559082, "learning_rate": 7.712385873830187e-07, "loss": 0.0033, "step": 391430 }, { "epoch": 4.182274694161013, "grad_norm": 0.4078507125377655, "learning_rate": 7.712244733230464e-07, "loss": 0.0013, "step": 391440 }, { "epoch": 4.182381537475292, "grad_norm": 0.0002179201546823606, "learning_rate": 7.712103589568412e-07, "loss": 0.0103, "step": 391450 }, { "epoch": 4.182488380789572, "grad_norm": 1.4959479570388794, "learning_rate": 7.711962442844191e-07, "loss": 0.0724, "step": 391460 }, { "epoch": 4.182595224103852, "grad_norm": 3.508350133895874, "learning_rate": 7.71182129305796e-07, "loss": 0.006, "step": 391470 }, { "epoch": 4.182702067418131, "grad_norm": 0.005888383835554123, "learning_rate": 7.711680140209877e-07, "loss": 0.0019, "step": 391480 }, { "epoch": 4.182808910732411, "grad_norm": 0.3091021180152893, "learning_rate": 7.711538984300103e-07, "loss": 0.0194, "step": 391490 }, { "epoch": 4.18291575404669, "grad_norm": 0.04729076847434044, "learning_rate": 7.711397825328798e-07, "loss": 0.0036, "step": 391500 }, { "epoch": 4.18302259736097, "grad_norm": 0.0012114928103983402, "learning_rate": 7.711256663296118e-07, "loss": 0.0086, "step": 391510 }, { "epoch": 4.18312944067525, "grad_norm": 0.00025633382028900087, "learning_rate": 7.711115498202225e-07, "loss": 0.0413, "step": 391520 }, { "epoch": 4.18323628398953, "grad_norm": 8.245302200317383, "learning_rate": 7.71097433004728e-07, "loss": 0.0326, "step": 391530 }, { "epoch": 4.183343127303809, "grad_norm": 0.5753423571586609, "learning_rate": 7.710833158831439e-07, "loss": 0.0093, "step": 391540 }, { "epoch": 4.1834499706180885, "grad_norm": 0.002809828845784068, "learning_rate": 7.710691984554862e-07, "loss": 0.0156, "step": 391550 }, { "epoch": 4.183556813932368, "grad_norm": 0.24037866294384003, "learning_rate": 7.710550807217711e-07, "loss": 0.0006, "step": 391560 }, { "epoch": 4.1836636572466475, "grad_norm": 0.009006967768073082, "learning_rate": 7.710409626820142e-07, "loss": 0.0105, "step": 391570 }, { "epoch": 4.183770500560928, "grad_norm": 0.00926085002720356, "learning_rate": 7.710268443362315e-07, "loss": 0.0085, "step": 391580 }, { "epoch": 4.183877343875207, "grad_norm": 0.014225147664546967, "learning_rate": 7.710127256844392e-07, "loss": 0.0049, "step": 391590 }, { "epoch": 4.183984187189487, "grad_norm": 1.3275070190429688, "learning_rate": 7.709986067266532e-07, "loss": 0.0038, "step": 391600 }, { "epoch": 4.184091030503766, "grad_norm": 0.015547685325145721, "learning_rate": 7.709844874628891e-07, "loss": 0.0194, "step": 391610 }, { "epoch": 4.184197873818046, "grad_norm": 0.002952149137854576, "learning_rate": 7.709703678931632e-07, "loss": 0.0335, "step": 391620 }, { "epoch": 4.184304717132325, "grad_norm": 0.0011611388763412833, "learning_rate": 7.709562480174911e-07, "loss": 0.0071, "step": 391630 }, { "epoch": 4.1844115604466054, "grad_norm": 0.024081610143184662, "learning_rate": 7.709421278358892e-07, "loss": 0.0086, "step": 391640 }, { "epoch": 4.184518403760885, "grad_norm": 0.0024537050630897284, "learning_rate": 7.709280073483731e-07, "loss": 0.0048, "step": 391650 }, { "epoch": 4.184625247075164, "grad_norm": 1.896714210510254, "learning_rate": 7.709138865549587e-07, "loss": 0.0095, "step": 391660 }, { "epoch": 4.184732090389444, "grad_norm": 0.016042331233620644, "learning_rate": 7.708997654556622e-07, "loss": 0.0386, "step": 391670 }, { "epoch": 4.184838933703723, "grad_norm": 0.014557384885847569, "learning_rate": 7.708856440504995e-07, "loss": 0.0016, "step": 391680 }, { "epoch": 4.184945777018003, "grad_norm": 0.6079296469688416, "learning_rate": 7.708715223394864e-07, "loss": 0.0059, "step": 391690 }, { "epoch": 4.185052620332283, "grad_norm": 3.8231022357940674, "learning_rate": 7.708574003226388e-07, "loss": 0.0049, "step": 391700 }, { "epoch": 4.1851594636465625, "grad_norm": 1.6125746965408325, "learning_rate": 7.70843277999973e-07, "loss": 0.0024, "step": 391710 }, { "epoch": 4.185266306960842, "grad_norm": 7.29961633682251, "learning_rate": 7.708291553715043e-07, "loss": 0.009, "step": 391720 }, { "epoch": 4.1853731502751215, "grad_norm": 0.0023919017985463142, "learning_rate": 7.708150324372493e-07, "loss": 0.0204, "step": 391730 }, { "epoch": 4.185479993589401, "grad_norm": 2.1009507179260254, "learning_rate": 7.708009091972236e-07, "loss": 0.007, "step": 391740 }, { "epoch": 4.18558683690368, "grad_norm": 0.012102734297513962, "learning_rate": 7.707867856514434e-07, "loss": 0.001, "step": 391750 }, { "epoch": 4.185693680217961, "grad_norm": 1.2986228466033936, "learning_rate": 7.707726617999244e-07, "loss": 0.0213, "step": 391760 }, { "epoch": 4.18580052353224, "grad_norm": 0.08684584498405457, "learning_rate": 7.707585376426826e-07, "loss": 0.0126, "step": 391770 }, { "epoch": 4.18590736684652, "grad_norm": 0.0024132293183356524, "learning_rate": 7.707444131797338e-07, "loss": 0.0021, "step": 391780 }, { "epoch": 4.186014210160799, "grad_norm": 0.2120964527130127, "learning_rate": 7.707302884110945e-07, "loss": 0.0105, "step": 391790 }, { "epoch": 4.186121053475079, "grad_norm": 0.0013453214196488261, "learning_rate": 7.7071616333678e-07, "loss": 0.0063, "step": 391800 }, { "epoch": 4.186227896789358, "grad_norm": 0.0015473962994292378, "learning_rate": 7.707020379568066e-07, "loss": 0.0021, "step": 391810 }, { "epoch": 4.186334740103638, "grad_norm": 0.0016924341907724738, "learning_rate": 7.706879122711903e-07, "loss": 0.0004, "step": 391820 }, { "epoch": 4.186441583417918, "grad_norm": 0.13513636589050293, "learning_rate": 7.706737862799466e-07, "loss": 0.0007, "step": 391830 }, { "epoch": 4.186548426732197, "grad_norm": 0.20297208428382874, "learning_rate": 7.706596599830919e-07, "loss": 0.0149, "step": 391840 }, { "epoch": 4.186655270046477, "grad_norm": 0.059238314628601074, "learning_rate": 7.706455333806422e-07, "loss": 0.009, "step": 391850 }, { "epoch": 4.186762113360756, "grad_norm": 0.01889750175178051, "learning_rate": 7.70631406472613e-07, "loss": 0.0157, "step": 391860 }, { "epoch": 4.186868956675036, "grad_norm": 2.096316337585449, "learning_rate": 7.706172792590206e-07, "loss": 0.0073, "step": 391870 }, { "epoch": 4.186975799989316, "grad_norm": 3.0108494758605957, "learning_rate": 7.70603151739881e-07, "loss": 0.0103, "step": 391880 }, { "epoch": 4.1870826433035955, "grad_norm": 0.05798988416790962, "learning_rate": 7.7058902391521e-07, "loss": 0.0146, "step": 391890 }, { "epoch": 4.187189486617875, "grad_norm": 0.22012238204479218, "learning_rate": 7.705748957850234e-07, "loss": 0.0443, "step": 391900 }, { "epoch": 4.187296329932154, "grad_norm": 0.0016196353826671839, "learning_rate": 7.705607673493375e-07, "loss": 0.0155, "step": 391910 }, { "epoch": 4.187403173246434, "grad_norm": 5.802151679992676, "learning_rate": 7.705466386081679e-07, "loss": 0.0128, "step": 391920 }, { "epoch": 4.187510016560713, "grad_norm": 0.02098684385418892, "learning_rate": 7.705325095615309e-07, "loss": 0.0057, "step": 391930 }, { "epoch": 4.187616859874994, "grad_norm": 0.004345085006207228, "learning_rate": 7.705183802094422e-07, "loss": 0.0021, "step": 391940 }, { "epoch": 4.187723703189273, "grad_norm": 0.9290903806686401, "learning_rate": 7.705042505519178e-07, "loss": 0.0023, "step": 391950 }, { "epoch": 4.187830546503553, "grad_norm": 0.0015759044326841831, "learning_rate": 7.704901205889738e-07, "loss": 0.0357, "step": 391960 }, { "epoch": 4.187937389817832, "grad_norm": 0.012661783024668694, "learning_rate": 7.704759903206259e-07, "loss": 0.0098, "step": 391970 }, { "epoch": 4.1880442331321115, "grad_norm": 0.2341870367527008, "learning_rate": 7.704618597468901e-07, "loss": 0.0045, "step": 391980 }, { "epoch": 4.188151076446391, "grad_norm": 0.026256442070007324, "learning_rate": 7.704477288677828e-07, "loss": 0.0491, "step": 391990 }, { "epoch": 4.188257919760671, "grad_norm": 0.6702876687049866, "learning_rate": 7.704335976833192e-07, "loss": 0.0194, "step": 392000 }, { "epoch": 4.188364763074951, "grad_norm": 0.23597942292690277, "learning_rate": 7.70419466193516e-07, "loss": 0.0039, "step": 392010 }, { "epoch": 4.18847160638923, "grad_norm": 0.02601044625043869, "learning_rate": 7.704053343983888e-07, "loss": 0.0077, "step": 392020 }, { "epoch": 4.18857844970351, "grad_norm": 1.398677945137024, "learning_rate": 7.703912022979533e-07, "loss": 0.0067, "step": 392030 }, { "epoch": 4.188685293017789, "grad_norm": 0.010601554997265339, "learning_rate": 7.703770698922259e-07, "loss": 0.0016, "step": 392040 }, { "epoch": 4.188792136332069, "grad_norm": 0.013729563914239407, "learning_rate": 7.703629371812224e-07, "loss": 0.0102, "step": 392050 }, { "epoch": 4.188898979646349, "grad_norm": 0.10736940056085587, "learning_rate": 7.703488041649586e-07, "loss": 0.0008, "step": 392060 }, { "epoch": 4.189005822960628, "grad_norm": 0.036036375910043716, "learning_rate": 7.703346708434506e-07, "loss": 0.0104, "step": 392070 }, { "epoch": 4.189112666274908, "grad_norm": 0.06388716399669647, "learning_rate": 7.703205372167146e-07, "loss": 0.0092, "step": 392080 }, { "epoch": 4.189219509589187, "grad_norm": 0.011755425482988358, "learning_rate": 7.70306403284766e-07, "loss": 0.0024, "step": 392090 }, { "epoch": 4.189326352903467, "grad_norm": 0.002681337296962738, "learning_rate": 7.702922690476212e-07, "loss": 0.0126, "step": 392100 }, { "epoch": 4.189433196217747, "grad_norm": 12.984485626220703, "learning_rate": 7.702781345052962e-07, "loss": 0.0055, "step": 392110 }, { "epoch": 4.189540039532027, "grad_norm": 0.0012887329794466496, "learning_rate": 7.702639996578065e-07, "loss": 0.0093, "step": 392120 }, { "epoch": 4.189646882846306, "grad_norm": 2.5479793548583984, "learning_rate": 7.702498645051685e-07, "loss": 0.0045, "step": 392130 }, { "epoch": 4.1897537261605855, "grad_norm": 7.602672100067139, "learning_rate": 7.702357290473981e-07, "loss": 0.0209, "step": 392140 }, { "epoch": 4.189860569474865, "grad_norm": 1.0755783319473267, "learning_rate": 7.70221593284511e-07, "loss": 0.0185, "step": 392150 }, { "epoch": 4.189967412789144, "grad_norm": 1.4731072187423706, "learning_rate": 7.702074572165233e-07, "loss": 0.0016, "step": 392160 }, { "epoch": 4.190074256103424, "grad_norm": 4.81272554397583, "learning_rate": 7.701933208434512e-07, "loss": 0.0128, "step": 392170 }, { "epoch": 4.190181099417704, "grad_norm": 0.5692538022994995, "learning_rate": 7.701791841653103e-07, "loss": 0.0012, "step": 392180 }, { "epoch": 4.190287942731984, "grad_norm": 3.075575351715088, "learning_rate": 7.701650471821166e-07, "loss": 0.009, "step": 392190 }, { "epoch": 4.190394786046263, "grad_norm": 0.3322697877883911, "learning_rate": 7.701509098938863e-07, "loss": 0.0044, "step": 392200 }, { "epoch": 4.190501629360543, "grad_norm": 4.227558135986328, "learning_rate": 7.701367723006352e-07, "loss": 0.0037, "step": 392210 }, { "epoch": 4.190608472674822, "grad_norm": 1.1200374364852905, "learning_rate": 7.701226344023795e-07, "loss": 0.0084, "step": 392220 }, { "epoch": 4.190715315989102, "grad_norm": 0.9439806938171387, "learning_rate": 7.701084961991346e-07, "loss": 0.0111, "step": 392230 }, { "epoch": 4.190822159303382, "grad_norm": 0.021290048956871033, "learning_rate": 7.700943576909172e-07, "loss": 0.0028, "step": 392240 }, { "epoch": 4.190929002617661, "grad_norm": 0.5270687341690063, "learning_rate": 7.700802188777426e-07, "loss": 0.002, "step": 392250 }, { "epoch": 4.191035845931941, "grad_norm": 0.002828742377460003, "learning_rate": 7.70066079759627e-07, "loss": 0.0168, "step": 392260 }, { "epoch": 4.19114268924622, "grad_norm": 0.10532625019550323, "learning_rate": 7.700519403365866e-07, "loss": 0.0151, "step": 392270 }, { "epoch": 4.1912495325605, "grad_norm": 0.009146911092102528, "learning_rate": 7.700378006086373e-07, "loss": 0.0015, "step": 392280 }, { "epoch": 4.19135637587478, "grad_norm": 0.37618619203567505, "learning_rate": 7.700236605757946e-07, "loss": 0.0061, "step": 392290 }, { "epoch": 4.1914632191890595, "grad_norm": 0.7058011889457703, "learning_rate": 7.70009520238075e-07, "loss": 0.0015, "step": 392300 }, { "epoch": 4.191570062503339, "grad_norm": 3.6862547397613525, "learning_rate": 7.699953795954943e-07, "loss": 0.0193, "step": 392310 }, { "epoch": 4.191676905817618, "grad_norm": 0.05771060660481453, "learning_rate": 7.699812386480685e-07, "loss": 0.0, "step": 392320 }, { "epoch": 4.191783749131898, "grad_norm": 0.008199773728847504, "learning_rate": 7.699670973958132e-07, "loss": 0.0007, "step": 392330 }, { "epoch": 4.191890592446177, "grad_norm": 0.03302622213959694, "learning_rate": 7.69952955838745e-07, "loss": 0.0137, "step": 392340 }, { "epoch": 4.191997435760458, "grad_norm": 0.016323991119861603, "learning_rate": 7.699388139768793e-07, "loss": 0.0078, "step": 392350 }, { "epoch": 4.192104279074737, "grad_norm": 1.0743950605392456, "learning_rate": 7.699246718102326e-07, "loss": 0.005, "step": 392360 }, { "epoch": 4.192211122389017, "grad_norm": 1.7434029579162598, "learning_rate": 7.699105293388204e-07, "loss": 0.0037, "step": 392370 }, { "epoch": 4.192317965703296, "grad_norm": 0.02802061289548874, "learning_rate": 7.698963865626586e-07, "loss": 0.003, "step": 392380 }, { "epoch": 4.1924248090175755, "grad_norm": 3.057880163192749, "learning_rate": 7.698822434817637e-07, "loss": 0.0046, "step": 392390 }, { "epoch": 4.192531652331855, "grad_norm": 0.037708014249801636, "learning_rate": 7.698681000961513e-07, "loss": 0.0029, "step": 392400 }, { "epoch": 4.192638495646135, "grad_norm": 5.090190410614014, "learning_rate": 7.698539564058375e-07, "loss": 0.0158, "step": 392410 }, { "epoch": 4.192745338960415, "grad_norm": 0.03085584007203579, "learning_rate": 7.698398124108383e-07, "loss": 0.0006, "step": 392420 }, { "epoch": 4.192852182274694, "grad_norm": 5.7507548332214355, "learning_rate": 7.698256681111693e-07, "loss": 0.0017, "step": 392430 }, { "epoch": 4.192959025588974, "grad_norm": 5.365682125091553, "learning_rate": 7.698115235068469e-07, "loss": 0.0091, "step": 392440 }, { "epoch": 4.193065868903253, "grad_norm": 0.06264378130435944, "learning_rate": 7.697973785978869e-07, "loss": 0.0068, "step": 392450 }, { "epoch": 4.193172712217533, "grad_norm": 0.08704473078250885, "learning_rate": 7.697832333843055e-07, "loss": 0.0053, "step": 392460 }, { "epoch": 4.193279555531813, "grad_norm": 0.12569887936115265, "learning_rate": 7.697690878661183e-07, "loss": 0.0003, "step": 392470 }, { "epoch": 4.193386398846092, "grad_norm": 0.007311572320759296, "learning_rate": 7.697549420433414e-07, "loss": 0.0116, "step": 392480 }, { "epoch": 4.193493242160372, "grad_norm": 1.5953948497772217, "learning_rate": 7.697407959159907e-07, "loss": 0.0141, "step": 392490 }, { "epoch": 4.193600085474651, "grad_norm": 0.007661707233637571, "learning_rate": 7.697266494840826e-07, "loss": 0.0063, "step": 392500 }, { "epoch": 4.193706928788931, "grad_norm": 1.987034559249878, "learning_rate": 7.697125027476325e-07, "loss": 0.0062, "step": 392510 }, { "epoch": 4.19381377210321, "grad_norm": 1.4788576364517212, "learning_rate": 7.696983557066568e-07, "loss": 0.0216, "step": 392520 }, { "epoch": 4.193920615417491, "grad_norm": 2.8013851642608643, "learning_rate": 7.696842083611712e-07, "loss": 0.0156, "step": 392530 }, { "epoch": 4.19402745873177, "grad_norm": 0.07847040146589279, "learning_rate": 7.696700607111919e-07, "loss": 0.0078, "step": 392540 }, { "epoch": 4.1941343020460495, "grad_norm": 3.32845139503479, "learning_rate": 7.696559127567345e-07, "loss": 0.0231, "step": 392550 }, { "epoch": 4.194241145360329, "grad_norm": 4.871244430541992, "learning_rate": 7.696417644978154e-07, "loss": 0.0034, "step": 392560 }, { "epoch": 4.1943479886746085, "grad_norm": 0.929625928401947, "learning_rate": 7.696276159344506e-07, "loss": 0.0032, "step": 392570 }, { "epoch": 4.194454831988888, "grad_norm": 0.006537759210914373, "learning_rate": 7.696134670666554e-07, "loss": 0.0022, "step": 392580 }, { "epoch": 4.194561675303168, "grad_norm": 0.004998417571187019, "learning_rate": 7.695993178944466e-07, "loss": 0.0123, "step": 392590 }, { "epoch": 4.194668518617448, "grad_norm": 0.006408875808119774, "learning_rate": 7.695851684178397e-07, "loss": 0.0016, "step": 392600 }, { "epoch": 4.194775361931727, "grad_norm": 0.0061824871227145195, "learning_rate": 7.695710186368508e-07, "loss": 0.0134, "step": 392610 }, { "epoch": 4.194882205246007, "grad_norm": 0.7132369875907898, "learning_rate": 7.695568685514959e-07, "loss": 0.0123, "step": 392620 }, { "epoch": 4.194989048560286, "grad_norm": 0.5859572887420654, "learning_rate": 7.69542718161791e-07, "loss": 0.0061, "step": 392630 }, { "epoch": 4.1950958918745656, "grad_norm": 2.741173267364502, "learning_rate": 7.69528567467752e-07, "loss": 0.0234, "step": 392640 }, { "epoch": 4.195202735188846, "grad_norm": 0.0017323597567155957, "learning_rate": 7.695144164693948e-07, "loss": 0.0021, "step": 392650 }, { "epoch": 4.195309578503125, "grad_norm": 0.008802938275039196, "learning_rate": 7.695002651667355e-07, "loss": 0.0025, "step": 392660 }, { "epoch": 4.195416421817405, "grad_norm": 0.16956695914268494, "learning_rate": 7.694861135597902e-07, "loss": 0.0171, "step": 392670 }, { "epoch": 4.195523265131684, "grad_norm": 1.600217342376709, "learning_rate": 7.694719616485748e-07, "loss": 0.0127, "step": 392680 }, { "epoch": 4.195630108445964, "grad_norm": 0.12471272796392441, "learning_rate": 7.694578094331049e-07, "loss": 0.0029, "step": 392690 }, { "epoch": 4.195736951760243, "grad_norm": 0.5588239431381226, "learning_rate": 7.694436569133971e-07, "loss": 0.011, "step": 392700 }, { "epoch": 4.1958437950745235, "grad_norm": 0.029099050909280777, "learning_rate": 7.69429504089467e-07, "loss": 0.0011, "step": 392710 }, { "epoch": 4.195950638388803, "grad_norm": 9.305746078491211, "learning_rate": 7.694153509613304e-07, "loss": 0.0079, "step": 392720 }, { "epoch": 4.1960574817030825, "grad_norm": 0.002092367270961404, "learning_rate": 7.694011975290038e-07, "loss": 0.0005, "step": 392730 }, { "epoch": 4.196164325017362, "grad_norm": 8.895559310913086, "learning_rate": 7.693870437925028e-07, "loss": 0.0151, "step": 392740 }, { "epoch": 4.196271168331641, "grad_norm": 0.0967879444360733, "learning_rate": 7.693728897518436e-07, "loss": 0.0089, "step": 392750 }, { "epoch": 4.196378011645921, "grad_norm": 0.0012641909997910261, "learning_rate": 7.693587354070419e-07, "loss": 0.0136, "step": 392760 }, { "epoch": 4.196484854960201, "grad_norm": 0.0036822885740548372, "learning_rate": 7.69344580758114e-07, "loss": 0.0046, "step": 392770 }, { "epoch": 4.196591698274481, "grad_norm": 0.0009490765514783561, "learning_rate": 7.693304258050757e-07, "loss": 0.0191, "step": 392780 }, { "epoch": 4.19669854158876, "grad_norm": 0.0008859842782840133, "learning_rate": 7.69316270547943e-07, "loss": 0.0003, "step": 392790 }, { "epoch": 4.19680538490304, "grad_norm": 39.915897369384766, "learning_rate": 7.69302114986732e-07, "loss": 0.0282, "step": 392800 }, { "epoch": 4.196912228217319, "grad_norm": 0.02046368271112442, "learning_rate": 7.692879591214585e-07, "loss": 0.0601, "step": 392810 }, { "epoch": 4.197019071531599, "grad_norm": 0.0055953627452254295, "learning_rate": 7.692738029521387e-07, "loss": 0.0197, "step": 392820 }, { "epoch": 4.197125914845879, "grad_norm": 0.6986602544784546, "learning_rate": 7.692596464787883e-07, "loss": 0.0172, "step": 392830 }, { "epoch": 4.197232758160158, "grad_norm": 0.0024259244091808796, "learning_rate": 7.692454897014236e-07, "loss": 0.0021, "step": 392840 }, { "epoch": 4.197339601474438, "grad_norm": 10.782014846801758, "learning_rate": 7.692313326200602e-07, "loss": 0.0182, "step": 392850 }, { "epoch": 4.197446444788717, "grad_norm": 0.0352514274418354, "learning_rate": 7.692171752347144e-07, "loss": 0.0042, "step": 392860 }, { "epoch": 4.197553288102997, "grad_norm": 8.751733779907227, "learning_rate": 7.692030175454022e-07, "loss": 0.0068, "step": 392870 }, { "epoch": 4.197660131417276, "grad_norm": 1.9650788307189941, "learning_rate": 7.691888595521394e-07, "loss": 0.0037, "step": 392880 }, { "epoch": 4.1977669747315565, "grad_norm": 0.014004342257976532, "learning_rate": 7.69174701254942e-07, "loss": 0.0087, "step": 392890 }, { "epoch": 4.197873818045836, "grad_norm": 4.790933132171631, "learning_rate": 7.691605426538261e-07, "loss": 0.0044, "step": 392900 }, { "epoch": 4.197980661360115, "grad_norm": 0.37871408462524414, "learning_rate": 7.691463837488077e-07, "loss": 0.0027, "step": 392910 }, { "epoch": 4.198087504674395, "grad_norm": 0.03779122605919838, "learning_rate": 7.691322245399026e-07, "loss": 0.0104, "step": 392920 }, { "epoch": 4.198194347988674, "grad_norm": 0.001155289588496089, "learning_rate": 7.691180650271269e-07, "loss": 0.0084, "step": 392930 }, { "epoch": 4.198301191302955, "grad_norm": 0.00788145326077938, "learning_rate": 7.691039052104967e-07, "loss": 0.0667, "step": 392940 }, { "epoch": 4.198408034617234, "grad_norm": 0.5166875720024109, "learning_rate": 7.690897450900278e-07, "loss": 0.0128, "step": 392950 }, { "epoch": 4.198514877931514, "grad_norm": 3.029632806777954, "learning_rate": 7.690755846657363e-07, "loss": 0.0112, "step": 392960 }, { "epoch": 4.198621721245793, "grad_norm": 0.07079782336950302, "learning_rate": 7.690614239376383e-07, "loss": 0.0107, "step": 392970 }, { "epoch": 4.1987285645600725, "grad_norm": 1.3724135160446167, "learning_rate": 7.690472629057494e-07, "loss": 0.0048, "step": 392980 }, { "epoch": 4.198835407874352, "grad_norm": 0.052498698234558105, "learning_rate": 7.69033101570086e-07, "loss": 0.0019, "step": 392990 }, { "epoch": 4.198942251188632, "grad_norm": 0.007178554777055979, "learning_rate": 7.690189399306637e-07, "loss": 0.0031, "step": 393000 }, { "epoch": 4.199049094502912, "grad_norm": 0.7589934468269348, "learning_rate": 7.690047779874989e-07, "loss": 0.0033, "step": 393010 }, { "epoch": 4.199155937817191, "grad_norm": 7.1990838050842285, "learning_rate": 7.689906157406074e-07, "loss": 0.0227, "step": 393020 }, { "epoch": 4.199262781131471, "grad_norm": 2.518904447555542, "learning_rate": 7.689764531900052e-07, "loss": 0.0084, "step": 393030 }, { "epoch": 4.19936962444575, "grad_norm": 0.12765513360500336, "learning_rate": 7.689622903357082e-07, "loss": 0.0085, "step": 393040 }, { "epoch": 4.19947646776003, "grad_norm": 0.006200960371643305, "learning_rate": 7.689481271777325e-07, "loss": 0.0011, "step": 393050 }, { "epoch": 4.19958331107431, "grad_norm": 0.016918715089559555, "learning_rate": 7.689339637160941e-07, "loss": 0.0032, "step": 393060 }, { "epoch": 4.199690154388589, "grad_norm": 0.008082428947091103, "learning_rate": 7.68919799950809e-07, "loss": 0.02, "step": 393070 }, { "epoch": 4.199796997702869, "grad_norm": 0.022765757516026497, "learning_rate": 7.689056358818931e-07, "loss": 0.006, "step": 393080 }, { "epoch": 4.199903841017148, "grad_norm": 0.0029712931718677282, "learning_rate": 7.688914715093624e-07, "loss": 0.0247, "step": 393090 }, { "epoch": 4.200010684331428, "grad_norm": 0.18190225958824158, "learning_rate": 7.688773068332329e-07, "loss": 0.0051, "step": 393100 }, { "epoch": 4.200117527645707, "grad_norm": 0.029620496556162834, "learning_rate": 7.688631418535208e-07, "loss": 0.0146, "step": 393110 }, { "epoch": 4.200224370959988, "grad_norm": 0.009150039404630661, "learning_rate": 7.688489765702417e-07, "loss": 0.0075, "step": 393120 }, { "epoch": 4.200331214274267, "grad_norm": 2.6687026023864746, "learning_rate": 7.688348109834119e-07, "loss": 0.0228, "step": 393130 }, { "epoch": 4.2004380575885465, "grad_norm": 0.019847961142659187, "learning_rate": 7.688206450930474e-07, "loss": 0.0034, "step": 393140 }, { "epoch": 4.200544900902826, "grad_norm": 4.50389289855957, "learning_rate": 7.688064788991641e-07, "loss": 0.0127, "step": 393150 }, { "epoch": 4.200651744217105, "grad_norm": 0.1505667269229889, "learning_rate": 7.687923124017779e-07, "loss": 0.0659, "step": 393160 }, { "epoch": 4.200758587531385, "grad_norm": 0.7354859113693237, "learning_rate": 7.687781456009049e-07, "loss": 0.0019, "step": 393170 }, { "epoch": 4.200865430845665, "grad_norm": 0.1006564199924469, "learning_rate": 7.687639784965611e-07, "loss": 0.003, "step": 393180 }, { "epoch": 4.200972274159945, "grad_norm": 0.0006207166588865221, "learning_rate": 7.687498110887627e-07, "loss": 0.0154, "step": 393190 }, { "epoch": 4.201079117474224, "grad_norm": 0.7927636504173279, "learning_rate": 7.687356433775252e-07, "loss": 0.0095, "step": 393200 }, { "epoch": 4.201185960788504, "grad_norm": 0.038326431065797806, "learning_rate": 7.68721475362865e-07, "loss": 0.0006, "step": 393210 }, { "epoch": 4.201292804102783, "grad_norm": 0.011395066976547241, "learning_rate": 7.68707307044798e-07, "loss": 0.0009, "step": 393220 }, { "epoch": 4.2013996474170625, "grad_norm": 0.0048224665224552155, "learning_rate": 7.686931384233399e-07, "loss": 0.0641, "step": 393230 }, { "epoch": 4.201506490731343, "grad_norm": 0.2144790142774582, "learning_rate": 7.686789694985073e-07, "loss": 0.0079, "step": 393240 }, { "epoch": 4.201613334045622, "grad_norm": 2.556424856185913, "learning_rate": 7.686648002703158e-07, "loss": 0.0062, "step": 393250 }, { "epoch": 4.201720177359902, "grad_norm": 3.9726035594940186, "learning_rate": 7.686506307387813e-07, "loss": 0.0035, "step": 393260 }, { "epoch": 4.201827020674181, "grad_norm": 0.00539485365152359, "learning_rate": 7.686364609039201e-07, "loss": 0.0109, "step": 393270 }, { "epoch": 4.201933863988461, "grad_norm": 0.5238673686981201, "learning_rate": 7.686222907657481e-07, "loss": 0.0234, "step": 393280 }, { "epoch": 4.20204070730274, "grad_norm": 2.7783846855163574, "learning_rate": 7.686081203242811e-07, "loss": 0.0008, "step": 393290 }, { "epoch": 4.2021475506170205, "grad_norm": 0.27447080612182617, "learning_rate": 7.685939495795354e-07, "loss": 0.0258, "step": 393300 }, { "epoch": 4.2022543939313, "grad_norm": 0.007548000197857618, "learning_rate": 7.685797785315269e-07, "loss": 0.0063, "step": 393310 }, { "epoch": 4.202361237245579, "grad_norm": 0.26175448298454285, "learning_rate": 7.685656071802713e-07, "loss": 0.0038, "step": 393320 }, { "epoch": 4.202468080559859, "grad_norm": 0.0923653393983841, "learning_rate": 7.685514355257852e-07, "loss": 0.0012, "step": 393330 }, { "epoch": 4.202574923874138, "grad_norm": 0.03245865926146507, "learning_rate": 7.68537263568084e-07, "loss": 0.0081, "step": 393340 }, { "epoch": 4.202681767188418, "grad_norm": 0.005425835959613323, "learning_rate": 7.685230913071841e-07, "loss": 0.0033, "step": 393350 }, { "epoch": 4.202788610502698, "grad_norm": 2.2286417484283447, "learning_rate": 7.685089187431014e-07, "loss": 0.0046, "step": 393360 }, { "epoch": 4.202895453816978, "grad_norm": 0.16296271979808807, "learning_rate": 7.684947458758515e-07, "loss": 0.0033, "step": 393370 }, { "epoch": 4.203002297131257, "grad_norm": 0.0008421674137935042, "learning_rate": 7.68480572705451e-07, "loss": 0.0102, "step": 393380 }, { "epoch": 4.2031091404455365, "grad_norm": 1.8344658613204956, "learning_rate": 7.684663992319158e-07, "loss": 0.0058, "step": 393390 }, { "epoch": 4.203215983759816, "grad_norm": 0.11648458242416382, "learning_rate": 7.684522254552616e-07, "loss": 0.0089, "step": 393400 }, { "epoch": 4.203322827074095, "grad_norm": 0.05600950866937637, "learning_rate": 7.684380513755046e-07, "loss": 0.0508, "step": 393410 }, { "epoch": 4.203429670388376, "grad_norm": 0.6734768152236938, "learning_rate": 7.68423876992661e-07, "loss": 0.0115, "step": 393420 }, { "epoch": 4.203536513702655, "grad_norm": 0.034693412482738495, "learning_rate": 7.684097023067464e-07, "loss": 0.0396, "step": 393430 }, { "epoch": 4.203643357016935, "grad_norm": 1.1888383626937866, "learning_rate": 7.68395527317777e-07, "loss": 0.0019, "step": 393440 }, { "epoch": 4.203750200331214, "grad_norm": 6.5384907722473145, "learning_rate": 7.683813520257689e-07, "loss": 0.012, "step": 393450 }, { "epoch": 4.203857043645494, "grad_norm": 2.2133054733276367, "learning_rate": 7.683671764307378e-07, "loss": 0.0086, "step": 393460 }, { "epoch": 4.203963886959773, "grad_norm": 2.0145232677459717, "learning_rate": 7.683530005327002e-07, "loss": 0.0043, "step": 393470 }, { "epoch": 4.204070730274053, "grad_norm": 0.08234848082065582, "learning_rate": 7.683388243316716e-07, "loss": 0.0176, "step": 393480 }, { "epoch": 4.204177573588333, "grad_norm": 0.0491643026471138, "learning_rate": 7.683246478276681e-07, "loss": 0.0036, "step": 393490 }, { "epoch": 4.204284416902612, "grad_norm": 4.179017543792725, "learning_rate": 7.68310471020706e-07, "loss": 0.0006, "step": 393500 }, { "epoch": 4.204391260216892, "grad_norm": 0.9217012524604797, "learning_rate": 7.682962939108011e-07, "loss": 0.0043, "step": 393510 }, { "epoch": 4.204498103531171, "grad_norm": 5.950328350067139, "learning_rate": 7.682821164979693e-07, "loss": 0.0105, "step": 393520 }, { "epoch": 4.204604946845451, "grad_norm": 1.114709496498108, "learning_rate": 7.68267938782227e-07, "loss": 0.0225, "step": 393530 }, { "epoch": 4.204711790159731, "grad_norm": 0.1543467938899994, "learning_rate": 7.682537607635898e-07, "loss": 0.0088, "step": 393540 }, { "epoch": 4.2048186334740105, "grad_norm": 0.0055153570137917995, "learning_rate": 7.682395824420737e-07, "loss": 0.0204, "step": 393550 }, { "epoch": 4.20492547678829, "grad_norm": 0.014277434907853603, "learning_rate": 7.682254038176952e-07, "loss": 0.0037, "step": 393560 }, { "epoch": 4.205032320102569, "grad_norm": 0.08115501701831818, "learning_rate": 7.682112248904697e-07, "loss": 0.0121, "step": 393570 }, { "epoch": 4.205139163416849, "grad_norm": 7.368921279907227, "learning_rate": 7.681970456604137e-07, "loss": 0.0221, "step": 393580 }, { "epoch": 4.205246006731128, "grad_norm": 0.021297166123986244, "learning_rate": 7.681828661275429e-07, "loss": 0.0076, "step": 393590 }, { "epoch": 4.205352850045409, "grad_norm": 0.012926657684147358, "learning_rate": 7.681686862918734e-07, "loss": 0.0246, "step": 393600 }, { "epoch": 4.205459693359688, "grad_norm": 4.320055961608887, "learning_rate": 7.681545061534211e-07, "loss": 0.0236, "step": 393610 }, { "epoch": 4.205566536673968, "grad_norm": 0.03515586629509926, "learning_rate": 7.681403257122023e-07, "loss": 0.0084, "step": 393620 }, { "epoch": 4.205673379988247, "grad_norm": 0.002803616924211383, "learning_rate": 7.681261449682329e-07, "loss": 0.0191, "step": 393630 }, { "epoch": 4.2057802233025265, "grad_norm": 0.004677711520344019, "learning_rate": 7.681119639215285e-07, "loss": 0.0045, "step": 393640 }, { "epoch": 4.205887066616807, "grad_norm": 0.36451399326324463, "learning_rate": 7.680977825721057e-07, "loss": 0.0051, "step": 393650 }, { "epoch": 4.205993909931086, "grad_norm": 0.01964174397289753, "learning_rate": 7.680836009199803e-07, "loss": 0.0033, "step": 393660 }, { "epoch": 4.206100753245366, "grad_norm": 0.2076859176158905, "learning_rate": 7.680694189651682e-07, "loss": 0.0061, "step": 393670 }, { "epoch": 4.206207596559645, "grad_norm": 0.018266040831804276, "learning_rate": 7.680552367076856e-07, "loss": 0.0108, "step": 393680 }, { "epoch": 4.206314439873925, "grad_norm": 0.011094284243881702, "learning_rate": 7.680410541475483e-07, "loss": 0.0065, "step": 393690 }, { "epoch": 4.206421283188204, "grad_norm": 0.00903439149260521, "learning_rate": 7.680268712847724e-07, "loss": 0.0122, "step": 393700 }, { "epoch": 4.2065281265024845, "grad_norm": 4.850509166717529, "learning_rate": 7.680126881193739e-07, "loss": 0.0405, "step": 393710 }, { "epoch": 4.206634969816764, "grad_norm": 2.6300108432769775, "learning_rate": 7.67998504651369e-07, "loss": 0.0018, "step": 393720 }, { "epoch": 4.2067418131310435, "grad_norm": 13.45672607421875, "learning_rate": 7.679843208807735e-07, "loss": 0.0098, "step": 393730 }, { "epoch": 4.206848656445323, "grad_norm": 4.252828121185303, "learning_rate": 7.679701368076034e-07, "loss": 0.0041, "step": 393740 }, { "epoch": 4.206955499759602, "grad_norm": 0.11039835959672928, "learning_rate": 7.679559524318749e-07, "loss": 0.0006, "step": 393750 }, { "epoch": 4.207062343073882, "grad_norm": 7.9380340576171875, "learning_rate": 7.679417677536037e-07, "loss": 0.0052, "step": 393760 }, { "epoch": 4.207169186388162, "grad_norm": 0.0024185918737202883, "learning_rate": 7.679275827728063e-07, "loss": 0.0089, "step": 393770 }, { "epoch": 4.207276029702442, "grad_norm": 0.03150700032711029, "learning_rate": 7.679133974894982e-07, "loss": 0.0017, "step": 393780 }, { "epoch": 4.207382873016721, "grad_norm": 0.00561214005574584, "learning_rate": 7.678992119036957e-07, "loss": 0.0017, "step": 393790 }, { "epoch": 4.2074897163310006, "grad_norm": 0.00844087079167366, "learning_rate": 7.678850260154149e-07, "loss": 0.0007, "step": 393800 }, { "epoch": 4.20759655964528, "grad_norm": 0.07379381358623505, "learning_rate": 7.678708398246717e-07, "loss": 0.023, "step": 393810 }, { "epoch": 4.2077034029595595, "grad_norm": 0.19212526082992554, "learning_rate": 7.678566533314821e-07, "loss": 0.0023, "step": 393820 }, { "epoch": 4.20781024627384, "grad_norm": 0.003029497805982828, "learning_rate": 7.678424665358619e-07, "loss": 0.0039, "step": 393830 }, { "epoch": 4.207917089588119, "grad_norm": 0.16166450083255768, "learning_rate": 7.678282794378276e-07, "loss": 0.0048, "step": 393840 }, { "epoch": 4.208023932902399, "grad_norm": 0.016006994992494583, "learning_rate": 7.678140920373949e-07, "loss": 0.0019, "step": 393850 }, { "epoch": 4.208130776216678, "grad_norm": 2.6900579929351807, "learning_rate": 7.677999043345798e-07, "loss": 0.0013, "step": 393860 }, { "epoch": 4.208237619530958, "grad_norm": 6.505965232849121, "learning_rate": 7.677857163293983e-07, "loss": 0.0363, "step": 393870 }, { "epoch": 4.208344462845237, "grad_norm": 0.07578223943710327, "learning_rate": 7.677715280218668e-07, "loss": 0.0035, "step": 393880 }, { "epoch": 4.2084513061595175, "grad_norm": 0.06787600368261337, "learning_rate": 7.677573394120008e-07, "loss": 0.001, "step": 393890 }, { "epoch": 4.208558149473797, "grad_norm": 0.0024673999287188053, "learning_rate": 7.677431504998167e-07, "loss": 0.0061, "step": 393900 }, { "epoch": 4.208664992788076, "grad_norm": 1.3786966800689697, "learning_rate": 7.677289612853304e-07, "loss": 0.0189, "step": 393910 }, { "epoch": 4.208771836102356, "grad_norm": 3.2562665939331055, "learning_rate": 7.677147717685577e-07, "loss": 0.0229, "step": 393920 }, { "epoch": 4.208878679416635, "grad_norm": 0.8349857926368713, "learning_rate": 7.677005819495149e-07, "loss": 0.0034, "step": 393930 }, { "epoch": 4.208985522730915, "grad_norm": 0.9714908599853516, "learning_rate": 7.67686391828218e-07, "loss": 0.0051, "step": 393940 }, { "epoch": 4.209092366045195, "grad_norm": 9.418004989624023, "learning_rate": 7.676722014046828e-07, "loss": 0.0215, "step": 393950 }, { "epoch": 4.209199209359475, "grad_norm": 5.266727447509766, "learning_rate": 7.676580106789256e-07, "loss": 0.0121, "step": 393960 }, { "epoch": 4.209306052673754, "grad_norm": 0.004601856227964163, "learning_rate": 7.676438196509624e-07, "loss": 0.009, "step": 393970 }, { "epoch": 4.2094128959880335, "grad_norm": Infinity, "learning_rate": 7.67629628320809e-07, "loss": 0.0294, "step": 393980 }, { "epoch": 4.209519739302313, "grad_norm": 12.470760345458984, "learning_rate": 7.676154366884816e-07, "loss": 0.0039, "step": 393990 }, { "epoch": 4.209626582616592, "grad_norm": 0.012872626073658466, "learning_rate": 7.676012447539961e-07, "loss": 0.005, "step": 394000 }, { "epoch": 4.209733425930873, "grad_norm": 0.08399958908557892, "learning_rate": 7.675870525173687e-07, "loss": 0.004, "step": 394010 }, { "epoch": 4.209840269245152, "grad_norm": 6.773169994354248, "learning_rate": 7.675728599786154e-07, "loss": 0.0036, "step": 394020 }, { "epoch": 4.209947112559432, "grad_norm": 0.2607724070549011, "learning_rate": 7.675586671377519e-07, "loss": 0.0143, "step": 394030 }, { "epoch": 4.210053955873711, "grad_norm": 0.0060266307555139065, "learning_rate": 7.675444739947945e-07, "loss": 0.0028, "step": 394040 }, { "epoch": 4.210160799187991, "grad_norm": 0.18872345983982086, "learning_rate": 7.675302805497594e-07, "loss": 0.0016, "step": 394050 }, { "epoch": 4.21026764250227, "grad_norm": 0.023964732885360718, "learning_rate": 7.675160868026623e-07, "loss": 0.0022, "step": 394060 }, { "epoch": 4.21037448581655, "grad_norm": 0.5886915326118469, "learning_rate": 7.675018927535193e-07, "loss": 0.0043, "step": 394070 }, { "epoch": 4.21048132913083, "grad_norm": 0.017164072021842003, "learning_rate": 7.674876984023466e-07, "loss": 0.018, "step": 394080 }, { "epoch": 4.210588172445109, "grad_norm": 0.36634665727615356, "learning_rate": 7.6747350374916e-07, "loss": 0.0218, "step": 394090 }, { "epoch": 4.210695015759389, "grad_norm": 11.112354278564453, "learning_rate": 7.674593087939757e-07, "loss": 0.0148, "step": 394100 }, { "epoch": 4.210801859073668, "grad_norm": 0.04171256348490715, "learning_rate": 7.674451135368096e-07, "loss": 0.0016, "step": 394110 }, { "epoch": 4.210908702387948, "grad_norm": 0.00659170001745224, "learning_rate": 7.674309179776778e-07, "loss": 0.0206, "step": 394120 }, { "epoch": 4.211015545702228, "grad_norm": 0.2280663251876831, "learning_rate": 7.674167221165964e-07, "loss": 0.0042, "step": 394130 }, { "epoch": 4.2111223890165075, "grad_norm": 0.0334114208817482, "learning_rate": 7.674025259535812e-07, "loss": 0.0022, "step": 394140 }, { "epoch": 4.211229232330787, "grad_norm": 0.6443389654159546, "learning_rate": 7.673883294886483e-07, "loss": 0.0042, "step": 394150 }, { "epoch": 4.211336075645066, "grad_norm": 2.7756917476654053, "learning_rate": 7.67374132721814e-07, "loss": 0.0098, "step": 394160 }, { "epoch": 4.211442918959346, "grad_norm": 1.5837026834487915, "learning_rate": 7.673599356530939e-07, "loss": 0.0022, "step": 394170 }, { "epoch": 4.211549762273625, "grad_norm": 0.07962310314178467, "learning_rate": 7.673457382825045e-07, "loss": 0.0043, "step": 394180 }, { "epoch": 4.211656605587906, "grad_norm": 0.022379256784915924, "learning_rate": 7.673315406100615e-07, "loss": 0.0163, "step": 394190 }, { "epoch": 4.211763448902185, "grad_norm": 0.004141493234783411, "learning_rate": 7.673173426357809e-07, "loss": 0.0027, "step": 394200 }, { "epoch": 4.211870292216465, "grad_norm": 0.8882807493209839, "learning_rate": 7.67303144359679e-07, "loss": 0.0029, "step": 394210 }, { "epoch": 4.211977135530744, "grad_norm": 0.6018552184104919, "learning_rate": 7.672889457817716e-07, "loss": 0.0043, "step": 394220 }, { "epoch": 4.2120839788450235, "grad_norm": 0.005007138475775719, "learning_rate": 7.672747469020749e-07, "loss": 0.0034, "step": 394230 }, { "epoch": 4.212190822159303, "grad_norm": 0.0019081021891906857, "learning_rate": 7.672605477206047e-07, "loss": 0.0034, "step": 394240 }, { "epoch": 4.212297665473583, "grad_norm": 0.41995570063591003, "learning_rate": 7.672463482373772e-07, "loss": 0.0161, "step": 394250 }, { "epoch": 4.212404508787863, "grad_norm": 3.4965627193450928, "learning_rate": 7.672321484524084e-07, "loss": 0.0057, "step": 394260 }, { "epoch": 4.212511352102142, "grad_norm": 0.016541633754968643, "learning_rate": 7.672179483657144e-07, "loss": 0.0072, "step": 394270 }, { "epoch": 4.212618195416422, "grad_norm": 0.0052418760024011135, "learning_rate": 7.672037479773112e-07, "loss": 0.0092, "step": 394280 }, { "epoch": 4.212725038730701, "grad_norm": 0.6931660175323486, "learning_rate": 7.671895472872146e-07, "loss": 0.0025, "step": 394290 }, { "epoch": 4.212831882044981, "grad_norm": 9.025506019592285, "learning_rate": 7.671753462954411e-07, "loss": 0.0302, "step": 394300 }, { "epoch": 4.212938725359261, "grad_norm": 0.421069860458374, "learning_rate": 7.671611450020063e-07, "loss": 0.0069, "step": 394310 }, { "epoch": 4.21304556867354, "grad_norm": 4.017706871032715, "learning_rate": 7.671469434069264e-07, "loss": 0.0439, "step": 394320 }, { "epoch": 4.21315241198782, "grad_norm": 0.0038686241023242474, "learning_rate": 7.671327415102176e-07, "loss": 0.0017, "step": 394330 }, { "epoch": 4.213259255302099, "grad_norm": 3.7318673133850098, "learning_rate": 7.671185393118955e-07, "loss": 0.0029, "step": 394340 }, { "epoch": 4.213366098616379, "grad_norm": 2.5784108638763428, "learning_rate": 7.671043368119766e-07, "loss": 0.0082, "step": 394350 }, { "epoch": 4.213472941930659, "grad_norm": 0.016931796446442604, "learning_rate": 7.670901340104767e-07, "loss": 0.0086, "step": 394360 }, { "epoch": 4.213579785244939, "grad_norm": 1.1831575632095337, "learning_rate": 7.670759309074119e-07, "loss": 0.007, "step": 394370 }, { "epoch": 4.213686628559218, "grad_norm": 0.02631915919482708, "learning_rate": 7.670617275027982e-07, "loss": 0.0082, "step": 394380 }, { "epoch": 4.2137934718734975, "grad_norm": 0.0400906503200531, "learning_rate": 7.670475237966516e-07, "loss": 0.0364, "step": 394390 }, { "epoch": 4.213900315187777, "grad_norm": 1.3754805326461792, "learning_rate": 7.670333197889882e-07, "loss": 0.041, "step": 394400 }, { "epoch": 4.214007158502056, "grad_norm": 2.3693079948425293, "learning_rate": 7.670191154798242e-07, "loss": 0.0395, "step": 394410 }, { "epoch": 4.214114001816336, "grad_norm": 4.198930740356445, "learning_rate": 7.670049108691753e-07, "loss": 0.031, "step": 394420 }, { "epoch": 4.214220845130616, "grad_norm": 1.5864530801773071, "learning_rate": 7.669907059570577e-07, "loss": 0.0054, "step": 394430 }, { "epoch": 4.214327688444896, "grad_norm": 0.007144265342503786, "learning_rate": 7.669765007434874e-07, "loss": 0.0159, "step": 394440 }, { "epoch": 4.214434531759175, "grad_norm": 7.894355773925781, "learning_rate": 7.669622952284806e-07, "loss": 0.0041, "step": 394450 }, { "epoch": 4.214541375073455, "grad_norm": 0.2639741003513336, "learning_rate": 7.669480894120532e-07, "loss": 0.0082, "step": 394460 }, { "epoch": 4.214648218387734, "grad_norm": 0.001943881157785654, "learning_rate": 7.669338832942213e-07, "loss": 0.0132, "step": 394470 }, { "epoch": 4.214755061702014, "grad_norm": 0.007711037993431091, "learning_rate": 7.669196768750008e-07, "loss": 0.0015, "step": 394480 }, { "epoch": 4.214861905016294, "grad_norm": 0.0023438544012606144, "learning_rate": 7.66905470154408e-07, "loss": 0.0005, "step": 394490 }, { "epoch": 4.214968748330573, "grad_norm": 0.7585315108299255, "learning_rate": 7.668912631324587e-07, "loss": 0.0032, "step": 394500 }, { "epoch": 4.215075591644853, "grad_norm": 5.878197193145752, "learning_rate": 7.66877055809169e-07, "loss": 0.0396, "step": 394510 }, { "epoch": 4.215182434959132, "grad_norm": 0.013433734886348248, "learning_rate": 7.668628481845549e-07, "loss": 0.0506, "step": 394520 }, { "epoch": 4.215289278273412, "grad_norm": 0.0019294403027743101, "learning_rate": 7.668486402586325e-07, "loss": 0.0007, "step": 394530 }, { "epoch": 4.215396121587692, "grad_norm": 7.057521343231201, "learning_rate": 7.668344320314178e-07, "loss": 0.0204, "step": 394540 }, { "epoch": 4.2155029649019715, "grad_norm": 0.04834558814764023, "learning_rate": 7.668202235029271e-07, "loss": 0.0053, "step": 394550 }, { "epoch": 4.215609808216251, "grad_norm": 0.0059014433063566685, "learning_rate": 7.668060146731761e-07, "loss": 0.0019, "step": 394560 }, { "epoch": 4.21571665153053, "grad_norm": 1.8164249658584595, "learning_rate": 7.66791805542181e-07, "loss": 0.0117, "step": 394570 }, { "epoch": 4.21582349484481, "grad_norm": 29.057157516479492, "learning_rate": 7.667775961099578e-07, "loss": 0.0151, "step": 394580 }, { "epoch": 4.215930338159089, "grad_norm": 0.09976014494895935, "learning_rate": 7.667633863765226e-07, "loss": 0.0011, "step": 394590 }, { "epoch": 4.21603718147337, "grad_norm": 2.3697688579559326, "learning_rate": 7.667491763418913e-07, "loss": 0.01, "step": 394600 }, { "epoch": 4.216144024787649, "grad_norm": 0.039759885519742966, "learning_rate": 7.667349660060801e-07, "loss": 0.0038, "step": 394610 }, { "epoch": 4.216250868101929, "grad_norm": 3.2544217109680176, "learning_rate": 7.667207553691051e-07, "loss": 0.0207, "step": 394620 }, { "epoch": 4.216357711416208, "grad_norm": 1.6129720211029053, "learning_rate": 7.66706544430982e-07, "loss": 0.0145, "step": 394630 }, { "epoch": 4.2164645547304875, "grad_norm": 0.3030223250389099, "learning_rate": 7.666923331917273e-07, "loss": 0.0042, "step": 394640 }, { "epoch": 4.216571398044767, "grad_norm": 0.018946612253785133, "learning_rate": 7.666781216513567e-07, "loss": 0.0423, "step": 394650 }, { "epoch": 4.216678241359047, "grad_norm": 0.003991764970123768, "learning_rate": 7.666639098098865e-07, "loss": 0.0234, "step": 394660 }, { "epoch": 4.216785084673327, "grad_norm": 2.0647780895233154, "learning_rate": 7.666496976673324e-07, "loss": 0.0074, "step": 394670 }, { "epoch": 4.216891927987606, "grad_norm": 0.24242495000362396, "learning_rate": 7.666354852237109e-07, "loss": 0.0007, "step": 394680 }, { "epoch": 4.216998771301886, "grad_norm": 0.17523419857025146, "learning_rate": 7.666212724790378e-07, "loss": 0.0044, "step": 394690 }, { "epoch": 4.217105614616165, "grad_norm": 0.10469064861536026, "learning_rate": 7.666070594333289e-07, "loss": 0.0027, "step": 394700 }, { "epoch": 4.217212457930445, "grad_norm": 0.0054579442366957664, "learning_rate": 7.665928460866008e-07, "loss": 0.0032, "step": 394710 }, { "epoch": 4.217319301244725, "grad_norm": 7.219315528869629, "learning_rate": 7.665786324388691e-07, "loss": 0.0119, "step": 394720 }, { "epoch": 4.217426144559004, "grad_norm": 0.002654662588611245, "learning_rate": 7.665644184901501e-07, "loss": 0.001, "step": 394730 }, { "epoch": 4.217532987873284, "grad_norm": 6.430690765380859, "learning_rate": 7.665502042404597e-07, "loss": 0.0077, "step": 394740 }, { "epoch": 4.217639831187563, "grad_norm": 0.19925779104232788, "learning_rate": 7.66535989689814e-07, "loss": 0.0073, "step": 394750 }, { "epoch": 4.217746674501843, "grad_norm": 1.7804418802261353, "learning_rate": 7.665217748382291e-07, "loss": 0.008, "step": 394760 }, { "epoch": 4.217853517816122, "grad_norm": 5.349655628204346, "learning_rate": 7.665075596857209e-07, "loss": 0.0089, "step": 394770 }, { "epoch": 4.217960361130403, "grad_norm": 0.004256955813616514, "learning_rate": 7.664933442323057e-07, "loss": 0.0268, "step": 394780 }, { "epoch": 4.218067204444682, "grad_norm": 0.02144070342183113, "learning_rate": 7.664791284779992e-07, "loss": 0.0099, "step": 394790 }, { "epoch": 4.2181740477589615, "grad_norm": 1.3373463153839111, "learning_rate": 7.664649124228178e-07, "loss": 0.0008, "step": 394800 }, { "epoch": 4.218280891073241, "grad_norm": 0.02140878699719906, "learning_rate": 7.664506960667774e-07, "loss": 0.0017, "step": 394810 }, { "epoch": 4.2183877343875205, "grad_norm": 2.0866994857788086, "learning_rate": 7.664364794098941e-07, "loss": 0.0073, "step": 394820 }, { "epoch": 4.2184945777018, "grad_norm": 0.0025774810928851366, "learning_rate": 7.664222624521836e-07, "loss": 0.0303, "step": 394830 }, { "epoch": 4.21860142101608, "grad_norm": 7.956422805786133, "learning_rate": 7.664080451936627e-07, "loss": 0.0108, "step": 394840 }, { "epoch": 4.21870826433036, "grad_norm": 0.0030068105552345514, "learning_rate": 7.663938276343467e-07, "loss": 0.0032, "step": 394850 }, { "epoch": 4.218815107644639, "grad_norm": 0.0514376237988472, "learning_rate": 7.663796097742521e-07, "loss": 0.0003, "step": 394860 }, { "epoch": 4.218921950958919, "grad_norm": 0.0015989900566637516, "learning_rate": 7.663653916133948e-07, "loss": 0.0057, "step": 394870 }, { "epoch": 4.219028794273198, "grad_norm": 0.16771185398101807, "learning_rate": 7.663511731517908e-07, "loss": 0.0058, "step": 394880 }, { "epoch": 4.219135637587478, "grad_norm": 0.021167544648051262, "learning_rate": 7.663369543894562e-07, "loss": 0.001, "step": 394890 }, { "epoch": 4.219242480901758, "grad_norm": 0.12642395496368408, "learning_rate": 7.663227353264072e-07, "loss": 0.0067, "step": 394900 }, { "epoch": 4.219349324216037, "grad_norm": 4.0547685623168945, "learning_rate": 7.663085159626597e-07, "loss": 0.0068, "step": 394910 }, { "epoch": 4.219456167530317, "grad_norm": 3.663545608520508, "learning_rate": 7.662942962982297e-07, "loss": 0.0042, "step": 394920 }, { "epoch": 4.219563010844596, "grad_norm": 0.0008073296048678458, "learning_rate": 7.662800763331334e-07, "loss": 0.0269, "step": 394930 }, { "epoch": 4.219669854158876, "grad_norm": 1.2037934064865112, "learning_rate": 7.662658560673868e-07, "loss": 0.0109, "step": 394940 }, { "epoch": 4.219776697473155, "grad_norm": 0.01116321049630642, "learning_rate": 7.66251635501006e-07, "loss": 0.0008, "step": 394950 }, { "epoch": 4.2198835407874355, "grad_norm": 0.003753067459911108, "learning_rate": 7.66237414634007e-07, "loss": 0.0011, "step": 394960 }, { "epoch": 4.219990384101715, "grad_norm": 0.639218270778656, "learning_rate": 7.662231934664058e-07, "loss": 0.0025, "step": 394970 }, { "epoch": 4.2200972274159945, "grad_norm": 0.0765102356672287, "learning_rate": 7.662089719982186e-07, "loss": 0.0025, "step": 394980 }, { "epoch": 4.220204070730274, "grad_norm": 0.0010472481371834874, "learning_rate": 7.661947502294612e-07, "loss": 0.0027, "step": 394990 }, { "epoch": 4.220310914044553, "grad_norm": 0.608363926410675, "learning_rate": 7.661805281601498e-07, "loss": 0.0045, "step": 395000 }, { "epoch": 4.220417757358833, "grad_norm": 4.397806167602539, "learning_rate": 7.661663057903008e-07, "loss": 0.0019, "step": 395010 }, { "epoch": 4.220524600673113, "grad_norm": 10.399063110351562, "learning_rate": 7.661520831199298e-07, "loss": 0.0146, "step": 395020 }, { "epoch": 4.220631443987393, "grad_norm": 0.028537947684526443, "learning_rate": 7.66137860149053e-07, "loss": 0.0001, "step": 395030 }, { "epoch": 4.220738287301672, "grad_norm": 0.005530957598239183, "learning_rate": 7.661236368776866e-07, "loss": 0.0016, "step": 395040 }, { "epoch": 4.220845130615952, "grad_norm": 0.040904659777879715, "learning_rate": 7.661094133058463e-07, "loss": 0.0011, "step": 395050 }, { "epoch": 4.220951973930231, "grad_norm": 4.689080715179443, "learning_rate": 7.660951894335486e-07, "loss": 0.0208, "step": 395060 }, { "epoch": 4.221058817244511, "grad_norm": 0.12289851903915405, "learning_rate": 7.660809652608092e-07, "loss": 0.0058, "step": 395070 }, { "epoch": 4.221165660558791, "grad_norm": 0.1205439642071724, "learning_rate": 7.660667407876444e-07, "loss": 0.0046, "step": 395080 }, { "epoch": 4.22127250387307, "grad_norm": 0.020157581195235252, "learning_rate": 7.6605251601407e-07, "loss": 0.0131, "step": 395090 }, { "epoch": 4.22137934718735, "grad_norm": 8.0469388961792, "learning_rate": 7.660382909401025e-07, "loss": 0.0077, "step": 395100 }, { "epoch": 4.221486190501629, "grad_norm": 0.0006981480401009321, "learning_rate": 7.660240655657576e-07, "loss": 0.0254, "step": 395110 }, { "epoch": 4.221593033815909, "grad_norm": 0.9356865286827087, "learning_rate": 7.660098398910513e-07, "loss": 0.0019, "step": 395120 }, { "epoch": 4.221699877130188, "grad_norm": 0.0027951905503869057, "learning_rate": 7.65995613916e-07, "loss": 0.0057, "step": 395130 }, { "epoch": 4.2218067204444685, "grad_norm": 3.556098699569702, "learning_rate": 7.659813876406194e-07, "loss": 0.0084, "step": 395140 }, { "epoch": 4.221913563758748, "grad_norm": 4.0835442543029785, "learning_rate": 7.659671610649259e-07, "loss": 0.0194, "step": 395150 }, { "epoch": 4.222020407073027, "grad_norm": 0.29298675060272217, "learning_rate": 7.659529341889354e-07, "loss": 0.0217, "step": 395160 }, { "epoch": 4.222127250387307, "grad_norm": 0.007101129274815321, "learning_rate": 7.659387070126638e-07, "loss": 0.001, "step": 395170 }, { "epoch": 4.222234093701586, "grad_norm": 0.05096633732318878, "learning_rate": 7.659244795361275e-07, "loss": 0.0096, "step": 395180 }, { "epoch": 4.222340937015867, "grad_norm": 0.06880003213882446, "learning_rate": 7.659102517593423e-07, "loss": 0.0016, "step": 395190 }, { "epoch": 4.222447780330146, "grad_norm": 0.010061043314635754, "learning_rate": 7.658960236823244e-07, "loss": 0.0178, "step": 395200 }, { "epoch": 4.222554623644426, "grad_norm": 1.8166126012802124, "learning_rate": 7.658817953050898e-07, "loss": 0.0008, "step": 395210 }, { "epoch": 4.222661466958705, "grad_norm": 0.11720433086156845, "learning_rate": 7.658675666276546e-07, "loss": 0.0165, "step": 395220 }, { "epoch": 4.2227683102729845, "grad_norm": 0.9181973338127136, "learning_rate": 7.658533376500348e-07, "loss": 0.0069, "step": 395230 }, { "epoch": 4.222875153587264, "grad_norm": 0.002656329656019807, "learning_rate": 7.658391083722467e-07, "loss": 0.0088, "step": 395240 }, { "epoch": 4.222981996901544, "grad_norm": 0.0007457900210283697, "learning_rate": 7.65824878794306e-07, "loss": 0.0124, "step": 395250 }, { "epoch": 4.223088840215824, "grad_norm": 0.010716667398810387, "learning_rate": 7.658106489162288e-07, "loss": 0.0005, "step": 395260 }, { "epoch": 4.223195683530103, "grad_norm": 0.001079845940694213, "learning_rate": 7.657964187380317e-07, "loss": 0.0232, "step": 395270 }, { "epoch": 4.223302526844383, "grad_norm": 1.4805960655212402, "learning_rate": 7.6578218825973e-07, "loss": 0.0035, "step": 395280 }, { "epoch": 4.223409370158662, "grad_norm": 8.317052841186523, "learning_rate": 7.657679574813404e-07, "loss": 0.008, "step": 395290 }, { "epoch": 4.223516213472942, "grad_norm": 2.432985782623291, "learning_rate": 7.657537264028786e-07, "loss": 0.0174, "step": 395300 }, { "epoch": 4.223623056787222, "grad_norm": 0.021075155586004257, "learning_rate": 7.657394950243608e-07, "loss": 0.0342, "step": 395310 }, { "epoch": 4.223729900101501, "grad_norm": 0.9722669124603271, "learning_rate": 7.657252633458031e-07, "loss": 0.0021, "step": 395320 }, { "epoch": 4.223836743415781, "grad_norm": 0.3954828679561615, "learning_rate": 7.657110313672215e-07, "loss": 0.0181, "step": 395330 }, { "epoch": 4.22394358673006, "grad_norm": 0.009625189006328583, "learning_rate": 7.65696799088632e-07, "loss": 0.0433, "step": 395340 }, { "epoch": 4.22405043004434, "grad_norm": 0.0016635652864351869, "learning_rate": 7.656825665100511e-07, "loss": 0.0115, "step": 395350 }, { "epoch": 4.224157273358619, "grad_norm": 0.004797323606908321, "learning_rate": 7.656683336314942e-07, "loss": 0.0037, "step": 395360 }, { "epoch": 4.2242641166729, "grad_norm": 0.009285624139010906, "learning_rate": 7.656541004529777e-07, "loss": 0.0038, "step": 395370 }, { "epoch": 4.224370959987179, "grad_norm": 0.015455584041774273, "learning_rate": 7.656398669745179e-07, "loss": 0.001, "step": 395380 }, { "epoch": 4.2244778033014585, "grad_norm": 2.42626953125, "learning_rate": 7.656256331961305e-07, "loss": 0.0044, "step": 395390 }, { "epoch": 4.224584646615738, "grad_norm": 0.13864822685718536, "learning_rate": 7.656113991178317e-07, "loss": 0.011, "step": 395400 }, { "epoch": 4.224691489930017, "grad_norm": 0.018423913046717644, "learning_rate": 7.655971647396375e-07, "loss": 0.0468, "step": 395410 }, { "epoch": 4.224798333244297, "grad_norm": 0.4690764844417572, "learning_rate": 7.655829300615644e-07, "loss": 0.0027, "step": 395420 }, { "epoch": 4.224905176558577, "grad_norm": 0.10699794441461563, "learning_rate": 7.655686950836277e-07, "loss": 0.0099, "step": 395430 }, { "epoch": 4.225012019872857, "grad_norm": 0.08766159415245056, "learning_rate": 7.655544598058441e-07, "loss": 0.0017, "step": 395440 }, { "epoch": 4.225118863187136, "grad_norm": 8.949467658996582, "learning_rate": 7.655402242282296e-07, "loss": 0.0199, "step": 395450 }, { "epoch": 4.225225706501416, "grad_norm": 3.664646863937378, "learning_rate": 7.655259883507998e-07, "loss": 0.0019, "step": 395460 }, { "epoch": 4.225332549815695, "grad_norm": 6.404634952545166, "learning_rate": 7.655117521735714e-07, "loss": 0.0115, "step": 395470 }, { "epoch": 4.2254393931299745, "grad_norm": 4.633331775665283, "learning_rate": 7.654975156965602e-07, "loss": 0.021, "step": 395480 }, { "epoch": 4.225546236444255, "grad_norm": 0.062244947999715805, "learning_rate": 7.654832789197822e-07, "loss": 0.013, "step": 395490 }, { "epoch": 4.225653079758534, "grad_norm": 0.6889945864677429, "learning_rate": 7.654690418432534e-07, "loss": 0.0046, "step": 395500 }, { "epoch": 4.225759923072814, "grad_norm": 6.539072036743164, "learning_rate": 7.654548044669902e-07, "loss": 0.0134, "step": 395510 }, { "epoch": 4.225866766387093, "grad_norm": 2.9465155601501465, "learning_rate": 7.654405667910085e-07, "loss": 0.0167, "step": 395520 }, { "epoch": 4.225973609701373, "grad_norm": 0.17309965193271637, "learning_rate": 7.654263288153242e-07, "loss": 0.0052, "step": 395530 }, { "epoch": 4.226080453015652, "grad_norm": 0.004763572942465544, "learning_rate": 7.654120905399537e-07, "loss": 0.0118, "step": 395540 }, { "epoch": 4.2261872963299325, "grad_norm": 0.967012345790863, "learning_rate": 7.653978519649129e-07, "loss": 0.0168, "step": 395550 }, { "epoch": 4.226294139644212, "grad_norm": 0.12430895864963531, "learning_rate": 7.653836130902179e-07, "loss": 0.002, "step": 395560 }, { "epoch": 4.226400982958491, "grad_norm": 0.010032718069851398, "learning_rate": 7.653693739158846e-07, "loss": 0.0007, "step": 395570 }, { "epoch": 4.226507826272771, "grad_norm": 0.014695705845952034, "learning_rate": 7.653551344419294e-07, "loss": 0.0189, "step": 395580 }, { "epoch": 4.22661466958705, "grad_norm": 0.02699512057006359, "learning_rate": 7.653408946683682e-07, "loss": 0.0036, "step": 395590 }, { "epoch": 4.22672151290133, "grad_norm": 0.04334910959005356, "learning_rate": 7.653266545952171e-07, "loss": 0.0082, "step": 395600 }, { "epoch": 4.22682835621561, "grad_norm": 0.00629466911777854, "learning_rate": 7.653124142224921e-07, "loss": 0.0041, "step": 395610 }, { "epoch": 4.22693519952989, "grad_norm": 0.02178175188601017, "learning_rate": 7.652981735502096e-07, "loss": 0.003, "step": 395620 }, { "epoch": 4.227042042844169, "grad_norm": 0.003503629006445408, "learning_rate": 7.652839325783851e-07, "loss": 0.0695, "step": 395630 }, { "epoch": 4.2271488861584485, "grad_norm": 0.04465086758136749, "learning_rate": 7.652696913070353e-07, "loss": 0.0098, "step": 395640 }, { "epoch": 4.227255729472728, "grad_norm": 1.2228683233261108, "learning_rate": 7.652554497361759e-07, "loss": 0.0107, "step": 395650 }, { "epoch": 4.2273625727870074, "grad_norm": 0.009895611554384232, "learning_rate": 7.652412078658231e-07, "loss": 0.0046, "step": 395660 }, { "epoch": 4.227469416101288, "grad_norm": 0.02209859900176525, "learning_rate": 7.652269656959929e-07, "loss": 0.0065, "step": 395670 }, { "epoch": 4.227576259415567, "grad_norm": 0.3468163013458252, "learning_rate": 7.652127232267015e-07, "loss": 0.0065, "step": 395680 }, { "epoch": 4.227683102729847, "grad_norm": 0.0065080514177680016, "learning_rate": 7.651984804579648e-07, "loss": 0.011, "step": 395690 }, { "epoch": 4.227789946044126, "grad_norm": 0.05758149176836014, "learning_rate": 7.651842373897992e-07, "loss": 0.0002, "step": 395700 }, { "epoch": 4.227896789358406, "grad_norm": 0.06144764646887779, "learning_rate": 7.651699940222204e-07, "loss": 0.0066, "step": 395710 }, { "epoch": 4.228003632672685, "grad_norm": 3.5852465629577637, "learning_rate": 7.651557503552449e-07, "loss": 0.0031, "step": 395720 }, { "epoch": 4.228110475986965, "grad_norm": 2.278022289276123, "learning_rate": 7.651415063888882e-07, "loss": 0.0104, "step": 395730 }, { "epoch": 4.228217319301245, "grad_norm": 0.0018526321509853005, "learning_rate": 7.651272621231669e-07, "loss": 0.0058, "step": 395740 }, { "epoch": 4.228324162615524, "grad_norm": 0.32722291350364685, "learning_rate": 7.65113017558097e-07, "loss": 0.0039, "step": 395750 }, { "epoch": 4.228431005929804, "grad_norm": 0.06968144327402115, "learning_rate": 7.650987726936943e-07, "loss": 0.0121, "step": 395760 }, { "epoch": 4.228537849244083, "grad_norm": 9.12574577331543, "learning_rate": 7.650845275299754e-07, "loss": 0.0115, "step": 395770 }, { "epoch": 4.228644692558364, "grad_norm": 0.004835744388401508, "learning_rate": 7.650702820669557e-07, "loss": 0.0129, "step": 395780 }, { "epoch": 4.228751535872643, "grad_norm": 0.29537397623062134, "learning_rate": 7.650560363046519e-07, "loss": 0.0049, "step": 395790 }, { "epoch": 4.2288583791869225, "grad_norm": 0.001241871272213757, "learning_rate": 7.650417902430797e-07, "loss": 0.0088, "step": 395800 }, { "epoch": 4.228965222501202, "grad_norm": 0.992767333984375, "learning_rate": 7.650275438822553e-07, "loss": 0.0056, "step": 395810 }, { "epoch": 4.2290720658154815, "grad_norm": 0.006763185374438763, "learning_rate": 7.65013297222195e-07, "loss": 0.0062, "step": 395820 }, { "epoch": 4.229178909129761, "grad_norm": 0.06274867057800293, "learning_rate": 7.649990502629144e-07, "loss": 0.0025, "step": 395830 }, { "epoch": 4.22928575244404, "grad_norm": 0.002773919375613332, "learning_rate": 7.6498480300443e-07, "loss": 0.0003, "step": 395840 }, { "epoch": 4.229392595758321, "grad_norm": 0.04743880778551102, "learning_rate": 7.649705554467578e-07, "loss": 0.0008, "step": 395850 }, { "epoch": 4.2294994390726, "grad_norm": 0.0031883285846561193, "learning_rate": 7.649563075899139e-07, "loss": 0.0018, "step": 395860 }, { "epoch": 4.22960628238688, "grad_norm": 0.007639115210622549, "learning_rate": 7.649420594339142e-07, "loss": 0.003, "step": 395870 }, { "epoch": 4.229713125701159, "grad_norm": 0.6041934490203857, "learning_rate": 7.649278109787749e-07, "loss": 0.0026, "step": 395880 }, { "epoch": 4.2298199690154386, "grad_norm": 0.005153242032974958, "learning_rate": 7.649135622245122e-07, "loss": 0.0027, "step": 395890 }, { "epoch": 4.229926812329719, "grad_norm": 0.002707601524889469, "learning_rate": 7.648993131711421e-07, "loss": 0.0096, "step": 395900 }, { "epoch": 4.230033655643998, "grad_norm": 0.0011055668583139777, "learning_rate": 7.648850638186805e-07, "loss": 0.0062, "step": 395910 }, { "epoch": 4.230140498958278, "grad_norm": 0.21760529279708862, "learning_rate": 7.648708141671438e-07, "loss": 0.001, "step": 395920 }, { "epoch": 4.230247342272557, "grad_norm": 0.014797299169003963, "learning_rate": 7.648565642165481e-07, "loss": 0.0056, "step": 395930 }, { "epoch": 4.230354185586837, "grad_norm": 0.01264578104019165, "learning_rate": 7.648423139669091e-07, "loss": 0.0156, "step": 395940 }, { "epoch": 4.230461028901116, "grad_norm": 0.03967820107936859, "learning_rate": 7.648280634182434e-07, "loss": 0.0005, "step": 395950 }, { "epoch": 4.2305678722153965, "grad_norm": 0.019372407346963882, "learning_rate": 7.648138125705666e-07, "loss": 0.0149, "step": 395960 }, { "epoch": 4.230674715529676, "grad_norm": 0.004490294493734837, "learning_rate": 7.647995614238952e-07, "loss": 0.0021, "step": 395970 }, { "epoch": 4.2307815588439555, "grad_norm": 0.07487814128398895, "learning_rate": 7.647853099782448e-07, "loss": 0.0156, "step": 395980 }, { "epoch": 4.230888402158235, "grad_norm": 0.028592439368367195, "learning_rate": 7.647710582336322e-07, "loss": 0.0027, "step": 395990 }, { "epoch": 4.230995245472514, "grad_norm": 0.0034508011303842068, "learning_rate": 7.647568061900729e-07, "loss": 0.0136, "step": 396000 }, { "epoch": 4.231102088786794, "grad_norm": 0.01893654651939869, "learning_rate": 7.647425538475831e-07, "loss": 0.0012, "step": 396010 }, { "epoch": 4.231208932101074, "grad_norm": 0.1290777176618576, "learning_rate": 7.647283012061792e-07, "loss": 0.0253, "step": 396020 }, { "epoch": 4.231315775415354, "grad_norm": 5.789013862609863, "learning_rate": 7.647140482658769e-07, "loss": 0.0055, "step": 396030 }, { "epoch": 4.231422618729633, "grad_norm": 0.5661564469337463, "learning_rate": 7.646997950266925e-07, "loss": 0.0182, "step": 396040 }, { "epoch": 4.231529462043913, "grad_norm": 0.003953718580305576, "learning_rate": 7.646855414886421e-07, "loss": 0.0011, "step": 396050 }, { "epoch": 4.231636305358192, "grad_norm": 0.006083697080612183, "learning_rate": 7.646712876517417e-07, "loss": 0.0001, "step": 396060 }, { "epoch": 4.2317431486724715, "grad_norm": 1.9050315618515015, "learning_rate": 7.646570335160074e-07, "loss": 0.013, "step": 396070 }, { "epoch": 4.231849991986752, "grad_norm": 1.6029365062713623, "learning_rate": 7.646427790814554e-07, "loss": 0.0097, "step": 396080 }, { "epoch": 4.231956835301031, "grad_norm": 0.02624713070690632, "learning_rate": 7.646285243481016e-07, "loss": 0.0007, "step": 396090 }, { "epoch": 4.232063678615311, "grad_norm": 1.273516058921814, "learning_rate": 7.646142693159624e-07, "loss": 0.0268, "step": 396100 }, { "epoch": 4.23217052192959, "grad_norm": 0.20020927488803864, "learning_rate": 7.646000139850536e-07, "loss": 0.0036, "step": 396110 }, { "epoch": 4.23227736524387, "grad_norm": 0.48616740107536316, "learning_rate": 7.645857583553914e-07, "loss": 0.0041, "step": 396120 }, { "epoch": 4.232384208558149, "grad_norm": 2.4677376747131348, "learning_rate": 7.645715024269921e-07, "loss": 0.0167, "step": 396130 }, { "epoch": 4.2324910518724295, "grad_norm": 0.0014706262154504657, "learning_rate": 7.645572461998714e-07, "loss": 0.0041, "step": 396140 }, { "epoch": 4.232597895186709, "grad_norm": 1.0404791831970215, "learning_rate": 7.645429896740458e-07, "loss": 0.0021, "step": 396150 }, { "epoch": 4.232704738500988, "grad_norm": 0.0021551093086600304, "learning_rate": 7.64528732849531e-07, "loss": 0.0055, "step": 396160 }, { "epoch": 4.232811581815268, "grad_norm": 0.013165485113859177, "learning_rate": 7.645144757263433e-07, "loss": 0.0213, "step": 396170 }, { "epoch": 4.232918425129547, "grad_norm": 0.24645330011844635, "learning_rate": 7.645002183044989e-07, "loss": 0.0361, "step": 396180 }, { "epoch": 4.233025268443827, "grad_norm": 0.8016327023506165, "learning_rate": 7.644859605840138e-07, "loss": 0.0039, "step": 396190 }, { "epoch": 4.233132111758107, "grad_norm": 0.0161354411393404, "learning_rate": 7.644717025649041e-07, "loss": 0.0002, "step": 396200 }, { "epoch": 4.233238955072387, "grad_norm": 0.06490292400121689, "learning_rate": 7.644574442471859e-07, "loss": 0.0007, "step": 396210 }, { "epoch": 4.233345798386666, "grad_norm": 0.0023350236006081104, "learning_rate": 7.644431856308751e-07, "loss": 0.0063, "step": 396220 }, { "epoch": 4.2334526417009455, "grad_norm": 1.3012357950210571, "learning_rate": 7.64428926715988e-07, "loss": 0.0038, "step": 396230 }, { "epoch": 4.233559485015225, "grad_norm": 0.0009074389236047864, "learning_rate": 7.644146675025409e-07, "loss": 0.0001, "step": 396240 }, { "epoch": 4.233666328329504, "grad_norm": 0.42550650238990784, "learning_rate": 7.644004079905495e-07, "loss": 0.014, "step": 396250 }, { "epoch": 4.233773171643785, "grad_norm": 0.3514764904975891, "learning_rate": 7.643861481800302e-07, "loss": 0.0115, "step": 396260 }, { "epoch": 4.233880014958064, "grad_norm": 0.0020140267442911863, "learning_rate": 7.643718880709989e-07, "loss": 0.0088, "step": 396270 }, { "epoch": 4.233986858272344, "grad_norm": 0.002856404986232519, "learning_rate": 7.643576276634719e-07, "loss": 0.0018, "step": 396280 }, { "epoch": 4.234093701586623, "grad_norm": 0.5976546406745911, "learning_rate": 7.643433669574651e-07, "loss": 0.0082, "step": 396290 }, { "epoch": 4.234200544900903, "grad_norm": 0.003866331884637475, "learning_rate": 7.643291059529946e-07, "loss": 0.0187, "step": 396300 }, { "epoch": 4.234307388215182, "grad_norm": 1.4440521001815796, "learning_rate": 7.643148446500767e-07, "loss": 0.0038, "step": 396310 }, { "epoch": 4.234414231529462, "grad_norm": 2.4172756671905518, "learning_rate": 7.643005830487275e-07, "loss": 0.0127, "step": 396320 }, { "epoch": 4.234521074843742, "grad_norm": 0.02620534412562847, "learning_rate": 7.642863211489628e-07, "loss": 0.0105, "step": 396330 }, { "epoch": 4.234627918158021, "grad_norm": 0.2783680856227875, "learning_rate": 7.642720589507989e-07, "loss": 0.0082, "step": 396340 }, { "epoch": 4.234734761472301, "grad_norm": 5.758061408996582, "learning_rate": 7.642577964542521e-07, "loss": 0.0074, "step": 396350 }, { "epoch": 4.23484160478658, "grad_norm": 0.019560350105166435, "learning_rate": 7.642435336593381e-07, "loss": 0.0215, "step": 396360 }, { "epoch": 4.23494844810086, "grad_norm": 7.788845539093018, "learning_rate": 7.642292705660733e-07, "loss": 0.0044, "step": 396370 }, { "epoch": 4.23505529141514, "grad_norm": 1.7457717657089233, "learning_rate": 7.642150071744737e-07, "loss": 0.0027, "step": 396380 }, { "epoch": 4.2351621347294195, "grad_norm": 0.04247933253645897, "learning_rate": 7.642007434845554e-07, "loss": 0.0002, "step": 396390 }, { "epoch": 4.235268978043699, "grad_norm": 1.5950396060943604, "learning_rate": 7.641864794963345e-07, "loss": 0.0025, "step": 396400 }, { "epoch": 4.235375821357978, "grad_norm": 0.010488263331353664, "learning_rate": 7.641722152098272e-07, "loss": 0.0036, "step": 396410 }, { "epoch": 4.235482664672258, "grad_norm": 0.506131112575531, "learning_rate": 7.641579506250495e-07, "loss": 0.0047, "step": 396420 }, { "epoch": 4.235589507986537, "grad_norm": 0.015389162115752697, "learning_rate": 7.641436857420175e-07, "loss": 0.0014, "step": 396430 }, { "epoch": 4.235696351300818, "grad_norm": 1.324682593345642, "learning_rate": 7.641294205607474e-07, "loss": 0.0132, "step": 396440 }, { "epoch": 4.235803194615097, "grad_norm": 0.5288389921188354, "learning_rate": 7.641151550812553e-07, "loss": 0.0222, "step": 396450 }, { "epoch": 4.235910037929377, "grad_norm": 0.05928431823849678, "learning_rate": 7.641008893035571e-07, "loss": 0.0145, "step": 396460 }, { "epoch": 4.236016881243656, "grad_norm": 0.9667980074882507, "learning_rate": 7.640866232276691e-07, "loss": 0.0015, "step": 396470 }, { "epoch": 4.2361237245579355, "grad_norm": 3.2037994861602783, "learning_rate": 7.640723568536075e-07, "loss": 0.0058, "step": 396480 }, { "epoch": 4.236230567872215, "grad_norm": 0.0846652016043663, "learning_rate": 7.640580901813881e-07, "loss": 0.0203, "step": 396490 }, { "epoch": 4.236337411186495, "grad_norm": 2.9354519844055176, "learning_rate": 7.640438232110273e-07, "loss": 0.0173, "step": 396500 }, { "epoch": 4.236444254500775, "grad_norm": 0.008552290499210358, "learning_rate": 7.640295559425411e-07, "loss": 0.0013, "step": 396510 }, { "epoch": 4.236551097815054, "grad_norm": 0.026851046830415726, "learning_rate": 7.640152883759455e-07, "loss": 0.0445, "step": 396520 }, { "epoch": 4.236657941129334, "grad_norm": 0.06302258372306824, "learning_rate": 7.640010205112568e-07, "loss": 0.0031, "step": 396530 }, { "epoch": 4.236764784443613, "grad_norm": 0.0619003064930439, "learning_rate": 7.639867523484911e-07, "loss": 0.0152, "step": 396540 }, { "epoch": 4.236871627757893, "grad_norm": 0.5005040168762207, "learning_rate": 7.639724838876642e-07, "loss": 0.0062, "step": 396550 }, { "epoch": 4.236978471072173, "grad_norm": 4.402096748352051, "learning_rate": 7.639582151287927e-07, "loss": 0.0053, "step": 396560 }, { "epoch": 4.237085314386452, "grad_norm": 4.961287975311279, "learning_rate": 7.639439460718923e-07, "loss": 0.0212, "step": 396570 }, { "epoch": 4.237192157700732, "grad_norm": 0.00229345983825624, "learning_rate": 7.639296767169792e-07, "loss": 0.0045, "step": 396580 }, { "epoch": 4.237299001015011, "grad_norm": 4.685508728027344, "learning_rate": 7.639154070640697e-07, "loss": 0.007, "step": 396590 }, { "epoch": 4.237405844329291, "grad_norm": 4.496304988861084, "learning_rate": 7.639011371131797e-07, "loss": 0.0201, "step": 396600 }, { "epoch": 4.237512687643571, "grad_norm": 0.00315745547413826, "learning_rate": 7.638868668643254e-07, "loss": 0.0066, "step": 396610 }, { "epoch": 4.237619530957851, "grad_norm": 1.1405342817306519, "learning_rate": 7.638725963175232e-07, "loss": 0.0029, "step": 396620 }, { "epoch": 4.23772637427213, "grad_norm": 0.5342254638671875, "learning_rate": 7.638583254727886e-07, "loss": 0.0244, "step": 396630 }, { "epoch": 4.2378332175864095, "grad_norm": 2.083566188812256, "learning_rate": 7.638440543301381e-07, "loss": 0.0068, "step": 396640 }, { "epoch": 4.237940060900689, "grad_norm": 0.019116908311843872, "learning_rate": 7.638297828895879e-07, "loss": 0.0016, "step": 396650 }, { "epoch": 4.238046904214968, "grad_norm": 0.002873007208108902, "learning_rate": 7.638155111511537e-07, "loss": 0.0073, "step": 396660 }, { "epoch": 4.238153747529249, "grad_norm": 0.0021242022048681974, "learning_rate": 7.638012391148521e-07, "loss": 0.0004, "step": 396670 }, { "epoch": 4.238260590843528, "grad_norm": 11.568470001220703, "learning_rate": 7.637869667806989e-07, "loss": 0.0185, "step": 396680 }, { "epoch": 4.238367434157808, "grad_norm": 0.01030318345874548, "learning_rate": 7.637726941487103e-07, "loss": 0.0118, "step": 396690 }, { "epoch": 4.238474277472087, "grad_norm": 0.06474197655916214, "learning_rate": 7.637584212189026e-07, "loss": 0.0375, "step": 396700 }, { "epoch": 4.238581120786367, "grad_norm": 0.013362507335841656, "learning_rate": 7.637441479912914e-07, "loss": 0.0121, "step": 396710 }, { "epoch": 4.238687964100646, "grad_norm": 3.6666712760925293, "learning_rate": 7.637298744658934e-07, "loss": 0.0025, "step": 396720 }, { "epoch": 4.238794807414926, "grad_norm": 0.5239492058753967, "learning_rate": 7.637156006427243e-07, "loss": 0.0097, "step": 396730 }, { "epoch": 4.238901650729206, "grad_norm": 0.4576348662376404, "learning_rate": 7.637013265218005e-07, "loss": 0.0113, "step": 396740 }, { "epoch": 4.239008494043485, "grad_norm": 0.017939593642950058, "learning_rate": 7.63687052103138e-07, "loss": 0.005, "step": 396750 }, { "epoch": 4.239115337357765, "grad_norm": 3.9414470195770264, "learning_rate": 7.636727773867529e-07, "loss": 0.0131, "step": 396760 }, { "epoch": 4.239222180672044, "grad_norm": 0.009762079454958439, "learning_rate": 7.636585023726614e-07, "loss": 0.0213, "step": 396770 }, { "epoch": 4.239329023986324, "grad_norm": 0.002686292864382267, "learning_rate": 7.636442270608793e-07, "loss": 0.0029, "step": 396780 }, { "epoch": 4.239435867300604, "grad_norm": 0.3923893868923187, "learning_rate": 7.636299514514232e-07, "loss": 0.0104, "step": 396790 }, { "epoch": 4.2395427106148835, "grad_norm": 0.32624754309654236, "learning_rate": 7.636156755443091e-07, "loss": 0.0023, "step": 396800 }, { "epoch": 4.239649553929163, "grad_norm": 17.919158935546875, "learning_rate": 7.636013993395527e-07, "loss": 0.0284, "step": 396810 }, { "epoch": 4.239756397243442, "grad_norm": 2.05824613571167, "learning_rate": 7.635871228371707e-07, "loss": 0.0048, "step": 396820 }, { "epoch": 4.239863240557722, "grad_norm": 1.682788610458374, "learning_rate": 7.635728460371787e-07, "loss": 0.0365, "step": 396830 }, { "epoch": 4.239970083872001, "grad_norm": 0.02070455066859722, "learning_rate": 7.635585689395932e-07, "loss": 0.0052, "step": 396840 }, { "epoch": 4.240076927186282, "grad_norm": 0.004843718372285366, "learning_rate": 7.635442915444301e-07, "loss": 0.0101, "step": 396850 }, { "epoch": 4.240183770500561, "grad_norm": 0.874336302280426, "learning_rate": 7.635300138517058e-07, "loss": 0.0016, "step": 396860 }, { "epoch": 4.240290613814841, "grad_norm": 0.010051228106021881, "learning_rate": 7.635157358614359e-07, "loss": 0.001, "step": 396870 }, { "epoch": 4.24039745712912, "grad_norm": 0.9424886703491211, "learning_rate": 7.635014575736371e-07, "loss": 0.0106, "step": 396880 }, { "epoch": 4.2405043004433995, "grad_norm": 0.16833725571632385, "learning_rate": 7.634871789883252e-07, "loss": 0.0376, "step": 396890 }, { "epoch": 4.240611143757679, "grad_norm": 0.22520270943641663, "learning_rate": 7.634729001055164e-07, "loss": 0.001, "step": 396900 }, { "epoch": 4.240717987071959, "grad_norm": 0.31660979986190796, "learning_rate": 7.634586209252267e-07, "loss": 0.0058, "step": 396910 }, { "epoch": 4.240824830386239, "grad_norm": 0.3743298053741455, "learning_rate": 7.634443414474725e-07, "loss": 0.006, "step": 396920 }, { "epoch": 4.240931673700518, "grad_norm": 0.3068218529224396, "learning_rate": 7.634300616722698e-07, "loss": 0.0029, "step": 396930 }, { "epoch": 4.241038517014798, "grad_norm": 0.39945802092552185, "learning_rate": 7.634157815996345e-07, "loss": 0.0137, "step": 396940 }, { "epoch": 4.241145360329077, "grad_norm": 0.009726476855576038, "learning_rate": 7.634015012295829e-07, "loss": 0.0001, "step": 396950 }, { "epoch": 4.241252203643357, "grad_norm": 2.9045870304107666, "learning_rate": 7.633872205621313e-07, "loss": 0.0213, "step": 396960 }, { "epoch": 4.241359046957637, "grad_norm": 0.356812447309494, "learning_rate": 7.633729395972954e-07, "loss": 0.0044, "step": 396970 }, { "epoch": 4.2414658902719165, "grad_norm": 0.07168076187372208, "learning_rate": 7.633586583350919e-07, "loss": 0.0091, "step": 396980 }, { "epoch": 4.241572733586196, "grad_norm": 0.11693967878818512, "learning_rate": 7.633443767755363e-07, "loss": 0.0154, "step": 396990 }, { "epoch": 4.241679576900475, "grad_norm": 0.012429988011717796, "learning_rate": 7.633300949186452e-07, "loss": 0.0096, "step": 397000 }, { "epoch": 4.241786420214755, "grad_norm": 3.3504810333251953, "learning_rate": 7.633158127644345e-07, "loss": 0.0145, "step": 397010 }, { "epoch": 4.241893263529034, "grad_norm": 0.024349888786673546, "learning_rate": 7.633015303129204e-07, "loss": 0.0037, "step": 397020 }, { "epoch": 4.242000106843315, "grad_norm": 0.10658209770917892, "learning_rate": 7.632872475641189e-07, "loss": 0.0026, "step": 397030 }, { "epoch": 4.242106950157594, "grad_norm": 0.021931754425168037, "learning_rate": 7.632729645180462e-07, "loss": 0.0075, "step": 397040 }, { "epoch": 4.2422137934718736, "grad_norm": 6.231561660766602, "learning_rate": 7.632586811747185e-07, "loss": 0.02, "step": 397050 }, { "epoch": 4.242320636786153, "grad_norm": 0.008505703881382942, "learning_rate": 7.632443975341521e-07, "loss": 0.0349, "step": 397060 }, { "epoch": 4.2424274801004325, "grad_norm": 0.026723548769950867, "learning_rate": 7.632301135963627e-07, "loss": 0.0129, "step": 397070 }, { "epoch": 4.242534323414712, "grad_norm": 0.059523068368434906, "learning_rate": 7.632158293613667e-07, "loss": 0.0019, "step": 397080 }, { "epoch": 4.242641166728992, "grad_norm": 0.0011782017536461353, "learning_rate": 7.6320154482918e-07, "loss": 0.0011, "step": 397090 }, { "epoch": 4.242748010043272, "grad_norm": 0.0018226179527118802, "learning_rate": 7.631872599998191e-07, "loss": 0.0061, "step": 397100 }, { "epoch": 4.242854853357551, "grad_norm": 0.4583357870578766, "learning_rate": 7.631729748732998e-07, "loss": 0.01, "step": 397110 }, { "epoch": 4.242961696671831, "grad_norm": 0.004180566873401403, "learning_rate": 7.631586894496385e-07, "loss": 0.0041, "step": 397120 }, { "epoch": 4.24306853998611, "grad_norm": 0.834861695766449, "learning_rate": 7.631444037288511e-07, "loss": 0.0019, "step": 397130 }, { "epoch": 4.24317538330039, "grad_norm": 5.368474960327148, "learning_rate": 7.631301177109536e-07, "loss": 0.0112, "step": 397140 }, { "epoch": 4.24328222661467, "grad_norm": 0.49529391527175903, "learning_rate": 7.631158313959625e-07, "loss": 0.0069, "step": 397150 }, { "epoch": 4.243389069928949, "grad_norm": 0.4245804250240326, "learning_rate": 7.631015447838939e-07, "loss": 0.0044, "step": 397160 }, { "epoch": 4.243495913243229, "grad_norm": 0.003503736574202776, "learning_rate": 7.630872578747636e-07, "loss": 0.0032, "step": 397170 }, { "epoch": 4.243602756557508, "grad_norm": 0.05834251269698143, "learning_rate": 7.63072970668588e-07, "loss": 0.003, "step": 397180 }, { "epoch": 4.243709599871788, "grad_norm": 0.13159769773483276, "learning_rate": 7.630586831653832e-07, "loss": 0.0116, "step": 397190 }, { "epoch": 4.243816443186067, "grad_norm": 0.01906994916498661, "learning_rate": 7.630443953651653e-07, "loss": 0.0008, "step": 397200 }, { "epoch": 4.243923286500348, "grad_norm": 0.009581737220287323, "learning_rate": 7.630301072679503e-07, "loss": 0.0028, "step": 397210 }, { "epoch": 4.244030129814627, "grad_norm": 0.021761517971754074, "learning_rate": 7.630158188737545e-07, "loss": 0.0077, "step": 397220 }, { "epoch": 4.2441369731289065, "grad_norm": 12.055460929870605, "learning_rate": 7.63001530182594e-07, "loss": 0.013, "step": 397230 }, { "epoch": 4.244243816443186, "grad_norm": 7.957756042480469, "learning_rate": 7.62987241194485e-07, "loss": 0.0051, "step": 397240 }, { "epoch": 4.244350659757465, "grad_norm": 11.905738830566406, "learning_rate": 7.629729519094436e-07, "loss": 0.0032, "step": 397250 }, { "epoch": 4.244457503071745, "grad_norm": 7.663640975952148, "learning_rate": 7.629586623274857e-07, "loss": 0.0046, "step": 397260 }, { "epoch": 4.244564346386025, "grad_norm": 0.09571774303913116, "learning_rate": 7.629443724486278e-07, "loss": 0.0023, "step": 397270 }, { "epoch": 4.244671189700305, "grad_norm": 0.010957524180412292, "learning_rate": 7.629300822728856e-07, "loss": 0.0098, "step": 397280 }, { "epoch": 4.244778033014584, "grad_norm": 0.20035937428474426, "learning_rate": 7.629157918002758e-07, "loss": 0.0023, "step": 397290 }, { "epoch": 4.244884876328864, "grad_norm": 6.416749477386475, "learning_rate": 7.629015010308141e-07, "loss": 0.0101, "step": 397300 }, { "epoch": 4.244991719643143, "grad_norm": 0.052894216030836105, "learning_rate": 7.628872099645167e-07, "loss": 0.0332, "step": 397310 }, { "epoch": 4.245098562957423, "grad_norm": 0.004552735947072506, "learning_rate": 7.628729186013999e-07, "loss": 0.0035, "step": 397320 }, { "epoch": 4.245205406271703, "grad_norm": 0.03537200018763542, "learning_rate": 7.628586269414797e-07, "loss": 0.0038, "step": 397330 }, { "epoch": 4.245312249585982, "grad_norm": 1.1850541830062866, "learning_rate": 7.628443349847722e-07, "loss": 0.0172, "step": 397340 }, { "epoch": 4.245419092900262, "grad_norm": 0.21401187777519226, "learning_rate": 7.628300427312937e-07, "loss": 0.012, "step": 397350 }, { "epoch": 4.245525936214541, "grad_norm": 0.015328862704336643, "learning_rate": 7.628157501810604e-07, "loss": 0.0161, "step": 397360 }, { "epoch": 4.245632779528821, "grad_norm": 0.5776556730270386, "learning_rate": 7.628014573340879e-07, "loss": 0.0052, "step": 397370 }, { "epoch": 4.2457396228431, "grad_norm": 3.5239083766937256, "learning_rate": 7.62787164190393e-07, "loss": 0.0078, "step": 397380 }, { "epoch": 4.2458464661573805, "grad_norm": 0.10556720197200775, "learning_rate": 7.627728707499916e-07, "loss": 0.0048, "step": 397390 }, { "epoch": 4.24595330947166, "grad_norm": 0.7243891954421997, "learning_rate": 7.627585770128996e-07, "loss": 0.0112, "step": 397400 }, { "epoch": 4.246060152785939, "grad_norm": 0.5679725408554077, "learning_rate": 7.627442829791335e-07, "loss": 0.0036, "step": 397410 }, { "epoch": 4.246166996100219, "grad_norm": 5.446914196014404, "learning_rate": 7.62729988648709e-07, "loss": 0.0191, "step": 397420 }, { "epoch": 4.246273839414498, "grad_norm": 4.142700672149658, "learning_rate": 7.627156940216428e-07, "loss": 0.0017, "step": 397430 }, { "epoch": 4.246380682728779, "grad_norm": 0.12168608605861664, "learning_rate": 7.627013990979507e-07, "loss": 0.0113, "step": 397440 }, { "epoch": 4.246487526043058, "grad_norm": 0.0021102672908455133, "learning_rate": 7.626871038776489e-07, "loss": 0.0017, "step": 397450 }, { "epoch": 4.246594369357338, "grad_norm": 0.0015690855216234922, "learning_rate": 7.626728083607534e-07, "loss": 0.0359, "step": 397460 }, { "epoch": 4.246701212671617, "grad_norm": 1.4274020195007324, "learning_rate": 7.626585125472807e-07, "loss": 0.0055, "step": 397470 }, { "epoch": 4.2468080559858965, "grad_norm": 6.031782150268555, "learning_rate": 7.626442164372465e-07, "loss": 0.003, "step": 397480 }, { "epoch": 4.246914899300176, "grad_norm": 0.0014720051549375057, "learning_rate": 7.626299200306672e-07, "loss": 0.008, "step": 397490 }, { "epoch": 4.247021742614456, "grad_norm": 0.6668223738670349, "learning_rate": 7.626156233275589e-07, "loss": 0.0004, "step": 397500 }, { "epoch": 4.247128585928736, "grad_norm": 0.11249096691608429, "learning_rate": 7.626013263279379e-07, "loss": 0.0169, "step": 397510 }, { "epoch": 4.247235429243015, "grad_norm": 6.525211811065674, "learning_rate": 7.625870290318199e-07, "loss": 0.0108, "step": 397520 }, { "epoch": 4.247342272557295, "grad_norm": 0.193251833319664, "learning_rate": 7.625727314392214e-07, "loss": 0.0144, "step": 397530 }, { "epoch": 4.247449115871574, "grad_norm": 0.35943785309791565, "learning_rate": 7.625584335501586e-07, "loss": 0.0092, "step": 397540 }, { "epoch": 4.247555959185854, "grad_norm": 0.07647096365690231, "learning_rate": 7.625441353646475e-07, "loss": 0.0102, "step": 397550 }, { "epoch": 4.247662802500134, "grad_norm": 2.100961208343506, "learning_rate": 7.625298368827042e-07, "loss": 0.0201, "step": 397560 }, { "epoch": 4.247769645814413, "grad_norm": 0.017723146826028824, "learning_rate": 7.625155381043449e-07, "loss": 0.0065, "step": 397570 }, { "epoch": 4.247876489128693, "grad_norm": 0.2720729410648346, "learning_rate": 7.625012390295857e-07, "loss": 0.0073, "step": 397580 }, { "epoch": 4.247983332442972, "grad_norm": 0.004514021333307028, "learning_rate": 7.62486939658443e-07, "loss": 0.0026, "step": 397590 }, { "epoch": 4.248090175757252, "grad_norm": 0.5468526482582092, "learning_rate": 7.624726399909325e-07, "loss": 0.0022, "step": 397600 }, { "epoch": 4.248197019071531, "grad_norm": 0.0322323739528656, "learning_rate": 7.624583400270707e-07, "loss": 0.0032, "step": 397610 }, { "epoch": 4.248303862385812, "grad_norm": 0.0015577708836644888, "learning_rate": 7.624440397668736e-07, "loss": 0.0103, "step": 397620 }, { "epoch": 4.248410705700091, "grad_norm": 0.058498967438936234, "learning_rate": 7.624297392103573e-07, "loss": 0.005, "step": 397630 }, { "epoch": 4.2485175490143705, "grad_norm": 0.208433136343956, "learning_rate": 7.624154383575381e-07, "loss": 0.0142, "step": 397640 }, { "epoch": 4.24862439232865, "grad_norm": 0.0019473278662189841, "learning_rate": 7.624011372084321e-07, "loss": 0.0048, "step": 397650 }, { "epoch": 4.248731235642929, "grad_norm": 0.003156440332531929, "learning_rate": 7.623868357630554e-07, "loss": 0.0101, "step": 397660 }, { "epoch": 4.248838078957209, "grad_norm": 5.593291759490967, "learning_rate": 7.623725340214241e-07, "loss": 0.0181, "step": 397670 }, { "epoch": 4.248944922271489, "grad_norm": 0.01709720678627491, "learning_rate": 7.623582319835544e-07, "loss": 0.0009, "step": 397680 }, { "epoch": 4.249051765585769, "grad_norm": 6.43753719329834, "learning_rate": 7.623439296494625e-07, "loss": 0.0051, "step": 397690 }, { "epoch": 4.249158608900048, "grad_norm": 2.319441556930542, "learning_rate": 7.623296270191644e-07, "loss": 0.0122, "step": 397700 }, { "epoch": 4.249265452214328, "grad_norm": 13.394491195678711, "learning_rate": 7.623153240926765e-07, "loss": 0.004, "step": 397710 }, { "epoch": 4.249372295528607, "grad_norm": 0.015286127105355263, "learning_rate": 7.623010208700147e-07, "loss": 0.0068, "step": 397720 }, { "epoch": 4.2494791388428865, "grad_norm": 0.27384239435195923, "learning_rate": 7.622867173511953e-07, "loss": 0.009, "step": 397730 }, { "epoch": 4.249585982157167, "grad_norm": 7.9192681312561035, "learning_rate": 7.622724135362344e-07, "loss": 0.0044, "step": 397740 }, { "epoch": 4.249692825471446, "grad_norm": 0.0036906662862747908, "learning_rate": 7.622581094251481e-07, "loss": 0.0085, "step": 397750 }, { "epoch": 4.249799668785726, "grad_norm": 4.065411567687988, "learning_rate": 7.622438050179527e-07, "loss": 0.0004, "step": 397760 }, { "epoch": 4.249906512100005, "grad_norm": 0.0037999607156962156, "learning_rate": 7.622295003146641e-07, "loss": 0.0007, "step": 397770 }, { "epoch": 4.250013355414285, "grad_norm": 4.006841659545898, "learning_rate": 7.622151953152987e-07, "loss": 0.0034, "step": 397780 }, { "epoch": 4.250120198728564, "grad_norm": 0.018608255311846733, "learning_rate": 7.622008900198726e-07, "loss": 0.0008, "step": 397790 }, { "epoch": 4.2502270420428445, "grad_norm": 0.06391764432191849, "learning_rate": 7.621865844284019e-07, "loss": 0.0198, "step": 397800 }, { "epoch": 4.250333885357124, "grad_norm": 0.9101551175117493, "learning_rate": 7.621722785409026e-07, "loss": 0.0046, "step": 397810 }, { "epoch": 4.250440728671403, "grad_norm": 2.247931957244873, "learning_rate": 7.621579723573913e-07, "loss": 0.0158, "step": 397820 }, { "epoch": 4.250547571985683, "grad_norm": 0.0025061704218387604, "learning_rate": 7.621436658778834e-07, "loss": 0.006, "step": 397830 }, { "epoch": 4.250654415299962, "grad_norm": 0.554734468460083, "learning_rate": 7.62129359102396e-07, "loss": 0.0279, "step": 397840 }, { "epoch": 4.250761258614242, "grad_norm": 0.0033607177902013063, "learning_rate": 7.621150520309445e-07, "loss": 0.001, "step": 397850 }, { "epoch": 4.250868101928522, "grad_norm": 0.020632024854421616, "learning_rate": 7.621007446635453e-07, "loss": 0.0085, "step": 397860 }, { "epoch": 4.250974945242802, "grad_norm": 1.960693359375, "learning_rate": 7.620864370002148e-07, "loss": 0.0019, "step": 397870 }, { "epoch": 4.251081788557081, "grad_norm": 0.049030601978302, "learning_rate": 7.620721290409687e-07, "loss": 0.0025, "step": 397880 }, { "epoch": 4.2511886318713605, "grad_norm": 1.8653082847595215, "learning_rate": 7.620578207858235e-07, "loss": 0.0205, "step": 397890 }, { "epoch": 4.25129547518564, "grad_norm": 0.24577468633651733, "learning_rate": 7.620435122347953e-07, "loss": 0.0136, "step": 397900 }, { "epoch": 4.2514023184999195, "grad_norm": 4.1583943367004395, "learning_rate": 7.620292033879001e-07, "loss": 0.0218, "step": 397910 }, { "epoch": 4.2515091618142, "grad_norm": 0.07031681388616562, "learning_rate": 7.620148942451542e-07, "loss": 0.0016, "step": 397920 }, { "epoch": 4.251616005128479, "grad_norm": 11.698747634887695, "learning_rate": 7.620005848065739e-07, "loss": 0.0247, "step": 397930 }, { "epoch": 4.251722848442759, "grad_norm": 0.0077460757456719875, "learning_rate": 7.619862750721748e-07, "loss": 0.0227, "step": 397940 }, { "epoch": 4.251829691757038, "grad_norm": 0.4411090016365051, "learning_rate": 7.619719650419736e-07, "loss": 0.04, "step": 397950 }, { "epoch": 4.251936535071318, "grad_norm": 0.004384760744869709, "learning_rate": 7.619576547159862e-07, "loss": 0.0116, "step": 397960 }, { "epoch": 4.252043378385597, "grad_norm": 0.026278534904122353, "learning_rate": 7.619433440942289e-07, "loss": 0.0065, "step": 397970 }, { "epoch": 4.252150221699877, "grad_norm": 0.002110003028064966, "learning_rate": 7.619290331767179e-07, "loss": 0.0841, "step": 397980 }, { "epoch": 4.252257065014157, "grad_norm": 0.008721374906599522, "learning_rate": 7.619147219634692e-07, "loss": 0.0065, "step": 397990 }, { "epoch": 4.252363908328436, "grad_norm": 0.019948529079556465, "learning_rate": 7.619004104544988e-07, "loss": 0.0071, "step": 398000 }, { "epoch": 4.252470751642716, "grad_norm": 0.009117561392486095, "learning_rate": 7.618860986498233e-07, "loss": 0.0014, "step": 398010 }, { "epoch": 4.252577594956995, "grad_norm": 0.3722696304321289, "learning_rate": 7.618717865494586e-07, "loss": 0.0056, "step": 398020 }, { "epoch": 4.252684438271276, "grad_norm": 23.22459602355957, "learning_rate": 7.618574741534207e-07, "loss": 0.0281, "step": 398030 }, { "epoch": 4.252791281585555, "grad_norm": 0.002801287453621626, "learning_rate": 7.618431614617263e-07, "loss": 0.0095, "step": 398040 }, { "epoch": 4.2528981248998345, "grad_norm": 0.03603891283273697, "learning_rate": 7.618288484743909e-07, "loss": 0.007, "step": 398050 }, { "epoch": 4.253004968214114, "grad_norm": 0.9843006730079651, "learning_rate": 7.61814535191431e-07, "loss": 0.0457, "step": 398060 }, { "epoch": 4.2531118115283935, "grad_norm": 0.001474705757573247, "learning_rate": 7.618002216128629e-07, "loss": 0.0215, "step": 398070 }, { "epoch": 4.253218654842673, "grad_norm": 0.09202462434768677, "learning_rate": 7.617859077387024e-07, "loss": 0.0009, "step": 398080 }, { "epoch": 4.253325498156952, "grad_norm": 0.009073998779058456, "learning_rate": 7.617715935689659e-07, "loss": 0.0161, "step": 398090 }, { "epoch": 4.253432341471233, "grad_norm": 0.03137202933430672, "learning_rate": 7.617572791036697e-07, "loss": 0.0123, "step": 398100 }, { "epoch": 4.253539184785512, "grad_norm": 0.0064198849722743034, "learning_rate": 7.617429643428294e-07, "loss": 0.0133, "step": 398110 }, { "epoch": 4.253646028099792, "grad_norm": 0.042599987238645554, "learning_rate": 7.617286492864619e-07, "loss": 0.0011, "step": 398120 }, { "epoch": 4.253752871414071, "grad_norm": 7.044789791107178, "learning_rate": 7.617143339345829e-07, "loss": 0.0135, "step": 398130 }, { "epoch": 4.253859714728351, "grad_norm": 28.3622989654541, "learning_rate": 7.617000182872087e-07, "loss": 0.035, "step": 398140 }, { "epoch": 4.253966558042631, "grad_norm": 2.82673716545105, "learning_rate": 7.616857023443553e-07, "loss": 0.0082, "step": 398150 }, { "epoch": 4.25407340135691, "grad_norm": 0.2092791199684143, "learning_rate": 7.616713861060391e-07, "loss": 0.0037, "step": 398160 }, { "epoch": 4.25418024467119, "grad_norm": 3.540350914001465, "learning_rate": 7.61657069572276e-07, "loss": 0.0202, "step": 398170 }, { "epoch": 4.254287087985469, "grad_norm": 3.0271661281585693, "learning_rate": 7.616427527430824e-07, "loss": 0.0197, "step": 398180 }, { "epoch": 4.254393931299749, "grad_norm": 0.02159978821873665, "learning_rate": 7.616284356184745e-07, "loss": 0.007, "step": 398190 }, { "epoch": 4.254500774614028, "grad_norm": 0.021938370540738106, "learning_rate": 7.616141181984682e-07, "loss": 0.0047, "step": 398200 }, { "epoch": 4.2546076179283085, "grad_norm": 1.1045042276382446, "learning_rate": 7.615998004830799e-07, "loss": 0.0013, "step": 398210 }, { "epoch": 4.254714461242588, "grad_norm": 1.3958864212036133, "learning_rate": 7.615854824723258e-07, "loss": 0.0042, "step": 398220 }, { "epoch": 4.2548213045568675, "grad_norm": 0.434175968170166, "learning_rate": 7.615711641662217e-07, "loss": 0.0008, "step": 398230 }, { "epoch": 4.254928147871147, "grad_norm": 0.7933242917060852, "learning_rate": 7.615568455647842e-07, "loss": 0.0051, "step": 398240 }, { "epoch": 4.255034991185426, "grad_norm": 11.70838451385498, "learning_rate": 7.615425266680291e-07, "loss": 0.0036, "step": 398250 }, { "epoch": 4.255141834499706, "grad_norm": 0.07126767188310623, "learning_rate": 7.615282074759729e-07, "loss": 0.0002, "step": 398260 }, { "epoch": 4.255248677813986, "grad_norm": 1.3432292938232422, "learning_rate": 7.615138879886316e-07, "loss": 0.0108, "step": 398270 }, { "epoch": 4.255355521128266, "grad_norm": 0.007963882759213448, "learning_rate": 7.614995682060212e-07, "loss": 0.0069, "step": 398280 }, { "epoch": 4.255462364442545, "grad_norm": 0.47183382511138916, "learning_rate": 7.614852481281584e-07, "loss": 0.0362, "step": 398290 }, { "epoch": 4.255569207756825, "grad_norm": 0.009609021246433258, "learning_rate": 7.614709277550589e-07, "loss": 0.0407, "step": 398300 }, { "epoch": 4.255676051071104, "grad_norm": 0.0018702121451497078, "learning_rate": 7.614566070867388e-07, "loss": 0.0131, "step": 398310 }, { "epoch": 4.2557828943853835, "grad_norm": 10.330482482910156, "learning_rate": 7.614422861232146e-07, "loss": 0.0096, "step": 398320 }, { "epoch": 4.255889737699664, "grad_norm": 2.0944864749908447, "learning_rate": 7.614279648645023e-07, "loss": 0.0415, "step": 398330 }, { "epoch": 4.255996581013943, "grad_norm": 0.06363822519779205, "learning_rate": 7.61413643310618e-07, "loss": 0.0037, "step": 398340 }, { "epoch": 4.256103424328223, "grad_norm": 0.005072004161775112, "learning_rate": 7.613993214615781e-07, "loss": 0.0002, "step": 398350 }, { "epoch": 4.256210267642502, "grad_norm": 0.12774236500263214, "learning_rate": 7.613849993173986e-07, "loss": 0.0067, "step": 398360 }, { "epoch": 4.256317110956782, "grad_norm": 2.379626989364624, "learning_rate": 7.613706768780956e-07, "loss": 0.0057, "step": 398370 }, { "epoch": 4.256423954271061, "grad_norm": 0.04817730933427811, "learning_rate": 7.613563541436856e-07, "loss": 0.0024, "step": 398380 }, { "epoch": 4.2565307975853415, "grad_norm": 3.2619705200195312, "learning_rate": 7.613420311141845e-07, "loss": 0.0132, "step": 398390 }, { "epoch": 4.256637640899621, "grad_norm": 0.0853818878531456, "learning_rate": 7.613277077896084e-07, "loss": 0.0235, "step": 398400 }, { "epoch": 4.2567444842139, "grad_norm": 0.0042652166448533535, "learning_rate": 7.613133841699738e-07, "loss": 0.0019, "step": 398410 }, { "epoch": 4.25685132752818, "grad_norm": 0.003030820284038782, "learning_rate": 7.612990602552964e-07, "loss": 0.0047, "step": 398420 }, { "epoch": 4.256958170842459, "grad_norm": 5.128447532653809, "learning_rate": 7.612847360455927e-07, "loss": 0.0144, "step": 398430 }, { "epoch": 4.257065014156739, "grad_norm": 25.170970916748047, "learning_rate": 7.612704115408788e-07, "loss": 0.0472, "step": 398440 }, { "epoch": 4.257171857471019, "grad_norm": 0.6529116630554199, "learning_rate": 7.61256086741171e-07, "loss": 0.0102, "step": 398450 }, { "epoch": 4.257278700785299, "grad_norm": 3.2304675579071045, "learning_rate": 7.612417616464854e-07, "loss": 0.008, "step": 398460 }, { "epoch": 4.257385544099578, "grad_norm": 0.10027903318405151, "learning_rate": 7.612274362568381e-07, "loss": 0.0101, "step": 398470 }, { "epoch": 4.2574923874138575, "grad_norm": 1.1541352272033691, "learning_rate": 7.612131105722451e-07, "loss": 0.0033, "step": 398480 }, { "epoch": 4.257599230728137, "grad_norm": 1.9652457237243652, "learning_rate": 7.61198784592723e-07, "loss": 0.0045, "step": 398490 }, { "epoch": 4.257706074042416, "grad_norm": 10.517887115478516, "learning_rate": 7.611844583182878e-07, "loss": 0.0038, "step": 398500 }, { "epoch": 4.257812917356697, "grad_norm": 0.0061643472872674465, "learning_rate": 7.611701317489554e-07, "loss": 0.0034, "step": 398510 }, { "epoch": 4.257919760670976, "grad_norm": 0.0012917325366288424, "learning_rate": 7.611558048847424e-07, "loss": 0.0021, "step": 398520 }, { "epoch": 4.258026603985256, "grad_norm": 2.070352554321289, "learning_rate": 7.611414777256649e-07, "loss": 0.0309, "step": 398530 }, { "epoch": 4.258133447299535, "grad_norm": 0.007390971761196852, "learning_rate": 7.611271502717387e-07, "loss": 0.0023, "step": 398540 }, { "epoch": 4.258240290613815, "grad_norm": 0.4304808974266052, "learning_rate": 7.611128225229804e-07, "loss": 0.0085, "step": 398550 }, { "epoch": 4.258347133928094, "grad_norm": 0.014785807579755783, "learning_rate": 7.610984944794059e-07, "loss": 0.0065, "step": 398560 }, { "epoch": 4.258453977242374, "grad_norm": 0.08636321872472763, "learning_rate": 7.610841661410315e-07, "loss": 0.0287, "step": 398570 }, { "epoch": 4.258560820556654, "grad_norm": 0.00541886780411005, "learning_rate": 7.610698375078735e-07, "loss": 0.0037, "step": 398580 }, { "epoch": 4.258667663870933, "grad_norm": 2.0278711318969727, "learning_rate": 7.610555085799479e-07, "loss": 0.0053, "step": 398590 }, { "epoch": 4.258774507185213, "grad_norm": 0.0006864091847091913, "learning_rate": 7.610411793572707e-07, "loss": 0.0429, "step": 398600 }, { "epoch": 4.258881350499492, "grad_norm": 0.0034257492516189814, "learning_rate": 7.610268498398587e-07, "loss": 0.0008, "step": 398610 }, { "epoch": 4.258988193813772, "grad_norm": 1.6492488384246826, "learning_rate": 7.610125200277274e-07, "loss": 0.0012, "step": 398620 }, { "epoch": 4.259095037128052, "grad_norm": 0.006960407365113497, "learning_rate": 7.609981899208933e-07, "loss": 0.0025, "step": 398630 }, { "epoch": 4.2592018804423315, "grad_norm": 0.025148089975118637, "learning_rate": 7.609838595193728e-07, "loss": 0.0012, "step": 398640 }, { "epoch": 4.259308723756611, "grad_norm": 2.076199769973755, "learning_rate": 7.609695288231815e-07, "loss": 0.0207, "step": 398650 }, { "epoch": 4.25941556707089, "grad_norm": 1.5322569608688354, "learning_rate": 7.609551978323361e-07, "loss": 0.0175, "step": 398660 }, { "epoch": 4.25952241038517, "grad_norm": 8.798921585083008, "learning_rate": 7.609408665468524e-07, "loss": 0.0118, "step": 398670 }, { "epoch": 4.259629253699449, "grad_norm": 0.0378790982067585, "learning_rate": 7.609265349667469e-07, "loss": 0.0053, "step": 398680 }, { "epoch": 4.25973609701373, "grad_norm": 0.07242432236671448, "learning_rate": 7.609122030920356e-07, "loss": 0.0811, "step": 398690 }, { "epoch": 4.259842940328009, "grad_norm": 0.039632365107536316, "learning_rate": 7.608978709227348e-07, "loss": 0.0061, "step": 398700 }, { "epoch": 4.259949783642289, "grad_norm": 26.279306411743164, "learning_rate": 7.608835384588605e-07, "loss": 0.0122, "step": 398710 }, { "epoch": 4.260056626956568, "grad_norm": 0.21866914629936218, "learning_rate": 7.608692057004291e-07, "loss": 0.0064, "step": 398720 }, { "epoch": 4.2601634702708475, "grad_norm": 0.09832513332366943, "learning_rate": 7.608548726474568e-07, "loss": 0.0018, "step": 398730 }, { "epoch": 4.260270313585128, "grad_norm": 0.0030320819932967424, "learning_rate": 7.608405392999594e-07, "loss": 0.0019, "step": 398740 }, { "epoch": 4.260377156899407, "grad_norm": 2.6713132858276367, "learning_rate": 7.608262056579536e-07, "loss": 0.0105, "step": 398750 }, { "epoch": 4.260484000213687, "grad_norm": 1.3580381870269775, "learning_rate": 7.608118717214551e-07, "loss": 0.0158, "step": 398760 }, { "epoch": 4.260590843527966, "grad_norm": 0.03676991164684296, "learning_rate": 7.607975374904804e-07, "loss": 0.0239, "step": 398770 }, { "epoch": 4.260697686842246, "grad_norm": 0.22446046769618988, "learning_rate": 7.607832029650455e-07, "loss": 0.0038, "step": 398780 }, { "epoch": 4.260804530156525, "grad_norm": 0.010108635760843754, "learning_rate": 7.607688681451669e-07, "loss": 0.0119, "step": 398790 }, { "epoch": 4.260911373470805, "grad_norm": 1.2264258861541748, "learning_rate": 7.607545330308603e-07, "loss": 0.0081, "step": 398800 }, { "epoch": 4.261018216785085, "grad_norm": 4.628786087036133, "learning_rate": 7.607401976221424e-07, "loss": 0.0039, "step": 398810 }, { "epoch": 4.261125060099364, "grad_norm": 2.47037935256958, "learning_rate": 7.60725861919029e-07, "loss": 0.002, "step": 398820 }, { "epoch": 4.261231903413644, "grad_norm": 0.6702806949615479, "learning_rate": 7.607115259215364e-07, "loss": 0.0053, "step": 398830 }, { "epoch": 4.261338746727923, "grad_norm": 0.04214269667863846, "learning_rate": 7.606971896296808e-07, "loss": 0.0037, "step": 398840 }, { "epoch": 4.261445590042203, "grad_norm": 0.2153254896402359, "learning_rate": 7.606828530434786e-07, "loss": 0.008, "step": 398850 }, { "epoch": 4.261552433356483, "grad_norm": 0.15687566995620728, "learning_rate": 7.606685161629455e-07, "loss": 0.0057, "step": 398860 }, { "epoch": 4.261659276670763, "grad_norm": 0.028696706518530846, "learning_rate": 7.606541789880982e-07, "loss": 0.0034, "step": 398870 }, { "epoch": 4.261766119985042, "grad_norm": 0.02839004062116146, "learning_rate": 7.606398415189526e-07, "loss": 0.0271, "step": 398880 }, { "epoch": 4.2618729632993215, "grad_norm": 2.4931015968322754, "learning_rate": 7.60625503755525e-07, "loss": 0.0055, "step": 398890 }, { "epoch": 4.261979806613601, "grad_norm": 0.26720955967903137, "learning_rate": 7.606111656978315e-07, "loss": 0.014, "step": 398900 }, { "epoch": 4.26208664992788, "grad_norm": 0.004538699518889189, "learning_rate": 7.605968273458881e-07, "loss": 0.0065, "step": 398910 }, { "epoch": 4.26219349324216, "grad_norm": 0.19847139716148376, "learning_rate": 7.605824886997115e-07, "loss": 0.0342, "step": 398920 }, { "epoch": 4.26230033655644, "grad_norm": 0.22840771079063416, "learning_rate": 7.605681497593174e-07, "loss": 0.0044, "step": 398930 }, { "epoch": 4.26240717987072, "grad_norm": 3.8570733070373535, "learning_rate": 7.605538105247224e-07, "loss": 0.0195, "step": 398940 }, { "epoch": 4.262514023184999, "grad_norm": 0.05746367201209068, "learning_rate": 7.605394709959424e-07, "loss": 0.0212, "step": 398950 }, { "epoch": 4.262620866499279, "grad_norm": 0.8619394302368164, "learning_rate": 7.605251311729936e-07, "loss": 0.0216, "step": 398960 }, { "epoch": 4.262727709813558, "grad_norm": 0.2515241503715515, "learning_rate": 7.605107910558924e-07, "loss": 0.0086, "step": 398970 }, { "epoch": 4.262834553127838, "grad_norm": 0.1261787861585617, "learning_rate": 7.604964506446548e-07, "loss": 0.0028, "step": 398980 }, { "epoch": 4.262941396442118, "grad_norm": 0.9409259557723999, "learning_rate": 7.60482109939297e-07, "loss": 0.0168, "step": 398990 }, { "epoch": 4.263048239756397, "grad_norm": 0.02350718528032303, "learning_rate": 7.604677689398352e-07, "loss": 0.008, "step": 399000 }, { "epoch": 4.263155083070677, "grad_norm": 3.510488748550415, "learning_rate": 7.604534276462857e-07, "loss": 0.0103, "step": 399010 }, { "epoch": 4.263261926384956, "grad_norm": 0.005461486056447029, "learning_rate": 7.604390860586646e-07, "loss": 0.0176, "step": 399020 }, { "epoch": 4.263368769699236, "grad_norm": 0.7580599784851074, "learning_rate": 7.604247441769882e-07, "loss": 0.0012, "step": 399030 }, { "epoch": 4.263475613013516, "grad_norm": 0.005354313645511866, "learning_rate": 7.604104020012724e-07, "loss": 0.0011, "step": 399040 }, { "epoch": 4.2635824563277955, "grad_norm": 0.210972860455513, "learning_rate": 7.603960595315338e-07, "loss": 0.0053, "step": 399050 }, { "epoch": 4.263689299642075, "grad_norm": 0.0013954390306025743, "learning_rate": 7.603817167677884e-07, "loss": 0.0712, "step": 399060 }, { "epoch": 4.2637961429563545, "grad_norm": 0.058414276689291, "learning_rate": 7.603673737100522e-07, "loss": 0.0184, "step": 399070 }, { "epoch": 4.263902986270634, "grad_norm": 1.9106539487838745, "learning_rate": 7.603530303583419e-07, "loss": 0.0245, "step": 399080 }, { "epoch": 4.264009829584913, "grad_norm": 3.7842438220977783, "learning_rate": 7.603386867126731e-07, "loss": 0.0048, "step": 399090 }, { "epoch": 4.264116672899194, "grad_norm": 1.7317790985107422, "learning_rate": 7.603243427730624e-07, "loss": 0.0009, "step": 399100 }, { "epoch": 4.264223516213473, "grad_norm": 0.001611544517800212, "learning_rate": 7.603099985395257e-07, "loss": 0.0024, "step": 399110 }, { "epoch": 4.264330359527753, "grad_norm": 0.02650858648121357, "learning_rate": 7.602956540120795e-07, "loss": 0.0099, "step": 399120 }, { "epoch": 4.264437202842032, "grad_norm": 0.004244997166097164, "learning_rate": 7.602813091907399e-07, "loss": 0.0004, "step": 399130 }, { "epoch": 4.2645440461563116, "grad_norm": 0.02382054552435875, "learning_rate": 7.60266964075523e-07, "loss": 0.0129, "step": 399140 }, { "epoch": 4.264650889470591, "grad_norm": 0.08595294505357742, "learning_rate": 7.602526186664451e-07, "loss": 0.0046, "step": 399150 }, { "epoch": 4.264757732784871, "grad_norm": 0.22417083382606506, "learning_rate": 7.602382729635224e-07, "loss": 0.0117, "step": 399160 }, { "epoch": 4.264864576099151, "grad_norm": 0.522284746170044, "learning_rate": 7.602239269667708e-07, "loss": 0.0212, "step": 399170 }, { "epoch": 4.26497141941343, "grad_norm": 0.027179554104804993, "learning_rate": 7.60209580676207e-07, "loss": 0.0008, "step": 399180 }, { "epoch": 4.26507826272771, "grad_norm": 52.555057525634766, "learning_rate": 7.60195234091847e-07, "loss": 0.0497, "step": 399190 }, { "epoch": 4.265185106041989, "grad_norm": 0.011232140474021435, "learning_rate": 7.601808872137067e-07, "loss": 0.0124, "step": 399200 }, { "epoch": 4.265291949356269, "grad_norm": 0.054168451577425, "learning_rate": 7.601665400418027e-07, "loss": 0.0175, "step": 399210 }, { "epoch": 4.265398792670549, "grad_norm": 0.28220134973526, "learning_rate": 7.601521925761509e-07, "loss": 0.0123, "step": 399220 }, { "epoch": 4.2655056359848285, "grad_norm": 0.0388355478644371, "learning_rate": 7.601378448167677e-07, "loss": 0.0148, "step": 399230 }, { "epoch": 4.265612479299108, "grad_norm": 0.06416051834821701, "learning_rate": 7.601234967636694e-07, "loss": 0.0097, "step": 399240 }, { "epoch": 4.265719322613387, "grad_norm": 0.004169882740825415, "learning_rate": 7.601091484168718e-07, "loss": 0.0051, "step": 399250 }, { "epoch": 4.265826165927667, "grad_norm": 0.010018454864621162, "learning_rate": 7.600947997763915e-07, "loss": 0.0043, "step": 399260 }, { "epoch": 4.265933009241946, "grad_norm": 2.202174425125122, "learning_rate": 7.600804508422445e-07, "loss": 0.0075, "step": 399270 }, { "epoch": 4.266039852556227, "grad_norm": 0.061463724821805954, "learning_rate": 7.600661016144469e-07, "loss": 0.0077, "step": 399280 }, { "epoch": 4.266146695870506, "grad_norm": 0.10591285675764084, "learning_rate": 7.600517520930152e-07, "loss": 0.0063, "step": 399290 }, { "epoch": 4.266253539184786, "grad_norm": 0.9744676351547241, "learning_rate": 7.600374022779654e-07, "loss": 0.0071, "step": 399300 }, { "epoch": 4.266360382499065, "grad_norm": 0.051346901804208755, "learning_rate": 7.600230521693138e-07, "loss": 0.0011, "step": 399310 }, { "epoch": 4.2664672258133445, "grad_norm": 0.08031553775072098, "learning_rate": 7.600087017670766e-07, "loss": 0.0173, "step": 399320 }, { "epoch": 4.266574069127624, "grad_norm": 0.0025404735933989286, "learning_rate": 7.599943510712699e-07, "loss": 0.0293, "step": 399330 }, { "epoch": 4.266680912441904, "grad_norm": 4.048113822937012, "learning_rate": 7.599800000819099e-07, "loss": 0.0136, "step": 399340 }, { "epoch": 4.266787755756184, "grad_norm": 1.3070266246795654, "learning_rate": 7.599656487990128e-07, "loss": 0.0028, "step": 399350 }, { "epoch": 4.266894599070463, "grad_norm": 5.2151899337768555, "learning_rate": 7.59951297222595e-07, "loss": 0.0078, "step": 399360 }, { "epoch": 4.267001442384743, "grad_norm": 0.0490100271999836, "learning_rate": 7.599369453526725e-07, "loss": 0.0034, "step": 399370 }, { "epoch": 4.267108285699022, "grad_norm": 1.3621044158935547, "learning_rate": 7.599225931892616e-07, "loss": 0.0008, "step": 399380 }, { "epoch": 4.267215129013302, "grad_norm": 0.40913882851600647, "learning_rate": 7.599082407323784e-07, "loss": 0.0027, "step": 399390 }, { "epoch": 4.267321972327582, "grad_norm": 0.10736037790775299, "learning_rate": 7.598938879820393e-07, "loss": 0.0212, "step": 399400 }, { "epoch": 4.267428815641861, "grad_norm": 0.3745041489601135, "learning_rate": 7.598795349382602e-07, "loss": 0.0045, "step": 399410 }, { "epoch": 4.267535658956141, "grad_norm": 4.107534885406494, "learning_rate": 7.598651816010576e-07, "loss": 0.0116, "step": 399420 }, { "epoch": 4.26764250227042, "grad_norm": 0.10965629667043686, "learning_rate": 7.598508279704476e-07, "loss": 0.016, "step": 399430 }, { "epoch": 4.2677493455847, "grad_norm": 0.40839681029319763, "learning_rate": 7.598364740464465e-07, "loss": 0.0067, "step": 399440 }, { "epoch": 4.26785618889898, "grad_norm": 0.011485300958156586, "learning_rate": 7.598221198290702e-07, "loss": 0.0075, "step": 399450 }, { "epoch": 4.26796303221326, "grad_norm": 0.01492274645715952, "learning_rate": 7.598077653183352e-07, "loss": 0.0016, "step": 399460 }, { "epoch": 4.268069875527539, "grad_norm": 6.347409248352051, "learning_rate": 7.597934105142578e-07, "loss": 0.0345, "step": 399470 }, { "epoch": 4.2681767188418185, "grad_norm": 10.553491592407227, "learning_rate": 7.597790554168538e-07, "loss": 0.0206, "step": 399480 }, { "epoch": 4.268283562156098, "grad_norm": 0.0023686010390520096, "learning_rate": 7.597647000261398e-07, "loss": 0.0007, "step": 399490 }, { "epoch": 4.268390405470377, "grad_norm": 0.024222357198596, "learning_rate": 7.597503443421318e-07, "loss": 0.0055, "step": 399500 }, { "epoch": 4.268497248784657, "grad_norm": 0.00497430469840765, "learning_rate": 7.59735988364846e-07, "loss": 0.0017, "step": 399510 }, { "epoch": 4.268604092098937, "grad_norm": 2.2389895915985107, "learning_rate": 7.597216320942987e-07, "loss": 0.004, "step": 399520 }, { "epoch": 4.268710935413217, "grad_norm": 3.1821117401123047, "learning_rate": 7.597072755305061e-07, "loss": 0.0055, "step": 399530 }, { "epoch": 4.268817778727496, "grad_norm": 0.005470595322549343, "learning_rate": 7.596929186734843e-07, "loss": 0.0056, "step": 399540 }, { "epoch": 4.268924622041776, "grad_norm": 0.08428271114826202, "learning_rate": 7.596785615232497e-07, "loss": 0.0023, "step": 399550 }, { "epoch": 4.269031465356055, "grad_norm": 0.0014086394803598523, "learning_rate": 7.596642040798184e-07, "loss": 0.0144, "step": 399560 }, { "epoch": 4.269138308670335, "grad_norm": 0.02296999655663967, "learning_rate": 7.596498463432065e-07, "loss": 0.0034, "step": 399570 }, { "epoch": 4.269245151984615, "grad_norm": 2.19396710395813, "learning_rate": 7.596354883134303e-07, "loss": 0.0083, "step": 399580 }, { "epoch": 4.269351995298894, "grad_norm": 0.0036864595022052526, "learning_rate": 7.596211299905063e-07, "loss": 0.0095, "step": 399590 }, { "epoch": 4.269458838613174, "grad_norm": 12.851062774658203, "learning_rate": 7.596067713744503e-07, "loss": 0.0822, "step": 399600 }, { "epoch": 4.269565681927453, "grad_norm": 1.8184126615524292, "learning_rate": 7.595924124652786e-07, "loss": 0.0017, "step": 399610 }, { "epoch": 4.269672525241733, "grad_norm": 0.009663719683885574, "learning_rate": 7.595780532630077e-07, "loss": 0.0003, "step": 399620 }, { "epoch": 4.269779368556012, "grad_norm": 0.8583738803863525, "learning_rate": 7.595636937676533e-07, "loss": 0.0159, "step": 399630 }, { "epoch": 4.2698862118702925, "grad_norm": 0.004407316446304321, "learning_rate": 7.59549333979232e-07, "loss": 0.0071, "step": 399640 }, { "epoch": 4.269993055184572, "grad_norm": 0.1494426280260086, "learning_rate": 7.595349738977599e-07, "loss": 0.0159, "step": 399650 }, { "epoch": 4.270099898498851, "grad_norm": 0.007551307324320078, "learning_rate": 7.595206135232532e-07, "loss": 0.0028, "step": 399660 }, { "epoch": 4.270206741813131, "grad_norm": 0.03030531480908394, "learning_rate": 7.595062528557284e-07, "loss": 0.0208, "step": 399670 }, { "epoch": 4.27031358512741, "grad_norm": 8.978490829467773, "learning_rate": 7.594918918952012e-07, "loss": 0.0235, "step": 399680 }, { "epoch": 4.270420428441691, "grad_norm": 0.07845981419086456, "learning_rate": 7.59477530641688e-07, "loss": 0.0099, "step": 399690 }, { "epoch": 4.27052727175597, "grad_norm": 1.07150137424469, "learning_rate": 7.594631690952051e-07, "loss": 0.0007, "step": 399700 }, { "epoch": 4.27063411507025, "grad_norm": 2.448184013366699, "learning_rate": 7.59448807255769e-07, "loss": 0.0173, "step": 399710 }, { "epoch": 4.270740958384529, "grad_norm": 0.26053181290626526, "learning_rate": 7.594344451233953e-07, "loss": 0.0056, "step": 399720 }, { "epoch": 4.2708478016988085, "grad_norm": 1.90694260597229, "learning_rate": 7.594200826981007e-07, "loss": 0.0065, "step": 399730 }, { "epoch": 4.270954645013088, "grad_norm": 8.885443687438965, "learning_rate": 7.594057199799011e-07, "loss": 0.008, "step": 399740 }, { "epoch": 4.271061488327368, "grad_norm": 0.003678524401038885, "learning_rate": 7.593913569688131e-07, "loss": 0.0057, "step": 399750 }, { "epoch": 4.271168331641648, "grad_norm": 0.007550002541393042, "learning_rate": 7.593769936648525e-07, "loss": 0.0038, "step": 399760 }, { "epoch": 4.271275174955927, "grad_norm": 3.986569881439209, "learning_rate": 7.593626300680356e-07, "loss": 0.0119, "step": 399770 }, { "epoch": 4.271382018270207, "grad_norm": 0.4190768003463745, "learning_rate": 7.59348266178379e-07, "loss": 0.0027, "step": 399780 }, { "epoch": 4.271488861584486, "grad_norm": 0.047793205827474594, "learning_rate": 7.593339019958985e-07, "loss": 0.0012, "step": 399790 }, { "epoch": 4.271595704898766, "grad_norm": 2.6609768867492676, "learning_rate": 7.593195375206104e-07, "loss": 0.0127, "step": 399800 }, { "epoch": 4.271702548213046, "grad_norm": 0.0011251077521592379, "learning_rate": 7.593051727525311e-07, "loss": 0.0119, "step": 399810 }, { "epoch": 4.271809391527325, "grad_norm": 0.3547584116458893, "learning_rate": 7.592908076916764e-07, "loss": 0.0128, "step": 399820 }, { "epoch": 4.271916234841605, "grad_norm": 4.395890235900879, "learning_rate": 7.592764423380631e-07, "loss": 0.0392, "step": 399830 }, { "epoch": 4.272023078155884, "grad_norm": 4.526460647583008, "learning_rate": 7.592620766917071e-07, "loss": 0.0033, "step": 399840 }, { "epoch": 4.272129921470164, "grad_norm": 0.10655809938907623, "learning_rate": 7.592477107526245e-07, "loss": 0.0163, "step": 399850 }, { "epoch": 4.272236764784443, "grad_norm": 1.16184401512146, "learning_rate": 7.592333445208319e-07, "loss": 0.0078, "step": 399860 }, { "epoch": 4.272343608098724, "grad_norm": 8.915056228637695, "learning_rate": 7.592189779963451e-07, "loss": 0.0048, "step": 399870 }, { "epoch": 4.272450451413003, "grad_norm": 0.0010090383002534509, "learning_rate": 7.592046111791806e-07, "loss": 0.0087, "step": 399880 }, { "epoch": 4.2725572947272825, "grad_norm": 0.5974107384681702, "learning_rate": 7.591902440693544e-07, "loss": 0.015, "step": 399890 }, { "epoch": 4.272664138041562, "grad_norm": 9.669596672058105, "learning_rate": 7.59175876666883e-07, "loss": 0.0181, "step": 399900 }, { "epoch": 4.272770981355841, "grad_norm": 0.5730969309806824, "learning_rate": 7.591615089717824e-07, "loss": 0.0078, "step": 399910 }, { "epoch": 4.272877824670121, "grad_norm": 0.33143097162246704, "learning_rate": 7.591471409840689e-07, "loss": 0.0064, "step": 399920 }, { "epoch": 4.272984667984401, "grad_norm": 0.003058978822082281, "learning_rate": 7.591327727037587e-07, "loss": 0.0029, "step": 399930 }, { "epoch": 4.273091511298681, "grad_norm": 0.007281247992068529, "learning_rate": 7.59118404130868e-07, "loss": 0.0233, "step": 399940 }, { "epoch": 4.27319835461296, "grad_norm": 0.0188627727329731, "learning_rate": 7.591040352654132e-07, "loss": 0.0021, "step": 399950 }, { "epoch": 4.27330519792724, "grad_norm": 0.05210070684552193, "learning_rate": 7.590896661074104e-07, "loss": 0.0432, "step": 399960 }, { "epoch": 4.273412041241519, "grad_norm": 0.004948102869093418, "learning_rate": 7.590752966568757e-07, "loss": 0.0016, "step": 399970 }, { "epoch": 4.2735188845557985, "grad_norm": 0.005560028832405806, "learning_rate": 7.590609269138255e-07, "loss": 0.0021, "step": 399980 }, { "epoch": 4.273625727870079, "grad_norm": 0.00399928679689765, "learning_rate": 7.590465568782758e-07, "loss": 0.0049, "step": 399990 }, { "epoch": 4.273732571184358, "grad_norm": 1.755591630935669, "learning_rate": 7.590321865502431e-07, "loss": 0.0027, "step": 400000 }, { "epoch": 4.273839414498638, "grad_norm": 8.441557884216309, "learning_rate": 7.590178159297436e-07, "loss": 0.0295, "step": 400010 }, { "epoch": 4.273946257812917, "grad_norm": 0.072533018887043, "learning_rate": 7.590034450167933e-07, "loss": 0.0059, "step": 400020 }, { "epoch": 4.274053101127197, "grad_norm": 0.006098134443163872, "learning_rate": 7.589890738114087e-07, "loss": 0.0203, "step": 400030 }, { "epoch": 4.274159944441476, "grad_norm": 0.006180678494274616, "learning_rate": 7.589747023136057e-07, "loss": 0.0078, "step": 400040 }, { "epoch": 4.2742667877557565, "grad_norm": 7.060398101806641, "learning_rate": 7.589603305234009e-07, "loss": 0.087, "step": 400050 }, { "epoch": 4.274373631070036, "grad_norm": 0.008632393553853035, "learning_rate": 7.589459584408102e-07, "loss": 0.0136, "step": 400060 }, { "epoch": 4.274480474384315, "grad_norm": 0.28265589475631714, "learning_rate": 7.589315860658501e-07, "loss": 0.0033, "step": 400070 }, { "epoch": 4.274587317698595, "grad_norm": 0.002174829365685582, "learning_rate": 7.589172133985366e-07, "loss": 0.0021, "step": 400080 }, { "epoch": 4.274694161012874, "grad_norm": 0.06360223144292831, "learning_rate": 7.589028404388862e-07, "loss": 0.0028, "step": 400090 }, { "epoch": 4.274801004327154, "grad_norm": 0.006185389123857021, "learning_rate": 7.588884671869148e-07, "loss": 0.0179, "step": 400100 }, { "epoch": 4.274907847641434, "grad_norm": 0.010015659034252167, "learning_rate": 7.588740936426388e-07, "loss": 0.0056, "step": 400110 }, { "epoch": 4.275014690955714, "grad_norm": 0.07136678695678711, "learning_rate": 7.588597198060744e-07, "loss": 0.0235, "step": 400120 }, { "epoch": 4.275121534269993, "grad_norm": 2.006155014038086, "learning_rate": 7.588453456772379e-07, "loss": 0.0067, "step": 400130 }, { "epoch": 4.2752283775842725, "grad_norm": 6.903463363647461, "learning_rate": 7.588309712561454e-07, "loss": 0.0019, "step": 400140 }, { "epoch": 4.275335220898552, "grad_norm": 0.005278921686112881, "learning_rate": 7.588165965428131e-07, "loss": 0.0122, "step": 400150 }, { "epoch": 4.275442064212832, "grad_norm": 0.0014001294039189816, "learning_rate": 7.588022215372576e-07, "loss": 0.0013, "step": 400160 }, { "epoch": 4.275548907527112, "grad_norm": 0.09025922417640686, "learning_rate": 7.587878462394945e-07, "loss": 0.0193, "step": 400170 }, { "epoch": 4.275655750841391, "grad_norm": 0.0009215397294610739, "learning_rate": 7.587734706495408e-07, "loss": 0.0085, "step": 400180 }, { "epoch": 4.275762594155671, "grad_norm": 6.833036422729492, "learning_rate": 7.58759094767412e-07, "loss": 0.0036, "step": 400190 }, { "epoch": 4.27586943746995, "grad_norm": 0.011747818440198898, "learning_rate": 7.587447185931248e-07, "loss": 0.0023, "step": 400200 }, { "epoch": 4.27597628078423, "grad_norm": 2.74147891998291, "learning_rate": 7.587303421266953e-07, "loss": 0.007, "step": 400210 }, { "epoch": 4.276083124098509, "grad_norm": 0.47189536690711975, "learning_rate": 7.587159653681396e-07, "loss": 0.0106, "step": 400220 }, { "epoch": 4.2761899674127894, "grad_norm": 0.006311314646154642, "learning_rate": 7.587015883174741e-07, "loss": 0.0093, "step": 400230 }, { "epoch": 4.276296810727069, "grad_norm": 0.001581008778885007, "learning_rate": 7.586872109747151e-07, "loss": 0.004, "step": 400240 }, { "epoch": 4.276403654041348, "grad_norm": 0.13195481896400452, "learning_rate": 7.586728333398785e-07, "loss": 0.0156, "step": 400250 }, { "epoch": 4.276510497355628, "grad_norm": 2.6354568004608154, "learning_rate": 7.586584554129809e-07, "loss": 0.0185, "step": 400260 }, { "epoch": 4.276617340669907, "grad_norm": 0.11056008189916611, "learning_rate": 7.586440771940383e-07, "loss": 0.0142, "step": 400270 }, { "epoch": 4.276724183984188, "grad_norm": 0.001435017678886652, "learning_rate": 7.586296986830671e-07, "loss": 0.0034, "step": 400280 }, { "epoch": 4.276831027298467, "grad_norm": 0.039648279547691345, "learning_rate": 7.586153198800836e-07, "loss": 0.0068, "step": 400290 }, { "epoch": 4.2769378706127466, "grad_norm": 1.0297112464904785, "learning_rate": 7.586009407851037e-07, "loss": 0.0017, "step": 400300 }, { "epoch": 4.277044713927026, "grad_norm": 0.014860183000564575, "learning_rate": 7.585865613981437e-07, "loss": 0.0346, "step": 400310 }, { "epoch": 4.2771515572413055, "grad_norm": 0.0031158505007624626, "learning_rate": 7.585721817192201e-07, "loss": 0.0031, "step": 400320 }, { "epoch": 4.277258400555585, "grad_norm": 0.019696541130542755, "learning_rate": 7.585578017483489e-07, "loss": 0.0131, "step": 400330 }, { "epoch": 4.277365243869864, "grad_norm": 3.1148128509521484, "learning_rate": 7.585434214855465e-07, "loss": 0.0241, "step": 400340 }, { "epoch": 4.277472087184145, "grad_norm": 2.6282060146331787, "learning_rate": 7.585290409308289e-07, "loss": 0.0109, "step": 400350 }, { "epoch": 4.277578930498424, "grad_norm": 0.02468845620751381, "learning_rate": 7.585146600842127e-07, "loss": 0.0011, "step": 400360 }, { "epoch": 4.277685773812704, "grad_norm": 0.0006664908723905683, "learning_rate": 7.58500278945714e-07, "loss": 0.0292, "step": 400370 }, { "epoch": 4.277792617126983, "grad_norm": 0.5612335801124573, "learning_rate": 7.584858975153488e-07, "loss": 0.001, "step": 400380 }, { "epoch": 4.277899460441263, "grad_norm": 2.517549991607666, "learning_rate": 7.584715157931336e-07, "loss": 0.0139, "step": 400390 }, { "epoch": 4.278006303755543, "grad_norm": 0.01197208184748888, "learning_rate": 7.584571337790846e-07, "loss": 0.0345, "step": 400400 }, { "epoch": 4.278113147069822, "grad_norm": 0.056172724813222885, "learning_rate": 7.584427514732179e-07, "loss": 0.0075, "step": 400410 }, { "epoch": 4.278219990384102, "grad_norm": 1.835586667060852, "learning_rate": 7.584283688755497e-07, "loss": 0.006, "step": 400420 }, { "epoch": 4.278326833698381, "grad_norm": 0.12802836298942566, "learning_rate": 7.584139859860966e-07, "loss": 0.0039, "step": 400430 }, { "epoch": 4.278433677012661, "grad_norm": 0.13381807506084442, "learning_rate": 7.583996028048746e-07, "loss": 0.0165, "step": 400440 }, { "epoch": 4.27854052032694, "grad_norm": 0.2173093855381012, "learning_rate": 7.583852193318998e-07, "loss": 0.0028, "step": 400450 }, { "epoch": 4.278647363641221, "grad_norm": 0.554990291595459, "learning_rate": 7.583708355671887e-07, "loss": 0.0063, "step": 400460 }, { "epoch": 4.2787542069555, "grad_norm": Infinity, "learning_rate": 7.583564515107575e-07, "loss": 0.0863, "step": 400470 }, { "epoch": 4.2788610502697795, "grad_norm": 0.08526413142681122, "learning_rate": 7.583420671626222e-07, "loss": 0.0009, "step": 400480 }, { "epoch": 4.278967893584059, "grad_norm": 4.6388678550720215, "learning_rate": 7.583276825227993e-07, "loss": 0.0192, "step": 400490 }, { "epoch": 4.279074736898338, "grad_norm": 0.15310567617416382, "learning_rate": 7.583132975913049e-07, "loss": 0.0043, "step": 400500 }, { "epoch": 4.279181580212618, "grad_norm": 0.00448183435946703, "learning_rate": 7.582989123681551e-07, "loss": 0.0111, "step": 400510 }, { "epoch": 4.279288423526898, "grad_norm": 3.954925775527954, "learning_rate": 7.582845268533666e-07, "loss": 0.0049, "step": 400520 }, { "epoch": 4.279395266841178, "grad_norm": 0.010479260236024857, "learning_rate": 7.582701410469554e-07, "loss": 0.0029, "step": 400530 }, { "epoch": 4.279502110155457, "grad_norm": 0.020541714504361153, "learning_rate": 7.582557549489376e-07, "loss": 0.0016, "step": 400540 }, { "epoch": 4.279608953469737, "grad_norm": 0.0011557013494893909, "learning_rate": 7.582413685593297e-07, "loss": 0.0001, "step": 400550 }, { "epoch": 4.279715796784016, "grad_norm": 0.04530235379934311, "learning_rate": 7.582269818781477e-07, "loss": 0.0327, "step": 400560 }, { "epoch": 4.2798226400982955, "grad_norm": 0.9967128038406372, "learning_rate": 7.582125949054078e-07, "loss": 0.0185, "step": 400570 }, { "epoch": 4.279929483412576, "grad_norm": 0.00474972790107131, "learning_rate": 7.581982076411266e-07, "loss": 0.0034, "step": 400580 }, { "epoch": 4.280036326726855, "grad_norm": 8.288825988769531, "learning_rate": 7.5818382008532e-07, "loss": 0.0065, "step": 400590 }, { "epoch": 4.280143170041135, "grad_norm": 0.0010763710597530007, "learning_rate": 7.581694322380046e-07, "loss": 0.069, "step": 400600 }, { "epoch": 4.280250013355414, "grad_norm": 1.6350336074829102, "learning_rate": 7.581550440991962e-07, "loss": 0.0068, "step": 400610 }, { "epoch": 4.280356856669694, "grad_norm": 0.008626619353890419, "learning_rate": 7.581406556689113e-07, "loss": 0.0054, "step": 400620 }, { "epoch": 4.280463699983973, "grad_norm": 0.421275794506073, "learning_rate": 7.581262669471662e-07, "loss": 0.0102, "step": 400630 }, { "epoch": 4.2805705432982535, "grad_norm": 0.00655877823010087, "learning_rate": 7.58111877933977e-07, "loss": 0.0276, "step": 400640 }, { "epoch": 4.280677386612533, "grad_norm": 0.0629609078168869, "learning_rate": 7.580974886293601e-07, "loss": 0.0176, "step": 400650 }, { "epoch": 4.280784229926812, "grad_norm": 0.04926988482475281, "learning_rate": 7.580830990333315e-07, "loss": 0.0034, "step": 400660 }, { "epoch": 4.280891073241092, "grad_norm": 0.5114955306053162, "learning_rate": 7.580687091459077e-07, "loss": 0.0049, "step": 400670 }, { "epoch": 4.280997916555371, "grad_norm": 2.532259464263916, "learning_rate": 7.580543189671047e-07, "loss": 0.0062, "step": 400680 }, { "epoch": 4.281104759869651, "grad_norm": 0.5008007884025574, "learning_rate": 7.580399284969392e-07, "loss": 0.0116, "step": 400690 }, { "epoch": 4.281211603183931, "grad_norm": 0.028457961976528168, "learning_rate": 7.58025537735427e-07, "loss": 0.0021, "step": 400700 }, { "epoch": 4.281318446498211, "grad_norm": 1.4272173643112183, "learning_rate": 7.580111466825844e-07, "loss": 0.0018, "step": 400710 }, { "epoch": 4.28142528981249, "grad_norm": 0.021765321493148804, "learning_rate": 7.579967553384279e-07, "loss": 0.0054, "step": 400720 }, { "epoch": 4.2815321331267695, "grad_norm": 0.7636004090309143, "learning_rate": 7.579823637029736e-07, "loss": 0.0012, "step": 400730 }, { "epoch": 4.281638976441049, "grad_norm": 3.5163216590881348, "learning_rate": 7.579679717762375e-07, "loss": 0.0095, "step": 400740 }, { "epoch": 4.281745819755328, "grad_norm": 0.003127950243651867, "learning_rate": 7.579535795582364e-07, "loss": 0.0111, "step": 400750 }, { "epoch": 4.281852663069609, "grad_norm": 0.0029336996376514435, "learning_rate": 7.57939187048986e-07, "loss": 0.0016, "step": 400760 }, { "epoch": 4.281959506383888, "grad_norm": 1.4915391206741333, "learning_rate": 7.579247942485029e-07, "loss": 0.0052, "step": 400770 }, { "epoch": 4.282066349698168, "grad_norm": 0.008741338737308979, "learning_rate": 7.579104011568032e-07, "loss": 0.0005, "step": 400780 }, { "epoch": 4.282173193012447, "grad_norm": 0.009919824078679085, "learning_rate": 7.578960077739032e-07, "loss": 0.0012, "step": 400790 }, { "epoch": 4.282280036326727, "grad_norm": 0.011471150442957878, "learning_rate": 7.578816140998192e-07, "loss": 0.0011, "step": 400800 }, { "epoch": 4.282386879641006, "grad_norm": 0.012953363358974457, "learning_rate": 7.578672201345672e-07, "loss": 0.0001, "step": 400810 }, { "epoch": 4.282493722955286, "grad_norm": 0.03168370947241783, "learning_rate": 7.578528258781638e-07, "loss": 0.0048, "step": 400820 }, { "epoch": 4.282600566269566, "grad_norm": 1.395664095878601, "learning_rate": 7.578384313306252e-07, "loss": 0.0072, "step": 400830 }, { "epoch": 4.282707409583845, "grad_norm": 0.13344626128673553, "learning_rate": 7.578240364919675e-07, "loss": 0.0062, "step": 400840 }, { "epoch": 4.282814252898125, "grad_norm": 0.0032902576494961977, "learning_rate": 7.578096413622069e-07, "loss": 0.0158, "step": 400850 }, { "epoch": 4.282921096212404, "grad_norm": 4.151134490966797, "learning_rate": 7.577952459413598e-07, "loss": 0.0115, "step": 400860 }, { "epoch": 4.283027939526684, "grad_norm": 0.38290947675704956, "learning_rate": 7.577808502294424e-07, "loss": 0.0017, "step": 400870 }, { "epoch": 4.283134782840964, "grad_norm": 2.6905157566070557, "learning_rate": 7.57766454226471e-07, "loss": 0.008, "step": 400880 }, { "epoch": 4.2832416261552435, "grad_norm": 2.65171217918396, "learning_rate": 7.577520579324617e-07, "loss": 0.0038, "step": 400890 }, { "epoch": 4.283348469469523, "grad_norm": 0.018389742821455002, "learning_rate": 7.577376613474311e-07, "loss": 0.0015, "step": 400900 }, { "epoch": 4.283455312783802, "grad_norm": 0.03461446985602379, "learning_rate": 7.577232644713951e-07, "loss": 0.0114, "step": 400910 }, { "epoch": 4.283562156098082, "grad_norm": 3.4523420333862305, "learning_rate": 7.577088673043701e-07, "loss": 0.0113, "step": 400920 }, { "epoch": 4.283668999412361, "grad_norm": 0.04347686469554901, "learning_rate": 7.576944698463723e-07, "loss": 0.0017, "step": 400930 }, { "epoch": 4.283775842726642, "grad_norm": 0.13866402208805084, "learning_rate": 7.576800720974179e-07, "loss": 0.0071, "step": 400940 }, { "epoch": 4.283882686040921, "grad_norm": 0.0009196464088745415, "learning_rate": 7.576656740575234e-07, "loss": 0.004, "step": 400950 }, { "epoch": 4.283989529355201, "grad_norm": 2.942304849624634, "learning_rate": 7.576512757267049e-07, "loss": 0.0077, "step": 400960 }, { "epoch": 4.28409637266948, "grad_norm": 0.1105140894651413, "learning_rate": 7.576368771049787e-07, "loss": 0.0029, "step": 400970 }, { "epoch": 4.2842032159837595, "grad_norm": 0.0012906603515148163, "learning_rate": 7.576224781923608e-07, "loss": 0.0079, "step": 400980 }, { "epoch": 4.28431005929804, "grad_norm": 0.0018462594598531723, "learning_rate": 7.576080789888679e-07, "loss": 0.0189, "step": 400990 }, { "epoch": 4.284416902612319, "grad_norm": 2.590660572052002, "learning_rate": 7.575936794945159e-07, "loss": 0.0065, "step": 401000 }, { "epoch": 4.284523745926599, "grad_norm": 0.001879918621852994, "learning_rate": 7.575792797093213e-07, "loss": 0.0094, "step": 401010 }, { "epoch": 4.284630589240878, "grad_norm": 0.06327354907989502, "learning_rate": 7.575648796333003e-07, "loss": 0.0029, "step": 401020 }, { "epoch": 4.284737432555158, "grad_norm": 0.021504726260900497, "learning_rate": 7.575504792664689e-07, "loss": 0.0121, "step": 401030 }, { "epoch": 4.284844275869437, "grad_norm": 0.012866104021668434, "learning_rate": 7.575360786088438e-07, "loss": 0.0018, "step": 401040 }, { "epoch": 4.284951119183717, "grad_norm": 0.023235175758600235, "learning_rate": 7.575216776604409e-07, "loss": 0.0025, "step": 401050 }, { "epoch": 4.285057962497997, "grad_norm": 0.719160795211792, "learning_rate": 7.575072764212767e-07, "loss": 0.0007, "step": 401060 }, { "epoch": 4.285164805812276, "grad_norm": 0.0022306255996227264, "learning_rate": 7.574928748913672e-07, "loss": 0.024, "step": 401070 }, { "epoch": 4.285271649126556, "grad_norm": 3.38258695602417, "learning_rate": 7.574784730707288e-07, "loss": 0.0048, "step": 401080 }, { "epoch": 4.285378492440835, "grad_norm": 0.0022828930523246527, "learning_rate": 7.57464070959378e-07, "loss": 0.0026, "step": 401090 }, { "epoch": 4.285485335755115, "grad_norm": 0.07955840975046158, "learning_rate": 7.574496685573307e-07, "loss": 0.0015, "step": 401100 }, { "epoch": 4.285592179069395, "grad_norm": 0.5609647035598755, "learning_rate": 7.574352658646032e-07, "loss": 0.0018, "step": 401110 }, { "epoch": 4.285699022383675, "grad_norm": 0.03153929486870766, "learning_rate": 7.57420862881212e-07, "loss": 0.0102, "step": 401120 }, { "epoch": 4.285805865697954, "grad_norm": 7.733419895172119, "learning_rate": 7.574064596071732e-07, "loss": 0.0086, "step": 401130 }, { "epoch": 4.2859127090122335, "grad_norm": 0.1592095047235489, "learning_rate": 7.573920560425029e-07, "loss": 0.0067, "step": 401140 }, { "epoch": 4.286019552326513, "grad_norm": 1.2537587881088257, "learning_rate": 7.573776521872178e-07, "loss": 0.0157, "step": 401150 }, { "epoch": 4.2861263956407925, "grad_norm": 0.21395263075828552, "learning_rate": 7.573632480413336e-07, "loss": 0.0019, "step": 401160 }, { "epoch": 4.286233238955073, "grad_norm": 0.08332173526287079, "learning_rate": 7.573488436048672e-07, "loss": 0.0005, "step": 401170 }, { "epoch": 4.286340082269352, "grad_norm": 0.0036492166109383106, "learning_rate": 7.573344388778345e-07, "loss": 0.0071, "step": 401180 }, { "epoch": 4.286446925583632, "grad_norm": 0.21215307712554932, "learning_rate": 7.573200338602515e-07, "loss": 0.0029, "step": 401190 }, { "epoch": 4.286553768897911, "grad_norm": 0.08821403235197067, "learning_rate": 7.57305628552135e-07, "loss": 0.0022, "step": 401200 }, { "epoch": 4.286660612212191, "grad_norm": 0.0031833613757044077, "learning_rate": 7.57291222953501e-07, "loss": 0.0012, "step": 401210 }, { "epoch": 4.28676745552647, "grad_norm": 0.024342967197299004, "learning_rate": 7.572768170643657e-07, "loss": 0.0014, "step": 401220 }, { "epoch": 4.28687429884075, "grad_norm": 0.001022359007038176, "learning_rate": 7.572624108847456e-07, "loss": 0.0272, "step": 401230 }, { "epoch": 4.28698114215503, "grad_norm": 0.0023775871377438307, "learning_rate": 7.572480044146567e-07, "loss": 0.0017, "step": 401240 }, { "epoch": 4.287087985469309, "grad_norm": 0.00979597307741642, "learning_rate": 7.572335976541153e-07, "loss": 0.0496, "step": 401250 }, { "epoch": 4.287194828783589, "grad_norm": 2.2666592597961426, "learning_rate": 7.57219190603138e-07, "loss": 0.005, "step": 401260 }, { "epoch": 4.287301672097868, "grad_norm": 0.006968784611672163, "learning_rate": 7.572047832617406e-07, "loss": 0.0131, "step": 401270 }, { "epoch": 4.287408515412148, "grad_norm": 0.03421091288328171, "learning_rate": 7.571903756299398e-07, "loss": 0.0001, "step": 401280 }, { "epoch": 4.287515358726428, "grad_norm": 0.031592439860105515, "learning_rate": 7.571759677077514e-07, "loss": 0.0135, "step": 401290 }, { "epoch": 4.2876222020407075, "grad_norm": 0.0062255859375, "learning_rate": 7.571615594951921e-07, "loss": 0.0029, "step": 401300 }, { "epoch": 4.287729045354987, "grad_norm": 0.07496913522481918, "learning_rate": 7.571471509922779e-07, "loss": 0.0142, "step": 401310 }, { "epoch": 4.2878358886692665, "grad_norm": 0.00048448247252963483, "learning_rate": 7.571327421990252e-07, "loss": 0.0069, "step": 401320 }, { "epoch": 4.287942731983546, "grad_norm": 0.6925084590911865, "learning_rate": 7.571183331154503e-07, "loss": 0.003, "step": 401330 }, { "epoch": 4.288049575297825, "grad_norm": 0.003831160254776478, "learning_rate": 7.571039237415693e-07, "loss": 0.0045, "step": 401340 }, { "epoch": 4.288156418612106, "grad_norm": 0.011868872679769993, "learning_rate": 7.570895140773986e-07, "loss": 0.0152, "step": 401350 }, { "epoch": 4.288263261926385, "grad_norm": 0.40820178389549255, "learning_rate": 7.570751041229544e-07, "loss": 0.0008, "step": 401360 }, { "epoch": 4.288370105240665, "grad_norm": 4.679388523101807, "learning_rate": 7.570606938782532e-07, "loss": 0.0157, "step": 401370 }, { "epoch": 4.288476948554944, "grad_norm": 3.3685500621795654, "learning_rate": 7.570462833433109e-07, "loss": 0.0322, "step": 401380 }, { "epoch": 4.288583791869224, "grad_norm": 0.10042159259319305, "learning_rate": 7.570318725181438e-07, "loss": 0.0004, "step": 401390 }, { "epoch": 4.288690635183503, "grad_norm": 0.244571253657341, "learning_rate": 7.570174614027685e-07, "loss": 0.0012, "step": 401400 }, { "epoch": 4.288797478497783, "grad_norm": 0.051864851266145706, "learning_rate": 7.570030499972012e-07, "loss": 0.0127, "step": 401410 }, { "epoch": 4.288904321812063, "grad_norm": 0.8681322336196899, "learning_rate": 7.569886383014578e-07, "loss": 0.0041, "step": 401420 }, { "epoch": 4.289011165126342, "grad_norm": 0.009356275200843811, "learning_rate": 7.569742263155549e-07, "loss": 0.0004, "step": 401430 }, { "epoch": 4.289118008440622, "grad_norm": 0.058378878980875015, "learning_rate": 7.569598140395089e-07, "loss": 0.0046, "step": 401440 }, { "epoch": 4.289224851754901, "grad_norm": 0.0010410929098725319, "learning_rate": 7.569454014733356e-07, "loss": 0.0184, "step": 401450 }, { "epoch": 4.289331695069181, "grad_norm": 0.15306690335273743, "learning_rate": 7.569309886170518e-07, "loss": 0.0062, "step": 401460 }, { "epoch": 4.289438538383461, "grad_norm": 0.0018214834854006767, "learning_rate": 7.569165754706734e-07, "loss": 0.0078, "step": 401470 }, { "epoch": 4.2895453816977405, "grad_norm": 0.006315433885902166, "learning_rate": 7.569021620342169e-07, "loss": 0.0051, "step": 401480 }, { "epoch": 4.28965222501202, "grad_norm": 0.002148653380572796, "learning_rate": 7.568877483076983e-07, "loss": 0.0075, "step": 401490 }, { "epoch": 4.289759068326299, "grad_norm": 0.005227171815931797, "learning_rate": 7.568733342911342e-07, "loss": 0.0054, "step": 401500 }, { "epoch": 4.289865911640579, "grad_norm": 0.5916914343833923, "learning_rate": 7.568589199845406e-07, "loss": 0.0114, "step": 401510 }, { "epoch": 4.289972754954858, "grad_norm": 2.0285122394561768, "learning_rate": 7.568445053879339e-07, "loss": 0.0119, "step": 401520 }, { "epoch": 4.290079598269139, "grad_norm": 0.8990175724029541, "learning_rate": 7.568300905013305e-07, "loss": 0.0013, "step": 401530 }, { "epoch": 4.290186441583418, "grad_norm": 1.7323330640792847, "learning_rate": 7.568156753247463e-07, "loss": 0.032, "step": 401540 }, { "epoch": 4.290293284897698, "grad_norm": 0.0017384184757247567, "learning_rate": 7.568012598581981e-07, "loss": 0.0135, "step": 401550 }, { "epoch": 4.290400128211977, "grad_norm": 2.2484142780303955, "learning_rate": 7.567868441017016e-07, "loss": 0.0026, "step": 401560 }, { "epoch": 4.2905069715262565, "grad_norm": 1.9547507762908936, "learning_rate": 7.567724280552736e-07, "loss": 0.0059, "step": 401570 }, { "epoch": 4.290613814840536, "grad_norm": 2.042262554168701, "learning_rate": 7.567580117189301e-07, "loss": 0.0009, "step": 401580 }, { "epoch": 4.290720658154816, "grad_norm": 0.0007590198656544089, "learning_rate": 7.567435950926874e-07, "loss": 0.0003, "step": 401590 }, { "epoch": 4.290827501469096, "grad_norm": 0.0032750237733125687, "learning_rate": 7.567291781765618e-07, "loss": 0.0045, "step": 401600 }, { "epoch": 4.290934344783375, "grad_norm": 0.0013266053283587098, "learning_rate": 7.567147609705695e-07, "loss": 0.0133, "step": 401610 }, { "epoch": 4.291041188097655, "grad_norm": 5.634469032287598, "learning_rate": 7.56700343474727e-07, "loss": 0.0141, "step": 401620 }, { "epoch": 4.291148031411934, "grad_norm": 0.06022840365767479, "learning_rate": 7.566859256890504e-07, "loss": 0.0026, "step": 401630 }, { "epoch": 4.291254874726214, "grad_norm": 0.13746117055416107, "learning_rate": 7.566715076135559e-07, "loss": 0.0033, "step": 401640 }, { "epoch": 4.291361718040494, "grad_norm": 0.059141576290130615, "learning_rate": 7.5665708924826e-07, "loss": 0.03, "step": 401650 }, { "epoch": 4.291468561354773, "grad_norm": 1.769497036933899, "learning_rate": 7.56642670593179e-07, "loss": 0.0149, "step": 401660 }, { "epoch": 4.291575404669053, "grad_norm": 8.397428512573242, "learning_rate": 7.566282516483288e-07, "loss": 0.0196, "step": 401670 }, { "epoch": 4.291682247983332, "grad_norm": 0.0008133031660690904, "learning_rate": 7.566138324137261e-07, "loss": 0.0041, "step": 401680 }, { "epoch": 4.291789091297612, "grad_norm": 3.606964349746704, "learning_rate": 7.565994128893871e-07, "loss": 0.0165, "step": 401690 }, { "epoch": 4.291895934611892, "grad_norm": 1.748495101928711, "learning_rate": 7.565849930753279e-07, "loss": 0.0093, "step": 401700 }, { "epoch": 4.292002777926172, "grad_norm": 0.055273864418268204, "learning_rate": 7.565705729715648e-07, "loss": 0.0075, "step": 401710 }, { "epoch": 4.292109621240451, "grad_norm": 3.7952449321746826, "learning_rate": 7.565561525781142e-07, "loss": 0.0095, "step": 401720 }, { "epoch": 4.2922164645547305, "grad_norm": 4.2853569984436035, "learning_rate": 7.565417318949923e-07, "loss": 0.0067, "step": 401730 }, { "epoch": 4.29232330786901, "grad_norm": 1.1117044687271118, "learning_rate": 7.565273109222154e-07, "loss": 0.0086, "step": 401740 }, { "epoch": 4.292430151183289, "grad_norm": 0.030005736276507378, "learning_rate": 7.565128896598001e-07, "loss": 0.0176, "step": 401750 }, { "epoch": 4.292536994497569, "grad_norm": 0.030375169590115547, "learning_rate": 7.564984681077621e-07, "loss": 0.0295, "step": 401760 }, { "epoch": 4.292643837811849, "grad_norm": 0.49610817432403564, "learning_rate": 7.564840462661181e-07, "loss": 0.0046, "step": 401770 }, { "epoch": 4.292750681126129, "grad_norm": 0.3138885200023651, "learning_rate": 7.564696241348843e-07, "loss": 0.0058, "step": 401780 }, { "epoch": 4.292857524440408, "grad_norm": 0.055596474558115005, "learning_rate": 7.564552017140766e-07, "loss": 0.0089, "step": 401790 }, { "epoch": 4.292964367754688, "grad_norm": 0.20631523430347443, "learning_rate": 7.56440779003712e-07, "loss": 0.0086, "step": 401800 }, { "epoch": 4.293071211068967, "grad_norm": 7.183109760284424, "learning_rate": 7.564263560038062e-07, "loss": 0.0024, "step": 401810 }, { "epoch": 4.293178054383247, "grad_norm": 0.016577983275055885, "learning_rate": 7.564119327143757e-07, "loss": 0.0036, "step": 401820 }, { "epoch": 4.293284897697527, "grad_norm": 0.0864168331027031, "learning_rate": 7.563975091354369e-07, "loss": 0.005, "step": 401830 }, { "epoch": 4.293391741011806, "grad_norm": 0.7951733469963074, "learning_rate": 7.563830852670059e-07, "loss": 0.0059, "step": 401840 }, { "epoch": 4.293498584326086, "grad_norm": 0.02642362378537655, "learning_rate": 7.563686611090989e-07, "loss": 0.0054, "step": 401850 }, { "epoch": 4.293605427640365, "grad_norm": 0.004644428379833698, "learning_rate": 7.563542366617326e-07, "loss": 0.0001, "step": 401860 }, { "epoch": 4.293712270954645, "grad_norm": 0.006954877637326717, "learning_rate": 7.563398119249229e-07, "loss": 0.0189, "step": 401870 }, { "epoch": 4.293819114268924, "grad_norm": 2.1022653579711914, "learning_rate": 7.563253868986861e-07, "loss": 0.0063, "step": 401880 }, { "epoch": 4.2939259575832045, "grad_norm": 0.01322533655911684, "learning_rate": 7.563109615830387e-07, "loss": 0.0028, "step": 401890 }, { "epoch": 4.294032800897484, "grad_norm": 5.288788795471191, "learning_rate": 7.562965359779969e-07, "loss": 0.0047, "step": 401900 }, { "epoch": 4.294139644211763, "grad_norm": 0.1238965094089508, "learning_rate": 7.56282110083577e-07, "loss": 0.0066, "step": 401910 }, { "epoch": 4.294246487526043, "grad_norm": 0.29348355531692505, "learning_rate": 7.56267683899795e-07, "loss": 0.02, "step": 401920 }, { "epoch": 4.294353330840322, "grad_norm": 0.07128559052944183, "learning_rate": 7.562532574266678e-07, "loss": 0.0032, "step": 401930 }, { "epoch": 4.294460174154603, "grad_norm": 1.0784590244293213, "learning_rate": 7.562388306642111e-07, "loss": 0.0022, "step": 401940 }, { "epoch": 4.294567017468882, "grad_norm": 0.009071986190974712, "learning_rate": 7.562244036124414e-07, "loss": 0.0047, "step": 401950 }, { "epoch": 4.294673860783162, "grad_norm": 0.6450315713882446, "learning_rate": 7.562099762713752e-07, "loss": 0.0049, "step": 401960 }, { "epoch": 4.294780704097441, "grad_norm": 0.028874285519123077, "learning_rate": 7.561955486410285e-07, "loss": 0.0069, "step": 401970 }, { "epoch": 4.2948875474117205, "grad_norm": 0.07995748519897461, "learning_rate": 7.561811207214177e-07, "loss": 0.0188, "step": 401980 }, { "epoch": 4.294994390726, "grad_norm": 0.6882820725440979, "learning_rate": 7.56166692512559e-07, "loss": 0.0012, "step": 401990 }, { "epoch": 4.29510123404028, "grad_norm": 0.8610068559646606, "learning_rate": 7.561522640144688e-07, "loss": 0.017, "step": 402000 }, { "epoch": 4.29520807735456, "grad_norm": 0.001877646311186254, "learning_rate": 7.561378352271634e-07, "loss": 0.0054, "step": 402010 }, { "epoch": 4.295314920668839, "grad_norm": 0.0073135532438755035, "learning_rate": 7.56123406150659e-07, "loss": 0.0579, "step": 402020 }, { "epoch": 4.295421763983119, "grad_norm": 0.00032466635457240045, "learning_rate": 7.561089767849721e-07, "loss": 0.0019, "step": 402030 }, { "epoch": 4.295528607297398, "grad_norm": 0.0005399680230766535, "learning_rate": 7.560945471301187e-07, "loss": 0.0028, "step": 402040 }, { "epoch": 4.295635450611678, "grad_norm": 3.4158382415771484, "learning_rate": 7.560801171861153e-07, "loss": 0.0103, "step": 402050 }, { "epoch": 4.295742293925958, "grad_norm": 0.3866570293903351, "learning_rate": 7.56065686952978e-07, "loss": 0.0215, "step": 402060 }, { "epoch": 4.295849137240237, "grad_norm": 0.0013947151601314545, "learning_rate": 7.560512564307233e-07, "loss": 0.0051, "step": 402070 }, { "epoch": 4.295955980554517, "grad_norm": 0.0234841238707304, "learning_rate": 7.560368256193674e-07, "loss": 0.0162, "step": 402080 }, { "epoch": 4.296062823868796, "grad_norm": 0.07669203728437424, "learning_rate": 7.560223945189267e-07, "loss": 0.0022, "step": 402090 }, { "epoch": 4.296169667183076, "grad_norm": 0.9433596134185791, "learning_rate": 7.560079631294172e-07, "loss": 0.0015, "step": 402100 }, { "epoch": 4.296276510497355, "grad_norm": 0.12931157648563385, "learning_rate": 7.559935314508554e-07, "loss": 0.0444, "step": 402110 }, { "epoch": 4.296383353811636, "grad_norm": 4.763599395751953, "learning_rate": 7.559790994832577e-07, "loss": 0.0055, "step": 402120 }, { "epoch": 4.296490197125915, "grad_norm": 9.954904556274414, "learning_rate": 7.559646672266404e-07, "loss": 0.0084, "step": 402130 }, { "epoch": 4.2965970404401945, "grad_norm": 5.217856407165527, "learning_rate": 7.559502346810194e-07, "loss": 0.0142, "step": 402140 }, { "epoch": 4.296703883754474, "grad_norm": 0.27998653054237366, "learning_rate": 7.559358018464114e-07, "loss": 0.0248, "step": 402150 }, { "epoch": 4.296810727068753, "grad_norm": 0.6957483291625977, "learning_rate": 7.559213687228326e-07, "loss": 0.0022, "step": 402160 }, { "epoch": 4.296917570383033, "grad_norm": 0.12101498246192932, "learning_rate": 7.559069353102992e-07, "loss": 0.0117, "step": 402170 }, { "epoch": 4.297024413697313, "grad_norm": 7.9193949699401855, "learning_rate": 7.558925016088275e-07, "loss": 0.0427, "step": 402180 }, { "epoch": 4.297131257011593, "grad_norm": 5.7904486656188965, "learning_rate": 7.558780676184339e-07, "loss": 0.0297, "step": 402190 }, { "epoch": 4.297238100325872, "grad_norm": 1.1626120805740356, "learning_rate": 7.558636333391346e-07, "loss": 0.0037, "step": 402200 }, { "epoch": 4.297344943640152, "grad_norm": 0.01268385536968708, "learning_rate": 7.558491987709461e-07, "loss": 0.0026, "step": 402210 }, { "epoch": 4.297451786954431, "grad_norm": 2.385101318359375, "learning_rate": 7.558347639138844e-07, "loss": 0.0059, "step": 402220 }, { "epoch": 4.2975586302687105, "grad_norm": 0.03098544105887413, "learning_rate": 7.558203287679661e-07, "loss": 0.0278, "step": 402230 }, { "epoch": 4.297665473582991, "grad_norm": 0.31797701120376587, "learning_rate": 7.558058933332071e-07, "loss": 0.0013, "step": 402240 }, { "epoch": 4.29777231689727, "grad_norm": 3.1805195808410645, "learning_rate": 7.557914576096242e-07, "loss": 0.0086, "step": 402250 }, { "epoch": 4.29787916021155, "grad_norm": 8.790792465209961, "learning_rate": 7.557770215972332e-07, "loss": 0.0043, "step": 402260 }, { "epoch": 4.297986003525829, "grad_norm": 0.03335234895348549, "learning_rate": 7.557625852960508e-07, "loss": 0.0007, "step": 402270 }, { "epoch": 4.298092846840109, "grad_norm": 0.307807594537735, "learning_rate": 7.55748148706093e-07, "loss": 0.027, "step": 402280 }, { "epoch": 4.298199690154388, "grad_norm": 0.0007617842638865113, "learning_rate": 7.557337118273763e-07, "loss": 0.0196, "step": 402290 }, { "epoch": 4.2983065334686685, "grad_norm": 3.1981396675109863, "learning_rate": 7.557192746599171e-07, "loss": 0.0008, "step": 402300 }, { "epoch": 4.298413376782948, "grad_norm": 0.015578756108880043, "learning_rate": 7.557048372037313e-07, "loss": 0.0084, "step": 402310 }, { "epoch": 4.2985202200972275, "grad_norm": 3.10921573638916, "learning_rate": 7.556903994588356e-07, "loss": 0.0088, "step": 402320 }, { "epoch": 4.298627063411507, "grad_norm": 0.15130844712257385, "learning_rate": 7.556759614252461e-07, "loss": 0.0031, "step": 402330 }, { "epoch": 4.298733906725786, "grad_norm": 9.023237228393555, "learning_rate": 7.556615231029791e-07, "loss": 0.0035, "step": 402340 }, { "epoch": 4.298840750040066, "grad_norm": 0.15464623272418976, "learning_rate": 7.556470844920511e-07, "loss": 0.0118, "step": 402350 }, { "epoch": 4.298947593354346, "grad_norm": 0.0003948527737520635, "learning_rate": 7.55632645592478e-07, "loss": 0.0031, "step": 402360 }, { "epoch": 4.299054436668626, "grad_norm": 0.9124096035957336, "learning_rate": 7.556182064042764e-07, "loss": 0.0006, "step": 402370 }, { "epoch": 4.299161279982905, "grad_norm": 0.011773341335356236, "learning_rate": 7.556037669274627e-07, "loss": 0.008, "step": 402380 }, { "epoch": 4.2992681232971846, "grad_norm": 0.0019923595245927572, "learning_rate": 7.555893271620529e-07, "loss": 0.0116, "step": 402390 }, { "epoch": 4.299374966611464, "grad_norm": 1.0474201440811157, "learning_rate": 7.555748871080636e-07, "loss": 0.0063, "step": 402400 }, { "epoch": 4.299481809925744, "grad_norm": 0.01346641406416893, "learning_rate": 7.55560446765511e-07, "loss": 0.0006, "step": 402410 }, { "epoch": 4.299588653240024, "grad_norm": 6.0196213722229, "learning_rate": 7.555460061344112e-07, "loss": 0.0087, "step": 402420 }, { "epoch": 4.299695496554303, "grad_norm": 1.3337608575820923, "learning_rate": 7.555315652147807e-07, "loss": 0.0086, "step": 402430 }, { "epoch": 4.299802339868583, "grad_norm": 0.04863704741001129, "learning_rate": 7.555171240066358e-07, "loss": 0.0065, "step": 402440 }, { "epoch": 4.299909183182862, "grad_norm": 0.12283431738615036, "learning_rate": 7.555026825099928e-07, "loss": 0.004, "step": 402450 }, { "epoch": 4.300016026497142, "grad_norm": 0.3023757040500641, "learning_rate": 7.55488240724868e-07, "loss": 0.0022, "step": 402460 }, { "epoch": 4.300122869811421, "grad_norm": 0.28106197714805603, "learning_rate": 7.554737986512779e-07, "loss": 0.0065, "step": 402470 }, { "epoch": 4.3002297131257015, "grad_norm": 0.0011096108937636018, "learning_rate": 7.554593562892382e-07, "loss": 0.0062, "step": 402480 }, { "epoch": 4.300336556439981, "grad_norm": 0.0021019470877945423, "learning_rate": 7.554449136387657e-07, "loss": 0.0004, "step": 402490 }, { "epoch": 4.30044339975426, "grad_norm": 0.25417250394821167, "learning_rate": 7.554304706998768e-07, "loss": 0.0704, "step": 402500 }, { "epoch": 4.30055024306854, "grad_norm": 3.0633351802825928, "learning_rate": 7.554160274725874e-07, "loss": 0.0043, "step": 402510 }, { "epoch": 4.300657086382819, "grad_norm": 0.013229943811893463, "learning_rate": 7.554015839569143e-07, "loss": 0.002, "step": 402520 }, { "epoch": 4.3007639296971, "grad_norm": 0.0034551143180578947, "learning_rate": 7.553871401528733e-07, "loss": 0.0054, "step": 402530 }, { "epoch": 4.300870773011379, "grad_norm": 0.02853735350072384, "learning_rate": 7.553726960604811e-07, "loss": 0.0054, "step": 402540 }, { "epoch": 4.300977616325659, "grad_norm": 0.0007862742058932781, "learning_rate": 7.553582516797536e-07, "loss": 0.0017, "step": 402550 }, { "epoch": 4.301084459639938, "grad_norm": 7.3797078132629395, "learning_rate": 7.553438070107076e-07, "loss": 0.0169, "step": 402560 }, { "epoch": 4.3011913029542175, "grad_norm": 0.001067720353603363, "learning_rate": 7.553293620533591e-07, "loss": 0.0063, "step": 402570 }, { "epoch": 4.301298146268497, "grad_norm": 0.002391402842476964, "learning_rate": 7.553149168077245e-07, "loss": 0.0002, "step": 402580 }, { "epoch": 4.301404989582776, "grad_norm": 0.005876101087778807, "learning_rate": 7.5530047127382e-07, "loss": 0.0427, "step": 402590 }, { "epoch": 4.301511832897057, "grad_norm": 0.00500808609649539, "learning_rate": 7.552860254516622e-07, "loss": 0.008, "step": 402600 }, { "epoch": 4.301618676211336, "grad_norm": 0.0007184497080743313, "learning_rate": 7.552715793412672e-07, "loss": 0.0017, "step": 402610 }, { "epoch": 4.301725519525616, "grad_norm": 0.009891791269183159, "learning_rate": 7.55257132942651e-07, "loss": 0.0025, "step": 402620 }, { "epoch": 4.301832362839895, "grad_norm": 0.004284918773919344, "learning_rate": 7.552426862558306e-07, "loss": 0.0037, "step": 402630 }, { "epoch": 4.301939206154175, "grad_norm": 0.008397330529987812, "learning_rate": 7.552282392808218e-07, "loss": 0.003, "step": 402640 }, { "epoch": 4.302046049468455, "grad_norm": 0.005730613134801388, "learning_rate": 7.552137920176409e-07, "loss": 0.0004, "step": 402650 }, { "epoch": 4.302152892782734, "grad_norm": 7.056863784790039, "learning_rate": 7.551993444663045e-07, "loss": 0.0122, "step": 402660 }, { "epoch": 4.302259736097014, "grad_norm": 0.11784488707780838, "learning_rate": 7.551848966268288e-07, "loss": 0.0037, "step": 402670 }, { "epoch": 4.302366579411293, "grad_norm": 1.35044264793396, "learning_rate": 7.5517044849923e-07, "loss": 0.0169, "step": 402680 }, { "epoch": 4.302473422725573, "grad_norm": 0.00206974009051919, "learning_rate": 7.551560000835248e-07, "loss": 0.0033, "step": 402690 }, { "epoch": 4.302580266039852, "grad_norm": 0.007072224747389555, "learning_rate": 7.55141551379729e-07, "loss": 0.0053, "step": 402700 }, { "epoch": 4.302687109354133, "grad_norm": 6.246246814727783, "learning_rate": 7.551271023878591e-07, "loss": 0.0126, "step": 402710 }, { "epoch": 4.302793952668412, "grad_norm": 0.0013772522797808051, "learning_rate": 7.551126531079315e-07, "loss": 0.0176, "step": 402720 }, { "epoch": 4.3029007959826915, "grad_norm": 0.22560608386993408, "learning_rate": 7.550982035399624e-07, "loss": 0.0009, "step": 402730 }, { "epoch": 4.303007639296971, "grad_norm": 0.10687976330518723, "learning_rate": 7.550837536839682e-07, "loss": 0.0071, "step": 402740 }, { "epoch": 4.30311448261125, "grad_norm": 0.0018126042559742928, "learning_rate": 7.550693035399652e-07, "loss": 0.0093, "step": 402750 }, { "epoch": 4.30322132592553, "grad_norm": 0.02152722142636776, "learning_rate": 7.550548531079697e-07, "loss": 0.0065, "step": 402760 }, { "epoch": 4.30332816923981, "grad_norm": 0.08131512999534607, "learning_rate": 7.55040402387998e-07, "loss": 0.0138, "step": 402770 }, { "epoch": 4.30343501255409, "grad_norm": 0.020362621173262596, "learning_rate": 7.550259513800665e-07, "loss": 0.0013, "step": 402780 }, { "epoch": 4.303541855868369, "grad_norm": 1.5263149738311768, "learning_rate": 7.550115000841914e-07, "loss": 0.0053, "step": 402790 }, { "epoch": 4.303648699182649, "grad_norm": 0.010735830292105675, "learning_rate": 7.549970485003891e-07, "loss": 0.0002, "step": 402800 }, { "epoch": 4.303755542496928, "grad_norm": 0.7449170351028442, "learning_rate": 7.549825966286761e-07, "loss": 0.0404, "step": 402810 }, { "epoch": 4.3038623858112075, "grad_norm": 1.3292927742004395, "learning_rate": 7.549681444690681e-07, "loss": 0.0157, "step": 402820 }, { "epoch": 4.303969229125488, "grad_norm": 5.103297233581543, "learning_rate": 7.54953692021582e-07, "loss": 0.005, "step": 402830 }, { "epoch": 4.304076072439767, "grad_norm": 0.008811702951788902, "learning_rate": 7.54939239286234e-07, "loss": 0.0052, "step": 402840 }, { "epoch": 4.304182915754047, "grad_norm": 0.011582406237721443, "learning_rate": 7.549247862630402e-07, "loss": 0.0004, "step": 402850 }, { "epoch": 4.304289759068326, "grad_norm": 0.00632636109367013, "learning_rate": 7.549103329520172e-07, "loss": 0.001, "step": 402860 }, { "epoch": 4.304396602382606, "grad_norm": 0.004092279821634293, "learning_rate": 7.548958793531812e-07, "loss": 0.0069, "step": 402870 }, { "epoch": 4.304503445696885, "grad_norm": 3.873046398162842, "learning_rate": 7.548814254665485e-07, "loss": 0.0024, "step": 402880 }, { "epoch": 4.3046102890111655, "grad_norm": 0.07934555411338806, "learning_rate": 7.548669712921354e-07, "loss": 0.0035, "step": 402890 }, { "epoch": 4.304717132325445, "grad_norm": 3.2382047176361084, "learning_rate": 7.548525168299582e-07, "loss": 0.0178, "step": 402900 }, { "epoch": 4.304823975639724, "grad_norm": 0.018311694264411926, "learning_rate": 7.548380620800335e-07, "loss": 0.0, "step": 402910 }, { "epoch": 4.304930818954004, "grad_norm": 0.015507363714277744, "learning_rate": 7.548236070423771e-07, "loss": 0.0057, "step": 402920 }, { "epoch": 4.305037662268283, "grad_norm": 0.1914457082748413, "learning_rate": 7.548091517170058e-07, "loss": 0.0052, "step": 402930 }, { "epoch": 4.305144505582563, "grad_norm": 0.0008271834230981767, "learning_rate": 7.547946961039358e-07, "loss": 0.0013, "step": 402940 }, { "epoch": 4.305251348896843, "grad_norm": 3.503706455230713, "learning_rate": 7.54780240203183e-07, "loss": 0.0089, "step": 402950 }, { "epoch": 4.305358192211123, "grad_norm": 0.0034282139968127012, "learning_rate": 7.547657840147645e-07, "loss": 0.0064, "step": 402960 }, { "epoch": 4.305465035525402, "grad_norm": 0.004075599834322929, "learning_rate": 7.547513275386961e-07, "loss": 0.0078, "step": 402970 }, { "epoch": 4.3055718788396815, "grad_norm": 0.14809736609458923, "learning_rate": 7.547368707749941e-07, "loss": 0.0085, "step": 402980 }, { "epoch": 4.305678722153961, "grad_norm": 0.00049589789705351, "learning_rate": 7.547224137236749e-07, "loss": 0.003, "step": 402990 }, { "epoch": 4.30578556546824, "grad_norm": 0.01378312986344099, "learning_rate": 7.547079563847552e-07, "loss": 0.0169, "step": 403000 }, { "epoch": 4.305892408782521, "grad_norm": 0.0206189826130867, "learning_rate": 7.546934987582507e-07, "loss": 0.0152, "step": 403010 }, { "epoch": 4.3059992520968, "grad_norm": 0.011611081659793854, "learning_rate": 7.54679040844178e-07, "loss": 0.0037, "step": 403020 }, { "epoch": 4.30610609541108, "grad_norm": 0.36371153593063354, "learning_rate": 7.546645826425535e-07, "loss": 0.0209, "step": 403030 }, { "epoch": 4.306212938725359, "grad_norm": 0.7164046168327332, "learning_rate": 7.546501241533936e-07, "loss": 0.0024, "step": 403040 }, { "epoch": 4.306319782039639, "grad_norm": 5.425078392028809, "learning_rate": 7.546356653767143e-07, "loss": 0.0157, "step": 403050 }, { "epoch": 4.306426625353918, "grad_norm": 0.005461183842271566, "learning_rate": 7.546212063125324e-07, "loss": 0.0074, "step": 403060 }, { "epoch": 4.306533468668198, "grad_norm": 1.252152681350708, "learning_rate": 7.546067469608636e-07, "loss": 0.003, "step": 403070 }, { "epoch": 4.306640311982478, "grad_norm": 0.0050572482869029045, "learning_rate": 7.545922873217247e-07, "loss": 0.0262, "step": 403080 }, { "epoch": 4.306747155296757, "grad_norm": 2.905648708343506, "learning_rate": 7.54577827395132e-07, "loss": 0.0196, "step": 403090 }, { "epoch": 4.306853998611037, "grad_norm": 0.01019179169088602, "learning_rate": 7.545633671811016e-07, "loss": 0.0002, "step": 403100 }, { "epoch": 4.306960841925316, "grad_norm": 0.295911967754364, "learning_rate": 7.545489066796499e-07, "loss": 0.0128, "step": 403110 }, { "epoch": 4.307067685239597, "grad_norm": 0.004014180041849613, "learning_rate": 7.545344458907934e-07, "loss": 0.0041, "step": 403120 }, { "epoch": 4.307174528553876, "grad_norm": 10.328322410583496, "learning_rate": 7.545199848145482e-07, "loss": 0.0211, "step": 403130 }, { "epoch": 4.3072813718681555, "grad_norm": 1.3187544345855713, "learning_rate": 7.545055234509308e-07, "loss": 0.057, "step": 403140 }, { "epoch": 4.307388215182435, "grad_norm": 0.020877670496702194, "learning_rate": 7.544910617999574e-07, "loss": 0.0018, "step": 403150 }, { "epoch": 4.307495058496714, "grad_norm": 0.010094563476741314, "learning_rate": 7.544765998616443e-07, "loss": 0.0072, "step": 403160 }, { "epoch": 4.307601901810994, "grad_norm": 5.922644138336182, "learning_rate": 7.54462137636008e-07, "loss": 0.0482, "step": 403170 }, { "epoch": 4.307708745125273, "grad_norm": 0.01633208803832531, "learning_rate": 7.544476751230649e-07, "loss": 0.003, "step": 403180 }, { "epoch": 4.307815588439554, "grad_norm": 1.4779425859451294, "learning_rate": 7.54433212322831e-07, "loss": 0.0142, "step": 403190 }, { "epoch": 4.307922431753833, "grad_norm": 8.032830238342285, "learning_rate": 7.544187492353227e-07, "loss": 0.0089, "step": 403200 }, { "epoch": 4.308029275068113, "grad_norm": 0.0040298886597156525, "learning_rate": 7.544042858605566e-07, "loss": 0.0009, "step": 403210 }, { "epoch": 4.308136118382392, "grad_norm": 3.434262752532959, "learning_rate": 7.543898221985487e-07, "loss": 0.0167, "step": 403220 }, { "epoch": 4.3082429616966715, "grad_norm": 0.07153503596782684, "learning_rate": 7.543753582493158e-07, "loss": 0.0087, "step": 403230 }, { "epoch": 4.308349805010952, "grad_norm": 2.029387950897217, "learning_rate": 7.543608940128737e-07, "loss": 0.0226, "step": 403240 }, { "epoch": 4.308456648325231, "grad_norm": 0.17544466257095337, "learning_rate": 7.54346429489239e-07, "loss": 0.0109, "step": 403250 }, { "epoch": 4.308563491639511, "grad_norm": 0.0018881584983319044, "learning_rate": 7.543319646784279e-07, "loss": 0.0084, "step": 403260 }, { "epoch": 4.30867033495379, "grad_norm": 0.006468127015978098, "learning_rate": 7.543174995804569e-07, "loss": 0.011, "step": 403270 }, { "epoch": 4.30877717826807, "grad_norm": 0.3167194426059723, "learning_rate": 7.543030341953422e-07, "loss": 0.0321, "step": 403280 }, { "epoch": 4.308884021582349, "grad_norm": 0.01971667818725109, "learning_rate": 7.542885685231003e-07, "loss": 0.0008, "step": 403290 }, { "epoch": 4.308990864896629, "grad_norm": 0.4495832324028015, "learning_rate": 7.542741025637473e-07, "loss": 0.0202, "step": 403300 }, { "epoch": 4.309097708210909, "grad_norm": 0.08095184713602066, "learning_rate": 7.542596363172997e-07, "loss": 0.0055, "step": 403310 }, { "epoch": 4.309204551525188, "grad_norm": 0.01776178367435932, "learning_rate": 7.542451697837737e-07, "loss": 0.0114, "step": 403320 }, { "epoch": 4.309311394839468, "grad_norm": 0.39986321330070496, "learning_rate": 7.542307029631858e-07, "loss": 0.0282, "step": 403330 }, { "epoch": 4.309418238153747, "grad_norm": 0.7228144407272339, "learning_rate": 7.542162358555522e-07, "loss": 0.0087, "step": 403340 }, { "epoch": 4.309525081468027, "grad_norm": 0.19073137640953064, "learning_rate": 7.542017684608894e-07, "loss": 0.007, "step": 403350 }, { "epoch": 4.309631924782307, "grad_norm": 3.201979160308838, "learning_rate": 7.541873007792135e-07, "loss": 0.0217, "step": 403360 }, { "epoch": 4.309738768096587, "grad_norm": 0.0013076398754492402, "learning_rate": 7.54172832810541e-07, "loss": 0.017, "step": 403370 }, { "epoch": 4.309845611410866, "grad_norm": 0.3469477593898773, "learning_rate": 7.541583645548882e-07, "loss": 0.0087, "step": 403380 }, { "epoch": 4.3099524547251455, "grad_norm": 1.3655502796173096, "learning_rate": 7.541438960122713e-07, "loss": 0.0027, "step": 403390 }, { "epoch": 4.310059298039425, "grad_norm": 0.006735296919941902, "learning_rate": 7.541294271827069e-07, "loss": 0.005, "step": 403400 }, { "epoch": 4.3101661413537045, "grad_norm": 0.0019772646483033895, "learning_rate": 7.541149580662111e-07, "loss": 0.0009, "step": 403410 }, { "epoch": 4.310272984667985, "grad_norm": 0.0012011821381747723, "learning_rate": 7.541004886628004e-07, "loss": 0.0305, "step": 403420 }, { "epoch": 4.310379827982264, "grad_norm": 0.0015470058424398303, "learning_rate": 7.540860189724911e-07, "loss": 0.0034, "step": 403430 }, { "epoch": 4.310486671296544, "grad_norm": 1.9297775030136108, "learning_rate": 7.540715489952994e-07, "loss": 0.0119, "step": 403440 }, { "epoch": 4.310593514610823, "grad_norm": 0.0012380363186821342, "learning_rate": 7.540570787312418e-07, "loss": 0.0094, "step": 403450 }, { "epoch": 4.310700357925103, "grad_norm": 0.0038072494789958, "learning_rate": 7.540426081803346e-07, "loss": 0.0273, "step": 403460 }, { "epoch": 4.310807201239382, "grad_norm": 0.2751796245574951, "learning_rate": 7.540281373425941e-07, "loss": 0.0143, "step": 403470 }, { "epoch": 4.3109140445536624, "grad_norm": 6.184854984283447, "learning_rate": 7.540136662180368e-07, "loss": 0.0134, "step": 403480 }, { "epoch": 4.311020887867942, "grad_norm": 1.1378165483474731, "learning_rate": 7.539991948066786e-07, "loss": 0.0089, "step": 403490 }, { "epoch": 4.311127731182221, "grad_norm": 0.3399146795272827, "learning_rate": 7.539847231085363e-07, "loss": 0.0028, "step": 403500 }, { "epoch": 4.311234574496501, "grad_norm": 0.013624929822981358, "learning_rate": 7.539702511236261e-07, "loss": 0.0008, "step": 403510 }, { "epoch": 4.31134141781078, "grad_norm": 2.666938304901123, "learning_rate": 7.539557788519644e-07, "loss": 0.0032, "step": 403520 }, { "epoch": 4.31144826112506, "grad_norm": 0.009060705080628395, "learning_rate": 7.539413062935672e-07, "loss": 0.0204, "step": 403530 }, { "epoch": 4.31155510443934, "grad_norm": 3.586332082748413, "learning_rate": 7.539268334484514e-07, "loss": 0.0053, "step": 403540 }, { "epoch": 4.3116619477536196, "grad_norm": 0.0029339289758354425, "learning_rate": 7.539123603166328e-07, "loss": 0.0025, "step": 403550 }, { "epoch": 4.311768791067899, "grad_norm": 0.10728287696838379, "learning_rate": 7.53897886898128e-07, "loss": 0.0605, "step": 403560 }, { "epoch": 4.3118756343821785, "grad_norm": 11.147214889526367, "learning_rate": 7.538834131929535e-07, "loss": 0.0225, "step": 403570 }, { "epoch": 4.311982477696458, "grad_norm": 1.1597074270248413, "learning_rate": 7.538689392011255e-07, "loss": 0.0004, "step": 403580 }, { "epoch": 4.312089321010737, "grad_norm": 0.09793544560670853, "learning_rate": 7.538544649226602e-07, "loss": 0.0039, "step": 403590 }, { "epoch": 4.312196164325018, "grad_norm": 0.0073848385363817215, "learning_rate": 7.53839990357574e-07, "loss": 0.0011, "step": 403600 }, { "epoch": 4.312303007639297, "grad_norm": 0.9159025549888611, "learning_rate": 7.538255155058832e-07, "loss": 0.0019, "step": 403610 }, { "epoch": 4.312409850953577, "grad_norm": 1.1423081159591675, "learning_rate": 7.538110403676044e-07, "loss": 0.0328, "step": 403620 }, { "epoch": 4.312516694267856, "grad_norm": 9.920880317687988, "learning_rate": 7.537965649427538e-07, "loss": 0.0089, "step": 403630 }, { "epoch": 4.312623537582136, "grad_norm": 1.5271570682525635, "learning_rate": 7.537820892313477e-07, "loss": 0.0032, "step": 403640 }, { "epoch": 4.312730380896415, "grad_norm": 0.006569542922079563, "learning_rate": 7.537676132334023e-07, "loss": 0.0006, "step": 403650 }, { "epoch": 4.312837224210695, "grad_norm": 0.0008184157777577639, "learning_rate": 7.537531369489343e-07, "loss": 0.0024, "step": 403660 }, { "epoch": 4.312944067524975, "grad_norm": 0.015458481386303902, "learning_rate": 7.537386603779599e-07, "loss": 0.0076, "step": 403670 }, { "epoch": 4.313050910839254, "grad_norm": 17.266035079956055, "learning_rate": 7.537241835204953e-07, "loss": 0.0201, "step": 403680 }, { "epoch": 4.313157754153534, "grad_norm": 5.735929012298584, "learning_rate": 7.53709706376557e-07, "loss": 0.0278, "step": 403690 }, { "epoch": 4.313264597467813, "grad_norm": 0.004758293274790049, "learning_rate": 7.536952289461611e-07, "loss": 0.0017, "step": 403700 }, { "epoch": 4.313371440782093, "grad_norm": 0.006916741840541363, "learning_rate": 7.536807512293243e-07, "loss": 0.0165, "step": 403710 }, { "epoch": 4.313478284096373, "grad_norm": 31.216798782348633, "learning_rate": 7.536662732260627e-07, "loss": 0.0269, "step": 403720 }, { "epoch": 4.3135851274106525, "grad_norm": 0.03167817369103432, "learning_rate": 7.536517949363928e-07, "loss": 0.0086, "step": 403730 }, { "epoch": 4.313691970724932, "grad_norm": 6.378197193145752, "learning_rate": 7.536373163603309e-07, "loss": 0.0681, "step": 403740 }, { "epoch": 4.313798814039211, "grad_norm": 0.7680181264877319, "learning_rate": 7.536228374978935e-07, "loss": 0.0124, "step": 403750 }, { "epoch": 4.313905657353491, "grad_norm": 0.3802092373371124, "learning_rate": 7.536083583490964e-07, "loss": 0.0071, "step": 403760 }, { "epoch": 4.31401250066777, "grad_norm": 6.3144612312316895, "learning_rate": 7.535938789139565e-07, "loss": 0.0538, "step": 403770 }, { "epoch": 4.314119343982051, "grad_norm": 0.006516453344374895, "learning_rate": 7.535793991924901e-07, "loss": 0.0034, "step": 403780 }, { "epoch": 4.31422618729633, "grad_norm": 0.0013817684957757592, "learning_rate": 7.535649191847132e-07, "loss": 0.0019, "step": 403790 }, { "epoch": 4.31433303061061, "grad_norm": 0.26051777601242065, "learning_rate": 7.535504388906425e-07, "loss": 0.007, "step": 403800 }, { "epoch": 4.314439873924889, "grad_norm": 11.724384307861328, "learning_rate": 7.535359583102941e-07, "loss": 0.0146, "step": 403810 }, { "epoch": 4.3145467172391685, "grad_norm": 1.323075532913208, "learning_rate": 7.535214774436847e-07, "loss": 0.0009, "step": 403820 }, { "epoch": 4.314653560553448, "grad_norm": 0.027463672682642937, "learning_rate": 7.535069962908301e-07, "loss": 0.009, "step": 403830 }, { "epoch": 4.314760403867728, "grad_norm": 1.8257497549057007, "learning_rate": 7.534925148517472e-07, "loss": 0.002, "step": 403840 }, { "epoch": 4.314867247182008, "grad_norm": 2.2209558486938477, "learning_rate": 7.53478033126452e-07, "loss": 0.0132, "step": 403850 }, { "epoch": 4.314974090496287, "grad_norm": 0.5430846214294434, "learning_rate": 7.53463551114961e-07, "loss": 0.0015, "step": 403860 }, { "epoch": 4.315080933810567, "grad_norm": 0.4477757215499878, "learning_rate": 7.534490688172905e-07, "loss": 0.0079, "step": 403870 }, { "epoch": 4.315187777124846, "grad_norm": 0.05150282755494118, "learning_rate": 7.534345862334571e-07, "loss": 0.0002, "step": 403880 }, { "epoch": 4.315294620439126, "grad_norm": 0.8743668794631958, "learning_rate": 7.534201033634766e-07, "loss": 0.0042, "step": 403890 }, { "epoch": 4.315401463753406, "grad_norm": 2.1349735260009766, "learning_rate": 7.534056202073658e-07, "loss": 0.0153, "step": 403900 }, { "epoch": 4.315508307067685, "grad_norm": 0.008883630856871605, "learning_rate": 7.533911367651409e-07, "loss": 0.0117, "step": 403910 }, { "epoch": 4.315615150381965, "grad_norm": 0.11305395513772964, "learning_rate": 7.533766530368184e-07, "loss": 0.0004, "step": 403920 }, { "epoch": 4.315721993696244, "grad_norm": 1.4412086009979248, "learning_rate": 7.533621690224144e-07, "loss": 0.0134, "step": 403930 }, { "epoch": 4.315828837010524, "grad_norm": 0.004548176191747189, "learning_rate": 7.533476847219456e-07, "loss": 0.0429, "step": 403940 }, { "epoch": 4.315935680324804, "grad_norm": 0.2360103875398636, "learning_rate": 7.53333200135428e-07, "loss": 0.0147, "step": 403950 }, { "epoch": 4.316042523639084, "grad_norm": 0.0024338981602340937, "learning_rate": 7.53318715262878e-07, "loss": 0.0046, "step": 403960 }, { "epoch": 4.316149366953363, "grad_norm": 0.015931978821754456, "learning_rate": 7.533042301043123e-07, "loss": 0.0035, "step": 403970 }, { "epoch": 4.3162562102676425, "grad_norm": 0.12087896466255188, "learning_rate": 7.532897446597468e-07, "loss": 0.0047, "step": 403980 }, { "epoch": 4.316363053581922, "grad_norm": 0.022578591480851173, "learning_rate": 7.53275258929198e-07, "loss": 0.006, "step": 403990 }, { "epoch": 4.316469896896201, "grad_norm": 6.105622291564941, "learning_rate": 7.532607729126825e-07, "loss": 0.0122, "step": 404000 }, { "epoch": 4.316576740210481, "grad_norm": 0.6051416993141174, "learning_rate": 7.532462866102165e-07, "loss": 0.0373, "step": 404010 }, { "epoch": 4.316683583524761, "grad_norm": 0.1104922965168953, "learning_rate": 7.532318000218161e-07, "loss": 0.0045, "step": 404020 }, { "epoch": 4.316790426839041, "grad_norm": 4.9289398193359375, "learning_rate": 7.532173131474981e-07, "loss": 0.0118, "step": 404030 }, { "epoch": 4.31689727015332, "grad_norm": 0.0066535584628582, "learning_rate": 7.532028259872785e-07, "loss": 0.0094, "step": 404040 }, { "epoch": 4.3170041134676, "grad_norm": 0.019119741395115852, "learning_rate": 7.531883385411739e-07, "loss": 0.0124, "step": 404050 }, { "epoch": 4.317110956781879, "grad_norm": 3.8434133529663086, "learning_rate": 7.531738508092005e-07, "loss": 0.0064, "step": 404060 }, { "epoch": 4.317217800096159, "grad_norm": 0.32349005341529846, "learning_rate": 7.531593627913749e-07, "loss": 0.0111, "step": 404070 }, { "epoch": 4.317324643410439, "grad_norm": 0.01603076234459877, "learning_rate": 7.531448744877129e-07, "loss": 0.0018, "step": 404080 }, { "epoch": 4.317431486724718, "grad_norm": 0.06789665669202805, "learning_rate": 7.531303858982315e-07, "loss": 0.0095, "step": 404090 }, { "epoch": 4.317538330038998, "grad_norm": 0.5342628359794617, "learning_rate": 7.531158970229468e-07, "loss": 0.0027, "step": 404100 }, { "epoch": 4.317645173353277, "grad_norm": 0.9497733116149902, "learning_rate": 7.531014078618749e-07, "loss": 0.0061, "step": 404110 }, { "epoch": 4.317752016667557, "grad_norm": 0.0023210267536342144, "learning_rate": 7.530869184150326e-07, "loss": 0.0001, "step": 404120 }, { "epoch": 4.317858859981837, "grad_norm": 7.267088413238525, "learning_rate": 7.530724286824361e-07, "loss": 0.0031, "step": 404130 }, { "epoch": 4.3179657032961165, "grad_norm": 0.1410461664199829, "learning_rate": 7.530579386641016e-07, "loss": 0.0002, "step": 404140 }, { "epoch": 4.318072546610396, "grad_norm": 0.12769687175750732, "learning_rate": 7.530434483600457e-07, "loss": 0.0072, "step": 404150 }, { "epoch": 4.318179389924675, "grad_norm": 0.010016304440796375, "learning_rate": 7.530289577702846e-07, "loss": 0.0066, "step": 404160 }, { "epoch": 4.318286233238955, "grad_norm": 0.05269487202167511, "learning_rate": 7.530144668948347e-07, "loss": 0.0184, "step": 404170 }, { "epoch": 4.318393076553234, "grad_norm": 0.0024855337105691433, "learning_rate": 7.529999757337125e-07, "loss": 0.0005, "step": 404180 }, { "epoch": 4.318499919867515, "grad_norm": 0.0009654903551563621, "learning_rate": 7.529854842869341e-07, "loss": 0.002, "step": 404190 }, { "epoch": 4.318606763181794, "grad_norm": 0.005918699782341719, "learning_rate": 7.52970992554516e-07, "loss": 0.004, "step": 404200 }, { "epoch": 4.318713606496074, "grad_norm": 0.0024498477578163147, "learning_rate": 7.529565005364747e-07, "loss": 0.023, "step": 404210 }, { "epoch": 4.318820449810353, "grad_norm": 0.23197327554225922, "learning_rate": 7.529420082328261e-07, "loss": 0.0293, "step": 404220 }, { "epoch": 4.3189272931246325, "grad_norm": 0.10787497460842133, "learning_rate": 7.529275156435872e-07, "loss": 0.008, "step": 404230 }, { "epoch": 4.319034136438912, "grad_norm": 0.1716502457857132, "learning_rate": 7.529130227687739e-07, "loss": 0.004, "step": 404240 }, { "epoch": 4.319140979753192, "grad_norm": 0.029597438871860504, "learning_rate": 7.528985296084027e-07, "loss": 0.0157, "step": 404250 }, { "epoch": 4.319247823067472, "grad_norm": 0.003717816900461912, "learning_rate": 7.528840361624901e-07, "loss": 0.005, "step": 404260 }, { "epoch": 4.319354666381751, "grad_norm": 0.0023712271358817816, "learning_rate": 7.528695424310523e-07, "loss": 0.0257, "step": 404270 }, { "epoch": 4.319461509696031, "grad_norm": 0.019076021388173103, "learning_rate": 7.528550484141055e-07, "loss": 0.0007, "step": 404280 }, { "epoch": 4.31956835301031, "grad_norm": 2.2037527561187744, "learning_rate": 7.528405541116664e-07, "loss": 0.0033, "step": 404290 }, { "epoch": 4.31967519632459, "grad_norm": 0.16926726698875427, "learning_rate": 7.528260595237512e-07, "loss": 0.0664, "step": 404300 }, { "epoch": 4.31978203963887, "grad_norm": 0.6643611192703247, "learning_rate": 7.528115646503764e-07, "loss": 0.0109, "step": 404310 }, { "epoch": 4.319888882953149, "grad_norm": 0.008852419443428516, "learning_rate": 7.527970694915583e-07, "loss": 0.0146, "step": 404320 }, { "epoch": 4.319995726267429, "grad_norm": 11.815130233764648, "learning_rate": 7.52782574047313e-07, "loss": 0.0189, "step": 404330 }, { "epoch": 4.320102569581708, "grad_norm": 4.423058032989502, "learning_rate": 7.527680783176573e-07, "loss": 0.004, "step": 404340 }, { "epoch": 4.320209412895988, "grad_norm": 0.0016676447121426463, "learning_rate": 7.527535823026074e-07, "loss": 0.0039, "step": 404350 }, { "epoch": 4.320316256210267, "grad_norm": 1.903465747833252, "learning_rate": 7.527390860021794e-07, "loss": 0.0009, "step": 404360 }, { "epoch": 4.320423099524548, "grad_norm": 0.004087352659553289, "learning_rate": 7.5272458941639e-07, "loss": 0.002, "step": 404370 }, { "epoch": 4.320529942838827, "grad_norm": 0.20906348526477814, "learning_rate": 7.527100925452556e-07, "loss": 0.0021, "step": 404380 }, { "epoch": 4.3206367861531065, "grad_norm": 0.08963946998119354, "learning_rate": 7.526955953887923e-07, "loss": 0.0118, "step": 404390 }, { "epoch": 4.320743629467386, "grad_norm": 0.003892304142937064, "learning_rate": 7.526810979470167e-07, "loss": 0.0147, "step": 404400 }, { "epoch": 4.3208504727816655, "grad_norm": 14.130077362060547, "learning_rate": 7.526666002199449e-07, "loss": 0.0225, "step": 404410 }, { "epoch": 4.320957316095945, "grad_norm": 4.0429158210754395, "learning_rate": 7.526521022075935e-07, "loss": 0.0099, "step": 404420 }, { "epoch": 4.321064159410225, "grad_norm": 0.009832453913986683, "learning_rate": 7.526376039099789e-07, "loss": 0.0053, "step": 404430 }, { "epoch": 4.321171002724505, "grad_norm": 0.30133387446403503, "learning_rate": 7.526231053271173e-07, "loss": 0.0045, "step": 404440 }, { "epoch": 4.321277846038784, "grad_norm": 2.586033821105957, "learning_rate": 7.526086064590251e-07, "loss": 0.0021, "step": 404450 }, { "epoch": 4.321384689353064, "grad_norm": 0.4684886932373047, "learning_rate": 7.525941073057187e-07, "loss": 0.0052, "step": 404460 }, { "epoch": 4.321491532667343, "grad_norm": 0.0021872392389923334, "learning_rate": 7.525796078672146e-07, "loss": 0.0005, "step": 404470 }, { "epoch": 4.3215983759816226, "grad_norm": 0.1694452464580536, "learning_rate": 7.52565108143529e-07, "loss": 0.0356, "step": 404480 }, { "epoch": 4.321705219295903, "grad_norm": 0.0065575349144637585, "learning_rate": 7.525506081346784e-07, "loss": 0.0189, "step": 404490 }, { "epoch": 4.321812062610182, "grad_norm": 0.0015381580451503396, "learning_rate": 7.525361078406789e-07, "loss": 0.0008, "step": 404500 }, { "epoch": 4.321918905924462, "grad_norm": 0.9969807863235474, "learning_rate": 7.525216072615474e-07, "loss": 0.0057, "step": 404510 }, { "epoch": 4.322025749238741, "grad_norm": 0.001336467918008566, "learning_rate": 7.525071063972998e-07, "loss": 0.0044, "step": 404520 }, { "epoch": 4.322132592553021, "grad_norm": 0.14498044550418854, "learning_rate": 7.524926052479525e-07, "loss": 0.0104, "step": 404530 }, { "epoch": 4.3222394358673, "grad_norm": 0.476395845413208, "learning_rate": 7.524781038135222e-07, "loss": 0.013, "step": 404540 }, { "epoch": 4.3223462791815805, "grad_norm": 0.003640304319560528, "learning_rate": 7.524636020940249e-07, "loss": 0.0035, "step": 404550 }, { "epoch": 4.32245312249586, "grad_norm": 7.244779586791992, "learning_rate": 7.524491000894772e-07, "loss": 0.0152, "step": 404560 }, { "epoch": 4.3225599658101395, "grad_norm": 0.010064762085676193, "learning_rate": 7.524345977998954e-07, "loss": 0.0013, "step": 404570 }, { "epoch": 4.322666809124419, "grad_norm": 0.005886219907552004, "learning_rate": 7.52420095225296e-07, "loss": 0.0007, "step": 404580 }, { "epoch": 4.322773652438698, "grad_norm": 0.450449675321579, "learning_rate": 7.52405592365695e-07, "loss": 0.0023, "step": 404590 }, { "epoch": 4.322880495752978, "grad_norm": 0.009278468787670135, "learning_rate": 7.523910892211092e-07, "loss": 0.026, "step": 404600 }, { "epoch": 4.322987339067258, "grad_norm": 0.0022695946972817183, "learning_rate": 7.523765857915549e-07, "loss": 0.0003, "step": 404610 }, { "epoch": 4.323094182381538, "grad_norm": 0.021500883623957634, "learning_rate": 7.523620820770482e-07, "loss": 0.0006, "step": 404620 }, { "epoch": 4.323201025695817, "grad_norm": 0.0016251696506515145, "learning_rate": 7.523475780776059e-07, "loss": 0.0007, "step": 404630 }, { "epoch": 4.323307869010097, "grad_norm": 0.006042996421456337, "learning_rate": 7.523330737932439e-07, "loss": 0.0051, "step": 404640 }, { "epoch": 4.323414712324376, "grad_norm": 0.01821885257959366, "learning_rate": 7.523185692239788e-07, "loss": 0.0014, "step": 404650 }, { "epoch": 4.323521555638656, "grad_norm": 4.589451313018799, "learning_rate": 7.523040643698273e-07, "loss": 0.0099, "step": 404660 }, { "epoch": 4.323628398952936, "grad_norm": 5.212789058685303, "learning_rate": 7.522895592308052e-07, "loss": 0.0047, "step": 404670 }, { "epoch": 4.323735242267215, "grad_norm": 0.005735806655138731, "learning_rate": 7.522750538069291e-07, "loss": 0.0014, "step": 404680 }, { "epoch": 4.323842085581495, "grad_norm": 0.27073803544044495, "learning_rate": 7.522605480982156e-07, "loss": 0.0017, "step": 404690 }, { "epoch": 4.323948928895774, "grad_norm": 1.1517423391342163, "learning_rate": 7.522460421046808e-07, "loss": 0.0176, "step": 404700 }, { "epoch": 4.324055772210054, "grad_norm": 0.330506294965744, "learning_rate": 7.522315358263413e-07, "loss": 0.0001, "step": 404710 }, { "epoch": 4.324162615524333, "grad_norm": 0.0033835801295936108, "learning_rate": 7.522170292632133e-07, "loss": 0.0111, "step": 404720 }, { "epoch": 4.3242694588386135, "grad_norm": 10.162276268005371, "learning_rate": 7.522025224153132e-07, "loss": 0.0205, "step": 404730 }, { "epoch": 4.324376302152893, "grad_norm": 0.0009342563571408391, "learning_rate": 7.521880152826574e-07, "loss": 0.0093, "step": 404740 }, { "epoch": 4.324483145467172, "grad_norm": 5.272197246551514, "learning_rate": 7.521735078652624e-07, "loss": 0.0169, "step": 404750 }, { "epoch": 4.324589988781452, "grad_norm": 0.6841139793395996, "learning_rate": 7.521590001631444e-07, "loss": 0.0067, "step": 404760 }, { "epoch": 4.324696832095731, "grad_norm": 0.07550917565822601, "learning_rate": 7.521444921763199e-07, "loss": 0.0051, "step": 404770 }, { "epoch": 4.324803675410012, "grad_norm": 0.5704871416091919, "learning_rate": 7.521299839048052e-07, "loss": 0.0211, "step": 404780 }, { "epoch": 4.324910518724291, "grad_norm": 2.339433193206787, "learning_rate": 7.521154753486166e-07, "loss": 0.016, "step": 404790 }, { "epoch": 4.325017362038571, "grad_norm": 1.0043007135391235, "learning_rate": 7.521009665077708e-07, "loss": 0.0349, "step": 404800 }, { "epoch": 4.32512420535285, "grad_norm": 0.8573473691940308, "learning_rate": 7.520864573822839e-07, "loss": 0.0075, "step": 404810 }, { "epoch": 4.3252310486671295, "grad_norm": 0.016762806102633476, "learning_rate": 7.520719479721723e-07, "loss": 0.0065, "step": 404820 }, { "epoch": 4.325337891981409, "grad_norm": 25.749435424804688, "learning_rate": 7.520574382774526e-07, "loss": 0.0254, "step": 404830 }, { "epoch": 4.325444735295688, "grad_norm": 5.502304553985596, "learning_rate": 7.52042928298141e-07, "loss": 0.0138, "step": 404840 }, { "epoch": 4.325551578609969, "grad_norm": 3.5391530990600586, "learning_rate": 7.520284180342537e-07, "loss": 0.0089, "step": 404850 }, { "epoch": 4.325658421924248, "grad_norm": 0.3294685184955597, "learning_rate": 7.520139074858076e-07, "loss": 0.0006, "step": 404860 }, { "epoch": 4.325765265238528, "grad_norm": 0.017474332824349403, "learning_rate": 7.519993966528186e-07, "loss": 0.0011, "step": 404870 }, { "epoch": 4.325872108552807, "grad_norm": 0.03341849520802498, "learning_rate": 7.519848855353033e-07, "loss": 0.0217, "step": 404880 }, { "epoch": 4.325978951867087, "grad_norm": 13.400276184082031, "learning_rate": 7.519703741332782e-07, "loss": 0.0081, "step": 404890 }, { "epoch": 4.326085795181367, "grad_norm": 1.7734805345535278, "learning_rate": 7.519558624467592e-07, "loss": 0.0095, "step": 404900 }, { "epoch": 4.326192638495646, "grad_norm": 1.4129105806350708, "learning_rate": 7.519413504757633e-07, "loss": 0.0017, "step": 404910 }, { "epoch": 4.326299481809926, "grad_norm": 0.030726514756679535, "learning_rate": 7.519268382203064e-07, "loss": 0.0056, "step": 404920 }, { "epoch": 4.326406325124205, "grad_norm": 0.008262126706540585, "learning_rate": 7.519123256804052e-07, "loss": 0.0019, "step": 404930 }, { "epoch": 4.326513168438485, "grad_norm": 0.0037927881348878145, "learning_rate": 7.518978128560759e-07, "loss": 0.0084, "step": 404940 }, { "epoch": 4.326620011752764, "grad_norm": 11.663630485534668, "learning_rate": 7.51883299747335e-07, "loss": 0.0116, "step": 404950 }, { "epoch": 4.326726855067045, "grad_norm": 0.03264206647872925, "learning_rate": 7.518687863541988e-07, "loss": 0.0191, "step": 404960 }, { "epoch": 4.326833698381324, "grad_norm": 0.5757178664207458, "learning_rate": 7.518542726766839e-07, "loss": 0.0019, "step": 404970 }, { "epoch": 4.3269405416956035, "grad_norm": 0.0013873629504814744, "learning_rate": 7.518397587148064e-07, "loss": 0.0023, "step": 404980 }, { "epoch": 4.327047385009883, "grad_norm": 0.008814630098640919, "learning_rate": 7.518252444685827e-07, "loss": 0.0236, "step": 404990 }, { "epoch": 4.327154228324162, "grad_norm": 1.355318307876587, "learning_rate": 7.518107299380294e-07, "loss": 0.0173, "step": 405000 }, { "epoch": 4.327261071638442, "grad_norm": 0.006140874698758125, "learning_rate": 7.517962151231628e-07, "loss": 0.0054, "step": 405010 }, { "epoch": 4.327367914952722, "grad_norm": 2.287299633026123, "learning_rate": 7.517817000239992e-07, "loss": 0.0284, "step": 405020 }, { "epoch": 4.327474758267002, "grad_norm": 0.5110889673233032, "learning_rate": 7.517671846405551e-07, "loss": 0.0078, "step": 405030 }, { "epoch": 4.327581601581281, "grad_norm": 0.00826068315654993, "learning_rate": 7.51752668972847e-07, "loss": 0.0109, "step": 405040 }, { "epoch": 4.327688444895561, "grad_norm": 0.007396362721920013, "learning_rate": 7.517381530208909e-07, "loss": 0.0408, "step": 405050 }, { "epoch": 4.32779528820984, "grad_norm": 1.3724794387817383, "learning_rate": 7.517236367847035e-07, "loss": 0.0045, "step": 405060 }, { "epoch": 4.3279021315241195, "grad_norm": 0.003835476702079177, "learning_rate": 7.517091202643012e-07, "loss": 0.0164, "step": 405070 }, { "epoch": 4.3280089748384, "grad_norm": 0.29074910283088684, "learning_rate": 7.516946034597002e-07, "loss": 0.0057, "step": 405080 }, { "epoch": 4.328115818152679, "grad_norm": 0.1108153909444809, "learning_rate": 7.516800863709169e-07, "loss": 0.0019, "step": 405090 }, { "epoch": 4.328222661466959, "grad_norm": 0.05707564204931259, "learning_rate": 7.516655689979679e-07, "loss": 0.0012, "step": 405100 }, { "epoch": 4.328329504781238, "grad_norm": 0.16376487910747528, "learning_rate": 7.516510513408695e-07, "loss": 0.0013, "step": 405110 }, { "epoch": 4.328436348095518, "grad_norm": 0.009216015227138996, "learning_rate": 7.516365333996382e-07, "loss": 0.0013, "step": 405120 }, { "epoch": 4.328543191409797, "grad_norm": 1.869556188583374, "learning_rate": 7.5162201517429e-07, "loss": 0.0106, "step": 405130 }, { "epoch": 4.3286500347240775, "grad_norm": 4.871521949768066, "learning_rate": 7.516074966648417e-07, "loss": 0.0144, "step": 405140 }, { "epoch": 4.328756878038357, "grad_norm": 0.021608630195260048, "learning_rate": 7.515929778713095e-07, "loss": 0.0053, "step": 405150 }, { "epoch": 4.328863721352636, "grad_norm": 0.05275370553135872, "learning_rate": 7.515784587937098e-07, "loss": 0.0065, "step": 405160 }, { "epoch": 4.328970564666916, "grad_norm": 0.27220264077186584, "learning_rate": 7.515639394320592e-07, "loss": 0.0028, "step": 405170 }, { "epoch": 4.329077407981195, "grad_norm": 0.0011278712190687656, "learning_rate": 7.515494197863739e-07, "loss": 0.0053, "step": 405180 }, { "epoch": 4.329184251295475, "grad_norm": 0.024407483637332916, "learning_rate": 7.515348998566702e-07, "loss": 0.0054, "step": 405190 }, { "epoch": 4.329291094609755, "grad_norm": 0.0005233572446741164, "learning_rate": 7.515203796429645e-07, "loss": 0.0051, "step": 405200 }, { "epoch": 4.329397937924035, "grad_norm": 0.0012931817909702659, "learning_rate": 7.515058591452737e-07, "loss": 0.0086, "step": 405210 }, { "epoch": 4.329504781238314, "grad_norm": 9.317949295043945, "learning_rate": 7.514913383636134e-07, "loss": 0.0309, "step": 405220 }, { "epoch": 4.3296116245525935, "grad_norm": 0.06230935826897621, "learning_rate": 7.514768172980006e-07, "loss": 0.0065, "step": 405230 }, { "epoch": 4.329718467866873, "grad_norm": 22.74397850036621, "learning_rate": 7.514622959484515e-07, "loss": 0.0457, "step": 405240 }, { "epoch": 4.329825311181152, "grad_norm": 0.3593975305557251, "learning_rate": 7.514477743149824e-07, "loss": 0.0026, "step": 405250 }, { "epoch": 4.329932154495433, "grad_norm": 3.877774238586426, "learning_rate": 7.514332523976099e-07, "loss": 0.023, "step": 405260 }, { "epoch": 4.330038997809712, "grad_norm": 0.01823434606194496, "learning_rate": 7.514187301963502e-07, "loss": 0.0024, "step": 405270 }, { "epoch": 4.330145841123992, "grad_norm": 0.016591552644968033, "learning_rate": 7.514042077112197e-07, "loss": 0.0051, "step": 405280 }, { "epoch": 4.330252684438271, "grad_norm": 0.01295430213212967, "learning_rate": 7.513896849422351e-07, "loss": 0.006, "step": 405290 }, { "epoch": 4.330359527752551, "grad_norm": 10.960625648498535, "learning_rate": 7.513751618894125e-07, "loss": 0.0173, "step": 405300 }, { "epoch": 4.33046637106683, "grad_norm": 0.0017372863367199898, "learning_rate": 7.513606385527683e-07, "loss": 0.0001, "step": 405310 }, { "epoch": 4.33057321438111, "grad_norm": 0.14663496613502502, "learning_rate": 7.513461149323189e-07, "loss": 0.0191, "step": 405320 }, { "epoch": 4.33068005769539, "grad_norm": 11.790096282958984, "learning_rate": 7.513315910280809e-07, "loss": 0.0191, "step": 405330 }, { "epoch": 4.330786901009669, "grad_norm": 1.0530766248703003, "learning_rate": 7.513170668400704e-07, "loss": 0.0357, "step": 405340 }, { "epoch": 4.330893744323949, "grad_norm": 0.7470759749412537, "learning_rate": 7.513025423683041e-07, "loss": 0.026, "step": 405350 }, { "epoch": 4.331000587638228, "grad_norm": 25.895793914794922, "learning_rate": 7.512880176127982e-07, "loss": 0.0172, "step": 405360 }, { "epoch": 4.331107430952509, "grad_norm": 0.02039264887571335, "learning_rate": 7.512734925735693e-07, "loss": 0.0033, "step": 405370 }, { "epoch": 4.331214274266788, "grad_norm": 4.256869792938232, "learning_rate": 7.512589672506336e-07, "loss": 0.0093, "step": 405380 }, { "epoch": 4.3313211175810675, "grad_norm": 0.08334188163280487, "learning_rate": 7.512444416440075e-07, "loss": 0.0127, "step": 405390 }, { "epoch": 4.331427960895347, "grad_norm": 0.04343249648809433, "learning_rate": 7.512299157537074e-07, "loss": 0.0011, "step": 405400 }, { "epoch": 4.331534804209626, "grad_norm": 0.08138921856880188, "learning_rate": 7.512153895797498e-07, "loss": 0.0139, "step": 405410 }, { "epoch": 4.331641647523906, "grad_norm": 5.494460582733154, "learning_rate": 7.512008631221511e-07, "loss": 0.0085, "step": 405420 }, { "epoch": 4.331748490838185, "grad_norm": 0.022816689684987068, "learning_rate": 7.511863363809276e-07, "loss": 0.0005, "step": 405430 }, { "epoch": 4.331855334152466, "grad_norm": 0.13872943818569183, "learning_rate": 7.511718093560961e-07, "loss": 0.014, "step": 405440 }, { "epoch": 4.331962177466745, "grad_norm": 0.009678895585238934, "learning_rate": 7.511572820476723e-07, "loss": 0.0001, "step": 405450 }, { "epoch": 4.332069020781025, "grad_norm": 0.033186160027980804, "learning_rate": 7.511427544556731e-07, "loss": 0.0171, "step": 405460 }, { "epoch": 4.332175864095304, "grad_norm": 0.023518096655607224, "learning_rate": 7.511282265801147e-07, "loss": 0.0012, "step": 405470 }, { "epoch": 4.3322827074095835, "grad_norm": 0.3894273638725281, "learning_rate": 7.511136984210138e-07, "loss": 0.022, "step": 405480 }, { "epoch": 4.332389550723864, "grad_norm": 0.11208988726139069, "learning_rate": 7.510991699783864e-07, "loss": 0.0122, "step": 405490 }, { "epoch": 4.332496394038143, "grad_norm": 0.009841558523476124, "learning_rate": 7.51084641252249e-07, "loss": 0.0033, "step": 405500 }, { "epoch": 4.332603237352423, "grad_norm": 0.005905614700168371, "learning_rate": 7.510701122426183e-07, "loss": 0.0063, "step": 405510 }, { "epoch": 4.332710080666702, "grad_norm": 0.21525977551937103, "learning_rate": 7.510555829495104e-07, "loss": 0.0013, "step": 405520 }, { "epoch": 4.332816923980982, "grad_norm": 0.013364631682634354, "learning_rate": 7.510410533729418e-07, "loss": 0.0135, "step": 405530 }, { "epoch": 4.332923767295261, "grad_norm": 0.486465722322464, "learning_rate": 7.510265235129289e-07, "loss": 0.0086, "step": 405540 }, { "epoch": 4.333030610609541, "grad_norm": 0.8633469939231873, "learning_rate": 7.510119933694881e-07, "loss": 0.0014, "step": 405550 }, { "epoch": 4.333137453923821, "grad_norm": 0.2031283974647522, "learning_rate": 7.509974629426359e-07, "loss": 0.0017, "step": 405560 }, { "epoch": 4.3332442972381005, "grad_norm": 7.540382385253906, "learning_rate": 7.509829322323885e-07, "loss": 0.0015, "step": 405570 }, { "epoch": 4.33335114055238, "grad_norm": 0.018690912052989006, "learning_rate": 7.509684012387625e-07, "loss": 0.0108, "step": 405580 }, { "epoch": 4.333457983866659, "grad_norm": 1.0158307552337646, "learning_rate": 7.509538699617741e-07, "loss": 0.0127, "step": 405590 }, { "epoch": 4.333564827180939, "grad_norm": 0.0017526154406368732, "learning_rate": 7.509393384014401e-07, "loss": 0.0081, "step": 405600 }, { "epoch": 4.333671670495219, "grad_norm": 6.902090072631836, "learning_rate": 7.509248065577765e-07, "loss": 0.0176, "step": 405610 }, { "epoch": 4.333778513809499, "grad_norm": 2.9447505474090576, "learning_rate": 7.509102744307998e-07, "loss": 0.0345, "step": 405620 }, { "epoch": 4.333885357123778, "grad_norm": 1.9645864963531494, "learning_rate": 7.508957420205266e-07, "loss": 0.021, "step": 405630 }, { "epoch": 4.3339922004380576, "grad_norm": 0.4664212465286255, "learning_rate": 7.50881209326973e-07, "loss": 0.0028, "step": 405640 }, { "epoch": 4.334099043752337, "grad_norm": 2.0417699813842773, "learning_rate": 7.508666763501557e-07, "loss": 0.0073, "step": 405650 }, { "epoch": 4.3342058870666165, "grad_norm": 11.30931282043457, "learning_rate": 7.50852143090091e-07, "loss": 0.0167, "step": 405660 }, { "epoch": 4.334312730380897, "grad_norm": 1.445213794708252, "learning_rate": 7.508376095467952e-07, "loss": 0.0016, "step": 405670 }, { "epoch": 4.334419573695176, "grad_norm": 3.832723617553711, "learning_rate": 7.508230757202849e-07, "loss": 0.005, "step": 405680 }, { "epoch": 4.334526417009456, "grad_norm": 5.238753318786621, "learning_rate": 7.508085416105764e-07, "loss": 0.0104, "step": 405690 }, { "epoch": 4.334633260323735, "grad_norm": 3.7130095958709717, "learning_rate": 7.507940072176859e-07, "loss": 0.0334, "step": 405700 }, { "epoch": 4.334740103638015, "grad_norm": 0.021796924993395805, "learning_rate": 7.507794725416303e-07, "loss": 0.007, "step": 405710 }, { "epoch": 4.334846946952294, "grad_norm": 0.3359093964099884, "learning_rate": 7.507649375824256e-07, "loss": 0.0153, "step": 405720 }, { "epoch": 4.3349537902665745, "grad_norm": 0.087973952293396, "learning_rate": 7.507504023400885e-07, "loss": 0.0158, "step": 405730 }, { "epoch": 4.335060633580854, "grad_norm": 0.00402279756963253, "learning_rate": 7.507358668146353e-07, "loss": 0.0015, "step": 405740 }, { "epoch": 4.335167476895133, "grad_norm": 0.06036566570401192, "learning_rate": 7.507213310060823e-07, "loss": 0.0052, "step": 405750 }, { "epoch": 4.335274320209413, "grad_norm": 4.343861103057861, "learning_rate": 7.507067949144459e-07, "loss": 0.07, "step": 405760 }, { "epoch": 4.335381163523692, "grad_norm": 0.001928671495988965, "learning_rate": 7.506922585397428e-07, "loss": 0.0059, "step": 405770 }, { "epoch": 4.335488006837972, "grad_norm": 8.85737419128418, "learning_rate": 7.506777218819891e-07, "loss": 0.0263, "step": 405780 }, { "epoch": 4.335594850152252, "grad_norm": 0.007869720458984375, "learning_rate": 7.506631849412013e-07, "loss": 0.0052, "step": 405790 }, { "epoch": 4.335701693466532, "grad_norm": 0.0021339862141758204, "learning_rate": 7.50648647717396e-07, "loss": 0.0029, "step": 405800 }, { "epoch": 4.335808536780811, "grad_norm": 0.00850839912891388, "learning_rate": 7.506341102105894e-07, "loss": 0.0007, "step": 405810 }, { "epoch": 4.3359153800950905, "grad_norm": 7.9846510887146, "learning_rate": 7.506195724207978e-07, "loss": 0.018, "step": 405820 }, { "epoch": 4.33602222340937, "grad_norm": 0.9159558415412903, "learning_rate": 7.50605034348038e-07, "loss": 0.0038, "step": 405830 }, { "epoch": 4.336129066723649, "grad_norm": 0.08301535248756409, "learning_rate": 7.505904959923262e-07, "loss": 0.0204, "step": 405840 }, { "epoch": 4.33623591003793, "grad_norm": 0.03778564929962158, "learning_rate": 7.505759573536787e-07, "loss": 0.0143, "step": 405850 }, { "epoch": 4.336342753352209, "grad_norm": 0.24870462715625763, "learning_rate": 7.50561418432112e-07, "loss": 0.0044, "step": 405860 }, { "epoch": 4.336449596666489, "grad_norm": 0.0031264950521290302, "learning_rate": 7.505468792276426e-07, "loss": 0.0275, "step": 405870 }, { "epoch": 4.336556439980768, "grad_norm": 0.8024778962135315, "learning_rate": 7.505323397402869e-07, "loss": 0.0071, "step": 405880 }, { "epoch": 4.336663283295048, "grad_norm": 2.897164821624756, "learning_rate": 7.505177999700613e-07, "loss": 0.0041, "step": 405890 }, { "epoch": 4.336770126609327, "grad_norm": 1.4155734777450562, "learning_rate": 7.505032599169821e-07, "loss": 0.0033, "step": 405900 }, { "epoch": 4.336876969923607, "grad_norm": 9.073083877563477, "learning_rate": 7.50488719581066e-07, "loss": 0.024, "step": 405910 }, { "epoch": 4.336983813237887, "grad_norm": 0.020376302301883698, "learning_rate": 7.504741789623292e-07, "loss": 0.0002, "step": 405920 }, { "epoch": 4.337090656552166, "grad_norm": 0.003275153459981084, "learning_rate": 7.504596380607879e-07, "loss": 0.0127, "step": 405930 }, { "epoch": 4.337197499866446, "grad_norm": 6.50293493270874, "learning_rate": 7.504450968764589e-07, "loss": 0.0129, "step": 405940 }, { "epoch": 4.337304343180725, "grad_norm": 0.29012802243232727, "learning_rate": 7.504305554093587e-07, "loss": 0.0005, "step": 405950 }, { "epoch": 4.337411186495005, "grad_norm": 0.006029989570379257, "learning_rate": 7.504160136595031e-07, "loss": 0.0266, "step": 405960 }, { "epoch": 4.337518029809285, "grad_norm": 2.338886022567749, "learning_rate": 7.504014716269092e-07, "loss": 0.0049, "step": 405970 }, { "epoch": 4.3376248731235645, "grad_norm": 0.04314035922288895, "learning_rate": 7.503869293115931e-07, "loss": 0.0102, "step": 405980 }, { "epoch": 4.337731716437844, "grad_norm": 0.04701649770140648, "learning_rate": 7.503723867135712e-07, "loss": 0.0264, "step": 405990 }, { "epoch": 4.337838559752123, "grad_norm": 2.861689805984497, "learning_rate": 7.5035784383286e-07, "loss": 0.025, "step": 406000 }, { "epoch": 4.337945403066403, "grad_norm": 0.0011922562262043357, "learning_rate": 7.50343300669476e-07, "loss": 0.0392, "step": 406010 }, { "epoch": 4.338052246380682, "grad_norm": 2.959810495376587, "learning_rate": 7.503287572234354e-07, "loss": 0.0157, "step": 406020 }, { "epoch": 4.338159089694963, "grad_norm": 25.759952545166016, "learning_rate": 7.503142134947546e-07, "loss": 0.0351, "step": 406030 }, { "epoch": 4.338265933009242, "grad_norm": 0.0908045768737793, "learning_rate": 7.502996694834503e-07, "loss": 0.0045, "step": 406040 }, { "epoch": 4.338372776323522, "grad_norm": 0.045322235673666, "learning_rate": 7.502851251895388e-07, "loss": 0.0144, "step": 406050 }, { "epoch": 4.338479619637801, "grad_norm": 0.06672477722167969, "learning_rate": 7.502705806130365e-07, "loss": 0.0022, "step": 406060 }, { "epoch": 4.3385864629520805, "grad_norm": 0.0011649350635707378, "learning_rate": 7.502560357539598e-07, "loss": 0.0035, "step": 406070 }, { "epoch": 4.338693306266361, "grad_norm": 0.0037503100465983152, "learning_rate": 7.502414906123253e-07, "loss": 0.0166, "step": 406080 }, { "epoch": 4.33880014958064, "grad_norm": 10.342124938964844, "learning_rate": 7.502269451881491e-07, "loss": 0.0087, "step": 406090 }, { "epoch": 4.33890699289492, "grad_norm": 4.772073268890381, "learning_rate": 7.502123994814477e-07, "loss": 0.0092, "step": 406100 }, { "epoch": 4.339013836209199, "grad_norm": 0.0014114496298134327, "learning_rate": 7.501978534922379e-07, "loss": 0.0089, "step": 406110 }, { "epoch": 4.339120679523479, "grad_norm": 0.9108558893203735, "learning_rate": 7.501833072205358e-07, "loss": 0.0073, "step": 406120 }, { "epoch": 4.339227522837758, "grad_norm": 0.014690483920276165, "learning_rate": 7.501687606663578e-07, "loss": 0.0016, "step": 406130 }, { "epoch": 4.339334366152038, "grad_norm": 3.0050814151763916, "learning_rate": 7.501542138297203e-07, "loss": 0.0041, "step": 406140 }, { "epoch": 4.339441209466318, "grad_norm": 17.882383346557617, "learning_rate": 7.501396667106399e-07, "loss": 0.0051, "step": 406150 }, { "epoch": 4.339548052780597, "grad_norm": 0.5751211643218994, "learning_rate": 7.501251193091328e-07, "loss": 0.0178, "step": 406160 }, { "epoch": 4.339654896094877, "grad_norm": 10.412680625915527, "learning_rate": 7.501105716252157e-07, "loss": 0.0085, "step": 406170 }, { "epoch": 4.339761739409156, "grad_norm": 19.802509307861328, "learning_rate": 7.500960236589049e-07, "loss": 0.0122, "step": 406180 }, { "epoch": 4.339868582723436, "grad_norm": 2.6750388145446777, "learning_rate": 7.500814754102167e-07, "loss": 0.0064, "step": 406190 }, { "epoch": 4.339975426037716, "grad_norm": 0.1913035362958908, "learning_rate": 7.500669268791677e-07, "loss": 0.0033, "step": 406200 }, { "epoch": 4.340082269351996, "grad_norm": 0.0073507316410541534, "learning_rate": 7.500523780657744e-07, "loss": 0.0232, "step": 406210 }, { "epoch": 4.340189112666275, "grad_norm": 1.4077742099761963, "learning_rate": 7.500378289700529e-07, "loss": 0.0014, "step": 406220 }, { "epoch": 4.3402959559805545, "grad_norm": 0.0005785399698652327, "learning_rate": 7.5002327959202e-07, "loss": 0.0091, "step": 406230 }, { "epoch": 4.340402799294834, "grad_norm": 0.051357872784137726, "learning_rate": 7.500087299316917e-07, "loss": 0.0059, "step": 406240 }, { "epoch": 4.340509642609113, "grad_norm": 3.865635395050049, "learning_rate": 7.499941799890849e-07, "loss": 0.0063, "step": 406250 }, { "epoch": 4.340616485923393, "grad_norm": 0.028634395450353622, "learning_rate": 7.499796297642157e-07, "loss": 0.0039, "step": 406260 }, { "epoch": 4.340723329237673, "grad_norm": 0.0017256762366741896, "learning_rate": 7.499650792571006e-07, "loss": 0.0002, "step": 406270 }, { "epoch": 4.340830172551953, "grad_norm": 0.8613024353981018, "learning_rate": 7.499505284677561e-07, "loss": 0.0024, "step": 406280 }, { "epoch": 4.340937015866232, "grad_norm": 0.0810844749212265, "learning_rate": 7.499359773961987e-07, "loss": 0.0044, "step": 406290 }, { "epoch": 4.341043859180512, "grad_norm": 5.781585693359375, "learning_rate": 7.499214260424444e-07, "loss": 0.0089, "step": 406300 }, { "epoch": 4.341150702494791, "grad_norm": 0.0072357687167823315, "learning_rate": 7.499068744065102e-07, "loss": 0.0097, "step": 406310 }, { "epoch": 4.341257545809071, "grad_norm": 10.509618759155273, "learning_rate": 7.498923224884122e-07, "loss": 0.0146, "step": 406320 }, { "epoch": 4.341364389123351, "grad_norm": 1.0018484592437744, "learning_rate": 7.49877770288167e-07, "loss": 0.0278, "step": 406330 }, { "epoch": 4.34147123243763, "grad_norm": 0.021167894825339317, "learning_rate": 7.498632178057909e-07, "loss": 0.014, "step": 406340 }, { "epoch": 4.34157807575191, "grad_norm": 0.024313010275363922, "learning_rate": 7.498486650413003e-07, "loss": 0.0103, "step": 406350 }, { "epoch": 4.341684919066189, "grad_norm": 0.013474611565470695, "learning_rate": 7.498341119947118e-07, "loss": 0.0021, "step": 406360 }, { "epoch": 4.341791762380469, "grad_norm": 0.01020203810185194, "learning_rate": 7.498195586660416e-07, "loss": 0.0101, "step": 406370 }, { "epoch": 4.341898605694749, "grad_norm": 0.11861471086740494, "learning_rate": 7.498050050553064e-07, "loss": 0.0104, "step": 406380 }, { "epoch": 4.3420054490090285, "grad_norm": 2.1491007804870605, "learning_rate": 7.497904511625224e-07, "loss": 0.0584, "step": 406390 }, { "epoch": 4.342112292323308, "grad_norm": 2.391662836074829, "learning_rate": 7.497758969877063e-07, "loss": 0.0016, "step": 406400 }, { "epoch": 4.342219135637587, "grad_norm": 0.00536268949508667, "learning_rate": 7.497613425308742e-07, "loss": 0.008, "step": 406410 }, { "epoch": 4.342325978951867, "grad_norm": 0.0023564351722598076, "learning_rate": 7.497467877920427e-07, "loss": 0.0162, "step": 406420 }, { "epoch": 4.342432822266146, "grad_norm": 0.08474838733673096, "learning_rate": 7.497322327712283e-07, "loss": 0.0193, "step": 406430 }, { "epoch": 4.342539665580427, "grad_norm": 1.1733181476593018, "learning_rate": 7.497176774684473e-07, "loss": 0.0136, "step": 406440 }, { "epoch": 4.342646508894706, "grad_norm": 1.0971145629882812, "learning_rate": 7.497031218837162e-07, "loss": 0.0062, "step": 406450 }, { "epoch": 4.342753352208986, "grad_norm": 0.002625591354444623, "learning_rate": 7.496885660170514e-07, "loss": 0.0026, "step": 406460 }, { "epoch": 4.342860195523265, "grad_norm": 0.025681177154183388, "learning_rate": 7.496740098684694e-07, "loss": 0.0026, "step": 406470 }, { "epoch": 4.3429670388375445, "grad_norm": 0.01605704054236412, "learning_rate": 7.496594534379866e-07, "loss": 0.0068, "step": 406480 }, { "epoch": 4.343073882151824, "grad_norm": 0.26324450969696045, "learning_rate": 7.496448967256195e-07, "loss": 0.0029, "step": 406490 }, { "epoch": 4.343180725466104, "grad_norm": 0.046735428273677826, "learning_rate": 7.496303397313843e-07, "loss": 0.0042, "step": 406500 }, { "epoch": 4.343287568780384, "grad_norm": 0.003746958216652274, "learning_rate": 7.496157824552977e-07, "loss": 0.0232, "step": 406510 }, { "epoch": 4.343394412094663, "grad_norm": 2.72411847114563, "learning_rate": 7.496012248973762e-07, "loss": 0.0061, "step": 406520 }, { "epoch": 4.343501255408943, "grad_norm": 0.01611150987446308, "learning_rate": 7.495866670576358e-07, "loss": 0.017, "step": 406530 }, { "epoch": 4.343608098723222, "grad_norm": 0.008938359096646309, "learning_rate": 7.495721089360934e-07, "loss": 0.0085, "step": 406540 }, { "epoch": 4.343714942037502, "grad_norm": 1.8020482063293457, "learning_rate": 7.495575505327653e-07, "loss": 0.0135, "step": 406550 }, { "epoch": 4.343821785351782, "grad_norm": 0.002868691924959421, "learning_rate": 7.495429918476677e-07, "loss": 0.0057, "step": 406560 }, { "epoch": 4.343928628666061, "grad_norm": 1.9334161281585693, "learning_rate": 7.495284328808174e-07, "loss": 0.0045, "step": 406570 }, { "epoch": 4.344035471980341, "grad_norm": 5.431342124938965, "learning_rate": 7.495138736322306e-07, "loss": 0.0341, "step": 406580 }, { "epoch": 4.34414231529462, "grad_norm": 5.137434959411621, "learning_rate": 7.494993141019237e-07, "loss": 0.0285, "step": 406590 }, { "epoch": 4.3442491586089, "grad_norm": 0.07688561826944351, "learning_rate": 7.494847542899134e-07, "loss": 0.0043, "step": 406600 }, { "epoch": 4.344356001923179, "grad_norm": 4.197765827178955, "learning_rate": 7.494701941962159e-07, "loss": 0.0073, "step": 406610 }, { "epoch": 4.34446284523746, "grad_norm": 0.12674376368522644, "learning_rate": 7.494556338208477e-07, "loss": 0.0156, "step": 406620 }, { "epoch": 4.344569688551739, "grad_norm": 7.536352634429932, "learning_rate": 7.494410731638253e-07, "loss": 0.0109, "step": 406630 }, { "epoch": 4.3446765318660185, "grad_norm": 0.02626972086727619, "learning_rate": 7.494265122251651e-07, "loss": 0.0002, "step": 406640 }, { "epoch": 4.344783375180298, "grad_norm": 0.001504169893451035, "learning_rate": 7.494119510048835e-07, "loss": 0.0265, "step": 406650 }, { "epoch": 4.3448902184945775, "grad_norm": 0.23779039084911346, "learning_rate": 7.49397389502997e-07, "loss": 0.0314, "step": 406660 }, { "epoch": 4.344997061808857, "grad_norm": 0.01643030345439911, "learning_rate": 7.493828277195221e-07, "loss": 0.003, "step": 406670 }, { "epoch": 4.345103905123137, "grad_norm": 0.0049188449047505856, "learning_rate": 7.493682656544751e-07, "loss": 0.002, "step": 406680 }, { "epoch": 4.345210748437417, "grad_norm": 0.007044763304293156, "learning_rate": 7.493537033078725e-07, "loss": 0.0107, "step": 406690 }, { "epoch": 4.345317591751696, "grad_norm": 0.3241232633590698, "learning_rate": 7.493391406797308e-07, "loss": 0.0181, "step": 406700 }, { "epoch": 4.345424435065976, "grad_norm": 0.03316932171583176, "learning_rate": 7.493245777700664e-07, "loss": 0.0011, "step": 406710 }, { "epoch": 4.345531278380255, "grad_norm": 0.0008057304075919092, "learning_rate": 7.493100145788957e-07, "loss": 0.0052, "step": 406720 }, { "epoch": 4.345638121694535, "grad_norm": 0.030872540548443794, "learning_rate": 7.492954511062352e-07, "loss": 0.0071, "step": 406730 }, { "epoch": 4.345744965008815, "grad_norm": 6.490174293518066, "learning_rate": 7.492808873521013e-07, "loss": 0.0148, "step": 406740 }, { "epoch": 4.345851808323094, "grad_norm": 0.021124716848134995, "learning_rate": 7.492663233165105e-07, "loss": 0.0025, "step": 406750 }, { "epoch": 4.345958651637374, "grad_norm": 0.063575379550457, "learning_rate": 7.492517589994792e-07, "loss": 0.0138, "step": 406760 }, { "epoch": 4.346065494951653, "grad_norm": 6.5926079750061035, "learning_rate": 7.492371944010237e-07, "loss": 0.0151, "step": 406770 }, { "epoch": 4.346172338265933, "grad_norm": 5.940738677978516, "learning_rate": 7.492226295211607e-07, "loss": 0.0073, "step": 406780 }, { "epoch": 4.346279181580212, "grad_norm": 0.035060252994298935, "learning_rate": 7.492080643599066e-07, "loss": 0.0045, "step": 406790 }, { "epoch": 4.3463860248944926, "grad_norm": 4.743886947631836, "learning_rate": 7.491934989172778e-07, "loss": 0.0161, "step": 406800 }, { "epoch": 4.346492868208772, "grad_norm": 0.000737832800950855, "learning_rate": 7.491789331932907e-07, "loss": 0.0002, "step": 406810 }, { "epoch": 4.3465997115230515, "grad_norm": 0.565588116645813, "learning_rate": 7.491643671879617e-07, "loss": 0.0051, "step": 406820 }, { "epoch": 4.346706554837331, "grad_norm": 1.6205222606658936, "learning_rate": 7.491498009013075e-07, "loss": 0.0059, "step": 406830 }, { "epoch": 4.34681339815161, "grad_norm": 0.021128956228494644, "learning_rate": 7.491352343333442e-07, "loss": 0.0074, "step": 406840 }, { "epoch": 4.34692024146589, "grad_norm": 3.781256914138794, "learning_rate": 7.491206674840884e-07, "loss": 0.0093, "step": 406850 }, { "epoch": 4.34702708478017, "grad_norm": 0.3200172781944275, "learning_rate": 7.491061003535567e-07, "loss": 0.0095, "step": 406860 }, { "epoch": 4.34713392809445, "grad_norm": 3.496583938598633, "learning_rate": 7.490915329417653e-07, "loss": 0.0053, "step": 406870 }, { "epoch": 4.347240771408729, "grad_norm": 0.04004179313778877, "learning_rate": 7.490769652487308e-07, "loss": 0.0022, "step": 406880 }, { "epoch": 4.347347614723009, "grad_norm": 8.408432006835938, "learning_rate": 7.490623972744697e-07, "loss": 0.0215, "step": 406890 }, { "epoch": 4.347454458037288, "grad_norm": 0.00898047350347042, "learning_rate": 7.490478290189982e-07, "loss": 0.0017, "step": 406900 }, { "epoch": 4.347561301351568, "grad_norm": 0.0011927345767617226, "learning_rate": 7.490332604823331e-07, "loss": 0.0102, "step": 406910 }, { "epoch": 4.347668144665848, "grad_norm": 5.244032382965088, "learning_rate": 7.490186916644904e-07, "loss": 0.0069, "step": 406920 }, { "epoch": 4.347774987980127, "grad_norm": 7.086475372314453, "learning_rate": 7.490041225654871e-07, "loss": 0.0123, "step": 406930 }, { "epoch": 4.347881831294407, "grad_norm": 2.6933753490448, "learning_rate": 7.489895531853392e-07, "loss": 0.0076, "step": 406940 }, { "epoch": 4.347988674608686, "grad_norm": 0.269737184047699, "learning_rate": 7.489749835240631e-07, "loss": 0.0176, "step": 406950 }, { "epoch": 4.348095517922966, "grad_norm": 0.11650525033473969, "learning_rate": 7.489604135816758e-07, "loss": 0.0025, "step": 406960 }, { "epoch": 4.348202361237245, "grad_norm": 0.003786643035709858, "learning_rate": 7.489458433581931e-07, "loss": 0.003, "step": 406970 }, { "epoch": 4.3483092045515255, "grad_norm": 6.140995979309082, "learning_rate": 7.489312728536318e-07, "loss": 0.0007, "step": 406980 }, { "epoch": 4.348416047865805, "grad_norm": 0.007594959810376167, "learning_rate": 7.489167020680084e-07, "loss": 0.0002, "step": 406990 }, { "epoch": 4.348522891180084, "grad_norm": 0.2747237980365753, "learning_rate": 7.489021310013394e-07, "loss": 0.0112, "step": 407000 }, { "epoch": 4.348629734494364, "grad_norm": 0.01622091792523861, "learning_rate": 7.488875596536409e-07, "loss": 0.0033, "step": 407010 }, { "epoch": 4.348736577808643, "grad_norm": 0.007463207934051752, "learning_rate": 7.488729880249296e-07, "loss": 0.0106, "step": 407020 }, { "epoch": 4.348843421122924, "grad_norm": 0.004143593367189169, "learning_rate": 7.488584161152221e-07, "loss": 0.0067, "step": 407030 }, { "epoch": 4.348950264437203, "grad_norm": 0.0037935455329716206, "learning_rate": 7.488438439245344e-07, "loss": 0.0013, "step": 407040 }, { "epoch": 4.349057107751483, "grad_norm": 0.9493897557258606, "learning_rate": 7.488292714528833e-07, "loss": 0.0186, "step": 407050 }, { "epoch": 4.349163951065762, "grad_norm": 0.011932767927646637, "learning_rate": 7.488146987002852e-07, "loss": 0.018, "step": 407060 }, { "epoch": 4.3492707943800415, "grad_norm": 5.810781478881836, "learning_rate": 7.488001256667566e-07, "loss": 0.0088, "step": 407070 }, { "epoch": 4.349377637694321, "grad_norm": 0.07643180340528488, "learning_rate": 7.487855523523138e-07, "loss": 0.0055, "step": 407080 }, { "epoch": 4.349484481008601, "grad_norm": 0.3215623199939728, "learning_rate": 7.487709787569733e-07, "loss": 0.0007, "step": 407090 }, { "epoch": 4.349591324322881, "grad_norm": 0.3754977285861969, "learning_rate": 7.487564048807516e-07, "loss": 0.0215, "step": 407100 }, { "epoch": 4.34969816763716, "grad_norm": 6.327983379364014, "learning_rate": 7.487418307236653e-07, "loss": 0.0227, "step": 407110 }, { "epoch": 4.34980501095144, "grad_norm": 12.628106117248535, "learning_rate": 7.487272562857305e-07, "loss": 0.0226, "step": 407120 }, { "epoch": 4.349911854265719, "grad_norm": 0.07362965494394302, "learning_rate": 7.48712681566964e-07, "loss": 0.0229, "step": 407130 }, { "epoch": 4.350018697579999, "grad_norm": 0.04178458824753761, "learning_rate": 7.486981065673821e-07, "loss": 0.003, "step": 407140 }, { "epoch": 4.350125540894279, "grad_norm": 0.7456302046775818, "learning_rate": 7.486835312870011e-07, "loss": 0.0044, "step": 407150 }, { "epoch": 4.350232384208558, "grad_norm": 0.010348256677389145, "learning_rate": 7.486689557258377e-07, "loss": 0.0037, "step": 407160 }, { "epoch": 4.350339227522838, "grad_norm": 2.7498111724853516, "learning_rate": 7.486543798839084e-07, "loss": 0.0257, "step": 407170 }, { "epoch": 4.350446070837117, "grad_norm": 1.2776159048080444, "learning_rate": 7.486398037612294e-07, "loss": 0.0437, "step": 407180 }, { "epoch": 4.350552914151397, "grad_norm": 0.041131965816020966, "learning_rate": 7.486252273578173e-07, "loss": 0.0024, "step": 407190 }, { "epoch": 4.350659757465676, "grad_norm": 0.01987260766327381, "learning_rate": 7.486106506736887e-07, "loss": 0.0171, "step": 407200 }, { "epoch": 4.350766600779957, "grad_norm": 0.40408146381378174, "learning_rate": 7.485960737088598e-07, "loss": 0.005, "step": 407210 }, { "epoch": 4.350873444094236, "grad_norm": 0.008588175289332867, "learning_rate": 7.485814964633473e-07, "loss": 0.0239, "step": 407220 }, { "epoch": 4.3509802874085155, "grad_norm": 0.0011233605910092592, "learning_rate": 7.485669189371673e-07, "loss": 0.0199, "step": 407230 }, { "epoch": 4.351087130722795, "grad_norm": 0.02395043708384037, "learning_rate": 7.485523411303366e-07, "loss": 0.0009, "step": 407240 }, { "epoch": 4.351193974037074, "grad_norm": 4.577908515930176, "learning_rate": 7.485377630428715e-07, "loss": 0.0053, "step": 407250 }, { "epoch": 4.351300817351354, "grad_norm": 0.550513505935669, "learning_rate": 7.485231846747887e-07, "loss": 0.0145, "step": 407260 }, { "epoch": 4.351407660665634, "grad_norm": 0.898506224155426, "learning_rate": 7.485086060261042e-07, "loss": 0.0043, "step": 407270 }, { "epoch": 4.351514503979914, "grad_norm": 2.310377597808838, "learning_rate": 7.48494027096835e-07, "loss": 0.0079, "step": 407280 }, { "epoch": 4.351621347294193, "grad_norm": 4.883774757385254, "learning_rate": 7.48479447886997e-07, "loss": 0.0101, "step": 407290 }, { "epoch": 4.351728190608473, "grad_norm": 6.238983154296875, "learning_rate": 7.484648683966071e-07, "loss": 0.0059, "step": 407300 }, { "epoch": 4.351835033922752, "grad_norm": 0.06441453844308853, "learning_rate": 7.484502886256816e-07, "loss": 0.0012, "step": 407310 }, { "epoch": 4.3519418772370315, "grad_norm": 0.05543031543493271, "learning_rate": 7.48435708574237e-07, "loss": 0.0013, "step": 407320 }, { "epoch": 4.352048720551312, "grad_norm": 0.0030493387021124363, "learning_rate": 7.484211282422896e-07, "loss": 0.0051, "step": 407330 }, { "epoch": 4.352155563865591, "grad_norm": 0.8739689588546753, "learning_rate": 7.484065476298562e-07, "loss": 0.0096, "step": 407340 }, { "epoch": 4.352262407179871, "grad_norm": 1.2226108312606812, "learning_rate": 7.48391966736953e-07, "loss": 0.0122, "step": 407350 }, { "epoch": 4.35236925049415, "grad_norm": 3.716918706893921, "learning_rate": 7.483773855635964e-07, "loss": 0.0053, "step": 407360 }, { "epoch": 4.35247609380843, "grad_norm": 0.045530665665864944, "learning_rate": 7.483628041098032e-07, "loss": 0.0081, "step": 407370 }, { "epoch": 4.352582937122709, "grad_norm": 0.006587497424334288, "learning_rate": 7.483482223755895e-07, "loss": 0.0142, "step": 407380 }, { "epoch": 4.3526897804369895, "grad_norm": 0.05298032984137535, "learning_rate": 7.483336403609719e-07, "loss": 0.0144, "step": 407390 }, { "epoch": 4.352796623751269, "grad_norm": 0.42678216099739075, "learning_rate": 7.483190580659668e-07, "loss": 0.0097, "step": 407400 }, { "epoch": 4.352903467065548, "grad_norm": 2.7403018474578857, "learning_rate": 7.483044754905909e-07, "loss": 0.0038, "step": 407410 }, { "epoch": 4.353010310379828, "grad_norm": 0.006988331209868193, "learning_rate": 7.482898926348606e-07, "loss": 0.0084, "step": 407420 }, { "epoch": 4.353117153694107, "grad_norm": 1.8631032705307007, "learning_rate": 7.482753094987922e-07, "loss": 0.0177, "step": 407430 }, { "epoch": 4.353223997008387, "grad_norm": 0.14535801112651825, "learning_rate": 7.48260726082402e-07, "loss": 0.0115, "step": 407440 }, { "epoch": 4.353330840322667, "grad_norm": 0.1056223213672638, "learning_rate": 7.482461423857069e-07, "loss": 0.0163, "step": 407450 }, { "epoch": 4.353437683636947, "grad_norm": 0.6348730325698853, "learning_rate": 7.482315584087232e-07, "loss": 0.012, "step": 407460 }, { "epoch": 4.353544526951226, "grad_norm": 0.03126734495162964, "learning_rate": 7.482169741514672e-07, "loss": 0.0076, "step": 407470 }, { "epoch": 4.3536513702655055, "grad_norm": 2.5850961208343506, "learning_rate": 7.482023896139556e-07, "loss": 0.0044, "step": 407480 }, { "epoch": 4.353758213579785, "grad_norm": 0.8959424495697021, "learning_rate": 7.481878047962048e-07, "loss": 0.0024, "step": 407490 }, { "epoch": 4.3538650568940644, "grad_norm": 2.618953227996826, "learning_rate": 7.481732196982311e-07, "loss": 0.014, "step": 407500 }, { "epoch": 4.353971900208345, "grad_norm": 3.08540940284729, "learning_rate": 7.481586343200513e-07, "loss": 0.0118, "step": 407510 }, { "epoch": 4.354078743522624, "grad_norm": 0.002891204319894314, "learning_rate": 7.481440486616816e-07, "loss": 0.0012, "step": 407520 }, { "epoch": 4.354185586836904, "grad_norm": 1.3222129344940186, "learning_rate": 7.481294627231385e-07, "loss": 0.0039, "step": 407530 }, { "epoch": 4.354292430151183, "grad_norm": 0.04904161021113396, "learning_rate": 7.481148765044385e-07, "loss": 0.0023, "step": 407540 }, { "epoch": 4.354399273465463, "grad_norm": 0.07267214357852936, "learning_rate": 7.481002900055981e-07, "loss": 0.0013, "step": 407550 }, { "epoch": 4.354506116779742, "grad_norm": 0.9546170830726624, "learning_rate": 7.480857032266336e-07, "loss": 0.0006, "step": 407560 }, { "epoch": 4.354612960094022, "grad_norm": 0.020843222737312317, "learning_rate": 7.480711161675619e-07, "loss": 0.0086, "step": 407570 }, { "epoch": 4.354719803408302, "grad_norm": 0.04063067585229874, "learning_rate": 7.48056528828399e-07, "loss": 0.0015, "step": 407580 }, { "epoch": 4.354826646722581, "grad_norm": 0.009432108141481876, "learning_rate": 7.480419412091616e-07, "loss": 0.0012, "step": 407590 }, { "epoch": 4.354933490036861, "grad_norm": 0.9807742834091187, "learning_rate": 7.480273533098661e-07, "loss": 0.002, "step": 407600 }, { "epoch": 4.35504033335114, "grad_norm": 0.006956047844141722, "learning_rate": 7.480127651305291e-07, "loss": 0.0002, "step": 407610 }, { "epoch": 4.355147176665421, "grad_norm": 0.02270849607884884, "learning_rate": 7.479981766711668e-07, "loss": 0.0006, "step": 407620 }, { "epoch": 4.3552540199797, "grad_norm": 0.003026410238817334, "learning_rate": 7.479835879317959e-07, "loss": 0.0002, "step": 407630 }, { "epoch": 4.3553608632939795, "grad_norm": 0.0645965039730072, "learning_rate": 7.47968998912433e-07, "loss": 0.0017, "step": 407640 }, { "epoch": 4.355467706608259, "grad_norm": 0.03969947621226311, "learning_rate": 7.479544096130942e-07, "loss": 0.0003, "step": 407650 }, { "epoch": 4.3555745499225385, "grad_norm": 0.1925073266029358, "learning_rate": 7.479398200337962e-07, "loss": 0.0004, "step": 407660 }, { "epoch": 4.355681393236818, "grad_norm": 0.3722265660762787, "learning_rate": 7.479252301745553e-07, "loss": 0.0011, "step": 407670 }, { "epoch": 4.355788236551097, "grad_norm": 4.314570903778076, "learning_rate": 7.479106400353883e-07, "loss": 0.018, "step": 407680 }, { "epoch": 4.355895079865378, "grad_norm": 4.758393287658691, "learning_rate": 7.478960496163115e-07, "loss": 0.0214, "step": 407690 }, { "epoch": 4.356001923179657, "grad_norm": 0.026925450190901756, "learning_rate": 7.478814589173412e-07, "loss": 0.0095, "step": 407700 }, { "epoch": 4.356108766493937, "grad_norm": 1.9286549091339111, "learning_rate": 7.47866867938494e-07, "loss": 0.0009, "step": 407710 }, { "epoch": 4.356215609808216, "grad_norm": 1.9344722032546997, "learning_rate": 7.478522766797866e-07, "loss": 0.0106, "step": 407720 }, { "epoch": 4.3563224531224956, "grad_norm": 7.166334629058838, "learning_rate": 7.478376851412351e-07, "loss": 0.012, "step": 407730 }, { "epoch": 4.356429296436776, "grad_norm": 9.705079078674316, "learning_rate": 7.478230933228562e-07, "loss": 0.0046, "step": 407740 }, { "epoch": 4.356536139751055, "grad_norm": 0.015139933675527573, "learning_rate": 7.478085012246665e-07, "loss": 0.0079, "step": 407750 }, { "epoch": 4.356642983065335, "grad_norm": 3.366734027862549, "learning_rate": 7.477939088466821e-07, "loss": 0.0044, "step": 407760 }, { "epoch": 4.356749826379614, "grad_norm": 0.0020505920983850956, "learning_rate": 7.477793161889197e-07, "loss": 0.0122, "step": 407770 }, { "epoch": 4.356856669693894, "grad_norm": 4.5011887550354, "learning_rate": 7.477647232513957e-07, "loss": 0.0154, "step": 407780 }, { "epoch": 4.356963513008173, "grad_norm": 0.06492511183023453, "learning_rate": 7.477501300341266e-07, "loss": 0.0072, "step": 407790 }, { "epoch": 4.357070356322453, "grad_norm": 5.255481243133545, "learning_rate": 7.477355365371291e-07, "loss": 0.011, "step": 407800 }, { "epoch": 4.357177199636733, "grad_norm": 3.1024281978607178, "learning_rate": 7.477209427604192e-07, "loss": 0.0041, "step": 407810 }, { "epoch": 4.3572840429510125, "grad_norm": 0.0006342415581457317, "learning_rate": 7.47706348704014e-07, "loss": 0.0221, "step": 407820 }, { "epoch": 4.357390886265292, "grad_norm": 0.0009189933189190924, "learning_rate": 7.476917543679294e-07, "loss": 0.0069, "step": 407830 }, { "epoch": 4.357497729579571, "grad_norm": 2.13421893119812, "learning_rate": 7.476771597521822e-07, "loss": 0.0086, "step": 407840 }, { "epoch": 4.357604572893851, "grad_norm": 0.536540150642395, "learning_rate": 7.476625648567886e-07, "loss": 0.0138, "step": 407850 }, { "epoch": 4.357711416208131, "grad_norm": 0.11101110279560089, "learning_rate": 7.476479696817657e-07, "loss": 0.0077, "step": 407860 }, { "epoch": 4.357818259522411, "grad_norm": 0.002439629752188921, "learning_rate": 7.476333742271291e-07, "loss": 0.0008, "step": 407870 }, { "epoch": 4.35792510283669, "grad_norm": 0.004294797778129578, "learning_rate": 7.47618778492896e-07, "loss": 0.0031, "step": 407880 }, { "epoch": 4.35803194615097, "grad_norm": 0.8629395365715027, "learning_rate": 7.476041824790827e-07, "loss": 0.001, "step": 407890 }, { "epoch": 4.358138789465249, "grad_norm": 0.43121206760406494, "learning_rate": 7.475895861857054e-07, "loss": 0.0037, "step": 407900 }, { "epoch": 4.3582456327795285, "grad_norm": 0.018856562674045563, "learning_rate": 7.475749896127808e-07, "loss": 0.0038, "step": 407910 }, { "epoch": 4.358352476093809, "grad_norm": 0.021263400092720985, "learning_rate": 7.475603927603252e-07, "loss": 0.0047, "step": 407920 }, { "epoch": 4.358459319408088, "grad_norm": 3.613633394241333, "learning_rate": 7.475457956283554e-07, "loss": 0.0112, "step": 407930 }, { "epoch": 4.358566162722368, "grad_norm": 1.100463628768921, "learning_rate": 7.475311982168877e-07, "loss": 0.0095, "step": 407940 }, { "epoch": 4.358673006036647, "grad_norm": 4.935297012329102, "learning_rate": 7.475166005259385e-07, "loss": 0.0054, "step": 407950 }, { "epoch": 4.358779849350927, "grad_norm": 0.42852213978767395, "learning_rate": 7.475020025555245e-07, "loss": 0.0034, "step": 407960 }, { "epoch": 4.358886692665206, "grad_norm": 0.059535298496484756, "learning_rate": 7.47487404305662e-07, "loss": 0.0105, "step": 407970 }, { "epoch": 4.3589935359794865, "grad_norm": 0.0035237157717347145, "learning_rate": 7.474728057763676e-07, "loss": 0.0169, "step": 407980 }, { "epoch": 4.359100379293766, "grad_norm": 0.0035472605377435684, "learning_rate": 7.474582069676577e-07, "loss": 0.0068, "step": 407990 }, { "epoch": 4.359207222608045, "grad_norm": 0.08564425259828568, "learning_rate": 7.474436078795489e-07, "loss": 0.0018, "step": 408000 }, { "epoch": 4.359314065922325, "grad_norm": 0.002082669176161289, "learning_rate": 7.474290085120574e-07, "loss": 0.0019, "step": 408010 }, { "epoch": 4.359420909236604, "grad_norm": 0.013795103877782822, "learning_rate": 7.474144088651999e-07, "loss": 0.0014, "step": 408020 }, { "epoch": 4.359527752550884, "grad_norm": 3.5723187923431396, "learning_rate": 7.473998089389931e-07, "loss": 0.0034, "step": 408030 }, { "epoch": 4.359634595865164, "grad_norm": 0.6820483803749084, "learning_rate": 7.473852087334529e-07, "loss": 0.0181, "step": 408040 }, { "epoch": 4.359741439179444, "grad_norm": 0.41253310441970825, "learning_rate": 7.473706082485965e-07, "loss": 0.0074, "step": 408050 }, { "epoch": 4.359848282493723, "grad_norm": 7.587732791900635, "learning_rate": 7.473560074844398e-07, "loss": 0.0083, "step": 408060 }, { "epoch": 4.3599551258080025, "grad_norm": 0.0003115962608717382, "learning_rate": 7.473414064409994e-07, "loss": 0.0051, "step": 408070 }, { "epoch": 4.360061969122282, "grad_norm": 0.11902176588773727, "learning_rate": 7.47326805118292e-07, "loss": 0.0181, "step": 408080 }, { "epoch": 4.360168812436561, "grad_norm": 13.40947437286377, "learning_rate": 7.47312203516334e-07, "loss": 0.0329, "step": 408090 }, { "epoch": 4.360275655750842, "grad_norm": 0.22849714756011963, "learning_rate": 7.472976016351417e-07, "loss": 0.0047, "step": 408100 }, { "epoch": 4.360382499065121, "grad_norm": 0.004154140595346689, "learning_rate": 7.47282999474732e-07, "loss": 0.0051, "step": 408110 }, { "epoch": 4.360489342379401, "grad_norm": 0.017984231933951378, "learning_rate": 7.47268397035121e-07, "loss": 0.0029, "step": 408120 }, { "epoch": 4.36059618569368, "grad_norm": 1.0634665489196777, "learning_rate": 7.472537943163253e-07, "loss": 0.0021, "step": 408130 }, { "epoch": 4.36070302900796, "grad_norm": 0.000940941390581429, "learning_rate": 7.472391913183615e-07, "loss": 0.0024, "step": 408140 }, { "epoch": 4.360809872322239, "grad_norm": 0.17276109755039215, "learning_rate": 7.472245880412459e-07, "loss": 0.0312, "step": 408150 }, { "epoch": 4.360916715636519, "grad_norm": 0.007327503990381956, "learning_rate": 7.472099844849951e-07, "loss": 0.0014, "step": 408160 }, { "epoch": 4.361023558950799, "grad_norm": 0.0019353078678250313, "learning_rate": 7.471953806496256e-07, "loss": 0.0285, "step": 408170 }, { "epoch": 4.361130402265078, "grad_norm": 3.095616340637207, "learning_rate": 7.471807765351537e-07, "loss": 0.0062, "step": 408180 }, { "epoch": 4.361237245579358, "grad_norm": 0.004069331102073193, "learning_rate": 7.471661721415962e-07, "loss": 0.0061, "step": 408190 }, { "epoch": 4.361344088893637, "grad_norm": 0.3923150599002838, "learning_rate": 7.471515674689694e-07, "loss": 0.0084, "step": 408200 }, { "epoch": 4.361450932207917, "grad_norm": 0.00796834658831358, "learning_rate": 7.471369625172897e-07, "loss": 0.007, "step": 408210 }, { "epoch": 4.361557775522197, "grad_norm": 0.20933233201503754, "learning_rate": 7.471223572865739e-07, "loss": 0.0021, "step": 408220 }, { "epoch": 4.3616646188364765, "grad_norm": 0.00047958464710973203, "learning_rate": 7.471077517768382e-07, "loss": 0.0122, "step": 408230 }, { "epoch": 4.361771462150756, "grad_norm": 0.011271847411990166, "learning_rate": 7.470931459880993e-07, "loss": 0.0154, "step": 408240 }, { "epoch": 4.361878305465035, "grad_norm": 7.859970569610596, "learning_rate": 7.470785399203735e-07, "loss": 0.0159, "step": 408250 }, { "epoch": 4.361985148779315, "grad_norm": 0.017783524468541145, "learning_rate": 7.470639335736775e-07, "loss": 0.0026, "step": 408260 }, { "epoch": 4.362091992093594, "grad_norm": 0.019394101575016975, "learning_rate": 7.470493269480275e-07, "loss": 0.0066, "step": 408270 }, { "epoch": 4.362198835407875, "grad_norm": 0.07062795013189316, "learning_rate": 7.470347200434404e-07, "loss": 0.0014, "step": 408280 }, { "epoch": 4.362305678722154, "grad_norm": 0.016307571902871132, "learning_rate": 7.470201128599323e-07, "loss": 0.0067, "step": 408290 }, { "epoch": 4.362412522036434, "grad_norm": 0.01525591779500246, "learning_rate": 7.470055053975198e-07, "loss": 0.0001, "step": 408300 }, { "epoch": 4.362519365350713, "grad_norm": 2.1293275356292725, "learning_rate": 7.469908976562196e-07, "loss": 0.0026, "step": 408310 }, { "epoch": 4.3626262086649925, "grad_norm": 0.01705819182097912, "learning_rate": 7.46976289636048e-07, "loss": 0.001, "step": 408320 }, { "epoch": 4.362733051979273, "grad_norm": 4.132820129394531, "learning_rate": 7.469616813370214e-07, "loss": 0.0038, "step": 408330 }, { "epoch": 4.362839895293552, "grad_norm": 0.014395919628441334, "learning_rate": 7.469470727591564e-07, "loss": 0.0244, "step": 408340 }, { "epoch": 4.362946738607832, "grad_norm": 0.19668631255626678, "learning_rate": 7.469324639024697e-07, "loss": 0.0142, "step": 408350 }, { "epoch": 4.363053581922111, "grad_norm": 0.010794839821755886, "learning_rate": 7.469178547669774e-07, "loss": 0.0053, "step": 408360 }, { "epoch": 4.363160425236391, "grad_norm": 0.031837813556194305, "learning_rate": 7.469032453526965e-07, "loss": 0.006, "step": 408370 }, { "epoch": 4.36326726855067, "grad_norm": 0.5394162535667419, "learning_rate": 7.468886356596429e-07, "loss": 0.0049, "step": 408380 }, { "epoch": 4.36337411186495, "grad_norm": 3.979067802429199, "learning_rate": 7.468740256878336e-07, "loss": 0.0269, "step": 408390 }, { "epoch": 4.36348095517923, "grad_norm": 0.0011554080992937088, "learning_rate": 7.468594154372849e-07, "loss": 0.0228, "step": 408400 }, { "epoch": 4.363587798493509, "grad_norm": 0.004920061212033033, "learning_rate": 7.468448049080132e-07, "loss": 0.0026, "step": 408410 }, { "epoch": 4.363694641807789, "grad_norm": 0.0364406555891037, "learning_rate": 7.468301941000351e-07, "loss": 0.0402, "step": 408420 }, { "epoch": 4.363801485122068, "grad_norm": 1.6713330745697021, "learning_rate": 7.468155830133671e-07, "loss": 0.0221, "step": 408430 }, { "epoch": 4.363908328436348, "grad_norm": 0.0697326809167862, "learning_rate": 7.468009716480255e-07, "loss": 0.0004, "step": 408440 }, { "epoch": 4.364015171750628, "grad_norm": 0.1432689130306244, "learning_rate": 7.467863600040272e-07, "loss": 0.0076, "step": 408450 }, { "epoch": 4.364122015064908, "grad_norm": 0.0034986964892596006, "learning_rate": 7.467717480813884e-07, "loss": 0.0027, "step": 408460 }, { "epoch": 4.364228858379187, "grad_norm": 0.0012453807285055518, "learning_rate": 7.467571358801257e-07, "loss": 0.0038, "step": 408470 }, { "epoch": 4.3643357016934665, "grad_norm": 17.96581268310547, "learning_rate": 7.467425234002555e-07, "loss": 0.0127, "step": 408480 }, { "epoch": 4.364442545007746, "grad_norm": 1.8223243951797485, "learning_rate": 7.467279106417945e-07, "loss": 0.011, "step": 408490 }, { "epoch": 4.364549388322025, "grad_norm": 0.15122836828231812, "learning_rate": 7.46713297604759e-07, "loss": 0.007, "step": 408500 }, { "epoch": 4.364656231636305, "grad_norm": 0.0014533615903928876, "learning_rate": 7.466986842891656e-07, "loss": 0.0202, "step": 408510 }, { "epoch": 4.364763074950585, "grad_norm": 0.5903444290161133, "learning_rate": 7.466840706950307e-07, "loss": 0.0112, "step": 408520 }, { "epoch": 4.364869918264865, "grad_norm": 0.0033108049537986517, "learning_rate": 7.466694568223708e-07, "loss": 0.0025, "step": 408530 }, { "epoch": 4.364976761579144, "grad_norm": 0.019607270136475563, "learning_rate": 7.466548426712026e-07, "loss": 0.0098, "step": 408540 }, { "epoch": 4.365083604893424, "grad_norm": 0.010717079043388367, "learning_rate": 7.466402282415424e-07, "loss": 0.0014, "step": 408550 }, { "epoch": 4.365190448207703, "grad_norm": 0.8464290499687195, "learning_rate": 7.466256135334069e-07, "loss": 0.0053, "step": 408560 }, { "epoch": 4.365297291521983, "grad_norm": 2.2904014587402344, "learning_rate": 7.466109985468122e-07, "loss": 0.0198, "step": 408570 }, { "epoch": 4.365404134836263, "grad_norm": 1.6740577220916748, "learning_rate": 7.465963832817752e-07, "loss": 0.0012, "step": 408580 }, { "epoch": 4.365510978150542, "grad_norm": 0.02430049516260624, "learning_rate": 7.465817677383123e-07, "loss": 0.0047, "step": 408590 }, { "epoch": 4.365617821464822, "grad_norm": 4.610909938812256, "learning_rate": 7.465671519164399e-07, "loss": 0.0228, "step": 408600 }, { "epoch": 4.365724664779101, "grad_norm": 0.0017861933447420597, "learning_rate": 7.465525358161746e-07, "loss": 0.0151, "step": 408610 }, { "epoch": 4.365831508093381, "grad_norm": 0.5387423634529114, "learning_rate": 7.465379194375329e-07, "loss": 0.0147, "step": 408620 }, { "epoch": 4.365938351407661, "grad_norm": 0.008679697290062904, "learning_rate": 7.465233027805314e-07, "loss": 0.0083, "step": 408630 }, { "epoch": 4.3660451947219405, "grad_norm": 0.47462350130081177, "learning_rate": 7.465086858451863e-07, "loss": 0.0086, "step": 408640 }, { "epoch": 4.36615203803622, "grad_norm": 0.8861795663833618, "learning_rate": 7.464940686315146e-07, "loss": 0.0201, "step": 408650 }, { "epoch": 4.366258881350499, "grad_norm": 0.019486041739583015, "learning_rate": 7.464794511395322e-07, "loss": 0.0193, "step": 408660 }, { "epoch": 4.366365724664779, "grad_norm": 0.018068093806505203, "learning_rate": 7.46464833369256e-07, "loss": 0.0043, "step": 408670 }, { "epoch": 4.366472567979058, "grad_norm": 0.7238363027572632, "learning_rate": 7.464502153207022e-07, "loss": 0.011, "step": 408680 }, { "epoch": 4.366579411293339, "grad_norm": 8.10653305053711, "learning_rate": 7.464355969938878e-07, "loss": 0.0242, "step": 408690 }, { "epoch": 4.366686254607618, "grad_norm": 0.007905611768364906, "learning_rate": 7.464209783888289e-07, "loss": 0.0003, "step": 408700 }, { "epoch": 4.366793097921898, "grad_norm": 10.03791332244873, "learning_rate": 7.464063595055421e-07, "loss": 0.0258, "step": 408710 }, { "epoch": 4.366899941236177, "grad_norm": 2.2205958366394043, "learning_rate": 7.46391740344044e-07, "loss": 0.0112, "step": 408720 }, { "epoch": 4.3670067845504565, "grad_norm": 0.577629029750824, "learning_rate": 7.463771209043509e-07, "loss": 0.0053, "step": 408730 }, { "epoch": 4.367113627864736, "grad_norm": 0.1301993727684021, "learning_rate": 7.463625011864795e-07, "loss": 0.0025, "step": 408740 }, { "epoch": 4.367220471179016, "grad_norm": 0.6253333687782288, "learning_rate": 7.463478811904463e-07, "loss": 0.0213, "step": 408750 }, { "epoch": 4.367327314493296, "grad_norm": 3.1550943851470947, "learning_rate": 7.463332609162676e-07, "loss": 0.0007, "step": 408760 }, { "epoch": 4.367434157807575, "grad_norm": 0.7934465408325195, "learning_rate": 7.463186403639602e-07, "loss": 0.0029, "step": 408770 }, { "epoch": 4.367541001121855, "grad_norm": 0.02372482605278492, "learning_rate": 7.463040195335404e-07, "loss": 0.002, "step": 408780 }, { "epoch": 4.367647844436134, "grad_norm": 0.006671660114079714, "learning_rate": 7.462893984250246e-07, "loss": 0.0165, "step": 408790 }, { "epoch": 4.367754687750414, "grad_norm": 12.926223754882812, "learning_rate": 7.462747770384299e-07, "loss": 0.0137, "step": 408800 }, { "epoch": 4.367861531064694, "grad_norm": 4.6535563468933105, "learning_rate": 7.46260155373772e-07, "loss": 0.014, "step": 408810 }, { "epoch": 4.3679683743789735, "grad_norm": 0.19753333926200867, "learning_rate": 7.46245533431068e-07, "loss": 0.001, "step": 408820 }, { "epoch": 4.368075217693253, "grad_norm": 0.2970312237739563, "learning_rate": 7.462309112103342e-07, "loss": 0.0016, "step": 408830 }, { "epoch": 4.368182061007532, "grad_norm": 0.1176815778017044, "learning_rate": 7.46216288711587e-07, "loss": 0.0128, "step": 408840 }, { "epoch": 4.368288904321812, "grad_norm": 0.0009959990857169032, "learning_rate": 7.462016659348431e-07, "loss": 0.0076, "step": 408850 }, { "epoch": 4.368395747636091, "grad_norm": 13.657305717468262, "learning_rate": 7.46187042880119e-07, "loss": 0.0136, "step": 408860 }, { "epoch": 4.368502590950372, "grad_norm": 0.021645154803991318, "learning_rate": 7.46172419547431e-07, "loss": 0.0024, "step": 408870 }, { "epoch": 4.368609434264651, "grad_norm": 0.059889163821935654, "learning_rate": 7.461577959367958e-07, "loss": 0.0154, "step": 408880 }, { "epoch": 4.3687162775789306, "grad_norm": 0.00386129692196846, "learning_rate": 7.4614317204823e-07, "loss": 0.0066, "step": 408890 }, { "epoch": 4.36882312089321, "grad_norm": 0.2612663805484772, "learning_rate": 7.461285478817498e-07, "loss": 0.039, "step": 408900 }, { "epoch": 4.3689299642074895, "grad_norm": 0.005234900861978531, "learning_rate": 7.461139234373721e-07, "loss": 0.0034, "step": 408910 }, { "epoch": 4.369036807521769, "grad_norm": 3.2560081481933594, "learning_rate": 7.460992987151131e-07, "loss": 0.0037, "step": 408920 }, { "epoch": 4.369143650836049, "grad_norm": 0.003983594477176666, "learning_rate": 7.460846737149893e-07, "loss": 0.0051, "step": 408930 }, { "epoch": 4.369250494150329, "grad_norm": 0.000783145020250231, "learning_rate": 7.460700484370175e-07, "loss": 0.0087, "step": 408940 }, { "epoch": 4.369357337464608, "grad_norm": 1.7322025299072266, "learning_rate": 7.46055422881214e-07, "loss": 0.0051, "step": 408950 }, { "epoch": 4.369464180778888, "grad_norm": 0.012338675558567047, "learning_rate": 7.460407970475952e-07, "loss": 0.0058, "step": 408960 }, { "epoch": 4.369571024093167, "grad_norm": 0.0009160039480775595, "learning_rate": 7.460261709361781e-07, "loss": 0.01, "step": 408970 }, { "epoch": 4.369677867407447, "grad_norm": 1.6233292818069458, "learning_rate": 7.460115445469786e-07, "loss": 0.009, "step": 408980 }, { "epoch": 4.369784710721727, "grad_norm": 3.1073110103607178, "learning_rate": 7.459969178800136e-07, "loss": 0.0049, "step": 408990 }, { "epoch": 4.369891554036006, "grad_norm": 2.3976402282714844, "learning_rate": 7.459822909352997e-07, "loss": 0.0012, "step": 409000 }, { "epoch": 4.369998397350286, "grad_norm": 0.008870412595570087, "learning_rate": 7.459676637128529e-07, "loss": 0.004, "step": 409010 }, { "epoch": 4.370105240664565, "grad_norm": 0.5958292484283447, "learning_rate": 7.459530362126902e-07, "loss": 0.0041, "step": 409020 }, { "epoch": 4.370212083978845, "grad_norm": 0.06701725721359253, "learning_rate": 7.459384084348279e-07, "loss": 0.0049, "step": 409030 }, { "epoch": 4.370318927293125, "grad_norm": 0.0007977205677889287, "learning_rate": 7.459237803792826e-07, "loss": 0.0135, "step": 409040 }, { "epoch": 4.370425770607405, "grad_norm": 0.0823666900396347, "learning_rate": 7.459091520460708e-07, "loss": 0.0142, "step": 409050 }, { "epoch": 4.370532613921684, "grad_norm": 0.001295079360716045, "learning_rate": 7.458945234352091e-07, "loss": 0.0023, "step": 409060 }, { "epoch": 4.3706394572359635, "grad_norm": 0.18665246665477753, "learning_rate": 7.458798945467138e-07, "loss": 0.0041, "step": 409070 }, { "epoch": 4.370746300550243, "grad_norm": 0.7333768010139465, "learning_rate": 7.458652653806017e-07, "loss": 0.0083, "step": 409080 }, { "epoch": 4.370853143864522, "grad_norm": 0.0021726100239902735, "learning_rate": 7.458506359368891e-07, "loss": 0.0044, "step": 409090 }, { "epoch": 4.370959987178802, "grad_norm": 0.021900886669754982, "learning_rate": 7.458360062155924e-07, "loss": 0.0051, "step": 409100 }, { "epoch": 4.371066830493082, "grad_norm": 0.07672546058893204, "learning_rate": 7.458213762167285e-07, "loss": 0.0019, "step": 409110 }, { "epoch": 4.371173673807362, "grad_norm": 0.26009827852249146, "learning_rate": 7.458067459403135e-07, "loss": 0.0077, "step": 409120 }, { "epoch": 4.371280517121641, "grad_norm": 0.005503224674612284, "learning_rate": 7.457921153863642e-07, "loss": 0.0115, "step": 409130 }, { "epoch": 4.371387360435921, "grad_norm": 0.006203196942806244, "learning_rate": 7.457774845548972e-07, "loss": 0.0125, "step": 409140 }, { "epoch": 4.3714942037502, "grad_norm": 0.011714168824255466, "learning_rate": 7.457628534459287e-07, "loss": 0.0055, "step": 409150 }, { "epoch": 4.37160104706448, "grad_norm": 0.05362023040652275, "learning_rate": 7.457482220594755e-07, "loss": 0.0164, "step": 409160 }, { "epoch": 4.37170789037876, "grad_norm": 0.014157626777887344, "learning_rate": 7.457335903955539e-07, "loss": 0.009, "step": 409170 }, { "epoch": 4.371814733693039, "grad_norm": 0.03730364516377449, "learning_rate": 7.457189584541807e-07, "loss": 0.021, "step": 409180 }, { "epoch": 4.371921577007319, "grad_norm": 0.015500001609325409, "learning_rate": 7.457043262353722e-07, "loss": 0.001, "step": 409190 }, { "epoch": 4.372028420321598, "grad_norm": 0.036683693528175354, "learning_rate": 7.456896937391449e-07, "loss": 0.0007, "step": 409200 }, { "epoch": 4.372135263635878, "grad_norm": 6.848859786987305, "learning_rate": 7.456750609655154e-07, "loss": 0.0083, "step": 409210 }, { "epoch": 4.372242106950157, "grad_norm": 0.49532583355903625, "learning_rate": 7.456604279145002e-07, "loss": 0.0133, "step": 409220 }, { "epoch": 4.3723489502644375, "grad_norm": 0.11039391160011292, "learning_rate": 7.456457945861159e-07, "loss": 0.0147, "step": 409230 }, { "epoch": 4.372455793578717, "grad_norm": 0.04877655580639839, "learning_rate": 7.45631160980379e-07, "loss": 0.0157, "step": 409240 }, { "epoch": 4.372562636892996, "grad_norm": 0.08042852580547333, "learning_rate": 7.456165270973058e-07, "loss": 0.0036, "step": 409250 }, { "epoch": 4.372669480207276, "grad_norm": 0.009423048235476017, "learning_rate": 7.456018929369132e-07, "loss": 0.002, "step": 409260 }, { "epoch": 4.372776323521555, "grad_norm": 2.530308485031128, "learning_rate": 7.455872584992173e-07, "loss": 0.0011, "step": 409270 }, { "epoch": 4.372883166835836, "grad_norm": 0.02739749103784561, "learning_rate": 7.455726237842351e-07, "loss": 0.0013, "step": 409280 }, { "epoch": 4.372990010150115, "grad_norm": 0.0036773125175386667, "learning_rate": 7.455579887919829e-07, "loss": 0.0051, "step": 409290 }, { "epoch": 4.373096853464395, "grad_norm": 0.012092906050384045, "learning_rate": 7.455433535224768e-07, "loss": 0.0155, "step": 409300 }, { "epoch": 4.373203696778674, "grad_norm": 18.971200942993164, "learning_rate": 7.455287179757341e-07, "loss": 0.0163, "step": 409310 }, { "epoch": 4.3733105400929535, "grad_norm": 0.0218738354742527, "learning_rate": 7.455140821517709e-07, "loss": 0.0022, "step": 409320 }, { "epoch": 4.373417383407233, "grad_norm": 0.014290724880993366, "learning_rate": 7.454994460506037e-07, "loss": 0.0093, "step": 409330 }, { "epoch": 4.373524226721513, "grad_norm": 0.17520004510879517, "learning_rate": 7.454848096722491e-07, "loss": 0.0416, "step": 409340 }, { "epoch": 4.373631070035793, "grad_norm": 0.2094319760799408, "learning_rate": 7.454701730167235e-07, "loss": 0.0009, "step": 409350 }, { "epoch": 4.373737913350072, "grad_norm": 0.004199606366455555, "learning_rate": 7.454555360840437e-07, "loss": 0.0004, "step": 409360 }, { "epoch": 4.373844756664352, "grad_norm": 0.6507675647735596, "learning_rate": 7.454408988742262e-07, "loss": 0.0068, "step": 409370 }, { "epoch": 4.373951599978631, "grad_norm": 0.9323941469192505, "learning_rate": 7.454262613872871e-07, "loss": 0.0045, "step": 409380 }, { "epoch": 4.374058443292911, "grad_norm": 0.003436888102442026, "learning_rate": 7.454116236232435e-07, "loss": 0.001, "step": 409390 }, { "epoch": 4.374165286607191, "grad_norm": 3.6503515243530273, "learning_rate": 7.453969855821115e-07, "loss": 0.0043, "step": 409400 }, { "epoch": 4.37427212992147, "grad_norm": 0.017328260466456413, "learning_rate": 7.453823472639078e-07, "loss": 0.005, "step": 409410 }, { "epoch": 4.37437897323575, "grad_norm": 0.014663227833807468, "learning_rate": 7.453677086686489e-07, "loss": 0.0055, "step": 409420 }, { "epoch": 4.374485816550029, "grad_norm": 0.012453624047338963, "learning_rate": 7.453530697963515e-07, "loss": 0.0006, "step": 409430 }, { "epoch": 4.374592659864309, "grad_norm": 0.15342144668102264, "learning_rate": 7.453384306470317e-07, "loss": 0.0014, "step": 409440 }, { "epoch": 4.374699503178588, "grad_norm": 0.006410188507288694, "learning_rate": 7.453237912207064e-07, "loss": 0.0033, "step": 409450 }, { "epoch": 4.374806346492869, "grad_norm": 2.3255820274353027, "learning_rate": 7.453091515173921e-07, "loss": 0.0159, "step": 409460 }, { "epoch": 4.374913189807148, "grad_norm": 9.627211570739746, "learning_rate": 7.452945115371052e-07, "loss": 0.0096, "step": 409470 }, { "epoch": 4.3750200331214275, "grad_norm": 0.10252183675765991, "learning_rate": 7.452798712798622e-07, "loss": 0.0002, "step": 409480 }, { "epoch": 4.375126876435707, "grad_norm": 0.06468590348958969, "learning_rate": 7.452652307456798e-07, "loss": 0.0176, "step": 409490 }, { "epoch": 4.375233719749986, "grad_norm": 0.1948866844177246, "learning_rate": 7.452505899345744e-07, "loss": 0.002, "step": 409500 }, { "epoch": 4.375340563064266, "grad_norm": 0.0032764130737632513, "learning_rate": 7.452359488465627e-07, "loss": 0.0176, "step": 409510 }, { "epoch": 4.375447406378546, "grad_norm": 0.0562153160572052, "learning_rate": 7.45221307481661e-07, "loss": 0.0666, "step": 409520 }, { "epoch": 4.375554249692826, "grad_norm": 1.639473557472229, "learning_rate": 7.452066658398859e-07, "loss": 0.0065, "step": 409530 }, { "epoch": 4.375661093007105, "grad_norm": 1.565122127532959, "learning_rate": 7.451920239212542e-07, "loss": 0.0094, "step": 409540 }, { "epoch": 4.375767936321385, "grad_norm": 0.003403750015422702, "learning_rate": 7.451773817257819e-07, "loss": 0.0101, "step": 409550 }, { "epoch": 4.375874779635664, "grad_norm": 0.8265910744667053, "learning_rate": 7.45162739253486e-07, "loss": 0.0097, "step": 409560 }, { "epoch": 4.3759816229499435, "grad_norm": 0.015043447725474834, "learning_rate": 7.451480965043829e-07, "loss": 0.0007, "step": 409570 }, { "epoch": 4.376088466264224, "grad_norm": 14.558725357055664, "learning_rate": 7.451334534784888e-07, "loss": 0.0107, "step": 409580 }, { "epoch": 4.376195309578503, "grad_norm": 0.0019992340821772814, "learning_rate": 7.451188101758208e-07, "loss": 0.0028, "step": 409590 }, { "epoch": 4.376302152892783, "grad_norm": 3.016789674758911, "learning_rate": 7.451041665963952e-07, "loss": 0.0114, "step": 409600 }, { "epoch": 4.376408996207062, "grad_norm": 0.44704362750053406, "learning_rate": 7.450895227402283e-07, "loss": 0.0099, "step": 409610 }, { "epoch": 4.376515839521342, "grad_norm": 0.0014834213070571423, "learning_rate": 7.45074878607337e-07, "loss": 0.0058, "step": 409620 }, { "epoch": 4.376622682835621, "grad_norm": 0.02930617332458496, "learning_rate": 7.450602341977376e-07, "loss": 0.0245, "step": 409630 }, { "epoch": 4.3767295261499015, "grad_norm": 0.05142521113157272, "learning_rate": 7.450455895114467e-07, "loss": 0.0099, "step": 409640 }, { "epoch": 4.376836369464181, "grad_norm": 0.030594700947403908, "learning_rate": 7.450309445484808e-07, "loss": 0.022, "step": 409650 }, { "epoch": 4.37694321277846, "grad_norm": 0.2506679892539978, "learning_rate": 7.450162993088566e-07, "loss": 0.0007, "step": 409660 }, { "epoch": 4.37705005609274, "grad_norm": 1.6525062322616577, "learning_rate": 7.450016537925903e-07, "loss": 0.0009, "step": 409670 }, { "epoch": 4.377156899407019, "grad_norm": 0.03395812585949898, "learning_rate": 7.449870079996988e-07, "loss": 0.0076, "step": 409680 }, { "epoch": 4.377263742721299, "grad_norm": 0.017064783722162247, "learning_rate": 7.449723619301985e-07, "loss": 0.0252, "step": 409690 }, { "epoch": 4.377370586035579, "grad_norm": 0.015475068241357803, "learning_rate": 7.449577155841058e-07, "loss": 0.0074, "step": 409700 }, { "epoch": 4.377477429349859, "grad_norm": 0.7291357517242432, "learning_rate": 7.449430689614375e-07, "loss": 0.0269, "step": 409710 }, { "epoch": 4.377584272664138, "grad_norm": 1.6444063186645508, "learning_rate": 7.449284220622098e-07, "loss": 0.0011, "step": 409720 }, { "epoch": 4.3776911159784175, "grad_norm": 0.11833366006612778, "learning_rate": 7.449137748864395e-07, "loss": 0.0043, "step": 409730 }, { "epoch": 4.377797959292697, "grad_norm": 0.00239307782612741, "learning_rate": 7.448991274341431e-07, "loss": 0.006, "step": 409740 }, { "epoch": 4.3779048026069765, "grad_norm": 3.693743944168091, "learning_rate": 7.448844797053371e-07, "loss": 0.014, "step": 409750 }, { "epoch": 4.378011645921257, "grad_norm": 0.011119759641587734, "learning_rate": 7.448698317000379e-07, "loss": 0.0322, "step": 409760 }, { "epoch": 4.378118489235536, "grad_norm": 1.3461087942123413, "learning_rate": 7.448551834182623e-07, "loss": 0.0064, "step": 409770 }, { "epoch": 4.378225332549816, "grad_norm": 4.895817756652832, "learning_rate": 7.448405348600268e-07, "loss": 0.0405, "step": 409780 }, { "epoch": 4.378332175864095, "grad_norm": 10.606474876403809, "learning_rate": 7.448258860253478e-07, "loss": 0.0112, "step": 409790 }, { "epoch": 4.378439019178375, "grad_norm": 1.6251513957977295, "learning_rate": 7.448112369142418e-07, "loss": 0.0013, "step": 409800 }, { "epoch": 4.378545862492654, "grad_norm": 0.0176936574280262, "learning_rate": 7.447965875267255e-07, "loss": 0.0148, "step": 409810 }, { "epoch": 4.378652705806934, "grad_norm": 1.3431118726730347, "learning_rate": 7.447819378628153e-07, "loss": 0.0033, "step": 409820 }, { "epoch": 4.378759549121214, "grad_norm": 0.006514163222163916, "learning_rate": 7.44767287922528e-07, "loss": 0.0241, "step": 409830 }, { "epoch": 4.378866392435493, "grad_norm": 0.01539213489741087, "learning_rate": 7.447526377058799e-07, "loss": 0.0192, "step": 409840 }, { "epoch": 4.378973235749773, "grad_norm": 0.004791216924786568, "learning_rate": 7.447379872128875e-07, "loss": 0.0099, "step": 409850 }, { "epoch": 4.379080079064052, "grad_norm": 0.12056487798690796, "learning_rate": 7.447233364435675e-07, "loss": 0.0011, "step": 409860 }, { "epoch": 4.379186922378333, "grad_norm": 2.490441083908081, "learning_rate": 7.447086853979363e-07, "loss": 0.0259, "step": 409870 }, { "epoch": 4.379293765692612, "grad_norm": 0.1156432256102562, "learning_rate": 7.446940340760107e-07, "loss": 0.0031, "step": 409880 }, { "epoch": 4.3794006090068915, "grad_norm": 0.00359725602902472, "learning_rate": 7.446793824778071e-07, "loss": 0.0157, "step": 409890 }, { "epoch": 4.379507452321171, "grad_norm": 0.06360000371932983, "learning_rate": 7.446647306033418e-07, "loss": 0.0162, "step": 409900 }, { "epoch": 4.3796142956354505, "grad_norm": 0.146302729845047, "learning_rate": 7.446500784526316e-07, "loss": 0.0359, "step": 409910 }, { "epoch": 4.37972113894973, "grad_norm": 1.206688404083252, "learning_rate": 7.446354260256931e-07, "loss": 0.0051, "step": 409920 }, { "epoch": 4.379827982264009, "grad_norm": 0.2606537640094757, "learning_rate": 7.446207733225426e-07, "loss": 0.0025, "step": 409930 }, { "epoch": 4.37993482557829, "grad_norm": 22.32881736755371, "learning_rate": 7.446061203431969e-07, "loss": 0.0022, "step": 409940 }, { "epoch": 4.380041668892569, "grad_norm": 0.003787491237744689, "learning_rate": 7.445914670876723e-07, "loss": 0.0065, "step": 409950 }, { "epoch": 4.380148512206849, "grad_norm": 1.4552762508392334, "learning_rate": 7.445768135559855e-07, "loss": 0.0149, "step": 409960 }, { "epoch": 4.380255355521128, "grad_norm": 0.08971177786588669, "learning_rate": 7.445621597481533e-07, "loss": 0.0016, "step": 409970 }, { "epoch": 4.380362198835408, "grad_norm": 15.669358253479004, "learning_rate": 7.445475056641917e-07, "loss": 0.0208, "step": 409980 }, { "epoch": 4.380469042149688, "grad_norm": 0.019126256927847862, "learning_rate": 7.445328513041176e-07, "loss": 0.0019, "step": 409990 }, { "epoch": 4.380575885463967, "grad_norm": 1.3055912256240845, "learning_rate": 7.445181966679474e-07, "loss": 0.0091, "step": 410000 }, { "epoch": 4.380682728778247, "grad_norm": 0.10541027039289474, "learning_rate": 7.445035417556977e-07, "loss": 0.0014, "step": 410010 }, { "epoch": 4.380789572092526, "grad_norm": 0.9107683897018433, "learning_rate": 7.444888865673851e-07, "loss": 0.0163, "step": 410020 }, { "epoch": 4.380896415406806, "grad_norm": 0.0010146587155759335, "learning_rate": 7.44474231103026e-07, "loss": 0.0036, "step": 410030 }, { "epoch": 4.381003258721085, "grad_norm": 2.4413583278656006, "learning_rate": 7.444595753626371e-07, "loss": 0.0087, "step": 410040 }, { "epoch": 4.3811101020353655, "grad_norm": 7.938812732696533, "learning_rate": 7.444449193462349e-07, "loss": 0.0246, "step": 410050 }, { "epoch": 4.381216945349645, "grad_norm": 0.024364881217479706, "learning_rate": 7.444302630538359e-07, "loss": 0.0031, "step": 410060 }, { "epoch": 4.3813237886639245, "grad_norm": 0.006251469720155001, "learning_rate": 7.444156064854567e-07, "loss": 0.0009, "step": 410070 }, { "epoch": 4.381430631978204, "grad_norm": 0.0021007664036005735, "learning_rate": 7.444009496411139e-07, "loss": 0.0127, "step": 410080 }, { "epoch": 4.381537475292483, "grad_norm": 0.0045603197067976, "learning_rate": 7.44386292520824e-07, "loss": 0.0071, "step": 410090 }, { "epoch": 4.381644318606763, "grad_norm": 0.02333647757768631, "learning_rate": 7.443716351246034e-07, "loss": 0.0203, "step": 410100 }, { "epoch": 4.381751161921043, "grad_norm": 2.2417500019073486, "learning_rate": 7.443569774524687e-07, "loss": 0.0087, "step": 410110 }, { "epoch": 4.381858005235323, "grad_norm": 0.01282715518027544, "learning_rate": 7.443423195044367e-07, "loss": 0.0006, "step": 410120 }, { "epoch": 4.381964848549602, "grad_norm": 4.69230318069458, "learning_rate": 7.443276612805238e-07, "loss": 0.0063, "step": 410130 }, { "epoch": 4.382071691863882, "grad_norm": 0.0014851090963929892, "learning_rate": 7.443130027807463e-07, "loss": 0.0131, "step": 410140 }, { "epoch": 4.382178535178161, "grad_norm": 0.46950700879096985, "learning_rate": 7.442983440051213e-07, "loss": 0.0031, "step": 410150 }, { "epoch": 4.3822853784924405, "grad_norm": 0.12718871235847473, "learning_rate": 7.442836849536648e-07, "loss": 0.0065, "step": 410160 }, { "epoch": 4.382392221806721, "grad_norm": 0.0018039734568446875, "learning_rate": 7.442690256263937e-07, "loss": 0.0094, "step": 410170 }, { "epoch": 4.382499065121, "grad_norm": 5.1024370193481445, "learning_rate": 7.442543660233244e-07, "loss": 0.0133, "step": 410180 }, { "epoch": 4.38260590843528, "grad_norm": 0.32589784264564514, "learning_rate": 7.442397061444734e-07, "loss": 0.0102, "step": 410190 }, { "epoch": 4.382712751749559, "grad_norm": 1.4799838066101074, "learning_rate": 7.442250459898575e-07, "loss": 0.0031, "step": 410200 }, { "epoch": 4.382819595063839, "grad_norm": 0.024849260225892067, "learning_rate": 7.44210385559493e-07, "loss": 0.0162, "step": 410210 }, { "epoch": 4.382926438378118, "grad_norm": 0.0026501198299229145, "learning_rate": 7.441957248533966e-07, "loss": 0.0034, "step": 410220 }, { "epoch": 4.3830332816923985, "grad_norm": 0.010981699451804161, "learning_rate": 7.441810638715848e-07, "loss": 0.0138, "step": 410230 }, { "epoch": 4.383140125006678, "grad_norm": 4.27268648147583, "learning_rate": 7.44166402614074e-07, "loss": 0.0026, "step": 410240 }, { "epoch": 4.383246968320957, "grad_norm": 0.032959435135126114, "learning_rate": 7.441517410808811e-07, "loss": 0.0105, "step": 410250 }, { "epoch": 4.383353811635237, "grad_norm": 0.11740487068891525, "learning_rate": 7.441370792720224e-07, "loss": 0.002, "step": 410260 }, { "epoch": 4.383460654949516, "grad_norm": 0.03482243791222572, "learning_rate": 7.441224171875145e-07, "loss": 0.0201, "step": 410270 }, { "epoch": 4.383567498263796, "grad_norm": 1.0950356721878052, "learning_rate": 7.441077548273739e-07, "loss": 0.0273, "step": 410280 }, { "epoch": 4.383674341578076, "grad_norm": 0.005289306864142418, "learning_rate": 7.440930921916173e-07, "loss": 0.0002, "step": 410290 }, { "epoch": 4.383781184892356, "grad_norm": 3.8722951412200928, "learning_rate": 7.440784292802611e-07, "loss": 0.0021, "step": 410300 }, { "epoch": 4.383888028206635, "grad_norm": 1.119446873664856, "learning_rate": 7.44063766093322e-07, "loss": 0.0128, "step": 410310 }, { "epoch": 4.3839948715209145, "grad_norm": 5.018190383911133, "learning_rate": 7.440491026308165e-07, "loss": 0.0057, "step": 410320 }, { "epoch": 4.384101714835194, "grad_norm": 0.0023201184812933207, "learning_rate": 7.440344388927611e-07, "loss": 0.0096, "step": 410330 }, { "epoch": 4.384208558149473, "grad_norm": 0.0527840219438076, "learning_rate": 7.440197748791724e-07, "loss": 0.0044, "step": 410340 }, { "epoch": 4.384315401463754, "grad_norm": 14.561637878417969, "learning_rate": 7.440051105900669e-07, "loss": 0.0147, "step": 410350 }, { "epoch": 4.384422244778033, "grad_norm": 0.15422505140304565, "learning_rate": 7.439904460254613e-07, "loss": 0.0032, "step": 410360 }, { "epoch": 4.384529088092313, "grad_norm": 0.0017220464069396257, "learning_rate": 7.439757811853721e-07, "loss": 0.0049, "step": 410370 }, { "epoch": 4.384635931406592, "grad_norm": 0.00285289715975523, "learning_rate": 7.439611160698157e-07, "loss": 0.0003, "step": 410380 }, { "epoch": 4.384742774720872, "grad_norm": 0.49755245447158813, "learning_rate": 7.43946450678809e-07, "loss": 0.0048, "step": 410390 }, { "epoch": 4.384849618035151, "grad_norm": 0.00043450170778669417, "learning_rate": 7.439317850123681e-07, "loss": 0.0128, "step": 410400 }, { "epoch": 4.384956461349431, "grad_norm": 0.1470605880022049, "learning_rate": 7.439171190705099e-07, "loss": 0.0037, "step": 410410 }, { "epoch": 4.385063304663711, "grad_norm": 0.042265020310878754, "learning_rate": 7.439024528532509e-07, "loss": 0.0086, "step": 410420 }, { "epoch": 4.38517014797799, "grad_norm": 0.0006293925107456744, "learning_rate": 7.438877863606076e-07, "loss": 0.0117, "step": 410430 }, { "epoch": 4.38527699129227, "grad_norm": 0.3538413345813751, "learning_rate": 7.438731195925966e-07, "loss": 0.0014, "step": 410440 }, { "epoch": 4.385383834606549, "grad_norm": 0.025436554104089737, "learning_rate": 7.438584525492344e-07, "loss": 0.0018, "step": 410450 }, { "epoch": 4.385490677920829, "grad_norm": 0.019909555092453957, "learning_rate": 7.438437852305377e-07, "loss": 0.0001, "step": 410460 }, { "epoch": 4.385597521235109, "grad_norm": 0.0019461431074887514, "learning_rate": 7.438291176365227e-07, "loss": 0.0062, "step": 410470 }, { "epoch": 4.3857043645493885, "grad_norm": 0.1738073080778122, "learning_rate": 7.438144497672065e-07, "loss": 0.0014, "step": 410480 }, { "epoch": 4.385811207863668, "grad_norm": 0.0029252441599965096, "learning_rate": 7.437997816226054e-07, "loss": 0.0203, "step": 410490 }, { "epoch": 4.385918051177947, "grad_norm": 0.06000825762748718, "learning_rate": 7.437851132027357e-07, "loss": 0.0073, "step": 410500 }, { "epoch": 4.386024894492227, "grad_norm": 0.3268675208091736, "learning_rate": 7.437704445076145e-07, "loss": 0.0004, "step": 410510 }, { "epoch": 4.386131737806506, "grad_norm": 3.947622537612915, "learning_rate": 7.437557755372578e-07, "loss": 0.0182, "step": 410520 }, { "epoch": 4.386238581120787, "grad_norm": 0.4501623809337616, "learning_rate": 7.437411062916827e-07, "loss": 0.0211, "step": 410530 }, { "epoch": 4.386345424435066, "grad_norm": 0.11003787815570831, "learning_rate": 7.437264367709053e-07, "loss": 0.0643, "step": 410540 }, { "epoch": 4.386452267749346, "grad_norm": 6.174858570098877, "learning_rate": 7.437117669749423e-07, "loss": 0.0119, "step": 410550 }, { "epoch": 4.386559111063625, "grad_norm": 0.25805240869522095, "learning_rate": 7.436970969038105e-07, "loss": 0.0156, "step": 410560 }, { "epoch": 4.3866659543779045, "grad_norm": 0.015882378444075584, "learning_rate": 7.436824265575264e-07, "loss": 0.0053, "step": 410570 }, { "epoch": 4.386772797692185, "grad_norm": 0.014876922592520714, "learning_rate": 7.436677559361061e-07, "loss": 0.0093, "step": 410580 }, { "epoch": 4.386879641006464, "grad_norm": 0.03467792645096779, "learning_rate": 7.436530850395667e-07, "loss": 0.0018, "step": 410590 }, { "epoch": 4.386986484320744, "grad_norm": 0.29654937982559204, "learning_rate": 7.436384138679246e-07, "loss": 0.0086, "step": 410600 }, { "epoch": 4.387093327635023, "grad_norm": 0.016032839193940163, "learning_rate": 7.436237424211961e-07, "loss": 0.0128, "step": 410610 }, { "epoch": 4.387200170949303, "grad_norm": 0.056740351021289825, "learning_rate": 7.436090706993984e-07, "loss": 0.0016, "step": 410620 }, { "epoch": 4.387307014263582, "grad_norm": 2.0673043727874756, "learning_rate": 7.435943987025475e-07, "loss": 0.0022, "step": 410630 }, { "epoch": 4.387413857577862, "grad_norm": 0.0011339704506099224, "learning_rate": 7.4357972643066e-07, "loss": 0.0092, "step": 410640 }, { "epoch": 4.387520700892142, "grad_norm": 1.0005831718444824, "learning_rate": 7.435650538837528e-07, "loss": 0.0046, "step": 410650 }, { "epoch": 4.387627544206421, "grad_norm": 0.00608584750443697, "learning_rate": 7.435503810618422e-07, "loss": 0.0042, "step": 410660 }, { "epoch": 4.387734387520701, "grad_norm": 3.505061149597168, "learning_rate": 7.435357079649448e-07, "loss": 0.0598, "step": 410670 }, { "epoch": 4.38784123083498, "grad_norm": 0.01034887321293354, "learning_rate": 7.435210345930772e-07, "loss": 0.0025, "step": 410680 }, { "epoch": 4.38794807414926, "grad_norm": 0.013840260915458202, "learning_rate": 7.435063609462562e-07, "loss": 0.0099, "step": 410690 }, { "epoch": 4.38805491746354, "grad_norm": 0.06803791224956512, "learning_rate": 7.434916870244979e-07, "loss": 0.0067, "step": 410700 }, { "epoch": 4.38816176077782, "grad_norm": 0.03942970186471939, "learning_rate": 7.434770128278192e-07, "loss": 0.004, "step": 410710 }, { "epoch": 4.388268604092099, "grad_norm": 0.4039953947067261, "learning_rate": 7.434623383562364e-07, "loss": 0.0052, "step": 410720 }, { "epoch": 4.3883754474063785, "grad_norm": 0.05691424384713173, "learning_rate": 7.434476636097664e-07, "loss": 0.0246, "step": 410730 }, { "epoch": 4.388482290720658, "grad_norm": 1.8587974309921265, "learning_rate": 7.434329885884256e-07, "loss": 0.0168, "step": 410740 }, { "epoch": 4.3885891340349374, "grad_norm": 0.004187670536339283, "learning_rate": 7.434183132922306e-07, "loss": 0.0077, "step": 410750 }, { "epoch": 4.388695977349217, "grad_norm": 0.012113054282963276, "learning_rate": 7.43403637721198e-07, "loss": 0.0159, "step": 410760 }, { "epoch": 4.388802820663497, "grad_norm": 3.181191921234131, "learning_rate": 7.433889618753442e-07, "loss": 0.0065, "step": 410770 }, { "epoch": 4.388909663977777, "grad_norm": 14.432843208312988, "learning_rate": 7.433742857546859e-07, "loss": 0.0491, "step": 410780 }, { "epoch": 4.389016507292056, "grad_norm": 1.0555295944213867, "learning_rate": 7.433596093592397e-07, "loss": 0.0011, "step": 410790 }, { "epoch": 4.389123350606336, "grad_norm": 0.35852789878845215, "learning_rate": 7.433449326890221e-07, "loss": 0.0183, "step": 410800 }, { "epoch": 4.389230193920615, "grad_norm": 0.007552565541118383, "learning_rate": 7.433302557440497e-07, "loss": 0.0027, "step": 410810 }, { "epoch": 4.389337037234895, "grad_norm": 0.08886455744504929, "learning_rate": 7.433155785243392e-07, "loss": 0.0009, "step": 410820 }, { "epoch": 4.389443880549175, "grad_norm": 0.05889927223324776, "learning_rate": 7.43300901029907e-07, "loss": 0.0219, "step": 410830 }, { "epoch": 4.389550723863454, "grad_norm": 0.03500307723879814, "learning_rate": 7.432862232607696e-07, "loss": 0.0013, "step": 410840 }, { "epoch": 4.389657567177734, "grad_norm": 0.004962792154401541, "learning_rate": 7.432715452169438e-07, "loss": 0.0039, "step": 410850 }, { "epoch": 4.389764410492013, "grad_norm": 1.0766233205795288, "learning_rate": 7.432568668984461e-07, "loss": 0.0031, "step": 410860 }, { "epoch": 4.389871253806293, "grad_norm": 0.004397772718220949, "learning_rate": 7.43242188305293e-07, "loss": 0.0088, "step": 410870 }, { "epoch": 4.389978097120573, "grad_norm": 0.03249083831906319, "learning_rate": 7.432275094375011e-07, "loss": 0.0291, "step": 410880 }, { "epoch": 4.3900849404348525, "grad_norm": 0.09556116908788681, "learning_rate": 7.432128302950871e-07, "loss": 0.0015, "step": 410890 }, { "epoch": 4.390191783749132, "grad_norm": 0.0201975516974926, "learning_rate": 7.431981508780672e-07, "loss": 0.0052, "step": 410900 }, { "epoch": 4.3902986270634115, "grad_norm": 0.004744386300444603, "learning_rate": 7.431834711864584e-07, "loss": 0.0016, "step": 410910 }, { "epoch": 4.390405470377691, "grad_norm": 0.008706568740308285, "learning_rate": 7.431687912202771e-07, "loss": 0.0059, "step": 410920 }, { "epoch": 4.39051231369197, "grad_norm": 0.018645338714122772, "learning_rate": 7.431541109795398e-07, "loss": 0.0016, "step": 410930 }, { "epoch": 4.390619157006251, "grad_norm": 0.3699604570865631, "learning_rate": 7.431394304642633e-07, "loss": 0.006, "step": 410940 }, { "epoch": 4.39072600032053, "grad_norm": 0.011554762721061707, "learning_rate": 7.431247496744639e-07, "loss": 0.0146, "step": 410950 }, { "epoch": 4.39083284363481, "grad_norm": 0.0035375047009438276, "learning_rate": 7.431100686101583e-07, "loss": 0.0618, "step": 410960 }, { "epoch": 4.390939686949089, "grad_norm": 2.415827512741089, "learning_rate": 7.430953872713632e-07, "loss": 0.0045, "step": 410970 }, { "epoch": 4.3910465302633686, "grad_norm": 3.391273021697998, "learning_rate": 7.43080705658095e-07, "loss": 0.0034, "step": 410980 }, { "epoch": 4.391153373577648, "grad_norm": 0.009342264384031296, "learning_rate": 7.430660237703705e-07, "loss": 0.0017, "step": 410990 }, { "epoch": 4.391260216891928, "grad_norm": 0.002077333861961961, "learning_rate": 7.430513416082058e-07, "loss": 0.0001, "step": 411000 }, { "epoch": 4.391367060206208, "grad_norm": 0.29791325330734253, "learning_rate": 7.43036659171618e-07, "loss": 0.0131, "step": 411010 }, { "epoch": 4.391473903520487, "grad_norm": 0.0021718493662774563, "learning_rate": 7.430219764606234e-07, "loss": 0.0271, "step": 411020 }, { "epoch": 4.391580746834767, "grad_norm": 0.00645054504275322, "learning_rate": 7.430072934752387e-07, "loss": 0.0155, "step": 411030 }, { "epoch": 4.391687590149046, "grad_norm": 0.003953276667743921, "learning_rate": 7.429926102154803e-07, "loss": 0.0068, "step": 411040 }, { "epoch": 4.391794433463326, "grad_norm": 3.830456018447876, "learning_rate": 7.42977926681365e-07, "loss": 0.006, "step": 411050 }, { "epoch": 4.391901276777606, "grad_norm": 3.8767924308776855, "learning_rate": 7.429632428729094e-07, "loss": 0.0127, "step": 411060 }, { "epoch": 4.3920081200918855, "grad_norm": 0.00482358830049634, "learning_rate": 7.429485587901298e-07, "loss": 0.0264, "step": 411070 }, { "epoch": 4.392114963406165, "grad_norm": 0.6217401623725891, "learning_rate": 7.429338744330429e-07, "loss": 0.0056, "step": 411080 }, { "epoch": 4.392221806720444, "grad_norm": 0.07168295979499817, "learning_rate": 7.429191898016655e-07, "loss": 0.0012, "step": 411090 }, { "epoch": 4.392328650034724, "grad_norm": 0.029920656234025955, "learning_rate": 7.429045048960137e-07, "loss": 0.0007, "step": 411100 }, { "epoch": 4.392435493349003, "grad_norm": 0.048443086445331573, "learning_rate": 7.428898197161047e-07, "loss": 0.0726, "step": 411110 }, { "epoch": 4.392542336663284, "grad_norm": 16.917463302612305, "learning_rate": 7.428751342619546e-07, "loss": 0.0092, "step": 411120 }, { "epoch": 4.392649179977563, "grad_norm": 0.006873459555208683, "learning_rate": 7.428604485335803e-07, "loss": 0.0093, "step": 411130 }, { "epoch": 4.392756023291843, "grad_norm": 0.43722841143608093, "learning_rate": 7.428457625309981e-07, "loss": 0.0103, "step": 411140 }, { "epoch": 4.392862866606122, "grad_norm": 0.5709549784660339, "learning_rate": 7.428310762542246e-07, "loss": 0.0159, "step": 411150 }, { "epoch": 4.3929697099204015, "grad_norm": 0.0058140261098742485, "learning_rate": 7.428163897032767e-07, "loss": 0.0259, "step": 411160 }, { "epoch": 4.393076553234681, "grad_norm": 0.006962554529309273, "learning_rate": 7.428017028781706e-07, "loss": 0.0005, "step": 411170 }, { "epoch": 4.393183396548961, "grad_norm": 0.004785832483321428, "learning_rate": 7.427870157789232e-07, "loss": 0.0017, "step": 411180 }, { "epoch": 4.393290239863241, "grad_norm": 6.930643081665039, "learning_rate": 7.427723284055508e-07, "loss": 0.006, "step": 411190 }, { "epoch": 4.39339708317752, "grad_norm": 0.014908827841281891, "learning_rate": 7.427576407580702e-07, "loss": 0.0028, "step": 411200 }, { "epoch": 4.3935039264918, "grad_norm": 0.23496516048908234, "learning_rate": 7.427429528364978e-07, "loss": 0.0209, "step": 411210 }, { "epoch": 4.393610769806079, "grad_norm": 5.565099716186523, "learning_rate": 7.427282646408503e-07, "loss": 0.0173, "step": 411220 }, { "epoch": 4.393717613120359, "grad_norm": 0.0018771950853988528, "learning_rate": 7.427135761711445e-07, "loss": 0.0194, "step": 411230 }, { "epoch": 4.393824456434639, "grad_norm": 0.040833957493305206, "learning_rate": 7.426988874273965e-07, "loss": 0.0025, "step": 411240 }, { "epoch": 4.393931299748918, "grad_norm": 0.3361154794692993, "learning_rate": 7.426841984096233e-07, "loss": 0.0089, "step": 411250 }, { "epoch": 4.394038143063198, "grad_norm": 0.01646711677312851, "learning_rate": 7.426695091178413e-07, "loss": 0.003, "step": 411260 }, { "epoch": 4.394144986377477, "grad_norm": 0.004354964941740036, "learning_rate": 7.42654819552067e-07, "loss": 0.0004, "step": 411270 }, { "epoch": 4.394251829691757, "grad_norm": 0.0927705243229866, "learning_rate": 7.426401297123172e-07, "loss": 0.0104, "step": 411280 }, { "epoch": 4.394358673006037, "grad_norm": 0.005449457094073296, "learning_rate": 7.426254395986084e-07, "loss": 0.0183, "step": 411290 }, { "epoch": 4.394465516320317, "grad_norm": 0.069370798766613, "learning_rate": 7.42610749210957e-07, "loss": 0.0015, "step": 411300 }, { "epoch": 4.394572359634596, "grad_norm": 0.001731688855215907, "learning_rate": 7.425960585493799e-07, "loss": 0.0033, "step": 411310 }, { "epoch": 4.3946792029488755, "grad_norm": 0.015022850595414639, "learning_rate": 7.425813676138934e-07, "loss": 0.0016, "step": 411320 }, { "epoch": 4.394786046263155, "grad_norm": 10.498924255371094, "learning_rate": 7.425666764045144e-07, "loss": 0.0079, "step": 411330 }, { "epoch": 4.394892889577434, "grad_norm": 0.01175568625330925, "learning_rate": 7.425519849212593e-07, "loss": 0.0167, "step": 411340 }, { "epoch": 4.394999732891714, "grad_norm": 0.02075190655887127, "learning_rate": 7.425372931641445e-07, "loss": 0.0043, "step": 411350 }, { "epoch": 4.395106576205994, "grad_norm": 0.21067970991134644, "learning_rate": 7.42522601133187e-07, "loss": 0.0052, "step": 411360 }, { "epoch": 4.395213419520274, "grad_norm": 0.004200605675578117, "learning_rate": 7.425079088284032e-07, "loss": 0.0089, "step": 411370 }, { "epoch": 4.395320262834553, "grad_norm": 0.06999922543764114, "learning_rate": 7.424932162498094e-07, "loss": 0.0082, "step": 411380 }, { "epoch": 4.395427106148833, "grad_norm": 0.446269154548645, "learning_rate": 7.424785233974228e-07, "loss": 0.0035, "step": 411390 }, { "epoch": 4.395533949463112, "grad_norm": 5.408097267150879, "learning_rate": 7.424638302712594e-07, "loss": 0.0228, "step": 411400 }, { "epoch": 4.395640792777392, "grad_norm": 3.5276660919189453, "learning_rate": 7.424491368713361e-07, "loss": 0.0026, "step": 411410 }, { "epoch": 4.395747636091672, "grad_norm": 3.8056750297546387, "learning_rate": 7.424344431976694e-07, "loss": 0.0048, "step": 411420 }, { "epoch": 4.395854479405951, "grad_norm": 0.0074431411921978, "learning_rate": 7.42419749250276e-07, "loss": 0.0051, "step": 411430 }, { "epoch": 4.395961322720231, "grad_norm": 1.8041908740997314, "learning_rate": 7.424050550291722e-07, "loss": 0.0111, "step": 411440 }, { "epoch": 4.39606816603451, "grad_norm": 0.0014439483638852835, "learning_rate": 7.423903605343751e-07, "loss": 0.006, "step": 411450 }, { "epoch": 4.39617500934879, "grad_norm": 0.3783116638660431, "learning_rate": 7.42375665765901e-07, "loss": 0.0196, "step": 411460 }, { "epoch": 4.396281852663069, "grad_norm": 3.8124377727508545, "learning_rate": 7.423609707237662e-07, "loss": 0.0172, "step": 411470 }, { "epoch": 4.3963886959773495, "grad_norm": 0.005148681811988354, "learning_rate": 7.423462754079877e-07, "loss": 0.0076, "step": 411480 }, { "epoch": 4.396495539291629, "grad_norm": 0.0890689417719841, "learning_rate": 7.423315798185819e-07, "loss": 0.0078, "step": 411490 }, { "epoch": 4.396602382605908, "grad_norm": 0.044815439730882645, "learning_rate": 7.423168839555656e-07, "loss": 0.0004, "step": 411500 }, { "epoch": 4.396709225920188, "grad_norm": 0.00031243773992173374, "learning_rate": 7.423021878189552e-07, "loss": 0.0061, "step": 411510 }, { "epoch": 4.396816069234467, "grad_norm": 0.11182908713817596, "learning_rate": 7.422874914087673e-07, "loss": 0.0116, "step": 411520 }, { "epoch": 4.396922912548748, "grad_norm": 0.025212589651346207, "learning_rate": 7.422727947250186e-07, "loss": 0.0068, "step": 411530 }, { "epoch": 4.397029755863027, "grad_norm": 2.768474817276001, "learning_rate": 7.422580977677257e-07, "loss": 0.0037, "step": 411540 }, { "epoch": 4.397136599177307, "grad_norm": 1.1769905090332031, "learning_rate": 7.422434005369049e-07, "loss": 0.0036, "step": 411550 }, { "epoch": 4.397243442491586, "grad_norm": 0.048902567476034164, "learning_rate": 7.422287030325731e-07, "loss": 0.0002, "step": 411560 }, { "epoch": 4.3973502858058655, "grad_norm": 0.062160149216651917, "learning_rate": 7.42214005254747e-07, "loss": 0.0049, "step": 411570 }, { "epoch": 4.397457129120145, "grad_norm": 0.0018962026806548238, "learning_rate": 7.421993072034427e-07, "loss": 0.0029, "step": 411580 }, { "epoch": 4.397563972434425, "grad_norm": 0.6196790933609009, "learning_rate": 7.421846088786773e-07, "loss": 0.0096, "step": 411590 }, { "epoch": 4.397670815748705, "grad_norm": 1.2552008628845215, "learning_rate": 7.421699102804672e-07, "loss": 0.0051, "step": 411600 }, { "epoch": 4.397777659062984, "grad_norm": 0.003098508343100548, "learning_rate": 7.421552114088288e-07, "loss": 0.0058, "step": 411610 }, { "epoch": 4.397884502377264, "grad_norm": 3.7949790954589844, "learning_rate": 7.421405122637791e-07, "loss": 0.009, "step": 411620 }, { "epoch": 4.397991345691543, "grad_norm": 0.006604035850614309, "learning_rate": 7.421258128453345e-07, "loss": 0.0119, "step": 411630 }, { "epoch": 4.398098189005823, "grad_norm": 17.55277442932129, "learning_rate": 7.421111131535114e-07, "loss": 0.016, "step": 411640 }, { "epoch": 4.398205032320103, "grad_norm": 0.008650592528283596, "learning_rate": 7.420964131883267e-07, "loss": 0.0065, "step": 411650 }, { "epoch": 4.398311875634382, "grad_norm": 0.003914274275302887, "learning_rate": 7.420817129497969e-07, "loss": 0.0292, "step": 411660 }, { "epoch": 4.398418718948662, "grad_norm": 10.281251907348633, "learning_rate": 7.420670124379385e-07, "loss": 0.0225, "step": 411670 }, { "epoch": 4.398525562262941, "grad_norm": 0.005970865022391081, "learning_rate": 7.420523116527682e-07, "loss": 0.0014, "step": 411680 }, { "epoch": 4.398632405577221, "grad_norm": 0.07412167638540268, "learning_rate": 7.420376105943026e-07, "loss": 0.0044, "step": 411690 }, { "epoch": 4.3987392488915, "grad_norm": 0.08618408441543579, "learning_rate": 7.420229092625582e-07, "loss": 0.0004, "step": 411700 }, { "epoch": 4.398846092205781, "grad_norm": 4.39231014251709, "learning_rate": 7.420082076575517e-07, "loss": 0.0185, "step": 411710 }, { "epoch": 4.39895293552006, "grad_norm": 0.011727864854037762, "learning_rate": 7.419935057792997e-07, "loss": 0.0097, "step": 411720 }, { "epoch": 4.3990597788343395, "grad_norm": 9.964579582214355, "learning_rate": 7.419788036278187e-07, "loss": 0.0141, "step": 411730 }, { "epoch": 4.399166622148619, "grad_norm": 0.3070148825645447, "learning_rate": 7.419641012031255e-07, "loss": 0.0097, "step": 411740 }, { "epoch": 4.399273465462898, "grad_norm": 2.6260950565338135, "learning_rate": 7.419493985052365e-07, "loss": 0.0252, "step": 411750 }, { "epoch": 4.399380308777178, "grad_norm": 4.646496772766113, "learning_rate": 7.419346955341683e-07, "loss": 0.0186, "step": 411760 }, { "epoch": 4.399487152091458, "grad_norm": 3.763605833053589, "learning_rate": 7.419199922899377e-07, "loss": 0.0062, "step": 411770 }, { "epoch": 4.399593995405738, "grad_norm": 0.006762686651200056, "learning_rate": 7.419052887725611e-07, "loss": 0.0315, "step": 411780 }, { "epoch": 4.399700838720017, "grad_norm": 0.39807364344596863, "learning_rate": 7.418905849820552e-07, "loss": 0.0068, "step": 411790 }, { "epoch": 4.399807682034297, "grad_norm": 0.5257735252380371, "learning_rate": 7.418758809184366e-07, "loss": 0.0032, "step": 411800 }, { "epoch": 4.399914525348576, "grad_norm": 0.0060012186877429485, "learning_rate": 7.418611765817217e-07, "loss": 0.0232, "step": 411810 }, { "epoch": 4.4000213686628555, "grad_norm": 0.06038576364517212, "learning_rate": 7.418464719719275e-07, "loss": 0.0035, "step": 411820 }, { "epoch": 4.400128211977136, "grad_norm": 0.0005868520238436759, "learning_rate": 7.418317670890702e-07, "loss": 0.0038, "step": 411830 }, { "epoch": 4.400235055291415, "grad_norm": 0.17905454337596893, "learning_rate": 7.418170619331668e-07, "loss": 0.0066, "step": 411840 }, { "epoch": 4.400341898605695, "grad_norm": 0.10334394127130508, "learning_rate": 7.418023565042335e-07, "loss": 0.0079, "step": 411850 }, { "epoch": 4.400448741919974, "grad_norm": 0.013212082907557487, "learning_rate": 7.417876508022871e-07, "loss": 0.0168, "step": 411860 }, { "epoch": 4.400555585234254, "grad_norm": 5.44117546081543, "learning_rate": 7.417729448273442e-07, "loss": 0.0186, "step": 411870 }, { "epoch": 4.400662428548533, "grad_norm": 1.1413053274154663, "learning_rate": 7.417582385794214e-07, "loss": 0.0119, "step": 411880 }, { "epoch": 4.4007692718628135, "grad_norm": 0.07551924139261246, "learning_rate": 7.417435320585353e-07, "loss": 0.0078, "step": 411890 }, { "epoch": 4.400876115177093, "grad_norm": 1.161210536956787, "learning_rate": 7.417288252647025e-07, "loss": 0.0106, "step": 411900 }, { "epoch": 4.400982958491372, "grad_norm": 0.4734121263027191, "learning_rate": 7.417141181979398e-07, "loss": 0.0063, "step": 411910 }, { "epoch": 4.401089801805652, "grad_norm": 5.012463092803955, "learning_rate": 7.416994108582634e-07, "loss": 0.0481, "step": 411920 }, { "epoch": 4.401196645119931, "grad_norm": 0.013378092087805271, "learning_rate": 7.416847032456902e-07, "loss": 0.0208, "step": 411930 }, { "epoch": 4.401303488434211, "grad_norm": 1.8984079360961914, "learning_rate": 7.416699953602368e-07, "loss": 0.0156, "step": 411940 }, { "epoch": 4.401410331748491, "grad_norm": 0.09119492024183273, "learning_rate": 7.416552872019196e-07, "loss": 0.0064, "step": 411950 }, { "epoch": 4.401517175062771, "grad_norm": 0.008454637601971626, "learning_rate": 7.416405787707555e-07, "loss": 0.0113, "step": 411960 }, { "epoch": 4.40162401837705, "grad_norm": 0.014167919754981995, "learning_rate": 7.416258700667608e-07, "loss": 0.0072, "step": 411970 }, { "epoch": 4.4017308616913295, "grad_norm": 0.012607752345502377, "learning_rate": 7.416111610899522e-07, "loss": 0.0029, "step": 411980 }, { "epoch": 4.401837705005609, "grad_norm": 0.004438997246325016, "learning_rate": 7.415964518403465e-07, "loss": 0.007, "step": 411990 }, { "epoch": 4.401944548319889, "grad_norm": 0.005135887302458286, "learning_rate": 7.415817423179601e-07, "loss": 0.0016, "step": 412000 }, { "epoch": 4.402051391634169, "grad_norm": 0.040812961757183075, "learning_rate": 7.415670325228099e-07, "loss": 0.0116, "step": 412010 }, { "epoch": 4.402158234948448, "grad_norm": 1.0904608964920044, "learning_rate": 7.415523224549119e-07, "loss": 0.0273, "step": 412020 }, { "epoch": 4.402265078262728, "grad_norm": 0.0036728130653500557, "learning_rate": 7.415376121142834e-07, "loss": 0.005, "step": 412030 }, { "epoch": 4.402371921577007, "grad_norm": 0.02486761100590229, "learning_rate": 7.415229015009406e-07, "loss": 0.0059, "step": 412040 }, { "epoch": 4.402478764891287, "grad_norm": 0.38167059421539307, "learning_rate": 7.415081906149003e-07, "loss": 0.0015, "step": 412050 }, { "epoch": 4.402585608205566, "grad_norm": 5.905473709106445, "learning_rate": 7.414934794561789e-07, "loss": 0.0113, "step": 412060 }, { "epoch": 4.4026924515198465, "grad_norm": 1.6687839031219482, "learning_rate": 7.414787680247932e-07, "loss": 0.0016, "step": 412070 }, { "epoch": 4.402799294834126, "grad_norm": 0.03791763260960579, "learning_rate": 7.414640563207597e-07, "loss": 0.0208, "step": 412080 }, { "epoch": 4.402906138148405, "grad_norm": 0.011781741864979267, "learning_rate": 7.41449344344095e-07, "loss": 0.011, "step": 412090 }, { "epoch": 4.403012981462685, "grad_norm": 0.05309176445007324, "learning_rate": 7.41434632094816e-07, "loss": 0.0068, "step": 412100 }, { "epoch": 4.403119824776964, "grad_norm": 0.0037451477255672216, "learning_rate": 7.41419919572939e-07, "loss": 0.0117, "step": 412110 }, { "epoch": 4.403226668091245, "grad_norm": 0.009221505373716354, "learning_rate": 7.414052067784804e-07, "loss": 0.0014, "step": 412120 }, { "epoch": 4.403333511405524, "grad_norm": 2.552987575531006, "learning_rate": 7.413904937114574e-07, "loss": 0.0279, "step": 412130 }, { "epoch": 4.4034403547198036, "grad_norm": 10.905174255371094, "learning_rate": 7.413757803718864e-07, "loss": 0.0363, "step": 412140 }, { "epoch": 4.403547198034083, "grad_norm": 0.054752789437770844, "learning_rate": 7.413610667597836e-07, "loss": 0.024, "step": 412150 }, { "epoch": 4.4036540413483625, "grad_norm": 0.04601169750094414, "learning_rate": 7.413463528751661e-07, "loss": 0.0072, "step": 412160 }, { "epoch": 4.403760884662642, "grad_norm": 0.06032769754528999, "learning_rate": 7.413316387180505e-07, "loss": 0.0004, "step": 412170 }, { "epoch": 4.403867727976921, "grad_norm": 0.046914149075746536, "learning_rate": 7.41316924288453e-07, "loss": 0.0027, "step": 412180 }, { "epoch": 4.403974571291202, "grad_norm": 0.007309065666049719, "learning_rate": 7.413022095863907e-07, "loss": 0.0105, "step": 412190 }, { "epoch": 4.404081414605481, "grad_norm": 0.7290756106376648, "learning_rate": 7.412874946118799e-07, "loss": 0.0034, "step": 412200 }, { "epoch": 4.404188257919761, "grad_norm": 0.024015536531805992, "learning_rate": 7.412727793649373e-07, "loss": 0.0019, "step": 412210 }, { "epoch": 4.40429510123404, "grad_norm": 0.0026166257448494434, "learning_rate": 7.412580638455796e-07, "loss": 0.0025, "step": 412220 }, { "epoch": 4.40440194454832, "grad_norm": 1.1095983982086182, "learning_rate": 7.412433480538231e-07, "loss": 0.0109, "step": 412230 }, { "epoch": 4.4045087878626, "grad_norm": 0.024973101913928986, "learning_rate": 7.412286319896849e-07, "loss": 0.0103, "step": 412240 }, { "epoch": 4.404615631176879, "grad_norm": 0.03977776691317558, "learning_rate": 7.412139156531813e-07, "loss": 0.0087, "step": 412250 }, { "epoch": 4.404722474491159, "grad_norm": 4.119129180908203, "learning_rate": 7.411991990443289e-07, "loss": 0.0042, "step": 412260 }, { "epoch": 4.404829317805438, "grad_norm": 0.0015083765611052513, "learning_rate": 7.411844821631445e-07, "loss": 0.0159, "step": 412270 }, { "epoch": 4.404936161119718, "grad_norm": 2.7012972831726074, "learning_rate": 7.411697650096446e-07, "loss": 0.0028, "step": 412280 }, { "epoch": 4.405043004433997, "grad_norm": 3.125715494155884, "learning_rate": 7.411550475838457e-07, "loss": 0.0031, "step": 412290 }, { "epoch": 4.405149847748278, "grad_norm": 0.024450020864605904, "learning_rate": 7.411403298857648e-07, "loss": 0.0014, "step": 412300 }, { "epoch": 4.405256691062557, "grad_norm": 0.5291077494621277, "learning_rate": 7.411256119154181e-07, "loss": 0.0024, "step": 412310 }, { "epoch": 4.4053635343768365, "grad_norm": 2.461463689804077, "learning_rate": 7.411108936728224e-07, "loss": 0.0784, "step": 412320 }, { "epoch": 4.405470377691116, "grad_norm": 0.0026443980168551207, "learning_rate": 7.410961751579944e-07, "loss": 0.0012, "step": 412330 }, { "epoch": 4.405577221005395, "grad_norm": 0.0039893342182040215, "learning_rate": 7.410814563709504e-07, "loss": 0.0062, "step": 412340 }, { "epoch": 4.405684064319675, "grad_norm": 0.0019332434749230742, "learning_rate": 7.410667373117075e-07, "loss": 0.0052, "step": 412350 }, { "epoch": 4.405790907633955, "grad_norm": 0.001797268632799387, "learning_rate": 7.410520179802819e-07, "loss": 0.0108, "step": 412360 }, { "epoch": 4.405897750948235, "grad_norm": 0.07844894379377365, "learning_rate": 7.410372983766904e-07, "loss": 0.0056, "step": 412370 }, { "epoch": 4.406004594262514, "grad_norm": 4.561184883117676, "learning_rate": 7.410225785009496e-07, "loss": 0.0037, "step": 412380 }, { "epoch": 4.406111437576794, "grad_norm": 5.0632734298706055, "learning_rate": 7.410078583530763e-07, "loss": 0.0044, "step": 412390 }, { "epoch": 4.406218280891073, "grad_norm": 0.052943311631679535, "learning_rate": 7.409931379330867e-07, "loss": 0.0076, "step": 412400 }, { "epoch": 4.4063251242053525, "grad_norm": 6.530889987945557, "learning_rate": 7.409784172409977e-07, "loss": 0.0149, "step": 412410 }, { "epoch": 4.406431967519633, "grad_norm": 7.198366165161133, "learning_rate": 7.409636962768259e-07, "loss": 0.004, "step": 412420 }, { "epoch": 4.406538810833912, "grad_norm": 2.3166589736938477, "learning_rate": 7.409489750405881e-07, "loss": 0.0071, "step": 412430 }, { "epoch": 4.406645654148192, "grad_norm": 0.1367148458957672, "learning_rate": 7.409342535323004e-07, "loss": 0.0155, "step": 412440 }, { "epoch": 4.406752497462471, "grad_norm": 2.437203884124756, "learning_rate": 7.4091953175198e-07, "loss": 0.0005, "step": 412450 }, { "epoch": 4.406859340776751, "grad_norm": 0.4701600968837738, "learning_rate": 7.409048096996431e-07, "loss": 0.0077, "step": 412460 }, { "epoch": 4.40696618409103, "grad_norm": 0.07747136056423187, "learning_rate": 7.408900873753065e-07, "loss": 0.0059, "step": 412470 }, { "epoch": 4.4070730274053105, "grad_norm": 0.010684088803827763, "learning_rate": 7.408753647789869e-07, "loss": 0.0236, "step": 412480 }, { "epoch": 4.40717987071959, "grad_norm": 0.020078131929039955, "learning_rate": 7.408606419107007e-07, "loss": 0.0009, "step": 412490 }, { "epoch": 4.407286714033869, "grad_norm": 0.29813024401664734, "learning_rate": 7.408459187704648e-07, "loss": 0.0112, "step": 412500 }, { "epoch": 4.407393557348149, "grad_norm": 0.06973690539598465, "learning_rate": 7.408311953582956e-07, "loss": 0.0115, "step": 412510 }, { "epoch": 4.407500400662428, "grad_norm": 0.3126808702945709, "learning_rate": 7.408164716742098e-07, "loss": 0.0016, "step": 412520 }, { "epoch": 4.407607243976708, "grad_norm": 0.2878398597240448, "learning_rate": 7.408017477182242e-07, "loss": 0.0047, "step": 412530 }, { "epoch": 4.407714087290988, "grad_norm": 0.004826139658689499, "learning_rate": 7.40787023490355e-07, "loss": 0.0221, "step": 412540 }, { "epoch": 4.407820930605268, "grad_norm": 0.0013343087630346417, "learning_rate": 7.407722989906192e-07, "loss": 0.0004, "step": 412550 }, { "epoch": 4.407927773919547, "grad_norm": 0.006486617960035801, "learning_rate": 7.407575742190333e-07, "loss": 0.0005, "step": 412560 }, { "epoch": 4.4080346172338265, "grad_norm": 0.2581174075603485, "learning_rate": 7.40742849175614e-07, "loss": 0.003, "step": 412570 }, { "epoch": 4.408141460548106, "grad_norm": 0.9457286596298218, "learning_rate": 7.407281238603776e-07, "loss": 0.0027, "step": 412580 }, { "epoch": 4.408248303862385, "grad_norm": 0.17697778344154358, "learning_rate": 7.407133982733412e-07, "loss": 0.0043, "step": 412590 }, { "epoch": 4.408355147176666, "grad_norm": 0.21442165970802307, "learning_rate": 7.406986724145212e-07, "loss": 0.0121, "step": 412600 }, { "epoch": 4.408461990490945, "grad_norm": 1.0376116037368774, "learning_rate": 7.406839462839342e-07, "loss": 0.001, "step": 412610 }, { "epoch": 4.408568833805225, "grad_norm": 0.0031627328135073185, "learning_rate": 7.406692198815969e-07, "loss": 0.0027, "step": 412620 }, { "epoch": 4.408675677119504, "grad_norm": 0.01733492501080036, "learning_rate": 7.406544932075257e-07, "loss": 0.0115, "step": 412630 }, { "epoch": 4.408782520433784, "grad_norm": 0.0021282329689711332, "learning_rate": 7.406397662617376e-07, "loss": 0.0127, "step": 412640 }, { "epoch": 4.408889363748063, "grad_norm": 0.0062148370780050755, "learning_rate": 7.40625039044249e-07, "loss": 0.0051, "step": 412650 }, { "epoch": 4.408996207062343, "grad_norm": 0.003789371345192194, "learning_rate": 7.406103115550765e-07, "loss": 0.0014, "step": 412660 }, { "epoch": 4.409103050376623, "grad_norm": 0.07328374683856964, "learning_rate": 7.405955837942368e-07, "loss": 0.0638, "step": 412670 }, { "epoch": 4.409209893690902, "grad_norm": 0.002520813839510083, "learning_rate": 7.405808557617466e-07, "loss": 0.0044, "step": 412680 }, { "epoch": 4.409316737005182, "grad_norm": 0.0035356301814317703, "learning_rate": 7.405661274576225e-07, "loss": 0.0045, "step": 412690 }, { "epoch": 4.409423580319461, "grad_norm": 5.501593589782715, "learning_rate": 7.40551398881881e-07, "loss": 0.039, "step": 412700 }, { "epoch": 4.409530423633741, "grad_norm": 4.631148338317871, "learning_rate": 7.405366700345389e-07, "loss": 0.0146, "step": 412710 }, { "epoch": 4.409637266948021, "grad_norm": 0.8149479031562805, "learning_rate": 7.405219409156126e-07, "loss": 0.0426, "step": 412720 }, { "epoch": 4.4097441102623005, "grad_norm": 0.047219403088092804, "learning_rate": 7.40507211525119e-07, "loss": 0.0068, "step": 412730 }, { "epoch": 4.40985095357658, "grad_norm": 0.669572651386261, "learning_rate": 7.404924818630747e-07, "loss": 0.0031, "step": 412740 }, { "epoch": 4.409957796890859, "grad_norm": 0.10961674153804779, "learning_rate": 7.404777519294961e-07, "loss": 0.0042, "step": 412750 }, { "epoch": 4.410064640205139, "grad_norm": 2.1991159915924072, "learning_rate": 7.404630217243999e-07, "loss": 0.0183, "step": 412760 }, { "epoch": 4.410171483519418, "grad_norm": 0.010573101229965687, "learning_rate": 7.40448291247803e-07, "loss": 0.003, "step": 412770 }, { "epoch": 4.410278326833699, "grad_norm": 0.3644137680530548, "learning_rate": 7.404335604997215e-07, "loss": 0.0024, "step": 412780 }, { "epoch": 4.410385170147978, "grad_norm": 0.025436408817768097, "learning_rate": 7.404188294801727e-07, "loss": 0.0016, "step": 412790 }, { "epoch": 4.410492013462258, "grad_norm": 0.00963938981294632, "learning_rate": 7.404040981891727e-07, "loss": 0.0003, "step": 412800 }, { "epoch": 4.410598856776537, "grad_norm": 0.009665613994002342, "learning_rate": 7.403893666267383e-07, "loss": 0.0049, "step": 412810 }, { "epoch": 4.4107057000908165, "grad_norm": 0.3348463177680969, "learning_rate": 7.403746347928863e-07, "loss": 0.0161, "step": 412820 }, { "epoch": 4.410812543405097, "grad_norm": 0.0022025969810783863, "learning_rate": 7.403599026876333e-07, "loss": 0.0003, "step": 412830 }, { "epoch": 4.410919386719376, "grad_norm": 0.013141974806785583, "learning_rate": 7.403451703109955e-07, "loss": 0.0107, "step": 412840 }, { "epoch": 4.411026230033656, "grad_norm": 0.004381503444164991, "learning_rate": 7.403304376629902e-07, "loss": 0.0011, "step": 412850 }, { "epoch": 4.411133073347935, "grad_norm": 0.07712709158658981, "learning_rate": 7.403157047436334e-07, "loss": 0.0107, "step": 412860 }, { "epoch": 4.411239916662215, "grad_norm": 0.0023770860861986876, "learning_rate": 7.403009715529422e-07, "loss": 0.005, "step": 412870 }, { "epoch": 4.411346759976494, "grad_norm": 0.0012879592832177877, "learning_rate": 7.402862380909332e-07, "loss": 0.0338, "step": 412880 }, { "epoch": 4.411453603290774, "grad_norm": 0.0026028184220194817, "learning_rate": 7.402715043576226e-07, "loss": 0.0007, "step": 412890 }, { "epoch": 4.411560446605054, "grad_norm": 0.012211657129228115, "learning_rate": 7.402567703530276e-07, "loss": 0.0005, "step": 412900 }, { "epoch": 4.411667289919333, "grad_norm": 0.007793922908604145, "learning_rate": 7.402420360771644e-07, "loss": 0.0094, "step": 412910 }, { "epoch": 4.411774133233613, "grad_norm": 0.012357600964605808, "learning_rate": 7.402273015300499e-07, "loss": 0.0005, "step": 412920 }, { "epoch": 4.411880976547892, "grad_norm": 0.009546334855258465, "learning_rate": 7.402125667117007e-07, "loss": 0.0084, "step": 412930 }, { "epoch": 4.411987819862172, "grad_norm": 0.02850794978439808, "learning_rate": 7.401978316221332e-07, "loss": 0.0151, "step": 412940 }, { "epoch": 4.412094663176452, "grad_norm": 7.355983257293701, "learning_rate": 7.401830962613642e-07, "loss": 0.0096, "step": 412950 }, { "epoch": 4.412201506490732, "grad_norm": 0.17172633111476898, "learning_rate": 7.401683606294106e-07, "loss": 0.0077, "step": 412960 }, { "epoch": 4.412308349805011, "grad_norm": 0.01207482349127531, "learning_rate": 7.401536247262885e-07, "loss": 0.0058, "step": 412970 }, { "epoch": 4.4124151931192905, "grad_norm": 1.666324257850647, "learning_rate": 7.401388885520151e-07, "loss": 0.0036, "step": 412980 }, { "epoch": 4.41252203643357, "grad_norm": 0.21457664668560028, "learning_rate": 7.401241521066066e-07, "loss": 0.0031, "step": 412990 }, { "epoch": 4.4126288797478495, "grad_norm": 0.0007413435960188508, "learning_rate": 7.4010941539008e-07, "loss": 0.039, "step": 413000 }, { "epoch": 4.41273572306213, "grad_norm": 2.7318880558013916, "learning_rate": 7.400946784024516e-07, "loss": 0.0251, "step": 413010 }, { "epoch": 4.412842566376409, "grad_norm": 1.3420876264572144, "learning_rate": 7.400799411437382e-07, "loss": 0.0215, "step": 413020 }, { "epoch": 4.412949409690689, "grad_norm": 0.07465104013681412, "learning_rate": 7.400652036139564e-07, "loss": 0.0123, "step": 413030 }, { "epoch": 4.413056253004968, "grad_norm": 7.7434515953063965, "learning_rate": 7.400504658131228e-07, "loss": 0.0603, "step": 413040 }, { "epoch": 4.413163096319248, "grad_norm": 7.403841018676758, "learning_rate": 7.400357277412543e-07, "loss": 0.0215, "step": 413050 }, { "epoch": 4.413269939633527, "grad_norm": 0.09619202464818954, "learning_rate": 7.400209893983672e-07, "loss": 0.0014, "step": 413060 }, { "epoch": 4.413376782947807, "grad_norm": 1.2392605543136597, "learning_rate": 7.400062507844784e-07, "loss": 0.0069, "step": 413070 }, { "epoch": 4.413483626262087, "grad_norm": 1.798393726348877, "learning_rate": 7.399915118996044e-07, "loss": 0.0036, "step": 413080 }, { "epoch": 4.413590469576366, "grad_norm": 0.030304865911602974, "learning_rate": 7.399767727437618e-07, "loss": 0.0088, "step": 413090 }, { "epoch": 4.413697312890646, "grad_norm": 0.013456826098263264, "learning_rate": 7.399620333169674e-07, "loss": 0.0061, "step": 413100 }, { "epoch": 4.413804156204925, "grad_norm": 0.253225713968277, "learning_rate": 7.399472936192377e-07, "loss": 0.0015, "step": 413110 }, { "epoch": 4.413910999519205, "grad_norm": 0.005466050002723932, "learning_rate": 7.399325536505894e-07, "loss": 0.0351, "step": 413120 }, { "epoch": 4.414017842833485, "grad_norm": 0.07216906547546387, "learning_rate": 7.399178134110391e-07, "loss": 0.0141, "step": 413130 }, { "epoch": 4.4141246861477645, "grad_norm": 0.01011145394295454, "learning_rate": 7.399030729006036e-07, "loss": 0.0035, "step": 413140 }, { "epoch": 4.414231529462044, "grad_norm": 0.6275700926780701, "learning_rate": 7.398883321192992e-07, "loss": 0.0111, "step": 413150 }, { "epoch": 4.4143383727763235, "grad_norm": 0.1128983423113823, "learning_rate": 7.398735910671431e-07, "loss": 0.0009, "step": 413160 }, { "epoch": 4.414445216090603, "grad_norm": 0.33717140555381775, "learning_rate": 7.398588497441514e-07, "loss": 0.0187, "step": 413170 }, { "epoch": 4.414552059404882, "grad_norm": 0.35653918981552124, "learning_rate": 7.39844108150341e-07, "loss": 0.0034, "step": 413180 }, { "epoch": 4.414658902719163, "grad_norm": 0.004650903400033712, "learning_rate": 7.398293662857285e-07, "loss": 0.0058, "step": 413190 }, { "epoch": 4.414765746033442, "grad_norm": 0.029080791398882866, "learning_rate": 7.398146241503305e-07, "loss": 0.01, "step": 413200 }, { "epoch": 4.414872589347722, "grad_norm": 0.05785607919096947, "learning_rate": 7.397998817441637e-07, "loss": 0.0141, "step": 413210 }, { "epoch": 4.414979432662001, "grad_norm": 2.4268414974212646, "learning_rate": 7.397851390672448e-07, "loss": 0.0404, "step": 413220 }, { "epoch": 4.415086275976281, "grad_norm": 0.5842194557189941, "learning_rate": 7.397703961195903e-07, "loss": 0.0012, "step": 413230 }, { "epoch": 4.41519311929056, "grad_norm": 0.02939416468143463, "learning_rate": 7.39755652901217e-07, "loss": 0.0028, "step": 413240 }, { "epoch": 4.41529996260484, "grad_norm": 0.36661627888679504, "learning_rate": 7.397409094121414e-07, "loss": 0.002, "step": 413250 }, { "epoch": 4.41540680591912, "grad_norm": 0.002754358807578683, "learning_rate": 7.397261656523803e-07, "loss": 0.0037, "step": 413260 }, { "epoch": 4.415513649233399, "grad_norm": 0.008440871722996235, "learning_rate": 7.397114216219503e-07, "loss": 0.0106, "step": 413270 }, { "epoch": 4.415620492547679, "grad_norm": 0.28712454438209534, "learning_rate": 7.396966773208677e-07, "loss": 0.0448, "step": 413280 }, { "epoch": 4.415727335861958, "grad_norm": 3.3813109397888184, "learning_rate": 7.396819327491498e-07, "loss": 0.0044, "step": 413290 }, { "epoch": 4.415834179176238, "grad_norm": 0.001994469901546836, "learning_rate": 7.396671879068127e-07, "loss": 0.0266, "step": 413300 }, { "epoch": 4.415941022490518, "grad_norm": 0.09956411272287369, "learning_rate": 7.396524427938734e-07, "loss": 0.0295, "step": 413310 }, { "epoch": 4.4160478658047975, "grad_norm": 4.494021415710449, "learning_rate": 7.396376974103483e-07, "loss": 0.0064, "step": 413320 }, { "epoch": 4.416154709119077, "grad_norm": 0.007050117943435907, "learning_rate": 7.396229517562542e-07, "loss": 0.004, "step": 413330 }, { "epoch": 4.416261552433356, "grad_norm": 0.01625385507941246, "learning_rate": 7.396082058316077e-07, "loss": 0.0044, "step": 413340 }, { "epoch": 4.416368395747636, "grad_norm": 0.011665888130664825, "learning_rate": 7.395934596364255e-07, "loss": 0.0032, "step": 413350 }, { "epoch": 4.416475239061915, "grad_norm": 0.8541886210441589, "learning_rate": 7.395787131707243e-07, "loss": 0.0081, "step": 413360 }, { "epoch": 4.416582082376196, "grad_norm": 0.012361280620098114, "learning_rate": 7.395639664345204e-07, "loss": 0.01, "step": 413370 }, { "epoch": 4.416688925690475, "grad_norm": 0.03501627966761589, "learning_rate": 7.395492194278308e-07, "loss": 0.0083, "step": 413380 }, { "epoch": 4.416795769004755, "grad_norm": 3.203577756881714, "learning_rate": 7.395344721506722e-07, "loss": 0.0031, "step": 413390 }, { "epoch": 4.416902612319034, "grad_norm": 0.013319110497832298, "learning_rate": 7.39519724603061e-07, "loss": 0.0005, "step": 413400 }, { "epoch": 4.4170094556333135, "grad_norm": 0.005775887053459883, "learning_rate": 7.395049767850139e-07, "loss": 0.0148, "step": 413410 }, { "epoch": 4.417116298947593, "grad_norm": 0.32399389147758484, "learning_rate": 7.394902286965477e-07, "loss": 0.0018, "step": 413420 }, { "epoch": 4.417223142261873, "grad_norm": 0.06285019963979721, "learning_rate": 7.394754803376789e-07, "loss": 0.0005, "step": 413430 }, { "epoch": 4.417329985576153, "grad_norm": 6.525868892669678, "learning_rate": 7.394607317084241e-07, "loss": 0.0133, "step": 413440 }, { "epoch": 4.417436828890432, "grad_norm": 0.492565780878067, "learning_rate": 7.394459828088002e-07, "loss": 0.0351, "step": 413450 }, { "epoch": 4.417543672204712, "grad_norm": 0.029202159494161606, "learning_rate": 7.394312336388236e-07, "loss": 0.0275, "step": 413460 }, { "epoch": 4.417650515518991, "grad_norm": 0.09532282501459122, "learning_rate": 7.394164841985112e-07, "loss": 0.009, "step": 413470 }, { "epoch": 4.417757358833271, "grad_norm": 1.1195454597473145, "learning_rate": 7.394017344878794e-07, "loss": 0.0104, "step": 413480 }, { "epoch": 4.417864202147551, "grad_norm": 0.01295835617929697, "learning_rate": 7.393869845069451e-07, "loss": 0.0138, "step": 413490 }, { "epoch": 4.41797104546183, "grad_norm": 0.19998174905776978, "learning_rate": 7.393722342557247e-07, "loss": 0.0096, "step": 413500 }, { "epoch": 4.41807788877611, "grad_norm": 0.26877519488334656, "learning_rate": 7.393574837342352e-07, "loss": 0.0053, "step": 413510 }, { "epoch": 4.418184732090389, "grad_norm": 0.01788186840713024, "learning_rate": 7.393427329424927e-07, "loss": 0.0039, "step": 413520 }, { "epoch": 4.418291575404669, "grad_norm": 0.4583386480808258, "learning_rate": 7.393279818805145e-07, "loss": 0.0061, "step": 413530 }, { "epoch": 4.418398418718949, "grad_norm": 0.8608818054199219, "learning_rate": 7.393132305483168e-07, "loss": 0.0108, "step": 413540 }, { "epoch": 4.418505262033229, "grad_norm": 7.495481967926025, "learning_rate": 7.392984789459164e-07, "loss": 0.0252, "step": 413550 }, { "epoch": 4.418612105347508, "grad_norm": 0.5448040962219238, "learning_rate": 7.3928372707333e-07, "loss": 0.0019, "step": 413560 }, { "epoch": 4.4187189486617875, "grad_norm": 2.044238805770874, "learning_rate": 7.392689749305743e-07, "loss": 0.0109, "step": 413570 }, { "epoch": 4.418825791976067, "grad_norm": 0.31064698100090027, "learning_rate": 7.392542225176656e-07, "loss": 0.0157, "step": 413580 }, { "epoch": 4.418932635290346, "grad_norm": 0.0011720197508111596, "learning_rate": 7.39239469834621e-07, "loss": 0.0065, "step": 413590 }, { "epoch": 4.419039478604626, "grad_norm": 0.31172025203704834, "learning_rate": 7.392247168814569e-07, "loss": 0.0037, "step": 413600 }, { "epoch": 4.419146321918906, "grad_norm": 6.218442916870117, "learning_rate": 7.392099636581901e-07, "loss": 0.0055, "step": 413610 }, { "epoch": 4.419253165233186, "grad_norm": 0.0537416934967041, "learning_rate": 7.391952101648372e-07, "loss": 0.0092, "step": 413620 }, { "epoch": 4.419360008547465, "grad_norm": 0.020477520301938057, "learning_rate": 7.391804564014149e-07, "loss": 0.0057, "step": 413630 }, { "epoch": 4.419466851861745, "grad_norm": 0.005849401466548443, "learning_rate": 7.391657023679396e-07, "loss": 0.0159, "step": 413640 }, { "epoch": 4.419573695176024, "grad_norm": 9.153820037841797, "learning_rate": 7.391509480644285e-07, "loss": 0.0068, "step": 413650 }, { "epoch": 4.419680538490304, "grad_norm": 0.49451950192451477, "learning_rate": 7.391361934908978e-07, "loss": 0.0193, "step": 413660 }, { "epoch": 4.419787381804584, "grad_norm": 0.03868081048130989, "learning_rate": 7.391214386473642e-07, "loss": 0.019, "step": 413670 }, { "epoch": 4.419894225118863, "grad_norm": 2.8755881786346436, "learning_rate": 7.391066835338446e-07, "loss": 0.0094, "step": 413680 }, { "epoch": 4.420001068433143, "grad_norm": 0.055924154818058014, "learning_rate": 7.390919281503552e-07, "loss": 0.0158, "step": 413690 }, { "epoch": 4.420107911747422, "grad_norm": 1.8372244834899902, "learning_rate": 7.390771724969133e-07, "loss": 0.0075, "step": 413700 }, { "epoch": 4.420214755061702, "grad_norm": 0.007963932119309902, "learning_rate": 7.390624165735351e-07, "loss": 0.0029, "step": 413710 }, { "epoch": 4.420321598375981, "grad_norm": 5.29021692276001, "learning_rate": 7.390476603802374e-07, "loss": 0.0056, "step": 413720 }, { "epoch": 4.4204284416902615, "grad_norm": 0.0025776594411581755, "learning_rate": 7.390329039170369e-07, "loss": 0.0018, "step": 413730 }, { "epoch": 4.420535285004541, "grad_norm": 0.0029993560165166855, "learning_rate": 7.390181471839502e-07, "loss": 0.0151, "step": 413740 }, { "epoch": 4.42064212831882, "grad_norm": 0.14905646443367004, "learning_rate": 7.390033901809937e-07, "loss": 0.0031, "step": 413750 }, { "epoch": 4.4207489716331, "grad_norm": 0.006409608293324709, "learning_rate": 7.389886329081847e-07, "loss": 0.0627, "step": 413760 }, { "epoch": 4.420855814947379, "grad_norm": 0.017731957137584686, "learning_rate": 7.389738753655394e-07, "loss": 0.0043, "step": 413770 }, { "epoch": 4.42096265826166, "grad_norm": 4.691639423370361, "learning_rate": 7.389591175530745e-07, "loss": 0.0053, "step": 413780 }, { "epoch": 4.421069501575939, "grad_norm": 0.035775985568761826, "learning_rate": 7.389443594708068e-07, "loss": 0.0246, "step": 413790 }, { "epoch": 4.421176344890219, "grad_norm": 0.009209338575601578, "learning_rate": 7.389296011187528e-07, "loss": 0.0004, "step": 413800 }, { "epoch": 4.421283188204498, "grad_norm": 0.0940958559513092, "learning_rate": 7.389148424969292e-07, "loss": 0.0003, "step": 413810 }, { "epoch": 4.4213900315187775, "grad_norm": 0.019394023343920708, "learning_rate": 7.389000836053529e-07, "loss": 0.0003, "step": 413820 }, { "epoch": 4.421496874833057, "grad_norm": 0.3137191832065582, "learning_rate": 7.388853244440401e-07, "loss": 0.011, "step": 413830 }, { "epoch": 4.421603718147337, "grad_norm": 0.0019558018539100885, "learning_rate": 7.388705650130081e-07, "loss": 0.0231, "step": 413840 }, { "epoch": 4.421710561461617, "grad_norm": 0.007455739658325911, "learning_rate": 7.388558053122729e-07, "loss": 0.002, "step": 413850 }, { "epoch": 4.421817404775896, "grad_norm": 0.006846389267593622, "learning_rate": 7.388410453418515e-07, "loss": 0.0007, "step": 413860 }, { "epoch": 4.421924248090176, "grad_norm": 0.5946532487869263, "learning_rate": 7.388262851017607e-07, "loss": 0.0047, "step": 413870 }, { "epoch": 4.422031091404455, "grad_norm": 0.017781687900424004, "learning_rate": 7.388115245920168e-07, "loss": 0.0212, "step": 413880 }, { "epoch": 4.422137934718735, "grad_norm": 0.014717139303684235, "learning_rate": 7.387967638126369e-07, "loss": 0.002, "step": 413890 }, { "epoch": 4.422244778033015, "grad_norm": 0.008292688988149166, "learning_rate": 7.387820027636373e-07, "loss": 0.0029, "step": 413900 }, { "epoch": 4.422351621347294, "grad_norm": 0.07536696642637253, "learning_rate": 7.387672414450348e-07, "loss": 0.0043, "step": 413910 }, { "epoch": 4.422458464661574, "grad_norm": 0.06531690806150436, "learning_rate": 7.387524798568459e-07, "loss": 0.0009, "step": 413920 }, { "epoch": 4.422565307975853, "grad_norm": 0.00669721607118845, "learning_rate": 7.387377179990877e-07, "loss": 0.0006, "step": 413930 }, { "epoch": 4.422672151290133, "grad_norm": 0.6706395149230957, "learning_rate": 7.387229558717764e-07, "loss": 0.0052, "step": 413940 }, { "epoch": 4.422778994604412, "grad_norm": 0.0055872052907943726, "learning_rate": 7.387081934749289e-07, "loss": 0.0026, "step": 413950 }, { "epoch": 4.422885837918693, "grad_norm": 6.855681896209717, "learning_rate": 7.38693430808562e-07, "loss": 0.0139, "step": 413960 }, { "epoch": 4.422992681232972, "grad_norm": 0.004485482815653086, "learning_rate": 7.38678667872692e-07, "loss": 0.0022, "step": 413970 }, { "epoch": 4.4230995245472515, "grad_norm": 4.391708850860596, "learning_rate": 7.38663904667336e-07, "loss": 0.0096, "step": 413980 }, { "epoch": 4.423206367861531, "grad_norm": 0.03870882838964462, "learning_rate": 7.386491411925102e-07, "loss": 0.0081, "step": 413990 }, { "epoch": 4.4233132111758104, "grad_norm": 0.58193439245224, "learning_rate": 7.386343774482316e-07, "loss": 0.015, "step": 414000 }, { "epoch": 4.42342005449009, "grad_norm": 0.3568804860115051, "learning_rate": 7.386196134345168e-07, "loss": 0.0013, "step": 414010 }, { "epoch": 4.42352689780437, "grad_norm": 3.533529758453369, "learning_rate": 7.386048491513824e-07, "loss": 0.0045, "step": 414020 }, { "epoch": 4.42363374111865, "grad_norm": 0.0034154383465647697, "learning_rate": 7.385900845988452e-07, "loss": 0.0125, "step": 414030 }, { "epoch": 4.423740584432929, "grad_norm": 0.7739620804786682, "learning_rate": 7.385753197769218e-07, "loss": 0.0103, "step": 414040 }, { "epoch": 4.423847427747209, "grad_norm": 0.017608124762773514, "learning_rate": 7.385605546856288e-07, "loss": 0.0054, "step": 414050 }, { "epoch": 4.423954271061488, "grad_norm": 0.36364081501960754, "learning_rate": 7.385457893249828e-07, "loss": 0.0027, "step": 414060 }, { "epoch": 4.4240611143757675, "grad_norm": 21.620254516601562, "learning_rate": 7.385310236950008e-07, "loss": 0.0217, "step": 414070 }, { "epoch": 4.424167957690048, "grad_norm": 1.0278444290161133, "learning_rate": 7.385162577956992e-07, "loss": 0.0152, "step": 414080 }, { "epoch": 4.424274801004327, "grad_norm": 0.0017356985481455922, "learning_rate": 7.385014916270946e-07, "loss": 0.0067, "step": 414090 }, { "epoch": 4.424381644318607, "grad_norm": 0.006809513084590435, "learning_rate": 7.384867251892041e-07, "loss": 0.0004, "step": 414100 }, { "epoch": 4.424488487632886, "grad_norm": 0.036463573575019836, "learning_rate": 7.38471958482044e-07, "loss": 0.0005, "step": 414110 }, { "epoch": 4.424595330947166, "grad_norm": 1.1017719507217407, "learning_rate": 7.38457191505631e-07, "loss": 0.0236, "step": 414120 }, { "epoch": 4.424702174261445, "grad_norm": 2.1881937980651855, "learning_rate": 7.384424242599818e-07, "loss": 0.009, "step": 414130 }, { "epoch": 4.4248090175757255, "grad_norm": 0.7692593336105347, "learning_rate": 7.384276567451131e-07, "loss": 0.0079, "step": 414140 }, { "epoch": 4.424915860890005, "grad_norm": 0.009401622228324413, "learning_rate": 7.384128889610416e-07, "loss": 0.0044, "step": 414150 }, { "epoch": 4.4250227042042845, "grad_norm": 0.0027231168933212757, "learning_rate": 7.383981209077841e-07, "loss": 0.0091, "step": 414160 }, { "epoch": 4.425129547518564, "grad_norm": 0.006014951039105654, "learning_rate": 7.38383352585357e-07, "loss": 0.0085, "step": 414170 }, { "epoch": 4.425236390832843, "grad_norm": 0.00285192858427763, "learning_rate": 7.38368583993777e-07, "loss": 0.0049, "step": 414180 }, { "epoch": 4.425343234147123, "grad_norm": 0.038207780569791794, "learning_rate": 7.38353815133061e-07, "loss": 0.006, "step": 414190 }, { "epoch": 4.425450077461403, "grad_norm": 1.6971102952957153, "learning_rate": 7.383390460032256e-07, "loss": 0.0142, "step": 414200 }, { "epoch": 4.425556920775683, "grad_norm": 0.009245278313755989, "learning_rate": 7.383242766042873e-07, "loss": 0.0009, "step": 414210 }, { "epoch": 4.425663764089962, "grad_norm": 4.493808746337891, "learning_rate": 7.383095069362631e-07, "loss": 0.0036, "step": 414220 }, { "epoch": 4.4257706074042416, "grad_norm": 0.006569382268935442, "learning_rate": 7.382947369991692e-07, "loss": 0.0077, "step": 414230 }, { "epoch": 4.425877450718521, "grad_norm": 0.7339744567871094, "learning_rate": 7.382799667930227e-07, "loss": 0.0048, "step": 414240 }, { "epoch": 4.425984294032801, "grad_norm": 0.006901953369379044, "learning_rate": 7.382651963178402e-07, "loss": 0.0186, "step": 414250 }, { "epoch": 4.426091137347081, "grad_norm": 0.0021433322690427303, "learning_rate": 7.382504255736383e-07, "loss": 0.0104, "step": 414260 }, { "epoch": 4.42619798066136, "grad_norm": 1.299274206161499, "learning_rate": 7.382356545604336e-07, "loss": 0.0008, "step": 414270 }, { "epoch": 4.42630482397564, "grad_norm": 2.151883602142334, "learning_rate": 7.382208832782428e-07, "loss": 0.0048, "step": 414280 }, { "epoch": 4.426411667289919, "grad_norm": 1.0233502388000488, "learning_rate": 7.382061117270828e-07, "loss": 0.0097, "step": 414290 }, { "epoch": 4.426518510604199, "grad_norm": 0.002181996824219823, "learning_rate": 7.381913399069702e-07, "loss": 0.0001, "step": 414300 }, { "epoch": 4.426625353918478, "grad_norm": 1.1083632707595825, "learning_rate": 7.381765678179214e-07, "loss": 0.0064, "step": 414310 }, { "epoch": 4.4267321972327585, "grad_norm": 0.06605435162782669, "learning_rate": 7.381617954599533e-07, "loss": 0.0056, "step": 414320 }, { "epoch": 4.426839040547038, "grad_norm": 21.127784729003906, "learning_rate": 7.381470228330826e-07, "loss": 0.013, "step": 414330 }, { "epoch": 4.426945883861317, "grad_norm": 0.523385226726532, "learning_rate": 7.38132249937326e-07, "loss": 0.0076, "step": 414340 }, { "epoch": 4.427052727175597, "grad_norm": 0.27959635853767395, "learning_rate": 7.381174767726999e-07, "loss": 0.0076, "step": 414350 }, { "epoch": 4.427159570489876, "grad_norm": 5.108375072479248, "learning_rate": 7.381027033392215e-07, "loss": 0.0009, "step": 414360 }, { "epoch": 4.427266413804157, "grad_norm": 0.0024588974192738533, "learning_rate": 7.380879296369072e-07, "loss": 0.0001, "step": 414370 }, { "epoch": 4.427373257118436, "grad_norm": 0.003540889360010624, "learning_rate": 7.380731556657733e-07, "loss": 0.0043, "step": 414380 }, { "epoch": 4.427480100432716, "grad_norm": 0.40076377987861633, "learning_rate": 7.380583814258371e-07, "loss": 0.0048, "step": 414390 }, { "epoch": 4.427586943746995, "grad_norm": 0.06601160019636154, "learning_rate": 7.380436069171149e-07, "loss": 0.0193, "step": 414400 }, { "epoch": 4.4276937870612745, "grad_norm": 0.0009037131094373763, "learning_rate": 7.380288321396236e-07, "loss": 0.018, "step": 414410 }, { "epoch": 4.427800630375554, "grad_norm": 0.0016946931136772037, "learning_rate": 7.380140570933797e-07, "loss": 0.0051, "step": 414420 }, { "epoch": 4.427907473689833, "grad_norm": 0.005091161001473665, "learning_rate": 7.379992817784e-07, "loss": 0.0102, "step": 414430 }, { "epoch": 4.428014317004114, "grad_norm": 0.14095835387706757, "learning_rate": 7.379845061947012e-07, "loss": 0.0052, "step": 414440 }, { "epoch": 4.428121160318393, "grad_norm": 1.4640629291534424, "learning_rate": 7.379697303422999e-07, "loss": 0.0063, "step": 414450 }, { "epoch": 4.428228003632673, "grad_norm": 2.6548449993133545, "learning_rate": 7.379549542212128e-07, "loss": 0.0058, "step": 414460 }, { "epoch": 4.428334846946952, "grad_norm": 4.099955081939697, "learning_rate": 7.379401778314567e-07, "loss": 0.006, "step": 414470 }, { "epoch": 4.428441690261232, "grad_norm": 0.32307153940200806, "learning_rate": 7.379254011730481e-07, "loss": 0.002, "step": 414480 }, { "epoch": 4.428548533575512, "grad_norm": 0.003913884051144123, "learning_rate": 7.379106242460036e-07, "loss": 0.0276, "step": 414490 }, { "epoch": 4.428655376889791, "grad_norm": 0.013893007300794125, "learning_rate": 7.378958470503403e-07, "loss": 0.0073, "step": 414500 }, { "epoch": 4.428762220204071, "grad_norm": 0.0036076263058930635, "learning_rate": 7.378810695860746e-07, "loss": 0.0076, "step": 414510 }, { "epoch": 4.42886906351835, "grad_norm": 1.656072974205017, "learning_rate": 7.37866291853223e-07, "loss": 0.003, "step": 414520 }, { "epoch": 4.42897590683263, "grad_norm": 10.418471336364746, "learning_rate": 7.378515138518026e-07, "loss": 0.0139, "step": 414530 }, { "epoch": 4.429082750146909, "grad_norm": 4.365047454833984, "learning_rate": 7.378367355818298e-07, "loss": 0.0152, "step": 414540 }, { "epoch": 4.42918959346119, "grad_norm": 0.046618010848760605, "learning_rate": 7.378219570433214e-07, "loss": 0.0002, "step": 414550 }, { "epoch": 4.429296436775469, "grad_norm": 0.003352809464558959, "learning_rate": 7.37807178236294e-07, "loss": 0.0024, "step": 414560 }, { "epoch": 4.4294032800897485, "grad_norm": 0.011011111550033092, "learning_rate": 7.377923991607645e-07, "loss": 0.0267, "step": 414570 }, { "epoch": 4.429510123404028, "grad_norm": 0.9130967855453491, "learning_rate": 7.377776198167494e-07, "loss": 0.0111, "step": 414580 }, { "epoch": 4.429616966718307, "grad_norm": 4.494525909423828, "learning_rate": 7.377628402042653e-07, "loss": 0.0022, "step": 414590 }, { "epoch": 4.429723810032587, "grad_norm": 0.0007919651689007878, "learning_rate": 7.377480603233291e-07, "loss": 0.0042, "step": 414600 }, { "epoch": 4.429830653346867, "grad_norm": 0.1186455637216568, "learning_rate": 7.377332801739573e-07, "loss": 0.0034, "step": 414610 }, { "epoch": 4.429937496661147, "grad_norm": 0.0005307893734425306, "learning_rate": 7.377184997561669e-07, "loss": 0.0025, "step": 414620 }, { "epoch": 4.430044339975426, "grad_norm": 1.54238760471344, "learning_rate": 7.377037190699742e-07, "loss": 0.0047, "step": 414630 }, { "epoch": 4.430151183289706, "grad_norm": 0.8626803755760193, "learning_rate": 7.376889381153961e-07, "loss": 0.0268, "step": 414640 }, { "epoch": 4.430258026603985, "grad_norm": 0.06156127527356148, "learning_rate": 7.376741568924493e-07, "loss": 0.0079, "step": 414650 }, { "epoch": 4.4303648699182645, "grad_norm": 11.699467658996582, "learning_rate": 7.376593754011503e-07, "loss": 0.0123, "step": 414660 }, { "epoch": 4.430471713232545, "grad_norm": 0.052432671189308167, "learning_rate": 7.376445936415161e-07, "loss": 0.0042, "step": 414670 }, { "epoch": 4.430578556546824, "grad_norm": 0.0038643351290374994, "learning_rate": 7.376298116135632e-07, "loss": 0.0001, "step": 414680 }, { "epoch": 4.430685399861104, "grad_norm": 0.022467590868473053, "learning_rate": 7.376150293173081e-07, "loss": 0.0036, "step": 414690 }, { "epoch": 4.430792243175383, "grad_norm": 5.103517055511475, "learning_rate": 7.376002467527679e-07, "loss": 0.0112, "step": 414700 }, { "epoch": 4.430899086489663, "grad_norm": 3.068220853805542, "learning_rate": 7.375854639199591e-07, "loss": 0.0152, "step": 414710 }, { "epoch": 4.431005929803942, "grad_norm": 0.46181198954582214, "learning_rate": 7.375706808188982e-07, "loss": 0.0076, "step": 414720 }, { "epoch": 4.4311127731182225, "grad_norm": 2.6283206939697266, "learning_rate": 7.375558974496023e-07, "loss": 0.0063, "step": 414730 }, { "epoch": 4.431219616432502, "grad_norm": 0.014259567484259605, "learning_rate": 7.375411138120877e-07, "loss": 0.0002, "step": 414740 }, { "epoch": 4.431326459746781, "grad_norm": 0.0026393679436296225, "learning_rate": 7.375263299063713e-07, "loss": 0.004, "step": 414750 }, { "epoch": 4.431433303061061, "grad_norm": 0.0060860104858875275, "learning_rate": 7.375115457324697e-07, "loss": 0.0079, "step": 414760 }, { "epoch": 4.43154014637534, "grad_norm": 2.76426362991333, "learning_rate": 7.374967612903998e-07, "loss": 0.0088, "step": 414770 }, { "epoch": 4.43164698968962, "grad_norm": 0.0017507786396890879, "learning_rate": 7.37481976580178e-07, "loss": 0.0036, "step": 414780 }, { "epoch": 4.4317538330039, "grad_norm": 0.04082362726330757, "learning_rate": 7.374671916018212e-07, "loss": 0.0028, "step": 414790 }, { "epoch": 4.43186067631818, "grad_norm": 0.0036324718967080116, "learning_rate": 7.374524063553459e-07, "loss": 0.0024, "step": 414800 }, { "epoch": 4.431967519632459, "grad_norm": 2.7330658435821533, "learning_rate": 7.374376208407688e-07, "loss": 0.0027, "step": 414810 }, { "epoch": 4.4320743629467385, "grad_norm": 5.20318603515625, "learning_rate": 7.37422835058107e-07, "loss": 0.0173, "step": 414820 }, { "epoch": 4.432181206261018, "grad_norm": 0.02998328022658825, "learning_rate": 7.374080490073767e-07, "loss": 0.0017, "step": 414830 }, { "epoch": 4.432288049575297, "grad_norm": 0.6037129163742065, "learning_rate": 7.37393262688595e-07, "loss": 0.0099, "step": 414840 }, { "epoch": 4.432394892889578, "grad_norm": 0.42853257060050964, "learning_rate": 7.373784761017782e-07, "loss": 0.0096, "step": 414850 }, { "epoch": 4.432501736203857, "grad_norm": 0.3697141110897064, "learning_rate": 7.373636892469433e-07, "loss": 0.0199, "step": 414860 }, { "epoch": 4.432608579518137, "grad_norm": 0.004310287535190582, "learning_rate": 7.373489021241067e-07, "loss": 0.016, "step": 414870 }, { "epoch": 4.432715422832416, "grad_norm": 0.005399155896157026, "learning_rate": 7.373341147332854e-07, "loss": 0.0179, "step": 414880 }, { "epoch": 4.432822266146696, "grad_norm": 0.31679561734199524, "learning_rate": 7.373193270744959e-07, "loss": 0.0128, "step": 414890 }, { "epoch": 4.432929109460975, "grad_norm": 4.44358491897583, "learning_rate": 7.373045391477551e-07, "loss": 0.0211, "step": 414900 }, { "epoch": 4.433035952775255, "grad_norm": 1.5443907976150513, "learning_rate": 7.372897509530797e-07, "loss": 0.0083, "step": 414910 }, { "epoch": 4.433142796089535, "grad_norm": 0.006915774196386337, "learning_rate": 7.372749624904859e-07, "loss": 0.0213, "step": 414920 }, { "epoch": 4.433249639403814, "grad_norm": 8.320923805236816, "learning_rate": 7.372601737599911e-07, "loss": 0.0113, "step": 414930 }, { "epoch": 4.433356482718094, "grad_norm": 0.002784976502880454, "learning_rate": 7.372453847616115e-07, "loss": 0.0051, "step": 414940 }, { "epoch": 4.433463326032373, "grad_norm": 0.0037018381990492344, "learning_rate": 7.372305954953639e-07, "loss": 0.0212, "step": 414950 }, { "epoch": 4.433570169346654, "grad_norm": 2.2051358222961426, "learning_rate": 7.372158059612649e-07, "loss": 0.0027, "step": 414960 }, { "epoch": 4.433677012660933, "grad_norm": 0.0016921745846047997, "learning_rate": 7.372010161593318e-07, "loss": 0.0085, "step": 414970 }, { "epoch": 4.4337838559752125, "grad_norm": 4.812113285064697, "learning_rate": 7.371862260895804e-07, "loss": 0.0052, "step": 414980 }, { "epoch": 4.433890699289492, "grad_norm": 2.9885222911834717, "learning_rate": 7.371714357520281e-07, "loss": 0.0117, "step": 414990 }, { "epoch": 4.433997542603771, "grad_norm": 0.4050743877887726, "learning_rate": 7.371566451466913e-07, "loss": 0.0074, "step": 415000 }, { "epoch": 4.434104385918051, "grad_norm": 0.001815812778659165, "learning_rate": 7.371418542735868e-07, "loss": 0.0153, "step": 415010 }, { "epoch": 4.43421122923233, "grad_norm": 0.19987821578979492, "learning_rate": 7.371270631327313e-07, "loss": 0.0099, "step": 415020 }, { "epoch": 4.434318072546611, "grad_norm": 0.01471638586372137, "learning_rate": 7.371122717241411e-07, "loss": 0.0017, "step": 415030 }, { "epoch": 4.43442491586089, "grad_norm": 0.005637820344418287, "learning_rate": 7.370974800478336e-07, "loss": 0.0046, "step": 415040 }, { "epoch": 4.43453175917517, "grad_norm": 2.60198712348938, "learning_rate": 7.370826881038252e-07, "loss": 0.025, "step": 415050 }, { "epoch": 4.434638602489449, "grad_norm": 0.15160298347473145, "learning_rate": 7.370678958921323e-07, "loss": 0.0048, "step": 415060 }, { "epoch": 4.4347454458037285, "grad_norm": 1.0444711446762085, "learning_rate": 7.370531034127719e-07, "loss": 0.0064, "step": 415070 }, { "epoch": 4.434852289118009, "grad_norm": 0.0025890108663588762, "learning_rate": 7.370383106657607e-07, "loss": 0.0033, "step": 415080 }, { "epoch": 4.434959132432288, "grad_norm": 0.0835905373096466, "learning_rate": 7.370235176511154e-07, "loss": 0.0159, "step": 415090 }, { "epoch": 4.435065975746568, "grad_norm": 3.4833426475524902, "learning_rate": 7.370087243688527e-07, "loss": 0.0076, "step": 415100 }, { "epoch": 4.435172819060847, "grad_norm": 6.518243789672852, "learning_rate": 7.369939308189891e-07, "loss": 0.0231, "step": 415110 }, { "epoch": 4.435279662375127, "grad_norm": 0.49932408332824707, "learning_rate": 7.369791370015415e-07, "loss": 0.0056, "step": 415120 }, { "epoch": 4.435386505689406, "grad_norm": 0.09936108440160751, "learning_rate": 7.369643429165266e-07, "loss": 0.0252, "step": 415130 }, { "epoch": 4.435493349003686, "grad_norm": 0.010846731252968311, "learning_rate": 7.369495485639611e-07, "loss": 0.0169, "step": 415140 }, { "epoch": 4.435600192317966, "grad_norm": 0.003139653243124485, "learning_rate": 7.369347539438617e-07, "loss": 0.0057, "step": 415150 }, { "epoch": 4.435707035632245, "grad_norm": 1.1232088804244995, "learning_rate": 7.369199590562451e-07, "loss": 0.002, "step": 415160 }, { "epoch": 4.435813878946525, "grad_norm": 0.2157481163740158, "learning_rate": 7.369051639011279e-07, "loss": 0.0001, "step": 415170 }, { "epoch": 4.435920722260804, "grad_norm": 0.09967091679573059, "learning_rate": 7.368903684785269e-07, "loss": 0.0102, "step": 415180 }, { "epoch": 4.436027565575084, "grad_norm": 3.2449281215667725, "learning_rate": 7.368755727884588e-07, "loss": 0.0041, "step": 415190 }, { "epoch": 4.436134408889364, "grad_norm": 0.009390274994075298, "learning_rate": 7.368607768309403e-07, "loss": 0.0258, "step": 415200 }, { "epoch": 4.436241252203644, "grad_norm": 0.34220895171165466, "learning_rate": 7.368459806059881e-07, "loss": 0.007, "step": 415210 }, { "epoch": 4.436348095517923, "grad_norm": 0.2987991273403168, "learning_rate": 7.36831184113619e-07, "loss": 0.0006, "step": 415220 }, { "epoch": 4.4364549388322025, "grad_norm": 0.022116107866168022, "learning_rate": 7.368163873538494e-07, "loss": 0.0159, "step": 415230 }, { "epoch": 4.436561782146482, "grad_norm": 0.019712064415216446, "learning_rate": 7.368015903266964e-07, "loss": 0.0194, "step": 415240 }, { "epoch": 4.4366686254607615, "grad_norm": 0.003212511073797941, "learning_rate": 7.367867930321766e-07, "loss": 0.0093, "step": 415250 }, { "epoch": 4.436775468775042, "grad_norm": 1.4435068368911743, "learning_rate": 7.367719954703064e-07, "loss": 0.0237, "step": 415260 }, { "epoch": 4.436882312089321, "grad_norm": 3.29878830909729, "learning_rate": 7.36757197641103e-07, "loss": 0.0042, "step": 415270 }, { "epoch": 4.436989155403601, "grad_norm": 0.12315867096185684, "learning_rate": 7.367423995445828e-07, "loss": 0.0041, "step": 415280 }, { "epoch": 4.43709599871788, "grad_norm": 0.006579053122550249, "learning_rate": 7.367276011807625e-07, "loss": 0.0051, "step": 415290 }, { "epoch": 4.43720284203216, "grad_norm": 6.4706501960754395, "learning_rate": 7.36712802549659e-07, "loss": 0.0038, "step": 415300 }, { "epoch": 4.437309685346439, "grad_norm": 0.10449177771806717, "learning_rate": 7.366980036512886e-07, "loss": 0.0135, "step": 415310 }, { "epoch": 4.4374165286607195, "grad_norm": 0.0033410724718123674, "learning_rate": 7.366832044856685e-07, "loss": 0.0251, "step": 415320 }, { "epoch": 4.437523371974999, "grad_norm": 8.530023574829102, "learning_rate": 7.366684050528152e-07, "loss": 0.014, "step": 415330 }, { "epoch": 4.437630215289278, "grad_norm": 0.0010709698544815183, "learning_rate": 7.366536053527453e-07, "loss": 0.0016, "step": 415340 }, { "epoch": 4.437737058603558, "grad_norm": 0.006399035453796387, "learning_rate": 7.366388053854757e-07, "loss": 0.0023, "step": 415350 }, { "epoch": 4.437843901917837, "grad_norm": 5.865902423858643, "learning_rate": 7.366240051510231e-07, "loss": 0.008, "step": 415360 }, { "epoch": 4.437950745232117, "grad_norm": 0.19425716996192932, "learning_rate": 7.36609204649404e-07, "loss": 0.0182, "step": 415370 }, { "epoch": 4.438057588546397, "grad_norm": 0.02850080095231533, "learning_rate": 7.365944038806354e-07, "loss": 0.0117, "step": 415380 }, { "epoch": 4.4381644318606766, "grad_norm": 0.52565598487854, "learning_rate": 7.365796028447337e-07, "loss": 0.0006, "step": 415390 }, { "epoch": 4.438271275174956, "grad_norm": 4.5916314125061035, "learning_rate": 7.365648015417158e-07, "loss": 0.0054, "step": 415400 }, { "epoch": 4.4383781184892355, "grad_norm": 0.4800909757614136, "learning_rate": 7.365499999715984e-07, "loss": 0.0007, "step": 415410 }, { "epoch": 4.438484961803515, "grad_norm": 2.169438362121582, "learning_rate": 7.365351981343982e-07, "loss": 0.0063, "step": 415420 }, { "epoch": 4.438591805117794, "grad_norm": 0.02216167561709881, "learning_rate": 7.36520396030132e-07, "loss": 0.0005, "step": 415430 }, { "epoch": 4.438698648432075, "grad_norm": 1.7001835107803345, "learning_rate": 7.365055936588162e-07, "loss": 0.0025, "step": 415440 }, { "epoch": 4.438805491746354, "grad_norm": 5.174698352813721, "learning_rate": 7.36490791020468e-07, "loss": 0.0113, "step": 415450 }, { "epoch": 4.438912335060634, "grad_norm": 0.0007424643263220787, "learning_rate": 7.364759881151037e-07, "loss": 0.0014, "step": 415460 }, { "epoch": 4.439019178374913, "grad_norm": 0.011331290006637573, "learning_rate": 7.364611849427402e-07, "loss": 0.0025, "step": 415470 }, { "epoch": 4.439126021689193, "grad_norm": 0.0016632898477837443, "learning_rate": 7.364463815033941e-07, "loss": 0.0121, "step": 415480 }, { "epoch": 4.439232865003472, "grad_norm": 3.0789918899536133, "learning_rate": 7.364315777970823e-07, "loss": 0.0044, "step": 415490 }, { "epoch": 4.439339708317752, "grad_norm": 0.006356801837682724, "learning_rate": 7.364167738238214e-07, "loss": 0.0057, "step": 415500 }, { "epoch": 4.439446551632032, "grad_norm": 0.46469226479530334, "learning_rate": 7.364019695836281e-07, "loss": 0.0054, "step": 415510 }, { "epoch": 4.439553394946311, "grad_norm": 0.09577643126249313, "learning_rate": 7.36387165076519e-07, "loss": 0.0177, "step": 415520 }, { "epoch": 4.439660238260591, "grad_norm": 0.004497576039284468, "learning_rate": 7.363723603025111e-07, "loss": 0.005, "step": 415530 }, { "epoch": 4.43976708157487, "grad_norm": 0.051333051174879074, "learning_rate": 7.36357555261621e-07, "loss": 0.0024, "step": 415540 }, { "epoch": 4.43987392488915, "grad_norm": 0.16313889622688293, "learning_rate": 7.363427499538653e-07, "loss": 0.0143, "step": 415550 }, { "epoch": 4.43998076820343, "grad_norm": 0.07629946619272232, "learning_rate": 7.363279443792607e-07, "loss": 0.013, "step": 415560 }, { "epoch": 4.4400876115177095, "grad_norm": 3.1310458183288574, "learning_rate": 7.363131385378243e-07, "loss": 0.0123, "step": 415570 }, { "epoch": 4.440194454831989, "grad_norm": 0.7817153334617615, "learning_rate": 7.362983324295722e-07, "loss": 0.0098, "step": 415580 }, { "epoch": 4.440301298146268, "grad_norm": 3.759779453277588, "learning_rate": 7.362835260545217e-07, "loss": 0.003, "step": 415590 }, { "epoch": 4.440408141460548, "grad_norm": 0.006653042044490576, "learning_rate": 7.36268719412689e-07, "loss": 0.0014, "step": 415600 }, { "epoch": 4.440514984774827, "grad_norm": 0.014318365603685379, "learning_rate": 7.362539125040914e-07, "loss": 0.0089, "step": 415610 }, { "epoch": 4.440621828089108, "grad_norm": 0.013101857155561447, "learning_rate": 7.362391053287451e-07, "loss": 0.037, "step": 415620 }, { "epoch": 4.440728671403387, "grad_norm": 0.0012541853357106447, "learning_rate": 7.362242978866671e-07, "loss": 0.0049, "step": 415630 }, { "epoch": 4.440835514717667, "grad_norm": 0.6733637452125549, "learning_rate": 7.36209490177874e-07, "loss": 0.0087, "step": 415640 }, { "epoch": 4.440942358031946, "grad_norm": 0.09107311815023422, "learning_rate": 7.361946822023827e-07, "loss": 0.0084, "step": 415650 }, { "epoch": 4.4410492013462255, "grad_norm": 0.05442441999912262, "learning_rate": 7.361798739602096e-07, "loss": 0.0223, "step": 415660 }, { "epoch": 4.441156044660505, "grad_norm": 0.19386611878871918, "learning_rate": 7.361650654513716e-07, "loss": 0.0028, "step": 415670 }, { "epoch": 4.441262887974785, "grad_norm": 0.008234063163399696, "learning_rate": 7.361502566758855e-07, "loss": 0.0054, "step": 415680 }, { "epoch": 4.441369731289065, "grad_norm": 0.0010017225285992026, "learning_rate": 7.361354476337679e-07, "loss": 0.0055, "step": 415690 }, { "epoch": 4.441476574603344, "grad_norm": 0.4320935010910034, "learning_rate": 7.361206383250356e-07, "loss": 0.0037, "step": 415700 }, { "epoch": 4.441583417917624, "grad_norm": 0.12839774787425995, "learning_rate": 7.361058287497053e-07, "loss": 0.0061, "step": 415710 }, { "epoch": 4.441690261231903, "grad_norm": 0.42806464433670044, "learning_rate": 7.360910189077935e-07, "loss": 0.0043, "step": 415720 }, { "epoch": 4.441797104546183, "grad_norm": 0.002084271749481559, "learning_rate": 7.360762087993173e-07, "loss": 0.0034, "step": 415730 }, { "epoch": 4.441903947860463, "grad_norm": 1.0801823139190674, "learning_rate": 7.360613984242933e-07, "loss": 0.0005, "step": 415740 }, { "epoch": 4.442010791174742, "grad_norm": 0.0007793719996698201, "learning_rate": 7.36046587782738e-07, "loss": 0.0053, "step": 415750 }, { "epoch": 4.442117634489022, "grad_norm": 0.015439718030393124, "learning_rate": 7.360317768746683e-07, "loss": 0.02, "step": 415760 }, { "epoch": 4.442224477803301, "grad_norm": 0.8316969871520996, "learning_rate": 7.36016965700101e-07, "loss": 0.007, "step": 415770 }, { "epoch": 4.442331321117581, "grad_norm": 0.9502266049385071, "learning_rate": 7.360021542590526e-07, "loss": 0.0048, "step": 415780 }, { "epoch": 4.442438164431861, "grad_norm": 0.21001958847045898, "learning_rate": 7.359873425515402e-07, "loss": 0.0102, "step": 415790 }, { "epoch": 4.442545007746141, "grad_norm": 5.075577259063721, "learning_rate": 7.3597253057758e-07, "loss": 0.0023, "step": 415800 }, { "epoch": 4.44265185106042, "grad_norm": 0.016215885058045387, "learning_rate": 7.359577183371893e-07, "loss": 0.0056, "step": 415810 }, { "epoch": 4.4427586943746995, "grad_norm": 1.27353835105896, "learning_rate": 7.359429058303842e-07, "loss": 0.0007, "step": 415820 }, { "epoch": 4.442865537688979, "grad_norm": 0.43717139959335327, "learning_rate": 7.35928093057182e-07, "loss": 0.0101, "step": 415830 }, { "epoch": 4.442972381003258, "grad_norm": 0.0026940505485981703, "learning_rate": 7.35913280017599e-07, "loss": 0.0004, "step": 415840 }, { "epoch": 4.443079224317538, "grad_norm": 0.0007923258235678077, "learning_rate": 7.358984667116523e-07, "loss": 0.0182, "step": 415850 }, { "epoch": 4.443186067631818, "grad_norm": 0.003434031503275037, "learning_rate": 7.358836531393582e-07, "loss": 0.0108, "step": 415860 }, { "epoch": 4.443292910946098, "grad_norm": 3.0605196952819824, "learning_rate": 7.358688393007337e-07, "loss": 0.003, "step": 415870 }, { "epoch": 4.443399754260377, "grad_norm": 0.01038955245167017, "learning_rate": 7.358540251957957e-07, "loss": 0.006, "step": 415880 }, { "epoch": 4.443506597574657, "grad_norm": 0.0005036217044107616, "learning_rate": 7.358392108245605e-07, "loss": 0.006, "step": 415890 }, { "epoch": 4.443613440888936, "grad_norm": 0.021751806139945984, "learning_rate": 7.358243961870451e-07, "loss": 0.0027, "step": 415900 }, { "epoch": 4.443720284203216, "grad_norm": 0.0009613106958568096, "learning_rate": 7.358095812832662e-07, "loss": 0.0071, "step": 415910 }, { "epoch": 4.443827127517496, "grad_norm": 0.12052604556083679, "learning_rate": 7.357947661132403e-07, "loss": 0.0287, "step": 415920 }, { "epoch": 4.443933970831775, "grad_norm": 0.34992310404777527, "learning_rate": 7.357799506769845e-07, "loss": 0.0123, "step": 415930 }, { "epoch": 4.444040814146055, "grad_norm": 0.8189385533332825, "learning_rate": 7.357651349745153e-07, "loss": 0.0166, "step": 415940 }, { "epoch": 4.444147657460334, "grad_norm": 0.0015090295346453786, "learning_rate": 7.357503190058494e-07, "loss": 0.0108, "step": 415950 }, { "epoch": 4.444254500774614, "grad_norm": 0.0021250504069030285, "learning_rate": 7.357355027710037e-07, "loss": 0.0008, "step": 415960 }, { "epoch": 4.444361344088894, "grad_norm": 0.004397956654429436, "learning_rate": 7.357206862699948e-07, "loss": 0.0123, "step": 415970 }, { "epoch": 4.4444681874031735, "grad_norm": 0.4273824393749237, "learning_rate": 7.357058695028394e-07, "loss": 0.0014, "step": 415980 }, { "epoch": 4.444575030717453, "grad_norm": 0.0044632889330387115, "learning_rate": 7.356910524695544e-07, "loss": 0.0074, "step": 415990 }, { "epoch": 4.444681874031732, "grad_norm": 0.005238205194473267, "learning_rate": 7.356762351701562e-07, "loss": 0.0266, "step": 416000 }, { "epoch": 4.444788717346012, "grad_norm": 0.004625609610229731, "learning_rate": 7.356614176046619e-07, "loss": 0.0126, "step": 416010 }, { "epoch": 4.444895560660291, "grad_norm": 1.5077441930770874, "learning_rate": 7.356465997730881e-07, "loss": 0.0234, "step": 416020 }, { "epoch": 4.445002403974572, "grad_norm": 0.19335469603538513, "learning_rate": 7.356317816754514e-07, "loss": 0.0141, "step": 416030 }, { "epoch": 4.445109247288851, "grad_norm": 4.07640266418457, "learning_rate": 7.356169633117687e-07, "loss": 0.0425, "step": 416040 }, { "epoch": 4.445216090603131, "grad_norm": 0.03487178683280945, "learning_rate": 7.356021446820567e-07, "loss": 0.0085, "step": 416050 }, { "epoch": 4.44532293391741, "grad_norm": 0.004386446438729763, "learning_rate": 7.35587325786332e-07, "loss": 0.0085, "step": 416060 }, { "epoch": 4.4454297772316895, "grad_norm": 0.03216024115681648, "learning_rate": 7.355725066246114e-07, "loss": 0.0052, "step": 416070 }, { "epoch": 4.445536620545969, "grad_norm": 3.424192190170288, "learning_rate": 7.355576871969118e-07, "loss": 0.0113, "step": 416080 }, { "epoch": 4.445643463860249, "grad_norm": 0.015588697046041489, "learning_rate": 7.355428675032498e-07, "loss": 0.0033, "step": 416090 }, { "epoch": 4.445750307174529, "grad_norm": 0.6790387630462646, "learning_rate": 7.35528047543642e-07, "loss": 0.0075, "step": 416100 }, { "epoch": 4.445857150488808, "grad_norm": 0.019877653568983078, "learning_rate": 7.355132273181052e-07, "loss": 0.0003, "step": 416110 }, { "epoch": 4.445963993803088, "grad_norm": 0.002375938929617405, "learning_rate": 7.354984068266564e-07, "loss": 0.0217, "step": 416120 }, { "epoch": 4.446070837117367, "grad_norm": 14.565755844116211, "learning_rate": 7.354835860693119e-07, "loss": 0.0133, "step": 416130 }, { "epoch": 4.446177680431647, "grad_norm": 0.00902466755360365, "learning_rate": 7.354687650460889e-07, "loss": 0.0032, "step": 416140 }, { "epoch": 4.446284523745927, "grad_norm": 0.0016607979778200388, "learning_rate": 7.354539437570037e-07, "loss": 0.002, "step": 416150 }, { "epoch": 4.446391367060206, "grad_norm": 19.064369201660156, "learning_rate": 7.354391222020734e-07, "loss": 0.0114, "step": 416160 }, { "epoch": 4.446498210374486, "grad_norm": 0.4760589003562927, "learning_rate": 7.354243003813143e-07, "loss": 0.0061, "step": 416170 }, { "epoch": 4.446605053688765, "grad_norm": 0.5832316875457764, "learning_rate": 7.354094782947436e-07, "loss": 0.0133, "step": 416180 }, { "epoch": 4.446711897003045, "grad_norm": 0.25031188130378723, "learning_rate": 7.353946559423778e-07, "loss": 0.0031, "step": 416190 }, { "epoch": 4.446818740317324, "grad_norm": 3.1167359352111816, "learning_rate": 7.353798333242337e-07, "loss": 0.0015, "step": 416200 }, { "epoch": 4.446925583631605, "grad_norm": 1.9840426445007324, "learning_rate": 7.35365010440328e-07, "loss": 0.0057, "step": 416210 }, { "epoch": 4.447032426945884, "grad_norm": 0.15052203834056854, "learning_rate": 7.353501872906775e-07, "loss": 0.0261, "step": 416220 }, { "epoch": 4.4471392702601635, "grad_norm": 1.271410346031189, "learning_rate": 7.353353638752986e-07, "loss": 0.0048, "step": 416230 }, { "epoch": 4.447246113574443, "grad_norm": 0.2388157993555069, "learning_rate": 7.353205401942086e-07, "loss": 0.0025, "step": 416240 }, { "epoch": 4.4473529568887225, "grad_norm": 5.675680637359619, "learning_rate": 7.353057162474239e-07, "loss": 0.0288, "step": 416250 }, { "epoch": 4.447459800203002, "grad_norm": 0.041316356509923935, "learning_rate": 7.35290892034961e-07, "loss": 0.0041, "step": 416260 }, { "epoch": 4.447566643517282, "grad_norm": 3.518247365951538, "learning_rate": 7.352760675568372e-07, "loss": 0.0312, "step": 416270 }, { "epoch": 4.447673486831562, "grad_norm": 0.33859768509864807, "learning_rate": 7.352612428130691e-07, "loss": 0.0025, "step": 416280 }, { "epoch": 4.447780330145841, "grad_norm": 10.749897003173828, "learning_rate": 7.35246417803673e-07, "loss": 0.0065, "step": 416290 }, { "epoch": 4.447887173460121, "grad_norm": 17.959335327148438, "learning_rate": 7.352315925286661e-07, "loss": 0.0227, "step": 416300 }, { "epoch": 4.4479940167744, "grad_norm": 0.008460981771349907, "learning_rate": 7.35216766988065e-07, "loss": 0.008, "step": 416310 }, { "epoch": 4.4481008600886796, "grad_norm": 0.0035268166102468967, "learning_rate": 7.352019411818863e-07, "loss": 0.0078, "step": 416320 }, { "epoch": 4.44820770340296, "grad_norm": 0.018901782110333443, "learning_rate": 7.35187115110147e-07, "loss": 0.0175, "step": 416330 }, { "epoch": 4.448314546717239, "grad_norm": 0.1392507553100586, "learning_rate": 7.351722887728635e-07, "loss": 0.0011, "step": 416340 }, { "epoch": 4.448421390031519, "grad_norm": 0.01775314286351204, "learning_rate": 7.351574621700529e-07, "loss": 0.0036, "step": 416350 }, { "epoch": 4.448528233345798, "grad_norm": 0.004905062261968851, "learning_rate": 7.351426353017317e-07, "loss": 0.0043, "step": 416360 }, { "epoch": 4.448635076660078, "grad_norm": 0.1763692945241928, "learning_rate": 7.351278081679167e-07, "loss": 0.0037, "step": 416370 }, { "epoch": 4.448741919974357, "grad_norm": 0.006671616341918707, "learning_rate": 7.351129807686247e-07, "loss": 0.0017, "step": 416380 }, { "epoch": 4.4488487632886375, "grad_norm": 0.0014073257334530354, "learning_rate": 7.350981531038724e-07, "loss": 0.004, "step": 416390 }, { "epoch": 4.448955606602917, "grad_norm": 0.6950331330299377, "learning_rate": 7.350833251736765e-07, "loss": 0.0183, "step": 416400 }, { "epoch": 4.4490624499171965, "grad_norm": 0.10329141467809677, "learning_rate": 7.350684969780538e-07, "loss": 0.0077, "step": 416410 }, { "epoch": 4.449169293231476, "grad_norm": 0.004874940030276775, "learning_rate": 7.350536685170212e-07, "loss": 0.0014, "step": 416420 }, { "epoch": 4.449276136545755, "grad_norm": 0.007606907747685909, "learning_rate": 7.35038839790595e-07, "loss": 0.0249, "step": 416430 }, { "epoch": 4.449382979860035, "grad_norm": 0.0008554996456950903, "learning_rate": 7.350240107987924e-07, "loss": 0.0052, "step": 416440 }, { "epoch": 4.449489823174315, "grad_norm": 0.08103438466787338, "learning_rate": 7.350091815416298e-07, "loss": 0.0041, "step": 416450 }, { "epoch": 4.449596666488595, "grad_norm": 3.9507296085357666, "learning_rate": 7.349943520191242e-07, "loss": 0.0077, "step": 416460 }, { "epoch": 4.449703509802874, "grad_norm": 0.0015089362859725952, "learning_rate": 7.349795222312922e-07, "loss": 0.0029, "step": 416470 }, { "epoch": 4.449810353117154, "grad_norm": 1.9077417850494385, "learning_rate": 7.349646921781506e-07, "loss": 0.0433, "step": 416480 }, { "epoch": 4.449917196431433, "grad_norm": 0.003848060267046094, "learning_rate": 7.349498618597161e-07, "loss": 0.021, "step": 416490 }, { "epoch": 4.450024039745713, "grad_norm": 0.0007507120608352125, "learning_rate": 7.349350312760055e-07, "loss": 0.0086, "step": 416500 }, { "epoch": 4.450130883059993, "grad_norm": 0.0025039364118129015, "learning_rate": 7.349202004270356e-07, "loss": 0.0002, "step": 416510 }, { "epoch": 4.450237726374272, "grad_norm": 0.0014111140044406056, "learning_rate": 7.349053693128229e-07, "loss": 0.0028, "step": 416520 }, { "epoch": 4.450344569688552, "grad_norm": 0.8572109937667847, "learning_rate": 7.348905379333845e-07, "loss": 0.0039, "step": 416530 }, { "epoch": 4.450451413002831, "grad_norm": 0.0007109948201104999, "learning_rate": 7.348757062887368e-07, "loss": 0.0018, "step": 416540 }, { "epoch": 4.450558256317111, "grad_norm": 0.10912391543388367, "learning_rate": 7.348608743788967e-07, "loss": 0.0014, "step": 416550 }, { "epoch": 4.45066509963139, "grad_norm": 4.661530494689941, "learning_rate": 7.34846042203881e-07, "loss": 0.01, "step": 416560 }, { "epoch": 4.4507719429456705, "grad_norm": 0.1963048279285431, "learning_rate": 7.348312097637064e-07, "loss": 0.0007, "step": 416570 }, { "epoch": 4.45087878625995, "grad_norm": 0.0005038838717155159, "learning_rate": 7.348163770583896e-07, "loss": 0.0006, "step": 416580 }, { "epoch": 4.450985629574229, "grad_norm": 1.4390323162078857, "learning_rate": 7.348015440879475e-07, "loss": 0.0015, "step": 416590 }, { "epoch": 4.451092472888509, "grad_norm": 0.37194347381591797, "learning_rate": 7.347867108523966e-07, "loss": 0.0009, "step": 416600 }, { "epoch": 4.451199316202788, "grad_norm": 0.0036335596814751625, "learning_rate": 7.347718773517539e-07, "loss": 0.0013, "step": 416610 }, { "epoch": 4.451306159517069, "grad_norm": 0.03845378756523132, "learning_rate": 7.34757043586036e-07, "loss": 0.0051, "step": 416620 }, { "epoch": 4.451413002831348, "grad_norm": 0.003109362442046404, "learning_rate": 7.347422095552596e-07, "loss": 0.0176, "step": 416630 }, { "epoch": 4.451519846145628, "grad_norm": 0.009319656528532505, "learning_rate": 7.347273752594417e-07, "loss": 0.0093, "step": 416640 }, { "epoch": 4.451626689459907, "grad_norm": 0.8303625583648682, "learning_rate": 7.347125406985987e-07, "loss": 0.0123, "step": 416650 }, { "epoch": 4.4517335327741865, "grad_norm": 1.6008732318878174, "learning_rate": 7.346977058727475e-07, "loss": 0.01, "step": 416660 }, { "epoch": 4.451840376088466, "grad_norm": 1.0434949398040771, "learning_rate": 7.346828707819051e-07, "loss": 0.0052, "step": 416670 }, { "epoch": 4.451947219402745, "grad_norm": 7.464119911193848, "learning_rate": 7.346680354260878e-07, "loss": 0.0101, "step": 416680 }, { "epoch": 4.452054062717026, "grad_norm": 3.5666003227233887, "learning_rate": 7.346531998053126e-07, "loss": 0.0048, "step": 416690 }, { "epoch": 4.452160906031305, "grad_norm": 2.264573574066162, "learning_rate": 7.346383639195963e-07, "loss": 0.0094, "step": 416700 }, { "epoch": 4.452267749345585, "grad_norm": 0.11702447384595871, "learning_rate": 7.346235277689556e-07, "loss": 0.0081, "step": 416710 }, { "epoch": 4.452374592659864, "grad_norm": 0.0006914929253980517, "learning_rate": 7.346086913534072e-07, "loss": 0.0096, "step": 416720 }, { "epoch": 4.452481435974144, "grad_norm": 0.07159476727247238, "learning_rate": 7.34593854672968e-07, "loss": 0.0007, "step": 416730 }, { "epoch": 4.452588279288424, "grad_norm": 0.05626223608851433, "learning_rate": 7.345790177276544e-07, "loss": 0.0175, "step": 416740 }, { "epoch": 4.452695122602703, "grad_norm": 1.1516530513763428, "learning_rate": 7.345641805174835e-07, "loss": 0.0013, "step": 416750 }, { "epoch": 4.452801965916983, "grad_norm": 0.6448490619659424, "learning_rate": 7.345493430424719e-07, "loss": 0.0023, "step": 416760 }, { "epoch": 4.452908809231262, "grad_norm": 0.035723455250263214, "learning_rate": 7.345345053026364e-07, "loss": 0.0133, "step": 416770 }, { "epoch": 4.453015652545542, "grad_norm": 0.00014985482266638428, "learning_rate": 7.345196672979937e-07, "loss": 0.0116, "step": 416780 }, { "epoch": 4.453122495859821, "grad_norm": 0.000634186202660203, "learning_rate": 7.345048290285607e-07, "loss": 0.0088, "step": 416790 }, { "epoch": 4.453229339174102, "grad_norm": 0.07436591386795044, "learning_rate": 7.34489990494354e-07, "loss": 0.0026, "step": 416800 }, { "epoch": 4.453336182488381, "grad_norm": 0.15351921319961548, "learning_rate": 7.344751516953904e-07, "loss": 0.0003, "step": 416810 }, { "epoch": 4.4534430258026605, "grad_norm": 1.5040453672409058, "learning_rate": 7.344603126316867e-07, "loss": 0.0117, "step": 416820 }, { "epoch": 4.45354986911694, "grad_norm": 10.043211936950684, "learning_rate": 7.344454733032596e-07, "loss": 0.0036, "step": 416830 }, { "epoch": 4.453656712431219, "grad_norm": 0.1734461933374405, "learning_rate": 7.344306337101258e-07, "loss": 0.0095, "step": 416840 }, { "epoch": 4.453763555745499, "grad_norm": 0.022409649565815926, "learning_rate": 7.344157938523023e-07, "loss": 0.001, "step": 416850 }, { "epoch": 4.453870399059779, "grad_norm": 6.32753324508667, "learning_rate": 7.344009537298055e-07, "loss": 0.0168, "step": 416860 }, { "epoch": 4.453977242374059, "grad_norm": 0.3879682719707489, "learning_rate": 7.343861133426523e-07, "loss": 0.0102, "step": 416870 }, { "epoch": 4.454084085688338, "grad_norm": 0.09263543039560318, "learning_rate": 7.343712726908596e-07, "loss": 0.0013, "step": 416880 }, { "epoch": 4.454190929002618, "grad_norm": 12.766898155212402, "learning_rate": 7.34356431774444e-07, "loss": 0.0071, "step": 416890 }, { "epoch": 4.454297772316897, "grad_norm": 0.08282031118869781, "learning_rate": 7.343415905934223e-07, "loss": 0.0011, "step": 416900 }, { "epoch": 4.4544046156311765, "grad_norm": 0.04961881414055824, "learning_rate": 7.343267491478113e-07, "loss": 0.0233, "step": 416910 }, { "epoch": 4.454511458945457, "grad_norm": 0.26240864396095276, "learning_rate": 7.343119074376276e-07, "loss": 0.0039, "step": 416920 }, { "epoch": 4.454618302259736, "grad_norm": 0.009946685284376144, "learning_rate": 7.342970654628883e-07, "loss": 0.0085, "step": 416930 }, { "epoch": 4.454725145574016, "grad_norm": 0.21656428277492523, "learning_rate": 7.342822232236098e-07, "loss": 0.0043, "step": 416940 }, { "epoch": 4.454831988888295, "grad_norm": 0.014980889856815338, "learning_rate": 7.342673807198089e-07, "loss": 0.0125, "step": 416950 }, { "epoch": 4.454938832202575, "grad_norm": 5.642757415771484, "learning_rate": 7.342525379515026e-07, "loss": 0.0155, "step": 416960 }, { "epoch": 4.455045675516854, "grad_norm": 1.1806751489639282, "learning_rate": 7.342376949187072e-07, "loss": 0.0139, "step": 416970 }, { "epoch": 4.4551525188311345, "grad_norm": 0.020836662501096725, "learning_rate": 7.342228516214401e-07, "loss": 0.0416, "step": 416980 }, { "epoch": 4.455259362145414, "grad_norm": 0.21904057264328003, "learning_rate": 7.342080080597178e-07, "loss": 0.0078, "step": 416990 }, { "epoch": 4.455366205459693, "grad_norm": 0.0026868872810155153, "learning_rate": 7.341931642335566e-07, "loss": 0.0012, "step": 417000 }, { "epoch": 4.455473048773973, "grad_norm": 0.009182671085000038, "learning_rate": 7.341783201429739e-07, "loss": 0.0612, "step": 417010 }, { "epoch": 4.455579892088252, "grad_norm": 0.008272739127278328, "learning_rate": 7.341634757879862e-07, "loss": 0.0048, "step": 417020 }, { "epoch": 4.455686735402532, "grad_norm": 0.008312666788697243, "learning_rate": 7.341486311686101e-07, "loss": 0.0049, "step": 417030 }, { "epoch": 4.455793578716812, "grad_norm": 0.3419964015483856, "learning_rate": 7.341337862848626e-07, "loss": 0.0108, "step": 417040 }, { "epoch": 4.455900422031092, "grad_norm": 2.0804941654205322, "learning_rate": 7.341189411367605e-07, "loss": 0.0118, "step": 417050 }, { "epoch": 4.456007265345371, "grad_norm": 1.3029683828353882, "learning_rate": 7.341040957243203e-07, "loss": 0.0028, "step": 417060 }, { "epoch": 4.4561141086596505, "grad_norm": 0.0008915724465623498, "learning_rate": 7.34089250047559e-07, "loss": 0.0108, "step": 417070 }, { "epoch": 4.45622095197393, "grad_norm": 1.165643334388733, "learning_rate": 7.340744041064931e-07, "loss": 0.0172, "step": 417080 }, { "epoch": 4.456327795288209, "grad_norm": 0.7892576456069946, "learning_rate": 7.340595579011397e-07, "loss": 0.0185, "step": 417090 }, { "epoch": 4.45643463860249, "grad_norm": 0.09633152931928635, "learning_rate": 7.340447114315154e-07, "loss": 0.008, "step": 417100 }, { "epoch": 4.456541481916769, "grad_norm": 0.6389933824539185, "learning_rate": 7.340298646976369e-07, "loss": 0.0261, "step": 417110 }, { "epoch": 4.456648325231049, "grad_norm": 8.53255844116211, "learning_rate": 7.34015017699521e-07, "loss": 0.0224, "step": 417120 }, { "epoch": 4.456755168545328, "grad_norm": 0.0034686538856476545, "learning_rate": 7.340001704371846e-07, "loss": 0.0009, "step": 417130 }, { "epoch": 4.456862011859608, "grad_norm": 0.00513059226796031, "learning_rate": 7.339853229106442e-07, "loss": 0.0006, "step": 417140 }, { "epoch": 4.456968855173887, "grad_norm": 0.015793994069099426, "learning_rate": 7.339704751199167e-07, "loss": 0.0107, "step": 417150 }, { "epoch": 4.457075698488167, "grad_norm": 0.0051470547914505005, "learning_rate": 7.33955627065019e-07, "loss": 0.0011, "step": 417160 }, { "epoch": 4.457182541802447, "grad_norm": 1.0101948976516724, "learning_rate": 7.339407787459676e-07, "loss": 0.0036, "step": 417170 }, { "epoch": 4.457289385116726, "grad_norm": 0.28481370210647583, "learning_rate": 7.339259301627796e-07, "loss": 0.0006, "step": 417180 }, { "epoch": 4.457396228431006, "grad_norm": 2.5561413764953613, "learning_rate": 7.339110813154714e-07, "loss": 0.0077, "step": 417190 }, { "epoch": 4.457503071745285, "grad_norm": 0.9788088202476501, "learning_rate": 7.3389623220406e-07, "loss": 0.0042, "step": 417200 }, { "epoch": 4.457609915059566, "grad_norm": 0.004563193768262863, "learning_rate": 7.338813828285621e-07, "loss": 0.0061, "step": 417210 }, { "epoch": 4.457716758373845, "grad_norm": 0.0034834234975278378, "learning_rate": 7.338665331889945e-07, "loss": 0.0004, "step": 417220 }, { "epoch": 4.4578236016881245, "grad_norm": 0.028492754325270653, "learning_rate": 7.338516832853738e-07, "loss": 0.0039, "step": 417230 }, { "epoch": 4.457930445002404, "grad_norm": 20.53641700744629, "learning_rate": 7.338368331177172e-07, "loss": 0.0218, "step": 417240 }, { "epoch": 4.4580372883166834, "grad_norm": 0.07787440717220306, "learning_rate": 7.338219826860409e-07, "loss": 0.016, "step": 417250 }, { "epoch": 4.458144131630963, "grad_norm": 2.8322067260742188, "learning_rate": 7.338071319903619e-07, "loss": 0.01, "step": 417260 }, { "epoch": 4.458250974945242, "grad_norm": 0.8534947037696838, "learning_rate": 7.337922810306971e-07, "loss": 0.0106, "step": 417270 }, { "epoch": 4.458357818259523, "grad_norm": 6.970445156097412, "learning_rate": 7.337774298070633e-07, "loss": 0.0127, "step": 417280 }, { "epoch": 4.458464661573802, "grad_norm": 0.12757901847362518, "learning_rate": 7.33762578319477e-07, "loss": 0.0007, "step": 417290 }, { "epoch": 4.458571504888082, "grad_norm": 0.6587270498275757, "learning_rate": 7.33747726567955e-07, "loss": 0.0079, "step": 417300 }, { "epoch": 4.458678348202361, "grad_norm": 9.007445335388184, "learning_rate": 7.337328745525144e-07, "loss": 0.0228, "step": 417310 }, { "epoch": 4.4587851915166405, "grad_norm": 4.758812427520752, "learning_rate": 7.337180222731716e-07, "loss": 0.0312, "step": 417320 }, { "epoch": 4.458892034830921, "grad_norm": 0.0026385514065623283, "learning_rate": 7.337031697299436e-07, "loss": 0.001, "step": 417330 }, { "epoch": 4.4589988781452, "grad_norm": 1.5250096321105957, "learning_rate": 7.33688316922847e-07, "loss": 0.0237, "step": 417340 }, { "epoch": 4.45910572145948, "grad_norm": 0.022325988858938217, "learning_rate": 7.336734638518987e-07, "loss": 0.007, "step": 417350 }, { "epoch": 4.459212564773759, "grad_norm": 0.021370092406868935, "learning_rate": 7.336586105171154e-07, "loss": 0.0139, "step": 417360 }, { "epoch": 4.459319408088039, "grad_norm": 5.952831268310547, "learning_rate": 7.336437569185139e-07, "loss": 0.0171, "step": 417370 }, { "epoch": 4.459426251402318, "grad_norm": 3.46748423576355, "learning_rate": 7.336289030561109e-07, "loss": 0.006, "step": 417380 }, { "epoch": 4.459533094716598, "grad_norm": 0.040620867162942886, "learning_rate": 7.336140489299233e-07, "loss": 0.0052, "step": 417390 }, { "epoch": 4.459639938030878, "grad_norm": 0.021443605422973633, "learning_rate": 7.335991945399677e-07, "loss": 0.0015, "step": 417400 }, { "epoch": 4.4597467813451575, "grad_norm": 0.018527578562498093, "learning_rate": 7.335843398862612e-07, "loss": 0.0037, "step": 417410 }, { "epoch": 4.459853624659437, "grad_norm": 0.026662219315767288, "learning_rate": 7.335694849688202e-07, "loss": 0.0169, "step": 417420 }, { "epoch": 4.459960467973716, "grad_norm": 0.04231022670865059, "learning_rate": 7.335546297876616e-07, "loss": 0.0026, "step": 417430 }, { "epoch": 4.460067311287996, "grad_norm": 0.774157702922821, "learning_rate": 7.335397743428023e-07, "loss": 0.0037, "step": 417440 }, { "epoch": 4.460174154602276, "grad_norm": 0.8362148404121399, "learning_rate": 7.335249186342589e-07, "loss": 0.0021, "step": 417450 }, { "epoch": 4.460280997916556, "grad_norm": 1.3545620441436768, "learning_rate": 7.335100626620483e-07, "loss": 0.0054, "step": 417460 }, { "epoch": 4.460387841230835, "grad_norm": 1.7091314792633057, "learning_rate": 7.334952064261872e-07, "loss": 0.0076, "step": 417470 }, { "epoch": 4.4604946845451146, "grad_norm": 0.0013760350411757827, "learning_rate": 7.334803499266924e-07, "loss": 0.0011, "step": 417480 }, { "epoch": 4.460601527859394, "grad_norm": 1.9900490045547485, "learning_rate": 7.334654931635805e-07, "loss": 0.0036, "step": 417490 }, { "epoch": 4.4607083711736735, "grad_norm": 0.0274924598634243, "learning_rate": 7.334506361368687e-07, "loss": 0.009, "step": 417500 }, { "epoch": 4.460815214487954, "grad_norm": 7.124987602233887, "learning_rate": 7.334357788465733e-07, "loss": 0.0163, "step": 417510 }, { "epoch": 4.460922057802233, "grad_norm": 0.06944317370653152, "learning_rate": 7.334209212927114e-07, "loss": 0.0007, "step": 417520 }, { "epoch": 4.461028901116513, "grad_norm": 13.405487060546875, "learning_rate": 7.334060634752996e-07, "loss": 0.0202, "step": 417530 }, { "epoch": 4.461135744430792, "grad_norm": 0.05139963701367378, "learning_rate": 7.333912053943547e-07, "loss": 0.0022, "step": 417540 }, { "epoch": 4.461242587745072, "grad_norm": 0.3343740999698639, "learning_rate": 7.333763470498937e-07, "loss": 0.0038, "step": 417550 }, { "epoch": 4.461349431059351, "grad_norm": 5.013935089111328, "learning_rate": 7.333614884419331e-07, "loss": 0.009, "step": 417560 }, { "epoch": 4.4614562743736315, "grad_norm": 0.5919363498687744, "learning_rate": 7.333466295704898e-07, "loss": 0.0331, "step": 417570 }, { "epoch": 4.461563117687911, "grad_norm": 0.006751369684934616, "learning_rate": 7.333317704355804e-07, "loss": 0.0004, "step": 417580 }, { "epoch": 4.46166996100219, "grad_norm": 0.2752286195755005, "learning_rate": 7.333169110372219e-07, "loss": 0.0079, "step": 417590 }, { "epoch": 4.46177680431647, "grad_norm": 1.9750081300735474, "learning_rate": 7.33302051375431e-07, "loss": 0.0041, "step": 417600 }, { "epoch": 4.461883647630749, "grad_norm": 3.756343126296997, "learning_rate": 7.332871914502245e-07, "loss": 0.0078, "step": 417610 }, { "epoch": 4.461990490945029, "grad_norm": 1.1429576873779297, "learning_rate": 7.332723312616193e-07, "loss": 0.0195, "step": 417620 }, { "epoch": 4.462097334259309, "grad_norm": 2.93528413772583, "learning_rate": 7.332574708096317e-07, "loss": 0.012, "step": 417630 }, { "epoch": 4.462204177573589, "grad_norm": 0.014925863593816757, "learning_rate": 7.33242610094279e-07, "loss": 0.0083, "step": 417640 }, { "epoch": 4.462311020887868, "grad_norm": 0.006624027621001005, "learning_rate": 7.332277491155779e-07, "loss": 0.0464, "step": 417650 }, { "epoch": 4.4624178642021475, "grad_norm": 0.3422016203403473, "learning_rate": 7.332128878735448e-07, "loss": 0.0101, "step": 417660 }, { "epoch": 4.462524707516427, "grad_norm": 0.05893462523818016, "learning_rate": 7.331980263681969e-07, "loss": 0.0014, "step": 417670 }, { "epoch": 4.462631550830706, "grad_norm": 3.6376171112060547, "learning_rate": 7.331831645995507e-07, "loss": 0.004, "step": 417680 }, { "epoch": 4.462738394144987, "grad_norm": 0.08176503330469131, "learning_rate": 7.331683025676234e-07, "loss": 0.0281, "step": 417690 }, { "epoch": 4.462845237459266, "grad_norm": 0.8999119997024536, "learning_rate": 7.331534402724312e-07, "loss": 0.0054, "step": 417700 }, { "epoch": 4.462952080773546, "grad_norm": 0.18949933350086212, "learning_rate": 7.331385777139914e-07, "loss": 0.0002, "step": 417710 }, { "epoch": 4.463058924087825, "grad_norm": 0.1365983635187149, "learning_rate": 7.331237148923203e-07, "loss": 0.006, "step": 417720 }, { "epoch": 4.463165767402105, "grad_norm": 4.538715362548828, "learning_rate": 7.33108851807435e-07, "loss": 0.0068, "step": 417730 }, { "epoch": 4.463272610716384, "grad_norm": 0.371720552444458, "learning_rate": 7.330939884593524e-07, "loss": 0.0035, "step": 417740 }, { "epoch": 4.463379454030664, "grad_norm": 0.080657958984375, "learning_rate": 7.33079124848089e-07, "loss": 0.0067, "step": 417750 }, { "epoch": 4.463486297344944, "grad_norm": 0.017854725942015648, "learning_rate": 7.330642609736616e-07, "loss": 0.0073, "step": 417760 }, { "epoch": 4.463593140659223, "grad_norm": 0.009864851832389832, "learning_rate": 7.33049396836087e-07, "loss": 0.0069, "step": 417770 }, { "epoch": 4.463699983973503, "grad_norm": 0.4023866653442383, "learning_rate": 7.330345324353822e-07, "loss": 0.0011, "step": 417780 }, { "epoch": 4.463806827287782, "grad_norm": 2.4959325790405273, "learning_rate": 7.330196677715639e-07, "loss": 0.0058, "step": 417790 }, { "epoch": 4.463913670602062, "grad_norm": 0.0021020444110035896, "learning_rate": 7.330048028446485e-07, "loss": 0.0037, "step": 417800 }, { "epoch": 4.464020513916342, "grad_norm": 0.8555750250816345, "learning_rate": 7.329899376546534e-07, "loss": 0.0049, "step": 417810 }, { "epoch": 4.4641273572306215, "grad_norm": 0.014878612011671066, "learning_rate": 7.329750722015949e-07, "loss": 0.0172, "step": 417820 }, { "epoch": 4.464234200544901, "grad_norm": 1.0974236726760864, "learning_rate": 7.329602064854899e-07, "loss": 0.0019, "step": 417830 }, { "epoch": 4.46434104385918, "grad_norm": 0.0036588404327630997, "learning_rate": 7.329453405063555e-07, "loss": 0.0173, "step": 417840 }, { "epoch": 4.46444788717346, "grad_norm": 0.16823381185531616, "learning_rate": 7.329304742642081e-07, "loss": 0.0019, "step": 417850 }, { "epoch": 4.464554730487739, "grad_norm": 1.1771092414855957, "learning_rate": 7.329156077590646e-07, "loss": 0.0055, "step": 417860 }, { "epoch": 4.46466157380202, "grad_norm": 0.16383369266986847, "learning_rate": 7.329007409909418e-07, "loss": 0.0006, "step": 417870 }, { "epoch": 4.464768417116299, "grad_norm": 0.016265787184238434, "learning_rate": 7.328858739598565e-07, "loss": 0.0002, "step": 417880 }, { "epoch": 4.464875260430579, "grad_norm": 3.0824520587921143, "learning_rate": 7.328710066658254e-07, "loss": 0.0141, "step": 417890 }, { "epoch": 4.464982103744858, "grad_norm": 0.9878219962120056, "learning_rate": 7.328561391088655e-07, "loss": 0.005, "step": 417900 }, { "epoch": 4.4650889470591375, "grad_norm": 0.046044912189245224, "learning_rate": 7.328412712889933e-07, "loss": 0.0028, "step": 417910 }, { "epoch": 4.465195790373418, "grad_norm": 12.151627540588379, "learning_rate": 7.328264032062257e-07, "loss": 0.0675, "step": 417920 }, { "epoch": 4.465302633687697, "grad_norm": 0.16448180377483368, "learning_rate": 7.328115348605797e-07, "loss": 0.0049, "step": 417930 }, { "epoch": 4.465409477001977, "grad_norm": 0.01292192842811346, "learning_rate": 7.327966662520717e-07, "loss": 0.0034, "step": 417940 }, { "epoch": 4.465516320316256, "grad_norm": 9.95187759399414, "learning_rate": 7.327817973807189e-07, "loss": 0.0301, "step": 417950 }, { "epoch": 4.465623163630536, "grad_norm": 0.04181702062487602, "learning_rate": 7.327669282465378e-07, "loss": 0.0011, "step": 417960 }, { "epoch": 4.465730006944815, "grad_norm": 1.807308316230774, "learning_rate": 7.327520588495452e-07, "loss": 0.0018, "step": 417970 }, { "epoch": 4.465836850259095, "grad_norm": 0.023661525920033455, "learning_rate": 7.32737189189758e-07, "loss": 0.0499, "step": 417980 }, { "epoch": 4.465943693573375, "grad_norm": 0.05207070708274841, "learning_rate": 7.327223192671931e-07, "loss": 0.0016, "step": 417990 }, { "epoch": 4.466050536887654, "grad_norm": 0.002435285598039627, "learning_rate": 7.327074490818669e-07, "loss": 0.005, "step": 418000 }, { "epoch": 4.466157380201934, "grad_norm": 0.004341088235378265, "learning_rate": 7.326925786337965e-07, "loss": 0.0238, "step": 418010 }, { "epoch": 4.466264223516213, "grad_norm": 6.289331912994385, "learning_rate": 7.326777079229987e-07, "loss": 0.0105, "step": 418020 }, { "epoch": 4.466371066830493, "grad_norm": 5.69936466217041, "learning_rate": 7.326628369494901e-07, "loss": 0.0073, "step": 418030 }, { "epoch": 4.466477910144773, "grad_norm": 2.255274534225464, "learning_rate": 7.326479657132876e-07, "loss": 0.0253, "step": 418040 }, { "epoch": 4.466584753459053, "grad_norm": 0.00693963747471571, "learning_rate": 7.326330942144081e-07, "loss": 0.0004, "step": 418050 }, { "epoch": 4.466691596773332, "grad_norm": 0.09094248712062836, "learning_rate": 7.326182224528681e-07, "loss": 0.0133, "step": 418060 }, { "epoch": 4.4667984400876115, "grad_norm": 0.047720614820718765, "learning_rate": 7.326033504286848e-07, "loss": 0.0018, "step": 418070 }, { "epoch": 4.466905283401891, "grad_norm": 5.152246952056885, "learning_rate": 7.325884781418746e-07, "loss": 0.0072, "step": 418080 }, { "epoch": 4.46701212671617, "grad_norm": 10.399219512939453, "learning_rate": 7.325736055924545e-07, "loss": 0.0281, "step": 418090 }, { "epoch": 4.46711897003045, "grad_norm": 4.320017337799072, "learning_rate": 7.325587327804412e-07, "loss": 0.0445, "step": 418100 }, { "epoch": 4.46722581334473, "grad_norm": 0.7321410775184631, "learning_rate": 7.325438597058518e-07, "loss": 0.0786, "step": 418110 }, { "epoch": 4.46733265665901, "grad_norm": 0.00415825517848134, "learning_rate": 7.325289863687025e-07, "loss": 0.0256, "step": 418120 }, { "epoch": 4.467439499973289, "grad_norm": 0.015675412490963936, "learning_rate": 7.325141127690105e-07, "loss": 0.0098, "step": 418130 }, { "epoch": 4.467546343287569, "grad_norm": 0.07830873876810074, "learning_rate": 7.324992389067926e-07, "loss": 0.0233, "step": 418140 }, { "epoch": 4.467653186601848, "grad_norm": 0.06976766884326935, "learning_rate": 7.324843647820654e-07, "loss": 0.0008, "step": 418150 }, { "epoch": 4.467760029916128, "grad_norm": 15.216104507446289, "learning_rate": 7.324694903948459e-07, "loss": 0.0164, "step": 418160 }, { "epoch": 4.467866873230408, "grad_norm": 0.004338308237493038, "learning_rate": 7.324546157451507e-07, "loss": 0.0053, "step": 418170 }, { "epoch": 4.467973716544687, "grad_norm": 0.13004741072654724, "learning_rate": 7.324397408329969e-07, "loss": 0.0038, "step": 418180 }, { "epoch": 4.468080559858967, "grad_norm": 0.3897683024406433, "learning_rate": 7.32424865658401e-07, "loss": 0.0058, "step": 418190 }, { "epoch": 4.468187403173246, "grad_norm": 0.20367875695228577, "learning_rate": 7.324099902213798e-07, "loss": 0.004, "step": 418200 }, { "epoch": 4.468294246487526, "grad_norm": 0.0021536380518227816, "learning_rate": 7.323951145219502e-07, "loss": 0.0093, "step": 418210 }, { "epoch": 4.468401089801806, "grad_norm": 0.00332259782589972, "learning_rate": 7.323802385601291e-07, "loss": 0.0002, "step": 418220 }, { "epoch": 4.4685079331160855, "grad_norm": 4.808382511138916, "learning_rate": 7.32365362335933e-07, "loss": 0.0081, "step": 418230 }, { "epoch": 4.468614776430365, "grad_norm": 0.01592993550002575, "learning_rate": 7.323504858493789e-07, "loss": 0.0038, "step": 418240 }, { "epoch": 4.468721619744644, "grad_norm": 4.372958183288574, "learning_rate": 7.323356091004838e-07, "loss": 0.004, "step": 418250 }, { "epoch": 4.468828463058924, "grad_norm": 1.6469045877456665, "learning_rate": 7.323207320892639e-07, "loss": 0.0094, "step": 418260 }, { "epoch": 4.468935306373203, "grad_norm": 0.017149658873677254, "learning_rate": 7.323058548157366e-07, "loss": 0.0112, "step": 418270 }, { "epoch": 4.469042149687484, "grad_norm": 0.007265470456331968, "learning_rate": 7.322909772799184e-07, "loss": 0.0007, "step": 418280 }, { "epoch": 4.469148993001763, "grad_norm": 0.0013884989311918616, "learning_rate": 7.322760994818261e-07, "loss": 0.0179, "step": 418290 }, { "epoch": 4.469255836316043, "grad_norm": 0.022457195445895195, "learning_rate": 7.322612214214766e-07, "loss": 0.0015, "step": 418300 }, { "epoch": 4.469362679630322, "grad_norm": 0.9513269066810608, "learning_rate": 7.322463430988867e-07, "loss": 0.0079, "step": 418310 }, { "epoch": 4.4694695229446015, "grad_norm": 0.00872908253222704, "learning_rate": 7.32231464514073e-07, "loss": 0.0014, "step": 418320 }, { "epoch": 4.469576366258881, "grad_norm": 0.01814277656376362, "learning_rate": 7.322165856670526e-07, "loss": 0.0085, "step": 418330 }, { "epoch": 4.469683209573161, "grad_norm": 0.008927926421165466, "learning_rate": 7.32201706557842e-07, "loss": 0.0019, "step": 418340 }, { "epoch": 4.469790052887441, "grad_norm": 0.20545469224452972, "learning_rate": 7.321868271864583e-07, "loss": 0.0013, "step": 418350 }, { "epoch": 4.46989689620172, "grad_norm": 0.12678036093711853, "learning_rate": 7.321719475529181e-07, "loss": 0.0071, "step": 418360 }, { "epoch": 4.470003739516, "grad_norm": 0.002867314498871565, "learning_rate": 7.321570676572381e-07, "loss": 0.0015, "step": 418370 }, { "epoch": 4.470110582830279, "grad_norm": 0.050769269466400146, "learning_rate": 7.321421874994356e-07, "loss": 0.019, "step": 418380 }, { "epoch": 4.470217426144559, "grad_norm": 0.026758387684822083, "learning_rate": 7.321273070795269e-07, "loss": 0.0013, "step": 418390 }, { "epoch": 4.470324269458839, "grad_norm": 0.27643027901649475, "learning_rate": 7.321124263975287e-07, "loss": 0.0097, "step": 418400 }, { "epoch": 4.470431112773118, "grad_norm": 0.008747927844524384, "learning_rate": 7.320975454534584e-07, "loss": 0.0025, "step": 418410 }, { "epoch": 4.470537956087398, "grad_norm": 0.04112698510289192, "learning_rate": 7.320826642473322e-07, "loss": 0.0126, "step": 418420 }, { "epoch": 4.470644799401677, "grad_norm": 11.479175567626953, "learning_rate": 7.320677827791673e-07, "loss": 0.0052, "step": 418430 }, { "epoch": 4.470751642715957, "grad_norm": 0.002280145650729537, "learning_rate": 7.320529010489802e-07, "loss": 0.0119, "step": 418440 }, { "epoch": 4.470858486030236, "grad_norm": 0.90877765417099, "learning_rate": 7.32038019056788e-07, "loss": 0.0342, "step": 418450 }, { "epoch": 4.470965329344517, "grad_norm": 0.0006330640753731132, "learning_rate": 7.320231368026073e-07, "loss": 0.0021, "step": 418460 }, { "epoch": 4.471072172658796, "grad_norm": 0.678386390209198, "learning_rate": 7.32008254286455e-07, "loss": 0.0009, "step": 418470 }, { "epoch": 4.4711790159730755, "grad_norm": 2.5579023361206055, "learning_rate": 7.319933715083479e-07, "loss": 0.021, "step": 418480 }, { "epoch": 4.471285859287355, "grad_norm": 0.24019818007946014, "learning_rate": 7.319784884683027e-07, "loss": 0.0013, "step": 418490 }, { "epoch": 4.4713927026016345, "grad_norm": 0.004034464713186026, "learning_rate": 7.319636051663365e-07, "loss": 0.014, "step": 418500 }, { "epoch": 4.471499545915914, "grad_norm": 0.05516150966286659, "learning_rate": 7.319487216024656e-07, "loss": 0.0172, "step": 418510 }, { "epoch": 4.471606389230194, "grad_norm": 2.2905046939849854, "learning_rate": 7.319338377767073e-07, "loss": 0.0188, "step": 418520 }, { "epoch": 4.471713232544474, "grad_norm": 0.760479748249054, "learning_rate": 7.319189536890781e-07, "loss": 0.0241, "step": 418530 }, { "epoch": 4.471820075858753, "grad_norm": 0.0015091188251972198, "learning_rate": 7.319040693395949e-07, "loss": 0.0167, "step": 418540 }, { "epoch": 4.471926919173033, "grad_norm": 0.0027723817620426416, "learning_rate": 7.318891847282745e-07, "loss": 0.0024, "step": 418550 }, { "epoch": 4.472033762487312, "grad_norm": 0.0384223610162735, "learning_rate": 7.318742998551337e-07, "loss": 0.0039, "step": 418560 }, { "epoch": 4.472140605801592, "grad_norm": 0.01811845414340496, "learning_rate": 7.318594147201892e-07, "loss": 0.0162, "step": 418570 }, { "epoch": 4.472247449115872, "grad_norm": 0.0026008570566773415, "learning_rate": 7.31844529323458e-07, "loss": 0.0082, "step": 418580 }, { "epoch": 4.472354292430151, "grad_norm": 0.024690140038728714, "learning_rate": 7.31829643664957e-07, "loss": 0.0037, "step": 418590 }, { "epoch": 4.472461135744431, "grad_norm": 0.010183234699070454, "learning_rate": 7.318147577447026e-07, "loss": 0.0155, "step": 418600 }, { "epoch": 4.47256797905871, "grad_norm": 0.0019717293325811625, "learning_rate": 7.317998715627119e-07, "loss": 0.0049, "step": 418610 }, { "epoch": 4.47267482237299, "grad_norm": 0.0022576036863029003, "learning_rate": 7.317849851190018e-07, "loss": 0.0016, "step": 418620 }, { "epoch": 4.472781665687269, "grad_norm": 0.04109598323702812, "learning_rate": 7.317700984135888e-07, "loss": 0.0009, "step": 418630 }, { "epoch": 4.4728885090015496, "grad_norm": 13.346830368041992, "learning_rate": 7.3175521144649e-07, "loss": 0.0171, "step": 418640 }, { "epoch": 4.472995352315829, "grad_norm": 0.03548257052898407, "learning_rate": 7.31740324217722e-07, "loss": 0.0168, "step": 418650 }, { "epoch": 4.4731021956301085, "grad_norm": 0.429435133934021, "learning_rate": 7.317254367273016e-07, "loss": 0.0142, "step": 418660 }, { "epoch": 4.473209038944388, "grad_norm": 0.10376781970262527, "learning_rate": 7.317105489752457e-07, "loss": 0.0059, "step": 418670 }, { "epoch": 4.473315882258667, "grad_norm": 3.306262731552124, "learning_rate": 7.316956609615712e-07, "loss": 0.014, "step": 418680 }, { "epoch": 4.473422725572947, "grad_norm": 0.07476264983415604, "learning_rate": 7.316807726862947e-07, "loss": 0.0036, "step": 418690 }, { "epoch": 4.473529568887227, "grad_norm": 0.003371169324964285, "learning_rate": 7.316658841494332e-07, "loss": 0.0109, "step": 418700 }, { "epoch": 4.473636412201507, "grad_norm": 0.07329937815666199, "learning_rate": 7.316509953510035e-07, "loss": 0.0297, "step": 418710 }, { "epoch": 4.473743255515786, "grad_norm": 0.009164673276245594, "learning_rate": 7.316361062910222e-07, "loss": 0.0034, "step": 418720 }, { "epoch": 4.473850098830066, "grad_norm": 3.0658280849456787, "learning_rate": 7.316212169695064e-07, "loss": 0.0187, "step": 418730 }, { "epoch": 4.473956942144345, "grad_norm": 0.45043277740478516, "learning_rate": 7.316063273864726e-07, "loss": 0.0031, "step": 418740 }, { "epoch": 4.474063785458625, "grad_norm": 0.1937350481748581, "learning_rate": 7.31591437541938e-07, "loss": 0.0142, "step": 418750 }, { "epoch": 4.474170628772905, "grad_norm": 0.12645898759365082, "learning_rate": 7.31576547435919e-07, "loss": 0.0003, "step": 418760 }, { "epoch": 4.474277472087184, "grad_norm": 12.51438045501709, "learning_rate": 7.315616570684327e-07, "loss": 0.0121, "step": 418770 }, { "epoch": 4.474384315401464, "grad_norm": 0.006056715734302998, "learning_rate": 7.315467664394958e-07, "loss": 0.0028, "step": 418780 }, { "epoch": 4.474491158715743, "grad_norm": 1.814957857131958, "learning_rate": 7.315318755491251e-07, "loss": 0.0046, "step": 418790 }, { "epoch": 4.474598002030023, "grad_norm": 0.02470068261027336, "learning_rate": 7.315169843973374e-07, "loss": 0.0121, "step": 418800 }, { "epoch": 4.474704845344302, "grad_norm": 5.309976577758789, "learning_rate": 7.315020929841496e-07, "loss": 0.0097, "step": 418810 }, { "epoch": 4.4748116886585825, "grad_norm": 0.18769536912441254, "learning_rate": 7.314872013095785e-07, "loss": 0.0363, "step": 418820 }, { "epoch": 4.474918531972862, "grad_norm": 0.09970267117023468, "learning_rate": 7.314723093736408e-07, "loss": 0.0117, "step": 418830 }, { "epoch": 4.475025375287141, "grad_norm": 0.032445669174194336, "learning_rate": 7.314574171763534e-07, "loss": 0.0025, "step": 418840 }, { "epoch": 4.475132218601421, "grad_norm": 6.224240779876709, "learning_rate": 7.314425247177331e-07, "loss": 0.0165, "step": 418850 }, { "epoch": 4.4752390619157, "grad_norm": 0.10262340307235718, "learning_rate": 7.314276319977967e-07, "loss": 0.0077, "step": 418860 }, { "epoch": 4.475345905229981, "grad_norm": 0.03665779531002045, "learning_rate": 7.314127390165612e-07, "loss": 0.002, "step": 418870 }, { "epoch": 4.47545274854426, "grad_norm": 0.034996964037418365, "learning_rate": 7.313978457740432e-07, "loss": 0.005, "step": 418880 }, { "epoch": 4.47555959185854, "grad_norm": 0.007896283641457558, "learning_rate": 7.313829522702595e-07, "loss": 0.0007, "step": 418890 }, { "epoch": 4.475666435172819, "grad_norm": 17.857776641845703, "learning_rate": 7.31368058505227e-07, "loss": 0.0447, "step": 418900 }, { "epoch": 4.4757732784870985, "grad_norm": 0.17881183326244354, "learning_rate": 7.313531644789623e-07, "loss": 0.011, "step": 418910 }, { "epoch": 4.475880121801378, "grad_norm": 0.4583197832107544, "learning_rate": 7.313382701914827e-07, "loss": 0.0013, "step": 418920 }, { "epoch": 4.475986965115658, "grad_norm": 0.04195531830191612, "learning_rate": 7.313233756428048e-07, "loss": 0.0005, "step": 418930 }, { "epoch": 4.476093808429938, "grad_norm": 0.41342809796333313, "learning_rate": 7.31308480832945e-07, "loss": 0.0156, "step": 418940 }, { "epoch": 4.476200651744217, "grad_norm": 0.022876020520925522, "learning_rate": 7.312935857619207e-07, "loss": 0.0083, "step": 418950 }, { "epoch": 4.476307495058497, "grad_norm": 0.002985192695632577, "learning_rate": 7.312786904297484e-07, "loss": 0.0115, "step": 418960 }, { "epoch": 4.476414338372776, "grad_norm": 0.009320241399109364, "learning_rate": 7.312637948364451e-07, "loss": 0.0184, "step": 418970 }, { "epoch": 4.476521181687056, "grad_norm": 2.1695940494537354, "learning_rate": 7.312488989820275e-07, "loss": 0.0159, "step": 418980 }, { "epoch": 4.476628025001336, "grad_norm": 0.06256293505430222, "learning_rate": 7.312340028665124e-07, "loss": 0.0136, "step": 418990 }, { "epoch": 4.476734868315615, "grad_norm": 0.6141452193260193, "learning_rate": 7.312191064899166e-07, "loss": 0.0062, "step": 419000 }, { "epoch": 4.476841711629895, "grad_norm": 0.001048742444254458, "learning_rate": 7.31204209852257e-07, "loss": 0.0191, "step": 419010 }, { "epoch": 4.476948554944174, "grad_norm": 0.013754579238593578, "learning_rate": 7.311893129535506e-07, "loss": 0.0202, "step": 419020 }, { "epoch": 4.477055398258454, "grad_norm": 0.7304787635803223, "learning_rate": 7.311744157938138e-07, "loss": 0.0199, "step": 419030 }, { "epoch": 4.477162241572733, "grad_norm": 0.7578228116035461, "learning_rate": 7.311595183730637e-07, "loss": 0.0133, "step": 419040 }, { "epoch": 4.477269084887014, "grad_norm": 0.01962571032345295, "learning_rate": 7.311446206913171e-07, "loss": 0.0023, "step": 419050 }, { "epoch": 4.477375928201293, "grad_norm": 0.0012350943870842457, "learning_rate": 7.311297227485907e-07, "loss": 0.0036, "step": 419060 }, { "epoch": 4.4774827715155725, "grad_norm": 1.418674349784851, "learning_rate": 7.311148245449013e-07, "loss": 0.0485, "step": 419070 }, { "epoch": 4.477589614829852, "grad_norm": 1.568162441253662, "learning_rate": 7.310999260802661e-07, "loss": 0.0015, "step": 419080 }, { "epoch": 4.477696458144131, "grad_norm": 0.001356355263851583, "learning_rate": 7.310850273547015e-07, "loss": 0.0088, "step": 419090 }, { "epoch": 4.477803301458411, "grad_norm": 0.03323935717344284, "learning_rate": 7.310701283682244e-07, "loss": 0.0045, "step": 419100 }, { "epoch": 4.477910144772691, "grad_norm": 0.018696162849664688, "learning_rate": 7.310552291208517e-07, "loss": 0.0004, "step": 419110 }, { "epoch": 4.478016988086971, "grad_norm": 0.018911223858594894, "learning_rate": 7.310403296126003e-07, "loss": 0.0056, "step": 419120 }, { "epoch": 4.47812383140125, "grad_norm": 0.0014748836401849985, "learning_rate": 7.310254298434869e-07, "loss": 0.0082, "step": 419130 }, { "epoch": 4.47823067471553, "grad_norm": 0.004858572036027908, "learning_rate": 7.310105298135283e-07, "loss": 0.0008, "step": 419140 }, { "epoch": 4.478337518029809, "grad_norm": 0.01212818268686533, "learning_rate": 7.309956295227414e-07, "loss": 0.0089, "step": 419150 }, { "epoch": 4.4784443613440885, "grad_norm": 0.007342166732996702, "learning_rate": 7.30980728971143e-07, "loss": 0.0112, "step": 419160 }, { "epoch": 4.478551204658369, "grad_norm": 9.535392761230469, "learning_rate": 7.3096582815875e-07, "loss": 0.0243, "step": 419170 }, { "epoch": 4.478658047972648, "grad_norm": 0.03004171885550022, "learning_rate": 7.309509270855791e-07, "loss": 0.0234, "step": 419180 }, { "epoch": 4.478764891286928, "grad_norm": 0.19573719799518585, "learning_rate": 7.309360257516472e-07, "loss": 0.0113, "step": 419190 }, { "epoch": 4.478871734601207, "grad_norm": 0.0024936345871537924, "learning_rate": 7.309211241569709e-07, "loss": 0.0306, "step": 419200 }, { "epoch": 4.478978577915487, "grad_norm": 0.004226007033139467, "learning_rate": 7.309062223015674e-07, "loss": 0.0213, "step": 419210 }, { "epoch": 4.479085421229766, "grad_norm": 0.16913872957229614, "learning_rate": 7.308913201854534e-07, "loss": 0.0156, "step": 419220 }, { "epoch": 4.4791922645440465, "grad_norm": 2.8220558166503906, "learning_rate": 7.308764178086455e-07, "loss": 0.0022, "step": 419230 }, { "epoch": 4.479299107858326, "grad_norm": 0.0005577809060923755, "learning_rate": 7.308615151711609e-07, "loss": 0.0142, "step": 419240 }, { "epoch": 4.479405951172605, "grad_norm": 0.0025887624360620975, "learning_rate": 7.308466122730161e-07, "loss": 0.0019, "step": 419250 }, { "epoch": 4.479512794486885, "grad_norm": 0.21747826039791107, "learning_rate": 7.308317091142281e-07, "loss": 0.0078, "step": 419260 }, { "epoch": 4.479619637801164, "grad_norm": 0.467924028635025, "learning_rate": 7.308168056948136e-07, "loss": 0.0022, "step": 419270 }, { "epoch": 4.479726481115444, "grad_norm": 0.03305472061038017, "learning_rate": 7.308019020147895e-07, "loss": 0.0085, "step": 419280 }, { "epoch": 4.479833324429724, "grad_norm": 8.565431594848633, "learning_rate": 7.307869980741728e-07, "loss": 0.019, "step": 419290 }, { "epoch": 4.479940167744004, "grad_norm": 0.007096708752214909, "learning_rate": 7.3077209387298e-07, "loss": 0.0006, "step": 419300 }, { "epoch": 4.480047011058283, "grad_norm": 0.00719600822776556, "learning_rate": 7.307571894112281e-07, "loss": 0.0051, "step": 419310 }, { "epoch": 4.4801538543725625, "grad_norm": 0.012019633315503597, "learning_rate": 7.30742284688934e-07, "loss": 0.0174, "step": 419320 }, { "epoch": 4.480260697686842, "grad_norm": 1.1772708892822266, "learning_rate": 7.307273797061145e-07, "loss": 0.0111, "step": 419330 }, { "epoch": 4.4803675410011214, "grad_norm": 8.471874237060547, "learning_rate": 7.307124744627861e-07, "loss": 0.0066, "step": 419340 }, { "epoch": 4.480474384315402, "grad_norm": 0.012962833046913147, "learning_rate": 7.30697568958966e-07, "loss": 0.0231, "step": 419350 }, { "epoch": 4.480581227629681, "grad_norm": 0.006957876030355692, "learning_rate": 7.306826631946712e-07, "loss": 0.0001, "step": 419360 }, { "epoch": 4.480688070943961, "grad_norm": 1.2374367713928223, "learning_rate": 7.306677571699181e-07, "loss": 0.0403, "step": 419370 }, { "epoch": 4.48079491425824, "grad_norm": 0.0038698497228324413, "learning_rate": 7.306528508847236e-07, "loss": 0.0047, "step": 419380 }, { "epoch": 4.48090175757252, "grad_norm": 0.060605958104133606, "learning_rate": 7.306379443391047e-07, "loss": 0.0122, "step": 419390 }, { "epoch": 4.481008600886799, "grad_norm": 0.025337517261505127, "learning_rate": 7.30623037533078e-07, "loss": 0.0242, "step": 419400 }, { "epoch": 4.481115444201079, "grad_norm": 0.7060911059379578, "learning_rate": 7.306081304666607e-07, "loss": 0.0198, "step": 419410 }, { "epoch": 4.481222287515359, "grad_norm": 0.010416211560368538, "learning_rate": 7.305932231398695e-07, "loss": 0.0536, "step": 419420 }, { "epoch": 4.481329130829638, "grad_norm": 0.00885977502912283, "learning_rate": 7.305783155527209e-07, "loss": 0.0028, "step": 419430 }, { "epoch": 4.481435974143918, "grad_norm": 0.04158725216984749, "learning_rate": 7.305634077052321e-07, "loss": 0.0052, "step": 419440 }, { "epoch": 4.481542817458197, "grad_norm": 0.3948453664779663, "learning_rate": 7.305484995974199e-07, "loss": 0.0135, "step": 419450 }, { "epoch": 4.481649660772478, "grad_norm": 3.9748754501342773, "learning_rate": 7.305335912293008e-07, "loss": 0.0048, "step": 419460 }, { "epoch": 4.481756504086757, "grad_norm": 0.009087719954550266, "learning_rate": 7.30518682600892e-07, "loss": 0.0461, "step": 419470 }, { "epoch": 4.4818633474010365, "grad_norm": 0.031113265082240105, "learning_rate": 7.305037737122101e-07, "loss": 0.0148, "step": 419480 }, { "epoch": 4.481970190715316, "grad_norm": 0.04597457870841026, "learning_rate": 7.304888645632723e-07, "loss": 0.0079, "step": 419490 }, { "epoch": 4.4820770340295955, "grad_norm": 0.12020795047283173, "learning_rate": 7.304739551540951e-07, "loss": 0.0003, "step": 419500 }, { "epoch": 4.482183877343875, "grad_norm": 0.01560189202427864, "learning_rate": 7.304590454846952e-07, "loss": 0.0147, "step": 419510 }, { "epoch": 4.482290720658154, "grad_norm": 0.002361149061471224, "learning_rate": 7.304441355550898e-07, "loss": 0.001, "step": 419520 }, { "epoch": 4.482397563972435, "grad_norm": 0.006332376506179571, "learning_rate": 7.304292253652957e-07, "loss": 0.0112, "step": 419530 }, { "epoch": 4.482504407286714, "grad_norm": 0.08620309829711914, "learning_rate": 7.304143149153294e-07, "loss": 0.0027, "step": 419540 }, { "epoch": 4.482611250600994, "grad_norm": 0.06843554973602295, "learning_rate": 7.303994042052081e-07, "loss": 0.0083, "step": 419550 }, { "epoch": 4.482718093915273, "grad_norm": 1.7194052934646606, "learning_rate": 7.303844932349483e-07, "loss": 0.0136, "step": 419560 }, { "epoch": 4.4828249372295526, "grad_norm": 0.01077202521264553, "learning_rate": 7.303695820045672e-07, "loss": 0.0134, "step": 419570 }, { "epoch": 4.482931780543833, "grad_norm": 0.05617830902338028, "learning_rate": 7.303546705140814e-07, "loss": 0.0056, "step": 419580 }, { "epoch": 4.483038623858112, "grad_norm": 0.013773879036307335, "learning_rate": 7.303397587635077e-07, "loss": 0.0299, "step": 419590 }, { "epoch": 4.483145467172392, "grad_norm": 0.15705320239067078, "learning_rate": 7.30324846752863e-07, "loss": 0.0011, "step": 419600 }, { "epoch": 4.483252310486671, "grad_norm": 6.736672878265381, "learning_rate": 7.303099344821644e-07, "loss": 0.0102, "step": 419610 }, { "epoch": 4.483359153800951, "grad_norm": 0.0008092352072708309, "learning_rate": 7.302950219514283e-07, "loss": 0.0008, "step": 419620 }, { "epoch": 4.48346599711523, "grad_norm": 0.022279789671301842, "learning_rate": 7.302801091606719e-07, "loss": 0.0098, "step": 419630 }, { "epoch": 4.48357284042951, "grad_norm": 0.7270951867103577, "learning_rate": 7.302651961099118e-07, "loss": 0.0013, "step": 419640 }, { "epoch": 4.48367968374379, "grad_norm": 5.3807806968688965, "learning_rate": 7.302502827991649e-07, "loss": 0.0178, "step": 419650 }, { "epoch": 4.4837865270580695, "grad_norm": 0.11044851690530777, "learning_rate": 7.30235369228448e-07, "loss": 0.0197, "step": 419660 }, { "epoch": 4.483893370372349, "grad_norm": 0.26808714866638184, "learning_rate": 7.30220455397778e-07, "loss": 0.0046, "step": 419670 }, { "epoch": 4.484000213686628, "grad_norm": 0.00730177853256464, "learning_rate": 7.302055413071717e-07, "loss": 0.007, "step": 419680 }, { "epoch": 4.484107057000908, "grad_norm": 0.003528222208842635, "learning_rate": 7.301906269566461e-07, "loss": 0.0144, "step": 419690 }, { "epoch": 4.484213900315188, "grad_norm": 0.0019493763102218509, "learning_rate": 7.30175712346218e-07, "loss": 0.0065, "step": 419700 }, { "epoch": 4.484320743629468, "grad_norm": 0.001832277630455792, "learning_rate": 7.301607974759039e-07, "loss": 0.0037, "step": 419710 }, { "epoch": 4.484427586943747, "grad_norm": 0.23173227906227112, "learning_rate": 7.301458823457209e-07, "loss": 0.0298, "step": 419720 }, { "epoch": 4.484534430258027, "grad_norm": 0.0012738736113533378, "learning_rate": 7.301309669556861e-07, "loss": 0.0062, "step": 419730 }, { "epoch": 4.484641273572306, "grad_norm": 0.15478312969207764, "learning_rate": 7.301160513058157e-07, "loss": 0.0078, "step": 419740 }, { "epoch": 4.4847481168865855, "grad_norm": 0.7139398455619812, "learning_rate": 7.301011353961272e-07, "loss": 0.0064, "step": 419750 }, { "epoch": 4.484854960200866, "grad_norm": 0.01681157946586609, "learning_rate": 7.300862192266372e-07, "loss": 0.0085, "step": 419760 }, { "epoch": 4.484961803515145, "grad_norm": 0.05502539128065109, "learning_rate": 7.300713027973622e-07, "loss": 0.0155, "step": 419770 }, { "epoch": 4.485068646829425, "grad_norm": 5.352450370788574, "learning_rate": 7.300563861083195e-07, "loss": 0.0514, "step": 419780 }, { "epoch": 4.485175490143704, "grad_norm": 0.0017459940863773227, "learning_rate": 7.300414691595258e-07, "loss": 0.0005, "step": 419790 }, { "epoch": 4.485282333457984, "grad_norm": 0.8965086340904236, "learning_rate": 7.300265519509978e-07, "loss": 0.0074, "step": 419800 }, { "epoch": 4.485389176772263, "grad_norm": 0.004621685482561588, "learning_rate": 7.300116344827526e-07, "loss": 0.0081, "step": 419810 }, { "epoch": 4.4854960200865435, "grad_norm": 4.2780585289001465, "learning_rate": 7.299967167548069e-07, "loss": 0.0237, "step": 419820 }, { "epoch": 4.485602863400823, "grad_norm": 0.003797930432483554, "learning_rate": 7.299817987671775e-07, "loss": 0.0073, "step": 419830 }, { "epoch": 4.485709706715102, "grad_norm": 0.04570891335606575, "learning_rate": 7.299668805198813e-07, "loss": 0.0081, "step": 419840 }, { "epoch": 4.485816550029382, "grad_norm": 1.2674968242645264, "learning_rate": 7.299519620129351e-07, "loss": 0.0098, "step": 419850 }, { "epoch": 4.485923393343661, "grad_norm": 0.7095909118652344, "learning_rate": 7.299370432463557e-07, "loss": 0.0359, "step": 419860 }, { "epoch": 4.486030236657941, "grad_norm": 12.4481840133667, "learning_rate": 7.299221242201602e-07, "loss": 0.009, "step": 419870 }, { "epoch": 4.486137079972221, "grad_norm": 0.0060942829586565495, "learning_rate": 7.299072049343651e-07, "loss": 0.0031, "step": 419880 }, { "epoch": 4.486243923286501, "grad_norm": 0.16514724493026733, "learning_rate": 7.298922853889877e-07, "loss": 0.0021, "step": 419890 }, { "epoch": 4.48635076660078, "grad_norm": 1.7068614959716797, "learning_rate": 7.298773655840442e-07, "loss": 0.0143, "step": 419900 }, { "epoch": 4.4864576099150595, "grad_norm": 0.013901959173381329, "learning_rate": 7.29862445519552e-07, "loss": 0.0049, "step": 419910 }, { "epoch": 4.486564453229339, "grad_norm": 0.0007249377085827291, "learning_rate": 7.298475251955277e-07, "loss": 0.0087, "step": 419920 }, { "epoch": 4.486671296543618, "grad_norm": 2.4866485595703125, "learning_rate": 7.298326046119883e-07, "loss": 0.0033, "step": 419930 }, { "epoch": 4.486778139857899, "grad_norm": 1.6620628833770752, "learning_rate": 7.298176837689503e-07, "loss": 0.0052, "step": 419940 }, { "epoch": 4.486884983172178, "grad_norm": 1.1379579305648804, "learning_rate": 7.29802762666431e-07, "loss": 0.019, "step": 419950 }, { "epoch": 4.486991826486458, "grad_norm": 0.4054016172885895, "learning_rate": 7.297878413044471e-07, "loss": 0.0064, "step": 419960 }, { "epoch": 4.487098669800737, "grad_norm": 0.007378715556114912, "learning_rate": 7.297729196830151e-07, "loss": 0.0084, "step": 419970 }, { "epoch": 4.487205513115017, "grad_norm": 0.00693465955555439, "learning_rate": 7.297579978021525e-07, "loss": 0.0059, "step": 419980 }, { "epoch": 4.487312356429296, "grad_norm": 0.027734138071537018, "learning_rate": 7.297430756618754e-07, "loss": 0.0139, "step": 419990 }, { "epoch": 4.487419199743576, "grad_norm": 0.6651778817176819, "learning_rate": 7.297281532622013e-07, "loss": 0.0207, "step": 420000 }, { "epoch": 4.487526043057856, "grad_norm": 0.9730191230773926, "learning_rate": 7.297132306031466e-07, "loss": 0.0127, "step": 420010 }, { "epoch": 4.487632886372135, "grad_norm": 1.7270644903182983, "learning_rate": 7.296983076847285e-07, "loss": 0.0214, "step": 420020 }, { "epoch": 4.487739729686415, "grad_norm": 0.05669573321938515, "learning_rate": 7.296833845069636e-07, "loss": 0.0098, "step": 420030 }, { "epoch": 4.487846573000694, "grad_norm": 0.03120581805706024, "learning_rate": 7.296684610698688e-07, "loss": 0.0035, "step": 420040 }, { "epoch": 4.487953416314974, "grad_norm": 0.12273341417312622, "learning_rate": 7.296535373734611e-07, "loss": 0.034, "step": 420050 }, { "epoch": 4.488060259629254, "grad_norm": 0.021292155608534813, "learning_rate": 7.296386134177569e-07, "loss": 0.002, "step": 420060 }, { "epoch": 4.4881671029435335, "grad_norm": 0.017070360481739044, "learning_rate": 7.296236892027736e-07, "loss": 0.0021, "step": 420070 }, { "epoch": 4.488273946257813, "grad_norm": 0.0002754756424110383, "learning_rate": 7.296087647285278e-07, "loss": 0.0121, "step": 420080 }, { "epoch": 4.488380789572092, "grad_norm": 0.004208965227007866, "learning_rate": 7.295938399950365e-07, "loss": 0.0026, "step": 420090 }, { "epoch": 4.488487632886372, "grad_norm": 0.2526090145111084, "learning_rate": 7.295789150023162e-07, "loss": 0.0254, "step": 420100 }, { "epoch": 4.488594476200651, "grad_norm": 0.13982830941677094, "learning_rate": 7.29563989750384e-07, "loss": 0.0038, "step": 420110 }, { "epoch": 4.488701319514932, "grad_norm": 0.009220512583851814, "learning_rate": 7.29549064239257e-07, "loss": 0.0106, "step": 420120 }, { "epoch": 4.488808162829211, "grad_norm": 1.3140637874603271, "learning_rate": 7.295341384689516e-07, "loss": 0.0097, "step": 420130 }, { "epoch": 4.488915006143491, "grad_norm": 7.261062145233154, "learning_rate": 7.295192124394848e-07, "loss": 0.0136, "step": 420140 }, { "epoch": 4.48902184945777, "grad_norm": 0.0028587982524186373, "learning_rate": 7.295042861508736e-07, "loss": 0.0006, "step": 420150 }, { "epoch": 4.4891286927720495, "grad_norm": 0.003538442077115178, "learning_rate": 7.294893596031346e-07, "loss": 0.0191, "step": 420160 }, { "epoch": 4.48923553608633, "grad_norm": 0.006193631328642368, "learning_rate": 7.294744327962847e-07, "loss": 0.0085, "step": 420170 }, { "epoch": 4.489342379400609, "grad_norm": 0.18512126803398132, "learning_rate": 7.294595057303411e-07, "loss": 0.0034, "step": 420180 }, { "epoch": 4.489449222714889, "grad_norm": 0.011775690130889416, "learning_rate": 7.294445784053204e-07, "loss": 0.0009, "step": 420190 }, { "epoch": 4.489556066029168, "grad_norm": 3.420224189758301, "learning_rate": 7.294296508212392e-07, "loss": 0.0039, "step": 420200 }, { "epoch": 4.489662909343448, "grad_norm": 0.4336256682872772, "learning_rate": 7.294147229781148e-07, "loss": 0.0138, "step": 420210 }, { "epoch": 4.489769752657727, "grad_norm": 0.04215018078684807, "learning_rate": 7.293997948759638e-07, "loss": 0.0033, "step": 420220 }, { "epoch": 4.489876595972007, "grad_norm": 3.5007073879241943, "learning_rate": 7.293848665148031e-07, "loss": 0.0109, "step": 420230 }, { "epoch": 4.489983439286287, "grad_norm": 0.2147556096315384, "learning_rate": 7.293699378946496e-07, "loss": 0.0069, "step": 420240 }, { "epoch": 4.490090282600566, "grad_norm": 0.15377889573574066, "learning_rate": 7.293550090155202e-07, "loss": 0.0027, "step": 420250 }, { "epoch": 4.490197125914846, "grad_norm": 4.1847991943359375, "learning_rate": 7.293400798774316e-07, "loss": 0.0049, "step": 420260 }, { "epoch": 4.490303969229125, "grad_norm": 7.5493292808532715, "learning_rate": 7.293251504804008e-07, "loss": 0.0066, "step": 420270 }, { "epoch": 4.490410812543405, "grad_norm": 0.25813913345336914, "learning_rate": 7.293102208244444e-07, "loss": 0.0025, "step": 420280 }, { "epoch": 4.490517655857685, "grad_norm": 0.02279527485370636, "learning_rate": 7.292952909095798e-07, "loss": 0.0053, "step": 420290 }, { "epoch": 4.490624499171965, "grad_norm": 0.2353605180978775, "learning_rate": 7.292803607358233e-07, "loss": 0.0601, "step": 420300 }, { "epoch": 4.490731342486244, "grad_norm": 0.1503743678331375, "learning_rate": 7.29265430303192e-07, "loss": 0.0118, "step": 420310 }, { "epoch": 4.4908381858005235, "grad_norm": 0.005361455958336592, "learning_rate": 7.292504996117027e-07, "loss": 0.0073, "step": 420320 }, { "epoch": 4.490945029114803, "grad_norm": 11.865525245666504, "learning_rate": 7.292355686613722e-07, "loss": 0.0057, "step": 420330 }, { "epoch": 4.491051872429082, "grad_norm": 0.005567159038037062, "learning_rate": 7.292206374522176e-07, "loss": 0.0077, "step": 420340 }, { "epoch": 4.491158715743362, "grad_norm": 0.0036294222809374332, "learning_rate": 7.292057059842556e-07, "loss": 0.0065, "step": 420350 }, { "epoch": 4.491265559057642, "grad_norm": 0.2484854757785797, "learning_rate": 7.291907742575029e-07, "loss": 0.0103, "step": 420360 }, { "epoch": 4.491372402371922, "grad_norm": 0.015208717435598373, "learning_rate": 7.291758422719767e-07, "loss": 0.0036, "step": 420370 }, { "epoch": 4.491479245686201, "grad_norm": 0.025139665231108665, "learning_rate": 7.291609100276937e-07, "loss": 0.0241, "step": 420380 }, { "epoch": 4.491586089000481, "grad_norm": 0.021660663187503815, "learning_rate": 7.291459775246707e-07, "loss": 0.0001, "step": 420390 }, { "epoch": 4.49169293231476, "grad_norm": 1.9473901987075806, "learning_rate": 7.291310447629244e-07, "loss": 0.0012, "step": 420400 }, { "epoch": 4.49179977562904, "grad_norm": 0.026241768151521683, "learning_rate": 7.291161117424721e-07, "loss": 0.0014, "step": 420410 }, { "epoch": 4.49190661894332, "grad_norm": 0.002234250772744417, "learning_rate": 7.291011784633303e-07, "loss": 0.0064, "step": 420420 }, { "epoch": 4.492013462257599, "grad_norm": 5.380880832672119, "learning_rate": 7.29086244925516e-07, "loss": 0.0046, "step": 420430 }, { "epoch": 4.492120305571879, "grad_norm": 1.1890991926193237, "learning_rate": 7.290713111290459e-07, "loss": 0.0085, "step": 420440 }, { "epoch": 4.492227148886158, "grad_norm": 0.026979660615324974, "learning_rate": 7.290563770739372e-07, "loss": 0.0003, "step": 420450 }, { "epoch": 4.492333992200438, "grad_norm": 0.0002975380630232394, "learning_rate": 7.290414427602065e-07, "loss": 0.001, "step": 420460 }, { "epoch": 4.492440835514718, "grad_norm": 0.005051259882748127, "learning_rate": 7.290265081878707e-07, "loss": 0.0129, "step": 420470 }, { "epoch": 4.4925476788289975, "grad_norm": 0.005577118135988712, "learning_rate": 7.290115733569466e-07, "loss": 0.0053, "step": 420480 }, { "epoch": 4.492654522143277, "grad_norm": 0.0005835674819536507, "learning_rate": 7.289966382674514e-07, "loss": 0.0052, "step": 420490 }, { "epoch": 4.492761365457556, "grad_norm": 10.133172035217285, "learning_rate": 7.289817029194016e-07, "loss": 0.0153, "step": 420500 }, { "epoch": 4.492868208771836, "grad_norm": 0.2223213016986847, "learning_rate": 7.28966767312814e-07, "loss": 0.0008, "step": 420510 }, { "epoch": 4.492975052086115, "grad_norm": 1.4179856777191162, "learning_rate": 7.289518314477059e-07, "loss": 0.0196, "step": 420520 }, { "epoch": 4.493081895400396, "grad_norm": 0.09293025732040405, "learning_rate": 7.289368953240937e-07, "loss": 0.0056, "step": 420530 }, { "epoch": 4.493188738714675, "grad_norm": 0.0031378113199025393, "learning_rate": 7.289219589419946e-07, "loss": 0.0014, "step": 420540 }, { "epoch": 4.493295582028955, "grad_norm": 0.2029525637626648, "learning_rate": 7.289070223014252e-07, "loss": 0.0008, "step": 420550 }, { "epoch": 4.493402425343234, "grad_norm": 0.004729066509753466, "learning_rate": 7.288920854024026e-07, "loss": 0.026, "step": 420560 }, { "epoch": 4.4935092686575135, "grad_norm": 0.032852768898010254, "learning_rate": 7.288771482449436e-07, "loss": 0.0063, "step": 420570 }, { "epoch": 4.493616111971793, "grad_norm": 0.024637531489133835, "learning_rate": 7.288622108290649e-07, "loss": 0.008, "step": 420580 }, { "epoch": 4.493722955286073, "grad_norm": 0.7180623412132263, "learning_rate": 7.288472731547836e-07, "loss": 0.0176, "step": 420590 }, { "epoch": 4.493829798600353, "grad_norm": 0.0009632364381104708, "learning_rate": 7.288323352221163e-07, "loss": 0.0042, "step": 420600 }, { "epoch": 4.493936641914632, "grad_norm": 1.3913336992263794, "learning_rate": 7.288173970310801e-07, "loss": 0.0144, "step": 420610 }, { "epoch": 4.494043485228912, "grad_norm": 0.8613280057907104, "learning_rate": 7.288024585816919e-07, "loss": 0.0099, "step": 420620 }, { "epoch": 4.494150328543191, "grad_norm": 0.0013330606743693352, "learning_rate": 7.287875198739683e-07, "loss": 0.0001, "step": 420630 }, { "epoch": 4.494257171857471, "grad_norm": 1.264556646347046, "learning_rate": 7.287725809079264e-07, "loss": 0.0112, "step": 420640 }, { "epoch": 4.494364015171751, "grad_norm": 0.005957521963864565, "learning_rate": 7.28757641683583e-07, "loss": 0.0112, "step": 420650 }, { "epoch": 4.4944708584860305, "grad_norm": 7.532142639160156, "learning_rate": 7.28742702200955e-07, "loss": 0.0148, "step": 420660 }, { "epoch": 4.49457770180031, "grad_norm": 0.004501239396631718, "learning_rate": 7.287277624600592e-07, "loss": 0.0056, "step": 420670 }, { "epoch": 4.494684545114589, "grad_norm": 0.40357664227485657, "learning_rate": 7.287128224609124e-07, "loss": 0.0092, "step": 420680 }, { "epoch": 4.494791388428869, "grad_norm": 0.0019807119388133287, "learning_rate": 7.286978822035317e-07, "loss": 0.0099, "step": 420690 }, { "epoch": 4.494898231743148, "grad_norm": 0.4737909436225891, "learning_rate": 7.286829416879338e-07, "loss": 0.0441, "step": 420700 }, { "epoch": 4.495005075057429, "grad_norm": 0.0324317030608654, "learning_rate": 7.286680009141355e-07, "loss": 0.0109, "step": 420710 }, { "epoch": 4.495111918371708, "grad_norm": 0.023195911198854446, "learning_rate": 7.286530598821539e-07, "loss": 0.0062, "step": 420720 }, { "epoch": 4.4952187616859876, "grad_norm": 0.0021455546375364065, "learning_rate": 7.286381185920056e-07, "loss": 0.0042, "step": 420730 }, { "epoch": 4.495325605000267, "grad_norm": 1.6571307182312012, "learning_rate": 7.286231770437076e-07, "loss": 0.0068, "step": 420740 }, { "epoch": 4.4954324483145465, "grad_norm": 0.012367733754217625, "learning_rate": 7.286082352372769e-07, "loss": 0.0088, "step": 420750 }, { "epoch": 4.495539291628826, "grad_norm": 6.971158027648926, "learning_rate": 7.285932931727303e-07, "loss": 0.0134, "step": 420760 }, { "epoch": 4.495646134943106, "grad_norm": 0.04244961217045784, "learning_rate": 7.285783508500844e-07, "loss": 0.0243, "step": 420770 }, { "epoch": 4.495752978257386, "grad_norm": 0.031766727566719055, "learning_rate": 7.285634082693565e-07, "loss": 0.0011, "step": 420780 }, { "epoch": 4.495859821571665, "grad_norm": 0.0017917508957907557, "learning_rate": 7.285484654305632e-07, "loss": 0.0052, "step": 420790 }, { "epoch": 4.495966664885945, "grad_norm": 0.06381310522556305, "learning_rate": 7.285335223337212e-07, "loss": 0.0053, "step": 420800 }, { "epoch": 4.496073508200224, "grad_norm": 9.016561508178711, "learning_rate": 7.285185789788479e-07, "loss": 0.0141, "step": 420810 }, { "epoch": 4.496180351514504, "grad_norm": 0.004993474576622248, "learning_rate": 7.285036353659598e-07, "loss": 0.0053, "step": 420820 }, { "epoch": 4.496287194828784, "grad_norm": 0.24381236732006073, "learning_rate": 7.284886914950739e-07, "loss": 0.0009, "step": 420830 }, { "epoch": 4.496394038143063, "grad_norm": 0.025991903617978096, "learning_rate": 7.284737473662069e-07, "loss": 0.0176, "step": 420840 }, { "epoch": 4.496500881457343, "grad_norm": 0.0017305511282756925, "learning_rate": 7.284588029793758e-07, "loss": 0.0158, "step": 420850 }, { "epoch": 4.496607724771622, "grad_norm": 0.293376624584198, "learning_rate": 7.284438583345975e-07, "loss": 0.0015, "step": 420860 }, { "epoch": 4.496714568085902, "grad_norm": 0.004417190328240395, "learning_rate": 7.284289134318888e-07, "loss": 0.0083, "step": 420870 }, { "epoch": 4.496821411400182, "grad_norm": 0.9026216864585876, "learning_rate": 7.284139682712668e-07, "loss": 0.0031, "step": 420880 }, { "epoch": 4.496928254714462, "grad_norm": 0.009871133603155613, "learning_rate": 7.28399022852748e-07, "loss": 0.0074, "step": 420890 }, { "epoch": 4.497035098028741, "grad_norm": 0.021355584263801575, "learning_rate": 7.283840771763496e-07, "loss": 0.0209, "step": 420900 }, { "epoch": 4.4971419413430205, "grad_norm": 0.1351550817489624, "learning_rate": 7.283691312420882e-07, "loss": 0.0054, "step": 420910 }, { "epoch": 4.4972487846573, "grad_norm": 0.006802170071750879, "learning_rate": 7.283541850499809e-07, "loss": 0.006, "step": 420920 }, { "epoch": 4.497355627971579, "grad_norm": 0.025753820315003395, "learning_rate": 7.283392386000445e-07, "loss": 0.0012, "step": 420930 }, { "epoch": 4.497462471285859, "grad_norm": 0.005018511787056923, "learning_rate": 7.283242918922958e-07, "loss": 0.0115, "step": 420940 }, { "epoch": 4.497569314600139, "grad_norm": 0.005337128881365061, "learning_rate": 7.283093449267519e-07, "loss": 0.0055, "step": 420950 }, { "epoch": 4.497676157914419, "grad_norm": 0.4811996519565582, "learning_rate": 7.282943977034294e-07, "loss": 0.0172, "step": 420960 }, { "epoch": 4.497783001228698, "grad_norm": 0.05016901716589928, "learning_rate": 7.282794502223453e-07, "loss": 0.0064, "step": 420970 }, { "epoch": 4.497889844542978, "grad_norm": 7.394469261169434, "learning_rate": 7.282645024835164e-07, "loss": 0.0098, "step": 420980 }, { "epoch": 4.497996687857257, "grad_norm": 0.001159662613645196, "learning_rate": 7.282495544869598e-07, "loss": 0.0061, "step": 420990 }, { "epoch": 4.498103531171537, "grad_norm": 0.02546616643667221, "learning_rate": 7.282346062326921e-07, "loss": 0.0041, "step": 421000 }, { "epoch": 4.498210374485817, "grad_norm": 4.748743057250977, "learning_rate": 7.282196577207304e-07, "loss": 0.005, "step": 421010 }, { "epoch": 4.498317217800096, "grad_norm": 0.0735662505030632, "learning_rate": 7.282047089510915e-07, "loss": 0.0019, "step": 421020 }, { "epoch": 4.498424061114376, "grad_norm": 0.11620286107063293, "learning_rate": 7.28189759923792e-07, "loss": 0.0043, "step": 421030 }, { "epoch": 4.498530904428655, "grad_norm": 0.8545882701873779, "learning_rate": 7.281748106388494e-07, "loss": 0.0116, "step": 421040 }, { "epoch": 4.498637747742935, "grad_norm": 3.077955484390259, "learning_rate": 7.2815986109628e-07, "loss": 0.0052, "step": 421050 }, { "epoch": 4.498744591057214, "grad_norm": 1.700000286102295, "learning_rate": 7.281449112961009e-07, "loss": 0.0062, "step": 421060 }, { "epoch": 4.4988514343714945, "grad_norm": 0.0058387634344398975, "learning_rate": 7.281299612383291e-07, "loss": 0.0044, "step": 421070 }, { "epoch": 4.498958277685774, "grad_norm": 0.0017902752151712775, "learning_rate": 7.281150109229812e-07, "loss": 0.0103, "step": 421080 }, { "epoch": 4.499065121000053, "grad_norm": 0.04319708049297333, "learning_rate": 7.281000603500744e-07, "loss": 0.0132, "step": 421090 }, { "epoch": 4.499171964314333, "grad_norm": 0.08476026356220245, "learning_rate": 7.280851095196253e-07, "loss": 0.0085, "step": 421100 }, { "epoch": 4.499278807628612, "grad_norm": 0.31205815076828003, "learning_rate": 7.280701584316509e-07, "loss": 0.0075, "step": 421110 }, { "epoch": 4.499385650942893, "grad_norm": 0.15970048308372498, "learning_rate": 7.280552070861682e-07, "loss": 0.0302, "step": 421120 }, { "epoch": 4.499492494257172, "grad_norm": 0.004358797799795866, "learning_rate": 7.280402554831939e-07, "loss": 0.0006, "step": 421130 }, { "epoch": 4.499599337571452, "grad_norm": 0.02374911494553089, "learning_rate": 7.280253036227448e-07, "loss": 0.0031, "step": 421140 }, { "epoch": 4.499706180885731, "grad_norm": 0.00697123259305954, "learning_rate": 7.280103515048381e-07, "loss": 0.0096, "step": 421150 }, { "epoch": 4.4998130242000105, "grad_norm": 0.17271026968955994, "learning_rate": 7.279953991294904e-07, "loss": 0.0013, "step": 421160 }, { "epoch": 4.49991986751429, "grad_norm": 0.15387743711471558, "learning_rate": 7.279804464967187e-07, "loss": 0.0017, "step": 421170 }, { "epoch": 4.500026710828569, "grad_norm": 0.00822854321449995, "learning_rate": 7.279654936065398e-07, "loss": 0.0078, "step": 421180 }, { "epoch": 4.50013355414285, "grad_norm": 4.041144847869873, "learning_rate": 7.279505404589707e-07, "loss": 0.0061, "step": 421190 }, { "epoch": 4.500240397457129, "grad_norm": 4.186069011688232, "learning_rate": 7.279355870540284e-07, "loss": 0.0106, "step": 421200 }, { "epoch": 4.500347240771409, "grad_norm": 0.5551406145095825, "learning_rate": 7.279206333917295e-07, "loss": 0.0103, "step": 421210 }, { "epoch": 4.500454084085688, "grad_norm": 0.048616357147693634, "learning_rate": 7.279056794720911e-07, "loss": 0.0228, "step": 421220 }, { "epoch": 4.500560927399968, "grad_norm": 0.20083199441432953, "learning_rate": 7.278907252951298e-07, "loss": 0.0528, "step": 421230 }, { "epoch": 4.500667770714248, "grad_norm": 1.6004902124404907, "learning_rate": 7.278757708608627e-07, "loss": 0.0096, "step": 421240 }, { "epoch": 4.500774614028527, "grad_norm": 2.0053162574768066, "learning_rate": 7.278608161693067e-07, "loss": 0.0044, "step": 421250 }, { "epoch": 4.500881457342807, "grad_norm": 0.07070093601942062, "learning_rate": 7.278458612204786e-07, "loss": 0.0072, "step": 421260 }, { "epoch": 4.500988300657086, "grad_norm": 0.5127115845680237, "learning_rate": 7.278309060143955e-07, "loss": 0.0208, "step": 421270 }, { "epoch": 4.501095143971366, "grad_norm": 0.05653117597103119, "learning_rate": 7.278159505510739e-07, "loss": 0.0041, "step": 421280 }, { "epoch": 4.501201987285645, "grad_norm": 2.586780071258545, "learning_rate": 7.27800994830531e-07, "loss": 0.0052, "step": 421290 }, { "epoch": 4.501308830599926, "grad_norm": 0.06121337413787842, "learning_rate": 7.277860388527837e-07, "loss": 0.0053, "step": 421300 }, { "epoch": 4.501415673914205, "grad_norm": 0.003270181594416499, "learning_rate": 7.277710826178486e-07, "loss": 0.0066, "step": 421310 }, { "epoch": 4.5015225172284845, "grad_norm": 1.7740654945373535, "learning_rate": 7.277561261257428e-07, "loss": 0.0027, "step": 421320 }, { "epoch": 4.501629360542764, "grad_norm": 0.35722577571868896, "learning_rate": 7.277411693764833e-07, "loss": 0.0039, "step": 421330 }, { "epoch": 4.501736203857043, "grad_norm": 6.051172733306885, "learning_rate": 7.277262123700864e-07, "loss": 0.0154, "step": 421340 }, { "epoch": 4.501843047171323, "grad_norm": 0.01608896255493164, "learning_rate": 7.277112551065699e-07, "loss": 0.0014, "step": 421350 }, { "epoch": 4.501949890485603, "grad_norm": 1.9431465864181519, "learning_rate": 7.2769629758595e-07, "loss": 0.005, "step": 421360 }, { "epoch": 4.502056733799883, "grad_norm": 0.01888999715447426, "learning_rate": 7.276813398082437e-07, "loss": 0.0054, "step": 421370 }, { "epoch": 4.502163577114162, "grad_norm": 0.01886940933763981, "learning_rate": 7.276663817734682e-07, "loss": 0.0226, "step": 421380 }, { "epoch": 4.502270420428442, "grad_norm": 0.7936204075813293, "learning_rate": 7.2765142348164e-07, "loss": 0.0135, "step": 421390 }, { "epoch": 4.502377263742721, "grad_norm": 0.3714796006679535, "learning_rate": 7.276364649327762e-07, "loss": 0.045, "step": 421400 }, { "epoch": 4.5024841070570005, "grad_norm": 4.435879230499268, "learning_rate": 7.276215061268937e-07, "loss": 0.0027, "step": 421410 }, { "epoch": 4.502590950371281, "grad_norm": 3.8259191513061523, "learning_rate": 7.276065470640094e-07, "loss": 0.0061, "step": 421420 }, { "epoch": 4.50269779368556, "grad_norm": 12.968986511230469, "learning_rate": 7.275915877441398e-07, "loss": 0.0579, "step": 421430 }, { "epoch": 4.50280463699984, "grad_norm": 0.18666218221187592, "learning_rate": 7.275766281673025e-07, "loss": 0.014, "step": 421440 }, { "epoch": 4.502911480314119, "grad_norm": 0.0023226700723171234, "learning_rate": 7.275616683335138e-07, "loss": 0.0138, "step": 421450 }, { "epoch": 4.503018323628399, "grad_norm": 0.004896031692624092, "learning_rate": 7.275467082427909e-07, "loss": 0.0168, "step": 421460 }, { "epoch": 4.503125166942679, "grad_norm": 7.6536478996276855, "learning_rate": 7.275317478951505e-07, "loss": 0.0065, "step": 421470 }, { "epoch": 4.5032320102569585, "grad_norm": 0.005491626914590597, "learning_rate": 7.275167872906097e-07, "loss": 0.0091, "step": 421480 }, { "epoch": 4.503338853571238, "grad_norm": 14.186888694763184, "learning_rate": 7.275018264291852e-07, "loss": 0.0281, "step": 421490 }, { "epoch": 4.503445696885517, "grad_norm": 0.041909825056791306, "learning_rate": 7.274868653108939e-07, "loss": 0.0208, "step": 421500 }, { "epoch": 4.503552540199797, "grad_norm": 0.010971925221383572, "learning_rate": 7.274719039357529e-07, "loss": 0.0119, "step": 421510 }, { "epoch": 4.503659383514076, "grad_norm": 0.022533798590302467, "learning_rate": 7.274569423037789e-07, "loss": 0.0128, "step": 421520 }, { "epoch": 4.503766226828356, "grad_norm": 0.33129122853279114, "learning_rate": 7.274419804149889e-07, "loss": 0.0129, "step": 421530 }, { "epoch": 4.503873070142636, "grad_norm": 0.02255784347653389, "learning_rate": 7.274270182693997e-07, "loss": 0.0018, "step": 421540 }, { "epoch": 4.503979913456916, "grad_norm": 13.011770248413086, "learning_rate": 7.274120558670282e-07, "loss": 0.0114, "step": 421550 }, { "epoch": 4.504086756771195, "grad_norm": 0.08197087049484253, "learning_rate": 7.273970932078915e-07, "loss": 0.0013, "step": 421560 }, { "epoch": 4.5041936000854745, "grad_norm": 1.6274839639663696, "learning_rate": 7.27382130292006e-07, "loss": 0.0185, "step": 421570 }, { "epoch": 4.504300443399754, "grad_norm": 0.04120180010795593, "learning_rate": 7.273671671193891e-07, "loss": 0.0011, "step": 421580 }, { "epoch": 4.504407286714034, "grad_norm": 11.454988479614258, "learning_rate": 7.273522036900575e-07, "loss": 0.0053, "step": 421590 }, { "epoch": 4.504514130028314, "grad_norm": 0.4490622282028198, "learning_rate": 7.273372400040281e-07, "loss": 0.0027, "step": 421600 }, { "epoch": 4.504620973342593, "grad_norm": 3.8790273666381836, "learning_rate": 7.273222760613178e-07, "loss": 0.0384, "step": 421610 }, { "epoch": 4.504727816656873, "grad_norm": 0.00948435626924038, "learning_rate": 7.273073118619435e-07, "loss": 0.022, "step": 421620 }, { "epoch": 4.504834659971152, "grad_norm": 0.7516928315162659, "learning_rate": 7.27292347405922e-07, "loss": 0.0097, "step": 421630 }, { "epoch": 4.504941503285432, "grad_norm": 0.06905325502157211, "learning_rate": 7.272773826932703e-07, "loss": 0.0163, "step": 421640 }, { "epoch": 4.505048346599711, "grad_norm": 0.1055050864815712, "learning_rate": 7.272624177240053e-07, "loss": 0.0276, "step": 421650 }, { "epoch": 4.505155189913991, "grad_norm": 10.167431831359863, "learning_rate": 7.27247452498144e-07, "loss": 0.0182, "step": 421660 }, { "epoch": 4.505262033228271, "grad_norm": 0.8365029096603394, "learning_rate": 7.272324870157031e-07, "loss": 0.0039, "step": 421670 }, { "epoch": 4.50536887654255, "grad_norm": 0.13925263285636902, "learning_rate": 7.272175212766995e-07, "loss": 0.0132, "step": 421680 }, { "epoch": 4.50547571985683, "grad_norm": 0.6443079113960266, "learning_rate": 7.272025552811503e-07, "loss": 0.0126, "step": 421690 }, { "epoch": 4.505582563171109, "grad_norm": 1.0630954504013062, "learning_rate": 7.271875890290722e-07, "loss": 0.0094, "step": 421700 }, { "epoch": 4.50568940648539, "grad_norm": 0.00456628343090415, "learning_rate": 7.271726225204822e-07, "loss": 0.021, "step": 421710 }, { "epoch": 4.505796249799669, "grad_norm": 0.026214774698019028, "learning_rate": 7.271576557553971e-07, "loss": 0.0055, "step": 421720 }, { "epoch": 4.5059030931139485, "grad_norm": 2.941124439239502, "learning_rate": 7.271426887338339e-07, "loss": 0.016, "step": 421730 }, { "epoch": 4.506009936428228, "grad_norm": 0.006925065536051989, "learning_rate": 7.271277214558094e-07, "loss": 0.0079, "step": 421740 }, { "epoch": 4.5061167797425075, "grad_norm": 0.003227242734283209, "learning_rate": 7.271127539213406e-07, "loss": 0.0063, "step": 421750 }, { "epoch": 4.506223623056787, "grad_norm": 2.2300264835357666, "learning_rate": 7.270977861304445e-07, "loss": 0.0042, "step": 421760 }, { "epoch": 4.506330466371066, "grad_norm": 0.4216472804546356, "learning_rate": 7.270828180831377e-07, "loss": 0.0005, "step": 421770 }, { "epoch": 4.506437309685347, "grad_norm": 2.989488363265991, "learning_rate": 7.270678497794372e-07, "loss": 0.0054, "step": 421780 }, { "epoch": 4.506544152999626, "grad_norm": 10.225722312927246, "learning_rate": 7.2705288121936e-07, "loss": 0.0128, "step": 421790 }, { "epoch": 4.506650996313906, "grad_norm": 0.3148318827152252, "learning_rate": 7.270379124029231e-07, "loss": 0.0023, "step": 421800 }, { "epoch": 4.506757839628185, "grad_norm": 0.42748141288757324, "learning_rate": 7.270229433301432e-07, "loss": 0.0062, "step": 421810 }, { "epoch": 4.506864682942465, "grad_norm": 0.004060270730406046, "learning_rate": 7.270079740010372e-07, "loss": 0.0043, "step": 421820 }, { "epoch": 4.506971526256745, "grad_norm": 0.003548530163243413, "learning_rate": 7.26993004415622e-07, "loss": 0.0158, "step": 421830 }, { "epoch": 4.507078369571024, "grad_norm": 0.005245887208729982, "learning_rate": 7.269780345739147e-07, "loss": 0.0407, "step": 421840 }, { "epoch": 4.507185212885304, "grad_norm": 15.598861694335938, "learning_rate": 7.269630644759319e-07, "loss": 0.0149, "step": 421850 }, { "epoch": 4.507292056199583, "grad_norm": 0.023622144013643265, "learning_rate": 7.269480941216908e-07, "loss": 0.0063, "step": 421860 }, { "epoch": 4.507398899513863, "grad_norm": 0.06308270990848541, "learning_rate": 7.269331235112084e-07, "loss": 0.0313, "step": 421870 }, { "epoch": 4.507505742828142, "grad_norm": 0.004073777236044407, "learning_rate": 7.269181526445011e-07, "loss": 0.0023, "step": 421880 }, { "epoch": 4.507612586142422, "grad_norm": 6.8318681716918945, "learning_rate": 7.269031815215861e-07, "loss": 0.0257, "step": 421890 }, { "epoch": 4.507719429456702, "grad_norm": 9.511281967163086, "learning_rate": 7.268882101424803e-07, "loss": 0.0172, "step": 421900 }, { "epoch": 4.5078262727709815, "grad_norm": 0.00882816594094038, "learning_rate": 7.268732385072006e-07, "loss": 0.0209, "step": 421910 }, { "epoch": 4.507933116085261, "grad_norm": 0.1250249743461609, "learning_rate": 7.268582666157638e-07, "loss": 0.0174, "step": 421920 }, { "epoch": 4.50803995939954, "grad_norm": 0.031908947974443436, "learning_rate": 7.268432944681872e-07, "loss": 0.0139, "step": 421930 }, { "epoch": 4.50814680271382, "grad_norm": 0.00256255641579628, "learning_rate": 7.268283220644871e-07, "loss": 0.0025, "step": 421940 }, { "epoch": 4.5082536460281, "grad_norm": 0.014495487324893475, "learning_rate": 7.268133494046809e-07, "loss": 0.0029, "step": 421950 }, { "epoch": 4.50836048934238, "grad_norm": 5.270346164703369, "learning_rate": 7.267983764887851e-07, "loss": 0.0101, "step": 421960 }, { "epoch": 4.508467332656659, "grad_norm": 0.01705622859299183, "learning_rate": 7.267834033168169e-07, "loss": 0.0047, "step": 421970 }, { "epoch": 4.508574175970939, "grad_norm": 0.062085770070552826, "learning_rate": 7.267684298887932e-07, "loss": 0.0126, "step": 421980 }, { "epoch": 4.508681019285218, "grad_norm": 0.20107509195804596, "learning_rate": 7.267534562047308e-07, "loss": 0.0002, "step": 421990 }, { "epoch": 4.5087878625994975, "grad_norm": 0.0017953081987798214, "learning_rate": 7.267384822646466e-07, "loss": 0.0265, "step": 422000 }, { "epoch": 4.508894705913778, "grad_norm": 0.0008020615205168724, "learning_rate": 7.267235080685576e-07, "loss": 0.0084, "step": 422010 }, { "epoch": 4.509001549228057, "grad_norm": 0.0694086030125618, "learning_rate": 7.267085336164804e-07, "loss": 0.0037, "step": 422020 }, { "epoch": 4.509108392542337, "grad_norm": 1.0033754110336304, "learning_rate": 7.266935589084325e-07, "loss": 0.0027, "step": 422030 }, { "epoch": 4.509215235856616, "grad_norm": 0.10239741951227188, "learning_rate": 7.266785839444303e-07, "loss": 0.0009, "step": 422040 }, { "epoch": 4.509322079170896, "grad_norm": 0.01276587788015604, "learning_rate": 7.266636087244909e-07, "loss": 0.0075, "step": 422050 }, { "epoch": 4.509428922485175, "grad_norm": 8.012284278869629, "learning_rate": 7.26648633248631e-07, "loss": 0.0052, "step": 422060 }, { "epoch": 4.5095357657994555, "grad_norm": 0.012101922184228897, "learning_rate": 7.266336575168679e-07, "loss": 0.0204, "step": 422070 }, { "epoch": 4.509642609113735, "grad_norm": 0.4451955556869507, "learning_rate": 7.266186815292182e-07, "loss": 0.0083, "step": 422080 }, { "epoch": 4.509749452428014, "grad_norm": 0.7846558094024658, "learning_rate": 7.266037052856991e-07, "loss": 0.0037, "step": 422090 }, { "epoch": 4.509856295742294, "grad_norm": 0.2270628809928894, "learning_rate": 7.265887287863272e-07, "loss": 0.0027, "step": 422100 }, { "epoch": 4.509963139056573, "grad_norm": 0.15483421087265015, "learning_rate": 7.265737520311194e-07, "loss": 0.0139, "step": 422110 }, { "epoch": 4.510069982370853, "grad_norm": 0.23721511662006378, "learning_rate": 7.265587750200928e-07, "loss": 0.0002, "step": 422120 }, { "epoch": 4.510176825685133, "grad_norm": 0.3392270505428314, "learning_rate": 7.265437977532643e-07, "loss": 0.0256, "step": 422130 }, { "epoch": 4.510283668999413, "grad_norm": 6.054694652557373, "learning_rate": 7.265288202306507e-07, "loss": 0.0206, "step": 422140 }, { "epoch": 4.510390512313692, "grad_norm": 5.304699420928955, "learning_rate": 7.265138424522689e-07, "loss": 0.0057, "step": 422150 }, { "epoch": 4.5104973556279715, "grad_norm": 0.0038359619211405516, "learning_rate": 7.26498864418136e-07, "loss": 0.0122, "step": 422160 }, { "epoch": 4.510604198942251, "grad_norm": 0.0010559275979176164, "learning_rate": 7.264838861282686e-07, "loss": 0.0044, "step": 422170 }, { "epoch": 4.51071104225653, "grad_norm": 0.0002966435858979821, "learning_rate": 7.264689075826839e-07, "loss": 0.0077, "step": 422180 }, { "epoch": 4.510817885570811, "grad_norm": 1.585601806640625, "learning_rate": 7.264539287813989e-07, "loss": 0.0117, "step": 422190 }, { "epoch": 4.51092472888509, "grad_norm": 0.013684041798114777, "learning_rate": 7.264389497244301e-07, "loss": 0.0009, "step": 422200 }, { "epoch": 4.51103157219937, "grad_norm": 0.09237772971391678, "learning_rate": 7.264239704117947e-07, "loss": 0.0067, "step": 422210 }, { "epoch": 4.511138415513649, "grad_norm": 0.021683774888515472, "learning_rate": 7.264089908435095e-07, "loss": 0.0001, "step": 422220 }, { "epoch": 4.511245258827929, "grad_norm": 0.3283085525035858, "learning_rate": 7.263940110195917e-07, "loss": 0.0037, "step": 422230 }, { "epoch": 4.511352102142208, "grad_norm": 0.12782014906406403, "learning_rate": 7.263790309400577e-07, "loss": 0.0003, "step": 422240 }, { "epoch": 4.511458945456488, "grad_norm": 0.33299458026885986, "learning_rate": 7.263640506049248e-07, "loss": 0.0007, "step": 422250 }, { "epoch": 4.511565788770768, "grad_norm": 0.03330201655626297, "learning_rate": 7.263490700142099e-07, "loss": 0.0085, "step": 422260 }, { "epoch": 4.511672632085047, "grad_norm": 0.14264333248138428, "learning_rate": 7.263340891679297e-07, "loss": 0.002, "step": 422270 }, { "epoch": 4.511779475399327, "grad_norm": 0.05153908580541611, "learning_rate": 7.263191080661011e-07, "loss": 0.0081, "step": 422280 }, { "epoch": 4.511886318713606, "grad_norm": 0.005119974259287119, "learning_rate": 7.263041267087413e-07, "loss": 0.0039, "step": 422290 }, { "epoch": 4.511993162027887, "grad_norm": 0.3153901994228363, "learning_rate": 7.262891450958671e-07, "loss": 0.0227, "step": 422300 }, { "epoch": 4.512100005342166, "grad_norm": 2.448300838470459, "learning_rate": 7.262741632274954e-07, "loss": 0.0152, "step": 422310 }, { "epoch": 4.5122068486564455, "grad_norm": 0.0021186689846217632, "learning_rate": 7.262591811036429e-07, "loss": 0.0014, "step": 422320 }, { "epoch": 4.512313691970725, "grad_norm": 0.00952055212110281, "learning_rate": 7.262441987243268e-07, "loss": 0.0002, "step": 422330 }, { "epoch": 4.512420535285004, "grad_norm": 2.849321126937866, "learning_rate": 7.26229216089564e-07, "loss": 0.0075, "step": 422340 }, { "epoch": 4.512527378599284, "grad_norm": 0.0014726589433848858, "learning_rate": 7.262142331993712e-07, "loss": 0.0016, "step": 422350 }, { "epoch": 4.512634221913563, "grad_norm": 0.0015519860899075866, "learning_rate": 7.261992500537657e-07, "loss": 0.0369, "step": 422360 }, { "epoch": 4.512741065227844, "grad_norm": 0.028091583400964737, "learning_rate": 7.261842666527638e-07, "loss": 0.0149, "step": 422370 }, { "epoch": 4.512847908542123, "grad_norm": 0.0015181393828243017, "learning_rate": 7.261692829963832e-07, "loss": 0.0024, "step": 422380 }, { "epoch": 4.512954751856403, "grad_norm": 0.009467958472669125, "learning_rate": 7.261542990846402e-07, "loss": 0.003, "step": 422390 }, { "epoch": 4.513061595170682, "grad_norm": 0.6641167402267456, "learning_rate": 7.261393149175518e-07, "loss": 0.017, "step": 422400 }, { "epoch": 4.5131684384849615, "grad_norm": 0.0040737334638834, "learning_rate": 7.261243304951353e-07, "loss": 0.0089, "step": 422410 }, { "epoch": 4.513275281799242, "grad_norm": 0.6666938066482544, "learning_rate": 7.261093458174073e-07, "loss": 0.0024, "step": 422420 }, { "epoch": 4.513382125113521, "grad_norm": 4.515061378479004, "learning_rate": 7.260943608843847e-07, "loss": 0.0149, "step": 422430 }, { "epoch": 4.513488968427801, "grad_norm": 1.7817798852920532, "learning_rate": 7.260793756960844e-07, "loss": 0.0099, "step": 422440 }, { "epoch": 4.51359581174208, "grad_norm": 0.012717684730887413, "learning_rate": 7.260643902525236e-07, "loss": 0.0014, "step": 422450 }, { "epoch": 4.51370265505636, "grad_norm": 0.04805363714694977, "learning_rate": 7.260494045537191e-07, "loss": 0.0121, "step": 422460 }, { "epoch": 4.513809498370639, "grad_norm": 0.03353865444660187, "learning_rate": 7.260344185996876e-07, "loss": 0.0022, "step": 422470 }, { "epoch": 4.513916341684919, "grad_norm": 0.24866485595703125, "learning_rate": 7.260194323904463e-07, "loss": 0.0022, "step": 422480 }, { "epoch": 4.514023184999199, "grad_norm": 2.766234874725342, "learning_rate": 7.260044459260119e-07, "loss": 0.0069, "step": 422490 }, { "epoch": 4.514130028313478, "grad_norm": 0.005568128079175949, "learning_rate": 7.259894592064015e-07, "loss": 0.0006, "step": 422500 }, { "epoch": 4.514236871627758, "grad_norm": 0.026017947122454643, "learning_rate": 7.259744722316319e-07, "loss": 0.0471, "step": 422510 }, { "epoch": 4.514343714942037, "grad_norm": 0.059256814420223236, "learning_rate": 7.259594850017201e-07, "loss": 0.0566, "step": 422520 }, { "epoch": 4.514450558256317, "grad_norm": 10.31146240234375, "learning_rate": 7.259444975166831e-07, "loss": 0.0074, "step": 422530 }, { "epoch": 4.514557401570597, "grad_norm": 0.11093049496412277, "learning_rate": 7.259295097765375e-07, "loss": 0.017, "step": 422540 }, { "epoch": 4.514664244884877, "grad_norm": 0.00807352177798748, "learning_rate": 7.259145217813005e-07, "loss": 0.0029, "step": 422550 }, { "epoch": 4.514771088199156, "grad_norm": 4.463250160217285, "learning_rate": 7.25899533530989e-07, "loss": 0.0028, "step": 422560 }, { "epoch": 4.5148779315134355, "grad_norm": 0.00099427648819983, "learning_rate": 7.258845450256198e-07, "loss": 0.0439, "step": 422570 }, { "epoch": 4.514984774827715, "grad_norm": 1.8739018440246582, "learning_rate": 7.258695562652099e-07, "loss": 0.0093, "step": 422580 }, { "epoch": 4.5150916181419944, "grad_norm": 2.9435486793518066, "learning_rate": 7.258545672497763e-07, "loss": 0.0166, "step": 422590 }, { "epoch": 4.515198461456274, "grad_norm": 9.25979995727539, "learning_rate": 7.258395779793358e-07, "loss": 0.0139, "step": 422600 }, { "epoch": 4.515305304770554, "grad_norm": 0.018367307260632515, "learning_rate": 7.258245884539054e-07, "loss": 0.0089, "step": 422610 }, { "epoch": 4.515412148084834, "grad_norm": 4.166489124298096, "learning_rate": 7.25809598673502e-07, "loss": 0.0101, "step": 422620 }, { "epoch": 4.515518991399113, "grad_norm": 1.7954232692718506, "learning_rate": 7.257946086381426e-07, "loss": 0.0077, "step": 422630 }, { "epoch": 4.515625834713393, "grad_norm": 0.42166516184806824, "learning_rate": 7.25779618347844e-07, "loss": 0.0014, "step": 422640 }, { "epoch": 4.515732678027672, "grad_norm": 0.0020555611699819565, "learning_rate": 7.257646278026229e-07, "loss": 0.0121, "step": 422650 }, { "epoch": 4.515839521341952, "grad_norm": 4.492007255554199, "learning_rate": 7.257496370024968e-07, "loss": 0.0159, "step": 422660 }, { "epoch": 4.515946364656232, "grad_norm": 0.02450769580900669, "learning_rate": 7.257346459474823e-07, "loss": 0.0063, "step": 422670 }, { "epoch": 4.516053207970511, "grad_norm": 0.008201255463063717, "learning_rate": 7.257196546375962e-07, "loss": 0.0049, "step": 422680 }, { "epoch": 4.516160051284791, "grad_norm": 0.43574070930480957, "learning_rate": 7.257046630728557e-07, "loss": 0.0067, "step": 422690 }, { "epoch": 4.51626689459907, "grad_norm": 2.435152053833008, "learning_rate": 7.256896712532776e-07, "loss": 0.0069, "step": 422700 }, { "epoch": 4.51637373791335, "grad_norm": 0.0008552768267691135, "learning_rate": 7.256746791788787e-07, "loss": 0.0204, "step": 422710 }, { "epoch": 4.516480581227629, "grad_norm": 0.06192493066191673, "learning_rate": 7.256596868496763e-07, "loss": 0.0048, "step": 422720 }, { "epoch": 4.5165874245419095, "grad_norm": 0.0472455732524395, "learning_rate": 7.256446942656868e-07, "loss": 0.0056, "step": 422730 }, { "epoch": 4.516694267856189, "grad_norm": 1.7527275085449219, "learning_rate": 7.256297014269275e-07, "loss": 0.0066, "step": 422740 }, { "epoch": 4.5168011111704685, "grad_norm": 7.554622173309326, "learning_rate": 7.256147083334153e-07, "loss": 0.0052, "step": 422750 }, { "epoch": 4.516907954484748, "grad_norm": 0.767084538936615, "learning_rate": 7.25599714985167e-07, "loss": 0.0114, "step": 422760 }, { "epoch": 4.517014797799027, "grad_norm": 0.2761784493923187, "learning_rate": 7.255847213821997e-07, "loss": 0.0113, "step": 422770 }, { "epoch": 4.517121641113308, "grad_norm": 0.0818156972527504, "learning_rate": 7.255697275245301e-07, "loss": 0.0018, "step": 422780 }, { "epoch": 4.517228484427587, "grad_norm": 0.16722731292247772, "learning_rate": 7.255547334121753e-07, "loss": 0.0049, "step": 422790 }, { "epoch": 4.517335327741867, "grad_norm": 0.07999160140752792, "learning_rate": 7.255397390451522e-07, "loss": 0.0104, "step": 422800 }, { "epoch": 4.517442171056146, "grad_norm": 5.064745903015137, "learning_rate": 7.255247444234778e-07, "loss": 0.0206, "step": 422810 }, { "epoch": 4.5175490143704256, "grad_norm": 0.04367434233427048, "learning_rate": 7.255097495471687e-07, "loss": 0.0008, "step": 422820 }, { "epoch": 4.517655857684705, "grad_norm": 0.0019984880927950144, "learning_rate": 7.254947544162423e-07, "loss": 0.0102, "step": 422830 }, { "epoch": 4.517762700998985, "grad_norm": 2.3228657245635986, "learning_rate": 7.254797590307152e-07, "loss": 0.0324, "step": 422840 }, { "epoch": 4.517869544313265, "grad_norm": 0.3806781768798828, "learning_rate": 7.254647633906043e-07, "loss": 0.0062, "step": 422850 }, { "epoch": 4.517976387627544, "grad_norm": 0.19302135705947876, "learning_rate": 7.25449767495927e-07, "loss": 0.0156, "step": 422860 }, { "epoch": 4.518083230941824, "grad_norm": 0.0376405231654644, "learning_rate": 7.254347713466997e-07, "loss": 0.0046, "step": 422870 }, { "epoch": 4.518190074256103, "grad_norm": 0.16989576816558838, "learning_rate": 7.254197749429396e-07, "loss": 0.0163, "step": 422880 }, { "epoch": 4.518296917570383, "grad_norm": 6.41620397567749, "learning_rate": 7.254047782846635e-07, "loss": 0.005, "step": 422890 }, { "epoch": 4.518403760884663, "grad_norm": 2.6694395542144775, "learning_rate": 7.253897813718884e-07, "loss": 0.0031, "step": 422900 }, { "epoch": 4.5185106041989425, "grad_norm": 0.018439501523971558, "learning_rate": 7.253747842046312e-07, "loss": 0.0097, "step": 422910 }, { "epoch": 4.518617447513222, "grad_norm": 0.09713096916675568, "learning_rate": 7.253597867829088e-07, "loss": 0.0027, "step": 422920 }, { "epoch": 4.518724290827501, "grad_norm": 0.019012708216905594, "learning_rate": 7.253447891067384e-07, "loss": 0.0082, "step": 422930 }, { "epoch": 4.518831134141781, "grad_norm": 4.591488361358643, "learning_rate": 7.253297911761366e-07, "loss": 0.009, "step": 422940 }, { "epoch": 4.51893797745606, "grad_norm": 1.277647852897644, "learning_rate": 7.253147929911205e-07, "loss": 0.0042, "step": 422950 }, { "epoch": 4.519044820770341, "grad_norm": 0.044635895639657974, "learning_rate": 7.252997945517069e-07, "loss": 0.001, "step": 422960 }, { "epoch": 4.51915166408462, "grad_norm": 0.5438423156738281, "learning_rate": 7.25284795857913e-07, "loss": 0.0327, "step": 422970 }, { "epoch": 4.5192585073989, "grad_norm": 0.003398069879040122, "learning_rate": 7.252697969097554e-07, "loss": 0.0024, "step": 422980 }, { "epoch": 4.519365350713179, "grad_norm": 0.029819967225193977, "learning_rate": 7.252547977072512e-07, "loss": 0.0027, "step": 422990 }, { "epoch": 4.5194721940274585, "grad_norm": 0.0019642182160168886, "learning_rate": 7.252397982504174e-07, "loss": 0.0042, "step": 423000 }, { "epoch": 4.519579037341739, "grad_norm": 0.006398903205990791, "learning_rate": 7.252247985392709e-07, "loss": 0.0093, "step": 423010 }, { "epoch": 4.519685880656018, "grad_norm": 9.14941120147705, "learning_rate": 7.252097985738284e-07, "loss": 0.0671, "step": 423020 }, { "epoch": 4.519792723970298, "grad_norm": 9.170058250427246, "learning_rate": 7.251947983541073e-07, "loss": 0.0078, "step": 423030 }, { "epoch": 4.519899567284577, "grad_norm": 0.016662899404764175, "learning_rate": 7.251797978801241e-07, "loss": 0.0039, "step": 423040 }, { "epoch": 4.520006410598857, "grad_norm": 0.008052228018641472, "learning_rate": 7.25164797151896e-07, "loss": 0.0008, "step": 423050 }, { "epoch": 4.520113253913136, "grad_norm": 0.024342572316527367, "learning_rate": 7.251497961694399e-07, "loss": 0.0005, "step": 423060 }, { "epoch": 4.520220097227416, "grad_norm": 0.09710939973592758, "learning_rate": 7.251347949327726e-07, "loss": 0.0008, "step": 423070 }, { "epoch": 4.520326940541696, "grad_norm": 0.19063979387283325, "learning_rate": 7.251197934419111e-07, "loss": 0.0002, "step": 423080 }, { "epoch": 4.520433783855975, "grad_norm": 0.6843034625053406, "learning_rate": 7.251047916968724e-07, "loss": 0.0051, "step": 423090 }, { "epoch": 4.520540627170255, "grad_norm": 0.3050902485847473, "learning_rate": 7.250897896976735e-07, "loss": 0.0006, "step": 423100 }, { "epoch": 4.520647470484534, "grad_norm": 0.09290845692157745, "learning_rate": 7.25074787444331e-07, "loss": 0.0122, "step": 423110 }, { "epoch": 4.520754313798814, "grad_norm": 0.00105797138530761, "learning_rate": 7.250597849368622e-07, "loss": 0.0165, "step": 423120 }, { "epoch": 4.520861157113094, "grad_norm": 2.8810794353485107, "learning_rate": 7.250447821752841e-07, "loss": 0.0255, "step": 423130 }, { "epoch": 4.520968000427374, "grad_norm": 0.35147297382354736, "learning_rate": 7.250297791596132e-07, "loss": 0.0048, "step": 423140 }, { "epoch": 4.521074843741653, "grad_norm": 0.0038022990338504314, "learning_rate": 7.250147758898668e-07, "loss": 0.0046, "step": 423150 }, { "epoch": 4.5211816870559325, "grad_norm": 0.0012406706809997559, "learning_rate": 7.249997723660618e-07, "loss": 0.0013, "step": 423160 }, { "epoch": 4.521288530370212, "grad_norm": 1.734041452407837, "learning_rate": 7.249847685882149e-07, "loss": 0.0261, "step": 423170 }, { "epoch": 4.521395373684491, "grad_norm": 6.626750469207764, "learning_rate": 7.249697645563434e-07, "loss": 0.0097, "step": 423180 }, { "epoch": 4.521502216998771, "grad_norm": 0.16272227466106415, "learning_rate": 7.249547602704639e-07, "loss": 0.0132, "step": 423190 }, { "epoch": 4.521609060313051, "grad_norm": 0.0073163933120667934, "learning_rate": 7.249397557305936e-07, "loss": 0.0046, "step": 423200 }, { "epoch": 4.521715903627331, "grad_norm": 0.048428721725940704, "learning_rate": 7.249247509367493e-07, "loss": 0.0073, "step": 423210 }, { "epoch": 4.52182274694161, "grad_norm": 0.0030301648657768965, "learning_rate": 7.249097458889481e-07, "loss": 0.0035, "step": 423220 }, { "epoch": 4.52192959025589, "grad_norm": 0.36713430285453796, "learning_rate": 7.248947405872067e-07, "loss": 0.0038, "step": 423230 }, { "epoch": 4.522036433570169, "grad_norm": 0.8359083533287048, "learning_rate": 7.248797350315422e-07, "loss": 0.0006, "step": 423240 }, { "epoch": 4.522143276884449, "grad_norm": 0.42041221261024475, "learning_rate": 7.248647292219714e-07, "loss": 0.0122, "step": 423250 }, { "epoch": 4.522250120198729, "grad_norm": 0.001984295668080449, "learning_rate": 7.248497231585114e-07, "loss": 0.0048, "step": 423260 }, { "epoch": 4.522356963513008, "grad_norm": 0.0464426688849926, "learning_rate": 7.248347168411793e-07, "loss": 0.0035, "step": 423270 }, { "epoch": 4.522463806827288, "grad_norm": 0.0065191821195185184, "learning_rate": 7.248197102699916e-07, "loss": 0.0057, "step": 423280 }, { "epoch": 4.522570650141567, "grad_norm": 0.4057779610157013, "learning_rate": 7.248047034449656e-07, "loss": 0.0064, "step": 423290 }, { "epoch": 4.522677493455847, "grad_norm": 0.5555055141448975, "learning_rate": 7.24789696366118e-07, "loss": 0.0179, "step": 423300 }, { "epoch": 4.522784336770126, "grad_norm": 0.052338533103466034, "learning_rate": 7.247746890334659e-07, "loss": 0.007, "step": 423310 }, { "epoch": 4.5228911800844065, "grad_norm": 0.03453686833381653, "learning_rate": 7.247596814470263e-07, "loss": 0.0082, "step": 423320 }, { "epoch": 4.522998023398686, "grad_norm": 0.3112403154373169, "learning_rate": 7.24744673606816e-07, "loss": 0.0083, "step": 423330 }, { "epoch": 4.523104866712965, "grad_norm": 0.09813263267278671, "learning_rate": 7.247296655128519e-07, "loss": 0.0053, "step": 423340 }, { "epoch": 4.523211710027245, "grad_norm": 0.6289729475975037, "learning_rate": 7.247146571651512e-07, "loss": 0.0035, "step": 423350 }, { "epoch": 4.523318553341524, "grad_norm": 7.318199157714844, "learning_rate": 7.246996485637306e-07, "loss": 0.002, "step": 423360 }, { "epoch": 4.523425396655805, "grad_norm": 0.008799450471997261, "learning_rate": 7.246846397086072e-07, "loss": 0.0012, "step": 423370 }, { "epoch": 4.523532239970084, "grad_norm": 0.01846272312104702, "learning_rate": 7.246696305997978e-07, "loss": 0.0042, "step": 423380 }, { "epoch": 4.523639083284364, "grad_norm": 0.03198231756687164, "learning_rate": 7.246546212373195e-07, "loss": 0.0077, "step": 423390 }, { "epoch": 4.523745926598643, "grad_norm": 0.0026089653838425875, "learning_rate": 7.246396116211892e-07, "loss": 0.0018, "step": 423400 }, { "epoch": 4.5238527699129225, "grad_norm": 0.0346762053668499, "learning_rate": 7.246246017514238e-07, "loss": 0.008, "step": 423410 }, { "epoch": 4.523959613227202, "grad_norm": 0.46197664737701416, "learning_rate": 7.246095916280404e-07, "loss": 0.0013, "step": 423420 }, { "epoch": 4.524066456541481, "grad_norm": 5.357468605041504, "learning_rate": 7.245945812510555e-07, "loss": 0.0171, "step": 423430 }, { "epoch": 4.524173299855762, "grad_norm": 0.09140677750110626, "learning_rate": 7.245795706204866e-07, "loss": 0.0035, "step": 423440 }, { "epoch": 4.524280143170041, "grad_norm": 0.45999839901924133, "learning_rate": 7.245645597363503e-07, "loss": 0.0152, "step": 423450 }, { "epoch": 4.524386986484321, "grad_norm": 0.46226203441619873, "learning_rate": 7.245495485986638e-07, "loss": 0.0095, "step": 423460 }, { "epoch": 4.5244938297986, "grad_norm": 0.09380040317773819, "learning_rate": 7.245345372074438e-07, "loss": 0.011, "step": 423470 }, { "epoch": 4.52460067311288, "grad_norm": 1.7592483758926392, "learning_rate": 7.245195255627075e-07, "loss": 0.0129, "step": 423480 }, { "epoch": 4.52470751642716, "grad_norm": 0.014410422183573246, "learning_rate": 7.245045136644716e-07, "loss": 0.0042, "step": 423490 }, { "epoch": 4.524814359741439, "grad_norm": 5.705301284790039, "learning_rate": 7.244895015127532e-07, "loss": 0.0099, "step": 423500 }, { "epoch": 4.524921203055719, "grad_norm": 11.770792961120605, "learning_rate": 7.244744891075692e-07, "loss": 0.0471, "step": 423510 }, { "epoch": 4.525028046369998, "grad_norm": 0.06662607938051224, "learning_rate": 7.244594764489364e-07, "loss": 0.0031, "step": 423520 }, { "epoch": 4.525134889684278, "grad_norm": 0.0013062700163573027, "learning_rate": 7.244444635368722e-07, "loss": 0.0128, "step": 423530 }, { "epoch": 4.525241732998557, "grad_norm": 0.0008728418033570051, "learning_rate": 7.24429450371393e-07, "loss": 0.009, "step": 423540 }, { "epoch": 4.525348576312838, "grad_norm": 0.0005700532929040492, "learning_rate": 7.244144369525162e-07, "loss": 0.0069, "step": 423550 }, { "epoch": 4.525455419627117, "grad_norm": 0.18033631145954132, "learning_rate": 7.243994232802585e-07, "loss": 0.0124, "step": 423560 }, { "epoch": 4.5255622629413965, "grad_norm": 1.441088318824768, "learning_rate": 7.243844093546368e-07, "loss": 0.0156, "step": 423570 }, { "epoch": 4.525669106255676, "grad_norm": 0.26186197996139526, "learning_rate": 7.243693951756684e-07, "loss": 0.007, "step": 423580 }, { "epoch": 4.525775949569955, "grad_norm": 0.002372625283896923, "learning_rate": 7.243543807433699e-07, "loss": 0.0014, "step": 423590 }, { "epoch": 4.525882792884235, "grad_norm": 0.14404936134815216, "learning_rate": 7.243393660577584e-07, "loss": 0.0161, "step": 423600 }, { "epoch": 4.525989636198515, "grad_norm": 0.14764900505542755, "learning_rate": 7.243243511188507e-07, "loss": 0.0057, "step": 423610 }, { "epoch": 4.526096479512795, "grad_norm": 0.0014764996012672782, "learning_rate": 7.243093359266639e-07, "loss": 0.0335, "step": 423620 }, { "epoch": 4.526203322827074, "grad_norm": 1.09227454662323, "learning_rate": 7.242943204812151e-07, "loss": 0.0073, "step": 423630 }, { "epoch": 4.526310166141354, "grad_norm": 0.6941593885421753, "learning_rate": 7.24279304782521e-07, "loss": 0.002, "step": 423640 }, { "epoch": 4.526417009455633, "grad_norm": 1.6615078449249268, "learning_rate": 7.242642888305985e-07, "loss": 0.007, "step": 423650 }, { "epoch": 4.5265238527699125, "grad_norm": 0.12828628718852997, "learning_rate": 7.242492726254649e-07, "loss": 0.0007, "step": 423660 }, { "epoch": 4.526630696084193, "grad_norm": 0.6872645616531372, "learning_rate": 7.242342561671368e-07, "loss": 0.0014, "step": 423670 }, { "epoch": 4.526737539398472, "grad_norm": 0.037748079746961594, "learning_rate": 7.242192394556313e-07, "loss": 0.0112, "step": 423680 }, { "epoch": 4.526844382712752, "grad_norm": 0.001948875724337995, "learning_rate": 7.242042224909654e-07, "loss": 0.0019, "step": 423690 }, { "epoch": 4.526951226027031, "grad_norm": 0.0005702339112758636, "learning_rate": 7.241892052731561e-07, "loss": 0.0031, "step": 423700 }, { "epoch": 4.527058069341311, "grad_norm": 3.935547113418579, "learning_rate": 7.241741878022202e-07, "loss": 0.0072, "step": 423710 }, { "epoch": 4.527164912655591, "grad_norm": 0.09999178349971771, "learning_rate": 7.241591700781746e-07, "loss": 0.004, "step": 423720 }, { "epoch": 4.5272717559698705, "grad_norm": 0.04542909190058708, "learning_rate": 7.241441521010365e-07, "loss": 0.0201, "step": 423730 }, { "epoch": 4.52737859928415, "grad_norm": 0.029752789065241814, "learning_rate": 7.241291338708227e-07, "loss": 0.0415, "step": 423740 }, { "epoch": 4.527485442598429, "grad_norm": 0.004808285739272833, "learning_rate": 7.241141153875503e-07, "loss": 0.0029, "step": 423750 }, { "epoch": 4.527592285912709, "grad_norm": 0.005133551545441151, "learning_rate": 7.240990966512362e-07, "loss": 0.005, "step": 423760 }, { "epoch": 4.527699129226988, "grad_norm": 0.0037879771552979946, "learning_rate": 7.240840776618971e-07, "loss": 0.0051, "step": 423770 }, { "epoch": 4.527805972541268, "grad_norm": 0.016337143257260323, "learning_rate": 7.240690584195501e-07, "loss": 0.0028, "step": 423780 }, { "epoch": 4.527912815855548, "grad_norm": 0.009632659144699574, "learning_rate": 7.240540389242124e-07, "loss": 0.0007, "step": 423790 }, { "epoch": 4.528019659169828, "grad_norm": 0.001685234485194087, "learning_rate": 7.240390191759007e-07, "loss": 0.0144, "step": 423800 }, { "epoch": 4.528126502484107, "grad_norm": 0.003659558016806841, "learning_rate": 7.240239991746321e-07, "loss": 0.0023, "step": 423810 }, { "epoch": 4.5282333457983865, "grad_norm": 5.526543617248535, "learning_rate": 7.240089789204234e-07, "loss": 0.003, "step": 423820 }, { "epoch": 4.528340189112666, "grad_norm": 0.005029692780226469, "learning_rate": 7.239939584132918e-07, "loss": 0.0085, "step": 423830 }, { "epoch": 4.528447032426946, "grad_norm": 1.224561333656311, "learning_rate": 7.239789376532542e-07, "loss": 0.0083, "step": 423840 }, { "epoch": 4.528553875741226, "grad_norm": 3.3567495346069336, "learning_rate": 7.239639166403272e-07, "loss": 0.0096, "step": 423850 }, { "epoch": 4.528660719055505, "grad_norm": 10.023242950439453, "learning_rate": 7.239488953745282e-07, "loss": 0.028, "step": 423860 }, { "epoch": 4.528767562369785, "grad_norm": 0.16146458685398102, "learning_rate": 7.23933873855874e-07, "loss": 0.005, "step": 423870 }, { "epoch": 4.528874405684064, "grad_norm": 0.002720070770010352, "learning_rate": 7.239188520843814e-07, "loss": 0.0037, "step": 423880 }, { "epoch": 4.528981248998344, "grad_norm": 0.01920316554605961, "learning_rate": 7.239038300600678e-07, "loss": 0.019, "step": 423890 }, { "epoch": 4.529088092312623, "grad_norm": 0.003050730563700199, "learning_rate": 7.238888077829496e-07, "loss": 0.003, "step": 423900 }, { "epoch": 4.5291949356269035, "grad_norm": 2.9940695762634277, "learning_rate": 7.238737852530441e-07, "loss": 0.0357, "step": 423910 }, { "epoch": 4.529301778941183, "grad_norm": 2.6830129623413086, "learning_rate": 7.238587624703683e-07, "loss": 0.0315, "step": 423920 }, { "epoch": 4.529408622255462, "grad_norm": 4.661716938018799, "learning_rate": 7.23843739434939e-07, "loss": 0.0063, "step": 423930 }, { "epoch": 4.529515465569742, "grad_norm": 1.616234302520752, "learning_rate": 7.238287161467731e-07, "loss": 0.0021, "step": 423940 }, { "epoch": 4.529622308884021, "grad_norm": 1.4984323978424072, "learning_rate": 7.238136926058878e-07, "loss": 0.0025, "step": 423950 }, { "epoch": 4.529729152198302, "grad_norm": 0.019048603251576424, "learning_rate": 7.237986688123001e-07, "loss": 0.0013, "step": 423960 }, { "epoch": 4.529835995512581, "grad_norm": 0.015786388888955116, "learning_rate": 7.237836447660266e-07, "loss": 0.0018, "step": 423970 }, { "epoch": 4.5299428388268606, "grad_norm": 0.0005979744018986821, "learning_rate": 7.237686204670845e-07, "loss": 0.0042, "step": 423980 }, { "epoch": 4.53004968214114, "grad_norm": 7.67644739151001, "learning_rate": 7.237535959154908e-07, "loss": 0.0094, "step": 423990 }, { "epoch": 4.5301565254554195, "grad_norm": 4.3691325187683105, "learning_rate": 7.237385711112624e-07, "loss": 0.0124, "step": 424000 }, { "epoch": 4.530263368769699, "grad_norm": 0.003383639035746455, "learning_rate": 7.237235460544162e-07, "loss": 0.0011, "step": 424010 }, { "epoch": 4.530370212083978, "grad_norm": 0.00932389684021473, "learning_rate": 7.237085207449693e-07, "loss": 0.0105, "step": 424020 }, { "epoch": 4.530477055398259, "grad_norm": 0.035177312791347504, "learning_rate": 7.236934951829385e-07, "loss": 0.0028, "step": 424030 }, { "epoch": 4.530583898712538, "grad_norm": 0.03537631407380104, "learning_rate": 7.23678469368341e-07, "loss": 0.0138, "step": 424040 }, { "epoch": 4.530690742026818, "grad_norm": 0.21393707394599915, "learning_rate": 7.236634433011936e-07, "loss": 0.0021, "step": 424050 }, { "epoch": 4.530797585341097, "grad_norm": 0.012765169143676758, "learning_rate": 7.236484169815132e-07, "loss": 0.0007, "step": 424060 }, { "epoch": 4.530904428655377, "grad_norm": 0.009257365949451923, "learning_rate": 7.236333904093171e-07, "loss": 0.0106, "step": 424070 }, { "epoch": 4.531011271969657, "grad_norm": 0.005537607707083225, "learning_rate": 7.236183635846217e-07, "loss": 0.008, "step": 424080 }, { "epoch": 4.531118115283936, "grad_norm": 0.24073317646980286, "learning_rate": 7.236033365074445e-07, "loss": 0.0144, "step": 424090 }, { "epoch": 4.531224958598216, "grad_norm": 1.139295220375061, "learning_rate": 7.235883091778023e-07, "loss": 0.0039, "step": 424100 }, { "epoch": 4.531331801912495, "grad_norm": 29.812210083007812, "learning_rate": 7.235732815957117e-07, "loss": 0.0237, "step": 424110 }, { "epoch": 4.531438645226775, "grad_norm": 0.9704573750495911, "learning_rate": 7.235582537611903e-07, "loss": 0.0009, "step": 424120 }, { "epoch": 4.531545488541054, "grad_norm": 0.004889120813459158, "learning_rate": 7.235432256742547e-07, "loss": 0.0074, "step": 424130 }, { "epoch": 4.531652331855334, "grad_norm": 0.006012440659105778, "learning_rate": 7.235281973349218e-07, "loss": 0.0053, "step": 424140 }, { "epoch": 4.531759175169614, "grad_norm": 0.12132219970226288, "learning_rate": 7.235131687432088e-07, "loss": 0.0084, "step": 424150 }, { "epoch": 4.5318660184838935, "grad_norm": 0.10514508932828903, "learning_rate": 7.234981398991325e-07, "loss": 0.0028, "step": 424160 }, { "epoch": 4.531972861798173, "grad_norm": 0.020733006298542023, "learning_rate": 7.234831108027101e-07, "loss": 0.0037, "step": 424170 }, { "epoch": 4.532079705112452, "grad_norm": 0.02441725879907608, "learning_rate": 7.234680814539582e-07, "loss": 0.0025, "step": 424180 }, { "epoch": 4.532186548426732, "grad_norm": 0.02457199990749359, "learning_rate": 7.234530518528941e-07, "loss": 0.0079, "step": 424190 }, { "epoch": 4.532293391741012, "grad_norm": 0.01047265063971281, "learning_rate": 7.234380219995347e-07, "loss": 0.0201, "step": 424200 }, { "epoch": 4.532400235055292, "grad_norm": 7.685698986053467, "learning_rate": 7.234229918938967e-07, "loss": 0.0056, "step": 424210 }, { "epoch": 4.532507078369571, "grad_norm": 0.013202875852584839, "learning_rate": 7.234079615359974e-07, "loss": 0.0078, "step": 424220 }, { "epoch": 4.532613921683851, "grad_norm": 2.8063809871673584, "learning_rate": 7.233929309258536e-07, "loss": 0.0073, "step": 424230 }, { "epoch": 4.53272076499813, "grad_norm": 0.418025404214859, "learning_rate": 7.233779000634824e-07, "loss": 0.0422, "step": 424240 }, { "epoch": 4.5328276083124095, "grad_norm": 1.1606231927871704, "learning_rate": 7.233628689489006e-07, "loss": 0.0124, "step": 424250 }, { "epoch": 4.53293445162669, "grad_norm": 0.014773665927350521, "learning_rate": 7.233478375821253e-07, "loss": 0.001, "step": 424260 }, { "epoch": 4.533041294940969, "grad_norm": 0.04271036013960838, "learning_rate": 7.233328059631735e-07, "loss": 0.0133, "step": 424270 }, { "epoch": 4.533148138255249, "grad_norm": 0.0027944843750447035, "learning_rate": 7.233177740920619e-07, "loss": 0.0144, "step": 424280 }, { "epoch": 4.533254981569528, "grad_norm": 0.0867779329419136, "learning_rate": 7.233027419688079e-07, "loss": 0.0018, "step": 424290 }, { "epoch": 4.533361824883808, "grad_norm": 0.19240926206111908, "learning_rate": 7.232877095934282e-07, "loss": 0.0055, "step": 424300 }, { "epoch": 4.533468668198087, "grad_norm": 0.04735748842358589, "learning_rate": 7.232726769659399e-07, "loss": 0.0017, "step": 424310 }, { "epoch": 4.5335755115123675, "grad_norm": 0.24480777978897095, "learning_rate": 7.232576440863597e-07, "loss": 0.0004, "step": 424320 }, { "epoch": 4.533682354826647, "grad_norm": 0.020967384800314903, "learning_rate": 7.232426109547048e-07, "loss": 0.0099, "step": 424330 }, { "epoch": 4.533789198140926, "grad_norm": 0.06938889622688293, "learning_rate": 7.232275775709923e-07, "loss": 0.0092, "step": 424340 }, { "epoch": 4.533896041455206, "grad_norm": 0.09804518520832062, "learning_rate": 7.232125439352389e-07, "loss": 0.0385, "step": 424350 }, { "epoch": 4.534002884769485, "grad_norm": 0.004796965979039669, "learning_rate": 7.231975100474617e-07, "loss": 0.0094, "step": 424360 }, { "epoch": 4.534109728083765, "grad_norm": 0.0016085432143881917, "learning_rate": 7.231824759076776e-07, "loss": 0.0029, "step": 424370 }, { "epoch": 4.534216571398045, "grad_norm": 0.0011405526893213391, "learning_rate": 7.231674415159038e-07, "loss": 0.0198, "step": 424380 }, { "epoch": 4.534323414712325, "grad_norm": 0.002624433720484376, "learning_rate": 7.23152406872157e-07, "loss": 0.0047, "step": 424390 }, { "epoch": 4.534430258026604, "grad_norm": 0.05725012347102165, "learning_rate": 7.231373719764543e-07, "loss": 0.0011, "step": 424400 }, { "epoch": 4.5345371013408835, "grad_norm": 0.1695612072944641, "learning_rate": 7.231223368288128e-07, "loss": 0.0024, "step": 424410 }, { "epoch": 4.534643944655163, "grad_norm": 0.058297738432884216, "learning_rate": 7.231073014292491e-07, "loss": 0.0022, "step": 424420 }, { "epoch": 4.534750787969443, "grad_norm": 0.009109758771955967, "learning_rate": 7.230922657777807e-07, "loss": 0.0066, "step": 424430 }, { "epoch": 4.534857631283723, "grad_norm": 0.0012073289835825562, "learning_rate": 7.230772298744241e-07, "loss": 0.0139, "step": 424440 }, { "epoch": 4.534964474598002, "grad_norm": 5.887613773345947, "learning_rate": 7.230621937191965e-07, "loss": 0.0089, "step": 424450 }, { "epoch": 4.535071317912282, "grad_norm": 0.012262209318578243, "learning_rate": 7.230471573121149e-07, "loss": 0.0005, "step": 424460 }, { "epoch": 4.535178161226561, "grad_norm": 0.0007516274927183986, "learning_rate": 7.230321206531962e-07, "loss": 0.001, "step": 424470 }, { "epoch": 4.535285004540841, "grad_norm": 2.936774969100952, "learning_rate": 7.230170837424574e-07, "loss": 0.0016, "step": 424480 }, { "epoch": 4.53539184785512, "grad_norm": 0.002000063192099333, "learning_rate": 7.230020465799155e-07, "loss": 0.0016, "step": 424490 }, { "epoch": 4.5354986911694, "grad_norm": 5.205625057220459, "learning_rate": 7.229870091655874e-07, "loss": 0.0275, "step": 424500 }, { "epoch": 4.53560553448368, "grad_norm": 0.001988527597859502, "learning_rate": 7.229719714994902e-07, "loss": 0.0027, "step": 424510 }, { "epoch": 4.535712377797959, "grad_norm": 0.01258015912026167, "learning_rate": 7.229569335816407e-07, "loss": 0.0098, "step": 424520 }, { "epoch": 4.535819221112239, "grad_norm": 0.003936989698559046, "learning_rate": 7.229418954120561e-07, "loss": 0.0061, "step": 424530 }, { "epoch": 4.535926064426518, "grad_norm": 0.0006984256324358284, "learning_rate": 7.229268569907533e-07, "loss": 0.0031, "step": 424540 }, { "epoch": 4.536032907740799, "grad_norm": 0.49620112776756287, "learning_rate": 7.229118183177493e-07, "loss": 0.0001, "step": 424550 }, { "epoch": 4.536139751055078, "grad_norm": 0.11393541842699051, "learning_rate": 7.228967793930608e-07, "loss": 0.0, "step": 424560 }, { "epoch": 4.5362465943693575, "grad_norm": 0.017488133162260056, "learning_rate": 7.228817402167053e-07, "loss": 0.0027, "step": 424570 }, { "epoch": 4.536353437683637, "grad_norm": 0.01611183024942875, "learning_rate": 7.228667007886993e-07, "loss": 0.0005, "step": 424580 }, { "epoch": 4.536460280997916, "grad_norm": 0.335453599691391, "learning_rate": 7.2285166110906e-07, "loss": 0.0079, "step": 424590 }, { "epoch": 4.536567124312196, "grad_norm": 6.008216857910156, "learning_rate": 7.228366211778044e-07, "loss": 0.0049, "step": 424600 }, { "epoch": 4.536673967626475, "grad_norm": 0.8140125274658203, "learning_rate": 7.228215809949495e-07, "loss": 0.0027, "step": 424610 }, { "epoch": 4.536780810940756, "grad_norm": 1.4143916368484497, "learning_rate": 7.228065405605121e-07, "loss": 0.0003, "step": 424620 }, { "epoch": 4.536887654255035, "grad_norm": 0.0023066187277436256, "learning_rate": 7.227914998745093e-07, "loss": 0.0082, "step": 424630 }, { "epoch": 4.536994497569315, "grad_norm": 0.028557846322655678, "learning_rate": 7.22776458936958e-07, "loss": 0.0041, "step": 424640 }, { "epoch": 4.537101340883594, "grad_norm": 0.5879378914833069, "learning_rate": 7.227614177478755e-07, "loss": 0.0062, "step": 424650 }, { "epoch": 4.5372081841978735, "grad_norm": 0.005913969129323959, "learning_rate": 7.227463763072783e-07, "loss": 0.0162, "step": 424660 }, { "epoch": 4.537315027512154, "grad_norm": 6.192991733551025, "learning_rate": 7.227313346151839e-07, "loss": 0.0018, "step": 424670 }, { "epoch": 4.537421870826433, "grad_norm": 0.8817659616470337, "learning_rate": 7.227162926716087e-07, "loss": 0.0025, "step": 424680 }, { "epoch": 4.537528714140713, "grad_norm": 7.322812557220459, "learning_rate": 7.227012504765702e-07, "loss": 0.0179, "step": 424690 }, { "epoch": 4.537635557454992, "grad_norm": 0.0012739208759739995, "learning_rate": 7.226862080300853e-07, "loss": 0.0061, "step": 424700 }, { "epoch": 4.537742400769272, "grad_norm": 0.00930295791476965, "learning_rate": 7.226711653321706e-07, "loss": 0.0022, "step": 424710 }, { "epoch": 4.537849244083551, "grad_norm": 0.042194612324237823, "learning_rate": 7.226561223828435e-07, "loss": 0.0064, "step": 424720 }, { "epoch": 4.537956087397831, "grad_norm": 0.2939348816871643, "learning_rate": 7.226410791821208e-07, "loss": 0.0229, "step": 424730 }, { "epoch": 4.538062930712111, "grad_norm": 0.009767279960215092, "learning_rate": 7.226260357300194e-07, "loss": 0.0003, "step": 424740 }, { "epoch": 4.53816977402639, "grad_norm": 0.5990375876426697, "learning_rate": 7.226109920265566e-07, "loss": 0.0012, "step": 424750 }, { "epoch": 4.53827661734067, "grad_norm": 0.1654779314994812, "learning_rate": 7.225959480717491e-07, "loss": 0.0048, "step": 424760 }, { "epoch": 4.538383460654949, "grad_norm": 0.004565129056572914, "learning_rate": 7.22580903865614e-07, "loss": 0.0047, "step": 424770 }, { "epoch": 4.538490303969229, "grad_norm": 0.014323755167424679, "learning_rate": 7.225658594081682e-07, "loss": 0.0086, "step": 424780 }, { "epoch": 4.538597147283509, "grad_norm": 0.003582519479095936, "learning_rate": 7.225508146994288e-07, "loss": 0.008, "step": 424790 }, { "epoch": 4.538703990597789, "grad_norm": 4.194408416748047, "learning_rate": 7.225357697394128e-07, "loss": 0.0063, "step": 424800 }, { "epoch": 4.538810833912068, "grad_norm": 0.08730243891477585, "learning_rate": 7.225207245281371e-07, "loss": 0.0042, "step": 424810 }, { "epoch": 4.5389176772263475, "grad_norm": 0.007133255712687969, "learning_rate": 7.225056790656185e-07, "loss": 0.0244, "step": 424820 }, { "epoch": 4.539024520540627, "grad_norm": 0.0009528398513793945, "learning_rate": 7.224906333518743e-07, "loss": 0.0281, "step": 424830 }, { "epoch": 4.5391313638549065, "grad_norm": 0.012028663419187069, "learning_rate": 7.224755873869215e-07, "loss": 0.0033, "step": 424840 }, { "epoch": 4.539238207169186, "grad_norm": 0.04460446164011955, "learning_rate": 7.22460541170777e-07, "loss": 0.0003, "step": 424850 }, { "epoch": 4.539345050483466, "grad_norm": 0.33480384945869446, "learning_rate": 7.224454947034575e-07, "loss": 0.0056, "step": 424860 }, { "epoch": 4.539451893797746, "grad_norm": 0.0024112414103001356, "learning_rate": 7.224304479849805e-07, "loss": 0.0194, "step": 424870 }, { "epoch": 4.539558737112025, "grad_norm": 0.0727422758936882, "learning_rate": 7.224154010153626e-07, "loss": 0.0211, "step": 424880 }, { "epoch": 4.539665580426305, "grad_norm": 0.00203648186288774, "learning_rate": 7.224003537946211e-07, "loss": 0.0026, "step": 424890 }, { "epoch": 4.539772423740584, "grad_norm": 1.4852229356765747, "learning_rate": 7.223853063227726e-07, "loss": 0.0035, "step": 424900 }, { "epoch": 4.539879267054864, "grad_norm": 0.9390925168991089, "learning_rate": 7.223702585998344e-07, "loss": 0.0067, "step": 424910 }, { "epoch": 4.539986110369144, "grad_norm": 3.8655688762664795, "learning_rate": 7.223552106258234e-07, "loss": 0.0082, "step": 424920 }, { "epoch": 4.540092953683423, "grad_norm": 0.5271345973014832, "learning_rate": 7.223401624007565e-07, "loss": 0.0033, "step": 424930 }, { "epoch": 4.540199796997703, "grad_norm": 0.0007710166391916573, "learning_rate": 7.223251139246509e-07, "loss": 0.008, "step": 424940 }, { "epoch": 4.540306640311982, "grad_norm": 0.003700818167999387, "learning_rate": 7.223100651975234e-07, "loss": 0.0018, "step": 424950 }, { "epoch": 4.540413483626262, "grad_norm": 0.001563703641295433, "learning_rate": 7.22295016219391e-07, "loss": 0.0095, "step": 424960 }, { "epoch": 4.540520326940542, "grad_norm": 5.646750450134277, "learning_rate": 7.222799669902708e-07, "loss": 0.0589, "step": 424970 }, { "epoch": 4.5406271702548215, "grad_norm": 1.021183729171753, "learning_rate": 7.222649175101798e-07, "loss": 0.0039, "step": 424980 }, { "epoch": 4.540734013569101, "grad_norm": 0.0020317062735557556, "learning_rate": 7.222498677791349e-07, "loss": 0.0055, "step": 424990 }, { "epoch": 4.5408408568833805, "grad_norm": 0.005346267018467188, "learning_rate": 7.22234817797153e-07, "loss": 0.0025, "step": 425000 }, { "epoch": 4.54094770019766, "grad_norm": 0.010961983352899551, "learning_rate": 7.222197675642514e-07, "loss": 0.0009, "step": 425010 }, { "epoch": 4.541054543511939, "grad_norm": 0.013026734814047813, "learning_rate": 7.222047170804467e-07, "loss": 0.0316, "step": 425020 }, { "epoch": 4.54116138682622, "grad_norm": 7.104355812072754, "learning_rate": 7.221896663457562e-07, "loss": 0.0019, "step": 425030 }, { "epoch": 4.541268230140499, "grad_norm": 0.47779250144958496, "learning_rate": 7.221746153601971e-07, "loss": 0.0469, "step": 425040 }, { "epoch": 4.541375073454779, "grad_norm": 0.005710747558623552, "learning_rate": 7.221595641237856e-07, "loss": 0.0179, "step": 425050 }, { "epoch": 4.541481916769058, "grad_norm": 0.6930626630783081, "learning_rate": 7.221445126365395e-07, "loss": 0.0195, "step": 425060 }, { "epoch": 4.541588760083338, "grad_norm": 0.08229425549507141, "learning_rate": 7.221294608984755e-07, "loss": 0.0004, "step": 425070 }, { "epoch": 4.541695603397617, "grad_norm": 0.005313350353389978, "learning_rate": 7.221144089096104e-07, "loss": 0.0017, "step": 425080 }, { "epoch": 4.541802446711897, "grad_norm": 0.009717308916151524, "learning_rate": 7.220993566699615e-07, "loss": 0.003, "step": 425090 }, { "epoch": 4.541909290026177, "grad_norm": 0.009628869593143463, "learning_rate": 7.220843041795456e-07, "loss": 0.0045, "step": 425100 }, { "epoch": 4.542016133340456, "grad_norm": 4.490528106689453, "learning_rate": 7.220692514383797e-07, "loss": 0.013, "step": 425110 }, { "epoch": 4.542122976654736, "grad_norm": 0.004970682319253683, "learning_rate": 7.220541984464809e-07, "loss": 0.0218, "step": 425120 }, { "epoch": 4.542229819969015, "grad_norm": 0.012587868608534336, "learning_rate": 7.220391452038664e-07, "loss": 0.0043, "step": 425130 }, { "epoch": 4.542336663283295, "grad_norm": 0.010157790035009384, "learning_rate": 7.220240917105526e-07, "loss": 0.0005, "step": 425140 }, { "epoch": 4.542443506597575, "grad_norm": 2.542840003967285, "learning_rate": 7.22009037966557e-07, "loss": 0.025, "step": 425150 }, { "epoch": 4.5425503499118545, "grad_norm": 0.02014467492699623, "learning_rate": 7.219939839718962e-07, "loss": 0.0014, "step": 425160 }, { "epoch": 4.542657193226134, "grad_norm": 4.232100009918213, "learning_rate": 7.219789297265878e-07, "loss": 0.0198, "step": 425170 }, { "epoch": 4.542764036540413, "grad_norm": 9.53804874420166, "learning_rate": 7.219638752306483e-07, "loss": 0.0314, "step": 425180 }, { "epoch": 4.542870879854693, "grad_norm": 0.009862425737082958, "learning_rate": 7.219488204840947e-07, "loss": 0.0066, "step": 425190 }, { "epoch": 4.542977723168972, "grad_norm": 0.0189900491386652, "learning_rate": 7.219337654869443e-07, "loss": 0.001, "step": 425200 }, { "epoch": 4.543084566483253, "grad_norm": 0.5480584502220154, "learning_rate": 7.219187102392139e-07, "loss": 0.0185, "step": 425210 }, { "epoch": 4.543191409797532, "grad_norm": 11.34524154663086, "learning_rate": 7.219036547409204e-07, "loss": 0.0342, "step": 425220 }, { "epoch": 4.543298253111812, "grad_norm": 0.6412558555603027, "learning_rate": 7.21888598992081e-07, "loss": 0.014, "step": 425230 }, { "epoch": 4.543405096426091, "grad_norm": 7.35688591003418, "learning_rate": 7.218735429927127e-07, "loss": 0.0158, "step": 425240 }, { "epoch": 4.5435119397403705, "grad_norm": 0.008501733653247356, "learning_rate": 7.218584867428323e-07, "loss": 0.0024, "step": 425250 }, { "epoch": 4.543618783054651, "grad_norm": 0.2645829916000366, "learning_rate": 7.21843430242457e-07, "loss": 0.0143, "step": 425260 }, { "epoch": 4.54372562636893, "grad_norm": 6.962947368621826, "learning_rate": 7.218283734916038e-07, "loss": 0.0105, "step": 425270 }, { "epoch": 4.54383246968321, "grad_norm": 0.4638362228870392, "learning_rate": 7.218133164902895e-07, "loss": 0.0047, "step": 425280 }, { "epoch": 4.543939312997489, "grad_norm": 0.05730024725198746, "learning_rate": 7.217982592385312e-07, "loss": 0.0049, "step": 425290 }, { "epoch": 4.544046156311769, "grad_norm": 0.07769237458705902, "learning_rate": 7.217832017363461e-07, "loss": 0.0092, "step": 425300 }, { "epoch": 4.544152999626048, "grad_norm": 0.9075239896774292, "learning_rate": 7.217681439837508e-07, "loss": 0.004, "step": 425310 }, { "epoch": 4.544259842940328, "grad_norm": 0.04386333376169205, "learning_rate": 7.217530859807627e-07, "loss": 0.0052, "step": 425320 }, { "epoch": 4.544366686254608, "grad_norm": 0.0005818169447593391, "learning_rate": 7.217380277273985e-07, "loss": 0.0128, "step": 425330 }, { "epoch": 4.544473529568887, "grad_norm": 0.10651472210884094, "learning_rate": 7.217229692236755e-07, "loss": 0.0308, "step": 425340 }, { "epoch": 4.544580372883167, "grad_norm": 0.0018901515286415815, "learning_rate": 7.217079104696104e-07, "loss": 0.0083, "step": 425350 }, { "epoch": 4.544687216197446, "grad_norm": 0.7046982049942017, "learning_rate": 7.216928514652203e-07, "loss": 0.0074, "step": 425360 }, { "epoch": 4.544794059511726, "grad_norm": 6.738494396209717, "learning_rate": 7.216777922105223e-07, "loss": 0.0207, "step": 425370 }, { "epoch": 4.544900902826006, "grad_norm": 0.010270845144987106, "learning_rate": 7.216627327055336e-07, "loss": 0.0055, "step": 425380 }, { "epoch": 4.545007746140286, "grad_norm": 0.026508647948503494, "learning_rate": 7.216476729502705e-07, "loss": 0.0028, "step": 425390 }, { "epoch": 4.545114589454565, "grad_norm": 0.129415363073349, "learning_rate": 7.216326129447507e-07, "loss": 0.0041, "step": 425400 }, { "epoch": 4.5452214327688445, "grad_norm": 0.005252334754914045, "learning_rate": 7.216175526889909e-07, "loss": 0.0055, "step": 425410 }, { "epoch": 4.545328276083124, "grad_norm": 0.0010734329698607326, "learning_rate": 7.216024921830082e-07, "loss": 0.0043, "step": 425420 }, { "epoch": 4.545435119397403, "grad_norm": 0.11927144229412079, "learning_rate": 7.215874314268195e-07, "loss": 0.008, "step": 425430 }, { "epoch": 4.545541962711683, "grad_norm": 0.020486995577812195, "learning_rate": 7.215723704204419e-07, "loss": 0.0022, "step": 425440 }, { "epoch": 4.545648806025963, "grad_norm": 0.029018638655543327, "learning_rate": 7.215573091638923e-07, "loss": 0.014, "step": 425450 }, { "epoch": 4.545755649340243, "grad_norm": 3.6511895656585693, "learning_rate": 7.215422476571879e-07, "loss": 0.0068, "step": 425460 }, { "epoch": 4.545862492654522, "grad_norm": 6.957381248474121, "learning_rate": 7.215271859003456e-07, "loss": 0.0074, "step": 425470 }, { "epoch": 4.545969335968802, "grad_norm": 2.282780647277832, "learning_rate": 7.215121238933822e-07, "loss": 0.008, "step": 425480 }, { "epoch": 4.546076179283081, "grad_norm": 1.150741457939148, "learning_rate": 7.214970616363151e-07, "loss": 0.0048, "step": 425490 }, { "epoch": 4.546183022597361, "grad_norm": 9.8986234664917, "learning_rate": 7.21481999129161e-07, "loss": 0.0142, "step": 425500 }, { "epoch": 4.546289865911641, "grad_norm": 0.03777044638991356, "learning_rate": 7.21466936371937e-07, "loss": 0.0038, "step": 425510 }, { "epoch": 4.54639670922592, "grad_norm": 0.0028137729968875647, "learning_rate": 7.214518733646601e-07, "loss": 0.0118, "step": 425520 }, { "epoch": 4.5465035525402, "grad_norm": 1.3376693725585938, "learning_rate": 7.214368101073472e-07, "loss": 0.0021, "step": 425530 }, { "epoch": 4.546610395854479, "grad_norm": 0.6201884150505066, "learning_rate": 7.214217466000157e-07, "loss": 0.0015, "step": 425540 }, { "epoch": 4.546717239168759, "grad_norm": 0.012969824485480785, "learning_rate": 7.214066828426821e-07, "loss": 0.0023, "step": 425550 }, { "epoch": 4.546824082483038, "grad_norm": 0.008704815059900284, "learning_rate": 7.213916188353638e-07, "loss": 0.0053, "step": 425560 }, { "epoch": 4.5469309257973185, "grad_norm": 1.6105260848999023, "learning_rate": 7.213765545780777e-07, "loss": 0.0135, "step": 425570 }, { "epoch": 4.547037769111598, "grad_norm": 3.7071545124053955, "learning_rate": 7.213614900708407e-07, "loss": 0.0016, "step": 425580 }, { "epoch": 4.547144612425877, "grad_norm": 0.004888132214546204, "learning_rate": 7.213464253136698e-07, "loss": 0.0166, "step": 425590 }, { "epoch": 4.547251455740157, "grad_norm": 4.637415409088135, "learning_rate": 7.21331360306582e-07, "loss": 0.0045, "step": 425600 }, { "epoch": 4.547358299054436, "grad_norm": 0.0006870533688925207, "learning_rate": 7.213162950495944e-07, "loss": 0.0086, "step": 425610 }, { "epoch": 4.547465142368717, "grad_norm": 1.7955337762832642, "learning_rate": 7.213012295427242e-07, "loss": 0.0263, "step": 425620 }, { "epoch": 4.547571985682996, "grad_norm": 0.031371623277664185, "learning_rate": 7.212861637859881e-07, "loss": 0.0101, "step": 425630 }, { "epoch": 4.547678828997276, "grad_norm": 0.4587860703468323, "learning_rate": 7.212710977794033e-07, "loss": 0.0002, "step": 425640 }, { "epoch": 4.547785672311555, "grad_norm": 8.90445613861084, "learning_rate": 7.212560315229866e-07, "loss": 0.0248, "step": 425650 }, { "epoch": 4.5478925156258345, "grad_norm": 12.923750877380371, "learning_rate": 7.212409650167553e-07, "loss": 0.0088, "step": 425660 }, { "epoch": 4.547999358940114, "grad_norm": 3.038367986679077, "learning_rate": 7.212258982607261e-07, "loss": 0.0166, "step": 425670 }, { "epoch": 4.548106202254393, "grad_norm": 0.00857945904135704, "learning_rate": 7.212108312549161e-07, "loss": 0.0089, "step": 425680 }, { "epoch": 4.548213045568674, "grad_norm": 0.002737457398325205, "learning_rate": 7.211957639993426e-07, "loss": 0.002, "step": 425690 }, { "epoch": 4.548319888882953, "grad_norm": 0.06287900358438492, "learning_rate": 7.211806964940224e-07, "loss": 0.0183, "step": 425700 }, { "epoch": 4.548426732197233, "grad_norm": 0.7592479586601257, "learning_rate": 7.211656287389722e-07, "loss": 0.0021, "step": 425710 }, { "epoch": 4.548533575511512, "grad_norm": 0.21803905069828033, "learning_rate": 7.211505607342096e-07, "loss": 0.0006, "step": 425720 }, { "epoch": 4.548640418825792, "grad_norm": 0.5370083451271057, "learning_rate": 7.211354924797512e-07, "loss": 0.0144, "step": 425730 }, { "epoch": 4.548747262140072, "grad_norm": 3.954451084136963, "learning_rate": 7.211204239756141e-07, "loss": 0.0036, "step": 425740 }, { "epoch": 4.548854105454351, "grad_norm": 0.32657238841056824, "learning_rate": 7.211053552218155e-07, "loss": 0.0105, "step": 425750 }, { "epoch": 4.548960948768631, "grad_norm": 8.716361999511719, "learning_rate": 7.210902862183721e-07, "loss": 0.0121, "step": 425760 }, { "epoch": 4.54906779208291, "grad_norm": 0.01009284146130085, "learning_rate": 7.210752169653013e-07, "loss": 0.0071, "step": 425770 }, { "epoch": 4.54917463539719, "grad_norm": 0.2525164484977722, "learning_rate": 7.210601474626197e-07, "loss": 0.0107, "step": 425780 }, { "epoch": 4.549281478711469, "grad_norm": 0.5171775221824646, "learning_rate": 7.210450777103445e-07, "loss": 0.0117, "step": 425790 }, { "epoch": 4.54938832202575, "grad_norm": 1.7534900903701782, "learning_rate": 7.210300077084928e-07, "loss": 0.009, "step": 425800 }, { "epoch": 4.549495165340029, "grad_norm": 0.005064881406724453, "learning_rate": 7.210149374570817e-07, "loss": 0.0033, "step": 425810 }, { "epoch": 4.5496020086543085, "grad_norm": 0.006030951160937548, "learning_rate": 7.209998669561278e-07, "loss": 0.0055, "step": 425820 }, { "epoch": 4.549708851968588, "grad_norm": 0.47159531712532043, "learning_rate": 7.209847962056485e-07, "loss": 0.0021, "step": 425830 }, { "epoch": 4.5498156952828674, "grad_norm": 9.565526008605957, "learning_rate": 7.209697252056606e-07, "loss": 0.0258, "step": 425840 }, { "epoch": 4.549922538597147, "grad_norm": 10.388503074645996, "learning_rate": 7.209546539561813e-07, "loss": 0.0076, "step": 425850 }, { "epoch": 4.550029381911427, "grad_norm": 7.047033786773682, "learning_rate": 7.209395824572274e-07, "loss": 0.0364, "step": 425860 }, { "epoch": 4.550136225225707, "grad_norm": 0.22511814534664154, "learning_rate": 7.209245107088163e-07, "loss": 0.0007, "step": 425870 }, { "epoch": 4.550243068539986, "grad_norm": 0.6817896962165833, "learning_rate": 7.209094387109644e-07, "loss": 0.001, "step": 425880 }, { "epoch": 4.550349911854266, "grad_norm": 0.0023967523593455553, "learning_rate": 7.208943664636893e-07, "loss": 0.0107, "step": 425890 }, { "epoch": 4.550456755168545, "grad_norm": 0.019389891996979713, "learning_rate": 7.208792939670077e-07, "loss": 0.0131, "step": 425900 }, { "epoch": 4.5505635984828245, "grad_norm": 3.8745057582855225, "learning_rate": 7.208642212209365e-07, "loss": 0.0122, "step": 425910 }, { "epoch": 4.550670441797105, "grad_norm": 5.558736801147461, "learning_rate": 7.208491482254932e-07, "loss": 0.0466, "step": 425920 }, { "epoch": 4.550777285111384, "grad_norm": 0.052672889083623886, "learning_rate": 7.208340749806946e-07, "loss": 0.0013, "step": 425930 }, { "epoch": 4.550884128425664, "grad_norm": 3.989731788635254, "learning_rate": 7.208190014865574e-07, "loss": 0.0259, "step": 425940 }, { "epoch": 4.550990971739943, "grad_norm": 0.0006652068695984781, "learning_rate": 7.20803927743099e-07, "loss": 0.0012, "step": 425950 }, { "epoch": 4.551097815054223, "grad_norm": 0.008775983937084675, "learning_rate": 7.207888537503364e-07, "loss": 0.0013, "step": 425960 }, { "epoch": 4.551204658368503, "grad_norm": 0.11495493352413177, "learning_rate": 7.207737795082865e-07, "loss": 0.0005, "step": 425970 }, { "epoch": 4.5513115016827825, "grad_norm": 0.0034350750502198935, "learning_rate": 7.207587050169664e-07, "loss": 0.0416, "step": 425980 }, { "epoch": 4.551418344997062, "grad_norm": 0.0635806992650032, "learning_rate": 7.207436302763928e-07, "loss": 0.0068, "step": 425990 }, { "epoch": 4.5515251883113415, "grad_norm": 1.6206614971160889, "learning_rate": 7.207285552865832e-07, "loss": 0.0026, "step": 426000 }, { "epoch": 4.551632031625621, "grad_norm": 3.1164422035217285, "learning_rate": 7.207134800475544e-07, "loss": 0.0083, "step": 426010 }, { "epoch": 4.5517388749399, "grad_norm": 7.047910690307617, "learning_rate": 7.206984045593232e-07, "loss": 0.0193, "step": 426020 }, { "epoch": 4.55184571825418, "grad_norm": 1.5717378854751587, "learning_rate": 7.206833288219071e-07, "loss": 0.0064, "step": 426030 }, { "epoch": 4.55195256156846, "grad_norm": 0.003967795986682177, "learning_rate": 7.206682528353229e-07, "loss": 0.0028, "step": 426040 }, { "epoch": 4.55205940488274, "grad_norm": 0.006132043898105621, "learning_rate": 7.206531765995873e-07, "loss": 0.0037, "step": 426050 }, { "epoch": 4.552166248197019, "grad_norm": 0.030242344364523888, "learning_rate": 7.206381001147179e-07, "loss": 0.0009, "step": 426060 }, { "epoch": 4.5522730915112986, "grad_norm": 0.004399422090500593, "learning_rate": 7.206230233807314e-07, "loss": 0.0088, "step": 426070 }, { "epoch": 4.552379934825578, "grad_norm": 5.39506721496582, "learning_rate": 7.206079463976445e-07, "loss": 0.0246, "step": 426080 }, { "epoch": 4.552486778139858, "grad_norm": 0.020535144954919815, "learning_rate": 7.20592869165475e-07, "loss": 0.0017, "step": 426090 }, { "epoch": 4.552593621454138, "grad_norm": 0.0402272492647171, "learning_rate": 7.205777916842393e-07, "loss": 0.0041, "step": 426100 }, { "epoch": 4.552700464768417, "grad_norm": 0.5949289202690125, "learning_rate": 7.205627139539546e-07, "loss": 0.0107, "step": 426110 }, { "epoch": 4.552807308082697, "grad_norm": 3.2850711345672607, "learning_rate": 7.205476359746381e-07, "loss": 0.0171, "step": 426120 }, { "epoch": 4.552914151396976, "grad_norm": 0.1832323968410492, "learning_rate": 7.205325577463065e-07, "loss": 0.0019, "step": 426130 }, { "epoch": 4.553020994711256, "grad_norm": 0.007800297345966101, "learning_rate": 7.205174792689771e-07, "loss": 0.0432, "step": 426140 }, { "epoch": 4.553127838025535, "grad_norm": 0.012141651473939419, "learning_rate": 7.205024005426668e-07, "loss": 0.0046, "step": 426150 }, { "epoch": 4.5532346813398155, "grad_norm": 0.0066546108573675156, "learning_rate": 7.204873215673926e-07, "loss": 0.0002, "step": 426160 }, { "epoch": 4.553341524654095, "grad_norm": 3.782590627670288, "learning_rate": 7.204722423431717e-07, "loss": 0.0043, "step": 426170 }, { "epoch": 4.553448367968374, "grad_norm": 3.499044418334961, "learning_rate": 7.20457162870021e-07, "loss": 0.0057, "step": 426180 }, { "epoch": 4.553555211282654, "grad_norm": 3.385352849960327, "learning_rate": 7.204420831479574e-07, "loss": 0.0062, "step": 426190 }, { "epoch": 4.553662054596933, "grad_norm": 5.920772552490234, "learning_rate": 7.204270031769982e-07, "loss": 0.005, "step": 426200 }, { "epoch": 4.553768897911214, "grad_norm": 0.014048806391656399, "learning_rate": 7.204119229571601e-07, "loss": 0.0029, "step": 426210 }, { "epoch": 4.553875741225493, "grad_norm": 0.03813399374485016, "learning_rate": 7.203968424884603e-07, "loss": 0.0002, "step": 426220 }, { "epoch": 4.553982584539773, "grad_norm": 1.9820911884307861, "learning_rate": 7.203817617709161e-07, "loss": 0.0045, "step": 426230 }, { "epoch": 4.554089427854052, "grad_norm": 0.0034473505802452564, "learning_rate": 7.203666808045438e-07, "loss": 0.0035, "step": 426240 }, { "epoch": 4.5541962711683315, "grad_norm": 3.8875632286071777, "learning_rate": 7.203515995893613e-07, "loss": 0.0028, "step": 426250 }, { "epoch": 4.554303114482611, "grad_norm": 5.247346878051758, "learning_rate": 7.203365181253851e-07, "loss": 0.0075, "step": 426260 }, { "epoch": 4.55440995779689, "grad_norm": 0.7172396183013916, "learning_rate": 7.203214364126324e-07, "loss": 0.0114, "step": 426270 }, { "epoch": 4.554516801111171, "grad_norm": 0.008895193226635456, "learning_rate": 7.203063544511199e-07, "loss": 0.0071, "step": 426280 }, { "epoch": 4.55462364442545, "grad_norm": 0.007982867769896984, "learning_rate": 7.202912722408652e-07, "loss": 0.0016, "step": 426290 }, { "epoch": 4.55473048773973, "grad_norm": 0.28020867705345154, "learning_rate": 7.202761897818848e-07, "loss": 0.0154, "step": 426300 }, { "epoch": 4.554837331054009, "grad_norm": 0.0014487113803625107, "learning_rate": 7.20261107074196e-07, "loss": 0.0037, "step": 426310 }, { "epoch": 4.554944174368289, "grad_norm": 0.07533919811248779, "learning_rate": 7.20246024117816e-07, "loss": 0.0163, "step": 426320 }, { "epoch": 4.555051017682569, "grad_norm": 7.199512481689453, "learning_rate": 7.202309409127613e-07, "loss": 0.0605, "step": 426330 }, { "epoch": 4.555157860996848, "grad_norm": 5.965936183929443, "learning_rate": 7.202158574590495e-07, "loss": 0.0058, "step": 426340 }, { "epoch": 4.555264704311128, "grad_norm": 7.579892635345459, "learning_rate": 7.202007737566973e-07, "loss": 0.0437, "step": 426350 }, { "epoch": 4.555371547625407, "grad_norm": 0.7513131499290466, "learning_rate": 7.201856898057219e-07, "loss": 0.0008, "step": 426360 }, { "epoch": 4.555478390939687, "grad_norm": 0.32094159722328186, "learning_rate": 7.201706056061401e-07, "loss": 0.0266, "step": 426370 }, { "epoch": 4.555585234253966, "grad_norm": 0.043906498700380325, "learning_rate": 7.201555211579691e-07, "loss": 0.0012, "step": 426380 }, { "epoch": 4.555692077568246, "grad_norm": 0.00045871760812588036, "learning_rate": 7.201404364612258e-07, "loss": 0.012, "step": 426390 }, { "epoch": 4.555798920882526, "grad_norm": 0.042351994663476944, "learning_rate": 7.201253515159276e-07, "loss": 0.0006, "step": 426400 }, { "epoch": 4.5559057641968055, "grad_norm": 0.0035609714686870575, "learning_rate": 7.20110266322091e-07, "loss": 0.0003, "step": 426410 }, { "epoch": 4.556012607511085, "grad_norm": 0.595678985118866, "learning_rate": 7.200951808797334e-07, "loss": 0.0185, "step": 426420 }, { "epoch": 4.556119450825364, "grad_norm": 0.07127567380666733, "learning_rate": 7.200800951888718e-07, "loss": 0.0044, "step": 426430 }, { "epoch": 4.556226294139644, "grad_norm": 6.107090950012207, "learning_rate": 7.200650092495231e-07, "loss": 0.0052, "step": 426440 }, { "epoch": 4.556333137453924, "grad_norm": 1.0633628368377686, "learning_rate": 7.200499230617043e-07, "loss": 0.0199, "step": 426450 }, { "epoch": 4.556439980768204, "grad_norm": 0.05811896547675133, "learning_rate": 7.200348366254328e-07, "loss": 0.002, "step": 426460 }, { "epoch": 4.556546824082483, "grad_norm": 0.0015247238334268332, "learning_rate": 7.200197499407251e-07, "loss": 0.0184, "step": 426470 }, { "epoch": 4.556653667396763, "grad_norm": 0.18635500967502594, "learning_rate": 7.200046630075986e-07, "loss": 0.0041, "step": 426480 }, { "epoch": 4.556760510711042, "grad_norm": 4.735134124755859, "learning_rate": 7.199895758260701e-07, "loss": 0.0127, "step": 426490 }, { "epoch": 4.5568673540253215, "grad_norm": 9.061760902404785, "learning_rate": 7.199744883961569e-07, "loss": 0.0048, "step": 426500 }, { "epoch": 4.556974197339602, "grad_norm": 4.614725112915039, "learning_rate": 7.199594007178759e-07, "loss": 0.0218, "step": 426510 }, { "epoch": 4.557081040653881, "grad_norm": 0.005352649837732315, "learning_rate": 7.199443127912441e-07, "loss": 0.0155, "step": 426520 }, { "epoch": 4.557187883968161, "grad_norm": 1.252360224723816, "learning_rate": 7.199292246162785e-07, "loss": 0.0055, "step": 426530 }, { "epoch": 4.55729472728244, "grad_norm": 0.0026023220270872116, "learning_rate": 7.199141361929964e-07, "loss": 0.0122, "step": 426540 }, { "epoch": 4.55740157059672, "grad_norm": 0.6884467005729675, "learning_rate": 7.198990475214144e-07, "loss": 0.0067, "step": 426550 }, { "epoch": 4.557508413910999, "grad_norm": 2.9063045978546143, "learning_rate": 7.198839586015499e-07, "loss": 0.0103, "step": 426560 }, { "epoch": 4.5576152572252795, "grad_norm": 0.21135105192661285, "learning_rate": 7.198688694334198e-07, "loss": 0.0067, "step": 426570 }, { "epoch": 4.557722100539559, "grad_norm": 1.1524590253829956, "learning_rate": 7.198537800170413e-07, "loss": 0.0043, "step": 426580 }, { "epoch": 4.557828943853838, "grad_norm": 0.0076684849336743355, "learning_rate": 7.198386903524311e-07, "loss": 0.0195, "step": 426590 }, { "epoch": 4.557935787168118, "grad_norm": 0.011956416070461273, "learning_rate": 7.198236004396064e-07, "loss": 0.0032, "step": 426600 }, { "epoch": 4.558042630482397, "grad_norm": 0.21320104598999023, "learning_rate": 7.198085102785845e-07, "loss": 0.0359, "step": 426610 }, { "epoch": 4.558149473796677, "grad_norm": 0.000759309739805758, "learning_rate": 7.197934198693819e-07, "loss": 0.0035, "step": 426620 }, { "epoch": 4.558256317110957, "grad_norm": 1.3947417736053467, "learning_rate": 7.197783292120162e-07, "loss": 0.0207, "step": 426630 }, { "epoch": 4.558363160425237, "grad_norm": 6.423970699310303, "learning_rate": 7.197632383065041e-07, "loss": 0.0026, "step": 426640 }, { "epoch": 4.558470003739516, "grad_norm": 0.006214747671037912, "learning_rate": 7.197481471528626e-07, "loss": 0.0195, "step": 426650 }, { "epoch": 4.5585768470537955, "grad_norm": 2.1697208881378174, "learning_rate": 7.19733055751109e-07, "loss": 0.0154, "step": 426660 }, { "epoch": 4.558683690368075, "grad_norm": 1.120836853981018, "learning_rate": 7.197179641012602e-07, "loss": 0.0009, "step": 426670 }, { "epoch": 4.558790533682355, "grad_norm": 0.5552549362182617, "learning_rate": 7.197028722033331e-07, "loss": 0.0055, "step": 426680 }, { "epoch": 4.558897376996635, "grad_norm": 0.002473273314535618, "learning_rate": 7.19687780057345e-07, "loss": 0.0056, "step": 426690 }, { "epoch": 4.559004220310914, "grad_norm": 0.03473445400595665, "learning_rate": 7.196726876633127e-07, "loss": 0.0052, "step": 426700 }, { "epoch": 4.559111063625194, "grad_norm": 0.022226236760616302, "learning_rate": 7.196575950212534e-07, "loss": 0.0076, "step": 426710 }, { "epoch": 4.559217906939473, "grad_norm": 0.009950220584869385, "learning_rate": 7.196425021311843e-07, "loss": 0.0028, "step": 426720 }, { "epoch": 4.559324750253753, "grad_norm": 0.5342055559158325, "learning_rate": 7.196274089931221e-07, "loss": 0.0236, "step": 426730 }, { "epoch": 4.559431593568032, "grad_norm": 0.04488244652748108, "learning_rate": 7.196123156070838e-07, "loss": 0.0122, "step": 426740 }, { "epoch": 4.559538436882312, "grad_norm": 0.006882959045469761, "learning_rate": 7.195972219730868e-07, "loss": 0.0049, "step": 426750 }, { "epoch": 4.559645280196592, "grad_norm": 0.014346117153763771, "learning_rate": 7.19582128091148e-07, "loss": 0.0012, "step": 426760 }, { "epoch": 4.559752123510871, "grad_norm": 0.9365466833114624, "learning_rate": 7.195670339612843e-07, "loss": 0.0047, "step": 426770 }, { "epoch": 4.559858966825151, "grad_norm": 10.959918022155762, "learning_rate": 7.19551939583513e-07, "loss": 0.0079, "step": 426780 }, { "epoch": 4.55996581013943, "grad_norm": 0.004581090062856674, "learning_rate": 7.195368449578507e-07, "loss": 0.0048, "step": 426790 }, { "epoch": 4.560072653453711, "grad_norm": 0.23097695410251617, "learning_rate": 7.195217500843149e-07, "loss": 0.0008, "step": 426800 }, { "epoch": 4.56017949676799, "grad_norm": 0.0005227744695730507, "learning_rate": 7.195066549629225e-07, "loss": 0.0253, "step": 426810 }, { "epoch": 4.5602863400822695, "grad_norm": 0.004051830619573593, "learning_rate": 7.194915595936904e-07, "loss": 0.0096, "step": 426820 }, { "epoch": 4.560393183396549, "grad_norm": 0.46884435415267944, "learning_rate": 7.19476463976636e-07, "loss": 0.0015, "step": 426830 }, { "epoch": 4.560500026710828, "grad_norm": 0.010929490439593792, "learning_rate": 7.194613681117758e-07, "loss": 0.0025, "step": 426840 }, { "epoch": 4.560606870025108, "grad_norm": 4.684654712677002, "learning_rate": 7.194462719991274e-07, "loss": 0.0084, "step": 426850 }, { "epoch": 4.560713713339387, "grad_norm": 0.01615835912525654, "learning_rate": 7.194311756387075e-07, "loss": 0.0081, "step": 426860 }, { "epoch": 4.560820556653668, "grad_norm": 4.52765417098999, "learning_rate": 7.194160790305332e-07, "loss": 0.0256, "step": 426870 }, { "epoch": 4.560927399967947, "grad_norm": 0.0027665572706609964, "learning_rate": 7.194009821746217e-07, "loss": 0.0118, "step": 426880 }, { "epoch": 4.561034243282227, "grad_norm": 0.003386172465980053, "learning_rate": 7.193858850709898e-07, "loss": 0.0039, "step": 426890 }, { "epoch": 4.561141086596506, "grad_norm": 0.003557586809620261, "learning_rate": 7.193707877196547e-07, "loss": 0.0001, "step": 426900 }, { "epoch": 4.5612479299107855, "grad_norm": 0.09232234209775925, "learning_rate": 7.193556901206336e-07, "loss": 0.0015, "step": 426910 }, { "epoch": 4.561354773225066, "grad_norm": 7.035635948181152, "learning_rate": 7.193405922739433e-07, "loss": 0.007, "step": 426920 }, { "epoch": 4.561461616539345, "grad_norm": 0.37221473455429077, "learning_rate": 7.193254941796007e-07, "loss": 0.0062, "step": 426930 }, { "epoch": 4.561568459853625, "grad_norm": 0.22450090944766998, "learning_rate": 7.193103958376233e-07, "loss": 0.0027, "step": 426940 }, { "epoch": 4.561675303167904, "grad_norm": 0.009840435348451138, "learning_rate": 7.19295297248028e-07, "loss": 0.0127, "step": 426950 }, { "epoch": 4.561782146482184, "grad_norm": 0.0033604635391384363, "learning_rate": 7.192801984108314e-07, "loss": 0.0069, "step": 426960 }, { "epoch": 4.561888989796463, "grad_norm": 0.0023843615781515837, "learning_rate": 7.192650993260512e-07, "loss": 0.0107, "step": 426970 }, { "epoch": 4.561995833110743, "grad_norm": 0.06678164005279541, "learning_rate": 7.19249999993704e-07, "loss": 0.0281, "step": 426980 }, { "epoch": 4.562102676425023, "grad_norm": 0.29721084237098694, "learning_rate": 7.19234900413807e-07, "loss": 0.0101, "step": 426990 }, { "epoch": 4.562209519739302, "grad_norm": 0.10483352839946747, "learning_rate": 7.192198005863775e-07, "loss": 0.0065, "step": 427000 }, { "epoch": 4.562316363053582, "grad_norm": 0.009423846378922462, "learning_rate": 7.192047005114321e-07, "loss": 0.0311, "step": 427010 }, { "epoch": 4.562423206367861, "grad_norm": 0.010234900750219822, "learning_rate": 7.19189600188988e-07, "loss": 0.0072, "step": 427020 }, { "epoch": 4.562530049682141, "grad_norm": 0.03512337803840637, "learning_rate": 7.191744996190624e-07, "loss": 0.0002, "step": 427030 }, { "epoch": 4.562636892996421, "grad_norm": 0.4647665023803711, "learning_rate": 7.191593988016722e-07, "loss": 0.0031, "step": 427040 }, { "epoch": 4.562743736310701, "grad_norm": 0.45128023624420166, "learning_rate": 7.191442977368345e-07, "loss": 0.0046, "step": 427050 }, { "epoch": 4.56285057962498, "grad_norm": 0.011804534122347832, "learning_rate": 7.191291964245664e-07, "loss": 0.0125, "step": 427060 }, { "epoch": 4.5629574229392595, "grad_norm": 0.5006011724472046, "learning_rate": 7.191140948648848e-07, "loss": 0.0132, "step": 427070 }, { "epoch": 4.563064266253539, "grad_norm": 0.03459852561354637, "learning_rate": 7.190989930578069e-07, "loss": 0.004, "step": 427080 }, { "epoch": 4.5631711095678185, "grad_norm": 0.8796720504760742, "learning_rate": 7.190838910033497e-07, "loss": 0.0084, "step": 427090 }, { "epoch": 4.563277952882098, "grad_norm": 0.1183137595653534, "learning_rate": 7.190687887015304e-07, "loss": 0.0018, "step": 427100 }, { "epoch": 4.563384796196378, "grad_norm": 0.03150298446416855, "learning_rate": 7.190536861523656e-07, "loss": 0.0219, "step": 427110 }, { "epoch": 4.563491639510658, "grad_norm": 3.6128759384155273, "learning_rate": 7.19038583355873e-07, "loss": 0.002, "step": 427120 }, { "epoch": 4.563598482824937, "grad_norm": 0.0004134404007345438, "learning_rate": 7.190234803120689e-07, "loss": 0.0006, "step": 427130 }, { "epoch": 4.563705326139217, "grad_norm": 0.3070845305919647, "learning_rate": 7.190083770209711e-07, "loss": 0.005, "step": 427140 }, { "epoch": 4.563812169453496, "grad_norm": 1.0334196090698242, "learning_rate": 7.189932734825961e-07, "loss": 0.0031, "step": 427150 }, { "epoch": 4.5639190127677765, "grad_norm": 2.597081422805786, "learning_rate": 7.189781696969614e-07, "loss": 0.0111, "step": 427160 }, { "epoch": 4.564025856082056, "grad_norm": 7.337990760803223, "learning_rate": 7.189630656640837e-07, "loss": 0.005, "step": 427170 }, { "epoch": 4.564132699396335, "grad_norm": 0.013065217062830925, "learning_rate": 7.189479613839801e-07, "loss": 0.0016, "step": 427180 }, { "epoch": 4.564239542710615, "grad_norm": 0.026986965909600258, "learning_rate": 7.189328568566677e-07, "loss": 0.003, "step": 427190 }, { "epoch": 4.564346386024894, "grad_norm": 0.09005118906497955, "learning_rate": 7.189177520821636e-07, "loss": 0.0003, "step": 427200 }, { "epoch": 4.564453229339174, "grad_norm": 0.02021569013595581, "learning_rate": 7.18902647060485e-07, "loss": 0.0066, "step": 427210 }, { "epoch": 4.564560072653454, "grad_norm": 0.19416381418704987, "learning_rate": 7.188875417916487e-07, "loss": 0.0162, "step": 427220 }, { "epoch": 4.5646669159677336, "grad_norm": 1.263596534729004, "learning_rate": 7.188724362756717e-07, "loss": 0.0025, "step": 427230 }, { "epoch": 4.564773759282013, "grad_norm": 0.002699192613363266, "learning_rate": 7.188573305125714e-07, "loss": 0.0036, "step": 427240 }, { "epoch": 4.5648806025962925, "grad_norm": 5.301203727722168, "learning_rate": 7.188422245023646e-07, "loss": 0.0054, "step": 427250 }, { "epoch": 4.564987445910572, "grad_norm": 0.0013966058613732457, "learning_rate": 7.188271182450685e-07, "loss": 0.0089, "step": 427260 }, { "epoch": 4.565094289224851, "grad_norm": 0.12400791049003601, "learning_rate": 7.188120117407e-07, "loss": 0.0203, "step": 427270 }, { "epoch": 4.565201132539132, "grad_norm": 8.540538787841797, "learning_rate": 7.18796904989276e-07, "loss": 0.0198, "step": 427280 }, { "epoch": 4.565307975853411, "grad_norm": 1.2644808292388916, "learning_rate": 7.187817979908141e-07, "loss": 0.0074, "step": 427290 }, { "epoch": 4.565414819167691, "grad_norm": 0.7410967946052551, "learning_rate": 7.187666907453308e-07, "loss": 0.006, "step": 427300 }, { "epoch": 4.56552166248197, "grad_norm": 0.3149057924747467, "learning_rate": 7.187515832528436e-07, "loss": 0.0002, "step": 427310 }, { "epoch": 4.56562850579625, "grad_norm": 0.09859958291053772, "learning_rate": 7.187364755133693e-07, "loss": 0.0055, "step": 427320 }, { "epoch": 4.565735349110529, "grad_norm": 1.3482803106307983, "learning_rate": 7.187213675269248e-07, "loss": 0.0033, "step": 427330 }, { "epoch": 4.565842192424809, "grad_norm": 0.017947135493159294, "learning_rate": 7.187062592935277e-07, "loss": 0.0009, "step": 427340 }, { "epoch": 4.565949035739089, "grad_norm": 0.008277758955955505, "learning_rate": 7.186911508131945e-07, "loss": 0.0248, "step": 427350 }, { "epoch": 4.566055879053368, "grad_norm": 0.6033152341842651, "learning_rate": 7.186760420859425e-07, "loss": 0.0018, "step": 427360 }, { "epoch": 4.566162722367648, "grad_norm": 0.0013764760224148631, "learning_rate": 7.186609331117889e-07, "loss": 0.0046, "step": 427370 }, { "epoch": 4.566269565681927, "grad_norm": 6.265199184417725, "learning_rate": 7.186458238907504e-07, "loss": 0.0076, "step": 427380 }, { "epoch": 4.566376408996208, "grad_norm": 0.04374564066529274, "learning_rate": 7.186307144228444e-07, "loss": 0.0358, "step": 427390 }, { "epoch": 4.566483252310487, "grad_norm": 0.002049945993348956, "learning_rate": 7.186156047080878e-07, "loss": 0.0006, "step": 427400 }, { "epoch": 4.5665900956247665, "grad_norm": 0.002969275461509824, "learning_rate": 7.186004947464977e-07, "loss": 0.0074, "step": 427410 }, { "epoch": 4.566696938939046, "grad_norm": 0.0636131688952446, "learning_rate": 7.185853845380911e-07, "loss": 0.0042, "step": 427420 }, { "epoch": 4.566803782253325, "grad_norm": 0.007497782818973064, "learning_rate": 7.185702740828852e-07, "loss": 0.0007, "step": 427430 }, { "epoch": 4.566910625567605, "grad_norm": 0.027368027716875076, "learning_rate": 7.185551633808968e-07, "loss": 0.0035, "step": 427440 }, { "epoch": 4.567017468881884, "grad_norm": 0.048437394201755524, "learning_rate": 7.185400524321432e-07, "loss": 0.0003, "step": 427450 }, { "epoch": 4.567124312196165, "grad_norm": 2.0390288829803467, "learning_rate": 7.185249412366415e-07, "loss": 0.0098, "step": 427460 }, { "epoch": 4.567231155510444, "grad_norm": 0.006009182892739773, "learning_rate": 7.185098297944083e-07, "loss": 0.0024, "step": 427470 }, { "epoch": 4.567337998824724, "grad_norm": 0.9551514983177185, "learning_rate": 7.184947181054614e-07, "loss": 0.0048, "step": 427480 }, { "epoch": 4.567444842139003, "grad_norm": 0.008171028457581997, "learning_rate": 7.184796061698173e-07, "loss": 0.0026, "step": 427490 }, { "epoch": 4.5675516854532825, "grad_norm": 0.005949364043772221, "learning_rate": 7.184644939874932e-07, "loss": 0.0039, "step": 427500 }, { "epoch": 4.567658528767563, "grad_norm": 0.0007643171120435, "learning_rate": 7.184493815585062e-07, "loss": 0.0082, "step": 427510 }, { "epoch": 4.567765372081842, "grad_norm": 0.0004706673789769411, "learning_rate": 7.184342688828735e-07, "loss": 0.0093, "step": 427520 }, { "epoch": 4.567872215396122, "grad_norm": 0.00753288809210062, "learning_rate": 7.184191559606119e-07, "loss": 0.0002, "step": 427530 }, { "epoch": 4.567979058710401, "grad_norm": 0.07344979792833328, "learning_rate": 7.184040427917386e-07, "loss": 0.0025, "step": 427540 }, { "epoch": 4.568085902024681, "grad_norm": 0.002711196430027485, "learning_rate": 7.183889293762707e-07, "loss": 0.001, "step": 427550 }, { "epoch": 4.56819274533896, "grad_norm": 0.0019802842289209366, "learning_rate": 7.183738157142251e-07, "loss": 0.0056, "step": 427560 }, { "epoch": 4.56829958865324, "grad_norm": 0.0018579805037006736, "learning_rate": 7.183587018056192e-07, "loss": 0.0049, "step": 427570 }, { "epoch": 4.56840643196752, "grad_norm": 0.02043994329869747, "learning_rate": 7.183435876504697e-07, "loss": 0.006, "step": 427580 }, { "epoch": 4.568513275281799, "grad_norm": 0.0009000285645015538, "learning_rate": 7.183284732487937e-07, "loss": 0.0117, "step": 427590 }, { "epoch": 4.568620118596079, "grad_norm": 0.14866681396961212, "learning_rate": 7.183133586006088e-07, "loss": 0.0301, "step": 427600 }, { "epoch": 4.568726961910358, "grad_norm": 4.0745720863342285, "learning_rate": 7.182982437059313e-07, "loss": 0.0073, "step": 427610 }, { "epoch": 4.568833805224638, "grad_norm": 1.7676923274993896, "learning_rate": 7.182831285647786e-07, "loss": 0.0177, "step": 427620 }, { "epoch": 4.568940648538918, "grad_norm": 0.7486005425453186, "learning_rate": 7.182680131771678e-07, "loss": 0.002, "step": 427630 }, { "epoch": 4.569047491853198, "grad_norm": 0.49090999364852905, "learning_rate": 7.18252897543116e-07, "loss": 0.0207, "step": 427640 }, { "epoch": 4.569154335167477, "grad_norm": 0.32439643144607544, "learning_rate": 7.182377816626401e-07, "loss": 0.0182, "step": 427650 }, { "epoch": 4.5692611784817565, "grad_norm": 0.0016732669901102781, "learning_rate": 7.182226655357574e-07, "loss": 0.0013, "step": 427660 }, { "epoch": 4.569368021796036, "grad_norm": 0.004707605578005314, "learning_rate": 7.182075491624848e-07, "loss": 0.0163, "step": 427670 }, { "epoch": 4.569474865110315, "grad_norm": 0.6605902314186096, "learning_rate": 7.181924325428393e-07, "loss": 0.0012, "step": 427680 }, { "epoch": 4.569581708424595, "grad_norm": 5.8667802810668945, "learning_rate": 7.181773156768382e-07, "loss": 0.0061, "step": 427690 }, { "epoch": 4.569688551738875, "grad_norm": 0.3196333050727844, "learning_rate": 7.181621985644985e-07, "loss": 0.0183, "step": 427700 }, { "epoch": 4.569795395053155, "grad_norm": 0.0014305105432868004, "learning_rate": 7.181470812058371e-07, "loss": 0.0024, "step": 427710 }, { "epoch": 4.569902238367434, "grad_norm": 0.0041477070190012455, "learning_rate": 7.181319636008713e-07, "loss": 0.0125, "step": 427720 }, { "epoch": 4.570009081681714, "grad_norm": 5.774749755859375, "learning_rate": 7.181168457496179e-07, "loss": 0.005, "step": 427730 }, { "epoch": 4.570115924995993, "grad_norm": 0.013898821547627449, "learning_rate": 7.181017276520942e-07, "loss": 0.0135, "step": 427740 }, { "epoch": 4.570222768310273, "grad_norm": 0.0016324014868587255, "learning_rate": 7.180866093083172e-07, "loss": 0.003, "step": 427750 }, { "epoch": 4.570329611624553, "grad_norm": 0.0026117342058569193, "learning_rate": 7.180714907183038e-07, "loss": 0.0013, "step": 427760 }, { "epoch": 4.570436454938832, "grad_norm": 0.0007791226380504668, "learning_rate": 7.180563718820715e-07, "loss": 0.0045, "step": 427770 }, { "epoch": 4.570543298253112, "grad_norm": 0.4183652997016907, "learning_rate": 7.180412527996369e-07, "loss": 0.0016, "step": 427780 }, { "epoch": 4.570650141567391, "grad_norm": 0.2639467716217041, "learning_rate": 7.180261334710173e-07, "loss": 0.0187, "step": 427790 }, { "epoch": 4.570756984881671, "grad_norm": 4.4977312088012695, "learning_rate": 7.180110138962299e-07, "loss": 0.0114, "step": 427800 }, { "epoch": 4.57086382819595, "grad_norm": 0.38516831398010254, "learning_rate": 7.179958940752913e-07, "loss": 0.018, "step": 427810 }, { "epoch": 4.5709706715102305, "grad_norm": 0.8880357146263123, "learning_rate": 7.179807740082191e-07, "loss": 0.0016, "step": 427820 }, { "epoch": 4.57107751482451, "grad_norm": 0.0006984802312217653, "learning_rate": 7.1796565369503e-07, "loss": 0.001, "step": 427830 }, { "epoch": 4.571184358138789, "grad_norm": 0.0009746005525812507, "learning_rate": 7.179505331357413e-07, "loss": 0.0105, "step": 427840 }, { "epoch": 4.571291201453069, "grad_norm": 0.0014813028974458575, "learning_rate": 7.179354123303701e-07, "loss": 0.0016, "step": 427850 }, { "epoch": 4.571398044767348, "grad_norm": 1.4918358325958252, "learning_rate": 7.179202912789334e-07, "loss": 0.0035, "step": 427860 }, { "epoch": 4.571504888081629, "grad_norm": 0.002387585584074259, "learning_rate": 7.17905169981448e-07, "loss": 0.0036, "step": 427870 }, { "epoch": 4.571611731395908, "grad_norm": 0.014658474363386631, "learning_rate": 7.178900484379314e-07, "loss": 0.0163, "step": 427880 }, { "epoch": 4.571718574710188, "grad_norm": 1.7902673482894897, "learning_rate": 7.178749266484006e-07, "loss": 0.0035, "step": 427890 }, { "epoch": 4.571825418024467, "grad_norm": 0.21047143638134003, "learning_rate": 7.178598046128723e-07, "loss": 0.0007, "step": 427900 }, { "epoch": 4.5719322613387465, "grad_norm": 15.463998794555664, "learning_rate": 7.17844682331364e-07, "loss": 0.0292, "step": 427910 }, { "epoch": 4.572039104653026, "grad_norm": 0.0037219112273305655, "learning_rate": 7.178295598038926e-07, "loss": 0.0002, "step": 427920 }, { "epoch": 4.572145947967306, "grad_norm": 0.07647373527288437, "learning_rate": 7.178144370304751e-07, "loss": 0.0002, "step": 427930 }, { "epoch": 4.572252791281586, "grad_norm": 0.020411916077136993, "learning_rate": 7.177993140111288e-07, "loss": 0.0023, "step": 427940 }, { "epoch": 4.572359634595865, "grad_norm": 0.020875489339232445, "learning_rate": 7.177841907458706e-07, "loss": 0.0117, "step": 427950 }, { "epoch": 4.572466477910145, "grad_norm": 0.0069196149706840515, "learning_rate": 7.177690672347175e-07, "loss": 0.0002, "step": 427960 }, { "epoch": 4.572573321224424, "grad_norm": 0.007112601306289434, "learning_rate": 7.177539434776868e-07, "loss": 0.0154, "step": 427970 }, { "epoch": 4.572680164538704, "grad_norm": 0.025146760046482086, "learning_rate": 7.177388194747954e-07, "loss": 0.0129, "step": 427980 }, { "epoch": 4.572787007852984, "grad_norm": 0.0714942216873169, "learning_rate": 7.177236952260605e-07, "loss": 0.0009, "step": 427990 }, { "epoch": 4.572893851167263, "grad_norm": 0.045393165200948715, "learning_rate": 7.17708570731499e-07, "loss": 0.0008, "step": 428000 }, { "epoch": 4.573000694481543, "grad_norm": 0.29367595911026, "learning_rate": 7.176934459911283e-07, "loss": 0.0045, "step": 428010 }, { "epoch": 4.573107537795822, "grad_norm": 0.003168320283293724, "learning_rate": 7.17678321004965e-07, "loss": 0.0105, "step": 428020 }, { "epoch": 4.573214381110102, "grad_norm": 0.00149813131429255, "learning_rate": 7.176631957730267e-07, "loss": 0.0266, "step": 428030 }, { "epoch": 4.573321224424381, "grad_norm": 1.4909560680389404, "learning_rate": 7.176480702953301e-07, "loss": 0.0138, "step": 428040 }, { "epoch": 4.573428067738662, "grad_norm": 0.0036736915353685617, "learning_rate": 7.176329445718924e-07, "loss": 0.0081, "step": 428050 }, { "epoch": 4.573534911052941, "grad_norm": 3.766641616821289, "learning_rate": 7.176178186027306e-07, "loss": 0.0133, "step": 428060 }, { "epoch": 4.5736417543672205, "grad_norm": 0.3333223760128021, "learning_rate": 7.176026923878619e-07, "loss": 0.0011, "step": 428070 }, { "epoch": 4.5737485976815, "grad_norm": 4.0496439933776855, "learning_rate": 7.175875659273035e-07, "loss": 0.0089, "step": 428080 }, { "epoch": 4.5738554409957795, "grad_norm": 3.265393018722534, "learning_rate": 7.175724392210722e-07, "loss": 0.0056, "step": 428090 }, { "epoch": 4.573962284310059, "grad_norm": 0.0007149073062464595, "learning_rate": 7.175573122691851e-07, "loss": 0.0034, "step": 428100 }, { "epoch": 4.574069127624339, "grad_norm": 4.808971881866455, "learning_rate": 7.175421850716597e-07, "loss": 0.0173, "step": 428110 }, { "epoch": 4.574175970938619, "grad_norm": 0.30465173721313477, "learning_rate": 7.175270576285124e-07, "loss": 0.0027, "step": 428120 }, { "epoch": 4.574282814252898, "grad_norm": 2.1854748725891113, "learning_rate": 7.175119299397607e-07, "loss": 0.0054, "step": 428130 }, { "epoch": 4.574389657567178, "grad_norm": 3.224444627761841, "learning_rate": 7.174968020054216e-07, "loss": 0.0068, "step": 428140 }, { "epoch": 4.574496500881457, "grad_norm": 0.7670411467552185, "learning_rate": 7.174816738255124e-07, "loss": 0.0106, "step": 428150 }, { "epoch": 4.5746033441957366, "grad_norm": 1.0850458145141602, "learning_rate": 7.174665454000497e-07, "loss": 0.0002, "step": 428160 }, { "epoch": 4.574710187510017, "grad_norm": 0.009646035730838776, "learning_rate": 7.174514167290511e-07, "loss": 0.0095, "step": 428170 }, { "epoch": 4.574817030824296, "grad_norm": 0.8764134049415588, "learning_rate": 7.174362878125334e-07, "loss": 0.0205, "step": 428180 }, { "epoch": 4.574923874138576, "grad_norm": 4.939123153686523, "learning_rate": 7.174211586505136e-07, "loss": 0.0015, "step": 428190 }, { "epoch": 4.575030717452855, "grad_norm": 0.030360978096723557, "learning_rate": 7.174060292430089e-07, "loss": 0.0079, "step": 428200 }, { "epoch": 4.575137560767135, "grad_norm": 0.02273489721119404, "learning_rate": 7.173908995900366e-07, "loss": 0.0031, "step": 428210 }, { "epoch": 4.575244404081415, "grad_norm": 0.16066129505634308, "learning_rate": 7.173757696916133e-07, "loss": 0.0138, "step": 428220 }, { "epoch": 4.5753512473956945, "grad_norm": 0.0027345011476427317, "learning_rate": 7.173606395477565e-07, "loss": 0.0108, "step": 428230 }, { "epoch": 4.575458090709974, "grad_norm": 0.01661190576851368, "learning_rate": 7.173455091584832e-07, "loss": 0.0031, "step": 428240 }, { "epoch": 4.5755649340242535, "grad_norm": 0.005624738056212664, "learning_rate": 7.173303785238101e-07, "loss": 0.0078, "step": 428250 }, { "epoch": 4.575671777338533, "grad_norm": 3.417797088623047, "learning_rate": 7.173152476437549e-07, "loss": 0.0069, "step": 428260 }, { "epoch": 4.575778620652812, "grad_norm": 0.029687369242310524, "learning_rate": 7.173001165183343e-07, "loss": 0.0255, "step": 428270 }, { "epoch": 4.575885463967092, "grad_norm": 1.2695049047470093, "learning_rate": 7.172849851475655e-07, "loss": 0.0078, "step": 428280 }, { "epoch": 4.575992307281372, "grad_norm": 1.8116847276687622, "learning_rate": 7.172698535314656e-07, "loss": 0.0015, "step": 428290 }, { "epoch": 4.576099150595652, "grad_norm": 10.741144180297852, "learning_rate": 7.172547216700515e-07, "loss": 0.0074, "step": 428300 }, { "epoch": 4.576205993909931, "grad_norm": 0.06597438454627991, "learning_rate": 7.172395895633406e-07, "loss": 0.0027, "step": 428310 }, { "epoch": 4.576312837224211, "grad_norm": 0.08124605566263199, "learning_rate": 7.172244572113496e-07, "loss": 0.024, "step": 428320 }, { "epoch": 4.57641968053849, "grad_norm": 0.36073294281959534, "learning_rate": 7.172093246140957e-07, "loss": 0.002, "step": 428330 }, { "epoch": 4.57652652385277, "grad_norm": 0.3364577889442444, "learning_rate": 7.171941917715965e-07, "loss": 0.0121, "step": 428340 }, { "epoch": 4.57663336716705, "grad_norm": 0.005624860990792513, "learning_rate": 7.171790586838684e-07, "loss": 0.0291, "step": 428350 }, { "epoch": 4.576740210481329, "grad_norm": 0.00357438693754375, "learning_rate": 7.171639253509287e-07, "loss": 0.0072, "step": 428360 }, { "epoch": 4.576847053795609, "grad_norm": 0.07993508875370026, "learning_rate": 7.171487917727947e-07, "loss": 0.0034, "step": 428370 }, { "epoch": 4.576953897109888, "grad_norm": 0.0028997445479035378, "learning_rate": 7.171336579494832e-07, "loss": 0.0063, "step": 428380 }, { "epoch": 4.577060740424168, "grad_norm": 8.795721054077148, "learning_rate": 7.171185238810115e-07, "loss": 0.0136, "step": 428390 }, { "epoch": 4.577167583738447, "grad_norm": 0.043158646672964096, "learning_rate": 7.171033895673966e-07, "loss": 0.0031, "step": 428400 }, { "epoch": 4.5772744270527275, "grad_norm": 0.10902076959609985, "learning_rate": 7.170882550086555e-07, "loss": 0.0023, "step": 428410 }, { "epoch": 4.577381270367007, "grad_norm": 0.030009381473064423, "learning_rate": 7.170731202048054e-07, "loss": 0.0044, "step": 428420 }, { "epoch": 4.577488113681286, "grad_norm": 5.52191162109375, "learning_rate": 7.170579851558633e-07, "loss": 0.0094, "step": 428430 }, { "epoch": 4.577594956995566, "grad_norm": 0.01184125803411007, "learning_rate": 7.170428498618464e-07, "loss": 0.0211, "step": 428440 }, { "epoch": 4.577701800309845, "grad_norm": 1.6037112474441528, "learning_rate": 7.170277143227719e-07, "loss": 0.068, "step": 428450 }, { "epoch": 4.577808643624126, "grad_norm": 0.05684778094291687, "learning_rate": 7.170125785386565e-07, "loss": 0.0054, "step": 428460 }, { "epoch": 4.577915486938405, "grad_norm": 1.5302047729492188, "learning_rate": 7.169974425095176e-07, "loss": 0.0072, "step": 428470 }, { "epoch": 4.578022330252685, "grad_norm": 1.0533040761947632, "learning_rate": 7.169823062353723e-07, "loss": 0.0046, "step": 428480 }, { "epoch": 4.578129173566964, "grad_norm": 0.0036706700921058655, "learning_rate": 7.169671697162375e-07, "loss": 0.0014, "step": 428490 }, { "epoch": 4.5782360168812435, "grad_norm": 0.29940375685691833, "learning_rate": 7.169520329521304e-07, "loss": 0.0105, "step": 428500 }, { "epoch": 4.578342860195523, "grad_norm": 0.0018514335388317704, "learning_rate": 7.169368959430681e-07, "loss": 0.0136, "step": 428510 }, { "epoch": 4.578449703509802, "grad_norm": 0.008231647312641144, "learning_rate": 7.169217586890677e-07, "loss": 0.0008, "step": 428520 }, { "epoch": 4.578556546824083, "grad_norm": 1.8525129556655884, "learning_rate": 7.169066211901462e-07, "loss": 0.0004, "step": 428530 }, { "epoch": 4.578663390138362, "grad_norm": 0.06430160254240036, "learning_rate": 7.168914834463209e-07, "loss": 0.0077, "step": 428540 }, { "epoch": 4.578770233452642, "grad_norm": 0.34969228506088257, "learning_rate": 7.168763454576085e-07, "loss": 0.0048, "step": 428550 }, { "epoch": 4.578877076766921, "grad_norm": 3.380967378616333, "learning_rate": 7.168612072240265e-07, "loss": 0.0141, "step": 428560 }, { "epoch": 4.578983920081201, "grad_norm": 0.0026219289284199476, "learning_rate": 7.168460687455919e-07, "loss": 0.0094, "step": 428570 }, { "epoch": 4.579090763395481, "grad_norm": 0.05854495242238045, "learning_rate": 7.168309300223217e-07, "loss": 0.0028, "step": 428580 }, { "epoch": 4.57919760670976, "grad_norm": 0.01511660497635603, "learning_rate": 7.168157910542328e-07, "loss": 0.006, "step": 428590 }, { "epoch": 4.57930445002404, "grad_norm": 0.05577301234006882, "learning_rate": 7.168006518413428e-07, "loss": 0.0119, "step": 428600 }, { "epoch": 4.579411293338319, "grad_norm": 0.2409151792526245, "learning_rate": 7.167855123836684e-07, "loss": 0.0062, "step": 428610 }, { "epoch": 4.579518136652599, "grad_norm": 0.7157065868377686, "learning_rate": 7.167703726812266e-07, "loss": 0.0059, "step": 428620 }, { "epoch": 4.579624979966878, "grad_norm": 0.001803106744773686, "learning_rate": 7.167552327340348e-07, "loss": 0.0067, "step": 428630 }, { "epoch": 4.579731823281158, "grad_norm": 0.01560033205896616, "learning_rate": 7.167400925421101e-07, "loss": 0.0019, "step": 428640 }, { "epoch": 4.579838666595438, "grad_norm": 0.7331879138946533, "learning_rate": 7.167249521054693e-07, "loss": 0.0035, "step": 428650 }, { "epoch": 4.5799455099097175, "grad_norm": 0.5376145839691162, "learning_rate": 7.167098114241299e-07, "loss": 0.0029, "step": 428660 }, { "epoch": 4.580052353223997, "grad_norm": 3.659489631652832, "learning_rate": 7.166946704981085e-07, "loss": 0.0029, "step": 428670 }, { "epoch": 4.580159196538276, "grad_norm": 0.007069041486829519, "learning_rate": 7.166795293274227e-07, "loss": 0.0264, "step": 428680 }, { "epoch": 4.580266039852556, "grad_norm": 0.5262117981910706, "learning_rate": 7.166643879120893e-07, "loss": 0.01, "step": 428690 }, { "epoch": 4.580372883166836, "grad_norm": 2.986128807067871, "learning_rate": 7.166492462521254e-07, "loss": 0.0296, "step": 428700 }, { "epoch": 4.580479726481116, "grad_norm": 0.04861263930797577, "learning_rate": 7.166341043475481e-07, "loss": 0.001, "step": 428710 }, { "epoch": 4.580586569795395, "grad_norm": 2.5102789402008057, "learning_rate": 7.166189621983747e-07, "loss": 0.0124, "step": 428720 }, { "epoch": 4.580693413109675, "grad_norm": 0.001978559885174036, "learning_rate": 7.166038198046219e-07, "loss": 0.0004, "step": 428730 }, { "epoch": 4.580800256423954, "grad_norm": 1.4688010215759277, "learning_rate": 7.165886771663072e-07, "loss": 0.0129, "step": 428740 }, { "epoch": 4.5809070997382335, "grad_norm": 0.33414432406425476, "learning_rate": 7.165735342834476e-07, "loss": 0.0149, "step": 428750 }, { "epoch": 4.581013943052514, "grad_norm": 0.002513619139790535, "learning_rate": 7.1655839115606e-07, "loss": 0.0038, "step": 428760 }, { "epoch": 4.581120786366793, "grad_norm": 1.0262730121612549, "learning_rate": 7.165432477841616e-07, "loss": 0.0093, "step": 428770 }, { "epoch": 4.581227629681073, "grad_norm": 0.006812904961407185, "learning_rate": 7.165281041677697e-07, "loss": 0.0019, "step": 428780 }, { "epoch": 4.581334472995352, "grad_norm": 0.04079243168234825, "learning_rate": 7.165129603069012e-07, "loss": 0.0132, "step": 428790 }, { "epoch": 4.581441316309632, "grad_norm": 0.006805336568504572, "learning_rate": 7.164978162015732e-07, "loss": 0.0121, "step": 428800 }, { "epoch": 4.581548159623911, "grad_norm": 0.013204777613282204, "learning_rate": 7.164826718518028e-07, "loss": 0.0018, "step": 428810 }, { "epoch": 4.5816550029381915, "grad_norm": 5.391684055328369, "learning_rate": 7.164675272576071e-07, "loss": 0.037, "step": 428820 }, { "epoch": 4.581761846252471, "grad_norm": 0.291995108127594, "learning_rate": 7.164523824190032e-07, "loss": 0.012, "step": 428830 }, { "epoch": 4.58186868956675, "grad_norm": 0.2331850379705429, "learning_rate": 7.164372373360082e-07, "loss": 0.0083, "step": 428840 }, { "epoch": 4.58197553288103, "grad_norm": 5.583688735961914, "learning_rate": 7.164220920086393e-07, "loss": 0.0261, "step": 428850 }, { "epoch": 4.582082376195309, "grad_norm": 2.5448215007781982, "learning_rate": 7.164069464369136e-07, "loss": 0.0231, "step": 428860 }, { "epoch": 4.582189219509589, "grad_norm": 0.07161790132522583, "learning_rate": 7.16391800620848e-07, "loss": 0.0218, "step": 428870 }, { "epoch": 4.582296062823869, "grad_norm": 0.1220572218298912, "learning_rate": 7.163766545604598e-07, "loss": 0.0034, "step": 428880 }, { "epoch": 4.582402906138149, "grad_norm": 8.95971965789795, "learning_rate": 7.16361508255766e-07, "loss": 0.0262, "step": 428890 }, { "epoch": 4.582509749452428, "grad_norm": 5.401689529418945, "learning_rate": 7.163463617067837e-07, "loss": 0.0045, "step": 428900 }, { "epoch": 4.5826165927667075, "grad_norm": 0.33135277032852173, "learning_rate": 7.163312149135301e-07, "loss": 0.0054, "step": 428910 }, { "epoch": 4.582723436080987, "grad_norm": 0.013583335094153881, "learning_rate": 7.163160678760223e-07, "loss": 0.043, "step": 428920 }, { "epoch": 4.582830279395267, "grad_norm": 0.007754098158329725, "learning_rate": 7.163009205942771e-07, "loss": 0.0004, "step": 428930 }, { "epoch": 4.582937122709547, "grad_norm": 0.10068197548389435, "learning_rate": 7.162857730683119e-07, "loss": 0.0039, "step": 428940 }, { "epoch": 4.583043966023826, "grad_norm": 1.636148452758789, "learning_rate": 7.162706252981438e-07, "loss": 0.0105, "step": 428950 }, { "epoch": 4.583150809338106, "grad_norm": 0.06469427049160004, "learning_rate": 7.162554772837899e-07, "loss": 0.0026, "step": 428960 }, { "epoch": 4.583257652652385, "grad_norm": 0.002458810107782483, "learning_rate": 7.162403290252672e-07, "loss": 0.0135, "step": 428970 }, { "epoch": 4.583364495966665, "grad_norm": 1.210891604423523, "learning_rate": 7.162251805225928e-07, "loss": 0.0106, "step": 428980 }, { "epoch": 4.583471339280944, "grad_norm": 4.300581455230713, "learning_rate": 7.162100317757839e-07, "loss": 0.0175, "step": 428990 }, { "epoch": 4.583578182595224, "grad_norm": 0.1695469170808792, "learning_rate": 7.161948827848576e-07, "loss": 0.0129, "step": 429000 }, { "epoch": 4.583685025909504, "grad_norm": 0.7643407583236694, "learning_rate": 7.16179733549831e-07, "loss": 0.0046, "step": 429010 }, { "epoch": 4.583791869223783, "grad_norm": 3.0356903076171875, "learning_rate": 7.161645840707209e-07, "loss": 0.0168, "step": 429020 }, { "epoch": 4.583898712538063, "grad_norm": 0.0021713862661272287, "learning_rate": 7.161494343475449e-07, "loss": 0.0046, "step": 429030 }, { "epoch": 4.584005555852342, "grad_norm": 4.006285190582275, "learning_rate": 7.161342843803198e-07, "loss": 0.0068, "step": 429040 }, { "epoch": 4.584112399166623, "grad_norm": 3.8678290843963623, "learning_rate": 7.161191341690628e-07, "loss": 0.021, "step": 429050 }, { "epoch": 4.584219242480902, "grad_norm": 0.5380112528800964, "learning_rate": 7.16103983713791e-07, "loss": 0.0028, "step": 429060 }, { "epoch": 4.5843260857951815, "grad_norm": 0.15382206439971924, "learning_rate": 7.160888330145214e-07, "loss": 0.0143, "step": 429070 }, { "epoch": 4.584432929109461, "grad_norm": 5.378620624542236, "learning_rate": 7.160736820712713e-07, "loss": 0.0094, "step": 429080 }, { "epoch": 4.5845397724237404, "grad_norm": 0.25714215636253357, "learning_rate": 7.160585308840575e-07, "loss": 0.0048, "step": 429090 }, { "epoch": 4.58464661573802, "grad_norm": 0.04266437143087387, "learning_rate": 7.160433794528976e-07, "loss": 0.0026, "step": 429100 }, { "epoch": 4.584753459052299, "grad_norm": 4.145964622497559, "learning_rate": 7.160282277778082e-07, "loss": 0.0056, "step": 429110 }, { "epoch": 4.58486030236658, "grad_norm": 2.9766643047332764, "learning_rate": 7.160130758588068e-07, "loss": 0.0057, "step": 429120 }, { "epoch": 4.584967145680859, "grad_norm": 0.5974262356758118, "learning_rate": 7.159979236959102e-07, "loss": 0.0072, "step": 429130 }, { "epoch": 4.585073988995139, "grad_norm": 3.3910980224609375, "learning_rate": 7.159827712891357e-07, "loss": 0.0033, "step": 429140 }, { "epoch": 4.585180832309418, "grad_norm": 0.004601082764565945, "learning_rate": 7.159676186385004e-07, "loss": 0.0021, "step": 429150 }, { "epoch": 4.5852876756236975, "grad_norm": 0.5436481237411499, "learning_rate": 7.159524657440212e-07, "loss": 0.0451, "step": 429160 }, { "epoch": 4.585394518937978, "grad_norm": 0.7997849583625793, "learning_rate": 7.159373126057155e-07, "loss": 0.0037, "step": 429170 }, { "epoch": 4.585501362252257, "grad_norm": 3.895174741744995, "learning_rate": 7.159221592236004e-07, "loss": 0.0139, "step": 429180 }, { "epoch": 4.585608205566537, "grad_norm": 0.008979614824056625, "learning_rate": 7.159070055976927e-07, "loss": 0.0163, "step": 429190 }, { "epoch": 4.585715048880816, "grad_norm": 0.0041251094080507755, "learning_rate": 7.158918517280097e-07, "loss": 0.0107, "step": 429200 }, { "epoch": 4.585821892195096, "grad_norm": 0.002585507230833173, "learning_rate": 7.158766976145686e-07, "loss": 0.0133, "step": 429210 }, { "epoch": 4.585928735509375, "grad_norm": 0.0347730852663517, "learning_rate": 7.158615432573864e-07, "loss": 0.0021, "step": 429220 }, { "epoch": 4.586035578823655, "grad_norm": 5.563202381134033, "learning_rate": 7.158463886564802e-07, "loss": 0.0171, "step": 429230 }, { "epoch": 4.586142422137935, "grad_norm": 0.0189280454069376, "learning_rate": 7.158312338118671e-07, "loss": 0.0058, "step": 429240 }, { "epoch": 4.5862492654522145, "grad_norm": 1.2216389179229736, "learning_rate": 7.158160787235642e-07, "loss": 0.0082, "step": 429250 }, { "epoch": 4.586356108766494, "grad_norm": 0.009197399951517582, "learning_rate": 7.158009233915887e-07, "loss": 0.0024, "step": 429260 }, { "epoch": 4.586462952080773, "grad_norm": 0.05985771119594574, "learning_rate": 7.157857678159577e-07, "loss": 0.0172, "step": 429270 }, { "epoch": 4.586569795395053, "grad_norm": 3.56398606300354, "learning_rate": 7.157706119966883e-07, "loss": 0.0099, "step": 429280 }, { "epoch": 4.586676638709333, "grad_norm": 0.4110042452812195, "learning_rate": 7.157554559337976e-07, "loss": 0.0022, "step": 429290 }, { "epoch": 4.586783482023613, "grad_norm": 1.0481071472167969, "learning_rate": 7.157402996273026e-07, "loss": 0.0089, "step": 429300 }, { "epoch": 4.586890325337892, "grad_norm": 0.04389188438653946, "learning_rate": 7.157251430772206e-07, "loss": 0.0035, "step": 429310 }, { "epoch": 4.5869971686521716, "grad_norm": 1.8457499742507935, "learning_rate": 7.157099862835687e-07, "loss": 0.023, "step": 429320 }, { "epoch": 4.587104011966451, "grad_norm": 0.21516284346580505, "learning_rate": 7.156948292463638e-07, "loss": 0.001, "step": 429330 }, { "epoch": 4.5872108552807305, "grad_norm": 8.068840980529785, "learning_rate": 7.156796719656233e-07, "loss": 0.0107, "step": 429340 }, { "epoch": 4.58731769859501, "grad_norm": 0.8503513336181641, "learning_rate": 7.156645144413642e-07, "loss": 0.0383, "step": 429350 }, { "epoch": 4.58742454190929, "grad_norm": 0.43496447801589966, "learning_rate": 7.156493566736033e-07, "loss": 0.0014, "step": 429360 }, { "epoch": 4.58753138522357, "grad_norm": 0.0037610954605042934, "learning_rate": 7.156341986623584e-07, "loss": 0.0145, "step": 429370 }, { "epoch": 4.587638228537849, "grad_norm": 0.18700888752937317, "learning_rate": 7.156190404076458e-07, "loss": 0.0111, "step": 429380 }, { "epoch": 4.587745071852129, "grad_norm": 6.97006368637085, "learning_rate": 7.156038819094834e-07, "loss": 0.0233, "step": 429390 }, { "epoch": 4.587851915166408, "grad_norm": 0.2305273860692978, "learning_rate": 7.155887231678877e-07, "loss": 0.0042, "step": 429400 }, { "epoch": 4.5879587584806885, "grad_norm": 0.011175474151968956, "learning_rate": 7.155735641828762e-07, "loss": 0.0084, "step": 429410 }, { "epoch": 4.588065601794968, "grad_norm": 2.2042605876922607, "learning_rate": 7.155584049544657e-07, "loss": 0.0178, "step": 429420 }, { "epoch": 4.588172445109247, "grad_norm": 0.004864051006734371, "learning_rate": 7.155432454826736e-07, "loss": 0.0028, "step": 429430 }, { "epoch": 4.588279288423527, "grad_norm": 0.0035132593475282192, "learning_rate": 7.15528085767517e-07, "loss": 0.0018, "step": 429440 }, { "epoch": 4.588386131737806, "grad_norm": 0.011923553422093391, "learning_rate": 7.155129258090128e-07, "loss": 0.0207, "step": 429450 }, { "epoch": 4.588492975052086, "grad_norm": 0.0011985127348452806, "learning_rate": 7.154977656071783e-07, "loss": 0.0115, "step": 429460 }, { "epoch": 4.588599818366366, "grad_norm": 0.00567946583032608, "learning_rate": 7.154826051620305e-07, "loss": 0.006, "step": 429470 }, { "epoch": 4.588706661680646, "grad_norm": 5.450333595275879, "learning_rate": 7.154674444735866e-07, "loss": 0.0067, "step": 429480 }, { "epoch": 4.588813504994925, "grad_norm": 0.0035116763319820166, "learning_rate": 7.154522835418638e-07, "loss": 0.028, "step": 429490 }, { "epoch": 4.5889203483092045, "grad_norm": 6.506699085235596, "learning_rate": 7.15437122366879e-07, "loss": 0.0025, "step": 429500 }, { "epoch": 4.589027191623484, "grad_norm": 0.005332261323928833, "learning_rate": 7.154219609486495e-07, "loss": 0.0066, "step": 429510 }, { "epoch": 4.589134034937763, "grad_norm": 0.020266056060791016, "learning_rate": 7.154067992871924e-07, "loss": 0.0098, "step": 429520 }, { "epoch": 4.589240878252044, "grad_norm": 0.01202352624386549, "learning_rate": 7.153916373825245e-07, "loss": 0.0071, "step": 429530 }, { "epoch": 4.589347721566323, "grad_norm": 0.088913694024086, "learning_rate": 7.153764752346633e-07, "loss": 0.0039, "step": 429540 }, { "epoch": 4.589454564880603, "grad_norm": 0.8916491866111755, "learning_rate": 7.15361312843626e-07, "loss": 0.0014, "step": 429550 }, { "epoch": 4.589561408194882, "grad_norm": 0.0073828003369271755, "learning_rate": 7.153461502094294e-07, "loss": 0.014, "step": 429560 }, { "epoch": 4.589668251509162, "grad_norm": 0.6854148507118225, "learning_rate": 7.153309873320907e-07, "loss": 0.0034, "step": 429570 }, { "epoch": 4.589775094823441, "grad_norm": 0.7413608431816101, "learning_rate": 7.15315824211627e-07, "loss": 0.007, "step": 429580 }, { "epoch": 4.589881938137721, "grad_norm": 0.03152475878596306, "learning_rate": 7.153006608480557e-07, "loss": 0.016, "step": 429590 }, { "epoch": 4.589988781452001, "grad_norm": 0.18069858849048615, "learning_rate": 7.152854972413936e-07, "loss": 0.0045, "step": 429600 }, { "epoch": 4.59009562476628, "grad_norm": 6.587960243225098, "learning_rate": 7.152703333916579e-07, "loss": 0.0081, "step": 429610 }, { "epoch": 4.59020246808056, "grad_norm": 2.918006181716919, "learning_rate": 7.152551692988657e-07, "loss": 0.0121, "step": 429620 }, { "epoch": 4.590309311394839, "grad_norm": 0.0041267117485404015, "learning_rate": 7.152400049630341e-07, "loss": 0.0004, "step": 429630 }, { "epoch": 4.59041615470912, "grad_norm": 0.0029877612832933664, "learning_rate": 7.152248403841804e-07, "loss": 0.022, "step": 429640 }, { "epoch": 4.590522998023399, "grad_norm": 0.07388397306203842, "learning_rate": 7.152096755623217e-07, "loss": 0.007, "step": 429650 }, { "epoch": 4.5906298413376785, "grad_norm": 5.786182403564453, "learning_rate": 7.15194510497475e-07, "loss": 0.0057, "step": 429660 }, { "epoch": 4.590736684651958, "grad_norm": 0.0007743476307950914, "learning_rate": 7.151793451896573e-07, "loss": 0.0195, "step": 429670 }, { "epoch": 4.590843527966237, "grad_norm": 0.012497547082602978, "learning_rate": 7.151641796388861e-07, "loss": 0.0035, "step": 429680 }, { "epoch": 4.590950371280517, "grad_norm": 0.22795948386192322, "learning_rate": 7.151490138451781e-07, "loss": 0.0051, "step": 429690 }, { "epoch": 4.591057214594796, "grad_norm": 0.007285304367542267, "learning_rate": 7.151338478085507e-07, "loss": 0.0206, "step": 429700 }, { "epoch": 4.591164057909077, "grad_norm": 0.01861589215695858, "learning_rate": 7.151186815290209e-07, "loss": 0.005, "step": 429710 }, { "epoch": 4.591270901223356, "grad_norm": 0.01462650392204523, "learning_rate": 7.15103515006606e-07, "loss": 0.0002, "step": 429720 }, { "epoch": 4.591377744537636, "grad_norm": 1.4257967472076416, "learning_rate": 7.150883482413228e-07, "loss": 0.0221, "step": 429730 }, { "epoch": 4.591484587851915, "grad_norm": 0.04084307327866554, "learning_rate": 7.150731812331889e-07, "loss": 0.005, "step": 429740 }, { "epoch": 4.5915914311661945, "grad_norm": 1.296038269996643, "learning_rate": 7.15058013982221e-07, "loss": 0.019, "step": 429750 }, { "epoch": 4.591698274480475, "grad_norm": 0.003410991979762912, "learning_rate": 7.150428464884363e-07, "loss": 0.0001, "step": 429760 }, { "epoch": 4.591805117794754, "grad_norm": 2.34721302986145, "learning_rate": 7.150276787518521e-07, "loss": 0.0106, "step": 429770 }, { "epoch": 4.591911961109034, "grad_norm": 0.003270986257120967, "learning_rate": 7.150125107724854e-07, "loss": 0.0009, "step": 429780 }, { "epoch": 4.592018804423313, "grad_norm": 0.013683047145605087, "learning_rate": 7.149973425503532e-07, "loss": 0.0007, "step": 429790 }, { "epoch": 4.592125647737593, "grad_norm": 0.002860315842553973, "learning_rate": 7.14982174085473e-07, "loss": 0.0087, "step": 429800 }, { "epoch": 4.592232491051872, "grad_norm": 0.07203193753957748, "learning_rate": 7.149670053778616e-07, "loss": 0.0052, "step": 429810 }, { "epoch": 4.592339334366152, "grad_norm": 1.351384162902832, "learning_rate": 7.149518364275362e-07, "loss": 0.0065, "step": 429820 }, { "epoch": 4.592446177680432, "grad_norm": 0.26914361119270325, "learning_rate": 7.14936667234514e-07, "loss": 0.0187, "step": 429830 }, { "epoch": 4.592553020994711, "grad_norm": 3.367093563079834, "learning_rate": 7.149214977988121e-07, "loss": 0.0005, "step": 429840 }, { "epoch": 4.592659864308991, "grad_norm": 0.86789870262146, "learning_rate": 7.149063281204476e-07, "loss": 0.0416, "step": 429850 }, { "epoch": 4.59276670762327, "grad_norm": 0.010126258246600628, "learning_rate": 7.148911581994376e-07, "loss": 0.0077, "step": 429860 }, { "epoch": 4.59287355093755, "grad_norm": 0.003975206986069679, "learning_rate": 7.148759880357993e-07, "loss": 0.0148, "step": 429870 }, { "epoch": 4.59298039425183, "grad_norm": 4.802966117858887, "learning_rate": 7.148608176295497e-07, "loss": 0.0072, "step": 429880 }, { "epoch": 4.59308723756611, "grad_norm": 7.089027404785156, "learning_rate": 7.148456469807062e-07, "loss": 0.019, "step": 429890 }, { "epoch": 4.593194080880389, "grad_norm": 0.0035905027762055397, "learning_rate": 7.148304760892856e-07, "loss": 0.0016, "step": 429900 }, { "epoch": 4.5933009241946685, "grad_norm": 0.025443769991397858, "learning_rate": 7.148153049553052e-07, "loss": 0.0054, "step": 429910 }, { "epoch": 4.593407767508948, "grad_norm": 0.9275480508804321, "learning_rate": 7.148001335787822e-07, "loss": 0.0181, "step": 429920 }, { "epoch": 4.593514610823227, "grad_norm": 1.9702370166778564, "learning_rate": 7.147849619597334e-07, "loss": 0.0014, "step": 429930 }, { "epoch": 4.593621454137507, "grad_norm": 6.623493194580078, "learning_rate": 7.147697900981764e-07, "loss": 0.0176, "step": 429940 }, { "epoch": 4.593728297451787, "grad_norm": 0.29229795932769775, "learning_rate": 7.147546179941282e-07, "loss": 0.0282, "step": 429950 }, { "epoch": 4.593835140766067, "grad_norm": 0.9990957379341125, "learning_rate": 7.147394456476055e-07, "loss": 0.0017, "step": 429960 }, { "epoch": 4.593941984080346, "grad_norm": 0.06651820987462997, "learning_rate": 7.147242730586259e-07, "loss": 0.0023, "step": 429970 }, { "epoch": 4.594048827394626, "grad_norm": 0.4003051817417145, "learning_rate": 7.147091002272065e-07, "loss": 0.0063, "step": 429980 }, { "epoch": 4.594155670708905, "grad_norm": 0.003636791370809078, "learning_rate": 7.146939271533642e-07, "loss": 0.018, "step": 429990 }, { "epoch": 4.594262514023185, "grad_norm": 0.00822055246680975, "learning_rate": 7.146787538371163e-07, "loss": 0.0004, "step": 430000 }, { "epoch": 4.594369357337465, "grad_norm": 0.01686333864927292, "learning_rate": 7.146635802784797e-07, "loss": 0.0096, "step": 430010 }, { "epoch": 4.594476200651744, "grad_norm": 0.06874604523181915, "learning_rate": 7.14648406477472e-07, "loss": 0.0038, "step": 430020 }, { "epoch": 4.594583043966024, "grad_norm": 0.0008387102861888707, "learning_rate": 7.146332324341099e-07, "loss": 0.0041, "step": 430030 }, { "epoch": 4.594689887280303, "grad_norm": 0.12844686210155487, "learning_rate": 7.146180581484107e-07, "loss": 0.0097, "step": 430040 }, { "epoch": 4.594796730594583, "grad_norm": 2.627927541732788, "learning_rate": 7.146028836203915e-07, "loss": 0.0057, "step": 430050 }, { "epoch": 4.594903573908862, "grad_norm": 14.144991874694824, "learning_rate": 7.145877088500694e-07, "loss": 0.0233, "step": 430060 }, { "epoch": 4.5950104172231425, "grad_norm": 0.00318370433524251, "learning_rate": 7.145725338374616e-07, "loss": 0.0048, "step": 430070 }, { "epoch": 4.595117260537422, "grad_norm": 0.0007238920661620796, "learning_rate": 7.145573585825854e-07, "loss": 0.0078, "step": 430080 }, { "epoch": 4.595224103851701, "grad_norm": 0.010039390996098518, "learning_rate": 7.145421830854577e-07, "loss": 0.0004, "step": 430090 }, { "epoch": 4.595330947165981, "grad_norm": 1.288196325302124, "learning_rate": 7.145270073460954e-07, "loss": 0.0146, "step": 430100 }, { "epoch": 4.59543779048026, "grad_norm": 0.2799539268016815, "learning_rate": 7.145118313645162e-07, "loss": 0.0398, "step": 430110 }, { "epoch": 4.595544633794541, "grad_norm": 0.07686814665794373, "learning_rate": 7.14496655140737e-07, "loss": 0.0106, "step": 430120 }, { "epoch": 4.59565147710882, "grad_norm": 0.15467776358127594, "learning_rate": 7.144814786747745e-07, "loss": 0.0049, "step": 430130 }, { "epoch": 4.5957583204231, "grad_norm": 0.0034648675937205553, "learning_rate": 7.144663019666465e-07, "loss": 0.0132, "step": 430140 }, { "epoch": 4.595865163737379, "grad_norm": 0.008656281046569347, "learning_rate": 7.1445112501637e-07, "loss": 0.0034, "step": 430150 }, { "epoch": 4.5959720070516585, "grad_norm": 0.09820032864809036, "learning_rate": 7.144359478239617e-07, "loss": 0.0362, "step": 430160 }, { "epoch": 4.596078850365938, "grad_norm": 0.7215024828910828, "learning_rate": 7.144207703894392e-07, "loss": 0.0091, "step": 430170 }, { "epoch": 4.596185693680218, "grad_norm": 0.0008662762702442706, "learning_rate": 7.144055927128196e-07, "loss": 0.0121, "step": 430180 }, { "epoch": 4.596292536994498, "grad_norm": 0.016187723726034164, "learning_rate": 7.143904147941196e-07, "loss": 0.0008, "step": 430190 }, { "epoch": 4.596399380308777, "grad_norm": 0.6226449608802795, "learning_rate": 7.143752366333569e-07, "loss": 0.0107, "step": 430200 }, { "epoch": 4.596506223623057, "grad_norm": 0.07002826035022736, "learning_rate": 7.143600582305481e-07, "loss": 0.0104, "step": 430210 }, { "epoch": 4.596613066937336, "grad_norm": 2.5905263423919678, "learning_rate": 7.143448795857107e-07, "loss": 0.0081, "step": 430220 }, { "epoch": 4.596719910251616, "grad_norm": 0.2007855474948883, "learning_rate": 7.14329700698862e-07, "loss": 0.0004, "step": 430230 }, { "epoch": 4.596826753565896, "grad_norm": 0.012097636237740517, "learning_rate": 7.143145215700186e-07, "loss": 0.0153, "step": 430240 }, { "epoch": 4.596933596880175, "grad_norm": 0.004999740514904261, "learning_rate": 7.14299342199198e-07, "loss": 0.0026, "step": 430250 }, { "epoch": 4.597040440194455, "grad_norm": 10.587770462036133, "learning_rate": 7.142841625864175e-07, "loss": 0.0138, "step": 430260 }, { "epoch": 4.597147283508734, "grad_norm": 3.4285783767700195, "learning_rate": 7.142689827316937e-07, "loss": 0.0112, "step": 430270 }, { "epoch": 4.597254126823014, "grad_norm": 0.1416793167591095, "learning_rate": 7.142538026350442e-07, "loss": 0.0121, "step": 430280 }, { "epoch": 4.597360970137293, "grad_norm": 0.5128036737442017, "learning_rate": 7.14238622296486e-07, "loss": 0.0156, "step": 430290 }, { "epoch": 4.597467813451574, "grad_norm": 0.6576905846595764, "learning_rate": 7.142234417160361e-07, "loss": 0.0042, "step": 430300 }, { "epoch": 4.597574656765853, "grad_norm": 0.002538251457735896, "learning_rate": 7.142082608937118e-07, "loss": 0.0077, "step": 430310 }, { "epoch": 4.5976815000801325, "grad_norm": 0.007798399310559034, "learning_rate": 7.141930798295304e-07, "loss": 0.0124, "step": 430320 }, { "epoch": 4.597788343394412, "grad_norm": 0.007923341356217861, "learning_rate": 7.141778985235087e-07, "loss": 0.0153, "step": 430330 }, { "epoch": 4.5978951867086915, "grad_norm": 0.04299505800008774, "learning_rate": 7.141627169756638e-07, "loss": 0.0041, "step": 430340 }, { "epoch": 4.598002030022972, "grad_norm": 0.11049070209264755, "learning_rate": 7.141475351860133e-07, "loss": 0.0273, "step": 430350 }, { "epoch": 4.598108873337251, "grad_norm": 0.17027591168880463, "learning_rate": 7.14132353154574e-07, "loss": 0.0032, "step": 430360 }, { "epoch": 4.598215716651531, "grad_norm": 0.0045188418589532375, "learning_rate": 7.141171708813632e-07, "loss": 0.0058, "step": 430370 }, { "epoch": 4.59832255996581, "grad_norm": 0.2180551290512085, "learning_rate": 7.141019883663977e-07, "loss": 0.008, "step": 430380 }, { "epoch": 4.59842940328009, "grad_norm": 0.02156001515686512, "learning_rate": 7.14086805609695e-07, "loss": 0.0294, "step": 430390 }, { "epoch": 4.598536246594369, "grad_norm": 0.006045746151357889, "learning_rate": 7.140716226112723e-07, "loss": 0.0214, "step": 430400 }, { "epoch": 4.598643089908649, "grad_norm": 0.006891374476253986, "learning_rate": 7.140564393711464e-07, "loss": 0.0085, "step": 430410 }, { "epoch": 4.598749933222929, "grad_norm": 2.6167151927948, "learning_rate": 7.140412558893347e-07, "loss": 0.0024, "step": 430420 }, { "epoch": 4.598856776537208, "grad_norm": 8.26269245147705, "learning_rate": 7.140260721658543e-07, "loss": 0.0008, "step": 430430 }, { "epoch": 4.598963619851488, "grad_norm": 0.0013183490373194218, "learning_rate": 7.140108882007222e-07, "loss": 0.0026, "step": 430440 }, { "epoch": 4.599070463165767, "grad_norm": 0.07745811343193054, "learning_rate": 7.139957039939558e-07, "loss": 0.0508, "step": 430450 }, { "epoch": 4.599177306480047, "grad_norm": 0.005448582582175732, "learning_rate": 7.13980519545572e-07, "loss": 0.0517, "step": 430460 }, { "epoch": 4.599284149794327, "grad_norm": 0.030839061364531517, "learning_rate": 7.139653348555881e-07, "loss": 0.0028, "step": 430470 }, { "epoch": 4.5993909931086066, "grad_norm": 0.5377431511878967, "learning_rate": 7.139501499240211e-07, "loss": 0.0131, "step": 430480 }, { "epoch": 4.599497836422886, "grad_norm": 6.760610580444336, "learning_rate": 7.139349647508883e-07, "loss": 0.0076, "step": 430490 }, { "epoch": 4.5996046797371655, "grad_norm": 0.0058625247329473495, "learning_rate": 7.139197793362069e-07, "loss": 0.0073, "step": 430500 }, { "epoch": 4.599711523051445, "grad_norm": 0.02868277207016945, "learning_rate": 7.139045936799937e-07, "loss": 0.008, "step": 430510 }, { "epoch": 4.599818366365724, "grad_norm": 0.03564409911632538, "learning_rate": 7.138894077822662e-07, "loss": 0.0032, "step": 430520 }, { "epoch": 4.599925209680004, "grad_norm": 3.323045253753662, "learning_rate": 7.138742216430413e-07, "loss": 0.0099, "step": 430530 }, { "epoch": 4.600032052994284, "grad_norm": 0.00856212992221117, "learning_rate": 7.138590352623364e-07, "loss": 0.0194, "step": 430540 }, { "epoch": 4.600138896308564, "grad_norm": 0.012077851220965385, "learning_rate": 7.138438486401685e-07, "loss": 0.0112, "step": 430550 }, { "epoch": 4.600245739622843, "grad_norm": 5.552377223968506, "learning_rate": 7.138286617765547e-07, "loss": 0.024, "step": 430560 }, { "epoch": 4.600352582937123, "grad_norm": 0.042073220014572144, "learning_rate": 7.138134746715122e-07, "loss": 0.0003, "step": 430570 }, { "epoch": 4.600459426251402, "grad_norm": 0.0026190844364464283, "learning_rate": 7.137982873250582e-07, "loss": 0.0077, "step": 430580 }, { "epoch": 4.600566269565682, "grad_norm": 1.0343266725540161, "learning_rate": 7.137830997372097e-07, "loss": 0.0139, "step": 430590 }, { "epoch": 4.600673112879962, "grad_norm": 0.0030724406242370605, "learning_rate": 7.137679119079841e-07, "loss": 0.0016, "step": 430600 }, { "epoch": 4.600779956194241, "grad_norm": 2.920243740081787, "learning_rate": 7.137527238373981e-07, "loss": 0.0072, "step": 430610 }, { "epoch": 4.600886799508521, "grad_norm": 0.002329288749024272, "learning_rate": 7.137375355254694e-07, "loss": 0.0065, "step": 430620 }, { "epoch": 4.6009936428228, "grad_norm": 0.47427889704704285, "learning_rate": 7.137223469722149e-07, "loss": 0.0015, "step": 430630 }, { "epoch": 4.60110048613708, "grad_norm": 0.010446351952850819, "learning_rate": 7.137071581776516e-07, "loss": 0.0177, "step": 430640 }, { "epoch": 4.601207329451359, "grad_norm": 0.0024370579048991203, "learning_rate": 7.136919691417968e-07, "loss": 0.0012, "step": 430650 }, { "epoch": 4.6013141727656395, "grad_norm": 0.45374244451522827, "learning_rate": 7.136767798646676e-07, "loss": 0.0068, "step": 430660 }, { "epoch": 4.601421016079919, "grad_norm": 0.0704185888171196, "learning_rate": 7.136615903462815e-07, "loss": 0.0333, "step": 430670 }, { "epoch": 4.601527859394198, "grad_norm": 3.8892829418182373, "learning_rate": 7.136464005866549e-07, "loss": 0.0077, "step": 430680 }, { "epoch": 4.601634702708478, "grad_norm": 0.03053200989961624, "learning_rate": 7.136312105858057e-07, "loss": 0.0171, "step": 430690 }, { "epoch": 4.601741546022757, "grad_norm": 0.15983930230140686, "learning_rate": 7.136160203437507e-07, "loss": 0.0002, "step": 430700 }, { "epoch": 4.601848389337038, "grad_norm": 0.006567105650901794, "learning_rate": 7.13600829860507e-07, "loss": 0.0078, "step": 430710 }, { "epoch": 4.601955232651317, "grad_norm": 0.3777194619178772, "learning_rate": 7.13585639136092e-07, "loss": 0.0041, "step": 430720 }, { "epoch": 4.602062075965597, "grad_norm": 0.0014896424254402518, "learning_rate": 7.135704481705224e-07, "loss": 0.0236, "step": 430730 }, { "epoch": 4.602168919279876, "grad_norm": 0.003848092630505562, "learning_rate": 7.13555256963816e-07, "loss": 0.0166, "step": 430740 }, { "epoch": 4.6022757625941555, "grad_norm": 0.0036204024218022823, "learning_rate": 7.135400655159894e-07, "loss": 0.0089, "step": 430750 }, { "epoch": 4.602382605908435, "grad_norm": 0.040130242705345154, "learning_rate": 7.135248738270599e-07, "loss": 0.0012, "step": 430760 }, { "epoch": 4.602489449222714, "grad_norm": 26.619474411010742, "learning_rate": 7.135096818970447e-07, "loss": 0.0049, "step": 430770 }, { "epoch": 4.602596292536995, "grad_norm": 0.4801591634750366, "learning_rate": 7.134944897259612e-07, "loss": 0.0032, "step": 430780 }, { "epoch": 4.602703135851274, "grad_norm": 0.0015026924666017294, "learning_rate": 7.134792973138259e-07, "loss": 0.0006, "step": 430790 }, { "epoch": 4.602809979165554, "grad_norm": 0.004407887347042561, "learning_rate": 7.134641046606567e-07, "loss": 0.0112, "step": 430800 }, { "epoch": 4.602916822479833, "grad_norm": 4.099338054656982, "learning_rate": 7.134489117664703e-07, "loss": 0.002, "step": 430810 }, { "epoch": 4.603023665794113, "grad_norm": 0.06818421185016632, "learning_rate": 7.13433718631284e-07, "loss": 0.0025, "step": 430820 }, { "epoch": 4.603130509108393, "grad_norm": 0.007170021999627352, "learning_rate": 7.134185252551149e-07, "loss": 0.0025, "step": 430830 }, { "epoch": 4.603237352422672, "grad_norm": 2.3440582752227783, "learning_rate": 7.134033316379802e-07, "loss": 0.0084, "step": 430840 }, { "epoch": 4.603344195736952, "grad_norm": 0.0074942042119801044, "learning_rate": 7.13388137779897e-07, "loss": 0.0014, "step": 430850 }, { "epoch": 4.603451039051231, "grad_norm": 0.07035156339406967, "learning_rate": 7.133729436808825e-07, "loss": 0.0015, "step": 430860 }, { "epoch": 4.603557882365511, "grad_norm": 0.09005136787891388, "learning_rate": 7.133577493409538e-07, "loss": 0.0006, "step": 430870 }, { "epoch": 4.60366472567979, "grad_norm": 0.002293902449309826, "learning_rate": 7.133425547601281e-07, "loss": 0.0132, "step": 430880 }, { "epoch": 4.603771568994071, "grad_norm": 0.025074254721403122, "learning_rate": 7.133273599384228e-07, "loss": 0.0031, "step": 430890 }, { "epoch": 4.60387841230835, "grad_norm": 0.0036064398009330034, "learning_rate": 7.133121648758544e-07, "loss": 0.0332, "step": 430900 }, { "epoch": 4.6039852556226295, "grad_norm": 0.002605247311294079, "learning_rate": 7.132969695724408e-07, "loss": 0.0046, "step": 430910 }, { "epoch": 4.604092098936909, "grad_norm": 0.015323705039918423, "learning_rate": 7.132817740281986e-07, "loss": 0.0303, "step": 430920 }, { "epoch": 4.604198942251188, "grad_norm": 1.93898606300354, "learning_rate": 7.132665782431453e-07, "loss": 0.0019, "step": 430930 }, { "epoch": 4.604305785565468, "grad_norm": 0.040730979293584824, "learning_rate": 7.13251382217298e-07, "loss": 0.0104, "step": 430940 }, { "epoch": 4.604412628879748, "grad_norm": 0.052401937544345856, "learning_rate": 7.132361859506735e-07, "loss": 0.0034, "step": 430950 }, { "epoch": 4.604519472194028, "grad_norm": 0.004053190350532532, "learning_rate": 7.132209894432895e-07, "loss": 0.0008, "step": 430960 }, { "epoch": 4.604626315508307, "grad_norm": 7.934940338134766, "learning_rate": 7.132057926951629e-07, "loss": 0.005, "step": 430970 }, { "epoch": 4.604733158822587, "grad_norm": 0.0033794385381042957, "learning_rate": 7.131905957063109e-07, "loss": 0.0069, "step": 430980 }, { "epoch": 4.604840002136866, "grad_norm": 27.39194679260254, "learning_rate": 7.131753984767505e-07, "loss": 0.0317, "step": 430990 }, { "epoch": 4.6049468454511455, "grad_norm": 0.008656805381178856, "learning_rate": 7.131602010064991e-07, "loss": 0.0013, "step": 431000 }, { "epoch": 4.605053688765426, "grad_norm": 0.003660251386463642, "learning_rate": 7.131450032955736e-07, "loss": 0.0234, "step": 431010 }, { "epoch": 4.605160532079705, "grad_norm": 0.3784237205982208, "learning_rate": 7.131298053439913e-07, "loss": 0.0004, "step": 431020 }, { "epoch": 4.605267375393985, "grad_norm": 1.4433907270431519, "learning_rate": 7.131146071517696e-07, "loss": 0.0028, "step": 431030 }, { "epoch": 4.605374218708264, "grad_norm": 0.0026313161943107843, "learning_rate": 7.130994087189252e-07, "loss": 0.0072, "step": 431040 }, { "epoch": 4.605481062022544, "grad_norm": 2.000991106033325, "learning_rate": 7.130842100454757e-07, "loss": 0.0077, "step": 431050 }, { "epoch": 4.605587905336823, "grad_norm": 17.9984130859375, "learning_rate": 7.130690111314379e-07, "loss": 0.0743, "step": 431060 }, { "epoch": 4.6056947486511035, "grad_norm": 4.9232683181762695, "learning_rate": 7.13053811976829e-07, "loss": 0.0037, "step": 431070 }, { "epoch": 4.605801591965383, "grad_norm": 0.44026681780815125, "learning_rate": 7.130386125816665e-07, "loss": 0.0162, "step": 431080 }, { "epoch": 4.605908435279662, "grad_norm": 0.5215801000595093, "learning_rate": 7.130234129459672e-07, "loss": 0.0005, "step": 431090 }, { "epoch": 4.606015278593942, "grad_norm": 0.0014698929153382778, "learning_rate": 7.130082130697483e-07, "loss": 0.0025, "step": 431100 }, { "epoch": 4.606122121908221, "grad_norm": 0.008450393564999104, "learning_rate": 7.129930129530273e-07, "loss": 0.0036, "step": 431110 }, { "epoch": 4.606228965222501, "grad_norm": 0.4514932334423065, "learning_rate": 7.129778125958211e-07, "loss": 0.0017, "step": 431120 }, { "epoch": 4.606335808536781, "grad_norm": 0.02209867537021637, "learning_rate": 7.129626119981466e-07, "loss": 0.0044, "step": 431130 }, { "epoch": 4.606442651851061, "grad_norm": 0.007064637262374163, "learning_rate": 7.129474111600215e-07, "loss": 0.0006, "step": 431140 }, { "epoch": 4.60654949516534, "grad_norm": 5.54435396194458, "learning_rate": 7.129322100814626e-07, "loss": 0.0205, "step": 431150 }, { "epoch": 4.6066563384796195, "grad_norm": 0.028920182958245277, "learning_rate": 7.129170087624869e-07, "loss": 0.0016, "step": 431160 }, { "epoch": 4.606763181793899, "grad_norm": 0.0017131069907918572, "learning_rate": 7.129018072031122e-07, "loss": 0.0031, "step": 431170 }, { "epoch": 4.606870025108179, "grad_norm": 0.013802152127027512, "learning_rate": 7.128866054033551e-07, "loss": 0.0084, "step": 431180 }, { "epoch": 4.606976868422459, "grad_norm": 2.8562467098236084, "learning_rate": 7.12871403363233e-07, "loss": 0.0336, "step": 431190 }, { "epoch": 4.607083711736738, "grad_norm": 0.028323836624622345, "learning_rate": 7.128562010827631e-07, "loss": 0.0094, "step": 431200 }, { "epoch": 4.607190555051018, "grad_norm": 0.0015907326014712453, "learning_rate": 7.128409985619625e-07, "loss": 0.0025, "step": 431210 }, { "epoch": 4.607297398365297, "grad_norm": 1.9874675273895264, "learning_rate": 7.128257958008482e-07, "loss": 0.031, "step": 431220 }, { "epoch": 4.607404241679577, "grad_norm": 0.0699140876531601, "learning_rate": 7.128105927994377e-07, "loss": 0.0126, "step": 431230 }, { "epoch": 4.607511084993856, "grad_norm": 6.071882247924805, "learning_rate": 7.127953895577478e-07, "loss": 0.0031, "step": 431240 }, { "epoch": 4.607617928308136, "grad_norm": 3.5447680950164795, "learning_rate": 7.127801860757959e-07, "loss": 0.0111, "step": 431250 }, { "epoch": 4.607724771622416, "grad_norm": 0.6072641611099243, "learning_rate": 7.127649823535992e-07, "loss": 0.0014, "step": 431260 }, { "epoch": 4.607831614936695, "grad_norm": 0.001965364208444953, "learning_rate": 7.127497783911747e-07, "loss": 0.0402, "step": 431270 }, { "epoch": 4.607938458250975, "grad_norm": 5.550854682922363, "learning_rate": 7.127345741885395e-07, "loss": 0.0083, "step": 431280 }, { "epoch": 4.608045301565254, "grad_norm": 0.027342725545167923, "learning_rate": 7.12719369745711e-07, "loss": 0.0367, "step": 431290 }, { "epoch": 4.608152144879535, "grad_norm": 0.1736830174922943, "learning_rate": 7.127041650627065e-07, "loss": 0.0013, "step": 431300 }, { "epoch": 4.608258988193814, "grad_norm": 0.03191482275724411, "learning_rate": 7.126889601395427e-07, "loss": 0.0245, "step": 431310 }, { "epoch": 4.6083658315080935, "grad_norm": 0.8062231540679932, "learning_rate": 7.126737549762371e-07, "loss": 0.0026, "step": 431320 }, { "epoch": 4.608472674822373, "grad_norm": 0.033403825014829636, "learning_rate": 7.126585495728067e-07, "loss": 0.0046, "step": 431330 }, { "epoch": 4.6085795181366525, "grad_norm": 2.48569917678833, "learning_rate": 7.126433439292688e-07, "loss": 0.008, "step": 431340 }, { "epoch": 4.608686361450932, "grad_norm": 0.06638310104608536, "learning_rate": 7.126281380456406e-07, "loss": 0.002, "step": 431350 }, { "epoch": 4.608793204765211, "grad_norm": 0.037556733936071396, "learning_rate": 7.126129319219391e-07, "loss": 0.0002, "step": 431360 }, { "epoch": 4.608900048079492, "grad_norm": 0.15540476143360138, "learning_rate": 7.125977255581816e-07, "loss": 0.0021, "step": 431370 }, { "epoch": 4.609006891393771, "grad_norm": 10.111490249633789, "learning_rate": 7.12582518954385e-07, "loss": 0.0357, "step": 431380 }, { "epoch": 4.609113734708051, "grad_norm": 0.006155773065984249, "learning_rate": 7.12567312110567e-07, "loss": 0.0091, "step": 431390 }, { "epoch": 4.60922057802233, "grad_norm": 1.0546247959136963, "learning_rate": 7.125521050267443e-07, "loss": 0.0058, "step": 431400 }, { "epoch": 4.6093274213366096, "grad_norm": 0.0017065483843907714, "learning_rate": 7.125368977029342e-07, "loss": 0.0131, "step": 431410 }, { "epoch": 4.60943426465089, "grad_norm": 14.996524810791016, "learning_rate": 7.12521690139154e-07, "loss": 0.0273, "step": 431420 }, { "epoch": 4.609541107965169, "grad_norm": 3.170464515686035, "learning_rate": 7.125064823354209e-07, "loss": 0.0147, "step": 431430 }, { "epoch": 4.609647951279449, "grad_norm": 2.8828835487365723, "learning_rate": 7.124912742917517e-07, "loss": 0.0051, "step": 431440 }, { "epoch": 4.609754794593728, "grad_norm": 0.004255200736224651, "learning_rate": 7.124760660081639e-07, "loss": 0.002, "step": 431450 }, { "epoch": 4.609861637908008, "grad_norm": 0.012372706085443497, "learning_rate": 7.124608574846746e-07, "loss": 0.0116, "step": 431460 }, { "epoch": 4.609968481222287, "grad_norm": 0.020999515429139137, "learning_rate": 7.12445648721301e-07, "loss": 0.0173, "step": 431470 }, { "epoch": 4.610075324536567, "grad_norm": 2.5592129230499268, "learning_rate": 7.124304397180601e-07, "loss": 0.0022, "step": 431480 }, { "epoch": 4.610182167850847, "grad_norm": 0.00024602352641522884, "learning_rate": 7.124152304749693e-07, "loss": 0.0107, "step": 431490 }, { "epoch": 4.6102890111651265, "grad_norm": 0.17123878002166748, "learning_rate": 7.124000209920456e-07, "loss": 0.0026, "step": 431500 }, { "epoch": 4.610395854479406, "grad_norm": 3.501081943511963, "learning_rate": 7.123848112693064e-07, "loss": 0.0014, "step": 431510 }, { "epoch": 4.610502697793685, "grad_norm": 0.02120765671133995, "learning_rate": 7.123696013067685e-07, "loss": 0.0005, "step": 431520 }, { "epoch": 4.610609541107965, "grad_norm": 0.004087935667484999, "learning_rate": 7.123543911044494e-07, "loss": 0.0179, "step": 431530 }, { "epoch": 4.610716384422245, "grad_norm": 0.079767607152462, "learning_rate": 7.123391806623662e-07, "loss": 0.0114, "step": 431540 }, { "epoch": 4.610823227736525, "grad_norm": 0.057801708579063416, "learning_rate": 7.123239699805361e-07, "loss": 0.0516, "step": 431550 }, { "epoch": 4.610930071050804, "grad_norm": 2.930762767791748, "learning_rate": 7.12308759058976e-07, "loss": 0.0033, "step": 431560 }, { "epoch": 4.611036914365084, "grad_norm": 0.02106642536818981, "learning_rate": 7.122935478977034e-07, "loss": 0.0034, "step": 431570 }, { "epoch": 4.611143757679363, "grad_norm": 0.289767861366272, "learning_rate": 7.122783364967354e-07, "loss": 0.0276, "step": 431580 }, { "epoch": 4.6112506009936425, "grad_norm": 0.004868254531174898, "learning_rate": 7.122631248560892e-07, "loss": 0.0142, "step": 431590 }, { "epoch": 4.611357444307922, "grad_norm": 0.004622875712811947, "learning_rate": 7.122479129757817e-07, "loss": 0.0182, "step": 431600 }, { "epoch": 4.611464287622202, "grad_norm": 0.018534554168581963, "learning_rate": 7.122327008558304e-07, "loss": 0.0154, "step": 431610 }, { "epoch": 4.611571130936482, "grad_norm": 0.02846600115299225, "learning_rate": 7.122174884962524e-07, "loss": 0.0075, "step": 431620 }, { "epoch": 4.611677974250761, "grad_norm": 3.6352152824401855, "learning_rate": 7.122022758970648e-07, "loss": 0.0108, "step": 431630 }, { "epoch": 4.611784817565041, "grad_norm": 0.01701582409441471, "learning_rate": 7.121870630582848e-07, "loss": 0.0018, "step": 431640 }, { "epoch": 4.61189166087932, "grad_norm": 0.08138832449913025, "learning_rate": 7.121718499799297e-07, "loss": 0.0075, "step": 431650 }, { "epoch": 4.6119985041936005, "grad_norm": 0.37250828742980957, "learning_rate": 7.121566366620165e-07, "loss": 0.0009, "step": 431660 }, { "epoch": 4.61210534750788, "grad_norm": 0.010241576470434666, "learning_rate": 7.121414231045622e-07, "loss": 0.0088, "step": 431670 }, { "epoch": 4.612212190822159, "grad_norm": 0.014921258203685284, "learning_rate": 7.121262093075846e-07, "loss": 0.0134, "step": 431680 }, { "epoch": 4.612319034136439, "grad_norm": 0.04772216081619263, "learning_rate": 7.121109952711004e-07, "loss": 0.0057, "step": 431690 }, { "epoch": 4.612425877450718, "grad_norm": 0.0066544609144330025, "learning_rate": 7.120957809951267e-07, "loss": 0.0057, "step": 431700 }, { "epoch": 4.612532720764998, "grad_norm": 0.0040814802050590515, "learning_rate": 7.120805664796811e-07, "loss": 0.0027, "step": 431710 }, { "epoch": 4.612639564079278, "grad_norm": 2.3052985668182373, "learning_rate": 7.120653517247805e-07, "loss": 0.01, "step": 431720 }, { "epoch": 4.612746407393558, "grad_norm": 0.7704384326934814, "learning_rate": 7.120501367304419e-07, "loss": 0.0098, "step": 431730 }, { "epoch": 4.612853250707837, "grad_norm": 0.20770879089832306, "learning_rate": 7.120349214966828e-07, "loss": 0.0079, "step": 431740 }, { "epoch": 4.6129600940221165, "grad_norm": 0.015964776277542114, "learning_rate": 7.120197060235203e-07, "loss": 0.0055, "step": 431750 }, { "epoch": 4.613066937336396, "grad_norm": 0.015526819974184036, "learning_rate": 7.120044903109715e-07, "loss": 0.0134, "step": 431760 }, { "epoch": 4.613173780650675, "grad_norm": 2.7561144828796387, "learning_rate": 7.119892743590537e-07, "loss": 0.0087, "step": 431770 }, { "epoch": 4.613280623964956, "grad_norm": 0.8258581161499023, "learning_rate": 7.11974058167784e-07, "loss": 0.0171, "step": 431780 }, { "epoch": 4.613387467279235, "grad_norm": 0.1655433475971222, "learning_rate": 7.119588417371796e-07, "loss": 0.0033, "step": 431790 }, { "epoch": 4.613494310593515, "grad_norm": 0.006835621781647205, "learning_rate": 7.119436250672577e-07, "loss": 0.0086, "step": 431800 }, { "epoch": 4.613601153907794, "grad_norm": 1.8267877101898193, "learning_rate": 7.119284081580352e-07, "loss": 0.0014, "step": 431810 }, { "epoch": 4.613707997222074, "grad_norm": 0.012777606956660748, "learning_rate": 7.119131910095299e-07, "loss": 0.0031, "step": 431820 }, { "epoch": 4.613814840536353, "grad_norm": 0.014184676110744476, "learning_rate": 7.118979736217584e-07, "loss": 0.007, "step": 431830 }, { "epoch": 4.613921683850633, "grad_norm": 0.13385345041751862, "learning_rate": 7.118827559947382e-07, "loss": 0.0029, "step": 431840 }, { "epoch": 4.614028527164913, "grad_norm": 1.0115268230438232, "learning_rate": 7.118675381284862e-07, "loss": 0.0018, "step": 431850 }, { "epoch": 4.614135370479192, "grad_norm": 0.0036758477799594402, "learning_rate": 7.1185232002302e-07, "loss": 0.0003, "step": 431860 }, { "epoch": 4.614242213793472, "grad_norm": 0.10326484590768814, "learning_rate": 7.118371016783563e-07, "loss": 0.0057, "step": 431870 }, { "epoch": 4.614349057107751, "grad_norm": 0.004676223732531071, "learning_rate": 7.118218830945127e-07, "loss": 0.0061, "step": 431880 }, { "epoch": 4.614455900422032, "grad_norm": 0.05124950036406517, "learning_rate": 7.118066642715062e-07, "loss": 0.0114, "step": 431890 }, { "epoch": 4.614562743736311, "grad_norm": 0.025281695649027824, "learning_rate": 7.117914452093538e-07, "loss": 0.0084, "step": 431900 }, { "epoch": 4.6146695870505905, "grad_norm": 0.006795578170567751, "learning_rate": 7.117762259080731e-07, "loss": 0.004, "step": 431910 }, { "epoch": 4.61477643036487, "grad_norm": 0.003247399814426899, "learning_rate": 7.117610063676809e-07, "loss": 0.0072, "step": 431920 }, { "epoch": 4.614883273679149, "grad_norm": 2.0522940158843994, "learning_rate": 7.117457865881946e-07, "loss": 0.0023, "step": 431930 }, { "epoch": 4.614990116993429, "grad_norm": 0.14553332328796387, "learning_rate": 7.117305665696313e-07, "loss": 0.0059, "step": 431940 }, { "epoch": 4.615096960307708, "grad_norm": 0.030222613364458084, "learning_rate": 7.117153463120083e-07, "loss": 0.0345, "step": 431950 }, { "epoch": 4.615203803621989, "grad_norm": 1.5977927446365356, "learning_rate": 7.117001258153425e-07, "loss": 0.0024, "step": 431960 }, { "epoch": 4.615310646936268, "grad_norm": 3.873490333557129, "learning_rate": 7.116849050796514e-07, "loss": 0.0255, "step": 431970 }, { "epoch": 4.615417490250548, "grad_norm": 0.005855907686054707, "learning_rate": 7.116696841049521e-07, "loss": 0.0118, "step": 431980 }, { "epoch": 4.615524333564827, "grad_norm": 2.734651803970337, "learning_rate": 7.116544628912617e-07, "loss": 0.009, "step": 431990 }, { "epoch": 4.6156311768791065, "grad_norm": 0.8298577070236206, "learning_rate": 7.116392414385975e-07, "loss": 0.0024, "step": 432000 }, { "epoch": 4.615738020193387, "grad_norm": 0.016817834228277206, "learning_rate": 7.116240197469766e-07, "loss": 0.0002, "step": 432010 }, { "epoch": 4.615844863507666, "grad_norm": 0.44400420784950256, "learning_rate": 7.116087978164162e-07, "loss": 0.0133, "step": 432020 }, { "epoch": 4.615951706821946, "grad_norm": 2.6451337337493896, "learning_rate": 7.115935756469336e-07, "loss": 0.0022, "step": 432030 }, { "epoch": 4.616058550136225, "grad_norm": 0.0042097242549061775, "learning_rate": 7.115783532385457e-07, "loss": 0.0036, "step": 432040 }, { "epoch": 4.616165393450505, "grad_norm": 0.01612810790538788, "learning_rate": 7.115631305912699e-07, "loss": 0.0036, "step": 432050 }, { "epoch": 4.616272236764784, "grad_norm": 0.03079456090927124, "learning_rate": 7.115479077051235e-07, "loss": 0.0105, "step": 432060 }, { "epoch": 4.616379080079064, "grad_norm": 0.8792246580123901, "learning_rate": 7.115326845801234e-07, "loss": 0.0067, "step": 432070 }, { "epoch": 4.616485923393344, "grad_norm": 5.380899906158447, "learning_rate": 7.115174612162871e-07, "loss": 0.0881, "step": 432080 }, { "epoch": 4.616592766707623, "grad_norm": 0.5522412061691284, "learning_rate": 7.115022376136315e-07, "loss": 0.0018, "step": 432090 }, { "epoch": 4.616699610021903, "grad_norm": 0.008324863389134407, "learning_rate": 7.114870137721738e-07, "loss": 0.0005, "step": 432100 }, { "epoch": 4.616806453336182, "grad_norm": 0.019062725827097893, "learning_rate": 7.114717896919315e-07, "loss": 0.0022, "step": 432110 }, { "epoch": 4.616913296650462, "grad_norm": 0.0012117641745135188, "learning_rate": 7.114565653729216e-07, "loss": 0.0051, "step": 432120 }, { "epoch": 4.617020139964742, "grad_norm": 0.04472104460000992, "learning_rate": 7.11441340815161e-07, "loss": 0.0003, "step": 432130 }, { "epoch": 4.617126983279022, "grad_norm": 1.2694157361984253, "learning_rate": 7.114261160186674e-07, "loss": 0.0053, "step": 432140 }, { "epoch": 4.617233826593301, "grad_norm": 0.7775508165359497, "learning_rate": 7.114108909834578e-07, "loss": 0.0039, "step": 432150 }, { "epoch": 4.6173406699075805, "grad_norm": 0.2164640724658966, "learning_rate": 7.113956657095493e-07, "loss": 0.0056, "step": 432160 }, { "epoch": 4.61744751322186, "grad_norm": 0.5364038944244385, "learning_rate": 7.11380440196959e-07, "loss": 0.0121, "step": 432170 }, { "epoch": 4.617554356536139, "grad_norm": 0.003643479198217392, "learning_rate": 7.113652144457043e-07, "loss": 0.0198, "step": 432180 }, { "epoch": 4.617661199850419, "grad_norm": 1.8848977088928223, "learning_rate": 7.113499884558024e-07, "loss": 0.0064, "step": 432190 }, { "epoch": 4.617768043164699, "grad_norm": 0.01847846247255802, "learning_rate": 7.113347622272706e-07, "loss": 0.0094, "step": 432200 }, { "epoch": 4.617874886478979, "grad_norm": 0.03152577951550484, "learning_rate": 7.113195357601255e-07, "loss": 0.0089, "step": 432210 }, { "epoch": 4.617981729793258, "grad_norm": 0.21794722974300385, "learning_rate": 7.113043090543848e-07, "loss": 0.0155, "step": 432220 }, { "epoch": 4.618088573107538, "grad_norm": 0.012056018225848675, "learning_rate": 7.112890821100656e-07, "loss": 0.0061, "step": 432230 }, { "epoch": 4.618195416421817, "grad_norm": 0.43420279026031494, "learning_rate": 7.112738549271852e-07, "loss": 0.0095, "step": 432240 }, { "epoch": 4.618302259736097, "grad_norm": 0.0032498661894351244, "learning_rate": 7.112586275057606e-07, "loss": 0.0111, "step": 432250 }, { "epoch": 4.618409103050377, "grad_norm": 0.010638313367962837, "learning_rate": 7.11243399845809e-07, "loss": 0.0065, "step": 432260 }, { "epoch": 4.618515946364656, "grad_norm": 1.609987735748291, "learning_rate": 7.112281719473478e-07, "loss": 0.0137, "step": 432270 }, { "epoch": 4.618622789678936, "grad_norm": 5.3780317306518555, "learning_rate": 7.11212943810394e-07, "loss": 0.0071, "step": 432280 }, { "epoch": 4.618729632993215, "grad_norm": 0.02912103570997715, "learning_rate": 7.111977154349648e-07, "loss": 0.0115, "step": 432290 }, { "epoch": 4.618836476307495, "grad_norm": 0.006425907835364342, "learning_rate": 7.111824868210774e-07, "loss": 0.0021, "step": 432300 }, { "epoch": 4.618943319621774, "grad_norm": 0.891133725643158, "learning_rate": 7.111672579687491e-07, "loss": 0.0033, "step": 432310 }, { "epoch": 4.6190501629360545, "grad_norm": 0.02660333923995495, "learning_rate": 7.111520288779971e-07, "loss": 0.0064, "step": 432320 }, { "epoch": 4.619157006250334, "grad_norm": 0.23701399564743042, "learning_rate": 7.111367995488384e-07, "loss": 0.0035, "step": 432330 }, { "epoch": 4.6192638495646134, "grad_norm": 0.005026673432439566, "learning_rate": 7.111215699812904e-07, "loss": 0.0092, "step": 432340 }, { "epoch": 4.619370692878893, "grad_norm": 0.0028967068064957857, "learning_rate": 7.111063401753701e-07, "loss": 0.0093, "step": 432350 }, { "epoch": 4.619477536193172, "grad_norm": 4.704397678375244, "learning_rate": 7.110911101310949e-07, "loss": 0.0057, "step": 432360 }, { "epoch": 4.619584379507453, "grad_norm": 0.09380003064870834, "learning_rate": 7.110758798484819e-07, "loss": 0.0112, "step": 432370 }, { "epoch": 4.619691222821732, "grad_norm": 0.13383075594902039, "learning_rate": 7.110606493275483e-07, "loss": 0.0106, "step": 432380 }, { "epoch": 4.619798066136012, "grad_norm": 0.028165308758616447, "learning_rate": 7.110454185683113e-07, "loss": 0.0047, "step": 432390 }, { "epoch": 4.619904909450291, "grad_norm": 1.0323442220687866, "learning_rate": 7.110301875707882e-07, "loss": 0.0071, "step": 432400 }, { "epoch": 4.6200117527645705, "grad_norm": 6.18019437789917, "learning_rate": 7.110149563349959e-07, "loss": 0.0251, "step": 432410 }, { "epoch": 4.62011859607885, "grad_norm": 0.10524436086416245, "learning_rate": 7.109997248609518e-07, "loss": 0.0093, "step": 432420 }, { "epoch": 4.62022543939313, "grad_norm": 1.5716257095336914, "learning_rate": 7.109844931486733e-07, "loss": 0.0007, "step": 432430 }, { "epoch": 4.62033228270741, "grad_norm": 0.018095457926392555, "learning_rate": 7.109692611981772e-07, "loss": 0.0023, "step": 432440 }, { "epoch": 4.620439126021689, "grad_norm": 4.714813709259033, "learning_rate": 7.109540290094809e-07, "loss": 0.0092, "step": 432450 }, { "epoch": 4.620545969335969, "grad_norm": 3.756802558898926, "learning_rate": 7.109387965826016e-07, "loss": 0.0017, "step": 432460 }, { "epoch": 4.620652812650248, "grad_norm": 3.912978172302246, "learning_rate": 7.109235639175564e-07, "loss": 0.004, "step": 432470 }, { "epoch": 4.620759655964528, "grad_norm": 0.0008374331519007683, "learning_rate": 7.109083310143627e-07, "loss": 0.0062, "step": 432480 }, { "epoch": 4.620866499278808, "grad_norm": 0.01614750549197197, "learning_rate": 7.108930978730376e-07, "loss": 0.0016, "step": 432490 }, { "epoch": 4.6209733425930875, "grad_norm": 0.016085360199213028, "learning_rate": 7.108778644935981e-07, "loss": 0.0053, "step": 432500 }, { "epoch": 4.621080185907367, "grad_norm": 0.004520495887845755, "learning_rate": 7.108626308760617e-07, "loss": 0.003, "step": 432510 }, { "epoch": 4.621187029221646, "grad_norm": 0.3859259784221649, "learning_rate": 7.108473970204454e-07, "loss": 0.0044, "step": 432520 }, { "epoch": 4.621293872535926, "grad_norm": 0.006205524317920208, "learning_rate": 7.108321629267666e-07, "loss": 0.0009, "step": 432530 }, { "epoch": 4.621400715850205, "grad_norm": 7.2678022384643555, "learning_rate": 7.108169285950423e-07, "loss": 0.0118, "step": 432540 }, { "epoch": 4.621507559164486, "grad_norm": 0.005889562889933586, "learning_rate": 7.108016940252898e-07, "loss": 0.0004, "step": 432550 }, { "epoch": 4.621614402478765, "grad_norm": 14.01353645324707, "learning_rate": 7.107864592175262e-07, "loss": 0.0217, "step": 432560 }, { "epoch": 4.6217212457930446, "grad_norm": 0.4343382716178894, "learning_rate": 7.107712241717688e-07, "loss": 0.0165, "step": 432570 }, { "epoch": 4.621828089107324, "grad_norm": 0.041858069598674774, "learning_rate": 7.107559888880348e-07, "loss": 0.0009, "step": 432580 }, { "epoch": 4.6219349324216035, "grad_norm": 0.1588905304670334, "learning_rate": 7.107407533663414e-07, "loss": 0.0069, "step": 432590 }, { "epoch": 4.622041775735884, "grad_norm": 0.005292963236570358, "learning_rate": 7.107255176067058e-07, "loss": 0.022, "step": 432600 }, { "epoch": 4.622148619050163, "grad_norm": 2.213700532913208, "learning_rate": 7.107102816091452e-07, "loss": 0.0226, "step": 432610 }, { "epoch": 4.622255462364443, "grad_norm": 0.07409843057394028, "learning_rate": 7.106950453736768e-07, "loss": 0.0208, "step": 432620 }, { "epoch": 4.622362305678722, "grad_norm": 13.58004093170166, "learning_rate": 7.106798089003177e-07, "loss": 0.0098, "step": 432630 }, { "epoch": 4.622469148993002, "grad_norm": 0.2520529627799988, "learning_rate": 7.106645721890852e-07, "loss": 0.004, "step": 432640 }, { "epoch": 4.622575992307281, "grad_norm": 1.410856008529663, "learning_rate": 7.106493352399966e-07, "loss": 0.0082, "step": 432650 }, { "epoch": 4.622682835621561, "grad_norm": 0.01529797911643982, "learning_rate": 7.10634098053069e-07, "loss": 0.0028, "step": 432660 }, { "epoch": 4.622789678935841, "grad_norm": 0.014567752368748188, "learning_rate": 7.106188606283194e-07, "loss": 0.0113, "step": 432670 }, { "epoch": 4.62289652225012, "grad_norm": 0.20595262944698334, "learning_rate": 7.106036229657655e-07, "loss": 0.001, "step": 432680 }, { "epoch": 4.6230033655644, "grad_norm": 0.002494694432243705, "learning_rate": 7.105883850654242e-07, "loss": 0.0157, "step": 432690 }, { "epoch": 4.623110208878679, "grad_norm": 0.0027417312376201153, "learning_rate": 7.105731469273124e-07, "loss": 0.0141, "step": 432700 }, { "epoch": 4.623217052192959, "grad_norm": 0.4733898937702179, "learning_rate": 7.105579085514479e-07, "loss": 0.0057, "step": 432710 }, { "epoch": 4.623323895507239, "grad_norm": 0.0007822780171409249, "learning_rate": 7.105426699378476e-07, "loss": 0.0018, "step": 432720 }, { "epoch": 4.623430738821519, "grad_norm": 0.07549300044775009, "learning_rate": 7.105274310865285e-07, "loss": 0.0026, "step": 432730 }, { "epoch": 4.623537582135798, "grad_norm": 0.014398014172911644, "learning_rate": 7.105121919975083e-07, "loss": 0.0011, "step": 432740 }, { "epoch": 4.6236444254500775, "grad_norm": 0.2655099928379059, "learning_rate": 7.104969526708039e-07, "loss": 0.0005, "step": 432750 }, { "epoch": 4.623751268764357, "grad_norm": 0.005072703119367361, "learning_rate": 7.104817131064326e-07, "loss": 0.0055, "step": 432760 }, { "epoch": 4.623858112078636, "grad_norm": 0.4877771735191345, "learning_rate": 7.104664733044114e-07, "loss": 0.0005, "step": 432770 }, { "epoch": 4.623964955392916, "grad_norm": 7.626477241516113, "learning_rate": 7.104512332647578e-07, "loss": 0.0044, "step": 432780 }, { "epoch": 4.624071798707196, "grad_norm": 2.011894702911377, "learning_rate": 7.104359929874888e-07, "loss": 0.0042, "step": 432790 }, { "epoch": 4.624178642021476, "grad_norm": 1.748201847076416, "learning_rate": 7.104207524726218e-07, "loss": 0.0028, "step": 432800 }, { "epoch": 4.624285485335755, "grad_norm": 0.025346754118800163, "learning_rate": 7.104055117201738e-07, "loss": 0.0167, "step": 432810 }, { "epoch": 4.624392328650035, "grad_norm": 0.053325261920690536, "learning_rate": 7.103902707301621e-07, "loss": 0.0043, "step": 432820 }, { "epoch": 4.624499171964314, "grad_norm": 10.584694862365723, "learning_rate": 7.103750295026039e-07, "loss": 0.0095, "step": 432830 }, { "epoch": 4.624606015278594, "grad_norm": 0.001126263989135623, "learning_rate": 7.103597880375164e-07, "loss": 0.0041, "step": 432840 }, { "epoch": 4.624712858592874, "grad_norm": 0.09813925623893738, "learning_rate": 7.10344546334917e-07, "loss": 0.0132, "step": 432850 }, { "epoch": 4.624819701907153, "grad_norm": 0.0037569866981357336, "learning_rate": 7.103293043948224e-07, "loss": 0.0059, "step": 432860 }, { "epoch": 4.624926545221433, "grad_norm": 0.049625761806964874, "learning_rate": 7.103140622172503e-07, "loss": 0.0009, "step": 432870 }, { "epoch": 4.625033388535712, "grad_norm": 7.227705478668213, "learning_rate": 7.102988198022178e-07, "loss": 0.0154, "step": 432880 }, { "epoch": 4.625140231849992, "grad_norm": 18.623096466064453, "learning_rate": 7.10283577149742e-07, "loss": 0.0207, "step": 432890 }, { "epoch": 4.625247075164271, "grad_norm": 0.005320051219314337, "learning_rate": 7.102683342598401e-07, "loss": 0.0126, "step": 432900 }, { "epoch": 4.6253539184785515, "grad_norm": 0.0012140772305428982, "learning_rate": 7.102530911325295e-07, "loss": 0.0111, "step": 432910 }, { "epoch": 4.625460761792831, "grad_norm": 1.304356575012207, "learning_rate": 7.102378477678274e-07, "loss": 0.0028, "step": 432920 }, { "epoch": 4.62556760510711, "grad_norm": 0.05275352671742439, "learning_rate": 7.102226041657506e-07, "loss": 0.0028, "step": 432930 }, { "epoch": 4.62567444842139, "grad_norm": 1.114595890045166, "learning_rate": 7.102073603263169e-07, "loss": 0.0035, "step": 432940 }, { "epoch": 4.625781291735669, "grad_norm": 0.459503173828125, "learning_rate": 7.10192116249543e-07, "loss": 0.0344, "step": 432950 }, { "epoch": 4.62588813504995, "grad_norm": 2.4479079246520996, "learning_rate": 7.101768719354465e-07, "loss": 0.0039, "step": 432960 }, { "epoch": 4.625994978364229, "grad_norm": 0.09926002472639084, "learning_rate": 7.101616273840443e-07, "loss": 0.0092, "step": 432970 }, { "epoch": 4.626101821678509, "grad_norm": 7.375637531280518, "learning_rate": 7.101463825953539e-07, "loss": 0.0183, "step": 432980 }, { "epoch": 4.626208664992788, "grad_norm": 0.016370829194784164, "learning_rate": 7.101311375693923e-07, "loss": 0.074, "step": 432990 }, { "epoch": 4.6263155083070675, "grad_norm": 3.16184401512146, "learning_rate": 7.101158923061769e-07, "loss": 0.0058, "step": 433000 }, { "epoch": 4.626422351621347, "grad_norm": 0.028977179899811745, "learning_rate": 7.101006468057246e-07, "loss": 0.0013, "step": 433010 }, { "epoch": 4.626529194935626, "grad_norm": 1.2191160917282104, "learning_rate": 7.10085401068053e-07, "loss": 0.0046, "step": 433020 }, { "epoch": 4.626636038249907, "grad_norm": 2.4131710529327393, "learning_rate": 7.100701550931792e-07, "loss": 0.0122, "step": 433030 }, { "epoch": 4.626742881564186, "grad_norm": 11.768003463745117, "learning_rate": 7.100549088811201e-07, "loss": 0.015, "step": 433040 }, { "epoch": 4.626849724878466, "grad_norm": 0.0038798744790256023, "learning_rate": 7.100396624318933e-07, "loss": 0.0067, "step": 433050 }, { "epoch": 4.626956568192745, "grad_norm": 4.656064033508301, "learning_rate": 7.100244157455159e-07, "loss": 0.0064, "step": 433060 }, { "epoch": 4.627063411507025, "grad_norm": 0.9168747663497925, "learning_rate": 7.100091688220049e-07, "loss": 0.0028, "step": 433070 }, { "epoch": 4.627170254821305, "grad_norm": 0.29539257287979126, "learning_rate": 7.099939216613779e-07, "loss": 0.0004, "step": 433080 }, { "epoch": 4.627277098135584, "grad_norm": 0.8404282927513123, "learning_rate": 7.099786742636517e-07, "loss": 0.0057, "step": 433090 }, { "epoch": 4.627383941449864, "grad_norm": 1.7930716276168823, "learning_rate": 7.099634266288438e-07, "loss": 0.0196, "step": 433100 }, { "epoch": 4.627490784764143, "grad_norm": 0.04730702564120293, "learning_rate": 7.099481787569715e-07, "loss": 0.0088, "step": 433110 }, { "epoch": 4.627597628078423, "grad_norm": 11.656712532043457, "learning_rate": 7.099329306480517e-07, "loss": 0.0251, "step": 433120 }, { "epoch": 4.627704471392702, "grad_norm": 0.00395699217915535, "learning_rate": 7.099176823021018e-07, "loss": 0.0133, "step": 433130 }, { "epoch": 4.627811314706983, "grad_norm": 3.191086530685425, "learning_rate": 7.09902433719139e-07, "loss": 0.0618, "step": 433140 }, { "epoch": 4.627918158021262, "grad_norm": 2.0723202228546143, "learning_rate": 7.098871848991805e-07, "loss": 0.0044, "step": 433150 }, { "epoch": 4.6280250013355415, "grad_norm": 7.760772228240967, "learning_rate": 7.098719358422436e-07, "loss": 0.0067, "step": 433160 }, { "epoch": 4.628131844649821, "grad_norm": 0.02519005350768566, "learning_rate": 7.098566865483453e-07, "loss": 0.0012, "step": 433170 }, { "epoch": 4.6282386879641, "grad_norm": 0.0211898572742939, "learning_rate": 7.098414370175032e-07, "loss": 0.0144, "step": 433180 }, { "epoch": 4.62834553127838, "grad_norm": 0.007667499128729105, "learning_rate": 7.09826187249734e-07, "loss": 0.0069, "step": 433190 }, { "epoch": 4.62845237459266, "grad_norm": 0.1611284613609314, "learning_rate": 7.098109372450554e-07, "loss": 0.0039, "step": 433200 }, { "epoch": 4.62855921790694, "grad_norm": 0.004690404515713453, "learning_rate": 7.097956870034843e-07, "loss": 0.0141, "step": 433210 }, { "epoch": 4.628666061221219, "grad_norm": 7.313928127288818, "learning_rate": 7.097804365250382e-07, "loss": 0.0095, "step": 433220 }, { "epoch": 4.628772904535499, "grad_norm": 5.633139133453369, "learning_rate": 7.097651858097339e-07, "loss": 0.0284, "step": 433230 }, { "epoch": 4.628879747849778, "grad_norm": 0.010241126641631126, "learning_rate": 7.09749934857589e-07, "loss": 0.008, "step": 433240 }, { "epoch": 4.6289865911640575, "grad_norm": 0.01711469143629074, "learning_rate": 7.097346836686207e-07, "loss": 0.0009, "step": 433250 }, { "epoch": 4.629093434478338, "grad_norm": 1.5924620628356934, "learning_rate": 7.097194322428462e-07, "loss": 0.0117, "step": 433260 }, { "epoch": 4.629200277792617, "grad_norm": 0.7685497999191284, "learning_rate": 7.097041805802822e-07, "loss": 0.0011, "step": 433270 }, { "epoch": 4.629307121106897, "grad_norm": 0.003004242666065693, "learning_rate": 7.096889286809468e-07, "loss": 0.0053, "step": 433280 }, { "epoch": 4.629413964421176, "grad_norm": 3.414093255996704, "learning_rate": 7.096736765448566e-07, "loss": 0.018, "step": 433290 }, { "epoch": 4.629520807735456, "grad_norm": 4.602948188781738, "learning_rate": 7.096584241720289e-07, "loss": 0.0141, "step": 433300 }, { "epoch": 4.629627651049736, "grad_norm": 0.1653076410293579, "learning_rate": 7.096431715624811e-07, "loss": 0.0077, "step": 433310 }, { "epoch": 4.6297344943640155, "grad_norm": 0.007869316264986992, "learning_rate": 7.096279187162303e-07, "loss": 0.0078, "step": 433320 }, { "epoch": 4.629841337678295, "grad_norm": 0.45109453797340393, "learning_rate": 7.096126656332938e-07, "loss": 0.009, "step": 433330 }, { "epoch": 4.629948180992574, "grad_norm": 6.529263496398926, "learning_rate": 7.095974123136888e-07, "loss": 0.0273, "step": 433340 }, { "epoch": 4.630055024306854, "grad_norm": 6.48617696762085, "learning_rate": 7.095821587574325e-07, "loss": 0.0064, "step": 433350 }, { "epoch": 4.630161867621133, "grad_norm": 0.454965204000473, "learning_rate": 7.095669049645421e-07, "loss": 0.0153, "step": 433360 }, { "epoch": 4.630268710935413, "grad_norm": 0.04409901052713394, "learning_rate": 7.095516509350348e-07, "loss": 0.0008, "step": 433370 }, { "epoch": 4.630375554249693, "grad_norm": 0.01349596492946148, "learning_rate": 7.095363966689278e-07, "loss": 0.0031, "step": 433380 }, { "epoch": 4.630482397563973, "grad_norm": 3.06072735786438, "learning_rate": 7.095211421662386e-07, "loss": 0.0157, "step": 433390 }, { "epoch": 4.630589240878252, "grad_norm": 0.0031072136480361223, "learning_rate": 7.095058874269841e-07, "loss": 0.0139, "step": 433400 }, { "epoch": 4.6306960841925315, "grad_norm": 0.016923828050494194, "learning_rate": 7.094906324511817e-07, "loss": 0.0302, "step": 433410 }, { "epoch": 4.630802927506811, "grad_norm": 4.70621395111084, "learning_rate": 7.094753772388487e-07, "loss": 0.0068, "step": 433420 }, { "epoch": 4.630909770821091, "grad_norm": 2.162731409072876, "learning_rate": 7.094601217900019e-07, "loss": 0.0098, "step": 433430 }, { "epoch": 4.631016614135371, "grad_norm": 0.32868683338165283, "learning_rate": 7.094448661046589e-07, "loss": 0.0096, "step": 433440 }, { "epoch": 4.63112345744965, "grad_norm": 1.7870391607284546, "learning_rate": 7.09429610182837e-07, "loss": 0.0055, "step": 433450 }, { "epoch": 4.63123030076393, "grad_norm": 2.6244688034057617, "learning_rate": 7.094143540245531e-07, "loss": 0.0111, "step": 433460 }, { "epoch": 4.631337144078209, "grad_norm": 8.765929222106934, "learning_rate": 7.093990976298245e-07, "loss": 0.0152, "step": 433470 }, { "epoch": 4.631443987392489, "grad_norm": 0.060753270983695984, "learning_rate": 7.093838409986688e-07, "loss": 0.0067, "step": 433480 }, { "epoch": 4.631550830706768, "grad_norm": 1.9520725011825562, "learning_rate": 7.093685841311027e-07, "loss": 0.0209, "step": 433490 }, { "epoch": 4.631657674021048, "grad_norm": 6.37069845199585, "learning_rate": 7.093533270271438e-07, "loss": 0.0067, "step": 433500 }, { "epoch": 4.631764517335328, "grad_norm": 2.1919848918914795, "learning_rate": 7.09338069686809e-07, "loss": 0.0181, "step": 433510 }, { "epoch": 4.631871360649607, "grad_norm": 1.5330486297607422, "learning_rate": 7.09322812110116e-07, "loss": 0.0054, "step": 433520 }, { "epoch": 4.631978203963887, "grad_norm": 11.762843132019043, "learning_rate": 7.093075542970815e-07, "loss": 0.0028, "step": 433530 }, { "epoch": 4.632085047278166, "grad_norm": 1.0363491773605347, "learning_rate": 7.092922962477231e-07, "loss": 0.0124, "step": 433540 }, { "epoch": 4.632191890592447, "grad_norm": 0.0355009064078331, "learning_rate": 7.092770379620579e-07, "loss": 0.0044, "step": 433550 }, { "epoch": 4.632298733906726, "grad_norm": 0.33626729249954224, "learning_rate": 7.092617794401032e-07, "loss": 0.0006, "step": 433560 }, { "epoch": 4.6324055772210055, "grad_norm": 1.3675035238265991, "learning_rate": 7.092465206818759e-07, "loss": 0.003, "step": 433570 }, { "epoch": 4.632512420535285, "grad_norm": 0.04020067676901817, "learning_rate": 7.092312616873936e-07, "loss": 0.0157, "step": 433580 }, { "epoch": 4.6326192638495645, "grad_norm": 0.029472747817635536, "learning_rate": 7.092160024566735e-07, "loss": 0.0106, "step": 433590 }, { "epoch": 4.632726107163844, "grad_norm": 0.027231264859437943, "learning_rate": 7.092007429897329e-07, "loss": 0.0019, "step": 433600 }, { "epoch": 4.632832950478123, "grad_norm": 0.09681418538093567, "learning_rate": 7.091854832865886e-07, "loss": 0.0291, "step": 433610 }, { "epoch": 4.632939793792404, "grad_norm": 5.102154731750488, "learning_rate": 7.091702233472582e-07, "loss": 0.0045, "step": 433620 }, { "epoch": 4.633046637106683, "grad_norm": 0.016927247866988182, "learning_rate": 7.091549631717588e-07, "loss": 0.003, "step": 433630 }, { "epoch": 4.633153480420963, "grad_norm": 1.531266450881958, "learning_rate": 7.091397027601077e-07, "loss": 0.0032, "step": 433640 }, { "epoch": 4.633260323735242, "grad_norm": 0.9127492904663086, "learning_rate": 7.091244421123221e-07, "loss": 0.0215, "step": 433650 }, { "epoch": 4.633367167049522, "grad_norm": 0.0057043759152293205, "learning_rate": 7.091091812284192e-07, "loss": 0.0085, "step": 433660 }, { "epoch": 4.633474010363802, "grad_norm": 1.6152030229568481, "learning_rate": 7.090939201084162e-07, "loss": 0.0046, "step": 433670 }, { "epoch": 4.633580853678081, "grad_norm": 0.40351662039756775, "learning_rate": 7.090786587523305e-07, "loss": 0.0016, "step": 433680 }, { "epoch": 4.633687696992361, "grad_norm": 0.030090339481830597, "learning_rate": 7.090633971601791e-07, "loss": 0.0053, "step": 433690 }, { "epoch": 4.63379454030664, "grad_norm": 0.00430607283487916, "learning_rate": 7.090481353319793e-07, "loss": 0.0047, "step": 433700 }, { "epoch": 4.63390138362092, "grad_norm": 0.14740383625030518, "learning_rate": 7.090328732677486e-07, "loss": 0.0009, "step": 433710 }, { "epoch": 4.634008226935199, "grad_norm": 0.151087686419487, "learning_rate": 7.090176109675037e-07, "loss": 0.0105, "step": 433720 }, { "epoch": 4.634115070249479, "grad_norm": 0.0016576761845499277, "learning_rate": 7.090023484312624e-07, "loss": 0.0072, "step": 433730 }, { "epoch": 4.634221913563759, "grad_norm": 4.132235527038574, "learning_rate": 7.089870856590417e-07, "loss": 0.005, "step": 433740 }, { "epoch": 4.6343287568780385, "grad_norm": 0.7789948582649231, "learning_rate": 7.089718226508586e-07, "loss": 0.0008, "step": 433750 }, { "epoch": 4.634435600192318, "grad_norm": 0.054168812930583954, "learning_rate": 7.089565594067307e-07, "loss": 0.0188, "step": 433760 }, { "epoch": 4.634542443506597, "grad_norm": 0.5188396573066711, "learning_rate": 7.089412959266751e-07, "loss": 0.0028, "step": 433770 }, { "epoch": 4.634649286820877, "grad_norm": 0.004486431367695332, "learning_rate": 7.089260322107088e-07, "loss": 0.0032, "step": 433780 }, { "epoch": 4.634756130135157, "grad_norm": 1.1898114681243896, "learning_rate": 7.089107682588495e-07, "loss": 0.002, "step": 433790 }, { "epoch": 4.634862973449437, "grad_norm": 0.006680797319859266, "learning_rate": 7.088955040711139e-07, "loss": 0.0004, "step": 433800 }, { "epoch": 4.634969816763716, "grad_norm": 0.005433911457657814, "learning_rate": 7.088802396475196e-07, "loss": 0.0067, "step": 433810 }, { "epoch": 4.635076660077996, "grad_norm": 0.034032367169857025, "learning_rate": 7.088649749880837e-07, "loss": 0.0038, "step": 433820 }, { "epoch": 4.635183503392275, "grad_norm": 0.023212917149066925, "learning_rate": 7.088497100928237e-07, "loss": 0.0016, "step": 433830 }, { "epoch": 4.6352903467065545, "grad_norm": 0.06738433986902237, "learning_rate": 7.088344449617565e-07, "loss": 0.0181, "step": 433840 }, { "epoch": 4.635397190020835, "grad_norm": 3.9641530513763428, "learning_rate": 7.088191795948994e-07, "loss": 0.01, "step": 433850 }, { "epoch": 4.635504033335114, "grad_norm": 0.0024536545388400555, "learning_rate": 7.088039139922697e-07, "loss": 0.0003, "step": 433860 }, { "epoch": 4.635610876649394, "grad_norm": 13.988534927368164, "learning_rate": 7.087886481538846e-07, "loss": 0.0052, "step": 433870 }, { "epoch": 4.635717719963673, "grad_norm": 0.40851154923439026, "learning_rate": 7.087733820797615e-07, "loss": 0.0035, "step": 433880 }, { "epoch": 4.635824563277953, "grad_norm": 10.697514533996582, "learning_rate": 7.087581157699174e-07, "loss": 0.0686, "step": 433890 }, { "epoch": 4.635931406592232, "grad_norm": 3.4515960216522217, "learning_rate": 7.087428492243696e-07, "loss": 0.0287, "step": 433900 }, { "epoch": 4.6360382499065125, "grad_norm": 0.007122702896595001, "learning_rate": 7.087275824431353e-07, "loss": 0.0112, "step": 433910 }, { "epoch": 4.636145093220792, "grad_norm": 0.0033859899267554283, "learning_rate": 7.08712315426232e-07, "loss": 0.0005, "step": 433920 }, { "epoch": 4.636251936535071, "grad_norm": 1.2893588542938232, "learning_rate": 7.086970481736766e-07, "loss": 0.0057, "step": 433930 }, { "epoch": 4.636358779849351, "grad_norm": 0.0005599219002760947, "learning_rate": 7.086817806854866e-07, "loss": 0.0223, "step": 433940 }, { "epoch": 4.63646562316363, "grad_norm": 5.583122730255127, "learning_rate": 7.08666512961679e-07, "loss": 0.0171, "step": 433950 }, { "epoch": 4.63657246647791, "grad_norm": 4.7725043296813965, "learning_rate": 7.086512450022713e-07, "loss": 0.0169, "step": 433960 }, { "epoch": 4.63667930979219, "grad_norm": 5.324432373046875, "learning_rate": 7.086359768072805e-07, "loss": 0.0246, "step": 433970 }, { "epoch": 4.63678615310647, "grad_norm": 2.1159274578094482, "learning_rate": 7.086207083767239e-07, "loss": 0.0008, "step": 433980 }, { "epoch": 4.636892996420749, "grad_norm": 0.0005737933097407222, "learning_rate": 7.086054397106188e-07, "loss": 0.0007, "step": 433990 }, { "epoch": 4.6369998397350285, "grad_norm": 0.003494572127237916, "learning_rate": 7.085901708089825e-07, "loss": 0.0008, "step": 434000 }, { "epoch": 4.637106683049308, "grad_norm": 0.001960202120244503, "learning_rate": 7.08574901671832e-07, "loss": 0.0095, "step": 434010 }, { "epoch": 4.637213526363587, "grad_norm": 0.011630439199507236, "learning_rate": 7.085596322991848e-07, "loss": 0.008, "step": 434020 }, { "epoch": 4.637320369677868, "grad_norm": 0.0006643160595558584, "learning_rate": 7.085443626910581e-07, "loss": 0.0022, "step": 434030 }, { "epoch": 4.637427212992147, "grad_norm": 0.9984611868858337, "learning_rate": 7.085290928474689e-07, "loss": 0.0101, "step": 434040 }, { "epoch": 4.637534056306427, "grad_norm": 0.7854799628257751, "learning_rate": 7.085138227684348e-07, "loss": 0.0081, "step": 434050 }, { "epoch": 4.637640899620706, "grad_norm": 0.36694467067718506, "learning_rate": 7.084985524539727e-07, "loss": 0.0145, "step": 434060 }, { "epoch": 4.637747742934986, "grad_norm": 0.017748340964317322, "learning_rate": 7.084832819041e-07, "loss": 0.0011, "step": 434070 }, { "epoch": 4.637854586249265, "grad_norm": 8.272384643554688, "learning_rate": 7.08468011118834e-07, "loss": 0.0132, "step": 434080 }, { "epoch": 4.637961429563545, "grad_norm": 0.000768483558204025, "learning_rate": 7.084527400981919e-07, "loss": 0.0263, "step": 434090 }, { "epoch": 4.638068272877825, "grad_norm": 5.410109519958496, "learning_rate": 7.084374688421909e-07, "loss": 0.0375, "step": 434100 }, { "epoch": 4.638175116192104, "grad_norm": 0.05118139833211899, "learning_rate": 7.084221973508483e-07, "loss": 0.0034, "step": 434110 }, { "epoch": 4.638281959506384, "grad_norm": 0.032774053514003754, "learning_rate": 7.084069256241812e-07, "loss": 0.0112, "step": 434120 }, { "epoch": 4.638388802820663, "grad_norm": 0.055407267063856125, "learning_rate": 7.083916536622071e-07, "loss": 0.0052, "step": 434130 }, { "epoch": 4.638495646134944, "grad_norm": 0.8091071248054504, "learning_rate": 7.083763814649429e-07, "loss": 0.0052, "step": 434140 }, { "epoch": 4.638602489449223, "grad_norm": 8.125600814819336, "learning_rate": 7.083611090324062e-07, "loss": 0.0049, "step": 434150 }, { "epoch": 4.6387093327635025, "grad_norm": 0.588583767414093, "learning_rate": 7.08345836364614e-07, "loss": 0.0172, "step": 434160 }, { "epoch": 4.638816176077782, "grad_norm": 0.01678738184273243, "learning_rate": 7.083305634615837e-07, "loss": 0.003, "step": 434170 }, { "epoch": 4.638923019392061, "grad_norm": 0.6414116024971008, "learning_rate": 7.083152903233323e-07, "loss": 0.0101, "step": 434180 }, { "epoch": 4.639029862706341, "grad_norm": 0.021000919863581657, "learning_rate": 7.083000169498775e-07, "loss": 0.0305, "step": 434190 }, { "epoch": 4.63913670602062, "grad_norm": 0.006322518456727266, "learning_rate": 7.082847433412362e-07, "loss": 0.052, "step": 434200 }, { "epoch": 4.639243549334901, "grad_norm": 0.0021007603500038385, "learning_rate": 7.082694694974255e-07, "loss": 0.0053, "step": 434210 }, { "epoch": 4.63935039264918, "grad_norm": 0.024479445070028305, "learning_rate": 7.08254195418463e-07, "loss": 0.0092, "step": 434220 }, { "epoch": 4.63945723596346, "grad_norm": 0.855231523513794, "learning_rate": 7.082389211043658e-07, "loss": 0.001, "step": 434230 }, { "epoch": 4.639564079277739, "grad_norm": 0.004330261144787073, "learning_rate": 7.082236465551511e-07, "loss": 0.0013, "step": 434240 }, { "epoch": 4.6396709225920185, "grad_norm": 0.22988907992839813, "learning_rate": 7.082083717708362e-07, "loss": 0.0003, "step": 434250 }, { "epoch": 4.639777765906299, "grad_norm": 0.0011063843267038465, "learning_rate": 7.081930967514384e-07, "loss": 0.0028, "step": 434260 }, { "epoch": 4.639884609220578, "grad_norm": 0.010967842303216457, "learning_rate": 7.081778214969747e-07, "loss": 0.0051, "step": 434270 }, { "epoch": 4.639991452534858, "grad_norm": 0.08278624713420868, "learning_rate": 7.081625460074627e-07, "loss": 0.0013, "step": 434280 }, { "epoch": 4.640098295849137, "grad_norm": 0.5659107565879822, "learning_rate": 7.081472702829195e-07, "loss": 0.0004, "step": 434290 }, { "epoch": 4.640205139163417, "grad_norm": 5.391555309295654, "learning_rate": 7.081319943233621e-07, "loss": 0.001, "step": 434300 }, { "epoch": 4.640311982477696, "grad_norm": 0.5789682865142822, "learning_rate": 7.081167181288081e-07, "loss": 0.0017, "step": 434310 }, { "epoch": 4.640418825791976, "grad_norm": 0.23422344028949738, "learning_rate": 7.081014416992747e-07, "loss": 0.0215, "step": 434320 }, { "epoch": 4.640525669106256, "grad_norm": 0.13426309823989868, "learning_rate": 7.080861650347789e-07, "loss": 0.0022, "step": 434330 }, { "epoch": 4.640632512420535, "grad_norm": 0.004619426093995571, "learning_rate": 7.080708881353383e-07, "loss": 0.0123, "step": 434340 }, { "epoch": 4.640739355734815, "grad_norm": 0.025136420503258705, "learning_rate": 7.080556110009698e-07, "loss": 0.0161, "step": 434350 }, { "epoch": 4.640846199049094, "grad_norm": 0.026929669082164764, "learning_rate": 7.080403336316909e-07, "loss": 0.0053, "step": 434360 }, { "epoch": 4.640953042363374, "grad_norm": 0.4754160940647125, "learning_rate": 7.080250560275188e-07, "loss": 0.0018, "step": 434370 }, { "epoch": 4.641059885677654, "grad_norm": 6.625864028930664, "learning_rate": 7.080097781884706e-07, "loss": 0.0155, "step": 434380 }, { "epoch": 4.641166728991934, "grad_norm": 0.012362482957541943, "learning_rate": 7.079945001145637e-07, "loss": 0.0054, "step": 434390 }, { "epoch": 4.641273572306213, "grad_norm": 0.0138162262737751, "learning_rate": 7.079792218058154e-07, "loss": 0.0037, "step": 434400 }, { "epoch": 4.6413804156204925, "grad_norm": 0.025496967136859894, "learning_rate": 7.079639432622428e-07, "loss": 0.0108, "step": 434410 }, { "epoch": 4.641487258934772, "grad_norm": 0.0019424764905124903, "learning_rate": 7.079486644838632e-07, "loss": 0.0019, "step": 434420 }, { "epoch": 4.6415941022490514, "grad_norm": 0.16738875210285187, "learning_rate": 7.079333854706937e-07, "loss": 0.0005, "step": 434430 }, { "epoch": 4.641700945563331, "grad_norm": 0.06394660472869873, "learning_rate": 7.079181062227519e-07, "loss": 0.0183, "step": 434440 }, { "epoch": 4.641807788877611, "grad_norm": 8.502313613891602, "learning_rate": 7.079028267400548e-07, "loss": 0.0119, "step": 434450 }, { "epoch": 4.641914632191891, "grad_norm": 0.14161823689937592, "learning_rate": 7.078875470226199e-07, "loss": 0.0181, "step": 434460 }, { "epoch": 4.64202147550617, "grad_norm": 3.9731719493865967, "learning_rate": 7.078722670704639e-07, "loss": 0.0074, "step": 434470 }, { "epoch": 4.64212831882045, "grad_norm": 1.3061827421188354, "learning_rate": 7.078569868836048e-07, "loss": 0.0008, "step": 434480 }, { "epoch": 4.642235162134729, "grad_norm": 0.0010602261172607541, "learning_rate": 7.078417064620593e-07, "loss": 0.0038, "step": 434490 }, { "epoch": 4.642342005449009, "grad_norm": 0.0057583763264119625, "learning_rate": 7.078264258058449e-07, "loss": 0.0008, "step": 434500 }, { "epoch": 4.642448848763289, "grad_norm": 0.014160346239805222, "learning_rate": 7.078111449149788e-07, "loss": 0.0025, "step": 434510 }, { "epoch": 4.642555692077568, "grad_norm": 0.004250918049365282, "learning_rate": 7.077958637894781e-07, "loss": 0.0014, "step": 434520 }, { "epoch": 4.642662535391848, "grad_norm": 0.0065642767585814, "learning_rate": 7.077805824293603e-07, "loss": 0.0174, "step": 434530 }, { "epoch": 4.642769378706127, "grad_norm": 0.0042130062356591225, "learning_rate": 7.077653008346425e-07, "loss": 0.0066, "step": 434540 }, { "epoch": 4.642876222020407, "grad_norm": 0.6750319004058838, "learning_rate": 7.077500190053419e-07, "loss": 0.0073, "step": 434550 }, { "epoch": 4.642983065334686, "grad_norm": 4.574824333190918, "learning_rate": 7.077347369414759e-07, "loss": 0.007, "step": 434560 }, { "epoch": 4.6430899086489665, "grad_norm": 0.01844867132604122, "learning_rate": 7.077194546430618e-07, "loss": 0.0019, "step": 434570 }, { "epoch": 4.643196751963246, "grad_norm": 0.13424913585186005, "learning_rate": 7.077041721101167e-07, "loss": 0.0166, "step": 434580 }, { "epoch": 4.6433035952775255, "grad_norm": 0.0175623781979084, "learning_rate": 7.076888893426579e-07, "loss": 0.0007, "step": 434590 }, { "epoch": 4.643410438591805, "grad_norm": 0.9778088331222534, "learning_rate": 7.076736063407027e-07, "loss": 0.0232, "step": 434600 }, { "epoch": 4.643517281906084, "grad_norm": 2.525087594985962, "learning_rate": 7.076583231042683e-07, "loss": 0.0056, "step": 434610 }, { "epoch": 4.643624125220365, "grad_norm": 2.9345736503601074, "learning_rate": 7.07643039633372e-07, "loss": 0.0034, "step": 434620 }, { "epoch": 4.643730968534644, "grad_norm": 0.10358395427465439, "learning_rate": 7.07627755928031e-07, "loss": 0.0033, "step": 434630 }, { "epoch": 4.643837811848924, "grad_norm": 0.17279653251171112, "learning_rate": 7.076124719882626e-07, "loss": 0.0154, "step": 434640 }, { "epoch": 4.643944655163203, "grad_norm": 5.231759071350098, "learning_rate": 7.075971878140842e-07, "loss": 0.0066, "step": 434650 }, { "epoch": 4.6440514984774826, "grad_norm": 0.006538964807987213, "learning_rate": 7.075819034055127e-07, "loss": 0.0032, "step": 434660 }, { "epoch": 4.644158341791762, "grad_norm": 0.10251042246818542, "learning_rate": 7.075666187625656e-07, "loss": 0.0171, "step": 434670 }, { "epoch": 4.644265185106042, "grad_norm": 0.11274472624063492, "learning_rate": 7.075513338852603e-07, "loss": 0.0051, "step": 434680 }, { "epoch": 4.644372028420322, "grad_norm": 0.35843735933303833, "learning_rate": 7.075360487736136e-07, "loss": 0.0032, "step": 434690 }, { "epoch": 4.644478871734601, "grad_norm": 0.004525704309344292, "learning_rate": 7.075207634276433e-07, "loss": 0.0032, "step": 434700 }, { "epoch": 4.644585715048881, "grad_norm": 0.011575448326766491, "learning_rate": 7.075054778473662e-07, "loss": 0.0238, "step": 434710 }, { "epoch": 4.64469255836316, "grad_norm": 0.10384362936019897, "learning_rate": 7.074901920327998e-07, "loss": 0.0025, "step": 434720 }, { "epoch": 4.64479940167744, "grad_norm": 0.03773055598139763, "learning_rate": 7.074749059839614e-07, "loss": 0.0028, "step": 434730 }, { "epoch": 4.64490624499172, "grad_norm": 0.30481207370758057, "learning_rate": 7.07459619700868e-07, "loss": 0.0051, "step": 434740 }, { "epoch": 4.6450130883059995, "grad_norm": 0.01594451256096363, "learning_rate": 7.074443331835371e-07, "loss": 0.0054, "step": 434750 }, { "epoch": 4.645119931620279, "grad_norm": 6.279111385345459, "learning_rate": 7.074290464319861e-07, "loss": 0.0133, "step": 434760 }, { "epoch": 4.645226774934558, "grad_norm": 0.03126717358827591, "learning_rate": 7.074137594462317e-07, "loss": 0.0033, "step": 434770 }, { "epoch": 4.645333618248838, "grad_norm": 0.0297484640032053, "learning_rate": 7.073984722262918e-07, "loss": 0.0036, "step": 434780 }, { "epoch": 4.645440461563117, "grad_norm": 0.00044846319360658526, "learning_rate": 7.073831847721832e-07, "loss": 0.0032, "step": 434790 }, { "epoch": 4.645547304877398, "grad_norm": 0.9332497119903564, "learning_rate": 7.073678970839234e-07, "loss": 0.0116, "step": 434800 }, { "epoch": 4.645654148191677, "grad_norm": 1.7068113088607788, "learning_rate": 7.073526091615296e-07, "loss": 0.0156, "step": 434810 }, { "epoch": 4.645760991505957, "grad_norm": 0.1724313348531723, "learning_rate": 7.07337321005019e-07, "loss": 0.0188, "step": 434820 }, { "epoch": 4.645867834820236, "grad_norm": 3.760982036590576, "learning_rate": 7.073220326144091e-07, "loss": 0.0059, "step": 434830 }, { "epoch": 4.6459746781345155, "grad_norm": 3.348395347595215, "learning_rate": 7.073067439897168e-07, "loss": 0.0249, "step": 434840 }, { "epoch": 4.646081521448796, "grad_norm": 0.03629400581121445, "learning_rate": 7.072914551309596e-07, "loss": 0.004, "step": 434850 }, { "epoch": 4.646188364763075, "grad_norm": 11.723379135131836, "learning_rate": 7.072761660381547e-07, "loss": 0.0077, "step": 434860 }, { "epoch": 4.646295208077355, "grad_norm": 0.007438425440341234, "learning_rate": 7.072608767113193e-07, "loss": 0.0008, "step": 434870 }, { "epoch": 4.646402051391634, "grad_norm": 4.805665016174316, "learning_rate": 7.072455871504708e-07, "loss": 0.018, "step": 434880 }, { "epoch": 4.646508894705914, "grad_norm": 0.01921282336115837, "learning_rate": 7.072302973556263e-07, "loss": 0.0082, "step": 434890 }, { "epoch": 4.646615738020193, "grad_norm": 0.720005452632904, "learning_rate": 7.072150073268033e-07, "loss": 0.0012, "step": 434900 }, { "epoch": 4.646722581334473, "grad_norm": 1.0735293626785278, "learning_rate": 7.071997170640189e-07, "loss": 0.0056, "step": 434910 }, { "epoch": 4.646829424648753, "grad_norm": 0.9604373574256897, "learning_rate": 7.071844265672902e-07, "loss": 0.003, "step": 434920 }, { "epoch": 4.646936267963032, "grad_norm": 0.0010384288616478443, "learning_rate": 7.071691358366348e-07, "loss": 0.0218, "step": 434930 }, { "epoch": 4.647043111277312, "grad_norm": 0.014971546828746796, "learning_rate": 7.071538448720697e-07, "loss": 0.0014, "step": 434940 }, { "epoch": 4.647149954591591, "grad_norm": 0.9705054759979248, "learning_rate": 7.071385536736124e-07, "loss": 0.0034, "step": 434950 }, { "epoch": 4.647256797905871, "grad_norm": 0.1332567036151886, "learning_rate": 7.071232622412801e-07, "loss": 0.0096, "step": 434960 }, { "epoch": 4.647363641220151, "grad_norm": 0.02429969049990177, "learning_rate": 7.071079705750898e-07, "loss": 0.0041, "step": 434970 }, { "epoch": 4.647470484534431, "grad_norm": 1.0381085872650146, "learning_rate": 7.070926786750591e-07, "loss": 0.0065, "step": 434980 }, { "epoch": 4.64757732784871, "grad_norm": 0.0010073388693854213, "learning_rate": 7.07077386541205e-07, "loss": 0.0142, "step": 434990 }, { "epoch": 4.6476841711629895, "grad_norm": 6.412327289581299, "learning_rate": 7.070620941735451e-07, "loss": 0.0253, "step": 435000 }, { "epoch": 4.647791014477269, "grad_norm": 0.6240739226341248, "learning_rate": 7.070468015720963e-07, "loss": 0.0053, "step": 435010 }, { "epoch": 4.647897857791548, "grad_norm": 8.998980522155762, "learning_rate": 7.070315087368762e-07, "loss": 0.0072, "step": 435020 }, { "epoch": 4.648004701105828, "grad_norm": 0.0021128742955625057, "learning_rate": 7.070162156679018e-07, "loss": 0.0212, "step": 435030 }, { "epoch": 4.648111544420108, "grad_norm": 0.3397352993488312, "learning_rate": 7.070009223651904e-07, "loss": 0.0094, "step": 435040 }, { "epoch": 4.648218387734388, "grad_norm": 0.5243175029754639, "learning_rate": 7.069856288287594e-07, "loss": 0.0048, "step": 435050 }, { "epoch": 4.648325231048667, "grad_norm": 0.023849403485655785, "learning_rate": 7.06970335058626e-07, "loss": 0.0095, "step": 435060 }, { "epoch": 4.648432074362947, "grad_norm": 0.0022672133054584265, "learning_rate": 7.069550410548075e-07, "loss": 0.0018, "step": 435070 }, { "epoch": 4.648538917677226, "grad_norm": 6.716836929321289, "learning_rate": 7.069397468173212e-07, "loss": 0.0165, "step": 435080 }, { "epoch": 4.648645760991506, "grad_norm": 0.05075448006391525, "learning_rate": 7.069244523461841e-07, "loss": 0.008, "step": 435090 }, { "epoch": 4.648752604305786, "grad_norm": 0.2913814187049866, "learning_rate": 7.069091576414139e-07, "loss": 0.002, "step": 435100 }, { "epoch": 4.648859447620065, "grad_norm": 0.6644357442855835, "learning_rate": 7.068938627030275e-07, "loss": 0.0004, "step": 435110 }, { "epoch": 4.648966290934345, "grad_norm": 0.002591392956674099, "learning_rate": 7.068785675310424e-07, "loss": 0.0039, "step": 435120 }, { "epoch": 4.649073134248624, "grad_norm": 9.100061416625977, "learning_rate": 7.068632721254757e-07, "loss": 0.0234, "step": 435130 }, { "epoch": 4.649179977562904, "grad_norm": 1.6327025890350342, "learning_rate": 7.068479764863448e-07, "loss": 0.0009, "step": 435140 }, { "epoch": 4.649286820877183, "grad_norm": 1.9195728302001953, "learning_rate": 7.068326806136668e-07, "loss": 0.0078, "step": 435150 }, { "epoch": 4.6493936641914635, "grad_norm": 11.811134338378906, "learning_rate": 7.068173845074595e-07, "loss": 0.0255, "step": 435160 }, { "epoch": 4.649500507505743, "grad_norm": 0.2928800880908966, "learning_rate": 7.068020881677394e-07, "loss": 0.018, "step": 435170 }, { "epoch": 4.649607350820022, "grad_norm": 0.0018432391807436943, "learning_rate": 7.067867915945242e-07, "loss": 0.0032, "step": 435180 }, { "epoch": 4.649714194134302, "grad_norm": 1.7077285051345825, "learning_rate": 7.067714947878312e-07, "loss": 0.0066, "step": 435190 }, { "epoch": 4.649821037448581, "grad_norm": 0.0780881717801094, "learning_rate": 7.067561977476774e-07, "loss": 0.0172, "step": 435200 }, { "epoch": 4.649927880762862, "grad_norm": 0.003937901463359594, "learning_rate": 7.067409004740803e-07, "loss": 0.0041, "step": 435210 }, { "epoch": 4.650034724077141, "grad_norm": 0.0012848653132095933, "learning_rate": 7.067256029670572e-07, "loss": 0.004, "step": 435220 }, { "epoch": 4.650141567391421, "grad_norm": 0.08302784711122513, "learning_rate": 7.067103052266253e-07, "loss": 0.0042, "step": 435230 }, { "epoch": 4.6502484107057, "grad_norm": 0.1398114114999771, "learning_rate": 7.066950072528017e-07, "loss": 0.0321, "step": 435240 }, { "epoch": 4.6503552540199795, "grad_norm": 0.008215324953198433, "learning_rate": 7.066797090456041e-07, "loss": 0.002, "step": 435250 }, { "epoch": 4.650462097334259, "grad_norm": 0.019498856738209724, "learning_rate": 7.066644106050494e-07, "loss": 0.0055, "step": 435260 }, { "epoch": 4.650568940648538, "grad_norm": 0.028461921960115433, "learning_rate": 7.066491119311549e-07, "loss": 0.001, "step": 435270 }, { "epoch": 4.650675783962819, "grad_norm": 1.3058762550354004, "learning_rate": 7.06633813023938e-07, "loss": 0.0007, "step": 435280 }, { "epoch": 4.650782627277098, "grad_norm": 1.2652162313461304, "learning_rate": 7.066185138834158e-07, "loss": 0.0055, "step": 435290 }, { "epoch": 4.650889470591378, "grad_norm": 0.0058450051583349705, "learning_rate": 7.066032145096059e-07, "loss": 0.0193, "step": 435300 }, { "epoch": 4.650996313905657, "grad_norm": 0.037974659353494644, "learning_rate": 7.065879149025255e-07, "loss": 0.0776, "step": 435310 }, { "epoch": 4.651103157219937, "grad_norm": 2.4329946041107178, "learning_rate": 7.065726150621915e-07, "loss": 0.0009, "step": 435320 }, { "epoch": 4.651210000534217, "grad_norm": 0.8957452178001404, "learning_rate": 7.065573149886215e-07, "loss": 0.0615, "step": 435330 }, { "epoch": 4.651316843848496, "grad_norm": 0.021722381934523582, "learning_rate": 7.065420146818327e-07, "loss": 0.0116, "step": 435340 }, { "epoch": 4.651423687162776, "grad_norm": 0.28921830654144287, "learning_rate": 7.065267141418423e-07, "loss": 0.023, "step": 435350 }, { "epoch": 4.651530530477055, "grad_norm": 1.8429996967315674, "learning_rate": 7.065114133686678e-07, "loss": 0.0007, "step": 435360 }, { "epoch": 4.651637373791335, "grad_norm": 0.39401698112487793, "learning_rate": 7.064961123623262e-07, "loss": 0.0131, "step": 435370 }, { "epoch": 4.651744217105614, "grad_norm": 0.0036055725067853928, "learning_rate": 7.06480811122835e-07, "loss": 0.0071, "step": 435380 }, { "epoch": 4.651851060419895, "grad_norm": 0.021459713578224182, "learning_rate": 7.064655096502112e-07, "loss": 0.0072, "step": 435390 }, { "epoch": 4.651957903734174, "grad_norm": 0.016054116189479828, "learning_rate": 7.064502079444725e-07, "loss": 0.0169, "step": 435400 }, { "epoch": 4.6520647470484535, "grad_norm": 0.026098458096385002, "learning_rate": 7.064349060056359e-07, "loss": 0.0147, "step": 435410 }, { "epoch": 4.652171590362733, "grad_norm": 0.8476215600967407, "learning_rate": 7.064196038337185e-07, "loss": 0.0036, "step": 435420 }, { "epoch": 4.652278433677012, "grad_norm": 0.16795291006565094, "learning_rate": 7.06404301428738e-07, "loss": 0.01, "step": 435430 }, { "epoch": 4.652385276991292, "grad_norm": 4.786403656005859, "learning_rate": 7.063889987907113e-07, "loss": 0.0243, "step": 435440 }, { "epoch": 4.652492120305572, "grad_norm": 0.004482385702431202, "learning_rate": 7.06373695919656e-07, "loss": 0.0031, "step": 435450 }, { "epoch": 4.652598963619852, "grad_norm": 0.613231897354126, "learning_rate": 7.063583928155891e-07, "loss": 0.0191, "step": 435460 }, { "epoch": 4.652705806934131, "grad_norm": 0.0009634863818064332, "learning_rate": 7.063430894785281e-07, "loss": 0.0048, "step": 435470 }, { "epoch": 4.652812650248411, "grad_norm": 0.024514146149158478, "learning_rate": 7.063277859084902e-07, "loss": 0.01, "step": 435480 }, { "epoch": 4.65291949356269, "grad_norm": 0.017012380063533783, "learning_rate": 7.063124821054925e-07, "loss": 0.0049, "step": 435490 }, { "epoch": 4.6530263368769695, "grad_norm": 0.6156657934188843, "learning_rate": 7.062971780695526e-07, "loss": 0.0109, "step": 435500 }, { "epoch": 4.65313318019125, "grad_norm": 0.07361530512571335, "learning_rate": 7.062818738006876e-07, "loss": 0.0001, "step": 435510 }, { "epoch": 4.653240023505529, "grad_norm": 0.06605064123868942, "learning_rate": 7.062665692989147e-07, "loss": 0.0318, "step": 435520 }, { "epoch": 4.653346866819809, "grad_norm": 3.7684266567230225, "learning_rate": 7.062512645642514e-07, "loss": 0.0087, "step": 435530 }, { "epoch": 4.653453710134088, "grad_norm": 0.002973160007968545, "learning_rate": 7.062359595967148e-07, "loss": 0.0001, "step": 435540 }, { "epoch": 4.653560553448368, "grad_norm": 0.19606539607048035, "learning_rate": 7.062206543963221e-07, "loss": 0.0232, "step": 435550 }, { "epoch": 4.653667396762648, "grad_norm": 0.1925465315580368, "learning_rate": 7.062053489630909e-07, "loss": 0.0245, "step": 435560 }, { "epoch": 4.6537742400769275, "grad_norm": 0.012642374262213707, "learning_rate": 7.061900432970383e-07, "loss": 0.01, "step": 435570 }, { "epoch": 4.653881083391207, "grad_norm": 0.16330616176128387, "learning_rate": 7.061747373981815e-07, "loss": 0.0028, "step": 435580 }, { "epoch": 4.6539879267054864, "grad_norm": 0.5742022395133972, "learning_rate": 7.061594312665378e-07, "loss": 0.0047, "step": 435590 }, { "epoch": 4.654094770019766, "grad_norm": 0.2985851466655731, "learning_rate": 7.061441249021247e-07, "loss": 0.0178, "step": 435600 }, { "epoch": 4.654201613334045, "grad_norm": 1.1144059896469116, "learning_rate": 7.061288183049592e-07, "loss": 0.0029, "step": 435610 }, { "epoch": 4.654308456648325, "grad_norm": 1.0262596607208252, "learning_rate": 7.061135114750587e-07, "loss": 0.0011, "step": 435620 }, { "epoch": 4.654415299962605, "grad_norm": 9.074357032775879, "learning_rate": 7.060982044124406e-07, "loss": 0.0072, "step": 435630 }, { "epoch": 4.654522143276885, "grad_norm": 2.4968199729919434, "learning_rate": 7.06082897117122e-07, "loss": 0.0076, "step": 435640 }, { "epoch": 4.654628986591164, "grad_norm": 0.004472328815609217, "learning_rate": 7.060675895891203e-07, "loss": 0.0176, "step": 435650 }, { "epoch": 4.6547358299054435, "grad_norm": 0.06225857138633728, "learning_rate": 7.060522818284526e-07, "loss": 0.0021, "step": 435660 }, { "epoch": 4.654842673219723, "grad_norm": 6.675610542297363, "learning_rate": 7.060369738351363e-07, "loss": 0.0059, "step": 435670 }, { "epoch": 4.654949516534003, "grad_norm": 0.023579386994242668, "learning_rate": 7.06021665609189e-07, "loss": 0.0083, "step": 435680 }, { "epoch": 4.655056359848283, "grad_norm": 4.018362522125244, "learning_rate": 7.060063571506274e-07, "loss": 0.0061, "step": 435690 }, { "epoch": 4.655163203162562, "grad_norm": 0.02185428887605667, "learning_rate": 7.059910484594693e-07, "loss": 0.0081, "step": 435700 }, { "epoch": 4.655270046476842, "grad_norm": 0.0013290825299918652, "learning_rate": 7.059757395357315e-07, "loss": 0.0074, "step": 435710 }, { "epoch": 4.655376889791121, "grad_norm": 0.0014027705183252692, "learning_rate": 7.059604303794317e-07, "loss": 0.0037, "step": 435720 }, { "epoch": 4.655483733105401, "grad_norm": 0.002546765608713031, "learning_rate": 7.059451209905871e-07, "loss": 0.0063, "step": 435730 }, { "epoch": 4.65559057641968, "grad_norm": 0.9532309770584106, "learning_rate": 7.059298113692148e-07, "loss": 0.0028, "step": 435740 }, { "epoch": 4.6556974197339605, "grad_norm": 2.0117740631103516, "learning_rate": 7.059145015153323e-07, "loss": 0.0014, "step": 435750 }, { "epoch": 4.65580426304824, "grad_norm": 0.0030378373339772224, "learning_rate": 7.058991914289567e-07, "loss": 0.0037, "step": 435760 }, { "epoch": 4.655911106362519, "grad_norm": 0.037665970623493195, "learning_rate": 7.058838811101055e-07, "loss": 0.0083, "step": 435770 }, { "epoch": 4.656017949676799, "grad_norm": 0.13898015022277832, "learning_rate": 7.058685705587957e-07, "loss": 0.0001, "step": 435780 }, { "epoch": 4.656124792991078, "grad_norm": 7.093338489532471, "learning_rate": 7.058532597750448e-07, "loss": 0.0136, "step": 435790 }, { "epoch": 4.656231636305359, "grad_norm": 0.0013434041757136583, "learning_rate": 7.0583794875887e-07, "loss": 0.032, "step": 435800 }, { "epoch": 4.656338479619638, "grad_norm": 0.7127428650856018, "learning_rate": 7.058226375102887e-07, "loss": 0.0049, "step": 435810 }, { "epoch": 4.6564453229339176, "grad_norm": 0.007376994471997023, "learning_rate": 7.05807326029318e-07, "loss": 0.0044, "step": 435820 }, { "epoch": 4.656552166248197, "grad_norm": 0.01167552825063467, "learning_rate": 7.057920143159755e-07, "loss": 0.0294, "step": 435830 }, { "epoch": 4.6566590095624765, "grad_norm": 0.013311533257365227, "learning_rate": 7.057767023702781e-07, "loss": 0.0098, "step": 435840 }, { "epoch": 4.656765852876756, "grad_norm": 0.4169599115848541, "learning_rate": 7.057613901922433e-07, "loss": 0.0011, "step": 435850 }, { "epoch": 4.656872696191035, "grad_norm": 0.009956269524991512, "learning_rate": 7.057460777818883e-07, "loss": 0.0108, "step": 435860 }, { "epoch": 4.656979539505316, "grad_norm": 0.0027993437834084034, "learning_rate": 7.057307651392306e-07, "loss": 0.0021, "step": 435870 }, { "epoch": 4.657086382819595, "grad_norm": 2.2087900638580322, "learning_rate": 7.057154522642873e-07, "loss": 0.0067, "step": 435880 }, { "epoch": 4.657193226133875, "grad_norm": 0.10441868752241135, "learning_rate": 7.057001391570756e-07, "loss": 0.0172, "step": 435890 }, { "epoch": 4.657300069448154, "grad_norm": 14.282073974609375, "learning_rate": 7.056848258176131e-07, "loss": 0.0043, "step": 435900 }, { "epoch": 4.657406912762434, "grad_norm": 0.01345617976039648, "learning_rate": 7.056695122459169e-07, "loss": 0.0117, "step": 435910 }, { "epoch": 4.657513756076714, "grad_norm": 0.03011741116642952, "learning_rate": 7.056541984420041e-07, "loss": 0.0106, "step": 435920 }, { "epoch": 4.657620599390993, "grad_norm": 10.8111572265625, "learning_rate": 7.056388844058924e-07, "loss": 0.0264, "step": 435930 }, { "epoch": 4.657727442705273, "grad_norm": 0.15428730845451355, "learning_rate": 7.056235701375987e-07, "loss": 0.0044, "step": 435940 }, { "epoch": 4.657834286019552, "grad_norm": 5.088981628417969, "learning_rate": 7.056082556371406e-07, "loss": 0.01, "step": 435950 }, { "epoch": 4.657941129333832, "grad_norm": 0.025625353679060936, "learning_rate": 7.055929409045352e-07, "loss": 0.0027, "step": 435960 }, { "epoch": 4.658047972648111, "grad_norm": 0.7476858496665955, "learning_rate": 7.055776259398e-07, "loss": 0.0073, "step": 435970 }, { "epoch": 4.658154815962391, "grad_norm": 0.08868991583585739, "learning_rate": 7.055623107429519e-07, "loss": 0.0119, "step": 435980 }, { "epoch": 4.658261659276671, "grad_norm": 0.3405831456184387, "learning_rate": 7.055469953140086e-07, "loss": 0.0102, "step": 435990 }, { "epoch": 4.6583685025909505, "grad_norm": 3.9021711349487305, "learning_rate": 7.055316796529871e-07, "loss": 0.0019, "step": 436000 }, { "epoch": 4.65847534590523, "grad_norm": 0.07243111729621887, "learning_rate": 7.055163637599049e-07, "loss": 0.0153, "step": 436010 }, { "epoch": 4.658582189219509, "grad_norm": 0.11455940455198288, "learning_rate": 7.055010476347792e-07, "loss": 0.0031, "step": 436020 }, { "epoch": 4.658689032533789, "grad_norm": 1.3783906698226929, "learning_rate": 7.054857312776274e-07, "loss": 0.0151, "step": 436030 }, { "epoch": 4.658795875848069, "grad_norm": 3.8263370990753174, "learning_rate": 7.054704146884666e-07, "loss": 0.0054, "step": 436040 }, { "epoch": 4.658902719162349, "grad_norm": 0.007809509988874197, "learning_rate": 7.054550978673143e-07, "loss": 0.0004, "step": 436050 }, { "epoch": 4.659009562476628, "grad_norm": 0.0019313892116770148, "learning_rate": 7.054397808141875e-07, "loss": 0.005, "step": 436060 }, { "epoch": 4.659116405790908, "grad_norm": 3.9332849979400635, "learning_rate": 7.054244635291038e-07, "loss": 0.0041, "step": 436070 }, { "epoch": 4.659223249105187, "grad_norm": 3.800727367401123, "learning_rate": 7.054091460120805e-07, "loss": 0.0233, "step": 436080 }, { "epoch": 4.6593300924194665, "grad_norm": 7.903870105743408, "learning_rate": 7.053938282631345e-07, "loss": 0.0198, "step": 436090 }, { "epoch": 4.659436935733747, "grad_norm": 0.03245801851153374, "learning_rate": 7.053785102822836e-07, "loss": 0.0009, "step": 436100 }, { "epoch": 4.659543779048026, "grad_norm": 0.027458341792225838, "learning_rate": 7.053631920695447e-07, "loss": 0.0116, "step": 436110 }, { "epoch": 4.659650622362306, "grad_norm": 0.06590056419372559, "learning_rate": 7.053478736249353e-07, "loss": 0.0134, "step": 436120 }, { "epoch": 4.659757465676585, "grad_norm": 5.6486496925354, "learning_rate": 7.053325549484727e-07, "loss": 0.0131, "step": 436130 }, { "epoch": 4.659864308990865, "grad_norm": 0.06094290688633919, "learning_rate": 7.053172360401742e-07, "loss": 0.0137, "step": 436140 }, { "epoch": 4.659971152305144, "grad_norm": 0.0009189469274133444, "learning_rate": 7.053019169000569e-07, "loss": 0.0127, "step": 436150 }, { "epoch": 4.6600779956194245, "grad_norm": 0.47642782330513, "learning_rate": 7.052865975281383e-07, "loss": 0.0021, "step": 436160 }, { "epoch": 4.660184838933704, "grad_norm": 4.689299583435059, "learning_rate": 7.052712779244357e-07, "loss": 0.006, "step": 436170 }, { "epoch": 4.660291682247983, "grad_norm": 3.5701072216033936, "learning_rate": 7.052559580889664e-07, "loss": 0.0148, "step": 436180 }, { "epoch": 4.660398525562263, "grad_norm": 1.2691446542739868, "learning_rate": 7.052406380217475e-07, "loss": 0.0063, "step": 436190 }, { "epoch": 4.660505368876542, "grad_norm": 0.005703658331185579, "learning_rate": 7.052253177227965e-07, "loss": 0.001, "step": 436200 }, { "epoch": 4.660612212190822, "grad_norm": 1.1546601057052612, "learning_rate": 7.052099971921305e-07, "loss": 0.0478, "step": 436210 }, { "epoch": 4.660719055505102, "grad_norm": 0.03190777450799942, "learning_rate": 7.051946764297669e-07, "loss": 0.016, "step": 436220 }, { "epoch": 4.660825898819382, "grad_norm": 1.0845355987548828, "learning_rate": 7.051793554357233e-07, "loss": 0.0197, "step": 436230 }, { "epoch": 4.660932742133661, "grad_norm": 0.6747145056724548, "learning_rate": 7.051640342100164e-07, "loss": 0.0002, "step": 436240 }, { "epoch": 4.6610395854479405, "grad_norm": 0.020941119641065598, "learning_rate": 7.051487127526641e-07, "loss": 0.0104, "step": 436250 }, { "epoch": 4.66114642876222, "grad_norm": 0.004484985023736954, "learning_rate": 7.051333910636831e-07, "loss": 0.0001, "step": 436260 }, { "epoch": 4.6612532720765, "grad_norm": 2.7529077529907227, "learning_rate": 7.051180691430913e-07, "loss": 0.0069, "step": 436270 }, { "epoch": 4.66136011539078, "grad_norm": 2.465869903564453, "learning_rate": 7.051027469909057e-07, "loss": 0.0018, "step": 436280 }, { "epoch": 4.661466958705059, "grad_norm": 0.0022990028373897076, "learning_rate": 7.050874246071434e-07, "loss": 0.0162, "step": 436290 }, { "epoch": 4.661573802019339, "grad_norm": 11.43510913848877, "learning_rate": 7.050721019918221e-07, "loss": 0.0125, "step": 436300 }, { "epoch": 4.661680645333618, "grad_norm": 0.021452758461236954, "learning_rate": 7.050567791449589e-07, "loss": 0.0091, "step": 436310 }, { "epoch": 4.661787488647898, "grad_norm": 0.016560854390263557, "learning_rate": 7.05041456066571e-07, "loss": 0.0018, "step": 436320 }, { "epoch": 4.661894331962177, "grad_norm": 0.01620819978415966, "learning_rate": 7.05026132756676e-07, "loss": 0.0083, "step": 436330 }, { "epoch": 4.662001175276457, "grad_norm": 0.0006827711476944387, "learning_rate": 7.050108092152908e-07, "loss": 0.0098, "step": 436340 }, { "epoch": 4.662108018590737, "grad_norm": 0.034909918904304504, "learning_rate": 7.049954854424332e-07, "loss": 0.0061, "step": 436350 }, { "epoch": 4.662214861905016, "grad_norm": 3.174071788787842, "learning_rate": 7.049801614381201e-07, "loss": 0.0027, "step": 436360 }, { "epoch": 4.662321705219296, "grad_norm": 0.016812216490507126, "learning_rate": 7.049648372023688e-07, "loss": 0.0193, "step": 436370 }, { "epoch": 4.662428548533575, "grad_norm": 2.517652750015259, "learning_rate": 7.049495127351969e-07, "loss": 0.0027, "step": 436380 }, { "epoch": 4.662535391847856, "grad_norm": 0.02650531753897667, "learning_rate": 7.049341880366215e-07, "loss": 0.0264, "step": 436390 }, { "epoch": 4.662642235162135, "grad_norm": 3.7658581733703613, "learning_rate": 7.0491886310666e-07, "loss": 0.0319, "step": 436400 }, { "epoch": 4.6627490784764145, "grad_norm": 0.13072502613067627, "learning_rate": 7.049035379453295e-07, "loss": 0.0012, "step": 436410 }, { "epoch": 4.662855921790694, "grad_norm": 0.013770394958555698, "learning_rate": 7.048882125526475e-07, "loss": 0.0137, "step": 436420 }, { "epoch": 4.662962765104973, "grad_norm": 0.042435839772224426, "learning_rate": 7.048728869286314e-07, "loss": 0.0094, "step": 436430 }, { "epoch": 4.663069608419253, "grad_norm": 0.23494194447994232, "learning_rate": 7.048575610732981e-07, "loss": 0.008, "step": 436440 }, { "epoch": 4.663176451733532, "grad_norm": 0.11762209236621857, "learning_rate": 7.048422349866652e-07, "loss": 0.0008, "step": 436450 }, { "epoch": 4.663283295047813, "grad_norm": 0.0866842195391655, "learning_rate": 7.0482690866875e-07, "loss": 0.0085, "step": 436460 }, { "epoch": 4.663390138362092, "grad_norm": 9.079339027404785, "learning_rate": 7.048115821195697e-07, "loss": 0.0156, "step": 436470 }, { "epoch": 4.663496981676372, "grad_norm": 1.4767022132873535, "learning_rate": 7.047962553391418e-07, "loss": 0.0067, "step": 436480 }, { "epoch": 4.663603824990651, "grad_norm": 0.12938351929187775, "learning_rate": 7.047809283274832e-07, "loss": 0.0005, "step": 436490 }, { "epoch": 4.6637106683049305, "grad_norm": 0.07426002621650696, "learning_rate": 7.047656010846118e-07, "loss": 0.0094, "step": 436500 }, { "epoch": 4.663817511619211, "grad_norm": 0.022595668211579323, "learning_rate": 7.047502736105445e-07, "loss": 0.0033, "step": 436510 }, { "epoch": 4.66392435493349, "grad_norm": 0.14366179704666138, "learning_rate": 7.047349459052986e-07, "loss": 0.0117, "step": 436520 }, { "epoch": 4.66403119824777, "grad_norm": 4.578254222869873, "learning_rate": 7.047196179688916e-07, "loss": 0.0076, "step": 436530 }, { "epoch": 4.664138041562049, "grad_norm": 0.2022271603345871, "learning_rate": 7.047042898013407e-07, "loss": 0.0074, "step": 436540 }, { "epoch": 4.664244884876329, "grad_norm": 0.006746155209839344, "learning_rate": 7.04688961402663e-07, "loss": 0.0154, "step": 436550 }, { "epoch": 4.664351728190608, "grad_norm": 2.979100227355957, "learning_rate": 7.046736327728761e-07, "loss": 0.0174, "step": 436560 }, { "epoch": 4.664458571504888, "grad_norm": 0.0032497579231858253, "learning_rate": 7.046583039119975e-07, "loss": 0.0056, "step": 436570 }, { "epoch": 4.664565414819168, "grad_norm": 0.10629618912935257, "learning_rate": 7.046429748200439e-07, "loss": 0.0039, "step": 436580 }, { "epoch": 4.664672258133447, "grad_norm": 6.122880935668945, "learning_rate": 7.04627645497033e-07, "loss": 0.0179, "step": 436590 }, { "epoch": 4.664779101447727, "grad_norm": 0.39405015110969543, "learning_rate": 7.046123159429822e-07, "loss": 0.0048, "step": 436600 }, { "epoch": 4.664885944762006, "grad_norm": 3.378322124481201, "learning_rate": 7.045969861579084e-07, "loss": 0.0008, "step": 436610 }, { "epoch": 4.664992788076286, "grad_norm": 0.009162066504359245, "learning_rate": 7.045816561418294e-07, "loss": 0.0233, "step": 436620 }, { "epoch": 4.665099631390566, "grad_norm": 0.036988358944654465, "learning_rate": 7.04566325894762e-07, "loss": 0.0038, "step": 436630 }, { "epoch": 4.665206474704846, "grad_norm": 0.0016644538845866919, "learning_rate": 7.045509954167241e-07, "loss": 0.0136, "step": 436640 }, { "epoch": 4.665313318019125, "grad_norm": 0.2044229358434677, "learning_rate": 7.045356647077325e-07, "loss": 0.0161, "step": 436650 }, { "epoch": 4.6654201613334045, "grad_norm": 3.2574691772460938, "learning_rate": 7.045203337678047e-07, "loss": 0.0074, "step": 436660 }, { "epoch": 4.665527004647684, "grad_norm": 0.024848390370607376, "learning_rate": 7.04505002596958e-07, "loss": 0.0245, "step": 436670 }, { "epoch": 4.6656338479619635, "grad_norm": 0.0756179541349411, "learning_rate": 7.044896711952098e-07, "loss": 0.0037, "step": 436680 }, { "epoch": 4.665740691276243, "grad_norm": 0.01611827127635479, "learning_rate": 7.044743395625772e-07, "loss": 0.0108, "step": 436690 }, { "epoch": 4.665847534590523, "grad_norm": 0.11035618185997009, "learning_rate": 7.044590076990777e-07, "loss": 0.0127, "step": 436700 }, { "epoch": 4.665954377904803, "grad_norm": 0.0007092398009262979, "learning_rate": 7.044436756047287e-07, "loss": 0.0239, "step": 436710 }, { "epoch": 4.666061221219082, "grad_norm": 0.04061468690633774, "learning_rate": 7.044283432795471e-07, "loss": 0.0025, "step": 436720 }, { "epoch": 4.666168064533362, "grad_norm": 20.448461532592773, "learning_rate": 7.044130107235506e-07, "loss": 0.0443, "step": 436730 }, { "epoch": 4.666274907847641, "grad_norm": 5.603858947753906, "learning_rate": 7.043976779367565e-07, "loss": 0.0258, "step": 436740 }, { "epoch": 4.666381751161921, "grad_norm": 8.126032829284668, "learning_rate": 7.043823449191819e-07, "loss": 0.0221, "step": 436750 }, { "epoch": 4.666488594476201, "grad_norm": 0.007251071278005838, "learning_rate": 7.043670116708442e-07, "loss": 0.0111, "step": 436760 }, { "epoch": 4.66659543779048, "grad_norm": 0.02699945867061615, "learning_rate": 7.043516781917607e-07, "loss": 0.0021, "step": 436770 }, { "epoch": 4.66670228110476, "grad_norm": 6.798269748687744, "learning_rate": 7.043363444819487e-07, "loss": 0.003, "step": 436780 }, { "epoch": 4.666809124419039, "grad_norm": 4.537129878997803, "learning_rate": 7.043210105414256e-07, "loss": 0.0049, "step": 436790 }, { "epoch": 4.666915967733319, "grad_norm": 0.14266197383403778, "learning_rate": 7.043056763702087e-07, "loss": 0.0044, "step": 436800 }, { "epoch": 4.667022811047599, "grad_norm": 0.002423137193545699, "learning_rate": 7.042903419683151e-07, "loss": 0.0001, "step": 436810 }, { "epoch": 4.6671296543618785, "grad_norm": 4.089669704437256, "learning_rate": 7.042750073357625e-07, "loss": 0.0034, "step": 436820 }, { "epoch": 4.667236497676158, "grad_norm": 0.15328864753246307, "learning_rate": 7.042596724725677e-07, "loss": 0.0042, "step": 436830 }, { "epoch": 4.6673433409904375, "grad_norm": 0.00302613852545619, "learning_rate": 7.042443373787486e-07, "loss": 0.0096, "step": 436840 }, { "epoch": 4.667450184304717, "grad_norm": 0.014535920694470406, "learning_rate": 7.042290020543223e-07, "loss": 0.0023, "step": 436850 }, { "epoch": 4.667557027618996, "grad_norm": 0.006760565564036369, "learning_rate": 7.042136664993058e-07, "loss": 0.0263, "step": 436860 }, { "epoch": 4.667663870933277, "grad_norm": 0.0146656334400177, "learning_rate": 7.041983307137168e-07, "loss": 0.004, "step": 436870 }, { "epoch": 4.667770714247556, "grad_norm": 4.172985076904297, "learning_rate": 7.041829946975725e-07, "loss": 0.0061, "step": 436880 }, { "epoch": 4.667877557561836, "grad_norm": 0.022490711882710457, "learning_rate": 7.0416765845089e-07, "loss": 0.0019, "step": 436890 }, { "epoch": 4.667984400876115, "grad_norm": 0.0030248616822063923, "learning_rate": 7.041523219736869e-07, "loss": 0.0066, "step": 436900 }, { "epoch": 4.668091244190395, "grad_norm": 0.0363285131752491, "learning_rate": 7.041369852659805e-07, "loss": 0.0037, "step": 436910 }, { "epoch": 4.668198087504674, "grad_norm": 0.4614042639732361, "learning_rate": 7.041216483277879e-07, "loss": 0.0023, "step": 436920 }, { "epoch": 4.668304930818954, "grad_norm": 0.5560165643692017, "learning_rate": 7.041063111591267e-07, "loss": 0.0038, "step": 436930 }, { "epoch": 4.668411774133234, "grad_norm": 0.005607261322438717, "learning_rate": 7.04090973760014e-07, "loss": 0.0094, "step": 436940 }, { "epoch": 4.668518617447513, "grad_norm": 0.03473815321922302, "learning_rate": 7.040756361304672e-07, "loss": 0.0037, "step": 436950 }, { "epoch": 4.668625460761793, "grad_norm": 0.1585509330034256, "learning_rate": 7.040602982705035e-07, "loss": 0.004, "step": 436960 }, { "epoch": 4.668732304076072, "grad_norm": 0.02479289285838604, "learning_rate": 7.040449601801404e-07, "loss": 0.014, "step": 436970 }, { "epoch": 4.668839147390352, "grad_norm": 22.641582489013672, "learning_rate": 7.040296218593952e-07, "loss": 0.0134, "step": 436980 }, { "epoch": 4.668945990704632, "grad_norm": 1.5227960348129272, "learning_rate": 7.040142833082849e-07, "loss": 0.0067, "step": 436990 }, { "epoch": 4.6690528340189115, "grad_norm": 0.12954869866371155, "learning_rate": 7.039989445268274e-07, "loss": 0.0075, "step": 437000 }, { "epoch": 4.669159677333191, "grad_norm": 0.47560742497444153, "learning_rate": 7.039836055150395e-07, "loss": 0.0227, "step": 437010 }, { "epoch": 4.66926652064747, "grad_norm": 0.09189081192016602, "learning_rate": 7.039682662729389e-07, "loss": 0.0155, "step": 437020 }, { "epoch": 4.66937336396175, "grad_norm": 0.14524409174919128, "learning_rate": 7.039529268005426e-07, "loss": 0.0022, "step": 437030 }, { "epoch": 4.669480207276029, "grad_norm": 3.461581230163574, "learning_rate": 7.03937587097868e-07, "loss": 0.0037, "step": 437040 }, { "epoch": 4.66958705059031, "grad_norm": 3.1072208881378174, "learning_rate": 7.039222471649326e-07, "loss": 0.0584, "step": 437050 }, { "epoch": 4.669693893904589, "grad_norm": 0.7369077205657959, "learning_rate": 7.039069070017535e-07, "loss": 0.0065, "step": 437060 }, { "epoch": 4.669800737218869, "grad_norm": 0.045465074479579926, "learning_rate": 7.038915666083481e-07, "loss": 0.0329, "step": 437070 }, { "epoch": 4.669907580533148, "grad_norm": 0.005335010588169098, "learning_rate": 7.038762259847339e-07, "loss": 0.0028, "step": 437080 }, { "epoch": 4.6700144238474275, "grad_norm": 0.07859718799591064, "learning_rate": 7.038608851309279e-07, "loss": 0.0016, "step": 437090 }, { "epoch": 4.670121267161708, "grad_norm": 0.10368337482213974, "learning_rate": 7.038455440469476e-07, "loss": 0.0048, "step": 437100 }, { "epoch": 4.670228110475987, "grad_norm": 0.06638290733098984, "learning_rate": 7.038302027328104e-07, "loss": 0.0089, "step": 437110 }, { "epoch": 4.670334953790267, "grad_norm": 1.1073169708251953, "learning_rate": 7.038148611885333e-07, "loss": 0.0184, "step": 437120 }, { "epoch": 4.670441797104546, "grad_norm": 0.10677064210176468, "learning_rate": 7.037995194141341e-07, "loss": 0.0005, "step": 437130 }, { "epoch": 4.670548640418826, "grad_norm": 5.214645862579346, "learning_rate": 7.037841774096297e-07, "loss": 0.0289, "step": 437140 }, { "epoch": 4.670655483733105, "grad_norm": 0.07340522110462189, "learning_rate": 7.037688351750375e-07, "loss": 0.0189, "step": 437150 }, { "epoch": 4.670762327047385, "grad_norm": 1.5631215572357178, "learning_rate": 7.037534927103751e-07, "loss": 0.0004, "step": 437160 }, { "epoch": 4.670869170361665, "grad_norm": 0.0003266857529524714, "learning_rate": 7.037381500156597e-07, "loss": 0.0111, "step": 437170 }, { "epoch": 4.670976013675944, "grad_norm": 6.890496253967285, "learning_rate": 7.037228070909082e-07, "loss": 0.0174, "step": 437180 }, { "epoch": 4.671082856990224, "grad_norm": 0.011106297373771667, "learning_rate": 7.037074639361386e-07, "loss": 0.0202, "step": 437190 }, { "epoch": 4.671189700304503, "grad_norm": 0.13054385781288147, "learning_rate": 7.036921205513678e-07, "loss": 0.0034, "step": 437200 }, { "epoch": 4.671296543618783, "grad_norm": 0.008851372636854649, "learning_rate": 7.036767769366131e-07, "loss": 0.0234, "step": 437210 }, { "epoch": 4.671403386933063, "grad_norm": 0.008397473022341728, "learning_rate": 7.036614330918921e-07, "loss": 0.0136, "step": 437220 }, { "epoch": 4.671510230247343, "grad_norm": 2.39833402633667, "learning_rate": 7.036460890172218e-07, "loss": 0.0245, "step": 437230 }, { "epoch": 4.671617073561622, "grad_norm": 1.6493723392486572, "learning_rate": 7.036307447126199e-07, "loss": 0.0181, "step": 437240 }, { "epoch": 4.6717239168759015, "grad_norm": 0.005583727732300758, "learning_rate": 7.036154001781034e-07, "loss": 0.0174, "step": 437250 }, { "epoch": 4.671830760190181, "grad_norm": 0.03307520970702171, "learning_rate": 7.036000554136897e-07, "loss": 0.0119, "step": 437260 }, { "epoch": 4.67193760350446, "grad_norm": 3.4890546798706055, "learning_rate": 7.035847104193962e-07, "loss": 0.0194, "step": 437270 }, { "epoch": 4.67204444681874, "grad_norm": 0.04414137080311775, "learning_rate": 7.035693651952402e-07, "loss": 0.0024, "step": 437280 }, { "epoch": 4.67215129013302, "grad_norm": 0.20949043333530426, "learning_rate": 7.035540197412392e-07, "loss": 0.0053, "step": 437290 }, { "epoch": 4.6722581334473, "grad_norm": 0.07818128168582916, "learning_rate": 7.035386740574101e-07, "loss": 0.0026, "step": 437300 }, { "epoch": 4.672364976761579, "grad_norm": 0.6828551888465881, "learning_rate": 7.035233281437707e-07, "loss": 0.0179, "step": 437310 }, { "epoch": 4.672471820075859, "grad_norm": 0.1277417540550232, "learning_rate": 7.035079820003379e-07, "loss": 0.0017, "step": 437320 }, { "epoch": 4.672578663390138, "grad_norm": 0.37816616892814636, "learning_rate": 7.034926356271293e-07, "loss": 0.0152, "step": 437330 }, { "epoch": 4.672685506704418, "grad_norm": 0.10192202031612396, "learning_rate": 7.034772890241623e-07, "loss": 0.0147, "step": 437340 }, { "epoch": 4.672792350018698, "grad_norm": 0.0048173945397138596, "learning_rate": 7.034619421914538e-07, "loss": 0.0089, "step": 437350 }, { "epoch": 4.672899193332977, "grad_norm": 0.16072063148021698, "learning_rate": 7.034465951290216e-07, "loss": 0.001, "step": 437360 }, { "epoch": 4.673006036647257, "grad_norm": 0.004819704685360193, "learning_rate": 7.034312478368829e-07, "loss": 0.0111, "step": 437370 }, { "epoch": 4.673112879961536, "grad_norm": 0.07687804102897644, "learning_rate": 7.034159003150548e-07, "loss": 0.0037, "step": 437380 }, { "epoch": 4.673219723275816, "grad_norm": 0.43697863817214966, "learning_rate": 7.03400552563555e-07, "loss": 0.0001, "step": 437390 }, { "epoch": 4.673326566590095, "grad_norm": 1.778249979019165, "learning_rate": 7.033852045824005e-07, "loss": 0.0007, "step": 437400 }, { "epoch": 4.6734334099043755, "grad_norm": 3.0215229988098145, "learning_rate": 7.033698563716086e-07, "loss": 0.0134, "step": 437410 }, { "epoch": 4.673540253218655, "grad_norm": 0.009152007289230824, "learning_rate": 7.033545079311969e-07, "loss": 0.0103, "step": 437420 }, { "epoch": 4.673647096532934, "grad_norm": 0.5010967254638672, "learning_rate": 7.033391592611827e-07, "loss": 0.0032, "step": 437430 }, { "epoch": 4.673753939847214, "grad_norm": 0.01547720655798912, "learning_rate": 7.033238103615832e-07, "loss": 0.0026, "step": 437440 }, { "epoch": 4.673860783161493, "grad_norm": 0.003691284917294979, "learning_rate": 7.033084612324159e-07, "loss": 0.0029, "step": 437450 }, { "epoch": 4.673967626475774, "grad_norm": 1.1636583805084229, "learning_rate": 7.032931118736977e-07, "loss": 0.0176, "step": 437460 }, { "epoch": 4.674074469790053, "grad_norm": 0.5684720277786255, "learning_rate": 7.032777622854464e-07, "loss": 0.0118, "step": 437470 }, { "epoch": 4.674181313104333, "grad_norm": 1.021697998046875, "learning_rate": 7.032624124676792e-07, "loss": 0.0019, "step": 437480 }, { "epoch": 4.674288156418612, "grad_norm": 0.005984881892800331, "learning_rate": 7.032470624204133e-07, "loss": 0.0244, "step": 437490 }, { "epoch": 4.6743949997328915, "grad_norm": 0.1739446371793747, "learning_rate": 7.032317121436663e-07, "loss": 0.0064, "step": 437500 }, { "epoch": 4.674501843047171, "grad_norm": 0.02285466529428959, "learning_rate": 7.032163616374552e-07, "loss": 0.0194, "step": 437510 }, { "epoch": 4.67460868636145, "grad_norm": 0.3285876214504242, "learning_rate": 7.032010109017975e-07, "loss": 0.0013, "step": 437520 }, { "epoch": 4.674715529675731, "grad_norm": 3.158362865447998, "learning_rate": 7.031856599367105e-07, "loss": 0.0041, "step": 437530 }, { "epoch": 4.67482237299001, "grad_norm": 2.2730116844177246, "learning_rate": 7.031703087422117e-07, "loss": 0.0182, "step": 437540 }, { "epoch": 4.67492921630429, "grad_norm": 0.001894151559099555, "learning_rate": 7.031549573183182e-07, "loss": 0.0164, "step": 437550 }, { "epoch": 4.675036059618569, "grad_norm": 0.9374119639396667, "learning_rate": 7.031396056650474e-07, "loss": 0.0519, "step": 437560 }, { "epoch": 4.675142902932849, "grad_norm": 0.12382660806179047, "learning_rate": 7.031242537824167e-07, "loss": 0.023, "step": 437570 }, { "epoch": 4.675249746247129, "grad_norm": 0.004904135130345821, "learning_rate": 7.031089016704433e-07, "loss": 0.0018, "step": 437580 }, { "epoch": 4.675356589561408, "grad_norm": 0.3106245994567871, "learning_rate": 7.030935493291448e-07, "loss": 0.0321, "step": 437590 }, { "epoch": 4.675463432875688, "grad_norm": 0.024777842685580254, "learning_rate": 7.030781967585381e-07, "loss": 0.0008, "step": 437600 }, { "epoch": 4.675570276189967, "grad_norm": 0.6166998744010925, "learning_rate": 7.030628439586411e-07, "loss": 0.0279, "step": 437610 }, { "epoch": 4.675677119504247, "grad_norm": 0.018757078796625137, "learning_rate": 7.030474909294704e-07, "loss": 0.0147, "step": 437620 }, { "epoch": 4.675783962818526, "grad_norm": 2.488192319869995, "learning_rate": 7.03032137671044e-07, "loss": 0.0049, "step": 437630 }, { "epoch": 4.675890806132807, "grad_norm": 2.759366273880005, "learning_rate": 7.03016784183379e-07, "loss": 0.0235, "step": 437640 }, { "epoch": 4.675997649447086, "grad_norm": 0.2888339161872864, "learning_rate": 7.030014304664929e-07, "loss": 0.0198, "step": 437650 }, { "epoch": 4.6761044927613655, "grad_norm": 0.10308681428432465, "learning_rate": 7.029860765204025e-07, "loss": 0.0042, "step": 437660 }, { "epoch": 4.676211336075645, "grad_norm": 0.027101032435894012, "learning_rate": 7.029707223451258e-07, "loss": 0.0044, "step": 437670 }, { "epoch": 4.6763181793899244, "grad_norm": 2.211970806121826, "learning_rate": 7.029553679406797e-07, "loss": 0.0128, "step": 437680 }, { "epoch": 4.676425022704204, "grad_norm": 0.005947926547378302, "learning_rate": 7.029400133070816e-07, "loss": 0.0095, "step": 437690 }, { "epoch": 4.676531866018484, "grad_norm": 2.0306286811828613, "learning_rate": 7.029246584443491e-07, "loss": 0.0073, "step": 437700 }, { "epoch": 4.676638709332764, "grad_norm": 0.03802257031202316, "learning_rate": 7.029093033524993e-07, "loss": 0.0193, "step": 437710 }, { "epoch": 4.676745552647043, "grad_norm": 0.005250003654509783, "learning_rate": 7.028939480315495e-07, "loss": 0.0077, "step": 437720 }, { "epoch": 4.676852395961323, "grad_norm": 0.005672956816852093, "learning_rate": 7.028785924815172e-07, "loss": 0.0093, "step": 437730 }, { "epoch": 4.676959239275602, "grad_norm": 1.6190215349197388, "learning_rate": 7.028632367024196e-07, "loss": 0.0037, "step": 437740 }, { "epoch": 4.6770660825898815, "grad_norm": 0.13555337488651276, "learning_rate": 7.028478806942741e-07, "loss": 0.0023, "step": 437750 }, { "epoch": 4.677172925904162, "grad_norm": 3.448704242706299, "learning_rate": 7.028325244570979e-07, "loss": 0.0117, "step": 437760 }, { "epoch": 4.677279769218441, "grad_norm": 0.0009617321193218231, "learning_rate": 7.028171679909088e-07, "loss": 0.0013, "step": 437770 }, { "epoch": 4.677386612532721, "grad_norm": 0.01253849919885397, "learning_rate": 7.028018112957234e-07, "loss": 0.0008, "step": 437780 }, { "epoch": 4.677493455847, "grad_norm": 0.06036064773797989, "learning_rate": 7.027864543715599e-07, "loss": 0.014, "step": 437790 }, { "epoch": 4.67760029916128, "grad_norm": 0.018668191507458687, "learning_rate": 7.027710972184348e-07, "loss": 0.0017, "step": 437800 }, { "epoch": 4.67770714247556, "grad_norm": 0.3436548113822937, "learning_rate": 7.02755739836366e-07, "loss": 0.0023, "step": 437810 }, { "epoch": 4.6778139857898395, "grad_norm": 2.296377420425415, "learning_rate": 7.027403822253707e-07, "loss": 0.0037, "step": 437820 }, { "epoch": 4.677920829104119, "grad_norm": 0.01711154729127884, "learning_rate": 7.027250243854661e-07, "loss": 0.0054, "step": 437830 }, { "epoch": 4.6780276724183985, "grad_norm": 0.06181030347943306, "learning_rate": 7.027096663166697e-07, "loss": 0.0125, "step": 437840 }, { "epoch": 4.678134515732678, "grad_norm": 0.0053940643556416035, "learning_rate": 7.026943080189989e-07, "loss": 0.0156, "step": 437850 }, { "epoch": 4.678241359046957, "grad_norm": 0.08011435717344284, "learning_rate": 7.026789494924707e-07, "loss": 0.0067, "step": 437860 }, { "epoch": 4.678348202361237, "grad_norm": 9.332337379455566, "learning_rate": 7.026635907371028e-07, "loss": 0.0022, "step": 437870 }, { "epoch": 4.678455045675517, "grad_norm": 0.0426347441971302, "learning_rate": 7.026482317529124e-07, "loss": 0.0024, "step": 437880 }, { "epoch": 4.678561888989797, "grad_norm": 0.03107808530330658, "learning_rate": 7.026328725399169e-07, "loss": 0.0036, "step": 437890 }, { "epoch": 4.678668732304076, "grad_norm": 4.237342357635498, "learning_rate": 7.026175130981336e-07, "loss": 0.0166, "step": 437900 }, { "epoch": 4.6787755756183556, "grad_norm": 0.6199955344200134, "learning_rate": 7.026021534275796e-07, "loss": 0.006, "step": 437910 }, { "epoch": 4.678882418932635, "grad_norm": 0.007422988303005695, "learning_rate": 7.025867935282728e-07, "loss": 0.0073, "step": 437920 }, { "epoch": 4.678989262246915, "grad_norm": 0.002876078011468053, "learning_rate": 7.025714334002299e-07, "loss": 0.0125, "step": 437930 }, { "epoch": 4.679096105561195, "grad_norm": 0.6096453070640564, "learning_rate": 7.025560730434688e-07, "loss": 0.0008, "step": 437940 }, { "epoch": 4.679202948875474, "grad_norm": 0.001824245322495699, "learning_rate": 7.025407124580065e-07, "loss": 0.0077, "step": 437950 }, { "epoch": 4.679309792189754, "grad_norm": 0.005512854550033808, "learning_rate": 7.025253516438606e-07, "loss": 0.0186, "step": 437960 }, { "epoch": 4.679416635504033, "grad_norm": 0.3600921630859375, "learning_rate": 7.025099906010482e-07, "loss": 0.0074, "step": 437970 }, { "epoch": 4.679523478818313, "grad_norm": 0.3168219327926636, "learning_rate": 7.024946293295866e-07, "loss": 0.0003, "step": 437980 }, { "epoch": 4.679630322132592, "grad_norm": 0.026393519714474678, "learning_rate": 7.024792678294936e-07, "loss": 0.001, "step": 437990 }, { "epoch": 4.6797371654468725, "grad_norm": 0.0034954736474901438, "learning_rate": 7.024639061007859e-07, "loss": 0.0148, "step": 438000 }, { "epoch": 4.679844008761152, "grad_norm": 0.0013885465450584888, "learning_rate": 7.024485441434813e-07, "loss": 0.0272, "step": 438010 }, { "epoch": 4.679950852075431, "grad_norm": 0.0013120864750817418, "learning_rate": 7.02433181957597e-07, "loss": 0.0001, "step": 438020 }, { "epoch": 4.680057695389711, "grad_norm": 0.5839163661003113, "learning_rate": 7.024178195431503e-07, "loss": 0.0004, "step": 438030 }, { "epoch": 4.68016453870399, "grad_norm": 0.008128550834953785, "learning_rate": 7.024024569001588e-07, "loss": 0.0177, "step": 438040 }, { "epoch": 4.680271382018271, "grad_norm": 0.502331554889679, "learning_rate": 7.023870940286395e-07, "loss": 0.0163, "step": 438050 }, { "epoch": 4.68037822533255, "grad_norm": 0.005765093490481377, "learning_rate": 7.023717309286099e-07, "loss": 0.0031, "step": 438060 }, { "epoch": 4.68048506864683, "grad_norm": 5.362837791442871, "learning_rate": 7.023563676000874e-07, "loss": 0.0125, "step": 438070 }, { "epoch": 4.680591911961109, "grad_norm": 4.28234338760376, "learning_rate": 7.023410040430892e-07, "loss": 0.0632, "step": 438080 }, { "epoch": 4.6806987552753885, "grad_norm": 0.15581367909908295, "learning_rate": 7.023256402576328e-07, "loss": 0.0244, "step": 438090 }, { "epoch": 4.680805598589668, "grad_norm": 0.00319848139770329, "learning_rate": 7.023102762437355e-07, "loss": 0.0094, "step": 438100 }, { "epoch": 4.680912441903947, "grad_norm": 4.001079559326172, "learning_rate": 7.022949120014147e-07, "loss": 0.003, "step": 438110 }, { "epoch": 4.681019285218228, "grad_norm": 6.601895809173584, "learning_rate": 7.022795475306875e-07, "loss": 0.0335, "step": 438120 }, { "epoch": 4.681126128532507, "grad_norm": 0.004994466435164213, "learning_rate": 7.022641828315715e-07, "loss": 0.0042, "step": 438130 }, { "epoch": 4.681232971846787, "grad_norm": 0.7523473501205444, "learning_rate": 7.02248817904084e-07, "loss": 0.006, "step": 438140 }, { "epoch": 4.681339815161066, "grad_norm": 2.6633083820343018, "learning_rate": 7.022334527482423e-07, "loss": 0.019, "step": 438150 }, { "epoch": 4.681446658475346, "grad_norm": 0.005026388913393021, "learning_rate": 7.022180873640638e-07, "loss": 0.0005, "step": 438160 }, { "epoch": 4.681553501789626, "grad_norm": 0.014743846841156483, "learning_rate": 7.022027217515658e-07, "loss": 0.004, "step": 438170 }, { "epoch": 4.681660345103905, "grad_norm": 0.17289191484451294, "learning_rate": 7.021873559107656e-07, "loss": 0.0036, "step": 438180 }, { "epoch": 4.681767188418185, "grad_norm": 0.015233874320983887, "learning_rate": 7.021719898416807e-07, "loss": 0.0026, "step": 438190 }, { "epoch": 4.681874031732464, "grad_norm": 0.0017585939494892955, "learning_rate": 7.021566235443282e-07, "loss": 0.0246, "step": 438200 }, { "epoch": 4.681980875046744, "grad_norm": 0.009324870072305202, "learning_rate": 7.021412570187257e-07, "loss": 0.001, "step": 438210 }, { "epoch": 4.682087718361023, "grad_norm": 3.660693407058716, "learning_rate": 7.021258902648905e-07, "loss": 0.0292, "step": 438220 }, { "epoch": 4.682194561675303, "grad_norm": 0.0008654326666146517, "learning_rate": 7.021105232828399e-07, "loss": 0.0163, "step": 438230 }, { "epoch": 4.682301404989583, "grad_norm": 0.005055937450379133, "learning_rate": 7.020951560725913e-07, "loss": 0.0018, "step": 438240 }, { "epoch": 4.6824082483038625, "grad_norm": 0.06338930130004883, "learning_rate": 7.020797886341621e-07, "loss": 0.0031, "step": 438250 }, { "epoch": 4.682515091618142, "grad_norm": 0.011838451959192753, "learning_rate": 7.020644209675694e-07, "loss": 0.0032, "step": 438260 }, { "epoch": 4.682621934932421, "grad_norm": 0.10104827582836151, "learning_rate": 7.020490530728308e-07, "loss": 0.0117, "step": 438270 }, { "epoch": 4.682728778246701, "grad_norm": 0.006422826088964939, "learning_rate": 7.020336849499636e-07, "loss": 0.025, "step": 438280 }, { "epoch": 4.682835621560981, "grad_norm": 0.02155819907784462, "learning_rate": 7.02018316598985e-07, "loss": 0.0005, "step": 438290 }, { "epoch": 4.682942464875261, "grad_norm": 3.360395669937134, "learning_rate": 7.020029480199126e-07, "loss": 0.0525, "step": 438300 }, { "epoch": 4.68304930818954, "grad_norm": 0.5777198076248169, "learning_rate": 7.019875792127636e-07, "loss": 0.0297, "step": 438310 }, { "epoch": 4.68315615150382, "grad_norm": 0.0030638519674539566, "learning_rate": 7.019722101775553e-07, "loss": 0.0014, "step": 438320 }, { "epoch": 4.683262994818099, "grad_norm": 17.483623504638672, "learning_rate": 7.019568409143052e-07, "loss": 0.0148, "step": 438330 }, { "epoch": 4.6833698381323785, "grad_norm": 1.368430733680725, "learning_rate": 7.019414714230306e-07, "loss": 0.0009, "step": 438340 }, { "epoch": 4.683476681446659, "grad_norm": 7.479557991027832, "learning_rate": 7.019261017037488e-07, "loss": 0.0111, "step": 438350 }, { "epoch": 4.683583524760938, "grad_norm": 3.5626678466796875, "learning_rate": 7.019107317564772e-07, "loss": 0.0108, "step": 438360 }, { "epoch": 4.683690368075218, "grad_norm": 0.9984123706817627, "learning_rate": 7.018953615812332e-07, "loss": 0.0013, "step": 438370 }, { "epoch": 4.683797211389497, "grad_norm": 0.036162376403808594, "learning_rate": 7.01879991178034e-07, "loss": 0.0035, "step": 438380 }, { "epoch": 4.683904054703777, "grad_norm": 0.0858517438173294, "learning_rate": 7.018646205468971e-07, "loss": 0.0091, "step": 438390 }, { "epoch": 4.684010898018056, "grad_norm": 0.019227605313062668, "learning_rate": 7.018492496878397e-07, "loss": 0.0075, "step": 438400 }, { "epoch": 4.6841177413323365, "grad_norm": 0.21642492711544037, "learning_rate": 7.018338786008794e-07, "loss": 0.0174, "step": 438410 }, { "epoch": 4.684224584646616, "grad_norm": 0.17910392582416534, "learning_rate": 7.018185072860334e-07, "loss": 0.0036, "step": 438420 }, { "epoch": 4.684331427960895, "grad_norm": 0.23923178017139435, "learning_rate": 7.018031357433191e-07, "loss": 0.0087, "step": 438430 }, { "epoch": 4.684438271275175, "grad_norm": 0.0011047367006540298, "learning_rate": 7.017877639727538e-07, "loss": 0.0055, "step": 438440 }, { "epoch": 4.684545114589454, "grad_norm": 1.105100393295288, "learning_rate": 7.017723919743549e-07, "loss": 0.0224, "step": 438450 }, { "epoch": 4.684651957903734, "grad_norm": 0.005110185127705336, "learning_rate": 7.017570197481396e-07, "loss": 0.004, "step": 438460 }, { "epoch": 4.684758801218014, "grad_norm": 0.0070553855039179325, "learning_rate": 7.017416472941256e-07, "loss": 0.0009, "step": 438470 }, { "epoch": 4.684865644532294, "grad_norm": 0.0041311695240437984, "learning_rate": 7.017262746123301e-07, "loss": 0.0046, "step": 438480 }, { "epoch": 4.684972487846573, "grad_norm": 0.11652982980012894, "learning_rate": 7.0171090170277e-07, "loss": 0.0126, "step": 438490 }, { "epoch": 4.6850793311608525, "grad_norm": 1.0991928577423096, "learning_rate": 7.016955285654634e-07, "loss": 0.004, "step": 438500 }, { "epoch": 4.685186174475132, "grad_norm": 0.05478576570749283, "learning_rate": 7.016801552004274e-07, "loss": 0.0041, "step": 438510 }, { "epoch": 4.685293017789412, "grad_norm": 5.568030834197998, "learning_rate": 7.01664781607679e-07, "loss": 0.0032, "step": 438520 }, { "epoch": 4.685399861103692, "grad_norm": 0.0029843663796782494, "learning_rate": 7.016494077872362e-07, "loss": 0.0005, "step": 438530 }, { "epoch": 4.685506704417971, "grad_norm": 1.588594675064087, "learning_rate": 7.016340337391155e-07, "loss": 0.0219, "step": 438540 }, { "epoch": 4.685613547732251, "grad_norm": 0.796895444393158, "learning_rate": 7.016186594633352e-07, "loss": 0.0012, "step": 438550 }, { "epoch": 4.68572039104653, "grad_norm": 3.9035794734954834, "learning_rate": 7.016032849599121e-07, "loss": 0.0114, "step": 438560 }, { "epoch": 4.68582723436081, "grad_norm": 0.0818011462688446, "learning_rate": 7.015879102288635e-07, "loss": 0.0034, "step": 438570 }, { "epoch": 4.685934077675089, "grad_norm": 0.011797833256423473, "learning_rate": 7.015725352702071e-07, "loss": 0.029, "step": 438580 }, { "epoch": 4.686040920989369, "grad_norm": 0.003233125898987055, "learning_rate": 7.015571600839601e-07, "loss": 0.0016, "step": 438590 }, { "epoch": 4.686147764303649, "grad_norm": 0.9833294153213501, "learning_rate": 7.015417846701398e-07, "loss": 0.0046, "step": 438600 }, { "epoch": 4.686254607617928, "grad_norm": 0.0014818336348980665, "learning_rate": 7.015264090287636e-07, "loss": 0.004, "step": 438610 }, { "epoch": 4.686361450932208, "grad_norm": 2.745173215866089, "learning_rate": 7.015110331598491e-07, "loss": 0.0044, "step": 438620 }, { "epoch": 4.686468294246487, "grad_norm": 0.07191862910985947, "learning_rate": 7.014956570634131e-07, "loss": 0.0119, "step": 438630 }, { "epoch": 4.686575137560768, "grad_norm": 0.005225240718573332, "learning_rate": 7.014802807394735e-07, "loss": 0.0006, "step": 438640 }, { "epoch": 4.686681980875047, "grad_norm": 0.012119113467633724, "learning_rate": 7.014649041880474e-07, "loss": 0.0056, "step": 438650 }, { "epoch": 4.6867888241893265, "grad_norm": 0.0031395521946251392, "learning_rate": 7.014495274091522e-07, "loss": 0.011, "step": 438660 }, { "epoch": 4.686895667503606, "grad_norm": 0.008913462050259113, "learning_rate": 7.014341504028053e-07, "loss": 0.0039, "step": 438670 }, { "epoch": 4.687002510817885, "grad_norm": 0.0030100885778665543, "learning_rate": 7.014187731690241e-07, "loss": 0.0088, "step": 438680 }, { "epoch": 4.687109354132165, "grad_norm": 0.23246943950653076, "learning_rate": 7.014033957078258e-07, "loss": 0.0006, "step": 438690 }, { "epoch": 4.687216197446444, "grad_norm": 0.08694199472665787, "learning_rate": 7.01388018019228e-07, "loss": 0.0153, "step": 438700 }, { "epoch": 4.687323040760725, "grad_norm": 0.12183528393507004, "learning_rate": 7.013726401032478e-07, "loss": 0.0059, "step": 438710 }, { "epoch": 4.687429884075004, "grad_norm": 0.0022098729386925697, "learning_rate": 7.013572619599027e-07, "loss": 0.0306, "step": 438720 }, { "epoch": 4.687536727389284, "grad_norm": 2.9155423641204834, "learning_rate": 7.0134188358921e-07, "loss": 0.0169, "step": 438730 }, { "epoch": 4.687643570703563, "grad_norm": 0.016773099079728127, "learning_rate": 7.013265049911873e-07, "loss": 0.0099, "step": 438740 }, { "epoch": 4.6877504140178425, "grad_norm": 3.251830577850342, "learning_rate": 7.013111261658517e-07, "loss": 0.0058, "step": 438750 }, { "epoch": 4.687857257332123, "grad_norm": 0.0019009802490472794, "learning_rate": 7.012957471132206e-07, "loss": 0.0031, "step": 438760 }, { "epoch": 4.687964100646402, "grad_norm": 0.7944722175598145, "learning_rate": 7.012803678333114e-07, "loss": 0.0005, "step": 438770 }, { "epoch": 4.688070943960682, "grad_norm": 4.5429253578186035, "learning_rate": 7.012649883261415e-07, "loss": 0.005, "step": 438780 }, { "epoch": 4.688177787274961, "grad_norm": 0.15749269723892212, "learning_rate": 7.012496085917283e-07, "loss": 0.0187, "step": 438790 }, { "epoch": 4.688284630589241, "grad_norm": 1.4397025108337402, "learning_rate": 7.01234228630089e-07, "loss": 0.0025, "step": 438800 }, { "epoch": 4.68839147390352, "grad_norm": 0.033281054347753525, "learning_rate": 7.012188484412411e-07, "loss": 0.0073, "step": 438810 }, { "epoch": 4.6884983172178, "grad_norm": 2.422322988510132, "learning_rate": 7.01203468025202e-07, "loss": 0.0117, "step": 438820 }, { "epoch": 4.68860516053208, "grad_norm": 0.21049858629703522, "learning_rate": 7.011880873819888e-07, "loss": 0.0154, "step": 438830 }, { "epoch": 4.688712003846359, "grad_norm": 0.10746297985315323, "learning_rate": 7.011727065116193e-07, "loss": 0.0084, "step": 438840 }, { "epoch": 4.688818847160639, "grad_norm": 0.02026844024658203, "learning_rate": 7.011573254141105e-07, "loss": 0.0278, "step": 438850 }, { "epoch": 4.688925690474918, "grad_norm": 0.018449867144227028, "learning_rate": 7.0114194408948e-07, "loss": 0.0026, "step": 438860 }, { "epoch": 4.689032533789198, "grad_norm": 18.38506507873535, "learning_rate": 7.01126562537745e-07, "loss": 0.0208, "step": 438870 }, { "epoch": 4.689139377103478, "grad_norm": 0.0014633260434493423, "learning_rate": 7.011111807589229e-07, "loss": 0.0012, "step": 438880 }, { "epoch": 4.689246220417758, "grad_norm": 0.011154896579682827, "learning_rate": 7.010957987530312e-07, "loss": 0.0028, "step": 438890 }, { "epoch": 4.689353063732037, "grad_norm": 6.058119773864746, "learning_rate": 7.010804165200871e-07, "loss": 0.0467, "step": 438900 }, { "epoch": 4.6894599070463165, "grad_norm": 0.0033793661277741194, "learning_rate": 7.010650340601081e-07, "loss": 0.016, "step": 438910 }, { "epoch": 4.689566750360596, "grad_norm": 0.07544003427028656, "learning_rate": 7.010496513731113e-07, "loss": 0.0016, "step": 438920 }, { "epoch": 4.6896735936748755, "grad_norm": 0.005472023505717516, "learning_rate": 7.010342684591145e-07, "loss": 0.0, "step": 438930 }, { "epoch": 4.689780436989155, "grad_norm": 8.289335250854492, "learning_rate": 7.010188853181347e-07, "loss": 0.0271, "step": 438940 }, { "epoch": 4.689887280303435, "grad_norm": 0.6059143543243408, "learning_rate": 7.010035019501894e-07, "loss": 0.0191, "step": 438950 }, { "epoch": 4.689994123617715, "grad_norm": 2.11820650100708, "learning_rate": 7.009881183552962e-07, "loss": 0.0276, "step": 438960 }, { "epoch": 4.690100966931994, "grad_norm": 0.008627132512629032, "learning_rate": 7.009727345334721e-07, "loss": 0.0346, "step": 438970 }, { "epoch": 4.690207810246274, "grad_norm": 2.298238515853882, "learning_rate": 7.009573504847345e-07, "loss": 0.0103, "step": 438980 }, { "epoch": 4.690314653560553, "grad_norm": 0.0012365564471110702, "learning_rate": 7.00941966209101e-07, "loss": 0.0053, "step": 438990 }, { "epoch": 4.6904214968748335, "grad_norm": 0.015099944546818733, "learning_rate": 7.009265817065887e-07, "loss": 0.0077, "step": 439000 }, { "epoch": 4.690528340189113, "grad_norm": 3.5598807334899902, "learning_rate": 7.009111969772153e-07, "loss": 0.0045, "step": 439010 }, { "epoch": 4.690635183503392, "grad_norm": 1.6988165378570557, "learning_rate": 7.00895812020998e-07, "loss": 0.0023, "step": 439020 }, { "epoch": 4.690742026817672, "grad_norm": 0.006422362755984068, "learning_rate": 7.00880426837954e-07, "loss": 0.0133, "step": 439030 }, { "epoch": 4.690848870131951, "grad_norm": 0.02443217858672142, "learning_rate": 7.00865041428101e-07, "loss": 0.0141, "step": 439040 }, { "epoch": 4.690955713446231, "grad_norm": 1.3948452472686768, "learning_rate": 7.008496557914563e-07, "loss": 0.0101, "step": 439050 }, { "epoch": 4.691062556760511, "grad_norm": 0.07733553647994995, "learning_rate": 7.00834269928037e-07, "loss": 0.0016, "step": 439060 }, { "epoch": 4.6911694000747906, "grad_norm": 0.01750839501619339, "learning_rate": 7.008188838378606e-07, "loss": 0.0015, "step": 439070 }, { "epoch": 4.69127624338907, "grad_norm": 0.030011583119630814, "learning_rate": 7.008034975209446e-07, "loss": 0.0035, "step": 439080 }, { "epoch": 4.6913830867033495, "grad_norm": 4.220424652099609, "learning_rate": 7.007881109773064e-07, "loss": 0.0062, "step": 439090 }, { "epoch": 4.691489930017629, "grad_norm": 0.30751386284828186, "learning_rate": 7.007727242069632e-07, "loss": 0.0049, "step": 439100 }, { "epoch": 4.691596773331908, "grad_norm": 0.004591044969856739, "learning_rate": 7.007573372099324e-07, "loss": 0.002, "step": 439110 }, { "epoch": 4.691703616646189, "grad_norm": 0.04898786544799805, "learning_rate": 7.007419499862314e-07, "loss": 0.0003, "step": 439120 }, { "epoch": 4.691810459960468, "grad_norm": 0.010104854591190815, "learning_rate": 7.007265625358775e-07, "loss": 0.0012, "step": 439130 }, { "epoch": 4.691917303274748, "grad_norm": 0.5264993906021118, "learning_rate": 7.007111748588884e-07, "loss": 0.003, "step": 439140 }, { "epoch": 4.692024146589027, "grad_norm": 0.5434700846672058, "learning_rate": 7.006957869552811e-07, "loss": 0.0215, "step": 439150 }, { "epoch": 4.692130989903307, "grad_norm": 0.004347724840044975, "learning_rate": 7.006803988250732e-07, "loss": 0.0046, "step": 439160 }, { "epoch": 4.692237833217586, "grad_norm": 0.4544416666030884, "learning_rate": 7.006650104682819e-07, "loss": 0.0065, "step": 439170 }, { "epoch": 4.692344676531866, "grad_norm": 0.01279004942625761, "learning_rate": 7.006496218849246e-07, "loss": 0.0202, "step": 439180 }, { "epoch": 4.692451519846146, "grad_norm": 4.092675685882568, "learning_rate": 7.006342330750187e-07, "loss": 0.0012, "step": 439190 }, { "epoch": 4.692558363160425, "grad_norm": 0.0018696599872782826, "learning_rate": 7.006188440385819e-07, "loss": 0.0001, "step": 439200 }, { "epoch": 4.692665206474705, "grad_norm": 0.032993294298648834, "learning_rate": 7.00603454775631e-07, "loss": 0.0083, "step": 439210 }, { "epoch": 4.692772049788984, "grad_norm": 0.004375805612653494, "learning_rate": 7.005880652861838e-07, "loss": 0.049, "step": 439220 }, { "epoch": 4.692878893103265, "grad_norm": 0.021982410922646523, "learning_rate": 7.005726755702573e-07, "loss": 0.0234, "step": 439230 }, { "epoch": 4.692985736417544, "grad_norm": 8.325587272644043, "learning_rate": 7.005572856278694e-07, "loss": 0.0394, "step": 439240 }, { "epoch": 4.6930925797318235, "grad_norm": 0.0002595529949758202, "learning_rate": 7.005418954590372e-07, "loss": 0.0024, "step": 439250 }, { "epoch": 4.693199423046103, "grad_norm": 0.004944878630340099, "learning_rate": 7.005265050637779e-07, "loss": 0.0168, "step": 439260 }, { "epoch": 4.693306266360382, "grad_norm": 0.004420517012476921, "learning_rate": 7.00511114442109e-07, "loss": 0.0019, "step": 439270 }, { "epoch": 4.693413109674662, "grad_norm": 0.03979906812310219, "learning_rate": 7.004957235940481e-07, "loss": 0.0109, "step": 439280 }, { "epoch": 4.693519952988941, "grad_norm": 0.018359258770942688, "learning_rate": 7.004803325196123e-07, "loss": 0.013, "step": 439290 }, { "epoch": 4.693626796303222, "grad_norm": 0.4981309473514557, "learning_rate": 7.004649412188191e-07, "loss": 0.0097, "step": 439300 }, { "epoch": 4.693733639617501, "grad_norm": 0.053383927792310715, "learning_rate": 7.004495496916858e-07, "loss": 0.0045, "step": 439310 }, { "epoch": 4.693840482931781, "grad_norm": 0.00587870879098773, "learning_rate": 7.004341579382298e-07, "loss": 0.0114, "step": 439320 }, { "epoch": 4.69394732624606, "grad_norm": 0.004777187947183847, "learning_rate": 7.004187659584686e-07, "loss": 0.0012, "step": 439330 }, { "epoch": 4.6940541695603395, "grad_norm": 9.130434036254883, "learning_rate": 7.004033737524195e-07, "loss": 0.0462, "step": 439340 }, { "epoch": 4.69416101287462, "grad_norm": 0.3802111744880676, "learning_rate": 7.003879813200998e-07, "loss": 0.0332, "step": 439350 }, { "epoch": 4.694267856188899, "grad_norm": 6.691871643066406, "learning_rate": 7.00372588661527e-07, "loss": 0.0082, "step": 439360 }, { "epoch": 4.694374699503179, "grad_norm": 1.586918830871582, "learning_rate": 7.003571957767184e-07, "loss": 0.0012, "step": 439370 }, { "epoch": 4.694481542817458, "grad_norm": 0.002193732187151909, "learning_rate": 7.003418026656915e-07, "loss": 0.0048, "step": 439380 }, { "epoch": 4.694588386131738, "grad_norm": 1.0005160570144653, "learning_rate": 7.003264093284635e-07, "loss": 0.0013, "step": 439390 }, { "epoch": 4.694695229446017, "grad_norm": 1.6067523956298828, "learning_rate": 7.003110157650517e-07, "loss": 0.0082, "step": 439400 }, { "epoch": 4.694802072760297, "grad_norm": 1.8080620765686035, "learning_rate": 7.002956219754738e-07, "loss": 0.0108, "step": 439410 }, { "epoch": 4.694908916074577, "grad_norm": 0.00837849173694849, "learning_rate": 7.00280227959747e-07, "loss": 0.0061, "step": 439420 }, { "epoch": 4.695015759388856, "grad_norm": 0.017463937401771545, "learning_rate": 7.002648337178887e-07, "loss": 0.0014, "step": 439430 }, { "epoch": 4.695122602703136, "grad_norm": 0.02145201340317726, "learning_rate": 7.002494392499164e-07, "loss": 0.0106, "step": 439440 }, { "epoch": 4.695229446017415, "grad_norm": 0.03396404907107353, "learning_rate": 7.002340445558473e-07, "loss": 0.0037, "step": 439450 }, { "epoch": 4.695336289331695, "grad_norm": 0.3116855323314667, "learning_rate": 7.002186496356987e-07, "loss": 0.0371, "step": 439460 }, { "epoch": 4.695443132645975, "grad_norm": 0.01138849463313818, "learning_rate": 7.002032544894884e-07, "loss": 0.0007, "step": 439470 }, { "epoch": 4.695549975960255, "grad_norm": 0.014156895689666271, "learning_rate": 7.001878591172334e-07, "loss": 0.0086, "step": 439480 }, { "epoch": 4.695656819274534, "grad_norm": 0.1318584531545639, "learning_rate": 7.001724635189511e-07, "loss": 0.0109, "step": 439490 }, { "epoch": 4.6957636625888135, "grad_norm": 0.06651901453733444, "learning_rate": 7.001570676946591e-07, "loss": 0.0059, "step": 439500 }, { "epoch": 4.695870505903093, "grad_norm": 0.004041309468448162, "learning_rate": 7.001416716443746e-07, "loss": 0.0054, "step": 439510 }, { "epoch": 4.695977349217372, "grad_norm": 3.363861560821533, "learning_rate": 7.00126275368115e-07, "loss": 0.0008, "step": 439520 }, { "epoch": 4.696084192531652, "grad_norm": 0.0006055832491256297, "learning_rate": 7.001108788658978e-07, "loss": 0.0112, "step": 439530 }, { "epoch": 4.696191035845932, "grad_norm": 0.019488872960209846, "learning_rate": 7.000954821377404e-07, "loss": 0.0066, "step": 439540 }, { "epoch": 4.696297879160212, "grad_norm": 1.1296217441558838, "learning_rate": 7.000800851836599e-07, "loss": 0.0092, "step": 439550 }, { "epoch": 4.696404722474491, "grad_norm": 0.0016723144799470901, "learning_rate": 7.000646880036741e-07, "loss": 0.0029, "step": 439560 }, { "epoch": 4.696511565788771, "grad_norm": 0.9816063642501831, "learning_rate": 7.000492905978e-07, "loss": 0.0062, "step": 439570 }, { "epoch": 4.69661840910305, "grad_norm": 0.09998008608818054, "learning_rate": 7.000338929660554e-07, "loss": 0.0033, "step": 439580 }, { "epoch": 4.69672525241733, "grad_norm": 0.8331708908081055, "learning_rate": 7.000184951084571e-07, "loss": 0.0013, "step": 439590 }, { "epoch": 4.69683209573161, "grad_norm": 0.00606642197817564, "learning_rate": 7.00003097025023e-07, "loss": 0.0091, "step": 439600 }, { "epoch": 4.696938939045889, "grad_norm": 0.3561462461948395, "learning_rate": 6.999876987157704e-07, "loss": 0.0021, "step": 439610 }, { "epoch": 4.697045782360169, "grad_norm": 0.01335554663091898, "learning_rate": 6.999723001807164e-07, "loss": 0.0392, "step": 439620 }, { "epoch": 4.697152625674448, "grad_norm": 0.03517133742570877, "learning_rate": 6.999569014198786e-07, "loss": 0.0047, "step": 439630 }, { "epoch": 4.697259468988728, "grad_norm": 0.004023082088679075, "learning_rate": 6.999415024332746e-07, "loss": 0.0008, "step": 439640 }, { "epoch": 4.697366312303007, "grad_norm": 0.03354021906852722, "learning_rate": 6.999261032209214e-07, "loss": 0.011, "step": 439650 }, { "epoch": 4.6974731556172875, "grad_norm": 0.013204673305153847, "learning_rate": 6.999107037828365e-07, "loss": 0.0061, "step": 439660 }, { "epoch": 4.697579998931567, "grad_norm": 0.00876191258430481, "learning_rate": 6.998953041190373e-07, "loss": 0.002, "step": 439670 }, { "epoch": 4.697686842245846, "grad_norm": 0.034903813153505325, "learning_rate": 6.998799042295414e-07, "loss": 0.0213, "step": 439680 }, { "epoch": 4.697793685560126, "grad_norm": 0.002224311465397477, "learning_rate": 6.998645041143657e-07, "loss": 0.019, "step": 439690 }, { "epoch": 4.697900528874405, "grad_norm": 3.9572017192840576, "learning_rate": 6.998491037735282e-07, "loss": 0.0042, "step": 439700 }, { "epoch": 4.698007372188686, "grad_norm": 0.05290171504020691, "learning_rate": 6.998337032070458e-07, "loss": 0.0021, "step": 439710 }, { "epoch": 4.698114215502965, "grad_norm": 0.000552249257452786, "learning_rate": 6.99818302414936e-07, "loss": 0.0019, "step": 439720 }, { "epoch": 4.698221058817245, "grad_norm": 0.002212613122537732, "learning_rate": 6.998029013972164e-07, "loss": 0.0037, "step": 439730 }, { "epoch": 4.698327902131524, "grad_norm": 0.0017315087607130408, "learning_rate": 6.997875001539043e-07, "loss": 0.0072, "step": 439740 }, { "epoch": 4.6984347454458035, "grad_norm": 0.005413150880485773, "learning_rate": 6.997720986850169e-07, "loss": 0.0044, "step": 439750 }, { "epoch": 4.698541588760083, "grad_norm": 0.5571155548095703, "learning_rate": 6.997566969905717e-07, "loss": 0.0006, "step": 439760 }, { "epoch": 4.698648432074363, "grad_norm": 0.004885036498308182, "learning_rate": 6.997412950705861e-07, "loss": 0.008, "step": 439770 }, { "epoch": 4.698755275388643, "grad_norm": 0.003126045223325491, "learning_rate": 6.997258929250777e-07, "loss": 0.0004, "step": 439780 }, { "epoch": 4.698862118702922, "grad_norm": 0.016772454604506493, "learning_rate": 6.997104905540636e-07, "loss": 0.0088, "step": 439790 }, { "epoch": 4.698968962017202, "grad_norm": 0.014708795584738255, "learning_rate": 6.996950879575611e-07, "loss": 0.0085, "step": 439800 }, { "epoch": 4.699075805331481, "grad_norm": 0.3100208342075348, "learning_rate": 6.996796851355881e-07, "loss": 0.0256, "step": 439810 }, { "epoch": 4.699182648645761, "grad_norm": 0.8201605677604675, "learning_rate": 6.996642820881614e-07, "loss": 0.0111, "step": 439820 }, { "epoch": 4.699289491960041, "grad_norm": 0.015446990728378296, "learning_rate": 6.996488788152988e-07, "loss": 0.009, "step": 439830 }, { "epoch": 4.69939633527432, "grad_norm": 0.0009689861908555031, "learning_rate": 6.996334753170175e-07, "loss": 0.003, "step": 439840 }, { "epoch": 4.6995031785886, "grad_norm": 1.9484272003173828, "learning_rate": 6.996180715933349e-07, "loss": 0.0015, "step": 439850 }, { "epoch": 4.699610021902879, "grad_norm": 0.003743911162018776, "learning_rate": 6.996026676442684e-07, "loss": 0.0362, "step": 439860 }, { "epoch": 4.699716865217159, "grad_norm": 0.08673398196697235, "learning_rate": 6.995872634698355e-07, "loss": 0.0001, "step": 439870 }, { "epoch": 4.699823708531438, "grad_norm": 2.929072618484497, "learning_rate": 6.995718590700536e-07, "loss": 0.0031, "step": 439880 }, { "epoch": 4.699930551845719, "grad_norm": 1.621006727218628, "learning_rate": 6.995564544449398e-07, "loss": 0.0112, "step": 439890 }, { "epoch": 4.700037395159998, "grad_norm": 0.015184701420366764, "learning_rate": 6.995410495945119e-07, "loss": 0.0041, "step": 439900 }, { "epoch": 4.7001442384742775, "grad_norm": 0.0013911295682191849, "learning_rate": 6.99525644518787e-07, "loss": 0.0133, "step": 439910 }, { "epoch": 4.700251081788557, "grad_norm": 0.0008312965510413051, "learning_rate": 6.995102392177826e-07, "loss": 0.0079, "step": 439920 }, { "epoch": 4.7003579251028365, "grad_norm": 0.0038570635952055454, "learning_rate": 6.99494833691516e-07, "loss": 0.0001, "step": 439930 }, { "epoch": 4.700464768417116, "grad_norm": 0.0025816401466727257, "learning_rate": 6.994794279400049e-07, "loss": 0.0129, "step": 439940 }, { "epoch": 4.700571611731396, "grad_norm": 0.2461434304714203, "learning_rate": 6.994640219632663e-07, "loss": 0.0069, "step": 439950 }, { "epoch": 4.700678455045676, "grad_norm": 0.01164238154888153, "learning_rate": 6.994486157613178e-07, "loss": 0.0011, "step": 439960 }, { "epoch": 4.700785298359955, "grad_norm": 0.03695106878876686, "learning_rate": 6.994332093341767e-07, "loss": 0.0064, "step": 439970 }, { "epoch": 4.700892141674235, "grad_norm": 0.12649381160736084, "learning_rate": 6.994178026818606e-07, "loss": 0.0019, "step": 439980 }, { "epoch": 4.700998984988514, "grad_norm": 0.13320837914943695, "learning_rate": 6.994023958043867e-07, "loss": 0.016, "step": 439990 }, { "epoch": 4.7011058283027936, "grad_norm": 0.011456716805696487, "learning_rate": 6.993869887017722e-07, "loss": 0.0191, "step": 440000 }, { "epoch": 4.701212671617074, "grad_norm": 0.011706541292369366, "learning_rate": 6.993715813740351e-07, "loss": 0.0, "step": 440010 }, { "epoch": 4.701319514931353, "grad_norm": 3.7690107822418213, "learning_rate": 6.993561738211923e-07, "loss": 0.0014, "step": 440020 }, { "epoch": 4.701426358245633, "grad_norm": 0.0010313214734196663, "learning_rate": 6.993407660432612e-07, "loss": 0.0653, "step": 440030 }, { "epoch": 4.701533201559912, "grad_norm": 0.031103558838367462, "learning_rate": 6.993253580402595e-07, "loss": 0.0258, "step": 440040 }, { "epoch": 4.701640044874192, "grad_norm": 0.001467555295675993, "learning_rate": 6.993099498122044e-07, "loss": 0.0106, "step": 440050 }, { "epoch": 4.701746888188472, "grad_norm": 0.8196625709533691, "learning_rate": 6.992945413591133e-07, "loss": 0.0015, "step": 440060 }, { "epoch": 4.7018537315027515, "grad_norm": 0.22359292209148407, "learning_rate": 6.992791326810037e-07, "loss": 0.0048, "step": 440070 }, { "epoch": 4.701960574817031, "grad_norm": 0.0008596272673457861, "learning_rate": 6.992637237778928e-07, "loss": 0.0014, "step": 440080 }, { "epoch": 4.7020674181313105, "grad_norm": 0.5814721584320068, "learning_rate": 6.992483146497981e-07, "loss": 0.0047, "step": 440090 }, { "epoch": 4.70217426144559, "grad_norm": 9.016973495483398, "learning_rate": 6.992329052967372e-07, "loss": 0.0101, "step": 440100 }, { "epoch": 4.702281104759869, "grad_norm": 0.007748476229608059, "learning_rate": 6.99217495718727e-07, "loss": 0.0118, "step": 440110 }, { "epoch": 4.702387948074149, "grad_norm": 0.01604287512600422, "learning_rate": 6.992020859157854e-07, "loss": 0.0059, "step": 440120 }, { "epoch": 4.702494791388429, "grad_norm": 0.00393984280526638, "learning_rate": 6.991866758879297e-07, "loss": 0.0004, "step": 440130 }, { "epoch": 4.702601634702709, "grad_norm": 0.9699341058731079, "learning_rate": 6.991712656351771e-07, "loss": 0.0007, "step": 440140 }, { "epoch": 4.702708478016988, "grad_norm": 0.46361106634140015, "learning_rate": 6.99155855157545e-07, "loss": 0.0088, "step": 440150 }, { "epoch": 4.702815321331268, "grad_norm": 0.10379531979560852, "learning_rate": 6.991404444550511e-07, "loss": 0.0081, "step": 440160 }, { "epoch": 4.702922164645547, "grad_norm": 7.79498291015625, "learning_rate": 6.991250335277125e-07, "loss": 0.0507, "step": 440170 }, { "epoch": 4.703029007959827, "grad_norm": 0.0676453486084938, "learning_rate": 6.991096223755469e-07, "loss": 0.0014, "step": 440180 }, { "epoch": 4.703135851274107, "grad_norm": 0.016581853851675987, "learning_rate": 6.990942109985712e-07, "loss": 0.0007, "step": 440190 }, { "epoch": 4.703242694588386, "grad_norm": 0.022341690957546234, "learning_rate": 6.990787993968032e-07, "loss": 0.0084, "step": 440200 }, { "epoch": 4.703349537902666, "grad_norm": 0.004067953210324049, "learning_rate": 6.990633875702605e-07, "loss": 0.0111, "step": 440210 }, { "epoch": 4.703456381216945, "grad_norm": 4.086061000823975, "learning_rate": 6.9904797551896e-07, "loss": 0.0187, "step": 440220 }, { "epoch": 4.703563224531225, "grad_norm": 0.7774802446365356, "learning_rate": 6.990325632429192e-07, "loss": 0.0029, "step": 440230 }, { "epoch": 4.703670067845504, "grad_norm": 7.658517360687256, "learning_rate": 6.990171507421557e-07, "loss": 0.0285, "step": 440240 }, { "epoch": 4.7037769111597845, "grad_norm": 0.009302007965743542, "learning_rate": 6.990017380166869e-07, "loss": 0.0118, "step": 440250 }, { "epoch": 4.703883754474064, "grad_norm": 2.372779130935669, "learning_rate": 6.9898632506653e-07, "loss": 0.002, "step": 440260 }, { "epoch": 4.703990597788343, "grad_norm": 0.8550277352333069, "learning_rate": 6.989709118917026e-07, "loss": 0.0056, "step": 440270 }, { "epoch": 4.704097441102623, "grad_norm": 4.123682498931885, "learning_rate": 6.98955498492222e-07, "loss": 0.0032, "step": 440280 }, { "epoch": 4.704204284416902, "grad_norm": 1.1569640636444092, "learning_rate": 6.989400848681057e-07, "loss": 0.0094, "step": 440290 }, { "epoch": 4.704311127731183, "grad_norm": 0.40800678730010986, "learning_rate": 6.989246710193709e-07, "loss": 0.0015, "step": 440300 }, { "epoch": 4.704417971045462, "grad_norm": 0.022551313042640686, "learning_rate": 6.989092569460352e-07, "loss": 0.026, "step": 440310 }, { "epoch": 4.704524814359742, "grad_norm": 3.7059738636016846, "learning_rate": 6.988938426481159e-07, "loss": 0.0084, "step": 440320 }, { "epoch": 4.704631657674021, "grad_norm": 0.11926968395709991, "learning_rate": 6.988784281256304e-07, "loss": 0.0028, "step": 440330 }, { "epoch": 4.7047385009883005, "grad_norm": 0.003001145087182522, "learning_rate": 6.988630133785962e-07, "loss": 0.0055, "step": 440340 }, { "epoch": 4.70484534430258, "grad_norm": 6.15146541595459, "learning_rate": 6.988475984070307e-07, "loss": 0.0249, "step": 440350 }, { "epoch": 4.704952187616859, "grad_norm": 0.2712702453136444, "learning_rate": 6.988321832109513e-07, "loss": 0.0105, "step": 440360 }, { "epoch": 4.70505903093114, "grad_norm": 0.0017420146614313126, "learning_rate": 6.988167677903752e-07, "loss": 0.0007, "step": 440370 }, { "epoch": 4.705165874245419, "grad_norm": 0.009459053166210651, "learning_rate": 6.9880135214532e-07, "loss": 0.002, "step": 440380 }, { "epoch": 4.705272717559699, "grad_norm": 0.0009488326031714678, "learning_rate": 6.987859362758031e-07, "loss": 0.0019, "step": 440390 }, { "epoch": 4.705379560873978, "grad_norm": 0.025891711935400963, "learning_rate": 6.987705201818419e-07, "loss": 0.0022, "step": 440400 }, { "epoch": 4.705486404188258, "grad_norm": 3.9422552585601807, "learning_rate": 6.987551038634538e-07, "loss": 0.007, "step": 440410 }, { "epoch": 4.705593247502538, "grad_norm": 0.0015588622773066163, "learning_rate": 6.987396873206561e-07, "loss": 0.0155, "step": 440420 }, { "epoch": 4.705700090816817, "grad_norm": 1.7155166864395142, "learning_rate": 6.987242705534664e-07, "loss": 0.0073, "step": 440430 }, { "epoch": 4.705806934131097, "grad_norm": 0.0007974605541676283, "learning_rate": 6.987088535619019e-07, "loss": 0.0061, "step": 440440 }, { "epoch": 4.705913777445376, "grad_norm": 0.0050031389109790325, "learning_rate": 6.986934363459801e-07, "loss": 0.0011, "step": 440450 }, { "epoch": 4.706020620759656, "grad_norm": 0.0015304614789783955, "learning_rate": 6.986780189057185e-07, "loss": 0.0008, "step": 440460 }, { "epoch": 4.706127464073935, "grad_norm": 0.023000916466116905, "learning_rate": 6.986626012411343e-07, "loss": 0.0056, "step": 440470 }, { "epoch": 4.706234307388215, "grad_norm": 0.031849708408117294, "learning_rate": 6.986471833522451e-07, "loss": 0.0054, "step": 440480 }, { "epoch": 4.706341150702495, "grad_norm": 0.2711530029773712, "learning_rate": 6.986317652390683e-07, "loss": 0.0055, "step": 440490 }, { "epoch": 4.7064479940167745, "grad_norm": 0.00994290690869093, "learning_rate": 6.986163469016212e-07, "loss": 0.0225, "step": 440500 }, { "epoch": 4.706554837331054, "grad_norm": 0.001578498282469809, "learning_rate": 6.986009283399213e-07, "loss": 0.0292, "step": 440510 }, { "epoch": 4.706661680645333, "grad_norm": 0.010233184322714806, "learning_rate": 6.985855095539858e-07, "loss": 0.0063, "step": 440520 }, { "epoch": 4.706768523959613, "grad_norm": 2.767690896987915, "learning_rate": 6.985700905438324e-07, "loss": 0.0037, "step": 440530 }, { "epoch": 4.706875367273893, "grad_norm": 0.002915968419983983, "learning_rate": 6.985546713094783e-07, "loss": 0.0031, "step": 440540 }, { "epoch": 4.706982210588173, "grad_norm": 0.0037980854976922274, "learning_rate": 6.985392518509412e-07, "loss": 0.0125, "step": 440550 }, { "epoch": 4.707089053902452, "grad_norm": 0.024336159229278564, "learning_rate": 6.985238321682381e-07, "loss": 0.005, "step": 440560 }, { "epoch": 4.707195897216732, "grad_norm": 0.013116911984980106, "learning_rate": 6.985084122613867e-07, "loss": 0.0021, "step": 440570 }, { "epoch": 4.707302740531011, "grad_norm": 0.013426362536847591, "learning_rate": 6.984929921304043e-07, "loss": 0.0136, "step": 440580 }, { "epoch": 4.7074095838452905, "grad_norm": 2.9572739601135254, "learning_rate": 6.984775717753083e-07, "loss": 0.0138, "step": 440590 }, { "epoch": 4.707516427159571, "grad_norm": 0.0016910572303459048, "learning_rate": 6.984621511961161e-07, "loss": 0.0003, "step": 440600 }, { "epoch": 4.70762327047385, "grad_norm": 0.0012370026670396328, "learning_rate": 6.984467303928453e-07, "loss": 0.0018, "step": 440610 }, { "epoch": 4.70773011378813, "grad_norm": 0.6148761510848999, "learning_rate": 6.984313093655131e-07, "loss": 0.0024, "step": 440620 }, { "epoch": 4.707836957102409, "grad_norm": 3.340641498565674, "learning_rate": 6.984158881141368e-07, "loss": 0.0357, "step": 440630 }, { "epoch": 4.707943800416689, "grad_norm": 0.2722274959087372, "learning_rate": 6.984004666387342e-07, "loss": 0.0078, "step": 440640 }, { "epoch": 4.708050643730968, "grad_norm": 18.332931518554688, "learning_rate": 6.983850449393225e-07, "loss": 0.0072, "step": 440650 }, { "epoch": 4.7081574870452485, "grad_norm": 0.08314263820648193, "learning_rate": 6.983696230159189e-07, "loss": 0.0344, "step": 440660 }, { "epoch": 4.708264330359528, "grad_norm": 3.2237157821655273, "learning_rate": 6.983542008685413e-07, "loss": 0.014, "step": 440670 }, { "epoch": 4.708371173673807, "grad_norm": 0.009140810929238796, "learning_rate": 6.983387784972068e-07, "loss": 0.0194, "step": 440680 }, { "epoch": 4.708478016988087, "grad_norm": 0.9438827633857727, "learning_rate": 6.983233559019326e-07, "loss": 0.0097, "step": 440690 }, { "epoch": 4.708584860302366, "grad_norm": 0.04405108466744423, "learning_rate": 6.983079330827366e-07, "loss": 0.0041, "step": 440700 }, { "epoch": 4.708691703616646, "grad_norm": 8.573732376098633, "learning_rate": 6.982925100396359e-07, "loss": 0.0319, "step": 440710 }, { "epoch": 4.708798546930926, "grad_norm": 0.006294408813118935, "learning_rate": 6.982770867726479e-07, "loss": 0.0107, "step": 440720 }, { "epoch": 4.708905390245206, "grad_norm": 0.004948458634316921, "learning_rate": 6.982616632817903e-07, "loss": 0.0022, "step": 440730 }, { "epoch": 4.709012233559485, "grad_norm": 2.9476850032806396, "learning_rate": 6.982462395670802e-07, "loss": 0.0121, "step": 440740 }, { "epoch": 4.7091190768737645, "grad_norm": 2.1259539127349854, "learning_rate": 6.982308156285351e-07, "loss": 0.0084, "step": 440750 }, { "epoch": 4.709225920188044, "grad_norm": 0.011731273494660854, "learning_rate": 6.982153914661725e-07, "loss": 0.002, "step": 440760 }, { "epoch": 4.709332763502324, "grad_norm": 0.08825727552175522, "learning_rate": 6.981999670800097e-07, "loss": 0.0093, "step": 440770 }, { "epoch": 4.709439606816604, "grad_norm": 0.0023700608871877193, "learning_rate": 6.981845424700642e-07, "loss": 0.0034, "step": 440780 }, { "epoch": 4.709546450130883, "grad_norm": 0.5626744031906128, "learning_rate": 6.981691176363534e-07, "loss": 0.0076, "step": 440790 }, { "epoch": 4.709653293445163, "grad_norm": 1.723166584968567, "learning_rate": 6.981536925788948e-07, "loss": 0.0923, "step": 440800 }, { "epoch": 4.709760136759442, "grad_norm": 0.0017703454941511154, "learning_rate": 6.981382672977057e-07, "loss": 0.0122, "step": 440810 }, { "epoch": 4.709866980073722, "grad_norm": 3.828819990158081, "learning_rate": 6.981228417928036e-07, "loss": 0.0051, "step": 440820 }, { "epoch": 4.709973823388001, "grad_norm": 0.07881093770265579, "learning_rate": 6.981074160642056e-07, "loss": 0.0256, "step": 440830 }, { "epoch": 4.710080666702281, "grad_norm": 1.9516706466674805, "learning_rate": 6.980919901119296e-07, "loss": 0.0075, "step": 440840 }, { "epoch": 4.710187510016561, "grad_norm": 0.11482208967208862, "learning_rate": 6.980765639359928e-07, "loss": 0.0022, "step": 440850 }, { "epoch": 4.71029435333084, "grad_norm": 0.17560061812400818, "learning_rate": 6.980611375364125e-07, "loss": 0.0095, "step": 440860 }, { "epoch": 4.71040119664512, "grad_norm": 2.852322578430176, "learning_rate": 6.980457109132063e-07, "loss": 0.0062, "step": 440870 }, { "epoch": 4.710508039959399, "grad_norm": 0.7360796332359314, "learning_rate": 6.980302840663915e-07, "loss": 0.0152, "step": 440880 }, { "epoch": 4.71061488327368, "grad_norm": 0.26106202602386475, "learning_rate": 6.980148569959855e-07, "loss": 0.0007, "step": 440890 }, { "epoch": 4.710721726587959, "grad_norm": 0.011361939832568169, "learning_rate": 6.979994297020058e-07, "loss": 0.0068, "step": 440900 }, { "epoch": 4.7108285699022385, "grad_norm": 7.338510513305664, "learning_rate": 6.9798400218447e-07, "loss": 0.0044, "step": 440910 }, { "epoch": 4.710935413216518, "grad_norm": 5.527843952178955, "learning_rate": 6.97968574443395e-07, "loss": 0.0111, "step": 440920 }, { "epoch": 4.7110422565307974, "grad_norm": 0.05058518424630165, "learning_rate": 6.979531464787989e-07, "loss": 0.0241, "step": 440930 }, { "epoch": 4.711149099845077, "grad_norm": 1.4492523670196533, "learning_rate": 6.979377182906985e-07, "loss": 0.0051, "step": 440940 }, { "epoch": 4.711255943159356, "grad_norm": 0.017716510221362114, "learning_rate": 6.979222898791116e-07, "loss": 0.0007, "step": 440950 }, { "epoch": 4.711362786473637, "grad_norm": 0.007982296869158745, "learning_rate": 6.979068612440555e-07, "loss": 0.0008, "step": 440960 }, { "epoch": 4.711469629787916, "grad_norm": 0.001778750098310411, "learning_rate": 6.978914323855474e-07, "loss": 0.0081, "step": 440970 }, { "epoch": 4.711576473102196, "grad_norm": 0.011855021119117737, "learning_rate": 6.978760033036052e-07, "loss": 0.0, "step": 440980 }, { "epoch": 4.711683316416475, "grad_norm": 1.766801357269287, "learning_rate": 6.97860573998246e-07, "loss": 0.001, "step": 440990 }, { "epoch": 4.7117901597307545, "grad_norm": 0.018637537956237793, "learning_rate": 6.978451444694872e-07, "loss": 0.0002, "step": 441000 }, { "epoch": 4.711897003045035, "grad_norm": 0.4943081736564636, "learning_rate": 6.978297147173463e-07, "loss": 0.0026, "step": 441010 }, { "epoch": 4.712003846359314, "grad_norm": 0.7160009145736694, "learning_rate": 6.978142847418409e-07, "loss": 0.0049, "step": 441020 }, { "epoch": 4.712110689673594, "grad_norm": 0.008115730248391628, "learning_rate": 6.97798854542988e-07, "loss": 0.0005, "step": 441030 }, { "epoch": 4.712217532987873, "grad_norm": 0.0211905837059021, "learning_rate": 6.977834241208053e-07, "loss": 0.0156, "step": 441040 }, { "epoch": 4.712324376302153, "grad_norm": 0.08442126959562302, "learning_rate": 6.977679934753103e-07, "loss": 0.0045, "step": 441050 }, { "epoch": 4.712431219616432, "grad_norm": 5.352349758148193, "learning_rate": 6.977525626065203e-07, "loss": 0.0198, "step": 441060 }, { "epoch": 4.712538062930712, "grad_norm": 0.20242738723754883, "learning_rate": 6.977371315144526e-07, "loss": 0.0016, "step": 441070 }, { "epoch": 4.712644906244992, "grad_norm": 10.230801582336426, "learning_rate": 6.977217001991249e-07, "loss": 0.0445, "step": 441080 }, { "epoch": 4.7127517495592715, "grad_norm": 0.508517324924469, "learning_rate": 6.977062686605544e-07, "loss": 0.0002, "step": 441090 }, { "epoch": 4.712858592873551, "grad_norm": 3.2335805892944336, "learning_rate": 6.976908368987586e-07, "loss": 0.0312, "step": 441100 }, { "epoch": 4.71296543618783, "grad_norm": 0.3254723846912384, "learning_rate": 6.976754049137549e-07, "loss": 0.0166, "step": 441110 }, { "epoch": 4.71307227950211, "grad_norm": 1.812968134880066, "learning_rate": 6.976599727055608e-07, "loss": 0.0014, "step": 441120 }, { "epoch": 4.71317912281639, "grad_norm": 0.008984318934381008, "learning_rate": 6.976445402741938e-07, "loss": 0.0192, "step": 441130 }, { "epoch": 4.71328596613067, "grad_norm": 0.0013676669914275408, "learning_rate": 6.97629107619671e-07, "loss": 0.0024, "step": 441140 }, { "epoch": 4.713392809444949, "grad_norm": 0.09215951710939407, "learning_rate": 6.9761367474201e-07, "loss": 0.0165, "step": 441150 }, { "epoch": 4.7134996527592286, "grad_norm": 0.0027405943255871534, "learning_rate": 6.975982416412285e-07, "loss": 0.0042, "step": 441160 }, { "epoch": 4.713606496073508, "grad_norm": 0.02745402418076992, "learning_rate": 6.975828083173434e-07, "loss": 0.011, "step": 441170 }, { "epoch": 4.7137133393877875, "grad_norm": 0.0023532153572887182, "learning_rate": 6.975673747703726e-07, "loss": 0.0046, "step": 441180 }, { "epoch": 4.713820182702067, "grad_norm": 0.0609188973903656, "learning_rate": 6.975519410003333e-07, "loss": 0.0004, "step": 441190 }, { "epoch": 4.713927026016347, "grad_norm": 0.0003259523946326226, "learning_rate": 6.975365070072427e-07, "loss": 0.0016, "step": 441200 }, { "epoch": 4.714033869330627, "grad_norm": 14.376429557800293, "learning_rate": 6.975210727911187e-07, "loss": 0.0083, "step": 441210 }, { "epoch": 4.714140712644906, "grad_norm": 0.010954887606203556, "learning_rate": 6.975056383519786e-07, "loss": 0.0041, "step": 441220 }, { "epoch": 4.714247555959186, "grad_norm": 0.007138497196137905, "learning_rate": 6.974902036898394e-07, "loss": 0.0138, "step": 441230 }, { "epoch": 4.714354399273465, "grad_norm": 0.06953979283571243, "learning_rate": 6.974747688047191e-07, "loss": 0.0138, "step": 441240 }, { "epoch": 4.7144612425877455, "grad_norm": 1.2312510013580322, "learning_rate": 6.974593336966349e-07, "loss": 0.0246, "step": 441250 }, { "epoch": 4.714568085902025, "grad_norm": 0.00588643504306674, "learning_rate": 6.974438983656042e-07, "loss": 0.0111, "step": 441260 }, { "epoch": 4.714674929216304, "grad_norm": 0.012411223724484444, "learning_rate": 6.974284628116442e-07, "loss": 0.0022, "step": 441270 }, { "epoch": 4.714781772530584, "grad_norm": 0.014159301295876503, "learning_rate": 6.974130270347729e-07, "loss": 0.0023, "step": 441280 }, { "epoch": 4.714888615844863, "grad_norm": 1.6021678447723389, "learning_rate": 6.973975910350072e-07, "loss": 0.0072, "step": 441290 }, { "epoch": 4.714995459159143, "grad_norm": 0.017413916066288948, "learning_rate": 6.973821548123648e-07, "loss": 0.0247, "step": 441300 }, { "epoch": 4.715102302473423, "grad_norm": 0.0035353461280465126, "learning_rate": 6.973667183668627e-07, "loss": 0.0015, "step": 441310 }, { "epoch": 4.715209145787703, "grad_norm": 0.012055235914885998, "learning_rate": 6.973512816985192e-07, "loss": 0.0001, "step": 441320 }, { "epoch": 4.715315989101982, "grad_norm": 0.005920951720327139, "learning_rate": 6.973358448073509e-07, "loss": 0.0019, "step": 441330 }, { "epoch": 4.7154228324162615, "grad_norm": 0.00183314539026469, "learning_rate": 6.973204076933757e-07, "loss": 0.003, "step": 441340 }, { "epoch": 4.715529675730541, "grad_norm": 0.00818491168320179, "learning_rate": 6.973049703566107e-07, "loss": 0.001, "step": 441350 }, { "epoch": 4.71563651904482, "grad_norm": 0.029285389930009842, "learning_rate": 6.972895327970737e-07, "loss": 0.0095, "step": 441360 }, { "epoch": 4.715743362359101, "grad_norm": 4.644861698150635, "learning_rate": 6.972740950147817e-07, "loss": 0.0164, "step": 441370 }, { "epoch": 4.71585020567338, "grad_norm": 0.043310072273015976, "learning_rate": 6.972586570097525e-07, "loss": 0.0075, "step": 441380 }, { "epoch": 4.71595704898766, "grad_norm": 3.8834714889526367, "learning_rate": 6.972432187820033e-07, "loss": 0.0057, "step": 441390 }, { "epoch": 4.716063892301939, "grad_norm": 1.440971851348877, "learning_rate": 6.972277803315517e-07, "loss": 0.0013, "step": 441400 }, { "epoch": 4.716170735616219, "grad_norm": 0.003925187047570944, "learning_rate": 6.972123416584151e-07, "loss": 0.01, "step": 441410 }, { "epoch": 4.716277578930498, "grad_norm": 17.40984535217285, "learning_rate": 6.971969027626108e-07, "loss": 0.0073, "step": 441420 }, { "epoch": 4.716384422244778, "grad_norm": 0.010412833653390408, "learning_rate": 6.971814636441562e-07, "loss": 0.0048, "step": 441430 }, { "epoch": 4.716491265559058, "grad_norm": 6.513783931732178, "learning_rate": 6.97166024303069e-07, "loss": 0.0156, "step": 441440 }, { "epoch": 4.716598108873337, "grad_norm": 0.21994024515151978, "learning_rate": 6.971505847393665e-07, "loss": 0.0098, "step": 441450 }, { "epoch": 4.716704952187617, "grad_norm": 0.3296448290348053, "learning_rate": 6.97135144953066e-07, "loss": 0.0017, "step": 441460 }, { "epoch": 4.716811795501896, "grad_norm": 2.883171319961548, "learning_rate": 6.971197049441852e-07, "loss": 0.0045, "step": 441470 }, { "epoch": 4.716918638816177, "grad_norm": 0.004508818034082651, "learning_rate": 6.971042647127414e-07, "loss": 0.0079, "step": 441480 }, { "epoch": 4.717025482130456, "grad_norm": 0.36192020773887634, "learning_rate": 6.970888242587517e-07, "loss": 0.0006, "step": 441490 }, { "epoch": 4.7171323254447355, "grad_norm": 0.02319486252963543, "learning_rate": 6.97073383582234e-07, "loss": 0.0115, "step": 441500 }, { "epoch": 4.717239168759015, "grad_norm": 0.047185588628053665, "learning_rate": 6.970579426832056e-07, "loss": 0.0204, "step": 441510 }, { "epoch": 4.717346012073294, "grad_norm": 0.018124356865882874, "learning_rate": 6.97042501561684e-07, "loss": 0.0011, "step": 441520 }, { "epoch": 4.717452855387574, "grad_norm": 2.1585237979888916, "learning_rate": 6.970270602176864e-07, "loss": 0.0056, "step": 441530 }, { "epoch": 4.717559698701853, "grad_norm": 0.021937498822808266, "learning_rate": 6.970116186512304e-07, "loss": 0.0037, "step": 441540 }, { "epoch": 4.717666542016134, "grad_norm": 7.019344806671143, "learning_rate": 6.969961768623336e-07, "loss": 0.0101, "step": 441550 }, { "epoch": 4.717773385330413, "grad_norm": 1.1106137037277222, "learning_rate": 6.969807348510131e-07, "loss": 0.0096, "step": 441560 }, { "epoch": 4.717880228644693, "grad_norm": 0.22572478652000427, "learning_rate": 6.969652926172864e-07, "loss": 0.0254, "step": 441570 }, { "epoch": 4.717987071958972, "grad_norm": 1.5985559225082397, "learning_rate": 6.969498501611711e-07, "loss": 0.0034, "step": 441580 }, { "epoch": 4.7180939152732515, "grad_norm": 0.00604566466063261, "learning_rate": 6.969344074826846e-07, "loss": 0.0081, "step": 441590 }, { "epoch": 4.718200758587532, "grad_norm": 0.08025474846363068, "learning_rate": 6.969189645818442e-07, "loss": 0.0025, "step": 441600 }, { "epoch": 4.718307601901811, "grad_norm": 5.151495456695557, "learning_rate": 6.969035214586675e-07, "loss": 0.0038, "step": 441610 }, { "epoch": 4.718414445216091, "grad_norm": 0.007300165481865406, "learning_rate": 6.968880781131719e-07, "loss": 0.0004, "step": 441620 }, { "epoch": 4.71852128853037, "grad_norm": 0.002337502781301737, "learning_rate": 6.968726345453745e-07, "loss": 0.0024, "step": 441630 }, { "epoch": 4.71862813184465, "grad_norm": 0.350397527217865, "learning_rate": 6.968571907552934e-07, "loss": 0.0025, "step": 441640 }, { "epoch": 4.718734975158929, "grad_norm": 5.049760818481445, "learning_rate": 6.968417467429457e-07, "loss": 0.0031, "step": 441650 }, { "epoch": 4.718841818473209, "grad_norm": 0.004268021788448095, "learning_rate": 6.968263025083485e-07, "loss": 0.0012, "step": 441660 }, { "epoch": 4.718948661787489, "grad_norm": 0.006237114779651165, "learning_rate": 6.968108580515198e-07, "loss": 0.0036, "step": 441670 }, { "epoch": 4.719055505101768, "grad_norm": 0.30811384320259094, "learning_rate": 6.967954133724766e-07, "loss": 0.0064, "step": 441680 }, { "epoch": 4.719162348416048, "grad_norm": 3.227531909942627, "learning_rate": 6.967799684712366e-07, "loss": 0.006, "step": 441690 }, { "epoch": 4.719269191730327, "grad_norm": 0.008611885830760002, "learning_rate": 6.967645233478172e-07, "loss": 0.0106, "step": 441700 }, { "epoch": 4.719376035044607, "grad_norm": 0.4054725170135498, "learning_rate": 6.967490780022358e-07, "loss": 0.0508, "step": 441710 }, { "epoch": 4.719482878358887, "grad_norm": 0.6003152132034302, "learning_rate": 6.967336324345098e-07, "loss": 0.0126, "step": 441720 }, { "epoch": 4.719589721673167, "grad_norm": 12.201889991760254, "learning_rate": 6.967181866446568e-07, "loss": 0.0066, "step": 441730 }, { "epoch": 4.719696564987446, "grad_norm": 0.00795824360102415, "learning_rate": 6.967027406326939e-07, "loss": 0.0001, "step": 441740 }, { "epoch": 4.7198034083017255, "grad_norm": 4.365636348724365, "learning_rate": 6.966872943986389e-07, "loss": 0.0196, "step": 441750 }, { "epoch": 4.719910251616005, "grad_norm": 9.586832046508789, "learning_rate": 6.966718479425092e-07, "loss": 0.005, "step": 441760 }, { "epoch": 4.720017094930284, "grad_norm": 0.030195223167538643, "learning_rate": 6.966564012643219e-07, "loss": 0.0011, "step": 441770 }, { "epoch": 4.720123938244564, "grad_norm": 3.4647419452667236, "learning_rate": 6.966409543640948e-07, "loss": 0.0028, "step": 441780 }, { "epoch": 4.720230781558844, "grad_norm": 1.0126703977584839, "learning_rate": 6.966255072418454e-07, "loss": 0.0019, "step": 441790 }, { "epoch": 4.720337624873124, "grad_norm": 0.014026859775185585, "learning_rate": 6.966100598975907e-07, "loss": 0.0209, "step": 441800 }, { "epoch": 4.720444468187403, "grad_norm": 0.05394984781742096, "learning_rate": 6.965946123313486e-07, "loss": 0.0174, "step": 441810 }, { "epoch": 4.720551311501683, "grad_norm": 0.24640978872776031, "learning_rate": 6.965791645431362e-07, "loss": 0.025, "step": 441820 }, { "epoch": 4.720658154815962, "grad_norm": 1.7784450054168701, "learning_rate": 6.96563716532971e-07, "loss": 0.0074, "step": 441830 }, { "epoch": 4.720764998130242, "grad_norm": 0.0004082334053236991, "learning_rate": 6.965482683008708e-07, "loss": 0.0002, "step": 441840 }, { "epoch": 4.720871841444522, "grad_norm": 0.0011303747305646539, "learning_rate": 6.965328198468526e-07, "loss": 0.0016, "step": 441850 }, { "epoch": 4.720978684758801, "grad_norm": 0.0037938798777759075, "learning_rate": 6.96517371170934e-07, "loss": 0.0087, "step": 441860 }, { "epoch": 4.721085528073081, "grad_norm": 0.01176164485514164, "learning_rate": 6.965019222731326e-07, "loss": 0.0023, "step": 441870 }, { "epoch": 4.72119237138736, "grad_norm": 0.7887198328971863, "learning_rate": 6.964864731534656e-07, "loss": 0.0012, "step": 441880 }, { "epoch": 4.72129921470164, "grad_norm": 2.424687385559082, "learning_rate": 6.964710238119505e-07, "loss": 0.0016, "step": 441890 }, { "epoch": 4.721406058015919, "grad_norm": 1.3529866933822632, "learning_rate": 6.964555742486049e-07, "loss": 0.0046, "step": 441900 }, { "epoch": 4.7215129013301995, "grad_norm": 15.419102668762207, "learning_rate": 6.964401244634459e-07, "loss": 0.0177, "step": 441910 }, { "epoch": 4.721619744644479, "grad_norm": 0.009692784398794174, "learning_rate": 6.964246744564914e-07, "loss": 0.0031, "step": 441920 }, { "epoch": 4.721726587958758, "grad_norm": 10.814367294311523, "learning_rate": 6.964092242277586e-07, "loss": 0.0153, "step": 441930 }, { "epoch": 4.721833431273038, "grad_norm": 0.0018600185867398977, "learning_rate": 6.963937737772649e-07, "loss": 0.0379, "step": 441940 }, { "epoch": 4.721940274587317, "grad_norm": 0.0016276396345347166, "learning_rate": 6.963783231050278e-07, "loss": 0.0238, "step": 441950 }, { "epoch": 4.722047117901598, "grad_norm": 0.0015836076345294714, "learning_rate": 6.963628722110648e-07, "loss": 0.0012, "step": 441960 }, { "epoch": 4.722153961215877, "grad_norm": 0.31398138403892517, "learning_rate": 6.963474210953932e-07, "loss": 0.0205, "step": 441970 }, { "epoch": 4.722260804530157, "grad_norm": 0.003329760627821088, "learning_rate": 6.963319697580307e-07, "loss": 0.0025, "step": 441980 }, { "epoch": 4.722367647844436, "grad_norm": 0.46022331714630127, "learning_rate": 6.963165181989945e-07, "loss": 0.0003, "step": 441990 }, { "epoch": 4.7224744911587155, "grad_norm": 1.695839285850525, "learning_rate": 6.963010664183021e-07, "loss": 0.0013, "step": 442000 }, { "epoch": 4.722581334472995, "grad_norm": 0.23061348497867584, "learning_rate": 6.96285614415971e-07, "loss": 0.0308, "step": 442010 }, { "epoch": 4.722688177787275, "grad_norm": 0.08901244401931763, "learning_rate": 6.962701621920186e-07, "loss": 0.0159, "step": 442020 }, { "epoch": 4.722795021101555, "grad_norm": 0.517667829990387, "learning_rate": 6.962547097464625e-07, "loss": 0.0128, "step": 442030 }, { "epoch": 4.722901864415834, "grad_norm": 0.029257846996188164, "learning_rate": 6.962392570793199e-07, "loss": 0.0258, "step": 442040 }, { "epoch": 4.723008707730114, "grad_norm": 0.028760254383087158, "learning_rate": 6.962238041906083e-07, "loss": 0.004, "step": 442050 }, { "epoch": 4.723115551044393, "grad_norm": 0.2157711535692215, "learning_rate": 6.962083510803453e-07, "loss": 0.0017, "step": 442060 }, { "epoch": 4.723222394358673, "grad_norm": 1.7091271877288818, "learning_rate": 6.961928977485483e-07, "loss": 0.022, "step": 442070 }, { "epoch": 4.723329237672953, "grad_norm": 0.0022457202430814505, "learning_rate": 6.961774441952349e-07, "loss": 0.0039, "step": 442080 }, { "epoch": 4.723436080987232, "grad_norm": 0.013368538580834866, "learning_rate": 6.961619904204219e-07, "loss": 0.0013, "step": 442090 }, { "epoch": 4.723542924301512, "grad_norm": 0.9332815408706665, "learning_rate": 6.961465364241275e-07, "loss": 0.0098, "step": 442100 }, { "epoch": 4.723649767615791, "grad_norm": 1.1411807537078857, "learning_rate": 6.961310822063689e-07, "loss": 0.0013, "step": 442110 }, { "epoch": 4.723756610930071, "grad_norm": 0.01862868294119835, "learning_rate": 6.961156277671634e-07, "loss": 0.0071, "step": 442120 }, { "epoch": 4.72386345424435, "grad_norm": 0.04411545768380165, "learning_rate": 6.961001731065286e-07, "loss": 0.0098, "step": 442130 }, { "epoch": 4.723970297558631, "grad_norm": 3.2951016426086426, "learning_rate": 6.960847182244818e-07, "loss": 0.0101, "step": 442140 }, { "epoch": 4.72407714087291, "grad_norm": 0.1910342276096344, "learning_rate": 6.960692631210407e-07, "loss": 0.0004, "step": 442150 }, { "epoch": 4.7241839841871895, "grad_norm": 0.0026223985478281975, "learning_rate": 6.960538077962225e-07, "loss": 0.0031, "step": 442160 }, { "epoch": 4.724290827501469, "grad_norm": 4.199716567993164, "learning_rate": 6.960383522500448e-07, "loss": 0.001, "step": 442170 }, { "epoch": 4.7243976708157485, "grad_norm": 0.08829731494188309, "learning_rate": 6.96022896482525e-07, "loss": 0.0018, "step": 442180 }, { "epoch": 4.724504514130029, "grad_norm": 0.3743199408054352, "learning_rate": 6.960074404936806e-07, "loss": 0.0084, "step": 442190 }, { "epoch": 4.724611357444308, "grad_norm": 0.012188552878797054, "learning_rate": 6.95991984283529e-07, "loss": 0.003, "step": 442200 }, { "epoch": 4.724718200758588, "grad_norm": Infinity, "learning_rate": 6.959765278520875e-07, "loss": 0.0204, "step": 442210 }, { "epoch": 4.724825044072867, "grad_norm": 0.09030763804912567, "learning_rate": 6.959610711993739e-07, "loss": 0.0171, "step": 442220 }, { "epoch": 4.724931887387147, "grad_norm": 0.32180213928222656, "learning_rate": 6.959456143254053e-07, "loss": 0.0193, "step": 442230 }, { "epoch": 4.725038730701426, "grad_norm": 0.9740784764289856, "learning_rate": 6.959301572301994e-07, "loss": 0.0077, "step": 442240 }, { "epoch": 4.725145574015706, "grad_norm": 0.04809385910630226, "learning_rate": 6.959146999137737e-07, "loss": 0.0091, "step": 442250 }, { "epoch": 4.725252417329986, "grad_norm": 0.001201807288452983, "learning_rate": 6.958992423761453e-07, "loss": 0.0022, "step": 442260 }, { "epoch": 4.725359260644265, "grad_norm": 0.0019236734369769692, "learning_rate": 6.95883784617332e-07, "loss": 0.0145, "step": 442270 }, { "epoch": 4.725466103958545, "grad_norm": 0.019799012690782547, "learning_rate": 6.958683266373511e-07, "loss": 0.0154, "step": 442280 }, { "epoch": 4.725572947272824, "grad_norm": 0.004186415579169989, "learning_rate": 6.9585286843622e-07, "loss": 0.0012, "step": 442290 }, { "epoch": 4.725679790587104, "grad_norm": 0.010868429206311703, "learning_rate": 6.958374100139563e-07, "loss": 0.0002, "step": 442300 }, { "epoch": 4.725786633901384, "grad_norm": 4.042393207550049, "learning_rate": 6.958219513705773e-07, "loss": 0.009, "step": 442310 }, { "epoch": 4.7258934772156636, "grad_norm": 0.4839719235897064, "learning_rate": 6.958064925061007e-07, "loss": 0.0029, "step": 442320 }, { "epoch": 4.726000320529943, "grad_norm": 0.05817558988928795, "learning_rate": 6.957910334205435e-07, "loss": 0.0026, "step": 442330 }, { "epoch": 4.7261071638442225, "grad_norm": 9.206733703613281, "learning_rate": 6.957755741139237e-07, "loss": 0.0119, "step": 442340 }, { "epoch": 4.726214007158502, "grad_norm": 0.17636854946613312, "learning_rate": 6.957601145862584e-07, "loss": 0.0098, "step": 442350 }, { "epoch": 4.726320850472781, "grad_norm": 1.2492529153823853, "learning_rate": 6.957446548375652e-07, "loss": 0.0201, "step": 442360 }, { "epoch": 4.726427693787061, "grad_norm": 0.002218167996034026, "learning_rate": 6.957291948678614e-07, "loss": 0.0095, "step": 442370 }, { "epoch": 4.726534537101341, "grad_norm": 0.0014494776260107756, "learning_rate": 6.957137346771647e-07, "loss": 0.0002, "step": 442380 }, { "epoch": 4.726641380415621, "grad_norm": 0.02012425661087036, "learning_rate": 6.956982742654926e-07, "loss": 0.0032, "step": 442390 }, { "epoch": 4.7267482237299, "grad_norm": 0.008997258730232716, "learning_rate": 6.95682813632862e-07, "loss": 0.0108, "step": 442400 }, { "epoch": 4.72685506704418, "grad_norm": 0.004652743693441153, "learning_rate": 6.95667352779291e-07, "loss": 0.0016, "step": 442410 }, { "epoch": 4.726961910358459, "grad_norm": 0.00218317867256701, "learning_rate": 6.956518917047967e-07, "loss": 0.0208, "step": 442420 }, { "epoch": 4.727068753672739, "grad_norm": 0.007460638880729675, "learning_rate": 6.956364304093966e-07, "loss": 0.0095, "step": 442430 }, { "epoch": 4.727175596987019, "grad_norm": 0.3594057261943817, "learning_rate": 6.956209688931083e-07, "loss": 0.0043, "step": 442440 }, { "epoch": 4.727282440301298, "grad_norm": 0.0014078918611630797, "learning_rate": 6.956055071559492e-07, "loss": 0.0074, "step": 442450 }, { "epoch": 4.727389283615578, "grad_norm": 13.85755729675293, "learning_rate": 6.955900451979365e-07, "loss": 0.0312, "step": 442460 }, { "epoch": 4.727496126929857, "grad_norm": 0.030843913555145264, "learning_rate": 6.95574583019088e-07, "loss": 0.0818, "step": 442470 }, { "epoch": 4.727602970244137, "grad_norm": 0.03459816426038742, "learning_rate": 6.95559120619421e-07, "loss": 0.0002, "step": 442480 }, { "epoch": 4.727709813558416, "grad_norm": 0.015160956420004368, "learning_rate": 6.955436579989531e-07, "loss": 0.0033, "step": 442490 }, { "epoch": 4.7278166568726965, "grad_norm": 0.0007558198994956911, "learning_rate": 6.955281951577017e-07, "loss": 0.0009, "step": 442500 }, { "epoch": 4.727923500186976, "grad_norm": 0.07462479919195175, "learning_rate": 6.95512732095684e-07, "loss": 0.047, "step": 442510 }, { "epoch": 4.728030343501255, "grad_norm": 0.38575369119644165, "learning_rate": 6.954972688129178e-07, "loss": 0.0171, "step": 442520 }, { "epoch": 4.728137186815535, "grad_norm": 2.039276123046875, "learning_rate": 6.954818053094205e-07, "loss": 0.0135, "step": 442530 }, { "epoch": 4.728244030129814, "grad_norm": 0.0020653256215155125, "learning_rate": 6.954663415852093e-07, "loss": 0.0123, "step": 442540 }, { "epoch": 4.728350873444095, "grad_norm": 0.0138335470110178, "learning_rate": 6.95450877640302e-07, "loss": 0.0068, "step": 442550 }, { "epoch": 4.728457716758374, "grad_norm": 0.007378644309937954, "learning_rate": 6.954354134747158e-07, "loss": 0.0008, "step": 442560 }, { "epoch": 4.728564560072654, "grad_norm": 13.04069995880127, "learning_rate": 6.954199490884682e-07, "loss": 0.0468, "step": 442570 }, { "epoch": 4.728671403386933, "grad_norm": 2.1857786178588867, "learning_rate": 6.95404484481577e-07, "loss": 0.0266, "step": 442580 }, { "epoch": 4.7287782467012125, "grad_norm": 0.012418915517628193, "learning_rate": 6.953890196540592e-07, "loss": 0.0007, "step": 442590 }, { "epoch": 4.728885090015492, "grad_norm": 0.008216528221964836, "learning_rate": 6.953735546059324e-07, "loss": 0.0074, "step": 442600 }, { "epoch": 4.728991933329771, "grad_norm": 0.10594894737005234, "learning_rate": 6.953580893372141e-07, "loss": 0.0018, "step": 442610 }, { "epoch": 4.729098776644052, "grad_norm": 0.0025345708709210157, "learning_rate": 6.953426238479221e-07, "loss": 0.0082, "step": 442620 }, { "epoch": 4.729205619958331, "grad_norm": 0.13748924434185028, "learning_rate": 6.953271581380732e-07, "loss": 0.002, "step": 442630 }, { "epoch": 4.729312463272611, "grad_norm": 2.0166733264923096, "learning_rate": 6.953116922076854e-07, "loss": 0.0054, "step": 442640 }, { "epoch": 4.72941930658689, "grad_norm": 0.04793696850538254, "learning_rate": 6.952962260567758e-07, "loss": 0.0078, "step": 442650 }, { "epoch": 4.72952614990117, "grad_norm": 0.02349065989255905, "learning_rate": 6.95280759685362e-07, "loss": 0.005, "step": 442660 }, { "epoch": 4.72963299321545, "grad_norm": 0.006644869223237038, "learning_rate": 6.952652930934615e-07, "loss": 0.0091, "step": 442670 }, { "epoch": 4.729739836529729, "grad_norm": 0.06806810200214386, "learning_rate": 6.952498262810918e-07, "loss": 0.0011, "step": 442680 }, { "epoch": 4.729846679844009, "grad_norm": 0.0067254360765218735, "learning_rate": 6.952343592482704e-07, "loss": 0.0172, "step": 442690 }, { "epoch": 4.729953523158288, "grad_norm": 0.44159165024757385, "learning_rate": 6.952188919950145e-07, "loss": 0.0208, "step": 442700 }, { "epoch": 4.730060366472568, "grad_norm": 2.251556158065796, "learning_rate": 6.952034245213419e-07, "loss": 0.01, "step": 442710 }, { "epoch": 4.730167209786847, "grad_norm": 0.4511910080909729, "learning_rate": 6.951879568272697e-07, "loss": 0.0248, "step": 442720 }, { "epoch": 4.730274053101128, "grad_norm": 12.761524200439453, "learning_rate": 6.951724889128157e-07, "loss": 0.0198, "step": 442730 }, { "epoch": 4.730380896415407, "grad_norm": 0.0025188736617565155, "learning_rate": 6.951570207779972e-07, "loss": 0.0089, "step": 442740 }, { "epoch": 4.7304877397296865, "grad_norm": 0.33401092886924744, "learning_rate": 6.951415524228318e-07, "loss": 0.0097, "step": 442750 }, { "epoch": 4.730594583043966, "grad_norm": 0.4836393892765045, "learning_rate": 6.951260838473368e-07, "loss": 0.0032, "step": 442760 }, { "epoch": 4.730701426358245, "grad_norm": 5.654746055603027, "learning_rate": 6.951106150515297e-07, "loss": 0.0156, "step": 442770 }, { "epoch": 4.730808269672525, "grad_norm": 2.1554148197174072, "learning_rate": 6.95095146035428e-07, "loss": 0.0137, "step": 442780 }, { "epoch": 4.730915112986805, "grad_norm": 0.019365593791007996, "learning_rate": 6.950796767990492e-07, "loss": 0.0026, "step": 442790 }, { "epoch": 4.731021956301085, "grad_norm": 0.005013884510844946, "learning_rate": 6.950642073424105e-07, "loss": 0.0047, "step": 442800 }, { "epoch": 4.731128799615364, "grad_norm": 0.01073346845805645, "learning_rate": 6.950487376655299e-07, "loss": 0.0024, "step": 442810 }, { "epoch": 4.731235642929644, "grad_norm": 0.008724024519324303, "learning_rate": 6.950332677684244e-07, "loss": 0.0042, "step": 442820 }, { "epoch": 4.731342486243923, "grad_norm": 0.28439709544181824, "learning_rate": 6.950177976511116e-07, "loss": 0.0018, "step": 442830 }, { "epoch": 4.7314493295582025, "grad_norm": 0.06458577513694763, "learning_rate": 6.95002327313609e-07, "loss": 0.0132, "step": 442840 }, { "epoch": 4.731556172872483, "grad_norm": 3.1835362911224365, "learning_rate": 6.949868567559341e-07, "loss": 0.0044, "step": 442850 }, { "epoch": 4.731663016186762, "grad_norm": 0.003922461532056332, "learning_rate": 6.949713859781042e-07, "loss": 0.017, "step": 442860 }, { "epoch": 4.731769859501042, "grad_norm": 2.379502773284912, "learning_rate": 6.949559149801371e-07, "loss": 0.0061, "step": 442870 }, { "epoch": 4.731876702815321, "grad_norm": 8.397871017456055, "learning_rate": 6.949404437620499e-07, "loss": 0.0084, "step": 442880 }, { "epoch": 4.731983546129601, "grad_norm": 1.3780555725097656, "learning_rate": 6.949249723238603e-07, "loss": 0.0031, "step": 442890 }, { "epoch": 4.73209038944388, "grad_norm": 0.010599416680634022, "learning_rate": 6.949095006655856e-07, "loss": 0.0015, "step": 442900 }, { "epoch": 4.7321972327581605, "grad_norm": 0.003995722625404596, "learning_rate": 6.948940287872435e-07, "loss": 0.0013, "step": 442910 }, { "epoch": 4.73230407607244, "grad_norm": 0.8662506341934204, "learning_rate": 6.948785566888512e-07, "loss": 0.007, "step": 442920 }, { "epoch": 4.732410919386719, "grad_norm": 3.4240758419036865, "learning_rate": 6.948630843704264e-07, "loss": 0.0022, "step": 442930 }, { "epoch": 4.732517762700999, "grad_norm": 0.0011588948545977473, "learning_rate": 6.948476118319864e-07, "loss": 0.0058, "step": 442940 }, { "epoch": 4.732624606015278, "grad_norm": 0.6359741687774658, "learning_rate": 6.948321390735487e-07, "loss": 0.0113, "step": 442950 }, { "epoch": 4.732731449329558, "grad_norm": 0.013942616991698742, "learning_rate": 6.948166660951309e-07, "loss": 0.0201, "step": 442960 }, { "epoch": 4.732838292643838, "grad_norm": 0.01911785826086998, "learning_rate": 6.948011928967505e-07, "loss": 0.015, "step": 442970 }, { "epoch": 4.732945135958118, "grad_norm": 0.2951197326183319, "learning_rate": 6.947857194784246e-07, "loss": 0.0042, "step": 442980 }, { "epoch": 4.733051979272397, "grad_norm": 0.0012167772511020303, "learning_rate": 6.94770245840171e-07, "loss": 0.0142, "step": 442990 }, { "epoch": 4.7331588225866765, "grad_norm": 0.019644977524876595, "learning_rate": 6.947547719820071e-07, "loss": 0.0016, "step": 443000 }, { "epoch": 4.733265665900956, "grad_norm": 0.00953306071460247, "learning_rate": 6.947392979039503e-07, "loss": 0.0022, "step": 443010 }, { "epoch": 4.733372509215236, "grad_norm": 0.376828134059906, "learning_rate": 6.947238236060184e-07, "loss": 0.0144, "step": 443020 }, { "epoch": 4.733479352529516, "grad_norm": 0.0013737133704125881, "learning_rate": 6.947083490882284e-07, "loss": 0.0032, "step": 443030 }, { "epoch": 4.733586195843795, "grad_norm": 0.36042600870132446, "learning_rate": 6.946928743505979e-07, "loss": 0.0232, "step": 443040 }, { "epoch": 4.733693039158075, "grad_norm": 1.759172797203064, "learning_rate": 6.946773993931447e-07, "loss": 0.0119, "step": 443050 }, { "epoch": 4.733799882472354, "grad_norm": 0.0034736436791718006, "learning_rate": 6.946619242158858e-07, "loss": 0.0118, "step": 443060 }, { "epoch": 4.733906725786634, "grad_norm": 7.198626518249512, "learning_rate": 6.946464488188391e-07, "loss": 0.003, "step": 443070 }, { "epoch": 4.734013569100913, "grad_norm": 0.0018178768223151565, "learning_rate": 6.946309732020217e-07, "loss": 0.0322, "step": 443080 }, { "epoch": 4.734120412415193, "grad_norm": 0.0013988107675686479, "learning_rate": 6.946154973654512e-07, "loss": 0.0155, "step": 443090 }, { "epoch": 4.734227255729473, "grad_norm": 0.898608922958374, "learning_rate": 6.946000213091453e-07, "loss": 0.0054, "step": 443100 }, { "epoch": 4.734334099043752, "grad_norm": 0.014398320578038692, "learning_rate": 6.945845450331211e-07, "loss": 0.0106, "step": 443110 }, { "epoch": 4.734440942358032, "grad_norm": 1.0215469598770142, "learning_rate": 6.945690685373964e-07, "loss": 0.0026, "step": 443120 }, { "epoch": 4.734547785672311, "grad_norm": 0.04875639081001282, "learning_rate": 6.945535918219886e-07, "loss": 0.0036, "step": 443130 }, { "epoch": 4.734654628986592, "grad_norm": 0.014742741361260414, "learning_rate": 6.945381148869149e-07, "loss": 0.0223, "step": 443140 }, { "epoch": 4.734761472300871, "grad_norm": 0.25211530923843384, "learning_rate": 6.945226377321931e-07, "loss": 0.0125, "step": 443150 }, { "epoch": 4.7348683156151505, "grad_norm": 0.47076350450515747, "learning_rate": 6.945071603578406e-07, "loss": 0.0043, "step": 443160 }, { "epoch": 4.73497515892943, "grad_norm": 0.4736602008342743, "learning_rate": 6.944916827638747e-07, "loss": 0.0041, "step": 443170 }, { "epoch": 4.7350820022437095, "grad_norm": 4.615755081176758, "learning_rate": 6.944762049503132e-07, "loss": 0.0138, "step": 443180 }, { "epoch": 4.735188845557989, "grad_norm": 0.005653568543493748, "learning_rate": 6.944607269171732e-07, "loss": 0.0006, "step": 443190 }, { "epoch": 4.735295688872268, "grad_norm": 4.650283336639404, "learning_rate": 6.944452486644726e-07, "loss": 0.0113, "step": 443200 }, { "epoch": 4.735402532186549, "grad_norm": 0.0020461049862205982, "learning_rate": 6.944297701922284e-07, "loss": 0.0055, "step": 443210 }, { "epoch": 4.735509375500828, "grad_norm": 0.042037855833768845, "learning_rate": 6.944142915004584e-07, "loss": 0.0119, "step": 443220 }, { "epoch": 4.735616218815108, "grad_norm": 0.003921227995306253, "learning_rate": 6.9439881258918e-07, "loss": 0.0009, "step": 443230 }, { "epoch": 4.735723062129387, "grad_norm": 0.1715737134218216, "learning_rate": 6.943833334584107e-07, "loss": 0.0088, "step": 443240 }, { "epoch": 4.7358299054436666, "grad_norm": 0.24989546835422516, "learning_rate": 6.943678541081678e-07, "loss": 0.0163, "step": 443250 }, { "epoch": 4.735936748757947, "grad_norm": 0.2778817117214203, "learning_rate": 6.943523745384691e-07, "loss": 0.0008, "step": 443260 }, { "epoch": 4.736043592072226, "grad_norm": 0.008220258168876171, "learning_rate": 6.943368947493319e-07, "loss": 0.0004, "step": 443270 }, { "epoch": 4.736150435386506, "grad_norm": 0.0024621470365673304, "learning_rate": 6.943214147407735e-07, "loss": 0.0101, "step": 443280 }, { "epoch": 4.736257278700785, "grad_norm": 0.05689629167318344, "learning_rate": 6.943059345128117e-07, "loss": 0.0191, "step": 443290 }, { "epoch": 4.736364122015065, "grad_norm": 1.957539439201355, "learning_rate": 6.942904540654637e-07, "loss": 0.0051, "step": 443300 }, { "epoch": 4.736470965329344, "grad_norm": 3.871967315673828, "learning_rate": 6.942749733987473e-07, "loss": 0.0109, "step": 443310 }, { "epoch": 4.736577808643624, "grad_norm": 4.169219017028809, "learning_rate": 6.942594925126797e-07, "loss": 0.003, "step": 443320 }, { "epoch": 4.736684651957904, "grad_norm": 0.03585781157016754, "learning_rate": 6.942440114072785e-07, "loss": 0.009, "step": 443330 }, { "epoch": 4.7367914952721835, "grad_norm": 0.008557068184018135, "learning_rate": 6.942285300825609e-07, "loss": 0.0077, "step": 443340 }, { "epoch": 4.736898338586463, "grad_norm": 0.047533854842185974, "learning_rate": 6.94213048538545e-07, "loss": 0.0055, "step": 443350 }, { "epoch": 4.737005181900742, "grad_norm": 0.06065228208899498, "learning_rate": 6.941975667752479e-07, "loss": 0.0243, "step": 443360 }, { "epoch": 4.737112025215022, "grad_norm": 3.3989064693450928, "learning_rate": 6.941820847926869e-07, "loss": 0.0057, "step": 443370 }, { "epoch": 4.737218868529302, "grad_norm": 0.00036619603633880615, "learning_rate": 6.941666025908796e-07, "loss": 0.002, "step": 443380 }, { "epoch": 4.737325711843582, "grad_norm": 0.48771294951438904, "learning_rate": 6.941511201698438e-07, "loss": 0.0011, "step": 443390 }, { "epoch": 4.737432555157861, "grad_norm": 0.031716227531433105, "learning_rate": 6.941356375295966e-07, "loss": 0.0053, "step": 443400 }, { "epoch": 4.737539398472141, "grad_norm": 0.0008377722115255892, "learning_rate": 6.941201546701556e-07, "loss": 0.0146, "step": 443410 }, { "epoch": 4.73764624178642, "grad_norm": 0.007950268685817719, "learning_rate": 6.941046715915384e-07, "loss": 0.0006, "step": 443420 }, { "epoch": 4.7377530851006995, "grad_norm": 0.00390305882319808, "learning_rate": 6.940891882937623e-07, "loss": 0.0003, "step": 443430 }, { "epoch": 4.737859928414979, "grad_norm": 1.4687687158584595, "learning_rate": 6.940737047768448e-07, "loss": 0.0076, "step": 443440 }, { "epoch": 4.737966771729259, "grad_norm": 0.0029070840682834387, "learning_rate": 6.940582210408036e-07, "loss": 0.0136, "step": 443450 }, { "epoch": 4.738073615043539, "grad_norm": 7.327996730804443, "learning_rate": 6.940427370856559e-07, "loss": 0.0099, "step": 443460 }, { "epoch": 4.738180458357818, "grad_norm": 0.23219448328018188, "learning_rate": 6.940272529114193e-07, "loss": 0.0065, "step": 443470 }, { "epoch": 4.738287301672098, "grad_norm": 0.44216805696487427, "learning_rate": 6.940117685181113e-07, "loss": 0.0032, "step": 443480 }, { "epoch": 4.738394144986377, "grad_norm": 0.5255971550941467, "learning_rate": 6.939962839057494e-07, "loss": 0.0071, "step": 443490 }, { "epoch": 4.7385009883006575, "grad_norm": 0.049849435687065125, "learning_rate": 6.93980799074351e-07, "loss": 0.0244, "step": 443500 }, { "epoch": 4.738607831614937, "grad_norm": 0.002053525298833847, "learning_rate": 6.939653140239337e-07, "loss": 0.0053, "step": 443510 }, { "epoch": 4.738714674929216, "grad_norm": 0.041805196553468704, "learning_rate": 6.939498287545149e-07, "loss": 0.0027, "step": 443520 }, { "epoch": 4.738821518243496, "grad_norm": 0.12270308285951614, "learning_rate": 6.939343432661121e-07, "loss": 0.0017, "step": 443530 }, { "epoch": 4.738928361557775, "grad_norm": 0.38545677065849304, "learning_rate": 6.939188575587428e-07, "loss": 0.002, "step": 443540 }, { "epoch": 4.739035204872055, "grad_norm": 0.2694912254810333, "learning_rate": 6.939033716324245e-07, "loss": 0.0022, "step": 443550 }, { "epoch": 4.739142048186335, "grad_norm": 0.001956803957000375, "learning_rate": 6.938878854871746e-07, "loss": 0.0031, "step": 443560 }, { "epoch": 4.739248891500615, "grad_norm": 0.020757511258125305, "learning_rate": 6.938723991230107e-07, "loss": 0.0117, "step": 443570 }, { "epoch": 4.739355734814894, "grad_norm": 0.005050518549978733, "learning_rate": 6.938569125399502e-07, "loss": 0.011, "step": 443580 }, { "epoch": 4.7394625781291735, "grad_norm": 0.0022721318528056145, "learning_rate": 6.938414257380105e-07, "loss": 0.0279, "step": 443590 }, { "epoch": 4.739569421443453, "grad_norm": 1.1426446437835693, "learning_rate": 6.938259387172094e-07, "loss": 0.011, "step": 443600 }, { "epoch": 4.739676264757732, "grad_norm": 0.0006311801262199879, "learning_rate": 6.93810451477564e-07, "loss": 0.0011, "step": 443610 }, { "epoch": 4.739783108072013, "grad_norm": 0.0011368630221113563, "learning_rate": 6.937949640190921e-07, "loss": 0.0173, "step": 443620 }, { "epoch": 4.739889951386292, "grad_norm": 0.0015313621843233705, "learning_rate": 6.93779476341811e-07, "loss": 0.0006, "step": 443630 }, { "epoch": 4.739996794700572, "grad_norm": 0.0016796429408714175, "learning_rate": 6.937639884457382e-07, "loss": 0.0121, "step": 443640 }, { "epoch": 4.740103638014851, "grad_norm": 0.3075184226036072, "learning_rate": 6.937485003308914e-07, "loss": 0.0042, "step": 443650 }, { "epoch": 4.740210481329131, "grad_norm": 0.7701431512832642, "learning_rate": 6.937330119972878e-07, "loss": 0.0061, "step": 443660 }, { "epoch": 4.74031732464341, "grad_norm": 5.774364948272705, "learning_rate": 6.937175234449451e-07, "loss": 0.0119, "step": 443670 }, { "epoch": 4.74042416795769, "grad_norm": 0.01878901943564415, "learning_rate": 6.937020346738806e-07, "loss": 0.0025, "step": 443680 }, { "epoch": 4.74053101127197, "grad_norm": 7.051002025604248, "learning_rate": 6.936865456841119e-07, "loss": 0.0183, "step": 443690 }, { "epoch": 4.740637854586249, "grad_norm": 0.001944524934515357, "learning_rate": 6.936710564756565e-07, "loss": 0.0212, "step": 443700 }, { "epoch": 4.740744697900529, "grad_norm": 0.16837003827095032, "learning_rate": 6.936555670485318e-07, "loss": 0.0018, "step": 443710 }, { "epoch": 4.740851541214808, "grad_norm": 0.44242343306541443, "learning_rate": 6.936400774027554e-07, "loss": 0.0022, "step": 443720 }, { "epoch": 4.740958384529089, "grad_norm": 0.0338418148458004, "learning_rate": 6.936245875383449e-07, "loss": 0.0222, "step": 443730 }, { "epoch": 4.741065227843368, "grad_norm": 1.1637554168701172, "learning_rate": 6.936090974553172e-07, "loss": 0.0097, "step": 443740 }, { "epoch": 4.7411720711576475, "grad_norm": 0.011544656939804554, "learning_rate": 6.935936071536906e-07, "loss": 0.0198, "step": 443750 }, { "epoch": 4.741278914471927, "grad_norm": 0.022614140063524246, "learning_rate": 6.935781166334822e-07, "loss": 0.0077, "step": 443760 }, { "epoch": 4.741385757786206, "grad_norm": 5.374297142028809, "learning_rate": 6.935626258947092e-07, "loss": 0.0233, "step": 443770 }, { "epoch": 4.741492601100486, "grad_norm": 0.003033007960766554, "learning_rate": 6.935471349373897e-07, "loss": 0.0094, "step": 443780 }, { "epoch": 4.741599444414765, "grad_norm": 1.4738425016403198, "learning_rate": 6.935316437615409e-07, "loss": 0.0028, "step": 443790 }, { "epoch": 4.741706287729046, "grad_norm": 0.007761382032185793, "learning_rate": 6.935161523671799e-07, "loss": 0.0117, "step": 443800 }, { "epoch": 4.741813131043325, "grad_norm": 0.008127243258059025, "learning_rate": 6.935006607543247e-07, "loss": 0.0038, "step": 443810 }, { "epoch": 4.741919974357605, "grad_norm": 0.3514210283756256, "learning_rate": 6.93485168922993e-07, "loss": 0.0042, "step": 443820 }, { "epoch": 4.742026817671884, "grad_norm": 0.04065782576799393, "learning_rate": 6.934696768732014e-07, "loss": 0.0135, "step": 443830 }, { "epoch": 4.7421336609861635, "grad_norm": 0.001628683996386826, "learning_rate": 6.934541846049683e-07, "loss": 0.0041, "step": 443840 }, { "epoch": 4.742240504300444, "grad_norm": 3.461895704269409, "learning_rate": 6.934386921183107e-07, "loss": 0.009, "step": 443850 }, { "epoch": 4.742347347614723, "grad_norm": 3.459998369216919, "learning_rate": 6.934231994132462e-07, "loss": 0.0033, "step": 443860 }, { "epoch": 4.742454190929003, "grad_norm": 0.1985398530960083, "learning_rate": 6.934077064897924e-07, "loss": 0.0068, "step": 443870 }, { "epoch": 4.742561034243282, "grad_norm": 0.00923237856477499, "learning_rate": 6.933922133479666e-07, "loss": 0.0152, "step": 443880 }, { "epoch": 4.742667877557562, "grad_norm": 0.006998993922024965, "learning_rate": 6.933767199877865e-07, "loss": 0.0025, "step": 443890 }, { "epoch": 4.742774720871841, "grad_norm": 0.02142333984375, "learning_rate": 6.933612264092694e-07, "loss": 0.0041, "step": 443900 }, { "epoch": 4.742881564186121, "grad_norm": 0.04268850013613701, "learning_rate": 6.933457326124329e-07, "loss": 0.0014, "step": 443910 }, { "epoch": 4.742988407500401, "grad_norm": 0.020326822996139526, "learning_rate": 6.933302385972945e-07, "loss": 0.0018, "step": 443920 }, { "epoch": 4.74309525081468, "grad_norm": 5.897371768951416, "learning_rate": 6.933147443638716e-07, "loss": 0.0058, "step": 443930 }, { "epoch": 4.74320209412896, "grad_norm": 0.015323527157306671, "learning_rate": 6.932992499121818e-07, "loss": 0.0265, "step": 443940 }, { "epoch": 4.743308937443239, "grad_norm": 5.225327968597412, "learning_rate": 6.932837552422426e-07, "loss": 0.0068, "step": 443950 }, { "epoch": 4.743415780757519, "grad_norm": 0.5881728529930115, "learning_rate": 6.932682603540715e-07, "loss": 0.0113, "step": 443960 }, { "epoch": 4.743522624071799, "grad_norm": 2.9922544956207275, "learning_rate": 6.932527652476858e-07, "loss": 0.0101, "step": 443970 }, { "epoch": 4.743629467386079, "grad_norm": 0.3126859962940216, "learning_rate": 6.932372699231033e-07, "loss": 0.0379, "step": 443980 }, { "epoch": 4.743736310700358, "grad_norm": 0.08229244500398636, "learning_rate": 6.932217743803413e-07, "loss": 0.0024, "step": 443990 }, { "epoch": 4.7438431540146375, "grad_norm": 4.180503845214844, "learning_rate": 6.932062786194172e-07, "loss": 0.0202, "step": 444000 }, { "epoch": 4.743949997328917, "grad_norm": 5.4148664474487305, "learning_rate": 6.931907826403487e-07, "loss": 0.0054, "step": 444010 }, { "epoch": 4.744056840643196, "grad_norm": 0.0019554367754608393, "learning_rate": 6.931752864431534e-07, "loss": 0.0055, "step": 444020 }, { "epoch": 4.744163683957476, "grad_norm": 0.13260075449943542, "learning_rate": 6.931597900278484e-07, "loss": 0.002, "step": 444030 }, { "epoch": 4.744270527271756, "grad_norm": 0.001175165525637567, "learning_rate": 6.931442933944514e-07, "loss": 0.0061, "step": 444040 }, { "epoch": 4.744377370586036, "grad_norm": 0.33834710717201233, "learning_rate": 6.9312879654298e-07, "loss": 0.0012, "step": 444050 }, { "epoch": 4.744484213900315, "grad_norm": 0.053853023797273636, "learning_rate": 6.931132994734517e-07, "loss": 0.0075, "step": 444060 }, { "epoch": 4.744591057214595, "grad_norm": 0.018297962844371796, "learning_rate": 6.930978021858839e-07, "loss": 0.0015, "step": 444070 }, { "epoch": 4.744697900528874, "grad_norm": 0.002251557307317853, "learning_rate": 6.930823046802939e-07, "loss": 0.0003, "step": 444080 }, { "epoch": 4.744804743843154, "grad_norm": 0.18634510040283203, "learning_rate": 6.930668069566996e-07, "loss": 0.0087, "step": 444090 }, { "epoch": 4.744911587157434, "grad_norm": 0.013306205160915852, "learning_rate": 6.930513090151183e-07, "loss": 0.0014, "step": 444100 }, { "epoch": 4.745018430471713, "grad_norm": 0.0666114091873169, "learning_rate": 6.930358108555674e-07, "loss": 0.0048, "step": 444110 }, { "epoch": 4.745125273785993, "grad_norm": 0.2876872718334198, "learning_rate": 6.930203124780646e-07, "loss": 0.0057, "step": 444120 }, { "epoch": 4.745232117100272, "grad_norm": 0.02006286010146141, "learning_rate": 6.930048138826272e-07, "loss": 0.0002, "step": 444130 }, { "epoch": 4.745338960414552, "grad_norm": 7.151463985443115, "learning_rate": 6.929893150692729e-07, "loss": 0.0085, "step": 444140 }, { "epoch": 4.745445803728831, "grad_norm": 1.6825883388519287, "learning_rate": 6.929738160380189e-07, "loss": 0.0013, "step": 444150 }, { "epoch": 4.7455526470431115, "grad_norm": 1.9902411699295044, "learning_rate": 6.929583167888831e-07, "loss": 0.0097, "step": 444160 }, { "epoch": 4.745659490357391, "grad_norm": 0.14638884365558624, "learning_rate": 6.929428173218827e-07, "loss": 0.0004, "step": 444170 }, { "epoch": 4.7457663336716704, "grad_norm": 0.5970036387443542, "learning_rate": 6.929273176370353e-07, "loss": 0.0038, "step": 444180 }, { "epoch": 4.74587317698595, "grad_norm": 0.03623032569885254, "learning_rate": 6.929118177343586e-07, "loss": 0.0007, "step": 444190 }, { "epoch": 4.745980020300229, "grad_norm": 0.0013881654012948275, "learning_rate": 6.928963176138697e-07, "loss": 0.0042, "step": 444200 }, { "epoch": 4.74608686361451, "grad_norm": 0.023818599060177803, "learning_rate": 6.928808172755863e-07, "loss": 0.0161, "step": 444210 }, { "epoch": 4.746193706928789, "grad_norm": 0.5654292106628418, "learning_rate": 6.928653167195258e-07, "loss": 0.0118, "step": 444220 }, { "epoch": 4.746300550243069, "grad_norm": 0.018958469852805138, "learning_rate": 6.928498159457061e-07, "loss": 0.0273, "step": 444230 }, { "epoch": 4.746407393557348, "grad_norm": 0.009340140968561172, "learning_rate": 6.928343149541441e-07, "loss": 0.0114, "step": 444240 }, { "epoch": 4.7465142368716275, "grad_norm": 0.002542960923165083, "learning_rate": 6.928188137448578e-07, "loss": 0.0042, "step": 444250 }, { "epoch": 4.746621080185907, "grad_norm": 0.17625932395458221, "learning_rate": 6.928033123178644e-07, "loss": 0.0033, "step": 444260 }, { "epoch": 4.746727923500187, "grad_norm": 6.546645641326904, "learning_rate": 6.927878106731815e-07, "loss": 0.0091, "step": 444270 }, { "epoch": 4.746834766814467, "grad_norm": 0.5496516227722168, "learning_rate": 6.927723088108267e-07, "loss": 0.0074, "step": 444280 }, { "epoch": 4.746941610128746, "grad_norm": 0.27720075845718384, "learning_rate": 6.927568067308174e-07, "loss": 0.0027, "step": 444290 }, { "epoch": 4.747048453443026, "grad_norm": 0.10111143440008163, "learning_rate": 6.927413044331711e-07, "loss": 0.0037, "step": 444300 }, { "epoch": 4.747155296757305, "grad_norm": 0.012305114418268204, "learning_rate": 6.927258019179053e-07, "loss": 0.0119, "step": 444310 }, { "epoch": 4.747262140071585, "grad_norm": 0.005918205715715885, "learning_rate": 6.927102991850376e-07, "loss": 0.0196, "step": 444320 }, { "epoch": 4.747368983385865, "grad_norm": 0.004363528918474913, "learning_rate": 6.926947962345854e-07, "loss": 0.0089, "step": 444330 }, { "epoch": 4.7474758267001445, "grad_norm": 0.0036802624817937613, "learning_rate": 6.926792930665661e-07, "loss": 0.0107, "step": 444340 }, { "epoch": 4.747582670014424, "grad_norm": 0.1256428360939026, "learning_rate": 6.926637896809974e-07, "loss": 0.0222, "step": 444350 }, { "epoch": 4.747689513328703, "grad_norm": 0.006684815511107445, "learning_rate": 6.926482860778968e-07, "loss": 0.0001, "step": 444360 }, { "epoch": 4.747796356642983, "grad_norm": 0.011449497193098068, "learning_rate": 6.926327822572818e-07, "loss": 0.0092, "step": 444370 }, { "epoch": 4.747903199957262, "grad_norm": 0.23598413169384003, "learning_rate": 6.926172782191697e-07, "loss": 0.0083, "step": 444380 }, { "epoch": 4.748010043271543, "grad_norm": 4.590575695037842, "learning_rate": 6.926017739635783e-07, "loss": 0.0271, "step": 444390 }, { "epoch": 4.748116886585822, "grad_norm": 0.25526630878448486, "learning_rate": 6.925862694905249e-07, "loss": 0.0064, "step": 444400 }, { "epoch": 4.7482237299001016, "grad_norm": 0.0038148858584463596, "learning_rate": 6.925707648000269e-07, "loss": 0.0127, "step": 444410 }, { "epoch": 4.748330573214381, "grad_norm": 0.0010648670140653849, "learning_rate": 6.925552598921023e-07, "loss": 0.0036, "step": 444420 }, { "epoch": 4.7484374165286605, "grad_norm": 9.987079620361328, "learning_rate": 6.92539754766768e-07, "loss": 0.0054, "step": 444430 }, { "epoch": 4.748544259842941, "grad_norm": 10.978904724121094, "learning_rate": 6.92524249424042e-07, "loss": 0.0048, "step": 444440 }, { "epoch": 4.74865110315722, "grad_norm": 6.015536308288574, "learning_rate": 6.925087438639414e-07, "loss": 0.0115, "step": 444450 }, { "epoch": 4.7487579464715, "grad_norm": 0.04087749868631363, "learning_rate": 6.924932380864839e-07, "loss": 0.0011, "step": 444460 }, { "epoch": 4.748864789785779, "grad_norm": 8.783045768737793, "learning_rate": 6.924777320916872e-07, "loss": 0.0203, "step": 444470 }, { "epoch": 4.748971633100059, "grad_norm": 0.05709542706608772, "learning_rate": 6.924622258795685e-07, "loss": 0.0109, "step": 444480 }, { "epoch": 4.749078476414338, "grad_norm": 0.007835662923753262, "learning_rate": 6.924467194501454e-07, "loss": 0.0105, "step": 444490 }, { "epoch": 4.749185319728618, "grad_norm": 0.002725421916693449, "learning_rate": 6.924312128034355e-07, "loss": 0.0004, "step": 444500 }, { "epoch": 4.749292163042898, "grad_norm": 0.11374454945325851, "learning_rate": 6.924157059394562e-07, "loss": 0.0178, "step": 444510 }, { "epoch": 4.749399006357177, "grad_norm": 0.0020284270867705345, "learning_rate": 6.92400198858225e-07, "loss": 0.0101, "step": 444520 }, { "epoch": 4.749505849671457, "grad_norm": 0.07812388241291046, "learning_rate": 6.923846915597594e-07, "loss": 0.0139, "step": 444530 }, { "epoch": 4.749612692985736, "grad_norm": 2.0348806381225586, "learning_rate": 6.923691840440771e-07, "loss": 0.0075, "step": 444540 }, { "epoch": 4.749719536300016, "grad_norm": 1.5508800745010376, "learning_rate": 6.923536763111955e-07, "loss": 0.019, "step": 444550 }, { "epoch": 4.749826379614296, "grad_norm": 0.01653341017663479, "learning_rate": 6.923381683611319e-07, "loss": 0.0028, "step": 444560 }, { "epoch": 4.749933222928576, "grad_norm": 0.06212535500526428, "learning_rate": 6.923226601939041e-07, "loss": 0.006, "step": 444570 }, { "epoch": 4.750040066242855, "grad_norm": 0.0027521627489477396, "learning_rate": 6.923071518095295e-07, "loss": 0.0037, "step": 444580 }, { "epoch": 4.7501469095571345, "grad_norm": 0.0014659534208476543, "learning_rate": 6.922916432080257e-07, "loss": 0.0001, "step": 444590 }, { "epoch": 4.750253752871414, "grad_norm": 0.0007005635416135192, "learning_rate": 6.922761343894099e-07, "loss": 0.0107, "step": 444600 }, { "epoch": 4.750360596185693, "grad_norm": 12.418011665344238, "learning_rate": 6.922606253537e-07, "loss": 0.0305, "step": 444610 }, { "epoch": 4.750467439499973, "grad_norm": 0.03167494386434555, "learning_rate": 6.922451161009134e-07, "loss": 0.0014, "step": 444620 }, { "epoch": 4.750574282814253, "grad_norm": 1.0396853685379028, "learning_rate": 6.922296066310673e-07, "loss": 0.014, "step": 444630 }, { "epoch": 4.750681126128533, "grad_norm": 0.08712629228830338, "learning_rate": 6.922140969441798e-07, "loss": 0.0042, "step": 444640 }, { "epoch": 4.750787969442812, "grad_norm": 0.0013111355947330594, "learning_rate": 6.921985870402678e-07, "loss": 0.0056, "step": 444650 }, { "epoch": 4.750894812757092, "grad_norm": 0.007072163745760918, "learning_rate": 6.921830769193492e-07, "loss": 0.0044, "step": 444660 }, { "epoch": 4.751001656071371, "grad_norm": 0.055576734244823456, "learning_rate": 6.921675665814415e-07, "loss": 0.0021, "step": 444670 }, { "epoch": 4.751108499385651, "grad_norm": 0.6538481712341309, "learning_rate": 6.92152056026562e-07, "loss": 0.0048, "step": 444680 }, { "epoch": 4.751215342699931, "grad_norm": 0.26666221022605896, "learning_rate": 6.921365452547283e-07, "loss": 0.0023, "step": 444690 }, { "epoch": 4.75132218601421, "grad_norm": 0.08810840547084808, "learning_rate": 6.92121034265958e-07, "loss": 0.0023, "step": 444700 }, { "epoch": 4.75142902932849, "grad_norm": 0.27683448791503906, "learning_rate": 6.921055230602684e-07, "loss": 0.0038, "step": 444710 }, { "epoch": 4.751535872642769, "grad_norm": 0.0008419890073128045, "learning_rate": 6.920900116376773e-07, "loss": 0.0023, "step": 444720 }, { "epoch": 4.751642715957049, "grad_norm": 1.9594836235046387, "learning_rate": 6.920744999982022e-07, "loss": 0.0044, "step": 444730 }, { "epoch": 4.751749559271328, "grad_norm": 0.010784441605210304, "learning_rate": 6.920589881418603e-07, "loss": 0.0093, "step": 444740 }, { "epoch": 4.7518564025856085, "grad_norm": 0.010532058775424957, "learning_rate": 6.920434760686694e-07, "loss": 0.0004, "step": 444750 }, { "epoch": 4.751963245899888, "grad_norm": 0.270149290561676, "learning_rate": 6.920279637786469e-07, "loss": 0.0114, "step": 444760 }, { "epoch": 4.752070089214167, "grad_norm": 0.09679333865642548, "learning_rate": 6.920124512718103e-07, "loss": 0.0014, "step": 444770 }, { "epoch": 4.752176932528447, "grad_norm": 0.07081503421068192, "learning_rate": 6.919969385481772e-07, "loss": 0.0016, "step": 444780 }, { "epoch": 4.752283775842726, "grad_norm": 6.454828262329102, "learning_rate": 6.919814256077652e-07, "loss": 0.0109, "step": 444790 }, { "epoch": 4.752390619157007, "grad_norm": 0.02374100126326084, "learning_rate": 6.919659124505915e-07, "loss": 0.0223, "step": 444800 }, { "epoch": 4.752497462471286, "grad_norm": 0.012880358844995499, "learning_rate": 6.919503990766737e-07, "loss": 0.0182, "step": 444810 }, { "epoch": 4.752604305785566, "grad_norm": 0.017203306779265404, "learning_rate": 6.919348854860295e-07, "loss": 0.0058, "step": 444820 }, { "epoch": 4.752711149099845, "grad_norm": 0.10685529559850693, "learning_rate": 6.919193716786764e-07, "loss": 0.0058, "step": 444830 }, { "epoch": 4.7528179924141245, "grad_norm": 0.009659052826464176, "learning_rate": 6.919038576546319e-07, "loss": 0.0142, "step": 444840 }, { "epoch": 4.752924835728404, "grad_norm": 0.06771717220544815, "learning_rate": 6.918883434139133e-07, "loss": 0.0113, "step": 444850 }, { "epoch": 4.753031679042683, "grad_norm": 0.00699530029669404, "learning_rate": 6.918728289565383e-07, "loss": 0.0087, "step": 444860 }, { "epoch": 4.753138522356964, "grad_norm": 0.009125988930463791, "learning_rate": 6.918573142825245e-07, "loss": 0.0084, "step": 444870 }, { "epoch": 4.753245365671243, "grad_norm": 0.011009244248270988, "learning_rate": 6.918417993918892e-07, "loss": 0.0536, "step": 444880 }, { "epoch": 4.753352208985523, "grad_norm": 0.04913384094834328, "learning_rate": 6.9182628428465e-07, "loss": 0.0093, "step": 444890 }, { "epoch": 4.753459052299802, "grad_norm": 0.0025090198032557964, "learning_rate": 6.918107689608246e-07, "loss": 0.0058, "step": 444900 }, { "epoch": 4.753565895614082, "grad_norm": 0.0056874435395002365, "learning_rate": 6.917952534204303e-07, "loss": 0.0008, "step": 444910 }, { "epoch": 4.753672738928362, "grad_norm": 0.2042691558599472, "learning_rate": 6.917797376634847e-07, "loss": 0.0048, "step": 444920 }, { "epoch": 4.753779582242641, "grad_norm": 0.2818874716758728, "learning_rate": 6.917642216900052e-07, "loss": 0.0108, "step": 444930 }, { "epoch": 4.753886425556921, "grad_norm": 3.6654202938079834, "learning_rate": 6.917487055000095e-07, "loss": 0.0047, "step": 444940 }, { "epoch": 4.7539932688712, "grad_norm": 0.02066117897629738, "learning_rate": 6.917331890935152e-07, "loss": 0.0041, "step": 444950 }, { "epoch": 4.75410011218548, "grad_norm": 0.09326780587434769, "learning_rate": 6.917176724705394e-07, "loss": 0.0026, "step": 444960 }, { "epoch": 4.754206955499759, "grad_norm": 1.4227067232131958, "learning_rate": 6.917021556311e-07, "loss": 0.0115, "step": 444970 }, { "epoch": 4.75431379881404, "grad_norm": 0.005072126165032387, "learning_rate": 6.916866385752144e-07, "loss": 0.003, "step": 444980 }, { "epoch": 4.754420642128319, "grad_norm": 0.009720075875520706, "learning_rate": 6.916711213029002e-07, "loss": 0.002, "step": 444990 }, { "epoch": 4.7545274854425985, "grad_norm": 0.05833553895354271, "learning_rate": 6.916556038141745e-07, "loss": 0.0074, "step": 445000 }, { "epoch": 4.754634328756878, "grad_norm": 0.0012544373748824, "learning_rate": 6.916400861090555e-07, "loss": 0.0062, "step": 445010 }, { "epoch": 4.754741172071157, "grad_norm": 0.11241857707500458, "learning_rate": 6.916245681875602e-07, "loss": 0.0041, "step": 445020 }, { "epoch": 4.754848015385437, "grad_norm": 0.11842280626296997, "learning_rate": 6.916090500497065e-07, "loss": 0.0203, "step": 445030 }, { "epoch": 4.754954858699717, "grad_norm": 0.014079716056585312, "learning_rate": 6.915935316955116e-07, "loss": 0.0006, "step": 445040 }, { "epoch": 4.755061702013997, "grad_norm": 0.20512418448925018, "learning_rate": 6.915780131249931e-07, "loss": 0.0069, "step": 445050 }, { "epoch": 4.755168545328276, "grad_norm": 1.4319486618041992, "learning_rate": 6.915624943381686e-07, "loss": 0.0039, "step": 445060 }, { "epoch": 4.755275388642556, "grad_norm": 0.0045813824981451035, "learning_rate": 6.915469753350555e-07, "loss": 0.0084, "step": 445070 }, { "epoch": 4.755382231956835, "grad_norm": 0.0011640226002782583, "learning_rate": 6.915314561156715e-07, "loss": 0.0096, "step": 445080 }, { "epoch": 4.7554890752711145, "grad_norm": 2.6365714073181152, "learning_rate": 6.91515936680034e-07, "loss": 0.0152, "step": 445090 }, { "epoch": 4.755595918585395, "grad_norm": 0.050611723214387894, "learning_rate": 6.915004170281608e-07, "loss": 0.0014, "step": 445100 }, { "epoch": 4.755702761899674, "grad_norm": 1.7806732654571533, "learning_rate": 6.914848971600688e-07, "loss": 0.0123, "step": 445110 }, { "epoch": 4.755809605213954, "grad_norm": 0.20885077118873596, "learning_rate": 6.914693770757762e-07, "loss": 0.0066, "step": 445120 }, { "epoch": 4.755916448528233, "grad_norm": 0.4632042348384857, "learning_rate": 6.914538567753e-07, "loss": 0.0007, "step": 445130 }, { "epoch": 4.756023291842513, "grad_norm": 0.01801786944270134, "learning_rate": 6.91438336258658e-07, "loss": 0.0035, "step": 445140 }, { "epoch": 4.756130135156793, "grad_norm": 0.0509832501411438, "learning_rate": 6.914228155258677e-07, "loss": 0.0087, "step": 445150 }, { "epoch": 4.7562369784710725, "grad_norm": 0.3054407238960266, "learning_rate": 6.914072945769465e-07, "loss": 0.0301, "step": 445160 }, { "epoch": 4.756343821785352, "grad_norm": 1.3550347089767456, "learning_rate": 6.913917734119121e-07, "loss": 0.0049, "step": 445170 }, { "epoch": 4.756450665099631, "grad_norm": 0.01648312620818615, "learning_rate": 6.91376252030782e-07, "loss": 0.0011, "step": 445180 }, { "epoch": 4.756557508413911, "grad_norm": 0.008459093049168587, "learning_rate": 6.913607304335735e-07, "loss": 0.0024, "step": 445190 }, { "epoch": 4.75666435172819, "grad_norm": 2.6099095344543457, "learning_rate": 6.913452086203044e-07, "loss": 0.0132, "step": 445200 }, { "epoch": 4.75677119504247, "grad_norm": 2.8506977558135986, "learning_rate": 6.913296865909922e-07, "loss": 0.015, "step": 445210 }, { "epoch": 4.75687803835675, "grad_norm": 4.379843711853027, "learning_rate": 6.913141643456541e-07, "loss": 0.0355, "step": 445220 }, { "epoch": 4.75698488167103, "grad_norm": 0.06413416564464569, "learning_rate": 6.912986418843082e-07, "loss": 0.0034, "step": 445230 }, { "epoch": 4.757091724985309, "grad_norm": 1.4814598560333252, "learning_rate": 6.912831192069716e-07, "loss": 0.0142, "step": 445240 }, { "epoch": 4.7571985682995885, "grad_norm": 0.10433252900838852, "learning_rate": 6.912675963136617e-07, "loss": 0.001, "step": 445250 }, { "epoch": 4.757305411613868, "grad_norm": 0.07236193865537643, "learning_rate": 6.912520732043965e-07, "loss": 0.0093, "step": 445260 }, { "epoch": 4.757412254928148, "grad_norm": 0.002531306818127632, "learning_rate": 6.912365498791933e-07, "loss": 0.0112, "step": 445270 }, { "epoch": 4.757519098242428, "grad_norm": 10.189911842346191, "learning_rate": 6.912210263380694e-07, "loss": 0.0293, "step": 445280 }, { "epoch": 4.757625941556707, "grad_norm": 0.009270367212593555, "learning_rate": 6.912055025810426e-07, "loss": 0.0027, "step": 445290 }, { "epoch": 4.757732784870987, "grad_norm": 0.002257008571177721, "learning_rate": 6.911899786081303e-07, "loss": 0.0208, "step": 445300 }, { "epoch": 4.757839628185266, "grad_norm": 0.7254009246826172, "learning_rate": 6.9117445441935e-07, "loss": 0.0294, "step": 445310 }, { "epoch": 4.757946471499546, "grad_norm": 0.03263937681913376, "learning_rate": 6.911589300147196e-07, "loss": 0.0024, "step": 445320 }, { "epoch": 4.758053314813825, "grad_norm": 0.005492737051099539, "learning_rate": 6.911434053942563e-07, "loss": 0.0042, "step": 445330 }, { "epoch": 4.758160158128105, "grad_norm": 1.5806764364242554, "learning_rate": 6.911278805579774e-07, "loss": 0.0032, "step": 445340 }, { "epoch": 4.758267001442385, "grad_norm": 3.320026159286499, "learning_rate": 6.91112355505901e-07, "loss": 0.0071, "step": 445350 }, { "epoch": 4.758373844756664, "grad_norm": 0.12787644565105438, "learning_rate": 6.910968302380442e-07, "loss": 0.0021, "step": 445360 }, { "epoch": 4.758480688070944, "grad_norm": 6.434544086456299, "learning_rate": 6.910813047544246e-07, "loss": 0.0795, "step": 445370 }, { "epoch": 4.758587531385223, "grad_norm": 2.972299575805664, "learning_rate": 6.910657790550599e-07, "loss": 0.0031, "step": 445380 }, { "epoch": 4.758694374699504, "grad_norm": 0.01040008943527937, "learning_rate": 6.910502531399674e-07, "loss": 0.0131, "step": 445390 }, { "epoch": 4.758801218013783, "grad_norm": 0.7221737504005432, "learning_rate": 6.910347270091648e-07, "loss": 0.013, "step": 445400 }, { "epoch": 4.7589080613280625, "grad_norm": 2.594444751739502, "learning_rate": 6.910192006626696e-07, "loss": 0.0355, "step": 445410 }, { "epoch": 4.759014904642342, "grad_norm": 0.07459241896867752, "learning_rate": 6.910036741004992e-07, "loss": 0.0107, "step": 445420 }, { "epoch": 4.7591217479566215, "grad_norm": 0.016787422820925713, "learning_rate": 6.909881473226714e-07, "loss": 0.0052, "step": 445430 }, { "epoch": 4.759228591270901, "grad_norm": 0.06681288033723831, "learning_rate": 6.909726203292034e-07, "loss": 0.0038, "step": 445440 }, { "epoch": 4.75933543458518, "grad_norm": 2.9841432571411133, "learning_rate": 6.90957093120113e-07, "loss": 0.0108, "step": 445450 }, { "epoch": 4.759442277899461, "grad_norm": 0.007192487362772226, "learning_rate": 6.909415656954176e-07, "loss": 0.0268, "step": 445460 }, { "epoch": 4.75954912121374, "grad_norm": 0.3072458505630493, "learning_rate": 6.909260380551348e-07, "loss": 0.0017, "step": 445470 }, { "epoch": 4.75965596452802, "grad_norm": 10.716609954833984, "learning_rate": 6.90910510199282e-07, "loss": 0.0041, "step": 445480 }, { "epoch": 4.759762807842299, "grad_norm": 0.0009075543493963778, "learning_rate": 6.90894982127877e-07, "loss": 0.006, "step": 445490 }, { "epoch": 4.759869651156579, "grad_norm": 0.02804100513458252, "learning_rate": 6.908794538409371e-07, "loss": 0.0068, "step": 445500 }, { "epoch": 4.759976494470859, "grad_norm": 0.013012837618589401, "learning_rate": 6.908639253384798e-07, "loss": 0.0348, "step": 445510 }, { "epoch": 4.760083337785138, "grad_norm": 2.4143357276916504, "learning_rate": 6.908483966205226e-07, "loss": 0.0091, "step": 445520 }, { "epoch": 4.760190181099418, "grad_norm": 10.452585220336914, "learning_rate": 6.908328676870834e-07, "loss": 0.0243, "step": 445530 }, { "epoch": 4.760297024413697, "grad_norm": 1.0061509609222412, "learning_rate": 6.908173385381793e-07, "loss": 0.0067, "step": 445540 }, { "epoch": 4.760403867727977, "grad_norm": 0.01699814945459366, "learning_rate": 6.908018091738282e-07, "loss": 0.0012, "step": 445550 }, { "epoch": 4.760510711042256, "grad_norm": 0.0020727557130157948, "learning_rate": 6.907862795940474e-07, "loss": 0.0034, "step": 445560 }, { "epoch": 4.760617554356536, "grad_norm": 0.014491045847535133, "learning_rate": 6.907707497988545e-07, "loss": 0.0043, "step": 445570 }, { "epoch": 4.760724397670816, "grad_norm": 0.09212995320558548, "learning_rate": 6.90755219788267e-07, "loss": 0.0045, "step": 445580 }, { "epoch": 4.7608312409850955, "grad_norm": 0.001575266127474606, "learning_rate": 6.907396895623026e-07, "loss": 0.0078, "step": 445590 }, { "epoch": 4.760938084299375, "grad_norm": 0.017731016501784325, "learning_rate": 6.907241591209784e-07, "loss": 0.0005, "step": 445600 }, { "epoch": 4.761044927613654, "grad_norm": 0.12612773478031158, "learning_rate": 6.907086284643124e-07, "loss": 0.0053, "step": 445610 }, { "epoch": 4.761151770927934, "grad_norm": 9.634056091308594, "learning_rate": 6.906930975923218e-07, "loss": 0.0146, "step": 445620 }, { "epoch": 4.761258614242214, "grad_norm": 0.3481079339981079, "learning_rate": 6.906775665050246e-07, "loss": 0.0149, "step": 445630 }, { "epoch": 4.761365457556494, "grad_norm": 0.0035674087703227997, "learning_rate": 6.90662035202438e-07, "loss": 0.001, "step": 445640 }, { "epoch": 4.761472300870773, "grad_norm": 0.009254169650375843, "learning_rate": 6.906465036845794e-07, "loss": 0.0221, "step": 445650 }, { "epoch": 4.761579144185053, "grad_norm": 0.029245983809232712, "learning_rate": 6.906309719514665e-07, "loss": 0.0122, "step": 445660 }, { "epoch": 4.761685987499332, "grad_norm": 0.2690851092338562, "learning_rate": 6.90615440003117e-07, "loss": 0.0027, "step": 445670 }, { "epoch": 4.7617928308136115, "grad_norm": 3.0966169834136963, "learning_rate": 6.905999078395481e-07, "loss": 0.023, "step": 445680 }, { "epoch": 4.761899674127892, "grad_norm": 0.005272552836686373, "learning_rate": 6.905843754607776e-07, "loss": 0.0194, "step": 445690 }, { "epoch": 4.762006517442171, "grad_norm": 0.047503285109996796, "learning_rate": 6.905688428668231e-07, "loss": 0.0188, "step": 445700 }, { "epoch": 4.762113360756451, "grad_norm": 0.48289236426353455, "learning_rate": 6.905533100577018e-07, "loss": 0.001, "step": 445710 }, { "epoch": 4.76222020407073, "grad_norm": 2.0397374629974365, "learning_rate": 6.905377770334316e-07, "loss": 0.0025, "step": 445720 }, { "epoch": 4.76232704738501, "grad_norm": 0.0030546896159648895, "learning_rate": 6.905222437940297e-07, "loss": 0.0127, "step": 445730 }, { "epoch": 4.762433890699289, "grad_norm": 1.4707341194152832, "learning_rate": 6.905067103395138e-07, "loss": 0.0344, "step": 445740 }, { "epoch": 4.7625407340135695, "grad_norm": 0.05407175421714783, "learning_rate": 6.904911766699015e-07, "loss": 0.0117, "step": 445750 }, { "epoch": 4.762647577327849, "grad_norm": 0.0017367050750181079, "learning_rate": 6.904756427852104e-07, "loss": 0.001, "step": 445760 }, { "epoch": 4.762754420642128, "grad_norm": 4.404820442199707, "learning_rate": 6.904601086854578e-07, "loss": 0.0031, "step": 445770 }, { "epoch": 4.762861263956408, "grad_norm": 6.644093990325928, "learning_rate": 6.904445743706614e-07, "loss": 0.0121, "step": 445780 }, { "epoch": 4.762968107270687, "grad_norm": 0.005066855810582638, "learning_rate": 6.904290398408386e-07, "loss": 0.0018, "step": 445790 }, { "epoch": 4.763074950584967, "grad_norm": 0.007740135304629803, "learning_rate": 6.904135050960071e-07, "loss": 0.0075, "step": 445800 }, { "epoch": 4.763181793899247, "grad_norm": 0.03387864679098129, "learning_rate": 6.903979701361844e-07, "loss": 0.0117, "step": 445810 }, { "epoch": 4.763288637213527, "grad_norm": 0.02114345319569111, "learning_rate": 6.903824349613881e-07, "loss": 0.011, "step": 445820 }, { "epoch": 4.763395480527806, "grad_norm": 0.06006758660078049, "learning_rate": 6.903668995716355e-07, "loss": 0.0126, "step": 445830 }, { "epoch": 4.7635023238420855, "grad_norm": 0.8386043310165405, "learning_rate": 6.903513639669445e-07, "loss": 0.0013, "step": 445840 }, { "epoch": 4.763609167156365, "grad_norm": 2.339226245880127, "learning_rate": 6.903358281473323e-07, "loss": 0.0037, "step": 445850 }, { "epoch": 4.763716010470644, "grad_norm": 0.08120252937078476, "learning_rate": 6.903202921128166e-07, "loss": 0.0029, "step": 445860 }, { "epoch": 4.763822853784925, "grad_norm": 0.45243149995803833, "learning_rate": 6.90304755863415e-07, "loss": 0.0058, "step": 445870 }, { "epoch": 4.763929697099204, "grad_norm": 14.90401554107666, "learning_rate": 6.902892193991448e-07, "loss": 0.0115, "step": 445880 }, { "epoch": 4.764036540413484, "grad_norm": 0.05550194904208183, "learning_rate": 6.90273682720024e-07, "loss": 0.0251, "step": 445890 }, { "epoch": 4.764143383727763, "grad_norm": 0.0041090645827353, "learning_rate": 6.902581458260697e-07, "loss": 0.0056, "step": 445900 }, { "epoch": 4.764250227042043, "grad_norm": 0.0042485203593969345, "learning_rate": 6.902426087172994e-07, "loss": 0.0007, "step": 445910 }, { "epoch": 4.764357070356322, "grad_norm": 0.0014199288561940193, "learning_rate": 6.902270713937311e-07, "loss": 0.0103, "step": 445920 }, { "epoch": 4.764463913670602, "grad_norm": 0.007733982987701893, "learning_rate": 6.902115338553821e-07, "loss": 0.0129, "step": 445930 }, { "epoch": 4.764570756984882, "grad_norm": 0.8002271056175232, "learning_rate": 6.901959961022698e-07, "loss": 0.051, "step": 445940 }, { "epoch": 4.764677600299161, "grad_norm": 7.241085052490234, "learning_rate": 6.901804581344119e-07, "loss": 0.0052, "step": 445950 }, { "epoch": 4.764784443613441, "grad_norm": 0.0019996354822069407, "learning_rate": 6.901649199518259e-07, "loss": 0.0106, "step": 445960 }, { "epoch": 4.76489128692772, "grad_norm": 0.390238881111145, "learning_rate": 6.901493815545293e-07, "loss": 0.0021, "step": 445970 }, { "epoch": 4.764998130242001, "grad_norm": 0.006044297479093075, "learning_rate": 6.901338429425397e-07, "loss": 0.0243, "step": 445980 }, { "epoch": 4.76510497355628, "grad_norm": 0.005852122325450182, "learning_rate": 6.901183041158747e-07, "loss": 0.0436, "step": 445990 }, { "epoch": 4.7652118168705595, "grad_norm": 24.690187454223633, "learning_rate": 6.901027650745518e-07, "loss": 0.0353, "step": 446000 }, { "epoch": 4.765318660184839, "grad_norm": 5.382442951202393, "learning_rate": 6.900872258185885e-07, "loss": 0.0012, "step": 446010 }, { "epoch": 4.765425503499118, "grad_norm": 0.5144590139389038, "learning_rate": 6.900716863480023e-07, "loss": 0.0061, "step": 446020 }, { "epoch": 4.765532346813398, "grad_norm": 0.002644709311425686, "learning_rate": 6.900561466628109e-07, "loss": 0.0259, "step": 446030 }, { "epoch": 4.765639190127677, "grad_norm": 0.02147304080426693, "learning_rate": 6.900406067630318e-07, "loss": 0.0093, "step": 446040 }, { "epoch": 4.765746033441958, "grad_norm": 0.004775903187692165, "learning_rate": 6.900250666486824e-07, "loss": 0.001, "step": 446050 }, { "epoch": 4.765852876756237, "grad_norm": 2.0713891983032227, "learning_rate": 6.900095263197805e-07, "loss": 0.0077, "step": 446060 }, { "epoch": 4.765959720070517, "grad_norm": 0.005755624268203974, "learning_rate": 6.899939857763434e-07, "loss": 0.0031, "step": 446070 }, { "epoch": 4.766066563384796, "grad_norm": 0.051465537399053574, "learning_rate": 6.899784450183888e-07, "loss": 0.0294, "step": 446080 }, { "epoch": 4.7661734066990755, "grad_norm": 0.11543615162372589, "learning_rate": 6.899629040459342e-07, "loss": 0.0085, "step": 446090 }, { "epoch": 4.766280250013356, "grad_norm": 7.351184844970703, "learning_rate": 6.899473628589971e-07, "loss": 0.0129, "step": 446100 }, { "epoch": 4.766387093327635, "grad_norm": 0.001894062734209001, "learning_rate": 6.899318214575951e-07, "loss": 0.0037, "step": 446110 }, { "epoch": 4.766493936641915, "grad_norm": 0.03825617954134941, "learning_rate": 6.899162798417458e-07, "loss": 0.0039, "step": 446120 }, { "epoch": 4.766600779956194, "grad_norm": 0.014821819961071014, "learning_rate": 6.899007380114667e-07, "loss": 0.0217, "step": 446130 }, { "epoch": 4.766707623270474, "grad_norm": 0.0021196450106799603, "learning_rate": 6.898851959667753e-07, "loss": 0.0067, "step": 446140 }, { "epoch": 4.766814466584753, "grad_norm": 2.0827693939208984, "learning_rate": 6.89869653707689e-07, "loss": 0.0046, "step": 446150 }, { "epoch": 4.766921309899033, "grad_norm": 0.0015869871713221073, "learning_rate": 6.898541112342257e-07, "loss": 0.0049, "step": 446160 }, { "epoch": 4.767028153213313, "grad_norm": 14.732497215270996, "learning_rate": 6.898385685464027e-07, "loss": 0.0413, "step": 446170 }, { "epoch": 4.767134996527592, "grad_norm": 0.007422157563269138, "learning_rate": 6.898230256442377e-07, "loss": 0.0031, "step": 446180 }, { "epoch": 4.767241839841872, "grad_norm": 5.50284481048584, "learning_rate": 6.898074825277482e-07, "loss": 0.0034, "step": 446190 }, { "epoch": 4.767348683156151, "grad_norm": 0.045091304928064346, "learning_rate": 6.897919391969518e-07, "loss": 0.0022, "step": 446200 }, { "epoch": 4.767455526470431, "grad_norm": 0.002083233557641506, "learning_rate": 6.897763956518658e-07, "loss": 0.0027, "step": 446210 }, { "epoch": 4.767562369784711, "grad_norm": 0.015598648227751255, "learning_rate": 6.897608518925079e-07, "loss": 0.0081, "step": 446220 }, { "epoch": 4.767669213098991, "grad_norm": 0.02792627364397049, "learning_rate": 6.897453079188959e-07, "loss": 0.0014, "step": 446230 }, { "epoch": 4.76777605641327, "grad_norm": 0.006803377065807581, "learning_rate": 6.89729763731047e-07, "loss": 0.0011, "step": 446240 }, { "epoch": 4.7678828997275495, "grad_norm": 0.004655023571103811, "learning_rate": 6.897142193289789e-07, "loss": 0.0325, "step": 446250 }, { "epoch": 4.767989743041829, "grad_norm": 0.26262253522872925, "learning_rate": 6.896986747127092e-07, "loss": 0.0059, "step": 446260 }, { "epoch": 4.7680965863561084, "grad_norm": 0.10286203026771545, "learning_rate": 6.896831298822553e-07, "loss": 0.0058, "step": 446270 }, { "epoch": 4.768203429670388, "grad_norm": 0.0024109557271003723, "learning_rate": 6.896675848376348e-07, "loss": 0.0082, "step": 446280 }, { "epoch": 4.768310272984668, "grad_norm": 0.37626123428344727, "learning_rate": 6.896520395788653e-07, "loss": 0.0259, "step": 446290 }, { "epoch": 4.768417116298948, "grad_norm": 0.005029262509196997, "learning_rate": 6.896364941059643e-07, "loss": 0.0156, "step": 446300 }, { "epoch": 4.768523959613227, "grad_norm": 1.6940042972564697, "learning_rate": 6.896209484189495e-07, "loss": 0.0096, "step": 446310 }, { "epoch": 4.768630802927507, "grad_norm": 0.004205090459436178, "learning_rate": 6.896054025178382e-07, "loss": 0.0056, "step": 446320 }, { "epoch": 4.768737646241786, "grad_norm": 3.000896692276001, "learning_rate": 6.895898564026482e-07, "loss": 0.0099, "step": 446330 }, { "epoch": 4.768844489556066, "grad_norm": 13.281831741333008, "learning_rate": 6.895743100733968e-07, "loss": 0.0033, "step": 446340 }, { "epoch": 4.768951332870346, "grad_norm": 0.00969743076711893, "learning_rate": 6.895587635301019e-07, "loss": 0.0035, "step": 446350 }, { "epoch": 4.769058176184625, "grad_norm": 4.180621147155762, "learning_rate": 6.895432167727809e-07, "loss": 0.005, "step": 446360 }, { "epoch": 4.769165019498905, "grad_norm": 0.004000806715339422, "learning_rate": 6.89527669801451e-07, "loss": 0.0027, "step": 446370 }, { "epoch": 4.769271862813184, "grad_norm": 2.0144286155700684, "learning_rate": 6.895121226161302e-07, "loss": 0.0066, "step": 446380 }, { "epoch": 4.769378706127464, "grad_norm": 1.5027071237564087, "learning_rate": 6.894965752168358e-07, "loss": 0.0066, "step": 446390 }, { "epoch": 4.769485549441743, "grad_norm": 0.002455261070281267, "learning_rate": 6.894810276035856e-07, "loss": 0.0024, "step": 446400 }, { "epoch": 4.7695923927560235, "grad_norm": 0.884181797504425, "learning_rate": 6.89465479776397e-07, "loss": 0.0121, "step": 446410 }, { "epoch": 4.769699236070303, "grad_norm": 0.00879175029695034, "learning_rate": 6.894499317352876e-07, "loss": 0.0028, "step": 446420 }, { "epoch": 4.7698060793845825, "grad_norm": 0.01017901860177517, "learning_rate": 6.894343834802749e-07, "loss": 0.0148, "step": 446430 }, { "epoch": 4.769912922698862, "grad_norm": 0.015778139233589172, "learning_rate": 6.894188350113764e-07, "loss": 0.0008, "step": 446440 }, { "epoch": 4.770019766013141, "grad_norm": 0.7377698421478271, "learning_rate": 6.894032863286099e-07, "loss": 0.0065, "step": 446450 }, { "epoch": 4.770126609327422, "grad_norm": 0.03480944782495499, "learning_rate": 6.893877374319926e-07, "loss": 0.0062, "step": 446460 }, { "epoch": 4.770233452641701, "grad_norm": 0.0032057457137852907, "learning_rate": 6.893721883215424e-07, "loss": 0.0002, "step": 446470 }, { "epoch": 4.770340295955981, "grad_norm": 0.005728536285459995, "learning_rate": 6.893566389972767e-07, "loss": 0.0132, "step": 446480 }, { "epoch": 4.77044713927026, "grad_norm": 0.002381374128162861, "learning_rate": 6.89341089459213e-07, "loss": 0.0023, "step": 446490 }, { "epoch": 4.7705539825845396, "grad_norm": 0.970368504524231, "learning_rate": 6.89325539707369e-07, "loss": 0.0091, "step": 446500 }, { "epoch": 4.770660825898819, "grad_norm": 0.04152704030275345, "learning_rate": 6.89309989741762e-07, "loss": 0.0026, "step": 446510 }, { "epoch": 4.770767669213099, "grad_norm": 0.5271379351615906, "learning_rate": 6.892944395624099e-07, "loss": 0.0154, "step": 446520 }, { "epoch": 4.770874512527379, "grad_norm": 0.06783546507358551, "learning_rate": 6.892788891693302e-07, "loss": 0.0123, "step": 446530 }, { "epoch": 4.770981355841658, "grad_norm": 0.02667160890996456, "learning_rate": 6.892633385625401e-07, "loss": 0.0, "step": 446540 }, { "epoch": 4.771088199155938, "grad_norm": 25.054174423217773, "learning_rate": 6.892477877420576e-07, "loss": 0.028, "step": 446550 }, { "epoch": 4.771195042470217, "grad_norm": 0.06164134293794632, "learning_rate": 6.892322367079e-07, "loss": 0.0095, "step": 446560 }, { "epoch": 4.771301885784497, "grad_norm": 0.032994311302900314, "learning_rate": 6.892166854600849e-07, "loss": 0.0564, "step": 446570 }, { "epoch": 4.771408729098777, "grad_norm": 0.006233804393559694, "learning_rate": 6.892011339986299e-07, "loss": 0.0166, "step": 446580 }, { "epoch": 4.7715155724130565, "grad_norm": 0.008507258258759975, "learning_rate": 6.891855823235524e-07, "loss": 0.0081, "step": 446590 }, { "epoch": 4.771622415727336, "grad_norm": 0.012709606438875198, "learning_rate": 6.891700304348702e-07, "loss": 0.0224, "step": 446600 }, { "epoch": 4.771729259041615, "grad_norm": 0.009180808439850807, "learning_rate": 6.891544783326009e-07, "loss": 0.0036, "step": 446610 }, { "epoch": 4.771836102355895, "grad_norm": 0.149781733751297, "learning_rate": 6.891389260167617e-07, "loss": 0.0034, "step": 446620 }, { "epoch": 4.771942945670174, "grad_norm": 0.06584648787975311, "learning_rate": 6.891233734873704e-07, "loss": 0.0152, "step": 446630 }, { "epoch": 4.772049788984455, "grad_norm": 3.846205949783325, "learning_rate": 6.891078207444448e-07, "loss": 0.01, "step": 446640 }, { "epoch": 4.772156632298734, "grad_norm": 0.13430270552635193, "learning_rate": 6.890922677880018e-07, "loss": 0.005, "step": 446650 }, { "epoch": 4.772263475613014, "grad_norm": 0.7288913726806641, "learning_rate": 6.890767146180597e-07, "loss": 0.0016, "step": 446660 }, { "epoch": 4.772370318927293, "grad_norm": 2.76784610748291, "learning_rate": 6.890611612346357e-07, "loss": 0.0064, "step": 446670 }, { "epoch": 4.7724771622415725, "grad_norm": 0.08165299147367477, "learning_rate": 6.890456076377471e-07, "loss": 0.0103, "step": 446680 }, { "epoch": 4.772584005555853, "grad_norm": 2.2932252883911133, "learning_rate": 6.89030053827412e-07, "loss": 0.0018, "step": 446690 }, { "epoch": 4.772690848870132, "grad_norm": 0.049263712018728256, "learning_rate": 6.890144998036476e-07, "loss": 0.0077, "step": 446700 }, { "epoch": 4.772797692184412, "grad_norm": 0.010633259080350399, "learning_rate": 6.889989455664716e-07, "loss": 0.0072, "step": 446710 }, { "epoch": 4.772904535498691, "grad_norm": 0.0049870191141963005, "learning_rate": 6.889833911159015e-07, "loss": 0.0035, "step": 446720 }, { "epoch": 4.773011378812971, "grad_norm": 11.171794891357422, "learning_rate": 6.889678364519548e-07, "loss": 0.02, "step": 446730 }, { "epoch": 4.77311822212725, "grad_norm": 0.020981471985578537, "learning_rate": 6.889522815746493e-07, "loss": 0.0072, "step": 446740 }, { "epoch": 4.77322506544153, "grad_norm": 1.6610416173934937, "learning_rate": 6.889367264840023e-07, "loss": 0.0128, "step": 446750 }, { "epoch": 4.77333190875581, "grad_norm": 1.1657969951629639, "learning_rate": 6.889211711800316e-07, "loss": 0.0072, "step": 446760 }, { "epoch": 4.773438752070089, "grad_norm": 0.011214980855584145, "learning_rate": 6.889056156627544e-07, "loss": 0.0129, "step": 446770 }, { "epoch": 4.773545595384369, "grad_norm": 0.029748015105724335, "learning_rate": 6.888900599321887e-07, "loss": 0.0177, "step": 446780 }, { "epoch": 4.773652438698648, "grad_norm": 0.0021872837096452713, "learning_rate": 6.888745039883517e-07, "loss": 0.0065, "step": 446790 }, { "epoch": 4.773759282012928, "grad_norm": 1.693526029586792, "learning_rate": 6.888589478312614e-07, "loss": 0.0126, "step": 446800 }, { "epoch": 4.773866125327208, "grad_norm": 0.014592239633202553, "learning_rate": 6.88843391460935e-07, "loss": 0.0066, "step": 446810 }, { "epoch": 4.773972968641488, "grad_norm": 0.003985056187957525, "learning_rate": 6.8882783487739e-07, "loss": 0.0016, "step": 446820 }, { "epoch": 4.774079811955767, "grad_norm": 0.10752654075622559, "learning_rate": 6.888122780806444e-07, "loss": 0.005, "step": 446830 }, { "epoch": 4.7741866552700465, "grad_norm": 1.1624383926391602, "learning_rate": 6.887967210707153e-07, "loss": 0.0082, "step": 446840 }, { "epoch": 4.774293498584326, "grad_norm": 2.5415477752685547, "learning_rate": 6.887811638476204e-07, "loss": 0.0082, "step": 446850 }, { "epoch": 4.774400341898605, "grad_norm": 0.16313159465789795, "learning_rate": 6.887656064113775e-07, "loss": 0.0098, "step": 446860 }, { "epoch": 4.774507185212885, "grad_norm": 0.002804482588544488, "learning_rate": 6.887500487620039e-07, "loss": 0.0009, "step": 446870 }, { "epoch": 4.774614028527165, "grad_norm": 0.009024396538734436, "learning_rate": 6.887344908995172e-07, "loss": 0.0078, "step": 446880 }, { "epoch": 4.774720871841445, "grad_norm": 1.5562937259674072, "learning_rate": 6.88718932823935e-07, "loss": 0.0028, "step": 446890 }, { "epoch": 4.774827715155724, "grad_norm": 2.882612943649292, "learning_rate": 6.887033745352751e-07, "loss": 0.0117, "step": 446900 }, { "epoch": 4.774934558470004, "grad_norm": 0.04038802161812782, "learning_rate": 6.886878160335545e-07, "loss": 0.0019, "step": 446910 }, { "epoch": 4.775041401784283, "grad_norm": 0.02960370108485222, "learning_rate": 6.886722573187914e-07, "loss": 0.0131, "step": 446920 }, { "epoch": 4.775148245098563, "grad_norm": 0.016099225729703903, "learning_rate": 6.886566983910029e-07, "loss": 0.008, "step": 446930 }, { "epoch": 4.775255088412843, "grad_norm": 0.08582472801208496, "learning_rate": 6.886411392502069e-07, "loss": 0.0049, "step": 446940 }, { "epoch": 4.775361931727122, "grad_norm": 0.5485470294952393, "learning_rate": 6.886255798964207e-07, "loss": 0.0016, "step": 446950 }, { "epoch": 4.775468775041402, "grad_norm": 1.5383996963500977, "learning_rate": 6.88610020329662e-07, "loss": 0.0004, "step": 446960 }, { "epoch": 4.775575618355681, "grad_norm": 1.572460412979126, "learning_rate": 6.885944605499483e-07, "loss": 0.006, "step": 446970 }, { "epoch": 4.775682461669961, "grad_norm": 0.005760913714766502, "learning_rate": 6.885789005572974e-07, "loss": 0.0007, "step": 446980 }, { "epoch": 4.77578930498424, "grad_norm": 0.004734407644718885, "learning_rate": 6.885633403517265e-07, "loss": 0.0168, "step": 446990 }, { "epoch": 4.7758961482985205, "grad_norm": 13.891975402832031, "learning_rate": 6.885477799332535e-07, "loss": 0.0023, "step": 447000 }, { "epoch": 4.7760029916128, "grad_norm": 1.5440847873687744, "learning_rate": 6.885322193018958e-07, "loss": 0.0021, "step": 447010 }, { "epoch": 4.776109834927079, "grad_norm": 2.1088593006134033, "learning_rate": 6.885166584576709e-07, "loss": 0.0091, "step": 447020 }, { "epoch": 4.776216678241359, "grad_norm": 6.44777774810791, "learning_rate": 6.885010974005966e-07, "loss": 0.0154, "step": 447030 }, { "epoch": 4.776323521555638, "grad_norm": 0.1092170774936676, "learning_rate": 6.884855361306903e-07, "loss": 0.0047, "step": 447040 }, { "epoch": 4.776430364869919, "grad_norm": 0.03211401775479317, "learning_rate": 6.884699746479694e-07, "loss": 0.0058, "step": 447050 }, { "epoch": 4.776537208184198, "grad_norm": 0.014008753933012486, "learning_rate": 6.884544129524519e-07, "loss": 0.0029, "step": 447060 }, { "epoch": 4.776644051498478, "grad_norm": 2.3129050731658936, "learning_rate": 6.88438851044155e-07, "loss": 0.0129, "step": 447070 }, { "epoch": 4.776750894812757, "grad_norm": 0.01140339020639658, "learning_rate": 6.884232889230965e-07, "loss": 0.0017, "step": 447080 }, { "epoch": 4.7768577381270365, "grad_norm": 0.06893663108348846, "learning_rate": 6.884077265892939e-07, "loss": 0.0048, "step": 447090 }, { "epoch": 4.776964581441316, "grad_norm": 6.454840183258057, "learning_rate": 6.883921640427648e-07, "loss": 0.0043, "step": 447100 }, { "epoch": 4.777071424755595, "grad_norm": 5.070648193359375, "learning_rate": 6.883766012835266e-07, "loss": 0.0169, "step": 447110 }, { "epoch": 4.777178268069876, "grad_norm": 0.005059503018856049, "learning_rate": 6.883610383115971e-07, "loss": 0.0001, "step": 447120 }, { "epoch": 4.777285111384155, "grad_norm": 0.00616404740139842, "learning_rate": 6.883454751269938e-07, "loss": 0.0054, "step": 447130 }, { "epoch": 4.777391954698435, "grad_norm": 5.080602169036865, "learning_rate": 6.88329911729734e-07, "loss": 0.0133, "step": 447140 }, { "epoch": 4.777498798012714, "grad_norm": 5.084926128387451, "learning_rate": 6.883143481198359e-07, "loss": 0.0163, "step": 447150 }, { "epoch": 4.777605641326994, "grad_norm": 0.0032966649159789085, "learning_rate": 6.882987842973164e-07, "loss": 0.0046, "step": 447160 }, { "epoch": 4.777712484641274, "grad_norm": 0.0028247740119695663, "learning_rate": 6.882832202621934e-07, "loss": 0.0031, "step": 447170 }, { "epoch": 4.777819327955553, "grad_norm": 0.0048548015765845776, "learning_rate": 6.882676560144845e-07, "loss": 0.0178, "step": 447180 }, { "epoch": 4.777926171269833, "grad_norm": 0.006916058715432882, "learning_rate": 6.882520915542072e-07, "loss": 0.0055, "step": 447190 }, { "epoch": 4.778033014584112, "grad_norm": 0.012692905962467194, "learning_rate": 6.882365268813792e-07, "loss": 0.0002, "step": 447200 }, { "epoch": 4.778139857898392, "grad_norm": 0.08341523259878159, "learning_rate": 6.882209619960177e-07, "loss": 0.0225, "step": 447210 }, { "epoch": 4.778246701212671, "grad_norm": 6.516732215881348, "learning_rate": 6.882053968981407e-07, "loss": 0.0316, "step": 447220 }, { "epoch": 4.778353544526952, "grad_norm": 0.0008882932597771287, "learning_rate": 6.881898315877656e-07, "loss": 0.0079, "step": 447230 }, { "epoch": 4.778460387841231, "grad_norm": 0.0017427504062652588, "learning_rate": 6.881742660649099e-07, "loss": 0.0111, "step": 447240 }, { "epoch": 4.7785672311555105, "grad_norm": 0.9432207942008972, "learning_rate": 6.881587003295913e-07, "loss": 0.0106, "step": 447250 }, { "epoch": 4.77867407446979, "grad_norm": 0.006448245141655207, "learning_rate": 6.881431343818273e-07, "loss": 0.0047, "step": 447260 }, { "epoch": 4.778780917784069, "grad_norm": 0.01704060472548008, "learning_rate": 6.881275682216356e-07, "loss": 0.0156, "step": 447270 }, { "epoch": 4.778887761098349, "grad_norm": 0.9583166241645813, "learning_rate": 6.881120018490335e-07, "loss": 0.0088, "step": 447280 }, { "epoch": 4.778994604412629, "grad_norm": 2.0889313220977783, "learning_rate": 6.880964352640388e-07, "loss": 0.0084, "step": 447290 }, { "epoch": 4.779101447726909, "grad_norm": 0.005978464614599943, "learning_rate": 6.880808684666692e-07, "loss": 0.0002, "step": 447300 }, { "epoch": 4.779208291041188, "grad_norm": 0.0006203088560141623, "learning_rate": 6.880653014569418e-07, "loss": 0.0037, "step": 447310 }, { "epoch": 4.779315134355468, "grad_norm": 5.813592433929443, "learning_rate": 6.880497342348747e-07, "loss": 0.0064, "step": 447320 }, { "epoch": 4.779421977669747, "grad_norm": 0.04104846715927124, "learning_rate": 6.880341668004853e-07, "loss": 0.0135, "step": 447330 }, { "epoch": 4.7795288209840265, "grad_norm": 0.6863247752189636, "learning_rate": 6.88018599153791e-07, "loss": 0.0064, "step": 447340 }, { "epoch": 4.779635664298307, "grad_norm": 0.0011603935854509473, "learning_rate": 6.880030312948096e-07, "loss": 0.0048, "step": 447350 }, { "epoch": 4.779742507612586, "grad_norm": 0.001335368724539876, "learning_rate": 6.879874632235585e-07, "loss": 0.002, "step": 447360 }, { "epoch": 4.779849350926866, "grad_norm": 0.009432845748960972, "learning_rate": 6.879718949400555e-07, "loss": 0.0052, "step": 447370 }, { "epoch": 4.779956194241145, "grad_norm": 0.007218772079795599, "learning_rate": 6.879563264443178e-07, "loss": 0.0081, "step": 447380 }, { "epoch": 4.780063037555425, "grad_norm": 0.0006893128738738596, "learning_rate": 6.879407577363634e-07, "loss": 0.002, "step": 447390 }, { "epoch": 4.780169880869705, "grad_norm": 0.0009628972038626671, "learning_rate": 6.879251888162096e-07, "loss": 0.004, "step": 447400 }, { "epoch": 4.7802767241839845, "grad_norm": 0.04159563407301903, "learning_rate": 6.879096196838742e-07, "loss": 0.0002, "step": 447410 }, { "epoch": 4.780383567498264, "grad_norm": 0.6891175508499146, "learning_rate": 6.878940503393747e-07, "loss": 0.012, "step": 447420 }, { "epoch": 4.7804904108125434, "grad_norm": 1.7915034294128418, "learning_rate": 6.878784807827285e-07, "loss": 0.0047, "step": 447430 }, { "epoch": 4.780597254126823, "grad_norm": 0.003621194511651993, "learning_rate": 6.878629110139533e-07, "loss": 0.0069, "step": 447440 }, { "epoch": 4.780704097441102, "grad_norm": 1.8329839706420898, "learning_rate": 6.878473410330668e-07, "loss": 0.0032, "step": 447450 }, { "epoch": 4.780810940755382, "grad_norm": 0.008503040298819542, "learning_rate": 6.878317708400865e-07, "loss": 0.0088, "step": 447460 }, { "epoch": 4.780917784069662, "grad_norm": 0.3243473768234253, "learning_rate": 6.878162004350299e-07, "loss": 0.0037, "step": 447470 }, { "epoch": 4.781024627383942, "grad_norm": 0.4346141517162323, "learning_rate": 6.878006298179146e-07, "loss": 0.0143, "step": 447480 }, { "epoch": 4.781131470698221, "grad_norm": 0.006344730965793133, "learning_rate": 6.877850589887582e-07, "loss": 0.0028, "step": 447490 }, { "epoch": 4.7812383140125005, "grad_norm": 0.003399558598175645, "learning_rate": 6.877694879475784e-07, "loss": 0.0016, "step": 447500 }, { "epoch": 4.78134515732678, "grad_norm": 0.00791329424828291, "learning_rate": 6.877539166943925e-07, "loss": 0.0052, "step": 447510 }, { "epoch": 4.78145200064106, "grad_norm": 0.10089979320764542, "learning_rate": 6.877383452292185e-07, "loss": 0.0072, "step": 447520 }, { "epoch": 4.78155884395534, "grad_norm": 0.4716031849384308, "learning_rate": 6.877227735520736e-07, "loss": 0.0076, "step": 447530 }, { "epoch": 4.781665687269619, "grad_norm": 0.022119391709566116, "learning_rate": 6.877072016629755e-07, "loss": 0.0001, "step": 447540 }, { "epoch": 4.781772530583899, "grad_norm": 0.0009975386783480644, "learning_rate": 6.876916295619418e-07, "loss": 0.0359, "step": 447550 }, { "epoch": 4.781879373898178, "grad_norm": 0.0028703962452709675, "learning_rate": 6.876760572489901e-07, "loss": 0.0017, "step": 447560 }, { "epoch": 4.781986217212458, "grad_norm": 0.0041448213160037994, "learning_rate": 6.87660484724138e-07, "loss": 0.009, "step": 447570 }, { "epoch": 4.782093060526737, "grad_norm": 1.9467942714691162, "learning_rate": 6.87644911987403e-07, "loss": 0.0329, "step": 447580 }, { "epoch": 4.7821999038410175, "grad_norm": 0.06992315500974655, "learning_rate": 6.876293390388027e-07, "loss": 0.0105, "step": 447590 }, { "epoch": 4.782306747155297, "grad_norm": 0.5237975120544434, "learning_rate": 6.876137658783548e-07, "loss": 0.0021, "step": 447600 }, { "epoch": 4.782413590469576, "grad_norm": 1.250423550605774, "learning_rate": 6.875981925060769e-07, "loss": 0.0173, "step": 447610 }, { "epoch": 4.782520433783856, "grad_norm": 3.1074726581573486, "learning_rate": 6.875826189219862e-07, "loss": 0.0176, "step": 447620 }, { "epoch": 4.782627277098135, "grad_norm": 0.06187663972377777, "learning_rate": 6.875670451261008e-07, "loss": 0.0011, "step": 447630 }, { "epoch": 4.782734120412416, "grad_norm": 0.8351467847824097, "learning_rate": 6.87551471118438e-07, "loss": 0.0075, "step": 447640 }, { "epoch": 4.782840963726695, "grad_norm": 0.02524585835635662, "learning_rate": 6.875358968990153e-07, "loss": 0.0022, "step": 447650 }, { "epoch": 4.7829478070409746, "grad_norm": 0.5198317170143127, "learning_rate": 6.875203224678506e-07, "loss": 0.0022, "step": 447660 }, { "epoch": 4.783054650355254, "grad_norm": 0.10202006250619888, "learning_rate": 6.875047478249613e-07, "loss": 0.101, "step": 447670 }, { "epoch": 4.7831614936695335, "grad_norm": 1.7086721658706665, "learning_rate": 6.87489172970365e-07, "loss": 0.0109, "step": 447680 }, { "epoch": 4.783268336983813, "grad_norm": 0.00799036119133234, "learning_rate": 6.874735979040791e-07, "loss": 0.0049, "step": 447690 }, { "epoch": 4.783375180298092, "grad_norm": 1.2645511627197266, "learning_rate": 6.874580226261214e-07, "loss": 0.0138, "step": 447700 }, { "epoch": 4.783482023612373, "grad_norm": 0.18357446789741516, "learning_rate": 6.874424471365094e-07, "loss": 0.0252, "step": 447710 }, { "epoch": 4.783588866926652, "grad_norm": 0.019776763394474983, "learning_rate": 6.874268714352609e-07, "loss": 0.006, "step": 447720 }, { "epoch": 4.783695710240932, "grad_norm": 0.0028586811386048794, "learning_rate": 6.874112955223931e-07, "loss": 0.0038, "step": 447730 }, { "epoch": 4.783802553555211, "grad_norm": 0.0016719975974410772, "learning_rate": 6.87395719397924e-07, "loss": 0.0129, "step": 447740 }, { "epoch": 4.783909396869491, "grad_norm": 0.14764632284641266, "learning_rate": 6.873801430618709e-07, "loss": 0.0086, "step": 447750 }, { "epoch": 4.784016240183771, "grad_norm": 0.005987080745398998, "learning_rate": 6.873645665142516e-07, "loss": 0.0019, "step": 447760 }, { "epoch": 4.78412308349805, "grad_norm": 0.001409747637808323, "learning_rate": 6.873489897550835e-07, "loss": 0.0029, "step": 447770 }, { "epoch": 4.78422992681233, "grad_norm": 0.015191501006484032, "learning_rate": 6.873334127843842e-07, "loss": 0.0072, "step": 447780 }, { "epoch": 4.784336770126609, "grad_norm": 0.7459467649459839, "learning_rate": 6.873178356021714e-07, "loss": 0.0016, "step": 447790 }, { "epoch": 4.784443613440889, "grad_norm": 7.1812310218811035, "learning_rate": 6.873022582084625e-07, "loss": 0.0093, "step": 447800 }, { "epoch": 4.784550456755168, "grad_norm": 0.5459519624710083, "learning_rate": 6.872866806032753e-07, "loss": 0.0052, "step": 447810 }, { "epoch": 4.784657300069448, "grad_norm": 0.06339558213949203, "learning_rate": 6.872711027866274e-07, "loss": 0.0057, "step": 447820 }, { "epoch": 4.784764143383728, "grad_norm": 3.2830758094787598, "learning_rate": 6.872555247585361e-07, "loss": 0.005, "step": 447830 }, { "epoch": 4.7848709866980075, "grad_norm": 0.034519683569669724, "learning_rate": 6.872399465190194e-07, "loss": 0.0001, "step": 447840 }, { "epoch": 4.784977830012287, "grad_norm": 0.5817407965660095, "learning_rate": 6.872243680680945e-07, "loss": 0.0169, "step": 447850 }, { "epoch": 4.785084673326566, "grad_norm": 0.015911370515823364, "learning_rate": 6.872087894057792e-07, "loss": 0.004, "step": 447860 }, { "epoch": 4.785191516640846, "grad_norm": 0.002867325209081173, "learning_rate": 6.87193210532091e-07, "loss": 0.0034, "step": 447870 }, { "epoch": 4.785298359955126, "grad_norm": 0.05407170206308365, "learning_rate": 6.871776314470476e-07, "loss": 0.0034, "step": 447880 }, { "epoch": 4.785405203269406, "grad_norm": 0.8454868197441101, "learning_rate": 6.871620521506665e-07, "loss": 0.0161, "step": 447890 }, { "epoch": 4.785512046583685, "grad_norm": 0.002303985645994544, "learning_rate": 6.871464726429654e-07, "loss": 0.0028, "step": 447900 }, { "epoch": 4.785618889897965, "grad_norm": 0.119559146463871, "learning_rate": 6.871308929239616e-07, "loss": 0.003, "step": 447910 }, { "epoch": 4.785725733212244, "grad_norm": 0.016426974907517433, "learning_rate": 6.871153129936731e-07, "loss": 0.0073, "step": 447920 }, { "epoch": 4.7858325765265235, "grad_norm": 0.0027778276707977057, "learning_rate": 6.870997328521173e-07, "loss": 0.0032, "step": 447930 }, { "epoch": 4.785939419840804, "grad_norm": 0.004881780128926039, "learning_rate": 6.870841524993116e-07, "loss": 0.0001, "step": 447940 }, { "epoch": 4.786046263155083, "grad_norm": 0.04050935432314873, "learning_rate": 6.870685719352741e-07, "loss": 0.0012, "step": 447950 }, { "epoch": 4.786153106469363, "grad_norm": 0.1934252679347992, "learning_rate": 6.870529911600217e-07, "loss": 0.0039, "step": 447960 }, { "epoch": 4.786259949783642, "grad_norm": 1.2099759578704834, "learning_rate": 6.870374101735725e-07, "loss": 0.001, "step": 447970 }, { "epoch": 4.786366793097922, "grad_norm": 0.5292605757713318, "learning_rate": 6.87021828975944e-07, "loss": 0.0047, "step": 447980 }, { "epoch": 4.786473636412201, "grad_norm": 0.003019647905603051, "learning_rate": 6.870062475671536e-07, "loss": 0.0117, "step": 447990 }, { "epoch": 4.7865804797264815, "grad_norm": 0.38559290766716003, "learning_rate": 6.869906659472191e-07, "loss": 0.0072, "step": 448000 }, { "epoch": 4.786687323040761, "grad_norm": 0.08823706209659576, "learning_rate": 6.869750841161581e-07, "loss": 0.0045, "step": 448010 }, { "epoch": 4.78679416635504, "grad_norm": 0.016409479081630707, "learning_rate": 6.86959502073988e-07, "loss": 0.003, "step": 448020 }, { "epoch": 4.78690100966932, "grad_norm": 2.4628396034240723, "learning_rate": 6.869439198207265e-07, "loss": 0.0052, "step": 448030 }, { "epoch": 4.787007852983599, "grad_norm": 0.003090563463047147, "learning_rate": 6.869283373563914e-07, "loss": 0.0035, "step": 448040 }, { "epoch": 4.787114696297879, "grad_norm": 0.0007323719910345972, "learning_rate": 6.869127546809998e-07, "loss": 0.004, "step": 448050 }, { "epoch": 4.787221539612159, "grad_norm": 2.617579221725464, "learning_rate": 6.868971717945699e-07, "loss": 0.0036, "step": 448060 }, { "epoch": 4.787328382926439, "grad_norm": 0.12324588745832443, "learning_rate": 6.868815886971189e-07, "loss": 0.0095, "step": 448070 }, { "epoch": 4.787435226240718, "grad_norm": 0.37278154492378235, "learning_rate": 6.868660053886643e-07, "loss": 0.0006, "step": 448080 }, { "epoch": 4.7875420695549975, "grad_norm": 0.04668041318655014, "learning_rate": 6.868504218692241e-07, "loss": 0.0329, "step": 448090 }, { "epoch": 4.787648912869277, "grad_norm": 0.006152796093374491, "learning_rate": 6.868348381388156e-07, "loss": 0.0071, "step": 448100 }, { "epoch": 4.787755756183557, "grad_norm": 0.004687505308538675, "learning_rate": 6.868192541974564e-07, "loss": 0.0077, "step": 448110 }, { "epoch": 4.787862599497837, "grad_norm": 0.05294857919216156, "learning_rate": 6.868036700451642e-07, "loss": 0.0064, "step": 448120 }, { "epoch": 4.787969442812116, "grad_norm": 0.25560835003852844, "learning_rate": 6.867880856819567e-07, "loss": 0.0028, "step": 448130 }, { "epoch": 4.788076286126396, "grad_norm": 0.02437731996178627, "learning_rate": 6.867725011078511e-07, "loss": 0.001, "step": 448140 }, { "epoch": 4.788183129440675, "grad_norm": 0.32102832198143005, "learning_rate": 6.867569163228655e-07, "loss": 0.0153, "step": 448150 }, { "epoch": 4.788289972754955, "grad_norm": 0.01246895082294941, "learning_rate": 6.867413313270169e-07, "loss": 0.0065, "step": 448160 }, { "epoch": 4.788396816069234, "grad_norm": 0.0011648753425106406, "learning_rate": 6.867257461203236e-07, "loss": 0.0001, "step": 448170 }, { "epoch": 4.788503659383514, "grad_norm": 0.07584819942712784, "learning_rate": 6.867101607028028e-07, "loss": 0.0127, "step": 448180 }, { "epoch": 4.788610502697794, "grad_norm": 0.03060782514512539, "learning_rate": 6.866945750744719e-07, "loss": 0.0095, "step": 448190 }, { "epoch": 4.788717346012073, "grad_norm": 0.2675152122974396, "learning_rate": 6.86678989235349e-07, "loss": 0.0369, "step": 448200 }, { "epoch": 4.788824189326353, "grad_norm": 0.15985724329948425, "learning_rate": 6.866634031854512e-07, "loss": 0.0317, "step": 448210 }, { "epoch": 4.788931032640632, "grad_norm": 2.925575017929077, "learning_rate": 6.866478169247964e-07, "loss": 0.0128, "step": 448220 }, { "epoch": 4.789037875954913, "grad_norm": 2.893205165863037, "learning_rate": 6.866322304534023e-07, "loss": 0.0286, "step": 448230 }, { "epoch": 4.789144719269192, "grad_norm": 0.02842017635703087, "learning_rate": 6.866166437712862e-07, "loss": 0.0114, "step": 448240 }, { "epoch": 4.7892515625834715, "grad_norm": 0.0036106419283896685, "learning_rate": 6.866010568784657e-07, "loss": 0.0015, "step": 448250 }, { "epoch": 4.789358405897751, "grad_norm": 0.09575628489255905, "learning_rate": 6.865854697749586e-07, "loss": 0.004, "step": 448260 }, { "epoch": 4.78946524921203, "grad_norm": 0.009056291542947292, "learning_rate": 6.865698824607827e-07, "loss": 0.0028, "step": 448270 }, { "epoch": 4.78957209252631, "grad_norm": 0.003456861712038517, "learning_rate": 6.865542949359549e-07, "loss": 0.0011, "step": 448280 }, { "epoch": 4.789678935840589, "grad_norm": 3.0789992809295654, "learning_rate": 6.865387072004936e-07, "loss": 0.0052, "step": 448290 }, { "epoch": 4.78978577915487, "grad_norm": 0.03801165893673897, "learning_rate": 6.865231192544158e-07, "loss": 0.0096, "step": 448300 }, { "epoch": 4.789892622469149, "grad_norm": 0.0582856610417366, "learning_rate": 6.865075310977393e-07, "loss": 0.002, "step": 448310 }, { "epoch": 4.789999465783429, "grad_norm": 0.1301436722278595, "learning_rate": 6.864919427304819e-07, "loss": 0.0046, "step": 448320 }, { "epoch": 4.790106309097708, "grad_norm": 1.1312593221664429, "learning_rate": 6.864763541526607e-07, "loss": 0.0022, "step": 448330 }, { "epoch": 4.7902131524119875, "grad_norm": 0.2995612323284149, "learning_rate": 6.864607653642938e-07, "loss": 0.0331, "step": 448340 }, { "epoch": 4.790319995726268, "grad_norm": 1.1345750093460083, "learning_rate": 6.864451763653987e-07, "loss": 0.006, "step": 448350 }, { "epoch": 4.790426839040547, "grad_norm": 0.09374744445085526, "learning_rate": 6.864295871559928e-07, "loss": 0.0173, "step": 448360 }, { "epoch": 4.790533682354827, "grad_norm": 0.021738089621067047, "learning_rate": 6.864139977360938e-07, "loss": 0.0083, "step": 448370 }, { "epoch": 4.790640525669106, "grad_norm": 0.010377932339906693, "learning_rate": 6.863984081057195e-07, "loss": 0.0028, "step": 448380 }, { "epoch": 4.790747368983386, "grad_norm": 0.055909812450408936, "learning_rate": 6.863828182648871e-07, "loss": 0.0018, "step": 448390 }, { "epoch": 4.790854212297665, "grad_norm": 0.0007562924292869866, "learning_rate": 6.863672282136147e-07, "loss": 0.0013, "step": 448400 }, { "epoch": 4.790961055611945, "grad_norm": 0.003548071486875415, "learning_rate": 6.863516379519196e-07, "loss": 0.0113, "step": 448410 }, { "epoch": 4.791067898926225, "grad_norm": 1.55855131149292, "learning_rate": 6.863360474798192e-07, "loss": 0.0082, "step": 448420 }, { "epoch": 4.791174742240504, "grad_norm": 5.064484596252441, "learning_rate": 6.863204567973316e-07, "loss": 0.0084, "step": 448430 }, { "epoch": 4.791281585554784, "grad_norm": 0.3945053815841675, "learning_rate": 6.86304865904474e-07, "loss": 0.002, "step": 448440 }, { "epoch": 4.791388428869063, "grad_norm": 2.4158873558044434, "learning_rate": 6.862892748012641e-07, "loss": 0.0461, "step": 448450 }, { "epoch": 4.791495272183343, "grad_norm": 0.019581833854317665, "learning_rate": 6.862736834877196e-07, "loss": 0.0096, "step": 448460 }, { "epoch": 4.791602115497623, "grad_norm": 2.4695351123809814, "learning_rate": 6.862580919638581e-07, "loss": 0.0057, "step": 448470 }, { "epoch": 4.791708958811903, "grad_norm": 15.143118858337402, "learning_rate": 6.862425002296971e-07, "loss": 0.0274, "step": 448480 }, { "epoch": 4.791815802126182, "grad_norm": 0.0591774620115757, "learning_rate": 6.862269082852543e-07, "loss": 0.012, "step": 448490 }, { "epoch": 4.7919226454404615, "grad_norm": 0.00030688385595567524, "learning_rate": 6.862113161305474e-07, "loss": 0.0177, "step": 448500 }, { "epoch": 4.792029488754741, "grad_norm": 0.07075612246990204, "learning_rate": 6.861957237655935e-07, "loss": 0.0052, "step": 448510 }, { "epoch": 4.7921363320690205, "grad_norm": 6.134954929351807, "learning_rate": 6.861801311904109e-07, "loss": 0.0052, "step": 448520 }, { "epoch": 4.7922431753833, "grad_norm": 0.0024847984313964844, "learning_rate": 6.861645384050167e-07, "loss": 0.0017, "step": 448530 }, { "epoch": 4.79235001869758, "grad_norm": 0.022401051595807076, "learning_rate": 6.861489454094288e-07, "loss": 0.0033, "step": 448540 }, { "epoch": 4.79245686201186, "grad_norm": 2.1598126888275146, "learning_rate": 6.861333522036648e-07, "loss": 0.0069, "step": 448550 }, { "epoch": 4.792563705326139, "grad_norm": 4.814889907836914, "learning_rate": 6.861177587877419e-07, "loss": 0.0068, "step": 448560 }, { "epoch": 4.792670548640419, "grad_norm": 0.13672354817390442, "learning_rate": 6.861021651616782e-07, "loss": 0.0027, "step": 448570 }, { "epoch": 4.792777391954698, "grad_norm": 6.44766092300415, "learning_rate": 6.860865713254911e-07, "loss": 0.0049, "step": 448580 }, { "epoch": 4.792884235268978, "grad_norm": 0.014063562266528606, "learning_rate": 6.860709772791982e-07, "loss": 0.0111, "step": 448590 }, { "epoch": 4.792991078583258, "grad_norm": 0.03363805264234543, "learning_rate": 6.860553830228171e-07, "loss": 0.0112, "step": 448600 }, { "epoch": 4.793097921897537, "grad_norm": 0.10043526440858841, "learning_rate": 6.860397885563654e-07, "loss": 0.0016, "step": 448610 }, { "epoch": 4.793204765211817, "grad_norm": 0.0055324058048427105, "learning_rate": 6.860241938798607e-07, "loss": 0.0003, "step": 448620 }, { "epoch": 4.793311608526096, "grad_norm": 0.0151791051030159, "learning_rate": 6.860085989933209e-07, "loss": 0.0011, "step": 448630 }, { "epoch": 4.793418451840376, "grad_norm": 8.238168716430664, "learning_rate": 6.859930038967631e-07, "loss": 0.0333, "step": 448640 }, { "epoch": 4.793525295154656, "grad_norm": 0.004605958238244057, "learning_rate": 6.859774085902052e-07, "loss": 0.0018, "step": 448650 }, { "epoch": 4.7936321384689355, "grad_norm": 0.057787831872701645, "learning_rate": 6.859618130736647e-07, "loss": 0.0486, "step": 448660 }, { "epoch": 4.793738981783215, "grad_norm": 16.622400283813477, "learning_rate": 6.859462173471595e-07, "loss": 0.0042, "step": 448670 }, { "epoch": 4.7938458250974945, "grad_norm": 0.36457109451293945, "learning_rate": 6.859306214107068e-07, "loss": 0.0321, "step": 448680 }, { "epoch": 4.793952668411774, "grad_norm": 0.0024745240807533264, "learning_rate": 6.859150252643244e-07, "loss": 0.0255, "step": 448690 }, { "epoch": 4.794059511726053, "grad_norm": 0.0007701400900259614, "learning_rate": 6.858994289080301e-07, "loss": 0.0037, "step": 448700 }, { "epoch": 4.794166355040334, "grad_norm": 5.481106758117676, "learning_rate": 6.858838323418411e-07, "loss": 0.0046, "step": 448710 }, { "epoch": 4.794273198354613, "grad_norm": 1.9421976804733276, "learning_rate": 6.858682355657754e-07, "loss": 0.0084, "step": 448720 }, { "epoch": 4.794380041668893, "grad_norm": 2.9701852798461914, "learning_rate": 6.858526385798502e-07, "loss": 0.0226, "step": 448730 }, { "epoch": 4.794486884983172, "grad_norm": 0.46935102343559265, "learning_rate": 6.858370413840835e-07, "loss": 0.0168, "step": 448740 }, { "epoch": 4.794593728297452, "grad_norm": 4.614078044891357, "learning_rate": 6.858214439784928e-07, "loss": 0.04, "step": 448750 }, { "epoch": 4.794700571611731, "grad_norm": 0.0016824386548250914, "learning_rate": 6.858058463630956e-07, "loss": 0.0129, "step": 448760 }, { "epoch": 4.794807414926011, "grad_norm": 0.0010108210844919086, "learning_rate": 6.857902485379096e-07, "loss": 0.0051, "step": 448770 }, { "epoch": 4.794914258240291, "grad_norm": 0.3774777054786682, "learning_rate": 6.857746505029524e-07, "loss": 0.0051, "step": 448780 }, { "epoch": 4.79502110155457, "grad_norm": 0.03476810082793236, "learning_rate": 6.857590522582415e-07, "loss": 0.0016, "step": 448790 }, { "epoch": 4.79512794486885, "grad_norm": 0.019369803369045258, "learning_rate": 6.857434538037947e-07, "loss": 0.0007, "step": 448800 }, { "epoch": 4.795234788183129, "grad_norm": 0.003780352883040905, "learning_rate": 6.857278551396295e-07, "loss": 0.0004, "step": 448810 }, { "epoch": 4.795341631497409, "grad_norm": 0.011335290037095547, "learning_rate": 6.857122562657634e-07, "loss": 0.0024, "step": 448820 }, { "epoch": 4.795448474811689, "grad_norm": 0.0016505050007253885, "learning_rate": 6.856966571822144e-07, "loss": 0.0011, "step": 448830 }, { "epoch": 4.7955553181259685, "grad_norm": 0.004010838456451893, "learning_rate": 6.856810578889998e-07, "loss": 0.0083, "step": 448840 }, { "epoch": 4.795662161440248, "grad_norm": 0.6509959101676941, "learning_rate": 6.856654583861371e-07, "loss": 0.003, "step": 448850 }, { "epoch": 4.795769004754527, "grad_norm": 0.0023246563505381346, "learning_rate": 6.856498586736442e-07, "loss": 0.0073, "step": 448860 }, { "epoch": 4.795875848068807, "grad_norm": 5.160618782043457, "learning_rate": 6.856342587515387e-07, "loss": 0.0078, "step": 448870 }, { "epoch": 4.795982691383086, "grad_norm": 0.14844079315662384, "learning_rate": 6.856186586198379e-07, "loss": 0.0043, "step": 448880 }, { "epoch": 4.796089534697367, "grad_norm": 0.005126927513629198, "learning_rate": 6.856030582785598e-07, "loss": 0.0123, "step": 448890 }, { "epoch": 4.796196378011646, "grad_norm": 0.7732664942741394, "learning_rate": 6.855874577277218e-07, "loss": 0.0051, "step": 448900 }, { "epoch": 4.796303221325926, "grad_norm": 2.2444403171539307, "learning_rate": 6.855718569673414e-07, "loss": 0.0136, "step": 448910 }, { "epoch": 4.796410064640205, "grad_norm": 5.202573299407959, "learning_rate": 6.855562559974366e-07, "loss": 0.0336, "step": 448920 }, { "epoch": 4.7965169079544845, "grad_norm": 0.047411516308784485, "learning_rate": 6.855406548180246e-07, "loss": 0.0219, "step": 448930 }, { "epoch": 4.796623751268765, "grad_norm": 0.013672729954123497, "learning_rate": 6.855250534291233e-07, "loss": 0.0013, "step": 448940 }, { "epoch": 4.796730594583044, "grad_norm": 0.014748882502317429, "learning_rate": 6.855094518307501e-07, "loss": 0.012, "step": 448950 }, { "epoch": 4.796837437897324, "grad_norm": 0.08054235577583313, "learning_rate": 6.854938500229228e-07, "loss": 0.021, "step": 448960 }, { "epoch": 4.796944281211603, "grad_norm": 0.16288504004478455, "learning_rate": 6.85478248005659e-07, "loss": 0.0059, "step": 448970 }, { "epoch": 4.797051124525883, "grad_norm": 4.2668914794921875, "learning_rate": 6.854626457789761e-07, "loss": 0.0254, "step": 448980 }, { "epoch": 4.797157967840162, "grad_norm": 1.5519858598709106, "learning_rate": 6.854470433428921e-07, "loss": 0.006, "step": 448990 }, { "epoch": 4.797264811154442, "grad_norm": 0.0083747124299407, "learning_rate": 6.854314406974242e-07, "loss": 0.0369, "step": 449000 }, { "epoch": 4.797371654468722, "grad_norm": 3.371232032775879, "learning_rate": 6.854158378425903e-07, "loss": 0.0346, "step": 449010 }, { "epoch": 4.797478497783001, "grad_norm": 0.0897931307554245, "learning_rate": 6.854002347784078e-07, "loss": 0.0075, "step": 449020 }, { "epoch": 4.797585341097281, "grad_norm": 1.8472340106964111, "learning_rate": 6.853846315048947e-07, "loss": 0.011, "step": 449030 }, { "epoch": 4.79769218441156, "grad_norm": 0.1327666938304901, "learning_rate": 6.853690280220682e-07, "loss": 0.0204, "step": 449040 }, { "epoch": 4.79779902772584, "grad_norm": 1.4548516273498535, "learning_rate": 6.853534243299461e-07, "loss": 0.002, "step": 449050 }, { "epoch": 4.79790587104012, "grad_norm": 0.007653295528143644, "learning_rate": 6.853378204285459e-07, "loss": 0.0111, "step": 449060 }, { "epoch": 4.7980127143544, "grad_norm": 0.01989796943962574, "learning_rate": 6.853222163178855e-07, "loss": 0.0068, "step": 449070 }, { "epoch": 4.798119557668679, "grad_norm": 0.09895379841327667, "learning_rate": 6.853066119979821e-07, "loss": 0.0018, "step": 449080 }, { "epoch": 4.7982264009829585, "grad_norm": 0.004079905804246664, "learning_rate": 6.852910074688538e-07, "loss": 0.0091, "step": 449090 }, { "epoch": 4.798333244297238, "grad_norm": 0.017001423984766006, "learning_rate": 6.852754027305179e-07, "loss": 0.0117, "step": 449100 }, { "epoch": 4.798440087611517, "grad_norm": 0.05870566889643669, "learning_rate": 6.85259797782992e-07, "loss": 0.0001, "step": 449110 }, { "epoch": 4.798546930925797, "grad_norm": 0.05615859851241112, "learning_rate": 6.852441926262938e-07, "loss": 0.0113, "step": 449120 }, { "epoch": 4.798653774240077, "grad_norm": 0.028621073812246323, "learning_rate": 6.852285872604409e-07, "loss": 0.011, "step": 449130 }, { "epoch": 4.798760617554357, "grad_norm": 1.2796790599822998, "learning_rate": 6.85212981685451e-07, "loss": 0.0224, "step": 449140 }, { "epoch": 4.798867460868636, "grad_norm": 5.485055923461914, "learning_rate": 6.851973759013418e-07, "loss": 0.002, "step": 449150 }, { "epoch": 4.798974304182916, "grad_norm": 0.0075354985892772675, "learning_rate": 6.851817699081306e-07, "loss": 0.0043, "step": 449160 }, { "epoch": 4.799081147497195, "grad_norm": 0.5733178853988647, "learning_rate": 6.851661637058352e-07, "loss": 0.0167, "step": 449170 }, { "epoch": 4.799187990811475, "grad_norm": 0.17352549731731415, "learning_rate": 6.851505572944734e-07, "loss": 0.0025, "step": 449180 }, { "epoch": 4.799294834125755, "grad_norm": 0.003033071057870984, "learning_rate": 6.851349506740624e-07, "loss": 0.0026, "step": 449190 }, { "epoch": 4.799401677440034, "grad_norm": 7.994915008544922, "learning_rate": 6.851193438446204e-07, "loss": 0.0072, "step": 449200 }, { "epoch": 4.799508520754314, "grad_norm": 1.048168659210205, "learning_rate": 6.851037368061644e-07, "loss": 0.0202, "step": 449210 }, { "epoch": 4.799615364068593, "grad_norm": 0.0529203899204731, "learning_rate": 6.850881295587123e-07, "loss": 0.0027, "step": 449220 }, { "epoch": 4.799722207382873, "grad_norm": 2.2602624893188477, "learning_rate": 6.85072522102282e-07, "loss": 0.003, "step": 449230 }, { "epoch": 4.799829050697152, "grad_norm": 0.508711576461792, "learning_rate": 6.850569144368907e-07, "loss": 0.0106, "step": 449240 }, { "epoch": 4.7999358940114325, "grad_norm": 0.39677080512046814, "learning_rate": 6.850413065625561e-07, "loss": 0.0115, "step": 449250 }, { "epoch": 4.800042737325712, "grad_norm": 0.0036106084007769823, "learning_rate": 6.85025698479296e-07, "loss": 0.0003, "step": 449260 }, { "epoch": 4.800149580639991, "grad_norm": 0.012472853995859623, "learning_rate": 6.850100901871279e-07, "loss": 0.013, "step": 449270 }, { "epoch": 4.800256423954271, "grad_norm": 8.238590240478516, "learning_rate": 6.849944816860694e-07, "loss": 0.0171, "step": 449280 }, { "epoch": 4.80036326726855, "grad_norm": 3.840998888015747, "learning_rate": 6.84978872976138e-07, "loss": 0.0209, "step": 449290 }, { "epoch": 4.800470110582831, "grad_norm": 0.01040051132440567, "learning_rate": 6.849632640573518e-07, "loss": 0.0014, "step": 449300 }, { "epoch": 4.80057695389711, "grad_norm": 2.560753107070923, "learning_rate": 6.84947654929728e-07, "loss": 0.0084, "step": 449310 }, { "epoch": 4.80068379721139, "grad_norm": 0.4386770725250244, "learning_rate": 6.849320455932843e-07, "loss": 0.0034, "step": 449320 }, { "epoch": 4.800790640525669, "grad_norm": 0.9231902360916138, "learning_rate": 6.849164360480384e-07, "loss": 0.0022, "step": 449330 }, { "epoch": 4.8008974838399485, "grad_norm": 0.42474788427352905, "learning_rate": 6.849008262940079e-07, "loss": 0.0054, "step": 449340 }, { "epoch": 4.801004327154228, "grad_norm": 0.014131812378764153, "learning_rate": 6.848852163312106e-07, "loss": 0.001, "step": 449350 }, { "epoch": 4.801111170468507, "grad_norm": 0.10792843252420425, "learning_rate": 6.848696061596636e-07, "loss": 0.0012, "step": 449360 }, { "epoch": 4.801218013782788, "grad_norm": 0.10277849435806274, "learning_rate": 6.848539957793851e-07, "loss": 0.0029, "step": 449370 }, { "epoch": 4.801324857097067, "grad_norm": 0.39942270517349243, "learning_rate": 6.848383851903924e-07, "loss": 0.0001, "step": 449380 }, { "epoch": 4.801431700411347, "grad_norm": 0.007457965984940529, "learning_rate": 6.848227743927031e-07, "loss": 0.0067, "step": 449390 }, { "epoch": 4.801538543725626, "grad_norm": 0.010003991425037384, "learning_rate": 6.84807163386335e-07, "loss": 0.002, "step": 449400 }, { "epoch": 4.801645387039906, "grad_norm": 7.200416088104248, "learning_rate": 6.847915521713059e-07, "loss": 0.0074, "step": 449410 }, { "epoch": 4.801752230354186, "grad_norm": 0.0454980693757534, "learning_rate": 6.847759407476328e-07, "loss": 0.0063, "step": 449420 }, { "epoch": 4.801859073668465, "grad_norm": 12.95574951171875, "learning_rate": 6.847603291153341e-07, "loss": 0.0181, "step": 449430 }, { "epoch": 4.801965916982745, "grad_norm": 8.70814037322998, "learning_rate": 6.847447172744268e-07, "loss": 0.0116, "step": 449440 }, { "epoch": 4.802072760297024, "grad_norm": 0.6070240139961243, "learning_rate": 6.847291052249289e-07, "loss": 0.0084, "step": 449450 }, { "epoch": 4.802179603611304, "grad_norm": 0.3659602403640747, "learning_rate": 6.847134929668577e-07, "loss": 0.0072, "step": 449460 }, { "epoch": 4.802286446925583, "grad_norm": 0.004534163977950811, "learning_rate": 6.846978805002312e-07, "loss": 0.0009, "step": 449470 }, { "epoch": 4.802393290239864, "grad_norm": 0.040920190513134, "learning_rate": 6.846822678250668e-07, "loss": 0.0103, "step": 449480 }, { "epoch": 4.802500133554143, "grad_norm": 1.7427321672439575, "learning_rate": 6.846666549413822e-07, "loss": 0.0045, "step": 449490 }, { "epoch": 4.8026069768684225, "grad_norm": 0.0036636418662965298, "learning_rate": 6.846510418491949e-07, "loss": 0.0005, "step": 449500 }, { "epoch": 4.802713820182702, "grad_norm": 0.4243239462375641, "learning_rate": 6.846354285485228e-07, "loss": 0.0081, "step": 449510 }, { "epoch": 4.8028206634969814, "grad_norm": 0.01518159918487072, "learning_rate": 6.846198150393834e-07, "loss": 0.0009, "step": 449520 }, { "epoch": 4.802927506811261, "grad_norm": 0.00035423587542027235, "learning_rate": 6.846042013217942e-07, "loss": 0.0013, "step": 449530 }, { "epoch": 4.803034350125541, "grad_norm": 2.1250808238983154, "learning_rate": 6.845885873957729e-07, "loss": 0.003, "step": 449540 }, { "epoch": 4.803141193439821, "grad_norm": 0.06685716658830643, "learning_rate": 6.845729732613373e-07, "loss": 0.0002, "step": 449550 }, { "epoch": 4.8032480367541, "grad_norm": 0.004898483864963055, "learning_rate": 6.845573589185046e-07, "loss": 0.0098, "step": 449560 }, { "epoch": 4.80335488006838, "grad_norm": 0.3655153214931488, "learning_rate": 6.84541744367293e-07, "loss": 0.0081, "step": 449570 }, { "epoch": 4.803461723382659, "grad_norm": 0.1313677281141281, "learning_rate": 6.845261296077196e-07, "loss": 0.0242, "step": 449580 }, { "epoch": 4.8035685666969385, "grad_norm": 0.028352316468954086, "learning_rate": 6.845105146398025e-07, "loss": 0.0023, "step": 449590 }, { "epoch": 4.803675410011219, "grad_norm": 0.002728075720369816, "learning_rate": 6.844948994635589e-07, "loss": 0.0083, "step": 449600 }, { "epoch": 4.803782253325498, "grad_norm": 0.06376463174819946, "learning_rate": 6.844792840790067e-07, "loss": 0.0018, "step": 449610 }, { "epoch": 4.803889096639778, "grad_norm": 0.3984777629375458, "learning_rate": 6.844636684861634e-07, "loss": 0.0009, "step": 449620 }, { "epoch": 4.803995939954057, "grad_norm": 0.003001562785357237, "learning_rate": 6.844480526850469e-07, "loss": 0.0243, "step": 449630 }, { "epoch": 4.804102783268337, "grad_norm": 2.9772958755493164, "learning_rate": 6.844324366756745e-07, "loss": 0.0111, "step": 449640 }, { "epoch": 4.804209626582617, "grad_norm": 0.072374127805233, "learning_rate": 6.84416820458064e-07, "loss": 0.0011, "step": 449650 }, { "epoch": 4.8043164698968965, "grad_norm": 0.010989530943334103, "learning_rate": 6.844012040322329e-07, "loss": 0.0029, "step": 449660 }, { "epoch": 4.804423313211176, "grad_norm": 0.03689555451273918, "learning_rate": 6.843855873981991e-07, "loss": 0.0129, "step": 449670 }, { "epoch": 4.8045301565254555, "grad_norm": 0.03756541386246681, "learning_rate": 6.843699705559798e-07, "loss": 0.0041, "step": 449680 }, { "epoch": 4.804636999839735, "grad_norm": 0.038970015943050385, "learning_rate": 6.843543535055931e-07, "loss": 0.0004, "step": 449690 }, { "epoch": 4.804743843154014, "grad_norm": 2.531066417694092, "learning_rate": 6.843387362470563e-07, "loss": 0.0684, "step": 449700 }, { "epoch": 4.804850686468294, "grad_norm": 5.430006504058838, "learning_rate": 6.843231187803873e-07, "loss": 0.0345, "step": 449710 }, { "epoch": 4.804957529782574, "grad_norm": 0.402908593416214, "learning_rate": 6.843075011056035e-07, "loss": 0.0044, "step": 449720 }, { "epoch": 4.805064373096854, "grad_norm": 0.00899613369256258, "learning_rate": 6.842918832227225e-07, "loss": 0.0103, "step": 449730 }, { "epoch": 4.805171216411133, "grad_norm": 1.210863471031189, "learning_rate": 6.842762651317622e-07, "loss": 0.0056, "step": 449740 }, { "epoch": 4.8052780597254126, "grad_norm": 0.007706106640398502, "learning_rate": 6.842606468327401e-07, "loss": 0.002, "step": 449750 }, { "epoch": 4.805384903039692, "grad_norm": 0.014179438352584839, "learning_rate": 6.842450283256738e-07, "loss": 0.0034, "step": 449760 }, { "epoch": 4.805491746353972, "grad_norm": 0.009075041860342026, "learning_rate": 6.84229409610581e-07, "loss": 0.0032, "step": 449770 }, { "epoch": 4.805598589668252, "grad_norm": 1.015509843826294, "learning_rate": 6.842137906874791e-07, "loss": 0.0006, "step": 449780 }, { "epoch": 4.805705432982531, "grad_norm": 0.986436128616333, "learning_rate": 6.841981715563861e-07, "loss": 0.0281, "step": 449790 }, { "epoch": 4.805812276296811, "grad_norm": 3.362889528274536, "learning_rate": 6.841825522173195e-07, "loss": 0.0038, "step": 449800 }, { "epoch": 4.80591911961109, "grad_norm": 0.005555460229516029, "learning_rate": 6.841669326702968e-07, "loss": 0.0035, "step": 449810 }, { "epoch": 4.80602596292537, "grad_norm": 0.018082531169056892, "learning_rate": 6.841513129153357e-07, "loss": 0.0001, "step": 449820 }, { "epoch": 4.806132806239649, "grad_norm": 0.1571764498949051, "learning_rate": 6.84135692952454e-07, "loss": 0.011, "step": 449830 }, { "epoch": 4.8062396495539295, "grad_norm": 0.002880091778934002, "learning_rate": 6.841200727816691e-07, "loss": 0.0017, "step": 449840 }, { "epoch": 4.806346492868209, "grad_norm": 0.00541359419003129, "learning_rate": 6.841044524029988e-07, "loss": 0.0335, "step": 449850 }, { "epoch": 4.806453336182488, "grad_norm": 0.001892134197987616, "learning_rate": 6.840888318164606e-07, "loss": 0.0104, "step": 449860 }, { "epoch": 4.806560179496768, "grad_norm": 0.7629048228263855, "learning_rate": 6.840732110220723e-07, "loss": 0.0149, "step": 449870 }, { "epoch": 4.806667022811047, "grad_norm": 0.005349814426153898, "learning_rate": 6.840575900198513e-07, "loss": 0.0628, "step": 449880 }, { "epoch": 4.806773866125328, "grad_norm": 0.12878836691379547, "learning_rate": 6.840419688098156e-07, "loss": 0.0185, "step": 449890 }, { "epoch": 4.806880709439607, "grad_norm": 0.42248618602752686, "learning_rate": 6.840263473919825e-07, "loss": 0.0048, "step": 449900 }, { "epoch": 4.806987552753887, "grad_norm": 0.019934672862291336, "learning_rate": 6.840107257663698e-07, "loss": 0.0174, "step": 449910 }, { "epoch": 4.807094396068166, "grad_norm": 0.28493136167526245, "learning_rate": 6.83995103932995e-07, "loss": 0.0095, "step": 449920 }, { "epoch": 4.8072012393824455, "grad_norm": 0.04080633819103241, "learning_rate": 6.839794818918761e-07, "loss": 0.0069, "step": 449930 }, { "epoch": 4.807308082696725, "grad_norm": 0.04284396022558212, "learning_rate": 6.839638596430302e-07, "loss": 0.004, "step": 449940 }, { "epoch": 4.807414926011004, "grad_norm": 0.48704707622528076, "learning_rate": 6.839482371864754e-07, "loss": 0.0051, "step": 449950 }, { "epoch": 4.807521769325285, "grad_norm": 0.007049637380987406, "learning_rate": 6.839326145222289e-07, "loss": 0.0063, "step": 449960 }, { "epoch": 4.807628612639564, "grad_norm": 2.1889090538024902, "learning_rate": 6.83916991650309e-07, "loss": 0.0027, "step": 449970 }, { "epoch": 4.807735455953844, "grad_norm": 0.07962249219417572, "learning_rate": 6.839013685707326e-07, "loss": 0.0352, "step": 449980 }, { "epoch": 4.807842299268123, "grad_norm": 3.256587505340576, "learning_rate": 6.838857452835178e-07, "loss": 0.0284, "step": 449990 }, { "epoch": 4.807949142582403, "grad_norm": 0.0474931001663208, "learning_rate": 6.838701217886822e-07, "loss": 0.0003, "step": 450000 }, { "epoch": 4.808055985896683, "grad_norm": 0.02139362506568432, "learning_rate": 6.838544980862433e-07, "loss": 0.0006, "step": 450010 }, { "epoch": 4.808162829210962, "grad_norm": 0.4378754794597626, "learning_rate": 6.838388741762186e-07, "loss": 0.0246, "step": 450020 }, { "epoch": 4.808269672525242, "grad_norm": 0.20570120215415955, "learning_rate": 6.838232500586262e-07, "loss": 0.0021, "step": 450030 }, { "epoch": 4.808376515839521, "grad_norm": 0.25146907567977905, "learning_rate": 6.838076257334834e-07, "loss": 0.0292, "step": 450040 }, { "epoch": 4.808483359153801, "grad_norm": 0.45938268303871155, "learning_rate": 6.837920012008078e-07, "loss": 0.0125, "step": 450050 }, { "epoch": 4.80859020246808, "grad_norm": 6.347445011138916, "learning_rate": 6.837763764606173e-07, "loss": 0.0191, "step": 450060 }, { "epoch": 4.80869704578236, "grad_norm": 0.018489984795451164, "learning_rate": 6.837607515129294e-07, "loss": 0.0015, "step": 450070 }, { "epoch": 4.80880388909664, "grad_norm": 1.1236523389816284, "learning_rate": 6.837451263577616e-07, "loss": 0.0105, "step": 450080 }, { "epoch": 4.8089107324109195, "grad_norm": 0.08384031057357788, "learning_rate": 6.837295009951319e-07, "loss": 0.0036, "step": 450090 }, { "epoch": 4.809017575725199, "grad_norm": 0.1128469780087471, "learning_rate": 6.837138754250575e-07, "loss": 0.0071, "step": 450100 }, { "epoch": 4.809124419039478, "grad_norm": 0.2641923427581787, "learning_rate": 6.836982496475564e-07, "loss": 0.0007, "step": 450110 }, { "epoch": 4.809231262353758, "grad_norm": 0.07139883935451508, "learning_rate": 6.836826236626461e-07, "loss": 0.0034, "step": 450120 }, { "epoch": 4.809338105668038, "grad_norm": 0.009331864304840565, "learning_rate": 6.836669974703443e-07, "loss": 0.006, "step": 450130 }, { "epoch": 4.809444948982318, "grad_norm": 5.827654838562012, "learning_rate": 6.836513710706685e-07, "loss": 0.0109, "step": 450140 }, { "epoch": 4.809551792296597, "grad_norm": 0.007619227282702923, "learning_rate": 6.836357444636366e-07, "loss": 0.0725, "step": 450150 }, { "epoch": 4.809658635610877, "grad_norm": 2.6531360149383545, "learning_rate": 6.836201176492658e-07, "loss": 0.0336, "step": 450160 }, { "epoch": 4.809765478925156, "grad_norm": 0.9420948028564453, "learning_rate": 6.836044906275743e-07, "loss": 0.0141, "step": 450170 }, { "epoch": 4.8098723222394355, "grad_norm": 1.9525349140167236, "learning_rate": 6.835888633985793e-07, "loss": 0.0046, "step": 450180 }, { "epoch": 4.809979165553716, "grad_norm": 0.0011930440086871386, "learning_rate": 6.835732359622988e-07, "loss": 0.003, "step": 450190 }, { "epoch": 4.810086008867995, "grad_norm": 0.011228363960981369, "learning_rate": 6.835576083187501e-07, "loss": 0.0024, "step": 450200 }, { "epoch": 4.810192852182275, "grad_norm": 0.6899672746658325, "learning_rate": 6.83541980467951e-07, "loss": 0.0011, "step": 450210 }, { "epoch": 4.810299695496554, "grad_norm": 0.04054155573248863, "learning_rate": 6.835263524099192e-07, "loss": 0.0125, "step": 450220 }, { "epoch": 4.810406538810834, "grad_norm": 0.9348686933517456, "learning_rate": 6.835107241446722e-07, "loss": 0.0034, "step": 450230 }, { "epoch": 4.810513382125113, "grad_norm": 4.067798137664795, "learning_rate": 6.83495095672228e-07, "loss": 0.0139, "step": 450240 }, { "epoch": 4.8106202254393935, "grad_norm": 0.03154506906867027, "learning_rate": 6.834794669926038e-07, "loss": 0.0021, "step": 450250 }, { "epoch": 4.810727068753673, "grad_norm": 4.668880939483643, "learning_rate": 6.834638381058174e-07, "loss": 0.018, "step": 450260 }, { "epoch": 4.810833912067952, "grad_norm": 13.574012756347656, "learning_rate": 6.834482090118867e-07, "loss": 0.0164, "step": 450270 }, { "epoch": 4.810940755382232, "grad_norm": 0.0027599805034697056, "learning_rate": 6.834325797108289e-07, "loss": 0.0036, "step": 450280 }, { "epoch": 4.811047598696511, "grad_norm": 8.58021068572998, "learning_rate": 6.834169502026619e-07, "loss": 0.0145, "step": 450290 }, { "epoch": 4.811154442010791, "grad_norm": 11.930619239807129, "learning_rate": 6.834013204874034e-07, "loss": 0.0199, "step": 450300 }, { "epoch": 4.811261285325071, "grad_norm": 0.5986418724060059, "learning_rate": 6.833856905650709e-07, "loss": 0.012, "step": 450310 }, { "epoch": 4.811368128639351, "grad_norm": 5.275291919708252, "learning_rate": 6.833700604356822e-07, "loss": 0.0195, "step": 450320 }, { "epoch": 4.81147497195363, "grad_norm": 0.047642745077610016, "learning_rate": 6.833544300992548e-07, "loss": 0.0141, "step": 450330 }, { "epoch": 4.8115818152679095, "grad_norm": 0.02014339528977871, "learning_rate": 6.833387995558064e-07, "loss": 0.0108, "step": 450340 }, { "epoch": 4.811688658582189, "grad_norm": 0.06831954419612885, "learning_rate": 6.833231688053546e-07, "loss": 0.0151, "step": 450350 }, { "epoch": 4.811795501896469, "grad_norm": 0.13504113256931305, "learning_rate": 6.833075378479173e-07, "loss": 0.0003, "step": 450360 }, { "epoch": 4.811902345210749, "grad_norm": 0.8000100255012512, "learning_rate": 6.832919066835119e-07, "loss": 0.0064, "step": 450370 }, { "epoch": 4.812009188525028, "grad_norm": 0.1010480523109436, "learning_rate": 6.83276275312156e-07, "loss": 0.0025, "step": 450380 }, { "epoch": 4.812116031839308, "grad_norm": 0.017159024253487587, "learning_rate": 6.832606437338674e-07, "loss": 0.0084, "step": 450390 }, { "epoch": 4.812222875153587, "grad_norm": 1.6096725463867188, "learning_rate": 6.832450119486637e-07, "loss": 0.009, "step": 450400 }, { "epoch": 4.812329718467867, "grad_norm": 0.08647719770669937, "learning_rate": 6.832293799565626e-07, "loss": 0.0051, "step": 450410 }, { "epoch": 4.812436561782146, "grad_norm": 0.0035890855360776186, "learning_rate": 6.832137477575816e-07, "loss": 0.0098, "step": 450420 }, { "epoch": 4.812543405096426, "grad_norm": 0.03493354842066765, "learning_rate": 6.831981153517385e-07, "loss": 0.001, "step": 450430 }, { "epoch": 4.812650248410706, "grad_norm": 1.4771283864974976, "learning_rate": 6.83182482739051e-07, "loss": 0.0017, "step": 450440 }, { "epoch": 4.812757091724985, "grad_norm": 0.02706971950829029, "learning_rate": 6.831668499195364e-07, "loss": 0.0078, "step": 450450 }, { "epoch": 4.812863935039265, "grad_norm": 0.23534925282001495, "learning_rate": 6.831512168932129e-07, "loss": 0.0143, "step": 450460 }, { "epoch": 4.812970778353544, "grad_norm": 0.0023761901538819075, "learning_rate": 6.831355836600976e-07, "loss": 0.0072, "step": 450470 }, { "epoch": 4.813077621667825, "grad_norm": 0.03270510584115982, "learning_rate": 6.831199502202085e-07, "loss": 0.0041, "step": 450480 }, { "epoch": 4.813184464982104, "grad_norm": 3.671849489212036, "learning_rate": 6.831043165735632e-07, "loss": 0.0225, "step": 450490 }, { "epoch": 4.8132913082963835, "grad_norm": 0.025040097534656525, "learning_rate": 6.830886827201793e-07, "loss": 0.0014, "step": 450500 }, { "epoch": 4.813398151610663, "grad_norm": 0.2794150114059448, "learning_rate": 6.830730486600744e-07, "loss": 0.0002, "step": 450510 }, { "epoch": 4.813504994924942, "grad_norm": 0.014596354216337204, "learning_rate": 6.830574143932664e-07, "loss": 0.0005, "step": 450520 }, { "epoch": 4.813611838239222, "grad_norm": 1.4770797491073608, "learning_rate": 6.830417799197725e-07, "loss": 0.005, "step": 450530 }, { "epoch": 4.813718681553501, "grad_norm": 2.259105682373047, "learning_rate": 6.830261452396107e-07, "loss": 0.0024, "step": 450540 }, { "epoch": 4.813825524867782, "grad_norm": 0.0064817084930837154, "learning_rate": 6.830105103527985e-07, "loss": 0.0037, "step": 450550 }, { "epoch": 4.813932368182061, "grad_norm": 0.0038025809917598963, "learning_rate": 6.829948752593539e-07, "loss": 0.0151, "step": 450560 }, { "epoch": 4.814039211496341, "grad_norm": 0.008045804686844349, "learning_rate": 6.82979239959294e-07, "loss": 0.0054, "step": 450570 }, { "epoch": 4.81414605481062, "grad_norm": 3.5645980834960938, "learning_rate": 6.829636044526368e-07, "loss": 0.0345, "step": 450580 }, { "epoch": 4.8142528981248995, "grad_norm": 1.6167640686035156, "learning_rate": 6.829479687393998e-07, "loss": 0.0071, "step": 450590 }, { "epoch": 4.81435974143918, "grad_norm": 6.249877452850342, "learning_rate": 6.82932332819601e-07, "loss": 0.009, "step": 450600 }, { "epoch": 4.814466584753459, "grad_norm": 10.896808624267578, "learning_rate": 6.829166966932576e-07, "loss": 0.0042, "step": 450610 }, { "epoch": 4.814573428067739, "grad_norm": 0.07568533718585968, "learning_rate": 6.829010603603874e-07, "loss": 0.0042, "step": 450620 }, { "epoch": 4.814680271382018, "grad_norm": 0.028568070381879807, "learning_rate": 6.828854238210082e-07, "loss": 0.0018, "step": 450630 }, { "epoch": 4.814787114696298, "grad_norm": 0.0252806656062603, "learning_rate": 6.828697870751377e-07, "loss": 0.0121, "step": 450640 }, { "epoch": 4.814893958010577, "grad_norm": 8.156404495239258, "learning_rate": 6.828541501227932e-07, "loss": 0.0184, "step": 450650 }, { "epoch": 4.815000801324857, "grad_norm": 0.003725682618096471, "learning_rate": 6.828385129639925e-07, "loss": 0.0001, "step": 450660 }, { "epoch": 4.815107644639137, "grad_norm": 0.02643011324107647, "learning_rate": 6.828228755987533e-07, "loss": 0.0001, "step": 450670 }, { "epoch": 4.8152144879534164, "grad_norm": 0.015015814453363419, "learning_rate": 6.828072380270935e-07, "loss": 0.0001, "step": 450680 }, { "epoch": 4.815321331267696, "grad_norm": 4.364053249359131, "learning_rate": 6.827916002490304e-07, "loss": 0.0087, "step": 450690 }, { "epoch": 4.815428174581975, "grad_norm": 0.013769772835075855, "learning_rate": 6.827759622645817e-07, "loss": 0.0047, "step": 450700 }, { "epoch": 4.815535017896255, "grad_norm": 0.030505573377013206, "learning_rate": 6.827603240737654e-07, "loss": 0.0112, "step": 450710 }, { "epoch": 4.815641861210535, "grad_norm": 0.9400148391723633, "learning_rate": 6.827446856765988e-07, "loss": 0.0045, "step": 450720 }, { "epoch": 4.815748704524815, "grad_norm": 0.0024622841738164425, "learning_rate": 6.827290470730995e-07, "loss": 0.0028, "step": 450730 }, { "epoch": 4.815855547839094, "grad_norm": 4.817695617675781, "learning_rate": 6.827134082632855e-07, "loss": 0.0232, "step": 450740 }, { "epoch": 4.8159623911533735, "grad_norm": 0.035441115498542786, "learning_rate": 6.826977692471743e-07, "loss": 0.0032, "step": 450750 }, { "epoch": 4.816069234467653, "grad_norm": 0.011554372496902943, "learning_rate": 6.826821300247833e-07, "loss": 0.0007, "step": 450760 }, { "epoch": 4.8161760777819325, "grad_norm": 2.5697169303894043, "learning_rate": 6.826664905961306e-07, "loss": 0.0192, "step": 450770 }, { "epoch": 4.816282921096212, "grad_norm": 0.026151033118367195, "learning_rate": 6.826508509612336e-07, "loss": 0.0077, "step": 450780 }, { "epoch": 4.816389764410492, "grad_norm": 0.16810192167758942, "learning_rate": 6.826352111201098e-07, "loss": 0.0009, "step": 450790 }, { "epoch": 4.816496607724772, "grad_norm": 0.15845490992069244, "learning_rate": 6.826195710727774e-07, "loss": 0.0021, "step": 450800 }, { "epoch": 4.816603451039051, "grad_norm": 0.05409720167517662, "learning_rate": 6.826039308192534e-07, "loss": 0.0019, "step": 450810 }, { "epoch": 4.816710294353331, "grad_norm": 0.708548903465271, "learning_rate": 6.825882903595559e-07, "loss": 0.0033, "step": 450820 }, { "epoch": 4.81681713766761, "grad_norm": 0.32693177461624146, "learning_rate": 6.825726496937025e-07, "loss": 0.0137, "step": 450830 }, { "epoch": 4.8169239809818905, "grad_norm": 0.0011814865283668041, "learning_rate": 6.825570088217107e-07, "loss": 0.0013, "step": 450840 }, { "epoch": 4.81703082429617, "grad_norm": 0.008575922809541225, "learning_rate": 6.825413677435984e-07, "loss": 0.0228, "step": 450850 }, { "epoch": 4.817137667610449, "grad_norm": 0.020282164216041565, "learning_rate": 6.82525726459383e-07, "loss": 0.0037, "step": 450860 }, { "epoch": 4.817244510924729, "grad_norm": 0.005944578908383846, "learning_rate": 6.825100849690823e-07, "loss": 0.002, "step": 450870 }, { "epoch": 4.817351354239008, "grad_norm": 0.016389528289437294, "learning_rate": 6.82494443272714e-07, "loss": 0.0076, "step": 450880 }, { "epoch": 4.817458197553288, "grad_norm": 9.131840705871582, "learning_rate": 6.824788013702957e-07, "loss": 0.0149, "step": 450890 }, { "epoch": 4.817565040867568, "grad_norm": 0.09387356042861938, "learning_rate": 6.82463159261845e-07, "loss": 0.0118, "step": 450900 }, { "epoch": 4.8176718841818476, "grad_norm": 0.0053927237167954445, "learning_rate": 6.824475169473796e-07, "loss": 0.0008, "step": 450910 }, { "epoch": 4.817778727496127, "grad_norm": 0.029177522286772728, "learning_rate": 6.824318744269173e-07, "loss": 0.0003, "step": 450920 }, { "epoch": 4.8178855708104065, "grad_norm": 0.12908600270748138, "learning_rate": 6.824162317004754e-07, "loss": 0.0029, "step": 450930 }, { "epoch": 4.817992414124686, "grad_norm": 0.5627428889274597, "learning_rate": 6.824005887680722e-07, "loss": 0.004, "step": 450940 }, { "epoch": 4.818099257438965, "grad_norm": 0.004080065060406923, "learning_rate": 6.823849456297247e-07, "loss": 0.0096, "step": 450950 }, { "epoch": 4.818206100753246, "grad_norm": 0.1901710331439972, "learning_rate": 6.823693022854509e-07, "loss": 0.0383, "step": 450960 }, { "epoch": 4.818312944067525, "grad_norm": 0.009028417058289051, "learning_rate": 6.823536587352683e-07, "loss": 0.0067, "step": 450970 }, { "epoch": 4.818419787381805, "grad_norm": 8.060002326965332, "learning_rate": 6.823380149791948e-07, "loss": 0.0176, "step": 450980 }, { "epoch": 4.818526630696084, "grad_norm": 0.0016293198568746448, "learning_rate": 6.823223710172478e-07, "loss": 0.0048, "step": 450990 }, { "epoch": 4.818633474010364, "grad_norm": 0.0011924796272069216, "learning_rate": 6.823067268494451e-07, "loss": 0.0009, "step": 451000 }, { "epoch": 4.818740317324643, "grad_norm": 0.004066399298608303, "learning_rate": 6.822910824758045e-07, "loss": 0.0001, "step": 451010 }, { "epoch": 4.818847160638923, "grad_norm": 1.9180505275726318, "learning_rate": 6.822754378963434e-07, "loss": 0.0022, "step": 451020 }, { "epoch": 4.818954003953203, "grad_norm": 0.048304542899131775, "learning_rate": 6.822597931110795e-07, "loss": 0.0134, "step": 451030 }, { "epoch": 4.819060847267482, "grad_norm": 0.008000748232007027, "learning_rate": 6.822441481200307e-07, "loss": 0.04, "step": 451040 }, { "epoch": 4.819167690581762, "grad_norm": 1.0531915426254272, "learning_rate": 6.822285029232142e-07, "loss": 0.0048, "step": 451050 }, { "epoch": 4.819274533896041, "grad_norm": 0.024264369159936905, "learning_rate": 6.822128575206482e-07, "loss": 0.0027, "step": 451060 }, { "epoch": 4.819381377210322, "grad_norm": 3.1922216415405273, "learning_rate": 6.821972119123501e-07, "loss": 0.0534, "step": 451070 }, { "epoch": 4.819488220524601, "grad_norm": 0.17048537731170654, "learning_rate": 6.821815660983376e-07, "loss": 0.0071, "step": 451080 }, { "epoch": 4.8195950638388805, "grad_norm": 0.022631250321865082, "learning_rate": 6.821659200786285e-07, "loss": 0.0076, "step": 451090 }, { "epoch": 4.81970190715316, "grad_norm": 0.5037474036216736, "learning_rate": 6.8215027385324e-07, "loss": 0.001, "step": 451100 }, { "epoch": 4.819808750467439, "grad_norm": 0.002566776005551219, "learning_rate": 6.821346274221904e-07, "loss": 0.0013, "step": 451110 }, { "epoch": 4.819915593781719, "grad_norm": 6.336102485656738, "learning_rate": 6.82118980785497e-07, "loss": 0.0066, "step": 451120 }, { "epoch": 4.820022437095998, "grad_norm": 0.043270647525787354, "learning_rate": 6.821033339431775e-07, "loss": 0.0109, "step": 451130 }, { "epoch": 4.820129280410279, "grad_norm": 0.09793104976415634, "learning_rate": 6.820876868952495e-07, "loss": 0.002, "step": 451140 }, { "epoch": 4.820236123724558, "grad_norm": 0.8641296029090881, "learning_rate": 6.820720396417309e-07, "loss": 0.0066, "step": 451150 }, { "epoch": 4.820342967038838, "grad_norm": 2.7561724185943604, "learning_rate": 6.82056392182639e-07, "loss": 0.0019, "step": 451160 }, { "epoch": 4.820449810353117, "grad_norm": 0.003996447194367647, "learning_rate": 6.820407445179919e-07, "loss": 0.0039, "step": 451170 }, { "epoch": 4.8205566536673965, "grad_norm": 0.0009723750408738852, "learning_rate": 6.82025096647807e-07, "loss": 0.0005, "step": 451180 }, { "epoch": 4.820663496981677, "grad_norm": 3.4015655517578125, "learning_rate": 6.82009448572102e-07, "loss": 0.0173, "step": 451190 }, { "epoch": 4.820770340295956, "grad_norm": 0.019791096448898315, "learning_rate": 6.819938002908945e-07, "loss": 0.0025, "step": 451200 }, { "epoch": 4.820877183610236, "grad_norm": 4.205208778381348, "learning_rate": 6.819781518042024e-07, "loss": 0.0058, "step": 451210 }, { "epoch": 4.820984026924515, "grad_norm": 14.731470108032227, "learning_rate": 6.819625031120432e-07, "loss": 0.0175, "step": 451220 }, { "epoch": 4.821090870238795, "grad_norm": 0.18091903626918793, "learning_rate": 6.819468542144347e-07, "loss": 0.0046, "step": 451230 }, { "epoch": 4.821197713553074, "grad_norm": 1.4446715116500854, "learning_rate": 6.819312051113944e-07, "loss": 0.0085, "step": 451240 }, { "epoch": 4.821304556867354, "grad_norm": 0.002496112138032913, "learning_rate": 6.819155558029399e-07, "loss": 0.0039, "step": 451250 }, { "epoch": 4.821411400181634, "grad_norm": 1.8225386142730713, "learning_rate": 6.818999062890892e-07, "loss": 0.0162, "step": 451260 }, { "epoch": 4.821518243495913, "grad_norm": 0.0038675216492265463, "learning_rate": 6.818842565698597e-07, "loss": 0.0042, "step": 451270 }, { "epoch": 4.821625086810193, "grad_norm": 0.07429052889347076, "learning_rate": 6.818686066452691e-07, "loss": 0.0244, "step": 451280 }, { "epoch": 4.821731930124472, "grad_norm": 3.7233633995056152, "learning_rate": 6.818529565153353e-07, "loss": 0.0172, "step": 451290 }, { "epoch": 4.821838773438752, "grad_norm": 1.6668356657028198, "learning_rate": 6.818373061800755e-07, "loss": 0.0051, "step": 451300 }, { "epoch": 4.821945616753032, "grad_norm": 0.01676018163561821, "learning_rate": 6.818216556395079e-07, "loss": 0.002, "step": 451310 }, { "epoch": 4.822052460067312, "grad_norm": 0.13607238233089447, "learning_rate": 6.818060048936499e-07, "loss": 0.0017, "step": 451320 }, { "epoch": 4.822159303381591, "grad_norm": 0.4749722182750702, "learning_rate": 6.81790353942519e-07, "loss": 0.0042, "step": 451330 }, { "epoch": 4.8222661466958705, "grad_norm": 3.2876245975494385, "learning_rate": 6.817747027861334e-07, "loss": 0.0074, "step": 451340 }, { "epoch": 4.82237299001015, "grad_norm": 1.386565089225769, "learning_rate": 6.817590514245101e-07, "loss": 0.0084, "step": 451350 }, { "epoch": 4.822479833324429, "grad_norm": 0.08734087646007538, "learning_rate": 6.817433998576674e-07, "loss": 0.0034, "step": 451360 }, { "epoch": 4.822586676638709, "grad_norm": 0.010700355283915997, "learning_rate": 6.817277480856225e-07, "loss": 0.008, "step": 451370 }, { "epoch": 4.822693519952989, "grad_norm": 0.538973867893219, "learning_rate": 6.817120961083933e-07, "loss": 0.0746, "step": 451380 }, { "epoch": 4.822800363267269, "grad_norm": 0.6977462768554688, "learning_rate": 6.816964439259975e-07, "loss": 0.003, "step": 451390 }, { "epoch": 4.822907206581548, "grad_norm": 0.029412100091576576, "learning_rate": 6.816807915384526e-07, "loss": 0.005, "step": 451400 }, { "epoch": 4.823014049895828, "grad_norm": 17.594663619995117, "learning_rate": 6.816651389457765e-07, "loss": 0.0202, "step": 451410 }, { "epoch": 4.823120893210107, "grad_norm": 0.011432337574660778, "learning_rate": 6.816494861479866e-07, "loss": 0.0323, "step": 451420 }, { "epoch": 4.823227736524387, "grad_norm": 0.012910774908959866, "learning_rate": 6.816338331451009e-07, "loss": 0.0029, "step": 451430 }, { "epoch": 4.823334579838667, "grad_norm": 0.057036370038986206, "learning_rate": 6.816181799371368e-07, "loss": 0.0063, "step": 451440 }, { "epoch": 4.823441423152946, "grad_norm": 5.146211624145508, "learning_rate": 6.816025265241119e-07, "loss": 0.0088, "step": 451450 }, { "epoch": 4.823548266467226, "grad_norm": 0.13007597625255585, "learning_rate": 6.815868729060442e-07, "loss": 0.008, "step": 451460 }, { "epoch": 4.823655109781505, "grad_norm": 0.08190234750509262, "learning_rate": 6.815712190829512e-07, "loss": 0.0126, "step": 451470 }, { "epoch": 4.823761953095785, "grad_norm": 0.002966641914099455, "learning_rate": 6.815555650548506e-07, "loss": 0.004, "step": 451480 }, { "epoch": 4.823868796410064, "grad_norm": 0.0770798772573471, "learning_rate": 6.8153991082176e-07, "loss": 0.0089, "step": 451490 }, { "epoch": 4.8239756397243445, "grad_norm": 0.6343833804130554, "learning_rate": 6.815242563836972e-07, "loss": 0.0024, "step": 451500 }, { "epoch": 4.824082483038624, "grad_norm": 0.006604252383112907, "learning_rate": 6.815086017406797e-07, "loss": 0.0102, "step": 451510 }, { "epoch": 4.824189326352903, "grad_norm": 1.7148826122283936, "learning_rate": 6.814929468927255e-07, "loss": 0.0357, "step": 451520 }, { "epoch": 4.824296169667183, "grad_norm": 3.0306317806243896, "learning_rate": 6.81477291839852e-07, "loss": 0.0155, "step": 451530 }, { "epoch": 4.824403012981462, "grad_norm": 0.09064634889364243, "learning_rate": 6.814616365820769e-07, "loss": 0.0067, "step": 451540 }, { "epoch": 4.824509856295743, "grad_norm": 0.0799742192029953, "learning_rate": 6.814459811194179e-07, "loss": 0.0124, "step": 451550 }, { "epoch": 4.824616699610022, "grad_norm": 0.001910324557684362, "learning_rate": 6.814303254518927e-07, "loss": 0.0123, "step": 451560 }, { "epoch": 4.824723542924302, "grad_norm": 0.04227592051029205, "learning_rate": 6.81414669579519e-07, "loss": 0.0003, "step": 451570 }, { "epoch": 4.824830386238581, "grad_norm": 0.004483697470277548, "learning_rate": 6.813990135023144e-07, "loss": 0.0018, "step": 451580 }, { "epoch": 4.8249372295528605, "grad_norm": 0.127730593085289, "learning_rate": 6.813833572202966e-07, "loss": 0.0034, "step": 451590 }, { "epoch": 4.82504407286714, "grad_norm": 0.3609738349914551, "learning_rate": 6.813677007334834e-07, "loss": 0.0003, "step": 451600 }, { "epoch": 4.82515091618142, "grad_norm": 0.028251217678189278, "learning_rate": 6.813520440418924e-07, "loss": 0.0029, "step": 451610 }, { "epoch": 4.8252577594957, "grad_norm": 0.06864379346370697, "learning_rate": 6.813363871455411e-07, "loss": 0.0018, "step": 451620 }, { "epoch": 4.825364602809979, "grad_norm": 7.150421619415283, "learning_rate": 6.813207300444475e-07, "loss": 0.0051, "step": 451630 }, { "epoch": 4.825471446124259, "grad_norm": 0.013814816251397133, "learning_rate": 6.81305072738629e-07, "loss": 0.0067, "step": 451640 }, { "epoch": 4.825578289438538, "grad_norm": 2.5031282901763916, "learning_rate": 6.812894152281033e-07, "loss": 0.0018, "step": 451650 }, { "epoch": 4.825685132752818, "grad_norm": 0.14728239178657532, "learning_rate": 6.812737575128884e-07, "loss": 0.0208, "step": 451660 }, { "epoch": 4.825791976067098, "grad_norm": 0.00037238854565657675, "learning_rate": 6.812580995930015e-07, "loss": 0.0072, "step": 451670 }, { "epoch": 4.825898819381377, "grad_norm": 1.651157259941101, "learning_rate": 6.812424414684607e-07, "loss": 0.0173, "step": 451680 }, { "epoch": 4.826005662695657, "grad_norm": 1.6545391082763672, "learning_rate": 6.812267831392836e-07, "loss": 0.0163, "step": 451690 }, { "epoch": 4.826112506009936, "grad_norm": 0.0030027851462364197, "learning_rate": 6.812111246054875e-07, "loss": 0.0023, "step": 451700 }, { "epoch": 4.826219349324216, "grad_norm": 0.004977333825081587, "learning_rate": 6.811954658670907e-07, "loss": 0.006, "step": 451710 }, { "epoch": 4.826326192638495, "grad_norm": 0.03824830427765846, "learning_rate": 6.811798069241102e-07, "loss": 0.0087, "step": 451720 }, { "epoch": 4.826433035952776, "grad_norm": 0.013759803958237171, "learning_rate": 6.811641477765641e-07, "loss": 0.0198, "step": 451730 }, { "epoch": 4.826539879267055, "grad_norm": 0.006226883735507727, "learning_rate": 6.811484884244703e-07, "loss": 0.0114, "step": 451740 }, { "epoch": 4.8266467225813345, "grad_norm": 0.011914852075278759, "learning_rate": 6.811328288678459e-07, "loss": 0.0025, "step": 451750 }, { "epoch": 4.826753565895614, "grad_norm": 0.008771104738116264, "learning_rate": 6.81117169106709e-07, "loss": 0.0203, "step": 451760 }, { "epoch": 4.8268604092098935, "grad_norm": 0.009368211030960083, "learning_rate": 6.81101509141077e-07, "loss": 0.0002, "step": 451770 }, { "epoch": 4.826967252524173, "grad_norm": 0.002864540321752429, "learning_rate": 6.810858489709679e-07, "loss": 0.0073, "step": 451780 }, { "epoch": 4.827074095838453, "grad_norm": 3.581496000289917, "learning_rate": 6.81070188596399e-07, "loss": 0.0068, "step": 451790 }, { "epoch": 4.827180939152733, "grad_norm": 0.06690347194671631, "learning_rate": 6.810545280173883e-07, "loss": 0.0115, "step": 451800 }, { "epoch": 4.827287782467012, "grad_norm": 4.361405372619629, "learning_rate": 6.810388672339533e-07, "loss": 0.0117, "step": 451810 }, { "epoch": 4.827394625781292, "grad_norm": 0.0854225605726242, "learning_rate": 6.810232062461119e-07, "loss": 0.023, "step": 451820 }, { "epoch": 4.827501469095571, "grad_norm": 0.3259277939796448, "learning_rate": 6.810075450538814e-07, "loss": 0.022, "step": 451830 }, { "epoch": 4.8276083124098506, "grad_norm": 0.03257172554731369, "learning_rate": 6.809918836572801e-07, "loss": 0.0294, "step": 451840 }, { "epoch": 4.827715155724131, "grad_norm": 4.164308547973633, "learning_rate": 6.80976222056325e-07, "loss": 0.007, "step": 451850 }, { "epoch": 4.82782199903841, "grad_norm": 0.03203744441270828, "learning_rate": 6.809605602510342e-07, "loss": 0.0202, "step": 451860 }, { "epoch": 4.82792884235269, "grad_norm": 4.185431003570557, "learning_rate": 6.809448982414252e-07, "loss": 0.0056, "step": 451870 }, { "epoch": 4.828035685666969, "grad_norm": 3.915834426879883, "learning_rate": 6.809292360275158e-07, "loss": 0.0087, "step": 451880 }, { "epoch": 4.828142528981249, "grad_norm": 0.007271388545632362, "learning_rate": 6.809135736093237e-07, "loss": 0.0124, "step": 451890 }, { "epoch": 4.828249372295529, "grad_norm": 6.190769672393799, "learning_rate": 6.808979109868664e-07, "loss": 0.0087, "step": 451900 }, { "epoch": 4.8283562156098085, "grad_norm": 0.0029387956019490957, "learning_rate": 6.808822481601619e-07, "loss": 0.0103, "step": 451910 }, { "epoch": 4.828463058924088, "grad_norm": 1.1551345586776733, "learning_rate": 6.808665851292274e-07, "loss": 0.021, "step": 451920 }, { "epoch": 4.8285699022383675, "grad_norm": 4.760500907897949, "learning_rate": 6.808509218940811e-07, "loss": 0.0139, "step": 451930 }, { "epoch": 4.828676745552647, "grad_norm": 0.08109692484140396, "learning_rate": 6.808352584547404e-07, "loss": 0.0002, "step": 451940 }, { "epoch": 4.828783588866926, "grad_norm": 0.027730777859687805, "learning_rate": 6.808195948112231e-07, "loss": 0.0208, "step": 451950 }, { "epoch": 4.828890432181206, "grad_norm": 0.05566409230232239, "learning_rate": 6.808039309635467e-07, "loss": 0.0001, "step": 451960 }, { "epoch": 4.828997275495486, "grad_norm": 0.22439512610435486, "learning_rate": 6.807882669117291e-07, "loss": 0.0411, "step": 451970 }, { "epoch": 4.829104118809766, "grad_norm": 0.19402752816677094, "learning_rate": 6.80772602655788e-07, "loss": 0.0012, "step": 451980 }, { "epoch": 4.829210962124045, "grad_norm": 0.0027473997324705124, "learning_rate": 6.807569381957407e-07, "loss": 0.0028, "step": 451990 }, { "epoch": 4.829317805438325, "grad_norm": 10.57995319366455, "learning_rate": 6.807412735316055e-07, "loss": 0.0101, "step": 452000 }, { "epoch": 4.829424648752604, "grad_norm": 0.05518585816025734, "learning_rate": 6.807256086633995e-07, "loss": 0.0138, "step": 452010 }, { "epoch": 4.829531492066884, "grad_norm": 0.11941543221473694, "learning_rate": 6.807099435911408e-07, "loss": 0.0072, "step": 452020 }, { "epoch": 4.829638335381164, "grad_norm": 0.4308960437774658, "learning_rate": 6.806942783148469e-07, "loss": 0.0095, "step": 452030 }, { "epoch": 4.829745178695443, "grad_norm": 3.402029037475586, "learning_rate": 6.806786128345354e-07, "loss": 0.0127, "step": 452040 }, { "epoch": 4.829852022009723, "grad_norm": 3.276219129562378, "learning_rate": 6.806629471502243e-07, "loss": 0.0171, "step": 452050 }, { "epoch": 4.829958865324002, "grad_norm": 0.01866920478641987, "learning_rate": 6.80647281261931e-07, "loss": 0.0148, "step": 452060 }, { "epoch": 4.830065708638282, "grad_norm": 0.04901289939880371, "learning_rate": 6.806316151696734e-07, "loss": 0.0025, "step": 452070 }, { "epoch": 4.830172551952561, "grad_norm": 0.10682094097137451, "learning_rate": 6.80615948873469e-07, "loss": 0.016, "step": 452080 }, { "epoch": 4.8302793952668415, "grad_norm": 0.05216233804821968, "learning_rate": 6.806002823733356e-07, "loss": 0.0027, "step": 452090 }, { "epoch": 4.830386238581121, "grad_norm": 0.0029330148827284575, "learning_rate": 6.805846156692907e-07, "loss": 0.0124, "step": 452100 }, { "epoch": 4.8304930818954, "grad_norm": 3.7155561447143555, "learning_rate": 6.805689487613522e-07, "loss": 0.0042, "step": 452110 }, { "epoch": 4.83059992520968, "grad_norm": 0.0016469690017402172, "learning_rate": 6.805532816495377e-07, "loss": 0.0049, "step": 452120 }, { "epoch": 4.830706768523959, "grad_norm": 0.007223015651106834, "learning_rate": 6.80537614333865e-07, "loss": 0.0035, "step": 452130 }, { "epoch": 4.83081361183824, "grad_norm": 0.0008152068476192653, "learning_rate": 6.805219468143517e-07, "loss": 0.0001, "step": 452140 }, { "epoch": 4.830920455152519, "grad_norm": 0.006841883063316345, "learning_rate": 6.805062790910156e-07, "loss": 0.0088, "step": 452150 }, { "epoch": 4.831027298466799, "grad_norm": 2.441542148590088, "learning_rate": 6.80490611163874e-07, "loss": 0.001, "step": 452160 }, { "epoch": 4.831134141781078, "grad_norm": 0.007328403182327747, "learning_rate": 6.804749430329451e-07, "loss": 0.0491, "step": 452170 }, { "epoch": 4.8312409850953575, "grad_norm": 1.743182897567749, "learning_rate": 6.804592746982464e-07, "loss": 0.0144, "step": 452180 }, { "epoch": 4.831347828409637, "grad_norm": 0.5262417793273926, "learning_rate": 6.804436061597954e-07, "loss": 0.006, "step": 452190 }, { "epoch": 4.831454671723916, "grad_norm": 0.005445827730000019, "learning_rate": 6.804279374176101e-07, "loss": 0.0004, "step": 452200 }, { "epoch": 4.831561515038197, "grad_norm": 0.007909432053565979, "learning_rate": 6.80412268471708e-07, "loss": 0.0045, "step": 452210 }, { "epoch": 4.831668358352476, "grad_norm": 0.0023496777284890413, "learning_rate": 6.803965993221066e-07, "loss": 0.0009, "step": 452220 }, { "epoch": 4.831775201666756, "grad_norm": 5.7767534255981445, "learning_rate": 6.803809299688242e-07, "loss": 0.0066, "step": 452230 }, { "epoch": 4.831882044981035, "grad_norm": 0.6516345143318176, "learning_rate": 6.803652604118778e-07, "loss": 0.0005, "step": 452240 }, { "epoch": 4.831988888295315, "grad_norm": 0.00819482747465372, "learning_rate": 6.803495906512856e-07, "loss": 0.0041, "step": 452250 }, { "epoch": 4.832095731609595, "grad_norm": 19.52625846862793, "learning_rate": 6.80333920687065e-07, "loss": 0.0664, "step": 452260 }, { "epoch": 4.832202574923874, "grad_norm": 0.0024347021244466305, "learning_rate": 6.803182505192338e-07, "loss": 0.0196, "step": 452270 }, { "epoch": 4.832309418238154, "grad_norm": 7.726663589477539, "learning_rate": 6.803025801478098e-07, "loss": 0.0128, "step": 452280 }, { "epoch": 4.832416261552433, "grad_norm": 0.0023258011788129807, "learning_rate": 6.802869095728105e-07, "loss": 0.0026, "step": 452290 }, { "epoch": 4.832523104866713, "grad_norm": 0.6568769216537476, "learning_rate": 6.802712387942537e-07, "loss": 0.0206, "step": 452300 }, { "epoch": 4.832629948180992, "grad_norm": 0.18404676020145416, "learning_rate": 6.80255567812157e-07, "loss": 0.0002, "step": 452310 }, { "epoch": 4.832736791495272, "grad_norm": 0.02446795627474785, "learning_rate": 6.802398966265384e-07, "loss": 0.0042, "step": 452320 }, { "epoch": 4.832843634809552, "grad_norm": 0.09081508219242096, "learning_rate": 6.80224225237415e-07, "loss": 0.017, "step": 452330 }, { "epoch": 4.8329504781238315, "grad_norm": 0.014776176773011684, "learning_rate": 6.802085536448051e-07, "loss": 0.0064, "step": 452340 }, { "epoch": 4.833057321438111, "grad_norm": 0.0032274750992655754, "learning_rate": 6.80192881848726e-07, "loss": 0.0046, "step": 452350 }, { "epoch": 4.83316416475239, "grad_norm": 0.012959122657775879, "learning_rate": 6.801772098491956e-07, "loss": 0.0283, "step": 452360 }, { "epoch": 4.83327100806667, "grad_norm": 0.6811327338218689, "learning_rate": 6.801615376462315e-07, "loss": 0.0154, "step": 452370 }, { "epoch": 4.83337785138095, "grad_norm": 0.030593398958444595, "learning_rate": 6.801458652398516e-07, "loss": 0.002, "step": 452380 }, { "epoch": 4.83348469469523, "grad_norm": 0.32750019431114197, "learning_rate": 6.801301926300731e-07, "loss": 0.0148, "step": 452390 }, { "epoch": 4.833591538009509, "grad_norm": 7.0860185623168945, "learning_rate": 6.801145198169143e-07, "loss": 0.0442, "step": 452400 }, { "epoch": 4.833698381323789, "grad_norm": 0.006271854508668184, "learning_rate": 6.800988468003924e-07, "loss": 0.0265, "step": 452410 }, { "epoch": 4.833805224638068, "grad_norm": 0.006077637895941734, "learning_rate": 6.800831735805255e-07, "loss": 0.0008, "step": 452420 }, { "epoch": 4.8339120679523475, "grad_norm": 0.009203108958899975, "learning_rate": 6.800675001573309e-07, "loss": 0.0066, "step": 452430 }, { "epoch": 4.834018911266628, "grad_norm": 0.007056703791022301, "learning_rate": 6.800518265308266e-07, "loss": 0.0011, "step": 452440 }, { "epoch": 4.834125754580907, "grad_norm": 0.012373614124953747, "learning_rate": 6.800361527010302e-07, "loss": 0.0057, "step": 452450 }, { "epoch": 4.834232597895187, "grad_norm": 0.005897056311368942, "learning_rate": 6.800204786679594e-07, "loss": 0.0038, "step": 452460 }, { "epoch": 4.834339441209466, "grad_norm": 1.0379422903060913, "learning_rate": 6.800048044316319e-07, "loss": 0.0287, "step": 452470 }, { "epoch": 4.834446284523746, "grad_norm": 1.251213788986206, "learning_rate": 6.799891299920653e-07, "loss": 0.0164, "step": 452480 }, { "epoch": 4.834553127838025, "grad_norm": 0.005165993236005306, "learning_rate": 6.799734553492776e-07, "loss": 0.0073, "step": 452490 }, { "epoch": 4.8346599711523055, "grad_norm": 0.0013107014819979668, "learning_rate": 6.799577805032861e-07, "loss": 0.0, "step": 452500 }, { "epoch": 4.834766814466585, "grad_norm": 0.0017159732524305582, "learning_rate": 6.799421054541088e-07, "loss": 0.0226, "step": 452510 }, { "epoch": 4.834873657780864, "grad_norm": 11.278681755065918, "learning_rate": 6.799264302017632e-07, "loss": 0.032, "step": 452520 }, { "epoch": 4.834980501095144, "grad_norm": 0.10285351425409317, "learning_rate": 6.799107547462671e-07, "loss": 0.0131, "step": 452530 }, { "epoch": 4.835087344409423, "grad_norm": 3.0741477012634277, "learning_rate": 6.798950790876382e-07, "loss": 0.0013, "step": 452540 }, { "epoch": 4.835194187723703, "grad_norm": 4.872648239135742, "learning_rate": 6.798794032258942e-07, "loss": 0.0049, "step": 452550 }, { "epoch": 4.835301031037983, "grad_norm": 0.006431493442505598, "learning_rate": 6.798637271610528e-07, "loss": 0.0372, "step": 452560 }, { "epoch": 4.835407874352263, "grad_norm": 4.6707682609558105, "learning_rate": 6.798480508931316e-07, "loss": 0.0016, "step": 452570 }, { "epoch": 4.835514717666542, "grad_norm": 0.7664213180541992, "learning_rate": 6.798323744221484e-07, "loss": 0.0013, "step": 452580 }, { "epoch": 4.8356215609808215, "grad_norm": 0.0011883616680279374, "learning_rate": 6.798166977481208e-07, "loss": 0.0006, "step": 452590 }, { "epoch": 4.835728404295101, "grad_norm": 6.389772891998291, "learning_rate": 6.798010208710667e-07, "loss": 0.0045, "step": 452600 }, { "epoch": 4.835835247609381, "grad_norm": 0.011242518201470375, "learning_rate": 6.797853437910038e-07, "loss": 0.001, "step": 452610 }, { "epoch": 4.835942090923661, "grad_norm": 0.040620170533657074, "learning_rate": 6.797696665079494e-07, "loss": 0.0068, "step": 452620 }, { "epoch": 4.83604893423794, "grad_norm": 0.17752240598201752, "learning_rate": 6.797539890219215e-07, "loss": 0.0008, "step": 452630 }, { "epoch": 4.83615577755222, "grad_norm": 0.03029550611972809, "learning_rate": 6.797383113329378e-07, "loss": 0.0029, "step": 452640 }, { "epoch": 4.836262620866499, "grad_norm": 0.019238749518990517, "learning_rate": 6.797226334410163e-07, "loss": 0.0686, "step": 452650 }, { "epoch": 4.836369464180779, "grad_norm": 0.774642825126648, "learning_rate": 6.797069553461741e-07, "loss": 0.0236, "step": 452660 }, { "epoch": 4.836476307495058, "grad_norm": 2.9027962684631348, "learning_rate": 6.79691277048429e-07, "loss": 0.0093, "step": 452670 }, { "epoch": 4.836583150809338, "grad_norm": 0.012094832956790924, "learning_rate": 6.796755985477992e-07, "loss": 0.0192, "step": 452680 }, { "epoch": 4.836689994123618, "grad_norm": 5.378292083740234, "learning_rate": 6.79659919844302e-07, "loss": 0.004, "step": 452690 }, { "epoch": 4.836796837437897, "grad_norm": 0.0014025260461494327, "learning_rate": 6.796442409379551e-07, "loss": 0.0082, "step": 452700 }, { "epoch": 4.836903680752177, "grad_norm": 7.721960067749023, "learning_rate": 6.796285618287765e-07, "loss": 0.0011, "step": 452710 }, { "epoch": 4.837010524066456, "grad_norm": 1.554476022720337, "learning_rate": 6.796128825167835e-07, "loss": 0.0117, "step": 452720 }, { "epoch": 4.837117367380737, "grad_norm": 0.048643384128808975, "learning_rate": 6.795972030019941e-07, "loss": 0.0003, "step": 452730 }, { "epoch": 4.837224210695016, "grad_norm": 0.0003188037662766874, "learning_rate": 6.795815232844258e-07, "loss": 0.0004, "step": 452740 }, { "epoch": 4.8373310540092955, "grad_norm": 0.0010328833013772964, "learning_rate": 6.795658433640965e-07, "loss": 0.0044, "step": 452750 }, { "epoch": 4.837437897323575, "grad_norm": 0.9424408674240112, "learning_rate": 6.795501632410238e-07, "loss": 0.0022, "step": 452760 }, { "epoch": 4.8375447406378544, "grad_norm": 0.0012313334736973047, "learning_rate": 6.795344829152254e-07, "loss": 0.0099, "step": 452770 }, { "epoch": 4.837651583952134, "grad_norm": 0.010232220403850079, "learning_rate": 6.795188023867191e-07, "loss": 0.0125, "step": 452780 }, { "epoch": 4.837758427266413, "grad_norm": 0.035634249448776245, "learning_rate": 6.795031216555223e-07, "loss": 0.0038, "step": 452790 }, { "epoch": 4.837865270580694, "grad_norm": 0.003827662905678153, "learning_rate": 6.794874407216531e-07, "loss": 0.0088, "step": 452800 }, { "epoch": 4.837972113894973, "grad_norm": 0.0007207402377389371, "learning_rate": 6.794717595851289e-07, "loss": 0.0093, "step": 452810 }, { "epoch": 4.838078957209253, "grad_norm": 0.0017771213315427303, "learning_rate": 6.794560782459677e-07, "loss": 0.0041, "step": 452820 }, { "epoch": 4.838185800523532, "grad_norm": 0.0007825159700587392, "learning_rate": 6.794403967041871e-07, "loss": 0.0159, "step": 452830 }, { "epoch": 4.8382926438378115, "grad_norm": 10.594932556152344, "learning_rate": 6.794247149598046e-07, "loss": 0.024, "step": 452840 }, { "epoch": 4.838399487152092, "grad_norm": 0.009102161042392254, "learning_rate": 6.794090330128381e-07, "loss": 0.0002, "step": 452850 }, { "epoch": 4.838506330466371, "grad_norm": 1.399719476699829, "learning_rate": 6.793933508633053e-07, "loss": 0.0044, "step": 452860 }, { "epoch": 4.838613173780651, "grad_norm": 0.001173292868770659, "learning_rate": 6.793776685112238e-07, "loss": 0.0019, "step": 452870 }, { "epoch": 4.83872001709493, "grad_norm": 0.06973454356193542, "learning_rate": 6.793619859566113e-07, "loss": 0.0006, "step": 452880 }, { "epoch": 4.83882686040921, "grad_norm": 0.004650282207876444, "learning_rate": 6.793463031994858e-07, "loss": 0.0232, "step": 452890 }, { "epoch": 4.838933703723489, "grad_norm": 0.0018030022038146853, "learning_rate": 6.793306202398645e-07, "loss": 0.0046, "step": 452900 }, { "epoch": 4.839040547037769, "grad_norm": 0.08265754580497742, "learning_rate": 6.793149370777656e-07, "loss": 0.0244, "step": 452910 }, { "epoch": 4.839147390352049, "grad_norm": 0.007839804515242577, "learning_rate": 6.792992537132068e-07, "loss": 0.0035, "step": 452920 }, { "epoch": 4.8392542336663285, "grad_norm": 0.11052721738815308, "learning_rate": 6.792835701462052e-07, "loss": 0.0083, "step": 452930 }, { "epoch": 4.839361076980608, "grad_norm": 0.7353889346122742, "learning_rate": 6.792678863767791e-07, "loss": 0.009, "step": 452940 }, { "epoch": 4.839467920294887, "grad_norm": 7.9502339363098145, "learning_rate": 6.792522024049461e-07, "loss": 0.0073, "step": 452950 }, { "epoch": 4.839574763609167, "grad_norm": 0.0026379923801869154, "learning_rate": 6.792365182307237e-07, "loss": 0.0013, "step": 452960 }, { "epoch": 4.839681606923447, "grad_norm": 0.002565463073551655, "learning_rate": 6.792208338541299e-07, "loss": 0.0005, "step": 452970 }, { "epoch": 4.839788450237727, "grad_norm": 0.20805831253528595, "learning_rate": 6.79205149275182e-07, "loss": 0.0053, "step": 452980 }, { "epoch": 4.839895293552006, "grad_norm": 0.07570341974496841, "learning_rate": 6.791894644938981e-07, "loss": 0.0083, "step": 452990 }, { "epoch": 4.8400021368662856, "grad_norm": 0.1788184940814972, "learning_rate": 6.791737795102959e-07, "loss": 0.0399, "step": 453000 }, { "epoch": 4.840108980180565, "grad_norm": 0.0053180367685854435, "learning_rate": 6.791580943243928e-07, "loss": 0.0022, "step": 453010 }, { "epoch": 4.8402158234948445, "grad_norm": 6.994518756866455, "learning_rate": 6.791424089362068e-07, "loss": 0.0815, "step": 453020 }, { "epoch": 4.840322666809124, "grad_norm": 0.0036518056876957417, "learning_rate": 6.791267233457554e-07, "loss": 0.0132, "step": 453030 }, { "epoch": 4.840429510123404, "grad_norm": 1.1441304683685303, "learning_rate": 6.791110375530564e-07, "loss": 0.0029, "step": 453040 }, { "epoch": 4.840536353437684, "grad_norm": 0.06559981405735016, "learning_rate": 6.790953515581276e-07, "loss": 0.0089, "step": 453050 }, { "epoch": 4.840643196751963, "grad_norm": 0.0031716113444417715, "learning_rate": 6.790796653609866e-07, "loss": 0.0028, "step": 453060 }, { "epoch": 4.840750040066243, "grad_norm": 0.0065498147159814835, "learning_rate": 6.790639789616511e-07, "loss": 0.004, "step": 453070 }, { "epoch": 4.840856883380522, "grad_norm": 0.14502748847007751, "learning_rate": 6.79048292360139e-07, "loss": 0.0081, "step": 453080 }, { "epoch": 4.8409637266948025, "grad_norm": 1.5952094793319702, "learning_rate": 6.790326055564677e-07, "loss": 0.0052, "step": 453090 }, { "epoch": 4.841070570009082, "grad_norm": 11.019814491271973, "learning_rate": 6.790169185506551e-07, "loss": 0.0162, "step": 453100 }, { "epoch": 4.841177413323361, "grad_norm": 6.968310356140137, "learning_rate": 6.79001231342719e-07, "loss": 0.0235, "step": 453110 }, { "epoch": 4.841284256637641, "grad_norm": 0.0035942557733505964, "learning_rate": 6.789855439326769e-07, "loss": 0.0365, "step": 453120 }, { "epoch": 4.84139109995192, "grad_norm": 0.8291025161743164, "learning_rate": 6.789698563205466e-07, "loss": 0.0019, "step": 453130 }, { "epoch": 4.8414979432662, "grad_norm": 0.029362110421061516, "learning_rate": 6.789541685063459e-07, "loss": 0.0102, "step": 453140 }, { "epoch": 4.84160478658048, "grad_norm": 0.0009535981807857752, "learning_rate": 6.789384804900925e-07, "loss": 0.0151, "step": 453150 }, { "epoch": 4.84171162989476, "grad_norm": 0.9029272794723511, "learning_rate": 6.789227922718039e-07, "loss": 0.0037, "step": 453160 }, { "epoch": 4.841818473209039, "grad_norm": 0.13665911555290222, "learning_rate": 6.78907103851498e-07, "loss": 0.006, "step": 453170 }, { "epoch": 4.8419253165233185, "grad_norm": 0.20118975639343262, "learning_rate": 6.788914152291926e-07, "loss": 0.0088, "step": 453180 }, { "epoch": 4.842032159837598, "grad_norm": 0.0010092718293890357, "learning_rate": 6.78875726404905e-07, "loss": 0.0009, "step": 453190 }, { "epoch": 4.842139003151877, "grad_norm": 0.0882938951253891, "learning_rate": 6.788600373786535e-07, "loss": 0.008, "step": 453200 }, { "epoch": 4.842245846466158, "grad_norm": 0.0005888722371309996, "learning_rate": 6.788443481504553e-07, "loss": 0.0066, "step": 453210 }, { "epoch": 4.842352689780437, "grad_norm": 0.04038584232330322, "learning_rate": 6.788286587203286e-07, "loss": 0.0536, "step": 453220 }, { "epoch": 4.842459533094717, "grad_norm": 0.7986190319061279, "learning_rate": 6.788129690882907e-07, "loss": 0.033, "step": 453230 }, { "epoch": 4.842566376408996, "grad_norm": 0.027260392904281616, "learning_rate": 6.787972792543594e-07, "loss": 0.0283, "step": 453240 }, { "epoch": 4.842673219723276, "grad_norm": 0.0038036557380110025, "learning_rate": 6.787815892185526e-07, "loss": 0.0005, "step": 453250 }, { "epoch": 4.842780063037555, "grad_norm": 6.9970526695251465, "learning_rate": 6.787658989808878e-07, "loss": 0.0151, "step": 453260 }, { "epoch": 4.842886906351835, "grad_norm": 0.09709619730710983, "learning_rate": 6.787502085413828e-07, "loss": 0.0079, "step": 453270 }, { "epoch": 4.842993749666115, "grad_norm": 2.5790677070617676, "learning_rate": 6.787345179000554e-07, "loss": 0.0076, "step": 453280 }, { "epoch": 4.843100592980394, "grad_norm": 0.015911661088466644, "learning_rate": 6.787188270569233e-07, "loss": 0.0161, "step": 453290 }, { "epoch": 4.843207436294674, "grad_norm": 0.06427495926618576, "learning_rate": 6.787031360120039e-07, "loss": 0.0007, "step": 453300 }, { "epoch": 4.843314279608953, "grad_norm": 0.056725915521383286, "learning_rate": 6.786874447653154e-07, "loss": 0.0115, "step": 453310 }, { "epoch": 4.843421122923234, "grad_norm": 0.0005214727716520429, "learning_rate": 6.786717533168752e-07, "loss": 0.0221, "step": 453320 }, { "epoch": 4.843527966237513, "grad_norm": 0.8448746800422668, "learning_rate": 6.786560616667011e-07, "loss": 0.0009, "step": 453330 }, { "epoch": 4.8436348095517925, "grad_norm": 0.0010946476832032204, "learning_rate": 6.786403698148109e-07, "loss": 0.0035, "step": 453340 }, { "epoch": 4.843741652866072, "grad_norm": 2.2076425552368164, "learning_rate": 6.786246777612222e-07, "loss": 0.0061, "step": 453350 }, { "epoch": 4.843848496180351, "grad_norm": 0.33705583214759827, "learning_rate": 6.786089855059527e-07, "loss": 0.0126, "step": 453360 }, { "epoch": 4.843955339494631, "grad_norm": 0.02921128086745739, "learning_rate": 6.785932930490202e-07, "loss": 0.0024, "step": 453370 }, { "epoch": 4.84406218280891, "grad_norm": 4.549973487854004, "learning_rate": 6.785776003904423e-07, "loss": 0.0023, "step": 453380 }, { "epoch": 4.844169026123191, "grad_norm": 6.684062957763672, "learning_rate": 6.785619075302371e-07, "loss": 0.0256, "step": 453390 }, { "epoch": 4.84427586943747, "grad_norm": 0.06592794507741928, "learning_rate": 6.785462144684218e-07, "loss": 0.0016, "step": 453400 }, { "epoch": 4.84438271275175, "grad_norm": 0.004558752290904522, "learning_rate": 6.785305212050143e-07, "loss": 0.0125, "step": 453410 }, { "epoch": 4.844489556066029, "grad_norm": 1.3136345148086548, "learning_rate": 6.785148277400324e-07, "loss": 0.0132, "step": 453420 }, { "epoch": 4.8445963993803085, "grad_norm": 0.2500528395175934, "learning_rate": 6.78499134073494e-07, "loss": 0.0143, "step": 453430 }, { "epoch": 4.844703242694589, "grad_norm": 7.677279949188232, "learning_rate": 6.784834402054163e-07, "loss": 0.0171, "step": 453440 }, { "epoch": 4.844810086008868, "grad_norm": 0.007753872312605381, "learning_rate": 6.784677461358174e-07, "loss": 0.0045, "step": 453450 }, { "epoch": 4.844916929323148, "grad_norm": 1.7700260877609253, "learning_rate": 6.784520518647153e-07, "loss": 0.0039, "step": 453460 }, { "epoch": 4.845023772637427, "grad_norm": 2.007467031478882, "learning_rate": 6.784363573921269e-07, "loss": 0.0025, "step": 453470 }, { "epoch": 4.845130615951707, "grad_norm": 4.031991481781006, "learning_rate": 6.784206627180706e-07, "loss": 0.0186, "step": 453480 }, { "epoch": 4.845237459265986, "grad_norm": 0.0010659544495865703, "learning_rate": 6.78404967842564e-07, "loss": 0.0004, "step": 453490 }, { "epoch": 4.845344302580266, "grad_norm": 0.006066977046430111, "learning_rate": 6.783892727656245e-07, "loss": 0.0018, "step": 453500 }, { "epoch": 4.845451145894546, "grad_norm": 0.7673470973968506, "learning_rate": 6.783735774872702e-07, "loss": 0.009, "step": 453510 }, { "epoch": 4.845557989208825, "grad_norm": 0.026251427829265594, "learning_rate": 6.783578820075186e-07, "loss": 0.0331, "step": 453520 }, { "epoch": 4.845664832523105, "grad_norm": 3.8387253284454346, "learning_rate": 6.783421863263875e-07, "loss": 0.0081, "step": 453530 }, { "epoch": 4.845771675837384, "grad_norm": 0.3248961269855499, "learning_rate": 6.783264904438947e-07, "loss": 0.0375, "step": 453540 }, { "epoch": 4.845878519151664, "grad_norm": 0.649211585521698, "learning_rate": 6.783107943600577e-07, "loss": 0.0037, "step": 453550 }, { "epoch": 4.845985362465944, "grad_norm": 2.9146454334259033, "learning_rate": 6.782950980748944e-07, "loss": 0.0033, "step": 453560 }, { "epoch": 4.846092205780224, "grad_norm": 0.009669401682913303, "learning_rate": 6.782794015884225e-07, "loss": 0.0004, "step": 453570 }, { "epoch": 4.846199049094503, "grad_norm": 0.0063774301670491695, "learning_rate": 6.782637049006597e-07, "loss": 0.0421, "step": 453580 }, { "epoch": 4.8463058924087825, "grad_norm": 0.4592394232749939, "learning_rate": 6.782480080116237e-07, "loss": 0.0123, "step": 453590 }, { "epoch": 4.846412735723062, "grad_norm": 4.877560615539551, "learning_rate": 6.782323109213324e-07, "loss": 0.0094, "step": 453600 }, { "epoch": 4.846519579037341, "grad_norm": 0.028568018227815628, "learning_rate": 6.782166136298031e-07, "loss": 0.009, "step": 453610 }, { "epoch": 4.846626422351621, "grad_norm": 0.6817123293876648, "learning_rate": 6.78200916137054e-07, "loss": 0.0119, "step": 453620 }, { "epoch": 4.846733265665901, "grad_norm": 0.7861871123313904, "learning_rate": 6.781852184431025e-07, "loss": 0.0009, "step": 453630 }, { "epoch": 4.846840108980181, "grad_norm": 0.009104903787374496, "learning_rate": 6.781695205479665e-07, "loss": 0.008, "step": 453640 }, { "epoch": 4.84694695229446, "grad_norm": 0.12501955032348633, "learning_rate": 6.781538224516636e-07, "loss": 0.0088, "step": 453650 }, { "epoch": 4.84705379560874, "grad_norm": 0.11948946118354797, "learning_rate": 6.781381241542117e-07, "loss": 0.006, "step": 453660 }, { "epoch": 4.847160638923019, "grad_norm": 1.5634807348251343, "learning_rate": 6.781224256556282e-07, "loss": 0.0262, "step": 453670 }, { "epoch": 4.847267482237299, "grad_norm": 0.00802430510520935, "learning_rate": 6.781067269559313e-07, "loss": 0.0361, "step": 453680 }, { "epoch": 4.847374325551579, "grad_norm": 0.006917822174727917, "learning_rate": 6.780910280551383e-07, "loss": 0.0248, "step": 453690 }, { "epoch": 4.847481168865858, "grad_norm": 1.7838881015777588, "learning_rate": 6.780753289532672e-07, "loss": 0.0015, "step": 453700 }, { "epoch": 4.847588012180138, "grad_norm": 4.100480556488037, "learning_rate": 6.780596296503355e-07, "loss": 0.0132, "step": 453710 }, { "epoch": 4.847694855494417, "grad_norm": 0.02352256514132023, "learning_rate": 6.780439301463611e-07, "loss": 0.0069, "step": 453720 }, { "epoch": 4.847801698808697, "grad_norm": 0.2101527750492096, "learning_rate": 6.780282304413616e-07, "loss": 0.0107, "step": 453730 }, { "epoch": 4.847908542122976, "grad_norm": 0.19916732609272003, "learning_rate": 6.780125305353548e-07, "loss": 0.0002, "step": 453740 }, { "epoch": 4.8480153854372565, "grad_norm": 0.029136374592781067, "learning_rate": 6.779968304283585e-07, "loss": 0.0026, "step": 453750 }, { "epoch": 4.848122228751536, "grad_norm": 0.023431846871972084, "learning_rate": 6.779811301203902e-07, "loss": 0.0173, "step": 453760 }, { "epoch": 4.848229072065815, "grad_norm": 0.004567001946270466, "learning_rate": 6.779654296114681e-07, "loss": 0.0051, "step": 453770 }, { "epoch": 4.848335915380095, "grad_norm": 1.2879996299743652, "learning_rate": 6.779497289016094e-07, "loss": 0.0025, "step": 453780 }, { "epoch": 4.848442758694374, "grad_norm": 0.5515247583389282, "learning_rate": 6.779340279908319e-07, "loss": 0.0089, "step": 453790 }, { "epoch": 4.848549602008655, "grad_norm": 0.020972007885575294, "learning_rate": 6.779183268791535e-07, "loss": 0.0257, "step": 453800 }, { "epoch": 4.848656445322934, "grad_norm": 0.002889142371714115, "learning_rate": 6.77902625566592e-07, "loss": 0.0079, "step": 453810 }, { "epoch": 4.848763288637214, "grad_norm": 0.009918690659105778, "learning_rate": 6.77886924053165e-07, "loss": 0.0072, "step": 453820 }, { "epoch": 4.848870131951493, "grad_norm": 0.035329628735780716, "learning_rate": 6.778712223388902e-07, "loss": 0.0078, "step": 453830 }, { "epoch": 4.8489769752657725, "grad_norm": 0.024992726743221283, "learning_rate": 6.778555204237852e-07, "loss": 0.002, "step": 453840 }, { "epoch": 4.849083818580052, "grad_norm": 0.21296119689941406, "learning_rate": 6.778398183078681e-07, "loss": 0.006, "step": 453850 }, { "epoch": 4.849190661894332, "grad_norm": 0.47200924158096313, "learning_rate": 6.778241159911564e-07, "loss": 0.0186, "step": 453860 }, { "epoch": 4.849297505208612, "grad_norm": 0.18900245428085327, "learning_rate": 6.778084134736678e-07, "loss": 0.0035, "step": 453870 }, { "epoch": 4.849404348522891, "grad_norm": 0.01715545915067196, "learning_rate": 6.777927107554201e-07, "loss": 0.0036, "step": 453880 }, { "epoch": 4.849511191837171, "grad_norm": 0.03901587799191475, "learning_rate": 6.777770078364311e-07, "loss": 0.0013, "step": 453890 }, { "epoch": 4.84961803515145, "grad_norm": 0.00811376515775919, "learning_rate": 6.777613047167182e-07, "loss": 0.0086, "step": 453900 }, { "epoch": 4.84972487846573, "grad_norm": 0.03233807906508446, "learning_rate": 6.777456013962995e-07, "loss": 0.0003, "step": 453910 }, { "epoch": 4.84983172178001, "grad_norm": 1.4114623069763184, "learning_rate": 6.777298978751928e-07, "loss": 0.0039, "step": 453920 }, { "epoch": 4.8499385650942894, "grad_norm": 15.59705638885498, "learning_rate": 6.777141941534154e-07, "loss": 0.0024, "step": 453930 }, { "epoch": 4.850045408408569, "grad_norm": 5.477151870727539, "learning_rate": 6.776984902309853e-07, "loss": 0.0074, "step": 453940 }, { "epoch": 4.850152251722848, "grad_norm": 0.07894924283027649, "learning_rate": 6.776827861079202e-07, "loss": 0.0004, "step": 453950 }, { "epoch": 4.850259095037128, "grad_norm": 0.019217928871512413, "learning_rate": 6.776670817842378e-07, "loss": 0.0041, "step": 453960 }, { "epoch": 4.850365938351407, "grad_norm": 0.002449913416057825, "learning_rate": 6.776513772599559e-07, "loss": 0.0031, "step": 453970 }, { "epoch": 4.850472781665688, "grad_norm": 17.69772720336914, "learning_rate": 6.776356725350921e-07, "loss": 0.0102, "step": 453980 }, { "epoch": 4.850579624979967, "grad_norm": 0.016641682013869286, "learning_rate": 6.776199676096643e-07, "loss": 0.002, "step": 453990 }, { "epoch": 4.8506864682942465, "grad_norm": 0.010100385174155235, "learning_rate": 6.776042624836902e-07, "loss": 0.0076, "step": 454000 }, { "epoch": 4.850793311608526, "grad_norm": 0.3750712275505066, "learning_rate": 6.775885571571875e-07, "loss": 0.004, "step": 454010 }, { "epoch": 4.8509001549228055, "grad_norm": 0.13731278479099274, "learning_rate": 6.775728516301737e-07, "loss": 0.0275, "step": 454020 }, { "epoch": 4.851006998237086, "grad_norm": 0.01122704055160284, "learning_rate": 6.775571459026669e-07, "loss": 0.0021, "step": 454030 }, { "epoch": 4.851113841551365, "grad_norm": 0.005974154453724623, "learning_rate": 6.775414399746847e-07, "loss": 0.0033, "step": 454040 }, { "epoch": 4.851220684865645, "grad_norm": 0.8273299932479858, "learning_rate": 6.775257338462448e-07, "loss": 0.0035, "step": 454050 }, { "epoch": 4.851327528179924, "grad_norm": 0.5748028755187988, "learning_rate": 6.77510027517365e-07, "loss": 0.0153, "step": 454060 }, { "epoch": 4.851434371494204, "grad_norm": 0.017766529694199562, "learning_rate": 6.774943209880628e-07, "loss": 0.0159, "step": 454070 }, { "epoch": 4.851541214808483, "grad_norm": 0.870643138885498, "learning_rate": 6.774786142583563e-07, "loss": 0.0008, "step": 454080 }, { "epoch": 4.851648058122763, "grad_norm": 0.09842217713594437, "learning_rate": 6.77462907328263e-07, "loss": 0.0318, "step": 454090 }, { "epoch": 4.851754901437043, "grad_norm": 2.8565104007720947, "learning_rate": 6.774472001978006e-07, "loss": 0.0077, "step": 454100 }, { "epoch": 4.851861744751322, "grad_norm": 0.0021808533929288387, "learning_rate": 6.77431492866987e-07, "loss": 0.0185, "step": 454110 }, { "epoch": 4.851968588065602, "grad_norm": 0.014230634085834026, "learning_rate": 6.774157853358398e-07, "loss": 0.0056, "step": 454120 }, { "epoch": 4.852075431379881, "grad_norm": 0.00368364411406219, "learning_rate": 6.774000776043769e-07, "loss": 0.004, "step": 454130 }, { "epoch": 4.852182274694161, "grad_norm": 0.07616490870714188, "learning_rate": 6.773843696726159e-07, "loss": 0.0041, "step": 454140 }, { "epoch": 4.852289118008441, "grad_norm": 0.07230158895254135, "learning_rate": 6.773686615405745e-07, "loss": 0.0022, "step": 454150 }, { "epoch": 4.8523959613227206, "grad_norm": 0.001890335464850068, "learning_rate": 6.773529532082705e-07, "loss": 0.04, "step": 454160 }, { "epoch": 4.852502804637, "grad_norm": 0.028234131634235382, "learning_rate": 6.773372446757217e-07, "loss": 0.0008, "step": 454170 }, { "epoch": 4.8526096479512795, "grad_norm": 0.007428593002259731, "learning_rate": 6.773215359429455e-07, "loss": 0.0008, "step": 454180 }, { "epoch": 4.852716491265559, "grad_norm": 0.01844213344156742, "learning_rate": 6.773058270099602e-07, "loss": 0.0034, "step": 454190 }, { "epoch": 4.852823334579838, "grad_norm": 0.00649633165448904, "learning_rate": 6.772901178767832e-07, "loss": 0.0051, "step": 454200 }, { "epoch": 4.852930177894118, "grad_norm": 0.01952909305691719, "learning_rate": 6.772744085434321e-07, "loss": 0.0045, "step": 454210 }, { "epoch": 4.853037021208398, "grad_norm": 0.0019513149745762348, "learning_rate": 6.77258699009925e-07, "loss": 0.0009, "step": 454220 }, { "epoch": 4.853143864522678, "grad_norm": 0.04194388911128044, "learning_rate": 6.772429892762794e-07, "loss": 0.013, "step": 454230 }, { "epoch": 4.853250707836957, "grad_norm": 0.11343365162611008, "learning_rate": 6.77227279342513e-07, "loss": 0.001, "step": 454240 }, { "epoch": 4.853357551151237, "grad_norm": 1.6563373804092407, "learning_rate": 6.772115692086438e-07, "loss": 0.0018, "step": 454250 }, { "epoch": 4.853464394465516, "grad_norm": 0.01005134079605341, "learning_rate": 6.771958588746892e-07, "loss": 0.0012, "step": 454260 }, { "epoch": 4.853571237779796, "grad_norm": 0.002026434987783432, "learning_rate": 6.771801483406672e-07, "loss": 0.0071, "step": 454270 }, { "epoch": 4.853678081094076, "grad_norm": 3.014157772064209, "learning_rate": 6.771644376065954e-07, "loss": 0.0059, "step": 454280 }, { "epoch": 4.853784924408355, "grad_norm": 4.509829998016357, "learning_rate": 6.771487266724915e-07, "loss": 0.0212, "step": 454290 }, { "epoch": 4.853891767722635, "grad_norm": 0.06648149341344833, "learning_rate": 6.771330155383735e-07, "loss": 0.0248, "step": 454300 }, { "epoch": 4.853998611036914, "grad_norm": 0.013969936408102512, "learning_rate": 6.771173042042589e-07, "loss": 0.0194, "step": 454310 }, { "epoch": 4.854105454351194, "grad_norm": 3.5906732082366943, "learning_rate": 6.771015926701653e-07, "loss": 0.0093, "step": 454320 }, { "epoch": 4.854212297665473, "grad_norm": 1.8332083225250244, "learning_rate": 6.770858809361108e-07, "loss": 0.0218, "step": 454330 }, { "epoch": 4.8543191409797535, "grad_norm": 0.018697822466492653, "learning_rate": 6.770701690021129e-07, "loss": 0.0071, "step": 454340 }, { "epoch": 4.854425984294033, "grad_norm": 2.3445651531219482, "learning_rate": 6.770544568681895e-07, "loss": 0.0032, "step": 454350 }, { "epoch": 4.854532827608312, "grad_norm": 1.4384568929672241, "learning_rate": 6.770387445343581e-07, "loss": 0.0065, "step": 454360 }, { "epoch": 4.854639670922592, "grad_norm": 0.030767768621444702, "learning_rate": 6.770230320006369e-07, "loss": 0.0088, "step": 454370 }, { "epoch": 4.854746514236871, "grad_norm": 4.886591911315918, "learning_rate": 6.770073192670431e-07, "loss": 0.0142, "step": 454380 }, { "epoch": 4.854853357551152, "grad_norm": 2.2954232692718506, "learning_rate": 6.769916063335948e-07, "loss": 0.0172, "step": 454390 }, { "epoch": 4.854960200865431, "grad_norm": 7.5718560218811035, "learning_rate": 6.769758932003096e-07, "loss": 0.0214, "step": 454400 }, { "epoch": 4.855067044179711, "grad_norm": 4.4729814529418945, "learning_rate": 6.769601798672052e-07, "loss": 0.0068, "step": 454410 }, { "epoch": 4.85517388749399, "grad_norm": 0.006421682424843311, "learning_rate": 6.769444663342993e-07, "loss": 0.0111, "step": 454420 }, { "epoch": 4.8552807308082695, "grad_norm": 5.57001256942749, "learning_rate": 6.7692875260161e-07, "loss": 0.0078, "step": 454430 }, { "epoch": 4.855387574122549, "grad_norm": 1.5235118865966797, "learning_rate": 6.769130386691545e-07, "loss": 0.0197, "step": 454440 }, { "epoch": 4.855494417436828, "grad_norm": 0.009865746833384037, "learning_rate": 6.76897324536951e-07, "loss": 0.0284, "step": 454450 }, { "epoch": 4.855601260751109, "grad_norm": 0.182933047413826, "learning_rate": 6.768816102050172e-07, "loss": 0.0047, "step": 454460 }, { "epoch": 4.855708104065388, "grad_norm": 0.00305655668489635, "learning_rate": 6.768658956733704e-07, "loss": 0.02, "step": 454470 }, { "epoch": 4.855814947379668, "grad_norm": 0.023702390491962433, "learning_rate": 6.768501809420288e-07, "loss": 0.0278, "step": 454480 }, { "epoch": 4.855921790693947, "grad_norm": 1.883323311805725, "learning_rate": 6.768344660110102e-07, "loss": 0.027, "step": 454490 }, { "epoch": 4.856028634008227, "grad_norm": 0.00105267483741045, "learning_rate": 6.768187508803318e-07, "loss": 0.0069, "step": 454500 }, { "epoch": 4.856135477322507, "grad_norm": 0.4038996994495392, "learning_rate": 6.768030355500119e-07, "loss": 0.0055, "step": 454510 }, { "epoch": 4.856242320636786, "grad_norm": 0.005182486493140459, "learning_rate": 6.76787320020068e-07, "loss": 0.0072, "step": 454520 }, { "epoch": 4.856349163951066, "grad_norm": 0.20639321208000183, "learning_rate": 6.767716042905177e-07, "loss": 0.0021, "step": 454530 }, { "epoch": 4.856456007265345, "grad_norm": 0.012453493662178516, "learning_rate": 6.767558883613791e-07, "loss": 0.0008, "step": 454540 }, { "epoch": 4.856562850579625, "grad_norm": 0.13025474548339844, "learning_rate": 6.767401722326697e-07, "loss": 0.009, "step": 454550 }, { "epoch": 4.856669693893904, "grad_norm": 0.02854999341070652, "learning_rate": 6.767244559044074e-07, "loss": 0.0545, "step": 454560 }, { "epoch": 4.856776537208185, "grad_norm": 0.029280878603458405, "learning_rate": 6.767087393766098e-07, "loss": 0.0039, "step": 454570 }, { "epoch": 4.856883380522464, "grad_norm": 0.0037524511571973562, "learning_rate": 6.766930226492946e-07, "loss": 0.0372, "step": 454580 }, { "epoch": 4.8569902238367435, "grad_norm": 0.0004726608167402446, "learning_rate": 6.766773057224797e-07, "loss": 0.0071, "step": 454590 }, { "epoch": 4.857097067151023, "grad_norm": 0.01009003259241581, "learning_rate": 6.766615885961828e-07, "loss": 0.005, "step": 454600 }, { "epoch": 4.857203910465302, "grad_norm": 0.0015334851341322064, "learning_rate": 6.766458712704216e-07, "loss": 0.0011, "step": 454610 }, { "epoch": 4.857310753779582, "grad_norm": 0.06015220284461975, "learning_rate": 6.76630153745214e-07, "loss": 0.0108, "step": 454620 }, { "epoch": 4.857417597093862, "grad_norm": 0.008893904276192188, "learning_rate": 6.766144360205776e-07, "loss": 0.007, "step": 454630 }, { "epoch": 4.857524440408142, "grad_norm": 0.06361860781908035, "learning_rate": 6.7659871809653e-07, "loss": 0.0007, "step": 454640 }, { "epoch": 4.857631283722421, "grad_norm": 0.08556856215000153, "learning_rate": 6.765829999730893e-07, "loss": 0.0033, "step": 454650 }, { "epoch": 4.857738127036701, "grad_norm": 0.0844721868634224, "learning_rate": 6.765672816502729e-07, "loss": 0.0151, "step": 454660 }, { "epoch": 4.85784497035098, "grad_norm": 0.6316214799880981, "learning_rate": 6.765515631280989e-07, "loss": 0.0052, "step": 454670 }, { "epoch": 4.8579518136652595, "grad_norm": 1.0044392347335815, "learning_rate": 6.765358444065847e-07, "loss": 0.0042, "step": 454680 }, { "epoch": 4.85805865697954, "grad_norm": 0.010697527788579464, "learning_rate": 6.765201254857483e-07, "loss": 0.0246, "step": 454690 }, { "epoch": 4.858165500293819, "grad_norm": 0.06675367057323456, "learning_rate": 6.765044063656073e-07, "loss": 0.0061, "step": 454700 }, { "epoch": 4.858272343608099, "grad_norm": 0.3332039415836334, "learning_rate": 6.764886870461795e-07, "loss": 0.0106, "step": 454710 }, { "epoch": 4.858379186922378, "grad_norm": 0.07310108840465546, "learning_rate": 6.764729675274828e-07, "loss": 0.0079, "step": 454720 }, { "epoch": 4.858486030236658, "grad_norm": 8.59128189086914, "learning_rate": 6.764572478095345e-07, "loss": 0.0061, "step": 454730 }, { "epoch": 4.858592873550937, "grad_norm": 0.635328471660614, "learning_rate": 6.76441527892353e-07, "loss": 0.0271, "step": 454740 }, { "epoch": 4.8586997168652175, "grad_norm": 0.09905806928873062, "learning_rate": 6.764258077759555e-07, "loss": 0.009, "step": 454750 }, { "epoch": 4.858806560179497, "grad_norm": 8.009841918945312, "learning_rate": 6.7641008746036e-07, "loss": 0.0015, "step": 454760 }, { "epoch": 4.858913403493776, "grad_norm": 0.04028124362230301, "learning_rate": 6.763943669455842e-07, "loss": 0.028, "step": 454770 }, { "epoch": 4.859020246808056, "grad_norm": 0.2701146900653839, "learning_rate": 6.763786462316458e-07, "loss": 0.0309, "step": 454780 }, { "epoch": 4.859127090122335, "grad_norm": 0.005201384890824556, "learning_rate": 6.763629253185625e-07, "loss": 0.0008, "step": 454790 }, { "epoch": 4.859233933436615, "grad_norm": 5.171854019165039, "learning_rate": 6.763472042063524e-07, "loss": 0.0022, "step": 454800 }, { "epoch": 4.859340776750895, "grad_norm": 0.004615589044988155, "learning_rate": 6.763314828950329e-07, "loss": 0.0098, "step": 454810 }, { "epoch": 4.859447620065175, "grad_norm": 0.000963364727795124, "learning_rate": 6.763157613846218e-07, "loss": 0.0003, "step": 454820 }, { "epoch": 4.859554463379454, "grad_norm": 0.004393065348267555, "learning_rate": 6.76300039675137e-07, "loss": 0.0014, "step": 454830 }, { "epoch": 4.8596613066937335, "grad_norm": 0.0023945842403918505, "learning_rate": 6.762843177665959e-07, "loss": 0.0025, "step": 454840 }, { "epoch": 4.859768150008013, "grad_norm": 0.00846041738986969, "learning_rate": 6.762685956590167e-07, "loss": 0.0171, "step": 454850 }, { "epoch": 4.859874993322293, "grad_norm": 0.5043480396270752, "learning_rate": 6.76252873352417e-07, "loss": 0.006, "step": 454860 }, { "epoch": 4.859981836636573, "grad_norm": 0.01570010371506214, "learning_rate": 6.762371508468144e-07, "loss": 0.0114, "step": 454870 }, { "epoch": 4.860088679950852, "grad_norm": 0.018584653735160828, "learning_rate": 6.762214281422267e-07, "loss": 0.0005, "step": 454880 }, { "epoch": 4.860195523265132, "grad_norm": 0.044090162962675095, "learning_rate": 6.762057052386718e-07, "loss": 0.0131, "step": 454890 }, { "epoch": 4.860302366579411, "grad_norm": 0.007371545769274235, "learning_rate": 6.761899821361673e-07, "loss": 0.0063, "step": 454900 }, { "epoch": 4.860409209893691, "grad_norm": 0.001234530471265316, "learning_rate": 6.761742588347312e-07, "loss": 0.0049, "step": 454910 }, { "epoch": 4.86051605320797, "grad_norm": 0.005633411463350058, "learning_rate": 6.761585353343808e-07, "loss": 0.0039, "step": 454920 }, { "epoch": 4.86062289652225, "grad_norm": 0.0015408210456371307, "learning_rate": 6.761428116351342e-07, "loss": 0.0069, "step": 454930 }, { "epoch": 4.86072973983653, "grad_norm": 0.0016452318523079157, "learning_rate": 6.761270877370092e-07, "loss": 0.0009, "step": 454940 }, { "epoch": 4.860836583150809, "grad_norm": 4.8007612228393555, "learning_rate": 6.761113636400233e-07, "loss": 0.0044, "step": 454950 }, { "epoch": 4.860943426465089, "grad_norm": 2.230292320251465, "learning_rate": 6.760956393441944e-07, "loss": 0.0103, "step": 454960 }, { "epoch": 4.861050269779368, "grad_norm": 0.17607933282852173, "learning_rate": 6.760799148495402e-07, "loss": 0.0014, "step": 454970 }, { "epoch": 4.861157113093649, "grad_norm": 2.4564759731292725, "learning_rate": 6.760641901560785e-07, "loss": 0.0125, "step": 454980 }, { "epoch": 4.861263956407928, "grad_norm": 23.064760208129883, "learning_rate": 6.760484652638271e-07, "loss": 0.008, "step": 454990 }, { "epoch": 4.8613707997222075, "grad_norm": 2.002483606338501, "learning_rate": 6.760327401728037e-07, "loss": 0.0012, "step": 455000 }, { "epoch": 4.861477643036487, "grad_norm": 14.500154495239258, "learning_rate": 6.76017014883026e-07, "loss": 0.0155, "step": 455010 }, { "epoch": 4.8615844863507665, "grad_norm": 0.015661301091313362, "learning_rate": 6.760012893945117e-07, "loss": 0.0009, "step": 455020 }, { "epoch": 4.861691329665046, "grad_norm": 0.30412009358406067, "learning_rate": 6.759855637072789e-07, "loss": 0.0001, "step": 455030 }, { "epoch": 4.861798172979325, "grad_norm": 2.632828712463379, "learning_rate": 6.75969837821345e-07, "loss": 0.0464, "step": 455040 }, { "epoch": 4.861905016293606, "grad_norm": 0.015615936368703842, "learning_rate": 6.759541117367279e-07, "loss": 0.0015, "step": 455050 }, { "epoch": 4.862011859607885, "grad_norm": 0.0028103378135710955, "learning_rate": 6.759383854534453e-07, "loss": 0.0091, "step": 455060 }, { "epoch": 4.862118702922165, "grad_norm": 1.6983745098114014, "learning_rate": 6.759226589715149e-07, "loss": 0.0048, "step": 455070 }, { "epoch": 4.862225546236444, "grad_norm": 0.022668050602078438, "learning_rate": 6.759069322909548e-07, "loss": 0.0173, "step": 455080 }, { "epoch": 4.8623323895507236, "grad_norm": 0.0053247748874127865, "learning_rate": 6.758912054117822e-07, "loss": 0.0224, "step": 455090 }, { "epoch": 4.862439232865004, "grad_norm": 5.244132041931152, "learning_rate": 6.758754783340152e-07, "loss": 0.0104, "step": 455100 }, { "epoch": 4.862546076179283, "grad_norm": 0.11406806856393814, "learning_rate": 6.758597510576717e-07, "loss": 0.0132, "step": 455110 }, { "epoch": 4.862652919493563, "grad_norm": 3.5963425636291504, "learning_rate": 6.758440235827691e-07, "loss": 0.003, "step": 455120 }, { "epoch": 4.862759762807842, "grad_norm": 0.0006881022709421813, "learning_rate": 6.758282959093254e-07, "loss": 0.0062, "step": 455130 }, { "epoch": 4.862866606122122, "grad_norm": 0.7116712331771851, "learning_rate": 6.758125680373583e-07, "loss": 0.0066, "step": 455140 }, { "epoch": 4.862973449436401, "grad_norm": 0.125477597117424, "learning_rate": 6.757968399668854e-07, "loss": 0.0028, "step": 455150 }, { "epoch": 4.863080292750681, "grad_norm": 2.6465182304382324, "learning_rate": 6.757811116979248e-07, "loss": 0.0036, "step": 455160 }, { "epoch": 4.863187136064961, "grad_norm": 0.09345422685146332, "learning_rate": 6.75765383230494e-07, "loss": 0.0381, "step": 455170 }, { "epoch": 4.8632939793792405, "grad_norm": 0.11760789901018143, "learning_rate": 6.757496545646107e-07, "loss": 0.0026, "step": 455180 }, { "epoch": 4.86340082269352, "grad_norm": 0.003825180698186159, "learning_rate": 6.757339257002929e-07, "loss": 0.0186, "step": 455190 }, { "epoch": 4.863507666007799, "grad_norm": 0.009192599914968014, "learning_rate": 6.757181966375583e-07, "loss": 0.0182, "step": 455200 }, { "epoch": 4.863614509322079, "grad_norm": 0.0013387073995545506, "learning_rate": 6.757024673764244e-07, "loss": 0.0027, "step": 455210 }, { "epoch": 4.863721352636359, "grad_norm": 0.604690670967102, "learning_rate": 6.756867379169093e-07, "loss": 0.0007, "step": 455220 }, { "epoch": 4.863828195950639, "grad_norm": 0.0054914625361561775, "learning_rate": 6.756710082590305e-07, "loss": 0.0041, "step": 455230 }, { "epoch": 4.863935039264918, "grad_norm": 0.002724024001508951, "learning_rate": 6.756552784028059e-07, "loss": 0.011, "step": 455240 }, { "epoch": 4.864041882579198, "grad_norm": 2.2958405017852783, "learning_rate": 6.756395483482533e-07, "loss": 0.0107, "step": 455250 }, { "epoch": 4.864148725893477, "grad_norm": 10.471664428710938, "learning_rate": 6.756238180953902e-07, "loss": 0.0049, "step": 455260 }, { "epoch": 4.8642555692077565, "grad_norm": 0.4195903539657593, "learning_rate": 6.756080876442348e-07, "loss": 0.0048, "step": 455270 }, { "epoch": 4.864362412522036, "grad_norm": 0.16678443551063538, "learning_rate": 6.755923569948046e-07, "loss": 0.009, "step": 455280 }, { "epoch": 4.864469255836316, "grad_norm": 5.6649580001831055, "learning_rate": 6.755766261471172e-07, "loss": 0.0186, "step": 455290 }, { "epoch": 4.864576099150596, "grad_norm": 0.10801441967487335, "learning_rate": 6.755608951011906e-07, "loss": 0.0049, "step": 455300 }, { "epoch": 4.864682942464875, "grad_norm": 4.611208915710449, "learning_rate": 6.755451638570426e-07, "loss": 0.0059, "step": 455310 }, { "epoch": 4.864789785779155, "grad_norm": 0.00439459877088666, "learning_rate": 6.755294324146909e-07, "loss": 0.0142, "step": 455320 }, { "epoch": 4.864896629093434, "grad_norm": 0.19522927701473236, "learning_rate": 6.755137007741529e-07, "loss": 0.0061, "step": 455330 }, { "epoch": 4.8650034724077145, "grad_norm": 0.48003721237182617, "learning_rate": 6.75497968935447e-07, "loss": 0.024, "step": 455340 }, { "epoch": 4.865110315721994, "grad_norm": 2.699113130569458, "learning_rate": 6.754822368985905e-07, "loss": 0.0026, "step": 455350 }, { "epoch": 4.865217159036273, "grad_norm": 0.013440961949527264, "learning_rate": 6.754665046636015e-07, "loss": 0.0082, "step": 455360 }, { "epoch": 4.865324002350553, "grad_norm": 0.023254672065377235, "learning_rate": 6.754507722304974e-07, "loss": 0.0152, "step": 455370 }, { "epoch": 4.865430845664832, "grad_norm": 0.03018411248922348, "learning_rate": 6.75435039599296e-07, "loss": 0.017, "step": 455380 }, { "epoch": 4.865537688979112, "grad_norm": 1.7887847423553467, "learning_rate": 6.754193067700156e-07, "loss": 0.0095, "step": 455390 }, { "epoch": 4.865644532293392, "grad_norm": 0.02730376273393631, "learning_rate": 6.754035737426733e-07, "loss": 0.0175, "step": 455400 }, { "epoch": 4.865751375607672, "grad_norm": 0.04469390586018562, "learning_rate": 6.75387840517287e-07, "loss": 0.0026, "step": 455410 }, { "epoch": 4.865858218921951, "grad_norm": 0.01790493167936802, "learning_rate": 6.753721070938749e-07, "loss": 0.0129, "step": 455420 }, { "epoch": 4.8659650622362305, "grad_norm": 0.009741301648318768, "learning_rate": 6.753563734724543e-07, "loss": 0.0079, "step": 455430 }, { "epoch": 4.86607190555051, "grad_norm": 0.004429832566529512, "learning_rate": 6.753406396530431e-07, "loss": 0.0041, "step": 455440 }, { "epoch": 4.866178748864789, "grad_norm": 0.19180044531822205, "learning_rate": 6.753249056356591e-07, "loss": 0.0028, "step": 455450 }, { "epoch": 4.86628559217907, "grad_norm": 1.4617271423339844, "learning_rate": 6.753091714203201e-07, "loss": 0.0075, "step": 455460 }, { "epoch": 4.866392435493349, "grad_norm": 1.5235862731933594, "learning_rate": 6.752934370070437e-07, "loss": 0.0041, "step": 455470 }, { "epoch": 4.866499278807629, "grad_norm": 0.015276850201189518, "learning_rate": 6.752777023958481e-07, "loss": 0.001, "step": 455480 }, { "epoch": 4.866606122121908, "grad_norm": 0.09662597626447678, "learning_rate": 6.752619675867503e-07, "loss": 0.0031, "step": 455490 }, { "epoch": 4.866712965436188, "grad_norm": 0.0008088240283541381, "learning_rate": 6.752462325797688e-07, "loss": 0.0217, "step": 455500 }, { "epoch": 4.866819808750467, "grad_norm": 1.3160319328308105, "learning_rate": 6.75230497374921e-07, "loss": 0.0022, "step": 455510 }, { "epoch": 4.866926652064747, "grad_norm": 2.735076427459717, "learning_rate": 6.752147619722247e-07, "loss": 0.0022, "step": 455520 }, { "epoch": 4.867033495379027, "grad_norm": 0.5264562368392944, "learning_rate": 6.751990263716978e-07, "loss": 0.0278, "step": 455530 }, { "epoch": 4.867140338693306, "grad_norm": 2.8292949199676514, "learning_rate": 6.751832905733578e-07, "loss": 0.0012, "step": 455540 }, { "epoch": 4.867247182007586, "grad_norm": 0.0423710010945797, "learning_rate": 6.751675545772227e-07, "loss": 0.0136, "step": 455550 }, { "epoch": 4.867354025321865, "grad_norm": 1.3857572078704834, "learning_rate": 6.751518183833104e-07, "loss": 0.0041, "step": 455560 }, { "epoch": 4.867460868636146, "grad_norm": 0.0009143765782937407, "learning_rate": 6.751360819916384e-07, "loss": 0.002, "step": 455570 }, { "epoch": 4.867567711950425, "grad_norm": 0.13702529668807983, "learning_rate": 6.751203454022244e-07, "loss": 0.0035, "step": 455580 }, { "epoch": 4.8676745552647045, "grad_norm": 2.500288248062134, "learning_rate": 6.751046086150864e-07, "loss": 0.0059, "step": 455590 }, { "epoch": 4.867781398578984, "grad_norm": 0.011364047415554523, "learning_rate": 6.75088871630242e-07, "loss": 0.0015, "step": 455600 }, { "epoch": 4.867888241893263, "grad_norm": 0.8649647235870361, "learning_rate": 6.750731344477093e-07, "loss": 0.0151, "step": 455610 }, { "epoch": 4.867995085207543, "grad_norm": 0.033834367990493774, "learning_rate": 6.750573970675055e-07, "loss": 0.0017, "step": 455620 }, { "epoch": 4.868101928521822, "grad_norm": 2.977165699005127, "learning_rate": 6.75041659489649e-07, "loss": 0.0064, "step": 455630 }, { "epoch": 4.868208771836103, "grad_norm": 5.980941295623779, "learning_rate": 6.75025921714157e-07, "loss": 0.0044, "step": 455640 }, { "epoch": 4.868315615150382, "grad_norm": 0.015602614730596542, "learning_rate": 6.750101837410476e-07, "loss": 0.0918, "step": 455650 }, { "epoch": 4.868422458464662, "grad_norm": 0.4337296783924103, "learning_rate": 6.749944455703386e-07, "loss": 0.013, "step": 455660 }, { "epoch": 4.868529301778941, "grad_norm": 0.017608001828193665, "learning_rate": 6.749787072020475e-07, "loss": 0.0211, "step": 455670 }, { "epoch": 4.8686361450932205, "grad_norm": 3.685889959335327, "learning_rate": 6.749629686361924e-07, "loss": 0.0096, "step": 455680 }, { "epoch": 4.868742988407501, "grad_norm": 0.002821714151650667, "learning_rate": 6.749472298727908e-07, "loss": 0.0062, "step": 455690 }, { "epoch": 4.86884983172178, "grad_norm": 0.04352812469005585, "learning_rate": 6.749314909118605e-07, "loss": 0.0079, "step": 455700 }, { "epoch": 4.86895667503606, "grad_norm": 0.03252081945538521, "learning_rate": 6.749157517534196e-07, "loss": 0.0124, "step": 455710 }, { "epoch": 4.869063518350339, "grad_norm": 0.021647483110427856, "learning_rate": 6.749000123974854e-07, "loss": 0.0163, "step": 455720 }, { "epoch": 4.869170361664619, "grad_norm": 0.006556482054293156, "learning_rate": 6.748842728440759e-07, "loss": 0.0011, "step": 455730 }, { "epoch": 4.869277204978898, "grad_norm": 4.2071452140808105, "learning_rate": 6.748685330932089e-07, "loss": 0.0106, "step": 455740 }, { "epoch": 4.869384048293178, "grad_norm": 3.7096259593963623, "learning_rate": 6.748527931449021e-07, "loss": 0.0164, "step": 455750 }, { "epoch": 4.869490891607458, "grad_norm": 0.053776051849126816, "learning_rate": 6.748370529991733e-07, "loss": 0.0002, "step": 455760 }, { "epoch": 4.869597734921737, "grad_norm": 0.06613937765359879, "learning_rate": 6.748213126560404e-07, "loss": 0.0094, "step": 455770 }, { "epoch": 4.869704578236017, "grad_norm": 2.6310997009277344, "learning_rate": 6.748055721155208e-07, "loss": 0.0102, "step": 455780 }, { "epoch": 4.869811421550296, "grad_norm": 0.001716536469757557, "learning_rate": 6.747898313776326e-07, "loss": 0.002, "step": 455790 }, { "epoch": 4.869918264864576, "grad_norm": 0.022839030250906944, "learning_rate": 6.747740904423938e-07, "loss": 0.0094, "step": 455800 }, { "epoch": 4.870025108178856, "grad_norm": 0.020181920379400253, "learning_rate": 6.747583493098214e-07, "loss": 0.0002, "step": 455810 }, { "epoch": 4.870131951493136, "grad_norm": 0.0008999042911455035, "learning_rate": 6.747426079799338e-07, "loss": 0.002, "step": 455820 }, { "epoch": 4.870238794807415, "grad_norm": 3.553877830505371, "learning_rate": 6.747268664527488e-07, "loss": 0.0106, "step": 455830 }, { "epoch": 4.8703456381216945, "grad_norm": 0.8224462866783142, "learning_rate": 6.747111247282838e-07, "loss": 0.0026, "step": 455840 }, { "epoch": 4.870452481435974, "grad_norm": 0.02294239215552807, "learning_rate": 6.746953828065568e-07, "loss": 0.0012, "step": 455850 }, { "epoch": 4.870559324750253, "grad_norm": 2.665313720703125, "learning_rate": 6.746796406875856e-07, "loss": 0.0076, "step": 455860 }, { "epoch": 4.870666168064533, "grad_norm": 3.9358580112457275, "learning_rate": 6.746638983713877e-07, "loss": 0.0018, "step": 455870 }, { "epoch": 4.870773011378813, "grad_norm": 3.8414463996887207, "learning_rate": 6.746481558579812e-07, "loss": 0.0088, "step": 455880 }, { "epoch": 4.870879854693093, "grad_norm": 0.018606683239340782, "learning_rate": 6.746324131473836e-07, "loss": 0.0012, "step": 455890 }, { "epoch": 4.870986698007372, "grad_norm": 0.3578163981437683, "learning_rate": 6.74616670239613e-07, "loss": 0.0116, "step": 455900 }, { "epoch": 4.871093541321652, "grad_norm": 0.01566261798143387, "learning_rate": 6.74600927134687e-07, "loss": 0.0047, "step": 455910 }, { "epoch": 4.871200384635931, "grad_norm": 2.5193889141082764, "learning_rate": 6.745851838326233e-07, "loss": 0.0027, "step": 455920 }, { "epoch": 4.871307227950211, "grad_norm": 0.8284225463867188, "learning_rate": 6.745694403334398e-07, "loss": 0.0179, "step": 455930 }, { "epoch": 4.871414071264491, "grad_norm": 0.0005506773013621569, "learning_rate": 6.745536966371541e-07, "loss": 0.0095, "step": 455940 }, { "epoch": 4.87152091457877, "grad_norm": 0.9642342925071716, "learning_rate": 6.745379527437842e-07, "loss": 0.0126, "step": 455950 }, { "epoch": 4.87162775789305, "grad_norm": 2.9934115409851074, "learning_rate": 6.745222086533479e-07, "loss": 0.0088, "step": 455960 }, { "epoch": 4.871734601207329, "grad_norm": 0.04386972263455391, "learning_rate": 6.745064643658628e-07, "loss": 0.0085, "step": 455970 }, { "epoch": 4.871841444521609, "grad_norm": 0.08866403251886368, "learning_rate": 6.744907198813465e-07, "loss": 0.0055, "step": 455980 }, { "epoch": 4.871948287835888, "grad_norm": 6.744875431060791, "learning_rate": 6.744749751998173e-07, "loss": 0.0092, "step": 455990 }, { "epoch": 4.8720551311501685, "grad_norm": 3.7833316326141357, "learning_rate": 6.744592303212926e-07, "loss": 0.0175, "step": 456000 }, { "epoch": 4.872161974464448, "grad_norm": 0.577565610408783, "learning_rate": 6.744434852457902e-07, "loss": 0.0017, "step": 456010 }, { "epoch": 4.8722688177787274, "grad_norm": 0.006116913631558418, "learning_rate": 6.74427739973328e-07, "loss": 0.0098, "step": 456020 }, { "epoch": 4.872375661093007, "grad_norm": 1.3277554512023926, "learning_rate": 6.744119945039238e-07, "loss": 0.0017, "step": 456030 }, { "epoch": 4.872482504407286, "grad_norm": 12.490503311157227, "learning_rate": 6.743962488375951e-07, "loss": 0.0189, "step": 456040 }, { "epoch": 4.872589347721567, "grad_norm": 0.18331082165241241, "learning_rate": 6.743805029743598e-07, "loss": 0.0197, "step": 456050 }, { "epoch": 4.872696191035846, "grad_norm": 3.4772300720214844, "learning_rate": 6.743647569142361e-07, "loss": 0.0061, "step": 456060 }, { "epoch": 4.872803034350126, "grad_norm": 0.0008821883238852024, "learning_rate": 6.743490106572412e-07, "loss": 0.0035, "step": 456070 }, { "epoch": 4.872909877664405, "grad_norm": 0.1901867538690567, "learning_rate": 6.743332642033932e-07, "loss": 0.0006, "step": 456080 }, { "epoch": 4.8730167209786845, "grad_norm": 0.0656425803899765, "learning_rate": 6.743175175527097e-07, "loss": 0.0034, "step": 456090 }, { "epoch": 4.873123564292964, "grad_norm": 0.004246129654347897, "learning_rate": 6.743017707052085e-07, "loss": 0.007, "step": 456100 }, { "epoch": 4.873230407607244, "grad_norm": 0.007879235781729221, "learning_rate": 6.742860236609076e-07, "loss": 0.0437, "step": 456110 }, { "epoch": 4.873337250921524, "grad_norm": 0.4744797348976135, "learning_rate": 6.742702764198247e-07, "loss": 0.0002, "step": 456120 }, { "epoch": 4.873444094235803, "grad_norm": 7.17183256149292, "learning_rate": 6.742545289819772e-07, "loss": 0.015, "step": 456130 }, { "epoch": 4.873550937550083, "grad_norm": 13.645087242126465, "learning_rate": 6.742387813473835e-07, "loss": 0.1026, "step": 456140 }, { "epoch": 4.873657780864362, "grad_norm": 4.208194732666016, "learning_rate": 6.742230335160608e-07, "loss": 0.0287, "step": 456150 }, { "epoch": 4.873764624178642, "grad_norm": 1.1184862852096558, "learning_rate": 6.742072854880273e-07, "loss": 0.0009, "step": 456160 }, { "epoch": 4.873871467492922, "grad_norm": 0.13666096329689026, "learning_rate": 6.741915372633007e-07, "loss": 0.0074, "step": 456170 }, { "epoch": 4.8739783108072015, "grad_norm": 0.0017996010137721896, "learning_rate": 6.741757888418985e-07, "loss": 0.0027, "step": 456180 }, { "epoch": 4.874085154121481, "grad_norm": 4.641212463378906, "learning_rate": 6.741600402238389e-07, "loss": 0.0014, "step": 456190 }, { "epoch": 4.87419199743576, "grad_norm": 0.06152300164103508, "learning_rate": 6.741442914091393e-07, "loss": 0.0042, "step": 456200 }, { "epoch": 4.87429884075004, "grad_norm": 0.0012161564081907272, "learning_rate": 6.741285423978177e-07, "loss": 0.0187, "step": 456210 }, { "epoch": 4.874405684064319, "grad_norm": 0.005574262700974941, "learning_rate": 6.741127931898919e-07, "loss": 0.0199, "step": 456220 }, { "epoch": 4.8745125273786, "grad_norm": 0.008232307620346546, "learning_rate": 6.740970437853794e-07, "loss": 0.0044, "step": 456230 }, { "epoch": 4.874619370692879, "grad_norm": 0.05019694194197655, "learning_rate": 6.740812941842985e-07, "loss": 0.0098, "step": 456240 }, { "epoch": 4.8747262140071586, "grad_norm": 6.001110076904297, "learning_rate": 6.740655443866665e-07, "loss": 0.0257, "step": 456250 }, { "epoch": 4.874833057321438, "grad_norm": 0.005467223934829235, "learning_rate": 6.740497943925014e-07, "loss": 0.0384, "step": 456260 }, { "epoch": 4.8749399006357175, "grad_norm": 0.041250865906476974, "learning_rate": 6.740340442018209e-07, "loss": 0.0009, "step": 456270 }, { "epoch": 4.875046743949998, "grad_norm": 2.052417516708374, "learning_rate": 6.740182938146429e-07, "loss": 0.0024, "step": 456280 }, { "epoch": 4.875153587264277, "grad_norm": 5.3479108810424805, "learning_rate": 6.740025432309852e-07, "loss": 0.0033, "step": 456290 }, { "epoch": 4.875260430578557, "grad_norm": 0.003156891092658043, "learning_rate": 6.739867924508653e-07, "loss": 0.0129, "step": 456300 }, { "epoch": 4.875367273892836, "grad_norm": 0.6329383850097656, "learning_rate": 6.739710414743013e-07, "loss": 0.004, "step": 456310 }, { "epoch": 4.875474117207116, "grad_norm": 0.4492132365703583, "learning_rate": 6.739552903013108e-07, "loss": 0.0042, "step": 456320 }, { "epoch": 4.875580960521395, "grad_norm": 13.929765701293945, "learning_rate": 6.739395389319117e-07, "loss": 0.0128, "step": 456330 }, { "epoch": 4.875687803835675, "grad_norm": 5.5147881507873535, "learning_rate": 6.739237873661217e-07, "loss": 0.0068, "step": 456340 }, { "epoch": 4.875794647149955, "grad_norm": 0.02841542847454548, "learning_rate": 6.739080356039586e-07, "loss": 0.0002, "step": 456350 }, { "epoch": 4.875901490464234, "grad_norm": 0.06267277151346207, "learning_rate": 6.738922836454402e-07, "loss": 0.0071, "step": 456360 }, { "epoch": 4.876008333778514, "grad_norm": 0.0631989911198616, "learning_rate": 6.738765314905844e-07, "loss": 0.0027, "step": 456370 }, { "epoch": 4.876115177092793, "grad_norm": 0.003013916313648224, "learning_rate": 6.738607791394088e-07, "loss": 0.0024, "step": 456380 }, { "epoch": 4.876222020407073, "grad_norm": 0.6290614008903503, "learning_rate": 6.738450265919314e-07, "loss": 0.0035, "step": 456390 }, { "epoch": 4.876328863721353, "grad_norm": 2.6833252906799316, "learning_rate": 6.738292738481697e-07, "loss": 0.0033, "step": 456400 }, { "epoch": 4.876435707035633, "grad_norm": 0.0015058956341817975, "learning_rate": 6.738135209081414e-07, "loss": 0.0014, "step": 456410 }, { "epoch": 4.876542550349912, "grad_norm": 0.002497462322935462, "learning_rate": 6.737977677718649e-07, "loss": 0.0171, "step": 456420 }, { "epoch": 4.8766493936641915, "grad_norm": 0.0017673063557595015, "learning_rate": 6.737820144393575e-07, "loss": 0.0141, "step": 456430 }, { "epoch": 4.876756236978471, "grad_norm": 0.008036821149289608, "learning_rate": 6.737662609106369e-07, "loss": 0.0124, "step": 456440 }, { "epoch": 4.87686308029275, "grad_norm": 0.12435474246740341, "learning_rate": 6.737505071857212e-07, "loss": 0.0023, "step": 456450 }, { "epoch": 4.87696992360703, "grad_norm": 0.006892719771713018, "learning_rate": 6.737347532646282e-07, "loss": 0.0132, "step": 456460 }, { "epoch": 4.87707676692131, "grad_norm": 0.3317451477050781, "learning_rate": 6.737189991473754e-07, "loss": 0.0134, "step": 456470 }, { "epoch": 4.87718361023559, "grad_norm": 0.005006308667361736, "learning_rate": 6.737032448339808e-07, "loss": 0.0019, "step": 456480 }, { "epoch": 4.877290453549869, "grad_norm": 0.7688479423522949, "learning_rate": 6.736874903244622e-07, "loss": 0.0005, "step": 456490 }, { "epoch": 4.877397296864149, "grad_norm": 0.0004896035534329712, "learning_rate": 6.73671735618837e-07, "loss": 0.0136, "step": 456500 }, { "epoch": 4.877504140178428, "grad_norm": 2.57696795463562, "learning_rate": 6.736559807171236e-07, "loss": 0.0092, "step": 456510 }, { "epoch": 4.877610983492708, "grad_norm": 0.17772898077964783, "learning_rate": 6.736402256193394e-07, "loss": 0.0045, "step": 456520 }, { "epoch": 4.877717826806988, "grad_norm": 0.44630858302116394, "learning_rate": 6.736244703255023e-07, "loss": 0.0017, "step": 456530 }, { "epoch": 4.877824670121267, "grad_norm": 0.0008652384276501834, "learning_rate": 6.736087148356299e-07, "loss": 0.0277, "step": 456540 }, { "epoch": 4.877931513435547, "grad_norm": 0.004567873198539019, "learning_rate": 6.735929591497405e-07, "loss": 0.0104, "step": 456550 }, { "epoch": 4.878038356749826, "grad_norm": 0.00208710297010839, "learning_rate": 6.735772032678513e-07, "loss": 0.0019, "step": 456560 }, { "epoch": 4.878145200064106, "grad_norm": 0.053331006318330765, "learning_rate": 6.735614471899804e-07, "loss": 0.0051, "step": 456570 }, { "epoch": 4.878252043378385, "grad_norm": 0.05017879977822304, "learning_rate": 6.735456909161455e-07, "loss": 0.017, "step": 456580 }, { "epoch": 4.8783588866926655, "grad_norm": 0.03414461016654968, "learning_rate": 6.735299344463644e-07, "loss": 0.0006, "step": 456590 }, { "epoch": 4.878465730006945, "grad_norm": 0.35169491171836853, "learning_rate": 6.73514177780655e-07, "loss": 0.0065, "step": 456600 }, { "epoch": 4.878572573321224, "grad_norm": 6.776297092437744, "learning_rate": 6.734984209190349e-07, "loss": 0.0063, "step": 456610 }, { "epoch": 4.878679416635504, "grad_norm": 0.03740088269114494, "learning_rate": 6.734826638615221e-07, "loss": 0.0122, "step": 456620 }, { "epoch": 4.878786259949783, "grad_norm": 0.0006592453573830426, "learning_rate": 6.734669066081342e-07, "loss": 0.0062, "step": 456630 }, { "epoch": 4.878893103264064, "grad_norm": 42.89207077026367, "learning_rate": 6.73451149158889e-07, "loss": 0.0141, "step": 456640 }, { "epoch": 4.878999946578343, "grad_norm": 0.008031617850065231, "learning_rate": 6.734353915138045e-07, "loss": 0.0065, "step": 456650 }, { "epoch": 4.879106789892623, "grad_norm": 3.960076332092285, "learning_rate": 6.734196336728984e-07, "loss": 0.0054, "step": 456660 }, { "epoch": 4.879213633206902, "grad_norm": 0.39484044909477234, "learning_rate": 6.734038756361882e-07, "loss": 0.0002, "step": 456670 }, { "epoch": 4.8793204765211815, "grad_norm": 0.29226037859916687, "learning_rate": 6.733881174036922e-07, "loss": 0.0196, "step": 456680 }, { "epoch": 4.879427319835461, "grad_norm": 0.03259793296456337, "learning_rate": 6.733723589754278e-07, "loss": 0.0242, "step": 456690 }, { "epoch": 4.87953416314974, "grad_norm": 0.040886279195547104, "learning_rate": 6.733566003514129e-07, "loss": 0.008, "step": 456700 }, { "epoch": 4.879641006464021, "grad_norm": 0.007584180682897568, "learning_rate": 6.733408415316653e-07, "loss": 0.0173, "step": 456710 }, { "epoch": 4.8797478497783, "grad_norm": 0.005887662526220083, "learning_rate": 6.733250825162027e-07, "loss": 0.0179, "step": 456720 }, { "epoch": 4.87985469309258, "grad_norm": 0.13597267866134644, "learning_rate": 6.733093233050433e-07, "loss": 0.0016, "step": 456730 }, { "epoch": 4.879961536406859, "grad_norm": 0.5216678977012634, "learning_rate": 6.732935638982045e-07, "loss": 0.0481, "step": 456740 }, { "epoch": 4.880068379721139, "grad_norm": 0.0032151241321116686, "learning_rate": 6.732778042957041e-07, "loss": 0.0013, "step": 456750 }, { "epoch": 4.880175223035419, "grad_norm": 0.22885927557945251, "learning_rate": 6.732620444975601e-07, "loss": 0.0021, "step": 456760 }, { "epoch": 4.880282066349698, "grad_norm": 0.9180298447608948, "learning_rate": 6.7324628450379e-07, "loss": 0.0041, "step": 456770 }, { "epoch": 4.880388909663978, "grad_norm": 0.09468422830104828, "learning_rate": 6.732305243144119e-07, "loss": 0.0088, "step": 456780 }, { "epoch": 4.880495752978257, "grad_norm": 0.9384093880653381, "learning_rate": 6.732147639294435e-07, "loss": 0.0015, "step": 456790 }, { "epoch": 4.880602596292537, "grad_norm": 2.7623159885406494, "learning_rate": 6.731990033489025e-07, "loss": 0.0181, "step": 456800 }, { "epoch": 4.880709439606816, "grad_norm": 0.0011268462985754013, "learning_rate": 6.731832425728065e-07, "loss": 0.0027, "step": 456810 }, { "epoch": 4.880816282921097, "grad_norm": 0.0008791251457296312, "learning_rate": 6.731674816011739e-07, "loss": 0.0179, "step": 456820 }, { "epoch": 4.880923126235376, "grad_norm": 1.1130508184432983, "learning_rate": 6.731517204340222e-07, "loss": 0.024, "step": 456830 }, { "epoch": 4.8810299695496555, "grad_norm": 2.5212361812591553, "learning_rate": 6.731359590713687e-07, "loss": 0.0052, "step": 456840 }, { "epoch": 4.881136812863935, "grad_norm": 0.013637744821608067, "learning_rate": 6.73120197513232e-07, "loss": 0.0103, "step": 456850 }, { "epoch": 4.881243656178214, "grad_norm": 3.147125005722046, "learning_rate": 6.731044357596294e-07, "loss": 0.0041, "step": 456860 }, { "epoch": 4.881350499492494, "grad_norm": 3.086911201477051, "learning_rate": 6.730886738105789e-07, "loss": 0.0117, "step": 456870 }, { "epoch": 4.881457342806774, "grad_norm": 0.0016187028959393501, "learning_rate": 6.730729116660982e-07, "loss": 0.0146, "step": 456880 }, { "epoch": 4.881564186121054, "grad_norm": 0.02064945548772812, "learning_rate": 6.73057149326205e-07, "loss": 0.0116, "step": 456890 }, { "epoch": 4.881671029435333, "grad_norm": 0.027174238115549088, "learning_rate": 6.730413867909173e-07, "loss": 0.0007, "step": 456900 }, { "epoch": 4.881777872749613, "grad_norm": 4.274805545806885, "learning_rate": 6.730256240602529e-07, "loss": 0.0062, "step": 456910 }, { "epoch": 4.881884716063892, "grad_norm": 3.9154138565063477, "learning_rate": 6.730098611342295e-07, "loss": 0.0272, "step": 456920 }, { "epoch": 4.8819915593781715, "grad_norm": 0.0087501909583807, "learning_rate": 6.729940980128648e-07, "loss": 0.012, "step": 456930 }, { "epoch": 4.882098402692452, "grad_norm": 0.007053873501718044, "learning_rate": 6.729783346961768e-07, "loss": 0.0036, "step": 456940 }, { "epoch": 4.882205246006731, "grad_norm": 0.37823083996772766, "learning_rate": 6.729625711841831e-07, "loss": 0.0916, "step": 456950 }, { "epoch": 4.882312089321011, "grad_norm": 0.8893886804580688, "learning_rate": 6.729468074769017e-07, "loss": 0.0019, "step": 456960 }, { "epoch": 4.88241893263529, "grad_norm": 0.011836082674562931, "learning_rate": 6.729310435743503e-07, "loss": 0.0013, "step": 456970 }, { "epoch": 4.88252577594957, "grad_norm": 0.009253228083252907, "learning_rate": 6.729152794765466e-07, "loss": 0.0154, "step": 456980 }, { "epoch": 4.88263261926385, "grad_norm": 0.009532955475151539, "learning_rate": 6.728995151835086e-07, "loss": 0.0009, "step": 456990 }, { "epoch": 4.8827394625781295, "grad_norm": 5.156250953674316, "learning_rate": 6.72883750695254e-07, "loss": 0.0183, "step": 457000 }, { "epoch": 4.882846305892409, "grad_norm": 0.018227213993668556, "learning_rate": 6.728679860118005e-07, "loss": 0.0046, "step": 457010 }, { "epoch": 4.882953149206688, "grad_norm": 1.6430344581604004, "learning_rate": 6.728522211331662e-07, "loss": 0.0084, "step": 457020 }, { "epoch": 4.883059992520968, "grad_norm": 0.33371052145957947, "learning_rate": 6.728364560593686e-07, "loss": 0.0046, "step": 457030 }, { "epoch": 4.883166835835247, "grad_norm": 0.02889767289161682, "learning_rate": 6.728206907904254e-07, "loss": 0.0095, "step": 457040 }, { "epoch": 4.883273679149527, "grad_norm": 1.5888800621032715, "learning_rate": 6.728049253263548e-07, "loss": 0.0089, "step": 457050 }, { "epoch": 4.883380522463807, "grad_norm": 0.024747811257839203, "learning_rate": 6.727891596671744e-07, "loss": 0.0344, "step": 457060 }, { "epoch": 4.883487365778087, "grad_norm": 0.018433166667819023, "learning_rate": 6.727733938129018e-07, "loss": 0.0023, "step": 457070 }, { "epoch": 4.883594209092366, "grad_norm": 0.01851922646164894, "learning_rate": 6.727576277635553e-07, "loss": 0.0028, "step": 457080 }, { "epoch": 4.8837010524066455, "grad_norm": 0.0024624925572425127, "learning_rate": 6.727418615191523e-07, "loss": 0.0004, "step": 457090 }, { "epoch": 4.883807895720925, "grad_norm": 0.0015932372771203518, "learning_rate": 6.727260950797106e-07, "loss": 0.0002, "step": 457100 }, { "epoch": 4.883914739035205, "grad_norm": 0.002422564197331667, "learning_rate": 6.727103284452481e-07, "loss": 0.0473, "step": 457110 }, { "epoch": 4.884021582349485, "grad_norm": 1.5502841472625732, "learning_rate": 6.726945616157827e-07, "loss": 0.0086, "step": 457120 }, { "epoch": 4.884128425663764, "grad_norm": 0.004444852937012911, "learning_rate": 6.72678794591332e-07, "loss": 0.001, "step": 457130 }, { "epoch": 4.884235268978044, "grad_norm": 0.0002732922730501741, "learning_rate": 6.72663027371914e-07, "loss": 0.0075, "step": 457140 }, { "epoch": 4.884342112292323, "grad_norm": 0.0004997472860850394, "learning_rate": 6.726472599575463e-07, "loss": 0.0014, "step": 457150 }, { "epoch": 4.884448955606603, "grad_norm": 0.006079154089093208, "learning_rate": 6.72631492348247e-07, "loss": 0.0121, "step": 457160 }, { "epoch": 4.884555798920882, "grad_norm": 0.030739882960915565, "learning_rate": 6.726157245440335e-07, "loss": 0.0046, "step": 457170 }, { "epoch": 4.8846626422351624, "grad_norm": 0.002977279946208, "learning_rate": 6.72599956544924e-07, "loss": 0.005, "step": 457180 }, { "epoch": 4.884769485549442, "grad_norm": 2.4765970706939697, "learning_rate": 6.72584188350936e-07, "loss": 0.0026, "step": 457190 }, { "epoch": 4.884876328863721, "grad_norm": 1.1080613136291504, "learning_rate": 6.725684199620875e-07, "loss": 0.0041, "step": 457200 }, { "epoch": 4.884983172178001, "grad_norm": 0.003387236036360264, "learning_rate": 6.725526513783961e-07, "loss": 0.0038, "step": 457210 }, { "epoch": 4.88509001549228, "grad_norm": 0.025113189592957497, "learning_rate": 6.725368825998798e-07, "loss": 0.01, "step": 457220 }, { "epoch": 4.885196858806561, "grad_norm": 0.0179038904607296, "learning_rate": 6.725211136265565e-07, "loss": 0.0045, "step": 457230 }, { "epoch": 4.88530370212084, "grad_norm": 8.434552192687988, "learning_rate": 6.725053444584437e-07, "loss": 0.0068, "step": 457240 }, { "epoch": 4.8854105454351195, "grad_norm": 0.0028679820243269205, "learning_rate": 6.724895750955594e-07, "loss": 0.0099, "step": 457250 }, { "epoch": 4.885517388749399, "grad_norm": 0.055859316140413284, "learning_rate": 6.724738055379214e-07, "loss": 0.0053, "step": 457260 }, { "epoch": 4.8856242320636785, "grad_norm": 0.914054811000824, "learning_rate": 6.724580357855473e-07, "loss": 0.0033, "step": 457270 }, { "epoch": 4.885731075377958, "grad_norm": 0.19311097264289856, "learning_rate": 6.724422658384551e-07, "loss": 0.0067, "step": 457280 }, { "epoch": 4.885837918692237, "grad_norm": 4.3777666091918945, "learning_rate": 6.724264956966627e-07, "loss": 0.0032, "step": 457290 }, { "epoch": 4.885944762006518, "grad_norm": 3.270504951477051, "learning_rate": 6.724107253601876e-07, "loss": 0.0139, "step": 457300 }, { "epoch": 4.886051605320797, "grad_norm": 0.022753944620490074, "learning_rate": 6.723949548290478e-07, "loss": 0.0176, "step": 457310 }, { "epoch": 4.886158448635077, "grad_norm": 10.965882301330566, "learning_rate": 6.723791841032611e-07, "loss": 0.0115, "step": 457320 }, { "epoch": 4.886265291949356, "grad_norm": 0.04747278615832329, "learning_rate": 6.723634131828454e-07, "loss": 0.0003, "step": 457330 }, { "epoch": 4.886372135263636, "grad_norm": 0.0038283250760287046, "learning_rate": 6.723476420678184e-07, "loss": 0.0003, "step": 457340 }, { "epoch": 4.886478978577916, "grad_norm": 0.7095466256141663, "learning_rate": 6.723318707581977e-07, "loss": 0.0221, "step": 457350 }, { "epoch": 4.886585821892195, "grad_norm": 0.7583574652671814, "learning_rate": 6.723160992540015e-07, "loss": 0.003, "step": 457360 }, { "epoch": 4.886692665206475, "grad_norm": 0.014241158030927181, "learning_rate": 6.723003275552475e-07, "loss": 0.0023, "step": 457370 }, { "epoch": 4.886799508520754, "grad_norm": 2.322338104248047, "learning_rate": 6.722845556619532e-07, "loss": 0.0084, "step": 457380 }, { "epoch": 4.886906351835034, "grad_norm": 0.010648318566381931, "learning_rate": 6.722687835741368e-07, "loss": 0.0017, "step": 457390 }, { "epoch": 4.887013195149313, "grad_norm": 0.016027506440877914, "learning_rate": 6.722530112918158e-07, "loss": 0.0006, "step": 457400 }, { "epoch": 4.887120038463593, "grad_norm": 14.43685245513916, "learning_rate": 6.722372388150082e-07, "loss": 0.0024, "step": 457410 }, { "epoch": 4.887226881777873, "grad_norm": 0.016203630715608597, "learning_rate": 6.722214661437318e-07, "loss": 0.0055, "step": 457420 }, { "epoch": 4.8873337250921525, "grad_norm": 0.21617791056632996, "learning_rate": 6.722056932780044e-07, "loss": 0.0139, "step": 457430 }, { "epoch": 4.887440568406432, "grad_norm": 1.428834319114685, "learning_rate": 6.721899202178436e-07, "loss": 0.0051, "step": 457440 }, { "epoch": 4.887547411720711, "grad_norm": 0.012962422333657742, "learning_rate": 6.721741469632676e-07, "loss": 0.011, "step": 457450 }, { "epoch": 4.887654255034991, "grad_norm": 0.03048856183886528, "learning_rate": 6.721583735142938e-07, "loss": 0.004, "step": 457460 }, { "epoch": 4.887761098349271, "grad_norm": 0.012713815085589886, "learning_rate": 6.721425998709403e-07, "loss": 0.0011, "step": 457470 }, { "epoch": 4.887867941663551, "grad_norm": 0.006535911932587624, "learning_rate": 6.721268260332248e-07, "loss": 0.013, "step": 457480 }, { "epoch": 4.88797478497783, "grad_norm": 10.42856502532959, "learning_rate": 6.72111052001165e-07, "loss": 0.0578, "step": 457490 }, { "epoch": 4.88808162829211, "grad_norm": 0.0019395584240555763, "learning_rate": 6.72095277774779e-07, "loss": 0.0135, "step": 457500 }, { "epoch": 4.888188471606389, "grad_norm": 3.4014174938201904, "learning_rate": 6.720795033540844e-07, "loss": 0.0075, "step": 457510 }, { "epoch": 4.8882953149206685, "grad_norm": 11.114934921264648, "learning_rate": 6.720637287390992e-07, "loss": 0.008, "step": 457520 }, { "epoch": 4.888402158234949, "grad_norm": 0.12818464636802673, "learning_rate": 6.720479539298407e-07, "loss": 0.0118, "step": 457530 }, { "epoch": 4.888509001549228, "grad_norm": 4.247567176818848, "learning_rate": 6.720321789263273e-07, "loss": 0.0076, "step": 457540 }, { "epoch": 4.888615844863508, "grad_norm": 3.73995304107666, "learning_rate": 6.720164037285766e-07, "loss": 0.0453, "step": 457550 }, { "epoch": 4.888722688177787, "grad_norm": 0.3202334940433502, "learning_rate": 6.720006283366064e-07, "loss": 0.0063, "step": 457560 }, { "epoch": 4.888829531492067, "grad_norm": 0.008147191256284714, "learning_rate": 6.719848527504345e-07, "loss": 0.0044, "step": 457570 }, { "epoch": 4.888936374806346, "grad_norm": 0.061886776238679886, "learning_rate": 6.719690769700788e-07, "loss": 0.0345, "step": 457580 }, { "epoch": 4.8890432181206265, "grad_norm": 4.652193546295166, "learning_rate": 6.719533009955569e-07, "loss": 0.0125, "step": 457590 }, { "epoch": 4.889150061434906, "grad_norm": 0.02458258904516697, "learning_rate": 6.719375248268868e-07, "loss": 0.0035, "step": 457600 }, { "epoch": 4.889256904749185, "grad_norm": 2.418045997619629, "learning_rate": 6.719217484640863e-07, "loss": 0.0062, "step": 457610 }, { "epoch": 4.889363748063465, "grad_norm": 5.292072296142578, "learning_rate": 6.719059719071731e-07, "loss": 0.0045, "step": 457620 }, { "epoch": 4.889470591377744, "grad_norm": 1.7324002981185913, "learning_rate": 6.718901951561651e-07, "loss": 0.023, "step": 457630 }, { "epoch": 4.889577434692024, "grad_norm": 0.01016316283494234, "learning_rate": 6.7187441821108e-07, "loss": 0.0019, "step": 457640 }, { "epoch": 4.889684278006304, "grad_norm": 0.07005424052476883, "learning_rate": 6.718586410719358e-07, "loss": 0.0052, "step": 457650 }, { "epoch": 4.889791121320584, "grad_norm": 0.004222271032631397, "learning_rate": 6.718428637387503e-07, "loss": 0.0125, "step": 457660 }, { "epoch": 4.889897964634863, "grad_norm": 0.33052709698677063, "learning_rate": 6.718270862115412e-07, "loss": 0.033, "step": 457670 }, { "epoch": 4.8900048079491425, "grad_norm": 0.6059083938598633, "learning_rate": 6.718113084903263e-07, "loss": 0.0101, "step": 457680 }, { "epoch": 4.890111651263422, "grad_norm": 0.05695558339357376, "learning_rate": 6.717955305751234e-07, "loss": 0.0109, "step": 457690 }, { "epoch": 4.890218494577701, "grad_norm": 5.58029317855835, "learning_rate": 6.717797524659505e-07, "loss": 0.0016, "step": 457700 }, { "epoch": 4.890325337891982, "grad_norm": 0.06562985479831696, "learning_rate": 6.717639741628253e-07, "loss": 0.0037, "step": 457710 }, { "epoch": 4.890432181206261, "grad_norm": 0.00885202270001173, "learning_rate": 6.717481956657654e-07, "loss": 0.0205, "step": 457720 }, { "epoch": 4.890539024520541, "grad_norm": 3.802746295928955, "learning_rate": 6.717324169747891e-07, "loss": 0.0065, "step": 457730 }, { "epoch": 4.89064586783482, "grad_norm": 0.19669824838638306, "learning_rate": 6.717166380899139e-07, "loss": 0.0073, "step": 457740 }, { "epoch": 4.8907527111491, "grad_norm": 0.3460542857646942, "learning_rate": 6.717008590111574e-07, "loss": 0.0181, "step": 457750 }, { "epoch": 4.890859554463379, "grad_norm": 0.008158017881214619, "learning_rate": 6.71685079738538e-07, "loss": 0.0079, "step": 457760 }, { "epoch": 4.890966397777659, "grad_norm": 2.2298583984375, "learning_rate": 6.716693002720731e-07, "loss": 0.0031, "step": 457770 }, { "epoch": 4.891073241091939, "grad_norm": 0.8906246423721313, "learning_rate": 6.716535206117805e-07, "loss": 0.0092, "step": 457780 }, { "epoch": 4.891180084406218, "grad_norm": 8.921897888183594, "learning_rate": 6.716377407576782e-07, "loss": 0.0157, "step": 457790 }, { "epoch": 4.891286927720498, "grad_norm": 2.253476858139038, "learning_rate": 6.716219607097838e-07, "loss": 0.0025, "step": 457800 }, { "epoch": 4.891393771034777, "grad_norm": 1.3745695352554321, "learning_rate": 6.716061804681154e-07, "loss": 0.0074, "step": 457810 }, { "epoch": 4.891500614349058, "grad_norm": 0.022238461300730705, "learning_rate": 6.715904000326907e-07, "loss": 0.0007, "step": 457820 }, { "epoch": 4.891607457663337, "grad_norm": 0.003619246883317828, "learning_rate": 6.715746194035273e-07, "loss": 0.0216, "step": 457830 }, { "epoch": 4.8917143009776165, "grad_norm": 4.052999496459961, "learning_rate": 6.715588385806433e-07, "loss": 0.0211, "step": 457840 }, { "epoch": 4.891821144291896, "grad_norm": 1.0186874866485596, "learning_rate": 6.715430575640565e-07, "loss": 0.0086, "step": 457850 }, { "epoch": 4.891927987606175, "grad_norm": 0.011997881345450878, "learning_rate": 6.715272763537846e-07, "loss": 0.0201, "step": 457860 }, { "epoch": 4.892034830920455, "grad_norm": 0.8169986605644226, "learning_rate": 6.715114949498454e-07, "loss": 0.0018, "step": 457870 }, { "epoch": 4.892141674234734, "grad_norm": 0.0006924713379703462, "learning_rate": 6.714957133522568e-07, "loss": 0.0047, "step": 457880 }, { "epoch": 4.892248517549015, "grad_norm": 0.004233525600284338, "learning_rate": 6.714799315610367e-07, "loss": 0.0073, "step": 457890 }, { "epoch": 4.892355360863294, "grad_norm": 0.08467310667037964, "learning_rate": 6.714641495762027e-07, "loss": 0.005, "step": 457900 }, { "epoch": 4.892462204177574, "grad_norm": 0.05285712704062462, "learning_rate": 6.714483673977728e-07, "loss": 0.0042, "step": 457910 }, { "epoch": 4.892569047491853, "grad_norm": 0.010256355628371239, "learning_rate": 6.714325850257647e-07, "loss": 0.0069, "step": 457920 }, { "epoch": 4.8926758908061325, "grad_norm": 0.04469633474946022, "learning_rate": 6.714168024601963e-07, "loss": 0.016, "step": 457930 }, { "epoch": 4.892782734120413, "grad_norm": 0.005127412732690573, "learning_rate": 6.714010197010854e-07, "loss": 0.0035, "step": 457940 }, { "epoch": 4.892889577434692, "grad_norm": 0.0160788893699646, "learning_rate": 6.713852367484498e-07, "loss": 0.0509, "step": 457950 }, { "epoch": 4.892996420748972, "grad_norm": 6.482046127319336, "learning_rate": 6.713694536023073e-07, "loss": 0.0104, "step": 457960 }, { "epoch": 4.893103264063251, "grad_norm": 12.125728607177734, "learning_rate": 6.713536702626758e-07, "loss": 0.0195, "step": 457970 }, { "epoch": 4.893210107377531, "grad_norm": 0.017087485641241074, "learning_rate": 6.71337886729573e-07, "loss": 0.0324, "step": 457980 }, { "epoch": 4.89331695069181, "grad_norm": 7.232852935791016, "learning_rate": 6.71322103003017e-07, "loss": 0.0125, "step": 457990 }, { "epoch": 4.89342379400609, "grad_norm": 6.089878082275391, "learning_rate": 6.713063190830253e-07, "loss": 0.0133, "step": 458000 }, { "epoch": 4.89353063732037, "grad_norm": 0.02509923279285431, "learning_rate": 6.712905349696158e-07, "loss": 0.0012, "step": 458010 }, { "epoch": 4.893637480634649, "grad_norm": 4.234316349029541, "learning_rate": 6.712747506628063e-07, "loss": 0.0005, "step": 458020 }, { "epoch": 4.893744323948929, "grad_norm": 0.010018621571362019, "learning_rate": 6.712589661626148e-07, "loss": 0.0007, "step": 458030 }, { "epoch": 4.893851167263208, "grad_norm": 0.0026208502240478992, "learning_rate": 6.712431814690589e-07, "loss": 0.0004, "step": 458040 }, { "epoch": 4.893958010577488, "grad_norm": 0.00132838380523026, "learning_rate": 6.712273965821565e-07, "loss": 0.0056, "step": 458050 }, { "epoch": 4.894064853891768, "grad_norm": 1.786672830581665, "learning_rate": 6.712116115019256e-07, "loss": 0.0106, "step": 458060 }, { "epoch": 4.894171697206048, "grad_norm": 0.003923396579921246, "learning_rate": 6.711958262283838e-07, "loss": 0.0011, "step": 458070 }, { "epoch": 4.894278540520327, "grad_norm": 0.2724858224391937, "learning_rate": 6.711800407615489e-07, "loss": 0.003, "step": 458080 }, { "epoch": 4.8943853838346065, "grad_norm": 0.02004992589354515, "learning_rate": 6.711642551014389e-07, "loss": 0.0011, "step": 458090 }, { "epoch": 4.894492227148886, "grad_norm": 0.07335672527551651, "learning_rate": 6.711484692480716e-07, "loss": 0.0147, "step": 458100 }, { "epoch": 4.8945990704631654, "grad_norm": 0.00044688652269542217, "learning_rate": 6.711326832014646e-07, "loss": 0.0122, "step": 458110 }, { "epoch": 4.894705913777445, "grad_norm": 1.4206279516220093, "learning_rate": 6.71116896961636e-07, "loss": 0.0325, "step": 458120 }, { "epoch": 4.894812757091725, "grad_norm": 3.836181879043579, "learning_rate": 6.711011105286034e-07, "loss": 0.0122, "step": 458130 }, { "epoch": 4.894919600406005, "grad_norm": 0.04577839374542236, "learning_rate": 6.710853239023848e-07, "loss": 0.0076, "step": 458140 }, { "epoch": 4.895026443720284, "grad_norm": 0.00684465654194355, "learning_rate": 6.71069537082998e-07, "loss": 0.0151, "step": 458150 }, { "epoch": 4.895133287034564, "grad_norm": 0.0006581859197467566, "learning_rate": 6.710537500704607e-07, "loss": 0.0009, "step": 458160 }, { "epoch": 4.895240130348843, "grad_norm": 0.10929092764854431, "learning_rate": 6.71037962864791e-07, "loss": 0.0126, "step": 458170 }, { "epoch": 4.895346973663123, "grad_norm": 0.15567344427108765, "learning_rate": 6.710221754660064e-07, "loss": 0.0006, "step": 458180 }, { "epoch": 4.895453816977403, "grad_norm": 0.0015254714526236057, "learning_rate": 6.710063878741248e-07, "loss": 0.0165, "step": 458190 }, { "epoch": 4.895560660291682, "grad_norm": 7.260957717895508, "learning_rate": 6.709906000891642e-07, "loss": 0.0354, "step": 458200 }, { "epoch": 4.895667503605962, "grad_norm": 0.0005763869266957045, "learning_rate": 6.709748121111422e-07, "loss": 0.0549, "step": 458210 }, { "epoch": 4.895774346920241, "grad_norm": 0.000984057318419218, "learning_rate": 6.709590239400768e-07, "loss": 0.011, "step": 458220 }, { "epoch": 4.895881190234521, "grad_norm": 0.0020157897379249334, "learning_rate": 6.709432355759856e-07, "loss": 0.0005, "step": 458230 }, { "epoch": 4.8959880335488, "grad_norm": 3.8719751834869385, "learning_rate": 6.709274470188869e-07, "loss": 0.0036, "step": 458240 }, { "epoch": 4.8960948768630805, "grad_norm": 3.281494379043579, "learning_rate": 6.709116582687979e-07, "loss": 0.001, "step": 458250 }, { "epoch": 4.89620172017736, "grad_norm": 0.007654455956071615, "learning_rate": 6.70895869325737e-07, "loss": 0.0396, "step": 458260 }, { "epoch": 4.8963085634916395, "grad_norm": 9.364354133605957, "learning_rate": 6.708800801897216e-07, "loss": 0.0107, "step": 458270 }, { "epoch": 4.896415406805919, "grad_norm": 0.0039064353331923485, "learning_rate": 6.708642908607697e-07, "loss": 0.0071, "step": 458280 }, { "epoch": 4.896522250120198, "grad_norm": 0.027393918484449387, "learning_rate": 6.70848501338899e-07, "loss": 0.0198, "step": 458290 }, { "epoch": 4.896629093434479, "grad_norm": 3.258150815963745, "learning_rate": 6.708327116241277e-07, "loss": 0.0177, "step": 458300 }, { "epoch": 4.896735936748758, "grad_norm": 0.29693982005119324, "learning_rate": 6.708169217164733e-07, "loss": 0.0131, "step": 458310 }, { "epoch": 4.896842780063038, "grad_norm": 1.5555423498153687, "learning_rate": 6.708011316159535e-07, "loss": 0.0013, "step": 458320 }, { "epoch": 4.896949623377317, "grad_norm": 3.0434067249298096, "learning_rate": 6.707853413225865e-07, "loss": 0.0046, "step": 458330 }, { "epoch": 4.8970564666915966, "grad_norm": 0.14616650342941284, "learning_rate": 6.707695508363901e-07, "loss": 0.0152, "step": 458340 }, { "epoch": 4.897163310005876, "grad_norm": 0.23717357218265533, "learning_rate": 6.707537601573819e-07, "loss": 0.0096, "step": 458350 }, { "epoch": 4.897270153320156, "grad_norm": 0.018671492114663124, "learning_rate": 6.707379692855797e-07, "loss": 0.0044, "step": 458360 }, { "epoch": 4.897376996634436, "grad_norm": 0.39013931155204773, "learning_rate": 6.707221782210016e-07, "loss": 0.0065, "step": 458370 }, { "epoch": 4.897483839948715, "grad_norm": 0.03675300255417824, "learning_rate": 6.707063869636651e-07, "loss": 0.0006, "step": 458380 }, { "epoch": 4.897590683262995, "grad_norm": 0.08333960175514221, "learning_rate": 6.706905955135883e-07, "loss": 0.0088, "step": 458390 }, { "epoch": 4.897697526577274, "grad_norm": 2.0045928955078125, "learning_rate": 6.70674803870789e-07, "loss": 0.0169, "step": 458400 }, { "epoch": 4.897804369891554, "grad_norm": 2.5736122131347656, "learning_rate": 6.706590120352848e-07, "loss": 0.0173, "step": 458410 }, { "epoch": 4.897911213205834, "grad_norm": 0.128941148519516, "learning_rate": 6.706432200070939e-07, "loss": 0.0238, "step": 458420 }, { "epoch": 4.8980180565201135, "grad_norm": 0.0006085112108848989, "learning_rate": 6.706274277862337e-07, "loss": 0.0223, "step": 458430 }, { "epoch": 4.898124899834393, "grad_norm": 0.00038640163256786764, "learning_rate": 6.706116353727224e-07, "loss": 0.0097, "step": 458440 }, { "epoch": 4.898231743148672, "grad_norm": 0.24430762231349945, "learning_rate": 6.705958427665776e-07, "loss": 0.0353, "step": 458450 }, { "epoch": 4.898338586462952, "grad_norm": 3.3728528022766113, "learning_rate": 6.705800499678172e-07, "loss": 0.01, "step": 458460 }, { "epoch": 4.898445429777231, "grad_norm": 0.0011003253748640418, "learning_rate": 6.705642569764592e-07, "loss": 0.0003, "step": 458470 }, { "epoch": 4.898552273091512, "grad_norm": 1.8614463806152344, "learning_rate": 6.705484637925212e-07, "loss": 0.0019, "step": 458480 }, { "epoch": 4.898659116405791, "grad_norm": 0.001355332089588046, "learning_rate": 6.70532670416021e-07, "loss": 0.0005, "step": 458490 }, { "epoch": 4.898765959720071, "grad_norm": 0.014398240484297276, "learning_rate": 6.705168768469768e-07, "loss": 0.002, "step": 458500 }, { "epoch": 4.89887280303435, "grad_norm": 8.001708030700684, "learning_rate": 6.705010830854061e-07, "loss": 0.0064, "step": 458510 }, { "epoch": 4.8989796463486295, "grad_norm": 0.08184856921434402, "learning_rate": 6.704852891313266e-07, "loss": 0.0105, "step": 458520 }, { "epoch": 4.89908648966291, "grad_norm": 0.04721567779779434, "learning_rate": 6.704694949847565e-07, "loss": 0.0079, "step": 458530 }, { "epoch": 4.899193332977189, "grad_norm": 0.028901932761073112, "learning_rate": 6.704537006457136e-07, "loss": 0.0086, "step": 458540 }, { "epoch": 4.899300176291469, "grad_norm": 0.13364538550376892, "learning_rate": 6.704379061142153e-07, "loss": 0.0011, "step": 458550 }, { "epoch": 4.899407019605748, "grad_norm": 0.12486309558153152, "learning_rate": 6.7042211139028e-07, "loss": 0.0028, "step": 458560 }, { "epoch": 4.899513862920028, "grad_norm": 0.00699904328212142, "learning_rate": 6.704063164739252e-07, "loss": 0.0014, "step": 458570 }, { "epoch": 4.899620706234307, "grad_norm": 0.15303611755371094, "learning_rate": 6.703905213651687e-07, "loss": 0.0193, "step": 458580 }, { "epoch": 4.899727549548587, "grad_norm": 0.6765344738960266, "learning_rate": 6.703747260640287e-07, "loss": 0.0051, "step": 458590 }, { "epoch": 4.899834392862867, "grad_norm": 0.0005693212733604014, "learning_rate": 6.703589305705226e-07, "loss": 0.0076, "step": 458600 }, { "epoch": 4.899941236177146, "grad_norm": 0.30894729495048523, "learning_rate": 6.703431348846684e-07, "loss": 0.0133, "step": 458610 }, { "epoch": 4.900048079491426, "grad_norm": 0.01855243369936943, "learning_rate": 6.703273390064839e-07, "loss": 0.0227, "step": 458620 }, { "epoch": 4.900154922805705, "grad_norm": 0.0011430097511038184, "learning_rate": 6.703115429359871e-07, "loss": 0.0013, "step": 458630 }, { "epoch": 4.900261766119985, "grad_norm": 0.001457293750718236, "learning_rate": 6.702957466731955e-07, "loss": 0.0001, "step": 458640 }, { "epoch": 4.900368609434265, "grad_norm": 0.011944884434342384, "learning_rate": 6.702799502181275e-07, "loss": 0.0117, "step": 458650 }, { "epoch": 4.900475452748545, "grad_norm": 0.027370641008019447, "learning_rate": 6.702641535708002e-07, "loss": 0.0069, "step": 458660 }, { "epoch": 4.900582296062824, "grad_norm": 0.022711768746376038, "learning_rate": 6.702483567312321e-07, "loss": 0.0109, "step": 458670 }, { "epoch": 4.9006891393771035, "grad_norm": 2.605456590652466, "learning_rate": 6.702325596994407e-07, "loss": 0.0139, "step": 458680 }, { "epoch": 4.900795982691383, "grad_norm": 0.0021733271423727274, "learning_rate": 6.702167624754438e-07, "loss": 0.0193, "step": 458690 }, { "epoch": 4.900902826005662, "grad_norm": 3.849780321121216, "learning_rate": 6.702009650592594e-07, "loss": 0.0283, "step": 458700 }, { "epoch": 4.901009669319942, "grad_norm": 10.030296325683594, "learning_rate": 6.701851674509053e-07, "loss": 0.0647, "step": 458710 }, { "epoch": 4.901116512634222, "grad_norm": 0.021104441955685616, "learning_rate": 6.701693696503991e-07, "loss": 0.003, "step": 458720 }, { "epoch": 4.901223355948502, "grad_norm": 0.11861418187618256, "learning_rate": 6.701535716577591e-07, "loss": 0.0024, "step": 458730 }, { "epoch": 4.901330199262781, "grad_norm": 0.20974138379096985, "learning_rate": 6.701377734730027e-07, "loss": 0.0013, "step": 458740 }, { "epoch": 4.901437042577061, "grad_norm": 1.2397927045822144, "learning_rate": 6.701219750961481e-07, "loss": 0.0208, "step": 458750 }, { "epoch": 4.90154388589134, "grad_norm": 0.2355952262878418, "learning_rate": 6.701061765272127e-07, "loss": 0.0154, "step": 458760 }, { "epoch": 4.90165072920562, "grad_norm": 0.2758660614490509, "learning_rate": 6.700903777662148e-07, "loss": 0.003, "step": 458770 }, { "epoch": 4.9017575725199, "grad_norm": 0.08255700021982193, "learning_rate": 6.700745788131719e-07, "loss": 0.0017, "step": 458780 }, { "epoch": 4.901864415834179, "grad_norm": 0.027145063504576683, "learning_rate": 6.700587796681021e-07, "loss": 0.0021, "step": 458790 }, { "epoch": 4.901971259148459, "grad_norm": 9.232635498046875, "learning_rate": 6.70042980331023e-07, "loss": 0.0119, "step": 458800 }, { "epoch": 4.902078102462738, "grad_norm": 0.008395557291805744, "learning_rate": 6.700271808019524e-07, "loss": 0.0069, "step": 458810 }, { "epoch": 4.902184945777018, "grad_norm": 0.32793688774108887, "learning_rate": 6.700113810809085e-07, "loss": 0.0018, "step": 458820 }, { "epoch": 4.902291789091297, "grad_norm": 0.001983721274882555, "learning_rate": 6.69995581167909e-07, "loss": 0.0011, "step": 458830 }, { "epoch": 4.9023986324055775, "grad_norm": 7.315751552581787, "learning_rate": 6.699797810629714e-07, "loss": 0.0075, "step": 458840 }, { "epoch": 4.902505475719857, "grad_norm": 0.012666018679738045, "learning_rate": 6.699639807661139e-07, "loss": 0.0063, "step": 458850 }, { "epoch": 4.902612319034136, "grad_norm": 0.0073919459246098995, "learning_rate": 6.699481802773543e-07, "loss": 0.0105, "step": 458860 }, { "epoch": 4.902719162348416, "grad_norm": 0.08822105079889297, "learning_rate": 6.699323795967102e-07, "loss": 0.0001, "step": 458870 }, { "epoch": 4.902826005662695, "grad_norm": 0.0210377536714077, "learning_rate": 6.699165787241999e-07, "loss": 0.0022, "step": 458880 }, { "epoch": 4.902932848976976, "grad_norm": 8.30659294128418, "learning_rate": 6.699007776598407e-07, "loss": 0.0045, "step": 458890 }, { "epoch": 4.903039692291255, "grad_norm": 1.4283835887908936, "learning_rate": 6.698849764036509e-07, "loss": 0.0014, "step": 458900 }, { "epoch": 4.903146535605535, "grad_norm": 0.002336244797334075, "learning_rate": 6.69869174955648e-07, "loss": 0.0052, "step": 458910 }, { "epoch": 4.903253378919814, "grad_norm": 0.00432370463386178, "learning_rate": 6.6985337331585e-07, "loss": 0.0095, "step": 458920 }, { "epoch": 4.9033602222340935, "grad_norm": 0.5387611985206604, "learning_rate": 6.698375714842747e-07, "loss": 0.0039, "step": 458930 }, { "epoch": 4.903467065548373, "grad_norm": 0.0025541388895362616, "learning_rate": 6.698217694609402e-07, "loss": 0.001, "step": 458940 }, { "epoch": 4.903573908862652, "grad_norm": 0.0030200029723346233, "learning_rate": 6.698059672458638e-07, "loss": 0.0001, "step": 458950 }, { "epoch": 4.903680752176933, "grad_norm": 0.009718239307403564, "learning_rate": 6.697901648390638e-07, "loss": 0.0136, "step": 458960 }, { "epoch": 4.903787595491212, "grad_norm": 0.0038642094004899263, "learning_rate": 6.697743622405579e-07, "loss": 0.0044, "step": 458970 }, { "epoch": 4.903894438805492, "grad_norm": 4.514670372009277, "learning_rate": 6.697585594503638e-07, "loss": 0.0015, "step": 458980 }, { "epoch": 4.904001282119771, "grad_norm": 0.39386847615242004, "learning_rate": 6.697427564684995e-07, "loss": 0.0025, "step": 458990 }, { "epoch": 4.904108125434051, "grad_norm": 0.5454846024513245, "learning_rate": 6.69726953294983e-07, "loss": 0.0165, "step": 459000 }, { "epoch": 4.904214968748331, "grad_norm": 0.39358389377593994, "learning_rate": 6.697111499298318e-07, "loss": 0.0025, "step": 459010 }, { "epoch": 4.90432181206261, "grad_norm": 0.0042592310346663, "learning_rate": 6.696953463730639e-07, "loss": 0.0012, "step": 459020 }, { "epoch": 4.90442865537689, "grad_norm": 8.138789176940918, "learning_rate": 6.696795426246973e-07, "loss": 0.0108, "step": 459030 }, { "epoch": 4.904535498691169, "grad_norm": 0.0008128774352371693, "learning_rate": 6.696637386847495e-07, "loss": 0.0121, "step": 459040 }, { "epoch": 4.904642342005449, "grad_norm": 0.0074358186684548855, "learning_rate": 6.696479345532387e-07, "loss": 0.0119, "step": 459050 }, { "epoch": 4.904749185319728, "grad_norm": 0.09420495480298996, "learning_rate": 6.696321302301823e-07, "loss": 0.0041, "step": 459060 }, { "epoch": 4.904856028634009, "grad_norm": 0.020292386412620544, "learning_rate": 6.696163257155987e-07, "loss": 0.0077, "step": 459070 }, { "epoch": 4.904962871948288, "grad_norm": 0.015521702356636524, "learning_rate": 6.696005210095053e-07, "loss": 0.0006, "step": 459080 }, { "epoch": 4.9050697152625675, "grad_norm": 0.036084145307540894, "learning_rate": 6.695847161119201e-07, "loss": 0.0, "step": 459090 }, { "epoch": 4.905176558576847, "grad_norm": 0.07786048203706741, "learning_rate": 6.695689110228611e-07, "loss": 0.0051, "step": 459100 }, { "epoch": 4.905283401891126, "grad_norm": 0.057741448283195496, "learning_rate": 6.695531057423459e-07, "loss": 0.0042, "step": 459110 }, { "epoch": 4.905390245205406, "grad_norm": 0.020752323791384697, "learning_rate": 6.695373002703925e-07, "loss": 0.0069, "step": 459120 }, { "epoch": 4.905497088519686, "grad_norm": 0.04840226471424103, "learning_rate": 6.695214946070187e-07, "loss": 0.0058, "step": 459130 }, { "epoch": 4.905603931833966, "grad_norm": 0.0661243200302124, "learning_rate": 6.695056887522423e-07, "loss": 0.0278, "step": 459140 }, { "epoch": 4.905710775148245, "grad_norm": 0.05907481908798218, "learning_rate": 6.694898827060812e-07, "loss": 0.0046, "step": 459150 }, { "epoch": 4.905817618462525, "grad_norm": 0.0041651129722595215, "learning_rate": 6.694740764685532e-07, "loss": 0.0093, "step": 459160 }, { "epoch": 4.905924461776804, "grad_norm": 0.009598291479051113, "learning_rate": 6.694582700396762e-07, "loss": 0.0014, "step": 459170 }, { "epoch": 4.9060313050910835, "grad_norm": 0.00414296705275774, "learning_rate": 6.694424634194681e-07, "loss": 0.0023, "step": 459180 }, { "epoch": 4.906138148405364, "grad_norm": 0.23696167767047882, "learning_rate": 6.694266566079466e-07, "loss": 0.0046, "step": 459190 }, { "epoch": 4.906244991719643, "grad_norm": 0.2655990421772003, "learning_rate": 6.694108496051297e-07, "loss": 0.0176, "step": 459200 }, { "epoch": 4.906351835033923, "grad_norm": 6.583045959472656, "learning_rate": 6.69395042411035e-07, "loss": 0.0115, "step": 459210 }, { "epoch": 4.906458678348202, "grad_norm": 0.017629941925406456, "learning_rate": 6.693792350256806e-07, "loss": 0.0148, "step": 459220 }, { "epoch": 4.906565521662482, "grad_norm": 0.0107263820245862, "learning_rate": 6.693634274490844e-07, "loss": 0.0161, "step": 459230 }, { "epoch": 4.906672364976762, "grad_norm": 0.1339585781097412, "learning_rate": 6.693476196812639e-07, "loss": 0.0027, "step": 459240 }, { "epoch": 4.9067792082910415, "grad_norm": 0.6102654337882996, "learning_rate": 6.693318117222373e-07, "loss": 0.0064, "step": 459250 }, { "epoch": 4.906886051605321, "grad_norm": 0.010896502062678337, "learning_rate": 6.693160035720221e-07, "loss": 0.0164, "step": 459260 }, { "epoch": 4.9069928949196004, "grad_norm": 0.002372609917074442, "learning_rate": 6.693001952306365e-07, "loss": 0.0078, "step": 459270 }, { "epoch": 4.90709973823388, "grad_norm": 0.001500520040281117, "learning_rate": 6.692843866980982e-07, "loss": 0.0132, "step": 459280 }, { "epoch": 4.907206581548159, "grad_norm": 0.2447839081287384, "learning_rate": 6.69268577974425e-07, "loss": 0.0016, "step": 459290 }, { "epoch": 4.907313424862439, "grad_norm": 0.03890286386013031, "learning_rate": 6.692527690596349e-07, "loss": 0.0115, "step": 459300 }, { "epoch": 4.907420268176719, "grad_norm": 6.048182010650635, "learning_rate": 6.692369599537457e-07, "loss": 0.024, "step": 459310 }, { "epoch": 4.907527111490999, "grad_norm": 0.1255491077899933, "learning_rate": 6.692211506567749e-07, "loss": 0.0118, "step": 459320 }, { "epoch": 4.907633954805278, "grad_norm": 0.00259830872528255, "learning_rate": 6.692053411687408e-07, "loss": 0.0062, "step": 459330 }, { "epoch": 4.9077407981195575, "grad_norm": 0.009798750281333923, "learning_rate": 6.691895314896613e-07, "loss": 0.0037, "step": 459340 }, { "epoch": 4.907847641433837, "grad_norm": 0.024326883256435394, "learning_rate": 6.691737216195537e-07, "loss": 0.0165, "step": 459350 }, { "epoch": 4.907954484748117, "grad_norm": 0.012055960483849049, "learning_rate": 6.691579115584365e-07, "loss": 0.0037, "step": 459360 }, { "epoch": 4.908061328062397, "grad_norm": 3.0645689964294434, "learning_rate": 6.69142101306327e-07, "loss": 0.0111, "step": 459370 }, { "epoch": 4.908168171376676, "grad_norm": 0.0023996499367058277, "learning_rate": 6.691262908632436e-07, "loss": 0.0032, "step": 459380 }, { "epoch": 4.908275014690956, "grad_norm": 0.10812146961688995, "learning_rate": 6.691104802292036e-07, "loss": 0.0019, "step": 459390 }, { "epoch": 4.908381858005235, "grad_norm": 0.04190356284379959, "learning_rate": 6.690946694042252e-07, "loss": 0.0007, "step": 459400 }, { "epoch": 4.908488701319515, "grad_norm": 0.03901340812444687, "learning_rate": 6.690788583883261e-07, "loss": 0.0373, "step": 459410 }, { "epoch": 4.908595544633794, "grad_norm": 0.009099866263568401, "learning_rate": 6.690630471815243e-07, "loss": 0.0055, "step": 459420 }, { "epoch": 4.9087023879480745, "grad_norm": 0.0016495956806465983, "learning_rate": 6.690472357838375e-07, "loss": 0.0001, "step": 459430 }, { "epoch": 4.908809231262354, "grad_norm": 0.05499547719955444, "learning_rate": 6.690314241952836e-07, "loss": 0.0082, "step": 459440 }, { "epoch": 4.908916074576633, "grad_norm": 0.12305667996406555, "learning_rate": 6.690156124158805e-07, "loss": 0.0073, "step": 459450 }, { "epoch": 4.909022917890913, "grad_norm": 1.1348472833633423, "learning_rate": 6.689998004456459e-07, "loss": 0.0075, "step": 459460 }, { "epoch": 4.909129761205192, "grad_norm": 2.401169776916504, "learning_rate": 6.68983988284598e-07, "loss": 0.0011, "step": 459470 }, { "epoch": 4.909236604519473, "grad_norm": 0.002857802901417017, "learning_rate": 6.689681759327543e-07, "loss": 0.0087, "step": 459480 }, { "epoch": 4.909343447833752, "grad_norm": 0.0010590970050543547, "learning_rate": 6.689523633901327e-07, "loss": 0.001, "step": 459490 }, { "epoch": 4.9094502911480316, "grad_norm": 0.01679074391722679, "learning_rate": 6.689365506567512e-07, "loss": 0.0059, "step": 459500 }, { "epoch": 4.909557134462311, "grad_norm": 0.4102313816547394, "learning_rate": 6.689207377326278e-07, "loss": 0.0137, "step": 459510 }, { "epoch": 4.9096639777765905, "grad_norm": 7.405121803283691, "learning_rate": 6.689049246177798e-07, "loss": 0.0515, "step": 459520 }, { "epoch": 4.90977082109087, "grad_norm": 0.023258181288838387, "learning_rate": 6.688891113122256e-07, "loss": 0.0178, "step": 459530 }, { "epoch": 4.909877664405149, "grad_norm": 0.011882773600518703, "learning_rate": 6.688732978159828e-07, "loss": 0.006, "step": 459540 }, { "epoch": 4.90998450771943, "grad_norm": 3.8064486980438232, "learning_rate": 6.688574841290692e-07, "loss": 0.0065, "step": 459550 }, { "epoch": 4.910091351033709, "grad_norm": 0.6609296798706055, "learning_rate": 6.688416702515028e-07, "loss": 0.0014, "step": 459560 }, { "epoch": 4.910198194347989, "grad_norm": 0.7437448501586914, "learning_rate": 6.688258561833016e-07, "loss": 0.0131, "step": 459570 }, { "epoch": 4.910305037662268, "grad_norm": 0.10342005640268326, "learning_rate": 6.68810041924483e-07, "loss": 0.0063, "step": 459580 }, { "epoch": 4.910411880976548, "grad_norm": 0.01776237040758133, "learning_rate": 6.687942274750653e-07, "loss": 0.0094, "step": 459590 }, { "epoch": 4.910518724290828, "grad_norm": 0.011632616631686687, "learning_rate": 6.687784128350661e-07, "loss": 0.0021, "step": 459600 }, { "epoch": 4.910625567605107, "grad_norm": 0.003242440987378359, "learning_rate": 6.687625980045033e-07, "loss": 0.0044, "step": 459610 }, { "epoch": 4.910732410919387, "grad_norm": 0.045289307832717896, "learning_rate": 6.687467829833948e-07, "loss": 0.0009, "step": 459620 }, { "epoch": 4.910839254233666, "grad_norm": 0.054334044456481934, "learning_rate": 6.687309677717586e-07, "loss": 0.0083, "step": 459630 }, { "epoch": 4.910946097547946, "grad_norm": 0.0432467944920063, "learning_rate": 6.687151523696122e-07, "loss": 0.0225, "step": 459640 }, { "epoch": 4.911052940862225, "grad_norm": 0.006920238491147757, "learning_rate": 6.686993367769738e-07, "loss": 0.0081, "step": 459650 }, { "epoch": 4.911159784176505, "grad_norm": 0.11084776371717453, "learning_rate": 6.686835209938609e-07, "loss": 0.0099, "step": 459660 }, { "epoch": 4.911266627490785, "grad_norm": 5.113949298858643, "learning_rate": 6.686677050202918e-07, "loss": 0.0249, "step": 459670 }, { "epoch": 4.9113734708050645, "grad_norm": 0.008459960110485554, "learning_rate": 6.68651888856284e-07, "loss": 0.0506, "step": 459680 }, { "epoch": 4.911480314119344, "grad_norm": 0.004851261153817177, "learning_rate": 6.686360725018556e-07, "loss": 0.0256, "step": 459690 }, { "epoch": 4.911587157433623, "grad_norm": 0.8537535071372986, "learning_rate": 6.686202559570243e-07, "loss": 0.0068, "step": 459700 }, { "epoch": 4.911694000747903, "grad_norm": 0.26490768790245056, "learning_rate": 6.68604439221808e-07, "loss": 0.0009, "step": 459710 }, { "epoch": 4.911800844062183, "grad_norm": 0.0020793494768440723, "learning_rate": 6.685886222962246e-07, "loss": 0.0004, "step": 459720 }, { "epoch": 4.911907687376463, "grad_norm": 6.552561283111572, "learning_rate": 6.685728051802918e-07, "loss": 0.0021, "step": 459730 }, { "epoch": 4.912014530690742, "grad_norm": 2.7735230922698975, "learning_rate": 6.685569878740277e-07, "loss": 0.0075, "step": 459740 }, { "epoch": 4.912121374005022, "grad_norm": 0.00241664657369256, "learning_rate": 6.685411703774499e-07, "loss": 0.0084, "step": 459750 }, { "epoch": 4.912228217319301, "grad_norm": 0.3891773819923401, "learning_rate": 6.685253526905765e-07, "loss": 0.0049, "step": 459760 }, { "epoch": 4.9123350606335805, "grad_norm": 0.019039154052734375, "learning_rate": 6.685095348134253e-07, "loss": 0.0086, "step": 459770 }, { "epoch": 4.912441903947861, "grad_norm": 1.9738898277282715, "learning_rate": 6.684937167460141e-07, "loss": 0.0094, "step": 459780 }, { "epoch": 4.91254874726214, "grad_norm": 2.6298205852508545, "learning_rate": 6.684778984883608e-07, "loss": 0.001, "step": 459790 }, { "epoch": 4.91265559057642, "grad_norm": 0.016504768282175064, "learning_rate": 6.684620800404832e-07, "loss": 0.0035, "step": 459800 }, { "epoch": 4.912762433890699, "grad_norm": 0.026617905125021935, "learning_rate": 6.684462614023991e-07, "loss": 0.0007, "step": 459810 }, { "epoch": 4.912869277204979, "grad_norm": 0.005178425926715136, "learning_rate": 6.684304425741266e-07, "loss": 0.0112, "step": 459820 }, { "epoch": 4.912976120519258, "grad_norm": 11.732804298400879, "learning_rate": 6.684146235556834e-07, "loss": 0.0483, "step": 459830 }, { "epoch": 4.9130829638335385, "grad_norm": 0.005336795002222061, "learning_rate": 6.683988043470872e-07, "loss": 0.0042, "step": 459840 }, { "epoch": 4.913189807147818, "grad_norm": 0.10350203514099121, "learning_rate": 6.683829849483563e-07, "loss": 0.0045, "step": 459850 }, { "epoch": 4.913296650462097, "grad_norm": 0.010289311408996582, "learning_rate": 6.683671653595082e-07, "loss": 0.0022, "step": 459860 }, { "epoch": 4.913403493776377, "grad_norm": 0.039014168083667755, "learning_rate": 6.683513455805609e-07, "loss": 0.0063, "step": 459870 }, { "epoch": 4.913510337090656, "grad_norm": 0.0024269516579806805, "learning_rate": 6.683355256115322e-07, "loss": 0.0084, "step": 459880 }, { "epoch": 4.913617180404936, "grad_norm": 0.004286701790988445, "learning_rate": 6.683197054524398e-07, "loss": 0.0022, "step": 459890 }, { "epoch": 4.913724023719216, "grad_norm": 26.103538513183594, "learning_rate": 6.683038851033021e-07, "loss": 0.0033, "step": 459900 }, { "epoch": 4.913830867033496, "grad_norm": 0.9200177788734436, "learning_rate": 6.682880645641364e-07, "loss": 0.0078, "step": 459910 }, { "epoch": 4.913937710347775, "grad_norm": 0.009377849288284779, "learning_rate": 6.682722438349606e-07, "loss": 0.0025, "step": 459920 }, { "epoch": 4.9140445536620545, "grad_norm": 0.9810952544212341, "learning_rate": 6.68256422915793e-07, "loss": 0.0123, "step": 459930 }, { "epoch": 4.914151396976334, "grad_norm": 0.008540951646864414, "learning_rate": 6.682406018066512e-07, "loss": 0.0014, "step": 459940 }, { "epoch": 4.914258240290614, "grad_norm": 6.264205455780029, "learning_rate": 6.68224780507553e-07, "loss": 0.0251, "step": 459950 }, { "epoch": 4.914365083604894, "grad_norm": 0.4359305500984192, "learning_rate": 6.682089590185163e-07, "loss": 0.0083, "step": 459960 }, { "epoch": 4.914471926919173, "grad_norm": 1.2173235416412354, "learning_rate": 6.681931373395591e-07, "loss": 0.0142, "step": 459970 }, { "epoch": 4.914578770233453, "grad_norm": 0.9667354226112366, "learning_rate": 6.681773154706988e-07, "loss": 0.0038, "step": 459980 }, { "epoch": 4.914685613547732, "grad_norm": 0.18988080322742462, "learning_rate": 6.68161493411954e-07, "loss": 0.0069, "step": 459990 }, { "epoch": 4.914792456862012, "grad_norm": 2.3927652835845947, "learning_rate": 6.681456711633421e-07, "loss": 0.0017, "step": 460000 }, { "epoch": 4.914899300176291, "grad_norm": 0.028509510681033134, "learning_rate": 6.68129848724881e-07, "loss": 0.0213, "step": 460010 }, { "epoch": 4.915006143490571, "grad_norm": 0.10723190754652023, "learning_rate": 6.681140260965885e-07, "loss": 0.0038, "step": 460020 }, { "epoch": 4.915112986804851, "grad_norm": 0.1940983086824417, "learning_rate": 6.680982032784828e-07, "loss": 0.0014, "step": 460030 }, { "epoch": 4.91521983011913, "grad_norm": 0.0043331533670425415, "learning_rate": 6.680823802705814e-07, "loss": 0.0093, "step": 460040 }, { "epoch": 4.91532667343341, "grad_norm": 0.0031862847972661257, "learning_rate": 6.680665570729024e-07, "loss": 0.0002, "step": 460050 }, { "epoch": 4.915433516747689, "grad_norm": 0.012658381834626198, "learning_rate": 6.680507336854635e-07, "loss": 0.009, "step": 460060 }, { "epoch": 4.91554036006197, "grad_norm": 0.05963154509663582, "learning_rate": 6.680349101082826e-07, "loss": 0.0045, "step": 460070 }, { "epoch": 4.915647203376249, "grad_norm": 0.11856909841299057, "learning_rate": 6.680190863413777e-07, "loss": 0.0092, "step": 460080 }, { "epoch": 4.9157540466905285, "grad_norm": 0.044440388679504395, "learning_rate": 6.680032623847665e-07, "loss": 0.0039, "step": 460090 }, { "epoch": 4.915860890004808, "grad_norm": 0.0015463302843272686, "learning_rate": 6.67987438238467e-07, "loss": 0.0055, "step": 460100 }, { "epoch": 4.915967733319087, "grad_norm": 0.012953398749232292, "learning_rate": 6.67971613902497e-07, "loss": 0.0016, "step": 460110 }, { "epoch": 4.916074576633367, "grad_norm": 0.0019195564091205597, "learning_rate": 6.679557893768743e-07, "loss": 0.0198, "step": 460120 }, { "epoch": 4.916181419947646, "grad_norm": 0.004863954149186611, "learning_rate": 6.679399646616169e-07, "loss": 0.0068, "step": 460130 }, { "epoch": 4.916288263261927, "grad_norm": 0.10966652631759644, "learning_rate": 6.679241397567426e-07, "loss": 0.0039, "step": 460140 }, { "epoch": 4.916395106576206, "grad_norm": 0.0015141452895477414, "learning_rate": 6.679083146622693e-07, "loss": 0.0057, "step": 460150 }, { "epoch": 4.916501949890486, "grad_norm": 0.0039051135536283255, "learning_rate": 6.678924893782148e-07, "loss": 0.0081, "step": 460160 }, { "epoch": 4.916608793204765, "grad_norm": 0.09992820769548416, "learning_rate": 6.678766639045969e-07, "loss": 0.0055, "step": 460170 }, { "epoch": 4.9167156365190445, "grad_norm": 1.0795044898986816, "learning_rate": 6.678608382414337e-07, "loss": 0.006, "step": 460180 }, { "epoch": 4.916822479833325, "grad_norm": 0.007943843491375446, "learning_rate": 6.67845012388743e-07, "loss": 0.0037, "step": 460190 }, { "epoch": 4.916929323147604, "grad_norm": 6.096065521240234, "learning_rate": 6.678291863465426e-07, "loss": 0.0092, "step": 460200 }, { "epoch": 4.917036166461884, "grad_norm": 0.006431047338992357, "learning_rate": 6.678133601148502e-07, "loss": 0.0091, "step": 460210 }, { "epoch": 4.917143009776163, "grad_norm": 3.0216639041900635, "learning_rate": 6.677975336936841e-07, "loss": 0.0041, "step": 460220 }, { "epoch": 4.917249853090443, "grad_norm": 0.010364503599703312, "learning_rate": 6.677817070830617e-07, "loss": 0.0053, "step": 460230 }, { "epoch": 4.917356696404722, "grad_norm": 2.475940227508545, "learning_rate": 6.677658802830012e-07, "loss": 0.0066, "step": 460240 }, { "epoch": 4.917463539719002, "grad_norm": 0.47208189964294434, "learning_rate": 6.677500532935204e-07, "loss": 0.0056, "step": 460250 }, { "epoch": 4.917570383033282, "grad_norm": 0.10781940072774887, "learning_rate": 6.677342261146371e-07, "loss": 0.0248, "step": 460260 }, { "epoch": 4.917677226347561, "grad_norm": Infinity, "learning_rate": 6.67718398746369e-07, "loss": 0.0154, "step": 460270 }, { "epoch": 4.917784069661841, "grad_norm": 2.2981209754943848, "learning_rate": 6.677025711887345e-07, "loss": 0.0046, "step": 460280 }, { "epoch": 4.91789091297612, "grad_norm": 0.024246733635663986, "learning_rate": 6.676867434417509e-07, "loss": 0.0077, "step": 460290 }, { "epoch": 4.9179977562904, "grad_norm": 0.43854862451553345, "learning_rate": 6.676709155054365e-07, "loss": 0.0044, "step": 460300 }, { "epoch": 4.91810459960468, "grad_norm": 4.1440510749816895, "learning_rate": 6.676550873798088e-07, "loss": 0.0143, "step": 460310 }, { "epoch": 4.91821144291896, "grad_norm": 0.3972536623477936, "learning_rate": 6.676392590648859e-07, "loss": 0.0262, "step": 460320 }, { "epoch": 4.918318286233239, "grad_norm": 0.002160462783649564, "learning_rate": 6.676234305606857e-07, "loss": 0.0014, "step": 460330 }, { "epoch": 4.9184251295475185, "grad_norm": 1.1773277521133423, "learning_rate": 6.676076018672259e-07, "loss": 0.012, "step": 460340 }, { "epoch": 4.918531972861798, "grad_norm": 0.25516578555107117, "learning_rate": 6.675917729845245e-07, "loss": 0.0214, "step": 460350 }, { "epoch": 4.9186388161760775, "grad_norm": 0.5524200797080994, "learning_rate": 6.675759439125993e-07, "loss": 0.0012, "step": 460360 }, { "epoch": 4.918745659490357, "grad_norm": 0.32544150948524475, "learning_rate": 6.675601146514683e-07, "loss": 0.0222, "step": 460370 }, { "epoch": 4.918852502804637, "grad_norm": 2.3772451877593994, "learning_rate": 6.675442852011492e-07, "loss": 0.0071, "step": 460380 }, { "epoch": 4.918959346118917, "grad_norm": 0.009438189677894115, "learning_rate": 6.675284555616601e-07, "loss": 0.0144, "step": 460390 }, { "epoch": 4.919066189433196, "grad_norm": 0.15667012333869934, "learning_rate": 6.675126257330186e-07, "loss": 0.0139, "step": 460400 }, { "epoch": 4.919173032747476, "grad_norm": 0.019365904852747917, "learning_rate": 6.674967957152426e-07, "loss": 0.0016, "step": 460410 }, { "epoch": 4.919279876061755, "grad_norm": 0.0030895830132067204, "learning_rate": 6.674809655083503e-07, "loss": 0.0116, "step": 460420 }, { "epoch": 4.919386719376035, "grad_norm": 0.0030521019361913204, "learning_rate": 6.674651351123593e-07, "loss": 0.0027, "step": 460430 }, { "epoch": 4.919493562690315, "grad_norm": 9.527759552001953, "learning_rate": 6.674493045272874e-07, "loss": 0.0073, "step": 460440 }, { "epoch": 4.919600406004594, "grad_norm": 0.02154645510017872, "learning_rate": 6.674334737531528e-07, "loss": 0.0032, "step": 460450 }, { "epoch": 4.919707249318874, "grad_norm": 0.002215577056631446, "learning_rate": 6.674176427899729e-07, "loss": 0.0008, "step": 460460 }, { "epoch": 4.919814092633153, "grad_norm": 0.006867397576570511, "learning_rate": 6.674018116377661e-07, "loss": 0.0017, "step": 460470 }, { "epoch": 4.919920935947433, "grad_norm": 0.0015775029314681888, "learning_rate": 6.673859802965501e-07, "loss": 0.0001, "step": 460480 }, { "epoch": 4.920027779261713, "grad_norm": 1.8677873611450195, "learning_rate": 6.673701487663423e-07, "loss": 0.0174, "step": 460490 }, { "epoch": 4.9201346225759925, "grad_norm": 0.003970239777117968, "learning_rate": 6.673543170471613e-07, "loss": 0.019, "step": 460500 }, { "epoch": 4.920241465890272, "grad_norm": 5.3411173820495605, "learning_rate": 6.673384851390245e-07, "loss": 0.0322, "step": 460510 }, { "epoch": 4.9203483092045515, "grad_norm": 0.08465568721294403, "learning_rate": 6.673226530419499e-07, "loss": 0.0036, "step": 460520 }, { "epoch": 4.920455152518831, "grad_norm": 0.0008176984265446663, "learning_rate": 6.673068207559555e-07, "loss": 0.001, "step": 460530 }, { "epoch": 4.92056199583311, "grad_norm": 0.3123558759689331, "learning_rate": 6.672909882810592e-07, "loss": 0.0095, "step": 460540 }, { "epoch": 4.920668839147391, "grad_norm": 0.5813455581665039, "learning_rate": 6.672751556172785e-07, "loss": 0.0005, "step": 460550 }, { "epoch": 4.92077568246167, "grad_norm": 0.013390591368079185, "learning_rate": 6.672593227646314e-07, "loss": 0.0028, "step": 460560 }, { "epoch": 4.92088252577595, "grad_norm": 0.0032730356324464083, "learning_rate": 6.672434897231363e-07, "loss": 0.0028, "step": 460570 }, { "epoch": 4.920989369090229, "grad_norm": 0.009736616164445877, "learning_rate": 6.672276564928103e-07, "loss": 0.008, "step": 460580 }, { "epoch": 4.921096212404509, "grad_norm": 1.966678261756897, "learning_rate": 6.672118230736718e-07, "loss": 0.0045, "step": 460590 }, { "epoch": 4.921203055718788, "grad_norm": 0.0019337531412020326, "learning_rate": 6.671959894657387e-07, "loss": 0.0112, "step": 460600 }, { "epoch": 4.921309899033068, "grad_norm": 0.06780143827199936, "learning_rate": 6.671801556690285e-07, "loss": 0.0013, "step": 460610 }, { "epoch": 4.921416742347348, "grad_norm": 12.174546241760254, "learning_rate": 6.671643216835594e-07, "loss": 0.0056, "step": 460620 }, { "epoch": 4.921523585661627, "grad_norm": 0.12303738296031952, "learning_rate": 6.67148487509349e-07, "loss": 0.007, "step": 460630 }, { "epoch": 4.921630428975907, "grad_norm": 0.0034140043426305056, "learning_rate": 6.671326531464156e-07, "loss": 0.0056, "step": 460640 }, { "epoch": 4.921737272290186, "grad_norm": 2.1438663005828857, "learning_rate": 6.671168185947766e-07, "loss": 0.0061, "step": 460650 }, { "epoch": 4.921844115604466, "grad_norm": 0.0033299122005701065, "learning_rate": 6.671009838544502e-07, "loss": 0.0016, "step": 460660 }, { "epoch": 4.921950958918746, "grad_norm": 0.04656305909156799, "learning_rate": 6.670851489254541e-07, "loss": 0.0133, "step": 460670 }, { "epoch": 4.9220578022330255, "grad_norm": 3.9141688346862793, "learning_rate": 6.670693138078063e-07, "loss": 0.0084, "step": 460680 }, { "epoch": 4.922164645547305, "grad_norm": 0.2353232502937317, "learning_rate": 6.670534785015246e-07, "loss": 0.0068, "step": 460690 }, { "epoch": 4.922271488861584, "grad_norm": 0.023572426289319992, "learning_rate": 6.67037643006627e-07, "loss": 0.0011, "step": 460700 }, { "epoch": 4.922378332175864, "grad_norm": 1.0001128911972046, "learning_rate": 6.670218073231313e-07, "loss": 0.001, "step": 460710 }, { "epoch": 4.922485175490143, "grad_norm": 0.00198567402549088, "learning_rate": 6.670059714510553e-07, "loss": 0.0054, "step": 460720 }, { "epoch": 4.922592018804424, "grad_norm": 0.010048827156424522, "learning_rate": 6.66990135390417e-07, "loss": 0.0034, "step": 460730 }, { "epoch": 4.922698862118703, "grad_norm": 0.02675507217645645, "learning_rate": 6.669742991412342e-07, "loss": 0.0016, "step": 460740 }, { "epoch": 4.922805705432983, "grad_norm": 0.007451646029949188, "learning_rate": 6.669584627035248e-07, "loss": 0.0122, "step": 460750 }, { "epoch": 4.922912548747262, "grad_norm": 0.03586239740252495, "learning_rate": 6.669426260773068e-07, "loss": 0.0086, "step": 460760 }, { "epoch": 4.9230193920615415, "grad_norm": 0.19897958636283875, "learning_rate": 6.669267892625979e-07, "loss": 0.0047, "step": 460770 }, { "epoch": 4.923126235375822, "grad_norm": 0.8788247108459473, "learning_rate": 6.669109522594161e-07, "loss": 0.0032, "step": 460780 }, { "epoch": 4.923233078690101, "grad_norm": 0.004351254552602768, "learning_rate": 6.668951150677791e-07, "loss": 0.0076, "step": 460790 }, { "epoch": 4.923339922004381, "grad_norm": 1.8906700611114502, "learning_rate": 6.66879277687705e-07, "loss": 0.0062, "step": 460800 }, { "epoch": 4.92344676531866, "grad_norm": 1.0979547500610352, "learning_rate": 6.668634401192117e-07, "loss": 0.0032, "step": 460810 }, { "epoch": 4.92355360863294, "grad_norm": 0.26268646121025085, "learning_rate": 6.66847602362317e-07, "loss": 0.0225, "step": 460820 }, { "epoch": 4.923660451947219, "grad_norm": 0.5287062525749207, "learning_rate": 6.668317644170385e-07, "loss": 0.0044, "step": 460830 }, { "epoch": 4.923767295261499, "grad_norm": 0.001528067165054381, "learning_rate": 6.668159262833945e-07, "loss": 0.0149, "step": 460840 }, { "epoch": 4.923874138575779, "grad_norm": 0.6624948978424072, "learning_rate": 6.668000879614028e-07, "loss": 0.0073, "step": 460850 }, { "epoch": 4.923980981890058, "grad_norm": 4.559540271759033, "learning_rate": 6.667842494510811e-07, "loss": 0.0137, "step": 460860 }, { "epoch": 4.924087825204338, "grad_norm": 1.8443057537078857, "learning_rate": 6.667684107524476e-07, "loss": 0.003, "step": 460870 }, { "epoch": 4.924194668518617, "grad_norm": 0.008559106849133968, "learning_rate": 6.667525718655198e-07, "loss": 0.0133, "step": 460880 }, { "epoch": 4.924301511832897, "grad_norm": 2.170849561691284, "learning_rate": 6.667367327903157e-07, "loss": 0.0077, "step": 460890 }, { "epoch": 4.924408355147177, "grad_norm": 0.0284469835460186, "learning_rate": 6.667208935268533e-07, "loss": 0.0024, "step": 460900 }, { "epoch": 4.924515198461457, "grad_norm": 0.11162833869457245, "learning_rate": 6.667050540751507e-07, "loss": 0.017, "step": 460910 }, { "epoch": 4.924622041775736, "grad_norm": 0.002389686182141304, "learning_rate": 6.666892144352251e-07, "loss": 0.0087, "step": 460920 }, { "epoch": 4.9247288850900155, "grad_norm": 0.5634428858757019, "learning_rate": 6.66673374607095e-07, "loss": 0.0102, "step": 460930 }, { "epoch": 4.924835728404295, "grad_norm": 0.17608119547367096, "learning_rate": 6.66657534590778e-07, "loss": 0.009, "step": 460940 }, { "epoch": 4.924942571718574, "grad_norm": 0.0981609970331192, "learning_rate": 6.666416943862922e-07, "loss": 0.0031, "step": 460950 }, { "epoch": 4.925049415032854, "grad_norm": 0.04159128665924072, "learning_rate": 6.666258539936552e-07, "loss": 0.0118, "step": 460960 }, { "epoch": 4.925156258347134, "grad_norm": 0.01206514984369278, "learning_rate": 6.666100134128852e-07, "loss": 0.0003, "step": 460970 }, { "epoch": 4.925263101661414, "grad_norm": 11.622509956359863, "learning_rate": 6.665941726439996e-07, "loss": 0.0022, "step": 460980 }, { "epoch": 4.925369944975693, "grad_norm": 3.0822582244873047, "learning_rate": 6.665783316870168e-07, "loss": 0.0169, "step": 460990 }, { "epoch": 4.925476788289973, "grad_norm": 0.0015095002017915249, "learning_rate": 6.665624905419545e-07, "loss": 0.0116, "step": 461000 }, { "epoch": 4.925583631604252, "grad_norm": 0.006270119454711676, "learning_rate": 6.665466492088306e-07, "loss": 0.0017, "step": 461010 }, { "epoch": 4.925690474918532, "grad_norm": 1.0736557245254517, "learning_rate": 6.66530807687663e-07, "loss": 0.0055, "step": 461020 }, { "epoch": 4.925797318232812, "grad_norm": 0.027061494067311287, "learning_rate": 6.665149659784694e-07, "loss": 0.0108, "step": 461030 }, { "epoch": 4.925904161547091, "grad_norm": 0.006003349553793669, "learning_rate": 6.66499124081268e-07, "loss": 0.0002, "step": 461040 }, { "epoch": 4.926011004861371, "grad_norm": 0.46923720836639404, "learning_rate": 6.664832819960767e-07, "loss": 0.0099, "step": 461050 }, { "epoch": 4.92611784817565, "grad_norm": 0.10407958924770355, "learning_rate": 6.664674397229129e-07, "loss": 0.0017, "step": 461060 }, { "epoch": 4.92622469148993, "grad_norm": 0.0018396362429484725, "learning_rate": 6.664515972617948e-07, "loss": 0.023, "step": 461070 }, { "epoch": 4.926331534804209, "grad_norm": 2.4908297061920166, "learning_rate": 6.664357546127403e-07, "loss": 0.0063, "step": 461080 }, { "epoch": 4.9264383781184895, "grad_norm": 0.0016901289345696568, "learning_rate": 6.664199117757674e-07, "loss": 0.0117, "step": 461090 }, { "epoch": 4.926545221432769, "grad_norm": 0.006525869481265545, "learning_rate": 6.664040687508939e-07, "loss": 0.0108, "step": 461100 }, { "epoch": 4.926652064747048, "grad_norm": 0.17032817006111145, "learning_rate": 6.663882255381374e-07, "loss": 0.0112, "step": 461110 }, { "epoch": 4.926758908061328, "grad_norm": 0.00349779287353158, "learning_rate": 6.663723821375161e-07, "loss": 0.0175, "step": 461120 }, { "epoch": 4.926865751375607, "grad_norm": 0.02106643095612526, "learning_rate": 6.663565385490479e-07, "loss": 0.0105, "step": 461130 }, { "epoch": 4.926972594689888, "grad_norm": 0.002544362796470523, "learning_rate": 6.663406947727508e-07, "loss": 0.0709, "step": 461140 }, { "epoch": 4.927079438004167, "grad_norm": 1.5271172523498535, "learning_rate": 6.663248508086423e-07, "loss": 0.0036, "step": 461150 }, { "epoch": 4.927186281318447, "grad_norm": 0.017948724329471588, "learning_rate": 6.663090066567404e-07, "loss": 0.0006, "step": 461160 }, { "epoch": 4.927293124632726, "grad_norm": 1.1483662128448486, "learning_rate": 6.662931623170632e-07, "loss": 0.0015, "step": 461170 }, { "epoch": 4.9273999679470055, "grad_norm": 0.08545760810375214, "learning_rate": 6.662773177896283e-07, "loss": 0.0028, "step": 461180 }, { "epoch": 4.927506811261285, "grad_norm": 0.8321171998977661, "learning_rate": 6.66261473074454e-07, "loss": 0.0023, "step": 461190 }, { "epoch": 4.927613654575564, "grad_norm": 2.3993561267852783, "learning_rate": 6.662456281715578e-07, "loss": 0.0084, "step": 461200 }, { "epoch": 4.927720497889845, "grad_norm": 0.001598380389623344, "learning_rate": 6.662297830809578e-07, "loss": 0.0355, "step": 461210 }, { "epoch": 4.927827341204124, "grad_norm": 1.5744298696517944, "learning_rate": 6.662139378026718e-07, "loss": 0.0091, "step": 461220 }, { "epoch": 4.927934184518404, "grad_norm": 0.004363568499684334, "learning_rate": 6.661980923367178e-07, "loss": 0.0178, "step": 461230 }, { "epoch": 4.928041027832683, "grad_norm": 0.0800713449716568, "learning_rate": 6.661822466831135e-07, "loss": 0.0054, "step": 461240 }, { "epoch": 4.928147871146963, "grad_norm": 0.06538750231266022, "learning_rate": 6.66166400841877e-07, "loss": 0.0077, "step": 461250 }, { "epoch": 4.928254714461243, "grad_norm": 7.969750881195068, "learning_rate": 6.66150554813026e-07, "loss": 0.0155, "step": 461260 }, { "epoch": 4.928361557775522, "grad_norm": 0.9898862242698669, "learning_rate": 6.661347085965786e-07, "loss": 0.0076, "step": 461270 }, { "epoch": 4.928468401089802, "grad_norm": 2.3149914741516113, "learning_rate": 6.661188621925524e-07, "loss": 0.0085, "step": 461280 }, { "epoch": 4.928575244404081, "grad_norm": 0.020940646529197693, "learning_rate": 6.661030156009656e-07, "loss": 0.0022, "step": 461290 }, { "epoch": 4.928682087718361, "grad_norm": 0.06385911256074905, "learning_rate": 6.660871688218359e-07, "loss": 0.0106, "step": 461300 }, { "epoch": 4.92878893103264, "grad_norm": 8.107694625854492, "learning_rate": 6.660713218551813e-07, "loss": 0.0165, "step": 461310 }, { "epoch": 4.928895774346921, "grad_norm": 0.00268154707737267, "learning_rate": 6.660554747010195e-07, "loss": 0.0005, "step": 461320 }, { "epoch": 4.9290026176612, "grad_norm": 0.7561628818511963, "learning_rate": 6.660396273593687e-07, "loss": 0.0111, "step": 461330 }, { "epoch": 4.9291094609754795, "grad_norm": 0.0058622886426746845, "learning_rate": 6.660237798302467e-07, "loss": 0.0075, "step": 461340 }, { "epoch": 4.929216304289759, "grad_norm": 0.02146172896027565, "learning_rate": 6.660079321136711e-07, "loss": 0.0057, "step": 461350 }, { "epoch": 4.9293231476040384, "grad_norm": 0.008592863567173481, "learning_rate": 6.659920842096603e-07, "loss": 0.0049, "step": 461360 }, { "epoch": 4.929429990918318, "grad_norm": 2.9186007976531982, "learning_rate": 6.659762361182318e-07, "loss": 0.0152, "step": 461370 }, { "epoch": 4.929536834232598, "grad_norm": 0.04948902502655983, "learning_rate": 6.659603878394033e-07, "loss": 0.0012, "step": 461380 }, { "epoch": 4.929643677546878, "grad_norm": 0.159580796957016, "learning_rate": 6.659445393731932e-07, "loss": 0.0003, "step": 461390 }, { "epoch": 4.929750520861157, "grad_norm": 1.8498679399490356, "learning_rate": 6.659286907196193e-07, "loss": 0.0161, "step": 461400 }, { "epoch": 4.929857364175437, "grad_norm": 0.009282927960157394, "learning_rate": 6.659128418786993e-07, "loss": 0.0266, "step": 461410 }, { "epoch": 4.929964207489716, "grad_norm": 0.01886768639087677, "learning_rate": 6.658969928504512e-07, "loss": 0.0018, "step": 461420 }, { "epoch": 4.9300710508039955, "grad_norm": 0.03488898649811745, "learning_rate": 6.658811436348927e-07, "loss": 0.0014, "step": 461430 }, { "epoch": 4.930177894118276, "grad_norm": 0.008143899962306023, "learning_rate": 6.658652942320422e-07, "loss": 0.0049, "step": 461440 }, { "epoch": 4.930284737432555, "grad_norm": 0.0013027478707954288, "learning_rate": 6.658494446419172e-07, "loss": 0.0032, "step": 461450 }, { "epoch": 4.930391580746835, "grad_norm": 2.0682287216186523, "learning_rate": 6.658335948645355e-07, "loss": 0.0023, "step": 461460 }, { "epoch": 4.930498424061114, "grad_norm": 1.2265502214431763, "learning_rate": 6.658177448999152e-07, "loss": 0.0114, "step": 461470 }, { "epoch": 4.930605267375394, "grad_norm": 0.006871841847896576, "learning_rate": 6.658018947480741e-07, "loss": 0.0005, "step": 461480 }, { "epoch": 4.930712110689674, "grad_norm": 0.0014051321195438504, "learning_rate": 6.657860444090302e-07, "loss": 0.0062, "step": 461490 }, { "epoch": 4.9308189540039535, "grad_norm": 0.06669556349515915, "learning_rate": 6.657701938828014e-07, "loss": 0.0001, "step": 461500 }, { "epoch": 4.930925797318233, "grad_norm": 0.03783251345157623, "learning_rate": 6.657543431694054e-07, "loss": 0.0009, "step": 461510 }, { "epoch": 4.9310326406325125, "grad_norm": 1.7866727113723755, "learning_rate": 6.657384922688604e-07, "loss": 0.0116, "step": 461520 }, { "epoch": 4.931139483946792, "grad_norm": 0.0069541288539767265, "learning_rate": 6.657226411811842e-07, "loss": 0.0135, "step": 461530 }, { "epoch": 4.931246327261071, "grad_norm": 6.863548755645752, "learning_rate": 6.657067899063944e-07, "loss": 0.0059, "step": 461540 }, { "epoch": 4.931353170575351, "grad_norm": 0.8680108785629272, "learning_rate": 6.656909384445092e-07, "loss": 0.0212, "step": 461550 }, { "epoch": 4.931460013889631, "grad_norm": 0.3300115466117859, "learning_rate": 6.656750867955466e-07, "loss": 0.0187, "step": 461560 }, { "epoch": 4.931566857203911, "grad_norm": 0.0038120767567306757, "learning_rate": 6.656592349595241e-07, "loss": 0.0053, "step": 461570 }, { "epoch": 4.93167370051819, "grad_norm": 1.278898000717163, "learning_rate": 6.656433829364598e-07, "loss": 0.0066, "step": 461580 }, { "epoch": 4.9317805438324696, "grad_norm": 4.70146369934082, "learning_rate": 6.656275307263717e-07, "loss": 0.004, "step": 461590 }, { "epoch": 4.931887387146749, "grad_norm": 0.00124313123524189, "learning_rate": 6.656116783292776e-07, "loss": 0.0111, "step": 461600 }, { "epoch": 4.931994230461029, "grad_norm": 0.011873363517224789, "learning_rate": 6.655958257451955e-07, "loss": 0.0026, "step": 461610 }, { "epoch": 4.932101073775309, "grad_norm": 0.002497259993106127, "learning_rate": 6.655799729741432e-07, "loss": 0.0012, "step": 461620 }, { "epoch": 4.932207917089588, "grad_norm": 0.035723667591810226, "learning_rate": 6.655641200161384e-07, "loss": 0.006, "step": 461630 }, { "epoch": 4.932314760403868, "grad_norm": 0.04477718472480774, "learning_rate": 6.655482668711995e-07, "loss": 0.0048, "step": 461640 }, { "epoch": 4.932421603718147, "grad_norm": 0.008084104396402836, "learning_rate": 6.65532413539344e-07, "loss": 0.002, "step": 461650 }, { "epoch": 4.932528447032427, "grad_norm": 4.596238613128662, "learning_rate": 6.655165600205898e-07, "loss": 0.036, "step": 461660 }, { "epoch": 4.932635290346706, "grad_norm": 0.05329255759716034, "learning_rate": 6.655007063149551e-07, "loss": 0.0022, "step": 461670 }, { "epoch": 4.9327421336609865, "grad_norm": 2.782912015914917, "learning_rate": 6.654848524224576e-07, "loss": 0.0105, "step": 461680 }, { "epoch": 4.932848976975266, "grad_norm": 0.009739621542394161, "learning_rate": 6.65468998343115e-07, "loss": 0.005, "step": 461690 }, { "epoch": 4.932955820289545, "grad_norm": 0.0013720177812501788, "learning_rate": 6.654531440769456e-07, "loss": 0.0079, "step": 461700 }, { "epoch": 4.933062663603825, "grad_norm": 1.09098482131958, "learning_rate": 6.654372896239671e-07, "loss": 0.0052, "step": 461710 }, { "epoch": 4.933169506918104, "grad_norm": 0.01170322671532631, "learning_rate": 6.654214349841974e-07, "loss": 0.0364, "step": 461720 }, { "epoch": 4.933276350232385, "grad_norm": 4.597201347351074, "learning_rate": 6.654055801576544e-07, "loss": 0.0238, "step": 461730 }, { "epoch": 4.933383193546664, "grad_norm": 0.03903289884328842, "learning_rate": 6.65389725144356e-07, "loss": 0.0131, "step": 461740 }, { "epoch": 4.933490036860944, "grad_norm": 0.028154492378234863, "learning_rate": 6.653738699443201e-07, "loss": 0.0145, "step": 461750 }, { "epoch": 4.933596880175223, "grad_norm": 5.206347465515137, "learning_rate": 6.653580145575647e-07, "loss": 0.0063, "step": 461760 }, { "epoch": 4.9337037234895025, "grad_norm": 4.425434112548828, "learning_rate": 6.653421589841076e-07, "loss": 0.0041, "step": 461770 }, { "epoch": 4.933810566803782, "grad_norm": 0.0037070929538458586, "learning_rate": 6.653263032239667e-07, "loss": 0.0001, "step": 461780 }, { "epoch": 4.933917410118061, "grad_norm": 0.005668335128575563, "learning_rate": 6.653104472771599e-07, "loss": 0.0132, "step": 461790 }, { "epoch": 4.934024253432342, "grad_norm": 0.060050755739212036, "learning_rate": 6.652945911437051e-07, "loss": 0.0043, "step": 461800 }, { "epoch": 4.934131096746621, "grad_norm": 0.02472870796918869, "learning_rate": 6.652787348236203e-07, "loss": 0.0052, "step": 461810 }, { "epoch": 4.934237940060901, "grad_norm": 0.011335666291415691, "learning_rate": 6.652628783169233e-07, "loss": 0.0136, "step": 461820 }, { "epoch": 4.93434478337518, "grad_norm": 0.004692165181040764, "learning_rate": 6.65247021623632e-07, "loss": 0.0247, "step": 461830 }, { "epoch": 4.93445162668946, "grad_norm": 0.059072934091091156, "learning_rate": 6.652311647437645e-07, "loss": 0.0056, "step": 461840 }, { "epoch": 4.93455847000374, "grad_norm": 0.011380323208868504, "learning_rate": 6.652153076773383e-07, "loss": 0.0276, "step": 461850 }, { "epoch": 4.934665313318019, "grad_norm": 0.2666667103767395, "learning_rate": 6.651994504243718e-07, "loss": 0.0104, "step": 461860 }, { "epoch": 4.934772156632299, "grad_norm": 0.0148029625415802, "learning_rate": 6.651835929848826e-07, "loss": 0.008, "step": 461870 }, { "epoch": 4.934878999946578, "grad_norm": 0.013639247044920921, "learning_rate": 6.651677353588886e-07, "loss": 0.0176, "step": 461880 }, { "epoch": 4.934985843260858, "grad_norm": 0.00021003275469411165, "learning_rate": 6.651518775464076e-07, "loss": 0.0126, "step": 461890 }, { "epoch": 4.935092686575137, "grad_norm": 0.0753357782959938, "learning_rate": 6.651360195474579e-07, "loss": 0.0066, "step": 461900 }, { "epoch": 4.935199529889417, "grad_norm": 5.88412618637085, "learning_rate": 6.651201613620571e-07, "loss": 0.0034, "step": 461910 }, { "epoch": 4.935306373203697, "grad_norm": 0.003008595434948802, "learning_rate": 6.651043029902232e-07, "loss": 0.0171, "step": 461920 }, { "epoch": 4.9354132165179765, "grad_norm": 0.2331666201353073, "learning_rate": 6.65088444431974e-07, "loss": 0.0092, "step": 461930 }, { "epoch": 4.935520059832256, "grad_norm": 0.003540752222761512, "learning_rate": 6.650725856873275e-07, "loss": 0.027, "step": 461940 }, { "epoch": 4.935626903146535, "grad_norm": 3.8363699913024902, "learning_rate": 6.650567267563016e-07, "loss": 0.0131, "step": 461950 }, { "epoch": 4.935733746460815, "grad_norm": 6.762491703033447, "learning_rate": 6.650408676389142e-07, "loss": 0.02, "step": 461960 }, { "epoch": 4.935840589775095, "grad_norm": 7.480650424957275, "learning_rate": 6.650250083351832e-07, "loss": 0.0056, "step": 461970 }, { "epoch": 4.935947433089375, "grad_norm": 0.007416656240820885, "learning_rate": 6.650091488451266e-07, "loss": 0.0034, "step": 461980 }, { "epoch": 4.936054276403654, "grad_norm": 0.30165067315101624, "learning_rate": 6.649932891687622e-07, "loss": 0.0006, "step": 461990 }, { "epoch": 4.936161119717934, "grad_norm": 0.00042545926407910883, "learning_rate": 6.649774293061078e-07, "loss": 0.032, "step": 462000 }, { "epoch": 4.936267963032213, "grad_norm": 1.4745490550994873, "learning_rate": 6.649615692571815e-07, "loss": 0.0108, "step": 462010 }, { "epoch": 4.9363748063464925, "grad_norm": 0.0009193061850965023, "learning_rate": 6.649457090220012e-07, "loss": 0.0081, "step": 462020 }, { "epoch": 4.936481649660773, "grad_norm": 0.14229613542556763, "learning_rate": 6.649298486005846e-07, "loss": 0.0047, "step": 462030 }, { "epoch": 4.936588492975052, "grad_norm": 1.7605410814285278, "learning_rate": 6.649139879929499e-07, "loss": 0.0036, "step": 462040 }, { "epoch": 4.936695336289332, "grad_norm": 1.2553139925003052, "learning_rate": 6.648981271991149e-07, "loss": 0.0126, "step": 462050 }, { "epoch": 4.936802179603611, "grad_norm": 0.7519148588180542, "learning_rate": 6.648822662190973e-07, "loss": 0.0188, "step": 462060 }, { "epoch": 4.936909022917891, "grad_norm": 7.692196846008301, "learning_rate": 6.648664050529154e-07, "loss": 0.0057, "step": 462070 }, { "epoch": 4.93701586623217, "grad_norm": 0.0013164096744731069, "learning_rate": 6.648505437005867e-07, "loss": 0.0001, "step": 462080 }, { "epoch": 4.9371227095464505, "grad_norm": 0.8744977712631226, "learning_rate": 6.648346821621292e-07, "loss": 0.0174, "step": 462090 }, { "epoch": 4.93722955286073, "grad_norm": 0.002496856963261962, "learning_rate": 6.648188204375611e-07, "loss": 0.0102, "step": 462100 }, { "epoch": 4.937336396175009, "grad_norm": 6.427206516265869, "learning_rate": 6.648029585269001e-07, "loss": 0.0154, "step": 462110 }, { "epoch": 4.937443239489289, "grad_norm": 0.015605865977704525, "learning_rate": 6.647870964301641e-07, "loss": 0.001, "step": 462120 }, { "epoch": 4.937550082803568, "grad_norm": 0.0531155988574028, "learning_rate": 6.64771234147371e-07, "loss": 0.0091, "step": 462130 }, { "epoch": 4.937656926117848, "grad_norm": 0.03838885575532913, "learning_rate": 6.647553716785388e-07, "loss": 0.0063, "step": 462140 }, { "epoch": 4.937763769432128, "grad_norm": 3.808314800262451, "learning_rate": 6.647395090236852e-07, "loss": 0.0127, "step": 462150 }, { "epoch": 4.937870612746408, "grad_norm": 0.0010674864752218127, "learning_rate": 6.647236461828284e-07, "loss": 0.0062, "step": 462160 }, { "epoch": 4.937977456060687, "grad_norm": 1.1751694679260254, "learning_rate": 6.64707783155986e-07, "loss": 0.0206, "step": 462170 }, { "epoch": 4.9380842993749665, "grad_norm": 0.0002978878328576684, "learning_rate": 6.646919199431764e-07, "loss": 0.026, "step": 462180 }, { "epoch": 4.938191142689246, "grad_norm": 0.0331600122153759, "learning_rate": 6.646760565444169e-07, "loss": 0.0086, "step": 462190 }, { "epoch": 4.938297986003526, "grad_norm": 3.914867639541626, "learning_rate": 6.646601929597258e-07, "loss": 0.0177, "step": 462200 }, { "epoch": 4.938404829317806, "grad_norm": 0.003698450978845358, "learning_rate": 6.646443291891209e-07, "loss": 0.0031, "step": 462210 }, { "epoch": 4.938511672632085, "grad_norm": 1.247388243675232, "learning_rate": 6.646284652326201e-07, "loss": 0.0061, "step": 462220 }, { "epoch": 4.938618515946365, "grad_norm": 0.0007501623476855457, "learning_rate": 6.646126010902414e-07, "loss": 0.0069, "step": 462230 }, { "epoch": 4.938725359260644, "grad_norm": 0.0031613591127097607, "learning_rate": 6.645967367620026e-07, "loss": 0.0203, "step": 462240 }, { "epoch": 4.938832202574924, "grad_norm": 0.008960885927081108, "learning_rate": 6.645808722479217e-07, "loss": 0.0178, "step": 462250 }, { "epoch": 4.938939045889203, "grad_norm": 1.6832497119903564, "learning_rate": 6.645650075480165e-07, "loss": 0.0149, "step": 462260 }, { "epoch": 4.939045889203483, "grad_norm": 3.1492080688476562, "learning_rate": 6.645491426623051e-07, "loss": 0.0087, "step": 462270 }, { "epoch": 4.939152732517763, "grad_norm": 0.00560947647318244, "learning_rate": 6.645332775908053e-07, "loss": 0.0021, "step": 462280 }, { "epoch": 4.939259575832042, "grad_norm": 1.6581109762191772, "learning_rate": 6.645174123335349e-07, "loss": 0.0042, "step": 462290 }, { "epoch": 4.939366419146322, "grad_norm": 6.143590927124023, "learning_rate": 6.64501546890512e-07, "loss": 0.0219, "step": 462300 }, { "epoch": 4.939473262460601, "grad_norm": 0.0053913588635623455, "learning_rate": 6.644856812617546e-07, "loss": 0.0055, "step": 462310 }, { "epoch": 4.939580105774882, "grad_norm": 0.47491201758384705, "learning_rate": 6.644698154472801e-07, "loss": 0.002, "step": 462320 }, { "epoch": 4.939686949089161, "grad_norm": 0.027187421917915344, "learning_rate": 6.64453949447107e-07, "loss": 0.0021, "step": 462330 }, { "epoch": 4.9397937924034405, "grad_norm": 0.04018614813685417, "learning_rate": 6.644380832612529e-07, "loss": 0.0001, "step": 462340 }, { "epoch": 4.93990063571772, "grad_norm": 0.0014443505788221955, "learning_rate": 6.644222168897358e-07, "loss": 0.0005, "step": 462350 }, { "epoch": 4.940007479031999, "grad_norm": 4.848442554473877, "learning_rate": 6.644063503325736e-07, "loss": 0.0079, "step": 462360 }, { "epoch": 4.940114322346279, "grad_norm": 0.11270825564861298, "learning_rate": 6.643904835897842e-07, "loss": 0.0092, "step": 462370 }, { "epoch": 4.940221165660558, "grad_norm": 27.39229965209961, "learning_rate": 6.643746166613857e-07, "loss": 0.0439, "step": 462380 }, { "epoch": 4.940328008974839, "grad_norm": 7.452887535095215, "learning_rate": 6.643587495473958e-07, "loss": 0.0085, "step": 462390 }, { "epoch": 4.940434852289118, "grad_norm": 0.03943059965968132, "learning_rate": 6.643428822478323e-07, "loss": 0.0177, "step": 462400 }, { "epoch": 4.940541695603398, "grad_norm": 8.545454025268555, "learning_rate": 6.643270147627135e-07, "loss": 0.006, "step": 462410 }, { "epoch": 4.940648538917677, "grad_norm": 0.003910244442522526, "learning_rate": 6.643111470920569e-07, "loss": 0.0006, "step": 462420 }, { "epoch": 4.9407553822319565, "grad_norm": 6.992885589599609, "learning_rate": 6.642952792358807e-07, "loss": 0.0099, "step": 462430 }, { "epoch": 4.940862225546237, "grad_norm": 3.6883339881896973, "learning_rate": 6.642794111942027e-07, "loss": 0.0143, "step": 462440 }, { "epoch": 4.940969068860516, "grad_norm": 0.4582845866680145, "learning_rate": 6.642635429670409e-07, "loss": 0.007, "step": 462450 }, { "epoch": 4.941075912174796, "grad_norm": 0.015929728746414185, "learning_rate": 6.642476745544131e-07, "loss": 0.0049, "step": 462460 }, { "epoch": 4.941182755489075, "grad_norm": 0.02873951569199562, "learning_rate": 6.642318059563375e-07, "loss": 0.0001, "step": 462470 }, { "epoch": 4.941289598803355, "grad_norm": 0.005476113874465227, "learning_rate": 6.642159371728315e-07, "loss": 0.0253, "step": 462480 }, { "epoch": 4.941396442117634, "grad_norm": 0.004005090333521366, "learning_rate": 6.642000682039134e-07, "loss": 0.021, "step": 462490 }, { "epoch": 4.941503285431914, "grad_norm": 3.4838109016418457, "learning_rate": 6.641841990496011e-07, "loss": 0.0045, "step": 462500 }, { "epoch": 4.941610128746194, "grad_norm": 0.17151933908462524, "learning_rate": 6.641683297099124e-07, "loss": 0.0086, "step": 462510 }, { "epoch": 4.9417169720604734, "grad_norm": 0.5178844928741455, "learning_rate": 6.641524601848652e-07, "loss": 0.0267, "step": 462520 }, { "epoch": 4.941823815374753, "grad_norm": 0.007669942453503609, "learning_rate": 6.641365904744776e-07, "loss": 0.0197, "step": 462530 }, { "epoch": 4.941930658689032, "grad_norm": 0.00802757777273655, "learning_rate": 6.641207205787674e-07, "loss": 0.0309, "step": 462540 }, { "epoch": 4.942037502003312, "grad_norm": 0.0028769895434379578, "learning_rate": 6.641048504977525e-07, "loss": 0.0047, "step": 462550 }, { "epoch": 4.942144345317592, "grad_norm": 0.1286575347185135, "learning_rate": 6.640889802314507e-07, "loss": 0.0081, "step": 462560 }, { "epoch": 4.942251188631872, "grad_norm": 4.636501312255859, "learning_rate": 6.640731097798802e-07, "loss": 0.0026, "step": 462570 }, { "epoch": 4.942358031946151, "grad_norm": 9.140270233154297, "learning_rate": 6.640572391430587e-07, "loss": 0.0025, "step": 462580 }, { "epoch": 4.9424648752604305, "grad_norm": 0.003565751016139984, "learning_rate": 6.640413683210043e-07, "loss": 0.0089, "step": 462590 }, { "epoch": 4.94257171857471, "grad_norm": 0.3147951364517212, "learning_rate": 6.640254973137346e-07, "loss": 0.0065, "step": 462600 }, { "epoch": 4.9426785618889895, "grad_norm": 0.03166528046131134, "learning_rate": 6.64009626121268e-07, "loss": 0.0012, "step": 462610 }, { "epoch": 4.942785405203269, "grad_norm": 0.003096402855589986, "learning_rate": 6.639937547436221e-07, "loss": 0.0175, "step": 462620 }, { "epoch": 4.942892248517549, "grad_norm": 0.009092839434742928, "learning_rate": 6.639778831808147e-07, "loss": 0.0045, "step": 462630 }, { "epoch": 4.942999091831829, "grad_norm": 0.012513374909758568, "learning_rate": 6.63962011432864e-07, "loss": 0.0204, "step": 462640 }, { "epoch": 4.943105935146108, "grad_norm": 1.0812879800796509, "learning_rate": 6.639461394997879e-07, "loss": 0.004, "step": 462650 }, { "epoch": 4.943212778460388, "grad_norm": 6.761449813842773, "learning_rate": 6.63930267381604e-07, "loss": 0.0096, "step": 462660 }, { "epoch": 4.943319621774667, "grad_norm": 0.0009611151763238013, "learning_rate": 6.639143950783307e-07, "loss": 0.003, "step": 462670 }, { "epoch": 4.9434264650889475, "grad_norm": 0.0024860885459929705, "learning_rate": 6.638985225899856e-07, "loss": 0.0027, "step": 462680 }, { "epoch": 4.943533308403227, "grad_norm": 0.0009948888327926397, "learning_rate": 6.638826499165866e-07, "loss": 0.0025, "step": 462690 }, { "epoch": 4.943640151717506, "grad_norm": 4.109165668487549, "learning_rate": 6.638667770581519e-07, "loss": 0.0062, "step": 462700 }, { "epoch": 4.943746995031786, "grad_norm": 0.002112754387781024, "learning_rate": 6.638509040146992e-07, "loss": 0.0155, "step": 462710 }, { "epoch": 4.943853838346065, "grad_norm": 0.002930613001808524, "learning_rate": 6.638350307862464e-07, "loss": 0.0006, "step": 462720 }, { "epoch": 4.943960681660345, "grad_norm": 1.9194973707199097, "learning_rate": 6.638191573728114e-07, "loss": 0.0177, "step": 462730 }, { "epoch": 4.944067524974625, "grad_norm": 0.006086895242333412, "learning_rate": 6.638032837744124e-07, "loss": 0.0008, "step": 462740 }, { "epoch": 4.9441743682889046, "grad_norm": 0.01227629091590643, "learning_rate": 6.637874099910669e-07, "loss": 0.0031, "step": 462750 }, { "epoch": 4.944281211603184, "grad_norm": 0.0007491245050914586, "learning_rate": 6.637715360227933e-07, "loss": 0.0214, "step": 462760 }, { "epoch": 4.9443880549174635, "grad_norm": 3.2206225395202637, "learning_rate": 6.637556618696091e-07, "loss": 0.0033, "step": 462770 }, { "epoch": 4.944494898231743, "grad_norm": 3.031099319458008, "learning_rate": 6.637397875315325e-07, "loss": 0.0098, "step": 462780 }, { "epoch": 4.944601741546022, "grad_norm": 0.0047657848335802555, "learning_rate": 6.637239130085813e-07, "loss": 0.0024, "step": 462790 }, { "epoch": 4.944708584860303, "grad_norm": 3.5637216567993164, "learning_rate": 6.637080383007734e-07, "loss": 0.0043, "step": 462800 }, { "epoch": 4.944815428174582, "grad_norm": 0.3354601562023163, "learning_rate": 6.636921634081269e-07, "loss": 0.0004, "step": 462810 }, { "epoch": 4.944922271488862, "grad_norm": 0.09779871255159378, "learning_rate": 6.636762883306595e-07, "loss": 0.0133, "step": 462820 }, { "epoch": 4.945029114803141, "grad_norm": 0.08163457363843918, "learning_rate": 6.636604130683891e-07, "loss": 0.0018, "step": 462830 }, { "epoch": 4.945135958117421, "grad_norm": 0.821647047996521, "learning_rate": 6.636445376213339e-07, "loss": 0.0007, "step": 462840 }, { "epoch": 4.9452428014317, "grad_norm": 0.004365463741123676, "learning_rate": 6.636286619895115e-07, "loss": 0.0288, "step": 462850 }, { "epoch": 4.94534964474598, "grad_norm": 0.04552879184484482, "learning_rate": 6.636127861729402e-07, "loss": 0.0087, "step": 462860 }, { "epoch": 4.94545648806026, "grad_norm": 0.004785060416907072, "learning_rate": 6.635969101716376e-07, "loss": 0.0085, "step": 462870 }, { "epoch": 4.945563331374539, "grad_norm": 4.256466388702393, "learning_rate": 6.635810339856217e-07, "loss": 0.0081, "step": 462880 }, { "epoch": 4.945670174688819, "grad_norm": 0.6374831795692444, "learning_rate": 6.635651576149105e-07, "loss": 0.005, "step": 462890 }, { "epoch": 4.945777018003098, "grad_norm": 0.0020061456598341465, "learning_rate": 6.635492810595219e-07, "loss": 0.0039, "step": 462900 }, { "epoch": 4.945883861317379, "grad_norm": 0.0059288847260177135, "learning_rate": 6.63533404319474e-07, "loss": 0.007, "step": 462910 }, { "epoch": 4.945990704631658, "grad_norm": 0.004745179321616888, "learning_rate": 6.635175273947842e-07, "loss": 0.0031, "step": 462920 }, { "epoch": 4.9460975479459375, "grad_norm": 0.004814902786165476, "learning_rate": 6.63501650285471e-07, "loss": 0.0135, "step": 462930 }, { "epoch": 4.946204391260217, "grad_norm": 0.004258269909769297, "learning_rate": 6.634857729915522e-07, "loss": 0.0056, "step": 462940 }, { "epoch": 4.946311234574496, "grad_norm": 0.9237861633300781, "learning_rate": 6.634698955130454e-07, "loss": 0.0095, "step": 462950 }, { "epoch": 4.946418077888776, "grad_norm": 0.5268658399581909, "learning_rate": 6.634540178499688e-07, "loss": 0.0226, "step": 462960 }, { "epoch": 4.946524921203055, "grad_norm": 0.001522771897725761, "learning_rate": 6.634381400023402e-07, "loss": 0.0001, "step": 462970 }, { "epoch": 4.946631764517336, "grad_norm": 0.06314334273338318, "learning_rate": 6.634222619701778e-07, "loss": 0.0095, "step": 462980 }, { "epoch": 4.946738607831615, "grad_norm": 0.09441563487052917, "learning_rate": 6.634063837534993e-07, "loss": 0.0003, "step": 462990 }, { "epoch": 4.946845451145895, "grad_norm": 0.010359318926930428, "learning_rate": 6.633905053523226e-07, "loss": 0.0063, "step": 463000 }, { "epoch": 4.946952294460174, "grad_norm": 0.718359649181366, "learning_rate": 6.633746267666656e-07, "loss": 0.013, "step": 463010 }, { "epoch": 4.9470591377744535, "grad_norm": 2.1858768463134766, "learning_rate": 6.633587479965465e-07, "loss": 0.0444, "step": 463020 }, { "epoch": 4.947165981088734, "grad_norm": 0.0010709965135902166, "learning_rate": 6.633428690419829e-07, "loss": 0.0038, "step": 463030 }, { "epoch": 4.947272824403013, "grad_norm": 0.0074086314998567104, "learning_rate": 6.63326989902993e-07, "loss": 0.0103, "step": 463040 }, { "epoch": 4.947379667717293, "grad_norm": 0.0046539222821593285, "learning_rate": 6.633111105795944e-07, "loss": 0.0034, "step": 463050 }, { "epoch": 4.947486511031572, "grad_norm": 0.012731785885989666, "learning_rate": 6.632952310718054e-07, "loss": 0.0064, "step": 463060 }, { "epoch": 4.947593354345852, "grad_norm": 0.036143362522125244, "learning_rate": 6.632793513796438e-07, "loss": 0.0117, "step": 463070 }, { "epoch": 4.947700197660131, "grad_norm": 0.006041673477739096, "learning_rate": 6.632634715031273e-07, "loss": 0.0108, "step": 463080 }, { "epoch": 4.947807040974411, "grad_norm": 0.0075326538644731045, "learning_rate": 6.63247591442274e-07, "loss": 0.0064, "step": 463090 }, { "epoch": 4.947913884288691, "grad_norm": 0.4693678617477417, "learning_rate": 6.632317111971021e-07, "loss": 0.0167, "step": 463100 }, { "epoch": 4.94802072760297, "grad_norm": 0.008711273781955242, "learning_rate": 6.632158307676292e-07, "loss": 0.0031, "step": 463110 }, { "epoch": 4.94812757091725, "grad_norm": 0.5071322321891785, "learning_rate": 6.631999501538732e-07, "loss": 0.0059, "step": 463120 }, { "epoch": 4.948234414231529, "grad_norm": 3.4255261421203613, "learning_rate": 6.631840693558522e-07, "loss": 0.024, "step": 463130 }, { "epoch": 4.948341257545809, "grad_norm": 0.0025114205200225115, "learning_rate": 6.631681883735841e-07, "loss": 0.0035, "step": 463140 }, { "epoch": 4.948448100860089, "grad_norm": 4.227324962615967, "learning_rate": 6.631523072070867e-07, "loss": 0.0081, "step": 463150 }, { "epoch": 4.948554944174369, "grad_norm": 0.0033672305289655924, "learning_rate": 6.631364258563781e-07, "loss": 0.0071, "step": 463160 }, { "epoch": 4.948661787488648, "grad_norm": 0.020570239052176476, "learning_rate": 6.631205443214761e-07, "loss": 0.0029, "step": 463170 }, { "epoch": 4.9487686308029275, "grad_norm": 0.0006846381584182382, "learning_rate": 6.631046626023988e-07, "loss": 0.0009, "step": 463180 }, { "epoch": 4.948875474117207, "grad_norm": 0.7764687538146973, "learning_rate": 6.630887806991639e-07, "loss": 0.01, "step": 463190 }, { "epoch": 4.948982317431486, "grad_norm": 1.6562858819961548, "learning_rate": 6.630728986117897e-07, "loss": 0.0038, "step": 463200 }, { "epoch": 4.949089160745766, "grad_norm": 0.0020815960597246885, "learning_rate": 6.630570163402936e-07, "loss": 0.0308, "step": 463210 }, { "epoch": 4.949196004060046, "grad_norm": 0.43721839785575867, "learning_rate": 6.63041133884694e-07, "loss": 0.0118, "step": 463220 }, { "epoch": 4.949302847374326, "grad_norm": 0.14394521713256836, "learning_rate": 6.630252512450085e-07, "loss": 0.0275, "step": 463230 }, { "epoch": 4.949409690688605, "grad_norm": 2.082319974899292, "learning_rate": 6.630093684212554e-07, "loss": 0.0044, "step": 463240 }, { "epoch": 4.949516534002885, "grad_norm": 8.926461219787598, "learning_rate": 6.629934854134523e-07, "loss": 0.0344, "step": 463250 }, { "epoch": 4.949623377317164, "grad_norm": 1.7754610776901245, "learning_rate": 6.629776022216172e-07, "loss": 0.0046, "step": 463260 }, { "epoch": 4.949730220631444, "grad_norm": 0.0025657915975898504, "learning_rate": 6.629617188457681e-07, "loss": 0.0061, "step": 463270 }, { "epoch": 4.949837063945724, "grad_norm": 0.05099959671497345, "learning_rate": 6.62945835285923e-07, "loss": 0.0108, "step": 463280 }, { "epoch": 4.949943907260003, "grad_norm": 0.0019303811714053154, "learning_rate": 6.629299515420998e-07, "loss": 0.0133, "step": 463290 }, { "epoch": 4.950050750574283, "grad_norm": 0.0023428008425980806, "learning_rate": 6.629140676143162e-07, "loss": 0.0015, "step": 463300 }, { "epoch": 4.950157593888562, "grad_norm": 5.18241548538208, "learning_rate": 6.628981835025905e-07, "loss": 0.0078, "step": 463310 }, { "epoch": 4.950264437202842, "grad_norm": 1.8111572265625, "learning_rate": 6.628822992069404e-07, "loss": 0.0071, "step": 463320 }, { "epoch": 4.950371280517121, "grad_norm": 0.005138947628438473, "learning_rate": 6.628664147273839e-07, "loss": 0.0024, "step": 463330 }, { "epoch": 4.9504781238314015, "grad_norm": 0.044715166091918945, "learning_rate": 6.628505300639389e-07, "loss": 0.0021, "step": 463340 }, { "epoch": 4.950584967145681, "grad_norm": 0.03461147099733353, "learning_rate": 6.628346452166233e-07, "loss": 0.003, "step": 463350 }, { "epoch": 4.95069181045996, "grad_norm": 0.017079537734389305, "learning_rate": 6.628187601854552e-07, "loss": 0.0018, "step": 463360 }, { "epoch": 4.95079865377424, "grad_norm": 0.7137081027030945, "learning_rate": 6.628028749704524e-07, "loss": 0.0194, "step": 463370 }, { "epoch": 4.950905497088519, "grad_norm": 0.8580963611602783, "learning_rate": 6.627869895716328e-07, "loss": 0.0183, "step": 463380 }, { "epoch": 4.9510123404028, "grad_norm": 2.481027841567993, "learning_rate": 6.627711039890144e-07, "loss": 0.0006, "step": 463390 }, { "epoch": 4.951119183717079, "grad_norm": 0.02559572458267212, "learning_rate": 6.627552182226153e-07, "loss": 0.0044, "step": 463400 }, { "epoch": 4.951226027031359, "grad_norm": 0.00669823307543993, "learning_rate": 6.627393322724533e-07, "loss": 0.0005, "step": 463410 }, { "epoch": 4.951332870345638, "grad_norm": 0.0009623153018765152, "learning_rate": 6.627234461385462e-07, "loss": 0.0005, "step": 463420 }, { "epoch": 4.9514397136599175, "grad_norm": 0.0020251155365258455, "learning_rate": 6.627075598209118e-07, "loss": 0.0057, "step": 463430 }, { "epoch": 4.951546556974197, "grad_norm": 0.00391771737486124, "learning_rate": 6.626916733195687e-07, "loss": 0.0013, "step": 463440 }, { "epoch": 4.951653400288477, "grad_norm": 0.003351274412125349, "learning_rate": 6.626757866345343e-07, "loss": 0.0041, "step": 463450 }, { "epoch": 4.951760243602757, "grad_norm": 3.6792819499969482, "learning_rate": 6.626598997658265e-07, "loss": 0.0341, "step": 463460 }, { "epoch": 4.951867086917036, "grad_norm": 0.003700703615322709, "learning_rate": 6.626440127134635e-07, "loss": 0.0213, "step": 463470 }, { "epoch": 4.951973930231316, "grad_norm": 0.002303525572642684, "learning_rate": 6.62628125477463e-07, "loss": 0.0034, "step": 463480 }, { "epoch": 4.952080773545595, "grad_norm": 0.7027994394302368, "learning_rate": 6.626122380578434e-07, "loss": 0.0027, "step": 463490 }, { "epoch": 4.952187616859875, "grad_norm": 0.002350941998884082, "learning_rate": 6.62596350454622e-07, "loss": 0.0167, "step": 463500 }, { "epoch": 4.952294460174155, "grad_norm": 0.000981063931249082, "learning_rate": 6.625804626678172e-07, "loss": 0.018, "step": 463510 }, { "epoch": 4.952401303488434, "grad_norm": 0.01050819642841816, "learning_rate": 6.625645746974467e-07, "loss": 0.0064, "step": 463520 }, { "epoch": 4.952508146802714, "grad_norm": 0.09849226474761963, "learning_rate": 6.625486865435286e-07, "loss": 0.0036, "step": 463530 }, { "epoch": 4.952614990116993, "grad_norm": 0.07439162582159042, "learning_rate": 6.625327982060807e-07, "loss": 0.0036, "step": 463540 }, { "epoch": 4.952721833431273, "grad_norm": 0.007556033320724964, "learning_rate": 6.625169096851211e-07, "loss": 0.0024, "step": 463550 }, { "epoch": 4.952828676745552, "grad_norm": 0.06043708696961403, "learning_rate": 6.625010209806676e-07, "loss": 0.0033, "step": 463560 }, { "epoch": 4.952935520059833, "grad_norm": 0.11429328471422195, "learning_rate": 6.62485132092738e-07, "loss": 0.006, "step": 463570 }, { "epoch": 4.953042363374112, "grad_norm": 0.024695873260498047, "learning_rate": 6.624692430213508e-07, "loss": 0.0062, "step": 463580 }, { "epoch": 4.9531492066883915, "grad_norm": 0.012935607694089413, "learning_rate": 6.624533537665233e-07, "loss": 0.0002, "step": 463590 }, { "epoch": 4.953256050002671, "grad_norm": 3.2214176654815674, "learning_rate": 6.624374643282737e-07, "loss": 0.001, "step": 463600 }, { "epoch": 4.9533628933169505, "grad_norm": 1.0926681756973267, "learning_rate": 6.624215747066201e-07, "loss": 0.0065, "step": 463610 }, { "epoch": 4.95346973663123, "grad_norm": 13.907563209533691, "learning_rate": 6.624056849015803e-07, "loss": 0.0125, "step": 463620 }, { "epoch": 4.95357657994551, "grad_norm": 0.07824397832155228, "learning_rate": 6.62389794913172e-07, "loss": 0.005, "step": 463630 }, { "epoch": 4.95368342325979, "grad_norm": 0.012093409895896912, "learning_rate": 6.623739047414134e-07, "loss": 0.001, "step": 463640 }, { "epoch": 4.953790266574069, "grad_norm": 0.0012965545756742358, "learning_rate": 6.623580143863226e-07, "loss": 0.0269, "step": 463650 }, { "epoch": 4.953897109888349, "grad_norm": 0.059945277869701385, "learning_rate": 6.62342123847917e-07, "loss": 0.0043, "step": 463660 }, { "epoch": 4.954003953202628, "grad_norm": 0.027005253359675407, "learning_rate": 6.623262331262153e-07, "loss": 0.0045, "step": 463670 }, { "epoch": 4.9541107965169076, "grad_norm": 0.027304938063025475, "learning_rate": 6.623103422212349e-07, "loss": 0.0448, "step": 463680 }, { "epoch": 4.954217639831188, "grad_norm": 0.3069918155670166, "learning_rate": 6.622944511329938e-07, "loss": 0.0029, "step": 463690 }, { "epoch": 4.954324483145467, "grad_norm": 0.05386950820684433, "learning_rate": 6.6227855986151e-07, "loss": 0.0005, "step": 463700 }, { "epoch": 4.954431326459747, "grad_norm": 0.04932098835706711, "learning_rate": 6.622626684068015e-07, "loss": 0.0002, "step": 463710 }, { "epoch": 4.954538169774026, "grad_norm": 0.07052411884069443, "learning_rate": 6.622467767688862e-07, "loss": 0.008, "step": 463720 }, { "epoch": 4.954645013088306, "grad_norm": 0.6556878685951233, "learning_rate": 6.62230884947782e-07, "loss": 0.018, "step": 463730 }, { "epoch": 4.954751856402586, "grad_norm": 0.001900401315651834, "learning_rate": 6.622149929435069e-07, "loss": 0.0058, "step": 463740 }, { "epoch": 4.9548586997168655, "grad_norm": 0.04964936524629593, "learning_rate": 6.62199100756079e-07, "loss": 0.0004, "step": 463750 }, { "epoch": 4.954965543031145, "grad_norm": 1.0105527639389038, "learning_rate": 6.621832083855159e-07, "loss": 0.0077, "step": 463760 }, { "epoch": 4.9550723863454245, "grad_norm": 0.003936712630093098, "learning_rate": 6.621673158318357e-07, "loss": 0.0113, "step": 463770 }, { "epoch": 4.955179229659704, "grad_norm": 0.007973279803991318, "learning_rate": 6.621514230950564e-07, "loss": 0.0075, "step": 463780 }, { "epoch": 4.955286072973983, "grad_norm": 0.015725258737802505, "learning_rate": 6.621355301751959e-07, "loss": 0.0464, "step": 463790 }, { "epoch": 4.955392916288263, "grad_norm": 5.396373748779297, "learning_rate": 6.621196370722721e-07, "loss": 0.028, "step": 463800 }, { "epoch": 4.955499759602543, "grad_norm": 0.19024373590946198, "learning_rate": 6.62103743786303e-07, "loss": 0.0084, "step": 463810 }, { "epoch": 4.955606602916823, "grad_norm": 0.032912131398916245, "learning_rate": 6.620878503173068e-07, "loss": 0.0053, "step": 463820 }, { "epoch": 4.955713446231102, "grad_norm": 0.3481936454772949, "learning_rate": 6.620719566653008e-07, "loss": 0.0052, "step": 463830 }, { "epoch": 4.955820289545382, "grad_norm": 0.05446859449148178, "learning_rate": 6.620560628303036e-07, "loss": 0.0015, "step": 463840 }, { "epoch": 4.955927132859661, "grad_norm": 7.615246772766113, "learning_rate": 6.620401688123329e-07, "loss": 0.0101, "step": 463850 }, { "epoch": 4.956033976173941, "grad_norm": 0.1943526268005371, "learning_rate": 6.620242746114064e-07, "loss": 0.0017, "step": 463860 }, { "epoch": 4.956140819488221, "grad_norm": 0.0015237919287756085, "learning_rate": 6.620083802275424e-07, "loss": 0.0348, "step": 463870 }, { "epoch": 4.9562476628025, "grad_norm": 0.08342123031616211, "learning_rate": 6.619924856607587e-07, "loss": 0.0016, "step": 463880 }, { "epoch": 4.95635450611678, "grad_norm": 0.01155958604067564, "learning_rate": 6.619765909110732e-07, "loss": 0.0304, "step": 463890 }, { "epoch": 4.956461349431059, "grad_norm": 0.005538953933864832, "learning_rate": 6.61960695978504e-07, "loss": 0.0004, "step": 463900 }, { "epoch": 4.956568192745339, "grad_norm": 0.11946363002061844, "learning_rate": 6.619448008630689e-07, "loss": 0.0036, "step": 463910 }, { "epoch": 4.956675036059618, "grad_norm": 0.7923703789710999, "learning_rate": 6.619289055647859e-07, "loss": 0.0031, "step": 463920 }, { "epoch": 4.9567818793738985, "grad_norm": 0.005559059325605631, "learning_rate": 6.619130100836729e-07, "loss": 0.0155, "step": 463930 }, { "epoch": 4.956888722688178, "grad_norm": 0.0013254511868581176, "learning_rate": 6.618971144197478e-07, "loss": 0.0101, "step": 463940 }, { "epoch": 4.956995566002457, "grad_norm": 4.826869964599609, "learning_rate": 6.618812185730289e-07, "loss": 0.0771, "step": 463950 }, { "epoch": 4.957102409316737, "grad_norm": 0.02188429981470108, "learning_rate": 6.618653225435338e-07, "loss": 0.0059, "step": 463960 }, { "epoch": 4.957209252631016, "grad_norm": 0.010631605051457882, "learning_rate": 6.618494263312804e-07, "loss": 0.0135, "step": 463970 }, { "epoch": 4.957316095945297, "grad_norm": 0.009456213563680649, "learning_rate": 6.61833529936287e-07, "loss": 0.0021, "step": 463980 }, { "epoch": 4.957422939259576, "grad_norm": 0.00305869965814054, "learning_rate": 6.618176333585713e-07, "loss": 0.0045, "step": 463990 }, { "epoch": 4.957529782573856, "grad_norm": 0.014399213716387749, "learning_rate": 6.61801736598151e-07, "loss": 0.0033, "step": 464000 }, { "epoch": 4.957636625888135, "grad_norm": 0.038924869149923325, "learning_rate": 6.617858396550445e-07, "loss": 0.0041, "step": 464010 }, { "epoch": 4.9577434692024145, "grad_norm": 0.016683271154761314, "learning_rate": 6.617699425292697e-07, "loss": 0.001, "step": 464020 }, { "epoch": 4.957850312516694, "grad_norm": 0.001105356845073402, "learning_rate": 6.617540452208442e-07, "loss": 0.0016, "step": 464030 }, { "epoch": 4.957957155830973, "grad_norm": 0.26894304156303406, "learning_rate": 6.617381477297863e-07, "loss": 0.0011, "step": 464040 }, { "epoch": 4.958063999145254, "grad_norm": 0.1674044132232666, "learning_rate": 6.617222500561138e-07, "loss": 0.0005, "step": 464050 }, { "epoch": 4.958170842459533, "grad_norm": 0.21727247536182404, "learning_rate": 6.617063521998447e-07, "loss": 0.0008, "step": 464060 }, { "epoch": 4.958277685773813, "grad_norm": 4.596296787261963, "learning_rate": 6.616904541609969e-07, "loss": 0.0055, "step": 464070 }, { "epoch": 4.958384529088092, "grad_norm": 0.4406421482563019, "learning_rate": 6.616745559395884e-07, "loss": 0.0017, "step": 464080 }, { "epoch": 4.958491372402372, "grad_norm": 0.1343168169260025, "learning_rate": 6.616586575356371e-07, "loss": 0.001, "step": 464090 }, { "epoch": 4.958598215716652, "grad_norm": 0.0014268482336774468, "learning_rate": 6.61642758949161e-07, "loss": 0.023, "step": 464100 }, { "epoch": 4.958705059030931, "grad_norm": 0.006396429613232613, "learning_rate": 6.61626860180178e-07, "loss": 0.0048, "step": 464110 }, { "epoch": 4.958811902345211, "grad_norm": 0.004121773410588503, "learning_rate": 6.616109612287061e-07, "loss": 0.0065, "step": 464120 }, { "epoch": 4.95891874565949, "grad_norm": 0.029333854094147682, "learning_rate": 6.615950620947631e-07, "loss": 0.0082, "step": 464130 }, { "epoch": 4.95902558897377, "grad_norm": 0.005892436020076275, "learning_rate": 6.615791627783672e-07, "loss": 0.0001, "step": 464140 }, { "epoch": 4.959132432288049, "grad_norm": 0.0007655523368157446, "learning_rate": 6.615632632795362e-07, "loss": 0.0322, "step": 464150 }, { "epoch": 4.959239275602329, "grad_norm": 6.94713830947876, "learning_rate": 6.615473635982881e-07, "loss": 0.0071, "step": 464160 }, { "epoch": 4.959346118916609, "grad_norm": 0.008613918907940388, "learning_rate": 6.615314637346407e-07, "loss": 0.0012, "step": 464170 }, { "epoch": 4.9594529622308885, "grad_norm": 0.03763746842741966, "learning_rate": 6.615155636886122e-07, "loss": 0.0023, "step": 464180 }, { "epoch": 4.959559805545168, "grad_norm": 0.07663083076477051, "learning_rate": 6.614996634602206e-07, "loss": 0.0089, "step": 464190 }, { "epoch": 4.959666648859447, "grad_norm": 0.0017374936724081635, "learning_rate": 6.614837630494835e-07, "loss": 0.0139, "step": 464200 }, { "epoch": 4.959773492173727, "grad_norm": 0.008779016323387623, "learning_rate": 6.614678624564192e-07, "loss": 0.0057, "step": 464210 }, { "epoch": 4.959880335488007, "grad_norm": 0.003131748177111149, "learning_rate": 6.614519616810455e-07, "loss": 0.0071, "step": 464220 }, { "epoch": 4.959987178802287, "grad_norm": 5.628422737121582, "learning_rate": 6.614360607233801e-07, "loss": 0.0056, "step": 464230 }, { "epoch": 4.960094022116566, "grad_norm": 0.0629849061369896, "learning_rate": 6.614201595834414e-07, "loss": 0.0012, "step": 464240 }, { "epoch": 4.960200865430846, "grad_norm": 0.2977484464645386, "learning_rate": 6.614042582612471e-07, "loss": 0.0062, "step": 464250 }, { "epoch": 4.960307708745125, "grad_norm": 0.025558480992913246, "learning_rate": 6.613883567568152e-07, "loss": 0.0065, "step": 464260 }, { "epoch": 4.9604145520594045, "grad_norm": 0.0008637394057586789, "learning_rate": 6.613724550701636e-07, "loss": 0.0132, "step": 464270 }, { "epoch": 4.960521395373685, "grad_norm": 0.06896737217903137, "learning_rate": 6.613565532013105e-07, "loss": 0.0022, "step": 464280 }, { "epoch": 4.960628238687964, "grad_norm": 0.05975153669714928, "learning_rate": 6.613406511502735e-07, "loss": 0.009, "step": 464290 }, { "epoch": 4.960735082002244, "grad_norm": 0.5514241456985474, "learning_rate": 6.613247489170708e-07, "loss": 0.0063, "step": 464300 }, { "epoch": 4.960841925316523, "grad_norm": 0.3387432098388672, "learning_rate": 6.613088465017202e-07, "loss": 0.0088, "step": 464310 }, { "epoch": 4.960948768630803, "grad_norm": 0.45221540331840515, "learning_rate": 6.612929439042398e-07, "loss": 0.0021, "step": 464320 }, { "epoch": 4.961055611945082, "grad_norm": 0.023303547874093056, "learning_rate": 6.612770411246476e-07, "loss": 0.004, "step": 464330 }, { "epoch": 4.9611624552593625, "grad_norm": 0.27508020401000977, "learning_rate": 6.612611381629614e-07, "loss": 0.0116, "step": 464340 }, { "epoch": 4.961269298573642, "grad_norm": 0.41153451800346375, "learning_rate": 6.612452350191992e-07, "loss": 0.0012, "step": 464350 }, { "epoch": 4.961376141887921, "grad_norm": 0.0012840860290452838, "learning_rate": 6.61229331693379e-07, "loss": 0.0007, "step": 464360 }, { "epoch": 4.961482985202201, "grad_norm": 0.00706868153065443, "learning_rate": 6.612134281855183e-07, "loss": 0.013, "step": 464370 }, { "epoch": 4.96158982851648, "grad_norm": 0.03525478392839432, "learning_rate": 6.611975244956361e-07, "loss": 0.0095, "step": 464380 }, { "epoch": 4.96169667183076, "grad_norm": 0.5953627228736877, "learning_rate": 6.611816206237494e-07, "loss": 0.0015, "step": 464390 }, { "epoch": 4.96180351514504, "grad_norm": 2.5320394039154053, "learning_rate": 6.611657165698764e-07, "loss": 0.0168, "step": 464400 }, { "epoch": 4.96191035845932, "grad_norm": 5.2177042961120605, "learning_rate": 6.611498123340354e-07, "loss": 0.0149, "step": 464410 }, { "epoch": 4.962017201773599, "grad_norm": 0.0016842696350067854, "learning_rate": 6.611339079162439e-07, "loss": 0.0051, "step": 464420 }, { "epoch": 4.9621240450878785, "grad_norm": 0.013908020220696926, "learning_rate": 6.611180033165201e-07, "loss": 0.0043, "step": 464430 }, { "epoch": 4.962230888402158, "grad_norm": 0.024958420544862747, "learning_rate": 6.611020985348818e-07, "loss": 0.0077, "step": 464440 }, { "epoch": 4.962337731716438, "grad_norm": 0.7448420524597168, "learning_rate": 6.610861935713473e-07, "loss": 0.0022, "step": 464450 }, { "epoch": 4.962444575030718, "grad_norm": 0.04874025285243988, "learning_rate": 6.610702884259342e-07, "loss": 0.0184, "step": 464460 }, { "epoch": 4.962551418344997, "grad_norm": 6.214303970336914, "learning_rate": 6.610543830986607e-07, "loss": 0.0124, "step": 464470 }, { "epoch": 4.962658261659277, "grad_norm": 0.011999509297311306, "learning_rate": 6.610384775895446e-07, "loss": 0.0109, "step": 464480 }, { "epoch": 4.962765104973556, "grad_norm": 2.669929265975952, "learning_rate": 6.610225718986037e-07, "loss": 0.0154, "step": 464490 }, { "epoch": 4.962871948287836, "grad_norm": 0.011346605606377125, "learning_rate": 6.610066660258565e-07, "loss": 0.0028, "step": 464500 }, { "epoch": 4.962978791602115, "grad_norm": 0.2581733465194702, "learning_rate": 6.609907599713204e-07, "loss": 0.0002, "step": 464510 }, { "epoch": 4.963085634916395, "grad_norm": 1.8884527683258057, "learning_rate": 6.609748537350136e-07, "loss": 0.0115, "step": 464520 }, { "epoch": 4.963192478230675, "grad_norm": 0.022269757464528084, "learning_rate": 6.609589473169541e-07, "loss": 0.0017, "step": 464530 }, { "epoch": 4.963299321544954, "grad_norm": 0.015077302232384682, "learning_rate": 6.609430407171598e-07, "loss": 0.0029, "step": 464540 }, { "epoch": 4.963406164859234, "grad_norm": 0.07577137649059296, "learning_rate": 6.609271339356487e-07, "loss": 0.0021, "step": 464550 }, { "epoch": 4.963513008173513, "grad_norm": 0.6249011158943176, "learning_rate": 6.609112269724387e-07, "loss": 0.0078, "step": 464560 }, { "epoch": 4.963619851487794, "grad_norm": 0.02794100157916546, "learning_rate": 6.608953198275477e-07, "loss": 0.0053, "step": 464570 }, { "epoch": 4.963726694802073, "grad_norm": 0.0035362481139600277, "learning_rate": 6.608794125009938e-07, "loss": 0.0048, "step": 464580 }, { "epoch": 4.9638335381163525, "grad_norm": 0.0033586362842470407, "learning_rate": 6.608635049927949e-07, "loss": 0.0024, "step": 464590 }, { "epoch": 4.963940381430632, "grad_norm": 4.027379989624023, "learning_rate": 6.608475973029689e-07, "loss": 0.0078, "step": 464600 }, { "epoch": 4.9640472247449114, "grad_norm": 0.018341610208153725, "learning_rate": 6.608316894315339e-07, "loss": 0.0034, "step": 464610 }, { "epoch": 4.964154068059191, "grad_norm": 0.04360652714967728, "learning_rate": 6.608157813785078e-07, "loss": 0.0025, "step": 464620 }, { "epoch": 4.96426091137347, "grad_norm": 0.003937097731977701, "learning_rate": 6.607998731439084e-07, "loss": 0.0033, "step": 464630 }, { "epoch": 4.964367754687751, "grad_norm": 0.001558532239869237, "learning_rate": 6.60783964727754e-07, "loss": 0.0053, "step": 464640 }, { "epoch": 4.96447459800203, "grad_norm": 0.2725861072540283, "learning_rate": 6.607680561300623e-07, "loss": 0.0139, "step": 464650 }, { "epoch": 4.96458144131631, "grad_norm": 0.0005726517410948873, "learning_rate": 6.607521473508513e-07, "loss": 0.0066, "step": 464660 }, { "epoch": 4.964688284630589, "grad_norm": 0.00504733482375741, "learning_rate": 6.60736238390139e-07, "loss": 0.0129, "step": 464670 }, { "epoch": 4.9647951279448685, "grad_norm": 3.8002524375915527, "learning_rate": 6.607203292479434e-07, "loss": 0.0161, "step": 464680 }, { "epoch": 4.964901971259149, "grad_norm": 0.0017367219552397728, "learning_rate": 6.607044199242823e-07, "loss": 0.0107, "step": 464690 }, { "epoch": 4.965008814573428, "grad_norm": 2.558586835861206, "learning_rate": 6.60688510419174e-07, "loss": 0.0145, "step": 464700 }, { "epoch": 4.965115657887708, "grad_norm": 0.04711480066180229, "learning_rate": 6.60672600732636e-07, "loss": 0.0036, "step": 464710 }, { "epoch": 4.965222501201987, "grad_norm": 0.006974068935960531, "learning_rate": 6.606566908646866e-07, "loss": 0.0143, "step": 464720 }, { "epoch": 4.965329344516267, "grad_norm": 0.0009767065057531, "learning_rate": 6.606407808153437e-07, "loss": 0.0004, "step": 464730 }, { "epoch": 4.965436187830546, "grad_norm": 0.1574918031692505, "learning_rate": 6.606248705846251e-07, "loss": 0.0001, "step": 464740 }, { "epoch": 4.965543031144826, "grad_norm": 0.030291490256786346, "learning_rate": 6.60608960172549e-07, "loss": 0.011, "step": 464750 }, { "epoch": 4.965649874459106, "grad_norm": 1.4372862577438354, "learning_rate": 6.605930495791333e-07, "loss": 0.0391, "step": 464760 }, { "epoch": 4.9657567177733855, "grad_norm": 0.0030426892917603254, "learning_rate": 6.605771388043959e-07, "loss": 0.0396, "step": 464770 }, { "epoch": 4.965863561087665, "grad_norm": 0.36217761039733887, "learning_rate": 6.605612278483548e-07, "loss": 0.0062, "step": 464780 }, { "epoch": 4.965970404401944, "grad_norm": 1.226322889328003, "learning_rate": 6.60545316711028e-07, "loss": 0.0041, "step": 464790 }, { "epoch": 4.966077247716224, "grad_norm": 0.015045988373458385, "learning_rate": 6.605294053924333e-07, "loss": 0.0001, "step": 464800 }, { "epoch": 4.966184091030504, "grad_norm": 0.004050994757562876, "learning_rate": 6.605134938925888e-07, "loss": 0.0132, "step": 464810 }, { "epoch": 4.966290934344784, "grad_norm": 0.006014785263687372, "learning_rate": 6.604975822115127e-07, "loss": 0.0102, "step": 464820 }, { "epoch": 4.966397777659063, "grad_norm": 0.07527817785739899, "learning_rate": 6.604816703492223e-07, "loss": 0.0017, "step": 464830 }, { "epoch": 4.9665046209733426, "grad_norm": 0.004056441131979227, "learning_rate": 6.604657583057363e-07, "loss": 0.0039, "step": 464840 }, { "epoch": 4.966611464287622, "grad_norm": 0.0039133490063250065, "learning_rate": 6.604498460810722e-07, "loss": 0.0062, "step": 464850 }, { "epoch": 4.9667183076019015, "grad_norm": 0.08597787469625473, "learning_rate": 6.604339336752481e-07, "loss": 0.001, "step": 464860 }, { "epoch": 4.966825150916181, "grad_norm": 0.0005665982025675476, "learning_rate": 6.604180210882821e-07, "loss": 0.0001, "step": 464870 }, { "epoch": 4.966931994230461, "grad_norm": 0.13978829979896545, "learning_rate": 6.604021083201921e-07, "loss": 0.0148, "step": 464880 }, { "epoch": 4.967038837544741, "grad_norm": 12.084223747253418, "learning_rate": 6.603861953709958e-07, "loss": 0.0064, "step": 464890 }, { "epoch": 4.96714568085902, "grad_norm": 2.879180431365967, "learning_rate": 6.603702822407116e-07, "loss": 0.0035, "step": 464900 }, { "epoch": 4.9672525241733, "grad_norm": 0.004502871539443731, "learning_rate": 6.603543689293571e-07, "loss": 0.0044, "step": 464910 }, { "epoch": 4.967359367487579, "grad_norm": 0.6930535435676575, "learning_rate": 6.603384554369504e-07, "loss": 0.0012, "step": 464920 }, { "epoch": 4.9674662108018595, "grad_norm": 0.014964804984629154, "learning_rate": 6.603225417635095e-07, "loss": 0.0288, "step": 464930 }, { "epoch": 4.967573054116139, "grad_norm": 0.06005951389670372, "learning_rate": 6.603066279090523e-07, "loss": 0.0012, "step": 464940 }, { "epoch": 4.967679897430418, "grad_norm": 0.11906629055738449, "learning_rate": 6.60290713873597e-07, "loss": 0.0118, "step": 464950 }, { "epoch": 4.967786740744698, "grad_norm": 0.023525606840848923, "learning_rate": 6.602747996571614e-07, "loss": 0.0223, "step": 464960 }, { "epoch": 4.967893584058977, "grad_norm": 0.012767194770276546, "learning_rate": 6.602588852597633e-07, "loss": 0.0052, "step": 464970 }, { "epoch": 4.968000427373257, "grad_norm": 0.09201746433973312, "learning_rate": 6.602429706814208e-07, "loss": 0.005, "step": 464980 }, { "epoch": 4.968107270687537, "grad_norm": 0.029640328139066696, "learning_rate": 6.60227055922152e-07, "loss": 0.0144, "step": 464990 }, { "epoch": 4.968214114001817, "grad_norm": 0.008365522138774395, "learning_rate": 6.602111409819746e-07, "loss": 0.005, "step": 465000 }, { "epoch": 4.968320957316096, "grad_norm": 0.0009739036322571337, "learning_rate": 6.601952258609069e-07, "loss": 0.0008, "step": 465010 }, { "epoch": 4.9684278006303755, "grad_norm": 0.007699197623878717, "learning_rate": 6.601793105589667e-07, "loss": 0.0062, "step": 465020 }, { "epoch": 4.968534643944655, "grad_norm": 3.9703075885772705, "learning_rate": 6.601633950761719e-07, "loss": 0.0013, "step": 465030 }, { "epoch": 4.968641487258934, "grad_norm": 1.9570327997207642, "learning_rate": 6.601474794125405e-07, "loss": 0.0013, "step": 465040 }, { "epoch": 4.968748330573215, "grad_norm": 4.000970840454102, "learning_rate": 6.601315635680907e-07, "loss": 0.0345, "step": 465050 }, { "epoch": 4.968855173887494, "grad_norm": 0.03503928706049919, "learning_rate": 6.6011564754284e-07, "loss": 0.0043, "step": 465060 }, { "epoch": 4.968962017201774, "grad_norm": 0.013635591603815556, "learning_rate": 6.600997313368068e-07, "loss": 0.0023, "step": 465070 }, { "epoch": 4.969068860516053, "grad_norm": 0.003975837025791407, "learning_rate": 6.60083814950009e-07, "loss": 0.0045, "step": 465080 }, { "epoch": 4.969175703830333, "grad_norm": 0.0007057958282530308, "learning_rate": 6.600678983824642e-07, "loss": 0.0047, "step": 465090 }, { "epoch": 4.969282547144612, "grad_norm": 0.0200883150100708, "learning_rate": 6.60051981634191e-07, "loss": 0.0003, "step": 465100 }, { "epoch": 4.969389390458892, "grad_norm": 0.046737946569919586, "learning_rate": 6.600360647052068e-07, "loss": 0.0042, "step": 465110 }, { "epoch": 4.969496233773172, "grad_norm": 0.03136932849884033, "learning_rate": 6.6002014759553e-07, "loss": 0.0371, "step": 465120 }, { "epoch": 4.969603077087451, "grad_norm": 0.0019111604196950793, "learning_rate": 6.600042303051783e-07, "loss": 0.0001, "step": 465130 }, { "epoch": 4.969709920401731, "grad_norm": 0.024590283632278442, "learning_rate": 6.599883128341697e-07, "loss": 0.0044, "step": 465140 }, { "epoch": 4.96981676371601, "grad_norm": 0.2567253112792969, "learning_rate": 6.599723951825222e-07, "loss": 0.004, "step": 465150 }, { "epoch": 4.969923607030291, "grad_norm": 7.757331371307373, "learning_rate": 6.599564773502539e-07, "loss": 0.0117, "step": 465160 }, { "epoch": 4.97003045034457, "grad_norm": 0.973696231842041, "learning_rate": 6.599405593373825e-07, "loss": 0.0207, "step": 465170 }, { "epoch": 4.9701372936588495, "grad_norm": 0.0015550522366538644, "learning_rate": 6.599246411439263e-07, "loss": 0.0004, "step": 465180 }, { "epoch": 4.970244136973129, "grad_norm": 0.0031154188327491283, "learning_rate": 6.599087227699031e-07, "loss": 0.0047, "step": 465190 }, { "epoch": 4.970350980287408, "grad_norm": 2.595184564590454, "learning_rate": 6.598928042153308e-07, "loss": 0.0077, "step": 465200 }, { "epoch": 4.970457823601688, "grad_norm": 0.002854503458365798, "learning_rate": 6.598768854802275e-07, "loss": 0.0116, "step": 465210 }, { "epoch": 4.970564666915967, "grad_norm": 0.011825231835246086, "learning_rate": 6.598609665646111e-07, "loss": 0.001, "step": 465220 }, { "epoch": 4.970671510230248, "grad_norm": 0.04523496702313423, "learning_rate": 6.598450474684996e-07, "loss": 0.0174, "step": 465230 }, { "epoch": 4.970778353544527, "grad_norm": 0.028805401176214218, "learning_rate": 6.598291281919111e-07, "loss": 0.0077, "step": 465240 }, { "epoch": 4.970885196858807, "grad_norm": 3.5443527698516846, "learning_rate": 6.598132087348633e-07, "loss": 0.0124, "step": 465250 }, { "epoch": 4.970992040173086, "grad_norm": 0.008144436404109001, "learning_rate": 6.597972890973743e-07, "loss": 0.0077, "step": 465260 }, { "epoch": 4.9710988834873655, "grad_norm": 0.0005438703810796142, "learning_rate": 6.597813692794623e-07, "loss": 0.0116, "step": 465270 }, { "epoch": 4.971205726801646, "grad_norm": 2.023444414138794, "learning_rate": 6.59765449281145e-07, "loss": 0.011, "step": 465280 }, { "epoch": 4.971312570115925, "grad_norm": 1.0970854759216309, "learning_rate": 6.597495291024403e-07, "loss": 0.0113, "step": 465290 }, { "epoch": 4.971419413430205, "grad_norm": 2.834759473800659, "learning_rate": 6.597336087433664e-07, "loss": 0.0088, "step": 465300 }, { "epoch": 4.971526256744484, "grad_norm": 0.015948766842484474, "learning_rate": 6.597176882039413e-07, "loss": 0.0035, "step": 465310 }, { "epoch": 4.971633100058764, "grad_norm": 2.683990478515625, "learning_rate": 6.597017674841827e-07, "loss": 0.043, "step": 465320 }, { "epoch": 4.971739943373043, "grad_norm": 0.15827152132987976, "learning_rate": 6.59685846584109e-07, "loss": 0.0049, "step": 465330 }, { "epoch": 4.971846786687323, "grad_norm": 0.0069986977614462376, "learning_rate": 6.596699255037377e-07, "loss": 0.0161, "step": 465340 }, { "epoch": 4.971953630001603, "grad_norm": 0.14606425166130066, "learning_rate": 6.59654004243087e-07, "loss": 0.0054, "step": 465350 }, { "epoch": 4.972060473315882, "grad_norm": 0.004685870837420225, "learning_rate": 6.59638082802175e-07, "loss": 0.009, "step": 465360 }, { "epoch": 4.972167316630162, "grad_norm": 3.6719038486480713, "learning_rate": 6.596221611810195e-07, "loss": 0.0032, "step": 465370 }, { "epoch": 4.972274159944441, "grad_norm": 0.004088497254997492, "learning_rate": 6.596062393796387e-07, "loss": 0.0227, "step": 465380 }, { "epoch": 4.972381003258721, "grad_norm": 0.3858366310596466, "learning_rate": 6.595903173980501e-07, "loss": 0.0053, "step": 465390 }, { "epoch": 4.972487846573001, "grad_norm": 0.01085467729717493, "learning_rate": 6.595743952362722e-07, "loss": 0.0079, "step": 465400 }, { "epoch": 4.972594689887281, "grad_norm": 0.31789466738700867, "learning_rate": 6.595584728943226e-07, "loss": 0.0014, "step": 465410 }, { "epoch": 4.97270153320156, "grad_norm": 3.878152370452881, "learning_rate": 6.595425503722196e-07, "loss": 0.0142, "step": 465420 }, { "epoch": 4.9728083765158395, "grad_norm": 0.12327343970537186, "learning_rate": 6.59526627669981e-07, "loss": 0.0001, "step": 465430 }, { "epoch": 4.972915219830119, "grad_norm": 15.568355560302734, "learning_rate": 6.595107047876247e-07, "loss": 0.0315, "step": 465440 }, { "epoch": 4.973022063144398, "grad_norm": 0.00148198998067528, "learning_rate": 6.59494781725169e-07, "loss": 0.0041, "step": 465450 }, { "epoch": 4.973128906458678, "grad_norm": 0.12116461992263794, "learning_rate": 6.594788584826314e-07, "loss": 0.0057, "step": 465460 }, { "epoch": 4.973235749772958, "grad_norm": 2.796473979949951, "learning_rate": 6.594629350600303e-07, "loss": 0.0058, "step": 465470 }, { "epoch": 4.973342593087238, "grad_norm": 0.004399200435727835, "learning_rate": 6.594470114573833e-07, "loss": 0.0012, "step": 465480 }, { "epoch": 4.973449436401517, "grad_norm": 0.005214363802224398, "learning_rate": 6.594310876747087e-07, "loss": 0.0006, "step": 465490 }, { "epoch": 4.973556279715797, "grad_norm": 6.973727703094482, "learning_rate": 6.594151637120245e-07, "loss": 0.0056, "step": 465500 }, { "epoch": 4.973663123030076, "grad_norm": 0.005059889983385801, "learning_rate": 6.593992395693484e-07, "loss": 0.0094, "step": 465510 }, { "epoch": 4.973769966344356, "grad_norm": 0.06697303801774979, "learning_rate": 6.593833152466987e-07, "loss": 0.0026, "step": 465520 }, { "epoch": 4.973876809658636, "grad_norm": 0.044944144785404205, "learning_rate": 6.593673907440931e-07, "loss": 0.0033, "step": 465530 }, { "epoch": 4.973983652972915, "grad_norm": 2.7596893310546875, "learning_rate": 6.593514660615496e-07, "loss": 0.0053, "step": 465540 }, { "epoch": 4.974090496287195, "grad_norm": 0.4825023412704468, "learning_rate": 6.593355411990864e-07, "loss": 0.0041, "step": 465550 }, { "epoch": 4.974197339601474, "grad_norm": 0.02326883003115654, "learning_rate": 6.593196161567214e-07, "loss": 0.0013, "step": 465560 }, { "epoch": 4.974304182915754, "grad_norm": 1.6577174663543701, "learning_rate": 6.593036909344723e-07, "loss": 0.0025, "step": 465570 }, { "epoch": 4.974411026230033, "grad_norm": 1.3546091318130493, "learning_rate": 6.592877655323574e-07, "loss": 0.0017, "step": 465580 }, { "epoch": 4.9745178695443135, "grad_norm": 0.027089059352874756, "learning_rate": 6.592718399503947e-07, "loss": 0.0001, "step": 465590 }, { "epoch": 4.974624712858593, "grad_norm": 0.010918544605374336, "learning_rate": 6.59255914188602e-07, "loss": 0.0034, "step": 465600 }, { "epoch": 4.974731556172872, "grad_norm": 0.0011172746308147907, "learning_rate": 6.592399882469973e-07, "loss": 0.0025, "step": 465610 }, { "epoch": 4.974838399487152, "grad_norm": 0.0018813618225976825, "learning_rate": 6.592240621255987e-07, "loss": 0.0206, "step": 465620 }, { "epoch": 4.974945242801431, "grad_norm": 0.025404253974556923, "learning_rate": 6.59208135824424e-07, "loss": 0.0004, "step": 465630 }, { "epoch": 4.975052086115712, "grad_norm": 0.005702292546629906, "learning_rate": 6.591922093434915e-07, "loss": 0.014, "step": 465640 }, { "epoch": 4.975158929429991, "grad_norm": 0.10171575099229813, "learning_rate": 6.591762826828189e-07, "loss": 0.0484, "step": 465650 }, { "epoch": 4.975265772744271, "grad_norm": 0.25179892778396606, "learning_rate": 6.591603558424241e-07, "loss": 0.0011, "step": 465660 }, { "epoch": 4.97537261605855, "grad_norm": 0.12263189256191254, "learning_rate": 6.591444288223256e-07, "loss": 0.0023, "step": 465670 }, { "epoch": 4.9754794593728295, "grad_norm": 0.7394419312477112, "learning_rate": 6.591285016225406e-07, "loss": 0.0229, "step": 465680 }, { "epoch": 4.975586302687109, "grad_norm": 0.002246767282485962, "learning_rate": 6.591125742430878e-07, "loss": 0.0037, "step": 465690 }, { "epoch": 4.975693146001389, "grad_norm": 5.329897403717041, "learning_rate": 6.590966466839848e-07, "loss": 0.007, "step": 465700 }, { "epoch": 4.975799989315669, "grad_norm": 0.01638694666326046, "learning_rate": 6.590807189452497e-07, "loss": 0.007, "step": 465710 }, { "epoch": 4.975906832629948, "grad_norm": 2.671403169631958, "learning_rate": 6.590647910269005e-07, "loss": 0.0197, "step": 465720 }, { "epoch": 4.976013675944228, "grad_norm": 12.724451065063477, "learning_rate": 6.590488629289551e-07, "loss": 0.0131, "step": 465730 }, { "epoch": 4.976120519258507, "grad_norm": 0.03583712503314018, "learning_rate": 6.590329346514314e-07, "loss": 0.0013, "step": 465740 }, { "epoch": 4.976227362572787, "grad_norm": 0.001916312612593174, "learning_rate": 6.590170061943477e-07, "loss": 0.0039, "step": 465750 }, { "epoch": 4.976334205887067, "grad_norm": 0.0015920681180432439, "learning_rate": 6.590010775577218e-07, "loss": 0.0052, "step": 465760 }, { "epoch": 4.9764410492013464, "grad_norm": 1.082550287246704, "learning_rate": 6.589851487415716e-07, "loss": 0.0164, "step": 465770 }, { "epoch": 4.976547892515626, "grad_norm": 0.03707714378833771, "learning_rate": 6.589692197459152e-07, "loss": 0.0075, "step": 465780 }, { "epoch": 4.976654735829905, "grad_norm": 1.1680433750152588, "learning_rate": 6.589532905707706e-07, "loss": 0.001, "step": 465790 }, { "epoch": 4.976761579144185, "grad_norm": 0.006570072844624519, "learning_rate": 6.589373612161558e-07, "loss": 0.011, "step": 465800 }, { "epoch": 4.976868422458464, "grad_norm": 0.4578499495983124, "learning_rate": 6.589214316820886e-07, "loss": 0.0036, "step": 465810 }, { "epoch": 4.976975265772745, "grad_norm": 0.017250096425414085, "learning_rate": 6.58905501968587e-07, "loss": 0.0014, "step": 465820 }, { "epoch": 4.977082109087024, "grad_norm": 0.0015950928209349513, "learning_rate": 6.588895720756693e-07, "loss": 0.0019, "step": 465830 }, { "epoch": 4.9771889524013035, "grad_norm": 0.08528976142406464, "learning_rate": 6.588736420033531e-07, "loss": 0.0011, "step": 465840 }, { "epoch": 4.977295795715583, "grad_norm": 0.09214881807565689, "learning_rate": 6.588577117516568e-07, "loss": 0.0003, "step": 465850 }, { "epoch": 4.9774026390298625, "grad_norm": 0.008739759214222431, "learning_rate": 6.588417813205979e-07, "loss": 0.0032, "step": 465860 }, { "epoch": 4.977509482344143, "grad_norm": 0.00586091261357069, "learning_rate": 6.588258507101949e-07, "loss": 0.0181, "step": 465870 }, { "epoch": 4.977616325658422, "grad_norm": 1.5324757099151611, "learning_rate": 6.588099199204653e-07, "loss": 0.0018, "step": 465880 }, { "epoch": 4.977723168972702, "grad_norm": 0.12828953564167023, "learning_rate": 6.587939889514275e-07, "loss": 0.0009, "step": 465890 }, { "epoch": 4.977830012286981, "grad_norm": 0.003188757225871086, "learning_rate": 6.587780578030991e-07, "loss": 0.0061, "step": 465900 }, { "epoch": 4.977936855601261, "grad_norm": 0.0631726086139679, "learning_rate": 6.587621264754983e-07, "loss": 0.0139, "step": 465910 }, { "epoch": 4.97804369891554, "grad_norm": 0.07298623770475388, "learning_rate": 6.587461949686433e-07, "loss": 0.0186, "step": 465920 }, { "epoch": 4.97815054222982, "grad_norm": 0.021895499899983406, "learning_rate": 6.587302632825518e-07, "loss": 0.001, "step": 465930 }, { "epoch": 4.9782573855441, "grad_norm": 0.0017611960647627711, "learning_rate": 6.587143314172418e-07, "loss": 0.0021, "step": 465940 }, { "epoch": 4.978364228858379, "grad_norm": 0.3676944673061371, "learning_rate": 6.586983993727313e-07, "loss": 0.0138, "step": 465950 }, { "epoch": 4.978471072172659, "grad_norm": 0.0006473983521573246, "learning_rate": 6.586824671490384e-07, "loss": 0.0144, "step": 465960 }, { "epoch": 4.978577915486938, "grad_norm": 2.455514430999756, "learning_rate": 6.58666534746181e-07, "loss": 0.0047, "step": 465970 }, { "epoch": 4.978684758801218, "grad_norm": 0.0009343134588561952, "learning_rate": 6.58650602164177e-07, "loss": 0.0362, "step": 465980 }, { "epoch": 4.978791602115498, "grad_norm": 1.1658174991607666, "learning_rate": 6.586346694030447e-07, "loss": 0.0208, "step": 465990 }, { "epoch": 4.9788984454297776, "grad_norm": 0.6624032258987427, "learning_rate": 6.586187364628017e-07, "loss": 0.0061, "step": 466000 }, { "epoch": 4.979005288744057, "grad_norm": 0.007769149728119373, "learning_rate": 6.586028033434663e-07, "loss": 0.0133, "step": 466010 }, { "epoch": 4.9791121320583365, "grad_norm": 0.2431800663471222, "learning_rate": 6.585868700450562e-07, "loss": 0.0099, "step": 466020 }, { "epoch": 4.979218975372616, "grad_norm": 0.00142005761153996, "learning_rate": 6.585709365675896e-07, "loss": 0.0013, "step": 466030 }, { "epoch": 4.979325818686895, "grad_norm": 3.3898754119873047, "learning_rate": 6.585550029110846e-07, "loss": 0.0072, "step": 466040 }, { "epoch": 4.979432662001175, "grad_norm": 0.6716205477714539, "learning_rate": 6.585390690755589e-07, "loss": 0.0064, "step": 466050 }, { "epoch": 4.979539505315455, "grad_norm": 8.46780776977539, "learning_rate": 6.585231350610306e-07, "loss": 0.0108, "step": 466060 }, { "epoch": 4.979646348629735, "grad_norm": 0.009890706278383732, "learning_rate": 6.585072008675179e-07, "loss": 0.0076, "step": 466070 }, { "epoch": 4.979753191944014, "grad_norm": 0.0012593957362696528, "learning_rate": 6.584912664950383e-07, "loss": 0.0025, "step": 466080 }, { "epoch": 4.979860035258294, "grad_norm": 0.0006381099810823798, "learning_rate": 6.584753319436104e-07, "loss": 0.009, "step": 466090 }, { "epoch": 4.979966878572573, "grad_norm": 0.6641058325767517, "learning_rate": 6.584593972132518e-07, "loss": 0.0022, "step": 466100 }, { "epoch": 4.980073721886853, "grad_norm": 0.05570410192012787, "learning_rate": 6.584434623039806e-07, "loss": 0.0098, "step": 466110 }, { "epoch": 4.980180565201133, "grad_norm": 0.013184002600610256, "learning_rate": 6.584275272158146e-07, "loss": 0.0112, "step": 466120 }, { "epoch": 4.980287408515412, "grad_norm": 0.0149130430072546, "learning_rate": 6.584115919487722e-07, "loss": 0.002, "step": 466130 }, { "epoch": 4.980394251829692, "grad_norm": 0.003883581841364503, "learning_rate": 6.58395656502871e-07, "loss": 0.0052, "step": 466140 }, { "epoch": 4.980501095143971, "grad_norm": 0.013682623393833637, "learning_rate": 6.583797208781293e-07, "loss": 0.0129, "step": 466150 }, { "epoch": 4.980607938458251, "grad_norm": 0.536838948726654, "learning_rate": 6.583637850745649e-07, "loss": 0.0066, "step": 466160 }, { "epoch": 4.98071478177253, "grad_norm": 0.011140536516904831, "learning_rate": 6.583478490921957e-07, "loss": 0.0046, "step": 466170 }, { "epoch": 4.9808216250868105, "grad_norm": 0.004590524360537529, "learning_rate": 6.583319129310398e-07, "loss": 0.0161, "step": 466180 }, { "epoch": 4.98092846840109, "grad_norm": 18.187397003173828, "learning_rate": 6.583159765911154e-07, "loss": 0.017, "step": 466190 }, { "epoch": 4.981035311715369, "grad_norm": 7.544930934906006, "learning_rate": 6.583000400724402e-07, "loss": 0.0071, "step": 466200 }, { "epoch": 4.981142155029649, "grad_norm": 0.006134642753750086, "learning_rate": 6.582841033750324e-07, "loss": 0.0068, "step": 466210 }, { "epoch": 4.981248998343928, "grad_norm": 0.242802232503891, "learning_rate": 6.5826816649891e-07, "loss": 0.0007, "step": 466220 }, { "epoch": 4.981355841658209, "grad_norm": 0.007563469465821981, "learning_rate": 6.582522294440906e-07, "loss": 0.0026, "step": 466230 }, { "epoch": 4.981462684972488, "grad_norm": 0.01843276247382164, "learning_rate": 6.582362922105927e-07, "loss": 0.0152, "step": 466240 }, { "epoch": 4.981569528286768, "grad_norm": 0.010850470513105392, "learning_rate": 6.58220354798434e-07, "loss": 0.0005, "step": 466250 }, { "epoch": 4.981676371601047, "grad_norm": 0.0004494242020882666, "learning_rate": 6.582044172076325e-07, "loss": 0.0108, "step": 466260 }, { "epoch": 4.9817832149153265, "grad_norm": 0.16064408421516418, "learning_rate": 6.581884794382064e-07, "loss": 0.0135, "step": 466270 }, { "epoch": 4.981890058229606, "grad_norm": 0.004265731666237116, "learning_rate": 6.581725414901735e-07, "loss": 0.0063, "step": 466280 }, { "epoch": 4.981996901543885, "grad_norm": 0.010379803366959095, "learning_rate": 6.58156603363552e-07, "loss": 0.0039, "step": 466290 }, { "epoch": 4.982103744858166, "grad_norm": 0.0068474579602479935, "learning_rate": 6.581406650583596e-07, "loss": 0.0074, "step": 466300 }, { "epoch": 4.982210588172445, "grad_norm": 0.009102359414100647, "learning_rate": 6.581247265746146e-07, "loss": 0.0059, "step": 466310 }, { "epoch": 4.982317431486725, "grad_norm": 3.349808692932129, "learning_rate": 6.581087879123347e-07, "loss": 0.0068, "step": 466320 }, { "epoch": 4.982424274801004, "grad_norm": 2.008410692214966, "learning_rate": 6.580928490715381e-07, "loss": 0.002, "step": 466330 }, { "epoch": 4.982531118115284, "grad_norm": 0.077779121696949, "learning_rate": 6.580769100522428e-07, "loss": 0.0101, "step": 466340 }, { "epoch": 4.982637961429564, "grad_norm": 1.7944211959838867, "learning_rate": 6.580609708544666e-07, "loss": 0.0091, "step": 466350 }, { "epoch": 4.982744804743843, "grad_norm": 7.831963539123535, "learning_rate": 6.580450314782278e-07, "loss": 0.0058, "step": 466360 }, { "epoch": 4.982851648058123, "grad_norm": 0.012409565038979053, "learning_rate": 6.58029091923544e-07, "loss": 0.004, "step": 466370 }, { "epoch": 4.982958491372402, "grad_norm": 34.25625991821289, "learning_rate": 6.580131521904336e-07, "loss": 0.0164, "step": 466380 }, { "epoch": 4.983065334686682, "grad_norm": 0.001380739500746131, "learning_rate": 6.579972122789144e-07, "loss": 0.0009, "step": 466390 }, { "epoch": 4.983172178000961, "grad_norm": 0.0014970025513321161, "learning_rate": 6.579812721890044e-07, "loss": 0.0125, "step": 466400 }, { "epoch": 4.983279021315242, "grad_norm": 0.005151140503585339, "learning_rate": 6.579653319207216e-07, "loss": 0.0022, "step": 466410 }, { "epoch": 4.983385864629521, "grad_norm": 1.21305251121521, "learning_rate": 6.579493914740842e-07, "loss": 0.0004, "step": 466420 }, { "epoch": 4.9834927079438005, "grad_norm": 0.17144528031349182, "learning_rate": 6.579334508491099e-07, "loss": 0.0101, "step": 466430 }, { "epoch": 4.98359955125808, "grad_norm": 0.012385894544422626, "learning_rate": 6.579175100458167e-07, "loss": 0.0009, "step": 466440 }, { "epoch": 4.983706394572359, "grad_norm": 5.2416157722473145, "learning_rate": 6.579015690642229e-07, "loss": 0.0048, "step": 466450 }, { "epoch": 4.983813237886639, "grad_norm": 0.001711724093183875, "learning_rate": 6.578856279043462e-07, "loss": 0.005, "step": 466460 }, { "epoch": 4.983920081200919, "grad_norm": 0.008005977608263493, "learning_rate": 6.578696865662048e-07, "loss": 0.0128, "step": 466470 }, { "epoch": 4.984026924515199, "grad_norm": 3.968843698501587, "learning_rate": 6.578537450498165e-07, "loss": 0.0115, "step": 466480 }, { "epoch": 4.984133767829478, "grad_norm": 0.7479345798492432, "learning_rate": 6.578378033551994e-07, "loss": 0.0129, "step": 466490 }, { "epoch": 4.984240611143758, "grad_norm": 1.7431567907333374, "learning_rate": 6.578218614823716e-07, "loss": 0.0038, "step": 466500 }, { "epoch": 4.984347454458037, "grad_norm": 0.0011736589949578047, "learning_rate": 6.578059194313508e-07, "loss": 0.0146, "step": 466510 }, { "epoch": 4.9844542977723165, "grad_norm": 0.03557770699262619, "learning_rate": 6.577899772021556e-07, "loss": 0.0326, "step": 466520 }, { "epoch": 4.984561141086597, "grad_norm": 0.0037648167926818132, "learning_rate": 6.577740347948033e-07, "loss": 0.0124, "step": 466530 }, { "epoch": 4.984667984400876, "grad_norm": 1.9515621662139893, "learning_rate": 6.577580922093122e-07, "loss": 0.0289, "step": 466540 }, { "epoch": 4.984774827715156, "grad_norm": 0.20256561040878296, "learning_rate": 6.577421494457005e-07, "loss": 0.0309, "step": 466550 }, { "epoch": 4.984881671029435, "grad_norm": 0.002814100356772542, "learning_rate": 6.577262065039859e-07, "loss": 0.0064, "step": 466560 }, { "epoch": 4.984988514343715, "grad_norm": 0.028749488294124603, "learning_rate": 6.577102633841864e-07, "loss": 0.0015, "step": 466570 }, { "epoch": 4.985095357657994, "grad_norm": 0.000373432703781873, "learning_rate": 6.576943200863203e-07, "loss": 0.006, "step": 466580 }, { "epoch": 4.9852022009722745, "grad_norm": 4.54278039932251, "learning_rate": 6.576783766104054e-07, "loss": 0.0171, "step": 466590 }, { "epoch": 4.985309044286554, "grad_norm": 0.00567222386598587, "learning_rate": 6.576624329564595e-07, "loss": 0.009, "step": 466600 }, { "epoch": 4.985415887600833, "grad_norm": 5.65696907043457, "learning_rate": 6.576464891245011e-07, "loss": 0.07, "step": 466610 }, { "epoch": 4.985522730915113, "grad_norm": 0.02817045897245407, "learning_rate": 6.576305451145477e-07, "loss": 0.0024, "step": 466620 }, { "epoch": 4.985629574229392, "grad_norm": 1.2190024852752686, "learning_rate": 6.576146009266176e-07, "loss": 0.0119, "step": 466630 }, { "epoch": 4.985736417543672, "grad_norm": 0.022539669647812843, "learning_rate": 6.575986565607287e-07, "loss": 0.0004, "step": 466640 }, { "epoch": 4.985843260857952, "grad_norm": 0.002387033076956868, "learning_rate": 6.57582712016899e-07, "loss": 0.0055, "step": 466650 }, { "epoch": 4.985950104172232, "grad_norm": 1.0005302429199219, "learning_rate": 6.575667672951465e-07, "loss": 0.0091, "step": 466660 }, { "epoch": 4.986056947486511, "grad_norm": 0.001972288591787219, "learning_rate": 6.575508223954893e-07, "loss": 0.0001, "step": 466670 }, { "epoch": 4.9861637908007905, "grad_norm": 0.14361318945884705, "learning_rate": 6.575348773179452e-07, "loss": 0.0314, "step": 466680 }, { "epoch": 4.98627063411507, "grad_norm": 3.7523815631866455, "learning_rate": 6.575189320625325e-07, "loss": 0.0283, "step": 466690 }, { "epoch": 4.98637747742935, "grad_norm": 1.2088756561279297, "learning_rate": 6.57502986629269e-07, "loss": 0.0298, "step": 466700 }, { "epoch": 4.98648432074363, "grad_norm": 0.014466855674982071, "learning_rate": 6.574870410181727e-07, "loss": 0.0054, "step": 466710 }, { "epoch": 4.986591164057909, "grad_norm": 0.5152299404144287, "learning_rate": 6.574710952292617e-07, "loss": 0.0227, "step": 466720 }, { "epoch": 4.986698007372189, "grad_norm": 5.088245391845703, "learning_rate": 6.574551492625539e-07, "loss": 0.0104, "step": 466730 }, { "epoch": 4.986804850686468, "grad_norm": 0.0195908322930336, "learning_rate": 6.574392031180674e-07, "loss": 0.0026, "step": 466740 }, { "epoch": 4.986911694000748, "grad_norm": 0.008299075998365879, "learning_rate": 6.574232567958201e-07, "loss": 0.0018, "step": 466750 }, { "epoch": 4.987018537315027, "grad_norm": 0.005243372637778521, "learning_rate": 6.574073102958302e-07, "loss": 0.0027, "step": 466760 }, { "epoch": 4.987125380629307, "grad_norm": 0.03462906926870346, "learning_rate": 6.573913636181155e-07, "loss": 0.0059, "step": 466770 }, { "epoch": 4.987232223943587, "grad_norm": 0.01123828999698162, "learning_rate": 6.573754167626939e-07, "loss": 0.0093, "step": 466780 }, { "epoch": 4.987339067257866, "grad_norm": 0.09267366677522659, "learning_rate": 6.573594697295838e-07, "loss": 0.0008, "step": 466790 }, { "epoch": 4.987445910572146, "grad_norm": 0.04741271585226059, "learning_rate": 6.573435225188028e-07, "loss": 0.0003, "step": 466800 }, { "epoch": 4.987552753886425, "grad_norm": 0.033521998673677444, "learning_rate": 6.573275751303692e-07, "loss": 0.0134, "step": 466810 }, { "epoch": 4.987659597200706, "grad_norm": 5.381209373474121, "learning_rate": 6.573116275643009e-07, "loss": 0.0171, "step": 466820 }, { "epoch": 4.987766440514985, "grad_norm": 0.0021089217625558376, "learning_rate": 6.572956798206158e-07, "loss": 0.0012, "step": 466830 }, { "epoch": 4.9878732838292645, "grad_norm": 0.0033198257442563772, "learning_rate": 6.572797318993321e-07, "loss": 0.0058, "step": 466840 }, { "epoch": 4.987980127143544, "grad_norm": 0.026955945417284966, "learning_rate": 6.572637838004677e-07, "loss": 0.0007, "step": 466850 }, { "epoch": 4.9880869704578235, "grad_norm": 0.14132463932037354, "learning_rate": 6.572478355240406e-07, "loss": 0.005, "step": 466860 }, { "epoch": 4.988193813772103, "grad_norm": 0.010045589879155159, "learning_rate": 6.572318870700689e-07, "loss": 0.0002, "step": 466870 }, { "epoch": 4.988300657086382, "grad_norm": 0.06507580727338791, "learning_rate": 6.572159384385704e-07, "loss": 0.0042, "step": 466880 }, { "epoch": 4.988407500400663, "grad_norm": 0.007348379585891962, "learning_rate": 6.571999896295633e-07, "loss": 0.0033, "step": 466890 }, { "epoch": 4.988514343714942, "grad_norm": 0.023083455860614777, "learning_rate": 6.571840406430656e-07, "loss": 0.0387, "step": 466900 }, { "epoch": 4.988621187029222, "grad_norm": 0.0180311631411314, "learning_rate": 6.571680914790951e-07, "loss": 0.0073, "step": 466910 }, { "epoch": 4.988728030343501, "grad_norm": 1.8701978921890259, "learning_rate": 6.571521421376701e-07, "loss": 0.0066, "step": 466920 }, { "epoch": 4.9888348736577806, "grad_norm": 1.6048928499221802, "learning_rate": 6.571361926188085e-07, "loss": 0.0059, "step": 466930 }, { "epoch": 4.988941716972061, "grad_norm": 0.002846835646778345, "learning_rate": 6.571202429225282e-07, "loss": 0.0053, "step": 466940 }, { "epoch": 4.98904856028634, "grad_norm": 0.00884708110243082, "learning_rate": 6.571042930488473e-07, "loss": 0.0257, "step": 466950 }, { "epoch": 4.98915540360062, "grad_norm": 0.0009177184547297657, "learning_rate": 6.570883429977837e-07, "loss": 0.0321, "step": 466960 }, { "epoch": 4.989262246914899, "grad_norm": 12.236390113830566, "learning_rate": 6.570723927693556e-07, "loss": 0.0267, "step": 466970 }, { "epoch": 4.989369090229179, "grad_norm": 0.00586792454123497, "learning_rate": 6.570564423635809e-07, "loss": 0.0027, "step": 466980 }, { "epoch": 4.989475933543458, "grad_norm": 0.01825304701924324, "learning_rate": 6.570404917804777e-07, "loss": 0.0002, "step": 466990 }, { "epoch": 4.989582776857738, "grad_norm": 0.05845203995704651, "learning_rate": 6.570245410200639e-07, "loss": 0.0002, "step": 467000 }, { "epoch": 4.989689620172018, "grad_norm": 1.076035499572754, "learning_rate": 6.570085900823574e-07, "loss": 0.0143, "step": 467010 }, { "epoch": 4.9897964634862975, "grad_norm": 1.7739272117614746, "learning_rate": 6.569926389673765e-07, "loss": 0.0045, "step": 467020 }, { "epoch": 4.989903306800577, "grad_norm": 0.06909792125225067, "learning_rate": 6.56976687675139e-07, "loss": 0.0012, "step": 467030 }, { "epoch": 4.990010150114856, "grad_norm": 3.459786891937256, "learning_rate": 6.569607362056631e-07, "loss": 0.014, "step": 467040 }, { "epoch": 4.990116993429136, "grad_norm": 0.16547676920890808, "learning_rate": 6.569447845589665e-07, "loss": 0.0017, "step": 467050 }, { "epoch": 4.990223836743416, "grad_norm": 0.09933838993310928, "learning_rate": 6.569288327350675e-07, "loss": 0.0203, "step": 467060 }, { "epoch": 4.990330680057696, "grad_norm": 0.16920670866966248, "learning_rate": 6.569128807339841e-07, "loss": 0.0063, "step": 467070 }, { "epoch": 4.990437523371975, "grad_norm": 0.04032202810049057, "learning_rate": 6.56896928555734e-07, "loss": 0.0019, "step": 467080 }, { "epoch": 4.990544366686255, "grad_norm": 0.030067436397075653, "learning_rate": 6.568809762003355e-07, "loss": 0.0081, "step": 467090 }, { "epoch": 4.990651210000534, "grad_norm": 0.5764604806900024, "learning_rate": 6.568650236678066e-07, "loss": 0.0022, "step": 467100 }, { "epoch": 4.9907580533148135, "grad_norm": 0.7503361701965332, "learning_rate": 6.568490709581651e-07, "loss": 0.0272, "step": 467110 }, { "epoch": 4.990864896629093, "grad_norm": 0.005987534765154123, "learning_rate": 6.568331180714294e-07, "loss": 0.0039, "step": 467120 }, { "epoch": 4.990971739943373, "grad_norm": 0.030552472919225693, "learning_rate": 6.568171650076172e-07, "loss": 0.0062, "step": 467130 }, { "epoch": 4.991078583257653, "grad_norm": 0.33891332149505615, "learning_rate": 6.568012117667465e-07, "loss": 0.0039, "step": 467140 }, { "epoch": 4.991185426571932, "grad_norm": 5.199457168579102, "learning_rate": 6.567852583488354e-07, "loss": 0.008, "step": 467150 }, { "epoch": 4.991292269886212, "grad_norm": 1.8538886308670044, "learning_rate": 6.56769304753902e-07, "loss": 0.0045, "step": 467160 }, { "epoch": 4.991399113200491, "grad_norm": 1.1872644424438477, "learning_rate": 6.56753350981964e-07, "loss": 0.0059, "step": 467170 }, { "epoch": 4.9915059565147715, "grad_norm": 6.249703884124756, "learning_rate": 6.567373970330399e-07, "loss": 0.0089, "step": 467180 }, { "epoch": 4.991612799829051, "grad_norm": 0.012773431837558746, "learning_rate": 6.567214429071474e-07, "loss": 0.0091, "step": 467190 }, { "epoch": 4.99171964314333, "grad_norm": 0.010004221461713314, "learning_rate": 6.567054886043044e-07, "loss": 0.0039, "step": 467200 }, { "epoch": 4.99182648645761, "grad_norm": 0.12353204190731049, "learning_rate": 6.566895341245292e-07, "loss": 0.0007, "step": 467210 }, { "epoch": 4.991933329771889, "grad_norm": 0.01111369114369154, "learning_rate": 6.566735794678398e-07, "loss": 0.0023, "step": 467220 }, { "epoch": 4.992040173086169, "grad_norm": 4.413179397583008, "learning_rate": 6.566576246342539e-07, "loss": 0.008, "step": 467230 }, { "epoch": 4.992147016400449, "grad_norm": 0.04932308942079544, "learning_rate": 6.566416696237897e-07, "loss": 0.0049, "step": 467240 }, { "epoch": 4.992253859714729, "grad_norm": 0.6709335446357727, "learning_rate": 6.566257144364653e-07, "loss": 0.002, "step": 467250 }, { "epoch": 4.992360703029008, "grad_norm": 4.563384532928467, "learning_rate": 6.566097590722987e-07, "loss": 0.0063, "step": 467260 }, { "epoch": 4.9924675463432875, "grad_norm": 0.11230723559856415, "learning_rate": 6.565938035313079e-07, "loss": 0.0025, "step": 467270 }, { "epoch": 4.992574389657567, "grad_norm": 0.7753280997276306, "learning_rate": 6.565778478135108e-07, "loss": 0.002, "step": 467280 }, { "epoch": 4.992681232971846, "grad_norm": 0.02281930297613144, "learning_rate": 6.565618919189254e-07, "loss": 0.0184, "step": 467290 }, { "epoch": 4.992788076286127, "grad_norm": 0.10868972539901733, "learning_rate": 6.5654593584757e-07, "loss": 0.0165, "step": 467300 }, { "epoch": 4.992894919600406, "grad_norm": 0.02842208370566368, "learning_rate": 6.565299795994624e-07, "loss": 0.0107, "step": 467310 }, { "epoch": 4.993001762914686, "grad_norm": 0.7269539833068848, "learning_rate": 6.565140231746204e-07, "loss": 0.002, "step": 467320 }, { "epoch": 4.993108606228965, "grad_norm": 0.006668480578809977, "learning_rate": 6.564980665730625e-07, "loss": 0.0102, "step": 467330 }, { "epoch": 4.993215449543245, "grad_norm": 0.1496461033821106, "learning_rate": 6.564821097948064e-07, "loss": 0.0793, "step": 467340 }, { "epoch": 4.993322292857524, "grad_norm": 0.47654348611831665, "learning_rate": 6.564661528398702e-07, "loss": 0.0111, "step": 467350 }, { "epoch": 4.993429136171804, "grad_norm": 0.021883642300963402, "learning_rate": 6.56450195708272e-07, "loss": 0.003, "step": 467360 }, { "epoch": 4.993535979486084, "grad_norm": 0.14082281291484833, "learning_rate": 6.564342384000295e-07, "loss": 0.0039, "step": 467370 }, { "epoch": 4.993642822800363, "grad_norm": 0.013324090279638767, "learning_rate": 6.564182809151611e-07, "loss": 0.0373, "step": 467380 }, { "epoch": 4.993749666114643, "grad_norm": 0.04070989042520523, "learning_rate": 6.564023232536846e-07, "loss": 0.0309, "step": 467390 }, { "epoch": 4.993856509428922, "grad_norm": 9.181780815124512, "learning_rate": 6.563863654156181e-07, "loss": 0.0119, "step": 467400 }, { "epoch": 4.993963352743203, "grad_norm": 0.5159465074539185, "learning_rate": 6.563704074009796e-07, "loss": 0.0255, "step": 467410 }, { "epoch": 4.994070196057482, "grad_norm": 3.136892080307007, "learning_rate": 6.563544492097871e-07, "loss": 0.0116, "step": 467420 }, { "epoch": 4.9941770393717615, "grad_norm": 0.06465751677751541, "learning_rate": 6.563384908420585e-07, "loss": 0.0029, "step": 467430 }, { "epoch": 4.994283882686041, "grad_norm": 0.3205365538597107, "learning_rate": 6.563225322978121e-07, "loss": 0.0053, "step": 467440 }, { "epoch": 4.99439072600032, "grad_norm": 2.7185275554656982, "learning_rate": 6.563065735770656e-07, "loss": 0.0143, "step": 467450 }, { "epoch": 4.9944975693146, "grad_norm": 0.014363911002874374, "learning_rate": 6.562906146798374e-07, "loss": 0.0353, "step": 467460 }, { "epoch": 4.994604412628879, "grad_norm": 0.007139472756534815, "learning_rate": 6.562746556061453e-07, "loss": 0.0068, "step": 467470 }, { "epoch": 4.99471125594316, "grad_norm": 4.065832138061523, "learning_rate": 6.562586963560071e-07, "loss": 0.003, "step": 467480 }, { "epoch": 4.994818099257439, "grad_norm": 0.0029342605266720057, "learning_rate": 6.562427369294412e-07, "loss": 0.0256, "step": 467490 }, { "epoch": 4.994924942571719, "grad_norm": 0.011258470825850964, "learning_rate": 6.562267773264655e-07, "loss": 0.0647, "step": 467500 }, { "epoch": 4.995031785885998, "grad_norm": 3.116481065750122, "learning_rate": 6.562108175470978e-07, "loss": 0.0113, "step": 467510 }, { "epoch": 4.9951386292002775, "grad_norm": 2.022996664047241, "learning_rate": 6.561948575913564e-07, "loss": 0.0106, "step": 467520 }, { "epoch": 4.995245472514558, "grad_norm": 1.2164812088012695, "learning_rate": 6.561788974592592e-07, "loss": 0.0099, "step": 467530 }, { "epoch": 4.995352315828837, "grad_norm": 0.002169853774830699, "learning_rate": 6.561629371508241e-07, "loss": 0.0036, "step": 467540 }, { "epoch": 4.995459159143117, "grad_norm": 0.0022225966677069664, "learning_rate": 6.561469766660695e-07, "loss": 0.0085, "step": 467550 }, { "epoch": 4.995566002457396, "grad_norm": 0.02009758912026882, "learning_rate": 6.561310160050132e-07, "loss": 0.0019, "step": 467560 }, { "epoch": 4.995672845771676, "grad_norm": 0.13956744968891144, "learning_rate": 6.56115055167673e-07, "loss": 0.0093, "step": 467570 }, { "epoch": 4.995779689085955, "grad_norm": 5.634243011474609, "learning_rate": 6.560990941540672e-07, "loss": 0.007, "step": 467580 }, { "epoch": 4.995886532400235, "grad_norm": 0.001820575911551714, "learning_rate": 6.560831329642137e-07, "loss": 0.0016, "step": 467590 }, { "epoch": 4.995993375714515, "grad_norm": 5.134936332702637, "learning_rate": 6.560671715981306e-07, "loss": 0.0054, "step": 467600 }, { "epoch": 4.996100219028794, "grad_norm": 0.0330943763256073, "learning_rate": 6.560512100558359e-07, "loss": 0.0073, "step": 467610 }, { "epoch": 4.996207062343074, "grad_norm": 0.0059821163304150105, "learning_rate": 6.560352483373476e-07, "loss": 0.0084, "step": 467620 }, { "epoch": 4.996313905657353, "grad_norm": 5.640111446380615, "learning_rate": 6.560192864426837e-07, "loss": 0.0077, "step": 467630 }, { "epoch": 4.996420748971633, "grad_norm": 0.011484409682452679, "learning_rate": 6.560033243718622e-07, "loss": 0.0077, "step": 467640 }, { "epoch": 4.996527592285913, "grad_norm": 0.06868670880794525, "learning_rate": 6.559873621249012e-07, "loss": 0.0148, "step": 467650 }, { "epoch": 4.996634435600193, "grad_norm": 0.19217345118522644, "learning_rate": 6.559713997018188e-07, "loss": 0.0006, "step": 467660 }, { "epoch": 4.996741278914472, "grad_norm": 6.50192403793335, "learning_rate": 6.559554371026329e-07, "loss": 0.0167, "step": 467670 }, { "epoch": 4.9968481222287515, "grad_norm": 8.86863899230957, "learning_rate": 6.559394743273614e-07, "loss": 0.01, "step": 467680 }, { "epoch": 4.996954965543031, "grad_norm": 1.1095921993255615, "learning_rate": 6.559235113760227e-07, "loss": 0.0148, "step": 467690 }, { "epoch": 4.99706180885731, "grad_norm": 19.313785552978516, "learning_rate": 6.559075482486345e-07, "loss": 0.0195, "step": 467700 }, { "epoch": 4.99716865217159, "grad_norm": 0.010396773926913738, "learning_rate": 6.558915849452147e-07, "loss": 0.0169, "step": 467710 }, { "epoch": 4.99727549548587, "grad_norm": 0.00439367163926363, "learning_rate": 6.558756214657817e-07, "loss": 0.0066, "step": 467720 }, { "epoch": 4.99738233880015, "grad_norm": 0.10008580982685089, "learning_rate": 6.558596578103535e-07, "loss": 0.003, "step": 467730 }, { "epoch": 4.997489182114429, "grad_norm": 0.7227421998977661, "learning_rate": 6.558436939789478e-07, "loss": 0.0003, "step": 467740 }, { "epoch": 4.997596025428709, "grad_norm": 2.349306583404541, "learning_rate": 6.558277299715828e-07, "loss": 0.0051, "step": 467750 }, { "epoch": 4.997702868742988, "grad_norm": 0.09186387807130814, "learning_rate": 6.558117657882766e-07, "loss": 0.0064, "step": 467760 }, { "epoch": 4.997809712057268, "grad_norm": 0.013631455600261688, "learning_rate": 6.557958014290471e-07, "loss": 0.0001, "step": 467770 }, { "epoch": 4.997916555371548, "grad_norm": 0.025006970390677452, "learning_rate": 6.557798368939125e-07, "loss": 0.0075, "step": 467780 }, { "epoch": 4.998023398685827, "grad_norm": 0.006919112056493759, "learning_rate": 6.557638721828907e-07, "loss": 0.0005, "step": 467790 }, { "epoch": 4.998130242000107, "grad_norm": 0.9225899577140808, "learning_rate": 6.557479072959995e-07, "loss": 0.0147, "step": 467800 }, { "epoch": 4.998237085314386, "grad_norm": 0.307247519493103, "learning_rate": 6.557319422332574e-07, "loss": 0.0088, "step": 467810 }, { "epoch": 4.998343928628666, "grad_norm": 0.2196594625711441, "learning_rate": 6.55715976994682e-07, "loss": 0.0039, "step": 467820 }, { "epoch": 4.998450771942945, "grad_norm": 0.0022723395377397537, "learning_rate": 6.557000115802916e-07, "loss": 0.0126, "step": 467830 }, { "epoch": 4.9985576152572255, "grad_norm": 0.7096075415611267, "learning_rate": 6.556840459901042e-07, "loss": 0.002, "step": 467840 }, { "epoch": 4.998664458571505, "grad_norm": 0.028079548850655556, "learning_rate": 6.556680802241377e-07, "loss": 0.0093, "step": 467850 }, { "epoch": 4.9987713018857844, "grad_norm": 0.32767707109451294, "learning_rate": 6.556521142824102e-07, "loss": 0.0165, "step": 467860 }, { "epoch": 4.998878145200064, "grad_norm": 1.8972827196121216, "learning_rate": 6.556361481649397e-07, "loss": 0.0036, "step": 467870 }, { "epoch": 4.998984988514343, "grad_norm": 3.4526517391204834, "learning_rate": 6.556201818717441e-07, "loss": 0.0102, "step": 467880 }, { "epoch": 4.999091831828624, "grad_norm": 7.059267997741699, "learning_rate": 6.556042154028418e-07, "loss": 0.0078, "step": 467890 }, { "epoch": 4.999198675142903, "grad_norm": 0.03419740870594978, "learning_rate": 6.555882487582505e-07, "loss": 0.0007, "step": 467900 }, { "epoch": 4.999305518457183, "grad_norm": 0.12488408386707306, "learning_rate": 6.555722819379885e-07, "loss": 0.0095, "step": 467910 }, { "epoch": 4.999412361771462, "grad_norm": 2.8581089973449707, "learning_rate": 6.555563149420733e-07, "loss": 0.0138, "step": 467920 }, { "epoch": 4.9995192050857415, "grad_norm": 0.001123074907809496, "learning_rate": 6.555403477705235e-07, "loss": 0.0031, "step": 467930 }, { "epoch": 4.999626048400021, "grad_norm": 0.0023641374427825212, "learning_rate": 6.555243804233569e-07, "loss": 0.0026, "step": 467940 }, { "epoch": 4.999732891714301, "grad_norm": 0.9156149625778198, "learning_rate": 6.555084129005914e-07, "loss": 0.0037, "step": 467950 }, { "epoch": 4.999839735028581, "grad_norm": 0.26859748363494873, "learning_rate": 6.554924452022455e-07, "loss": 0.0079, "step": 467960 }, { "epoch": 4.99994657834286, "grad_norm": 9.683379173278809, "learning_rate": 6.554764773283366e-07, "loss": 0.0139, "step": 467970 }, { "epoch": 5.0, "eval_accuracy": 0.8006427638847042, "eval_cer": 0.03080366345672468, "eval_loss": 0.02977699227631092, "eval_runtime": 9290.5264, "eval_samples_per_second": 1.072, "eval_steps_per_second": 0.536, "eval_wer": 0.07669474404003357, "step": 467975 } ], "logging_steps": 10, "max_steps": 935950, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.179888540024832e+21, "train_batch_size": 2, "trial_name": null, "trial_params": null }