{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4512, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022167112317987225, "grad_norm": 5.25, "learning_rate": 5e-05, "loss": 2.4928, "step": 1 }, { "epoch": 0.0004433422463597445, "grad_norm": 5.59375, "learning_rate": 4.998891843971631e-05, "loss": 2.3844, "step": 2 }, { "epoch": 0.0006650133695396168, "grad_norm": 2.703125, "learning_rate": 4.997783687943263e-05, "loss": 2.143, "step": 3 }, { "epoch": 0.000886684492719489, "grad_norm": 2.046875, "learning_rate": 4.996675531914894e-05, "loss": 2.0502, "step": 4 }, { "epoch": 0.0011083556158993613, "grad_norm": 1.5078125, "learning_rate": 4.9955673758865253e-05, "loss": 1.934, "step": 5 }, { "epoch": 0.0013300267390792336, "grad_norm": 1.6875, "learning_rate": 4.994459219858156e-05, "loss": 1.7655, "step": 6 }, { "epoch": 0.0015516978622591058, "grad_norm": 1.5625, "learning_rate": 4.993351063829788e-05, "loss": 1.8348, "step": 7 }, { "epoch": 0.001773368985438978, "grad_norm": 1.1484375, "learning_rate": 4.992242907801419e-05, "loss": 1.7421, "step": 8 }, { "epoch": 0.0019950401086188505, "grad_norm": 1.0859375, "learning_rate": 4.99113475177305e-05, "loss": 1.7308, "step": 9 }, { "epoch": 0.0022167112317987227, "grad_norm": 1.1171875, "learning_rate": 4.9900265957446814e-05, "loss": 1.7524, "step": 10 }, { "epoch": 0.002438382354978595, "grad_norm": 1.0078125, "learning_rate": 4.988918439716312e-05, "loss": 1.5646, "step": 11 }, { "epoch": 0.002660053478158467, "grad_norm": 0.92578125, "learning_rate": 4.987810283687944e-05, "loss": 1.5959, "step": 12 }, { "epoch": 0.0028817246013383394, "grad_norm": 0.98046875, "learning_rate": 4.986702127659575e-05, "loss": 1.6068, "step": 13 }, { "epoch": 0.0031033957245182116, "grad_norm": 0.99609375, "learning_rate": 4.985593971631206e-05, "loss": 1.6548, "step": 14 }, { "epoch": 0.003325066847698084, "grad_norm": 0.84765625, "learning_rate": 4.9844858156028374e-05, "loss": 1.6669, "step": 15 }, { "epoch": 0.003546737970877956, "grad_norm": 0.828125, "learning_rate": 4.9833776595744684e-05, "loss": 1.4858, "step": 16 }, { "epoch": 0.0037684090940578283, "grad_norm": 0.83203125, "learning_rate": 4.9822695035461e-05, "loss": 1.5872, "step": 17 }, { "epoch": 0.003990080217237701, "grad_norm": 0.90625, "learning_rate": 4.981161347517731e-05, "loss": 1.535, "step": 18 }, { "epoch": 0.004211751340417573, "grad_norm": 0.8125, "learning_rate": 4.980053191489362e-05, "loss": 1.5109, "step": 19 }, { "epoch": 0.004433422463597445, "grad_norm": 0.9453125, "learning_rate": 4.978945035460993e-05, "loss": 1.4356, "step": 20 }, { "epoch": 0.004655093586777318, "grad_norm": 0.89453125, "learning_rate": 4.9778368794326244e-05, "loss": 1.5005, "step": 21 }, { "epoch": 0.00487676470995719, "grad_norm": 0.796875, "learning_rate": 4.9767287234042554e-05, "loss": 1.4387, "step": 22 }, { "epoch": 0.005098435833137062, "grad_norm": 0.8203125, "learning_rate": 4.975620567375886e-05, "loss": 1.4803, "step": 23 }, { "epoch": 0.005320106956316934, "grad_norm": 0.78125, "learning_rate": 4.974512411347518e-05, "loss": 1.5224, "step": 24 }, { "epoch": 0.0055417780794968065, "grad_norm": 0.75390625, "learning_rate": 4.973404255319149e-05, "loss": 1.4582, "step": 25 }, { "epoch": 0.005763449202676679, "grad_norm": 0.74609375, "learning_rate": 4.9722960992907805e-05, "loss": 1.4638, "step": 26 }, { "epoch": 0.005985120325856551, "grad_norm": 0.69921875, "learning_rate": 4.9711879432624114e-05, "loss": 1.4837, "step": 27 }, { "epoch": 0.006206791449036423, "grad_norm": 0.86328125, "learning_rate": 4.9700797872340424e-05, "loss": 1.3484, "step": 28 }, { "epoch": 0.006428462572216295, "grad_norm": 0.78125, "learning_rate": 4.968971631205674e-05, "loss": 1.4714, "step": 29 }, { "epoch": 0.006650133695396168, "grad_norm": 0.8203125, "learning_rate": 4.967863475177305e-05, "loss": 1.4059, "step": 30 }, { "epoch": 0.00687180481857604, "grad_norm": 0.7421875, "learning_rate": 4.9667553191489365e-05, "loss": 1.4141, "step": 31 }, { "epoch": 0.007093475941755912, "grad_norm": 0.8203125, "learning_rate": 4.9656471631205675e-05, "loss": 1.3564, "step": 32 }, { "epoch": 0.007315147064935784, "grad_norm": 0.66015625, "learning_rate": 4.964539007092199e-05, "loss": 1.401, "step": 33 }, { "epoch": 0.0075368181881156565, "grad_norm": 0.69140625, "learning_rate": 4.96343085106383e-05, "loss": 1.3554, "step": 34 }, { "epoch": 0.007758489311295529, "grad_norm": 0.73828125, "learning_rate": 4.962322695035461e-05, "loss": 1.3967, "step": 35 }, { "epoch": 0.007980160434475402, "grad_norm": 0.66015625, "learning_rate": 4.9612145390070926e-05, "loss": 1.2334, "step": 36 }, { "epoch": 0.008201831557655274, "grad_norm": 0.82421875, "learning_rate": 4.9601063829787235e-05, "loss": 1.4104, "step": 37 }, { "epoch": 0.008423502680835146, "grad_norm": 0.671875, "learning_rate": 4.958998226950355e-05, "loss": 1.3131, "step": 38 }, { "epoch": 0.008645173804015019, "grad_norm": 0.875, "learning_rate": 4.957890070921986e-05, "loss": 1.3771, "step": 39 }, { "epoch": 0.00886684492719489, "grad_norm": 0.67578125, "learning_rate": 4.956781914893617e-05, "loss": 1.2778, "step": 40 }, { "epoch": 0.009088516050374763, "grad_norm": 0.703125, "learning_rate": 4.9556737588652486e-05, "loss": 1.3737, "step": 41 }, { "epoch": 0.009310187173554635, "grad_norm": 0.79296875, "learning_rate": 4.9545656028368796e-05, "loss": 1.3855, "step": 42 }, { "epoch": 0.009531858296734507, "grad_norm": 0.703125, "learning_rate": 4.953457446808511e-05, "loss": 1.3425, "step": 43 }, { "epoch": 0.00975352941991438, "grad_norm": 0.7578125, "learning_rate": 4.952349290780142e-05, "loss": 1.4265, "step": 44 }, { "epoch": 0.009975200543094252, "grad_norm": 0.7734375, "learning_rate": 4.951241134751774e-05, "loss": 1.4106, "step": 45 }, { "epoch": 0.010196871666274124, "grad_norm": 0.6796875, "learning_rate": 4.950132978723405e-05, "loss": 1.3605, "step": 46 }, { "epoch": 0.010418542789453996, "grad_norm": 0.65234375, "learning_rate": 4.9490248226950356e-05, "loss": 1.364, "step": 47 }, { "epoch": 0.010640213912633869, "grad_norm": 0.703125, "learning_rate": 4.947916666666667e-05, "loss": 1.3301, "step": 48 }, { "epoch": 0.01086188503581374, "grad_norm": 0.65625, "learning_rate": 4.946808510638298e-05, "loss": 1.3655, "step": 49 }, { "epoch": 0.011083556158993613, "grad_norm": 0.73046875, "learning_rate": 4.94570035460993e-05, "loss": 1.3096, "step": 50 }, { "epoch": 0.011305227282173485, "grad_norm": 0.64453125, "learning_rate": 4.944592198581561e-05, "loss": 1.2784, "step": 51 }, { "epoch": 0.011526898405353357, "grad_norm": 0.66015625, "learning_rate": 4.9434840425531917e-05, "loss": 1.3075, "step": 52 }, { "epoch": 0.01174856952853323, "grad_norm": 0.66796875, "learning_rate": 4.942375886524823e-05, "loss": 1.4279, "step": 53 }, { "epoch": 0.011970240651713102, "grad_norm": 0.62890625, "learning_rate": 4.9412677304964535e-05, "loss": 1.2118, "step": 54 }, { "epoch": 0.012191911774892974, "grad_norm": 0.6953125, "learning_rate": 4.940159574468085e-05, "loss": 1.3529, "step": 55 }, { "epoch": 0.012413582898072846, "grad_norm": 0.67578125, "learning_rate": 4.939051418439716e-05, "loss": 1.3201, "step": 56 }, { "epoch": 0.012635254021252719, "grad_norm": 0.6953125, "learning_rate": 4.937943262411348e-05, "loss": 1.4035, "step": 57 }, { "epoch": 0.01285692514443259, "grad_norm": 0.76171875, "learning_rate": 4.9368351063829786e-05, "loss": 1.3042, "step": 58 }, { "epoch": 0.013078596267612463, "grad_norm": 0.64453125, "learning_rate": 4.93572695035461e-05, "loss": 1.328, "step": 59 }, { "epoch": 0.013300267390792335, "grad_norm": 0.81640625, "learning_rate": 4.934618794326241e-05, "loss": 1.3672, "step": 60 }, { "epoch": 0.013521938513972208, "grad_norm": 0.6796875, "learning_rate": 4.933510638297872e-05, "loss": 1.2706, "step": 61 }, { "epoch": 0.01374360963715208, "grad_norm": 0.80078125, "learning_rate": 4.932402482269504e-05, "loss": 1.351, "step": 62 }, { "epoch": 0.013965280760331952, "grad_norm": 0.70703125, "learning_rate": 4.931294326241135e-05, "loss": 1.3058, "step": 63 }, { "epoch": 0.014186951883511824, "grad_norm": 0.671875, "learning_rate": 4.930186170212766e-05, "loss": 1.3053, "step": 64 }, { "epoch": 0.014408623006691696, "grad_norm": 0.796875, "learning_rate": 4.929078014184397e-05, "loss": 1.29, "step": 65 }, { "epoch": 0.014630294129871569, "grad_norm": 0.62109375, "learning_rate": 4.927969858156029e-05, "loss": 1.2795, "step": 66 }, { "epoch": 0.014851965253051441, "grad_norm": 0.66015625, "learning_rate": 4.92686170212766e-05, "loss": 1.3033, "step": 67 }, { "epoch": 0.015073636376231313, "grad_norm": 0.78125, "learning_rate": 4.925753546099291e-05, "loss": 1.2857, "step": 68 }, { "epoch": 0.015295307499411185, "grad_norm": 0.69140625, "learning_rate": 4.9246453900709223e-05, "loss": 1.3207, "step": 69 }, { "epoch": 0.015516978622591058, "grad_norm": 0.83984375, "learning_rate": 4.923537234042553e-05, "loss": 1.2963, "step": 70 }, { "epoch": 0.01573864974577093, "grad_norm": 0.625, "learning_rate": 4.922429078014185e-05, "loss": 1.212, "step": 71 }, { "epoch": 0.015960320868950804, "grad_norm": 0.85546875, "learning_rate": 4.921320921985816e-05, "loss": 1.2508, "step": 72 }, { "epoch": 0.016181991992130676, "grad_norm": 0.67578125, "learning_rate": 4.920212765957447e-05, "loss": 1.2978, "step": 73 }, { "epoch": 0.016403663115310548, "grad_norm": 0.6796875, "learning_rate": 4.9191046099290784e-05, "loss": 1.2412, "step": 74 }, { "epoch": 0.01662533423849042, "grad_norm": 0.8203125, "learning_rate": 4.917996453900709e-05, "loss": 1.31, "step": 75 }, { "epoch": 0.016847005361670293, "grad_norm": 0.60546875, "learning_rate": 4.916888297872341e-05, "loss": 1.2267, "step": 76 }, { "epoch": 0.017068676484850165, "grad_norm": 0.83984375, "learning_rate": 4.915780141843972e-05, "loss": 1.2333, "step": 77 }, { "epoch": 0.017290347608030037, "grad_norm": 0.7890625, "learning_rate": 4.9146719858156035e-05, "loss": 1.3258, "step": 78 }, { "epoch": 0.01751201873120991, "grad_norm": 0.71484375, "learning_rate": 4.9135638297872344e-05, "loss": 1.2518, "step": 79 }, { "epoch": 0.01773368985438978, "grad_norm": 0.8515625, "learning_rate": 4.9124556737588654e-05, "loss": 1.2463, "step": 80 }, { "epoch": 0.017955360977569654, "grad_norm": 0.6328125, "learning_rate": 4.911347517730497e-05, "loss": 1.3133, "step": 81 }, { "epoch": 0.018177032100749526, "grad_norm": 0.671875, "learning_rate": 4.910239361702128e-05, "loss": 1.281, "step": 82 }, { "epoch": 0.018398703223929398, "grad_norm": 0.74609375, "learning_rate": 4.9091312056737596e-05, "loss": 1.2834, "step": 83 }, { "epoch": 0.01862037434710927, "grad_norm": 0.62890625, "learning_rate": 4.9080230496453905e-05, "loss": 1.3083, "step": 84 }, { "epoch": 0.018842045470289143, "grad_norm": 0.74609375, "learning_rate": 4.9069148936170214e-05, "loss": 1.2476, "step": 85 }, { "epoch": 0.019063716593469015, "grad_norm": 0.62109375, "learning_rate": 4.905806737588653e-05, "loss": 1.2453, "step": 86 }, { "epoch": 0.019285387716648887, "grad_norm": 0.6328125, "learning_rate": 4.904698581560284e-05, "loss": 1.288, "step": 87 }, { "epoch": 0.01950705883982876, "grad_norm": 0.75390625, "learning_rate": 4.9035904255319156e-05, "loss": 1.2469, "step": 88 }, { "epoch": 0.01972872996300863, "grad_norm": 0.62109375, "learning_rate": 4.9024822695035465e-05, "loss": 1.2268, "step": 89 }, { "epoch": 0.019950401086188504, "grad_norm": 0.8046875, "learning_rate": 4.9013741134751775e-05, "loss": 1.3664, "step": 90 }, { "epoch": 0.020172072209368376, "grad_norm": 0.7109375, "learning_rate": 4.9002659574468084e-05, "loss": 1.3602, "step": 91 }, { "epoch": 0.020393743332548248, "grad_norm": 0.70703125, "learning_rate": 4.89915780141844e-05, "loss": 1.3503, "step": 92 }, { "epoch": 0.02061541445572812, "grad_norm": 0.71875, "learning_rate": 4.898049645390071e-05, "loss": 1.245, "step": 93 }, { "epoch": 0.020837085578907993, "grad_norm": 0.6875, "learning_rate": 4.896941489361702e-05, "loss": 1.2814, "step": 94 }, { "epoch": 0.021058756702087865, "grad_norm": 0.69140625, "learning_rate": 4.8958333333333335e-05, "loss": 1.2771, "step": 95 }, { "epoch": 0.021280427825267737, "grad_norm": 0.578125, "learning_rate": 4.8947251773049645e-05, "loss": 1.3106, "step": 96 }, { "epoch": 0.02150209894844761, "grad_norm": 0.8046875, "learning_rate": 4.893617021276596e-05, "loss": 1.2845, "step": 97 }, { "epoch": 0.02172377007162748, "grad_norm": 0.609375, "learning_rate": 4.892508865248227e-05, "loss": 1.2768, "step": 98 }, { "epoch": 0.021945441194807354, "grad_norm": 0.8359375, "learning_rate": 4.891400709219858e-05, "loss": 1.3875, "step": 99 }, { "epoch": 0.022167112317987226, "grad_norm": 0.6640625, "learning_rate": 4.8902925531914896e-05, "loss": 1.2096, "step": 100 }, { "epoch": 0.0223887834411671, "grad_norm": 0.66796875, "learning_rate": 4.8891843971631205e-05, "loss": 1.2254, "step": 101 }, { "epoch": 0.02261045456434697, "grad_norm": 0.80859375, "learning_rate": 4.888076241134752e-05, "loss": 1.2675, "step": 102 }, { "epoch": 0.022832125687526843, "grad_norm": 0.625, "learning_rate": 4.886968085106383e-05, "loss": 1.2367, "step": 103 }, { "epoch": 0.023053796810706715, "grad_norm": 0.859375, "learning_rate": 4.885859929078015e-05, "loss": 1.2759, "step": 104 }, { "epoch": 0.023275467933886587, "grad_norm": 0.61328125, "learning_rate": 4.8847517730496456e-05, "loss": 1.2307, "step": 105 }, { "epoch": 0.02349713905706646, "grad_norm": 0.6328125, "learning_rate": 4.8836436170212766e-05, "loss": 1.251, "step": 106 }, { "epoch": 0.02371881018024633, "grad_norm": 0.69140625, "learning_rate": 4.882535460992908e-05, "loss": 1.2184, "step": 107 }, { "epoch": 0.023940481303426204, "grad_norm": 0.63671875, "learning_rate": 4.881427304964539e-05, "loss": 1.2471, "step": 108 }, { "epoch": 0.024162152426606076, "grad_norm": 0.65625, "learning_rate": 4.880319148936171e-05, "loss": 1.2344, "step": 109 }, { "epoch": 0.02438382354978595, "grad_norm": 0.65625, "learning_rate": 4.879210992907802e-05, "loss": 1.1993, "step": 110 }, { "epoch": 0.02460549467296582, "grad_norm": 0.62890625, "learning_rate": 4.8781028368794326e-05, "loss": 1.331, "step": 111 }, { "epoch": 0.024827165796145693, "grad_norm": 0.68359375, "learning_rate": 4.876994680851064e-05, "loss": 1.2389, "step": 112 }, { "epoch": 0.025048836919325565, "grad_norm": 0.62890625, "learning_rate": 4.875886524822695e-05, "loss": 1.2301, "step": 113 }, { "epoch": 0.025270508042505437, "grad_norm": 0.64453125, "learning_rate": 4.874778368794327e-05, "loss": 1.2312, "step": 114 }, { "epoch": 0.02549217916568531, "grad_norm": 0.640625, "learning_rate": 4.873670212765958e-05, "loss": 1.2761, "step": 115 }, { "epoch": 0.02571385028886518, "grad_norm": 0.6015625, "learning_rate": 4.872562056737589e-05, "loss": 1.1716, "step": 116 }, { "epoch": 0.025935521412045054, "grad_norm": 0.64453125, "learning_rate": 4.87145390070922e-05, "loss": 1.2271, "step": 117 }, { "epoch": 0.026157192535224926, "grad_norm": 0.64453125, "learning_rate": 4.870345744680851e-05, "loss": 1.1925, "step": 118 }, { "epoch": 0.0263788636584048, "grad_norm": 0.64453125, "learning_rate": 4.869237588652483e-05, "loss": 1.2699, "step": 119 }, { "epoch": 0.02660053478158467, "grad_norm": 0.6015625, "learning_rate": 4.868129432624114e-05, "loss": 1.308, "step": 120 }, { "epoch": 0.026822205904764543, "grad_norm": 0.67578125, "learning_rate": 4.8670212765957454e-05, "loss": 1.2268, "step": 121 }, { "epoch": 0.027043877027944415, "grad_norm": 0.6796875, "learning_rate": 4.865913120567376e-05, "loss": 1.2822, "step": 122 }, { "epoch": 0.027265548151124287, "grad_norm": 0.60546875, "learning_rate": 4.864804964539007e-05, "loss": 1.2346, "step": 123 }, { "epoch": 0.02748721927430416, "grad_norm": 0.609375, "learning_rate": 4.863696808510639e-05, "loss": 1.3303, "step": 124 }, { "epoch": 0.02770889039748403, "grad_norm": 0.58984375, "learning_rate": 4.862588652482269e-05, "loss": 1.2214, "step": 125 }, { "epoch": 0.027930561520663904, "grad_norm": 0.625, "learning_rate": 4.861480496453901e-05, "loss": 1.2111, "step": 126 }, { "epoch": 0.028152232643843776, "grad_norm": 0.5859375, "learning_rate": 4.860372340425532e-05, "loss": 1.2288, "step": 127 }, { "epoch": 0.02837390376702365, "grad_norm": 0.70703125, "learning_rate": 4.859264184397163e-05, "loss": 1.2867, "step": 128 }, { "epoch": 0.02859557489020352, "grad_norm": 0.65234375, "learning_rate": 4.858156028368794e-05, "loss": 1.1696, "step": 129 }, { "epoch": 0.028817246013383393, "grad_norm": 0.625, "learning_rate": 4.857047872340426e-05, "loss": 1.2193, "step": 130 }, { "epoch": 0.029038917136563265, "grad_norm": 0.6640625, "learning_rate": 4.855939716312057e-05, "loss": 1.2689, "step": 131 }, { "epoch": 0.029260588259743137, "grad_norm": 0.625, "learning_rate": 4.854831560283688e-05, "loss": 1.1696, "step": 132 }, { "epoch": 0.02948225938292301, "grad_norm": 0.703125, "learning_rate": 4.8537234042553194e-05, "loss": 1.2124, "step": 133 }, { "epoch": 0.029703930506102882, "grad_norm": 0.6640625, "learning_rate": 4.85261524822695e-05, "loss": 1.2704, "step": 134 }, { "epoch": 0.029925601629282754, "grad_norm": 0.70703125, "learning_rate": 4.851507092198582e-05, "loss": 1.2055, "step": 135 }, { "epoch": 0.030147272752462626, "grad_norm": 0.6640625, "learning_rate": 4.850398936170213e-05, "loss": 1.2207, "step": 136 }, { "epoch": 0.0303689438756425, "grad_norm": 0.71875, "learning_rate": 4.8492907801418445e-05, "loss": 1.2708, "step": 137 }, { "epoch": 0.03059061499882237, "grad_norm": 0.78125, "learning_rate": 4.8481826241134754e-05, "loss": 1.2557, "step": 138 }, { "epoch": 0.030812286122002243, "grad_norm": 0.671875, "learning_rate": 4.847074468085106e-05, "loss": 1.2601, "step": 139 }, { "epoch": 0.031033957245182115, "grad_norm": 0.69921875, "learning_rate": 4.845966312056738e-05, "loss": 1.2565, "step": 140 }, { "epoch": 0.03125562836836199, "grad_norm": 0.6328125, "learning_rate": 4.844858156028369e-05, "loss": 1.2406, "step": 141 }, { "epoch": 0.03147729949154186, "grad_norm": 0.625, "learning_rate": 4.8437500000000005e-05, "loss": 1.2394, "step": 142 }, { "epoch": 0.03169897061472173, "grad_norm": 0.59375, "learning_rate": 4.8426418439716314e-05, "loss": 1.2303, "step": 143 }, { "epoch": 0.03192064173790161, "grad_norm": 0.625, "learning_rate": 4.8415336879432624e-05, "loss": 1.1855, "step": 144 }, { "epoch": 0.032142312861081476, "grad_norm": 0.671875, "learning_rate": 4.840425531914894e-05, "loss": 1.3472, "step": 145 }, { "epoch": 0.03236398398426135, "grad_norm": 0.66015625, "learning_rate": 4.839317375886525e-05, "loss": 1.1354, "step": 146 }, { "epoch": 0.03258565510744122, "grad_norm": 0.625, "learning_rate": 4.8382092198581566e-05, "loss": 1.1916, "step": 147 }, { "epoch": 0.032807326230621096, "grad_norm": 0.62890625, "learning_rate": 4.8371010638297875e-05, "loss": 1.3368, "step": 148 }, { "epoch": 0.033028997353800965, "grad_norm": 0.63671875, "learning_rate": 4.835992907801419e-05, "loss": 1.2152, "step": 149 }, { "epoch": 0.03325066847698084, "grad_norm": 0.62109375, "learning_rate": 4.83488475177305e-05, "loss": 1.1198, "step": 150 }, { "epoch": 0.03347233960016071, "grad_norm": 0.62890625, "learning_rate": 4.833776595744681e-05, "loss": 1.2466, "step": 151 }, { "epoch": 0.033694010723340585, "grad_norm": 0.6875, "learning_rate": 4.8326684397163126e-05, "loss": 1.197, "step": 152 }, { "epoch": 0.033915681846520454, "grad_norm": 0.74609375, "learning_rate": 4.8315602836879435e-05, "loss": 1.2431, "step": 153 }, { "epoch": 0.03413735296970033, "grad_norm": 0.609375, "learning_rate": 4.830452127659575e-05, "loss": 1.2288, "step": 154 }, { "epoch": 0.0343590240928802, "grad_norm": 0.68359375, "learning_rate": 4.829343971631206e-05, "loss": 1.2595, "step": 155 }, { "epoch": 0.034580695216060074, "grad_norm": 0.5859375, "learning_rate": 4.828235815602837e-05, "loss": 1.1714, "step": 156 }, { "epoch": 0.03480236633923994, "grad_norm": 0.625, "learning_rate": 4.8271276595744686e-05, "loss": 1.2896, "step": 157 }, { "epoch": 0.03502403746241982, "grad_norm": 0.625, "learning_rate": 4.8260195035460996e-05, "loss": 1.2414, "step": 158 }, { "epoch": 0.03524570858559969, "grad_norm": 0.62109375, "learning_rate": 4.824911347517731e-05, "loss": 1.2135, "step": 159 }, { "epoch": 0.03546737970877956, "grad_norm": 0.66015625, "learning_rate": 4.8238031914893615e-05, "loss": 1.1921, "step": 160 }, { "epoch": 0.03568905083195943, "grad_norm": 0.59375, "learning_rate": 4.822695035460993e-05, "loss": 1.1546, "step": 161 }, { "epoch": 0.03591072195513931, "grad_norm": 0.703125, "learning_rate": 4.821586879432624e-05, "loss": 1.2703, "step": 162 }, { "epoch": 0.036132393078319176, "grad_norm": 0.67578125, "learning_rate": 4.8204787234042556e-05, "loss": 1.2453, "step": 163 }, { "epoch": 0.03635406420149905, "grad_norm": 0.61328125, "learning_rate": 4.8193705673758866e-05, "loss": 1.2265, "step": 164 }, { "epoch": 0.03657573532467892, "grad_norm": 0.7734375, "learning_rate": 4.8182624113475175e-05, "loss": 1.2338, "step": 165 }, { "epoch": 0.036797406447858796, "grad_norm": 0.640625, "learning_rate": 4.817154255319149e-05, "loss": 1.1997, "step": 166 }, { "epoch": 0.037019077571038665, "grad_norm": 0.62109375, "learning_rate": 4.81604609929078e-05, "loss": 1.2857, "step": 167 }, { "epoch": 0.03724074869421854, "grad_norm": 0.7421875, "learning_rate": 4.814937943262412e-05, "loss": 1.2515, "step": 168 }, { "epoch": 0.03746241981739841, "grad_norm": 0.65625, "learning_rate": 4.8138297872340426e-05, "loss": 1.2469, "step": 169 }, { "epoch": 0.037684090940578285, "grad_norm": 0.609375, "learning_rate": 4.8127216312056736e-05, "loss": 1.2149, "step": 170 }, { "epoch": 0.037905762063758154, "grad_norm": 0.63671875, "learning_rate": 4.811613475177305e-05, "loss": 1.2568, "step": 171 }, { "epoch": 0.03812743318693803, "grad_norm": 0.609375, "learning_rate": 4.810505319148936e-05, "loss": 1.1277, "step": 172 }, { "epoch": 0.0383491043101179, "grad_norm": 0.734375, "learning_rate": 4.809397163120568e-05, "loss": 1.1728, "step": 173 }, { "epoch": 0.038570775433297774, "grad_norm": 0.578125, "learning_rate": 4.808289007092199e-05, "loss": 1.2044, "step": 174 }, { "epoch": 0.03879244655647764, "grad_norm": 0.6796875, "learning_rate": 4.80718085106383e-05, "loss": 1.2342, "step": 175 }, { "epoch": 0.03901411767965752, "grad_norm": 0.63671875, "learning_rate": 4.806072695035461e-05, "loss": 1.2321, "step": 176 }, { "epoch": 0.03923578880283739, "grad_norm": 0.7421875, "learning_rate": 4.804964539007092e-05, "loss": 1.1989, "step": 177 }, { "epoch": 0.03945745992601726, "grad_norm": 0.58203125, "learning_rate": 4.803856382978724e-05, "loss": 1.2201, "step": 178 }, { "epoch": 0.03967913104919713, "grad_norm": 0.62109375, "learning_rate": 4.802748226950355e-05, "loss": 1.2183, "step": 179 }, { "epoch": 0.03990080217237701, "grad_norm": 0.6875, "learning_rate": 4.801640070921986e-05, "loss": 1.2285, "step": 180 }, { "epoch": 0.040122473295556876, "grad_norm": 0.61328125, "learning_rate": 4.800531914893617e-05, "loss": 1.3125, "step": 181 }, { "epoch": 0.04034414441873675, "grad_norm": 0.7265625, "learning_rate": 4.799423758865248e-05, "loss": 1.2238, "step": 182 }, { "epoch": 0.04056581554191662, "grad_norm": 0.60546875, "learning_rate": 4.79831560283688e-05, "loss": 1.2125, "step": 183 }, { "epoch": 0.040787486665096497, "grad_norm": 0.78515625, "learning_rate": 4.797207446808511e-05, "loss": 1.1871, "step": 184 }, { "epoch": 0.041009157788276365, "grad_norm": 0.609375, "learning_rate": 4.7960992907801424e-05, "loss": 1.2445, "step": 185 }, { "epoch": 0.04123082891145624, "grad_norm": 0.6640625, "learning_rate": 4.794991134751773e-05, "loss": 1.215, "step": 186 }, { "epoch": 0.04145250003463611, "grad_norm": 0.7421875, "learning_rate": 4.793882978723405e-05, "loss": 1.2395, "step": 187 }, { "epoch": 0.041674171157815985, "grad_norm": 0.625, "learning_rate": 4.792774822695036e-05, "loss": 1.1975, "step": 188 }, { "epoch": 0.041895842280995854, "grad_norm": 0.7421875, "learning_rate": 4.791666666666667e-05, "loss": 1.2715, "step": 189 }, { "epoch": 0.04211751340417573, "grad_norm": 0.61328125, "learning_rate": 4.7905585106382984e-05, "loss": 1.1666, "step": 190 }, { "epoch": 0.0423391845273556, "grad_norm": 0.625, "learning_rate": 4.7894503546099294e-05, "loss": 1.2481, "step": 191 }, { "epoch": 0.042560855650535474, "grad_norm": 0.5859375, "learning_rate": 4.788342198581561e-05, "loss": 1.229, "step": 192 }, { "epoch": 0.04278252677371535, "grad_norm": 0.59375, "learning_rate": 4.787234042553192e-05, "loss": 1.2004, "step": 193 }, { "epoch": 0.04300419789689522, "grad_norm": 0.640625, "learning_rate": 4.786125886524823e-05, "loss": 1.2616, "step": 194 }, { "epoch": 0.043225869020075094, "grad_norm": 0.5859375, "learning_rate": 4.785017730496454e-05, "loss": 1.2168, "step": 195 }, { "epoch": 0.04344754014325496, "grad_norm": 0.6015625, "learning_rate": 4.7839095744680854e-05, "loss": 1.2264, "step": 196 }, { "epoch": 0.04366921126643484, "grad_norm": 0.6015625, "learning_rate": 4.7828014184397164e-05, "loss": 1.2855, "step": 197 }, { "epoch": 0.04389088238961471, "grad_norm": 0.640625, "learning_rate": 4.781693262411347e-05, "loss": 1.0987, "step": 198 }, { "epoch": 0.04411255351279458, "grad_norm": 0.640625, "learning_rate": 4.780585106382979e-05, "loss": 1.2787, "step": 199 }, { "epoch": 0.04433422463597445, "grad_norm": 0.61328125, "learning_rate": 4.77947695035461e-05, "loss": 1.2178, "step": 200 }, { "epoch": 0.04455589575915433, "grad_norm": 0.671875, "learning_rate": 4.7783687943262415e-05, "loss": 1.2095, "step": 201 }, { "epoch": 0.0447775668823342, "grad_norm": 0.5859375, "learning_rate": 4.7772606382978724e-05, "loss": 1.1216, "step": 202 }, { "epoch": 0.04499923800551407, "grad_norm": 0.63671875, "learning_rate": 4.776152482269503e-05, "loss": 1.2102, "step": 203 }, { "epoch": 0.04522090912869394, "grad_norm": 0.63671875, "learning_rate": 4.775044326241135e-05, "loss": 1.3472, "step": 204 }, { "epoch": 0.04544258025187382, "grad_norm": 0.70703125, "learning_rate": 4.773936170212766e-05, "loss": 1.2333, "step": 205 }, { "epoch": 0.045664251375053685, "grad_norm": 0.69921875, "learning_rate": 4.7728280141843975e-05, "loss": 1.1638, "step": 206 }, { "epoch": 0.04588592249823356, "grad_norm": 0.70703125, "learning_rate": 4.7717198581560284e-05, "loss": 1.182, "step": 207 }, { "epoch": 0.04610759362141343, "grad_norm": 0.66015625, "learning_rate": 4.77061170212766e-05, "loss": 1.2263, "step": 208 }, { "epoch": 0.046329264744593306, "grad_norm": 0.7265625, "learning_rate": 4.769503546099291e-05, "loss": 1.1835, "step": 209 }, { "epoch": 0.046550935867773174, "grad_norm": 0.62109375, "learning_rate": 4.768395390070922e-05, "loss": 1.279, "step": 210 }, { "epoch": 0.04677260699095305, "grad_norm": 0.6328125, "learning_rate": 4.7672872340425536e-05, "loss": 1.1722, "step": 211 }, { "epoch": 0.04699427811413292, "grad_norm": 0.69140625, "learning_rate": 4.7661790780141845e-05, "loss": 1.233, "step": 212 }, { "epoch": 0.047215949237312795, "grad_norm": 0.625, "learning_rate": 4.765070921985816e-05, "loss": 1.209, "step": 213 }, { "epoch": 0.04743762036049266, "grad_norm": 0.72265625, "learning_rate": 4.763962765957447e-05, "loss": 1.1385, "step": 214 }, { "epoch": 0.04765929148367254, "grad_norm": 0.640625, "learning_rate": 4.762854609929078e-05, "loss": 1.2725, "step": 215 }, { "epoch": 0.04788096260685241, "grad_norm": 0.76171875, "learning_rate": 4.7617464539007096e-05, "loss": 1.1957, "step": 216 }, { "epoch": 0.04810263373003228, "grad_norm": 0.60546875, "learning_rate": 4.7606382978723405e-05, "loss": 1.1292, "step": 217 }, { "epoch": 0.04832430485321215, "grad_norm": 0.6953125, "learning_rate": 4.759530141843972e-05, "loss": 1.2632, "step": 218 }, { "epoch": 0.04854597597639203, "grad_norm": 0.6484375, "learning_rate": 4.758421985815603e-05, "loss": 1.2185, "step": 219 }, { "epoch": 0.0487676470995719, "grad_norm": 0.703125, "learning_rate": 4.757313829787235e-05, "loss": 1.2458, "step": 220 }, { "epoch": 0.04898931822275177, "grad_norm": 0.73046875, "learning_rate": 4.7562056737588657e-05, "loss": 1.1262, "step": 221 }, { "epoch": 0.04921098934593164, "grad_norm": 0.671875, "learning_rate": 4.7550975177304966e-05, "loss": 1.2865, "step": 222 }, { "epoch": 0.04943266046911152, "grad_norm": 0.75390625, "learning_rate": 4.753989361702128e-05, "loss": 1.2658, "step": 223 }, { "epoch": 0.049654331592291386, "grad_norm": 0.6328125, "learning_rate": 4.752881205673759e-05, "loss": 1.1715, "step": 224 }, { "epoch": 0.04987600271547126, "grad_norm": 0.68359375, "learning_rate": 4.751773049645391e-05, "loss": 1.1957, "step": 225 }, { "epoch": 0.05009767383865113, "grad_norm": 0.62890625, "learning_rate": 4.750664893617022e-05, "loss": 1.2098, "step": 226 }, { "epoch": 0.050319344961831006, "grad_norm": 0.73046875, "learning_rate": 4.7495567375886526e-05, "loss": 1.2651, "step": 227 }, { "epoch": 0.050541016085010874, "grad_norm": 0.59765625, "learning_rate": 4.748448581560284e-05, "loss": 1.1375, "step": 228 }, { "epoch": 0.05076268720819075, "grad_norm": 0.58203125, "learning_rate": 4.747340425531915e-05, "loss": 1.1772, "step": 229 }, { "epoch": 0.05098435833137062, "grad_norm": 0.6328125, "learning_rate": 4.746232269503547e-05, "loss": 1.1983, "step": 230 }, { "epoch": 0.051206029454550495, "grad_norm": 0.609375, "learning_rate": 4.745124113475177e-05, "loss": 1.2062, "step": 231 }, { "epoch": 0.05142770057773036, "grad_norm": 0.5859375, "learning_rate": 4.744015957446809e-05, "loss": 1.253, "step": 232 }, { "epoch": 0.05164937170091024, "grad_norm": 0.59375, "learning_rate": 4.7429078014184396e-05, "loss": 1.1982, "step": 233 }, { "epoch": 0.05187104282409011, "grad_norm": 0.6015625, "learning_rate": 4.741799645390071e-05, "loss": 1.1987, "step": 234 }, { "epoch": 0.052092713947269983, "grad_norm": 0.63671875, "learning_rate": 4.740691489361702e-05, "loss": 1.1441, "step": 235 }, { "epoch": 0.05231438507044985, "grad_norm": 0.6171875, "learning_rate": 4.739583333333333e-05, "loss": 1.2859, "step": 236 }, { "epoch": 0.05253605619362973, "grad_norm": 0.6328125, "learning_rate": 4.738475177304965e-05, "loss": 1.225, "step": 237 }, { "epoch": 0.0527577273168096, "grad_norm": 0.6015625, "learning_rate": 4.737367021276596e-05, "loss": 1.0873, "step": 238 }, { "epoch": 0.05297939843998947, "grad_norm": 0.625, "learning_rate": 4.736258865248227e-05, "loss": 1.2329, "step": 239 }, { "epoch": 0.05320106956316934, "grad_norm": 0.69921875, "learning_rate": 4.735150709219858e-05, "loss": 1.2002, "step": 240 }, { "epoch": 0.05342274068634922, "grad_norm": 0.61328125, "learning_rate": 4.734042553191489e-05, "loss": 1.1636, "step": 241 }, { "epoch": 0.053644411809529086, "grad_norm": 0.765625, "learning_rate": 4.732934397163121e-05, "loss": 1.2061, "step": 242 }, { "epoch": 0.05386608293270896, "grad_norm": 0.61328125, "learning_rate": 4.731826241134752e-05, "loss": 1.151, "step": 243 }, { "epoch": 0.05408775405588883, "grad_norm": 0.62890625, "learning_rate": 4.730718085106383e-05, "loss": 1.2353, "step": 244 }, { "epoch": 0.054309425179068706, "grad_norm": 0.69921875, "learning_rate": 4.729609929078014e-05, "loss": 1.2378, "step": 245 }, { "epoch": 0.054531096302248575, "grad_norm": 0.62109375, "learning_rate": 4.728501773049646e-05, "loss": 1.213, "step": 246 }, { "epoch": 0.05475276742542845, "grad_norm": 0.70703125, "learning_rate": 4.727393617021277e-05, "loss": 1.2381, "step": 247 }, { "epoch": 0.05497443854860832, "grad_norm": 0.7109375, "learning_rate": 4.726285460992908e-05, "loss": 1.223, "step": 248 }, { "epoch": 0.055196109671788195, "grad_norm": 0.640625, "learning_rate": 4.7251773049645394e-05, "loss": 1.152, "step": 249 }, { "epoch": 0.05541778079496806, "grad_norm": 0.69921875, "learning_rate": 4.72406914893617e-05, "loss": 1.1454, "step": 250 }, { "epoch": 0.05563945191814794, "grad_norm": 0.62890625, "learning_rate": 4.722960992907802e-05, "loss": 1.1784, "step": 251 }, { "epoch": 0.05586112304132781, "grad_norm": 0.6015625, "learning_rate": 4.721852836879433e-05, "loss": 1.1417, "step": 252 }, { "epoch": 0.056082794164507684, "grad_norm": 0.66015625, "learning_rate": 4.720744680851064e-05, "loss": 1.159, "step": 253 }, { "epoch": 0.05630446528768755, "grad_norm": 0.58984375, "learning_rate": 4.7196365248226954e-05, "loss": 1.1739, "step": 254 }, { "epoch": 0.05652613641086743, "grad_norm": 0.609375, "learning_rate": 4.7185283687943264e-05, "loss": 1.2051, "step": 255 }, { "epoch": 0.0567478075340473, "grad_norm": 0.59765625, "learning_rate": 4.717420212765958e-05, "loss": 1.2306, "step": 256 }, { "epoch": 0.05696947865722717, "grad_norm": 0.5859375, "learning_rate": 4.716312056737589e-05, "loss": 1.1911, "step": 257 }, { "epoch": 0.05719114978040704, "grad_norm": 0.76171875, "learning_rate": 4.7152039007092205e-05, "loss": 1.1765, "step": 258 }, { "epoch": 0.05741282090358692, "grad_norm": 0.58984375, "learning_rate": 4.7140957446808515e-05, "loss": 1.1636, "step": 259 }, { "epoch": 0.057634492026766786, "grad_norm": 0.65625, "learning_rate": 4.7129875886524824e-05, "loss": 1.1886, "step": 260 }, { "epoch": 0.05785616314994666, "grad_norm": 0.71484375, "learning_rate": 4.711879432624114e-05, "loss": 1.1817, "step": 261 }, { "epoch": 0.05807783427312653, "grad_norm": 0.62890625, "learning_rate": 4.710771276595745e-05, "loss": 1.2234, "step": 262 }, { "epoch": 0.058299505396306406, "grad_norm": 0.8671875, "learning_rate": 4.7096631205673766e-05, "loss": 1.1853, "step": 263 }, { "epoch": 0.058521176519486275, "grad_norm": 0.6484375, "learning_rate": 4.7085549645390075e-05, "loss": 1.2258, "step": 264 }, { "epoch": 0.05874284764266615, "grad_norm": 0.72265625, "learning_rate": 4.7074468085106385e-05, "loss": 1.2698, "step": 265 }, { "epoch": 0.05896451876584602, "grad_norm": 0.87890625, "learning_rate": 4.7063386524822694e-05, "loss": 1.2248, "step": 266 }, { "epoch": 0.059186189889025895, "grad_norm": 0.6171875, "learning_rate": 4.705230496453901e-05, "loss": 1.2476, "step": 267 }, { "epoch": 0.059407861012205763, "grad_norm": 0.91015625, "learning_rate": 4.704122340425532e-05, "loss": 1.2618, "step": 268 }, { "epoch": 0.05962953213538564, "grad_norm": 0.62109375, "learning_rate": 4.703014184397163e-05, "loss": 1.2447, "step": 269 }, { "epoch": 0.05985120325856551, "grad_norm": 0.65625, "learning_rate": 4.7019060283687945e-05, "loss": 1.1608, "step": 270 }, { "epoch": 0.060072874381745384, "grad_norm": 0.640625, "learning_rate": 4.7007978723404254e-05, "loss": 1.1773, "step": 271 }, { "epoch": 0.06029454550492525, "grad_norm": 0.6015625, "learning_rate": 4.699689716312057e-05, "loss": 1.126, "step": 272 }, { "epoch": 0.06051621662810513, "grad_norm": 0.6328125, "learning_rate": 4.698581560283688e-05, "loss": 1.2002, "step": 273 }, { "epoch": 0.060737887751285, "grad_norm": 0.67578125, "learning_rate": 4.697473404255319e-05, "loss": 1.2164, "step": 274 }, { "epoch": 0.06095955887446487, "grad_norm": 0.6953125, "learning_rate": 4.6963652482269506e-05, "loss": 1.2253, "step": 275 }, { "epoch": 0.06118122999764474, "grad_norm": 0.6484375, "learning_rate": 4.6952570921985815e-05, "loss": 1.1533, "step": 276 }, { "epoch": 0.06140290112082462, "grad_norm": 0.84375, "learning_rate": 4.694148936170213e-05, "loss": 1.1344, "step": 277 }, { "epoch": 0.061624572244004486, "grad_norm": 0.61328125, "learning_rate": 4.693040780141844e-05, "loss": 1.1862, "step": 278 }, { "epoch": 0.06184624336718436, "grad_norm": 0.7890625, "learning_rate": 4.691932624113476e-05, "loss": 1.2467, "step": 279 }, { "epoch": 0.06206791449036423, "grad_norm": 0.65234375, "learning_rate": 4.6908244680851066e-05, "loss": 1.2025, "step": 280 }, { "epoch": 0.062289585613544106, "grad_norm": 0.58984375, "learning_rate": 4.6897163120567375e-05, "loss": 1.2342, "step": 281 }, { "epoch": 0.06251125673672397, "grad_norm": 0.91015625, "learning_rate": 4.688608156028369e-05, "loss": 1.1946, "step": 282 }, { "epoch": 0.06273292785990385, "grad_norm": 0.59765625, "learning_rate": 4.6875e-05, "loss": 1.2536, "step": 283 }, { "epoch": 0.06295459898308373, "grad_norm": 0.796875, "learning_rate": 4.686391843971632e-05, "loss": 1.1861, "step": 284 }, { "epoch": 0.06317627010626359, "grad_norm": 0.765625, "learning_rate": 4.6852836879432627e-05, "loss": 1.1138, "step": 285 }, { "epoch": 0.06339794122944346, "grad_norm": 0.58984375, "learning_rate": 4.6841755319148936e-05, "loss": 1.181, "step": 286 }, { "epoch": 0.06361961235262334, "grad_norm": 0.7109375, "learning_rate": 4.683067375886525e-05, "loss": 1.1424, "step": 287 }, { "epoch": 0.06384128347580321, "grad_norm": 0.68359375, "learning_rate": 4.681959219858156e-05, "loss": 1.1854, "step": 288 }, { "epoch": 0.06406295459898309, "grad_norm": 0.63671875, "learning_rate": 4.680851063829788e-05, "loss": 1.1678, "step": 289 }, { "epoch": 0.06428462572216295, "grad_norm": 0.78515625, "learning_rate": 4.679742907801419e-05, "loss": 1.2155, "step": 290 }, { "epoch": 0.06450629684534283, "grad_norm": 0.58203125, "learning_rate": 4.67863475177305e-05, "loss": 1.1599, "step": 291 }, { "epoch": 0.0647279679685227, "grad_norm": 0.71484375, "learning_rate": 4.677526595744681e-05, "loss": 1.2912, "step": 292 }, { "epoch": 0.06494963909170258, "grad_norm": 0.62890625, "learning_rate": 4.676418439716312e-05, "loss": 1.214, "step": 293 }, { "epoch": 0.06517131021488244, "grad_norm": 0.59765625, "learning_rate": 4.675310283687944e-05, "loss": 1.2474, "step": 294 }, { "epoch": 0.06539298133806232, "grad_norm": 0.625, "learning_rate": 4.674202127659575e-05, "loss": 1.1679, "step": 295 }, { "epoch": 0.06561465246124219, "grad_norm": 0.71484375, "learning_rate": 4.6730939716312064e-05, "loss": 1.2695, "step": 296 }, { "epoch": 0.06583632358442207, "grad_norm": 0.58203125, "learning_rate": 4.671985815602837e-05, "loss": 1.09, "step": 297 }, { "epoch": 0.06605799470760193, "grad_norm": 0.66796875, "learning_rate": 4.670877659574468e-05, "loss": 1.2001, "step": 298 }, { "epoch": 0.0662796658307818, "grad_norm": 0.76171875, "learning_rate": 4.6697695035461e-05, "loss": 1.2108, "step": 299 }, { "epoch": 0.06650133695396168, "grad_norm": 0.5546875, "learning_rate": 4.668661347517731e-05, "loss": 1.0916, "step": 300 }, { "epoch": 0.06672300807714156, "grad_norm": 0.63671875, "learning_rate": 4.667553191489362e-05, "loss": 1.0878, "step": 301 }, { "epoch": 0.06694467920032142, "grad_norm": 0.76171875, "learning_rate": 4.666445035460993e-05, "loss": 1.1664, "step": 302 }, { "epoch": 0.0671663503235013, "grad_norm": 0.59375, "learning_rate": 4.665336879432624e-05, "loss": 1.1269, "step": 303 }, { "epoch": 0.06738802144668117, "grad_norm": 0.76953125, "learning_rate": 4.664228723404255e-05, "loss": 1.2485, "step": 304 }, { "epoch": 0.06760969256986105, "grad_norm": 0.69140625, "learning_rate": 4.663120567375887e-05, "loss": 1.1229, "step": 305 }, { "epoch": 0.06783136369304091, "grad_norm": 2.140625, "learning_rate": 4.662012411347518e-05, "loss": 1.1373, "step": 306 }, { "epoch": 0.06805303481622078, "grad_norm": 0.8359375, "learning_rate": 4.660904255319149e-05, "loss": 1.2232, "step": 307 }, { "epoch": 0.06827470593940066, "grad_norm": 0.59375, "learning_rate": 4.65979609929078e-05, "loss": 1.1279, "step": 308 }, { "epoch": 0.06849637706258054, "grad_norm": 0.62109375, "learning_rate": 4.658687943262411e-05, "loss": 1.1986, "step": 309 }, { "epoch": 0.0687180481857604, "grad_norm": 0.58984375, "learning_rate": 4.657579787234043e-05, "loss": 1.089, "step": 310 }, { "epoch": 0.06893971930894027, "grad_norm": 0.5703125, "learning_rate": 4.656471631205674e-05, "loss": 1.1939, "step": 311 }, { "epoch": 0.06916139043212015, "grad_norm": 0.62109375, "learning_rate": 4.655363475177305e-05, "loss": 1.1746, "step": 312 }, { "epoch": 0.06938306155530002, "grad_norm": 0.6015625, "learning_rate": 4.6542553191489364e-05, "loss": 1.1813, "step": 313 }, { "epoch": 0.06960473267847989, "grad_norm": 0.609375, "learning_rate": 4.653147163120567e-05, "loss": 1.1812, "step": 314 }, { "epoch": 0.06982640380165976, "grad_norm": 0.55859375, "learning_rate": 4.652039007092199e-05, "loss": 1.1576, "step": 315 }, { "epoch": 0.07004807492483964, "grad_norm": 0.5703125, "learning_rate": 4.65093085106383e-05, "loss": 1.232, "step": 316 }, { "epoch": 0.07026974604801951, "grad_norm": 0.625, "learning_rate": 4.6498226950354615e-05, "loss": 1.1755, "step": 317 }, { "epoch": 0.07049141717119937, "grad_norm": 0.5859375, "learning_rate": 4.6487145390070924e-05, "loss": 1.2468, "step": 318 }, { "epoch": 0.07071308829437925, "grad_norm": 0.58984375, "learning_rate": 4.6476063829787234e-05, "loss": 1.2491, "step": 319 }, { "epoch": 0.07093475941755913, "grad_norm": 0.60546875, "learning_rate": 4.646498226950355e-05, "loss": 1.2151, "step": 320 }, { "epoch": 0.071156430540739, "grad_norm": 0.59375, "learning_rate": 4.645390070921986e-05, "loss": 1.1435, "step": 321 }, { "epoch": 0.07137810166391886, "grad_norm": 0.61328125, "learning_rate": 4.6442819148936175e-05, "loss": 1.0738, "step": 322 }, { "epoch": 0.07159977278709874, "grad_norm": 0.59375, "learning_rate": 4.6431737588652485e-05, "loss": 1.2518, "step": 323 }, { "epoch": 0.07182144391027862, "grad_norm": 0.578125, "learning_rate": 4.6420656028368794e-05, "loss": 1.1838, "step": 324 }, { "epoch": 0.07204311503345849, "grad_norm": 0.65625, "learning_rate": 4.640957446808511e-05, "loss": 1.2395, "step": 325 }, { "epoch": 0.07226478615663835, "grad_norm": 0.58203125, "learning_rate": 4.639849290780142e-05, "loss": 1.1588, "step": 326 }, { "epoch": 0.07248645727981823, "grad_norm": 0.6171875, "learning_rate": 4.6387411347517736e-05, "loss": 1.2015, "step": 327 }, { "epoch": 0.0727081284029981, "grad_norm": 0.609375, "learning_rate": 4.6376329787234045e-05, "loss": 1.2173, "step": 328 }, { "epoch": 0.07292979952617798, "grad_norm": 0.5625, "learning_rate": 4.636524822695036e-05, "loss": 1.1264, "step": 329 }, { "epoch": 0.07315147064935784, "grad_norm": 0.5859375, "learning_rate": 4.635416666666667e-05, "loss": 1.2091, "step": 330 }, { "epoch": 0.07337314177253772, "grad_norm": 0.67578125, "learning_rate": 4.634308510638298e-05, "loss": 1.1475, "step": 331 }, { "epoch": 0.07359481289571759, "grad_norm": 0.58984375, "learning_rate": 4.6332003546099296e-05, "loss": 1.1698, "step": 332 }, { "epoch": 0.07381648401889747, "grad_norm": 0.62109375, "learning_rate": 4.6320921985815606e-05, "loss": 1.2536, "step": 333 }, { "epoch": 0.07403815514207733, "grad_norm": 0.58984375, "learning_rate": 4.630984042553192e-05, "loss": 1.2047, "step": 334 }, { "epoch": 0.0742598262652572, "grad_norm": 0.58984375, "learning_rate": 4.629875886524823e-05, "loss": 1.1451, "step": 335 }, { "epoch": 0.07448149738843708, "grad_norm": 0.58203125, "learning_rate": 4.628767730496454e-05, "loss": 1.1679, "step": 336 }, { "epoch": 0.07470316851161696, "grad_norm": 0.60546875, "learning_rate": 4.627659574468085e-05, "loss": 1.1816, "step": 337 }, { "epoch": 0.07492483963479682, "grad_norm": 0.65625, "learning_rate": 4.6265514184397166e-05, "loss": 1.0929, "step": 338 }, { "epoch": 0.0751465107579767, "grad_norm": 0.5859375, "learning_rate": 4.6254432624113476e-05, "loss": 1.1479, "step": 339 }, { "epoch": 0.07536818188115657, "grad_norm": 0.59765625, "learning_rate": 4.6243351063829785e-05, "loss": 1.143, "step": 340 }, { "epoch": 0.07558985300433645, "grad_norm": 0.625, "learning_rate": 4.62322695035461e-05, "loss": 1.2104, "step": 341 }, { "epoch": 0.07581152412751631, "grad_norm": 0.59765625, "learning_rate": 4.622118794326241e-05, "loss": 1.1263, "step": 342 }, { "epoch": 0.07603319525069618, "grad_norm": 0.57421875, "learning_rate": 4.621010638297873e-05, "loss": 1.1674, "step": 343 }, { "epoch": 0.07625486637387606, "grad_norm": 0.59765625, "learning_rate": 4.6199024822695036e-05, "loss": 1.1418, "step": 344 }, { "epoch": 0.07647653749705594, "grad_norm": 0.7265625, "learning_rate": 4.6187943262411345e-05, "loss": 1.2031, "step": 345 }, { "epoch": 0.0766982086202358, "grad_norm": 0.5859375, "learning_rate": 4.617686170212766e-05, "loss": 1.1494, "step": 346 }, { "epoch": 0.07691987974341567, "grad_norm": 0.60546875, "learning_rate": 4.616578014184397e-05, "loss": 1.1933, "step": 347 }, { "epoch": 0.07714155086659555, "grad_norm": 0.62109375, "learning_rate": 4.615469858156029e-05, "loss": 1.176, "step": 348 }, { "epoch": 0.07736322198977542, "grad_norm": 0.59375, "learning_rate": 4.6143617021276597e-05, "loss": 1.1557, "step": 349 }, { "epoch": 0.07758489311295529, "grad_norm": 0.6171875, "learning_rate": 4.613253546099291e-05, "loss": 1.186, "step": 350 }, { "epoch": 0.07780656423613516, "grad_norm": 0.6484375, "learning_rate": 4.612145390070922e-05, "loss": 1.2334, "step": 351 }, { "epoch": 0.07802823535931504, "grad_norm": 0.60546875, "learning_rate": 4.611037234042553e-05, "loss": 1.1298, "step": 352 }, { "epoch": 0.07824990648249491, "grad_norm": 0.6328125, "learning_rate": 4.609929078014185e-05, "loss": 1.1733, "step": 353 }, { "epoch": 0.07847157760567477, "grad_norm": 0.71484375, "learning_rate": 4.608820921985816e-05, "loss": 1.0679, "step": 354 }, { "epoch": 0.07869324872885465, "grad_norm": 0.60546875, "learning_rate": 4.607712765957447e-05, "loss": 1.2471, "step": 355 }, { "epoch": 0.07891491985203453, "grad_norm": 0.7109375, "learning_rate": 4.606604609929078e-05, "loss": 1.1761, "step": 356 }, { "epoch": 0.0791365909752144, "grad_norm": 0.71484375, "learning_rate": 4.605496453900709e-05, "loss": 1.2286, "step": 357 }, { "epoch": 0.07935826209839426, "grad_norm": 0.578125, "learning_rate": 4.604388297872341e-05, "loss": 1.1837, "step": 358 }, { "epoch": 0.07957993322157414, "grad_norm": 0.65234375, "learning_rate": 4.603280141843972e-05, "loss": 1.1021, "step": 359 }, { "epoch": 0.07980160434475402, "grad_norm": 0.6484375, "learning_rate": 4.6021719858156034e-05, "loss": 1.1319, "step": 360 }, { "epoch": 0.08002327546793389, "grad_norm": 0.58984375, "learning_rate": 4.601063829787234e-05, "loss": 1.1076, "step": 361 }, { "epoch": 0.08024494659111375, "grad_norm": 0.6171875, "learning_rate": 4.599955673758866e-05, "loss": 1.1289, "step": 362 }, { "epoch": 0.08046661771429363, "grad_norm": 0.71875, "learning_rate": 4.598847517730497e-05, "loss": 1.1329, "step": 363 }, { "epoch": 0.0806882888374735, "grad_norm": 0.62890625, "learning_rate": 4.597739361702128e-05, "loss": 1.1699, "step": 364 }, { "epoch": 0.08090995996065338, "grad_norm": 0.69140625, "learning_rate": 4.5966312056737594e-05, "loss": 1.1021, "step": 365 }, { "epoch": 0.08113163108383324, "grad_norm": 0.66015625, "learning_rate": 4.5955230496453904e-05, "loss": 1.2512, "step": 366 }, { "epoch": 0.08135330220701312, "grad_norm": 0.62109375, "learning_rate": 4.594414893617022e-05, "loss": 1.0613, "step": 367 }, { "epoch": 0.08157497333019299, "grad_norm": 0.703125, "learning_rate": 4.593306737588653e-05, "loss": 1.1529, "step": 368 }, { "epoch": 0.08179664445337287, "grad_norm": 0.59375, "learning_rate": 4.592198581560284e-05, "loss": 1.1369, "step": 369 }, { "epoch": 0.08201831557655273, "grad_norm": 0.6796875, "learning_rate": 4.5910904255319155e-05, "loss": 1.1841, "step": 370 }, { "epoch": 0.0822399866997326, "grad_norm": 0.63671875, "learning_rate": 4.5899822695035464e-05, "loss": 1.1044, "step": 371 }, { "epoch": 0.08246165782291248, "grad_norm": 0.61328125, "learning_rate": 4.588874113475177e-05, "loss": 1.185, "step": 372 }, { "epoch": 0.08268332894609236, "grad_norm": 0.57421875, "learning_rate": 4.587765957446808e-05, "loss": 1.1334, "step": 373 }, { "epoch": 0.08290500006927222, "grad_norm": 0.7421875, "learning_rate": 4.58665780141844e-05, "loss": 1.2439, "step": 374 }, { "epoch": 0.0831266711924521, "grad_norm": 0.6328125, "learning_rate": 4.585549645390071e-05, "loss": 1.1869, "step": 375 }, { "epoch": 0.08334834231563197, "grad_norm": 0.640625, "learning_rate": 4.5844414893617024e-05, "loss": 1.1067, "step": 376 }, { "epoch": 0.08357001343881185, "grad_norm": 0.60546875, "learning_rate": 4.5833333333333334e-05, "loss": 1.2285, "step": 377 }, { "epoch": 0.08379168456199171, "grad_norm": 0.58203125, "learning_rate": 4.582225177304964e-05, "loss": 1.1117, "step": 378 }, { "epoch": 0.08401335568517158, "grad_norm": 0.58984375, "learning_rate": 4.581117021276596e-05, "loss": 1.1442, "step": 379 }, { "epoch": 0.08423502680835146, "grad_norm": 0.62109375, "learning_rate": 4.580008865248227e-05, "loss": 1.0993, "step": 380 }, { "epoch": 0.08445669793153134, "grad_norm": 0.59765625, "learning_rate": 4.5789007092198585e-05, "loss": 1.1708, "step": 381 }, { "epoch": 0.0846783690547112, "grad_norm": 0.59765625, "learning_rate": 4.5777925531914894e-05, "loss": 1.2034, "step": 382 }, { "epoch": 0.08490004017789107, "grad_norm": 0.65625, "learning_rate": 4.5766843971631204e-05, "loss": 1.185, "step": 383 }, { "epoch": 0.08512171130107095, "grad_norm": 0.5625, "learning_rate": 4.575576241134752e-05, "loss": 1.115, "step": 384 }, { "epoch": 0.08534338242425082, "grad_norm": 0.59765625, "learning_rate": 4.574468085106383e-05, "loss": 1.2001, "step": 385 }, { "epoch": 0.0855650535474307, "grad_norm": 0.57421875, "learning_rate": 4.5733599290780145e-05, "loss": 1.2337, "step": 386 }, { "epoch": 0.08578672467061056, "grad_norm": 0.58203125, "learning_rate": 4.5722517730496455e-05, "loss": 1.1613, "step": 387 }, { "epoch": 0.08600839579379044, "grad_norm": 0.58203125, "learning_rate": 4.571143617021277e-05, "loss": 1.1028, "step": 388 }, { "epoch": 0.08623006691697031, "grad_norm": 0.5625, "learning_rate": 4.570035460992908e-05, "loss": 1.1307, "step": 389 }, { "epoch": 0.08645173804015019, "grad_norm": 0.625, "learning_rate": 4.568927304964539e-05, "loss": 1.2912, "step": 390 }, { "epoch": 0.08667340916333005, "grad_norm": 0.58203125, "learning_rate": 4.5678191489361706e-05, "loss": 1.1057, "step": 391 }, { "epoch": 0.08689508028650993, "grad_norm": 0.640625, "learning_rate": 4.5667109929078015e-05, "loss": 1.223, "step": 392 }, { "epoch": 0.0871167514096898, "grad_norm": 0.62109375, "learning_rate": 4.565602836879433e-05, "loss": 1.1402, "step": 393 }, { "epoch": 0.08733842253286968, "grad_norm": 0.55078125, "learning_rate": 4.564494680851064e-05, "loss": 1.0877, "step": 394 }, { "epoch": 0.08756009365604954, "grad_norm": 0.609375, "learning_rate": 4.563386524822695e-05, "loss": 1.1255, "step": 395 }, { "epoch": 0.08778176477922942, "grad_norm": 0.65234375, "learning_rate": 4.5622783687943266e-05, "loss": 1.2081, "step": 396 }, { "epoch": 0.08800343590240929, "grad_norm": 0.58203125, "learning_rate": 4.5611702127659576e-05, "loss": 1.158, "step": 397 }, { "epoch": 0.08822510702558917, "grad_norm": 0.78515625, "learning_rate": 4.560062056737589e-05, "loss": 1.1877, "step": 398 }, { "epoch": 0.08844677814876903, "grad_norm": 0.578125, "learning_rate": 4.55895390070922e-05, "loss": 1.1791, "step": 399 }, { "epoch": 0.0886684492719489, "grad_norm": 0.5703125, "learning_rate": 4.557845744680852e-05, "loss": 1.1324, "step": 400 }, { "epoch": 0.08889012039512878, "grad_norm": 0.6953125, "learning_rate": 4.556737588652483e-05, "loss": 1.2358, "step": 401 }, { "epoch": 0.08911179151830866, "grad_norm": 0.6171875, "learning_rate": 4.5556294326241136e-05, "loss": 1.1244, "step": 402 }, { "epoch": 0.08933346264148852, "grad_norm": 0.5859375, "learning_rate": 4.554521276595745e-05, "loss": 1.0925, "step": 403 }, { "epoch": 0.0895551337646684, "grad_norm": 0.71484375, "learning_rate": 4.553413120567376e-05, "loss": 1.1454, "step": 404 }, { "epoch": 0.08977680488784827, "grad_norm": 0.61328125, "learning_rate": 4.552304964539008e-05, "loss": 1.2172, "step": 405 }, { "epoch": 0.08999847601102814, "grad_norm": 0.59765625, "learning_rate": 4.551196808510639e-05, "loss": 1.2227, "step": 406 }, { "epoch": 0.090220147134208, "grad_norm": 0.640625, "learning_rate": 4.55008865248227e-05, "loss": 1.1568, "step": 407 }, { "epoch": 0.09044181825738788, "grad_norm": 0.5703125, "learning_rate": 4.5489804964539006e-05, "loss": 1.1846, "step": 408 }, { "epoch": 0.09066348938056776, "grad_norm": 0.59375, "learning_rate": 4.547872340425532e-05, "loss": 1.1106, "step": 409 }, { "epoch": 0.09088516050374763, "grad_norm": 0.5703125, "learning_rate": 4.546764184397163e-05, "loss": 1.1594, "step": 410 }, { "epoch": 0.0911068316269275, "grad_norm": 0.625, "learning_rate": 4.545656028368794e-05, "loss": 1.2431, "step": 411 }, { "epoch": 0.09132850275010737, "grad_norm": 0.59765625, "learning_rate": 4.544547872340426e-05, "loss": 1.2329, "step": 412 }, { "epoch": 0.09155017387328725, "grad_norm": 0.57421875, "learning_rate": 4.5434397163120567e-05, "loss": 1.1418, "step": 413 }, { "epoch": 0.09177184499646712, "grad_norm": 0.5625, "learning_rate": 4.542331560283688e-05, "loss": 1.1969, "step": 414 }, { "epoch": 0.09199351611964698, "grad_norm": 0.59375, "learning_rate": 4.541223404255319e-05, "loss": 1.1349, "step": 415 }, { "epoch": 0.09221518724282686, "grad_norm": 0.59765625, "learning_rate": 4.54011524822695e-05, "loss": 1.132, "step": 416 }, { "epoch": 0.09243685836600674, "grad_norm": 0.59375, "learning_rate": 4.539007092198582e-05, "loss": 1.1807, "step": 417 }, { "epoch": 0.09265852948918661, "grad_norm": 0.60546875, "learning_rate": 4.537898936170213e-05, "loss": 1.1078, "step": 418 }, { "epoch": 0.09288020061236647, "grad_norm": 0.640625, "learning_rate": 4.536790780141844e-05, "loss": 1.1512, "step": 419 }, { "epoch": 0.09310187173554635, "grad_norm": 0.69921875, "learning_rate": 4.535682624113475e-05, "loss": 1.1427, "step": 420 }, { "epoch": 0.09332354285872622, "grad_norm": 0.6484375, "learning_rate": 4.534574468085107e-05, "loss": 1.1237, "step": 421 }, { "epoch": 0.0935452139819061, "grad_norm": 0.59375, "learning_rate": 4.533466312056738e-05, "loss": 1.1252, "step": 422 }, { "epoch": 0.09376688510508596, "grad_norm": 0.6171875, "learning_rate": 4.532358156028369e-05, "loss": 1.1294, "step": 423 }, { "epoch": 0.09398855622826584, "grad_norm": 0.60546875, "learning_rate": 4.5312500000000004e-05, "loss": 1.2915, "step": 424 }, { "epoch": 0.09421022735144571, "grad_norm": 0.56640625, "learning_rate": 4.530141843971631e-05, "loss": 1.069, "step": 425 }, { "epoch": 0.09443189847462559, "grad_norm": 0.55859375, "learning_rate": 4.529033687943263e-05, "loss": 1.1275, "step": 426 }, { "epoch": 0.09465356959780545, "grad_norm": 0.58984375, "learning_rate": 4.527925531914894e-05, "loss": 1.1225, "step": 427 }, { "epoch": 0.09487524072098533, "grad_norm": 0.59765625, "learning_rate": 4.526817375886525e-05, "loss": 1.0582, "step": 428 }, { "epoch": 0.0950969118441652, "grad_norm": 0.6328125, "learning_rate": 4.5257092198581564e-05, "loss": 1.2343, "step": 429 }, { "epoch": 0.09531858296734508, "grad_norm": 0.58203125, "learning_rate": 4.5246010638297874e-05, "loss": 1.2059, "step": 430 }, { "epoch": 0.09554025409052494, "grad_norm": 0.58203125, "learning_rate": 4.523492907801419e-05, "loss": 1.1842, "step": 431 }, { "epoch": 0.09576192521370482, "grad_norm": 0.53125, "learning_rate": 4.52238475177305e-05, "loss": 1.0674, "step": 432 }, { "epoch": 0.09598359633688469, "grad_norm": 0.578125, "learning_rate": 4.5212765957446815e-05, "loss": 1.1393, "step": 433 }, { "epoch": 0.09620526746006457, "grad_norm": 0.609375, "learning_rate": 4.5201684397163125e-05, "loss": 1.11, "step": 434 }, { "epoch": 0.09642693858324443, "grad_norm": 0.59765625, "learning_rate": 4.5190602836879434e-05, "loss": 1.1767, "step": 435 }, { "epoch": 0.0966486097064243, "grad_norm": 0.59375, "learning_rate": 4.517952127659575e-05, "loss": 1.1238, "step": 436 }, { "epoch": 0.09687028082960418, "grad_norm": 0.5859375, "learning_rate": 4.516843971631206e-05, "loss": 1.0371, "step": 437 }, { "epoch": 0.09709195195278406, "grad_norm": 0.59375, "learning_rate": 4.5157358156028376e-05, "loss": 1.0579, "step": 438 }, { "epoch": 0.09731362307596392, "grad_norm": 0.578125, "learning_rate": 4.5146276595744685e-05, "loss": 1.1158, "step": 439 }, { "epoch": 0.0975352941991438, "grad_norm": 0.62109375, "learning_rate": 4.5135195035460994e-05, "loss": 1.1236, "step": 440 }, { "epoch": 0.09775696532232367, "grad_norm": 0.625, "learning_rate": 4.512411347517731e-05, "loss": 1.168, "step": 441 }, { "epoch": 0.09797863644550354, "grad_norm": 0.60546875, "learning_rate": 4.511303191489361e-05, "loss": 1.11, "step": 442 }, { "epoch": 0.0982003075686834, "grad_norm": 0.6796875, "learning_rate": 4.510195035460993e-05, "loss": 1.1196, "step": 443 }, { "epoch": 0.09842197869186328, "grad_norm": 0.56640625, "learning_rate": 4.509086879432624e-05, "loss": 1.1697, "step": 444 }, { "epoch": 0.09864364981504316, "grad_norm": 0.5703125, "learning_rate": 4.5079787234042555e-05, "loss": 1.1046, "step": 445 }, { "epoch": 0.09886532093822303, "grad_norm": 0.625, "learning_rate": 4.5068705673758864e-05, "loss": 1.1741, "step": 446 }, { "epoch": 0.0990869920614029, "grad_norm": 0.5625, "learning_rate": 4.505762411347518e-05, "loss": 1.1927, "step": 447 }, { "epoch": 0.09930866318458277, "grad_norm": 0.59765625, "learning_rate": 4.504654255319149e-05, "loss": 1.1459, "step": 448 }, { "epoch": 0.09953033430776265, "grad_norm": 0.5703125, "learning_rate": 4.50354609929078e-05, "loss": 1.0706, "step": 449 }, { "epoch": 0.09975200543094252, "grad_norm": 0.58203125, "learning_rate": 4.5024379432624115e-05, "loss": 1.1452, "step": 450 }, { "epoch": 0.09997367655412238, "grad_norm": 0.6484375, "learning_rate": 4.5013297872340425e-05, "loss": 1.1822, "step": 451 }, { "epoch": 0.10019534767730226, "grad_norm": 0.5859375, "learning_rate": 4.500221631205674e-05, "loss": 1.1645, "step": 452 }, { "epoch": 0.10041701880048214, "grad_norm": 0.625, "learning_rate": 4.499113475177305e-05, "loss": 1.1715, "step": 453 }, { "epoch": 0.10063868992366201, "grad_norm": 0.640625, "learning_rate": 4.498005319148936e-05, "loss": 1.1887, "step": 454 }, { "epoch": 0.10086036104684187, "grad_norm": 0.56640625, "learning_rate": 4.4968971631205676e-05, "loss": 1.0327, "step": 455 }, { "epoch": 0.10108203217002175, "grad_norm": 0.6640625, "learning_rate": 4.4957890070921985e-05, "loss": 1.2075, "step": 456 }, { "epoch": 0.10130370329320162, "grad_norm": 0.55859375, "learning_rate": 4.49468085106383e-05, "loss": 1.1472, "step": 457 }, { "epoch": 0.1015253744163815, "grad_norm": 0.58203125, "learning_rate": 4.493572695035461e-05, "loss": 1.1863, "step": 458 }, { "epoch": 0.10174704553956136, "grad_norm": 0.59375, "learning_rate": 4.492464539007093e-05, "loss": 1.0698, "step": 459 }, { "epoch": 0.10196871666274124, "grad_norm": 0.625, "learning_rate": 4.4913563829787236e-05, "loss": 1.2159, "step": 460 }, { "epoch": 0.10219038778592111, "grad_norm": 0.57421875, "learning_rate": 4.4902482269503546e-05, "loss": 1.1115, "step": 461 }, { "epoch": 0.10241205890910099, "grad_norm": 0.56640625, "learning_rate": 4.489140070921986e-05, "loss": 1.1293, "step": 462 }, { "epoch": 0.10263373003228085, "grad_norm": 0.578125, "learning_rate": 4.488031914893617e-05, "loss": 1.1001, "step": 463 }, { "epoch": 0.10285540115546073, "grad_norm": 0.5859375, "learning_rate": 4.486923758865249e-05, "loss": 1.115, "step": 464 }, { "epoch": 0.1030770722786406, "grad_norm": 0.5703125, "learning_rate": 4.48581560283688e-05, "loss": 1.2009, "step": 465 }, { "epoch": 0.10329874340182048, "grad_norm": 0.5546875, "learning_rate": 4.4847074468085106e-05, "loss": 1.112, "step": 466 }, { "epoch": 0.10352041452500034, "grad_norm": 0.66015625, "learning_rate": 4.483599290780142e-05, "loss": 1.2751, "step": 467 }, { "epoch": 0.10374208564818022, "grad_norm": 0.546875, "learning_rate": 4.482491134751773e-05, "loss": 1.1093, "step": 468 }, { "epoch": 0.10396375677136009, "grad_norm": 0.75390625, "learning_rate": 4.481382978723405e-05, "loss": 1.1493, "step": 469 }, { "epoch": 0.10418542789453997, "grad_norm": 0.59765625, "learning_rate": 4.480274822695036e-05, "loss": 1.095, "step": 470 }, { "epoch": 0.10440709901771983, "grad_norm": 0.81640625, "learning_rate": 4.4791666666666673e-05, "loss": 1.2497, "step": 471 }, { "epoch": 0.1046287701408997, "grad_norm": 0.64453125, "learning_rate": 4.478058510638298e-05, "loss": 1.1959, "step": 472 }, { "epoch": 0.10485044126407958, "grad_norm": 0.66015625, "learning_rate": 4.476950354609929e-05, "loss": 1.219, "step": 473 }, { "epoch": 0.10507211238725946, "grad_norm": 0.68359375, "learning_rate": 4.475842198581561e-05, "loss": 1.0271, "step": 474 }, { "epoch": 0.10529378351043932, "grad_norm": 0.62890625, "learning_rate": 4.474734042553192e-05, "loss": 1.0813, "step": 475 }, { "epoch": 0.1055154546336192, "grad_norm": 0.61328125, "learning_rate": 4.4736258865248234e-05, "loss": 1.1742, "step": 476 }, { "epoch": 0.10573712575679907, "grad_norm": 0.625, "learning_rate": 4.4725177304964537e-05, "loss": 1.1361, "step": 477 }, { "epoch": 0.10595879687997894, "grad_norm": 0.609375, "learning_rate": 4.471409574468085e-05, "loss": 1.0924, "step": 478 }, { "epoch": 0.1061804680031588, "grad_norm": 0.62109375, "learning_rate": 4.470301418439716e-05, "loss": 1.1461, "step": 479 }, { "epoch": 0.10640213912633868, "grad_norm": 0.62109375, "learning_rate": 4.469193262411348e-05, "loss": 1.2294, "step": 480 }, { "epoch": 0.10662381024951856, "grad_norm": 0.6015625, "learning_rate": 4.468085106382979e-05, "loss": 1.1709, "step": 481 }, { "epoch": 0.10684548137269843, "grad_norm": 0.609375, "learning_rate": 4.46697695035461e-05, "loss": 1.1796, "step": 482 }, { "epoch": 0.10706715249587831, "grad_norm": 0.6171875, "learning_rate": 4.465868794326241e-05, "loss": 1.1291, "step": 483 }, { "epoch": 0.10728882361905817, "grad_norm": 0.62109375, "learning_rate": 4.464760638297872e-05, "loss": 1.1642, "step": 484 }, { "epoch": 0.10751049474223805, "grad_norm": 0.59765625, "learning_rate": 4.463652482269504e-05, "loss": 1.1299, "step": 485 }, { "epoch": 0.10773216586541792, "grad_norm": 0.6171875, "learning_rate": 4.462544326241135e-05, "loss": 1.0489, "step": 486 }, { "epoch": 0.1079538369885978, "grad_norm": 0.640625, "learning_rate": 4.461436170212766e-05, "loss": 1.121, "step": 487 }, { "epoch": 0.10817550811177766, "grad_norm": 0.5625, "learning_rate": 4.4603280141843974e-05, "loss": 1.1293, "step": 488 }, { "epoch": 0.10839717923495754, "grad_norm": 0.609375, "learning_rate": 4.459219858156028e-05, "loss": 1.1127, "step": 489 }, { "epoch": 0.10861885035813741, "grad_norm": 0.58984375, "learning_rate": 4.45811170212766e-05, "loss": 1.1264, "step": 490 }, { "epoch": 0.10884052148131729, "grad_norm": 0.640625, "learning_rate": 4.457003546099291e-05, "loss": 1.1534, "step": 491 }, { "epoch": 0.10906219260449715, "grad_norm": 0.6015625, "learning_rate": 4.4558953900709225e-05, "loss": 1.1875, "step": 492 }, { "epoch": 0.10928386372767702, "grad_norm": 0.6015625, "learning_rate": 4.4547872340425534e-05, "loss": 1.1251, "step": 493 }, { "epoch": 0.1095055348508569, "grad_norm": 0.59765625, "learning_rate": 4.4536790780141844e-05, "loss": 1.1177, "step": 494 }, { "epoch": 0.10972720597403678, "grad_norm": 0.57421875, "learning_rate": 4.452570921985816e-05, "loss": 1.0519, "step": 495 }, { "epoch": 0.10994887709721664, "grad_norm": 0.6015625, "learning_rate": 4.451462765957447e-05, "loss": 1.148, "step": 496 }, { "epoch": 0.11017054822039651, "grad_norm": 0.62109375, "learning_rate": 4.4503546099290785e-05, "loss": 1.168, "step": 497 }, { "epoch": 0.11039221934357639, "grad_norm": 0.6015625, "learning_rate": 4.4492464539007095e-05, "loss": 1.1885, "step": 498 }, { "epoch": 0.11061389046675627, "grad_norm": 0.578125, "learning_rate": 4.4481382978723404e-05, "loss": 1.1129, "step": 499 }, { "epoch": 0.11083556158993613, "grad_norm": 0.640625, "learning_rate": 4.447030141843972e-05, "loss": 1.0542, "step": 500 }, { "epoch": 0.111057232713116, "grad_norm": 0.62109375, "learning_rate": 4.445921985815603e-05, "loss": 1.1776, "step": 501 }, { "epoch": 0.11127890383629588, "grad_norm": 0.609375, "learning_rate": 4.4448138297872346e-05, "loss": 1.0941, "step": 502 }, { "epoch": 0.11150057495947575, "grad_norm": 0.5546875, "learning_rate": 4.4437056737588655e-05, "loss": 1.0751, "step": 503 }, { "epoch": 0.11172224608265562, "grad_norm": 0.640625, "learning_rate": 4.442597517730497e-05, "loss": 1.1448, "step": 504 }, { "epoch": 0.11194391720583549, "grad_norm": 0.5625, "learning_rate": 4.441489361702128e-05, "loss": 1.1144, "step": 505 }, { "epoch": 0.11216558832901537, "grad_norm": 0.5390625, "learning_rate": 4.440381205673759e-05, "loss": 1.0676, "step": 506 }, { "epoch": 0.11238725945219524, "grad_norm": 0.59375, "learning_rate": 4.4392730496453906e-05, "loss": 1.2399, "step": 507 }, { "epoch": 0.1126089305753751, "grad_norm": 0.65625, "learning_rate": 4.4381648936170216e-05, "loss": 1.1519, "step": 508 }, { "epoch": 0.11283060169855498, "grad_norm": 0.59375, "learning_rate": 4.437056737588653e-05, "loss": 1.136, "step": 509 }, { "epoch": 0.11305227282173486, "grad_norm": 0.734375, "learning_rate": 4.435948581560284e-05, "loss": 1.0757, "step": 510 }, { "epoch": 0.11327394394491473, "grad_norm": 0.56640625, "learning_rate": 4.434840425531915e-05, "loss": 1.1596, "step": 511 }, { "epoch": 0.1134956150680946, "grad_norm": 0.59765625, "learning_rate": 4.433732269503547e-05, "loss": 1.239, "step": 512 }, { "epoch": 0.11371728619127447, "grad_norm": 0.61328125, "learning_rate": 4.432624113475177e-05, "loss": 1.1146, "step": 513 }, { "epoch": 0.11393895731445434, "grad_norm": 0.5546875, "learning_rate": 4.4315159574468085e-05, "loss": 1.0758, "step": 514 }, { "epoch": 0.11416062843763422, "grad_norm": 0.609375, "learning_rate": 4.4304078014184395e-05, "loss": 1.2077, "step": 515 }, { "epoch": 0.11438229956081408, "grad_norm": 0.62109375, "learning_rate": 4.429299645390071e-05, "loss": 1.0799, "step": 516 }, { "epoch": 0.11460397068399396, "grad_norm": 0.6171875, "learning_rate": 4.428191489361702e-05, "loss": 1.0429, "step": 517 }, { "epoch": 0.11482564180717383, "grad_norm": 0.59375, "learning_rate": 4.4270833333333337e-05, "loss": 1.1481, "step": 518 }, { "epoch": 0.11504731293035371, "grad_norm": 0.68359375, "learning_rate": 4.4259751773049646e-05, "loss": 1.2062, "step": 519 }, { "epoch": 0.11526898405353357, "grad_norm": 0.8125, "learning_rate": 4.4248670212765955e-05, "loss": 1.061, "step": 520 }, { "epoch": 0.11549065517671345, "grad_norm": 0.6171875, "learning_rate": 4.423758865248227e-05, "loss": 1.1538, "step": 521 }, { "epoch": 0.11571232629989332, "grad_norm": 0.6875, "learning_rate": 4.422650709219858e-05, "loss": 1.2099, "step": 522 }, { "epoch": 0.1159339974230732, "grad_norm": 0.98828125, "learning_rate": 4.42154255319149e-05, "loss": 1.1281, "step": 523 }, { "epoch": 0.11615566854625306, "grad_norm": 0.5703125, "learning_rate": 4.4204343971631206e-05, "loss": 1.0995, "step": 524 }, { "epoch": 0.11637733966943294, "grad_norm": 0.74609375, "learning_rate": 4.4193262411347516e-05, "loss": 1.1286, "step": 525 }, { "epoch": 0.11659901079261281, "grad_norm": 0.7890625, "learning_rate": 4.418218085106383e-05, "loss": 1.0997, "step": 526 }, { "epoch": 0.11682068191579269, "grad_norm": 0.5390625, "learning_rate": 4.417109929078014e-05, "loss": 1.0016, "step": 527 }, { "epoch": 0.11704235303897255, "grad_norm": 0.63671875, "learning_rate": 4.416001773049646e-05, "loss": 1.1109, "step": 528 }, { "epoch": 0.11726402416215242, "grad_norm": 0.78515625, "learning_rate": 4.414893617021277e-05, "loss": 1.2083, "step": 529 }, { "epoch": 0.1174856952853323, "grad_norm": 0.61328125, "learning_rate": 4.413785460992908e-05, "loss": 1.0841, "step": 530 }, { "epoch": 0.11770736640851218, "grad_norm": 0.73046875, "learning_rate": 4.412677304964539e-05, "loss": 1.1966, "step": 531 }, { "epoch": 0.11792903753169204, "grad_norm": 0.80078125, "learning_rate": 4.41156914893617e-05, "loss": 1.0795, "step": 532 }, { "epoch": 0.11815070865487191, "grad_norm": 0.609375, "learning_rate": 4.410460992907802e-05, "loss": 1.1683, "step": 533 }, { "epoch": 0.11837237977805179, "grad_norm": 0.6171875, "learning_rate": 4.409352836879433e-05, "loss": 1.1942, "step": 534 }, { "epoch": 0.11859405090123167, "grad_norm": 0.62109375, "learning_rate": 4.4082446808510643e-05, "loss": 1.0572, "step": 535 }, { "epoch": 0.11881572202441153, "grad_norm": 0.609375, "learning_rate": 4.407136524822695e-05, "loss": 1.2266, "step": 536 }, { "epoch": 0.1190373931475914, "grad_norm": 0.5703125, "learning_rate": 4.406028368794327e-05, "loss": 1.145, "step": 537 }, { "epoch": 0.11925906427077128, "grad_norm": 0.69921875, "learning_rate": 4.404920212765958e-05, "loss": 1.1795, "step": 538 }, { "epoch": 0.11948073539395115, "grad_norm": 0.5859375, "learning_rate": 4.403812056737589e-05, "loss": 1.1222, "step": 539 }, { "epoch": 0.11970240651713102, "grad_norm": 0.5546875, "learning_rate": 4.4027039007092204e-05, "loss": 1.086, "step": 540 }, { "epoch": 0.11992407764031089, "grad_norm": 0.5703125, "learning_rate": 4.401595744680851e-05, "loss": 1.1097, "step": 541 }, { "epoch": 0.12014574876349077, "grad_norm": 0.6015625, "learning_rate": 4.400487588652483e-05, "loss": 1.0839, "step": 542 }, { "epoch": 0.12036741988667064, "grad_norm": 0.55859375, "learning_rate": 4.399379432624114e-05, "loss": 1.1394, "step": 543 }, { "epoch": 0.1205890910098505, "grad_norm": 0.59375, "learning_rate": 4.398271276595745e-05, "loss": 1.1087, "step": 544 }, { "epoch": 0.12081076213303038, "grad_norm": 0.62890625, "learning_rate": 4.3971631205673764e-05, "loss": 1.1875, "step": 545 }, { "epoch": 0.12103243325621026, "grad_norm": 0.609375, "learning_rate": 4.3960549645390074e-05, "loss": 1.1523, "step": 546 }, { "epoch": 0.12125410437939013, "grad_norm": 0.59765625, "learning_rate": 4.394946808510639e-05, "loss": 1.0962, "step": 547 }, { "epoch": 0.12147577550257, "grad_norm": 0.5546875, "learning_rate": 4.393838652482269e-05, "loss": 1.1531, "step": 548 }, { "epoch": 0.12169744662574987, "grad_norm": 0.58203125, "learning_rate": 4.392730496453901e-05, "loss": 1.0095, "step": 549 }, { "epoch": 0.12191911774892975, "grad_norm": 0.59375, "learning_rate": 4.391622340425532e-05, "loss": 1.151, "step": 550 }, { "epoch": 0.12214078887210962, "grad_norm": 0.609375, "learning_rate": 4.3905141843971634e-05, "loss": 1.1342, "step": 551 }, { "epoch": 0.12236245999528948, "grad_norm": 0.6171875, "learning_rate": 4.3894060283687944e-05, "loss": 1.1548, "step": 552 }, { "epoch": 0.12258413111846936, "grad_norm": 0.58203125, "learning_rate": 4.388297872340425e-05, "loss": 1.1262, "step": 553 }, { "epoch": 0.12280580224164923, "grad_norm": 0.6640625, "learning_rate": 4.387189716312057e-05, "loss": 1.0931, "step": 554 }, { "epoch": 0.12302747336482911, "grad_norm": 0.578125, "learning_rate": 4.386081560283688e-05, "loss": 1.0828, "step": 555 }, { "epoch": 0.12324914448800897, "grad_norm": 0.66796875, "learning_rate": 4.3849734042553195e-05, "loss": 1.2722, "step": 556 }, { "epoch": 0.12347081561118885, "grad_norm": 0.61328125, "learning_rate": 4.3838652482269504e-05, "loss": 1.0861, "step": 557 }, { "epoch": 0.12369248673436872, "grad_norm": 0.60546875, "learning_rate": 4.3827570921985814e-05, "loss": 1.1263, "step": 558 }, { "epoch": 0.1239141578575486, "grad_norm": 0.55859375, "learning_rate": 4.381648936170213e-05, "loss": 1.1174, "step": 559 }, { "epoch": 0.12413582898072846, "grad_norm": 0.59375, "learning_rate": 4.380540780141844e-05, "loss": 1.1781, "step": 560 }, { "epoch": 0.12435750010390834, "grad_norm": 0.62890625, "learning_rate": 4.3794326241134755e-05, "loss": 1.1783, "step": 561 }, { "epoch": 0.12457917122708821, "grad_norm": 0.5703125, "learning_rate": 4.3783244680851065e-05, "loss": 1.0421, "step": 562 }, { "epoch": 0.12480084235026809, "grad_norm": 0.64453125, "learning_rate": 4.377216312056738e-05, "loss": 1.1082, "step": 563 }, { "epoch": 0.12502251347344795, "grad_norm": 0.640625, "learning_rate": 4.376108156028369e-05, "loss": 1.1459, "step": 564 }, { "epoch": 0.12524418459662784, "grad_norm": 0.55078125, "learning_rate": 4.375e-05, "loss": 1.097, "step": 565 }, { "epoch": 0.1254658557198077, "grad_norm": 0.62890625, "learning_rate": 4.3738918439716316e-05, "loss": 1.1132, "step": 566 }, { "epoch": 0.12568752684298756, "grad_norm": 0.640625, "learning_rate": 4.3727836879432625e-05, "loss": 1.1678, "step": 567 }, { "epoch": 0.12590919796616745, "grad_norm": 0.58203125, "learning_rate": 4.371675531914894e-05, "loss": 1.1612, "step": 568 }, { "epoch": 0.12613086908934731, "grad_norm": 0.5703125, "learning_rate": 4.370567375886525e-05, "loss": 1.1793, "step": 569 }, { "epoch": 0.12635254021252718, "grad_norm": 0.6484375, "learning_rate": 4.369459219858156e-05, "loss": 1.1724, "step": 570 }, { "epoch": 0.12657421133570707, "grad_norm": 0.640625, "learning_rate": 4.3683510638297876e-05, "loss": 1.1745, "step": 571 }, { "epoch": 0.12679588245888693, "grad_norm": 0.53515625, "learning_rate": 4.3672429078014186e-05, "loss": 1.0962, "step": 572 }, { "epoch": 0.12701755358206682, "grad_norm": 0.6640625, "learning_rate": 4.36613475177305e-05, "loss": 1.0822, "step": 573 }, { "epoch": 0.12723922470524668, "grad_norm": 0.6953125, "learning_rate": 4.365026595744681e-05, "loss": 1.1365, "step": 574 }, { "epoch": 0.12746089582842654, "grad_norm": 0.5859375, "learning_rate": 4.363918439716313e-05, "loss": 1.1719, "step": 575 }, { "epoch": 0.12768256695160643, "grad_norm": 0.6953125, "learning_rate": 4.362810283687944e-05, "loss": 1.0451, "step": 576 }, { "epoch": 0.1279042380747863, "grad_norm": 0.7109375, "learning_rate": 4.3617021276595746e-05, "loss": 1.1154, "step": 577 }, { "epoch": 0.12812590919796618, "grad_norm": 0.58984375, "learning_rate": 4.360593971631206e-05, "loss": 1.1503, "step": 578 }, { "epoch": 0.12834758032114604, "grad_norm": 3.390625, "learning_rate": 4.359485815602837e-05, "loss": 1.1082, "step": 579 }, { "epoch": 0.1285692514443259, "grad_norm": 0.8046875, "learning_rate": 4.358377659574469e-05, "loss": 1.1631, "step": 580 }, { "epoch": 0.1287909225675058, "grad_norm": 0.5859375, "learning_rate": 4.3572695035461e-05, "loss": 1.1386, "step": 581 }, { "epoch": 0.12901259369068566, "grad_norm": 0.55859375, "learning_rate": 4.3561613475177307e-05, "loss": 1.1142, "step": 582 }, { "epoch": 0.12923426481386552, "grad_norm": 0.70703125, "learning_rate": 4.3550531914893616e-05, "loss": 1.1513, "step": 583 }, { "epoch": 0.1294559359370454, "grad_norm": 0.59765625, "learning_rate": 4.3539450354609925e-05, "loss": 1.12, "step": 584 }, { "epoch": 0.12967760706022527, "grad_norm": 0.56640625, "learning_rate": 4.352836879432624e-05, "loss": 1.095, "step": 585 }, { "epoch": 0.12989927818340516, "grad_norm": 0.6640625, "learning_rate": 4.351728723404255e-05, "loss": 1.1431, "step": 586 }, { "epoch": 0.13012094930658502, "grad_norm": 0.55859375, "learning_rate": 4.350620567375887e-05, "loss": 1.1165, "step": 587 }, { "epoch": 0.13034262042976488, "grad_norm": 0.546875, "learning_rate": 4.3495124113475176e-05, "loss": 1.1041, "step": 588 }, { "epoch": 0.13056429155294477, "grad_norm": 0.53125, "learning_rate": 4.348404255319149e-05, "loss": 1.1035, "step": 589 }, { "epoch": 0.13078596267612463, "grad_norm": 0.55859375, "learning_rate": 4.34729609929078e-05, "loss": 1.0795, "step": 590 }, { "epoch": 0.1310076337993045, "grad_norm": 0.609375, "learning_rate": 4.346187943262411e-05, "loss": 1.1414, "step": 591 }, { "epoch": 0.13122930492248439, "grad_norm": 0.57421875, "learning_rate": 4.345079787234043e-05, "loss": 1.0786, "step": 592 }, { "epoch": 0.13145097604566425, "grad_norm": 0.5703125, "learning_rate": 4.343971631205674e-05, "loss": 1.1834, "step": 593 }, { "epoch": 0.13167264716884414, "grad_norm": 0.54296875, "learning_rate": 4.342863475177305e-05, "loss": 1.049, "step": 594 }, { "epoch": 0.131894318292024, "grad_norm": 0.58203125, "learning_rate": 4.341755319148936e-05, "loss": 1.0533, "step": 595 }, { "epoch": 0.13211598941520386, "grad_norm": 0.578125, "learning_rate": 4.340647163120567e-05, "loss": 1.1854, "step": 596 }, { "epoch": 0.13233766053838375, "grad_norm": 0.578125, "learning_rate": 4.339539007092199e-05, "loss": 1.0785, "step": 597 }, { "epoch": 0.1325593316615636, "grad_norm": 0.55859375, "learning_rate": 4.33843085106383e-05, "loss": 1.0836, "step": 598 }, { "epoch": 0.13278100278474347, "grad_norm": 0.5859375, "learning_rate": 4.3373226950354613e-05, "loss": 1.2054, "step": 599 }, { "epoch": 0.13300267390792336, "grad_norm": 0.6015625, "learning_rate": 4.336214539007092e-05, "loss": 1.1217, "step": 600 }, { "epoch": 0.13322434503110323, "grad_norm": 0.5703125, "learning_rate": 4.335106382978724e-05, "loss": 1.1636, "step": 601 }, { "epoch": 0.13344601615428311, "grad_norm": 0.546875, "learning_rate": 4.333998226950355e-05, "loss": 1.1236, "step": 602 }, { "epoch": 0.13366768727746298, "grad_norm": 0.6328125, "learning_rate": 4.332890070921986e-05, "loss": 1.1087, "step": 603 }, { "epoch": 0.13388935840064284, "grad_norm": 0.5546875, "learning_rate": 4.3317819148936174e-05, "loss": 1.149, "step": 604 }, { "epoch": 0.13411102952382273, "grad_norm": 0.55859375, "learning_rate": 4.330673758865248e-05, "loss": 1.1638, "step": 605 }, { "epoch": 0.1343327006470026, "grad_norm": 0.60546875, "learning_rate": 4.32956560283688e-05, "loss": 1.1395, "step": 606 }, { "epoch": 0.13455437177018245, "grad_norm": 0.6328125, "learning_rate": 4.328457446808511e-05, "loss": 1.124, "step": 607 }, { "epoch": 0.13477604289336234, "grad_norm": 0.55078125, "learning_rate": 4.3273492907801425e-05, "loss": 1.0788, "step": 608 }, { "epoch": 0.1349977140165422, "grad_norm": 0.71484375, "learning_rate": 4.3262411347517734e-05, "loss": 1.1119, "step": 609 }, { "epoch": 0.1352193851397221, "grad_norm": 0.70703125, "learning_rate": 4.3251329787234044e-05, "loss": 1.0843, "step": 610 }, { "epoch": 0.13544105626290195, "grad_norm": 0.6640625, "learning_rate": 4.324024822695036e-05, "loss": 1.1962, "step": 611 }, { "epoch": 0.13566272738608182, "grad_norm": 0.6328125, "learning_rate": 4.322916666666667e-05, "loss": 1.1139, "step": 612 }, { "epoch": 0.1358843985092617, "grad_norm": 0.61328125, "learning_rate": 4.3218085106382986e-05, "loss": 1.2608, "step": 613 }, { "epoch": 0.13610606963244157, "grad_norm": 0.5703125, "learning_rate": 4.3207003546099295e-05, "loss": 1.1481, "step": 614 }, { "epoch": 0.13632774075562143, "grad_norm": 0.5625, "learning_rate": 4.3195921985815604e-05, "loss": 1.1273, "step": 615 }, { "epoch": 0.13654941187880132, "grad_norm": 0.6171875, "learning_rate": 4.318484042553192e-05, "loss": 1.1791, "step": 616 }, { "epoch": 0.13677108300198118, "grad_norm": 0.59375, "learning_rate": 4.317375886524823e-05, "loss": 1.0876, "step": 617 }, { "epoch": 0.13699275412516107, "grad_norm": 0.55859375, "learning_rate": 4.316267730496454e-05, "loss": 1.1308, "step": 618 }, { "epoch": 0.13721442524834093, "grad_norm": 0.59765625, "learning_rate": 4.315159574468085e-05, "loss": 1.1021, "step": 619 }, { "epoch": 0.1374360963715208, "grad_norm": 0.59765625, "learning_rate": 4.3140514184397165e-05, "loss": 1.0917, "step": 620 }, { "epoch": 0.13765776749470068, "grad_norm": 0.59765625, "learning_rate": 4.3129432624113474e-05, "loss": 1.1821, "step": 621 }, { "epoch": 0.13787943861788055, "grad_norm": 0.609375, "learning_rate": 4.311835106382979e-05, "loss": 1.0716, "step": 622 }, { "epoch": 0.1381011097410604, "grad_norm": 0.58203125, "learning_rate": 4.31072695035461e-05, "loss": 1.0054, "step": 623 }, { "epoch": 0.1383227808642403, "grad_norm": 0.66015625, "learning_rate": 4.309618794326241e-05, "loss": 1.09, "step": 624 }, { "epoch": 0.13854445198742016, "grad_norm": 0.62890625, "learning_rate": 4.3085106382978725e-05, "loss": 1.0722, "step": 625 }, { "epoch": 0.13876612311060005, "grad_norm": 0.5546875, "learning_rate": 4.3074024822695035e-05, "loss": 1.0691, "step": 626 }, { "epoch": 0.1389877942337799, "grad_norm": 0.59375, "learning_rate": 4.306294326241135e-05, "loss": 1.1729, "step": 627 }, { "epoch": 0.13920946535695977, "grad_norm": 0.578125, "learning_rate": 4.305186170212766e-05, "loss": 1.1905, "step": 628 }, { "epoch": 0.13943113648013966, "grad_norm": 0.5703125, "learning_rate": 4.304078014184397e-05, "loss": 1.0868, "step": 629 }, { "epoch": 0.13965280760331952, "grad_norm": 0.58203125, "learning_rate": 4.3029698581560286e-05, "loss": 1.1288, "step": 630 }, { "epoch": 0.13987447872649938, "grad_norm": 0.5546875, "learning_rate": 4.3018617021276595e-05, "loss": 1.1323, "step": 631 }, { "epoch": 0.14009614984967927, "grad_norm": 0.5625, "learning_rate": 4.300753546099291e-05, "loss": 1.1331, "step": 632 }, { "epoch": 0.14031782097285914, "grad_norm": 0.5546875, "learning_rate": 4.299645390070922e-05, "loss": 1.1377, "step": 633 }, { "epoch": 0.14053949209603903, "grad_norm": 0.5625, "learning_rate": 4.298537234042554e-05, "loss": 1.101, "step": 634 }, { "epoch": 0.1407611632192189, "grad_norm": 0.59375, "learning_rate": 4.2974290780141846e-05, "loss": 1.218, "step": 635 }, { "epoch": 0.14098283434239875, "grad_norm": 0.55078125, "learning_rate": 4.2963209219858156e-05, "loss": 1.1177, "step": 636 }, { "epoch": 0.14120450546557864, "grad_norm": 0.5625, "learning_rate": 4.295212765957447e-05, "loss": 1.0796, "step": 637 }, { "epoch": 0.1414261765887585, "grad_norm": 0.56640625, "learning_rate": 4.294104609929078e-05, "loss": 1.1, "step": 638 }, { "epoch": 0.14164784771193836, "grad_norm": 0.58203125, "learning_rate": 4.29299645390071e-05, "loss": 1.1496, "step": 639 }, { "epoch": 0.14186951883511825, "grad_norm": 0.55078125, "learning_rate": 4.291888297872341e-05, "loss": 1.1581, "step": 640 }, { "epoch": 0.14209118995829811, "grad_norm": 0.5703125, "learning_rate": 4.2907801418439716e-05, "loss": 1.1138, "step": 641 }, { "epoch": 0.142312861081478, "grad_norm": 0.5703125, "learning_rate": 4.289671985815603e-05, "loss": 1.1154, "step": 642 }, { "epoch": 0.14253453220465787, "grad_norm": 0.58984375, "learning_rate": 4.288563829787234e-05, "loss": 1.1473, "step": 643 }, { "epoch": 0.14275620332783773, "grad_norm": 0.58984375, "learning_rate": 4.287455673758866e-05, "loss": 1.1447, "step": 644 }, { "epoch": 0.14297787445101762, "grad_norm": 0.56640625, "learning_rate": 4.286347517730497e-05, "loss": 1.1132, "step": 645 }, { "epoch": 0.14319954557419748, "grad_norm": 0.67578125, "learning_rate": 4.285239361702128e-05, "loss": 1.1097, "step": 646 }, { "epoch": 0.14342121669737734, "grad_norm": 0.5546875, "learning_rate": 4.284131205673759e-05, "loss": 1.1375, "step": 647 }, { "epoch": 0.14364288782055723, "grad_norm": 0.57421875, "learning_rate": 4.28302304964539e-05, "loss": 1.1122, "step": 648 }, { "epoch": 0.1438645589437371, "grad_norm": 0.63671875, "learning_rate": 4.281914893617022e-05, "loss": 1.2522, "step": 649 }, { "epoch": 0.14408623006691698, "grad_norm": 0.59765625, "learning_rate": 4.280806737588653e-05, "loss": 1.0806, "step": 650 }, { "epoch": 0.14430790119009684, "grad_norm": 0.5625, "learning_rate": 4.2796985815602844e-05, "loss": 1.0597, "step": 651 }, { "epoch": 0.1445295723132767, "grad_norm": 0.55078125, "learning_rate": 4.278590425531915e-05, "loss": 1.0353, "step": 652 }, { "epoch": 0.1447512434364566, "grad_norm": 0.5703125, "learning_rate": 4.277482269503546e-05, "loss": 1.1385, "step": 653 }, { "epoch": 0.14497291455963646, "grad_norm": 0.59375, "learning_rate": 4.276374113475177e-05, "loss": 1.1809, "step": 654 }, { "epoch": 0.14519458568281632, "grad_norm": 0.54296875, "learning_rate": 4.275265957446808e-05, "loss": 1.0719, "step": 655 }, { "epoch": 0.1454162568059962, "grad_norm": 0.58984375, "learning_rate": 4.27415780141844e-05, "loss": 1.2039, "step": 656 }, { "epoch": 0.14563792792917607, "grad_norm": 0.57421875, "learning_rate": 4.273049645390071e-05, "loss": 1.1732, "step": 657 }, { "epoch": 0.14585959905235596, "grad_norm": 0.5703125, "learning_rate": 4.271941489361702e-05, "loss": 1.1045, "step": 658 }, { "epoch": 0.14608127017553582, "grad_norm": 0.55078125, "learning_rate": 4.270833333333333e-05, "loss": 1.0836, "step": 659 }, { "epoch": 0.14630294129871568, "grad_norm": 0.55078125, "learning_rate": 4.269725177304965e-05, "loss": 1.0504, "step": 660 }, { "epoch": 0.14652461242189557, "grad_norm": 0.60546875, "learning_rate": 4.268617021276596e-05, "loss": 1.1062, "step": 661 }, { "epoch": 0.14674628354507543, "grad_norm": 0.59765625, "learning_rate": 4.267508865248227e-05, "loss": 1.1401, "step": 662 }, { "epoch": 0.1469679546682553, "grad_norm": 0.58984375, "learning_rate": 4.2664007092198584e-05, "loss": 1.0788, "step": 663 }, { "epoch": 0.14718962579143519, "grad_norm": 0.58203125, "learning_rate": 4.265292553191489e-05, "loss": 1.149, "step": 664 }, { "epoch": 0.14741129691461505, "grad_norm": 0.5546875, "learning_rate": 4.264184397163121e-05, "loss": 1.1308, "step": 665 }, { "epoch": 0.14763296803779494, "grad_norm": 0.5859375, "learning_rate": 4.263076241134752e-05, "loss": 1.1653, "step": 666 }, { "epoch": 0.1478546391609748, "grad_norm": 0.5859375, "learning_rate": 4.2619680851063835e-05, "loss": 1.1009, "step": 667 }, { "epoch": 0.14807631028415466, "grad_norm": 0.57421875, "learning_rate": 4.2608599290780144e-05, "loss": 1.1914, "step": 668 }, { "epoch": 0.14829798140733455, "grad_norm": 0.5546875, "learning_rate": 4.259751773049645e-05, "loss": 1.0183, "step": 669 }, { "epoch": 0.1485196525305144, "grad_norm": 0.58203125, "learning_rate": 4.258643617021277e-05, "loss": 1.1266, "step": 670 }, { "epoch": 0.14874132365369427, "grad_norm": 0.609375, "learning_rate": 4.257535460992908e-05, "loss": 1.0658, "step": 671 }, { "epoch": 0.14896299477687416, "grad_norm": 0.5390625, "learning_rate": 4.2564273049645395e-05, "loss": 1.1203, "step": 672 }, { "epoch": 0.14918466590005403, "grad_norm": 0.55078125, "learning_rate": 4.2553191489361704e-05, "loss": 1.0395, "step": 673 }, { "epoch": 0.14940633702323391, "grad_norm": 0.59765625, "learning_rate": 4.2542109929078014e-05, "loss": 1.1655, "step": 674 }, { "epoch": 0.14962800814641378, "grad_norm": 0.55078125, "learning_rate": 4.253102836879433e-05, "loss": 1.0787, "step": 675 }, { "epoch": 0.14984967926959364, "grad_norm": 0.546875, "learning_rate": 4.251994680851064e-05, "loss": 1.1816, "step": 676 }, { "epoch": 0.15007135039277353, "grad_norm": 0.57421875, "learning_rate": 4.2508865248226956e-05, "loss": 1.1623, "step": 677 }, { "epoch": 0.1502930215159534, "grad_norm": 0.62890625, "learning_rate": 4.2497783687943265e-05, "loss": 1.0942, "step": 678 }, { "epoch": 0.15051469263913328, "grad_norm": 0.61328125, "learning_rate": 4.248670212765958e-05, "loss": 1.2645, "step": 679 }, { "epoch": 0.15073636376231314, "grad_norm": 0.5703125, "learning_rate": 4.247562056737589e-05, "loss": 1.1063, "step": 680 }, { "epoch": 0.150958034885493, "grad_norm": 0.5703125, "learning_rate": 4.24645390070922e-05, "loss": 1.0815, "step": 681 }, { "epoch": 0.1511797060086729, "grad_norm": 0.57421875, "learning_rate": 4.2453457446808516e-05, "loss": 1.1624, "step": 682 }, { "epoch": 0.15140137713185275, "grad_norm": 0.56640625, "learning_rate": 4.2442375886524825e-05, "loss": 1.1466, "step": 683 }, { "epoch": 0.15162304825503262, "grad_norm": 0.59765625, "learning_rate": 4.243129432624114e-05, "loss": 1.0659, "step": 684 }, { "epoch": 0.1518447193782125, "grad_norm": 0.609375, "learning_rate": 4.242021276595745e-05, "loss": 1.0483, "step": 685 }, { "epoch": 0.15206639050139237, "grad_norm": 0.52734375, "learning_rate": 4.240913120567376e-05, "loss": 1.0702, "step": 686 }, { "epoch": 0.15228806162457226, "grad_norm": 0.55859375, "learning_rate": 4.2398049645390076e-05, "loss": 1.0716, "step": 687 }, { "epoch": 0.15250973274775212, "grad_norm": 0.59765625, "learning_rate": 4.2386968085106386e-05, "loss": 1.0993, "step": 688 }, { "epoch": 0.15273140387093198, "grad_norm": 0.56640625, "learning_rate": 4.2375886524822695e-05, "loss": 1.1247, "step": 689 }, { "epoch": 0.15295307499411187, "grad_norm": 0.56640625, "learning_rate": 4.2364804964539005e-05, "loss": 1.1853, "step": 690 }, { "epoch": 0.15317474611729173, "grad_norm": 0.53125, "learning_rate": 4.235372340425532e-05, "loss": 1.0719, "step": 691 }, { "epoch": 0.1533964172404716, "grad_norm": 0.546875, "learning_rate": 4.234264184397163e-05, "loss": 1.0023, "step": 692 }, { "epoch": 0.15361808836365148, "grad_norm": 0.57421875, "learning_rate": 4.2331560283687946e-05, "loss": 1.166, "step": 693 }, { "epoch": 0.15383975948683135, "grad_norm": 0.61328125, "learning_rate": 4.2320478723404256e-05, "loss": 1.1636, "step": 694 }, { "epoch": 0.15406143061001124, "grad_norm": 0.5546875, "learning_rate": 4.2309397163120565e-05, "loss": 1.1256, "step": 695 }, { "epoch": 0.1542831017331911, "grad_norm": 0.57421875, "learning_rate": 4.229831560283688e-05, "loss": 1.1631, "step": 696 }, { "epoch": 0.15450477285637096, "grad_norm": 0.55859375, "learning_rate": 4.228723404255319e-05, "loss": 1.1216, "step": 697 }, { "epoch": 0.15472644397955085, "grad_norm": 0.61328125, "learning_rate": 4.227615248226951e-05, "loss": 1.1284, "step": 698 }, { "epoch": 0.1549481151027307, "grad_norm": 0.5546875, "learning_rate": 4.2265070921985816e-05, "loss": 1.0667, "step": 699 }, { "epoch": 0.15516978622591057, "grad_norm": 0.57421875, "learning_rate": 4.2253989361702126e-05, "loss": 1.1627, "step": 700 }, { "epoch": 0.15539145734909046, "grad_norm": 0.625, "learning_rate": 4.224290780141844e-05, "loss": 1.207, "step": 701 }, { "epoch": 0.15561312847227032, "grad_norm": 0.60546875, "learning_rate": 4.223182624113475e-05, "loss": 1.2099, "step": 702 }, { "epoch": 0.1558347995954502, "grad_norm": 0.60546875, "learning_rate": 4.222074468085107e-05, "loss": 1.1193, "step": 703 }, { "epoch": 0.15605647071863007, "grad_norm": 0.55859375, "learning_rate": 4.220966312056738e-05, "loss": 1.0355, "step": 704 }, { "epoch": 0.15627814184180994, "grad_norm": 0.65625, "learning_rate": 4.219858156028369e-05, "loss": 1.2025, "step": 705 }, { "epoch": 0.15649981296498983, "grad_norm": 0.56640625, "learning_rate": 4.21875e-05, "loss": 1.1003, "step": 706 }, { "epoch": 0.1567214840881697, "grad_norm": 0.5703125, "learning_rate": 4.217641843971631e-05, "loss": 1.1118, "step": 707 }, { "epoch": 0.15694315521134955, "grad_norm": 0.69140625, "learning_rate": 4.216533687943263e-05, "loss": 1.1456, "step": 708 }, { "epoch": 0.15716482633452944, "grad_norm": 0.59765625, "learning_rate": 4.215425531914894e-05, "loss": 1.1172, "step": 709 }, { "epoch": 0.1573864974577093, "grad_norm": 0.57421875, "learning_rate": 4.214317375886525e-05, "loss": 1.0819, "step": 710 }, { "epoch": 0.1576081685808892, "grad_norm": 0.6640625, "learning_rate": 4.213209219858156e-05, "loss": 1.1855, "step": 711 }, { "epoch": 0.15782983970406905, "grad_norm": 0.59765625, "learning_rate": 4.212101063829787e-05, "loss": 1.1358, "step": 712 }, { "epoch": 0.15805151082724891, "grad_norm": 0.55859375, "learning_rate": 4.210992907801419e-05, "loss": 1.0407, "step": 713 }, { "epoch": 0.1582731819504288, "grad_norm": 0.671875, "learning_rate": 4.20988475177305e-05, "loss": 1.0603, "step": 714 }, { "epoch": 0.15849485307360867, "grad_norm": 0.578125, "learning_rate": 4.2087765957446814e-05, "loss": 1.0826, "step": 715 }, { "epoch": 0.15871652419678853, "grad_norm": 0.55859375, "learning_rate": 4.207668439716312e-05, "loss": 1.1281, "step": 716 }, { "epoch": 0.15893819531996842, "grad_norm": 0.5859375, "learning_rate": 4.206560283687944e-05, "loss": 1.1546, "step": 717 }, { "epoch": 0.15915986644314828, "grad_norm": 0.6875, "learning_rate": 4.205452127659575e-05, "loss": 1.0915, "step": 718 }, { "epoch": 0.15938153756632817, "grad_norm": 0.5625, "learning_rate": 4.204343971631206e-05, "loss": 1.0548, "step": 719 }, { "epoch": 0.15960320868950803, "grad_norm": 0.56640625, "learning_rate": 4.2032358156028374e-05, "loss": 1.113, "step": 720 }, { "epoch": 0.1598248798126879, "grad_norm": 0.625, "learning_rate": 4.2021276595744684e-05, "loss": 0.9687, "step": 721 }, { "epoch": 0.16004655093586778, "grad_norm": 0.62890625, "learning_rate": 4.2010195035461e-05, "loss": 1.1048, "step": 722 }, { "epoch": 0.16026822205904764, "grad_norm": 0.58203125, "learning_rate": 4.199911347517731e-05, "loss": 1.1723, "step": 723 }, { "epoch": 0.1604898931822275, "grad_norm": 0.6015625, "learning_rate": 4.198803191489362e-05, "loss": 1.1319, "step": 724 }, { "epoch": 0.1607115643054074, "grad_norm": 0.546875, "learning_rate": 4.197695035460993e-05, "loss": 1.0508, "step": 725 }, { "epoch": 0.16093323542858726, "grad_norm": 0.578125, "learning_rate": 4.196586879432624e-05, "loss": 1.1642, "step": 726 }, { "epoch": 0.16115490655176715, "grad_norm": 0.57421875, "learning_rate": 4.1954787234042554e-05, "loss": 1.1295, "step": 727 }, { "epoch": 0.161376577674947, "grad_norm": 0.56640625, "learning_rate": 4.194370567375886e-05, "loss": 1.1529, "step": 728 }, { "epoch": 0.16159824879812687, "grad_norm": 0.546875, "learning_rate": 4.193262411347518e-05, "loss": 1.0953, "step": 729 }, { "epoch": 0.16181991992130676, "grad_norm": 0.5703125, "learning_rate": 4.192154255319149e-05, "loss": 1.0837, "step": 730 }, { "epoch": 0.16204159104448662, "grad_norm": 0.5859375, "learning_rate": 4.1910460992907805e-05, "loss": 1.1024, "step": 731 }, { "epoch": 0.16226326216766648, "grad_norm": 0.60546875, "learning_rate": 4.1899379432624114e-05, "loss": 1.2048, "step": 732 }, { "epoch": 0.16248493329084637, "grad_norm": 0.5390625, "learning_rate": 4.188829787234042e-05, "loss": 1.071, "step": 733 }, { "epoch": 0.16270660441402623, "grad_norm": 0.55859375, "learning_rate": 4.187721631205674e-05, "loss": 1.1015, "step": 734 }, { "epoch": 0.16292827553720612, "grad_norm": 0.5625, "learning_rate": 4.186613475177305e-05, "loss": 1.1471, "step": 735 }, { "epoch": 0.16314994666038599, "grad_norm": 0.58203125, "learning_rate": 4.1855053191489365e-05, "loss": 1.095, "step": 736 }, { "epoch": 0.16337161778356585, "grad_norm": 0.56640625, "learning_rate": 4.1843971631205674e-05, "loss": 1.1136, "step": 737 }, { "epoch": 0.16359328890674574, "grad_norm": 0.53515625, "learning_rate": 4.183289007092199e-05, "loss": 1.1099, "step": 738 }, { "epoch": 0.1638149600299256, "grad_norm": 0.578125, "learning_rate": 4.18218085106383e-05, "loss": 1.1404, "step": 739 }, { "epoch": 0.16403663115310546, "grad_norm": 0.578125, "learning_rate": 4.181072695035461e-05, "loss": 1.1531, "step": 740 }, { "epoch": 0.16425830227628535, "grad_norm": 0.546875, "learning_rate": 4.1799645390070926e-05, "loss": 1.0379, "step": 741 }, { "epoch": 0.1644799733994652, "grad_norm": 0.578125, "learning_rate": 4.1788563829787235e-05, "loss": 1.1378, "step": 742 }, { "epoch": 0.1647016445226451, "grad_norm": 0.53515625, "learning_rate": 4.177748226950355e-05, "loss": 1.0489, "step": 743 }, { "epoch": 0.16492331564582496, "grad_norm": 0.5625, "learning_rate": 4.176640070921986e-05, "loss": 1.1667, "step": 744 }, { "epoch": 0.16514498676900483, "grad_norm": 0.578125, "learning_rate": 4.175531914893617e-05, "loss": 1.1528, "step": 745 }, { "epoch": 0.16536665789218472, "grad_norm": 0.56640625, "learning_rate": 4.1744237588652486e-05, "loss": 1.0581, "step": 746 }, { "epoch": 0.16558832901536458, "grad_norm": 0.5546875, "learning_rate": 4.1733156028368795e-05, "loss": 1.1598, "step": 747 }, { "epoch": 0.16581000013854444, "grad_norm": 0.5625, "learning_rate": 4.172207446808511e-05, "loss": 1.0955, "step": 748 }, { "epoch": 0.16603167126172433, "grad_norm": 0.55078125, "learning_rate": 4.171099290780142e-05, "loss": 1.1391, "step": 749 }, { "epoch": 0.1662533423849042, "grad_norm": 0.5703125, "learning_rate": 4.169991134751774e-05, "loss": 1.09, "step": 750 }, { "epoch": 0.16647501350808408, "grad_norm": 0.546875, "learning_rate": 4.1688829787234047e-05, "loss": 1.1716, "step": 751 }, { "epoch": 0.16669668463126394, "grad_norm": 0.56640625, "learning_rate": 4.1677748226950356e-05, "loss": 1.1607, "step": 752 }, { "epoch": 0.1669183557544438, "grad_norm": 0.56640625, "learning_rate": 4.166666666666667e-05, "loss": 1.0705, "step": 753 }, { "epoch": 0.1671400268776237, "grad_norm": 0.57421875, "learning_rate": 4.165558510638298e-05, "loss": 1.0974, "step": 754 }, { "epoch": 0.16736169800080355, "grad_norm": 0.56640625, "learning_rate": 4.16445035460993e-05, "loss": 1.04, "step": 755 }, { "epoch": 0.16758336912398342, "grad_norm": 0.62109375, "learning_rate": 4.163342198581561e-05, "loss": 1.1617, "step": 756 }, { "epoch": 0.1678050402471633, "grad_norm": 0.62890625, "learning_rate": 4.1622340425531916e-05, "loss": 1.0709, "step": 757 }, { "epoch": 0.16802671137034317, "grad_norm": 0.61328125, "learning_rate": 4.161125886524823e-05, "loss": 1.1535, "step": 758 }, { "epoch": 0.16824838249352306, "grad_norm": 0.60546875, "learning_rate": 4.1600177304964535e-05, "loss": 1.1539, "step": 759 }, { "epoch": 0.16847005361670292, "grad_norm": 0.5859375, "learning_rate": 4.158909574468085e-05, "loss": 1.0717, "step": 760 }, { "epoch": 0.16869172473988278, "grad_norm": 0.56640625, "learning_rate": 4.157801418439716e-05, "loss": 1.1315, "step": 761 }, { "epoch": 0.16891339586306267, "grad_norm": 0.55859375, "learning_rate": 4.156693262411348e-05, "loss": 1.0891, "step": 762 }, { "epoch": 0.16913506698624253, "grad_norm": 0.546875, "learning_rate": 4.1555851063829786e-05, "loss": 1.0439, "step": 763 }, { "epoch": 0.1693567381094224, "grad_norm": 0.58984375, "learning_rate": 4.15447695035461e-05, "loss": 1.1215, "step": 764 }, { "epoch": 0.16957840923260228, "grad_norm": 0.5703125, "learning_rate": 4.153368794326241e-05, "loss": 1.1352, "step": 765 }, { "epoch": 0.16980008035578215, "grad_norm": 0.546875, "learning_rate": 4.152260638297872e-05, "loss": 1.1519, "step": 766 }, { "epoch": 0.17002175147896204, "grad_norm": 0.54296875, "learning_rate": 4.151152482269504e-05, "loss": 1.1452, "step": 767 }, { "epoch": 0.1702434226021419, "grad_norm": 0.60546875, "learning_rate": 4.150044326241135e-05, "loss": 1.1287, "step": 768 }, { "epoch": 0.17046509372532176, "grad_norm": 0.5859375, "learning_rate": 4.148936170212766e-05, "loss": 1.1141, "step": 769 }, { "epoch": 0.17068676484850165, "grad_norm": 0.5625, "learning_rate": 4.147828014184397e-05, "loss": 1.0781, "step": 770 }, { "epoch": 0.1709084359716815, "grad_norm": 0.55078125, "learning_rate": 4.146719858156028e-05, "loss": 1.1908, "step": 771 }, { "epoch": 0.1711301070948614, "grad_norm": 0.5625, "learning_rate": 4.14561170212766e-05, "loss": 1.1105, "step": 772 }, { "epoch": 0.17135177821804126, "grad_norm": 0.53125, "learning_rate": 4.144503546099291e-05, "loss": 1.0326, "step": 773 }, { "epoch": 0.17157344934122112, "grad_norm": 0.58203125, "learning_rate": 4.143395390070922e-05, "loss": 1.0847, "step": 774 }, { "epoch": 0.171795120464401, "grad_norm": 0.55859375, "learning_rate": 4.142287234042553e-05, "loss": 1.0905, "step": 775 }, { "epoch": 0.17201679158758088, "grad_norm": 0.609375, "learning_rate": 4.141179078014185e-05, "loss": 1.092, "step": 776 }, { "epoch": 0.17223846271076074, "grad_norm": 0.5546875, "learning_rate": 4.140070921985816e-05, "loss": 1.158, "step": 777 }, { "epoch": 0.17246013383394063, "grad_norm": 0.55078125, "learning_rate": 4.138962765957447e-05, "loss": 1.1017, "step": 778 }, { "epoch": 0.1726818049571205, "grad_norm": 0.59765625, "learning_rate": 4.1378546099290784e-05, "loss": 1.096, "step": 779 }, { "epoch": 0.17290347608030038, "grad_norm": 0.57421875, "learning_rate": 4.136746453900709e-05, "loss": 1.1275, "step": 780 }, { "epoch": 0.17312514720348024, "grad_norm": 0.60546875, "learning_rate": 4.135638297872341e-05, "loss": 1.2182, "step": 781 }, { "epoch": 0.1733468183266601, "grad_norm": 0.5703125, "learning_rate": 4.134530141843972e-05, "loss": 1.0946, "step": 782 }, { "epoch": 0.17356848944984, "grad_norm": 0.578125, "learning_rate": 4.133421985815603e-05, "loss": 1.2072, "step": 783 }, { "epoch": 0.17379016057301985, "grad_norm": 0.55078125, "learning_rate": 4.1323138297872344e-05, "loss": 1.1059, "step": 784 }, { "epoch": 0.17401183169619971, "grad_norm": 0.55078125, "learning_rate": 4.1312056737588654e-05, "loss": 1.1039, "step": 785 }, { "epoch": 0.1742335028193796, "grad_norm": 0.63671875, "learning_rate": 4.130097517730497e-05, "loss": 1.1967, "step": 786 }, { "epoch": 0.17445517394255947, "grad_norm": 0.5703125, "learning_rate": 4.128989361702128e-05, "loss": 1.0762, "step": 787 }, { "epoch": 0.17467684506573936, "grad_norm": 0.59375, "learning_rate": 4.1278812056737595e-05, "loss": 1.1092, "step": 788 }, { "epoch": 0.17489851618891922, "grad_norm": 0.57421875, "learning_rate": 4.1267730496453905e-05, "loss": 1.1121, "step": 789 }, { "epoch": 0.17512018731209908, "grad_norm": 0.6171875, "learning_rate": 4.1256648936170214e-05, "loss": 1.115, "step": 790 }, { "epoch": 0.17534185843527897, "grad_norm": 0.5546875, "learning_rate": 4.124556737588653e-05, "loss": 1.1062, "step": 791 }, { "epoch": 0.17556352955845883, "grad_norm": 0.55859375, "learning_rate": 4.123448581560284e-05, "loss": 1.1642, "step": 792 }, { "epoch": 0.1757852006816387, "grad_norm": 0.58203125, "learning_rate": 4.1223404255319156e-05, "loss": 1.1158, "step": 793 }, { "epoch": 0.17600687180481858, "grad_norm": 0.55078125, "learning_rate": 4.1212322695035465e-05, "loss": 1.1088, "step": 794 }, { "epoch": 0.17622854292799844, "grad_norm": 0.56640625, "learning_rate": 4.1201241134751775e-05, "loss": 0.9708, "step": 795 }, { "epoch": 0.17645021405117833, "grad_norm": 0.60546875, "learning_rate": 4.1190159574468084e-05, "loss": 1.1183, "step": 796 }, { "epoch": 0.1766718851743582, "grad_norm": 0.5625, "learning_rate": 4.11790780141844e-05, "loss": 1.1391, "step": 797 }, { "epoch": 0.17689355629753806, "grad_norm": 0.56640625, "learning_rate": 4.116799645390071e-05, "loss": 1.0771, "step": 798 }, { "epoch": 0.17711522742071795, "grad_norm": 0.5859375, "learning_rate": 4.115691489361702e-05, "loss": 1.1332, "step": 799 }, { "epoch": 0.1773368985438978, "grad_norm": 0.5625, "learning_rate": 4.1145833333333335e-05, "loss": 1.0507, "step": 800 }, { "epoch": 0.17755856966707767, "grad_norm": 0.59375, "learning_rate": 4.1134751773049644e-05, "loss": 1.0821, "step": 801 }, { "epoch": 0.17778024079025756, "grad_norm": 0.55859375, "learning_rate": 4.112367021276596e-05, "loss": 1.0921, "step": 802 }, { "epoch": 0.17800191191343742, "grad_norm": 0.55859375, "learning_rate": 4.111258865248227e-05, "loss": 1.0537, "step": 803 }, { "epoch": 0.1782235830366173, "grad_norm": 0.58203125, "learning_rate": 4.110150709219858e-05, "loss": 1.1826, "step": 804 }, { "epoch": 0.17844525415979717, "grad_norm": 0.58203125, "learning_rate": 4.1090425531914896e-05, "loss": 1.1159, "step": 805 }, { "epoch": 0.17866692528297703, "grad_norm": 0.55859375, "learning_rate": 4.1079343971631205e-05, "loss": 1.0787, "step": 806 }, { "epoch": 0.17888859640615692, "grad_norm": 0.5390625, "learning_rate": 4.106826241134752e-05, "loss": 1.0752, "step": 807 }, { "epoch": 0.1791102675293368, "grad_norm": 0.546875, "learning_rate": 4.105718085106383e-05, "loss": 1.081, "step": 808 }, { "epoch": 0.17933193865251665, "grad_norm": 0.58203125, "learning_rate": 4.104609929078015e-05, "loss": 1.1961, "step": 809 }, { "epoch": 0.17955360977569654, "grad_norm": 0.55859375, "learning_rate": 4.1035017730496456e-05, "loss": 1.1239, "step": 810 }, { "epoch": 0.1797752808988764, "grad_norm": 0.59375, "learning_rate": 4.1023936170212765e-05, "loss": 1.1824, "step": 811 }, { "epoch": 0.1799969520220563, "grad_norm": 0.546875, "learning_rate": 4.101285460992908e-05, "loss": 1.008, "step": 812 }, { "epoch": 0.18021862314523615, "grad_norm": 0.5859375, "learning_rate": 4.100177304964539e-05, "loss": 1.1917, "step": 813 }, { "epoch": 0.180440294268416, "grad_norm": 0.55859375, "learning_rate": 4.099069148936171e-05, "loss": 1.0886, "step": 814 }, { "epoch": 0.1806619653915959, "grad_norm": 0.59765625, "learning_rate": 4.0979609929078017e-05, "loss": 1.1011, "step": 815 }, { "epoch": 0.18088363651477576, "grad_norm": 0.55078125, "learning_rate": 4.0968528368794326e-05, "loss": 1.1442, "step": 816 }, { "epoch": 0.18110530763795563, "grad_norm": 0.578125, "learning_rate": 4.095744680851064e-05, "loss": 1.1048, "step": 817 }, { "epoch": 0.18132697876113552, "grad_norm": 0.640625, "learning_rate": 4.094636524822695e-05, "loss": 1.0257, "step": 818 }, { "epoch": 0.18154864988431538, "grad_norm": 0.53125, "learning_rate": 4.093528368794327e-05, "loss": 1.0855, "step": 819 }, { "epoch": 0.18177032100749527, "grad_norm": 0.5546875, "learning_rate": 4.092420212765958e-05, "loss": 1.0904, "step": 820 }, { "epoch": 0.18199199213067513, "grad_norm": 0.546875, "learning_rate": 4.091312056737589e-05, "loss": 1.1451, "step": 821 }, { "epoch": 0.182213663253855, "grad_norm": 0.56640625, "learning_rate": 4.09020390070922e-05, "loss": 1.0625, "step": 822 }, { "epoch": 0.18243533437703488, "grad_norm": 0.609375, "learning_rate": 4.089095744680851e-05, "loss": 1.0747, "step": 823 }, { "epoch": 0.18265700550021474, "grad_norm": 0.64453125, "learning_rate": 4.087987588652483e-05, "loss": 1.1276, "step": 824 }, { "epoch": 0.1828786766233946, "grad_norm": 0.58203125, "learning_rate": 4.086879432624114e-05, "loss": 1.1015, "step": 825 }, { "epoch": 0.1831003477465745, "grad_norm": 0.58984375, "learning_rate": 4.0857712765957454e-05, "loss": 1.093, "step": 826 }, { "epoch": 0.18332201886975436, "grad_norm": 0.62109375, "learning_rate": 4.084663120567376e-05, "loss": 1.1077, "step": 827 }, { "epoch": 0.18354368999293424, "grad_norm": 0.6015625, "learning_rate": 4.083554964539007e-05, "loss": 1.1412, "step": 828 }, { "epoch": 0.1837653611161141, "grad_norm": 0.578125, "learning_rate": 4.082446808510639e-05, "loss": 1.1167, "step": 829 }, { "epoch": 0.18398703223929397, "grad_norm": 0.5703125, "learning_rate": 4.081338652482269e-05, "loss": 1.049, "step": 830 }, { "epoch": 0.18420870336247386, "grad_norm": 0.56640625, "learning_rate": 4.080230496453901e-05, "loss": 1.0411, "step": 831 }, { "epoch": 0.18443037448565372, "grad_norm": 0.55859375, "learning_rate": 4.079122340425532e-05, "loss": 1.1635, "step": 832 }, { "epoch": 0.18465204560883358, "grad_norm": 0.59375, "learning_rate": 4.078014184397163e-05, "loss": 1.0864, "step": 833 }, { "epoch": 0.18487371673201347, "grad_norm": 0.57421875, "learning_rate": 4.076906028368794e-05, "loss": 1.1863, "step": 834 }, { "epoch": 0.18509538785519333, "grad_norm": 0.5859375, "learning_rate": 4.075797872340426e-05, "loss": 1.141, "step": 835 }, { "epoch": 0.18531705897837322, "grad_norm": 0.58984375, "learning_rate": 4.074689716312057e-05, "loss": 1.1105, "step": 836 }, { "epoch": 0.18553873010155308, "grad_norm": 0.56640625, "learning_rate": 4.073581560283688e-05, "loss": 1.1292, "step": 837 }, { "epoch": 0.18576040122473295, "grad_norm": 0.54296875, "learning_rate": 4.072473404255319e-05, "loss": 1.1166, "step": 838 }, { "epoch": 0.18598207234791284, "grad_norm": 0.640625, "learning_rate": 4.07136524822695e-05, "loss": 1.1105, "step": 839 }, { "epoch": 0.1862037434710927, "grad_norm": 0.59765625, "learning_rate": 4.070257092198582e-05, "loss": 1.0785, "step": 840 }, { "epoch": 0.18642541459427256, "grad_norm": 0.56640625, "learning_rate": 4.069148936170213e-05, "loss": 1.0879, "step": 841 }, { "epoch": 0.18664708571745245, "grad_norm": 0.546875, "learning_rate": 4.068040780141844e-05, "loss": 1.1344, "step": 842 }, { "epoch": 0.1868687568406323, "grad_norm": 0.58203125, "learning_rate": 4.0669326241134754e-05, "loss": 1.0606, "step": 843 }, { "epoch": 0.1870904279638122, "grad_norm": 0.57421875, "learning_rate": 4.065824468085106e-05, "loss": 1.1272, "step": 844 }, { "epoch": 0.18731209908699206, "grad_norm": 0.54296875, "learning_rate": 4.064716312056738e-05, "loss": 1.1059, "step": 845 }, { "epoch": 0.18753377021017192, "grad_norm": 0.5859375, "learning_rate": 4.063608156028369e-05, "loss": 1.1818, "step": 846 }, { "epoch": 0.1877554413333518, "grad_norm": 0.6640625, "learning_rate": 4.0625000000000005e-05, "loss": 1.1786, "step": 847 }, { "epoch": 0.18797711245653168, "grad_norm": 0.57421875, "learning_rate": 4.0613918439716314e-05, "loss": 1.1371, "step": 848 }, { "epoch": 0.18819878357971154, "grad_norm": 0.55078125, "learning_rate": 4.0602836879432624e-05, "loss": 1.0657, "step": 849 }, { "epoch": 0.18842045470289143, "grad_norm": 0.57421875, "learning_rate": 4.059175531914894e-05, "loss": 1.1525, "step": 850 }, { "epoch": 0.1886421258260713, "grad_norm": 0.55859375, "learning_rate": 4.058067375886525e-05, "loss": 1.1276, "step": 851 }, { "epoch": 0.18886379694925118, "grad_norm": 0.64453125, "learning_rate": 4.0569592198581565e-05, "loss": 1.1817, "step": 852 }, { "epoch": 0.18908546807243104, "grad_norm": 0.5703125, "learning_rate": 4.0558510638297875e-05, "loss": 1.0705, "step": 853 }, { "epoch": 0.1893071391956109, "grad_norm": 0.59765625, "learning_rate": 4.0547429078014184e-05, "loss": 1.067, "step": 854 }, { "epoch": 0.1895288103187908, "grad_norm": 0.55078125, "learning_rate": 4.05363475177305e-05, "loss": 1.0836, "step": 855 }, { "epoch": 0.18975048144197065, "grad_norm": 0.5390625, "learning_rate": 4.052526595744681e-05, "loss": 1.0062, "step": 856 }, { "epoch": 0.18997215256515051, "grad_norm": 0.58984375, "learning_rate": 4.0514184397163126e-05, "loss": 1.1627, "step": 857 }, { "epoch": 0.1901938236883304, "grad_norm": 0.578125, "learning_rate": 4.0503102836879435e-05, "loss": 1.1994, "step": 858 }, { "epoch": 0.19041549481151027, "grad_norm": 0.546875, "learning_rate": 4.049202127659575e-05, "loss": 1.1146, "step": 859 }, { "epoch": 0.19063716593469016, "grad_norm": 0.5625, "learning_rate": 4.048093971631206e-05, "loss": 1.1191, "step": 860 }, { "epoch": 0.19085883705787002, "grad_norm": 0.58984375, "learning_rate": 4.046985815602837e-05, "loss": 1.1345, "step": 861 }, { "epoch": 0.19108050818104988, "grad_norm": 0.59765625, "learning_rate": 4.0458776595744686e-05, "loss": 1.1536, "step": 862 }, { "epoch": 0.19130217930422977, "grad_norm": 0.55078125, "learning_rate": 4.0447695035460996e-05, "loss": 1.0818, "step": 863 }, { "epoch": 0.19152385042740963, "grad_norm": 0.546875, "learning_rate": 4.043661347517731e-05, "loss": 1.1253, "step": 864 }, { "epoch": 0.19174552155058952, "grad_norm": 0.55859375, "learning_rate": 4.0425531914893614e-05, "loss": 1.056, "step": 865 }, { "epoch": 0.19196719267376938, "grad_norm": 0.55859375, "learning_rate": 4.041445035460993e-05, "loss": 1.0821, "step": 866 }, { "epoch": 0.19218886379694924, "grad_norm": 0.59375, "learning_rate": 4.040336879432624e-05, "loss": 1.0917, "step": 867 }, { "epoch": 0.19241053492012913, "grad_norm": 0.5390625, "learning_rate": 4.0392287234042556e-05, "loss": 1.1953, "step": 868 }, { "epoch": 0.192632206043309, "grad_norm": 0.55078125, "learning_rate": 4.0381205673758866e-05, "loss": 1.0714, "step": 869 }, { "epoch": 0.19285387716648886, "grad_norm": 0.54296875, "learning_rate": 4.0370124113475175e-05, "loss": 1.1476, "step": 870 }, { "epoch": 0.19307554828966875, "grad_norm": 0.546875, "learning_rate": 4.035904255319149e-05, "loss": 1.0958, "step": 871 }, { "epoch": 0.1932972194128486, "grad_norm": 0.55078125, "learning_rate": 4.03479609929078e-05, "loss": 1.0981, "step": 872 }, { "epoch": 0.1935188905360285, "grad_norm": 0.5859375, "learning_rate": 4.033687943262412e-05, "loss": 1.134, "step": 873 }, { "epoch": 0.19374056165920836, "grad_norm": 0.59765625, "learning_rate": 4.0325797872340426e-05, "loss": 1.0197, "step": 874 }, { "epoch": 0.19396223278238822, "grad_norm": 0.5625, "learning_rate": 4.0314716312056735e-05, "loss": 1.1016, "step": 875 }, { "epoch": 0.1941839039055681, "grad_norm": 0.58203125, "learning_rate": 4.030363475177305e-05, "loss": 1.1149, "step": 876 }, { "epoch": 0.19440557502874797, "grad_norm": 0.59765625, "learning_rate": 4.029255319148936e-05, "loss": 1.1285, "step": 877 }, { "epoch": 0.19462724615192784, "grad_norm": 0.59765625, "learning_rate": 4.028147163120568e-05, "loss": 1.0918, "step": 878 }, { "epoch": 0.19484891727510772, "grad_norm": 0.55859375, "learning_rate": 4.0270390070921987e-05, "loss": 1.0616, "step": 879 }, { "epoch": 0.1950705883982876, "grad_norm": 0.6171875, "learning_rate": 4.02593085106383e-05, "loss": 1.1652, "step": 880 }, { "epoch": 0.19529225952146748, "grad_norm": 0.6015625, "learning_rate": 4.024822695035461e-05, "loss": 1.1721, "step": 881 }, { "epoch": 0.19551393064464734, "grad_norm": 0.546875, "learning_rate": 4.023714539007092e-05, "loss": 1.0397, "step": 882 }, { "epoch": 0.1957356017678272, "grad_norm": 0.5625, "learning_rate": 4.022606382978724e-05, "loss": 1.0066, "step": 883 }, { "epoch": 0.1959572728910071, "grad_norm": 0.53515625, "learning_rate": 4.021498226950355e-05, "loss": 1.111, "step": 884 }, { "epoch": 0.19617894401418695, "grad_norm": 0.5546875, "learning_rate": 4.020390070921986e-05, "loss": 1.1013, "step": 885 }, { "epoch": 0.1964006151373668, "grad_norm": 0.53125, "learning_rate": 4.019281914893617e-05, "loss": 1.0366, "step": 886 }, { "epoch": 0.1966222862605467, "grad_norm": 0.57421875, "learning_rate": 4.018173758865248e-05, "loss": 1.147, "step": 887 }, { "epoch": 0.19684395738372656, "grad_norm": 0.57421875, "learning_rate": 4.01706560283688e-05, "loss": 1.1365, "step": 888 }, { "epoch": 0.19706562850690645, "grad_norm": 0.55078125, "learning_rate": 4.015957446808511e-05, "loss": 1.1375, "step": 889 }, { "epoch": 0.19728729963008632, "grad_norm": 0.5625, "learning_rate": 4.0148492907801424e-05, "loss": 1.1198, "step": 890 }, { "epoch": 0.19750897075326618, "grad_norm": 0.57421875, "learning_rate": 4.013741134751773e-05, "loss": 1.0935, "step": 891 }, { "epoch": 0.19773064187644607, "grad_norm": 0.57421875, "learning_rate": 4.012632978723405e-05, "loss": 1.1221, "step": 892 }, { "epoch": 0.19795231299962593, "grad_norm": 0.55859375, "learning_rate": 4.011524822695036e-05, "loss": 1.0277, "step": 893 }, { "epoch": 0.1981739841228058, "grad_norm": 0.59765625, "learning_rate": 4.010416666666667e-05, "loss": 1.1503, "step": 894 }, { "epoch": 0.19839565524598568, "grad_norm": 0.5703125, "learning_rate": 4.0093085106382984e-05, "loss": 1.1028, "step": 895 }, { "epoch": 0.19861732636916554, "grad_norm": 0.61328125, "learning_rate": 4.0082003546099293e-05, "loss": 1.1125, "step": 896 }, { "epoch": 0.19883899749234543, "grad_norm": 0.58203125, "learning_rate": 4.007092198581561e-05, "loss": 1.0871, "step": 897 }, { "epoch": 0.1990606686155253, "grad_norm": 0.55859375, "learning_rate": 4.005984042553192e-05, "loss": 1.0967, "step": 898 }, { "epoch": 0.19928233973870516, "grad_norm": 0.56640625, "learning_rate": 4.004875886524823e-05, "loss": 1.1361, "step": 899 }, { "epoch": 0.19950401086188504, "grad_norm": 0.5625, "learning_rate": 4.003767730496454e-05, "loss": 1.1155, "step": 900 }, { "epoch": 0.1997256819850649, "grad_norm": 0.59375, "learning_rate": 4.002659574468085e-05, "loss": 1.2198, "step": 901 }, { "epoch": 0.19994735310824477, "grad_norm": 0.58203125, "learning_rate": 4.001551418439716e-05, "loss": 1.1419, "step": 902 }, { "epoch": 0.20016902423142466, "grad_norm": 0.55078125, "learning_rate": 4.000443262411347e-05, "loss": 1.0935, "step": 903 }, { "epoch": 0.20039069535460452, "grad_norm": 0.578125, "learning_rate": 3.999335106382979e-05, "loss": 1.0913, "step": 904 }, { "epoch": 0.2006123664777844, "grad_norm": 0.546875, "learning_rate": 3.99822695035461e-05, "loss": 1.0567, "step": 905 }, { "epoch": 0.20083403760096427, "grad_norm": 0.55078125, "learning_rate": 3.9971187943262414e-05, "loss": 1.082, "step": 906 }, { "epoch": 0.20105570872414413, "grad_norm": 0.58984375, "learning_rate": 3.9960106382978724e-05, "loss": 1.201, "step": 907 }, { "epoch": 0.20127737984732402, "grad_norm": 0.546875, "learning_rate": 3.994902482269503e-05, "loss": 1.0919, "step": 908 }, { "epoch": 0.20149905097050388, "grad_norm": 0.57421875, "learning_rate": 3.993794326241135e-05, "loss": 1.1331, "step": 909 }, { "epoch": 0.20172072209368375, "grad_norm": 0.578125, "learning_rate": 3.992686170212766e-05, "loss": 1.0951, "step": 910 }, { "epoch": 0.20194239321686364, "grad_norm": 0.5390625, "learning_rate": 3.9915780141843975e-05, "loss": 1.0839, "step": 911 }, { "epoch": 0.2021640643400435, "grad_norm": 0.5546875, "learning_rate": 3.9904698581560284e-05, "loss": 0.9959, "step": 912 }, { "epoch": 0.2023857354632234, "grad_norm": 0.57421875, "learning_rate": 3.9893617021276594e-05, "loss": 1.0792, "step": 913 }, { "epoch": 0.20260740658640325, "grad_norm": 0.625, "learning_rate": 3.988253546099291e-05, "loss": 1.2012, "step": 914 }, { "epoch": 0.2028290777095831, "grad_norm": 0.5546875, "learning_rate": 3.987145390070922e-05, "loss": 1.0546, "step": 915 }, { "epoch": 0.203050748832763, "grad_norm": 0.5625, "learning_rate": 3.9860372340425535e-05, "loss": 1.1379, "step": 916 }, { "epoch": 0.20327241995594286, "grad_norm": 0.57421875, "learning_rate": 3.9849290780141845e-05, "loss": 1.1595, "step": 917 }, { "epoch": 0.20349409107912272, "grad_norm": 0.5859375, "learning_rate": 3.983820921985816e-05, "loss": 1.2136, "step": 918 }, { "epoch": 0.2037157622023026, "grad_norm": 0.57421875, "learning_rate": 3.982712765957447e-05, "loss": 1.1241, "step": 919 }, { "epoch": 0.20393743332548248, "grad_norm": 0.59765625, "learning_rate": 3.981604609929078e-05, "loss": 1.0965, "step": 920 }, { "epoch": 0.20415910444866237, "grad_norm": 0.52734375, "learning_rate": 3.9804964539007096e-05, "loss": 1.0266, "step": 921 }, { "epoch": 0.20438077557184223, "grad_norm": 0.62109375, "learning_rate": 3.9793882978723405e-05, "loss": 1.0336, "step": 922 }, { "epoch": 0.2046024466950221, "grad_norm": 0.6171875, "learning_rate": 3.978280141843972e-05, "loss": 1.1021, "step": 923 }, { "epoch": 0.20482411781820198, "grad_norm": 0.55859375, "learning_rate": 3.977171985815603e-05, "loss": 1.1139, "step": 924 }, { "epoch": 0.20504578894138184, "grad_norm": 0.6015625, "learning_rate": 3.976063829787234e-05, "loss": 1.1907, "step": 925 }, { "epoch": 0.2052674600645617, "grad_norm": 0.6328125, "learning_rate": 3.9749556737588656e-05, "loss": 1.0888, "step": 926 }, { "epoch": 0.2054891311877416, "grad_norm": 0.546875, "learning_rate": 3.9738475177304966e-05, "loss": 1.0677, "step": 927 }, { "epoch": 0.20571080231092145, "grad_norm": 0.60546875, "learning_rate": 3.972739361702128e-05, "loss": 1.1277, "step": 928 }, { "epoch": 0.20593247343410134, "grad_norm": 0.5546875, "learning_rate": 3.971631205673759e-05, "loss": 1.0377, "step": 929 }, { "epoch": 0.2061541445572812, "grad_norm": 0.57421875, "learning_rate": 3.970523049645391e-05, "loss": 1.1031, "step": 930 }, { "epoch": 0.20637581568046107, "grad_norm": 0.5625, "learning_rate": 3.969414893617022e-05, "loss": 1.1133, "step": 931 }, { "epoch": 0.20659748680364096, "grad_norm": 0.55859375, "learning_rate": 3.9683067375886526e-05, "loss": 0.99, "step": 932 }, { "epoch": 0.20681915792682082, "grad_norm": 0.5546875, "learning_rate": 3.967198581560284e-05, "loss": 1.0951, "step": 933 }, { "epoch": 0.20704082905000068, "grad_norm": 0.58203125, "learning_rate": 3.966090425531915e-05, "loss": 1.0486, "step": 934 }, { "epoch": 0.20726250017318057, "grad_norm": 0.57421875, "learning_rate": 3.964982269503547e-05, "loss": 1.0554, "step": 935 }, { "epoch": 0.20748417129636043, "grad_norm": 0.53125, "learning_rate": 3.963874113475177e-05, "loss": 1.0752, "step": 936 }, { "epoch": 0.20770584241954032, "grad_norm": 0.578125, "learning_rate": 3.962765957446809e-05, "loss": 1.202, "step": 937 }, { "epoch": 0.20792751354272018, "grad_norm": 0.56640625, "learning_rate": 3.9616578014184396e-05, "loss": 1.0163, "step": 938 }, { "epoch": 0.20814918466590004, "grad_norm": 0.578125, "learning_rate": 3.960549645390071e-05, "loss": 1.1345, "step": 939 }, { "epoch": 0.20837085578907993, "grad_norm": 0.5390625, "learning_rate": 3.959441489361702e-05, "loss": 1.1122, "step": 940 }, { "epoch": 0.2085925269122598, "grad_norm": 0.5859375, "learning_rate": 3.958333333333333e-05, "loss": 1.1418, "step": 941 }, { "epoch": 0.20881419803543966, "grad_norm": 0.5859375, "learning_rate": 3.957225177304965e-05, "loss": 1.1478, "step": 942 }, { "epoch": 0.20903586915861955, "grad_norm": 0.55078125, "learning_rate": 3.9561170212765957e-05, "loss": 1.0611, "step": 943 }, { "epoch": 0.2092575402817994, "grad_norm": 0.54296875, "learning_rate": 3.955008865248227e-05, "loss": 1.0316, "step": 944 }, { "epoch": 0.2094792114049793, "grad_norm": 0.54296875, "learning_rate": 3.953900709219858e-05, "loss": 1.069, "step": 945 }, { "epoch": 0.20970088252815916, "grad_norm": 0.57421875, "learning_rate": 3.952792553191489e-05, "loss": 1.1273, "step": 946 }, { "epoch": 0.20992255365133902, "grad_norm": 0.61328125, "learning_rate": 3.951684397163121e-05, "loss": 1.1539, "step": 947 }, { "epoch": 0.2101442247745189, "grad_norm": 0.55859375, "learning_rate": 3.950576241134752e-05, "loss": 1.1316, "step": 948 }, { "epoch": 0.21036589589769877, "grad_norm": 0.53125, "learning_rate": 3.949468085106383e-05, "loss": 1.0411, "step": 949 }, { "epoch": 0.21058756702087864, "grad_norm": 0.55859375, "learning_rate": 3.948359929078014e-05, "loss": 1.1272, "step": 950 }, { "epoch": 0.21080923814405853, "grad_norm": 0.59375, "learning_rate": 3.947251773049646e-05, "loss": 1.0209, "step": 951 }, { "epoch": 0.2110309092672384, "grad_norm": 0.5546875, "learning_rate": 3.946143617021277e-05, "loss": 1.0363, "step": 952 }, { "epoch": 0.21125258039041828, "grad_norm": 0.53515625, "learning_rate": 3.945035460992908e-05, "loss": 1.0515, "step": 953 }, { "epoch": 0.21147425151359814, "grad_norm": 0.546875, "learning_rate": 3.9439273049645394e-05, "loss": 1.0278, "step": 954 }, { "epoch": 0.211695922636778, "grad_norm": 0.5703125, "learning_rate": 3.94281914893617e-05, "loss": 1.1206, "step": 955 }, { "epoch": 0.2119175937599579, "grad_norm": 0.546875, "learning_rate": 3.941710992907802e-05, "loss": 1.0487, "step": 956 }, { "epoch": 0.21213926488313775, "grad_norm": 0.5703125, "learning_rate": 3.940602836879433e-05, "loss": 1.0625, "step": 957 }, { "epoch": 0.2123609360063176, "grad_norm": 0.5390625, "learning_rate": 3.939494680851064e-05, "loss": 1.0685, "step": 958 }, { "epoch": 0.2125826071294975, "grad_norm": 0.56640625, "learning_rate": 3.9383865248226954e-05, "loss": 1.1041, "step": 959 }, { "epoch": 0.21280427825267736, "grad_norm": 0.58984375, "learning_rate": 3.9372783687943264e-05, "loss": 1.0758, "step": 960 }, { "epoch": 0.21302594937585725, "grad_norm": 0.5703125, "learning_rate": 3.936170212765958e-05, "loss": 1.1718, "step": 961 }, { "epoch": 0.21324762049903712, "grad_norm": 0.546875, "learning_rate": 3.935062056737589e-05, "loss": 1.0698, "step": 962 }, { "epoch": 0.21346929162221698, "grad_norm": 0.5859375, "learning_rate": 3.9339539007092205e-05, "loss": 1.0479, "step": 963 }, { "epoch": 0.21369096274539687, "grad_norm": 0.55078125, "learning_rate": 3.9328457446808515e-05, "loss": 1.1561, "step": 964 }, { "epoch": 0.21391263386857673, "grad_norm": 0.55859375, "learning_rate": 3.9317375886524824e-05, "loss": 1.0908, "step": 965 }, { "epoch": 0.21413430499175662, "grad_norm": 0.56640625, "learning_rate": 3.930629432624114e-05, "loss": 1.1535, "step": 966 }, { "epoch": 0.21435597611493648, "grad_norm": 0.5546875, "learning_rate": 3.929521276595745e-05, "loss": 1.0549, "step": 967 }, { "epoch": 0.21457764723811634, "grad_norm": 0.57421875, "learning_rate": 3.9284131205673766e-05, "loss": 1.1064, "step": 968 }, { "epoch": 0.21479931836129623, "grad_norm": 0.58984375, "learning_rate": 3.9273049645390075e-05, "loss": 1.075, "step": 969 }, { "epoch": 0.2150209894844761, "grad_norm": 0.57421875, "learning_rate": 3.9261968085106384e-05, "loss": 1.1064, "step": 970 }, { "epoch": 0.21524266060765596, "grad_norm": 0.52734375, "learning_rate": 3.9250886524822694e-05, "loss": 1.0332, "step": 971 }, { "epoch": 0.21546433173083585, "grad_norm": 0.54296875, "learning_rate": 3.9239804964539e-05, "loss": 1.0448, "step": 972 }, { "epoch": 0.2156860028540157, "grad_norm": 0.55859375, "learning_rate": 3.922872340425532e-05, "loss": 1.0769, "step": 973 }, { "epoch": 0.2159076739771956, "grad_norm": 0.55078125, "learning_rate": 3.921764184397163e-05, "loss": 1.0522, "step": 974 }, { "epoch": 0.21612934510037546, "grad_norm": 0.55859375, "learning_rate": 3.9206560283687945e-05, "loss": 1.1141, "step": 975 }, { "epoch": 0.21635101622355532, "grad_norm": 0.59765625, "learning_rate": 3.9195478723404254e-05, "loss": 1.1625, "step": 976 }, { "epoch": 0.2165726873467352, "grad_norm": 0.57421875, "learning_rate": 3.918439716312057e-05, "loss": 1.0777, "step": 977 }, { "epoch": 0.21679435846991507, "grad_norm": 0.5234375, "learning_rate": 3.917331560283688e-05, "loss": 1.0062, "step": 978 }, { "epoch": 0.21701602959309493, "grad_norm": 0.60546875, "learning_rate": 3.916223404255319e-05, "loss": 1.1379, "step": 979 }, { "epoch": 0.21723770071627482, "grad_norm": 0.56640625, "learning_rate": 3.9151152482269505e-05, "loss": 1.1211, "step": 980 }, { "epoch": 0.21745937183945468, "grad_norm": 0.53515625, "learning_rate": 3.9140070921985815e-05, "loss": 1.0164, "step": 981 }, { "epoch": 0.21768104296263457, "grad_norm": 0.5859375, "learning_rate": 3.912898936170213e-05, "loss": 1.0912, "step": 982 }, { "epoch": 0.21790271408581444, "grad_norm": 0.55078125, "learning_rate": 3.911790780141844e-05, "loss": 1.0713, "step": 983 }, { "epoch": 0.2181243852089943, "grad_norm": 0.55078125, "learning_rate": 3.910682624113475e-05, "loss": 1.0897, "step": 984 }, { "epoch": 0.2183460563321742, "grad_norm": 0.5703125, "learning_rate": 3.9095744680851066e-05, "loss": 1.0814, "step": 985 }, { "epoch": 0.21856772745535405, "grad_norm": 0.55859375, "learning_rate": 3.9084663120567375e-05, "loss": 1.1087, "step": 986 }, { "epoch": 0.2187893985785339, "grad_norm": 0.56640625, "learning_rate": 3.907358156028369e-05, "loss": 1.1144, "step": 987 }, { "epoch": 0.2190110697017138, "grad_norm": 0.58203125, "learning_rate": 3.90625e-05, "loss": 1.1075, "step": 988 }, { "epoch": 0.21923274082489366, "grad_norm": 0.546875, "learning_rate": 3.905141843971632e-05, "loss": 1.0946, "step": 989 }, { "epoch": 0.21945441194807355, "grad_norm": 0.58203125, "learning_rate": 3.9040336879432626e-05, "loss": 1.1236, "step": 990 }, { "epoch": 0.21967608307125341, "grad_norm": 0.6171875, "learning_rate": 3.9029255319148936e-05, "loss": 1.0939, "step": 991 }, { "epoch": 0.21989775419443328, "grad_norm": 0.55078125, "learning_rate": 3.901817375886525e-05, "loss": 1.1655, "step": 992 }, { "epoch": 0.22011942531761317, "grad_norm": 0.6015625, "learning_rate": 3.900709219858156e-05, "loss": 1.1355, "step": 993 }, { "epoch": 0.22034109644079303, "grad_norm": 0.55859375, "learning_rate": 3.899601063829788e-05, "loss": 1.1206, "step": 994 }, { "epoch": 0.2205627675639729, "grad_norm": 0.5859375, "learning_rate": 3.898492907801419e-05, "loss": 1.2048, "step": 995 }, { "epoch": 0.22078443868715278, "grad_norm": 0.5703125, "learning_rate": 3.8973847517730496e-05, "loss": 1.038, "step": 996 }, { "epoch": 0.22100610981033264, "grad_norm": 0.56640625, "learning_rate": 3.896276595744681e-05, "loss": 1.1102, "step": 997 }, { "epoch": 0.22122778093351253, "grad_norm": 0.61328125, "learning_rate": 3.895168439716312e-05, "loss": 1.074, "step": 998 }, { "epoch": 0.2214494520566924, "grad_norm": 0.57421875, "learning_rate": 3.894060283687944e-05, "loss": 1.129, "step": 999 }, { "epoch": 0.22167112317987225, "grad_norm": 0.55078125, "learning_rate": 3.892952127659575e-05, "loss": 1.046, "step": 1000 }, { "epoch": 0.22189279430305214, "grad_norm": 0.57421875, "learning_rate": 3.8918439716312063e-05, "loss": 1.0343, "step": 1001 }, { "epoch": 0.222114465426232, "grad_norm": 0.546875, "learning_rate": 3.890735815602837e-05, "loss": 1.046, "step": 1002 }, { "epoch": 0.22233613654941187, "grad_norm": 0.578125, "learning_rate": 3.889627659574468e-05, "loss": 1.084, "step": 1003 }, { "epoch": 0.22255780767259176, "grad_norm": 0.6015625, "learning_rate": 3.8885195035461e-05, "loss": 1.0543, "step": 1004 }, { "epoch": 0.22277947879577162, "grad_norm": 0.5625, "learning_rate": 3.887411347517731e-05, "loss": 1.1219, "step": 1005 }, { "epoch": 0.2230011499189515, "grad_norm": 0.54296875, "learning_rate": 3.886303191489362e-05, "loss": 1.1259, "step": 1006 }, { "epoch": 0.22322282104213137, "grad_norm": 0.55859375, "learning_rate": 3.8851950354609927e-05, "loss": 1.1077, "step": 1007 }, { "epoch": 0.22344449216531123, "grad_norm": 0.5390625, "learning_rate": 3.884086879432624e-05, "loss": 1.0858, "step": 1008 }, { "epoch": 0.22366616328849112, "grad_norm": 0.57421875, "learning_rate": 3.882978723404255e-05, "loss": 1.1156, "step": 1009 }, { "epoch": 0.22388783441167098, "grad_norm": 0.53125, "learning_rate": 3.881870567375887e-05, "loss": 1.0229, "step": 1010 }, { "epoch": 0.22410950553485084, "grad_norm": 0.578125, "learning_rate": 3.880762411347518e-05, "loss": 1.137, "step": 1011 }, { "epoch": 0.22433117665803073, "grad_norm": 0.5546875, "learning_rate": 3.879654255319149e-05, "loss": 1.1522, "step": 1012 }, { "epoch": 0.2245528477812106, "grad_norm": 0.5703125, "learning_rate": 3.87854609929078e-05, "loss": 1.138, "step": 1013 }, { "epoch": 0.22477451890439049, "grad_norm": 0.58984375, "learning_rate": 3.877437943262411e-05, "loss": 1.0969, "step": 1014 }, { "epoch": 0.22499619002757035, "grad_norm": 0.55859375, "learning_rate": 3.876329787234043e-05, "loss": 1.1013, "step": 1015 }, { "epoch": 0.2252178611507502, "grad_norm": 0.55859375, "learning_rate": 3.875221631205674e-05, "loss": 1.1197, "step": 1016 }, { "epoch": 0.2254395322739301, "grad_norm": 0.59765625, "learning_rate": 3.874113475177305e-05, "loss": 1.1261, "step": 1017 }, { "epoch": 0.22566120339710996, "grad_norm": 0.5625, "learning_rate": 3.8730053191489364e-05, "loss": 1.0634, "step": 1018 }, { "epoch": 0.22588287452028982, "grad_norm": 0.5703125, "learning_rate": 3.871897163120567e-05, "loss": 1.052, "step": 1019 }, { "epoch": 0.2261045456434697, "grad_norm": 0.5625, "learning_rate": 3.870789007092199e-05, "loss": 1.1686, "step": 1020 }, { "epoch": 0.22632621676664957, "grad_norm": 0.55078125, "learning_rate": 3.86968085106383e-05, "loss": 1.0777, "step": 1021 }, { "epoch": 0.22654788788982946, "grad_norm": 0.55859375, "learning_rate": 3.8685726950354615e-05, "loss": 1.1016, "step": 1022 }, { "epoch": 0.22676955901300933, "grad_norm": 0.54296875, "learning_rate": 3.8674645390070924e-05, "loss": 1.0689, "step": 1023 }, { "epoch": 0.2269912301361892, "grad_norm": 0.5546875, "learning_rate": 3.8663563829787234e-05, "loss": 1.0591, "step": 1024 }, { "epoch": 0.22721290125936908, "grad_norm": 0.62109375, "learning_rate": 3.865248226950355e-05, "loss": 1.0253, "step": 1025 }, { "epoch": 0.22743457238254894, "grad_norm": 0.58203125, "learning_rate": 3.864140070921986e-05, "loss": 1.164, "step": 1026 }, { "epoch": 0.2276562435057288, "grad_norm": 0.5625, "learning_rate": 3.8630319148936175e-05, "loss": 1.0846, "step": 1027 }, { "epoch": 0.2278779146289087, "grad_norm": 0.55078125, "learning_rate": 3.8619237588652485e-05, "loss": 1.1627, "step": 1028 }, { "epoch": 0.22809958575208855, "grad_norm": 0.546875, "learning_rate": 3.8608156028368794e-05, "loss": 1.0794, "step": 1029 }, { "epoch": 0.22832125687526844, "grad_norm": 0.546875, "learning_rate": 3.859707446808511e-05, "loss": 1.0545, "step": 1030 }, { "epoch": 0.2285429279984483, "grad_norm": 0.59375, "learning_rate": 3.858599290780142e-05, "loss": 1.1253, "step": 1031 }, { "epoch": 0.22876459912162816, "grad_norm": 0.5859375, "learning_rate": 3.8574911347517736e-05, "loss": 1.1273, "step": 1032 }, { "epoch": 0.22898627024480805, "grad_norm": 0.5625, "learning_rate": 3.8563829787234045e-05, "loss": 1.0514, "step": 1033 }, { "epoch": 0.22920794136798792, "grad_norm": 0.5546875, "learning_rate": 3.855274822695036e-05, "loss": 1.0982, "step": 1034 }, { "epoch": 0.22942961249116778, "grad_norm": 0.609375, "learning_rate": 3.854166666666667e-05, "loss": 1.0915, "step": 1035 }, { "epoch": 0.22965128361434767, "grad_norm": 0.5625, "learning_rate": 3.853058510638298e-05, "loss": 1.1614, "step": 1036 }, { "epoch": 0.22987295473752753, "grad_norm": 0.6171875, "learning_rate": 3.8519503546099296e-05, "loss": 1.0946, "step": 1037 }, { "epoch": 0.23009462586070742, "grad_norm": 0.546875, "learning_rate": 3.8508421985815606e-05, "loss": 1.028, "step": 1038 }, { "epoch": 0.23031629698388728, "grad_norm": 0.6171875, "learning_rate": 3.849734042553192e-05, "loss": 1.1039, "step": 1039 }, { "epoch": 0.23053796810706714, "grad_norm": 0.5859375, "learning_rate": 3.848625886524823e-05, "loss": 1.1144, "step": 1040 }, { "epoch": 0.23075963923024703, "grad_norm": 0.6953125, "learning_rate": 3.847517730496454e-05, "loss": 1.0651, "step": 1041 }, { "epoch": 0.2309813103534269, "grad_norm": 0.578125, "learning_rate": 3.846409574468085e-05, "loss": 1.1087, "step": 1042 }, { "epoch": 0.23120298147660676, "grad_norm": 0.59375, "learning_rate": 3.845301418439716e-05, "loss": 1.0922, "step": 1043 }, { "epoch": 0.23142465259978665, "grad_norm": 0.6484375, "learning_rate": 3.8441932624113475e-05, "loss": 1.1501, "step": 1044 }, { "epoch": 0.2316463237229665, "grad_norm": 0.6328125, "learning_rate": 3.8430851063829785e-05, "loss": 1.0377, "step": 1045 }, { "epoch": 0.2318679948461464, "grad_norm": 0.5703125, "learning_rate": 3.84197695035461e-05, "loss": 1.2284, "step": 1046 }, { "epoch": 0.23208966596932626, "grad_norm": 0.65234375, "learning_rate": 3.840868794326241e-05, "loss": 1.1178, "step": 1047 }, { "epoch": 0.23231133709250612, "grad_norm": 0.6015625, "learning_rate": 3.8397606382978727e-05, "loss": 1.1181, "step": 1048 }, { "epoch": 0.232533008215686, "grad_norm": 0.57421875, "learning_rate": 3.8386524822695036e-05, "loss": 1.0682, "step": 1049 }, { "epoch": 0.23275467933886587, "grad_norm": 0.55859375, "learning_rate": 3.8375443262411345e-05, "loss": 1.1243, "step": 1050 }, { "epoch": 0.23297635046204573, "grad_norm": 0.55859375, "learning_rate": 3.836436170212766e-05, "loss": 1.1432, "step": 1051 }, { "epoch": 0.23319802158522562, "grad_norm": 0.546875, "learning_rate": 3.835328014184397e-05, "loss": 1.1096, "step": 1052 }, { "epoch": 0.23341969270840549, "grad_norm": 0.5859375, "learning_rate": 3.834219858156029e-05, "loss": 1.1116, "step": 1053 }, { "epoch": 0.23364136383158537, "grad_norm": 0.53515625, "learning_rate": 3.8331117021276596e-05, "loss": 1.0828, "step": 1054 }, { "epoch": 0.23386303495476524, "grad_norm": 0.5546875, "learning_rate": 3.8320035460992906e-05, "loss": 1.0731, "step": 1055 }, { "epoch": 0.2340847060779451, "grad_norm": 0.65625, "learning_rate": 3.830895390070922e-05, "loss": 1.1117, "step": 1056 }, { "epoch": 0.234306377201125, "grad_norm": 0.56640625, "learning_rate": 3.829787234042553e-05, "loss": 1.1328, "step": 1057 }, { "epoch": 0.23452804832430485, "grad_norm": 0.54296875, "learning_rate": 3.828679078014185e-05, "loss": 1.1444, "step": 1058 }, { "epoch": 0.23474971944748474, "grad_norm": 0.53515625, "learning_rate": 3.827570921985816e-05, "loss": 1.0157, "step": 1059 }, { "epoch": 0.2349713905706646, "grad_norm": 0.5703125, "learning_rate": 3.826462765957447e-05, "loss": 1.015, "step": 1060 }, { "epoch": 0.23519306169384446, "grad_norm": 0.5625, "learning_rate": 3.825354609929078e-05, "loss": 1.1105, "step": 1061 }, { "epoch": 0.23541473281702435, "grad_norm": 0.54296875, "learning_rate": 3.824246453900709e-05, "loss": 1.1509, "step": 1062 }, { "epoch": 0.23563640394020421, "grad_norm": 0.54296875, "learning_rate": 3.823138297872341e-05, "loss": 1.0429, "step": 1063 }, { "epoch": 0.23585807506338408, "grad_norm": 0.5703125, "learning_rate": 3.822030141843972e-05, "loss": 1.06, "step": 1064 }, { "epoch": 0.23607974618656397, "grad_norm": 0.546875, "learning_rate": 3.8209219858156033e-05, "loss": 1.0618, "step": 1065 }, { "epoch": 0.23630141730974383, "grad_norm": 0.56640625, "learning_rate": 3.819813829787234e-05, "loss": 1.1442, "step": 1066 }, { "epoch": 0.23652308843292372, "grad_norm": 0.609375, "learning_rate": 3.818705673758866e-05, "loss": 1.1859, "step": 1067 }, { "epoch": 0.23674475955610358, "grad_norm": 0.5546875, "learning_rate": 3.817597517730497e-05, "loss": 1.0459, "step": 1068 }, { "epoch": 0.23696643067928344, "grad_norm": 0.5625, "learning_rate": 3.816489361702128e-05, "loss": 1.0595, "step": 1069 }, { "epoch": 0.23718810180246333, "grad_norm": 0.546875, "learning_rate": 3.8153812056737594e-05, "loss": 1.1191, "step": 1070 }, { "epoch": 0.2374097729256432, "grad_norm": 0.55859375, "learning_rate": 3.81427304964539e-05, "loss": 1.1082, "step": 1071 }, { "epoch": 0.23763144404882305, "grad_norm": 0.5625, "learning_rate": 3.813164893617022e-05, "loss": 1.0872, "step": 1072 }, { "epoch": 0.23785311517200294, "grad_norm": 0.5625, "learning_rate": 3.812056737588653e-05, "loss": 1.142, "step": 1073 }, { "epoch": 0.2380747862951828, "grad_norm": 0.5859375, "learning_rate": 3.810948581560284e-05, "loss": 1.0158, "step": 1074 }, { "epoch": 0.2382964574183627, "grad_norm": 0.58203125, "learning_rate": 3.8098404255319154e-05, "loss": 1.1642, "step": 1075 }, { "epoch": 0.23851812854154256, "grad_norm": 0.5625, "learning_rate": 3.8087322695035464e-05, "loss": 1.1158, "step": 1076 }, { "epoch": 0.23873979966472242, "grad_norm": 0.55859375, "learning_rate": 3.807624113475177e-05, "loss": 1.086, "step": 1077 }, { "epoch": 0.2389614707879023, "grad_norm": 0.57421875, "learning_rate": 3.806515957446808e-05, "loss": 1.1246, "step": 1078 }, { "epoch": 0.23918314191108217, "grad_norm": 0.56640625, "learning_rate": 3.80540780141844e-05, "loss": 1.1013, "step": 1079 }, { "epoch": 0.23940481303426203, "grad_norm": 0.5546875, "learning_rate": 3.804299645390071e-05, "loss": 1.1008, "step": 1080 }, { "epoch": 0.23962648415744192, "grad_norm": 0.5703125, "learning_rate": 3.8031914893617024e-05, "loss": 1.0681, "step": 1081 }, { "epoch": 0.23984815528062178, "grad_norm": 0.57421875, "learning_rate": 3.8020833333333334e-05, "loss": 1.0432, "step": 1082 }, { "epoch": 0.24006982640380167, "grad_norm": 0.56640625, "learning_rate": 3.800975177304964e-05, "loss": 1.1779, "step": 1083 }, { "epoch": 0.24029149752698153, "grad_norm": 0.5703125, "learning_rate": 3.799867021276596e-05, "loss": 1.128, "step": 1084 }, { "epoch": 0.2405131686501614, "grad_norm": 0.6171875, "learning_rate": 3.798758865248227e-05, "loss": 1.1536, "step": 1085 }, { "epoch": 0.24073483977334129, "grad_norm": 0.609375, "learning_rate": 3.7976507092198585e-05, "loss": 1.1454, "step": 1086 }, { "epoch": 0.24095651089652115, "grad_norm": 0.5390625, "learning_rate": 3.7965425531914894e-05, "loss": 1.1124, "step": 1087 }, { "epoch": 0.241178182019701, "grad_norm": 0.6015625, "learning_rate": 3.7954343971631204e-05, "loss": 1.049, "step": 1088 }, { "epoch": 0.2413998531428809, "grad_norm": 0.54296875, "learning_rate": 3.794326241134752e-05, "loss": 1.0489, "step": 1089 }, { "epoch": 0.24162152426606076, "grad_norm": 0.578125, "learning_rate": 3.793218085106383e-05, "loss": 1.1585, "step": 1090 }, { "epoch": 0.24184319538924065, "grad_norm": 0.55078125, "learning_rate": 3.7921099290780145e-05, "loss": 1.0556, "step": 1091 }, { "epoch": 0.2420648665124205, "grad_norm": 0.54296875, "learning_rate": 3.7910017730496455e-05, "loss": 1.1152, "step": 1092 }, { "epoch": 0.24228653763560037, "grad_norm": 0.5546875, "learning_rate": 3.789893617021277e-05, "loss": 1.1673, "step": 1093 }, { "epoch": 0.24250820875878026, "grad_norm": 0.5703125, "learning_rate": 3.788785460992908e-05, "loss": 1.0269, "step": 1094 }, { "epoch": 0.24272987988196013, "grad_norm": 0.55859375, "learning_rate": 3.787677304964539e-05, "loss": 1.0772, "step": 1095 }, { "epoch": 0.24295155100514, "grad_norm": 0.59765625, "learning_rate": 3.7865691489361706e-05, "loss": 1.0221, "step": 1096 }, { "epoch": 0.24317322212831988, "grad_norm": 0.5625, "learning_rate": 3.7854609929078015e-05, "loss": 1.1116, "step": 1097 }, { "epoch": 0.24339489325149974, "grad_norm": 0.5703125, "learning_rate": 3.784352836879433e-05, "loss": 1.1459, "step": 1098 }, { "epoch": 0.24361656437467963, "grad_norm": 0.53515625, "learning_rate": 3.783244680851064e-05, "loss": 1.0477, "step": 1099 }, { "epoch": 0.2438382354978595, "grad_norm": 0.5546875, "learning_rate": 3.782136524822695e-05, "loss": 1.0725, "step": 1100 }, { "epoch": 0.24405990662103935, "grad_norm": 0.55859375, "learning_rate": 3.7810283687943266e-05, "loss": 1.0823, "step": 1101 }, { "epoch": 0.24428157774421924, "grad_norm": 0.59765625, "learning_rate": 3.7799202127659576e-05, "loss": 1.0571, "step": 1102 }, { "epoch": 0.2445032488673991, "grad_norm": 0.5546875, "learning_rate": 3.778812056737589e-05, "loss": 1.1536, "step": 1103 }, { "epoch": 0.24472491999057897, "grad_norm": 0.59765625, "learning_rate": 3.77770390070922e-05, "loss": 1.1657, "step": 1104 }, { "epoch": 0.24494659111375885, "grad_norm": 0.55859375, "learning_rate": 3.776595744680852e-05, "loss": 1.1421, "step": 1105 }, { "epoch": 0.24516826223693872, "grad_norm": 0.5546875, "learning_rate": 3.775487588652483e-05, "loss": 1.0382, "step": 1106 }, { "epoch": 0.2453899333601186, "grad_norm": 0.671875, "learning_rate": 3.7743794326241136e-05, "loss": 1.1288, "step": 1107 }, { "epoch": 0.24561160448329847, "grad_norm": 0.58203125, "learning_rate": 3.773271276595745e-05, "loss": 1.0629, "step": 1108 }, { "epoch": 0.24583327560647833, "grad_norm": 0.68359375, "learning_rate": 3.772163120567376e-05, "loss": 1.077, "step": 1109 }, { "epoch": 0.24605494672965822, "grad_norm": 0.62109375, "learning_rate": 3.771054964539008e-05, "loss": 1.1191, "step": 1110 }, { "epoch": 0.24627661785283808, "grad_norm": 0.58203125, "learning_rate": 3.769946808510639e-05, "loss": 1.168, "step": 1111 }, { "epoch": 0.24649828897601794, "grad_norm": 0.57421875, "learning_rate": 3.7688386524822697e-05, "loss": 1.1002, "step": 1112 }, { "epoch": 0.24671996009919783, "grad_norm": 0.6328125, "learning_rate": 3.7677304964539006e-05, "loss": 1.0976, "step": 1113 }, { "epoch": 0.2469416312223777, "grad_norm": 0.59765625, "learning_rate": 3.7666223404255315e-05, "loss": 1.1669, "step": 1114 }, { "epoch": 0.24716330234555758, "grad_norm": 0.58984375, "learning_rate": 3.765514184397163e-05, "loss": 1.1634, "step": 1115 }, { "epoch": 0.24738497346873745, "grad_norm": 0.5546875, "learning_rate": 3.764406028368794e-05, "loss": 1.0767, "step": 1116 }, { "epoch": 0.2476066445919173, "grad_norm": 0.578125, "learning_rate": 3.763297872340426e-05, "loss": 1.0798, "step": 1117 }, { "epoch": 0.2478283157150972, "grad_norm": 0.578125, "learning_rate": 3.7621897163120566e-05, "loss": 1.0743, "step": 1118 }, { "epoch": 0.24804998683827706, "grad_norm": 0.55859375, "learning_rate": 3.761081560283688e-05, "loss": 1.182, "step": 1119 }, { "epoch": 0.24827165796145692, "grad_norm": 0.55859375, "learning_rate": 3.759973404255319e-05, "loss": 1.194, "step": 1120 }, { "epoch": 0.2484933290846368, "grad_norm": 0.5859375, "learning_rate": 3.75886524822695e-05, "loss": 1.0496, "step": 1121 }, { "epoch": 0.24871500020781667, "grad_norm": 0.609375, "learning_rate": 3.757757092198582e-05, "loss": 1.082, "step": 1122 }, { "epoch": 0.24893667133099656, "grad_norm": 0.58203125, "learning_rate": 3.756648936170213e-05, "loss": 1.0583, "step": 1123 }, { "epoch": 0.24915834245417642, "grad_norm": 0.5703125, "learning_rate": 3.755540780141844e-05, "loss": 1.0983, "step": 1124 }, { "epoch": 0.24938001357735629, "grad_norm": 0.57421875, "learning_rate": 3.754432624113475e-05, "loss": 1.0621, "step": 1125 }, { "epoch": 0.24960168470053617, "grad_norm": 0.6015625, "learning_rate": 3.753324468085106e-05, "loss": 1.1131, "step": 1126 }, { "epoch": 0.24982335582371604, "grad_norm": 0.5703125, "learning_rate": 3.752216312056738e-05, "loss": 1.0925, "step": 1127 }, { "epoch": 0.2500450269468959, "grad_norm": 0.62890625, "learning_rate": 3.751108156028369e-05, "loss": 1.086, "step": 1128 }, { "epoch": 0.2502666980700758, "grad_norm": 0.58984375, "learning_rate": 3.7500000000000003e-05, "loss": 1.0158, "step": 1129 }, { "epoch": 0.2504883691932557, "grad_norm": 0.56640625, "learning_rate": 3.748891843971631e-05, "loss": 1.069, "step": 1130 }, { "epoch": 0.2507100403164355, "grad_norm": 0.59375, "learning_rate": 3.747783687943263e-05, "loss": 1.0599, "step": 1131 }, { "epoch": 0.2509317114396154, "grad_norm": 0.5859375, "learning_rate": 3.746675531914894e-05, "loss": 1.0481, "step": 1132 }, { "epoch": 0.2511533825627953, "grad_norm": 0.55859375, "learning_rate": 3.745567375886525e-05, "loss": 1.1282, "step": 1133 }, { "epoch": 0.2513750536859751, "grad_norm": 0.55078125, "learning_rate": 3.7444592198581564e-05, "loss": 1.0427, "step": 1134 }, { "epoch": 0.251596724809155, "grad_norm": 0.5625, "learning_rate": 3.743351063829787e-05, "loss": 1.0866, "step": 1135 }, { "epoch": 0.2518183959323349, "grad_norm": 0.578125, "learning_rate": 3.742242907801419e-05, "loss": 1.0906, "step": 1136 }, { "epoch": 0.25204006705551474, "grad_norm": 0.60546875, "learning_rate": 3.74113475177305e-05, "loss": 1.0871, "step": 1137 }, { "epoch": 0.25226173817869463, "grad_norm": 0.578125, "learning_rate": 3.7400265957446815e-05, "loss": 1.0786, "step": 1138 }, { "epoch": 0.2524834093018745, "grad_norm": 0.55859375, "learning_rate": 3.7389184397163124e-05, "loss": 1.1423, "step": 1139 }, { "epoch": 0.25270508042505435, "grad_norm": 0.578125, "learning_rate": 3.7378102836879434e-05, "loss": 1.0988, "step": 1140 }, { "epoch": 0.25292675154823424, "grad_norm": 0.56640625, "learning_rate": 3.736702127659575e-05, "loss": 1.0353, "step": 1141 }, { "epoch": 0.25314842267141413, "grad_norm": 0.6015625, "learning_rate": 3.735593971631206e-05, "loss": 1.0751, "step": 1142 }, { "epoch": 0.253370093794594, "grad_norm": 0.57421875, "learning_rate": 3.7344858156028376e-05, "loss": 1.0766, "step": 1143 }, { "epoch": 0.25359176491777385, "grad_norm": 0.71875, "learning_rate": 3.7333776595744685e-05, "loss": 1.0661, "step": 1144 }, { "epoch": 0.25381343604095374, "grad_norm": 0.64453125, "learning_rate": 3.7322695035460994e-05, "loss": 1.0701, "step": 1145 }, { "epoch": 0.25403510716413363, "grad_norm": 0.59375, "learning_rate": 3.731161347517731e-05, "loss": 1.1358, "step": 1146 }, { "epoch": 0.25425677828731347, "grad_norm": 0.53125, "learning_rate": 3.730053191489361e-05, "loss": 1.0428, "step": 1147 }, { "epoch": 0.25447844941049336, "grad_norm": 0.578125, "learning_rate": 3.728945035460993e-05, "loss": 1.0277, "step": 1148 }, { "epoch": 0.25470012053367325, "grad_norm": 0.546875, "learning_rate": 3.727836879432624e-05, "loss": 1.0419, "step": 1149 }, { "epoch": 0.2549217916568531, "grad_norm": 0.59765625, "learning_rate": 3.7267287234042555e-05, "loss": 1.1509, "step": 1150 }, { "epoch": 0.25514346278003297, "grad_norm": 0.546875, "learning_rate": 3.7256205673758864e-05, "loss": 1.099, "step": 1151 }, { "epoch": 0.25536513390321286, "grad_norm": 0.5546875, "learning_rate": 3.724512411347518e-05, "loss": 1.1045, "step": 1152 }, { "epoch": 0.2555868050263927, "grad_norm": 0.58203125, "learning_rate": 3.723404255319149e-05, "loss": 1.1071, "step": 1153 }, { "epoch": 0.2558084761495726, "grad_norm": 0.55078125, "learning_rate": 3.72229609929078e-05, "loss": 1.0216, "step": 1154 }, { "epoch": 0.2560301472727525, "grad_norm": 0.56640625, "learning_rate": 3.7211879432624115e-05, "loss": 1.0962, "step": 1155 }, { "epoch": 0.25625181839593236, "grad_norm": 0.515625, "learning_rate": 3.7200797872340425e-05, "loss": 1.0115, "step": 1156 }, { "epoch": 0.2564734895191122, "grad_norm": 0.53515625, "learning_rate": 3.718971631205674e-05, "loss": 1.0846, "step": 1157 }, { "epoch": 0.2566951606422921, "grad_norm": 0.56640625, "learning_rate": 3.717863475177305e-05, "loss": 1.1191, "step": 1158 }, { "epoch": 0.256916831765472, "grad_norm": 0.5234375, "learning_rate": 3.716755319148936e-05, "loss": 1.0698, "step": 1159 }, { "epoch": 0.2571385028886518, "grad_norm": 0.54296875, "learning_rate": 3.7156471631205676e-05, "loss": 0.9721, "step": 1160 }, { "epoch": 0.2573601740118317, "grad_norm": 0.5703125, "learning_rate": 3.7145390070921985e-05, "loss": 1.0309, "step": 1161 }, { "epoch": 0.2575818451350116, "grad_norm": 0.546875, "learning_rate": 3.71343085106383e-05, "loss": 1.0875, "step": 1162 }, { "epoch": 0.2578035162581914, "grad_norm": 0.54296875, "learning_rate": 3.712322695035461e-05, "loss": 1.0578, "step": 1163 }, { "epoch": 0.2580251873813713, "grad_norm": 0.54296875, "learning_rate": 3.711214539007093e-05, "loss": 1.0739, "step": 1164 }, { "epoch": 0.2582468585045512, "grad_norm": 0.546875, "learning_rate": 3.7101063829787236e-05, "loss": 1.1281, "step": 1165 }, { "epoch": 0.25846852962773104, "grad_norm": 0.546875, "learning_rate": 3.7089982269503546e-05, "loss": 1.1359, "step": 1166 }, { "epoch": 0.2586902007509109, "grad_norm": 0.54296875, "learning_rate": 3.707890070921986e-05, "loss": 1.0861, "step": 1167 }, { "epoch": 0.2589118718740908, "grad_norm": 0.5703125, "learning_rate": 3.706781914893617e-05, "loss": 1.0898, "step": 1168 }, { "epoch": 0.25913354299727065, "grad_norm": 0.5625, "learning_rate": 3.705673758865249e-05, "loss": 1.072, "step": 1169 }, { "epoch": 0.25935521412045054, "grad_norm": 0.5234375, "learning_rate": 3.70456560283688e-05, "loss": 1.0521, "step": 1170 }, { "epoch": 0.25957688524363043, "grad_norm": 0.63671875, "learning_rate": 3.7034574468085106e-05, "loss": 1.1378, "step": 1171 }, { "epoch": 0.2597985563668103, "grad_norm": 0.56640625, "learning_rate": 3.702349290780142e-05, "loss": 1.1619, "step": 1172 }, { "epoch": 0.26002022748999015, "grad_norm": 0.5546875, "learning_rate": 3.701241134751773e-05, "loss": 1.1667, "step": 1173 }, { "epoch": 0.26024189861317004, "grad_norm": 0.5625, "learning_rate": 3.700132978723405e-05, "loss": 1.0783, "step": 1174 }, { "epoch": 0.26046356973634993, "grad_norm": 0.58203125, "learning_rate": 3.699024822695036e-05, "loss": 1.0546, "step": 1175 }, { "epoch": 0.26068524085952977, "grad_norm": 0.546875, "learning_rate": 3.697916666666667e-05, "loss": 1.1039, "step": 1176 }, { "epoch": 0.26090691198270966, "grad_norm": 0.52734375, "learning_rate": 3.696808510638298e-05, "loss": 1.0836, "step": 1177 }, { "epoch": 0.26112858310588954, "grad_norm": 0.57421875, "learning_rate": 3.695700354609929e-05, "loss": 1.1, "step": 1178 }, { "epoch": 0.2613502542290694, "grad_norm": 0.5859375, "learning_rate": 3.694592198581561e-05, "loss": 1.118, "step": 1179 }, { "epoch": 0.26157192535224927, "grad_norm": 0.5390625, "learning_rate": 3.693484042553192e-05, "loss": 0.9902, "step": 1180 }, { "epoch": 0.26179359647542916, "grad_norm": 0.59375, "learning_rate": 3.6923758865248234e-05, "loss": 1.0663, "step": 1181 }, { "epoch": 0.262015267598609, "grad_norm": 0.53515625, "learning_rate": 3.6912677304964536e-05, "loss": 1.0741, "step": 1182 }, { "epoch": 0.2622369387217889, "grad_norm": 0.54296875, "learning_rate": 3.690159574468085e-05, "loss": 1.086, "step": 1183 }, { "epoch": 0.26245860984496877, "grad_norm": 0.578125, "learning_rate": 3.689051418439716e-05, "loss": 1.0613, "step": 1184 }, { "epoch": 0.2626802809681486, "grad_norm": 0.54296875, "learning_rate": 3.687943262411347e-05, "loss": 1.1692, "step": 1185 }, { "epoch": 0.2629019520913285, "grad_norm": 0.6015625, "learning_rate": 3.686835106382979e-05, "loss": 1.1072, "step": 1186 }, { "epoch": 0.2631236232145084, "grad_norm": 0.50390625, "learning_rate": 3.68572695035461e-05, "loss": 0.9502, "step": 1187 }, { "epoch": 0.2633452943376883, "grad_norm": 0.55859375, "learning_rate": 3.684618794326241e-05, "loss": 1.1528, "step": 1188 }, { "epoch": 0.2635669654608681, "grad_norm": 0.55859375, "learning_rate": 3.683510638297872e-05, "loss": 1.127, "step": 1189 }, { "epoch": 0.263788636584048, "grad_norm": 0.5546875, "learning_rate": 3.682402482269504e-05, "loss": 1.0409, "step": 1190 }, { "epoch": 0.2640103077072279, "grad_norm": 0.56640625, "learning_rate": 3.681294326241135e-05, "loss": 1.0083, "step": 1191 }, { "epoch": 0.2642319788304077, "grad_norm": 0.5859375, "learning_rate": 3.680186170212766e-05, "loss": 1.078, "step": 1192 }, { "epoch": 0.2644536499535876, "grad_norm": 0.57421875, "learning_rate": 3.6790780141843974e-05, "loss": 1.0678, "step": 1193 }, { "epoch": 0.2646753210767675, "grad_norm": 0.55859375, "learning_rate": 3.677969858156028e-05, "loss": 1.0183, "step": 1194 }, { "epoch": 0.26489699219994733, "grad_norm": 0.53515625, "learning_rate": 3.67686170212766e-05, "loss": 1.0689, "step": 1195 }, { "epoch": 0.2651186633231272, "grad_norm": 0.55078125, "learning_rate": 3.675753546099291e-05, "loss": 1.1602, "step": 1196 }, { "epoch": 0.2653403344463071, "grad_norm": 0.56640625, "learning_rate": 3.6746453900709225e-05, "loss": 1.0848, "step": 1197 }, { "epoch": 0.26556200556948695, "grad_norm": 0.5546875, "learning_rate": 3.6735372340425534e-05, "loss": 1.0575, "step": 1198 }, { "epoch": 0.26578367669266684, "grad_norm": 0.5625, "learning_rate": 3.672429078014184e-05, "loss": 1.1849, "step": 1199 }, { "epoch": 0.2660053478158467, "grad_norm": 0.59375, "learning_rate": 3.671320921985816e-05, "loss": 1.0669, "step": 1200 }, { "epoch": 0.26622701893902656, "grad_norm": 0.5703125, "learning_rate": 3.670212765957447e-05, "loss": 1.109, "step": 1201 }, { "epoch": 0.26644869006220645, "grad_norm": 0.546875, "learning_rate": 3.6691046099290785e-05, "loss": 1.1298, "step": 1202 }, { "epoch": 0.26667036118538634, "grad_norm": 0.54296875, "learning_rate": 3.6679964539007094e-05, "loss": 1.0298, "step": 1203 }, { "epoch": 0.26689203230856623, "grad_norm": 0.55078125, "learning_rate": 3.6668882978723404e-05, "loss": 1.1657, "step": 1204 }, { "epoch": 0.26711370343174606, "grad_norm": 0.55859375, "learning_rate": 3.665780141843972e-05, "loss": 1.0999, "step": 1205 }, { "epoch": 0.26733537455492595, "grad_norm": 0.58984375, "learning_rate": 3.664671985815603e-05, "loss": 1.0471, "step": 1206 }, { "epoch": 0.26755704567810584, "grad_norm": 0.546875, "learning_rate": 3.6635638297872346e-05, "loss": 1.0611, "step": 1207 }, { "epoch": 0.2677787168012857, "grad_norm": 0.5625, "learning_rate": 3.6624556737588655e-05, "loss": 1.0849, "step": 1208 }, { "epoch": 0.26800038792446557, "grad_norm": 0.54296875, "learning_rate": 3.661347517730497e-05, "loss": 1.1081, "step": 1209 }, { "epoch": 0.26822205904764546, "grad_norm": 0.56640625, "learning_rate": 3.660239361702128e-05, "loss": 1.0888, "step": 1210 }, { "epoch": 0.2684437301708253, "grad_norm": 0.5390625, "learning_rate": 3.659131205673759e-05, "loss": 1.0688, "step": 1211 }, { "epoch": 0.2686654012940052, "grad_norm": 0.5390625, "learning_rate": 3.6580230496453906e-05, "loss": 1.1431, "step": 1212 }, { "epoch": 0.26888707241718507, "grad_norm": 0.58203125, "learning_rate": 3.6569148936170215e-05, "loss": 1.1767, "step": 1213 }, { "epoch": 0.2691087435403649, "grad_norm": 0.5234375, "learning_rate": 3.655806737588653e-05, "loss": 1.035, "step": 1214 }, { "epoch": 0.2693304146635448, "grad_norm": 0.56640625, "learning_rate": 3.654698581560284e-05, "loss": 1.1016, "step": 1215 }, { "epoch": 0.2695520857867247, "grad_norm": 0.54296875, "learning_rate": 3.653590425531915e-05, "loss": 1.0792, "step": 1216 }, { "epoch": 0.2697737569099045, "grad_norm": 0.5390625, "learning_rate": 3.6524822695035466e-05, "loss": 1.1361, "step": 1217 }, { "epoch": 0.2699954280330844, "grad_norm": 0.5625, "learning_rate": 3.651374113475177e-05, "loss": 1.0629, "step": 1218 }, { "epoch": 0.2702170991562643, "grad_norm": 0.55078125, "learning_rate": 3.6502659574468085e-05, "loss": 1.0251, "step": 1219 }, { "epoch": 0.2704387702794442, "grad_norm": 0.5859375, "learning_rate": 3.6491578014184395e-05, "loss": 1.0535, "step": 1220 }, { "epoch": 0.270660441402624, "grad_norm": 0.546875, "learning_rate": 3.648049645390071e-05, "loss": 1.1195, "step": 1221 }, { "epoch": 0.2708821125258039, "grad_norm": 0.55859375, "learning_rate": 3.646941489361702e-05, "loss": 1.0771, "step": 1222 }, { "epoch": 0.2711037836489838, "grad_norm": 0.63671875, "learning_rate": 3.6458333333333336e-05, "loss": 1.1544, "step": 1223 }, { "epoch": 0.27132545477216363, "grad_norm": 0.6015625, "learning_rate": 3.6447251773049646e-05, "loss": 1.1223, "step": 1224 }, { "epoch": 0.2715471258953435, "grad_norm": 0.54296875, "learning_rate": 3.6436170212765955e-05, "loss": 0.9906, "step": 1225 }, { "epoch": 0.2717687970185234, "grad_norm": 0.546875, "learning_rate": 3.642508865248227e-05, "loss": 1.0607, "step": 1226 }, { "epoch": 0.27199046814170325, "grad_norm": 0.546875, "learning_rate": 3.641400709219858e-05, "loss": 1.0994, "step": 1227 }, { "epoch": 0.27221213926488314, "grad_norm": 0.5859375, "learning_rate": 3.64029255319149e-05, "loss": 1.1175, "step": 1228 }, { "epoch": 0.272433810388063, "grad_norm": 0.56640625, "learning_rate": 3.6391843971631206e-05, "loss": 1.1848, "step": 1229 }, { "epoch": 0.27265548151124286, "grad_norm": 0.58203125, "learning_rate": 3.6380762411347516e-05, "loss": 1.0692, "step": 1230 }, { "epoch": 0.27287715263442275, "grad_norm": 0.578125, "learning_rate": 3.636968085106383e-05, "loss": 1.1174, "step": 1231 }, { "epoch": 0.27309882375760264, "grad_norm": 0.55859375, "learning_rate": 3.635859929078014e-05, "loss": 1.0694, "step": 1232 }, { "epoch": 0.27332049488078247, "grad_norm": 0.53515625, "learning_rate": 3.634751773049646e-05, "loss": 1.0576, "step": 1233 }, { "epoch": 0.27354216600396236, "grad_norm": 0.58984375, "learning_rate": 3.633643617021277e-05, "loss": 1.1358, "step": 1234 }, { "epoch": 0.27376383712714225, "grad_norm": 0.55078125, "learning_rate": 3.632535460992908e-05, "loss": 1.0912, "step": 1235 }, { "epoch": 0.27398550825032214, "grad_norm": 0.55859375, "learning_rate": 3.631427304964539e-05, "loss": 1.1244, "step": 1236 }, { "epoch": 0.274207179373502, "grad_norm": 0.5859375, "learning_rate": 3.63031914893617e-05, "loss": 1.1316, "step": 1237 }, { "epoch": 0.27442885049668186, "grad_norm": 0.57421875, "learning_rate": 3.629210992907802e-05, "loss": 1.1115, "step": 1238 }, { "epoch": 0.27465052161986175, "grad_norm": 0.54296875, "learning_rate": 3.628102836879433e-05, "loss": 1.1031, "step": 1239 }, { "epoch": 0.2748721927430416, "grad_norm": 0.55078125, "learning_rate": 3.626994680851064e-05, "loss": 1.1372, "step": 1240 }, { "epoch": 0.2750938638662215, "grad_norm": 0.5546875, "learning_rate": 3.625886524822695e-05, "loss": 1.0307, "step": 1241 }, { "epoch": 0.27531553498940137, "grad_norm": 0.5390625, "learning_rate": 3.624778368794326e-05, "loss": 1.0687, "step": 1242 }, { "epoch": 0.2755372061125812, "grad_norm": 0.53515625, "learning_rate": 3.623670212765958e-05, "loss": 1.0858, "step": 1243 }, { "epoch": 0.2757588772357611, "grad_norm": 0.53515625, "learning_rate": 3.622562056737589e-05, "loss": 1.1357, "step": 1244 }, { "epoch": 0.275980548358941, "grad_norm": 0.546875, "learning_rate": 3.6214539007092204e-05, "loss": 1.0514, "step": 1245 }, { "epoch": 0.2762022194821208, "grad_norm": 0.55859375, "learning_rate": 3.620345744680851e-05, "loss": 1.0562, "step": 1246 }, { "epoch": 0.2764238906053007, "grad_norm": 0.56640625, "learning_rate": 3.619237588652483e-05, "loss": 1.0281, "step": 1247 }, { "epoch": 0.2766455617284806, "grad_norm": 0.55859375, "learning_rate": 3.618129432624114e-05, "loss": 1.0965, "step": 1248 }, { "epoch": 0.2768672328516604, "grad_norm": 0.53125, "learning_rate": 3.617021276595745e-05, "loss": 1.0669, "step": 1249 }, { "epoch": 0.2770889039748403, "grad_norm": 0.53125, "learning_rate": 3.6159131205673764e-05, "loss": 1.0647, "step": 1250 }, { "epoch": 0.2773105750980202, "grad_norm": 0.55859375, "learning_rate": 3.6148049645390074e-05, "loss": 1.1189, "step": 1251 }, { "epoch": 0.2775322462212001, "grad_norm": 0.51953125, "learning_rate": 3.613696808510639e-05, "loss": 0.9969, "step": 1252 }, { "epoch": 0.27775391734437993, "grad_norm": 0.5546875, "learning_rate": 3.612588652482269e-05, "loss": 1.0547, "step": 1253 }, { "epoch": 0.2779755884675598, "grad_norm": 0.54296875, "learning_rate": 3.611480496453901e-05, "loss": 1.0374, "step": 1254 }, { "epoch": 0.2781972595907397, "grad_norm": 0.5546875, "learning_rate": 3.610372340425532e-05, "loss": 1.1003, "step": 1255 }, { "epoch": 0.27841893071391954, "grad_norm": 0.5703125, "learning_rate": 3.609264184397163e-05, "loss": 1.1851, "step": 1256 }, { "epoch": 0.27864060183709943, "grad_norm": 0.55859375, "learning_rate": 3.6081560283687944e-05, "loss": 1.0755, "step": 1257 }, { "epoch": 0.2788622729602793, "grad_norm": 0.53125, "learning_rate": 3.607047872340425e-05, "loss": 1.1149, "step": 1258 }, { "epoch": 0.27908394408345916, "grad_norm": 0.58203125, "learning_rate": 3.605939716312057e-05, "loss": 1.052, "step": 1259 }, { "epoch": 0.27930561520663905, "grad_norm": 0.57421875, "learning_rate": 3.604831560283688e-05, "loss": 1.1425, "step": 1260 }, { "epoch": 0.27952728632981894, "grad_norm": 0.5859375, "learning_rate": 3.6037234042553195e-05, "loss": 1.083, "step": 1261 }, { "epoch": 0.27974895745299877, "grad_norm": 0.5234375, "learning_rate": 3.6026152482269504e-05, "loss": 1.0433, "step": 1262 }, { "epoch": 0.27997062857617866, "grad_norm": 0.55078125, "learning_rate": 3.601507092198581e-05, "loss": 1.0052, "step": 1263 }, { "epoch": 0.28019229969935855, "grad_norm": 0.5390625, "learning_rate": 3.600398936170213e-05, "loss": 1.1098, "step": 1264 }, { "epoch": 0.28041397082253844, "grad_norm": 0.54296875, "learning_rate": 3.599290780141844e-05, "loss": 1.164, "step": 1265 }, { "epoch": 0.2806356419457183, "grad_norm": 0.55078125, "learning_rate": 3.5981826241134755e-05, "loss": 1.0431, "step": 1266 }, { "epoch": 0.28085731306889816, "grad_norm": 0.546875, "learning_rate": 3.5970744680851064e-05, "loss": 0.994, "step": 1267 }, { "epoch": 0.28107898419207805, "grad_norm": 0.52734375, "learning_rate": 3.595966312056738e-05, "loss": 1.0764, "step": 1268 }, { "epoch": 0.2813006553152579, "grad_norm": 0.51953125, "learning_rate": 3.594858156028369e-05, "loss": 1.068, "step": 1269 }, { "epoch": 0.2815223264384378, "grad_norm": 0.51953125, "learning_rate": 3.59375e-05, "loss": 1.125, "step": 1270 }, { "epoch": 0.28174399756161767, "grad_norm": 0.55078125, "learning_rate": 3.5926418439716316e-05, "loss": 1.0988, "step": 1271 }, { "epoch": 0.2819656686847975, "grad_norm": 0.546875, "learning_rate": 3.5915336879432625e-05, "loss": 1.0977, "step": 1272 }, { "epoch": 0.2821873398079774, "grad_norm": 0.56640625, "learning_rate": 3.590425531914894e-05, "loss": 1.0563, "step": 1273 }, { "epoch": 0.2824090109311573, "grad_norm": 0.58984375, "learning_rate": 3.589317375886525e-05, "loss": 1.185, "step": 1274 }, { "epoch": 0.2826306820543371, "grad_norm": 0.53125, "learning_rate": 3.588209219858156e-05, "loss": 1.0969, "step": 1275 }, { "epoch": 0.282852353177517, "grad_norm": 0.546875, "learning_rate": 3.5871010638297876e-05, "loss": 1.0589, "step": 1276 }, { "epoch": 0.2830740243006969, "grad_norm": 0.5546875, "learning_rate": 3.5859929078014185e-05, "loss": 1.1898, "step": 1277 }, { "epoch": 0.2832956954238767, "grad_norm": 0.56640625, "learning_rate": 3.58488475177305e-05, "loss": 1.0897, "step": 1278 }, { "epoch": 0.2835173665470566, "grad_norm": 0.55078125, "learning_rate": 3.583776595744681e-05, "loss": 1.0777, "step": 1279 }, { "epoch": 0.2837390376702365, "grad_norm": 0.54296875, "learning_rate": 3.582668439716313e-05, "loss": 1.0688, "step": 1280 }, { "epoch": 0.2839607087934164, "grad_norm": 0.54296875, "learning_rate": 3.5815602836879437e-05, "loss": 1.0668, "step": 1281 }, { "epoch": 0.28418237991659623, "grad_norm": 0.5546875, "learning_rate": 3.5804521276595746e-05, "loss": 1.0211, "step": 1282 }, { "epoch": 0.2844040510397761, "grad_norm": 0.546875, "learning_rate": 3.579343971631206e-05, "loss": 1.0116, "step": 1283 }, { "epoch": 0.284625722162956, "grad_norm": 0.59375, "learning_rate": 3.578235815602837e-05, "loss": 1.1268, "step": 1284 }, { "epoch": 0.28484739328613584, "grad_norm": 0.60546875, "learning_rate": 3.577127659574469e-05, "loss": 1.1264, "step": 1285 }, { "epoch": 0.28506906440931573, "grad_norm": 0.59375, "learning_rate": 3.5760195035461e-05, "loss": 1.13, "step": 1286 }, { "epoch": 0.2852907355324956, "grad_norm": 0.546875, "learning_rate": 3.5749113475177306e-05, "loss": 1.036, "step": 1287 }, { "epoch": 0.28551240665567545, "grad_norm": 0.5625, "learning_rate": 3.5738031914893616e-05, "loss": 1.0744, "step": 1288 }, { "epoch": 0.28573407777885534, "grad_norm": 0.55859375, "learning_rate": 3.5726950354609925e-05, "loss": 1.0938, "step": 1289 }, { "epoch": 0.28595574890203523, "grad_norm": 0.58203125, "learning_rate": 3.571586879432624e-05, "loss": 1.1058, "step": 1290 }, { "epoch": 0.28617742002521507, "grad_norm": 0.52734375, "learning_rate": 3.570478723404255e-05, "loss": 1.1305, "step": 1291 }, { "epoch": 0.28639909114839496, "grad_norm": 0.578125, "learning_rate": 3.569370567375887e-05, "loss": 1.0595, "step": 1292 }, { "epoch": 0.28662076227157485, "grad_norm": 0.55078125, "learning_rate": 3.5682624113475176e-05, "loss": 1.1303, "step": 1293 }, { "epoch": 0.2868424333947547, "grad_norm": 0.5546875, "learning_rate": 3.567154255319149e-05, "loss": 1.1005, "step": 1294 }, { "epoch": 0.28706410451793457, "grad_norm": 0.55859375, "learning_rate": 3.56604609929078e-05, "loss": 1.1639, "step": 1295 }, { "epoch": 0.28728577564111446, "grad_norm": 0.55078125, "learning_rate": 3.564937943262411e-05, "loss": 1.0757, "step": 1296 }, { "epoch": 0.28750744676429435, "grad_norm": 0.5859375, "learning_rate": 3.563829787234043e-05, "loss": 1.1298, "step": 1297 }, { "epoch": 0.2877291178874742, "grad_norm": 0.5390625, "learning_rate": 3.562721631205674e-05, "loss": 1.0329, "step": 1298 }, { "epoch": 0.2879507890106541, "grad_norm": 0.53515625, "learning_rate": 3.561613475177305e-05, "loss": 1.0653, "step": 1299 }, { "epoch": 0.28817246013383396, "grad_norm": 0.5546875, "learning_rate": 3.560505319148936e-05, "loss": 1.1243, "step": 1300 }, { "epoch": 0.2883941312570138, "grad_norm": 0.53515625, "learning_rate": 3.559397163120567e-05, "loss": 0.9638, "step": 1301 }, { "epoch": 0.2886158023801937, "grad_norm": 0.5859375, "learning_rate": 3.558289007092199e-05, "loss": 1.1244, "step": 1302 }, { "epoch": 0.2888374735033736, "grad_norm": 0.53515625, "learning_rate": 3.55718085106383e-05, "loss": 1.0497, "step": 1303 }, { "epoch": 0.2890591446265534, "grad_norm": 0.55078125, "learning_rate": 3.556072695035461e-05, "loss": 1.1137, "step": 1304 }, { "epoch": 0.2892808157497333, "grad_norm": 0.55859375, "learning_rate": 3.554964539007092e-05, "loss": 1.1452, "step": 1305 }, { "epoch": 0.2895024868729132, "grad_norm": 0.55859375, "learning_rate": 3.553856382978724e-05, "loss": 1.0838, "step": 1306 }, { "epoch": 0.289724157996093, "grad_norm": 0.5390625, "learning_rate": 3.552748226950355e-05, "loss": 0.9608, "step": 1307 }, { "epoch": 0.2899458291192729, "grad_norm": 0.5625, "learning_rate": 3.551640070921986e-05, "loss": 1.0848, "step": 1308 }, { "epoch": 0.2901675002424528, "grad_norm": 0.57421875, "learning_rate": 3.5505319148936174e-05, "loss": 1.068, "step": 1309 }, { "epoch": 0.29038917136563264, "grad_norm": 0.56640625, "learning_rate": 3.549423758865248e-05, "loss": 1.1537, "step": 1310 }, { "epoch": 0.2906108424888125, "grad_norm": 0.58203125, "learning_rate": 3.54831560283688e-05, "loss": 1.1356, "step": 1311 }, { "epoch": 0.2908325136119924, "grad_norm": 0.5625, "learning_rate": 3.547207446808511e-05, "loss": 1.1489, "step": 1312 }, { "epoch": 0.2910541847351723, "grad_norm": 0.5625, "learning_rate": 3.546099290780142e-05, "loss": 1.1305, "step": 1313 }, { "epoch": 0.29127585585835214, "grad_norm": 0.53125, "learning_rate": 3.5449911347517734e-05, "loss": 1.0386, "step": 1314 }, { "epoch": 0.29149752698153203, "grad_norm": 0.5625, "learning_rate": 3.5438829787234044e-05, "loss": 1.0628, "step": 1315 }, { "epoch": 0.2917191981047119, "grad_norm": 0.51171875, "learning_rate": 3.542774822695036e-05, "loss": 1.0682, "step": 1316 }, { "epoch": 0.29194086922789175, "grad_norm": 0.5546875, "learning_rate": 3.541666666666667e-05, "loss": 1.0756, "step": 1317 }, { "epoch": 0.29216254035107164, "grad_norm": 0.58203125, "learning_rate": 3.5405585106382985e-05, "loss": 1.1178, "step": 1318 }, { "epoch": 0.29238421147425153, "grad_norm": 0.578125, "learning_rate": 3.5394503546099295e-05, "loss": 1.0562, "step": 1319 }, { "epoch": 0.29260588259743137, "grad_norm": 0.5625, "learning_rate": 3.5383421985815604e-05, "loss": 1.1028, "step": 1320 }, { "epoch": 0.29282755372061126, "grad_norm": 0.53515625, "learning_rate": 3.537234042553192e-05, "loss": 1.0474, "step": 1321 }, { "epoch": 0.29304922484379115, "grad_norm": 0.55078125, "learning_rate": 3.536125886524823e-05, "loss": 1.1136, "step": 1322 }, { "epoch": 0.293270895966971, "grad_norm": 0.5546875, "learning_rate": 3.535017730496454e-05, "loss": 1.0761, "step": 1323 }, { "epoch": 0.29349256709015087, "grad_norm": 0.55078125, "learning_rate": 3.533909574468085e-05, "loss": 1.1287, "step": 1324 }, { "epoch": 0.29371423821333076, "grad_norm": 0.5546875, "learning_rate": 3.5328014184397165e-05, "loss": 1.1799, "step": 1325 }, { "epoch": 0.2939359093365106, "grad_norm": 0.59765625, "learning_rate": 3.5316932624113474e-05, "loss": 1.1375, "step": 1326 }, { "epoch": 0.2941575804596905, "grad_norm": 0.57421875, "learning_rate": 3.530585106382979e-05, "loss": 1.1042, "step": 1327 }, { "epoch": 0.29437925158287037, "grad_norm": 0.56640625, "learning_rate": 3.52947695035461e-05, "loss": 1.1, "step": 1328 }, { "epoch": 0.29460092270605026, "grad_norm": 0.5390625, "learning_rate": 3.528368794326241e-05, "loss": 1.0854, "step": 1329 }, { "epoch": 0.2948225938292301, "grad_norm": 0.54296875, "learning_rate": 3.5272606382978725e-05, "loss": 1.0855, "step": 1330 }, { "epoch": 0.29504426495241, "grad_norm": 0.55859375, "learning_rate": 3.5261524822695034e-05, "loss": 0.9981, "step": 1331 }, { "epoch": 0.2952659360755899, "grad_norm": 0.5546875, "learning_rate": 3.525044326241135e-05, "loss": 1.0957, "step": 1332 }, { "epoch": 0.2954876071987697, "grad_norm": 0.5390625, "learning_rate": 3.523936170212766e-05, "loss": 0.9855, "step": 1333 }, { "epoch": 0.2957092783219496, "grad_norm": 0.58984375, "learning_rate": 3.522828014184397e-05, "loss": 1.1548, "step": 1334 }, { "epoch": 0.2959309494451295, "grad_norm": 0.58203125, "learning_rate": 3.5217198581560286e-05, "loss": 1.1388, "step": 1335 }, { "epoch": 0.2961526205683093, "grad_norm": 0.546875, "learning_rate": 3.5206117021276595e-05, "loss": 1.0787, "step": 1336 }, { "epoch": 0.2963742916914892, "grad_norm": 0.53515625, "learning_rate": 3.519503546099291e-05, "loss": 1.0577, "step": 1337 }, { "epoch": 0.2965959628146691, "grad_norm": 0.546875, "learning_rate": 3.518395390070922e-05, "loss": 1.0984, "step": 1338 }, { "epoch": 0.29681763393784893, "grad_norm": 0.57421875, "learning_rate": 3.517287234042554e-05, "loss": 1.1452, "step": 1339 }, { "epoch": 0.2970393050610288, "grad_norm": 0.56640625, "learning_rate": 3.5161790780141846e-05, "loss": 1.1263, "step": 1340 }, { "epoch": 0.2972609761842087, "grad_norm": 0.5546875, "learning_rate": 3.5150709219858155e-05, "loss": 0.9869, "step": 1341 }, { "epoch": 0.29748264730738855, "grad_norm": 0.59375, "learning_rate": 3.513962765957447e-05, "loss": 1.1955, "step": 1342 }, { "epoch": 0.29770431843056844, "grad_norm": 0.5546875, "learning_rate": 3.512854609929078e-05, "loss": 1.0492, "step": 1343 }, { "epoch": 0.2979259895537483, "grad_norm": 0.5625, "learning_rate": 3.51174645390071e-05, "loss": 1.1899, "step": 1344 }, { "epoch": 0.2981476606769282, "grad_norm": 0.5390625, "learning_rate": 3.5106382978723407e-05, "loss": 1.154, "step": 1345 }, { "epoch": 0.29836933180010805, "grad_norm": 0.6015625, "learning_rate": 3.5095301418439716e-05, "loss": 1.1196, "step": 1346 }, { "epoch": 0.29859100292328794, "grad_norm": 0.5546875, "learning_rate": 3.508421985815603e-05, "loss": 1.0363, "step": 1347 }, { "epoch": 0.29881267404646783, "grad_norm": 0.578125, "learning_rate": 3.507313829787234e-05, "loss": 1.0857, "step": 1348 }, { "epoch": 0.29903434516964766, "grad_norm": 0.5546875, "learning_rate": 3.506205673758866e-05, "loss": 1.0871, "step": 1349 }, { "epoch": 0.29925601629282755, "grad_norm": 0.58203125, "learning_rate": 3.505097517730497e-05, "loss": 1.087, "step": 1350 }, { "epoch": 0.29947768741600744, "grad_norm": 0.58203125, "learning_rate": 3.503989361702128e-05, "loss": 1.1664, "step": 1351 }, { "epoch": 0.2996993585391873, "grad_norm": 0.56640625, "learning_rate": 3.502881205673759e-05, "loss": 1.0135, "step": 1352 }, { "epoch": 0.29992102966236717, "grad_norm": 0.5234375, "learning_rate": 3.50177304964539e-05, "loss": 1.0404, "step": 1353 }, { "epoch": 0.30014270078554706, "grad_norm": 0.62109375, "learning_rate": 3.500664893617022e-05, "loss": 1.0943, "step": 1354 }, { "epoch": 0.3003643719087269, "grad_norm": 0.546875, "learning_rate": 3.499556737588653e-05, "loss": 1.2055, "step": 1355 }, { "epoch": 0.3005860430319068, "grad_norm": 0.53125, "learning_rate": 3.4984485815602844e-05, "loss": 1.0073, "step": 1356 }, { "epoch": 0.30080771415508667, "grad_norm": 0.55078125, "learning_rate": 3.497340425531915e-05, "loss": 1.1254, "step": 1357 }, { "epoch": 0.30102938527826656, "grad_norm": 0.578125, "learning_rate": 3.496232269503546e-05, "loss": 1.0797, "step": 1358 }, { "epoch": 0.3012510564014464, "grad_norm": 0.56640625, "learning_rate": 3.495124113475177e-05, "loss": 1.1263, "step": 1359 }, { "epoch": 0.3014727275246263, "grad_norm": 0.5703125, "learning_rate": 3.494015957446808e-05, "loss": 1.0726, "step": 1360 }, { "epoch": 0.3016943986478062, "grad_norm": 0.55859375, "learning_rate": 3.49290780141844e-05, "loss": 1.0998, "step": 1361 }, { "epoch": 0.301916069770986, "grad_norm": 0.5625, "learning_rate": 3.491799645390071e-05, "loss": 1.068, "step": 1362 }, { "epoch": 0.3021377408941659, "grad_norm": 0.5234375, "learning_rate": 3.490691489361702e-05, "loss": 0.9592, "step": 1363 }, { "epoch": 0.3023594120173458, "grad_norm": 0.53125, "learning_rate": 3.489583333333333e-05, "loss": 1.1263, "step": 1364 }, { "epoch": 0.3025810831405256, "grad_norm": 0.51953125, "learning_rate": 3.488475177304965e-05, "loss": 1.0686, "step": 1365 }, { "epoch": 0.3028027542637055, "grad_norm": 0.578125, "learning_rate": 3.487367021276596e-05, "loss": 1.2145, "step": 1366 }, { "epoch": 0.3030244253868854, "grad_norm": 0.53515625, "learning_rate": 3.486258865248227e-05, "loss": 1.0628, "step": 1367 }, { "epoch": 0.30324609651006523, "grad_norm": 0.5546875, "learning_rate": 3.485150709219858e-05, "loss": 1.1392, "step": 1368 }, { "epoch": 0.3034677676332451, "grad_norm": 0.625, "learning_rate": 3.484042553191489e-05, "loss": 1.0941, "step": 1369 }, { "epoch": 0.303689438756425, "grad_norm": 0.5546875, "learning_rate": 3.482934397163121e-05, "loss": 1.0868, "step": 1370 }, { "epoch": 0.30391110987960485, "grad_norm": 0.5546875, "learning_rate": 3.481826241134752e-05, "loss": 1.1303, "step": 1371 }, { "epoch": 0.30413278100278474, "grad_norm": 0.57421875, "learning_rate": 3.480718085106383e-05, "loss": 1.0939, "step": 1372 }, { "epoch": 0.3043544521259646, "grad_norm": 0.5546875, "learning_rate": 3.4796099290780144e-05, "loss": 1.1246, "step": 1373 }, { "epoch": 0.3045761232491445, "grad_norm": 0.59765625, "learning_rate": 3.478501773049645e-05, "loss": 1.0958, "step": 1374 }, { "epoch": 0.30479779437232435, "grad_norm": 0.5703125, "learning_rate": 3.477393617021277e-05, "loss": 1.0596, "step": 1375 }, { "epoch": 0.30501946549550424, "grad_norm": 0.5390625, "learning_rate": 3.476285460992908e-05, "loss": 1.0953, "step": 1376 }, { "epoch": 0.30524113661868413, "grad_norm": 0.54296875, "learning_rate": 3.4751773049645395e-05, "loss": 1.1289, "step": 1377 }, { "epoch": 0.30546280774186396, "grad_norm": 0.54296875, "learning_rate": 3.4740691489361704e-05, "loss": 1.0557, "step": 1378 }, { "epoch": 0.30568447886504385, "grad_norm": 0.546875, "learning_rate": 3.4729609929078014e-05, "loss": 1.0776, "step": 1379 }, { "epoch": 0.30590614998822374, "grad_norm": 0.54296875, "learning_rate": 3.471852836879433e-05, "loss": 1.0452, "step": 1380 }, { "epoch": 0.3061278211114036, "grad_norm": 0.515625, "learning_rate": 3.470744680851064e-05, "loss": 1.0615, "step": 1381 }, { "epoch": 0.30634949223458346, "grad_norm": 0.55078125, "learning_rate": 3.4696365248226955e-05, "loss": 1.1042, "step": 1382 }, { "epoch": 0.30657116335776335, "grad_norm": 0.53125, "learning_rate": 3.4685283687943265e-05, "loss": 0.9959, "step": 1383 }, { "epoch": 0.3067928344809432, "grad_norm": 0.5546875, "learning_rate": 3.4674202127659574e-05, "loss": 1.1255, "step": 1384 }, { "epoch": 0.3070145056041231, "grad_norm": 0.5546875, "learning_rate": 3.466312056737589e-05, "loss": 1.1289, "step": 1385 }, { "epoch": 0.30723617672730297, "grad_norm": 0.5546875, "learning_rate": 3.46520390070922e-05, "loss": 1.0697, "step": 1386 }, { "epoch": 0.3074578478504828, "grad_norm": 0.54296875, "learning_rate": 3.4640957446808516e-05, "loss": 1.0572, "step": 1387 }, { "epoch": 0.3076795189736627, "grad_norm": 0.546875, "learning_rate": 3.4629875886524825e-05, "loss": 1.1435, "step": 1388 }, { "epoch": 0.3079011900968426, "grad_norm": 0.56640625, "learning_rate": 3.461879432624114e-05, "loss": 1.1173, "step": 1389 }, { "epoch": 0.30812286122002247, "grad_norm": 0.55859375, "learning_rate": 3.460771276595745e-05, "loss": 1.1411, "step": 1390 }, { "epoch": 0.3083445323432023, "grad_norm": 0.55859375, "learning_rate": 3.459663120567376e-05, "loss": 1.0913, "step": 1391 }, { "epoch": 0.3085662034663822, "grad_norm": 0.58984375, "learning_rate": 3.4585549645390076e-05, "loss": 0.9846, "step": 1392 }, { "epoch": 0.3087878745895621, "grad_norm": 0.5703125, "learning_rate": 3.4574468085106386e-05, "loss": 1.0717, "step": 1393 }, { "epoch": 0.3090095457127419, "grad_norm": 0.55078125, "learning_rate": 3.4563386524822695e-05, "loss": 1.0619, "step": 1394 }, { "epoch": 0.3092312168359218, "grad_norm": 0.5859375, "learning_rate": 3.4552304964539004e-05, "loss": 1.2208, "step": 1395 }, { "epoch": 0.3094528879591017, "grad_norm": 0.64453125, "learning_rate": 3.454122340425532e-05, "loss": 1.051, "step": 1396 }, { "epoch": 0.30967455908228153, "grad_norm": 0.546875, "learning_rate": 3.453014184397163e-05, "loss": 1.0764, "step": 1397 }, { "epoch": 0.3098962302054614, "grad_norm": 0.56640625, "learning_rate": 3.4519060283687946e-05, "loss": 1.1092, "step": 1398 }, { "epoch": 0.3101179013286413, "grad_norm": 0.5546875, "learning_rate": 3.4507978723404256e-05, "loss": 1.1889, "step": 1399 }, { "epoch": 0.31033957245182114, "grad_norm": 0.55078125, "learning_rate": 3.4496897163120565e-05, "loss": 1.0299, "step": 1400 }, { "epoch": 0.31056124357500103, "grad_norm": 0.57421875, "learning_rate": 3.448581560283688e-05, "loss": 1.0247, "step": 1401 }, { "epoch": 0.3107829146981809, "grad_norm": 0.5703125, "learning_rate": 3.447473404255319e-05, "loss": 1.1406, "step": 1402 }, { "epoch": 0.31100458582136076, "grad_norm": 0.52734375, "learning_rate": 3.446365248226951e-05, "loss": 1.0548, "step": 1403 }, { "epoch": 0.31122625694454065, "grad_norm": 0.55859375, "learning_rate": 3.4452570921985816e-05, "loss": 1.0396, "step": 1404 }, { "epoch": 0.31144792806772054, "grad_norm": 0.5546875, "learning_rate": 3.4441489361702125e-05, "loss": 1.0848, "step": 1405 }, { "epoch": 0.3116695991909004, "grad_norm": 0.55859375, "learning_rate": 3.443040780141844e-05, "loss": 1.1112, "step": 1406 }, { "epoch": 0.31189127031408026, "grad_norm": 0.5546875, "learning_rate": 3.441932624113475e-05, "loss": 1.1291, "step": 1407 }, { "epoch": 0.31211294143726015, "grad_norm": 0.77734375, "learning_rate": 3.440824468085107e-05, "loss": 1.0963, "step": 1408 }, { "epoch": 0.31233461256044004, "grad_norm": 0.53125, "learning_rate": 3.4397163120567377e-05, "loss": 1.0796, "step": 1409 }, { "epoch": 0.3125562836836199, "grad_norm": 0.546875, "learning_rate": 3.438608156028369e-05, "loss": 1.2544, "step": 1410 }, { "epoch": 0.31277795480679976, "grad_norm": 0.55078125, "learning_rate": 3.4375e-05, "loss": 1.1028, "step": 1411 }, { "epoch": 0.31299962592997965, "grad_norm": 0.5859375, "learning_rate": 3.436391843971631e-05, "loss": 1.0567, "step": 1412 }, { "epoch": 0.3132212970531595, "grad_norm": 0.55078125, "learning_rate": 3.435283687943263e-05, "loss": 1.1137, "step": 1413 }, { "epoch": 0.3134429681763394, "grad_norm": 0.54296875, "learning_rate": 3.434175531914894e-05, "loss": 1.0383, "step": 1414 }, { "epoch": 0.31366463929951927, "grad_norm": 0.56640625, "learning_rate": 3.433067375886525e-05, "loss": 1.1001, "step": 1415 }, { "epoch": 0.3138863104226991, "grad_norm": 0.53515625, "learning_rate": 3.431959219858156e-05, "loss": 0.957, "step": 1416 }, { "epoch": 0.314107981545879, "grad_norm": 0.5625, "learning_rate": 3.430851063829787e-05, "loss": 1.1164, "step": 1417 }, { "epoch": 0.3143296526690589, "grad_norm": 0.52734375, "learning_rate": 3.429742907801419e-05, "loss": 1.0081, "step": 1418 }, { "epoch": 0.3145513237922387, "grad_norm": 0.55078125, "learning_rate": 3.42863475177305e-05, "loss": 1.1221, "step": 1419 }, { "epoch": 0.3147729949154186, "grad_norm": 0.55078125, "learning_rate": 3.4275265957446814e-05, "loss": 1.0767, "step": 1420 }, { "epoch": 0.3149946660385985, "grad_norm": 0.54296875, "learning_rate": 3.426418439716312e-05, "loss": 1.1139, "step": 1421 }, { "epoch": 0.3152163371617784, "grad_norm": 0.56640625, "learning_rate": 3.425310283687944e-05, "loss": 1.1211, "step": 1422 }, { "epoch": 0.3154380082849582, "grad_norm": 0.5546875, "learning_rate": 3.424202127659575e-05, "loss": 1.1362, "step": 1423 }, { "epoch": 0.3156596794081381, "grad_norm": 0.609375, "learning_rate": 3.423093971631206e-05, "loss": 1.1251, "step": 1424 }, { "epoch": 0.315881350531318, "grad_norm": 0.52734375, "learning_rate": 3.4219858156028374e-05, "loss": 1.0712, "step": 1425 }, { "epoch": 0.31610302165449783, "grad_norm": 0.5703125, "learning_rate": 3.4208776595744683e-05, "loss": 1.0999, "step": 1426 }, { "epoch": 0.3163246927776777, "grad_norm": 0.55078125, "learning_rate": 3.4197695035461e-05, "loss": 1.1378, "step": 1427 }, { "epoch": 0.3165463639008576, "grad_norm": 0.56640625, "learning_rate": 3.418661347517731e-05, "loss": 1.0925, "step": 1428 }, { "epoch": 0.31676803502403744, "grad_norm": 0.53515625, "learning_rate": 3.417553191489362e-05, "loss": 1.1403, "step": 1429 }, { "epoch": 0.31698970614721733, "grad_norm": 0.5390625, "learning_rate": 3.416445035460993e-05, "loss": 1.1108, "step": 1430 }, { "epoch": 0.3172113772703972, "grad_norm": 0.53515625, "learning_rate": 3.415336879432624e-05, "loss": 1.0718, "step": 1431 }, { "epoch": 0.31743304839357706, "grad_norm": 0.55859375, "learning_rate": 3.414228723404255e-05, "loss": 1.1259, "step": 1432 }, { "epoch": 0.31765471951675694, "grad_norm": 0.51171875, "learning_rate": 3.413120567375886e-05, "loss": 1.0405, "step": 1433 }, { "epoch": 0.31787639063993683, "grad_norm": 0.56640625, "learning_rate": 3.412012411347518e-05, "loss": 1.1395, "step": 1434 }, { "epoch": 0.31809806176311667, "grad_norm": 0.54296875, "learning_rate": 3.410904255319149e-05, "loss": 0.9944, "step": 1435 }, { "epoch": 0.31831973288629656, "grad_norm": 0.55078125, "learning_rate": 3.4097960992907804e-05, "loss": 1.0423, "step": 1436 }, { "epoch": 0.31854140400947645, "grad_norm": 0.578125, "learning_rate": 3.4086879432624114e-05, "loss": 1.0989, "step": 1437 }, { "epoch": 0.31876307513265634, "grad_norm": 0.52734375, "learning_rate": 3.407579787234042e-05, "loss": 1.0034, "step": 1438 }, { "epoch": 0.31898474625583617, "grad_norm": 0.57421875, "learning_rate": 3.406471631205674e-05, "loss": 1.1704, "step": 1439 }, { "epoch": 0.31920641737901606, "grad_norm": 0.5390625, "learning_rate": 3.405363475177305e-05, "loss": 1.0118, "step": 1440 }, { "epoch": 0.31942808850219595, "grad_norm": 0.55859375, "learning_rate": 3.4042553191489365e-05, "loss": 1.1827, "step": 1441 }, { "epoch": 0.3196497596253758, "grad_norm": 0.55078125, "learning_rate": 3.4031471631205674e-05, "loss": 1.1719, "step": 1442 }, { "epoch": 0.3198714307485557, "grad_norm": 0.5859375, "learning_rate": 3.4020390070921984e-05, "loss": 1.0437, "step": 1443 }, { "epoch": 0.32009310187173556, "grad_norm": 0.5390625, "learning_rate": 3.40093085106383e-05, "loss": 1.1285, "step": 1444 }, { "epoch": 0.3203147729949154, "grad_norm": 0.5859375, "learning_rate": 3.399822695035461e-05, "loss": 1.1695, "step": 1445 }, { "epoch": 0.3205364441180953, "grad_norm": 0.52734375, "learning_rate": 3.3987145390070925e-05, "loss": 0.994, "step": 1446 }, { "epoch": 0.3207581152412752, "grad_norm": 0.58203125, "learning_rate": 3.3976063829787235e-05, "loss": 1.1077, "step": 1447 }, { "epoch": 0.320979786364455, "grad_norm": 0.53125, "learning_rate": 3.396498226950355e-05, "loss": 1.06, "step": 1448 }, { "epoch": 0.3212014574876349, "grad_norm": 0.59765625, "learning_rate": 3.395390070921986e-05, "loss": 1.1334, "step": 1449 }, { "epoch": 0.3214231286108148, "grad_norm": 0.546875, "learning_rate": 3.394281914893617e-05, "loss": 1.0949, "step": 1450 }, { "epoch": 0.3216447997339947, "grad_norm": 0.546875, "learning_rate": 3.3931737588652486e-05, "loss": 1.0892, "step": 1451 }, { "epoch": 0.3218664708571745, "grad_norm": 0.57421875, "learning_rate": 3.3920656028368795e-05, "loss": 1.076, "step": 1452 }, { "epoch": 0.3220881419803544, "grad_norm": 0.5703125, "learning_rate": 3.390957446808511e-05, "loss": 1.0836, "step": 1453 }, { "epoch": 0.3223098131035343, "grad_norm": 0.53515625, "learning_rate": 3.389849290780142e-05, "loss": 1.0223, "step": 1454 }, { "epoch": 0.3225314842267141, "grad_norm": 0.5859375, "learning_rate": 3.388741134751773e-05, "loss": 1.1154, "step": 1455 }, { "epoch": 0.322753155349894, "grad_norm": 0.55078125, "learning_rate": 3.3876329787234046e-05, "loss": 1.0999, "step": 1456 }, { "epoch": 0.3229748264730739, "grad_norm": 0.59765625, "learning_rate": 3.3865248226950356e-05, "loss": 1.1948, "step": 1457 }, { "epoch": 0.32319649759625374, "grad_norm": 0.55078125, "learning_rate": 3.385416666666667e-05, "loss": 1.1271, "step": 1458 }, { "epoch": 0.32341816871943363, "grad_norm": 0.55859375, "learning_rate": 3.384308510638298e-05, "loss": 0.9928, "step": 1459 }, { "epoch": 0.3236398398426135, "grad_norm": 0.5390625, "learning_rate": 3.38320035460993e-05, "loss": 1.041, "step": 1460 }, { "epoch": 0.32386151096579335, "grad_norm": 0.5546875, "learning_rate": 3.382092198581561e-05, "loss": 1.1448, "step": 1461 }, { "epoch": 0.32408318208897324, "grad_norm": 0.52734375, "learning_rate": 3.3809840425531916e-05, "loss": 1.0546, "step": 1462 }, { "epoch": 0.32430485321215313, "grad_norm": 0.52734375, "learning_rate": 3.379875886524823e-05, "loss": 1.0823, "step": 1463 }, { "epoch": 0.32452652433533297, "grad_norm": 0.5859375, "learning_rate": 3.3787677304964535e-05, "loss": 1.0914, "step": 1464 }, { "epoch": 0.32474819545851286, "grad_norm": 0.5234375, "learning_rate": 3.377659574468085e-05, "loss": 1.0281, "step": 1465 }, { "epoch": 0.32496986658169275, "grad_norm": 0.56640625, "learning_rate": 3.376551418439716e-05, "loss": 1.0677, "step": 1466 }, { "epoch": 0.32519153770487264, "grad_norm": 0.53515625, "learning_rate": 3.375443262411348e-05, "loss": 1.0868, "step": 1467 }, { "epoch": 0.32541320882805247, "grad_norm": 0.55078125, "learning_rate": 3.3743351063829786e-05, "loss": 1.0291, "step": 1468 }, { "epoch": 0.32563487995123236, "grad_norm": 0.55078125, "learning_rate": 3.37322695035461e-05, "loss": 1.0937, "step": 1469 }, { "epoch": 0.32585655107441225, "grad_norm": 0.54296875, "learning_rate": 3.372118794326241e-05, "loss": 1.0602, "step": 1470 }, { "epoch": 0.3260782221975921, "grad_norm": 0.54296875, "learning_rate": 3.371010638297872e-05, "loss": 1.1038, "step": 1471 }, { "epoch": 0.32629989332077197, "grad_norm": 0.51171875, "learning_rate": 3.369902482269504e-05, "loss": 1.1045, "step": 1472 }, { "epoch": 0.32652156444395186, "grad_norm": 0.54296875, "learning_rate": 3.3687943262411347e-05, "loss": 1.1143, "step": 1473 }, { "epoch": 0.3267432355671317, "grad_norm": 0.5546875, "learning_rate": 3.367686170212766e-05, "loss": 1.0978, "step": 1474 }, { "epoch": 0.3269649066903116, "grad_norm": 0.53515625, "learning_rate": 3.366578014184397e-05, "loss": 1.0695, "step": 1475 }, { "epoch": 0.3271865778134915, "grad_norm": 0.54296875, "learning_rate": 3.365469858156028e-05, "loss": 1.0821, "step": 1476 }, { "epoch": 0.3274082489366713, "grad_norm": 0.578125, "learning_rate": 3.36436170212766e-05, "loss": 1.1628, "step": 1477 }, { "epoch": 0.3276299200598512, "grad_norm": 0.5390625, "learning_rate": 3.363253546099291e-05, "loss": 1.1144, "step": 1478 }, { "epoch": 0.3278515911830311, "grad_norm": 0.5390625, "learning_rate": 3.362145390070922e-05, "loss": 1.1236, "step": 1479 }, { "epoch": 0.3280732623062109, "grad_norm": 0.69140625, "learning_rate": 3.361037234042553e-05, "loss": 1.1065, "step": 1480 }, { "epoch": 0.3282949334293908, "grad_norm": 0.5625, "learning_rate": 3.359929078014185e-05, "loss": 1.1359, "step": 1481 }, { "epoch": 0.3285166045525707, "grad_norm": 0.53125, "learning_rate": 3.358820921985816e-05, "loss": 1.0794, "step": 1482 }, { "epoch": 0.3287382756757506, "grad_norm": 0.54296875, "learning_rate": 3.357712765957447e-05, "loss": 1.0407, "step": 1483 }, { "epoch": 0.3289599467989304, "grad_norm": 0.53125, "learning_rate": 3.3566046099290784e-05, "loss": 1.0956, "step": 1484 }, { "epoch": 0.3291816179221103, "grad_norm": 0.546875, "learning_rate": 3.355496453900709e-05, "loss": 1.0421, "step": 1485 }, { "epoch": 0.3294032890452902, "grad_norm": 0.54296875, "learning_rate": 3.354388297872341e-05, "loss": 1.0391, "step": 1486 }, { "epoch": 0.32962496016847004, "grad_norm": 0.53515625, "learning_rate": 3.353280141843972e-05, "loss": 1.0712, "step": 1487 }, { "epoch": 0.3298466312916499, "grad_norm": 0.546875, "learning_rate": 3.352171985815603e-05, "loss": 1.0603, "step": 1488 }, { "epoch": 0.3300683024148298, "grad_norm": 0.5703125, "learning_rate": 3.3510638297872344e-05, "loss": 1.0943, "step": 1489 }, { "epoch": 0.33028997353800965, "grad_norm": 0.5390625, "learning_rate": 3.3499556737588654e-05, "loss": 1.0167, "step": 1490 }, { "epoch": 0.33051164466118954, "grad_norm": 0.57421875, "learning_rate": 3.348847517730497e-05, "loss": 1.0814, "step": 1491 }, { "epoch": 0.33073331578436943, "grad_norm": 0.55078125, "learning_rate": 3.347739361702128e-05, "loss": 1.1148, "step": 1492 }, { "epoch": 0.33095498690754926, "grad_norm": 0.5390625, "learning_rate": 3.3466312056737595e-05, "loss": 1.1129, "step": 1493 }, { "epoch": 0.33117665803072915, "grad_norm": 0.5390625, "learning_rate": 3.3455230496453905e-05, "loss": 1.0362, "step": 1494 }, { "epoch": 0.33139832915390904, "grad_norm": 0.5703125, "learning_rate": 3.3444148936170214e-05, "loss": 1.1766, "step": 1495 }, { "epoch": 0.3316200002770889, "grad_norm": 0.5546875, "learning_rate": 3.343306737588653e-05, "loss": 1.0723, "step": 1496 }, { "epoch": 0.33184167140026877, "grad_norm": 0.53125, "learning_rate": 3.342198581560284e-05, "loss": 1.0164, "step": 1497 }, { "epoch": 0.33206334252344866, "grad_norm": 0.546875, "learning_rate": 3.3410904255319156e-05, "loss": 1.024, "step": 1498 }, { "epoch": 0.33228501364662855, "grad_norm": 0.54296875, "learning_rate": 3.3399822695035465e-05, "loss": 1.0563, "step": 1499 }, { "epoch": 0.3325066847698084, "grad_norm": 0.578125, "learning_rate": 3.3388741134751774e-05, "loss": 1.1396, "step": 1500 }, { "epoch": 0.33272835589298827, "grad_norm": 0.5703125, "learning_rate": 3.3377659574468084e-05, "loss": 1.1032, "step": 1501 }, { "epoch": 0.33295002701616816, "grad_norm": 0.57421875, "learning_rate": 3.336657801418439e-05, "loss": 1.0449, "step": 1502 }, { "epoch": 0.333171698139348, "grad_norm": 0.58203125, "learning_rate": 3.335549645390071e-05, "loss": 1.123, "step": 1503 }, { "epoch": 0.3333933692625279, "grad_norm": 0.5625, "learning_rate": 3.334441489361702e-05, "loss": 1.0854, "step": 1504 }, { "epoch": 0.3336150403857078, "grad_norm": 0.56640625, "learning_rate": 3.3333333333333335e-05, "loss": 1.0557, "step": 1505 }, { "epoch": 0.3338367115088876, "grad_norm": 0.59375, "learning_rate": 3.3322251773049644e-05, "loss": 1.1234, "step": 1506 }, { "epoch": 0.3340583826320675, "grad_norm": 0.53515625, "learning_rate": 3.331117021276596e-05, "loss": 1.05, "step": 1507 }, { "epoch": 0.3342800537552474, "grad_norm": 0.59375, "learning_rate": 3.330008865248227e-05, "loss": 1.0272, "step": 1508 }, { "epoch": 0.3345017248784272, "grad_norm": 0.578125, "learning_rate": 3.328900709219858e-05, "loss": 1.0746, "step": 1509 }, { "epoch": 0.3347233960016071, "grad_norm": 0.5625, "learning_rate": 3.3277925531914895e-05, "loss": 1.1145, "step": 1510 }, { "epoch": 0.334945067124787, "grad_norm": 0.51171875, "learning_rate": 3.3266843971631205e-05, "loss": 1.0484, "step": 1511 }, { "epoch": 0.33516673824796683, "grad_norm": 0.5390625, "learning_rate": 3.325576241134752e-05, "loss": 1.0511, "step": 1512 }, { "epoch": 0.3353884093711467, "grad_norm": 0.53515625, "learning_rate": 3.324468085106383e-05, "loss": 1.1089, "step": 1513 }, { "epoch": 0.3356100804943266, "grad_norm": 0.5703125, "learning_rate": 3.323359929078014e-05, "loss": 1.0795, "step": 1514 }, { "epoch": 0.3358317516175065, "grad_norm": 0.53125, "learning_rate": 3.3222517730496456e-05, "loss": 1.0315, "step": 1515 }, { "epoch": 0.33605342274068634, "grad_norm": 0.5390625, "learning_rate": 3.3211436170212765e-05, "loss": 1.0942, "step": 1516 }, { "epoch": 0.3362750938638662, "grad_norm": 0.55859375, "learning_rate": 3.320035460992908e-05, "loss": 1.1337, "step": 1517 }, { "epoch": 0.3364967649870461, "grad_norm": 0.59765625, "learning_rate": 3.318927304964539e-05, "loss": 1.0888, "step": 1518 }, { "epoch": 0.33671843611022595, "grad_norm": 0.54296875, "learning_rate": 3.317819148936171e-05, "loss": 1.0593, "step": 1519 }, { "epoch": 0.33694010723340584, "grad_norm": 0.5546875, "learning_rate": 3.3167109929078016e-05, "loss": 1.0034, "step": 1520 }, { "epoch": 0.33716177835658573, "grad_norm": 0.578125, "learning_rate": 3.3156028368794326e-05, "loss": 1.0265, "step": 1521 }, { "epoch": 0.33738344947976556, "grad_norm": 0.62109375, "learning_rate": 3.314494680851064e-05, "loss": 1.0661, "step": 1522 }, { "epoch": 0.33760512060294545, "grad_norm": 0.546875, "learning_rate": 3.313386524822695e-05, "loss": 1.0973, "step": 1523 }, { "epoch": 0.33782679172612534, "grad_norm": 0.57421875, "learning_rate": 3.312278368794327e-05, "loss": 1.1092, "step": 1524 }, { "epoch": 0.3380484628493052, "grad_norm": 0.60546875, "learning_rate": 3.311170212765958e-05, "loss": 1.0723, "step": 1525 }, { "epoch": 0.33827013397248507, "grad_norm": 0.57421875, "learning_rate": 3.3100620567375886e-05, "loss": 0.9582, "step": 1526 }, { "epoch": 0.33849180509566495, "grad_norm": 0.58203125, "learning_rate": 3.30895390070922e-05, "loss": 1.0228, "step": 1527 }, { "epoch": 0.3387134762188448, "grad_norm": 0.5390625, "learning_rate": 3.307845744680851e-05, "loss": 1.1238, "step": 1528 }, { "epoch": 0.3389351473420247, "grad_norm": 0.5859375, "learning_rate": 3.306737588652483e-05, "loss": 1.1411, "step": 1529 }, { "epoch": 0.33915681846520457, "grad_norm": 0.6015625, "learning_rate": 3.305629432624114e-05, "loss": 1.1203, "step": 1530 }, { "epoch": 0.33937848958838446, "grad_norm": 0.55078125, "learning_rate": 3.3045212765957453e-05, "loss": 1.0914, "step": 1531 }, { "epoch": 0.3396001607115643, "grad_norm": 0.53125, "learning_rate": 3.303413120567376e-05, "loss": 1.053, "step": 1532 }, { "epoch": 0.3398218318347442, "grad_norm": 0.52734375, "learning_rate": 3.302304964539007e-05, "loss": 1.0773, "step": 1533 }, { "epoch": 0.34004350295792407, "grad_norm": 0.546875, "learning_rate": 3.301196808510639e-05, "loss": 1.0552, "step": 1534 }, { "epoch": 0.3402651740811039, "grad_norm": 0.5703125, "learning_rate": 3.300088652482269e-05, "loss": 1.1184, "step": 1535 }, { "epoch": 0.3404868452042838, "grad_norm": 0.546875, "learning_rate": 3.298980496453901e-05, "loss": 1.1186, "step": 1536 }, { "epoch": 0.3407085163274637, "grad_norm": 0.53515625, "learning_rate": 3.2978723404255317e-05, "loss": 1.1059, "step": 1537 }, { "epoch": 0.3409301874506435, "grad_norm": 0.5625, "learning_rate": 3.296764184397163e-05, "loss": 1.0754, "step": 1538 }, { "epoch": 0.3411518585738234, "grad_norm": 0.5546875, "learning_rate": 3.295656028368794e-05, "loss": 1.0713, "step": 1539 }, { "epoch": 0.3413735296970033, "grad_norm": 0.5390625, "learning_rate": 3.294547872340426e-05, "loss": 1.0251, "step": 1540 }, { "epoch": 0.34159520082018313, "grad_norm": 0.5546875, "learning_rate": 3.293439716312057e-05, "loss": 1.0852, "step": 1541 }, { "epoch": 0.341816871943363, "grad_norm": 0.55859375, "learning_rate": 3.292331560283688e-05, "loss": 1.0642, "step": 1542 }, { "epoch": 0.3420385430665429, "grad_norm": 0.546875, "learning_rate": 3.291223404255319e-05, "loss": 1.0878, "step": 1543 }, { "epoch": 0.3422602141897228, "grad_norm": 0.53515625, "learning_rate": 3.29011524822695e-05, "loss": 1.0631, "step": 1544 }, { "epoch": 0.34248188531290263, "grad_norm": 0.53515625, "learning_rate": 3.289007092198582e-05, "loss": 0.9345, "step": 1545 }, { "epoch": 0.3427035564360825, "grad_norm": 0.55859375, "learning_rate": 3.287898936170213e-05, "loss": 1.0899, "step": 1546 }, { "epoch": 0.3429252275592624, "grad_norm": 0.55078125, "learning_rate": 3.286790780141844e-05, "loss": 1.0785, "step": 1547 }, { "epoch": 0.34314689868244225, "grad_norm": 0.5625, "learning_rate": 3.2856826241134754e-05, "loss": 1.1187, "step": 1548 }, { "epoch": 0.34336856980562214, "grad_norm": 0.54296875, "learning_rate": 3.284574468085106e-05, "loss": 1.0846, "step": 1549 }, { "epoch": 0.343590240928802, "grad_norm": 0.5390625, "learning_rate": 3.283466312056738e-05, "loss": 1.0402, "step": 1550 }, { "epoch": 0.34381191205198186, "grad_norm": 0.50390625, "learning_rate": 3.282358156028369e-05, "loss": 1.0193, "step": 1551 }, { "epoch": 0.34403358317516175, "grad_norm": 0.515625, "learning_rate": 3.2812500000000005e-05, "loss": 1.0553, "step": 1552 }, { "epoch": 0.34425525429834164, "grad_norm": 0.5390625, "learning_rate": 3.2801418439716314e-05, "loss": 1.0914, "step": 1553 }, { "epoch": 0.3444769254215215, "grad_norm": 0.5546875, "learning_rate": 3.2790336879432624e-05, "loss": 1.0298, "step": 1554 }, { "epoch": 0.34469859654470136, "grad_norm": 0.57421875, "learning_rate": 3.277925531914894e-05, "loss": 1.186, "step": 1555 }, { "epoch": 0.34492026766788125, "grad_norm": 0.53515625, "learning_rate": 3.276817375886525e-05, "loss": 1.0653, "step": 1556 }, { "epoch": 0.3451419387910611, "grad_norm": 0.5234375, "learning_rate": 3.2757092198581565e-05, "loss": 1.1442, "step": 1557 }, { "epoch": 0.345363609914241, "grad_norm": 0.54296875, "learning_rate": 3.2746010638297875e-05, "loss": 1.021, "step": 1558 }, { "epoch": 0.34558528103742087, "grad_norm": 0.52734375, "learning_rate": 3.2734929078014184e-05, "loss": 1.0735, "step": 1559 }, { "epoch": 0.34580695216060076, "grad_norm": 0.53515625, "learning_rate": 3.27238475177305e-05, "loss": 1.1056, "step": 1560 }, { "epoch": 0.3460286232837806, "grad_norm": 0.53515625, "learning_rate": 3.271276595744681e-05, "loss": 1.0752, "step": 1561 }, { "epoch": 0.3462502944069605, "grad_norm": 0.578125, "learning_rate": 3.2701684397163126e-05, "loss": 1.1119, "step": 1562 }, { "epoch": 0.34647196553014037, "grad_norm": 0.54296875, "learning_rate": 3.2690602836879435e-05, "loss": 0.9663, "step": 1563 }, { "epoch": 0.3466936366533202, "grad_norm": 0.5234375, "learning_rate": 3.267952127659575e-05, "loss": 1.1179, "step": 1564 }, { "epoch": 0.3469153077765001, "grad_norm": 0.546875, "learning_rate": 3.266843971631206e-05, "loss": 1.0582, "step": 1565 }, { "epoch": 0.34713697889968, "grad_norm": 0.546875, "learning_rate": 3.265735815602837e-05, "loss": 1.1623, "step": 1566 }, { "epoch": 0.3473586500228598, "grad_norm": 0.5546875, "learning_rate": 3.2646276595744686e-05, "loss": 1.0389, "step": 1567 }, { "epoch": 0.3475803211460397, "grad_norm": 0.53125, "learning_rate": 3.2635195035460996e-05, "loss": 1.0939, "step": 1568 }, { "epoch": 0.3478019922692196, "grad_norm": 0.5390625, "learning_rate": 3.262411347517731e-05, "loss": 0.9464, "step": 1569 }, { "epoch": 0.34802366339239943, "grad_norm": 0.5390625, "learning_rate": 3.2613031914893614e-05, "loss": 1.1019, "step": 1570 }, { "epoch": 0.3482453345155793, "grad_norm": 0.546875, "learning_rate": 3.260195035460993e-05, "loss": 1.1262, "step": 1571 }, { "epoch": 0.3484670056387592, "grad_norm": 0.57421875, "learning_rate": 3.259086879432624e-05, "loss": 1.139, "step": 1572 }, { "epoch": 0.34868867676193904, "grad_norm": 0.5546875, "learning_rate": 3.257978723404255e-05, "loss": 1.1178, "step": 1573 }, { "epoch": 0.34891034788511893, "grad_norm": 0.51953125, "learning_rate": 3.2568705673758865e-05, "loss": 0.9865, "step": 1574 }, { "epoch": 0.3491320190082988, "grad_norm": 0.56640625, "learning_rate": 3.2557624113475175e-05, "loss": 1.097, "step": 1575 }, { "epoch": 0.3493536901314787, "grad_norm": 0.54296875, "learning_rate": 3.254654255319149e-05, "loss": 1.0927, "step": 1576 }, { "epoch": 0.34957536125465855, "grad_norm": 0.56640625, "learning_rate": 3.25354609929078e-05, "loss": 1.04, "step": 1577 }, { "epoch": 0.34979703237783843, "grad_norm": 0.52734375, "learning_rate": 3.2524379432624117e-05, "loss": 1.0629, "step": 1578 }, { "epoch": 0.3500187035010183, "grad_norm": 0.58984375, "learning_rate": 3.2513297872340426e-05, "loss": 1.1684, "step": 1579 }, { "epoch": 0.35024037462419816, "grad_norm": 0.55859375, "learning_rate": 3.2502216312056735e-05, "loss": 1.1499, "step": 1580 }, { "epoch": 0.35046204574737805, "grad_norm": 0.54296875, "learning_rate": 3.249113475177305e-05, "loss": 1.1229, "step": 1581 }, { "epoch": 0.35068371687055794, "grad_norm": 0.52734375, "learning_rate": 3.248005319148936e-05, "loss": 1.0343, "step": 1582 }, { "epoch": 0.35090538799373777, "grad_norm": 0.54296875, "learning_rate": 3.246897163120568e-05, "loss": 1.0997, "step": 1583 }, { "epoch": 0.35112705911691766, "grad_norm": 0.58984375, "learning_rate": 3.2457890070921986e-05, "loss": 1.0885, "step": 1584 }, { "epoch": 0.35134873024009755, "grad_norm": 0.52734375, "learning_rate": 3.2446808510638296e-05, "loss": 1.0496, "step": 1585 }, { "epoch": 0.3515704013632774, "grad_norm": 0.54296875, "learning_rate": 3.243572695035461e-05, "loss": 1.132, "step": 1586 }, { "epoch": 0.3517920724864573, "grad_norm": 0.55859375, "learning_rate": 3.242464539007092e-05, "loss": 1.0325, "step": 1587 }, { "epoch": 0.35201374360963716, "grad_norm": 0.5546875, "learning_rate": 3.241356382978724e-05, "loss": 1.0356, "step": 1588 }, { "epoch": 0.352235414732817, "grad_norm": 0.55859375, "learning_rate": 3.240248226950355e-05, "loss": 1.1656, "step": 1589 }, { "epoch": 0.3524570858559969, "grad_norm": 0.5703125, "learning_rate": 3.239140070921986e-05, "loss": 1.0979, "step": 1590 }, { "epoch": 0.3526787569791768, "grad_norm": 0.58984375, "learning_rate": 3.238031914893617e-05, "loss": 1.1422, "step": 1591 }, { "epoch": 0.35290042810235667, "grad_norm": 0.52734375, "learning_rate": 3.236923758865248e-05, "loss": 1.1519, "step": 1592 }, { "epoch": 0.3531220992255365, "grad_norm": 0.52734375, "learning_rate": 3.23581560283688e-05, "loss": 1.0832, "step": 1593 }, { "epoch": 0.3533437703487164, "grad_norm": 0.53515625, "learning_rate": 3.234707446808511e-05, "loss": 1.1118, "step": 1594 }, { "epoch": 0.3535654414718963, "grad_norm": 0.5390625, "learning_rate": 3.2335992907801423e-05, "loss": 1.0889, "step": 1595 }, { "epoch": 0.3537871125950761, "grad_norm": 0.55859375, "learning_rate": 3.232491134751773e-05, "loss": 1.1411, "step": 1596 }, { "epoch": 0.354008783718256, "grad_norm": 0.55078125, "learning_rate": 3.231382978723405e-05, "loss": 1.0561, "step": 1597 }, { "epoch": 0.3542304548414359, "grad_norm": 0.51953125, "learning_rate": 3.230274822695036e-05, "loss": 1.0084, "step": 1598 }, { "epoch": 0.3544521259646157, "grad_norm": 0.5546875, "learning_rate": 3.229166666666667e-05, "loss": 1.0636, "step": 1599 }, { "epoch": 0.3546737970877956, "grad_norm": 0.57421875, "learning_rate": 3.2280585106382984e-05, "loss": 1.0465, "step": 1600 }, { "epoch": 0.3548954682109755, "grad_norm": 0.56640625, "learning_rate": 3.226950354609929e-05, "loss": 1.0414, "step": 1601 }, { "epoch": 0.35511713933415534, "grad_norm": 0.52734375, "learning_rate": 3.225842198581561e-05, "loss": 1.0434, "step": 1602 }, { "epoch": 0.35533881045733523, "grad_norm": 0.55078125, "learning_rate": 3.224734042553192e-05, "loss": 1.0777, "step": 1603 }, { "epoch": 0.3555604815805151, "grad_norm": 0.546875, "learning_rate": 3.223625886524823e-05, "loss": 1.073, "step": 1604 }, { "epoch": 0.35578215270369495, "grad_norm": 0.53515625, "learning_rate": 3.222517730496454e-05, "loss": 1.077, "step": 1605 }, { "epoch": 0.35600382382687484, "grad_norm": 0.5546875, "learning_rate": 3.221409574468085e-05, "loss": 0.9954, "step": 1606 }, { "epoch": 0.35622549495005473, "grad_norm": 0.53515625, "learning_rate": 3.220301418439716e-05, "loss": 1.1045, "step": 1607 }, { "epoch": 0.3564471660732346, "grad_norm": 0.57421875, "learning_rate": 3.219193262411347e-05, "loss": 1.2003, "step": 1608 }, { "epoch": 0.35666883719641446, "grad_norm": 0.5546875, "learning_rate": 3.218085106382979e-05, "loss": 1.0661, "step": 1609 }, { "epoch": 0.35689050831959435, "grad_norm": 0.55859375, "learning_rate": 3.21697695035461e-05, "loss": 1.0717, "step": 1610 }, { "epoch": 0.35711217944277424, "grad_norm": 0.54296875, "learning_rate": 3.2158687943262414e-05, "loss": 1.08, "step": 1611 }, { "epoch": 0.35733385056595407, "grad_norm": 0.546875, "learning_rate": 3.2147606382978724e-05, "loss": 1.0705, "step": 1612 }, { "epoch": 0.35755552168913396, "grad_norm": 0.55078125, "learning_rate": 3.213652482269503e-05, "loss": 1.0242, "step": 1613 }, { "epoch": 0.35777719281231385, "grad_norm": 0.56640625, "learning_rate": 3.212544326241135e-05, "loss": 0.9979, "step": 1614 }, { "epoch": 0.3579988639354937, "grad_norm": 0.5859375, "learning_rate": 3.211436170212766e-05, "loss": 1.0093, "step": 1615 }, { "epoch": 0.3582205350586736, "grad_norm": 0.54296875, "learning_rate": 3.2103280141843975e-05, "loss": 1.1504, "step": 1616 }, { "epoch": 0.35844220618185346, "grad_norm": 0.5703125, "learning_rate": 3.2092198581560284e-05, "loss": 1.1413, "step": 1617 }, { "epoch": 0.3586638773050333, "grad_norm": 0.60546875, "learning_rate": 3.2081117021276594e-05, "loss": 1.1193, "step": 1618 }, { "epoch": 0.3588855484282132, "grad_norm": 0.5625, "learning_rate": 3.207003546099291e-05, "loss": 1.1671, "step": 1619 }, { "epoch": 0.3591072195513931, "grad_norm": 0.55078125, "learning_rate": 3.205895390070922e-05, "loss": 0.9861, "step": 1620 }, { "epoch": 0.3593288906745729, "grad_norm": 0.58203125, "learning_rate": 3.2047872340425535e-05, "loss": 1.0758, "step": 1621 }, { "epoch": 0.3595505617977528, "grad_norm": 0.5546875, "learning_rate": 3.2036790780141845e-05, "loss": 1.0257, "step": 1622 }, { "epoch": 0.3597722329209327, "grad_norm": 0.54296875, "learning_rate": 3.202570921985816e-05, "loss": 1.0989, "step": 1623 }, { "epoch": 0.3599939040441126, "grad_norm": 0.53515625, "learning_rate": 3.201462765957447e-05, "loss": 1.0134, "step": 1624 }, { "epoch": 0.3602155751672924, "grad_norm": 0.578125, "learning_rate": 3.200354609929078e-05, "loss": 1.1305, "step": 1625 }, { "epoch": 0.3604372462904723, "grad_norm": 0.56640625, "learning_rate": 3.1992464539007096e-05, "loss": 1.0672, "step": 1626 }, { "epoch": 0.3606589174136522, "grad_norm": 0.5859375, "learning_rate": 3.1981382978723405e-05, "loss": 1.1593, "step": 1627 }, { "epoch": 0.360880588536832, "grad_norm": 0.5546875, "learning_rate": 3.197030141843972e-05, "loss": 1.0464, "step": 1628 }, { "epoch": 0.3611022596600119, "grad_norm": 0.54296875, "learning_rate": 3.195921985815603e-05, "loss": 1.1029, "step": 1629 }, { "epoch": 0.3613239307831918, "grad_norm": 0.55078125, "learning_rate": 3.194813829787234e-05, "loss": 1.1134, "step": 1630 }, { "epoch": 0.36154560190637164, "grad_norm": 0.55859375, "learning_rate": 3.1937056737588656e-05, "loss": 1.1543, "step": 1631 }, { "epoch": 0.36176727302955153, "grad_norm": 0.55078125, "learning_rate": 3.1925975177304966e-05, "loss": 1.0044, "step": 1632 }, { "epoch": 0.3619889441527314, "grad_norm": 0.5546875, "learning_rate": 3.191489361702128e-05, "loss": 1.0287, "step": 1633 }, { "epoch": 0.36221061527591125, "grad_norm": 0.55859375, "learning_rate": 3.190381205673759e-05, "loss": 1.0148, "step": 1634 }, { "epoch": 0.36243228639909114, "grad_norm": 0.5390625, "learning_rate": 3.189273049645391e-05, "loss": 1.0748, "step": 1635 }, { "epoch": 0.36265395752227103, "grad_norm": 0.55859375, "learning_rate": 3.188164893617022e-05, "loss": 1.1118, "step": 1636 }, { "epoch": 0.3628756286454509, "grad_norm": 0.55859375, "learning_rate": 3.1870567375886526e-05, "loss": 1.0047, "step": 1637 }, { "epoch": 0.36309729976863075, "grad_norm": 0.5625, "learning_rate": 3.185948581560284e-05, "loss": 1.0974, "step": 1638 }, { "epoch": 0.36331897089181064, "grad_norm": 0.55078125, "learning_rate": 3.184840425531915e-05, "loss": 1.1221, "step": 1639 }, { "epoch": 0.36354064201499053, "grad_norm": 0.5546875, "learning_rate": 3.183732269503547e-05, "loss": 1.0383, "step": 1640 }, { "epoch": 0.36376231313817037, "grad_norm": 0.55859375, "learning_rate": 3.182624113475177e-05, "loss": 1.1345, "step": 1641 }, { "epoch": 0.36398398426135026, "grad_norm": 0.55078125, "learning_rate": 3.1815159574468087e-05, "loss": 1.0928, "step": 1642 }, { "epoch": 0.36420565538453015, "grad_norm": 0.57421875, "learning_rate": 3.1804078014184396e-05, "loss": 1.0349, "step": 1643 }, { "epoch": 0.36442732650771, "grad_norm": 0.5625, "learning_rate": 3.1792996453900705e-05, "loss": 1.0732, "step": 1644 }, { "epoch": 0.36464899763088987, "grad_norm": 0.5859375, "learning_rate": 3.178191489361702e-05, "loss": 1.1783, "step": 1645 }, { "epoch": 0.36487066875406976, "grad_norm": 0.55078125, "learning_rate": 3.177083333333333e-05, "loss": 1.0512, "step": 1646 }, { "epoch": 0.3650923398772496, "grad_norm": 0.5546875, "learning_rate": 3.175975177304965e-05, "loss": 1.001, "step": 1647 }, { "epoch": 0.3653140110004295, "grad_norm": 0.55078125, "learning_rate": 3.1748670212765956e-05, "loss": 1.0833, "step": 1648 }, { "epoch": 0.3655356821236094, "grad_norm": 0.52734375, "learning_rate": 3.173758865248227e-05, "loss": 1.0549, "step": 1649 }, { "epoch": 0.3657573532467892, "grad_norm": 0.55078125, "learning_rate": 3.172650709219858e-05, "loss": 1.0534, "step": 1650 }, { "epoch": 0.3659790243699691, "grad_norm": 0.54296875, "learning_rate": 3.171542553191489e-05, "loss": 1.0991, "step": 1651 }, { "epoch": 0.366200695493149, "grad_norm": 0.5703125, "learning_rate": 3.170434397163121e-05, "loss": 1.0854, "step": 1652 }, { "epoch": 0.3664223666163289, "grad_norm": 0.56640625, "learning_rate": 3.169326241134752e-05, "loss": 1.0966, "step": 1653 }, { "epoch": 0.3666440377395087, "grad_norm": 0.56640625, "learning_rate": 3.168218085106383e-05, "loss": 1.1289, "step": 1654 }, { "epoch": 0.3668657088626886, "grad_norm": 0.61328125, "learning_rate": 3.167109929078014e-05, "loss": 1.0532, "step": 1655 }, { "epoch": 0.3670873799858685, "grad_norm": 0.5546875, "learning_rate": 3.166001773049645e-05, "loss": 1.1422, "step": 1656 }, { "epoch": 0.3673090511090483, "grad_norm": 0.53125, "learning_rate": 3.164893617021277e-05, "loss": 1.0124, "step": 1657 }, { "epoch": 0.3675307222322282, "grad_norm": 0.53515625, "learning_rate": 3.163785460992908e-05, "loss": 1.0219, "step": 1658 }, { "epoch": 0.3677523933554081, "grad_norm": 0.54296875, "learning_rate": 3.1626773049645393e-05, "loss": 1.0747, "step": 1659 }, { "epoch": 0.36797406447858794, "grad_norm": 0.53125, "learning_rate": 3.16156914893617e-05, "loss": 1.0201, "step": 1660 }, { "epoch": 0.3681957356017678, "grad_norm": 0.52734375, "learning_rate": 3.160460992907802e-05, "loss": 1.069, "step": 1661 }, { "epoch": 0.3684174067249477, "grad_norm": 0.56640625, "learning_rate": 3.159352836879433e-05, "loss": 1.0282, "step": 1662 }, { "epoch": 0.36863907784812755, "grad_norm": 0.578125, "learning_rate": 3.158244680851064e-05, "loss": 1.1187, "step": 1663 }, { "epoch": 0.36886074897130744, "grad_norm": 0.578125, "learning_rate": 3.1571365248226954e-05, "loss": 1.0578, "step": 1664 }, { "epoch": 0.36908242009448733, "grad_norm": 0.578125, "learning_rate": 3.156028368794326e-05, "loss": 1.0863, "step": 1665 }, { "epoch": 0.36930409121766716, "grad_norm": 0.5625, "learning_rate": 3.154920212765958e-05, "loss": 1.0361, "step": 1666 }, { "epoch": 0.36952576234084705, "grad_norm": 0.578125, "learning_rate": 3.153812056737589e-05, "loss": 1.0641, "step": 1667 }, { "epoch": 0.36974743346402694, "grad_norm": 0.56640625, "learning_rate": 3.1527039007092205e-05, "loss": 1.1084, "step": 1668 }, { "epoch": 0.36996910458720683, "grad_norm": 0.5546875, "learning_rate": 3.1515957446808514e-05, "loss": 1.1427, "step": 1669 }, { "epoch": 0.37019077571038667, "grad_norm": 0.59375, "learning_rate": 3.1504875886524824e-05, "loss": 1.0634, "step": 1670 }, { "epoch": 0.37041244683356656, "grad_norm": 0.55078125, "learning_rate": 3.149379432624114e-05, "loss": 1.0453, "step": 1671 }, { "epoch": 0.37063411795674644, "grad_norm": 0.54296875, "learning_rate": 3.148271276595745e-05, "loss": 1.0526, "step": 1672 }, { "epoch": 0.3708557890799263, "grad_norm": 0.5546875, "learning_rate": 3.1471631205673766e-05, "loss": 1.0758, "step": 1673 }, { "epoch": 0.37107746020310617, "grad_norm": 0.5625, "learning_rate": 3.1460549645390075e-05, "loss": 1.0775, "step": 1674 }, { "epoch": 0.37129913132628606, "grad_norm": 0.5625, "learning_rate": 3.1449468085106384e-05, "loss": 1.0894, "step": 1675 }, { "epoch": 0.3715208024494659, "grad_norm": 0.53125, "learning_rate": 3.1438386524822694e-05, "loss": 1.047, "step": 1676 }, { "epoch": 0.3717424735726458, "grad_norm": 0.57421875, "learning_rate": 3.1427304964539e-05, "loss": 1.1214, "step": 1677 }, { "epoch": 0.37196414469582567, "grad_norm": 0.55078125, "learning_rate": 3.141622340425532e-05, "loss": 1.1048, "step": 1678 }, { "epoch": 0.3721858158190055, "grad_norm": 0.625, "learning_rate": 3.140514184397163e-05, "loss": 1.1535, "step": 1679 }, { "epoch": 0.3724074869421854, "grad_norm": 0.578125, "learning_rate": 3.1394060283687945e-05, "loss": 1.0768, "step": 1680 }, { "epoch": 0.3726291580653653, "grad_norm": 0.515625, "learning_rate": 3.1382978723404254e-05, "loss": 0.9692, "step": 1681 }, { "epoch": 0.3728508291885451, "grad_norm": 0.5546875, "learning_rate": 3.137189716312057e-05, "loss": 1.1584, "step": 1682 }, { "epoch": 0.373072500311725, "grad_norm": 0.57421875, "learning_rate": 3.136081560283688e-05, "loss": 1.1067, "step": 1683 }, { "epoch": 0.3732941714349049, "grad_norm": 0.55078125, "learning_rate": 3.134973404255319e-05, "loss": 1.0451, "step": 1684 }, { "epoch": 0.3735158425580848, "grad_norm": 0.5234375, "learning_rate": 3.1338652482269505e-05, "loss": 1.0259, "step": 1685 }, { "epoch": 0.3737375136812646, "grad_norm": 0.53125, "learning_rate": 3.1327570921985815e-05, "loss": 1.0812, "step": 1686 }, { "epoch": 0.3739591848044445, "grad_norm": 0.63671875, "learning_rate": 3.131648936170213e-05, "loss": 1.0407, "step": 1687 }, { "epoch": 0.3741808559276244, "grad_norm": 0.61328125, "learning_rate": 3.130540780141844e-05, "loss": 1.0705, "step": 1688 }, { "epoch": 0.37440252705080423, "grad_norm": 0.54296875, "learning_rate": 3.129432624113475e-05, "loss": 1.0796, "step": 1689 }, { "epoch": 0.3746241981739841, "grad_norm": 0.546875, "learning_rate": 3.1283244680851066e-05, "loss": 1.0817, "step": 1690 }, { "epoch": 0.374845869297164, "grad_norm": 0.578125, "learning_rate": 3.1272163120567375e-05, "loss": 1.0526, "step": 1691 }, { "epoch": 0.37506754042034385, "grad_norm": 0.76953125, "learning_rate": 3.126108156028369e-05, "loss": 0.9534, "step": 1692 }, { "epoch": 0.37528921154352374, "grad_norm": 0.57421875, "learning_rate": 3.125e-05, "loss": 1.0196, "step": 1693 }, { "epoch": 0.3755108826667036, "grad_norm": 0.5546875, "learning_rate": 3.123891843971632e-05, "loss": 1.0907, "step": 1694 }, { "epoch": 0.37573255378988346, "grad_norm": 0.5625, "learning_rate": 3.1227836879432626e-05, "loss": 1.1134, "step": 1695 }, { "epoch": 0.37595422491306335, "grad_norm": 0.58984375, "learning_rate": 3.1216755319148936e-05, "loss": 1.1179, "step": 1696 }, { "epoch": 0.37617589603624324, "grad_norm": 0.55078125, "learning_rate": 3.120567375886525e-05, "loss": 1.1223, "step": 1697 }, { "epoch": 0.3763975671594231, "grad_norm": 0.58984375, "learning_rate": 3.119459219858156e-05, "loss": 1.1392, "step": 1698 }, { "epoch": 0.37661923828260296, "grad_norm": 0.53515625, "learning_rate": 3.118351063829788e-05, "loss": 1.0329, "step": 1699 }, { "epoch": 0.37684090940578285, "grad_norm": 0.53515625, "learning_rate": 3.117242907801419e-05, "loss": 1.0708, "step": 1700 }, { "epoch": 0.37706258052896274, "grad_norm": 0.5546875, "learning_rate": 3.1161347517730496e-05, "loss": 1.0748, "step": 1701 }, { "epoch": 0.3772842516521426, "grad_norm": 0.59375, "learning_rate": 3.115026595744681e-05, "loss": 1.1126, "step": 1702 }, { "epoch": 0.37750592277532247, "grad_norm": 0.546875, "learning_rate": 3.113918439716312e-05, "loss": 1.0668, "step": 1703 }, { "epoch": 0.37772759389850236, "grad_norm": 0.53515625, "learning_rate": 3.112810283687944e-05, "loss": 1.0325, "step": 1704 }, { "epoch": 0.3779492650216822, "grad_norm": 0.58203125, "learning_rate": 3.111702127659575e-05, "loss": 1.1104, "step": 1705 }, { "epoch": 0.3781709361448621, "grad_norm": 0.546875, "learning_rate": 3.110593971631206e-05, "loss": 1.0356, "step": 1706 }, { "epoch": 0.37839260726804197, "grad_norm": 0.5546875, "learning_rate": 3.109485815602837e-05, "loss": 1.0819, "step": 1707 }, { "epoch": 0.3786142783912218, "grad_norm": 0.59375, "learning_rate": 3.108377659574468e-05, "loss": 1.072, "step": 1708 }, { "epoch": 0.3788359495144017, "grad_norm": 0.5859375, "learning_rate": 3.1072695035461e-05, "loss": 1.075, "step": 1709 }, { "epoch": 0.3790576206375816, "grad_norm": 0.546875, "learning_rate": 3.106161347517731e-05, "loss": 1.1036, "step": 1710 }, { "epoch": 0.3792792917607614, "grad_norm": 0.55859375, "learning_rate": 3.105053191489362e-05, "loss": 1.1068, "step": 1711 }, { "epoch": 0.3795009628839413, "grad_norm": 0.57421875, "learning_rate": 3.1039450354609926e-05, "loss": 1.08, "step": 1712 }, { "epoch": 0.3797226340071212, "grad_norm": 0.53125, "learning_rate": 3.102836879432624e-05, "loss": 1.0237, "step": 1713 }, { "epoch": 0.37994430513030103, "grad_norm": 0.59375, "learning_rate": 3.101728723404255e-05, "loss": 1.1404, "step": 1714 }, { "epoch": 0.3801659762534809, "grad_norm": 0.56640625, "learning_rate": 3.100620567375886e-05, "loss": 1.0981, "step": 1715 }, { "epoch": 0.3803876473766608, "grad_norm": 0.546875, "learning_rate": 3.099512411347518e-05, "loss": 1.1042, "step": 1716 }, { "epoch": 0.3806093184998407, "grad_norm": 0.52734375, "learning_rate": 3.098404255319149e-05, "loss": 1.023, "step": 1717 }, { "epoch": 0.38083098962302053, "grad_norm": 0.55078125, "learning_rate": 3.09729609929078e-05, "loss": 1.1534, "step": 1718 }, { "epoch": 0.3810526607462004, "grad_norm": 0.53515625, "learning_rate": 3.096187943262411e-05, "loss": 1.1189, "step": 1719 }, { "epoch": 0.3812743318693803, "grad_norm": 0.61328125, "learning_rate": 3.095079787234043e-05, "loss": 1.0895, "step": 1720 }, { "epoch": 0.38149600299256015, "grad_norm": 0.56640625, "learning_rate": 3.093971631205674e-05, "loss": 1.0713, "step": 1721 }, { "epoch": 0.38171767411574004, "grad_norm": 0.60546875, "learning_rate": 3.092863475177305e-05, "loss": 1.0773, "step": 1722 }, { "epoch": 0.3819393452389199, "grad_norm": 0.5546875, "learning_rate": 3.0917553191489363e-05, "loss": 0.9962, "step": 1723 }, { "epoch": 0.38216101636209976, "grad_norm": 0.5703125, "learning_rate": 3.090647163120567e-05, "loss": 1.1071, "step": 1724 }, { "epoch": 0.38238268748527965, "grad_norm": 0.58203125, "learning_rate": 3.089539007092199e-05, "loss": 1.0851, "step": 1725 }, { "epoch": 0.38260435860845954, "grad_norm": 0.53515625, "learning_rate": 3.08843085106383e-05, "loss": 1.0309, "step": 1726 }, { "epoch": 0.3828260297316394, "grad_norm": 0.53125, "learning_rate": 3.0873226950354615e-05, "loss": 1.0888, "step": 1727 }, { "epoch": 0.38304770085481926, "grad_norm": 0.5546875, "learning_rate": 3.0862145390070924e-05, "loss": 1.0947, "step": 1728 }, { "epoch": 0.38326937197799915, "grad_norm": 0.56640625, "learning_rate": 3.085106382978723e-05, "loss": 1.0298, "step": 1729 }, { "epoch": 0.38349104310117904, "grad_norm": 0.546875, "learning_rate": 3.083998226950355e-05, "loss": 1.0819, "step": 1730 }, { "epoch": 0.3837127142243589, "grad_norm": 0.546875, "learning_rate": 3.082890070921986e-05, "loss": 1.0148, "step": 1731 }, { "epoch": 0.38393438534753876, "grad_norm": 0.578125, "learning_rate": 3.0817819148936175e-05, "loss": 1.1135, "step": 1732 }, { "epoch": 0.38415605647071865, "grad_norm": 0.5546875, "learning_rate": 3.0806737588652484e-05, "loss": 1.1013, "step": 1733 }, { "epoch": 0.3843777275938985, "grad_norm": 0.5390625, "learning_rate": 3.0795656028368794e-05, "loss": 1.0614, "step": 1734 }, { "epoch": 0.3845993987170784, "grad_norm": 0.5390625, "learning_rate": 3.078457446808511e-05, "loss": 1.062, "step": 1735 }, { "epoch": 0.38482106984025827, "grad_norm": 0.58984375, "learning_rate": 3.077349290780142e-05, "loss": 1.1123, "step": 1736 }, { "epoch": 0.3850427409634381, "grad_norm": 0.5390625, "learning_rate": 3.0762411347517736e-05, "loss": 1.0282, "step": 1737 }, { "epoch": 0.385264412086618, "grad_norm": 0.546875, "learning_rate": 3.0751329787234045e-05, "loss": 0.9849, "step": 1738 }, { "epoch": 0.3854860832097979, "grad_norm": 0.5625, "learning_rate": 3.074024822695036e-05, "loss": 1.0886, "step": 1739 }, { "epoch": 0.3857077543329777, "grad_norm": 0.53125, "learning_rate": 3.072916666666667e-05, "loss": 1.0374, "step": 1740 }, { "epoch": 0.3859294254561576, "grad_norm": 0.55078125, "learning_rate": 3.071808510638298e-05, "loss": 1.062, "step": 1741 }, { "epoch": 0.3861510965793375, "grad_norm": 0.5546875, "learning_rate": 3.0707003546099296e-05, "loss": 1.1657, "step": 1742 }, { "epoch": 0.38637276770251733, "grad_norm": 0.578125, "learning_rate": 3.0695921985815605e-05, "loss": 1.0489, "step": 1743 }, { "epoch": 0.3865944388256972, "grad_norm": 0.53515625, "learning_rate": 3.068484042553192e-05, "loss": 1.0536, "step": 1744 }, { "epoch": 0.3868161099488771, "grad_norm": 0.52734375, "learning_rate": 3.067375886524823e-05, "loss": 1.0023, "step": 1745 }, { "epoch": 0.387037781072057, "grad_norm": 0.53125, "learning_rate": 3.066267730496454e-05, "loss": 1.0318, "step": 1746 }, { "epoch": 0.38725945219523683, "grad_norm": 0.54296875, "learning_rate": 3.065159574468085e-05, "loss": 1.1275, "step": 1747 }, { "epoch": 0.3874811233184167, "grad_norm": 0.5390625, "learning_rate": 3.064051418439716e-05, "loss": 1.0775, "step": 1748 }, { "epoch": 0.3877027944415966, "grad_norm": 0.55859375, "learning_rate": 3.0629432624113475e-05, "loss": 1.0469, "step": 1749 }, { "epoch": 0.38792446556477644, "grad_norm": 0.5625, "learning_rate": 3.0618351063829785e-05, "loss": 1.2092, "step": 1750 }, { "epoch": 0.38814613668795633, "grad_norm": 0.55078125, "learning_rate": 3.06072695035461e-05, "loss": 1.1103, "step": 1751 }, { "epoch": 0.3883678078111362, "grad_norm": 0.53515625, "learning_rate": 3.059618794326241e-05, "loss": 1.0393, "step": 1752 }, { "epoch": 0.38858947893431606, "grad_norm": 0.515625, "learning_rate": 3.0585106382978726e-05, "loss": 0.9429, "step": 1753 }, { "epoch": 0.38881115005749595, "grad_norm": 0.5625, "learning_rate": 3.0574024822695036e-05, "loss": 1.0562, "step": 1754 }, { "epoch": 0.38903282118067584, "grad_norm": 0.59375, "learning_rate": 3.0562943262411345e-05, "loss": 1.0651, "step": 1755 }, { "epoch": 0.38925449230385567, "grad_norm": 0.55078125, "learning_rate": 3.055186170212766e-05, "loss": 1.1126, "step": 1756 }, { "epoch": 0.38947616342703556, "grad_norm": 0.55859375, "learning_rate": 3.054078014184397e-05, "loss": 1.055, "step": 1757 }, { "epoch": 0.38969783455021545, "grad_norm": 0.55078125, "learning_rate": 3.052969858156029e-05, "loss": 1.0916, "step": 1758 }, { "epoch": 0.3899195056733953, "grad_norm": 0.546875, "learning_rate": 3.0518617021276596e-05, "loss": 1.036, "step": 1759 }, { "epoch": 0.3901411767965752, "grad_norm": 0.5859375, "learning_rate": 3.050753546099291e-05, "loss": 1.1016, "step": 1760 }, { "epoch": 0.39036284791975506, "grad_norm": 0.5546875, "learning_rate": 3.0496453900709222e-05, "loss": 1.0466, "step": 1761 }, { "epoch": 0.39058451904293495, "grad_norm": 0.53515625, "learning_rate": 3.0485372340425535e-05, "loss": 1.0381, "step": 1762 }, { "epoch": 0.3908061901661148, "grad_norm": 0.55859375, "learning_rate": 3.0474290780141844e-05, "loss": 1.0523, "step": 1763 }, { "epoch": 0.3910278612892947, "grad_norm": 0.546875, "learning_rate": 3.0463209219858157e-05, "loss": 1.0353, "step": 1764 }, { "epoch": 0.39124953241247457, "grad_norm": 0.5234375, "learning_rate": 3.045212765957447e-05, "loss": 0.9952, "step": 1765 }, { "epoch": 0.3914712035356544, "grad_norm": 0.51171875, "learning_rate": 3.0441046099290782e-05, "loss": 1.1088, "step": 1766 }, { "epoch": 0.3916928746588343, "grad_norm": 0.5390625, "learning_rate": 3.0429964539007095e-05, "loss": 1.036, "step": 1767 }, { "epoch": 0.3919145457820142, "grad_norm": 0.55859375, "learning_rate": 3.0418882978723408e-05, "loss": 1.0703, "step": 1768 }, { "epoch": 0.392136216905194, "grad_norm": 0.5546875, "learning_rate": 3.0407801418439717e-05, "loss": 1.1346, "step": 1769 }, { "epoch": 0.3923578880283739, "grad_norm": 0.55078125, "learning_rate": 3.039671985815603e-05, "loss": 1.129, "step": 1770 }, { "epoch": 0.3925795591515538, "grad_norm": 0.51953125, "learning_rate": 3.0385638297872343e-05, "loss": 1.0168, "step": 1771 }, { "epoch": 0.3928012302747336, "grad_norm": 0.546875, "learning_rate": 3.0374556737588655e-05, "loss": 1.087, "step": 1772 }, { "epoch": 0.3930229013979135, "grad_norm": 0.5625, "learning_rate": 3.0363475177304968e-05, "loss": 1.1662, "step": 1773 }, { "epoch": 0.3932445725210934, "grad_norm": 0.53125, "learning_rate": 3.035239361702128e-05, "loss": 1.0613, "step": 1774 }, { "epoch": 0.39346624364427324, "grad_norm": 0.70703125, "learning_rate": 3.034131205673759e-05, "loss": 1.0528, "step": 1775 }, { "epoch": 0.39368791476745313, "grad_norm": 0.53125, "learning_rate": 3.0330230496453903e-05, "loss": 1.0366, "step": 1776 }, { "epoch": 0.393909585890633, "grad_norm": 0.5625, "learning_rate": 3.0319148936170216e-05, "loss": 1.0617, "step": 1777 }, { "epoch": 0.3941312570138129, "grad_norm": 0.546875, "learning_rate": 3.030806737588653e-05, "loss": 1.0617, "step": 1778 }, { "epoch": 0.39435292813699274, "grad_norm": 0.53125, "learning_rate": 3.029698581560284e-05, "loss": 1.0763, "step": 1779 }, { "epoch": 0.39457459926017263, "grad_norm": 0.5390625, "learning_rate": 3.0285904255319154e-05, "loss": 1.0702, "step": 1780 }, { "epoch": 0.3947962703833525, "grad_norm": 0.5625, "learning_rate": 3.0274822695035464e-05, "loss": 1.1691, "step": 1781 }, { "epoch": 0.39501794150653236, "grad_norm": 0.55859375, "learning_rate": 3.0263741134751773e-05, "loss": 1.0757, "step": 1782 }, { "epoch": 0.39523961262971224, "grad_norm": 0.60546875, "learning_rate": 3.0252659574468082e-05, "loss": 1.1711, "step": 1783 }, { "epoch": 0.39546128375289213, "grad_norm": 0.515625, "learning_rate": 3.0241578014184395e-05, "loss": 1.0053, "step": 1784 }, { "epoch": 0.39568295487607197, "grad_norm": 0.62109375, "learning_rate": 3.0230496453900708e-05, "loss": 1.0726, "step": 1785 }, { "epoch": 0.39590462599925186, "grad_norm": 0.59765625, "learning_rate": 3.021941489361702e-05, "loss": 1.1356, "step": 1786 }, { "epoch": 0.39612629712243175, "grad_norm": 0.5546875, "learning_rate": 3.0208333333333334e-05, "loss": 1.1148, "step": 1787 }, { "epoch": 0.3963479682456116, "grad_norm": 0.54296875, "learning_rate": 3.0197251773049646e-05, "loss": 1.0126, "step": 1788 }, { "epoch": 0.39656963936879147, "grad_norm": 0.53125, "learning_rate": 3.0186170212765956e-05, "loss": 1.0889, "step": 1789 }, { "epoch": 0.39679131049197136, "grad_norm": 0.5546875, "learning_rate": 3.017508865248227e-05, "loss": 1.0787, "step": 1790 }, { "epoch": 0.3970129816151512, "grad_norm": 0.55859375, "learning_rate": 3.016400709219858e-05, "loss": 1.1008, "step": 1791 }, { "epoch": 0.3972346527383311, "grad_norm": 0.53515625, "learning_rate": 3.0152925531914894e-05, "loss": 1.0729, "step": 1792 }, { "epoch": 0.397456323861511, "grad_norm": 0.54296875, "learning_rate": 3.0141843971631207e-05, "loss": 1.0201, "step": 1793 }, { "epoch": 0.39767799498469086, "grad_norm": 0.56640625, "learning_rate": 3.013076241134752e-05, "loss": 1.1127, "step": 1794 }, { "epoch": 0.3978996661078707, "grad_norm": 0.546875, "learning_rate": 3.0119680851063832e-05, "loss": 1.1101, "step": 1795 }, { "epoch": 0.3981213372310506, "grad_norm": 0.54296875, "learning_rate": 3.010859929078014e-05, "loss": 1.0619, "step": 1796 }, { "epoch": 0.3983430083542305, "grad_norm": 0.546875, "learning_rate": 3.0097517730496454e-05, "loss": 1.1626, "step": 1797 }, { "epoch": 0.3985646794774103, "grad_norm": 0.5703125, "learning_rate": 3.0086436170212767e-05, "loss": 1.1334, "step": 1798 }, { "epoch": 0.3987863506005902, "grad_norm": 0.56640625, "learning_rate": 3.007535460992908e-05, "loss": 1.0825, "step": 1799 }, { "epoch": 0.3990080217237701, "grad_norm": 0.5703125, "learning_rate": 3.0064273049645393e-05, "loss": 1.0454, "step": 1800 }, { "epoch": 0.3992296928469499, "grad_norm": 0.6015625, "learning_rate": 3.0053191489361706e-05, "loss": 1.2032, "step": 1801 }, { "epoch": 0.3994513639701298, "grad_norm": 0.53515625, "learning_rate": 3.0042109929078015e-05, "loss": 1.0541, "step": 1802 }, { "epoch": 0.3996730350933097, "grad_norm": 0.54296875, "learning_rate": 3.0031028368794328e-05, "loss": 1.0506, "step": 1803 }, { "epoch": 0.39989470621648954, "grad_norm": 0.55078125, "learning_rate": 3.001994680851064e-05, "loss": 1.0646, "step": 1804 }, { "epoch": 0.4001163773396694, "grad_norm": 0.53515625, "learning_rate": 3.0008865248226953e-05, "loss": 1.0764, "step": 1805 }, { "epoch": 0.4003380484628493, "grad_norm": 0.52734375, "learning_rate": 2.9997783687943266e-05, "loss": 1.0272, "step": 1806 }, { "epoch": 0.40055971958602915, "grad_norm": 0.5625, "learning_rate": 2.998670212765958e-05, "loss": 1.0775, "step": 1807 }, { "epoch": 0.40078139070920904, "grad_norm": 0.546875, "learning_rate": 2.9975620567375888e-05, "loss": 1.1488, "step": 1808 }, { "epoch": 0.40100306183238893, "grad_norm": 0.5703125, "learning_rate": 2.99645390070922e-05, "loss": 1.0818, "step": 1809 }, { "epoch": 0.4012247329555688, "grad_norm": 0.56640625, "learning_rate": 2.9953457446808514e-05, "loss": 1.0341, "step": 1810 }, { "epoch": 0.40144640407874865, "grad_norm": 0.53125, "learning_rate": 2.9942375886524826e-05, "loss": 1.11, "step": 1811 }, { "epoch": 0.40166807520192854, "grad_norm": 0.5546875, "learning_rate": 2.993129432624114e-05, "loss": 1.0788, "step": 1812 }, { "epoch": 0.40188974632510843, "grad_norm": 0.56640625, "learning_rate": 2.9920212765957452e-05, "loss": 1.034, "step": 1813 }, { "epoch": 0.40211141744828827, "grad_norm": 0.5859375, "learning_rate": 2.990913120567376e-05, "loss": 1.0284, "step": 1814 }, { "epoch": 0.40233308857146816, "grad_norm": 0.58203125, "learning_rate": 2.9898049645390074e-05, "loss": 1.1933, "step": 1815 }, { "epoch": 0.40255475969464805, "grad_norm": 0.546875, "learning_rate": 2.9886968085106387e-05, "loss": 1.1094, "step": 1816 }, { "epoch": 0.4027764308178279, "grad_norm": 0.5703125, "learning_rate": 2.9875886524822693e-05, "loss": 1.1624, "step": 1817 }, { "epoch": 0.40299810194100777, "grad_norm": 0.57421875, "learning_rate": 2.9864804964539006e-05, "loss": 1.1233, "step": 1818 }, { "epoch": 0.40321977306418766, "grad_norm": 0.51171875, "learning_rate": 2.985372340425532e-05, "loss": 1.0178, "step": 1819 }, { "epoch": 0.4034414441873675, "grad_norm": 0.53515625, "learning_rate": 2.984264184397163e-05, "loss": 1.0607, "step": 1820 }, { "epoch": 0.4036631153105474, "grad_norm": 0.57421875, "learning_rate": 2.9831560283687944e-05, "loss": 1.0593, "step": 1821 }, { "epoch": 0.40388478643372727, "grad_norm": 0.56640625, "learning_rate": 2.9820478723404253e-05, "loss": 1.1032, "step": 1822 }, { "epoch": 0.4041064575569071, "grad_norm": 0.54296875, "learning_rate": 2.9809397163120566e-05, "loss": 1.0632, "step": 1823 }, { "epoch": 0.404328128680087, "grad_norm": 0.53515625, "learning_rate": 2.979831560283688e-05, "loss": 1.0456, "step": 1824 }, { "epoch": 0.4045497998032669, "grad_norm": 0.57421875, "learning_rate": 2.9787234042553192e-05, "loss": 1.1357, "step": 1825 }, { "epoch": 0.4047714709264468, "grad_norm": 0.54296875, "learning_rate": 2.9776152482269505e-05, "loss": 1.0561, "step": 1826 }, { "epoch": 0.4049931420496266, "grad_norm": 0.5390625, "learning_rate": 2.9765070921985817e-05, "loss": 1.0303, "step": 1827 }, { "epoch": 0.4052148131728065, "grad_norm": 0.52734375, "learning_rate": 2.9753989361702127e-05, "loss": 1.0232, "step": 1828 }, { "epoch": 0.4054364842959864, "grad_norm": 0.55859375, "learning_rate": 2.974290780141844e-05, "loss": 1.0504, "step": 1829 }, { "epoch": 0.4056581554191662, "grad_norm": 0.5703125, "learning_rate": 2.9731826241134752e-05, "loss": 1.1879, "step": 1830 }, { "epoch": 0.4058798265423461, "grad_norm": 0.57421875, "learning_rate": 2.9720744680851065e-05, "loss": 1.0619, "step": 1831 }, { "epoch": 0.406101497665526, "grad_norm": 0.54296875, "learning_rate": 2.9709663120567378e-05, "loss": 1.0791, "step": 1832 }, { "epoch": 0.40632316878870584, "grad_norm": 0.55078125, "learning_rate": 2.969858156028369e-05, "loss": 1.0517, "step": 1833 }, { "epoch": 0.4065448399118857, "grad_norm": 0.52734375, "learning_rate": 2.96875e-05, "loss": 1.0723, "step": 1834 }, { "epoch": 0.4067665110350656, "grad_norm": 0.53515625, "learning_rate": 2.9676418439716313e-05, "loss": 1.0544, "step": 1835 }, { "epoch": 0.40698818215824545, "grad_norm": 0.52734375, "learning_rate": 2.9665336879432625e-05, "loss": 1.0495, "step": 1836 }, { "epoch": 0.40720985328142534, "grad_norm": 0.53515625, "learning_rate": 2.9654255319148938e-05, "loss": 1.0713, "step": 1837 }, { "epoch": 0.4074315244046052, "grad_norm": 0.5625, "learning_rate": 2.964317375886525e-05, "loss": 1.0693, "step": 1838 }, { "epoch": 0.4076531955277851, "grad_norm": 0.55078125, "learning_rate": 2.9632092198581564e-05, "loss": 1.1322, "step": 1839 }, { "epoch": 0.40787486665096495, "grad_norm": 0.5390625, "learning_rate": 2.9621010638297873e-05, "loss": 1.0671, "step": 1840 }, { "epoch": 0.40809653777414484, "grad_norm": 0.5234375, "learning_rate": 2.9609929078014186e-05, "loss": 1.0125, "step": 1841 }, { "epoch": 0.40831820889732473, "grad_norm": 0.51171875, "learning_rate": 2.95988475177305e-05, "loss": 0.989, "step": 1842 }, { "epoch": 0.40853988002050456, "grad_norm": 0.5234375, "learning_rate": 2.958776595744681e-05, "loss": 1.0858, "step": 1843 }, { "epoch": 0.40876155114368445, "grad_norm": 0.5703125, "learning_rate": 2.9576684397163124e-05, "loss": 1.1117, "step": 1844 }, { "epoch": 0.40898322226686434, "grad_norm": 0.52734375, "learning_rate": 2.9565602836879437e-05, "loss": 1.0709, "step": 1845 }, { "epoch": 0.4092048933900442, "grad_norm": 0.57421875, "learning_rate": 2.9554521276595746e-05, "loss": 1.0727, "step": 1846 }, { "epoch": 0.40942656451322407, "grad_norm": 0.67578125, "learning_rate": 2.954343971631206e-05, "loss": 1.0599, "step": 1847 }, { "epoch": 0.40964823563640396, "grad_norm": 0.5703125, "learning_rate": 2.9532358156028372e-05, "loss": 1.0241, "step": 1848 }, { "epoch": 0.4098699067595838, "grad_norm": 0.5546875, "learning_rate": 2.9521276595744685e-05, "loss": 1.0801, "step": 1849 }, { "epoch": 0.4100915778827637, "grad_norm": 0.5546875, "learning_rate": 2.9510195035460998e-05, "loss": 1.0297, "step": 1850 }, { "epoch": 0.41031324900594357, "grad_norm": 0.5390625, "learning_rate": 2.949911347517731e-05, "loss": 1.1335, "step": 1851 }, { "epoch": 0.4105349201291234, "grad_norm": 0.5234375, "learning_rate": 2.9488031914893616e-05, "loss": 1.0155, "step": 1852 }, { "epoch": 0.4107565912523033, "grad_norm": 0.53125, "learning_rate": 2.947695035460993e-05, "loss": 1.1065, "step": 1853 }, { "epoch": 0.4109782623754832, "grad_norm": 0.57421875, "learning_rate": 2.946586879432624e-05, "loss": 1.027, "step": 1854 }, { "epoch": 0.4111999334986631, "grad_norm": 0.55078125, "learning_rate": 2.945478723404255e-05, "loss": 1.0472, "step": 1855 }, { "epoch": 0.4114216046218429, "grad_norm": 0.5625, "learning_rate": 2.9443705673758864e-05, "loss": 1.079, "step": 1856 }, { "epoch": 0.4116432757450228, "grad_norm": 0.5390625, "learning_rate": 2.9432624113475177e-05, "loss": 1.044, "step": 1857 }, { "epoch": 0.4118649468682027, "grad_norm": 0.53125, "learning_rate": 2.942154255319149e-05, "loss": 1.0743, "step": 1858 }, { "epoch": 0.4120866179913825, "grad_norm": 0.53125, "learning_rate": 2.9410460992907802e-05, "loss": 1.0247, "step": 1859 }, { "epoch": 0.4123082891145624, "grad_norm": 0.5390625, "learning_rate": 2.9399379432624115e-05, "loss": 1.1155, "step": 1860 }, { "epoch": 0.4125299602377423, "grad_norm": 0.57421875, "learning_rate": 2.9388297872340424e-05, "loss": 1.15, "step": 1861 }, { "epoch": 0.41275163136092213, "grad_norm": 0.53515625, "learning_rate": 2.9377216312056737e-05, "loss": 1.0699, "step": 1862 }, { "epoch": 0.412973302484102, "grad_norm": 0.515625, "learning_rate": 2.936613475177305e-05, "loss": 1.0271, "step": 1863 }, { "epoch": 0.4131949736072819, "grad_norm": 0.578125, "learning_rate": 2.9355053191489363e-05, "loss": 1.0606, "step": 1864 }, { "epoch": 0.41341664473046175, "grad_norm": 0.5390625, "learning_rate": 2.9343971631205676e-05, "loss": 0.9832, "step": 1865 }, { "epoch": 0.41363831585364164, "grad_norm": 0.546875, "learning_rate": 2.933289007092199e-05, "loss": 1.0075, "step": 1866 }, { "epoch": 0.4138599869768215, "grad_norm": 0.54296875, "learning_rate": 2.9321808510638298e-05, "loss": 1.0495, "step": 1867 }, { "epoch": 0.41408165810000136, "grad_norm": 0.5546875, "learning_rate": 2.931072695035461e-05, "loss": 1.0246, "step": 1868 }, { "epoch": 0.41430332922318125, "grad_norm": 0.546875, "learning_rate": 2.9299645390070923e-05, "loss": 1.0661, "step": 1869 }, { "epoch": 0.41452500034636114, "grad_norm": 0.62890625, "learning_rate": 2.9288563829787236e-05, "loss": 1.0278, "step": 1870 }, { "epoch": 0.41474667146954103, "grad_norm": 0.546875, "learning_rate": 2.927748226950355e-05, "loss": 1.0379, "step": 1871 }, { "epoch": 0.41496834259272086, "grad_norm": 0.55078125, "learning_rate": 2.926640070921986e-05, "loss": 1.1128, "step": 1872 }, { "epoch": 0.41519001371590075, "grad_norm": 0.52734375, "learning_rate": 2.925531914893617e-05, "loss": 1.0835, "step": 1873 }, { "epoch": 0.41541168483908064, "grad_norm": 0.58984375, "learning_rate": 2.9244237588652484e-05, "loss": 0.9935, "step": 1874 }, { "epoch": 0.4156333559622605, "grad_norm": 0.5390625, "learning_rate": 2.9233156028368797e-05, "loss": 0.9913, "step": 1875 }, { "epoch": 0.41585502708544037, "grad_norm": 0.5390625, "learning_rate": 2.922207446808511e-05, "loss": 1.1606, "step": 1876 }, { "epoch": 0.41607669820862025, "grad_norm": 0.60546875, "learning_rate": 2.9210992907801422e-05, "loss": 1.0589, "step": 1877 }, { "epoch": 0.4162983693318001, "grad_norm": 0.546875, "learning_rate": 2.9199911347517735e-05, "loss": 1.1098, "step": 1878 }, { "epoch": 0.41652004045498, "grad_norm": 0.51953125, "learning_rate": 2.9188829787234044e-05, "loss": 1.096, "step": 1879 }, { "epoch": 0.41674171157815987, "grad_norm": 0.54296875, "learning_rate": 2.9177748226950357e-05, "loss": 1.0453, "step": 1880 }, { "epoch": 0.4169633827013397, "grad_norm": 0.5546875, "learning_rate": 2.916666666666667e-05, "loss": 1.129, "step": 1881 }, { "epoch": 0.4171850538245196, "grad_norm": 0.53515625, "learning_rate": 2.9155585106382983e-05, "loss": 1.0894, "step": 1882 }, { "epoch": 0.4174067249476995, "grad_norm": 0.57421875, "learning_rate": 2.9144503546099295e-05, "loss": 1.0665, "step": 1883 }, { "epoch": 0.4176283960708793, "grad_norm": 0.55078125, "learning_rate": 2.9133421985815608e-05, "loss": 0.9685, "step": 1884 }, { "epoch": 0.4178500671940592, "grad_norm": 0.546875, "learning_rate": 2.9122340425531917e-05, "loss": 1.1025, "step": 1885 }, { "epoch": 0.4180717383172391, "grad_norm": 0.55078125, "learning_rate": 2.911125886524823e-05, "loss": 1.1066, "step": 1886 }, { "epoch": 0.418293409440419, "grad_norm": 0.5625, "learning_rate": 2.9100177304964536e-05, "loss": 1.0676, "step": 1887 }, { "epoch": 0.4185150805635988, "grad_norm": 0.53515625, "learning_rate": 2.908909574468085e-05, "loss": 1.0722, "step": 1888 }, { "epoch": 0.4187367516867787, "grad_norm": 0.51171875, "learning_rate": 2.9078014184397162e-05, "loss": 1.1078, "step": 1889 }, { "epoch": 0.4189584228099586, "grad_norm": 0.53515625, "learning_rate": 2.9066932624113475e-05, "loss": 1.1046, "step": 1890 }, { "epoch": 0.41918009393313843, "grad_norm": 0.5546875, "learning_rate": 2.9055851063829787e-05, "loss": 1.0871, "step": 1891 }, { "epoch": 0.4194017650563183, "grad_norm": 0.53125, "learning_rate": 2.90447695035461e-05, "loss": 1.0794, "step": 1892 }, { "epoch": 0.4196234361794982, "grad_norm": 0.53515625, "learning_rate": 2.903368794326241e-05, "loss": 1.135, "step": 1893 }, { "epoch": 0.41984510730267804, "grad_norm": 0.59375, "learning_rate": 2.9022606382978722e-05, "loss": 1.0757, "step": 1894 }, { "epoch": 0.42006677842585793, "grad_norm": 0.5390625, "learning_rate": 2.9011524822695035e-05, "loss": 1.0185, "step": 1895 }, { "epoch": 0.4202884495490378, "grad_norm": 0.5546875, "learning_rate": 2.9000443262411348e-05, "loss": 1.0184, "step": 1896 }, { "epoch": 0.42051012067221766, "grad_norm": 0.56640625, "learning_rate": 2.898936170212766e-05, "loss": 1.0308, "step": 1897 }, { "epoch": 0.42073179179539755, "grad_norm": 0.57421875, "learning_rate": 2.8978280141843973e-05, "loss": 1.0421, "step": 1898 }, { "epoch": 0.42095346291857744, "grad_norm": 0.54296875, "learning_rate": 2.8967198581560283e-05, "loss": 1.041, "step": 1899 }, { "epoch": 0.42117513404175727, "grad_norm": 0.56640625, "learning_rate": 2.8956117021276595e-05, "loss": 1.0498, "step": 1900 }, { "epoch": 0.42139680516493716, "grad_norm": 0.54296875, "learning_rate": 2.8945035460992908e-05, "loss": 0.9443, "step": 1901 }, { "epoch": 0.42161847628811705, "grad_norm": 0.546875, "learning_rate": 2.893395390070922e-05, "loss": 1.0247, "step": 1902 }, { "epoch": 0.42184014741129694, "grad_norm": 0.5703125, "learning_rate": 2.8922872340425534e-05, "loss": 1.1047, "step": 1903 }, { "epoch": 0.4220618185344768, "grad_norm": 0.5234375, "learning_rate": 2.8911790780141847e-05, "loss": 1.0446, "step": 1904 }, { "epoch": 0.42228348965765666, "grad_norm": 0.60546875, "learning_rate": 2.8900709219858156e-05, "loss": 1.1034, "step": 1905 }, { "epoch": 0.42250516078083655, "grad_norm": 0.54296875, "learning_rate": 2.888962765957447e-05, "loss": 1.0335, "step": 1906 }, { "epoch": 0.4227268319040164, "grad_norm": 0.5390625, "learning_rate": 2.887854609929078e-05, "loss": 1.0877, "step": 1907 }, { "epoch": 0.4229485030271963, "grad_norm": 0.578125, "learning_rate": 2.8867464539007094e-05, "loss": 1.0863, "step": 1908 }, { "epoch": 0.42317017415037617, "grad_norm": 0.54296875, "learning_rate": 2.8856382978723407e-05, "loss": 1.0875, "step": 1909 }, { "epoch": 0.423391845273556, "grad_norm": 0.5234375, "learning_rate": 2.884530141843972e-05, "loss": 1.0644, "step": 1910 }, { "epoch": 0.4236135163967359, "grad_norm": 0.546875, "learning_rate": 2.883421985815603e-05, "loss": 1.0918, "step": 1911 }, { "epoch": 0.4238351875199158, "grad_norm": 0.53125, "learning_rate": 2.8823138297872342e-05, "loss": 1.0324, "step": 1912 }, { "epoch": 0.4240568586430956, "grad_norm": 0.55859375, "learning_rate": 2.8812056737588655e-05, "loss": 1.0912, "step": 1913 }, { "epoch": 0.4242785297662755, "grad_norm": 0.54296875, "learning_rate": 2.8800975177304968e-05, "loss": 1.0869, "step": 1914 }, { "epoch": 0.4245002008894554, "grad_norm": 0.5703125, "learning_rate": 2.878989361702128e-05, "loss": 1.0759, "step": 1915 }, { "epoch": 0.4247218720126352, "grad_norm": 0.5703125, "learning_rate": 2.8778812056737593e-05, "loss": 1.1188, "step": 1916 }, { "epoch": 0.4249435431358151, "grad_norm": 0.55078125, "learning_rate": 2.8767730496453902e-05, "loss": 1.0943, "step": 1917 }, { "epoch": 0.425165214258995, "grad_norm": 0.5390625, "learning_rate": 2.8756648936170215e-05, "loss": 1.0517, "step": 1918 }, { "epoch": 0.4253868853821749, "grad_norm": 0.55859375, "learning_rate": 2.8745567375886528e-05, "loss": 1.0721, "step": 1919 }, { "epoch": 0.42560855650535473, "grad_norm": 0.55859375, "learning_rate": 2.873448581560284e-05, "loss": 1.0277, "step": 1920 }, { "epoch": 0.4258302276285346, "grad_norm": 0.5390625, "learning_rate": 2.8723404255319154e-05, "loss": 1.0305, "step": 1921 }, { "epoch": 0.4260518987517145, "grad_norm": 0.53125, "learning_rate": 2.8712322695035466e-05, "loss": 1.0189, "step": 1922 }, { "epoch": 0.42627356987489434, "grad_norm": 0.546875, "learning_rate": 2.8701241134751772e-05, "loss": 1.1226, "step": 1923 }, { "epoch": 0.42649524099807423, "grad_norm": 0.546875, "learning_rate": 2.8690159574468085e-05, "loss": 1.012, "step": 1924 }, { "epoch": 0.4267169121212541, "grad_norm": 0.54296875, "learning_rate": 2.8679078014184398e-05, "loss": 1.097, "step": 1925 }, { "epoch": 0.42693858324443396, "grad_norm": 0.54296875, "learning_rate": 2.8667996453900707e-05, "loss": 1.0644, "step": 1926 }, { "epoch": 0.42716025436761385, "grad_norm": 0.55078125, "learning_rate": 2.865691489361702e-05, "loss": 1.0647, "step": 1927 }, { "epoch": 0.42738192549079373, "grad_norm": 0.5703125, "learning_rate": 2.8645833333333333e-05, "loss": 1.0995, "step": 1928 }, { "epoch": 0.42760359661397357, "grad_norm": 0.57421875, "learning_rate": 2.8634751773049646e-05, "loss": 1.0931, "step": 1929 }, { "epoch": 0.42782526773715346, "grad_norm": 0.5390625, "learning_rate": 2.862367021276596e-05, "loss": 1.0377, "step": 1930 }, { "epoch": 0.42804693886033335, "grad_norm": 0.53125, "learning_rate": 2.861258865248227e-05, "loss": 1.0715, "step": 1931 }, { "epoch": 0.42826860998351324, "grad_norm": 0.51171875, "learning_rate": 2.860150709219858e-05, "loss": 1.0062, "step": 1932 }, { "epoch": 0.42849028110669307, "grad_norm": 0.546875, "learning_rate": 2.8590425531914893e-05, "loss": 1.0737, "step": 1933 }, { "epoch": 0.42871195222987296, "grad_norm": 0.55859375, "learning_rate": 2.8579343971631206e-05, "loss": 0.988, "step": 1934 }, { "epoch": 0.42893362335305285, "grad_norm": 0.546875, "learning_rate": 2.856826241134752e-05, "loss": 1.1087, "step": 1935 }, { "epoch": 0.4291552944762327, "grad_norm": 0.53515625, "learning_rate": 2.855718085106383e-05, "loss": 1.0711, "step": 1936 }, { "epoch": 0.4293769655994126, "grad_norm": 0.5546875, "learning_rate": 2.8546099290780144e-05, "loss": 1.0871, "step": 1937 }, { "epoch": 0.42959863672259246, "grad_norm": 0.546875, "learning_rate": 2.8535017730496454e-05, "loss": 1.0234, "step": 1938 }, { "epoch": 0.4298203078457723, "grad_norm": 0.54296875, "learning_rate": 2.8523936170212767e-05, "loss": 1.1728, "step": 1939 }, { "epoch": 0.4300419789689522, "grad_norm": 0.546875, "learning_rate": 2.851285460992908e-05, "loss": 1.1015, "step": 1940 }, { "epoch": 0.4302636500921321, "grad_norm": 0.578125, "learning_rate": 2.8501773049645392e-05, "loss": 1.0451, "step": 1941 }, { "epoch": 0.4304853212153119, "grad_norm": 0.57421875, "learning_rate": 2.8490691489361705e-05, "loss": 1.1781, "step": 1942 }, { "epoch": 0.4307069923384918, "grad_norm": 0.546875, "learning_rate": 2.8479609929078018e-05, "loss": 1.1745, "step": 1943 }, { "epoch": 0.4309286634616717, "grad_norm": 0.51171875, "learning_rate": 2.8468528368794327e-05, "loss": 0.9988, "step": 1944 }, { "epoch": 0.4311503345848515, "grad_norm": 0.546875, "learning_rate": 2.845744680851064e-05, "loss": 0.9729, "step": 1945 }, { "epoch": 0.4313720057080314, "grad_norm": 0.5078125, "learning_rate": 2.8446365248226953e-05, "loss": 0.9826, "step": 1946 }, { "epoch": 0.4315936768312113, "grad_norm": 0.53515625, "learning_rate": 2.8435283687943265e-05, "loss": 1.0342, "step": 1947 }, { "epoch": 0.4318153479543912, "grad_norm": 0.578125, "learning_rate": 2.8424202127659578e-05, "loss": 1.1357, "step": 1948 }, { "epoch": 0.432037019077571, "grad_norm": 0.54296875, "learning_rate": 2.841312056737589e-05, "loss": 1.0058, "step": 1949 }, { "epoch": 0.4322586902007509, "grad_norm": 0.5546875, "learning_rate": 2.84020390070922e-05, "loss": 1.0679, "step": 1950 }, { "epoch": 0.4324803613239308, "grad_norm": 0.53125, "learning_rate": 2.8390957446808513e-05, "loss": 1.0606, "step": 1951 }, { "epoch": 0.43270203244711064, "grad_norm": 0.5390625, "learning_rate": 2.8379875886524826e-05, "loss": 1.0014, "step": 1952 }, { "epoch": 0.43292370357029053, "grad_norm": 0.546875, "learning_rate": 2.836879432624114e-05, "loss": 1.0752, "step": 1953 }, { "epoch": 0.4331453746934704, "grad_norm": 0.5859375, "learning_rate": 2.835771276595745e-05, "loss": 1.0293, "step": 1954 }, { "epoch": 0.43336704581665025, "grad_norm": 0.52734375, "learning_rate": 2.8346631205673764e-05, "loss": 1.0217, "step": 1955 }, { "epoch": 0.43358871693983014, "grad_norm": 0.57421875, "learning_rate": 2.8335549645390073e-05, "loss": 1.0466, "step": 1956 }, { "epoch": 0.43381038806301003, "grad_norm": 0.53515625, "learning_rate": 2.8324468085106386e-05, "loss": 1.0968, "step": 1957 }, { "epoch": 0.43403205918618987, "grad_norm": 0.58984375, "learning_rate": 2.8313386524822692e-05, "loss": 1.0764, "step": 1958 }, { "epoch": 0.43425373030936976, "grad_norm": 0.5625, "learning_rate": 2.8302304964539005e-05, "loss": 1.0732, "step": 1959 }, { "epoch": 0.43447540143254965, "grad_norm": 0.53125, "learning_rate": 2.8291223404255318e-05, "loss": 1.0733, "step": 1960 }, { "epoch": 0.4346970725557295, "grad_norm": 0.53125, "learning_rate": 2.828014184397163e-05, "loss": 1.17, "step": 1961 }, { "epoch": 0.43491874367890937, "grad_norm": 0.54296875, "learning_rate": 2.8269060283687943e-05, "loss": 1.0326, "step": 1962 }, { "epoch": 0.43514041480208926, "grad_norm": 0.5625, "learning_rate": 2.8257978723404256e-05, "loss": 1.1013, "step": 1963 }, { "epoch": 0.43536208592526915, "grad_norm": 0.546875, "learning_rate": 2.8246897163120566e-05, "loss": 1.0918, "step": 1964 }, { "epoch": 0.435583757048449, "grad_norm": 0.51953125, "learning_rate": 2.8235815602836878e-05, "loss": 1.0592, "step": 1965 }, { "epoch": 0.4358054281716289, "grad_norm": 0.55078125, "learning_rate": 2.822473404255319e-05, "loss": 1.1057, "step": 1966 }, { "epoch": 0.43602709929480876, "grad_norm": 0.5234375, "learning_rate": 2.8213652482269504e-05, "loss": 1.0299, "step": 1967 }, { "epoch": 0.4362487704179886, "grad_norm": 0.55859375, "learning_rate": 2.8202570921985817e-05, "loss": 1.091, "step": 1968 }, { "epoch": 0.4364704415411685, "grad_norm": 0.5546875, "learning_rate": 2.819148936170213e-05, "loss": 1.1346, "step": 1969 }, { "epoch": 0.4366921126643484, "grad_norm": 0.53125, "learning_rate": 2.818040780141844e-05, "loss": 1.0619, "step": 1970 }, { "epoch": 0.4369137837875282, "grad_norm": 0.58984375, "learning_rate": 2.816932624113475e-05, "loss": 1.1196, "step": 1971 }, { "epoch": 0.4371354549107081, "grad_norm": 0.5625, "learning_rate": 2.8158244680851064e-05, "loss": 1.0432, "step": 1972 }, { "epoch": 0.437357126033888, "grad_norm": 0.52734375, "learning_rate": 2.8147163120567377e-05, "loss": 1.0507, "step": 1973 }, { "epoch": 0.4375787971570678, "grad_norm": 0.5546875, "learning_rate": 2.813608156028369e-05, "loss": 1.0618, "step": 1974 }, { "epoch": 0.4378004682802477, "grad_norm": 0.53125, "learning_rate": 2.8125000000000003e-05, "loss": 1.06, "step": 1975 }, { "epoch": 0.4380221394034276, "grad_norm": 0.51171875, "learning_rate": 2.8113918439716312e-05, "loss": 1.0084, "step": 1976 }, { "epoch": 0.43824381052660744, "grad_norm": 0.546875, "learning_rate": 2.8102836879432625e-05, "loss": 1.0542, "step": 1977 }, { "epoch": 0.4384654816497873, "grad_norm": 0.53515625, "learning_rate": 2.8091755319148938e-05, "loss": 1.093, "step": 1978 }, { "epoch": 0.4386871527729672, "grad_norm": 0.56640625, "learning_rate": 2.808067375886525e-05, "loss": 1.2109, "step": 1979 }, { "epoch": 0.4389088238961471, "grad_norm": 0.51953125, "learning_rate": 2.8069592198581563e-05, "loss": 1.0345, "step": 1980 }, { "epoch": 0.43913049501932694, "grad_norm": 0.52734375, "learning_rate": 2.8058510638297876e-05, "loss": 1.0238, "step": 1981 }, { "epoch": 0.43935216614250683, "grad_norm": 0.546875, "learning_rate": 2.8047429078014185e-05, "loss": 1.0999, "step": 1982 }, { "epoch": 0.4395738372656867, "grad_norm": 0.5546875, "learning_rate": 2.8036347517730498e-05, "loss": 1.0786, "step": 1983 }, { "epoch": 0.43979550838886655, "grad_norm": 0.53515625, "learning_rate": 2.802526595744681e-05, "loss": 1.0391, "step": 1984 }, { "epoch": 0.44001717951204644, "grad_norm": 0.53515625, "learning_rate": 2.8014184397163124e-05, "loss": 1.0347, "step": 1985 }, { "epoch": 0.44023885063522633, "grad_norm": 0.51953125, "learning_rate": 2.8003102836879436e-05, "loss": 1.0725, "step": 1986 }, { "epoch": 0.44046052175840616, "grad_norm": 0.5546875, "learning_rate": 2.799202127659575e-05, "loss": 1.0151, "step": 1987 }, { "epoch": 0.44068219288158605, "grad_norm": 0.5859375, "learning_rate": 2.798093971631206e-05, "loss": 1.0685, "step": 1988 }, { "epoch": 0.44090386400476594, "grad_norm": 0.51953125, "learning_rate": 2.796985815602837e-05, "loss": 1.0182, "step": 1989 }, { "epoch": 0.4411255351279458, "grad_norm": 0.5859375, "learning_rate": 2.7958776595744684e-05, "loss": 1.0398, "step": 1990 }, { "epoch": 0.44134720625112567, "grad_norm": 0.5546875, "learning_rate": 2.7947695035460997e-05, "loss": 1.1178, "step": 1991 }, { "epoch": 0.44156887737430556, "grad_norm": 0.54296875, "learning_rate": 2.793661347517731e-05, "loss": 1.1338, "step": 1992 }, { "epoch": 0.4417905484974854, "grad_norm": 0.53125, "learning_rate": 2.7925531914893616e-05, "loss": 1.0845, "step": 1993 }, { "epoch": 0.4420122196206653, "grad_norm": 0.546875, "learning_rate": 2.791445035460993e-05, "loss": 1.1143, "step": 1994 }, { "epoch": 0.44223389074384517, "grad_norm": 0.52734375, "learning_rate": 2.790336879432624e-05, "loss": 1.0021, "step": 1995 }, { "epoch": 0.44245556186702506, "grad_norm": 0.57421875, "learning_rate": 2.7892287234042554e-05, "loss": 1.1236, "step": 1996 }, { "epoch": 0.4426772329902049, "grad_norm": 0.546875, "learning_rate": 2.7881205673758863e-05, "loss": 1.0833, "step": 1997 }, { "epoch": 0.4428989041133848, "grad_norm": 0.53515625, "learning_rate": 2.7870124113475176e-05, "loss": 1.1335, "step": 1998 }, { "epoch": 0.4431205752365647, "grad_norm": 0.55859375, "learning_rate": 2.785904255319149e-05, "loss": 1.0818, "step": 1999 }, { "epoch": 0.4433422463597445, "grad_norm": 0.54296875, "learning_rate": 2.78479609929078e-05, "loss": 1.1088, "step": 2000 }, { "epoch": 0.4435639174829244, "grad_norm": 0.546875, "learning_rate": 2.7836879432624114e-05, "loss": 1.1353, "step": 2001 }, { "epoch": 0.4437855886061043, "grad_norm": 0.5390625, "learning_rate": 2.7825797872340427e-05, "loss": 1.0762, "step": 2002 }, { "epoch": 0.4440072597292841, "grad_norm": 0.546875, "learning_rate": 2.7814716312056737e-05, "loss": 1.1226, "step": 2003 }, { "epoch": 0.444228930852464, "grad_norm": 0.5625, "learning_rate": 2.780363475177305e-05, "loss": 1.1078, "step": 2004 }, { "epoch": 0.4444506019756439, "grad_norm": 0.57421875, "learning_rate": 2.7792553191489362e-05, "loss": 1.1049, "step": 2005 }, { "epoch": 0.44467227309882373, "grad_norm": 0.53515625, "learning_rate": 2.7781471631205675e-05, "loss": 1.1204, "step": 2006 }, { "epoch": 0.4448939442220036, "grad_norm": 0.58203125, "learning_rate": 2.7770390070921988e-05, "loss": 1.0731, "step": 2007 }, { "epoch": 0.4451156153451835, "grad_norm": 0.5625, "learning_rate": 2.77593085106383e-05, "loss": 1.1391, "step": 2008 }, { "epoch": 0.44533728646836335, "grad_norm": 0.546875, "learning_rate": 2.774822695035461e-05, "loss": 1.0959, "step": 2009 }, { "epoch": 0.44555895759154324, "grad_norm": 0.54296875, "learning_rate": 2.7737145390070923e-05, "loss": 1.0376, "step": 2010 }, { "epoch": 0.4457806287147231, "grad_norm": 0.53125, "learning_rate": 2.7726063829787235e-05, "loss": 1.0757, "step": 2011 }, { "epoch": 0.446002299837903, "grad_norm": 0.5546875, "learning_rate": 2.7714982269503548e-05, "loss": 1.1023, "step": 2012 }, { "epoch": 0.44622397096108285, "grad_norm": 0.55859375, "learning_rate": 2.770390070921986e-05, "loss": 1.054, "step": 2013 }, { "epoch": 0.44644564208426274, "grad_norm": 0.5078125, "learning_rate": 2.7692819148936174e-05, "loss": 1.0547, "step": 2014 }, { "epoch": 0.44666731320744263, "grad_norm": 0.52734375, "learning_rate": 2.7681737588652483e-05, "loss": 1.051, "step": 2015 }, { "epoch": 0.44688898433062246, "grad_norm": 0.54296875, "learning_rate": 2.7670656028368796e-05, "loss": 1.0955, "step": 2016 }, { "epoch": 0.44711065545380235, "grad_norm": 0.55859375, "learning_rate": 2.765957446808511e-05, "loss": 1.0958, "step": 2017 }, { "epoch": 0.44733232657698224, "grad_norm": 0.578125, "learning_rate": 2.764849290780142e-05, "loss": 1.1102, "step": 2018 }, { "epoch": 0.4475539977001621, "grad_norm": 0.5546875, "learning_rate": 2.7637411347517734e-05, "loss": 1.0404, "step": 2019 }, { "epoch": 0.44777566882334197, "grad_norm": 0.57421875, "learning_rate": 2.7626329787234047e-05, "loss": 1.0287, "step": 2020 }, { "epoch": 0.44799733994652186, "grad_norm": 0.55078125, "learning_rate": 2.7615248226950356e-05, "loss": 1.0932, "step": 2021 }, { "epoch": 0.4482190110697017, "grad_norm": 0.5546875, "learning_rate": 2.760416666666667e-05, "loss": 1.0095, "step": 2022 }, { "epoch": 0.4484406821928816, "grad_norm": 0.546875, "learning_rate": 2.7593085106382982e-05, "loss": 1.0371, "step": 2023 }, { "epoch": 0.44866235331606147, "grad_norm": 0.54296875, "learning_rate": 2.7582003546099295e-05, "loss": 1.0851, "step": 2024 }, { "epoch": 0.44888402443924136, "grad_norm": 0.53125, "learning_rate": 2.7570921985815607e-05, "loss": 1.0673, "step": 2025 }, { "epoch": 0.4491056955624212, "grad_norm": 0.5390625, "learning_rate": 2.755984042553192e-05, "loss": 1.0347, "step": 2026 }, { "epoch": 0.4493273666856011, "grad_norm": 0.55859375, "learning_rate": 2.754875886524823e-05, "loss": 1.0209, "step": 2027 }, { "epoch": 0.44954903780878097, "grad_norm": 0.515625, "learning_rate": 2.753767730496454e-05, "loss": 1.0396, "step": 2028 }, { "epoch": 0.4497707089319608, "grad_norm": 0.55859375, "learning_rate": 2.7526595744680848e-05, "loss": 0.9958, "step": 2029 }, { "epoch": 0.4499923800551407, "grad_norm": 0.54296875, "learning_rate": 2.751551418439716e-05, "loss": 1.0298, "step": 2030 }, { "epoch": 0.4502140511783206, "grad_norm": 0.55078125, "learning_rate": 2.7504432624113474e-05, "loss": 1.0627, "step": 2031 }, { "epoch": 0.4504357223015004, "grad_norm": 0.55859375, "learning_rate": 2.7493351063829787e-05, "loss": 0.9965, "step": 2032 }, { "epoch": 0.4506573934246803, "grad_norm": 0.578125, "learning_rate": 2.74822695035461e-05, "loss": 1.0925, "step": 2033 }, { "epoch": 0.4508790645478602, "grad_norm": 0.5546875, "learning_rate": 2.7471187943262412e-05, "loss": 1.1904, "step": 2034 }, { "epoch": 0.45110073567104003, "grad_norm": 0.54296875, "learning_rate": 2.746010638297872e-05, "loss": 1.0373, "step": 2035 }, { "epoch": 0.4513224067942199, "grad_norm": 0.546875, "learning_rate": 2.7449024822695034e-05, "loss": 1.0685, "step": 2036 }, { "epoch": 0.4515440779173998, "grad_norm": 0.54296875, "learning_rate": 2.7437943262411347e-05, "loss": 1.0669, "step": 2037 }, { "epoch": 0.45176574904057964, "grad_norm": 0.578125, "learning_rate": 2.742686170212766e-05, "loss": 1.0543, "step": 2038 }, { "epoch": 0.45198742016375953, "grad_norm": 0.5546875, "learning_rate": 2.7415780141843973e-05, "loss": 1.0356, "step": 2039 }, { "epoch": 0.4522090912869394, "grad_norm": 0.54296875, "learning_rate": 2.7404698581560285e-05, "loss": 1.0584, "step": 2040 }, { "epoch": 0.4524307624101193, "grad_norm": 0.54296875, "learning_rate": 2.7393617021276595e-05, "loss": 1.0994, "step": 2041 }, { "epoch": 0.45265243353329915, "grad_norm": 0.51953125, "learning_rate": 2.7382535460992908e-05, "loss": 1.1, "step": 2042 }, { "epoch": 0.45287410465647904, "grad_norm": 0.56640625, "learning_rate": 2.737145390070922e-05, "loss": 1.0918, "step": 2043 }, { "epoch": 0.4530957757796589, "grad_norm": 0.57421875, "learning_rate": 2.7360372340425533e-05, "loss": 1.1229, "step": 2044 }, { "epoch": 0.45331744690283876, "grad_norm": 0.54296875, "learning_rate": 2.7349290780141846e-05, "loss": 1.0361, "step": 2045 }, { "epoch": 0.45353911802601865, "grad_norm": 0.53125, "learning_rate": 2.733820921985816e-05, "loss": 1.0717, "step": 2046 }, { "epoch": 0.45376078914919854, "grad_norm": 0.57421875, "learning_rate": 2.7327127659574468e-05, "loss": 1.0294, "step": 2047 }, { "epoch": 0.4539824602723784, "grad_norm": 0.53125, "learning_rate": 2.731604609929078e-05, "loss": 1.056, "step": 2048 }, { "epoch": 0.45420413139555826, "grad_norm": 0.5390625, "learning_rate": 2.7304964539007094e-05, "loss": 1.0722, "step": 2049 }, { "epoch": 0.45442580251873815, "grad_norm": 0.5625, "learning_rate": 2.7293882978723406e-05, "loss": 1.1549, "step": 2050 }, { "epoch": 0.454647473641918, "grad_norm": 0.55859375, "learning_rate": 2.728280141843972e-05, "loss": 1.041, "step": 2051 }, { "epoch": 0.4548691447650979, "grad_norm": 0.53515625, "learning_rate": 2.7271719858156032e-05, "loss": 1.0596, "step": 2052 }, { "epoch": 0.45509081588827777, "grad_norm": 0.546875, "learning_rate": 2.726063829787234e-05, "loss": 1.127, "step": 2053 }, { "epoch": 0.4553124870114576, "grad_norm": 0.52734375, "learning_rate": 2.7249556737588654e-05, "loss": 1.0876, "step": 2054 }, { "epoch": 0.4555341581346375, "grad_norm": 0.5546875, "learning_rate": 2.7238475177304967e-05, "loss": 1.057, "step": 2055 }, { "epoch": 0.4557558292578174, "grad_norm": 0.55859375, "learning_rate": 2.722739361702128e-05, "loss": 1.0741, "step": 2056 }, { "epoch": 0.45597750038099727, "grad_norm": 0.5546875, "learning_rate": 2.7216312056737592e-05, "loss": 1.1066, "step": 2057 }, { "epoch": 0.4561991715041771, "grad_norm": 0.578125, "learning_rate": 2.7205230496453905e-05, "loss": 1.1269, "step": 2058 }, { "epoch": 0.456420842627357, "grad_norm": 0.578125, "learning_rate": 2.7194148936170215e-05, "loss": 1.1391, "step": 2059 }, { "epoch": 0.4566425137505369, "grad_norm": 0.57421875, "learning_rate": 2.7183067375886527e-05, "loss": 1.0812, "step": 2060 }, { "epoch": 0.4568641848737167, "grad_norm": 0.54296875, "learning_rate": 2.717198581560284e-05, "loss": 1.0411, "step": 2061 }, { "epoch": 0.4570858559968966, "grad_norm": 0.55078125, "learning_rate": 2.7160904255319153e-05, "loss": 1.0471, "step": 2062 }, { "epoch": 0.4573075271200765, "grad_norm": 0.5234375, "learning_rate": 2.7149822695035466e-05, "loss": 1.0838, "step": 2063 }, { "epoch": 0.45752919824325633, "grad_norm": 0.54296875, "learning_rate": 2.713874113475177e-05, "loss": 1.0791, "step": 2064 }, { "epoch": 0.4577508693664362, "grad_norm": 0.546875, "learning_rate": 2.7127659574468084e-05, "loss": 1.0647, "step": 2065 }, { "epoch": 0.4579725404896161, "grad_norm": 0.546875, "learning_rate": 2.7116578014184397e-05, "loss": 1.0947, "step": 2066 }, { "epoch": 0.45819421161279594, "grad_norm": 0.53125, "learning_rate": 2.710549645390071e-05, "loss": 1.06, "step": 2067 }, { "epoch": 0.45841588273597583, "grad_norm": 0.5546875, "learning_rate": 2.709441489361702e-05, "loss": 1.0451, "step": 2068 }, { "epoch": 0.4586375538591557, "grad_norm": 0.52734375, "learning_rate": 2.7083333333333332e-05, "loss": 1.025, "step": 2069 }, { "epoch": 0.45885922498233556, "grad_norm": 0.52734375, "learning_rate": 2.7072251773049645e-05, "loss": 1.0595, "step": 2070 }, { "epoch": 0.45908089610551545, "grad_norm": 0.54296875, "learning_rate": 2.7061170212765958e-05, "loss": 1.0679, "step": 2071 }, { "epoch": 0.45930256722869534, "grad_norm": 0.5625, "learning_rate": 2.705008865248227e-05, "loss": 1.0789, "step": 2072 }, { "epoch": 0.4595242383518752, "grad_norm": 0.546875, "learning_rate": 2.7039007092198583e-05, "loss": 1.0786, "step": 2073 }, { "epoch": 0.45974590947505506, "grad_norm": 0.57421875, "learning_rate": 2.7027925531914893e-05, "loss": 1.0774, "step": 2074 }, { "epoch": 0.45996758059823495, "grad_norm": 0.5859375, "learning_rate": 2.7016843971631205e-05, "loss": 1.0867, "step": 2075 }, { "epoch": 0.46018925172141484, "grad_norm": 0.54296875, "learning_rate": 2.7005762411347518e-05, "loss": 1.0718, "step": 2076 }, { "epoch": 0.46041092284459467, "grad_norm": 0.55859375, "learning_rate": 2.699468085106383e-05, "loss": 1.1624, "step": 2077 }, { "epoch": 0.46063259396777456, "grad_norm": 0.52734375, "learning_rate": 2.6983599290780144e-05, "loss": 1.0223, "step": 2078 }, { "epoch": 0.46085426509095445, "grad_norm": 0.5390625, "learning_rate": 2.6972517730496456e-05, "loss": 1.063, "step": 2079 }, { "epoch": 0.4610759362141343, "grad_norm": 0.6484375, "learning_rate": 2.6961436170212766e-05, "loss": 1.0911, "step": 2080 }, { "epoch": 0.4612976073373142, "grad_norm": 0.5625, "learning_rate": 2.695035460992908e-05, "loss": 1.0486, "step": 2081 }, { "epoch": 0.46151927846049406, "grad_norm": 0.51171875, "learning_rate": 2.693927304964539e-05, "loss": 1.0711, "step": 2082 }, { "epoch": 0.4617409495836739, "grad_norm": 0.59375, "learning_rate": 2.6928191489361704e-05, "loss": 1.1152, "step": 2083 }, { "epoch": 0.4619626207068538, "grad_norm": 0.54296875, "learning_rate": 2.6917109929078017e-05, "loss": 1.0497, "step": 2084 }, { "epoch": 0.4621842918300337, "grad_norm": 0.53515625, "learning_rate": 2.690602836879433e-05, "loss": 1.1095, "step": 2085 }, { "epoch": 0.4624059629532135, "grad_norm": 0.55859375, "learning_rate": 2.689494680851064e-05, "loss": 1.128, "step": 2086 }, { "epoch": 0.4626276340763934, "grad_norm": 0.515625, "learning_rate": 2.6883865248226952e-05, "loss": 1.0529, "step": 2087 }, { "epoch": 0.4628493051995733, "grad_norm": 0.5546875, "learning_rate": 2.6872783687943265e-05, "loss": 1.0228, "step": 2088 }, { "epoch": 0.4630709763227532, "grad_norm": 0.5390625, "learning_rate": 2.6861702127659577e-05, "loss": 1.038, "step": 2089 }, { "epoch": 0.463292647445933, "grad_norm": 0.52734375, "learning_rate": 2.685062056737589e-05, "loss": 1.0834, "step": 2090 }, { "epoch": 0.4635143185691129, "grad_norm": 0.55078125, "learning_rate": 2.6839539007092203e-05, "loss": 1.0533, "step": 2091 }, { "epoch": 0.4637359896922928, "grad_norm": 0.51953125, "learning_rate": 2.6828457446808512e-05, "loss": 1.0443, "step": 2092 }, { "epoch": 0.46395766081547263, "grad_norm": 0.546875, "learning_rate": 2.6817375886524825e-05, "loss": 1.0802, "step": 2093 }, { "epoch": 0.4641793319386525, "grad_norm": 0.546875, "learning_rate": 2.6806294326241138e-05, "loss": 1.0886, "step": 2094 }, { "epoch": 0.4644010030618324, "grad_norm": 0.5625, "learning_rate": 2.679521276595745e-05, "loss": 1.0709, "step": 2095 }, { "epoch": 0.46462267418501224, "grad_norm": 0.55859375, "learning_rate": 2.6784131205673763e-05, "loss": 1.0588, "step": 2096 }, { "epoch": 0.46484434530819213, "grad_norm": 0.5390625, "learning_rate": 2.6773049645390076e-05, "loss": 1.0837, "step": 2097 }, { "epoch": 0.465066016431372, "grad_norm": 0.5703125, "learning_rate": 2.6761968085106386e-05, "loss": 1.0948, "step": 2098 }, { "epoch": 0.46528768755455185, "grad_norm": 0.546875, "learning_rate": 2.6750886524822695e-05, "loss": 1.0857, "step": 2099 }, { "epoch": 0.46550935867773174, "grad_norm": 0.5625, "learning_rate": 2.6739804964539004e-05, "loss": 1.072, "step": 2100 }, { "epoch": 0.46573102980091163, "grad_norm": 0.55859375, "learning_rate": 2.6728723404255317e-05, "loss": 1.1326, "step": 2101 }, { "epoch": 0.46595270092409147, "grad_norm": 0.5234375, "learning_rate": 2.671764184397163e-05, "loss": 1.0077, "step": 2102 }, { "epoch": 0.46617437204727136, "grad_norm": 0.55078125, "learning_rate": 2.6706560283687943e-05, "loss": 1.0634, "step": 2103 }, { "epoch": 0.46639604317045125, "grad_norm": 0.5390625, "learning_rate": 2.6695478723404255e-05, "loss": 1.0234, "step": 2104 }, { "epoch": 0.46661771429363114, "grad_norm": 0.5546875, "learning_rate": 2.6684397163120568e-05, "loss": 1.0367, "step": 2105 }, { "epoch": 0.46683938541681097, "grad_norm": 0.5703125, "learning_rate": 2.6673315602836878e-05, "loss": 1.0188, "step": 2106 }, { "epoch": 0.46706105653999086, "grad_norm": 0.5546875, "learning_rate": 2.666223404255319e-05, "loss": 1.0528, "step": 2107 }, { "epoch": 0.46728272766317075, "grad_norm": 0.5546875, "learning_rate": 2.6651152482269503e-05, "loss": 1.0443, "step": 2108 }, { "epoch": 0.4675043987863506, "grad_norm": 0.5546875, "learning_rate": 2.6640070921985816e-05, "loss": 1.0338, "step": 2109 }, { "epoch": 0.4677260699095305, "grad_norm": 0.53515625, "learning_rate": 2.662898936170213e-05, "loss": 1.0296, "step": 2110 }, { "epoch": 0.46794774103271036, "grad_norm": 0.5625, "learning_rate": 2.661790780141844e-05, "loss": 1.0717, "step": 2111 }, { "epoch": 0.4681694121558902, "grad_norm": 0.53125, "learning_rate": 2.660682624113475e-05, "loss": 1.0402, "step": 2112 }, { "epoch": 0.4683910832790701, "grad_norm": 0.5390625, "learning_rate": 2.6595744680851064e-05, "loss": 1.0842, "step": 2113 }, { "epoch": 0.46861275440225, "grad_norm": 0.5390625, "learning_rate": 2.6584663120567376e-05, "loss": 1.0321, "step": 2114 }, { "epoch": 0.4688344255254298, "grad_norm": 0.53515625, "learning_rate": 2.657358156028369e-05, "loss": 1.057, "step": 2115 }, { "epoch": 0.4690560966486097, "grad_norm": 0.55078125, "learning_rate": 2.6562500000000002e-05, "loss": 1.1055, "step": 2116 }, { "epoch": 0.4692777677717896, "grad_norm": 0.5546875, "learning_rate": 2.6551418439716315e-05, "loss": 1.0799, "step": 2117 }, { "epoch": 0.4694994388949695, "grad_norm": 0.53125, "learning_rate": 2.6540336879432624e-05, "loss": 1.0114, "step": 2118 }, { "epoch": 0.4697211100181493, "grad_norm": 0.55078125, "learning_rate": 2.6529255319148937e-05, "loss": 1.1328, "step": 2119 }, { "epoch": 0.4699427811413292, "grad_norm": 0.55078125, "learning_rate": 2.651817375886525e-05, "loss": 1.0849, "step": 2120 }, { "epoch": 0.4701644522645091, "grad_norm": 0.5234375, "learning_rate": 2.6507092198581562e-05, "loss": 1.0347, "step": 2121 }, { "epoch": 0.4703861233876889, "grad_norm": 0.53125, "learning_rate": 2.6496010638297875e-05, "loss": 1.0537, "step": 2122 }, { "epoch": 0.4706077945108688, "grad_norm": 0.5703125, "learning_rate": 2.6484929078014188e-05, "loss": 1.0858, "step": 2123 }, { "epoch": 0.4708294656340487, "grad_norm": 0.546875, "learning_rate": 2.6473847517730497e-05, "loss": 1.1067, "step": 2124 }, { "epoch": 0.47105113675722854, "grad_norm": 0.52734375, "learning_rate": 2.646276595744681e-05, "loss": 1.0663, "step": 2125 }, { "epoch": 0.47127280788040843, "grad_norm": 0.54296875, "learning_rate": 2.6451684397163123e-05, "loss": 1.0913, "step": 2126 }, { "epoch": 0.4714944790035883, "grad_norm": 0.55078125, "learning_rate": 2.6440602836879436e-05, "loss": 1.042, "step": 2127 }, { "epoch": 0.47171615012676815, "grad_norm": 0.56640625, "learning_rate": 2.642952127659575e-05, "loss": 1.1444, "step": 2128 }, { "epoch": 0.47193782124994804, "grad_norm": 0.57421875, "learning_rate": 2.641843971631206e-05, "loss": 1.078, "step": 2129 }, { "epoch": 0.47215949237312793, "grad_norm": 0.5234375, "learning_rate": 2.6407358156028374e-05, "loss": 1.0386, "step": 2130 }, { "epoch": 0.47238116349630777, "grad_norm": 0.52734375, "learning_rate": 2.6396276595744683e-05, "loss": 1.0871, "step": 2131 }, { "epoch": 0.47260283461948766, "grad_norm": 0.56640625, "learning_rate": 2.6385195035460996e-05, "loss": 1.1034, "step": 2132 }, { "epoch": 0.47282450574266754, "grad_norm": 0.546875, "learning_rate": 2.637411347517731e-05, "loss": 1.1099, "step": 2133 }, { "epoch": 0.47304617686584743, "grad_norm": 0.55859375, "learning_rate": 2.6363031914893615e-05, "loss": 1.0764, "step": 2134 }, { "epoch": 0.47326784798902727, "grad_norm": 0.51953125, "learning_rate": 2.6351950354609928e-05, "loss": 0.993, "step": 2135 }, { "epoch": 0.47348951911220716, "grad_norm": 0.53125, "learning_rate": 2.634086879432624e-05, "loss": 0.9992, "step": 2136 }, { "epoch": 0.47371119023538705, "grad_norm": 0.5546875, "learning_rate": 2.6329787234042553e-05, "loss": 1.152, "step": 2137 }, { "epoch": 0.4739328613585669, "grad_norm": 0.546875, "learning_rate": 2.6318705673758866e-05, "loss": 1.159, "step": 2138 }, { "epoch": 0.47415453248174677, "grad_norm": 0.5234375, "learning_rate": 2.6307624113475175e-05, "loss": 1.0732, "step": 2139 }, { "epoch": 0.47437620360492666, "grad_norm": 0.54296875, "learning_rate": 2.6296542553191488e-05, "loss": 1.0734, "step": 2140 }, { "epoch": 0.4745978747281065, "grad_norm": 0.546875, "learning_rate": 2.62854609929078e-05, "loss": 1.0994, "step": 2141 }, { "epoch": 0.4748195458512864, "grad_norm": 0.58203125, "learning_rate": 2.6274379432624114e-05, "loss": 1.0954, "step": 2142 }, { "epoch": 0.4750412169744663, "grad_norm": 0.55078125, "learning_rate": 2.6263297872340426e-05, "loss": 0.9868, "step": 2143 }, { "epoch": 0.4752628880976461, "grad_norm": 0.53515625, "learning_rate": 2.625221631205674e-05, "loss": 1.1428, "step": 2144 }, { "epoch": 0.475484559220826, "grad_norm": 0.53515625, "learning_rate": 2.624113475177305e-05, "loss": 1.0821, "step": 2145 }, { "epoch": 0.4757062303440059, "grad_norm": 0.53515625, "learning_rate": 2.623005319148936e-05, "loss": 1.0854, "step": 2146 }, { "epoch": 0.4759279014671857, "grad_norm": 0.52734375, "learning_rate": 2.6218971631205674e-05, "loss": 1.0751, "step": 2147 }, { "epoch": 0.4761495725903656, "grad_norm": 0.51953125, "learning_rate": 2.6207890070921987e-05, "loss": 0.9752, "step": 2148 }, { "epoch": 0.4763712437135455, "grad_norm": 0.54296875, "learning_rate": 2.61968085106383e-05, "loss": 1.029, "step": 2149 }, { "epoch": 0.4765929148367254, "grad_norm": 0.5390625, "learning_rate": 2.6185726950354612e-05, "loss": 1.1242, "step": 2150 }, { "epoch": 0.4768145859599052, "grad_norm": 0.52734375, "learning_rate": 2.6174645390070922e-05, "loss": 1.0049, "step": 2151 }, { "epoch": 0.4770362570830851, "grad_norm": 0.5546875, "learning_rate": 2.6163563829787235e-05, "loss": 1.0578, "step": 2152 }, { "epoch": 0.477257928206265, "grad_norm": 0.5390625, "learning_rate": 2.6152482269503547e-05, "loss": 1.1153, "step": 2153 }, { "epoch": 0.47747959932944484, "grad_norm": 0.54296875, "learning_rate": 2.614140070921986e-05, "loss": 1.0218, "step": 2154 }, { "epoch": 0.4777012704526247, "grad_norm": 0.5234375, "learning_rate": 2.6130319148936173e-05, "loss": 1.0612, "step": 2155 }, { "epoch": 0.4779229415758046, "grad_norm": 0.52734375, "learning_rate": 2.6119237588652486e-05, "loss": 0.9462, "step": 2156 }, { "epoch": 0.47814461269898445, "grad_norm": 0.53515625, "learning_rate": 2.6108156028368795e-05, "loss": 1.0834, "step": 2157 }, { "epoch": 0.47836628382216434, "grad_norm": 0.515625, "learning_rate": 2.6097074468085108e-05, "loss": 1.0042, "step": 2158 }, { "epoch": 0.47858795494534423, "grad_norm": 0.5625, "learning_rate": 2.608599290780142e-05, "loss": 1.1078, "step": 2159 }, { "epoch": 0.47880962606852406, "grad_norm": 0.546875, "learning_rate": 2.6074911347517733e-05, "loss": 1.1589, "step": 2160 }, { "epoch": 0.47903129719170395, "grad_norm": 0.546875, "learning_rate": 2.6063829787234046e-05, "loss": 1.0318, "step": 2161 }, { "epoch": 0.47925296831488384, "grad_norm": 0.55859375, "learning_rate": 2.605274822695036e-05, "loss": 1.0208, "step": 2162 }, { "epoch": 0.4794746394380637, "grad_norm": 0.5390625, "learning_rate": 2.604166666666667e-05, "loss": 1.0525, "step": 2163 }, { "epoch": 0.47969631056124357, "grad_norm": 0.52734375, "learning_rate": 2.603058510638298e-05, "loss": 1.069, "step": 2164 }, { "epoch": 0.47991798168442346, "grad_norm": 0.546875, "learning_rate": 2.6019503546099294e-05, "loss": 1.0686, "step": 2165 }, { "epoch": 0.48013965280760335, "grad_norm": 0.5546875, "learning_rate": 2.6008421985815607e-05, "loss": 1.0694, "step": 2166 }, { "epoch": 0.4803613239307832, "grad_norm": 0.52734375, "learning_rate": 2.599734042553192e-05, "loss": 1.0407, "step": 2167 }, { "epoch": 0.48058299505396307, "grad_norm": 0.546875, "learning_rate": 2.5986258865248232e-05, "loss": 1.034, "step": 2168 }, { "epoch": 0.48080466617714296, "grad_norm": 0.5546875, "learning_rate": 2.5975177304964538e-05, "loss": 1.0056, "step": 2169 }, { "epoch": 0.4810263373003228, "grad_norm": 0.54296875, "learning_rate": 2.596409574468085e-05, "loss": 1.1252, "step": 2170 }, { "epoch": 0.4812480084235027, "grad_norm": 0.5546875, "learning_rate": 2.595301418439716e-05, "loss": 1.0388, "step": 2171 }, { "epoch": 0.48146967954668257, "grad_norm": 0.5625, "learning_rate": 2.5941932624113473e-05, "loss": 0.9959, "step": 2172 }, { "epoch": 0.4816913506698624, "grad_norm": 0.51953125, "learning_rate": 2.5930851063829786e-05, "loss": 1.0954, "step": 2173 }, { "epoch": 0.4819130217930423, "grad_norm": 0.546875, "learning_rate": 2.59197695035461e-05, "loss": 1.095, "step": 2174 }, { "epoch": 0.4821346929162222, "grad_norm": 0.53125, "learning_rate": 2.590868794326241e-05, "loss": 1.0904, "step": 2175 }, { "epoch": 0.482356364039402, "grad_norm": 0.54296875, "learning_rate": 2.5897606382978724e-05, "loss": 1.1019, "step": 2176 }, { "epoch": 0.4825780351625819, "grad_norm": 0.56640625, "learning_rate": 2.5886524822695034e-05, "loss": 1.0865, "step": 2177 }, { "epoch": 0.4827997062857618, "grad_norm": 0.5546875, "learning_rate": 2.5875443262411346e-05, "loss": 1.1029, "step": 2178 }, { "epoch": 0.48302137740894163, "grad_norm": 0.5390625, "learning_rate": 2.586436170212766e-05, "loss": 1.0777, "step": 2179 }, { "epoch": 0.4832430485321215, "grad_norm": 0.5859375, "learning_rate": 2.5853280141843972e-05, "loss": 1.0641, "step": 2180 }, { "epoch": 0.4834647196553014, "grad_norm": 0.53515625, "learning_rate": 2.5842198581560285e-05, "loss": 1.1112, "step": 2181 }, { "epoch": 0.4836863907784813, "grad_norm": 0.5859375, "learning_rate": 2.5831117021276597e-05, "loss": 1.0206, "step": 2182 }, { "epoch": 0.48390806190166114, "grad_norm": 0.546875, "learning_rate": 2.5820035460992907e-05, "loss": 1.1046, "step": 2183 }, { "epoch": 0.484129733024841, "grad_norm": 0.546875, "learning_rate": 2.580895390070922e-05, "loss": 1.0961, "step": 2184 }, { "epoch": 0.4843514041480209, "grad_norm": 0.5546875, "learning_rate": 2.5797872340425532e-05, "loss": 1.1643, "step": 2185 }, { "epoch": 0.48457307527120075, "grad_norm": 0.5546875, "learning_rate": 2.5786790780141845e-05, "loss": 1.0277, "step": 2186 }, { "epoch": 0.48479474639438064, "grad_norm": 0.546875, "learning_rate": 2.5775709219858158e-05, "loss": 1.0958, "step": 2187 }, { "epoch": 0.4850164175175605, "grad_norm": 0.54296875, "learning_rate": 2.576462765957447e-05, "loss": 1.0509, "step": 2188 }, { "epoch": 0.48523808864074036, "grad_norm": 0.57421875, "learning_rate": 2.575354609929078e-05, "loss": 1.2095, "step": 2189 }, { "epoch": 0.48545975976392025, "grad_norm": 0.57421875, "learning_rate": 2.5742464539007093e-05, "loss": 1.1324, "step": 2190 }, { "epoch": 0.48568143088710014, "grad_norm": 0.5234375, "learning_rate": 2.5731382978723406e-05, "loss": 1.0071, "step": 2191 }, { "epoch": 0.48590310201028, "grad_norm": 0.52734375, "learning_rate": 2.572030141843972e-05, "loss": 1.0802, "step": 2192 }, { "epoch": 0.48612477313345986, "grad_norm": 0.53125, "learning_rate": 2.570921985815603e-05, "loss": 1.0772, "step": 2193 }, { "epoch": 0.48634644425663975, "grad_norm": 0.54296875, "learning_rate": 2.5698138297872344e-05, "loss": 0.9953, "step": 2194 }, { "epoch": 0.4865681153798196, "grad_norm": 0.57421875, "learning_rate": 2.5687056737588657e-05, "loss": 1.0645, "step": 2195 }, { "epoch": 0.4867897865029995, "grad_norm": 0.52734375, "learning_rate": 2.5675975177304966e-05, "loss": 1.0249, "step": 2196 }, { "epoch": 0.48701145762617937, "grad_norm": 0.5625, "learning_rate": 2.566489361702128e-05, "loss": 1.0376, "step": 2197 }, { "epoch": 0.48723312874935926, "grad_norm": 0.58984375, "learning_rate": 2.565381205673759e-05, "loss": 1.1615, "step": 2198 }, { "epoch": 0.4874547998725391, "grad_norm": 0.55078125, "learning_rate": 2.5642730496453904e-05, "loss": 1.0291, "step": 2199 }, { "epoch": 0.487676470995719, "grad_norm": 0.546875, "learning_rate": 2.5631648936170217e-05, "loss": 1.0522, "step": 2200 }, { "epoch": 0.48789814211889887, "grad_norm": 0.5625, "learning_rate": 2.562056737588653e-05, "loss": 1.0891, "step": 2201 }, { "epoch": 0.4881198132420787, "grad_norm": 0.5546875, "learning_rate": 2.560948581560284e-05, "loss": 1.0545, "step": 2202 }, { "epoch": 0.4883414843652586, "grad_norm": 0.54296875, "learning_rate": 2.5598404255319152e-05, "loss": 1.0657, "step": 2203 }, { "epoch": 0.4885631554884385, "grad_norm": 0.51953125, "learning_rate": 2.5587322695035465e-05, "loss": 1.0817, "step": 2204 }, { "epoch": 0.4887848266116183, "grad_norm": 0.56640625, "learning_rate": 2.557624113475177e-05, "loss": 1.1755, "step": 2205 }, { "epoch": 0.4890064977347982, "grad_norm": 0.53125, "learning_rate": 2.5565159574468084e-05, "loss": 1.0614, "step": 2206 }, { "epoch": 0.4892281688579781, "grad_norm": 0.5546875, "learning_rate": 2.5554078014184396e-05, "loss": 1.0517, "step": 2207 }, { "epoch": 0.48944983998115793, "grad_norm": 0.55859375, "learning_rate": 2.554299645390071e-05, "loss": 1.1571, "step": 2208 }, { "epoch": 0.4896715111043378, "grad_norm": 0.53125, "learning_rate": 2.5531914893617022e-05, "loss": 1.0776, "step": 2209 }, { "epoch": 0.4898931822275177, "grad_norm": 0.498046875, "learning_rate": 2.552083333333333e-05, "loss": 1.0273, "step": 2210 }, { "epoch": 0.4901148533506976, "grad_norm": 0.56640625, "learning_rate": 2.5509751773049644e-05, "loss": 1.1002, "step": 2211 }, { "epoch": 0.49033652447387743, "grad_norm": 0.5703125, "learning_rate": 2.5498670212765957e-05, "loss": 1.1025, "step": 2212 }, { "epoch": 0.4905581955970573, "grad_norm": 0.60546875, "learning_rate": 2.548758865248227e-05, "loss": 1.1137, "step": 2213 }, { "epoch": 0.4907798667202372, "grad_norm": 0.53515625, "learning_rate": 2.5476507092198582e-05, "loss": 1.0794, "step": 2214 }, { "epoch": 0.49100153784341705, "grad_norm": 0.55078125, "learning_rate": 2.5465425531914895e-05, "loss": 1.0219, "step": 2215 }, { "epoch": 0.49122320896659694, "grad_norm": 0.546875, "learning_rate": 2.5454343971631205e-05, "loss": 1.0833, "step": 2216 }, { "epoch": 0.4914448800897768, "grad_norm": 0.54296875, "learning_rate": 2.5443262411347517e-05, "loss": 1.1168, "step": 2217 }, { "epoch": 0.49166655121295666, "grad_norm": 0.5390625, "learning_rate": 2.543218085106383e-05, "loss": 1.1011, "step": 2218 }, { "epoch": 0.49188822233613655, "grad_norm": 0.5625, "learning_rate": 2.5421099290780143e-05, "loss": 1.0567, "step": 2219 }, { "epoch": 0.49210989345931644, "grad_norm": 0.55078125, "learning_rate": 2.5410017730496456e-05, "loss": 1.052, "step": 2220 }, { "epoch": 0.4923315645824963, "grad_norm": 0.5390625, "learning_rate": 2.539893617021277e-05, "loss": 1.1003, "step": 2221 }, { "epoch": 0.49255323570567616, "grad_norm": 0.546875, "learning_rate": 2.5387854609929078e-05, "loss": 1.1147, "step": 2222 }, { "epoch": 0.49277490682885605, "grad_norm": 0.55078125, "learning_rate": 2.537677304964539e-05, "loss": 1.0588, "step": 2223 }, { "epoch": 0.4929965779520359, "grad_norm": 0.5546875, "learning_rate": 2.5365691489361703e-05, "loss": 1.1103, "step": 2224 }, { "epoch": 0.4932182490752158, "grad_norm": 0.546875, "learning_rate": 2.5354609929078016e-05, "loss": 1.0082, "step": 2225 }, { "epoch": 0.49343992019839567, "grad_norm": 0.56640625, "learning_rate": 2.534352836879433e-05, "loss": 1.1523, "step": 2226 }, { "epoch": 0.49366159132157555, "grad_norm": 0.578125, "learning_rate": 2.5332446808510642e-05, "loss": 1.0632, "step": 2227 }, { "epoch": 0.4938832624447554, "grad_norm": 0.53515625, "learning_rate": 2.532136524822695e-05, "loss": 1.0308, "step": 2228 }, { "epoch": 0.4941049335679353, "grad_norm": 0.5390625, "learning_rate": 2.5310283687943264e-05, "loss": 1.15, "step": 2229 }, { "epoch": 0.49432660469111517, "grad_norm": 0.52734375, "learning_rate": 2.5299202127659577e-05, "loss": 1.0434, "step": 2230 }, { "epoch": 0.494548275814295, "grad_norm": 0.5859375, "learning_rate": 2.528812056737589e-05, "loss": 1.0143, "step": 2231 }, { "epoch": 0.4947699469374749, "grad_norm": 0.546875, "learning_rate": 2.5277039007092202e-05, "loss": 1.0844, "step": 2232 }, { "epoch": 0.4949916180606548, "grad_norm": 0.546875, "learning_rate": 2.5265957446808515e-05, "loss": 1.0765, "step": 2233 }, { "epoch": 0.4952132891838346, "grad_norm": 0.55859375, "learning_rate": 2.5254875886524824e-05, "loss": 1.0558, "step": 2234 }, { "epoch": 0.4954349603070145, "grad_norm": 0.51953125, "learning_rate": 2.5243794326241137e-05, "loss": 0.9725, "step": 2235 }, { "epoch": 0.4956566314301944, "grad_norm": 0.5390625, "learning_rate": 2.523271276595745e-05, "loss": 1.0716, "step": 2236 }, { "epoch": 0.49587830255337423, "grad_norm": 0.51171875, "learning_rate": 2.5221631205673763e-05, "loss": 1.054, "step": 2237 }, { "epoch": 0.4960999736765541, "grad_norm": 0.5234375, "learning_rate": 2.5210549645390075e-05, "loss": 1.0674, "step": 2238 }, { "epoch": 0.496321644799734, "grad_norm": 0.5546875, "learning_rate": 2.5199468085106388e-05, "loss": 1.081, "step": 2239 }, { "epoch": 0.49654331592291384, "grad_norm": 0.59765625, "learning_rate": 2.5188386524822694e-05, "loss": 1.1061, "step": 2240 }, { "epoch": 0.49676498704609373, "grad_norm": 0.53515625, "learning_rate": 2.5177304964539007e-05, "loss": 1.0851, "step": 2241 }, { "epoch": 0.4969866581692736, "grad_norm": 0.53125, "learning_rate": 2.5166223404255316e-05, "loss": 1.1324, "step": 2242 }, { "epoch": 0.4972083292924535, "grad_norm": 0.54296875, "learning_rate": 2.515514184397163e-05, "loss": 0.9931, "step": 2243 }, { "epoch": 0.49743000041563334, "grad_norm": 0.55078125, "learning_rate": 2.5144060283687942e-05, "loss": 1.1517, "step": 2244 }, { "epoch": 0.49765167153881323, "grad_norm": 0.57421875, "learning_rate": 2.5132978723404255e-05, "loss": 1.1268, "step": 2245 }, { "epoch": 0.4978733426619931, "grad_norm": 0.52734375, "learning_rate": 2.5121897163120567e-05, "loss": 1.0662, "step": 2246 }, { "epoch": 0.49809501378517296, "grad_norm": 0.52734375, "learning_rate": 2.511081560283688e-05, "loss": 1.044, "step": 2247 }, { "epoch": 0.49831668490835285, "grad_norm": 0.57421875, "learning_rate": 2.509973404255319e-05, "loss": 1.0751, "step": 2248 }, { "epoch": 0.49853835603153274, "grad_norm": 0.5546875, "learning_rate": 2.5088652482269502e-05, "loss": 1.1497, "step": 2249 }, { "epoch": 0.49876002715471257, "grad_norm": 0.52734375, "learning_rate": 2.5077570921985815e-05, "loss": 1.0932, "step": 2250 }, { "epoch": 0.49898169827789246, "grad_norm": 0.53125, "learning_rate": 2.5066489361702128e-05, "loss": 1.0934, "step": 2251 }, { "epoch": 0.49920336940107235, "grad_norm": 0.55078125, "learning_rate": 2.505540780141844e-05, "loss": 1.0036, "step": 2252 }, { "epoch": 0.4994250405242522, "grad_norm": 0.53515625, "learning_rate": 2.5044326241134753e-05, "loss": 1.0565, "step": 2253 }, { "epoch": 0.4996467116474321, "grad_norm": 0.56640625, "learning_rate": 2.5033244680851063e-05, "loss": 1.1783, "step": 2254 }, { "epoch": 0.49986838277061196, "grad_norm": 0.5390625, "learning_rate": 2.5022163120567376e-05, "loss": 1.0204, "step": 2255 }, { "epoch": 0.5000900538937918, "grad_norm": 0.55859375, "learning_rate": 2.501108156028369e-05, "loss": 1.0533, "step": 2256 }, { "epoch": 0.5003117250169717, "grad_norm": 0.5546875, "learning_rate": 2.5e-05, "loss": 1.1035, "step": 2257 }, { "epoch": 0.5005333961401516, "grad_norm": 0.51953125, "learning_rate": 2.4988918439716314e-05, "loss": 0.9965, "step": 2258 }, { "epoch": 0.5007550672633314, "grad_norm": 0.6328125, "learning_rate": 2.4977836879432627e-05, "loss": 1.1892, "step": 2259 }, { "epoch": 0.5009767383865114, "grad_norm": 0.546875, "learning_rate": 2.496675531914894e-05, "loss": 0.982, "step": 2260 }, { "epoch": 0.5011984095096912, "grad_norm": 0.57421875, "learning_rate": 2.495567375886525e-05, "loss": 1.1229, "step": 2261 }, { "epoch": 0.501420080632871, "grad_norm": 0.578125, "learning_rate": 2.494459219858156e-05, "loss": 1.0592, "step": 2262 }, { "epoch": 0.501641751756051, "grad_norm": 0.5234375, "learning_rate": 2.4933510638297874e-05, "loss": 1.0566, "step": 2263 }, { "epoch": 0.5018634228792308, "grad_norm": 0.53125, "learning_rate": 2.4922429078014187e-05, "loss": 1.0256, "step": 2264 }, { "epoch": 0.5020850940024106, "grad_norm": 0.5234375, "learning_rate": 2.49113475177305e-05, "loss": 1.026, "step": 2265 }, { "epoch": 0.5023067651255906, "grad_norm": 0.5390625, "learning_rate": 2.490026595744681e-05, "loss": 1.0737, "step": 2266 }, { "epoch": 0.5025284362487704, "grad_norm": 0.55859375, "learning_rate": 2.4889184397163122e-05, "loss": 1.0942, "step": 2267 }, { "epoch": 0.5027501073719503, "grad_norm": 0.58203125, "learning_rate": 2.487810283687943e-05, "loss": 1.1148, "step": 2268 }, { "epoch": 0.5029717784951302, "grad_norm": 0.53515625, "learning_rate": 2.4867021276595744e-05, "loss": 1.0947, "step": 2269 }, { "epoch": 0.50319344961831, "grad_norm": 0.5390625, "learning_rate": 2.4855939716312057e-05, "loss": 1.0416, "step": 2270 }, { "epoch": 0.5034151207414899, "grad_norm": 0.53515625, "learning_rate": 2.484485815602837e-05, "loss": 0.9838, "step": 2271 }, { "epoch": 0.5036367918646698, "grad_norm": 0.578125, "learning_rate": 2.4833776595744683e-05, "loss": 1.0598, "step": 2272 }, { "epoch": 0.5038584629878496, "grad_norm": 0.546875, "learning_rate": 2.4822695035460995e-05, "loss": 1.0646, "step": 2273 }, { "epoch": 0.5040801341110295, "grad_norm": 0.53515625, "learning_rate": 2.4811613475177305e-05, "loss": 1.034, "step": 2274 }, { "epoch": 0.5043018052342094, "grad_norm": 0.54296875, "learning_rate": 2.4800531914893618e-05, "loss": 1.1018, "step": 2275 }, { "epoch": 0.5045234763573893, "grad_norm": 0.52734375, "learning_rate": 2.478945035460993e-05, "loss": 1.0433, "step": 2276 }, { "epoch": 0.5047451474805691, "grad_norm": 0.5390625, "learning_rate": 2.4778368794326243e-05, "loss": 1.067, "step": 2277 }, { "epoch": 0.504966818603749, "grad_norm": 0.5390625, "learning_rate": 2.4767287234042556e-05, "loss": 0.9928, "step": 2278 }, { "epoch": 0.5051884897269289, "grad_norm": 0.5390625, "learning_rate": 2.475620567375887e-05, "loss": 1.0773, "step": 2279 }, { "epoch": 0.5054101608501087, "grad_norm": 0.51171875, "learning_rate": 2.4745124113475178e-05, "loss": 0.9997, "step": 2280 }, { "epoch": 0.5056318319732886, "grad_norm": 0.5546875, "learning_rate": 2.473404255319149e-05, "loss": 1.1274, "step": 2281 }, { "epoch": 0.5058535030964685, "grad_norm": 0.52734375, "learning_rate": 2.4722960992907804e-05, "loss": 1.0398, "step": 2282 }, { "epoch": 0.5060751742196484, "grad_norm": 0.55078125, "learning_rate": 2.4711879432624116e-05, "loss": 1.0467, "step": 2283 }, { "epoch": 0.5062968453428283, "grad_norm": 0.515625, "learning_rate": 2.4700797872340426e-05, "loss": 1.0171, "step": 2284 }, { "epoch": 0.5065185164660081, "grad_norm": 0.51171875, "learning_rate": 2.468971631205674e-05, "loss": 1.0654, "step": 2285 }, { "epoch": 0.506740187589188, "grad_norm": 0.55859375, "learning_rate": 2.467863475177305e-05, "loss": 1.1381, "step": 2286 }, { "epoch": 0.5069618587123679, "grad_norm": 0.60546875, "learning_rate": 2.466755319148936e-05, "loss": 1.1802, "step": 2287 }, { "epoch": 0.5071835298355477, "grad_norm": 0.5390625, "learning_rate": 2.4656471631205673e-05, "loss": 1.1065, "step": 2288 }, { "epoch": 0.5074052009587277, "grad_norm": 0.546875, "learning_rate": 2.4645390070921986e-05, "loss": 1.0203, "step": 2289 }, { "epoch": 0.5076268720819075, "grad_norm": 0.5390625, "learning_rate": 2.46343085106383e-05, "loss": 1.1121, "step": 2290 }, { "epoch": 0.5078485432050873, "grad_norm": 0.53515625, "learning_rate": 2.4623226950354612e-05, "loss": 1.0639, "step": 2291 }, { "epoch": 0.5080702143282673, "grad_norm": 0.57421875, "learning_rate": 2.4612145390070925e-05, "loss": 0.9878, "step": 2292 }, { "epoch": 0.5082918854514471, "grad_norm": 0.51953125, "learning_rate": 2.4601063829787234e-05, "loss": 1.0301, "step": 2293 }, { "epoch": 0.5085135565746269, "grad_norm": 0.546875, "learning_rate": 2.4589982269503547e-05, "loss": 1.0984, "step": 2294 }, { "epoch": 0.5087352276978069, "grad_norm": 0.52734375, "learning_rate": 2.457890070921986e-05, "loss": 1.1119, "step": 2295 }, { "epoch": 0.5089568988209867, "grad_norm": 0.55078125, "learning_rate": 2.4567819148936172e-05, "loss": 1.0773, "step": 2296 }, { "epoch": 0.5091785699441665, "grad_norm": 0.5390625, "learning_rate": 2.4556737588652485e-05, "loss": 1.0437, "step": 2297 }, { "epoch": 0.5094002410673465, "grad_norm": 0.5390625, "learning_rate": 2.4545656028368798e-05, "loss": 1.1064, "step": 2298 }, { "epoch": 0.5096219121905263, "grad_norm": 0.53125, "learning_rate": 2.4534574468085107e-05, "loss": 1.095, "step": 2299 }, { "epoch": 0.5098435833137062, "grad_norm": 0.546875, "learning_rate": 2.452349290780142e-05, "loss": 1.0203, "step": 2300 }, { "epoch": 0.5100652544368861, "grad_norm": 0.54296875, "learning_rate": 2.4512411347517733e-05, "loss": 1.0739, "step": 2301 }, { "epoch": 0.5102869255600659, "grad_norm": 0.5703125, "learning_rate": 2.4501329787234042e-05, "loss": 1.0633, "step": 2302 }, { "epoch": 0.5105085966832458, "grad_norm": 0.55078125, "learning_rate": 2.4490248226950355e-05, "loss": 1.1161, "step": 2303 }, { "epoch": 0.5107302678064257, "grad_norm": 0.515625, "learning_rate": 2.4479166666666668e-05, "loss": 1.0114, "step": 2304 }, { "epoch": 0.5109519389296056, "grad_norm": 0.5390625, "learning_rate": 2.446808510638298e-05, "loss": 1.0295, "step": 2305 }, { "epoch": 0.5111736100527854, "grad_norm": 0.55078125, "learning_rate": 2.445700354609929e-05, "loss": 1.159, "step": 2306 }, { "epoch": 0.5113952811759653, "grad_norm": 0.5234375, "learning_rate": 2.4445921985815603e-05, "loss": 1.0553, "step": 2307 }, { "epoch": 0.5116169522991452, "grad_norm": 0.54296875, "learning_rate": 2.4434840425531915e-05, "loss": 1.0522, "step": 2308 }, { "epoch": 0.511838623422325, "grad_norm": 0.53515625, "learning_rate": 2.4423758865248228e-05, "loss": 1.0757, "step": 2309 }, { "epoch": 0.512060294545505, "grad_norm": 0.5703125, "learning_rate": 2.441267730496454e-05, "loss": 1.0949, "step": 2310 }, { "epoch": 0.5122819656686848, "grad_norm": 0.55078125, "learning_rate": 2.4401595744680854e-05, "loss": 1.1383, "step": 2311 }, { "epoch": 0.5125036367918647, "grad_norm": 0.54296875, "learning_rate": 2.4390514184397163e-05, "loss": 1.0704, "step": 2312 }, { "epoch": 0.5127253079150446, "grad_norm": 0.56640625, "learning_rate": 2.4379432624113476e-05, "loss": 1.0672, "step": 2313 }, { "epoch": 0.5129469790382244, "grad_norm": 0.54296875, "learning_rate": 2.436835106382979e-05, "loss": 1.1365, "step": 2314 }, { "epoch": 0.5131686501614043, "grad_norm": 0.54296875, "learning_rate": 2.43572695035461e-05, "loss": 1.1047, "step": 2315 }, { "epoch": 0.5133903212845842, "grad_norm": 0.546875, "learning_rate": 2.4346187943262414e-05, "loss": 1.1746, "step": 2316 }, { "epoch": 0.513611992407764, "grad_norm": 0.5390625, "learning_rate": 2.4335106382978727e-05, "loss": 1.097, "step": 2317 }, { "epoch": 0.513833663530944, "grad_norm": 0.55859375, "learning_rate": 2.4324024822695036e-05, "loss": 1.0532, "step": 2318 }, { "epoch": 0.5140553346541238, "grad_norm": 0.5546875, "learning_rate": 2.4312943262411346e-05, "loss": 1.1371, "step": 2319 }, { "epoch": 0.5142770057773036, "grad_norm": 0.5625, "learning_rate": 2.430186170212766e-05, "loss": 1.0722, "step": 2320 }, { "epoch": 0.5144986769004836, "grad_norm": 0.5390625, "learning_rate": 2.429078014184397e-05, "loss": 1.0794, "step": 2321 }, { "epoch": 0.5147203480236634, "grad_norm": 0.5625, "learning_rate": 2.4279698581560284e-05, "loss": 1.0923, "step": 2322 }, { "epoch": 0.5149420191468432, "grad_norm": 0.52734375, "learning_rate": 2.4268617021276597e-05, "loss": 1.078, "step": 2323 }, { "epoch": 0.5151636902700232, "grad_norm": 0.52734375, "learning_rate": 2.425753546099291e-05, "loss": 1.0584, "step": 2324 }, { "epoch": 0.515385361393203, "grad_norm": 0.51171875, "learning_rate": 2.4246453900709222e-05, "loss": 1.0207, "step": 2325 }, { "epoch": 0.5156070325163828, "grad_norm": 0.5234375, "learning_rate": 2.423537234042553e-05, "loss": 1.0721, "step": 2326 }, { "epoch": 0.5158287036395628, "grad_norm": 0.54296875, "learning_rate": 2.4224290780141844e-05, "loss": 1.0515, "step": 2327 }, { "epoch": 0.5160503747627426, "grad_norm": 0.52734375, "learning_rate": 2.4213209219858157e-05, "loss": 1.0426, "step": 2328 }, { "epoch": 0.5162720458859225, "grad_norm": 0.5703125, "learning_rate": 2.420212765957447e-05, "loss": 1.053, "step": 2329 }, { "epoch": 0.5164937170091024, "grad_norm": 0.54296875, "learning_rate": 2.4191046099290783e-05, "loss": 1.1044, "step": 2330 }, { "epoch": 0.5167153881322822, "grad_norm": 0.5546875, "learning_rate": 2.4179964539007096e-05, "loss": 1.1024, "step": 2331 }, { "epoch": 0.5169370592554621, "grad_norm": 0.5390625, "learning_rate": 2.4168882978723405e-05, "loss": 0.9703, "step": 2332 }, { "epoch": 0.517158730378642, "grad_norm": 0.578125, "learning_rate": 2.4157801418439718e-05, "loss": 1.2084, "step": 2333 }, { "epoch": 0.5173804015018219, "grad_norm": 0.5625, "learning_rate": 2.414671985815603e-05, "loss": 1.0352, "step": 2334 }, { "epoch": 0.5176020726250017, "grad_norm": 0.5390625, "learning_rate": 2.4135638297872343e-05, "loss": 1.0904, "step": 2335 }, { "epoch": 0.5178237437481816, "grad_norm": 0.53125, "learning_rate": 2.4124556737588656e-05, "loss": 1.1091, "step": 2336 }, { "epoch": 0.5180454148713615, "grad_norm": 0.5859375, "learning_rate": 2.4113475177304965e-05, "loss": 1.0156, "step": 2337 }, { "epoch": 0.5182670859945413, "grad_norm": 0.53125, "learning_rate": 2.4102393617021278e-05, "loss": 1.0467, "step": 2338 }, { "epoch": 0.5184887571177212, "grad_norm": 0.546875, "learning_rate": 2.4091312056737588e-05, "loss": 1.059, "step": 2339 }, { "epoch": 0.5187104282409011, "grad_norm": 0.5546875, "learning_rate": 2.40802304964539e-05, "loss": 1.095, "step": 2340 }, { "epoch": 0.5189320993640809, "grad_norm": 0.578125, "learning_rate": 2.4069148936170213e-05, "loss": 1.1033, "step": 2341 }, { "epoch": 0.5191537704872609, "grad_norm": 0.53515625, "learning_rate": 2.4058067375886526e-05, "loss": 0.9865, "step": 2342 }, { "epoch": 0.5193754416104407, "grad_norm": 0.55859375, "learning_rate": 2.404698581560284e-05, "loss": 1.0505, "step": 2343 }, { "epoch": 0.5195971127336206, "grad_norm": 0.53125, "learning_rate": 2.403590425531915e-05, "loss": 1.0866, "step": 2344 }, { "epoch": 0.5198187838568005, "grad_norm": 0.56640625, "learning_rate": 2.402482269503546e-05, "loss": 1.128, "step": 2345 }, { "epoch": 0.5200404549799803, "grad_norm": 0.55078125, "learning_rate": 2.4013741134751774e-05, "loss": 1.187, "step": 2346 }, { "epoch": 0.5202621261031602, "grad_norm": 0.53515625, "learning_rate": 2.4002659574468086e-05, "loss": 1.0538, "step": 2347 }, { "epoch": 0.5204837972263401, "grad_norm": 0.52734375, "learning_rate": 2.39915780141844e-05, "loss": 1.0369, "step": 2348 }, { "epoch": 0.5207054683495199, "grad_norm": 0.5390625, "learning_rate": 2.3980496453900712e-05, "loss": 1.0172, "step": 2349 }, { "epoch": 0.5209271394726999, "grad_norm": 0.5703125, "learning_rate": 2.3969414893617025e-05, "loss": 1.0412, "step": 2350 }, { "epoch": 0.5211488105958797, "grad_norm": 0.53125, "learning_rate": 2.3958333333333334e-05, "loss": 0.9905, "step": 2351 }, { "epoch": 0.5213704817190595, "grad_norm": 0.5546875, "learning_rate": 2.3947251773049647e-05, "loss": 1.1193, "step": 2352 }, { "epoch": 0.5215921528422395, "grad_norm": 0.5390625, "learning_rate": 2.393617021276596e-05, "loss": 1.0922, "step": 2353 }, { "epoch": 0.5218138239654193, "grad_norm": 0.5546875, "learning_rate": 2.392508865248227e-05, "loss": 1.0086, "step": 2354 }, { "epoch": 0.5220354950885991, "grad_norm": 0.52734375, "learning_rate": 2.3914007092198582e-05, "loss": 1.0499, "step": 2355 }, { "epoch": 0.5222571662117791, "grad_norm": 0.5390625, "learning_rate": 2.3902925531914895e-05, "loss": 0.9932, "step": 2356 }, { "epoch": 0.5224788373349589, "grad_norm": 0.546875, "learning_rate": 2.3891843971631207e-05, "loss": 1.1366, "step": 2357 }, { "epoch": 0.5227005084581388, "grad_norm": 0.50390625, "learning_rate": 2.3880762411347517e-05, "loss": 1.0603, "step": 2358 }, { "epoch": 0.5229221795813187, "grad_norm": 0.5, "learning_rate": 2.386968085106383e-05, "loss": 1.0016, "step": 2359 }, { "epoch": 0.5231438507044985, "grad_norm": 0.546875, "learning_rate": 2.3858599290780142e-05, "loss": 1.0769, "step": 2360 }, { "epoch": 0.5233655218276784, "grad_norm": 0.515625, "learning_rate": 2.3847517730496455e-05, "loss": 1.0617, "step": 2361 }, { "epoch": 0.5235871929508583, "grad_norm": 0.5078125, "learning_rate": 2.3836436170212768e-05, "loss": 0.9999, "step": 2362 }, { "epoch": 0.5238088640740381, "grad_norm": 0.55078125, "learning_rate": 2.382535460992908e-05, "loss": 1.0211, "step": 2363 }, { "epoch": 0.524030535197218, "grad_norm": 0.546875, "learning_rate": 2.381427304964539e-05, "loss": 1.1707, "step": 2364 }, { "epoch": 0.5242522063203979, "grad_norm": 0.52734375, "learning_rate": 2.3803191489361703e-05, "loss": 1.0614, "step": 2365 }, { "epoch": 0.5244738774435778, "grad_norm": 0.55078125, "learning_rate": 2.3792109929078015e-05, "loss": 1.045, "step": 2366 }, { "epoch": 0.5246955485667576, "grad_norm": 0.5546875, "learning_rate": 2.3781028368794328e-05, "loss": 1.1253, "step": 2367 }, { "epoch": 0.5249172196899375, "grad_norm": 0.52734375, "learning_rate": 2.376994680851064e-05, "loss": 1.0423, "step": 2368 }, { "epoch": 0.5251388908131174, "grad_norm": 0.5390625, "learning_rate": 2.3758865248226954e-05, "loss": 1.1256, "step": 2369 }, { "epoch": 0.5253605619362972, "grad_norm": 0.59765625, "learning_rate": 2.3747783687943263e-05, "loss": 1.119, "step": 2370 }, { "epoch": 0.5255822330594772, "grad_norm": 0.56640625, "learning_rate": 2.3736702127659576e-05, "loss": 1.0497, "step": 2371 }, { "epoch": 0.525803904182657, "grad_norm": 0.58203125, "learning_rate": 2.3725620567375885e-05, "loss": 1.1207, "step": 2372 }, { "epoch": 0.5260255753058368, "grad_norm": 0.5546875, "learning_rate": 2.3714539007092198e-05, "loss": 1.0645, "step": 2373 }, { "epoch": 0.5262472464290168, "grad_norm": 0.5546875, "learning_rate": 2.370345744680851e-05, "loss": 1.1082, "step": 2374 }, { "epoch": 0.5264689175521966, "grad_norm": 0.5625, "learning_rate": 2.3692375886524824e-05, "loss": 1.0115, "step": 2375 }, { "epoch": 0.5266905886753765, "grad_norm": 0.53515625, "learning_rate": 2.3681294326241136e-05, "loss": 1.053, "step": 2376 }, { "epoch": 0.5269122597985564, "grad_norm": 0.5625, "learning_rate": 2.3670212765957446e-05, "loss": 1.0386, "step": 2377 }, { "epoch": 0.5271339309217362, "grad_norm": 0.5625, "learning_rate": 2.365913120567376e-05, "loss": 1.1074, "step": 2378 }, { "epoch": 0.5273556020449162, "grad_norm": 0.5546875, "learning_rate": 2.364804964539007e-05, "loss": 1.1233, "step": 2379 }, { "epoch": 0.527577273168096, "grad_norm": 0.515625, "learning_rate": 2.3636968085106384e-05, "loss": 1.0652, "step": 2380 }, { "epoch": 0.5277989442912758, "grad_norm": 0.5625, "learning_rate": 2.3625886524822697e-05, "loss": 1.0359, "step": 2381 }, { "epoch": 0.5280206154144558, "grad_norm": 0.5234375, "learning_rate": 2.361480496453901e-05, "loss": 1.0286, "step": 2382 }, { "epoch": 0.5282422865376356, "grad_norm": 0.53125, "learning_rate": 2.360372340425532e-05, "loss": 1.091, "step": 2383 }, { "epoch": 0.5284639576608154, "grad_norm": 0.5390625, "learning_rate": 2.3592641843971632e-05, "loss": 1.0276, "step": 2384 }, { "epoch": 0.5286856287839954, "grad_norm": 0.5859375, "learning_rate": 2.3581560283687945e-05, "loss": 1.1016, "step": 2385 }, { "epoch": 0.5289072999071752, "grad_norm": 0.76171875, "learning_rate": 2.3570478723404257e-05, "loss": 1.0375, "step": 2386 }, { "epoch": 0.529128971030355, "grad_norm": 0.55078125, "learning_rate": 2.355939716312057e-05, "loss": 1.1008, "step": 2387 }, { "epoch": 0.529350642153535, "grad_norm": 0.54296875, "learning_rate": 2.3548315602836883e-05, "loss": 1.1065, "step": 2388 }, { "epoch": 0.5295723132767148, "grad_norm": 0.55078125, "learning_rate": 2.3537234042553192e-05, "loss": 1.0207, "step": 2389 }, { "epoch": 0.5297939843998947, "grad_norm": 0.56640625, "learning_rate": 2.3526152482269505e-05, "loss": 1.1695, "step": 2390 }, { "epoch": 0.5300156555230746, "grad_norm": 0.56640625, "learning_rate": 2.3515070921985814e-05, "loss": 1.0511, "step": 2391 }, { "epoch": 0.5302373266462544, "grad_norm": 0.57421875, "learning_rate": 2.3503989361702127e-05, "loss": 1.0044, "step": 2392 }, { "epoch": 0.5304589977694343, "grad_norm": 0.53125, "learning_rate": 2.349290780141844e-05, "loss": 1.0411, "step": 2393 }, { "epoch": 0.5306806688926142, "grad_norm": 0.56640625, "learning_rate": 2.3481826241134753e-05, "loss": 1.0584, "step": 2394 }, { "epoch": 0.5309023400157941, "grad_norm": 0.56640625, "learning_rate": 2.3470744680851066e-05, "loss": 1.0629, "step": 2395 }, { "epoch": 0.5311240111389739, "grad_norm": 0.54296875, "learning_rate": 2.345966312056738e-05, "loss": 1.0633, "step": 2396 }, { "epoch": 0.5313456822621538, "grad_norm": 0.546875, "learning_rate": 2.3448581560283688e-05, "loss": 1.0474, "step": 2397 }, { "epoch": 0.5315673533853337, "grad_norm": 0.55859375, "learning_rate": 2.34375e-05, "loss": 1.102, "step": 2398 }, { "epoch": 0.5317890245085135, "grad_norm": 0.57421875, "learning_rate": 2.3426418439716313e-05, "loss": 1.1672, "step": 2399 }, { "epoch": 0.5320106956316935, "grad_norm": 0.57421875, "learning_rate": 2.3415336879432626e-05, "loss": 1.0592, "step": 2400 }, { "epoch": 0.5322323667548733, "grad_norm": 0.55859375, "learning_rate": 2.340425531914894e-05, "loss": 1.137, "step": 2401 }, { "epoch": 0.5324540378780531, "grad_norm": 0.5625, "learning_rate": 2.339317375886525e-05, "loss": 1.0461, "step": 2402 }, { "epoch": 0.5326757090012331, "grad_norm": 0.59765625, "learning_rate": 2.338209219858156e-05, "loss": 1.0904, "step": 2403 }, { "epoch": 0.5328973801244129, "grad_norm": 0.5703125, "learning_rate": 2.3371010638297874e-05, "loss": 1.0768, "step": 2404 }, { "epoch": 0.5331190512475928, "grad_norm": 0.51953125, "learning_rate": 2.3359929078014187e-05, "loss": 1.0037, "step": 2405 }, { "epoch": 0.5333407223707727, "grad_norm": 0.5234375, "learning_rate": 2.33488475177305e-05, "loss": 1.0201, "step": 2406 }, { "epoch": 0.5335623934939525, "grad_norm": 0.52734375, "learning_rate": 2.333776595744681e-05, "loss": 1.0228, "step": 2407 }, { "epoch": 0.5337840646171325, "grad_norm": 0.54296875, "learning_rate": 2.332668439716312e-05, "loss": 1.0696, "step": 2408 }, { "epoch": 0.5340057357403123, "grad_norm": 0.51953125, "learning_rate": 2.3315602836879434e-05, "loss": 1.0121, "step": 2409 }, { "epoch": 0.5342274068634921, "grad_norm": 0.58984375, "learning_rate": 2.3304521276595744e-05, "loss": 1.1016, "step": 2410 }, { "epoch": 0.5344490779866721, "grad_norm": 0.53125, "learning_rate": 2.3293439716312056e-05, "loss": 1.0021, "step": 2411 }, { "epoch": 0.5346707491098519, "grad_norm": 0.52734375, "learning_rate": 2.328235815602837e-05, "loss": 1.0487, "step": 2412 }, { "epoch": 0.5348924202330317, "grad_norm": 0.55859375, "learning_rate": 2.3271276595744682e-05, "loss": 1.0432, "step": 2413 }, { "epoch": 0.5351140913562117, "grad_norm": 0.53515625, "learning_rate": 2.3260195035460995e-05, "loss": 1.0637, "step": 2414 }, { "epoch": 0.5353357624793915, "grad_norm": 0.5546875, "learning_rate": 2.3249113475177307e-05, "loss": 1.0864, "step": 2415 }, { "epoch": 0.5355574336025714, "grad_norm": 0.58984375, "learning_rate": 2.3238031914893617e-05, "loss": 1.0852, "step": 2416 }, { "epoch": 0.5357791047257513, "grad_norm": 0.55078125, "learning_rate": 2.322695035460993e-05, "loss": 1.0204, "step": 2417 }, { "epoch": 0.5360007758489311, "grad_norm": 0.53125, "learning_rate": 2.3215868794326242e-05, "loss": 0.9592, "step": 2418 }, { "epoch": 0.536222446972111, "grad_norm": 0.53515625, "learning_rate": 2.3204787234042555e-05, "loss": 1.1444, "step": 2419 }, { "epoch": 0.5364441180952909, "grad_norm": 0.54296875, "learning_rate": 2.3193705673758868e-05, "loss": 1.0528, "step": 2420 }, { "epoch": 0.5366657892184707, "grad_norm": 0.5859375, "learning_rate": 2.318262411347518e-05, "loss": 1.0902, "step": 2421 }, { "epoch": 0.5368874603416506, "grad_norm": 0.55078125, "learning_rate": 2.317154255319149e-05, "loss": 1.0214, "step": 2422 }, { "epoch": 0.5371091314648305, "grad_norm": 0.54296875, "learning_rate": 2.3160460992907803e-05, "loss": 0.9871, "step": 2423 }, { "epoch": 0.5373308025880104, "grad_norm": 0.546875, "learning_rate": 2.3149379432624116e-05, "loss": 1.0338, "step": 2424 }, { "epoch": 0.5375524737111902, "grad_norm": 0.5625, "learning_rate": 2.3138297872340425e-05, "loss": 1.1209, "step": 2425 }, { "epoch": 0.5377741448343701, "grad_norm": 0.52734375, "learning_rate": 2.3127216312056738e-05, "loss": 1.1306, "step": 2426 }, { "epoch": 0.53799581595755, "grad_norm": 0.53515625, "learning_rate": 2.311613475177305e-05, "loss": 1.137, "step": 2427 }, { "epoch": 0.5382174870807298, "grad_norm": 0.51953125, "learning_rate": 2.3105053191489363e-05, "loss": 1.0566, "step": 2428 }, { "epoch": 0.5384391582039098, "grad_norm": 0.5546875, "learning_rate": 2.3093971631205673e-05, "loss": 1.1303, "step": 2429 }, { "epoch": 0.5386608293270896, "grad_norm": 0.55859375, "learning_rate": 2.3082890070921985e-05, "loss": 1.005, "step": 2430 }, { "epoch": 0.5388825004502694, "grad_norm": 0.53515625, "learning_rate": 2.3071808510638298e-05, "loss": 1.059, "step": 2431 }, { "epoch": 0.5391041715734494, "grad_norm": 0.546875, "learning_rate": 2.306072695035461e-05, "loss": 1.0739, "step": 2432 }, { "epoch": 0.5393258426966292, "grad_norm": 0.578125, "learning_rate": 2.3049645390070924e-05, "loss": 1.0334, "step": 2433 }, { "epoch": 0.539547513819809, "grad_norm": 0.55078125, "learning_rate": 2.3038563829787237e-05, "loss": 1.1629, "step": 2434 }, { "epoch": 0.539769184942989, "grad_norm": 0.56640625, "learning_rate": 2.3027482269503546e-05, "loss": 1.0791, "step": 2435 }, { "epoch": 0.5399908560661688, "grad_norm": 0.53125, "learning_rate": 2.301640070921986e-05, "loss": 1.0525, "step": 2436 }, { "epoch": 0.5402125271893488, "grad_norm": 0.53515625, "learning_rate": 2.300531914893617e-05, "loss": 1.0824, "step": 2437 }, { "epoch": 0.5404341983125286, "grad_norm": 0.5546875, "learning_rate": 2.2994237588652484e-05, "loss": 1.0839, "step": 2438 }, { "epoch": 0.5406558694357084, "grad_norm": 0.53125, "learning_rate": 2.2983156028368797e-05, "loss": 1.0707, "step": 2439 }, { "epoch": 0.5408775405588884, "grad_norm": 0.50390625, "learning_rate": 2.297207446808511e-05, "loss": 1.002, "step": 2440 }, { "epoch": 0.5410992116820682, "grad_norm": 0.52734375, "learning_rate": 2.296099290780142e-05, "loss": 1.0054, "step": 2441 }, { "epoch": 0.541320882805248, "grad_norm": 0.53125, "learning_rate": 2.2949911347517732e-05, "loss": 1.0383, "step": 2442 }, { "epoch": 0.541542553928428, "grad_norm": 0.5234375, "learning_rate": 2.293882978723404e-05, "loss": 1.038, "step": 2443 }, { "epoch": 0.5417642250516078, "grad_norm": 0.5546875, "learning_rate": 2.2927748226950354e-05, "loss": 1.0582, "step": 2444 }, { "epoch": 0.5419858961747877, "grad_norm": 0.55078125, "learning_rate": 2.2916666666666667e-05, "loss": 1.1131, "step": 2445 }, { "epoch": 0.5422075672979676, "grad_norm": 0.56640625, "learning_rate": 2.290558510638298e-05, "loss": 1.0913, "step": 2446 }, { "epoch": 0.5424292384211474, "grad_norm": 0.55859375, "learning_rate": 2.2894503546099292e-05, "loss": 1.0668, "step": 2447 }, { "epoch": 0.5426509095443273, "grad_norm": 0.54296875, "learning_rate": 2.2883421985815602e-05, "loss": 1.115, "step": 2448 }, { "epoch": 0.5428725806675072, "grad_norm": 0.52734375, "learning_rate": 2.2872340425531915e-05, "loss": 1.1051, "step": 2449 }, { "epoch": 0.543094251790687, "grad_norm": 0.52734375, "learning_rate": 2.2861258865248227e-05, "loss": 1.0022, "step": 2450 }, { "epoch": 0.5433159229138669, "grad_norm": 0.5234375, "learning_rate": 2.285017730496454e-05, "loss": 1.0984, "step": 2451 }, { "epoch": 0.5435375940370468, "grad_norm": 0.55078125, "learning_rate": 2.2839095744680853e-05, "loss": 1.1317, "step": 2452 }, { "epoch": 0.5437592651602267, "grad_norm": 0.52734375, "learning_rate": 2.2828014184397166e-05, "loss": 1.1066, "step": 2453 }, { "epoch": 0.5439809362834065, "grad_norm": 0.55859375, "learning_rate": 2.2816932624113475e-05, "loss": 1.1307, "step": 2454 }, { "epoch": 0.5442026074065864, "grad_norm": 0.55078125, "learning_rate": 2.2805851063829788e-05, "loss": 1.0661, "step": 2455 }, { "epoch": 0.5444242785297663, "grad_norm": 0.5703125, "learning_rate": 2.27947695035461e-05, "loss": 1.0302, "step": 2456 }, { "epoch": 0.5446459496529461, "grad_norm": 0.5625, "learning_rate": 2.2783687943262413e-05, "loss": 1.1223, "step": 2457 }, { "epoch": 0.544867620776126, "grad_norm": 0.54296875, "learning_rate": 2.2772606382978726e-05, "loss": 1.0521, "step": 2458 }, { "epoch": 0.5450892918993059, "grad_norm": 0.5625, "learning_rate": 2.276152482269504e-05, "loss": 1.0761, "step": 2459 }, { "epoch": 0.5453109630224857, "grad_norm": 0.53515625, "learning_rate": 2.275044326241135e-05, "loss": 1.055, "step": 2460 }, { "epoch": 0.5455326341456657, "grad_norm": 0.54296875, "learning_rate": 2.273936170212766e-05, "loss": 1.0687, "step": 2461 }, { "epoch": 0.5457543052688455, "grad_norm": 0.51171875, "learning_rate": 2.272828014184397e-05, "loss": 1.0379, "step": 2462 }, { "epoch": 0.5459759763920253, "grad_norm": 0.53515625, "learning_rate": 2.2717198581560283e-05, "loss": 1.028, "step": 2463 }, { "epoch": 0.5461976475152053, "grad_norm": 0.5546875, "learning_rate": 2.2706117021276596e-05, "loss": 1.0503, "step": 2464 }, { "epoch": 0.5464193186383851, "grad_norm": 0.5234375, "learning_rate": 2.269503546099291e-05, "loss": 1.0525, "step": 2465 }, { "epoch": 0.5466409897615649, "grad_norm": 0.55078125, "learning_rate": 2.268395390070922e-05, "loss": 1.0692, "step": 2466 }, { "epoch": 0.5468626608847449, "grad_norm": 0.54296875, "learning_rate": 2.2672872340425534e-05, "loss": 1.1014, "step": 2467 }, { "epoch": 0.5470843320079247, "grad_norm": 0.5390625, "learning_rate": 2.2661790780141844e-05, "loss": 1.0175, "step": 2468 }, { "epoch": 0.5473060031311047, "grad_norm": 0.54296875, "learning_rate": 2.2650709219858157e-05, "loss": 1.0705, "step": 2469 }, { "epoch": 0.5475276742542845, "grad_norm": 0.53515625, "learning_rate": 2.263962765957447e-05, "loss": 1.0777, "step": 2470 }, { "epoch": 0.5477493453774643, "grad_norm": 0.52734375, "learning_rate": 2.2628546099290782e-05, "loss": 1.0402, "step": 2471 }, { "epoch": 0.5479710165006443, "grad_norm": 0.546875, "learning_rate": 2.2617464539007095e-05, "loss": 1.0361, "step": 2472 }, { "epoch": 0.5481926876238241, "grad_norm": 0.55078125, "learning_rate": 2.2606382978723408e-05, "loss": 1.039, "step": 2473 }, { "epoch": 0.548414358747004, "grad_norm": 0.5625, "learning_rate": 2.2595301418439717e-05, "loss": 1.1053, "step": 2474 }, { "epoch": 0.5486360298701839, "grad_norm": 0.5234375, "learning_rate": 2.258421985815603e-05, "loss": 1.0323, "step": 2475 }, { "epoch": 0.5488577009933637, "grad_norm": 0.53515625, "learning_rate": 2.2573138297872343e-05, "loss": 1.069, "step": 2476 }, { "epoch": 0.5490793721165436, "grad_norm": 0.53125, "learning_rate": 2.2562056737588655e-05, "loss": 1.0659, "step": 2477 }, { "epoch": 0.5493010432397235, "grad_norm": 0.53515625, "learning_rate": 2.2550975177304965e-05, "loss": 1.0118, "step": 2478 }, { "epoch": 0.5495227143629033, "grad_norm": 0.5390625, "learning_rate": 2.2539893617021277e-05, "loss": 1.0831, "step": 2479 }, { "epoch": 0.5497443854860832, "grad_norm": 0.53515625, "learning_rate": 2.252881205673759e-05, "loss": 1.0092, "step": 2480 }, { "epoch": 0.5499660566092631, "grad_norm": 0.55078125, "learning_rate": 2.25177304964539e-05, "loss": 1.0536, "step": 2481 }, { "epoch": 0.550187727732443, "grad_norm": 0.54296875, "learning_rate": 2.2506648936170212e-05, "loss": 1.0548, "step": 2482 }, { "epoch": 0.5504093988556228, "grad_norm": 0.56640625, "learning_rate": 2.2495567375886525e-05, "loss": 1.1503, "step": 2483 }, { "epoch": 0.5506310699788027, "grad_norm": 0.55078125, "learning_rate": 2.2484485815602838e-05, "loss": 1.1458, "step": 2484 }, { "epoch": 0.5508527411019826, "grad_norm": 0.5234375, "learning_rate": 2.247340425531915e-05, "loss": 1.0468, "step": 2485 }, { "epoch": 0.5510744122251624, "grad_norm": 0.5390625, "learning_rate": 2.2462322695035463e-05, "loss": 0.9871, "step": 2486 }, { "epoch": 0.5512960833483423, "grad_norm": 0.57421875, "learning_rate": 2.2451241134751773e-05, "loss": 1.0484, "step": 2487 }, { "epoch": 0.5515177544715222, "grad_norm": 0.52734375, "learning_rate": 2.2440159574468086e-05, "loss": 1.0748, "step": 2488 }, { "epoch": 0.551739425594702, "grad_norm": 0.5625, "learning_rate": 2.24290780141844e-05, "loss": 1.1409, "step": 2489 }, { "epoch": 0.551961096717882, "grad_norm": 0.5546875, "learning_rate": 2.241799645390071e-05, "loss": 1.0755, "step": 2490 }, { "epoch": 0.5521827678410618, "grad_norm": 0.578125, "learning_rate": 2.2406914893617024e-05, "loss": 1.1004, "step": 2491 }, { "epoch": 0.5524044389642416, "grad_norm": 0.52734375, "learning_rate": 2.2395833333333337e-05, "loss": 1.0165, "step": 2492 }, { "epoch": 0.5526261100874216, "grad_norm": 0.546875, "learning_rate": 2.2384751773049646e-05, "loss": 1.0977, "step": 2493 }, { "epoch": 0.5528477812106014, "grad_norm": 0.5625, "learning_rate": 2.237367021276596e-05, "loss": 1.0485, "step": 2494 }, { "epoch": 0.5530694523337812, "grad_norm": 0.51953125, "learning_rate": 2.2362588652482268e-05, "loss": 1.0603, "step": 2495 }, { "epoch": 0.5532911234569612, "grad_norm": 0.56640625, "learning_rate": 2.235150709219858e-05, "loss": 1.1143, "step": 2496 }, { "epoch": 0.553512794580141, "grad_norm": 0.53515625, "learning_rate": 2.2340425531914894e-05, "loss": 1.0577, "step": 2497 }, { "epoch": 0.5537344657033209, "grad_norm": 0.546875, "learning_rate": 2.2329343971631207e-05, "loss": 1.1242, "step": 2498 }, { "epoch": 0.5539561368265008, "grad_norm": 0.53515625, "learning_rate": 2.231826241134752e-05, "loss": 1.0357, "step": 2499 }, { "epoch": 0.5541778079496806, "grad_norm": 0.53515625, "learning_rate": 2.230718085106383e-05, "loss": 1.0976, "step": 2500 }, { "epoch": 0.5543994790728606, "grad_norm": 0.5546875, "learning_rate": 2.229609929078014e-05, "loss": 1.1327, "step": 2501 }, { "epoch": 0.5546211501960404, "grad_norm": 0.53125, "learning_rate": 2.2285017730496454e-05, "loss": 1.0969, "step": 2502 }, { "epoch": 0.5548428213192202, "grad_norm": 0.51953125, "learning_rate": 2.2273936170212767e-05, "loss": 1.0276, "step": 2503 }, { "epoch": 0.5550644924424002, "grad_norm": 0.546875, "learning_rate": 2.226285460992908e-05, "loss": 1.081, "step": 2504 }, { "epoch": 0.55528616356558, "grad_norm": 0.5234375, "learning_rate": 2.2251773049645393e-05, "loss": 1.0396, "step": 2505 }, { "epoch": 0.5555078346887599, "grad_norm": 0.52734375, "learning_rate": 2.2240691489361702e-05, "loss": 1.0531, "step": 2506 }, { "epoch": 0.5557295058119398, "grad_norm": 0.5234375, "learning_rate": 2.2229609929078015e-05, "loss": 1.0798, "step": 2507 }, { "epoch": 0.5559511769351196, "grad_norm": 0.5390625, "learning_rate": 2.2218528368794328e-05, "loss": 1.0605, "step": 2508 }, { "epoch": 0.5561728480582995, "grad_norm": 0.55078125, "learning_rate": 2.220744680851064e-05, "loss": 1.0412, "step": 2509 }, { "epoch": 0.5563945191814794, "grad_norm": 0.578125, "learning_rate": 2.2196365248226953e-05, "loss": 1.0441, "step": 2510 }, { "epoch": 0.5566161903046593, "grad_norm": 0.5234375, "learning_rate": 2.2185283687943266e-05, "loss": 1.0844, "step": 2511 }, { "epoch": 0.5568378614278391, "grad_norm": 0.51953125, "learning_rate": 2.2174202127659575e-05, "loss": 1.0066, "step": 2512 }, { "epoch": 0.557059532551019, "grad_norm": 0.515625, "learning_rate": 2.2163120567375885e-05, "loss": 1.0233, "step": 2513 }, { "epoch": 0.5572812036741989, "grad_norm": 0.5625, "learning_rate": 2.2152039007092197e-05, "loss": 1.1535, "step": 2514 }, { "epoch": 0.5575028747973787, "grad_norm": 0.546875, "learning_rate": 2.214095744680851e-05, "loss": 1.042, "step": 2515 }, { "epoch": 0.5577245459205586, "grad_norm": 0.50390625, "learning_rate": 2.2129875886524823e-05, "loss": 1.0836, "step": 2516 }, { "epoch": 0.5579462170437385, "grad_norm": 0.55078125, "learning_rate": 2.2118794326241136e-05, "loss": 1.1688, "step": 2517 }, { "epoch": 0.5581678881669183, "grad_norm": 0.54296875, "learning_rate": 2.210771276595745e-05, "loss": 1.1264, "step": 2518 }, { "epoch": 0.5583895592900983, "grad_norm": 0.546875, "learning_rate": 2.2096631205673758e-05, "loss": 1.057, "step": 2519 }, { "epoch": 0.5586112304132781, "grad_norm": 0.62109375, "learning_rate": 2.208554964539007e-05, "loss": 1.0609, "step": 2520 }, { "epoch": 0.5588329015364579, "grad_norm": 0.53125, "learning_rate": 2.2074468085106383e-05, "loss": 1.1339, "step": 2521 }, { "epoch": 0.5590545726596379, "grad_norm": 0.55859375, "learning_rate": 2.2063386524822696e-05, "loss": 1.0546, "step": 2522 }, { "epoch": 0.5592762437828177, "grad_norm": 0.5390625, "learning_rate": 2.205230496453901e-05, "loss": 1.0544, "step": 2523 }, { "epoch": 0.5594979149059975, "grad_norm": 0.5625, "learning_rate": 2.2041223404255322e-05, "loss": 1.1257, "step": 2524 }, { "epoch": 0.5597195860291775, "grad_norm": 0.5390625, "learning_rate": 2.2030141843971635e-05, "loss": 1.0872, "step": 2525 }, { "epoch": 0.5599412571523573, "grad_norm": 0.5390625, "learning_rate": 2.2019060283687944e-05, "loss": 1.0807, "step": 2526 }, { "epoch": 0.5601629282755372, "grad_norm": 0.54296875, "learning_rate": 2.2007978723404257e-05, "loss": 1.1201, "step": 2527 }, { "epoch": 0.5603845993987171, "grad_norm": 0.55078125, "learning_rate": 2.199689716312057e-05, "loss": 1.1581, "step": 2528 }, { "epoch": 0.5606062705218969, "grad_norm": 0.5546875, "learning_rate": 2.1985815602836882e-05, "loss": 1.1399, "step": 2529 }, { "epoch": 0.5608279416450769, "grad_norm": 0.53515625, "learning_rate": 2.1974734042553195e-05, "loss": 1.1031, "step": 2530 }, { "epoch": 0.5610496127682567, "grad_norm": 0.51171875, "learning_rate": 2.1963652482269504e-05, "loss": 0.9773, "step": 2531 }, { "epoch": 0.5612712838914365, "grad_norm": 0.546875, "learning_rate": 2.1952570921985817e-05, "loss": 1.0705, "step": 2532 }, { "epoch": 0.5614929550146165, "grad_norm": 0.5625, "learning_rate": 2.1941489361702127e-05, "loss": 1.0745, "step": 2533 }, { "epoch": 0.5617146261377963, "grad_norm": 0.55078125, "learning_rate": 2.193040780141844e-05, "loss": 1.1592, "step": 2534 }, { "epoch": 0.5619362972609762, "grad_norm": 0.54296875, "learning_rate": 2.1919326241134752e-05, "loss": 1.0912, "step": 2535 }, { "epoch": 0.5621579683841561, "grad_norm": 0.51953125, "learning_rate": 2.1908244680851065e-05, "loss": 1.0957, "step": 2536 }, { "epoch": 0.5623796395073359, "grad_norm": 0.54296875, "learning_rate": 2.1897163120567378e-05, "loss": 1.043, "step": 2537 }, { "epoch": 0.5626013106305158, "grad_norm": 0.5546875, "learning_rate": 2.188608156028369e-05, "loss": 1.077, "step": 2538 }, { "epoch": 0.5628229817536957, "grad_norm": 0.54296875, "learning_rate": 2.1875e-05, "loss": 1.0869, "step": 2539 }, { "epoch": 0.5630446528768756, "grad_norm": 0.53125, "learning_rate": 2.1863918439716313e-05, "loss": 1.0609, "step": 2540 }, { "epoch": 0.5632663240000554, "grad_norm": 0.53515625, "learning_rate": 2.1852836879432625e-05, "loss": 1.029, "step": 2541 }, { "epoch": 0.5634879951232353, "grad_norm": 0.5546875, "learning_rate": 2.1841755319148938e-05, "loss": 1.0923, "step": 2542 }, { "epoch": 0.5637096662464152, "grad_norm": 0.52734375, "learning_rate": 2.183067375886525e-05, "loss": 1.0415, "step": 2543 }, { "epoch": 0.563931337369595, "grad_norm": 0.57421875, "learning_rate": 2.1819592198581564e-05, "loss": 1.1108, "step": 2544 }, { "epoch": 0.5641530084927749, "grad_norm": 0.57421875, "learning_rate": 2.1808510638297873e-05, "loss": 1.167, "step": 2545 }, { "epoch": 0.5643746796159548, "grad_norm": 0.5234375, "learning_rate": 2.1797429078014186e-05, "loss": 1.0231, "step": 2546 }, { "epoch": 0.5645963507391346, "grad_norm": 0.5546875, "learning_rate": 2.17863475177305e-05, "loss": 1.0216, "step": 2547 }, { "epoch": 0.5648180218623146, "grad_norm": 0.53515625, "learning_rate": 2.1775265957446808e-05, "loss": 1.0182, "step": 2548 }, { "epoch": 0.5650396929854944, "grad_norm": 0.6015625, "learning_rate": 2.176418439716312e-05, "loss": 1.1143, "step": 2549 }, { "epoch": 0.5652613641086742, "grad_norm": 0.55078125, "learning_rate": 2.1753102836879433e-05, "loss": 1.07, "step": 2550 }, { "epoch": 0.5654830352318542, "grad_norm": 0.5859375, "learning_rate": 2.1742021276595746e-05, "loss": 1.1145, "step": 2551 }, { "epoch": 0.565704706355034, "grad_norm": 0.51953125, "learning_rate": 2.1730939716312056e-05, "loss": 1.11, "step": 2552 }, { "epoch": 0.5659263774782138, "grad_norm": 0.56640625, "learning_rate": 2.171985815602837e-05, "loss": 1.0496, "step": 2553 }, { "epoch": 0.5661480486013938, "grad_norm": 0.54296875, "learning_rate": 2.170877659574468e-05, "loss": 1.1145, "step": 2554 }, { "epoch": 0.5663697197245736, "grad_norm": 0.55078125, "learning_rate": 2.1697695035460994e-05, "loss": 1.149, "step": 2555 }, { "epoch": 0.5665913908477535, "grad_norm": 0.53125, "learning_rate": 2.1686613475177307e-05, "loss": 0.9896, "step": 2556 }, { "epoch": 0.5668130619709334, "grad_norm": 0.53125, "learning_rate": 2.167553191489362e-05, "loss": 1.0462, "step": 2557 }, { "epoch": 0.5670347330941132, "grad_norm": 0.546875, "learning_rate": 2.166445035460993e-05, "loss": 1.0474, "step": 2558 }, { "epoch": 0.5672564042172931, "grad_norm": 0.5234375, "learning_rate": 2.165336879432624e-05, "loss": 1.0056, "step": 2559 }, { "epoch": 0.567478075340473, "grad_norm": 0.55859375, "learning_rate": 2.1642287234042554e-05, "loss": 1.0506, "step": 2560 }, { "epoch": 0.5676997464636528, "grad_norm": 0.55859375, "learning_rate": 2.1631205673758867e-05, "loss": 1.1678, "step": 2561 }, { "epoch": 0.5679214175868328, "grad_norm": 0.5625, "learning_rate": 2.162012411347518e-05, "loss": 1.0826, "step": 2562 }, { "epoch": 0.5681430887100126, "grad_norm": 0.56640625, "learning_rate": 2.1609042553191493e-05, "loss": 1.0454, "step": 2563 }, { "epoch": 0.5683647598331925, "grad_norm": 0.5390625, "learning_rate": 2.1597960992907802e-05, "loss": 1.1277, "step": 2564 }, { "epoch": 0.5685864309563724, "grad_norm": 0.546875, "learning_rate": 2.1586879432624115e-05, "loss": 1.0725, "step": 2565 }, { "epoch": 0.5688081020795522, "grad_norm": 0.53515625, "learning_rate": 2.1575797872340424e-05, "loss": 1.0453, "step": 2566 }, { "epoch": 0.5690297732027321, "grad_norm": 0.52734375, "learning_rate": 2.1564716312056737e-05, "loss": 0.9405, "step": 2567 }, { "epoch": 0.569251444325912, "grad_norm": 0.55078125, "learning_rate": 2.155363475177305e-05, "loss": 1.0826, "step": 2568 }, { "epoch": 0.5694731154490918, "grad_norm": 0.54296875, "learning_rate": 2.1542553191489363e-05, "loss": 1.1166, "step": 2569 }, { "epoch": 0.5696947865722717, "grad_norm": 0.55859375, "learning_rate": 2.1531471631205675e-05, "loss": 1.0938, "step": 2570 }, { "epoch": 0.5699164576954516, "grad_norm": 0.53515625, "learning_rate": 2.1520390070921985e-05, "loss": 1.0302, "step": 2571 }, { "epoch": 0.5701381288186315, "grad_norm": 0.5234375, "learning_rate": 2.1509308510638298e-05, "loss": 0.9994, "step": 2572 }, { "epoch": 0.5703597999418113, "grad_norm": 0.54296875, "learning_rate": 2.149822695035461e-05, "loss": 1.0817, "step": 2573 }, { "epoch": 0.5705814710649912, "grad_norm": 0.546875, "learning_rate": 2.1487145390070923e-05, "loss": 1.0166, "step": 2574 }, { "epoch": 0.5708031421881711, "grad_norm": 0.578125, "learning_rate": 2.1476063829787236e-05, "loss": 1.1116, "step": 2575 }, { "epoch": 0.5710248133113509, "grad_norm": 0.5546875, "learning_rate": 2.146498226950355e-05, "loss": 1.1138, "step": 2576 }, { "epoch": 0.5712464844345309, "grad_norm": 0.57421875, "learning_rate": 2.1453900709219858e-05, "loss": 1.0747, "step": 2577 }, { "epoch": 0.5714681555577107, "grad_norm": 0.58984375, "learning_rate": 2.144281914893617e-05, "loss": 1.086, "step": 2578 }, { "epoch": 0.5716898266808905, "grad_norm": 0.53515625, "learning_rate": 2.1431737588652484e-05, "loss": 1.0217, "step": 2579 }, { "epoch": 0.5719114978040705, "grad_norm": 0.51953125, "learning_rate": 2.1420656028368796e-05, "loss": 1.048, "step": 2580 }, { "epoch": 0.5721331689272503, "grad_norm": 0.55078125, "learning_rate": 2.140957446808511e-05, "loss": 1.009, "step": 2581 }, { "epoch": 0.5723548400504301, "grad_norm": 0.52734375, "learning_rate": 2.1398492907801422e-05, "loss": 1.0347, "step": 2582 }, { "epoch": 0.5725765111736101, "grad_norm": 0.5859375, "learning_rate": 2.138741134751773e-05, "loss": 1.1568, "step": 2583 }, { "epoch": 0.5727981822967899, "grad_norm": 0.5390625, "learning_rate": 2.137632978723404e-05, "loss": 1.0305, "step": 2584 }, { "epoch": 0.5730198534199697, "grad_norm": 0.5546875, "learning_rate": 2.1365248226950353e-05, "loss": 1.0743, "step": 2585 }, { "epoch": 0.5732415245431497, "grad_norm": 0.52734375, "learning_rate": 2.1354166666666666e-05, "loss": 1.0126, "step": 2586 }, { "epoch": 0.5734631956663295, "grad_norm": 0.52734375, "learning_rate": 2.134308510638298e-05, "loss": 1.0005, "step": 2587 }, { "epoch": 0.5736848667895094, "grad_norm": 0.5546875, "learning_rate": 2.1332003546099292e-05, "loss": 1.0495, "step": 2588 }, { "epoch": 0.5739065379126893, "grad_norm": 0.5859375, "learning_rate": 2.1320921985815605e-05, "loss": 1.1184, "step": 2589 }, { "epoch": 0.5741282090358691, "grad_norm": 0.55859375, "learning_rate": 2.1309840425531917e-05, "loss": 1.1343, "step": 2590 }, { "epoch": 0.574349880159049, "grad_norm": 0.59375, "learning_rate": 2.1298758865248227e-05, "loss": 1.1093, "step": 2591 }, { "epoch": 0.5745715512822289, "grad_norm": 0.5078125, "learning_rate": 2.128767730496454e-05, "loss": 1.0019, "step": 2592 }, { "epoch": 0.5747932224054088, "grad_norm": 0.51953125, "learning_rate": 2.1276595744680852e-05, "loss": 1.023, "step": 2593 }, { "epoch": 0.5750148935285887, "grad_norm": 0.55859375, "learning_rate": 2.1265514184397165e-05, "loss": 1.0901, "step": 2594 }, { "epoch": 0.5752365646517685, "grad_norm": 0.53125, "learning_rate": 2.1254432624113478e-05, "loss": 1.081, "step": 2595 }, { "epoch": 0.5754582357749484, "grad_norm": 0.58984375, "learning_rate": 2.124335106382979e-05, "loss": 1.0487, "step": 2596 }, { "epoch": 0.5756799068981283, "grad_norm": 0.55859375, "learning_rate": 2.12322695035461e-05, "loss": 1.125, "step": 2597 }, { "epoch": 0.5759015780213081, "grad_norm": 0.55078125, "learning_rate": 2.1221187943262413e-05, "loss": 1.0754, "step": 2598 }, { "epoch": 0.576123249144488, "grad_norm": 0.55078125, "learning_rate": 2.1210106382978725e-05, "loss": 1.2028, "step": 2599 }, { "epoch": 0.5763449202676679, "grad_norm": 0.53515625, "learning_rate": 2.1199024822695038e-05, "loss": 1.051, "step": 2600 }, { "epoch": 0.5765665913908478, "grad_norm": 0.53125, "learning_rate": 2.1187943262411348e-05, "loss": 1.0796, "step": 2601 }, { "epoch": 0.5767882625140276, "grad_norm": 0.54296875, "learning_rate": 2.117686170212766e-05, "loss": 1.0482, "step": 2602 }, { "epoch": 0.5770099336372075, "grad_norm": 0.5546875, "learning_rate": 2.1165780141843973e-05, "loss": 1.0798, "step": 2603 }, { "epoch": 0.5772316047603874, "grad_norm": 0.5234375, "learning_rate": 2.1154698581560283e-05, "loss": 1.0843, "step": 2604 }, { "epoch": 0.5774532758835672, "grad_norm": 0.51953125, "learning_rate": 2.1143617021276595e-05, "loss": 1.043, "step": 2605 }, { "epoch": 0.5776749470067472, "grad_norm": 0.6171875, "learning_rate": 2.1132535460992908e-05, "loss": 1.0888, "step": 2606 }, { "epoch": 0.577896618129927, "grad_norm": 0.55859375, "learning_rate": 2.112145390070922e-05, "loss": 1.1155, "step": 2607 }, { "epoch": 0.5781182892531068, "grad_norm": 0.5234375, "learning_rate": 2.1110372340425534e-05, "loss": 1.079, "step": 2608 }, { "epoch": 0.5783399603762868, "grad_norm": 0.52734375, "learning_rate": 2.1099290780141846e-05, "loss": 0.9774, "step": 2609 }, { "epoch": 0.5785616314994666, "grad_norm": 0.52734375, "learning_rate": 2.1088209219858156e-05, "loss": 1.0358, "step": 2610 }, { "epoch": 0.5787833026226464, "grad_norm": 0.546875, "learning_rate": 2.107712765957447e-05, "loss": 1.1751, "step": 2611 }, { "epoch": 0.5790049737458264, "grad_norm": 0.51953125, "learning_rate": 2.106604609929078e-05, "loss": 1.0923, "step": 2612 }, { "epoch": 0.5792266448690062, "grad_norm": 0.546875, "learning_rate": 2.1054964539007094e-05, "loss": 1.0193, "step": 2613 }, { "epoch": 0.579448315992186, "grad_norm": 0.53515625, "learning_rate": 2.1043882978723407e-05, "loss": 1.053, "step": 2614 }, { "epoch": 0.579669987115366, "grad_norm": 0.5390625, "learning_rate": 2.103280141843972e-05, "loss": 1.0653, "step": 2615 }, { "epoch": 0.5798916582385458, "grad_norm": 0.55078125, "learning_rate": 2.102171985815603e-05, "loss": 0.9994, "step": 2616 }, { "epoch": 0.5801133293617257, "grad_norm": 0.59375, "learning_rate": 2.1010638297872342e-05, "loss": 1.1332, "step": 2617 }, { "epoch": 0.5803350004849056, "grad_norm": 0.51953125, "learning_rate": 2.0999556737588655e-05, "loss": 1.0507, "step": 2618 }, { "epoch": 0.5805566716080854, "grad_norm": 0.5234375, "learning_rate": 2.0988475177304964e-05, "loss": 1.057, "step": 2619 }, { "epoch": 0.5807783427312653, "grad_norm": 0.58203125, "learning_rate": 2.0977393617021277e-05, "loss": 1.0312, "step": 2620 }, { "epoch": 0.5810000138544452, "grad_norm": 0.51171875, "learning_rate": 2.096631205673759e-05, "loss": 0.9411, "step": 2621 }, { "epoch": 0.581221684977625, "grad_norm": 0.55078125, "learning_rate": 2.0955230496453902e-05, "loss": 1.0262, "step": 2622 }, { "epoch": 0.581443356100805, "grad_norm": 0.54296875, "learning_rate": 2.094414893617021e-05, "loss": 1.0521, "step": 2623 }, { "epoch": 0.5816650272239848, "grad_norm": 0.5234375, "learning_rate": 2.0933067375886524e-05, "loss": 1.0337, "step": 2624 }, { "epoch": 0.5818866983471647, "grad_norm": 0.5390625, "learning_rate": 2.0921985815602837e-05, "loss": 1.0532, "step": 2625 }, { "epoch": 0.5821083694703446, "grad_norm": 0.53125, "learning_rate": 2.091090425531915e-05, "loss": 1.0592, "step": 2626 }, { "epoch": 0.5823300405935244, "grad_norm": 0.54296875, "learning_rate": 2.0899822695035463e-05, "loss": 1.0297, "step": 2627 }, { "epoch": 0.5825517117167043, "grad_norm": 0.55859375, "learning_rate": 2.0888741134751776e-05, "loss": 1.0443, "step": 2628 }, { "epoch": 0.5827733828398842, "grad_norm": 0.5390625, "learning_rate": 2.0877659574468085e-05, "loss": 1.0924, "step": 2629 }, { "epoch": 0.5829950539630641, "grad_norm": 0.53515625, "learning_rate": 2.0866578014184398e-05, "loss": 1.1077, "step": 2630 }, { "epoch": 0.5832167250862439, "grad_norm": 0.51171875, "learning_rate": 2.085549645390071e-05, "loss": 1.0503, "step": 2631 }, { "epoch": 0.5834383962094238, "grad_norm": 0.55078125, "learning_rate": 2.0844414893617023e-05, "loss": 1.0474, "step": 2632 }, { "epoch": 0.5836600673326037, "grad_norm": 0.6015625, "learning_rate": 2.0833333333333336e-05, "loss": 1.2078, "step": 2633 }, { "epoch": 0.5838817384557835, "grad_norm": 0.54296875, "learning_rate": 2.082225177304965e-05, "loss": 1.0936, "step": 2634 }, { "epoch": 0.5841034095789635, "grad_norm": 0.53515625, "learning_rate": 2.0811170212765958e-05, "loss": 1.1127, "step": 2635 }, { "epoch": 0.5843250807021433, "grad_norm": 0.546875, "learning_rate": 2.0800088652482268e-05, "loss": 1.0783, "step": 2636 }, { "epoch": 0.5845467518253231, "grad_norm": 0.5078125, "learning_rate": 2.078900709219858e-05, "loss": 1.039, "step": 2637 }, { "epoch": 0.5847684229485031, "grad_norm": 0.57421875, "learning_rate": 2.0777925531914893e-05, "loss": 1.1135, "step": 2638 }, { "epoch": 0.5849900940716829, "grad_norm": 0.56640625, "learning_rate": 2.0766843971631206e-05, "loss": 1.091, "step": 2639 }, { "epoch": 0.5852117651948627, "grad_norm": 0.56640625, "learning_rate": 2.075576241134752e-05, "loss": 1.0413, "step": 2640 }, { "epoch": 0.5854334363180427, "grad_norm": 0.55078125, "learning_rate": 2.074468085106383e-05, "loss": 1.0899, "step": 2641 }, { "epoch": 0.5856551074412225, "grad_norm": 0.55078125, "learning_rate": 2.073359929078014e-05, "loss": 1.0762, "step": 2642 }, { "epoch": 0.5858767785644023, "grad_norm": 0.51171875, "learning_rate": 2.0722517730496454e-05, "loss": 1.0086, "step": 2643 }, { "epoch": 0.5860984496875823, "grad_norm": 0.5390625, "learning_rate": 2.0711436170212766e-05, "loss": 1.0494, "step": 2644 }, { "epoch": 0.5863201208107621, "grad_norm": 0.51171875, "learning_rate": 2.070035460992908e-05, "loss": 1.0343, "step": 2645 }, { "epoch": 0.586541791933942, "grad_norm": 0.5625, "learning_rate": 2.0689273049645392e-05, "loss": 1.0641, "step": 2646 }, { "epoch": 0.5867634630571219, "grad_norm": 0.5703125, "learning_rate": 2.0678191489361705e-05, "loss": 0.9913, "step": 2647 }, { "epoch": 0.5869851341803017, "grad_norm": 0.54296875, "learning_rate": 2.0667109929078014e-05, "loss": 1.0734, "step": 2648 }, { "epoch": 0.5872068053034816, "grad_norm": 0.55078125, "learning_rate": 2.0656028368794327e-05, "loss": 1.0598, "step": 2649 }, { "epoch": 0.5874284764266615, "grad_norm": 0.5625, "learning_rate": 2.064494680851064e-05, "loss": 1.0518, "step": 2650 }, { "epoch": 0.5876501475498414, "grad_norm": 0.5546875, "learning_rate": 2.0633865248226952e-05, "loss": 1.0725, "step": 2651 }, { "epoch": 0.5878718186730212, "grad_norm": 0.52734375, "learning_rate": 2.0622783687943265e-05, "loss": 0.9972, "step": 2652 }, { "epoch": 0.5880934897962011, "grad_norm": 0.51953125, "learning_rate": 2.0611702127659578e-05, "loss": 1.0614, "step": 2653 }, { "epoch": 0.588315160919381, "grad_norm": 0.51953125, "learning_rate": 2.0600620567375887e-05, "loss": 1.0495, "step": 2654 }, { "epoch": 0.5885368320425609, "grad_norm": 0.56640625, "learning_rate": 2.05895390070922e-05, "loss": 1.0699, "step": 2655 }, { "epoch": 0.5887585031657407, "grad_norm": 0.55078125, "learning_rate": 2.057845744680851e-05, "loss": 1.112, "step": 2656 }, { "epoch": 0.5889801742889206, "grad_norm": 0.546875, "learning_rate": 2.0567375886524822e-05, "loss": 1.0635, "step": 2657 }, { "epoch": 0.5892018454121005, "grad_norm": 0.5390625, "learning_rate": 2.0556294326241135e-05, "loss": 1.0826, "step": 2658 }, { "epoch": 0.5894235165352804, "grad_norm": 0.515625, "learning_rate": 2.0545212765957448e-05, "loss": 1.0142, "step": 2659 }, { "epoch": 0.5896451876584602, "grad_norm": 0.54296875, "learning_rate": 2.053413120567376e-05, "loss": 1.0509, "step": 2660 }, { "epoch": 0.5898668587816401, "grad_norm": 0.53125, "learning_rate": 2.0523049645390073e-05, "loss": 1.0495, "step": 2661 }, { "epoch": 0.59008852990482, "grad_norm": 0.52734375, "learning_rate": 2.0511968085106383e-05, "loss": 1.0464, "step": 2662 }, { "epoch": 0.5903102010279998, "grad_norm": 0.54296875, "learning_rate": 2.0500886524822695e-05, "loss": 1.0612, "step": 2663 }, { "epoch": 0.5905318721511797, "grad_norm": 0.5703125, "learning_rate": 2.0489804964539008e-05, "loss": 1.158, "step": 2664 }, { "epoch": 0.5907535432743596, "grad_norm": 0.52734375, "learning_rate": 2.047872340425532e-05, "loss": 1.1186, "step": 2665 }, { "epoch": 0.5909752143975394, "grad_norm": 0.546875, "learning_rate": 2.0467641843971634e-05, "loss": 1.1161, "step": 2666 }, { "epoch": 0.5911968855207194, "grad_norm": 0.53515625, "learning_rate": 2.0456560283687947e-05, "loss": 1.1103, "step": 2667 }, { "epoch": 0.5914185566438992, "grad_norm": 0.54296875, "learning_rate": 2.0445478723404256e-05, "loss": 0.9993, "step": 2668 }, { "epoch": 0.591640227767079, "grad_norm": 0.53515625, "learning_rate": 2.043439716312057e-05, "loss": 1.0817, "step": 2669 }, { "epoch": 0.591861898890259, "grad_norm": 0.5390625, "learning_rate": 2.042331560283688e-05, "loss": 1.0513, "step": 2670 }, { "epoch": 0.5920835700134388, "grad_norm": 0.51953125, "learning_rate": 2.0412234042553194e-05, "loss": 0.955, "step": 2671 }, { "epoch": 0.5923052411366186, "grad_norm": 0.546875, "learning_rate": 2.0401152482269504e-05, "loss": 1.0487, "step": 2672 }, { "epoch": 0.5925269122597986, "grad_norm": 0.578125, "learning_rate": 2.0390070921985816e-05, "loss": 1.1239, "step": 2673 }, { "epoch": 0.5927485833829784, "grad_norm": 0.54296875, "learning_rate": 2.037898936170213e-05, "loss": 1.0767, "step": 2674 }, { "epoch": 0.5929702545061583, "grad_norm": 0.55859375, "learning_rate": 2.036790780141844e-05, "loss": 1.0888, "step": 2675 }, { "epoch": 0.5931919256293382, "grad_norm": 0.5625, "learning_rate": 2.035682624113475e-05, "loss": 1.0714, "step": 2676 }, { "epoch": 0.593413596752518, "grad_norm": 0.5859375, "learning_rate": 2.0345744680851064e-05, "loss": 1.0202, "step": 2677 }, { "epoch": 0.5936352678756979, "grad_norm": 0.5390625, "learning_rate": 2.0334663120567377e-05, "loss": 1.0647, "step": 2678 }, { "epoch": 0.5938569389988778, "grad_norm": 0.5625, "learning_rate": 2.032358156028369e-05, "loss": 1.0916, "step": 2679 }, { "epoch": 0.5940786101220576, "grad_norm": 0.5625, "learning_rate": 2.0312500000000002e-05, "loss": 1.0805, "step": 2680 }, { "epoch": 0.5943002812452375, "grad_norm": 0.494140625, "learning_rate": 2.0301418439716312e-05, "loss": 0.8988, "step": 2681 }, { "epoch": 0.5945219523684174, "grad_norm": 0.53125, "learning_rate": 2.0290336879432625e-05, "loss": 1.0868, "step": 2682 }, { "epoch": 0.5947436234915973, "grad_norm": 0.53515625, "learning_rate": 2.0279255319148937e-05, "loss": 1.0638, "step": 2683 }, { "epoch": 0.5949652946147771, "grad_norm": 0.5546875, "learning_rate": 2.026817375886525e-05, "loss": 1.0633, "step": 2684 }, { "epoch": 0.595186965737957, "grad_norm": 0.5078125, "learning_rate": 2.0257092198581563e-05, "loss": 1.0696, "step": 2685 }, { "epoch": 0.5954086368611369, "grad_norm": 0.52734375, "learning_rate": 2.0246010638297876e-05, "loss": 0.989, "step": 2686 }, { "epoch": 0.5956303079843168, "grad_norm": 0.55078125, "learning_rate": 2.0234929078014185e-05, "loss": 1.094, "step": 2687 }, { "epoch": 0.5958519791074967, "grad_norm": 0.53515625, "learning_rate": 2.0223847517730498e-05, "loss": 1.0757, "step": 2688 }, { "epoch": 0.5960736502306765, "grad_norm": 0.55859375, "learning_rate": 2.0212765957446807e-05, "loss": 1.1053, "step": 2689 }, { "epoch": 0.5962953213538564, "grad_norm": 0.5390625, "learning_rate": 2.020168439716312e-05, "loss": 1.1141, "step": 2690 }, { "epoch": 0.5965169924770363, "grad_norm": 0.5625, "learning_rate": 2.0190602836879433e-05, "loss": 1.0383, "step": 2691 }, { "epoch": 0.5967386636002161, "grad_norm": 0.54296875, "learning_rate": 2.0179521276595746e-05, "loss": 1.0966, "step": 2692 }, { "epoch": 0.596960334723396, "grad_norm": 0.546875, "learning_rate": 2.016843971631206e-05, "loss": 1.1202, "step": 2693 }, { "epoch": 0.5971820058465759, "grad_norm": 0.5546875, "learning_rate": 2.0157358156028368e-05, "loss": 1.109, "step": 2694 }, { "epoch": 0.5974036769697557, "grad_norm": 0.55859375, "learning_rate": 2.014627659574468e-05, "loss": 1.0853, "step": 2695 }, { "epoch": 0.5976253480929357, "grad_norm": 0.54296875, "learning_rate": 2.0135195035460993e-05, "loss": 1.0744, "step": 2696 }, { "epoch": 0.5978470192161155, "grad_norm": 0.55078125, "learning_rate": 2.0124113475177306e-05, "loss": 1.063, "step": 2697 }, { "epoch": 0.5980686903392953, "grad_norm": 0.55078125, "learning_rate": 2.011303191489362e-05, "loss": 1.1278, "step": 2698 }, { "epoch": 0.5982903614624753, "grad_norm": 0.515625, "learning_rate": 2.010195035460993e-05, "loss": 1.0763, "step": 2699 }, { "epoch": 0.5985120325856551, "grad_norm": 0.55078125, "learning_rate": 2.009086879432624e-05, "loss": 1.1404, "step": 2700 }, { "epoch": 0.5987337037088349, "grad_norm": 0.55078125, "learning_rate": 2.0079787234042554e-05, "loss": 1.1006, "step": 2701 }, { "epoch": 0.5989553748320149, "grad_norm": 0.54296875, "learning_rate": 2.0068705673758867e-05, "loss": 1.1278, "step": 2702 }, { "epoch": 0.5991770459551947, "grad_norm": 0.51953125, "learning_rate": 2.005762411347518e-05, "loss": 1.0639, "step": 2703 }, { "epoch": 0.5993987170783746, "grad_norm": 0.55859375, "learning_rate": 2.0046542553191492e-05, "loss": 1.0635, "step": 2704 }, { "epoch": 0.5996203882015545, "grad_norm": 0.54296875, "learning_rate": 2.0035460992907805e-05, "loss": 1.1056, "step": 2705 }, { "epoch": 0.5998420593247343, "grad_norm": 0.5390625, "learning_rate": 2.0024379432624114e-05, "loss": 1.1368, "step": 2706 }, { "epoch": 0.6000637304479142, "grad_norm": 0.54296875, "learning_rate": 2.0013297872340424e-05, "loss": 1.0227, "step": 2707 }, { "epoch": 0.6002854015710941, "grad_norm": 0.58984375, "learning_rate": 2.0002216312056736e-05, "loss": 1.0906, "step": 2708 }, { "epoch": 0.600507072694274, "grad_norm": 0.55078125, "learning_rate": 1.999113475177305e-05, "loss": 1.0906, "step": 2709 }, { "epoch": 0.6007287438174538, "grad_norm": 0.55078125, "learning_rate": 1.9980053191489362e-05, "loss": 1.079, "step": 2710 }, { "epoch": 0.6009504149406337, "grad_norm": 0.54296875, "learning_rate": 1.9968971631205675e-05, "loss": 1.0477, "step": 2711 }, { "epoch": 0.6011720860638136, "grad_norm": 0.52734375, "learning_rate": 1.9957890070921987e-05, "loss": 1.0694, "step": 2712 }, { "epoch": 0.6013937571869934, "grad_norm": 0.53125, "learning_rate": 1.9946808510638297e-05, "loss": 1.017, "step": 2713 }, { "epoch": 0.6016154283101733, "grad_norm": 0.5390625, "learning_rate": 1.993572695035461e-05, "loss": 1.0763, "step": 2714 }, { "epoch": 0.6018370994333532, "grad_norm": 0.515625, "learning_rate": 1.9924645390070922e-05, "loss": 0.9844, "step": 2715 }, { "epoch": 0.6020587705565331, "grad_norm": 0.5546875, "learning_rate": 1.9913563829787235e-05, "loss": 1.098, "step": 2716 }, { "epoch": 0.602280441679713, "grad_norm": 0.5390625, "learning_rate": 1.9902482269503548e-05, "loss": 1.1258, "step": 2717 }, { "epoch": 0.6025021128028928, "grad_norm": 0.53515625, "learning_rate": 1.989140070921986e-05, "loss": 1.1011, "step": 2718 }, { "epoch": 0.6027237839260727, "grad_norm": 0.55078125, "learning_rate": 1.988031914893617e-05, "loss": 1.0768, "step": 2719 }, { "epoch": 0.6029454550492526, "grad_norm": 0.5234375, "learning_rate": 1.9869237588652483e-05, "loss": 0.9082, "step": 2720 }, { "epoch": 0.6031671261724324, "grad_norm": 0.51171875, "learning_rate": 1.9858156028368796e-05, "loss": 1.0605, "step": 2721 }, { "epoch": 0.6033887972956123, "grad_norm": 0.57421875, "learning_rate": 1.984707446808511e-05, "loss": 1.075, "step": 2722 }, { "epoch": 0.6036104684187922, "grad_norm": 0.54296875, "learning_rate": 1.983599290780142e-05, "loss": 1.0449, "step": 2723 }, { "epoch": 0.603832139541972, "grad_norm": 0.5234375, "learning_rate": 1.9824911347517734e-05, "loss": 1.0167, "step": 2724 }, { "epoch": 0.604053810665152, "grad_norm": 0.5390625, "learning_rate": 1.9813829787234043e-05, "loss": 1.1276, "step": 2725 }, { "epoch": 0.6042754817883318, "grad_norm": 0.55859375, "learning_rate": 1.9802748226950356e-05, "loss": 1.0704, "step": 2726 }, { "epoch": 0.6044971529115116, "grad_norm": 0.54296875, "learning_rate": 1.9791666666666665e-05, "loss": 1.0355, "step": 2727 }, { "epoch": 0.6047188240346916, "grad_norm": 0.51171875, "learning_rate": 1.9780585106382978e-05, "loss": 1.0428, "step": 2728 }, { "epoch": 0.6049404951578714, "grad_norm": 0.52734375, "learning_rate": 1.976950354609929e-05, "loss": 1.0507, "step": 2729 }, { "epoch": 0.6051621662810512, "grad_norm": 0.546875, "learning_rate": 1.9758421985815604e-05, "loss": 1.1153, "step": 2730 }, { "epoch": 0.6053838374042312, "grad_norm": 0.52734375, "learning_rate": 1.9747340425531917e-05, "loss": 0.9936, "step": 2731 }, { "epoch": 0.605605508527411, "grad_norm": 0.51953125, "learning_rate": 1.973625886524823e-05, "loss": 1.0616, "step": 2732 }, { "epoch": 0.6058271796505909, "grad_norm": 0.5234375, "learning_rate": 1.972517730496454e-05, "loss": 1.0708, "step": 2733 }, { "epoch": 0.6060488507737708, "grad_norm": 0.54296875, "learning_rate": 1.971409574468085e-05, "loss": 1.0288, "step": 2734 }, { "epoch": 0.6062705218969506, "grad_norm": 0.51953125, "learning_rate": 1.9703014184397164e-05, "loss": 0.9916, "step": 2735 }, { "epoch": 0.6064921930201305, "grad_norm": 0.56640625, "learning_rate": 1.9691932624113477e-05, "loss": 1.034, "step": 2736 }, { "epoch": 0.6067138641433104, "grad_norm": 0.5390625, "learning_rate": 1.968085106382979e-05, "loss": 1.0529, "step": 2737 }, { "epoch": 0.6069355352664902, "grad_norm": 0.53125, "learning_rate": 1.9669769503546103e-05, "loss": 1.0519, "step": 2738 }, { "epoch": 0.6071572063896701, "grad_norm": 0.5390625, "learning_rate": 1.9658687943262412e-05, "loss": 1.0966, "step": 2739 }, { "epoch": 0.60737887751285, "grad_norm": 0.53125, "learning_rate": 1.9647606382978725e-05, "loss": 1.0678, "step": 2740 }, { "epoch": 0.6076005486360299, "grad_norm": 0.546875, "learning_rate": 1.9636524822695038e-05, "loss": 1.1027, "step": 2741 }, { "epoch": 0.6078222197592097, "grad_norm": 0.53125, "learning_rate": 1.9625443262411347e-05, "loss": 0.9959, "step": 2742 }, { "epoch": 0.6080438908823896, "grad_norm": 0.52734375, "learning_rate": 1.961436170212766e-05, "loss": 1.0454, "step": 2743 }, { "epoch": 0.6082655620055695, "grad_norm": 0.51171875, "learning_rate": 1.9603280141843972e-05, "loss": 1.0341, "step": 2744 }, { "epoch": 0.6084872331287493, "grad_norm": 0.51953125, "learning_rate": 1.9592198581560285e-05, "loss": 1.0468, "step": 2745 }, { "epoch": 0.6087089042519293, "grad_norm": 0.5234375, "learning_rate": 1.9581117021276595e-05, "loss": 0.9858, "step": 2746 }, { "epoch": 0.6089305753751091, "grad_norm": 0.546875, "learning_rate": 1.9570035460992907e-05, "loss": 1.0232, "step": 2747 }, { "epoch": 0.609152246498289, "grad_norm": 0.5234375, "learning_rate": 1.955895390070922e-05, "loss": 1.0659, "step": 2748 }, { "epoch": 0.6093739176214689, "grad_norm": 0.53515625, "learning_rate": 1.9547872340425533e-05, "loss": 1.0683, "step": 2749 }, { "epoch": 0.6095955887446487, "grad_norm": 0.5078125, "learning_rate": 1.9536790780141846e-05, "loss": 1.0137, "step": 2750 }, { "epoch": 0.6098172598678286, "grad_norm": 0.5390625, "learning_rate": 1.952570921985816e-05, "loss": 1.1133, "step": 2751 }, { "epoch": 0.6100389309910085, "grad_norm": 0.5546875, "learning_rate": 1.9514627659574468e-05, "loss": 1.1374, "step": 2752 }, { "epoch": 0.6102606021141883, "grad_norm": 0.52734375, "learning_rate": 1.950354609929078e-05, "loss": 0.9535, "step": 2753 }, { "epoch": 0.6104822732373683, "grad_norm": 0.54296875, "learning_rate": 1.9492464539007093e-05, "loss": 1.1172, "step": 2754 }, { "epoch": 0.6107039443605481, "grad_norm": 0.53125, "learning_rate": 1.9481382978723406e-05, "loss": 1.0082, "step": 2755 }, { "epoch": 0.6109256154837279, "grad_norm": 0.515625, "learning_rate": 1.947030141843972e-05, "loss": 0.9639, "step": 2756 }, { "epoch": 0.6111472866069079, "grad_norm": 0.51953125, "learning_rate": 1.9459219858156032e-05, "loss": 0.9512, "step": 2757 }, { "epoch": 0.6113689577300877, "grad_norm": 0.5625, "learning_rate": 1.944813829787234e-05, "loss": 1.1392, "step": 2758 }, { "epoch": 0.6115906288532675, "grad_norm": 0.53125, "learning_rate": 1.9437056737588654e-05, "loss": 1.1186, "step": 2759 }, { "epoch": 0.6118122999764475, "grad_norm": 0.58203125, "learning_rate": 1.9425975177304963e-05, "loss": 1.0992, "step": 2760 }, { "epoch": 0.6120339710996273, "grad_norm": 0.5546875, "learning_rate": 1.9414893617021276e-05, "loss": 1.0826, "step": 2761 }, { "epoch": 0.6122556422228072, "grad_norm": 0.52734375, "learning_rate": 1.940381205673759e-05, "loss": 1.0862, "step": 2762 }, { "epoch": 0.6124773133459871, "grad_norm": 0.53125, "learning_rate": 1.93927304964539e-05, "loss": 1.0268, "step": 2763 }, { "epoch": 0.6126989844691669, "grad_norm": 0.53125, "learning_rate": 1.9381648936170214e-05, "loss": 1.0828, "step": 2764 }, { "epoch": 0.6129206555923468, "grad_norm": 0.5234375, "learning_rate": 1.9370567375886524e-05, "loss": 1.0122, "step": 2765 }, { "epoch": 0.6131423267155267, "grad_norm": 0.54296875, "learning_rate": 1.9359485815602837e-05, "loss": 1.0905, "step": 2766 }, { "epoch": 0.6133639978387065, "grad_norm": 0.56640625, "learning_rate": 1.934840425531915e-05, "loss": 1.1144, "step": 2767 }, { "epoch": 0.6135856689618864, "grad_norm": 0.52734375, "learning_rate": 1.9337322695035462e-05, "loss": 1.063, "step": 2768 }, { "epoch": 0.6138073400850663, "grad_norm": 0.5234375, "learning_rate": 1.9326241134751775e-05, "loss": 1.0151, "step": 2769 }, { "epoch": 0.6140290112082462, "grad_norm": 0.56640625, "learning_rate": 1.9315159574468088e-05, "loss": 1.0645, "step": 2770 }, { "epoch": 0.614250682331426, "grad_norm": 0.515625, "learning_rate": 1.9304078014184397e-05, "loss": 1.0374, "step": 2771 }, { "epoch": 0.6144723534546059, "grad_norm": 0.515625, "learning_rate": 1.929299645390071e-05, "loss": 0.9999, "step": 2772 }, { "epoch": 0.6146940245777858, "grad_norm": 0.58984375, "learning_rate": 1.9281914893617023e-05, "loss": 1.0569, "step": 2773 }, { "epoch": 0.6149156957009656, "grad_norm": 0.53515625, "learning_rate": 1.9270833333333335e-05, "loss": 1.1317, "step": 2774 }, { "epoch": 0.6151373668241455, "grad_norm": 0.51171875, "learning_rate": 1.9259751773049648e-05, "loss": 1.0185, "step": 2775 }, { "epoch": 0.6153590379473254, "grad_norm": 0.5625, "learning_rate": 1.924867021276596e-05, "loss": 1.1123, "step": 2776 }, { "epoch": 0.6155807090705052, "grad_norm": 0.5703125, "learning_rate": 1.923758865248227e-05, "loss": 1.1103, "step": 2777 }, { "epoch": 0.6158023801936852, "grad_norm": 0.5390625, "learning_rate": 1.922650709219858e-05, "loss": 1.0965, "step": 2778 }, { "epoch": 0.616024051316865, "grad_norm": 0.52734375, "learning_rate": 1.9215425531914892e-05, "loss": 1.0605, "step": 2779 }, { "epoch": 0.6162457224400449, "grad_norm": 0.56640625, "learning_rate": 1.9204343971631205e-05, "loss": 1.1257, "step": 2780 }, { "epoch": 0.6164673935632248, "grad_norm": 0.5625, "learning_rate": 1.9193262411347518e-05, "loss": 1.0113, "step": 2781 }, { "epoch": 0.6166890646864046, "grad_norm": 0.5703125, "learning_rate": 1.918218085106383e-05, "loss": 1.0627, "step": 2782 }, { "epoch": 0.6169107358095846, "grad_norm": 0.5546875, "learning_rate": 1.9171099290780143e-05, "loss": 1.0813, "step": 2783 }, { "epoch": 0.6171324069327644, "grad_norm": 0.5703125, "learning_rate": 1.9160017730496453e-05, "loss": 1.0342, "step": 2784 }, { "epoch": 0.6173540780559442, "grad_norm": 0.5625, "learning_rate": 1.9148936170212766e-05, "loss": 1.0203, "step": 2785 }, { "epoch": 0.6175757491791242, "grad_norm": 0.546875, "learning_rate": 1.913785460992908e-05, "loss": 1.1195, "step": 2786 }, { "epoch": 0.617797420302304, "grad_norm": 0.52734375, "learning_rate": 1.912677304964539e-05, "loss": 1.0517, "step": 2787 }, { "epoch": 0.6180190914254838, "grad_norm": 0.52734375, "learning_rate": 1.9115691489361704e-05, "loss": 1.0526, "step": 2788 }, { "epoch": 0.6182407625486638, "grad_norm": 0.5234375, "learning_rate": 1.9104609929078017e-05, "loss": 1.039, "step": 2789 }, { "epoch": 0.6184624336718436, "grad_norm": 0.54296875, "learning_rate": 1.909352836879433e-05, "loss": 1.1233, "step": 2790 }, { "epoch": 0.6186841047950234, "grad_norm": 0.53125, "learning_rate": 1.908244680851064e-05, "loss": 1.0538, "step": 2791 }, { "epoch": 0.6189057759182034, "grad_norm": 0.54296875, "learning_rate": 1.907136524822695e-05, "loss": 1.0443, "step": 2792 }, { "epoch": 0.6191274470413832, "grad_norm": 0.59765625, "learning_rate": 1.9060283687943264e-05, "loss": 1.1085, "step": 2793 }, { "epoch": 0.6193491181645631, "grad_norm": 0.5625, "learning_rate": 1.9049202127659577e-05, "loss": 1.0907, "step": 2794 }, { "epoch": 0.619570789287743, "grad_norm": 0.57421875, "learning_rate": 1.9038120567375887e-05, "loss": 1.0741, "step": 2795 }, { "epoch": 0.6197924604109228, "grad_norm": 0.56640625, "learning_rate": 1.90270390070922e-05, "loss": 1.0929, "step": 2796 }, { "epoch": 0.6200141315341027, "grad_norm": 0.5234375, "learning_rate": 1.9015957446808512e-05, "loss": 0.9815, "step": 2797 }, { "epoch": 0.6202358026572826, "grad_norm": 0.546875, "learning_rate": 1.900487588652482e-05, "loss": 1.082, "step": 2798 }, { "epoch": 0.6204574737804625, "grad_norm": 0.53515625, "learning_rate": 1.8993794326241134e-05, "loss": 1.051, "step": 2799 }, { "epoch": 0.6206791449036423, "grad_norm": 0.51953125, "learning_rate": 1.8982712765957447e-05, "loss": 1.1087, "step": 2800 }, { "epoch": 0.6209008160268222, "grad_norm": 0.53515625, "learning_rate": 1.897163120567376e-05, "loss": 1.0896, "step": 2801 }, { "epoch": 0.6211224871500021, "grad_norm": 0.53515625, "learning_rate": 1.8960549645390073e-05, "loss": 1.115, "step": 2802 }, { "epoch": 0.6213441582731819, "grad_norm": 0.54296875, "learning_rate": 1.8949468085106385e-05, "loss": 1.0572, "step": 2803 }, { "epoch": 0.6215658293963618, "grad_norm": 0.5390625, "learning_rate": 1.8938386524822695e-05, "loss": 1.093, "step": 2804 }, { "epoch": 0.6217875005195417, "grad_norm": 0.5390625, "learning_rate": 1.8927304964539008e-05, "loss": 1.1308, "step": 2805 }, { "epoch": 0.6220091716427215, "grad_norm": 0.55859375, "learning_rate": 1.891622340425532e-05, "loss": 1.0474, "step": 2806 }, { "epoch": 0.6222308427659015, "grad_norm": 0.53515625, "learning_rate": 1.8905141843971633e-05, "loss": 1.0826, "step": 2807 }, { "epoch": 0.6224525138890813, "grad_norm": 0.56640625, "learning_rate": 1.8894060283687946e-05, "loss": 1.1395, "step": 2808 }, { "epoch": 0.6226741850122612, "grad_norm": 0.5625, "learning_rate": 1.888297872340426e-05, "loss": 0.9825, "step": 2809 }, { "epoch": 0.6228958561354411, "grad_norm": 0.5390625, "learning_rate": 1.8871897163120568e-05, "loss": 1.0434, "step": 2810 }, { "epoch": 0.6231175272586209, "grad_norm": 0.53515625, "learning_rate": 1.886081560283688e-05, "loss": 1.0005, "step": 2811 }, { "epoch": 0.6233391983818009, "grad_norm": 0.55078125, "learning_rate": 1.8849734042553194e-05, "loss": 1.0015, "step": 2812 }, { "epoch": 0.6235608695049807, "grad_norm": 0.52734375, "learning_rate": 1.8838652482269503e-05, "loss": 1.061, "step": 2813 }, { "epoch": 0.6237825406281605, "grad_norm": 0.53125, "learning_rate": 1.8827570921985816e-05, "loss": 1.0944, "step": 2814 }, { "epoch": 0.6240042117513405, "grad_norm": 0.56640625, "learning_rate": 1.881648936170213e-05, "loss": 1.0685, "step": 2815 }, { "epoch": 0.6242258828745203, "grad_norm": 0.5390625, "learning_rate": 1.880540780141844e-05, "loss": 1.0243, "step": 2816 }, { "epoch": 0.6244475539977001, "grad_norm": 0.53125, "learning_rate": 1.879432624113475e-05, "loss": 1.0713, "step": 2817 }, { "epoch": 0.6246692251208801, "grad_norm": 0.55078125, "learning_rate": 1.8783244680851063e-05, "loss": 1.1072, "step": 2818 }, { "epoch": 0.6248908962440599, "grad_norm": 0.52734375, "learning_rate": 1.8772163120567376e-05, "loss": 1.0686, "step": 2819 }, { "epoch": 0.6251125673672397, "grad_norm": 0.5546875, "learning_rate": 1.876108156028369e-05, "loss": 1.0592, "step": 2820 }, { "epoch": 0.6253342384904197, "grad_norm": 0.55859375, "learning_rate": 1.8750000000000002e-05, "loss": 1.0491, "step": 2821 }, { "epoch": 0.6255559096135995, "grad_norm": 0.51171875, "learning_rate": 1.8738918439716315e-05, "loss": 1.0343, "step": 2822 }, { "epoch": 0.6257775807367794, "grad_norm": 0.56640625, "learning_rate": 1.8727836879432624e-05, "loss": 1.0821, "step": 2823 }, { "epoch": 0.6259992518599593, "grad_norm": 0.5078125, "learning_rate": 1.8716755319148937e-05, "loss": 1.0169, "step": 2824 }, { "epoch": 0.6262209229831391, "grad_norm": 0.51953125, "learning_rate": 1.870567375886525e-05, "loss": 1.0375, "step": 2825 }, { "epoch": 0.626442594106319, "grad_norm": 0.52734375, "learning_rate": 1.8694592198581562e-05, "loss": 1.0406, "step": 2826 }, { "epoch": 0.6266642652294989, "grad_norm": 0.55078125, "learning_rate": 1.8683510638297875e-05, "loss": 1.1049, "step": 2827 }, { "epoch": 0.6268859363526788, "grad_norm": 0.55078125, "learning_rate": 1.8672429078014188e-05, "loss": 1.0874, "step": 2828 }, { "epoch": 0.6271076074758586, "grad_norm": 0.55859375, "learning_rate": 1.8661347517730497e-05, "loss": 1.0233, "step": 2829 }, { "epoch": 0.6273292785990385, "grad_norm": 0.51953125, "learning_rate": 1.8650265957446807e-05, "loss": 0.9327, "step": 2830 }, { "epoch": 0.6275509497222184, "grad_norm": 0.52734375, "learning_rate": 1.863918439716312e-05, "loss": 1.0383, "step": 2831 }, { "epoch": 0.6277726208453982, "grad_norm": 0.53125, "learning_rate": 1.8628102836879432e-05, "loss": 1.0474, "step": 2832 }, { "epoch": 0.6279942919685781, "grad_norm": 0.546875, "learning_rate": 1.8617021276595745e-05, "loss": 1.0759, "step": 2833 }, { "epoch": 0.628215963091758, "grad_norm": 0.5234375, "learning_rate": 1.8605939716312058e-05, "loss": 1.0289, "step": 2834 }, { "epoch": 0.6284376342149378, "grad_norm": 0.5703125, "learning_rate": 1.859485815602837e-05, "loss": 1.0691, "step": 2835 }, { "epoch": 0.6286593053381178, "grad_norm": 0.52734375, "learning_rate": 1.858377659574468e-05, "loss": 1.0982, "step": 2836 }, { "epoch": 0.6288809764612976, "grad_norm": 0.5234375, "learning_rate": 1.8572695035460993e-05, "loss": 1.01, "step": 2837 }, { "epoch": 0.6291026475844774, "grad_norm": 0.546875, "learning_rate": 1.8561613475177305e-05, "loss": 1.099, "step": 2838 }, { "epoch": 0.6293243187076574, "grad_norm": 0.53515625, "learning_rate": 1.8550531914893618e-05, "loss": 1.1202, "step": 2839 }, { "epoch": 0.6295459898308372, "grad_norm": 0.5390625, "learning_rate": 1.853945035460993e-05, "loss": 1.1065, "step": 2840 }, { "epoch": 0.6297676609540171, "grad_norm": 0.52734375, "learning_rate": 1.8528368794326244e-05, "loss": 1.0766, "step": 2841 }, { "epoch": 0.629989332077197, "grad_norm": 0.51953125, "learning_rate": 1.8517287234042553e-05, "loss": 1.0519, "step": 2842 }, { "epoch": 0.6302110032003768, "grad_norm": 0.5703125, "learning_rate": 1.8506205673758866e-05, "loss": 1.1379, "step": 2843 }, { "epoch": 0.6304326743235568, "grad_norm": 0.5390625, "learning_rate": 1.849512411347518e-05, "loss": 1.1419, "step": 2844 }, { "epoch": 0.6306543454467366, "grad_norm": 0.54296875, "learning_rate": 1.848404255319149e-05, "loss": 1.038, "step": 2845 }, { "epoch": 0.6308760165699164, "grad_norm": 0.546875, "learning_rate": 1.8472960992907804e-05, "loss": 1.0235, "step": 2846 }, { "epoch": 0.6310976876930964, "grad_norm": 0.52734375, "learning_rate": 1.8461879432624117e-05, "loss": 1.086, "step": 2847 }, { "epoch": 0.6313193588162762, "grad_norm": 0.5234375, "learning_rate": 1.8450797872340426e-05, "loss": 1.1238, "step": 2848 }, { "epoch": 0.631541029939456, "grad_norm": 0.51171875, "learning_rate": 1.8439716312056736e-05, "loss": 1.0863, "step": 2849 }, { "epoch": 0.631762701062636, "grad_norm": 0.546875, "learning_rate": 1.842863475177305e-05, "loss": 1.0365, "step": 2850 }, { "epoch": 0.6319843721858158, "grad_norm": 0.5390625, "learning_rate": 1.841755319148936e-05, "loss": 1.0921, "step": 2851 }, { "epoch": 0.6322060433089957, "grad_norm": 0.546875, "learning_rate": 1.8406471631205674e-05, "loss": 1.1287, "step": 2852 }, { "epoch": 0.6324277144321756, "grad_norm": 0.53515625, "learning_rate": 1.8395390070921987e-05, "loss": 1.174, "step": 2853 }, { "epoch": 0.6326493855553554, "grad_norm": 0.53515625, "learning_rate": 1.83843085106383e-05, "loss": 1.0733, "step": 2854 }, { "epoch": 0.6328710566785353, "grad_norm": 0.53515625, "learning_rate": 1.8373226950354612e-05, "loss": 1.0454, "step": 2855 }, { "epoch": 0.6330927278017152, "grad_norm": 0.546875, "learning_rate": 1.836214539007092e-05, "loss": 1.0767, "step": 2856 }, { "epoch": 0.633314398924895, "grad_norm": 0.5234375, "learning_rate": 1.8351063829787234e-05, "loss": 1.0276, "step": 2857 }, { "epoch": 0.6335360700480749, "grad_norm": 0.515625, "learning_rate": 1.8339982269503547e-05, "loss": 1.0653, "step": 2858 }, { "epoch": 0.6337577411712548, "grad_norm": 0.55078125, "learning_rate": 1.832890070921986e-05, "loss": 1.0524, "step": 2859 }, { "epoch": 0.6339794122944347, "grad_norm": 0.54296875, "learning_rate": 1.8317819148936173e-05, "loss": 1.008, "step": 2860 }, { "epoch": 0.6342010834176145, "grad_norm": 0.50390625, "learning_rate": 1.8306737588652486e-05, "loss": 0.8634, "step": 2861 }, { "epoch": 0.6344227545407944, "grad_norm": 0.546875, "learning_rate": 1.8295656028368795e-05, "loss": 1.0167, "step": 2862 }, { "epoch": 0.6346444256639743, "grad_norm": 0.55859375, "learning_rate": 1.8284574468085108e-05, "loss": 1.062, "step": 2863 }, { "epoch": 0.6348660967871541, "grad_norm": 0.6015625, "learning_rate": 1.827349290780142e-05, "loss": 1.1784, "step": 2864 }, { "epoch": 0.6350877679103341, "grad_norm": 0.53515625, "learning_rate": 1.8262411347517733e-05, "loss": 1.0917, "step": 2865 }, { "epoch": 0.6353094390335139, "grad_norm": 0.52734375, "learning_rate": 1.8251329787234043e-05, "loss": 1.1033, "step": 2866 }, { "epoch": 0.6355311101566937, "grad_norm": 0.546875, "learning_rate": 1.8240248226950355e-05, "loss": 1.0926, "step": 2867 }, { "epoch": 0.6357527812798737, "grad_norm": 0.55078125, "learning_rate": 1.8229166666666668e-05, "loss": 1.049, "step": 2868 }, { "epoch": 0.6359744524030535, "grad_norm": 0.55859375, "learning_rate": 1.8218085106382978e-05, "loss": 1.0492, "step": 2869 }, { "epoch": 0.6361961235262333, "grad_norm": 0.5703125, "learning_rate": 1.820700354609929e-05, "loss": 1.1187, "step": 2870 }, { "epoch": 0.6364177946494133, "grad_norm": 0.5703125, "learning_rate": 1.8195921985815603e-05, "loss": 1.1427, "step": 2871 }, { "epoch": 0.6366394657725931, "grad_norm": 0.546875, "learning_rate": 1.8184840425531916e-05, "loss": 1.0449, "step": 2872 }, { "epoch": 0.6368611368957731, "grad_norm": 0.53515625, "learning_rate": 1.817375886524823e-05, "loss": 1.1198, "step": 2873 }, { "epoch": 0.6370828080189529, "grad_norm": 0.53125, "learning_rate": 1.816267730496454e-05, "loss": 1.102, "step": 2874 }, { "epoch": 0.6373044791421327, "grad_norm": 0.53125, "learning_rate": 1.815159574468085e-05, "loss": 1.0491, "step": 2875 }, { "epoch": 0.6375261502653127, "grad_norm": 0.54296875, "learning_rate": 1.8140514184397164e-05, "loss": 1.0366, "step": 2876 }, { "epoch": 0.6377478213884925, "grad_norm": 0.5546875, "learning_rate": 1.8129432624113476e-05, "loss": 1.1637, "step": 2877 }, { "epoch": 0.6379694925116723, "grad_norm": 0.52734375, "learning_rate": 1.811835106382979e-05, "loss": 1.0143, "step": 2878 }, { "epoch": 0.6381911636348523, "grad_norm": 0.5234375, "learning_rate": 1.8107269503546102e-05, "loss": 1.0431, "step": 2879 }, { "epoch": 0.6384128347580321, "grad_norm": 0.5390625, "learning_rate": 1.8096187943262415e-05, "loss": 1.0406, "step": 2880 }, { "epoch": 0.638634505881212, "grad_norm": 0.58203125, "learning_rate": 1.8085106382978724e-05, "loss": 1.152, "step": 2881 }, { "epoch": 0.6388561770043919, "grad_norm": 0.5234375, "learning_rate": 1.8074024822695037e-05, "loss": 1.0487, "step": 2882 }, { "epoch": 0.6390778481275717, "grad_norm": 0.54296875, "learning_rate": 1.8062943262411346e-05, "loss": 1.1286, "step": 2883 }, { "epoch": 0.6392995192507516, "grad_norm": 0.55078125, "learning_rate": 1.805186170212766e-05, "loss": 1.0559, "step": 2884 }, { "epoch": 0.6395211903739315, "grad_norm": 0.5234375, "learning_rate": 1.8040780141843972e-05, "loss": 1.0309, "step": 2885 }, { "epoch": 0.6397428614971113, "grad_norm": 0.51953125, "learning_rate": 1.8029698581560285e-05, "loss": 1.0789, "step": 2886 }, { "epoch": 0.6399645326202912, "grad_norm": 0.54296875, "learning_rate": 1.8018617021276597e-05, "loss": 1.0439, "step": 2887 }, { "epoch": 0.6401862037434711, "grad_norm": 0.5, "learning_rate": 1.8007535460992907e-05, "loss": 1.0489, "step": 2888 }, { "epoch": 0.640407874866651, "grad_norm": 0.52734375, "learning_rate": 1.799645390070922e-05, "loss": 1.0211, "step": 2889 }, { "epoch": 0.6406295459898308, "grad_norm": 0.54296875, "learning_rate": 1.7985372340425532e-05, "loss": 1.0404, "step": 2890 }, { "epoch": 0.6408512171130107, "grad_norm": 0.53125, "learning_rate": 1.7974290780141845e-05, "loss": 1.0153, "step": 2891 }, { "epoch": 0.6410728882361906, "grad_norm": 0.5390625, "learning_rate": 1.7963209219858158e-05, "loss": 1.149, "step": 2892 }, { "epoch": 0.6412945593593704, "grad_norm": 0.53125, "learning_rate": 1.795212765957447e-05, "loss": 1.0166, "step": 2893 }, { "epoch": 0.6415162304825504, "grad_norm": 0.53125, "learning_rate": 1.794104609929078e-05, "loss": 1.168, "step": 2894 }, { "epoch": 0.6417379016057302, "grad_norm": 0.53125, "learning_rate": 1.7929964539007093e-05, "loss": 1.0663, "step": 2895 }, { "epoch": 0.64195957272891, "grad_norm": 0.51953125, "learning_rate": 1.7918882978723405e-05, "loss": 1.0468, "step": 2896 }, { "epoch": 0.64218124385209, "grad_norm": 0.5546875, "learning_rate": 1.7907801418439718e-05, "loss": 1.0934, "step": 2897 }, { "epoch": 0.6424029149752698, "grad_norm": 0.54296875, "learning_rate": 1.789671985815603e-05, "loss": 0.9616, "step": 2898 }, { "epoch": 0.6426245860984496, "grad_norm": 0.5546875, "learning_rate": 1.7885638297872344e-05, "loss": 0.9883, "step": 2899 }, { "epoch": 0.6428462572216296, "grad_norm": 0.52734375, "learning_rate": 1.7874556737588653e-05, "loss": 1.0268, "step": 2900 }, { "epoch": 0.6430679283448094, "grad_norm": 0.51171875, "learning_rate": 1.7863475177304963e-05, "loss": 1.03, "step": 2901 }, { "epoch": 0.6432895994679894, "grad_norm": 0.5546875, "learning_rate": 1.7852393617021275e-05, "loss": 1.0328, "step": 2902 }, { "epoch": 0.6435112705911692, "grad_norm": 0.54296875, "learning_rate": 1.7841312056737588e-05, "loss": 1.1126, "step": 2903 }, { "epoch": 0.643732941714349, "grad_norm": 0.51171875, "learning_rate": 1.78302304964539e-05, "loss": 1.0104, "step": 2904 }, { "epoch": 0.643954612837529, "grad_norm": 0.55078125, "learning_rate": 1.7819148936170214e-05, "loss": 1.0255, "step": 2905 }, { "epoch": 0.6441762839607088, "grad_norm": 0.53515625, "learning_rate": 1.7808067375886526e-05, "loss": 1.1297, "step": 2906 }, { "epoch": 0.6443979550838886, "grad_norm": 0.498046875, "learning_rate": 1.7796985815602836e-05, "loss": 0.9699, "step": 2907 }, { "epoch": 0.6446196262070686, "grad_norm": 0.5625, "learning_rate": 1.778590425531915e-05, "loss": 1.158, "step": 2908 }, { "epoch": 0.6448412973302484, "grad_norm": 0.5390625, "learning_rate": 1.777482269503546e-05, "loss": 1.1015, "step": 2909 }, { "epoch": 0.6450629684534283, "grad_norm": 0.53125, "learning_rate": 1.7763741134751774e-05, "loss": 1.1614, "step": 2910 }, { "epoch": 0.6452846395766082, "grad_norm": 0.51953125, "learning_rate": 1.7752659574468087e-05, "loss": 1.0256, "step": 2911 }, { "epoch": 0.645506310699788, "grad_norm": 0.53515625, "learning_rate": 1.77415780141844e-05, "loss": 1.0949, "step": 2912 }, { "epoch": 0.6457279818229679, "grad_norm": 0.51171875, "learning_rate": 1.773049645390071e-05, "loss": 1.0164, "step": 2913 }, { "epoch": 0.6459496529461478, "grad_norm": 0.55859375, "learning_rate": 1.7719414893617022e-05, "loss": 1.1811, "step": 2914 }, { "epoch": 0.6461713240693276, "grad_norm": 0.53125, "learning_rate": 1.7708333333333335e-05, "loss": 1.0455, "step": 2915 }, { "epoch": 0.6463929951925075, "grad_norm": 0.546875, "learning_rate": 1.7697251773049647e-05, "loss": 0.9994, "step": 2916 }, { "epoch": 0.6466146663156874, "grad_norm": 0.5390625, "learning_rate": 1.768617021276596e-05, "loss": 1.0737, "step": 2917 }, { "epoch": 0.6468363374388673, "grad_norm": 0.5234375, "learning_rate": 1.767508865248227e-05, "loss": 1.0341, "step": 2918 }, { "epoch": 0.6470580085620471, "grad_norm": 0.52734375, "learning_rate": 1.7664007092198582e-05, "loss": 1.0839, "step": 2919 }, { "epoch": 0.647279679685227, "grad_norm": 0.52734375, "learning_rate": 1.7652925531914895e-05, "loss": 0.999, "step": 2920 }, { "epoch": 0.6475013508084069, "grad_norm": 0.53515625, "learning_rate": 1.7641843971631204e-05, "loss": 0.982, "step": 2921 }, { "epoch": 0.6477230219315867, "grad_norm": 0.54296875, "learning_rate": 1.7630762411347517e-05, "loss": 1.0832, "step": 2922 }, { "epoch": 0.6479446930547667, "grad_norm": 0.53515625, "learning_rate": 1.761968085106383e-05, "loss": 1.1123, "step": 2923 }, { "epoch": 0.6481663641779465, "grad_norm": 0.55859375, "learning_rate": 1.7608599290780143e-05, "loss": 1.0619, "step": 2924 }, { "epoch": 0.6483880353011263, "grad_norm": 0.52734375, "learning_rate": 1.7597517730496456e-05, "loss": 1.029, "step": 2925 }, { "epoch": 0.6486097064243063, "grad_norm": 0.54296875, "learning_rate": 1.758643617021277e-05, "loss": 1.0899, "step": 2926 }, { "epoch": 0.6488313775474861, "grad_norm": 0.56640625, "learning_rate": 1.7575354609929078e-05, "loss": 1.1331, "step": 2927 }, { "epoch": 0.6490530486706659, "grad_norm": 0.52734375, "learning_rate": 1.756427304964539e-05, "loss": 1.0601, "step": 2928 }, { "epoch": 0.6492747197938459, "grad_norm": 0.53125, "learning_rate": 1.7553191489361703e-05, "loss": 0.9767, "step": 2929 }, { "epoch": 0.6494963909170257, "grad_norm": 0.53125, "learning_rate": 1.7542109929078016e-05, "loss": 1.0425, "step": 2930 }, { "epoch": 0.6497180620402055, "grad_norm": 0.55078125, "learning_rate": 1.753102836879433e-05, "loss": 1.1375, "step": 2931 }, { "epoch": 0.6499397331633855, "grad_norm": 0.55859375, "learning_rate": 1.751994680851064e-05, "loss": 1.0997, "step": 2932 }, { "epoch": 0.6501614042865653, "grad_norm": 0.53515625, "learning_rate": 1.750886524822695e-05, "loss": 1.0405, "step": 2933 }, { "epoch": 0.6503830754097453, "grad_norm": 0.5234375, "learning_rate": 1.7497783687943264e-05, "loss": 1.0229, "step": 2934 }, { "epoch": 0.6506047465329251, "grad_norm": 0.546875, "learning_rate": 1.7486702127659577e-05, "loss": 1.0638, "step": 2935 }, { "epoch": 0.6508264176561049, "grad_norm": 0.56640625, "learning_rate": 1.7475620567375886e-05, "loss": 1.0075, "step": 2936 }, { "epoch": 0.6510480887792849, "grad_norm": 0.55859375, "learning_rate": 1.74645390070922e-05, "loss": 1.0946, "step": 2937 }, { "epoch": 0.6512697599024647, "grad_norm": 0.55859375, "learning_rate": 1.745345744680851e-05, "loss": 1.0247, "step": 2938 }, { "epoch": 0.6514914310256446, "grad_norm": 0.53515625, "learning_rate": 1.7442375886524824e-05, "loss": 1.0884, "step": 2939 }, { "epoch": 0.6517131021488245, "grad_norm": 0.54296875, "learning_rate": 1.7431294326241134e-05, "loss": 1.0722, "step": 2940 }, { "epoch": 0.6519347732720043, "grad_norm": 0.5546875, "learning_rate": 1.7420212765957446e-05, "loss": 1.0759, "step": 2941 }, { "epoch": 0.6521564443951842, "grad_norm": 0.546875, "learning_rate": 1.740913120567376e-05, "loss": 1.0558, "step": 2942 }, { "epoch": 0.6523781155183641, "grad_norm": 0.546875, "learning_rate": 1.7398049645390072e-05, "loss": 1.137, "step": 2943 }, { "epoch": 0.6525997866415439, "grad_norm": 0.515625, "learning_rate": 1.7386968085106385e-05, "loss": 1.0656, "step": 2944 }, { "epoch": 0.6528214577647238, "grad_norm": 0.53125, "learning_rate": 1.7375886524822697e-05, "loss": 1.0487, "step": 2945 }, { "epoch": 0.6530431288879037, "grad_norm": 0.546875, "learning_rate": 1.7364804964539007e-05, "loss": 1.0596, "step": 2946 }, { "epoch": 0.6532648000110836, "grad_norm": 0.53125, "learning_rate": 1.735372340425532e-05, "loss": 1.002, "step": 2947 }, { "epoch": 0.6534864711342634, "grad_norm": 0.5390625, "learning_rate": 1.7342641843971632e-05, "loss": 1.1404, "step": 2948 }, { "epoch": 0.6537081422574433, "grad_norm": 0.55078125, "learning_rate": 1.7331560283687945e-05, "loss": 1.0889, "step": 2949 }, { "epoch": 0.6539298133806232, "grad_norm": 0.53125, "learning_rate": 1.7320478723404258e-05, "loss": 1.0595, "step": 2950 }, { "epoch": 0.654151484503803, "grad_norm": 0.52734375, "learning_rate": 1.730939716312057e-05, "loss": 1.052, "step": 2951 }, { "epoch": 0.654373155626983, "grad_norm": 0.51171875, "learning_rate": 1.729831560283688e-05, "loss": 1.0179, "step": 2952 }, { "epoch": 0.6545948267501628, "grad_norm": 0.55078125, "learning_rate": 1.7287234042553193e-05, "loss": 1.1435, "step": 2953 }, { "epoch": 0.6548164978733426, "grad_norm": 0.5234375, "learning_rate": 1.7276152482269502e-05, "loss": 1.0882, "step": 2954 }, { "epoch": 0.6550381689965226, "grad_norm": 0.53515625, "learning_rate": 1.7265070921985815e-05, "loss": 1.062, "step": 2955 }, { "epoch": 0.6552598401197024, "grad_norm": 0.53125, "learning_rate": 1.7253989361702128e-05, "loss": 1.0851, "step": 2956 }, { "epoch": 0.6554815112428822, "grad_norm": 0.5859375, "learning_rate": 1.724290780141844e-05, "loss": 1.1138, "step": 2957 }, { "epoch": 0.6557031823660622, "grad_norm": 0.546875, "learning_rate": 1.7231826241134753e-05, "loss": 1.0394, "step": 2958 }, { "epoch": 0.655924853489242, "grad_norm": 0.53515625, "learning_rate": 1.7220744680851063e-05, "loss": 1.1348, "step": 2959 }, { "epoch": 0.6561465246124218, "grad_norm": 0.51953125, "learning_rate": 1.7209663120567375e-05, "loss": 1.0549, "step": 2960 }, { "epoch": 0.6563681957356018, "grad_norm": 0.53125, "learning_rate": 1.7198581560283688e-05, "loss": 1.021, "step": 2961 }, { "epoch": 0.6565898668587816, "grad_norm": 0.52734375, "learning_rate": 1.71875e-05, "loss": 1.0553, "step": 2962 }, { "epoch": 0.6568115379819615, "grad_norm": 0.546875, "learning_rate": 1.7176418439716314e-05, "loss": 1.0244, "step": 2963 }, { "epoch": 0.6570332091051414, "grad_norm": 0.5390625, "learning_rate": 1.7165336879432627e-05, "loss": 1.0261, "step": 2964 }, { "epoch": 0.6572548802283212, "grad_norm": 0.55078125, "learning_rate": 1.7154255319148936e-05, "loss": 1.0877, "step": 2965 }, { "epoch": 0.6574765513515012, "grad_norm": 0.546875, "learning_rate": 1.714317375886525e-05, "loss": 1.0712, "step": 2966 }, { "epoch": 0.657698222474681, "grad_norm": 0.498046875, "learning_rate": 1.713209219858156e-05, "loss": 0.97, "step": 2967 }, { "epoch": 0.6579198935978608, "grad_norm": 0.52734375, "learning_rate": 1.7121010638297874e-05, "loss": 1.0486, "step": 2968 }, { "epoch": 0.6581415647210408, "grad_norm": 0.55078125, "learning_rate": 1.7109929078014187e-05, "loss": 1.1387, "step": 2969 }, { "epoch": 0.6583632358442206, "grad_norm": 0.5234375, "learning_rate": 1.70988475177305e-05, "loss": 1.0318, "step": 2970 }, { "epoch": 0.6585849069674005, "grad_norm": 0.55078125, "learning_rate": 1.708776595744681e-05, "loss": 1.0561, "step": 2971 }, { "epoch": 0.6588065780905804, "grad_norm": 0.53125, "learning_rate": 1.707668439716312e-05, "loss": 1.0657, "step": 2972 }, { "epoch": 0.6590282492137602, "grad_norm": 0.55078125, "learning_rate": 1.706560283687943e-05, "loss": 1.1025, "step": 2973 }, { "epoch": 0.6592499203369401, "grad_norm": 0.55859375, "learning_rate": 1.7054521276595744e-05, "loss": 1.0386, "step": 2974 }, { "epoch": 0.65947159146012, "grad_norm": 0.52734375, "learning_rate": 1.7043439716312057e-05, "loss": 1.0776, "step": 2975 }, { "epoch": 0.6596932625832999, "grad_norm": 0.54296875, "learning_rate": 1.703235815602837e-05, "loss": 1.1168, "step": 2976 }, { "epoch": 0.6599149337064797, "grad_norm": 0.55859375, "learning_rate": 1.7021276595744682e-05, "loss": 1.0806, "step": 2977 }, { "epoch": 0.6601366048296596, "grad_norm": 0.55859375, "learning_rate": 1.7010195035460992e-05, "loss": 1.0456, "step": 2978 }, { "epoch": 0.6603582759528395, "grad_norm": 0.515625, "learning_rate": 1.6999113475177305e-05, "loss": 1.0801, "step": 2979 }, { "epoch": 0.6605799470760193, "grad_norm": 0.52734375, "learning_rate": 1.6988031914893617e-05, "loss": 1.0588, "step": 2980 }, { "epoch": 0.6608016181991992, "grad_norm": 0.51953125, "learning_rate": 1.697695035460993e-05, "loss": 1.0004, "step": 2981 }, { "epoch": 0.6610232893223791, "grad_norm": 0.515625, "learning_rate": 1.6965868794326243e-05, "loss": 1.0702, "step": 2982 }, { "epoch": 0.6612449604455589, "grad_norm": 0.546875, "learning_rate": 1.6954787234042556e-05, "loss": 1.0442, "step": 2983 }, { "epoch": 0.6614666315687389, "grad_norm": 0.546875, "learning_rate": 1.6943705673758865e-05, "loss": 1.0168, "step": 2984 }, { "epoch": 0.6616883026919187, "grad_norm": 0.53515625, "learning_rate": 1.6932624113475178e-05, "loss": 1.0931, "step": 2985 }, { "epoch": 0.6619099738150985, "grad_norm": 0.55078125, "learning_rate": 1.692154255319149e-05, "loss": 1.068, "step": 2986 }, { "epoch": 0.6621316449382785, "grad_norm": 0.54296875, "learning_rate": 1.6910460992907803e-05, "loss": 1.1408, "step": 2987 }, { "epoch": 0.6623533160614583, "grad_norm": 0.51953125, "learning_rate": 1.6899379432624116e-05, "loss": 0.9528, "step": 2988 }, { "epoch": 0.6625749871846381, "grad_norm": 0.55078125, "learning_rate": 1.6888297872340426e-05, "loss": 1.1238, "step": 2989 }, { "epoch": 0.6627966583078181, "grad_norm": 0.52734375, "learning_rate": 1.687721631205674e-05, "loss": 1.0534, "step": 2990 }, { "epoch": 0.6630183294309979, "grad_norm": 0.515625, "learning_rate": 1.686613475177305e-05, "loss": 1.0171, "step": 2991 }, { "epoch": 0.6632400005541778, "grad_norm": 0.5, "learning_rate": 1.685505319148936e-05, "loss": 0.9732, "step": 2992 }, { "epoch": 0.6634616716773577, "grad_norm": 0.55078125, "learning_rate": 1.6843971631205673e-05, "loss": 1.0895, "step": 2993 }, { "epoch": 0.6636833428005375, "grad_norm": 0.5234375, "learning_rate": 1.6832890070921986e-05, "loss": 1.0272, "step": 2994 }, { "epoch": 0.6639050139237175, "grad_norm": 0.51953125, "learning_rate": 1.68218085106383e-05, "loss": 1.0313, "step": 2995 }, { "epoch": 0.6641266850468973, "grad_norm": 0.546875, "learning_rate": 1.681072695035461e-05, "loss": 1.0776, "step": 2996 }, { "epoch": 0.6643483561700771, "grad_norm": 0.52734375, "learning_rate": 1.6799645390070924e-05, "loss": 1.0556, "step": 2997 }, { "epoch": 0.6645700272932571, "grad_norm": 0.55859375, "learning_rate": 1.6788563829787234e-05, "loss": 1.0297, "step": 2998 }, { "epoch": 0.6647916984164369, "grad_norm": 0.54296875, "learning_rate": 1.6777482269503547e-05, "loss": 1.0225, "step": 2999 }, { "epoch": 0.6650133695396168, "grad_norm": 0.52734375, "learning_rate": 1.676640070921986e-05, "loss": 1.1155, "step": 3000 }, { "epoch": 0.6652350406627967, "grad_norm": 0.53515625, "learning_rate": 1.6755319148936172e-05, "loss": 1.1139, "step": 3001 }, { "epoch": 0.6654567117859765, "grad_norm": 0.53515625, "learning_rate": 1.6744237588652485e-05, "loss": 1.0513, "step": 3002 }, { "epoch": 0.6656783829091564, "grad_norm": 0.55078125, "learning_rate": 1.6733156028368798e-05, "loss": 1.1035, "step": 3003 }, { "epoch": 0.6659000540323363, "grad_norm": 0.54296875, "learning_rate": 1.6722074468085107e-05, "loss": 1.1178, "step": 3004 }, { "epoch": 0.6661217251555162, "grad_norm": 0.54296875, "learning_rate": 1.671099290780142e-05, "loss": 1.0633, "step": 3005 }, { "epoch": 0.666343396278696, "grad_norm": 0.54296875, "learning_rate": 1.6699911347517733e-05, "loss": 1.0535, "step": 3006 }, { "epoch": 0.6665650674018759, "grad_norm": 0.55078125, "learning_rate": 1.6688829787234042e-05, "loss": 1.1156, "step": 3007 }, { "epoch": 0.6667867385250558, "grad_norm": 0.53515625, "learning_rate": 1.6677748226950355e-05, "loss": 1.0999, "step": 3008 }, { "epoch": 0.6670084096482356, "grad_norm": 0.55078125, "learning_rate": 1.6666666666666667e-05, "loss": 1.074, "step": 3009 }, { "epoch": 0.6672300807714155, "grad_norm": 0.53515625, "learning_rate": 1.665558510638298e-05, "loss": 0.9507, "step": 3010 }, { "epoch": 0.6674517518945954, "grad_norm": 0.52734375, "learning_rate": 1.664450354609929e-05, "loss": 1.1219, "step": 3011 }, { "epoch": 0.6676734230177752, "grad_norm": 0.5390625, "learning_rate": 1.6633421985815602e-05, "loss": 1.0515, "step": 3012 }, { "epoch": 0.6678950941409552, "grad_norm": 0.54296875, "learning_rate": 1.6622340425531915e-05, "loss": 1.0314, "step": 3013 }, { "epoch": 0.668116765264135, "grad_norm": 0.51171875, "learning_rate": 1.6611258865248228e-05, "loss": 0.9883, "step": 3014 }, { "epoch": 0.6683384363873148, "grad_norm": 0.546875, "learning_rate": 1.660017730496454e-05, "loss": 1.0291, "step": 3015 }, { "epoch": 0.6685601075104948, "grad_norm": 0.5390625, "learning_rate": 1.6589095744680853e-05, "loss": 1.0305, "step": 3016 }, { "epoch": 0.6687817786336746, "grad_norm": 0.51953125, "learning_rate": 1.6578014184397163e-05, "loss": 0.9722, "step": 3017 }, { "epoch": 0.6690034497568544, "grad_norm": 0.52734375, "learning_rate": 1.6566932624113476e-05, "loss": 1.0207, "step": 3018 }, { "epoch": 0.6692251208800344, "grad_norm": 0.56640625, "learning_rate": 1.655585106382979e-05, "loss": 1.0547, "step": 3019 }, { "epoch": 0.6694467920032142, "grad_norm": 0.5234375, "learning_rate": 1.65447695035461e-05, "loss": 1.0244, "step": 3020 }, { "epoch": 0.669668463126394, "grad_norm": 0.55078125, "learning_rate": 1.6533687943262414e-05, "loss": 1.0673, "step": 3021 }, { "epoch": 0.669890134249574, "grad_norm": 0.55078125, "learning_rate": 1.6522606382978727e-05, "loss": 1.092, "step": 3022 }, { "epoch": 0.6701118053727538, "grad_norm": 0.5234375, "learning_rate": 1.6511524822695036e-05, "loss": 1.0315, "step": 3023 }, { "epoch": 0.6703334764959337, "grad_norm": 0.54296875, "learning_rate": 1.6500443262411346e-05, "loss": 1.018, "step": 3024 }, { "epoch": 0.6705551476191136, "grad_norm": 0.52734375, "learning_rate": 1.6489361702127658e-05, "loss": 1.0384, "step": 3025 }, { "epoch": 0.6707768187422934, "grad_norm": 0.55078125, "learning_rate": 1.647828014184397e-05, "loss": 1.1072, "step": 3026 }, { "epoch": 0.6709984898654734, "grad_norm": 0.5546875, "learning_rate": 1.6467198581560284e-05, "loss": 1.061, "step": 3027 }, { "epoch": 0.6712201609886532, "grad_norm": 0.5546875, "learning_rate": 1.6456117021276597e-05, "loss": 0.9726, "step": 3028 }, { "epoch": 0.6714418321118331, "grad_norm": 0.5234375, "learning_rate": 1.644503546099291e-05, "loss": 1.1216, "step": 3029 }, { "epoch": 0.671663503235013, "grad_norm": 0.51953125, "learning_rate": 1.643395390070922e-05, "loss": 1.1099, "step": 3030 }, { "epoch": 0.6718851743581928, "grad_norm": 0.54296875, "learning_rate": 1.642287234042553e-05, "loss": 1.0405, "step": 3031 }, { "epoch": 0.6721068454813727, "grad_norm": 0.546875, "learning_rate": 1.6411790780141844e-05, "loss": 1.0389, "step": 3032 }, { "epoch": 0.6723285166045526, "grad_norm": 0.5390625, "learning_rate": 1.6400709219858157e-05, "loss": 1.0554, "step": 3033 }, { "epoch": 0.6725501877277325, "grad_norm": 0.5234375, "learning_rate": 1.638962765957447e-05, "loss": 1.0294, "step": 3034 }, { "epoch": 0.6727718588509123, "grad_norm": 0.546875, "learning_rate": 1.6378546099290783e-05, "loss": 1.0637, "step": 3035 }, { "epoch": 0.6729935299740922, "grad_norm": 0.52734375, "learning_rate": 1.6367464539007092e-05, "loss": 0.9783, "step": 3036 }, { "epoch": 0.6732152010972721, "grad_norm": 0.55078125, "learning_rate": 1.6356382978723405e-05, "loss": 1.0798, "step": 3037 }, { "epoch": 0.6734368722204519, "grad_norm": 0.52734375, "learning_rate": 1.6345301418439718e-05, "loss": 1.0996, "step": 3038 }, { "epoch": 0.6736585433436318, "grad_norm": 0.578125, "learning_rate": 1.633421985815603e-05, "loss": 1.0544, "step": 3039 }, { "epoch": 0.6738802144668117, "grad_norm": 0.546875, "learning_rate": 1.6323138297872343e-05, "loss": 1.0622, "step": 3040 }, { "epoch": 0.6741018855899915, "grad_norm": 0.5625, "learning_rate": 1.6312056737588656e-05, "loss": 1.1166, "step": 3041 }, { "epoch": 0.6743235567131715, "grad_norm": 0.546875, "learning_rate": 1.6300975177304965e-05, "loss": 1.0693, "step": 3042 }, { "epoch": 0.6745452278363513, "grad_norm": 0.5546875, "learning_rate": 1.6289893617021275e-05, "loss": 1.0398, "step": 3043 }, { "epoch": 0.6747668989595311, "grad_norm": 0.5234375, "learning_rate": 1.6278812056737587e-05, "loss": 1.0684, "step": 3044 }, { "epoch": 0.6749885700827111, "grad_norm": 0.546875, "learning_rate": 1.62677304964539e-05, "loss": 1.1431, "step": 3045 }, { "epoch": 0.6752102412058909, "grad_norm": 0.5390625, "learning_rate": 1.6256648936170213e-05, "loss": 1.0879, "step": 3046 }, { "epoch": 0.6754319123290707, "grad_norm": 0.5390625, "learning_rate": 1.6245567375886526e-05, "loss": 1.0984, "step": 3047 }, { "epoch": 0.6756535834522507, "grad_norm": 0.53515625, "learning_rate": 1.623448581560284e-05, "loss": 1.0935, "step": 3048 }, { "epoch": 0.6758752545754305, "grad_norm": 0.53515625, "learning_rate": 1.6223404255319148e-05, "loss": 1.0126, "step": 3049 }, { "epoch": 0.6760969256986104, "grad_norm": 0.5546875, "learning_rate": 1.621232269503546e-05, "loss": 1.1018, "step": 3050 }, { "epoch": 0.6763185968217903, "grad_norm": 0.578125, "learning_rate": 1.6201241134751773e-05, "loss": 1.0748, "step": 3051 }, { "epoch": 0.6765402679449701, "grad_norm": 0.56640625, "learning_rate": 1.6190159574468086e-05, "loss": 1.104, "step": 3052 }, { "epoch": 0.67676193906815, "grad_norm": 0.5390625, "learning_rate": 1.61790780141844e-05, "loss": 1.0345, "step": 3053 }, { "epoch": 0.6769836101913299, "grad_norm": 0.55078125, "learning_rate": 1.6167996453900712e-05, "loss": 1.0924, "step": 3054 }, { "epoch": 0.6772052813145097, "grad_norm": 0.50390625, "learning_rate": 1.6156914893617025e-05, "loss": 0.9499, "step": 3055 }, { "epoch": 0.6774269524376896, "grad_norm": 0.54296875, "learning_rate": 1.6145833333333334e-05, "loss": 1.0058, "step": 3056 }, { "epoch": 0.6776486235608695, "grad_norm": 0.5390625, "learning_rate": 1.6134751773049647e-05, "loss": 1.0964, "step": 3057 }, { "epoch": 0.6778702946840494, "grad_norm": 0.5546875, "learning_rate": 1.612367021276596e-05, "loss": 1.0099, "step": 3058 }, { "epoch": 0.6780919658072293, "grad_norm": 0.5390625, "learning_rate": 1.611258865248227e-05, "loss": 1.1312, "step": 3059 }, { "epoch": 0.6783136369304091, "grad_norm": 0.59765625, "learning_rate": 1.610150709219858e-05, "loss": 1.1048, "step": 3060 }, { "epoch": 0.678535308053589, "grad_norm": 0.5390625, "learning_rate": 1.6090425531914894e-05, "loss": 1.0279, "step": 3061 }, { "epoch": 0.6787569791767689, "grad_norm": 0.53515625, "learning_rate": 1.6079343971631207e-05, "loss": 1.0098, "step": 3062 }, { "epoch": 0.6789786502999487, "grad_norm": 0.52734375, "learning_rate": 1.6068262411347517e-05, "loss": 1.0483, "step": 3063 }, { "epoch": 0.6792003214231286, "grad_norm": 0.55078125, "learning_rate": 1.605718085106383e-05, "loss": 1.0047, "step": 3064 }, { "epoch": 0.6794219925463085, "grad_norm": 0.5078125, "learning_rate": 1.6046099290780142e-05, "loss": 1.0343, "step": 3065 }, { "epoch": 0.6796436636694884, "grad_norm": 0.55859375, "learning_rate": 1.6035017730496455e-05, "loss": 1.0844, "step": 3066 }, { "epoch": 0.6798653347926682, "grad_norm": 0.54296875, "learning_rate": 1.6023936170212768e-05, "loss": 1.041, "step": 3067 }, { "epoch": 0.6800870059158481, "grad_norm": 0.546875, "learning_rate": 1.601285460992908e-05, "loss": 1.0819, "step": 3068 }, { "epoch": 0.680308677039028, "grad_norm": 0.5390625, "learning_rate": 1.600177304964539e-05, "loss": 1.0992, "step": 3069 }, { "epoch": 0.6805303481622078, "grad_norm": 0.54296875, "learning_rate": 1.5990691489361703e-05, "loss": 1.1017, "step": 3070 }, { "epoch": 0.6807520192853878, "grad_norm": 0.55859375, "learning_rate": 1.5979609929078015e-05, "loss": 1.0707, "step": 3071 }, { "epoch": 0.6809736904085676, "grad_norm": 0.54296875, "learning_rate": 1.5968528368794328e-05, "loss": 1.0407, "step": 3072 }, { "epoch": 0.6811953615317474, "grad_norm": 0.54296875, "learning_rate": 1.595744680851064e-05, "loss": 1.0709, "step": 3073 }, { "epoch": 0.6814170326549274, "grad_norm": 0.5234375, "learning_rate": 1.5946365248226954e-05, "loss": 0.9937, "step": 3074 }, { "epoch": 0.6816387037781072, "grad_norm": 0.55859375, "learning_rate": 1.5935283687943263e-05, "loss": 1.0319, "step": 3075 }, { "epoch": 0.681860374901287, "grad_norm": 0.5234375, "learning_rate": 1.5924202127659576e-05, "loss": 1.0515, "step": 3076 }, { "epoch": 0.682082046024467, "grad_norm": 0.5390625, "learning_rate": 1.5913120567375885e-05, "loss": 1.0201, "step": 3077 }, { "epoch": 0.6823037171476468, "grad_norm": 0.51953125, "learning_rate": 1.5902039007092198e-05, "loss": 1.0503, "step": 3078 }, { "epoch": 0.6825253882708266, "grad_norm": 0.5234375, "learning_rate": 1.589095744680851e-05, "loss": 1.0363, "step": 3079 }, { "epoch": 0.6827470593940066, "grad_norm": 0.498046875, "learning_rate": 1.5879875886524823e-05, "loss": 0.9803, "step": 3080 }, { "epoch": 0.6829687305171864, "grad_norm": 0.54296875, "learning_rate": 1.5868794326241136e-05, "loss": 1.064, "step": 3081 }, { "epoch": 0.6831904016403663, "grad_norm": 0.5390625, "learning_rate": 1.5857712765957446e-05, "loss": 1.0234, "step": 3082 }, { "epoch": 0.6834120727635462, "grad_norm": 0.55859375, "learning_rate": 1.584663120567376e-05, "loss": 1.1277, "step": 3083 }, { "epoch": 0.683633743886726, "grad_norm": 0.5546875, "learning_rate": 1.583554964539007e-05, "loss": 1.1579, "step": 3084 }, { "epoch": 0.6838554150099059, "grad_norm": 0.515625, "learning_rate": 1.5824468085106384e-05, "loss": 1.0643, "step": 3085 }, { "epoch": 0.6840770861330858, "grad_norm": 0.55078125, "learning_rate": 1.5813386524822697e-05, "loss": 1.1657, "step": 3086 }, { "epoch": 0.6842987572562657, "grad_norm": 0.578125, "learning_rate": 1.580230496453901e-05, "loss": 1.1738, "step": 3087 }, { "epoch": 0.6845204283794456, "grad_norm": 0.54296875, "learning_rate": 1.579122340425532e-05, "loss": 1.0754, "step": 3088 }, { "epoch": 0.6847420995026254, "grad_norm": 0.546875, "learning_rate": 1.578014184397163e-05, "loss": 1.1018, "step": 3089 }, { "epoch": 0.6849637706258053, "grad_norm": 0.55078125, "learning_rate": 1.5769060283687944e-05, "loss": 1.1033, "step": 3090 }, { "epoch": 0.6851854417489852, "grad_norm": 0.53125, "learning_rate": 1.5757978723404257e-05, "loss": 0.9843, "step": 3091 }, { "epoch": 0.685407112872165, "grad_norm": 0.53125, "learning_rate": 1.574689716312057e-05, "loss": 1.1463, "step": 3092 }, { "epoch": 0.6856287839953449, "grad_norm": 0.5625, "learning_rate": 1.5735815602836883e-05, "loss": 1.1287, "step": 3093 }, { "epoch": 0.6858504551185248, "grad_norm": 0.5703125, "learning_rate": 1.5724734042553192e-05, "loss": 1.0387, "step": 3094 }, { "epoch": 0.6860721262417047, "grad_norm": 0.53125, "learning_rate": 1.57136524822695e-05, "loss": 1.1457, "step": 3095 }, { "epoch": 0.6862937973648845, "grad_norm": 0.55859375, "learning_rate": 1.5702570921985814e-05, "loss": 0.979, "step": 3096 }, { "epoch": 0.6865154684880644, "grad_norm": 0.52734375, "learning_rate": 1.5691489361702127e-05, "loss": 1.0438, "step": 3097 }, { "epoch": 0.6867371396112443, "grad_norm": 0.54296875, "learning_rate": 1.568040780141844e-05, "loss": 1.076, "step": 3098 }, { "epoch": 0.6869588107344241, "grad_norm": 0.51953125, "learning_rate": 1.5669326241134753e-05, "loss": 1.1202, "step": 3099 }, { "epoch": 0.687180481857604, "grad_norm": 0.546875, "learning_rate": 1.5658244680851065e-05, "loss": 1.0486, "step": 3100 }, { "epoch": 0.6874021529807839, "grad_norm": 0.53125, "learning_rate": 1.5647163120567375e-05, "loss": 1.0824, "step": 3101 }, { "epoch": 0.6876238241039637, "grad_norm": 0.54296875, "learning_rate": 1.5636081560283688e-05, "loss": 1.1201, "step": 3102 }, { "epoch": 0.6878454952271437, "grad_norm": 0.546875, "learning_rate": 1.5625e-05, "loss": 1.0713, "step": 3103 }, { "epoch": 0.6880671663503235, "grad_norm": 0.52734375, "learning_rate": 1.5613918439716313e-05, "loss": 1.0791, "step": 3104 }, { "epoch": 0.6882888374735033, "grad_norm": 0.5390625, "learning_rate": 1.5602836879432626e-05, "loss": 1.1041, "step": 3105 }, { "epoch": 0.6885105085966833, "grad_norm": 0.5390625, "learning_rate": 1.559175531914894e-05, "loss": 1.0925, "step": 3106 }, { "epoch": 0.6887321797198631, "grad_norm": 0.5234375, "learning_rate": 1.5580673758865248e-05, "loss": 1.0969, "step": 3107 }, { "epoch": 0.688953850843043, "grad_norm": 0.53125, "learning_rate": 1.556959219858156e-05, "loss": 1.0405, "step": 3108 }, { "epoch": 0.6891755219662229, "grad_norm": 0.54296875, "learning_rate": 1.5558510638297874e-05, "loss": 1.0595, "step": 3109 }, { "epoch": 0.6893971930894027, "grad_norm": 0.546875, "learning_rate": 1.5547429078014186e-05, "loss": 1.0337, "step": 3110 }, { "epoch": 0.6896188642125826, "grad_norm": 0.6171875, "learning_rate": 1.55363475177305e-05, "loss": 1.0235, "step": 3111 }, { "epoch": 0.6898405353357625, "grad_norm": 0.5625, "learning_rate": 1.552526595744681e-05, "loss": 1.0753, "step": 3112 }, { "epoch": 0.6900622064589423, "grad_norm": 0.515625, "learning_rate": 1.551418439716312e-05, "loss": 1.0278, "step": 3113 }, { "epoch": 0.6902838775821222, "grad_norm": 0.55859375, "learning_rate": 1.550310283687943e-05, "loss": 1.1233, "step": 3114 }, { "epoch": 0.6905055487053021, "grad_norm": 0.5234375, "learning_rate": 1.5492021276595743e-05, "loss": 1.0451, "step": 3115 }, { "epoch": 0.690727219828482, "grad_norm": 0.57421875, "learning_rate": 1.5480939716312056e-05, "loss": 1.0905, "step": 3116 }, { "epoch": 0.6909488909516618, "grad_norm": 0.55859375, "learning_rate": 1.546985815602837e-05, "loss": 1.0696, "step": 3117 }, { "epoch": 0.6911705620748417, "grad_norm": 0.53125, "learning_rate": 1.5458776595744682e-05, "loss": 1.0716, "step": 3118 }, { "epoch": 0.6913922331980216, "grad_norm": 0.54296875, "learning_rate": 1.5447695035460995e-05, "loss": 1.0443, "step": 3119 }, { "epoch": 0.6916139043212015, "grad_norm": 0.546875, "learning_rate": 1.5436613475177307e-05, "loss": 1.1036, "step": 3120 }, { "epoch": 0.6918355754443813, "grad_norm": 0.52734375, "learning_rate": 1.5425531914893617e-05, "loss": 1.0197, "step": 3121 }, { "epoch": 0.6920572465675612, "grad_norm": 0.54296875, "learning_rate": 1.541445035460993e-05, "loss": 1.0462, "step": 3122 }, { "epoch": 0.6922789176907411, "grad_norm": 0.55078125, "learning_rate": 1.5403368794326242e-05, "loss": 1.1009, "step": 3123 }, { "epoch": 0.692500588813921, "grad_norm": 0.5390625, "learning_rate": 1.5392287234042555e-05, "loss": 1.1294, "step": 3124 }, { "epoch": 0.6927222599371008, "grad_norm": 0.52734375, "learning_rate": 1.5381205673758868e-05, "loss": 1.0184, "step": 3125 }, { "epoch": 0.6929439310602807, "grad_norm": 0.55078125, "learning_rate": 1.537012411347518e-05, "loss": 1.0624, "step": 3126 }, { "epoch": 0.6931656021834606, "grad_norm": 0.546875, "learning_rate": 1.535904255319149e-05, "loss": 1.0382, "step": 3127 }, { "epoch": 0.6933872733066404, "grad_norm": 0.59375, "learning_rate": 1.5347960992907803e-05, "loss": 1.0551, "step": 3128 }, { "epoch": 0.6936089444298204, "grad_norm": 0.54296875, "learning_rate": 1.5336879432624115e-05, "loss": 1.0013, "step": 3129 }, { "epoch": 0.6938306155530002, "grad_norm": 0.5, "learning_rate": 1.5325797872340425e-05, "loss": 1.024, "step": 3130 }, { "epoch": 0.69405228667618, "grad_norm": 0.5234375, "learning_rate": 1.5314716312056738e-05, "loss": 1.0626, "step": 3131 }, { "epoch": 0.69427395779936, "grad_norm": 0.5390625, "learning_rate": 1.530363475177305e-05, "loss": 1.0274, "step": 3132 }, { "epoch": 0.6944956289225398, "grad_norm": 0.53515625, "learning_rate": 1.5292553191489363e-05, "loss": 1.1479, "step": 3133 }, { "epoch": 0.6947173000457196, "grad_norm": 0.58203125, "learning_rate": 1.5281471631205673e-05, "loss": 1.1099, "step": 3134 }, { "epoch": 0.6949389711688996, "grad_norm": 0.53515625, "learning_rate": 1.5270390070921985e-05, "loss": 0.9859, "step": 3135 }, { "epoch": 0.6951606422920794, "grad_norm": 0.515625, "learning_rate": 1.5259308510638298e-05, "loss": 1.0201, "step": 3136 }, { "epoch": 0.6953823134152592, "grad_norm": 0.53125, "learning_rate": 1.5248226950354611e-05, "loss": 1.0238, "step": 3137 }, { "epoch": 0.6956039845384392, "grad_norm": 0.55078125, "learning_rate": 1.5237145390070922e-05, "loss": 1.1147, "step": 3138 }, { "epoch": 0.695825655661619, "grad_norm": 0.51953125, "learning_rate": 1.5226063829787235e-05, "loss": 0.985, "step": 3139 }, { "epoch": 0.6960473267847989, "grad_norm": 0.5078125, "learning_rate": 1.5214982269503547e-05, "loss": 0.9952, "step": 3140 }, { "epoch": 0.6962689979079788, "grad_norm": 0.51171875, "learning_rate": 1.5203900709219859e-05, "loss": 1.0024, "step": 3141 }, { "epoch": 0.6964906690311586, "grad_norm": 0.5234375, "learning_rate": 1.5192819148936171e-05, "loss": 1.0118, "step": 3142 }, { "epoch": 0.6967123401543385, "grad_norm": 0.5546875, "learning_rate": 1.5181737588652484e-05, "loss": 1.0664, "step": 3143 }, { "epoch": 0.6969340112775184, "grad_norm": 0.53125, "learning_rate": 1.5170656028368795e-05, "loss": 1.0934, "step": 3144 }, { "epoch": 0.6971556824006983, "grad_norm": 0.55859375, "learning_rate": 1.5159574468085108e-05, "loss": 1.0815, "step": 3145 }, { "epoch": 0.6973773535238781, "grad_norm": 0.5859375, "learning_rate": 1.514849290780142e-05, "loss": 1.1221, "step": 3146 }, { "epoch": 0.697599024647058, "grad_norm": 0.54296875, "learning_rate": 1.5137411347517732e-05, "loss": 1.0476, "step": 3147 }, { "epoch": 0.6978206957702379, "grad_norm": 0.53125, "learning_rate": 1.5126329787234041e-05, "loss": 1.1338, "step": 3148 }, { "epoch": 0.6980423668934177, "grad_norm": 0.5625, "learning_rate": 1.5115248226950354e-05, "loss": 1.0605, "step": 3149 }, { "epoch": 0.6982640380165976, "grad_norm": 0.5546875, "learning_rate": 1.5104166666666667e-05, "loss": 1.0292, "step": 3150 }, { "epoch": 0.6984857091397775, "grad_norm": 0.55859375, "learning_rate": 1.5093085106382978e-05, "loss": 1.1477, "step": 3151 }, { "epoch": 0.6987073802629574, "grad_norm": 0.5703125, "learning_rate": 1.508200354609929e-05, "loss": 1.109, "step": 3152 }, { "epoch": 0.6989290513861373, "grad_norm": 0.53515625, "learning_rate": 1.5070921985815603e-05, "loss": 1.0093, "step": 3153 }, { "epoch": 0.6991507225093171, "grad_norm": 0.52734375, "learning_rate": 1.5059840425531916e-05, "loss": 1.0908, "step": 3154 }, { "epoch": 0.699372393632497, "grad_norm": 0.53515625, "learning_rate": 1.5048758865248227e-05, "loss": 1.0217, "step": 3155 }, { "epoch": 0.6995940647556769, "grad_norm": 0.54296875, "learning_rate": 1.503767730496454e-05, "loss": 1.0938, "step": 3156 }, { "epoch": 0.6998157358788567, "grad_norm": 0.5546875, "learning_rate": 1.5026595744680853e-05, "loss": 1.1035, "step": 3157 }, { "epoch": 0.7000374070020366, "grad_norm": 0.53125, "learning_rate": 1.5015514184397164e-05, "loss": 1.1295, "step": 3158 }, { "epoch": 0.7002590781252165, "grad_norm": 0.625, "learning_rate": 1.5004432624113477e-05, "loss": 1.0647, "step": 3159 }, { "epoch": 0.7004807492483963, "grad_norm": 0.55078125, "learning_rate": 1.499335106382979e-05, "loss": 1.0721, "step": 3160 }, { "epoch": 0.7007024203715763, "grad_norm": 0.52734375, "learning_rate": 1.49822695035461e-05, "loss": 1.1273, "step": 3161 }, { "epoch": 0.7009240914947561, "grad_norm": 0.5078125, "learning_rate": 1.4971187943262413e-05, "loss": 1.0382, "step": 3162 }, { "epoch": 0.7011457626179359, "grad_norm": 0.5703125, "learning_rate": 1.4960106382978726e-05, "loss": 1.1047, "step": 3163 }, { "epoch": 0.7013674337411159, "grad_norm": 0.53515625, "learning_rate": 1.4949024822695037e-05, "loss": 1.0751, "step": 3164 }, { "epoch": 0.7015891048642957, "grad_norm": 0.51953125, "learning_rate": 1.4937943262411346e-05, "loss": 1.0315, "step": 3165 }, { "epoch": 0.7018107759874755, "grad_norm": 0.53515625, "learning_rate": 1.492686170212766e-05, "loss": 1.0582, "step": 3166 }, { "epoch": 0.7020324471106555, "grad_norm": 0.56640625, "learning_rate": 1.4915780141843972e-05, "loss": 0.9657, "step": 3167 }, { "epoch": 0.7022541182338353, "grad_norm": 0.53125, "learning_rate": 1.4904698581560283e-05, "loss": 1.0686, "step": 3168 }, { "epoch": 0.7024757893570152, "grad_norm": 0.5390625, "learning_rate": 1.4893617021276596e-05, "loss": 1.0856, "step": 3169 }, { "epoch": 0.7026974604801951, "grad_norm": 0.578125, "learning_rate": 1.4882535460992909e-05, "loss": 1.0348, "step": 3170 }, { "epoch": 0.7029191316033749, "grad_norm": 0.5390625, "learning_rate": 1.487145390070922e-05, "loss": 1.106, "step": 3171 }, { "epoch": 0.7031408027265548, "grad_norm": 0.55859375, "learning_rate": 1.4860372340425533e-05, "loss": 1.0437, "step": 3172 }, { "epoch": 0.7033624738497347, "grad_norm": 0.55078125, "learning_rate": 1.4849290780141845e-05, "loss": 1.0714, "step": 3173 }, { "epoch": 0.7035841449729145, "grad_norm": 0.51953125, "learning_rate": 1.4838209219858156e-05, "loss": 1.0043, "step": 3174 }, { "epoch": 0.7038058160960944, "grad_norm": 0.5390625, "learning_rate": 1.4827127659574469e-05, "loss": 1.15, "step": 3175 }, { "epoch": 0.7040274872192743, "grad_norm": 0.546875, "learning_rate": 1.4816046099290782e-05, "loss": 1.0112, "step": 3176 }, { "epoch": 0.7042491583424542, "grad_norm": 0.51953125, "learning_rate": 1.4804964539007093e-05, "loss": 1.0412, "step": 3177 }, { "epoch": 0.704470829465634, "grad_norm": 0.5234375, "learning_rate": 1.4793882978723406e-05, "loss": 1.0155, "step": 3178 }, { "epoch": 0.7046925005888139, "grad_norm": 0.51953125, "learning_rate": 1.4782801418439719e-05, "loss": 0.9839, "step": 3179 }, { "epoch": 0.7049141717119938, "grad_norm": 0.5390625, "learning_rate": 1.477171985815603e-05, "loss": 0.9864, "step": 3180 }, { "epoch": 0.7051358428351737, "grad_norm": 0.55859375, "learning_rate": 1.4760638297872342e-05, "loss": 1.0983, "step": 3181 }, { "epoch": 0.7053575139583536, "grad_norm": 0.52734375, "learning_rate": 1.4749556737588655e-05, "loss": 1.0097, "step": 3182 }, { "epoch": 0.7055791850815334, "grad_norm": 0.546875, "learning_rate": 1.4738475177304965e-05, "loss": 1.0447, "step": 3183 }, { "epoch": 0.7058008562047133, "grad_norm": 0.5390625, "learning_rate": 1.4727393617021276e-05, "loss": 1.0388, "step": 3184 }, { "epoch": 0.7060225273278932, "grad_norm": 0.5390625, "learning_rate": 1.4716312056737588e-05, "loss": 1.0951, "step": 3185 }, { "epoch": 0.706244198451073, "grad_norm": 0.5625, "learning_rate": 1.4705230496453901e-05, "loss": 1.1451, "step": 3186 }, { "epoch": 0.706465869574253, "grad_norm": 0.5703125, "learning_rate": 1.4694148936170212e-05, "loss": 1.083, "step": 3187 }, { "epoch": 0.7066875406974328, "grad_norm": 0.5234375, "learning_rate": 1.4683067375886525e-05, "loss": 1.0721, "step": 3188 }, { "epoch": 0.7069092118206126, "grad_norm": 0.55078125, "learning_rate": 1.4671985815602838e-05, "loss": 1.0734, "step": 3189 }, { "epoch": 0.7071308829437926, "grad_norm": 0.546875, "learning_rate": 1.4660904255319149e-05, "loss": 1.0761, "step": 3190 }, { "epoch": 0.7073525540669724, "grad_norm": 0.53515625, "learning_rate": 1.4649822695035462e-05, "loss": 1.0864, "step": 3191 }, { "epoch": 0.7075742251901522, "grad_norm": 0.56640625, "learning_rate": 1.4638741134751774e-05, "loss": 1.0944, "step": 3192 }, { "epoch": 0.7077958963133322, "grad_norm": 0.55078125, "learning_rate": 1.4627659574468085e-05, "loss": 1.0967, "step": 3193 }, { "epoch": 0.708017567436512, "grad_norm": 0.55078125, "learning_rate": 1.4616578014184398e-05, "loss": 1.0549, "step": 3194 }, { "epoch": 0.7082392385596918, "grad_norm": 0.54296875, "learning_rate": 1.4605496453900711e-05, "loss": 1.0966, "step": 3195 }, { "epoch": 0.7084609096828718, "grad_norm": 0.52734375, "learning_rate": 1.4594414893617022e-05, "loss": 1.0719, "step": 3196 }, { "epoch": 0.7086825808060516, "grad_norm": 0.5625, "learning_rate": 1.4583333333333335e-05, "loss": 1.0477, "step": 3197 }, { "epoch": 0.7089042519292315, "grad_norm": 0.5625, "learning_rate": 1.4572251773049648e-05, "loss": 1.1308, "step": 3198 }, { "epoch": 0.7091259230524114, "grad_norm": 0.5546875, "learning_rate": 1.4561170212765959e-05, "loss": 1.1437, "step": 3199 }, { "epoch": 0.7093475941755912, "grad_norm": 0.55078125, "learning_rate": 1.4550088652482268e-05, "loss": 1.0861, "step": 3200 }, { "epoch": 0.7095692652987711, "grad_norm": 0.53515625, "learning_rate": 1.4539007092198581e-05, "loss": 1.1071, "step": 3201 }, { "epoch": 0.709790936421951, "grad_norm": 0.546875, "learning_rate": 1.4527925531914894e-05, "loss": 1.0645, "step": 3202 }, { "epoch": 0.7100126075451308, "grad_norm": 0.54296875, "learning_rate": 1.4516843971631205e-05, "loss": 1.0434, "step": 3203 }, { "epoch": 0.7102342786683107, "grad_norm": 0.5625, "learning_rate": 1.4505762411347518e-05, "loss": 1.1475, "step": 3204 }, { "epoch": 0.7104559497914906, "grad_norm": 0.68359375, "learning_rate": 1.449468085106383e-05, "loss": 1.0598, "step": 3205 }, { "epoch": 0.7106776209146705, "grad_norm": 0.54296875, "learning_rate": 1.4483599290780141e-05, "loss": 1.1064, "step": 3206 }, { "epoch": 0.7108992920378503, "grad_norm": 0.5390625, "learning_rate": 1.4472517730496454e-05, "loss": 1.1153, "step": 3207 }, { "epoch": 0.7111209631610302, "grad_norm": 0.5390625, "learning_rate": 1.4461436170212767e-05, "loss": 1.0384, "step": 3208 }, { "epoch": 0.7113426342842101, "grad_norm": 0.546875, "learning_rate": 1.4450354609929078e-05, "loss": 1.0709, "step": 3209 }, { "epoch": 0.7115643054073899, "grad_norm": 0.53125, "learning_rate": 1.443927304964539e-05, "loss": 1.0218, "step": 3210 }, { "epoch": 0.7117859765305699, "grad_norm": 0.53125, "learning_rate": 1.4428191489361704e-05, "loss": 1.0765, "step": 3211 }, { "epoch": 0.7120076476537497, "grad_norm": 0.515625, "learning_rate": 1.4417109929078015e-05, "loss": 1.1358, "step": 3212 }, { "epoch": 0.7122293187769296, "grad_norm": 0.5390625, "learning_rate": 1.4406028368794327e-05, "loss": 1.0722, "step": 3213 }, { "epoch": 0.7124509899001095, "grad_norm": 0.5625, "learning_rate": 1.439494680851064e-05, "loss": 1.1461, "step": 3214 }, { "epoch": 0.7126726610232893, "grad_norm": 0.55078125, "learning_rate": 1.4383865248226951e-05, "loss": 1.1373, "step": 3215 }, { "epoch": 0.7128943321464692, "grad_norm": 0.53125, "learning_rate": 1.4372783687943264e-05, "loss": 1.1153, "step": 3216 }, { "epoch": 0.7131160032696491, "grad_norm": 0.5546875, "learning_rate": 1.4361702127659577e-05, "loss": 1.1056, "step": 3217 }, { "epoch": 0.7133376743928289, "grad_norm": 0.52734375, "learning_rate": 1.4350620567375886e-05, "loss": 1.0355, "step": 3218 }, { "epoch": 0.7135593455160089, "grad_norm": 0.54296875, "learning_rate": 1.4339539007092199e-05, "loss": 1.112, "step": 3219 }, { "epoch": 0.7137810166391887, "grad_norm": 0.59375, "learning_rate": 1.432845744680851e-05, "loss": 1.0796, "step": 3220 }, { "epoch": 0.7140026877623685, "grad_norm": 0.51953125, "learning_rate": 1.4317375886524823e-05, "loss": 1.0387, "step": 3221 }, { "epoch": 0.7142243588855485, "grad_norm": 0.5546875, "learning_rate": 1.4306294326241136e-05, "loss": 1.0065, "step": 3222 }, { "epoch": 0.7144460300087283, "grad_norm": 0.53125, "learning_rate": 1.4295212765957447e-05, "loss": 1.0336, "step": 3223 }, { "epoch": 0.7146677011319081, "grad_norm": 0.51953125, "learning_rate": 1.428413120567376e-05, "loss": 1.1116, "step": 3224 }, { "epoch": 0.7148893722550881, "grad_norm": 0.5078125, "learning_rate": 1.4273049645390072e-05, "loss": 1.0096, "step": 3225 }, { "epoch": 0.7151110433782679, "grad_norm": 0.55078125, "learning_rate": 1.4261968085106383e-05, "loss": 1.0467, "step": 3226 }, { "epoch": 0.7153327145014478, "grad_norm": 0.51953125, "learning_rate": 1.4250886524822696e-05, "loss": 0.9986, "step": 3227 }, { "epoch": 0.7155543856246277, "grad_norm": 0.53515625, "learning_rate": 1.4239804964539009e-05, "loss": 1.0683, "step": 3228 }, { "epoch": 0.7157760567478075, "grad_norm": 0.52734375, "learning_rate": 1.422872340425532e-05, "loss": 1.0627, "step": 3229 }, { "epoch": 0.7159977278709874, "grad_norm": 0.5390625, "learning_rate": 1.4217641843971633e-05, "loss": 1.0285, "step": 3230 }, { "epoch": 0.7162193989941673, "grad_norm": 0.51953125, "learning_rate": 1.4206560283687945e-05, "loss": 1.0254, "step": 3231 }, { "epoch": 0.7164410701173471, "grad_norm": 0.5234375, "learning_rate": 1.4195478723404257e-05, "loss": 1.0621, "step": 3232 }, { "epoch": 0.716662741240527, "grad_norm": 0.55078125, "learning_rate": 1.418439716312057e-05, "loss": 1.1523, "step": 3233 }, { "epoch": 0.7168844123637069, "grad_norm": 0.52734375, "learning_rate": 1.4173315602836882e-05, "loss": 1.0827, "step": 3234 }, { "epoch": 0.7171060834868868, "grad_norm": 0.53515625, "learning_rate": 1.4162234042553193e-05, "loss": 1.0611, "step": 3235 }, { "epoch": 0.7173277546100666, "grad_norm": 0.52734375, "learning_rate": 1.4151152482269503e-05, "loss": 1.0701, "step": 3236 }, { "epoch": 0.7175494257332465, "grad_norm": 0.55859375, "learning_rate": 1.4140070921985815e-05, "loss": 1.0388, "step": 3237 }, { "epoch": 0.7177710968564264, "grad_norm": 0.55859375, "learning_rate": 1.4128989361702128e-05, "loss": 1.0853, "step": 3238 }, { "epoch": 0.7179927679796062, "grad_norm": 0.55078125, "learning_rate": 1.4117907801418439e-05, "loss": 1.1522, "step": 3239 }, { "epoch": 0.7182144391027862, "grad_norm": 0.5546875, "learning_rate": 1.4106826241134752e-05, "loss": 0.9982, "step": 3240 }, { "epoch": 0.718436110225966, "grad_norm": 0.5703125, "learning_rate": 1.4095744680851065e-05, "loss": 1.0629, "step": 3241 }, { "epoch": 0.7186577813491458, "grad_norm": 0.50390625, "learning_rate": 1.4084663120567376e-05, "loss": 1.0083, "step": 3242 }, { "epoch": 0.7188794524723258, "grad_norm": 0.55859375, "learning_rate": 1.4073581560283689e-05, "loss": 1.1235, "step": 3243 }, { "epoch": 0.7191011235955056, "grad_norm": 0.54296875, "learning_rate": 1.4062500000000001e-05, "loss": 1.0393, "step": 3244 }, { "epoch": 0.7193227947186855, "grad_norm": 0.5390625, "learning_rate": 1.4051418439716312e-05, "loss": 1.0951, "step": 3245 }, { "epoch": 0.7195444658418654, "grad_norm": 0.54296875, "learning_rate": 1.4040336879432625e-05, "loss": 1.046, "step": 3246 }, { "epoch": 0.7197661369650452, "grad_norm": 0.5390625, "learning_rate": 1.4029255319148938e-05, "loss": 1.1069, "step": 3247 }, { "epoch": 0.7199878080882252, "grad_norm": 0.51953125, "learning_rate": 1.4018173758865249e-05, "loss": 1.037, "step": 3248 }, { "epoch": 0.720209479211405, "grad_norm": 0.53125, "learning_rate": 1.4007092198581562e-05, "loss": 1.0303, "step": 3249 }, { "epoch": 0.7204311503345848, "grad_norm": 0.53515625, "learning_rate": 1.3996010638297875e-05, "loss": 1.091, "step": 3250 }, { "epoch": 0.7206528214577648, "grad_norm": 0.53515625, "learning_rate": 1.3984929078014186e-05, "loss": 1.1363, "step": 3251 }, { "epoch": 0.7208744925809446, "grad_norm": 0.54296875, "learning_rate": 1.3973847517730498e-05, "loss": 1.0559, "step": 3252 }, { "epoch": 0.7210961637041244, "grad_norm": 0.54296875, "learning_rate": 1.3962765957446808e-05, "loss": 1.0075, "step": 3253 }, { "epoch": 0.7213178348273044, "grad_norm": 0.546875, "learning_rate": 1.395168439716312e-05, "loss": 1.031, "step": 3254 }, { "epoch": 0.7215395059504842, "grad_norm": 0.5390625, "learning_rate": 1.3940602836879432e-05, "loss": 1.088, "step": 3255 }, { "epoch": 0.721761177073664, "grad_norm": 0.54296875, "learning_rate": 1.3929521276595744e-05, "loss": 1.0837, "step": 3256 }, { "epoch": 0.721982848196844, "grad_norm": 0.55859375, "learning_rate": 1.3918439716312057e-05, "loss": 1.1372, "step": 3257 }, { "epoch": 0.7222045193200238, "grad_norm": 0.53515625, "learning_rate": 1.3907358156028368e-05, "loss": 0.9899, "step": 3258 }, { "epoch": 0.7224261904432037, "grad_norm": 0.52734375, "learning_rate": 1.3896276595744681e-05, "loss": 1.0461, "step": 3259 }, { "epoch": 0.7226478615663836, "grad_norm": 0.53515625, "learning_rate": 1.3885195035460994e-05, "loss": 1.0524, "step": 3260 }, { "epoch": 0.7228695326895634, "grad_norm": 0.51953125, "learning_rate": 1.3874113475177305e-05, "loss": 1.0727, "step": 3261 }, { "epoch": 0.7230912038127433, "grad_norm": 0.5546875, "learning_rate": 1.3863031914893618e-05, "loss": 1.0881, "step": 3262 }, { "epoch": 0.7233128749359232, "grad_norm": 0.546875, "learning_rate": 1.385195035460993e-05, "loss": 1.0382, "step": 3263 }, { "epoch": 0.7235345460591031, "grad_norm": 0.52734375, "learning_rate": 1.3840868794326242e-05, "loss": 1.081, "step": 3264 }, { "epoch": 0.7237562171822829, "grad_norm": 0.50390625, "learning_rate": 1.3829787234042554e-05, "loss": 1.0048, "step": 3265 }, { "epoch": 0.7239778883054628, "grad_norm": 0.53125, "learning_rate": 1.3818705673758867e-05, "loss": 1.0255, "step": 3266 }, { "epoch": 0.7241995594286427, "grad_norm": 0.546875, "learning_rate": 1.3807624113475178e-05, "loss": 1.0934, "step": 3267 }, { "epoch": 0.7244212305518225, "grad_norm": 0.52734375, "learning_rate": 1.3796542553191491e-05, "loss": 0.9919, "step": 3268 }, { "epoch": 0.7246429016750024, "grad_norm": 0.54296875, "learning_rate": 1.3785460992907804e-05, "loss": 1.1344, "step": 3269 }, { "epoch": 0.7248645727981823, "grad_norm": 0.515625, "learning_rate": 1.3774379432624115e-05, "loss": 0.9856, "step": 3270 }, { "epoch": 0.7250862439213621, "grad_norm": 0.5390625, "learning_rate": 1.3763297872340424e-05, "loss": 1.0523, "step": 3271 }, { "epoch": 0.7253079150445421, "grad_norm": 0.5546875, "learning_rate": 1.3752216312056737e-05, "loss": 1.0443, "step": 3272 }, { "epoch": 0.7255295861677219, "grad_norm": 0.55859375, "learning_rate": 1.374113475177305e-05, "loss": 1.0388, "step": 3273 }, { "epoch": 0.7257512572909018, "grad_norm": 0.59375, "learning_rate": 1.373005319148936e-05, "loss": 1.1538, "step": 3274 }, { "epoch": 0.7259729284140817, "grad_norm": 0.546875, "learning_rate": 1.3718971631205674e-05, "loss": 1.0733, "step": 3275 }, { "epoch": 0.7261945995372615, "grad_norm": 0.5390625, "learning_rate": 1.3707890070921986e-05, "loss": 1.1022, "step": 3276 }, { "epoch": 0.7264162706604415, "grad_norm": 0.55078125, "learning_rate": 1.3696808510638297e-05, "loss": 1.0454, "step": 3277 }, { "epoch": 0.7266379417836213, "grad_norm": 0.55078125, "learning_rate": 1.368572695035461e-05, "loss": 1.0732, "step": 3278 }, { "epoch": 0.7268596129068011, "grad_norm": 0.5625, "learning_rate": 1.3674645390070923e-05, "loss": 1.1015, "step": 3279 }, { "epoch": 0.7270812840299811, "grad_norm": 0.52734375, "learning_rate": 1.3663563829787234e-05, "loss": 1.0891, "step": 3280 }, { "epoch": 0.7273029551531609, "grad_norm": 0.53125, "learning_rate": 1.3652482269503547e-05, "loss": 1.0757, "step": 3281 }, { "epoch": 0.7275246262763407, "grad_norm": 0.53515625, "learning_rate": 1.364140070921986e-05, "loss": 1.0127, "step": 3282 }, { "epoch": 0.7277462973995207, "grad_norm": 0.5546875, "learning_rate": 1.363031914893617e-05, "loss": 1.0812, "step": 3283 }, { "epoch": 0.7279679685227005, "grad_norm": 0.55078125, "learning_rate": 1.3619237588652483e-05, "loss": 1.1364, "step": 3284 }, { "epoch": 0.7281896396458803, "grad_norm": 0.55078125, "learning_rate": 1.3608156028368796e-05, "loss": 1.0534, "step": 3285 }, { "epoch": 0.7284113107690603, "grad_norm": 0.51953125, "learning_rate": 1.3597074468085107e-05, "loss": 1.009, "step": 3286 }, { "epoch": 0.7286329818922401, "grad_norm": 0.53515625, "learning_rate": 1.358599290780142e-05, "loss": 1.087, "step": 3287 }, { "epoch": 0.72885465301542, "grad_norm": 0.54296875, "learning_rate": 1.3574911347517733e-05, "loss": 1.1662, "step": 3288 }, { "epoch": 0.7290763241385999, "grad_norm": 0.55078125, "learning_rate": 1.3563829787234042e-05, "loss": 1.0795, "step": 3289 }, { "epoch": 0.7292979952617797, "grad_norm": 0.5703125, "learning_rate": 1.3552748226950355e-05, "loss": 1.0913, "step": 3290 }, { "epoch": 0.7295196663849596, "grad_norm": 0.5546875, "learning_rate": 1.3541666666666666e-05, "loss": 1.0539, "step": 3291 }, { "epoch": 0.7297413375081395, "grad_norm": 0.5703125, "learning_rate": 1.3530585106382979e-05, "loss": 1.1006, "step": 3292 }, { "epoch": 0.7299630086313194, "grad_norm": 0.53125, "learning_rate": 1.3519503546099292e-05, "loss": 1.0264, "step": 3293 }, { "epoch": 0.7301846797544992, "grad_norm": 0.54296875, "learning_rate": 1.3508421985815603e-05, "loss": 1.0686, "step": 3294 }, { "epoch": 0.7304063508776791, "grad_norm": 0.578125, "learning_rate": 1.3497340425531915e-05, "loss": 1.0074, "step": 3295 }, { "epoch": 0.730628022000859, "grad_norm": 0.5234375, "learning_rate": 1.3486258865248228e-05, "loss": 0.9592, "step": 3296 }, { "epoch": 0.7308496931240388, "grad_norm": 0.51953125, "learning_rate": 1.347517730496454e-05, "loss": 1.0767, "step": 3297 }, { "epoch": 0.7310713642472187, "grad_norm": 0.59765625, "learning_rate": 1.3464095744680852e-05, "loss": 1.0717, "step": 3298 }, { "epoch": 0.7312930353703986, "grad_norm": 0.5078125, "learning_rate": 1.3453014184397165e-05, "loss": 1.0156, "step": 3299 }, { "epoch": 0.7315147064935784, "grad_norm": 0.54296875, "learning_rate": 1.3441932624113476e-05, "loss": 1.0939, "step": 3300 }, { "epoch": 0.7317363776167584, "grad_norm": 0.5078125, "learning_rate": 1.3430851063829789e-05, "loss": 1.0364, "step": 3301 }, { "epoch": 0.7319580487399382, "grad_norm": 0.5625, "learning_rate": 1.3419769503546101e-05, "loss": 1.0877, "step": 3302 }, { "epoch": 0.732179719863118, "grad_norm": 0.5234375, "learning_rate": 1.3408687943262413e-05, "loss": 1.0959, "step": 3303 }, { "epoch": 0.732401390986298, "grad_norm": 0.5078125, "learning_rate": 1.3397606382978725e-05, "loss": 1.0424, "step": 3304 }, { "epoch": 0.7326230621094778, "grad_norm": 0.5234375, "learning_rate": 1.3386524822695038e-05, "loss": 1.0602, "step": 3305 }, { "epoch": 0.7328447332326578, "grad_norm": 0.51171875, "learning_rate": 1.3375443262411347e-05, "loss": 1.0816, "step": 3306 }, { "epoch": 0.7330664043558376, "grad_norm": 0.52734375, "learning_rate": 1.3364361702127659e-05, "loss": 0.9729, "step": 3307 }, { "epoch": 0.7332880754790174, "grad_norm": 0.5078125, "learning_rate": 1.3353280141843971e-05, "loss": 1.0248, "step": 3308 }, { "epoch": 0.7335097466021974, "grad_norm": 0.51953125, "learning_rate": 1.3342198581560284e-05, "loss": 1.0605, "step": 3309 }, { "epoch": 0.7337314177253772, "grad_norm": 0.5390625, "learning_rate": 1.3331117021276595e-05, "loss": 1.132, "step": 3310 }, { "epoch": 0.733953088848557, "grad_norm": 0.5625, "learning_rate": 1.3320035460992908e-05, "loss": 1.0959, "step": 3311 }, { "epoch": 0.734174759971737, "grad_norm": 0.5390625, "learning_rate": 1.330895390070922e-05, "loss": 1.0783, "step": 3312 }, { "epoch": 0.7343964310949168, "grad_norm": 0.5390625, "learning_rate": 1.3297872340425532e-05, "loss": 1.0724, "step": 3313 }, { "epoch": 0.7346181022180966, "grad_norm": 0.5390625, "learning_rate": 1.3286790780141845e-05, "loss": 1.0448, "step": 3314 }, { "epoch": 0.7348397733412766, "grad_norm": 0.54296875, "learning_rate": 1.3275709219858157e-05, "loss": 1.0774, "step": 3315 }, { "epoch": 0.7350614444644564, "grad_norm": 0.53515625, "learning_rate": 1.3264627659574468e-05, "loss": 1.0564, "step": 3316 }, { "epoch": 0.7352831155876363, "grad_norm": 0.5234375, "learning_rate": 1.3253546099290781e-05, "loss": 1.0598, "step": 3317 }, { "epoch": 0.7355047867108162, "grad_norm": 0.51953125, "learning_rate": 1.3242464539007094e-05, "loss": 1.0464, "step": 3318 }, { "epoch": 0.735726457833996, "grad_norm": 0.53515625, "learning_rate": 1.3231382978723405e-05, "loss": 1.0607, "step": 3319 }, { "epoch": 0.7359481289571759, "grad_norm": 0.55859375, "learning_rate": 1.3220301418439718e-05, "loss": 1.1299, "step": 3320 }, { "epoch": 0.7361698000803558, "grad_norm": 0.54296875, "learning_rate": 1.320921985815603e-05, "loss": 1.0537, "step": 3321 }, { "epoch": 0.7363914712035357, "grad_norm": 0.54296875, "learning_rate": 1.3198138297872342e-05, "loss": 1.1245, "step": 3322 }, { "epoch": 0.7366131423267155, "grad_norm": 0.53125, "learning_rate": 1.3187056737588654e-05, "loss": 1.14, "step": 3323 }, { "epoch": 0.7368348134498954, "grad_norm": 0.5546875, "learning_rate": 1.3175975177304964e-05, "loss": 1.033, "step": 3324 }, { "epoch": 0.7370564845730753, "grad_norm": 0.57421875, "learning_rate": 1.3164893617021277e-05, "loss": 1.0623, "step": 3325 }, { "epoch": 0.7372781556962551, "grad_norm": 0.53515625, "learning_rate": 1.3153812056737588e-05, "loss": 1.0745, "step": 3326 }, { "epoch": 0.737499826819435, "grad_norm": 0.5546875, "learning_rate": 1.31427304964539e-05, "loss": 1.0541, "step": 3327 }, { "epoch": 0.7377214979426149, "grad_norm": 0.5546875, "learning_rate": 1.3131648936170213e-05, "loss": 1.0316, "step": 3328 }, { "epoch": 0.7379431690657947, "grad_norm": 0.5390625, "learning_rate": 1.3120567375886524e-05, "loss": 1.0243, "step": 3329 }, { "epoch": 0.7381648401889747, "grad_norm": 0.52734375, "learning_rate": 1.3109485815602837e-05, "loss": 1.1326, "step": 3330 }, { "epoch": 0.7383865113121545, "grad_norm": 0.5859375, "learning_rate": 1.309840425531915e-05, "loss": 1.1716, "step": 3331 }, { "epoch": 0.7386081824353343, "grad_norm": 0.53515625, "learning_rate": 1.3087322695035461e-05, "loss": 1.0776, "step": 3332 }, { "epoch": 0.7388298535585143, "grad_norm": 0.53515625, "learning_rate": 1.3076241134751774e-05, "loss": 1.0199, "step": 3333 }, { "epoch": 0.7390515246816941, "grad_norm": 0.55078125, "learning_rate": 1.3065159574468086e-05, "loss": 1.0925, "step": 3334 }, { "epoch": 0.7392731958048739, "grad_norm": 0.53515625, "learning_rate": 1.3054078014184398e-05, "loss": 1.0358, "step": 3335 }, { "epoch": 0.7394948669280539, "grad_norm": 0.51171875, "learning_rate": 1.304299645390071e-05, "loss": 1.0586, "step": 3336 }, { "epoch": 0.7397165380512337, "grad_norm": 0.51953125, "learning_rate": 1.3031914893617023e-05, "loss": 1.0715, "step": 3337 }, { "epoch": 0.7399382091744137, "grad_norm": 0.54296875, "learning_rate": 1.3020833333333334e-05, "loss": 1.0851, "step": 3338 }, { "epoch": 0.7401598802975935, "grad_norm": 0.55078125, "learning_rate": 1.3009751773049647e-05, "loss": 1.0982, "step": 3339 }, { "epoch": 0.7403815514207733, "grad_norm": 0.53515625, "learning_rate": 1.299867021276596e-05, "loss": 1.062, "step": 3340 }, { "epoch": 0.7406032225439533, "grad_norm": 0.51953125, "learning_rate": 1.2987588652482269e-05, "loss": 1.0733, "step": 3341 }, { "epoch": 0.7408248936671331, "grad_norm": 0.546875, "learning_rate": 1.297650709219858e-05, "loss": 1.0629, "step": 3342 }, { "epoch": 0.741046564790313, "grad_norm": 0.546875, "learning_rate": 1.2965425531914893e-05, "loss": 1.0523, "step": 3343 }, { "epoch": 0.7412682359134929, "grad_norm": 0.546875, "learning_rate": 1.2954343971631206e-05, "loss": 1.1027, "step": 3344 }, { "epoch": 0.7414899070366727, "grad_norm": 0.50390625, "learning_rate": 1.2943262411347517e-05, "loss": 1.0892, "step": 3345 }, { "epoch": 0.7417115781598526, "grad_norm": 0.515625, "learning_rate": 1.293218085106383e-05, "loss": 1.0634, "step": 3346 }, { "epoch": 0.7419332492830325, "grad_norm": 0.5546875, "learning_rate": 1.2921099290780142e-05, "loss": 1.0258, "step": 3347 }, { "epoch": 0.7421549204062123, "grad_norm": 0.54296875, "learning_rate": 1.2910017730496453e-05, "loss": 0.9997, "step": 3348 }, { "epoch": 0.7423765915293922, "grad_norm": 0.53515625, "learning_rate": 1.2898936170212766e-05, "loss": 1.0235, "step": 3349 }, { "epoch": 0.7425982626525721, "grad_norm": 0.5625, "learning_rate": 1.2887854609929079e-05, "loss": 1.0496, "step": 3350 }, { "epoch": 0.742819933775752, "grad_norm": 0.54296875, "learning_rate": 1.287677304964539e-05, "loss": 1.1402, "step": 3351 }, { "epoch": 0.7430416048989318, "grad_norm": 0.5390625, "learning_rate": 1.2865691489361703e-05, "loss": 1.0674, "step": 3352 }, { "epoch": 0.7432632760221117, "grad_norm": 0.52734375, "learning_rate": 1.2854609929078016e-05, "loss": 1.0137, "step": 3353 }, { "epoch": 0.7434849471452916, "grad_norm": 0.53515625, "learning_rate": 1.2843528368794328e-05, "loss": 1.0285, "step": 3354 }, { "epoch": 0.7437066182684714, "grad_norm": 0.54296875, "learning_rate": 1.283244680851064e-05, "loss": 1.0713, "step": 3355 }, { "epoch": 0.7439282893916513, "grad_norm": 0.54296875, "learning_rate": 1.2821365248226952e-05, "loss": 1.052, "step": 3356 }, { "epoch": 0.7441499605148312, "grad_norm": 0.54296875, "learning_rate": 1.2810283687943265e-05, "loss": 1.0977, "step": 3357 }, { "epoch": 0.744371631638011, "grad_norm": 0.55078125, "learning_rate": 1.2799202127659576e-05, "loss": 1.0696, "step": 3358 }, { "epoch": 0.744593302761191, "grad_norm": 0.5703125, "learning_rate": 1.2788120567375885e-05, "loss": 1.0775, "step": 3359 }, { "epoch": 0.7448149738843708, "grad_norm": 0.54296875, "learning_rate": 1.2777039007092198e-05, "loss": 1.1173, "step": 3360 }, { "epoch": 0.7450366450075506, "grad_norm": 0.5703125, "learning_rate": 1.2765957446808511e-05, "loss": 1.0798, "step": 3361 }, { "epoch": 0.7452583161307306, "grad_norm": 0.53125, "learning_rate": 1.2754875886524822e-05, "loss": 1.0916, "step": 3362 }, { "epoch": 0.7454799872539104, "grad_norm": 0.53515625, "learning_rate": 1.2743794326241135e-05, "loss": 1.0044, "step": 3363 }, { "epoch": 0.7457016583770902, "grad_norm": 0.56640625, "learning_rate": 1.2732712765957448e-05, "loss": 1.0508, "step": 3364 }, { "epoch": 0.7459233295002702, "grad_norm": 0.546875, "learning_rate": 1.2721631205673759e-05, "loss": 1.0719, "step": 3365 }, { "epoch": 0.74614500062345, "grad_norm": 0.5390625, "learning_rate": 1.2710549645390071e-05, "loss": 1.0356, "step": 3366 }, { "epoch": 0.74636667174663, "grad_norm": 0.54296875, "learning_rate": 1.2699468085106384e-05, "loss": 1.0157, "step": 3367 }, { "epoch": 0.7465883428698098, "grad_norm": 0.54296875, "learning_rate": 1.2688386524822695e-05, "loss": 1.015, "step": 3368 }, { "epoch": 0.7468100139929896, "grad_norm": 0.546875, "learning_rate": 1.2677304964539008e-05, "loss": 1.0204, "step": 3369 }, { "epoch": 0.7470316851161696, "grad_norm": 0.546875, "learning_rate": 1.2666223404255321e-05, "loss": 1.1563, "step": 3370 }, { "epoch": 0.7472533562393494, "grad_norm": 0.5546875, "learning_rate": 1.2655141843971632e-05, "loss": 1.112, "step": 3371 }, { "epoch": 0.7474750273625292, "grad_norm": 0.5546875, "learning_rate": 1.2644060283687945e-05, "loss": 1.0713, "step": 3372 }, { "epoch": 0.7476966984857092, "grad_norm": 0.5234375, "learning_rate": 1.2632978723404257e-05, "loss": 1.036, "step": 3373 }, { "epoch": 0.747918369608889, "grad_norm": 0.52734375, "learning_rate": 1.2621897163120569e-05, "loss": 1.0352, "step": 3374 }, { "epoch": 0.7481400407320689, "grad_norm": 0.54296875, "learning_rate": 1.2610815602836881e-05, "loss": 1.0637, "step": 3375 }, { "epoch": 0.7483617118552488, "grad_norm": 0.53515625, "learning_rate": 1.2599734042553194e-05, "loss": 1.029, "step": 3376 }, { "epoch": 0.7485833829784286, "grad_norm": 0.53515625, "learning_rate": 1.2588652482269504e-05, "loss": 1.0233, "step": 3377 }, { "epoch": 0.7488050541016085, "grad_norm": 0.53125, "learning_rate": 1.2577570921985815e-05, "loss": 1.0519, "step": 3378 }, { "epoch": 0.7490267252247884, "grad_norm": 0.53125, "learning_rate": 1.2566489361702127e-05, "loss": 1.1046, "step": 3379 }, { "epoch": 0.7492483963479682, "grad_norm": 0.53515625, "learning_rate": 1.255540780141844e-05, "loss": 1.0568, "step": 3380 }, { "epoch": 0.7494700674711481, "grad_norm": 0.5234375, "learning_rate": 1.2544326241134751e-05, "loss": 1.0437, "step": 3381 }, { "epoch": 0.749691738594328, "grad_norm": 0.5390625, "learning_rate": 1.2533244680851064e-05, "loss": 1.0531, "step": 3382 }, { "epoch": 0.7499134097175079, "grad_norm": 0.55078125, "learning_rate": 1.2522163120567377e-05, "loss": 1.1, "step": 3383 }, { "epoch": 0.7501350808406877, "grad_norm": 0.52734375, "learning_rate": 1.2511081560283688e-05, "loss": 1.0226, "step": 3384 }, { "epoch": 0.7503567519638676, "grad_norm": 0.546875, "learning_rate": 1.25e-05, "loss": 1.1065, "step": 3385 }, { "epoch": 0.7505784230870475, "grad_norm": 0.51953125, "learning_rate": 1.2488918439716313e-05, "loss": 1.0403, "step": 3386 }, { "epoch": 0.7508000942102273, "grad_norm": 0.55078125, "learning_rate": 1.2477836879432624e-05, "loss": 0.9953, "step": 3387 }, { "epoch": 0.7510217653334073, "grad_norm": 0.5078125, "learning_rate": 1.2466755319148937e-05, "loss": 1.0215, "step": 3388 }, { "epoch": 0.7512434364565871, "grad_norm": 0.5234375, "learning_rate": 1.245567375886525e-05, "loss": 1.0796, "step": 3389 }, { "epoch": 0.7514651075797669, "grad_norm": 0.5390625, "learning_rate": 1.2444592198581561e-05, "loss": 1.0621, "step": 3390 }, { "epoch": 0.7516867787029469, "grad_norm": 0.546875, "learning_rate": 1.2433510638297872e-05, "loss": 1.0469, "step": 3391 }, { "epoch": 0.7519084498261267, "grad_norm": 0.52734375, "learning_rate": 1.2422429078014185e-05, "loss": 1.0279, "step": 3392 }, { "epoch": 0.7521301209493065, "grad_norm": 0.51171875, "learning_rate": 1.2411347517730498e-05, "loss": 0.9848, "step": 3393 }, { "epoch": 0.7523517920724865, "grad_norm": 0.53515625, "learning_rate": 1.2400265957446809e-05, "loss": 1.0622, "step": 3394 }, { "epoch": 0.7525734631956663, "grad_norm": 0.53515625, "learning_rate": 1.2389184397163122e-05, "loss": 1.1416, "step": 3395 }, { "epoch": 0.7527951343188461, "grad_norm": 0.53125, "learning_rate": 1.2378102836879434e-05, "loss": 1.0262, "step": 3396 }, { "epoch": 0.7530168054420261, "grad_norm": 0.5390625, "learning_rate": 1.2367021276595745e-05, "loss": 0.9715, "step": 3397 }, { "epoch": 0.7532384765652059, "grad_norm": 0.5625, "learning_rate": 1.2355939716312058e-05, "loss": 1.1375, "step": 3398 }, { "epoch": 0.7534601476883859, "grad_norm": 0.58203125, "learning_rate": 1.234485815602837e-05, "loss": 1.1215, "step": 3399 }, { "epoch": 0.7536818188115657, "grad_norm": 0.53125, "learning_rate": 1.233377659574468e-05, "loss": 1.0518, "step": 3400 }, { "epoch": 0.7539034899347455, "grad_norm": 0.54296875, "learning_rate": 1.2322695035460993e-05, "loss": 1.1843, "step": 3401 }, { "epoch": 0.7541251610579255, "grad_norm": 0.5390625, "learning_rate": 1.2311613475177306e-05, "loss": 1.1131, "step": 3402 }, { "epoch": 0.7543468321811053, "grad_norm": 0.546875, "learning_rate": 1.2300531914893617e-05, "loss": 1.0295, "step": 3403 }, { "epoch": 0.7545685033042852, "grad_norm": 0.53515625, "learning_rate": 1.228945035460993e-05, "loss": 1.0617, "step": 3404 }, { "epoch": 0.7547901744274651, "grad_norm": 0.55078125, "learning_rate": 1.2278368794326242e-05, "loss": 1.0284, "step": 3405 }, { "epoch": 0.7550118455506449, "grad_norm": 0.5546875, "learning_rate": 1.2267287234042554e-05, "loss": 1.0985, "step": 3406 }, { "epoch": 0.7552335166738248, "grad_norm": 0.51953125, "learning_rate": 1.2256205673758866e-05, "loss": 1.055, "step": 3407 }, { "epoch": 0.7554551877970047, "grad_norm": 0.53125, "learning_rate": 1.2245124113475177e-05, "loss": 1.1109, "step": 3408 }, { "epoch": 0.7556768589201845, "grad_norm": 0.54296875, "learning_rate": 1.223404255319149e-05, "loss": 1.1397, "step": 3409 }, { "epoch": 0.7558985300433644, "grad_norm": 0.5234375, "learning_rate": 1.2222960992907801e-05, "loss": 1.0769, "step": 3410 }, { "epoch": 0.7561202011665443, "grad_norm": 0.5546875, "learning_rate": 1.2211879432624114e-05, "loss": 1.1592, "step": 3411 }, { "epoch": 0.7563418722897242, "grad_norm": 0.54296875, "learning_rate": 1.2200797872340427e-05, "loss": 1.0412, "step": 3412 }, { "epoch": 0.756563543412904, "grad_norm": 0.5390625, "learning_rate": 1.2189716312056738e-05, "loss": 1.1021, "step": 3413 }, { "epoch": 0.7567852145360839, "grad_norm": 0.51171875, "learning_rate": 1.217863475177305e-05, "loss": 1.0101, "step": 3414 }, { "epoch": 0.7570068856592638, "grad_norm": 0.55078125, "learning_rate": 1.2167553191489363e-05, "loss": 1.058, "step": 3415 }, { "epoch": 0.7572285567824436, "grad_norm": 0.546875, "learning_rate": 1.2156471631205673e-05, "loss": 1.1631, "step": 3416 }, { "epoch": 0.7574502279056236, "grad_norm": 0.53125, "learning_rate": 1.2145390070921986e-05, "loss": 1.0364, "step": 3417 }, { "epoch": 0.7576718990288034, "grad_norm": 0.54296875, "learning_rate": 1.2134308510638298e-05, "loss": 1.0268, "step": 3418 }, { "epoch": 0.7578935701519832, "grad_norm": 0.53515625, "learning_rate": 1.2123226950354611e-05, "loss": 1.0822, "step": 3419 }, { "epoch": 0.7581152412751632, "grad_norm": 0.51953125, "learning_rate": 1.2112145390070922e-05, "loss": 1.014, "step": 3420 }, { "epoch": 0.758336912398343, "grad_norm": 0.50390625, "learning_rate": 1.2101063829787235e-05, "loss": 1.0758, "step": 3421 }, { "epoch": 0.7585585835215228, "grad_norm": 0.54296875, "learning_rate": 1.2089982269503548e-05, "loss": 1.1211, "step": 3422 }, { "epoch": 0.7587802546447028, "grad_norm": 0.53515625, "learning_rate": 1.2078900709219859e-05, "loss": 1.0646, "step": 3423 }, { "epoch": 0.7590019257678826, "grad_norm": 0.53125, "learning_rate": 1.2067819148936172e-05, "loss": 1.0359, "step": 3424 }, { "epoch": 0.7592235968910624, "grad_norm": 0.53515625, "learning_rate": 1.2056737588652483e-05, "loss": 1.0472, "step": 3425 }, { "epoch": 0.7594452680142424, "grad_norm": 0.53515625, "learning_rate": 1.2045656028368794e-05, "loss": 1.0971, "step": 3426 }, { "epoch": 0.7596669391374222, "grad_norm": 0.5234375, "learning_rate": 1.2034574468085107e-05, "loss": 1.1333, "step": 3427 }, { "epoch": 0.7598886102606021, "grad_norm": 0.546875, "learning_rate": 1.202349290780142e-05, "loss": 1.0716, "step": 3428 }, { "epoch": 0.760110281383782, "grad_norm": 0.5234375, "learning_rate": 1.201241134751773e-05, "loss": 1.0371, "step": 3429 }, { "epoch": 0.7603319525069618, "grad_norm": 0.53125, "learning_rate": 1.2001329787234043e-05, "loss": 1.0665, "step": 3430 }, { "epoch": 0.7605536236301418, "grad_norm": 0.5390625, "learning_rate": 1.1990248226950356e-05, "loss": 1.0711, "step": 3431 }, { "epoch": 0.7607752947533216, "grad_norm": 0.55859375, "learning_rate": 1.1979166666666667e-05, "loss": 1.0062, "step": 3432 }, { "epoch": 0.7609969658765015, "grad_norm": 0.53125, "learning_rate": 1.196808510638298e-05, "loss": 1.0227, "step": 3433 }, { "epoch": 0.7612186369996814, "grad_norm": 0.53515625, "learning_rate": 1.1957003546099291e-05, "loss": 1.0493, "step": 3434 }, { "epoch": 0.7614403081228612, "grad_norm": 0.57421875, "learning_rate": 1.1945921985815604e-05, "loss": 1.1317, "step": 3435 }, { "epoch": 0.7616619792460411, "grad_norm": 0.55078125, "learning_rate": 1.1934840425531915e-05, "loss": 1.0606, "step": 3436 }, { "epoch": 0.761883650369221, "grad_norm": 0.5859375, "learning_rate": 1.1923758865248228e-05, "loss": 1.1118, "step": 3437 }, { "epoch": 0.7621053214924008, "grad_norm": 0.5390625, "learning_rate": 1.191267730496454e-05, "loss": 1.0352, "step": 3438 }, { "epoch": 0.7623269926155807, "grad_norm": 0.5234375, "learning_rate": 1.1901595744680851e-05, "loss": 1.0648, "step": 3439 }, { "epoch": 0.7625486637387606, "grad_norm": 0.5078125, "learning_rate": 1.1890514184397164e-05, "loss": 1.0273, "step": 3440 }, { "epoch": 0.7627703348619405, "grad_norm": 0.52734375, "learning_rate": 1.1879432624113477e-05, "loss": 1.0359, "step": 3441 }, { "epoch": 0.7629920059851203, "grad_norm": 0.53515625, "learning_rate": 1.1868351063829788e-05, "loss": 1.0423, "step": 3442 }, { "epoch": 0.7632136771083002, "grad_norm": 0.5078125, "learning_rate": 1.1857269503546099e-05, "loss": 0.9831, "step": 3443 }, { "epoch": 0.7634353482314801, "grad_norm": 0.53515625, "learning_rate": 1.1846187943262412e-05, "loss": 1.021, "step": 3444 }, { "epoch": 0.7636570193546599, "grad_norm": 0.50390625, "learning_rate": 1.1835106382978723e-05, "loss": 0.984, "step": 3445 }, { "epoch": 0.7638786904778399, "grad_norm": 0.5390625, "learning_rate": 1.1824024822695036e-05, "loss": 1.1069, "step": 3446 }, { "epoch": 0.7641003616010197, "grad_norm": 0.53125, "learning_rate": 1.1812943262411348e-05, "loss": 0.9933, "step": 3447 }, { "epoch": 0.7643220327241995, "grad_norm": 0.5390625, "learning_rate": 1.180186170212766e-05, "loss": 1.0971, "step": 3448 }, { "epoch": 0.7645437038473795, "grad_norm": 0.5234375, "learning_rate": 1.1790780141843972e-05, "loss": 1.0485, "step": 3449 }, { "epoch": 0.7647653749705593, "grad_norm": 0.54296875, "learning_rate": 1.1779698581560285e-05, "loss": 1.0575, "step": 3450 }, { "epoch": 0.7649870460937391, "grad_norm": 0.546875, "learning_rate": 1.1768617021276596e-05, "loss": 1.0749, "step": 3451 }, { "epoch": 0.7652087172169191, "grad_norm": 0.51953125, "learning_rate": 1.1757535460992907e-05, "loss": 1.0056, "step": 3452 }, { "epoch": 0.7654303883400989, "grad_norm": 0.5234375, "learning_rate": 1.174645390070922e-05, "loss": 0.9888, "step": 3453 }, { "epoch": 0.7656520594632787, "grad_norm": 0.53125, "learning_rate": 1.1735372340425533e-05, "loss": 1.0008, "step": 3454 }, { "epoch": 0.7658737305864587, "grad_norm": 0.51953125, "learning_rate": 1.1724290780141844e-05, "loss": 0.9589, "step": 3455 }, { "epoch": 0.7660954017096385, "grad_norm": 0.5390625, "learning_rate": 1.1713209219858157e-05, "loss": 1.0857, "step": 3456 }, { "epoch": 0.7663170728328184, "grad_norm": 0.5078125, "learning_rate": 1.170212765957447e-05, "loss": 0.9959, "step": 3457 }, { "epoch": 0.7665387439559983, "grad_norm": 0.5546875, "learning_rate": 1.169104609929078e-05, "loss": 1.1433, "step": 3458 }, { "epoch": 0.7667604150791781, "grad_norm": 0.53515625, "learning_rate": 1.1679964539007093e-05, "loss": 1.0381, "step": 3459 }, { "epoch": 0.7669820862023581, "grad_norm": 0.57421875, "learning_rate": 1.1668882978723404e-05, "loss": 1.188, "step": 3460 }, { "epoch": 0.7672037573255379, "grad_norm": 0.546875, "learning_rate": 1.1657801418439717e-05, "loss": 1.0617, "step": 3461 }, { "epoch": 0.7674254284487178, "grad_norm": 0.54296875, "learning_rate": 1.1646719858156028e-05, "loss": 1.012, "step": 3462 }, { "epoch": 0.7676470995718977, "grad_norm": 0.51953125, "learning_rate": 1.1635638297872341e-05, "loss": 1.0168, "step": 3463 }, { "epoch": 0.7678687706950775, "grad_norm": 0.52734375, "learning_rate": 1.1624556737588654e-05, "loss": 1.131, "step": 3464 }, { "epoch": 0.7680904418182574, "grad_norm": 0.51171875, "learning_rate": 1.1613475177304965e-05, "loss": 1.0424, "step": 3465 }, { "epoch": 0.7683121129414373, "grad_norm": 0.54296875, "learning_rate": 1.1602393617021278e-05, "loss": 1.06, "step": 3466 }, { "epoch": 0.7685337840646171, "grad_norm": 0.54296875, "learning_rate": 1.159131205673759e-05, "loss": 1.0367, "step": 3467 }, { "epoch": 0.768755455187797, "grad_norm": 0.59375, "learning_rate": 1.1580230496453901e-05, "loss": 1.1177, "step": 3468 }, { "epoch": 0.7689771263109769, "grad_norm": 0.515625, "learning_rate": 1.1569148936170213e-05, "loss": 1.055, "step": 3469 }, { "epoch": 0.7691987974341568, "grad_norm": 0.51171875, "learning_rate": 1.1558067375886525e-05, "loss": 1.1313, "step": 3470 }, { "epoch": 0.7694204685573366, "grad_norm": 0.57421875, "learning_rate": 1.1546985815602836e-05, "loss": 1.101, "step": 3471 }, { "epoch": 0.7696421396805165, "grad_norm": 0.54296875, "learning_rate": 1.1535904255319149e-05, "loss": 1.0493, "step": 3472 }, { "epoch": 0.7698638108036964, "grad_norm": 0.5390625, "learning_rate": 1.1524822695035462e-05, "loss": 1.0317, "step": 3473 }, { "epoch": 0.7700854819268762, "grad_norm": 0.53515625, "learning_rate": 1.1513741134751773e-05, "loss": 1.0725, "step": 3474 }, { "epoch": 0.7703071530500561, "grad_norm": 0.5390625, "learning_rate": 1.1502659574468086e-05, "loss": 1.0257, "step": 3475 }, { "epoch": 0.770528824173236, "grad_norm": 0.55859375, "learning_rate": 1.1491578014184399e-05, "loss": 1.0863, "step": 3476 }, { "epoch": 0.7707504952964158, "grad_norm": 0.5546875, "learning_rate": 1.148049645390071e-05, "loss": 1.0688, "step": 3477 }, { "epoch": 0.7709721664195958, "grad_norm": 0.53515625, "learning_rate": 1.146941489361702e-05, "loss": 0.9935, "step": 3478 }, { "epoch": 0.7711938375427756, "grad_norm": 0.5234375, "learning_rate": 1.1458333333333333e-05, "loss": 1.0395, "step": 3479 }, { "epoch": 0.7714155086659554, "grad_norm": 0.515625, "learning_rate": 1.1447251773049646e-05, "loss": 1.0086, "step": 3480 }, { "epoch": 0.7716371797891354, "grad_norm": 0.515625, "learning_rate": 1.1436170212765957e-05, "loss": 1.0135, "step": 3481 }, { "epoch": 0.7718588509123152, "grad_norm": 0.5078125, "learning_rate": 1.142508865248227e-05, "loss": 1.0301, "step": 3482 }, { "epoch": 0.772080522035495, "grad_norm": 0.54296875, "learning_rate": 1.1414007092198583e-05, "loss": 1.0836, "step": 3483 }, { "epoch": 0.772302193158675, "grad_norm": 0.53515625, "learning_rate": 1.1402925531914894e-05, "loss": 1.0413, "step": 3484 }, { "epoch": 0.7725238642818548, "grad_norm": 0.5078125, "learning_rate": 1.1391843971631207e-05, "loss": 1.0057, "step": 3485 }, { "epoch": 0.7727455354050347, "grad_norm": 0.54296875, "learning_rate": 1.138076241134752e-05, "loss": 1.0237, "step": 3486 }, { "epoch": 0.7729672065282146, "grad_norm": 0.59375, "learning_rate": 1.136968085106383e-05, "loss": 1.2092, "step": 3487 }, { "epoch": 0.7731888776513944, "grad_norm": 0.55859375, "learning_rate": 1.1358599290780142e-05, "loss": 1.084, "step": 3488 }, { "epoch": 0.7734105487745743, "grad_norm": 0.546875, "learning_rate": 1.1347517730496454e-05, "loss": 1.0697, "step": 3489 }, { "epoch": 0.7736322198977542, "grad_norm": 0.5546875, "learning_rate": 1.1336436170212767e-05, "loss": 1.0926, "step": 3490 }, { "epoch": 0.773853891020934, "grad_norm": 0.5390625, "learning_rate": 1.1325354609929078e-05, "loss": 1.0841, "step": 3491 }, { "epoch": 0.774075562144114, "grad_norm": 0.55859375, "learning_rate": 1.1314273049645391e-05, "loss": 1.0976, "step": 3492 }, { "epoch": 0.7742972332672938, "grad_norm": 0.55859375, "learning_rate": 1.1303191489361704e-05, "loss": 1.0714, "step": 3493 }, { "epoch": 0.7745189043904737, "grad_norm": 0.53125, "learning_rate": 1.1292109929078015e-05, "loss": 1.0548, "step": 3494 }, { "epoch": 0.7747405755136536, "grad_norm": 0.51953125, "learning_rate": 1.1281028368794328e-05, "loss": 0.9596, "step": 3495 }, { "epoch": 0.7749622466368334, "grad_norm": 0.51953125, "learning_rate": 1.1269946808510639e-05, "loss": 0.9851, "step": 3496 }, { "epoch": 0.7751839177600133, "grad_norm": 0.52734375, "learning_rate": 1.125886524822695e-05, "loss": 0.9939, "step": 3497 }, { "epoch": 0.7754055888831932, "grad_norm": 0.51171875, "learning_rate": 1.1247783687943263e-05, "loss": 1.0144, "step": 3498 }, { "epoch": 0.775627260006373, "grad_norm": 0.54296875, "learning_rate": 1.1236702127659575e-05, "loss": 1.0473, "step": 3499 }, { "epoch": 0.7758489311295529, "grad_norm": 0.53515625, "learning_rate": 1.1225620567375886e-05, "loss": 1.1318, "step": 3500 }, { "epoch": 0.7760706022527328, "grad_norm": 0.54296875, "learning_rate": 1.12145390070922e-05, "loss": 1.069, "step": 3501 }, { "epoch": 0.7762922733759127, "grad_norm": 0.546875, "learning_rate": 1.1203457446808512e-05, "loss": 1.1217, "step": 3502 }, { "epoch": 0.7765139444990925, "grad_norm": 0.53125, "learning_rate": 1.1192375886524823e-05, "loss": 1.0795, "step": 3503 }, { "epoch": 0.7767356156222724, "grad_norm": 0.5625, "learning_rate": 1.1181294326241134e-05, "loss": 1.0797, "step": 3504 }, { "epoch": 0.7769572867454523, "grad_norm": 0.5390625, "learning_rate": 1.1170212765957447e-05, "loss": 1.0317, "step": 3505 }, { "epoch": 0.7771789578686321, "grad_norm": 0.52734375, "learning_rate": 1.115913120567376e-05, "loss": 1.0536, "step": 3506 }, { "epoch": 0.7774006289918121, "grad_norm": 0.55078125, "learning_rate": 1.114804964539007e-05, "loss": 1.028, "step": 3507 }, { "epoch": 0.7776223001149919, "grad_norm": 0.53125, "learning_rate": 1.1136968085106384e-05, "loss": 1.0979, "step": 3508 }, { "epoch": 0.7778439712381717, "grad_norm": 0.5859375, "learning_rate": 1.1125886524822696e-05, "loss": 1.1107, "step": 3509 }, { "epoch": 0.7780656423613517, "grad_norm": 0.57421875, "learning_rate": 1.1114804964539007e-05, "loss": 0.9966, "step": 3510 }, { "epoch": 0.7782873134845315, "grad_norm": 0.53515625, "learning_rate": 1.110372340425532e-05, "loss": 0.9976, "step": 3511 }, { "epoch": 0.7785089846077113, "grad_norm": 0.5390625, "learning_rate": 1.1092641843971633e-05, "loss": 1.002, "step": 3512 }, { "epoch": 0.7787306557308913, "grad_norm": 0.51953125, "learning_rate": 1.1081560283687942e-05, "loss": 1.0535, "step": 3513 }, { "epoch": 0.7789523268540711, "grad_norm": 0.5390625, "learning_rate": 1.1070478723404255e-05, "loss": 1.1907, "step": 3514 }, { "epoch": 0.779173997977251, "grad_norm": 0.55078125, "learning_rate": 1.1059397163120568e-05, "loss": 1.0914, "step": 3515 }, { "epoch": 0.7793956691004309, "grad_norm": 0.5625, "learning_rate": 1.1048315602836879e-05, "loss": 1.0511, "step": 3516 }, { "epoch": 0.7796173402236107, "grad_norm": 0.53515625, "learning_rate": 1.1037234042553192e-05, "loss": 1.0876, "step": 3517 }, { "epoch": 0.7798390113467906, "grad_norm": 0.55859375, "learning_rate": 1.1026152482269504e-05, "loss": 1.0615, "step": 3518 }, { "epoch": 0.7800606824699705, "grad_norm": 0.54296875, "learning_rate": 1.1015070921985817e-05, "loss": 1.0605, "step": 3519 }, { "epoch": 0.7802823535931503, "grad_norm": 0.52734375, "learning_rate": 1.1003989361702128e-05, "loss": 1.0085, "step": 3520 }, { "epoch": 0.7805040247163302, "grad_norm": 0.5078125, "learning_rate": 1.0992907801418441e-05, "loss": 1.0368, "step": 3521 }, { "epoch": 0.7807256958395101, "grad_norm": 0.51953125, "learning_rate": 1.0981826241134752e-05, "loss": 1.0266, "step": 3522 }, { "epoch": 0.78094736696269, "grad_norm": 0.5390625, "learning_rate": 1.0970744680851063e-05, "loss": 0.9917, "step": 3523 }, { "epoch": 0.7811690380858699, "grad_norm": 0.50390625, "learning_rate": 1.0959663120567376e-05, "loss": 0.953, "step": 3524 }, { "epoch": 0.7813907092090497, "grad_norm": 0.53515625, "learning_rate": 1.0948581560283689e-05, "loss": 1.0507, "step": 3525 }, { "epoch": 0.7816123803322296, "grad_norm": 0.55078125, "learning_rate": 1.09375e-05, "loss": 1.1004, "step": 3526 }, { "epoch": 0.7818340514554095, "grad_norm": 0.52734375, "learning_rate": 1.0926418439716313e-05, "loss": 0.9954, "step": 3527 }, { "epoch": 0.7820557225785894, "grad_norm": 0.57421875, "learning_rate": 1.0915336879432625e-05, "loss": 1.0923, "step": 3528 }, { "epoch": 0.7822773937017692, "grad_norm": 0.5234375, "learning_rate": 1.0904255319148937e-05, "loss": 1.0054, "step": 3529 }, { "epoch": 0.7824990648249491, "grad_norm": 0.51953125, "learning_rate": 1.089317375886525e-05, "loss": 1.0723, "step": 3530 }, { "epoch": 0.782720735948129, "grad_norm": 0.52734375, "learning_rate": 1.088209219858156e-05, "loss": 1.0159, "step": 3531 }, { "epoch": 0.7829424070713088, "grad_norm": 0.52734375, "learning_rate": 1.0871010638297873e-05, "loss": 1.0644, "step": 3532 }, { "epoch": 0.7831640781944887, "grad_norm": 0.53515625, "learning_rate": 1.0859929078014184e-05, "loss": 1.0978, "step": 3533 }, { "epoch": 0.7833857493176686, "grad_norm": 0.5546875, "learning_rate": 1.0848847517730497e-05, "loss": 1.1605, "step": 3534 }, { "epoch": 0.7836074204408484, "grad_norm": 0.5390625, "learning_rate": 1.083776595744681e-05, "loss": 0.9452, "step": 3535 }, { "epoch": 0.7838290915640284, "grad_norm": 0.546875, "learning_rate": 1.082668439716312e-05, "loss": 1.0922, "step": 3536 }, { "epoch": 0.7840507626872082, "grad_norm": 0.5625, "learning_rate": 1.0815602836879434e-05, "loss": 1.1099, "step": 3537 }, { "epoch": 0.784272433810388, "grad_norm": 0.54296875, "learning_rate": 1.0804521276595746e-05, "loss": 1.0498, "step": 3538 }, { "epoch": 0.784494104933568, "grad_norm": 0.53515625, "learning_rate": 1.0793439716312057e-05, "loss": 1.0306, "step": 3539 }, { "epoch": 0.7847157760567478, "grad_norm": 0.53125, "learning_rate": 1.0782358156028369e-05, "loss": 1.0848, "step": 3540 }, { "epoch": 0.7849374471799276, "grad_norm": 0.5234375, "learning_rate": 1.0771276595744681e-05, "loss": 1.0524, "step": 3541 }, { "epoch": 0.7851591183031076, "grad_norm": 0.55078125, "learning_rate": 1.0760195035460992e-05, "loss": 1.0017, "step": 3542 }, { "epoch": 0.7853807894262874, "grad_norm": 0.53515625, "learning_rate": 1.0749113475177305e-05, "loss": 1.0437, "step": 3543 }, { "epoch": 0.7856024605494673, "grad_norm": 0.56640625, "learning_rate": 1.0738031914893618e-05, "loss": 1.1189, "step": 3544 }, { "epoch": 0.7858241316726472, "grad_norm": 0.54296875, "learning_rate": 1.0726950354609929e-05, "loss": 1.0415, "step": 3545 }, { "epoch": 0.786045802795827, "grad_norm": 0.5625, "learning_rate": 1.0715868794326242e-05, "loss": 1.1585, "step": 3546 }, { "epoch": 0.7862674739190069, "grad_norm": 0.5234375, "learning_rate": 1.0704787234042555e-05, "loss": 0.9679, "step": 3547 }, { "epoch": 0.7864891450421868, "grad_norm": 0.5234375, "learning_rate": 1.0693705673758866e-05, "loss": 1.0446, "step": 3548 }, { "epoch": 0.7867108161653666, "grad_norm": 0.5234375, "learning_rate": 1.0682624113475177e-05, "loss": 0.9905, "step": 3549 }, { "epoch": 0.7869324872885465, "grad_norm": 0.52734375, "learning_rate": 1.067154255319149e-05, "loss": 1.0055, "step": 3550 }, { "epoch": 0.7871541584117264, "grad_norm": 0.53515625, "learning_rate": 1.0660460992907802e-05, "loss": 1.1306, "step": 3551 }, { "epoch": 0.7873758295349063, "grad_norm": 0.55078125, "learning_rate": 1.0649379432624113e-05, "loss": 1.1294, "step": 3552 }, { "epoch": 0.7875975006580861, "grad_norm": 0.546875, "learning_rate": 1.0638297872340426e-05, "loss": 1.1314, "step": 3553 }, { "epoch": 0.787819171781266, "grad_norm": 0.55859375, "learning_rate": 1.0627216312056739e-05, "loss": 1.0976, "step": 3554 }, { "epoch": 0.7880408429044459, "grad_norm": 0.53125, "learning_rate": 1.061613475177305e-05, "loss": 1.1122, "step": 3555 }, { "epoch": 0.7882625140276258, "grad_norm": 0.51953125, "learning_rate": 1.0605053191489363e-05, "loss": 1.0026, "step": 3556 }, { "epoch": 0.7884841851508056, "grad_norm": 0.546875, "learning_rate": 1.0593971631205674e-05, "loss": 1.0863, "step": 3557 }, { "epoch": 0.7887058562739855, "grad_norm": 0.5234375, "learning_rate": 1.0582890070921987e-05, "loss": 1.0838, "step": 3558 }, { "epoch": 0.7889275273971654, "grad_norm": 0.546875, "learning_rate": 1.0571808510638298e-05, "loss": 1.0417, "step": 3559 }, { "epoch": 0.7891491985203453, "grad_norm": 0.53515625, "learning_rate": 1.056072695035461e-05, "loss": 1.0427, "step": 3560 }, { "epoch": 0.7893708696435251, "grad_norm": 0.52734375, "learning_rate": 1.0549645390070923e-05, "loss": 1.0365, "step": 3561 }, { "epoch": 0.789592540766705, "grad_norm": 0.53125, "learning_rate": 1.0538563829787234e-05, "loss": 1.0828, "step": 3562 }, { "epoch": 0.7898142118898849, "grad_norm": 0.53515625, "learning_rate": 1.0527482269503547e-05, "loss": 0.9692, "step": 3563 }, { "epoch": 0.7900358830130647, "grad_norm": 0.5390625, "learning_rate": 1.051640070921986e-05, "loss": 1.0665, "step": 3564 }, { "epoch": 0.7902575541362447, "grad_norm": 0.515625, "learning_rate": 1.0505319148936171e-05, "loss": 1.0505, "step": 3565 }, { "epoch": 0.7904792252594245, "grad_norm": 0.55078125, "learning_rate": 1.0494237588652482e-05, "loss": 1.0496, "step": 3566 }, { "epoch": 0.7907008963826043, "grad_norm": 0.53125, "learning_rate": 1.0483156028368795e-05, "loss": 1.0909, "step": 3567 }, { "epoch": 0.7909225675057843, "grad_norm": 0.53515625, "learning_rate": 1.0472074468085106e-05, "loss": 1.015, "step": 3568 }, { "epoch": 0.7911442386289641, "grad_norm": 0.546875, "learning_rate": 1.0460992907801419e-05, "loss": 1.0107, "step": 3569 }, { "epoch": 0.7913659097521439, "grad_norm": 0.515625, "learning_rate": 1.0449911347517731e-05, "loss": 0.9929, "step": 3570 }, { "epoch": 0.7915875808753239, "grad_norm": 0.546875, "learning_rate": 1.0438829787234042e-05, "loss": 1.076, "step": 3571 }, { "epoch": 0.7918092519985037, "grad_norm": 0.53125, "learning_rate": 1.0427748226950355e-05, "loss": 1.084, "step": 3572 }, { "epoch": 0.7920309231216835, "grad_norm": 0.5234375, "learning_rate": 1.0416666666666668e-05, "loss": 1.0753, "step": 3573 }, { "epoch": 0.7922525942448635, "grad_norm": 0.5234375, "learning_rate": 1.0405585106382979e-05, "loss": 1.0331, "step": 3574 }, { "epoch": 0.7924742653680433, "grad_norm": 0.5078125, "learning_rate": 1.039450354609929e-05, "loss": 1.0602, "step": 3575 }, { "epoch": 0.7926959364912232, "grad_norm": 0.5390625, "learning_rate": 1.0383421985815603e-05, "loss": 1.1142, "step": 3576 }, { "epoch": 0.7929176076144031, "grad_norm": 0.5390625, "learning_rate": 1.0372340425531916e-05, "loss": 1.0396, "step": 3577 }, { "epoch": 0.7931392787375829, "grad_norm": 0.546875, "learning_rate": 1.0361258865248227e-05, "loss": 1.0623, "step": 3578 }, { "epoch": 0.7933609498607628, "grad_norm": 0.53515625, "learning_rate": 1.035017730496454e-05, "loss": 1.1202, "step": 3579 }, { "epoch": 0.7935826209839427, "grad_norm": 0.5390625, "learning_rate": 1.0339095744680852e-05, "loss": 1.0898, "step": 3580 }, { "epoch": 0.7938042921071226, "grad_norm": 0.5234375, "learning_rate": 1.0328014184397163e-05, "loss": 1.0499, "step": 3581 }, { "epoch": 0.7940259632303024, "grad_norm": 0.51953125, "learning_rate": 1.0316932624113476e-05, "loss": 1.0742, "step": 3582 }, { "epoch": 0.7942476343534823, "grad_norm": 0.546875, "learning_rate": 1.0305851063829789e-05, "loss": 1.0557, "step": 3583 }, { "epoch": 0.7944693054766622, "grad_norm": 0.54296875, "learning_rate": 1.02947695035461e-05, "loss": 1.0896, "step": 3584 }, { "epoch": 0.7946909765998421, "grad_norm": 0.56640625, "learning_rate": 1.0283687943262411e-05, "loss": 1.0947, "step": 3585 }, { "epoch": 0.794912647723022, "grad_norm": 0.5234375, "learning_rate": 1.0272606382978724e-05, "loss": 1.0387, "step": 3586 }, { "epoch": 0.7951343188462018, "grad_norm": 0.51171875, "learning_rate": 1.0261524822695037e-05, "loss": 1.0478, "step": 3587 }, { "epoch": 0.7953559899693817, "grad_norm": 0.54296875, "learning_rate": 1.0250443262411348e-05, "loss": 1.0819, "step": 3588 }, { "epoch": 0.7955776610925616, "grad_norm": 0.515625, "learning_rate": 1.023936170212766e-05, "loss": 1.0448, "step": 3589 }, { "epoch": 0.7957993322157414, "grad_norm": 0.51953125, "learning_rate": 1.0228280141843973e-05, "loss": 1.0284, "step": 3590 }, { "epoch": 0.7960210033389213, "grad_norm": 0.515625, "learning_rate": 1.0217198581560284e-05, "loss": 0.9725, "step": 3591 }, { "epoch": 0.7962426744621012, "grad_norm": 0.546875, "learning_rate": 1.0206117021276597e-05, "loss": 1.1223, "step": 3592 }, { "epoch": 0.796464345585281, "grad_norm": 0.53125, "learning_rate": 1.0195035460992908e-05, "loss": 0.9867, "step": 3593 }, { "epoch": 0.796686016708461, "grad_norm": 0.52734375, "learning_rate": 1.018395390070922e-05, "loss": 1.1006, "step": 3594 }, { "epoch": 0.7969076878316408, "grad_norm": 0.55078125, "learning_rate": 1.0172872340425532e-05, "loss": 1.1094, "step": 3595 }, { "epoch": 0.7971293589548206, "grad_norm": 0.515625, "learning_rate": 1.0161790780141845e-05, "loss": 0.9868, "step": 3596 }, { "epoch": 0.7973510300780006, "grad_norm": 0.50390625, "learning_rate": 1.0150709219858156e-05, "loss": 1.0885, "step": 3597 }, { "epoch": 0.7975727012011804, "grad_norm": 0.53125, "learning_rate": 1.0139627659574469e-05, "loss": 1.0394, "step": 3598 }, { "epoch": 0.7977943723243602, "grad_norm": 0.53125, "learning_rate": 1.0128546099290781e-05, "loss": 1.0845, "step": 3599 }, { "epoch": 0.7980160434475402, "grad_norm": 0.546875, "learning_rate": 1.0117464539007093e-05, "loss": 1.0903, "step": 3600 }, { "epoch": 0.79823771457072, "grad_norm": 0.54296875, "learning_rate": 1.0106382978723404e-05, "loss": 1.1236, "step": 3601 }, { "epoch": 0.7984593856938998, "grad_norm": 0.546875, "learning_rate": 1.0095301418439716e-05, "loss": 1.0949, "step": 3602 }, { "epoch": 0.7986810568170798, "grad_norm": 0.546875, "learning_rate": 1.008421985815603e-05, "loss": 1.0734, "step": 3603 }, { "epoch": 0.7989027279402596, "grad_norm": 0.5234375, "learning_rate": 1.007313829787234e-05, "loss": 1.0592, "step": 3604 }, { "epoch": 0.7991243990634395, "grad_norm": 0.53125, "learning_rate": 1.0062056737588653e-05, "loss": 0.9966, "step": 3605 }, { "epoch": 0.7993460701866194, "grad_norm": 0.53125, "learning_rate": 1.0050975177304966e-05, "loss": 1.0789, "step": 3606 }, { "epoch": 0.7995677413097992, "grad_norm": 0.53515625, "learning_rate": 1.0039893617021277e-05, "loss": 1.1653, "step": 3607 }, { "epoch": 0.7997894124329791, "grad_norm": 0.55078125, "learning_rate": 1.002881205673759e-05, "loss": 1.0343, "step": 3608 }, { "epoch": 0.800011083556159, "grad_norm": 0.54296875, "learning_rate": 1.0017730496453902e-05, "loss": 1.1095, "step": 3609 }, { "epoch": 0.8002327546793389, "grad_norm": 0.515625, "learning_rate": 1.0006648936170212e-05, "loss": 1.0042, "step": 3610 }, { "epoch": 0.8004544258025187, "grad_norm": 0.52734375, "learning_rate": 9.995567375886525e-06, "loss": 1.0206, "step": 3611 }, { "epoch": 0.8006760969256986, "grad_norm": 0.5546875, "learning_rate": 9.984485815602837e-06, "loss": 1.1687, "step": 3612 }, { "epoch": 0.8008977680488785, "grad_norm": 0.546875, "learning_rate": 9.973404255319148e-06, "loss": 1.1093, "step": 3613 }, { "epoch": 0.8011194391720583, "grad_norm": 0.53125, "learning_rate": 9.962322695035461e-06, "loss": 1.0197, "step": 3614 }, { "epoch": 0.8013411102952382, "grad_norm": 0.5234375, "learning_rate": 9.951241134751774e-06, "loss": 1.0752, "step": 3615 }, { "epoch": 0.8015627814184181, "grad_norm": 0.51171875, "learning_rate": 9.940159574468085e-06, "loss": 1.0816, "step": 3616 }, { "epoch": 0.801784452541598, "grad_norm": 0.53125, "learning_rate": 9.929078014184398e-06, "loss": 1.0579, "step": 3617 }, { "epoch": 0.8020061236647779, "grad_norm": 0.515625, "learning_rate": 9.91799645390071e-06, "loss": 1.1015, "step": 3618 }, { "epoch": 0.8022277947879577, "grad_norm": 0.5703125, "learning_rate": 9.906914893617022e-06, "loss": 1.1639, "step": 3619 }, { "epoch": 0.8024494659111376, "grad_norm": 0.546875, "learning_rate": 9.895833333333333e-06, "loss": 1.0971, "step": 3620 }, { "epoch": 0.8026711370343175, "grad_norm": 0.53515625, "learning_rate": 9.884751773049646e-06, "loss": 1.1564, "step": 3621 }, { "epoch": 0.8028928081574973, "grad_norm": 0.53125, "learning_rate": 9.873670212765958e-06, "loss": 1.0438, "step": 3622 }, { "epoch": 0.8031144792806773, "grad_norm": 0.5546875, "learning_rate": 9.86258865248227e-06, "loss": 1.1564, "step": 3623 }, { "epoch": 0.8033361504038571, "grad_norm": 0.55859375, "learning_rate": 9.851507092198582e-06, "loss": 1.0563, "step": 3624 }, { "epoch": 0.8035578215270369, "grad_norm": 0.52734375, "learning_rate": 9.840425531914895e-06, "loss": 1.0317, "step": 3625 }, { "epoch": 0.8037794926502169, "grad_norm": 0.51953125, "learning_rate": 9.829343971631206e-06, "loss": 1.0312, "step": 3626 }, { "epoch": 0.8040011637733967, "grad_norm": 0.51953125, "learning_rate": 9.818262411347519e-06, "loss": 1.027, "step": 3627 }, { "epoch": 0.8042228348965765, "grad_norm": 0.5546875, "learning_rate": 9.80718085106383e-06, "loss": 1.1182, "step": 3628 }, { "epoch": 0.8044445060197565, "grad_norm": 0.5234375, "learning_rate": 9.796099290780143e-06, "loss": 1.0568, "step": 3629 }, { "epoch": 0.8046661771429363, "grad_norm": 0.55078125, "learning_rate": 9.785017730496454e-06, "loss": 1.0494, "step": 3630 }, { "epoch": 0.8048878482661161, "grad_norm": 0.52734375, "learning_rate": 9.773936170212766e-06, "loss": 1.0871, "step": 3631 }, { "epoch": 0.8051095193892961, "grad_norm": 0.498046875, "learning_rate": 9.76285460992908e-06, "loss": 1.0324, "step": 3632 }, { "epoch": 0.8053311905124759, "grad_norm": 0.55078125, "learning_rate": 9.75177304964539e-06, "loss": 1.0863, "step": 3633 }, { "epoch": 0.8055528616356558, "grad_norm": 0.5078125, "learning_rate": 9.740691489361703e-06, "loss": 0.9651, "step": 3634 }, { "epoch": 0.8057745327588357, "grad_norm": 0.5390625, "learning_rate": 9.729609929078016e-06, "loss": 1.0532, "step": 3635 }, { "epoch": 0.8059962038820155, "grad_norm": 0.53125, "learning_rate": 9.718528368794327e-06, "loss": 1.1293, "step": 3636 }, { "epoch": 0.8062178750051954, "grad_norm": 0.53125, "learning_rate": 9.707446808510638e-06, "loss": 1.0389, "step": 3637 }, { "epoch": 0.8064395461283753, "grad_norm": 0.51953125, "learning_rate": 9.69636524822695e-06, "loss": 1.0405, "step": 3638 }, { "epoch": 0.8066612172515552, "grad_norm": 0.5390625, "learning_rate": 9.685283687943262e-06, "loss": 1.0947, "step": 3639 }, { "epoch": 0.806882888374735, "grad_norm": 0.5546875, "learning_rate": 9.674202127659575e-06, "loss": 1.088, "step": 3640 }, { "epoch": 0.8071045594979149, "grad_norm": 0.53125, "learning_rate": 9.663120567375887e-06, "loss": 1.0416, "step": 3641 }, { "epoch": 0.8073262306210948, "grad_norm": 0.546875, "learning_rate": 9.652039007092198e-06, "loss": 1.0895, "step": 3642 }, { "epoch": 0.8075479017442746, "grad_norm": 0.55078125, "learning_rate": 9.640957446808511e-06, "loss": 1.0486, "step": 3643 }, { "epoch": 0.8077695728674545, "grad_norm": 0.5234375, "learning_rate": 9.629875886524824e-06, "loss": 0.9969, "step": 3644 }, { "epoch": 0.8079912439906344, "grad_norm": 0.54296875, "learning_rate": 9.618794326241135e-06, "loss": 1.1615, "step": 3645 }, { "epoch": 0.8082129151138142, "grad_norm": 0.54296875, "learning_rate": 9.607712765957446e-06, "loss": 1.042, "step": 3646 }, { "epoch": 0.8084345862369942, "grad_norm": 0.515625, "learning_rate": 9.596631205673759e-06, "loss": 1.0996, "step": 3647 }, { "epoch": 0.808656257360174, "grad_norm": 0.53515625, "learning_rate": 9.585549645390072e-06, "loss": 1.0373, "step": 3648 }, { "epoch": 0.8088779284833539, "grad_norm": 0.50390625, "learning_rate": 9.574468085106383e-06, "loss": 1.0136, "step": 3649 }, { "epoch": 0.8090995996065338, "grad_norm": 0.51953125, "learning_rate": 9.563386524822696e-06, "loss": 1.0511, "step": 3650 }, { "epoch": 0.8093212707297136, "grad_norm": 0.546875, "learning_rate": 9.552304964539008e-06, "loss": 1.0732, "step": 3651 }, { "epoch": 0.8095429418528935, "grad_norm": 0.5390625, "learning_rate": 9.54122340425532e-06, "loss": 1.0434, "step": 3652 }, { "epoch": 0.8097646129760734, "grad_norm": 0.53515625, "learning_rate": 9.530141843971632e-06, "loss": 1.0846, "step": 3653 }, { "epoch": 0.8099862840992532, "grad_norm": 0.53515625, "learning_rate": 9.519060283687943e-06, "loss": 1.0511, "step": 3654 }, { "epoch": 0.8102079552224332, "grad_norm": 0.54296875, "learning_rate": 9.507978723404256e-06, "loss": 1.0607, "step": 3655 }, { "epoch": 0.810429626345613, "grad_norm": 0.58984375, "learning_rate": 9.496897163120567e-06, "loss": 1.1284, "step": 3656 }, { "epoch": 0.8106512974687928, "grad_norm": 0.53125, "learning_rate": 9.48581560283688e-06, "loss": 1.0809, "step": 3657 }, { "epoch": 0.8108729685919728, "grad_norm": 0.52734375, "learning_rate": 9.474734042553193e-06, "loss": 1.0741, "step": 3658 }, { "epoch": 0.8110946397151526, "grad_norm": 0.53515625, "learning_rate": 9.463652482269504e-06, "loss": 1.0947, "step": 3659 }, { "epoch": 0.8113163108383324, "grad_norm": 0.53125, "learning_rate": 9.452570921985817e-06, "loss": 1.0569, "step": 3660 }, { "epoch": 0.8115379819615124, "grad_norm": 0.515625, "learning_rate": 9.44148936170213e-06, "loss": 1.0224, "step": 3661 }, { "epoch": 0.8117596530846922, "grad_norm": 0.54296875, "learning_rate": 9.43040780141844e-06, "loss": 1.0828, "step": 3662 }, { "epoch": 0.8119813242078721, "grad_norm": 0.515625, "learning_rate": 9.419326241134751e-06, "loss": 1.0048, "step": 3663 }, { "epoch": 0.812202995331052, "grad_norm": 0.5234375, "learning_rate": 9.408244680851064e-06, "loss": 0.9833, "step": 3664 }, { "epoch": 0.8124246664542318, "grad_norm": 0.5390625, "learning_rate": 9.397163120567375e-06, "loss": 1.1448, "step": 3665 }, { "epoch": 0.8126463375774117, "grad_norm": 0.54296875, "learning_rate": 9.386081560283688e-06, "loss": 1.1312, "step": 3666 }, { "epoch": 0.8128680087005916, "grad_norm": 0.53515625, "learning_rate": 9.375000000000001e-06, "loss": 1.0931, "step": 3667 }, { "epoch": 0.8130896798237714, "grad_norm": 0.5625, "learning_rate": 9.363918439716312e-06, "loss": 1.0773, "step": 3668 }, { "epoch": 0.8133113509469513, "grad_norm": 0.53515625, "learning_rate": 9.352836879432625e-06, "loss": 1.0811, "step": 3669 }, { "epoch": 0.8135330220701312, "grad_norm": 0.5703125, "learning_rate": 9.341755319148937e-06, "loss": 1.0848, "step": 3670 }, { "epoch": 0.8137546931933111, "grad_norm": 0.515625, "learning_rate": 9.330673758865249e-06, "loss": 1.0282, "step": 3671 }, { "epoch": 0.8139763643164909, "grad_norm": 0.55078125, "learning_rate": 9.31959219858156e-06, "loss": 1.0873, "step": 3672 }, { "epoch": 0.8141980354396708, "grad_norm": 0.5546875, "learning_rate": 9.308510638297872e-06, "loss": 1.0792, "step": 3673 }, { "epoch": 0.8144197065628507, "grad_norm": 0.5625, "learning_rate": 9.297429078014185e-06, "loss": 1.0707, "step": 3674 }, { "epoch": 0.8146413776860305, "grad_norm": 0.52734375, "learning_rate": 9.286347517730496e-06, "loss": 1.0807, "step": 3675 }, { "epoch": 0.8148630488092105, "grad_norm": 0.55078125, "learning_rate": 9.275265957446809e-06, "loss": 1.1317, "step": 3676 }, { "epoch": 0.8150847199323903, "grad_norm": 0.52734375, "learning_rate": 9.264184397163122e-06, "loss": 1.032, "step": 3677 }, { "epoch": 0.8153063910555702, "grad_norm": 0.5546875, "learning_rate": 9.253102836879433e-06, "loss": 0.9913, "step": 3678 }, { "epoch": 0.8155280621787501, "grad_norm": 0.51953125, "learning_rate": 9.242021276595746e-06, "loss": 1.053, "step": 3679 }, { "epoch": 0.8157497333019299, "grad_norm": 0.5625, "learning_rate": 9.230939716312058e-06, "loss": 1.0952, "step": 3680 }, { "epoch": 0.8159714044251098, "grad_norm": 0.515625, "learning_rate": 9.219858156028368e-06, "loss": 1.0963, "step": 3681 }, { "epoch": 0.8161930755482897, "grad_norm": 0.53125, "learning_rate": 9.20877659574468e-06, "loss": 1.063, "step": 3682 }, { "epoch": 0.8164147466714695, "grad_norm": 0.55078125, "learning_rate": 9.197695035460993e-06, "loss": 1.1237, "step": 3683 }, { "epoch": 0.8166364177946495, "grad_norm": 0.53515625, "learning_rate": 9.186613475177306e-06, "loss": 1.0974, "step": 3684 }, { "epoch": 0.8168580889178293, "grad_norm": 0.5390625, "learning_rate": 9.175531914893617e-06, "loss": 1.0874, "step": 3685 }, { "epoch": 0.8170797600410091, "grad_norm": 0.578125, "learning_rate": 9.16445035460993e-06, "loss": 1.0408, "step": 3686 }, { "epoch": 0.8173014311641891, "grad_norm": 0.53515625, "learning_rate": 9.153368794326243e-06, "loss": 1.0537, "step": 3687 }, { "epoch": 0.8175231022873689, "grad_norm": 0.546875, "learning_rate": 9.142287234042554e-06, "loss": 1.0367, "step": 3688 }, { "epoch": 0.8177447734105487, "grad_norm": 0.53515625, "learning_rate": 9.131205673758867e-06, "loss": 1.0156, "step": 3689 }, { "epoch": 0.8179664445337287, "grad_norm": 0.515625, "learning_rate": 9.120124113475178e-06, "loss": 0.9964, "step": 3690 }, { "epoch": 0.8181881156569085, "grad_norm": 0.515625, "learning_rate": 9.109042553191489e-06, "loss": 1.052, "step": 3691 }, { "epoch": 0.8184097867800884, "grad_norm": 0.5390625, "learning_rate": 9.097960992907802e-06, "loss": 1.124, "step": 3692 }, { "epoch": 0.8186314579032683, "grad_norm": 0.53125, "learning_rate": 9.086879432624114e-06, "loss": 1.072, "step": 3693 }, { "epoch": 0.8188531290264481, "grad_norm": 0.53515625, "learning_rate": 9.075797872340425e-06, "loss": 1.0441, "step": 3694 }, { "epoch": 0.819074800149628, "grad_norm": 0.53515625, "learning_rate": 9.064716312056738e-06, "loss": 1.0245, "step": 3695 }, { "epoch": 0.8192964712728079, "grad_norm": 0.54296875, "learning_rate": 9.053634751773051e-06, "loss": 1.0448, "step": 3696 }, { "epoch": 0.8195181423959877, "grad_norm": 0.53515625, "learning_rate": 9.042553191489362e-06, "loss": 1.0314, "step": 3697 }, { "epoch": 0.8197398135191676, "grad_norm": 0.52734375, "learning_rate": 9.031471631205673e-06, "loss": 1.0712, "step": 3698 }, { "epoch": 0.8199614846423475, "grad_norm": 0.515625, "learning_rate": 9.020390070921986e-06, "loss": 1.0416, "step": 3699 }, { "epoch": 0.8201831557655274, "grad_norm": 0.51953125, "learning_rate": 9.009308510638299e-06, "loss": 1.1125, "step": 3700 }, { "epoch": 0.8204048268887072, "grad_norm": 0.53125, "learning_rate": 8.99822695035461e-06, "loss": 1.1419, "step": 3701 }, { "epoch": 0.8206264980118871, "grad_norm": 0.53515625, "learning_rate": 8.987145390070923e-06, "loss": 1.0038, "step": 3702 }, { "epoch": 0.820848169135067, "grad_norm": 0.55078125, "learning_rate": 8.976063829787235e-06, "loss": 1.1571, "step": 3703 }, { "epoch": 0.8210698402582468, "grad_norm": 0.5390625, "learning_rate": 8.964982269503546e-06, "loss": 1.0702, "step": 3704 }, { "epoch": 0.8212915113814268, "grad_norm": 0.54296875, "learning_rate": 8.953900709219859e-06, "loss": 1.1352, "step": 3705 }, { "epoch": 0.8215131825046066, "grad_norm": 0.55078125, "learning_rate": 8.942819148936172e-06, "loss": 1.024, "step": 3706 }, { "epoch": 0.8217348536277864, "grad_norm": 0.5390625, "learning_rate": 8.931737588652481e-06, "loss": 1.1012, "step": 3707 }, { "epoch": 0.8219565247509664, "grad_norm": 0.51953125, "learning_rate": 8.920656028368794e-06, "loss": 1.0292, "step": 3708 }, { "epoch": 0.8221781958741462, "grad_norm": 0.56640625, "learning_rate": 8.909574468085107e-06, "loss": 1.0817, "step": 3709 }, { "epoch": 0.8223998669973261, "grad_norm": 0.546875, "learning_rate": 8.898492907801418e-06, "loss": 1.0713, "step": 3710 }, { "epoch": 0.822621538120506, "grad_norm": 0.5546875, "learning_rate": 8.88741134751773e-06, "loss": 1.1059, "step": 3711 }, { "epoch": 0.8228432092436858, "grad_norm": 0.5625, "learning_rate": 8.876329787234043e-06, "loss": 1.1753, "step": 3712 }, { "epoch": 0.8230648803668658, "grad_norm": 0.53125, "learning_rate": 8.865248226950355e-06, "loss": 1.0114, "step": 3713 }, { "epoch": 0.8232865514900456, "grad_norm": 0.5078125, "learning_rate": 8.854166666666667e-06, "loss": 1.0378, "step": 3714 }, { "epoch": 0.8235082226132254, "grad_norm": 0.53515625, "learning_rate": 8.84308510638298e-06, "loss": 1.0285, "step": 3715 }, { "epoch": 0.8237298937364054, "grad_norm": 0.546875, "learning_rate": 8.832003546099291e-06, "loss": 1.0662, "step": 3716 }, { "epoch": 0.8239515648595852, "grad_norm": 0.55859375, "learning_rate": 8.820921985815602e-06, "loss": 1.0866, "step": 3717 }, { "epoch": 0.824173235982765, "grad_norm": 0.53515625, "learning_rate": 8.809840425531915e-06, "loss": 1.0665, "step": 3718 }, { "epoch": 0.824394907105945, "grad_norm": 0.53125, "learning_rate": 8.798758865248228e-06, "loss": 1.0713, "step": 3719 }, { "epoch": 0.8246165782291248, "grad_norm": 0.5546875, "learning_rate": 8.787677304964539e-06, "loss": 1.1104, "step": 3720 }, { "epoch": 0.8248382493523047, "grad_norm": 0.546875, "learning_rate": 8.776595744680852e-06, "loss": 1.0634, "step": 3721 }, { "epoch": 0.8250599204754846, "grad_norm": 0.53125, "learning_rate": 8.765514184397164e-06, "loss": 1.0545, "step": 3722 }, { "epoch": 0.8252815915986644, "grad_norm": 0.578125, "learning_rate": 8.754432624113475e-06, "loss": 1.0893, "step": 3723 }, { "epoch": 0.8255032627218443, "grad_norm": 0.515625, "learning_rate": 8.743351063829788e-06, "loss": 1.0323, "step": 3724 }, { "epoch": 0.8257249338450242, "grad_norm": 0.50390625, "learning_rate": 8.7322695035461e-06, "loss": 1.0191, "step": 3725 }, { "epoch": 0.825946604968204, "grad_norm": 0.53515625, "learning_rate": 8.721187943262412e-06, "loss": 1.0382, "step": 3726 }, { "epoch": 0.8261682760913839, "grad_norm": 0.54296875, "learning_rate": 8.710106382978723e-06, "loss": 0.9977, "step": 3727 }, { "epoch": 0.8263899472145638, "grad_norm": 0.515625, "learning_rate": 8.699024822695036e-06, "loss": 0.9896, "step": 3728 }, { "epoch": 0.8266116183377437, "grad_norm": 0.5546875, "learning_rate": 8.687943262411349e-06, "loss": 1.1331, "step": 3729 }, { "epoch": 0.8268332894609235, "grad_norm": 0.51953125, "learning_rate": 8.67686170212766e-06, "loss": 1.0687, "step": 3730 }, { "epoch": 0.8270549605841034, "grad_norm": 0.52734375, "learning_rate": 8.665780141843973e-06, "loss": 1.0276, "step": 3731 }, { "epoch": 0.8272766317072833, "grad_norm": 0.53125, "learning_rate": 8.654698581560285e-06, "loss": 1.0357, "step": 3732 }, { "epoch": 0.8274983028304631, "grad_norm": 0.5546875, "learning_rate": 8.643617021276596e-06, "loss": 1.0405, "step": 3733 }, { "epoch": 0.827719973953643, "grad_norm": 0.55078125, "learning_rate": 8.632535460992908e-06, "loss": 1.0428, "step": 3734 }, { "epoch": 0.8279416450768229, "grad_norm": 0.609375, "learning_rate": 8.62145390070922e-06, "loss": 1.0338, "step": 3735 }, { "epoch": 0.8281633162000027, "grad_norm": 0.5625, "learning_rate": 8.610372340425531e-06, "loss": 1.1265, "step": 3736 }, { "epoch": 0.8283849873231827, "grad_norm": 0.55859375, "learning_rate": 8.599290780141844e-06, "loss": 1.1206, "step": 3737 }, { "epoch": 0.8286066584463625, "grad_norm": 0.52734375, "learning_rate": 8.588209219858157e-06, "loss": 1.0322, "step": 3738 }, { "epoch": 0.8288283295695423, "grad_norm": 0.51171875, "learning_rate": 8.577127659574468e-06, "loss": 1.0672, "step": 3739 }, { "epoch": 0.8290500006927223, "grad_norm": 0.53125, "learning_rate": 8.56604609929078e-06, "loss": 1.0378, "step": 3740 }, { "epoch": 0.8292716718159021, "grad_norm": 0.53125, "learning_rate": 8.554964539007094e-06, "loss": 1.095, "step": 3741 }, { "epoch": 0.8294933429390821, "grad_norm": 0.52734375, "learning_rate": 8.543882978723405e-06, "loss": 1.0219, "step": 3742 }, { "epoch": 0.8297150140622619, "grad_norm": 0.5625, "learning_rate": 8.532801418439716e-06, "loss": 1.1559, "step": 3743 }, { "epoch": 0.8299366851854417, "grad_norm": 0.56640625, "learning_rate": 8.521719858156028e-06, "loss": 1.1419, "step": 3744 }, { "epoch": 0.8301583563086217, "grad_norm": 0.53125, "learning_rate": 8.510638297872341e-06, "loss": 1.0907, "step": 3745 }, { "epoch": 0.8303800274318015, "grad_norm": 0.515625, "learning_rate": 8.499556737588652e-06, "loss": 1.0455, "step": 3746 }, { "epoch": 0.8306016985549813, "grad_norm": 0.5078125, "learning_rate": 8.488475177304965e-06, "loss": 1.0405, "step": 3747 }, { "epoch": 0.8308233696781613, "grad_norm": 0.53125, "learning_rate": 8.477393617021278e-06, "loss": 1.056, "step": 3748 }, { "epoch": 0.8310450408013411, "grad_norm": 0.5390625, "learning_rate": 8.466312056737589e-06, "loss": 1.1041, "step": 3749 }, { "epoch": 0.831266711924521, "grad_norm": 0.52734375, "learning_rate": 8.455230496453902e-06, "loss": 0.9967, "step": 3750 }, { "epoch": 0.8314883830477009, "grad_norm": 0.5546875, "learning_rate": 8.444148936170213e-06, "loss": 1.1276, "step": 3751 }, { "epoch": 0.8317100541708807, "grad_norm": 0.5625, "learning_rate": 8.433067375886526e-06, "loss": 1.0666, "step": 3752 }, { "epoch": 0.8319317252940606, "grad_norm": 0.5390625, "learning_rate": 8.421985815602837e-06, "loss": 1.1478, "step": 3753 }, { "epoch": 0.8321533964172405, "grad_norm": 0.51953125, "learning_rate": 8.41090425531915e-06, "loss": 0.9781, "step": 3754 }, { "epoch": 0.8323750675404203, "grad_norm": 0.53515625, "learning_rate": 8.399822695035462e-06, "loss": 1.0835, "step": 3755 }, { "epoch": 0.8325967386636002, "grad_norm": 0.5234375, "learning_rate": 8.388741134751773e-06, "loss": 0.9982, "step": 3756 }, { "epoch": 0.8328184097867801, "grad_norm": 0.50390625, "learning_rate": 8.377659574468086e-06, "loss": 1.024, "step": 3757 }, { "epoch": 0.83304008090996, "grad_norm": 0.5078125, "learning_rate": 8.366578014184399e-06, "loss": 1.0016, "step": 3758 }, { "epoch": 0.8332617520331398, "grad_norm": 0.52734375, "learning_rate": 8.35549645390071e-06, "loss": 1.0871, "step": 3759 }, { "epoch": 0.8334834231563197, "grad_norm": 0.515625, "learning_rate": 8.344414893617021e-06, "loss": 1.0605, "step": 3760 }, { "epoch": 0.8337050942794996, "grad_norm": 0.546875, "learning_rate": 8.333333333333334e-06, "loss": 1.0714, "step": 3761 }, { "epoch": 0.8339267654026794, "grad_norm": 0.546875, "learning_rate": 8.322251773049645e-06, "loss": 0.9774, "step": 3762 }, { "epoch": 0.8341484365258593, "grad_norm": 0.55859375, "learning_rate": 8.311170212765958e-06, "loss": 1.1085, "step": 3763 }, { "epoch": 0.8343701076490392, "grad_norm": 0.5625, "learning_rate": 8.30008865248227e-06, "loss": 1.2066, "step": 3764 }, { "epoch": 0.834591778772219, "grad_norm": 0.53515625, "learning_rate": 8.289007092198581e-06, "loss": 1.0879, "step": 3765 }, { "epoch": 0.834813449895399, "grad_norm": 0.58203125, "learning_rate": 8.277925531914894e-06, "loss": 1.055, "step": 3766 }, { "epoch": 0.8350351210185788, "grad_norm": 0.53515625, "learning_rate": 8.266843971631207e-06, "loss": 1.0889, "step": 3767 }, { "epoch": 0.8352567921417586, "grad_norm": 0.515625, "learning_rate": 8.255762411347518e-06, "loss": 1.0096, "step": 3768 }, { "epoch": 0.8354784632649386, "grad_norm": 0.52734375, "learning_rate": 8.244680851063829e-06, "loss": 1.1029, "step": 3769 }, { "epoch": 0.8357001343881184, "grad_norm": 0.5625, "learning_rate": 8.233599290780142e-06, "loss": 1.0404, "step": 3770 }, { "epoch": 0.8359218055112984, "grad_norm": 0.53515625, "learning_rate": 8.222517730496455e-06, "loss": 1.0565, "step": 3771 }, { "epoch": 0.8361434766344782, "grad_norm": 0.5703125, "learning_rate": 8.211436170212766e-06, "loss": 1.0607, "step": 3772 }, { "epoch": 0.836365147757658, "grad_norm": 0.51953125, "learning_rate": 8.200354609929079e-06, "loss": 1.0284, "step": 3773 }, { "epoch": 0.836586818880838, "grad_norm": 0.53125, "learning_rate": 8.189273049645391e-06, "loss": 0.9892, "step": 3774 }, { "epoch": 0.8368084900040178, "grad_norm": 0.5234375, "learning_rate": 8.178191489361702e-06, "loss": 1.0325, "step": 3775 }, { "epoch": 0.8370301611271976, "grad_norm": 0.52734375, "learning_rate": 8.167109929078015e-06, "loss": 1.1228, "step": 3776 }, { "epoch": 0.8372518322503776, "grad_norm": 0.51953125, "learning_rate": 8.156028368794328e-06, "loss": 1.0449, "step": 3777 }, { "epoch": 0.8374735033735574, "grad_norm": 0.53515625, "learning_rate": 8.144946808510637e-06, "loss": 1.0875, "step": 3778 }, { "epoch": 0.8376951744967372, "grad_norm": 0.51953125, "learning_rate": 8.13386524822695e-06, "loss": 0.9883, "step": 3779 }, { "epoch": 0.8379168456199172, "grad_norm": 0.60546875, "learning_rate": 8.122783687943263e-06, "loss": 1.0322, "step": 3780 }, { "epoch": 0.838138516743097, "grad_norm": 0.53515625, "learning_rate": 8.111702127659574e-06, "loss": 1.0688, "step": 3781 }, { "epoch": 0.8383601878662769, "grad_norm": 0.53125, "learning_rate": 8.100620567375887e-06, "loss": 1.0298, "step": 3782 }, { "epoch": 0.8385818589894568, "grad_norm": 0.515625, "learning_rate": 8.0895390070922e-06, "loss": 0.966, "step": 3783 }, { "epoch": 0.8388035301126366, "grad_norm": 0.5390625, "learning_rate": 8.078457446808512e-06, "loss": 1.0123, "step": 3784 }, { "epoch": 0.8390252012358165, "grad_norm": 0.54296875, "learning_rate": 8.067375886524823e-06, "loss": 1.1283, "step": 3785 }, { "epoch": 0.8392468723589964, "grad_norm": 0.52734375, "learning_rate": 8.056294326241134e-06, "loss": 1.0987, "step": 3786 }, { "epoch": 0.8394685434821763, "grad_norm": 0.5546875, "learning_rate": 8.045212765957447e-06, "loss": 1.0831, "step": 3787 }, { "epoch": 0.8396902146053561, "grad_norm": 0.51953125, "learning_rate": 8.034131205673758e-06, "loss": 1.0669, "step": 3788 }, { "epoch": 0.839911885728536, "grad_norm": 0.55078125, "learning_rate": 8.023049645390071e-06, "loss": 1.1153, "step": 3789 }, { "epoch": 0.8401335568517159, "grad_norm": 0.5859375, "learning_rate": 8.011968085106384e-06, "loss": 1.1081, "step": 3790 }, { "epoch": 0.8403552279748957, "grad_norm": 0.5625, "learning_rate": 8.000886524822695e-06, "loss": 1.1154, "step": 3791 }, { "epoch": 0.8405768990980756, "grad_norm": 0.546875, "learning_rate": 7.989804964539008e-06, "loss": 1.0797, "step": 3792 }, { "epoch": 0.8407985702212555, "grad_norm": 0.55859375, "learning_rate": 7.97872340425532e-06, "loss": 1.0756, "step": 3793 }, { "epoch": 0.8410202413444353, "grad_norm": 0.54296875, "learning_rate": 7.967641843971632e-06, "loss": 1.0833, "step": 3794 }, { "epoch": 0.8412419124676153, "grad_norm": 0.5703125, "learning_rate": 7.956560283687943e-06, "loss": 1.0517, "step": 3795 }, { "epoch": 0.8414635835907951, "grad_norm": 0.546875, "learning_rate": 7.945478723404255e-06, "loss": 1.1272, "step": 3796 }, { "epoch": 0.8416852547139749, "grad_norm": 0.54296875, "learning_rate": 7.934397163120568e-06, "loss": 1.0793, "step": 3797 }, { "epoch": 0.8419069258371549, "grad_norm": 0.53515625, "learning_rate": 7.92331560283688e-06, "loss": 1.0411, "step": 3798 }, { "epoch": 0.8421285969603347, "grad_norm": 0.54296875, "learning_rate": 7.912234042553192e-06, "loss": 1.0579, "step": 3799 }, { "epoch": 0.8423502680835145, "grad_norm": 0.53515625, "learning_rate": 7.901152482269505e-06, "loss": 1.0771, "step": 3800 }, { "epoch": 0.8425719392066945, "grad_norm": 0.5390625, "learning_rate": 7.890070921985816e-06, "loss": 0.9791, "step": 3801 }, { "epoch": 0.8427936103298743, "grad_norm": 0.53515625, "learning_rate": 7.878989361702129e-06, "loss": 1.0166, "step": 3802 }, { "epoch": 0.8430152814530543, "grad_norm": 0.50390625, "learning_rate": 7.867907801418441e-06, "loss": 1.0814, "step": 3803 }, { "epoch": 0.8432369525762341, "grad_norm": 0.54296875, "learning_rate": 7.85682624113475e-06, "loss": 1.0426, "step": 3804 }, { "epoch": 0.8434586236994139, "grad_norm": 0.5859375, "learning_rate": 7.845744680851064e-06, "loss": 1.1472, "step": 3805 }, { "epoch": 0.8436802948225939, "grad_norm": 0.53515625, "learning_rate": 7.834663120567376e-06, "loss": 1.0493, "step": 3806 }, { "epoch": 0.8439019659457737, "grad_norm": 0.5234375, "learning_rate": 7.823581560283687e-06, "loss": 1.0033, "step": 3807 }, { "epoch": 0.8441236370689535, "grad_norm": 0.55859375, "learning_rate": 7.8125e-06, "loss": 1.0374, "step": 3808 }, { "epoch": 0.8443453081921335, "grad_norm": 0.53125, "learning_rate": 7.801418439716313e-06, "loss": 1.0231, "step": 3809 }, { "epoch": 0.8445669793153133, "grad_norm": 0.54296875, "learning_rate": 7.790336879432624e-06, "loss": 1.1002, "step": 3810 }, { "epoch": 0.8447886504384932, "grad_norm": 0.52734375, "learning_rate": 7.779255319148937e-06, "loss": 1.0748, "step": 3811 }, { "epoch": 0.8450103215616731, "grad_norm": 0.51953125, "learning_rate": 7.76817375886525e-06, "loss": 1.0226, "step": 3812 }, { "epoch": 0.8452319926848529, "grad_norm": 0.52734375, "learning_rate": 7.75709219858156e-06, "loss": 1.0473, "step": 3813 }, { "epoch": 0.8454536638080328, "grad_norm": 0.5703125, "learning_rate": 7.746010638297872e-06, "loss": 1.2281, "step": 3814 }, { "epoch": 0.8456753349312127, "grad_norm": 0.5390625, "learning_rate": 7.734929078014184e-06, "loss": 1.0407, "step": 3815 }, { "epoch": 0.8458970060543926, "grad_norm": 0.5234375, "learning_rate": 7.723847517730497e-06, "loss": 1.1674, "step": 3816 }, { "epoch": 0.8461186771775724, "grad_norm": 0.53125, "learning_rate": 7.712765957446808e-06, "loss": 1.0294, "step": 3817 }, { "epoch": 0.8463403483007523, "grad_norm": 0.55859375, "learning_rate": 7.701684397163121e-06, "loss": 1.0747, "step": 3818 }, { "epoch": 0.8465620194239322, "grad_norm": 0.546875, "learning_rate": 7.690602836879434e-06, "loss": 1.0697, "step": 3819 }, { "epoch": 0.846783690547112, "grad_norm": 0.5546875, "learning_rate": 7.679521276595745e-06, "loss": 1.0376, "step": 3820 }, { "epoch": 0.847005361670292, "grad_norm": 0.53125, "learning_rate": 7.668439716312058e-06, "loss": 1.0591, "step": 3821 }, { "epoch": 0.8472270327934718, "grad_norm": 0.55078125, "learning_rate": 7.657358156028369e-06, "loss": 1.0721, "step": 3822 }, { "epoch": 0.8474487039166516, "grad_norm": 0.52734375, "learning_rate": 7.646276595744682e-06, "loss": 1.0499, "step": 3823 }, { "epoch": 0.8476703750398316, "grad_norm": 0.53515625, "learning_rate": 7.635195035460993e-06, "loss": 1.0491, "step": 3824 }, { "epoch": 0.8478920461630114, "grad_norm": 0.53125, "learning_rate": 7.6241134751773054e-06, "loss": 1.1152, "step": 3825 }, { "epoch": 0.8481137172861912, "grad_norm": 0.53125, "learning_rate": 7.613031914893617e-06, "loss": 1.1188, "step": 3826 }, { "epoch": 0.8483353884093712, "grad_norm": 0.546875, "learning_rate": 7.601950354609929e-06, "loss": 1.0861, "step": 3827 }, { "epoch": 0.848557059532551, "grad_norm": 0.57421875, "learning_rate": 7.590868794326242e-06, "loss": 1.0633, "step": 3828 }, { "epoch": 0.8487787306557308, "grad_norm": 0.56640625, "learning_rate": 7.579787234042554e-06, "loss": 1.0932, "step": 3829 }, { "epoch": 0.8490004017789108, "grad_norm": 0.55078125, "learning_rate": 7.568705673758866e-06, "loss": 1.1762, "step": 3830 }, { "epoch": 0.8492220729020906, "grad_norm": 0.5703125, "learning_rate": 7.557624113475177e-06, "loss": 1.0522, "step": 3831 }, { "epoch": 0.8494437440252705, "grad_norm": 0.515625, "learning_rate": 7.546542553191489e-06, "loss": 1.0305, "step": 3832 }, { "epoch": 0.8496654151484504, "grad_norm": 0.53515625, "learning_rate": 7.535460992907802e-06, "loss": 0.9925, "step": 3833 }, { "epoch": 0.8498870862716302, "grad_norm": 0.5546875, "learning_rate": 7.524379432624114e-06, "loss": 1.162, "step": 3834 }, { "epoch": 0.8501087573948102, "grad_norm": 0.5234375, "learning_rate": 7.513297872340426e-06, "loss": 0.9842, "step": 3835 }, { "epoch": 0.85033042851799, "grad_norm": 0.5625, "learning_rate": 7.502216312056738e-06, "loss": 1.1437, "step": 3836 }, { "epoch": 0.8505520996411698, "grad_norm": 0.53125, "learning_rate": 7.49113475177305e-06, "loss": 1.0396, "step": 3837 }, { "epoch": 0.8507737707643498, "grad_norm": 0.56640625, "learning_rate": 7.480053191489363e-06, "loss": 1.0917, "step": 3838 }, { "epoch": 0.8509954418875296, "grad_norm": 0.54296875, "learning_rate": 7.468971631205673e-06, "loss": 0.977, "step": 3839 }, { "epoch": 0.8512171130107095, "grad_norm": 0.54296875, "learning_rate": 7.457890070921986e-06, "loss": 1.0984, "step": 3840 }, { "epoch": 0.8514387841338894, "grad_norm": 0.53515625, "learning_rate": 7.446808510638298e-06, "loss": 1.0291, "step": 3841 }, { "epoch": 0.8516604552570692, "grad_norm": 0.53125, "learning_rate": 7.43572695035461e-06, "loss": 0.9689, "step": 3842 }, { "epoch": 0.8518821263802491, "grad_norm": 0.53515625, "learning_rate": 7.424645390070923e-06, "loss": 1.0776, "step": 3843 }, { "epoch": 0.852103797503429, "grad_norm": 0.53515625, "learning_rate": 7.4135638297872346e-06, "loss": 1.0529, "step": 3844 }, { "epoch": 0.8523254686266089, "grad_norm": 0.5234375, "learning_rate": 7.4024822695035465e-06, "loss": 1.0595, "step": 3845 }, { "epoch": 0.8525471397497887, "grad_norm": 0.5390625, "learning_rate": 7.391400709219859e-06, "loss": 1.0394, "step": 3846 }, { "epoch": 0.8527688108729686, "grad_norm": 0.546875, "learning_rate": 7.380319148936171e-06, "loss": 1.0583, "step": 3847 }, { "epoch": 0.8529904819961485, "grad_norm": 0.53515625, "learning_rate": 7.369237588652482e-06, "loss": 1.0942, "step": 3848 }, { "epoch": 0.8532121531193283, "grad_norm": 0.52734375, "learning_rate": 7.358156028368794e-06, "loss": 0.9857, "step": 3849 }, { "epoch": 0.8534338242425082, "grad_norm": 0.53125, "learning_rate": 7.347074468085106e-06, "loss": 1.0033, "step": 3850 }, { "epoch": 0.8536554953656881, "grad_norm": 0.5078125, "learning_rate": 7.335992907801419e-06, "loss": 1.04, "step": 3851 }, { "epoch": 0.8538771664888679, "grad_norm": 0.5390625, "learning_rate": 7.324911347517731e-06, "loss": 1.089, "step": 3852 }, { "epoch": 0.8540988376120479, "grad_norm": 0.546875, "learning_rate": 7.313829787234043e-06, "loss": 1.0632, "step": 3853 }, { "epoch": 0.8543205087352277, "grad_norm": 0.5234375, "learning_rate": 7.3027482269503555e-06, "loss": 0.99, "step": 3854 }, { "epoch": 0.8545421798584075, "grad_norm": 0.54296875, "learning_rate": 7.2916666666666674e-06, "loss": 1.1357, "step": 3855 }, { "epoch": 0.8547638509815875, "grad_norm": 0.5234375, "learning_rate": 7.280585106382979e-06, "loss": 1.0184, "step": 3856 }, { "epoch": 0.8549855221047673, "grad_norm": 0.50390625, "learning_rate": 7.2695035460992904e-06, "loss": 1.0386, "step": 3857 }, { "epoch": 0.8552071932279471, "grad_norm": 0.52734375, "learning_rate": 7.258421985815602e-06, "loss": 0.9852, "step": 3858 }, { "epoch": 0.8554288643511271, "grad_norm": 0.515625, "learning_rate": 7.247340425531915e-06, "loss": 0.97, "step": 3859 }, { "epoch": 0.8556505354743069, "grad_norm": 0.52734375, "learning_rate": 7.236258865248227e-06, "loss": 1.029, "step": 3860 }, { "epoch": 0.8558722065974868, "grad_norm": 0.53515625, "learning_rate": 7.225177304964539e-06, "loss": 1.0532, "step": 3861 }, { "epoch": 0.8560938777206667, "grad_norm": 0.5078125, "learning_rate": 7.214095744680852e-06, "loss": 0.9844, "step": 3862 }, { "epoch": 0.8563155488438465, "grad_norm": 0.55859375, "learning_rate": 7.203014184397164e-06, "loss": 1.1441, "step": 3863 }, { "epoch": 0.8565372199670265, "grad_norm": 0.5390625, "learning_rate": 7.191932624113476e-06, "loss": 1.0695, "step": 3864 }, { "epoch": 0.8567588910902063, "grad_norm": 0.55078125, "learning_rate": 7.180851063829788e-06, "loss": 1.1063, "step": 3865 }, { "epoch": 0.8569805622133861, "grad_norm": 0.51171875, "learning_rate": 7.1697695035460995e-06, "loss": 1.0451, "step": 3866 }, { "epoch": 0.8572022333365661, "grad_norm": 0.51953125, "learning_rate": 7.158687943262411e-06, "loss": 0.9942, "step": 3867 }, { "epoch": 0.8574239044597459, "grad_norm": 0.51953125, "learning_rate": 7.147606382978723e-06, "loss": 1.0308, "step": 3868 }, { "epoch": 0.8576455755829258, "grad_norm": 0.55078125, "learning_rate": 7.136524822695036e-06, "loss": 1.1455, "step": 3869 }, { "epoch": 0.8578672467061057, "grad_norm": 0.51953125, "learning_rate": 7.125443262411348e-06, "loss": 0.912, "step": 3870 }, { "epoch": 0.8580889178292855, "grad_norm": 0.5859375, "learning_rate": 7.11436170212766e-06, "loss": 1.0421, "step": 3871 }, { "epoch": 0.8583105889524654, "grad_norm": 0.55078125, "learning_rate": 7.103280141843973e-06, "loss": 1.0863, "step": 3872 }, { "epoch": 0.8585322600756453, "grad_norm": 0.5390625, "learning_rate": 7.092198581560285e-06, "loss": 1.0408, "step": 3873 }, { "epoch": 0.8587539311988251, "grad_norm": 0.58984375, "learning_rate": 7.0811170212765966e-06, "loss": 1.1442, "step": 3874 }, { "epoch": 0.858975602322005, "grad_norm": 0.53515625, "learning_rate": 7.070035460992908e-06, "loss": 1.0679, "step": 3875 }, { "epoch": 0.8591972734451849, "grad_norm": 0.53125, "learning_rate": 7.0589539007092196e-06, "loss": 1.1049, "step": 3876 }, { "epoch": 0.8594189445683648, "grad_norm": 0.53515625, "learning_rate": 7.047872340425532e-06, "loss": 1.0751, "step": 3877 }, { "epoch": 0.8596406156915446, "grad_norm": 0.51953125, "learning_rate": 7.036790780141844e-06, "loss": 1.0192, "step": 3878 }, { "epoch": 0.8598622868147245, "grad_norm": 0.5546875, "learning_rate": 7.025709219858156e-06, "loss": 1.1482, "step": 3879 }, { "epoch": 0.8600839579379044, "grad_norm": 0.494140625, "learning_rate": 7.014627659574469e-06, "loss": 0.976, "step": 3880 }, { "epoch": 0.8603056290610842, "grad_norm": 0.52734375, "learning_rate": 7.003546099290781e-06, "loss": 1.1071, "step": 3881 }, { "epoch": 0.8605273001842642, "grad_norm": 0.5546875, "learning_rate": 6.992464539007093e-06, "loss": 1.0835, "step": 3882 }, { "epoch": 0.860748971307444, "grad_norm": 0.52734375, "learning_rate": 6.981382978723404e-06, "loss": 0.9842, "step": 3883 }, { "epoch": 0.8609706424306238, "grad_norm": 0.56640625, "learning_rate": 6.970301418439716e-06, "loss": 1.0744, "step": 3884 }, { "epoch": 0.8611923135538038, "grad_norm": 0.546875, "learning_rate": 6.959219858156029e-06, "loss": 1.0416, "step": 3885 }, { "epoch": 0.8614139846769836, "grad_norm": 0.52734375, "learning_rate": 6.9481382978723405e-06, "loss": 1.0606, "step": 3886 }, { "epoch": 0.8616356558001634, "grad_norm": 0.53125, "learning_rate": 6.9370567375886524e-06, "loss": 1.0483, "step": 3887 }, { "epoch": 0.8618573269233434, "grad_norm": 0.51171875, "learning_rate": 6.925975177304965e-06, "loss": 0.9619, "step": 3888 }, { "epoch": 0.8620789980465232, "grad_norm": 0.5, "learning_rate": 6.914893617021277e-06, "loss": 0.9724, "step": 3889 }, { "epoch": 0.862300669169703, "grad_norm": 0.53515625, "learning_rate": 6.903812056737589e-06, "loss": 1.127, "step": 3890 }, { "epoch": 0.862522340292883, "grad_norm": 0.5390625, "learning_rate": 6.892730496453902e-06, "loss": 1.0509, "step": 3891 }, { "epoch": 0.8627440114160628, "grad_norm": 0.51953125, "learning_rate": 6.881648936170212e-06, "loss": 1.0427, "step": 3892 }, { "epoch": 0.8629656825392427, "grad_norm": 0.53515625, "learning_rate": 6.870567375886525e-06, "loss": 1.0084, "step": 3893 }, { "epoch": 0.8631873536624226, "grad_norm": 0.5390625, "learning_rate": 6.859485815602837e-06, "loss": 1.0132, "step": 3894 }, { "epoch": 0.8634090247856024, "grad_norm": 0.51953125, "learning_rate": 6.848404255319149e-06, "loss": 1.1117, "step": 3895 }, { "epoch": 0.8636306959087824, "grad_norm": 0.52734375, "learning_rate": 6.8373226950354615e-06, "loss": 1.0998, "step": 3896 }, { "epoch": 0.8638523670319622, "grad_norm": 0.53515625, "learning_rate": 6.826241134751773e-06, "loss": 1.0529, "step": 3897 }, { "epoch": 0.864074038155142, "grad_norm": 0.55078125, "learning_rate": 6.815159574468085e-06, "loss": 1.0597, "step": 3898 }, { "epoch": 0.864295709278322, "grad_norm": 0.51953125, "learning_rate": 6.804078014184398e-06, "loss": 1.0262, "step": 3899 }, { "epoch": 0.8645173804015018, "grad_norm": 0.53125, "learning_rate": 6.79299645390071e-06, "loss": 1.0431, "step": 3900 }, { "epoch": 0.8647390515246817, "grad_norm": 0.54296875, "learning_rate": 6.781914893617021e-06, "loss": 1.1592, "step": 3901 }, { "epoch": 0.8649607226478616, "grad_norm": 0.53515625, "learning_rate": 6.770833333333333e-06, "loss": 1.0309, "step": 3902 }, { "epoch": 0.8651823937710414, "grad_norm": 0.56640625, "learning_rate": 6.759751773049646e-06, "loss": 1.0845, "step": 3903 }, { "epoch": 0.8654040648942213, "grad_norm": 0.5390625, "learning_rate": 6.748670212765958e-06, "loss": 1.0519, "step": 3904 }, { "epoch": 0.8656257360174012, "grad_norm": 0.5390625, "learning_rate": 6.73758865248227e-06, "loss": 1.0626, "step": 3905 }, { "epoch": 0.8658474071405811, "grad_norm": 0.55078125, "learning_rate": 6.726507092198582e-06, "loss": 1.1068, "step": 3906 }, { "epoch": 0.8660690782637609, "grad_norm": 0.546875, "learning_rate": 6.715425531914894e-06, "loss": 1.0494, "step": 3907 }, { "epoch": 0.8662907493869408, "grad_norm": 0.53515625, "learning_rate": 6.704343971631206e-06, "loss": 1.0967, "step": 3908 }, { "epoch": 0.8665124205101207, "grad_norm": 0.5078125, "learning_rate": 6.693262411347519e-06, "loss": 1.0198, "step": 3909 }, { "epoch": 0.8667340916333005, "grad_norm": 0.56640625, "learning_rate": 6.682180851063829e-06, "loss": 1.0888, "step": 3910 }, { "epoch": 0.8669557627564805, "grad_norm": 0.51171875, "learning_rate": 6.671099290780142e-06, "loss": 1.0159, "step": 3911 }, { "epoch": 0.8671774338796603, "grad_norm": 0.53125, "learning_rate": 6.660017730496454e-06, "loss": 1.0703, "step": 3912 }, { "epoch": 0.8673991050028401, "grad_norm": 0.5390625, "learning_rate": 6.648936170212766e-06, "loss": 1.0169, "step": 3913 }, { "epoch": 0.8676207761260201, "grad_norm": 0.52734375, "learning_rate": 6.637854609929079e-06, "loss": 0.9706, "step": 3914 }, { "epoch": 0.8678424472491999, "grad_norm": 0.5390625, "learning_rate": 6.626773049645391e-06, "loss": 1.086, "step": 3915 }, { "epoch": 0.8680641183723797, "grad_norm": 0.53125, "learning_rate": 6.6156914893617025e-06, "loss": 1.0466, "step": 3916 }, { "epoch": 0.8682857894955597, "grad_norm": 0.54296875, "learning_rate": 6.604609929078015e-06, "loss": 1.0692, "step": 3917 }, { "epoch": 0.8685074606187395, "grad_norm": 0.5625, "learning_rate": 6.593528368794327e-06, "loss": 1.1467, "step": 3918 }, { "epoch": 0.8687291317419193, "grad_norm": 0.546875, "learning_rate": 6.582446808510638e-06, "loss": 1.0268, "step": 3919 }, { "epoch": 0.8689508028650993, "grad_norm": 0.51953125, "learning_rate": 6.57136524822695e-06, "loss": 1.0932, "step": 3920 }, { "epoch": 0.8691724739882791, "grad_norm": 0.52734375, "learning_rate": 6.560283687943262e-06, "loss": 1.0807, "step": 3921 }, { "epoch": 0.869394145111459, "grad_norm": 0.52734375, "learning_rate": 6.549202127659575e-06, "loss": 1.126, "step": 3922 }, { "epoch": 0.8696158162346389, "grad_norm": 0.55078125, "learning_rate": 6.538120567375887e-06, "loss": 1.1647, "step": 3923 }, { "epoch": 0.8698374873578187, "grad_norm": 0.5390625, "learning_rate": 6.527039007092199e-06, "loss": 1.1028, "step": 3924 }, { "epoch": 0.8700591584809986, "grad_norm": 0.51171875, "learning_rate": 6.5159574468085115e-06, "loss": 1.0553, "step": 3925 }, { "epoch": 0.8702808296041785, "grad_norm": 0.51953125, "learning_rate": 6.5048758865248235e-06, "loss": 1.0862, "step": 3926 }, { "epoch": 0.8705025007273584, "grad_norm": 0.53125, "learning_rate": 6.4937943262411345e-06, "loss": 1.0671, "step": 3927 }, { "epoch": 0.8707241718505383, "grad_norm": 0.52734375, "learning_rate": 6.4827127659574465e-06, "loss": 1.1149, "step": 3928 }, { "epoch": 0.8709458429737181, "grad_norm": 0.5234375, "learning_rate": 6.471631205673758e-06, "loss": 1.0807, "step": 3929 }, { "epoch": 0.871167514096898, "grad_norm": 0.515625, "learning_rate": 6.460549645390071e-06, "loss": 1.033, "step": 3930 }, { "epoch": 0.8713891852200779, "grad_norm": 0.54296875, "learning_rate": 6.449468085106383e-06, "loss": 1.0373, "step": 3931 }, { "epoch": 0.8716108563432577, "grad_norm": 0.5546875, "learning_rate": 6.438386524822695e-06, "loss": 1.1008, "step": 3932 }, { "epoch": 0.8718325274664376, "grad_norm": 0.515625, "learning_rate": 6.427304964539008e-06, "loss": 0.996, "step": 3933 }, { "epoch": 0.8720541985896175, "grad_norm": 0.52734375, "learning_rate": 6.41622340425532e-06, "loss": 1.0901, "step": 3934 }, { "epoch": 0.8722758697127974, "grad_norm": 0.5625, "learning_rate": 6.4051418439716325e-06, "loss": 1.0928, "step": 3935 }, { "epoch": 0.8724975408359772, "grad_norm": 0.54296875, "learning_rate": 6.394060283687943e-06, "loss": 1.0536, "step": 3936 }, { "epoch": 0.8727192119591571, "grad_norm": 0.52734375, "learning_rate": 6.3829787234042555e-06, "loss": 1.1691, "step": 3937 }, { "epoch": 0.872940883082337, "grad_norm": 0.53515625, "learning_rate": 6.3718971631205674e-06, "loss": 1.0605, "step": 3938 }, { "epoch": 0.8731625542055168, "grad_norm": 0.58984375, "learning_rate": 6.360815602836879e-06, "loss": 1.0854, "step": 3939 }, { "epoch": 0.8733842253286968, "grad_norm": 0.51953125, "learning_rate": 6.349734042553192e-06, "loss": 1.1151, "step": 3940 }, { "epoch": 0.8736058964518766, "grad_norm": 0.53515625, "learning_rate": 6.338652482269504e-06, "loss": 1.0609, "step": 3941 }, { "epoch": 0.8738275675750564, "grad_norm": 0.53125, "learning_rate": 6.327570921985816e-06, "loss": 1.0316, "step": 3942 }, { "epoch": 0.8740492386982364, "grad_norm": 0.5078125, "learning_rate": 6.316489361702129e-06, "loss": 1.0687, "step": 3943 }, { "epoch": 0.8742709098214162, "grad_norm": 0.55078125, "learning_rate": 6.305407801418441e-06, "loss": 1.0388, "step": 3944 }, { "epoch": 0.874492580944596, "grad_norm": 0.53125, "learning_rate": 6.294326241134752e-06, "loss": 1.0288, "step": 3945 }, { "epoch": 0.874714252067776, "grad_norm": 0.53125, "learning_rate": 6.283244680851064e-06, "loss": 1.0177, "step": 3946 }, { "epoch": 0.8749359231909558, "grad_norm": 0.53515625, "learning_rate": 6.272163120567376e-06, "loss": 1.0318, "step": 3947 }, { "epoch": 0.8751575943141356, "grad_norm": 0.53125, "learning_rate": 6.261081560283688e-06, "loss": 1.1776, "step": 3948 }, { "epoch": 0.8753792654373156, "grad_norm": 0.58203125, "learning_rate": 6.25e-06, "loss": 1.0392, "step": 3949 }, { "epoch": 0.8756009365604954, "grad_norm": 0.53515625, "learning_rate": 6.238918439716312e-06, "loss": 1.1303, "step": 3950 }, { "epoch": 0.8758226076836753, "grad_norm": 0.5546875, "learning_rate": 6.227836879432625e-06, "loss": 1.0884, "step": 3951 }, { "epoch": 0.8760442788068552, "grad_norm": 0.54296875, "learning_rate": 6.216755319148936e-06, "loss": 1.0803, "step": 3952 }, { "epoch": 0.876265949930035, "grad_norm": 0.54296875, "learning_rate": 6.205673758865249e-06, "loss": 1.083, "step": 3953 }, { "epoch": 0.8764876210532149, "grad_norm": 0.58203125, "learning_rate": 6.194592198581561e-06, "loss": 1.1509, "step": 3954 }, { "epoch": 0.8767092921763948, "grad_norm": 0.5390625, "learning_rate": 6.183510638297873e-06, "loss": 1.0857, "step": 3955 }, { "epoch": 0.8769309632995747, "grad_norm": 0.5703125, "learning_rate": 6.172429078014185e-06, "loss": 1.1083, "step": 3956 }, { "epoch": 0.8771526344227546, "grad_norm": 0.5859375, "learning_rate": 6.1613475177304966e-06, "loss": 1.0864, "step": 3957 }, { "epoch": 0.8773743055459344, "grad_norm": 0.51953125, "learning_rate": 6.1502659574468085e-06, "loss": 1.059, "step": 3958 }, { "epoch": 0.8775959766691143, "grad_norm": 0.53125, "learning_rate": 6.139184397163121e-06, "loss": 1.1206, "step": 3959 }, { "epoch": 0.8778176477922942, "grad_norm": 0.53125, "learning_rate": 6.128102836879433e-06, "loss": 1.0082, "step": 3960 }, { "epoch": 0.878039318915474, "grad_norm": 0.55078125, "learning_rate": 6.117021276595745e-06, "loss": 1.0716, "step": 3961 }, { "epoch": 0.8782609900386539, "grad_norm": 0.5234375, "learning_rate": 6.105939716312057e-06, "loss": 1.0603, "step": 3962 }, { "epoch": 0.8784826611618338, "grad_norm": 0.5390625, "learning_rate": 6.094858156028369e-06, "loss": 1.0686, "step": 3963 }, { "epoch": 0.8787043322850137, "grad_norm": 0.546875, "learning_rate": 6.083776595744682e-06, "loss": 1.1205, "step": 3964 }, { "epoch": 0.8789260034081935, "grad_norm": 0.53515625, "learning_rate": 6.072695035460993e-06, "loss": 1.0159, "step": 3965 }, { "epoch": 0.8791476745313734, "grad_norm": 0.515625, "learning_rate": 6.0616134751773056e-06, "loss": 1.0068, "step": 3966 }, { "epoch": 0.8793693456545533, "grad_norm": 0.53515625, "learning_rate": 6.0505319148936175e-06, "loss": 1.011, "step": 3967 }, { "epoch": 0.8795910167777331, "grad_norm": 0.5234375, "learning_rate": 6.0394503546099294e-06, "loss": 1.0569, "step": 3968 }, { "epoch": 0.879812687900913, "grad_norm": 0.51171875, "learning_rate": 6.028368794326241e-06, "loss": 0.9883, "step": 3969 }, { "epoch": 0.8800343590240929, "grad_norm": 0.51171875, "learning_rate": 6.017287234042553e-06, "loss": 1.0591, "step": 3970 }, { "epoch": 0.8802560301472727, "grad_norm": 0.50390625, "learning_rate": 6.006205673758865e-06, "loss": 1.043, "step": 3971 }, { "epoch": 0.8804777012704527, "grad_norm": 0.5078125, "learning_rate": 5.995124113475178e-06, "loss": 1.0236, "step": 3972 }, { "epoch": 0.8806993723936325, "grad_norm": 0.51953125, "learning_rate": 5.98404255319149e-06, "loss": 1.0314, "step": 3973 }, { "epoch": 0.8809210435168123, "grad_norm": 0.5390625, "learning_rate": 5.972960992907802e-06, "loss": 0.9985, "step": 3974 }, { "epoch": 0.8811427146399923, "grad_norm": 0.53125, "learning_rate": 5.961879432624114e-06, "loss": 1.062, "step": 3975 }, { "epoch": 0.8813643857631721, "grad_norm": 0.53515625, "learning_rate": 5.950797872340426e-06, "loss": 0.978, "step": 3976 }, { "epoch": 0.8815860568863519, "grad_norm": 0.53125, "learning_rate": 5.9397163120567384e-06, "loss": 1.0343, "step": 3977 }, { "epoch": 0.8818077280095319, "grad_norm": 0.5, "learning_rate": 5.9286347517730495e-06, "loss": 1.0003, "step": 3978 }, { "epoch": 0.8820293991327117, "grad_norm": 0.56640625, "learning_rate": 5.9175531914893615e-06, "loss": 1.1025, "step": 3979 }, { "epoch": 0.8822510702558916, "grad_norm": 0.546875, "learning_rate": 5.906471631205674e-06, "loss": 1.0927, "step": 3980 }, { "epoch": 0.8824727413790715, "grad_norm": 0.54296875, "learning_rate": 5.895390070921986e-06, "loss": 1.1231, "step": 3981 }, { "epoch": 0.8826944125022513, "grad_norm": 0.51953125, "learning_rate": 5.884308510638298e-06, "loss": 1.0181, "step": 3982 }, { "epoch": 0.8829160836254312, "grad_norm": 0.5234375, "learning_rate": 5.87322695035461e-06, "loss": 1.0111, "step": 3983 }, { "epoch": 0.8831377547486111, "grad_norm": 0.498046875, "learning_rate": 5.862145390070922e-06, "loss": 0.9956, "step": 3984 }, { "epoch": 0.883359425871791, "grad_norm": 0.5703125, "learning_rate": 5.851063829787235e-06, "loss": 1.1019, "step": 3985 }, { "epoch": 0.8835810969949708, "grad_norm": 0.546875, "learning_rate": 5.839982269503547e-06, "loss": 1.1021, "step": 3986 }, { "epoch": 0.8838027681181507, "grad_norm": 0.55078125, "learning_rate": 5.8289007092198586e-06, "loss": 1.0807, "step": 3987 }, { "epoch": 0.8840244392413306, "grad_norm": 0.5234375, "learning_rate": 5.8178191489361705e-06, "loss": 1.0161, "step": 3988 }, { "epoch": 0.8842461103645105, "grad_norm": 0.51171875, "learning_rate": 5.806737588652482e-06, "loss": 0.9921, "step": 3989 }, { "epoch": 0.8844677814876903, "grad_norm": 0.52734375, "learning_rate": 5.795656028368795e-06, "loss": 0.9902, "step": 3990 }, { "epoch": 0.8846894526108702, "grad_norm": 0.56640625, "learning_rate": 5.784574468085106e-06, "loss": 1.1233, "step": 3991 }, { "epoch": 0.8849111237340501, "grad_norm": 0.546875, "learning_rate": 5.773492907801418e-06, "loss": 1.0293, "step": 3992 }, { "epoch": 0.88513279485723, "grad_norm": 0.52734375, "learning_rate": 5.762411347517731e-06, "loss": 1.0118, "step": 3993 }, { "epoch": 0.8853544659804098, "grad_norm": 0.51171875, "learning_rate": 5.751329787234043e-06, "loss": 1.0708, "step": 3994 }, { "epoch": 0.8855761371035897, "grad_norm": 0.5078125, "learning_rate": 5.740248226950355e-06, "loss": 1.0268, "step": 3995 }, { "epoch": 0.8857978082267696, "grad_norm": 0.57421875, "learning_rate": 5.729166666666667e-06, "loss": 1.0872, "step": 3996 }, { "epoch": 0.8860194793499494, "grad_norm": 0.5546875, "learning_rate": 5.718085106382979e-06, "loss": 1.0616, "step": 3997 }, { "epoch": 0.8862411504731293, "grad_norm": 0.5390625, "learning_rate": 5.7070035460992914e-06, "loss": 1.1493, "step": 3998 }, { "epoch": 0.8864628215963092, "grad_norm": 0.55859375, "learning_rate": 5.695921985815603e-06, "loss": 1.1266, "step": 3999 }, { "epoch": 0.886684492719489, "grad_norm": 0.5546875, "learning_rate": 5.684840425531915e-06, "loss": 1.1644, "step": 4000 }, { "epoch": 0.886906163842669, "grad_norm": 0.51953125, "learning_rate": 5.673758865248227e-06, "loss": 1.0611, "step": 4001 }, { "epoch": 0.8871278349658488, "grad_norm": 0.53125, "learning_rate": 5.662677304964539e-06, "loss": 1.0565, "step": 4002 }, { "epoch": 0.8873495060890286, "grad_norm": 0.5390625, "learning_rate": 5.651595744680852e-06, "loss": 1.0956, "step": 4003 }, { "epoch": 0.8875711772122086, "grad_norm": 0.5703125, "learning_rate": 5.640514184397164e-06, "loss": 1.0548, "step": 4004 }, { "epoch": 0.8877928483353884, "grad_norm": 0.52734375, "learning_rate": 5.629432624113475e-06, "loss": 1.0155, "step": 4005 }, { "epoch": 0.8880145194585682, "grad_norm": 0.5234375, "learning_rate": 5.618351063829788e-06, "loss": 1.0428, "step": 4006 }, { "epoch": 0.8882361905817482, "grad_norm": 0.5390625, "learning_rate": 5.6072695035461e-06, "loss": 1.0466, "step": 4007 }, { "epoch": 0.888457861704928, "grad_norm": 0.57421875, "learning_rate": 5.5961879432624115e-06, "loss": 1.1041, "step": 4008 }, { "epoch": 0.8886795328281079, "grad_norm": 0.546875, "learning_rate": 5.5851063829787235e-06, "loss": 1.1509, "step": 4009 }, { "epoch": 0.8889012039512878, "grad_norm": 0.51171875, "learning_rate": 5.574024822695035e-06, "loss": 1.0892, "step": 4010 }, { "epoch": 0.8891228750744676, "grad_norm": 0.53125, "learning_rate": 5.562943262411348e-06, "loss": 1.0702, "step": 4011 }, { "epoch": 0.8893445461976475, "grad_norm": 0.5390625, "learning_rate": 5.55186170212766e-06, "loss": 1.0705, "step": 4012 }, { "epoch": 0.8895662173208274, "grad_norm": 0.54296875, "learning_rate": 5.540780141843971e-06, "loss": 1.0159, "step": 4013 }, { "epoch": 0.8897878884440072, "grad_norm": 0.515625, "learning_rate": 5.529698581560284e-06, "loss": 1.0626, "step": 4014 }, { "epoch": 0.8900095595671871, "grad_norm": 0.5390625, "learning_rate": 5.518617021276596e-06, "loss": 1.0627, "step": 4015 }, { "epoch": 0.890231230690367, "grad_norm": 0.55078125, "learning_rate": 5.507535460992909e-06, "loss": 1.0857, "step": 4016 }, { "epoch": 0.8904529018135469, "grad_norm": 0.58203125, "learning_rate": 5.4964539007092206e-06, "loss": 1.0937, "step": 4017 }, { "epoch": 0.8906745729367267, "grad_norm": 0.5234375, "learning_rate": 5.485372340425532e-06, "loss": 1.1083, "step": 4018 }, { "epoch": 0.8908962440599066, "grad_norm": 0.53515625, "learning_rate": 5.474290780141844e-06, "loss": 1.077, "step": 4019 }, { "epoch": 0.8911179151830865, "grad_norm": 0.52734375, "learning_rate": 5.463209219858156e-06, "loss": 1.044, "step": 4020 }, { "epoch": 0.8913395863062664, "grad_norm": 0.53515625, "learning_rate": 5.452127659574468e-06, "loss": 1.0431, "step": 4021 }, { "epoch": 0.8915612574294463, "grad_norm": 0.53515625, "learning_rate": 5.44104609929078e-06, "loss": 1.0662, "step": 4022 }, { "epoch": 0.8917829285526261, "grad_norm": 0.5234375, "learning_rate": 5.429964539007092e-06, "loss": 0.9689, "step": 4023 }, { "epoch": 0.892004599675806, "grad_norm": 0.58203125, "learning_rate": 5.418882978723405e-06, "loss": 1.0703, "step": 4024 }, { "epoch": 0.8922262707989859, "grad_norm": 0.5, "learning_rate": 5.407801418439717e-06, "loss": 0.9798, "step": 4025 }, { "epoch": 0.8924479419221657, "grad_norm": 0.5234375, "learning_rate": 5.396719858156029e-06, "loss": 1.0288, "step": 4026 }, { "epoch": 0.8926696130453456, "grad_norm": 0.54296875, "learning_rate": 5.385638297872341e-06, "loss": 1.0136, "step": 4027 }, { "epoch": 0.8928912841685255, "grad_norm": 0.57421875, "learning_rate": 5.374556737588653e-06, "loss": 1.1633, "step": 4028 }, { "epoch": 0.8931129552917053, "grad_norm": 0.51953125, "learning_rate": 5.3634751773049645e-06, "loss": 0.9176, "step": 4029 }, { "epoch": 0.8933346264148853, "grad_norm": 0.55078125, "learning_rate": 5.352393617021277e-06, "loss": 1.0195, "step": 4030 }, { "epoch": 0.8935562975380651, "grad_norm": 0.50390625, "learning_rate": 5.341312056737588e-06, "loss": 0.9962, "step": 4031 }, { "epoch": 0.8937779686612449, "grad_norm": 0.52734375, "learning_rate": 5.330230496453901e-06, "loss": 1.084, "step": 4032 }, { "epoch": 0.8939996397844249, "grad_norm": 0.55859375, "learning_rate": 5.319148936170213e-06, "loss": 1.1262, "step": 4033 }, { "epoch": 0.8942213109076047, "grad_norm": 0.5234375, "learning_rate": 5.308067375886525e-06, "loss": 1.0317, "step": 4034 }, { "epoch": 0.8944429820307845, "grad_norm": 0.51171875, "learning_rate": 5.296985815602837e-06, "loss": 1.0554, "step": 4035 }, { "epoch": 0.8946646531539645, "grad_norm": 0.55078125, "learning_rate": 5.285904255319149e-06, "loss": 1.0929, "step": 4036 }, { "epoch": 0.8948863242771443, "grad_norm": 0.5703125, "learning_rate": 5.274822695035462e-06, "loss": 1.08, "step": 4037 }, { "epoch": 0.8951079954003242, "grad_norm": 0.58203125, "learning_rate": 5.2637411347517735e-06, "loss": 1.0986, "step": 4038 }, { "epoch": 0.8953296665235041, "grad_norm": 0.55078125, "learning_rate": 5.2526595744680855e-06, "loss": 1.081, "step": 4039 }, { "epoch": 0.8955513376466839, "grad_norm": 0.54296875, "learning_rate": 5.241578014184397e-06, "loss": 1.0123, "step": 4040 }, { "epoch": 0.8957730087698638, "grad_norm": 0.52734375, "learning_rate": 5.230496453900709e-06, "loss": 1.0333, "step": 4041 }, { "epoch": 0.8959946798930437, "grad_norm": 0.54296875, "learning_rate": 5.219414893617021e-06, "loss": 1.0636, "step": 4042 }, { "epoch": 0.8962163510162235, "grad_norm": 0.5390625, "learning_rate": 5.208333333333334e-06, "loss": 1.0371, "step": 4043 }, { "epoch": 0.8964380221394034, "grad_norm": 0.54296875, "learning_rate": 5.197251773049645e-06, "loss": 1.1199, "step": 4044 }, { "epoch": 0.8966596932625833, "grad_norm": 0.5390625, "learning_rate": 5.186170212765958e-06, "loss": 1.1032, "step": 4045 }, { "epoch": 0.8968813643857632, "grad_norm": 0.53125, "learning_rate": 5.17508865248227e-06, "loss": 1.0578, "step": 4046 }, { "epoch": 0.897103035508943, "grad_norm": 0.546875, "learning_rate": 5.164007092198582e-06, "loss": 1.0748, "step": 4047 }, { "epoch": 0.8973247066321229, "grad_norm": 0.53125, "learning_rate": 5.1529255319148945e-06, "loss": 1.0368, "step": 4048 }, { "epoch": 0.8975463777553028, "grad_norm": 0.52734375, "learning_rate": 5.1418439716312056e-06, "loss": 1.0363, "step": 4049 }, { "epoch": 0.8977680488784827, "grad_norm": 0.54296875, "learning_rate": 5.130762411347518e-06, "loss": 1.0098, "step": 4050 }, { "epoch": 0.8979897200016626, "grad_norm": 0.5078125, "learning_rate": 5.11968085106383e-06, "loss": 1.0122, "step": 4051 }, { "epoch": 0.8982113911248424, "grad_norm": 0.51953125, "learning_rate": 5.108599290780142e-06, "loss": 1.1204, "step": 4052 }, { "epoch": 0.8984330622480223, "grad_norm": 0.55859375, "learning_rate": 5.097517730496454e-06, "loss": 1.1359, "step": 4053 }, { "epoch": 0.8986547333712022, "grad_norm": 0.55078125, "learning_rate": 5.086436170212766e-06, "loss": 1.0489, "step": 4054 }, { "epoch": 0.898876404494382, "grad_norm": 0.5234375, "learning_rate": 5.075354609929078e-06, "loss": 0.9773, "step": 4055 }, { "epoch": 0.8990980756175619, "grad_norm": 0.54296875, "learning_rate": 5.064273049645391e-06, "loss": 1.0971, "step": 4056 }, { "epoch": 0.8993197467407418, "grad_norm": 0.54296875, "learning_rate": 5.053191489361702e-06, "loss": 1.0095, "step": 4057 }, { "epoch": 0.8995414178639216, "grad_norm": 0.54296875, "learning_rate": 5.042109929078015e-06, "loss": 1.0625, "step": 4058 }, { "epoch": 0.8997630889871016, "grad_norm": 0.5234375, "learning_rate": 5.0310283687943265e-06, "loss": 1.0253, "step": 4059 }, { "epoch": 0.8999847601102814, "grad_norm": 0.54296875, "learning_rate": 5.0199468085106384e-06, "loss": 1.1332, "step": 4060 }, { "epoch": 0.9002064312334612, "grad_norm": 0.515625, "learning_rate": 5.008865248226951e-06, "loss": 1.0018, "step": 4061 }, { "epoch": 0.9004281023566412, "grad_norm": 0.5390625, "learning_rate": 4.997783687943262e-06, "loss": 1.0262, "step": 4062 }, { "epoch": 0.900649773479821, "grad_norm": 0.53515625, "learning_rate": 4.986702127659574e-06, "loss": 1.0449, "step": 4063 }, { "epoch": 0.9008714446030008, "grad_norm": 0.53125, "learning_rate": 4.975620567375887e-06, "loss": 1.0393, "step": 4064 }, { "epoch": 0.9010931157261808, "grad_norm": 0.5234375, "learning_rate": 4.964539007092199e-06, "loss": 1.0982, "step": 4065 }, { "epoch": 0.9013147868493606, "grad_norm": 0.5234375, "learning_rate": 4.953457446808511e-06, "loss": 1.0778, "step": 4066 }, { "epoch": 0.9015364579725405, "grad_norm": 0.490234375, "learning_rate": 4.942375886524823e-06, "loss": 1.0302, "step": 4067 }, { "epoch": 0.9017581290957204, "grad_norm": 0.51953125, "learning_rate": 4.931294326241135e-06, "loss": 1.015, "step": 4068 }, { "epoch": 0.9019798002189002, "grad_norm": 0.5703125, "learning_rate": 4.9202127659574475e-06, "loss": 1.102, "step": 4069 }, { "epoch": 0.9022014713420801, "grad_norm": 0.5234375, "learning_rate": 4.909131205673759e-06, "loss": 1.0794, "step": 4070 }, { "epoch": 0.90242314246526, "grad_norm": 0.546875, "learning_rate": 4.898049645390071e-06, "loss": 1.1394, "step": 4071 }, { "epoch": 0.9026448135884398, "grad_norm": 0.53515625, "learning_rate": 4.886968085106383e-06, "loss": 0.9597, "step": 4072 }, { "epoch": 0.9028664847116197, "grad_norm": 0.53515625, "learning_rate": 4.875886524822695e-06, "loss": 1.0352, "step": 4073 }, { "epoch": 0.9030881558347996, "grad_norm": 0.52734375, "learning_rate": 4.864804964539008e-06, "loss": 1.04, "step": 4074 }, { "epoch": 0.9033098269579795, "grad_norm": 0.53515625, "learning_rate": 4.853723404255319e-06, "loss": 1.0522, "step": 4075 }, { "epoch": 0.9035314980811593, "grad_norm": 0.55859375, "learning_rate": 4.842641843971631e-06, "loss": 1.1555, "step": 4076 }, { "epoch": 0.9037531692043392, "grad_norm": 0.5234375, "learning_rate": 4.831560283687944e-06, "loss": 0.9696, "step": 4077 }, { "epoch": 0.9039748403275191, "grad_norm": 0.52734375, "learning_rate": 4.820478723404256e-06, "loss": 1.0786, "step": 4078 }, { "epoch": 0.9041965114506989, "grad_norm": 0.52734375, "learning_rate": 4.8093971631205676e-06, "loss": 0.9996, "step": 4079 }, { "epoch": 0.9044181825738788, "grad_norm": 0.5390625, "learning_rate": 4.7983156028368795e-06, "loss": 1.133, "step": 4080 }, { "epoch": 0.9046398536970587, "grad_norm": 0.5234375, "learning_rate": 4.787234042553191e-06, "loss": 1.0362, "step": 4081 }, { "epoch": 0.9048615248202386, "grad_norm": 0.5, "learning_rate": 4.776152482269504e-06, "loss": 1.019, "step": 4082 }, { "epoch": 0.9050831959434185, "grad_norm": 0.52734375, "learning_rate": 4.765070921985816e-06, "loss": 1.0364, "step": 4083 }, { "epoch": 0.9053048670665983, "grad_norm": 0.51953125, "learning_rate": 4.753989361702128e-06, "loss": 1.0505, "step": 4084 }, { "epoch": 0.9055265381897782, "grad_norm": 0.53515625, "learning_rate": 4.74290780141844e-06, "loss": 1.0516, "step": 4085 }, { "epoch": 0.9057482093129581, "grad_norm": 0.5546875, "learning_rate": 4.731826241134752e-06, "loss": 1.0952, "step": 4086 }, { "epoch": 0.9059698804361379, "grad_norm": 0.5390625, "learning_rate": 4.720744680851065e-06, "loss": 1.0561, "step": 4087 }, { "epoch": 0.9061915515593179, "grad_norm": 0.53125, "learning_rate": 4.709663120567376e-06, "loss": 1.029, "step": 4088 }, { "epoch": 0.9064132226824977, "grad_norm": 0.53515625, "learning_rate": 4.698581560283688e-06, "loss": 1.0467, "step": 4089 }, { "epoch": 0.9066348938056775, "grad_norm": 0.546875, "learning_rate": 4.6875000000000004e-06, "loss": 1.0881, "step": 4090 }, { "epoch": 0.9068565649288575, "grad_norm": 0.52734375, "learning_rate": 4.676418439716312e-06, "loss": 1.0394, "step": 4091 }, { "epoch": 0.9070782360520373, "grad_norm": 0.515625, "learning_rate": 4.665336879432624e-06, "loss": 0.9915, "step": 4092 }, { "epoch": 0.9072999071752171, "grad_norm": 0.54296875, "learning_rate": 4.654255319148936e-06, "loss": 1.0396, "step": 4093 }, { "epoch": 0.9075215782983971, "grad_norm": 0.5234375, "learning_rate": 4.643173758865248e-06, "loss": 1.0946, "step": 4094 }, { "epoch": 0.9077432494215769, "grad_norm": 0.55078125, "learning_rate": 4.632092198581561e-06, "loss": 0.9812, "step": 4095 }, { "epoch": 0.9079649205447567, "grad_norm": 0.55078125, "learning_rate": 4.621010638297873e-06, "loss": 1.0718, "step": 4096 }, { "epoch": 0.9081865916679367, "grad_norm": 0.5234375, "learning_rate": 4.609929078014184e-06, "loss": 1.0174, "step": 4097 }, { "epoch": 0.9084082627911165, "grad_norm": 0.53125, "learning_rate": 4.598847517730497e-06, "loss": 1.0643, "step": 4098 }, { "epoch": 0.9086299339142964, "grad_norm": 0.53125, "learning_rate": 4.587765957446809e-06, "loss": 1.0825, "step": 4099 }, { "epoch": 0.9088516050374763, "grad_norm": 0.5078125, "learning_rate": 4.576684397163121e-06, "loss": 1.0266, "step": 4100 }, { "epoch": 0.9090732761606561, "grad_norm": 0.51953125, "learning_rate": 4.565602836879433e-06, "loss": 1.0741, "step": 4101 }, { "epoch": 0.909294947283836, "grad_norm": 0.51171875, "learning_rate": 4.554521276595744e-06, "loss": 1.0376, "step": 4102 }, { "epoch": 0.9095166184070159, "grad_norm": 0.55859375, "learning_rate": 4.543439716312057e-06, "loss": 1.0397, "step": 4103 }, { "epoch": 0.9097382895301958, "grad_norm": 0.52734375, "learning_rate": 4.532358156028369e-06, "loss": 1.0647, "step": 4104 }, { "epoch": 0.9099599606533756, "grad_norm": 0.5625, "learning_rate": 4.521276595744681e-06, "loss": 1.075, "step": 4105 }, { "epoch": 0.9101816317765555, "grad_norm": 0.5390625, "learning_rate": 4.510195035460993e-06, "loss": 1.0195, "step": 4106 }, { "epoch": 0.9104033028997354, "grad_norm": 0.54296875, "learning_rate": 4.499113475177305e-06, "loss": 1.1034, "step": 4107 }, { "epoch": 0.9106249740229152, "grad_norm": 0.5390625, "learning_rate": 4.488031914893618e-06, "loss": 1.1039, "step": 4108 }, { "epoch": 0.9108466451460951, "grad_norm": 0.53125, "learning_rate": 4.4769503546099296e-06, "loss": 1.0483, "step": 4109 }, { "epoch": 0.911068316269275, "grad_norm": 0.55859375, "learning_rate": 4.465868794326241e-06, "loss": 1.0837, "step": 4110 }, { "epoch": 0.9112899873924548, "grad_norm": 0.54296875, "learning_rate": 4.454787234042553e-06, "loss": 1.0826, "step": 4111 }, { "epoch": 0.9115116585156348, "grad_norm": 0.515625, "learning_rate": 4.443705673758865e-06, "loss": 0.9726, "step": 4112 }, { "epoch": 0.9117333296388146, "grad_norm": 0.51953125, "learning_rate": 4.432624113475177e-06, "loss": 1.1107, "step": 4113 }, { "epoch": 0.9119550007619945, "grad_norm": 0.5390625, "learning_rate": 4.42154255319149e-06, "loss": 1.0623, "step": 4114 }, { "epoch": 0.9121766718851744, "grad_norm": 0.5234375, "learning_rate": 4.410460992907801e-06, "loss": 1.0353, "step": 4115 }, { "epoch": 0.9123983430083542, "grad_norm": 0.5546875, "learning_rate": 4.399379432624114e-06, "loss": 1.1029, "step": 4116 }, { "epoch": 0.9126200141315342, "grad_norm": 0.55078125, "learning_rate": 4.388297872340426e-06, "loss": 1.0932, "step": 4117 }, { "epoch": 0.912841685254714, "grad_norm": 0.52734375, "learning_rate": 4.377216312056738e-06, "loss": 1.0248, "step": 4118 }, { "epoch": 0.9130633563778938, "grad_norm": 0.53125, "learning_rate": 4.36613475177305e-06, "loss": 1.049, "step": 4119 }, { "epoch": 0.9132850275010738, "grad_norm": 0.51953125, "learning_rate": 4.355053191489362e-06, "loss": 1.0123, "step": 4120 }, { "epoch": 0.9135066986242536, "grad_norm": 0.5390625, "learning_rate": 4.343971631205674e-06, "loss": 1.0665, "step": 4121 }, { "epoch": 0.9137283697474334, "grad_norm": 0.55078125, "learning_rate": 4.332890070921986e-06, "loss": 1.14, "step": 4122 }, { "epoch": 0.9139500408706134, "grad_norm": 0.57421875, "learning_rate": 4.321808510638298e-06, "loss": 1.1105, "step": 4123 }, { "epoch": 0.9141717119937932, "grad_norm": 0.5234375, "learning_rate": 4.31072695035461e-06, "loss": 1.0229, "step": 4124 }, { "epoch": 0.914393383116973, "grad_norm": 0.5078125, "learning_rate": 4.299645390070922e-06, "loss": 1.0079, "step": 4125 }, { "epoch": 0.914615054240153, "grad_norm": 0.5390625, "learning_rate": 4.288563829787234e-06, "loss": 1.0, "step": 4126 }, { "epoch": 0.9148367253633328, "grad_norm": 0.5234375, "learning_rate": 4.277482269503547e-06, "loss": 0.9966, "step": 4127 }, { "epoch": 0.9150583964865127, "grad_norm": 0.5390625, "learning_rate": 4.266400709219858e-06, "loss": 1.0466, "step": 4128 }, { "epoch": 0.9152800676096926, "grad_norm": 0.5390625, "learning_rate": 4.255319148936171e-06, "loss": 1.1246, "step": 4129 }, { "epoch": 0.9155017387328724, "grad_norm": 0.52734375, "learning_rate": 4.2442375886524825e-06, "loss": 1.0089, "step": 4130 }, { "epoch": 0.9157234098560523, "grad_norm": 0.5234375, "learning_rate": 4.2331560283687945e-06, "loss": 1.1047, "step": 4131 }, { "epoch": 0.9159450809792322, "grad_norm": 0.52734375, "learning_rate": 4.222074468085106e-06, "loss": 1.0518, "step": 4132 }, { "epoch": 0.916166752102412, "grad_norm": 0.53125, "learning_rate": 4.210992907801418e-06, "loss": 1.0827, "step": 4133 }, { "epoch": 0.9163884232255919, "grad_norm": 0.54296875, "learning_rate": 4.199911347517731e-06, "loss": 1.0355, "step": 4134 }, { "epoch": 0.9166100943487718, "grad_norm": 0.515625, "learning_rate": 4.188829787234043e-06, "loss": 1.033, "step": 4135 }, { "epoch": 0.9168317654719517, "grad_norm": 0.5390625, "learning_rate": 4.177748226950355e-06, "loss": 1.0185, "step": 4136 }, { "epoch": 0.9170534365951315, "grad_norm": 0.5234375, "learning_rate": 4.166666666666667e-06, "loss": 1.0345, "step": 4137 }, { "epoch": 0.9172751077183114, "grad_norm": 0.54296875, "learning_rate": 4.155585106382979e-06, "loss": 1.0552, "step": 4138 }, { "epoch": 0.9174967788414913, "grad_norm": 0.54296875, "learning_rate": 4.144503546099291e-06, "loss": 1.0325, "step": 4139 }, { "epoch": 0.9177184499646711, "grad_norm": 0.55078125, "learning_rate": 4.1334219858156035e-06, "loss": 1.0571, "step": 4140 }, { "epoch": 0.9179401210878511, "grad_norm": 0.54296875, "learning_rate": 4.1223404255319146e-06, "loss": 1.0801, "step": 4141 }, { "epoch": 0.9181617922110309, "grad_norm": 0.56640625, "learning_rate": 4.111258865248227e-06, "loss": 1.0389, "step": 4142 }, { "epoch": 0.9183834633342108, "grad_norm": 0.5390625, "learning_rate": 4.100177304964539e-06, "loss": 1.0598, "step": 4143 }, { "epoch": 0.9186051344573907, "grad_norm": 0.54296875, "learning_rate": 4.089095744680851e-06, "loss": 1.0978, "step": 4144 }, { "epoch": 0.9188268055805705, "grad_norm": 0.53515625, "learning_rate": 4.078014184397164e-06, "loss": 1.1343, "step": 4145 }, { "epoch": 0.9190484767037504, "grad_norm": 0.53515625, "learning_rate": 4.066932624113475e-06, "loss": 1.029, "step": 4146 }, { "epoch": 0.9192701478269303, "grad_norm": 0.55078125, "learning_rate": 4.055851063829787e-06, "loss": 1.1317, "step": 4147 }, { "epoch": 0.9194918189501101, "grad_norm": 0.498046875, "learning_rate": 4.0447695035461e-06, "loss": 1.0049, "step": 4148 }, { "epoch": 0.9197134900732901, "grad_norm": 0.59375, "learning_rate": 4.033687943262412e-06, "loss": 1.0435, "step": 4149 }, { "epoch": 0.9199351611964699, "grad_norm": 0.53125, "learning_rate": 4.022606382978724e-06, "loss": 1.0193, "step": 4150 }, { "epoch": 0.9201568323196497, "grad_norm": 0.54296875, "learning_rate": 4.0115248226950355e-06, "loss": 1.0566, "step": 4151 }, { "epoch": 0.9203785034428297, "grad_norm": 0.52734375, "learning_rate": 4.0004432624113474e-06, "loss": 1.1296, "step": 4152 }, { "epoch": 0.9206001745660095, "grad_norm": 0.5, "learning_rate": 3.98936170212766e-06, "loss": 1.0564, "step": 4153 }, { "epoch": 0.9208218456891893, "grad_norm": 0.5546875, "learning_rate": 3.978280141843971e-06, "loss": 1.0458, "step": 4154 }, { "epoch": 0.9210435168123693, "grad_norm": 0.55859375, "learning_rate": 3.967198581560284e-06, "loss": 0.9869, "step": 4155 }, { "epoch": 0.9212651879355491, "grad_norm": 0.53515625, "learning_rate": 3.956117021276596e-06, "loss": 1.0914, "step": 4156 }, { "epoch": 0.921486859058729, "grad_norm": 0.515625, "learning_rate": 3.945035460992908e-06, "loss": 1.071, "step": 4157 }, { "epoch": 0.9217085301819089, "grad_norm": 0.55078125, "learning_rate": 3.933953900709221e-06, "loss": 1.1355, "step": 4158 }, { "epoch": 0.9219302013050887, "grad_norm": 0.53125, "learning_rate": 3.922872340425532e-06, "loss": 1.045, "step": 4159 }, { "epoch": 0.9221518724282686, "grad_norm": 0.54296875, "learning_rate": 3.911790780141844e-06, "loss": 1.0915, "step": 4160 }, { "epoch": 0.9223735435514485, "grad_norm": 0.5390625, "learning_rate": 3.9007092198581565e-06, "loss": 1.043, "step": 4161 }, { "epoch": 0.9225952146746283, "grad_norm": 0.53125, "learning_rate": 3.889627659574468e-06, "loss": 1.0785, "step": 4162 }, { "epoch": 0.9228168857978082, "grad_norm": 0.55078125, "learning_rate": 3.87854609929078e-06, "loss": 1.0934, "step": 4163 }, { "epoch": 0.9230385569209881, "grad_norm": 0.53125, "learning_rate": 3.867464539007092e-06, "loss": 1.0329, "step": 4164 }, { "epoch": 0.923260228044168, "grad_norm": 0.53515625, "learning_rate": 3.856382978723404e-06, "loss": 1.0649, "step": 4165 }, { "epoch": 0.9234818991673478, "grad_norm": 0.5390625, "learning_rate": 3.845301418439717e-06, "loss": 1.1184, "step": 4166 }, { "epoch": 0.9237035702905277, "grad_norm": 0.53125, "learning_rate": 3.834219858156029e-06, "loss": 1.0437, "step": 4167 }, { "epoch": 0.9239252414137076, "grad_norm": 0.5390625, "learning_rate": 3.823138297872341e-06, "loss": 1.0755, "step": 4168 }, { "epoch": 0.9241469125368874, "grad_norm": 0.53515625, "learning_rate": 3.8120567375886527e-06, "loss": 0.9985, "step": 4169 }, { "epoch": 0.9243685836600674, "grad_norm": 0.53515625, "learning_rate": 3.8009751773049646e-06, "loss": 1.0667, "step": 4170 }, { "epoch": 0.9245902547832472, "grad_norm": 0.55078125, "learning_rate": 3.789893617021277e-06, "loss": 1.0499, "step": 4171 }, { "epoch": 0.924811925906427, "grad_norm": 0.53125, "learning_rate": 3.7788120567375885e-06, "loss": 1.0388, "step": 4172 }, { "epoch": 0.925033597029607, "grad_norm": 0.57421875, "learning_rate": 3.767730496453901e-06, "loss": 1.0378, "step": 4173 }, { "epoch": 0.9252552681527868, "grad_norm": 0.6171875, "learning_rate": 3.756648936170213e-06, "loss": 1.138, "step": 4174 }, { "epoch": 0.9254769392759667, "grad_norm": 0.53125, "learning_rate": 3.745567375886525e-06, "loss": 0.9989, "step": 4175 }, { "epoch": 0.9256986103991466, "grad_norm": 0.515625, "learning_rate": 3.7344858156028366e-06, "loss": 1.0191, "step": 4176 }, { "epoch": 0.9259202815223264, "grad_norm": 0.55859375, "learning_rate": 3.723404255319149e-06, "loss": 1.0944, "step": 4177 }, { "epoch": 0.9261419526455064, "grad_norm": 0.5390625, "learning_rate": 3.7123226950354613e-06, "loss": 1.0403, "step": 4178 }, { "epoch": 0.9263636237686862, "grad_norm": 0.55078125, "learning_rate": 3.7012411347517732e-06, "loss": 1.0829, "step": 4179 }, { "epoch": 0.926585294891866, "grad_norm": 0.51171875, "learning_rate": 3.6901595744680856e-06, "loss": 1.0367, "step": 4180 }, { "epoch": 0.926806966015046, "grad_norm": 0.5234375, "learning_rate": 3.679078014184397e-06, "loss": 1.0605, "step": 4181 }, { "epoch": 0.9270286371382258, "grad_norm": 0.50390625, "learning_rate": 3.6679964539007094e-06, "loss": 1.0258, "step": 4182 }, { "epoch": 0.9272503082614056, "grad_norm": 0.54296875, "learning_rate": 3.6569148936170214e-06, "loss": 1.0455, "step": 4183 }, { "epoch": 0.9274719793845856, "grad_norm": 0.546875, "learning_rate": 3.6458333333333337e-06, "loss": 1.0805, "step": 4184 }, { "epoch": 0.9276936505077654, "grad_norm": 0.52734375, "learning_rate": 3.6347517730496452e-06, "loss": 1.016, "step": 4185 }, { "epoch": 0.9279153216309453, "grad_norm": 0.5390625, "learning_rate": 3.6236702127659576e-06, "loss": 1.1262, "step": 4186 }, { "epoch": 0.9281369927541252, "grad_norm": 0.5234375, "learning_rate": 3.6125886524822695e-06, "loss": 1.0326, "step": 4187 }, { "epoch": 0.928358663877305, "grad_norm": 0.546875, "learning_rate": 3.601507092198582e-06, "loss": 1.0729, "step": 4188 }, { "epoch": 0.9285803350004849, "grad_norm": 0.5625, "learning_rate": 3.590425531914894e-06, "loss": 1.1359, "step": 4189 }, { "epoch": 0.9288020061236648, "grad_norm": 0.5234375, "learning_rate": 3.5793439716312057e-06, "loss": 1.0427, "step": 4190 }, { "epoch": 0.9290236772468446, "grad_norm": 0.5703125, "learning_rate": 3.568262411347518e-06, "loss": 1.0517, "step": 4191 }, { "epoch": 0.9292453483700245, "grad_norm": 0.5234375, "learning_rate": 3.55718085106383e-06, "loss": 0.9972, "step": 4192 }, { "epoch": 0.9294670194932044, "grad_norm": 0.5390625, "learning_rate": 3.5460992907801423e-06, "loss": 1.0478, "step": 4193 }, { "epoch": 0.9296886906163843, "grad_norm": 0.5546875, "learning_rate": 3.535017730496454e-06, "loss": 1.086, "step": 4194 }, { "epoch": 0.9299103617395641, "grad_norm": 0.5703125, "learning_rate": 3.523936170212766e-06, "loss": 1.1176, "step": 4195 }, { "epoch": 0.930132032862744, "grad_norm": 0.5859375, "learning_rate": 3.512854609929078e-06, "loss": 1.1519, "step": 4196 }, { "epoch": 0.9303537039859239, "grad_norm": 0.52734375, "learning_rate": 3.5017730496453904e-06, "loss": 1.0474, "step": 4197 }, { "epoch": 0.9305753751091037, "grad_norm": 0.52734375, "learning_rate": 3.490691489361702e-06, "loss": 1.0631, "step": 4198 }, { "epoch": 0.9307970462322837, "grad_norm": 0.5390625, "learning_rate": 3.4796099290780143e-06, "loss": 1.0659, "step": 4199 }, { "epoch": 0.9310187173554635, "grad_norm": 0.5234375, "learning_rate": 3.4685283687943262e-06, "loss": 1.0669, "step": 4200 }, { "epoch": 0.9312403884786433, "grad_norm": 0.498046875, "learning_rate": 3.4574468085106386e-06, "loss": 1.0032, "step": 4201 }, { "epoch": 0.9314620596018233, "grad_norm": 0.55859375, "learning_rate": 3.446365248226951e-06, "loss": 1.1025, "step": 4202 }, { "epoch": 0.9316837307250031, "grad_norm": 0.546875, "learning_rate": 3.4352836879432624e-06, "loss": 1.0631, "step": 4203 }, { "epoch": 0.9319054018481829, "grad_norm": 0.5078125, "learning_rate": 3.4242021276595743e-06, "loss": 1.1055, "step": 4204 }, { "epoch": 0.9321270729713629, "grad_norm": 0.51953125, "learning_rate": 3.4131205673758867e-06, "loss": 1.0937, "step": 4205 }, { "epoch": 0.9323487440945427, "grad_norm": 0.53515625, "learning_rate": 3.402039007092199e-06, "loss": 1.0444, "step": 4206 }, { "epoch": 0.9325704152177227, "grad_norm": 0.54296875, "learning_rate": 3.3909574468085105e-06, "loss": 1.0653, "step": 4207 }, { "epoch": 0.9327920863409025, "grad_norm": 0.5546875, "learning_rate": 3.379875886524823e-06, "loss": 0.9634, "step": 4208 }, { "epoch": 0.9330137574640823, "grad_norm": 0.53125, "learning_rate": 3.368794326241135e-06, "loss": 1.0767, "step": 4209 }, { "epoch": 0.9332354285872623, "grad_norm": 0.51953125, "learning_rate": 3.357712765957447e-06, "loss": 1.0702, "step": 4210 }, { "epoch": 0.9334570997104421, "grad_norm": 0.53515625, "learning_rate": 3.3466312056737595e-06, "loss": 1.0383, "step": 4211 }, { "epoch": 0.9336787708336219, "grad_norm": 0.515625, "learning_rate": 3.335549645390071e-06, "loss": 1.007, "step": 4212 }, { "epoch": 0.9339004419568019, "grad_norm": 0.51953125, "learning_rate": 3.324468085106383e-06, "loss": 1.0344, "step": 4213 }, { "epoch": 0.9341221130799817, "grad_norm": 0.55078125, "learning_rate": 3.3133865248226953e-06, "loss": 1.0627, "step": 4214 }, { "epoch": 0.9343437842031616, "grad_norm": 0.57421875, "learning_rate": 3.3023049645390076e-06, "loss": 1.1128, "step": 4215 }, { "epoch": 0.9345654553263415, "grad_norm": 0.51953125, "learning_rate": 3.291223404255319e-06, "loss": 1.0355, "step": 4216 }, { "epoch": 0.9347871264495213, "grad_norm": 0.52734375, "learning_rate": 3.280141843971631e-06, "loss": 0.9912, "step": 4217 }, { "epoch": 0.9350087975727012, "grad_norm": 0.53125, "learning_rate": 3.2690602836879434e-06, "loss": 1.0152, "step": 4218 }, { "epoch": 0.9352304686958811, "grad_norm": 0.546875, "learning_rate": 3.2579787234042558e-06, "loss": 0.9659, "step": 4219 }, { "epoch": 0.935452139819061, "grad_norm": 0.55859375, "learning_rate": 3.2468971631205673e-06, "loss": 1.0803, "step": 4220 }, { "epoch": 0.9356738109422408, "grad_norm": 0.5625, "learning_rate": 3.235815602836879e-06, "loss": 1.0589, "step": 4221 }, { "epoch": 0.9358954820654207, "grad_norm": 0.55859375, "learning_rate": 3.2247340425531915e-06, "loss": 1.128, "step": 4222 }, { "epoch": 0.9361171531886006, "grad_norm": 0.52734375, "learning_rate": 3.213652482269504e-06, "loss": 1.0023, "step": 4223 }, { "epoch": 0.9363388243117804, "grad_norm": 0.53515625, "learning_rate": 3.2025709219858162e-06, "loss": 1.0541, "step": 4224 }, { "epoch": 0.9365604954349603, "grad_norm": 0.546875, "learning_rate": 3.1914893617021277e-06, "loss": 1.0788, "step": 4225 }, { "epoch": 0.9367821665581402, "grad_norm": 0.53125, "learning_rate": 3.1804078014184397e-06, "loss": 1.0574, "step": 4226 }, { "epoch": 0.93700383768132, "grad_norm": 0.53515625, "learning_rate": 3.169326241134752e-06, "loss": 1.0755, "step": 4227 }, { "epoch": 0.9372255088045, "grad_norm": 0.5078125, "learning_rate": 3.1582446808510644e-06, "loss": 1.0338, "step": 4228 }, { "epoch": 0.9374471799276798, "grad_norm": 0.55859375, "learning_rate": 3.147163120567376e-06, "loss": 1.1603, "step": 4229 }, { "epoch": 0.9376688510508596, "grad_norm": 0.52734375, "learning_rate": 3.136081560283688e-06, "loss": 1.0212, "step": 4230 }, { "epoch": 0.9378905221740396, "grad_norm": 0.53125, "learning_rate": 3.125e-06, "loss": 1.0966, "step": 4231 }, { "epoch": 0.9381121932972194, "grad_norm": 0.52734375, "learning_rate": 3.1139184397163125e-06, "loss": 1.0222, "step": 4232 }, { "epoch": 0.9383338644203992, "grad_norm": 0.52734375, "learning_rate": 3.1028368794326244e-06, "loss": 1.0482, "step": 4233 }, { "epoch": 0.9385555355435792, "grad_norm": 0.53125, "learning_rate": 3.0917553191489363e-06, "loss": 1.0581, "step": 4234 }, { "epoch": 0.938777206666759, "grad_norm": 0.50390625, "learning_rate": 3.0806737588652483e-06, "loss": 1.0104, "step": 4235 }, { "epoch": 0.938998877789939, "grad_norm": 0.54296875, "learning_rate": 3.0695921985815606e-06, "loss": 1.1087, "step": 4236 }, { "epoch": 0.9392205489131188, "grad_norm": 0.54296875, "learning_rate": 3.0585106382978726e-06, "loss": 1.0271, "step": 4237 }, { "epoch": 0.9394422200362986, "grad_norm": 0.55078125, "learning_rate": 3.0474290780141845e-06, "loss": 1.0961, "step": 4238 }, { "epoch": 0.9396638911594786, "grad_norm": 0.546875, "learning_rate": 3.0363475177304964e-06, "loss": 1.0899, "step": 4239 }, { "epoch": 0.9398855622826584, "grad_norm": 0.515625, "learning_rate": 3.0252659574468088e-06, "loss": 1.0646, "step": 4240 }, { "epoch": 0.9401072334058382, "grad_norm": 0.51171875, "learning_rate": 3.0141843971631207e-06, "loss": 1.014, "step": 4241 }, { "epoch": 0.9403289045290182, "grad_norm": 0.5625, "learning_rate": 3.0031028368794326e-06, "loss": 1.0995, "step": 4242 }, { "epoch": 0.940550575652198, "grad_norm": 0.53515625, "learning_rate": 2.992021276595745e-06, "loss": 1.0264, "step": 4243 }, { "epoch": 0.9407722467753779, "grad_norm": 0.578125, "learning_rate": 2.980939716312057e-06, "loss": 1.1034, "step": 4244 }, { "epoch": 0.9409939178985578, "grad_norm": 0.53125, "learning_rate": 2.9698581560283692e-06, "loss": 1.0752, "step": 4245 }, { "epoch": 0.9412155890217376, "grad_norm": 0.53125, "learning_rate": 2.9587765957446807e-06, "loss": 1.0811, "step": 4246 }, { "epoch": 0.9414372601449175, "grad_norm": 0.515625, "learning_rate": 2.947695035460993e-06, "loss": 1.0426, "step": 4247 }, { "epoch": 0.9416589312680974, "grad_norm": 0.55078125, "learning_rate": 2.936613475177305e-06, "loss": 1.0759, "step": 4248 }, { "epoch": 0.9418806023912772, "grad_norm": 0.51953125, "learning_rate": 2.9255319148936174e-06, "loss": 1.0447, "step": 4249 }, { "epoch": 0.9421022735144571, "grad_norm": 0.53515625, "learning_rate": 2.9144503546099293e-06, "loss": 1.0585, "step": 4250 }, { "epoch": 0.942323944637637, "grad_norm": 0.53125, "learning_rate": 2.903368794326241e-06, "loss": 1.0048, "step": 4251 }, { "epoch": 0.9425456157608169, "grad_norm": 0.5234375, "learning_rate": 2.892287234042553e-06, "loss": 1.0911, "step": 4252 }, { "epoch": 0.9427672868839967, "grad_norm": 0.53515625, "learning_rate": 2.8812056737588655e-06, "loss": 1.1115, "step": 4253 }, { "epoch": 0.9429889580071766, "grad_norm": 0.54296875, "learning_rate": 2.8701241134751774e-06, "loss": 1.1407, "step": 4254 }, { "epoch": 0.9432106291303565, "grad_norm": 0.5546875, "learning_rate": 2.8590425531914893e-06, "loss": 1.0437, "step": 4255 }, { "epoch": 0.9434323002535363, "grad_norm": 0.55859375, "learning_rate": 2.8479609929078017e-06, "loss": 1.1276, "step": 4256 }, { "epoch": 0.9436539713767162, "grad_norm": 0.5390625, "learning_rate": 2.8368794326241136e-06, "loss": 1.1565, "step": 4257 }, { "epoch": 0.9438756424998961, "grad_norm": 0.51171875, "learning_rate": 2.825797872340426e-06, "loss": 0.9957, "step": 4258 }, { "epoch": 0.9440973136230759, "grad_norm": 0.5234375, "learning_rate": 2.8147163120567375e-06, "loss": 1.0399, "step": 4259 }, { "epoch": 0.9443189847462559, "grad_norm": 0.53125, "learning_rate": 2.80363475177305e-06, "loss": 1.1152, "step": 4260 }, { "epoch": 0.9445406558694357, "grad_norm": 0.55078125, "learning_rate": 2.7925531914893617e-06, "loss": 1.0484, "step": 4261 }, { "epoch": 0.9447623269926155, "grad_norm": 0.56640625, "learning_rate": 2.781471631205674e-06, "loss": 1.0823, "step": 4262 }, { "epoch": 0.9449839981157955, "grad_norm": 0.5234375, "learning_rate": 2.7703900709219856e-06, "loss": 0.9876, "step": 4263 }, { "epoch": 0.9452056692389753, "grad_norm": 0.5703125, "learning_rate": 2.759308510638298e-06, "loss": 1.1312, "step": 4264 }, { "epoch": 0.9454273403621551, "grad_norm": 0.66796875, "learning_rate": 2.7482269503546103e-06, "loss": 1.086, "step": 4265 }, { "epoch": 0.9456490114853351, "grad_norm": 0.52734375, "learning_rate": 2.737145390070922e-06, "loss": 1.1072, "step": 4266 }, { "epoch": 0.9458706826085149, "grad_norm": 0.5390625, "learning_rate": 2.726063829787234e-06, "loss": 1.0562, "step": 4267 }, { "epoch": 0.9460923537316949, "grad_norm": 0.54296875, "learning_rate": 2.714982269503546e-06, "loss": 1.1266, "step": 4268 }, { "epoch": 0.9463140248548747, "grad_norm": 0.51171875, "learning_rate": 2.7039007092198584e-06, "loss": 0.9875, "step": 4269 }, { "epoch": 0.9465356959780545, "grad_norm": 0.52734375, "learning_rate": 2.6928191489361703e-06, "loss": 1.019, "step": 4270 }, { "epoch": 0.9467573671012345, "grad_norm": 0.53125, "learning_rate": 2.6817375886524823e-06, "loss": 1.0874, "step": 4271 }, { "epoch": 0.9469790382244143, "grad_norm": 0.58203125, "learning_rate": 2.670656028368794e-06, "loss": 1.1147, "step": 4272 }, { "epoch": 0.9472007093475941, "grad_norm": 0.56640625, "learning_rate": 2.6595744680851065e-06, "loss": 1.0758, "step": 4273 }, { "epoch": 0.9474223804707741, "grad_norm": 0.5546875, "learning_rate": 2.6484929078014185e-06, "loss": 1.0694, "step": 4274 }, { "epoch": 0.9476440515939539, "grad_norm": 0.5390625, "learning_rate": 2.637411347517731e-06, "loss": 1.1158, "step": 4275 }, { "epoch": 0.9478657227171338, "grad_norm": 0.5234375, "learning_rate": 2.6263297872340427e-06, "loss": 1.0345, "step": 4276 }, { "epoch": 0.9480873938403137, "grad_norm": 0.5234375, "learning_rate": 2.6152482269503547e-06, "loss": 1.0252, "step": 4277 }, { "epoch": 0.9483090649634935, "grad_norm": 0.53515625, "learning_rate": 2.604166666666667e-06, "loss": 0.9872, "step": 4278 }, { "epoch": 0.9485307360866734, "grad_norm": 0.53125, "learning_rate": 2.593085106382979e-06, "loss": 1.0735, "step": 4279 }, { "epoch": 0.9487524072098533, "grad_norm": 0.5234375, "learning_rate": 2.582003546099291e-06, "loss": 1.0026, "step": 4280 }, { "epoch": 0.9489740783330332, "grad_norm": 0.5234375, "learning_rate": 2.5709219858156028e-06, "loss": 1.0214, "step": 4281 }, { "epoch": 0.949195749456213, "grad_norm": 0.51953125, "learning_rate": 2.559840425531915e-06, "loss": 1.0615, "step": 4282 }, { "epoch": 0.9494174205793929, "grad_norm": 0.53125, "learning_rate": 2.548758865248227e-06, "loss": 1.1106, "step": 4283 }, { "epoch": 0.9496390917025728, "grad_norm": 0.546875, "learning_rate": 2.537677304964539e-06, "loss": 1.1108, "step": 4284 }, { "epoch": 0.9498607628257526, "grad_norm": 0.515625, "learning_rate": 2.526595744680851e-06, "loss": 1.0804, "step": 4285 }, { "epoch": 0.9500824339489325, "grad_norm": 0.52734375, "learning_rate": 2.5155141843971633e-06, "loss": 1.0659, "step": 4286 }, { "epoch": 0.9503041050721124, "grad_norm": 0.5234375, "learning_rate": 2.5044326241134756e-06, "loss": 1.0509, "step": 4287 }, { "epoch": 0.9505257761952922, "grad_norm": 0.58203125, "learning_rate": 2.493351063829787e-06, "loss": 1.0521, "step": 4288 }, { "epoch": 0.9507474473184722, "grad_norm": 0.5234375, "learning_rate": 2.4822695035460995e-06, "loss": 1.0946, "step": 4289 }, { "epoch": 0.950969118441652, "grad_norm": 0.5390625, "learning_rate": 2.4711879432624114e-06, "loss": 1.1181, "step": 4290 }, { "epoch": 0.9511907895648318, "grad_norm": 0.54296875, "learning_rate": 2.4601063829787237e-06, "loss": 1.0545, "step": 4291 }, { "epoch": 0.9514124606880118, "grad_norm": 0.5859375, "learning_rate": 2.4490248226950357e-06, "loss": 1.0983, "step": 4292 }, { "epoch": 0.9516341318111916, "grad_norm": 0.515625, "learning_rate": 2.4379432624113476e-06, "loss": 1.024, "step": 4293 }, { "epoch": 0.9518558029343714, "grad_norm": 0.6328125, "learning_rate": 2.4268617021276595e-06, "loss": 1.1133, "step": 4294 }, { "epoch": 0.9520774740575514, "grad_norm": 0.53125, "learning_rate": 2.415780141843972e-06, "loss": 1.0768, "step": 4295 }, { "epoch": 0.9522991451807312, "grad_norm": 0.5390625, "learning_rate": 2.4046985815602838e-06, "loss": 1.0368, "step": 4296 }, { "epoch": 0.952520816303911, "grad_norm": 0.54296875, "learning_rate": 2.3936170212765957e-06, "loss": 0.9745, "step": 4297 }, { "epoch": 0.952742487427091, "grad_norm": 0.53125, "learning_rate": 2.382535460992908e-06, "loss": 1.0184, "step": 4298 }, { "epoch": 0.9529641585502708, "grad_norm": 0.515625, "learning_rate": 2.37145390070922e-06, "loss": 1.085, "step": 4299 }, { "epoch": 0.9531858296734508, "grad_norm": 0.55859375, "learning_rate": 2.3603723404255323e-06, "loss": 1.0093, "step": 4300 }, { "epoch": 0.9534075007966306, "grad_norm": 0.5859375, "learning_rate": 2.349290780141844e-06, "loss": 1.0964, "step": 4301 }, { "epoch": 0.9536291719198104, "grad_norm": 0.53125, "learning_rate": 2.338209219858156e-06, "loss": 1.0042, "step": 4302 }, { "epoch": 0.9538508430429904, "grad_norm": 0.53125, "learning_rate": 2.327127659574468e-06, "loss": 1.0622, "step": 4303 }, { "epoch": 0.9540725141661702, "grad_norm": 0.55078125, "learning_rate": 2.3160460992907805e-06, "loss": 1.0458, "step": 4304 }, { "epoch": 0.9542941852893501, "grad_norm": 0.51171875, "learning_rate": 2.304964539007092e-06, "loss": 1.0571, "step": 4305 }, { "epoch": 0.95451585641253, "grad_norm": 0.5625, "learning_rate": 2.2938829787234043e-06, "loss": 1.1138, "step": 4306 }, { "epoch": 0.9547375275357098, "grad_norm": 0.546875, "learning_rate": 2.2828014184397167e-06, "loss": 1.0738, "step": 4307 }, { "epoch": 0.9549591986588897, "grad_norm": 0.58203125, "learning_rate": 2.2717198581560286e-06, "loss": 1.1319, "step": 4308 }, { "epoch": 0.9551808697820696, "grad_norm": 0.5546875, "learning_rate": 2.2606382978723405e-06, "loss": 1.1759, "step": 4309 }, { "epoch": 0.9554025409052495, "grad_norm": 0.51953125, "learning_rate": 2.2495567375886524e-06, "loss": 1.0135, "step": 4310 }, { "epoch": 0.9556242120284293, "grad_norm": 0.5546875, "learning_rate": 2.2384751773049648e-06, "loss": 1.05, "step": 4311 }, { "epoch": 0.9558458831516092, "grad_norm": 0.53125, "learning_rate": 2.2273936170212767e-06, "loss": 1.0359, "step": 4312 }, { "epoch": 0.9560675542747891, "grad_norm": 0.51171875, "learning_rate": 2.2163120567375886e-06, "loss": 0.9971, "step": 4313 }, { "epoch": 0.9562892253979689, "grad_norm": 0.54296875, "learning_rate": 2.2052304964539006e-06, "loss": 1.0646, "step": 4314 }, { "epoch": 0.9565108965211488, "grad_norm": 0.53125, "learning_rate": 2.194148936170213e-06, "loss": 1.0559, "step": 4315 }, { "epoch": 0.9567325676443287, "grad_norm": 0.5234375, "learning_rate": 2.183067375886525e-06, "loss": 1.0627, "step": 4316 }, { "epoch": 0.9569542387675085, "grad_norm": 0.53515625, "learning_rate": 2.171985815602837e-06, "loss": 0.9692, "step": 4317 }, { "epoch": 0.9571759098906885, "grad_norm": 0.55078125, "learning_rate": 2.160904255319149e-06, "loss": 1.0546, "step": 4318 }, { "epoch": 0.9573975810138683, "grad_norm": 0.53125, "learning_rate": 2.149822695035461e-06, "loss": 1.0321, "step": 4319 }, { "epoch": 0.9576192521370481, "grad_norm": 0.5234375, "learning_rate": 2.1387411347517734e-06, "loss": 1.0267, "step": 4320 }, { "epoch": 0.9578409232602281, "grad_norm": 0.5390625, "learning_rate": 2.1276595744680853e-06, "loss": 1.0373, "step": 4321 }, { "epoch": 0.9580625943834079, "grad_norm": 0.546875, "learning_rate": 2.1165780141843972e-06, "loss": 1.024, "step": 4322 }, { "epoch": 0.9582842655065877, "grad_norm": 0.5234375, "learning_rate": 2.105496453900709e-06, "loss": 1.0338, "step": 4323 }, { "epoch": 0.9585059366297677, "grad_norm": 0.53125, "learning_rate": 2.0944148936170215e-06, "loss": 1.0635, "step": 4324 }, { "epoch": 0.9587276077529475, "grad_norm": 0.51953125, "learning_rate": 2.0833333333333334e-06, "loss": 1.0275, "step": 4325 }, { "epoch": 0.9589492788761274, "grad_norm": 0.52734375, "learning_rate": 2.0722517730496454e-06, "loss": 1.1659, "step": 4326 }, { "epoch": 0.9591709499993073, "grad_norm": 0.5234375, "learning_rate": 2.0611702127659573e-06, "loss": 1.0713, "step": 4327 }, { "epoch": 0.9593926211224871, "grad_norm": 0.52734375, "learning_rate": 2.0500886524822696e-06, "loss": 1.0226, "step": 4328 }, { "epoch": 0.9596142922456671, "grad_norm": 0.51953125, "learning_rate": 2.039007092198582e-06, "loss": 1.0103, "step": 4329 }, { "epoch": 0.9598359633688469, "grad_norm": 0.53125, "learning_rate": 2.0279255319148935e-06, "loss": 1.0723, "step": 4330 }, { "epoch": 0.9600576344920267, "grad_norm": 0.5, "learning_rate": 2.016843971631206e-06, "loss": 1.0143, "step": 4331 }, { "epoch": 0.9602793056152067, "grad_norm": 0.5390625, "learning_rate": 2.0057624113475178e-06, "loss": 1.0894, "step": 4332 }, { "epoch": 0.9605009767383865, "grad_norm": 0.5234375, "learning_rate": 1.99468085106383e-06, "loss": 1.0518, "step": 4333 }, { "epoch": 0.9607226478615664, "grad_norm": 0.52734375, "learning_rate": 1.983599290780142e-06, "loss": 1.0177, "step": 4334 }, { "epoch": 0.9609443189847463, "grad_norm": 0.546875, "learning_rate": 1.972517730496454e-06, "loss": 1.0616, "step": 4335 }, { "epoch": 0.9611659901079261, "grad_norm": 0.546875, "learning_rate": 1.961436170212766e-06, "loss": 1.0742, "step": 4336 }, { "epoch": 0.961387661231106, "grad_norm": 0.5390625, "learning_rate": 1.9503546099290782e-06, "loss": 1.1044, "step": 4337 }, { "epoch": 0.9616093323542859, "grad_norm": 0.5234375, "learning_rate": 1.93927304964539e-06, "loss": 1.0312, "step": 4338 }, { "epoch": 0.9618310034774658, "grad_norm": 0.546875, "learning_rate": 1.928191489361702e-06, "loss": 1.0901, "step": 4339 }, { "epoch": 0.9620526746006456, "grad_norm": 0.5625, "learning_rate": 1.9171099290780144e-06, "loss": 1.0812, "step": 4340 }, { "epoch": 0.9622743457238255, "grad_norm": 0.54296875, "learning_rate": 1.9060283687943264e-06, "loss": 1.0941, "step": 4341 }, { "epoch": 0.9624960168470054, "grad_norm": 0.53125, "learning_rate": 1.8949468085106385e-06, "loss": 1.0475, "step": 4342 }, { "epoch": 0.9627176879701852, "grad_norm": 0.52734375, "learning_rate": 1.8838652482269504e-06, "loss": 1.0665, "step": 4343 }, { "epoch": 0.9629393590933651, "grad_norm": 0.5546875, "learning_rate": 1.8727836879432626e-06, "loss": 1.0467, "step": 4344 }, { "epoch": 0.963161030216545, "grad_norm": 0.5234375, "learning_rate": 1.8617021276595745e-06, "loss": 1.0497, "step": 4345 }, { "epoch": 0.9633827013397248, "grad_norm": 0.5390625, "learning_rate": 1.8506205673758866e-06, "loss": 1.0547, "step": 4346 }, { "epoch": 0.9636043724629048, "grad_norm": 0.5390625, "learning_rate": 1.8395390070921985e-06, "loss": 1.0558, "step": 4347 }, { "epoch": 0.9638260435860846, "grad_norm": 0.5234375, "learning_rate": 1.8284574468085107e-06, "loss": 1.0324, "step": 4348 }, { "epoch": 0.9640477147092644, "grad_norm": 0.5234375, "learning_rate": 1.8173758865248226e-06, "loss": 1.0788, "step": 4349 }, { "epoch": 0.9642693858324444, "grad_norm": 0.52734375, "learning_rate": 1.8062943262411347e-06, "loss": 1.102, "step": 4350 }, { "epoch": 0.9644910569556242, "grad_norm": 0.52734375, "learning_rate": 1.795212765957447e-06, "loss": 1.0095, "step": 4351 }, { "epoch": 0.964712728078804, "grad_norm": 0.54296875, "learning_rate": 1.784131205673759e-06, "loss": 1.0475, "step": 4352 }, { "epoch": 0.964934399201984, "grad_norm": 0.58203125, "learning_rate": 1.7730496453900712e-06, "loss": 1.0877, "step": 4353 }, { "epoch": 0.9651560703251638, "grad_norm": 0.53515625, "learning_rate": 1.761968085106383e-06, "loss": 1.0569, "step": 4354 }, { "epoch": 0.9653777414483437, "grad_norm": 0.578125, "learning_rate": 1.7508865248226952e-06, "loss": 1.0708, "step": 4355 }, { "epoch": 0.9655994125715236, "grad_norm": 0.53515625, "learning_rate": 1.7398049645390071e-06, "loss": 1.086, "step": 4356 }, { "epoch": 0.9658210836947034, "grad_norm": 0.5234375, "learning_rate": 1.7287234042553193e-06, "loss": 1.1426, "step": 4357 }, { "epoch": 0.9660427548178833, "grad_norm": 0.5625, "learning_rate": 1.7176418439716312e-06, "loss": 1.0738, "step": 4358 }, { "epoch": 0.9662644259410632, "grad_norm": 0.5390625, "learning_rate": 1.7065602836879433e-06, "loss": 1.0841, "step": 4359 }, { "epoch": 0.966486097064243, "grad_norm": 0.5546875, "learning_rate": 1.6954787234042553e-06, "loss": 1.0796, "step": 4360 }, { "epoch": 0.966707768187423, "grad_norm": 0.546875, "learning_rate": 1.6843971631205674e-06, "loss": 1.1048, "step": 4361 }, { "epoch": 0.9669294393106028, "grad_norm": 0.5234375, "learning_rate": 1.6733156028368798e-06, "loss": 1.0421, "step": 4362 }, { "epoch": 0.9671511104337827, "grad_norm": 0.52734375, "learning_rate": 1.6622340425531915e-06, "loss": 1.0925, "step": 4363 }, { "epoch": 0.9673727815569626, "grad_norm": 0.58203125, "learning_rate": 1.6511524822695038e-06, "loss": 1.1333, "step": 4364 }, { "epoch": 0.9675944526801424, "grad_norm": 0.5234375, "learning_rate": 1.6400709219858155e-06, "loss": 0.9521, "step": 4365 }, { "epoch": 0.9678161238033223, "grad_norm": 0.53515625, "learning_rate": 1.6289893617021279e-06, "loss": 1.1238, "step": 4366 }, { "epoch": 0.9680377949265022, "grad_norm": 0.52734375, "learning_rate": 1.6179078014184396e-06, "loss": 1.0258, "step": 4367 }, { "epoch": 0.968259466049682, "grad_norm": 0.55078125, "learning_rate": 1.606826241134752e-06, "loss": 1.1091, "step": 4368 }, { "epoch": 0.9684811371728619, "grad_norm": 0.52734375, "learning_rate": 1.5957446808510639e-06, "loss": 1.0928, "step": 4369 }, { "epoch": 0.9687028082960418, "grad_norm": 0.53515625, "learning_rate": 1.584663120567376e-06, "loss": 0.9749, "step": 4370 }, { "epoch": 0.9689244794192217, "grad_norm": 0.53515625, "learning_rate": 1.573581560283688e-06, "loss": 1.0925, "step": 4371 }, { "epoch": 0.9691461505424015, "grad_norm": 0.52734375, "learning_rate": 1.5625e-06, "loss": 1.0927, "step": 4372 }, { "epoch": 0.9693678216655814, "grad_norm": 0.52734375, "learning_rate": 1.5514184397163122e-06, "loss": 1.1127, "step": 4373 }, { "epoch": 0.9695894927887613, "grad_norm": 0.51171875, "learning_rate": 1.5403368794326241e-06, "loss": 0.9564, "step": 4374 }, { "epoch": 0.9698111639119411, "grad_norm": 0.5390625, "learning_rate": 1.5292553191489363e-06, "loss": 1.0739, "step": 4375 }, { "epoch": 0.970032835035121, "grad_norm": 0.53125, "learning_rate": 1.5181737588652482e-06, "loss": 1.0282, "step": 4376 }, { "epoch": 0.9702545061583009, "grad_norm": 0.52734375, "learning_rate": 1.5070921985815603e-06, "loss": 1.0865, "step": 4377 }, { "epoch": 0.9704761772814807, "grad_norm": 0.55078125, "learning_rate": 1.4960106382978725e-06, "loss": 1.0457, "step": 4378 }, { "epoch": 0.9706978484046607, "grad_norm": 0.515625, "learning_rate": 1.4849290780141846e-06, "loss": 1.0048, "step": 4379 }, { "epoch": 0.9709195195278405, "grad_norm": 0.5390625, "learning_rate": 1.4738475177304965e-06, "loss": 1.124, "step": 4380 }, { "epoch": 0.9711411906510203, "grad_norm": 0.53125, "learning_rate": 1.4627659574468087e-06, "loss": 1.042, "step": 4381 }, { "epoch": 0.9713628617742003, "grad_norm": 0.55859375, "learning_rate": 1.4516843971631206e-06, "loss": 1.1333, "step": 4382 }, { "epoch": 0.9715845328973801, "grad_norm": 0.515625, "learning_rate": 1.4406028368794327e-06, "loss": 0.9857, "step": 4383 }, { "epoch": 0.97180620402056, "grad_norm": 0.5625, "learning_rate": 1.4295212765957447e-06, "loss": 0.9938, "step": 4384 }, { "epoch": 0.9720278751437399, "grad_norm": 0.546875, "learning_rate": 1.4184397163120568e-06, "loss": 1.0824, "step": 4385 }, { "epoch": 0.9722495462669197, "grad_norm": 0.5078125, "learning_rate": 1.4073581560283687e-06, "loss": 0.9313, "step": 4386 }, { "epoch": 0.9724712173900996, "grad_norm": 0.5390625, "learning_rate": 1.3962765957446809e-06, "loss": 0.983, "step": 4387 }, { "epoch": 0.9726928885132795, "grad_norm": 0.5390625, "learning_rate": 1.3851950354609928e-06, "loss": 1.0183, "step": 4388 }, { "epoch": 0.9729145596364593, "grad_norm": 0.53515625, "learning_rate": 1.3741134751773051e-06, "loss": 1.0285, "step": 4389 }, { "epoch": 0.9731362307596392, "grad_norm": 0.5546875, "learning_rate": 1.363031914893617e-06, "loss": 1.0432, "step": 4390 }, { "epoch": 0.9733579018828191, "grad_norm": 0.53515625, "learning_rate": 1.3519503546099292e-06, "loss": 1.018, "step": 4391 }, { "epoch": 0.973579573005999, "grad_norm": 0.55078125, "learning_rate": 1.3408687943262411e-06, "loss": 1.0404, "step": 4392 }, { "epoch": 0.9738012441291789, "grad_norm": 0.53515625, "learning_rate": 1.3297872340425533e-06, "loss": 1.1207, "step": 4393 }, { "epoch": 0.9740229152523587, "grad_norm": 0.515625, "learning_rate": 1.3187056737588654e-06, "loss": 1.0281, "step": 4394 }, { "epoch": 0.9742445863755386, "grad_norm": 0.55078125, "learning_rate": 1.3076241134751773e-06, "loss": 1.1366, "step": 4395 }, { "epoch": 0.9744662574987185, "grad_norm": 0.55859375, "learning_rate": 1.2965425531914895e-06, "loss": 1.1193, "step": 4396 }, { "epoch": 0.9746879286218983, "grad_norm": 0.58984375, "learning_rate": 1.2854609929078014e-06, "loss": 1.1425, "step": 4397 }, { "epoch": 0.9749095997450782, "grad_norm": 0.5234375, "learning_rate": 1.2743794326241135e-06, "loss": 0.9638, "step": 4398 }, { "epoch": 0.9751312708682581, "grad_norm": 0.53125, "learning_rate": 1.2632978723404255e-06, "loss": 1.0602, "step": 4399 }, { "epoch": 0.975352941991438, "grad_norm": 0.52734375, "learning_rate": 1.2522163120567378e-06, "loss": 0.9751, "step": 4400 }, { "epoch": 0.9755746131146178, "grad_norm": 0.53515625, "learning_rate": 1.2411347517730497e-06, "loss": 1.0222, "step": 4401 }, { "epoch": 0.9757962842377977, "grad_norm": 0.5390625, "learning_rate": 1.2300531914893619e-06, "loss": 1.0457, "step": 4402 }, { "epoch": 0.9760179553609776, "grad_norm": 0.54296875, "learning_rate": 1.2189716312056738e-06, "loss": 1.0699, "step": 4403 }, { "epoch": 0.9762396264841574, "grad_norm": 0.55859375, "learning_rate": 1.207890070921986e-06, "loss": 1.0425, "step": 4404 }, { "epoch": 0.9764612976073374, "grad_norm": 0.51171875, "learning_rate": 1.1968085106382979e-06, "loss": 1.003, "step": 4405 }, { "epoch": 0.9766829687305172, "grad_norm": 0.54296875, "learning_rate": 1.18572695035461e-06, "loss": 1.1735, "step": 4406 }, { "epoch": 0.976904639853697, "grad_norm": 0.58203125, "learning_rate": 1.174645390070922e-06, "loss": 1.1287, "step": 4407 }, { "epoch": 0.977126310976877, "grad_norm": 0.5625, "learning_rate": 1.163563829787234e-06, "loss": 1.1436, "step": 4408 }, { "epoch": 0.9773479821000568, "grad_norm": 0.54296875, "learning_rate": 1.152482269503546e-06, "loss": 1.0508, "step": 4409 }, { "epoch": 0.9775696532232366, "grad_norm": 0.5390625, "learning_rate": 1.1414007092198583e-06, "loss": 1.1296, "step": 4410 }, { "epoch": 0.9777913243464166, "grad_norm": 0.5234375, "learning_rate": 1.1303191489361703e-06, "loss": 1.0486, "step": 4411 }, { "epoch": 0.9780129954695964, "grad_norm": 0.546875, "learning_rate": 1.1192375886524824e-06, "loss": 1.0172, "step": 4412 }, { "epoch": 0.9782346665927762, "grad_norm": 0.53125, "learning_rate": 1.1081560283687943e-06, "loss": 1.0748, "step": 4413 }, { "epoch": 0.9784563377159562, "grad_norm": 0.5234375, "learning_rate": 1.0970744680851065e-06, "loss": 1.0505, "step": 4414 }, { "epoch": 0.978678008839136, "grad_norm": 0.53125, "learning_rate": 1.0859929078014186e-06, "loss": 1.0595, "step": 4415 }, { "epoch": 0.9788996799623159, "grad_norm": 0.52734375, "learning_rate": 1.0749113475177305e-06, "loss": 1.0288, "step": 4416 }, { "epoch": 0.9791213510854958, "grad_norm": 0.52734375, "learning_rate": 1.0638297872340427e-06, "loss": 1.1087, "step": 4417 }, { "epoch": 0.9793430222086756, "grad_norm": 0.51953125, "learning_rate": 1.0527482269503546e-06, "loss": 1.0627, "step": 4418 }, { "epoch": 0.9795646933318555, "grad_norm": 0.5234375, "learning_rate": 1.0416666666666667e-06, "loss": 0.982, "step": 4419 }, { "epoch": 0.9797863644550354, "grad_norm": 0.546875, "learning_rate": 1.0305851063829786e-06, "loss": 1.0772, "step": 4420 }, { "epoch": 0.9800080355782153, "grad_norm": 0.54296875, "learning_rate": 1.019503546099291e-06, "loss": 1.0677, "step": 4421 }, { "epoch": 0.9802297067013952, "grad_norm": 0.546875, "learning_rate": 1.008421985815603e-06, "loss": 1.0008, "step": 4422 }, { "epoch": 0.980451377824575, "grad_norm": 0.515625, "learning_rate": 9.97340425531915e-07, "loss": 1.0283, "step": 4423 }, { "epoch": 0.9806730489477549, "grad_norm": 0.515625, "learning_rate": 9.86258865248227e-07, "loss": 1.0497, "step": 4424 }, { "epoch": 0.9808947200709348, "grad_norm": 0.5078125, "learning_rate": 9.751773049645391e-07, "loss": 1.0664, "step": 4425 }, { "epoch": 0.9811163911941146, "grad_norm": 0.5390625, "learning_rate": 9.64095744680851e-07, "loss": 1.0645, "step": 4426 }, { "epoch": 0.9813380623172945, "grad_norm": 0.53125, "learning_rate": 9.530141843971632e-07, "loss": 1.063, "step": 4427 }, { "epoch": 0.9815597334404744, "grad_norm": 0.546875, "learning_rate": 9.419326241134752e-07, "loss": 1.1137, "step": 4428 }, { "epoch": 0.9817814045636543, "grad_norm": 0.55078125, "learning_rate": 9.308510638297872e-07, "loss": 1.0517, "step": 4429 }, { "epoch": 0.9820030756868341, "grad_norm": 0.52734375, "learning_rate": 9.197695035460993e-07, "loss": 1.0298, "step": 4430 }, { "epoch": 0.982224746810014, "grad_norm": 0.5390625, "learning_rate": 9.086879432624113e-07, "loss": 1.0454, "step": 4431 }, { "epoch": 0.9824464179331939, "grad_norm": 0.53515625, "learning_rate": 8.976063829787235e-07, "loss": 1.0683, "step": 4432 }, { "epoch": 0.9826680890563737, "grad_norm": 0.54296875, "learning_rate": 8.865248226950356e-07, "loss": 1.0633, "step": 4433 }, { "epoch": 0.9828897601795537, "grad_norm": 0.53515625, "learning_rate": 8.754432624113476e-07, "loss": 1.0277, "step": 4434 }, { "epoch": 0.9831114313027335, "grad_norm": 0.53515625, "learning_rate": 8.643617021276596e-07, "loss": 1.0952, "step": 4435 }, { "epoch": 0.9833331024259133, "grad_norm": 0.5546875, "learning_rate": 8.532801418439717e-07, "loss": 1.0663, "step": 4436 }, { "epoch": 0.9835547735490933, "grad_norm": 0.50390625, "learning_rate": 8.421985815602837e-07, "loss": 0.9976, "step": 4437 }, { "epoch": 0.9837764446722731, "grad_norm": 0.5546875, "learning_rate": 8.311170212765957e-07, "loss": 1.0412, "step": 4438 }, { "epoch": 0.9839981157954529, "grad_norm": 0.53515625, "learning_rate": 8.200354609929078e-07, "loss": 1.0738, "step": 4439 }, { "epoch": 0.9842197869186329, "grad_norm": 0.52734375, "learning_rate": 8.089539007092198e-07, "loss": 1.0611, "step": 4440 }, { "epoch": 0.9844414580418127, "grad_norm": 0.51953125, "learning_rate": 7.978723404255319e-07, "loss": 1.0503, "step": 4441 }, { "epoch": 0.9846631291649925, "grad_norm": 0.53515625, "learning_rate": 7.86790780141844e-07, "loss": 0.9982, "step": 4442 }, { "epoch": 0.9848848002881725, "grad_norm": 0.54296875, "learning_rate": 7.757092198581561e-07, "loss": 1.069, "step": 4443 }, { "epoch": 0.9851064714113523, "grad_norm": 0.5, "learning_rate": 7.646276595744681e-07, "loss": 0.9623, "step": 4444 }, { "epoch": 0.9853281425345322, "grad_norm": 0.57421875, "learning_rate": 7.535460992907802e-07, "loss": 1.1921, "step": 4445 }, { "epoch": 0.9855498136577121, "grad_norm": 0.53515625, "learning_rate": 7.424645390070923e-07, "loss": 1.1015, "step": 4446 }, { "epoch": 0.9857714847808919, "grad_norm": 0.5234375, "learning_rate": 7.313829787234043e-07, "loss": 1.0491, "step": 4447 }, { "epoch": 0.9859931559040718, "grad_norm": 0.53515625, "learning_rate": 7.203014184397164e-07, "loss": 1.0359, "step": 4448 }, { "epoch": 0.9862148270272517, "grad_norm": 0.52734375, "learning_rate": 7.092198581560284e-07, "loss": 1.095, "step": 4449 }, { "epoch": 0.9864364981504316, "grad_norm": 0.53125, "learning_rate": 6.981382978723404e-07, "loss": 1.078, "step": 4450 }, { "epoch": 0.9866581692736114, "grad_norm": 0.53515625, "learning_rate": 6.870567375886526e-07, "loss": 1.0542, "step": 4451 }, { "epoch": 0.9868798403967913, "grad_norm": 0.5234375, "learning_rate": 6.759751773049646e-07, "loss": 1.0377, "step": 4452 }, { "epoch": 0.9871015115199712, "grad_norm": 0.53515625, "learning_rate": 6.648936170212766e-07, "loss": 1.0077, "step": 4453 }, { "epoch": 0.9873231826431511, "grad_norm": 0.55859375, "learning_rate": 6.538120567375887e-07, "loss": 1.0649, "step": 4454 }, { "epoch": 0.9875448537663309, "grad_norm": 0.5390625, "learning_rate": 6.427304964539007e-07, "loss": 1.0588, "step": 4455 }, { "epoch": 0.9877665248895108, "grad_norm": 0.578125, "learning_rate": 6.316489361702127e-07, "loss": 1.1266, "step": 4456 }, { "epoch": 0.9879881960126907, "grad_norm": 0.5234375, "learning_rate": 6.205673758865249e-07, "loss": 1.0492, "step": 4457 }, { "epoch": 0.9882098671358706, "grad_norm": 0.53125, "learning_rate": 6.094858156028369e-07, "loss": 1.0976, "step": 4458 }, { "epoch": 0.9884315382590504, "grad_norm": 0.62109375, "learning_rate": 5.984042553191489e-07, "loss": 1.0406, "step": 4459 }, { "epoch": 0.9886532093822303, "grad_norm": 0.53515625, "learning_rate": 5.87322695035461e-07, "loss": 1.0607, "step": 4460 }, { "epoch": 0.9888748805054102, "grad_norm": 0.53515625, "learning_rate": 5.76241134751773e-07, "loss": 1.0051, "step": 4461 }, { "epoch": 0.98909655162859, "grad_norm": 0.53125, "learning_rate": 5.651595744680851e-07, "loss": 1.1133, "step": 4462 }, { "epoch": 0.98931822275177, "grad_norm": 0.5546875, "learning_rate": 5.540780141843972e-07, "loss": 1.0747, "step": 4463 }, { "epoch": 0.9895398938749498, "grad_norm": 0.53125, "learning_rate": 5.429964539007093e-07, "loss": 1.0711, "step": 4464 }, { "epoch": 0.9897615649981296, "grad_norm": 0.54296875, "learning_rate": 5.319148936170213e-07, "loss": 1.0667, "step": 4465 }, { "epoch": 0.9899832361213096, "grad_norm": 0.55859375, "learning_rate": 5.208333333333334e-07, "loss": 1.0725, "step": 4466 }, { "epoch": 0.9902049072444894, "grad_norm": 0.51171875, "learning_rate": 5.097517730496455e-07, "loss": 1.0311, "step": 4467 }, { "epoch": 0.9904265783676692, "grad_norm": 0.50390625, "learning_rate": 4.986702127659575e-07, "loss": 1.0641, "step": 4468 }, { "epoch": 0.9906482494908492, "grad_norm": 0.5390625, "learning_rate": 4.875886524822696e-07, "loss": 1.0979, "step": 4469 }, { "epoch": 0.990869920614029, "grad_norm": 0.5078125, "learning_rate": 4.765070921985816e-07, "loss": 1.0244, "step": 4470 }, { "epoch": 0.9910915917372088, "grad_norm": 0.57421875, "learning_rate": 4.654255319148936e-07, "loss": 1.1156, "step": 4471 }, { "epoch": 0.9913132628603888, "grad_norm": 0.5390625, "learning_rate": 4.5434397163120565e-07, "loss": 1.1168, "step": 4472 }, { "epoch": 0.9915349339835686, "grad_norm": 0.5390625, "learning_rate": 4.432624113475178e-07, "loss": 0.9881, "step": 4473 }, { "epoch": 0.9917566051067485, "grad_norm": 0.53515625, "learning_rate": 4.321808510638298e-07, "loss": 1.0692, "step": 4474 }, { "epoch": 0.9919782762299284, "grad_norm": 0.5234375, "learning_rate": 4.2109929078014185e-07, "loss": 0.9841, "step": 4475 }, { "epoch": 0.9921999473531082, "grad_norm": 0.59375, "learning_rate": 4.100177304964539e-07, "loss": 1.0198, "step": 4476 }, { "epoch": 0.9924216184762881, "grad_norm": 0.55078125, "learning_rate": 3.9893617021276597e-07, "loss": 1.0531, "step": 4477 }, { "epoch": 0.992643289599468, "grad_norm": 0.5390625, "learning_rate": 3.8785460992907805e-07, "loss": 1.0334, "step": 4478 }, { "epoch": 0.9928649607226478, "grad_norm": 0.5078125, "learning_rate": 3.767730496453901e-07, "loss": 1.0198, "step": 4479 }, { "epoch": 0.9930866318458277, "grad_norm": 0.55859375, "learning_rate": 3.6569148936170217e-07, "loss": 1.1459, "step": 4480 }, { "epoch": 0.9933083029690076, "grad_norm": 0.54296875, "learning_rate": 3.546099290780142e-07, "loss": 1.0778, "step": 4481 }, { "epoch": 0.9935299740921875, "grad_norm": 0.55078125, "learning_rate": 3.435283687943263e-07, "loss": 0.9887, "step": 4482 }, { "epoch": 0.9937516452153673, "grad_norm": 0.53125, "learning_rate": 3.324468085106383e-07, "loss": 1.0464, "step": 4483 }, { "epoch": 0.9939733163385472, "grad_norm": 0.53515625, "learning_rate": 3.2136524822695035e-07, "loss": 1.041, "step": 4484 }, { "epoch": 0.9941949874617271, "grad_norm": 0.53515625, "learning_rate": 3.1028368794326243e-07, "loss": 1.0643, "step": 4485 }, { "epoch": 0.994416658584907, "grad_norm": 0.55078125, "learning_rate": 2.9920212765957446e-07, "loss": 1.0769, "step": 4486 }, { "epoch": 0.9946383297080869, "grad_norm": 0.52734375, "learning_rate": 2.881205673758865e-07, "loss": 1.1478, "step": 4487 }, { "epoch": 0.9948600008312667, "grad_norm": 0.5546875, "learning_rate": 2.770390070921986e-07, "loss": 1.1045, "step": 4488 }, { "epoch": 0.9950816719544466, "grad_norm": 0.5234375, "learning_rate": 2.6595744680851066e-07, "loss": 1.0868, "step": 4489 }, { "epoch": 0.9953033430776265, "grad_norm": 0.5234375, "learning_rate": 2.5487588652482275e-07, "loss": 1.0235, "step": 4490 }, { "epoch": 0.9955250142008063, "grad_norm": 0.59375, "learning_rate": 2.437943262411348e-07, "loss": 1.0747, "step": 4491 }, { "epoch": 0.9957466853239862, "grad_norm": 0.5546875, "learning_rate": 2.327127659574468e-07, "loss": 1.0904, "step": 4492 }, { "epoch": 0.9959683564471661, "grad_norm": 0.53125, "learning_rate": 2.216312056737589e-07, "loss": 1.0439, "step": 4493 }, { "epoch": 0.9961900275703459, "grad_norm": 0.51953125, "learning_rate": 2.1054964539007093e-07, "loss": 1.0663, "step": 4494 }, { "epoch": 0.9964116986935259, "grad_norm": 0.54296875, "learning_rate": 1.9946808510638298e-07, "loss": 1.0613, "step": 4495 }, { "epoch": 0.9966333698167057, "grad_norm": 0.53125, "learning_rate": 1.8838652482269504e-07, "loss": 1.0918, "step": 4496 }, { "epoch": 0.9968550409398855, "grad_norm": 0.5390625, "learning_rate": 1.773049645390071e-07, "loss": 0.9467, "step": 4497 }, { "epoch": 0.9970767120630655, "grad_norm": 0.5078125, "learning_rate": 1.6622340425531916e-07, "loss": 1.0058, "step": 4498 }, { "epoch": 0.9972983831862453, "grad_norm": 0.515625, "learning_rate": 1.5514184397163122e-07, "loss": 1.0377, "step": 4499 }, { "epoch": 0.9975200543094251, "grad_norm": 0.54296875, "learning_rate": 1.4406028368794325e-07, "loss": 1.0162, "step": 4500 }, { "epoch": 0.9977417254326051, "grad_norm": 0.546875, "learning_rate": 1.3297872340425533e-07, "loss": 1.0608, "step": 4501 }, { "epoch": 0.9979633965557849, "grad_norm": 0.5625, "learning_rate": 1.218971631205674e-07, "loss": 1.0875, "step": 4502 }, { "epoch": 0.9981850676789648, "grad_norm": 0.50390625, "learning_rate": 1.1081560283687945e-07, "loss": 0.937, "step": 4503 }, { "epoch": 0.9984067388021447, "grad_norm": 0.515625, "learning_rate": 9.973404255319149e-08, "loss": 1.0496, "step": 4504 }, { "epoch": 0.9986284099253245, "grad_norm": 0.54296875, "learning_rate": 8.865248226950355e-08, "loss": 1.1307, "step": 4505 }, { "epoch": 0.9988500810485044, "grad_norm": 0.52734375, "learning_rate": 7.757092198581561e-08, "loss": 1.0656, "step": 4506 }, { "epoch": 0.9990717521716843, "grad_norm": 0.56640625, "learning_rate": 6.648936170212767e-08, "loss": 1.0837, "step": 4507 }, { "epoch": 0.9992934232948641, "grad_norm": 0.52734375, "learning_rate": 5.5407801418439724e-08, "loss": 1.0554, "step": 4508 }, { "epoch": 0.999515094418044, "grad_norm": 0.51953125, "learning_rate": 4.4326241134751775e-08, "loss": 1.0244, "step": 4509 }, { "epoch": 0.9997367655412239, "grad_norm": 0.5078125, "learning_rate": 3.324468085106383e-08, "loss": 1.0664, "step": 4510 }, { "epoch": 0.9999584366644038, "grad_norm": 0.53515625, "learning_rate": 2.2163120567375888e-08, "loss": 1.1259, "step": 4511 }, { "epoch": 1.0, "grad_norm": 1.1953125, "learning_rate": 1.1081560283687944e-08, "loss": 0.9025, "step": 4512 } ], "logging_steps": 1, "max_steps": 4512, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.090129470539956e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }