{ "best_global_step": 1380, "best_metric": 0.6770720481872559, "best_model_checkpoint": "saves/qwen3-4B/Qwen3-4B-SFT-science-2e-5/checkpoint-1380", "epoch": 3.0, "eval_steps": 230, "global_step": 2313, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012976480129764801, "grad_norm": 8.15907096862793, "learning_rate": 0.0, "loss": 1.117659091949463, "step": 1 }, { "epoch": 0.0025952960259529602, "grad_norm": 7.67869234085083, "learning_rate": 1.7241379310344828e-07, "loss": 1.0263863801956177, "step": 2 }, { "epoch": 0.0038929440389294406, "grad_norm": 8.24106502532959, "learning_rate": 3.4482758620689656e-07, "loss": 1.1220319271087646, "step": 3 }, { "epoch": 0.0051905920519059205, "grad_norm": 8.60258960723877, "learning_rate": 5.172413793103449e-07, "loss": 1.1806347370147705, "step": 4 }, { "epoch": 0.006488240064882401, "grad_norm": 7.782258033752441, "learning_rate": 6.896551724137931e-07, "loss": 1.105953574180603, "step": 5 }, { "epoch": 0.007785888077858881, "grad_norm": 7.797566890716553, "learning_rate": 8.620689655172415e-07, "loss": 1.0968478918075562, "step": 6 }, { "epoch": 0.009083536090835361, "grad_norm": 7.626895427703857, "learning_rate": 1.0344827586206898e-06, "loss": 1.0549066066741943, "step": 7 }, { "epoch": 0.010381184103811841, "grad_norm": 7.147245407104492, "learning_rate": 1.2068965517241381e-06, "loss": 1.0259548425674438, "step": 8 }, { "epoch": 0.01167883211678832, "grad_norm": 5.977053165435791, "learning_rate": 1.3793103448275862e-06, "loss": 0.954434335231781, "step": 9 }, { "epoch": 0.012976480129764802, "grad_norm": 6.206176280975342, "learning_rate": 1.5517241379310346e-06, "loss": 1.049869418144226, "step": 10 }, { "epoch": 0.014274128142741281, "grad_norm": 5.300525665283203, "learning_rate": 1.724137931034483e-06, "loss": 1.0076310634613037, "step": 11 }, { "epoch": 0.015571776155717762, "grad_norm": 4.235332489013672, "learning_rate": 1.896551724137931e-06, "loss": 0.9547766447067261, "step": 12 }, { "epoch": 0.01686942416869424, "grad_norm": 4.258054733276367, "learning_rate": 2.0689655172413796e-06, "loss": 0.9808558225631714, "step": 13 }, { "epoch": 0.018167072181670723, "grad_norm": 3.9000754356384277, "learning_rate": 2.241379310344828e-06, "loss": 0.955378532409668, "step": 14 }, { "epoch": 0.019464720194647202, "grad_norm": 2.9283816814422607, "learning_rate": 2.4137931034482762e-06, "loss": 0.9264786839485168, "step": 15 }, { "epoch": 0.020762368207623682, "grad_norm": 2.1859076023101807, "learning_rate": 2.5862068965517246e-06, "loss": 0.8895066380500793, "step": 16 }, { "epoch": 0.02206001622060016, "grad_norm": 2.1717398166656494, "learning_rate": 2.7586206896551725e-06, "loss": 0.9194827675819397, "step": 17 }, { "epoch": 0.02335766423357664, "grad_norm": 1.7686649560928345, "learning_rate": 2.931034482758621e-06, "loss": 0.8839207291603088, "step": 18 }, { "epoch": 0.024655312246553124, "grad_norm": 1.7060308456420898, "learning_rate": 3.103448275862069e-06, "loss": 0.8821989297866821, "step": 19 }, { "epoch": 0.025952960259529603, "grad_norm": 1.4888310432434082, "learning_rate": 3.2758620689655175e-06, "loss": 0.7937015295028687, "step": 20 }, { "epoch": 0.027250608272506083, "grad_norm": 1.5812122821807861, "learning_rate": 3.448275862068966e-06, "loss": 0.9222494959831238, "step": 21 }, { "epoch": 0.028548256285482562, "grad_norm": 1.5842291116714478, "learning_rate": 3.620689655172414e-06, "loss": 0.8129012584686279, "step": 22 }, { "epoch": 0.02984590429845904, "grad_norm": 1.5270442962646484, "learning_rate": 3.793103448275862e-06, "loss": 0.843705415725708, "step": 23 }, { "epoch": 0.031143552311435525, "grad_norm": 1.1963210105895996, "learning_rate": 3.96551724137931e-06, "loss": 0.7932494878768921, "step": 24 }, { "epoch": 0.032441200324412, "grad_norm": 1.0309710502624512, "learning_rate": 4.137931034482759e-06, "loss": 0.7899153828620911, "step": 25 }, { "epoch": 0.03373884833738848, "grad_norm": 0.9451068639755249, "learning_rate": 4.310344827586207e-06, "loss": 0.8323757648468018, "step": 26 }, { "epoch": 0.035036496350364967, "grad_norm": 0.9398018717765808, "learning_rate": 4.482758620689656e-06, "loss": 0.8048505187034607, "step": 27 }, { "epoch": 0.036334144363341446, "grad_norm": 0.8759371042251587, "learning_rate": 4.655172413793104e-06, "loss": 0.8321108222007751, "step": 28 }, { "epoch": 0.037631792376317925, "grad_norm": 0.7862148284912109, "learning_rate": 4.8275862068965525e-06, "loss": 0.8356962203979492, "step": 29 }, { "epoch": 0.038929440389294405, "grad_norm": 0.8221083283424377, "learning_rate": 5e-06, "loss": 0.856194794178009, "step": 30 }, { "epoch": 0.040227088402270884, "grad_norm": 0.7913339734077454, "learning_rate": 5.172413793103449e-06, "loss": 0.782647967338562, "step": 31 }, { "epoch": 0.041524736415247364, "grad_norm": 0.7948570847511292, "learning_rate": 5.344827586206896e-06, "loss": 0.8002289533615112, "step": 32 }, { "epoch": 0.04282238442822384, "grad_norm": 0.8172705769538879, "learning_rate": 5.517241379310345e-06, "loss": 0.8037389516830444, "step": 33 }, { "epoch": 0.04412003244120032, "grad_norm": 0.7674341797828674, "learning_rate": 5.689655172413794e-06, "loss": 0.7561640739440918, "step": 34 }, { "epoch": 0.0454176804541768, "grad_norm": 0.7508828043937683, "learning_rate": 5.862068965517242e-06, "loss": 0.820884108543396, "step": 35 }, { "epoch": 0.04671532846715328, "grad_norm": 0.7388272285461426, "learning_rate": 6.03448275862069e-06, "loss": 0.8406673669815063, "step": 36 }, { "epoch": 0.04801297648012977, "grad_norm": 0.6549146771430969, "learning_rate": 6.206896551724138e-06, "loss": 0.7618731260299683, "step": 37 }, { "epoch": 0.04931062449310625, "grad_norm": 0.6996558904647827, "learning_rate": 6.379310344827587e-06, "loss": 0.7531220316886902, "step": 38 }, { "epoch": 0.05060827250608273, "grad_norm": 0.659206748008728, "learning_rate": 6.551724137931035e-06, "loss": 0.8432419896125793, "step": 39 }, { "epoch": 0.05190592051905921, "grad_norm": 0.6969435811042786, "learning_rate": 6.724137931034484e-06, "loss": 0.8152772784233093, "step": 40 }, { "epoch": 0.053203568532035686, "grad_norm": 0.638674795627594, "learning_rate": 6.896551724137932e-06, "loss": 0.8012467622756958, "step": 41 }, { "epoch": 0.054501216545012166, "grad_norm": 0.6248321533203125, "learning_rate": 7.0689655172413796e-06, "loss": 0.7576991319656372, "step": 42 }, { "epoch": 0.055798864557988645, "grad_norm": 0.6499493718147278, "learning_rate": 7.241379310344828e-06, "loss": 0.7685450911521912, "step": 43 }, { "epoch": 0.057096512570965124, "grad_norm": 0.6266531348228455, "learning_rate": 7.413793103448277e-06, "loss": 0.7682685852050781, "step": 44 }, { "epoch": 0.058394160583941604, "grad_norm": 0.6328745484352112, "learning_rate": 7.586206896551724e-06, "loss": 0.8221952319145203, "step": 45 }, { "epoch": 0.05969180859691808, "grad_norm": 0.6457077860832214, "learning_rate": 7.758620689655173e-06, "loss": 0.7616772055625916, "step": 46 }, { "epoch": 0.06098945660989456, "grad_norm": 0.6841326951980591, "learning_rate": 7.93103448275862e-06, "loss": 0.7185612916946411, "step": 47 }, { "epoch": 0.06228710462287105, "grad_norm": 0.653884768486023, "learning_rate": 8.103448275862069e-06, "loss": 0.8144221901893616, "step": 48 }, { "epoch": 0.06358475263584752, "grad_norm": 0.6235163807868958, "learning_rate": 8.275862068965518e-06, "loss": 0.7789400815963745, "step": 49 }, { "epoch": 0.064882400648824, "grad_norm": 0.6035148501396179, "learning_rate": 8.448275862068966e-06, "loss": 0.7788746356964111, "step": 50 }, { "epoch": 0.06618004866180048, "grad_norm": 0.6197084784507751, "learning_rate": 8.620689655172414e-06, "loss": 0.7773774266242981, "step": 51 }, { "epoch": 0.06747769667477696, "grad_norm": 0.6356611847877502, "learning_rate": 8.793103448275862e-06, "loss": 0.8119993209838867, "step": 52 }, { "epoch": 0.06877534468775345, "grad_norm": 0.6229863166809082, "learning_rate": 8.965517241379312e-06, "loss": 0.8156378269195557, "step": 53 }, { "epoch": 0.07007299270072993, "grad_norm": 0.6285703778266907, "learning_rate": 9.13793103448276e-06, "loss": 0.7589212656021118, "step": 54 }, { "epoch": 0.07137064071370641, "grad_norm": 0.6221722960472107, "learning_rate": 9.310344827586207e-06, "loss": 0.7588199377059937, "step": 55 }, { "epoch": 0.07266828872668289, "grad_norm": 0.5896920561790466, "learning_rate": 9.482758620689655e-06, "loss": 0.7869905233383179, "step": 56 }, { "epoch": 0.07396593673965937, "grad_norm": 0.6120532155036926, "learning_rate": 9.655172413793105e-06, "loss": 0.7379593849182129, "step": 57 }, { "epoch": 0.07526358475263585, "grad_norm": 0.6437456011772156, "learning_rate": 9.827586206896553e-06, "loss": 0.8263105154037476, "step": 58 }, { "epoch": 0.07656123276561233, "grad_norm": 0.6005666851997375, "learning_rate": 1e-05, "loss": 0.8053442239761353, "step": 59 }, { "epoch": 0.07785888077858881, "grad_norm": 0.618229866027832, "learning_rate": 1.0172413793103449e-05, "loss": 0.7303550243377686, "step": 60 }, { "epoch": 0.07915652879156529, "grad_norm": 0.6245790719985962, "learning_rate": 1.0344827586206898e-05, "loss": 0.7618341445922852, "step": 61 }, { "epoch": 0.08045417680454177, "grad_norm": 0.632989227771759, "learning_rate": 1.0517241379310346e-05, "loss": 0.8073338270187378, "step": 62 }, { "epoch": 0.08175182481751825, "grad_norm": 0.6083235740661621, "learning_rate": 1.0689655172413792e-05, "loss": 0.7776636481285095, "step": 63 }, { "epoch": 0.08304947283049473, "grad_norm": 0.6136429309844971, "learning_rate": 1.0862068965517242e-05, "loss": 0.8043953776359558, "step": 64 }, { "epoch": 0.08434712084347121, "grad_norm": 0.6103477478027344, "learning_rate": 1.103448275862069e-05, "loss": 0.7928889989852905, "step": 65 }, { "epoch": 0.08564476885644769, "grad_norm": 0.6038222312927246, "learning_rate": 1.1206896551724138e-05, "loss": 0.7927621603012085, "step": 66 }, { "epoch": 0.08694241686942417, "grad_norm": 0.6238990426063538, "learning_rate": 1.1379310344827587e-05, "loss": 0.7877966165542603, "step": 67 }, { "epoch": 0.08824006488240065, "grad_norm": 0.5899522304534912, "learning_rate": 1.1551724137931035e-05, "loss": 0.721104621887207, "step": 68 }, { "epoch": 0.08953771289537713, "grad_norm": 0.6330446004867554, "learning_rate": 1.1724137931034483e-05, "loss": 0.8130797147750854, "step": 69 }, { "epoch": 0.0908353609083536, "grad_norm": 0.6214055418968201, "learning_rate": 1.1896551724137933e-05, "loss": 0.78719162940979, "step": 70 }, { "epoch": 0.09213300892133008, "grad_norm": 0.648266077041626, "learning_rate": 1.206896551724138e-05, "loss": 0.7923158407211304, "step": 71 }, { "epoch": 0.09343065693430656, "grad_norm": 0.6473869681358337, "learning_rate": 1.2241379310344827e-05, "loss": 0.8679413795471191, "step": 72 }, { "epoch": 0.09472830494728304, "grad_norm": 0.5954247117042542, "learning_rate": 1.2413793103448277e-05, "loss": 0.7424967288970947, "step": 73 }, { "epoch": 0.09602595296025954, "grad_norm": 0.6318120956420898, "learning_rate": 1.2586206896551725e-05, "loss": 0.7612457275390625, "step": 74 }, { "epoch": 0.09732360097323602, "grad_norm": 0.6183631420135498, "learning_rate": 1.2758620689655174e-05, "loss": 0.7567603588104248, "step": 75 }, { "epoch": 0.0986212489862125, "grad_norm": 0.6186433434486389, "learning_rate": 1.2931034482758622e-05, "loss": 0.8088338375091553, "step": 76 }, { "epoch": 0.09991889699918897, "grad_norm": 0.6034461855888367, "learning_rate": 1.310344827586207e-05, "loss": 0.7736937999725342, "step": 77 }, { "epoch": 0.10121654501216545, "grad_norm": 0.6197369694709778, "learning_rate": 1.327586206896552e-05, "loss": 0.7498612999916077, "step": 78 }, { "epoch": 0.10251419302514193, "grad_norm": 0.6505046486854553, "learning_rate": 1.3448275862068967e-05, "loss": 0.8144986629486084, "step": 79 }, { "epoch": 0.10381184103811841, "grad_norm": 0.6240726113319397, "learning_rate": 1.3620689655172414e-05, "loss": 0.7407926321029663, "step": 80 }, { "epoch": 0.10510948905109489, "grad_norm": 0.6124047040939331, "learning_rate": 1.3793103448275863e-05, "loss": 0.7526525855064392, "step": 81 }, { "epoch": 0.10640713706407137, "grad_norm": 0.5982939004898071, "learning_rate": 1.3965517241379311e-05, "loss": 0.722671627998352, "step": 82 }, { "epoch": 0.10770478507704785, "grad_norm": 0.5908958315849304, "learning_rate": 1.4137931034482759e-05, "loss": 0.7402417659759521, "step": 83 }, { "epoch": 0.10900243309002433, "grad_norm": 0.6116979718208313, "learning_rate": 1.4310344827586209e-05, "loss": 0.7960222959518433, "step": 84 }, { "epoch": 0.11030008110300081, "grad_norm": 0.6197500228881836, "learning_rate": 1.4482758620689657e-05, "loss": 0.7519891858100891, "step": 85 }, { "epoch": 0.11159772911597729, "grad_norm": 2.220649480819702, "learning_rate": 1.4655172413793105e-05, "loss": 0.7659766674041748, "step": 86 }, { "epoch": 0.11289537712895377, "grad_norm": 5.19334602355957, "learning_rate": 1.4827586206896554e-05, "loss": 0.7760565280914307, "step": 87 }, { "epoch": 0.11419302514193025, "grad_norm": 0.6664707064628601, "learning_rate": 1.5000000000000002e-05, "loss": 0.7354503870010376, "step": 88 }, { "epoch": 0.11549067315490673, "grad_norm": 0.6490852236747742, "learning_rate": 1.5172413793103448e-05, "loss": 0.7803969979286194, "step": 89 }, { "epoch": 0.11678832116788321, "grad_norm": 0.6153193116188049, "learning_rate": 1.5344827586206898e-05, "loss": 0.7803000807762146, "step": 90 }, { "epoch": 0.11808596918085969, "grad_norm": 0.6364138722419739, "learning_rate": 1.5517241379310346e-05, "loss": 0.7799690961837769, "step": 91 }, { "epoch": 0.11938361719383617, "grad_norm": 0.6558602452278137, "learning_rate": 1.5689655172413794e-05, "loss": 0.8238034248352051, "step": 92 }, { "epoch": 0.12068126520681265, "grad_norm": 0.629127562046051, "learning_rate": 1.586206896551724e-05, "loss": 0.7694847583770752, "step": 93 }, { "epoch": 0.12197891321978913, "grad_norm": 0.5806317925453186, "learning_rate": 1.603448275862069e-05, "loss": 0.7090768814086914, "step": 94 }, { "epoch": 0.12327656123276562, "grad_norm": 0.673556387424469, "learning_rate": 1.6206896551724137e-05, "loss": 0.8536560535430908, "step": 95 }, { "epoch": 0.1245742092457421, "grad_norm": 0.5968764424324036, "learning_rate": 1.637931034482759e-05, "loss": 0.7300469875335693, "step": 96 }, { "epoch": 0.12587185725871858, "grad_norm": 0.6305297613143921, "learning_rate": 1.6551724137931037e-05, "loss": 0.7591036558151245, "step": 97 }, { "epoch": 0.12716950527169504, "grad_norm": 0.606986403465271, "learning_rate": 1.6724137931034485e-05, "loss": 0.76216721534729, "step": 98 }, { "epoch": 0.12846715328467154, "grad_norm": 0.6063655018806458, "learning_rate": 1.6896551724137932e-05, "loss": 0.68424391746521, "step": 99 }, { "epoch": 0.129764801297648, "grad_norm": 0.7023365497589111, "learning_rate": 1.706896551724138e-05, "loss": 0.8325944542884827, "step": 100 }, { "epoch": 0.1310624493106245, "grad_norm": 0.6358933448791504, "learning_rate": 1.7241379310344828e-05, "loss": 0.8054566383361816, "step": 101 }, { "epoch": 0.13236009732360096, "grad_norm": 0.6431549191474915, "learning_rate": 1.7413793103448276e-05, "loss": 0.7429993748664856, "step": 102 }, { "epoch": 0.13365774533657745, "grad_norm": 0.6152120232582092, "learning_rate": 1.7586206896551724e-05, "loss": 0.7206076383590698, "step": 103 }, { "epoch": 0.13495539334955392, "grad_norm": 0.6442373991012573, "learning_rate": 1.7758620689655175e-05, "loss": 0.806060791015625, "step": 104 }, { "epoch": 0.1362530413625304, "grad_norm": 0.6756954789161682, "learning_rate": 1.7931034482758623e-05, "loss": 0.8363012671470642, "step": 105 }, { "epoch": 0.1375506893755069, "grad_norm": 0.743787407875061, "learning_rate": 1.810344827586207e-05, "loss": 0.8207604885101318, "step": 106 }, { "epoch": 0.13884833738848337, "grad_norm": 0.686335563659668, "learning_rate": 1.827586206896552e-05, "loss": 0.7393860816955566, "step": 107 }, { "epoch": 0.14014598540145987, "grad_norm": 0.6191396713256836, "learning_rate": 1.8448275862068967e-05, "loss": 0.7534383535385132, "step": 108 }, { "epoch": 0.14144363341443633, "grad_norm": 0.6754934191703796, "learning_rate": 1.8620689655172415e-05, "loss": 0.8022092580795288, "step": 109 }, { "epoch": 0.14274128142741282, "grad_norm": 0.6399085521697998, "learning_rate": 1.8793103448275863e-05, "loss": 0.8507853746414185, "step": 110 }, { "epoch": 0.1440389294403893, "grad_norm": 0.6910972595214844, "learning_rate": 1.896551724137931e-05, "loss": 0.8276559710502625, "step": 111 }, { "epoch": 0.14533657745336578, "grad_norm": 0.5906772613525391, "learning_rate": 1.913793103448276e-05, "loss": 0.7183451056480408, "step": 112 }, { "epoch": 0.14663422546634225, "grad_norm": 0.6329069137573242, "learning_rate": 1.931034482758621e-05, "loss": 0.789232611656189, "step": 113 }, { "epoch": 0.14793187347931874, "grad_norm": 0.6226819157600403, "learning_rate": 1.9482758620689658e-05, "loss": 0.7747266292572021, "step": 114 }, { "epoch": 0.1492295214922952, "grad_norm": 0.65074223279953, "learning_rate": 1.9655172413793106e-05, "loss": 0.753608226776123, "step": 115 }, { "epoch": 0.1505271695052717, "grad_norm": 0.6118033528327942, "learning_rate": 1.9827586206896554e-05, "loss": 0.7803196907043457, "step": 116 }, { "epoch": 0.15182481751824817, "grad_norm": 0.6553196907043457, "learning_rate": 2e-05, "loss": 0.8216028213500977, "step": 117 }, { "epoch": 0.15312246553122466, "grad_norm": 0.678218424320221, "learning_rate": 1.999998977626552e-05, "loss": 0.807174801826477, "step": 118 }, { "epoch": 0.15442011354420113, "grad_norm": 0.6192781329154968, "learning_rate": 1.999995910508299e-05, "loss": 0.7289496660232544, "step": 119 }, { "epoch": 0.15571776155717762, "grad_norm": 0.6038413047790527, "learning_rate": 1.999990798651512e-05, "loss": 0.7679600119590759, "step": 120 }, { "epoch": 0.15701540957015409, "grad_norm": 0.6870720386505127, "learning_rate": 1.9999836420666438e-05, "loss": 0.8232643604278564, "step": 121 }, { "epoch": 0.15831305758313058, "grad_norm": 0.623460590839386, "learning_rate": 1.999974440768327e-05, "loss": 0.7480977177619934, "step": 122 }, { "epoch": 0.15961070559610704, "grad_norm": 0.651508629322052, "learning_rate": 1.9999631947753776e-05, "loss": 0.7708613276481628, "step": 123 }, { "epoch": 0.16090835360908354, "grad_norm": 0.6450805068016052, "learning_rate": 1.999949904110789e-05, "loss": 0.8049247860908508, "step": 124 }, { "epoch": 0.16220600162206, "grad_norm": 0.6157734990119934, "learning_rate": 1.999934568801738e-05, "loss": 0.7631984949111938, "step": 125 }, { "epoch": 0.1635036496350365, "grad_norm": 0.6847337484359741, "learning_rate": 1.999917188879582e-05, "loss": 0.7424380779266357, "step": 126 }, { "epoch": 0.164801297648013, "grad_norm": 0.6398855447769165, "learning_rate": 1.9998977643798572e-05, "loss": 0.7688143253326416, "step": 127 }, { "epoch": 0.16609894566098946, "grad_norm": 0.6518498063087463, "learning_rate": 1.999876295342283e-05, "loss": 0.7191232442855835, "step": 128 }, { "epoch": 0.16739659367396595, "grad_norm": 0.6462240219116211, "learning_rate": 1.9998527818107577e-05, "loss": 0.7375045418739319, "step": 129 }, { "epoch": 0.16869424168694241, "grad_norm": 0.6727373600006104, "learning_rate": 1.9998272238333606e-05, "loss": 0.7088533639907837, "step": 130 }, { "epoch": 0.1699918896999189, "grad_norm": 0.689372181892395, "learning_rate": 1.9997996214623515e-05, "loss": 0.8250190615653992, "step": 131 }, { "epoch": 0.17128953771289537, "grad_norm": 0.6236900687217712, "learning_rate": 1.9997699747541698e-05, "loss": 0.7653014659881592, "step": 132 }, { "epoch": 0.17258718572587187, "grad_norm": 0.617174506187439, "learning_rate": 1.9997382837694355e-05, "loss": 0.7043566703796387, "step": 133 }, { "epoch": 0.17388483373884833, "grad_norm": 0.6391400694847107, "learning_rate": 1.999704548572949e-05, "loss": 0.8009853363037109, "step": 134 }, { "epoch": 0.17518248175182483, "grad_norm": 0.6218752861022949, "learning_rate": 1.9996687692336896e-05, "loss": 0.7598843574523926, "step": 135 }, { "epoch": 0.1764801297648013, "grad_norm": 0.5787500143051147, "learning_rate": 1.9996309458248184e-05, "loss": 0.7174202799797058, "step": 136 }, { "epoch": 0.17777777777777778, "grad_norm": 0.6410360932350159, "learning_rate": 1.999591078423673e-05, "loss": 0.763797402381897, "step": 137 }, { "epoch": 0.17907542579075425, "grad_norm": 0.970513641834259, "learning_rate": 1.9995491671117734e-05, "loss": 0.6977022290229797, "step": 138 }, { "epoch": 0.18037307380373074, "grad_norm": 0.6853165030479431, "learning_rate": 1.999505211974817e-05, "loss": 0.7822556495666504, "step": 139 }, { "epoch": 0.1816707218167072, "grad_norm": 0.6396400332450867, "learning_rate": 1.999459213102681e-05, "loss": 0.7862622737884521, "step": 140 }, { "epoch": 0.1829683698296837, "grad_norm": 0.6066014766693115, "learning_rate": 1.9994111705894218e-05, "loss": 0.8506604433059692, "step": 141 }, { "epoch": 0.18426601784266017, "grad_norm": 0.6197599172592163, "learning_rate": 1.9993610845332734e-05, "loss": 0.7890738844871521, "step": 142 }, { "epoch": 0.18556366585563666, "grad_norm": 0.6512314677238464, "learning_rate": 1.99930895503665e-05, "loss": 0.7983291149139404, "step": 143 }, { "epoch": 0.18686131386861313, "grad_norm": 0.5899611115455627, "learning_rate": 1.9992547822061427e-05, "loss": 0.7357482314109802, "step": 144 }, { "epoch": 0.18815896188158962, "grad_norm": 0.6489595770835876, "learning_rate": 1.9991985661525217e-05, "loss": 0.875076174736023, "step": 145 }, { "epoch": 0.18945660989456609, "grad_norm": 0.6258020997047424, "learning_rate": 1.999140306990734e-05, "loss": 0.7252365350723267, "step": 146 }, { "epoch": 0.19075425790754258, "grad_norm": 0.6045345067977905, "learning_rate": 1.999080004839905e-05, "loss": 0.7721343040466309, "step": 147 }, { "epoch": 0.19205190592051907, "grad_norm": 0.6506165862083435, "learning_rate": 1.999017659823338e-05, "loss": 0.8302021026611328, "step": 148 }, { "epoch": 0.19334955393349554, "grad_norm": 0.6503569483757019, "learning_rate": 1.9989532720685115e-05, "loss": 0.825711190700531, "step": 149 }, { "epoch": 0.19464720194647203, "grad_norm": 0.5828515887260437, "learning_rate": 1.998886841707083e-05, "loss": 0.7742114067077637, "step": 150 }, { "epoch": 0.1959448499594485, "grad_norm": 0.5945319533348083, "learning_rate": 1.9988183688748862e-05, "loss": 0.8291171789169312, "step": 151 }, { "epoch": 0.197242497972425, "grad_norm": 0.6298274993896484, "learning_rate": 1.9987478537119297e-05, "loss": 0.8312891721725464, "step": 152 }, { "epoch": 0.19854014598540146, "grad_norm": 0.6161749958992004, "learning_rate": 1.9986752963624002e-05, "loss": 0.8070319890975952, "step": 153 }, { "epoch": 0.19983779399837795, "grad_norm": 0.6540800929069519, "learning_rate": 1.998600696974658e-05, "loss": 0.7966468334197998, "step": 154 }, { "epoch": 0.20113544201135442, "grad_norm": 0.628194272518158, "learning_rate": 1.9985240557012406e-05, "loss": 0.7929773926734924, "step": 155 }, { "epoch": 0.2024330900243309, "grad_norm": 0.6037770509719849, "learning_rate": 1.99844537269886e-05, "loss": 0.6729363203048706, "step": 156 }, { "epoch": 0.20373073803730737, "grad_norm": 0.6952143907546997, "learning_rate": 1.9983646481284028e-05, "loss": 0.8734431266784668, "step": 157 }, { "epoch": 0.20502838605028387, "grad_norm": 0.6359195113182068, "learning_rate": 1.9982818821549308e-05, "loss": 0.7915219664573669, "step": 158 }, { "epoch": 0.20632603406326033, "grad_norm": 0.578925609588623, "learning_rate": 1.9981970749476792e-05, "loss": 0.7327010631561279, "step": 159 }, { "epoch": 0.20762368207623683, "grad_norm": 0.6001781821250916, "learning_rate": 1.998110226680057e-05, "loss": 0.7517937421798706, "step": 160 }, { "epoch": 0.2089213300892133, "grad_norm": 0.6306588649749756, "learning_rate": 1.9980213375296468e-05, "loss": 0.7292003035545349, "step": 161 }, { "epoch": 0.21021897810218979, "grad_norm": 0.5737298130989075, "learning_rate": 1.997930407678205e-05, "loss": 0.7056928873062134, "step": 162 }, { "epoch": 0.21151662611516625, "grad_norm": 0.6045275926589966, "learning_rate": 1.99783743731166e-05, "loss": 0.738794207572937, "step": 163 }, { "epoch": 0.21281427412814274, "grad_norm": 0.6090785264968872, "learning_rate": 1.9977424266201126e-05, "loss": 0.8411350846290588, "step": 164 }, { "epoch": 0.2141119221411192, "grad_norm": 0.6489406824111938, "learning_rate": 1.9976453757978355e-05, "loss": 0.750893771648407, "step": 165 }, { "epoch": 0.2154095701540957, "grad_norm": 0.5950313210487366, "learning_rate": 1.997546285043273e-05, "loss": 0.6694055199623108, "step": 166 }, { "epoch": 0.21670721816707217, "grad_norm": 0.6618576645851135, "learning_rate": 1.9974451545590407e-05, "loss": 0.8072858452796936, "step": 167 }, { "epoch": 0.21800486618004866, "grad_norm": 0.587589681148529, "learning_rate": 1.997341984551925e-05, "loss": 0.7707666158676147, "step": 168 }, { "epoch": 0.21930251419302516, "grad_norm": 0.6130505204200745, "learning_rate": 1.9972367752328824e-05, "loss": 0.683761715888977, "step": 169 }, { "epoch": 0.22060016220600162, "grad_norm": 0.6129958033561707, "learning_rate": 1.9971295268170393e-05, "loss": 0.7264688014984131, "step": 170 }, { "epoch": 0.22189781021897811, "grad_norm": 0.6114361882209778, "learning_rate": 1.9970202395236913e-05, "loss": 0.7344344854354858, "step": 171 }, { "epoch": 0.22319545823195458, "grad_norm": 0.6653074622154236, "learning_rate": 1.996908913576304e-05, "loss": 0.7358161211013794, "step": 172 }, { "epoch": 0.22449310624493107, "grad_norm": 0.6639219522476196, "learning_rate": 1.9967955492025094e-05, "loss": 0.7851651906967163, "step": 173 }, { "epoch": 0.22579075425790754, "grad_norm": 0.5558881759643555, "learning_rate": 1.9966801466341107e-05, "loss": 0.7109513878822327, "step": 174 }, { "epoch": 0.22708840227088403, "grad_norm": 0.6213382482528687, "learning_rate": 1.9965627061070755e-05, "loss": 0.702171802520752, "step": 175 }, { "epoch": 0.2283860502838605, "grad_norm": 0.6152480840682983, "learning_rate": 1.996443227861541e-05, "loss": 0.8059327602386475, "step": 176 }, { "epoch": 0.229683698296837, "grad_norm": 1.3707772493362427, "learning_rate": 1.996321712141809e-05, "loss": 0.6749221682548523, "step": 177 }, { "epoch": 0.23098134630981346, "grad_norm": 0.6016313433647156, "learning_rate": 1.9961981591963494e-05, "loss": 0.7931903004646301, "step": 178 }, { "epoch": 0.23227899432278995, "grad_norm": 0.6266494393348694, "learning_rate": 1.9960725692777956e-05, "loss": 0.7843484878540039, "step": 179 }, { "epoch": 0.23357664233576642, "grad_norm": 0.6365560293197632, "learning_rate": 1.995944942642948e-05, "loss": 0.769256055355072, "step": 180 }, { "epoch": 0.2348742903487429, "grad_norm": 0.5864040851593018, "learning_rate": 1.9958152795527706e-05, "loss": 0.7252316474914551, "step": 181 }, { "epoch": 0.23617193836171937, "grad_norm": 0.6339318156242371, "learning_rate": 1.9956835802723916e-05, "loss": 0.8299843668937683, "step": 182 }, { "epoch": 0.23746958637469587, "grad_norm": 0.5974844098091125, "learning_rate": 1.9955498450711026e-05, "loss": 0.7282422184944153, "step": 183 }, { "epoch": 0.23876723438767233, "grad_norm": 0.5841022729873657, "learning_rate": 1.9954140742223586e-05, "loss": 0.7407736778259277, "step": 184 }, { "epoch": 0.24006488240064883, "grad_norm": 0.6066944599151611, "learning_rate": 1.9952762680037758e-05, "loss": 0.7745926380157471, "step": 185 }, { "epoch": 0.2413625304136253, "grad_norm": 0.5798110365867615, "learning_rate": 1.995136426697134e-05, "loss": 0.7561591863632202, "step": 186 }, { "epoch": 0.24266017842660179, "grad_norm": 0.5705812573432922, "learning_rate": 1.9949945505883723e-05, "loss": 0.7066362500190735, "step": 187 }, { "epoch": 0.24395782643957825, "grad_norm": 0.6322996020317078, "learning_rate": 1.994850639967592e-05, "loss": 0.8032187819480896, "step": 188 }, { "epoch": 0.24525547445255474, "grad_norm": 0.613441526889801, "learning_rate": 1.994704695129054e-05, "loss": 0.75013267993927, "step": 189 }, { "epoch": 0.24655312246553124, "grad_norm": 0.609327495098114, "learning_rate": 1.9945567163711788e-05, "loss": 0.7675092220306396, "step": 190 }, { "epoch": 0.2478507704785077, "grad_norm": 0.6119315028190613, "learning_rate": 1.9944067039965445e-05, "loss": 0.7201006412506104, "step": 191 }, { "epoch": 0.2491484184914842, "grad_norm": 0.5587560534477234, "learning_rate": 1.9942546583118894e-05, "loss": 0.7847742438316345, "step": 192 }, { "epoch": 0.25044606650446066, "grad_norm": 0.5934576988220215, "learning_rate": 1.994100579628108e-05, "loss": 0.74636310338974, "step": 193 }, { "epoch": 0.25174371451743716, "grad_norm": 0.5709709525108337, "learning_rate": 1.9939444682602522e-05, "loss": 0.6807436347007751, "step": 194 }, { "epoch": 0.25304136253041365, "grad_norm": 0.6085708737373352, "learning_rate": 1.9937863245275303e-05, "loss": 0.7877497673034668, "step": 195 }, { "epoch": 0.2543390105433901, "grad_norm": 0.5789342522621155, "learning_rate": 1.9936261487533066e-05, "loss": 0.7314412593841553, "step": 196 }, { "epoch": 0.2556366585563666, "grad_norm": 0.5808578133583069, "learning_rate": 1.993463941265099e-05, "loss": 0.7081149816513062, "step": 197 }, { "epoch": 0.2569343065693431, "grad_norm": 0.5988272428512573, "learning_rate": 1.993299702394582e-05, "loss": 0.718379020690918, "step": 198 }, { "epoch": 0.25823195458231957, "grad_norm": 0.6408476829528809, "learning_rate": 1.9931334324775817e-05, "loss": 0.8201683163642883, "step": 199 }, { "epoch": 0.259529602595296, "grad_norm": 0.582078218460083, "learning_rate": 1.9929651318540783e-05, "loss": 0.7401193380355835, "step": 200 }, { "epoch": 0.2608272506082725, "grad_norm": 0.607105553150177, "learning_rate": 1.9927948008682038e-05, "loss": 0.74293053150177, "step": 201 }, { "epoch": 0.262124898621249, "grad_norm": 0.5975603461265564, "learning_rate": 1.9926224398682424e-05, "loss": 0.779903769493103, "step": 202 }, { "epoch": 0.2634225466342255, "grad_norm": 0.5534036159515381, "learning_rate": 1.992448049206628e-05, "loss": 0.6884838342666626, "step": 203 }, { "epoch": 0.2647201946472019, "grad_norm": 0.610633909702301, "learning_rate": 1.9922716292399458e-05, "loss": 0.7174521684646606, "step": 204 }, { "epoch": 0.2660178426601784, "grad_norm": 0.5961881279945374, "learning_rate": 1.9920931803289302e-05, "loss": 0.7740389108657837, "step": 205 }, { "epoch": 0.2673154906731549, "grad_norm": 0.5700147747993469, "learning_rate": 1.9919127028384634e-05, "loss": 0.7351720333099365, "step": 206 }, { "epoch": 0.2686131386861314, "grad_norm": 0.6236000061035156, "learning_rate": 1.9917301971375767e-05, "loss": 0.8022093772888184, "step": 207 }, { "epoch": 0.26991078669910784, "grad_norm": 0.5870935320854187, "learning_rate": 1.991545663599448e-05, "loss": 0.7842336297035217, "step": 208 }, { "epoch": 0.27120843471208433, "grad_norm": 0.6193575263023376, "learning_rate": 1.9913591026014016e-05, "loss": 0.7481486797332764, "step": 209 }, { "epoch": 0.2725060827250608, "grad_norm": 0.6119521260261536, "learning_rate": 1.9911705145249076e-05, "loss": 0.7951152324676514, "step": 210 }, { "epoch": 0.2738037307380373, "grad_norm": 0.5536502599716187, "learning_rate": 1.9909798997555806e-05, "loss": 0.790625810623169, "step": 211 }, { "epoch": 0.2751013787510138, "grad_norm": 0.5879918336868286, "learning_rate": 1.99078725868318e-05, "loss": 0.7092885971069336, "step": 212 }, { "epoch": 0.27639902676399025, "grad_norm": 0.5877639055252075, "learning_rate": 1.9905925917016077e-05, "loss": 0.724690318107605, "step": 213 }, { "epoch": 0.27769667477696675, "grad_norm": 0.5909678339958191, "learning_rate": 1.9903958992089087e-05, "loss": 0.7642319202423096, "step": 214 }, { "epoch": 0.27899432278994324, "grad_norm": 0.5952388644218445, "learning_rate": 1.990197181607269e-05, "loss": 0.7681585550308228, "step": 215 }, { "epoch": 0.28029197080291973, "grad_norm": 0.5698040723800659, "learning_rate": 1.989996439303016e-05, "loss": 0.7373849153518677, "step": 216 }, { "epoch": 0.28158961881589617, "grad_norm": 0.5865874886512756, "learning_rate": 1.989793672706617e-05, "loss": 0.7335535287857056, "step": 217 }, { "epoch": 0.28288726682887266, "grad_norm": 0.6045393943786621, "learning_rate": 1.9895888822326783e-05, "loss": 0.7242499589920044, "step": 218 }, { "epoch": 0.28418491484184916, "grad_norm": 0.6004535555839539, "learning_rate": 1.9893820682999444e-05, "loss": 0.7604917287826538, "step": 219 }, { "epoch": 0.28548256285482565, "grad_norm": 1.119056224822998, "learning_rate": 1.9891732313312973e-05, "loss": 0.772226095199585, "step": 220 }, { "epoch": 0.2867802108678021, "grad_norm": 0.5902665853500366, "learning_rate": 1.9889623717537564e-05, "loss": 0.7658222317695618, "step": 221 }, { "epoch": 0.2880778588807786, "grad_norm": 0.6264858245849609, "learning_rate": 1.9887494899984757e-05, "loss": 0.7901877760887146, "step": 222 }, { "epoch": 0.2893755068937551, "grad_norm": 0.5469992756843567, "learning_rate": 1.9885345865007444e-05, "loss": 0.7618519067764282, "step": 223 }, { "epoch": 0.29067315490673157, "grad_norm": 0.5550391674041748, "learning_rate": 1.9883176616999863e-05, "loss": 0.788576602935791, "step": 224 }, { "epoch": 0.291970802919708, "grad_norm": 0.5628973245620728, "learning_rate": 1.9880987160397573e-05, "loss": 0.718231737613678, "step": 225 }, { "epoch": 0.2932684509326845, "grad_norm": 0.5723385214805603, "learning_rate": 1.987877749967746e-05, "loss": 0.698378324508667, "step": 226 }, { "epoch": 0.294566098945661, "grad_norm": 0.5784431099891663, "learning_rate": 1.987654763935772e-05, "loss": 0.7598991990089417, "step": 227 }, { "epoch": 0.2958637469586375, "grad_norm": 0.5549972653388977, "learning_rate": 1.9874297583997852e-05, "loss": 0.7384412288665771, "step": 228 }, { "epoch": 0.2971613949716139, "grad_norm": 0.5789146423339844, "learning_rate": 1.9872027338198652e-05, "loss": 0.7528890371322632, "step": 229 }, { "epoch": 0.2984590429845904, "grad_norm": 0.6021227240562439, "learning_rate": 1.98697369066022e-05, "loss": 0.805375337600708, "step": 230 }, { "epoch": 0.2984590429845904, "eval_loss": 0.7241292595863342, "eval_runtime": 73.217, "eval_samples_per_second": 70.913, "eval_steps_per_second": 8.864, "step": 230 }, { "epoch": 0.2997566909975669, "grad_norm": 0.6029407978057861, "learning_rate": 1.986742629389184e-05, "loss": 0.7631509900093079, "step": 231 }, { "epoch": 0.3010543390105434, "grad_norm": 0.5768916606903076, "learning_rate": 1.98650955047922e-05, "loss": 0.7468521595001221, "step": 232 }, { "epoch": 0.3023519870235199, "grad_norm": 0.550506055355072, "learning_rate": 1.9862744544069146e-05, "loss": 0.7611327767372131, "step": 233 }, { "epoch": 0.30364963503649633, "grad_norm": 0.5796909332275391, "learning_rate": 1.9860373416529804e-05, "loss": 0.7168669700622559, "step": 234 }, { "epoch": 0.30494728304947283, "grad_norm": 0.8639640808105469, "learning_rate": 1.9857982127022527e-05, "loss": 0.7404369115829468, "step": 235 }, { "epoch": 0.3062449310624493, "grad_norm": 0.5862186551094055, "learning_rate": 1.9855570680436896e-05, "loss": 0.7222490310668945, "step": 236 }, { "epoch": 0.3075425790754258, "grad_norm": 0.6011035442352295, "learning_rate": 1.9853139081703712e-05, "loss": 0.8068719506263733, "step": 237 }, { "epoch": 0.30884022708840225, "grad_norm": 0.5739139318466187, "learning_rate": 1.9850687335794974e-05, "loss": 0.7303578853607178, "step": 238 }, { "epoch": 0.31013787510137875, "grad_norm": 0.5833807587623596, "learning_rate": 1.9848215447723888e-05, "loss": 0.7608842849731445, "step": 239 }, { "epoch": 0.31143552311435524, "grad_norm": 0.5929459929466248, "learning_rate": 1.9845723422544834e-05, "loss": 0.8103141188621521, "step": 240 }, { "epoch": 0.31273317112733173, "grad_norm": 0.5728944540023804, "learning_rate": 1.9843211265353376e-05, "loss": 0.7196205854415894, "step": 241 }, { "epoch": 0.31403081914030817, "grad_norm": 0.5517752170562744, "learning_rate": 1.9840678981286237e-05, "loss": 0.6758772730827332, "step": 242 }, { "epoch": 0.31532846715328466, "grad_norm": 0.5443773865699768, "learning_rate": 1.98381265755213e-05, "loss": 0.6859534978866577, "step": 243 }, { "epoch": 0.31662611516626116, "grad_norm": 0.5687966346740723, "learning_rate": 1.9835554053277587e-05, "loss": 0.7471268177032471, "step": 244 }, { "epoch": 0.31792376317923765, "grad_norm": 0.5604870319366455, "learning_rate": 1.9832961419815253e-05, "loss": 0.6843122839927673, "step": 245 }, { "epoch": 0.3192214111922141, "grad_norm": 0.5563496351242065, "learning_rate": 1.983034868043558e-05, "loss": 0.7023979425430298, "step": 246 }, { "epoch": 0.3205190592051906, "grad_norm": 0.58856201171875, "learning_rate": 1.9827715840480962e-05, "loss": 0.826436460018158, "step": 247 }, { "epoch": 0.3218167072181671, "grad_norm": 0.5512715578079224, "learning_rate": 1.9825062905334883e-05, "loss": 0.702526867389679, "step": 248 }, { "epoch": 0.32311435523114357, "grad_norm": 0.541459858417511, "learning_rate": 1.9822389880421927e-05, "loss": 0.7273234128952026, "step": 249 }, { "epoch": 0.32441200324412, "grad_norm": 0.5705904364585876, "learning_rate": 1.9819696771207756e-05, "loss": 0.783245325088501, "step": 250 }, { "epoch": 0.3257096512570965, "grad_norm": 0.5666183829307556, "learning_rate": 1.981698358319909e-05, "loss": 0.7261844873428345, "step": 251 }, { "epoch": 0.327007299270073, "grad_norm": 0.5902214646339417, "learning_rate": 1.981425032194372e-05, "loss": 0.7943121194839478, "step": 252 }, { "epoch": 0.3283049472830495, "grad_norm": 0.6048629879951477, "learning_rate": 1.981149699303047e-05, "loss": 0.7712939381599426, "step": 253 }, { "epoch": 0.329602595296026, "grad_norm": 0.5914484858512878, "learning_rate": 1.9808723602089198e-05, "loss": 0.7921222448348999, "step": 254 }, { "epoch": 0.3309002433090024, "grad_norm": 0.5761268734931946, "learning_rate": 1.980593015479079e-05, "loss": 0.7280013561248779, "step": 255 }, { "epoch": 0.3321978913219789, "grad_norm": 0.5902722477912903, "learning_rate": 1.9803116656847136e-05, "loss": 0.8062602877616882, "step": 256 }, { "epoch": 0.3334955393349554, "grad_norm": 0.5620178580284119, "learning_rate": 1.9800283114011134e-05, "loss": 0.7278565168380737, "step": 257 }, { "epoch": 0.3347931873479319, "grad_norm": 0.5686838626861572, "learning_rate": 1.9797429532076652e-05, "loss": 0.7540629506111145, "step": 258 }, { "epoch": 0.33609083536090834, "grad_norm": 0.5724810361862183, "learning_rate": 1.9794555916878548e-05, "loss": 0.8088860511779785, "step": 259 }, { "epoch": 0.33738848337388483, "grad_norm": 0.5640983581542969, "learning_rate": 1.9791662274292638e-05, "loss": 0.7638871669769287, "step": 260 }, { "epoch": 0.3386861313868613, "grad_norm": 0.5784658193588257, "learning_rate": 1.978874861023569e-05, "loss": 0.7313830852508545, "step": 261 }, { "epoch": 0.3399837793998378, "grad_norm": 0.5539552569389343, "learning_rate": 1.9785814930665404e-05, "loss": 0.7729085683822632, "step": 262 }, { "epoch": 0.34128142741281425, "grad_norm": 0.561370849609375, "learning_rate": 1.9782861241580417e-05, "loss": 0.6871550679206848, "step": 263 }, { "epoch": 0.34257907542579075, "grad_norm": 0.5643728375434875, "learning_rate": 1.9779887549020273e-05, "loss": 0.7683601379394531, "step": 264 }, { "epoch": 0.34387672343876724, "grad_norm": 0.5431486964225769, "learning_rate": 1.9776893859065424e-05, "loss": 0.7228385210037231, "step": 265 }, { "epoch": 0.34517437145174373, "grad_norm": 0.5863342881202698, "learning_rate": 1.9773880177837202e-05, "loss": 0.7906335592269897, "step": 266 }, { "epoch": 0.34647201946472017, "grad_norm": 0.5614317655563354, "learning_rate": 1.9770846511497833e-05, "loss": 0.7299401164054871, "step": 267 }, { "epoch": 0.34776966747769666, "grad_norm": 0.5694175958633423, "learning_rate": 1.9767792866250386e-05, "loss": 0.7474102973937988, "step": 268 }, { "epoch": 0.34906731549067316, "grad_norm": 0.5707114934921265, "learning_rate": 1.97647192483388e-05, "loss": 0.7324154376983643, "step": 269 }, { "epoch": 0.35036496350364965, "grad_norm": 0.5364754796028137, "learning_rate": 1.976162566404784e-05, "loss": 0.6927608251571655, "step": 270 }, { "epoch": 0.3516626115166261, "grad_norm": 0.6064906120300293, "learning_rate": 1.9758512119703106e-05, "loss": 0.7652560472488403, "step": 271 }, { "epoch": 0.3529602595296026, "grad_norm": 0.5919526815414429, "learning_rate": 1.9755378621671006e-05, "loss": 0.7977138757705688, "step": 272 }, { "epoch": 0.3542579075425791, "grad_norm": 0.567382276058197, "learning_rate": 1.9752225176358757e-05, "loss": 0.7258316278457642, "step": 273 }, { "epoch": 0.35555555555555557, "grad_norm": 0.5770947337150574, "learning_rate": 1.974905179021435e-05, "loss": 0.7411879301071167, "step": 274 }, { "epoch": 0.35685320356853206, "grad_norm": 0.5705130100250244, "learning_rate": 1.9745858469726555e-05, "loss": 0.7439219951629639, "step": 275 }, { "epoch": 0.3581508515815085, "grad_norm": 0.5373214483261108, "learning_rate": 1.9742645221424905e-05, "loss": 0.6836246252059937, "step": 276 }, { "epoch": 0.359448499594485, "grad_norm": 0.596576988697052, "learning_rate": 1.9739412051879686e-05, "loss": 0.6741154789924622, "step": 277 }, { "epoch": 0.3607461476074615, "grad_norm": 0.5719678997993469, "learning_rate": 1.973615896770191e-05, "loss": 0.7447401881217957, "step": 278 }, { "epoch": 0.362043795620438, "grad_norm": 0.5882077813148499, "learning_rate": 1.97328859755433e-05, "loss": 0.7762616872787476, "step": 279 }, { "epoch": 0.3633414436334144, "grad_norm": 0.6879026293754578, "learning_rate": 1.972959308209631e-05, "loss": 0.7956463098526001, "step": 280 }, { "epoch": 0.3646390916463909, "grad_norm": 0.5789086222648621, "learning_rate": 1.9726280294094067e-05, "loss": 0.7541590929031372, "step": 281 }, { "epoch": 0.3659367396593674, "grad_norm": 0.5802841186523438, "learning_rate": 1.9722947618310384e-05, "loss": 0.7047423124313354, "step": 282 }, { "epoch": 0.3672343876723439, "grad_norm": 0.5507220029830933, "learning_rate": 1.9719595061559742e-05, "loss": 0.6714630722999573, "step": 283 }, { "epoch": 0.36853203568532034, "grad_norm": 0.5980960726737976, "learning_rate": 1.9716222630697266e-05, "loss": 0.7872920036315918, "step": 284 }, { "epoch": 0.36982968369829683, "grad_norm": 0.5855656266212463, "learning_rate": 1.971283033261873e-05, "loss": 0.7662516832351685, "step": 285 }, { "epoch": 0.3711273317112733, "grad_norm": 0.5851466655731201, "learning_rate": 1.9709418174260523e-05, "loss": 0.7596746683120728, "step": 286 }, { "epoch": 0.3724249797242498, "grad_norm": 0.5843831300735474, "learning_rate": 1.9705986162599642e-05, "loss": 0.7550405263900757, "step": 287 }, { "epoch": 0.37372262773722625, "grad_norm": 0.5846932530403137, "learning_rate": 1.9702534304653685e-05, "loss": 0.7254443764686584, "step": 288 }, { "epoch": 0.37502027575020275, "grad_norm": 0.6054766774177551, "learning_rate": 1.9699062607480827e-05, "loss": 0.7600511908531189, "step": 289 }, { "epoch": 0.37631792376317924, "grad_norm": 0.5703001618385315, "learning_rate": 1.969557107817981e-05, "loss": 0.7401167750358582, "step": 290 }, { "epoch": 0.37761557177615573, "grad_norm": 0.5855723023414612, "learning_rate": 1.9692059723889927e-05, "loss": 0.7476931214332581, "step": 291 }, { "epoch": 0.37891321978913217, "grad_norm": 0.5804258584976196, "learning_rate": 1.968852855179101e-05, "loss": 0.7656409740447998, "step": 292 }, { "epoch": 0.38021086780210867, "grad_norm": 0.5795084834098816, "learning_rate": 1.9684977569103415e-05, "loss": 0.7599056959152222, "step": 293 }, { "epoch": 0.38150851581508516, "grad_norm": 0.5684756636619568, "learning_rate": 1.9681406783087998e-05, "loss": 0.674816370010376, "step": 294 }, { "epoch": 0.38280616382806165, "grad_norm": 0.5463794469833374, "learning_rate": 1.9677816201046113e-05, "loss": 0.683580219745636, "step": 295 }, { "epoch": 0.38410381184103815, "grad_norm": 0.5722465515136719, "learning_rate": 1.9674205830319594e-05, "loss": 0.693361222743988, "step": 296 }, { "epoch": 0.3854014598540146, "grad_norm": 0.6253486275672913, "learning_rate": 1.9670575678290732e-05, "loss": 0.7917322516441345, "step": 297 }, { "epoch": 0.3866991078669911, "grad_norm": 0.5660127401351929, "learning_rate": 1.9666925752382275e-05, "loss": 0.7436933517456055, "step": 298 }, { "epoch": 0.38799675587996757, "grad_norm": 0.572499692440033, "learning_rate": 1.9663256060057395e-05, "loss": 0.6714681386947632, "step": 299 }, { "epoch": 0.38929440389294406, "grad_norm": 0.5779220461845398, "learning_rate": 1.9659566608819677e-05, "loss": 0.7252252697944641, "step": 300 }, { "epoch": 0.3905920519059205, "grad_norm": 0.5990428924560547, "learning_rate": 1.9655857406213124e-05, "loss": 0.7827754020690918, "step": 301 }, { "epoch": 0.391889699918897, "grad_norm": 0.5721242427825928, "learning_rate": 1.9652128459822113e-05, "loss": 0.7102577686309814, "step": 302 }, { "epoch": 0.3931873479318735, "grad_norm": 0.5870105028152466, "learning_rate": 1.9648379777271397e-05, "loss": 0.683538019657135, "step": 303 }, { "epoch": 0.39448499594485, "grad_norm": 0.5920274257659912, "learning_rate": 1.964461136622608e-05, "loss": 0.7541404366493225, "step": 304 }, { "epoch": 0.3957826439578264, "grad_norm": 0.5439295768737793, "learning_rate": 1.9640823234391614e-05, "loss": 0.675430417060852, "step": 305 }, { "epoch": 0.3970802919708029, "grad_norm": 0.6126630902290344, "learning_rate": 1.9637015389513765e-05, "loss": 0.7898478507995605, "step": 306 }, { "epoch": 0.3983779399837794, "grad_norm": 0.5664204359054565, "learning_rate": 1.963318783937861e-05, "loss": 0.6964154839515686, "step": 307 }, { "epoch": 0.3996755879967559, "grad_norm": 0.5839046239852905, "learning_rate": 1.962934059181253e-05, "loss": 0.7421650886535645, "step": 308 }, { "epoch": 0.40097323600973234, "grad_norm": 0.6044719815254211, "learning_rate": 1.962547365468216e-05, "loss": 0.7794229984283447, "step": 309 }, { "epoch": 0.40227088402270883, "grad_norm": 0.5989699363708496, "learning_rate": 1.962158703589442e-05, "loss": 0.6963369846343994, "step": 310 }, { "epoch": 0.4035685320356853, "grad_norm": 0.5891120433807373, "learning_rate": 1.9617680743396452e-05, "loss": 0.7737009525299072, "step": 311 }, { "epoch": 0.4048661800486618, "grad_norm": 0.5753238201141357, "learning_rate": 1.961375478517564e-05, "loss": 0.6912685632705688, "step": 312 }, { "epoch": 0.40616382806163825, "grad_norm": 0.6656221747398376, "learning_rate": 1.9609809169259573e-05, "loss": 0.7757899165153503, "step": 313 }, { "epoch": 0.40746147607461475, "grad_norm": 0.6444079875946045, "learning_rate": 1.960584390371604e-05, "loss": 0.7399554252624512, "step": 314 }, { "epoch": 0.40875912408759124, "grad_norm": 0.5455271601676941, "learning_rate": 1.9601858996653004e-05, "loss": 0.7261430025100708, "step": 315 }, { "epoch": 0.41005677210056773, "grad_norm": 0.5660345554351807, "learning_rate": 1.9597854456218588e-05, "loss": 0.7287646532058716, "step": 316 }, { "epoch": 0.41135442011354423, "grad_norm": 0.5909862518310547, "learning_rate": 1.9593830290601067e-05, "loss": 0.7831040620803833, "step": 317 }, { "epoch": 0.41265206812652067, "grad_norm": 0.5852524638175964, "learning_rate": 1.9589786508028842e-05, "loss": 0.7229428291320801, "step": 318 }, { "epoch": 0.41394971613949716, "grad_norm": 0.5916611552238464, "learning_rate": 1.9585723116770425e-05, "loss": 0.7438414692878723, "step": 319 }, { "epoch": 0.41524736415247365, "grad_norm": 0.5859969854354858, "learning_rate": 1.9581640125134415e-05, "loss": 0.7692857384681702, "step": 320 }, { "epoch": 0.41654501216545015, "grad_norm": 0.5748182535171509, "learning_rate": 1.9577537541469506e-05, "loss": 0.7208437919616699, "step": 321 }, { "epoch": 0.4178426601784266, "grad_norm": 0.5739149451255798, "learning_rate": 1.957341537416444e-05, "loss": 0.6877571940422058, "step": 322 }, { "epoch": 0.4191403081914031, "grad_norm": 0.6014899611473083, "learning_rate": 1.9569273631648005e-05, "loss": 0.7482254505157471, "step": 323 }, { "epoch": 0.42043795620437957, "grad_norm": 0.5997340679168701, "learning_rate": 1.9565112322389017e-05, "loss": 0.735174298286438, "step": 324 }, { "epoch": 0.42173560421735606, "grad_norm": 0.572567343711853, "learning_rate": 1.95609314548963e-05, "loss": 0.7159808874130249, "step": 325 }, { "epoch": 0.4230332522303325, "grad_norm": 0.5567170977592468, "learning_rate": 1.955673103771867e-05, "loss": 0.6460487842559814, "step": 326 }, { "epoch": 0.424330900243309, "grad_norm": 0.570945143699646, "learning_rate": 1.9552511079444914e-05, "loss": 0.780687689781189, "step": 327 }, { "epoch": 0.4256285482562855, "grad_norm": 0.5721143484115601, "learning_rate": 1.9548271588703783e-05, "loss": 0.7781848907470703, "step": 328 }, { "epoch": 0.426926196269262, "grad_norm": 0.5866307616233826, "learning_rate": 1.954401257416396e-05, "loss": 0.6634104251861572, "step": 329 }, { "epoch": 0.4282238442822384, "grad_norm": 0.575668215751648, "learning_rate": 1.9539734044534057e-05, "loss": 0.7831740379333496, "step": 330 }, { "epoch": 0.4295214922952149, "grad_norm": 0.5764342546463013, "learning_rate": 1.9535436008562576e-05, "loss": 0.7253679037094116, "step": 331 }, { "epoch": 0.4308191403081914, "grad_norm": 0.5597108006477356, "learning_rate": 1.9531118475037916e-05, "loss": 0.6709398627281189, "step": 332 }, { "epoch": 0.4321167883211679, "grad_norm": 0.595028817653656, "learning_rate": 1.9526781452788342e-05, "loss": 0.7365997433662415, "step": 333 }, { "epoch": 0.43341443633414434, "grad_norm": 0.5742825865745544, "learning_rate": 1.9522424950681964e-05, "loss": 0.7389061450958252, "step": 334 }, { "epoch": 0.43471208434712083, "grad_norm": 0.55686354637146, "learning_rate": 1.951804897762673e-05, "loss": 0.6932294964790344, "step": 335 }, { "epoch": 0.4360097323600973, "grad_norm": 0.6195898652076721, "learning_rate": 1.951365354257039e-05, "loss": 0.689919114112854, "step": 336 }, { "epoch": 0.4373073803730738, "grad_norm": 0.5357776284217834, "learning_rate": 1.9509238654500505e-05, "loss": 0.6890056133270264, "step": 337 }, { "epoch": 0.4386050283860503, "grad_norm": 0.563254177570343, "learning_rate": 1.95048043224444e-05, "loss": 0.7118027806282043, "step": 338 }, { "epoch": 0.43990267639902675, "grad_norm": 0.5649257302284241, "learning_rate": 1.9500350555469164e-05, "loss": 0.7314987182617188, "step": 339 }, { "epoch": 0.44120032441200324, "grad_norm": 0.5675091743469238, "learning_rate": 1.9495877362681613e-05, "loss": 0.6302130222320557, "step": 340 }, { "epoch": 0.44249797242497974, "grad_norm": 0.5489922761917114, "learning_rate": 1.9491384753228308e-05, "loss": 0.7357535362243652, "step": 341 }, { "epoch": 0.44379562043795623, "grad_norm": 0.5530965924263, "learning_rate": 1.948687273629549e-05, "loss": 0.6449010372161865, "step": 342 }, { "epoch": 0.44509326845093267, "grad_norm": 0.5747541189193726, "learning_rate": 1.9482341321109096e-05, "loss": 0.7252374887466431, "step": 343 }, { "epoch": 0.44639091646390916, "grad_norm": 0.5609497427940369, "learning_rate": 1.947779051693472e-05, "loss": 0.7096484899520874, "step": 344 }, { "epoch": 0.44768856447688565, "grad_norm": 0.5988261699676514, "learning_rate": 1.9473220333077604e-05, "loss": 0.7986630201339722, "step": 345 }, { "epoch": 0.44898621248986215, "grad_norm": 0.6313751935958862, "learning_rate": 1.946863077888262e-05, "loss": 0.8356250524520874, "step": 346 }, { "epoch": 0.4502838605028386, "grad_norm": 0.565196692943573, "learning_rate": 1.946402186373424e-05, "loss": 0.7527079582214355, "step": 347 }, { "epoch": 0.4515815085158151, "grad_norm": 0.5944785475730896, "learning_rate": 1.9459393597056536e-05, "loss": 0.6996445655822754, "step": 348 }, { "epoch": 0.45287915652879157, "grad_norm": 0.5384091734886169, "learning_rate": 1.9454745988313135e-05, "loss": 0.7005808353424072, "step": 349 }, { "epoch": 0.45417680454176806, "grad_norm": 0.5926419496536255, "learning_rate": 1.945007904700723e-05, "loss": 0.7360185384750366, "step": 350 }, { "epoch": 0.4554744525547445, "grad_norm": 0.5517107844352722, "learning_rate": 1.9445392782681523e-05, "loss": 0.6678152084350586, "step": 351 }, { "epoch": 0.456772100567721, "grad_norm": 0.5527735352516174, "learning_rate": 1.9440687204918245e-05, "loss": 0.719680666923523, "step": 352 }, { "epoch": 0.4580697485806975, "grad_norm": 0.5603200793266296, "learning_rate": 1.943596232333911e-05, "loss": 0.7023108005523682, "step": 353 }, { "epoch": 0.459367396593674, "grad_norm": 0.5883275866508484, "learning_rate": 1.9431218147605307e-05, "loss": 0.7870659232139587, "step": 354 }, { "epoch": 0.4606650446066504, "grad_norm": 0.5547419786453247, "learning_rate": 1.9426454687417474e-05, "loss": 0.693616509437561, "step": 355 }, { "epoch": 0.4619626926196269, "grad_norm": 0.5387628674507141, "learning_rate": 1.942167195251568e-05, "loss": 0.6275761127471924, "step": 356 }, { "epoch": 0.4632603406326034, "grad_norm": 0.5728762745857239, "learning_rate": 1.941686995267941e-05, "loss": 0.7649428844451904, "step": 357 }, { "epoch": 0.4645579886455799, "grad_norm": 0.5744031667709351, "learning_rate": 1.941204869772753e-05, "loss": 0.746831476688385, "step": 358 }, { "epoch": 0.4658556366585564, "grad_norm": 0.5453589558601379, "learning_rate": 1.9407208197518296e-05, "loss": 0.7251806259155273, "step": 359 }, { "epoch": 0.46715328467153283, "grad_norm": 0.5643113851547241, "learning_rate": 1.94023484619493e-05, "loss": 0.6882834434509277, "step": 360 }, { "epoch": 0.4684509326845093, "grad_norm": 0.5984339714050293, "learning_rate": 1.9397469500957478e-05, "loss": 0.7512071132659912, "step": 361 }, { "epoch": 0.4697485806974858, "grad_norm": 0.5487557649612427, "learning_rate": 1.939257132451906e-05, "loss": 0.7803584337234497, "step": 362 }, { "epoch": 0.4710462287104623, "grad_norm": 0.5798037648200989, "learning_rate": 1.9387653942649586e-05, "loss": 0.7196419835090637, "step": 363 }, { "epoch": 0.47234387672343875, "grad_norm": 0.5554172396659851, "learning_rate": 1.9382717365403854e-05, "loss": 0.7393349409103394, "step": 364 }, { "epoch": 0.47364152473641524, "grad_norm": 0.546137273311615, "learning_rate": 1.9377761602875913e-05, "loss": 0.7212686538696289, "step": 365 }, { "epoch": 0.47493917274939174, "grad_norm": 0.5687487125396729, "learning_rate": 1.937278666519905e-05, "loss": 0.7769354581832886, "step": 366 }, { "epoch": 0.47623682076236823, "grad_norm": 0.5400050282478333, "learning_rate": 1.9367792562545744e-05, "loss": 0.721081018447876, "step": 367 }, { "epoch": 0.47753446877534467, "grad_norm": 0.5545980930328369, "learning_rate": 1.9362779305127674e-05, "loss": 0.6797982454299927, "step": 368 }, { "epoch": 0.47883211678832116, "grad_norm": 0.5371907949447632, "learning_rate": 1.9357746903195686e-05, "loss": 0.7223237752914429, "step": 369 }, { "epoch": 0.48012976480129765, "grad_norm": 0.534491240978241, "learning_rate": 1.9352695367039764e-05, "loss": 0.7010591626167297, "step": 370 }, { "epoch": 0.48142741281427415, "grad_norm": 0.5431662797927856, "learning_rate": 1.9347624706989026e-05, "loss": 0.7298872470855713, "step": 371 }, { "epoch": 0.4827250608272506, "grad_norm": 0.5843503475189209, "learning_rate": 1.9342534933411683e-05, "loss": 0.7810012698173523, "step": 372 }, { "epoch": 0.4840227088402271, "grad_norm": 0.5278732776641846, "learning_rate": 1.9337426056715036e-05, "loss": 0.7204632759094238, "step": 373 }, { "epoch": 0.48532035685320357, "grad_norm": 0.5900875926017761, "learning_rate": 1.9332298087345447e-05, "loss": 0.7081923484802246, "step": 374 }, { "epoch": 0.48661800486618007, "grad_norm": 0.5549632906913757, "learning_rate": 1.932715103578831e-05, "loss": 0.7588300704956055, "step": 375 }, { "epoch": 0.4879156528791565, "grad_norm": 0.5351032018661499, "learning_rate": 1.9321984912568048e-05, "loss": 0.6380345821380615, "step": 376 }, { "epoch": 0.489213300892133, "grad_norm": 0.5553699135780334, "learning_rate": 1.9316799728248074e-05, "loss": 0.7115924954414368, "step": 377 }, { "epoch": 0.4905109489051095, "grad_norm": 0.5904532670974731, "learning_rate": 1.9311595493430776e-05, "loss": 0.7918650507926941, "step": 378 }, { "epoch": 0.491808596918086, "grad_norm": 0.5718861818313599, "learning_rate": 1.93063722187575e-05, "loss": 0.7574873566627502, "step": 379 }, { "epoch": 0.4931062449310625, "grad_norm": 0.5575288534164429, "learning_rate": 1.9301129914908516e-05, "loss": 0.7619529962539673, "step": 380 }, { "epoch": 0.4944038929440389, "grad_norm": 0.5972062945365906, "learning_rate": 1.9295868592603012e-05, "loss": 0.8739205598831177, "step": 381 }, { "epoch": 0.4957015409570154, "grad_norm": 0.5725207328796387, "learning_rate": 1.929058826259906e-05, "loss": 0.7461530566215515, "step": 382 }, { "epoch": 0.4969991889699919, "grad_norm": 0.7559300065040588, "learning_rate": 1.9285288935693597e-05, "loss": 0.7054376602172852, "step": 383 }, { "epoch": 0.4982968369829684, "grad_norm": 0.5533690452575684, "learning_rate": 1.9279970622722403e-05, "loss": 0.742769718170166, "step": 384 }, { "epoch": 0.49959448499594483, "grad_norm": 0.5702188014984131, "learning_rate": 1.927463333456009e-05, "loss": 0.7912020683288574, "step": 385 }, { "epoch": 0.5008921330089213, "grad_norm": 0.5261266231536865, "learning_rate": 1.9269277082120053e-05, "loss": 0.7539711594581604, "step": 386 }, { "epoch": 0.5021897810218978, "grad_norm": 0.5590584874153137, "learning_rate": 1.926390187635448e-05, "loss": 0.7646081447601318, "step": 387 }, { "epoch": 0.5034874290348743, "grad_norm": 0.5796819925308228, "learning_rate": 1.92585077282543e-05, "loss": 0.7352266907691956, "step": 388 }, { "epoch": 0.5047850770478508, "grad_norm": 0.5712133049964905, "learning_rate": 1.9253094648849183e-05, "loss": 0.7203606367111206, "step": 389 }, { "epoch": 0.5060827250608273, "grad_norm": 0.597654402256012, "learning_rate": 1.924766264920751e-05, "loss": 0.8121019601821899, "step": 390 }, { "epoch": 0.5073803730738037, "grad_norm": 0.5626549124717712, "learning_rate": 1.9242211740436335e-05, "loss": 0.7297658920288086, "step": 391 }, { "epoch": 0.5086780210867802, "grad_norm": 0.6014045476913452, "learning_rate": 1.9236741933681396e-05, "loss": 0.7325990200042725, "step": 392 }, { "epoch": 0.5099756690997567, "grad_norm": 0.5554893612861633, "learning_rate": 1.9231253240127062e-05, "loss": 0.680641770362854, "step": 393 }, { "epoch": 0.5112733171127332, "grad_norm": 0.5787703394889832, "learning_rate": 1.922574567099632e-05, "loss": 0.7252123355865479, "step": 394 }, { "epoch": 0.5125709651257097, "grad_norm": 0.5811824798583984, "learning_rate": 1.9220219237550757e-05, "loss": 0.7139418125152588, "step": 395 }, { "epoch": 0.5138686131386861, "grad_norm": 0.547007143497467, "learning_rate": 1.921467395109053e-05, "loss": 0.6985068917274475, "step": 396 }, { "epoch": 0.5151662611516626, "grad_norm": 0.6072813272476196, "learning_rate": 1.9209109822954345e-05, "loss": 0.7519763708114624, "step": 397 }, { "epoch": 0.5164639091646391, "grad_norm": 0.5965511798858643, "learning_rate": 1.9203526864519432e-05, "loss": 0.7568516135215759, "step": 398 }, { "epoch": 0.5177615571776155, "grad_norm": 0.5627179741859436, "learning_rate": 1.919792508720154e-05, "loss": 0.7021974921226501, "step": 399 }, { "epoch": 0.519059205190592, "grad_norm": 0.5491631627082825, "learning_rate": 1.9192304502454876e-05, "loss": 0.6992515325546265, "step": 400 }, { "epoch": 0.5203568532035685, "grad_norm": 0.5874002575874329, "learning_rate": 1.918666512177211e-05, "loss": 0.712739109992981, "step": 401 }, { "epoch": 0.521654501216545, "grad_norm": 0.5660138726234436, "learning_rate": 1.918100695668436e-05, "loss": 0.6854047775268555, "step": 402 }, { "epoch": 0.5229521492295215, "grad_norm": 0.565985381603241, "learning_rate": 1.917533001876113e-05, "loss": 0.7300174236297607, "step": 403 }, { "epoch": 0.524249797242498, "grad_norm": 0.5489518642425537, "learning_rate": 1.916963431961033e-05, "loss": 0.7667282819747925, "step": 404 }, { "epoch": 0.5255474452554745, "grad_norm": 0.569230318069458, "learning_rate": 1.916391987087822e-05, "loss": 0.7247310876846313, "step": 405 }, { "epoch": 0.526845093268451, "grad_norm": 0.5969386696815491, "learning_rate": 1.9158186684249397e-05, "loss": 0.7719178199768066, "step": 406 }, { "epoch": 0.5281427412814275, "grad_norm": 0.5550801157951355, "learning_rate": 1.9152434771446783e-05, "loss": 0.6853774785995483, "step": 407 }, { "epoch": 0.5294403892944038, "grad_norm": 0.5440778136253357, "learning_rate": 1.914666414423158e-05, "loss": 0.681282639503479, "step": 408 }, { "epoch": 0.5307380373073803, "grad_norm": 0.5368308424949646, "learning_rate": 1.914087481440326e-05, "loss": 0.7318757772445679, "step": 409 }, { "epoch": 0.5320356853203568, "grad_norm": 0.6122865676879883, "learning_rate": 1.9135066793799538e-05, "loss": 0.6974803805351257, "step": 410 }, { "epoch": 0.5333333333333333, "grad_norm": 0.5386953353881836, "learning_rate": 1.912924009429635e-05, "loss": 0.7397326827049255, "step": 411 }, { "epoch": 0.5346309813463098, "grad_norm": 0.5616509914398193, "learning_rate": 1.9123394727807816e-05, "loss": 0.7613886594772339, "step": 412 }, { "epoch": 0.5359286293592863, "grad_norm": 0.627604067325592, "learning_rate": 1.9117530706286232e-05, "loss": 0.7783684730529785, "step": 413 }, { "epoch": 0.5372262773722628, "grad_norm": 0.5613445043563843, "learning_rate": 1.9111648041722044e-05, "loss": 0.7296919226646423, "step": 414 }, { "epoch": 0.5385239253852393, "grad_norm": 0.5356356501579285, "learning_rate": 1.91057467461438e-05, "loss": 0.7119168639183044, "step": 415 }, { "epoch": 0.5398215733982157, "grad_norm": 0.5709317326545715, "learning_rate": 1.9099826831618168e-05, "loss": 0.6891450881958008, "step": 416 }, { "epoch": 0.5411192214111922, "grad_norm": 0.5525058507919312, "learning_rate": 1.909388831024987e-05, "loss": 0.7220831513404846, "step": 417 }, { "epoch": 0.5424168694241687, "grad_norm": 0.5916740894317627, "learning_rate": 1.908793119418168e-05, "loss": 0.7380563020706177, "step": 418 }, { "epoch": 0.5437145174371452, "grad_norm": 0.5553448796272278, "learning_rate": 1.9081955495594388e-05, "loss": 0.6854832172393799, "step": 419 }, { "epoch": 0.5450121654501217, "grad_norm": 0.550918459892273, "learning_rate": 1.9075961226706784e-05, "loss": 0.755254864692688, "step": 420 }, { "epoch": 0.5463098134630981, "grad_norm": 0.5704249739646912, "learning_rate": 1.906994839977564e-05, "loss": 0.762306272983551, "step": 421 }, { "epoch": 0.5476074614760746, "grad_norm": 0.5444906949996948, "learning_rate": 1.9063917027095664e-05, "loss": 0.7424022555351257, "step": 422 }, { "epoch": 0.5489051094890511, "grad_norm": 0.5842110514640808, "learning_rate": 1.905786712099948e-05, "loss": 0.7851117849349976, "step": 423 }, { "epoch": 0.5502027575020276, "grad_norm": 0.5527293086051941, "learning_rate": 1.9051798693857617e-05, "loss": 0.7389935255050659, "step": 424 }, { "epoch": 0.551500405515004, "grad_norm": 0.5890975594520569, "learning_rate": 1.904571175807848e-05, "loss": 0.7679333686828613, "step": 425 }, { "epoch": 0.5527980535279805, "grad_norm": 0.5342135429382324, "learning_rate": 1.9039606326108297e-05, "loss": 0.7123668193817139, "step": 426 }, { "epoch": 0.554095701540957, "grad_norm": 0.5628570914268494, "learning_rate": 1.903348241043114e-05, "loss": 0.7286348342895508, "step": 427 }, { "epoch": 0.5553933495539335, "grad_norm": 0.5398725867271423, "learning_rate": 1.902734002356887e-05, "loss": 0.7192749977111816, "step": 428 }, { "epoch": 0.55669099756691, "grad_norm": 0.5142056941986084, "learning_rate": 1.9021179178081107e-05, "loss": 0.6286910772323608, "step": 429 }, { "epoch": 0.5579886455798865, "grad_norm": 0.5470032095909119, "learning_rate": 1.9014999886565226e-05, "loss": 0.6505739092826843, "step": 430 }, { "epoch": 0.559286293592863, "grad_norm": 0.5600834488868713, "learning_rate": 1.9008802161656308e-05, "loss": 0.7014046907424927, "step": 431 }, { "epoch": 0.5605839416058395, "grad_norm": 0.5533670783042908, "learning_rate": 1.9002586016027136e-05, "loss": 0.7095932364463806, "step": 432 }, { "epoch": 0.5618815896188158, "grad_norm": 0.5443385243415833, "learning_rate": 1.8996351462388153e-05, "loss": 0.7492538094520569, "step": 433 }, { "epoch": 0.5631792376317923, "grad_norm": 0.5775622129440308, "learning_rate": 1.8990098513487447e-05, "loss": 0.7882871627807617, "step": 434 }, { "epoch": 0.5644768856447688, "grad_norm": 0.5645557045936584, "learning_rate": 1.898382718211071e-05, "loss": 0.6681729555130005, "step": 435 }, { "epoch": 0.5657745336577453, "grad_norm": 0.562117874622345, "learning_rate": 1.897753748108123e-05, "loss": 0.7754248380661011, "step": 436 }, { "epoch": 0.5670721816707218, "grad_norm": 0.5395199656486511, "learning_rate": 1.8971229423259855e-05, "loss": 0.6584359407424927, "step": 437 }, { "epoch": 0.5683698296836983, "grad_norm": 0.5511093735694885, "learning_rate": 1.8964903021544964e-05, "loss": 0.7121752500534058, "step": 438 }, { "epoch": 0.5696674776966748, "grad_norm": 0.5518468022346497, "learning_rate": 1.895855828887245e-05, "loss": 0.7533795237541199, "step": 439 }, { "epoch": 0.5709651257096513, "grad_norm": 0.541132926940918, "learning_rate": 1.895219523821568e-05, "loss": 0.6961894035339355, "step": 440 }, { "epoch": 0.5722627737226277, "grad_norm": 0.5566806197166443, "learning_rate": 1.894581388258549e-05, "loss": 0.7168055176734924, "step": 441 }, { "epoch": 0.5735604217356042, "grad_norm": 0.8438438773155212, "learning_rate": 1.8939414235030137e-05, "loss": 0.7322010397911072, "step": 442 }, { "epoch": 0.5748580697485807, "grad_norm": 0.5508759617805481, "learning_rate": 1.893299630863527e-05, "loss": 0.689163327217102, "step": 443 }, { "epoch": 0.5761557177615572, "grad_norm": 0.577190637588501, "learning_rate": 1.892656011652393e-05, "loss": 0.7421369552612305, "step": 444 }, { "epoch": 0.5774533657745337, "grad_norm": 0.5557067394256592, "learning_rate": 1.8920105671856507e-05, "loss": 0.6984370350837708, "step": 445 }, { "epoch": 0.5787510137875101, "grad_norm": 0.5880769491195679, "learning_rate": 1.89136329878307e-05, "loss": 0.6648968458175659, "step": 446 }, { "epoch": 0.5800486618004866, "grad_norm": 0.5225708484649658, "learning_rate": 1.890714207768151e-05, "loss": 0.6399903297424316, "step": 447 }, { "epoch": 0.5813463098134631, "grad_norm": 2.8270366191864014, "learning_rate": 1.8900632954681203e-05, "loss": 0.7426702380180359, "step": 448 }, { "epoch": 0.5826439578264396, "grad_norm": 0.5743777751922607, "learning_rate": 1.8894105632139296e-05, "loss": 0.7008408308029175, "step": 449 }, { "epoch": 0.583941605839416, "grad_norm": 0.5848923325538635, "learning_rate": 1.8887560123402505e-05, "loss": 0.7745944261550903, "step": 450 }, { "epoch": 0.5852392538523925, "grad_norm": 0.5533474087715149, "learning_rate": 1.888099644185474e-05, "loss": 0.7078051567077637, "step": 451 }, { "epoch": 0.586536901865369, "grad_norm": 0.5359990000724792, "learning_rate": 1.887441460091707e-05, "loss": 0.7025009393692017, "step": 452 }, { "epoch": 0.5878345498783455, "grad_norm": 0.5772839784622192, "learning_rate": 1.886781461404769e-05, "loss": 0.7109262347221375, "step": 453 }, { "epoch": 0.589132197891322, "grad_norm": 0.5491592288017273, "learning_rate": 1.886119649474191e-05, "loss": 0.6828133463859558, "step": 454 }, { "epoch": 0.5904298459042985, "grad_norm": 0.5495162606239319, "learning_rate": 1.8854560256532098e-05, "loss": 0.6600109338760376, "step": 455 }, { "epoch": 0.591727493917275, "grad_norm": 0.5773736238479614, "learning_rate": 1.8847905912987693e-05, "loss": 0.6746517419815063, "step": 456 }, { "epoch": 0.5930251419302515, "grad_norm": 0.5658586621284485, "learning_rate": 1.8841233477715136e-05, "loss": 0.6905688047409058, "step": 457 }, { "epoch": 0.5943227899432278, "grad_norm": 0.544463574886322, "learning_rate": 1.8834542964357875e-05, "loss": 0.7656948566436768, "step": 458 }, { "epoch": 0.5956204379562043, "grad_norm": 0.5466704964637756, "learning_rate": 1.8827834386596306e-05, "loss": 0.7320756912231445, "step": 459 }, { "epoch": 0.5969180859691808, "grad_norm": 0.534042477607727, "learning_rate": 1.882110775814778e-05, "loss": 0.6747853755950928, "step": 460 }, { "epoch": 0.5969180859691808, "eval_loss": 0.7028419375419617, "eval_runtime": 72.8032, "eval_samples_per_second": 71.316, "eval_steps_per_second": 8.914, "step": 460 }, { "epoch": 0.5982157339821573, "grad_norm": 0.5617560148239136, "learning_rate": 1.881436309276655e-05, "loss": 0.7175489068031311, "step": 461 }, { "epoch": 0.5995133819951338, "grad_norm": 0.538003146648407, "learning_rate": 1.8807600404243746e-05, "loss": 0.6772977709770203, "step": 462 }, { "epoch": 0.6008110300081103, "grad_norm": 0.5164902210235596, "learning_rate": 1.8800819706407355e-05, "loss": 0.7026697397232056, "step": 463 }, { "epoch": 0.6021086780210868, "grad_norm": 0.519985556602478, "learning_rate": 1.879402101312219e-05, "loss": 0.6459539532661438, "step": 464 }, { "epoch": 0.6034063260340633, "grad_norm": 0.5643022060394287, "learning_rate": 1.8787204338289858e-05, "loss": 0.7304619550704956, "step": 465 }, { "epoch": 0.6047039740470398, "grad_norm": 0.5315333604812622, "learning_rate": 1.8780369695848733e-05, "loss": 0.7055330872535706, "step": 466 }, { "epoch": 0.6060016220600162, "grad_norm": 0.5695874691009521, "learning_rate": 1.8773517099773927e-05, "loss": 0.7567015290260315, "step": 467 }, { "epoch": 0.6072992700729927, "grad_norm": 0.5361006259918213, "learning_rate": 1.8766646564077265e-05, "loss": 0.7254809141159058, "step": 468 }, { "epoch": 0.6085969180859692, "grad_norm": 0.5438353419303894, "learning_rate": 1.8759758102807253e-05, "loss": 0.6743266582489014, "step": 469 }, { "epoch": 0.6098945660989457, "grad_norm": 0.5824978351593018, "learning_rate": 1.8752851730049055e-05, "loss": 0.7623616456985474, "step": 470 }, { "epoch": 0.6111922141119221, "grad_norm": 0.546610951423645, "learning_rate": 1.8745927459924454e-05, "loss": 0.809882640838623, "step": 471 }, { "epoch": 0.6124898621248986, "grad_norm": 0.5459777116775513, "learning_rate": 1.8738985306591826e-05, "loss": 0.6817529201507568, "step": 472 }, { "epoch": 0.6137875101378751, "grad_norm": 0.5381180644035339, "learning_rate": 1.8732025284246122e-05, "loss": 0.7059892416000366, "step": 473 }, { "epoch": 0.6150851581508516, "grad_norm": 0.5245769023895264, "learning_rate": 1.8725047407118823e-05, "loss": 0.7031271457672119, "step": 474 }, { "epoch": 0.616382806163828, "grad_norm": 0.5284971594810486, "learning_rate": 1.8718051689477923e-05, "loss": 0.7379744052886963, "step": 475 }, { "epoch": 0.6176804541768045, "grad_norm": 0.5659690499305725, "learning_rate": 1.8711038145627893e-05, "loss": 0.7798171639442444, "step": 476 }, { "epoch": 0.618978102189781, "grad_norm": 0.5460679531097412, "learning_rate": 1.8704006789909654e-05, "loss": 0.7433549165725708, "step": 477 }, { "epoch": 0.6202757502027575, "grad_norm": 0.5171265602111816, "learning_rate": 1.8696957636700555e-05, "loss": 0.7264508008956909, "step": 478 }, { "epoch": 0.621573398215734, "grad_norm": 0.5979129672050476, "learning_rate": 1.868989070041432e-05, "loss": 0.7511105537414551, "step": 479 }, { "epoch": 0.6228710462287105, "grad_norm": 0.5520970225334167, "learning_rate": 1.8682805995501052e-05, "loss": 0.6946426630020142, "step": 480 }, { "epoch": 0.624168694241687, "grad_norm": 0.5510658025741577, "learning_rate": 1.8675703536447178e-05, "loss": 0.7265397310256958, "step": 481 }, { "epoch": 0.6254663422546635, "grad_norm": 0.5842864513397217, "learning_rate": 1.866858333777543e-05, "loss": 0.7219571471214294, "step": 482 }, { "epoch": 0.6267639902676398, "grad_norm": 0.5430331826210022, "learning_rate": 1.8661445414044813e-05, "loss": 0.7292179465293884, "step": 483 }, { "epoch": 0.6280616382806163, "grad_norm": 0.5456423759460449, "learning_rate": 1.865428977985057e-05, "loss": 0.7341865301132202, "step": 484 }, { "epoch": 0.6293592862935928, "grad_norm": 0.55687415599823, "learning_rate": 1.8647116449824165e-05, "loss": 0.7712036371231079, "step": 485 }, { "epoch": 0.6306569343065693, "grad_norm": 0.574967622756958, "learning_rate": 1.8639925438633243e-05, "loss": 0.7341934442520142, "step": 486 }, { "epoch": 0.6319545823195458, "grad_norm": 0.575878381729126, "learning_rate": 1.86327167609816e-05, "loss": 0.6782741546630859, "step": 487 }, { "epoch": 0.6332522303325223, "grad_norm": 0.5638167858123779, "learning_rate": 1.8625490431609154e-05, "loss": 0.8088809251785278, "step": 488 }, { "epoch": 0.6345498783454988, "grad_norm": 0.547574520111084, "learning_rate": 1.8618246465291925e-05, "loss": 0.7108902335166931, "step": 489 }, { "epoch": 0.6358475263584753, "grad_norm": 0.5785483121871948, "learning_rate": 1.861098487684199e-05, "loss": 0.6963984370231628, "step": 490 }, { "epoch": 0.6371451743714518, "grad_norm": 0.547226071357727, "learning_rate": 1.8603705681107456e-05, "loss": 0.6772190928459167, "step": 491 }, { "epoch": 0.6384428223844282, "grad_norm": 0.5494422912597656, "learning_rate": 1.8596408892972442e-05, "loss": 0.7243861556053162, "step": 492 }, { "epoch": 0.6397404703974047, "grad_norm": 0.5267540216445923, "learning_rate": 1.858909452735703e-05, "loss": 0.6649144887924194, "step": 493 }, { "epoch": 0.6410381184103812, "grad_norm": 0.5952751636505127, "learning_rate": 1.858176259921724e-05, "loss": 0.7574429512023926, "step": 494 }, { "epoch": 0.6423357664233577, "grad_norm": 0.5476658344268799, "learning_rate": 1.857441312354502e-05, "loss": 0.6968377828598022, "step": 495 }, { "epoch": 0.6436334144363342, "grad_norm": 0.5507075786590576, "learning_rate": 1.856704611536818e-05, "loss": 0.7353919744491577, "step": 496 }, { "epoch": 0.6449310624493106, "grad_norm": 0.5495625734329224, "learning_rate": 1.8559661589750387e-05, "loss": 0.7162117958068848, "step": 497 }, { "epoch": 0.6462287104622871, "grad_norm": 0.5721608996391296, "learning_rate": 1.8552259561791133e-05, "loss": 0.6986855268478394, "step": 498 }, { "epoch": 0.6475263584752636, "grad_norm": 0.5700922608375549, "learning_rate": 1.8544840046625686e-05, "loss": 0.8195285797119141, "step": 499 }, { "epoch": 0.64882400648824, "grad_norm": 0.5746553540229797, "learning_rate": 1.8537403059425082e-05, "loss": 0.7492556571960449, "step": 500 }, { "epoch": 0.6501216545012165, "grad_norm": 0.5598172545433044, "learning_rate": 1.852994861539607e-05, "loss": 0.6921173930168152, "step": 501 }, { "epoch": 0.651419302514193, "grad_norm": 0.5589975714683533, "learning_rate": 1.8522476729781106e-05, "loss": 0.7157631516456604, "step": 502 }, { "epoch": 0.6527169505271695, "grad_norm": 0.5745802521705627, "learning_rate": 1.8514987417858306e-05, "loss": 0.7679554224014282, "step": 503 }, { "epoch": 0.654014598540146, "grad_norm": 0.581063449382782, "learning_rate": 1.8507480694941416e-05, "loss": 0.7761994004249573, "step": 504 }, { "epoch": 0.6553122465531225, "grad_norm": 0.5932230353355408, "learning_rate": 1.849995657637978e-05, "loss": 0.748866081237793, "step": 505 }, { "epoch": 0.656609894566099, "grad_norm": 0.5524072647094727, "learning_rate": 1.8492415077558325e-05, "loss": 0.7764031887054443, "step": 506 }, { "epoch": 0.6579075425790755, "grad_norm": 0.5266931653022766, "learning_rate": 1.8484856213897496e-05, "loss": 0.7512728571891785, "step": 507 }, { "epoch": 0.659205190592052, "grad_norm": 0.5363677740097046, "learning_rate": 1.847728000085327e-05, "loss": 0.7477032542228699, "step": 508 }, { "epoch": 0.6605028386050283, "grad_norm": 0.5348376035690308, "learning_rate": 1.8469686453917074e-05, "loss": 0.6908712387084961, "step": 509 }, { "epoch": 0.6618004866180048, "grad_norm": 0.5489766597747803, "learning_rate": 1.846207558861579e-05, "loss": 0.7576340436935425, "step": 510 }, { "epoch": 0.6630981346309813, "grad_norm": 0.5426369309425354, "learning_rate": 1.845444742051172e-05, "loss": 0.7107582092285156, "step": 511 }, { "epoch": 0.6643957826439578, "grad_norm": 0.5308833718299866, "learning_rate": 1.8446801965202524e-05, "loss": 0.6590298414230347, "step": 512 }, { "epoch": 0.6656934306569343, "grad_norm": 0.5621533989906311, "learning_rate": 1.8439139238321235e-05, "loss": 0.7291080355644226, "step": 513 }, { "epoch": 0.6669910786699108, "grad_norm": 0.5651385188102722, "learning_rate": 1.8431459255536185e-05, "loss": 0.7855580449104309, "step": 514 }, { "epoch": 0.6682887266828873, "grad_norm": 0.5611156225204468, "learning_rate": 1.8423762032551e-05, "loss": 0.6918215751647949, "step": 515 }, { "epoch": 0.6695863746958638, "grad_norm": 0.5477362275123596, "learning_rate": 1.841604758510454e-05, "loss": 0.7025431394577026, "step": 516 }, { "epoch": 0.6708840227088402, "grad_norm": 0.5612704753875732, "learning_rate": 1.840831592897091e-05, "loss": 0.7540648579597473, "step": 517 }, { "epoch": 0.6721816707218167, "grad_norm": 0.5650063753128052, "learning_rate": 1.8400567079959383e-05, "loss": 0.7409968376159668, "step": 518 }, { "epoch": 0.6734793187347932, "grad_norm": 0.5648168921470642, "learning_rate": 1.8392801053914396e-05, "loss": 0.754462718963623, "step": 519 }, { "epoch": 0.6747769667477697, "grad_norm": 0.5603179931640625, "learning_rate": 1.8385017866715507e-05, "loss": 0.7388665080070496, "step": 520 }, { "epoch": 0.6760746147607462, "grad_norm": 0.5628640651702881, "learning_rate": 1.8377217534277365e-05, "loss": 0.7781612873077393, "step": 521 }, { "epoch": 0.6773722627737226, "grad_norm": 0.593789279460907, "learning_rate": 1.8369400072549674e-05, "loss": 0.753161609172821, "step": 522 }, { "epoch": 0.6786699107866991, "grad_norm": 0.5755636096000671, "learning_rate": 1.8361565497517166e-05, "loss": 0.7570379972457886, "step": 523 }, { "epoch": 0.6799675587996756, "grad_norm": 0.5607541799545288, "learning_rate": 1.835371382519956e-05, "loss": 0.777469277381897, "step": 524 }, { "epoch": 0.681265206812652, "grad_norm": 0.4994042217731476, "learning_rate": 1.8345845071651543e-05, "loss": 0.6544281840324402, "step": 525 }, { "epoch": 0.6825628548256285, "grad_norm": 0.5685398578643799, "learning_rate": 1.8337959252962728e-05, "loss": 0.7024877071380615, "step": 526 }, { "epoch": 0.683860502838605, "grad_norm": 0.5343568325042725, "learning_rate": 1.8330056385257607e-05, "loss": 0.7003896832466125, "step": 527 }, { "epoch": 0.6851581508515815, "grad_norm": 0.5208355188369751, "learning_rate": 1.8322136484695553e-05, "loss": 0.6797738075256348, "step": 528 }, { "epoch": 0.686455798864558, "grad_norm": 0.5621144771575928, "learning_rate": 1.8314199567470755e-05, "loss": 0.6609838008880615, "step": 529 }, { "epoch": 0.6877534468775345, "grad_norm": 0.577298104763031, "learning_rate": 1.83062456498122e-05, "loss": 0.711292028427124, "step": 530 }, { "epoch": 0.689051094890511, "grad_norm": 0.5840193629264832, "learning_rate": 1.8298274747983638e-05, "loss": 0.7950271368026733, "step": 531 }, { "epoch": 0.6903487429034875, "grad_norm": 0.5348870158195496, "learning_rate": 1.8290286878283542e-05, "loss": 0.6982176303863525, "step": 532 }, { "epoch": 0.691646390916464, "grad_norm": 0.5467864871025085, "learning_rate": 1.8282282057045087e-05, "loss": 0.7555949687957764, "step": 533 }, { "epoch": 0.6929440389294403, "grad_norm": 0.5581674575805664, "learning_rate": 1.827426030063611e-05, "loss": 0.6723984479904175, "step": 534 }, { "epoch": 0.6942416869424168, "grad_norm": 0.5615087151527405, "learning_rate": 1.8266221625459064e-05, "loss": 0.7201924324035645, "step": 535 }, { "epoch": 0.6955393349553933, "grad_norm": 0.5710893273353577, "learning_rate": 1.825816604795101e-05, "loss": 0.7096928358078003, "step": 536 }, { "epoch": 0.6968369829683698, "grad_norm": 0.5586241483688354, "learning_rate": 1.8250093584583567e-05, "loss": 0.7197962999343872, "step": 537 }, { "epoch": 0.6981346309813463, "grad_norm": 0.5536755323410034, "learning_rate": 1.8242004251862872e-05, "loss": 0.678354799747467, "step": 538 }, { "epoch": 0.6994322789943228, "grad_norm": 0.5744696855545044, "learning_rate": 1.823389806632957e-05, "loss": 0.7439010739326477, "step": 539 }, { "epoch": 0.7007299270072993, "grad_norm": 0.5338960886001587, "learning_rate": 1.8225775044558757e-05, "loss": 0.731925904750824, "step": 540 }, { "epoch": 0.7020275750202758, "grad_norm": 0.5696558356285095, "learning_rate": 1.8217635203159957e-05, "loss": 0.7480655312538147, "step": 541 }, { "epoch": 0.7033252230332522, "grad_norm": 0.5994415283203125, "learning_rate": 1.8209478558777084e-05, "loss": 0.776438295841217, "step": 542 }, { "epoch": 0.7046228710462287, "grad_norm": 0.578956127166748, "learning_rate": 1.8201305128088412e-05, "loss": 0.7190870046615601, "step": 543 }, { "epoch": 0.7059205190592052, "grad_norm": 0.557142972946167, "learning_rate": 1.819311492780654e-05, "loss": 0.7524915933609009, "step": 544 }, { "epoch": 0.7072181670721817, "grad_norm": 0.5244631171226501, "learning_rate": 1.8184907974678348e-05, "loss": 0.6941534876823425, "step": 545 }, { "epoch": 0.7085158150851582, "grad_norm": 0.5301777720451355, "learning_rate": 1.8176684285484985e-05, "loss": 0.7010957598686218, "step": 546 }, { "epoch": 0.7098134630981346, "grad_norm": 0.5309736728668213, "learning_rate": 1.816844387704181e-05, "loss": 0.6693360209465027, "step": 547 }, { "epoch": 0.7111111111111111, "grad_norm": 0.5188398361206055, "learning_rate": 1.8160186766198375e-05, "loss": 0.7254098057746887, "step": 548 }, { "epoch": 0.7124087591240876, "grad_norm": 0.5340986847877502, "learning_rate": 1.815191296983838e-05, "loss": 0.7227193713188171, "step": 549 }, { "epoch": 0.7137064071370641, "grad_norm": 0.5604742765426636, "learning_rate": 1.8143622504879647e-05, "loss": 0.6893896460533142, "step": 550 }, { "epoch": 0.7150040551500405, "grad_norm": 0.5265613794326782, "learning_rate": 1.8135315388274075e-05, "loss": 0.7178789377212524, "step": 551 }, { "epoch": 0.716301703163017, "grad_norm": 0.5819421410560608, "learning_rate": 1.8126991637007618e-05, "loss": 0.7809138298034668, "step": 552 }, { "epoch": 0.7175993511759935, "grad_norm": 0.5548515915870667, "learning_rate": 1.8118651268100235e-05, "loss": 0.7398655414581299, "step": 553 }, { "epoch": 0.71889699918897, "grad_norm": 0.5281164050102234, "learning_rate": 1.811029429860588e-05, "loss": 0.7255332469940186, "step": 554 }, { "epoch": 0.7201946472019465, "grad_norm": 0.51970374584198, "learning_rate": 1.810192074561243e-05, "loss": 0.6958039999008179, "step": 555 }, { "epoch": 0.721492295214923, "grad_norm": 0.5574509501457214, "learning_rate": 1.8093530626241684e-05, "loss": 0.77367103099823, "step": 556 }, { "epoch": 0.7227899432278995, "grad_norm": 0.5539534687995911, "learning_rate": 1.8085123957649315e-05, "loss": 0.7615116834640503, "step": 557 }, { "epoch": 0.724087591240876, "grad_norm": 0.549517035484314, "learning_rate": 1.8076700757024833e-05, "loss": 0.777897834777832, "step": 558 }, { "epoch": 0.7253852392538523, "grad_norm": 0.5480270981788635, "learning_rate": 1.8068261041591548e-05, "loss": 0.7139554619789124, "step": 559 }, { "epoch": 0.7266828872668288, "grad_norm": 0.5337988138198853, "learning_rate": 1.8059804828606545e-05, "loss": 0.7470839023590088, "step": 560 }, { "epoch": 0.7279805352798053, "grad_norm": 0.5055403709411621, "learning_rate": 1.8051332135360637e-05, "loss": 0.6575566530227661, "step": 561 }, { "epoch": 0.7292781832927818, "grad_norm": 0.5452354550361633, "learning_rate": 1.8042842979178338e-05, "loss": 0.7080937623977661, "step": 562 }, { "epoch": 0.7305758313057583, "grad_norm": 0.5276215672492981, "learning_rate": 1.8034337377417826e-05, "loss": 0.6609282493591309, "step": 563 }, { "epoch": 0.7318734793187348, "grad_norm": 0.5823485851287842, "learning_rate": 1.80258153474709e-05, "loss": 0.7274823784828186, "step": 564 }, { "epoch": 0.7331711273317113, "grad_norm": 0.5385794043540955, "learning_rate": 1.8017276906762955e-05, "loss": 0.6209210157394409, "step": 565 }, { "epoch": 0.7344687753446878, "grad_norm": 0.6051076054573059, "learning_rate": 1.8008722072752943e-05, "loss": 0.7948423624038696, "step": 566 }, { "epoch": 0.7357664233576642, "grad_norm": 0.8337801098823547, "learning_rate": 1.8000150862933335e-05, "loss": 0.7299556732177734, "step": 567 }, { "epoch": 0.7370640713706407, "grad_norm": 0.5429887771606445, "learning_rate": 1.7991563294830083e-05, "loss": 0.686081051826477, "step": 568 }, { "epoch": 0.7383617193836172, "grad_norm": 0.5419583916664124, "learning_rate": 1.7982959386002592e-05, "loss": 0.7415616512298584, "step": 569 }, { "epoch": 0.7396593673965937, "grad_norm": 0.5454174280166626, "learning_rate": 1.7974339154043677e-05, "loss": 0.7275187969207764, "step": 570 }, { "epoch": 0.7409570154095702, "grad_norm": 0.5611673593521118, "learning_rate": 1.796570261657953e-05, "loss": 0.7872575521469116, "step": 571 }, { "epoch": 0.7422546634225466, "grad_norm": 0.5598644018173218, "learning_rate": 1.7957049791269684e-05, "loss": 0.7327409982681274, "step": 572 }, { "epoch": 0.7435523114355231, "grad_norm": 0.558341920375824, "learning_rate": 1.7948380695806983e-05, "loss": 0.711640477180481, "step": 573 }, { "epoch": 0.7448499594484996, "grad_norm": 0.5189648270606995, "learning_rate": 1.793969534791752e-05, "loss": 0.6593164801597595, "step": 574 }, { "epoch": 0.7461476074614761, "grad_norm": 0.5739206671714783, "learning_rate": 1.7930993765360644e-05, "loss": 0.775146484375, "step": 575 }, { "epoch": 0.7474452554744525, "grad_norm": 0.5306016802787781, "learning_rate": 1.792227596592889e-05, "loss": 0.6946839094161987, "step": 576 }, { "epoch": 0.748742903487429, "grad_norm": 0.5487167835235596, "learning_rate": 1.791354196744794e-05, "loss": 0.7318082451820374, "step": 577 }, { "epoch": 0.7500405515004055, "grad_norm": 0.5554513931274414, "learning_rate": 1.790479178777662e-05, "loss": 0.727341890335083, "step": 578 }, { "epoch": 0.751338199513382, "grad_norm": 0.5512000918388367, "learning_rate": 1.7896025444806834e-05, "loss": 0.7673891186714172, "step": 579 }, { "epoch": 0.7526358475263585, "grad_norm": 0.5614628195762634, "learning_rate": 1.7887242956463528e-05, "loss": 0.7410103678703308, "step": 580 }, { "epoch": 0.753933495539335, "grad_norm": 0.5414284467697144, "learning_rate": 1.7878444340704666e-05, "loss": 0.7189674377441406, "step": 581 }, { "epoch": 0.7552311435523115, "grad_norm": 0.5145770311355591, "learning_rate": 1.78696296155212e-05, "loss": 0.6776304244995117, "step": 582 }, { "epoch": 0.756528791565288, "grad_norm": 0.5401176810264587, "learning_rate": 1.7860798798937e-05, "loss": 0.6960833072662354, "step": 583 }, { "epoch": 0.7578264395782643, "grad_norm": 0.5560998916625977, "learning_rate": 1.7851951909008864e-05, "loss": 0.6736742258071899, "step": 584 }, { "epoch": 0.7591240875912408, "grad_norm": 0.5505719780921936, "learning_rate": 1.7843088963826437e-05, "loss": 0.6757134795188904, "step": 585 }, { "epoch": 0.7604217356042173, "grad_norm": 0.5717475414276123, "learning_rate": 1.783420998151219e-05, "loss": 0.7612842321395874, "step": 586 }, { "epoch": 0.7617193836171938, "grad_norm": 0.5554843544960022, "learning_rate": 1.782531498022141e-05, "loss": 0.705300509929657, "step": 587 }, { "epoch": 0.7630170316301703, "grad_norm": 0.5320503115653992, "learning_rate": 1.781640397814211e-05, "loss": 0.7508092522621155, "step": 588 }, { "epoch": 0.7643146796431468, "grad_norm": 0.5554909706115723, "learning_rate": 1.7807476993495047e-05, "loss": 0.7732164859771729, "step": 589 }, { "epoch": 0.7656123276561233, "grad_norm": 0.5467298030853271, "learning_rate": 1.779853404453363e-05, "loss": 0.7246618270874023, "step": 590 }, { "epoch": 0.7669099756690998, "grad_norm": 0.5365788340568542, "learning_rate": 1.7789575149543936e-05, "loss": 0.6982936263084412, "step": 591 }, { "epoch": 0.7682076236820763, "grad_norm": 0.5504671931266785, "learning_rate": 1.7780600326844638e-05, "loss": 0.7263147830963135, "step": 592 }, { "epoch": 0.7695052716950527, "grad_norm": 0.549707293510437, "learning_rate": 1.7771609594786968e-05, "loss": 0.7235106229782104, "step": 593 }, { "epoch": 0.7708029197080292, "grad_norm": 0.5401800274848938, "learning_rate": 1.776260297175471e-05, "loss": 0.7632750272750854, "step": 594 }, { "epoch": 0.7721005677210057, "grad_norm": 0.5245280265808105, "learning_rate": 1.775358047616412e-05, "loss": 0.6609013080596924, "step": 595 }, { "epoch": 0.7733982157339822, "grad_norm": 0.5566380023956299, "learning_rate": 1.774454212646392e-05, "loss": 0.7397713661193848, "step": 596 }, { "epoch": 0.7746958637469586, "grad_norm": 0.5788303017616272, "learning_rate": 1.773548794113525e-05, "loss": 0.6708486676216125, "step": 597 }, { "epoch": 0.7759935117599351, "grad_norm": 0.5494595170021057, "learning_rate": 1.772641793869162e-05, "loss": 0.7761523723602295, "step": 598 }, { "epoch": 0.7772911597729116, "grad_norm": 0.5339208245277405, "learning_rate": 1.7717332137678895e-05, "loss": 0.6619516611099243, "step": 599 }, { "epoch": 0.7785888077858881, "grad_norm": 0.5362167358398438, "learning_rate": 1.770823055667524e-05, "loss": 0.7144718170166016, "step": 600 }, { "epoch": 0.7798864557988645, "grad_norm": 0.5141735076904297, "learning_rate": 1.7699113214291082e-05, "loss": 0.6293293237686157, "step": 601 }, { "epoch": 0.781184103811841, "grad_norm": 0.5582875609397888, "learning_rate": 1.768998012916908e-05, "loss": 0.7720483541488647, "step": 602 }, { "epoch": 0.7824817518248175, "grad_norm": 0.5367119312286377, "learning_rate": 1.7680831319984077e-05, "loss": 0.705078661441803, "step": 603 }, { "epoch": 0.783779399837794, "grad_norm": 0.5382807850837708, "learning_rate": 1.7671666805443076e-05, "loss": 0.7088773846626282, "step": 604 }, { "epoch": 0.7850770478507705, "grad_norm": 0.5625648498535156, "learning_rate": 1.766248660428519e-05, "loss": 0.7392460703849792, "step": 605 }, { "epoch": 0.786374695863747, "grad_norm": 0.5586503744125366, "learning_rate": 1.7653290735281605e-05, "loss": 0.7484114170074463, "step": 606 }, { "epoch": 0.7876723438767235, "grad_norm": 0.5572494864463806, "learning_rate": 1.7644079217235547e-05, "loss": 0.7409180402755737, "step": 607 }, { "epoch": 0.7889699918897, "grad_norm": 0.5369569659233093, "learning_rate": 1.763485206898224e-05, "loss": 0.6471737027168274, "step": 608 }, { "epoch": 0.7902676399026763, "grad_norm": 0.5504409074783325, "learning_rate": 1.762560930938886e-05, "loss": 0.7778940200805664, "step": 609 }, { "epoch": 0.7915652879156528, "grad_norm": 0.5358904600143433, "learning_rate": 1.7616350957354523e-05, "loss": 0.694309413433075, "step": 610 }, { "epoch": 0.7928629359286293, "grad_norm": 0.5360654592514038, "learning_rate": 1.7607077031810204e-05, "loss": 0.6945086717605591, "step": 611 }, { "epoch": 0.7941605839416058, "grad_norm": 0.535325825214386, "learning_rate": 1.759778755171874e-05, "loss": 0.7578423619270325, "step": 612 }, { "epoch": 0.7954582319545823, "grad_norm": 0.5466883182525635, "learning_rate": 1.758848253607476e-05, "loss": 0.7157893180847168, "step": 613 }, { "epoch": 0.7967558799675588, "grad_norm": 0.5534203052520752, "learning_rate": 1.7579162003904678e-05, "loss": 0.7312074303627014, "step": 614 }, { "epoch": 0.7980535279805353, "grad_norm": 0.5488491654396057, "learning_rate": 1.756982597426661e-05, "loss": 0.7318480014801025, "step": 615 }, { "epoch": 0.7993511759935118, "grad_norm": 0.5375532507896423, "learning_rate": 1.756047446625038e-05, "loss": 0.7143536806106567, "step": 616 }, { "epoch": 0.8006488240064883, "grad_norm": 0.5791228413581848, "learning_rate": 1.7551107498977458e-05, "loss": 0.642976701259613, "step": 617 }, { "epoch": 0.8019464720194647, "grad_norm": 0.5346726179122925, "learning_rate": 1.7541725091600918e-05, "loss": 0.687232255935669, "step": 618 }, { "epoch": 0.8032441200324412, "grad_norm": 0.5417895913124084, "learning_rate": 1.7532327263305405e-05, "loss": 0.7081488370895386, "step": 619 }, { "epoch": 0.8045417680454177, "grad_norm": 0.5509006381034851, "learning_rate": 1.75229140333071e-05, "loss": 0.7728561162948608, "step": 620 }, { "epoch": 0.8058394160583942, "grad_norm": 0.5634705424308777, "learning_rate": 1.7513485420853683e-05, "loss": 0.6951034069061279, "step": 621 }, { "epoch": 0.8071370640713706, "grad_norm": 0.5197573900222778, "learning_rate": 1.750404144522427e-05, "loss": 0.7106211185455322, "step": 622 }, { "epoch": 0.8084347120843471, "grad_norm": 0.5803437232971191, "learning_rate": 1.7494582125729408e-05, "loss": 0.7436937689781189, "step": 623 }, { "epoch": 0.8097323600973236, "grad_norm": 0.541920006275177, "learning_rate": 1.7485107481711014e-05, "loss": 0.6682834029197693, "step": 624 }, { "epoch": 0.8110300081103001, "grad_norm": 0.561758279800415, "learning_rate": 1.7475617532542325e-05, "loss": 0.6873137950897217, "step": 625 }, { "epoch": 0.8123276561232765, "grad_norm": 0.5416638255119324, "learning_rate": 1.7466112297627894e-05, "loss": 0.7167541980743408, "step": 626 }, { "epoch": 0.813625304136253, "grad_norm": 0.5338025093078613, "learning_rate": 1.7456591796403525e-05, "loss": 0.7321476340293884, "step": 627 }, { "epoch": 0.8149229521492295, "grad_norm": 0.5378256440162659, "learning_rate": 1.744705604833622e-05, "loss": 0.6663627624511719, "step": 628 }, { "epoch": 0.816220600162206, "grad_norm": 0.581386387348175, "learning_rate": 1.7437505072924177e-05, "loss": 0.755516767501831, "step": 629 }, { "epoch": 0.8175182481751825, "grad_norm": 0.581896185874939, "learning_rate": 1.742793888969673e-05, "loss": 0.7974879145622253, "step": 630 }, { "epoch": 0.818815896188159, "grad_norm": 0.521468460559845, "learning_rate": 1.741835751821429e-05, "loss": 0.7400495409965515, "step": 631 }, { "epoch": 0.8201135442011355, "grad_norm": 0.5232843160629272, "learning_rate": 1.7408760978068343e-05, "loss": 0.6786386966705322, "step": 632 }, { "epoch": 0.821411192214112, "grad_norm": 0.5813708901405334, "learning_rate": 1.739914928888139e-05, "loss": 0.7453535199165344, "step": 633 }, { "epoch": 0.8227088402270885, "grad_norm": 0.5424124002456665, "learning_rate": 1.7389522470306892e-05, "loss": 0.7520110607147217, "step": 634 }, { "epoch": 0.8240064882400648, "grad_norm": 0.5089052319526672, "learning_rate": 1.7379880542029263e-05, "loss": 0.7197295427322388, "step": 635 }, { "epoch": 0.8253041362530413, "grad_norm": 0.5367469191551208, "learning_rate": 1.7370223523763804e-05, "loss": 0.7498934864997864, "step": 636 }, { "epoch": 0.8266017842660178, "grad_norm": 0.5291455388069153, "learning_rate": 1.7360551435256673e-05, "loss": 0.7376183867454529, "step": 637 }, { "epoch": 0.8278994322789943, "grad_norm": 0.5446896553039551, "learning_rate": 1.7350864296284846e-05, "loss": 0.735445499420166, "step": 638 }, { "epoch": 0.8291970802919708, "grad_norm": 0.5124339461326599, "learning_rate": 1.7341162126656063e-05, "loss": 0.6861530542373657, "step": 639 }, { "epoch": 0.8304947283049473, "grad_norm": 0.5077775120735168, "learning_rate": 1.7331444946208815e-05, "loss": 0.688785195350647, "step": 640 }, { "epoch": 0.8317923763179238, "grad_norm": 0.5058798789978027, "learning_rate": 1.732171277481227e-05, "loss": 0.7133075594902039, "step": 641 }, { "epoch": 0.8330900243309003, "grad_norm": 0.5404756665229797, "learning_rate": 1.7311965632366254e-05, "loss": 0.7240495681762695, "step": 642 }, { "epoch": 0.8343876723438767, "grad_norm": 0.5313534736633301, "learning_rate": 1.7302203538801212e-05, "loss": 0.71756911277771, "step": 643 }, { "epoch": 0.8356853203568532, "grad_norm": 0.5360015630722046, "learning_rate": 1.729242651407815e-05, "loss": 0.7652734518051147, "step": 644 }, { "epoch": 0.8369829683698297, "grad_norm": 0.540046751499176, "learning_rate": 1.7282634578188612e-05, "loss": 0.7294871807098389, "step": 645 }, { "epoch": 0.8382806163828062, "grad_norm": 0.5653432607650757, "learning_rate": 1.7272827751154627e-05, "loss": 0.7391757965087891, "step": 646 }, { "epoch": 0.8395782643957826, "grad_norm": 0.5427312850952148, "learning_rate": 1.7263006053028674e-05, "loss": 0.6798534393310547, "step": 647 }, { "epoch": 0.8408759124087591, "grad_norm": 0.539861261844635, "learning_rate": 1.7253169503893637e-05, "loss": 0.7292792201042175, "step": 648 }, { "epoch": 0.8421735604217356, "grad_norm": 0.5300166010856628, "learning_rate": 1.7243318123862777e-05, "loss": 0.7026904821395874, "step": 649 }, { "epoch": 0.8434712084347121, "grad_norm": 0.5242528319358826, "learning_rate": 1.7233451933079663e-05, "loss": 0.6926451921463013, "step": 650 }, { "epoch": 0.8447688564476885, "grad_norm": 0.5352111458778381, "learning_rate": 1.7223570951718166e-05, "loss": 0.7006164789199829, "step": 651 }, { "epoch": 0.846066504460665, "grad_norm": 0.5747525095939636, "learning_rate": 1.7213675199982388e-05, "loss": 0.7685414552688599, "step": 652 }, { "epoch": 0.8473641524736415, "grad_norm": 0.5309545397758484, "learning_rate": 1.7203764698106636e-05, "loss": 0.7312856912612915, "step": 653 }, { "epoch": 0.848661800486618, "grad_norm": 0.5124905705451965, "learning_rate": 1.7193839466355383e-05, "loss": 0.6484863758087158, "step": 654 }, { "epoch": 0.8499594484995945, "grad_norm": 0.5323530435562134, "learning_rate": 1.7183899525023212e-05, "loss": 0.694681704044342, "step": 655 }, { "epoch": 0.851257096512571, "grad_norm": 0.5242999792098999, "learning_rate": 1.7173944894434783e-05, "loss": 0.6672481298446655, "step": 656 }, { "epoch": 0.8525547445255475, "grad_norm": 0.5519501566886902, "learning_rate": 1.7163975594944807e-05, "loss": 0.7557801604270935, "step": 657 }, { "epoch": 0.853852392538524, "grad_norm": 0.5345069169998169, "learning_rate": 1.715399164693797e-05, "loss": 0.7127410173416138, "step": 658 }, { "epoch": 0.8551500405515005, "grad_norm": 0.5087319016456604, "learning_rate": 1.7143993070828913e-05, "loss": 0.6801098585128784, "step": 659 }, { "epoch": 0.8564476885644768, "grad_norm": 0.546444833278656, "learning_rate": 1.713397988706221e-05, "loss": 0.7135753631591797, "step": 660 }, { "epoch": 0.8577453365774533, "grad_norm": 0.5438613891601562, "learning_rate": 1.7123952116112275e-05, "loss": 0.7199326753616333, "step": 661 }, { "epoch": 0.8590429845904298, "grad_norm": 0.5320620536804199, "learning_rate": 1.7113909778483364e-05, "loss": 0.7263282537460327, "step": 662 }, { "epoch": 0.8603406326034063, "grad_norm": 0.5496207475662231, "learning_rate": 1.7103852894709517e-05, "loss": 0.6767710447311401, "step": 663 }, { "epoch": 0.8616382806163828, "grad_norm": 0.5515886545181274, "learning_rate": 1.7093781485354517e-05, "loss": 0.666580319404602, "step": 664 }, { "epoch": 0.8629359286293593, "grad_norm": 0.5425974130630493, "learning_rate": 1.7083695571011842e-05, "loss": 0.7289122343063354, "step": 665 }, { "epoch": 0.8642335766423358, "grad_norm": 0.5263716578483582, "learning_rate": 1.707359517230464e-05, "loss": 0.6910987496376038, "step": 666 }, { "epoch": 0.8655312246553123, "grad_norm": 0.525571346282959, "learning_rate": 1.7063480309885668e-05, "loss": 0.6733009815216064, "step": 667 }, { "epoch": 0.8668288726682887, "grad_norm": 0.5529440641403198, "learning_rate": 1.7053351004437258e-05, "loss": 0.6993213295936584, "step": 668 }, { "epoch": 0.8681265206812652, "grad_norm": 0.5263779163360596, "learning_rate": 1.7043207276671276e-05, "loss": 0.7125247120857239, "step": 669 }, { "epoch": 0.8694241686942417, "grad_norm": 0.5178059935569763, "learning_rate": 1.7033049147329077e-05, "loss": 0.7389542460441589, "step": 670 }, { "epoch": 0.8707218167072182, "grad_norm": 0.5027527809143066, "learning_rate": 1.702287663718147e-05, "loss": 0.6378510594367981, "step": 671 }, { "epoch": 0.8720194647201946, "grad_norm": 0.5320873260498047, "learning_rate": 1.7012689767028656e-05, "loss": 0.6820501089096069, "step": 672 }, { "epoch": 0.8733171127331711, "grad_norm": 0.5544079542160034, "learning_rate": 1.700248855770021e-05, "loss": 0.7887839078903198, "step": 673 }, { "epoch": 0.8746147607461476, "grad_norm": 0.5328344702720642, "learning_rate": 1.6992273030055022e-05, "loss": 0.7038314938545227, "step": 674 }, { "epoch": 0.8759124087591241, "grad_norm": 0.5509505867958069, "learning_rate": 1.6982043204981264e-05, "loss": 0.7049298286437988, "step": 675 }, { "epoch": 0.8772100567721006, "grad_norm": 0.5168129205703735, "learning_rate": 1.6971799103396332e-05, "loss": 0.6959193348884583, "step": 676 }, { "epoch": 0.878507704785077, "grad_norm": 0.5376099944114685, "learning_rate": 1.696154074624683e-05, "loss": 0.7292076349258423, "step": 677 }, { "epoch": 0.8798053527980535, "grad_norm": 0.5142057538032532, "learning_rate": 1.6951268154508497e-05, "loss": 0.7193281650543213, "step": 678 }, { "epoch": 0.88110300081103, "grad_norm": 0.5402371287345886, "learning_rate": 1.6940981349186182e-05, "loss": 0.748397946357727, "step": 679 }, { "epoch": 0.8824006488240065, "grad_norm": 0.5436865091323853, "learning_rate": 1.69306803513138e-05, "loss": 0.7238379716873169, "step": 680 }, { "epoch": 0.883698296836983, "grad_norm": 0.5323321223258972, "learning_rate": 1.6920365181954284e-05, "loss": 0.7368711829185486, "step": 681 }, { "epoch": 0.8849959448499595, "grad_norm": 0.5474384427070618, "learning_rate": 1.6910035862199545e-05, "loss": 0.7030202746391296, "step": 682 }, { "epoch": 0.886293592862936, "grad_norm": 0.5428197979927063, "learning_rate": 1.6899692413170422e-05, "loss": 0.713437557220459, "step": 683 }, { "epoch": 0.8875912408759125, "grad_norm": 0.5502634048461914, "learning_rate": 1.688933485601666e-05, "loss": 0.7090182304382324, "step": 684 }, { "epoch": 0.8888888888888888, "grad_norm": 0.5356465578079224, "learning_rate": 1.6878963211916833e-05, "loss": 0.7201128005981445, "step": 685 }, { "epoch": 0.8901865369018653, "grad_norm": 0.5563944578170776, "learning_rate": 1.6868577502078336e-05, "loss": 0.7264722585678101, "step": 686 }, { "epoch": 0.8914841849148418, "grad_norm": 0.5522723197937012, "learning_rate": 1.6858177747737312e-05, "loss": 0.7600725889205933, "step": 687 }, { "epoch": 0.8927818329278183, "grad_norm": 0.49715539813041687, "learning_rate": 1.684776397015863e-05, "loss": 0.6456987857818604, "step": 688 }, { "epoch": 0.8940794809407948, "grad_norm": 0.5162433981895447, "learning_rate": 1.6837336190635824e-05, "loss": 0.6648015379905701, "step": 689 }, { "epoch": 0.8953771289537713, "grad_norm": 0.5113485455513, "learning_rate": 1.682689443049107e-05, "loss": 0.7002501487731934, "step": 690 }, { "epoch": 0.8953771289537713, "eval_loss": 0.6879991292953491, "eval_runtime": 72.6036, "eval_samples_per_second": 71.512, "eval_steps_per_second": 8.939, "step": 690 }, { "epoch": 0.8966747769667478, "grad_norm": 0.5129652619361877, "learning_rate": 1.6816438711075114e-05, "loss": 0.7118932008743286, "step": 691 }, { "epoch": 0.8979724249797243, "grad_norm": 0.5204065442085266, "learning_rate": 1.680596905376727e-05, "loss": 0.7194908857345581, "step": 692 }, { "epoch": 0.8992700729927007, "grad_norm": 0.5264798402786255, "learning_rate": 1.6795485479975327e-05, "loss": 0.6868776082992554, "step": 693 }, { "epoch": 0.9005677210056772, "grad_norm": 0.5244487524032593, "learning_rate": 1.6784988011135546e-05, "loss": 0.7106890678405762, "step": 694 }, { "epoch": 0.9018653690186537, "grad_norm": 0.5397396683692932, "learning_rate": 1.6774476668712587e-05, "loss": 0.695647656917572, "step": 695 }, { "epoch": 0.9031630170316302, "grad_norm": 0.5147722959518433, "learning_rate": 1.676395147419949e-05, "loss": 0.7283300161361694, "step": 696 }, { "epoch": 0.9044606650446066, "grad_norm": 0.5326966047286987, "learning_rate": 1.6753412449117615e-05, "loss": 0.7349389791488647, "step": 697 }, { "epoch": 0.9057583130575831, "grad_norm": 0.522964596748352, "learning_rate": 1.67428596150166e-05, "loss": 0.7657152414321899, "step": 698 }, { "epoch": 0.9070559610705596, "grad_norm": 0.5306779742240906, "learning_rate": 1.6732292993474316e-05, "loss": 0.6991469264030457, "step": 699 }, { "epoch": 0.9083536090835361, "grad_norm": 0.517011284828186, "learning_rate": 1.6721712606096833e-05, "loss": 0.6861897706985474, "step": 700 }, { "epoch": 0.9096512570965126, "grad_norm": 0.5209232568740845, "learning_rate": 1.6711118474518363e-05, "loss": 0.6535213589668274, "step": 701 }, { "epoch": 0.910948905109489, "grad_norm": 0.538005530834198, "learning_rate": 1.6700510620401223e-05, "loss": 0.6827917695045471, "step": 702 }, { "epoch": 0.9122465531224655, "grad_norm": 0.5532050132751465, "learning_rate": 1.6689889065435796e-05, "loss": 0.7328672409057617, "step": 703 }, { "epoch": 0.913544201135442, "grad_norm": 0.5541777014732361, "learning_rate": 1.667925383134047e-05, "loss": 0.639081597328186, "step": 704 }, { "epoch": 0.9148418491484185, "grad_norm": 0.5441383719444275, "learning_rate": 1.66686049398616e-05, "loss": 0.7073994874954224, "step": 705 }, { "epoch": 0.916139497161395, "grad_norm": 0.5432547330856323, "learning_rate": 1.6657942412773484e-05, "loss": 0.7249147295951843, "step": 706 }, { "epoch": 0.9174371451743715, "grad_norm": 0.5718936324119568, "learning_rate": 1.664726627187829e-05, "loss": 0.7475080490112305, "step": 707 }, { "epoch": 0.918734793187348, "grad_norm": 0.5303789377212524, "learning_rate": 1.6636576539006015e-05, "loss": 0.7102556228637695, "step": 708 }, { "epoch": 0.9200324412003245, "grad_norm": 0.5120844841003418, "learning_rate": 1.6625873236014464e-05, "loss": 0.7160992622375488, "step": 709 }, { "epoch": 0.9213300892133008, "grad_norm": 0.5382957458496094, "learning_rate": 1.6615156384789185e-05, "loss": 0.6958597898483276, "step": 710 }, { "epoch": 0.9226277372262773, "grad_norm": 0.5195145606994629, "learning_rate": 1.660442600724342e-05, "loss": 0.6958160400390625, "step": 711 }, { "epoch": 0.9239253852392538, "grad_norm": 0.5473058223724365, "learning_rate": 1.659368212531808e-05, "loss": 0.7220757007598877, "step": 712 }, { "epoch": 0.9252230332522303, "grad_norm": 0.5131781697273254, "learning_rate": 1.6582924760981683e-05, "loss": 0.7035195827484131, "step": 713 }, { "epoch": 0.9265206812652068, "grad_norm": 0.5314381122589111, "learning_rate": 1.6572153936230316e-05, "loss": 0.6506175994873047, "step": 714 }, { "epoch": 0.9278183292781833, "grad_norm": 0.565310001373291, "learning_rate": 1.6561369673087588e-05, "loss": 0.7714331746101379, "step": 715 }, { "epoch": 0.9291159772911598, "grad_norm": 0.530504584312439, "learning_rate": 1.6550571993604587e-05, "loss": 0.7331136465072632, "step": 716 }, { "epoch": 0.9304136253041363, "grad_norm": 0.5755041837692261, "learning_rate": 1.6539760919859838e-05, "loss": 0.7090123891830444, "step": 717 }, { "epoch": 0.9317112733171128, "grad_norm": 0.5264776349067688, "learning_rate": 1.6528936473959253e-05, "loss": 0.7207454442977905, "step": 718 }, { "epoch": 0.9330089213300892, "grad_norm": 0.5459887981414795, "learning_rate": 1.6518098678036073e-05, "loss": 0.7477676272392273, "step": 719 }, { "epoch": 0.9343065693430657, "grad_norm": 0.5480107069015503, "learning_rate": 1.650724755425086e-05, "loss": 0.7585529685020447, "step": 720 }, { "epoch": 0.9356042173560422, "grad_norm": 0.5156884789466858, "learning_rate": 1.6496383124791406e-05, "loss": 0.684555172920227, "step": 721 }, { "epoch": 0.9369018653690186, "grad_norm": 0.5162327289581299, "learning_rate": 1.6485505411872725e-05, "loss": 0.7163575887680054, "step": 722 }, { "epoch": 0.9381995133819951, "grad_norm": 0.5424114465713501, "learning_rate": 1.6474614437736986e-05, "loss": 0.722049355506897, "step": 723 }, { "epoch": 0.9394971613949716, "grad_norm": 0.546845555305481, "learning_rate": 1.6463710224653477e-05, "loss": 0.7012547850608826, "step": 724 }, { "epoch": 0.9407948094079481, "grad_norm": 0.5183011889457703, "learning_rate": 1.6452792794918545e-05, "loss": 0.7152835130691528, "step": 725 }, { "epoch": 0.9420924574209246, "grad_norm": 0.5085439682006836, "learning_rate": 1.644186217085558e-05, "loss": 0.7061685919761658, "step": 726 }, { "epoch": 0.943390105433901, "grad_norm": 0.5237677097320557, "learning_rate": 1.6430918374814937e-05, "loss": 0.7506479024887085, "step": 727 }, { "epoch": 0.9446877534468775, "grad_norm": 0.5498985052108765, "learning_rate": 1.641996142917391e-05, "loss": 0.7604420185089111, "step": 728 }, { "epoch": 0.945985401459854, "grad_norm": 0.506365180015564, "learning_rate": 1.640899135633668e-05, "loss": 0.7282454967498779, "step": 729 }, { "epoch": 0.9472830494728305, "grad_norm": 0.5272793769836426, "learning_rate": 1.6398008178734272e-05, "loss": 0.7712985277175903, "step": 730 }, { "epoch": 0.948580697485807, "grad_norm": 0.49885818362236023, "learning_rate": 1.6387011918824493e-05, "loss": 0.6967482566833496, "step": 731 }, { "epoch": 0.9498783454987835, "grad_norm": 0.5086526274681091, "learning_rate": 1.6376002599091925e-05, "loss": 0.7118892073631287, "step": 732 }, { "epoch": 0.95117599351176, "grad_norm": 0.5380651354789734, "learning_rate": 1.6364980242047835e-05, "loss": 0.7118611335754395, "step": 733 }, { "epoch": 0.9524736415247365, "grad_norm": 0.5358894467353821, "learning_rate": 1.635394487023015e-05, "loss": 0.73922199010849, "step": 734 }, { "epoch": 0.9537712895377128, "grad_norm": 0.518375813961029, "learning_rate": 1.634289650620342e-05, "loss": 0.7491021156311035, "step": 735 }, { "epoch": 0.9550689375506893, "grad_norm": 0.5029126405715942, "learning_rate": 1.633183517255875e-05, "loss": 0.6724518537521362, "step": 736 }, { "epoch": 0.9563665855636658, "grad_norm": 0.5309873819351196, "learning_rate": 1.632076089191376e-05, "loss": 0.7152642011642456, "step": 737 }, { "epoch": 0.9576642335766423, "grad_norm": 0.5265018343925476, "learning_rate": 1.630967368691256e-05, "loss": 0.7223344445228577, "step": 738 }, { "epoch": 0.9589618815896188, "grad_norm": 0.5360968112945557, "learning_rate": 1.6298573580225676e-05, "loss": 0.6773437261581421, "step": 739 }, { "epoch": 0.9602595296025953, "grad_norm": 0.532696545124054, "learning_rate": 1.6287460594550017e-05, "loss": 0.6913273930549622, "step": 740 }, { "epoch": 0.9615571776155718, "grad_norm": 0.5159463286399841, "learning_rate": 1.6276334752608823e-05, "loss": 0.7023458480834961, "step": 741 }, { "epoch": 0.9628548256285483, "grad_norm": 0.5166627764701843, "learning_rate": 1.6265196077151627e-05, "loss": 0.6580889821052551, "step": 742 }, { "epoch": 0.9641524736415248, "grad_norm": 0.5432324409484863, "learning_rate": 1.62540445909542e-05, "loss": 0.7707301378250122, "step": 743 }, { "epoch": 0.9654501216545012, "grad_norm": 0.5537624955177307, "learning_rate": 1.624288031681851e-05, "loss": 0.718231737613678, "step": 744 }, { "epoch": 0.9667477696674777, "grad_norm": 0.5601441860198975, "learning_rate": 1.623170327757267e-05, "loss": 0.7587568759918213, "step": 745 }, { "epoch": 0.9680454176804542, "grad_norm": 0.5228809118270874, "learning_rate": 1.62205134960709e-05, "loss": 0.7063294649124146, "step": 746 }, { "epoch": 0.9693430656934306, "grad_norm": 0.5264230370521545, "learning_rate": 1.620931099519347e-05, "loss": 0.7381964921951294, "step": 747 }, { "epoch": 0.9706407137064071, "grad_norm": 0.5306467413902283, "learning_rate": 1.619809579784665e-05, "loss": 0.6895403861999512, "step": 748 }, { "epoch": 0.9719383617193836, "grad_norm": 0.5162505507469177, "learning_rate": 1.6186867926962695e-05, "loss": 0.7042033672332764, "step": 749 }, { "epoch": 0.9732360097323601, "grad_norm": 0.51023268699646, "learning_rate": 1.6175627405499746e-05, "loss": 0.7028312683105469, "step": 750 }, { "epoch": 0.9745336577453366, "grad_norm": 0.5226272344589233, "learning_rate": 1.6164374256441837e-05, "loss": 0.7110305428504944, "step": 751 }, { "epoch": 0.975831305758313, "grad_norm": 0.5189753174781799, "learning_rate": 1.6153108502798796e-05, "loss": 0.7227635979652405, "step": 752 }, { "epoch": 0.9771289537712895, "grad_norm": 0.5253064036369324, "learning_rate": 1.614183016760625e-05, "loss": 0.708706259727478, "step": 753 }, { "epoch": 0.978426601784266, "grad_norm": 0.5069226622581482, "learning_rate": 1.613053927392553e-05, "loss": 0.7607108354568481, "step": 754 }, { "epoch": 0.9797242497972425, "grad_norm": 0.5430122017860413, "learning_rate": 1.6119235844843664e-05, "loss": 0.6882092356681824, "step": 755 }, { "epoch": 0.981021897810219, "grad_norm": 0.5484969615936279, "learning_rate": 1.6107919903473294e-05, "loss": 0.6984055638313293, "step": 756 }, { "epoch": 0.9823195458231955, "grad_norm": 0.5450364351272583, "learning_rate": 1.6096591472952664e-05, "loss": 0.7414028644561768, "step": 757 }, { "epoch": 0.983617193836172, "grad_norm": 0.5095598101615906, "learning_rate": 1.6085250576445548e-05, "loss": 0.6796683073043823, "step": 758 }, { "epoch": 0.9849148418491485, "grad_norm": 0.5161803364753723, "learning_rate": 1.6073897237141203e-05, "loss": 0.6673390865325928, "step": 759 }, { "epoch": 0.986212489862125, "grad_norm": 0.5004435777664185, "learning_rate": 1.6062531478254333e-05, "loss": 0.6315610408782959, "step": 760 }, { "epoch": 0.9875101378751013, "grad_norm": 0.5166559219360352, "learning_rate": 1.605115332302505e-05, "loss": 0.6672409176826477, "step": 761 }, { "epoch": 0.9888077858880778, "grad_norm": 0.5332128405570984, "learning_rate": 1.603976279471879e-05, "loss": 0.7169513702392578, "step": 762 }, { "epoch": 0.9901054339010543, "grad_norm": 0.5556347370147705, "learning_rate": 1.6028359916626308e-05, "loss": 0.708602786064148, "step": 763 }, { "epoch": 0.9914030819140308, "grad_norm": 0.5154053568840027, "learning_rate": 1.601694471206359e-05, "loss": 0.6270056366920471, "step": 764 }, { "epoch": 0.9927007299270073, "grad_norm": 0.5185645222663879, "learning_rate": 1.600551720437186e-05, "loss": 0.6873992085456848, "step": 765 }, { "epoch": 0.9939983779399838, "grad_norm": 0.546991229057312, "learning_rate": 1.599407741691746e-05, "loss": 0.7366882562637329, "step": 766 }, { "epoch": 0.9952960259529603, "grad_norm": 0.5219473838806152, "learning_rate": 1.5982625373091877e-05, "loss": 0.6808854937553406, "step": 767 }, { "epoch": 0.9965936739659368, "grad_norm": 0.5348212122917175, "learning_rate": 1.5971161096311628e-05, "loss": 0.7217116355895996, "step": 768 }, { "epoch": 0.9978913219789132, "grad_norm": 0.5152093172073364, "learning_rate": 1.5959684610018267e-05, "loss": 0.6545735597610474, "step": 769 }, { "epoch": 0.9991889699918897, "grad_norm": 0.5182209610939026, "learning_rate": 1.5948195937678297e-05, "loss": 0.6775786280632019, "step": 770 }, { "epoch": 1.0, "grad_norm": 0.634954571723938, "learning_rate": 1.5936695102783148e-05, "loss": 0.6640980839729309, "step": 771 }, { "epoch": 1.0012976480129765, "grad_norm": 0.7553068399429321, "learning_rate": 1.5925182128849116e-05, "loss": 0.6133830547332764, "step": 772 }, { "epoch": 1.002595296025953, "grad_norm": 0.6613984704017639, "learning_rate": 1.591365703941732e-05, "loss": 0.5815013647079468, "step": 773 }, { "epoch": 1.0038929440389295, "grad_norm": 0.592282235622406, "learning_rate": 1.5902119858053652e-05, "loss": 0.5898460149765015, "step": 774 }, { "epoch": 1.005190592051906, "grad_norm": 0.5373958945274353, "learning_rate": 1.589057060834872e-05, "loss": 0.6019303798675537, "step": 775 }, { "epoch": 1.0064882400648825, "grad_norm": 0.6260755062103271, "learning_rate": 1.5879009313917826e-05, "loss": 0.5970971584320068, "step": 776 }, { "epoch": 1.007785888077859, "grad_norm": 0.7529841661453247, "learning_rate": 1.5867435998400885e-05, "loss": 0.6816403865814209, "step": 777 }, { "epoch": 1.0090835360908355, "grad_norm": 0.7224608659744263, "learning_rate": 1.5855850685462404e-05, "loss": 0.6263958215713501, "step": 778 }, { "epoch": 1.010381184103812, "grad_norm": 0.6676880121231079, "learning_rate": 1.584425339879141e-05, "loss": 0.6304363012313843, "step": 779 }, { "epoch": 1.0116788321167882, "grad_norm": 0.5799426436424255, "learning_rate": 1.5832644162101417e-05, "loss": 0.59343421459198, "step": 780 }, { "epoch": 1.0129764801297647, "grad_norm": 0.570095956325531, "learning_rate": 1.5821022999130385e-05, "loss": 0.5410763025283813, "step": 781 }, { "epoch": 1.0142741281427412, "grad_norm": 0.5948435068130493, "learning_rate": 1.580938993364064e-05, "loss": 0.5649259686470032, "step": 782 }, { "epoch": 1.0155717761557177, "grad_norm": 0.6467446684837341, "learning_rate": 1.579774498941886e-05, "loss": 0.5860875844955444, "step": 783 }, { "epoch": 1.0168694241686942, "grad_norm": 0.5886529088020325, "learning_rate": 1.578608819027602e-05, "loss": 0.5772626996040344, "step": 784 }, { "epoch": 1.0181670721816707, "grad_norm": 0.5842233896255493, "learning_rate": 1.5774419560047303e-05, "loss": 0.6277778148651123, "step": 785 }, { "epoch": 1.0194647201946472, "grad_norm": 0.590059220790863, "learning_rate": 1.5762739122592123e-05, "loss": 0.6396061182022095, "step": 786 }, { "epoch": 1.0207623682076237, "grad_norm": 0.5897361636161804, "learning_rate": 1.5751046901794008e-05, "loss": 0.5980340242385864, "step": 787 }, { "epoch": 1.0220600162206002, "grad_norm": 0.5984208583831787, "learning_rate": 1.5739342921560593e-05, "loss": 0.602581262588501, "step": 788 }, { "epoch": 1.0233576642335767, "grad_norm": 0.5837097764015198, "learning_rate": 1.5727627205823554e-05, "loss": 0.5742583274841309, "step": 789 }, { "epoch": 1.0246553122465532, "grad_norm": 0.5804028511047363, "learning_rate": 1.571589977853857e-05, "loss": 0.6103036999702454, "step": 790 }, { "epoch": 1.0259529602595296, "grad_norm": 0.5784346461296082, "learning_rate": 1.5704160663685254e-05, "loss": 0.5436456203460693, "step": 791 }, { "epoch": 1.0272506082725061, "grad_norm": 0.576518714427948, "learning_rate": 1.5692409885267127e-05, "loss": 0.6918940544128418, "step": 792 }, { "epoch": 1.0285482562854826, "grad_norm": 0.5824302434921265, "learning_rate": 1.568064746731156e-05, "loss": 0.6090575456619263, "step": 793 }, { "epoch": 1.0298459042984591, "grad_norm": 0.5279770493507385, "learning_rate": 1.5668873433869718e-05, "loss": 0.5268336534500122, "step": 794 }, { "epoch": 1.0311435523114356, "grad_norm": 0.5494199395179749, "learning_rate": 1.5657087809016517e-05, "loss": 0.5766473412513733, "step": 795 }, { "epoch": 1.0324412003244121, "grad_norm": 0.5452569723129272, "learning_rate": 1.564529061685058e-05, "loss": 0.5949534177780151, "step": 796 }, { "epoch": 1.0337388483373884, "grad_norm": 0.5392066240310669, "learning_rate": 1.5633481881494178e-05, "loss": 0.5571380853652954, "step": 797 }, { "epoch": 1.0350364963503649, "grad_norm": 0.5568217635154724, "learning_rate": 1.562166162709319e-05, "loss": 0.5642133951187134, "step": 798 }, { "epoch": 1.0363341443633414, "grad_norm": 0.5702704191207886, "learning_rate": 1.560982987781704e-05, "loss": 0.6047669649124146, "step": 799 }, { "epoch": 1.0376317923763179, "grad_norm": 0.532315731048584, "learning_rate": 1.5597986657858656e-05, "loss": 0.5958635807037354, "step": 800 }, { "epoch": 1.0389294403892944, "grad_norm": 0.5331001877784729, "learning_rate": 1.5586131991434434e-05, "loss": 0.5987897515296936, "step": 801 }, { "epoch": 1.0402270884022708, "grad_norm": 0.5481564402580261, "learning_rate": 1.5574265902784163e-05, "loss": 0.5622409582138062, "step": 802 }, { "epoch": 1.0415247364152473, "grad_norm": 0.5720167756080627, "learning_rate": 1.556238841617099e-05, "loss": 0.6064007878303528, "step": 803 }, { "epoch": 1.0428223844282238, "grad_norm": 0.5809172987937927, "learning_rate": 1.555049955588137e-05, "loss": 0.6170299053192139, "step": 804 }, { "epoch": 1.0441200324412003, "grad_norm": 0.5783301591873169, "learning_rate": 1.5538599346225013e-05, "loss": 0.568396270275116, "step": 805 }, { "epoch": 1.0454176804541768, "grad_norm": 0.5668922662734985, "learning_rate": 1.552668781153484e-05, "loss": 0.576393723487854, "step": 806 }, { "epoch": 1.0467153284671533, "grad_norm": 0.5634539723396301, "learning_rate": 1.5514764976166916e-05, "loss": 0.6574882864952087, "step": 807 }, { "epoch": 1.0480129764801298, "grad_norm": 0.5463752150535583, "learning_rate": 1.5502830864500426e-05, "loss": 0.5930934548377991, "step": 808 }, { "epoch": 1.0493106244931063, "grad_norm": 0.5872495174407959, "learning_rate": 1.5490885500937606e-05, "loss": 0.609790563583374, "step": 809 }, { "epoch": 1.0506082725060828, "grad_norm": 0.5574213266372681, "learning_rate": 1.5478928909903705e-05, "loss": 0.60848468542099, "step": 810 }, { "epoch": 1.0519059205190593, "grad_norm": 0.5493984818458557, "learning_rate": 1.5466961115846927e-05, "loss": 0.5494011640548706, "step": 811 }, { "epoch": 1.0532035685320358, "grad_norm": 0.5724595785140991, "learning_rate": 1.545498214323837e-05, "loss": 0.5948253273963928, "step": 812 }, { "epoch": 1.0545012165450123, "grad_norm": 0.5360091924667358, "learning_rate": 1.544299201657202e-05, "loss": 0.6195284128189087, "step": 813 }, { "epoch": 1.0557988645579885, "grad_norm": 0.5609839558601379, "learning_rate": 1.543099076036463e-05, "loss": 0.5945447087287903, "step": 814 }, { "epoch": 1.057096512570965, "grad_norm": 0.5413586497306824, "learning_rate": 1.5418978399155748e-05, "loss": 0.55891352891922, "step": 815 }, { "epoch": 1.0583941605839415, "grad_norm": 0.5763382315635681, "learning_rate": 1.54069549575076e-05, "loss": 0.5900748372077942, "step": 816 }, { "epoch": 1.059691808596918, "grad_norm": 0.5625810623168945, "learning_rate": 1.539492046000509e-05, "loss": 0.5834665298461914, "step": 817 }, { "epoch": 1.0609894566098945, "grad_norm": 0.5442895889282227, "learning_rate": 1.5382874931255717e-05, "loss": 0.6234191656112671, "step": 818 }, { "epoch": 1.062287104622871, "grad_norm": 0.5448631048202515, "learning_rate": 1.5370818395889536e-05, "loss": 0.5617302060127258, "step": 819 }, { "epoch": 1.0635847526358475, "grad_norm": 0.5880674719810486, "learning_rate": 1.5358750878559113e-05, "loss": 0.6024942994117737, "step": 820 }, { "epoch": 1.064882400648824, "grad_norm": 0.5762202143669128, "learning_rate": 1.5346672403939465e-05, "loss": 0.625447154045105, "step": 821 }, { "epoch": 1.0661800486618005, "grad_norm": 0.5726525187492371, "learning_rate": 1.5334582996728017e-05, "loss": 0.6527541875839233, "step": 822 }, { "epoch": 1.067477696674777, "grad_norm": 0.5863476991653442, "learning_rate": 1.532248268164455e-05, "loss": 0.6537057161331177, "step": 823 }, { "epoch": 1.0687753446877535, "grad_norm": 0.5855088829994202, "learning_rate": 1.5310371483431138e-05, "loss": 0.5910706520080566, "step": 824 }, { "epoch": 1.07007299270073, "grad_norm": 0.5428813695907593, "learning_rate": 1.529824942685212e-05, "loss": 0.6206585168838501, "step": 825 }, { "epoch": 1.0713706407137065, "grad_norm": 0.5427327156066895, "learning_rate": 1.528611653669403e-05, "loss": 0.6064955592155457, "step": 826 }, { "epoch": 1.072668288726683, "grad_norm": 0.5533806085586548, "learning_rate": 1.5273972837765566e-05, "loss": 0.6161221861839294, "step": 827 }, { "epoch": 1.0739659367396595, "grad_norm": 0.5330477356910706, "learning_rate": 1.526181835489751e-05, "loss": 0.584095299243927, "step": 828 }, { "epoch": 1.075263584752636, "grad_norm": 0.5572231411933899, "learning_rate": 1.5249653112942708e-05, "loss": 0.6146395206451416, "step": 829 }, { "epoch": 1.0765612327656124, "grad_norm": 0.5302649140357971, "learning_rate": 1.5237477136776e-05, "loss": 0.5835666060447693, "step": 830 }, { "epoch": 1.0778588807785887, "grad_norm": 0.524252712726593, "learning_rate": 1.5225290451294173e-05, "loss": 0.5483739376068115, "step": 831 }, { "epoch": 1.0791565287915652, "grad_norm": 0.5535216331481934, "learning_rate": 1.521309308141592e-05, "loss": 0.5715370774269104, "step": 832 }, { "epoch": 1.0804541768045417, "grad_norm": 0.5739737749099731, "learning_rate": 1.5200885052081767e-05, "loss": 0.6168693900108337, "step": 833 }, { "epoch": 1.0817518248175182, "grad_norm": 0.5620468258857727, "learning_rate": 1.518866638825405e-05, "loss": 0.6358708143234253, "step": 834 }, { "epoch": 1.0830494728304947, "grad_norm": 0.5504558086395264, "learning_rate": 1.517643711491684e-05, "loss": 0.5625787973403931, "step": 835 }, { "epoch": 1.0843471208434712, "grad_norm": 0.527152955532074, "learning_rate": 1.516419725707591e-05, "loss": 0.5917230248451233, "step": 836 }, { "epoch": 1.0856447688564477, "grad_norm": 0.5097678899765015, "learning_rate": 1.5151946839758673e-05, "loss": 0.5631688237190247, "step": 837 }, { "epoch": 1.0869424168694242, "grad_norm": 0.5500524044036865, "learning_rate": 1.5139685888014123e-05, "loss": 0.6300808787345886, "step": 838 }, { "epoch": 1.0882400648824007, "grad_norm": 0.580634355545044, "learning_rate": 1.512741442691281e-05, "loss": 0.6707481145858765, "step": 839 }, { "epoch": 1.0895377128953772, "grad_norm": 0.5668573379516602, "learning_rate": 1.5115132481546763e-05, "loss": 0.5974687337875366, "step": 840 }, { "epoch": 1.0908353609083536, "grad_norm": 0.5720273852348328, "learning_rate": 1.5102840077029452e-05, "loss": 0.5461701154708862, "step": 841 }, { "epoch": 1.0921330089213301, "grad_norm": 0.5787645578384399, "learning_rate": 1.509053723849574e-05, "loss": 0.6476290225982666, "step": 842 }, { "epoch": 1.0934306569343066, "grad_norm": 0.5475322604179382, "learning_rate": 1.5078223991101805e-05, "loss": 0.5730643272399902, "step": 843 }, { "epoch": 1.0947283049472831, "grad_norm": 0.5544430017471313, "learning_rate": 1.5065900360025128e-05, "loss": 0.6112351417541504, "step": 844 }, { "epoch": 1.0960259529602596, "grad_norm": 0.6194364428520203, "learning_rate": 1.5053566370464416e-05, "loss": 0.612515926361084, "step": 845 }, { "epoch": 1.0973236009732361, "grad_norm": 0.5542813539505005, "learning_rate": 1.5041222047639558e-05, "loss": 0.60612952709198, "step": 846 }, { "epoch": 1.0986212489862126, "grad_norm": 0.5259748697280884, "learning_rate": 1.5028867416791566e-05, "loss": 0.5666128396987915, "step": 847 }, { "epoch": 1.0999188969991889, "grad_norm": 0.5615611672401428, "learning_rate": 1.5016502503182533e-05, "loss": 0.5991164445877075, "step": 848 }, { "epoch": 1.1012165450121654, "grad_norm": 0.5396665334701538, "learning_rate": 1.5004127332095579e-05, "loss": 0.608413815498352, "step": 849 }, { "epoch": 1.1025141930251419, "grad_norm": 0.5625605583190918, "learning_rate": 1.49917419288348e-05, "loss": 0.6390218138694763, "step": 850 }, { "epoch": 1.1038118410381184, "grad_norm": 0.5652357935905457, "learning_rate": 1.4979346318725203e-05, "loss": 0.613496720790863, "step": 851 }, { "epoch": 1.1051094890510949, "grad_norm": 0.5494624376296997, "learning_rate": 1.4966940527112679e-05, "loss": 0.6234304308891296, "step": 852 }, { "epoch": 1.1064071370640713, "grad_norm": 0.546302855014801, "learning_rate": 1.4954524579363932e-05, "loss": 0.6565023064613342, "step": 853 }, { "epoch": 1.1077047850770478, "grad_norm": 0.5649261474609375, "learning_rate": 1.4942098500866428e-05, "loss": 0.6422203183174133, "step": 854 }, { "epoch": 1.1090024330900243, "grad_norm": 0.5499486923217773, "learning_rate": 1.4929662317028359e-05, "loss": 0.6043179035186768, "step": 855 }, { "epoch": 1.1103000811030008, "grad_norm": 0.5544485449790955, "learning_rate": 1.491721605327857e-05, "loss": 0.5800666213035583, "step": 856 }, { "epoch": 1.1115977291159773, "grad_norm": 0.5804775953292847, "learning_rate": 1.490475973506652e-05, "loss": 0.6427537798881531, "step": 857 }, { "epoch": 1.1128953771289538, "grad_norm": 0.5342238545417786, "learning_rate": 1.4892293387862221e-05, "loss": 0.6311315298080444, "step": 858 }, { "epoch": 1.1141930251419303, "grad_norm": 0.5803128480911255, "learning_rate": 1.487981703715621e-05, "loss": 0.6198186874389648, "step": 859 }, { "epoch": 1.1154906731549068, "grad_norm": 0.5532170534133911, "learning_rate": 1.4867330708459463e-05, "loss": 0.6145609617233276, "step": 860 }, { "epoch": 1.1167883211678833, "grad_norm": 0.5493961572647095, "learning_rate": 1.4854834427303353e-05, "loss": 0.6166091561317444, "step": 861 }, { "epoch": 1.1180859691808598, "grad_norm": 0.5559639930725098, "learning_rate": 1.4842328219239618e-05, "loss": 0.6064823865890503, "step": 862 }, { "epoch": 1.119383617193836, "grad_norm": 0.5540943145751953, "learning_rate": 1.4829812109840291e-05, "loss": 0.5765544176101685, "step": 863 }, { "epoch": 1.1206812652068125, "grad_norm": 0.5384024381637573, "learning_rate": 1.4817286124697647e-05, "loss": 0.565604567527771, "step": 864 }, { "epoch": 1.121978913219789, "grad_norm": 0.5547834634780884, "learning_rate": 1.480475028942415e-05, "loss": 0.6463969349861145, "step": 865 }, { "epoch": 1.1232765612327655, "grad_norm": 0.5574260354042053, "learning_rate": 1.4792204629652414e-05, "loss": 0.5858181118965149, "step": 866 }, { "epoch": 1.124574209245742, "grad_norm": 0.5450447201728821, "learning_rate": 1.4779649171035138e-05, "loss": 0.6112916469573975, "step": 867 }, { "epoch": 1.1258718572587185, "grad_norm": 0.5452038645744324, "learning_rate": 1.4767083939245055e-05, "loss": 0.6333041787147522, "step": 868 }, { "epoch": 1.127169505271695, "grad_norm": 0.5453193187713623, "learning_rate": 1.475450895997489e-05, "loss": 0.6154720783233643, "step": 869 }, { "epoch": 1.1284671532846715, "grad_norm": 0.5503911375999451, "learning_rate": 1.4741924258937283e-05, "loss": 0.580187201499939, "step": 870 }, { "epoch": 1.129764801297648, "grad_norm": 0.564156174659729, "learning_rate": 1.472932986186477e-05, "loss": 0.6397178173065186, "step": 871 }, { "epoch": 1.1310624493106245, "grad_norm": 0.5705751180648804, "learning_rate": 1.47167257945097e-05, "loss": 0.6369278430938721, "step": 872 }, { "epoch": 1.132360097323601, "grad_norm": 0.562324583530426, "learning_rate": 1.4704112082644207e-05, "loss": 0.5986394882202148, "step": 873 }, { "epoch": 1.1336577453365775, "grad_norm": 0.5652042031288147, "learning_rate": 1.4691488752060132e-05, "loss": 0.6185961365699768, "step": 874 }, { "epoch": 1.134955393349554, "grad_norm": 0.5481469035148621, "learning_rate": 1.4678855828568996e-05, "loss": 0.5570172071456909, "step": 875 }, { "epoch": 1.1362530413625305, "grad_norm": 0.5480834245681763, "learning_rate": 1.4666213338001929e-05, "loss": 0.5788794755935669, "step": 876 }, { "epoch": 1.137550689375507, "grad_norm": 0.5426838994026184, "learning_rate": 1.4653561306209625e-05, "loss": 0.5975257158279419, "step": 877 }, { "epoch": 1.1388483373884835, "grad_norm": 0.5632731914520264, "learning_rate": 1.4640899759062285e-05, "loss": 0.6319808959960938, "step": 878 }, { "epoch": 1.14014598540146, "grad_norm": 0.5687447786331177, "learning_rate": 1.462822872244957e-05, "loss": 0.6043187379837036, "step": 879 }, { "epoch": 1.1414436334144362, "grad_norm": 0.5472837686538696, "learning_rate": 1.461554822228054e-05, "loss": 0.607802152633667, "step": 880 }, { "epoch": 1.142741281427413, "grad_norm": 0.5329515933990479, "learning_rate": 1.460285828448361e-05, "loss": 0.5557148456573486, "step": 881 }, { "epoch": 1.1440389294403892, "grad_norm": 0.5272259712219238, "learning_rate": 1.4590158935006494e-05, "loss": 0.5320879817008972, "step": 882 }, { "epoch": 1.1453365774533657, "grad_norm": 0.5834517478942871, "learning_rate": 1.4577450199816142e-05, "loss": 0.6263319253921509, "step": 883 }, { "epoch": 1.1466342254663422, "grad_norm": 0.5725152492523193, "learning_rate": 1.4564732104898702e-05, "loss": 0.659183919429779, "step": 884 }, { "epoch": 1.1479318734793187, "grad_norm": 0.5416671633720398, "learning_rate": 1.4552004676259462e-05, "loss": 0.5948503613471985, "step": 885 }, { "epoch": 1.1492295214922952, "grad_norm": 0.5543138384819031, "learning_rate": 1.453926793992279e-05, "loss": 0.6404953002929688, "step": 886 }, { "epoch": 1.1505271695052717, "grad_norm": 0.5595470070838928, "learning_rate": 1.4526521921932091e-05, "loss": 0.6393734812736511, "step": 887 }, { "epoch": 1.1518248175182482, "grad_norm": 0.5882608294487, "learning_rate": 1.4513766648349742e-05, "loss": 0.5654003024101257, "step": 888 }, { "epoch": 1.1531224655312247, "grad_norm": 0.5529691576957703, "learning_rate": 1.4501002145257048e-05, "loss": 0.6137228012084961, "step": 889 }, { "epoch": 1.1544201135442012, "grad_norm": 0.5548762083053589, "learning_rate": 1.4488228438754191e-05, "loss": 0.603983998298645, "step": 890 }, { "epoch": 1.1557177615571776, "grad_norm": 0.5486696362495422, "learning_rate": 1.4475445554960166e-05, "loss": 0.6514973640441895, "step": 891 }, { "epoch": 1.1570154095701541, "grad_norm": 0.5455385446548462, "learning_rate": 1.4462653520012736e-05, "loss": 0.6550310850143433, "step": 892 }, { "epoch": 1.1583130575831306, "grad_norm": 0.5628224015235901, "learning_rate": 1.4449852360068372e-05, "loss": 0.6537249088287354, "step": 893 }, { "epoch": 1.1596107055961071, "grad_norm": 0.5596909523010254, "learning_rate": 1.4437042101302212e-05, "loss": 0.6253930926322937, "step": 894 }, { "epoch": 1.1609083536090836, "grad_norm": 0.5298051238059998, "learning_rate": 1.4424222769907985e-05, "loss": 0.57865309715271, "step": 895 }, { "epoch": 1.1622060016220601, "grad_norm": 0.5473706722259521, "learning_rate": 1.4411394392097985e-05, "loss": 0.5876542329788208, "step": 896 }, { "epoch": 1.1635036496350364, "grad_norm": 0.5646262168884277, "learning_rate": 1.4398556994102996e-05, "loss": 0.6242583990097046, "step": 897 }, { "epoch": 1.164801297648013, "grad_norm": 0.5632451176643372, "learning_rate": 1.4385710602172245e-05, "loss": 0.6315684914588928, "step": 898 }, { "epoch": 1.1660989456609894, "grad_norm": 0.5819709300994873, "learning_rate": 1.4372855242573356e-05, "loss": 0.5947535037994385, "step": 899 }, { "epoch": 1.1673965936739659, "grad_norm": 0.5634546875953674, "learning_rate": 1.4359990941592283e-05, "loss": 0.6281697750091553, "step": 900 }, { "epoch": 1.1686942416869424, "grad_norm": 0.5534945130348206, "learning_rate": 1.4347117725533269e-05, "loss": 0.567562460899353, "step": 901 }, { "epoch": 1.1699918896999189, "grad_norm": 0.5352903604507446, "learning_rate": 1.4334235620718774e-05, "loss": 0.5504214763641357, "step": 902 }, { "epoch": 1.1712895377128953, "grad_norm": 0.5894420146942139, "learning_rate": 1.4321344653489453e-05, "loss": 0.5871877074241638, "step": 903 }, { "epoch": 1.1725871857258718, "grad_norm": 0.5826941728591919, "learning_rate": 1.4308444850204066e-05, "loss": 0.5854516625404358, "step": 904 }, { "epoch": 1.1738848337388483, "grad_norm": 0.5583464503288269, "learning_rate": 1.4295536237239445e-05, "loss": 0.6143467426300049, "step": 905 }, { "epoch": 1.1751824817518248, "grad_norm": 0.5566253662109375, "learning_rate": 1.4282618840990438e-05, "loss": 0.6143018007278442, "step": 906 }, { "epoch": 1.1764801297648013, "grad_norm": 0.5643221735954285, "learning_rate": 1.4269692687869849e-05, "loss": 0.6445101499557495, "step": 907 }, { "epoch": 1.1777777777777778, "grad_norm": 0.583202600479126, "learning_rate": 1.425675780430839e-05, "loss": 0.6551916599273682, "step": 908 }, { "epoch": 1.1790754257907543, "grad_norm": 0.5802360773086548, "learning_rate": 1.4243814216754626e-05, "loss": 0.6176046133041382, "step": 909 }, { "epoch": 1.1803730738037308, "grad_norm": 0.5651218295097351, "learning_rate": 1.4230861951674914e-05, "loss": 0.6476747393608093, "step": 910 }, { "epoch": 1.1816707218167073, "grad_norm": 0.5351070761680603, "learning_rate": 1.421790103555336e-05, "loss": 0.5974748134613037, "step": 911 }, { "epoch": 1.1829683698296838, "grad_norm": 0.5506876111030579, "learning_rate": 1.4204931494891759e-05, "loss": 0.5977579355239868, "step": 912 }, { "epoch": 1.1842660178426603, "grad_norm": 0.5496414303779602, "learning_rate": 1.4191953356209535e-05, "loss": 0.5993613004684448, "step": 913 }, { "epoch": 1.1855636658556366, "grad_norm": 0.5448877215385437, "learning_rate": 1.4178966646043702e-05, "loss": 0.5849076509475708, "step": 914 }, { "epoch": 1.186861313868613, "grad_norm": 0.5505439043045044, "learning_rate": 1.4165971390948787e-05, "loss": 0.6557425856590271, "step": 915 }, { "epoch": 1.1881589618815895, "grad_norm": 0.5327088236808777, "learning_rate": 1.4152967617496805e-05, "loss": 0.5915898084640503, "step": 916 }, { "epoch": 1.189456609894566, "grad_norm": 0.5534889698028564, "learning_rate": 1.4139955352277176e-05, "loss": 0.574662983417511, "step": 917 }, { "epoch": 1.1907542579075425, "grad_norm": 0.5179355144500732, "learning_rate": 1.4126934621896692e-05, "loss": 0.5562629699707031, "step": 918 }, { "epoch": 1.192051905920519, "grad_norm": 0.5698444247245789, "learning_rate": 1.4113905452979455e-05, "loss": 0.6139298677444458, "step": 919 }, { "epoch": 1.1933495539334955, "grad_norm": 0.5280522108078003, "learning_rate": 1.410086787216681e-05, "loss": 0.5793087482452393, "step": 920 }, { "epoch": 1.1933495539334955, "eval_loss": 0.6891781091690063, "eval_runtime": 72.4953, "eval_samples_per_second": 71.618, "eval_steps_per_second": 8.952, "step": 920 }, { "epoch": 1.194647201946472, "grad_norm": 0.518786609172821, "learning_rate": 1.4087821906117314e-05, "loss": 0.5602763891220093, "step": 921 }, { "epoch": 1.1959448499594485, "grad_norm": 0.5518815517425537, "learning_rate": 1.4074767581506666e-05, "loss": 0.6225783824920654, "step": 922 }, { "epoch": 1.197242497972425, "grad_norm": 0.5233501195907593, "learning_rate": 1.4061704925027653e-05, "loss": 0.5846587419509888, "step": 923 }, { "epoch": 1.1985401459854015, "grad_norm": 0.5470210313796997, "learning_rate": 1.4048633963390105e-05, "loss": 0.5750600099563599, "step": 924 }, { "epoch": 1.199837793998378, "grad_norm": 0.5647477507591248, "learning_rate": 1.4035554723320828e-05, "loss": 0.5977157354354858, "step": 925 }, { "epoch": 1.2011354420113545, "grad_norm": 0.5179945230484009, "learning_rate": 1.4022467231563554e-05, "loss": 0.5806452035903931, "step": 926 }, { "epoch": 1.202433090024331, "grad_norm": 0.5535194873809814, "learning_rate": 1.4009371514878898e-05, "loss": 0.6628227233886719, "step": 927 }, { "epoch": 1.2037307380373075, "grad_norm": 0.6273780465126038, "learning_rate": 1.399626760004428e-05, "loss": 0.6142767667770386, "step": 928 }, { "epoch": 1.205028386050284, "grad_norm": 0.5373409390449524, "learning_rate": 1.3983155513853897e-05, "loss": 0.6562739610671997, "step": 929 }, { "epoch": 1.2063260340632604, "grad_norm": 0.5411200523376465, "learning_rate": 1.3970035283118639e-05, "loss": 0.5903608202934265, "step": 930 }, { "epoch": 1.2076236820762367, "grad_norm": 0.5595235824584961, "learning_rate": 1.3956906934666056e-05, "loss": 0.6051539182662964, "step": 931 }, { "epoch": 1.2089213300892132, "grad_norm": 0.5300971865653992, "learning_rate": 1.3943770495340307e-05, "loss": 0.643832802772522, "step": 932 }, { "epoch": 1.2102189781021897, "grad_norm": 0.5413315892219543, "learning_rate": 1.3930625992002076e-05, "loss": 0.5942864418029785, "step": 933 }, { "epoch": 1.2115166261151662, "grad_norm": 0.558797299861908, "learning_rate": 1.391747345152855e-05, "loss": 0.619717001914978, "step": 934 }, { "epoch": 1.2128142741281427, "grad_norm": 0.5264928936958313, "learning_rate": 1.3904312900813345e-05, "loss": 0.5522656440734863, "step": 935 }, { "epoch": 1.2141119221411192, "grad_norm": 0.5257030725479126, "learning_rate": 1.3891144366766457e-05, "loss": 0.5786164999008179, "step": 936 }, { "epoch": 1.2154095701540957, "grad_norm": 0.577509343624115, "learning_rate": 1.3877967876314205e-05, "loss": 0.6315740346908569, "step": 937 }, { "epoch": 1.2167072181670722, "grad_norm": 0.5317774415016174, "learning_rate": 1.3864783456399174e-05, "loss": 0.5896605253219604, "step": 938 }, { "epoch": 1.2180048661800487, "grad_norm": 0.5598568320274353, "learning_rate": 1.3851591133980167e-05, "loss": 0.6161408424377441, "step": 939 }, { "epoch": 1.2193025141930252, "grad_norm": 0.5387381911277771, "learning_rate": 1.3838390936032146e-05, "loss": 0.5705558061599731, "step": 940 }, { "epoch": 1.2206001622060016, "grad_norm": 0.5279619693756104, "learning_rate": 1.3825182889546173e-05, "loss": 0.5650646686553955, "step": 941 }, { "epoch": 1.2218978102189781, "grad_norm": 0.5602632164955139, "learning_rate": 1.3811967021529362e-05, "loss": 0.6143766045570374, "step": 942 }, { "epoch": 1.2231954582319546, "grad_norm": 0.5425279140472412, "learning_rate": 1.3798743359004816e-05, "loss": 0.602745771408081, "step": 943 }, { "epoch": 1.2244931062449311, "grad_norm": 0.5385331511497498, "learning_rate": 1.378551192901158e-05, "loss": 0.5555763244628906, "step": 944 }, { "epoch": 1.2257907542579076, "grad_norm": 0.5338374972343445, "learning_rate": 1.3772272758604576e-05, "loss": 0.5934339165687561, "step": 945 }, { "epoch": 1.2270884022708841, "grad_norm": 0.5479584336280823, "learning_rate": 1.375902587485456e-05, "loss": 0.5891726016998291, "step": 946 }, { "epoch": 1.2283860502838606, "grad_norm": 0.5357087254524231, "learning_rate": 1.3745771304848056e-05, "loss": 0.5626200437545776, "step": 947 }, { "epoch": 1.2296836982968369, "grad_norm": 0.5543829202651978, "learning_rate": 1.3732509075687302e-05, "loss": 0.5829602479934692, "step": 948 }, { "epoch": 1.2309813463098134, "grad_norm": 0.5650047659873962, "learning_rate": 1.3719239214490203e-05, "loss": 0.6154081225395203, "step": 949 }, { "epoch": 1.2322789943227899, "grad_norm": 0.5745924711227417, "learning_rate": 1.3705961748390264e-05, "loss": 0.5824979543685913, "step": 950 }, { "epoch": 1.2335766423357664, "grad_norm": 0.5524203777313232, "learning_rate": 1.3692676704536547e-05, "loss": 0.6566962599754333, "step": 951 }, { "epoch": 1.2348742903487429, "grad_norm": 0.5592309832572937, "learning_rate": 1.3679384110093601e-05, "loss": 0.5955104231834412, "step": 952 }, { "epoch": 1.2361719383617193, "grad_norm": 0.5388526916503906, "learning_rate": 1.3666083992241414e-05, "loss": 0.6259311437606812, "step": 953 }, { "epoch": 1.2374695863746958, "grad_norm": 0.5431481599807739, "learning_rate": 1.3652776378175366e-05, "loss": 0.6409016847610474, "step": 954 }, { "epoch": 1.2387672343876723, "grad_norm": 0.5381134748458862, "learning_rate": 1.3639461295106157e-05, "loss": 0.5895624160766602, "step": 955 }, { "epoch": 1.2400648824006488, "grad_norm": 0.5462051630020142, "learning_rate": 1.3626138770259765e-05, "loss": 0.5515483617782593, "step": 956 }, { "epoch": 1.2413625304136253, "grad_norm": 0.5416935682296753, "learning_rate": 1.3612808830877377e-05, "loss": 0.5839380621910095, "step": 957 }, { "epoch": 1.2426601784266018, "grad_norm": 0.543431282043457, "learning_rate": 1.3599471504215347e-05, "loss": 0.6129022836685181, "step": 958 }, { "epoch": 1.2439578264395783, "grad_norm": 0.5546287894248962, "learning_rate": 1.358612681754513e-05, "loss": 0.5957478284835815, "step": 959 }, { "epoch": 1.2452554744525548, "grad_norm": 0.5636503100395203, "learning_rate": 1.357277479815324e-05, "loss": 0.6206330060958862, "step": 960 }, { "epoch": 1.2465531224655313, "grad_norm": 0.5537446141242981, "learning_rate": 1.355941547334117e-05, "loss": 0.5747988224029541, "step": 961 }, { "epoch": 1.2478507704785078, "grad_norm": 0.5459409952163696, "learning_rate": 1.3546048870425356e-05, "loss": 0.5868381261825562, "step": 962 }, { "epoch": 1.2491484184914843, "grad_norm": 0.5428374409675598, "learning_rate": 1.3532675016737127e-05, "loss": 0.6297606825828552, "step": 963 }, { "epoch": 1.2504460665044608, "grad_norm": 0.5484406352043152, "learning_rate": 1.3519293939622622e-05, "loss": 0.6754599213600159, "step": 964 }, { "epoch": 1.251743714517437, "grad_norm": 0.5630886554718018, "learning_rate": 1.3505905666442757e-05, "loss": 0.655160129070282, "step": 965 }, { "epoch": 1.2530413625304138, "grad_norm": 0.5442233085632324, "learning_rate": 1.3492510224573165e-05, "loss": 0.5808818936347961, "step": 966 }, { "epoch": 1.25433901054339, "grad_norm": 0.5171942114830017, "learning_rate": 1.3479107641404134e-05, "loss": 0.5760788321495056, "step": 967 }, { "epoch": 1.2556366585563665, "grad_norm": 0.5334968566894531, "learning_rate": 1.3465697944340552e-05, "loss": 0.5447085499763489, "step": 968 }, { "epoch": 1.256934306569343, "grad_norm": 0.5165731310844421, "learning_rate": 1.3452281160801856e-05, "loss": 0.600307822227478, "step": 969 }, { "epoch": 1.2582319545823195, "grad_norm": 0.5485058426856995, "learning_rate": 1.3438857318221974e-05, "loss": 0.6196280717849731, "step": 970 }, { "epoch": 1.259529602595296, "grad_norm": 0.5499110817909241, "learning_rate": 1.3425426444049265e-05, "loss": 0.6000030040740967, "step": 971 }, { "epoch": 1.2608272506082725, "grad_norm": 0.5815853476524353, "learning_rate": 1.3411988565746467e-05, "loss": 0.6568498611450195, "step": 972 }, { "epoch": 1.262124898621249, "grad_norm": 0.5364983081817627, "learning_rate": 1.3398543710790642e-05, "loss": 0.6078934073448181, "step": 973 }, { "epoch": 1.2634225466342255, "grad_norm": 0.5517644286155701, "learning_rate": 1.3385091906673115e-05, "loss": 0.6221879720687866, "step": 974 }, { "epoch": 1.264720194647202, "grad_norm": 0.5543562769889832, "learning_rate": 1.3371633180899417e-05, "loss": 0.6666390895843506, "step": 975 }, { "epoch": 1.2660178426601785, "grad_norm": 0.5409432053565979, "learning_rate": 1.335816756098924e-05, "loss": 0.6188746690750122, "step": 976 }, { "epoch": 1.267315490673155, "grad_norm": 0.590812087059021, "learning_rate": 1.3344695074476365e-05, "loss": 0.6498491764068604, "step": 977 }, { "epoch": 1.2686131386861315, "grad_norm": 0.5648714900016785, "learning_rate": 1.3331215748908622e-05, "loss": 0.6376237869262695, "step": 978 }, { "epoch": 1.269910786699108, "grad_norm": 0.5377125144004822, "learning_rate": 1.3317729611847818e-05, "loss": 0.6080333590507507, "step": 979 }, { "epoch": 1.2712084347120842, "grad_norm": 0.6160985231399536, "learning_rate": 1.3304236690869688e-05, "loss": 0.6452457904815674, "step": 980 }, { "epoch": 1.272506082725061, "grad_norm": 0.5675063133239746, "learning_rate": 1.329073701356384e-05, "loss": 0.6066033840179443, "step": 981 }, { "epoch": 1.2738037307380372, "grad_norm": 0.5339285731315613, "learning_rate": 1.3277230607533698e-05, "loss": 0.563126266002655, "step": 982 }, { "epoch": 1.275101378751014, "grad_norm": 0.558273434638977, "learning_rate": 1.3263717500396446e-05, "loss": 0.6070864796638489, "step": 983 }, { "epoch": 1.2763990267639902, "grad_norm": 0.5663204789161682, "learning_rate": 1.3250197719782966e-05, "loss": 0.6016590595245361, "step": 984 }, { "epoch": 1.2776966747769667, "grad_norm": 0.5561959743499756, "learning_rate": 1.3236671293337788e-05, "loss": 0.6111094951629639, "step": 985 }, { "epoch": 1.2789943227899432, "grad_norm": 0.5440069437026978, "learning_rate": 1.3223138248719032e-05, "loss": 0.6232655644416809, "step": 986 }, { "epoch": 1.2802919708029197, "grad_norm": 0.5603107810020447, "learning_rate": 1.3209598613598344e-05, "loss": 0.5950015783309937, "step": 987 }, { "epoch": 1.2815896188158962, "grad_norm": 0.538038969039917, "learning_rate": 1.3196052415660856e-05, "loss": 0.6100248098373413, "step": 988 }, { "epoch": 1.2828872668288727, "grad_norm": 0.5667180418968201, "learning_rate": 1.318249968260511e-05, "loss": 0.6681912541389465, "step": 989 }, { "epoch": 1.2841849148418492, "grad_norm": 0.5527055859565735, "learning_rate": 1.316894044214302e-05, "loss": 0.6051948070526123, "step": 990 }, { "epoch": 1.2854825628548256, "grad_norm": 0.5413651466369629, "learning_rate": 1.3155374721999797e-05, "loss": 0.5882329940795898, "step": 991 }, { "epoch": 1.2867802108678021, "grad_norm": 0.5323876738548279, "learning_rate": 1.3141802549913907e-05, "loss": 0.6183469295501709, "step": 992 }, { "epoch": 1.2880778588807786, "grad_norm": 0.5273195505142212, "learning_rate": 1.3128223953637003e-05, "loss": 0.5676054954528809, "step": 993 }, { "epoch": 1.2893755068937551, "grad_norm": 0.567756175994873, "learning_rate": 1.3114638960933883e-05, "loss": 0.6798044443130493, "step": 994 }, { "epoch": 1.2906731549067316, "grad_norm": 0.5517603754997253, "learning_rate": 1.3101047599582415e-05, "loss": 0.6340286731719971, "step": 995 }, { "epoch": 1.2919708029197081, "grad_norm": 0.5477331280708313, "learning_rate": 1.3087449897373494e-05, "loss": 0.6021038889884949, "step": 996 }, { "epoch": 1.2932684509326844, "grad_norm": 0.551368772983551, "learning_rate": 1.307384588211098e-05, "loss": 0.5940453410148621, "step": 997 }, { "epoch": 1.294566098945661, "grad_norm": 0.5456337928771973, "learning_rate": 1.306023558161164e-05, "loss": 0.6023222208023071, "step": 998 }, { "epoch": 1.2958637469586374, "grad_norm": 0.5676029324531555, "learning_rate": 1.3046619023705095e-05, "loss": 0.6922143697738647, "step": 999 }, { "epoch": 1.2971613949716139, "grad_norm": 0.5776983499526978, "learning_rate": 1.3032996236233756e-05, "loss": 0.6589181423187256, "step": 1000 }, { "epoch": 1.2984590429845904, "grad_norm": 0.5594776272773743, "learning_rate": 1.3019367247052781e-05, "loss": 0.6284008622169495, "step": 1001 }, { "epoch": 1.2997566909975669, "grad_norm": 0.5632730722427368, "learning_rate": 1.300573208403e-05, "loss": 0.586546778678894, "step": 1002 }, { "epoch": 1.3010543390105433, "grad_norm": 0.5418180823326111, "learning_rate": 1.2992090775045868e-05, "loss": 0.5931944847106934, "step": 1003 }, { "epoch": 1.3023519870235198, "grad_norm": 0.5260592699050903, "learning_rate": 1.2978443347993415e-05, "loss": 0.5439613461494446, "step": 1004 }, { "epoch": 1.3036496350364963, "grad_norm": 0.546437680721283, "learning_rate": 1.296478983077817e-05, "loss": 0.5946912169456482, "step": 1005 }, { "epoch": 1.3049472830494728, "grad_norm": 0.5575598478317261, "learning_rate": 1.2951130251318125e-05, "loss": 0.6190862655639648, "step": 1006 }, { "epoch": 1.3062449310624493, "grad_norm": 0.5441600680351257, "learning_rate": 1.2937464637543655e-05, "loss": 0.613700270652771, "step": 1007 }, { "epoch": 1.3075425790754258, "grad_norm": 0.5194239020347595, "learning_rate": 1.2923793017397488e-05, "loss": 0.551931619644165, "step": 1008 }, { "epoch": 1.3088402270884023, "grad_norm": 0.521641194820404, "learning_rate": 1.2910115418834624e-05, "loss": 0.544873833656311, "step": 1009 }, { "epoch": 1.3101378751013788, "grad_norm": 0.5697146654129028, "learning_rate": 1.289643186982229e-05, "loss": 0.6762262582778931, "step": 1010 }, { "epoch": 1.3114355231143553, "grad_norm": 0.5358358025550842, "learning_rate": 1.2882742398339884e-05, "loss": 0.5811675190925598, "step": 1011 }, { "epoch": 1.3127331711273318, "grad_norm": 0.5812531113624573, "learning_rate": 1.2869047032378905e-05, "loss": 0.6202974319458008, "step": 1012 }, { "epoch": 1.3140308191403083, "grad_norm": 0.5383328795433044, "learning_rate": 1.2855345799942915e-05, "loss": 0.58216392993927, "step": 1013 }, { "epoch": 1.3153284671532846, "grad_norm": 0.5470954775810242, "learning_rate": 1.2841638729047463e-05, "loss": 0.5842857360839844, "step": 1014 }, { "epoch": 1.3166261151662613, "grad_norm": 0.5181686878204346, "learning_rate": 1.2827925847720041e-05, "loss": 0.5985524654388428, "step": 1015 }, { "epoch": 1.3179237631792375, "grad_norm": 0.5179515480995178, "learning_rate": 1.2814207184000018e-05, "loss": 0.5709914565086365, "step": 1016 }, { "epoch": 1.319221411192214, "grad_norm": 0.5449542999267578, "learning_rate": 1.2800482765938594e-05, "loss": 0.646975576877594, "step": 1017 }, { "epoch": 1.3205190592051905, "grad_norm": 0.5302087664604187, "learning_rate": 1.2786752621598726e-05, "loss": 0.6145081520080566, "step": 1018 }, { "epoch": 1.321816707218167, "grad_norm": 0.5520698428153992, "learning_rate": 1.2773016779055089e-05, "loss": 0.5821577906608582, "step": 1019 }, { "epoch": 1.3231143552311435, "grad_norm": 0.5411002039909363, "learning_rate": 1.2759275266393998e-05, "loss": 0.5899526476860046, "step": 1020 }, { "epoch": 1.32441200324412, "grad_norm": 0.5193924307823181, "learning_rate": 1.2745528111713373e-05, "loss": 0.5851880311965942, "step": 1021 }, { "epoch": 1.3257096512570965, "grad_norm": 0.5581620931625366, "learning_rate": 1.2731775343122663e-05, "loss": 0.6368898153305054, "step": 1022 }, { "epoch": 1.327007299270073, "grad_norm": 0.5761281847953796, "learning_rate": 1.2718016988742799e-05, "loss": 0.6208426356315613, "step": 1023 }, { "epoch": 1.3283049472830495, "grad_norm": 0.5429732799530029, "learning_rate": 1.270425307670614e-05, "loss": 0.5906336307525635, "step": 1024 }, { "epoch": 1.329602595296026, "grad_norm": 0.5482628345489502, "learning_rate": 1.2690483635156392e-05, "loss": 0.6205004453659058, "step": 1025 }, { "epoch": 1.3309002433090025, "grad_norm": 0.53929603099823, "learning_rate": 1.2676708692248583e-05, "loss": 0.5814516544342041, "step": 1026 }, { "epoch": 1.332197891321979, "grad_norm": 0.5420404076576233, "learning_rate": 1.2662928276148985e-05, "loss": 0.6052178740501404, "step": 1027 }, { "epoch": 1.3334955393349555, "grad_norm": 0.5524218678474426, "learning_rate": 1.264914241503506e-05, "loss": 0.639128565788269, "step": 1028 }, { "epoch": 1.334793187347932, "grad_norm": 0.5308884978294373, "learning_rate": 1.2635351137095408e-05, "loss": 0.5758256316184998, "step": 1029 }, { "epoch": 1.3360908353609084, "grad_norm": 0.556959867477417, "learning_rate": 1.2621554470529698e-05, "loss": 0.6215351223945618, "step": 1030 }, { "epoch": 1.3373884833738847, "grad_norm": 0.5299232006072998, "learning_rate": 1.2607752443548622e-05, "loss": 0.6064879298210144, "step": 1031 }, { "epoch": 1.3386861313868614, "grad_norm": 0.5557371973991394, "learning_rate": 1.259394508437383e-05, "loss": 0.62589031457901, "step": 1032 }, { "epoch": 1.3399837793998377, "grad_norm": 0.5563995242118835, "learning_rate": 1.2580132421237883e-05, "loss": 0.6236660480499268, "step": 1033 }, { "epoch": 1.3412814274128142, "grad_norm": 0.5666968822479248, "learning_rate": 1.2566314482384174e-05, "loss": 0.6252362728118896, "step": 1034 }, { "epoch": 1.3425790754257907, "grad_norm": 0.5652741193771362, "learning_rate": 1.2552491296066895e-05, "loss": 0.6189643144607544, "step": 1035 }, { "epoch": 1.3438767234387672, "grad_norm": 0.5583733320236206, "learning_rate": 1.2538662890550959e-05, "loss": 0.6765375137329102, "step": 1036 }, { "epoch": 1.3451743714517437, "grad_norm": 0.5742061138153076, "learning_rate": 1.252482929411196e-05, "loss": 0.6477082967758179, "step": 1037 }, { "epoch": 1.3464720194647202, "grad_norm": 0.5400403141975403, "learning_rate": 1.25109905350361e-05, "loss": 0.5811231136322021, "step": 1038 }, { "epoch": 1.3477696674776967, "grad_norm": 0.5390773415565491, "learning_rate": 1.249714664162014e-05, "loss": 0.6055101156234741, "step": 1039 }, { "epoch": 1.3490673154906732, "grad_norm": 0.5596996545791626, "learning_rate": 1.2483297642171332e-05, "loss": 0.6074774265289307, "step": 1040 }, { "epoch": 1.3503649635036497, "grad_norm": 0.5600677728652954, "learning_rate": 1.246944356500738e-05, "loss": 0.6564399003982544, "step": 1041 }, { "epoch": 1.3516626115166261, "grad_norm": 0.5470819473266602, "learning_rate": 1.2455584438456366e-05, "loss": 0.6430810689926147, "step": 1042 }, { "epoch": 1.3529602595296026, "grad_norm": 0.5539683699607849, "learning_rate": 1.2441720290856694e-05, "loss": 0.6132862567901611, "step": 1043 }, { "epoch": 1.3542579075425791, "grad_norm": 0.5648192167282104, "learning_rate": 1.2427851150557036e-05, "loss": 0.6304311156272888, "step": 1044 }, { "epoch": 1.3555555555555556, "grad_norm": 0.5195255279541016, "learning_rate": 1.241397704591627e-05, "loss": 0.5641679763793945, "step": 1045 }, { "epoch": 1.3568532035685321, "grad_norm": 0.5658749341964722, "learning_rate": 1.2400098005303436e-05, "loss": 0.6409952044487, "step": 1046 }, { "epoch": 1.3581508515815086, "grad_norm": 0.5088870525360107, "learning_rate": 1.238621405709766e-05, "loss": 0.5354233384132385, "step": 1047 }, { "epoch": 1.3594484995944849, "grad_norm": 0.5734469890594482, "learning_rate": 1.2372325229688093e-05, "loss": 0.6188406944274902, "step": 1048 }, { "epoch": 1.3607461476074616, "grad_norm": 0.5380412936210632, "learning_rate": 1.235843155147388e-05, "loss": 0.5657402873039246, "step": 1049 }, { "epoch": 1.3620437956204379, "grad_norm": 0.5315279960632324, "learning_rate": 1.2344533050864071e-05, "loss": 0.5667376518249512, "step": 1050 }, { "epoch": 1.3633414436334144, "grad_norm": 0.5081866979598999, "learning_rate": 1.2330629756277588e-05, "loss": 0.5432066917419434, "step": 1051 }, { "epoch": 1.3646390916463909, "grad_norm": 0.5798763036727905, "learning_rate": 1.2316721696143141e-05, "loss": 0.6364309191703796, "step": 1052 }, { "epoch": 1.3659367396593673, "grad_norm": 0.5289844870567322, "learning_rate": 1.23028088988992e-05, "loss": 0.5321639180183411, "step": 1053 }, { "epoch": 1.3672343876723438, "grad_norm": 0.5852347612380981, "learning_rate": 1.228889139299391e-05, "loss": 0.6831628084182739, "step": 1054 }, { "epoch": 1.3685320356853203, "grad_norm": 0.5265390872955322, "learning_rate": 1.2274969206885048e-05, "loss": 0.5725244283676147, "step": 1055 }, { "epoch": 1.3698296836982968, "grad_norm": 0.6298306584358215, "learning_rate": 1.2261042369039966e-05, "loss": 0.6366633176803589, "step": 1056 }, { "epoch": 1.3711273317112733, "grad_norm": 0.521314263343811, "learning_rate": 1.2247110907935518e-05, "loss": 0.5725533962249756, "step": 1057 }, { "epoch": 1.3724249797242498, "grad_norm": 0.5249886512756348, "learning_rate": 1.2233174852058015e-05, "loss": 0.577233076095581, "step": 1058 }, { "epoch": 1.3737226277372263, "grad_norm": 0.5558046102523804, "learning_rate": 1.2219234229903163e-05, "loss": 0.6044833660125732, "step": 1059 }, { "epoch": 1.3750202757502028, "grad_norm": 0.5569727420806885, "learning_rate": 1.2205289069976012e-05, "loss": 0.5831769704818726, "step": 1060 }, { "epoch": 1.3763179237631793, "grad_norm": 0.5547581911087036, "learning_rate": 1.2191339400790881e-05, "loss": 0.5798386335372925, "step": 1061 }, { "epoch": 1.3776155717761558, "grad_norm": 0.5544263124465942, "learning_rate": 1.2177385250871312e-05, "loss": 0.607170581817627, "step": 1062 }, { "epoch": 1.378913219789132, "grad_norm": 0.5475184321403503, "learning_rate": 1.2163426648750009e-05, "loss": 0.596827507019043, "step": 1063 }, { "epoch": 1.3802108678021088, "grad_norm": 0.551906168460846, "learning_rate": 1.2149463622968782e-05, "loss": 0.5992593169212341, "step": 1064 }, { "epoch": 1.381508515815085, "grad_norm": 0.5418475270271301, "learning_rate": 1.2135496202078487e-05, "loss": 0.5538514852523804, "step": 1065 }, { "epoch": 1.3828061638280618, "grad_norm": 0.5357592105865479, "learning_rate": 1.2121524414638958e-05, "loss": 0.6014474630355835, "step": 1066 }, { "epoch": 1.384103811841038, "grad_norm": 0.5673146843910217, "learning_rate": 1.2107548289218968e-05, "loss": 0.5835940837860107, "step": 1067 }, { "epoch": 1.3854014598540145, "grad_norm": 0.5655810832977295, "learning_rate": 1.2093567854396158e-05, "loss": 0.6108807325363159, "step": 1068 }, { "epoch": 1.386699107866991, "grad_norm": 0.5361012816429138, "learning_rate": 1.2079583138756976e-05, "loss": 0.6093813180923462, "step": 1069 }, { "epoch": 1.3879967558799675, "grad_norm": 0.5419613122940063, "learning_rate": 1.206559417089663e-05, "loss": 0.6026707887649536, "step": 1070 }, { "epoch": 1.389294403892944, "grad_norm": 0.5429274439811707, "learning_rate": 1.205160097941901e-05, "loss": 0.6365257501602173, "step": 1071 }, { "epoch": 1.3905920519059205, "grad_norm": 0.5734850764274597, "learning_rate": 1.2037603592936656e-05, "loss": 0.6649122834205627, "step": 1072 }, { "epoch": 1.391889699918897, "grad_norm": 0.5734902024269104, "learning_rate": 1.2023602040070679e-05, "loss": 0.7125487327575684, "step": 1073 }, { "epoch": 1.3931873479318735, "grad_norm": 0.5633674263954163, "learning_rate": 1.2009596349450717e-05, "loss": 0.6474109292030334, "step": 1074 }, { "epoch": 1.39448499594485, "grad_norm": 0.5378244519233704, "learning_rate": 1.1995586549714855e-05, "loss": 0.6136443614959717, "step": 1075 }, { "epoch": 1.3957826439578265, "grad_norm": 0.558250904083252, "learning_rate": 1.198157266950959e-05, "loss": 0.6676377058029175, "step": 1076 }, { "epoch": 1.397080291970803, "grad_norm": 0.5315516591072083, "learning_rate": 1.1967554737489762e-05, "loss": 0.607810378074646, "step": 1077 }, { "epoch": 1.3983779399837795, "grad_norm": 0.5391795039176941, "learning_rate": 1.1953532782318491e-05, "loss": 0.5898000597953796, "step": 1078 }, { "epoch": 1.399675587996756, "grad_norm": 0.5466244220733643, "learning_rate": 1.1939506832667129e-05, "loss": 0.5943995118141174, "step": 1079 }, { "epoch": 1.4009732360097322, "grad_norm": 0.5457687973976135, "learning_rate": 1.1925476917215191e-05, "loss": 0.6089761257171631, "step": 1080 }, { "epoch": 1.402270884022709, "grad_norm": 0.5727429389953613, "learning_rate": 1.1911443064650301e-05, "loss": 0.6369843482971191, "step": 1081 }, { "epoch": 1.4035685320356852, "grad_norm": 0.5765259861946106, "learning_rate": 1.189740530366814e-05, "loss": 0.6176037788391113, "step": 1082 }, { "epoch": 1.404866180048662, "grad_norm": 0.5793892741203308, "learning_rate": 1.1883363662972375e-05, "loss": 0.6147127747535706, "step": 1083 }, { "epoch": 1.4061638280616382, "grad_norm": 0.5127638578414917, "learning_rate": 1.1869318171274606e-05, "loss": 0.5739990472793579, "step": 1084 }, { "epoch": 1.4074614760746147, "grad_norm": 0.5451372861862183, "learning_rate": 1.1855268857294308e-05, "loss": 0.6005086898803711, "step": 1085 }, { "epoch": 1.4087591240875912, "grad_norm": 0.5556860566139221, "learning_rate": 1.1841215749758774e-05, "loss": 0.6003910303115845, "step": 1086 }, { "epoch": 1.4100567721005677, "grad_norm": 0.5883124470710754, "learning_rate": 1.182715887740305e-05, "loss": 0.6721568703651428, "step": 1087 }, { "epoch": 1.4113544201135442, "grad_norm": 0.5330623388290405, "learning_rate": 1.1813098268969886e-05, "loss": 0.617790699005127, "step": 1088 }, { "epoch": 1.4126520681265207, "grad_norm": 0.5409324169158936, "learning_rate": 1.1799033953209664e-05, "loss": 0.6154944896697998, "step": 1089 }, { "epoch": 1.4139497161394972, "grad_norm": 0.5280669927597046, "learning_rate": 1.178496595888035e-05, "loss": 0.6064777970314026, "step": 1090 }, { "epoch": 1.4152473641524737, "grad_norm": 0.5559468269348145, "learning_rate": 1.1770894314747433e-05, "loss": 0.6379706263542175, "step": 1091 }, { "epoch": 1.4165450121654501, "grad_norm": 0.5678933262825012, "learning_rate": 1.1756819049583861e-05, "loss": 0.5879865288734436, "step": 1092 }, { "epoch": 1.4178426601784266, "grad_norm": 0.5317026972770691, "learning_rate": 1.1742740192169995e-05, "loss": 0.6252385377883911, "step": 1093 }, { "epoch": 1.4191403081914031, "grad_norm": 0.5503518581390381, "learning_rate": 1.1728657771293529e-05, "loss": 0.5956102013587952, "step": 1094 }, { "epoch": 1.4204379562043796, "grad_norm": 0.5392619967460632, "learning_rate": 1.171457181574945e-05, "loss": 0.6110433340072632, "step": 1095 }, { "epoch": 1.4217356042173561, "grad_norm": 0.554594099521637, "learning_rate": 1.1700482354339972e-05, "loss": 0.6505380272865295, "step": 1096 }, { "epoch": 1.4230332522303324, "grad_norm": 0.5639646053314209, "learning_rate": 1.168638941587448e-05, "loss": 0.6052155494689941, "step": 1097 }, { "epoch": 1.424330900243309, "grad_norm": 0.5569002032279968, "learning_rate": 1.1672293029169466e-05, "loss": 0.5856403112411499, "step": 1098 }, { "epoch": 1.4256285482562854, "grad_norm": 0.5615402460098267, "learning_rate": 1.165819322304847e-05, "loss": 0.6077978610992432, "step": 1099 }, { "epoch": 1.426926196269262, "grad_norm": 0.5535939931869507, "learning_rate": 1.164409002634203e-05, "loss": 0.6245694160461426, "step": 1100 }, { "epoch": 1.4282238442822384, "grad_norm": 0.5362287759780884, "learning_rate": 1.162998346788761e-05, "loss": 0.6105297803878784, "step": 1101 }, { "epoch": 1.4295214922952149, "grad_norm": 0.5390259027481079, "learning_rate": 1.1615873576529556e-05, "loss": 0.6066164970397949, "step": 1102 }, { "epoch": 1.4308191403081914, "grad_norm": 0.5315901041030884, "learning_rate": 1.1601760381119022e-05, "loss": 0.5768907070159912, "step": 1103 }, { "epoch": 1.4321167883211678, "grad_norm": 0.5727961659431458, "learning_rate": 1.158764391051392e-05, "loss": 0.6904894113540649, "step": 1104 }, { "epoch": 1.4334144363341443, "grad_norm": 0.5435361862182617, "learning_rate": 1.1573524193578863e-05, "loss": 0.5838584899902344, "step": 1105 }, { "epoch": 1.4347120843471208, "grad_norm": 0.5609909296035767, "learning_rate": 1.1559401259185095e-05, "loss": 0.6729065775871277, "step": 1106 }, { "epoch": 1.4360097323600973, "grad_norm": 0.5284282565116882, "learning_rate": 1.1545275136210441e-05, "loss": 0.5950232744216919, "step": 1107 }, { "epoch": 1.4373073803730738, "grad_norm": 0.603245735168457, "learning_rate": 1.153114585353925e-05, "loss": 0.6702573299407959, "step": 1108 }, { "epoch": 1.4386050283860503, "grad_norm": 0.5415088534355164, "learning_rate": 1.1517013440062326e-05, "loss": 0.5716216564178467, "step": 1109 }, { "epoch": 1.4399026763990268, "grad_norm": 0.4960046708583832, "learning_rate": 1.1502877924676881e-05, "loss": 0.5501525402069092, "step": 1110 }, { "epoch": 1.4412003244120033, "grad_norm": 0.5444253087043762, "learning_rate": 1.1488739336286467e-05, "loss": 0.6333913207054138, "step": 1111 }, { "epoch": 1.4424979724249798, "grad_norm": 0.5255866646766663, "learning_rate": 1.1474597703800915e-05, "loss": 0.6024140119552612, "step": 1112 }, { "epoch": 1.4437956204379563, "grad_norm": 0.5488544702529907, "learning_rate": 1.1460453056136285e-05, "loss": 0.6334477663040161, "step": 1113 }, { "epoch": 1.4450932684509326, "grad_norm": 0.5465590953826904, "learning_rate": 1.14463054222148e-05, "loss": 0.6596208810806274, "step": 1114 }, { "epoch": 1.4463909164639093, "grad_norm": 0.5492766499519348, "learning_rate": 1.1432154830964796e-05, "loss": 0.6396174430847168, "step": 1115 }, { "epoch": 1.4476885644768855, "grad_norm": 0.5476314425468445, "learning_rate": 1.1418001311320649e-05, "loss": 0.6056069135665894, "step": 1116 }, { "epoch": 1.4489862124898623, "grad_norm": 0.5088196396827698, "learning_rate": 1.1403844892222717e-05, "loss": 0.5474177002906799, "step": 1117 }, { "epoch": 1.4502838605028385, "grad_norm": 0.5697342753410339, "learning_rate": 1.1389685602617302e-05, "loss": 0.6007769107818604, "step": 1118 }, { "epoch": 1.451581508515815, "grad_norm": 0.5281476974487305, "learning_rate": 1.1375523471456564e-05, "loss": 0.5913225412368774, "step": 1119 }, { "epoch": 1.4528791565287915, "grad_norm": 0.5619297027587891, "learning_rate": 1.1361358527698481e-05, "loss": 0.611336350440979, "step": 1120 }, { "epoch": 1.454176804541768, "grad_norm": 0.531401515007019, "learning_rate": 1.134719080030677e-05, "loss": 0.5786083936691284, "step": 1121 }, { "epoch": 1.4554744525547445, "grad_norm": 0.5428561568260193, "learning_rate": 1.1333020318250854e-05, "loss": 0.6208731532096863, "step": 1122 }, { "epoch": 1.456772100567721, "grad_norm": 0.5384306311607361, "learning_rate": 1.131884711050578e-05, "loss": 0.5843198895454407, "step": 1123 }, { "epoch": 1.4580697485806975, "grad_norm": 0.5160107016563416, "learning_rate": 1.1304671206052168e-05, "loss": 0.5473004579544067, "step": 1124 }, { "epoch": 1.459367396593674, "grad_norm": 0.5360195636749268, "learning_rate": 1.1290492633876164e-05, "loss": 0.626501202583313, "step": 1125 }, { "epoch": 1.4606650446066505, "grad_norm": 0.5251026749610901, "learning_rate": 1.1276311422969349e-05, "loss": 0.5944849848747253, "step": 1126 }, { "epoch": 1.461962692619627, "grad_norm": 0.564008355140686, "learning_rate": 1.1262127602328712e-05, "loss": 0.6147276163101196, "step": 1127 }, { "epoch": 1.4632603406326035, "grad_norm": 0.5388748645782471, "learning_rate": 1.124794120095658e-05, "loss": 0.5849318504333496, "step": 1128 }, { "epoch": 1.46455798864558, "grad_norm": 0.5595386624336243, "learning_rate": 1.1233752247860549e-05, "loss": 0.6283015012741089, "step": 1129 }, { "epoch": 1.4658556366585564, "grad_norm": 0.5528329014778137, "learning_rate": 1.1219560772053442e-05, "loss": 0.6135470867156982, "step": 1130 }, { "epoch": 1.4671532846715327, "grad_norm": 0.5480870008468628, "learning_rate": 1.1205366802553231e-05, "loss": 0.579879879951477, "step": 1131 }, { "epoch": 1.4684509326845094, "grad_norm": 0.6012369990348816, "learning_rate": 1.1191170368382992e-05, "loss": 0.67568039894104, "step": 1132 }, { "epoch": 1.4697485806974857, "grad_norm": 0.5386692881584167, "learning_rate": 1.117697149857084e-05, "loss": 0.6155050992965698, "step": 1133 }, { "epoch": 1.4710462287104624, "grad_norm": 0.540510892868042, "learning_rate": 1.1162770222149873e-05, "loss": 0.6193840503692627, "step": 1134 }, { "epoch": 1.4723438767234387, "grad_norm": 0.5231954455375671, "learning_rate": 1.1148566568158099e-05, "loss": 0.5806912183761597, "step": 1135 }, { "epoch": 1.4736415247364152, "grad_norm": 0.5371982455253601, "learning_rate": 1.1134360565638402e-05, "loss": 0.6294920444488525, "step": 1136 }, { "epoch": 1.4749391727493917, "grad_norm": 0.5294065475463867, "learning_rate": 1.1120152243638457e-05, "loss": 0.6405944228172302, "step": 1137 }, { "epoch": 1.4762368207623682, "grad_norm": 0.5396026372909546, "learning_rate": 1.1105941631210694e-05, "loss": 0.622348427772522, "step": 1138 }, { "epoch": 1.4775344687753447, "grad_norm": 0.5184268951416016, "learning_rate": 1.1091728757412212e-05, "loss": 0.5783290863037109, "step": 1139 }, { "epoch": 1.4788321167883212, "grad_norm": 0.5296680331230164, "learning_rate": 1.107751365130474e-05, "loss": 0.5765876770019531, "step": 1140 }, { "epoch": 1.4801297648012977, "grad_norm": 0.5528906583786011, "learning_rate": 1.1063296341954577e-05, "loss": 0.5958802700042725, "step": 1141 }, { "epoch": 1.4814274128142741, "grad_norm": 0.549384355545044, "learning_rate": 1.1049076858432517e-05, "loss": 0.6524186730384827, "step": 1142 }, { "epoch": 1.4827250608272506, "grad_norm": 0.5553792119026184, "learning_rate": 1.1034855229813812e-05, "loss": 0.63478684425354, "step": 1143 }, { "epoch": 1.4840227088402271, "grad_norm": 0.5639452934265137, "learning_rate": 1.1020631485178084e-05, "loss": 0.6482947468757629, "step": 1144 }, { "epoch": 1.4853203568532036, "grad_norm": 0.5332263708114624, "learning_rate": 1.1006405653609295e-05, "loss": 0.6563082337379456, "step": 1145 }, { "epoch": 1.4866180048661801, "grad_norm": 0.5505067110061646, "learning_rate": 1.0992177764195671e-05, "loss": 0.6217901706695557, "step": 1146 }, { "epoch": 1.4879156528791566, "grad_norm": 0.5751034021377563, "learning_rate": 1.0977947846029642e-05, "loss": 0.618269681930542, "step": 1147 }, { "epoch": 1.4892133008921329, "grad_norm": 0.5259911417961121, "learning_rate": 1.0963715928207795e-05, "loss": 0.5809241533279419, "step": 1148 }, { "epoch": 1.4905109489051096, "grad_norm": 0.5405173301696777, "learning_rate": 1.094948203983079e-05, "loss": 0.6440936923027039, "step": 1149 }, { "epoch": 1.4918085969180859, "grad_norm": 0.5359426736831665, "learning_rate": 1.0935246210003334e-05, "loss": 0.5997065305709839, "step": 1150 }, { "epoch": 1.4918085969180859, "eval_loss": 0.6832194328308105, "eval_runtime": 72.4893, "eval_samples_per_second": 71.624, "eval_steps_per_second": 8.953, "step": 1150 }, { "epoch": 1.4931062449310626, "grad_norm": 0.545395016670227, "learning_rate": 1.0921008467834094e-05, "loss": 0.6377010345458984, "step": 1151 }, { "epoch": 1.4944038929440389, "grad_norm": 0.553674578666687, "learning_rate": 1.0906768842435647e-05, "loss": 0.6331782937049866, "step": 1152 }, { "epoch": 1.4957015409570154, "grad_norm": 0.5127398371696472, "learning_rate": 1.0892527362924426e-05, "loss": 0.5681911110877991, "step": 1153 }, { "epoch": 1.4969991889699918, "grad_norm": 0.5308411717414856, "learning_rate": 1.0878284058420647e-05, "loss": 0.6325392127037048, "step": 1154 }, { "epoch": 1.4982968369829683, "grad_norm": 0.5330897569656372, "learning_rate": 1.0864038958048267e-05, "loss": 0.5603891611099243, "step": 1155 }, { "epoch": 1.4995944849959448, "grad_norm": 0.5287606716156006, "learning_rate": 1.084979209093491e-05, "loss": 0.5920351147651672, "step": 1156 }, { "epoch": 1.5008921330089213, "grad_norm": 0.5484432578086853, "learning_rate": 1.0835543486211815e-05, "loss": 0.6529064178466797, "step": 1157 }, { "epoch": 1.5021897810218978, "grad_norm": 0.5554434061050415, "learning_rate": 1.0821293173013769e-05, "loss": 0.6203141212463379, "step": 1158 }, { "epoch": 1.5034874290348743, "grad_norm": 0.4985191226005554, "learning_rate": 1.0807041180479054e-05, "loss": 0.5167315006256104, "step": 1159 }, { "epoch": 1.5047850770478508, "grad_norm": 0.5687364339828491, "learning_rate": 1.0792787537749392e-05, "loss": 0.6727509498596191, "step": 1160 }, { "epoch": 1.5060827250608273, "grad_norm": 0.5391871333122253, "learning_rate": 1.0778532273969877e-05, "loss": 0.5891563892364502, "step": 1161 }, { "epoch": 1.5073803730738038, "grad_norm": 0.5688561201095581, "learning_rate": 1.0764275418288908e-05, "loss": 0.6336361169815063, "step": 1162 }, { "epoch": 1.50867802108678, "grad_norm": 0.5307201743125916, "learning_rate": 1.0750016999858151e-05, "loss": 0.6088765263557434, "step": 1163 }, { "epoch": 1.5099756690997568, "grad_norm": 0.5417827367782593, "learning_rate": 1.0735757047832461e-05, "loss": 0.6234108209609985, "step": 1164 }, { "epoch": 1.511273317112733, "grad_norm": 0.5165390968322754, "learning_rate": 1.0721495591369832e-05, "loss": 0.5378797054290771, "step": 1165 }, { "epoch": 1.5125709651257098, "grad_norm": 0.5508493781089783, "learning_rate": 1.0707232659631333e-05, "loss": 0.6575205326080322, "step": 1166 }, { "epoch": 1.513868613138686, "grad_norm": 0.5701325535774231, "learning_rate": 1.0692968281781046e-05, "loss": 0.5776763558387756, "step": 1167 }, { "epoch": 1.5151662611516628, "grad_norm": 0.5180992484092712, "learning_rate": 1.0678702486986016e-05, "loss": 0.5627498626708984, "step": 1168 }, { "epoch": 1.516463909164639, "grad_norm": 0.5465271472930908, "learning_rate": 1.0664435304416185e-05, "loss": 0.5880453586578369, "step": 1169 }, { "epoch": 1.5177615571776155, "grad_norm": 0.5629556775093079, "learning_rate": 1.065016676324433e-05, "loss": 0.6594117879867554, "step": 1170 }, { "epoch": 1.519059205190592, "grad_norm": 0.5278184413909912, "learning_rate": 1.0635896892645998e-05, "loss": 0.5453213453292847, "step": 1171 }, { "epoch": 1.5203568532035685, "grad_norm": 0.5409108400344849, "learning_rate": 1.0621625721799473e-05, "loss": 0.6020928025245667, "step": 1172 }, { "epoch": 1.521654501216545, "grad_norm": 0.5297386050224304, "learning_rate": 1.0607353279885682e-05, "loss": 0.581575870513916, "step": 1173 }, { "epoch": 1.5229521492295215, "grad_norm": 0.5326167345046997, "learning_rate": 1.0593079596088155e-05, "loss": 0.5731886029243469, "step": 1174 }, { "epoch": 1.524249797242498, "grad_norm": 0.5496317148208618, "learning_rate": 1.0578804699592968e-05, "loss": 0.6127786636352539, "step": 1175 }, { "epoch": 1.5255474452554745, "grad_norm": 0.5222692489624023, "learning_rate": 1.0564528619588668e-05, "loss": 0.5508180856704712, "step": 1176 }, { "epoch": 1.526845093268451, "grad_norm": 0.5078931450843811, "learning_rate": 1.0550251385266223e-05, "loss": 0.590618908405304, "step": 1177 }, { "epoch": 1.5281427412814275, "grad_norm": 0.545173704624176, "learning_rate": 1.0535973025818969e-05, "loss": 0.5988805294036865, "step": 1178 }, { "epoch": 1.529440389294404, "grad_norm": 0.5643585920333862, "learning_rate": 1.0521693570442533e-05, "loss": 0.6470606327056885, "step": 1179 }, { "epoch": 1.5307380373073802, "grad_norm": 0.5382372140884399, "learning_rate": 1.050741304833479e-05, "loss": 0.6253216862678528, "step": 1180 }, { "epoch": 1.532035685320357, "grad_norm": 0.527792751789093, "learning_rate": 1.0493131488695789e-05, "loss": 0.5740289092063904, "step": 1181 }, { "epoch": 1.5333333333333332, "grad_norm": 0.5286063551902771, "learning_rate": 1.0478848920727707e-05, "loss": 0.5898089408874512, "step": 1182 }, { "epoch": 1.53463098134631, "grad_norm": 0.5210081338882446, "learning_rate": 1.0464565373634784e-05, "loss": 0.5460256338119507, "step": 1183 }, { "epoch": 1.5359286293592862, "grad_norm": 0.542233943939209, "learning_rate": 1.0450280876623253e-05, "loss": 0.6149614453315735, "step": 1184 }, { "epoch": 1.537226277372263, "grad_norm": 0.5287345051765442, "learning_rate": 1.0435995458901298e-05, "loss": 0.5987131595611572, "step": 1185 }, { "epoch": 1.5385239253852392, "grad_norm": 0.542398989200592, "learning_rate": 1.042170914967898e-05, "loss": 0.5659464001655579, "step": 1186 }, { "epoch": 1.5398215733982157, "grad_norm": 0.5581417679786682, "learning_rate": 1.0407421978168186e-05, "loss": 0.648675262928009, "step": 1187 }, { "epoch": 1.5411192214111922, "grad_norm": 0.542323112487793, "learning_rate": 1.0393133973582572e-05, "loss": 0.6466338634490967, "step": 1188 }, { "epoch": 1.5424168694241687, "grad_norm": 0.5204232335090637, "learning_rate": 1.0378845165137483e-05, "loss": 0.5785092115402222, "step": 1189 }, { "epoch": 1.5437145174371452, "grad_norm": 0.5261425375938416, "learning_rate": 1.0364555582049917e-05, "loss": 0.6130785346031189, "step": 1190 }, { "epoch": 1.5450121654501217, "grad_norm": 0.5651884078979492, "learning_rate": 1.0350265253538458e-05, "loss": 0.6042903661727905, "step": 1191 }, { "epoch": 1.5463098134630981, "grad_norm": 0.5569320917129517, "learning_rate": 1.033597420882321e-05, "loss": 0.6515809297561646, "step": 1192 }, { "epoch": 1.5476074614760746, "grad_norm": 0.5539842844009399, "learning_rate": 1.0321682477125743e-05, "loss": 0.6051802039146423, "step": 1193 }, { "epoch": 1.5489051094890511, "grad_norm": 0.5327019691467285, "learning_rate": 1.0307390087669026e-05, "loss": 0.5866248607635498, "step": 1194 }, { "epoch": 1.5502027575020276, "grad_norm": 0.5504518151283264, "learning_rate": 1.0293097069677382e-05, "loss": 0.6087076663970947, "step": 1195 }, { "epoch": 1.5515004055150041, "grad_norm": 0.5322021842002869, "learning_rate": 1.0278803452376416e-05, "loss": 0.5527307391166687, "step": 1196 }, { "epoch": 1.5527980535279804, "grad_norm": 0.5314878821372986, "learning_rate": 1.0264509264992954e-05, "loss": 0.623512327671051, "step": 1197 }, { "epoch": 1.554095701540957, "grad_norm": 0.5596524477005005, "learning_rate": 1.0250214536754996e-05, "loss": 0.6276538372039795, "step": 1198 }, { "epoch": 1.5553933495539334, "grad_norm": 0.5265888571739197, "learning_rate": 1.0235919296891641e-05, "loss": 0.5611189603805542, "step": 1199 }, { "epoch": 1.55669099756691, "grad_norm": 0.5899763107299805, "learning_rate": 1.0221623574633035e-05, "loss": 0.6541014909744263, "step": 1200 }, { "epoch": 1.5579886455798864, "grad_norm": 0.545138955116272, "learning_rate": 1.0207327399210311e-05, "loss": 0.5935692191123962, "step": 1201 }, { "epoch": 1.559286293592863, "grad_norm": 0.5380452871322632, "learning_rate": 1.0193030799855534e-05, "loss": 0.5741644501686096, "step": 1202 }, { "epoch": 1.5605839416058394, "grad_norm": 0.5540161728858948, "learning_rate": 1.0178733805801626e-05, "loss": 0.625443696975708, "step": 1203 }, { "epoch": 1.5618815896188158, "grad_norm": 0.5784110426902771, "learning_rate": 1.0164436446282324e-05, "loss": 0.6342917680740356, "step": 1204 }, { "epoch": 1.5631792376317923, "grad_norm": 0.5346982479095459, "learning_rate": 1.015013875053211e-05, "loss": 0.5571820735931396, "step": 1205 }, { "epoch": 1.5644768856447688, "grad_norm": 0.5152148008346558, "learning_rate": 1.013584074778615e-05, "loss": 0.5197643041610718, "step": 1206 }, { "epoch": 1.5657745336577453, "grad_norm": 0.5702791213989258, "learning_rate": 1.0121542467280245e-05, "loss": 0.6099081635475159, "step": 1207 }, { "epoch": 1.5670721816707218, "grad_norm": 0.5424299836158752, "learning_rate": 1.0107243938250755e-05, "loss": 0.5385927557945251, "step": 1208 }, { "epoch": 1.5683698296836983, "grad_norm": 0.5413081049919128, "learning_rate": 1.0092945189934558e-05, "loss": 0.6308001279830933, "step": 1209 }, { "epoch": 1.5696674776966748, "grad_norm": 0.5650938749313354, "learning_rate": 1.007864625156897e-05, "loss": 0.656417965888977, "step": 1210 }, { "epoch": 1.5709651257096513, "grad_norm": 0.5578048229217529, "learning_rate": 1.0064347152391703e-05, "loss": 0.5987565517425537, "step": 1211 }, { "epoch": 1.5722627737226276, "grad_norm": 0.5425694584846497, "learning_rate": 1.0050047921640797e-05, "loss": 0.5794038772583008, "step": 1212 }, { "epoch": 1.5735604217356043, "grad_norm": 0.5536248087882996, "learning_rate": 1.003574858855456e-05, "loss": 0.6126576066017151, "step": 1213 }, { "epoch": 1.5748580697485806, "grad_norm": 0.5221614837646484, "learning_rate": 1.0021449182371504e-05, "loss": 0.5808907747268677, "step": 1214 }, { "epoch": 1.5761557177615573, "grad_norm": 0.5314812660217285, "learning_rate": 1.0007149732330299e-05, "loss": 0.5740360021591187, "step": 1215 }, { "epoch": 1.5774533657745335, "grad_norm": 0.556327223777771, "learning_rate": 9.992850267669703e-06, "loss": 0.6449018716812134, "step": 1216 }, { "epoch": 1.5787510137875103, "grad_norm": 0.5447148680686951, "learning_rate": 9.978550817628501e-06, "loss": 0.5590343475341797, "step": 1217 }, { "epoch": 1.5800486618004865, "grad_norm": 0.5570490956306458, "learning_rate": 9.964251411445444e-06, "loss": 0.6283855438232422, "step": 1218 }, { "epoch": 1.5813463098134632, "grad_norm": 0.5475562214851379, "learning_rate": 9.949952078359208e-06, "loss": 0.6058873534202576, "step": 1219 }, { "epoch": 1.5826439578264395, "grad_norm": 0.5271614789962769, "learning_rate": 9.935652847608302e-06, "loss": 0.6080070734024048, "step": 1220 }, { "epoch": 1.583941605839416, "grad_norm": 0.5340768098831177, "learning_rate": 9.921353748431036e-06, "loss": 0.5789950489997864, "step": 1221 }, { "epoch": 1.5852392538523925, "grad_norm": 0.5284969806671143, "learning_rate": 9.907054810065446e-06, "loss": 0.5514812469482422, "step": 1222 }, { "epoch": 1.586536901865369, "grad_norm": 0.5400740504264832, "learning_rate": 9.89275606174925e-06, "loss": 0.5774392485618591, "step": 1223 }, { "epoch": 1.5878345498783455, "grad_norm": 0.5264250040054321, "learning_rate": 9.878457532719757e-06, "loss": 0.5731384754180908, "step": 1224 }, { "epoch": 1.589132197891322, "grad_norm": 0.5703708529472351, "learning_rate": 9.864159252213852e-06, "loss": 0.6473686695098877, "step": 1225 }, { "epoch": 1.5904298459042985, "grad_norm": 0.5441808104515076, "learning_rate": 9.849861249467893e-06, "loss": 0.6381841897964478, "step": 1226 }, { "epoch": 1.591727493917275, "grad_norm": 0.5486851930618286, "learning_rate": 9.83556355371768e-06, "loss": 0.613477349281311, "step": 1227 }, { "epoch": 1.5930251419302515, "grad_norm": 0.5925759673118591, "learning_rate": 9.821266194198375e-06, "loss": 0.5966989994049072, "step": 1228 }, { "epoch": 1.5943227899432277, "grad_norm": 0.503745436668396, "learning_rate": 9.806969200144471e-06, "loss": 0.5462368726730347, "step": 1229 }, { "epoch": 1.5956204379562045, "grad_norm": 0.525786817073822, "learning_rate": 9.79267260078969e-06, "loss": 0.5990958213806152, "step": 1230 }, { "epoch": 1.5969180859691807, "grad_norm": 0.5402313470840454, "learning_rate": 9.778376425366967e-06, "loss": 0.6069964170455933, "step": 1231 }, { "epoch": 1.5982157339821574, "grad_norm": 0.566880464553833, "learning_rate": 9.764080703108362e-06, "loss": 0.6295340061187744, "step": 1232 }, { "epoch": 1.5995133819951337, "grad_norm": 0.5545258522033691, "learning_rate": 9.749785463245006e-06, "loss": 0.6260232925415039, "step": 1233 }, { "epoch": 1.6008110300081104, "grad_norm": 0.5898419618606567, "learning_rate": 9.735490735007047e-06, "loss": 0.6146451830863953, "step": 1234 }, { "epoch": 1.6021086780210867, "grad_norm": 0.5249006748199463, "learning_rate": 9.721196547623585e-06, "loss": 0.6049670577049255, "step": 1235 }, { "epoch": 1.6034063260340634, "grad_norm": 0.5289062857627869, "learning_rate": 9.706902930322621e-06, "loss": 0.6006771326065063, "step": 1236 }, { "epoch": 1.6047039740470397, "grad_norm": 0.5482916235923767, "learning_rate": 9.692609912330975e-06, "loss": 0.621732771396637, "step": 1237 }, { "epoch": 1.6060016220600162, "grad_norm": 0.5499362945556641, "learning_rate": 9.67831752287426e-06, "loss": 0.6316919922828674, "step": 1238 }, { "epoch": 1.6072992700729927, "grad_norm": 0.5119637250900269, "learning_rate": 9.66402579117679e-06, "loss": 0.5918980240821838, "step": 1239 }, { "epoch": 1.6085969180859692, "grad_norm": 0.5473806262016296, "learning_rate": 9.649734746461544e-06, "loss": 0.6354460716247559, "step": 1240 }, { "epoch": 1.6098945660989457, "grad_norm": 0.5340628027915955, "learning_rate": 9.635444417950083e-06, "loss": 0.5693660378456116, "step": 1241 }, { "epoch": 1.6111922141119221, "grad_norm": 0.5385611653327942, "learning_rate": 9.62115483486252e-06, "loss": 0.5467959642410278, "step": 1242 }, { "epoch": 1.6124898621248986, "grad_norm": 0.5278156399726868, "learning_rate": 9.606866026417431e-06, "loss": 0.6024355888366699, "step": 1243 }, { "epoch": 1.6137875101378751, "grad_norm": 0.5506213903427124, "learning_rate": 9.592578021831817e-06, "loss": 0.6594349145889282, "step": 1244 }, { "epoch": 1.6150851581508516, "grad_norm": 0.5613592267036438, "learning_rate": 9.578290850321023e-06, "loss": 0.6147022247314453, "step": 1245 }, { "epoch": 1.616382806163828, "grad_norm": 0.5302473306655884, "learning_rate": 9.564004541098709e-06, "loss": 0.5724552869796753, "step": 1246 }, { "epoch": 1.6176804541768046, "grad_norm": 0.5463687777519226, "learning_rate": 9.549719123376749e-06, "loss": 0.6859567165374756, "step": 1247 }, { "epoch": 1.6189781021897809, "grad_norm": 0.578063428401947, "learning_rate": 9.535434626365221e-06, "loss": 0.654534101486206, "step": 1248 }, { "epoch": 1.6202757502027576, "grad_norm": 0.5842363238334656, "learning_rate": 9.521151079272295e-06, "loss": 0.6818944811820984, "step": 1249 }, { "epoch": 1.6215733982157339, "grad_norm": 0.5462816953659058, "learning_rate": 9.506868511304216e-06, "loss": 0.5978901386260986, "step": 1250 }, { "epoch": 1.6228710462287106, "grad_norm": 0.5496495962142944, "learning_rate": 9.492586951665214e-06, "loss": 0.6664569973945618, "step": 1251 }, { "epoch": 1.6241686942416869, "grad_norm": 0.541262149810791, "learning_rate": 9.47830642955747e-06, "loss": 0.5771492719650269, "step": 1252 }, { "epoch": 1.6254663422546636, "grad_norm": 0.5542916655540466, "learning_rate": 9.464026974181035e-06, "loss": 0.6377862095832825, "step": 1253 }, { "epoch": 1.6267639902676398, "grad_norm": 0.5212349891662598, "learning_rate": 9.44974861473378e-06, "loss": 0.5878604650497437, "step": 1254 }, { "epoch": 1.6280616382806163, "grad_norm": 0.5611302256584167, "learning_rate": 9.435471380411335e-06, "loss": 0.636326789855957, "step": 1255 }, { "epoch": 1.6293592862935928, "grad_norm": 0.5258191227912903, "learning_rate": 9.421195300407035e-06, "loss": 0.5580926537513733, "step": 1256 }, { "epoch": 1.6306569343065693, "grad_norm": 0.5298276543617249, "learning_rate": 9.406920403911848e-06, "loss": 0.6048216819763184, "step": 1257 }, { "epoch": 1.6319545823195458, "grad_norm": 0.5328834056854248, "learning_rate": 9.392646720114325e-06, "loss": 0.6379623413085938, "step": 1258 }, { "epoch": 1.6332522303325223, "grad_norm": 0.5315790176391602, "learning_rate": 9.37837427820053e-06, "loss": 0.6466155052185059, "step": 1259 }, { "epoch": 1.6345498783454988, "grad_norm": 0.5353376269340515, "learning_rate": 9.364103107354002e-06, "loss": 0.5879526138305664, "step": 1260 }, { "epoch": 1.6358475263584753, "grad_norm": 0.5551068186759949, "learning_rate": 9.349833236755675e-06, "loss": 0.5988892316818237, "step": 1261 }, { "epoch": 1.6371451743714518, "grad_norm": 0.5331724286079407, "learning_rate": 9.335564695583816e-06, "loss": 0.5948902368545532, "step": 1262 }, { "epoch": 1.638442822384428, "grad_norm": 0.54310542345047, "learning_rate": 9.321297513013987e-06, "loss": 0.6055219769477844, "step": 1263 }, { "epoch": 1.6397404703974048, "grad_norm": 0.5368586182594299, "learning_rate": 9.307031718218956e-06, "loss": 0.6035459637641907, "step": 1264 }, { "epoch": 1.641038118410381, "grad_norm": 0.5460159182548523, "learning_rate": 9.292767340368672e-06, "loss": 0.6447773575782776, "step": 1265 }, { "epoch": 1.6423357664233578, "grad_norm": 0.5599712133407593, "learning_rate": 9.278504408630171e-06, "loss": 0.6332420110702515, "step": 1266 }, { "epoch": 1.643633414436334, "grad_norm": 0.5388185977935791, "learning_rate": 9.264242952167544e-06, "loss": 0.6116797924041748, "step": 1267 }, { "epoch": 1.6449310624493108, "grad_norm": 0.5109002590179443, "learning_rate": 9.24998300014185e-06, "loss": 0.628926694393158, "step": 1268 }, { "epoch": 1.646228710462287, "grad_norm": 0.5572671890258789, "learning_rate": 9.235724581711096e-06, "loss": 0.5795090794563293, "step": 1269 }, { "epoch": 1.6475263584752637, "grad_norm": 0.777040421962738, "learning_rate": 9.221467726030126e-06, "loss": 0.644891083240509, "step": 1270 }, { "epoch": 1.64882400648824, "grad_norm": 0.5158191919326782, "learning_rate": 9.207212462250611e-06, "loss": 0.5630925893783569, "step": 1271 }, { "epoch": 1.6501216545012165, "grad_norm": 0.5111160278320312, "learning_rate": 9.192958819520948e-06, "loss": 0.5322938561439514, "step": 1272 }, { "epoch": 1.651419302514193, "grad_norm": 0.5043333768844604, "learning_rate": 9.178706826986236e-06, "loss": 0.5961562395095825, "step": 1273 }, { "epoch": 1.6527169505271695, "grad_norm": 0.5496838092803955, "learning_rate": 9.164456513788186e-06, "loss": 0.6005456447601318, "step": 1274 }, { "epoch": 1.654014598540146, "grad_norm": 0.5577642321586609, "learning_rate": 9.150207909065093e-06, "loss": 0.6366305351257324, "step": 1275 }, { "epoch": 1.6553122465531225, "grad_norm": 0.5257747769355774, "learning_rate": 9.135961041951735e-06, "loss": 0.5669390559196472, "step": 1276 }, { "epoch": 1.656609894566099, "grad_norm": 0.5349394083023071, "learning_rate": 9.121715941579358e-06, "loss": 0.5594930052757263, "step": 1277 }, { "epoch": 1.6579075425790755, "grad_norm": 0.5282658338546753, "learning_rate": 9.107472637075578e-06, "loss": 0.6159694194793701, "step": 1278 }, { "epoch": 1.659205190592052, "grad_norm": 0.5608229637145996, "learning_rate": 9.093231157564357e-06, "loss": 0.6022686958312988, "step": 1279 }, { "epoch": 1.6605028386050282, "grad_norm": 0.5175761580467224, "learning_rate": 9.078991532165911e-06, "loss": 0.5850685834884644, "step": 1280 }, { "epoch": 1.661800486618005, "grad_norm": 0.5338742733001709, "learning_rate": 9.06475378999667e-06, "loss": 0.5943388938903809, "step": 1281 }, { "epoch": 1.6630981346309812, "grad_norm": 0.5751469135284424, "learning_rate": 9.050517960169211e-06, "loss": 0.6381434798240662, "step": 1282 }, { "epoch": 1.664395782643958, "grad_norm": 0.5597715377807617, "learning_rate": 9.036284071792212e-06, "loss": 0.6742138862609863, "step": 1283 }, { "epoch": 1.6656934306569342, "grad_norm": 0.5457910895347595, "learning_rate": 9.022052153970361e-06, "loss": 0.6068155169487, "step": 1284 }, { "epoch": 1.666991078669911, "grad_norm": 0.5507814884185791, "learning_rate": 9.007822235804334e-06, "loss": 0.6176409125328064, "step": 1285 }, { "epoch": 1.6682887266828872, "grad_norm": 0.5373377203941345, "learning_rate": 8.993594346390709e-06, "loss": 0.5884984731674194, "step": 1286 }, { "epoch": 1.669586374695864, "grad_norm": 0.523912787437439, "learning_rate": 8.979368514821917e-06, "loss": 0.5794025659561157, "step": 1287 }, { "epoch": 1.6708840227088402, "grad_norm": 0.5313317179679871, "learning_rate": 8.965144770186192e-06, "loss": 0.6304433345794678, "step": 1288 }, { "epoch": 1.6721816707218167, "grad_norm": 0.5308225154876709, "learning_rate": 8.950923141567482e-06, "loss": 0.5822694301605225, "step": 1289 }, { "epoch": 1.6734793187347932, "grad_norm": 0.5657337307929993, "learning_rate": 8.936703658045426e-06, "loss": 0.7206499576568604, "step": 1290 }, { "epoch": 1.6747769667477697, "grad_norm": 0.5842191576957703, "learning_rate": 8.92248634869526e-06, "loss": 0.6483322381973267, "step": 1291 }, { "epoch": 1.6760746147607462, "grad_norm": 0.5084115266799927, "learning_rate": 8.90827124258779e-06, "loss": 0.60451340675354, "step": 1292 }, { "epoch": 1.6773722627737226, "grad_norm": 0.5080921053886414, "learning_rate": 8.894058368789308e-06, "loss": 0.5007386803627014, "step": 1293 }, { "epoch": 1.6786699107866991, "grad_norm": 0.5186359286308289, "learning_rate": 8.879847756361544e-06, "loss": 0.5846607685089111, "step": 1294 }, { "epoch": 1.6799675587996756, "grad_norm": 0.5321721434593201, "learning_rate": 8.8656394343616e-06, "loss": 0.5854955315589905, "step": 1295 }, { "epoch": 1.6812652068126521, "grad_norm": 0.5577939748764038, "learning_rate": 8.851433431841904e-06, "loss": 0.6218785643577576, "step": 1296 }, { "epoch": 1.6825628548256284, "grad_norm": 0.5574389696121216, "learning_rate": 8.837229777850129e-06, "loss": 0.639427661895752, "step": 1297 }, { "epoch": 1.683860502838605, "grad_norm": 0.5620577335357666, "learning_rate": 8.823028501429161e-06, "loss": 0.6334304809570312, "step": 1298 }, { "epoch": 1.6851581508515814, "grad_norm": 0.5603854656219482, "learning_rate": 8.808829631617009e-06, "loss": 0.5796216726303101, "step": 1299 }, { "epoch": 1.686455798864558, "grad_norm": 0.5886275172233582, "learning_rate": 8.79463319744677e-06, "loss": 0.6645929217338562, "step": 1300 }, { "epoch": 1.6877534468775344, "grad_norm": 0.5587744116783142, "learning_rate": 8.78043922794656e-06, "loss": 0.6387877464294434, "step": 1301 }, { "epoch": 1.689051094890511, "grad_norm": 0.5619886517524719, "learning_rate": 8.766247752139453e-06, "loss": 0.658257007598877, "step": 1302 }, { "epoch": 1.6903487429034874, "grad_norm": 0.5658282041549683, "learning_rate": 8.752058799043422e-06, "loss": 0.6349663734436035, "step": 1303 }, { "epoch": 1.691646390916464, "grad_norm": 0.5596343874931335, "learning_rate": 8.737872397671293e-06, "loss": 0.5926494002342224, "step": 1304 }, { "epoch": 1.6929440389294403, "grad_norm": 0.5565075874328613, "learning_rate": 8.723688577030655e-06, "loss": 0.6093648672103882, "step": 1305 }, { "epoch": 1.6942416869424168, "grad_norm": 0.5608682036399841, "learning_rate": 8.709507366123841e-06, "loss": 0.6120996475219727, "step": 1306 }, { "epoch": 1.6955393349553933, "grad_norm": 0.5365821719169617, "learning_rate": 8.695328793947833e-06, "loss": 0.5509933233261108, "step": 1307 }, { "epoch": 1.6968369829683698, "grad_norm": 0.537822961807251, "learning_rate": 8.681152889494227e-06, "loss": 0.6313689947128296, "step": 1308 }, { "epoch": 1.6981346309813463, "grad_norm": 0.5853676199913025, "learning_rate": 8.66697968174915e-06, "loss": 0.6015232801437378, "step": 1309 }, { "epoch": 1.6994322789943228, "grad_norm": 0.5395903587341309, "learning_rate": 8.652809199693236e-06, "loss": 0.5783022046089172, "step": 1310 }, { "epoch": 1.7007299270072993, "grad_norm": 0.5408870577812195, "learning_rate": 8.638641472301524e-06, "loss": 0.6224579215049744, "step": 1311 }, { "epoch": 1.7020275750202758, "grad_norm": 0.5533918142318726, "learning_rate": 8.624476528543439e-06, "loss": 0.6317031383514404, "step": 1312 }, { "epoch": 1.7033252230332523, "grad_norm": 0.577556848526001, "learning_rate": 8.610314397382701e-06, "loss": 0.6522644758224487, "step": 1313 }, { "epoch": 1.7046228710462286, "grad_norm": 0.5453810095787048, "learning_rate": 8.596155107777288e-06, "loss": 0.6072216629981995, "step": 1314 }, { "epoch": 1.7059205190592053, "grad_norm": 0.5380662679672241, "learning_rate": 8.581998688679356e-06, "loss": 0.6069589853286743, "step": 1315 }, { "epoch": 1.7072181670721815, "grad_norm": 0.5374992489814758, "learning_rate": 8.567845169035205e-06, "loss": 0.6239044070243835, "step": 1316 }, { "epoch": 1.7085158150851583, "grad_norm": 0.5366406440734863, "learning_rate": 8.553694577785201e-06, "loss": 0.5901238322257996, "step": 1317 }, { "epoch": 1.7098134630981345, "grad_norm": 0.5510634779930115, "learning_rate": 8.539546943863717e-06, "loss": 0.6066378355026245, "step": 1318 }, { "epoch": 1.7111111111111112, "grad_norm": 0.5579630732536316, "learning_rate": 8.525402296199089e-06, "loss": 0.6439074873924255, "step": 1319 }, { "epoch": 1.7124087591240875, "grad_norm": 0.5268120765686035, "learning_rate": 8.511260663713537e-06, "loss": 0.5521663427352905, "step": 1320 }, { "epoch": 1.7137064071370642, "grad_norm": 0.5076732635498047, "learning_rate": 8.497122075323122e-06, "loss": 0.5523797273635864, "step": 1321 }, { "epoch": 1.7150040551500405, "grad_norm": 0.5172733068466187, "learning_rate": 8.482986559937676e-06, "loss": 0.6011000275611877, "step": 1322 }, { "epoch": 1.716301703163017, "grad_norm": 0.5152168869972229, "learning_rate": 8.468854146460754e-06, "loss": 0.5801671743392944, "step": 1323 }, { "epoch": 1.7175993511759935, "grad_norm": 0.5168895721435547, "learning_rate": 8.45472486378956e-06, "loss": 0.6005280613899231, "step": 1324 }, { "epoch": 1.71889699918897, "grad_norm": 0.571263313293457, "learning_rate": 8.440598740814909e-06, "loss": 0.6543586850166321, "step": 1325 }, { "epoch": 1.7201946472019465, "grad_norm": 0.5240177512168884, "learning_rate": 8.426475806421139e-06, "loss": 0.613470196723938, "step": 1326 }, { "epoch": 1.721492295214923, "grad_norm": 0.5217388272285461, "learning_rate": 8.412356089486082e-06, "loss": 0.5799127817153931, "step": 1327 }, { "epoch": 1.7227899432278995, "grad_norm": 0.5473462343215942, "learning_rate": 8.39823961888098e-06, "loss": 0.6159072518348694, "step": 1328 }, { "epoch": 1.724087591240876, "grad_norm": 0.9222651124000549, "learning_rate": 8.384126423470447e-06, "loss": 0.6260055303573608, "step": 1329 }, { "epoch": 1.7253852392538525, "grad_norm": 0.5530563592910767, "learning_rate": 8.37001653211239e-06, "loss": 0.5505119562149048, "step": 1330 }, { "epoch": 1.7266828872668287, "grad_norm": 0.5369389653205872, "learning_rate": 8.355909973657975e-06, "loss": 0.6139888763427734, "step": 1331 }, { "epoch": 1.7279805352798054, "grad_norm": 0.5347586870193481, "learning_rate": 8.341806776951532e-06, "loss": 0.6265066862106323, "step": 1332 }, { "epoch": 1.7292781832927817, "grad_norm": 0.545946478843689, "learning_rate": 8.327706970830537e-06, "loss": 0.6024926900863647, "step": 1333 }, { "epoch": 1.7305758313057584, "grad_norm": 0.5450059771537781, "learning_rate": 8.313610584125523e-06, "loss": 0.658405065536499, "step": 1334 }, { "epoch": 1.7318734793187347, "grad_norm": 0.5516889691352844, "learning_rate": 8.299517645660033e-06, "loss": 0.5770267248153687, "step": 1335 }, { "epoch": 1.7331711273317114, "grad_norm": 0.557074785232544, "learning_rate": 8.285428184250554e-06, "loss": 0.5421329736709595, "step": 1336 }, { "epoch": 1.7344687753446877, "grad_norm": 0.543565571308136, "learning_rate": 8.271342228706478e-06, "loss": 0.6527873277664185, "step": 1337 }, { "epoch": 1.7357664233576642, "grad_norm": 0.49616673588752747, "learning_rate": 8.257259807830009e-06, "loss": 0.5355008840560913, "step": 1338 }, { "epoch": 1.7370640713706407, "grad_norm": 0.5389429330825806, "learning_rate": 8.243180950416142e-06, "loss": 0.6072633862495422, "step": 1339 }, { "epoch": 1.7383617193836172, "grad_norm": 0.542195737361908, "learning_rate": 8.22910568525257e-06, "loss": 0.5909712314605713, "step": 1340 }, { "epoch": 1.7396593673965937, "grad_norm": 0.5480629205703735, "learning_rate": 8.215034041119655e-06, "loss": 0.5966728925704956, "step": 1341 }, { "epoch": 1.7409570154095702, "grad_norm": 0.5179266929626465, "learning_rate": 8.200966046790339e-06, "loss": 0.608291745185852, "step": 1342 }, { "epoch": 1.7422546634225466, "grad_norm": 0.525390625, "learning_rate": 8.186901731030117e-06, "loss": 0.6019555330276489, "step": 1343 }, { "epoch": 1.7435523114355231, "grad_norm": 0.5716756582260132, "learning_rate": 8.172841122596951e-06, "loss": 0.6858773827552795, "step": 1344 }, { "epoch": 1.7448499594484996, "grad_norm": 0.53510981798172, "learning_rate": 8.158784250241226e-06, "loss": 0.6193398833274841, "step": 1345 }, { "epoch": 1.7461476074614761, "grad_norm": 0.509371280670166, "learning_rate": 8.144731142705693e-06, "loss": 0.5310204029083252, "step": 1346 }, { "epoch": 1.7474452554744526, "grad_norm": 0.520005464553833, "learning_rate": 8.130681828725394e-06, "loss": 0.5864765644073486, "step": 1347 }, { "epoch": 1.748742903487429, "grad_norm": 0.530784010887146, "learning_rate": 8.116636337027626e-06, "loss": 0.5898761749267578, "step": 1348 }, { "epoch": 1.7500405515004056, "grad_norm": 0.528357982635498, "learning_rate": 8.10259469633186e-06, "loss": 0.611457347869873, "step": 1349 }, { "epoch": 1.7513381995133819, "grad_norm": 0.5243317484855652, "learning_rate": 8.0885569353497e-06, "loss": 0.5851372480392456, "step": 1350 }, { "epoch": 1.7526358475263586, "grad_norm": 0.5656478404998779, "learning_rate": 8.07452308278481e-06, "loss": 0.6243469715118408, "step": 1351 }, { "epoch": 1.7539334955393349, "grad_norm": 0.5173115134239197, "learning_rate": 8.060493167332874e-06, "loss": 0.5658408403396606, "step": 1352 }, { "epoch": 1.7552311435523116, "grad_norm": 0.5283849835395813, "learning_rate": 8.04646721768151e-06, "loss": 0.6133898496627808, "step": 1353 }, { "epoch": 1.7565287915652879, "grad_norm": 0.5533227324485779, "learning_rate": 8.032445262510241e-06, "loss": 0.6251792907714844, "step": 1354 }, { "epoch": 1.7578264395782643, "grad_norm": 0.5281651020050049, "learning_rate": 8.018427330490411e-06, "loss": 0.5514408349990845, "step": 1355 }, { "epoch": 1.7591240875912408, "grad_norm": 0.5382410883903503, "learning_rate": 8.004413450285147e-06, "loss": 0.6591918468475342, "step": 1356 }, { "epoch": 1.7604217356042173, "grad_norm": 0.566716194152832, "learning_rate": 7.990403650549285e-06, "loss": 0.6281836628913879, "step": 1357 }, { "epoch": 1.7617193836171938, "grad_norm": 0.5423158407211304, "learning_rate": 7.976397959929324e-06, "loss": 0.5953754782676697, "step": 1358 }, { "epoch": 1.7630170316301703, "grad_norm": 0.5327609181404114, "learning_rate": 7.962396407063346e-06, "loss": 0.6248747110366821, "step": 1359 }, { "epoch": 1.7643146796431468, "grad_norm": 0.5314010381698608, "learning_rate": 7.948399020580995e-06, "loss": 0.5661095380783081, "step": 1360 }, { "epoch": 1.7656123276561233, "grad_norm": 0.5650714039802551, "learning_rate": 7.934405829103376e-06, "loss": 0.6127238869667053, "step": 1361 }, { "epoch": 1.7669099756690998, "grad_norm": 0.546101987361908, "learning_rate": 7.920416861243028e-06, "loss": 0.5874890089035034, "step": 1362 }, { "epoch": 1.7682076236820763, "grad_norm": 0.5429707169532776, "learning_rate": 7.906432145603844e-06, "loss": 0.6140427589416504, "step": 1363 }, { "epoch": 1.7695052716950528, "grad_norm": 0.5710042715072632, "learning_rate": 7.892451710781035e-06, "loss": 0.612266480922699, "step": 1364 }, { "epoch": 1.770802919708029, "grad_norm": 0.55032879114151, "learning_rate": 7.878475585361045e-06, "loss": 0.6138355135917664, "step": 1365 }, { "epoch": 1.7721005677210058, "grad_norm": 0.5812238454818726, "learning_rate": 7.864503797921518e-06, "loss": 0.6380466818809509, "step": 1366 }, { "epoch": 1.773398215733982, "grad_norm": 0.5375271439552307, "learning_rate": 7.850536377031221e-06, "loss": 0.6307961344718933, "step": 1367 }, { "epoch": 1.7746958637469588, "grad_norm": 0.5584734082221985, "learning_rate": 7.836573351249996e-06, "loss": 0.6312189698219299, "step": 1368 }, { "epoch": 1.775993511759935, "grad_norm": 0.5133419036865234, "learning_rate": 7.822614749128692e-06, "loss": 0.5199952125549316, "step": 1369 }, { "epoch": 1.7772911597729117, "grad_norm": 0.5400519371032715, "learning_rate": 7.808660599209124e-06, "loss": 0.630193829536438, "step": 1370 }, { "epoch": 1.778588807785888, "grad_norm": 0.5627943277359009, "learning_rate": 7.794710930023993e-06, "loss": 0.6233404874801636, "step": 1371 }, { "epoch": 1.7798864557988645, "grad_norm": 0.510907769203186, "learning_rate": 7.78076577009684e-06, "loss": 0.5262112021446228, "step": 1372 }, { "epoch": 1.781184103811841, "grad_norm": 0.5093023777008057, "learning_rate": 7.76682514794199e-06, "loss": 0.5871707201004028, "step": 1373 }, { "epoch": 1.7824817518248175, "grad_norm": 0.5214765667915344, "learning_rate": 7.752889092064484e-06, "loss": 0.5635697841644287, "step": 1374 }, { "epoch": 1.783779399837794, "grad_norm": 0.5440617799758911, "learning_rate": 7.738957630960037e-06, "loss": 0.5805234909057617, "step": 1375 }, { "epoch": 1.7850770478507705, "grad_norm": 0.5365013480186462, "learning_rate": 7.725030793114952e-06, "loss": 0.615504801273346, "step": 1376 }, { "epoch": 1.786374695863747, "grad_norm": 0.5464739203453064, "learning_rate": 7.711108607006094e-06, "loss": 0.6203770637512207, "step": 1377 }, { "epoch": 1.7876723438767235, "grad_norm": 0.5313665866851807, "learning_rate": 7.697191101100802e-06, "loss": 0.6234644055366516, "step": 1378 }, { "epoch": 1.7889699918897, "grad_norm": 0.5652154684066772, "learning_rate": 7.683278303856862e-06, "loss": 0.6404775977134705, "step": 1379 }, { "epoch": 1.7902676399026762, "grad_norm": 0.5399373769760132, "learning_rate": 7.669370243722415e-06, "loss": 0.6136540770530701, "step": 1380 }, { "epoch": 1.7902676399026762, "eval_loss": 0.6770720481872559, "eval_runtime": 72.4181, "eval_samples_per_second": 71.695, "eval_steps_per_second": 8.962, "step": 1380 }, { "epoch": 1.791565287915653, "grad_norm": 0.5250906944274902, "learning_rate": 7.655466949135932e-06, "loss": 0.6147629022598267, "step": 1381 }, { "epoch": 1.7928629359286292, "grad_norm": 0.5089812278747559, "learning_rate": 7.641568448526122e-06, "loss": 0.5584423542022705, "step": 1382 }, { "epoch": 1.794160583941606, "grad_norm": 0.53523850440979, "learning_rate": 7.627674770311909e-06, "loss": 0.5899471640586853, "step": 1383 }, { "epoch": 1.7954582319545822, "grad_norm": 0.5330705642700195, "learning_rate": 7.613785942902343e-06, "loss": 0.6054921746253967, "step": 1384 }, { "epoch": 1.796755879967559, "grad_norm": 0.514224648475647, "learning_rate": 7.599901994696566e-06, "loss": 0.57494056224823, "step": 1385 }, { "epoch": 1.7980535279805352, "grad_norm": 0.5187469124794006, "learning_rate": 7.586022954083731e-06, "loss": 0.5410253405570984, "step": 1386 }, { "epoch": 1.799351175993512, "grad_norm": 0.5295100808143616, "learning_rate": 7.572148849442971e-06, "loss": 0.5727859139442444, "step": 1387 }, { "epoch": 1.8006488240064882, "grad_norm": 0.5229355692863464, "learning_rate": 7.5582797091433105e-06, "loss": 0.5822583436965942, "step": 1388 }, { "epoch": 1.8019464720194647, "grad_norm": 0.5615860223770142, "learning_rate": 7.544415561543639e-06, "loss": 0.6505988836288452, "step": 1389 }, { "epoch": 1.8032441200324412, "grad_norm": 0.538707971572876, "learning_rate": 7.5305564349926215e-06, "loss": 0.5953875184059143, "step": 1390 }, { "epoch": 1.8045417680454177, "grad_norm": 0.5197842717170715, "learning_rate": 7.516702357828672e-06, "loss": 0.61934494972229, "step": 1391 }, { "epoch": 1.8058394160583942, "grad_norm": 0.49861758947372437, "learning_rate": 7.502853358379865e-06, "loss": 0.5522242784500122, "step": 1392 }, { "epoch": 1.8071370640713706, "grad_norm": 0.5618783235549927, "learning_rate": 7.489009464963903e-06, "loss": 0.6682146787643433, "step": 1393 }, { "epoch": 1.8084347120843471, "grad_norm": 0.9511061906814575, "learning_rate": 7.475170705888042e-06, "loss": 0.5893583297729492, "step": 1394 }, { "epoch": 1.8097323600973236, "grad_norm": 0.6068239808082581, "learning_rate": 7.461337109449045e-06, "loss": 0.6168926954269409, "step": 1395 }, { "epoch": 1.8110300081103001, "grad_norm": 0.517159640789032, "learning_rate": 7.447508703933109e-06, "loss": 0.5870746374130249, "step": 1396 }, { "epoch": 1.8123276561232764, "grad_norm": 0.5260257720947266, "learning_rate": 7.433685517615831e-06, "loss": 0.6144825220108032, "step": 1397 }, { "epoch": 1.8136253041362531, "grad_norm": 0.4919078052043915, "learning_rate": 7.4198675787621185e-06, "loss": 0.6141817569732666, "step": 1398 }, { "epoch": 1.8149229521492294, "grad_norm": 0.5349772572517395, "learning_rate": 7.406054915626172e-06, "loss": 0.5727092027664185, "step": 1399 }, { "epoch": 1.816220600162206, "grad_norm": 0.5762760639190674, "learning_rate": 7.392247556451382e-06, "loss": 0.647359311580658, "step": 1400 }, { "epoch": 1.8175182481751824, "grad_norm": 0.5478885769844055, "learning_rate": 7.378445529470303e-06, "loss": 0.6371256113052368, "step": 1401 }, { "epoch": 1.818815896188159, "grad_norm": 0.5577658414840698, "learning_rate": 7.364648862904593e-06, "loss": 0.6552213430404663, "step": 1402 }, { "epoch": 1.8201135442011354, "grad_norm": 0.5350478887557983, "learning_rate": 7.35085758496494e-06, "loss": 0.5756250023841858, "step": 1403 }, { "epoch": 1.821411192214112, "grad_norm": 0.5247483849525452, "learning_rate": 7.337071723851018e-06, "loss": 0.5872269868850708, "step": 1404 }, { "epoch": 1.8227088402270883, "grad_norm": 0.5715752840042114, "learning_rate": 7.323291307751418e-06, "loss": 0.6395775079727173, "step": 1405 }, { "epoch": 1.8240064882400648, "grad_norm": 0.5355315208435059, "learning_rate": 7.3095163648436115e-06, "loss": 0.5502926707267761, "step": 1406 }, { "epoch": 1.8253041362530413, "grad_norm": 0.5468769073486328, "learning_rate": 7.295746923293865e-06, "loss": 0.6266253590583801, "step": 1407 }, { "epoch": 1.8266017842660178, "grad_norm": 0.5183525681495667, "learning_rate": 7.2819830112572035e-06, "loss": 0.5890312194824219, "step": 1408 }, { "epoch": 1.8278994322789943, "grad_norm": 0.5416871905326843, "learning_rate": 7.268224656877339e-06, "loss": 0.6163492798805237, "step": 1409 }, { "epoch": 1.8291970802919708, "grad_norm": 0.5376898646354675, "learning_rate": 7.25447188828663e-06, "loss": 0.6440437436103821, "step": 1410 }, { "epoch": 1.8304947283049473, "grad_norm": 0.5264099836349487, "learning_rate": 7.240724733606002e-06, "loss": 0.6445986032485962, "step": 1411 }, { "epoch": 1.8317923763179238, "grad_norm": 0.5397512912750244, "learning_rate": 7.2269832209449145e-06, "loss": 0.5767061710357666, "step": 1412 }, { "epoch": 1.8330900243309003, "grad_norm": 0.5331466794013977, "learning_rate": 7.213247378401274e-06, "loss": 0.6515385508537292, "step": 1413 }, { "epoch": 1.8343876723438766, "grad_norm": 0.5380875468254089, "learning_rate": 7.199517234061408e-06, "loss": 0.5956803560256958, "step": 1414 }, { "epoch": 1.8356853203568533, "grad_norm": 0.5553707480430603, "learning_rate": 7.1857928159999814e-06, "loss": 0.5990528464317322, "step": 1415 }, { "epoch": 1.8369829683698295, "grad_norm": 0.5348111391067505, "learning_rate": 7.172074152279963e-06, "loss": 0.5816199779510498, "step": 1416 }, { "epoch": 1.8382806163828063, "grad_norm": 0.63777756690979, "learning_rate": 7.1583612709525405e-06, "loss": 0.6647042036056519, "step": 1417 }, { "epoch": 1.8395782643957825, "grad_norm": 0.5394327640533447, "learning_rate": 7.14465420005709e-06, "loss": 0.629410982131958, "step": 1418 }, { "epoch": 1.8408759124087593, "grad_norm": 0.5467361807823181, "learning_rate": 7.130952967621096e-06, "loss": 0.5931155681610107, "step": 1419 }, { "epoch": 1.8421735604217355, "grad_norm": 0.5642380714416504, "learning_rate": 7.11725760166012e-06, "loss": 0.59910649061203, "step": 1420 }, { "epoch": 1.8434712084347122, "grad_norm": 0.5448968410491943, "learning_rate": 7.103568130177713e-06, "loss": 0.5758746862411499, "step": 1421 }, { "epoch": 1.8447688564476885, "grad_norm": 0.5109772682189941, "learning_rate": 7.089884581165382e-06, "loss": 0.5374370217323303, "step": 1422 }, { "epoch": 1.846066504460665, "grad_norm": 0.5496018528938293, "learning_rate": 7.076206982602516e-06, "loss": 0.6080317497253418, "step": 1423 }, { "epoch": 1.8473641524736415, "grad_norm": 0.5525946021080017, "learning_rate": 7.06253536245635e-06, "loss": 0.6326315402984619, "step": 1424 }, { "epoch": 1.848661800486618, "grad_norm": 0.5555429458618164, "learning_rate": 7.048869748681879e-06, "loss": 0.6499879360198975, "step": 1425 }, { "epoch": 1.8499594484995945, "grad_norm": 0.5364986062049866, "learning_rate": 7.035210169221834e-06, "loss": 0.6402702331542969, "step": 1426 }, { "epoch": 1.851257096512571, "grad_norm": 0.5398283004760742, "learning_rate": 7.021556652006588e-06, "loss": 0.636422872543335, "step": 1427 }, { "epoch": 1.8525547445255475, "grad_norm": 0.5333319306373596, "learning_rate": 7.007909224954135e-06, "loss": 0.6210685968399048, "step": 1428 }, { "epoch": 1.853852392538524, "grad_norm": 0.5136668086051941, "learning_rate": 6.994267915970003e-06, "loss": 0.5984174013137817, "step": 1429 }, { "epoch": 1.8551500405515005, "grad_norm": 0.5352861285209656, "learning_rate": 6.980632752947221e-06, "loss": 0.6331675052642822, "step": 1430 }, { "epoch": 1.8564476885644767, "grad_norm": 0.5386180281639099, "learning_rate": 6.967003763766247e-06, "loss": 0.599821925163269, "step": 1431 }, { "epoch": 1.8577453365774534, "grad_norm": 0.5548969507217407, "learning_rate": 6.953380976294907e-06, "loss": 0.6447435617446899, "step": 1432 }, { "epoch": 1.8590429845904297, "grad_norm": 0.5061814188957214, "learning_rate": 6.9397644183883616e-06, "loss": 0.6045181751251221, "step": 1433 }, { "epoch": 1.8603406326034064, "grad_norm": 0.49961408972740173, "learning_rate": 6.926154117889022e-06, "loss": 0.5710508823394775, "step": 1434 }, { "epoch": 1.8616382806163827, "grad_norm": 0.5761319398880005, "learning_rate": 6.91255010262651e-06, "loss": 0.6047182679176331, "step": 1435 }, { "epoch": 1.8629359286293594, "grad_norm": 0.5302688479423523, "learning_rate": 6.898952400417587e-06, "loss": 0.5881869792938232, "step": 1436 }, { "epoch": 1.8642335766423357, "grad_norm": 0.567452609539032, "learning_rate": 6.885361039066121e-06, "loss": 0.6580846905708313, "step": 1437 }, { "epoch": 1.8655312246553124, "grad_norm": 0.5567494034767151, "learning_rate": 6.8717760463629965e-06, "loss": 0.6213802099227905, "step": 1438 }, { "epoch": 1.8668288726682887, "grad_norm": 0.535961925983429, "learning_rate": 6.858197450086097e-06, "loss": 0.6174903512001038, "step": 1439 }, { "epoch": 1.8681265206812652, "grad_norm": 0.5607694387435913, "learning_rate": 6.844625278000205e-06, "loss": 0.658057451248169, "step": 1440 }, { "epoch": 1.8694241686942417, "grad_norm": 0.5164813995361328, "learning_rate": 6.831059557856984e-06, "loss": 0.6188488602638245, "step": 1441 }, { "epoch": 1.8707218167072182, "grad_norm": 0.5046887397766113, "learning_rate": 6.81750031739489e-06, "loss": 0.5495269298553467, "step": 1442 }, { "epoch": 1.8720194647201946, "grad_norm": 0.5218680500984192, "learning_rate": 6.803947584339148e-06, "loss": 0.5858875513076782, "step": 1443 }, { "epoch": 1.8733171127331711, "grad_norm": 0.5279871225357056, "learning_rate": 6.79040138640166e-06, "loss": 0.5829395055770874, "step": 1444 }, { "epoch": 1.8746147607461476, "grad_norm": 0.5364516377449036, "learning_rate": 6.7768617512809745e-06, "loss": 0.6135284900665283, "step": 1445 }, { "epoch": 1.8759124087591241, "grad_norm": 0.5465746521949768, "learning_rate": 6.763328706662214e-06, "loss": 0.5970785617828369, "step": 1446 }, { "epoch": 1.8772100567721006, "grad_norm": 0.5328618288040161, "learning_rate": 6.749802280217037e-06, "loss": 0.6004316806793213, "step": 1447 }, { "epoch": 1.878507704785077, "grad_norm": 0.5282012224197388, "learning_rate": 6.7362824996035545e-06, "loss": 0.5903221368789673, "step": 1448 }, { "epoch": 1.8798053527980536, "grad_norm": 0.5416566133499146, "learning_rate": 6.722769392466304e-06, "loss": 0.624277651309967, "step": 1449 }, { "epoch": 1.8811030008110299, "grad_norm": 0.5569058060646057, "learning_rate": 6.709262986436162e-06, "loss": 0.6214337348937988, "step": 1450 }, { "epoch": 1.8824006488240066, "grad_norm": 0.5567551255226135, "learning_rate": 6.695763309130318e-06, "loss": 0.5963641405105591, "step": 1451 }, { "epoch": 1.8836982968369829, "grad_norm": 0.5245199203491211, "learning_rate": 6.682270388152185e-06, "loss": 0.5722153186798096, "step": 1452 }, { "epoch": 1.8849959448499596, "grad_norm": 0.5476487874984741, "learning_rate": 6.668784251091381e-06, "loss": 0.573593258857727, "step": 1453 }, { "epoch": 1.8862935928629359, "grad_norm": 0.5254029631614685, "learning_rate": 6.655304925523635e-06, "loss": 0.5607786774635315, "step": 1454 }, { "epoch": 1.8875912408759126, "grad_norm": 0.5431527495384216, "learning_rate": 6.641832439010765e-06, "loss": 0.5841714143753052, "step": 1455 }, { "epoch": 1.8888888888888888, "grad_norm": 0.5374141931533813, "learning_rate": 6.628366819100586e-06, "loss": 0.5811495780944824, "step": 1456 }, { "epoch": 1.8901865369018653, "grad_norm": 0.5369722247123718, "learning_rate": 6.614908093326891e-06, "loss": 0.6311888694763184, "step": 1457 }, { "epoch": 1.8914841849148418, "grad_norm": 0.5656461119651794, "learning_rate": 6.601456289209362e-06, "loss": 0.6515893936157227, "step": 1458 }, { "epoch": 1.8927818329278183, "grad_norm": 0.5076130032539368, "learning_rate": 6.588011434253534e-06, "loss": 0.5477322340011597, "step": 1459 }, { "epoch": 1.8940794809407948, "grad_norm": 0.5373955965042114, "learning_rate": 6.574573555950738e-06, "loss": 0.5668719410896301, "step": 1460 }, { "epoch": 1.8953771289537713, "grad_norm": 0.5303026437759399, "learning_rate": 6.561142681778027e-06, "loss": 0.5856397747993469, "step": 1461 }, { "epoch": 1.8966747769667478, "grad_norm": 0.5287466049194336, "learning_rate": 6.547718839198145e-06, "loss": 0.574636697769165, "step": 1462 }, { "epoch": 1.8979724249797243, "grad_norm": 0.546556830406189, "learning_rate": 6.53430205565945e-06, "loss": 0.6119240522384644, "step": 1463 }, { "epoch": 1.8992700729927008, "grad_norm": 0.5332784652709961, "learning_rate": 6.520892358595869e-06, "loss": 0.6177451014518738, "step": 1464 }, { "epoch": 1.900567721005677, "grad_norm": 0.5086203217506409, "learning_rate": 6.507489775426834e-06, "loss": 0.6066810488700867, "step": 1465 }, { "epoch": 1.9018653690186538, "grad_norm": 0.5467303991317749, "learning_rate": 6.494094333557243e-06, "loss": 0.5971111059188843, "step": 1466 }, { "epoch": 1.90316301703163, "grad_norm": 0.5070620179176331, "learning_rate": 6.4807060603773795e-06, "loss": 0.6063017845153809, "step": 1467 }, { "epoch": 1.9044606650446068, "grad_norm": 0.553736686706543, "learning_rate": 6.467324983262877e-06, "loss": 0.579677402973175, "step": 1468 }, { "epoch": 1.905758313057583, "grad_norm": 0.5139430165290833, "learning_rate": 6.453951129574644e-06, "loss": 0.5715341567993164, "step": 1469 }, { "epoch": 1.9070559610705597, "grad_norm": 0.5478905439376831, "learning_rate": 6.4405845266588356e-06, "loss": 0.6066344976425171, "step": 1470 }, { "epoch": 1.908353609083536, "grad_norm": 0.5382056832313538, "learning_rate": 6.427225201846763e-06, "loss": 0.5792092084884644, "step": 1471 }, { "epoch": 1.9096512570965127, "grad_norm": 0.5592162013053894, "learning_rate": 6.413873182454873e-06, "loss": 0.6224773526191711, "step": 1472 }, { "epoch": 1.910948905109489, "grad_norm": 0.5435997843742371, "learning_rate": 6.4005284957846546e-06, "loss": 0.5740009546279907, "step": 1473 }, { "epoch": 1.9122465531224655, "grad_norm": 0.5480201840400696, "learning_rate": 6.3871911691226276e-06, "loss": 0.5897870063781738, "step": 1474 }, { "epoch": 1.913544201135442, "grad_norm": 0.5461702942848206, "learning_rate": 6.373861229740237e-06, "loss": 0.6223511695861816, "step": 1475 }, { "epoch": 1.9148418491484185, "grad_norm": 0.5337714552879333, "learning_rate": 6.360538704893845e-06, "loss": 0.5608541369438171, "step": 1476 }, { "epoch": 1.916139497161395, "grad_norm": 0.5573077201843262, "learning_rate": 6.3472236218246366e-06, "loss": 0.6532754302024841, "step": 1477 }, { "epoch": 1.9174371451743715, "grad_norm": 0.5389246940612793, "learning_rate": 6.333916007758591e-06, "loss": 0.5982533693313599, "step": 1478 }, { "epoch": 1.918734793187348, "grad_norm": 0.5433958768844604, "learning_rate": 6.320615889906403e-06, "loss": 0.592591404914856, "step": 1479 }, { "epoch": 1.9200324412003245, "grad_norm": 0.5413274765014648, "learning_rate": 6.307323295463457e-06, "loss": 0.6429393291473389, "step": 1480 }, { "epoch": 1.921330089213301, "grad_norm": 0.5350672602653503, "learning_rate": 6.294038251609738e-06, "loss": 0.5930889844894409, "step": 1481 }, { "epoch": 1.9226277372262772, "grad_norm": 0.5042331218719482, "learning_rate": 6.280760785509802e-06, "loss": 0.5509825944900513, "step": 1482 }, { "epoch": 1.923925385239254, "grad_norm": 0.5447627902030945, "learning_rate": 6.2674909243127e-06, "loss": 0.6052374839782715, "step": 1483 }, { "epoch": 1.9252230332522302, "grad_norm": 0.5395492911338806, "learning_rate": 6.254228695151949e-06, "loss": 0.6406330466270447, "step": 1484 }, { "epoch": 1.926520681265207, "grad_norm": 0.5140017867088318, "learning_rate": 6.240974125145443e-06, "loss": 0.5923643112182617, "step": 1485 }, { "epoch": 1.9278183292781832, "grad_norm": 0.5255963802337646, "learning_rate": 6.227727241395429e-06, "loss": 0.612221360206604, "step": 1486 }, { "epoch": 1.92911597729116, "grad_norm": 0.5396282076835632, "learning_rate": 6.214488070988424e-06, "loss": 0.5972959399223328, "step": 1487 }, { "epoch": 1.9304136253041362, "grad_norm": 0.5345456004142761, "learning_rate": 6.201256640995184e-06, "loss": 0.5695825815200806, "step": 1488 }, { "epoch": 1.931711273317113, "grad_norm": 0.5186867713928223, "learning_rate": 6.188032978470639e-06, "loss": 0.6117428541183472, "step": 1489 }, { "epoch": 1.9330089213300892, "grad_norm": 0.5213980674743652, "learning_rate": 6.174817110453828e-06, "loss": 0.584017276763916, "step": 1490 }, { "epoch": 1.9343065693430657, "grad_norm": 0.541926920413971, "learning_rate": 6.161609063967857e-06, "loss": 0.6257720589637756, "step": 1491 }, { "epoch": 1.9356042173560422, "grad_norm": 0.5566191673278809, "learning_rate": 6.1484088660198325e-06, "loss": 0.6734557151794434, "step": 1492 }, { "epoch": 1.9369018653690186, "grad_norm": 0.5532911419868469, "learning_rate": 6.135216543600828e-06, "loss": 0.5978685021400452, "step": 1493 }, { "epoch": 1.9381995133819951, "grad_norm": 0.5523790717124939, "learning_rate": 6.1220321236857974e-06, "loss": 0.6684085130691528, "step": 1494 }, { "epoch": 1.9394971613949716, "grad_norm": 0.5317186713218689, "learning_rate": 6.108855633233546e-06, "loss": 0.5903822183609009, "step": 1495 }, { "epoch": 1.9407948094079481, "grad_norm": 0.52325439453125, "learning_rate": 6.0956870991866545e-06, "loss": 0.5855342149734497, "step": 1496 }, { "epoch": 1.9420924574209246, "grad_norm": 0.5201572775840759, "learning_rate": 6.0825265484714526e-06, "loss": 0.5801212787628174, "step": 1497 }, { "epoch": 1.9433901054339011, "grad_norm": 0.5488981008529663, "learning_rate": 6.0693740079979235e-06, "loss": 0.647799015045166, "step": 1498 }, { "epoch": 1.9446877534468774, "grad_norm": 0.49936795234680176, "learning_rate": 6.056229504659696e-06, "loss": 0.5507512092590332, "step": 1499 }, { "epoch": 1.945985401459854, "grad_norm": 0.5403010249137878, "learning_rate": 6.043093065333945e-06, "loss": 0.5773292779922485, "step": 1500 }, { "epoch": 1.9472830494728304, "grad_norm": 0.532992422580719, "learning_rate": 6.029964716881367e-06, "loss": 0.561974048614502, "step": 1501 }, { "epoch": 1.948580697485807, "grad_norm": 0.5226876139640808, "learning_rate": 6.016844486146106e-06, "loss": 0.6117234230041504, "step": 1502 }, { "epoch": 1.9498783454987834, "grad_norm": 0.5627997517585754, "learning_rate": 6.003732399955722e-06, "loss": 0.5736496448516846, "step": 1503 }, { "epoch": 1.95117599351176, "grad_norm": 0.5260640382766724, "learning_rate": 5.990628485121106e-06, "loss": 0.5524093508720398, "step": 1504 }, { "epoch": 1.9524736415247363, "grad_norm": 0.5555213689804077, "learning_rate": 5.97753276843645e-06, "loss": 0.6590294241905212, "step": 1505 }, { "epoch": 1.9537712895377128, "grad_norm": 0.5117315053939819, "learning_rate": 5.964445276679176e-06, "loss": 0.5593676567077637, "step": 1506 }, { "epoch": 1.9550689375506893, "grad_norm": 0.5474593043327332, "learning_rate": 5.9513660366099005e-06, "loss": 0.5995163321495056, "step": 1507 }, { "epoch": 1.9563665855636658, "grad_norm": 0.5376996397972107, "learning_rate": 5.93829507497235e-06, "loss": 0.5445429086685181, "step": 1508 }, { "epoch": 1.9576642335766423, "grad_norm": 0.539804220199585, "learning_rate": 5.925232418493338e-06, "loss": 0.6023607850074768, "step": 1509 }, { "epoch": 1.9589618815896188, "grad_norm": 0.5308881402015686, "learning_rate": 5.912178093882688e-06, "loss": 0.5908794403076172, "step": 1510 }, { "epoch": 1.9602595296025953, "grad_norm": 0.5358856320381165, "learning_rate": 5.8991321278331934e-06, "loss": 0.5432258248329163, "step": 1511 }, { "epoch": 1.9615571776155718, "grad_norm": 0.5521926879882812, "learning_rate": 5.8860945470205466e-06, "loss": 0.6700773239135742, "step": 1512 }, { "epoch": 1.9628548256285483, "grad_norm": 0.5567953586578369, "learning_rate": 5.8730653781033085e-06, "loss": 0.6132399439811707, "step": 1513 }, { "epoch": 1.9641524736415248, "grad_norm": 0.5308123826980591, "learning_rate": 5.860044647722827e-06, "loss": 0.595048189163208, "step": 1514 }, { "epoch": 1.9654501216545013, "grad_norm": 0.5229505896568298, "learning_rate": 5.847032382503202e-06, "loss": 0.5752079486846924, "step": 1515 }, { "epoch": 1.9667477696674776, "grad_norm": 0.5336843729019165, "learning_rate": 5.834028609051218e-06, "loss": 0.6190193891525269, "step": 1516 }, { "epoch": 1.9680454176804543, "grad_norm": 0.5378988981246948, "learning_rate": 5.8210333539563e-06, "loss": 0.5807895660400391, "step": 1517 }, { "epoch": 1.9693430656934305, "grad_norm": 0.5520551800727844, "learning_rate": 5.808046643790468e-06, "loss": 0.6308130621910095, "step": 1518 }, { "epoch": 1.9706407137064073, "grad_norm": 0.5014427900314331, "learning_rate": 5.795068505108243e-06, "loss": 0.584097146987915, "step": 1519 }, { "epoch": 1.9719383617193835, "grad_norm": 0.5326021313667297, "learning_rate": 5.782098964446641e-06, "loss": 0.5909327268600464, "step": 1520 }, { "epoch": 1.9732360097323602, "grad_norm": 0.5124540328979492, "learning_rate": 5.769138048325087e-06, "loss": 0.5518309473991394, "step": 1521 }, { "epoch": 1.9745336577453365, "grad_norm": 0.5387500524520874, "learning_rate": 5.756185783245376e-06, "loss": 0.5835770964622498, "step": 1522 }, { "epoch": 1.975831305758313, "grad_norm": 0.568587064743042, "learning_rate": 5.743242195691612e-06, "loss": 0.5821942687034607, "step": 1523 }, { "epoch": 1.9771289537712895, "grad_norm": 0.5374230742454529, "learning_rate": 5.730307312130152e-06, "loss": 0.6571119427680969, "step": 1524 }, { "epoch": 1.978426601784266, "grad_norm": 0.5388919115066528, "learning_rate": 5.717381159009563e-06, "loss": 0.5895075798034668, "step": 1525 }, { "epoch": 1.9797242497972425, "grad_norm": 0.5499215722084045, "learning_rate": 5.704463762760559e-06, "loss": 0.61728835105896, "step": 1526 }, { "epoch": 1.981021897810219, "grad_norm": 0.5375927686691284, "learning_rate": 5.691555149795933e-06, "loss": 0.6732977032661438, "step": 1527 }, { "epoch": 1.9823195458231955, "grad_norm": 0.5313878655433655, "learning_rate": 5.678655346510549e-06, "loss": 0.61357581615448, "step": 1528 }, { "epoch": 1.983617193836172, "grad_norm": 0.5222123265266418, "learning_rate": 5.6657643792812265e-06, "loss": 0.5704218745231628, "step": 1529 }, { "epoch": 1.9849148418491485, "grad_norm": 0.5498616099357605, "learning_rate": 5.652882274466736e-06, "loss": 0.6428430080413818, "step": 1530 }, { "epoch": 1.986212489862125, "grad_norm": 0.5288700461387634, "learning_rate": 5.640009058407719e-06, "loss": 0.5776660442352295, "step": 1531 }, { "epoch": 1.9875101378751014, "grad_norm": 0.5719195008277893, "learning_rate": 5.627144757426647e-06, "loss": 0.6659935116767883, "step": 1532 }, { "epoch": 1.9888077858880777, "grad_norm": 0.5699102282524109, "learning_rate": 5.614289397827757e-06, "loss": 0.649441123008728, "step": 1533 }, { "epoch": 1.9901054339010544, "grad_norm": 0.5806236267089844, "learning_rate": 5.601443005897012e-06, "loss": 0.6462723016738892, "step": 1534 }, { "epoch": 1.9914030819140307, "grad_norm": 0.5485842823982239, "learning_rate": 5.588605607902017e-06, "loss": 0.6063494086265564, "step": 1535 }, { "epoch": 1.9927007299270074, "grad_norm": 0.5317525863647461, "learning_rate": 5.57577723009202e-06, "loss": 0.5641921162605286, "step": 1536 }, { "epoch": 1.9939983779399837, "grad_norm": 0.5366416573524475, "learning_rate": 5.5629578986977894e-06, "loss": 0.623965322971344, "step": 1537 }, { "epoch": 1.9952960259529604, "grad_norm": 0.5662190318107605, "learning_rate": 5.550147639931631e-06, "loss": 0.6340383291244507, "step": 1538 }, { "epoch": 1.9965936739659367, "grad_norm": 0.5266711711883545, "learning_rate": 5.537346479987269e-06, "loss": 0.6086807250976562, "step": 1539 }, { "epoch": 1.9978913219789132, "grad_norm": 0.5435559153556824, "learning_rate": 5.524554445039838e-06, "loss": 0.640510082244873, "step": 1540 }, { "epoch": 1.9991889699918897, "grad_norm": 0.5433489084243774, "learning_rate": 5.511771561245813e-06, "loss": 0.5800854563713074, "step": 1541 }, { "epoch": 2.0, "grad_norm": 0.6513635516166687, "learning_rate": 5.498997854742956e-06, "loss": 0.546117901802063, "step": 1542 }, { "epoch": 2.0012976480129763, "grad_norm": 0.7124117016792297, "learning_rate": 5.4862333516502634e-06, "loss": 0.5231295824050903, "step": 1543 }, { "epoch": 2.002595296025953, "grad_norm": 0.727088451385498, "learning_rate": 5.473478078067913e-06, "loss": 0.5810973644256592, "step": 1544 }, { "epoch": 2.0038929440389293, "grad_norm": 0.6788406372070312, "learning_rate": 5.460732060077212e-06, "loss": 0.47124871611595154, "step": 1545 }, { "epoch": 2.005190592051906, "grad_norm": 0.6010527610778809, "learning_rate": 5.44799532374054e-06, "loss": 0.5422745943069458, "step": 1546 }, { "epoch": 2.0064882400648822, "grad_norm": 0.609658420085907, "learning_rate": 5.435267895101303e-06, "loss": 0.48424142599105835, "step": 1547 }, { "epoch": 2.007785888077859, "grad_norm": 0.5703460574150085, "learning_rate": 5.422549800183861e-06, "loss": 0.5136675834655762, "step": 1548 }, { "epoch": 2.0090835360908352, "grad_norm": 0.5782158970832825, "learning_rate": 5.409841064993512e-06, "loss": 0.509381890296936, "step": 1549 }, { "epoch": 2.010381184103812, "grad_norm": 0.6222527623176575, "learning_rate": 5.39714171551639e-06, "loss": 0.4843388795852661, "step": 1550 }, { "epoch": 2.011678832116788, "grad_norm": 0.7037692666053772, "learning_rate": 5.384451777719464e-06, "loss": 0.5681462287902832, "step": 1551 }, { "epoch": 2.012976480129765, "grad_norm": 0.7455988526344299, "learning_rate": 5.371771277550432e-06, "loss": 0.551672101020813, "step": 1552 }, { "epoch": 2.014274128142741, "grad_norm": 0.7268160581588745, "learning_rate": 5.359100240937717e-06, "loss": 0.5382372140884399, "step": 1553 }, { "epoch": 2.015571776155718, "grad_norm": 0.6356255412101746, "learning_rate": 5.3464386937903764e-06, "loss": 0.5280675888061523, "step": 1554 }, { "epoch": 2.016869424168694, "grad_norm": 0.5975467562675476, "learning_rate": 5.33378666199807e-06, "loss": 0.47013112902641296, "step": 1555 }, { "epoch": 2.018167072181671, "grad_norm": 0.6236818432807922, "learning_rate": 5.321144171431003e-06, "loss": 0.4888884425163269, "step": 1556 }, { "epoch": 2.019464720194647, "grad_norm": 0.6166471838951111, "learning_rate": 5.308511247939872e-06, "loss": 0.5211419463157654, "step": 1557 }, { "epoch": 2.020762368207624, "grad_norm": 0.6095893383026123, "learning_rate": 5.295887917355794e-06, "loss": 0.5085535049438477, "step": 1558 }, { "epoch": 2.0220600162206, "grad_norm": 0.6039384007453918, "learning_rate": 5.283274205490303e-06, "loss": 0.4754714369773865, "step": 1559 }, { "epoch": 2.0233576642335764, "grad_norm": 0.6331435441970825, "learning_rate": 5.270670138135234e-06, "loss": 0.5521947145462036, "step": 1560 }, { "epoch": 2.024655312246553, "grad_norm": 0.6151823997497559, "learning_rate": 5.25807574106272e-06, "loss": 0.5278744697570801, "step": 1561 }, { "epoch": 2.0259529602595294, "grad_norm": 0.5749709606170654, "learning_rate": 5.245491040025115e-06, "loss": 0.4914984107017517, "step": 1562 }, { "epoch": 2.027250608272506, "grad_norm": 0.5855306386947632, "learning_rate": 5.232916060754947e-06, "loss": 0.5195509195327759, "step": 1563 }, { "epoch": 2.0285482562854824, "grad_norm": 0.5908445119857788, "learning_rate": 5.220350828964865e-06, "loss": 0.48390451073646545, "step": 1564 }, { "epoch": 2.029845904298459, "grad_norm": 0.5874761343002319, "learning_rate": 5.207795370347588e-06, "loss": 0.5324580669403076, "step": 1565 }, { "epoch": 2.0311435523114354, "grad_norm": 0.5893219709396362, "learning_rate": 5.195249710575853e-06, "loss": 0.5100334286689758, "step": 1566 }, { "epoch": 2.032441200324412, "grad_norm": 0.5876151919364929, "learning_rate": 5.182713875302361e-06, "loss": 0.4768049716949463, "step": 1567 }, { "epoch": 2.0337388483373884, "grad_norm": 0.6265038251876831, "learning_rate": 5.1701878901597106e-06, "loss": 0.5602673292160034, "step": 1568 }, { "epoch": 2.035036496350365, "grad_norm": 0.5975306034088135, "learning_rate": 5.157671780760385e-06, "loss": 0.5052694082260132, "step": 1569 }, { "epoch": 2.0363341443633414, "grad_norm": 0.5611022114753723, "learning_rate": 5.145165572696652e-06, "loss": 0.49101999402046204, "step": 1570 }, { "epoch": 2.037631792376318, "grad_norm": 0.5829542875289917, "learning_rate": 5.132669291540544e-06, "loss": 0.474854052066803, "step": 1571 }, { "epoch": 2.0389294403892944, "grad_norm": 0.5918568968772888, "learning_rate": 5.1201829628437926e-06, "loss": 0.4853309988975525, "step": 1572 }, { "epoch": 2.040227088402271, "grad_norm": 0.5785784125328064, "learning_rate": 5.107706612137776e-06, "loss": 0.5171955227851868, "step": 1573 }, { "epoch": 2.0415247364152473, "grad_norm": 0.5528171062469482, "learning_rate": 5.095240264933486e-06, "loss": 0.47794681787490845, "step": 1574 }, { "epoch": 2.042822384428224, "grad_norm": 0.5567626357078552, "learning_rate": 5.082783946721434e-06, "loss": 0.4940184950828552, "step": 1575 }, { "epoch": 2.0441200324412003, "grad_norm": 0.5630913376808167, "learning_rate": 5.070337682971642e-06, "loss": 0.5437344312667847, "step": 1576 }, { "epoch": 2.0454176804541766, "grad_norm": 0.5575384497642517, "learning_rate": 5.057901499133573e-06, "loss": 0.49236786365509033, "step": 1577 }, { "epoch": 2.0467153284671533, "grad_norm": 0.5638654828071594, "learning_rate": 5.0454754206360705e-06, "loss": 0.4736412465572357, "step": 1578 }, { "epoch": 2.0480129764801296, "grad_norm": 0.5577630996704102, "learning_rate": 5.033059472887322e-06, "loss": 0.5147624015808105, "step": 1579 }, { "epoch": 2.0493106244931063, "grad_norm": 0.5717137455940247, "learning_rate": 5.0206536812748004e-06, "loss": 0.4905228614807129, "step": 1580 }, { "epoch": 2.0506082725060826, "grad_norm": 0.5646504759788513, "learning_rate": 5.008258071165202e-06, "loss": 0.5036407113075256, "step": 1581 }, { "epoch": 2.0519059205190593, "grad_norm": 0.5792942047119141, "learning_rate": 4.995872667904424e-06, "loss": 0.5340180993080139, "step": 1582 }, { "epoch": 2.0532035685320356, "grad_norm": 0.573951244354248, "learning_rate": 4.98349749681747e-06, "loss": 0.4675467610359192, "step": 1583 }, { "epoch": 2.0545012165450123, "grad_norm": 0.5502886772155762, "learning_rate": 4.971132583208438e-06, "loss": 0.4816184937953949, "step": 1584 }, { "epoch": 2.0557988645579885, "grad_norm": 0.5748745203018188, "learning_rate": 4.958777952360445e-06, "loss": 0.49751102924346924, "step": 1585 }, { "epoch": 2.0570965125709653, "grad_norm": 0.593724250793457, "learning_rate": 4.946433629535585e-06, "loss": 0.48918506503105164, "step": 1586 }, { "epoch": 2.0583941605839415, "grad_norm": 0.5852590799331665, "learning_rate": 4.934099639974874e-06, "loss": 0.5142393708229065, "step": 1587 }, { "epoch": 2.0596918085969182, "grad_norm": 0.5500675439834595, "learning_rate": 4.921776008898198e-06, "loss": 0.43804582953453064, "step": 1588 }, { "epoch": 2.0609894566098945, "grad_norm": 0.572162389755249, "learning_rate": 4.909462761504264e-06, "loss": 0.5290922522544861, "step": 1589 }, { "epoch": 2.0622871046228712, "grad_norm": 0.5475997924804688, "learning_rate": 4.897159922970551e-06, "loss": 0.489504873752594, "step": 1590 }, { "epoch": 2.0635847526358475, "grad_norm": 0.5753741264343262, "learning_rate": 4.884867518453238e-06, "loss": 0.5394560694694519, "step": 1591 }, { "epoch": 2.0648824006488242, "grad_norm": 0.5752173662185669, "learning_rate": 4.872585573087195e-06, "loss": 0.5700497627258301, "step": 1592 }, { "epoch": 2.0661800486618005, "grad_norm": 0.5844142436981201, "learning_rate": 4.860314111985881e-06, "loss": 0.5502715110778809, "step": 1593 }, { "epoch": 2.0674776966747768, "grad_norm": 0.5586737990379333, "learning_rate": 4.848053160241333e-06, "loss": 0.48312538862228394, "step": 1594 }, { "epoch": 2.0687753446877535, "grad_norm": 0.5547072887420654, "learning_rate": 4.835802742924091e-06, "loss": 0.4890977442264557, "step": 1595 }, { "epoch": 2.0700729927007298, "grad_norm": 0.5696388483047485, "learning_rate": 4.823562885083161e-06, "loss": 0.5179868936538696, "step": 1596 }, { "epoch": 2.0713706407137065, "grad_norm": 0.5792607069015503, "learning_rate": 4.811333611745953e-06, "loss": 0.5098393559455872, "step": 1597 }, { "epoch": 2.0726682887266827, "grad_norm": 0.5769554972648621, "learning_rate": 4.799114947918238e-06, "loss": 0.4976171553134918, "step": 1598 }, { "epoch": 2.0739659367396595, "grad_norm": 0.6067489981651306, "learning_rate": 4.786906918584083e-06, "loss": 0.5139312148094177, "step": 1599 }, { "epoch": 2.0752635847526357, "grad_norm": 0.5910279750823975, "learning_rate": 4.774709548705831e-06, "loss": 0.5157588720321655, "step": 1600 }, { "epoch": 2.0765612327656124, "grad_norm": 0.5831329226493835, "learning_rate": 4.762522863224001e-06, "loss": 0.5141895413398743, "step": 1601 }, { "epoch": 2.0778588807785887, "grad_norm": 0.5735464692115784, "learning_rate": 4.750346887057292e-06, "loss": 0.47724485397338867, "step": 1602 }, { "epoch": 2.0791565287915654, "grad_norm": 0.5806788206100464, "learning_rate": 4.738181645102493e-06, "loss": 0.4755935072898865, "step": 1603 }, { "epoch": 2.0804541768045417, "grad_norm": 0.5973532199859619, "learning_rate": 4.726027162234434e-06, "loss": 0.5464816093444824, "step": 1604 }, { "epoch": 2.0817518248175184, "grad_norm": 0.5893049240112305, "learning_rate": 4.713883463305972e-06, "loss": 0.5293697118759155, "step": 1605 }, { "epoch": 2.0830494728304947, "grad_norm": 0.5956568717956543, "learning_rate": 4.701750573147885e-06, "loss": 0.5268076658248901, "step": 1606 }, { "epoch": 2.0843471208434714, "grad_norm": 0.5941202044487, "learning_rate": 4.689628516568866e-06, "loss": 0.526781439781189, "step": 1607 }, { "epoch": 2.0856447688564477, "grad_norm": 0.5724000334739685, "learning_rate": 4.677517318355455e-06, "loss": 0.5051593780517578, "step": 1608 }, { "epoch": 2.086942416869424, "grad_norm": 0.5567840933799744, "learning_rate": 4.6654170032719825e-06, "loss": 0.48566874861717224, "step": 1609 }, { "epoch": 2.0882400648824007, "grad_norm": 0.5653722882270813, "learning_rate": 4.6533275960605355e-06, "loss": 0.5071468353271484, "step": 1610 }, { "epoch": 2.0882400648824007, "eval_loss": 0.6963403820991516, "eval_runtime": 72.3826, "eval_samples_per_second": 71.73, "eval_steps_per_second": 8.966, "step": 1610 }, { "epoch": 2.089537712895377, "grad_norm": 0.5640507340431213, "learning_rate": 4.641249121440892e-06, "loss": 0.5107710361480713, "step": 1611 }, { "epoch": 2.0908353609083536, "grad_norm": 0.5841313004493713, "learning_rate": 4.629181604110464e-06, "loss": 0.5194936990737915, "step": 1612 }, { "epoch": 2.09213300892133, "grad_norm": 0.5427317023277283, "learning_rate": 4.617125068744288e-06, "loss": 0.44176995754241943, "step": 1613 }, { "epoch": 2.0934306569343066, "grad_norm": 0.6006700992584229, "learning_rate": 4.605079539994911e-06, "loss": 0.5314173102378845, "step": 1614 }, { "epoch": 2.094728304947283, "grad_norm": 0.5708412528038025, "learning_rate": 4.593045042492404e-06, "loss": 0.5313728451728821, "step": 1615 }, { "epoch": 2.0960259529602596, "grad_norm": 0.5850820541381836, "learning_rate": 4.581021600844258e-06, "loss": 0.4967271089553833, "step": 1616 }, { "epoch": 2.097323600973236, "grad_norm": 0.5869132280349731, "learning_rate": 4.569009239635374e-06, "loss": 0.5268970727920532, "step": 1617 }, { "epoch": 2.0986212489862126, "grad_norm": 0.5825201869010925, "learning_rate": 4.557007983427987e-06, "loss": 0.5315977334976196, "step": 1618 }, { "epoch": 2.099918896999189, "grad_norm": 0.5721443891525269, "learning_rate": 4.54501785676163e-06, "loss": 0.4732065498828888, "step": 1619 }, { "epoch": 2.1012165450121656, "grad_norm": 0.5872232913970947, "learning_rate": 4.533038884153077e-06, "loss": 0.5813014507293701, "step": 1620 }, { "epoch": 2.102514193025142, "grad_norm": 0.5751720666885376, "learning_rate": 4.521071090096298e-06, "loss": 0.4687768518924713, "step": 1621 }, { "epoch": 2.1038118410381186, "grad_norm": 0.5663445591926575, "learning_rate": 4.509114499062393e-06, "loss": 0.49182090163230896, "step": 1622 }, { "epoch": 2.105109489051095, "grad_norm": 0.5650926828384399, "learning_rate": 4.4971691354995795e-06, "loss": 0.5067583322525024, "step": 1623 }, { "epoch": 2.1064071370640716, "grad_norm": 0.6090897917747498, "learning_rate": 4.485235023833087e-06, "loss": 0.5684949159622192, "step": 1624 }, { "epoch": 2.107704785077048, "grad_norm": 0.6066005229949951, "learning_rate": 4.4733121884651665e-06, "loss": 0.5100910067558289, "step": 1625 }, { "epoch": 2.1090024330900246, "grad_norm": 0.5951321125030518, "learning_rate": 4.46140065377499e-06, "loss": 0.4774884283542633, "step": 1626 }, { "epoch": 2.110300081103001, "grad_norm": 0.5725848078727722, "learning_rate": 4.449500444118633e-06, "loss": 0.5018754005432129, "step": 1627 }, { "epoch": 2.111597729115977, "grad_norm": 0.5799410343170166, "learning_rate": 4.437611583829014e-06, "loss": 0.49752479791641235, "step": 1628 }, { "epoch": 2.112895377128954, "grad_norm": 0.5619634985923767, "learning_rate": 4.42573409721584e-06, "loss": 0.4756616950035095, "step": 1629 }, { "epoch": 2.11419302514193, "grad_norm": 0.5556355118751526, "learning_rate": 4.413868008565569e-06, "loss": 0.4895199239253998, "step": 1630 }, { "epoch": 2.115490673154907, "grad_norm": 0.5813250541687012, "learning_rate": 4.402013342141347e-06, "loss": 0.45987099409103394, "step": 1631 }, { "epoch": 2.116788321167883, "grad_norm": 0.5723846554756165, "learning_rate": 4.390170122182965e-06, "loss": 0.4845224916934967, "step": 1632 }, { "epoch": 2.11808596918086, "grad_norm": 0.5540896058082581, "learning_rate": 4.378338372906813e-06, "loss": 0.4948923587799072, "step": 1633 }, { "epoch": 2.119383617193836, "grad_norm": 0.61214679479599, "learning_rate": 4.3665181185058255e-06, "loss": 0.5314114093780518, "step": 1634 }, { "epoch": 2.1206812652068128, "grad_norm": 0.5635900497436523, "learning_rate": 4.354709383149421e-06, "loss": 0.4875974655151367, "step": 1635 }, { "epoch": 2.121978913219789, "grad_norm": 0.5833781957626343, "learning_rate": 4.342912190983487e-06, "loss": 0.5470179915428162, "step": 1636 }, { "epoch": 2.1232765612327658, "grad_norm": 0.5999435782432556, "learning_rate": 4.331126566130284e-06, "loss": 0.5479536056518555, "step": 1637 }, { "epoch": 2.124574209245742, "grad_norm": 0.589368999004364, "learning_rate": 4.319352532688444e-06, "loss": 0.5104061961174011, "step": 1638 }, { "epoch": 2.1258718572587187, "grad_norm": 0.5677252411842346, "learning_rate": 4.3075901147328745e-06, "loss": 0.5259417295455933, "step": 1639 }, { "epoch": 2.127169505271695, "grad_norm": 0.5625855326652527, "learning_rate": 4.295839336314749e-06, "loss": 0.49216002225875854, "step": 1640 }, { "epoch": 2.1284671532846717, "grad_norm": 0.5749784111976624, "learning_rate": 4.284100221461432e-06, "loss": 0.47341352701187134, "step": 1641 }, { "epoch": 2.129764801297648, "grad_norm": 0.5952023267745972, "learning_rate": 4.272372794176446e-06, "loss": 0.5849668979644775, "step": 1642 }, { "epoch": 2.1310624493106243, "grad_norm": 0.6117653250694275, "learning_rate": 4.260657078439409e-06, "loss": 0.5250235795974731, "step": 1643 }, { "epoch": 2.132360097323601, "grad_norm": 0.5717377662658691, "learning_rate": 4.248953098205997e-06, "loss": 0.49503540992736816, "step": 1644 }, { "epoch": 2.1336577453365773, "grad_norm": 0.5875842571258545, "learning_rate": 4.237260877407878e-06, "loss": 0.5329856872558594, "step": 1645 }, { "epoch": 2.134955393349554, "grad_norm": 0.5664336085319519, "learning_rate": 4.225580439952699e-06, "loss": 0.5302871465682983, "step": 1646 }, { "epoch": 2.1362530413625302, "grad_norm": 0.5786408185958862, "learning_rate": 4.213911809723987e-06, "loss": 0.49267759919166565, "step": 1647 }, { "epoch": 2.137550689375507, "grad_norm": 0.5607128143310547, "learning_rate": 4.20225501058114e-06, "loss": 0.5211464166641235, "step": 1648 }, { "epoch": 2.1388483373884832, "grad_norm": 0.5761646628379822, "learning_rate": 4.190610066359364e-06, "loss": 0.5178772211074829, "step": 1649 }, { "epoch": 2.14014598540146, "grad_norm": 0.5818209648132324, "learning_rate": 4.1789770008696205e-06, "loss": 0.5244809985160828, "step": 1650 }, { "epoch": 2.141443633414436, "grad_norm": 0.6208338141441345, "learning_rate": 4.167355837898585e-06, "loss": 0.5720170736312866, "step": 1651 }, { "epoch": 2.142741281427413, "grad_norm": 0.59494549036026, "learning_rate": 4.155746601208594e-06, "loss": 0.5233884453773499, "step": 1652 }, { "epoch": 2.144038929440389, "grad_norm": 0.5718002915382385, "learning_rate": 4.144149314537599e-06, "loss": 0.48552173376083374, "step": 1653 }, { "epoch": 2.145336577453366, "grad_norm": 0.5601415634155273, "learning_rate": 4.1325640015991185e-06, "loss": 0.4996642768383026, "step": 1654 }, { "epoch": 2.146634225466342, "grad_norm": 0.5795076489448547, "learning_rate": 4.120990686082174e-06, "loss": 0.5177854895591736, "step": 1655 }, { "epoch": 2.147931873479319, "grad_norm": 0.5665140151977539, "learning_rate": 4.109429391651283e-06, "loss": 0.46502965688705444, "step": 1656 }, { "epoch": 2.149229521492295, "grad_norm": 0.5985783934593201, "learning_rate": 4.097880141946354e-06, "loss": 0.4880366325378418, "step": 1657 }, { "epoch": 2.150527169505272, "grad_norm": 0.5875007510185242, "learning_rate": 4.08634296058268e-06, "loss": 0.4756428599357605, "step": 1658 }, { "epoch": 2.151824817518248, "grad_norm": 0.5694658160209656, "learning_rate": 4.074817871150887e-06, "loss": 0.5224863886833191, "step": 1659 }, { "epoch": 2.153122465531225, "grad_norm": 0.5686694979667664, "learning_rate": 4.063304897216856e-06, "loss": 0.4963817000389099, "step": 1660 }, { "epoch": 2.154420113544201, "grad_norm": 0.5916073322296143, "learning_rate": 4.051804062321706e-06, "loss": 0.5067265629768372, "step": 1661 }, { "epoch": 2.1557177615571774, "grad_norm": 0.5737749338150024, "learning_rate": 4.040315389981736e-06, "loss": 0.547669529914856, "step": 1662 }, { "epoch": 2.157015409570154, "grad_norm": 0.5631166696548462, "learning_rate": 4.028838903688372e-06, "loss": 0.5300416946411133, "step": 1663 }, { "epoch": 2.1583130575831304, "grad_norm": 0.5811983942985535, "learning_rate": 4.017374626908125e-06, "loss": 0.5100100040435791, "step": 1664 }, { "epoch": 2.159610705596107, "grad_norm": 0.571027934551239, "learning_rate": 4.005922583082538e-06, "loss": 0.5137525200843811, "step": 1665 }, { "epoch": 2.1609083536090834, "grad_norm": 0.5910731554031372, "learning_rate": 3.994482795628142e-06, "loss": 0.5244160890579224, "step": 1666 }, { "epoch": 2.16220600162206, "grad_norm": 0.5894386768341064, "learning_rate": 3.983055287936411e-06, "loss": 0.5517876148223877, "step": 1667 }, { "epoch": 2.1635036496350364, "grad_norm": 0.5779116153717041, "learning_rate": 3.971640083373696e-06, "loss": 0.5097295045852661, "step": 1668 }, { "epoch": 2.164801297648013, "grad_norm": 0.5987510085105896, "learning_rate": 3.960237205281213e-06, "loss": 0.511284589767456, "step": 1669 }, { "epoch": 2.1660989456609894, "grad_norm": 0.5853222608566284, "learning_rate": 3.948846676974953e-06, "loss": 0.5473302602767944, "step": 1670 }, { "epoch": 2.167396593673966, "grad_norm": 0.5716820359230042, "learning_rate": 3.937468521745666e-06, "loss": 0.4697805345058441, "step": 1671 }, { "epoch": 2.1686942416869424, "grad_norm": 0.5948668122291565, "learning_rate": 3.9261027628588e-06, "loss": 0.5532658100128174, "step": 1672 }, { "epoch": 2.169991889699919, "grad_norm": 0.5779493451118469, "learning_rate": 3.9147494235544544e-06, "loss": 0.495819091796875, "step": 1673 }, { "epoch": 2.1712895377128953, "grad_norm": 0.588945746421814, "learning_rate": 3.903408527047336e-06, "loss": 0.50020432472229, "step": 1674 }, { "epoch": 2.172587185725872, "grad_norm": 0.5889913439750671, "learning_rate": 3.892080096526707e-06, "loss": 0.5079851150512695, "step": 1675 }, { "epoch": 2.1738848337388483, "grad_norm": 0.5692569017410278, "learning_rate": 3.880764155156339e-06, "loss": 0.47483527660369873, "step": 1676 }, { "epoch": 2.1751824817518246, "grad_norm": 0.6015142202377319, "learning_rate": 3.8694607260744745e-06, "loss": 0.5588316321372986, "step": 1677 }, { "epoch": 2.1764801297648013, "grad_norm": 0.5825367569923401, "learning_rate": 3.858169832393752e-06, "loss": 0.5049576759338379, "step": 1678 }, { "epoch": 2.1777777777777776, "grad_norm": 0.6517031788825989, "learning_rate": 3.846891497201206e-06, "loss": 0.5698549151420593, "step": 1679 }, { "epoch": 2.1790754257907543, "grad_norm": 0.5972406268119812, "learning_rate": 3.835625743558168e-06, "loss": 0.5489758253097534, "step": 1680 }, { "epoch": 2.1803730738037306, "grad_norm": 0.590186595916748, "learning_rate": 3.824372594500256e-06, "loss": 0.5560799837112427, "step": 1681 }, { "epoch": 2.1816707218167073, "grad_norm": 0.6042253375053406, "learning_rate": 3.813132073037309e-06, "loss": 0.5188357830047607, "step": 1682 }, { "epoch": 2.1829683698296836, "grad_norm": 0.5862630605697632, "learning_rate": 3.8019042021533513e-06, "loss": 0.49817925691604614, "step": 1683 }, { "epoch": 2.1842660178426603, "grad_norm": 0.5700656175613403, "learning_rate": 3.7906890048065358e-06, "loss": 0.5223833322525024, "step": 1684 }, { "epoch": 2.1855636658556366, "grad_norm": 0.5849031805992126, "learning_rate": 3.779486503929106e-06, "loss": 0.5123599767684937, "step": 1685 }, { "epoch": 2.1868613138686133, "grad_norm": 0.5997171998023987, "learning_rate": 3.7682967224273317e-06, "loss": 0.5369530320167542, "step": 1686 }, { "epoch": 2.1881589618815895, "grad_norm": 0.5994778275489807, "learning_rate": 3.757119683181493e-06, "loss": 0.47989219427108765, "step": 1687 }, { "epoch": 2.1894566098945663, "grad_norm": 0.5771443247795105, "learning_rate": 3.7459554090458018e-06, "loss": 0.4408413767814636, "step": 1688 }, { "epoch": 2.1907542579075425, "grad_norm": 0.5725969672203064, "learning_rate": 3.7348039228483758e-06, "loss": 0.46296805143356323, "step": 1689 }, { "epoch": 2.1920519059205192, "grad_norm": 0.5743042826652527, "learning_rate": 3.7236652473911817e-06, "loss": 0.482837975025177, "step": 1690 }, { "epoch": 2.1933495539334955, "grad_norm": 0.5836053490638733, "learning_rate": 3.7125394054499843e-06, "loss": 0.5156795978546143, "step": 1691 }, { "epoch": 2.1946472019464722, "grad_norm": 0.5889219641685486, "learning_rate": 3.7014264197743267e-06, "loss": 0.5081969499588013, "step": 1692 }, { "epoch": 2.1959448499594485, "grad_norm": 0.6140073537826538, "learning_rate": 3.6903263130874423e-06, "loss": 0.5605005025863647, "step": 1693 }, { "epoch": 2.197242497972425, "grad_norm": 0.5697020292282104, "learning_rate": 3.679239108086241e-06, "loss": 0.5305500030517578, "step": 1694 }, { "epoch": 2.1985401459854015, "grad_norm": 0.5989742875099182, "learning_rate": 3.668164827441254e-06, "loss": 0.5370711088180542, "step": 1695 }, { "epoch": 2.1998377939983778, "grad_norm": 0.608519971370697, "learning_rate": 3.657103493796581e-06, "loss": 0.5120800137519836, "step": 1696 }, { "epoch": 2.2011354420113545, "grad_norm": 0.5787931084632874, "learning_rate": 3.6460551297698486e-06, "loss": 0.5016961693763733, "step": 1697 }, { "epoch": 2.2024330900243307, "grad_norm": 0.5809414982795715, "learning_rate": 3.6350197579521696e-06, "loss": 0.5177795886993408, "step": 1698 }, { "epoch": 2.2037307380373075, "grad_norm": 0.6027206778526306, "learning_rate": 3.6239974009080746e-06, "loss": 0.500653862953186, "step": 1699 }, { "epoch": 2.2050283860502837, "grad_norm": 0.5894326567649841, "learning_rate": 3.6129880811755093e-06, "loss": 0.5206901431083679, "step": 1700 }, { "epoch": 2.2063260340632604, "grad_norm": 0.591676652431488, "learning_rate": 3.601991821265731e-06, "loss": 0.49031156301498413, "step": 1701 }, { "epoch": 2.2076236820762367, "grad_norm": 0.567371666431427, "learning_rate": 3.591008643663323e-06, "loss": 0.49885687232017517, "step": 1702 }, { "epoch": 2.2089213300892134, "grad_norm": 0.5756494998931885, "learning_rate": 3.580038570826093e-06, "loss": 0.499514639377594, "step": 1703 }, { "epoch": 2.2102189781021897, "grad_norm": 0.5830073356628418, "learning_rate": 3.5690816251850657e-06, "loss": 0.4895148277282715, "step": 1704 }, { "epoch": 2.2115166261151664, "grad_norm": 0.6235371828079224, "learning_rate": 3.5581378291444223e-06, "loss": 0.5166549682617188, "step": 1705 }, { "epoch": 2.2128142741281427, "grad_norm": 0.5604133605957031, "learning_rate": 3.5472072050814565e-06, "loss": 0.4416266083717346, "step": 1706 }, { "epoch": 2.2141119221411194, "grad_norm": 0.5687461495399475, "learning_rate": 3.5362897753465265e-06, "loss": 0.48436877131462097, "step": 1707 }, { "epoch": 2.2154095701540957, "grad_norm": 0.5818923115730286, "learning_rate": 3.5253855622630174e-06, "loss": 0.5402669906616211, "step": 1708 }, { "epoch": 2.2167072181670724, "grad_norm": 0.6057185530662537, "learning_rate": 3.514494588127275e-06, "loss": 0.5666176080703735, "step": 1709 }, { "epoch": 2.2180048661800487, "grad_norm": 0.5755799412727356, "learning_rate": 3.5036168752085977e-06, "loss": 0.48957937955856323, "step": 1710 }, { "epoch": 2.219302514193025, "grad_norm": 0.5948247313499451, "learning_rate": 3.4927524457491456e-06, "loss": 0.4885704219341278, "step": 1711 }, { "epoch": 2.2206001622060016, "grad_norm": 0.5859489440917969, "learning_rate": 3.4819013219639295e-06, "loss": 0.4678208827972412, "step": 1712 }, { "epoch": 2.221897810218978, "grad_norm": 0.5540412068367004, "learning_rate": 3.471063526040752e-06, "loss": 0.481825053691864, "step": 1713 }, { "epoch": 2.2231954582319546, "grad_norm": 0.5437055826187134, "learning_rate": 3.460239080140163e-06, "loss": 0.4387455880641937, "step": 1714 }, { "epoch": 2.224493106244931, "grad_norm": 0.5966470241546631, "learning_rate": 3.4494280063954146e-06, "loss": 0.545790433883667, "step": 1715 }, { "epoch": 2.2257907542579076, "grad_norm": 0.5654957294464111, "learning_rate": 3.4386303269124142e-06, "loss": 0.4880921244621277, "step": 1716 }, { "epoch": 2.227088402270884, "grad_norm": 0.5839219689369202, "learning_rate": 3.4278460637696865e-06, "loss": 0.5272015333175659, "step": 1717 }, { "epoch": 2.2283860502838606, "grad_norm": 0.5752228498458862, "learning_rate": 3.4170752390183183e-06, "loss": 0.5249931812286377, "step": 1718 }, { "epoch": 2.229683698296837, "grad_norm": 0.580033540725708, "learning_rate": 3.4063178746819193e-06, "loss": 0.4954257309436798, "step": 1719 }, { "epoch": 2.2309813463098136, "grad_norm": 0.5703238844871521, "learning_rate": 3.395573992756579e-06, "loss": 0.502043604850769, "step": 1720 }, { "epoch": 2.23227899432279, "grad_norm": 0.5960628986358643, "learning_rate": 3.384843615210819e-06, "loss": 0.5299471616744995, "step": 1721 }, { "epoch": 2.2335766423357666, "grad_norm": 0.5959639549255371, "learning_rate": 3.3741267639855345e-06, "loss": 0.6064699292182922, "step": 1722 }, { "epoch": 2.234874290348743, "grad_norm": 0.5705887079238892, "learning_rate": 3.3634234609939888e-06, "loss": 0.49739521741867065, "step": 1723 }, { "epoch": 2.2361719383617196, "grad_norm": 0.5743765830993652, "learning_rate": 3.352733728121712e-06, "loss": 0.5017514228820801, "step": 1724 }, { "epoch": 2.237469586374696, "grad_norm": 0.5511932969093323, "learning_rate": 3.3420575872265184e-06, "loss": 0.4473830759525299, "step": 1725 }, { "epoch": 2.238767234387672, "grad_norm": 0.5601068139076233, "learning_rate": 3.3313950601384016e-06, "loss": 0.4705375134944916, "step": 1726 }, { "epoch": 2.240064882400649, "grad_norm": 0.5842630863189697, "learning_rate": 3.320746168659534e-06, "loss": 0.5488964319229126, "step": 1727 }, { "epoch": 2.241362530413625, "grad_norm": 0.5851315855979919, "learning_rate": 3.3101109345642056e-06, "loss": 0.4903653860092163, "step": 1728 }, { "epoch": 2.242660178426602, "grad_norm": 0.5913082361221313, "learning_rate": 3.299489379598777e-06, "loss": 0.5187092423439026, "step": 1729 }, { "epoch": 2.243957826439578, "grad_norm": 0.5963798761367798, "learning_rate": 3.288881525481639e-06, "loss": 0.5145666003227234, "step": 1730 }, { "epoch": 2.245255474452555, "grad_norm": 0.5765670537948608, "learning_rate": 3.278287393903172e-06, "loss": 0.47934818267822266, "step": 1731 }, { "epoch": 2.246553122465531, "grad_norm": 0.5776212215423584, "learning_rate": 3.2677070065256855e-06, "loss": 0.5102344751358032, "step": 1732 }, { "epoch": 2.247850770478508, "grad_norm": 0.5738791823387146, "learning_rate": 3.257140384983405e-06, "loss": 0.5097633600234985, "step": 1733 }, { "epoch": 2.249148418491484, "grad_norm": 0.5827375650405884, "learning_rate": 3.2465875508823876e-06, "loss": 0.49323970079421997, "step": 1734 }, { "epoch": 2.2504460665044608, "grad_norm": 0.5527526140213013, "learning_rate": 3.2360485258005115e-06, "loss": 0.47956135869026184, "step": 1735 }, { "epoch": 2.251743714517437, "grad_norm": 0.581285297870636, "learning_rate": 3.2255233312874155e-06, "loss": 0.5309310555458069, "step": 1736 }, { "epoch": 2.2530413625304138, "grad_norm": 0.6052958965301514, "learning_rate": 3.2150119888644594e-06, "loss": 0.5168576240539551, "step": 1737 }, { "epoch": 2.25433901054339, "grad_norm": 0.5458951592445374, "learning_rate": 3.2045145200246763e-06, "loss": 0.45663541555404663, "step": 1738 }, { "epoch": 2.2556366585563667, "grad_norm": 0.6066997647285461, "learning_rate": 3.1940309462327334e-06, "loss": 0.5442982912063599, "step": 1739 }, { "epoch": 2.256934306569343, "grad_norm": 0.5723252296447754, "learning_rate": 3.1835612889248868e-06, "loss": 0.5069276094436646, "step": 1740 }, { "epoch": 2.2582319545823197, "grad_norm": 0.571399986743927, "learning_rate": 3.1731055695089384e-06, "loss": 0.46238988637924194, "step": 1741 }, { "epoch": 2.259529602595296, "grad_norm": 0.5810062289237976, "learning_rate": 3.162663809364178e-06, "loss": 0.5127156972885132, "step": 1742 }, { "epoch": 2.2608272506082727, "grad_norm": 0.57572340965271, "learning_rate": 3.152236029841376e-06, "loss": 0.4930036664009094, "step": 1743 }, { "epoch": 2.262124898621249, "grad_norm": 0.580849826335907, "learning_rate": 3.1418222522626907e-06, "loss": 0.5655021071434021, "step": 1744 }, { "epoch": 2.2634225466342253, "grad_norm": 0.5487149953842163, "learning_rate": 3.1314224979216633e-06, "loss": 0.4654723107814789, "step": 1745 }, { "epoch": 2.264720194647202, "grad_norm": 0.5340819954872131, "learning_rate": 3.1210367880831684e-06, "loss": 0.4503304362297058, "step": 1746 }, { "epoch": 2.2660178426601782, "grad_norm": 0.5930841565132141, "learning_rate": 3.1106651439833434e-06, "loss": 0.5008471608161926, "step": 1747 }, { "epoch": 2.267315490673155, "grad_norm": 0.6097638010978699, "learning_rate": 3.1003075868295794e-06, "loss": 0.5474433898925781, "step": 1748 }, { "epoch": 2.2686131386861312, "grad_norm": 0.5703378319740295, "learning_rate": 3.0899641378004596e-06, "loss": 0.4988810420036316, "step": 1749 }, { "epoch": 2.269910786699108, "grad_norm": 0.5475755333900452, "learning_rate": 3.079634818045719e-06, "loss": 0.4420495927333832, "step": 1750 }, { "epoch": 2.2712084347120842, "grad_norm": 0.5802868008613586, "learning_rate": 3.069319648686202e-06, "loss": 0.4927031397819519, "step": 1751 }, { "epoch": 2.272506082725061, "grad_norm": 0.5564054846763611, "learning_rate": 3.0590186508138186e-06, "loss": 0.4879905581474304, "step": 1752 }, { "epoch": 2.273803730738037, "grad_norm": 0.5730741620063782, "learning_rate": 3.048731845491504e-06, "loss": 0.4577972888946533, "step": 1753 }, { "epoch": 2.275101378751014, "grad_norm": 0.5826799869537354, "learning_rate": 3.038459253753172e-06, "loss": 0.49198514223098755, "step": 1754 }, { "epoch": 2.27639902676399, "grad_norm": 0.5650803446769714, "learning_rate": 3.0282008966036647e-06, "loss": 0.48484641313552856, "step": 1755 }, { "epoch": 2.277696674776967, "grad_norm": 0.579980731010437, "learning_rate": 3.0179567950187396e-06, "loss": 0.4821101427078247, "step": 1756 }, { "epoch": 2.278994322789943, "grad_norm": 0.562907874584198, "learning_rate": 3.0077269699449795e-06, "loss": 0.47341495752334595, "step": 1757 }, { "epoch": 2.28029197080292, "grad_norm": 0.584148108959198, "learning_rate": 2.9975114422997932e-06, "loss": 0.48562386631965637, "step": 1758 }, { "epoch": 2.281589618815896, "grad_norm": 0.5975433588027954, "learning_rate": 2.9873102329713478e-06, "loss": 0.5041466951370239, "step": 1759 }, { "epoch": 2.2828872668288724, "grad_norm": 0.5545569062232971, "learning_rate": 2.9771233628185346e-06, "loss": 0.45113393664360046, "step": 1760 }, { "epoch": 2.284184914841849, "grad_norm": 0.5939710140228271, "learning_rate": 2.9669508526709256e-06, "loss": 0.550965428352356, "step": 1761 }, { "epoch": 2.285482562854826, "grad_norm": 0.6028052568435669, "learning_rate": 2.9567927233287307e-06, "loss": 0.5310263633728027, "step": 1762 }, { "epoch": 2.286780210867802, "grad_norm": 0.5738025903701782, "learning_rate": 2.9466489955627452e-06, "loss": 0.5576157569885254, "step": 1763 }, { "epoch": 2.2880778588807784, "grad_norm": 0.5776515007019043, "learning_rate": 2.936519690114338e-06, "loss": 0.4818328022956848, "step": 1764 }, { "epoch": 2.289375506893755, "grad_norm": 0.5612311363220215, "learning_rate": 2.9264048276953606e-06, "loss": 0.4919436573982239, "step": 1765 }, { "epoch": 2.2906731549067314, "grad_norm": 0.5739221572875977, "learning_rate": 2.9163044289881604e-06, "loss": 0.5123167634010315, "step": 1766 }, { "epoch": 2.291970802919708, "grad_norm": 0.5849712491035461, "learning_rate": 2.906218514645487e-06, "loss": 0.48645591735839844, "step": 1767 }, { "epoch": 2.2932684509326844, "grad_norm": 0.5921924114227295, "learning_rate": 2.8961471052904855e-06, "loss": 0.5228952169418335, "step": 1768 }, { "epoch": 2.294566098945661, "grad_norm": 0.5667364001274109, "learning_rate": 2.8860902215166374e-06, "loss": 0.4713795781135559, "step": 1769 }, { "epoch": 2.2958637469586374, "grad_norm": 0.5740687847137451, "learning_rate": 2.876047883887727e-06, "loss": 0.5572628974914551, "step": 1770 }, { "epoch": 2.297161394971614, "grad_norm": 0.5873590111732483, "learning_rate": 2.866020112937792e-06, "loss": 0.5043233036994934, "step": 1771 }, { "epoch": 2.2984590429845904, "grad_norm": 0.6047444343566895, "learning_rate": 2.8560069291710857e-06, "loss": 0.5389963984489441, "step": 1772 }, { "epoch": 2.299756690997567, "grad_norm": 0.5967015624046326, "learning_rate": 2.8460083530620342e-06, "loss": 0.5294721126556396, "step": 1773 }, { "epoch": 2.3010543390105433, "grad_norm": 0.549340546131134, "learning_rate": 2.8360244050551943e-06, "loss": 0.4317038357257843, "step": 1774 }, { "epoch": 2.30235198702352, "grad_norm": 0.5504307150840759, "learning_rate": 2.8260551055652154e-06, "loss": 0.529647946357727, "step": 1775 }, { "epoch": 2.3036496350364963, "grad_norm": 0.603110671043396, "learning_rate": 2.8161004749767893e-06, "loss": 0.5209970474243164, "step": 1776 }, { "epoch": 2.304947283049473, "grad_norm": 0.6039415001869202, "learning_rate": 2.8061605336446194e-06, "loss": 0.5043014287948608, "step": 1777 }, { "epoch": 2.3062449310624493, "grad_norm": 0.5883081555366516, "learning_rate": 2.796235301893362e-06, "loss": 0.4972041845321655, "step": 1778 }, { "epoch": 2.3075425790754256, "grad_norm": 0.5843275785446167, "learning_rate": 2.7863248000176146e-06, "loss": 0.4763846695423126, "step": 1779 }, { "epoch": 2.3088402270884023, "grad_norm": 0.5958689451217651, "learning_rate": 2.776429048281837e-06, "loss": 0.534402072429657, "step": 1780 }, { "epoch": 2.3101378751013786, "grad_norm": 0.5908694267272949, "learning_rate": 2.7665480669203383e-06, "loss": 0.5190926790237427, "step": 1781 }, { "epoch": 2.3114355231143553, "grad_norm": 0.5524806380271912, "learning_rate": 2.756681876137227e-06, "loss": 0.4656313359737396, "step": 1782 }, { "epoch": 2.3127331711273316, "grad_norm": 0.5877224206924438, "learning_rate": 2.7468304961063642e-06, "loss": 0.5328505635261536, "step": 1783 }, { "epoch": 2.3140308191403083, "grad_norm": 0.5791632533073425, "learning_rate": 2.736993946971329e-06, "loss": 0.49198758602142334, "step": 1784 }, { "epoch": 2.3153284671532846, "grad_norm": 0.5888563990592957, "learning_rate": 2.727172248845378e-06, "loss": 0.5110273957252502, "step": 1785 }, { "epoch": 2.3166261151662613, "grad_norm": 0.5828698873519897, "learning_rate": 2.717365421811389e-06, "loss": 0.5017109513282776, "step": 1786 }, { "epoch": 2.3179237631792375, "grad_norm": 0.5837040543556213, "learning_rate": 2.7075734859218526e-06, "loss": 0.48261111974716187, "step": 1787 }, { "epoch": 2.3192214111922143, "grad_norm": 0.5555887222290039, "learning_rate": 2.6977964611987885e-06, "loss": 0.47618377208709717, "step": 1788 }, { "epoch": 2.3205190592051905, "grad_norm": 0.5828522443771362, "learning_rate": 2.6880343676337485e-06, "loss": 0.5134596824645996, "step": 1789 }, { "epoch": 2.3218167072181672, "grad_norm": 0.5784159898757935, "learning_rate": 2.6782872251877347e-06, "loss": 0.5150825381278992, "step": 1790 }, { "epoch": 2.3231143552311435, "grad_norm": 0.5633057951927185, "learning_rate": 2.6685550537911886e-06, "loss": 0.5161488056182861, "step": 1791 }, { "epoch": 2.3244120032441202, "grad_norm": 0.6642704010009766, "learning_rate": 2.658837873343938e-06, "loss": 0.49425986409187317, "step": 1792 }, { "epoch": 2.3257096512570965, "grad_norm": 1.5263655185699463, "learning_rate": 2.6491357037151565e-06, "loss": 0.5067033767700195, "step": 1793 }, { "epoch": 2.3270072992700728, "grad_norm": 0.5753558278083801, "learning_rate": 2.639448564743328e-06, "loss": 0.5167245864868164, "step": 1794 }, { "epoch": 2.3283049472830495, "grad_norm": 0.576946496963501, "learning_rate": 2.6297764762362e-06, "loss": 0.4853561818599701, "step": 1795 }, { "epoch": 2.329602595296026, "grad_norm": 0.5866283774375916, "learning_rate": 2.6201194579707377e-06, "loss": 0.5048178434371948, "step": 1796 }, { "epoch": 2.3309002433090025, "grad_norm": 0.5844078660011292, "learning_rate": 2.6104775296931118e-06, "loss": 0.5524246096611023, "step": 1797 }, { "epoch": 2.3321978913219787, "grad_norm": 0.5873027443885803, "learning_rate": 2.6008507111186142e-06, "loss": 0.4834699034690857, "step": 1798 }, { "epoch": 2.3334955393349555, "grad_norm": 0.5751008987426758, "learning_rate": 2.5912390219316573e-06, "loss": 0.46085190773010254, "step": 1799 }, { "epoch": 2.3347931873479317, "grad_norm": 0.5933749675750732, "learning_rate": 2.5816424817857122e-06, "loss": 0.5757045745849609, "step": 1800 }, { "epoch": 2.3360908353609084, "grad_norm": 0.5685113668441772, "learning_rate": 2.572061110303271e-06, "loss": 0.5482950210571289, "step": 1801 }, { "epoch": 2.3373884833738847, "grad_norm": 0.5949112176895142, "learning_rate": 2.562494927075824e-06, "loss": 0.45071443915367126, "step": 1802 }, { "epoch": 2.3386861313868614, "grad_norm": 0.5924611687660217, "learning_rate": 2.552943951663782e-06, "loss": 0.5145446062088013, "step": 1803 }, { "epoch": 2.3399837793998377, "grad_norm": 0.6171916127204895, "learning_rate": 2.543408203596479e-06, "loss": 0.5408798456192017, "step": 1804 }, { "epoch": 2.3412814274128144, "grad_norm": 0.5777391791343689, "learning_rate": 2.5338877023721055e-06, "loss": 0.4972618818283081, "step": 1805 }, { "epoch": 2.3425790754257907, "grad_norm": 0.5500625371932983, "learning_rate": 2.5243824674576743e-06, "loss": 0.47741931676864624, "step": 1806 }, { "epoch": 2.3438767234387674, "grad_norm": 0.6426427960395813, "learning_rate": 2.514892518288988e-06, "loss": 0.4675457179546356, "step": 1807 }, { "epoch": 2.3451743714517437, "grad_norm": 0.5633028149604797, "learning_rate": 2.5054178742705936e-06, "loss": 0.4990037679672241, "step": 1808 }, { "epoch": 2.34647201946472, "grad_norm": 0.5860106945037842, "learning_rate": 2.4959585547757294e-06, "loss": 0.5247271060943604, "step": 1809 }, { "epoch": 2.3477696674776967, "grad_norm": 0.6035534143447876, "learning_rate": 2.486514579146322e-06, "loss": 0.5100830793380737, "step": 1810 }, { "epoch": 2.3490673154906734, "grad_norm": 0.5890262722969055, "learning_rate": 2.4770859666929027e-06, "loss": 0.4713430106639862, "step": 1811 }, { "epoch": 2.3503649635036497, "grad_norm": 0.5817517638206482, "learning_rate": 2.4676727366945995e-06, "loss": 0.5113362073898315, "step": 1812 }, { "epoch": 2.351662611516626, "grad_norm": 0.5895565748214722, "learning_rate": 2.4582749083990875e-06, "loss": 0.5131444931030273, "step": 1813 }, { "epoch": 2.3529602595296026, "grad_norm": 0.6126547455787659, "learning_rate": 2.448892501022544e-06, "loss": 0.5126985907554626, "step": 1814 }, { "epoch": 2.354257907542579, "grad_norm": 0.6138656139373779, "learning_rate": 2.4395255337496202e-06, "loss": 0.5113729238510132, "step": 1815 }, { "epoch": 2.3555555555555556, "grad_norm": 0.5864330530166626, "learning_rate": 2.4301740257333918e-06, "loss": 0.49038761854171753, "step": 1816 }, { "epoch": 2.356853203568532, "grad_norm": 0.5852108597755432, "learning_rate": 2.4208379960953255e-06, "loss": 0.5150374174118042, "step": 1817 }, { "epoch": 2.3581508515815086, "grad_norm": 0.5658332705497742, "learning_rate": 2.4115174639252425e-06, "loss": 0.45495855808258057, "step": 1818 }, { "epoch": 2.359448499594485, "grad_norm": 0.6017063856124878, "learning_rate": 2.4022124482812627e-06, "loss": 0.505713701248169, "step": 1819 }, { "epoch": 2.3607461476074616, "grad_norm": 0.5778226852416992, "learning_rate": 2.3929229681898005e-06, "loss": 0.5222234725952148, "step": 1820 }, { "epoch": 2.362043795620438, "grad_norm": 0.5651443004608154, "learning_rate": 2.3836490426454816e-06, "loss": 0.49572640657424927, "step": 1821 }, { "epoch": 2.3633414436334146, "grad_norm": 0.5689359307289124, "learning_rate": 2.3743906906111415e-06, "loss": 0.5316051840782166, "step": 1822 }, { "epoch": 2.364639091646391, "grad_norm": 0.5702098608016968, "learning_rate": 2.365147931017764e-06, "loss": 0.4997398257255554, "step": 1823 }, { "epoch": 2.3659367396593676, "grad_norm": 0.5760017037391663, "learning_rate": 2.355920782764455e-06, "loss": 0.48562324047088623, "step": 1824 }, { "epoch": 2.367234387672344, "grad_norm": 0.5816190242767334, "learning_rate": 2.3467092647183962e-06, "loss": 0.4969868063926697, "step": 1825 }, { "epoch": 2.3685320356853206, "grad_norm": 0.573274552822113, "learning_rate": 2.337513395714812e-06, "loss": 0.5109938383102417, "step": 1826 }, { "epoch": 2.369829683698297, "grad_norm": 0.6311878561973572, "learning_rate": 2.3283331945569256e-06, "loss": 0.5642886161804199, "step": 1827 }, { "epoch": 2.371127331711273, "grad_norm": 0.584414541721344, "learning_rate": 2.3191686800159272e-06, "loss": 0.4909813404083252, "step": 1828 }, { "epoch": 2.37242497972425, "grad_norm": 0.5963045954704285, "learning_rate": 2.310019870830923e-06, "loss": 0.5222618579864502, "step": 1829 }, { "epoch": 2.373722627737226, "grad_norm": 0.5990424752235413, "learning_rate": 2.300886785708919e-06, "loss": 0.527482271194458, "step": 1830 }, { "epoch": 2.375020275750203, "grad_norm": 0.5891411900520325, "learning_rate": 2.2917694433247626e-06, "loss": 0.5050874948501587, "step": 1831 }, { "epoch": 2.376317923763179, "grad_norm": 0.6118223071098328, "learning_rate": 2.282667862321104e-06, "loss": 0.5382136106491089, "step": 1832 }, { "epoch": 2.377615571776156, "grad_norm": 0.6039783358573914, "learning_rate": 2.2735820613083837e-06, "loss": 0.5693233013153076, "step": 1833 }, { "epoch": 2.378913219789132, "grad_norm": 0.5887247323989868, "learning_rate": 2.264512058864755e-06, "loss": 0.5109111666679382, "step": 1834 }, { "epoch": 2.3802108678021088, "grad_norm": 0.5879799723625183, "learning_rate": 2.2554578735360823e-06, "loss": 0.5213186740875244, "step": 1835 }, { "epoch": 2.381508515815085, "grad_norm": 0.5826606154441833, "learning_rate": 2.246419523835882e-06, "loss": 0.4647579789161682, "step": 1836 }, { "epoch": 2.3828061638280618, "grad_norm": 0.5773786306381226, "learning_rate": 2.2373970282452916e-06, "loss": 0.4783990681171417, "step": 1837 }, { "epoch": 2.384103811841038, "grad_norm": 0.5842030644416809, "learning_rate": 2.2283904052130313e-06, "loss": 0.5339592695236206, "step": 1838 }, { "epoch": 2.3854014598540147, "grad_norm": 0.569379985332489, "learning_rate": 2.2193996731553656e-06, "loss": 0.4958034157752991, "step": 1839 }, { "epoch": 2.386699107866991, "grad_norm": 0.6030622124671936, "learning_rate": 2.2104248504560643e-06, "loss": 0.4680197834968567, "step": 1840 }, { "epoch": 2.386699107866991, "eval_loss": 0.6960097551345825, "eval_runtime": 72.3931, "eval_samples_per_second": 71.72, "eval_steps_per_second": 8.965, "step": 1840 }, { "epoch": 2.3879967558799677, "grad_norm": 0.5678315758705139, "learning_rate": 2.2014659554663732e-06, "loss": 0.5050360560417175, "step": 1841 }, { "epoch": 2.389294403892944, "grad_norm": 0.5803557634353638, "learning_rate": 2.192523006504956e-06, "loss": 0.45793968439102173, "step": 1842 }, { "epoch": 2.3905920519059203, "grad_norm": 0.5823774933815002, "learning_rate": 2.183596021857891e-06, "loss": 0.4527888596057892, "step": 1843 }, { "epoch": 2.391889699918897, "grad_norm": 0.5696638226509094, "learning_rate": 2.1746850197785928e-06, "loss": 0.48019784688949585, "step": 1844 }, { "epoch": 2.3931873479318737, "grad_norm": 0.5827446579933167, "learning_rate": 2.16579001848781e-06, "loss": 0.5040067434310913, "step": 1845 }, { "epoch": 2.39448499594485, "grad_norm": 0.5871142148971558, "learning_rate": 2.156911036173568e-06, "loss": 0.47293055057525635, "step": 1846 }, { "epoch": 2.3957826439578263, "grad_norm": 0.558737576007843, "learning_rate": 2.1480480909911384e-06, "loss": 0.47470247745513916, "step": 1847 }, { "epoch": 2.397080291970803, "grad_norm": 0.5871817469596863, "learning_rate": 2.139201201062999e-06, "loss": 0.5189757347106934, "step": 1848 }, { "epoch": 2.3983779399837792, "grad_norm": 0.5788654088973999, "learning_rate": 2.130370384478807e-06, "loss": 0.49212944507598877, "step": 1849 }, { "epoch": 2.399675587996756, "grad_norm": 0.6011954545974731, "learning_rate": 2.1215556592953357e-06, "loss": 0.5247466564178467, "step": 1850 }, { "epoch": 2.4009732360097322, "grad_norm": 0.5478853583335876, "learning_rate": 2.11275704353648e-06, "loss": 0.4548777937889099, "step": 1851 }, { "epoch": 2.402270884022709, "grad_norm": 0.5758265852928162, "learning_rate": 2.10397455519317e-06, "loss": 0.5072181224822998, "step": 1852 }, { "epoch": 2.403568532035685, "grad_norm": 0.5652422308921814, "learning_rate": 2.095208212223383e-06, "loss": 0.524145245552063, "step": 1853 }, { "epoch": 2.404866180048662, "grad_norm": 0.5495245456695557, "learning_rate": 2.0864580325520623e-06, "loss": 0.47712084650993347, "step": 1854 }, { "epoch": 2.406163828061638, "grad_norm": 0.5936484932899475, "learning_rate": 2.077724034071116e-06, "loss": 0.5134607553482056, "step": 1855 }, { "epoch": 2.407461476074615, "grad_norm": 0.5818508863449097, "learning_rate": 2.069006234639357e-06, "loss": 0.46304088830947876, "step": 1856 }, { "epoch": 2.408759124087591, "grad_norm": 0.6046934723854065, "learning_rate": 2.060304652082481e-06, "loss": 0.5234611630439758, "step": 1857 }, { "epoch": 2.410056772100568, "grad_norm": 0.6409534215927124, "learning_rate": 2.051619304193022e-06, "loss": 0.5672463178634644, "step": 1858 }, { "epoch": 2.411354420113544, "grad_norm": 0.5750660300254822, "learning_rate": 2.0429502087303164e-06, "loss": 0.4885750710964203, "step": 1859 }, { "epoch": 2.412652068126521, "grad_norm": 0.6407312750816345, "learning_rate": 2.0342973834204715e-06, "loss": 0.4792509973049164, "step": 1860 }, { "epoch": 2.413949716139497, "grad_norm": 0.5465012192726135, "learning_rate": 2.0256608459563244e-06, "loss": 0.4969291388988495, "step": 1861 }, { "epoch": 2.4152473641524734, "grad_norm": 0.5713889002799988, "learning_rate": 2.017040613997412e-06, "loss": 0.48591309785842896, "step": 1862 }, { "epoch": 2.41654501216545, "grad_norm": 0.5666239857673645, "learning_rate": 2.008436705169917e-06, "loss": 0.44293344020843506, "step": 1863 }, { "epoch": 2.4178426601784264, "grad_norm": 0.5586820244789124, "learning_rate": 1.9998491370666684e-06, "loss": 0.45493143796920776, "step": 1864 }, { "epoch": 2.419140308191403, "grad_norm": 0.5613408088684082, "learning_rate": 1.991277927247056e-06, "loss": 0.49673575162887573, "step": 1865 }, { "epoch": 2.4204379562043794, "grad_norm": 0.5929522514343262, "learning_rate": 1.9827230932370467e-06, "loss": 0.5190791487693787, "step": 1866 }, { "epoch": 2.421735604217356, "grad_norm": 0.5624476075172424, "learning_rate": 1.9741846525291033e-06, "loss": 0.4601350724697113, "step": 1867 }, { "epoch": 2.4230332522303324, "grad_norm": 0.5859534740447998, "learning_rate": 1.9656626225821774e-06, "loss": 0.4977201819419861, "step": 1868 }, { "epoch": 2.424330900243309, "grad_norm": 0.5921490788459778, "learning_rate": 1.957157020821664e-06, "loss": 0.5139193534851074, "step": 1869 }, { "epoch": 2.4256285482562854, "grad_norm": 0.5974218845367432, "learning_rate": 1.9486678646393654e-06, "loss": 0.5071057081222534, "step": 1870 }, { "epoch": 2.426926196269262, "grad_norm": 0.5919764041900635, "learning_rate": 1.9401951713934574e-06, "loss": 0.49057209491729736, "step": 1871 }, { "epoch": 2.4282238442822384, "grad_norm": 0.5927568674087524, "learning_rate": 1.931738958408457e-06, "loss": 0.5092151165008545, "step": 1872 }, { "epoch": 2.429521492295215, "grad_norm": 0.5767861604690552, "learning_rate": 1.9232992429751694e-06, "loss": 0.4838736355304718, "step": 1873 }, { "epoch": 2.4308191403081914, "grad_norm": 0.5671409964561462, "learning_rate": 1.9148760423506884e-06, "loss": 0.4564237594604492, "step": 1874 }, { "epoch": 2.432116788321168, "grad_norm": 0.5710315108299255, "learning_rate": 1.9064693737583173e-06, "loss": 0.5324878096580505, "step": 1875 }, { "epoch": 2.4334144363341443, "grad_norm": 0.5930359959602356, "learning_rate": 1.8980792543875758e-06, "loss": 0.5325191617012024, "step": 1876 }, { "epoch": 2.4347120843471206, "grad_norm": 0.5865573287010193, "learning_rate": 1.8897057013941256e-06, "loss": 0.4776073694229126, "step": 1877 }, { "epoch": 2.4360097323600973, "grad_norm": 0.5611563920974731, "learning_rate": 1.8813487318997658e-06, "loss": 0.5060328841209412, "step": 1878 }, { "epoch": 2.437307380373074, "grad_norm": 0.5972756147384644, "learning_rate": 1.8730083629923857e-06, "loss": 0.4804626405239105, "step": 1879 }, { "epoch": 2.4386050283860503, "grad_norm": 0.5864998698234558, "learning_rate": 1.8646846117259277e-06, "loss": 0.49063995480537415, "step": 1880 }, { "epoch": 2.4399026763990266, "grad_norm": 0.5928548574447632, "learning_rate": 1.856377495120355e-06, "loss": 0.5291346311569214, "step": 1881 }, { "epoch": 2.4412003244120033, "grad_norm": 0.5551499724388123, "learning_rate": 1.8480870301616227e-06, "loss": 0.5005500912666321, "step": 1882 }, { "epoch": 2.4424979724249796, "grad_norm": 0.5950235724449158, "learning_rate": 1.839813233801626e-06, "loss": 0.5388972759246826, "step": 1883 }, { "epoch": 2.4437956204379563, "grad_norm": 0.5625823736190796, "learning_rate": 1.8315561229581925e-06, "loss": 0.49611175060272217, "step": 1884 }, { "epoch": 2.4450932684509326, "grad_norm": 0.5934765934944153, "learning_rate": 1.8233157145150183e-06, "loss": 0.5419527292251587, "step": 1885 }, { "epoch": 2.4463909164639093, "grad_norm": 0.5831634402275085, "learning_rate": 1.8150920253216542e-06, "loss": 0.5380743145942688, "step": 1886 }, { "epoch": 2.4476885644768855, "grad_norm": 0.5773998498916626, "learning_rate": 1.8068850721934639e-06, "loss": 0.5360612869262695, "step": 1887 }, { "epoch": 2.4489862124898623, "grad_norm": 0.5667778253555298, "learning_rate": 1.7986948719115872e-06, "loss": 0.4837849736213684, "step": 1888 }, { "epoch": 2.4502838605028385, "grad_norm": 0.5844002962112427, "learning_rate": 1.7905214412229177e-06, "loss": 0.5097035765647888, "step": 1889 }, { "epoch": 2.4515815085158152, "grad_norm": 0.571603536605835, "learning_rate": 1.7823647968400437e-06, "loss": 0.4986342787742615, "step": 1890 }, { "epoch": 2.4528791565287915, "grad_norm": 0.5814788341522217, "learning_rate": 1.7742249554412426e-06, "loss": 0.5466139316558838, "step": 1891 }, { "epoch": 2.4541768045417682, "grad_norm": 0.602313756942749, "learning_rate": 1.76610193367043e-06, "loss": 0.5179327726364136, "step": 1892 }, { "epoch": 2.4554744525547445, "grad_norm": 0.5728641748428345, "learning_rate": 1.757995748137129e-06, "loss": 0.4758206903934479, "step": 1893 }, { "epoch": 2.456772100567721, "grad_norm": 0.5834367871284485, "learning_rate": 1.7499064154164358e-06, "loss": 0.48661813139915466, "step": 1894 }, { "epoch": 2.4580697485806975, "grad_norm": 0.6014889478683472, "learning_rate": 1.7418339520489936e-06, "loss": 0.5374865531921387, "step": 1895 }, { "epoch": 2.4593673965936738, "grad_norm": 0.5678799152374268, "learning_rate": 1.7337783745409363e-06, "loss": 0.47202199697494507, "step": 1896 }, { "epoch": 2.4606650446066505, "grad_norm": 0.5770121216773987, "learning_rate": 1.7257396993638942e-06, "loss": 0.4832342565059662, "step": 1897 }, { "epoch": 2.4619626926196267, "grad_norm": 0.5571733713150024, "learning_rate": 1.717717942954914e-06, "loss": 0.5462654829025269, "step": 1898 }, { "epoch": 2.4632603406326035, "grad_norm": 0.5752882361412048, "learning_rate": 1.7097131217164598e-06, "loss": 0.5042911171913147, "step": 1899 }, { "epoch": 2.4645579886455797, "grad_norm": 0.5651837587356567, "learning_rate": 1.7017252520163652e-06, "loss": 0.5055532455444336, "step": 1900 }, { "epoch": 2.4658556366585564, "grad_norm": 0.5626855492591858, "learning_rate": 1.6937543501878018e-06, "loss": 0.5025293827056885, "step": 1901 }, { "epoch": 2.4671532846715327, "grad_norm": 0.5588532090187073, "learning_rate": 1.6858004325292466e-06, "loss": 0.5056187510490417, "step": 1902 }, { "epoch": 2.4684509326845094, "grad_norm": 0.6047312021255493, "learning_rate": 1.6778635153044486e-06, "loss": 0.5340344309806824, "step": 1903 }, { "epoch": 2.4697485806974857, "grad_norm": 0.5701199769973755, "learning_rate": 1.6699436147423942e-06, "loss": 0.47314453125, "step": 1904 }, { "epoch": 2.4710462287104624, "grad_norm": 0.5887412428855896, "learning_rate": 1.662040747037277e-06, "loss": 0.5806034207344055, "step": 1905 }, { "epoch": 2.4723438767234387, "grad_norm": 0.5856630206108093, "learning_rate": 1.654154928348455e-06, "loss": 0.542724609375, "step": 1906 }, { "epoch": 2.4736415247364154, "grad_norm": 0.5869402885437012, "learning_rate": 1.646286174800441e-06, "loss": 0.5193344354629517, "step": 1907 }, { "epoch": 2.4749391727493917, "grad_norm": 0.5962528586387634, "learning_rate": 1.6384345024828374e-06, "loss": 0.49579355120658875, "step": 1908 }, { "epoch": 2.4762368207623684, "grad_norm": 0.5732969641685486, "learning_rate": 1.6305999274503282e-06, "loss": 0.4678477346897125, "step": 1909 }, { "epoch": 2.4775344687753447, "grad_norm": 0.5851303339004517, "learning_rate": 1.6227824657226366e-06, "loss": 0.4453192949295044, "step": 1910 }, { "epoch": 2.478832116788321, "grad_norm": 0.5631725192070007, "learning_rate": 1.614982133284495e-06, "loss": 0.47414714097976685, "step": 1911 }, { "epoch": 2.4801297648012977, "grad_norm": 0.5917407274246216, "learning_rate": 1.6071989460856063e-06, "loss": 0.51967453956604, "step": 1912 }, { "epoch": 2.4814274128142744, "grad_norm": 0.5762115716934204, "learning_rate": 1.5994329200406223e-06, "loss": 0.47164011001586914, "step": 1913 }, { "epoch": 2.4827250608272506, "grad_norm": 0.5615324974060059, "learning_rate": 1.5916840710290937e-06, "loss": 0.5057311058044434, "step": 1914 }, { "epoch": 2.484022708840227, "grad_norm": 0.5691003203392029, "learning_rate": 1.5839524148954622e-06, "loss": 0.46432405710220337, "step": 1915 }, { "epoch": 2.4853203568532036, "grad_norm": 0.5725374221801758, "learning_rate": 1.5762379674490048e-06, "loss": 0.46116703748703003, "step": 1916 }, { "epoch": 2.48661800486618, "grad_norm": 0.6240981221199036, "learning_rate": 1.5685407444638146e-06, "loss": 0.5304262638092041, "step": 1917 }, { "epoch": 2.4879156528791566, "grad_norm": 0.5866638422012329, "learning_rate": 1.5608607616787663e-06, "loss": 0.46918168663978577, "step": 1918 }, { "epoch": 2.489213300892133, "grad_norm": 0.6103445291519165, "learning_rate": 1.553198034797474e-06, "loss": 0.5785281658172607, "step": 1919 }, { "epoch": 2.4905109489051096, "grad_norm": 0.5748964548110962, "learning_rate": 1.5455525794882841e-06, "loss": 0.47489288449287415, "step": 1920 }, { "epoch": 2.491808596918086, "grad_norm": 0.5849605202674866, "learning_rate": 1.5379244113842106e-06, "loss": 0.5081884860992432, "step": 1921 }, { "epoch": 2.4931062449310626, "grad_norm": 0.5827904343605042, "learning_rate": 1.53031354608293e-06, "loss": 0.5528438091278076, "step": 1922 }, { "epoch": 2.494403892944039, "grad_norm": 0.5817930102348328, "learning_rate": 1.5227199991467335e-06, "loss": 0.5150377154350281, "step": 1923 }, { "epoch": 2.4957015409570156, "grad_norm": 0.5756059288978577, "learning_rate": 1.5151437861025032e-06, "loss": 0.4410705268383026, "step": 1924 }, { "epoch": 2.496999188969992, "grad_norm": 0.5646528005599976, "learning_rate": 1.5075849224416783e-06, "loss": 0.5073448419570923, "step": 1925 }, { "epoch": 2.4982968369829686, "grad_norm": 0.5877253413200378, "learning_rate": 1.5000434236202211e-06, "loss": 0.5140043497085571, "step": 1926 }, { "epoch": 2.499594484995945, "grad_norm": 0.5703092813491821, "learning_rate": 1.4925193050585873e-06, "loss": 0.5106258392333984, "step": 1927 }, { "epoch": 2.5008921330089215, "grad_norm": 0.5841608643531799, "learning_rate": 1.4850125821416983e-06, "loss": 0.49111461639404297, "step": 1928 }, { "epoch": 2.502189781021898, "grad_norm": 0.5806940197944641, "learning_rate": 1.4775232702188947e-06, "loss": 0.477137953042984, "step": 1929 }, { "epoch": 2.503487429034874, "grad_norm": 0.5762841105461121, "learning_rate": 1.4700513846039332e-06, "loss": 0.4592735469341278, "step": 1930 }, { "epoch": 2.504785077047851, "grad_norm": 0.5808306932449341, "learning_rate": 1.4625969405749218e-06, "loss": 0.5200600624084473, "step": 1931 }, { "epoch": 2.5060827250608275, "grad_norm": 0.5846347212791443, "learning_rate": 1.4551599533743155e-06, "loss": 0.5185432434082031, "step": 1932 }, { "epoch": 2.507380373073804, "grad_norm": 0.6160796284675598, "learning_rate": 1.4477404382088689e-06, "loss": 0.5391091108322144, "step": 1933 }, { "epoch": 2.50867802108678, "grad_norm": 0.5582398176193237, "learning_rate": 1.4403384102496132e-06, "loss": 0.4704029858112335, "step": 1934 }, { "epoch": 2.509975669099757, "grad_norm": 0.5653654932975769, "learning_rate": 1.4329538846318225e-06, "loss": 0.524503231048584, "step": 1935 }, { "epoch": 2.511273317112733, "grad_norm": 0.5886475443840027, "learning_rate": 1.4255868764549852e-06, "loss": 0.4819219708442688, "step": 1936 }, { "epoch": 2.5125709651257098, "grad_norm": 0.5662146806716919, "learning_rate": 1.4182374007827605e-06, "loss": 0.5265961289405823, "step": 1937 }, { "epoch": 2.513868613138686, "grad_norm": 0.5975550413131714, "learning_rate": 1.410905472642975e-06, "loss": 0.5036963224411011, "step": 1938 }, { "epoch": 2.5151662611516628, "grad_norm": 0.5727776885032654, "learning_rate": 1.4035911070275576e-06, "loss": 0.4989280104637146, "step": 1939 }, { "epoch": 2.516463909164639, "grad_norm": 0.6097977161407471, "learning_rate": 1.3962943188925438e-06, "loss": 0.535049557685852, "step": 1940 }, { "epoch": 2.5177615571776153, "grad_norm": 0.5695138573646545, "learning_rate": 1.3890151231580117e-06, "loss": 0.5146960020065308, "step": 1941 }, { "epoch": 2.519059205190592, "grad_norm": 0.5890569686889648, "learning_rate": 1.3817535347080768e-06, "loss": 0.5350029468536377, "step": 1942 }, { "epoch": 2.5203568532035687, "grad_norm": 0.5916978120803833, "learning_rate": 1.3745095683908482e-06, "loss": 0.5213718414306641, "step": 1943 }, { "epoch": 2.521654501216545, "grad_norm": 0.5767956972122192, "learning_rate": 1.3672832390184042e-06, "loss": 0.506149411201477, "step": 1944 }, { "epoch": 2.5229521492295213, "grad_norm": 0.5916143655776978, "learning_rate": 1.3600745613667598e-06, "loss": 0.5128974318504333, "step": 1945 }, { "epoch": 2.524249797242498, "grad_norm": 0.5634325742721558, "learning_rate": 1.3528835501758365e-06, "loss": 0.5004685521125793, "step": 1946 }, { "epoch": 2.5255474452554747, "grad_norm": 0.5783470869064331, "learning_rate": 1.345710220149431e-06, "loss": 0.5014833807945251, "step": 1947 }, { "epoch": 2.526845093268451, "grad_norm": 0.5838568210601807, "learning_rate": 1.3385545859551886e-06, "loss": 0.540973424911499, "step": 1948 }, { "epoch": 2.5281427412814272, "grad_norm": 0.5862357020378113, "learning_rate": 1.3314166622245717e-06, "loss": 0.5124210715293884, "step": 1949 }, { "epoch": 2.529440389294404, "grad_norm": 0.5789701342582703, "learning_rate": 1.324296463552821e-06, "loss": 0.4796435236930847, "step": 1950 }, { "epoch": 2.5307380373073802, "grad_norm": 0.5998684167861938, "learning_rate": 1.3171940044989495e-06, "loss": 0.5745923519134521, "step": 1951 }, { "epoch": 2.532035685320357, "grad_norm": 0.5753020644187927, "learning_rate": 1.3101092995856802e-06, "loss": 0.4947076439857483, "step": 1952 }, { "epoch": 2.533333333333333, "grad_norm": 0.5820896029472351, "learning_rate": 1.3030423632994493e-06, "loss": 0.4961175322532654, "step": 1953 }, { "epoch": 2.53463098134631, "grad_norm": 0.5677821040153503, "learning_rate": 1.2959932100903472e-06, "loss": 0.49631717801094055, "step": 1954 }, { "epoch": 2.535928629359286, "grad_norm": 0.5767098665237427, "learning_rate": 1.2889618543721094e-06, "loss": 0.5189783573150635, "step": 1955 }, { "epoch": 2.537226277372263, "grad_norm": 0.5949708819389343, "learning_rate": 1.2819483105220798e-06, "loss": 0.5087240934371948, "step": 1956 }, { "epoch": 2.538523925385239, "grad_norm": 0.582380473613739, "learning_rate": 1.274952592881179e-06, "loss": 0.48820894956588745, "step": 1957 }, { "epoch": 2.539821573398216, "grad_norm": 0.578072726726532, "learning_rate": 1.2679747157538801e-06, "loss": 0.5089854598045349, "step": 1958 }, { "epoch": 2.541119221411192, "grad_norm": 0.5774610042572021, "learning_rate": 1.2610146934081768e-06, "loss": 0.49252915382385254, "step": 1959 }, { "epoch": 2.5424168694241684, "grad_norm": 0.58970707654953, "learning_rate": 1.2540725400755472e-06, "loss": 0.5605252981185913, "step": 1960 }, { "epoch": 2.543714517437145, "grad_norm": 0.566736102104187, "learning_rate": 1.2471482699509463e-06, "loss": 0.5428552627563477, "step": 1961 }, { "epoch": 2.545012165450122, "grad_norm": 0.5720308423042297, "learning_rate": 1.2402418971927487e-06, "loss": 0.5265427827835083, "step": 1962 }, { "epoch": 2.546309813463098, "grad_norm": 0.5800856351852417, "learning_rate": 1.2333534359227383e-06, "loss": 0.5138852596282959, "step": 1963 }, { "epoch": 2.5476074614760744, "grad_norm": 0.5780075788497925, "learning_rate": 1.226482900226077e-06, "loss": 0.48286569118499756, "step": 1964 }, { "epoch": 2.548905109489051, "grad_norm": 0.5666484236717224, "learning_rate": 1.2196303041512714e-06, "loss": 0.5184611082077026, "step": 1965 }, { "epoch": 2.550202757502028, "grad_norm": 0.5936673879623413, "learning_rate": 1.2127956617101445e-06, "loss": 0.5331882238388062, "step": 1966 }, { "epoch": 2.551500405515004, "grad_norm": 0.5658625364303589, "learning_rate": 1.2059789868778116e-06, "loss": 0.5007424354553223, "step": 1967 }, { "epoch": 2.5527980535279804, "grad_norm": 0.5596531629562378, "learning_rate": 1.1991802935926455e-06, "loss": 0.4455481767654419, "step": 1968 }, { "epoch": 2.554095701540957, "grad_norm": 0.5873602628707886, "learning_rate": 1.1923995957562585e-06, "loss": 0.4800918698310852, "step": 1969 }, { "epoch": 2.5553933495539334, "grad_norm": 0.5768440961837769, "learning_rate": 1.1856369072334517e-06, "loss": 0.5240867733955383, "step": 1970 }, { "epoch": 2.55669099756691, "grad_norm": 0.5888426899909973, "learning_rate": 1.178892241852222e-06, "loss": 0.4650096893310547, "step": 1971 }, { "epoch": 2.5579886455798864, "grad_norm": 0.5748341083526611, "learning_rate": 1.1721656134036962e-06, "loss": 0.5009864568710327, "step": 1972 }, { "epoch": 2.559286293592863, "grad_norm": 0.5902095437049866, "learning_rate": 1.165457035642128e-06, "loss": 0.5109707117080688, "step": 1973 }, { "epoch": 2.5605839416058394, "grad_norm": 0.6467815041542053, "learning_rate": 1.1587665222848643e-06, "loss": 0.4991541802883148, "step": 1974 }, { "epoch": 2.5618815896188156, "grad_norm": 0.5866140127182007, "learning_rate": 1.1520940870123065e-06, "loss": 0.48706984519958496, "step": 1975 }, { "epoch": 2.5631792376317923, "grad_norm": 0.5842229127883911, "learning_rate": 1.1454397434679022e-06, "loss": 0.5219037532806396, "step": 1976 }, { "epoch": 2.564476885644769, "grad_norm": 0.5731110572814941, "learning_rate": 1.1388035052580936e-06, "loss": 0.5115249156951904, "step": 1977 }, { "epoch": 2.5657745336577453, "grad_norm": 0.5784810185432434, "learning_rate": 1.1321853859523113e-06, "loss": 0.49307000637054443, "step": 1978 }, { "epoch": 2.5670721816707216, "grad_norm": 0.5523423552513123, "learning_rate": 1.1255853990829323e-06, "loss": 0.4534381031990051, "step": 1979 }, { "epoch": 2.5683698296836983, "grad_norm": 0.576626718044281, "learning_rate": 1.119003558145262e-06, "loss": 0.5025165677070618, "step": 1980 }, { "epoch": 2.569667477696675, "grad_norm": 0.6068827509880066, "learning_rate": 1.1124398765974976e-06, "loss": 0.5154992341995239, "step": 1981 }, { "epoch": 2.5709651257096513, "grad_norm": 0.5544149875640869, "learning_rate": 1.1058943678607082e-06, "loss": 0.4641039967536926, "step": 1982 }, { "epoch": 2.5722627737226276, "grad_norm": 0.591013491153717, "learning_rate": 1.0993670453187965e-06, "loss": 0.5354744791984558, "step": 1983 }, { "epoch": 2.5735604217356043, "grad_norm": 0.5729239583015442, "learning_rate": 1.0928579223184943e-06, "loss": 0.4895523190498352, "step": 1984 }, { "epoch": 2.5748580697485806, "grad_norm": 0.5629091858863831, "learning_rate": 1.0863670121693037e-06, "loss": 0.4998272955417633, "step": 1985 }, { "epoch": 2.5761557177615573, "grad_norm": 0.5692305564880371, "learning_rate": 1.0798943281434958e-06, "loss": 0.5316153764724731, "step": 1986 }, { "epoch": 2.5774533657745335, "grad_norm": 0.5875282287597656, "learning_rate": 1.0734398834760695e-06, "loss": 0.47188982367515564, "step": 1987 }, { "epoch": 2.5787510137875103, "grad_norm": 0.613525927066803, "learning_rate": 1.067003691364733e-06, "loss": 0.5325276851654053, "step": 1988 }, { "epoch": 2.5800486618004865, "grad_norm": 0.5971388816833496, "learning_rate": 1.060585764969867e-06, "loss": 0.5428590774536133, "step": 1989 }, { "epoch": 2.5813463098134632, "grad_norm": 0.5674665570259094, "learning_rate": 1.0541861174145097e-06, "loss": 0.47022098302841187, "step": 1990 }, { "epoch": 2.5826439578264395, "grad_norm": 0.5619399547576904, "learning_rate": 1.047804761784319e-06, "loss": 0.48155295848846436, "step": 1991 }, { "epoch": 2.5839416058394162, "grad_norm": 0.5751737952232361, "learning_rate": 1.0414417111275533e-06, "loss": 0.5390469431877136, "step": 1992 }, { "epoch": 2.5852392538523925, "grad_norm": 0.5782447457313538, "learning_rate": 1.0350969784550368e-06, "loss": 0.5048004984855652, "step": 1993 }, { "epoch": 2.5865369018653688, "grad_norm": 0.5656158328056335, "learning_rate": 1.028770576740148e-06, "loss": 0.5237029194831848, "step": 1994 }, { "epoch": 2.5878345498783455, "grad_norm": 0.568681538105011, "learning_rate": 1.022462518918772e-06, "loss": 0.4539422392845154, "step": 1995 }, { "epoch": 2.589132197891322, "grad_norm": 0.560100793838501, "learning_rate": 1.0161728178892928e-06, "loss": 0.45414865016937256, "step": 1996 }, { "epoch": 2.5904298459042985, "grad_norm": 0.5668950080871582, "learning_rate": 1.0099014865125557e-06, "loss": 0.4774186611175537, "step": 1997 }, { "epoch": 2.5917274939172747, "grad_norm": 0.606434166431427, "learning_rate": 1.0036485376118477e-06, "loss": 0.565065324306488, "step": 1998 }, { "epoch": 2.5930251419302515, "grad_norm": 0.5841239094734192, "learning_rate": 9.974139839728658e-07, "loss": 0.5483173131942749, "step": 1999 }, { "epoch": 2.5943227899432277, "grad_norm": 0.591903805732727, "learning_rate": 9.91197838343696e-07, "loss": 0.539207398891449, "step": 2000 }, { "epoch": 2.5956204379562045, "grad_norm": 0.5807414650917053, "learning_rate": 9.850001134347765e-07, "loss": 0.5179691314697266, "step": 2001 }, { "epoch": 2.5969180859691807, "grad_norm": 0.5769233107566833, "learning_rate": 9.788208219188932e-07, "loss": 0.4748839735984802, "step": 2002 }, { "epoch": 2.5982157339821574, "grad_norm": 0.5766239762306213, "learning_rate": 9.726599764311318e-07, "loss": 0.48025619983673096, "step": 2003 }, { "epoch": 2.5995133819951337, "grad_norm": 0.5754262208938599, "learning_rate": 9.665175895688594e-07, "loss": 0.47812211513519287, "step": 2004 }, { "epoch": 2.6008110300081104, "grad_norm": 0.5699096918106079, "learning_rate": 9.603936738917063e-07, "loss": 0.5337727069854736, "step": 2005 }, { "epoch": 2.6021086780210867, "grad_norm": 0.6039567589759827, "learning_rate": 9.54288241921525e-07, "loss": 0.5216813087463379, "step": 2006 }, { "epoch": 2.6034063260340634, "grad_norm": 0.5594240427017212, "learning_rate": 9.482013061423833e-07, "loss": 0.5251287221908569, "step": 2007 }, { "epoch": 2.6047039740470397, "grad_norm": 0.5856126546859741, "learning_rate": 9.421328790005213e-07, "loss": 0.5040426850318909, "step": 2008 }, { "epoch": 2.606001622060016, "grad_norm": 0.5794676542282104, "learning_rate": 9.360829729043375e-07, "loss": 0.5068378448486328, "step": 2009 }, { "epoch": 2.6072992700729927, "grad_norm": 0.5879704356193542, "learning_rate": 9.300516002243587e-07, "loss": 0.5116778016090393, "step": 2010 }, { "epoch": 2.6085969180859694, "grad_norm": 0.5978105068206787, "learning_rate": 9.240387732932155e-07, "loss": 0.525846004486084, "step": 2011 }, { "epoch": 2.6098945660989457, "grad_norm": 0.5788280367851257, "learning_rate": 9.180445044056164e-07, "loss": 0.5172775983810425, "step": 2012 }, { "epoch": 2.611192214111922, "grad_norm": 0.5901548862457275, "learning_rate": 9.120688058183269e-07, "loss": 0.5301088094711304, "step": 2013 }, { "epoch": 2.6124898621248986, "grad_norm": 0.5967061519622803, "learning_rate": 9.061116897501321e-07, "loss": 0.5318504571914673, "step": 2014 }, { "epoch": 2.6137875101378754, "grad_norm": 0.5555222034454346, "learning_rate": 9.001731683818338e-07, "loss": 0.5011588335037231, "step": 2015 }, { "epoch": 2.6150851581508516, "grad_norm": 0.613298237323761, "learning_rate": 8.942532538561988e-07, "loss": 0.5700482130050659, "step": 2016 }, { "epoch": 2.616382806163828, "grad_norm": 0.599183201789856, "learning_rate": 8.883519582779598e-07, "loss": 0.5524272322654724, "step": 2017 }, { "epoch": 2.6176804541768046, "grad_norm": 0.6120027899742126, "learning_rate": 8.82469293713768e-07, "loss": 0.47205424308776855, "step": 2018 }, { "epoch": 2.618978102189781, "grad_norm": 0.5907730460166931, "learning_rate": 8.766052721921858e-07, "loss": 0.507009744644165, "step": 2019 }, { "epoch": 2.6202757502027576, "grad_norm": 0.5603318810462952, "learning_rate": 8.70759905703652e-07, "loss": 0.48432788252830505, "step": 2020 }, { "epoch": 2.621573398215734, "grad_norm": 0.5962936282157898, "learning_rate": 8.649332062004622e-07, "loss": 0.4898841381072998, "step": 2021 }, { "epoch": 2.6228710462287106, "grad_norm": 0.7598771452903748, "learning_rate": 8.59125185596742e-07, "loss": 0.5321274995803833, "step": 2022 }, { "epoch": 2.624168694241687, "grad_norm": 0.5821399092674255, "learning_rate": 8.533358557684246e-07, "loss": 0.512812614440918, "step": 2023 }, { "epoch": 2.6254663422546636, "grad_norm": 0.5900049805641174, "learning_rate": 8.475652285532199e-07, "loss": 0.5129188299179077, "step": 2024 }, { "epoch": 2.62676399026764, "grad_norm": 0.5779396295547485, "learning_rate": 8.41813315750607e-07, "loss": 0.4839695394039154, "step": 2025 }, { "epoch": 2.6280616382806166, "grad_norm": 0.581840455532074, "learning_rate": 8.360801291217835e-07, "loss": 0.4942781925201416, "step": 2026 }, { "epoch": 2.629359286293593, "grad_norm": 0.5503793954849243, "learning_rate": 8.303656803896731e-07, "loss": 0.4754694700241089, "step": 2027 }, { "epoch": 2.630656934306569, "grad_norm": 0.5595881342887878, "learning_rate": 8.246699812388714e-07, "loss": 0.48087698221206665, "step": 2028 }, { "epoch": 2.631954582319546, "grad_norm": 0.5697108507156372, "learning_rate": 8.189930433156424e-07, "loss": 0.5032870173454285, "step": 2029 }, { "epoch": 2.6332522303325225, "grad_norm": 0.5761867761611938, "learning_rate": 8.133348782278916e-07, "loss": 0.5013032555580139, "step": 2030 }, { "epoch": 2.634549878345499, "grad_norm": 0.6058787703514099, "learning_rate": 8.07695497545129e-07, "loss": 0.44857025146484375, "step": 2031 }, { "epoch": 2.635847526358475, "grad_norm": 0.5961512327194214, "learning_rate": 8.020749127984629e-07, "loss": 0.5228594541549683, "step": 2032 }, { "epoch": 2.637145174371452, "grad_norm": 0.5766192078590393, "learning_rate": 7.964731354805677e-07, "loss": 0.4745315611362457, "step": 2033 }, { "epoch": 2.638442822384428, "grad_norm": 0.5896121859550476, "learning_rate": 7.908901770456579e-07, "loss": 0.519614577293396, "step": 2034 }, { "epoch": 2.639740470397405, "grad_norm": 0.5732361078262329, "learning_rate": 7.853260489094727e-07, "loss": 0.48370620608329773, "step": 2035 }, { "epoch": 2.641038118410381, "grad_norm": 0.5929004549980164, "learning_rate": 7.79780762449246e-07, "loss": 0.5153477191925049, "step": 2036 }, { "epoch": 2.6423357664233578, "grad_norm": 0.587020754814148, "learning_rate": 7.742543290036797e-07, "loss": 0.4829615652561188, "step": 2037 }, { "epoch": 2.643633414436334, "grad_norm": 0.5629860758781433, "learning_rate": 7.687467598729403e-07, "loss": 0.5223960876464844, "step": 2038 }, { "epoch": 2.6449310624493108, "grad_norm": 0.5553507208824158, "learning_rate": 7.63258066318604e-07, "loss": 0.4827447235584259, "step": 2039 }, { "epoch": 2.646228710462287, "grad_norm": 0.5940564274787903, "learning_rate": 7.577882595636665e-07, "loss": 0.538356602191925, "step": 2040 }, { "epoch": 2.6475263584752637, "grad_norm": 0.5712041258811951, "learning_rate": 7.523373507924947e-07, "loss": 0.48258891701698303, "step": 2041 }, { "epoch": 2.64882400648824, "grad_norm": 0.5664177536964417, "learning_rate": 7.469053511508184e-07, "loss": 0.4672595262527466, "step": 2042 }, { "epoch": 2.6501216545012163, "grad_norm": 0.6014147996902466, "learning_rate": 7.414922717457018e-07, "loss": 0.5549574494361877, "step": 2043 }, { "epoch": 2.651419302514193, "grad_norm": 0.588028073310852, "learning_rate": 7.360981236455222e-07, "loss": 0.5366802215576172, "step": 2044 }, { "epoch": 2.6527169505271697, "grad_norm": 0.5555592179298401, "learning_rate": 7.307229178799469e-07, "loss": 0.49787813425064087, "step": 2045 }, { "epoch": 2.654014598540146, "grad_norm": 0.5918477177619934, "learning_rate": 7.253666654399128e-07, "loss": 0.5271812081336975, "step": 2046 }, { "epoch": 2.6553122465531223, "grad_norm": 0.6544379591941833, "learning_rate": 7.200293772775968e-07, "loss": 0.5332372784614563, "step": 2047 }, { "epoch": 2.656609894566099, "grad_norm": 0.578555166721344, "learning_rate": 7.14711064306407e-07, "loss": 0.496245801448822, "step": 2048 }, { "epoch": 2.6579075425790757, "grad_norm": 0.5929746627807617, "learning_rate": 7.094117374009446e-07, "loss": 0.5187441110610962, "step": 2049 }, { "epoch": 2.659205190592052, "grad_norm": 0.5854722261428833, "learning_rate": 7.041314073969918e-07, "loss": 0.4945400655269623, "step": 2050 }, { "epoch": 2.6605028386050282, "grad_norm": 0.6011053323745728, "learning_rate": 6.988700850914876e-07, "loss": 0.48466387391090393, "step": 2051 }, { "epoch": 2.661800486618005, "grad_norm": 0.5774915814399719, "learning_rate": 6.93627781242504e-07, "loss": 0.5133316516876221, "step": 2052 }, { "epoch": 2.663098134630981, "grad_norm": 0.5776026248931885, "learning_rate": 6.884045065692257e-07, "loss": 0.5115536451339722, "step": 2053 }, { "epoch": 2.664395782643958, "grad_norm": 0.6011329293251038, "learning_rate": 6.83200271751927e-07, "loss": 0.5355618000030518, "step": 2054 }, { "epoch": 2.665693430656934, "grad_norm": 0.5973834991455078, "learning_rate": 6.780150874319524e-07, "loss": 0.5230112075805664, "step": 2055 }, { "epoch": 2.666991078669911, "grad_norm": 0.5917934775352478, "learning_rate": 6.72848964211692e-07, "loss": 0.5399461388587952, "step": 2056 }, { "epoch": 2.668288726682887, "grad_norm": 0.5736814141273499, "learning_rate": 6.677019126545548e-07, "loss": 0.49193501472473145, "step": 2057 }, { "epoch": 2.669586374695864, "grad_norm": 0.5814056396484375, "learning_rate": 6.625739432849643e-07, "loss": 0.5203338861465454, "step": 2058 }, { "epoch": 2.67088402270884, "grad_norm": 0.601714015007019, "learning_rate": 6.574650665883197e-07, "loss": 0.5449438095092773, "step": 2059 }, { "epoch": 2.672181670721817, "grad_norm": 0.5884926319122314, "learning_rate": 6.523752930109761e-07, "loss": 0.5138452053070068, "step": 2060 }, { "epoch": 2.673479318734793, "grad_norm": 0.5702131390571594, "learning_rate": 6.473046329602384e-07, "loss": 0.4545958638191223, "step": 2061 }, { "epoch": 2.6747769667477694, "grad_norm": 0.5839261412620544, "learning_rate": 6.422530968043173e-07, "loss": 0.5412476658821106, "step": 2062 }, { "epoch": 2.676074614760746, "grad_norm": 0.5880113244056702, "learning_rate": 6.372206948723292e-07, "loss": 0.5263261795043945, "step": 2063 }, { "epoch": 2.677372262773723, "grad_norm": 0.5763228535652161, "learning_rate": 6.322074374542608e-07, "loss": 0.5082492828369141, "step": 2064 }, { "epoch": 2.678669910786699, "grad_norm": 0.5878806710243225, "learning_rate": 6.272133348009546e-07, "loss": 0.5076773166656494, "step": 2065 }, { "epoch": 2.6799675587996754, "grad_norm": 0.5525650978088379, "learning_rate": 6.222383971240875e-07, "loss": 0.48154234886169434, "step": 2066 }, { "epoch": 2.681265206812652, "grad_norm": 0.6016013622283936, "learning_rate": 6.17282634596148e-07, "loss": 0.503459095954895, "step": 2067 }, { "epoch": 2.6825628548256284, "grad_norm": 0.6026131510734558, "learning_rate": 6.123460573504147e-07, "loss": 0.4821071922779083, "step": 2068 }, { "epoch": 2.683860502838605, "grad_norm": 0.5926850438117981, "learning_rate": 6.074286754809411e-07, "loss": 0.5161428451538086, "step": 2069 }, { "epoch": 2.6851581508515814, "grad_norm": 0.5853096842765808, "learning_rate": 6.025304990425241e-07, "loss": 0.5262787342071533, "step": 2070 }, { "epoch": 2.6851581508515814, "eval_loss": 0.6954009532928467, "eval_runtime": 72.3609, "eval_samples_per_second": 71.751, "eval_steps_per_second": 8.969, "step": 2070 }, { "epoch": 2.686455798864558, "grad_norm": 0.5976012945175171, "learning_rate": 5.976515380507008e-07, "loss": 0.5311732888221741, "step": 2071 }, { "epoch": 2.6877534468775344, "grad_norm": 0.5981724262237549, "learning_rate": 5.927918024817059e-07, "loss": 0.5703781247138977, "step": 2072 }, { "epoch": 2.689051094890511, "grad_norm": 0.5645772814750671, "learning_rate": 5.879513022724714e-07, "loss": 0.4812767505645752, "step": 2073 }, { "epoch": 2.6903487429034874, "grad_norm": 0.5886021852493286, "learning_rate": 5.831300473205948e-07, "loss": 0.5149608254432678, "step": 2074 }, { "epoch": 2.691646390916464, "grad_norm": 0.5895439982414246, "learning_rate": 5.783280474843222e-07, "loss": 0.5148745179176331, "step": 2075 }, { "epoch": 2.6929440389294403, "grad_norm": 0.571723461151123, "learning_rate": 5.735453125825275e-07, "loss": 0.5035296082496643, "step": 2076 }, { "epoch": 2.6942416869424166, "grad_norm": 0.6077845096588135, "learning_rate": 5.687818523946931e-07, "loss": 0.5260845422744751, "step": 2077 }, { "epoch": 2.6955393349553933, "grad_norm": 0.5872023105621338, "learning_rate": 5.640376766608902e-07, "loss": 0.49081629514694214, "step": 2078 }, { "epoch": 2.69683698296837, "grad_norm": 0.5637922286987305, "learning_rate": 5.593127950817579e-07, "loss": 0.49831029772758484, "step": 2079 }, { "epoch": 2.6981346309813463, "grad_norm": 0.588504433631897, "learning_rate": 5.546072173184791e-07, "loss": 0.5403261184692383, "step": 2080 }, { "epoch": 2.6994322789943226, "grad_norm": 0.5554431080818176, "learning_rate": 5.499209529927751e-07, "loss": 0.4801977872848511, "step": 2081 }, { "epoch": 2.7007299270072993, "grad_norm": 0.594923198223114, "learning_rate": 5.452540116868654e-07, "loss": 0.552370011806488, "step": 2082 }, { "epoch": 2.702027575020276, "grad_norm": 0.5900223851203918, "learning_rate": 5.406064029434666e-07, "loss": 0.5598849058151245, "step": 2083 }, { "epoch": 2.7033252230332523, "grad_norm": 0.5767436027526855, "learning_rate": 5.359781362657623e-07, "loss": 0.5048878192901611, "step": 2084 }, { "epoch": 2.7046228710462286, "grad_norm": 0.551128089427948, "learning_rate": 5.313692211173838e-07, "loss": 0.5155936479568481, "step": 2085 }, { "epoch": 2.7059205190592053, "grad_norm": 0.5880531072616577, "learning_rate": 5.26779666922399e-07, "loss": 0.5444161295890808, "step": 2086 }, { "epoch": 2.7072181670721815, "grad_norm": 0.5545855164527893, "learning_rate": 5.222094830652835e-07, "loss": 0.4949781894683838, "step": 2087 }, { "epoch": 2.7085158150851583, "grad_norm": 0.5254430174827576, "learning_rate": 5.176586788909066e-07, "loss": 0.48143208026885986, "step": 2088 }, { "epoch": 2.7098134630981345, "grad_norm": 0.5895472764968872, "learning_rate": 5.131272637045104e-07, "loss": 0.5467052459716797, "step": 2089 }, { "epoch": 2.7111111111111112, "grad_norm": 0.5603579878807068, "learning_rate": 5.086152467716932e-07, "loss": 0.48797622323036194, "step": 2090 }, { "epoch": 2.7124087591240875, "grad_norm": 0.5788029432296753, "learning_rate": 5.041226373183861e-07, "loss": 0.5119057297706604, "step": 2091 }, { "epoch": 2.7137064071370642, "grad_norm": 0.5590220093727112, "learning_rate": 4.996494445308409e-07, "loss": 0.46394574642181396, "step": 2092 }, { "epoch": 2.7150040551500405, "grad_norm": 0.5895569920539856, "learning_rate": 4.951956775556e-07, "loss": 0.4952976703643799, "step": 2093 }, { "epoch": 2.7163017031630172, "grad_norm": 0.5719903707504272, "learning_rate": 4.907613454994964e-07, "loss": 0.5015777349472046, "step": 2094 }, { "epoch": 2.7175993511759935, "grad_norm": 0.5849481821060181, "learning_rate": 4.863464574296106e-07, "loss": 0.5244485139846802, "step": 2095 }, { "epoch": 2.7188969991889698, "grad_norm": 0.5956225991249084, "learning_rate": 4.819510223732738e-07, "loss": 0.5492672324180603, "step": 2096 }, { "epoch": 2.7201946472019465, "grad_norm": 0.5836542844772339, "learning_rate": 4.775750493180386e-07, "loss": 0.48292914032936096, "step": 2097 }, { "epoch": 2.721492295214923, "grad_norm": 0.5966354012489319, "learning_rate": 4.7321854721166127e-07, "loss": 0.5208597183227539, "step": 2098 }, { "epoch": 2.7227899432278995, "grad_norm": 0.536894679069519, "learning_rate": 4.6888152496208593e-07, "loss": 0.4349246621131897, "step": 2099 }, { "epoch": 2.7240875912408757, "grad_norm": 0.589508593082428, "learning_rate": 4.645639914374278e-07, "loss": 0.5353684425354004, "step": 2100 }, { "epoch": 2.7253852392538525, "grad_norm": 0.5571612119674683, "learning_rate": 4.602659554659461e-07, "loss": 0.4614424705505371, "step": 2101 }, { "epoch": 2.7266828872668287, "grad_norm": 0.6046862602233887, "learning_rate": 4.559874258360408e-07, "loss": 0.5189507603645325, "step": 2102 }, { "epoch": 2.7279805352798054, "grad_norm": 0.5680896639823914, "learning_rate": 4.5172841129621726e-07, "loss": 0.5085829496383667, "step": 2103 }, { "epoch": 2.7292781832927817, "grad_norm": 0.5765218138694763, "learning_rate": 4.474889205550881e-07, "loss": 0.5140299797058105, "step": 2104 }, { "epoch": 2.7305758313057584, "grad_norm": 0.587651252746582, "learning_rate": 4.4326896228133354e-07, "loss": 0.4957928955554962, "step": 2105 }, { "epoch": 2.7318734793187347, "grad_norm": 0.5494794249534607, "learning_rate": 4.3906854510370245e-07, "loss": 0.5062738060951233, "step": 2106 }, { "epoch": 2.7331711273317114, "grad_norm": 0.5937455296516418, "learning_rate": 4.348876776109856e-07, "loss": 0.5094043016433716, "step": 2107 }, { "epoch": 2.7344687753446877, "grad_norm": 0.5641949772834778, "learning_rate": 4.307263683519969e-07, "loss": 0.48215553164482117, "step": 2108 }, { "epoch": 2.7357664233576644, "grad_norm": 0.5819230079650879, "learning_rate": 4.2658462583556216e-07, "loss": 0.5357835292816162, "step": 2109 }, { "epoch": 2.7370640713706407, "grad_norm": 0.5532712936401367, "learning_rate": 4.2246245853049706e-07, "loss": 0.47937077283859253, "step": 2110 }, { "epoch": 2.738361719383617, "grad_norm": 0.6110063195228577, "learning_rate": 4.1835987486558595e-07, "loss": 0.4744276702404022, "step": 2111 }, { "epoch": 2.7396593673965937, "grad_norm": 0.5573598146438599, "learning_rate": 4.142768832295807e-07, "loss": 0.5128625631332397, "step": 2112 }, { "epoch": 2.7409570154095704, "grad_norm": 0.5569184422492981, "learning_rate": 4.102134919711609e-07, "loss": 0.47407659888267517, "step": 2113 }, { "epoch": 2.7422546634225466, "grad_norm": 0.5868476629257202, "learning_rate": 4.061697093989347e-07, "loss": 0.5311683416366577, "step": 2114 }, { "epoch": 2.743552311435523, "grad_norm": 0.5694899559020996, "learning_rate": 4.021455437814148e-07, "loss": 0.4629291892051697, "step": 2115 }, { "epoch": 2.7448499594484996, "grad_norm": 0.5624482035636902, "learning_rate": 3.981410033469979e-07, "loss": 0.4855622351169586, "step": 2116 }, { "epoch": 2.7461476074614763, "grad_norm": 0.576919436454773, "learning_rate": 3.941560962839619e-07, "loss": 0.47935816645622253, "step": 2117 }, { "epoch": 2.7474452554744526, "grad_norm": 0.5966827869415283, "learning_rate": 3.9019083074042784e-07, "loss": 0.4561656415462494, "step": 2118 }, { "epoch": 2.748742903487429, "grad_norm": 0.5702851414680481, "learning_rate": 3.862452148243623e-07, "loss": 0.4796487092971802, "step": 2119 }, { "epoch": 2.7500405515004056, "grad_norm": 0.5755755305290222, "learning_rate": 3.823192566035494e-07, "loss": 0.5047421455383301, "step": 2120 }, { "epoch": 2.751338199513382, "grad_norm": 0.5769697427749634, "learning_rate": 3.7841296410558225e-07, "loss": 0.48532968759536743, "step": 2121 }, { "epoch": 2.7526358475263586, "grad_norm": 0.5873609781265259, "learning_rate": 3.7452634531783935e-07, "loss": 0.5122209787368774, "step": 2122 }, { "epoch": 2.753933495539335, "grad_norm": 0.5939727425575256, "learning_rate": 3.706594081874737e-07, "loss": 0.49794304370880127, "step": 2123 }, { "epoch": 2.7552311435523116, "grad_norm": 0.5834800601005554, "learning_rate": 3.6681216062138923e-07, "loss": 0.5340889096260071, "step": 2124 }, { "epoch": 2.756528791565288, "grad_norm": 0.576677680015564, "learning_rate": 3.6298461048623887e-07, "loss": 0.5236599445343018, "step": 2125 }, { "epoch": 2.757826439578264, "grad_norm": 0.5462478399276733, "learning_rate": 3.5917676560838775e-07, "loss": 0.47627806663513184, "step": 2126 }, { "epoch": 2.759124087591241, "grad_norm": 0.5982619524002075, "learning_rate": 3.5538863377392095e-07, "loss": 0.4933459460735321, "step": 2127 }, { "epoch": 2.7604217356042176, "grad_norm": 0.5802999138832092, "learning_rate": 3.5162022272860475e-07, "loss": 0.5381085872650146, "step": 2128 }, { "epoch": 2.761719383617194, "grad_norm": 0.5820630788803101, "learning_rate": 3.478715401778876e-07, "loss": 0.5177547931671143, "step": 2129 }, { "epoch": 2.76301703163017, "grad_norm": 0.6046480536460876, "learning_rate": 3.44142593786877e-07, "loss": 0.5715194940567017, "step": 2130 }, { "epoch": 2.764314679643147, "grad_norm": 0.5816249847412109, "learning_rate": 3.404333911803237e-07, "loss": 0.48858851194381714, "step": 2131 }, { "epoch": 2.7656123276561235, "grad_norm": 0.5709452629089355, "learning_rate": 3.367439399426087e-07, "loss": 0.5259594917297363, "step": 2132 }, { "epoch": 2.7669099756691, "grad_norm": 0.5610825419425964, "learning_rate": 3.330742476177273e-07, "loss": 0.49785754084587097, "step": 2133 }, { "epoch": 2.768207623682076, "grad_norm": 0.5751505494117737, "learning_rate": 3.2942432170926743e-07, "loss": 0.45043110847473145, "step": 2134 }, { "epoch": 2.769505271695053, "grad_norm": 0.5675750374794006, "learning_rate": 3.257941696804079e-07, "loss": 0.5171366930007935, "step": 2135 }, { "epoch": 2.770802919708029, "grad_norm": 0.5672844052314758, "learning_rate": 3.2218379895388896e-07, "loss": 0.467257022857666, "step": 2136 }, { "epoch": 2.7721005677210058, "grad_norm": 0.6082518696784973, "learning_rate": 3.185932169120043e-07, "loss": 0.5202172994613647, "step": 2137 }, { "epoch": 2.773398215733982, "grad_norm": 0.5631950497627258, "learning_rate": 3.150224308965866e-07, "loss": 0.5058823823928833, "step": 2138 }, { "epoch": 2.7746958637469588, "grad_norm": 0.6380532383918762, "learning_rate": 3.114714482089898e-07, "loss": 0.5831983089447021, "step": 2139 }, { "epoch": 2.775993511759935, "grad_norm": 0.5557391047477722, "learning_rate": 3.079402761100736e-07, "loss": 0.4567191004753113, "step": 2140 }, { "epoch": 2.7772911597729117, "grad_norm": 0.562920868396759, "learning_rate": 3.0442892182019236e-07, "loss": 0.4184800386428833, "step": 2141 }, { "epoch": 2.778588807785888, "grad_norm": 0.63033127784729, "learning_rate": 3.00937392519175e-07, "loss": 0.5374839901924133, "step": 2142 }, { "epoch": 2.7798864557988647, "grad_norm": 0.5735025405883789, "learning_rate": 2.974656953463173e-07, "loss": 0.4503205716609955, "step": 2143 }, { "epoch": 2.781184103811841, "grad_norm": 0.6051810383796692, "learning_rate": 2.9401383740035983e-07, "loss": 0.4981985092163086, "step": 2144 }, { "epoch": 2.7824817518248173, "grad_norm": 0.6038339734077454, "learning_rate": 2.905818257394799e-07, "loss": 0.5327208638191223, "step": 2145 }, { "epoch": 2.783779399837794, "grad_norm": 0.5686031579971313, "learning_rate": 2.871696673812718e-07, "loss": 0.4990962743759155, "step": 2146 }, { "epoch": 2.7850770478507707, "grad_norm": 0.5870386958122253, "learning_rate": 2.837773693027346e-07, "loss": 0.5274587869644165, "step": 2147 }, { "epoch": 2.786374695863747, "grad_norm": 0.6039890050888062, "learning_rate": 2.8040493844026185e-07, "loss": 0.4969175457954407, "step": 2148 }, { "epoch": 2.7876723438767232, "grad_norm": 0.5605257749557495, "learning_rate": 2.7705238168961867e-07, "loss": 0.466129869222641, "step": 2149 }, { "epoch": 2.7889699918897, "grad_norm": 0.5661087036132812, "learning_rate": 2.7371970590593597e-07, "loss": 0.5182359218597412, "step": 2150 }, { "epoch": 2.7902676399026762, "grad_norm": 0.6032746434211731, "learning_rate": 2.7040691790369165e-07, "loss": 0.4847348928451538, "step": 2151 }, { "epoch": 2.791565287915653, "grad_norm": 0.5873638987541199, "learning_rate": 2.671140244567005e-07, "loss": 0.4982571005821228, "step": 2152 }, { "epoch": 2.792862935928629, "grad_norm": 0.5877160429954529, "learning_rate": 2.6384103229809445e-07, "loss": 0.47337985038757324, "step": 2153 }, { "epoch": 2.794160583941606, "grad_norm": 0.6034269332885742, "learning_rate": 2.605879481203144e-07, "loss": 0.5359882116317749, "step": 2154 }, { "epoch": 2.795458231954582, "grad_norm": 0.5855337381362915, "learning_rate": 2.5735477857509406e-07, "loss": 0.48935824632644653, "step": 2155 }, { "epoch": 2.796755879967559, "grad_norm": 0.5761221647262573, "learning_rate": 2.5414153027344846e-07, "loss": 0.5092116594314575, "step": 2156 }, { "epoch": 2.798053527980535, "grad_norm": 0.5906012654304504, "learning_rate": 2.5094820978565416e-07, "loss": 0.4823336601257324, "step": 2157 }, { "epoch": 2.799351175993512, "grad_norm": 0.5929545164108276, "learning_rate": 2.4777482364124695e-07, "loss": 0.48247990012168884, "step": 2158 }, { "epoch": 2.800648824006488, "grad_norm": 0.5614597797393799, "learning_rate": 2.446213783289941e-07, "loss": 0.48732107877731323, "step": 2159 }, { "epoch": 2.8019464720194645, "grad_norm": 0.6198487281799316, "learning_rate": 2.4148788029689565e-07, "loss": 0.544142484664917, "step": 2160 }, { "epoch": 2.803244120032441, "grad_norm": 0.5842984318733215, "learning_rate": 2.3837433595216174e-07, "loss": 0.5269244313240051, "step": 2161 }, { "epoch": 2.804541768045418, "grad_norm": 0.5822996497154236, "learning_rate": 2.3528075166120323e-07, "loss": 0.49836334586143494, "step": 2162 }, { "epoch": 2.805839416058394, "grad_norm": 0.5670111775398254, "learning_rate": 2.3220713374961457e-07, "loss": 0.5108374357223511, "step": 2163 }, { "epoch": 2.8071370640713704, "grad_norm": 0.5872285962104797, "learning_rate": 2.2915348850216955e-07, "loss": 0.49880123138427734, "step": 2164 }, { "epoch": 2.808434712084347, "grad_norm": 0.5544793605804443, "learning_rate": 2.2611982216279693e-07, "loss": 0.5181583166122437, "step": 2165 }, { "epoch": 2.809732360097324, "grad_norm": 0.5830904245376587, "learning_rate": 2.2310614093457917e-07, "loss": 0.48121365904808044, "step": 2166 }, { "epoch": 2.8110300081103, "grad_norm": 0.6001294255256653, "learning_rate": 2.2011245097972812e-07, "loss": 0.500962495803833, "step": 2167 }, { "epoch": 2.8123276561232764, "grad_norm": 0.6160042881965637, "learning_rate": 2.171387584195861e-07, "loss": 0.5166311264038086, "step": 2168 }, { "epoch": 2.813625304136253, "grad_norm": 0.5664080381393433, "learning_rate": 2.1418506933459926e-07, "loss": 0.4849929213523865, "step": 2169 }, { "epoch": 2.8149229521492294, "grad_norm": 0.60596764087677, "learning_rate": 2.1125138976431425e-07, "loss": 0.5384441018104553, "step": 2170 }, { "epoch": 2.816220600162206, "grad_norm": 0.6017642617225647, "learning_rate": 2.0833772570736376e-07, "loss": 0.5182196497917175, "step": 2171 }, { "epoch": 2.8175182481751824, "grad_norm": 0.567242443561554, "learning_rate": 2.0544408312145325e-07, "loss": 0.5023871660232544, "step": 2172 }, { "epoch": 2.818815896188159, "grad_norm": 0.5743298530578613, "learning_rate": 2.025704679233498e-07, "loss": 0.4737445116043091, "step": 2173 }, { "epoch": 2.8201135442011354, "grad_norm": 0.5686278343200684, "learning_rate": 1.9971688598886874e-07, "loss": 0.4916064441204071, "step": 2174 }, { "epoch": 2.821411192214112, "grad_norm": 0.5849027037620544, "learning_rate": 1.9688334315286383e-07, "loss": 0.5161796808242798, "step": 2175 }, { "epoch": 2.8227088402270883, "grad_norm": 0.5709643959999084, "learning_rate": 1.9406984520921156e-07, "loss": 0.5027370452880859, "step": 2176 }, { "epoch": 2.824006488240065, "grad_norm": 0.6077797412872314, "learning_rate": 1.9127639791080345e-07, "loss": 0.561673641204834, "step": 2177 }, { "epoch": 2.8253041362530413, "grad_norm": 0.5836532711982727, "learning_rate": 1.885030069695326e-07, "loss": 0.5252400636672974, "step": 2178 }, { "epoch": 2.8266017842660176, "grad_norm": 0.5875435471534729, "learning_rate": 1.8574967805628174e-07, "loss": 0.5136289596557617, "step": 2179 }, { "epoch": 2.8278994322789943, "grad_norm": 0.5999600291252136, "learning_rate": 1.8301641680090965e-07, "loss": 0.5113690495491028, "step": 2180 }, { "epoch": 2.829197080291971, "grad_norm": 0.5720099210739136, "learning_rate": 1.8030322879224792e-07, "loss": 0.5277208089828491, "step": 2181 }, { "epoch": 2.8304947283049473, "grad_norm": 0.5587209463119507, "learning_rate": 1.7761011957807439e-07, "loss": 0.5302145481109619, "step": 2182 }, { "epoch": 2.8317923763179236, "grad_norm": 0.574344277381897, "learning_rate": 1.7493709466511965e-07, "loss": 0.5009472370147705, "step": 2183 }, { "epoch": 2.8330900243309003, "grad_norm": 0.5876274704933167, "learning_rate": 1.7228415951904165e-07, "loss": 0.49587976932525635, "step": 2184 }, { "epoch": 2.8343876723438766, "grad_norm": 0.5799663662910461, "learning_rate": 1.6965131956442004e-07, "loss": 0.5200576782226562, "step": 2185 }, { "epoch": 2.8356853203568533, "grad_norm": 0.5789362192153931, "learning_rate": 1.670385801847485e-07, "loss": 0.4996534585952759, "step": 2186 }, { "epoch": 2.8369829683698295, "grad_norm": 0.5791637897491455, "learning_rate": 1.6444594672241688e-07, "loss": 0.5251076221466064, "step": 2187 }, { "epoch": 2.8382806163828063, "grad_norm": 0.581289529800415, "learning_rate": 1.6187342447870235e-07, "loss": 0.47298407554626465, "step": 2188 }, { "epoch": 2.8395782643957825, "grad_norm": 0.5624388456344604, "learning_rate": 1.5932101871376503e-07, "loss": 0.48804956674575806, "step": 2189 }, { "epoch": 2.8408759124087593, "grad_norm": 0.5740110278129578, "learning_rate": 1.567887346466257e-07, "loss": 0.4583921730518341, "step": 2190 }, { "epoch": 2.8421735604217355, "grad_norm": 0.5799588561058044, "learning_rate": 1.54276577455168e-07, "loss": 0.5046111345291138, "step": 2191 }, { "epoch": 2.8434712084347122, "grad_norm": 0.5686801671981812, "learning_rate": 1.517845522761141e-07, "loss": 0.5424494743347168, "step": 2192 }, { "epoch": 2.8447688564476885, "grad_norm": 0.5737746953964233, "learning_rate": 1.4931266420502687e-07, "loss": 0.5258438587188721, "step": 2193 }, { "epoch": 2.846066504460665, "grad_norm": 0.5844926238059998, "learning_rate": 1.468609182962899e-07, "loss": 0.5294222831726074, "step": 2194 }, { "epoch": 2.8473641524736415, "grad_norm": 0.6161758899688721, "learning_rate": 1.4442931956310525e-07, "loss": 0.48813527822494507, "step": 2195 }, { "epoch": 2.848661800486618, "grad_norm": 0.5877721905708313, "learning_rate": 1.420178729774746e-07, "loss": 0.5104416608810425, "step": 2196 }, { "epoch": 2.8499594484995945, "grad_norm": 0.607412576675415, "learning_rate": 1.3962658347019819e-07, "loss": 0.5552476644515991, "step": 2197 }, { "epoch": 2.8512570965125708, "grad_norm": 0.5500598549842834, "learning_rate": 1.372554559308559e-07, "loss": 0.5361748933792114, "step": 2198 }, { "epoch": 2.8525547445255475, "grad_norm": 0.5887991786003113, "learning_rate": 1.3490449520780492e-07, "loss": 0.5089778304100037, "step": 2199 }, { "epoch": 2.853852392538524, "grad_norm": 0.5767118334770203, "learning_rate": 1.3257370610816333e-07, "loss": 0.4646577537059784, "step": 2200 }, { "epoch": 2.8551500405515005, "grad_norm": 0.5947672128677368, "learning_rate": 1.3026309339780442e-07, "loss": 0.45190826058387756, "step": 2201 }, { "epoch": 2.8564476885644767, "grad_norm": 0.576164722442627, "learning_rate": 1.2797266180134994e-07, "loss": 0.47920286655426025, "step": 2202 }, { "epoch": 2.8577453365774534, "grad_norm": 0.5928218364715576, "learning_rate": 1.2570241600214805e-07, "loss": 0.4952476918697357, "step": 2203 }, { "epoch": 2.8590429845904297, "grad_norm": 0.5796513557434082, "learning_rate": 1.2345236064228216e-07, "loss": 0.4798247218132019, "step": 2204 }, { "epoch": 2.8603406326034064, "grad_norm": 0.6173388361930847, "learning_rate": 1.212225003225409e-07, "loss": 0.5353522300720215, "step": 2205 }, { "epoch": 2.8616382806163827, "grad_norm": 0.582225501537323, "learning_rate": 1.1901283960242704e-07, "loss": 0.4966939091682434, "step": 2206 }, { "epoch": 2.8629359286293594, "grad_norm": 0.573807954788208, "learning_rate": 1.168233830001364e-07, "loss": 0.5133891701698303, "step": 2207 }, { "epoch": 2.8642335766423357, "grad_norm": 0.5719092488288879, "learning_rate": 1.1465413499255452e-07, "loss": 0.5084906816482544, "step": 2208 }, { "epoch": 2.8655312246553124, "grad_norm": 0.563827395439148, "learning_rate": 1.1250510001524329e-07, "loss": 0.551742434501648, "step": 2209 }, { "epoch": 2.8668288726682887, "grad_norm": 0.5915552973747253, "learning_rate": 1.103762824624377e-07, "loss": 0.5108176469802856, "step": 2210 }, { "epoch": 2.8681265206812654, "grad_norm": 0.5619785189628601, "learning_rate": 1.0826768668702691e-07, "loss": 0.5008025169372559, "step": 2211 }, { "epoch": 2.8694241686942417, "grad_norm": 0.5829325914382935, "learning_rate": 1.0617931700055984e-07, "loss": 0.5187573432922363, "step": 2212 }, { "epoch": 2.870721816707218, "grad_norm": 0.6110272407531738, "learning_rate": 1.0411117767322065e-07, "loss": 0.5479835271835327, "step": 2213 }, { "epoch": 2.8720194647201946, "grad_norm": 0.5755971074104309, "learning_rate": 1.0206327293383222e-07, "loss": 0.5030970573425293, "step": 2214 }, { "epoch": 2.8733171127331714, "grad_norm": 0.5851888060569763, "learning_rate": 1.000356069698416e-07, "loss": 0.5171909928321838, "step": 2215 }, { "epoch": 2.8746147607461476, "grad_norm": 0.558315098285675, "learning_rate": 9.802818392731117e-08, "loss": 0.47078371047973633, "step": 2216 }, { "epoch": 2.875912408759124, "grad_norm": 0.6229851841926575, "learning_rate": 9.60410079109153e-08, "loss": 0.5632795095443726, "step": 2217 }, { "epoch": 2.8772100567721006, "grad_norm": 0.5876999497413635, "learning_rate": 9.407408298392373e-08, "loss": 0.5133551359176636, "step": 2218 }, { "epoch": 2.878507704785077, "grad_norm": 0.5872880220413208, "learning_rate": 9.212741316820039e-08, "loss": 0.4713757038116455, "step": 2219 }, { "epoch": 2.8798053527980536, "grad_norm": 0.5895143747329712, "learning_rate": 9.020100244419461e-08, "loss": 0.5900079607963562, "step": 2220 }, { "epoch": 2.88110300081103, "grad_norm": 0.5657681822776794, "learning_rate": 8.829485475092548e-08, "loss": 0.5136827230453491, "step": 2221 }, { "epoch": 2.8824006488240066, "grad_norm": 0.8106376528739929, "learning_rate": 8.640897398598525e-08, "loss": 0.6291136741638184, "step": 2222 }, { "epoch": 2.883698296836983, "grad_norm": 0.5875924825668335, "learning_rate": 8.454336400552154e-08, "loss": 0.4933609962463379, "step": 2223 }, { "epoch": 2.8849959448499596, "grad_norm": 0.5977309346199036, "learning_rate": 8.269802862423405e-08, "loss": 0.5197732448577881, "step": 2224 }, { "epoch": 2.886293592862936, "grad_norm": 0.5707021951675415, "learning_rate": 8.087297161536778e-08, "loss": 0.5037369132041931, "step": 2225 }, { "epoch": 2.8875912408759126, "grad_norm": 0.5633382797241211, "learning_rate": 7.906819671070098e-08, "loss": 0.4686581492424011, "step": 2226 }, { "epoch": 2.888888888888889, "grad_norm": 0.5665260553359985, "learning_rate": 7.728370760054283e-08, "loss": 0.4968178868293762, "step": 2227 }, { "epoch": 2.890186536901865, "grad_norm": 0.557956874370575, "learning_rate": 7.55195079337212e-08, "loss": 0.4842921793460846, "step": 2228 }, { "epoch": 2.891484184914842, "grad_norm": 0.5774162411689758, "learning_rate": 7.377560131757832e-08, "loss": 0.48150286078453064, "step": 2229 }, { "epoch": 2.8927818329278185, "grad_norm": 0.5605522990226746, "learning_rate": 7.205199131796182e-08, "loss": 0.47593769431114197, "step": 2230 }, { "epoch": 2.894079480940795, "grad_norm": 0.5713371634483337, "learning_rate": 7.034868145921802e-08, "loss": 0.5388371348381042, "step": 2231 }, { "epoch": 2.895377128953771, "grad_norm": 0.566564679145813, "learning_rate": 6.866567522418322e-08, "loss": 0.5253296494483948, "step": 2232 }, { "epoch": 2.896674776966748, "grad_norm": 0.6038841605186462, "learning_rate": 6.700297605418127e-08, "loss": 0.4850519895553589, "step": 2233 }, { "epoch": 2.8979724249797245, "grad_norm": 0.5850130915641785, "learning_rate": 6.53605873490093e-08, "loss": 0.526265025138855, "step": 2234 }, { "epoch": 2.899270072992701, "grad_norm": 0.5685164332389832, "learning_rate": 6.373851246693763e-08, "loss": 0.49016064405441284, "step": 2235 }, { "epoch": 2.900567721005677, "grad_norm": 0.585509717464447, "learning_rate": 6.21367547246976e-08, "loss": 0.49361756443977356, "step": 2236 }, { "epoch": 2.9018653690186538, "grad_norm": 0.5846717357635498, "learning_rate": 6.055531739747933e-08, "loss": 0.5073826313018799, "step": 2237 }, { "epoch": 2.90316301703163, "grad_norm": 0.6035211682319641, "learning_rate": 5.899420371892173e-08, "loss": 0.4748195707798004, "step": 2238 }, { "epoch": 2.9044606650446068, "grad_norm": 0.5725396275520325, "learning_rate": 5.745341688110806e-08, "loss": 0.49574536085128784, "step": 2239 }, { "epoch": 2.905758313057583, "grad_norm": 0.5700922012329102, "learning_rate": 5.593296003455595e-08, "loss": 0.4746463894844055, "step": 2240 }, { "epoch": 2.9070559610705597, "grad_norm": 0.5627117156982422, "learning_rate": 5.4432836288215165e-08, "loss": 0.512833833694458, "step": 2241 }, { "epoch": 2.908353609083536, "grad_norm": 0.5812812447547913, "learning_rate": 5.2953048709459834e-08, "loss": 0.48332545161247253, "step": 2242 }, { "epoch": 2.9096512570965127, "grad_norm": 0.5835334062576294, "learning_rate": 5.1493600324080684e-08, "loss": 0.507304847240448, "step": 2243 }, { "epoch": 2.910948905109489, "grad_norm": 0.5789167284965515, "learning_rate": 5.0054494116279497e-08, "loss": 0.5132785439491272, "step": 2244 }, { "epoch": 2.9122465531224657, "grad_norm": 0.5582759976387024, "learning_rate": 4.8635733028664644e-08, "loss": 0.4791605472564697, "step": 2245 }, { "epoch": 2.913544201135442, "grad_norm": 0.5968536138534546, "learning_rate": 4.723731996224446e-08, "loss": 0.5294557809829712, "step": 2246 }, { "epoch": 2.9148418491484183, "grad_norm": 0.5799421072006226, "learning_rate": 4.585925777641831e-08, "loss": 0.5392569303512573, "step": 2247 }, { "epoch": 2.916139497161395, "grad_norm": 0.5876581072807312, "learning_rate": 4.450154928897443e-08, "loss": 0.5044458508491516, "step": 2248 }, { "epoch": 2.9174371451743717, "grad_norm": 0.5795705914497375, "learning_rate": 4.316419727608434e-08, "loss": 0.518474280834198, "step": 2249 }, { "epoch": 2.918734793187348, "grad_norm": 0.5783658027648926, "learning_rate": 4.1847204472293954e-08, "loss": 0.5036035180091858, "step": 2250 }, { "epoch": 2.9200324412003242, "grad_norm": 0.5799797773361206, "learning_rate": 4.055057357052139e-08, "loss": 0.5075333118438721, "step": 2251 }, { "epoch": 2.921330089213301, "grad_norm": 0.5816603899002075, "learning_rate": 3.927430722204473e-08, "loss": 0.49955567717552185, "step": 2252 }, { "epoch": 2.9226277372262772, "grad_norm": 0.5603087544441223, "learning_rate": 3.801840803651091e-08, "loss": 0.4799802005290985, "step": 2253 }, { "epoch": 2.923925385239254, "grad_norm": 0.5984447598457336, "learning_rate": 3.678287858191132e-08, "loss": 0.4863054156303406, "step": 2254 }, { "epoch": 2.92522303325223, "grad_norm": 0.5684608817100525, "learning_rate": 3.5567721384593965e-08, "loss": 0.5202617645263672, "step": 2255 }, { "epoch": 2.926520681265207, "grad_norm": 0.6067941784858704, "learning_rate": 3.437293892924576e-08, "loss": 0.5111681818962097, "step": 2256 }, { "epoch": 2.927818329278183, "grad_norm": 0.6141681671142578, "learning_rate": 3.3198533658895804e-08, "loss": 0.5316765904426575, "step": 2257 }, { "epoch": 2.92911597729116, "grad_norm": 0.5799176096916199, "learning_rate": 3.2044507974905433e-08, "loss": 0.46131962537765503, "step": 2258 }, { "epoch": 2.930413625304136, "grad_norm": 0.5954794883728027, "learning_rate": 3.091086423696377e-08, "loss": 0.520176887512207, "step": 2259 }, { "epoch": 2.931711273317113, "grad_norm": 0.5652449131011963, "learning_rate": 2.9797604763087684e-08, "loss": 0.5085136890411377, "step": 2260 }, { "epoch": 2.933008921330089, "grad_norm": 0.5852287411689758, "learning_rate": 2.8704731829609643e-08, "loss": 0.5083173513412476, "step": 2261 }, { "epoch": 2.9343065693430654, "grad_norm": 0.5846629738807678, "learning_rate": 2.763224767117767e-08, "loss": 0.5292702913284302, "step": 2262 }, { "epoch": 2.935604217356042, "grad_norm": 0.5861793756484985, "learning_rate": 2.6580154480750907e-08, "loss": 0.5053665637969971, "step": 2263 }, { "epoch": 2.936901865369019, "grad_norm": 0.5602736473083496, "learning_rate": 2.554845440959408e-08, "loss": 0.5189537405967712, "step": 2264 }, { "epoch": 2.938199513381995, "grad_norm": 0.5991557240486145, "learning_rate": 2.4537149567271935e-08, "loss": 0.5867321491241455, "step": 2265 }, { "epoch": 2.9394971613949714, "grad_norm": 0.5465215444564819, "learning_rate": 2.3546242021648126e-08, "loss": 0.5084092617034912, "step": 2266 }, { "epoch": 2.940794809407948, "grad_norm": 0.6008067727088928, "learning_rate": 2.2575733798876342e-08, "loss": 0.5280360579490662, "step": 2267 }, { "epoch": 2.942092457420925, "grad_norm": 0.5549503564834595, "learning_rate": 2.162562688340142e-08, "loss": 0.4592389762401581, "step": 2268 }, { "epoch": 2.943390105433901, "grad_norm": 0.600985586643219, "learning_rate": 2.0695923217950442e-08, "loss": 0.5138071179389954, "step": 2269 }, { "epoch": 2.9446877534468774, "grad_norm": 0.5776973366737366, "learning_rate": 1.9786624703532764e-08, "loss": 0.560516357421875, "step": 2270 }, { "epoch": 2.945985401459854, "grad_norm": 0.5803866982460022, "learning_rate": 1.8897733199434443e-08, "loss": 0.48770207166671753, "step": 2271 }, { "epoch": 2.9472830494728304, "grad_norm": 0.5844945907592773, "learning_rate": 1.8029250523211582e-08, "loss": 0.5004736185073853, "step": 2272 }, { "epoch": 2.948580697485807, "grad_norm": 0.5826125144958496, "learning_rate": 1.718117845069367e-08, "loss": 0.4950000047683716, "step": 2273 }, { "epoch": 2.9498783454987834, "grad_norm": 0.5776214003562927, "learning_rate": 1.635351871597246e-08, "loss": 0.5560945868492126, "step": 2274 }, { "epoch": 2.95117599351176, "grad_norm": 0.565700352191925, "learning_rate": 1.554627301140199e-08, "loss": 0.4630610942840576, "step": 2275 }, { "epoch": 2.9524736415247363, "grad_norm": 0.5994547605514526, "learning_rate": 1.4759442987596351e-08, "loss": 0.5141358375549316, "step": 2276 }, { "epoch": 2.9537712895377126, "grad_norm": 0.573093831539154, "learning_rate": 1.3993030253423023e-08, "loss": 0.4815256893634796, "step": 2277 }, { "epoch": 2.9550689375506893, "grad_norm": 0.5978487730026245, "learning_rate": 1.3247036376002886e-08, "loss": 0.5149579048156738, "step": 2278 }, { "epoch": 2.956366585563666, "grad_norm": 0.6069895625114441, "learning_rate": 1.252146288070355e-08, "loss": 0.5201846361160278, "step": 2279 }, { "epoch": 2.9576642335766423, "grad_norm": 0.5879092216491699, "learning_rate": 1.1816311251140466e-08, "loss": 0.5039907693862915, "step": 2280 }, { "epoch": 2.9589618815896186, "grad_norm": 0.5550662875175476, "learning_rate": 1.113158292916916e-08, "loss": 0.5198723077774048, "step": 2281 }, { "epoch": 2.9602595296025953, "grad_norm": 0.5664054155349731, "learning_rate": 1.0467279314886336e-08, "loss": 0.5281890630722046, "step": 2282 }, { "epoch": 2.961557177615572, "grad_norm": 0.5738133788108826, "learning_rate": 9.82340176662433e-09, "loss": 0.47895991802215576, "step": 2283 }, { "epoch": 2.9628548256285483, "grad_norm": 0.5834701657295227, "learning_rate": 9.199951600951106e-09, "loss": 0.49841928482055664, "step": 2284 }, { "epoch": 2.9641524736415246, "grad_norm": 0.553411602973938, "learning_rate": 8.596930092662493e-09, "loss": 0.5044345855712891, "step": 2285 }, { "epoch": 2.9654501216545013, "grad_norm": 0.5765789151191711, "learning_rate": 8.014338474785499e-09, "loss": 0.45714667439460754, "step": 2286 }, { "epoch": 2.9667477696674776, "grad_norm": 0.5678233504295349, "learning_rate": 7.45217793857389e-09, "loss": 0.5142921209335327, "step": 2287 }, { "epoch": 2.9680454176804543, "grad_norm": 0.5809730887413025, "learning_rate": 6.910449633501515e-09, "loss": 0.5097491145133972, "step": 2288 }, { "epoch": 2.9693430656934305, "grad_norm": 0.863067626953125, "learning_rate": 6.389154667266751e-09, "loss": 0.49733829498291016, "step": 2289 }, { "epoch": 2.9706407137064073, "grad_norm": 0.5724239349365234, "learning_rate": 5.888294105785841e-09, "loss": 0.5271996855735779, "step": 2290 }, { "epoch": 2.9719383617193835, "grad_norm": 0.5894045829772949, "learning_rate": 5.407868973191788e-09, "loss": 0.5507649183273315, "step": 2291 }, { "epoch": 2.9732360097323602, "grad_norm": 0.5670002698898315, "learning_rate": 4.947880251832127e-09, "loss": 0.5069165229797363, "step": 2292 }, { "epoch": 2.9745336577453365, "grad_norm": 0.6079567074775696, "learning_rate": 4.508328882268931e-09, "loss": 0.5027692317962646, "step": 2293 }, { "epoch": 2.9758313057583132, "grad_norm": 0.5965436697006226, "learning_rate": 4.089215763271037e-09, "loss": 0.4549415707588196, "step": 2294 }, { "epoch": 2.9771289537712895, "grad_norm": 0.5540100336074829, "learning_rate": 3.6905417518195985e-09, "loss": 0.5082988739013672, "step": 2295 }, { "epoch": 2.9784266017842658, "grad_norm": 0.5584218502044678, "learning_rate": 3.312307663103642e-09, "loss": 0.49896612763404846, "step": 2296 }, { "epoch": 2.9797242497972425, "grad_norm": 0.5825123190879822, "learning_rate": 2.954514270513409e-09, "loss": 0.5268645286560059, "step": 2297 }, { "epoch": 2.981021897810219, "grad_norm": 0.6069872379302979, "learning_rate": 2.6171623056481245e-09, "loss": 0.5306706428527832, "step": 2298 }, { "epoch": 2.9823195458231955, "grad_norm": 0.619730532169342, "learning_rate": 2.300252458306007e-09, "loss": 0.5466433167457581, "step": 2299 }, { "epoch": 2.9836171938361717, "grad_norm": 0.575143039226532, "learning_rate": 2.0037853764887096e-09, "loss": 0.5247520804405212, "step": 2300 }, { "epoch": 2.9836171938361717, "eval_loss": 0.6951664686203003, "eval_runtime": 72.3726, "eval_samples_per_second": 71.74, "eval_steps_per_second": 8.967, "step": 2300 }, { "epoch": 2.9849148418491485, "grad_norm": 0.5698785781860352, "learning_rate": 1.7277616663946562e-09, "loss": 0.5104506015777588, "step": 2301 }, { "epoch": 2.986212489862125, "grad_norm": 0.5820271372795105, "learning_rate": 1.4721818924223752e-09, "loss": 0.5188534259796143, "step": 2302 }, { "epoch": 2.9875101378751014, "grad_norm": 0.5771408081054688, "learning_rate": 1.2370465771693874e-09, "loss": 0.5191137194633484, "step": 2303 }, { "epoch": 2.9888077858880777, "grad_norm": 0.555460512638092, "learning_rate": 1.0223562014277654e-09, "loss": 0.4951835870742798, "step": 2304 }, { "epoch": 2.9901054339010544, "grad_norm": 0.602135956287384, "learning_rate": 8.281112041841343e-10, "loss": 0.5143213272094727, "step": 2305 }, { "epoch": 2.9914030819140307, "grad_norm": 0.5755578875541687, "learning_rate": 6.543119826207811e-10, "loss": 0.5067423582077026, "step": 2306 }, { "epoch": 2.9927007299270074, "grad_norm": 0.585641622543335, "learning_rate": 5.009588921123243e-10, "loss": 0.49582135677337646, "step": 2307 }, { "epoch": 2.9939983779399837, "grad_norm": 0.5883374214172363, "learning_rate": 3.680522462279346e-10, "loss": 0.4730003774166107, "step": 2308 }, { "epoch": 2.9952960259529604, "grad_norm": 0.585075318813324, "learning_rate": 2.555923167291141e-10, "loss": 0.5166332721710205, "step": 2309 }, { "epoch": 2.9965936739659367, "grad_norm": 0.5931539535522461, "learning_rate": 1.635793335652558e-10, "loss": 0.5443276166915894, "step": 2310 }, { "epoch": 2.997891321978913, "grad_norm": 0.6000698804855347, "learning_rate": 9.20134848814147e-11, "loss": 0.4828116297721863, "step": 2311 }, { "epoch": 2.9991889699918897, "grad_norm": 0.5825672149658203, "learning_rate": 4.08949170105366e-11, "loss": 0.48934438824653625, "step": 2312 }, { "epoch": 3.0, "grad_norm": 0.8691220283508301, "learning_rate": 1.022373447900904e-11, "loss": 0.5870037078857422, "step": 2313 }, { "epoch": 3.0, "step": 2313, "total_flos": 8.852766725217714e+18, "train_loss": 0.5397342537911073, "train_runtime": 26894.7398, "train_samples_per_second": 11.002, "train_steps_per_second": 0.086 } ], "logging_steps": 1.0, "max_steps": 2313, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 230, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.852766725217714e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }