{ "best_global_step": 2300, "best_metric": 0.7602248191833496, "best_model_checkpoint": "saves/qwen3-1.7B/Qwen3-1.7B-SFT-science-1e-5/checkpoint-2300", "epoch": 3.0, "eval_steps": 230, "global_step": 2313, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012977269501388974, "grad_norm": 18.964550018310547, "learning_rate": 0.0, "loss": 1.341123104095459, "step": 1 }, { "epoch": 0.0025954539002777948, "grad_norm": 17.563854217529297, "learning_rate": 8.620689655172414e-08, "loss": 1.240975022315979, "step": 2 }, { "epoch": 0.003893180850416692, "grad_norm": 18.24137306213379, "learning_rate": 1.7241379310344828e-07, "loss": 1.3372515439987183, "step": 3 }, { "epoch": 0.0051909078005555895, "grad_norm": 19.47113800048828, "learning_rate": 2.5862068965517245e-07, "loss": 1.4065107107162476, "step": 4 }, { "epoch": 0.006488634750694487, "grad_norm": 17.434274673461914, "learning_rate": 3.4482758620689656e-07, "loss": 1.3200397491455078, "step": 5 }, { "epoch": 0.007786361700833384, "grad_norm": 18.338199615478516, "learning_rate": 4.3103448275862073e-07, "loss": 1.3047453165054321, "step": 6 }, { "epoch": 0.009084088650972282, "grad_norm": 19.371152877807617, "learning_rate": 5.172413793103449e-07, "loss": 1.282523512840271, "step": 7 }, { "epoch": 0.010381815601111179, "grad_norm": 17.54425621032715, "learning_rate": 6.034482758620691e-07, "loss": 1.2399928569793701, "step": 8 }, { "epoch": 0.011679542551250076, "grad_norm": 16.710424423217773, "learning_rate": 6.896551724137931e-07, "loss": 1.1754190921783447, "step": 9 }, { "epoch": 0.012977269501388973, "grad_norm": 17.185142517089844, "learning_rate": 7.758620689655173e-07, "loss": 1.2869014739990234, "step": 10 }, { "epoch": 0.01427499645152787, "grad_norm": 16.615570068359375, "learning_rate": 8.620689655172415e-07, "loss": 1.256578803062439, "step": 11 }, { "epoch": 0.015572723401666768, "grad_norm": 15.576726913452148, "learning_rate": 9.482758620689655e-07, "loss": 1.2133194208145142, "step": 12 }, { "epoch": 0.016870450351805667, "grad_norm": 15.714284896850586, "learning_rate": 1.0344827586206898e-06, "loss": 1.2498674392700195, "step": 13 }, { "epoch": 0.018168177301944564, "grad_norm": 15.78684139251709, "learning_rate": 1.120689655172414e-06, "loss": 1.235356092453003, "step": 14 }, { "epoch": 0.01946590425208346, "grad_norm": 14.27075481414795, "learning_rate": 1.2068965517241381e-06, "loss": 1.2290422916412354, "step": 15 }, { "epoch": 0.020763631202222358, "grad_norm": 12.652816772460938, "learning_rate": 1.2931034482758623e-06, "loss": 1.1969146728515625, "step": 16 }, { "epoch": 0.022061358152361255, "grad_norm": 12.271768569946289, "learning_rate": 1.3793103448275862e-06, "loss": 1.23379647731781, "step": 17 }, { "epoch": 0.023359085102500152, "grad_norm": 11.308884620666504, "learning_rate": 1.4655172413793104e-06, "loss": 1.1893874406814575, "step": 18 }, { "epoch": 0.02465681205263905, "grad_norm": 11.503107070922852, "learning_rate": 1.5517241379310346e-06, "loss": 1.2033772468566895, "step": 19 }, { "epoch": 0.025954539002777947, "grad_norm": 10.80382251739502, "learning_rate": 1.6379310344827587e-06, "loss": 1.1051026582717896, "step": 20 }, { "epoch": 0.027252265952916844, "grad_norm": 7.508317947387695, "learning_rate": 1.724137931034483e-06, "loss": 1.2109605073928833, "step": 21 }, { "epoch": 0.02854999290305574, "grad_norm": 6.523677349090576, "learning_rate": 1.810344827586207e-06, "loss": 1.0799193382263184, "step": 22 }, { "epoch": 0.029847719853194638, "grad_norm": 6.148171901702881, "learning_rate": 1.896551724137931e-06, "loss": 1.109641194343567, "step": 23 }, { "epoch": 0.031145446803333535, "grad_norm": 5.9545440673828125, "learning_rate": 1.982758620689655e-06, "loss": 1.0592708587646484, "step": 24 }, { "epoch": 0.032443173753472436, "grad_norm": 5.15648889541626, "learning_rate": 2.0689655172413796e-06, "loss": 1.044050693511963, "step": 25 }, { "epoch": 0.03374090070361133, "grad_norm": 4.722131252288818, "learning_rate": 2.1551724137931035e-06, "loss": 1.0839450359344482, "step": 26 }, { "epoch": 0.03503862765375023, "grad_norm": 4.583364963531494, "learning_rate": 2.241379310344828e-06, "loss": 1.0532729625701904, "step": 27 }, { "epoch": 0.03633635460388913, "grad_norm": 3.236476421356201, "learning_rate": 2.327586206896552e-06, "loss": 1.0504995584487915, "step": 28 }, { "epoch": 0.037634081554028025, "grad_norm": 2.3688511848449707, "learning_rate": 2.4137931034482762e-06, "loss": 1.0392580032348633, "step": 29 }, { "epoch": 0.03893180850416692, "grad_norm": 2.1980035305023193, "learning_rate": 2.5e-06, "loss": 1.0583772659301758, "step": 30 }, { "epoch": 0.04022953545430582, "grad_norm": 2.0409348011016846, "learning_rate": 2.5862068965517246e-06, "loss": 0.9697405695915222, "step": 31 }, { "epoch": 0.041527262404444716, "grad_norm": 2.0691511631011963, "learning_rate": 2.672413793103448e-06, "loss": 0.9957722425460815, "step": 32 }, { "epoch": 0.04282498935458361, "grad_norm": 1.81174898147583, "learning_rate": 2.7586206896551725e-06, "loss": 0.9902118444442749, "step": 33 }, { "epoch": 0.04412271630472251, "grad_norm": 1.8090497255325317, "learning_rate": 2.844827586206897e-06, "loss": 0.9399092793464661, "step": 34 }, { "epoch": 0.04542044325486141, "grad_norm": 1.7403111457824707, "learning_rate": 2.931034482758621e-06, "loss": 1.0072567462921143, "step": 35 }, { "epoch": 0.046718170205000305, "grad_norm": 1.588705062866211, "learning_rate": 3.017241379310345e-06, "loss": 1.0338897705078125, "step": 36 }, { "epoch": 0.0480158971551392, "grad_norm": 1.4160236120224, "learning_rate": 3.103448275862069e-06, "loss": 0.9362650513648987, "step": 37 }, { "epoch": 0.0493136241052781, "grad_norm": 1.4251086711883545, "learning_rate": 3.1896551724137935e-06, "loss": 0.9243776798248291, "step": 38 }, { "epoch": 0.050611351055416996, "grad_norm": 1.3788193464279175, "learning_rate": 3.2758620689655175e-06, "loss": 1.017249584197998, "step": 39 }, { "epoch": 0.05190907800555589, "grad_norm": 1.2869218587875366, "learning_rate": 3.362068965517242e-06, "loss": 0.9809561371803284, "step": 40 }, { "epoch": 0.05320680495569479, "grad_norm": 1.2191851139068604, "learning_rate": 3.448275862068966e-06, "loss": 0.9668518304824829, "step": 41 }, { "epoch": 0.05450453190583369, "grad_norm": 1.1019444465637207, "learning_rate": 3.5344827586206898e-06, "loss": 0.9050226807594299, "step": 42 }, { "epoch": 0.055802258855972585, "grad_norm": 1.1174124479293823, "learning_rate": 3.620689655172414e-06, "loss": 0.9214593172073364, "step": 43 }, { "epoch": 0.05709998580611148, "grad_norm": 1.0376768112182617, "learning_rate": 3.7068965517241385e-06, "loss": 0.9226070046424866, "step": 44 }, { "epoch": 0.05839771275625038, "grad_norm": 1.0166256427764893, "learning_rate": 3.793103448275862e-06, "loss": 0.9782117605209351, "step": 45 }, { "epoch": 0.059695439706389276, "grad_norm": 1.0689457654953003, "learning_rate": 3.8793103448275865e-06, "loss": 0.9090121388435364, "step": 46 }, { "epoch": 0.060993166656528174, "grad_norm": 0.9738860130310059, "learning_rate": 3.96551724137931e-06, "loss": 0.8602224588394165, "step": 47 }, { "epoch": 0.06229089360666707, "grad_norm": 0.9925969839096069, "learning_rate": 4.051724137931034e-06, "loss": 0.9660595059394836, "step": 48 }, { "epoch": 0.06358862055680597, "grad_norm": 0.9282276034355164, "learning_rate": 4.137931034482759e-06, "loss": 0.9271571040153503, "step": 49 }, { "epoch": 0.06488634750694487, "grad_norm": 0.8538461327552795, "learning_rate": 4.224137931034483e-06, "loss": 0.9294825196266174, "step": 50 }, { "epoch": 0.06618407445708377, "grad_norm": 0.8447784185409546, "learning_rate": 4.310344827586207e-06, "loss": 0.9223189353942871, "step": 51 }, { "epoch": 0.06748180140722267, "grad_norm": 0.9016849398612976, "learning_rate": 4.396551724137931e-06, "loss": 0.9693700075149536, "step": 52 }, { "epoch": 0.06877952835736156, "grad_norm": 0.8437271118164062, "learning_rate": 4.482758620689656e-06, "loss": 0.9623271822929382, "step": 53 }, { "epoch": 0.07007725530750046, "grad_norm": 0.8587411046028137, "learning_rate": 4.56896551724138e-06, "loss": 0.9010592103004456, "step": 54 }, { "epoch": 0.07137498225763936, "grad_norm": 0.8153098225593567, "learning_rate": 4.655172413793104e-06, "loss": 0.9027279615402222, "step": 55 }, { "epoch": 0.07267270920777825, "grad_norm": 0.7967116832733154, "learning_rate": 4.741379310344828e-06, "loss": 0.9301362037658691, "step": 56 }, { "epoch": 0.07397043615791715, "grad_norm": 0.7964755296707153, "learning_rate": 4.8275862068965525e-06, "loss": 0.8760148882865906, "step": 57 }, { "epoch": 0.07526816310805605, "grad_norm": 0.8177418112754822, "learning_rate": 4.9137931034482765e-06, "loss": 0.9806030988693237, "step": 58 }, { "epoch": 0.07656589005819495, "grad_norm": 0.8148566484451294, "learning_rate": 5e-06, "loss": 0.9486901760101318, "step": 59 }, { "epoch": 0.07786361700833384, "grad_norm": 0.7789106965065002, "learning_rate": 5.086206896551724e-06, "loss": 0.864636242389679, "step": 60 }, { "epoch": 0.07916134395847274, "grad_norm": 0.8153941035270691, "learning_rate": 5.172413793103449e-06, "loss": 0.8964623212814331, "step": 61 }, { "epoch": 0.08045907090861164, "grad_norm": 0.8125947713851929, "learning_rate": 5.258620689655173e-06, "loss": 0.951106607913971, "step": 62 }, { "epoch": 0.08175679785875054, "grad_norm": 0.8139016032218933, "learning_rate": 5.344827586206896e-06, "loss": 0.9181129932403564, "step": 63 }, { "epoch": 0.08305452480888943, "grad_norm": 0.7552460432052612, "learning_rate": 5.431034482758621e-06, "loss": 0.9456254839897156, "step": 64 }, { "epoch": 0.08435225175902833, "grad_norm": 0.7714299559593201, "learning_rate": 5.517241379310345e-06, "loss": 0.9329437613487244, "step": 65 }, { "epoch": 0.08564997870916723, "grad_norm": 0.7831487059593201, "learning_rate": 5.603448275862069e-06, "loss": 0.9335770010948181, "step": 66 }, { "epoch": 0.08694770565930612, "grad_norm": 0.8076956868171692, "learning_rate": 5.689655172413794e-06, "loss": 0.9263577461242676, "step": 67 }, { "epoch": 0.08824543260944502, "grad_norm": 0.7335968017578125, "learning_rate": 5.775862068965518e-06, "loss": 0.8535078167915344, "step": 68 }, { "epoch": 0.08954315955958392, "grad_norm": 0.7708919048309326, "learning_rate": 5.862068965517242e-06, "loss": 0.9572209119796753, "step": 69 }, { "epoch": 0.09084088650972282, "grad_norm": 0.7519110441207886, "learning_rate": 5.9482758620689665e-06, "loss": 0.9237591028213501, "step": 70 }, { "epoch": 0.09213861345986171, "grad_norm": 0.789342999458313, "learning_rate": 6.03448275862069e-06, "loss": 0.9293794631958008, "step": 71 }, { "epoch": 0.09343634041000061, "grad_norm": 0.8580161929130554, "learning_rate": 6.1206896551724135e-06, "loss": 1.0194649696350098, "step": 72 }, { "epoch": 0.0947340673601395, "grad_norm": 0.7663602232933044, "learning_rate": 6.206896551724138e-06, "loss": 0.8723046779632568, "step": 73 }, { "epoch": 0.0960317943102784, "grad_norm": 0.7687053680419922, "learning_rate": 6.293103448275862e-06, "loss": 0.8932123780250549, "step": 74 }, { "epoch": 0.0973295212604173, "grad_norm": 0.7912623882293701, "learning_rate": 6.379310344827587e-06, "loss": 0.8944480419158936, "step": 75 }, { "epoch": 0.0986272482105562, "grad_norm": 0.7491701245307922, "learning_rate": 6.465517241379311e-06, "loss": 0.9510178565979004, "step": 76 }, { "epoch": 0.0999249751606951, "grad_norm": 0.7807953953742981, "learning_rate": 6.551724137931035e-06, "loss": 0.9121882319450378, "step": 77 }, { "epoch": 0.10122270211083399, "grad_norm": 0.7805066108703613, "learning_rate": 6.63793103448276e-06, "loss": 0.8803927302360535, "step": 78 }, { "epoch": 0.10252042906097289, "grad_norm": 0.7737302780151367, "learning_rate": 6.724137931034484e-06, "loss": 0.959402859210968, "step": 79 }, { "epoch": 0.10381815601111179, "grad_norm": 0.7317953705787659, "learning_rate": 6.810344827586207e-06, "loss": 0.8773848414421082, "step": 80 }, { "epoch": 0.10511588296125068, "grad_norm": 0.7534870505332947, "learning_rate": 6.896551724137932e-06, "loss": 0.8883604407310486, "step": 81 }, { "epoch": 0.10641360991138958, "grad_norm": 0.7352930903434753, "learning_rate": 6.982758620689656e-06, "loss": 0.8512092232704163, "step": 82 }, { "epoch": 0.10771133686152848, "grad_norm": 0.7618373036384583, "learning_rate": 7.0689655172413796e-06, "loss": 0.867959201335907, "step": 83 }, { "epoch": 0.10900906381166738, "grad_norm": 0.7561816573143005, "learning_rate": 7.155172413793104e-06, "loss": 0.9354209899902344, "step": 84 }, { "epoch": 0.11030679076180627, "grad_norm": 0.7632707953453064, "learning_rate": 7.241379310344828e-06, "loss": 0.8821243047714233, "step": 85 }, { "epoch": 0.11160451771194517, "grad_norm": 0.7363986968994141, "learning_rate": 7.327586206896552e-06, "loss": 0.8989731073379517, "step": 86 }, { "epoch": 0.11290224466208407, "grad_norm": 0.7885032296180725, "learning_rate": 7.413793103448277e-06, "loss": 0.9085399508476257, "step": 87 }, { "epoch": 0.11419997161222296, "grad_norm": 0.7643342614173889, "learning_rate": 7.500000000000001e-06, "loss": 0.863010823726654, "step": 88 }, { "epoch": 0.11549769856236186, "grad_norm": 0.7284826040267944, "learning_rate": 7.586206896551724e-06, "loss": 0.9157944917678833, "step": 89 }, { "epoch": 0.11679542551250076, "grad_norm": 0.7478772401809692, "learning_rate": 7.672413793103449e-06, "loss": 0.9134526252746582, "step": 90 }, { "epoch": 0.11809315246263966, "grad_norm": 0.7836363315582275, "learning_rate": 7.758620689655173e-06, "loss": 0.908155620098114, "step": 91 }, { "epoch": 0.11939087941277855, "grad_norm": 0.7334380149841309, "learning_rate": 7.844827586206897e-06, "loss": 0.9568802118301392, "step": 92 }, { "epoch": 0.12068860636291745, "grad_norm": 0.7914179563522339, "learning_rate": 7.93103448275862e-06, "loss": 0.9015185832977295, "step": 93 }, { "epoch": 0.12198633331305635, "grad_norm": 0.6916558146476746, "learning_rate": 8.017241379310345e-06, "loss": 0.8272259831428528, "step": 94 }, { "epoch": 0.12328406026319524, "grad_norm": 0.7935004234313965, "learning_rate": 8.103448275862069e-06, "loss": 0.9952960014343262, "step": 95 }, { "epoch": 0.12458178721333414, "grad_norm": 0.7549696564674377, "learning_rate": 8.189655172413794e-06, "loss": 0.8569615483283997, "step": 96 }, { "epoch": 0.12587951416347304, "grad_norm": 0.8149930834770203, "learning_rate": 8.275862068965518e-06, "loss": 0.8843995928764343, "step": 97 }, { "epoch": 0.12717724111361195, "grad_norm": 0.7083914279937744, "learning_rate": 8.362068965517242e-06, "loss": 0.8912689089775085, "step": 98 }, { "epoch": 0.12847496806375083, "grad_norm": 0.7134708166122437, "learning_rate": 8.448275862068966e-06, "loss": 0.800317645072937, "step": 99 }, { "epoch": 0.12977269501388974, "grad_norm": 0.7577885985374451, "learning_rate": 8.53448275862069e-06, "loss": 0.9712153077125549, "step": 100 }, { "epoch": 0.13107042196402863, "grad_norm": 0.7569701671600342, "learning_rate": 8.620689655172414e-06, "loss": 0.9349263906478882, "step": 101 }, { "epoch": 0.13236814891416754, "grad_norm": 0.7419719099998474, "learning_rate": 8.706896551724138e-06, "loss": 0.8660463690757751, "step": 102 }, { "epoch": 0.13366587586430642, "grad_norm": 0.7070901989936829, "learning_rate": 8.793103448275862e-06, "loss": 0.8430792689323425, "step": 103 }, { "epoch": 0.13496360281444533, "grad_norm": 0.7656417489051819, "learning_rate": 8.879310344827588e-06, "loss": 0.9394369125366211, "step": 104 }, { "epoch": 0.13626132976458422, "grad_norm": 0.7935210466384888, "learning_rate": 8.965517241379312e-06, "loss": 0.9688374400138855, "step": 105 }, { "epoch": 0.13755905671472313, "grad_norm": 0.7562341690063477, "learning_rate": 9.051724137931036e-06, "loss": 0.9521839022636414, "step": 106 }, { "epoch": 0.138856783664862, "grad_norm": 0.7250262498855591, "learning_rate": 9.13793103448276e-06, "loss": 0.8643259406089783, "step": 107 }, { "epoch": 0.14015451061500092, "grad_norm": 0.6959007978439331, "learning_rate": 9.224137931034484e-06, "loss": 0.8793572187423706, "step": 108 }, { "epoch": 0.1414522375651398, "grad_norm": 0.7430084347724915, "learning_rate": 9.310344827586207e-06, "loss": 0.9341211318969727, "step": 109 }, { "epoch": 0.14274996451527872, "grad_norm": 0.7306565046310425, "learning_rate": 9.396551724137931e-06, "loss": 0.9853330850601196, "step": 110 }, { "epoch": 0.1440476914654176, "grad_norm": 0.7661026120185852, "learning_rate": 9.482758620689655e-06, "loss": 0.9588793516159058, "step": 111 }, { "epoch": 0.1453454184155565, "grad_norm": 0.7300426363945007, "learning_rate": 9.56896551724138e-06, "loss": 0.841079831123352, "step": 112 }, { "epoch": 0.1466431453656954, "grad_norm": 0.75968337059021, "learning_rate": 9.655172413793105e-06, "loss": 0.9170314073562622, "step": 113 }, { "epoch": 0.1479408723158343, "grad_norm": 0.7715488076210022, "learning_rate": 9.741379310344829e-06, "loss": 0.8987317681312561, "step": 114 }, { "epoch": 0.1492385992659732, "grad_norm": 0.7421614527702332, "learning_rate": 9.827586206896553e-06, "loss": 0.8786249160766602, "step": 115 }, { "epoch": 0.1505363262161121, "grad_norm": 0.7512428164482117, "learning_rate": 9.913793103448277e-06, "loss": 0.9067139625549316, "step": 116 }, { "epoch": 0.15183405316625098, "grad_norm": 0.7859955430030823, "learning_rate": 1e-05, "loss": 0.9494441747665405, "step": 117 }, { "epoch": 0.1531317801163899, "grad_norm": 0.7653732895851135, "learning_rate": 9.99999488813276e-06, "loss": 0.9384220242500305, "step": 118 }, { "epoch": 0.15442950706652878, "grad_norm": 0.7733427286148071, "learning_rate": 9.999979552541496e-06, "loss": 0.8465048670768738, "step": 119 }, { "epoch": 0.1557272340166677, "grad_norm": 0.7779337167739868, "learning_rate": 9.99995399325756e-06, "loss": 0.8923530578613281, "step": 120 }, { "epoch": 0.15702496096680657, "grad_norm": 0.7559251189231873, "learning_rate": 9.999918210333219e-06, "loss": 0.9556446075439453, "step": 121 }, { "epoch": 0.15832268791694548, "grad_norm": 0.7645485997200012, "learning_rate": 9.999872203841635e-06, "loss": 0.86847323179245, "step": 122 }, { "epoch": 0.15962041486708436, "grad_norm": 0.8056962490081787, "learning_rate": 9.999815973876888e-06, "loss": 0.8974254131317139, "step": 123 }, { "epoch": 0.16091814181722328, "grad_norm": 0.8095734119415283, "learning_rate": 9.999749520553945e-06, "loss": 0.9332894086837769, "step": 124 }, { "epoch": 0.16221586876736216, "grad_norm": 0.7671176791191101, "learning_rate": 9.99967284400869e-06, "loss": 0.8843300342559814, "step": 125 }, { "epoch": 0.16351359571750107, "grad_norm": 0.7800223231315613, "learning_rate": 9.99958594439791e-06, "loss": 0.857262134552002, "step": 126 }, { "epoch": 0.16481132266763995, "grad_norm": 0.7728240489959717, "learning_rate": 9.999488821899286e-06, "loss": 0.8976252675056458, "step": 127 }, { "epoch": 0.16610904961777886, "grad_norm": 0.7575803399085999, "learning_rate": 9.999381476711416e-06, "loss": 0.835990309715271, "step": 128 }, { "epoch": 0.16740677656791775, "grad_norm": 0.8353949785232544, "learning_rate": 9.999263909053789e-06, "loss": 0.8549575805664062, "step": 129 }, { "epoch": 0.16870450351805666, "grad_norm": 0.7656837105751038, "learning_rate": 9.999136119166803e-06, "loss": 0.822762668132782, "step": 130 }, { "epoch": 0.17000223046819554, "grad_norm": 0.7765395045280457, "learning_rate": 9.998998107311758e-06, "loss": 0.9564024209976196, "step": 131 }, { "epoch": 0.17129995741833445, "grad_norm": 0.7651109099388123, "learning_rate": 9.998849873770849e-06, "loss": 0.8914260268211365, "step": 132 }, { "epoch": 0.17259768436847334, "grad_norm": 0.7631571888923645, "learning_rate": 9.998691418847177e-06, "loss": 0.8151357173919678, "step": 133 }, { "epoch": 0.17389541131861225, "grad_norm": 0.8555284142494202, "learning_rate": 9.998522742864745e-06, "loss": 0.9253755211830139, "step": 134 }, { "epoch": 0.17519313826875113, "grad_norm": 0.7372084259986877, "learning_rate": 9.998343846168448e-06, "loss": 0.8820257186889648, "step": 135 }, { "epoch": 0.17649086521889004, "grad_norm": 0.7227676510810852, "learning_rate": 9.998154729124092e-06, "loss": 0.8299266695976257, "step": 136 }, { "epoch": 0.17778859216902893, "grad_norm": 0.7503435015678406, "learning_rate": 9.997955392118365e-06, "loss": 0.8857607245445251, "step": 137 }, { "epoch": 0.17908631911916784, "grad_norm": 0.7888970375061035, "learning_rate": 9.997745835558867e-06, "loss": 0.8129744529724121, "step": 138 }, { "epoch": 0.18038404606930672, "grad_norm": 0.7952077388763428, "learning_rate": 9.997526059874086e-06, "loss": 0.9076676368713379, "step": 139 }, { "epoch": 0.18168177301944563, "grad_norm": 0.7603996396064758, "learning_rate": 9.997296065513405e-06, "loss": 0.90814208984375, "step": 140 }, { "epoch": 0.1829794999695845, "grad_norm": 0.7346363663673401, "learning_rate": 9.997055852947109e-06, "loss": 0.9773215055465698, "step": 141 }, { "epoch": 0.18427722691972342, "grad_norm": 0.780322790145874, "learning_rate": 9.996805422666367e-06, "loss": 0.9135559797286987, "step": 142 }, { "epoch": 0.1855749538698623, "grad_norm": 0.7772685289382935, "learning_rate": 9.99654477518325e-06, "loss": 0.9268802404403687, "step": 143 }, { "epoch": 0.18687268082000122, "grad_norm": 0.725249707698822, "learning_rate": 9.996273911030714e-06, "loss": 0.8543541431427002, "step": 144 }, { "epoch": 0.1881704077701401, "grad_norm": 0.7833412289619446, "learning_rate": 9.995992830762608e-06, "loss": 1.0088237524032593, "step": 145 }, { "epoch": 0.189468134720279, "grad_norm": 0.7379457354545593, "learning_rate": 9.99570153495367e-06, "loss": 0.8407413363456726, "step": 146 }, { "epoch": 0.1907658616704179, "grad_norm": 0.7491745352745056, "learning_rate": 9.995400024199526e-06, "loss": 0.8960580825805664, "step": 147 }, { "epoch": 0.1920635886205568, "grad_norm": 0.7832504510879517, "learning_rate": 9.99508829911669e-06, "loss": 0.9582692980766296, "step": 148 }, { "epoch": 0.1933613155706957, "grad_norm": 0.8017767071723938, "learning_rate": 9.994766360342557e-06, "loss": 0.95718914270401, "step": 149 }, { "epoch": 0.1946590425208346, "grad_norm": 0.6941841840744019, "learning_rate": 9.994434208535415e-06, "loss": 0.8945623636245728, "step": 150 }, { "epoch": 0.19595676947097349, "grad_norm": 0.7576905488967896, "learning_rate": 9.994091844374431e-06, "loss": 0.9543700814247131, "step": 151 }, { "epoch": 0.1972544964211124, "grad_norm": 0.7699536681175232, "learning_rate": 9.993739268559648e-06, "loss": 0.9578084945678711, "step": 152 }, { "epoch": 0.19855222337125128, "grad_norm": 0.7555127143859863, "learning_rate": 9.993376481812001e-06, "loss": 0.9324721693992615, "step": 153 }, { "epoch": 0.1998499503213902, "grad_norm": 0.8148646354675293, "learning_rate": 9.99300348487329e-06, "loss": 0.9196079969406128, "step": 154 }, { "epoch": 0.20114767727152907, "grad_norm": 0.7479966878890991, "learning_rate": 9.992620278506203e-06, "loss": 0.915722131729126, "step": 155 }, { "epoch": 0.20244540422166798, "grad_norm": 0.7261396646499634, "learning_rate": 9.9922268634943e-06, "loss": 0.7793674468994141, "step": 156 }, { "epoch": 0.20374313117180687, "grad_norm": 0.8150226473808289, "learning_rate": 9.991823240642014e-06, "loss": 1.0025370121002197, "step": 157 }, { "epoch": 0.20504085812194578, "grad_norm": 0.7759719491004944, "learning_rate": 9.991409410774654e-06, "loss": 0.9169880747795105, "step": 158 }, { "epoch": 0.2063385850720847, "grad_norm": 0.7141395211219788, "learning_rate": 9.990985374738396e-06, "loss": 0.8525487184524536, "step": 159 }, { "epoch": 0.20763631202222357, "grad_norm": 0.7439554929733276, "learning_rate": 9.990551133400284e-06, "loss": 0.8688797950744629, "step": 160 }, { "epoch": 0.20893403897236248, "grad_norm": 0.7583606839179993, "learning_rate": 9.990106687648234e-06, "loss": 0.8457356095314026, "step": 161 }, { "epoch": 0.21023176592250137, "grad_norm": 0.7045748829841614, "learning_rate": 9.989652038391025e-06, "loss": 0.8181869983673096, "step": 162 }, { "epoch": 0.21152949287264028, "grad_norm": 0.7270849943161011, "learning_rate": 9.9891871865583e-06, "loss": 0.8501678705215454, "step": 163 }, { "epoch": 0.21282721982277916, "grad_norm": 0.7277626991271973, "learning_rate": 9.988712133100563e-06, "loss": 0.9715257287025452, "step": 164 }, { "epoch": 0.21412494677291807, "grad_norm": 0.7333799600601196, "learning_rate": 9.988226878989178e-06, "loss": 0.8683098554611206, "step": 165 }, { "epoch": 0.21542267372305696, "grad_norm": 0.7085330486297607, "learning_rate": 9.987731425216364e-06, "loss": 0.7768368721008301, "step": 166 }, { "epoch": 0.21672040067319587, "grad_norm": 0.751772403717041, "learning_rate": 9.987225772795204e-06, "loss": 0.9315556287765503, "step": 167 }, { "epoch": 0.21801812762333475, "grad_norm": 0.7408215999603271, "learning_rate": 9.986709922759626e-06, "loss": 0.8905249238014221, "step": 168 }, { "epoch": 0.21931585457347366, "grad_norm": 0.6908205151557922, "learning_rate": 9.986183876164412e-06, "loss": 0.7912262678146362, "step": 169 }, { "epoch": 0.22061358152361255, "grad_norm": 0.7142277956008911, "learning_rate": 9.985647634085197e-06, "loss": 0.8391699194908142, "step": 170 }, { "epoch": 0.22191130847375146, "grad_norm": 0.7526142597198486, "learning_rate": 9.985101197618456e-06, "loss": 0.8537833094596863, "step": 171 }, { "epoch": 0.22320903542389034, "grad_norm": 0.7814799547195435, "learning_rate": 9.98454456788152e-06, "loss": 0.8500316143035889, "step": 172 }, { "epoch": 0.22450676237402925, "grad_norm": 0.7822877168655396, "learning_rate": 9.983977746012547e-06, "loss": 0.9026069641113281, "step": 173 }, { "epoch": 0.22580448932416813, "grad_norm": 0.728121817111969, "learning_rate": 9.983400733170553e-06, "loss": 0.8231689929962158, "step": 174 }, { "epoch": 0.22710221627430704, "grad_norm": 0.7749063372612, "learning_rate": 9.982813530535377e-06, "loss": 0.8157176375389099, "step": 175 }, { "epoch": 0.22839994322444593, "grad_norm": 0.7811345458030701, "learning_rate": 9.982216139307705e-06, "loss": 0.9290957450866699, "step": 176 }, { "epoch": 0.22969767017458484, "grad_norm": 0.7119234204292297, "learning_rate": 9.981608560709044e-06, "loss": 0.7803032994270325, "step": 177 }, { "epoch": 0.23099539712472372, "grad_norm": 0.7667236328125, "learning_rate": 9.980990795981747e-06, "loss": 0.9145867824554443, "step": 178 }, { "epoch": 0.23229312407486263, "grad_norm": 0.754831075668335, "learning_rate": 9.980362846388978e-06, "loss": 0.9066612124443054, "step": 179 }, { "epoch": 0.23359085102500152, "grad_norm": 0.7749460935592651, "learning_rate": 9.97972471321474e-06, "loss": 0.889066755771637, "step": 180 }, { "epoch": 0.23488857797514043, "grad_norm": 0.7541179656982422, "learning_rate": 9.979076397763853e-06, "loss": 0.8429720997810364, "step": 181 }, { "epoch": 0.2361863049252793, "grad_norm": 0.7422946095466614, "learning_rate": 9.978417901361958e-06, "loss": 0.9534435272216797, "step": 182 }, { "epoch": 0.23748403187541822, "grad_norm": 0.7609230875968933, "learning_rate": 9.977749225355513e-06, "loss": 0.8388585448265076, "step": 183 }, { "epoch": 0.2387817588255571, "grad_norm": 0.7104296684265137, "learning_rate": 9.977070371111793e-06, "loss": 0.852005660533905, "step": 184 }, { "epoch": 0.24007948577569602, "grad_norm": 0.7545174360275269, "learning_rate": 9.976381340018879e-06, "loss": 0.8911629319190979, "step": 185 }, { "epoch": 0.2413772127258349, "grad_norm": 0.7062454223632812, "learning_rate": 9.97568213348567e-06, "loss": 0.8741620182991028, "step": 186 }, { "epoch": 0.2426749396759738, "grad_norm": 0.7272056937217712, "learning_rate": 9.974972752941861e-06, "loss": 0.8200283050537109, "step": 187 }, { "epoch": 0.2439726666261127, "grad_norm": 0.7598206996917725, "learning_rate": 9.97425319983796e-06, "loss": 0.9287367463111877, "step": 188 }, { "epoch": 0.2452703935762516, "grad_norm": 0.773642897605896, "learning_rate": 9.97352347564527e-06, "loss": 0.8710715174674988, "step": 189 }, { "epoch": 0.2465681205263905, "grad_norm": 0.7518259882926941, "learning_rate": 9.972783581855894e-06, "loss": 0.8873825669288635, "step": 190 }, { "epoch": 0.2478658474765294, "grad_norm": 0.7199155688285828, "learning_rate": 9.972033519982722e-06, "loss": 0.8325483798980713, "step": 191 }, { "epoch": 0.24916357442666828, "grad_norm": 0.7098877429962158, "learning_rate": 9.971273291559447e-06, "loss": 0.9047567248344421, "step": 192 }, { "epoch": 0.25046130137680717, "grad_norm": 0.7787808775901794, "learning_rate": 9.97050289814054e-06, "loss": 0.8593154549598694, "step": 193 }, { "epoch": 0.2517590283269461, "grad_norm": 0.6697104573249817, "learning_rate": 9.969722341301261e-06, "loss": 0.7885489463806152, "step": 194 }, { "epoch": 0.253056755277085, "grad_norm": 0.735886812210083, "learning_rate": 9.968931622637652e-06, "loss": 0.9099982976913452, "step": 195 }, { "epoch": 0.2543544822272239, "grad_norm": 0.7376393675804138, "learning_rate": 9.968130743766533e-06, "loss": 0.8479236364364624, "step": 196 }, { "epoch": 0.25565220917736275, "grad_norm": 0.7209894061088562, "learning_rate": 9.967319706325495e-06, "loss": 0.822541356086731, "step": 197 }, { "epoch": 0.25694993612750167, "grad_norm": 0.7586290240287781, "learning_rate": 9.96649851197291e-06, "loss": 0.8346601128578186, "step": 198 }, { "epoch": 0.2582476630776406, "grad_norm": 0.7636249661445618, "learning_rate": 9.965667162387908e-06, "loss": 0.9417949914932251, "step": 199 }, { "epoch": 0.2595453900277795, "grad_norm": 0.7661371827125549, "learning_rate": 9.964825659270391e-06, "loss": 0.8580296635627747, "step": 200 }, { "epoch": 0.26084311697791834, "grad_norm": 0.7609704732894897, "learning_rate": 9.963974004341019e-06, "loss": 0.8554142117500305, "step": 201 }, { "epoch": 0.26214084392805725, "grad_norm": 0.7663947343826294, "learning_rate": 9.963112199341212e-06, "loss": 0.8991423845291138, "step": 202 }, { "epoch": 0.26343857087819617, "grad_norm": 0.7254887223243713, "learning_rate": 9.96224024603314e-06, "loss": 0.8028452396392822, "step": 203 }, { "epoch": 0.2647362978283351, "grad_norm": 0.7555724382400513, "learning_rate": 9.961358146199729e-06, "loss": 0.8306048512458801, "step": 204 }, { "epoch": 0.26603402477847393, "grad_norm": 0.7537539601325989, "learning_rate": 9.960465901644651e-06, "loss": 0.8942010998725891, "step": 205 }, { "epoch": 0.26733175172861284, "grad_norm": 0.6972079873085022, "learning_rate": 9.959563514192317e-06, "loss": 0.8508002758026123, "step": 206 }, { "epoch": 0.26862947867875175, "grad_norm": 0.7849931716918945, "learning_rate": 9.958650985687884e-06, "loss": 0.9203708171844482, "step": 207 }, { "epoch": 0.26992720562889067, "grad_norm": 0.7529065012931824, "learning_rate": 9.95772831799724e-06, "loss": 0.9082123637199402, "step": 208 }, { "epoch": 0.2712249325790295, "grad_norm": 0.7322463393211365, "learning_rate": 9.956795513007008e-06, "loss": 0.8661291599273682, "step": 209 }, { "epoch": 0.27252265952916843, "grad_norm": 0.8052998185157776, "learning_rate": 9.955852572624538e-06, "loss": 0.9168900847434998, "step": 210 }, { "epoch": 0.27382038647930734, "grad_norm": 0.7243384718894958, "learning_rate": 9.954899498777903e-06, "loss": 0.9119362235069275, "step": 211 }, { "epoch": 0.27511811342944625, "grad_norm": 0.741720974445343, "learning_rate": 9.9539362934159e-06, "loss": 0.8204353451728821, "step": 212 }, { "epoch": 0.2764158403795851, "grad_norm": 0.7546416521072388, "learning_rate": 9.952962958508038e-06, "loss": 0.8421768546104431, "step": 213 }, { "epoch": 0.277713567329724, "grad_norm": 0.7767171859741211, "learning_rate": 9.951979496044544e-06, "loss": 0.8838454484939575, "step": 214 }, { "epoch": 0.27901129427986293, "grad_norm": 0.7499642968177795, "learning_rate": 9.950985908036346e-06, "loss": 0.8892335295677185, "step": 215 }, { "epoch": 0.28030902123000184, "grad_norm": 0.743560254573822, "learning_rate": 9.94998219651508e-06, "loss": 0.8514686226844788, "step": 216 }, { "epoch": 0.2816067481801407, "grad_norm": 0.7842270135879517, "learning_rate": 9.948968363533085e-06, "loss": 0.8559333086013794, "step": 217 }, { "epoch": 0.2829044751302796, "grad_norm": 0.8065765500068665, "learning_rate": 9.947944411163391e-06, "loss": 0.8402450084686279, "step": 218 }, { "epoch": 0.2842022020804185, "grad_norm": 0.7745425701141357, "learning_rate": 9.946910341499722e-06, "loss": 0.8811400532722473, "step": 219 }, { "epoch": 0.28549992903055743, "grad_norm": 0.7459537982940674, "learning_rate": 9.945866156656487e-06, "loss": 0.8989254236221313, "step": 220 }, { "epoch": 0.2867976559806963, "grad_norm": 0.7783303260803223, "learning_rate": 9.944811858768782e-06, "loss": 0.8823436498641968, "step": 221 }, { "epoch": 0.2880953829308352, "grad_norm": 0.8013091087341309, "learning_rate": 9.943747449992379e-06, "loss": 0.9165104627609253, "step": 222 }, { "epoch": 0.2893931098809741, "grad_norm": 0.7300430536270142, "learning_rate": 9.942672932503722e-06, "loss": 0.8838682174682617, "step": 223 }, { "epoch": 0.290690836831113, "grad_norm": 0.7317079305648804, "learning_rate": 9.941588308499932e-06, "loss": 0.9152242541313171, "step": 224 }, { "epoch": 0.2919885637812519, "grad_norm": 0.7530835866928101, "learning_rate": 9.940493580198787e-06, "loss": 0.8324201703071594, "step": 225 }, { "epoch": 0.2932862907313908, "grad_norm": 0.7751577496528625, "learning_rate": 9.93938874983873e-06, "loss": 0.8113334774971008, "step": 226 }, { "epoch": 0.2945840176815297, "grad_norm": 0.7621341347694397, "learning_rate": 9.93827381967886e-06, "loss": 0.8741900324821472, "step": 227 }, { "epoch": 0.2958817446316686, "grad_norm": 0.7917994260787964, "learning_rate": 9.937148791998926e-06, "loss": 0.8467355966567993, "step": 228 }, { "epoch": 0.29717947158180746, "grad_norm": 0.7442567348480225, "learning_rate": 9.936013669099326e-06, "loss": 0.8748873472213745, "step": 229 }, { "epoch": 0.2984771985319464, "grad_norm": 0.7540955543518066, "learning_rate": 9.9348684533011e-06, "loss": 0.9296368360519409, "step": 230 }, { "epoch": 0.2984771985319464, "eval_loss": 0.8298861980438232, "eval_runtime": 147.7609, "eval_samples_per_second": 35.138, "eval_steps_per_second": 8.784, "step": 230 }, { "epoch": 0.2997749254820853, "grad_norm": 0.7888861894607544, "learning_rate": 9.93371314694592e-06, "loss": 0.8800599575042725, "step": 231 }, { "epoch": 0.3010726524322242, "grad_norm": 0.7253603935241699, "learning_rate": 9.9325477523961e-06, "loss": 0.8651221990585327, "step": 232 }, { "epoch": 0.30237037938236305, "grad_norm": 0.722152054309845, "learning_rate": 9.931372272034573e-06, "loss": 0.8784995079040527, "step": 233 }, { "epoch": 0.30366810633250196, "grad_norm": 0.762865424156189, "learning_rate": 9.930186708264902e-06, "loss": 0.8278964161872864, "step": 234 }, { "epoch": 0.3049658332826409, "grad_norm": 0.7679449319839478, "learning_rate": 9.928991063511264e-06, "loss": 0.8578440546989441, "step": 235 }, { "epoch": 0.3062635602327798, "grad_norm": 0.747564971446991, "learning_rate": 9.927785340218448e-06, "loss": 0.8361713290214539, "step": 236 }, { "epoch": 0.3075612871829187, "grad_norm": 0.7898075580596924, "learning_rate": 9.926569540851856e-06, "loss": 0.9254142045974731, "step": 237 }, { "epoch": 0.30885901413305755, "grad_norm": 0.7446305751800537, "learning_rate": 9.925343667897487e-06, "loss": 0.8479958772659302, "step": 238 }, { "epoch": 0.31015674108319646, "grad_norm": 0.7183104157447815, "learning_rate": 9.924107723861944e-06, "loss": 0.8849471807479858, "step": 239 }, { "epoch": 0.3114544680333354, "grad_norm": 0.7643358707427979, "learning_rate": 9.922861711272417e-06, "loss": 0.9337932467460632, "step": 240 }, { "epoch": 0.3127521949834743, "grad_norm": 0.7303009629249573, "learning_rate": 9.921605632676688e-06, "loss": 0.8316428661346436, "step": 241 }, { "epoch": 0.31404992193361314, "grad_norm": 0.7191450595855713, "learning_rate": 9.920339490643119e-06, "loss": 0.7859864830970764, "step": 242 }, { "epoch": 0.31534764888375205, "grad_norm": 0.6739291548728943, "learning_rate": 9.91906328776065e-06, "loss": 0.7918820977210999, "step": 243 }, { "epoch": 0.31664537583389096, "grad_norm": 0.7382656335830688, "learning_rate": 9.917777026638794e-06, "loss": 0.8608056306838989, "step": 244 }, { "epoch": 0.3179431027840299, "grad_norm": 0.7110844254493713, "learning_rate": 9.916480709907626e-06, "loss": 0.7915103435516357, "step": 245 }, { "epoch": 0.31924082973416873, "grad_norm": 0.7153676748275757, "learning_rate": 9.91517434021779e-06, "loss": 0.8111603856086731, "step": 246 }, { "epoch": 0.32053855668430764, "grad_norm": 0.7526156902313232, "learning_rate": 9.913857920240481e-06, "loss": 0.9535894989967346, "step": 247 }, { "epoch": 0.32183628363444655, "grad_norm": 0.7110179662704468, "learning_rate": 9.912531452667441e-06, "loss": 0.8099456429481506, "step": 248 }, { "epoch": 0.32313401058458546, "grad_norm": 0.7142106890678406, "learning_rate": 9.911194940210964e-06, "loss": 0.8379517793655396, "step": 249 }, { "epoch": 0.3244317375347243, "grad_norm": 0.7095869183540344, "learning_rate": 9.909848385603878e-06, "loss": 0.9001181125640869, "step": 250 }, { "epoch": 0.32572946448486323, "grad_norm": 0.7466822266578674, "learning_rate": 9.908491791599546e-06, "loss": 0.8450853824615479, "step": 251 }, { "epoch": 0.32702719143500214, "grad_norm": 0.75234055519104, "learning_rate": 9.90712516097186e-06, "loss": 0.9213320016860962, "step": 252 }, { "epoch": 0.32832491838514105, "grad_norm": 0.7801260948181152, "learning_rate": 9.905748496515235e-06, "loss": 0.8934061527252197, "step": 253 }, { "epoch": 0.3296226453352799, "grad_norm": 0.7624495029449463, "learning_rate": 9.904361801044599e-06, "loss": 0.9207302927970886, "step": 254 }, { "epoch": 0.3309203722854188, "grad_norm": 0.7547022700309753, "learning_rate": 9.902965077395395e-06, "loss": 0.8502051830291748, "step": 255 }, { "epoch": 0.33221809923555773, "grad_norm": 0.7386295795440674, "learning_rate": 9.901558328423568e-06, "loss": 0.9277836680412292, "step": 256 }, { "epoch": 0.33351582618569664, "grad_norm": 0.7245653867721558, "learning_rate": 9.900141557005567e-06, "loss": 0.841809868812561, "step": 257 }, { "epoch": 0.3348135531358355, "grad_norm": 0.7325631380081177, "learning_rate": 9.898714766038326e-06, "loss": 0.869012713432312, "step": 258 }, { "epoch": 0.3361112800859744, "grad_norm": 0.759433925151825, "learning_rate": 9.897277958439274e-06, "loss": 0.9291174411773682, "step": 259 }, { "epoch": 0.3374090070361133, "grad_norm": 0.757870614528656, "learning_rate": 9.895831137146319e-06, "loss": 0.8791542053222656, "step": 260 }, { "epoch": 0.33870673398625223, "grad_norm": 0.7061610817909241, "learning_rate": 9.894374305117844e-06, "loss": 0.8448185324668884, "step": 261 }, { "epoch": 0.3400044609363911, "grad_norm": 0.7048200368881226, "learning_rate": 9.892907465332702e-06, "loss": 0.8903120756149292, "step": 262 }, { "epoch": 0.34130218788653, "grad_norm": 0.7492517232894897, "learning_rate": 9.891430620790208e-06, "loss": 0.7949280142784119, "step": 263 }, { "epoch": 0.3425999148366689, "grad_norm": 0.7763856053352356, "learning_rate": 9.889943774510136e-06, "loss": 0.88750821352005, "step": 264 }, { "epoch": 0.3438976417868078, "grad_norm": 0.7783463597297668, "learning_rate": 9.888446929532712e-06, "loss": 0.8366783261299133, "step": 265 }, { "epoch": 0.3451953687369467, "grad_norm": 0.7750303745269775, "learning_rate": 9.886940088918601e-06, "loss": 0.9069842100143433, "step": 266 }, { "epoch": 0.3464930956870856, "grad_norm": 0.7684900760650635, "learning_rate": 9.885423255748916e-06, "loss": 0.8415606021881104, "step": 267 }, { "epoch": 0.3477908226372245, "grad_norm": 0.764328122138977, "learning_rate": 9.883896433125193e-06, "loss": 0.8657584190368652, "step": 268 }, { "epoch": 0.3490885495873634, "grad_norm": 0.7822588682174683, "learning_rate": 9.8823596241694e-06, "loss": 0.8536877632141113, "step": 269 }, { "epoch": 0.35038627653750226, "grad_norm": 0.7310823202133179, "learning_rate": 9.88081283202392e-06, "loss": 0.8020272254943848, "step": 270 }, { "epoch": 0.3516840034876412, "grad_norm": 0.717753529548645, "learning_rate": 9.879256059851553e-06, "loss": 0.8834531307220459, "step": 271 }, { "epoch": 0.3529817304377801, "grad_norm": 0.7358869314193726, "learning_rate": 9.877689310835503e-06, "loss": 0.9175652861595154, "step": 272 }, { "epoch": 0.354279457387919, "grad_norm": 0.7447776198387146, "learning_rate": 9.876112588179378e-06, "loss": 0.8402864933013916, "step": 273 }, { "epoch": 0.35557718433805785, "grad_norm": 0.7582140564918518, "learning_rate": 9.874525895107175e-06, "loss": 0.8564626574516296, "step": 274 }, { "epoch": 0.35687491128819676, "grad_norm": 0.7297621965408325, "learning_rate": 9.872929234863277e-06, "loss": 0.8620203137397766, "step": 275 }, { "epoch": 0.35817263823833567, "grad_norm": 0.6891196966171265, "learning_rate": 9.871322610712452e-06, "loss": 0.795127809047699, "step": 276 }, { "epoch": 0.3594703651884746, "grad_norm": 0.7053247690200806, "learning_rate": 9.869706025939843e-06, "loss": 0.7824152112007141, "step": 277 }, { "epoch": 0.36076809213861344, "grad_norm": 0.7359561324119568, "learning_rate": 9.868079483850955e-06, "loss": 0.8618420958518982, "step": 278 }, { "epoch": 0.36206581908875235, "grad_norm": 0.7968899011611938, "learning_rate": 9.86644298777165e-06, "loss": 0.8965823650360107, "step": 279 }, { "epoch": 0.36336354603889126, "grad_norm": 0.762060284614563, "learning_rate": 9.864796541048155e-06, "loss": 0.9179255366325378, "step": 280 }, { "epoch": 0.36466127298903017, "grad_norm": 0.7475153207778931, "learning_rate": 9.863140147047034e-06, "loss": 0.8677244782447815, "step": 281 }, { "epoch": 0.365958999939169, "grad_norm": 0.7659401297569275, "learning_rate": 9.861473809155192e-06, "loss": 0.8148896098136902, "step": 282 }, { "epoch": 0.36725672688930794, "grad_norm": 0.7381513118743896, "learning_rate": 9.859797530779871e-06, "loss": 0.7781128883361816, "step": 283 }, { "epoch": 0.36855445383944685, "grad_norm": 0.7533836364746094, "learning_rate": 9.858111315348633e-06, "loss": 0.9026414752006531, "step": 284 }, { "epoch": 0.36985218078958576, "grad_norm": 0.7744868993759155, "learning_rate": 9.856415166309365e-06, "loss": 0.8830351233482361, "step": 285 }, { "epoch": 0.3711499077397246, "grad_norm": 0.7385957837104797, "learning_rate": 9.854709087130261e-06, "loss": 0.8755077123641968, "step": 286 }, { "epoch": 0.3724476346898635, "grad_norm": 0.7735070586204529, "learning_rate": 9.852993081299821e-06, "loss": 0.8712087869644165, "step": 287 }, { "epoch": 0.37374536164000244, "grad_norm": 0.7552995085716248, "learning_rate": 9.851267152326842e-06, "loss": 0.8405476808547974, "step": 288 }, { "epoch": 0.37504308859014135, "grad_norm": 0.7868830561637878, "learning_rate": 9.849531303740414e-06, "loss": 0.8763223886489868, "step": 289 }, { "epoch": 0.3763408155402802, "grad_norm": 0.7332586646080017, "learning_rate": 9.847785539089904e-06, "loss": 0.8512204885482788, "step": 290 }, { "epoch": 0.3776385424904191, "grad_norm": 0.75746089220047, "learning_rate": 9.846029861944964e-06, "loss": 0.8664734363555908, "step": 291 }, { "epoch": 0.378936269440558, "grad_norm": 0.7325623035430908, "learning_rate": 9.844264275895505e-06, "loss": 0.8830072283744812, "step": 292 }, { "epoch": 0.38023399639069694, "grad_norm": 0.7399287223815918, "learning_rate": 9.842488784551707e-06, "loss": 0.8711655735969543, "step": 293 }, { "epoch": 0.3815317233408358, "grad_norm": 0.7357679605484009, "learning_rate": 9.840703391543999e-06, "loss": 0.7781684994697571, "step": 294 }, { "epoch": 0.3828294502909747, "grad_norm": 0.7346934080123901, "learning_rate": 9.838908100523056e-06, "loss": 0.7901777029037476, "step": 295 }, { "epoch": 0.3841271772411136, "grad_norm": 0.7595230937004089, "learning_rate": 9.837102915159797e-06, "loss": 0.8042920827865601, "step": 296 }, { "epoch": 0.3854249041912525, "grad_norm": 0.7617968320846558, "learning_rate": 9.835287839145366e-06, "loss": 0.9179428815841675, "step": 297 }, { "epoch": 0.3867226311413914, "grad_norm": 0.7399303317070007, "learning_rate": 9.833462876191138e-06, "loss": 0.8573108911514282, "step": 298 }, { "epoch": 0.3880203580915303, "grad_norm": 0.7359921932220459, "learning_rate": 9.831628030028698e-06, "loss": 0.7790151238441467, "step": 299 }, { "epoch": 0.3893180850416692, "grad_norm": 0.7458881139755249, "learning_rate": 9.829783304409838e-06, "loss": 0.839961588382721, "step": 300 }, { "epoch": 0.3906158119918081, "grad_norm": 0.7518510222434998, "learning_rate": 9.827928703106562e-06, "loss": 0.9060590267181396, "step": 301 }, { "epoch": 0.39191353894194697, "grad_norm": 0.7119861841201782, "learning_rate": 9.826064229911056e-06, "loss": 0.8198513984680176, "step": 302 }, { "epoch": 0.3932112658920859, "grad_norm": 0.716594934463501, "learning_rate": 9.824189888635699e-06, "loss": 0.793634295463562, "step": 303 }, { "epoch": 0.3945089928422248, "grad_norm": 0.7235651612281799, "learning_rate": 9.82230568311304e-06, "loss": 0.8697519302368164, "step": 304 }, { "epoch": 0.3958067197923637, "grad_norm": 0.7011069655418396, "learning_rate": 9.820411617195807e-06, "loss": 0.7747907638549805, "step": 305 }, { "epoch": 0.39710444674250256, "grad_norm": 0.7803413271903992, "learning_rate": 9.818507694756883e-06, "loss": 0.9115470051765442, "step": 306 }, { "epoch": 0.39840217369264147, "grad_norm": 0.7196638584136963, "learning_rate": 9.816593919689305e-06, "loss": 0.8010602593421936, "step": 307 }, { "epoch": 0.3996999006427804, "grad_norm": 0.7336106896400452, "learning_rate": 9.814670295906265e-06, "loss": 0.8578990697860718, "step": 308 }, { "epoch": 0.4009976275929193, "grad_norm": 0.7282925844192505, "learning_rate": 9.81273682734108e-06, "loss": 0.8971216678619385, "step": 309 }, { "epoch": 0.40229535454305815, "grad_norm": 0.7463446259498596, "learning_rate": 9.81079351794721e-06, "loss": 0.8058433532714844, "step": 310 }, { "epoch": 0.40359308149319706, "grad_norm": 0.7901847958564758, "learning_rate": 9.808840371698226e-06, "loss": 0.896103024482727, "step": 311 }, { "epoch": 0.40489080844333597, "grad_norm": 0.72321617603302, "learning_rate": 9.80687739258782e-06, "loss": 0.8004294037818909, "step": 312 }, { "epoch": 0.4061885353934749, "grad_norm": 0.7592387199401855, "learning_rate": 9.804904584629786e-06, "loss": 0.8932079076766968, "step": 313 }, { "epoch": 0.40748626234361374, "grad_norm": 0.7692825198173523, "learning_rate": 9.80292195185802e-06, "loss": 0.8546319603919983, "step": 314 }, { "epoch": 0.40878398929375265, "grad_norm": 0.7320608496665955, "learning_rate": 9.800929498326502e-06, "loss": 0.8395287990570068, "step": 315 }, { "epoch": 0.41008171624389156, "grad_norm": 0.7121133208274841, "learning_rate": 9.798927228109294e-06, "loss": 0.8351479768753052, "step": 316 }, { "epoch": 0.41137944319403047, "grad_norm": 0.7649046778678894, "learning_rate": 9.796915145300534e-06, "loss": 0.90168696641922, "step": 317 }, { "epoch": 0.4126771701441694, "grad_norm": 0.759292721748352, "learning_rate": 9.794893254014421e-06, "loss": 0.8341972827911377, "step": 318 }, { "epoch": 0.41397489709430824, "grad_norm": 0.7619395852088928, "learning_rate": 9.792861558385212e-06, "loss": 0.8608151078224182, "step": 319 }, { "epoch": 0.41527262404444715, "grad_norm": 0.8039480447769165, "learning_rate": 9.790820062567208e-06, "loss": 0.8884308934211731, "step": 320 }, { "epoch": 0.41657035099458606, "grad_norm": 0.7317516207695007, "learning_rate": 9.788768770734753e-06, "loss": 0.8331541419029236, "step": 321 }, { "epoch": 0.41786807794472497, "grad_norm": 0.6900208592414856, "learning_rate": 9.78670768708222e-06, "loss": 0.7963373064994812, "step": 322 }, { "epoch": 0.4191658048948638, "grad_norm": 0.748304545879364, "learning_rate": 9.784636815824003e-06, "loss": 0.8610258102416992, "step": 323 }, { "epoch": 0.42046353184500274, "grad_norm": 0.7829216718673706, "learning_rate": 9.782556161194508e-06, "loss": 0.8490146994590759, "step": 324 }, { "epoch": 0.42176125879514165, "grad_norm": 0.7254793047904968, "learning_rate": 9.78046572744815e-06, "loss": 0.8324488401412964, "step": 325 }, { "epoch": 0.42305898574528056, "grad_norm": 0.7239816784858704, "learning_rate": 9.778365518859334e-06, "loss": 0.745983362197876, "step": 326 }, { "epoch": 0.4243567126954194, "grad_norm": 0.7268423438072205, "learning_rate": 9.776255539722457e-06, "loss": 0.8977281451225281, "step": 327 }, { "epoch": 0.4256544396455583, "grad_norm": 0.7335854172706604, "learning_rate": 9.774135794351892e-06, "loss": 0.8936282396316528, "step": 328 }, { "epoch": 0.42695216659569724, "grad_norm": 0.7278575897216797, "learning_rate": 9.77200628708198e-06, "loss": 0.7703112959861755, "step": 329 }, { "epoch": 0.42824989354583615, "grad_norm": 0.7280608415603638, "learning_rate": 9.769867022267028e-06, "loss": 0.9051704406738281, "step": 330 }, { "epoch": 0.429547620495975, "grad_norm": 0.7283042073249817, "learning_rate": 9.767718004281288e-06, "loss": 0.8391573429107666, "step": 331 }, { "epoch": 0.4308453474461139, "grad_norm": 0.7032507658004761, "learning_rate": 9.765559237518958e-06, "loss": 0.7801662087440491, "step": 332 }, { "epoch": 0.4321430743962528, "grad_norm": 0.7365294694900513, "learning_rate": 9.763390726394171e-06, "loss": 0.8532007932662964, "step": 333 }, { "epoch": 0.43344080134639174, "grad_norm": 0.7675997018814087, "learning_rate": 9.761212475340982e-06, "loss": 0.8515892624855042, "step": 334 }, { "epoch": 0.4347385282965306, "grad_norm": 0.7382540702819824, "learning_rate": 9.759024488813364e-06, "loss": 0.7981022596359253, "step": 335 }, { "epoch": 0.4360362552466695, "grad_norm": 0.7753154039382935, "learning_rate": 9.756826771285195e-06, "loss": 0.7946685552597046, "step": 336 }, { "epoch": 0.4373339821968084, "grad_norm": 0.6813466548919678, "learning_rate": 9.754619327250253e-06, "loss": 0.7935984134674072, "step": 337 }, { "epoch": 0.4386317091469473, "grad_norm": 0.7297438979148865, "learning_rate": 9.7524021612222e-06, "loss": 0.8265496492385864, "step": 338 }, { "epoch": 0.4399294360970862, "grad_norm": 0.7127614617347717, "learning_rate": 9.750175277734582e-06, "loss": 0.8402823209762573, "step": 339 }, { "epoch": 0.4412271630472251, "grad_norm": 0.7071698904037476, "learning_rate": 9.747938681340807e-06, "loss": 0.7340988516807556, "step": 340 }, { "epoch": 0.442524889997364, "grad_norm": 0.7566197514533997, "learning_rate": 9.745692376614154e-06, "loss": 0.8520500063896179, "step": 341 }, { "epoch": 0.4438226169475029, "grad_norm": 0.7088791728019714, "learning_rate": 9.743436368147745e-06, "loss": 0.7458124756813049, "step": 342 }, { "epoch": 0.44512034389764177, "grad_norm": 0.7586151361465454, "learning_rate": 9.741170660554548e-06, "loss": 0.83961021900177, "step": 343 }, { "epoch": 0.4464180708477807, "grad_norm": 0.7471207976341248, "learning_rate": 9.73889525846736e-06, "loss": 0.8236647844314575, "step": 344 }, { "epoch": 0.4477157977979196, "grad_norm": 0.7463803887367249, "learning_rate": 9.736610166538802e-06, "loss": 0.9211429357528687, "step": 345 }, { "epoch": 0.4490135247480585, "grad_norm": 0.7657600045204163, "learning_rate": 9.73431538944131e-06, "loss": 0.9619902968406677, "step": 346 }, { "epoch": 0.45031125169819736, "grad_norm": 0.7406160831451416, "learning_rate": 9.73201093186712e-06, "loss": 0.869321346282959, "step": 347 }, { "epoch": 0.45160897864833627, "grad_norm": 0.7429345846176147, "learning_rate": 9.729696798528268e-06, "loss": 0.8034260272979736, "step": 348 }, { "epoch": 0.4529067055984752, "grad_norm": 0.7394105195999146, "learning_rate": 9.727372994156568e-06, "loss": 0.8107494115829468, "step": 349 }, { "epoch": 0.4542044325486141, "grad_norm": 0.740679919719696, "learning_rate": 9.725039523503615e-06, "loss": 0.8520351052284241, "step": 350 }, { "epoch": 0.45550215949875295, "grad_norm": 0.7290498614311218, "learning_rate": 9.722696391340762e-06, "loss": 0.768467903137207, "step": 351 }, { "epoch": 0.45679988644889186, "grad_norm": 0.703825056552887, "learning_rate": 9.720343602459123e-06, "loss": 0.8336674571037292, "step": 352 }, { "epoch": 0.45809761339903077, "grad_norm": 0.7116036415100098, "learning_rate": 9.717981161669556e-06, "loss": 0.8118845820426941, "step": 353 }, { "epoch": 0.4593953403491697, "grad_norm": 0.7270032167434692, "learning_rate": 9.715609073802653e-06, "loss": 0.902847945690155, "step": 354 }, { "epoch": 0.46069306729930853, "grad_norm": 0.7389693260192871, "learning_rate": 9.713227343708737e-06, "loss": 0.8098748326301575, "step": 355 }, { "epoch": 0.46199079424944745, "grad_norm": 0.7012364268302917, "learning_rate": 9.71083597625784e-06, "loss": 0.7249214053153992, "step": 356 }, { "epoch": 0.46328852119958636, "grad_norm": 0.7171372175216675, "learning_rate": 9.708434976339704e-06, "loss": 0.8786948323249817, "step": 357 }, { "epoch": 0.46458624814972527, "grad_norm": 0.7394458055496216, "learning_rate": 9.706024348863766e-06, "loss": 0.8580265045166016, "step": 358 }, { "epoch": 0.4658839750998641, "grad_norm": 0.7083274722099304, "learning_rate": 9.703604098759148e-06, "loss": 0.8410412073135376, "step": 359 }, { "epoch": 0.46718170205000303, "grad_norm": 0.712253749370575, "learning_rate": 9.70117423097465e-06, "loss": 0.8000701665878296, "step": 360 }, { "epoch": 0.46847942900014194, "grad_norm": 0.7887440919876099, "learning_rate": 9.698734750478739e-06, "loss": 0.8630033731460571, "step": 361 }, { "epoch": 0.46977715595028086, "grad_norm": 0.7340478897094727, "learning_rate": 9.69628566225953e-06, "loss": 0.8977490663528442, "step": 362 }, { "epoch": 0.4710748829004197, "grad_norm": 0.7779148817062378, "learning_rate": 9.693826971324793e-06, "loss": 0.832167387008667, "step": 363 }, { "epoch": 0.4723726098505586, "grad_norm": 0.7122468948364258, "learning_rate": 9.691358682701927e-06, "loss": 0.857900857925415, "step": 364 }, { "epoch": 0.47367033680069753, "grad_norm": 0.7308494448661804, "learning_rate": 9.688880801437957e-06, "loss": 0.8315039873123169, "step": 365 }, { "epoch": 0.47496806375083644, "grad_norm": 0.7546078562736511, "learning_rate": 9.686393332599525e-06, "loss": 0.8919548988342285, "step": 366 }, { "epoch": 0.4762657907009753, "grad_norm": 0.7366290092468262, "learning_rate": 9.683896281272872e-06, "loss": 0.8298309445381165, "step": 367 }, { "epoch": 0.4775635176511142, "grad_norm": 0.7167381644248962, "learning_rate": 9.681389652563837e-06, "loss": 0.7846927642822266, "step": 368 }, { "epoch": 0.4788612446012531, "grad_norm": 0.7552225589752197, "learning_rate": 9.678873451597843e-06, "loss": 0.8357439637184143, "step": 369 }, { "epoch": 0.48015897155139203, "grad_norm": 0.7224664092063904, "learning_rate": 9.676347683519882e-06, "loss": 0.81117844581604, "step": 370 }, { "epoch": 0.4814566985015309, "grad_norm": 0.7118367552757263, "learning_rate": 9.673812353494513e-06, "loss": 0.8407114148139954, "step": 371 }, { "epoch": 0.4827544254516698, "grad_norm": 0.769687294960022, "learning_rate": 9.671267466705841e-06, "loss": 0.8985891342163086, "step": 372 }, { "epoch": 0.4840521524018087, "grad_norm": 0.7327465415000916, "learning_rate": 9.668713028357518e-06, "loss": 0.8324825167655945, "step": 373 }, { "epoch": 0.4853498793519476, "grad_norm": 0.7223183512687683, "learning_rate": 9.666149043672724e-06, "loss": 0.8175463080406189, "step": 374 }, { "epoch": 0.4866476063020865, "grad_norm": 0.7580711245536804, "learning_rate": 9.663575517894155e-06, "loss": 0.8757423162460327, "step": 375 }, { "epoch": 0.4879453332522254, "grad_norm": 0.7004430890083313, "learning_rate": 9.660992456284024e-06, "loss": 0.7426130771636963, "step": 376 }, { "epoch": 0.4892430602023643, "grad_norm": 0.7414857149124146, "learning_rate": 9.658399864124037e-06, "loss": 0.8230442404747009, "step": 377 }, { "epoch": 0.4905407871525032, "grad_norm": 0.7824345231056213, "learning_rate": 9.655797746715388e-06, "loss": 0.9073444604873657, "step": 378 }, { "epoch": 0.49183851410264207, "grad_norm": 0.7817385792732239, "learning_rate": 9.65318610937875e-06, "loss": 0.8738477826118469, "step": 379 }, { "epoch": 0.493136241052781, "grad_norm": 0.7348605394363403, "learning_rate": 9.650564957454258e-06, "loss": 0.8755439519882202, "step": 380 }, { "epoch": 0.4944339680029199, "grad_norm": 0.7649983763694763, "learning_rate": 9.647934296301506e-06, "loss": 1.0017733573913574, "step": 381 }, { "epoch": 0.4957316949530588, "grad_norm": 0.735651969909668, "learning_rate": 9.64529413129953e-06, "loss": 0.8582989573478699, "step": 382 }, { "epoch": 0.49702942190319765, "grad_norm": 0.7229171395301819, "learning_rate": 9.642644467846799e-06, "loss": 0.8139792680740356, "step": 383 }, { "epoch": 0.49832714885333657, "grad_norm": 0.7279280424118042, "learning_rate": 9.639985311361202e-06, "loss": 0.8503962159156799, "step": 384 }, { "epoch": 0.4996248758034755, "grad_norm": 0.7445419430732727, "learning_rate": 9.637316667280046e-06, "loss": 0.9100584387779236, "step": 385 }, { "epoch": 0.5009226027536143, "grad_norm": 0.7147620916366577, "learning_rate": 9.634638541060027e-06, "loss": 0.8684252500534058, "step": 386 }, { "epoch": 0.5022203297037533, "grad_norm": 0.7348777055740356, "learning_rate": 9.63195093817724e-06, "loss": 0.8720637559890747, "step": 387 }, { "epoch": 0.5035180566538922, "grad_norm": 0.743008553981781, "learning_rate": 9.62925386412715e-06, "loss": 0.8478811979293823, "step": 388 }, { "epoch": 0.504815783604031, "grad_norm": 0.7702372670173645, "learning_rate": 9.626547324424592e-06, "loss": 0.8293808102607727, "step": 389 }, { "epoch": 0.50611351055417, "grad_norm": 0.8113992810249329, "learning_rate": 9.623831324603755e-06, "loss": 0.9317741990089417, "step": 390 }, { "epoch": 0.5074112375043088, "grad_norm": 0.7273135781288147, "learning_rate": 9.621105870218167e-06, "loss": 0.840066134929657, "step": 391 }, { "epoch": 0.5087089644544478, "grad_norm": 0.7781491875648499, "learning_rate": 9.618370966840698e-06, "loss": 0.8440302610397339, "step": 392 }, { "epoch": 0.5100066914045867, "grad_norm": 0.7643720507621765, "learning_rate": 9.615626620063531e-06, "loss": 0.7832410335540771, "step": 393 }, { "epoch": 0.5113044183547255, "grad_norm": 0.7325314283370972, "learning_rate": 9.61287283549816e-06, "loss": 0.8389537334442139, "step": 394 }, { "epoch": 0.5126021453048645, "grad_norm": 0.7445440292358398, "learning_rate": 9.610109618775379e-06, "loss": 0.8239348530769348, "step": 395 }, { "epoch": 0.5138998722550033, "grad_norm": 0.7088785767555237, "learning_rate": 9.607336975545264e-06, "loss": 0.8060757517814636, "step": 396 }, { "epoch": 0.5151975992051423, "grad_norm": 0.8243458271026611, "learning_rate": 9.604554911477173e-06, "loss": 0.8679091334342957, "step": 397 }, { "epoch": 0.5164953261552812, "grad_norm": 0.8044202327728271, "learning_rate": 9.601763432259716e-06, "loss": 0.8719264268875122, "step": 398 }, { "epoch": 0.51779305310542, "grad_norm": 0.6826696991920471, "learning_rate": 9.59896254360077e-06, "loss": 0.8089145421981812, "step": 399 }, { "epoch": 0.519090780055559, "grad_norm": 0.7384952306747437, "learning_rate": 9.596152251227438e-06, "loss": 0.8086674213409424, "step": 400 }, { "epoch": 0.5203885070056978, "grad_norm": 0.7176761031150818, "learning_rate": 9.593332560886055e-06, "loss": 0.8248165845870972, "step": 401 }, { "epoch": 0.5216862339558367, "grad_norm": 0.7176565527915955, "learning_rate": 9.59050347834218e-06, "loss": 0.7899567484855652, "step": 402 }, { "epoch": 0.5229839609059757, "grad_norm": 0.7689147591590881, "learning_rate": 9.587665009380565e-06, "loss": 0.8428173065185547, "step": 403 }, { "epoch": 0.5242816878561145, "grad_norm": 0.7173596024513245, "learning_rate": 9.584817159805164e-06, "loss": 0.8865775465965271, "step": 404 }, { "epoch": 0.5255794148062535, "grad_norm": 0.742651641368866, "learning_rate": 9.58195993543911e-06, "loss": 0.8390700817108154, "step": 405 }, { "epoch": 0.5268771417563923, "grad_norm": 0.8015622496604919, "learning_rate": 9.579093342124699e-06, "loss": 0.8883824944496155, "step": 406 }, { "epoch": 0.5281748687065312, "grad_norm": 0.7367856502532959, "learning_rate": 9.576217385723391e-06, "loss": 0.7946388125419617, "step": 407 }, { "epoch": 0.5294725956566702, "grad_norm": 0.6854470372200012, "learning_rate": 9.57333207211579e-06, "loss": 0.7881499528884888, "step": 408 }, { "epoch": 0.530770322606809, "grad_norm": 0.7024526596069336, "learning_rate": 9.57043740720163e-06, "loss": 0.840889036655426, "step": 409 }, { "epoch": 0.5320680495569479, "grad_norm": 0.7624524235725403, "learning_rate": 9.567533396899769e-06, "loss": 0.8071510791778564, "step": 410 }, { "epoch": 0.5333657765070868, "grad_norm": 0.7562685012817383, "learning_rate": 9.564620047148174e-06, "loss": 0.8523662090301514, "step": 411 }, { "epoch": 0.5346635034572257, "grad_norm": 0.7493570446968079, "learning_rate": 9.561697363903908e-06, "loss": 0.8793864846229553, "step": 412 }, { "epoch": 0.5359612304073647, "grad_norm": 0.7925007343292236, "learning_rate": 9.558765353143116e-06, "loss": 0.8957589268684387, "step": 413 }, { "epoch": 0.5372589573575035, "grad_norm": 0.7443920969963074, "learning_rate": 9.555824020861022e-06, "loss": 0.8397520184516907, "step": 414 }, { "epoch": 0.5385566843076424, "grad_norm": 0.7451807260513306, "learning_rate": 9.5528733730719e-06, "loss": 0.8264951705932617, "step": 415 }, { "epoch": 0.5398544112577813, "grad_norm": 0.756137490272522, "learning_rate": 9.549913415809084e-06, "loss": 0.7959051728248596, "step": 416 }, { "epoch": 0.5411521382079202, "grad_norm": 0.6989119052886963, "learning_rate": 9.546944155124935e-06, "loss": 0.8344296813011169, "step": 417 }, { "epoch": 0.542449865158059, "grad_norm": 0.7302668690681458, "learning_rate": 9.54396559709084e-06, "loss": 0.8438407778739929, "step": 418 }, { "epoch": 0.543747592108198, "grad_norm": 0.7969427704811096, "learning_rate": 9.540977747797194e-06, "loss": 0.7917635440826416, "step": 419 }, { "epoch": 0.5450453190583369, "grad_norm": 0.7352253794670105, "learning_rate": 9.537980613353392e-06, "loss": 0.8681586384773254, "step": 420 }, { "epoch": 0.5463430460084758, "grad_norm": 0.7419816255569458, "learning_rate": 9.53497419988782e-06, "loss": 0.8770594596862793, "step": 421 }, { "epoch": 0.5476407729586147, "grad_norm": 0.7256708741188049, "learning_rate": 9.531958513547832e-06, "loss": 0.8533478379249573, "step": 422 }, { "epoch": 0.5489384999087535, "grad_norm": 0.8208673596382141, "learning_rate": 9.52893356049974e-06, "loss": 0.9058896899223328, "step": 423 }, { "epoch": 0.5502362268588925, "grad_norm": 0.7950146794319153, "learning_rate": 9.525899346928809e-06, "loss": 0.8501814603805542, "step": 424 }, { "epoch": 0.5515339538090314, "grad_norm": 0.7618380188941956, "learning_rate": 9.52285587903924e-06, "loss": 0.8879157900810242, "step": 425 }, { "epoch": 0.5528316807591702, "grad_norm": 0.6897137761116028, "learning_rate": 9.519803163054149e-06, "loss": 0.82085782289505, "step": 426 }, { "epoch": 0.5541294077093092, "grad_norm": 0.7675971984863281, "learning_rate": 9.51674120521557e-06, "loss": 0.8406592011451721, "step": 427 }, { "epoch": 0.555427134659448, "grad_norm": 0.7005506157875061, "learning_rate": 9.513670011784435e-06, "loss": 0.8337131142616272, "step": 428 }, { "epoch": 0.556724861609587, "grad_norm": 0.7315273880958557, "learning_rate": 9.510589589040554e-06, "loss": 0.7294406890869141, "step": 429 }, { "epoch": 0.5580225885597259, "grad_norm": 0.74063640832901, "learning_rate": 9.507499943282613e-06, "loss": 0.7584653496742249, "step": 430 }, { "epoch": 0.5593203155098647, "grad_norm": 0.7152555584907532, "learning_rate": 9.504401080828154e-06, "loss": 0.811851441860199, "step": 431 }, { "epoch": 0.5606180424600037, "grad_norm": 0.7302674055099487, "learning_rate": 9.501293008013568e-06, "loss": 0.8225210905075073, "step": 432 }, { "epoch": 0.5619157694101425, "grad_norm": 0.7426562905311584, "learning_rate": 9.498175731194077e-06, "loss": 0.8669119477272034, "step": 433 }, { "epoch": 0.5632134963602814, "grad_norm": 0.7812010049819946, "learning_rate": 9.495049256743723e-06, "loss": 0.9062970280647278, "step": 434 }, { "epoch": 0.5645112233104204, "grad_norm": 0.7181828022003174, "learning_rate": 9.491913591055356e-06, "loss": 0.7689467072486877, "step": 435 }, { "epoch": 0.5658089502605592, "grad_norm": 0.7515583038330078, "learning_rate": 9.488768740540615e-06, "loss": 0.8905704021453857, "step": 436 }, { "epoch": 0.5671066772106982, "grad_norm": 0.6940712332725525, "learning_rate": 9.485614711629927e-06, "loss": 0.7591986656188965, "step": 437 }, { "epoch": 0.568404404160837, "grad_norm": 0.7081801891326904, "learning_rate": 9.482451510772482e-06, "loss": 0.8202489018440247, "step": 438 }, { "epoch": 0.5697021311109759, "grad_norm": 0.7051956057548523, "learning_rate": 9.479279144436224e-06, "loss": 0.8634527921676636, "step": 439 }, { "epoch": 0.5709998580611149, "grad_norm": 0.7323376536369324, "learning_rate": 9.47609761910784e-06, "loss": 0.802325427532196, "step": 440 }, { "epoch": 0.5722975850112537, "grad_norm": 0.7712946534156799, "learning_rate": 9.472906941292746e-06, "loss": 0.8281590342521667, "step": 441 }, { "epoch": 0.5735953119613926, "grad_norm": 0.7743613719940186, "learning_rate": 9.469707117515068e-06, "loss": 0.8420519828796387, "step": 442 }, { "epoch": 0.5748930389115315, "grad_norm": 0.7498214840888977, "learning_rate": 9.466498154317635e-06, "loss": 0.7940037250518799, "step": 443 }, { "epoch": 0.5761907658616704, "grad_norm": 0.7323300838470459, "learning_rate": 9.463280058261965e-06, "loss": 0.8547796010971069, "step": 444 }, { "epoch": 0.5774884928118094, "grad_norm": 0.7174122929573059, "learning_rate": 9.460052835928254e-06, "loss": 0.8065235614776611, "step": 445 }, { "epoch": 0.5787862197619482, "grad_norm": 0.7354016304016113, "learning_rate": 9.45681649391535e-06, "loss": 0.7716302275657654, "step": 446 }, { "epoch": 0.5800839467120871, "grad_norm": 0.6930510997772217, "learning_rate": 9.453571038840755e-06, "loss": 0.7406535744667053, "step": 447 }, { "epoch": 0.581381673662226, "grad_norm": 0.7596374154090881, "learning_rate": 9.450316477340602e-06, "loss": 0.8508278727531433, "step": 448 }, { "epoch": 0.5826794006123649, "grad_norm": 0.7328705787658691, "learning_rate": 9.447052816069648e-06, "loss": 0.8054483532905579, "step": 449 }, { "epoch": 0.5839771275625038, "grad_norm": 0.7718313932418823, "learning_rate": 9.443780061701252e-06, "loss": 0.8918864727020264, "step": 450 }, { "epoch": 0.5852748545126427, "grad_norm": 0.7557240724563599, "learning_rate": 9.44049822092737e-06, "loss": 0.8130478858947754, "step": 451 }, { "epoch": 0.5865725814627816, "grad_norm": 0.7418437600135803, "learning_rate": 9.437207300458535e-06, "loss": 0.8093717098236084, "step": 452 }, { "epoch": 0.5878703084129205, "grad_norm": 0.7124131321907043, "learning_rate": 9.433907307023845e-06, "loss": 0.8172003626823425, "step": 453 }, { "epoch": 0.5891680353630594, "grad_norm": 0.7546778321266174, "learning_rate": 9.430598247370955e-06, "loss": 0.7902172207832336, "step": 454 }, { "epoch": 0.5904657623131983, "grad_norm": 0.7268347144126892, "learning_rate": 9.427280128266049e-06, "loss": 0.7638229727745056, "step": 455 }, { "epoch": 0.5917634892633372, "grad_norm": 0.7774848937988281, "learning_rate": 9.423952956493846e-06, "loss": 0.7811177372932434, "step": 456 }, { "epoch": 0.5930612162134761, "grad_norm": 0.7768798470497131, "learning_rate": 9.420616738857568e-06, "loss": 0.79379802942276, "step": 457 }, { "epoch": 0.5943589431636149, "grad_norm": 0.7454319596290588, "learning_rate": 9.417271482178938e-06, "loss": 0.8783116340637207, "step": 458 }, { "epoch": 0.5956566701137539, "grad_norm": 0.6949381232261658, "learning_rate": 9.413917193298153e-06, "loss": 0.8404789566993713, "step": 459 }, { "epoch": 0.5969543970638927, "grad_norm": 0.704766035079956, "learning_rate": 9.41055387907389e-06, "loss": 0.7789646983146667, "step": 460 }, { "epoch": 0.5969543970638927, "eval_loss": 0.8027161359786987, "eval_runtime": 139.819, "eval_samples_per_second": 37.134, "eval_steps_per_second": 9.283, "step": 460 }, { "epoch": 0.5982521240140317, "grad_norm": 0.7682297229766846, "learning_rate": 9.407181546383275e-06, "loss": 0.8239257335662842, "step": 461 }, { "epoch": 0.5995498509641706, "grad_norm": 0.7389854192733765, "learning_rate": 9.403800202121873e-06, "loss": 0.7810261845588684, "step": 462 }, { "epoch": 0.6008475779143094, "grad_norm": 0.6934480667114258, "learning_rate": 9.400409853203677e-06, "loss": 0.8098300099372864, "step": 463 }, { "epoch": 0.6021453048644484, "grad_norm": 0.7336373925209045, "learning_rate": 9.397010506561096e-06, "loss": 0.7502899765968323, "step": 464 }, { "epoch": 0.6034430318145872, "grad_norm": 0.7731090784072876, "learning_rate": 9.393602169144929e-06, "loss": 0.8462476134300232, "step": 465 }, { "epoch": 0.6047407587647261, "grad_norm": 0.7425335645675659, "learning_rate": 9.390184847924366e-06, "loss": 0.813231348991394, "step": 466 }, { "epoch": 0.6060384857148651, "grad_norm": 0.7691134214401245, "learning_rate": 9.386758549886964e-06, "loss": 0.872662365436554, "step": 467 }, { "epoch": 0.6073362126650039, "grad_norm": 0.7183594107627869, "learning_rate": 9.383323282038632e-06, "loss": 0.8333174586296082, "step": 468 }, { "epoch": 0.6086339396151429, "grad_norm": 0.7414330244064331, "learning_rate": 9.379879051403627e-06, "loss": 0.7815148234367371, "step": 469 }, { "epoch": 0.6099316665652817, "grad_norm": 0.753852903842926, "learning_rate": 9.376425865024527e-06, "loss": 0.8778871297836304, "step": 470 }, { "epoch": 0.6112293935154206, "grad_norm": 0.7895762920379639, "learning_rate": 9.372963729962227e-06, "loss": 0.930837869644165, "step": 471 }, { "epoch": 0.6125271204655596, "grad_norm": 0.75229412317276, "learning_rate": 9.369492653295913e-06, "loss": 0.7898579239845276, "step": 472 }, { "epoch": 0.6138248474156984, "grad_norm": 0.6872172951698303, "learning_rate": 9.366012642123061e-06, "loss": 0.8121792078018188, "step": 473 }, { "epoch": 0.6151225743658374, "grad_norm": 0.719164252281189, "learning_rate": 9.362523703559412e-06, "loss": 0.812069833278656, "step": 474 }, { "epoch": 0.6164203013159762, "grad_norm": 0.7363712787628174, "learning_rate": 9.359025844738962e-06, "loss": 0.8511337041854858, "step": 475 }, { "epoch": 0.6177180282661151, "grad_norm": 0.7480281591415405, "learning_rate": 9.355519072813946e-06, "loss": 0.8967547416687012, "step": 476 }, { "epoch": 0.6190157552162541, "grad_norm": 0.7672280073165894, "learning_rate": 9.352003394954827e-06, "loss": 0.8544362783432007, "step": 477 }, { "epoch": 0.6203134821663929, "grad_norm": 0.7103601694107056, "learning_rate": 9.348478818350277e-06, "loss": 0.8329444527626038, "step": 478 }, { "epoch": 0.6216112091165318, "grad_norm": 0.7819087505340576, "learning_rate": 9.34494535020716e-06, "loss": 0.868659496307373, "step": 479 }, { "epoch": 0.6229089360666707, "grad_norm": 0.8021608591079712, "learning_rate": 9.341402997750526e-06, "loss": 0.8001782298088074, "step": 480 }, { "epoch": 0.6242066630168096, "grad_norm": 0.7665423154830933, "learning_rate": 9.337851768223589e-06, "loss": 0.8383674025535583, "step": 481 }, { "epoch": 0.6255043899669486, "grad_norm": 0.7381864786148071, "learning_rate": 9.334291668887716e-06, "loss": 0.8331637382507324, "step": 482 }, { "epoch": 0.6268021169170874, "grad_norm": 0.6999797821044922, "learning_rate": 9.330722707022406e-06, "loss": 0.8383775353431702, "step": 483 }, { "epoch": 0.6280998438672263, "grad_norm": 0.7365379333496094, "learning_rate": 9.327144889925286e-06, "loss": 0.8450272679328918, "step": 484 }, { "epoch": 0.6293975708173652, "grad_norm": 0.7899283766746521, "learning_rate": 9.323558224912083e-06, "loss": 0.8851337432861328, "step": 485 }, { "epoch": 0.6306952977675041, "grad_norm": 0.7195940017700195, "learning_rate": 9.319962719316621e-06, "loss": 0.8370380401611328, "step": 486 }, { "epoch": 0.631993024717643, "grad_norm": 0.7461369037628174, "learning_rate": 9.3163583804908e-06, "loss": 0.7896135449409485, "step": 487 }, { "epoch": 0.6332907516677819, "grad_norm": 0.7383708357810974, "learning_rate": 9.312745215804577e-06, "loss": 0.9348940849304199, "step": 488 }, { "epoch": 0.6345884786179208, "grad_norm": 0.758083701133728, "learning_rate": 9.309123232645963e-06, "loss": 0.8204320669174194, "step": 489 }, { "epoch": 0.6358862055680597, "grad_norm": 0.7442320585250854, "learning_rate": 9.305492438420995e-06, "loss": 0.8042322397232056, "step": 490 }, { "epoch": 0.6371839325181986, "grad_norm": 0.7107361555099487, "learning_rate": 9.301852840553728e-06, "loss": 0.7795714735984802, "step": 491 }, { "epoch": 0.6384816594683375, "grad_norm": 0.760767936706543, "learning_rate": 9.298204446486221e-06, "loss": 0.8364303112030029, "step": 492 }, { "epoch": 0.6397793864184764, "grad_norm": 0.7145869135856628, "learning_rate": 9.294547263678515e-06, "loss": 0.7660549283027649, "step": 493 }, { "epoch": 0.6410771133686153, "grad_norm": 0.788299560546875, "learning_rate": 9.29088129960862e-06, "loss": 0.8712038993835449, "step": 494 }, { "epoch": 0.6423748403187541, "grad_norm": 0.7306452989578247, "learning_rate": 9.28720656177251e-06, "loss": 0.8043104410171509, "step": 495 }, { "epoch": 0.6436725672688931, "grad_norm": 0.7626951336860657, "learning_rate": 9.28352305768409e-06, "loss": 0.8437666296958923, "step": 496 }, { "epoch": 0.644970294219032, "grad_norm": 0.7394039630889893, "learning_rate": 9.279830794875194e-06, "loss": 0.8267931342124939, "step": 497 }, { "epoch": 0.6462680211691709, "grad_norm": 0.7661542296409607, "learning_rate": 9.276129780895566e-06, "loss": 0.8075953722000122, "step": 498 }, { "epoch": 0.6475657481193098, "grad_norm": 0.7846643328666687, "learning_rate": 9.272420023312843e-06, "loss": 0.9408265352249146, "step": 499 }, { "epoch": 0.6488634750694486, "grad_norm": 0.7860012054443359, "learning_rate": 9.268701529712541e-06, "loss": 0.8661449551582336, "step": 500 }, { "epoch": 0.6501612020195876, "grad_norm": 0.7225436568260193, "learning_rate": 9.264974307698034e-06, "loss": 0.7973988652229309, "step": 501 }, { "epoch": 0.6514589289697265, "grad_norm": 0.7316765189170837, "learning_rate": 9.261238364890553e-06, "loss": 0.8226317167282104, "step": 502 }, { "epoch": 0.6527566559198653, "grad_norm": 0.7312497496604919, "learning_rate": 9.257493708929153e-06, "loss": 0.8850467801094055, "step": 503 }, { "epoch": 0.6540543828700043, "grad_norm": 0.7464125752449036, "learning_rate": 9.253740347470708e-06, "loss": 0.8953540921211243, "step": 504 }, { "epoch": 0.6553521098201431, "grad_norm": 0.7750662565231323, "learning_rate": 9.24997828818989e-06, "loss": 0.860468327999115, "step": 505 }, { "epoch": 0.6566498367702821, "grad_norm": 0.72883540391922, "learning_rate": 9.246207538779162e-06, "loss": 0.8943373560905457, "step": 506 }, { "epoch": 0.657947563720421, "grad_norm": 0.7219229340553284, "learning_rate": 9.242428106948748e-06, "loss": 0.8636921048164368, "step": 507 }, { "epoch": 0.6592452906705598, "grad_norm": 0.7252084612846375, "learning_rate": 9.238640000426635e-06, "loss": 0.8572896718978882, "step": 508 }, { "epoch": 0.6605430176206988, "grad_norm": 0.6815743446350098, "learning_rate": 9.234843226958537e-06, "loss": 0.7989461421966553, "step": 509 }, { "epoch": 0.6618407445708376, "grad_norm": 0.7128654718399048, "learning_rate": 9.231037794307896e-06, "loss": 0.8661865592002869, "step": 510 }, { "epoch": 0.6631384715209765, "grad_norm": 0.6929757595062256, "learning_rate": 9.22722371025586e-06, "loss": 0.8198660612106323, "step": 511 }, { "epoch": 0.6644361984711155, "grad_norm": 0.7036609053611755, "learning_rate": 9.223400982601262e-06, "loss": 0.7628751993179321, "step": 512 }, { "epoch": 0.6657339254212543, "grad_norm": 0.7455263733863831, "learning_rate": 9.219569619160618e-06, "loss": 0.8409616351127625, "step": 513 }, { "epoch": 0.6670316523713933, "grad_norm": 0.7605084776878357, "learning_rate": 9.215729627768093e-06, "loss": 0.8993404507637024, "step": 514 }, { "epoch": 0.6683293793215321, "grad_norm": 0.7330998778343201, "learning_rate": 9.2118810162755e-06, "loss": 0.7985840439796448, "step": 515 }, { "epoch": 0.669627106271671, "grad_norm": 0.710595965385437, "learning_rate": 9.20802379255227e-06, "loss": 0.8102532625198364, "step": 516 }, { "epoch": 0.67092483322181, "grad_norm": 0.7394533753395081, "learning_rate": 9.204157964485454e-06, "loss": 0.8669203519821167, "step": 517 }, { "epoch": 0.6722225601719488, "grad_norm": 0.7361659407615662, "learning_rate": 9.200283539979691e-06, "loss": 0.8511008024215698, "step": 518 }, { "epoch": 0.6735202871220877, "grad_norm": 0.7361857891082764, "learning_rate": 9.196400526957198e-06, "loss": 0.8685998916625977, "step": 519 }, { "epoch": 0.6748180140722266, "grad_norm": 0.7673179507255554, "learning_rate": 9.192508933357753e-06, "loss": 0.8493822813034058, "step": 520 }, { "epoch": 0.6761157410223655, "grad_norm": 0.7651588320732117, "learning_rate": 9.188608767138683e-06, "loss": 0.895356297492981, "step": 521 }, { "epoch": 0.6774134679725045, "grad_norm": 0.7583249807357788, "learning_rate": 9.184700036274837e-06, "loss": 0.8651999831199646, "step": 522 }, { "epoch": 0.6787111949226433, "grad_norm": 0.7233076095581055, "learning_rate": 9.180782748758583e-06, "loss": 0.8715196251869202, "step": 523 }, { "epoch": 0.6800089218727822, "grad_norm": 0.7233548760414124, "learning_rate": 9.17685691259978e-06, "loss": 0.8916983008384705, "step": 524 }, { "epoch": 0.6813066488229211, "grad_norm": 0.7047330141067505, "learning_rate": 9.172922535825772e-06, "loss": 0.7544976472854614, "step": 525 }, { "epoch": 0.68260437577306, "grad_norm": 0.7306682467460632, "learning_rate": 9.168979626481364e-06, "loss": 0.8060271143913269, "step": 526 }, { "epoch": 0.6839021027231988, "grad_norm": 0.6905940175056458, "learning_rate": 9.165028192628803e-06, "loss": 0.8090964555740356, "step": 527 }, { "epoch": 0.6851998296733378, "grad_norm": 0.7123598456382751, "learning_rate": 9.161068242347777e-06, "loss": 0.7829641103744507, "step": 528 }, { "epoch": 0.6864975566234767, "grad_norm": 0.7222651243209839, "learning_rate": 9.157099783735378e-06, "loss": 0.7681039571762085, "step": 529 }, { "epoch": 0.6877952835736156, "grad_norm": 0.7189817428588867, "learning_rate": 9.1531228249061e-06, "loss": 0.8204732537269592, "step": 530 }, { "epoch": 0.6890930105237545, "grad_norm": 0.7915937900543213, "learning_rate": 9.149137373991819e-06, "loss": 0.9181404709815979, "step": 531 }, { "epoch": 0.6903907374738933, "grad_norm": 0.7157093286514282, "learning_rate": 9.145143439141771e-06, "loss": 0.8051410913467407, "step": 532 }, { "epoch": 0.6916884644240323, "grad_norm": 0.7645899057388306, "learning_rate": 9.141141028522544e-06, "loss": 0.8610852956771851, "step": 533 }, { "epoch": 0.6929861913741712, "grad_norm": 0.7532792687416077, "learning_rate": 9.137130150318055e-06, "loss": 0.777756929397583, "step": 534 }, { "epoch": 0.69428391832431, "grad_norm": 0.7266109585762024, "learning_rate": 9.133110812729532e-06, "loss": 0.8287126421928406, "step": 535 }, { "epoch": 0.695581645274449, "grad_norm": 0.7463825345039368, "learning_rate": 9.129083023975505e-06, "loss": 0.8141655921936035, "step": 536 }, { "epoch": 0.6968793722245878, "grad_norm": 0.7447496652603149, "learning_rate": 9.125046792291784e-06, "loss": 0.8310843110084534, "step": 537 }, { "epoch": 0.6981770991747268, "grad_norm": 0.72504723072052, "learning_rate": 9.121002125931436e-06, "loss": 0.7873820066452026, "step": 538 }, { "epoch": 0.6994748261248657, "grad_norm": 0.7359863519668579, "learning_rate": 9.116949033164785e-06, "loss": 0.8555896282196045, "step": 539 }, { "epoch": 0.7007725530750045, "grad_norm": 0.7197631001472473, "learning_rate": 9.112887522279378e-06, "loss": 0.8453904986381531, "step": 540 }, { "epoch": 0.7020702800251435, "grad_norm": 0.7554418444633484, "learning_rate": 9.108817601579978e-06, "loss": 0.8600095510482788, "step": 541 }, { "epoch": 0.7033680069752823, "grad_norm": 0.7778856754302979, "learning_rate": 9.104739279388542e-06, "loss": 0.8939447402954102, "step": 542 }, { "epoch": 0.7046657339254212, "grad_norm": 0.7399225831031799, "learning_rate": 9.100652564044206e-06, "loss": 0.831099808216095, "step": 543 }, { "epoch": 0.7059634608755602, "grad_norm": 0.7200485467910767, "learning_rate": 9.09655746390327e-06, "loss": 0.8666620254516602, "step": 544 }, { "epoch": 0.707261187825699, "grad_norm": 0.7057142853736877, "learning_rate": 9.092453987339174e-06, "loss": 0.8054381012916565, "step": 545 }, { "epoch": 0.708558914775838, "grad_norm": 0.7025026082992554, "learning_rate": 9.088342142742493e-06, "loss": 0.8074846863746643, "step": 546 }, { "epoch": 0.7098566417259768, "grad_norm": 0.7438026666641235, "learning_rate": 9.084221938520906e-06, "loss": 0.7710708379745483, "step": 547 }, { "epoch": 0.7111543686761157, "grad_norm": 0.7022576332092285, "learning_rate": 9.080093383099187e-06, "loss": 0.8335398435592651, "step": 548 }, { "epoch": 0.7124520956262547, "grad_norm": 0.7237393856048584, "learning_rate": 9.07595648491919e-06, "loss": 0.8371632695198059, "step": 549 }, { "epoch": 0.7137498225763935, "grad_norm": 0.7316430807113647, "learning_rate": 9.071811252439823e-06, "loss": 0.7962602972984314, "step": 550 }, { "epoch": 0.7150475495265324, "grad_norm": 0.679232656955719, "learning_rate": 9.067657694137038e-06, "loss": 0.8270066380500793, "step": 551 }, { "epoch": 0.7163452764766713, "grad_norm": 0.7692121267318726, "learning_rate": 9.063495818503809e-06, "loss": 0.8980318307876587, "step": 552 }, { "epoch": 0.7176430034268102, "grad_norm": 0.7516088485717773, "learning_rate": 9.059325634050118e-06, "loss": 0.8471689820289612, "step": 553 }, { "epoch": 0.7189407303769492, "grad_norm": 0.7334446907043457, "learning_rate": 9.05514714930294e-06, "loss": 0.8356298208236694, "step": 554 }, { "epoch": 0.720238457327088, "grad_norm": 0.6861634254455566, "learning_rate": 9.050960372806214e-06, "loss": 0.8037006258964539, "step": 555 }, { "epoch": 0.7215361842772269, "grad_norm": 0.7529132962226868, "learning_rate": 9.046765313120842e-06, "loss": 0.8884504437446594, "step": 556 }, { "epoch": 0.7228339112273658, "grad_norm": 0.7479069232940674, "learning_rate": 9.042561978824657e-06, "loss": 0.8784589767456055, "step": 557 }, { "epoch": 0.7241316381775047, "grad_norm": 0.7415169477462769, "learning_rate": 9.038350378512417e-06, "loss": 0.8879582285881042, "step": 558 }, { "epoch": 0.7254293651276437, "grad_norm": 0.7096434235572815, "learning_rate": 9.034130520795774e-06, "loss": 0.818169116973877, "step": 559 }, { "epoch": 0.7267270920777825, "grad_norm": 0.7252947688102722, "learning_rate": 9.029902414303273e-06, "loss": 0.8619629144668579, "step": 560 }, { "epoch": 0.7280248190279214, "grad_norm": 0.7012298703193665, "learning_rate": 9.025666067680319e-06, "loss": 0.7604071497917175, "step": 561 }, { "epoch": 0.7293225459780603, "grad_norm": 0.7263858914375305, "learning_rate": 9.021421489589169e-06, "loss": 0.8163655400276184, "step": 562 }, { "epoch": 0.7306202729281992, "grad_norm": 0.7177236676216125, "learning_rate": 9.017168688708913e-06, "loss": 0.7635133862495422, "step": 563 }, { "epoch": 0.731917999878338, "grad_norm": 0.7455753684043884, "learning_rate": 9.01290767373545e-06, "loss": 0.8407589197158813, "step": 564 }, { "epoch": 0.733215726828477, "grad_norm": 0.737720251083374, "learning_rate": 9.008638453381477e-06, "loss": 0.7209524512290955, "step": 565 }, { "epoch": 0.7345134537786159, "grad_norm": 0.7552280426025391, "learning_rate": 9.004361036376472e-06, "loss": 0.9184548854827881, "step": 566 }, { "epoch": 0.7358111807287548, "grad_norm": 0.7261860370635986, "learning_rate": 9.000075431466668e-06, "loss": 0.8386935591697693, "step": 567 }, { "epoch": 0.7371089076788937, "grad_norm": 0.7238007187843323, "learning_rate": 8.995781647415041e-06, "loss": 0.7940593957901001, "step": 568 }, { "epoch": 0.7384066346290326, "grad_norm": 0.7209489941596985, "learning_rate": 8.991479693001296e-06, "loss": 0.8524088859558105, "step": 569 }, { "epoch": 0.7397043615791715, "grad_norm": 0.7325084209442139, "learning_rate": 8.987169577021838e-06, "loss": 0.8363596200942993, "step": 570 }, { "epoch": 0.7410020885293104, "grad_norm": 0.7080730199813843, "learning_rate": 8.982851308289765e-06, "loss": 0.9011337757110596, "step": 571 }, { "epoch": 0.7422998154794492, "grad_norm": 0.7668154239654541, "learning_rate": 8.978524895634842e-06, "loss": 0.8450537323951721, "step": 572 }, { "epoch": 0.7435975424295882, "grad_norm": 0.7361171841621399, "learning_rate": 8.974190347903491e-06, "loss": 0.8203600645065308, "step": 573 }, { "epoch": 0.744895269379727, "grad_norm": 0.672437310218811, "learning_rate": 8.96984767395876e-06, "loss": 0.7632564306259155, "step": 574 }, { "epoch": 0.746192996329866, "grad_norm": 0.7395947575569153, "learning_rate": 8.965496882680322e-06, "loss": 0.8846946954727173, "step": 575 }, { "epoch": 0.7474907232800049, "grad_norm": 0.7199435234069824, "learning_rate": 8.961137982964445e-06, "loss": 0.7983006238937378, "step": 576 }, { "epoch": 0.7487884502301437, "grad_norm": 0.7480239272117615, "learning_rate": 8.95677098372397e-06, "loss": 0.8426603078842163, "step": 577 }, { "epoch": 0.7500861771802827, "grad_norm": 0.747464120388031, "learning_rate": 8.95239589388831e-06, "loss": 0.8379330635070801, "step": 578 }, { "epoch": 0.7513839041304216, "grad_norm": 0.7300353050231934, "learning_rate": 8.948012722403417e-06, "loss": 0.8795480132102966, "step": 579 }, { "epoch": 0.7526816310805604, "grad_norm": 0.790527880191803, "learning_rate": 8.943621478231764e-06, "loss": 0.8540128469467163, "step": 580 }, { "epoch": 0.7539793580306994, "grad_norm": 0.7547438144683838, "learning_rate": 8.939222170352333e-06, "loss": 0.8279592394828796, "step": 581 }, { "epoch": 0.7552770849808382, "grad_norm": 0.6995728015899658, "learning_rate": 8.9348148077606e-06, "loss": 0.7791587710380554, "step": 582 }, { "epoch": 0.7565748119309772, "grad_norm": 0.7065349817276001, "learning_rate": 8.9303993994685e-06, "loss": 0.8062193393707275, "step": 583 }, { "epoch": 0.757872538881116, "grad_norm": 0.7068513035774231, "learning_rate": 8.925975954504432e-06, "loss": 0.7748818397521973, "step": 584 }, { "epoch": 0.7591702658312549, "grad_norm": 0.7462431788444519, "learning_rate": 8.921544481913218e-06, "loss": 0.7763248085975647, "step": 585 }, { "epoch": 0.7604679927813939, "grad_norm": 0.7679768204689026, "learning_rate": 8.917104990756096e-06, "loss": 0.873698890209198, "step": 586 }, { "epoch": 0.7617657197315327, "grad_norm": 0.7324793338775635, "learning_rate": 8.912657490110705e-06, "loss": 0.8150361776351929, "step": 587 }, { "epoch": 0.7630634466816716, "grad_norm": 0.7165784239768982, "learning_rate": 8.908201989071055e-06, "loss": 0.8647459745407104, "step": 588 }, { "epoch": 0.7643611736318106, "grad_norm": 0.7230257391929626, "learning_rate": 8.903738496747523e-06, "loss": 0.8864815831184387, "step": 589 }, { "epoch": 0.7656589005819494, "grad_norm": 0.6890432238578796, "learning_rate": 8.899267022266815e-06, "loss": 0.8340771794319153, "step": 590 }, { "epoch": 0.7669566275320884, "grad_norm": 0.7173485159873962, "learning_rate": 8.894787574771968e-06, "loss": 0.8034731149673462, "step": 591 }, { "epoch": 0.7682543544822272, "grad_norm": 0.7507414817810059, "learning_rate": 8.890300163422319e-06, "loss": 0.8363530039787292, "step": 592 }, { "epoch": 0.7695520814323661, "grad_norm": 0.7558765411376953, "learning_rate": 8.885804797393484e-06, "loss": 0.8346325755119324, "step": 593 }, { "epoch": 0.770849808382505, "grad_norm": 0.767343282699585, "learning_rate": 8.881301485877355e-06, "loss": 0.8766120672225952, "step": 594 }, { "epoch": 0.7721475353326439, "grad_norm": 0.7072799801826477, "learning_rate": 8.87679023808206e-06, "loss": 0.7619128227233887, "step": 595 }, { "epoch": 0.7734452622827828, "grad_norm": 0.7522525191307068, "learning_rate": 8.87227106323196e-06, "loss": 0.8515980243682861, "step": 596 }, { "epoch": 0.7747429892329217, "grad_norm": 0.8023040890693665, "learning_rate": 8.867743970567625e-06, "loss": 0.7740094065666199, "step": 597 }, { "epoch": 0.7760407161830606, "grad_norm": 0.7332605719566345, "learning_rate": 8.86320896934581e-06, "loss": 0.892971396446228, "step": 598 }, { "epoch": 0.7773384431331996, "grad_norm": 0.725096583366394, "learning_rate": 8.858666068839447e-06, "loss": 0.7671830058097839, "step": 599 }, { "epoch": 0.7786361700833384, "grad_norm": 0.722201406955719, "learning_rate": 8.85411527833762e-06, "loss": 0.8197992444038391, "step": 600 }, { "epoch": 0.7799338970334773, "grad_norm": 0.682908833026886, "learning_rate": 8.849556607145541e-06, "loss": 0.7293855547904968, "step": 601 }, { "epoch": 0.7812316239836162, "grad_norm": 0.724932849407196, "learning_rate": 8.84499006458454e-06, "loss": 0.8873072266578674, "step": 602 }, { "epoch": 0.7825293509337551, "grad_norm": 0.6848104596138, "learning_rate": 8.840415659992038e-06, "loss": 0.8097598552703857, "step": 603 }, { "epoch": 0.7838270778838939, "grad_norm": 0.714169979095459, "learning_rate": 8.835833402721538e-06, "loss": 0.8175859451293945, "step": 604 }, { "epoch": 0.7851248048340329, "grad_norm": 0.7740371227264404, "learning_rate": 8.831243302142595e-06, "loss": 0.8492919206619263, "step": 605 }, { "epoch": 0.7864225317841718, "grad_norm": 0.7099517583847046, "learning_rate": 8.826645367640803e-06, "loss": 0.8585895895957947, "step": 606 }, { "epoch": 0.7877202587343107, "grad_norm": 0.7142400741577148, "learning_rate": 8.822039608617773e-06, "loss": 0.8533006906509399, "step": 607 }, { "epoch": 0.7890179856844496, "grad_norm": 0.7054215669631958, "learning_rate": 8.81742603449112e-06, "loss": 0.7479570508003235, "step": 608 }, { "epoch": 0.7903157126345884, "grad_norm": 0.7265973091125488, "learning_rate": 8.81280465469443e-06, "loss": 0.8953506946563721, "step": 609 }, { "epoch": 0.7916134395847274, "grad_norm": 0.7428868412971497, "learning_rate": 8.808175478677261e-06, "loss": 0.8039628267288208, "step": 610 }, { "epoch": 0.7929111665348663, "grad_norm": 0.7243048548698425, "learning_rate": 8.803538515905102e-06, "loss": 0.7982103228569031, "step": 611 }, { "epoch": 0.7942088934850051, "grad_norm": 0.7457985281944275, "learning_rate": 8.79889377585937e-06, "loss": 0.8665332198143005, "step": 612 }, { "epoch": 0.7955066204351441, "grad_norm": 0.776006817817688, "learning_rate": 8.79424126803738e-06, "loss": 0.8244978189468384, "step": 613 }, { "epoch": 0.7968043473852829, "grad_norm": 0.7452744245529175, "learning_rate": 8.789581001952339e-06, "loss": 0.8417704105377197, "step": 614 }, { "epoch": 0.7981020743354219, "grad_norm": 0.7379627823829651, "learning_rate": 8.784912987133305e-06, "loss": 0.8458194732666016, "step": 615 }, { "epoch": 0.7993998012855608, "grad_norm": 0.718325674533844, "learning_rate": 8.78023723312519e-06, "loss": 0.8211947679519653, "step": 616 }, { "epoch": 0.8006975282356996, "grad_norm": 0.6864851713180542, "learning_rate": 8.775553749488729e-06, "loss": 0.7400568127632141, "step": 617 }, { "epoch": 0.8019952551858386, "grad_norm": 0.7230079174041748, "learning_rate": 8.770862545800459e-06, "loss": 0.7876740097999573, "step": 618 }, { "epoch": 0.8032929821359774, "grad_norm": 0.7299041152000427, "learning_rate": 8.766163631652702e-06, "loss": 0.818249523639679, "step": 619 }, { "epoch": 0.8045907090861163, "grad_norm": 0.7503798604011536, "learning_rate": 8.76145701665355e-06, "loss": 0.8890657424926758, "step": 620 }, { "epoch": 0.8058884360362553, "grad_norm": 0.7524362802505493, "learning_rate": 8.756742710426842e-06, "loss": 0.7989673018455505, "step": 621 }, { "epoch": 0.8071861629863941, "grad_norm": 0.7557197213172913, "learning_rate": 8.752020722612135e-06, "loss": 0.819223940372467, "step": 622 }, { "epoch": 0.8084838899365331, "grad_norm": 0.7750368714332581, "learning_rate": 8.747291062864704e-06, "loss": 0.8548346757888794, "step": 623 }, { "epoch": 0.8097816168866719, "grad_norm": 0.7181532382965088, "learning_rate": 8.742553740855507e-06, "loss": 0.7740316390991211, "step": 624 }, { "epoch": 0.8110793438368108, "grad_norm": 0.744864821434021, "learning_rate": 8.737808766271163e-06, "loss": 0.795849621295929, "step": 625 }, { "epoch": 0.8123770707869498, "grad_norm": 0.7091443538665771, "learning_rate": 8.733056148813947e-06, "loss": 0.8226791024208069, "step": 626 }, { "epoch": 0.8136747977370886, "grad_norm": 0.7437657117843628, "learning_rate": 8.728295898201762e-06, "loss": 0.8405981063842773, "step": 627 }, { "epoch": 0.8149725246872275, "grad_norm": 0.7046335339546204, "learning_rate": 8.72352802416811e-06, "loss": 0.7682258486747742, "step": 628 }, { "epoch": 0.8162702516373664, "grad_norm": 0.7432284951210022, "learning_rate": 8.718752536462089e-06, "loss": 0.8703007102012634, "step": 629 }, { "epoch": 0.8175679785875053, "grad_norm": 0.7931978106498718, "learning_rate": 8.713969444848365e-06, "loss": 0.9198675155639648, "step": 630 }, { "epoch": 0.8188657055376443, "grad_norm": 0.7177122235298157, "learning_rate": 8.709178759107146e-06, "loss": 0.8506098389625549, "step": 631 }, { "epoch": 0.8201634324877831, "grad_norm": 0.7149267792701721, "learning_rate": 8.704380489034172e-06, "loss": 0.7816558480262756, "step": 632 }, { "epoch": 0.821461159437922, "grad_norm": 0.8047035932540894, "learning_rate": 8.699574644440696e-06, "loss": 0.8571796417236328, "step": 633 }, { "epoch": 0.8227588863880609, "grad_norm": 0.712013840675354, "learning_rate": 8.694761235153446e-06, "loss": 0.8653860092163086, "step": 634 }, { "epoch": 0.8240566133381998, "grad_norm": 0.6966065764427185, "learning_rate": 8.689940271014631e-06, "loss": 0.8233016133308411, "step": 635 }, { "epoch": 0.8253543402883388, "grad_norm": 0.731100857257843, "learning_rate": 8.685111761881902e-06, "loss": 0.8638073205947876, "step": 636 }, { "epoch": 0.8266520672384776, "grad_norm": 0.7355837821960449, "learning_rate": 8.680275717628336e-06, "loss": 0.8465107083320618, "step": 637 }, { "epoch": 0.8279497941886165, "grad_norm": 0.7684483528137207, "learning_rate": 8.675432148142423e-06, "loss": 0.8504188060760498, "step": 638 }, { "epoch": 0.8292475211387554, "grad_norm": 0.7174119353294373, "learning_rate": 8.670581063328031e-06, "loss": 0.7903969287872314, "step": 639 }, { "epoch": 0.8305452480888943, "grad_norm": 0.6824118494987488, "learning_rate": 8.665722473104407e-06, "loss": 0.79200679063797, "step": 640 }, { "epoch": 0.8318429750390332, "grad_norm": 0.7122912406921387, "learning_rate": 8.660856387406134e-06, "loss": 0.8188685774803162, "step": 641 }, { "epoch": 0.8331407019891721, "grad_norm": 0.7017625570297241, "learning_rate": 8.655982816183127e-06, "loss": 0.8316338062286377, "step": 642 }, { "epoch": 0.834438428939311, "grad_norm": 0.798739492893219, "learning_rate": 8.651101769400606e-06, "loss": 0.8281844854354858, "step": 643 }, { "epoch": 0.8357361558894499, "grad_norm": 0.7157971262931824, "learning_rate": 8.646213257039076e-06, "loss": 0.8818997740745544, "step": 644 }, { "epoch": 0.8370338828395888, "grad_norm": 0.7081462144851685, "learning_rate": 8.641317289094306e-06, "loss": 0.8395585417747498, "step": 645 }, { "epoch": 0.8383316097897276, "grad_norm": 0.752267062664032, "learning_rate": 8.636413875577314e-06, "loss": 0.852052628993988, "step": 646 }, { "epoch": 0.8396293367398666, "grad_norm": 0.7433504462242126, "learning_rate": 8.631503026514337e-06, "loss": 0.7837040424346924, "step": 647 }, { "epoch": 0.8409270636900055, "grad_norm": 0.7658749222755432, "learning_rate": 8.626584751946818e-06, "loss": 0.8357768654823303, "step": 648 }, { "epoch": 0.8422247906401443, "grad_norm": 0.7301930785179138, "learning_rate": 8.621659061931389e-06, "loss": 0.8085260391235352, "step": 649 }, { "epoch": 0.8435225175902833, "grad_norm": 0.694357693195343, "learning_rate": 8.616725966539831e-06, "loss": 0.7951889634132385, "step": 650 }, { "epoch": 0.8448202445404221, "grad_norm": 0.7343024015426636, "learning_rate": 8.611785475859083e-06, "loss": 0.8101716637611389, "step": 651 }, { "epoch": 0.8461179714905611, "grad_norm": 0.7503554224967957, "learning_rate": 8.606837599991194e-06, "loss": 0.8827271461486816, "step": 652 }, { "epoch": 0.8474156984407, "grad_norm": 0.7213442325592041, "learning_rate": 8.601882349053318e-06, "loss": 0.8409299254417419, "step": 653 }, { "epoch": 0.8487134253908388, "grad_norm": 0.7245302200317383, "learning_rate": 8.596919733177692e-06, "loss": 0.7459070682525635, "step": 654 }, { "epoch": 0.8500111523409778, "grad_norm": 0.7586632370948792, "learning_rate": 8.591949762511606e-06, "loss": 0.7964019775390625, "step": 655 }, { "epoch": 0.8513088792911166, "grad_norm": 0.7066137194633484, "learning_rate": 8.586972447217392e-06, "loss": 0.7685011625289917, "step": 656 }, { "epoch": 0.8526066062412555, "grad_norm": 0.7152141332626343, "learning_rate": 8.581987797472404e-06, "loss": 0.8681995868682861, "step": 657 }, { "epoch": 0.8539043331913945, "grad_norm": 0.732406735420227, "learning_rate": 8.576995823468984e-06, "loss": 0.8168764710426331, "step": 658 }, { "epoch": 0.8552020601415333, "grad_norm": 0.7218092083930969, "learning_rate": 8.571996535414457e-06, "loss": 0.7868984937667847, "step": 659 }, { "epoch": 0.8564997870916723, "grad_norm": 0.7385701537132263, "learning_rate": 8.566989943531106e-06, "loss": 0.8220299482345581, "step": 660 }, { "epoch": 0.8577975140418111, "grad_norm": 0.7344916462898254, "learning_rate": 8.561976058056138e-06, "loss": 0.8271669745445251, "step": 661 }, { "epoch": 0.85909524099195, "grad_norm": 0.7406896352767944, "learning_rate": 8.556954889241682e-06, "loss": 0.8336881399154663, "step": 662 }, { "epoch": 0.860392967942089, "grad_norm": 0.7561131715774536, "learning_rate": 8.551926447354759e-06, "loss": 0.7825358510017395, "step": 663 }, { "epoch": 0.8616906948922278, "grad_norm": 0.7565145492553711, "learning_rate": 8.546890742677259e-06, "loss": 0.7725045680999756, "step": 664 }, { "epoch": 0.8629884218423667, "grad_norm": 0.7259421348571777, "learning_rate": 8.541847785505921e-06, "loss": 0.8378758430480957, "step": 665 }, { "epoch": 0.8642861487925056, "grad_norm": 0.697087287902832, "learning_rate": 8.53679758615232e-06, "loss": 0.7955657839775085, "step": 666 }, { "epoch": 0.8655838757426445, "grad_norm": 0.7520134449005127, "learning_rate": 8.531740154942834e-06, "loss": 0.7764825224876404, "step": 667 }, { "epoch": 0.8668816026927835, "grad_norm": 0.7223501801490784, "learning_rate": 8.526675502218629e-06, "loss": 0.809272825717926, "step": 668 }, { "epoch": 0.8681793296429223, "grad_norm": 0.6946507692337036, "learning_rate": 8.521603638335638e-06, "loss": 0.8255141973495483, "step": 669 }, { "epoch": 0.8694770565930612, "grad_norm": 0.7150010466575623, "learning_rate": 8.516524573664539e-06, "loss": 0.845747709274292, "step": 670 }, { "epoch": 0.8707747835432001, "grad_norm": 0.7168200612068176, "learning_rate": 8.511438318590735e-06, "loss": 0.74006187915802, "step": 671 }, { "epoch": 0.872072510493339, "grad_norm": 0.7573256492614746, "learning_rate": 8.506344883514328e-06, "loss": 0.7870563268661499, "step": 672 }, { "epoch": 0.8733702374434779, "grad_norm": 0.7842072248458862, "learning_rate": 8.501244278850105e-06, "loss": 0.9044228196144104, "step": 673 }, { "epoch": 0.8746679643936168, "grad_norm": 0.7406667470932007, "learning_rate": 8.496136515027511e-06, "loss": 0.811069667339325, "step": 674 }, { "epoch": 0.8759656913437557, "grad_norm": 0.7430689334869385, "learning_rate": 8.491021602490632e-06, "loss": 0.8124207854270935, "step": 675 }, { "epoch": 0.8772634182938946, "grad_norm": 0.7479687929153442, "learning_rate": 8.485899551698166e-06, "loss": 0.7997281551361084, "step": 676 }, { "epoch": 0.8785611452440335, "grad_norm": 0.7981671690940857, "learning_rate": 8.480770373123415e-06, "loss": 0.8423473834991455, "step": 677 }, { "epoch": 0.8798588721941724, "grad_norm": 0.678821325302124, "learning_rate": 8.475634077254248e-06, "loss": 0.8249571323394775, "step": 678 }, { "epoch": 0.8811565991443113, "grad_norm": 0.725933313369751, "learning_rate": 8.470490674593091e-06, "loss": 0.860410213470459, "step": 679 }, { "epoch": 0.8824543260944502, "grad_norm": 0.7934810519218445, "learning_rate": 8.4653401756569e-06, "loss": 0.8338278532028198, "step": 680 }, { "epoch": 0.883752053044589, "grad_norm": 0.7282740473747253, "learning_rate": 8.460182590977142e-06, "loss": 0.8479672074317932, "step": 681 }, { "epoch": 0.885049779994728, "grad_norm": 0.789381206035614, "learning_rate": 8.455017931099772e-06, "loss": 0.8129116296768188, "step": 682 }, { "epoch": 0.8863475069448669, "grad_norm": 0.7255346179008484, "learning_rate": 8.449846206585211e-06, "loss": 0.821036696434021, "step": 683 }, { "epoch": 0.8876452338950058, "grad_norm": 0.7708171606063843, "learning_rate": 8.44466742800833e-06, "loss": 0.8153631687164307, "step": 684 }, { "epoch": 0.8889429608451447, "grad_norm": 0.7143636345863342, "learning_rate": 8.439481605958416e-06, "loss": 0.8277146816253662, "step": 685 }, { "epoch": 0.8902406877952835, "grad_norm": 0.8043286204338074, "learning_rate": 8.434288751039168e-06, "loss": 0.8356217741966248, "step": 686 }, { "epoch": 0.8915384147454225, "grad_norm": 0.7653588056564331, "learning_rate": 8.429088873868656e-06, "loss": 0.8771963715553284, "step": 687 }, { "epoch": 0.8928361416955614, "grad_norm": 0.6917808055877686, "learning_rate": 8.423881985079315e-06, "loss": 0.7479776740074158, "step": 688 }, { "epoch": 0.8941338686457002, "grad_norm": 0.7078230977058411, "learning_rate": 8.418668095317912e-06, "loss": 0.7685298323631287, "step": 689 }, { "epoch": 0.8954315955958392, "grad_norm": 0.7034929990768433, "learning_rate": 8.413447215245534e-06, "loss": 0.8008818626403809, "step": 690 }, { "epoch": 0.8954315955958392, "eval_loss": 0.7848750948905945, "eval_runtime": 143.9442, "eval_samples_per_second": 36.07, "eval_steps_per_second": 9.017, "step": 690 }, { "epoch": 0.896729322545978, "grad_norm": 0.7235103845596313, "learning_rate": 8.408219355537557e-06, "loss": 0.821499764919281, "step": 691 }, { "epoch": 0.898027049496117, "grad_norm": 0.7273632884025574, "learning_rate": 8.402984526883635e-06, "loss": 0.8295854330062866, "step": 692 }, { "epoch": 0.8993247764462559, "grad_norm": 0.7261776328086853, "learning_rate": 8.397742739987664e-06, "loss": 0.7892912030220032, "step": 693 }, { "epoch": 0.9006225033963947, "grad_norm": 0.7244385480880737, "learning_rate": 8.392494005567773e-06, "loss": 0.8177611231803894, "step": 694 }, { "epoch": 0.9019202303465337, "grad_norm": 0.7344761490821838, "learning_rate": 8.387238334356294e-06, "loss": 0.8023809194564819, "step": 695 }, { "epoch": 0.9032179572966725, "grad_norm": 0.736089825630188, "learning_rate": 8.381975737099745e-06, "loss": 0.8413804173469543, "step": 696 }, { "epoch": 0.9045156842468114, "grad_norm": 0.7272122502326965, "learning_rate": 8.376706224558807e-06, "loss": 0.8420959115028381, "step": 697 }, { "epoch": 0.9058134111969504, "grad_norm": 0.7177276015281677, "learning_rate": 8.3714298075083e-06, "loss": 0.8787964582443237, "step": 698 }, { "epoch": 0.9071111381470892, "grad_norm": 0.7416796088218689, "learning_rate": 8.366146496737158e-06, "loss": 0.8105108141899109, "step": 699 }, { "epoch": 0.9084088650972282, "grad_norm": 0.7222095727920532, "learning_rate": 8.360856303048417e-06, "loss": 0.7893627882003784, "step": 700 }, { "epoch": 0.909706592047367, "grad_norm": 0.7077576518058777, "learning_rate": 8.355559237259181e-06, "loss": 0.7560763955116272, "step": 701 }, { "epoch": 0.9110043189975059, "grad_norm": 0.6978065371513367, "learning_rate": 8.350255310200611e-06, "loss": 0.7838032841682434, "step": 702 }, { "epoch": 0.9123020459476449, "grad_norm": 0.7368733882904053, "learning_rate": 8.344944532717898e-06, "loss": 0.8452830910682678, "step": 703 }, { "epoch": 0.9135997728977837, "grad_norm": 0.7668716907501221, "learning_rate": 8.339626915670234e-06, "loss": 0.7398316264152527, "step": 704 }, { "epoch": 0.9148974998479226, "grad_norm": 0.7506059408187866, "learning_rate": 8.3343024699308e-06, "loss": 0.8178697824478149, "step": 705 }, { "epoch": 0.9161952267980615, "grad_norm": 0.771515965461731, "learning_rate": 8.328971206386742e-06, "loss": 0.8332667350769043, "step": 706 }, { "epoch": 0.9174929537482004, "grad_norm": 0.7568745017051697, "learning_rate": 8.323633135939145e-06, "loss": 0.8590246438980103, "step": 707 }, { "epoch": 0.9187906806983394, "grad_norm": 0.7116881608963013, "learning_rate": 8.318288269503007e-06, "loss": 0.8164006471633911, "step": 708 }, { "epoch": 0.9200884076484782, "grad_norm": 0.7115074396133423, "learning_rate": 8.312936618007232e-06, "loss": 0.827363133430481, "step": 709 }, { "epoch": 0.9213861345986171, "grad_norm": 0.7299380302429199, "learning_rate": 8.307578192394592e-06, "loss": 0.8012964129447937, "step": 710 }, { "epoch": 0.922683861548756, "grad_norm": 0.6973420977592468, "learning_rate": 8.30221300362171e-06, "loss": 0.7960250377655029, "step": 711 }, { "epoch": 0.9239815884988949, "grad_norm": 0.7379301190376282, "learning_rate": 8.29684106265904e-06, "loss": 0.8359223008155823, "step": 712 }, { "epoch": 0.9252793154490339, "grad_norm": 0.7026050090789795, "learning_rate": 8.291462380490842e-06, "loss": 0.8082996010780334, "step": 713 }, { "epoch": 0.9265770423991727, "grad_norm": 0.7560499906539917, "learning_rate": 8.286076968115158e-06, "loss": 0.7559288144111633, "step": 714 }, { "epoch": 0.9278747693493116, "grad_norm": 0.7483256459236145, "learning_rate": 8.280684836543794e-06, "loss": 0.8855387568473816, "step": 715 }, { "epoch": 0.9291724962994505, "grad_norm": 0.7110533714294434, "learning_rate": 8.275285996802293e-06, "loss": 0.8424345254898071, "step": 716 }, { "epoch": 0.9304702232495894, "grad_norm": 0.7900488376617432, "learning_rate": 8.269880459929919e-06, "loss": 0.8165364265441895, "step": 717 }, { "epoch": 0.9317679501997282, "grad_norm": 0.7415225505828857, "learning_rate": 8.264468236979626e-06, "loss": 0.8298836946487427, "step": 718 }, { "epoch": 0.9330656771498672, "grad_norm": 0.7365603446960449, "learning_rate": 8.259049339018036e-06, "loss": 0.855134904384613, "step": 719 }, { "epoch": 0.9343634041000061, "grad_norm": 0.755382239818573, "learning_rate": 8.25362377712543e-06, "loss": 0.8771618008613586, "step": 720 }, { "epoch": 0.935661131050145, "grad_norm": 0.7492240071296692, "learning_rate": 8.248191562395703e-06, "loss": 0.7913522720336914, "step": 721 }, { "epoch": 0.9369588580002839, "grad_norm": 0.726038932800293, "learning_rate": 8.242752705936363e-06, "loss": 0.8233412504196167, "step": 722 }, { "epoch": 0.9382565849504227, "grad_norm": 0.729804277420044, "learning_rate": 8.237307218868493e-06, "loss": 0.8285303115844727, "step": 723 }, { "epoch": 0.9395543119005617, "grad_norm": 0.7340808510780334, "learning_rate": 8.231855112326738e-06, "loss": 0.8087389469146729, "step": 724 }, { "epoch": 0.9408520388507006, "grad_norm": 0.6981987357139587, "learning_rate": 8.226396397459272e-06, "loss": 0.8206723928451538, "step": 725 }, { "epoch": 0.9421497658008394, "grad_norm": 0.7226195335388184, "learning_rate": 8.22093108542779e-06, "loss": 0.8090267181396484, "step": 726 }, { "epoch": 0.9434474927509784, "grad_norm": 0.7470179200172424, "learning_rate": 8.215459187407468e-06, "loss": 0.8644795417785645, "step": 727 }, { "epoch": 0.9447452197011172, "grad_norm": 0.7814143300056458, "learning_rate": 8.209980714586955e-06, "loss": 0.8789034485816956, "step": 728 }, { "epoch": 0.9460429466512562, "grad_norm": 0.7444256544113159, "learning_rate": 8.20449567816834e-06, "loss": 0.8365822434425354, "step": 729 }, { "epoch": 0.9473406736013951, "grad_norm": 0.7291041016578674, "learning_rate": 8.199004089367136e-06, "loss": 0.8873200416564941, "step": 730 }, { "epoch": 0.9486384005515339, "grad_norm": 0.6883243322372437, "learning_rate": 8.193505959412246e-06, "loss": 0.8070501685142517, "step": 731 }, { "epoch": 0.9499361275016729, "grad_norm": 0.6936081051826477, "learning_rate": 8.188001299545963e-06, "loss": 0.8187827467918396, "step": 732 }, { "epoch": 0.9512338544518117, "grad_norm": 0.7469680309295654, "learning_rate": 8.182490121023918e-06, "loss": 0.8218874335289001, "step": 733 }, { "epoch": 0.9525315814019506, "grad_norm": 0.7822017073631287, "learning_rate": 8.176972435115075e-06, "loss": 0.8489766716957092, "step": 734 }, { "epoch": 0.9538293083520896, "grad_norm": 0.7533932328224182, "learning_rate": 8.17144825310171e-06, "loss": 0.8591926693916321, "step": 735 }, { "epoch": 0.9551270353022284, "grad_norm": 0.6783921718597412, "learning_rate": 8.165917586279374e-06, "loss": 0.7739286422729492, "step": 736 }, { "epoch": 0.9564247622523674, "grad_norm": 0.7370641231536865, "learning_rate": 8.16038044595688e-06, "loss": 0.8205049633979797, "step": 737 }, { "epoch": 0.9577224892025062, "grad_norm": 0.723272442817688, "learning_rate": 8.15483684345628e-06, "loss": 0.8298395872116089, "step": 738 }, { "epoch": 0.9590202161526451, "grad_norm": 0.7543867230415344, "learning_rate": 8.149286790112838e-06, "loss": 0.784492552280426, "step": 739 }, { "epoch": 0.9603179431027841, "grad_norm": 0.750069797039032, "learning_rate": 8.143730297275008e-06, "loss": 0.7954182028770447, "step": 740 }, { "epoch": 0.9616156700529229, "grad_norm": 0.7066623568534851, "learning_rate": 8.138167376304411e-06, "loss": 0.8066163063049316, "step": 741 }, { "epoch": 0.9629133970030618, "grad_norm": 0.7153725028038025, "learning_rate": 8.132598038575814e-06, "loss": 0.7599750757217407, "step": 742 }, { "epoch": 0.9642111239532007, "grad_norm": 0.771787166595459, "learning_rate": 8.1270222954771e-06, "loss": 0.8843746781349182, "step": 743 }, { "epoch": 0.9655088509033396, "grad_norm": 0.7729872465133667, "learning_rate": 8.121440158409255e-06, "loss": 0.8271206021308899, "step": 744 }, { "epoch": 0.9668065778534786, "grad_norm": 0.7740857005119324, "learning_rate": 8.115851638786335e-06, "loss": 0.8692458868026733, "step": 745 }, { "epoch": 0.9681043048036174, "grad_norm": 0.7087443470954895, "learning_rate": 8.11025674803545e-06, "loss": 0.8108102083206177, "step": 746 }, { "epoch": 0.9694020317537563, "grad_norm": 0.7399817109107971, "learning_rate": 8.104655497596734e-06, "loss": 0.8514312505722046, "step": 747 }, { "epoch": 0.9706997587038952, "grad_norm": 0.749785304069519, "learning_rate": 8.099047898923326e-06, "loss": 0.7959103584289551, "step": 748 }, { "epoch": 0.9719974856540341, "grad_norm": 0.7259848117828369, "learning_rate": 8.093433963481348e-06, "loss": 0.8137398958206177, "step": 749 }, { "epoch": 0.973295212604173, "grad_norm": 0.7331299185752869, "learning_rate": 8.087813702749873e-06, "loss": 0.8132301568984985, "step": 750 }, { "epoch": 0.9745929395543119, "grad_norm": 0.7332584857940674, "learning_rate": 8.082187128220918e-06, "loss": 0.8191835880279541, "step": 751 }, { "epoch": 0.9758906665044508, "grad_norm": 0.6973693370819092, "learning_rate": 8.076554251399398e-06, "loss": 0.8266087174415588, "step": 752 }, { "epoch": 0.9771883934545897, "grad_norm": 0.7298718690872192, "learning_rate": 8.070915083803124e-06, "loss": 0.8183968663215637, "step": 753 }, { "epoch": 0.9784861204047286, "grad_norm": 0.7074362635612488, "learning_rate": 8.065269636962765e-06, "loss": 0.8740180730819702, "step": 754 }, { "epoch": 0.9797838473548675, "grad_norm": 0.7398948073387146, "learning_rate": 8.059617922421832e-06, "loss": 0.7935925722122192, "step": 755 }, { "epoch": 0.9810815743050064, "grad_norm": 0.709671676158905, "learning_rate": 8.053959951736647e-06, "loss": 0.8069880604743958, "step": 756 }, { "epoch": 0.9823793012551453, "grad_norm": 0.7370040416717529, "learning_rate": 8.048295736476332e-06, "loss": 0.8581700921058655, "step": 757 }, { "epoch": 0.9836770282052841, "grad_norm": 0.6889820098876953, "learning_rate": 8.042625288222774e-06, "loss": 0.7847036719322205, "step": 758 }, { "epoch": 0.9849747551554231, "grad_norm": 0.7171109914779663, "learning_rate": 8.036948618570601e-06, "loss": 0.7729103565216064, "step": 759 }, { "epoch": 0.986272482105562, "grad_norm": 0.7106866836547852, "learning_rate": 8.031265739127167e-06, "loss": 0.7267521023750305, "step": 760 }, { "epoch": 0.9875702090557009, "grad_norm": 0.7274038791656494, "learning_rate": 8.025576661512524e-06, "loss": 0.7723244428634644, "step": 761 }, { "epoch": 0.9888679360058398, "grad_norm": 0.6838043928146362, "learning_rate": 8.019881397359395e-06, "loss": 0.8244357109069824, "step": 762 }, { "epoch": 0.9901656629559786, "grad_norm": 0.7773102521896362, "learning_rate": 8.014179958313154e-06, "loss": 0.8146355152130127, "step": 763 }, { "epoch": 0.9914633899061176, "grad_norm": 0.6833189129829407, "learning_rate": 8.008472356031795e-06, "loss": 0.7225084900856018, "step": 764 }, { "epoch": 0.9927611168562565, "grad_norm": 0.7340080738067627, "learning_rate": 8.00275860218593e-06, "loss": 0.7879570126533508, "step": 765 }, { "epoch": 0.9940588438063953, "grad_norm": 0.744114100933075, "learning_rate": 7.99703870845873e-06, "loss": 0.8457169532775879, "step": 766 }, { "epoch": 0.9953565707565343, "grad_norm": 0.7166002988815308, "learning_rate": 7.991312686545939e-06, "loss": 0.786881685256958, "step": 767 }, { "epoch": 0.9966542977066731, "grad_norm": 0.7298569679260254, "learning_rate": 7.985580548155814e-06, "loss": 0.826994776725769, "step": 768 }, { "epoch": 0.9979520246568121, "grad_norm": 0.6964467167854309, "learning_rate": 7.979842305009133e-06, "loss": 0.7571007013320923, "step": 769 }, { "epoch": 0.999249751606951, "grad_norm": 0.7578926682472229, "learning_rate": 7.974097968839149e-06, "loss": 0.7799357771873474, "step": 770 }, { "epoch": 1.0, "grad_norm": 0.9857121706008911, "learning_rate": 7.968347551391574e-06, "loss": 0.7851473689079285, "step": 771 }, { "epoch": 1.0012977269501389, "grad_norm": 0.8775411248207092, "learning_rate": 7.962591064424558e-06, "loss": 0.7687423825263977, "step": 772 }, { "epoch": 1.0025954539002777, "grad_norm": 0.8086821436882019, "learning_rate": 7.95682851970866e-06, "loss": 0.7368807196617126, "step": 773 }, { "epoch": 1.0038931808504168, "grad_norm": 0.8149939775466919, "learning_rate": 7.951059929026826e-06, "loss": 0.7523797750473022, "step": 774 }, { "epoch": 1.0051909078005556, "grad_norm": 0.7600994110107422, "learning_rate": 7.94528530417436e-06, "loss": 0.7559916973114014, "step": 775 }, { "epoch": 1.0064886347506945, "grad_norm": 0.722993791103363, "learning_rate": 7.939504656958913e-06, "loss": 0.7532729506492615, "step": 776 }, { "epoch": 1.0077863617008334, "grad_norm": 0.7640414834022522, "learning_rate": 7.933717999200442e-06, "loss": 0.8589281439781189, "step": 777 }, { "epoch": 1.0090840886509722, "grad_norm": 0.7895022630691528, "learning_rate": 7.927925342731202e-06, "loss": 0.7896629571914673, "step": 778 }, { "epoch": 1.0103818156011113, "grad_norm": 0.8462852239608765, "learning_rate": 7.922126699395705e-06, "loss": 0.7882782816886902, "step": 779 }, { "epoch": 1.0116795425512501, "grad_norm": 0.7864810824394226, "learning_rate": 7.916322081050708e-06, "loss": 0.7486289143562317, "step": 780 }, { "epoch": 1.012977269501389, "grad_norm": 0.7887839674949646, "learning_rate": 7.910511499565192e-06, "loss": 0.6873072385787964, "step": 781 }, { "epoch": 1.0142749964515279, "grad_norm": 0.7757256627082825, "learning_rate": 7.90469496682032e-06, "loss": 0.7133891582489014, "step": 782 }, { "epoch": 1.0155727234016667, "grad_norm": 0.752653181552887, "learning_rate": 7.89887249470943e-06, "loss": 0.7424370050430298, "step": 783 }, { "epoch": 1.0168704503518056, "grad_norm": 0.7578316330909729, "learning_rate": 7.89304409513801e-06, "loss": 0.7230308055877686, "step": 784 }, { "epoch": 1.0181681773019446, "grad_norm": 0.763857901096344, "learning_rate": 7.887209780023652e-06, "loss": 0.7932077050209045, "step": 785 }, { "epoch": 1.0194659042520835, "grad_norm": 0.7570908069610596, "learning_rate": 7.881369561296061e-06, "loss": 0.8092459440231323, "step": 786 }, { "epoch": 1.0207636312022224, "grad_norm": 0.7562793493270874, "learning_rate": 7.875523450897004e-06, "loss": 0.7545458078384399, "step": 787 }, { "epoch": 1.0220613581523612, "grad_norm": 0.7494779825210571, "learning_rate": 7.869671460780297e-06, "loss": 0.7644586563110352, "step": 788 }, { "epoch": 1.0233590851025, "grad_norm": 0.7528582215309143, "learning_rate": 7.863813602911777e-06, "loss": 0.7303503751754761, "step": 789 }, { "epoch": 1.0246568120526391, "grad_norm": 0.7522894740104675, "learning_rate": 7.857949889269285e-06, "loss": 0.769363284111023, "step": 790 }, { "epoch": 1.025954539002778, "grad_norm": 0.7932534217834473, "learning_rate": 7.852080331842627e-06, "loss": 0.6980154514312744, "step": 791 }, { "epoch": 1.0272522659529169, "grad_norm": 0.7427641153335571, "learning_rate": 7.846204942633564e-06, "loss": 0.880423903465271, "step": 792 }, { "epoch": 1.0285499929030557, "grad_norm": 0.8040611743927002, "learning_rate": 7.84032373365578e-06, "loss": 0.7813893556594849, "step": 793 }, { "epoch": 1.0298477198531946, "grad_norm": 0.7074670791625977, "learning_rate": 7.834436716934859e-06, "loss": 0.6680439114570618, "step": 794 }, { "epoch": 1.0311454468033336, "grad_norm": 0.7425694465637207, "learning_rate": 7.828543904508258e-06, "loss": 0.7353805303573608, "step": 795 }, { "epoch": 1.0324431737534725, "grad_norm": 0.7460526823997498, "learning_rate": 7.82264530842529e-06, "loss": 0.7601878046989441, "step": 796 }, { "epoch": 1.0337409007036114, "grad_norm": 0.7570812702178955, "learning_rate": 7.816740940747089e-06, "loss": 0.7122874855995178, "step": 797 }, { "epoch": 1.0350386276537502, "grad_norm": 0.7967089414596558, "learning_rate": 7.810830813546594e-06, "loss": 0.7236475348472595, "step": 798 }, { "epoch": 1.036336354603889, "grad_norm": 0.7531136274337769, "learning_rate": 7.80491493890852e-06, "loss": 0.7801082730293274, "step": 799 }, { "epoch": 1.037634081554028, "grad_norm": 0.7200342416763306, "learning_rate": 7.798993328929328e-06, "loss": 0.7519978284835815, "step": 800 }, { "epoch": 1.038931808504167, "grad_norm": 0.7226322293281555, "learning_rate": 7.793065995717217e-06, "loss": 0.7664476633071899, "step": 801 }, { "epoch": 1.0402295354543059, "grad_norm": 0.7494815587997437, "learning_rate": 7.787132951392082e-06, "loss": 0.7169873714447021, "step": 802 }, { "epoch": 1.0415272624044447, "grad_norm": 0.7437771558761597, "learning_rate": 7.781194208085495e-06, "loss": 0.777241587638855, "step": 803 }, { "epoch": 1.0428249893545836, "grad_norm": 0.7736064195632935, "learning_rate": 7.775249777940685e-06, "loss": 0.7803325653076172, "step": 804 }, { "epoch": 1.0441227163047224, "grad_norm": 0.729405403137207, "learning_rate": 7.769299673112507e-06, "loss": 0.7208072543144226, "step": 805 }, { "epoch": 1.0454204432548615, "grad_norm": 0.7493116855621338, "learning_rate": 7.76334390576742e-06, "loss": 0.7370114326477051, "step": 806 }, { "epoch": 1.0467181702050004, "grad_norm": 0.7444543242454529, "learning_rate": 7.757382488083458e-06, "loss": 0.8371725678443909, "step": 807 }, { "epoch": 1.0480158971551392, "grad_norm": 0.7414549589157104, "learning_rate": 7.751415432250213e-06, "loss": 0.7497185468673706, "step": 808 }, { "epoch": 1.049313624105278, "grad_norm": 0.7469486594200134, "learning_rate": 7.745442750468803e-06, "loss": 0.7713848948478699, "step": 809 }, { "epoch": 1.050611351055417, "grad_norm": 0.7274091243743896, "learning_rate": 7.739464454951853e-06, "loss": 0.7730478048324585, "step": 810 }, { "epoch": 1.051909078005556, "grad_norm": 0.7138769626617432, "learning_rate": 7.733480557923464e-06, "loss": 0.6896777153015137, "step": 811 }, { "epoch": 1.0532068049556949, "grad_norm": 0.773325502872467, "learning_rate": 7.727491071619186e-06, "loss": 0.7608213424682617, "step": 812 }, { "epoch": 1.0545045319058337, "grad_norm": 0.7017092108726501, "learning_rate": 7.72149600828601e-06, "loss": 0.7796779870986938, "step": 813 }, { "epoch": 1.0558022588559726, "grad_norm": 0.7887934446334839, "learning_rate": 7.715495380182314e-06, "loss": 0.7536224722862244, "step": 814 }, { "epoch": 1.0570999858061114, "grad_norm": 0.7297992706298828, "learning_rate": 7.709489199577874e-06, "loss": 0.704574465751648, "step": 815 }, { "epoch": 1.0583977127562503, "grad_norm": 0.7437949180603027, "learning_rate": 7.7034774787538e-06, "loss": 0.741614818572998, "step": 816 }, { "epoch": 1.0596954397063894, "grad_norm": 0.7397854328155518, "learning_rate": 7.697460230002545e-06, "loss": 0.7390281558036804, "step": 817 }, { "epoch": 1.0609931666565282, "grad_norm": 0.7297989130020142, "learning_rate": 7.691437465627859e-06, "loss": 0.7850978970527649, "step": 818 }, { "epoch": 1.062290893606667, "grad_norm": 0.7123188376426697, "learning_rate": 7.685409197944768e-06, "loss": 0.7182658314704895, "step": 819 }, { "epoch": 1.063588620556806, "grad_norm": 0.755650520324707, "learning_rate": 7.679375439279557e-06, "loss": 0.7544305324554443, "step": 820 }, { "epoch": 1.0648863475069448, "grad_norm": 0.7906817197799683, "learning_rate": 7.673336201969733e-06, "loss": 0.8021898865699768, "step": 821 }, { "epoch": 1.0661840744570839, "grad_norm": 0.779934823513031, "learning_rate": 7.667291498364009e-06, "loss": 0.8222522139549255, "step": 822 }, { "epoch": 1.0674818014072227, "grad_norm": 0.7806147933006287, "learning_rate": 7.661241340822274e-06, "loss": 0.824374794960022, "step": 823 }, { "epoch": 1.0687795283573616, "grad_norm": 0.7627273797988892, "learning_rate": 7.655185741715569e-06, "loss": 0.7465693354606628, "step": 824 }, { "epoch": 1.0700772553075004, "grad_norm": 0.7400906682014465, "learning_rate": 7.64912471342606e-06, "loss": 0.7903148531913757, "step": 825 }, { "epoch": 1.0713749822576393, "grad_norm": 0.7551797032356262, "learning_rate": 7.643058268347015e-06, "loss": 0.7619248628616333, "step": 826 }, { "epoch": 1.0726727092077784, "grad_norm": 0.7665975689888, "learning_rate": 7.636986418882783e-06, "loss": 0.7794485092163086, "step": 827 }, { "epoch": 1.0739704361579172, "grad_norm": 0.712073028087616, "learning_rate": 7.630909177448755e-06, "loss": 0.7433626055717468, "step": 828 }, { "epoch": 1.075268163108056, "grad_norm": 0.7561034560203552, "learning_rate": 7.624826556471354e-06, "loss": 0.776486337184906, "step": 829 }, { "epoch": 1.076565890058195, "grad_norm": 0.7552739381790161, "learning_rate": 7.618738568388e-06, "loss": 0.7432718276977539, "step": 830 }, { "epoch": 1.0778636170083338, "grad_norm": 0.7300425171852112, "learning_rate": 7.612645225647086e-06, "loss": 0.6905062198638916, "step": 831 }, { "epoch": 1.0791613439584729, "grad_norm": 0.777538537979126, "learning_rate": 7.60654654070796e-06, "loss": 0.7303627133369446, "step": 832 }, { "epoch": 1.0804590709086117, "grad_norm": 0.7533742189407349, "learning_rate": 7.600442526040883e-06, "loss": 0.7780784964561462, "step": 833 }, { "epoch": 1.0817567978587506, "grad_norm": 0.7974081635475159, "learning_rate": 7.594333194127025e-06, "loss": 0.8020890355110168, "step": 834 }, { "epoch": 1.0830545248088894, "grad_norm": 0.7591211199760437, "learning_rate": 7.58821855745842e-06, "loss": 0.7216978073120117, "step": 835 }, { "epoch": 1.0843522517590283, "grad_norm": 0.7331092357635498, "learning_rate": 7.582098628537955e-06, "loss": 0.7495509386062622, "step": 836 }, { "epoch": 1.0856499787091671, "grad_norm": 0.7309387922286987, "learning_rate": 7.5759734198793365e-06, "loss": 0.7153578400611877, "step": 837 }, { "epoch": 1.0869477056593062, "grad_norm": 0.7279072403907776, "learning_rate": 7.5698429440070616e-06, "loss": 0.8049778938293457, "step": 838 }, { "epoch": 1.088245432609445, "grad_norm": 0.8327065706253052, "learning_rate": 7.563707213456405e-06, "loss": 0.8458483815193176, "step": 839 }, { "epoch": 1.089543159559584, "grad_norm": 0.7936592102050781, "learning_rate": 7.5575662407733815e-06, "loss": 0.7528290152549744, "step": 840 }, { "epoch": 1.0908408865097228, "grad_norm": 0.7855809330940247, "learning_rate": 7.551420038514726e-06, "loss": 0.685576319694519, "step": 841 }, { "epoch": 1.0921386134598616, "grad_norm": 0.7452473640441895, "learning_rate": 7.54526861924787e-06, "loss": 0.8216978311538696, "step": 842 }, { "epoch": 1.0934363404100007, "grad_norm": 0.7420814037322998, "learning_rate": 7.5391119955509026e-06, "loss": 0.725795328617096, "step": 843 }, { "epoch": 1.0947340673601396, "grad_norm": 0.780059278011322, "learning_rate": 7.532950180012564e-06, "loss": 0.7719560861587524, "step": 844 }, { "epoch": 1.0960317943102784, "grad_norm": 0.7614301443099976, "learning_rate": 7.526783185232208e-06, "loss": 0.7809950709342957, "step": 845 }, { "epoch": 1.0973295212604173, "grad_norm": 0.7960677742958069, "learning_rate": 7.520611023819779e-06, "loss": 0.7617411017417908, "step": 846 }, { "epoch": 1.0986272482105561, "grad_norm": 0.7018498778343201, "learning_rate": 7.514433708395783e-06, "loss": 0.7251595854759216, "step": 847 }, { "epoch": 1.099924975160695, "grad_norm": 0.7345300912857056, "learning_rate": 7.508251251591266e-06, "loss": 0.7545365691184998, "step": 848 }, { "epoch": 1.101222702110834, "grad_norm": 0.7398869395256042, "learning_rate": 7.5020636660477894e-06, "loss": 0.7734966278076172, "step": 849 }, { "epoch": 1.102520429060973, "grad_norm": 0.7710892558097839, "learning_rate": 7.4958709644174e-06, "loss": 0.805327296257019, "step": 850 }, { "epoch": 1.1038181560111118, "grad_norm": 0.7368630766868591, "learning_rate": 7.4896731593626015e-06, "loss": 0.7682947516441345, "step": 851 }, { "epoch": 1.1051158829612506, "grad_norm": 0.7361379265785217, "learning_rate": 7.4834702635563395e-06, "loss": 0.7895755767822266, "step": 852 }, { "epoch": 1.1064136099113895, "grad_norm": 0.7309215664863586, "learning_rate": 7.477262289681966e-06, "loss": 0.8211460113525391, "step": 853 }, { "epoch": 1.1077113368615286, "grad_norm": 0.7528488636016846, "learning_rate": 7.471049250433214e-06, "loss": 0.8033767938613892, "step": 854 }, { "epoch": 1.1090090638116674, "grad_norm": 0.772190511226654, "learning_rate": 7.464831158514179e-06, "loss": 0.7697012424468994, "step": 855 }, { "epoch": 1.1103067907618063, "grad_norm": 0.7335084080696106, "learning_rate": 7.458608026639285e-06, "loss": 0.7386154532432556, "step": 856 }, { "epoch": 1.1116045177119451, "grad_norm": 0.8107844591140747, "learning_rate": 7.45237986753326e-06, "loss": 0.8037740588188171, "step": 857 }, { "epoch": 1.112902244662084, "grad_norm": 0.7389320731163025, "learning_rate": 7.446146693931111e-06, "loss": 0.7920179963111877, "step": 858 }, { "epoch": 1.114199971612223, "grad_norm": 0.7466602325439453, "learning_rate": 7.439908518578105e-06, "loss": 0.7859473824501038, "step": 859 }, { "epoch": 1.115497698562362, "grad_norm": 0.752849817276001, "learning_rate": 7.433665354229731e-06, "loss": 0.7838953733444214, "step": 860 }, { "epoch": 1.1167954255125008, "grad_norm": 0.749245285987854, "learning_rate": 7.4274172136516766e-06, "loss": 0.772464394569397, "step": 861 }, { "epoch": 1.1180931524626396, "grad_norm": 0.7855739593505859, "learning_rate": 7.421164109619809e-06, "loss": 0.7547976970672607, "step": 862 }, { "epoch": 1.1193908794127785, "grad_norm": 0.7009910345077515, "learning_rate": 7.4149060549201455e-06, "loss": 0.7241532802581787, "step": 863 }, { "epoch": 1.1206886063629176, "grad_norm": 0.7256485223770142, "learning_rate": 7.408643062348824e-06, "loss": 0.716011106967926, "step": 864 }, { "epoch": 1.1219863333130564, "grad_norm": 0.7414820194244385, "learning_rate": 7.402375144712075e-06, "loss": 0.8162094950675964, "step": 865 }, { "epoch": 1.1232840602631953, "grad_norm": 0.7918742299079895, "learning_rate": 7.396102314826207e-06, "loss": 0.741178035736084, "step": 866 }, { "epoch": 1.1245817872133341, "grad_norm": 0.7321358323097229, "learning_rate": 7.389824585517569e-06, "loss": 0.7776750922203064, "step": 867 }, { "epoch": 1.125879514163473, "grad_norm": 0.7683652639389038, "learning_rate": 7.3835419696225275e-06, "loss": 0.8042243123054504, "step": 868 }, { "epoch": 1.1271772411136118, "grad_norm": 0.7723100185394287, "learning_rate": 7.377254479987445e-06, "loss": 0.779492199420929, "step": 869 }, { "epoch": 1.128474968063751, "grad_norm": 0.7506013512611389, "learning_rate": 7.370962129468642e-06, "loss": 0.7360951900482178, "step": 870 }, { "epoch": 1.1297726950138898, "grad_norm": 0.7726255655288696, "learning_rate": 7.364664930932385e-06, "loss": 0.8193867206573486, "step": 871 }, { "epoch": 1.1310704219640286, "grad_norm": 0.7944323420524597, "learning_rate": 7.35836289725485e-06, "loss": 0.7985553741455078, "step": 872 }, { "epoch": 1.1323681489141675, "grad_norm": 0.7620981931686401, "learning_rate": 7.352056041322103e-06, "loss": 0.7627980709075928, "step": 873 }, { "epoch": 1.1336658758643063, "grad_norm": 0.7579022645950317, "learning_rate": 7.345744376030066e-06, "loss": 0.7844129204750061, "step": 874 }, { "epoch": 1.1349636028144454, "grad_norm": 0.7362563014030457, "learning_rate": 7.339427914284498e-06, "loss": 0.7017800807952881, "step": 875 }, { "epoch": 1.1362613297645843, "grad_norm": 0.7596364617347717, "learning_rate": 7.3331066690009644e-06, "loss": 0.7282767295837402, "step": 876 }, { "epoch": 1.1375590567147231, "grad_norm": 0.7417566776275635, "learning_rate": 7.326780653104813e-06, "loss": 0.7459028363227844, "step": 877 }, { "epoch": 1.138856783664862, "grad_norm": 0.7634292840957642, "learning_rate": 7.320449879531143e-06, "loss": 0.8008609414100647, "step": 878 }, { "epoch": 1.1401545106150008, "grad_norm": 0.7351272106170654, "learning_rate": 7.314114361224785e-06, "loss": 0.7598878145217896, "step": 879 }, { "epoch": 1.1414522375651397, "grad_norm": 0.7726730108261108, "learning_rate": 7.30777411114027e-06, "loss": 0.7667620182037354, "step": 880 }, { "epoch": 1.1427499645152788, "grad_norm": 0.7361600995063782, "learning_rate": 7.301429142241805e-06, "loss": 0.7034899592399597, "step": 881 }, { "epoch": 1.1440476914654176, "grad_norm": 0.7282233834266663, "learning_rate": 7.295079467503247e-06, "loss": 0.6745363473892212, "step": 882 }, { "epoch": 1.1453454184155565, "grad_norm": 0.7537771463394165, "learning_rate": 7.288725099908071e-06, "loss": 0.7910201549530029, "step": 883 }, { "epoch": 1.1466431453656953, "grad_norm": 0.7625670433044434, "learning_rate": 7.282366052449351e-06, "loss": 0.8292834162712097, "step": 884 }, { "epoch": 1.1479408723158344, "grad_norm": 0.7329244613647461, "learning_rate": 7.276002338129731e-06, "loss": 0.7522168755531311, "step": 885 }, { "epoch": 1.1492385992659733, "grad_norm": 0.7891886234283447, "learning_rate": 7.269633969961395e-06, "loss": 0.8136672973632812, "step": 886 }, { "epoch": 1.1505363262161121, "grad_norm": 0.7690410017967224, "learning_rate": 7.2632609609660456e-06, "loss": 0.8092800974845886, "step": 887 }, { "epoch": 1.151834053166251, "grad_norm": 0.7545477151870728, "learning_rate": 7.256883324174871e-06, "loss": 0.7119185924530029, "step": 888 }, { "epoch": 1.1531317801163898, "grad_norm": 0.7471065521240234, "learning_rate": 7.250501072628524e-06, "loss": 0.7687282562255859, "step": 889 }, { "epoch": 1.1544295070665287, "grad_norm": 0.7187462449073792, "learning_rate": 7.2441142193770955e-06, "loss": 0.7572574019432068, "step": 890 }, { "epoch": 1.1557272340166678, "grad_norm": 0.745625376701355, "learning_rate": 7.237722777480083e-06, "loss": 0.8049079775810242, "step": 891 }, { "epoch": 1.1570249609668066, "grad_norm": 0.7539253234863281, "learning_rate": 7.231326760006368e-06, "loss": 0.8245639801025391, "step": 892 }, { "epoch": 1.1583226879169455, "grad_norm": 0.8297720551490784, "learning_rate": 7.224926180034186e-06, "loss": 0.8184964656829834, "step": 893 }, { "epoch": 1.1596204148670843, "grad_norm": 0.757693350315094, "learning_rate": 7.218521050651106e-06, "loss": 0.7818700671195984, "step": 894 }, { "epoch": 1.1609181418172232, "grad_norm": 0.723356306552887, "learning_rate": 7.212111384953993e-06, "loss": 0.7243102192878723, "step": 895 }, { "epoch": 1.1622158687673623, "grad_norm": 0.7468777298927307, "learning_rate": 7.205697196048992e-06, "loss": 0.7427289485931396, "step": 896 }, { "epoch": 1.1635135957175011, "grad_norm": 0.7640864253044128, "learning_rate": 7.199278497051498e-06, "loss": 0.7910446524620056, "step": 897 }, { "epoch": 1.16481132266764, "grad_norm": 0.7713980078697205, "learning_rate": 7.192855301086123e-06, "loss": 0.7965364456176758, "step": 898 }, { "epoch": 1.1661090496177788, "grad_norm": 0.776570200920105, "learning_rate": 7.186427621286678e-06, "loss": 0.753921627998352, "step": 899 }, { "epoch": 1.1674067765679177, "grad_norm": 0.7340149283409119, "learning_rate": 7.179995470796141e-06, "loss": 0.7870222330093384, "step": 900 }, { "epoch": 1.1687045035180565, "grad_norm": 0.7386748194694519, "learning_rate": 7.1735588627666346e-06, "loss": 0.7083443999290466, "step": 901 }, { "epoch": 1.1700022304681956, "grad_norm": 0.7222329378128052, "learning_rate": 7.167117810359387e-06, "loss": 0.6995740532875061, "step": 902 }, { "epoch": 1.1712999574183345, "grad_norm": 0.7578097581863403, "learning_rate": 7.160672326744726e-06, "loss": 0.7429664134979248, "step": 903 }, { "epoch": 1.1725976843684733, "grad_norm": 0.7582994699478149, "learning_rate": 7.154222425102033e-06, "loss": 0.7429041266441345, "step": 904 }, { "epoch": 1.1738954113186122, "grad_norm": 0.7732597589492798, "learning_rate": 7.1477681186197225e-06, "loss": 0.7760801911354065, "step": 905 }, { "epoch": 1.175193138268751, "grad_norm": 0.7655629515647888, "learning_rate": 7.141309420495219e-06, "loss": 0.7732428908348083, "step": 906 }, { "epoch": 1.1764908652188901, "grad_norm": 0.7582475543022156, "learning_rate": 7.134846343934924e-06, "loss": 0.8130234479904175, "step": 907 }, { "epoch": 1.177788592169029, "grad_norm": 0.7538527846336365, "learning_rate": 7.128378902154195e-06, "loss": 0.8220568895339966, "step": 908 }, { "epoch": 1.1790863191191678, "grad_norm": 0.7548943161964417, "learning_rate": 7.121907108377313e-06, "loss": 0.7736442685127258, "step": 909 }, { "epoch": 1.1803840460693067, "grad_norm": 0.7636051177978516, "learning_rate": 7.115430975837457e-06, "loss": 0.8169882297515869, "step": 910 }, { "epoch": 1.1816817730194455, "grad_norm": 0.767085075378418, "learning_rate": 7.10895051777668e-06, "loss": 0.7491641640663147, "step": 911 }, { "epoch": 1.1829794999695844, "grad_norm": 0.7884292602539062, "learning_rate": 7.1024657474458795e-06, "loss": 0.7490637302398682, "step": 912 }, { "epoch": 1.1842772269197235, "grad_norm": 0.7684943675994873, "learning_rate": 7.095976678104768e-06, "loss": 0.7587805390357971, "step": 913 }, { "epoch": 1.1855749538698623, "grad_norm": 0.7661841511726379, "learning_rate": 7.089483323021851e-06, "loss": 0.7323252558708191, "step": 914 }, { "epoch": 1.1868726808200012, "grad_norm": 0.7417365908622742, "learning_rate": 7.082985695474394e-06, "loss": 0.8243879675865173, "step": 915 }, { "epoch": 1.18817040777014, "grad_norm": 0.7636500000953674, "learning_rate": 7.076483808748402e-06, "loss": 0.7433345317840576, "step": 916 }, { "epoch": 1.1894681347202791, "grad_norm": 0.7614058256149292, "learning_rate": 7.069977676138588e-06, "loss": 0.7225663661956787, "step": 917 }, { "epoch": 1.190765861670418, "grad_norm": 0.70651775598526, "learning_rate": 7.063467310948346e-06, "loss": 0.7022136449813843, "step": 918 }, { "epoch": 1.1920635886205568, "grad_norm": 0.7883380055427551, "learning_rate": 7.0569527264897275e-06, "loss": 0.7680527567863464, "step": 919 }, { "epoch": 1.1933613155706957, "grad_norm": 0.7263570427894592, "learning_rate": 7.050433936083405e-06, "loss": 0.7278581857681274, "step": 920 }, { "epoch": 1.1933613155706957, "eval_loss": 0.776515781879425, "eval_runtime": 138.714, "eval_samples_per_second": 37.43, "eval_steps_per_second": 9.357, "step": 920 }, { "epoch": 1.1946590425208345, "grad_norm": 0.7014424800872803, "learning_rate": 7.043910953058657e-06, "loss": 0.7067049145698547, "step": 921 }, { "epoch": 1.1959567694709734, "grad_norm": 0.7746631503105164, "learning_rate": 7.037383790753333e-06, "loss": 0.7801349759101868, "step": 922 }, { "epoch": 1.1972544964211125, "grad_norm": 0.7254592180252075, "learning_rate": 7.030852462513827e-06, "loss": 0.7349493503570557, "step": 923 }, { "epoch": 1.1985522233712513, "grad_norm": 0.7621070742607117, "learning_rate": 7.024316981695053e-06, "loss": 0.719948410987854, "step": 924 }, { "epoch": 1.1998499503213902, "grad_norm": 0.7466797232627869, "learning_rate": 7.017777361660414e-06, "loss": 0.7515002489089966, "step": 925 }, { "epoch": 1.201147677271529, "grad_norm": 0.709510326385498, "learning_rate": 7.011233615781777e-06, "loss": 0.7294214367866516, "step": 926 }, { "epoch": 1.202445404221668, "grad_norm": 0.7353148460388184, "learning_rate": 7.004685757439449e-06, "loss": 0.8314340710639954, "step": 927 }, { "epoch": 1.203743131171807, "grad_norm": 0.7547840476036072, "learning_rate": 6.99813380002214e-06, "loss": 0.772587776184082, "step": 928 }, { "epoch": 1.2050408581219458, "grad_norm": 0.7102051973342896, "learning_rate": 6.991577756926948e-06, "loss": 0.8234133720397949, "step": 929 }, { "epoch": 1.2063385850720847, "grad_norm": 0.7403558492660522, "learning_rate": 6.9850176415593195e-06, "loss": 0.7458879947662354, "step": 930 }, { "epoch": 1.2076363120222235, "grad_norm": 0.7559409141540527, "learning_rate": 6.978453467333028e-06, "loss": 0.7612881660461426, "step": 931 }, { "epoch": 1.2089340389723624, "grad_norm": 0.7423329949378967, "learning_rate": 6.9718852476701535e-06, "loss": 0.8142660856246948, "step": 932 }, { "epoch": 1.2102317659225013, "grad_norm": 0.7359952330589294, "learning_rate": 6.965312996001038e-06, "loss": 0.7446624040603638, "step": 933 }, { "epoch": 1.2115294928726403, "grad_norm": 0.7642225027084351, "learning_rate": 6.958736725764275e-06, "loss": 0.7768579721450806, "step": 934 }, { "epoch": 1.2128272198227792, "grad_norm": 0.7549358606338501, "learning_rate": 6.952156450406673e-06, "loss": 0.6970754265785217, "step": 935 }, { "epoch": 1.214124946772918, "grad_norm": 0.7197482585906982, "learning_rate": 6.945572183383229e-06, "loss": 0.7270360589027405, "step": 936 }, { "epoch": 1.215422673723057, "grad_norm": 0.7698655724525452, "learning_rate": 6.9389839381571025e-06, "loss": 0.7863064408302307, "step": 937 }, { "epoch": 1.216720400673196, "grad_norm": 0.7359870076179504, "learning_rate": 6.932391728199587e-06, "loss": 0.7461609840393066, "step": 938 }, { "epoch": 1.2180181276233348, "grad_norm": 0.7642099857330322, "learning_rate": 6.925795566990083e-06, "loss": 0.7753203511238098, "step": 939 }, { "epoch": 1.2193158545734737, "grad_norm": 0.7594850659370422, "learning_rate": 6.919195468016073e-06, "loss": 0.720144510269165, "step": 940 }, { "epoch": 1.2206135815236125, "grad_norm": 0.7252835631370544, "learning_rate": 6.9125914447730865e-06, "loss": 0.7099956274032593, "step": 941 }, { "epoch": 1.2219113084737514, "grad_norm": 0.7514061331748962, "learning_rate": 6.905983510764681e-06, "loss": 0.7722299695014954, "step": 942 }, { "epoch": 1.2232090354238903, "grad_norm": 0.7274535894393921, "learning_rate": 6.899371679502408e-06, "loss": 0.7559299468994141, "step": 943 }, { "epoch": 1.2245067623740293, "grad_norm": 0.7720993757247925, "learning_rate": 6.89275596450579e-06, "loss": 0.6993024945259094, "step": 944 }, { "epoch": 1.2258044893241682, "grad_norm": 0.7331786155700684, "learning_rate": 6.886136379302288e-06, "loss": 0.7450038194656372, "step": 945 }, { "epoch": 1.227102216274307, "grad_norm": 0.750044047832489, "learning_rate": 6.87951293742728e-06, "loss": 0.7362930178642273, "step": 946 }, { "epoch": 1.228399943224446, "grad_norm": 0.7513757944107056, "learning_rate": 6.872885652424028e-06, "loss": 0.7076199054718018, "step": 947 }, { "epoch": 1.2296976701745848, "grad_norm": 0.7729170322418213, "learning_rate": 6.866254537843651e-06, "loss": 0.7380319237709045, "step": 948 }, { "epoch": 1.2309953971247238, "grad_norm": 0.8171294331550598, "learning_rate": 6.859619607245102e-06, "loss": 0.772607684135437, "step": 949 }, { "epoch": 1.2322931240748627, "grad_norm": 0.8113188147544861, "learning_rate": 6.852980874195132e-06, "loss": 0.7299817204475403, "step": 950 }, { "epoch": 1.2335908510250015, "grad_norm": 0.8075060844421387, "learning_rate": 6.846338352268273e-06, "loss": 0.8221969604492188, "step": 951 }, { "epoch": 1.2348885779751404, "grad_norm": 0.7619355916976929, "learning_rate": 6.839692055046801e-06, "loss": 0.7481426000595093, "step": 952 }, { "epoch": 1.2361863049252793, "grad_norm": 0.7421068549156189, "learning_rate": 6.833041996120707e-06, "loss": 0.7882356643676758, "step": 953 }, { "epoch": 1.2374840318754181, "grad_norm": 0.7260448932647705, "learning_rate": 6.826388189087683e-06, "loss": 0.8039664030075073, "step": 954 }, { "epoch": 1.2387817588255572, "grad_norm": 0.7982683777809143, "learning_rate": 6.819730647553079e-06, "loss": 0.7353008389472961, "step": 955 }, { "epoch": 1.240079485775696, "grad_norm": 0.7499701976776123, "learning_rate": 6.813069385129883e-06, "loss": 0.6967143416404724, "step": 956 }, { "epoch": 1.241377212725835, "grad_norm": 0.7564923167228699, "learning_rate": 6.806404415438689e-06, "loss": 0.730963945388794, "step": 957 }, { "epoch": 1.2426749396759738, "grad_norm": 0.7305687665939331, "learning_rate": 6.7997357521076735e-06, "loss": 0.7766451239585876, "step": 958 }, { "epoch": 1.2439726666261126, "grad_norm": 0.7542693018913269, "learning_rate": 6.793063408772565e-06, "loss": 0.7498062252998352, "step": 959 }, { "epoch": 1.2452703935762517, "grad_norm": 0.8000041842460632, "learning_rate": 6.78638739907662e-06, "loss": 0.7798739075660706, "step": 960 }, { "epoch": 1.2465681205263905, "grad_norm": 0.7799873948097229, "learning_rate": 6.779707736670585e-06, "loss": 0.723656415939331, "step": 961 }, { "epoch": 1.2478658474765294, "grad_norm": 0.7346177697181702, "learning_rate": 6.773024435212678e-06, "loss": 0.7384864687919617, "step": 962 }, { "epoch": 1.2491635744266683, "grad_norm": 0.7333899736404419, "learning_rate": 6.7663375083685635e-06, "loss": 0.7858769297599792, "step": 963 }, { "epoch": 1.250461301376807, "grad_norm": 0.7262735366821289, "learning_rate": 6.759646969811311e-06, "loss": 0.8483904004096985, "step": 964 }, { "epoch": 1.251759028326946, "grad_norm": 0.7812559008598328, "learning_rate": 6.752952833221379e-06, "loss": 0.8175226449966431, "step": 965 }, { "epoch": 1.253056755277085, "grad_norm": 0.795184314250946, "learning_rate": 6.7462551122865825e-06, "loss": 0.7282869815826416, "step": 966 }, { "epoch": 1.254354482227224, "grad_norm": 0.7277102470397949, "learning_rate": 6.739553820702067e-06, "loss": 0.7237769365310669, "step": 967 }, { "epoch": 1.2556522091773628, "grad_norm": 0.7736328840255737, "learning_rate": 6.732848972170276e-06, "loss": 0.6851608753204346, "step": 968 }, { "epoch": 1.2569499361275016, "grad_norm": 0.7597711682319641, "learning_rate": 6.726140580400928e-06, "loss": 0.7550280094146729, "step": 969 }, { "epoch": 1.2582476630776407, "grad_norm": 0.7755154371261597, "learning_rate": 6.719428659110987e-06, "loss": 0.7753418684005737, "step": 970 }, { "epoch": 1.2595453900277795, "grad_norm": 0.7772382497787476, "learning_rate": 6.712713222024633e-06, "loss": 0.7511424422264099, "step": 971 }, { "epoch": 1.2608431169779184, "grad_norm": 0.8128913044929504, "learning_rate": 6.705994282873233e-06, "loss": 0.8187769651412964, "step": 972 }, { "epoch": 1.2621408439280573, "grad_norm": 0.737055778503418, "learning_rate": 6.699271855395321e-06, "loss": 0.757542610168457, "step": 973 }, { "epoch": 1.263438570878196, "grad_norm": 0.785873293876648, "learning_rate": 6.6925459533365576e-06, "loss": 0.7776392102241516, "step": 974 }, { "epoch": 1.264736297828335, "grad_norm": 0.7526916265487671, "learning_rate": 6.685816590449708e-06, "loss": 0.8357568979263306, "step": 975 }, { "epoch": 1.2660340247784738, "grad_norm": 0.7440223693847656, "learning_rate": 6.67908378049462e-06, "loss": 0.7778916954994202, "step": 976 }, { "epoch": 1.267331751728613, "grad_norm": 0.8247221112251282, "learning_rate": 6.672347537238183e-06, "loss": 0.8135752081871033, "step": 977 }, { "epoch": 1.2686294786787518, "grad_norm": 0.8035563826560974, "learning_rate": 6.665607874454311e-06, "loss": 0.7972402572631836, "step": 978 }, { "epoch": 1.2699272056288906, "grad_norm": 0.7410714626312256, "learning_rate": 6.658864805923909e-06, "loss": 0.7736347913742065, "step": 979 }, { "epoch": 1.2712249325790295, "grad_norm": 0.8330723643302917, "learning_rate": 6.652118345434844e-06, "loss": 0.806992769241333, "step": 980 }, { "epoch": 1.2725226595291685, "grad_norm": 0.77059406042099, "learning_rate": 6.64536850678192e-06, "loss": 0.7687591314315796, "step": 981 }, { "epoch": 1.2738203864793074, "grad_norm": 0.7119758725166321, "learning_rate": 6.638615303766849e-06, "loss": 0.7087782621383667, "step": 982 }, { "epoch": 1.2751181134294463, "grad_norm": 0.7512334585189819, "learning_rate": 6.631858750198223e-06, "loss": 0.7549843192100525, "step": 983 }, { "epoch": 1.276415840379585, "grad_norm": 0.7844512462615967, "learning_rate": 6.625098859891483e-06, "loss": 0.7538207769393921, "step": 984 }, { "epoch": 1.277713567329724, "grad_norm": 0.8203350901603699, "learning_rate": 6.618335646668894e-06, "loss": 0.7637907862663269, "step": 985 }, { "epoch": 1.2790112942798628, "grad_norm": 0.7754982113838196, "learning_rate": 6.611569124359516e-06, "loss": 0.7811301350593567, "step": 986 }, { "epoch": 1.280309021230002, "grad_norm": 0.7798315286636353, "learning_rate": 6.604799306799172e-06, "loss": 0.7387815713882446, "step": 987 }, { "epoch": 1.2816067481801408, "grad_norm": 0.7446553111076355, "learning_rate": 6.598026207830428e-06, "loss": 0.7689957022666931, "step": 988 }, { "epoch": 1.2829044751302796, "grad_norm": 0.74884033203125, "learning_rate": 6.591249841302555e-06, "loss": 0.8296911120414734, "step": 989 }, { "epoch": 1.2842022020804185, "grad_norm": 0.7766373753547668, "learning_rate": 6.58447022107151e-06, "loss": 0.7640050053596497, "step": 990 }, { "epoch": 1.2854999290305575, "grad_norm": 0.7423699498176575, "learning_rate": 6.577687360999898e-06, "loss": 0.7463095188140869, "step": 991 }, { "epoch": 1.2867976559806964, "grad_norm": 0.7285832166671753, "learning_rate": 6.5709012749569535e-06, "loss": 0.7764305472373962, "step": 992 }, { "epoch": 1.2880953829308353, "grad_norm": 0.7299691438674927, "learning_rate": 6.564111976818501e-06, "loss": 0.7112918496131897, "step": 993 }, { "epoch": 1.289393109880974, "grad_norm": 0.7747628688812256, "learning_rate": 6.5573194804669416e-06, "loss": 0.8535696864128113, "step": 994 }, { "epoch": 1.290690836831113, "grad_norm": 0.7332835793495178, "learning_rate": 6.550523799791207e-06, "loss": 0.7880507707595825, "step": 995 }, { "epoch": 1.2919885637812518, "grad_norm": 0.7431394457817078, "learning_rate": 6.543724948686747e-06, "loss": 0.7584883570671082, "step": 996 }, { "epoch": 1.2932862907313907, "grad_norm": 0.7418090105056763, "learning_rate": 6.53692294105549e-06, "loss": 0.7503098249435425, "step": 997 }, { "epoch": 1.2945840176815298, "grad_norm": 0.7521533370018005, "learning_rate": 6.53011779080582e-06, "loss": 0.7553617358207703, "step": 998 }, { "epoch": 1.2958817446316686, "grad_norm": 0.7553898692131042, "learning_rate": 6.523309511852547e-06, "loss": 0.8658368587493896, "step": 999 }, { "epoch": 1.2971794715818075, "grad_norm": 0.7538703083992004, "learning_rate": 6.516498118116878e-06, "loss": 0.8231520056724548, "step": 1000 }, { "epoch": 1.2984771985319463, "grad_norm": 0.7610046863555908, "learning_rate": 6.5096836235263904e-06, "loss": 0.7878521084785461, "step": 1001 }, { "epoch": 1.2997749254820854, "grad_norm": 0.7674424648284912, "learning_rate": 6.502866042015e-06, "loss": 0.736729085445404, "step": 1002 }, { "epoch": 1.3010726524322243, "grad_norm": 0.7649716138839722, "learning_rate": 6.496045387522934e-06, "loss": 0.7443773746490479, "step": 1003 }, { "epoch": 1.302370379382363, "grad_norm": 0.7352249026298523, "learning_rate": 6.489221673996708e-06, "loss": 0.6777861714363098, "step": 1004 }, { "epoch": 1.303668106332502, "grad_norm": 0.7463338971138, "learning_rate": 6.482394915389085e-06, "loss": 0.7501753568649292, "step": 1005 }, { "epoch": 1.3049658332826408, "grad_norm": 0.7425395846366882, "learning_rate": 6.475565125659063e-06, "loss": 0.7762592434883118, "step": 1006 }, { "epoch": 1.3062635602327797, "grad_norm": 0.7330830097198486, "learning_rate": 6.4687323187718276e-06, "loss": 0.7636951208114624, "step": 1007 }, { "epoch": 1.3075612871829188, "grad_norm": 0.7125248908996582, "learning_rate": 6.461896508698744e-06, "loss": 0.6942975521087646, "step": 1008 }, { "epoch": 1.3088590141330576, "grad_norm": 0.7361205220222473, "learning_rate": 6.455057709417312e-06, "loss": 0.6826867461204529, "step": 1009 }, { "epoch": 1.3101567410831965, "grad_norm": 0.7715798020362854, "learning_rate": 6.448215934911145e-06, "loss": 0.8465424180030823, "step": 1010 }, { "epoch": 1.3114544680333353, "grad_norm": 0.754898190498352, "learning_rate": 6.441371199169942e-06, "loss": 0.7275772094726562, "step": 1011 }, { "epoch": 1.3127521949834744, "grad_norm": 0.7745576500892639, "learning_rate": 6.434523516189453e-06, "loss": 0.7738680243492126, "step": 1012 }, { "epoch": 1.3140499219336133, "grad_norm": 0.7302242517471313, "learning_rate": 6.427672899971457e-06, "loss": 0.7262543439865112, "step": 1013 }, { "epoch": 1.315347648883752, "grad_norm": 0.7484472990036011, "learning_rate": 6.4208193645237314e-06, "loss": 0.7359537482261658, "step": 1014 }, { "epoch": 1.316645375833891, "grad_norm": 0.7181548476219177, "learning_rate": 6.413962923860021e-06, "loss": 0.7458126544952393, "step": 1015 }, { "epoch": 1.3179431027840298, "grad_norm": 0.7399983406066895, "learning_rate": 6.407103592000009e-06, "loss": 0.713172197341919, "step": 1016 }, { "epoch": 1.3192408297341687, "grad_norm": 0.7715758681297302, "learning_rate": 6.400241382969297e-06, "loss": 0.8125825524330139, "step": 1017 }, { "epoch": 1.3205385566843075, "grad_norm": 0.7169333100318909, "learning_rate": 6.393376310799363e-06, "loss": 0.7719976305961609, "step": 1018 }, { "epoch": 1.3218362836344466, "grad_norm": 0.7504178285598755, "learning_rate": 6.386508389527544e-06, "loss": 0.7362254858016968, "step": 1019 }, { "epoch": 1.3231340105845855, "grad_norm": 0.7360901832580566, "learning_rate": 6.379637633196999e-06, "loss": 0.7390870451927185, "step": 1020 }, { "epoch": 1.3244317375347243, "grad_norm": 0.7370476722717285, "learning_rate": 6.3727640558566865e-06, "loss": 0.7379764318466187, "step": 1021 }, { "epoch": 1.3257294644848632, "grad_norm": 0.7515619397163391, "learning_rate": 6.3658876715613315e-06, "loss": 0.7954624891281128, "step": 1022 }, { "epoch": 1.3270271914350023, "grad_norm": 0.7417750954627991, "learning_rate": 6.3590084943713995e-06, "loss": 0.7774450182914734, "step": 1023 }, { "epoch": 1.328324918385141, "grad_norm": 0.7214071750640869, "learning_rate": 6.35212653835307e-06, "loss": 0.7376827001571655, "step": 1024 }, { "epoch": 1.32962264533528, "grad_norm": 0.7392934560775757, "learning_rate": 6.345241817578196e-06, "loss": 0.7806702852249146, "step": 1025 }, { "epoch": 1.3309203722854188, "grad_norm": 0.7314793467521667, "learning_rate": 6.3383543461242914e-06, "loss": 0.7208546996116638, "step": 1026 }, { "epoch": 1.3322180992355577, "grad_norm": 0.7446559071540833, "learning_rate": 6.331464138074493e-06, "loss": 0.7563031315803528, "step": 1027 }, { "epoch": 1.3335158261856965, "grad_norm": 0.7644572854042053, "learning_rate": 6.32457120751753e-06, "loss": 0.7995797991752625, "step": 1028 }, { "epoch": 1.3348135531358354, "grad_norm": 0.7222753167152405, "learning_rate": 6.317675568547704e-06, "loss": 0.7212015986442566, "step": 1029 }, { "epoch": 1.3361112800859745, "grad_norm": 0.7560867071151733, "learning_rate": 6.310777235264849e-06, "loss": 0.7757279872894287, "step": 1030 }, { "epoch": 1.3374090070361133, "grad_norm": 0.7711772322654724, "learning_rate": 6.303876221774311e-06, "loss": 0.7688950896263123, "step": 1031 }, { "epoch": 1.3387067339862522, "grad_norm": 0.7785727381706238, "learning_rate": 6.296972542186915e-06, "loss": 0.780545711517334, "step": 1032 }, { "epoch": 1.340004460936391, "grad_norm": 0.7750667929649353, "learning_rate": 6.2900662106189415e-06, "loss": 0.7841629385948181, "step": 1033 }, { "epoch": 1.34130218788653, "grad_norm": 0.7886035442352295, "learning_rate": 6.283157241192087e-06, "loss": 0.7848193645477295, "step": 1034 }, { "epoch": 1.342599914836669, "grad_norm": 0.7943287491798401, "learning_rate": 6.276245648033447e-06, "loss": 0.7743536233901978, "step": 1035 }, { "epoch": 1.3438976417868078, "grad_norm": 0.7815331220626831, "learning_rate": 6.2693314452754796e-06, "loss": 0.8415849208831787, "step": 1036 }, { "epoch": 1.3451953687369467, "grad_norm": 0.757429838180542, "learning_rate": 6.26241464705598e-06, "loss": 0.8025727272033691, "step": 1037 }, { "epoch": 1.3464930956870855, "grad_norm": 0.7403960227966309, "learning_rate": 6.25549526751805e-06, "loss": 0.7311757802963257, "step": 1038 }, { "epoch": 1.3477908226372244, "grad_norm": 0.7482463717460632, "learning_rate": 6.24857332081007e-06, "loss": 0.764176607131958, "step": 1039 }, { "epoch": 1.3490885495873635, "grad_norm": 0.749909520149231, "learning_rate": 6.241648821085666e-06, "loss": 0.7602924108505249, "step": 1040 }, { "epoch": 1.3503862765375023, "grad_norm": 0.7637724876403809, "learning_rate": 6.23472178250369e-06, "loss": 0.8251943588256836, "step": 1041 }, { "epoch": 1.3516840034876412, "grad_norm": 0.7882103323936462, "learning_rate": 6.227792219228183e-06, "loss": 0.8028998970985413, "step": 1042 }, { "epoch": 1.35298173043778, "grad_norm": 0.7483314275741577, "learning_rate": 6.220860145428347e-06, "loss": 0.7678120732307434, "step": 1043 }, { "epoch": 1.354279457387919, "grad_norm": 0.7539233565330505, "learning_rate": 6.213925575278518e-06, "loss": 0.7885809540748596, "step": 1044 }, { "epoch": 1.355577184338058, "grad_norm": 0.7156683206558228, "learning_rate": 6.206988522958135e-06, "loss": 0.707705557346344, "step": 1045 }, { "epoch": 1.3568749112881968, "grad_norm": 0.7801918387413025, "learning_rate": 6.200049002651718e-06, "loss": 0.811095654964447, "step": 1046 }, { "epoch": 1.3581726382383357, "grad_norm": 0.7180908918380737, "learning_rate": 6.19310702854883e-06, "loss": 0.671703577041626, "step": 1047 }, { "epoch": 1.3594703651884745, "grad_norm": 0.7936683893203735, "learning_rate": 6.186162614844047e-06, "loss": 0.7717031836509705, "step": 1048 }, { "epoch": 1.3607680921386134, "grad_norm": 0.759444534778595, "learning_rate": 6.17921577573694e-06, "loss": 0.7092019319534302, "step": 1049 }, { "epoch": 1.3620658190887522, "grad_norm": 0.7096357345581055, "learning_rate": 6.172266525432036e-06, "loss": 0.7081625461578369, "step": 1050 }, { "epoch": 1.3633635460388913, "grad_norm": 0.6968888640403748, "learning_rate": 6.165314878138794e-06, "loss": 0.6797256469726562, "step": 1051 }, { "epoch": 1.3646612729890302, "grad_norm": 0.7835313081741333, "learning_rate": 6.1583608480715705e-06, "loss": 0.8080659508705139, "step": 1052 }, { "epoch": 1.365958999939169, "grad_norm": 0.7313963174819946, "learning_rate": 6.1514044494496e-06, "loss": 0.666755199432373, "step": 1053 }, { "epoch": 1.3672567268893079, "grad_norm": 0.7819347977638245, "learning_rate": 6.144445696496955e-06, "loss": 0.8489786982536316, "step": 1054 }, { "epoch": 1.368554453839447, "grad_norm": 0.7251116037368774, "learning_rate": 6.137484603442524e-06, "loss": 0.7178645730018616, "step": 1055 }, { "epoch": 1.3698521807895858, "grad_norm": 0.8075553178787231, "learning_rate": 6.130521184519983e-06, "loss": 0.7850656509399414, "step": 1056 }, { "epoch": 1.3711499077397247, "grad_norm": 0.7090011239051819, "learning_rate": 6.123555453967759e-06, "loss": 0.7282134294509888, "step": 1057 }, { "epoch": 1.3724476346898635, "grad_norm": 0.7184298634529114, "learning_rate": 6.1165874260290074e-06, "loss": 0.7176535129547119, "step": 1058 }, { "epoch": 1.3737453616400024, "grad_norm": 0.7551043629646301, "learning_rate": 6.109617114951581e-06, "loss": 0.7516987323760986, "step": 1059 }, { "epoch": 1.3750430885901412, "grad_norm": 0.7810531258583069, "learning_rate": 6.102644534988006e-06, "loss": 0.7351160049438477, "step": 1060 }, { "epoch": 1.37634081554028, "grad_norm": 0.741783618927002, "learning_rate": 6.0956697003954404e-06, "loss": 0.7304431200027466, "step": 1061 }, { "epoch": 1.3776385424904192, "grad_norm": 0.7723316550254822, "learning_rate": 6.088692625435656e-06, "loss": 0.7627692818641663, "step": 1062 }, { "epoch": 1.378936269440558, "grad_norm": 0.7466050386428833, "learning_rate": 6.0817133243750046e-06, "loss": 0.7436075806617737, "step": 1063 }, { "epoch": 1.3802339963906969, "grad_norm": 0.784697413444519, "learning_rate": 6.074731811484391e-06, "loss": 0.7517378926277161, "step": 1064 }, { "epoch": 1.3815317233408357, "grad_norm": 0.7546585202217102, "learning_rate": 6.067748101039243e-06, "loss": 0.6963843703269958, "step": 1065 }, { "epoch": 1.3828294502909748, "grad_norm": 0.7581716179847717, "learning_rate": 6.060762207319479e-06, "loss": 0.7489246129989624, "step": 1066 }, { "epoch": 1.3841271772411137, "grad_norm": 0.7676309943199158, "learning_rate": 6.053774144609484e-06, "loss": 0.7299286127090454, "step": 1067 }, { "epoch": 1.3854249041912525, "grad_norm": 0.7663894891738892, "learning_rate": 6.046783927198079e-06, "loss": 0.7648952007293701, "step": 1068 }, { "epoch": 1.3867226311413914, "grad_norm": 0.7521557211875916, "learning_rate": 6.039791569378488e-06, "loss": 0.7617720365524292, "step": 1069 }, { "epoch": 1.3880203580915302, "grad_norm": 0.7601999044418335, "learning_rate": 6.032797085448315e-06, "loss": 0.7543430924415588, "step": 1070 }, { "epoch": 1.389318085041669, "grad_norm": 0.7353654503822327, "learning_rate": 6.025800489709505e-06, "loss": 0.799416720867157, "step": 1071 }, { "epoch": 1.3906158119918082, "grad_norm": 0.7517397999763489, "learning_rate": 6.018801796468328e-06, "loss": 0.82966148853302, "step": 1072 }, { "epoch": 1.391913538941947, "grad_norm": 0.8136231899261475, "learning_rate": 6.0118010200353396e-06, "loss": 0.8899416923522949, "step": 1073 }, { "epoch": 1.3932112658920859, "grad_norm": 0.7770370841026306, "learning_rate": 6.004798174725358e-06, "loss": 0.8107327818870544, "step": 1074 }, { "epoch": 1.3945089928422247, "grad_norm": 0.7465303540229797, "learning_rate": 5.997793274857427e-06, "loss": 0.769565999507904, "step": 1075 }, { "epoch": 1.3958067197923638, "grad_norm": 0.7476341128349304, "learning_rate": 5.990786334754795e-06, "loss": 0.8333680629730225, "step": 1076 }, { "epoch": 1.3971044467425027, "grad_norm": 0.7511208653450012, "learning_rate": 5.983777368744881e-06, "loss": 0.7648091316223145, "step": 1077 }, { "epoch": 1.3984021736926415, "grad_norm": 0.7745276093482971, "learning_rate": 5.9767663911592454e-06, "loss": 0.7386494278907776, "step": 1078 }, { "epoch": 1.3996999006427804, "grad_norm": 0.7422261238098145, "learning_rate": 5.9697534163335645e-06, "loss": 0.7428614497184753, "step": 1079 }, { "epoch": 1.4009976275929192, "grad_norm": 0.7582484483718872, "learning_rate": 5.9627384586075954e-06, "loss": 0.771366536617279, "step": 1080 }, { "epoch": 1.402295354543058, "grad_norm": 0.7568719387054443, "learning_rate": 5.955721532325151e-06, "loss": 0.7929791212081909, "step": 1081 }, { "epoch": 1.403593081493197, "grad_norm": 0.7572823762893677, "learning_rate": 5.94870265183407e-06, "loss": 0.7775267362594604, "step": 1082 }, { "epoch": 1.404890808443336, "grad_norm": 0.7899602055549622, "learning_rate": 5.941681831486188e-06, "loss": 0.7695810198783875, "step": 1083 }, { "epoch": 1.4061885353934749, "grad_norm": 0.7336994409561157, "learning_rate": 5.934659085637303e-06, "loss": 0.7132036685943604, "step": 1084 }, { "epoch": 1.4074862623436137, "grad_norm": 0.7522117495536804, "learning_rate": 5.927634428647154e-06, "loss": 0.7515596151351929, "step": 1085 }, { "epoch": 1.4087839892937526, "grad_norm": 0.7884212136268616, "learning_rate": 5.920607874879387e-06, "loss": 0.7542105317115784, "step": 1086 }, { "epoch": 1.4100817162438917, "grad_norm": 0.7597297430038452, "learning_rate": 5.913579438701525e-06, "loss": 0.8347772359848022, "step": 1087 }, { "epoch": 1.4113794431940305, "grad_norm": 0.7561913132667542, "learning_rate": 5.906549134484943e-06, "loss": 0.7754871249198914, "step": 1088 }, { "epoch": 1.4126771701441694, "grad_norm": 0.7146552801132202, "learning_rate": 5.899516976604832e-06, "loss": 0.7631995677947998, "step": 1089 }, { "epoch": 1.4139748970943082, "grad_norm": 0.7291439771652222, "learning_rate": 5.892482979440175e-06, "loss": 0.756955087184906, "step": 1090 }, { "epoch": 1.415272624044447, "grad_norm": 0.7642284631729126, "learning_rate": 5.885447157373716e-06, "loss": 0.7994847893714905, "step": 1091 }, { "epoch": 1.416570350994586, "grad_norm": 0.8073525428771973, "learning_rate": 5.878409524791931e-06, "loss": 0.7306899428367615, "step": 1092 }, { "epoch": 1.417868077944725, "grad_norm": 0.721283495426178, "learning_rate": 5.871370096084997e-06, "loss": 0.7839140892028809, "step": 1093 }, { "epoch": 1.4191658048948639, "grad_norm": 0.754751980304718, "learning_rate": 5.864328885646764e-06, "loss": 0.7405674457550049, "step": 1094 }, { "epoch": 1.4204635318450027, "grad_norm": 0.7623893618583679, "learning_rate": 5.857285907874725e-06, "loss": 0.7703812122344971, "step": 1095 }, { "epoch": 1.4217612587951416, "grad_norm": 0.7525649070739746, "learning_rate": 5.850241177169986e-06, "loss": 0.8114089965820312, "step": 1096 }, { "epoch": 1.4230589857452807, "grad_norm": 0.7692160606384277, "learning_rate": 5.84319470793724e-06, "loss": 0.7578598856925964, "step": 1097 }, { "epoch": 1.4243567126954195, "grad_norm": 0.7233715653419495, "learning_rate": 5.836146514584733e-06, "loss": 0.7252383828163147, "step": 1098 }, { "epoch": 1.4256544396455584, "grad_norm": 0.7710081338882446, "learning_rate": 5.829096611524235e-06, "loss": 0.7712035179138184, "step": 1099 }, { "epoch": 1.4269521665956972, "grad_norm": 0.7670133709907532, "learning_rate": 5.822045013171015e-06, "loss": 0.7823899388313293, "step": 1100 }, { "epoch": 1.428249893545836, "grad_norm": 0.7245627045631409, "learning_rate": 5.814991733943805e-06, "loss": 0.7533504962921143, "step": 1101 }, { "epoch": 1.429547620495975, "grad_norm": 0.7534570693969727, "learning_rate": 5.807936788264778e-06, "loss": 0.7617537379264832, "step": 1102 }, { "epoch": 1.4308453474461138, "grad_norm": 0.7101514935493469, "learning_rate": 5.800880190559511e-06, "loss": 0.7216631770133972, "step": 1103 }, { "epoch": 1.4321430743962529, "grad_norm": 0.7756243348121643, "learning_rate": 5.79382195525696e-06, "loss": 0.8646502494812012, "step": 1104 }, { "epoch": 1.4334408013463917, "grad_norm": 0.7338310480117798, "learning_rate": 5.786762096789431e-06, "loss": 0.7363216876983643, "step": 1105 }, { "epoch": 1.4347385282965306, "grad_norm": 0.7695474028587341, "learning_rate": 5.779700629592547e-06, "loss": 0.8422562479972839, "step": 1106 }, { "epoch": 1.4360362552466694, "grad_norm": 0.7274161577224731, "learning_rate": 5.7726375681052205e-06, "loss": 0.7430284023284912, "step": 1107 }, { "epoch": 1.4373339821968085, "grad_norm": 0.8229888677597046, "learning_rate": 5.765572926769625e-06, "loss": 0.8320131897926331, "step": 1108 }, { "epoch": 1.4386317091469474, "grad_norm": 0.7726709246635437, "learning_rate": 5.758506720031163e-06, "loss": 0.7141331434249878, "step": 1109 }, { "epoch": 1.4399294360970862, "grad_norm": 0.6873381733894348, "learning_rate": 5.751438962338441e-06, "loss": 0.6841472387313843, "step": 1110 }, { "epoch": 1.441227163047225, "grad_norm": 0.7722907066345215, "learning_rate": 5.744369668143233e-06, "loss": 0.7927265763282776, "step": 1111 }, { "epoch": 1.442524889997364, "grad_norm": 0.7216099500656128, "learning_rate": 5.737298851900457e-06, "loss": 0.7576711177825928, "step": 1112 }, { "epoch": 1.4438226169475028, "grad_norm": 0.7648237347602844, "learning_rate": 5.730226528068142e-06, "loss": 0.7920837998390198, "step": 1113 }, { "epoch": 1.4451203438976417, "grad_norm": 0.7815790772438049, "learning_rate": 5.7231527111074e-06, "loss": 0.8274810910224915, "step": 1114 }, { "epoch": 1.4464180708477807, "grad_norm": 0.7311232089996338, "learning_rate": 5.716077415482398e-06, "loss": 0.7946675419807434, "step": 1115 }, { "epoch": 1.4477157977979196, "grad_norm": 0.7482665181159973, "learning_rate": 5.709000655660324e-06, "loss": 0.758356511592865, "step": 1116 }, { "epoch": 1.4490135247480584, "grad_norm": 0.6977280378341675, "learning_rate": 5.7019224461113585e-06, "loss": 0.681710958480835, "step": 1117 }, { "epoch": 1.4503112516981973, "grad_norm": 0.8195312023162842, "learning_rate": 5.694842801308651e-06, "loss": 0.752457857131958, "step": 1118 }, { "epoch": 1.4516089786483364, "grad_norm": 0.7203164100646973, "learning_rate": 5.687761735728282e-06, "loss": 0.7394000291824341, "step": 1119 }, { "epoch": 1.4529067055984752, "grad_norm": 0.7854275107383728, "learning_rate": 5.680679263849241e-06, "loss": 0.7581281661987305, "step": 1120 }, { "epoch": 1.454204432548614, "grad_norm": 0.723127543926239, "learning_rate": 5.673595400153385e-06, "loss": 0.7252097725868225, "step": 1121 }, { "epoch": 1.455502159498753, "grad_norm": 0.7311640381813049, "learning_rate": 5.666510159125427e-06, "loss": 0.77386075258255, "step": 1122 }, { "epoch": 1.4567998864488918, "grad_norm": 0.735611617565155, "learning_rate": 5.65942355525289e-06, "loss": 0.730672299861908, "step": 1123 }, { "epoch": 1.4580976133990307, "grad_norm": 0.7109529972076416, "learning_rate": 5.652335603026084e-06, "loss": 0.6859084963798523, "step": 1124 }, { "epoch": 1.4593953403491697, "grad_norm": 0.7404365539550781, "learning_rate": 5.645246316938082e-06, "loss": 0.7842783331871033, "step": 1125 }, { "epoch": 1.4606930672993086, "grad_norm": 0.7049553394317627, "learning_rate": 5.638155711484674e-06, "loss": 0.7448842525482178, "step": 1126 }, { "epoch": 1.4619907942494474, "grad_norm": 0.7657853364944458, "learning_rate": 5.631063801164356e-06, "loss": 0.7713245749473572, "step": 1127 }, { "epoch": 1.4632885211995863, "grad_norm": 0.7475281953811646, "learning_rate": 5.62397060047829e-06, "loss": 0.7330006957054138, "step": 1128 }, { "epoch": 1.4645862481497254, "grad_norm": 0.7779927849769592, "learning_rate": 5.6168761239302745e-06, "loss": 0.7859174609184265, "step": 1129 }, { "epoch": 1.4658839750998642, "grad_norm": 0.7732834219932556, "learning_rate": 5.609780386026721e-06, "loss": 0.7622615694999695, "step": 1130 }, { "epoch": 1.467181702050003, "grad_norm": 0.7757659554481506, "learning_rate": 5.6026834012766155e-06, "loss": 0.7179020047187805, "step": 1131 }, { "epoch": 1.468479429000142, "grad_norm": 0.8133222460746765, "learning_rate": 5.595585184191496e-06, "loss": 0.8392655849456787, "step": 1132 }, { "epoch": 1.4697771559502808, "grad_norm": 0.741298496723175, "learning_rate": 5.58848574928542e-06, "loss": 0.770621657371521, "step": 1133 }, { "epoch": 1.4710748829004197, "grad_norm": 0.7434273362159729, "learning_rate": 5.5813851110749365e-06, "loss": 0.7763687372207642, "step": 1134 }, { "epoch": 1.4723726098505585, "grad_norm": 0.7332520484924316, "learning_rate": 5.574283284079049e-06, "loss": 0.7230371236801147, "step": 1135 }, { "epoch": 1.4736703368006976, "grad_norm": 0.7544539570808411, "learning_rate": 5.567180282819201e-06, "loss": 0.7774012684822083, "step": 1136 }, { "epoch": 1.4749680637508364, "grad_norm": 0.7133947610855103, "learning_rate": 5.560076121819229e-06, "loss": 0.7977353930473328, "step": 1137 }, { "epoch": 1.4762657907009753, "grad_norm": 0.7752287983894348, "learning_rate": 5.552970815605347e-06, "loss": 0.7770974040031433, "step": 1138 }, { "epoch": 1.4775635176511142, "grad_norm": 0.7379096150398254, "learning_rate": 5.545864378706106e-06, "loss": 0.7216660380363464, "step": 1139 }, { "epoch": 1.4788612446012532, "grad_norm": 0.7484530806541443, "learning_rate": 5.53875682565237e-06, "loss": 0.7281945943832397, "step": 1140 }, { "epoch": 1.480158971551392, "grad_norm": 0.7698119878768921, "learning_rate": 5.5316481709772886e-06, "loss": 0.7480529546737671, "step": 1141 }, { "epoch": 1.481456698501531, "grad_norm": 0.7418819665908813, "learning_rate": 5.524538429216258e-06, "loss": 0.8144744634628296, "step": 1142 }, { "epoch": 1.4827544254516698, "grad_norm": 0.7699658274650574, "learning_rate": 5.517427614906906e-06, "loss": 0.7931643128395081, "step": 1143 }, { "epoch": 1.4840521524018087, "grad_norm": 0.7453754544258118, "learning_rate": 5.510315742589042e-06, "loss": 0.8053094148635864, "step": 1144 }, { "epoch": 1.4853498793519475, "grad_norm": 0.7414461970329285, "learning_rate": 5.503202826804647e-06, "loss": 0.8256296515464783, "step": 1145 }, { "epoch": 1.4866476063020864, "grad_norm": 0.7538757920265198, "learning_rate": 5.496088882097836e-06, "loss": 0.776119589805603, "step": 1146 }, { "epoch": 1.4879453332522254, "grad_norm": 0.7799422740936279, "learning_rate": 5.488973923014821e-06, "loss": 0.7753342986106873, "step": 1147 }, { "epoch": 1.4892430602023643, "grad_norm": 0.7214382290840149, "learning_rate": 5.4818579641038974e-06, "loss": 0.7284688949584961, "step": 1148 }, { "epoch": 1.4905407871525032, "grad_norm": 0.7477608919143677, "learning_rate": 5.474741019915395e-06, "loss": 0.8078314065933228, "step": 1149 }, { "epoch": 1.491838514102642, "grad_norm": 0.7707608342170715, "learning_rate": 5.467623105001667e-06, "loss": 0.7474067211151123, "step": 1150 }, { "epoch": 1.491838514102642, "eval_loss": 0.7689123153686523, "eval_runtime": 143.0902, "eval_samples_per_second": 36.285, "eval_steps_per_second": 9.071, "step": 1150 }, { "epoch": 1.493136241052781, "grad_norm": 0.7697938084602356, "learning_rate": 5.460504233917047e-06, "loss": 0.7899725437164307, "step": 1151 }, { "epoch": 1.49443396800292, "grad_norm": 0.8120201826095581, "learning_rate": 5.453384421217823e-06, "loss": 0.7851537466049194, "step": 1152 }, { "epoch": 1.4957316949530588, "grad_norm": 0.7309612035751343, "learning_rate": 5.446263681462213e-06, "loss": 0.7107774019241333, "step": 1153 }, { "epoch": 1.4970294219031977, "grad_norm": 0.7438963651657104, "learning_rate": 5.439142029210323e-06, "loss": 0.788350522518158, "step": 1154 }, { "epoch": 1.4983271488533365, "grad_norm": 0.7466786503791809, "learning_rate": 5.4320194790241335e-06, "loss": 0.7024964094161987, "step": 1155 }, { "epoch": 1.4996248758034754, "grad_norm": 0.7446750998497009, "learning_rate": 5.424896045467455e-06, "loss": 0.7421405911445618, "step": 1156 }, { "epoch": 1.5009226027536142, "grad_norm": 0.7718220949172974, "learning_rate": 5.417771743105908e-06, "loss": 0.8068550825119019, "step": 1157 }, { "epoch": 1.5022203297037533, "grad_norm": 0.7445886731147766, "learning_rate": 5.4106465865068846e-06, "loss": 0.7749291658401489, "step": 1158 }, { "epoch": 1.5035180566538922, "grad_norm": 0.6862496733665466, "learning_rate": 5.403520590239527e-06, "loss": 0.6442810893058777, "step": 1159 }, { "epoch": 1.504815783604031, "grad_norm": 0.7648230791091919, "learning_rate": 5.396393768874696e-06, "loss": 0.8310628533363342, "step": 1160 }, { "epoch": 1.50611351055417, "grad_norm": 0.7615645527839661, "learning_rate": 5.389266136984939e-06, "loss": 0.738058865070343, "step": 1161 }, { "epoch": 1.507411237504309, "grad_norm": 0.7446813583374023, "learning_rate": 5.382137709144454e-06, "loss": 0.7968519330024719, "step": 1162 }, { "epoch": 1.5087089644544478, "grad_norm": 0.7255394458770752, "learning_rate": 5.3750084999290755e-06, "loss": 0.7655326724052429, "step": 1163 }, { "epoch": 1.5100066914045867, "grad_norm": 0.7433539628982544, "learning_rate": 5.3678785239162305e-06, "loss": 0.7728189826011658, "step": 1164 }, { "epoch": 1.5113044183547255, "grad_norm": 0.7390363216400146, "learning_rate": 5.360747795684916e-06, "loss": 0.6784142255783081, "step": 1165 }, { "epoch": 1.5126021453048644, "grad_norm": 0.7719975709915161, "learning_rate": 5.353616329815667e-06, "loss": 0.8225696086883545, "step": 1166 }, { "epoch": 1.5138998722550032, "grad_norm": 0.77885502576828, "learning_rate": 5.346484140890523e-06, "loss": 0.7254043817520142, "step": 1167 }, { "epoch": 1.5151975992051423, "grad_norm": 0.7126129269599915, "learning_rate": 5.339351243493008e-06, "loss": 0.7107834815979004, "step": 1168 }, { "epoch": 1.5164953261552812, "grad_norm": 0.7463387250900269, "learning_rate": 5.332217652208093e-06, "loss": 0.7344364523887634, "step": 1169 }, { "epoch": 1.51779305310542, "grad_norm": 0.7663910984992981, "learning_rate": 5.325083381622165e-06, "loss": 0.8212227821350098, "step": 1170 }, { "epoch": 1.519090780055559, "grad_norm": 0.7092857956886292, "learning_rate": 5.317948446322999e-06, "loss": 0.6895952820777893, "step": 1171 }, { "epoch": 1.520388507005698, "grad_norm": 0.7231706380844116, "learning_rate": 5.310812860899737e-06, "loss": 0.7563647627830505, "step": 1172 }, { "epoch": 1.5216862339558368, "grad_norm": 0.7281357049942017, "learning_rate": 5.303676639942841e-06, "loss": 0.7277164459228516, "step": 1173 }, { "epoch": 1.5229839609059757, "grad_norm": 0.7401008605957031, "learning_rate": 5.296539798044078e-06, "loss": 0.7237936854362488, "step": 1174 }, { "epoch": 1.5242816878561145, "grad_norm": 0.7612598538398743, "learning_rate": 5.289402349796484e-06, "loss": 0.7660357356071472, "step": 1175 }, { "epoch": 1.5255794148062534, "grad_norm": 0.7405344247817993, "learning_rate": 5.282264309794334e-06, "loss": 0.6943804621696472, "step": 1176 }, { "epoch": 1.5268771417563922, "grad_norm": 0.7092010974884033, "learning_rate": 5.2751256926331115e-06, "loss": 0.7395352721214294, "step": 1177 }, { "epoch": 1.528174868706531, "grad_norm": 0.7905942797660828, "learning_rate": 5.267986512909484e-06, "loss": 0.7439973950386047, "step": 1178 }, { "epoch": 1.5294725956566702, "grad_norm": 0.7583009600639343, "learning_rate": 5.2608467852212665e-06, "loss": 0.7969813346862793, "step": 1179 }, { "epoch": 1.530770322606809, "grad_norm": 0.7311235070228577, "learning_rate": 5.253706524167395e-06, "loss": 0.7828118801116943, "step": 1180 }, { "epoch": 1.5320680495569479, "grad_norm": 0.726702868938446, "learning_rate": 5.246565744347894e-06, "loss": 0.7190544605255127, "step": 1181 }, { "epoch": 1.533365776507087, "grad_norm": 0.7332654595375061, "learning_rate": 5.2394244603638536e-06, "loss": 0.7419191002845764, "step": 1182 }, { "epoch": 1.5346635034572258, "grad_norm": 0.6836192607879639, "learning_rate": 5.232282686817392e-06, "loss": 0.683068573474884, "step": 1183 }, { "epoch": 1.5359612304073647, "grad_norm": 0.722237765789032, "learning_rate": 5.2251404383116265e-06, "loss": 0.7589780688285828, "step": 1184 }, { "epoch": 1.5372589573575035, "grad_norm": 0.7364366054534912, "learning_rate": 5.217997729450649e-06, "loss": 0.7445809841156006, "step": 1185 }, { "epoch": 1.5385566843076424, "grad_norm": 0.7376484870910645, "learning_rate": 5.21085457483949e-06, "loss": 0.7111762166023254, "step": 1186 }, { "epoch": 1.5398544112577812, "grad_norm": 0.748837947845459, "learning_rate": 5.203710989084093e-06, "loss": 0.8105229139328003, "step": 1187 }, { "epoch": 1.54115213820792, "grad_norm": 0.7624726891517639, "learning_rate": 5.196566986791286e-06, "loss": 0.8003865480422974, "step": 1188 }, { "epoch": 1.542449865158059, "grad_norm": 0.7348607182502747, "learning_rate": 5.189422582568742e-06, "loss": 0.7221018671989441, "step": 1189 }, { "epoch": 1.543747592108198, "grad_norm": 0.748846709728241, "learning_rate": 5.182277791024959e-06, "loss": 0.7588199377059937, "step": 1190 }, { "epoch": 1.5450453190583369, "grad_norm": 0.7500258684158325, "learning_rate": 5.175132626769229e-06, "loss": 0.7608622312545776, "step": 1191 }, { "epoch": 1.546343046008476, "grad_norm": 0.7673987150192261, "learning_rate": 5.167987104411605e-06, "loss": 0.815061092376709, "step": 1192 }, { "epoch": 1.5476407729586148, "grad_norm": 0.7776307463645935, "learning_rate": 5.160841238562872e-06, "loss": 0.7571362257003784, "step": 1193 }, { "epoch": 1.5489384999087537, "grad_norm": 0.7552419304847717, "learning_rate": 5.153695043834513e-06, "loss": 0.7361883521080017, "step": 1194 }, { "epoch": 1.5502362268588925, "grad_norm": 0.7167699933052063, "learning_rate": 5.146548534838691e-06, "loss": 0.7665646076202393, "step": 1195 }, { "epoch": 1.5515339538090314, "grad_norm": 0.717226505279541, "learning_rate": 5.139401726188208e-06, "loss": 0.691736102104187, "step": 1196 }, { "epoch": 1.5528316807591702, "grad_norm": 0.7594795227050781, "learning_rate": 5.132254632496477e-06, "loss": 0.7824583053588867, "step": 1197 }, { "epoch": 1.554129407709309, "grad_norm": 0.7554059028625488, "learning_rate": 5.125107268377498e-06, "loss": 0.7830257415771484, "step": 1198 }, { "epoch": 1.555427134659448, "grad_norm": 0.7319567203521729, "learning_rate": 5.117959648445821e-06, "loss": 0.7027431130409241, "step": 1199 }, { "epoch": 1.556724861609587, "grad_norm": 0.783620297908783, "learning_rate": 5.1108117873165175e-06, "loss": 0.8177133798599243, "step": 1200 }, { "epoch": 1.5580225885597259, "grad_norm": 0.7518169283866882, "learning_rate": 5.1036636996051556e-06, "loss": 0.7404662370681763, "step": 1201 }, { "epoch": 1.5593203155098647, "grad_norm": 0.7498476505279541, "learning_rate": 5.096515399927767e-06, "loss": 0.71551513671875, "step": 1202 }, { "epoch": 1.5606180424600038, "grad_norm": 0.7556577324867249, "learning_rate": 5.089366902900813e-06, "loss": 0.7790558338165283, "step": 1203 }, { "epoch": 1.5619157694101427, "grad_norm": 0.7798899412155151, "learning_rate": 5.082218223141162e-06, "loss": 0.7935107350349426, "step": 1204 }, { "epoch": 1.5632134963602815, "grad_norm": 0.704472541809082, "learning_rate": 5.075069375266055e-06, "loss": 0.7023574709892273, "step": 1205 }, { "epoch": 1.5645112233104204, "grad_norm": 0.7147163152694702, "learning_rate": 5.067920373893075e-06, "loss": 0.6481233835220337, "step": 1206 }, { "epoch": 1.5658089502605592, "grad_norm": 0.824305534362793, "learning_rate": 5.060771233640122e-06, "loss": 0.7639428973197937, "step": 1207 }, { "epoch": 1.567106677210698, "grad_norm": 0.7788658738136292, "learning_rate": 5.0536219691253776e-06, "loss": 0.677037239074707, "step": 1208 }, { "epoch": 1.568404404160837, "grad_norm": 0.7424824833869934, "learning_rate": 5.046472594967279e-06, "loss": 0.7886664867401123, "step": 1209 }, { "epoch": 1.5697021311109758, "grad_norm": 0.7809754610061646, "learning_rate": 5.039323125784485e-06, "loss": 0.8234939575195312, "step": 1210 }, { "epoch": 1.5709998580611149, "grad_norm": 0.7964925169944763, "learning_rate": 5.0321735761958515e-06, "loss": 0.7490283846855164, "step": 1211 }, { "epoch": 1.5722975850112537, "grad_norm": 0.7560259699821472, "learning_rate": 5.025023960820399e-06, "loss": 0.7197484970092773, "step": 1212 }, { "epoch": 1.5735953119613926, "grad_norm": 0.7597184777259827, "learning_rate": 5.01787429427728e-06, "loss": 0.7696546316146851, "step": 1213 }, { "epoch": 1.5748930389115317, "grad_norm": 0.7353492975234985, "learning_rate": 5.010724591185752e-06, "loss": 0.7226786613464355, "step": 1214 }, { "epoch": 1.5761907658616705, "grad_norm": 0.7540714144706726, "learning_rate": 5.003574866165149e-06, "loss": 0.7219122648239136, "step": 1215 }, { "epoch": 1.5774884928118094, "grad_norm": 0.7779788374900818, "learning_rate": 4.9964251338348515e-06, "loss": 0.8082325458526611, "step": 1216 }, { "epoch": 1.5787862197619482, "grad_norm": 0.7652958631515503, "learning_rate": 4.989275408814251e-06, "loss": 0.7007216215133667, "step": 1217 }, { "epoch": 1.580083946712087, "grad_norm": 0.7894110679626465, "learning_rate": 4.982125705722722e-06, "loss": 0.7830923795700073, "step": 1218 }, { "epoch": 1.581381673662226, "grad_norm": 0.7912439703941345, "learning_rate": 4.974976039179604e-06, "loss": 0.7531068325042725, "step": 1219 }, { "epoch": 1.5826794006123648, "grad_norm": 0.7875705361366272, "learning_rate": 4.967826423804151e-06, "loss": 0.7552053332328796, "step": 1220 }, { "epoch": 1.5839771275625036, "grad_norm": 0.7627431154251099, "learning_rate": 4.960676874215518e-06, "loss": 0.7305505871772766, "step": 1221 }, { "epoch": 1.5852748545126427, "grad_norm": 0.7455613613128662, "learning_rate": 4.953527405032723e-06, "loss": 0.6995049715042114, "step": 1222 }, { "epoch": 1.5865725814627816, "grad_norm": 0.716137170791626, "learning_rate": 4.946378030874625e-06, "loss": 0.7280579209327698, "step": 1223 }, { "epoch": 1.5878703084129207, "grad_norm": 0.7123472094535828, "learning_rate": 4.9392287663598785e-06, "loss": 0.7188202142715454, "step": 1224 }, { "epoch": 1.5891680353630595, "grad_norm": 0.8411170244216919, "learning_rate": 4.932079626106926e-06, "loss": 0.8100149035453796, "step": 1225 }, { "epoch": 1.5904657623131984, "grad_norm": 0.755990207195282, "learning_rate": 4.924930624733947e-06, "loss": 0.8015238046646118, "step": 1226 }, { "epoch": 1.5917634892633372, "grad_norm": 0.780761182308197, "learning_rate": 4.91778177685884e-06, "loss": 0.775886595249176, "step": 1227 }, { "epoch": 1.593061216213476, "grad_norm": 0.8183882832527161, "learning_rate": 4.910633097099188e-06, "loss": 0.744438886642456, "step": 1228 }, { "epoch": 1.594358943163615, "grad_norm": 0.7258551120758057, "learning_rate": 4.903484600072236e-06, "loss": 0.6860997676849365, "step": 1229 }, { "epoch": 1.5956566701137538, "grad_norm": 0.695157527923584, "learning_rate": 4.896336300394845e-06, "loss": 0.7503056526184082, "step": 1230 }, { "epoch": 1.5969543970638926, "grad_norm": 0.7541502118110657, "learning_rate": 4.889188212683483e-06, "loss": 0.7562340497970581, "step": 1231 }, { "epoch": 1.5982521240140317, "grad_norm": 0.768254280090332, "learning_rate": 4.882040351554181e-06, "loss": 0.7829647064208984, "step": 1232 }, { "epoch": 1.5995498509641706, "grad_norm": 0.7578057646751404, "learning_rate": 4.874892731622503e-06, "loss": 0.7859159708023071, "step": 1233 }, { "epoch": 1.6008475779143094, "grad_norm": 0.7558201551437378, "learning_rate": 4.867745367503524e-06, "loss": 0.7639919519424438, "step": 1234 }, { "epoch": 1.6021453048644485, "grad_norm": 0.7190950512886047, "learning_rate": 4.860598273811793e-06, "loss": 0.7586647868156433, "step": 1235 }, { "epoch": 1.6034430318145874, "grad_norm": 0.7575900554656982, "learning_rate": 4.8534514651613104e-06, "loss": 0.7450016736984253, "step": 1236 }, { "epoch": 1.6047407587647262, "grad_norm": 0.7599023580551147, "learning_rate": 4.846304956165488e-06, "loss": 0.7748116254806519, "step": 1237 }, { "epoch": 1.606038485714865, "grad_norm": 0.7400065660476685, "learning_rate": 4.83915876143713e-06, "loss": 0.7867463827133179, "step": 1238 }, { "epoch": 1.607336212665004, "grad_norm": 0.7144732475280762, "learning_rate": 4.832012895588395e-06, "loss": 0.7374265789985657, "step": 1239 }, { "epoch": 1.6086339396151428, "grad_norm": 0.7419501543045044, "learning_rate": 4.824867373230772e-06, "loss": 0.785868227481842, "step": 1240 }, { "epoch": 1.6099316665652816, "grad_norm": 0.7401681542396545, "learning_rate": 4.817722208975041e-06, "loss": 0.7132147550582886, "step": 1241 }, { "epoch": 1.6112293935154205, "grad_norm": 0.7523838877677917, "learning_rate": 4.81057741743126e-06, "loss": 0.6855551600456238, "step": 1242 }, { "epoch": 1.6125271204655596, "grad_norm": 0.7317535281181335, "learning_rate": 4.8034330132087155e-06, "loss": 0.7517306208610535, "step": 1243 }, { "epoch": 1.6138248474156984, "grad_norm": 0.784957230091095, "learning_rate": 4.7962890109159085e-06, "loss": 0.8171036243438721, "step": 1244 }, { "epoch": 1.6151225743658375, "grad_norm": 0.794258177280426, "learning_rate": 4.789145425160511e-06, "loss": 0.7712936401367188, "step": 1245 }, { "epoch": 1.6164203013159764, "grad_norm": 0.7608366012573242, "learning_rate": 4.782002270549354e-06, "loss": 0.7149404883384705, "step": 1246 }, { "epoch": 1.6177180282661152, "grad_norm": 0.7840806245803833, "learning_rate": 4.774859561688374e-06, "loss": 0.8546009063720703, "step": 1247 }, { "epoch": 1.619015755216254, "grad_norm": 0.8032622337341309, "learning_rate": 4.767717313182611e-06, "loss": 0.82121741771698, "step": 1248 }, { "epoch": 1.620313482166393, "grad_norm": 0.7887139916419983, "learning_rate": 4.760575539636147e-06, "loss": 0.8578335642814636, "step": 1249 }, { "epoch": 1.6216112091165318, "grad_norm": 0.7383653521537781, "learning_rate": 4.753434255652108e-06, "loss": 0.7504441738128662, "step": 1250 }, { "epoch": 1.6229089360666706, "grad_norm": 0.7748722434043884, "learning_rate": 4.746293475832607e-06, "loss": 0.8303184509277344, "step": 1251 }, { "epoch": 1.6242066630168095, "grad_norm": 0.718803346157074, "learning_rate": 4.739153214778735e-06, "loss": 0.7208773493766785, "step": 1252 }, { "epoch": 1.6255043899669486, "grad_norm": 0.7380462884902954, "learning_rate": 4.732013487090517e-06, "loss": 0.7971898913383484, "step": 1253 }, { "epoch": 1.6268021169170874, "grad_norm": 0.7098410129547119, "learning_rate": 4.72487430736689e-06, "loss": 0.7401196360588074, "step": 1254 }, { "epoch": 1.6280998438672263, "grad_norm": 0.7748030424118042, "learning_rate": 4.7177356902056675e-06, "loss": 0.7918237447738647, "step": 1255 }, { "epoch": 1.6293975708173654, "grad_norm": 0.7241711616516113, "learning_rate": 4.7105976502035175e-06, "loss": 0.7051481008529663, "step": 1256 }, { "epoch": 1.6306952977675042, "grad_norm": 0.7323998808860779, "learning_rate": 4.703460201955924e-06, "loss": 0.7552366256713867, "step": 1257 }, { "epoch": 1.631993024717643, "grad_norm": 0.7378799915313721, "learning_rate": 4.696323360057162e-06, "loss": 0.7966490983963013, "step": 1258 }, { "epoch": 1.633290751667782, "grad_norm": 0.7449315190315247, "learning_rate": 4.689187139100265e-06, "loss": 0.8123936057090759, "step": 1259 }, { "epoch": 1.6345884786179208, "grad_norm": 0.735011100769043, "learning_rate": 4.682051553677001e-06, "loss": 0.7324624061584473, "step": 1260 }, { "epoch": 1.6358862055680596, "grad_norm": 0.7569977641105652, "learning_rate": 4.6749166183778375e-06, "loss": 0.7480865120887756, "step": 1261 }, { "epoch": 1.6371839325181985, "grad_norm": 0.7466624975204468, "learning_rate": 4.667782347791908e-06, "loss": 0.7476634383201599, "step": 1262 }, { "epoch": 1.6384816594683373, "grad_norm": 0.7527311444282532, "learning_rate": 4.660648756506993e-06, "loss": 0.7576185464859009, "step": 1263 }, { "epoch": 1.6397793864184764, "grad_norm": 0.7631829977035522, "learning_rate": 4.653515859109478e-06, "loss": 0.768151581287384, "step": 1264 }, { "epoch": 1.6410771133686153, "grad_norm": 0.7673115134239197, "learning_rate": 4.646383670184336e-06, "loss": 0.8062424659729004, "step": 1265 }, { "epoch": 1.6423748403187541, "grad_norm": 0.7701263427734375, "learning_rate": 4.639252204315086e-06, "loss": 0.7878407835960388, "step": 1266 }, { "epoch": 1.6436725672688932, "grad_norm": 0.7331056594848633, "learning_rate": 4.632121476083772e-06, "loss": 0.7622208595275879, "step": 1267 }, { "epoch": 1.644970294219032, "grad_norm": 0.7359024286270142, "learning_rate": 4.624991500070925e-06, "loss": 0.7808008193969727, "step": 1268 }, { "epoch": 1.646268021169171, "grad_norm": 0.7646325826644897, "learning_rate": 4.617862290855548e-06, "loss": 0.7223784923553467, "step": 1269 }, { "epoch": 1.6475657481193098, "grad_norm": 2.2569127082824707, "learning_rate": 4.610733863015063e-06, "loss": 0.8157532215118408, "step": 1270 }, { "epoch": 1.6488634750694486, "grad_norm": 0.7320986390113831, "learning_rate": 4.6036062311253055e-06, "loss": 0.7005835175514221, "step": 1271 }, { "epoch": 1.6501612020195875, "grad_norm": 0.7109540104866028, "learning_rate": 4.596479409760474e-06, "loss": 0.6603987812995911, "step": 1272 }, { "epoch": 1.6514589289697263, "grad_norm": 0.7034411430358887, "learning_rate": 4.589353413493118e-06, "loss": 0.7430425882339478, "step": 1273 }, { "epoch": 1.6527566559198652, "grad_norm": 0.740450382232666, "learning_rate": 4.582228256894093e-06, "loss": 0.7534090280532837, "step": 1274 }, { "epoch": 1.6540543828700043, "grad_norm": 0.758372962474823, "learning_rate": 4.575103954532547e-06, "loss": 0.7891790270805359, "step": 1275 }, { "epoch": 1.6553521098201431, "grad_norm": 0.7437251210212708, "learning_rate": 4.567980520975867e-06, "loss": 0.706454873085022, "step": 1276 }, { "epoch": 1.6566498367702822, "grad_norm": 0.7692578434944153, "learning_rate": 4.560857970789679e-06, "loss": 0.7044224739074707, "step": 1277 }, { "epoch": 1.657947563720421, "grad_norm": 0.7143827676773071, "learning_rate": 4.553736318537789e-06, "loss": 0.7750811576843262, "step": 1278 }, { "epoch": 1.65924529067056, "grad_norm": 0.7983530759811401, "learning_rate": 4.546615578782178e-06, "loss": 0.7537817358970642, "step": 1279 }, { "epoch": 1.6605430176206988, "grad_norm": 0.7134032249450684, "learning_rate": 4.5394957660829554e-06, "loss": 0.7322719097137451, "step": 1280 }, { "epoch": 1.6618407445708376, "grad_norm": 0.7665645480155945, "learning_rate": 4.532376894998335e-06, "loss": 0.7513355016708374, "step": 1281 }, { "epoch": 1.6631384715209765, "grad_norm": 0.7957409024238586, "learning_rate": 4.5252589800846054e-06, "loss": 0.7928126454353333, "step": 1282 }, { "epoch": 1.6644361984711153, "grad_norm": 0.7438947558403015, "learning_rate": 4.518142035896106e-06, "loss": 0.83896404504776, "step": 1283 }, { "epoch": 1.6657339254212542, "grad_norm": 0.7367180585861206, "learning_rate": 4.5110260769851804e-06, "loss": 0.7564518451690674, "step": 1284 }, { "epoch": 1.6670316523713933, "grad_norm": 0.7521037459373474, "learning_rate": 4.503911117902167e-06, "loss": 0.7650635838508606, "step": 1285 }, { "epoch": 1.6683293793215321, "grad_norm": 0.77150958776474, "learning_rate": 4.496797173195354e-06, "loss": 0.7417112588882446, "step": 1286 }, { "epoch": 1.669627106271671, "grad_norm": 0.7432476878166199, "learning_rate": 4.489684257410959e-06, "loss": 0.7239577770233154, "step": 1287 }, { "epoch": 1.67092483322181, "grad_norm": 0.7662708759307861, "learning_rate": 4.482572385093096e-06, "loss": 0.7873606085777283, "step": 1288 }, { "epoch": 1.672222560171949, "grad_norm": 0.7473525404930115, "learning_rate": 4.475461570783741e-06, "loss": 0.7298567891120911, "step": 1289 }, { "epoch": 1.6735202871220878, "grad_norm": 0.7975482940673828, "learning_rate": 4.468351829022713e-06, "loss": 0.8909924626350403, "step": 1290 }, { "epoch": 1.6748180140722266, "grad_norm": 0.7994987368583679, "learning_rate": 4.46124317434763e-06, "loss": 0.8102888464927673, "step": 1291 }, { "epoch": 1.6761157410223655, "grad_norm": 0.696455717086792, "learning_rate": 4.454135621293895e-06, "loss": 0.7557786703109741, "step": 1292 }, { "epoch": 1.6774134679725043, "grad_norm": 0.7524965405464172, "learning_rate": 4.447029184394654e-06, "loss": 0.6317112445831299, "step": 1293 }, { "epoch": 1.6787111949226432, "grad_norm": 0.7275172472000122, "learning_rate": 4.439923878180772e-06, "loss": 0.7290472388267517, "step": 1294 }, { "epoch": 1.680008921872782, "grad_norm": 0.7475486397743225, "learning_rate": 4.4328197171808e-06, "loss": 0.74071204662323, "step": 1295 }, { "epoch": 1.6813066488229211, "grad_norm": 0.711575984954834, "learning_rate": 4.425716715920952e-06, "loss": 0.770182728767395, "step": 1296 }, { "epoch": 1.68260437577306, "grad_norm": 0.7959085702896118, "learning_rate": 4.418614888925064e-06, "loss": 0.798844575881958, "step": 1297 }, { "epoch": 1.6839021027231988, "grad_norm": 0.7391006946563721, "learning_rate": 4.4115142507145806e-06, "loss": 0.7901135087013245, "step": 1298 }, { "epoch": 1.685199829673338, "grad_norm": 0.7250086665153503, "learning_rate": 4.4044148158085046e-06, "loss": 0.729693591594696, "step": 1299 }, { "epoch": 1.6864975566234768, "grad_norm": 0.7853794693946838, "learning_rate": 4.397316598723385e-06, "loss": 0.8253978490829468, "step": 1300 }, { "epoch": 1.6877952835736156, "grad_norm": 0.7512246370315552, "learning_rate": 4.39021961397328e-06, "loss": 0.7918137907981873, "step": 1301 }, { "epoch": 1.6890930105237545, "grad_norm": 0.7677380442619324, "learning_rate": 4.383123876069726e-06, "loss": 0.8250201940536499, "step": 1302 }, { "epoch": 1.6903907374738933, "grad_norm": 0.7484574317932129, "learning_rate": 4.376029399521711e-06, "loss": 0.790423572063446, "step": 1303 }, { "epoch": 1.6916884644240322, "grad_norm": 0.7543118000030518, "learning_rate": 4.368936198835646e-06, "loss": 0.7487786412239075, "step": 1304 }, { "epoch": 1.692986191374171, "grad_norm": 0.7802231907844543, "learning_rate": 4.361844288515327e-06, "loss": 0.7656086683273315, "step": 1305 }, { "epoch": 1.69428391832431, "grad_norm": 0.7723071575164795, "learning_rate": 4.354753683061921e-06, "loss": 0.7637503147125244, "step": 1306 }, { "epoch": 1.695581645274449, "grad_norm": 0.7176677584648132, "learning_rate": 4.347664396973917e-06, "loss": 0.6877071261405945, "step": 1307 }, { "epoch": 1.6968793722245878, "grad_norm": 0.7078595161437988, "learning_rate": 4.340576444747114e-06, "loss": 0.7922831773757935, "step": 1308 }, { "epoch": 1.698177099174727, "grad_norm": 0.8294334411621094, "learning_rate": 4.333489840874575e-06, "loss": 0.7542763352394104, "step": 1309 }, { "epoch": 1.6994748261248658, "grad_norm": 0.794155478477478, "learning_rate": 4.326404599846618e-06, "loss": 0.7189518213272095, "step": 1310 }, { "epoch": 1.7007725530750046, "grad_norm": 0.7483503818511963, "learning_rate": 4.319320736150762e-06, "loss": 0.7760790586471558, "step": 1311 }, { "epoch": 1.7020702800251435, "grad_norm": 0.7561513185501099, "learning_rate": 4.3122382642717196e-06, "loss": 0.7890743613243103, "step": 1312 }, { "epoch": 1.7033680069752823, "grad_norm": 0.7909126281738281, "learning_rate": 4.305157198691351e-06, "loss": 0.8085737228393555, "step": 1313 }, { "epoch": 1.7046657339254212, "grad_norm": 0.7337789535522461, "learning_rate": 4.298077553888644e-06, "loss": 0.7623671293258667, "step": 1314 }, { "epoch": 1.70596346087556, "grad_norm": 0.7426276803016663, "learning_rate": 4.290999344339678e-06, "loss": 0.760446310043335, "step": 1315 }, { "epoch": 1.707261187825699, "grad_norm": 0.7681460380554199, "learning_rate": 4.283922584517603e-06, "loss": 0.7784008383750916, "step": 1316 }, { "epoch": 1.708558914775838, "grad_norm": 0.75020432472229, "learning_rate": 4.276847288892601e-06, "loss": 0.7342908978462219, "step": 1317 }, { "epoch": 1.7098566417259768, "grad_norm": 0.7300113439559937, "learning_rate": 4.269773471931858e-06, "loss": 0.7642952799797058, "step": 1318 }, { "epoch": 1.7111543686761157, "grad_norm": 0.7630840539932251, "learning_rate": 4.262701148099544e-06, "loss": 0.8057624101638794, "step": 1319 }, { "epoch": 1.7124520956262548, "grad_norm": 0.7400659918785095, "learning_rate": 4.255630331856768e-06, "loss": 0.6892107129096985, "step": 1320 }, { "epoch": 1.7137498225763936, "grad_norm": 0.7181682586669922, "learning_rate": 4.248561037661561e-06, "loss": 0.6902667880058289, "step": 1321 }, { "epoch": 1.7150475495265325, "grad_norm": 0.7188737988471985, "learning_rate": 4.241493279968838e-06, "loss": 0.752339243888855, "step": 1322 }, { "epoch": 1.7163452764766713, "grad_norm": 0.7453453540802002, "learning_rate": 4.234427073230377e-06, "loss": 0.7290558815002441, "step": 1323 }, { "epoch": 1.7176430034268102, "grad_norm": 0.7392156720161438, "learning_rate": 4.22736243189478e-06, "loss": 0.7552244663238525, "step": 1324 }, { "epoch": 1.718940730376949, "grad_norm": 0.7706087827682495, "learning_rate": 4.220299370407454e-06, "loss": 0.8071123361587524, "step": 1325 }, { "epoch": 1.720238457327088, "grad_norm": 0.7364552617073059, "learning_rate": 4.2132379032105695e-06, "loss": 0.7699145078659058, "step": 1326 }, { "epoch": 1.7215361842772268, "grad_norm": 0.7225109934806824, "learning_rate": 4.206178044743041e-06, "loss": 0.7168894410133362, "step": 1327 }, { "epoch": 1.7228339112273658, "grad_norm": 0.7608538269996643, "learning_rate": 4.19911980944049e-06, "loss": 0.7722530961036682, "step": 1328 }, { "epoch": 1.7241316381775047, "grad_norm": 0.7884859442710876, "learning_rate": 4.1920632117352235e-06, "loss": 0.7422312498092651, "step": 1329 }, { "epoch": 1.7254293651276438, "grad_norm": 0.7449803352355957, "learning_rate": 4.185008266056195e-06, "loss": 0.6896304488182068, "step": 1330 }, { "epoch": 1.7267270920777826, "grad_norm": 0.7673011422157288, "learning_rate": 4.177954986828987e-06, "loss": 0.7713763117790222, "step": 1331 }, { "epoch": 1.7280248190279215, "grad_norm": 0.7378016710281372, "learning_rate": 4.170903388475766e-06, "loss": 0.7759542465209961, "step": 1332 }, { "epoch": 1.7293225459780603, "grad_norm": 0.7421383261680603, "learning_rate": 4.163853485415269e-06, "loss": 0.759284496307373, "step": 1333 }, { "epoch": 1.7306202729281992, "grad_norm": 0.7550718188285828, "learning_rate": 4.156805292062762e-06, "loss": 0.8264324069023132, "step": 1334 }, { "epoch": 1.731917999878338, "grad_norm": 0.730327308177948, "learning_rate": 4.1497588228300165e-06, "loss": 0.7197825312614441, "step": 1335 }, { "epoch": 1.733215726828477, "grad_norm": 0.7392425537109375, "learning_rate": 4.142714092125277e-06, "loss": 0.6834048628807068, "step": 1336 }, { "epoch": 1.7345134537786158, "grad_norm": 0.7402434945106506, "learning_rate": 4.135671114353239e-06, "loss": 0.8063099980354309, "step": 1337 }, { "epoch": 1.7358111807287548, "grad_norm": 0.680123507976532, "learning_rate": 4.128629903915004e-06, "loss": 0.671716034412384, "step": 1338 }, { "epoch": 1.7371089076788937, "grad_norm": 0.7551307678222656, "learning_rate": 4.121590475208071e-06, "loss": 0.7629652619361877, "step": 1339 }, { "epoch": 1.7384066346290326, "grad_norm": 0.7345308065414429, "learning_rate": 4.114552842626285e-06, "loss": 0.7416568994522095, "step": 1340 }, { "epoch": 1.7397043615791716, "grad_norm": 0.7788087725639343, "learning_rate": 4.107517020559827e-06, "loss": 0.7472527623176575, "step": 1341 }, { "epoch": 1.7410020885293105, "grad_norm": 0.7015308737754822, "learning_rate": 4.1004830233951696e-06, "loss": 0.7568359375, "step": 1342 }, { "epoch": 1.7422998154794493, "grad_norm": 0.7230126857757568, "learning_rate": 4.0934508655150585e-06, "loss": 0.7601989507675171, "step": 1343 }, { "epoch": 1.7435975424295882, "grad_norm": 0.7914838194847107, "learning_rate": 4.086420561298476e-06, "loss": 0.8481265306472778, "step": 1344 }, { "epoch": 1.744895269379727, "grad_norm": 0.7440057992935181, "learning_rate": 4.079392125120613e-06, "loss": 0.7708393335342407, "step": 1345 }, { "epoch": 1.746192996329866, "grad_norm": 0.6886211633682251, "learning_rate": 4.072365571352847e-06, "loss": 0.6713268160820007, "step": 1346 }, { "epoch": 1.7474907232800048, "grad_norm": 0.7026688456535339, "learning_rate": 4.065340914362697e-06, "loss": 0.7294650673866272, "step": 1347 }, { "epoch": 1.7487884502301436, "grad_norm": 0.7501193881034851, "learning_rate": 4.058318168513813e-06, "loss": 0.7362337708473206, "step": 1348 }, { "epoch": 1.7500861771802827, "grad_norm": 0.7246070504188538, "learning_rate": 4.05129734816593e-06, "loss": 0.7650129795074463, "step": 1349 }, { "epoch": 1.7513839041304216, "grad_norm": 0.7573635578155518, "learning_rate": 4.04427846767485e-06, "loss": 0.7343922853469849, "step": 1350 }, { "epoch": 1.7526816310805604, "grad_norm": 0.7699203491210938, "learning_rate": 4.037261541392405e-06, "loss": 0.7772881984710693, "step": 1351 }, { "epoch": 1.7539793580306995, "grad_norm": 0.7182446122169495, "learning_rate": 4.030246583666437e-06, "loss": 0.7065188884735107, "step": 1352 }, { "epoch": 1.7552770849808383, "grad_norm": 0.72418612241745, "learning_rate": 4.023233608840755e-06, "loss": 0.7692337036132812, "step": 1353 }, { "epoch": 1.7565748119309772, "grad_norm": 0.7533490657806396, "learning_rate": 4.016222631255121e-06, "loss": 0.7796276807785034, "step": 1354 }, { "epoch": 1.757872538881116, "grad_norm": 0.7471632957458496, "learning_rate": 4.0092136652452054e-06, "loss": 0.6910297274589539, "step": 1355 }, { "epoch": 1.759170265831255, "grad_norm": 0.7224332094192505, "learning_rate": 4.0022067251425736e-06, "loss": 0.8254155516624451, "step": 1356 }, { "epoch": 1.7604679927813938, "grad_norm": 0.7960043549537659, "learning_rate": 3.9952018252746424e-06, "loss": 0.7877900004386902, "step": 1357 }, { "epoch": 1.7617657197315326, "grad_norm": 0.7473553419113159, "learning_rate": 3.988198979964662e-06, "loss": 0.7509211897850037, "step": 1358 }, { "epoch": 1.7630634466816715, "grad_norm": 0.7365322709083557, "learning_rate": 3.981198203531673e-06, "loss": 0.7771757245063782, "step": 1359 }, { "epoch": 1.7643611736318106, "grad_norm": 0.7372840642929077, "learning_rate": 3.974199510290498e-06, "loss": 0.7108234763145447, "step": 1360 }, { "epoch": 1.7656589005819494, "grad_norm": 0.7740015387535095, "learning_rate": 3.967202914551688e-06, "loss": 0.7688103318214417, "step": 1361 }, { "epoch": 1.7669566275320885, "grad_norm": 0.7467756867408752, "learning_rate": 3.960208430621514e-06, "loss": 0.7408416867256165, "step": 1362 }, { "epoch": 1.7682543544822273, "grad_norm": 0.7169584035873413, "learning_rate": 3.953216072801922e-06, "loss": 0.7672545313835144, "step": 1363 }, { "epoch": 1.7695520814323662, "grad_norm": 0.7557554841041565, "learning_rate": 3.946225855390518e-06, "loss": 0.7705783843994141, "step": 1364 }, { "epoch": 1.770849808382505, "grad_norm": 0.773759663105011, "learning_rate": 3.9392377926805226e-06, "loss": 0.7769662737846375, "step": 1365 }, { "epoch": 1.772147535332644, "grad_norm": 0.8000966310501099, "learning_rate": 3.932251898960759e-06, "loss": 0.8040739297866821, "step": 1366 }, { "epoch": 1.7734452622827828, "grad_norm": 0.7309441566467285, "learning_rate": 3.925268188515611e-06, "loss": 0.782077968120575, "step": 1367 }, { "epoch": 1.7747429892329216, "grad_norm": 0.7728444337844849, "learning_rate": 3.918286675624998e-06, "loss": 0.784885585308075, "step": 1368 }, { "epoch": 1.7760407161830605, "grad_norm": 0.7408946752548218, "learning_rate": 3.911307374564346e-06, "loss": 0.656406044960022, "step": 1369 }, { "epoch": 1.7773384431331996, "grad_norm": 0.7466227412223816, "learning_rate": 3.904330299604562e-06, "loss": 0.7837309241294861, "step": 1370 }, { "epoch": 1.7786361700833384, "grad_norm": 0.7788135409355164, "learning_rate": 3.897355465011996e-06, "loss": 0.7761803269386292, "step": 1371 }, { "epoch": 1.7799338970334773, "grad_norm": 0.7268470525741577, "learning_rate": 3.89038288504842e-06, "loss": 0.6561414003372192, "step": 1372 }, { "epoch": 1.7812316239836163, "grad_norm": 0.704717218875885, "learning_rate": 3.883412573970995e-06, "loss": 0.7361816167831421, "step": 1373 }, { "epoch": 1.7825293509337552, "grad_norm": 0.724858820438385, "learning_rate": 3.876444546032242e-06, "loss": 0.7120435237884521, "step": 1374 }, { "epoch": 1.783827077883894, "grad_norm": 0.7575574517250061, "learning_rate": 3.8694788154800185e-06, "loss": 0.726084291934967, "step": 1375 }, { "epoch": 1.785124804834033, "grad_norm": 0.7479435801506042, "learning_rate": 3.862515396557476e-06, "loss": 0.7584422826766968, "step": 1376 }, { "epoch": 1.7864225317841718, "grad_norm": 0.7577545642852783, "learning_rate": 3.855554303503047e-06, "loss": 0.7728340029716492, "step": 1377 }, { "epoch": 1.7877202587343106, "grad_norm": 0.7183135747909546, "learning_rate": 3.848595550550401e-06, "loss": 0.7717944979667664, "step": 1378 }, { "epoch": 1.7890179856844495, "grad_norm": 0.7661336660385132, "learning_rate": 3.841639151928431e-06, "loss": 0.8036184310913086, "step": 1379 }, { "epoch": 1.7903157126345883, "grad_norm": 0.7206409573554993, "learning_rate": 3.834685121861208e-06, "loss": 0.7713849544525146, "step": 1380 }, { "epoch": 1.7903157126345883, "eval_loss": 0.7627881169319153, "eval_runtime": 141.991, "eval_samples_per_second": 36.566, "eval_steps_per_second": 9.141, "step": 1380 }, { "epoch": 1.7916134395847274, "grad_norm": 0.7314550280570984, "learning_rate": 3.827733474567966e-06, "loss": 0.7648483514785767, "step": 1381 }, { "epoch": 1.7929111665348663, "grad_norm": 0.7055811285972595, "learning_rate": 3.820784224263061e-06, "loss": 0.7016907334327698, "step": 1382 }, { "epoch": 1.7942088934850051, "grad_norm": 0.7724407911300659, "learning_rate": 3.8138373851559546e-06, "loss": 0.7384164333343506, "step": 1383 }, { "epoch": 1.7955066204351442, "grad_norm": 0.7461869120597839, "learning_rate": 3.8068929714511716e-06, "loss": 0.7625059485435486, "step": 1384 }, { "epoch": 1.796804347385283, "grad_norm": 0.7060610055923462, "learning_rate": 3.799950997348283e-06, "loss": 0.7166489362716675, "step": 1385 }, { "epoch": 1.798102074335422, "grad_norm": 0.7320153713226318, "learning_rate": 3.7930114770418654e-06, "loss": 0.674547553062439, "step": 1386 }, { "epoch": 1.7993998012855608, "grad_norm": 0.7460453510284424, "learning_rate": 3.7860744247214853e-06, "loss": 0.7099953293800354, "step": 1387 }, { "epoch": 1.8006975282356996, "grad_norm": 0.7489854693412781, "learning_rate": 3.7791398545716552e-06, "loss": 0.7331588268280029, "step": 1388 }, { "epoch": 1.8019952551858385, "grad_norm": 0.792487382888794, "learning_rate": 3.7722077807718193e-06, "loss": 0.8097270727157593, "step": 1389 }, { "epoch": 1.8032929821359773, "grad_norm": 0.7642346024513245, "learning_rate": 3.7652782174963107e-06, "loss": 0.736085832118988, "step": 1390 }, { "epoch": 1.8045907090861162, "grad_norm": 0.7119442224502563, "learning_rate": 3.758351178914336e-06, "loss": 0.7833069562911987, "step": 1391 }, { "epoch": 1.8058884360362553, "grad_norm": 0.7111310958862305, "learning_rate": 3.7514266791899324e-06, "loss": 0.6887351274490356, "step": 1392 }, { "epoch": 1.8071861629863941, "grad_norm": 0.7528010010719299, "learning_rate": 3.7445047324819517e-06, "loss": 0.8306547403335571, "step": 1393 }, { "epoch": 1.8084838899365332, "grad_norm": 0.7184954285621643, "learning_rate": 3.737585352944021e-06, "loss": 0.7377822995185852, "step": 1394 }, { "epoch": 1.809781616886672, "grad_norm": 0.7828357815742493, "learning_rate": 3.7306685547245225e-06, "loss": 0.7717545628547668, "step": 1395 }, { "epoch": 1.811079343836811, "grad_norm": 0.7384869456291199, "learning_rate": 3.7237543519665543e-06, "loss": 0.732141375541687, "step": 1396 }, { "epoch": 1.8123770707869498, "grad_norm": 0.7369826436042786, "learning_rate": 3.7168427588079153e-06, "loss": 0.7696800231933594, "step": 1397 }, { "epoch": 1.8136747977370886, "grad_norm": 0.6859968304634094, "learning_rate": 3.7099337893810593e-06, "loss": 0.7611477375030518, "step": 1398 }, { "epoch": 1.8149725246872275, "grad_norm": 0.7616201639175415, "learning_rate": 3.703027457813086e-06, "loss": 0.7165590524673462, "step": 1399 }, { "epoch": 1.8162702516373663, "grad_norm": 0.7798823118209839, "learning_rate": 3.696123778225691e-06, "loss": 0.8081139922142029, "step": 1400 }, { "epoch": 1.8175679785875052, "grad_norm": 0.736311674118042, "learning_rate": 3.6892227647351515e-06, "loss": 0.7922640442848206, "step": 1401 }, { "epoch": 1.8188657055376443, "grad_norm": 0.7466685771942139, "learning_rate": 3.6823244314522966e-06, "loss": 0.8202122449874878, "step": 1402 }, { "epoch": 1.8201634324877831, "grad_norm": 0.7271019220352173, "learning_rate": 3.67542879248247e-06, "loss": 0.7178265452384949, "step": 1403 }, { "epoch": 1.821461159437922, "grad_norm": 0.7438377737998962, "learning_rate": 3.668535861925509e-06, "loss": 0.7328163981437683, "step": 1404 }, { "epoch": 1.822758886388061, "grad_norm": 0.765792965888977, "learning_rate": 3.661645653875709e-06, "loss": 0.8025820255279541, "step": 1405 }, { "epoch": 1.8240566133382, "grad_norm": 0.7790916562080383, "learning_rate": 3.6547581824218057e-06, "loss": 0.6988804340362549, "step": 1406 }, { "epoch": 1.8253543402883388, "grad_norm": 0.7480283379554749, "learning_rate": 3.6478734616469324e-06, "loss": 0.7859005928039551, "step": 1407 }, { "epoch": 1.8266520672384776, "grad_norm": 0.7395159602165222, "learning_rate": 3.6409915056286017e-06, "loss": 0.7420051693916321, "step": 1408 }, { "epoch": 1.8279497941886165, "grad_norm": 0.7925701141357422, "learning_rate": 3.6341123284386694e-06, "loss": 0.7727212309837341, "step": 1409 }, { "epoch": 1.8292475211387553, "grad_norm": 0.804506242275238, "learning_rate": 3.627235944143315e-06, "loss": 0.8071032762527466, "step": 1410 }, { "epoch": 1.8305452480888942, "grad_norm": 0.7611510157585144, "learning_rate": 3.620362366803001e-06, "loss": 0.8068900108337402, "step": 1411 }, { "epoch": 1.831842975039033, "grad_norm": 0.7167320847511292, "learning_rate": 3.6134916104724573e-06, "loss": 0.72031170129776, "step": 1412 }, { "epoch": 1.8331407019891721, "grad_norm": 0.7381744384765625, "learning_rate": 3.606623689200637e-06, "loss": 0.8165542483329773, "step": 1413 }, { "epoch": 1.834438428939311, "grad_norm": 0.7563833594322205, "learning_rate": 3.599758617030704e-06, "loss": 0.7464354634284973, "step": 1414 }, { "epoch": 1.83573615588945, "grad_norm": 0.7599388360977173, "learning_rate": 3.5928964079999907e-06, "loss": 0.7536092400550842, "step": 1415 }, { "epoch": 1.837033882839589, "grad_norm": 0.748948335647583, "learning_rate": 3.5860370761399814e-06, "loss": 0.7360177636146545, "step": 1416 }, { "epoch": 1.8383316097897278, "grad_norm": 0.7518510818481445, "learning_rate": 3.5791806354762702e-06, "loss": 0.8265672922134399, "step": 1417 }, { "epoch": 1.8396293367398666, "grad_norm": 0.7552505731582642, "learning_rate": 3.572327100028545e-06, "loss": 0.7927607297897339, "step": 1418 }, { "epoch": 1.8409270636900055, "grad_norm": 0.7464638948440552, "learning_rate": 3.565476483810548e-06, "loss": 0.7352604866027832, "step": 1419 }, { "epoch": 1.8422247906401443, "grad_norm": 0.7622070908546448, "learning_rate": 3.55862880083006e-06, "loss": 0.7505785822868347, "step": 1420 }, { "epoch": 1.8435225175902832, "grad_norm": 0.7533313035964966, "learning_rate": 3.5517840650888564e-06, "loss": 0.7191623449325562, "step": 1421 }, { "epoch": 1.844820244540422, "grad_norm": 0.6952627897262573, "learning_rate": 3.544942290582691e-06, "loss": 0.6740431189537048, "step": 1422 }, { "epoch": 1.8461179714905611, "grad_norm": 0.7936224937438965, "learning_rate": 3.538103491301258e-06, "loss": 0.7648304104804993, "step": 1423 }, { "epoch": 1.8474156984407, "grad_norm": 0.7576262354850769, "learning_rate": 3.531267681228175e-06, "loss": 0.7879096865653992, "step": 1424 }, { "epoch": 1.8487134253908388, "grad_norm": 0.7862621545791626, "learning_rate": 3.5244348743409394e-06, "loss": 0.8113074898719788, "step": 1425 }, { "epoch": 1.850011152340978, "grad_norm": 0.744350254535675, "learning_rate": 3.517605084610917e-06, "loss": 0.8002179265022278, "step": 1426 }, { "epoch": 1.8513088792911168, "grad_norm": 0.7405024766921997, "learning_rate": 3.510778326003294e-06, "loss": 0.7966739535331726, "step": 1427 }, { "epoch": 1.8526066062412556, "grad_norm": 0.7228015065193176, "learning_rate": 3.5039546124770675e-06, "loss": 0.7743226289749146, "step": 1428 }, { "epoch": 1.8539043331913945, "grad_norm": 0.7095702886581421, "learning_rate": 3.4971339579850017e-06, "loss": 0.7427994608879089, "step": 1429 }, { "epoch": 1.8552020601415333, "grad_norm": 0.7417516708374023, "learning_rate": 3.4903163764736104e-06, "loss": 0.7906033396720886, "step": 1430 }, { "epoch": 1.8564997870916722, "grad_norm": 0.7653575539588928, "learning_rate": 3.4835018818831235e-06, "loss": 0.7567316293716431, "step": 1431 }, { "epoch": 1.857797514041811, "grad_norm": 0.7832647562026978, "learning_rate": 3.4766904881474535e-06, "loss": 0.8101804852485657, "step": 1432 }, { "epoch": 1.85909524099195, "grad_norm": 0.7221357226371765, "learning_rate": 3.4698822091941808e-06, "loss": 0.7595087289810181, "step": 1433 }, { "epoch": 1.860392967942089, "grad_norm": 0.7068979740142822, "learning_rate": 3.463077058944511e-06, "loss": 0.7103139162063599, "step": 1434 }, { "epoch": 1.8616906948922278, "grad_norm": 0.7623226046562195, "learning_rate": 3.456275051313255e-06, "loss": 0.7573720812797546, "step": 1435 }, { "epoch": 1.8629884218423667, "grad_norm": 0.7208322882652283, "learning_rate": 3.4494762002087934e-06, "loss": 0.7396218180656433, "step": 1436 }, { "epoch": 1.8642861487925058, "grad_norm": 0.7725145220756531, "learning_rate": 3.4426805195330605e-06, "loss": 0.8249402642250061, "step": 1437 }, { "epoch": 1.8655838757426446, "grad_norm": 0.7519071698188782, "learning_rate": 3.4358880231814983e-06, "loss": 0.7772566080093384, "step": 1438 }, { "epoch": 1.8668816026927835, "grad_norm": 0.7366478443145752, "learning_rate": 3.4290987250430486e-06, "loss": 0.7706719040870667, "step": 1439 }, { "epoch": 1.8681793296429223, "grad_norm": 0.7672808170318604, "learning_rate": 3.4223126390001025e-06, "loss": 0.8184731006622314, "step": 1440 }, { "epoch": 1.8694770565930612, "grad_norm": 0.7246538996696472, "learning_rate": 3.415529778928492e-06, "loss": 0.7725407481193542, "step": 1441 }, { "epoch": 1.8707747835432, "grad_norm": 0.7283217906951904, "learning_rate": 3.408750158697445e-06, "loss": 0.6893804669380188, "step": 1442 }, { "epoch": 1.872072510493339, "grad_norm": 0.7325669527053833, "learning_rate": 3.401973792169574e-06, "loss": 0.7318534255027771, "step": 1443 }, { "epoch": 1.8733702374434777, "grad_norm": 0.7319545745849609, "learning_rate": 3.39520069320083e-06, "loss": 0.7276860475540161, "step": 1444 }, { "epoch": 1.8746679643936168, "grad_norm": 0.7628985047340393, "learning_rate": 3.3884308756404873e-06, "loss": 0.760532021522522, "step": 1445 }, { "epoch": 1.8759656913437557, "grad_norm": 0.779719352722168, "learning_rate": 3.381664353331107e-06, "loss": 0.7530460953712463, "step": 1446 }, { "epoch": 1.8772634182938948, "grad_norm": 0.7683849930763245, "learning_rate": 3.3749011401085185e-06, "loss": 0.7530565857887268, "step": 1447 }, { "epoch": 1.8785611452440336, "grad_norm": 0.7684382796287537, "learning_rate": 3.3681412498017773e-06, "loss": 0.7397608757019043, "step": 1448 }, { "epoch": 1.8798588721941725, "grad_norm": 0.7616894841194153, "learning_rate": 3.361384696233152e-06, "loss": 0.7777236700057983, "step": 1449 }, { "epoch": 1.8811565991443113, "grad_norm": 0.771138608455658, "learning_rate": 3.354631493218081e-06, "loss": 0.7786779403686523, "step": 1450 }, { "epoch": 1.8824543260944502, "grad_norm": 0.7471014857292175, "learning_rate": 3.347881654565159e-06, "loss": 0.7542305588722229, "step": 1451 }, { "epoch": 1.883752053044589, "grad_norm": 0.7468750476837158, "learning_rate": 3.3411351940760924e-06, "loss": 0.7149202823638916, "step": 1452 }, { "epoch": 1.885049779994728, "grad_norm": 0.7417929768562317, "learning_rate": 3.3343921255456903e-06, "loss": 0.7159502506256104, "step": 1453 }, { "epoch": 1.8863475069448667, "grad_norm": 0.7415879368782043, "learning_rate": 3.3276524627618177e-06, "loss": 0.6990147829055786, "step": 1454 }, { "epoch": 1.8876452338950058, "grad_norm": 0.7450370788574219, "learning_rate": 3.3209162195053825e-06, "loss": 0.7316166162490845, "step": 1455 }, { "epoch": 1.8889429608451447, "grad_norm": 0.7386918067932129, "learning_rate": 3.314183409550293e-06, "loss": 0.7210360169410706, "step": 1456 }, { "epoch": 1.8902406877952835, "grad_norm": 0.7389383912086487, "learning_rate": 3.3074540466634454e-06, "loss": 0.7895267605781555, "step": 1457 }, { "epoch": 1.8915384147454226, "grad_norm": 0.7614936828613281, "learning_rate": 3.300728144604681e-06, "loss": 0.8177999258041382, "step": 1458 }, { "epoch": 1.8928361416955615, "grad_norm": 0.7167849540710449, "learning_rate": 3.294005717126767e-06, "loss": 0.6862820982933044, "step": 1459 }, { "epoch": 1.8941338686457003, "grad_norm": 0.7517713308334351, "learning_rate": 3.287286777975369e-06, "loss": 0.708810031414032, "step": 1460 }, { "epoch": 1.8954315955958392, "grad_norm": 0.7490662336349487, "learning_rate": 3.2805713408890134e-06, "loss": 0.733518123626709, "step": 1461 }, { "epoch": 1.896729322545978, "grad_norm": 0.7208186388015747, "learning_rate": 3.2738594195990725e-06, "loss": 0.7204444408416748, "step": 1462 }, { "epoch": 1.898027049496117, "grad_norm": 0.7559354901313782, "learning_rate": 3.267151027829725e-06, "loss": 0.7662622332572937, "step": 1463 }, { "epoch": 1.8993247764462557, "grad_norm": 0.7530090808868408, "learning_rate": 3.2604461792979346e-06, "loss": 0.7748895883560181, "step": 1464 }, { "epoch": 1.9006225033963946, "grad_norm": 0.7309477925300598, "learning_rate": 3.253744887713417e-06, "loss": 0.7579443454742432, "step": 1465 }, { "epoch": 1.9019202303465337, "grad_norm": 0.7468703985214233, "learning_rate": 3.2470471667786217e-06, "loss": 0.7491274476051331, "step": 1466 }, { "epoch": 1.9032179572966725, "grad_norm": 0.695270299911499, "learning_rate": 3.2403530301886897e-06, "loss": 0.7576136589050293, "step": 1467 }, { "epoch": 1.9045156842468114, "grad_norm": 0.7494797706604004, "learning_rate": 3.2336624916314385e-06, "loss": 0.7278751134872437, "step": 1468 }, { "epoch": 1.9058134111969505, "grad_norm": 0.7021095752716064, "learning_rate": 3.226975564787322e-06, "loss": 0.7184893488883972, "step": 1469 }, { "epoch": 1.9071111381470893, "grad_norm": 0.7631714940071106, "learning_rate": 3.2202922633294178e-06, "loss": 0.7584774494171143, "step": 1470 }, { "epoch": 1.9084088650972282, "grad_norm": 0.7526757121086121, "learning_rate": 3.2136126009233815e-06, "loss": 0.7316581010818481, "step": 1471 }, { "epoch": 1.909706592047367, "grad_norm": 0.8025736212730408, "learning_rate": 3.2069365912274364e-06, "loss": 0.7791706323623657, "step": 1472 }, { "epoch": 1.911004318997506, "grad_norm": 0.7570320963859558, "learning_rate": 3.2002642478923273e-06, "loss": 0.7199668288230896, "step": 1473 }, { "epoch": 1.9123020459476447, "grad_norm": 0.7405751347541809, "learning_rate": 3.1935955845613138e-06, "loss": 0.7453205585479736, "step": 1474 }, { "epoch": 1.9135997728977836, "grad_norm": 0.7665459513664246, "learning_rate": 3.1869306148701186e-06, "loss": 0.7860292792320251, "step": 1475 }, { "epoch": 1.9148974998479225, "grad_norm": 0.7148594856262207, "learning_rate": 3.1802693524469226e-06, "loss": 0.7073293328285217, "step": 1476 }, { "epoch": 1.9161952267980615, "grad_norm": 0.8274069428443909, "learning_rate": 3.1736118109123183e-06, "loss": 0.8189059495925903, "step": 1477 }, { "epoch": 1.9174929537482004, "grad_norm": 0.7543789148330688, "learning_rate": 3.1669580038792953e-06, "loss": 0.7467123866081238, "step": 1478 }, { "epoch": 1.9187906806983395, "grad_norm": 0.7482509613037109, "learning_rate": 3.1603079449532014e-06, "loss": 0.7409033179283142, "step": 1479 }, { "epoch": 1.9200884076484783, "grad_norm": 0.7407187819480896, "learning_rate": 3.1536616477317283e-06, "loss": 0.8021547198295593, "step": 1480 }, { "epoch": 1.9213861345986172, "grad_norm": 0.7338560223579407, "learning_rate": 3.147019125804869e-06, "loss": 0.7389906048774719, "step": 1481 }, { "epoch": 1.922683861548756, "grad_norm": 0.7023736238479614, "learning_rate": 3.140380392754901e-06, "loss": 0.6907408237457275, "step": 1482 }, { "epoch": 1.923981588498895, "grad_norm": 0.7529736757278442, "learning_rate": 3.13374546215635e-06, "loss": 0.7589248418807983, "step": 1483 }, { "epoch": 1.9252793154490337, "grad_norm": 0.7569683194160461, "learning_rate": 3.1271143475759745e-06, "loss": 0.8030762076377869, "step": 1484 }, { "epoch": 1.9265770423991726, "grad_norm": 0.7221974730491638, "learning_rate": 3.1204870625727216e-06, "loss": 0.7365057468414307, "step": 1485 }, { "epoch": 1.9278747693493115, "grad_norm": 0.7299054265022278, "learning_rate": 3.1138636206977147e-06, "loss": 0.7652139067649841, "step": 1486 }, { "epoch": 1.9291724962994505, "grad_norm": 0.7605259418487549, "learning_rate": 3.107244035494212e-06, "loss": 0.7461530566215515, "step": 1487 }, { "epoch": 1.9304702232495894, "grad_norm": 0.7828764319419861, "learning_rate": 3.100628320497592e-06, "loss": 0.7160741090774536, "step": 1488 }, { "epoch": 1.9317679501997282, "grad_norm": 0.7266847491264343, "learning_rate": 3.0940164892353197e-06, "loss": 0.7605788707733154, "step": 1489 }, { "epoch": 1.9330656771498673, "grad_norm": 0.7265951037406921, "learning_rate": 3.087408555226914e-06, "loss": 0.7256218791007996, "step": 1490 }, { "epoch": 1.9343634041000062, "grad_norm": 0.743874192237854, "learning_rate": 3.0808045319839285e-06, "loss": 0.7782658934593201, "step": 1491 }, { "epoch": 1.935661131050145, "grad_norm": 0.7700090408325195, "learning_rate": 3.0742044330099162e-06, "loss": 0.8411778211593628, "step": 1492 }, { "epoch": 1.936958858000284, "grad_norm": 0.7528886795043945, "learning_rate": 3.067608271800414e-06, "loss": 0.7456690669059753, "step": 1493 }, { "epoch": 1.9382565849504227, "grad_norm": 0.7644984722137451, "learning_rate": 3.0610160618428987e-06, "loss": 0.8341564536094666, "step": 1494 }, { "epoch": 1.9395543119005616, "grad_norm": 0.7603514790534973, "learning_rate": 3.054427816616773e-06, "loss": 0.7421169281005859, "step": 1495 }, { "epoch": 1.9408520388507005, "grad_norm": 0.750495195388794, "learning_rate": 3.0478435495933273e-06, "loss": 0.7318063974380493, "step": 1496 }, { "epoch": 1.9421497658008393, "grad_norm": 0.7440663576126099, "learning_rate": 3.0412632742357263e-06, "loss": 0.7301744222640991, "step": 1497 }, { "epoch": 1.9434474927509784, "grad_norm": 0.7638893723487854, "learning_rate": 3.0346870039989618e-06, "loss": 0.807081401348114, "step": 1498 }, { "epoch": 1.9447452197011172, "grad_norm": 0.7054970860481262, "learning_rate": 3.028114752329848e-06, "loss": 0.6948739886283875, "step": 1499 }, { "epoch": 1.9460429466512563, "grad_norm": 0.7606166005134583, "learning_rate": 3.0215465326669724e-06, "loss": 0.7260342240333557, "step": 1500 }, { "epoch": 1.9473406736013952, "grad_norm": 0.7610599398612976, "learning_rate": 3.0149823584406834e-06, "loss": 0.7050684690475464, "step": 1501 }, { "epoch": 1.948638400551534, "grad_norm": 0.7257749438285828, "learning_rate": 3.008422243073053e-06, "loss": 0.7687860131263733, "step": 1502 }, { "epoch": 1.9499361275016729, "grad_norm": 0.7967380285263062, "learning_rate": 3.001866199977861e-06, "loss": 0.7173135280609131, "step": 1503 }, { "epoch": 1.9512338544518117, "grad_norm": 0.7631365656852722, "learning_rate": 2.995314242560553e-06, "loss": 0.6869869232177734, "step": 1504 }, { "epoch": 1.9525315814019506, "grad_norm": 0.7799795269966125, "learning_rate": 2.988766384218225e-06, "loss": 0.8173317313194275, "step": 1505 }, { "epoch": 1.9538293083520895, "grad_norm": 0.7079163193702698, "learning_rate": 2.982222638339588e-06, "loss": 0.7003392577171326, "step": 1506 }, { "epoch": 1.9551270353022283, "grad_norm": 0.7494550347328186, "learning_rate": 2.9756830183049502e-06, "loss": 0.7491193413734436, "step": 1507 }, { "epoch": 1.9564247622523674, "grad_norm": 0.7550981640815735, "learning_rate": 2.969147537486175e-06, "loss": 0.6874207258224487, "step": 1508 }, { "epoch": 1.9577224892025062, "grad_norm": 0.7690152525901794, "learning_rate": 2.962616209246669e-06, "loss": 0.7523796558380127, "step": 1509 }, { "epoch": 1.959020216152645, "grad_norm": 0.7652460336685181, "learning_rate": 2.956089046941344e-06, "loss": 0.7440061569213867, "step": 1510 }, { "epoch": 1.9603179431027842, "grad_norm": 0.7546727657318115, "learning_rate": 2.9495660639165967e-06, "loss": 0.6781274676322937, "step": 1511 }, { "epoch": 1.961615670052923, "grad_norm": 0.7501972913742065, "learning_rate": 2.9430472735102733e-06, "loss": 0.8312241435050964, "step": 1512 }, { "epoch": 1.9629133970030619, "grad_norm": 0.7584221959114075, "learning_rate": 2.9365326890516543e-06, "loss": 0.7617495059967041, "step": 1513 }, { "epoch": 1.9642111239532007, "grad_norm": 0.7630670070648193, "learning_rate": 2.9300223238614135e-06, "loss": 0.7445218563079834, "step": 1514 }, { "epoch": 1.9655088509033396, "grad_norm": 0.7560063004493713, "learning_rate": 2.923516191251601e-06, "loss": 0.7189357280731201, "step": 1515 }, { "epoch": 1.9668065778534785, "grad_norm": 0.7539728283882141, "learning_rate": 2.917014304525609e-06, "loss": 0.7735335826873779, "step": 1516 }, { "epoch": 1.9681043048036173, "grad_norm": 0.7924235463142395, "learning_rate": 2.91051667697815e-06, "loss": 0.7291359305381775, "step": 1517 }, { "epoch": 1.9694020317537562, "grad_norm": 0.7921690344810486, "learning_rate": 2.904023321895234e-06, "loss": 0.7890439629554749, "step": 1518 }, { "epoch": 1.9706997587038952, "grad_norm": 0.7306675314903259, "learning_rate": 2.8975342525541217e-06, "loss": 0.7303510904312134, "step": 1519 }, { "epoch": 1.971997485654034, "grad_norm": 0.7390612959861755, "learning_rate": 2.8910494822233203e-06, "loss": 0.738096296787262, "step": 1520 }, { "epoch": 1.973295212604173, "grad_norm": 0.7136329412460327, "learning_rate": 2.8845690241625437e-06, "loss": 0.693221390247345, "step": 1521 }, { "epoch": 1.974592939554312, "grad_norm": 0.7404115796089172, "learning_rate": 2.878092891622688e-06, "loss": 0.7287771105766296, "step": 1522 }, { "epoch": 1.9758906665044509, "grad_norm": 0.7220072746276855, "learning_rate": 2.871621097845806e-06, "loss": 0.7384371161460876, "step": 1523 }, { "epoch": 1.9771883934545897, "grad_norm": 0.7408435344696045, "learning_rate": 2.865153656065076e-06, "loss": 0.8272619247436523, "step": 1524 }, { "epoch": 1.9784861204047286, "grad_norm": 0.7614798545837402, "learning_rate": 2.8586905795047813e-06, "loss": 0.7376547455787659, "step": 1525 }, { "epoch": 1.9797838473548675, "grad_norm": 0.7582762837409973, "learning_rate": 2.8522318813802796e-06, "loss": 0.7691409587860107, "step": 1526 }, { "epoch": 1.9810815743050063, "grad_norm": 0.7164722681045532, "learning_rate": 2.8457775748979664e-06, "loss": 0.8395357728004456, "step": 1527 }, { "epoch": 1.9823793012551452, "grad_norm": 0.7436051964759827, "learning_rate": 2.8393276732552745e-06, "loss": 0.7708083391189575, "step": 1528 }, { "epoch": 1.983677028205284, "grad_norm": 0.72344970703125, "learning_rate": 2.8328821896406132e-06, "loss": 0.7172407507896423, "step": 1529 }, { "epoch": 1.984974755155423, "grad_norm": 0.7652667760848999, "learning_rate": 2.826441137233368e-06, "loss": 0.8026193976402283, "step": 1530 }, { "epoch": 1.986272482105562, "grad_norm": 0.744799792766571, "learning_rate": 2.8200045292038596e-06, "loss": 0.7224990725517273, "step": 1531 }, { "epoch": 1.987570209055701, "grad_norm": 0.7798324227333069, "learning_rate": 2.8135723787133233e-06, "loss": 0.8296301960945129, "step": 1532 }, { "epoch": 1.9888679360058399, "grad_norm": 0.7641241550445557, "learning_rate": 2.8071446989138786e-06, "loss": 0.8053862452507019, "step": 1533 }, { "epoch": 1.9901656629559787, "grad_norm": 0.7807287573814392, "learning_rate": 2.800721502948506e-06, "loss": 0.8045886754989624, "step": 1534 }, { "epoch": 1.9914633899061176, "grad_norm": 0.7817100882530212, "learning_rate": 2.7943028039510085e-06, "loss": 0.7601321339607239, "step": 1535 }, { "epoch": 1.9927611168562565, "grad_norm": 0.7499108910560608, "learning_rate": 2.78788861504601e-06, "loss": 0.712813138961792, "step": 1536 }, { "epoch": 1.9940588438063953, "grad_norm": 0.7500565052032471, "learning_rate": 2.7814789493488947e-06, "loss": 0.784660816192627, "step": 1537 }, { "epoch": 1.9953565707565342, "grad_norm": 0.7742734551429749, "learning_rate": 2.7750738199658157e-06, "loss": 0.790881872177124, "step": 1538 }, { "epoch": 1.996654297706673, "grad_norm": 0.7405888438224792, "learning_rate": 2.7686732399936343e-06, "loss": 0.75714111328125, "step": 1539 }, { "epoch": 1.997952024656812, "grad_norm": 0.7482635974884033, "learning_rate": 2.762277222519919e-06, "loss": 0.8016012907028198, "step": 1540 }, { "epoch": 1.999249751606951, "grad_norm": 0.7615522146224976, "learning_rate": 2.7558857806229066e-06, "loss": 0.7230945825576782, "step": 1541 }, { "epoch": 2.0, "grad_norm": 0.9185896515846252, "learning_rate": 2.749498927371478e-06, "loss": 0.6910619139671326, "step": 1542 }, { "epoch": 2.001297726950139, "grad_norm": 0.8090460300445557, "learning_rate": 2.7431166758251317e-06, "loss": 0.7160417437553406, "step": 1543 }, { "epoch": 2.0025954539002777, "grad_norm": 0.821662425994873, "learning_rate": 2.7367390390339565e-06, "loss": 0.8003557324409485, "step": 1544 }, { "epoch": 2.0038931808504166, "grad_norm": 0.8417342305183411, "learning_rate": 2.730366030038606e-06, "loss": 0.6696601510047913, "step": 1545 }, { "epoch": 2.0051909078005554, "grad_norm": 0.7647730112075806, "learning_rate": 2.72399766187027e-06, "loss": 0.7535327076911926, "step": 1546 }, { "epoch": 2.0064886347506943, "grad_norm": 0.7831124663352966, "learning_rate": 2.7176339475506515e-06, "loss": 0.6865320801734924, "step": 1547 }, { "epoch": 2.0077863617008336, "grad_norm": 0.8027868866920471, "learning_rate": 2.7112749000919304e-06, "loss": 0.7301362156867981, "step": 1548 }, { "epoch": 2.0090840886509724, "grad_norm": 0.7517098784446716, "learning_rate": 2.704920532496756e-06, "loss": 0.7181136012077332, "step": 1549 }, { "epoch": 2.0103818156011113, "grad_norm": 0.7687897086143494, "learning_rate": 2.698570857758195e-06, "loss": 0.6889755725860596, "step": 1550 }, { "epoch": 2.01167954255125, "grad_norm": 0.769771933555603, "learning_rate": 2.692225888859732e-06, "loss": 0.8039973378181458, "step": 1551 }, { "epoch": 2.012977269501389, "grad_norm": 0.7931247353553772, "learning_rate": 2.685885638775216e-06, "loss": 0.7709112167358398, "step": 1552 }, { "epoch": 2.014274996451528, "grad_norm": 0.7785104513168335, "learning_rate": 2.6795501204688586e-06, "loss": 0.7515279650688171, "step": 1553 }, { "epoch": 2.0155727234016667, "grad_norm": 0.7584104537963867, "learning_rate": 2.6732193468951882e-06, "loss": 0.751779317855835, "step": 1554 }, { "epoch": 2.0168704503518056, "grad_norm": 0.7819312214851379, "learning_rate": 2.666893330999035e-06, "loss": 0.6674883961677551, "step": 1555 }, { "epoch": 2.0181681773019444, "grad_norm": 0.7927038669586182, "learning_rate": 2.6605720857155017e-06, "loss": 0.6830568909645081, "step": 1556 }, { "epoch": 2.0194659042520833, "grad_norm": 0.8502561450004578, "learning_rate": 2.654255623969936e-06, "loss": 0.734710156917572, "step": 1557 }, { "epoch": 2.0207636312022226, "grad_norm": 0.7859963178634644, "learning_rate": 2.647943958677897e-06, "loss": 0.7186540365219116, "step": 1558 }, { "epoch": 2.0220613581523614, "grad_norm": 0.7992748618125916, "learning_rate": 2.6416371027451514e-06, "loss": 0.6743420362472534, "step": 1559 }, { "epoch": 2.0233590851025003, "grad_norm": 0.8139959573745728, "learning_rate": 2.635335069067617e-06, "loss": 0.7734099626541138, "step": 1560 }, { "epoch": 2.024656812052639, "grad_norm": 0.7677357196807861, "learning_rate": 2.62903787053136e-06, "loss": 0.7552749514579773, "step": 1561 }, { "epoch": 2.025954539002778, "grad_norm": 0.744732141494751, "learning_rate": 2.6227455200125575e-06, "loss": 0.6963829398155212, "step": 1562 }, { "epoch": 2.027252265952917, "grad_norm": 0.7503330111503601, "learning_rate": 2.6164580303774733e-06, "loss": 0.7289599776268005, "step": 1563 }, { "epoch": 2.0285499929030557, "grad_norm": 0.7900810241699219, "learning_rate": 2.6101754144824327e-06, "loss": 0.6874979138374329, "step": 1564 }, { "epoch": 2.0298477198531946, "grad_norm": 0.7571580410003662, "learning_rate": 2.603897685173794e-06, "loss": 0.7547987103462219, "step": 1565 }, { "epoch": 2.0311454468033334, "grad_norm": 0.7505063414573669, "learning_rate": 2.5976248552879264e-06, "loss": 0.7211095690727234, "step": 1566 }, { "epoch": 2.0324431737534723, "grad_norm": 0.7814156413078308, "learning_rate": 2.5913569376511806e-06, "loss": 0.6726822257041931, "step": 1567 }, { "epoch": 2.033740900703611, "grad_norm": 0.7896116971969604, "learning_rate": 2.5850939450798553e-06, "loss": 0.7849063277244568, "step": 1568 }, { "epoch": 2.0350386276537504, "grad_norm": 0.7852982878684998, "learning_rate": 2.5788358903801926e-06, "loss": 0.7045016884803772, "step": 1569 }, { "epoch": 2.0363363546038893, "grad_norm": 0.726755678653717, "learning_rate": 2.572582786348326e-06, "loss": 0.6949452757835388, "step": 1570 }, { "epoch": 2.037634081554028, "grad_norm": 0.7358665466308594, "learning_rate": 2.566334645770272e-06, "loss": 0.6689255237579346, "step": 1571 }, { "epoch": 2.038931808504167, "grad_norm": 0.7562324404716492, "learning_rate": 2.5600914814218963e-06, "loss": 0.6774943470954895, "step": 1572 }, { "epoch": 2.040229535454306, "grad_norm": 0.7547875642776489, "learning_rate": 2.553853306068888e-06, "loss": 0.7316610217094421, "step": 1573 }, { "epoch": 2.0415272624044447, "grad_norm": 0.7495048642158508, "learning_rate": 2.547620132466743e-06, "loss": 0.6789397597312927, "step": 1574 }, { "epoch": 2.0428249893545836, "grad_norm": 0.7583444714546204, "learning_rate": 2.541391973360717e-06, "loss": 0.6931465864181519, "step": 1575 }, { "epoch": 2.0441227163047224, "grad_norm": 0.7334174513816833, "learning_rate": 2.535168841485821e-06, "loss": 0.7701333165168762, "step": 1576 }, { "epoch": 2.0454204432548613, "grad_norm": 0.7112381458282471, "learning_rate": 2.5289507495667864e-06, "loss": 0.6884704232215881, "step": 1577 }, { "epoch": 2.046718170205, "grad_norm": 0.7634471654891968, "learning_rate": 2.5227377103180353e-06, "loss": 0.6711394786834717, "step": 1578 }, { "epoch": 2.048015897155139, "grad_norm": 0.7438578009605408, "learning_rate": 2.516529736443661e-06, "loss": 0.7194059491157532, "step": 1579 }, { "epoch": 2.0493136241052783, "grad_norm": 0.7477583289146423, "learning_rate": 2.5103268406374002e-06, "loss": 0.6969068050384521, "step": 1580 }, { "epoch": 2.050611351055417, "grad_norm": 0.7318229079246521, "learning_rate": 2.504129035582601e-06, "loss": 0.712550938129425, "step": 1581 }, { "epoch": 2.051909078005556, "grad_norm": 0.7574477195739746, "learning_rate": 2.497936333952212e-06, "loss": 0.7607018351554871, "step": 1582 }, { "epoch": 2.053206804955695, "grad_norm": 0.8140477538108826, "learning_rate": 2.491748748408735e-06, "loss": 0.6738405227661133, "step": 1583 }, { "epoch": 2.0545045319058337, "grad_norm": 0.7556366324424744, "learning_rate": 2.485566291604219e-06, "loss": 0.68988436460495, "step": 1584 }, { "epoch": 2.0558022588559726, "grad_norm": 0.7742814421653748, "learning_rate": 2.4793889761802225e-06, "loss": 0.708181619644165, "step": 1585 }, { "epoch": 2.0570999858061114, "grad_norm": 0.8035808205604553, "learning_rate": 2.4732168147677927e-06, "loss": 0.6929728984832764, "step": 1586 }, { "epoch": 2.0583977127562503, "grad_norm": 0.8000681400299072, "learning_rate": 2.467049819987437e-06, "loss": 0.7274913787841797, "step": 1587 }, { "epoch": 2.059695439706389, "grad_norm": 0.7306155562400818, "learning_rate": 2.460888004449099e-06, "loss": 0.6182838678359985, "step": 1588 }, { "epoch": 2.060993166656528, "grad_norm": 0.7635526061058044, "learning_rate": 2.454731380752132e-06, "loss": 0.7412324547767639, "step": 1589 }, { "epoch": 2.0622908936066673, "grad_norm": 0.7367591261863708, "learning_rate": 2.4485799614852755e-06, "loss": 0.6950958371162415, "step": 1590 }, { "epoch": 2.063588620556806, "grad_norm": 0.7613998055458069, "learning_rate": 2.442433759226619e-06, "loss": 0.7596891522407532, "step": 1591 }, { "epoch": 2.064886347506945, "grad_norm": 0.7433957457542419, "learning_rate": 2.4362927865435975e-06, "loss": 0.7979118227958679, "step": 1592 }, { "epoch": 2.066184074457084, "grad_norm": 0.8046156764030457, "learning_rate": 2.4301570559929405e-06, "loss": 0.773604691028595, "step": 1593 }, { "epoch": 2.0674818014072227, "grad_norm": 0.7190860509872437, "learning_rate": 2.4240265801206665e-06, "loss": 0.6870064735412598, "step": 1594 }, { "epoch": 2.0687795283573616, "grad_norm": 0.7501992583274841, "learning_rate": 2.4179013714620456e-06, "loss": 0.6922681927680969, "step": 1595 }, { "epoch": 2.0700772553075004, "grad_norm": 0.7564000487327576, "learning_rate": 2.4117814425415803e-06, "loss": 0.7309989929199219, "step": 1596 }, { "epoch": 2.0713749822576393, "grad_norm": 0.772293210029602, "learning_rate": 2.4056668058729766e-06, "loss": 0.7206372618675232, "step": 1597 }, { "epoch": 2.072672709207778, "grad_norm": 0.787385880947113, "learning_rate": 2.399557473959119e-06, "loss": 0.7057082653045654, "step": 1598 }, { "epoch": 2.073970436157917, "grad_norm": 0.836436927318573, "learning_rate": 2.3934534592920416e-06, "loss": 0.7267694473266602, "step": 1599 }, { "epoch": 2.075268163108056, "grad_norm": 0.7650021314620972, "learning_rate": 2.3873547743529157e-06, "loss": 0.7323647141456604, "step": 1600 }, { "epoch": 2.076565890058195, "grad_norm": 0.7755290269851685, "learning_rate": 2.3812614316120003e-06, "loss": 0.7333980798721313, "step": 1601 }, { "epoch": 2.077863617008334, "grad_norm": 0.7668667435646057, "learning_rate": 2.375173443528646e-06, "loss": 0.6830537915229797, "step": 1602 }, { "epoch": 2.079161343958473, "grad_norm": 0.8191787600517273, "learning_rate": 2.3690908225512464e-06, "loss": 0.6813746690750122, "step": 1603 }, { "epoch": 2.0804590709086117, "grad_norm": 0.7462801337242126, "learning_rate": 2.363013581117217e-06, "loss": 0.7685893177986145, "step": 1604 }, { "epoch": 2.0817567978587506, "grad_norm": 0.7931850552558899, "learning_rate": 2.356941731652986e-06, "loss": 0.7468531131744385, "step": 1605 }, { "epoch": 2.0830545248088894, "grad_norm": 0.7673047780990601, "learning_rate": 2.3508752865739425e-06, "loss": 0.7278560996055603, "step": 1606 }, { "epoch": 2.0843522517590283, "grad_norm": 0.7797254323959351, "learning_rate": 2.344814258284433e-06, "loss": 0.7439032793045044, "step": 1607 }, { "epoch": 2.085649978709167, "grad_norm": 0.7476287484169006, "learning_rate": 2.3387586591777274e-06, "loss": 0.7101130485534668, "step": 1608 }, { "epoch": 2.086947705659306, "grad_norm": 0.7510504722595215, "learning_rate": 2.3327085016359912e-06, "loss": 0.6794774532318115, "step": 1609 }, { "epoch": 2.088245432609445, "grad_norm": 0.7459760308265686, "learning_rate": 2.3266637980302677e-06, "loss": 0.7223816514015198, "step": 1610 }, { "epoch": 2.088245432609445, "eval_loss": 0.7627704739570618, "eval_runtime": 140.2916, "eval_samples_per_second": 37.009, "eval_steps_per_second": 9.252, "step": 1610 }, { "epoch": 2.089543159559584, "grad_norm": 0.7484630346298218, "learning_rate": 2.320624560720446e-06, "loss": 0.718350350856781, "step": 1611 }, { "epoch": 2.090840886509723, "grad_norm": 0.7986156940460205, "learning_rate": 2.314590802055232e-06, "loss": 0.7352898120880127, "step": 1612 }, { "epoch": 2.092138613459862, "grad_norm": 0.7424448728561401, "learning_rate": 2.308562534372144e-06, "loss": 0.6307387351989746, "step": 1613 }, { "epoch": 2.0934363404100007, "grad_norm": 0.8542348146438599, "learning_rate": 2.3025397699974555e-06, "loss": 0.7578550577163696, "step": 1614 }, { "epoch": 2.0947340673601396, "grad_norm": 0.7494445443153381, "learning_rate": 2.296522521246202e-06, "loss": 0.7531486749649048, "step": 1615 }, { "epoch": 2.0960317943102784, "grad_norm": 0.7711408138275146, "learning_rate": 2.290510800422129e-06, "loss": 0.7104783058166504, "step": 1616 }, { "epoch": 2.0973295212604173, "grad_norm": 0.7710621953010559, "learning_rate": 2.284504619817687e-06, "loss": 0.7426379919052124, "step": 1617 }, { "epoch": 2.098627248210556, "grad_norm": 0.755355715751648, "learning_rate": 2.2785039917139933e-06, "loss": 0.7569924592971802, "step": 1618 }, { "epoch": 2.099924975160695, "grad_norm": 0.8246397972106934, "learning_rate": 2.272508928380815e-06, "loss": 0.6701229810714722, "step": 1619 }, { "epoch": 2.101222702110834, "grad_norm": 0.7797556519508362, "learning_rate": 2.2665194420765386e-06, "loss": 0.8236826658248901, "step": 1620 }, { "epoch": 2.1025204290609727, "grad_norm": 0.7621787190437317, "learning_rate": 2.260535545048149e-06, "loss": 0.6628905534744263, "step": 1621 }, { "epoch": 2.103818156011112, "grad_norm": 0.7577031850814819, "learning_rate": 2.2545572495311966e-06, "loss": 0.6984103322029114, "step": 1622 }, { "epoch": 2.105115882961251, "grad_norm": 0.7571961283683777, "learning_rate": 2.2485845677497897e-06, "loss": 0.7148957848548889, "step": 1623 }, { "epoch": 2.1064136099113897, "grad_norm": 0.7815601825714111, "learning_rate": 2.2426175119165435e-06, "loss": 0.799562394618988, "step": 1624 }, { "epoch": 2.1077113368615286, "grad_norm": 0.8278639316558838, "learning_rate": 2.2366560942325833e-06, "loss": 0.7186278104782104, "step": 1625 }, { "epoch": 2.1090090638116674, "grad_norm": 0.7949169874191284, "learning_rate": 2.230700326887495e-06, "loss": 0.6756667494773865, "step": 1626 }, { "epoch": 2.1103067907618063, "grad_norm": 0.7379303574562073, "learning_rate": 2.2247502220593164e-06, "loss": 0.7049471139907837, "step": 1627 }, { "epoch": 2.111604517711945, "grad_norm": 0.7553898692131042, "learning_rate": 2.218805791914507e-06, "loss": 0.7118646502494812, "step": 1628 }, { "epoch": 2.112902244662084, "grad_norm": 0.7354833483695984, "learning_rate": 2.21286704860792e-06, "loss": 0.6721919775009155, "step": 1629 }, { "epoch": 2.114199971612223, "grad_norm": 0.7323917746543884, "learning_rate": 2.2069340042827846e-06, "loss": 0.6935778260231018, "step": 1630 }, { "epoch": 2.1154976985623617, "grad_norm": 0.7529373168945312, "learning_rate": 2.2010066710706734e-06, "loss": 0.6505716443061829, "step": 1631 }, { "epoch": 2.1167954255125006, "grad_norm": 0.7721433639526367, "learning_rate": 2.1950850610914824e-06, "loss": 0.6882133483886719, "step": 1632 }, { "epoch": 2.11809315246264, "grad_norm": 0.725283145904541, "learning_rate": 2.1891691864534065e-06, "loss": 0.6969849467277527, "step": 1633 }, { "epoch": 2.1193908794127787, "grad_norm": 0.8012808561325073, "learning_rate": 2.1832590592529128e-06, "loss": 0.7592130303382874, "step": 1634 }, { "epoch": 2.1206886063629176, "grad_norm": 0.7490873336791992, "learning_rate": 2.1773546915747103e-06, "loss": 0.6869245767593384, "step": 1635 }, { "epoch": 2.1219863333130564, "grad_norm": 0.7902940511703491, "learning_rate": 2.1714560954917437e-06, "loss": 0.7730790972709656, "step": 1636 }, { "epoch": 2.1232840602631953, "grad_norm": 0.7704285979270935, "learning_rate": 2.165563283065142e-06, "loss": 0.7789285182952881, "step": 1637 }, { "epoch": 2.124581787213334, "grad_norm": 0.783291757106781, "learning_rate": 2.159676266344222e-06, "loss": 0.7162942290306091, "step": 1638 }, { "epoch": 2.125879514163473, "grad_norm": 0.7617123126983643, "learning_rate": 2.1537950573664372e-06, "loss": 0.7395819425582886, "step": 1639 }, { "epoch": 2.127177241113612, "grad_norm": 0.7524591684341431, "learning_rate": 2.1479196681573745e-06, "loss": 0.7014380097389221, "step": 1640 }, { "epoch": 2.1284749680637507, "grad_norm": 0.7793992757797241, "learning_rate": 2.142050110730716e-06, "loss": 0.6641875505447388, "step": 1641 }, { "epoch": 2.1297726950138895, "grad_norm": 0.7960946559906006, "learning_rate": 2.136186397088223e-06, "loss": 0.827523410320282, "step": 1642 }, { "epoch": 2.1310704219640284, "grad_norm": 0.8080329895019531, "learning_rate": 2.1303285392197043e-06, "loss": 0.7528422474861145, "step": 1643 }, { "epoch": 2.1323681489141677, "grad_norm": 0.7847034931182861, "learning_rate": 2.1244765491029985e-06, "loss": 0.6985142827033997, "step": 1644 }, { "epoch": 2.1336658758643066, "grad_norm": 0.7830514907836914, "learning_rate": 2.118630438703939e-06, "loss": 0.7506939768791199, "step": 1645 }, { "epoch": 2.1349636028144454, "grad_norm": 0.771859884262085, "learning_rate": 2.1127902199763496e-06, "loss": 0.7569597363471985, "step": 1646 }, { "epoch": 2.1362613297645843, "grad_norm": 0.7885754108428955, "learning_rate": 2.1069559048619937e-06, "loss": 0.7057400345802307, "step": 1647 }, { "epoch": 2.137559056714723, "grad_norm": 0.7832199931144714, "learning_rate": 2.10112750529057e-06, "loss": 0.7383140921592712, "step": 1648 }, { "epoch": 2.138856783664862, "grad_norm": 0.7768014669418335, "learning_rate": 2.095305033179682e-06, "loss": 0.7292782664299011, "step": 1649 }, { "epoch": 2.140154510615001, "grad_norm": 0.762650191783905, "learning_rate": 2.0894885004348102e-06, "loss": 0.7502353191375732, "step": 1650 }, { "epoch": 2.1414522375651397, "grad_norm": 0.7974070310592651, "learning_rate": 2.0836779189492925e-06, "loss": 0.8040381669998169, "step": 1651 }, { "epoch": 2.1427499645152785, "grad_norm": 0.7788371443748474, "learning_rate": 2.077873300604297e-06, "loss": 0.7352241277694702, "step": 1652 }, { "epoch": 2.1440476914654174, "grad_norm": 0.7705795764923096, "learning_rate": 2.0720746572687995e-06, "loss": 0.696068525314331, "step": 1653 }, { "epoch": 2.1453454184155567, "grad_norm": 0.741032600402832, "learning_rate": 2.0662820007995592e-06, "loss": 0.710914671421051, "step": 1654 }, { "epoch": 2.1466431453656956, "grad_norm": 0.7707566022872925, "learning_rate": 2.060495343041087e-06, "loss": 0.7286970019340515, "step": 1655 }, { "epoch": 2.1479408723158344, "grad_norm": 0.7634013295173645, "learning_rate": 2.0547146958256416e-06, "loss": 0.6444496512413025, "step": 1656 }, { "epoch": 2.1492385992659733, "grad_norm": 0.8233134746551514, "learning_rate": 2.048940070973177e-06, "loss": 0.6969065070152283, "step": 1657 }, { "epoch": 2.150536326216112, "grad_norm": 0.79786217212677, "learning_rate": 2.04317148029134e-06, "loss": 0.6712714433670044, "step": 1658 }, { "epoch": 2.151834053166251, "grad_norm": 0.7395349740982056, "learning_rate": 2.0374089355754434e-06, "loss": 0.7371563911437988, "step": 1659 }, { "epoch": 2.15313178011639, "grad_norm": 0.7514559626579285, "learning_rate": 2.031652448608428e-06, "loss": 0.6969033479690552, "step": 1660 }, { "epoch": 2.1544295070665287, "grad_norm": 0.8158461451530457, "learning_rate": 2.025902031160853e-06, "loss": 0.7191423773765564, "step": 1661 }, { "epoch": 2.1557272340166675, "grad_norm": 0.7599233984947205, "learning_rate": 2.020157694990868e-06, "loss": 0.7718644738197327, "step": 1662 }, { "epoch": 2.1570249609668064, "grad_norm": 0.7463486790657043, "learning_rate": 2.014419451844186e-06, "loss": 0.750013530254364, "step": 1663 }, { "epoch": 2.1583226879169457, "grad_norm": 0.775913417339325, "learning_rate": 2.0086873134540626e-06, "loss": 0.7322590947151184, "step": 1664 }, { "epoch": 2.1596204148670846, "grad_norm": 0.7643508315086365, "learning_rate": 2.002961291541269e-06, "loss": 0.7270373702049255, "step": 1665 }, { "epoch": 2.1609181418172234, "grad_norm": 0.8092241883277893, "learning_rate": 1.997241397814071e-06, "loss": 0.7437032461166382, "step": 1666 }, { "epoch": 2.1622158687673623, "grad_norm": 0.7753113508224487, "learning_rate": 1.9915276439682056e-06, "loss": 0.7822949886322021, "step": 1667 }, { "epoch": 2.163513595717501, "grad_norm": 0.7699429392814636, "learning_rate": 1.985820041686848e-06, "loss": 0.7268073558807373, "step": 1668 }, { "epoch": 2.16481132266764, "grad_norm": 0.8058201670646667, "learning_rate": 1.9801186026406066e-06, "loss": 0.7283480763435364, "step": 1669 }, { "epoch": 2.166109049617779, "grad_norm": 0.7627425193786621, "learning_rate": 1.9744233384874766e-06, "loss": 0.7668908834457397, "step": 1670 }, { "epoch": 2.1674067765679177, "grad_norm": 0.7747920155525208, "learning_rate": 1.968734260872833e-06, "loss": 0.6658475995063782, "step": 1671 }, { "epoch": 2.1687045035180565, "grad_norm": 0.7950249910354614, "learning_rate": 1.9630513814294e-06, "loss": 0.778218150138855, "step": 1672 }, { "epoch": 2.1700022304681954, "grad_norm": 0.77446049451828, "learning_rate": 1.9573747117772272e-06, "loss": 0.7079744935035706, "step": 1673 }, { "epoch": 2.1712999574183343, "grad_norm": 0.7665644884109497, "learning_rate": 1.951704263523668e-06, "loss": 0.7032575607299805, "step": 1674 }, { "epoch": 2.1725976843684736, "grad_norm": 0.7874617576599121, "learning_rate": 1.9460400482633537e-06, "loss": 0.7241728901863098, "step": 1675 }, { "epoch": 2.1738954113186124, "grad_norm": 0.7628116011619568, "learning_rate": 1.9403820775781696e-06, "loss": 0.6686381101608276, "step": 1676 }, { "epoch": 2.1751931382687513, "grad_norm": 0.7952485084533691, "learning_rate": 1.9347303630372373e-06, "loss": 0.7865254282951355, "step": 1677 }, { "epoch": 2.17649086521889, "grad_norm": 0.7570112943649292, "learning_rate": 1.929084916196876e-06, "loss": 0.7014156579971313, "step": 1678 }, { "epoch": 2.177788592169029, "grad_norm": 0.7880426049232483, "learning_rate": 1.923445748600603e-06, "loss": 0.8080270886421204, "step": 1679 }, { "epoch": 2.179086319119168, "grad_norm": 0.7836724519729614, "learning_rate": 1.917812871779084e-06, "loss": 0.7680548429489136, "step": 1680 }, { "epoch": 2.1803840460693067, "grad_norm": 0.7716360092163086, "learning_rate": 1.912186297250128e-06, "loss": 0.7878873348236084, "step": 1681 }, { "epoch": 2.1816817730194455, "grad_norm": 0.7800530195236206, "learning_rate": 1.9065660365186545e-06, "loss": 0.7374768853187561, "step": 1682 }, { "epoch": 2.1829794999695844, "grad_norm": 0.7685821056365967, "learning_rate": 1.9009521010766756e-06, "loss": 0.7049664855003357, "step": 1683 }, { "epoch": 2.1842772269197233, "grad_norm": 0.7467005848884583, "learning_rate": 1.8953445024032679e-06, "loss": 0.7375454902648926, "step": 1684 }, { "epoch": 2.185574953869862, "grad_norm": 0.7665326595306396, "learning_rate": 1.889743251964553e-06, "loss": 0.721852719783783, "step": 1685 }, { "epoch": 2.1868726808200014, "grad_norm": 0.8213875889778137, "learning_rate": 1.8841483612136658e-06, "loss": 0.7483677864074707, "step": 1686 }, { "epoch": 2.1881704077701403, "grad_norm": 0.7615724802017212, "learning_rate": 1.8785598415907464e-06, "loss": 0.685491681098938, "step": 1687 }, { "epoch": 2.189468134720279, "grad_norm": 0.805755078792572, "learning_rate": 1.8729777045229009e-06, "loss": 0.6272298693656921, "step": 1688 }, { "epoch": 2.190765861670418, "grad_norm": 0.7846405506134033, "learning_rate": 1.8674019614241879e-06, "loss": 0.662231981754303, "step": 1689 }, { "epoch": 2.192063588620557, "grad_norm": 0.7738309502601624, "learning_rate": 1.8618326236955908e-06, "loss": 0.6917383074760437, "step": 1690 }, { "epoch": 2.1933613155706957, "grad_norm": 0.7525291442871094, "learning_rate": 1.8562697027249921e-06, "loss": 0.7249594926834106, "step": 1691 }, { "epoch": 2.1946590425208345, "grad_norm": 0.7994334697723389, "learning_rate": 1.8507132098871633e-06, "loss": 0.7203621864318848, "step": 1692 }, { "epoch": 2.1959567694709734, "grad_norm": 0.8307973146438599, "learning_rate": 1.8451631565437211e-06, "loss": 0.7953019738197327, "step": 1693 }, { "epoch": 2.1972544964211123, "grad_norm": 0.718991219997406, "learning_rate": 1.8396195540431205e-06, "loss": 0.7472409605979919, "step": 1694 }, { "epoch": 2.198552223371251, "grad_norm": 0.7688009738922119, "learning_rate": 1.834082413720627e-06, "loss": 0.754511833190918, "step": 1695 }, { "epoch": 2.19984995032139, "grad_norm": 0.8119645118713379, "learning_rate": 1.8285517468982905e-06, "loss": 0.7157659530639648, "step": 1696 }, { "epoch": 2.2011476772715293, "grad_norm": 0.7515578269958496, "learning_rate": 1.8230275648849243e-06, "loss": 0.7136957049369812, "step": 1697 }, { "epoch": 2.202445404221668, "grad_norm": 0.739663302898407, "learning_rate": 1.8175098789760848e-06, "loss": 0.7205515503883362, "step": 1698 }, { "epoch": 2.203743131171807, "grad_norm": 0.7999836802482605, "learning_rate": 1.8119987004540373e-06, "loss": 0.7069056034088135, "step": 1699 }, { "epoch": 2.205040858121946, "grad_norm": 0.7710497379302979, "learning_rate": 1.8064940405877546e-06, "loss": 0.7351633310317993, "step": 1700 }, { "epoch": 2.2063385850720847, "grad_norm": 0.7900813817977905, "learning_rate": 1.8009959106328655e-06, "loss": 0.6964262127876282, "step": 1701 }, { "epoch": 2.2076363120222235, "grad_norm": 0.7656311392784119, "learning_rate": 1.7955043218316615e-06, "loss": 0.7031038999557495, "step": 1702 }, { "epoch": 2.2089340389723624, "grad_norm": 0.779863715171814, "learning_rate": 1.7900192854130465e-06, "loss": 0.6933379769325256, "step": 1703 }, { "epoch": 2.2102317659225013, "grad_norm": 0.7669497132301331, "learning_rate": 1.7845408125925328e-06, "loss": 0.6850363612174988, "step": 1704 }, { "epoch": 2.21152949287264, "grad_norm": 0.7710373401641846, "learning_rate": 1.7790689145722111e-06, "loss": 0.7190161347389221, "step": 1705 }, { "epoch": 2.212827219822779, "grad_norm": 0.730596661567688, "learning_rate": 1.7736036025407282e-06, "loss": 0.6166294813156128, "step": 1706 }, { "epoch": 2.2141249467729183, "grad_norm": 0.7601070404052734, "learning_rate": 1.7681448876732632e-06, "loss": 0.6836649179458618, "step": 1707 }, { "epoch": 2.215422673723057, "grad_norm": 0.7664089202880859, "learning_rate": 1.7626927811315087e-06, "loss": 0.7623034119606018, "step": 1708 }, { "epoch": 2.216720400673196, "grad_norm": 0.8523539304733276, "learning_rate": 1.7572472940636375e-06, "loss": 0.7964061498641968, "step": 1709 }, { "epoch": 2.218018127623335, "grad_norm": 0.7584978342056274, "learning_rate": 1.7518084376042988e-06, "loss": 0.6912890672683716, "step": 1710 }, { "epoch": 2.2193158545734737, "grad_norm": 0.7614145874977112, "learning_rate": 1.7463762228745728e-06, "loss": 0.693087100982666, "step": 1711 }, { "epoch": 2.2206135815236125, "grad_norm": 0.8198820352554321, "learning_rate": 1.7409506609819648e-06, "loss": 0.6574248671531677, "step": 1712 }, { "epoch": 2.2219113084737514, "grad_norm": 0.728805422782898, "learning_rate": 1.735531763020376e-06, "loss": 0.6723518967628479, "step": 1713 }, { "epoch": 2.2232090354238903, "grad_norm": 0.7606318593025208, "learning_rate": 1.7301195400700815e-06, "loss": 0.626229465007782, "step": 1714 }, { "epoch": 2.224506762374029, "grad_norm": 0.7988364100456238, "learning_rate": 1.7247140031977073e-06, "loss": 0.7640826106071472, "step": 1715 }, { "epoch": 2.225804489324168, "grad_norm": 0.7772147059440613, "learning_rate": 1.7193151634562071e-06, "loss": 0.6884752511978149, "step": 1716 }, { "epoch": 2.2271022162743073, "grad_norm": 0.7542872428894043, "learning_rate": 1.7139230318848432e-06, "loss": 0.740616500377655, "step": 1717 }, { "epoch": 2.228399943224446, "grad_norm": 0.7642157077789307, "learning_rate": 1.7085376195091591e-06, "loss": 0.732641339302063, "step": 1718 }, { "epoch": 2.229697670174585, "grad_norm": 0.777519941329956, "learning_rate": 1.7031589373409596e-06, "loss": 0.6960573792457581, "step": 1719 }, { "epoch": 2.230995397124724, "grad_norm": 0.759345531463623, "learning_rate": 1.6977869963782895e-06, "loss": 0.7143913507461548, "step": 1720 }, { "epoch": 2.2322931240748627, "grad_norm": 0.782537043094635, "learning_rate": 1.6924218076054095e-06, "loss": 0.7428768277168274, "step": 1721 }, { "epoch": 2.2335908510250015, "grad_norm": 0.7905192971229553, "learning_rate": 1.6870633819927672e-06, "loss": 0.8565322160720825, "step": 1722 }, { "epoch": 2.2348885779751404, "grad_norm": 0.780165433883667, "learning_rate": 1.6817117304969944e-06, "loss": 0.705779492855072, "step": 1723 }, { "epoch": 2.2361863049252793, "grad_norm": 0.7498677372932434, "learning_rate": 1.676366864060856e-06, "loss": 0.7112017273902893, "step": 1724 }, { "epoch": 2.237484031875418, "grad_norm": 0.7308558821678162, "learning_rate": 1.6710287936132592e-06, "loss": 0.6326340436935425, "step": 1725 }, { "epoch": 2.238781758825557, "grad_norm": 0.7382625341415405, "learning_rate": 1.6656975300692008e-06, "loss": 0.6695979833602905, "step": 1726 }, { "epoch": 2.240079485775696, "grad_norm": 0.7628723382949829, "learning_rate": 1.660373084329767e-06, "loss": 0.770473062992096, "step": 1727 }, { "epoch": 2.241377212725835, "grad_norm": 0.7602196335792542, "learning_rate": 1.6550554672821028e-06, "loss": 0.6869367957115173, "step": 1728 }, { "epoch": 2.242674939675974, "grad_norm": 0.8013946413993835, "learning_rate": 1.6497446897993885e-06, "loss": 0.7376055717468262, "step": 1729 }, { "epoch": 2.243972666626113, "grad_norm": 0.8051080703735352, "learning_rate": 1.6444407627408194e-06, "loss": 0.7137443423271179, "step": 1730 }, { "epoch": 2.2452703935762517, "grad_norm": 0.7435747981071472, "learning_rate": 1.639143696951586e-06, "loss": 0.6921373605728149, "step": 1731 }, { "epoch": 2.2465681205263905, "grad_norm": 0.789109468460083, "learning_rate": 1.6338535032628427e-06, "loss": 0.7224962115287781, "step": 1732 }, { "epoch": 2.2478658474765294, "grad_norm": 0.7830778360366821, "learning_rate": 1.6285701924917025e-06, "loss": 0.7313556671142578, "step": 1733 }, { "epoch": 2.2491635744266683, "grad_norm": 0.7683809995651245, "learning_rate": 1.6232937754411938e-06, "loss": 0.6995819807052612, "step": 1734 }, { "epoch": 2.250461301376807, "grad_norm": 0.719525933265686, "learning_rate": 1.6180242629002558e-06, "loss": 0.6746504902839661, "step": 1735 }, { "epoch": 2.251759028326946, "grad_norm": 0.7692328095436096, "learning_rate": 1.6127616656437078e-06, "loss": 0.7401602268218994, "step": 1736 }, { "epoch": 2.253056755277085, "grad_norm": 0.815162181854248, "learning_rate": 1.6075059944322297e-06, "loss": 0.7262242436408997, "step": 1737 }, { "epoch": 2.2543544822272237, "grad_norm": 0.7180652022361755, "learning_rate": 1.6022572600123382e-06, "loss": 0.6500532031059265, "step": 1738 }, { "epoch": 2.255652209177363, "grad_norm": 0.7820153832435608, "learning_rate": 1.5970154731163667e-06, "loss": 0.7571191787719727, "step": 1739 }, { "epoch": 2.256949936127502, "grad_norm": 0.7669167518615723, "learning_rate": 1.5917806444624434e-06, "loss": 0.7316527962684631, "step": 1740 }, { "epoch": 2.2582476630776407, "grad_norm": 0.7556010484695435, "learning_rate": 1.5865527847544692e-06, "loss": 0.6542820930480957, "step": 1741 }, { "epoch": 2.2595453900277795, "grad_norm": 0.7612343430519104, "learning_rate": 1.581331904682089e-06, "loss": 0.7321107983589172, "step": 1742 }, { "epoch": 2.2608431169779184, "grad_norm": 0.7514542937278748, "learning_rate": 1.576118014920688e-06, "loss": 0.6985179781913757, "step": 1743 }, { "epoch": 2.2621408439280573, "grad_norm": 0.7584021091461182, "learning_rate": 1.5709111261313454e-06, "loss": 0.7909868359565735, "step": 1744 }, { "epoch": 2.263438570878196, "grad_norm": 0.7409632802009583, "learning_rate": 1.5657112489608316e-06, "loss": 0.6589058637619019, "step": 1745 }, { "epoch": 2.264736297828335, "grad_norm": 0.7241658568382263, "learning_rate": 1.5605183940415842e-06, "loss": 0.6412944197654724, "step": 1746 }, { "epoch": 2.266034024778474, "grad_norm": 0.815857470035553, "learning_rate": 1.5553325719916717e-06, "loss": 0.7096395492553711, "step": 1747 }, { "epoch": 2.2673317517286127, "grad_norm": 0.7670204043388367, "learning_rate": 1.5501537934147897e-06, "loss": 0.7624009847640991, "step": 1748 }, { "epoch": 2.2686294786787515, "grad_norm": 0.7562599182128906, "learning_rate": 1.5449820689002298e-06, "loss": 0.7044228911399841, "step": 1749 }, { "epoch": 2.269927205628891, "grad_norm": 0.7411571741104126, "learning_rate": 1.5398174090228595e-06, "loss": 0.6269949078559875, "step": 1750 }, { "epoch": 2.2712249325790297, "grad_norm": 0.7651223540306091, "learning_rate": 1.534659824343101e-06, "loss": 0.6997742056846619, "step": 1751 }, { "epoch": 2.2725226595291685, "grad_norm": 0.7359679937362671, "learning_rate": 1.5295093254069093e-06, "loss": 0.6871427297592163, "step": 1752 }, { "epoch": 2.2738203864793074, "grad_norm": 0.7561101913452148, "learning_rate": 1.524365922745752e-06, "loss": 0.6495131850242615, "step": 1753 }, { "epoch": 2.2751181134294463, "grad_norm": 0.7630066871643066, "learning_rate": 1.519229626876586e-06, "loss": 0.6955438852310181, "step": 1754 }, { "epoch": 2.276415840379585, "grad_norm": 0.747558057308197, "learning_rate": 1.5141004483018323e-06, "loss": 0.6892863512039185, "step": 1755 }, { "epoch": 2.277713567329724, "grad_norm": 0.7459293007850647, "learning_rate": 1.5089783975093698e-06, "loss": 0.6839847564697266, "step": 1756 }, { "epoch": 2.279011294279863, "grad_norm": 0.7482560873031616, "learning_rate": 1.5038634849724898e-06, "loss": 0.670592188835144, "step": 1757 }, { "epoch": 2.2803090212300017, "grad_norm": 0.7818513512611389, "learning_rate": 1.4987557211498966e-06, "loss": 0.6944141387939453, "step": 1758 }, { "epoch": 2.2816067481801405, "grad_norm": 0.8236380815505981, "learning_rate": 1.4936551164856739e-06, "loss": 0.7209656238555908, "step": 1759 }, { "epoch": 2.2829044751302794, "grad_norm": 0.7400614619255066, "learning_rate": 1.4885616814092673e-06, "loss": 0.6475507020950317, "step": 1760 }, { "epoch": 2.2842022020804187, "grad_norm": 0.7747074961662292, "learning_rate": 1.4834754263354628e-06, "loss": 0.7706501483917236, "step": 1761 }, { "epoch": 2.2854999290305575, "grad_norm": 0.7987594604492188, "learning_rate": 1.4783963616643654e-06, "loss": 0.7353149056434631, "step": 1762 }, { "epoch": 2.2867976559806964, "grad_norm": 0.7654763460159302, "learning_rate": 1.4733244977813726e-06, "loss": 0.781434953212738, "step": 1763 }, { "epoch": 2.2880953829308353, "grad_norm": 0.7649980783462524, "learning_rate": 1.468259845057169e-06, "loss": 0.686353325843811, "step": 1764 }, { "epoch": 2.289393109880974, "grad_norm": 0.743064820766449, "learning_rate": 1.4632024138476803e-06, "loss": 0.6991320252418518, "step": 1765 }, { "epoch": 2.290690836831113, "grad_norm": 0.756190836429596, "learning_rate": 1.4581522144940802e-06, "loss": 0.7311216592788696, "step": 1766 }, { "epoch": 2.291988563781252, "grad_norm": 0.7912296056747437, "learning_rate": 1.4531092573227434e-06, "loss": 0.6862029433250427, "step": 1767 }, { "epoch": 2.2932862907313907, "grad_norm": 0.7685181498527527, "learning_rate": 1.4480735526452427e-06, "loss": 0.7343103885650635, "step": 1768 }, { "epoch": 2.2945840176815295, "grad_norm": 0.7459105849266052, "learning_rate": 1.4430451107583187e-06, "loss": 0.6750067472457886, "step": 1769 }, { "epoch": 2.295881744631669, "grad_norm": 0.7575411200523376, "learning_rate": 1.4380239419438636e-06, "loss": 0.7810744643211365, "step": 1770 }, { "epoch": 2.2971794715818072, "grad_norm": 0.7612024545669556, "learning_rate": 1.433010056468896e-06, "loss": 0.7204791903495789, "step": 1771 }, { "epoch": 2.2984771985319465, "grad_norm": 0.7742648720741272, "learning_rate": 1.4280034645855429e-06, "loss": 0.7646610140800476, "step": 1772 }, { "epoch": 2.2997749254820854, "grad_norm": 0.7920587062835693, "learning_rate": 1.4230041765310171e-06, "loss": 0.7437126040458679, "step": 1773 }, { "epoch": 2.3010726524322243, "grad_norm": 0.7467794418334961, "learning_rate": 1.4180122025275972e-06, "loss": 0.6069324016571045, "step": 1774 }, { "epoch": 2.302370379382363, "grad_norm": 0.7249860167503357, "learning_rate": 1.4130275527826077e-06, "loss": 0.7492033243179321, "step": 1775 }, { "epoch": 2.303668106332502, "grad_norm": 0.79679274559021, "learning_rate": 1.4080502374883947e-06, "loss": 0.7478646636009216, "step": 1776 }, { "epoch": 2.304965833282641, "grad_norm": 0.7763088345527649, "learning_rate": 1.4030802668223097e-06, "loss": 0.721184253692627, "step": 1777 }, { "epoch": 2.3062635602327797, "grad_norm": 0.7542839646339417, "learning_rate": 1.398117650946681e-06, "loss": 0.7005340456962585, "step": 1778 }, { "epoch": 2.3075612871829185, "grad_norm": 0.7729418873786926, "learning_rate": 1.3931624000088073e-06, "loss": 0.6777938008308411, "step": 1779 }, { "epoch": 2.3088590141330574, "grad_norm": 0.7986354827880859, "learning_rate": 1.3882145241409184e-06, "loss": 0.7462781071662903, "step": 1780 }, { "epoch": 2.3101567410831967, "grad_norm": 0.7915672063827515, "learning_rate": 1.3832740334601692e-06, "loss": 0.7387913465499878, "step": 1781 }, { "epoch": 2.3114544680333355, "grad_norm": 0.7277045249938965, "learning_rate": 1.3783409380686135e-06, "loss": 0.6593598127365112, "step": 1782 }, { "epoch": 2.3127521949834744, "grad_norm": 0.7720676064491272, "learning_rate": 1.3734152480531821e-06, "loss": 0.7562534213066101, "step": 1783 }, { "epoch": 2.3140499219336133, "grad_norm": 0.7754230499267578, "learning_rate": 1.3684969734856646e-06, "loss": 0.6947664022445679, "step": 1784 }, { "epoch": 2.315347648883752, "grad_norm": 0.7693092226982117, "learning_rate": 1.363586124422689e-06, "loss": 0.7212303876876831, "step": 1785 }, { "epoch": 2.316645375833891, "grad_norm": 0.7735230326652527, "learning_rate": 1.3586827109056944e-06, "loss": 0.6982312798500061, "step": 1786 }, { "epoch": 2.31794310278403, "grad_norm": 0.7643104195594788, "learning_rate": 1.3537867429609263e-06, "loss": 0.678123950958252, "step": 1787 }, { "epoch": 2.3192408297341687, "grad_norm": 0.7314268946647644, "learning_rate": 1.3488982305993942e-06, "loss": 0.6732868552207947, "step": 1788 }, { "epoch": 2.3205385566843075, "grad_norm": 0.7716788649559021, "learning_rate": 1.3440171838168743e-06, "loss": 0.7167251110076904, "step": 1789 }, { "epoch": 2.3218362836344464, "grad_norm": 0.7513408660888672, "learning_rate": 1.3391436125938673e-06, "loss": 0.7298739552497864, "step": 1790 }, { "epoch": 2.3231340105845852, "grad_norm": 0.7510161399841309, "learning_rate": 1.3342775268955943e-06, "loss": 0.7235835194587708, "step": 1791 }, { "epoch": 2.3244317375347245, "grad_norm": 0.8174487948417664, "learning_rate": 1.329418936671969e-06, "loss": 0.7079183459281921, "step": 1792 }, { "epoch": 2.3257294644848634, "grad_norm": 0.7633885741233826, "learning_rate": 1.3245678518575782e-06, "loss": 0.7166120409965515, "step": 1793 }, { "epoch": 2.3270271914350023, "grad_norm": 0.7898662090301514, "learning_rate": 1.319724282371664e-06, "loss": 0.7210479378700256, "step": 1794 }, { "epoch": 2.328324918385141, "grad_norm": 0.7649275064468384, "learning_rate": 1.3148882381181e-06, "loss": 0.6867713928222656, "step": 1795 }, { "epoch": 2.32962264533528, "grad_norm": 0.7802595496177673, "learning_rate": 1.3100597289853689e-06, "loss": 0.7147336006164551, "step": 1796 }, { "epoch": 2.330920372285419, "grad_norm": 0.8007116913795471, "learning_rate": 1.3052387648465559e-06, "loss": 0.778854250907898, "step": 1797 }, { "epoch": 2.3322180992355577, "grad_norm": 0.811072051525116, "learning_rate": 1.3004253555593071e-06, "loss": 0.6877797245979309, "step": 1798 }, { "epoch": 2.3335158261856965, "grad_norm": 0.7577556371688843, "learning_rate": 1.2956195109658287e-06, "loss": 0.6524724364280701, "step": 1799 }, { "epoch": 2.3348135531358354, "grad_norm": 0.778110921382904, "learning_rate": 1.2908212408928561e-06, "loss": 0.8117507696151733, "step": 1800 }, { "epoch": 2.3361112800859742, "grad_norm": 0.7517038583755493, "learning_rate": 1.2860305551516355e-06, "loss": 0.7689738273620605, "step": 1801 }, { "epoch": 2.337409007036113, "grad_norm": 0.7671796679496765, "learning_rate": 1.281247463537912e-06, "loss": 0.6367142200469971, "step": 1802 }, { "epoch": 2.3387067339862524, "grad_norm": 0.8273651003837585, "learning_rate": 1.276471975831891e-06, "loss": 0.7301177978515625, "step": 1803 }, { "epoch": 2.3400044609363913, "grad_norm": 0.8008820414543152, "learning_rate": 1.2717041017982396e-06, "loss": 0.7587131857872009, "step": 1804 }, { "epoch": 2.34130218788653, "grad_norm": 0.7739439010620117, "learning_rate": 1.2669438511860527e-06, "loss": 0.6916797757148743, "step": 1805 }, { "epoch": 2.342599914836669, "grad_norm": 0.7357456088066101, "learning_rate": 1.2621912337288372e-06, "loss": 0.6751843690872192, "step": 1806 }, { "epoch": 2.343897641786808, "grad_norm": 0.7770068049430847, "learning_rate": 1.257446259144494e-06, "loss": 0.666739821434021, "step": 1807 }, { "epoch": 2.3451953687369467, "grad_norm": 0.7363953590393066, "learning_rate": 1.2527089371352968e-06, "loss": 0.6992002129554749, "step": 1808 }, { "epoch": 2.3464930956870855, "grad_norm": 0.759560227394104, "learning_rate": 1.2479792773878647e-06, "loss": 0.7391279339790344, "step": 1809 }, { "epoch": 2.3477908226372244, "grad_norm": 0.8206911087036133, "learning_rate": 1.243257289573161e-06, "loss": 0.7275311946868896, "step": 1810 }, { "epoch": 2.3490885495873632, "grad_norm": 0.7866367697715759, "learning_rate": 1.2385429833464513e-06, "loss": 0.6576771140098572, "step": 1811 }, { "epoch": 2.350386276537502, "grad_norm": 0.7733588218688965, "learning_rate": 1.2338363683472998e-06, "loss": 0.7235656380653381, "step": 1812 }, { "epoch": 2.351684003487641, "grad_norm": 0.7723690867424011, "learning_rate": 1.2291374541995437e-06, "loss": 0.7257228493690491, "step": 1813 }, { "epoch": 2.3529817304377803, "grad_norm": 0.8251050114631653, "learning_rate": 1.224446250511272e-06, "loss": 0.7229258418083191, "step": 1814 }, { "epoch": 2.354279457387919, "grad_norm": 0.7819148898124695, "learning_rate": 1.2197627668748101e-06, "loss": 0.7237058281898499, "step": 1815 }, { "epoch": 2.355577184338058, "grad_norm": 0.782592236995697, "learning_rate": 1.2150870128666959e-06, "loss": 0.6931707262992859, "step": 1816 }, { "epoch": 2.356874911288197, "grad_norm": 0.7515336275100708, "learning_rate": 1.2104189980476627e-06, "loss": 0.7242140173912048, "step": 1817 }, { "epoch": 2.3581726382383357, "grad_norm": 0.7545520067214966, "learning_rate": 1.2057587319626213e-06, "loss": 0.644026517868042, "step": 1818 }, { "epoch": 2.3594703651884745, "grad_norm": 0.7882485389709473, "learning_rate": 1.2011062241406313e-06, "loss": 0.7124985456466675, "step": 1819 }, { "epoch": 2.3607680921386134, "grad_norm": 0.7717565298080444, "learning_rate": 1.1964614840949002e-06, "loss": 0.7361290454864502, "step": 1820 }, { "epoch": 2.3620658190887522, "grad_norm": 0.7597373723983765, "learning_rate": 1.1918245213227408e-06, "loss": 0.702293336391449, "step": 1821 }, { "epoch": 2.363363546038891, "grad_norm": 0.7483997941017151, "learning_rate": 1.1871953453055707e-06, "loss": 0.7523679137229919, "step": 1822 }, { "epoch": 2.3646612729890304, "grad_norm": 0.7145439386367798, "learning_rate": 1.182573965508882e-06, "loss": 0.705971360206604, "step": 1823 }, { "epoch": 2.365958999939169, "grad_norm": 0.7735007405281067, "learning_rate": 1.1779603913822274e-06, "loss": 0.6811049580574036, "step": 1824 }, { "epoch": 2.367256726889308, "grad_norm": 0.7735781073570251, "learning_rate": 1.1733546323591981e-06, "loss": 0.7109265923500061, "step": 1825 }, { "epoch": 2.368554453839447, "grad_norm": 0.7741604447364807, "learning_rate": 1.168756697857406e-06, "loss": 0.7233576774597168, "step": 1826 }, { "epoch": 2.369852180789586, "grad_norm": 0.814807116985321, "learning_rate": 1.1641665972784628e-06, "loss": 0.7955071926116943, "step": 1827 }, { "epoch": 2.3711499077397247, "grad_norm": 0.7785446047782898, "learning_rate": 1.1595843400079636e-06, "loss": 0.695061445236206, "step": 1828 }, { "epoch": 2.3724476346898635, "grad_norm": 0.7668300271034241, "learning_rate": 1.1550099354154615e-06, "loss": 0.7401837110519409, "step": 1829 }, { "epoch": 2.3737453616400024, "grad_norm": 0.7792728543281555, "learning_rate": 1.1504433928544594e-06, "loss": 0.7388436794281006, "step": 1830 }, { "epoch": 2.3750430885901412, "grad_norm": 0.7757245302200317, "learning_rate": 1.1458847216623813e-06, "loss": 0.7079841494560242, "step": 1831 }, { "epoch": 2.37634081554028, "grad_norm": 0.7931995391845703, "learning_rate": 1.141333931160552e-06, "loss": 0.7631800174713135, "step": 1832 }, { "epoch": 2.377638542490419, "grad_norm": 0.8062769770622253, "learning_rate": 1.1367910306541918e-06, "loss": 0.80658358335495, "step": 1833 }, { "epoch": 2.3789362694405582, "grad_norm": 0.781043291091919, "learning_rate": 1.1322560294323775e-06, "loss": 0.7152395248413086, "step": 1834 }, { "epoch": 2.380233996390697, "grad_norm": 0.7616091966629028, "learning_rate": 1.1277289367680411e-06, "loss": 0.7314598560333252, "step": 1835 }, { "epoch": 2.381531723340836, "grad_norm": 0.7945448756217957, "learning_rate": 1.123209761917941e-06, "loss": 0.6685973405838013, "step": 1836 }, { "epoch": 2.382829450290975, "grad_norm": 0.7686976194381714, "learning_rate": 1.1186985141226458e-06, "loss": 0.6801435351371765, "step": 1837 }, { "epoch": 2.3841271772411137, "grad_norm": 0.7716367840766907, "learning_rate": 1.1141952026065156e-06, "loss": 0.746649444103241, "step": 1838 }, { "epoch": 2.3854249041912525, "grad_norm": 0.7480162978172302, "learning_rate": 1.1096998365776828e-06, "loss": 0.7064750790596008, "step": 1839 }, { "epoch": 2.3867226311413914, "grad_norm": 0.7676665782928467, "learning_rate": 1.1052124252280322e-06, "loss": 0.6626559495925903, "step": 1840 }, { "epoch": 2.3867226311413914, "eval_loss": 0.7613735198974609, "eval_runtime": 141.995, "eval_samples_per_second": 36.565, "eval_steps_per_second": 9.141, "step": 1840 }, { "epoch": 2.3880203580915302, "grad_norm": 0.7465634942054749, "learning_rate": 1.1007329777331866e-06, "loss": 0.717859148979187, "step": 1841 }, { "epoch": 2.389318085041669, "grad_norm": 0.7700461149215698, "learning_rate": 1.096261503252478e-06, "loss": 0.653079092502594, "step": 1842 }, { "epoch": 2.390615811991808, "grad_norm": 0.7783061265945435, "learning_rate": 1.0917980109289455e-06, "loss": 0.6487604975700378, "step": 1843 }, { "epoch": 2.391913538941947, "grad_norm": 0.7548145651817322, "learning_rate": 1.0873425098892964e-06, "loss": 0.6680834889411926, "step": 1844 }, { "epoch": 2.393211265892086, "grad_norm": 0.7790629267692566, "learning_rate": 1.082895009243905e-06, "loss": 0.707176923751831, "step": 1845 }, { "epoch": 2.394508992842225, "grad_norm": 0.7966057658195496, "learning_rate": 1.078455518086784e-06, "loss": 0.6602774858474731, "step": 1846 }, { "epoch": 2.395806719792364, "grad_norm": 0.7358562350273132, "learning_rate": 1.0740240454955692e-06, "loss": 0.6799494028091431, "step": 1847 }, { "epoch": 2.3971044467425027, "grad_norm": 0.7790771126747131, "learning_rate": 1.0696006005314996e-06, "loss": 0.7382240891456604, "step": 1848 }, { "epoch": 2.3984021736926415, "grad_norm": 0.7618228793144226, "learning_rate": 1.0651851922394035e-06, "loss": 0.6929839253425598, "step": 1849 }, { "epoch": 2.3996999006427804, "grad_norm": 0.7744076251983643, "learning_rate": 1.0607778296476679e-06, "loss": 0.7444416880607605, "step": 1850 }, { "epoch": 2.4009976275929192, "grad_norm": 0.72736656665802, "learning_rate": 1.05637852176824e-06, "loss": 0.6443988680839539, "step": 1851 }, { "epoch": 2.402295354543058, "grad_norm": 0.7641523480415344, "learning_rate": 1.051987277596585e-06, "loss": 0.7098280191421509, "step": 1852 }, { "epoch": 2.403593081493197, "grad_norm": 0.7642392516136169, "learning_rate": 1.0476041061116915e-06, "loss": 0.732379674911499, "step": 1853 }, { "epoch": 2.404890808443336, "grad_norm": 0.7198097109794617, "learning_rate": 1.0432290162760311e-06, "loss": 0.6814613342285156, "step": 1854 }, { "epoch": 2.4061885353934747, "grad_norm": 0.7901527285575867, "learning_rate": 1.038862017035558e-06, "loss": 0.7214125394821167, "step": 1855 }, { "epoch": 2.407486262343614, "grad_norm": 0.7606606483459473, "learning_rate": 1.0345031173196785e-06, "loss": 0.6583145260810852, "step": 1856 }, { "epoch": 2.408783989293753, "grad_norm": 0.7895342707633972, "learning_rate": 1.0301523260412405e-06, "loss": 0.7350203990936279, "step": 1857 }, { "epoch": 2.4100817162438917, "grad_norm": 0.8400049805641174, "learning_rate": 1.025809652096511e-06, "loss": 0.7966498136520386, "step": 1858 }, { "epoch": 2.4113794431940305, "grad_norm": 0.7552136778831482, "learning_rate": 1.0214751043651582e-06, "loss": 0.6972966194152832, "step": 1859 }, { "epoch": 2.4126771701441694, "grad_norm": 0.7996754050254822, "learning_rate": 1.0171486917102357e-06, "loss": 0.6762542724609375, "step": 1860 }, { "epoch": 2.4139748970943082, "grad_norm": 0.7461639046669006, "learning_rate": 1.0128304229781622e-06, "loss": 0.6995037198066711, "step": 1861 }, { "epoch": 2.415272624044447, "grad_norm": 0.769523024559021, "learning_rate": 1.008520306998706e-06, "loss": 0.6845777034759521, "step": 1862 }, { "epoch": 2.416570350994586, "grad_norm": 0.7502967119216919, "learning_rate": 1.0042183525849586e-06, "loss": 0.6307386159896851, "step": 1863 }, { "epoch": 2.417868077944725, "grad_norm": 0.7713011503219604, "learning_rate": 9.999245685333342e-07, "loss": 0.6450724601745605, "step": 1864 }, { "epoch": 2.4191658048948637, "grad_norm": 0.7563788294792175, "learning_rate": 9.95638963623528e-07, "loss": 0.7016159892082214, "step": 1865 }, { "epoch": 2.4204635318450025, "grad_norm": 0.7966086864471436, "learning_rate": 9.913615466185234e-07, "loss": 0.7306876182556152, "step": 1866 }, { "epoch": 2.421761258795142, "grad_norm": 0.7418493032455444, "learning_rate": 9.870923262645516e-07, "loss": 0.6521520018577576, "step": 1867 }, { "epoch": 2.4230589857452807, "grad_norm": 0.7862077355384827, "learning_rate": 9.828313112910887e-07, "loss": 0.7043705582618713, "step": 1868 }, { "epoch": 2.4243567126954195, "grad_norm": 0.7576665282249451, "learning_rate": 9.78578510410832e-07, "loss": 0.7256739139556885, "step": 1869 }, { "epoch": 2.4256544396455584, "grad_norm": 0.7524176239967346, "learning_rate": 9.743339323196827e-07, "loss": 0.708810031414032, "step": 1870 }, { "epoch": 2.4269521665956972, "grad_norm": 0.7555914521217346, "learning_rate": 9.700975856967287e-07, "loss": 0.6946004033088684, "step": 1871 }, { "epoch": 2.428249893545836, "grad_norm": 0.8047772645950317, "learning_rate": 9.658694792042284e-07, "loss": 0.7178459763526917, "step": 1872 }, { "epoch": 2.429547620495975, "grad_norm": 0.7859281897544861, "learning_rate": 9.616496214875847e-07, "loss": 0.6877878904342651, "step": 1873 }, { "epoch": 2.430845347446114, "grad_norm": 0.7489562034606934, "learning_rate": 9.574380211753442e-07, "loss": 0.6518415212631226, "step": 1874 }, { "epoch": 2.4321430743962527, "grad_norm": 0.7419933676719666, "learning_rate": 9.532346868791587e-07, "loss": 0.7486515641212463, "step": 1875 }, { "epoch": 2.433440801346392, "grad_norm": 0.7418408989906311, "learning_rate": 9.490396271937879e-07, "loss": 0.7523958086967468, "step": 1876 }, { "epoch": 2.4347385282965304, "grad_norm": 0.8075720071792603, "learning_rate": 9.448528506970628e-07, "loss": 0.6815677881240845, "step": 1877 }, { "epoch": 2.4360362552466697, "grad_norm": 0.7398018836975098, "learning_rate": 9.406743659498829e-07, "loss": 0.7089881896972656, "step": 1878 }, { "epoch": 2.4373339821968085, "grad_norm": 0.7852540016174316, "learning_rate": 9.365041814961928e-07, "loss": 0.6689083576202393, "step": 1879 }, { "epoch": 2.4386317091469474, "grad_norm": 0.7648181319236755, "learning_rate": 9.323423058629638e-07, "loss": 0.6946084499359131, "step": 1880 }, { "epoch": 2.4399294360970862, "grad_norm": 0.803335964679718, "learning_rate": 9.281887475601775e-07, "loss": 0.7401805520057678, "step": 1881 }, { "epoch": 2.441227163047225, "grad_norm": 0.7295672297477722, "learning_rate": 9.240435150808113e-07, "loss": 0.7000230550765991, "step": 1882 }, { "epoch": 2.442524889997364, "grad_norm": 0.7747576832771301, "learning_rate": 9.19906616900813e-07, "loss": 0.7612476348876953, "step": 1883 }, { "epoch": 2.443822616947503, "grad_norm": 0.7346387505531311, "learning_rate": 9.157780614790963e-07, "loss": 0.7002447843551636, "step": 1884 }, { "epoch": 2.4451203438976417, "grad_norm": 0.7575351595878601, "learning_rate": 9.116578572575091e-07, "loss": 0.7609678506851196, "step": 1885 }, { "epoch": 2.4464180708477805, "grad_norm": 0.7746056914329529, "learning_rate": 9.075460126608271e-07, "loss": 0.7523704767227173, "step": 1886 }, { "epoch": 2.44771579779792, "grad_norm": 0.745557427406311, "learning_rate": 9.034425360967319e-07, "loss": 0.7555814385414124, "step": 1887 }, { "epoch": 2.4490135247480587, "grad_norm": 0.7867469191551208, "learning_rate": 8.993474359557936e-07, "loss": 0.6881682276725769, "step": 1888 }, { "epoch": 2.4503112516981975, "grad_norm": 0.7527727484703064, "learning_rate": 8.952607206114588e-07, "loss": 0.7243494391441345, "step": 1889 }, { "epoch": 2.4516089786483364, "grad_norm": 0.7513570189476013, "learning_rate": 8.911823984200219e-07, "loss": 0.7017259001731873, "step": 1890 }, { "epoch": 2.4529067055984752, "grad_norm": 0.7603667974472046, "learning_rate": 8.871124777206213e-07, "loss": 0.7706648111343384, "step": 1891 }, { "epoch": 2.454204432548614, "grad_norm": 0.8234115839004517, "learning_rate": 8.83050966835215e-07, "loss": 0.7250049710273743, "step": 1892 }, { "epoch": 2.455502159498753, "grad_norm": 0.787039577960968, "learning_rate": 8.789978740685646e-07, "loss": 0.6726934909820557, "step": 1893 }, { "epoch": 2.456799886448892, "grad_norm": 0.7636567950248718, "learning_rate": 8.749532077082179e-07, "loss": 0.6961559057235718, "step": 1894 }, { "epoch": 2.4580976133990307, "grad_norm": 0.7643488049507141, "learning_rate": 8.709169760244968e-07, "loss": 0.7612512707710266, "step": 1895 }, { "epoch": 2.4593953403491695, "grad_norm": 0.7855246663093567, "learning_rate": 8.668891872704682e-07, "loss": 0.6644111275672913, "step": 1896 }, { "epoch": 2.4606930672993084, "grad_norm": 0.7694689631462097, "learning_rate": 8.628698496819471e-07, "loss": 0.6863073110580444, "step": 1897 }, { "epoch": 2.4619907942494477, "grad_norm": 0.734458327293396, "learning_rate": 8.58858971477457e-07, "loss": 0.777980387210846, "step": 1898 }, { "epoch": 2.4632885211995865, "grad_norm": 0.7516976594924927, "learning_rate": 8.548565608582299e-07, "loss": 0.7173190712928772, "step": 1899 }, { "epoch": 2.4645862481497254, "grad_norm": 0.7432575225830078, "learning_rate": 8.508626260081826e-07, "loss": 0.7015535235404968, "step": 1900 }, { "epoch": 2.4658839750998642, "grad_norm": 0.7617539763450623, "learning_rate": 8.468771750939009e-07, "loss": 0.7171428799629211, "step": 1901 }, { "epoch": 2.467181702050003, "grad_norm": 0.7451692819595337, "learning_rate": 8.429002162646233e-07, "loss": 0.708382785320282, "step": 1902 }, { "epoch": 2.468479429000142, "grad_norm": 0.7860012650489807, "learning_rate": 8.389317576522243e-07, "loss": 0.7629508376121521, "step": 1903 }, { "epoch": 2.469777155950281, "grad_norm": 0.7875171303749084, "learning_rate": 8.349718073711971e-07, "loss": 0.676824688911438, "step": 1904 }, { "epoch": 2.4710748829004197, "grad_norm": 0.7545873522758484, "learning_rate": 8.310203735186384e-07, "loss": 0.8134433627128601, "step": 1905 }, { "epoch": 2.4723726098505585, "grad_norm": 0.7484904527664185, "learning_rate": 8.270774641742275e-07, "loss": 0.7553561925888062, "step": 1906 }, { "epoch": 2.4736703368006974, "grad_norm": 0.7527977228164673, "learning_rate": 8.231430874002206e-07, "loss": 0.7250518798828125, "step": 1907 }, { "epoch": 2.4749680637508362, "grad_norm": 0.7821675539016724, "learning_rate": 8.192172512414187e-07, "loss": 0.699806809425354, "step": 1908 }, { "epoch": 2.4762657907009755, "grad_norm": 0.7531731128692627, "learning_rate": 8.152999637251641e-07, "loss": 0.6669670343399048, "step": 1909 }, { "epoch": 2.4775635176511144, "grad_norm": 0.7388619780540466, "learning_rate": 8.113912328613183e-07, "loss": 0.6381179094314575, "step": 1910 }, { "epoch": 2.4788612446012532, "grad_norm": 0.7556558847427368, "learning_rate": 8.074910666422475e-07, "loss": 0.6770901679992676, "step": 1911 }, { "epoch": 2.480158971551392, "grad_norm": 0.7798584699630737, "learning_rate": 8.035994730428031e-07, "loss": 0.7416208982467651, "step": 1912 }, { "epoch": 2.481456698501531, "grad_norm": 0.7625208497047424, "learning_rate": 7.997164600203111e-07, "loss": 0.6684930920600891, "step": 1913 }, { "epoch": 2.48275442545167, "grad_norm": 0.777184784412384, "learning_rate": 7.958420355145469e-07, "loss": 0.7176003456115723, "step": 1914 }, { "epoch": 2.4840521524018087, "grad_norm": 0.7437546849250793, "learning_rate": 7.919762074477311e-07, "loss": 0.6534523367881775, "step": 1915 }, { "epoch": 2.4853498793519475, "grad_norm": 0.7725997567176819, "learning_rate": 7.881189837245024e-07, "loss": 0.6518686413764954, "step": 1916 }, { "epoch": 2.4866476063020864, "grad_norm": 0.8138085007667542, "learning_rate": 7.842703722319073e-07, "loss": 0.7562498450279236, "step": 1917 }, { "epoch": 2.487945333252225, "grad_norm": 0.7612082958221436, "learning_rate": 7.804303808393831e-07, "loss": 0.6730899810791016, "step": 1918 }, { "epoch": 2.489243060202364, "grad_norm": 0.779849648475647, "learning_rate": 7.76599017398737e-07, "loss": 0.8186163902282715, "step": 1919 }, { "epoch": 2.4905407871525034, "grad_norm": 0.7707155346870422, "learning_rate": 7.727762897441421e-07, "loss": 0.6735822558403015, "step": 1920 }, { "epoch": 2.4918385141026422, "grad_norm": 0.7846003174781799, "learning_rate": 7.689622056921053e-07, "loss": 0.7295418381690979, "step": 1921 }, { "epoch": 2.493136241052781, "grad_norm": 0.7559599876403809, "learning_rate": 7.65156773041465e-07, "loss": 0.7741995453834534, "step": 1922 }, { "epoch": 2.49443396800292, "grad_norm": 0.791363000869751, "learning_rate": 7.613599995733667e-07, "loss": 0.7229835391044617, "step": 1923 }, { "epoch": 2.495731694953059, "grad_norm": 0.7598723769187927, "learning_rate": 7.575718930512516e-07, "loss": 0.6266255974769592, "step": 1924 }, { "epoch": 2.4970294219031977, "grad_norm": 0.7574074864387512, "learning_rate": 7.537924612208391e-07, "loss": 0.7148513793945312, "step": 1925 }, { "epoch": 2.4983271488533365, "grad_norm": 0.75996994972229, "learning_rate": 7.500217118101106e-07, "loss": 0.735736072063446, "step": 1926 }, { "epoch": 2.4996248758034754, "grad_norm": 0.7383106350898743, "learning_rate": 7.462596525292937e-07, "loss": 0.7258022427558899, "step": 1927 }, { "epoch": 2.500922602753614, "grad_norm": 0.7803204655647278, "learning_rate": 7.425062910708492e-07, "loss": 0.6895723342895508, "step": 1928 }, { "epoch": 2.5022203297037535, "grad_norm": 0.7795876264572144, "learning_rate": 7.387616351094473e-07, "loss": 0.6744094491004944, "step": 1929 }, { "epoch": 2.503518056653892, "grad_norm": 0.7904599905014038, "learning_rate": 7.350256923019666e-07, "loss": 0.6453460454940796, "step": 1930 }, { "epoch": 2.5048157836040312, "grad_norm": 0.7582366466522217, "learning_rate": 7.312984702874609e-07, "loss": 0.7346917390823364, "step": 1931 }, { "epoch": 2.50611351055417, "grad_norm": 0.7735522985458374, "learning_rate": 7.275799766871577e-07, "loss": 0.7309703826904297, "step": 1932 }, { "epoch": 2.507411237504309, "grad_norm": 0.8296225070953369, "learning_rate": 7.238702191044344e-07, "loss": 0.7618504166603088, "step": 1933 }, { "epoch": 2.508708964454448, "grad_norm": 0.7544898986816406, "learning_rate": 7.201692051248066e-07, "loss": 0.6686187386512756, "step": 1934 }, { "epoch": 2.5100066914045867, "grad_norm": 0.7645792365074158, "learning_rate": 7.164769423159113e-07, "loss": 0.7318588495254517, "step": 1935 }, { "epoch": 2.5113044183547255, "grad_norm": 0.8033185601234436, "learning_rate": 7.127934382274926e-07, "loss": 0.6869446635246277, "step": 1936 }, { "epoch": 2.5126021453048644, "grad_norm": 0.7405628561973572, "learning_rate": 7.091187003913802e-07, "loss": 0.7319486737251282, "step": 1937 }, { "epoch": 2.513899872255003, "grad_norm": 0.7678588032722473, "learning_rate": 7.054527363214875e-07, "loss": 0.7208606600761414, "step": 1938 }, { "epoch": 2.515197599205142, "grad_norm": 0.7541241645812988, "learning_rate": 7.017955535137788e-07, "loss": 0.710355818271637, "step": 1939 }, { "epoch": 2.5164953261552814, "grad_norm": 0.8112343549728394, "learning_rate": 6.981471594462719e-07, "loss": 0.7606580853462219, "step": 1940 }, { "epoch": 2.51779305310542, "grad_norm": 0.7551878690719604, "learning_rate": 6.945075615790059e-07, "loss": 0.7238883972167969, "step": 1941 }, { "epoch": 2.519090780055559, "grad_norm": 0.7674723863601685, "learning_rate": 6.908767673540384e-07, "loss": 0.7509013414382935, "step": 1942 }, { "epoch": 2.520388507005698, "grad_norm": 0.8111278414726257, "learning_rate": 6.872547841954241e-07, "loss": 0.7284641265869141, "step": 1943 }, { "epoch": 2.521686233955837, "grad_norm": 0.7864218354225159, "learning_rate": 6.836416195092021e-07, "loss": 0.7208187580108643, "step": 1944 }, { "epoch": 2.5229839609059757, "grad_norm": 0.7737844586372375, "learning_rate": 6.800372806833799e-07, "loss": 0.7160454988479614, "step": 1945 }, { "epoch": 2.5242816878561145, "grad_norm": 0.738142192363739, "learning_rate": 6.764417750879182e-07, "loss": 0.7124934196472168, "step": 1946 }, { "epoch": 2.5255794148062534, "grad_norm": 0.7452174425125122, "learning_rate": 6.728551100747155e-07, "loss": 0.7035548686981201, "step": 1947 }, { "epoch": 2.526877141756392, "grad_norm": 0.7617976069450378, "learning_rate": 6.692772929775943e-07, "loss": 0.7573691606521606, "step": 1948 }, { "epoch": 2.528174868706531, "grad_norm": 0.7748024463653564, "learning_rate": 6.657083311122858e-07, "loss": 0.7279133200645447, "step": 1949 }, { "epoch": 2.52947259565667, "grad_norm": 0.7774744033813477, "learning_rate": 6.621482317764105e-07, "loss": 0.6730536818504333, "step": 1950 }, { "epoch": 2.5307703226068092, "grad_norm": 0.7779364585876465, "learning_rate": 6.585970022494748e-07, "loss": 0.8089554309844971, "step": 1951 }, { "epoch": 2.5320680495569476, "grad_norm": 0.7630438804626465, "learning_rate": 6.550546497928401e-07, "loss": 0.6974608302116394, "step": 1952 }, { "epoch": 2.533365776507087, "grad_norm": 0.7758179306983948, "learning_rate": 6.515211816497247e-07, "loss": 0.7012693881988525, "step": 1953 }, { "epoch": 2.534663503457226, "grad_norm": 0.7640806436538696, "learning_rate": 6.479966050451736e-07, "loss": 0.6977714896202087, "step": 1954 }, { "epoch": 2.5359612304073647, "grad_norm": 0.7874693274497986, "learning_rate": 6.444809271860547e-07, "loss": 0.7300205826759338, "step": 1955 }, { "epoch": 2.5372589573575035, "grad_norm": 0.7789673805236816, "learning_rate": 6.409741552610399e-07, "loss": 0.7227955460548401, "step": 1956 }, { "epoch": 2.5385566843076424, "grad_norm": 0.7532787919044495, "learning_rate": 6.374762964405895e-07, "loss": 0.692006528377533, "step": 1957 }, { "epoch": 2.539854411257781, "grad_norm": 0.7724280953407288, "learning_rate": 6.339873578769401e-07, "loss": 0.715645432472229, "step": 1958 }, { "epoch": 2.54115213820792, "grad_norm": 0.7687965631484985, "learning_rate": 6.305073467040884e-07, "loss": 0.6982177495956421, "step": 1959 }, { "epoch": 2.542449865158059, "grad_norm": 0.7735276222229004, "learning_rate": 6.270362700377736e-07, "loss": 0.7986254692077637, "step": 1960 }, { "epoch": 2.543747592108198, "grad_norm": 0.7309845685958862, "learning_rate": 6.235741349754731e-07, "loss": 0.7566351890563965, "step": 1961 }, { "epoch": 2.545045319058337, "grad_norm": 0.7577828764915466, "learning_rate": 6.201209485963744e-07, "loss": 0.7308484315872192, "step": 1962 }, { "epoch": 2.546343046008476, "grad_norm": 0.7720513343811035, "learning_rate": 6.166767179613691e-07, "loss": 0.7158771753311157, "step": 1963 }, { "epoch": 2.547640772958615, "grad_norm": 0.7625830769538879, "learning_rate": 6.132414501130385e-07, "loss": 0.673353910446167, "step": 1964 }, { "epoch": 2.5489384999087537, "grad_norm": 0.754311740398407, "learning_rate": 6.098151520756357e-07, "loss": 0.7290247082710266, "step": 1965 }, { "epoch": 2.5502362268588925, "grad_norm": 0.7994941473007202, "learning_rate": 6.063978308550722e-07, "loss": 0.7406793832778931, "step": 1966 }, { "epoch": 2.5515339538090314, "grad_norm": 0.7528447508811951, "learning_rate": 6.029894934389058e-07, "loss": 0.7099549770355225, "step": 1967 }, { "epoch": 2.55283168075917, "grad_norm": 0.7489729523658752, "learning_rate": 5.995901467963228e-07, "loss": 0.630169689655304, "step": 1968 }, { "epoch": 2.554129407709309, "grad_norm": 0.8017000555992126, "learning_rate": 5.961997978781292e-07, "loss": 0.6770339012145996, "step": 1969 }, { "epoch": 2.555427134659448, "grad_norm": 0.7756592631340027, "learning_rate": 5.928184536167258e-07, "loss": 0.7437784671783447, "step": 1970 }, { "epoch": 2.5567248616095872, "grad_norm": 0.7770244479179382, "learning_rate": 5.89446120926111e-07, "loss": 0.6648685932159424, "step": 1971 }, { "epoch": 2.5580225885597256, "grad_norm": 0.782084584236145, "learning_rate": 5.860828067018481e-07, "loss": 0.7037726640701294, "step": 1972 }, { "epoch": 2.559320315509865, "grad_norm": 0.7928634881973267, "learning_rate": 5.82728517821064e-07, "loss": 0.7296996116638184, "step": 1973 }, { "epoch": 2.560618042460004, "grad_norm": 0.8191131353378296, "learning_rate": 5.793832611424322e-07, "loss": 0.7087576985359192, "step": 1974 }, { "epoch": 2.5619157694101427, "grad_norm": 0.7863832116127014, "learning_rate": 5.760470435061533e-07, "loss": 0.6903418898582458, "step": 1975 }, { "epoch": 2.5632134963602815, "grad_norm": 0.7777239084243774, "learning_rate": 5.727198717339511e-07, "loss": 0.7417172193527222, "step": 1976 }, { "epoch": 2.5645112233104204, "grad_norm": 0.7851623892784119, "learning_rate": 5.694017526290468e-07, "loss": 0.7211293578147888, "step": 1977 }, { "epoch": 2.565808950260559, "grad_norm": 0.7636955380439758, "learning_rate": 5.660926929761556e-07, "loss": 0.702522337436676, "step": 1978 }, { "epoch": 2.567106677210698, "grad_norm": 0.7507690787315369, "learning_rate": 5.627926995414662e-07, "loss": 0.6389472484588623, "step": 1979 }, { "epoch": 2.568404404160837, "grad_norm": 0.7766726016998291, "learning_rate": 5.59501779072631e-07, "loss": 0.7038560509681702, "step": 1980 }, { "epoch": 2.569702131110976, "grad_norm": 0.804925262928009, "learning_rate": 5.562199382987488e-07, "loss": 0.7217026352882385, "step": 1981 }, { "epoch": 2.570999858061115, "grad_norm": 0.731317937374115, "learning_rate": 5.529471839303541e-07, "loss": 0.6586759090423584, "step": 1982 }, { "epoch": 2.5722975850112535, "grad_norm": 0.7591803669929504, "learning_rate": 5.496835226593983e-07, "loss": 0.7452529072761536, "step": 1983 }, { "epoch": 2.573595311961393, "grad_norm": 0.7555832266807556, "learning_rate": 5.464289611592472e-07, "loss": 0.6921640634536743, "step": 1984 }, { "epoch": 2.5748930389115317, "grad_norm": 0.7568356990814209, "learning_rate": 5.431835060846519e-07, "loss": 0.7031154036521912, "step": 1985 }, { "epoch": 2.5761907658616705, "grad_norm": 0.7693030834197998, "learning_rate": 5.399471640717479e-07, "loss": 0.7538740634918213, "step": 1986 }, { "epoch": 2.5774884928118094, "grad_norm": 0.7669311165809631, "learning_rate": 5.367199417380347e-07, "loss": 0.6759027242660522, "step": 1987 }, { "epoch": 2.578786219761948, "grad_norm": 0.8306924104690552, "learning_rate": 5.335018456823665e-07, "loss": 0.7569330930709839, "step": 1988 }, { "epoch": 2.580083946712087, "grad_norm": 0.7967241406440735, "learning_rate": 5.302928824849335e-07, "loss": 0.7751659154891968, "step": 1989 }, { "epoch": 2.581381673662226, "grad_norm": 0.7510127425193787, "learning_rate": 5.270930587072548e-07, "loss": 0.6661146283149719, "step": 1990 }, { "epoch": 2.582679400612365, "grad_norm": 0.7510029673576355, "learning_rate": 5.239023808921595e-07, "loss": 0.6773824095726013, "step": 1991 }, { "epoch": 2.5839771275625036, "grad_norm": 0.7623904347419739, "learning_rate": 5.207208555637767e-07, "loss": 0.7609443664550781, "step": 1992 }, { "epoch": 2.585274854512643, "grad_norm": 0.7627770304679871, "learning_rate": 5.175484892275184e-07, "loss": 0.7122793197631836, "step": 1993 }, { "epoch": 2.5865725814627814, "grad_norm": 0.7585700154304504, "learning_rate": 5.14385288370074e-07, "loss": 0.743561863899231, "step": 1994 }, { "epoch": 2.5878703084129207, "grad_norm": 0.738996684551239, "learning_rate": 5.11231259459386e-07, "loss": 0.6380658745765686, "step": 1995 }, { "epoch": 2.5891680353630595, "grad_norm": 0.7691276669502258, "learning_rate": 5.080864089446464e-07, "loss": 0.6471886038780212, "step": 1996 }, { "epoch": 2.5904657623131984, "grad_norm": 0.7550168037414551, "learning_rate": 5.049507432562778e-07, "loss": 0.673279881477356, "step": 1997 }, { "epoch": 2.591763489263337, "grad_norm": 0.7882893085479736, "learning_rate": 5.018242688059238e-07, "loss": 0.786373496055603, "step": 1998 }, { "epoch": 2.593061216213476, "grad_norm": 0.7830791473388672, "learning_rate": 4.987069919864329e-07, "loss": 0.7720410823822021, "step": 1999 }, { "epoch": 2.594358943163615, "grad_norm": 0.7811652421951294, "learning_rate": 4.95598919171848e-07, "loss": 0.7637435793876648, "step": 2000 }, { "epoch": 2.595656670113754, "grad_norm": 0.771362841129303, "learning_rate": 4.925000567173882e-07, "loss": 0.7330818772315979, "step": 2001 }, { "epoch": 2.5969543970638926, "grad_norm": 0.7689074277877808, "learning_rate": 4.894104109594466e-07, "loss": 0.677016019821167, "step": 2002 }, { "epoch": 2.5982521240140315, "grad_norm": 0.7591285109519958, "learning_rate": 4.863299882155659e-07, "loss": 0.6757215261459351, "step": 2003 }, { "epoch": 2.599549850964171, "grad_norm": 0.768108069896698, "learning_rate": 4.832587947844297e-07, "loss": 0.6731127500534058, "step": 2004 }, { "epoch": 2.600847577914309, "grad_norm": 0.7346110939979553, "learning_rate": 4.801968369458531e-07, "loss": 0.7469443678855896, "step": 2005 }, { "epoch": 2.6021453048644485, "grad_norm": 0.7801742553710938, "learning_rate": 4.771441209607625e-07, "loss": 0.7497217059135437, "step": 2006 }, { "epoch": 2.6034430318145874, "grad_norm": 0.7419130206108093, "learning_rate": 4.7410065307119167e-07, "loss": 0.7378045916557312, "step": 2007 }, { "epoch": 2.604740758764726, "grad_norm": 0.7835217118263245, "learning_rate": 4.7106643950026067e-07, "loss": 0.7161265015602112, "step": 2008 }, { "epoch": 2.606038485714865, "grad_norm": 0.7818168997764587, "learning_rate": 4.6804148645216873e-07, "loss": 0.7108198404312134, "step": 2009 }, { "epoch": 2.607336212665004, "grad_norm": 0.7902902364730835, "learning_rate": 4.6502580011217934e-07, "loss": 0.7278609871864319, "step": 2010 }, { "epoch": 2.608633939615143, "grad_norm": 0.7904089093208313, "learning_rate": 4.6201938664660775e-07, "loss": 0.7432711720466614, "step": 2011 }, { "epoch": 2.6099316665652816, "grad_norm": 0.7738637328147888, "learning_rate": 4.590222522028082e-07, "loss": 0.7238620519638062, "step": 2012 }, { "epoch": 2.6112293935154205, "grad_norm": 0.7708808779716492, "learning_rate": 4.5603440290916347e-07, "loss": 0.7561792135238647, "step": 2013 }, { "epoch": 2.6125271204655593, "grad_norm": 0.7780309915542603, "learning_rate": 4.5305584487506605e-07, "loss": 0.7579943537712097, "step": 2014 }, { "epoch": 2.6138248474156986, "grad_norm": 0.7369161248207092, "learning_rate": 4.500865841909169e-07, "loss": 0.7089678049087524, "step": 2015 }, { "epoch": 2.6151225743658375, "grad_norm": 0.8075423240661621, "learning_rate": 4.471266269280994e-07, "loss": 0.7907909750938416, "step": 2016 }, { "epoch": 2.6164203013159764, "grad_norm": 0.797129213809967, "learning_rate": 4.441759791389799e-07, "loss": 0.7853899598121643, "step": 2017 }, { "epoch": 2.617718028266115, "grad_norm": 0.8074013590812683, "learning_rate": 4.41234646856884e-07, "loss": 0.6692730188369751, "step": 2018 }, { "epoch": 2.619015755216254, "grad_norm": 0.7978529930114746, "learning_rate": 4.383026360960929e-07, "loss": 0.7197666168212891, "step": 2019 }, { "epoch": 2.620313482166393, "grad_norm": 0.7521398067474365, "learning_rate": 4.35379952851826e-07, "loss": 0.6917500495910645, "step": 2020 }, { "epoch": 2.621611209116532, "grad_norm": 0.7865820527076721, "learning_rate": 4.324666031002311e-07, "loss": 0.6865369081497192, "step": 2021 }, { "epoch": 2.6229089360666706, "grad_norm": 0.758794903755188, "learning_rate": 4.29562592798371e-07, "loss": 0.7486015558242798, "step": 2022 }, { "epoch": 2.6242066630168095, "grad_norm": 0.7740626931190491, "learning_rate": 4.266679278842123e-07, "loss": 0.7256302237510681, "step": 2023 }, { "epoch": 2.625504389966949, "grad_norm": 0.7756375074386597, "learning_rate": 4.2378261427660994e-07, "loss": 0.7277344465255737, "step": 2024 }, { "epoch": 2.626802116917087, "grad_norm": 0.7711913585662842, "learning_rate": 4.209066578753035e-07, "loss": 0.683953583240509, "step": 2025 }, { "epoch": 2.6280998438672265, "grad_norm": 0.7805073857307434, "learning_rate": 4.1804006456089174e-07, "loss": 0.6939178109169006, "step": 2026 }, { "epoch": 2.6293975708173654, "grad_norm": 0.7380502223968506, "learning_rate": 4.1518284019483655e-07, "loss": 0.6786249279975891, "step": 2027 }, { "epoch": 2.630695297767504, "grad_norm": 0.7325124740600586, "learning_rate": 4.123349906194357e-07, "loss": 0.6845241785049438, "step": 2028 }, { "epoch": 2.631993024717643, "grad_norm": 0.7694509625434875, "learning_rate": 4.094965216578212e-07, "loss": 0.7099627256393433, "step": 2029 }, { "epoch": 2.633290751667782, "grad_norm": 0.7750458717346191, "learning_rate": 4.066674391139458e-07, "loss": 0.7063329219818115, "step": 2030 }, { "epoch": 2.634588478617921, "grad_norm": 0.772454559803009, "learning_rate": 4.038477487725645e-07, "loss": 0.6330828666687012, "step": 2031 }, { "epoch": 2.6358862055680596, "grad_norm": 0.792510449886322, "learning_rate": 4.0103745639923144e-07, "loss": 0.7432264685630798, "step": 2032 }, { "epoch": 2.6371839325181985, "grad_norm": 0.7578525543212891, "learning_rate": 3.9823656774028386e-07, "loss": 0.6670839786529541, "step": 2033 }, { "epoch": 2.6384816594683373, "grad_norm": 0.7778390049934387, "learning_rate": 3.9544508852282895e-07, "loss": 0.7466758489608765, "step": 2034 }, { "epoch": 2.6397793864184766, "grad_norm": 0.7715905904769897, "learning_rate": 3.9266302445473634e-07, "loss": 0.6792566776275635, "step": 2035 }, { "epoch": 2.641077113368615, "grad_norm": 0.8023421764373779, "learning_rate": 3.89890381224623e-07, "loss": 0.7303230166435242, "step": 2036 }, { "epoch": 2.6423748403187544, "grad_norm": 0.7726515531539917, "learning_rate": 3.8712716450183985e-07, "loss": 0.6872059106826782, "step": 2037 }, { "epoch": 2.643672567268893, "grad_norm": 0.7481380701065063, "learning_rate": 3.8437337993647017e-07, "loss": 0.7442598342895508, "step": 2038 }, { "epoch": 2.644970294219032, "grad_norm": 0.73259437084198, "learning_rate": 3.81629033159302e-07, "loss": 0.6850177049636841, "step": 2039 }, { "epoch": 2.646268021169171, "grad_norm": 0.7581216096878052, "learning_rate": 3.7889412978183324e-07, "loss": 0.7565430998802185, "step": 2040 }, { "epoch": 2.64756574811931, "grad_norm": 0.7624466419219971, "learning_rate": 3.7616867539624733e-07, "loss": 0.681561291217804, "step": 2041 }, { "epoch": 2.6488634750694486, "grad_norm": 0.7603456974029541, "learning_rate": 3.734526755754092e-07, "loss": 0.6617370247840881, "step": 2042 }, { "epoch": 2.6501612020195875, "grad_norm": 0.815494179725647, "learning_rate": 3.707461358728509e-07, "loss": 0.791693925857544, "step": 2043 }, { "epoch": 2.6514589289697263, "grad_norm": 0.784187376499176, "learning_rate": 3.680490618227611e-07, "loss": 0.7595443725585938, "step": 2044 }, { "epoch": 2.652756655919865, "grad_norm": 0.7431678771972656, "learning_rate": 3.6536145893997346e-07, "loss": 0.7058426737785339, "step": 2045 }, { "epoch": 2.6540543828700045, "grad_norm": 0.7702705264091492, "learning_rate": 3.626833327199564e-07, "loss": 0.7345145344734192, "step": 2046 }, { "epoch": 2.655352109820143, "grad_norm": 0.799102783203125, "learning_rate": 3.600146886387984e-07, "loss": 0.7478254437446594, "step": 2047 }, { "epoch": 2.656649836770282, "grad_norm": 0.7557673454284668, "learning_rate": 3.573555321532035e-07, "loss": 0.7010441422462463, "step": 2048 }, { "epoch": 2.657947563720421, "grad_norm": 0.8038543462753296, "learning_rate": 3.547058687004723e-07, "loss": 0.739691972732544, "step": 2049 }, { "epoch": 2.65924529067056, "grad_norm": 0.7559893727302551, "learning_rate": 3.520657036984959e-07, "loss": 0.6943920850753784, "step": 2050 }, { "epoch": 2.660543017620699, "grad_norm": 0.7952106595039368, "learning_rate": 3.494350425457438e-07, "loss": 0.6908611059188843, "step": 2051 }, { "epoch": 2.6618407445708376, "grad_norm": 0.7683407664299011, "learning_rate": 3.46813890621252e-07, "loss": 0.7314857840538025, "step": 2052 }, { "epoch": 2.6631384715209765, "grad_norm": 0.7509656548500061, "learning_rate": 3.4420225328461286e-07, "loss": 0.7266790866851807, "step": 2053 }, { "epoch": 2.6644361984711153, "grad_norm": 0.793124794960022, "learning_rate": 3.416001358759635e-07, "loss": 0.7625051736831665, "step": 2054 }, { "epoch": 2.665733925421254, "grad_norm": 0.7964074611663818, "learning_rate": 3.390075437159762e-07, "loss": 0.7503449320793152, "step": 2055 }, { "epoch": 2.667031652371393, "grad_norm": 0.7679505944252014, "learning_rate": 3.36424482105846e-07, "loss": 0.7547749876976013, "step": 2056 }, { "epoch": 2.6683293793215324, "grad_norm": 0.7538975477218628, "learning_rate": 3.338509563272774e-07, "loss": 0.6920310258865356, "step": 2057 }, { "epoch": 2.6696271062716708, "grad_norm": 0.7652698159217834, "learning_rate": 3.3128697164248213e-07, "loss": 0.7433255314826965, "step": 2058 }, { "epoch": 2.67092483322181, "grad_norm": 0.780931830406189, "learning_rate": 3.2873253329415986e-07, "loss": 0.7661755084991455, "step": 2059 }, { "epoch": 2.672222560171949, "grad_norm": 0.7813259959220886, "learning_rate": 3.2618764650548806e-07, "loss": 0.7323101162910461, "step": 2060 }, { "epoch": 2.673520287122088, "grad_norm": 0.7653396725654602, "learning_rate": 3.236523164801192e-07, "loss": 0.6384355425834656, "step": 2061 }, { "epoch": 2.6748180140722266, "grad_norm": 0.7511436343193054, "learning_rate": 3.2112654840215863e-07, "loss": 0.76376873254776, "step": 2062 }, { "epoch": 2.6761157410223655, "grad_norm": 0.8071112036705017, "learning_rate": 3.186103474361646e-07, "loss": 0.7349339127540588, "step": 2063 }, { "epoch": 2.6774134679725043, "grad_norm": 0.7712031006813049, "learning_rate": 3.161037187271304e-07, "loss": 0.7209377288818359, "step": 2064 }, { "epoch": 2.678711194922643, "grad_norm": 0.7776238918304443, "learning_rate": 3.136066674004773e-07, "loss": 0.7230247855186462, "step": 2065 }, { "epoch": 2.680008921872782, "grad_norm": 0.7257099747657776, "learning_rate": 3.1111919856204373e-07, "loss": 0.6789555549621582, "step": 2066 }, { "epoch": 2.681306648822921, "grad_norm": 0.7619670629501343, "learning_rate": 3.08641317298074e-07, "loss": 0.7058009505271912, "step": 2067 }, { "epoch": 2.68260437577306, "grad_norm": 0.7894755601882935, "learning_rate": 3.0617302867520736e-07, "loss": 0.691407322883606, "step": 2068 }, { "epoch": 2.6839021027231986, "grad_norm": 0.7572270631790161, "learning_rate": 3.0371433774047056e-07, "loss": 0.7379584312438965, "step": 2069 }, { "epoch": 2.685199829673338, "grad_norm": 0.7616877555847168, "learning_rate": 3.0126524952126203e-07, "loss": 0.7380081415176392, "step": 2070 }, { "epoch": 2.685199829673338, "eval_loss": 0.7602905035018921, "eval_runtime": 139.4785, "eval_samples_per_second": 37.224, "eval_steps_per_second": 9.306, "step": 2070 }, { "epoch": 2.686497556623477, "grad_norm": 0.7607947587966919, "learning_rate": 2.988257690253504e-07, "loss": 0.7469013333320618, "step": 2071 }, { "epoch": 2.6877952835736156, "grad_norm": 0.772251546382904, "learning_rate": 2.9639590124085296e-07, "loss": 0.7996762990951538, "step": 2072 }, { "epoch": 2.6890930105237545, "grad_norm": 0.7398597002029419, "learning_rate": 2.939756511362357e-07, "loss": 0.6718044281005859, "step": 2073 }, { "epoch": 2.6903907374738933, "grad_norm": 0.7646867632865906, "learning_rate": 2.915650236602974e-07, "loss": 0.7430347204208374, "step": 2074 }, { "epoch": 2.691688464424032, "grad_norm": 0.7866479158401489, "learning_rate": 2.891640237421611e-07, "loss": 0.7387747168540955, "step": 2075 }, { "epoch": 2.692986191374171, "grad_norm": 0.7951492071151733, "learning_rate": 2.8677265629126373e-07, "loss": 0.7158300280570984, "step": 2076 }, { "epoch": 2.69428391832431, "grad_norm": 0.7993662357330322, "learning_rate": 2.8439092619734655e-07, "loss": 0.7340144515037537, "step": 2077 }, { "epoch": 2.6955816452744488, "grad_norm": 0.7797279953956604, "learning_rate": 2.820188383304451e-07, "loss": 0.6969457864761353, "step": 2078 }, { "epoch": 2.696879372224588, "grad_norm": 0.7600167989730835, "learning_rate": 2.7965639754087893e-07, "loss": 0.7070989608764648, "step": 2079 }, { "epoch": 2.698177099174727, "grad_norm": 0.7879111170768738, "learning_rate": 2.7730360865923954e-07, "loss": 0.7690889239311218, "step": 2080 }, { "epoch": 2.699474826124866, "grad_norm": 0.7314602136611938, "learning_rate": 2.7496047649638757e-07, "loss": 0.6755863428115845, "step": 2081 }, { "epoch": 2.7007725530750046, "grad_norm": 0.780437707901001, "learning_rate": 2.726270058434327e-07, "loss": 0.7809361815452576, "step": 2082 }, { "epoch": 2.7020702800251435, "grad_norm": 0.7712891101837158, "learning_rate": 2.703032014717333e-07, "loss": 0.791184663772583, "step": 2083 }, { "epoch": 2.7033680069752823, "grad_norm": 0.7847064137458801, "learning_rate": 2.6798906813288117e-07, "loss": 0.7188366055488586, "step": 2084 }, { "epoch": 2.704665733925421, "grad_norm": 0.7422674298286438, "learning_rate": 2.656846105586919e-07, "loss": 0.7355191707611084, "step": 2085 }, { "epoch": 2.70596346087556, "grad_norm": 0.7793022394180298, "learning_rate": 2.633898334611995e-07, "loss": 0.7662262916564941, "step": 2086 }, { "epoch": 2.707261187825699, "grad_norm": 0.7930620312690735, "learning_rate": 2.6110474153264176e-07, "loss": 0.707592785358429, "step": 2087 }, { "epoch": 2.708558914775838, "grad_norm": 0.7076685428619385, "learning_rate": 2.588293394454533e-07, "loss": 0.6750243902206421, "step": 2088 }, { "epoch": 2.7098566417259766, "grad_norm": 0.7700866460800171, "learning_rate": 2.565636318522552e-07, "loss": 0.7775879502296448, "step": 2089 }, { "epoch": 2.711154368676116, "grad_norm": 0.7388302087783813, "learning_rate": 2.543076233858466e-07, "loss": 0.692900538444519, "step": 2090 }, { "epoch": 2.7124520956262548, "grad_norm": 0.7761010527610779, "learning_rate": 2.5206131865919303e-07, "loss": 0.7216837406158447, "step": 2091 }, { "epoch": 2.7137498225763936, "grad_norm": 0.7585817575454712, "learning_rate": 2.4982472226542045e-07, "loss": 0.6682706475257874, "step": 2092 }, { "epoch": 2.7150475495265325, "grad_norm": 0.76124107837677, "learning_rate": 2.475978387778e-07, "loss": 0.7049241065979004, "step": 2093 }, { "epoch": 2.7163452764766713, "grad_norm": 0.7612197995185852, "learning_rate": 2.453806727497482e-07, "loss": 0.7023073434829712, "step": 2094 }, { "epoch": 2.71764300342681, "grad_norm": 0.7852765321731567, "learning_rate": 2.431732287148053e-07, "loss": 0.7443613409996033, "step": 2095 }, { "epoch": 2.718940730376949, "grad_norm": 0.7986307144165039, "learning_rate": 2.409755111866369e-07, "loss": 0.7830970287322998, "step": 2096 }, { "epoch": 2.720238457327088, "grad_norm": 0.797486424446106, "learning_rate": 2.387875246590193e-07, "loss": 0.6837291717529297, "step": 2097 }, { "epoch": 2.7215361842772268, "grad_norm": 0.7682679295539856, "learning_rate": 2.3660927360583064e-07, "loss": 0.7355650663375854, "step": 2098 }, { "epoch": 2.722833911227366, "grad_norm": 0.7314669489860535, "learning_rate": 2.3444076248104297e-07, "loss": 0.6166611313819885, "step": 2099 }, { "epoch": 2.7241316381775045, "grad_norm": 0.7601985931396484, "learning_rate": 2.322819957187139e-07, "loss": 0.7629340291023254, "step": 2100 }, { "epoch": 2.7254293651276438, "grad_norm": 0.7485438585281372, "learning_rate": 2.3013297773297306e-07, "loss": 0.6586056351661682, "step": 2101 }, { "epoch": 2.7267270920777826, "grad_norm": 0.8016761541366577, "learning_rate": 2.279937129180204e-07, "loss": 0.7201554179191589, "step": 2102 }, { "epoch": 2.7280248190279215, "grad_norm": 0.7577768564224243, "learning_rate": 2.2586420564810863e-07, "loss": 0.7080650329589844, "step": 2103 }, { "epoch": 2.7293225459780603, "grad_norm": 0.7772164344787598, "learning_rate": 2.2374446027754405e-07, "loss": 0.7283114790916443, "step": 2104 }, { "epoch": 2.730620272928199, "grad_norm": 0.7661550641059875, "learning_rate": 2.2163448114066677e-07, "loss": 0.70591801404953, "step": 2105 }, { "epoch": 2.731917999878338, "grad_norm": 0.7516199946403503, "learning_rate": 2.1953427255185122e-07, "loss": 0.7168128490447998, "step": 2106 }, { "epoch": 2.733215726828477, "grad_norm": 0.7863711714744568, "learning_rate": 2.174438388054928e-07, "loss": 0.7204221487045288, "step": 2107 }, { "epoch": 2.7345134537786158, "grad_norm": 0.7704216837882996, "learning_rate": 2.1536318417599844e-07, "loss": 0.6871287822723389, "step": 2108 }, { "epoch": 2.7358111807287546, "grad_norm": 0.776828944683075, "learning_rate": 2.1329231291778108e-07, "loss": 0.7529214024543762, "step": 2109 }, { "epoch": 2.737108907678894, "grad_norm": 0.7328927516937256, "learning_rate": 2.1123122926524853e-07, "loss": 0.6814893484115601, "step": 2110 }, { "epoch": 2.7384066346290323, "grad_norm": 0.8207976222038269, "learning_rate": 2.0917993743279297e-07, "loss": 0.6837886571884155, "step": 2111 }, { "epoch": 2.7397043615791716, "grad_norm": 0.8150394558906555, "learning_rate": 2.0713844161479035e-07, "loss": 0.722987949848175, "step": 2112 }, { "epoch": 2.7410020885293105, "grad_norm": 0.7454245686531067, "learning_rate": 2.0510674598558045e-07, "loss": 0.6830301880836487, "step": 2113 }, { "epoch": 2.7422998154794493, "grad_norm": 0.757875919342041, "learning_rate": 2.0308485469946736e-07, "loss": 0.7439548373222351, "step": 2114 }, { "epoch": 2.743597542429588, "grad_norm": 0.7572675347328186, "learning_rate": 2.010727718907074e-07, "loss": 0.668762743473053, "step": 2115 }, { "epoch": 2.744895269379727, "grad_norm": 0.7444978356361389, "learning_rate": 1.9907050167349894e-07, "loss": 0.6855237483978271, "step": 2116 }, { "epoch": 2.746192996329866, "grad_norm": 0.7447678446769714, "learning_rate": 1.9707804814198096e-07, "loss": 0.6730313301086426, "step": 2117 }, { "epoch": 2.7474907232800048, "grad_norm": 0.7300037145614624, "learning_rate": 1.9509541537021392e-07, "loss": 0.6463099122047424, "step": 2118 }, { "epoch": 2.7487884502301436, "grad_norm": 0.7642115354537964, "learning_rate": 1.9312260741218114e-07, "loss": 0.6817460656166077, "step": 2119 }, { "epoch": 2.7500861771802825, "grad_norm": 0.7782988548278809, "learning_rate": 1.911596283017747e-07, "loss": 0.7150259613990784, "step": 2120 }, { "epoch": 2.7513839041304218, "grad_norm": 0.7755700945854187, "learning_rate": 1.8920648205279113e-07, "loss": 0.6812229156494141, "step": 2121 }, { "epoch": 2.75268163108056, "grad_norm": 0.7792496681213379, "learning_rate": 1.8726317265891968e-07, "loss": 0.7230182886123657, "step": 2122 }, { "epoch": 2.7539793580306995, "grad_norm": 0.7651738524436951, "learning_rate": 1.8532970409373684e-07, "loss": 0.6976583003997803, "step": 2123 }, { "epoch": 2.7552770849808383, "grad_norm": 0.7811776995658875, "learning_rate": 1.8340608031069462e-07, "loss": 0.7523854970932007, "step": 2124 }, { "epoch": 2.756574811930977, "grad_norm": 0.7669888138771057, "learning_rate": 1.8149230524311944e-07, "loss": 0.7343149781227112, "step": 2125 }, { "epoch": 2.757872538881116, "grad_norm": 0.7220097780227661, "learning_rate": 1.7958838280419387e-07, "loss": 0.6710058450698853, "step": 2126 }, { "epoch": 2.759170265831255, "grad_norm": 0.7744547724723816, "learning_rate": 1.7769431688696048e-07, "loss": 0.6996723413467407, "step": 2127 }, { "epoch": 2.7604679927813938, "grad_norm": 0.7946389317512512, "learning_rate": 1.7581011136430238e-07, "loss": 0.7537875771522522, "step": 2128 }, { "epoch": 2.7617657197315326, "grad_norm": 0.7883028388023376, "learning_rate": 1.739357700889438e-07, "loss": 0.732083261013031, "step": 2129 }, { "epoch": 2.7630634466816715, "grad_norm": 0.8040179014205933, "learning_rate": 1.720712968934385e-07, "loss": 0.8135465383529663, "step": 2130 }, { "epoch": 2.7643611736318103, "grad_norm": 0.7717416286468506, "learning_rate": 1.7021669559016184e-07, "loss": 0.696827232837677, "step": 2131 }, { "epoch": 2.7656589005819496, "grad_norm": 0.7489641308784485, "learning_rate": 1.6837196997130434e-07, "loss": 0.7431339025497437, "step": 2132 }, { "epoch": 2.7669566275320885, "grad_norm": 0.7535181045532227, "learning_rate": 1.6653712380886366e-07, "loss": 0.7000592350959778, "step": 2133 }, { "epoch": 2.7682543544822273, "grad_norm": 0.7728670239448547, "learning_rate": 1.6471216085463372e-07, "loss": 0.6311807036399841, "step": 2134 }, { "epoch": 2.769552081432366, "grad_norm": 0.7892481684684753, "learning_rate": 1.6289708484020395e-07, "loss": 0.7196534276008606, "step": 2135 }, { "epoch": 2.770849808382505, "grad_norm": 0.7621094584465027, "learning_rate": 1.6109189947694448e-07, "loss": 0.6594611406326294, "step": 2136 }, { "epoch": 2.772147535332644, "grad_norm": 0.8210483193397522, "learning_rate": 1.5929660845600215e-07, "loss": 0.736477255821228, "step": 2137 }, { "epoch": 2.7734452622827828, "grad_norm": 0.7322575449943542, "learning_rate": 1.575112154482933e-07, "loss": 0.7200109362602234, "step": 2138 }, { "epoch": 2.7747429892329216, "grad_norm": 0.7946739196777344, "learning_rate": 1.557357241044949e-07, "loss": 0.7807417511940002, "step": 2139 }, { "epoch": 2.7760407161830605, "grad_norm": 0.7464922666549683, "learning_rate": 1.539701380550368e-07, "loss": 0.6590375304222107, "step": 2140 }, { "epoch": 2.7773384431331998, "grad_norm": 0.774113118648529, "learning_rate": 1.5221446091009618e-07, "loss": 0.5982813239097595, "step": 2141 }, { "epoch": 2.778636170083338, "grad_norm": 0.8261796832084656, "learning_rate": 1.504686962595875e-07, "loss": 0.7688677906990051, "step": 2142 }, { "epoch": 2.7799338970334775, "grad_norm": 0.778244137763977, "learning_rate": 1.4873284767315864e-07, "loss": 0.6353880167007446, "step": 2143 }, { "epoch": 2.7812316239836163, "grad_norm": 0.7616711854934692, "learning_rate": 1.4700691870017991e-07, "loss": 0.7116013169288635, "step": 2144 }, { "epoch": 2.782529350933755, "grad_norm": 0.7771366834640503, "learning_rate": 1.4529091286973994e-07, "loss": 0.7575348615646362, "step": 2145 }, { "epoch": 2.783827077883894, "grad_norm": 0.7422629594802856, "learning_rate": 1.435848336906359e-07, "loss": 0.7005946040153503, "step": 2146 }, { "epoch": 2.785124804834033, "grad_norm": 0.748662531375885, "learning_rate": 1.418886846513673e-07, "loss": 0.7412527799606323, "step": 2147 }, { "epoch": 2.7864225317841718, "grad_norm": 0.8038177490234375, "learning_rate": 1.4020246922013093e-07, "loss": 0.6991332769393921, "step": 2148 }, { "epoch": 2.7877202587343106, "grad_norm": 0.7641218900680542, "learning_rate": 1.3852619084480933e-07, "loss": 0.6647689938545227, "step": 2149 }, { "epoch": 2.7890179856844495, "grad_norm": 0.7471143007278442, "learning_rate": 1.3685985295296798e-07, "loss": 0.735883355140686, "step": 2150 }, { "epoch": 2.7903157126345883, "grad_norm": 0.7782725095748901, "learning_rate": 1.3520345895184583e-07, "loss": 0.6804190278053284, "step": 2151 }, { "epoch": 2.7916134395847276, "grad_norm": 0.7638722658157349, "learning_rate": 1.3355701222835026e-07, "loss": 0.6945773363113403, "step": 2152 }, { "epoch": 2.792911166534866, "grad_norm": 0.7732612490653992, "learning_rate": 1.3192051614904722e-07, "loss": 0.6848861575126648, "step": 2153 }, { "epoch": 2.7942088934850053, "grad_norm": 0.7860000133514404, "learning_rate": 1.302939740601572e-07, "loss": 0.7503040432929993, "step": 2154 }, { "epoch": 2.795506620435144, "grad_norm": 0.7470289468765259, "learning_rate": 1.2867738928754703e-07, "loss": 0.687214195728302, "step": 2155 }, { "epoch": 2.796804347385283, "grad_norm": 0.7711262106895447, "learning_rate": 1.2707076513672423e-07, "loss": 0.7178612947463989, "step": 2156 }, { "epoch": 2.798102074335422, "grad_norm": 0.7954484224319458, "learning_rate": 1.2547410489282708e-07, "loss": 0.6872047781944275, "step": 2157 }, { "epoch": 2.7993998012855608, "grad_norm": 0.7886789441108704, "learning_rate": 1.2388741182062348e-07, "loss": 0.6837970614433289, "step": 2158 }, { "epoch": 2.8006975282356996, "grad_norm": 0.7388010621070862, "learning_rate": 1.2231068916449705e-07, "loss": 0.6859837174415588, "step": 2159 }, { "epoch": 2.8019952551858385, "grad_norm": 0.8267860412597656, "learning_rate": 1.2074394014844782e-07, "loss": 0.7664659023284912, "step": 2160 }, { "epoch": 2.8032929821359773, "grad_norm": 0.7906567454338074, "learning_rate": 1.1918716797608087e-07, "loss": 0.7450604438781738, "step": 2161 }, { "epoch": 2.804590709086116, "grad_norm": 0.7570729851722717, "learning_rate": 1.1764037583060162e-07, "loss": 0.7012235522270203, "step": 2162 }, { "epoch": 2.8058884360362555, "grad_norm": 0.7581570744514465, "learning_rate": 1.1610356687480728e-07, "loss": 0.7267595529556274, "step": 2163 }, { "epoch": 2.807186162986394, "grad_norm": 0.7544029951095581, "learning_rate": 1.1457674425108478e-07, "loss": 0.6990587115287781, "step": 2164 }, { "epoch": 2.808483889936533, "grad_norm": 0.7233387231826782, "learning_rate": 1.1305991108139847e-07, "loss": 0.7330483198165894, "step": 2165 }, { "epoch": 2.809781616886672, "grad_norm": 0.7781274914741516, "learning_rate": 1.1155307046728958e-07, "loss": 0.6811683773994446, "step": 2166 }, { "epoch": 2.811079343836811, "grad_norm": 0.776544988155365, "learning_rate": 1.1005622548986406e-07, "loss": 0.7087331414222717, "step": 2167 }, { "epoch": 2.8123770707869498, "grad_norm": 0.7924423217773438, "learning_rate": 1.0856937920979305e-07, "loss": 0.7304611802101135, "step": 2168 }, { "epoch": 2.8136747977370886, "grad_norm": 0.7693164944648743, "learning_rate": 1.0709253466729963e-07, "loss": 0.6941207051277161, "step": 2169 }, { "epoch": 2.8149725246872275, "grad_norm": 0.8000521063804626, "learning_rate": 1.0562569488215712e-07, "loss": 0.7666316628456116, "step": 2170 }, { "epoch": 2.8162702516373663, "grad_norm": 0.8068124055862427, "learning_rate": 1.0416886285368188e-07, "loss": 0.7321376800537109, "step": 2171 }, { "epoch": 2.817567978587505, "grad_norm": 0.7735725045204163, "learning_rate": 1.0272204156072663e-07, "loss": 0.7054159641265869, "step": 2172 }, { "epoch": 2.818865705537644, "grad_norm": 0.7532979249954224, "learning_rate": 1.012852339616749e-07, "loss": 0.6740676760673523, "step": 2173 }, { "epoch": 2.8201634324877833, "grad_norm": 0.7769542932510376, "learning_rate": 9.985844299443437e-08, "loss": 0.692727267742157, "step": 2174 }, { "epoch": 2.8214611594379218, "grad_norm": 0.7732757925987244, "learning_rate": 9.844167157643191e-08, "loss": 0.7220668196678162, "step": 2175 }, { "epoch": 2.822758886388061, "grad_norm": 0.748921275138855, "learning_rate": 9.703492260460578e-08, "loss": 0.7080860733985901, "step": 2176 }, { "epoch": 2.8240566133382, "grad_norm": 0.7941139936447144, "learning_rate": 9.563819895540172e-08, "loss": 0.8114986419677734, "step": 2177 }, { "epoch": 2.8253543402883388, "grad_norm": 0.7770174145698547, "learning_rate": 9.42515034847663e-08, "loss": 0.7355633974075317, "step": 2178 }, { "epoch": 2.8266520672384776, "grad_norm": 0.7767409086227417, "learning_rate": 9.287483902814087e-08, "loss": 0.7234621644020081, "step": 2179 }, { "epoch": 2.8279497941886165, "grad_norm": 0.791036069393158, "learning_rate": 9.150820840045483e-08, "loss": 0.7287334203720093, "step": 2180 }, { "epoch": 2.8292475211387553, "grad_norm": 0.7541413903236389, "learning_rate": 9.015161439612396e-08, "loss": 0.7405381798744202, "step": 2181 }, { "epoch": 2.830545248088894, "grad_norm": 0.7417975068092346, "learning_rate": 8.880505978903719e-08, "loss": 0.7482308149337769, "step": 2182 }, { "epoch": 2.831842975039033, "grad_norm": 0.7411255240440369, "learning_rate": 8.746854733255982e-08, "loss": 0.7017207145690918, "step": 2183 }, { "epoch": 2.833140701989172, "grad_norm": 0.7665935754776001, "learning_rate": 8.614207975952083e-08, "loss": 0.7161105871200562, "step": 2184 }, { "epoch": 2.834438428939311, "grad_norm": 0.7866116762161255, "learning_rate": 8.482565978221002e-08, "loss": 0.7364633679389954, "step": 2185 }, { "epoch": 2.83573615588945, "grad_norm": 0.7433639764785767, "learning_rate": 8.351929009237425e-08, "loss": 0.7004378437995911, "step": 2186 }, { "epoch": 2.837033882839589, "grad_norm": 0.7636539936065674, "learning_rate": 8.222297336120844e-08, "loss": 0.7470002770423889, "step": 2187 }, { "epoch": 2.8383316097897278, "grad_norm": 0.7847253680229187, "learning_rate": 8.093671223935118e-08, "loss": 0.6827197670936584, "step": 2188 }, { "epoch": 2.8396293367398666, "grad_norm": 0.7404457926750183, "learning_rate": 7.966050935688252e-08, "loss": 0.6898380517959595, "step": 2189 }, { "epoch": 2.8409270636900055, "grad_norm": 0.7589480876922607, "learning_rate": 7.839436732331285e-08, "loss": 0.6429364681243896, "step": 2190 }, { "epoch": 2.8422247906401443, "grad_norm": 0.768264889717102, "learning_rate": 7.7138288727584e-08, "loss": 0.7231125831604004, "step": 2191 }, { "epoch": 2.843522517590283, "grad_norm": 0.7590683698654175, "learning_rate": 7.589227613805705e-08, "loss": 0.7650172114372253, "step": 2192 }, { "epoch": 2.844820244540422, "grad_norm": 0.7730223536491394, "learning_rate": 7.465633210251344e-08, "loss": 0.7363346219062805, "step": 2193 }, { "epoch": 2.8461179714905613, "grad_norm": 0.7730546593666077, "learning_rate": 7.343045914814495e-08, "loss": 0.754916787147522, "step": 2194 }, { "epoch": 2.8474156984406997, "grad_norm": 0.7889825105667114, "learning_rate": 7.221465978155262e-08, "loss": 0.6751682758331299, "step": 2195 }, { "epoch": 2.848713425390839, "grad_norm": 0.807558000087738, "learning_rate": 7.10089364887373e-08, "loss": 0.7206065058708191, "step": 2196 }, { "epoch": 2.850011152340978, "grad_norm": 0.8067498207092285, "learning_rate": 6.981329173509909e-08, "loss": 0.7805840969085693, "step": 2197 }, { "epoch": 2.8513088792911168, "grad_norm": 0.7476110458374023, "learning_rate": 6.862772796542794e-08, "loss": 0.7597277760505676, "step": 2198 }, { "epoch": 2.8526066062412556, "grad_norm": 0.8052570819854736, "learning_rate": 6.745224760390246e-08, "loss": 0.7186450958251953, "step": 2199 }, { "epoch": 2.8539043331913945, "grad_norm": 0.7551132440567017, "learning_rate": 6.628685305408166e-08, "loss": 0.6708953976631165, "step": 2200 }, { "epoch": 2.8552020601415333, "grad_norm": 0.7911965250968933, "learning_rate": 6.513154669890221e-08, "loss": 0.6416589617729187, "step": 2201 }, { "epoch": 2.856499787091672, "grad_norm": 0.7725633382797241, "learning_rate": 6.398633090067497e-08, "loss": 0.6717783212661743, "step": 2202 }, { "epoch": 2.857797514041811, "grad_norm": 0.795801043510437, "learning_rate": 6.285120800107402e-08, "loss": 0.6958252787590027, "step": 2203 }, { "epoch": 2.85909524099195, "grad_norm": 0.7685935497283936, "learning_rate": 6.172618032114108e-08, "loss": 0.6823325753211975, "step": 2204 }, { "epoch": 2.860392967942089, "grad_norm": 0.8054656386375427, "learning_rate": 6.061125016127045e-08, "loss": 0.7556108236312866, "step": 2205 }, { "epoch": 2.8616906948922276, "grad_norm": 0.7606475949287415, "learning_rate": 5.950641980121352e-08, "loss": 0.7092191576957703, "step": 2206 }, { "epoch": 2.862988421842367, "grad_norm": 0.7591357231140137, "learning_rate": 5.84116915000682e-08, "loss": 0.7214552164077759, "step": 2207 }, { "epoch": 2.8642861487925058, "grad_norm": 0.74445641040802, "learning_rate": 5.732706749627726e-08, "loss": 0.7109599113464355, "step": 2208 }, { "epoch": 2.8655838757426446, "grad_norm": 0.7537177205085754, "learning_rate": 5.6252550007621645e-08, "loss": 0.7794804573059082, "step": 2209 }, { "epoch": 2.8668816026927835, "grad_norm": 0.7885766625404358, "learning_rate": 5.518814123121885e-08, "loss": 0.7224581837654114, "step": 2210 }, { "epoch": 2.8681793296429223, "grad_norm": 0.770478367805481, "learning_rate": 5.413384334351346e-08, "loss": 0.6955745816230774, "step": 2211 }, { "epoch": 2.869477056593061, "grad_norm": 0.7760496139526367, "learning_rate": 5.308965850027992e-08, "loss": 0.7298511862754822, "step": 2212 }, { "epoch": 2.8707747835432, "grad_norm": 0.8011616468429565, "learning_rate": 5.205558883661033e-08, "loss": 0.7658096551895142, "step": 2213 }, { "epoch": 2.872072510493339, "grad_norm": 0.760274350643158, "learning_rate": 5.103163646691611e-08, "loss": 0.7171127200126648, "step": 2214 }, { "epoch": 2.8733702374434777, "grad_norm": 0.7662195563316345, "learning_rate": 5.00178034849208e-08, "loss": 0.7367060780525208, "step": 2215 }, { "epoch": 2.874667964393617, "grad_norm": 0.7544783353805542, "learning_rate": 4.9014091963655584e-08, "loss": 0.6698785424232483, "step": 2216 }, { "epoch": 2.8759656913437555, "grad_norm": 0.8063926696777344, "learning_rate": 4.802050395545765e-08, "loss": 0.7871376276016235, "step": 2217 }, { "epoch": 2.8772634182938948, "grad_norm": 0.7655091881752014, "learning_rate": 4.703704149196187e-08, "loss": 0.7300557494163513, "step": 2218 }, { "epoch": 2.8785611452440336, "grad_norm": 0.7799139618873596, "learning_rate": 4.6063706584100196e-08, "loss": 0.6737034916877747, "step": 2219 }, { "epoch": 2.8798588721941725, "grad_norm": 0.7586493492126465, "learning_rate": 4.5100501222097304e-08, "loss": 0.825034499168396, "step": 2220 }, { "epoch": 2.8811565991443113, "grad_norm": 0.7479345798492432, "learning_rate": 4.414742737546274e-08, "loss": 0.7313747406005859, "step": 2221 }, { "epoch": 2.88245432609445, "grad_norm": 0.8680388331413269, "learning_rate": 4.320448699299262e-08, "loss": 0.8321331143379211, "step": 2222 }, { "epoch": 2.883752053044589, "grad_norm": 0.7766653895378113, "learning_rate": 4.227168200276077e-08, "loss": 0.6965093016624451, "step": 2223 }, { "epoch": 2.885049779994728, "grad_norm": 0.7875890135765076, "learning_rate": 4.134901431211702e-08, "loss": 0.7395253777503967, "step": 2224 }, { "epoch": 2.8863475069448667, "grad_norm": 0.7455689907073975, "learning_rate": 4.043648580768389e-08, "loss": 0.717605471611023, "step": 2225 }, { "epoch": 2.8876452338950056, "grad_norm": 0.7336239218711853, "learning_rate": 3.953409835535049e-08, "loss": 0.663152813911438, "step": 2226 }, { "epoch": 2.888942960845145, "grad_norm": 0.7704851627349854, "learning_rate": 3.8641853800271414e-08, "loss": 0.6977202892303467, "step": 2227 }, { "epoch": 2.8902406877952833, "grad_norm": 0.7432349324226379, "learning_rate": 3.77597539668606e-08, "loss": 0.6897647976875305, "step": 2228 }, { "epoch": 2.8915384147454226, "grad_norm": 0.7698100209236145, "learning_rate": 3.688780065878916e-08, "loss": 0.6869649291038513, "step": 2229 }, { "epoch": 2.8928361416955615, "grad_norm": 0.7458007335662842, "learning_rate": 3.602599565898091e-08, "loss": 0.6704892516136169, "step": 2230 }, { "epoch": 2.8941338686457003, "grad_norm": 0.7543532252311707, "learning_rate": 3.517434072960901e-08, "loss": 0.7490587830543518, "step": 2231 }, { "epoch": 2.895431595595839, "grad_norm": 0.7413858771324158, "learning_rate": 3.433283761209161e-08, "loss": 0.7437562942504883, "step": 2232 }, { "epoch": 2.896729322545978, "grad_norm": 0.8167586326599121, "learning_rate": 3.3501488027090635e-08, "loss": 0.6863840818405151, "step": 2233 }, { "epoch": 2.898027049496117, "grad_norm": 0.7584203481674194, "learning_rate": 3.268029367450465e-08, "loss": 0.7401635646820068, "step": 2234 }, { "epoch": 2.8993247764462557, "grad_norm": 0.7506893277168274, "learning_rate": 3.186925623346882e-08, "loss": 0.6926859617233276, "step": 2235 }, { "epoch": 2.9006225033963946, "grad_norm": 0.7473459839820862, "learning_rate": 3.10683773623488e-08, "loss": 0.7053976655006409, "step": 2236 }, { "epoch": 2.9019202303465335, "grad_norm": 0.7659155130386353, "learning_rate": 3.0277658698739665e-08, "loss": 0.7118337154388428, "step": 2237 }, { "epoch": 2.9032179572966728, "grad_norm": 0.7984611392021179, "learning_rate": 2.9497101859460865e-08, "loss": 0.6703416109085083, "step": 2238 }, { "epoch": 2.904515684246811, "grad_norm": 0.7309507727622986, "learning_rate": 2.872670844055403e-08, "loss": 0.7074156999588013, "step": 2239 }, { "epoch": 2.9058134111969505, "grad_norm": 0.770394504070282, "learning_rate": 2.7966480017277974e-08, "loss": 0.6736186146736145, "step": 2240 }, { "epoch": 2.9071111381470893, "grad_norm": 0.7793900370597839, "learning_rate": 2.7216418144107583e-08, "loss": 0.7187858819961548, "step": 2241 }, { "epoch": 2.908408865097228, "grad_norm": 0.7602792382240295, "learning_rate": 2.6476524354729917e-08, "loss": 0.6807897686958313, "step": 2242 }, { "epoch": 2.909706592047367, "grad_norm": 0.771725594997406, "learning_rate": 2.5746800162040342e-08, "loss": 0.7109719514846802, "step": 2243 }, { "epoch": 2.911004318997506, "grad_norm": 0.7642435431480408, "learning_rate": 2.5027247058139748e-08, "loss": 0.7177394032478333, "step": 2244 }, { "epoch": 2.9123020459476447, "grad_norm": 0.7589951753616333, "learning_rate": 2.4317866514332322e-08, "loss": 0.6757906675338745, "step": 2245 }, { "epoch": 2.9135997728977836, "grad_norm": 0.7996797561645508, "learning_rate": 2.361865998112223e-08, "loss": 0.7429489493370056, "step": 2246 }, { "epoch": 2.9148974998479225, "grad_norm": 0.760643482208252, "learning_rate": 2.2929628888209156e-08, "loss": 0.7641419172286987, "step": 2247 }, { "epoch": 2.9161952267980613, "grad_norm": 0.7626227736473083, "learning_rate": 2.2250774644487215e-08, "loss": 0.7242019176483154, "step": 2248 }, { "epoch": 2.9174929537482006, "grad_norm": 0.7831689715385437, "learning_rate": 2.158209863804217e-08, "loss": 0.7359440326690674, "step": 2249 }, { "epoch": 2.9187906806983395, "grad_norm": 0.7844473719596863, "learning_rate": 2.0923602236146977e-08, "loss": 0.7149016261100769, "step": 2250 }, { "epoch": 2.9200884076484783, "grad_norm": 0.7542704939842224, "learning_rate": 2.0275286785260694e-08, "loss": 0.7106453776359558, "step": 2251 }, { "epoch": 2.921386134598617, "grad_norm": 0.7972451448440552, "learning_rate": 1.9637153611022365e-08, "loss": 0.7038116455078125, "step": 2252 }, { "epoch": 2.922683861548756, "grad_norm": 0.7626628279685974, "learning_rate": 1.9009204018255456e-08, "loss": 0.6864380240440369, "step": 2253 }, { "epoch": 2.923981588498895, "grad_norm": 0.7927265763282776, "learning_rate": 1.839143929095566e-08, "loss": 0.6850247979164124, "step": 2254 }, { "epoch": 2.9252793154490337, "grad_norm": 0.736191987991333, "learning_rate": 1.7783860692296982e-08, "loss": 0.7299352884292603, "step": 2255 }, { "epoch": 2.9265770423991726, "grad_norm": 0.7745212316513062, "learning_rate": 1.718646946462288e-08, "loss": 0.7246094942092896, "step": 2256 }, { "epoch": 2.9278747693493115, "grad_norm": 0.8178421854972839, "learning_rate": 1.6599266829447902e-08, "loss": 0.7505685091018677, "step": 2257 }, { "epoch": 2.9291724962994508, "grad_norm": 0.7724591493606567, "learning_rate": 1.6022253987452717e-08, "loss": 0.6581344604492188, "step": 2258 }, { "epoch": 2.930470223249589, "grad_norm": 0.780160129070282, "learning_rate": 1.5455432118481884e-08, "loss": 0.7304115295410156, "step": 2259 }, { "epoch": 2.9317679501997285, "grad_norm": 0.752224862575531, "learning_rate": 1.4898802381543842e-08, "loss": 0.7192541360855103, "step": 2260 }, { "epoch": 2.9330656771498673, "grad_norm": 0.789572536945343, "learning_rate": 1.4352365914804822e-08, "loss": 0.7106218338012695, "step": 2261 }, { "epoch": 2.934363404100006, "grad_norm": 0.781013011932373, "learning_rate": 1.3816123835588835e-08, "loss": 0.7548207640647888, "step": 2262 }, { "epoch": 2.935661131050145, "grad_norm": 0.8188384175300598, "learning_rate": 1.3290077240375453e-08, "loss": 0.715192437171936, "step": 2263 }, { "epoch": 2.936958858000284, "grad_norm": 0.7365001440048218, "learning_rate": 1.277422720479704e-08, "loss": 0.7308194041252136, "step": 2264 }, { "epoch": 2.9382565849504227, "grad_norm": 0.7814714908599854, "learning_rate": 1.2268574783635968e-08, "loss": 0.8294280171394348, "step": 2265 }, { "epoch": 2.9395543119005616, "grad_norm": 0.7368665337562561, "learning_rate": 1.1773121010824063e-08, "loss": 0.7195689082145691, "step": 2266 }, { "epoch": 2.9408520388507005, "grad_norm": 0.7893995642662048, "learning_rate": 1.1287866899438171e-08, "loss": 0.7487694621086121, "step": 2267 }, { "epoch": 2.9421497658008393, "grad_norm": 0.737452507019043, "learning_rate": 1.081281344170071e-08, "loss": 0.6512093544006348, "step": 2268 }, { "epoch": 2.9434474927509786, "grad_norm": 0.8004632592201233, "learning_rate": 1.0347961608975221e-08, "loss": 0.7287921905517578, "step": 2269 }, { "epoch": 2.944745219701117, "grad_norm": 0.7655004262924194, "learning_rate": 9.893312351766382e-09, "loss": 0.7819275259971619, "step": 2270 }, { "epoch": 2.9460429466512563, "grad_norm": 0.7713659405708313, "learning_rate": 9.448866599717221e-09, "loss": 0.6854069232940674, "step": 2271 }, { "epoch": 2.947340673601395, "grad_norm": 0.746204137802124, "learning_rate": 9.014625261605791e-09, "loss": 0.7135393023490906, "step": 2272 }, { "epoch": 2.948638400551534, "grad_norm": 0.7569589614868164, "learning_rate": 8.590589225346834e-09, "loss": 0.7075472474098206, "step": 2273 }, { "epoch": 2.949936127501673, "grad_norm": 0.7612638473510742, "learning_rate": 8.17675935798623e-09, "loss": 0.7831802368164062, "step": 2274 }, { "epoch": 2.9512338544518117, "grad_norm": 0.7586123943328857, "learning_rate": 7.773136505700995e-09, "loss": 0.6527408361434937, "step": 2275 }, { "epoch": 2.9525315814019506, "grad_norm": 0.7621296048164368, "learning_rate": 7.379721493798176e-09, "loss": 0.7169144153594971, "step": 2276 }, { "epoch": 2.9538293083520895, "grad_norm": 0.7727404236793518, "learning_rate": 6.996515126711511e-09, "loss": 0.6721253991127014, "step": 2277 }, { "epoch": 2.9551270353022283, "grad_norm": 0.8339347243309021, "learning_rate": 6.623518188001443e-09, "loss": 0.7189649939537048, "step": 2278 }, { "epoch": 2.956424762252367, "grad_norm": 0.8069379925727844, "learning_rate": 6.260731440351775e-09, "loss": 0.7410060167312622, "step": 2279 }, { "epoch": 2.9577224892025065, "grad_norm": 0.7937390804290771, "learning_rate": 5.908155625570233e-09, "loss": 0.7048535943031311, "step": 2280 }, { "epoch": 2.959020216152645, "grad_norm": 0.72493577003479, "learning_rate": 5.56579146458458e-09, "loss": 0.723403811454773, "step": 2281 }, { "epoch": 2.960317943102784, "grad_norm": 0.7555978298187256, "learning_rate": 5.233639657443168e-09, "loss": 0.739270806312561, "step": 2282 }, { "epoch": 2.961615670052923, "grad_norm": 0.7471567392349243, "learning_rate": 4.911700883312165e-09, "loss": 0.6872268319129944, "step": 2283 }, { "epoch": 2.962913397003062, "grad_norm": 0.7928717732429504, "learning_rate": 4.599975800475553e-09, "loss": 0.7023618817329407, "step": 2284 }, { "epoch": 2.9642111239532007, "grad_norm": 0.733587384223938, "learning_rate": 4.298465046331246e-09, "loss": 0.715094268321991, "step": 2285 }, { "epoch": 2.9655088509033396, "grad_norm": 0.7716489434242249, "learning_rate": 4.007169237392749e-09, "loss": 0.651224672794342, "step": 2286 }, { "epoch": 2.9668065778534785, "grad_norm": 0.7328708171844482, "learning_rate": 3.726088969286945e-09, "loss": 0.7175962328910828, "step": 2287 }, { "epoch": 2.9681043048036173, "grad_norm": 0.7712464332580566, "learning_rate": 3.4552248167507576e-09, "loss": 0.7173080444335938, "step": 2288 }, { "epoch": 2.969402031753756, "grad_norm": 0.7157249450683594, "learning_rate": 3.1945773336333754e-09, "loss": 0.6980288028717041, "step": 2289 }, { "epoch": 2.970699758703895, "grad_norm": 0.7634878158569336, "learning_rate": 2.9441470528929206e-09, "loss": 0.7472588419914246, "step": 2290 }, { "epoch": 2.9719974856540343, "grad_norm": 0.8366067409515381, "learning_rate": 2.703934486595894e-09, "loss": 0.7743273377418518, "step": 2291 }, { "epoch": 2.9732952126041727, "grad_norm": 0.7398233413696289, "learning_rate": 2.4739401259160635e-09, "loss": 0.7106281518936157, "step": 2292 }, { "epoch": 2.974592939554312, "grad_norm": 0.8272362351417542, "learning_rate": 2.2541644411344653e-09, "loss": 0.7096326351165771, "step": 2293 }, { "epoch": 2.975890666504451, "grad_norm": 0.7860119938850403, "learning_rate": 2.0446078816355186e-09, "loss": 0.6308723092079163, "step": 2294 }, { "epoch": 2.9771883934545897, "grad_norm": 0.734978199005127, "learning_rate": 1.8452708759097993e-09, "loss": 0.7198217511177063, "step": 2295 }, { "epoch": 2.9784861204047286, "grad_norm": 0.7446017265319824, "learning_rate": 1.656153831551821e-09, "loss": 0.7086501717567444, "step": 2296 }, { "epoch": 2.9797838473548675, "grad_norm": 0.7879230380058289, "learning_rate": 1.4772571352567044e-09, "loss": 0.7350752353668213, "step": 2297 }, { "epoch": 2.9810815743050063, "grad_norm": 0.8034906983375549, "learning_rate": 1.3085811528240622e-09, "loss": 0.7501051425933838, "step": 2298 }, { "epoch": 2.982379301255145, "grad_norm": 0.8161899447441101, "learning_rate": 1.1501262291530034e-09, "loss": 0.7630376219749451, "step": 2299 }, { "epoch": 2.983677028205284, "grad_norm": 0.7429082989692688, "learning_rate": 1.0018926882443548e-09, "loss": 0.747724175453186, "step": 2300 }, { "epoch": 2.983677028205284, "eval_loss": 0.7602248191833496, "eval_runtime": 139.9566, "eval_samples_per_second": 37.097, "eval_steps_per_second": 9.274, "step": 2300 }, { "epoch": 2.984974755155423, "grad_norm": 0.7769709229469299, "learning_rate": 8.638808331973281e-10, "loss": 0.7226245403289795, "step": 2301 }, { "epoch": 2.986272482105562, "grad_norm": 0.7434839010238647, "learning_rate": 7.360909462111876e-10, "loss": 0.7368016242980957, "step": 2302 }, { "epoch": 2.987570209055701, "grad_norm": 0.7686272263526917, "learning_rate": 6.185232885846937e-10, "loss": 0.7323869466781616, "step": 2303 }, { "epoch": 2.98886793600584, "grad_norm": 0.7547258138656616, "learning_rate": 5.111781007138827e-10, "loss": 0.7003946900367737, "step": 2304 }, { "epoch": 2.9901656629559787, "grad_norm": 0.7800252437591553, "learning_rate": 4.1405560209206716e-10, "loss": 0.7231513261795044, "step": 2305 }, { "epoch": 2.9914633899061176, "grad_norm": 0.7863386869430542, "learning_rate": 3.2715599131039053e-10, "loss": 0.7120152115821838, "step": 2306 }, { "epoch": 2.9927611168562565, "grad_norm": 0.8008667230606079, "learning_rate": 2.5047944605616215e-10, "loss": 0.7078871726989746, "step": 2307 }, { "epoch": 2.9940588438063953, "grad_norm": 0.7796193361282349, "learning_rate": 1.840261231139673e-10, "loss": 0.6753217577934265, "step": 2308 }, { "epoch": 2.995356570756534, "grad_norm": 0.7583023905754089, "learning_rate": 1.2779615836455706e-10, "loss": 0.7384961247444153, "step": 2309 }, { "epoch": 2.996654297706673, "grad_norm": 0.7805288434028625, "learning_rate": 8.17896667826279e-11, "loss": 0.7757540941238403, "step": 2310 }, { "epoch": 2.9979520246568123, "grad_norm": 0.7969951033592224, "learning_rate": 4.600674244070735e-11, "loss": 0.6859312653541565, "step": 2311 }, { "epoch": 2.9992497516069507, "grad_norm": 0.8133782148361206, "learning_rate": 2.04474585052683e-11, "loss": 0.6985263824462891, "step": 2312 }, { "epoch": 3.0, "grad_norm": 1.0103119611740112, "learning_rate": 5.11186723950452e-12, "loss": 0.8292257785797119, "step": 2313 }, { "epoch": 3.0, "step": 2313, "total_flos": 4.1917370482093916e+18, "train_loss": 0.07527349776537244, "train_runtime": 3319.5738, "train_samples_per_second": 89.138, "train_steps_per_second": 0.697 } ], "logging_steps": 1.0, "max_steps": 2313, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 230, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.1917370482093916e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }