{ "best_global_step": 26600, "best_metric": 0.33519282937049866, "best_model_checkpoint": "/home/u111169/wrkdir/mgh/aav/checkpoints/esm-2_8m-kidney_aav2_final_0_2_valid/checkpoint-26600", "epoch": 178.5252525252525, "eval_steps": 100, "global_step": 26600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06734006734006734, "grad_norm": 781.5267333984375, "learning_rate": 6e-07, "loss": 63.2428, "step": 10 }, { "epoch": 0.13468013468013468, "grad_norm": 654.272705078125, "learning_rate": 1.6e-06, "loss": 56.7204, "step": 20 }, { "epoch": 0.20202020202020202, "grad_norm": 1275.9813232421875, "learning_rate": 1.999942908565361e-06, "loss": 45.3909, "step": 30 }, { "epoch": 0.26936026936026936, "grad_norm": 760.6285400390625, "learning_rate": 1.999486216200688e-06, "loss": 28.4574, "step": 40 }, { "epoch": 0.3367003367003367, "grad_norm": 271.4268493652344, "learning_rate": 1.9985730400511657e-06, "loss": 30.0345, "step": 50 }, { "epoch": 0.40404040404040403, "grad_norm": 292.2894287109375, "learning_rate": 1.9972037971811797e-06, "loss": 22.7876, "step": 60 }, { "epoch": 0.4713804713804714, "grad_norm": 368.51910400390625, "learning_rate": 1.9953791129491983e-06, "loss": 20.1692, "step": 70 }, { "epoch": 0.5387205387205387, "grad_norm": 385.5082092285156, "learning_rate": 1.9930998207221547e-06, "loss": 13.6386, "step": 80 }, { "epoch": 0.6060606060606061, "grad_norm": 129.90162658691406, "learning_rate": 1.990366961494838e-06, "loss": 7.9841, "step": 90 }, { "epoch": 0.6734006734006734, "grad_norm": 55.724754333496094, "learning_rate": 1.98718178341445e-06, "loss": 6.0292, "step": 100 }, { "epoch": 0.6734006734006734, "eval_loss": 0.6912877559661865, "eval_mae": 0.5396032929420471, "eval_mse": 0.6912876963615417, "eval_r2": -0.5120720863342285, "eval_rmse": 0.8314371271248006, "eval_runtime": 10.7794, "eval_samples_per_second": 440.564, "eval_steps_per_second": 13.823, "step": 100 }, { "epoch": 0.7407407407407407, "grad_norm": 103.99578094482422, "learning_rate": 1.983545741210553e-06, "loss": 5.788, "step": 110 }, { "epoch": 0.8080808080808081, "grad_norm": 47.12553405761719, "learning_rate": 1.979460495530667e-06, "loss": 4.2852, "step": 120 }, { "epoch": 0.8754208754208754, "grad_norm": 52.737491607666016, "learning_rate": 1.9749279121818236e-06, "loss": 3.9181, "step": 130 }, { "epoch": 0.9427609427609428, "grad_norm": 55.41446304321289, "learning_rate": 1.969950061278417e-06, "loss": 4.6145, "step": 140 }, { "epoch": 1.0067340067340067, "grad_norm": 33.14408874511719, "learning_rate": 1.9645292162967424e-06, "loss": 3.7359, "step": 150 }, { "epoch": 1.074074074074074, "grad_norm": 18.13965606689453, "learning_rate": 1.9586678530366606e-06, "loss": 4.2128, "step": 160 }, { "epoch": 1.1414141414141414, "grad_norm": 16.053260803222656, "learning_rate": 1.952368648490852e-06, "loss": 4.7942, "step": 170 }, { "epoch": 1.2087542087542087, "grad_norm": 57.42170333862305, "learning_rate": 1.9456344796221867e-06, "loss": 4.5706, "step": 180 }, { "epoch": 1.2760942760942762, "grad_norm": 58.637107849121094, "learning_rate": 1.9384684220497604e-06, "loss": 3.9771, "step": 190 }, { "epoch": 1.3434343434343434, "grad_norm": 60.6893310546875, "learning_rate": 1.930873748644204e-06, "loss": 3.6548, "step": 200 }, { "epoch": 1.3434343434343434, "eval_loss": 0.505974531173706, "eval_mae": 0.4725041687488556, "eval_mse": 0.505974531173706, "eval_r2": -0.10673165321350098, "eval_rmse": 0.7113188674383002, "eval_runtime": 10.7764, "eval_samples_per_second": 440.686, "eval_steps_per_second": 13.827, "step": 200 }, { "epoch": 1.410774410774411, "grad_norm": 33.26737976074219, "learning_rate": 1.922853928032904e-06, "loss": 3.4553, "step": 210 }, { "epoch": 1.4781144781144782, "grad_norm": 65.54931640625, "learning_rate": 1.9144126230158124e-06, "loss": 4.2887, "step": 220 }, { "epoch": 1.5454545454545454, "grad_norm": 31.48789405822754, "learning_rate": 1.9055536888925842e-06, "loss": 3.7633, "step": 230 }, { "epoch": 1.612794612794613, "grad_norm": 28.179698944091797, "learning_rate": 1.8962811717017866e-06, "loss": 3.8827, "step": 240 }, { "epoch": 1.6801346801346801, "grad_norm": 8.18234920501709, "learning_rate": 1.8865993063730002e-06, "loss": 3.9573, "step": 250 }, { "epoch": 1.7474747474747474, "grad_norm": 23.074295043945312, "learning_rate": 1.8765125147926475e-06, "loss": 4.0115, "step": 260 }, { "epoch": 1.8148148148148149, "grad_norm": 15.709874153137207, "learning_rate": 1.8660254037844386e-06, "loss": 3.8035, "step": 270 }, { "epoch": 1.8821548821548821, "grad_norm": 54.63130569458008, "learning_rate": 1.8551427630053463e-06, "loss": 3.9396, "step": 280 }, { "epoch": 1.9494949494949494, "grad_norm": 30.35171890258789, "learning_rate": 1.8438695627580832e-06, "loss": 3.2816, "step": 290 }, { "epoch": 2.0134680134680134, "grad_norm": 32.97279357910156, "learning_rate": 1.8322109517210737e-06, "loss": 3.2172, "step": 300 }, { "epoch": 2.0134680134680134, "eval_loss": 0.46103307604789734, "eval_mae": 0.4514959454536438, "eval_mse": 0.46103307604789734, "eval_r2": -0.008430004119873047, "eval_rmse": 0.6789941649586522, "eval_runtime": 10.7626, "eval_samples_per_second": 441.249, "eval_steps_per_second": 13.844, "step": 300 }, { "epoch": 2.080808080808081, "grad_norm": 32.494178771972656, "learning_rate": 1.8201722545969557e-06, "loss": 3.4614, "step": 310 }, { "epoch": 2.148148148148148, "grad_norm": 14.545453071594238, "learning_rate": 1.8077589696806924e-06, "loss": 3.0583, "step": 320 }, { "epoch": 2.2154882154882154, "grad_norm": 68.84071350097656, "learning_rate": 1.7949767663483977e-06, "loss": 3.7176, "step": 330 }, { "epoch": 2.282828282828283, "grad_norm": 48.53615951538086, "learning_rate": 1.7818314824680298e-06, "loss": 3.9939, "step": 340 }, { "epoch": 2.3501683501683504, "grad_norm": 35.58967208862305, "learning_rate": 1.7683291217331277e-06, "loss": 3.8148, "step": 350 }, { "epoch": 2.4175084175084174, "grad_norm": 15.298365592956543, "learning_rate": 1.7544758509208143e-06, "loss": 3.4919, "step": 360 }, { "epoch": 2.484848484848485, "grad_norm": 9.229326248168945, "learning_rate": 1.7402779970753154e-06, "loss": 3.3932, "step": 370 }, { "epoch": 2.5521885521885523, "grad_norm": 30.755229949951172, "learning_rate": 1.725742044618282e-06, "loss": 3.3415, "step": 380 }, { "epoch": 2.6195286195286194, "grad_norm": 21.976978302001953, "learning_rate": 1.710874632387235e-06, "loss": 3.6011, "step": 390 }, { "epoch": 2.686868686868687, "grad_norm": 13.080004692077637, "learning_rate": 1.6956825506034863e-06, "loss": 3.6762, "step": 400 }, { "epoch": 2.686868686868687, "eval_loss": 0.4433795213699341, "eval_mae": 0.44258227944374084, "eval_mse": 0.4433795213699341, "eval_r2": 0.03018409013748169, "eval_rmse": 0.6658674953546945, "eval_runtime": 10.7828, "eval_samples_per_second": 440.425, "eval_steps_per_second": 13.818, "step": 400 }, { "epoch": 2.7542087542087543, "grad_norm": 15.315389633178711, "learning_rate": 1.6801727377709191e-06, "loss": 3.5066, "step": 410 }, { "epoch": 2.821548821548822, "grad_norm": 16.320594787597656, "learning_rate": 1.6643522775070459e-06, "loss": 3.6671, "step": 420 }, { "epoch": 2.888888888888889, "grad_norm": 18.260791778564453, "learning_rate": 1.6482283953077884e-06, "loss": 3.4548, "step": 430 }, { "epoch": 2.9562289562289563, "grad_norm": 31.220006942749023, "learning_rate": 1.6318084552474614e-06, "loss": 3.9447, "step": 440 }, { "epoch": 3.0202020202020203, "grad_norm": 19.56720733642578, "learning_rate": 1.615099956615464e-06, "loss": 2.9038, "step": 450 }, { "epoch": 3.0875420875420874, "grad_norm": 16.310138702392578, "learning_rate": 1.5981105304912159e-06, "loss": 3.5311, "step": 460 }, { "epoch": 3.154882154882155, "grad_norm": 44.523536682128906, "learning_rate": 1.5808479362589026e-06, "loss": 3.3869, "step": 470 }, { "epoch": 3.2222222222222223, "grad_norm": 22.731264114379883, "learning_rate": 1.563320058063622e-06, "loss": 3.6333, "step": 480 }, { "epoch": 3.28956228956229, "grad_norm": 15.506272315979004, "learning_rate": 1.5455349012105486e-06, "loss": 3.2642, "step": 490 }, { "epoch": 3.356902356902357, "grad_norm": 16.94630241394043, "learning_rate": 1.5275005885087646e-06, "loss": 3.4016, "step": 500 }, { "epoch": 3.356902356902357, "eval_loss": 0.42926138639450073, "eval_mae": 0.43924492597579956, "eval_mse": 0.42926138639450073, "eval_r2": 0.061065077781677246, "eval_rmse": 0.6551804227802451, "eval_runtime": 10.7747, "eval_samples_per_second": 440.757, "eval_steps_per_second": 13.829, "step": 500 }, { "epoch": 3.4242424242424243, "grad_norm": 16.46173095703125, "learning_rate": 1.5092253565614233e-06, "loss": 3.3105, "step": 510 }, { "epoch": 3.4915824915824913, "grad_norm": 40.1240119934082, "learning_rate": 1.490717552003938e-06, "loss": 2.9986, "step": 520 }, { "epoch": 3.558922558922559, "grad_norm": 25.568714141845703, "learning_rate": 1.471985627691923e-06, "loss": 3.3074, "step": 530 }, { "epoch": 3.6262626262626263, "grad_norm": 32.04632568359375, "learning_rate": 1.453038138840617e-06, "loss": 3.6, "step": 540 }, { "epoch": 3.6936026936026938, "grad_norm": 11.525985717773438, "learning_rate": 1.433883739117558e-06, "loss": 3.3307, "step": 550 }, { "epoch": 3.760942760942761, "grad_norm": 52.01261901855469, "learning_rate": 1.4145311766902954e-06, "loss": 3.4964, "step": 560 }, { "epoch": 3.8282828282828283, "grad_norm": 32.366905212402344, "learning_rate": 1.3949892902309385e-06, "loss": 3.7336, "step": 570 }, { "epoch": 3.8956228956228958, "grad_norm": 15.266845703125, "learning_rate": 1.3752670048793743e-06, "loss": 3.049, "step": 580 }, { "epoch": 3.962962962962963, "grad_norm": 33.21766662597656, "learning_rate": 1.3553733281669887e-06, "loss": 3.7293, "step": 590 }, { "epoch": 4.026936026936027, "grad_norm": 16.13041877746582, "learning_rate": 1.3353173459027644e-06, "loss": 3.2354, "step": 600 }, { "epoch": 4.026936026936027, "eval_loss": 0.4244127571582794, "eval_mae": 0.4413226544857025, "eval_mse": 0.4244127571582794, "eval_r2": 0.07167059183120728, "eval_rmse": 0.6514696901301544, "eval_runtime": 10.7803, "eval_samples_per_second": 440.526, "eval_steps_per_second": 13.822, "step": 600 }, { "epoch": 4.094276094276094, "grad_norm": 54.682308197021484, "learning_rate": 1.3151082180236209e-06, "loss": 3.5774, "step": 610 }, { "epoch": 4.161616161616162, "grad_norm": 13.16175651550293, "learning_rate": 1.2947551744109043e-06, "loss": 3.343, "step": 620 }, { "epoch": 4.228956228956229, "grad_norm": 25.15774154663086, "learning_rate": 1.2742675106749304e-06, "loss": 3.4303, "step": 630 }, { "epoch": 4.296296296296296, "grad_norm": 51.24179458618164, "learning_rate": 1.2536545839095072e-06, "loss": 3.3451, "step": 640 }, { "epoch": 4.363636363636363, "grad_norm": 12.802881240844727, "learning_rate": 1.2329258084183787e-06, "loss": 3.5693, "step": 650 }, { "epoch": 4.430976430976431, "grad_norm": 43.8185920715332, "learning_rate": 1.2120906514155369e-06, "loss": 3.0625, "step": 660 }, { "epoch": 4.498316498316498, "grad_norm": 26.531442642211914, "learning_rate": 1.1911586287013725e-06, "loss": 3.2202, "step": 670 }, { "epoch": 4.565656565656566, "grad_norm": 20.062641143798828, "learning_rate": 1.1701393003166326e-06, "loss": 3.1884, "step": 680 }, { "epoch": 4.632996632996633, "grad_norm": 16.506580352783203, "learning_rate": 1.1490422661761743e-06, "loss": 3.5252, "step": 690 }, { "epoch": 4.700336700336701, "grad_norm": 27.591516494750977, "learning_rate": 1.127877161684506e-06, "loss": 3.2335, "step": 700 }, { "epoch": 4.700336700336701, "eval_loss": 0.4179418683052063, "eval_mae": 0.43748921155929565, "eval_mse": 0.4179418683052063, "eval_r2": 0.08582454919815063, "eval_rmse": 0.6464842367028034, "eval_runtime": 10.747, "eval_samples_per_second": 441.89, "eval_steps_per_second": 13.864, "step": 700 }, { "epoch": 4.767676767676767, "grad_norm": 22.789531707763672, "learning_rate": 1.1066536533351202e-06, "loss": 3.4288, "step": 710 }, { "epoch": 4.835016835016835, "grad_norm": 29.838911056518555, "learning_rate": 1.0853814342956285e-06, "loss": 3.1809, "step": 720 }, { "epoch": 4.902356902356902, "grad_norm": 15.134218215942383, "learning_rate": 1.064070219980713e-06, "loss": 3.4145, "step": 730 }, { "epoch": 4.96969696969697, "grad_norm": 35.06580352783203, "learning_rate": 1.0427297436149167e-06, "loss": 2.9387, "step": 740 }, { "epoch": 5.033670033670034, "grad_norm": 22.692121505737305, "learning_rate": 1.0213697517873014e-06, "loss": 3.3395, "step": 750 }, { "epoch": 5.101010101010101, "grad_norm": 23.79498291015625, "learning_rate": 1e-06, "loss": 3.2956, "step": 760 }, { "epoch": 5.168350168350169, "grad_norm": 22.78725814819336, "learning_rate": 9.786302482126985e-07, "loss": 3.3209, "step": 770 }, { "epoch": 5.235690235690235, "grad_norm": 54.459102630615234, "learning_rate": 9.572702563850832e-07, "loss": 2.8611, "step": 780 }, { "epoch": 5.303030303030303, "grad_norm": 22.259056091308594, "learning_rate": 9.359297800192871e-07, "loss": 3.6389, "step": 790 }, { "epoch": 5.37037037037037, "grad_norm": 41.00143814086914, "learning_rate": 9.146185657043713e-07, "loss": 3.2095, "step": 800 }, { "epoch": 5.37037037037037, "eval_loss": 0.4157916307449341, "eval_mae": 0.43682366609573364, "eval_mse": 0.4157916307449341, "eval_r2": 0.09052777290344238, "eval_rmse": 0.6448190682237415, "eval_runtime": 10.7477, "eval_samples_per_second": 441.862, "eval_steps_per_second": 13.863, "step": 800 }, { "epoch": 5.437710437710438, "grad_norm": 24.487964630126953, "learning_rate": 8.933463466648798e-07, "loss": 3.6041, "step": 810 }, { "epoch": 5.505050505050505, "grad_norm": 40.73241424560547, "learning_rate": 8.721228383154939e-07, "loss": 3.1173, "step": 820 }, { "epoch": 5.572390572390573, "grad_norm": 25.84406852722168, "learning_rate": 8.509577338238254e-07, "loss": 3.2959, "step": 830 }, { "epoch": 5.63973063973064, "grad_norm": 17.02696418762207, "learning_rate": 8.298606996833675e-07, "loss": 3.2622, "step": 840 }, { "epoch": 5.707070707070707, "grad_norm": 45.567039489746094, "learning_rate": 8.088413712986279e-07, "loss": 3.0759, "step": 850 }, { "epoch": 5.774410774410774, "grad_norm": 57.41228103637695, "learning_rate": 7.879093485844635e-07, "loss": 3.4163, "step": 860 }, { "epoch": 5.841750841750842, "grad_norm": 56.78010940551758, "learning_rate": 7.670741915816215e-07, "loss": 3.2955, "step": 870 }, { "epoch": 5.909090909090909, "grad_norm": 63.26323699951172, "learning_rate": 7.463454160904927e-07, "loss": 3.2357, "step": 880 }, { "epoch": 5.976430976430977, "grad_norm": 52.910831451416016, "learning_rate": 7.257324893250698e-07, "loss": 3.1736, "step": 890 }, { "epoch": 6.040404040404041, "grad_norm": 88.97228240966797, "learning_rate": 7.052448255890957e-07, "loss": 3.2247, "step": 900 }, { "epoch": 6.040404040404041, "eval_loss": 0.41349777579307556, "eval_mae": 0.43941694498062134, "eval_mse": 0.41349777579307556, "eval_r2": 0.09554523229598999, "eval_rmse": 0.6430379271808744, "eval_runtime": 10.7327, "eval_samples_per_second": 442.478, "eval_steps_per_second": 13.883, "step": 900 }, { "epoch": 6.107744107744108, "grad_norm": 30.775344848632812, "learning_rate": 6.848917819763793e-07, "loss": 3.3701, "step": 910 }, { "epoch": 6.175084175084175, "grad_norm": 38.826358795166016, "learning_rate": 6.646826540972357e-07, "loss": 2.998, "step": 920 }, { "epoch": 6.242424242424242, "grad_norm": 43.822120666503906, "learning_rate": 6.446266718330112e-07, "loss": 2.8953, "step": 930 }, { "epoch": 6.30976430976431, "grad_norm": 47.06931686401367, "learning_rate": 6.247329951206259e-07, "loss": 3.5021, "step": 940 }, { "epoch": 6.377104377104377, "grad_norm": 20.513471603393555, "learning_rate": 6.050107097690615e-07, "loss": 3.3767, "step": 950 }, { "epoch": 6.444444444444445, "grad_norm": 20.662376403808594, "learning_rate": 5.854688233097045e-07, "loss": 3.0044, "step": 960 }, { "epoch": 6.511784511784512, "grad_norm": 63.73493194580078, "learning_rate": 5.661162608824419e-07, "loss": 2.9996, "step": 970 }, { "epoch": 6.57912457912458, "grad_norm": 68.89252471923828, "learning_rate": 5.46961861159383e-07, "loss": 3.6277, "step": 980 }, { "epoch": 6.646464646464646, "grad_norm": 43.62266540527344, "learning_rate": 5.28014372308077e-07, "loss": 3.5213, "step": 990 }, { "epoch": 6.713804713804714, "grad_norm": 17.754369735717773, "learning_rate": 5.092824479960625e-07, "loss": 3.0203, "step": 1000 }, { "epoch": 6.713804713804714, "eval_loss": 0.4113183915615082, "eval_mae": 0.43627291917800903, "eval_mse": 0.41131842136383057, "eval_r2": 0.10031217336654663, "eval_rmse": 0.6413411115497202, "eval_runtime": 10.7502, "eval_samples_per_second": 441.758, "eval_steps_per_second": 13.86, "step": 1000 }, { "epoch": 6.781144781144781, "grad_norm": 40.6204833984375, "learning_rate": 4.907746434385769e-07, "loss": 3.5471, "step": 1010 }, { "epoch": 6.848484848484849, "grad_norm": 33.60804748535156, "learning_rate": 4.724994114912354e-07, "loss": 2.9648, "step": 1020 }, { "epoch": 6.915824915824916, "grad_norm": 16.388248443603516, "learning_rate": 4.544650987894514e-07, "loss": 3.347, "step": 1030 }, { "epoch": 6.983164983164983, "grad_norm": 36.26288986206055, "learning_rate": 4.366799419363779e-07, "loss": 2.8028, "step": 1040 }, { "epoch": 7.047138047138047, "grad_norm": 50.3315315246582, "learning_rate": 4.1915206374109735e-07, "loss": 2.8516, "step": 1050 }, { "epoch": 7.114478114478114, "grad_norm": 24.43304443359375, "learning_rate": 4.01889469508784e-07, "loss": 2.9182, "step": 1060 }, { "epoch": 7.181818181818182, "grad_norm": 18.814807891845703, "learning_rate": 3.849000433845362e-07, "loss": 2.9931, "step": 1070 }, { "epoch": 7.249158249158249, "grad_norm": 89.2548599243164, "learning_rate": 3.6819154475253865e-07, "loss": 3.9664, "step": 1080 }, { "epoch": 7.316498316498317, "grad_norm": 33.19672393798828, "learning_rate": 3.5177160469221176e-07, "loss": 2.867, "step": 1090 }, { "epoch": 7.383838383838384, "grad_norm": 41.82732009887695, "learning_rate": 3.356477224929539e-07, "loss": 3.1016, "step": 1100 }, { "epoch": 7.383838383838384, "eval_loss": 0.4100225269794464, "eval_mae": 0.4379023313522339, "eval_mse": 0.4100225269794464, "eval_r2": 0.10314673185348511, "eval_rmse": 0.6403300141172881, "eval_runtime": 10.7317, "eval_samples_per_second": 442.519, "eval_steps_per_second": 13.884, "step": 1100 }, { "epoch": 7.451178451178452, "grad_norm": 86.2802505493164, "learning_rate": 3.198272622290804e-07, "loss": 3.6962, "step": 1110 }, { "epoch": 7.518518518518518, "grad_norm": 56.78799057006836, "learning_rate": 3.043174493965136e-07, "loss": 3.2249, "step": 1120 }, { "epoch": 7.585858585858586, "grad_norm": 82.79034423828125, "learning_rate": 2.891253676127652e-07, "loss": 3.1029, "step": 1130 }, { "epoch": 7.653198653198653, "grad_norm": 107.42868041992188, "learning_rate": 2.74257955381718e-07, "loss": 3.2151, "step": 1140 }, { "epoch": 7.720538720538721, "grad_norm": 12.15009593963623, "learning_rate": 2.597220029246846e-07, "loss": 3.0892, "step": 1150 }, { "epoch": 7.787878787878788, "grad_norm": 68.21215057373047, "learning_rate": 2.455241490791856e-07, "loss": 3.5911, "step": 1160 }, { "epoch": 7.8552188552188555, "grad_norm": 44.60520553588867, "learning_rate": 2.3167087826687236e-07, "loss": 3.062, "step": 1170 }, { "epoch": 7.922558922558922, "grad_norm": 51.33924102783203, "learning_rate": 2.181685175319702e-07, "loss": 3.1978, "step": 1180 }, { "epoch": 7.98989898989899, "grad_norm": 75.90934753417969, "learning_rate": 2.0502323365160246e-07, "loss": 3.364, "step": 1190 }, { "epoch": 8.053872053872054, "grad_norm": 24.663028717041016, "learning_rate": 1.9224103031930771e-07, "loss": 3.3306, "step": 1200 }, { "epoch": 8.053872053872054, "eval_loss": 0.41018232703208923, "eval_mae": 0.436128705739975, "eval_mse": 0.41018232703208923, "eval_r2": 0.10279721021652222, "eval_rmse": 0.6404547814109043, "eval_runtime": 10.7502, "eval_samples_per_second": 441.76, "eval_steps_per_second": 13.86, "step": 1200 }, { "epoch": 8.121212121212121, "grad_norm": 41.90610122680664, "learning_rate": 1.7982774540304402e-07, "loss": 2.9594, "step": 1210 }, { "epoch": 8.188552188552189, "grad_norm": 18.755924224853516, "learning_rate": 1.6778904827892627e-07, "loss": 3.3871, "step": 1220 }, { "epoch": 8.255892255892256, "grad_norm": 41.15065383911133, "learning_rate": 1.5613043724191667e-07, "loss": 3.1087, "step": 1230 }, { "epoch": 8.323232323232324, "grad_norm": 102.83473205566406, "learning_rate": 1.448572369946539e-07, "loss": 3.3734, "step": 1240 }, { "epoch": 8.390572390572391, "grad_norm": 58.78752899169922, "learning_rate": 1.3397459621556128e-07, "loss": 3.2256, "step": 1250 }, { "epoch": 8.457912457912458, "grad_norm": 78.93064880371094, "learning_rate": 1.234874852073522e-07, "loss": 2.9284, "step": 1260 }, { "epoch": 8.525252525252526, "grad_norm": 36.4371337890625, "learning_rate": 1.1340069362699988e-07, "loss": 3.0341, "step": 1270 }, { "epoch": 8.592592592592592, "grad_norm": 28.966806411743164, "learning_rate": 1.0371882829821344e-07, "loss": 3.3451, "step": 1280 }, { "epoch": 8.65993265993266, "grad_norm": 41.968467712402344, "learning_rate": 9.444631110741585e-08, "loss": 3.3067, "step": 1290 }, { "epoch": 8.727272727272727, "grad_norm": 41.25293731689453, "learning_rate": 8.55873769841876e-08, "loss": 3.1773, "step": 1300 }, { "epoch": 8.727272727272727, "eval_loss": 0.40871018171310425, "eval_mae": 0.43687179684638977, "eval_mse": 0.40871018171310425, "eval_r2": 0.10601729154586792, "eval_rmse": 0.6393044515042142, "eval_runtime": 10.7589, "eval_samples_per_second": 441.401, "eval_steps_per_second": 13.849, "step": 1300 }, { "epoch": 8.794612794612794, "grad_norm": 54.70187759399414, "learning_rate": 7.714607196709632e-08, "loss": 3.4304, "step": 1310 }, { "epoch": 8.861952861952862, "grad_norm": 63.749977111816406, "learning_rate": 6.912625135579586e-08, "loss": 3.093, "step": 1320 }, { "epoch": 8.929292929292929, "grad_norm": 63.28023910522461, "learning_rate": 6.153157795023956e-08, "loss": 3.2171, "step": 1330 }, { "epoch": 8.996632996632997, "grad_norm": 19.110267639160156, "learning_rate": 5.436552037781339e-08, "loss": 3.009, "step": 1340 }, { "epoch": 9.06060606060606, "grad_norm": 19.495882034301758, "learning_rate": 4.7631351509147767e-08, "loss": 3.0717, "step": 1350 }, { "epoch": 9.127946127946128, "grad_norm": 26.402833938598633, "learning_rate": 4.133214696333942e-08, "loss": 2.8148, "step": 1360 }, { "epoch": 9.195286195286196, "grad_norm": 53.432376861572266, "learning_rate": 3.547078370325762e-08, "loss": 3.5062, "step": 1370 }, { "epoch": 9.262626262626263, "grad_norm": 10.824268341064453, "learning_rate": 3.004993872158312e-08, "loss": 3.4431, "step": 1380 }, { "epoch": 9.32996632996633, "grad_norm": 20.467021942138672, "learning_rate": 2.507208781817638e-08, "loss": 3.0458, "step": 1390 }, { "epoch": 9.397306397306398, "grad_norm": 60.92194747924805, "learning_rate": 2.053950446933328e-08, "loss": 2.8509, "step": 1400 }, { "epoch": 9.397306397306398, "eval_loss": 0.4086049199104309, "eval_mae": 0.43635305762290955, "eval_mse": 0.4086049199104309, "eval_r2": 0.10624754428863525, "eval_rmse": 0.6392221209489163, "eval_runtime": 10.7483, "eval_samples_per_second": 441.836, "eval_steps_per_second": 13.863, "step": 1400 }, { "epoch": 9.464646464646465, "grad_norm": 15.497406005859375, "learning_rate": 1.6454258789447282e-08, "loss": 3.5996, "step": 1410 }, { "epoch": 9.531986531986533, "grad_norm": 20.402498245239258, "learning_rate": 1.2818216585549824e-08, "loss": 3.5639, "step": 1420 }, { "epoch": 9.599326599326599, "grad_norm": 22.963951110839844, "learning_rate": 9.63303850516195e-09, "loss": 2.9273, "step": 1430 }, { "epoch": 9.666666666666666, "grad_norm": 30.242029190063477, "learning_rate": 6.900179277845475e-09, "loss": 3.245, "step": 1440 }, { "epoch": 9.734006734006734, "grad_norm": 45.36821365356445, "learning_rate": 4.62088705080177e-09, "loss": 2.8367, "step": 1450 }, { "epoch": 9.801346801346801, "grad_norm": 84.24251556396484, "learning_rate": 2.7962028188198706e-09, "loss": 3.5053, "step": 1460 }, { "epoch": 9.868686868686869, "grad_norm": 60.00812530517578, "learning_rate": 1.4269599488343497e-09, "loss": 3.4285, "step": 1470 }, { "epoch": 9.936026936026936, "grad_norm": 31.326629638671875, "learning_rate": 5.137837993121064e-10, "loss": 3.1417, "step": 1480 }, { "epoch": 10.0, "grad_norm": 21.96562385559082, "learning_rate": 5.709143463894506e-11, "loss": 2.9753, "step": 1490 }, { "epoch": 10.067340067340067, "grad_norm": 75.41439056396484, "learning_rate": 1.005306719439637e-06, "loss": 3.1795, "step": 1500 }, { "epoch": 10.067340067340067, "eval_loss": 0.4095897674560547, "eval_mae": 0.43876004219055176, "eval_mse": 0.4095897674560547, "eval_r2": 0.10409331321716309, "eval_rmse": 0.6399920057751148, "eval_runtime": 10.8584, "eval_samples_per_second": 437.357, "eval_steps_per_second": 13.722, "step": 1500 }, { "epoch": 10.134680134680135, "grad_norm": 18.717395782470703, "learning_rate": 9.946932805603635e-07, "loss": 2.9844, "step": 1510 }, { "epoch": 10.202020202020202, "grad_norm": 23.210254669189453, "learning_rate": 9.84080439456951e-07, "loss": 3.5187, "step": 1520 }, { "epoch": 10.26936026936027, "grad_norm": 17.960315704345703, "learning_rate": 9.734693916137869e-07, "loss": 3.7235, "step": 1530 }, { "epoch": 10.336700336700337, "grad_norm": 69.5107650756836, "learning_rate": 9.628613323132553e-07, "loss": 3.2357, "step": 1540 }, { "epoch": 10.404040404040405, "grad_norm": 17.75201988220215, "learning_rate": 9.522574565010963e-07, "loss": 3.0371, "step": 1550 }, { "epoch": 10.47138047138047, "grad_norm": 66.7865219116211, "learning_rate": 9.416589586518008e-07, "loss": 3.3833, "step": 1560 }, { "epoch": 10.538720538720538, "grad_norm": 54.582523345947266, "learning_rate": 9.310670326340575e-07, "loss": 3.0195, "step": 1570 }, { "epoch": 10.606060606060606, "grad_norm": 89.0817642211914, "learning_rate": 9.204828715762717e-07, "loss": 3.1056, "step": 1580 }, { "epoch": 10.673400673400673, "grad_norm": 25.404674530029297, "learning_rate": 9.099076677321638e-07, "loss": 3.2259, "step": 1590 }, { "epoch": 10.74074074074074, "grad_norm": 48.655517578125, "learning_rate": 8.993426123464679e-07, "loss": 3.1321, "step": 1600 }, { "epoch": 10.74074074074074, "eval_loss": 0.406904935836792, "eval_mae": 0.4358658790588379, "eval_mse": 0.406904935836792, "eval_r2": 0.10996592044830322, "eval_rmse": 0.637891006236012, "eval_runtime": 10.854, "eval_samples_per_second": 437.535, "eval_steps_per_second": 13.728, "step": 1600 }, { "epoch": 10.808080808080808, "grad_norm": 64.61898040771484, "learning_rate": 8.887888955207443e-07, "loss": 3.0633, "step": 1610 }, { "epoch": 10.875420875420875, "grad_norm": 26.49844741821289, "learning_rate": 8.78247706079321e-07, "loss": 3.1983, "step": 1620 }, { "epoch": 10.942760942760943, "grad_norm": 54.12215805053711, "learning_rate": 8.677202314353759e-07, "loss": 2.7223, "step": 1630 }, { "epoch": 11.006734006734007, "grad_norm": 57.37771987915039, "learning_rate": 8.572076574571837e-07, "loss": 3.2347, "step": 1640 }, { "epoch": 11.074074074074074, "grad_norm": 61.42991638183594, "learning_rate": 8.467111683345325e-07, "loss": 3.3756, "step": 1650 }, { "epoch": 11.141414141414142, "grad_norm": 48.1729736328125, "learning_rate": 8.3623194644533e-07, "loss": 3.2033, "step": 1660 }, { "epoch": 11.20875420875421, "grad_norm": 115.97843933105469, "learning_rate": 8.257711722224151e-07, "loss": 2.9863, "step": 1670 }, { "epoch": 11.276094276094277, "grad_norm": 46.33700180053711, "learning_rate": 8.153300240205873e-07, "loss": 3.2684, "step": 1680 }, { "epoch": 11.343434343434343, "grad_norm": 123.17384338378906, "learning_rate": 8.049096779838717e-07, "loss": 3.0913, "step": 1690 }, { "epoch": 11.41077441077441, "grad_norm": 59.11786651611328, "learning_rate": 7.945113079130321e-07, "loss": 3.0068, "step": 1700 }, { "epoch": 11.41077441077441, "eval_loss": 0.4071567952632904, "eval_mae": 0.4385449290275574, "eval_mse": 0.4071568250656128, "eval_r2": 0.10941493511199951, "eval_rmse": 0.6380884147715055, "eval_runtime": 10.8476, "eval_samples_per_second": 437.793, "eval_steps_per_second": 13.736, "step": 1700 }, { "epoch": 11.478114478114477, "grad_norm": 27.162290573120117, "learning_rate": 7.84136085133347e-07, "loss": 3.3897, "step": 1710 }, { "epoch": 11.545454545454545, "grad_norm": 105.30890655517578, "learning_rate": 7.73785178362667e-07, "loss": 2.7851, "step": 1720 }, { "epoch": 11.612794612794612, "grad_norm": 31.78699493408203, "learning_rate": 7.634597535797632e-07, "loss": 3.1505, "step": 1730 }, { "epoch": 11.68013468013468, "grad_norm": 27.06731414794922, "learning_rate": 7.531609738929864e-07, "loss": 3.052, "step": 1740 }, { "epoch": 11.747474747474747, "grad_norm": 88.25122833251953, "learning_rate": 7.428899994092482e-07, "loss": 3.2417, "step": 1750 }, { "epoch": 11.814814814814815, "grad_norm": 61.068180084228516, "learning_rate": 7.326479871033408e-07, "loss": 3.5391, "step": 1760 }, { "epoch": 11.882154882154882, "grad_norm": 165.83883666992188, "learning_rate": 7.224360906876099e-07, "loss": 3.4138, "step": 1770 }, { "epoch": 11.94949494949495, "grad_norm": 25.826210021972656, "learning_rate": 7.122554604819924e-07, "loss": 3.146, "step": 1780 }, { "epoch": 12.013468013468014, "grad_norm": 34.94708251953125, "learning_rate": 7.021072432844426e-07, "loss": 2.7334, "step": 1790 }, { "epoch": 12.080808080808081, "grad_norm": 65.21430206298828, "learning_rate": 6.919925822417476e-07, "loss": 3.251, "step": 1800 }, { "epoch": 12.080808080808081, "eval_loss": 0.403127521276474, "eval_mae": 0.43513786792755127, "eval_mse": 0.403127521276474, "eval_r2": 0.11822831630706787, "eval_rmse": 0.6349232404601945, "eval_runtime": 10.8799, "eval_samples_per_second": 436.494, "eval_steps_per_second": 13.695, "step": 1800 }, { "epoch": 12.148148148148149, "grad_norm": 39.425933837890625, "learning_rate": 6.819126167207586e-07, "loss": 3.3073, "step": 1810 }, { "epoch": 12.215488215488216, "grad_norm": 73.6878433227539, "learning_rate": 6.718684821800467e-07, "loss": 2.9382, "step": 1820 }, { "epoch": 12.282828282828282, "grad_norm": 47.09424591064453, "learning_rate": 6.618613100419998e-07, "loss": 2.7197, "step": 1830 }, { "epoch": 12.35016835016835, "grad_norm": 38.0715446472168, "learning_rate": 6.518922275653723e-07, "loss": 3.749, "step": 1840 }, { "epoch": 12.417508417508417, "grad_norm": 28.89458656311035, "learning_rate": 6.419623577183055e-07, "loss": 3.1893, "step": 1850 }, { "epoch": 12.484848484848484, "grad_norm": 129.57972717285156, "learning_rate": 6.320728190518307e-07, "loss": 2.9553, "step": 1860 }, { "epoch": 12.552188552188552, "grad_norm": 138.3606719970703, "learning_rate": 6.222247255738705e-07, "loss": 3.1588, "step": 1870 }, { "epoch": 12.61952861952862, "grad_norm": 28.980398178100586, "learning_rate": 6.124191866237504e-07, "loss": 3.2829, "step": 1880 }, { "epoch": 12.686868686868687, "grad_norm": 89.88325500488281, "learning_rate": 6.026573067472366e-07, "loss": 3.1184, "step": 1890 }, { "epoch": 12.754208754208754, "grad_norm": 72.46405029296875, "learning_rate": 5.929401855721161e-07, "loss": 2.756, "step": 1900 }, { "epoch": 12.754208754208754, "eval_loss": 0.40268275141716003, "eval_mae": 0.43557608127593994, "eval_mse": 0.40268269181251526, "eval_r2": 0.11920136213302612, "eval_rmse": 0.6345728420067434, "eval_runtime": 20.9038, "eval_samples_per_second": 227.183, "eval_steps_per_second": 7.128, "step": 1900 }, { "epoch": 12.821548821548822, "grad_norm": 83.84222412109375, "learning_rate": 5.83268917684329e-07, "loss": 2.9702, "step": 1910 }, { "epoch": 12.88888888888889, "grad_norm": 54.87720489501953, "learning_rate": 5.736445925046659e-07, "loss": 3.3101, "step": 1920 }, { "epoch": 12.956228956228957, "grad_norm": 90.08381652832031, "learning_rate": 5.640682941660546e-07, "loss": 3.6202, "step": 1930 }, { "epoch": 13.02020202020202, "grad_norm": 127.76883697509766, "learning_rate": 5.545411013914328e-07, "loss": 3.0041, "step": 1940 }, { "epoch": 13.087542087542088, "grad_norm": 87.49531555175781, "learning_rate": 5.450640873722394e-07, "loss": 2.8298, "step": 1950 }, { "epoch": 13.154882154882156, "grad_norm": 128.2417755126953, "learning_rate": 5.356383196475225e-07, "loss": 3.7882, "step": 1960 }, { "epoch": 13.222222222222221, "grad_norm": 34.858116149902344, "learning_rate": 5.262648599836872e-07, "loss": 2.9373, "step": 1970 }, { "epoch": 13.289562289562289, "grad_norm": 29.578563690185547, "learning_rate": 5.169447642548928e-07, "loss": 3.0944, "step": 1980 }, { "epoch": 13.356902356902356, "grad_norm": 53.749786376953125, "learning_rate": 5.076790823241131e-07, "loss": 2.848, "step": 1990 }, { "epoch": 13.424242424242424, "grad_norm": 44.89299774169922, "learning_rate": 4.984688579248756e-07, "loss": 3.2574, "step": 2000 }, { "epoch": 13.424242424242424, "eval_loss": 0.40199729800224304, "eval_mae": 0.43625468015670776, "eval_mse": 0.40199729800224304, "eval_r2": 0.12070053815841675, "eval_rmse": 0.6340325685658766, "eval_runtime": 10.8507, "eval_samples_per_second": 437.667, "eval_steps_per_second": 13.732, "step": 2000 }, { "epoch": 13.491582491582491, "grad_norm": 48.39162826538086, "learning_rate": 4.89315128543689e-07, "loss": 3.0476, "step": 2010 }, { "epoch": 13.558922558922559, "grad_norm": 11.19051742553711, "learning_rate": 4.802189253031763e-07, "loss": 2.7127, "step": 2020 }, { "epoch": 13.626262626262626, "grad_norm": 115.34234619140625, "learning_rate": 4.7118127284592325e-07, "loss": 2.8514, "step": 2030 }, { "epoch": 13.693602693602694, "grad_norm": 45.17890167236328, "learning_rate": 4.622031892190579e-07, "loss": 3.1963, "step": 2040 }, { "epoch": 13.760942760942761, "grad_norm": 143.8472900390625, "learning_rate": 4.5328568575957136e-07, "loss": 3.3596, "step": 2050 }, { "epoch": 13.828282828282829, "grad_norm": 54.9832878112793, "learning_rate": 4.4442976698039803e-07, "loss": 3.5773, "step": 2060 }, { "epoch": 13.895622895622896, "grad_norm": 37.36722183227539, "learning_rate": 4.3563643045725964e-07, "loss": 3.0904, "step": 2070 }, { "epoch": 13.962962962962964, "grad_norm": 33.423851013183594, "learning_rate": 4.269066667162956e-07, "loss": 2.9525, "step": 2080 }, { "epoch": 14.026936026936028, "grad_norm": 108.66680145263672, "learning_rate": 4.182414591224833e-07, "loss": 3.368, "step": 2090 }, { "epoch": 14.094276094276093, "grad_norm": 39.20806121826172, "learning_rate": 4.096417837688666e-07, "loss": 3.0428, "step": 2100 }, { "epoch": 14.094276094276093, "eval_loss": 0.4001167416572571, "eval_mae": 0.4332391321659088, "eval_mse": 0.4001167416572571, "eval_r2": 0.12481391429901123, "eval_rmse": 0.6325478176843685, "eval_runtime": 10.8545, "eval_samples_per_second": 437.516, "eval_steps_per_second": 13.727, "step": 2100 }, { "epoch": 14.16161616161616, "grad_norm": 137.24478149414062, "learning_rate": 4.011086093666056e-07, "loss": 3.0434, "step": 2110 }, { "epoch": 14.228956228956228, "grad_norm": 56.08025360107422, "learning_rate": 3.9264289713585495e-07, "loss": 3.1002, "step": 2120 }, { "epoch": 14.296296296296296, "grad_norm": 63.49452209472656, "learning_rate": 3.84245600697487e-07, "loss": 3.0575, "step": 2130 }, { "epoch": 14.363636363636363, "grad_norm": 30.612703323364258, "learning_rate": 3.7591766596567166e-07, "loss": 2.7905, "step": 2140 }, { "epoch": 14.43097643097643, "grad_norm": 46.42356491088867, "learning_rate": 3.6766003104132325e-07, "loss": 2.9439, "step": 2150 }, { "epoch": 14.498316498316498, "grad_norm": 87.45642852783203, "learning_rate": 3.594736261064285e-07, "loss": 3.0732, "step": 2160 }, { "epoch": 14.565656565656566, "grad_norm": 68.17288970947266, "learning_rate": 3.5135937331926593e-07, "loss": 3.2791, "step": 2170 }, { "epoch": 14.632996632996633, "grad_norm": 50.07135009765625, "learning_rate": 3.4331818671052905e-07, "loss": 3.0399, "step": 2180 }, { "epoch": 14.7003367003367, "grad_norm": 73.26012420654297, "learning_rate": 3.3535097208036577e-07, "loss": 3.1467, "step": 2190 }, { "epoch": 14.767676767676768, "grad_norm": 31.187746047973633, "learning_rate": 3.274586268963443e-07, "loss": 3.0028, "step": 2200 }, { "epoch": 14.767676767676768, "eval_loss": 0.40184375643730164, "eval_mae": 0.43398547172546387, "eval_mse": 0.40184372663497925, "eval_r2": 0.12103646993637085, "eval_rmse": 0.633911450152921, "eval_runtime": 10.8637, "eval_samples_per_second": 437.145, "eval_steps_per_second": 13.715, "step": 2200 }, { "epoch": 14.835016835016836, "grad_norm": 47.02648162841797, "learning_rate": 3.1964204019235664e-07, "loss": 3.3099, "step": 2210 }, { "epoch": 14.902356902356903, "grad_norm": 64.7254409790039, "learning_rate": 3.119020924684762e-07, "loss": 3.4628, "step": 2220 }, { "epoch": 14.969696969696969, "grad_norm": 94.42536163330078, "learning_rate": 3.0423965559177067e-07, "loss": 3.3258, "step": 2230 }, { "epoch": 15.033670033670033, "grad_norm": 95.36449432373047, "learning_rate": 2.9665559269809216e-07, "loss": 2.7099, "step": 2240 }, { "epoch": 15.1010101010101, "grad_norm": 90.4229965209961, "learning_rate": 2.89150758094849e-07, "loss": 3.342, "step": 2250 }, { "epoch": 15.168350168350168, "grad_norm": 136.80563354492188, "learning_rate": 2.817259971647714e-07, "loss": 3.0889, "step": 2260 }, { "epoch": 15.235690235690235, "grad_norm": 69.01044464111328, "learning_rate": 2.7438214627068447e-07, "loss": 3.0783, "step": 2270 }, { "epoch": 15.303030303030303, "grad_norm": 17.23488998413086, "learning_rate": 2.6712003266129523e-07, "loss": 2.6556, "step": 2280 }, { "epoch": 15.37037037037037, "grad_norm": 27.986482620239258, "learning_rate": 2.5994047437800704e-07, "loss": 2.5876, "step": 2290 }, { "epoch": 15.437710437710438, "grad_norm": 49.16962814331055, "learning_rate": 2.5284428016277283e-07, "loss": 3.2579, "step": 2300 }, { "epoch": 15.437710437710438, "eval_loss": 0.40018677711486816, "eval_mae": 0.4352611303329468, "eval_mse": 0.4001867473125458, "eval_r2": 0.12466078996658325, "eval_rmse": 0.6326031515196124, "eval_runtime": 10.873, "eval_samples_per_second": 436.772, "eval_steps_per_second": 13.704, "step": 2300 }, { "epoch": 15.505050505050505, "grad_norm": 45.588104248046875, "learning_rate": 2.458322493669911e-07, "loss": 3.4006, "step": 2310 }, { "epoch": 15.572390572390573, "grad_norm": 70.8850326538086, "learning_rate": 2.389051718614662e-07, "loss": 3.6338, "step": 2320 }, { "epoch": 15.63973063973064, "grad_norm": 21.327795028686523, "learning_rate": 2.3206382794743117e-07, "loss": 3.0016, "step": 2330 }, { "epoch": 15.707070707070708, "grad_norm": 51.54344177246094, "learning_rate": 2.2530898826865196e-07, "loss": 3.2104, "step": 2340 }, { "epoch": 15.774410774410775, "grad_norm": 48.73935317993164, "learning_rate": 2.186414137246172e-07, "loss": 3.0319, "step": 2350 }, { "epoch": 15.841750841750843, "grad_norm": 147.45216369628906, "learning_rate": 2.12061855384827e-07, "loss": 3.4502, "step": 2360 }, { "epoch": 15.909090909090908, "grad_norm": 71.27183532714844, "learning_rate": 2.0557105440418897e-07, "loss": 3.072, "step": 2370 }, { "epoch": 15.976430976430976, "grad_norm": 52.18257522583008, "learning_rate": 1.9916974193953007e-07, "loss": 2.8491, "step": 2380 }, { "epoch": 16.04040404040404, "grad_norm": 31.763568878173828, "learning_rate": 1.928586390672361e-07, "loss": 3.2757, "step": 2390 }, { "epoch": 16.107744107744107, "grad_norm": 29.253280639648438, "learning_rate": 1.866384567020256e-07, "loss": 2.8437, "step": 2400 }, { "epoch": 16.107744107744107, "eval_loss": 0.39934295415878296, "eval_mae": 0.4333915710449219, "eval_mse": 0.39934295415878296, "eval_r2": 0.12650644779205322, "eval_rmse": 0.6319358782018814, "eval_runtime": 10.8636, "eval_samples_per_second": 437.146, "eval_steps_per_second": 13.715, "step": 2400 }, { "epoch": 16.175084175084177, "grad_norm": 38.11127471923828, "learning_rate": 1.8050989551686912e-07, "loss": 3.2059, "step": 2410 }, { "epoch": 16.242424242424242, "grad_norm": 57.66527557373047, "learning_rate": 1.7447364586406065e-07, "loss": 3.0853, "step": 2420 }, { "epoch": 16.30976430976431, "grad_norm": 112.44169616699219, "learning_rate": 1.6853038769745465e-07, "loss": 3.1196, "step": 2430 }, { "epoch": 16.377104377104377, "grad_norm": 69.51861572265625, "learning_rate": 1.6268079049587202e-07, "loss": 3.2361, "step": 2440 }, { "epoch": 16.444444444444443, "grad_norm": 103.8697280883789, "learning_rate": 1.5692551318768555e-07, "loss": 2.6652, "step": 2450 }, { "epoch": 16.511784511784512, "grad_norm": 74.151123046875, "learning_rate": 1.5126520407659615e-07, "loss": 3.2079, "step": 2460 }, { "epoch": 16.579124579124578, "grad_norm": 36.384849548339844, "learning_rate": 1.4570050076860342e-07, "loss": 3.305, "step": 2470 }, { "epoch": 16.646464646464647, "grad_norm": 44.863197326660156, "learning_rate": 1.4023203010018392e-07, "loss": 3.057, "step": 2480 }, { "epoch": 16.713804713804713, "grad_norm": 76.89086151123047, "learning_rate": 1.3486040806767995e-07, "loss": 2.7599, "step": 2490 }, { "epoch": 16.781144781144782, "grad_norm": 109.27088165283203, "learning_rate": 1.2958623975791116e-07, "loss": 3.2871, "step": 2500 }, { "epoch": 16.781144781144782, "eval_loss": 0.39846310019493103, "eval_mae": 0.43279558420181274, "eval_mse": 0.39846310019493103, "eval_r2": 0.12843096256256104, "eval_rmse": 0.631239336698, "eval_runtime": 10.8644, "eval_samples_per_second": 437.116, "eval_steps_per_second": 13.715, "step": 2500 }, { "epoch": 16.848484848484848, "grad_norm": 82.22573852539062, "learning_rate": 1.2441011928001432e-07, "loss": 3.2287, "step": 2510 }, { "epoch": 16.915824915824917, "grad_norm": 42.24697494506836, "learning_rate": 1.1933262969851986e-07, "loss": 3.3869, "step": 2520 }, { "epoch": 16.983164983164983, "grad_norm": 72.74343872070312, "learning_rate": 1.1435434296767232e-07, "loss": 2.4649, "step": 2530 }, { "epoch": 17.04713804713805, "grad_norm": 78.49395751953125, "learning_rate": 1.0947581986700304e-07, "loss": 2.7714, "step": 2540 }, { "epoch": 17.114478114478114, "grad_norm": 79.34912109375, "learning_rate": 1.0469760993816056e-07, "loss": 3.0943, "step": 2550 }, { "epoch": 17.181818181818183, "grad_norm": 146.29653930664062, "learning_rate": 1.0002025142300763e-07, "loss": 3.169, "step": 2560 }, { "epoch": 17.24915824915825, "grad_norm": 59.34313201904297, "learning_rate": 9.544427120299136e-08, "loss": 3.1553, "step": 2570 }, { "epoch": 17.316498316498315, "grad_norm": 91.36643981933594, "learning_rate": 9.097018473979124e-08, "loss": 3.4817, "step": 2580 }, { "epoch": 17.383838383838384, "grad_norm": 67.67630767822266, "learning_rate": 8.659849601725699e-08, "loss": 2.8176, "step": 2590 }, { "epoch": 17.45117845117845, "grad_norm": 41.56998062133789, "learning_rate": 8.23296974846357e-08, "loss": 3.2999, "step": 2600 }, { "epoch": 17.45117845117845, "eval_loss": 0.398359090089798, "eval_mae": 0.43279075622558594, "eval_mse": 0.398359090089798, "eval_r2": 0.1286584734916687, "eval_rmse": 0.6311569456876776, "eval_runtime": 10.8587, "eval_samples_per_second": 437.345, "eval_steps_per_second": 13.722, "step": 2600 }, { "epoch": 17.51851851851852, "grad_norm": 52.5406494140625, "learning_rate": 7.816427000110015e-08, "loss": 3.3146, "step": 2610 }, { "epoch": 17.585858585858585, "grad_norm": 20.456485748291016, "learning_rate": 7.410268278158272e-08, "loss": 2.9294, "step": 2620 }, { "epoch": 17.653198653198654, "grad_norm": 39.06353759765625, "learning_rate": 7.01453933439201e-08, "loss": 3.2265, "step": 2630 }, { "epoch": 17.72053872053872, "grad_norm": 55.71407699584961, "learning_rate": 6.6292847457317e-08, "loss": 3.2643, "step": 2640 }, { "epoch": 17.78787878787879, "grad_norm": 54.6032600402832, "learning_rate": 6.254547909213148e-08, "loss": 3.027, "step": 2650 }, { "epoch": 17.855218855218855, "grad_norm": 122.32026672363281, "learning_rate": 5.8903710370991063e-08, "loss": 2.8101, "step": 2660 }, { "epoch": 17.922558922558924, "grad_norm": 52.49361038208008, "learning_rate": 5.536795152124252e-08, "loss": 3.2379, "step": 2670 }, { "epoch": 17.98989898989899, "grad_norm": 53.10639572143555, "learning_rate": 5.193860082874124e-08, "loss": 2.6869, "step": 2680 }, { "epoch": 18.053872053872055, "grad_norm": 39.32139587402344, "learning_rate": 4.861604459298696e-08, "loss": 3.0614, "step": 2690 }, { "epoch": 18.12121212121212, "grad_norm": 46.23768997192383, "learning_rate": 4.540065708360885e-08, "loss": 3.0549, "step": 2700 }, { "epoch": 18.12121212121212, "eval_loss": 0.39805683493614197, "eval_mae": 0.43268144130706787, "eval_mse": 0.39805683493614197, "eval_r2": 0.12931960821151733, "eval_rmse": 0.6309174549306288, "eval_runtime": 10.8581, "eval_samples_per_second": 437.37, "eval_steps_per_second": 13.723, "step": 2700 }, { "epoch": 18.188552188552187, "grad_norm": 53.21854019165039, "learning_rate": 4.2292800498205605e-08, "loss": 3.2828, "step": 2710 }, { "epoch": 18.255892255892256, "grad_norm": 105.25486755371094, "learning_rate": 3.929282492154606e-08, "loss": 2.9252, "step": 2720 }, { "epoch": 18.32323232323232, "grad_norm": 91.48981475830078, "learning_rate": 3.640106828613354e-08, "loss": 3.3832, "step": 2730 }, { "epoch": 18.39057239057239, "grad_norm": 53.70878219604492, "learning_rate": 3.3617856334139606e-08, "loss": 3.0319, "step": 2740 }, { "epoch": 18.457912457912457, "grad_norm": 36.42780685424805, "learning_rate": 3.094350258071077e-08, "loss": 2.8867, "step": 2750 }, { "epoch": 18.525252525252526, "grad_norm": 79.02967834472656, "learning_rate": 2.8378308278652287e-08, "loss": 2.7116, "step": 2760 }, { "epoch": 18.59259259259259, "grad_norm": 29.02677345275879, "learning_rate": 2.5922562384494194e-08, "loss": 2.8692, "step": 2770 }, { "epoch": 18.65993265993266, "grad_norm": 33.947059631347656, "learning_rate": 2.357654152594113e-08, "loss": 3.0665, "step": 2780 }, { "epoch": 18.727272727272727, "grad_norm": 40.996761322021484, "learning_rate": 2.1340509970711462e-08, "loss": 3.0797, "step": 2790 }, { "epoch": 18.794612794612796, "grad_norm": 87.2491455078125, "learning_rate": 1.921471959676957e-08, "loss": 3.096, "step": 2800 }, { "epoch": 18.794612794612796, "eval_loss": 0.39800581336021423, "eval_mae": 0.4328523278236389, "eval_mse": 0.39800581336021423, "eval_r2": 0.12943118810653687, "eval_rmse": 0.6308770192043884, "eval_runtime": 10.8657, "eval_samples_per_second": 437.063, "eval_steps_per_second": 13.713, "step": 2800 }, { "epoch": 18.86195286195286, "grad_norm": 31.434661865234375, "learning_rate": 1.719940986395252e-08, "loss": 3.1402, "step": 2810 }, { "epoch": 18.92929292929293, "grad_norm": 58.002742767333984, "learning_rate": 1.5294807786996212e-08, "loss": 3.0432, "step": 2820 }, { "epoch": 18.996632996632997, "grad_norm": 76.90914154052734, "learning_rate": 1.3501127909963272e-08, "loss": 3.3857, "step": 2830 }, { "epoch": 19.060606060606062, "grad_norm": 38.15789031982422, "learning_rate": 1.1818572282075389e-08, "loss": 2.8411, "step": 2840 }, { "epoch": 19.127946127946128, "grad_norm": 26.438907623291016, "learning_rate": 1.0247330434954072e-08, "loss": 3.7133, "step": 2850 }, { "epoch": 19.195286195286194, "grad_norm": 52.07809066772461, "learning_rate": 8.787579361270614e-09, "loss": 3.2155, "step": 2860 }, { "epoch": 19.262626262626263, "grad_norm": 50.276893615722656, "learning_rate": 7.439483494808496e-09, "loss": 3.1271, "step": 2870 }, { "epoch": 19.32996632996633, "grad_norm": 33.2664680480957, "learning_rate": 6.2031946919409714e-09, "loss": 3.0021, "step": 2880 }, { "epoch": 19.397306397306398, "grad_norm": 39.3897819519043, "learning_rate": 5.078852214525198e-09, "loss": 2.8237, "step": 2890 }, { "epoch": 19.464646464646464, "grad_norm": 32.2919921875, "learning_rate": 4.066582714214895e-09, "loss": 3.1748, "step": 2900 }, { "epoch": 19.464646464646464, "eval_loss": 0.39797738194465637, "eval_mae": 0.4328324794769287, "eval_mse": 0.39797738194465637, "eval_r2": 0.1294933557510376, "eval_rmse": 0.6308544855548357, "eval_runtime": 20.8748, "eval_samples_per_second": 227.499, "eval_steps_per_second": 7.138, "step": 2900 }, { "epoch": 19.531986531986533, "grad_norm": 50.99618148803711, "learning_rate": 3.166500218193757e-09, "loss": 3.0981, "step": 2910 }, { "epoch": 19.5993265993266, "grad_norm": 59.15031814575195, "learning_rate": 2.378706116330953e-09, "loss": 3.295, "step": 2920 }, { "epoch": 19.666666666666668, "grad_norm": 95.25682067871094, "learning_rate": 1.7032891497600344e-09, "loss": 3.0877, "step": 2930 }, { "epoch": 19.734006734006734, "grad_norm": 18.327655792236328, "learning_rate": 1.1403254008822693e-09, "loss": 2.8862, "step": 2940 }, { "epoch": 19.801346801346803, "grad_norm": 119.63570404052734, "learning_rate": 6.898782847971629e-10, "loss": 2.8826, "step": 2950 }, { "epoch": 19.86868686868687, "grad_norm": 75.96342468261719, "learning_rate": 3.519985421581717e-10, "loss": 3.2191, "step": 2960 }, { "epoch": 19.936026936026934, "grad_norm": 42.8014030456543, "learning_rate": 1.2672423345760907e-10, "loss": 3.1054, "step": 2970 }, { "epoch": 20.0, "grad_norm": 41.9579963684082, "learning_rate": 1.4080734739074785e-11, "loss": 2.4836, "step": 2980 }, { "epoch": 20.067340067340066, "grad_norm": 56.41508483886719, "learning_rate": 1.3118305526298526e-06, "loss": 2.992, "step": 2990 }, { "epoch": 20.134680134680135, "grad_norm": 45.424659729003906, "learning_rate": 1.3078103535599108e-06, "loss": 2.8767, "step": 3000 }, { "epoch": 20.134680134680135, "eval_loss": 0.39774614572525024, "eval_mae": 0.4322699308395386, "eval_mse": 0.39774614572525024, "eval_r2": 0.12999916076660156, "eval_rmse": 0.6306711866933912, "eval_runtime": 10.7704, "eval_samples_per_second": 440.93, "eval_steps_per_second": 13.834, "step": 3000 }, { "epoch": 20.2020202020202, "grad_norm": 38.1443977355957, "learning_rate": 1.3037846514243095e-06, "loss": 2.8325, "step": 3010 }, { "epoch": 20.26936026936027, "grad_norm": 115.02192687988281, "learning_rate": 1.299753518194973e-06, "loss": 2.8518, "step": 3020 }, { "epoch": 20.336700336700336, "grad_norm": 56.13883972167969, "learning_rate": 1.2957170259409214e-06, "loss": 2.8953, "step": 3030 }, { "epoch": 20.404040404040405, "grad_norm": 181.08006286621094, "learning_rate": 1.2916752468269854e-06, "loss": 3.3026, "step": 3040 }, { "epoch": 20.47138047138047, "grad_norm": 33.99216842651367, "learning_rate": 1.2876282531125138e-06, "loss": 3.2592, "step": 3050 }, { "epoch": 20.53872053872054, "grad_norm": 34.81891632080078, "learning_rate": 1.2835761171500832e-06, "loss": 3.3634, "step": 3060 }, { "epoch": 20.606060606060606, "grad_norm": 22.277950286865234, "learning_rate": 1.279518911384204e-06, "loss": 3.3294, "step": 3070 }, { "epoch": 20.673400673400675, "grad_norm": 48.10467529296875, "learning_rate": 1.2754567083500245e-06, "loss": 2.9215, "step": 3080 }, { "epoch": 20.74074074074074, "grad_norm": 90.35417938232422, "learning_rate": 1.271389580672035e-06, "loss": 3.3752, "step": 3090 }, { "epoch": 20.80808080808081, "grad_norm": 32.69667434692383, "learning_rate": 1.2673176010627689e-06, "loss": 2.839, "step": 3100 }, { "epoch": 20.80808080808081, "eval_loss": 0.4024805426597595, "eval_mae": 0.4400625228881836, "eval_mse": 0.4024805426597595, "eval_r2": 0.11964350938796997, "eval_rmse": 0.6344135423048278, "eval_runtime": 10.7676, "eval_samples_per_second": 441.044, "eval_steps_per_second": 13.838, "step": 3100 }, { "epoch": 20.875420875420875, "grad_norm": 152.41514587402344, "learning_rate": 1.2632408423215032e-06, "loss": 3.4181, "step": 3110 }, { "epoch": 20.94276094276094, "grad_norm": 177.6971893310547, "learning_rate": 1.2591593773329567e-06, "loss": 3.177, "step": 3120 }, { "epoch": 21.006734006734007, "grad_norm": 80.43561553955078, "learning_rate": 1.2550732790659866e-06, "loss": 3.1402, "step": 3130 }, { "epoch": 21.074074074074073, "grad_norm": 73.39543914794922, "learning_rate": 1.2509826205722851e-06, "loss": 2.761, "step": 3140 }, { "epoch": 21.141414141414142, "grad_norm": 86.590087890625, "learning_rate": 1.2468874749850713e-06, "loss": 3.269, "step": 3150 }, { "epoch": 21.208754208754208, "grad_norm": 111.94889068603516, "learning_rate": 1.2427879155177865e-06, "loss": 2.9544, "step": 3160 }, { "epoch": 21.276094276094277, "grad_norm": 70.47743225097656, "learning_rate": 1.238684015462783e-06, "loss": 3.2046, "step": 3170 }, { "epoch": 21.343434343434343, "grad_norm": 48.27582931518555, "learning_rate": 1.2345758481900148e-06, "loss": 3.0661, "step": 3180 }, { "epoch": 21.410774410774412, "grad_norm": 35.6608772277832, "learning_rate": 1.2304634871457254e-06, "loss": 3.1323, "step": 3190 }, { "epoch": 21.478114478114477, "grad_norm": 103.11399841308594, "learning_rate": 1.2263470058511353e-06, "loss": 3.056, "step": 3200 }, { "epoch": 21.478114478114477, "eval_loss": 0.39589980244636536, "eval_mae": 0.434359073638916, "eval_mse": 0.39589977264404297, "eval_r2": 0.1340377926826477, "eval_rmse": 0.6292056680005695, "eval_runtime": 10.7502, "eval_samples_per_second": 441.758, "eval_steps_per_second": 13.86, "step": 3200 }, { "epoch": 21.545454545454547, "grad_norm": 19.543153762817383, "learning_rate": 1.2222264779011274e-06, "loss": 3.0307, "step": 3210 }, { "epoch": 21.612794612794612, "grad_norm": 56.48185348510742, "learning_rate": 1.2181019769629304e-06, "loss": 2.6976, "step": 3220 }, { "epoch": 21.68013468013468, "grad_norm": 42.77165222167969, "learning_rate": 1.213973576774803e-06, "loss": 3.1359, "step": 3230 }, { "epoch": 21.747474747474747, "grad_norm": 66.62501525878906, "learning_rate": 1.2098413511447158e-06, "loss": 3.3216, "step": 3240 }, { "epoch": 21.814814814814813, "grad_norm": 65.09385681152344, "learning_rate": 1.2057053739490295e-06, "loss": 2.8891, "step": 3250 }, { "epoch": 21.882154882154882, "grad_norm": 61.32107162475586, "learning_rate": 1.2015657191311767e-06, "loss": 2.8055, "step": 3260 }, { "epoch": 21.949494949494948, "grad_norm": 212.0358123779297, "learning_rate": 1.1974224607003392e-06, "loss": 3.3648, "step": 3270 }, { "epoch": 22.013468013468014, "grad_norm": 62.74721145629883, "learning_rate": 1.1932756727301235e-06, "loss": 3.0622, "step": 3280 }, { "epoch": 22.08080808080808, "grad_norm": 22.63941764831543, "learning_rate": 1.1891254293572387e-06, "loss": 3.1695, "step": 3290 }, { "epoch": 22.14814814814815, "grad_norm": 69.91462707519531, "learning_rate": 1.1849718047801685e-06, "loss": 2.8843, "step": 3300 }, { "epoch": 22.14814814814815, "eval_loss": 0.3988635241985321, "eval_mae": 0.4337315261363983, "eval_mse": 0.3988635540008545, "eval_r2": 0.12755507230758667, "eval_rmse": 0.6315564535343254, "eval_runtime": 10.752, "eval_samples_per_second": 441.686, "eval_steps_per_second": 13.858, "step": 3300 }, { "epoch": 22.215488215488215, "grad_norm": 76.05679321289062, "learning_rate": 1.180814873257848e-06, "loss": 3.0209, "step": 3310 }, { "epoch": 22.282828282828284, "grad_norm": 84.73878479003906, "learning_rate": 1.1766547091083328e-06, "loss": 3.3205, "step": 3320 }, { "epoch": 22.35016835016835, "grad_norm": 86.2876968383789, "learning_rate": 1.1724913867074723e-06, "loss": 3.0435, "step": 3330 }, { "epoch": 22.41750841750842, "grad_norm": 21.69998550415039, "learning_rate": 1.1683249804875794e-06, "loss": 2.9976, "step": 3340 }, { "epoch": 22.484848484848484, "grad_norm": 46.904884338378906, "learning_rate": 1.1641555649360997e-06, "loss": 2.8479, "step": 3350 }, { "epoch": 22.552188552188554, "grad_norm": 77.06253814697266, "learning_rate": 1.15998321459428e-06, "loss": 2.8871, "step": 3360 }, { "epoch": 22.61952861952862, "grad_norm": 49.95879364013672, "learning_rate": 1.1558080040558358e-06, "loss": 3.186, "step": 3370 }, { "epoch": 22.686868686868685, "grad_norm": 53.09878158569336, "learning_rate": 1.1516300079656167e-06, "loss": 3.0538, "step": 3380 }, { "epoch": 22.754208754208754, "grad_norm": 63.7835693359375, "learning_rate": 1.1474493010182743e-06, "loss": 3.1793, "step": 3390 }, { "epoch": 22.82154882154882, "grad_norm": 197.92782592773438, "learning_rate": 1.1432659579569232e-06, "loss": 3.1385, "step": 3400 }, { "epoch": 22.82154882154882, "eval_loss": 0.3923128843307495, "eval_mae": 0.4328295588493347, "eval_mse": 0.3923128843307495, "eval_r2": 0.1418834924697876, "eval_rmse": 0.6263488519433476, "eval_runtime": 10.7382, "eval_samples_per_second": 442.253, "eval_steps_per_second": 13.876, "step": 3400 }, { "epoch": 22.88888888888889, "grad_norm": 20.61293601989746, "learning_rate": 1.1390800535718088e-06, "loss": 2.9661, "step": 3410 }, { "epoch": 22.956228956228955, "grad_norm": 59.83302307128906, "learning_rate": 1.1348916626989658e-06, "loss": 3.2475, "step": 3420 }, { "epoch": 23.02020202020202, "grad_norm": 83.3632583618164, "learning_rate": 1.1307008602188843e-06, "loss": 3.4449, "step": 3430 }, { "epoch": 23.087542087542086, "grad_norm": 172.97735595703125, "learning_rate": 1.1265077210551693e-06, "loss": 3.2864, "step": 3440 }, { "epoch": 23.154882154882156, "grad_norm": 204.04209899902344, "learning_rate": 1.1223123201732e-06, "loss": 2.963, "step": 3450 }, { "epoch": 23.22222222222222, "grad_norm": 154.60317993164062, "learning_rate": 1.1181147325787924e-06, "loss": 3.1822, "step": 3460 }, { "epoch": 23.28956228956229, "grad_norm": 103.0447998046875, "learning_rate": 1.113915033316856e-06, "loss": 2.8686, "step": 3470 }, { "epoch": 23.356902356902356, "grad_norm": 87.26648712158203, "learning_rate": 1.1097132974700527e-06, "loss": 3.3001, "step": 3480 }, { "epoch": 23.424242424242426, "grad_norm": 22.653345108032227, "learning_rate": 1.1055096001574549e-06, "loss": 2.9024, "step": 3490 }, { "epoch": 23.49158249158249, "grad_norm": 67.93629455566406, "learning_rate": 1.1013040165332023e-06, "loss": 3.0265, "step": 3500 }, { "epoch": 23.49158249158249, "eval_loss": 0.3909620940685272, "eval_mae": 0.43135154247283936, "eval_mse": 0.3909620940685272, "eval_r2": 0.1448381543159485, "eval_rmse": 0.6252696171001172, "eval_runtime": 10.7317, "eval_samples_per_second": 442.523, "eval_steps_per_second": 13.884, "step": 3500 }, { "epoch": 23.55892255892256, "grad_norm": 77.28096771240234, "learning_rate": 1.0970966217851586e-06, "loss": 2.6866, "step": 3510 }, { "epoch": 23.626262626262626, "grad_norm": 106.72384643554688, "learning_rate": 1.092887491133567e-06, "loss": 3.0238, "step": 3520 }, { "epoch": 23.693602693602692, "grad_norm": 53.55656433105469, "learning_rate": 1.0886766998297036e-06, "loss": 2.9179, "step": 3530 }, { "epoch": 23.76094276094276, "grad_norm": 155.9110107421875, "learning_rate": 1.084464323154537e-06, "loss": 2.8998, "step": 3540 }, { "epoch": 23.828282828282827, "grad_norm": 66.44853973388672, "learning_rate": 1.0802504364173762e-06, "loss": 2.8701, "step": 3550 }, { "epoch": 23.895622895622896, "grad_norm": 80.22246551513672, "learning_rate": 1.0760351149545295e-06, "loss": 3.0116, "step": 3560 }, { "epoch": 23.962962962962962, "grad_norm": 40.43516159057617, "learning_rate": 1.0718184341279532e-06, "loss": 2.8837, "step": 3570 }, { "epoch": 24.026936026936028, "grad_norm": 57.512821197509766, "learning_rate": 1.0676004693239089e-06, "loss": 2.5831, "step": 3580 }, { "epoch": 24.094276094276093, "grad_norm": 38.831390380859375, "learning_rate": 1.0633812959516115e-06, "loss": 3.3523, "step": 3590 }, { "epoch": 24.161616161616163, "grad_norm": 82.73333740234375, "learning_rate": 1.0591609894418833e-06, "loss": 2.9624, "step": 3600 }, { "epoch": 24.161616161616163, "eval_loss": 0.39266660809516907, "eval_mae": 0.4324099123477936, "eval_mse": 0.3926665782928467, "eval_r2": 0.1411098837852478, "eval_rmse": 0.6266311341553711, "eval_runtime": 10.7523, "eval_samples_per_second": 441.673, "eval_steps_per_second": 13.858, "step": 3600 }, { "epoch": 24.22895622895623, "grad_norm": 185.1522216796875, "learning_rate": 1.054939625245805e-06, "loss": 3.1191, "step": 3610 }, { "epoch": 24.296296296296298, "grad_norm": 331.47515869140625, "learning_rate": 1.0507172788333669e-06, "loss": 3.4605, "step": 3620 }, { "epoch": 24.363636363636363, "grad_norm": 61.91050720214844, "learning_rate": 1.0464940256921194e-06, "loss": 2.8343, "step": 3630 }, { "epoch": 24.430976430976433, "grad_norm": 28.32408905029297, "learning_rate": 1.0422699413258227e-06, "loss": 3.2182, "step": 3640 }, { "epoch": 24.4983164983165, "grad_norm": 50.47556686401367, "learning_rate": 1.0380451012530988e-06, "loss": 2.9718, "step": 3650 }, { "epoch": 24.565656565656564, "grad_norm": 77.052978515625, "learning_rate": 1.0338195810060804e-06, "loss": 3.4837, "step": 3660 }, { "epoch": 24.632996632996633, "grad_norm": 60.74066925048828, "learning_rate": 1.029593456129059e-06, "loss": 3.0002, "step": 3670 }, { "epoch": 24.7003367003367, "grad_norm": 54.98184585571289, "learning_rate": 1.0253668021771377e-06, "loss": 3.2584, "step": 3680 }, { "epoch": 24.767676767676768, "grad_norm": 66.43016052246094, "learning_rate": 1.0211396947148766e-06, "loss": 2.8182, "step": 3690 }, { "epoch": 24.835016835016834, "grad_norm": 112.4431381225586, "learning_rate": 1.0169122093149447e-06, "loss": 2.8197, "step": 3700 }, { "epoch": 24.835016835016834, "eval_loss": 0.3900994062423706, "eval_mae": 0.4318752586841583, "eval_mse": 0.3900994062423706, "eval_r2": 0.1467251181602478, "eval_rmse": 0.624579383459277, "eval_runtime": 10.7461, "eval_samples_per_second": 441.927, "eval_steps_per_second": 13.865, "step": 3700 }, { "epoch": 24.902356902356903, "grad_norm": 16.18470573425293, "learning_rate": 1.012684421556768e-06, "loss": 2.6192, "step": 3710 }, { "epoch": 24.96969696969697, "grad_norm": 57.896175384521484, "learning_rate": 1.0084564070251775e-06, "loss": 2.7055, "step": 3720 }, { "epoch": 25.033670033670035, "grad_norm": 59.782711029052734, "learning_rate": 1.0042282413090585e-06, "loss": 2.9249, "step": 3730 }, { "epoch": 25.1010101010101, "grad_norm": 56.17929458618164, "learning_rate": 1e-06, "loss": 3.3511, "step": 3740 }, { "epoch": 25.16835016835017, "grad_norm": 180.00607299804688, "learning_rate": 9.957717586909414e-07, "loss": 3.0369, "step": 3750 }, { "epoch": 25.235690235690235, "grad_norm": 129.3335723876953, "learning_rate": 9.915435929748224e-07, "loss": 3.008, "step": 3760 }, { "epoch": 25.303030303030305, "grad_norm": 233.1388702392578, "learning_rate": 9.87315578443232e-07, "loss": 2.821, "step": 3770 }, { "epoch": 25.37037037037037, "grad_norm": 54.634727478027344, "learning_rate": 9.830877906850554e-07, "loss": 3.0287, "step": 3780 }, { "epoch": 25.437710437710436, "grad_norm": 127.29168701171875, "learning_rate": 9.788603052851235e-07, "loss": 3.1753, "step": 3790 }, { "epoch": 25.505050505050505, "grad_norm": 80.46566009521484, "learning_rate": 9.746331978228622e-07, "loss": 3.0996, "step": 3800 }, { "epoch": 25.505050505050505, "eval_loss": 0.3869335949420929, "eval_mae": 0.4272298514842987, "eval_mse": 0.3869335949420929, "eval_r2": 0.15364980697631836, "eval_rmse": 0.6220398660392217, "eval_runtime": 10.7626, "eval_samples_per_second": 441.249, "eval_steps_per_second": 13.844, "step": 3800 }, { "epoch": 25.57239057239057, "grad_norm": 163.5589599609375, "learning_rate": 9.704065438709408e-07, "loss": 2.8118, "step": 3810 }, { "epoch": 25.63973063973064, "grad_norm": 80.6302261352539, "learning_rate": 9.6618041899392e-07, "loss": 2.71, "step": 3820 }, { "epoch": 25.707070707070706, "grad_norm": 77.66490173339844, "learning_rate": 9.619548987469013e-07, "loss": 2.6487, "step": 3830 }, { "epoch": 25.774410774410775, "grad_norm": 120.21029663085938, "learning_rate": 9.577300586741774e-07, "loss": 3.039, "step": 3840 }, { "epoch": 25.84175084175084, "grad_norm": 28.7191219329834, "learning_rate": 9.535059743078808e-07, "loss": 3.2076, "step": 3850 }, { "epoch": 25.90909090909091, "grad_norm": 55.50111389160156, "learning_rate": 9.492827211666327e-07, "loss": 2.9919, "step": 3860 }, { "epoch": 25.976430976430976, "grad_norm": 67.55938720703125, "learning_rate": 9.450603747541951e-07, "loss": 3.0713, "step": 3870 }, { "epoch": 26.04040404040404, "grad_norm": 42.80801773071289, "learning_rate": 9.408390105581167e-07, "loss": 2.6671, "step": 3880 }, { "epoch": 26.107744107744107, "grad_norm": 31.176504135131836, "learning_rate": 9.366187040483885e-07, "loss": 2.8954, "step": 3890 }, { "epoch": 26.175084175084177, "grad_norm": 100.56493377685547, "learning_rate": 9.323995306760907e-07, "loss": 2.9897, "step": 3900 }, { "epoch": 26.175084175084177, "eval_loss": 0.38604193925857544, "eval_mae": 0.4292107820510864, "eval_mse": 0.38604193925857544, "eval_r2": 0.15560007095336914, "eval_rmse": 0.621322733576179, "eval_runtime": 10.758, "eval_samples_per_second": 441.439, "eval_steps_per_second": 13.85, "step": 3900 }, { "epoch": 26.242424242424242, "grad_norm": 93.59069061279297, "learning_rate": 9.281815658720465e-07, "loss": 3.2161, "step": 3910 }, { "epoch": 26.30976430976431, "grad_norm": 21.139225006103516, "learning_rate": 9.239648850454709e-07, "loss": 2.9308, "step": 3920 }, { "epoch": 26.377104377104377, "grad_norm": 107.8580551147461, "learning_rate": 9.197495635826238e-07, "loss": 2.6516, "step": 3930 }, { "epoch": 26.444444444444443, "grad_norm": 65.13081359863281, "learning_rate": 9.155356768454631e-07, "loss": 2.9378, "step": 3940 }, { "epoch": 26.511784511784512, "grad_norm": 63.552207946777344, "learning_rate": 9.113233001702963e-07, "loss": 3.2755, "step": 3950 }, { "epoch": 26.579124579124578, "grad_norm": 44.6680793762207, "learning_rate": 9.071125088664333e-07, "loss": 2.9736, "step": 3960 }, { "epoch": 26.646464646464647, "grad_norm": 67.10111236572266, "learning_rate": 9.029033782148415e-07, "loss": 3.0643, "step": 3970 }, { "epoch": 26.713804713804713, "grad_norm": 59.37371063232422, "learning_rate": 8.986959834667975e-07, "loss": 3.039, "step": 3980 }, { "epoch": 26.781144781144782, "grad_norm": 34.969398498535156, "learning_rate": 8.944903998425451e-07, "loss": 3.1565, "step": 3990 }, { "epoch": 26.848484848484848, "grad_norm": 101.75707244873047, "learning_rate": 8.902867025299474e-07, "loss": 3.1206, "step": 4000 }, { "epoch": 26.848484848484848, "eval_loss": 0.3837489187717438, "eval_mae": 0.42771080136299133, "eval_mse": 0.3837489187717438, "eval_r2": 0.1606156826019287, "eval_rmse": 0.6194747119711537, "eval_runtime": 10.7505, "eval_samples_per_second": 441.745, "eval_steps_per_second": 13.86, "step": 4000 }, { "epoch": 26.915824915824917, "grad_norm": 81.40385437011719, "learning_rate": 8.860849666831438e-07, "loss": 2.6408, "step": 4010 }, { "epoch": 26.983164983164983, "grad_norm": 66.9009780883789, "learning_rate": 8.818852674212074e-07, "loss": 2.6638, "step": 4020 }, { "epoch": 27.04713804713805, "grad_norm": 75.89901733398438, "learning_rate": 8.776876798267999e-07, "loss": 2.5141, "step": 4030 }, { "epoch": 27.114478114478114, "grad_norm": 93.8353271484375, "learning_rate": 8.734922789448309e-07, "loss": 3.0824, "step": 4040 }, { "epoch": 27.181818181818183, "grad_norm": 62.6941032409668, "learning_rate": 8.692991397811155e-07, "loss": 2.7452, "step": 4050 }, { "epoch": 27.24915824915825, "grad_norm": 103.0147476196289, "learning_rate": 8.651083373010344e-07, "loss": 2.7612, "step": 4060 }, { "epoch": 27.316498316498315, "grad_norm": 45.43682098388672, "learning_rate": 8.609199464281916e-07, "loss": 3.0658, "step": 4070 }, { "epoch": 27.383838383838384, "grad_norm": 85.02565002441406, "learning_rate": 8.571525184564588e-07, "loss": 2.905, "step": 4080 }, { "epoch": 27.45117845117845, "grad_norm": 59.70896530151367, "learning_rate": 8.529689158964044e-07, "loss": 3.2383, "step": 4090 }, { "epoch": 27.51851851851852, "grad_norm": 127.32763671875, "learning_rate": 8.487879419734325e-07, "loss": 2.6672, "step": 4100 }, { "epoch": 27.51851851851852, "eval_loss": 0.38550788164138794, "eval_mae": 0.43028977513313293, "eval_mse": 0.38550788164138794, "eval_r2": 0.15676826238632202, "eval_rmse": 0.6208928101060504, "eval_runtime": 10.7378, "eval_samples_per_second": 442.269, "eval_steps_per_second": 13.876, "step": 4100 }, { "epoch": 27.585858585858585, "grad_norm": 42.328224182128906, "learning_rate": 8.446096714354324e-07, "loss": 2.7042, "step": 4110 }, { "epoch": 27.653198653198654, "grad_norm": 112.47506713867188, "learning_rate": 8.4043417898196e-07, "loss": 3.0804, "step": 4120 }, { "epoch": 27.72053872053872, "grad_norm": 32.0652961730957, "learning_rate": 8.362615392629064e-07, "loss": 3.0835, "step": 4130 }, { "epoch": 27.78787878787879, "grad_norm": 73.1070556640625, "learning_rate": 8.320918268771603e-07, "loss": 2.9551, "step": 4140 }, { "epoch": 27.855218855218855, "grad_norm": 91.44757080078125, "learning_rate": 8.279251163712755e-07, "loss": 3.17, "step": 4150 }, { "epoch": 27.922558922558924, "grad_norm": 183.9940185546875, "learning_rate": 8.237614822381378e-07, "loss": 3.021, "step": 4160 }, { "epoch": 27.98989898989899, "grad_norm": 58.72197341918945, "learning_rate": 8.196009989156328e-07, "loss": 3.0196, "step": 4170 }, { "epoch": 28.053872053872055, "grad_norm": 69.7625961303711, "learning_rate": 8.154437407853161e-07, "loss": 2.5221, "step": 4180 }, { "epoch": 28.12121212121212, "grad_norm": 59.36160659790039, "learning_rate": 8.112897821710832e-07, "loss": 3.3001, "step": 4190 }, { "epoch": 28.188552188552187, "grad_norm": 111.34796142578125, "learning_rate": 8.071391973378401e-07, "loss": 3.0057, "step": 4200 }, { "epoch": 28.188552188552187, "eval_loss": 0.3824244737625122, "eval_mae": 0.42738208174705505, "eval_mse": 0.3824245035648346, "eval_r2": 0.16351264715194702, "eval_rmse": 0.618404805580321, "eval_runtime": 10.7624, "eval_samples_per_second": 441.258, "eval_steps_per_second": 13.844, "step": 4200 }, { "epoch": 28.255892255892256, "grad_norm": 223.50172424316406, "learning_rate": 8.029920604901761e-07, "loss": 3.2672, "step": 4210 }, { "epoch": 28.32323232323232, "grad_norm": 39.67815017700195, "learning_rate": 7.988484457710365e-07, "loss": 2.9091, "step": 4220 }, { "epoch": 28.39057239057239, "grad_norm": 102.6736831665039, "learning_rate": 7.947084272603987e-07, "loss": 3.2589, "step": 4230 }, { "epoch": 28.457912457912457, "grad_norm": 132.95700073242188, "learning_rate": 7.905720789739463e-07, "loss": 3.0782, "step": 4240 }, { "epoch": 28.525252525252526, "grad_norm": 88.89854431152344, "learning_rate": 7.864394748617454e-07, "loss": 2.6511, "step": 4250 }, { "epoch": 28.59259259259259, "grad_norm": 162.5079345703125, "learning_rate": 7.823106888069251e-07, "loss": 2.3771, "step": 4260 }, { "epoch": 28.65993265993266, "grad_norm": 43.4483642578125, "learning_rate": 7.781857946243528e-07, "loss": 3.0279, "step": 4270 }, { "epoch": 28.727272727272727, "grad_norm": 54.36713409423828, "learning_rate": 7.740648660593183e-07, "loss": 3.0879, "step": 4280 }, { "epoch": 28.794612794612796, "grad_norm": 119.59768676757812, "learning_rate": 7.699479767862125e-07, "loss": 2.9051, "step": 4290 }, { "epoch": 28.86195286195286, "grad_norm": 116.23331451416016, "learning_rate": 7.658352004072124e-07, "loss": 2.9287, "step": 4300 }, { "epoch": 28.86195286195286, "eval_loss": 0.38364121317863464, "eval_mae": 0.4301057755947113, "eval_mse": 0.38364121317863464, "eval_r2": 0.16085129976272583, "eval_rmse": 0.6193877728682047, "eval_runtime": 10.7597, "eval_samples_per_second": 441.371, "eval_steps_per_second": 13.848, "step": 4300 }, { "epoch": 28.92929292929293, "grad_norm": 73.90022277832031, "learning_rate": 7.617266104509639e-07, "loss": 3.1894, "step": 4310 }, { "epoch": 28.996632996632997, "grad_norm": 119.14134979248047, "learning_rate": 7.576222803712669e-07, "loss": 2.4577, "step": 4320 }, { "epoch": 29.060606060606062, "grad_norm": 70.203125, "learning_rate": 7.535222835457636e-07, "loss": 3.1381, "step": 4330 }, { "epoch": 29.127946127946128, "grad_norm": 55.90498733520508, "learning_rate": 7.494266932746257e-07, "loss": 2.9301, "step": 4340 }, { "epoch": 29.195286195286194, "grad_norm": 40.811649322509766, "learning_rate": 7.453355827792438e-07, "loss": 2.5312, "step": 4350 }, { "epoch": 29.262626262626263, "grad_norm": 77.29155731201172, "learning_rate": 7.412490252009195e-07, "loss": 2.8281, "step": 4360 }, { "epoch": 29.32996632996633, "grad_norm": 84.62451934814453, "learning_rate": 7.371670935995548e-07, "loss": 2.7472, "step": 4370 }, { "epoch": 29.397306397306398, "grad_norm": 117.51617431640625, "learning_rate": 7.330898609523501e-07, "loss": 2.731, "step": 4380 }, { "epoch": 29.464646464646464, "grad_norm": 71.5670394897461, "learning_rate": 7.290174001524966e-07, "loss": 2.8786, "step": 4390 }, { "epoch": 29.531986531986533, "grad_norm": 122.04474639892578, "learning_rate": 7.24949784007874e-07, "loss": 2.9772, "step": 4400 }, { "epoch": 29.531986531986533, "eval_loss": 0.38191521167755127, "eval_mae": 0.4278460144996643, "eval_mse": 0.38191521167755127, "eval_r2": 0.1646265983581543, "eval_rmse": 0.6179928896658531, "eval_runtime": 10.7459, "eval_samples_per_second": 441.936, "eval_steps_per_second": 13.866, "step": 4400 }, { "epoch": 29.5993265993266, "grad_norm": 104.42582702636719, "learning_rate": 7.208870852397487e-07, "loss": 2.9443, "step": 4410 }, { "epoch": 29.666666666666668, "grad_norm": 219.044677734375, "learning_rate": 7.168293764814734e-07, "loss": 3.0569, "step": 4420 }, { "epoch": 29.734006734006734, "grad_norm": 147.8644561767578, "learning_rate": 7.127767302771893e-07, "loss": 3.0951, "step": 4430 }, { "epoch": 29.801346801346803, "grad_norm": 214.3123321533203, "learning_rate": 7.087292190805289e-07, "loss": 3.1318, "step": 4440 }, { "epoch": 29.86868686868687, "grad_norm": 106.24519348144531, "learning_rate": 7.046869152533202e-07, "loss": 2.8645, "step": 4450 }, { "epoch": 29.936026936026934, "grad_norm": 96.06432342529297, "learning_rate": 7.006498910642931e-07, "loss": 2.8886, "step": 4460 }, { "epoch": 30.0, "grad_norm": 184.3991241455078, "learning_rate": 6.966182186877878e-07, "loss": 2.7943, "step": 4470 }, { "epoch": 30.067340067340066, "grad_norm": 31.826107025146484, "learning_rate": 6.925919702024645e-07, "loss": 2.8663, "step": 4480 }, { "epoch": 30.134680134680135, "grad_norm": 67.34457397460938, "learning_rate": 6.885712175900142e-07, "loss": 2.8324, "step": 4490 }, { "epoch": 30.2020202020202, "grad_norm": 82.29393768310547, "learning_rate": 6.845560327338725e-07, "loss": 3.048, "step": 4500 }, { "epoch": 30.2020202020202, "eval_loss": 0.3820202350616455, "eval_mae": 0.4293140172958374, "eval_mse": 0.3820202350616455, "eval_r2": 0.16439688205718994, "eval_rmse": 0.6180778551781689, "eval_runtime": 10.7423, "eval_samples_per_second": 442.084, "eval_steps_per_second": 13.87, "step": 4500 }, { "epoch": 30.26936026936027, "grad_norm": 146.4005584716797, "learning_rate": 6.805464874179328e-07, "loss": 2.6061, "step": 4510 }, { "epoch": 30.336700336700336, "grad_norm": 185.70030212402344, "learning_rate": 6.765426533252659e-07, "loss": 2.8834, "step": 4520 }, { "epoch": 30.404040404040405, "grad_norm": 146.14596557617188, "learning_rate": 6.725446020368357e-07, "loss": 2.9418, "step": 4530 }, { "epoch": 30.47138047138047, "grad_norm": 185.91603088378906, "learning_rate": 6.68552405030221e-07, "loss": 2.7839, "step": 4540 }, { "epoch": 30.53872053872054, "grad_norm": 46.523075103759766, "learning_rate": 6.645661336783373e-07, "loss": 2.8592, "step": 4550 }, { "epoch": 30.606060606060606, "grad_norm": 153.2547607421875, "learning_rate": 6.605858592481595e-07, "loss": 3.1252, "step": 4560 }, { "epoch": 30.673400673400675, "grad_norm": 51.85137176513672, "learning_rate": 6.566116528994503e-07, "loss": 2.9942, "step": 4570 }, { "epoch": 30.74074074074074, "grad_norm": 237.62030029296875, "learning_rate": 6.526435856834855e-07, "loss": 3.3278, "step": 4580 }, { "epoch": 30.80808080808081, "grad_norm": 183.8448028564453, "learning_rate": 6.486817285417859e-07, "loss": 2.8571, "step": 4590 }, { "epoch": 30.875420875420875, "grad_norm": 137.9438934326172, "learning_rate": 6.447261523048474e-07, "loss": 2.8851, "step": 4600 }, { "epoch": 30.875420875420875, "eval_loss": 0.3897983431816101, "eval_mae": 0.43571600317955017, "eval_mse": 0.3897983431816101, "eval_r2": 0.14738363027572632, "eval_rmse": 0.6243383242934957, "eval_runtime": 10.7311, "eval_samples_per_second": 442.546, "eval_steps_per_second": 13.885, "step": 4600 }, { "epoch": 30.94276094276094, "grad_norm": 117.74751281738281, "learning_rate": 6.407769276908748e-07, "loss": 3.0225, "step": 4610 }, { "epoch": 31.006734006734007, "grad_norm": 56.87082290649414, "learning_rate": 6.368341253045182e-07, "loss": 2.9127, "step": 4620 }, { "epoch": 31.074074074074073, "grad_norm": 66.84257507324219, "learning_rate": 6.328978156356112e-07, "loss": 2.7786, "step": 4630 }, { "epoch": 31.141414141414142, "grad_norm": 87.09172821044922, "learning_rate": 6.289680690579087e-07, "loss": 3.1569, "step": 4640 }, { "epoch": 31.208754208754208, "grad_norm": 124.70805358886719, "learning_rate": 6.250449558278316e-07, "loss": 2.8801, "step": 4650 }, { "epoch": 31.276094276094277, "grad_norm": 146.22955322265625, "learning_rate": 6.211285460832068e-07, "loss": 2.8026, "step": 4660 }, { "epoch": 31.343434343434343, "grad_norm": 54.569114685058594, "learning_rate": 6.172189098420175e-07, "loss": 2.7644, "step": 4670 }, { "epoch": 31.410774410774412, "grad_norm": 128.6959991455078, "learning_rate": 6.133161170011487e-07, "loss": 2.682, "step": 4680 }, { "epoch": 31.478114478114477, "grad_norm": 30.671436309814453, "learning_rate": 6.094202373351389e-07, "loss": 3.1795, "step": 4690 }, { "epoch": 31.545454545454547, "grad_norm": 140.56893920898438, "learning_rate": 6.055313404949317e-07, "loss": 2.9357, "step": 4700 }, { "epoch": 31.545454545454547, "eval_loss": 0.37819910049438477, "eval_mae": 0.4250909090042114, "eval_mse": 0.37819913029670715, "eval_r2": 0.1727549433708191, "eval_rmse": 0.6149789673612481, "eval_runtime": 10.7442, "eval_samples_per_second": 442.005, "eval_steps_per_second": 13.868, "step": 4700 }, { "epoch": 31.612794612794612, "grad_norm": 86.58052062988281, "learning_rate": 6.016494960066307e-07, "loss": 2.4946, "step": 4710 }, { "epoch": 31.68013468013468, "grad_norm": 142.02850341796875, "learning_rate": 5.977747732702575e-07, "loss": 2.7233, "step": 4720 }, { "epoch": 31.747474747474747, "grad_norm": 50.288997650146484, "learning_rate": 5.9390724155851e-07, "loss": 3.2112, "step": 4730 }, { "epoch": 31.814814814814813, "grad_norm": 128.9806671142578, "learning_rate": 5.900469700155242e-07, "loss": 2.9385, "step": 4740 }, { "epoch": 31.882154882154882, "grad_norm": 49.67361831665039, "learning_rate": 5.861940276556383e-07, "loss": 2.739, "step": 4750 }, { "epoch": 31.949494949494948, "grad_norm": 78.04632568359375, "learning_rate": 5.823484833621576e-07, "loss": 2.945, "step": 4760 }, { "epoch": 32.013468013468014, "grad_norm": 64.5036849975586, "learning_rate": 5.785104058861254e-07, "loss": 2.9391, "step": 4770 }, { "epoch": 32.08080808080808, "grad_norm": 15.103655815124512, "learning_rate": 5.746798638450912e-07, "loss": 2.7188, "step": 4780 }, { "epoch": 32.148148148148145, "grad_norm": 80.08917999267578, "learning_rate": 5.708569257218866e-07, "loss": 2.8637, "step": 4790 }, { "epoch": 32.215488215488215, "grad_norm": 248.5700225830078, "learning_rate": 5.670416598633986e-07, "loss": 2.9054, "step": 4800 }, { "epoch": 32.215488215488215, "eval_loss": 0.3832375705242157, "eval_mae": 0.43175598978996277, "eval_mse": 0.3832375407218933, "eval_r2": 0.1617342233657837, "eval_rmse": 0.619061823020846, "eval_runtime": 10.7476, "eval_samples_per_second": 441.867, "eval_steps_per_second": 13.864, "step": 4800 }, { "epoch": 32.282828282828284, "grad_norm": 32.218074798583984, "learning_rate": 5.632341344793484e-07, "loss": 2.4807, "step": 4810 }, { "epoch": 32.35016835016835, "grad_norm": 161.59373474121094, "learning_rate": 5.594344176410723e-07, "loss": 3.2664, "step": 4820 }, { "epoch": 32.417508417508415, "grad_norm": 29.827117919921875, "learning_rate": 5.556425772803057e-07, "loss": 2.897, "step": 4830 }, { "epoch": 32.484848484848484, "grad_norm": 34.79119873046875, "learning_rate": 5.518586811879666e-07, "loss": 2.4419, "step": 4840 }, { "epoch": 32.552188552188554, "grad_norm": 134.699951171875, "learning_rate": 5.480827970129447e-07, "loss": 2.9018, "step": 4850 }, { "epoch": 32.61952861952862, "grad_norm": 33.867557525634766, "learning_rate": 5.443149922608907e-07, "loss": 2.8539, "step": 4860 }, { "epoch": 32.686868686868685, "grad_norm": 35.65744400024414, "learning_rate": 5.405553342930129e-07, "loss": 2.8731, "step": 4870 }, { "epoch": 32.754208754208754, "grad_norm": 147.55613708496094, "learning_rate": 5.368038903248688e-07, "loss": 2.5096, "step": 4880 }, { "epoch": 32.821548821548824, "grad_norm": 146.02783203125, "learning_rate": 5.330607274251655e-07, "loss": 2.9092, "step": 4890 }, { "epoch": 32.888888888888886, "grad_norm": 47.434593200683594, "learning_rate": 5.293259125145604e-07, "loss": 2.9298, "step": 4900 }, { "epoch": 32.888888888888886, "eval_loss": 0.37756529450416565, "eval_mae": 0.4242517352104187, "eval_mse": 0.37756529450416565, "eval_r2": 0.1741412878036499, "eval_rmse": 0.6144634199886643, "eval_runtime": 10.7524, "eval_samples_per_second": 441.67, "eval_steps_per_second": 13.857, "step": 4900 }, { "epoch": 32.956228956228955, "grad_norm": 35.38823318481445, "learning_rate": 5.255995123644647e-07, "loss": 3.3772, "step": 4910 }, { "epoch": 33.02020202020202, "grad_norm": 151.10342407226562, "learning_rate": 5.218815935958497e-07, "loss": 3.2393, "step": 4920 }, { "epoch": 33.08754208754209, "grad_norm": 208.07186889648438, "learning_rate": 5.181722226780554e-07, "loss": 3.3841, "step": 4930 }, { "epoch": 33.15488215488215, "grad_norm": 25.361433029174805, "learning_rate": 5.144714659276027e-07, "loss": 2.9979, "step": 4940 }, { "epoch": 33.22222222222222, "grad_norm": 93.3867416381836, "learning_rate": 5.107793895070074e-07, "loss": 2.5716, "step": 4950 }, { "epoch": 33.28956228956229, "grad_norm": 137.14991760253906, "learning_rate": 5.070960594235975e-07, "loss": 2.8495, "step": 4960 }, { "epoch": 33.35690235690236, "grad_norm": 77.70392608642578, "learning_rate": 5.034215415283329e-07, "loss": 2.7135, "step": 4970 }, { "epoch": 33.42424242424242, "grad_norm": 74.28765106201172, "learning_rate": 4.997559015146286e-07, "loss": 2.4932, "step": 4980 }, { "epoch": 33.49158249158249, "grad_norm": 64.79545593261719, "learning_rate": 4.960992049171793e-07, "loss": 2.472, "step": 4990 }, { "epoch": 33.55892255892256, "grad_norm": 180.22076416015625, "learning_rate": 4.924515171107899e-07, "loss": 2.7926, "step": 5000 }, { "epoch": 33.55892255892256, "eval_loss": 0.3756909966468811, "eval_mae": 0.42366716265678406, "eval_mse": 0.3756909966468811, "eval_r2": 0.17824101448059082, "eval_rmse": 0.6129363724293747, "eval_runtime": 10.7279, "eval_samples_per_second": 442.677, "eval_steps_per_second": 13.889, "step": 5000 }, { "epoch": 33.62626262626263, "grad_norm": 171.50613403320312, "learning_rate": 4.88812903309203e-07, "loss": 2.9689, "step": 5010 }, { "epoch": 33.69360269360269, "grad_norm": 149.72354125976562, "learning_rate": 4.851834285639367e-07, "loss": 3.08, "step": 5020 }, { "epoch": 33.76094276094276, "grad_norm": 75.0073471069336, "learning_rate": 4.815631577631191e-07, "loss": 2.8218, "step": 5030 }, { "epoch": 33.82828282828283, "grad_norm": 72.0354995727539, "learning_rate": 4.779521556303309e-07, "loss": 3.301, "step": 5040 }, { "epoch": 33.89562289562289, "grad_norm": 93.960693359375, "learning_rate": 4.7435048672344515e-07, "loss": 2.9469, "step": 5050 }, { "epoch": 33.96296296296296, "grad_norm": 29.93295669555664, "learning_rate": 4.707582154334743e-07, "loss": 2.6876, "step": 5060 }, { "epoch": 34.02693602693603, "grad_norm": 40.92702865600586, "learning_rate": 4.6717540598341964e-07, "loss": 2.9483, "step": 5070 }, { "epoch": 34.0942760942761, "grad_norm": 112.44068145751953, "learning_rate": 4.636021224271237e-07, "loss": 3.0915, "step": 5080 }, { "epoch": 34.16161616161616, "grad_norm": 98.65592193603516, "learning_rate": 4.6003842864812314e-07, "loss": 2.7846, "step": 5090 }, { "epoch": 34.22895622895623, "grad_norm": 65.94137573242188, "learning_rate": 4.5648438835850665e-07, "loss": 2.7928, "step": 5100 }, { "epoch": 34.22895622895623, "eval_loss": 0.3830298185348511, "eval_mae": 0.4315975308418274, "eval_mse": 0.3830298185348511, "eval_r2": 0.16218864917755127, "eval_rmse": 0.6188940285176866, "eval_runtime": 10.7436, "eval_samples_per_second": 442.032, "eval_steps_per_second": 13.869, "step": 5100 }, { "epoch": 34.2962962962963, "grad_norm": 143.0979766845703, "learning_rate": 4.5294006509777884e-07, "loss": 2.6017, "step": 5110 }, { "epoch": 34.36363636363637, "grad_norm": 170.61619567871094, "learning_rate": 4.494055222317208e-07, "loss": 2.8222, "step": 5120 }, { "epoch": 34.43097643097643, "grad_norm": 40.840354919433594, "learning_rate": 4.458808229512594e-07, "loss": 2.5307, "step": 5130 }, { "epoch": 34.4983164983165, "grad_norm": 88.24830627441406, "learning_rate": 4.423660302713363e-07, "loss": 2.7962, "step": 5140 }, { "epoch": 34.56565656565657, "grad_norm": 188.69522094726562, "learning_rate": 4.3886120702978256e-07, "loss": 2.9804, "step": 5150 }, { "epoch": 34.63299663299663, "grad_norm": 61.585994720458984, "learning_rate": 4.3536641588619417e-07, "loss": 3.2635, "step": 5160 }, { "epoch": 34.7003367003367, "grad_norm": 46.10032272338867, "learning_rate": 4.318817193208122e-07, "loss": 3.0743, "step": 5170 }, { "epoch": 34.76767676767677, "grad_norm": 46.91140365600586, "learning_rate": 4.284071796334059e-07, "loss": 2.8551, "step": 5180 }, { "epoch": 34.83501683501684, "grad_norm": 155.6090545654297, "learning_rate": 4.2494285894215886e-07, "loss": 2.7375, "step": 5190 }, { "epoch": 34.9023569023569, "grad_norm": 48.37738800048828, "learning_rate": 4.214888191825584e-07, "loss": 3.0353, "step": 5200 }, { "epoch": 34.9023569023569, "eval_loss": 0.37545156478881836, "eval_mae": 0.4242088496685028, "eval_mse": 0.37545156478881836, "eval_r2": 0.1787647008895874, "eval_rmse": 0.6127410258737522, "eval_runtime": 10.7495, "eval_samples_per_second": 441.787, "eval_steps_per_second": 13.861, "step": 5200 }, { "epoch": 34.96969696969697, "grad_norm": 62.88576126098633, "learning_rate": 4.180451221062882e-07, "loss": 2.8411, "step": 5210 }, { "epoch": 35.033670033670035, "grad_norm": 161.2834930419922, "learning_rate": 4.1461182928012417e-07, "loss": 2.8538, "step": 5220 }, { "epoch": 35.101010101010104, "grad_norm": 63.775482177734375, "learning_rate": 4.111890020848343e-07, "loss": 3.0321, "step": 5230 }, { "epoch": 35.168350168350166, "grad_norm": 101.16211700439453, "learning_rate": 4.0777670171408087e-07, "loss": 2.6915, "step": 5240 }, { "epoch": 35.235690235690235, "grad_norm": 302.8909912109375, "learning_rate": 4.0437498917332613e-07, "loss": 2.6499, "step": 5250 }, { "epoch": 35.303030303030305, "grad_norm": 101.47705078125, "learning_rate": 4.009839252787426e-07, "loss": 2.7218, "step": 5260 }, { "epoch": 35.370370370370374, "grad_norm": 208.5972900390625, "learning_rate": 3.976035706561247e-07, "loss": 3.2448, "step": 5270 }, { "epoch": 35.437710437710436, "grad_norm": 83.21065521240234, "learning_rate": 3.9423398573980515e-07, "loss": 2.8995, "step": 5280 }, { "epoch": 35.505050505050505, "grad_norm": 34.19369888305664, "learning_rate": 3.9087523077157615e-07, "loss": 3.0643, "step": 5290 }, { "epoch": 35.572390572390574, "grad_norm": 111.35173797607422, "learning_rate": 3.875273657996091e-07, "loss": 2.9971, "step": 5300 }, { "epoch": 35.572390572390574, "eval_loss": 0.37566670775413513, "eval_mae": 0.4239530563354492, "eval_mse": 0.37566670775413513, "eval_r2": 0.1782941222190857, "eval_rmse": 0.6129165585576353, "eval_runtime": 10.7463, "eval_samples_per_second": 441.92, "eval_steps_per_second": 13.865, "step": 5300 }, { "epoch": 35.63973063973064, "grad_norm": 103.87537384033203, "learning_rate": 3.841904506773834e-07, "loss": 2.8861, "step": 5310 }, { "epoch": 35.707070707070706, "grad_norm": 164.16354370117188, "learning_rate": 3.8086454506261723e-07, "loss": 2.949, "step": 5320 }, { "epoch": 35.774410774410775, "grad_norm": 32.98365783691406, "learning_rate": 3.7754970841619815e-07, "loss": 2.9313, "step": 5330 }, { "epoch": 35.841750841750844, "grad_norm": 188.96311950683594, "learning_rate": 3.7424600000112237e-07, "loss": 2.8781, "step": 5340 }, { "epoch": 35.90909090909091, "grad_norm": 78.8215103149414, "learning_rate": 3.70953478881433e-07, "loss": 2.5224, "step": 5350 }, { "epoch": 35.976430976430976, "grad_norm": 27.901582717895508, "learning_rate": 3.676722039211678e-07, "loss": 2.4363, "step": 5360 }, { "epoch": 36.04040404040404, "grad_norm": 161.42800903320312, "learning_rate": 3.6440223378330294e-07, "loss": 2.7558, "step": 5370 }, { "epoch": 36.10774410774411, "grad_norm": 49.116817474365234, "learning_rate": 3.6114362692870613e-07, "loss": 3.0437, "step": 5380 }, { "epoch": 36.17508417508417, "grad_norm": 61.74159622192383, "learning_rate": 3.5789644161509103e-07, "loss": 2.6185, "step": 5390 }, { "epoch": 36.24242424242424, "grad_norm": 34.96356201171875, "learning_rate": 3.546607358959761e-07, "loss": 2.7923, "step": 5400 }, { "epoch": 36.24242424242424, "eval_loss": 0.3766416609287262, "eval_mae": 0.4259888529777527, "eval_mse": 0.3766416311264038, "eval_r2": 0.17616164684295654, "eval_rmse": 0.6137113581533291, "eval_runtime": 10.7546, "eval_samples_per_second": 441.579, "eval_steps_per_second": 13.855, "step": 5400 }, { "epoch": 36.30976430976431, "grad_norm": 53.39197540283203, "learning_rate": 3.5143656761964623e-07, "loss": 2.8981, "step": 5410 }, { "epoch": 36.377104377104374, "grad_norm": 153.09266662597656, "learning_rate": 3.482239944281187e-07, "loss": 2.7185, "step": 5420 }, { "epoch": 36.44444444444444, "grad_norm": 44.10917663574219, "learning_rate": 3.450230737561125e-07, "loss": 2.4566, "step": 5430 }, { "epoch": 36.51178451178451, "grad_norm": 94.13797760009766, "learning_rate": 3.41833862830022e-07, "loss": 3.1191, "step": 5440 }, { "epoch": 36.57912457912458, "grad_norm": 49.014251708984375, "learning_rate": 3.386564186668932e-07, "loss": 2.7294, "step": 5450 }, { "epoch": 36.64646464646464, "grad_norm": 93.96102142333984, "learning_rate": 3.3549079807340497e-07, "loss": 2.7542, "step": 5460 }, { "epoch": 36.71380471380471, "grad_norm": 125.30360412597656, "learning_rate": 3.3233705764485274e-07, "loss": 2.9831, "step": 5470 }, { "epoch": 36.78114478114478, "grad_norm": 65.4189682006836, "learning_rate": 3.291952537641374e-07, "loss": 2.9857, "step": 5480 }, { "epoch": 36.84848484848485, "grad_norm": 109.07911682128906, "learning_rate": 3.2606544260075763e-07, "loss": 2.6756, "step": 5490 }, { "epoch": 36.91582491582491, "grad_norm": 86.38882446289062, "learning_rate": 3.2294768010980365e-07, "loss": 3.291, "step": 5500 }, { "epoch": 36.91582491582491, "eval_loss": 0.37840497493743896, "eval_mae": 0.4273318350315094, "eval_mse": 0.37840497493743896, "eval_r2": 0.1723046898841858, "eval_rmse": 0.6151463036850982, "eval_runtime": 10.737, "eval_samples_per_second": 442.303, "eval_steps_per_second": 13.877, "step": 5500 }, { "epoch": 36.98316498316498, "grad_norm": 145.6561737060547, "learning_rate": 3.198420220309591e-07, "loss": 2.8071, "step": 5510 }, { "epoch": 37.04713804713805, "grad_norm": 44.88926315307617, "learning_rate": 3.167485238875035e-07, "loss": 2.6833, "step": 5520 }, { "epoch": 37.11447811447812, "grad_norm": 97.98082733154297, "learning_rate": 3.1366724098532036e-07, "loss": 2.9274, "step": 5530 }, { "epoch": 37.18181818181818, "grad_norm": 33.520530700683594, "learning_rate": 3.105982284119073e-07, "loss": 2.9766, "step": 5540 }, { "epoch": 37.24915824915825, "grad_norm": 110.31135559082031, "learning_rate": 3.075415410353911e-07, "loss": 2.5011, "step": 5550 }, { "epoch": 37.31649831649832, "grad_norm": 28.316444396972656, "learning_rate": 3.0449723350354804e-07, "loss": 2.7453, "step": 5560 }, { "epoch": 37.38383838383838, "grad_norm": 79.585205078125, "learning_rate": 3.01465360242827e-07, "loss": 2.6286, "step": 5570 }, { "epoch": 37.45117845117845, "grad_norm": 103.95126342773438, "learning_rate": 2.9844597545737427e-07, "loss": 2.9868, "step": 5580 }, { "epoch": 37.51851851851852, "grad_norm": 134.60902404785156, "learning_rate": 2.9543913312806713e-07, "loss": 2.8951, "step": 5590 }, { "epoch": 37.58585858585859, "grad_norm": 89.12059783935547, "learning_rate": 2.924448870115457e-07, "loss": 3.2247, "step": 5600 }, { "epoch": 37.58585858585859, "eval_loss": 0.37339305877685547, "eval_mae": 0.4228719472885132, "eval_mse": 0.3733930289745331, "eval_r2": 0.18326741456985474, "eval_rmse": 0.611058940671465, "eval_runtime": 10.7587, "eval_samples_per_second": 441.409, "eval_steps_per_second": 13.849, "step": 5600 }, { "epoch": 37.65319865319865, "grad_norm": 102.96305847167969, "learning_rate": 2.89463290639256e-07, "loss": 2.7327, "step": 5610 }, { "epoch": 37.72053872053872, "grad_norm": 44.858272552490234, "learning_rate": 2.864943973164889e-07, "loss": 2.7499, "step": 5620 }, { "epoch": 37.78787878787879, "grad_norm": 97.6167984008789, "learning_rate": 2.835382601214298e-07, "loss": 3.119, "step": 5630 }, { "epoch": 37.85521885521886, "grad_norm": 43.9401741027832, "learning_rate": 2.8059493190420793e-07, "loss": 2.9128, "step": 5640 }, { "epoch": 37.92255892255892, "grad_norm": 52.51983642578125, "learning_rate": 2.776644652859528e-07, "loss": 2.7952, "step": 5650 }, { "epoch": 37.98989898989899, "grad_norm": 108.1231689453125, "learning_rate": 2.7474691265785276e-07, "loss": 2.7146, "step": 5660 }, { "epoch": 38.053872053872055, "grad_norm": 48.32514953613281, "learning_rate": 2.7184232618021865e-07, "loss": 2.7099, "step": 5670 }, { "epoch": 38.121212121212125, "grad_norm": 48.511940002441406, "learning_rate": 2.689507577815506e-07, "loss": 2.6683, "step": 5680 }, { "epoch": 38.18855218855219, "grad_norm": 53.359745025634766, "learning_rate": 2.66072259157611e-07, "loss": 3.0955, "step": 5690 }, { "epoch": 38.255892255892256, "grad_norm": 90.22420501708984, "learning_rate": 2.6320688177049865e-07, "loss": 2.8065, "step": 5700 }, { "epoch": 38.255892255892256, "eval_loss": 0.3777092397212982, "eval_mae": 0.42853933572769165, "eval_mse": 0.3777092397212982, "eval_r2": 0.1738264560699463, "eval_rmse": 0.6145805396539157, "eval_runtime": 10.7488, "eval_samples_per_second": 441.818, "eval_steps_per_second": 13.862, "step": 5700 }, { "epoch": 38.323232323232325, "grad_norm": 55.44943618774414, "learning_rate": 2.6035467684773005e-07, "loss": 2.5728, "step": 5710 }, { "epoch": 38.39057239057239, "grad_norm": 260.0409851074219, "learning_rate": 2.575156953813226e-07, "loss": 3.5482, "step": 5720 }, { "epoch": 38.45791245791246, "grad_norm": 39.049354553222656, "learning_rate": 2.546899881268837e-07, "loss": 2.718, "step": 5730 }, { "epoch": 38.525252525252526, "grad_norm": 173.14947509765625, "learning_rate": 2.5187760560270264e-07, "loss": 2.7304, "step": 5740 }, { "epoch": 38.592592592592595, "grad_norm": 68.39891052246094, "learning_rate": 2.4907859808884836e-07, "loss": 2.8611, "step": 5750 }, { "epoch": 38.65993265993266, "grad_norm": 52.58188247680664, "learning_rate": 2.4629301562626937e-07, "loss": 2.7627, "step": 5760 }, { "epoch": 38.72727272727273, "grad_norm": 68.62056732177734, "learning_rate": 2.435209080158996e-07, "loss": 2.4222, "step": 5770 }, { "epoch": 38.794612794612796, "grad_norm": 166.2366943359375, "learning_rate": 2.407623248177696e-07, "loss": 2.6795, "step": 5780 }, { "epoch": 38.861952861952865, "grad_norm": 103.1070327758789, "learning_rate": 2.3801731535011703e-07, "loss": 2.7809, "step": 5790 }, { "epoch": 38.92929292929293, "grad_norm": 102.50167846679688, "learning_rate": 2.3528592868850828e-07, "loss": 3.1926, "step": 5800 }, { "epoch": 38.92929292929293, "eval_loss": 0.37297195196151733, "eval_mae": 0.4233986437320709, "eval_mse": 0.37297195196151733, "eval_r2": 0.18418848514556885, "eval_rmse": 0.610714296509847, "eval_runtime": 10.7415, "eval_samples_per_second": 442.117, "eval_steps_per_second": 13.871, "step": 5800 }, { "epoch": 38.996632996633, "grad_norm": 40.01347351074219, "learning_rate": 2.3256821366495903e-07, "loss": 2.9293, "step": 5810 }, { "epoch": 39.06060606060606, "grad_norm": 86.15675354003906, "learning_rate": 2.2986421886706308e-07, "loss": 2.3521, "step": 5820 }, { "epoch": 39.127946127946124, "grad_norm": 95.728515625, "learning_rate": 2.271739926371219e-07, "loss": 2.9931, "step": 5830 }, { "epoch": 39.195286195286194, "grad_norm": 166.3361358642578, "learning_rate": 2.2449758307128052e-07, "loss": 2.5709, "step": 5840 }, { "epoch": 39.26262626262626, "grad_norm": 84.29829406738281, "learning_rate": 2.2183503801866877e-07, "loss": 3.1111, "step": 5850 }, { "epoch": 39.32996632996633, "grad_norm": 48.81771469116211, "learning_rate": 2.1918640508054587e-07, "loss": 2.9855, "step": 5860 }, { "epoch": 39.397306397306394, "grad_norm": 38.09366989135742, "learning_rate": 2.1655173160944785e-07, "loss": 2.8368, "step": 5870 }, { "epoch": 39.464646464646464, "grad_norm": 145.6597442626953, "learning_rate": 2.1393106470834256e-07, "loss": 2.8089, "step": 5880 }, { "epoch": 39.53198653198653, "grad_norm": 79.72319030761719, "learning_rate": 2.113244512297858e-07, "loss": 3.0007, "step": 5890 }, { "epoch": 39.5993265993266, "grad_norm": 178.42645263671875, "learning_rate": 2.0873193777508669e-07, "loss": 2.5218, "step": 5900 }, { "epoch": 39.5993265993266, "eval_loss": 0.3723418414592743, "eval_mae": 0.42274782061576843, "eval_mse": 0.37234190106391907, "eval_r2": 0.18556654453277588, "eval_rmse": 0.610198247345827, "eval_runtime": 10.7462, "eval_samples_per_second": 441.922, "eval_steps_per_second": 13.865, "step": 5900 }, { "epoch": 39.666666666666664, "grad_norm": 107.31372833251953, "learning_rate": 2.0615357069347104e-07, "loss": 2.8515, "step": 5910 }, { "epoch": 39.73400673400673, "grad_norm": 127.34188079833984, "learning_rate": 2.0358939608125525e-07, "loss": 3.3081, "step": 5920 }, { "epoch": 39.8013468013468, "grad_norm": 53.725589752197266, "learning_rate": 2.0103945978102065e-07, "loss": 3.04, "step": 5930 }, { "epoch": 39.86868686868687, "grad_norm": 103.34222412109375, "learning_rate": 1.9850380738079486e-07, "loss": 2.7212, "step": 5940 }, { "epoch": 39.936026936026934, "grad_norm": 130.62130737304688, "learning_rate": 1.959824842132365e-07, "loss": 2.5827, "step": 5950 }, { "epoch": 40.0, "grad_norm": 141.81289672851562, "learning_rate": 1.9347553535482463e-07, "loss": 2.5485, "step": 5960 }, { "epoch": 40.06734006734007, "grad_norm": 106.22374725341797, "learning_rate": 1.9098300562505264e-07, "loss": 2.7741, "step": 5970 }, { "epoch": 40.13468013468013, "grad_norm": 122.31490325927734, "learning_rate": 1.885049395856275e-07, "loss": 2.5972, "step": 5980 }, { "epoch": 40.2020202020202, "grad_norm": 93.01054382324219, "learning_rate": 1.8604138153967296e-07, "loss": 3.2723, "step": 5990 }, { "epoch": 40.26936026936027, "grad_norm": 64.74946594238281, "learning_rate": 1.8359237553093698e-07, "loss": 2.9944, "step": 6000 }, { "epoch": 40.26936026936027, "eval_loss": 0.3726748526096344, "eval_mae": 0.4227724075317383, "eval_mse": 0.3726748526096344, "eval_r2": 0.18483829498291016, "eval_rmse": 0.6104710088199393, "eval_runtime": 10.7294, "eval_samples_per_second": 442.617, "eval_steps_per_second": 13.887, "step": 6000 }, { "epoch": 40.33670033670034, "grad_norm": 52.92274856567383, "learning_rate": 1.8115796534300477e-07, "loss": 2.9406, "step": 6010 }, { "epoch": 40.4040404040404, "grad_norm": 101.93619537353516, "learning_rate": 1.787381944985159e-07, "loss": 2.8226, "step": 6020 }, { "epoch": 40.47138047138047, "grad_norm": 63.790889739990234, "learning_rate": 1.7633310625838704e-07, "loss": 2.9521, "step": 6030 }, { "epoch": 40.53872053872054, "grad_norm": 98.60453033447266, "learning_rate": 1.7394274362103622e-07, "loss": 2.943, "step": 6040 }, { "epoch": 40.60606060606061, "grad_norm": 87.70211791992188, "learning_rate": 1.7156714932161674e-07, "loss": 2.537, "step": 6050 }, { "epoch": 40.67340067340067, "grad_norm": 107.02592468261719, "learning_rate": 1.6920636583125125e-07, "loss": 2.6045, "step": 6060 }, { "epoch": 40.74074074074074, "grad_norm": 42.7766227722168, "learning_rate": 1.6686043535627436e-07, "loss": 3.047, "step": 6070 }, { "epoch": 40.80808080808081, "grad_norm": 117.37095642089844, "learning_rate": 1.6476183192651415e-07, "loss": 2.9263, "step": 6080 }, { "epoch": 40.87542087542087, "grad_norm": 37.20676040649414, "learning_rate": 1.6244423750757542e-07, "loss": 2.7599, "step": 6090 }, { "epoch": 40.94276094276094, "grad_norm": 80.41845703125, "learning_rate": 1.601416169980616e-07, "loss": 2.4423, "step": 6100 }, { "epoch": 40.94276094276094, "eval_loss": 0.37364861369132996, "eval_mae": 0.4246378540992737, "eval_mse": 0.37364861369132996, "eval_r2": 0.18270838260650635, "eval_rmse": 0.6112680375181824, "eval_runtime": 10.8799, "eval_samples_per_second": 436.495, "eval_steps_per_second": 13.695, "step": 6100 }, { "epoch": 41.00673400673401, "grad_norm": 107.51161193847656, "learning_rate": 1.5785401156446264e-07, "loss": 2.7465, "step": 6110 }, { "epoch": 41.074074074074076, "grad_norm": 34.13328552246094, "learning_rate": 1.5558146210482758e-07, "loss": 2.8836, "step": 6120 }, { "epoch": 41.14141414141414, "grad_norm": 83.3028564453125, "learning_rate": 1.5332400924803312e-07, "loss": 2.6263, "step": 6130 }, { "epoch": 41.20875420875421, "grad_norm": 83.10533905029297, "learning_rate": 1.510816933530571e-07, "loss": 2.8704, "step": 6140 }, { "epoch": 41.27609427609428, "grad_norm": 161.7142333984375, "learning_rate": 1.4885455450825734e-07, "loss": 2.7706, "step": 6150 }, { "epoch": 41.343434343434346, "grad_norm": 109.85543823242188, "learning_rate": 1.466426325306548e-07, "loss": 2.9424, "step": 6160 }, { "epoch": 41.41077441077441, "grad_norm": 82.26014709472656, "learning_rate": 1.4444596696522226e-07, "loss": 2.6249, "step": 6170 }, { "epoch": 41.47811447811448, "grad_norm": 135.5589141845703, "learning_rate": 1.4226459708417539e-07, "loss": 2.6573, "step": 6180 }, { "epoch": 41.54545454545455, "grad_norm": 152.521240234375, "learning_rate": 1.400985618862731e-07, "loss": 3.0272, "step": 6190 }, { "epoch": 41.612794612794616, "grad_norm": 78.32645416259766, "learning_rate": 1.3794790009611888e-07, "loss": 2.8652, "step": 6200 }, { "epoch": 41.612794612794616, "eval_loss": 0.37212684750556946, "eval_mae": 0.42309343814849854, "eval_mse": 0.37212684750556946, "eval_r2": 0.18603694438934326, "eval_rmse": 0.6100220057551772, "eval_runtime": 10.9555, "eval_samples_per_second": 433.482, "eval_steps_per_second": 13.601, "step": 6200 }, { "epoch": 41.68013468013468, "grad_norm": 49.81602478027344, "learning_rate": 1.358126501634692e-07, "loss": 2.8454, "step": 6210 }, { "epoch": 41.74747474747475, "grad_norm": 47.936038970947266, "learning_rate": 1.336928502625455e-07, "loss": 2.8673, "step": 6220 }, { "epoch": 41.81481481481482, "grad_norm": 48.5448112487793, "learning_rate": 1.315885382913514e-07, "loss": 2.7582, "step": 6230 }, { "epoch": 41.88215488215488, "grad_norm": 120.21739196777344, "learning_rate": 1.2949975187099616e-07, "loss": 3.0579, "step": 6240 }, { "epoch": 41.94949494949495, "grad_norm": 88.11009216308594, "learning_rate": 1.274265283450221e-07, "loss": 2.9038, "step": 6250 }, { "epoch": 42.013468013468014, "grad_norm": 77.09064483642578, "learning_rate": 1.2536890477873574e-07, "loss": 2.5295, "step": 6260 }, { "epoch": 42.08080808080808, "grad_norm": 44.63343811035156, "learning_rate": 1.2332691795854644e-07, "loss": 2.9913, "step": 6270 }, { "epoch": 42.148148148148145, "grad_norm": 118.18333435058594, "learning_rate": 1.2130060439130728e-07, "loss": 2.8033, "step": 6280 }, { "epoch": 42.215488215488215, "grad_norm": 146.40643310546875, "learning_rate": 1.1929000030366444e-07, "loss": 2.8309, "step": 6290 }, { "epoch": 42.282828282828284, "grad_norm": 35.69454574584961, "learning_rate": 1.1729514164140775e-07, "loss": 2.3939, "step": 6300 }, { "epoch": 42.282828282828284, "eval_loss": 0.3719646632671356, "eval_mae": 0.42259886860847473, "eval_mse": 0.3719646632671356, "eval_r2": 0.1863917112350464, "eval_rmse": 0.6098890581631512, "eval_runtime": 10.9778, "eval_samples_per_second": 432.602, "eval_steps_per_second": 13.573, "step": 6300 }, { "epoch": 42.35016835016835, "grad_norm": 100.19771575927734, "learning_rate": 1.1531606406882876e-07, "loss": 3.1607, "step": 6310 }, { "epoch": 42.417508417508415, "grad_norm": 109.58394622802734, "learning_rate": 1.1335280296808313e-07, "loss": 2.6647, "step": 6320 }, { "epoch": 42.484848484848484, "grad_norm": 82.37150573730469, "learning_rate": 1.1140539343855781e-07, "loss": 3.2622, "step": 6330 }, { "epoch": 42.552188552188554, "grad_norm": 53.46453094482422, "learning_rate": 1.0947387029624388e-07, "loss": 2.8023, "step": 6340 }, { "epoch": 42.61952861952862, "grad_norm": 124.7150650024414, "learning_rate": 1.0755826807311385e-07, "loss": 2.7507, "step": 6350 }, { "epoch": 42.686868686868685, "grad_norm": 32.7980842590332, "learning_rate": 1.0565862101650447e-07, "loss": 2.5136, "step": 6360 }, { "epoch": 42.754208754208754, "grad_norm": 71.90750885009766, "learning_rate": 1.037749630885042e-07, "loss": 2.9655, "step": 6370 }, { "epoch": 42.821548821548824, "grad_norm": 101.39800262451172, "learning_rate": 1.0190732796534629e-07, "loss": 2.9251, "step": 6380 }, { "epoch": 42.888888888888886, "grad_norm": 41.80552291870117, "learning_rate": 1.0005574903680669e-07, "loss": 2.888, "step": 6390 }, { "epoch": 42.956228956228955, "grad_norm": 103.90235137939453, "learning_rate": 9.822025940560674e-08, "loss": 2.6957, "step": 6400 }, { "epoch": 42.956228956228955, "eval_loss": 0.3716561496257782, "eval_mae": 0.42255306243896484, "eval_mse": 0.3716561496257782, "eval_r2": 0.18706649541854858, "eval_rmse": 0.6096360796621032, "eval_runtime": 11.0123, "eval_samples_per_second": 431.244, "eval_steps_per_second": 13.53, "step": 6400 }, { "epoch": 43.02020202020202, "grad_norm": 49.07634735107422, "learning_rate": 9.640089188682177e-08, "loss": 2.6157, "step": 6410 }, { "epoch": 43.08754208754209, "grad_norm": 27.628662109375, "learning_rate": 9.459767900729499e-08, "loss": 2.7826, "step": 6420 }, { "epoch": 43.15488215488215, "grad_norm": 36.1707763671875, "learning_rate": 9.281065300505386e-08, "loss": 2.4855, "step": 6430 }, { "epoch": 43.22222222222222, "grad_norm": 120.8755111694336, "learning_rate": 9.103984582873637e-08, "loss": 3.0959, "step": 6440 }, { "epoch": 43.28956228956229, "grad_norm": 84.20408630371094, "learning_rate": 8.928528913701782e-08, "loss": 2.7293, "step": 6450 }, { "epoch": 43.35690235690236, "grad_norm": 65.10899353027344, "learning_rate": 8.754701429804656e-08, "loss": 2.8868, "step": 6460 }, { "epoch": 43.42424242424242, "grad_norm": 134.81903076171875, "learning_rate": 8.582505238888182e-08, "loss": 2.9917, "step": 6470 }, { "epoch": 43.49158249158249, "grad_norm": 70.17864990234375, "learning_rate": 8.411943419493795e-08, "loss": 2.7381, "step": 6480 }, { "epoch": 43.55892255892256, "grad_norm": 51.37734603881836, "learning_rate": 8.243019020943543e-08, "loss": 2.5999, "step": 6490 }, { "epoch": 43.62626262626263, "grad_norm": 93.85679626464844, "learning_rate": 8.075735063285493e-08, "loss": 2.7533, "step": 6500 }, { "epoch": 43.62626262626263, "eval_loss": 0.3717193901538849, "eval_mae": 0.4222215712070465, "eval_mse": 0.3717193901538849, "eval_r2": 0.18692821264266968, "eval_rmse": 0.6096879448979493, "eval_runtime": 10.9898, "eval_samples_per_second": 432.128, "eval_steps_per_second": 13.558, "step": 6500 }, { "epoch": 43.69360269360269, "grad_norm": 137.6927490234375, "learning_rate": 7.910094537239698e-08, "loss": 2.8405, "step": 6510 }, { "epoch": 43.76094276094276, "grad_norm": 51.25269317626953, "learning_rate": 7.746100404144818e-08, "loss": 2.869, "step": 6520 }, { "epoch": 43.82828282828283, "grad_norm": 50.13993453979492, "learning_rate": 7.583755595905039e-08, "loss": 2.637, "step": 6530 }, { "epoch": 43.89562289562289, "grad_norm": 71.79419708251953, "learning_rate": 7.423063014937847e-08, "loss": 2.9598, "step": 6540 }, { "epoch": 43.96296296296296, "grad_norm": 146.35073852539062, "learning_rate": 7.264025534121987e-08, "loss": 2.8424, "step": 6550 }, { "epoch": 44.02693602693603, "grad_norm": 96.68115997314453, "learning_rate": 7.106645996746164e-08, "loss": 2.7784, "step": 6560 }, { "epoch": 44.0942760942761, "grad_norm": 32.26988983154297, "learning_rate": 6.950927216458158e-08, "loss": 3.0933, "step": 6570 }, { "epoch": 44.16161616161616, "grad_norm": 76.17391204833984, "learning_rate": 6.796871977214624e-08, "loss": 3.0585, "step": 6580 }, { "epoch": 44.22895622895623, "grad_norm": 53.91404724121094, "learning_rate": 6.644483033231207e-08, "loss": 2.7142, "step": 6590 }, { "epoch": 44.2962962962963, "grad_norm": 91.60089111328125, "learning_rate": 6.49376310893338e-08, "loss": 2.6689, "step": 6600 }, { "epoch": 44.2962962962963, "eval_loss": 0.37167152762413025, "eval_mae": 0.4226686656475067, "eval_mse": 0.37167155742645264, "eval_r2": 0.1870328187942505, "eval_rmse": 0.6096487164149963, "eval_runtime": 10.9216, "eval_samples_per_second": 434.826, "eval_steps_per_second": 13.643, "step": 6600 }, { "epoch": 44.36363636363637, "grad_norm": 196.50582885742188, "learning_rate": 6.344714898907688e-08, "loss": 2.7124, "step": 6610 }, { "epoch": 44.43097643097643, "grad_norm": 56.524658203125, "learning_rate": 6.19734106785359e-08, "loss": 3.0022, "step": 6620 }, { "epoch": 44.4983164983165, "grad_norm": 48.41875076293945, "learning_rate": 6.051644250535858e-08, "loss": 2.8355, "step": 6630 }, { "epoch": 44.56565656565657, "grad_norm": 139.86813354492188, "learning_rate": 5.9076270517373964e-08, "loss": 2.6059, "step": 6640 }, { "epoch": 44.63299663299663, "grad_norm": 124.6146011352539, "learning_rate": 5.765292046212744e-08, "loss": 2.9644, "step": 6650 }, { "epoch": 44.7003367003367, "grad_norm": 83.5776596069336, "learning_rate": 5.624641778641981e-08, "loss": 2.8075, "step": 6660 }, { "epoch": 44.76767676767677, "grad_norm": 75.65435028076172, "learning_rate": 5.485678763585311e-08, "loss": 2.592, "step": 6670 }, { "epoch": 44.83501683501684, "grad_norm": 84.64730072021484, "learning_rate": 5.3484054854380126e-08, "loss": 2.7111, "step": 6680 }, { "epoch": 44.9023569023569, "grad_norm": 70.59678649902344, "learning_rate": 5.2128243983861065e-08, "loss": 2.6287, "step": 6690 }, { "epoch": 44.96969696969697, "grad_norm": 55.02358627319336, "learning_rate": 5.0789379263624146e-08, "loss": 3.1519, "step": 6700 }, { "epoch": 44.96969696969697, "eval_loss": 0.3716820478439331, "eval_mae": 0.42267873883247375, "eval_mse": 0.3716820478439331, "eval_r2": 0.18700987100601196, "eval_rmse": 0.6096573200117695, "eval_runtime": 10.7664, "eval_samples_per_second": 441.094, "eval_steps_per_second": 13.839, "step": 6700 }, { "epoch": 45.033670033670035, "grad_norm": 107.63990020751953, "learning_rate": 4.946748463003314e-08, "loss": 2.8741, "step": 6710 }, { "epoch": 45.101010101010104, "grad_norm": 35.897735595703125, "learning_rate": 4.816258371605797e-08, "loss": 2.8766, "step": 6720 }, { "epoch": 45.168350168350166, "grad_norm": 25.684629440307617, "learning_rate": 4.687469985085368e-08, "loss": 2.7255, "step": 6730 }, { "epoch": 45.235690235690235, "grad_norm": 56.81410217285156, "learning_rate": 4.5603856059342364e-08, "loss": 2.692, "step": 6740 }, { "epoch": 45.303030303030305, "grad_norm": 60.109886169433594, "learning_rate": 4.435007506180244e-08, "loss": 2.7506, "step": 6750 }, { "epoch": 45.370370370370374, "grad_norm": 42.9677619934082, "learning_rate": 4.311337927346159e-08, "loss": 2.9639, "step": 6760 }, { "epoch": 45.437710437710436, "grad_norm": 54.244117736816406, "learning_rate": 4.189379080409594e-08, "loss": 2.6316, "step": 6770 }, { "epoch": 45.505050505050505, "grad_norm": 87.54462432861328, "learning_rate": 4.069133145763559e-08, "loss": 2.4867, "step": 6780 }, { "epoch": 45.572390572390574, "grad_norm": 107.06570434570312, "learning_rate": 3.950602273177472e-08, "loss": 2.8616, "step": 6790 }, { "epoch": 45.63973063973064, "grad_norm": 81.43595886230469, "learning_rate": 3.8337885817586345e-08, "loss": 3.1548, "step": 6800 }, { "epoch": 45.63973063973064, "eval_loss": 0.3716236352920532, "eval_mae": 0.4226154386997223, "eval_mse": 0.3716236352920532, "eval_r2": 0.1871376633644104, "eval_rmse": 0.6096094120763337, "eval_runtime": 10.7519, "eval_samples_per_second": 441.688, "eval_steps_per_second": 13.858, "step": 6800 }, { "epoch": 45.707070707070706, "grad_norm": 41.66019821166992, "learning_rate": 3.718694159914437e-08, "loss": 2.8004, "step": 6810 }, { "epoch": 45.774410774410775, "grad_norm": 99.3890151977539, "learning_rate": 3.605321065314948e-08, "loss": 2.7016, "step": 6820 }, { "epoch": 45.841750841750844, "grad_norm": 100.44307708740234, "learning_rate": 3.49367132485624e-08, "loss": 2.9284, "step": 6830 }, { "epoch": 45.90909090909091, "grad_norm": 167.5367431640625, "learning_rate": 3.383746934623999e-08, "loss": 2.6818, "step": 6840 }, { "epoch": 45.976430976430976, "grad_norm": 85.18693542480469, "learning_rate": 3.2755498598579845e-08, "loss": 3.0122, "step": 6850 }, { "epoch": 46.04040404040404, "grad_norm": 48.68128204345703, "learning_rate": 3.169082034916792e-08, "loss": 2.4389, "step": 6860 }, { "epoch": 46.10774410774411, "grad_norm": 35.80268478393555, "learning_rate": 3.064345363243326e-08, "loss": 2.6543, "step": 6870 }, { "epoch": 46.17508417508417, "grad_norm": 115.9142074584961, "learning_rate": 2.961341717330734e-08, "loss": 2.5028, "step": 6880 }, { "epoch": 46.24242424242424, "grad_norm": 24.52016258239746, "learning_rate": 2.8600729386889623e-08, "loss": 2.8779, "step": 6890 }, { "epoch": 46.30976430976431, "grad_norm": 55.08308410644531, "learning_rate": 2.7605408378118088e-08, "loss": 2.9138, "step": 6900 }, { "epoch": 46.30976430976431, "eval_loss": 0.37138164043426514, "eval_mae": 0.42244964838027954, "eval_mse": 0.3713816702365875, "eval_r2": 0.1876668930053711, "eval_rmse": 0.6094109206738813, "eval_runtime": 10.7468, "eval_samples_per_second": 441.901, "eval_steps_per_second": 13.865, "step": 6900 }, { "epoch": 46.377104377104374, "grad_norm": 37.61576843261719, "learning_rate": 2.6627471941445988e-08, "loss": 2.8863, "step": 6910 }, { "epoch": 46.44444444444444, "grad_norm": 190.95191955566406, "learning_rate": 2.566693756052274e-08, "loss": 2.924, "step": 6920 }, { "epoch": 46.51178451178451, "grad_norm": 73.8837890625, "learning_rate": 2.472382240788262e-08, "loss": 2.6765, "step": 6930 }, { "epoch": 46.57912457912458, "grad_norm": 39.89628982543945, "learning_rate": 2.3798143344636813e-08, "loss": 2.9649, "step": 6940 }, { "epoch": 46.64646464646464, "grad_norm": 81.95803833007812, "learning_rate": 2.288991692017217e-08, "loss": 2.8988, "step": 6950 }, { "epoch": 46.71380471380471, "grad_norm": 66.20928955078125, "learning_rate": 2.1999159371855813e-08, "loss": 2.8271, "step": 6960 }, { "epoch": 46.78114478114478, "grad_norm": 151.99559020996094, "learning_rate": 2.1125886624744126e-08, "loss": 3.211, "step": 6970 }, { "epoch": 46.84848484848485, "grad_norm": 70.59669494628906, "learning_rate": 2.0270114291298547e-08, "loss": 2.7698, "step": 6980 }, { "epoch": 46.91582491582491, "grad_norm": 161.56056213378906, "learning_rate": 1.9431857671106112e-08, "loss": 2.8948, "step": 6990 }, { "epoch": 46.98316498316498, "grad_norm": 65.89572143554688, "learning_rate": 1.8611131750606467e-08, "loss": 2.4984, "step": 7000 }, { "epoch": 46.98316498316498, "eval_loss": 0.37131446599960327, "eval_mae": 0.4225575923919678, "eval_mse": 0.37131446599960327, "eval_r2": 0.1878138780593872, "eval_rmse": 0.6093557794914259, "eval_runtime": 10.7566, "eval_samples_per_second": 441.497, "eval_steps_per_second": 13.852, "step": 7000 }, { "epoch": 47.04713804713805, "grad_norm": 90.17697143554688, "learning_rate": 1.7807951202823413e-08, "loss": 2.823, "step": 7010 }, { "epoch": 47.11447811447812, "grad_norm": 84.1099624633789, "learning_rate": 1.7022330387102436e-08, "loss": 2.621, "step": 7020 }, { "epoch": 47.18181818181818, "grad_norm": 140.962158203125, "learning_rate": 1.6254283348854704e-08, "loss": 2.6904, "step": 7030 }, { "epoch": 47.24915824915825, "grad_norm": 169.4842529296875, "learning_rate": 1.5503823819305374e-08, "loss": 2.736, "step": 7040 }, { "epoch": 47.31649831649832, "grad_norm": 55.19134521484375, "learning_rate": 1.4770965215248343e-08, "loss": 2.9752, "step": 7050 }, { "epoch": 47.38383838383838, "grad_norm": 143.83779907226562, "learning_rate": 1.4055720638806446e-08, "loss": 2.7289, "step": 7060 }, { "epoch": 47.45117845117845, "grad_norm": 45.39239501953125, "learning_rate": 1.3358102877196519e-08, "loss": 2.6576, "step": 7070 }, { "epoch": 47.51851851851852, "grad_norm": 55.785423278808594, "learning_rate": 1.2678124402502265e-08, "loss": 2.8254, "step": 7080 }, { "epoch": 47.58585858585859, "grad_norm": 81.49543762207031, "learning_rate": 1.2015797371449643e-08, "loss": 2.6002, "step": 7090 }, { "epoch": 47.65319865319865, "grad_norm": 38.34442901611328, "learning_rate": 1.1371133625190488e-08, "loss": 2.7111, "step": 7100 }, { "epoch": 47.65319865319865, "eval_loss": 0.3713144361972809, "eval_mae": 0.42249104380607605, "eval_mse": 0.3713144361972809, "eval_r2": 0.18781393766403198, "eval_rmse": 0.6093557550374665, "eval_runtime": 10.763, "eval_samples_per_second": 441.236, "eval_steps_per_second": 13.844, "step": 7100 }, { "epoch": 47.72053872053872, "grad_norm": 166.0770263671875, "learning_rate": 1.0744144689090795e-08, "loss": 2.8729, "step": 7110 }, { "epoch": 47.78787878787879, "grad_norm": 45.8447380065918, "learning_rate": 1.0134841772523994e-08, "loss": 3.2242, "step": 7120 }, { "epoch": 47.85521885521886, "grad_norm": 107.69355010986328, "learning_rate": 9.543235768671554e-09, "loss": 2.8122, "step": 7130 }, { "epoch": 47.92255892255892, "grad_norm": 45.992977142333984, "learning_rate": 8.969337254327246e-09, "loss": 2.9235, "step": 7140 }, { "epoch": 47.98989898989899, "grad_norm": 17.769834518432617, "learning_rate": 8.413156489708862e-09, "loss": 2.9913, "step": 7150 }, { "epoch": 48.053872053872055, "grad_norm": 213.13272094726562, "learning_rate": 7.874703418273898e-09, "loss": 2.7554, "step": 7160 }, { "epoch": 48.121212121212125, "grad_norm": 125.01367950439453, "learning_rate": 7.3539876665429375e-09, "loss": 2.8757, "step": 7170 }, { "epoch": 48.18855218855219, "grad_norm": 76.245849609375, "learning_rate": 6.851018543926334e-09, "loss": 2.608, "step": 7180 }, { "epoch": 48.255892255892256, "grad_norm": 119.15921020507812, "learning_rate": 6.365805042558903e-09, "loss": 3.1432, "step": 7190 }, { "epoch": 48.323232323232325, "grad_norm": 54.19501876831055, "learning_rate": 5.8983558371379364e-09, "loss": 3.0955, "step": 7200 }, { "epoch": 48.323232323232325, "eval_loss": 0.37137892842292786, "eval_mae": 0.4226270020008087, "eval_mse": 0.37137892842292786, "eval_r2": 0.1876729130744934, "eval_rmse": 0.6094086711090743, "eval_runtime": 10.7633, "eval_samples_per_second": 441.221, "eval_steps_per_second": 13.843, "step": 7200 }, { "epoch": 48.39057239057239, "grad_norm": 51.42652893066406, "learning_rate": 5.448679284769553e-09, "loss": 2.7695, "step": 7210 }, { "epoch": 48.45791245791246, "grad_norm": 64.4932861328125, "learning_rate": 5.016783424817816e-09, "loss": 2.7434, "step": 7220 }, { "epoch": 48.525252525252526, "grad_norm": 192.0025634765625, "learning_rate": 4.602675978762294e-09, "loss": 2.637, "step": 7230 }, { "epoch": 48.592592592592595, "grad_norm": 234.6620635986328, "learning_rate": 4.206364350058944e-09, "loss": 2.6175, "step": 7240 }, { "epoch": 48.65993265993266, "grad_norm": 108.40985870361328, "learning_rate": 3.827855624008558e-09, "loss": 2.5273, "step": 7250 }, { "epoch": 48.72727272727273, "grad_norm": 49.99565505981445, "learning_rate": 3.4671565676298588e-09, "loss": 3.0459, "step": 7260 }, { "epoch": 48.794612794612796, "grad_norm": 46.025779724121094, "learning_rate": 3.124273629537932e-09, "loss": 2.6207, "step": 7270 }, { "epoch": 48.861952861952865, "grad_norm": 67.03982543945312, "learning_rate": 2.799212939829987e-09, "loss": 2.9734, "step": 7280 }, { "epoch": 48.92929292929293, "grad_norm": 48.78995132446289, "learning_rate": 2.4919803099749946e-09, "loss": 2.6288, "step": 7290 }, { "epoch": 48.996632996633, "grad_norm": 97.65779876708984, "learning_rate": 2.202581232709888e-09, "loss": 2.9167, "step": 7300 }, { "epoch": 48.996632996633, "eval_loss": 0.37136363983154297, "eval_mae": 0.42256948351860046, "eval_mse": 0.37136363983154297, "eval_r2": 0.1877063512802124, "eval_rmse": 0.609396127187844, "eval_runtime": 10.7601, "eval_samples_per_second": 441.353, "eval_steps_per_second": 13.847, "step": 7300 }, { "epoch": 49.06060606060606, "grad_norm": 64.9256591796875, "learning_rate": 1.9310208819417474e-09, "loss": 2.5653, "step": 7310 }, { "epoch": 49.127946127946124, "grad_norm": 49.797706604003906, "learning_rate": 1.677304112654765e-09, "loss": 2.4313, "step": 7320 }, { "epoch": 49.195286195286194, "grad_norm": 54.19570541381836, "learning_rate": 1.44143546082387e-09, "loss": 2.9201, "step": 7330 }, { "epoch": 49.26262626262626, "grad_norm": 58.09001922607422, "learning_rate": 1.2234191433335705e-09, "loss": 3.0908, "step": 7340 }, { "epoch": 49.32996632996633, "grad_norm": 32.676883697509766, "learning_rate": 1.0232590579023481e-09, "loss": 2.8714, "step": 7350 }, { "epoch": 49.397306397306394, "grad_norm": 87.04912567138672, "learning_rate": 8.40958783012935e-10, "loss": 3.1343, "step": 7360 }, { "epoch": 49.464646464646464, "grad_norm": 227.12905883789062, "learning_rate": 6.765215778489208e-10, "loss": 2.99, "step": 7370 }, { "epoch": 49.53198653198653, "grad_norm": 73.64918518066406, "learning_rate": 5.299503822356888e-10, "loss": 2.8599, "step": 7380 }, { "epoch": 49.5993265993266, "grad_norm": 111.64922332763672, "learning_rate": 4.01247816588457e-10, "loss": 2.8607, "step": 7390 }, { "epoch": 49.666666666666664, "grad_norm": 33.44980239868164, "learning_rate": 2.904161818652051e-10, "loss": 2.8011, "step": 7400 }, { "epoch": 49.666666666666664, "eval_loss": 0.37127891182899475, "eval_mae": 0.4225114583969116, "eval_mse": 0.37127891182899475, "eval_r2": 0.1878916621208191, "eval_rmse": 0.6093266052200533, "eval_runtime": 10.7384, "eval_samples_per_second": 442.245, "eval_steps_per_second": 13.875, "step": 7400 }, { "epoch": 49.73400673400673, "grad_norm": 98.51158905029297, "learning_rate": 1.9745745952526315e-10, "loss": 2.5714, "step": 7410 }, { "epoch": 49.8013468013468, "grad_norm": 32.80615234375, "learning_rate": 1.2237331149456133e-10, "loss": 2.9021, "step": 7420 }, { "epoch": 49.86868686868687, "grad_norm": 33.67831802368164, "learning_rate": 6.516508013532095e-11, "loss": 2.8229, "step": 7430 }, { "epoch": 49.936026936026934, "grad_norm": 53.02194595336914, "learning_rate": 2.583378822229587e-11, "loss": 2.5242, "step": 7440 }, { "epoch": 50.0, "grad_norm": 68.68186950683594, "learning_rate": 4.3801389243425244e-12, "loss": 2.5313, "step": 7450 }, { "epoch": 50.06734006734007, "grad_norm": 44.678340911865234, "learning_rate": 1.0014778992318323e-06, "loss": 3.0525, "step": 7460 }, { "epoch": 50.13468013468013, "grad_norm": 106.475341796875, "learning_rate": 9.993666144267068e-07, "loss": 2.8388, "step": 7470 }, { "epoch": 50.2020202020202, "grad_norm": 43.26251983642578, "learning_rate": 9.972553324449133e-07, "loss": 3.1859, "step": 7480 }, { "epoch": 50.26936026936027, "grad_norm": 28.51325035095215, "learning_rate": 9.951440626975418e-07, "loss": 2.5268, "step": 7490 }, { "epoch": 50.33670033670034, "grad_norm": 72.5859375, "learning_rate": 9.930328145956292e-07, "loss": 3.1179, "step": 7500 }, { "epoch": 50.33670033670034, "eval_loss": 0.39305955171585083, "eval_mae": 0.44332584738731384, "eval_mse": 0.39305955171585083, "eval_r2": 0.1402503252029419, "eval_rmse": 0.6269446161471129, "eval_runtime": 10.7789, "eval_samples_per_second": 440.581, "eval_steps_per_second": 13.823, "step": 7500 }, { "epoch": 50.4040404040404, "grad_norm": 108.67977142333984, "learning_rate": 9.909215975501147e-07, "loss": 2.8227, "step": 7510 }, { "epoch": 50.47138047138047, "grad_norm": 59.25677490234375, "learning_rate": 9.888104209718003e-07, "loss": 2.6344, "step": 7520 }, { "epoch": 50.53872053872054, "grad_norm": 31.840709686279297, "learning_rate": 9.866992942713063e-07, "loss": 2.6096, "step": 7530 }, { "epoch": 50.60606060606061, "grad_norm": 120.99418640136719, "learning_rate": 9.84588226859032e-07, "loss": 2.7628, "step": 7540 }, { "epoch": 50.67340067340067, "grad_norm": 88.51190185546875, "learning_rate": 9.824772281451108e-07, "loss": 2.8884, "step": 7550 }, { "epoch": 50.74074074074074, "grad_norm": 70.09060668945312, "learning_rate": 9.803663075393718e-07, "loss": 3.3698, "step": 7560 }, { "epoch": 50.80808080808081, "grad_norm": 92.17295837402344, "learning_rate": 9.782554744512941e-07, "loss": 2.629, "step": 7570 }, { "epoch": 50.87542087542087, "grad_norm": 219.4818878173828, "learning_rate": 9.761447382899683e-07, "loss": 2.7838, "step": 7580 }, { "epoch": 50.94276094276094, "grad_norm": 60.17744064331055, "learning_rate": 9.740341084640516e-07, "loss": 2.5101, "step": 7590 }, { "epoch": 51.00673400673401, "grad_norm": 92.5684814453125, "learning_rate": 9.719235943817283e-07, "loss": 2.6195, "step": 7600 }, { "epoch": 51.00673400673401, "eval_loss": 0.37053003907203674, "eval_mae": 0.4222278892993927, "eval_mse": 0.37053006887435913, "eval_r2": 0.18952959775924683, "eval_rmse": 0.6087118110192697, "eval_runtime": 10.7881, "eval_samples_per_second": 440.208, "eval_steps_per_second": 13.812, "step": 7600 }, { "epoch": 51.074074074074076, "grad_norm": 82.76415252685547, "learning_rate": 9.698132054506653e-07, "loss": 2.9018, "step": 7610 }, { "epoch": 51.14141414141414, "grad_norm": 163.4709014892578, "learning_rate": 9.67702951077974e-07, "loss": 3.0084, "step": 7620 }, { "epoch": 51.20875420875421, "grad_norm": 36.260013580322266, "learning_rate": 9.655928406701631e-07, "loss": 2.6691, "step": 7630 }, { "epoch": 51.27609427609428, "grad_norm": 68.32262420654297, "learning_rate": 9.634828836331025e-07, "loss": 2.7757, "step": 7640 }, { "epoch": 51.343434343434346, "grad_norm": 198.88525390625, "learning_rate": 9.613730893719757e-07, "loss": 3.0286, "step": 7650 }, { "epoch": 51.41077441077441, "grad_norm": 67.95198059082031, "learning_rate": 9.59263467291243e-07, "loss": 2.8708, "step": 7660 }, { "epoch": 51.47811447811448, "grad_norm": 101.05775451660156, "learning_rate": 9.571540267945955e-07, "loss": 2.6266, "step": 7670 }, { "epoch": 51.54545454545455, "grad_norm": 56.67593002319336, "learning_rate": 9.550447772849157e-07, "loss": 2.8235, "step": 7680 }, { "epoch": 51.612794612794616, "grad_norm": 111.70256805419922, "learning_rate": 9.529357281642346e-07, "loss": 2.3589, "step": 7690 }, { "epoch": 51.68013468013468, "grad_norm": 103.28323364257812, "learning_rate": 9.508268888336902e-07, "loss": 2.9302, "step": 7700 }, { "epoch": 51.68013468013468, "eval_loss": 0.3712332248687744, "eval_mae": 0.42291060090065, "eval_mse": 0.3712332248687744, "eval_r2": 0.18799161911010742, "eval_rmse": 0.6092891143527631, "eval_runtime": 10.7805, "eval_samples_per_second": 440.517, "eval_steps_per_second": 13.821, "step": 7700 }, { "epoch": 51.74747474747475, "grad_norm": 120.67108917236328, "learning_rate": 9.487182686934845e-07, "loss": 2.7053, "step": 7710 }, { "epoch": 51.81481481481482, "grad_norm": 61.16059112548828, "learning_rate": 9.466098771428436e-07, "loss": 2.6996, "step": 7720 }, { "epoch": 51.88215488215488, "grad_norm": 150.18113708496094, "learning_rate": 9.445017235799739e-07, "loss": 2.7045, "step": 7730 }, { "epoch": 51.94949494949495, "grad_norm": 109.67314147949219, "learning_rate": 9.423938174020214e-07, "loss": 3.0754, "step": 7740 }, { "epoch": 52.013468013468014, "grad_norm": 43.34989929199219, "learning_rate": 9.402861680050287e-07, "loss": 3.0297, "step": 7750 }, { "epoch": 52.08080808080808, "grad_norm": 111.2919692993164, "learning_rate": 9.381787847838949e-07, "loss": 2.8844, "step": 7760 }, { "epoch": 52.148148148148145, "grad_norm": 111.9505386352539, "learning_rate": 9.360716771323313e-07, "loss": 2.8998, "step": 7770 }, { "epoch": 52.215488215488215, "grad_norm": 108.83831024169922, "learning_rate": 9.339648544428221e-07, "loss": 3.0235, "step": 7780 }, { "epoch": 52.282828282828284, "grad_norm": 93.09503173828125, "learning_rate": 9.318583261065801e-07, "loss": 2.8473, "step": 7790 }, { "epoch": 52.35016835016835, "grad_norm": 98.71038818359375, "learning_rate": 9.297521015135072e-07, "loss": 2.7451, "step": 7800 }, { "epoch": 52.35016835016835, "eval_loss": 0.37400078773498535, "eval_mae": 0.4265090525150299, "eval_mse": 0.37400078773498535, "eval_r2": 0.1819380521774292, "eval_rmse": 0.6115560380987055, "eval_runtime": 10.7757, "eval_samples_per_second": 440.713, "eval_steps_per_second": 13.827, "step": 7800 }, { "epoch": 52.417508417508415, "grad_norm": 66.49639129638672, "learning_rate": 9.276461900521502e-07, "loss": 3.1011, "step": 7810 }, { "epoch": 52.484848484848484, "grad_norm": 168.44927978515625, "learning_rate": 9.255406011096613e-07, "loss": 3.0651, "step": 7820 }, { "epoch": 52.552188552188554, "grad_norm": 103.7436752319336, "learning_rate": 9.234353440717534e-07, "loss": 2.6104, "step": 7830 }, { "epoch": 52.61952861952862, "grad_norm": 182.3010711669922, "learning_rate": 9.213304283226623e-07, "loss": 2.9427, "step": 7840 }, { "epoch": 52.686868686868685, "grad_norm": 54.30632781982422, "learning_rate": 9.192258632450999e-07, "loss": 2.5104, "step": 7850 }, { "epoch": 52.754208754208754, "grad_norm": 27.852649688720703, "learning_rate": 9.171216582202171e-07, "loss": 2.7536, "step": 7860 }, { "epoch": 52.821548821548824, "grad_norm": 262.0413513183594, "learning_rate": 9.150178226275583e-07, "loss": 2.5374, "step": 7870 }, { "epoch": 52.888888888888886, "grad_norm": 61.688724517822266, "learning_rate": 9.129143658450227e-07, "loss": 2.8475, "step": 7880 }, { "epoch": 52.956228956228955, "grad_norm": 94.80573272705078, "learning_rate": 9.108112972488189e-07, "loss": 2.6902, "step": 7890 }, { "epoch": 53.02020202020202, "grad_norm": 36.121360778808594, "learning_rate": 9.087086262134276e-07, "loss": 2.5951, "step": 7900 }, { "epoch": 53.02020202020202, "eval_loss": 0.3706035614013672, "eval_mae": 0.4259144365787506, "eval_mse": 0.3706035614013672, "eval_r2": 0.18936890363693237, "eval_rmse": 0.6087721752851121, "eval_runtime": 10.7834, "eval_samples_per_second": 440.399, "eval_steps_per_second": 13.818, "step": 7900 }, { "epoch": 53.08754208754209, "grad_norm": 55.94941329956055, "learning_rate": 9.066063621115548e-07, "loss": 2.5651, "step": 7910 }, { "epoch": 53.15488215488215, "grad_norm": 96.6322021484375, "learning_rate": 9.045045143140951e-07, "loss": 3.4033, "step": 7920 }, { "epoch": 53.22222222222222, "grad_norm": 161.49949645996094, "learning_rate": 9.024030921900851e-07, "loss": 2.5881, "step": 7930 }, { "epoch": 53.28956228956229, "grad_norm": 85.61582946777344, "learning_rate": 9.00302105106666e-07, "loss": 2.7389, "step": 7940 }, { "epoch": 53.35690235690236, "grad_norm": 182.05421447753906, "learning_rate": 8.98201562429038e-07, "loss": 2.7794, "step": 7950 }, { "epoch": 53.42424242424242, "grad_norm": 121.98291778564453, "learning_rate": 8.961014735204216e-07, "loss": 2.8139, "step": 7960 }, { "epoch": 53.49158249158249, "grad_norm": 53.312774658203125, "learning_rate": 8.940018477420142e-07, "loss": 2.5389, "step": 7970 }, { "epoch": 53.55892255892256, "grad_norm": 102.00857543945312, "learning_rate": 8.919026944529487e-07, "loss": 2.6939, "step": 7980 }, { "epoch": 53.62626262626263, "grad_norm": 256.13067626953125, "learning_rate": 8.898040230102519e-07, "loss": 2.8444, "step": 7990 }, { "epoch": 53.69360269360269, "grad_norm": 89.46487426757812, "learning_rate": 8.877058427688028e-07, "loss": 2.5773, "step": 8000 }, { "epoch": 53.69360269360269, "eval_loss": 0.36859261989593506, "eval_mae": 0.4237404763698578, "eval_mse": 0.36859261989593506, "eval_r2": 0.1937674880027771, "eval_rmse": 0.6071182915181645, "eval_runtime": 10.8203, "eval_samples_per_second": 438.896, "eval_steps_per_second": 13.77, "step": 8000 }, { "epoch": 53.76094276094276, "grad_norm": 58.46363067626953, "learning_rate": 8.856081630812907e-07, "loss": 2.9802, "step": 8010 }, { "epoch": 53.82828282828283, "grad_norm": 147.62498474121094, "learning_rate": 8.835109932981746e-07, "loss": 3.3728, "step": 8020 }, { "epoch": 53.89562289562289, "grad_norm": 126.38748168945312, "learning_rate": 8.814143427676387e-07, "loss": 2.8553, "step": 8030 }, { "epoch": 53.96296296296296, "grad_norm": 30.008066177368164, "learning_rate": 8.793182208355547e-07, "loss": 2.6652, "step": 8040 }, { "epoch": 54.02693602693603, "grad_norm": 39.27421188354492, "learning_rate": 8.772226368454363e-07, "loss": 2.5613, "step": 8050 }, { "epoch": 54.0942760942761, "grad_norm": 71.02611541748047, "learning_rate": 8.751276001384009e-07, "loss": 2.8228, "step": 8060 }, { "epoch": 54.16161616161616, "grad_norm": 151.74737548828125, "learning_rate": 8.730331200531248e-07, "loss": 2.9161, "step": 8070 }, { "epoch": 54.22895622895623, "grad_norm": 58.16384506225586, "learning_rate": 8.709392059258047e-07, "loss": 2.7977, "step": 8080 }, { "epoch": 54.2962962962963, "grad_norm": 43.49307632446289, "learning_rate": 8.688458670901129e-07, "loss": 2.6631, "step": 8090 }, { "epoch": 54.36363636363637, "grad_norm": 68.99957275390625, "learning_rate": 8.669623617245481e-07, "loss": 2.6367, "step": 8100 }, { "epoch": 54.36363636363637, "eval_loss": 0.36874720454216003, "eval_mae": 0.423542857170105, "eval_mse": 0.36874720454216003, "eval_r2": 0.1934293508529663, "eval_rmse": 0.6072455883266341, "eval_runtime": 10.7825, "eval_samples_per_second": 440.437, "eval_steps_per_second": 13.819, "step": 8100 }, { "epoch": 54.43097643097643, "grad_norm": 170.68875122070312, "learning_rate": 8.648701416480033e-07, "loss": 2.3985, "step": 8110 }, { "epoch": 54.4983164983165, "grad_norm": 124.38671875, "learning_rate": 8.627785239160881e-07, "loss": 2.5738, "step": 8120 }, { "epoch": 54.56565656565657, "grad_norm": 103.02580261230469, "learning_rate": 8.606875178522406e-07, "loss": 2.8011, "step": 8130 }, { "epoch": 54.63299663299663, "grad_norm": 43.57786560058594, "learning_rate": 8.585971327771706e-07, "loss": 2.8889, "step": 8140 }, { "epoch": 54.7003367003367, "grad_norm": 86.27001953125, "learning_rate": 8.565073780088207e-07, "loss": 2.8696, "step": 8150 }, { "epoch": 54.76767676767677, "grad_norm": 106.44032287597656, "learning_rate": 8.544182628623237e-07, "loss": 3.0447, "step": 8160 }, { "epoch": 54.83501683501684, "grad_norm": 123.906005859375, "learning_rate": 8.523297966499607e-07, "loss": 2.8462, "step": 8170 }, { "epoch": 54.9023569023569, "grad_norm": 174.25001525878906, "learning_rate": 8.502419886811212e-07, "loss": 2.8826, "step": 8180 }, { "epoch": 54.96969696969697, "grad_norm": 115.1952896118164, "learning_rate": 8.481548482622596e-07, "loss": 2.9, "step": 8190 }, { "epoch": 55.033670033670035, "grad_norm": 99.97525787353516, "learning_rate": 8.460683846968551e-07, "loss": 2.8344, "step": 8200 }, { "epoch": 55.033670033670035, "eval_loss": 0.3678399920463562, "eval_mae": 0.422806054353714, "eval_mse": 0.3678399920463562, "eval_r2": 0.1954137086868286, "eval_rmse": 0.6064981385349474, "eval_runtime": 10.8188, "eval_samples_per_second": 438.958, "eval_steps_per_second": 13.772, "step": 8200 }, { "epoch": 55.101010101010104, "grad_norm": 55.611656188964844, "learning_rate": 8.439826072853702e-07, "loss": 2.828, "step": 8210 }, { "epoch": 55.168350168350166, "grad_norm": 59.69890594482422, "learning_rate": 8.418975253252078e-07, "loss": 2.5911, "step": 8220 }, { "epoch": 55.235690235690235, "grad_norm": 123.47525024414062, "learning_rate": 8.39813148110672e-07, "loss": 3.2245, "step": 8230 }, { "epoch": 55.303030303030305, "grad_norm": 26.531234741210938, "learning_rate": 8.377294849329247e-07, "loss": 2.3735, "step": 8240 }, { "epoch": 55.370370370370374, "grad_norm": 60.1549072265625, "learning_rate": 8.356465450799451e-07, "loss": 2.9319, "step": 8250 }, { "epoch": 55.437710437710436, "grad_norm": 127.94482421875, "learning_rate": 8.335643378364888e-07, "loss": 2.7355, "step": 8260 }, { "epoch": 55.505050505050505, "grad_norm": 81.21577453613281, "learning_rate": 8.314828724840445e-07, "loss": 2.677, "step": 8270 }, { "epoch": 55.572390572390574, "grad_norm": 55.42169952392578, "learning_rate": 8.294021583007953e-07, "loss": 2.8676, "step": 8280 }, { "epoch": 55.63973063973064, "grad_norm": 82.51737213134766, "learning_rate": 8.273222045615746e-07, "loss": 2.8963, "step": 8290 }, { "epoch": 55.707070707070706, "grad_norm": 64.16675567626953, "learning_rate": 8.252430205378273e-07, "loss": 2.522, "step": 8300 }, { "epoch": 55.707070707070706, "eval_loss": 0.3666117191314697, "eval_mae": 0.42175766825675964, "eval_mse": 0.3666117191314697, "eval_r2": 0.19810032844543457, "eval_rmse": 0.6054846976856391, "eval_runtime": 10.7855, "eval_samples_per_second": 440.315, "eval_steps_per_second": 13.815, "step": 8300 }, { "epoch": 55.774410774410775, "grad_norm": 172.0429229736328, "learning_rate": 8.231646154975664e-07, "loss": 2.778, "step": 8310 }, { "epoch": 55.841750841750844, "grad_norm": 419.6776123046875, "learning_rate": 8.210869987053335e-07, "loss": 2.7853, "step": 8320 }, { "epoch": 55.90909090909091, "grad_norm": 200.73782348632812, "learning_rate": 8.190101794221552e-07, "loss": 2.765, "step": 8330 }, { "epoch": 55.976430976430976, "grad_norm": 150.01072692871094, "learning_rate": 8.169341669055047e-07, "loss": 2.8393, "step": 8340 }, { "epoch": 56.04040404040404, "grad_norm": 101.5224380493164, "learning_rate": 8.148589704092577e-07, "loss": 2.8416, "step": 8350 }, { "epoch": 56.10774410774411, "grad_norm": 33.08087158203125, "learning_rate": 8.127845991836535e-07, "loss": 2.6881, "step": 8360 }, { "epoch": 56.17508417508417, "grad_norm": 42.674217224121094, "learning_rate": 8.107110624752518e-07, "loss": 2.9535, "step": 8370 }, { "epoch": 56.24242424242424, "grad_norm": 55.9845085144043, "learning_rate": 8.086383695268937e-07, "loss": 2.593, "step": 8380 }, { "epoch": 56.30976430976431, "grad_norm": 79.3974380493164, "learning_rate": 8.065665295776575e-07, "loss": 2.7824, "step": 8390 }, { "epoch": 56.377104377104374, "grad_norm": 103.01301574707031, "learning_rate": 8.044955518628209e-07, "loss": 2.6209, "step": 8400 }, { "epoch": 56.377104377104374, "eval_loss": 0.36607053875923157, "eval_mae": 0.420957088470459, "eval_mse": 0.36607053875923157, "eval_r2": 0.19928407669067383, "eval_rmse": 0.6050376341676867, "eval_runtime": 10.7694, "eval_samples_per_second": 440.974, "eval_steps_per_second": 13.836, "step": 8400 }, { "epoch": 56.44444444444444, "grad_norm": 96.26338195800781, "learning_rate": 8.024254456138168e-07, "loss": 3.022, "step": 8410 }, { "epoch": 56.51178451178451, "grad_norm": 37.744041442871094, "learning_rate": 8.003562200581951e-07, "loss": 2.6019, "step": 8420 }, { "epoch": 56.57912457912458, "grad_norm": 27.26716423034668, "learning_rate": 7.982878844195778e-07, "loss": 3.0187, "step": 8430 }, { "epoch": 56.64646464646464, "grad_norm": 36.43662643432617, "learning_rate": 7.962204479176224e-07, "loss": 2.8714, "step": 8440 }, { "epoch": 56.71380471380471, "grad_norm": 171.5147247314453, "learning_rate": 7.941539197679767e-07, "loss": 2.4568, "step": 8450 }, { "epoch": 56.78114478114478, "grad_norm": 138.23472595214844, "learning_rate": 7.920883091822408e-07, "loss": 2.7649, "step": 8460 }, { "epoch": 56.84848484848485, "grad_norm": 111.0620346069336, "learning_rate": 7.900236253679235e-07, "loss": 2.568, "step": 8470 }, { "epoch": 56.91582491582491, "grad_norm": 54.69694519042969, "learning_rate": 7.879598775284033e-07, "loss": 2.5776, "step": 8480 }, { "epoch": 56.98316498316498, "grad_norm": 95.97238159179688, "learning_rate": 7.858970748628868e-07, "loss": 2.7417, "step": 8490 }, { "epoch": 57.04713804713805, "grad_norm": 183.23463439941406, "learning_rate": 7.838352265663666e-07, "loss": 2.7209, "step": 8500 }, { "epoch": 57.04713804713805, "eval_loss": 0.3642508089542389, "eval_mae": 0.4204018712043762, "eval_mse": 0.3642508089542389, "eval_r2": 0.20326441526412964, "eval_rmse": 0.6035319452640754, "eval_runtime": 10.7857, "eval_samples_per_second": 440.306, "eval_steps_per_second": 13.815, "step": 8500 }, { "epoch": 57.11447811447812, "grad_norm": 93.31880950927734, "learning_rate": 7.817743418295818e-07, "loss": 3.283, "step": 8510 }, { "epoch": 57.18181818181818, "grad_norm": 164.93089294433594, "learning_rate": 7.797144298389762e-07, "loss": 2.5298, "step": 8520 }, { "epoch": 57.24915824915825, "grad_norm": 200.4240264892578, "learning_rate": 7.776554997766578e-07, "loss": 2.5167, "step": 8530 }, { "epoch": 57.31649831649832, "grad_norm": 185.1934814453125, "learning_rate": 7.755975608203571e-07, "loss": 2.5398, "step": 8540 }, { "epoch": 57.38383838383838, "grad_norm": 56.52271270751953, "learning_rate": 7.73540622143387e-07, "loss": 2.5301, "step": 8550 }, { "epoch": 57.45117845117845, "grad_norm": 167.07264709472656, "learning_rate": 7.714846929146024e-07, "loss": 2.6136, "step": 8560 }, { "epoch": 57.51851851851852, "grad_norm": 207.03700256347656, "learning_rate": 7.694297822983569e-07, "loss": 2.599, "step": 8570 }, { "epoch": 57.58585858585859, "grad_norm": 128.9772491455078, "learning_rate": 7.673758994544653e-07, "loss": 2.7752, "step": 8580 }, { "epoch": 57.65319865319865, "grad_norm": 51.315406799316406, "learning_rate": 7.653230535381597e-07, "loss": 3.1829, "step": 8590 }, { "epoch": 57.72053872053872, "grad_norm": 233.10601806640625, "learning_rate": 7.632712537000515e-07, "loss": 3.0232, "step": 8600 }, { "epoch": 57.72053872053872, "eval_loss": 0.37591132521629333, "eval_mae": 0.4335439205169678, "eval_mse": 0.37591132521629333, "eval_r2": 0.177759051322937, "eval_rmse": 0.6131160780931236, "eval_runtime": 10.7713, "eval_samples_per_second": 440.896, "eval_steps_per_second": 13.833, "step": 8600 }, { "epoch": 57.78787878787879, "grad_norm": 160.42025756835938, "learning_rate": 7.612205090860874e-07, "loss": 2.909, "step": 8610 }, { "epoch": 57.85521885521886, "grad_norm": 120.39913177490234, "learning_rate": 7.591708288375125e-07, "loss": 2.7331, "step": 8620 }, { "epoch": 57.92255892255892, "grad_norm": 91.9896469116211, "learning_rate": 7.571222220908252e-07, "loss": 2.8905, "step": 8630 }, { "epoch": 57.98989898989899, "grad_norm": 101.26105499267578, "learning_rate": 7.550746979777409e-07, "loss": 2.5115, "step": 8640 }, { "epoch": 58.053872053872055, "grad_norm": 60.81822967529297, "learning_rate": 7.530282656251474e-07, "loss": 2.9275, "step": 8650 }, { "epoch": 58.121212121212125, "grad_norm": 154.14097595214844, "learning_rate": 7.509829341550673e-07, "loss": 2.8961, "step": 8660 }, { "epoch": 58.18855218855219, "grad_norm": 118.66650390625, "learning_rate": 7.489387126846148e-07, "loss": 2.3494, "step": 8670 }, { "epoch": 58.255892255892256, "grad_norm": 56.92625427246094, "learning_rate": 7.468956103259574e-07, "loss": 2.4819, "step": 8680 }, { "epoch": 58.323232323232325, "grad_norm": 40.0760612487793, "learning_rate": 7.448536361862729e-07, "loss": 2.5992, "step": 8690 }, { "epoch": 58.39057239057239, "grad_norm": 130.31539916992188, "learning_rate": 7.428127993677115e-07, "loss": 2.7318, "step": 8700 }, { "epoch": 58.39057239057239, "eval_loss": 0.3621235489845276, "eval_mae": 0.41809794306755066, "eval_mse": 0.3621235191822052, "eval_r2": 0.20791751146316528, "eval_rmse": 0.6017669974186065, "eval_runtime": 10.7769, "eval_samples_per_second": 440.663, "eval_steps_per_second": 13.826, "step": 8700 }, { "epoch": 58.45791245791246, "grad_norm": 261.0930480957031, "learning_rate": 7.40773108967352e-07, "loss": 3.0869, "step": 8710 }, { "epoch": 58.525252525252526, "grad_norm": 52.540340423583984, "learning_rate": 7.38734574077165e-07, "loss": 2.8372, "step": 8720 }, { "epoch": 58.592592592592595, "grad_norm": 140.21766662597656, "learning_rate": 7.366972037839684e-07, "loss": 2.9269, "step": 8730 }, { "epoch": 58.65993265993266, "grad_norm": 140.7172393798828, "learning_rate": 7.346610071693907e-07, "loss": 2.8555, "step": 8740 }, { "epoch": 58.72727272727273, "grad_norm": 283.7715148925781, "learning_rate": 7.32625993309827e-07, "loss": 2.8479, "step": 8750 }, { "epoch": 58.794612794612796, "grad_norm": 94.37970733642578, "learning_rate": 7.305921712764019e-07, "loss": 2.4671, "step": 8760 }, { "epoch": 58.861952861952865, "grad_norm": 194.48150634765625, "learning_rate": 7.285595501349258e-07, "loss": 2.7311, "step": 8770 }, { "epoch": 58.92929292929293, "grad_norm": 202.75613403320312, "learning_rate": 7.265281389458574e-07, "loss": 2.8888, "step": 8780 }, { "epoch": 58.996632996633, "grad_norm": 100.53678894042969, "learning_rate": 7.244979467642614e-07, "loss": 2.4763, "step": 8790 }, { "epoch": 59.06060606060606, "grad_norm": 39.66692352294922, "learning_rate": 7.224689826397687e-07, "loss": 2.5013, "step": 8800 }, { "epoch": 59.06060606060606, "eval_loss": 0.3652089536190033, "eval_mae": 0.42015716433525085, "eval_mse": 0.3652089536190033, "eval_r2": 0.20116865634918213, "eval_rmse": 0.6043252051826097, "eval_runtime": 10.8068, "eval_samples_per_second": 439.447, "eval_steps_per_second": 13.788, "step": 8800 }, { "epoch": 59.127946127946124, "grad_norm": 49.749176025390625, "learning_rate": 7.204412556165365e-07, "loss": 2.5992, "step": 8810 }, { "epoch": 59.195286195286194, "grad_norm": 85.62872314453125, "learning_rate": 7.184147747332069e-07, "loss": 2.7343, "step": 8820 }, { "epoch": 59.26262626262626, "grad_norm": 151.4752197265625, "learning_rate": 7.163895490228679e-07, "loss": 3.1346, "step": 8830 }, { "epoch": 59.32996632996633, "grad_norm": 147.09803771972656, "learning_rate": 7.143655875130128e-07, "loss": 2.7622, "step": 8840 }, { "epoch": 59.397306397306394, "grad_norm": 82.75611114501953, "learning_rate": 7.123428992254984e-07, "loss": 2.6811, "step": 8850 }, { "epoch": 59.464646464646464, "grad_norm": 27.548011779785156, "learning_rate": 7.103214931765079e-07, "loss": 2.7974, "step": 8860 }, { "epoch": 59.53198653198653, "grad_norm": 65.380126953125, "learning_rate": 7.083013783765071e-07, "loss": 2.9787, "step": 8870 }, { "epoch": 59.5993265993266, "grad_norm": 125.52188110351562, "learning_rate": 7.062825638302075e-07, "loss": 2.8236, "step": 8880 }, { "epoch": 59.666666666666664, "grad_norm": 20.583967208862305, "learning_rate": 7.042650585365234e-07, "loss": 2.8012, "step": 8890 }, { "epoch": 59.73400673400673, "grad_norm": 81.24407196044922, "learning_rate": 7.022488714885345e-07, "loss": 2.792, "step": 8900 }, { "epoch": 59.73400673400673, "eval_loss": 0.3605329990386963, "eval_mae": 0.4183028042316437, "eval_mse": 0.3605330288410187, "eval_r2": 0.21139639616012573, "eval_rmse": 0.6004440264013113, "eval_runtime": 10.779, "eval_samples_per_second": 440.58, "eval_steps_per_second": 13.823, "step": 8900 }, { "epoch": 59.8013468013468, "grad_norm": 106.37210083007812, "learning_rate": 7.002340116734428e-07, "loss": 2.6507, "step": 8910 }, { "epoch": 59.86868686868687, "grad_norm": 111.1795654296875, "learning_rate": 6.982204880725356e-07, "loss": 2.3886, "step": 8920 }, { "epoch": 59.936026936026934, "grad_norm": 129.83425903320312, "learning_rate": 6.962083096611429e-07, "loss": 2.5467, "step": 8930 }, { "epoch": 60.0, "grad_norm": 114.98896026611328, "learning_rate": 6.941974854085992e-07, "loss": 2.5269, "step": 8940 }, { "epoch": 60.06734006734007, "grad_norm": 110.27008819580078, "learning_rate": 6.92188024278202e-07, "loss": 2.9102, "step": 8950 }, { "epoch": 60.13468013468013, "grad_norm": 72.77752685546875, "learning_rate": 6.90179935227174e-07, "loss": 2.6478, "step": 8960 }, { "epoch": 60.2020202020202, "grad_norm": 217.52505493164062, "learning_rate": 6.8817322720662e-07, "loss": 2.7255, "step": 8970 }, { "epoch": 60.26936026936027, "grad_norm": 112.5431900024414, "learning_rate": 6.861679091614909e-07, "loss": 2.6417, "step": 8980 }, { "epoch": 60.33670033670034, "grad_norm": 252.95152282714844, "learning_rate": 6.841639900305396e-07, "loss": 2.4313, "step": 8990 }, { "epoch": 60.4040404040404, "grad_norm": 66.94791412353516, "learning_rate": 6.821614787462853e-07, "loss": 2.5342, "step": 9000 }, { "epoch": 60.4040404040404, "eval_loss": 0.36002445220947266, "eval_mae": 0.4189915060997009, "eval_mse": 0.36002445220947266, "eval_r2": 0.21250885725021362, "eval_rmse": 0.6000203764952259, "eval_runtime": 10.782, "eval_samples_per_second": 440.456, "eval_steps_per_second": 13.819, "step": 9000 }, { "epoch": 60.47138047138047, "grad_norm": 89.68761444091797, "learning_rate": 6.801603842349699e-07, "loss": 2.9907, "step": 9010 }, { "epoch": 60.53872053872054, "grad_norm": 235.25949096679688, "learning_rate": 6.781607154165216e-07, "loss": 2.8029, "step": 9020 }, { "epoch": 60.60606060606061, "grad_norm": 106.80323791503906, "learning_rate": 6.761624812045121e-07, "loss": 2.4502, "step": 9030 }, { "epoch": 60.67340067340067, "grad_norm": 74.47793579101562, "learning_rate": 6.7416569050612e-07, "loss": 3.1216, "step": 9040 }, { "epoch": 60.74074074074074, "grad_norm": 86.51136779785156, "learning_rate": 6.721703522220871e-07, "loss": 2.7593, "step": 9050 }, { "epoch": 60.80808080808081, "grad_norm": 79.02229309082031, "learning_rate": 6.701764752466833e-07, "loss": 2.7789, "step": 9060 }, { "epoch": 60.87542087542087, "grad_norm": 67.96233367919922, "learning_rate": 6.681840684676634e-07, "loss": 2.7477, "step": 9070 }, { "epoch": 60.94276094276094, "grad_norm": 74.25907135009766, "learning_rate": 6.661931407662291e-07, "loss": 2.5145, "step": 9080 }, { "epoch": 61.00673400673401, "grad_norm": 91.72937774658203, "learning_rate": 6.642037010169892e-07, "loss": 2.8524, "step": 9090 }, { "epoch": 61.074074074074076, "grad_norm": 84.8498306274414, "learning_rate": 6.622157580879196e-07, "loss": 2.7614, "step": 9100 }, { "epoch": 61.074074074074076, "eval_loss": 0.3594312369823456, "eval_mae": 0.4179425537586212, "eval_mse": 0.3594312369823456, "eval_r2": 0.2138063907623291, "eval_rmse": 0.5995258434649382, "eval_runtime": 10.8176, "eval_samples_per_second": 439.008, "eval_steps_per_second": 13.774, "step": 9100 }, { "epoch": 61.14141414141414, "grad_norm": 37.27864074707031, "learning_rate": 6.602293208403242e-07, "loss": 2.7735, "step": 9110 }, { "epoch": 61.20875420875421, "grad_norm": 146.24974060058594, "learning_rate": 6.582443981287955e-07, "loss": 2.5714, "step": 9120 }, { "epoch": 61.27609427609428, "grad_norm": 112.12639617919922, "learning_rate": 6.562609988011744e-07, "loss": 2.9706, "step": 9130 }, { "epoch": 61.343434343434346, "grad_norm": 110.9952392578125, "learning_rate": 6.542791316985125e-07, "loss": 2.726, "step": 9140 }, { "epoch": 61.41077441077441, "grad_norm": 138.796630859375, "learning_rate": 6.522988056550296e-07, "loss": 2.6787, "step": 9150 }, { "epoch": 61.47811447811448, "grad_norm": 91.18521881103516, "learning_rate": 6.50320029498078e-07, "loss": 2.9779, "step": 9160 }, { "epoch": 61.54545454545455, "grad_norm": 87.296142578125, "learning_rate": 6.483428120481e-07, "loss": 2.8429, "step": 9170 }, { "epoch": 61.612794612794616, "grad_norm": 122.42247772216797, "learning_rate": 6.463671621185906e-07, "loss": 2.5944, "step": 9180 }, { "epoch": 61.68013468013468, "grad_norm": 222.47659301757812, "learning_rate": 6.44393088516058e-07, "loss": 2.6286, "step": 9190 }, { "epoch": 61.74747474747475, "grad_norm": 111.3460693359375, "learning_rate": 6.424206000399823e-07, "loss": 2.8569, "step": 9200 }, { "epoch": 61.74747474747475, "eval_loss": 0.3590884506702423, "eval_mae": 0.41769465804100037, "eval_mse": 0.3590884506702423, "eval_r2": 0.21455621719360352, "eval_rmse": 0.5992398940910413, "eval_runtime": 10.7885, "eval_samples_per_second": 440.191, "eval_steps_per_second": 13.811, "step": 9200 }, { "epoch": 61.81481481481482, "grad_norm": 39.40879821777344, "learning_rate": 6.404497054827798e-07, "loss": 2.4, "step": 9210 }, { "epoch": 61.88215488215488, "grad_norm": 54.394832611083984, "learning_rate": 6.384804136297601e-07, "loss": 2.5261, "step": 9220 }, { "epoch": 61.94949494949495, "grad_norm": 110.10257720947266, "learning_rate": 6.365127332590904e-07, "loss": 2.8149, "step": 9230 }, { "epoch": 62.013468013468014, "grad_norm": 129.25424194335938, "learning_rate": 6.345466731417528e-07, "loss": 2.488, "step": 9240 }, { "epoch": 62.08080808080808, "grad_norm": 101.78056335449219, "learning_rate": 6.325822420415092e-07, "loss": 2.7991, "step": 9250 }, { "epoch": 62.148148148148145, "grad_norm": 86.51380157470703, "learning_rate": 6.30619448714858e-07, "loss": 2.2727, "step": 9260 }, { "epoch": 62.215488215488215, "grad_norm": 141.17086791992188, "learning_rate": 6.28658301910999e-07, "loss": 2.9317, "step": 9270 }, { "epoch": 62.282828282828284, "grad_norm": 51.423309326171875, "learning_rate": 6.266988103717911e-07, "loss": 2.4028, "step": 9280 }, { "epoch": 62.35016835016835, "grad_norm": 84.16255950927734, "learning_rate": 6.247409828317163e-07, "loss": 2.7833, "step": 9290 }, { "epoch": 62.417508417508415, "grad_norm": 159.36712646484375, "learning_rate": 6.227848280178374e-07, "loss": 2.8873, "step": 9300 }, { "epoch": 62.417508417508415, "eval_loss": 0.3584060072898865, "eval_mae": 0.41768893599510193, "eval_mse": 0.3584060072898865, "eval_r2": 0.2160489559173584, "eval_rmse": 0.598670199099543, "eval_runtime": 10.7931, "eval_samples_per_second": 440.004, "eval_steps_per_second": 13.805, "step": 9300 }, { "epoch": 62.484848484848484, "grad_norm": 152.32571411132812, "learning_rate": 6.208303546497632e-07, "loss": 2.7216, "step": 9310 }, { "epoch": 62.552188552188554, "grad_norm": 208.91018676757812, "learning_rate": 6.188775714396053e-07, "loss": 2.7474, "step": 9320 }, { "epoch": 62.61952861952862, "grad_norm": 31.61521339416504, "learning_rate": 6.169264870919431e-07, "loss": 2.8114, "step": 9330 }, { "epoch": 62.686868686868685, "grad_norm": 163.75547790527344, "learning_rate": 6.149771103037819e-07, "loss": 2.5553, "step": 9340 }, { "epoch": 62.754208754208754, "grad_norm": 158.57891845703125, "learning_rate": 6.13029449764517e-07, "loss": 3.0937, "step": 9350 }, { "epoch": 62.821548821548824, "grad_norm": 60.71897888183594, "learning_rate": 6.110835141558916e-07, "loss": 2.4506, "step": 9360 }, { "epoch": 62.888888888888886, "grad_norm": 79.51283264160156, "learning_rate": 6.091393121519621e-07, "loss": 2.625, "step": 9370 }, { "epoch": 62.956228956228955, "grad_norm": 81.74191284179688, "learning_rate": 6.071968524190549e-07, "loss": 2.4764, "step": 9380 }, { "epoch": 63.02020202020202, "grad_norm": 234.54502868652344, "learning_rate": 6.052561436157328e-07, "loss": 2.9215, "step": 9390 }, { "epoch": 63.08754208754209, "grad_norm": 165.62193298339844, "learning_rate": 6.033171943927517e-07, "loss": 2.9103, "step": 9400 }, { "epoch": 63.08754208754209, "eval_loss": 0.36053094267845154, "eval_mae": 0.4209555685520172, "eval_mse": 0.36053094267845154, "eval_r2": 0.21140098571777344, "eval_rmse": 0.6004422892155844, "eval_runtime": 10.7862, "eval_samples_per_second": 440.284, "eval_steps_per_second": 13.814, "step": 9400 }, { "epoch": 63.15488215488215, "grad_norm": 107.34781646728516, "learning_rate": 6.013800133930252e-07, "loss": 2.883, "step": 9410 }, { "epoch": 63.22222222222222, "grad_norm": 103.58859252929688, "learning_rate": 5.994446092515848e-07, "loss": 2.8006, "step": 9420 }, { "epoch": 63.28956228956229, "grad_norm": 89.88680267333984, "learning_rate": 5.975109905955413e-07, "loss": 2.7142, "step": 9430 }, { "epoch": 63.35690235690236, "grad_norm": 34.57746505737305, "learning_rate": 5.95579166044047e-07, "loss": 2.9622, "step": 9440 }, { "epoch": 63.42424242424242, "grad_norm": 33.601219177246094, "learning_rate": 5.936491442082569e-07, "loss": 2.6159, "step": 9450 }, { "epoch": 63.49158249158249, "grad_norm": 234.8035430908203, "learning_rate": 5.917209336912903e-07, "loss": 2.6208, "step": 9460 }, { "epoch": 63.55892255892256, "grad_norm": 89.23230743408203, "learning_rate": 5.897945430881923e-07, "loss": 2.4838, "step": 9470 }, { "epoch": 63.62626262626263, "grad_norm": 99.4452896118164, "learning_rate": 5.878699809858958e-07, "loss": 2.7956, "step": 9480 }, { "epoch": 63.69360269360269, "grad_norm": 113.62226867675781, "learning_rate": 5.859472559631837e-07, "loss": 2.4525, "step": 9490 }, { "epoch": 63.76094276094276, "grad_norm": 42.291378021240234, "learning_rate": 5.840263765906489e-07, "loss": 2.5417, "step": 9500 }, { "epoch": 63.76094276094276, "eval_loss": 0.35789254307746887, "eval_mae": 0.4184859097003937, "eval_mse": 0.3578925132751465, "eval_r2": 0.21717208623886108, "eval_rmse": 0.5982411831988387, "eval_runtime": 10.7875, "eval_samples_per_second": 440.234, "eval_steps_per_second": 13.812, "step": 9500 }, { "epoch": 63.82828282828283, "grad_norm": 72.59378051757812, "learning_rate": 5.821073514306585e-07, "loss": 2.193, "step": 9510 }, { "epoch": 63.89562289562289, "grad_norm": 157.62109375, "learning_rate": 5.801901890373133e-07, "loss": 2.761, "step": 9520 }, { "epoch": 63.96296296296296, "grad_norm": 79.53624725341797, "learning_rate": 5.782748979564118e-07, "loss": 2.8779, "step": 9530 }, { "epoch": 64.02693602693603, "grad_norm": 188.7542724609375, "learning_rate": 5.763614867254101e-07, "loss": 2.4144, "step": 9540 }, { "epoch": 64.0942760942761, "grad_norm": 181.61984252929688, "learning_rate": 5.744499638733859e-07, "loss": 2.5463, "step": 9550 }, { "epoch": 64.16161616161617, "grad_norm": 102.54400634765625, "learning_rate": 5.72540337920998e-07, "loss": 2.6315, "step": 9560 }, { "epoch": 64.22895622895624, "grad_norm": 190.02183532714844, "learning_rate": 5.706326173804515e-07, "loss": 2.7659, "step": 9570 }, { "epoch": 64.29629629629629, "grad_norm": 217.90736389160156, "learning_rate": 5.687268107554559e-07, "loss": 2.8759, "step": 9580 }, { "epoch": 64.36363636363636, "grad_norm": 219.75086975097656, "learning_rate": 5.668229265411919e-07, "loss": 3.1842, "step": 9590 }, { "epoch": 64.43097643097643, "grad_norm": 118.2819595336914, "learning_rate": 5.649209732242685e-07, "loss": 2.7001, "step": 9600 }, { "epoch": 64.43097643097643, "eval_loss": 0.3591337502002716, "eval_mae": 0.4198189079761505, "eval_mse": 0.3591337502002716, "eval_r2": 0.21445715427398682, "eval_rmse": 0.5992776903909168, "eval_runtime": 10.7849, "eval_samples_per_second": 440.337, "eval_steps_per_second": 13.816, "step": 9600 }, { "epoch": 64.4983164983165, "grad_norm": 110.0768814086914, "learning_rate": 5.6302095928269e-07, "loss": 2.4752, "step": 9610 }, { "epoch": 64.56565656565657, "grad_norm": 194.79295349121094, "learning_rate": 5.611228931858138e-07, "loss": 2.9206, "step": 9620 }, { "epoch": 64.63299663299664, "grad_norm": 144.76451110839844, "learning_rate": 5.592267833943164e-07, "loss": 2.6084, "step": 9630 }, { "epoch": 64.7003367003367, "grad_norm": 169.98895263671875, "learning_rate": 5.573326383601527e-07, "loss": 2.7085, "step": 9640 }, { "epoch": 64.76767676767676, "grad_norm": 49.85799789428711, "learning_rate": 5.554404665265209e-07, "loss": 2.7707, "step": 9650 }, { "epoch": 64.83501683501683, "grad_norm": 173.36639404296875, "learning_rate": 5.535502763278221e-07, "loss": 2.4835, "step": 9660 }, { "epoch": 64.9023569023569, "grad_norm": 146.8667755126953, "learning_rate": 5.516620761896262e-07, "loss": 2.5801, "step": 9670 }, { "epoch": 64.96969696969697, "grad_norm": 147.1758270263672, "learning_rate": 5.497758745286303e-07, "loss": 2.6099, "step": 9680 }, { "epoch": 65.03367003367003, "grad_norm": 90.58416748046875, "learning_rate": 5.478916797526248e-07, "loss": 2.3514, "step": 9690 }, { "epoch": 65.1010101010101, "grad_norm": 26.09779167175293, "learning_rate": 5.460095002604532e-07, "loss": 2.9007, "step": 9700 }, { "epoch": 65.1010101010101, "eval_loss": 0.35612890124320984, "eval_mae": 0.4147729277610779, "eval_mse": 0.35612890124320984, "eval_r2": 0.22102969884872437, "eval_rmse": 0.5967653653180702, "eval_runtime": 10.8164, "eval_samples_per_second": 439.054, "eval_steps_per_second": 13.775, "step": 9700 }, { "epoch": 65.16835016835017, "grad_norm": 34.89274215698242, "learning_rate": 5.441293444419771e-07, "loss": 2.8684, "step": 9710 }, { "epoch": 65.23569023569024, "grad_norm": 70.63241577148438, "learning_rate": 5.422512206780359e-07, "loss": 2.2999, "step": 9720 }, { "epoch": 65.3030303030303, "grad_norm": 86.14585876464844, "learning_rate": 5.403751373404129e-07, "loss": 3.1167, "step": 9730 }, { "epoch": 65.37037037037037, "grad_norm": 91.7327880859375, "learning_rate": 5.385011027917941e-07, "loss": 2.5835, "step": 9740 }, { "epoch": 65.43771043771044, "grad_norm": 63.34545135498047, "learning_rate": 5.366291253857354e-07, "loss": 2.8478, "step": 9750 }, { "epoch": 65.5050505050505, "grad_norm": 69.14451599121094, "learning_rate": 5.347592134666202e-07, "loss": 2.8347, "step": 9760 }, { "epoch": 65.57239057239057, "grad_norm": 83.33345031738281, "learning_rate": 5.328913753696277e-07, "loss": 2.3727, "step": 9770 }, { "epoch": 65.63973063973064, "grad_norm": 105.01849365234375, "learning_rate": 5.310256194206906e-07, "loss": 2.7064, "step": 9780 }, { "epoch": 65.70707070707071, "grad_norm": 74.62297821044922, "learning_rate": 5.291619539364623e-07, "loss": 2.3814, "step": 9790 }, { "epoch": 65.77441077441077, "grad_norm": 50.12975311279297, "learning_rate": 5.27300387224276e-07, "loss": 2.7361, "step": 9800 }, { "epoch": 65.77441077441077, "eval_loss": 0.35513028502464294, "eval_mae": 0.41587984561920166, "eval_mse": 0.35513028502464294, "eval_r2": 0.2232140302658081, "eval_rmse": 0.5959280871251521, "eval_runtime": 10.7833, "eval_samples_per_second": 440.403, "eval_steps_per_second": 13.818, "step": 9800 }, { "epoch": 65.84175084175084, "grad_norm": 87.69711303710938, "learning_rate": 5.254409275821117e-07, "loss": 2.7721, "step": 9810 }, { "epoch": 65.9090909090909, "grad_norm": 208.7084503173828, "learning_rate": 5.235835832985551e-07, "loss": 2.5904, "step": 9820 }, { "epoch": 65.97643097643098, "grad_norm": 142.06077575683594, "learning_rate": 5.217283626527643e-07, "loss": 2.7033, "step": 9830 }, { "epoch": 66.04040404040404, "grad_norm": 140.3471221923828, "learning_rate": 5.198752739144296e-07, "loss": 2.2841, "step": 9840 }, { "epoch": 66.10774410774411, "grad_norm": 80.3781509399414, "learning_rate": 5.180243253437395e-07, "loss": 2.2907, "step": 9850 }, { "epoch": 66.17508417508418, "grad_norm": 103.9603042602539, "learning_rate": 5.161755251913429e-07, "loss": 2.5403, "step": 9860 }, { "epoch": 66.24242424242425, "grad_norm": 109.1873779296875, "learning_rate": 5.143288816983104e-07, "loss": 2.8878, "step": 9870 }, { "epoch": 66.3097643097643, "grad_norm": 138.83016967773438, "learning_rate": 5.124844030961012e-07, "loss": 2.4509, "step": 9880 }, { "epoch": 66.37710437710437, "grad_norm": 39.8387451171875, "learning_rate": 5.106420976065227e-07, "loss": 2.7381, "step": 9890 }, { "epoch": 66.44444444444444, "grad_norm": 106.18115997314453, "learning_rate": 5.088019734416968e-07, "loss": 2.6459, "step": 9900 }, { "epoch": 66.44444444444444, "eval_loss": 0.357402503490448, "eval_mae": 0.41856104135513306, "eval_mse": 0.357402503490448, "eval_r2": 0.21824395656585693, "eval_rmse": 0.5978315009184846, "eval_runtime": 10.7924, "eval_samples_per_second": 440.033, "eval_steps_per_second": 13.806, "step": 9900 }, { "epoch": 66.51178451178451, "grad_norm": 80.8787612915039, "learning_rate": 5.069640388040222e-07, "loss": 2.891, "step": 9910 }, { "epoch": 66.57912457912458, "grad_norm": 67.0461196899414, "learning_rate": 5.051283018861363e-07, "loss": 2.6135, "step": 9920 }, { "epoch": 66.64646464646465, "grad_norm": 85.41495513916016, "learning_rate": 5.032947708708821e-07, "loss": 2.3705, "step": 9930 }, { "epoch": 66.71380471380472, "grad_norm": 82.44886016845703, "learning_rate": 5.014634539312677e-07, "loss": 2.8711, "step": 9940 }, { "epoch": 66.78114478114477, "grad_norm": 148.12811279296875, "learning_rate": 4.996343592304338e-07, "loss": 2.4381, "step": 9950 }, { "epoch": 66.84848484848484, "grad_norm": 113.22037506103516, "learning_rate": 4.978074949216137e-07, "loss": 2.8456, "step": 9960 }, { "epoch": 66.91582491582491, "grad_norm": 206.17892456054688, "learning_rate": 4.959828691481003e-07, "loss": 3.0394, "step": 9970 }, { "epoch": 66.98316498316498, "grad_norm": 333.60150146484375, "learning_rate": 4.941604900432065e-07, "loss": 3.065, "step": 9980 }, { "epoch": 67.04713804713805, "grad_norm": 60.60258102416992, "learning_rate": 4.923403657302324e-07, "loss": 2.3082, "step": 9990 }, { "epoch": 67.11447811447812, "grad_norm": 65.21240997314453, "learning_rate": 4.905225043224255e-07, "loss": 2.6698, "step": 10000 }, { "epoch": 67.11447811447812, "eval_loss": 0.3550506830215454, "eval_mae": 0.41680973768234253, "eval_mse": 0.3550507128238678, "eval_r2": 0.22338801622390747, "eval_rmse": 0.5958613201273161, "eval_runtime": 10.7883, "eval_samples_per_second": 440.2, "eval_steps_per_second": 13.811, "step": 10000 }, { "epoch": 67.18181818181819, "grad_norm": 80.27069854736328, "learning_rate": 4.88706913922948e-07, "loss": 2.5349, "step": 10010 }, { "epoch": 67.24915824915826, "grad_norm": 127.45413208007812, "learning_rate": 4.868936026248376e-07, "loss": 2.5257, "step": 10020 }, { "epoch": 67.31649831649831, "grad_norm": 45.97599792480469, "learning_rate": 4.850825785109744e-07, "loss": 2.4893, "step": 10030 }, { "epoch": 67.38383838383838, "grad_norm": 49.66387939453125, "learning_rate": 4.832738496540413e-07, "loss": 2.7238, "step": 10040 }, { "epoch": 67.45117845117845, "grad_norm": 86.36298370361328, "learning_rate": 4.814674241164921e-07, "loss": 2.9011, "step": 10050 }, { "epoch": 67.51851851851852, "grad_norm": 57.730045318603516, "learning_rate": 4.796633099505119e-07, "loss": 2.8162, "step": 10060 }, { "epoch": 67.58585858585859, "grad_norm": 93.33308410644531, "learning_rate": 4.778615151979844e-07, "loss": 2.4525, "step": 10070 }, { "epoch": 67.65319865319866, "grad_norm": 206.02574157714844, "learning_rate": 4.7606204789045244e-07, "loss": 2.4867, "step": 10080 }, { "epoch": 67.72053872053873, "grad_norm": 121.43006134033203, "learning_rate": 4.7426491604908583e-07, "loss": 2.8843, "step": 10090 }, { "epoch": 67.78787878787878, "grad_norm": 49.21678161621094, "learning_rate": 4.724701276846438e-07, "loss": 2.4502, "step": 10100 }, { "epoch": 67.78787878787878, "eval_loss": 0.3534775972366333, "eval_mae": 0.415083110332489, "eval_mse": 0.3534775972366333, "eval_r2": 0.22682899236679077, "eval_rmse": 0.5945398197233162, "eval_runtime": 10.7883, "eval_samples_per_second": 440.199, "eval_steps_per_second": 13.811, "step": 10100 }, { "epoch": 67.85521885521885, "grad_norm": 313.5792236328125, "learning_rate": 4.70856828441891e-07, "loss": 3.1242, "step": 10110 }, { "epoch": 67.92255892255892, "grad_norm": 45.07607650756836, "learning_rate": 4.6906651471580426e-07, "loss": 2.6736, "step": 10120 }, { "epoch": 67.98989898989899, "grad_norm": 76.0740737915039, "learning_rate": 4.672785676386419e-07, "loss": 2.8239, "step": 10130 }, { "epoch": 68.05387205387206, "grad_norm": 80.1639175415039, "learning_rate": 4.65492995180222e-07, "loss": 2.1045, "step": 10140 }, { "epoch": 68.12121212121212, "grad_norm": 82.4394302368164, "learning_rate": 4.6370980529977665e-07, "loss": 2.8312, "step": 10150 }, { "epoch": 68.1885521885522, "grad_norm": 148.61019897460938, "learning_rate": 4.619290059459189e-07, "loss": 2.8794, "step": 10160 }, { "epoch": 68.25589225589225, "grad_norm": 195.94436645507812, "learning_rate": 4.6015060505660417e-07, "loss": 2.5582, "step": 10170 }, { "epoch": 68.32323232323232, "grad_norm": 106.88961791992188, "learning_rate": 4.583746105590983e-07, "loss": 2.3885, "step": 10180 }, { "epoch": 68.39057239057239, "grad_norm": 131.76197814941406, "learning_rate": 4.5660103036994024e-07, "loss": 2.9696, "step": 10190 }, { "epoch": 68.45791245791246, "grad_norm": 121.2803726196289, "learning_rate": 4.548298723949062e-07, "loss": 2.8471, "step": 10200 }, { "epoch": 68.45791245791246, "eval_loss": 0.35346508026123047, "eval_mae": 0.41677623987197876, "eval_mse": 0.3534650504589081, "eval_r2": 0.22685641050338745, "eval_rmse": 0.5945292679581957, "eval_runtime": 10.7723, "eval_samples_per_second": 440.852, "eval_steps_per_second": 13.832, "step": 10200 }, { "epoch": 68.52525252525253, "grad_norm": 65.51113891601562, "learning_rate": 4.530611445289765e-07, "loss": 2.8707, "step": 10210 }, { "epoch": 68.5925925925926, "grad_norm": 145.30075073242188, "learning_rate": 4.512948546562979e-07, "loss": 2.3885, "step": 10220 }, { "epoch": 68.65993265993266, "grad_norm": 140.14028930664062, "learning_rate": 4.495310106501514e-07, "loss": 2.3858, "step": 10230 }, { "epoch": 68.72727272727273, "grad_norm": 107.551025390625, "learning_rate": 4.4776962037291366e-07, "loss": 2.4776, "step": 10240 }, { "epoch": 68.79461279461279, "grad_norm": 59.93567657470703, "learning_rate": 4.4601069167602533e-07, "loss": 2.8453, "step": 10250 }, { "epoch": 68.86195286195286, "grad_norm": 234.77780151367188, "learning_rate": 4.442542323999534e-07, "loss": 2.5745, "step": 10260 }, { "epoch": 68.92929292929293, "grad_norm": 246.06549072265625, "learning_rate": 4.425002503741583e-07, "loss": 2.609, "step": 10270 }, { "epoch": 68.996632996633, "grad_norm": 111.05248260498047, "learning_rate": 4.4074875341705695e-07, "loss": 2.9867, "step": 10280 }, { "epoch": 69.06060606060606, "grad_norm": 44.566287994384766, "learning_rate": 4.389997493359904e-07, "loss": 2.241, "step": 10290 }, { "epoch": 69.12794612794613, "grad_norm": 293.6179504394531, "learning_rate": 4.3725324592718603e-07, "loss": 3.0898, "step": 10300 }, { "epoch": 69.12794612794613, "eval_loss": 0.35457152128219604, "eval_mae": 0.41568779945373535, "eval_mse": 0.35457152128219604, "eval_r2": 0.2244362235069275, "eval_rmse": 0.5954590844736488, "eval_runtime": 10.7934, "eval_samples_per_second": 439.993, "eval_steps_per_second": 13.805, "step": 10300 }, { "epoch": 69.1952861952862, "grad_norm": 82.47624206542969, "learning_rate": 4.355092509757261e-07, "loss": 2.4705, "step": 10310 }, { "epoch": 69.26262626262626, "grad_norm": 198.7459259033203, "learning_rate": 4.337677722555098e-07, "loss": 2.6832, "step": 10320 }, { "epoch": 69.32996632996633, "grad_norm": 117.6089859008789, "learning_rate": 4.320288175292215e-07, "loss": 2.5735, "step": 10330 }, { "epoch": 69.3973063973064, "grad_norm": 84.23894500732422, "learning_rate": 4.3029239454829356e-07, "loss": 2.6552, "step": 10340 }, { "epoch": 69.46464646464646, "grad_norm": 42.08414077758789, "learning_rate": 4.285585110528743e-07, "loss": 2.7547, "step": 10350 }, { "epoch": 69.53198653198653, "grad_norm": 36.73964309692383, "learning_rate": 4.268271747717911e-07, "loss": 2.6484, "step": 10360 }, { "epoch": 69.5993265993266, "grad_norm": 70.13983917236328, "learning_rate": 4.250983934225175e-07, "loss": 2.59, "step": 10370 }, { "epoch": 69.66666666666667, "grad_norm": 86.77037811279297, "learning_rate": 4.2337217471113917e-07, "loss": 2.2797, "step": 10380 }, { "epoch": 69.73400673400674, "grad_norm": 135.01524353027344, "learning_rate": 4.2164852633231693e-07, "loss": 2.7962, "step": 10390 }, { "epoch": 69.8013468013468, "grad_norm": 160.73431396484375, "learning_rate": 4.1992745596925637e-07, "loss": 2.6164, "step": 10400 }, { "epoch": 69.8013468013468, "eval_loss": 0.3562518060207367, "eval_mae": 0.4195065498352051, "eval_mse": 0.3562518060207367, "eval_r2": 0.22076088190078735, "eval_rmse": 0.5968683322314366, "eval_runtime": 10.7894, "eval_samples_per_second": 440.155, "eval_steps_per_second": 13.81, "step": 10400 }, { "epoch": 69.86868686868686, "grad_norm": 103.37757873535156, "learning_rate": 4.182089712936696e-07, "loss": 2.8944, "step": 10410 }, { "epoch": 69.93602693602693, "grad_norm": 79.22645568847656, "learning_rate": 4.1649307996574466e-07, "loss": 2.5448, "step": 10420 }, { "epoch": 70.0, "grad_norm": Infinity, "learning_rate": 4.147797896341082e-07, "loss": 2.6892, "step": 10430 }, { "epoch": 70.06734006734007, "grad_norm": 192.63278198242188, "learning_rate": 4.1324005849971054e-07, "loss": 2.8325, "step": 10440 }, { "epoch": 70.13468013468014, "grad_norm": 127.3247299194336, "learning_rate": 4.1153173109143955e-07, "loss": 2.8945, "step": 10450 }, { "epoch": 70.20202020202021, "grad_norm": 75.08699035644531, "learning_rate": 4.0982602679479107e-07, "loss": 2.837, "step": 10460 }, { "epoch": 70.26936026936026, "grad_norm": 95.09965515136719, "learning_rate": 4.081229532129826e-07, "loss": 2.3842, "step": 10470 }, { "epoch": 70.33670033670033, "grad_norm": 187.0350341796875, "learning_rate": 4.064225179375068e-07, "loss": 2.5821, "step": 10480 }, { "epoch": 70.4040404040404, "grad_norm": 57.73213577270508, "learning_rate": 4.047247285480947e-07, "loss": 2.9251, "step": 10490 }, { "epoch": 70.47138047138047, "grad_norm": 125.00079345703125, "learning_rate": 4.0302959261268443e-07, "loss": 2.5284, "step": 10500 }, { "epoch": 70.47138047138047, "eval_loss": 0.3516845405101776, "eval_mae": 0.41460931301116943, "eval_mse": 0.3516845405101776, "eval_r2": 0.23075097799301147, "eval_rmse": 0.5930299659462224, "eval_runtime": 10.7816, "eval_samples_per_second": 440.474, "eval_steps_per_second": 13.82, "step": 10500 }, { "epoch": 70.53872053872054, "grad_norm": 79.56168365478516, "learning_rate": 4.0133711768738486e-07, "loss": 2.5965, "step": 10510 }, { "epoch": 70.60606060606061, "grad_norm": 73.0863265991211, "learning_rate": 3.996473113164449e-07, "loss": 2.4718, "step": 10520 }, { "epoch": 70.67340067340068, "grad_norm": 117.4516830444336, "learning_rate": 3.9796018103221686e-07, "loss": 2.8489, "step": 10530 }, { "epoch": 70.74074074074075, "grad_norm": 71.40447998046875, "learning_rate": 3.9627573435512575e-07, "loss": 2.4731, "step": 10540 }, { "epoch": 70.8080808080808, "grad_norm": 92.41343688964844, "learning_rate": 3.945939787936329e-07, "loss": 2.4893, "step": 10550 }, { "epoch": 70.87542087542087, "grad_norm": 85.12460327148438, "learning_rate": 3.929149218442052e-07, "loss": 2.7336, "step": 10560 }, { "epoch": 70.94276094276094, "grad_norm": 123.94529724121094, "learning_rate": 3.912385709912793e-07, "loss": 2.5306, "step": 10570 }, { "epoch": 71.006734006734, "grad_norm": 114.96455383300781, "learning_rate": 3.8956493370723053e-07, "loss": 2.2785, "step": 10580 }, { "epoch": 71.07407407407408, "grad_norm": 57.928897857666016, "learning_rate": 3.8789401745233706e-07, "loss": 2.1721, "step": 10590 }, { "epoch": 71.14141414141415, "grad_norm": 257.8482971191406, "learning_rate": 3.862258296747496e-07, "loss": 2.9975, "step": 10600 }, { "epoch": 71.14141414141415, "eval_loss": 0.35526517033576965, "eval_mae": 0.41799965500831604, "eval_mse": 0.35526517033576965, "eval_r2": 0.22291898727416992, "eval_rmse": 0.5960412488542799, "eval_runtime": 10.7837, "eval_samples_per_second": 440.386, "eval_steps_per_second": 13.817, "step": 10600 }, { "epoch": 71.20875420875421, "grad_norm": 87.77326965332031, "learning_rate": 3.845603778104548e-07, "loss": 2.6831, "step": 10610 }, { "epoch": 71.27609427609427, "grad_norm": 31.784151077270508, "learning_rate": 3.8289766928324574e-07, "loss": 2.5475, "step": 10620 }, { "epoch": 71.34343434343434, "grad_norm": 128.29600524902344, "learning_rate": 3.812377115046854e-07, "loss": 2.6511, "step": 10630 }, { "epoch": 71.41077441077441, "grad_norm": 140.99090576171875, "learning_rate": 3.795805118740766e-07, "loss": 2.5383, "step": 10640 }, { "epoch": 71.47811447811448, "grad_norm": 103.48905181884766, "learning_rate": 3.779260777784262e-07, "loss": 2.5973, "step": 10650 }, { "epoch": 71.54545454545455, "grad_norm": 132.4952850341797, "learning_rate": 3.762744165924154e-07, "loss": 2.6128, "step": 10660 }, { "epoch": 71.61279461279462, "grad_norm": 63.60878372192383, "learning_rate": 3.746255356783632e-07, "loss": 2.6488, "step": 10670 }, { "epoch": 71.68013468013469, "grad_norm": 38.84306335449219, "learning_rate": 3.7297944238619703e-07, "loss": 2.5288, "step": 10680 }, { "epoch": 71.74747474747475, "grad_norm": 111.38179779052734, "learning_rate": 3.7133614405341726e-07, "loss": 2.8809, "step": 10690 }, { "epoch": 71.81481481481481, "grad_norm": 78.55253601074219, "learning_rate": 3.696956480050668e-07, "loss": 2.3914, "step": 10700 }, { "epoch": 71.81481481481481, "eval_loss": 0.35094472765922546, "eval_mae": 0.4148868918418884, "eval_mse": 0.35094472765922546, "eval_r2": 0.23236918449401855, "eval_rmse": 0.5924058808445655, "eval_runtime": 10.7803, "eval_samples_per_second": 440.527, "eval_steps_per_second": 13.822, "step": 10700 }, { "epoch": 71.88215488215488, "grad_norm": 102.21617889404297, "learning_rate": 3.6805796155369606e-07, "loss": 2.5582, "step": 10710 }, { "epoch": 71.94949494949495, "grad_norm": 204.4682159423828, "learning_rate": 3.6642309199933276e-07, "loss": 2.8879, "step": 10720 }, { "epoch": 72.01346801346801, "grad_norm": 171.1468963623047, "learning_rate": 3.6479104662944713e-07, "loss": 2.651, "step": 10730 }, { "epoch": 72.08080808080808, "grad_norm": 204.37551879882812, "learning_rate": 3.6316183271892176e-07, "loss": 2.6835, "step": 10740 }, { "epoch": 72.14814814814815, "grad_norm": 82.18759155273438, "learning_rate": 3.6153545753001655e-07, "loss": 2.6478, "step": 10750 }, { "epoch": 72.21548821548822, "grad_norm": 97.18743896484375, "learning_rate": 3.599119283123393e-07, "loss": 2.7821, "step": 10760 }, { "epoch": 72.28282828282828, "grad_norm": 109.49656677246094, "learning_rate": 3.582912523028101e-07, "loss": 2.4739, "step": 10770 }, { "epoch": 72.35016835016835, "grad_norm": 170.98643493652344, "learning_rate": 3.5667343672563253e-07, "loss": 2.5452, "step": 10780 }, { "epoch": 72.41750841750842, "grad_norm": 57.41474151611328, "learning_rate": 3.550584887922582e-07, "loss": 2.5076, "step": 10790 }, { "epoch": 72.48484848484848, "grad_norm": 284.7713623046875, "learning_rate": 3.5344641570135736e-07, "loss": 2.4477, "step": 10800 }, { "epoch": 72.48484848484848, "eval_loss": 0.3508109748363495, "eval_mae": 0.4151129424571991, "eval_mse": 0.3508110046386719, "eval_r2": 0.2326616644859314, "eval_rmse": 0.5922930057316833, "eval_runtime": 10.7797, "eval_samples_per_second": 440.55, "eval_steps_per_second": 13.822, "step": 10800 }, { "epoch": 72.55218855218855, "grad_norm": 67.90624237060547, "learning_rate": 3.518372246387843e-07, "loss": 2.4895, "step": 10810 }, { "epoch": 72.61952861952862, "grad_norm": 100.96810150146484, "learning_rate": 3.502309227775482e-07, "loss": 2.7369, "step": 10820 }, { "epoch": 72.68686868686869, "grad_norm": 175.03054809570312, "learning_rate": 3.4862751727777796e-07, "loss": 2.9339, "step": 10830 }, { "epoch": 72.75420875420875, "grad_norm": 153.81028747558594, "learning_rate": 3.470270152866928e-07, "loss": 2.5705, "step": 10840 }, { "epoch": 72.82154882154882, "grad_norm": 270.7610168457031, "learning_rate": 3.4542942393856964e-07, "loss": 2.6982, "step": 10850 }, { "epoch": 72.88888888888889, "grad_norm": 82.58375549316406, "learning_rate": 3.438347503547102e-07, "loss": 2.7363, "step": 10860 }, { "epoch": 72.95622895622895, "grad_norm": 63.94959259033203, "learning_rate": 3.4224300164341137e-07, "loss": 2.5311, "step": 10870 }, { "epoch": 73.02020202020202, "grad_norm": 163.4475555419922, "learning_rate": 3.406541848999311e-07, "loss": 2.6011, "step": 10880 }, { "epoch": 73.08754208754209, "grad_norm": 62.97525405883789, "learning_rate": 3.390683072064594e-07, "loss": 2.3255, "step": 10890 }, { "epoch": 73.15488215488216, "grad_norm": 254.22161865234375, "learning_rate": 3.3748537563208377e-07, "loss": 2.6686, "step": 10900 }, { "epoch": 73.15488215488216, "eval_loss": 0.3533035218715668, "eval_mae": 0.41676533222198486, "eval_mse": 0.3533035218715668, "eval_r2": 0.22720974683761597, "eval_rmse": 0.5943934066521657, "eval_runtime": 10.764, "eval_samples_per_second": 441.193, "eval_steps_per_second": 13.842, "step": 10900 }, { "epoch": 73.22222222222223, "grad_norm": 236.9002685546875, "learning_rate": 3.359053972327608e-07, "loss": 2.6178, "step": 10910 }, { "epoch": 73.28956228956228, "grad_norm": 112.41513061523438, "learning_rate": 3.3432837905128287e-07, "loss": 2.7071, "step": 10920 }, { "epoch": 73.35690235690235, "grad_norm": 41.61994934082031, "learning_rate": 3.3275432811724613e-07, "loss": 2.5716, "step": 10930 }, { "epoch": 73.42424242424242, "grad_norm": 137.0571746826172, "learning_rate": 3.3118325144702176e-07, "loss": 2.7344, "step": 10940 }, { "epoch": 73.49158249158249, "grad_norm": 36.0982551574707, "learning_rate": 3.2961515604372137e-07, "loss": 2.9014, "step": 10950 }, { "epoch": 73.55892255892256, "grad_norm": 55.34161376953125, "learning_rate": 3.2805004889716914e-07, "loss": 2.4381, "step": 10960 }, { "epoch": 73.62626262626263, "grad_norm": 85.99681091308594, "learning_rate": 3.2648793698386745e-07, "loss": 2.9293, "step": 10970 }, { "epoch": 73.6936026936027, "grad_norm": 52.05038070678711, "learning_rate": 3.2492882726696903e-07, "loss": 2.579, "step": 10980 }, { "epoch": 73.76094276094275, "grad_norm": 133.3928680419922, "learning_rate": 3.233727266962425e-07, "loss": 2.728, "step": 10990 }, { "epoch": 73.82828282828282, "grad_norm": 105.58036804199219, "learning_rate": 3.218196422080448e-07, "loss": 2.3734, "step": 11000 }, { "epoch": 73.82828282828282, "eval_loss": 0.350881427526474, "eval_mae": 0.413974791765213, "eval_mse": 0.3508813977241516, "eval_r2": 0.23250770568847656, "eval_rmse": 0.5923524269589444, "eval_runtime": 10.7603, "eval_samples_per_second": 441.343, "eval_steps_per_second": 13.847, "step": 11000 }, { "epoch": 73.89562289562289, "grad_norm": 111.90595245361328, "learning_rate": 3.202695807252871e-07, "loss": 2.4673, "step": 11010 }, { "epoch": 73.96296296296296, "grad_norm": 91.3452377319336, "learning_rate": 3.187225491574068e-07, "loss": 2.7233, "step": 11020 }, { "epoch": 74.02693602693603, "grad_norm": 89.6205062866211, "learning_rate": 3.171785544003341e-07, "loss": 2.4299, "step": 11030 }, { "epoch": 74.0942760942761, "grad_norm": 78.20502471923828, "learning_rate": 3.1563760333646396e-07, "loss": 2.6404, "step": 11040 }, { "epoch": 74.16161616161617, "grad_norm": 120.4116439819336, "learning_rate": 3.140997028346223e-07, "loss": 2.8529, "step": 11050 }, { "epoch": 74.22895622895624, "grad_norm": 86.1987533569336, "learning_rate": 3.1256485975003887e-07, "loss": 2.6238, "step": 11060 }, { "epoch": 74.29629629629629, "grad_norm": 49.59245681762695, "learning_rate": 3.110330809243134e-07, "loss": 2.4778, "step": 11070 }, { "epoch": 74.36363636363636, "grad_norm": 308.1654357910156, "learning_rate": 3.095043731853878e-07, "loss": 2.5806, "step": 11080 }, { "epoch": 74.43097643097643, "grad_norm": 188.3867645263672, "learning_rate": 3.079787433475135e-07, "loss": 2.9114, "step": 11090 }, { "epoch": 74.4983164983165, "grad_norm": 195.9840545654297, "learning_rate": 3.0645619821122316e-07, "loss": 2.9025, "step": 11100 }, { "epoch": 74.4983164983165, "eval_loss": 0.35137778520584106, "eval_mae": 0.4142541289329529, "eval_mse": 0.35137778520584106, "eval_r2": 0.23142194747924805, "eval_rmse": 0.5927712756247903, "eval_runtime": 10.7727, "eval_samples_per_second": 440.837, "eval_steps_per_second": 13.831, "step": 11100 }, { "epoch": 74.56565656565657, "grad_norm": 113.8948745727539, "learning_rate": 3.049367445632981e-07, "loss": 2.5246, "step": 11110 }, { "epoch": 74.63299663299664, "grad_norm": 99.72313690185547, "learning_rate": 3.034203891767406e-07, "loss": 2.3368, "step": 11120 }, { "epoch": 74.7003367003367, "grad_norm": 109.96050262451172, "learning_rate": 3.0190713881074106e-07, "loss": 2.4789, "step": 11130 }, { "epoch": 74.76767676767676, "grad_norm": 63.99116516113281, "learning_rate": 3.003970002106503e-07, "loss": 2.7651, "step": 11140 }, { "epoch": 74.83501683501683, "grad_norm": 90.63844299316406, "learning_rate": 2.988899801079474e-07, "loss": 2.427, "step": 11150 }, { "epoch": 74.9023569023569, "grad_norm": 66.95032501220703, "learning_rate": 2.973860852202117e-07, "loss": 2.618, "step": 11160 }, { "epoch": 74.96969696969697, "grad_norm": 64.42914581298828, "learning_rate": 2.9588532225109054e-07, "loss": 2.669, "step": 11170 }, { "epoch": 75.03367003367003, "grad_norm": 53.71120071411133, "learning_rate": 2.9438769789027196e-07, "loss": 2.4518, "step": 11180 }, { "epoch": 75.1010101010101, "grad_norm": 70.70067596435547, "learning_rate": 2.9289321881345254e-07, "loss": 2.6652, "step": 11190 }, { "epoch": 75.16835016835017, "grad_norm": 187.86904907226562, "learning_rate": 2.9140189168230924e-07, "loss": 3.0007, "step": 11200 }, { "epoch": 75.16835016835017, "eval_loss": 0.3508734107017517, "eval_mae": 0.41543933749198914, "eval_mse": 0.3508734107017517, "eval_r2": 0.23252516984939575, "eval_rmse": 0.5923456851381225, "eval_runtime": 10.7649, "eval_samples_per_second": 441.157, "eval_steps_per_second": 13.841, "step": 11200 }, { "epoch": 75.23569023569024, "grad_norm": 149.2410888671875, "learning_rate": 2.899137231444695e-07, "loss": 2.7318, "step": 11210 }, { "epoch": 75.3030303030303, "grad_norm": 308.9027099609375, "learning_rate": 2.8842871983347995e-07, "loss": 2.5061, "step": 11220 }, { "epoch": 75.37037037037037, "grad_norm": 90.65328979492188, "learning_rate": 2.869468883687798e-07, "loss": 2.3542, "step": 11230 }, { "epoch": 75.43771043771044, "grad_norm": 156.33343505859375, "learning_rate": 2.85468235355668e-07, "loss": 2.7579, "step": 11240 }, { "epoch": 75.5050505050505, "grad_norm": 82.09190368652344, "learning_rate": 2.839927673852771e-07, "loss": 2.6485, "step": 11250 }, { "epoch": 75.57239057239057, "grad_norm": 56.101524353027344, "learning_rate": 2.825204910345409e-07, "loss": 2.5049, "step": 11260 }, { "epoch": 75.63973063973064, "grad_norm": 97.6525650024414, "learning_rate": 2.810514128661675e-07, "loss": 2.5984, "step": 11270 }, { "epoch": 75.70707070707071, "grad_norm": 138.76852416992188, "learning_rate": 2.795855394286081e-07, "loss": 2.3752, "step": 11280 }, { "epoch": 75.77441077441077, "grad_norm": 179.25198364257812, "learning_rate": 2.781228772560297e-07, "loss": 2.4712, "step": 11290 }, { "epoch": 75.84175084175084, "grad_norm": 91.537109375, "learning_rate": 2.7666343286828384e-07, "loss": 2.6182, "step": 11300 }, { "epoch": 75.84175084175084, "eval_loss": 0.3509845435619354, "eval_mae": 0.41459256410598755, "eval_mse": 0.3509845435619354, "eval_r2": 0.2322821021080017, "eval_rmse": 0.5924394851475848, "eval_runtime": 10.792, "eval_samples_per_second": 440.046, "eval_steps_per_second": 13.806, "step": 11300 }, { "epoch": 75.9090909090909, "grad_norm": 33.59502029418945, "learning_rate": 2.752072127708802e-07, "loss": 2.3772, "step": 11310 }, { "epoch": 75.97643097643098, "grad_norm": 125.40281677246094, "learning_rate": 2.7375422345495446e-07, "loss": 2.9295, "step": 11320 }, { "epoch": 76.04040404040404, "grad_norm": 61.42475509643555, "learning_rate": 2.7230447139724267e-07, "loss": 2.8143, "step": 11330 }, { "epoch": 76.10774410774411, "grad_norm": 82.06446838378906, "learning_rate": 2.70857963060049e-07, "loss": 2.8388, "step": 11340 }, { "epoch": 76.17508417508418, "grad_norm": 88.72342681884766, "learning_rate": 2.6941470489122053e-07, "loss": 2.4268, "step": 11350 }, { "epoch": 76.24242424242425, "grad_norm": 87.6678466796875, "learning_rate": 2.6797470332411467e-07, "loss": 2.6258, "step": 11360 }, { "epoch": 76.3097643097643, "grad_norm": 63.848594665527344, "learning_rate": 2.6653796477757426e-07, "loss": 2.5191, "step": 11370 }, { "epoch": 76.37710437710437, "grad_norm": 68.5781478881836, "learning_rate": 2.6510449565589563e-07, "loss": 2.7396, "step": 11380 }, { "epoch": 76.44444444444444, "grad_norm": 122.10014343261719, "learning_rate": 2.6367430234880284e-07, "loss": 2.2941, "step": 11390 }, { "epoch": 76.51178451178451, "grad_norm": 40.27404022216797, "learning_rate": 2.6224739123141683e-07, "loss": 2.2474, "step": 11400 }, { "epoch": 76.51178451178451, "eval_loss": 0.3490457236766815, "eval_mae": 0.4130796194076538, "eval_mse": 0.3490457534790039, "eval_r2": 0.2365228533744812, "eval_rmse": 0.5908009423477623, "eval_runtime": 10.7666, "eval_samples_per_second": 441.086, "eval_steps_per_second": 13.839, "step": 11400 }, { "epoch": 76.57912457912458, "grad_norm": 94.35778045654297, "learning_rate": 2.608237686642292e-07, "loss": 2.5541, "step": 11410 }, { "epoch": 76.64646464646465, "grad_norm": 136.2204132080078, "learning_rate": 2.594034409930714e-07, "loss": 2.6049, "step": 11420 }, { "epoch": 76.71380471380472, "grad_norm": 108.44975280761719, "learning_rate": 2.5798641454908944e-07, "loss": 2.7775, "step": 11430 }, { "epoch": 76.78114478114477, "grad_norm": 93.50945281982422, "learning_rate": 2.565726956487123e-07, "loss": 2.5075, "step": 11440 }, { "epoch": 76.84848484848484, "grad_norm": 85.51289367675781, "learning_rate": 2.5516229059362717e-07, "loss": 2.8564, "step": 11450 }, { "epoch": 76.91582491582491, "grad_norm": 173.97474670410156, "learning_rate": 2.537552056707483e-07, "loss": 2.7413, "step": 11460 }, { "epoch": 76.98316498316498, "grad_norm": 127.52139282226562, "learning_rate": 2.5235144715219124e-07, "loss": 2.429, "step": 11470 }, { "epoch": 77.04713804713805, "grad_norm": 67.37889862060547, "learning_rate": 2.509510212952435e-07, "loss": 2.758, "step": 11480 }, { "epoch": 77.11447811447812, "grad_norm": 82.63104248046875, "learning_rate": 2.495539343423375e-07, "loss": 3.0982, "step": 11490 }, { "epoch": 77.18181818181819, "grad_norm": 93.52452087402344, "learning_rate": 2.481601925210227e-07, "loss": 2.3255, "step": 11500 }, { "epoch": 77.18181818181819, "eval_loss": 0.35103747248649597, "eval_mae": 0.4157281219959259, "eval_mse": 0.35103747248649597, "eval_r2": 0.2321663498878479, "eval_rmse": 0.5924841537851422, "eval_runtime": 10.7852, "eval_samples_per_second": 440.324, "eval_steps_per_second": 13.815, "step": 11500 }, { "epoch": 77.24915824915826, "grad_norm": 67.06855010986328, "learning_rate": 2.467698020439365e-07, "loss": 2.8273, "step": 11510 }, { "epoch": 77.31649831649831, "grad_norm": 114.716064453125, "learning_rate": 2.4538276910877897e-07, "loss": 2.4548, "step": 11520 }, { "epoch": 77.38383838383838, "grad_norm": 149.9312744140625, "learning_rate": 2.439990998982825e-07, "loss": 2.3493, "step": 11530 }, { "epoch": 77.45117845117845, "grad_norm": 236.48309326171875, "learning_rate": 2.426188005801868e-07, "loss": 2.34, "step": 11540 }, { "epoch": 77.51851851851852, "grad_norm": 34.097999572753906, "learning_rate": 2.4124187730720915e-07, "loss": 2.5052, "step": 11550 }, { "epoch": 77.58585858585859, "grad_norm": 110.3384017944336, "learning_rate": 2.398683362170186e-07, "loss": 2.9921, "step": 11560 }, { "epoch": 77.65319865319866, "grad_norm": 112.94971466064453, "learning_rate": 2.384981834322086e-07, "loss": 2.5366, "step": 11570 }, { "epoch": 77.72053872053873, "grad_norm": 213.19482421875, "learning_rate": 2.3713142506026784e-07, "loss": 2.5033, "step": 11580 }, { "epoch": 77.78787878787878, "grad_norm": 75.08736419677734, "learning_rate": 2.3576806719355534e-07, "loss": 2.4627, "step": 11590 }, { "epoch": 77.85521885521885, "grad_norm": 43.26775360107422, "learning_rate": 2.3440811590927167e-07, "loss": 2.7498, "step": 11600 }, { "epoch": 77.85521885521885, "eval_loss": 0.3488169312477112, "eval_mae": 0.41359731554985046, "eval_mse": 0.3488169014453888, "eval_r2": 0.23702341318130493, "eval_rmse": 0.5906072311150523, "eval_runtime": 10.7927, "eval_samples_per_second": 440.019, "eval_steps_per_second": 13.806, "step": 11600 }, { "epoch": 77.92255892255892, "grad_norm": 207.86383056640625, "learning_rate": 2.3305157726943325e-07, "loss": 2.5631, "step": 11610 }, { "epoch": 77.98989898989899, "grad_norm": 94.32893371582031, "learning_rate": 2.3169845732084335e-07, "loss": 2.5296, "step": 11620 }, { "epoch": 78.05387205387206, "grad_norm": 168.6701202392578, "learning_rate": 2.303487620950677e-07, "loss": 2.5639, "step": 11630 }, { "epoch": 78.12121212121212, "grad_norm": 67.1196060180664, "learning_rate": 2.290024976084052e-07, "loss": 2.7796, "step": 11640 }, { "epoch": 78.1885521885522, "grad_norm": 171.12818908691406, "learning_rate": 2.276596698618629e-07, "loss": 2.3438, "step": 11650 }, { "epoch": 78.25589225589225, "grad_norm": 140.5289306640625, "learning_rate": 2.2632028484112765e-07, "loss": 2.5576, "step": 11660 }, { "epoch": 78.32323232323232, "grad_norm": 118.52499389648438, "learning_rate": 2.2498434851654125e-07, "loss": 2.4586, "step": 11670 }, { "epoch": 78.39057239057239, "grad_norm": 55.70298767089844, "learning_rate": 2.2365186684307192e-07, "loss": 2.0948, "step": 11680 }, { "epoch": 78.45791245791246, "grad_norm": 53.440635681152344, "learning_rate": 2.223228457602897e-07, "loss": 2.9322, "step": 11690 }, { "epoch": 78.52525252525253, "grad_norm": 190.59088134765625, "learning_rate": 2.2099729119233768e-07, "loss": 2.9211, "step": 11700 }, { "epoch": 78.52525252525253, "eval_loss": 0.34900203347206116, "eval_mae": 0.41340887546539307, "eval_mse": 0.34900200366973877, "eval_r2": 0.23661857843399048, "eval_rmse": 0.5907639153416013, "eval_runtime": 10.7759, "eval_samples_per_second": 440.707, "eval_steps_per_second": 13.827, "step": 11700 }, { "epoch": 78.5925925925926, "grad_norm": 99.35961151123047, "learning_rate": 2.1967520904790827e-07, "loss": 2.5731, "step": 11710 }, { "epoch": 78.65993265993266, "grad_norm": 68.17816925048828, "learning_rate": 2.1835660522021416e-07, "loss": 2.7039, "step": 11720 }, { "epoch": 78.72727272727273, "grad_norm": 44.97389602661133, "learning_rate": 2.1704148558696467e-07, "loss": 2.4167, "step": 11730 }, { "epoch": 78.79461279461279, "grad_norm": 169.83908081054688, "learning_rate": 2.157298560103368e-07, "loss": 2.4325, "step": 11740 }, { "epoch": 78.86195286195286, "grad_norm": 226.00367736816406, "learning_rate": 2.14421722336952e-07, "loss": 2.6257, "step": 11750 }, { "epoch": 78.92929292929293, "grad_norm": 97.51092529296875, "learning_rate": 2.1311709039784732e-07, "loss": 2.8581, "step": 11760 }, { "epoch": 78.996632996633, "grad_norm": 92.56432342529297, "learning_rate": 2.1181596600845196e-07, "loss": 2.8225, "step": 11770 }, { "epoch": 79.06060606060606, "grad_norm": 49.8145637512207, "learning_rate": 2.1051835496855895e-07, "loss": 2.4781, "step": 11780 }, { "epoch": 79.12794612794613, "grad_norm": 91.37464904785156, "learning_rate": 2.0922426306230157e-07, "loss": 2.5714, "step": 11790 }, { "epoch": 79.1952861952862, "grad_norm": 86.79344177246094, "learning_rate": 2.079336960581255e-07, "loss": 2.4525, "step": 11800 }, { "epoch": 79.1952861952862, "eval_loss": 0.35011178255081177, "eval_mae": 0.41515305638313293, "eval_mse": 0.35011178255081177, "eval_r2": 0.23419106006622314, "eval_rmse": 0.5917024442663827, "eval_runtime": 10.7642, "eval_samples_per_second": 441.183, "eval_steps_per_second": 13.842, "step": 11800 }, { "epoch": 79.26262626262626, "grad_norm": 102.40309143066406, "learning_rate": 2.0664665970876495e-07, "loss": 2.769, "step": 11810 }, { "epoch": 79.32996632996633, "grad_norm": 92.67575073242188, "learning_rate": 2.0536315975121542e-07, "loss": 2.4612, "step": 11820 }, { "epoch": 79.3973063973064, "grad_norm": 155.80868530273438, "learning_rate": 2.0408320190670957e-07, "loss": 2.6836, "step": 11830 }, { "epoch": 79.46464646464646, "grad_norm": 98.2526626586914, "learning_rate": 2.0280679188069015e-07, "loss": 2.4177, "step": 11840 }, { "epoch": 79.53198653198653, "grad_norm": 57.550193786621094, "learning_rate": 2.015339353627865e-07, "loss": 2.8821, "step": 11850 }, { "epoch": 79.5993265993266, "grad_norm": 64.64018249511719, "learning_rate": 2.0026463802678685e-07, "loss": 2.5964, "step": 11860 }, { "epoch": 79.66666666666667, "grad_norm": 155.0970001220703, "learning_rate": 1.989989055306156e-07, "loss": 2.4199, "step": 11870 }, { "epoch": 79.73400673400674, "grad_norm": 230.80389404296875, "learning_rate": 1.9773674351630543e-07, "loss": 2.2953, "step": 11880 }, { "epoch": 79.8013468013468, "grad_norm": 84.80900573730469, "learning_rate": 1.9647815760997488e-07, "loss": 2.5878, "step": 11890 }, { "epoch": 79.86868686868686, "grad_norm": 69.03256225585938, "learning_rate": 1.9522315342180073e-07, "loss": 2.7923, "step": 11900 }, { "epoch": 79.86868686868686, "eval_loss": 0.34875044226646423, "eval_mae": 0.4139447510242462, "eval_mse": 0.34875044226646423, "eval_r2": 0.2371687889099121, "eval_rmse": 0.5905509650034146, "eval_runtime": 10.796, "eval_samples_per_second": 439.886, "eval_steps_per_second": 13.801, "step": 11900 }, { "epoch": 79.93602693602693, "grad_norm": 74.07720184326172, "learning_rate": 1.939717365459952e-07, "loss": 2.4303, "step": 11910 }, { "epoch": 80.0, "grad_norm": 167.05316162109375, "learning_rate": 1.927239125607788e-07, "loss": 2.842, "step": 11920 }, { "epoch": 80.06734006734007, "grad_norm": 89.06217193603516, "learning_rate": 1.9147968702835792e-07, "loss": 2.6301, "step": 11930 }, { "epoch": 80.13468013468014, "grad_norm": 84.24176788330078, "learning_rate": 1.9023906549489765e-07, "loss": 2.3363, "step": 11940 }, { "epoch": 80.20202020202021, "grad_norm": 83.5845947265625, "learning_rate": 1.89002053490499e-07, "loss": 2.4325, "step": 11950 }, { "epoch": 80.26936026936026, "grad_norm": 74.35079956054688, "learning_rate": 1.8776865652917284e-07, "loss": 2.7484, "step": 11960 }, { "epoch": 80.33670033670033, "grad_norm": 95.59172058105469, "learning_rate": 1.8653888010881635e-07, "loss": 2.2708, "step": 11970 }, { "epoch": 80.4040404040404, "grad_norm": 127.53472900390625, "learning_rate": 1.853127297111875e-07, "loss": 2.2618, "step": 11980 }, { "epoch": 80.47138047138047, "grad_norm": 198.59962463378906, "learning_rate": 1.840902108018819e-07, "loss": 2.6354, "step": 11990 }, { "epoch": 80.53872053872054, "grad_norm": 62.94951248168945, "learning_rate": 1.8287132883030698e-07, "loss": 2.5155, "step": 12000 }, { "epoch": 80.53872053872054, "eval_loss": 0.3482706844806671, "eval_mae": 0.4137057363986969, "eval_mse": 0.3482706844806671, "eval_r2": 0.23821818828582764, "eval_rmse": 0.5901446301379579, "eval_runtime": 10.785, "eval_samples_per_second": 440.333, "eval_steps_per_second": 13.815, "step": 12000 }, { "epoch": 80.60606060606061, "grad_norm": 129.719970703125, "learning_rate": 1.8165608922965903e-07, "loss": 2.9247, "step": 12010 }, { "epoch": 80.67340067340068, "grad_norm": 133.1042022705078, "learning_rate": 1.8044449741689794e-07, "loss": 2.6683, "step": 12020 }, { "epoch": 80.74074074074075, "grad_norm": 188.29269409179688, "learning_rate": 1.792365587927239e-07, "loss": 2.4135, "step": 12030 }, { "epoch": 80.8080808080808, "grad_norm": 107.14237976074219, "learning_rate": 1.7803227874155235e-07, "loss": 3.3403, "step": 12040 }, { "epoch": 80.87542087542087, "grad_norm": 95.80084991455078, "learning_rate": 1.7683166263149151e-07, "loss": 2.6763, "step": 12050 }, { "epoch": 80.94276094276094, "grad_norm": 153.08775329589844, "learning_rate": 1.7563471581431622e-07, "loss": 2.5527, "step": 12060 }, { "epoch": 81.006734006734, "grad_norm": 142.90530395507812, "learning_rate": 1.7444144362544623e-07, "loss": 2.4665, "step": 12070 }, { "epoch": 81.07407407407408, "grad_norm": 99.04240417480469, "learning_rate": 1.7325185138392185e-07, "loss": 2.16, "step": 12080 }, { "epoch": 81.14141414141415, "grad_norm": 159.70779418945312, "learning_rate": 1.7206594439237865e-07, "loss": 2.8114, "step": 12090 }, { "epoch": 81.20875420875421, "grad_norm": 187.6341094970703, "learning_rate": 1.7088372793702654e-07, "loss": 2.5692, "step": 12100 }, { "epoch": 81.20875420875421, "eval_loss": 0.34896934032440186, "eval_mae": 0.41366761922836304, "eval_mse": 0.34896934032440186, "eval_r2": 0.2366899847984314, "eval_rmse": 0.5907362696875839, "eval_runtime": 10.7721, "eval_samples_per_second": 440.861, "eval_steps_per_second": 13.832, "step": 12100 }, { "epoch": 81.27609427609427, "grad_norm": 122.40306854248047, "learning_rate": 1.6970520728762373e-07, "loss": 2.5641, "step": 12110 }, { "epoch": 81.34343434343434, "grad_norm": 185.72190856933594, "learning_rate": 1.6853038769745465e-07, "loss": 3.003, "step": 12120 }, { "epoch": 81.41077441077441, "grad_norm": 163.28103637695312, "learning_rate": 1.6735927440330666e-07, "loss": 2.521, "step": 12130 }, { "epoch": 81.47811447811448, "grad_norm": 57.063968658447266, "learning_rate": 1.661918726254453e-07, "loss": 2.5622, "step": 12140 }, { "epoch": 81.54545454545455, "grad_norm": 126.09310150146484, "learning_rate": 1.6502818756759273e-07, "loss": 2.8107, "step": 12150 }, { "epoch": 81.61279461279462, "grad_norm": 91.44612121582031, "learning_rate": 1.6386822441690285e-07, "loss": 2.3006, "step": 12160 }, { "epoch": 81.68013468013469, "grad_norm": 114.76985931396484, "learning_rate": 1.627119883439404e-07, "loss": 2.2606, "step": 12170 }, { "epoch": 81.74747474747475, "grad_norm": 81.72163391113281, "learning_rate": 1.61559484502655e-07, "loss": 2.5941, "step": 12180 }, { "epoch": 81.81481481481481, "grad_norm": 40.78081512451172, "learning_rate": 1.60410718030361e-07, "loss": 2.6152, "step": 12190 }, { "epoch": 81.88215488215488, "grad_norm": 35.294891357421875, "learning_rate": 1.5926569404771307e-07, "loss": 2.6819, "step": 12200 }, { "epoch": 81.88215488215488, "eval_loss": 0.34940388798713684, "eval_mae": 0.4145151376724243, "eval_mse": 0.34940385818481445, "eval_r2": 0.23573952913284302, "eval_rmse": 0.5911039317961051, "eval_runtime": 10.7789, "eval_samples_per_second": 440.582, "eval_steps_per_second": 13.823, "step": 12200 }, { "epoch": 81.94949494949495, "grad_norm": 42.65861129760742, "learning_rate": 1.581244176586829e-07, "loss": 2.7691, "step": 12210 }, { "epoch": 82.01346801346801, "grad_norm": 117.6976089477539, "learning_rate": 1.5698689395053832e-07, "loss": 2.6631, "step": 12220 }, { "epoch": 82.08080808080808, "grad_norm": 134.87049865722656, "learning_rate": 1.5585312799381843e-07, "loss": 2.74, "step": 12230 }, { "epoch": 82.14814814814815, "grad_norm": 154.22998046875, "learning_rate": 1.547231248423132e-07, "loss": 2.4673, "step": 12240 }, { "epoch": 82.21548821548822, "grad_norm": 65.69686126708984, "learning_rate": 1.535968895330384e-07, "loss": 2.7766, "step": 12250 }, { "epoch": 82.28282828282828, "grad_norm": 162.66220092773438, "learning_rate": 1.5247442708621594e-07, "loss": 2.4693, "step": 12260 }, { "epoch": 82.35016835016835, "grad_norm": 113.3948745727539, "learning_rate": 1.5135574250524897e-07, "loss": 2.3911, "step": 12270 }, { "epoch": 82.41750841750842, "grad_norm": 54.803714752197266, "learning_rate": 1.5024084077670174e-07, "loss": 2.7993, "step": 12280 }, { "epoch": 82.48484848484848, "grad_norm": 68.31362915039062, "learning_rate": 1.4912972687027526e-07, "loss": 2.4, "step": 12290 }, { "epoch": 82.55218855218855, "grad_norm": 55.03757095336914, "learning_rate": 1.4802240573878734e-07, "loss": 2.384, "step": 12300 }, { "epoch": 82.55218855218855, "eval_loss": 0.347933292388916, "eval_mae": 0.41333234310150146, "eval_mse": 0.347933292388916, "eval_r2": 0.23895615339279175, "eval_rmse": 0.5898587054447159, "eval_runtime": 10.7671, "eval_samples_per_second": 441.065, "eval_steps_per_second": 13.838, "step": 12300 }, { "epoch": 82.61952861952862, "grad_norm": 36.886016845703125, "learning_rate": 1.4691888231814842e-07, "loss": 2.5964, "step": 12310 }, { "epoch": 82.68686868686869, "grad_norm": 119.87435150146484, "learning_rate": 1.4581916152734132e-07, "loss": 2.6533, "step": 12320 }, { "epoch": 82.75420875420875, "grad_norm": 124.58145141601562, "learning_rate": 1.447232482683979e-07, "loss": 2.5416, "step": 12330 }, { "epoch": 82.82154882154882, "grad_norm": 122.00739288330078, "learning_rate": 1.436311474263786e-07, "loss": 2.3983, "step": 12340 }, { "epoch": 82.88888888888889, "grad_norm": 102.62448120117188, "learning_rate": 1.425428638693489e-07, "loss": 2.715, "step": 12350 }, { "epoch": 82.95622895622895, "grad_norm": 140.515869140625, "learning_rate": 1.4145840244835982e-07, "loss": 2.914, "step": 12360 }, { "epoch": 83.02020202020202, "grad_norm": 147.51319885253906, "learning_rate": 1.4037776799742406e-07, "loss": 2.359, "step": 12370 }, { "epoch": 83.08754208754209, "grad_norm": 80.06890869140625, "learning_rate": 1.3930096533349634e-07, "loss": 2.4828, "step": 12380 }, { "epoch": 83.15488215488216, "grad_norm": 133.39601135253906, "learning_rate": 1.3822799925645035e-07, "loss": 2.4965, "step": 12390 }, { "epoch": 83.22222222222223, "grad_norm": 57.42595291137695, "learning_rate": 1.3715887454905884e-07, "loss": 2.8011, "step": 12400 }, { "epoch": 83.22222222222223, "eval_loss": 0.3483334183692932, "eval_mae": 0.41351118683815, "eval_mse": 0.3483334183692932, "eval_r2": 0.2380809783935547, "eval_rmse": 0.5901977790277537, "eval_runtime": 10.7452, "eval_samples_per_second": 441.963, "eval_steps_per_second": 13.867, "step": 12400 }, { "epoch": 83.28956228956228, "grad_norm": 136.31973266601562, "learning_rate": 1.3609359597697078e-07, "loss": 2.6117, "step": 12410 }, { "epoch": 83.35690235690235, "grad_norm": 99.56391143798828, "learning_rate": 1.350321682886919e-07, "loss": 2.6017, "step": 12420 }, { "epoch": 83.42424242424242, "grad_norm": 29.13854217529297, "learning_rate": 1.3397459621556128e-07, "loss": 2.5215, "step": 12430 }, { "epoch": 83.49158249158249, "grad_norm": 169.7073974609375, "learning_rate": 1.3292088447173277e-07, "loss": 3.0185, "step": 12440 }, { "epoch": 83.55892255892256, "grad_norm": 202.7324676513672, "learning_rate": 1.3187103775415153e-07, "loss": 2.5695, "step": 12450 }, { "epoch": 83.62626262626263, "grad_norm": 178.39285278320312, "learning_rate": 1.308250607425354e-07, "loss": 2.5651, "step": 12460 }, { "epoch": 83.6936026936027, "grad_norm": 154.35421752929688, "learning_rate": 1.297829580993518e-07, "loss": 2.5806, "step": 12470 }, { "epoch": 83.76094276094275, "grad_norm": 213.970458984375, "learning_rate": 1.2874473446979917e-07, "loss": 2.4533, "step": 12480 }, { "epoch": 83.82828282828282, "grad_norm": 117.0694351196289, "learning_rate": 1.27710394481784e-07, "loss": 2.5006, "step": 12490 }, { "epoch": 83.89562289562289, "grad_norm": 192.11581420898438, "learning_rate": 1.2667994274590256e-07, "loss": 2.4991, "step": 12500 }, { "epoch": 83.89562289562289, "eval_loss": 0.34810367226600647, "eval_mae": 0.4134105443954468, "eval_mse": 0.34810367226600647, "eval_r2": 0.238583505153656, "eval_rmse": 0.5900031120816284, "eval_runtime": 10.7752, "eval_samples_per_second": 440.735, "eval_steps_per_second": 13.828, "step": 12500 }, { "epoch": 83.96296296296296, "grad_norm": 46.252315521240234, "learning_rate": 1.256533838554179e-07, "loss": 2.801, "step": 12510 }, { "epoch": 84.02693602693603, "grad_norm": 114.2300796508789, "learning_rate": 1.2473281301914585e-07, "loss": 2.5599, "step": 12520 }, { "epoch": 84.0942760942761, "grad_norm": 164.11776733398438, "learning_rate": 1.237136631271931e-07, "loss": 2.5353, "step": 12530 }, { "epoch": 84.16161616161617, "grad_norm": 160.74798583984375, "learning_rate": 1.2269841930290082e-07, "loss": 2.9866, "step": 12540 }, { "epoch": 84.22895622895624, "grad_norm": 123.49051666259766, "learning_rate": 1.2168708607174328e-07, "loss": 2.3989, "step": 12550 }, { "epoch": 84.29629629629629, "grad_norm": 204.74766540527344, "learning_rate": 1.2067966794176286e-07, "loss": 2.559, "step": 12560 }, { "epoch": 84.36363636363636, "grad_norm": 80.27859497070312, "learning_rate": 1.1967616940355053e-07, "loss": 2.8578, "step": 12570 }, { "epoch": 84.43097643097643, "grad_norm": 36.582435607910156, "learning_rate": 1.1867659493022508e-07, "loss": 2.5197, "step": 12580 }, { "epoch": 84.4983164983165, "grad_norm": 54.90946960449219, "learning_rate": 1.1768094897741454e-07, "loss": 2.5262, "step": 12590 }, { "epoch": 84.56565656565657, "grad_norm": 94.49305725097656, "learning_rate": 1.1668923598323455e-07, "loss": 2.838, "step": 12600 }, { "epoch": 84.56565656565657, "eval_loss": 0.3478628098964691, "eval_mae": 0.41335874795913696, "eval_mse": 0.3478628396987915, "eval_r2": 0.2391102910041809, "eval_rmse": 0.5897989824497762, "eval_runtime": 10.7712, "eval_samples_per_second": 440.898, "eval_steps_per_second": 13.833, "step": 12600 }, { "epoch": 84.63299663299664, "grad_norm": 72.12289428710938, "learning_rate": 1.1570146036826989e-07, "loss": 2.5929, "step": 12610 }, { "epoch": 84.7003367003367, "grad_norm": 60.602901458740234, "learning_rate": 1.1471762653555494e-07, "loss": 2.3174, "step": 12620 }, { "epoch": 84.76767676767676, "grad_norm": 115.97168731689453, "learning_rate": 1.1373773887055227e-07, "loss": 2.5754, "step": 12630 }, { "epoch": 84.83501683501683, "grad_norm": 56.340293884277344, "learning_rate": 1.1276180174113536e-07, "loss": 2.5689, "step": 12640 }, { "epoch": 84.9023569023569, "grad_norm": 51.74113464355469, "learning_rate": 1.117898194975675e-07, "loss": 2.1861, "step": 12650 }, { "epoch": 84.96969696969697, "grad_norm": 123.79866790771484, "learning_rate": 1.1082179647248335e-07, "loss": 2.657, "step": 12660 }, { "epoch": 85.03367003367003, "grad_norm": 245.3060302734375, "learning_rate": 1.0995396444841221e-07, "loss": 2.6708, "step": 12670 }, { "epoch": 85.1010101010101, "grad_norm": 67.46924591064453, "learning_rate": 1.089934758116322e-07, "loss": 2.4625, "step": 12680 }, { "epoch": 85.16835016835017, "grad_norm": 59.03948974609375, "learning_rate": 1.080369588581056e-07, "loss": 2.6667, "step": 12690 }, { "epoch": 85.23569023569024, "grad_norm": 42.9858283996582, "learning_rate": 1.0708441785153044e-07, "loss": 2.696, "step": 12700 }, { "epoch": 85.23569023569024, "eval_loss": 0.347429484128952, "eval_mae": 0.4130251109600067, "eval_mse": 0.347429484128952, "eval_r2": 0.24005818367004395, "eval_rmse": 0.5894314923118309, "eval_runtime": 10.7884, "eval_samples_per_second": 440.194, "eval_steps_per_second": 13.811, "step": 12700 }, { "epoch": 85.3030303030303, "grad_norm": 109.22016906738281, "learning_rate": 1.0613585703788108e-07, "loss": 2.4782, "step": 12710 }, { "epoch": 85.37037037037037, "grad_norm": 117.84405517578125, "learning_rate": 1.0519128064539073e-07, "loss": 2.8874, "step": 12720 }, { "epoch": 85.43771043771044, "grad_norm": 112.05621337890625, "learning_rate": 1.0425069288453126e-07, "loss": 2.2582, "step": 12730 }, { "epoch": 85.5050505050505, "grad_norm": 154.5787353515625, "learning_rate": 1.0331409794799584e-07, "loss": 2.8615, "step": 12740 }, { "epoch": 85.57239057239057, "grad_norm": 173.33970642089844, "learning_rate": 1.023815000106788e-07, "loss": 2.4344, "step": 12750 }, { "epoch": 85.63973063973064, "grad_norm": 44.0203971862793, "learning_rate": 1.014529032296586e-07, "loss": 2.6274, "step": 12760 }, { "epoch": 85.70707070707071, "grad_norm": 45.364410400390625, "learning_rate": 1.0052831174417753e-07, "loss": 2.7194, "step": 12770 }, { "epoch": 85.77441077441077, "grad_norm": 117.5505142211914, "learning_rate": 9.960772967562503e-08, "loss": 2.5439, "step": 12780 }, { "epoch": 85.84175084175084, "grad_norm": 72.88679504394531, "learning_rate": 9.869116112751773e-08, "loss": 2.5387, "step": 12790 }, { "epoch": 85.9090909090909, "grad_norm": 93.75316619873047, "learning_rate": 9.77786101854825e-08, "loss": 2.5845, "step": 12800 }, { "epoch": 85.9090909090909, "eval_loss": 0.3473091423511505, "eval_mae": 0.4126177430152893, "eval_mse": 0.3473091423511505, "eval_r2": 0.24032139778137207, "eval_rmse": 0.5893294005487513, "eval_runtime": 10.7712, "eval_samples_per_second": 440.897, "eval_steps_per_second": 13.833, "step": 12800 }, { "epoch": 85.97643097643098, "grad_norm": 66.14945220947266, "learning_rate": 9.687008091723703e-08, "loss": 2.2683, "step": 12810 }, { "epoch": 86.04040404040404, "grad_norm": 28.713956832885742, "learning_rate": 9.596557737257304e-08, "loss": 2.3776, "step": 12820 }, { "epoch": 86.10774410774411, "grad_norm": 118.06536865234375, "learning_rate": 9.506510358333664e-08, "loss": 2.1708, "step": 12830 }, { "epoch": 86.17508417508418, "grad_norm": 36.007869720458984, "learning_rate": 9.416866356341202e-08, "loss": 3.0041, "step": 12840 }, { "epoch": 86.24242424242425, "grad_norm": 133.95327758789062, "learning_rate": 9.327626130870214e-08, "loss": 2.6173, "step": 12850 }, { "epoch": 86.3097643097643, "grad_norm": 112.56913757324219, "learning_rate": 9.23879007971119e-08, "loss": 2.4268, "step": 12860 }, { "epoch": 86.37710437710437, "grad_norm": 122.9507064819336, "learning_rate": 9.15035859885297e-08, "loss": 2.7038, "step": 12870 }, { "epoch": 86.44444444444444, "grad_norm": 88.01962280273438, "learning_rate": 9.062332082481061e-08, "loss": 2.6651, "step": 12880 }, { "epoch": 86.51178451178451, "grad_norm": 29.951391220092773, "learning_rate": 8.97471092297577e-08, "loss": 2.6942, "step": 12890 }, { "epoch": 86.57912457912458, "grad_norm": 135.22084045410156, "learning_rate": 8.887495510910581e-08, "loss": 2.9985, "step": 12900 }, { "epoch": 86.57912457912458, "eval_loss": 0.34755975008010864, "eval_mae": 0.4135996401309967, "eval_mse": 0.34755975008010864, "eval_r2": 0.2397732138633728, "eval_rmse": 0.5895419833057767, "eval_runtime": 10.7743, "eval_samples_per_second": 440.771, "eval_steps_per_second": 13.829, "step": 12900 }, { "epoch": 86.64646464646465, "grad_norm": 103.16564178466797, "learning_rate": 8.80068623505027e-08, "loss": 2.7488, "step": 12910 }, { "epoch": 86.71380471380472, "grad_norm": 195.8453369140625, "learning_rate": 8.714283482349338e-08, "loss": 2.5515, "step": 12920 }, { "epoch": 86.78114478114477, "grad_norm": 107.96471405029297, "learning_rate": 8.628287637950115e-08, "loss": 2.4587, "step": 12930 }, { "epoch": 86.84848484848484, "grad_norm": 70.36721801757812, "learning_rate": 8.542699085181215e-08, "loss": 2.5667, "step": 12940 }, { "epoch": 86.91582491582491, "grad_norm": 235.53472900390625, "learning_rate": 8.457518205555659e-08, "loss": 2.4559, "step": 12950 }, { "epoch": 86.98316498316498, "grad_norm": 125.57829284667969, "learning_rate": 8.372745378769308e-08, "loss": 2.5069, "step": 12960 }, { "epoch": 87.04713804713805, "grad_norm": 176.30332946777344, "learning_rate": 8.288380982699084e-08, "loss": 2.3366, "step": 12970 }, { "epoch": 87.11447811447812, "grad_norm": 127.16043090820312, "learning_rate": 8.204425393401359e-08, "loss": 2.5399, "step": 12980 }, { "epoch": 87.18181818181819, "grad_norm": 129.687255859375, "learning_rate": 8.12087898511018e-08, "loss": 2.5946, "step": 12990 }, { "epoch": 87.24915824915826, "grad_norm": 117.26610565185547, "learning_rate": 8.037742130235725e-08, "loss": 2.5474, "step": 13000 }, { "epoch": 87.24915824915826, "eval_loss": 0.3482195734977722, "eval_mae": 0.4136255085468292, "eval_mse": 0.3482195734977722, "eval_r2": 0.23833000659942627, "eval_rmse": 0.5901013247720871, "eval_runtime": 10.7691, "eval_samples_per_second": 440.983, "eval_steps_per_second": 13.836, "step": 13000 }, { "epoch": 87.31649831649831, "grad_norm": 33.47829055786133, "learning_rate": 7.955015199362525e-08, "loss": 2.63, "step": 13010 }, { "epoch": 87.38383838383838, "grad_norm": 302.2109680175781, "learning_rate": 7.872698561247915e-08, "loss": 2.8516, "step": 13020 }, { "epoch": 87.45117845117845, "grad_norm": 155.1238555908203, "learning_rate": 7.790792582820271e-08, "loss": 2.8714, "step": 13030 }, { "epoch": 87.51851851851852, "grad_norm": 99.91925811767578, "learning_rate": 7.709297629177525e-08, "loss": 2.8065, "step": 13040 }, { "epoch": 87.58585858585859, "grad_norm": 113.80371856689453, "learning_rate": 7.62821406358538e-08, "loss": 2.3139, "step": 13050 }, { "epoch": 87.65319865319866, "grad_norm": 51.859046936035156, "learning_rate": 7.547542247475836e-08, "loss": 2.6042, "step": 13060 }, { "epoch": 87.72053872053873, "grad_norm": 86.95726013183594, "learning_rate": 7.46728254044544e-08, "loss": 2.567, "step": 13070 }, { "epoch": 87.78787878787878, "grad_norm": 94.94855499267578, "learning_rate": 7.387435300253798e-08, "loss": 2.2464, "step": 13080 }, { "epoch": 87.85521885521885, "grad_norm": 58.764678955078125, "learning_rate": 7.308000882821964e-08, "loss": 2.696, "step": 13090 }, { "epoch": 87.92255892255892, "grad_norm": 51.88106918334961, "learning_rate": 7.228979642230726e-08, "loss": 2.4746, "step": 13100 }, { "epoch": 87.92255892255892, "eval_loss": 0.34722477197647095, "eval_mae": 0.4127149283885956, "eval_mse": 0.34722480177879333, "eval_r2": 0.2405058741569519, "eval_rmse": 0.5892578398110571, "eval_runtime": 10.7666, "eval_samples_per_second": 441.087, "eval_steps_per_second": 13.839, "step": 13100 }, { "epoch": 87.98989898989899, "grad_norm": 88.5357666015625, "learning_rate": 7.150371930719234e-08, "loss": 2.4055, "step": 13110 }, { "epoch": 88.05387205387206, "grad_norm": 115.56126403808594, "learning_rate": 7.072178098683245e-08, "loss": 2.4482, "step": 13120 }, { "epoch": 88.12121212121212, "grad_norm": 154.42469787597656, "learning_rate": 6.99439849467367e-08, "loss": 2.6993, "step": 13130 }, { "epoch": 88.1885521885522, "grad_norm": 82.355224609375, "learning_rate": 6.917033465395006e-08, "loss": 2.2801, "step": 13140 }, { "epoch": 88.25589225589225, "grad_norm": 203.51788330078125, "learning_rate": 6.84008335570373e-08, "loss": 2.515, "step": 13150 }, { "epoch": 88.32323232323232, "grad_norm": 41.929222106933594, "learning_rate": 6.763548508606854e-08, "loss": 3.0545, "step": 13160 }, { "epoch": 88.39057239057239, "grad_norm": 125.77730560302734, "learning_rate": 6.68742926526028e-08, "loss": 2.6418, "step": 13170 }, { "epoch": 88.45791245791246, "grad_norm": 126.03069305419922, "learning_rate": 6.611725964967441e-08, "loss": 2.453, "step": 13180 }, { "epoch": 88.52525252525253, "grad_norm": 51.66025161743164, "learning_rate": 6.53643894517758e-08, "loss": 3.0179, "step": 13190 }, { "epoch": 88.5925925925926, "grad_norm": 49.16200256347656, "learning_rate": 6.461568541484464e-08, "loss": 2.5849, "step": 13200 }, { "epoch": 88.5925925925926, "eval_loss": 0.3474779725074768, "eval_mae": 0.41315731406211853, "eval_mse": 0.3474779725074768, "eval_r2": 0.23995214700698853, "eval_rmse": 0.5894726223561844, "eval_runtime": 10.7752, "eval_samples_per_second": 440.734, "eval_steps_per_second": 13.828, "step": 13200 }, { "epoch": 88.65993265993266, "grad_norm": 167.75914001464844, "learning_rate": 6.387115087624707e-08, "loss": 2.5147, "step": 13210 }, { "epoch": 88.72727272727273, "grad_norm": 57.40876388549805, "learning_rate": 6.313078915476422e-08, "loss": 2.4067, "step": 13220 }, { "epoch": 88.79461279461279, "grad_norm": 60.56932067871094, "learning_rate": 6.239460355057625e-08, "loss": 2.7004, "step": 13230 }, { "epoch": 88.86195286195286, "grad_norm": 153.38612365722656, "learning_rate": 6.166259734524837e-08, "loss": 2.3396, "step": 13240 }, { "epoch": 88.92929292929293, "grad_norm": 91.0926284790039, "learning_rate": 6.093477380171619e-08, "loss": 2.5503, "step": 13250 }, { "epoch": 88.996632996633, "grad_norm": 73.52567291259766, "learning_rate": 6.021113616427076e-08, "loss": 2.4865, "step": 13260 }, { "epoch": 89.06060606060606, "grad_norm": 170.16407775878906, "learning_rate": 5.949168765854462e-08, "loss": 2.366, "step": 13270 }, { "epoch": 89.12794612794613, "grad_norm": 134.84429931640625, "learning_rate": 5.877643149149669e-08, "loss": 3.0436, "step": 13280 }, { "epoch": 89.1952861952862, "grad_norm": 69.09505462646484, "learning_rate": 5.806537085139907e-08, "loss": 2.6791, "step": 13290 }, { "epoch": 89.26262626262626, "grad_norm": 33.76188659667969, "learning_rate": 5.735850890782157e-08, "loss": 2.8502, "step": 13300 }, { "epoch": 89.26262626262626, "eval_loss": 0.3473193049430847, "eval_mae": 0.4128463864326477, "eval_mse": 0.3473193049430847, "eval_r2": 0.24029916524887085, "eval_rmse": 0.5893380226517586, "eval_runtime": 10.766, "eval_samples_per_second": 441.11, "eval_steps_per_second": 13.84, "step": 13300 }, { "epoch": 89.32996632996633, "grad_norm": 186.82618713378906, "learning_rate": 5.6655848811618664e-08, "loss": 2.4454, "step": 13310 }, { "epoch": 89.3973063973064, "grad_norm": 119.83624267578125, "learning_rate": 5.59573936949147e-08, "loss": 2.3855, "step": 13320 }, { "epoch": 89.46464646464646, "grad_norm": 69.2054672241211, "learning_rate": 5.526314667109044e-08, "loss": 2.6064, "step": 13330 }, { "epoch": 89.53198653198653, "grad_norm": 48.20257568359375, "learning_rate": 5.45731108347689e-08, "loss": 2.8151, "step": 13340 }, { "epoch": 89.5993265993266, "grad_norm": 75.34207153320312, "learning_rate": 5.3887289261801527e-08, "loss": 2.446, "step": 13350 }, { "epoch": 89.66666666666667, "grad_norm": 104.53984069824219, "learning_rate": 5.32056850092546e-08, "loss": 2.5907, "step": 13360 }, { "epoch": 89.73400673400674, "grad_norm": 107.78331756591797, "learning_rate": 5.252830111539597e-08, "loss": 2.6299, "step": 13370 }, { "epoch": 89.8013468013468, "grad_norm": 134.01358032226562, "learning_rate": 5.1855140599680436e-08, "loss": 2.4382, "step": 13380 }, { "epoch": 89.86868686868686, "grad_norm": 140.85659790039062, "learning_rate": 5.118620646273764e-08, "loss": 2.7517, "step": 13390 }, { "epoch": 89.93602693602693, "grad_norm": 113.05899047851562, "learning_rate": 5.052150168635749e-08, "loss": 2.2665, "step": 13400 }, { "epoch": 89.93602693602693, "eval_loss": 0.34772107005119324, "eval_mae": 0.4132920503616333, "eval_mse": 0.34772107005119324, "eval_r2": 0.2394203543663025, "eval_rmse": 0.589678785485109, "eval_runtime": 10.7477, "eval_samples_per_second": 441.861, "eval_steps_per_second": 13.863, "step": 13400 }, { "epoch": 90.0, "grad_norm": 184.91087341308594, "learning_rate": 4.986102923347801e-08, "loss": 2.1383, "step": 13410 }, { "epoch": 90.06734006734007, "grad_norm": 68.77091979980469, "learning_rate": 4.9204792048170965e-08, "loss": 2.6021, "step": 13420 }, { "epoch": 90.13468013468014, "grad_norm": 67.6893539428711, "learning_rate": 4.855279305562987e-08, "loss": 2.5044, "step": 13430 }, { "epoch": 90.20202020202021, "grad_norm": 118.68637084960938, "learning_rate": 4.7905035162155715e-08, "loss": 2.5824, "step": 13440 }, { "epoch": 90.26936026936026, "grad_norm": 122.64277648925781, "learning_rate": 4.7261521255145375e-08, "loss": 2.6056, "step": 13450 }, { "epoch": 90.33670033670033, "grad_norm": 115.50506591796875, "learning_rate": 4.6622254203077526e-08, "loss": 2.5344, "step": 13460 }, { "epoch": 90.4040404040404, "grad_norm": 104.72218322753906, "learning_rate": 4.598723685550088e-08, "loss": 2.7586, "step": 13470 }, { "epoch": 90.47138047138047, "grad_norm": 88.82240295410156, "learning_rate": 4.535647204302029e-08, "loss": 2.6146, "step": 13480 }, { "epoch": 90.53872053872054, "grad_norm": 92.98521423339844, "learning_rate": 4.4729962577285675e-08, "loss": 2.479, "step": 13490 }, { "epoch": 90.60606060606061, "grad_norm": 67.76179504394531, "learning_rate": 4.41077112509779e-08, "loss": 2.6795, "step": 13500 }, { "epoch": 90.60606060606061, "eval_loss": 0.34680259227752686, "eval_mae": 0.41242679953575134, "eval_mse": 0.34680259227752686, "eval_r2": 0.24142932891845703, "eval_rmse": 0.588899475528317, "eval_runtime": 10.7766, "eval_samples_per_second": 440.677, "eval_steps_per_second": 13.826, "step": 13500 }, { "epoch": 90.67340067340068, "grad_norm": 50.78334426879883, "learning_rate": 4.348972083779767e-08, "loss": 2.653, "step": 13510 }, { "epoch": 90.74074074074075, "grad_norm": 98.13428497314453, "learning_rate": 4.2875994092452016e-08, "loss": 2.435, "step": 13520 }, { "epoch": 90.8080808080808, "grad_norm": 56.856170654296875, "learning_rate": 4.2266533750643044e-08, "loss": 2.8283, "step": 13530 }, { "epoch": 90.87542087542087, "grad_norm": 54.5003547668457, "learning_rate": 4.166134252905484e-08, "loss": 2.4098, "step": 13540 }, { "epoch": 90.94276094276094, "grad_norm": 87.96903991699219, "learning_rate": 4.106042312534219e-08, "loss": 2.5251, "step": 13550 }, { "epoch": 91.006734006734, "grad_norm": 67.59268188476562, "learning_rate": 4.046377821811786e-08, "loss": 2.3374, "step": 13560 }, { "epoch": 91.07407407407408, "grad_norm": 79.41356658935547, "learning_rate": 3.987141046694109e-08, "loss": 2.1998, "step": 13570 }, { "epoch": 91.14141414141415, "grad_norm": 79.26526641845703, "learning_rate": 3.928332251230537e-08, "loss": 2.7416, "step": 13580 }, { "epoch": 91.20875420875421, "grad_norm": 104.16319274902344, "learning_rate": 3.869951697562746e-08, "loss": 2.3293, "step": 13590 }, { "epoch": 91.27609427609427, "grad_norm": 116.10478210449219, "learning_rate": 3.811999645923414e-08, "loss": 3.0567, "step": 13600 }, { "epoch": 91.27609427609427, "eval_loss": 0.3469589650630951, "eval_mae": 0.41240930557250977, "eval_mse": 0.3469589650630951, "eval_r2": 0.24108731746673584, "eval_rmse": 0.5890322275250269, "eval_runtime": 10.7732, "eval_samples_per_second": 440.818, "eval_steps_per_second": 13.831, "step": 13600 }, { "epoch": 91.34343434343434, "grad_norm": 152.7045440673828, "learning_rate": 3.754476354635283e-08, "loss": 2.8153, "step": 13610 }, { "epoch": 91.41077441077441, "grad_norm": 142.65982055664062, "learning_rate": 3.6973820801097675e-08, "loss": 2.8465, "step": 13620 }, { "epoch": 91.47811447811448, "grad_norm": 126.82440948486328, "learning_rate": 3.6407170768460336e-08, "loss": 2.2614, "step": 13630 }, { "epoch": 91.54545454545455, "grad_norm": 41.475502014160156, "learning_rate": 3.584481597429678e-08, "loss": 2.5183, "step": 13640 }, { "epoch": 91.61279461279462, "grad_norm": 37.792972564697266, "learning_rate": 3.5286758925317406e-08, "loss": 2.4525, "step": 13650 }, { "epoch": 91.68013468013469, "grad_norm": 169.44155883789062, "learning_rate": 3.473300210907481e-08, "loss": 2.6318, "step": 13660 }, { "epoch": 91.74747474747475, "grad_norm": 126.57239532470703, "learning_rate": 3.418354799395373e-08, "loss": 2.41, "step": 13670 }, { "epoch": 91.81481481481481, "grad_norm": 71.03952026367188, "learning_rate": 3.36383990291591e-08, "loss": 2.7331, "step": 13680 }, { "epoch": 91.88215488215488, "grad_norm": 180.376220703125, "learning_rate": 3.3097557644705784e-08, "loss": 2.8756, "step": 13690 }, { "epoch": 91.94949494949495, "grad_norm": 60.76559829711914, "learning_rate": 3.256102625140733e-08, "loss": 2.3709, "step": 13700 }, { "epoch": 91.94949494949495, "eval_loss": 0.34685754776000977, "eval_mae": 0.41240549087524414, "eval_mse": 0.34685754776000977, "eval_r2": 0.24130922555923462, "eval_rmse": 0.5889461331565136, "eval_runtime": 10.7798, "eval_samples_per_second": 440.548, "eval_steps_per_second": 13.822, "step": 13700 }, { "epoch": 92.01346801346801, "grad_norm": 93.09085083007812, "learning_rate": 3.202880724086543e-08, "loss": 2.3813, "step": 13710 }, { "epoch": 92.08080808080808, "grad_norm": 57.56668472290039, "learning_rate": 3.1500902985459486e-08, "loss": 2.4953, "step": 13720 }, { "epoch": 92.14814814814815, "grad_norm": 52.39466094970703, "learning_rate": 3.0977315838335406e-08, "loss": 2.2694, "step": 13730 }, { "epoch": 92.21548821548822, "grad_norm": 64.15316009521484, "learning_rate": 3.045804813339559e-08, "loss": 2.5552, "step": 13740 }, { "epoch": 92.28282828282828, "grad_norm": 140.4697265625, "learning_rate": 2.99431021852885e-08, "loss": 2.6156, "step": 13750 }, { "epoch": 92.35016835016835, "grad_norm": 23.366674423217773, "learning_rate": 2.9432480289398375e-08, "loss": 2.5514, "step": 13760 }, { "epoch": 92.41750841750842, "grad_norm": 62.201751708984375, "learning_rate": 2.8926184721834502e-08, "loss": 2.4086, "step": 13770 }, { "epoch": 92.48484848484848, "grad_norm": 119.99980926513672, "learning_rate": 2.842421773942172e-08, "loss": 2.3437, "step": 13780 }, { "epoch": 92.55218855218855, "grad_norm": 178.1807861328125, "learning_rate": 2.79265815796903e-08, "loss": 2.8859, "step": 13790 }, { "epoch": 92.61952861952862, "grad_norm": 76.41964721679688, "learning_rate": 2.7433278460865185e-08, "loss": 2.8321, "step": 13800 }, { "epoch": 92.61952861952862, "eval_loss": 0.34695225954055786, "eval_mae": 0.4126594364643097, "eval_mse": 0.34695225954055786, "eval_r2": 0.24110203981399536, "eval_rmse": 0.5890265355147914, "eval_runtime": 10.7763, "eval_samples_per_second": 440.689, "eval_steps_per_second": 13.827, "step": 13800 }, { "epoch": 92.68686868686869, "grad_norm": 161.5650177001953, "learning_rate": 2.694431058185731e-08, "loss": 2.7994, "step": 13810 }, { "epoch": 92.75420875420875, "grad_norm": 136.6820831298828, "learning_rate": 2.645968012225275e-08, "loss": 2.6736, "step": 13820 }, { "epoch": 92.82154882154882, "grad_norm": 60.87177658081055, "learning_rate": 2.5979389242303696e-08, "loss": 2.2656, "step": 13830 }, { "epoch": 92.88888888888889, "grad_norm": 123.61967468261719, "learning_rate": 2.550344008291816e-08, "loss": 2.9236, "step": 13840 }, { "epoch": 92.95622895622895, "grad_norm": 111.02430725097656, "learning_rate": 2.5031834765651404e-08, "loss": 2.4526, "step": 13850 }, { "epoch": 93.02020202020202, "grad_norm": 97.33848571777344, "learning_rate": 2.4564575392695296e-08, "loss": 2.5287, "step": 13860 }, { "epoch": 93.08754208754209, "grad_norm": 145.59591674804688, "learning_rate": 2.4101664046870196e-08, "loss": 2.6097, "step": 13870 }, { "epoch": 93.15488215488216, "grad_norm": 139.09548950195312, "learning_rate": 2.3643102791614412e-08, "loss": 2.8266, "step": 13880 }, { "epoch": 93.22222222222223, "grad_norm": 45.97932815551758, "learning_rate": 2.3188893670976096e-08, "loss": 2.5831, "step": 13890 }, { "epoch": 93.28956228956228, "grad_norm": 26.343008041381836, "learning_rate": 2.2739038709603253e-08, "loss": 2.8777, "step": 13900 }, { "epoch": 93.28956228956228, "eval_loss": 0.3468776047229767, "eval_mae": 0.4126761853694916, "eval_mse": 0.3468776047229767, "eval_r2": 0.24126529693603516, "eval_rmse": 0.588963160751992, "eval_runtime": 10.7884, "eval_samples_per_second": 440.196, "eval_steps_per_second": 13.811, "step": 13900 }, { "epoch": 93.35690235690235, "grad_norm": 124.01888275146484, "learning_rate": 2.2293539912735526e-08, "loss": 2.5797, "step": 13910 }, { "epoch": 93.42424242424242, "grad_norm": 75.62468719482422, "learning_rate": 2.185239926619431e-08, "loss": 2.6626, "step": 13920 }, { "epoch": 93.49158249158249, "grad_norm": 127.34844207763672, "learning_rate": 2.141561873637521e-08, "loss": 2.54, "step": 13930 }, { "epoch": 93.55892255892256, "grad_norm": 109.78040313720703, "learning_rate": 2.09832002702377e-08, "loss": 2.3587, "step": 13940 }, { "epoch": 93.62626262626263, "grad_norm": 184.51112365722656, "learning_rate": 2.0555145795297824e-08, "loss": 2.6932, "step": 13950 }, { "epoch": 93.6936026936027, "grad_norm": 262.2184753417969, "learning_rate": 2.013145721961862e-08, "loss": 2.3811, "step": 13960 }, { "epoch": 93.76094276094275, "grad_norm": 33.800289154052734, "learning_rate": 1.9712136431802473e-08, "loss": 2.49, "step": 13970 }, { "epoch": 93.82828282828282, "grad_norm": 93.89441680908203, "learning_rate": 1.9297185300981677e-08, "loss": 2.5838, "step": 13980 }, { "epoch": 93.89562289562289, "grad_norm": 57.24879455566406, "learning_rate": 1.888660567681133e-08, "loss": 2.4603, "step": 13990 }, { "epoch": 93.96296296296296, "grad_norm": 84.26305389404297, "learning_rate": 1.8480399389459888e-08, "loss": 2.3835, "step": 14000 }, { "epoch": 93.96296296296296, "eval_loss": 0.3470214605331421, "eval_mae": 0.41276276111602783, "eval_mse": 0.3470214605331421, "eval_r2": 0.24095064401626587, "eval_rmse": 0.5890852744154637, "eval_runtime": 10.7882, "eval_samples_per_second": 440.202, "eval_steps_per_second": 13.811, "step": 14000 }, { "epoch": 94.02693602693603, "grad_norm": 35.55366897583008, "learning_rate": 1.8078568249601744e-08, "loss": 2.3872, "step": 14010 }, { "epoch": 94.0942760942761, "grad_norm": 45.747493743896484, "learning_rate": 1.7681114048409105e-08, "loss": 2.9205, "step": 14020 }, { "epoch": 94.16161616161617, "grad_norm": 145.53172302246094, "learning_rate": 1.7288038557543683e-08, "loss": 2.4055, "step": 14030 }, { "epoch": 94.22895622895624, "grad_norm": 96.76238250732422, "learning_rate": 1.6899343529148902e-08, "loss": 2.5424, "step": 14040 }, { "epoch": 94.29629629629629, "grad_norm": 61.41950607299805, "learning_rate": 1.651503069584259e-08, "loss": 2.8539, "step": 14050 }, { "epoch": 94.36363636363636, "grad_norm": 140.43563842773438, "learning_rate": 1.6135101770708202e-08, "loss": 2.3934, "step": 14060 }, { "epoch": 94.43097643097643, "grad_norm": 123.41638946533203, "learning_rate": 1.575955844728849e-08, "loss": 2.4463, "step": 14070 }, { "epoch": 94.4983164983165, "grad_norm": 128.444091796875, "learning_rate": 1.538840239957684e-08, "loss": 2.1504, "step": 14080 }, { "epoch": 94.56565656565657, "grad_norm": 215.8132781982422, "learning_rate": 1.5021635282010282e-08, "loss": 2.847, "step": 14090 }, { "epoch": 94.63299663299664, "grad_norm": 75.29983520507812, "learning_rate": 1.4659258729462166e-08, "loss": 2.6011, "step": 14100 }, { "epoch": 94.63299663299664, "eval_loss": 0.34699544310569763, "eval_mae": 0.41266942024230957, "eval_mse": 0.34699544310569763, "eval_r2": 0.24100756645202637, "eval_rmse": 0.589063191097269, "eval_runtime": 10.7774, "eval_samples_per_second": 440.645, "eval_steps_per_second": 13.825, "step": 14100 }, { "epoch": 94.7003367003367, "grad_norm": 130.86758422851562, "learning_rate": 1.4301274357234938e-08, "loss": 3.0231, "step": 14110 }, { "epoch": 94.76767676767676, "grad_norm": 86.06951141357422, "learning_rate": 1.3947683761052375e-08, "loss": 2.5564, "step": 14120 }, { "epoch": 94.83501683501683, "grad_norm": 112.08048248291016, "learning_rate": 1.3598488517053475e-08, "loss": 2.5604, "step": 14130 }, { "epoch": 94.9023569023569, "grad_norm": 67.45586395263672, "learning_rate": 1.325369018178435e-08, "loss": 2.4603, "step": 14140 }, { "epoch": 94.96969696969697, "grad_norm": 105.34934997558594, "learning_rate": 1.2913290292192013e-08, "loss": 2.4587, "step": 14150 }, { "epoch": 95.03367003367003, "grad_norm": 80.00907135009766, "learning_rate": 1.2577290365617166e-08, "loss": 2.3007, "step": 14160 }, { "epoch": 95.1010101010101, "grad_norm": 47.87882995605469, "learning_rate": 1.2245691899787747e-08, "loss": 2.8171, "step": 14170 }, { "epoch": 95.16835016835017, "grad_norm": 143.7111358642578, "learning_rate": 1.191849637281206e-08, "loss": 2.5715, "step": 14180 }, { "epoch": 95.23569023569024, "grad_norm": 82.27194213867188, "learning_rate": 1.1595705243171994e-08, "loss": 2.6184, "step": 14190 }, { "epoch": 95.3030303030303, "grad_norm": 137.38604736328125, "learning_rate": 1.1277319949716923e-08, "loss": 2.6999, "step": 14200 }, { "epoch": 95.3030303030303, "eval_loss": 0.34684568643569946, "eval_mae": 0.41250860691070557, "eval_mse": 0.34684568643569946, "eval_r2": 0.24133515357971191, "eval_rmse": 0.5889360631135603, "eval_runtime": 10.7921, "eval_samples_per_second": 440.043, "eval_steps_per_second": 13.806, "step": 14200 }, { "epoch": 95.37037037037037, "grad_norm": 52.690067291259766, "learning_rate": 1.0963341911656932e-08, "loss": 2.6438, "step": 14210 }, { "epoch": 95.43771043771044, "grad_norm": 47.405982971191406, "learning_rate": 1.0653772528556926e-08, "loss": 2.7234, "step": 14220 }, { "epoch": 95.5050505050505, "grad_norm": 69.25318908691406, "learning_rate": 1.0348613180329757e-08, "loss": 2.4151, "step": 14230 }, { "epoch": 95.57239057239057, "grad_norm": 106.15676879882812, "learning_rate": 1.004786522723089e-08, "loss": 2.3976, "step": 14240 }, { "epoch": 95.63973063973064, "grad_norm": 104.64041137695312, "learning_rate": 9.751530009851406e-09, "loss": 2.8673, "step": 14250 }, { "epoch": 95.70707070707071, "grad_norm": 139.20193481445312, "learning_rate": 9.45960884911301e-09, "loss": 2.7274, "step": 14260 }, { "epoch": 95.77441077441077, "grad_norm": 174.57992553710938, "learning_rate": 9.172103046261038e-09, "loss": 2.4151, "step": 14270 }, { "epoch": 95.84175084175084, "grad_norm": 81.41283416748047, "learning_rate": 8.889013882859897e-09, "loss": 2.4557, "step": 14280 }, { "epoch": 95.9090909090909, "grad_norm": 121.30021667480469, "learning_rate": 8.610342620786193e-09, "loss": 2.6063, "step": 14290 }, { "epoch": 95.97643097643098, "grad_norm": 131.5374298095703, "learning_rate": 8.336090502224059e-09, "loss": 2.3372, "step": 14300 }, { "epoch": 95.97643097643098, "eval_loss": 0.34685662388801575, "eval_mae": 0.41250181198120117, "eval_mse": 0.34685662388801575, "eval_r2": 0.2413111925125122, "eval_rmse": 0.5889453488126175, "eval_runtime": 10.7835, "eval_samples_per_second": 440.395, "eval_steps_per_second": 13.817, "step": 14300 }, { "epoch": 96.04040404040404, "grad_norm": 51.48731231689453, "learning_rate": 8.066258749658828e-09, "loss": 2.4983, "step": 14310 }, { "epoch": 96.10774410774411, "grad_norm": 39.593284606933594, "learning_rate": 7.800848565872154e-09, "loss": 2.3123, "step": 14320 }, { "epoch": 96.17508417508418, "grad_norm": 107.15096282958984, "learning_rate": 7.539861133936342e-09, "loss": 2.6538, "step": 14330 }, { "epoch": 96.24242424242425, "grad_norm": 98.97097778320312, "learning_rate": 7.283297617209361e-09, "loss": 2.8702, "step": 14340 }, { "epoch": 96.3097643097643, "grad_norm": 63.26054000854492, "learning_rate": 7.03115915932917e-09, "loss": 2.4383, "step": 14350 }, { "epoch": 96.37710437710437, "grad_norm": 111.96060943603516, "learning_rate": 6.783446884209176e-09, "loss": 2.4878, "step": 14360 }, { "epoch": 96.44444444444444, "grad_norm": 94.0950698852539, "learning_rate": 6.540161896032903e-09, "loss": 2.2609, "step": 14370 }, { "epoch": 96.51178451178451, "grad_norm": 147.51991271972656, "learning_rate": 6.301305279248881e-09, "loss": 2.4504, "step": 14380 }, { "epoch": 96.57912457912458, "grad_norm": 57.300350189208984, "learning_rate": 6.066878098566541e-09, "loss": 2.4401, "step": 14390 }, { "epoch": 96.64646464646465, "grad_norm": 128.87161254882812, "learning_rate": 5.836881398950666e-09, "loss": 2.4265, "step": 14400 }, { "epoch": 96.64646464646465, "eval_loss": 0.3467996120452881, "eval_mae": 0.4123893678188324, "eval_mse": 0.3467996120452881, "eval_r2": 0.24143588542938232, "eval_rmse": 0.5888969451825066, "eval_runtime": 10.7721, "eval_samples_per_second": 440.863, "eval_steps_per_second": 13.832, "step": 14400 }, { "epoch": 96.71380471380472, "grad_norm": 231.01406860351562, "learning_rate": 5.611316205617056e-09, "loss": 3.0365, "step": 14410 }, { "epoch": 96.78114478114477, "grad_norm": 58.1760368347168, "learning_rate": 5.3901835240282026e-09, "loss": 2.7198, "step": 14420 }, { "epoch": 96.84848484848484, "grad_norm": 33.50977325439453, "learning_rate": 5.1734843398881785e-09, "loss": 2.6241, "step": 14430 }, { "epoch": 96.91582491582491, "grad_norm": 26.469890594482422, "learning_rate": 4.96121961913909e-09, "loss": 2.6771, "step": 14440 }, { "epoch": 96.98316498316498, "grad_norm": 128.8701171875, "learning_rate": 4.753390307955962e-09, "loss": 2.6316, "step": 14450 }, { "epoch": 97.04713804713805, "grad_norm": 187.8650360107422, "learning_rate": 4.549997332743194e-09, "loss": 2.513, "step": 14460 }, { "epoch": 97.11447811447812, "grad_norm": 81.4962387084961, "learning_rate": 4.3510416001296676e-09, "loss": 2.2181, "step": 14470 }, { "epoch": 97.18181818181819, "grad_norm": 155.7507781982422, "learning_rate": 4.156523996965533e-09, "loss": 2.6207, "step": 14480 }, { "epoch": 97.24915824915826, "grad_norm": 108.34071350097656, "learning_rate": 3.966445390317652e-09, "loss": 2.4975, "step": 14490 }, { "epoch": 97.31649831649831, "grad_norm": 111.2857666015625, "learning_rate": 3.78080662746616e-09, "loss": 2.8095, "step": 14500 }, { "epoch": 97.31649831649831, "eval_loss": 0.3468829393386841, "eval_mae": 0.4125968813896179, "eval_mse": 0.34688296914100647, "eval_r2": 0.24125361442565918, "eval_rmse": 0.5889677148545636, "eval_runtime": 10.7792, "eval_samples_per_second": 440.571, "eval_steps_per_second": 13.823, "step": 14500 }, { "epoch": 97.38383838383838, "grad_norm": 23.653133392333984, "learning_rate": 3.599608535900356e-09, "loss": 2.6111, "step": 14510 }, { "epoch": 97.45117845117845, "grad_norm": 160.30044555664062, "learning_rate": 3.42285192331504e-09, "loss": 2.6715, "step": 14520 }, { "epoch": 97.51851851851852, "grad_norm": 80.36695861816406, "learning_rate": 3.250537577607071e-09, "loss": 2.4185, "step": 14530 }, { "epoch": 97.58585858585859, "grad_norm": 84.5897216796875, "learning_rate": 3.082666266872036e-09, "loss": 2.7639, "step": 14540 }, { "epoch": 97.65319865319866, "grad_norm": 32.55792999267578, "learning_rate": 2.9192387394002536e-09, "loss": 2.4913, "step": 14550 }, { "epoch": 97.72053872053873, "grad_norm": 122.0492935180664, "learning_rate": 2.7602557236738876e-09, "loss": 2.7744, "step": 14560 }, { "epoch": 97.78787878787878, "grad_norm": 50.71971893310547, "learning_rate": 2.6057179283636155e-09, "loss": 2.3903, "step": 14570 }, { "epoch": 97.85521885521885, "grad_norm": 46.23564147949219, "learning_rate": 2.4556260423254093e-09, "loss": 2.5568, "step": 14580 }, { "epoch": 97.92255892255892, "grad_norm": 134.7054901123047, "learning_rate": 2.3099807345975386e-09, "loss": 2.5211, "step": 14590 }, { "epoch": 97.98989898989899, "grad_norm": 76.62164306640625, "learning_rate": 2.1687826543975718e-09, "loss": 2.594, "step": 14600 }, { "epoch": 97.98989898989899, "eval_loss": 0.3467029631137848, "eval_mae": 0.41238775849342346, "eval_mse": 0.3467029631137848, "eval_r2": 0.2416473627090454, "eval_rmse": 0.5888148801735439, "eval_runtime": 10.7681, "eval_samples_per_second": 441.026, "eval_steps_per_second": 13.837, "step": 14600 }, { "epoch": 98.05387205387206, "grad_norm": 202.23507690429688, "learning_rate": 2.0320324311192684e-09, "loss": 2.3279, "step": 14610 }, { "epoch": 98.12121212121212, "grad_norm": 108.2766342163086, "learning_rate": 1.8997306743301357e-09, "loss": 2.3388, "step": 14620 }, { "epoch": 98.1885521885522, "grad_norm": 110.66009521484375, "learning_rate": 1.7718779737683209e-09, "loss": 2.8302, "step": 14630 }, { "epoch": 98.25589225589225, "grad_norm": 36.240394592285156, "learning_rate": 1.6484748993406129e-09, "loss": 2.4002, "step": 14640 }, { "epoch": 98.32323232323232, "grad_norm": 54.57324981689453, "learning_rate": 1.529522001118999e-09, "loss": 2.559, "step": 14650 }, { "epoch": 98.39057239057239, "grad_norm": 185.8697509765625, "learning_rate": 1.415019809339002e-09, "loss": 2.5756, "step": 14660 }, { "epoch": 98.45791245791246, "grad_norm": 118.36250305175781, "learning_rate": 1.3049688343970134e-09, "loss": 3.1575, "step": 14670 }, { "epoch": 98.52525252525253, "grad_norm": 52.261314392089844, "learning_rate": 1.199369566847741e-09, "loss": 2.4698, "step": 14680 }, { "epoch": 98.5925925925926, "grad_norm": 195.0918426513672, "learning_rate": 1.098222477402655e-09, "loss": 2.5432, "step": 14690 }, { "epoch": 98.65993265993266, "grad_norm": 211.048095703125, "learning_rate": 1.0015280169272112e-09, "loss": 2.8749, "step": 14700 }, { "epoch": 98.65993265993266, "eval_loss": 0.34683534502983093, "eval_mae": 0.41251975297927856, "eval_mse": 0.34683534502983093, "eval_r2": 0.2413577437400818, "eval_rmse": 0.5889272833124909, "eval_runtime": 10.786, "eval_samples_per_second": 440.292, "eval_steps_per_second": 13.814, "step": 14700 }, { "epoch": 98.72727272727273, "grad_norm": 179.87393188476562, "learning_rate": 9.092866164395197e-10, "loss": 2.389, "step": 14710 }, { "epoch": 98.79461279461279, "grad_norm": 124.19705963134766, "learning_rate": 8.214986871076801e-10, "loss": 2.5682, "step": 14720 }, { "epoch": 98.86195286195286, "grad_norm": 40.926422119140625, "learning_rate": 7.381646202485603e-10, "loss": 2.6029, "step": 14730 }, { "epoch": 98.92929292929293, "grad_norm": 77.28518676757812, "learning_rate": 6.592847873259089e-10, "loss": 2.4443, "step": 14740 }, { "epoch": 98.996632996633, "grad_norm": 206.981201171875, "learning_rate": 5.848595399483569e-10, "loss": 2.4332, "step": 14750 }, { "epoch": 99.06060606060606, "grad_norm": 78.53069305419922, "learning_rate": 5.148892098683077e-10, "loss": 2.7022, "step": 14760 }, { "epoch": 99.12794612794613, "grad_norm": 74.92552947998047, "learning_rate": 4.493741089802716e-10, "loss": 2.6108, "step": 14770 }, { "epoch": 99.1952861952862, "grad_norm": 45.64195251464844, "learning_rate": 3.883145293193113e-10, "loss": 2.5439, "step": 14780 }, { "epoch": 99.26262626262626, "grad_norm": 167.02183532714844, "learning_rate": 3.31710743060043e-10, "loss": 2.8375, "step": 14790 }, { "epoch": 99.32996632996633, "grad_norm": 108.00308990478516, "learning_rate": 2.7956300251519293e-10, "loss": 2.3331, "step": 14800 }, { "epoch": 99.32996632996633, "eval_loss": 0.3467314839363098, "eval_mae": 0.4125092327594757, "eval_mse": 0.3467314839363098, "eval_r2": 0.24158495664596558, "eval_rmse": 0.5888390985119023, "eval_runtime": 10.7943, "eval_samples_per_second": 439.955, "eval_steps_per_second": 13.804, "step": 14800 }, { "epoch": 99.3973063973064, "grad_norm": 177.54896545410156, "learning_rate": 2.3187154013459832e-10, "loss": 2.2471, "step": 14810 }, { "epoch": 99.46464646464646, "grad_norm": 70.6279296875, "learning_rate": 1.8863656850409694e-10, "loss": 2.746, "step": 14820 }, { "epoch": 99.53198653198653, "grad_norm": 66.32245635986328, "learning_rate": 1.498582803446391e-10, "loss": 2.5736, "step": 14830 }, { "epoch": 99.5993265993266, "grad_norm": 187.3055419921875, "learning_rate": 1.1553684851139944e-10, "loss": 2.4884, "step": 14840 }, { "epoch": 99.66666666666667, "grad_norm": 52.02140808105469, "learning_rate": 8.567242599299973e-11, "loss": 2.5821, "step": 14850 }, { "epoch": 99.73400673400674, "grad_norm": 123.63912963867188, "learning_rate": 6.026514591073173e-11, "loss": 2.4584, "step": 14860 }, { "epoch": 99.8013468013468, "grad_norm": 101.87879180908203, "learning_rate": 3.931512151833516e-11, "loss": 2.626, "step": 14870 }, { "epoch": 99.86868686868686, "grad_norm": 114.61425018310547, "learning_rate": 2.282244620088747e-11, "loss": 2.5316, "step": 14880 }, { "epoch": 99.93602693602693, "grad_norm": 90.33799743652344, "learning_rate": 1.078719347502588e-11, "loss": 2.812, "step": 14890 }, { "epoch": 100.0, "grad_norm": 155.6533660888672, "learning_rate": 3.20941698805921e-12, "loss": 2.2898, "step": 14900 }, { "epoch": 100.0, "eval_loss": 0.3469623923301697, "eval_mae": 0.41268274188041687, "eval_mse": 0.3469623923301697, "eval_r2": 0.24107986688613892, "eval_rmse": 0.589035136753462, "eval_runtime": 10.7809, "eval_samples_per_second": 440.5, "eval_steps_per_second": 13.821, "step": 14900 }, { "epoch": 100.06734006734007, "grad_norm": 51.282894134521484, "learning_rate": 1.0012659201401588e-06, "loss": 2.4284, "step": 14910 }, { "epoch": 100.13468013468014, "grad_norm": 68.11772155761719, "learning_rate": 1.0002109867448143e-06, "loss": 3.0354, "step": 14920 }, { "epoch": 100.20202020202021, "grad_norm": 180.70603942871094, "learning_rate": 9.991560531146654e-07, "loss": 2.6084, "step": 14930 }, { "epoch": 100.26936026936026, "grad_norm": 95.84480285644531, "learning_rate": 9.981011204237323e-07, "loss": 2.7974, "step": 14940 }, { "epoch": 100.33670033670033, "grad_norm": 223.6320343017578, "learning_rate": 9.970461898460342e-07, "loss": 2.746, "step": 14950 }, { "epoch": 100.4040404040404, "grad_norm": 201.29379272460938, "learning_rate": 9.959912625555875e-07, "loss": 2.6952, "step": 14960 }, { "epoch": 100.47138047138047, "grad_norm": 110.51984405517578, "learning_rate": 9.949363397264047e-07, "loss": 2.5576, "step": 14970 }, { "epoch": 100.53872053872054, "grad_norm": 248.5704803466797, "learning_rate": 9.938814225324937e-07, "loss": 2.3856, "step": 14980 }, { "epoch": 100.60606060606061, "grad_norm": 115.52710723876953, "learning_rate": 9.928265121478562e-07, "loss": 2.6929, "step": 14990 }, { "epoch": 100.67340067340068, "grad_norm": 153.52308654785156, "learning_rate": 9.917716097464865e-07, "loss": 2.3223, "step": 15000 }, { "epoch": 100.67340067340068, "eval_loss": 0.34771957993507385, "eval_mae": 0.4127788841724396, "eval_mse": 0.34771957993507385, "eval_r2": 0.23942363262176514, "eval_rmse": 0.5896775219855966, "eval_runtime": 10.7367, "eval_samples_per_second": 442.317, "eval_steps_per_second": 13.878, "step": 15000 }, { "epoch": 100.74074074074075, "grad_norm": 176.5908203125, "learning_rate": 9.907167165023695e-07, "loss": 2.7388, "step": 15010 }, { "epoch": 100.8080808080808, "grad_norm": 189.6057586669922, "learning_rate": 9.8966183358948e-07, "loss": 2.5335, "step": 15020 }, { "epoch": 100.87542087542087, "grad_norm": 45.85624313354492, "learning_rate": 9.886069621817815e-07, "loss": 2.5321, "step": 15030 }, { "epoch": 100.94276094276094, "grad_norm": 176.9825439453125, "learning_rate": 9.875521034532252e-07, "loss": 2.5397, "step": 15040 }, { "epoch": 101.006734006734, "grad_norm": 194.88076782226562, "learning_rate": 9.86497258577747e-07, "loss": 2.4793, "step": 15050 }, { "epoch": 101.07407407407408, "grad_norm": 220.79579162597656, "learning_rate": 9.854424287292684e-07, "loss": 2.8783, "step": 15060 }, { "epoch": 101.14141414141415, "grad_norm": 215.52149963378906, "learning_rate": 9.843876150816933e-07, "loss": 2.3975, "step": 15070 }, { "epoch": 101.20875420875421, "grad_norm": 121.0874252319336, "learning_rate": 9.833328188089084e-07, "loss": 2.715, "step": 15080 }, { "epoch": 101.27609427609427, "grad_norm": 83.642822265625, "learning_rate": 9.822780410847806e-07, "loss": 2.4962, "step": 15090 }, { "epoch": 101.34343434343434, "grad_norm": 232.3785858154297, "learning_rate": 9.812232830831567e-07, "loss": 2.5608, "step": 15100 }, { "epoch": 101.34343434343434, "eval_loss": 0.3489612340927124, "eval_mae": 0.41429275274276733, "eval_mse": 0.3489612340927124, "eval_r2": 0.23670774698257446, "eval_rmse": 0.5907294085219665, "eval_runtime": 10.6982, "eval_samples_per_second": 443.907, "eval_steps_per_second": 13.928, "step": 15100 }, { "epoch": 101.41077441077441, "grad_norm": 109.08354949951172, "learning_rate": 9.801685459778607e-07, "loss": 2.6033, "step": 15110 }, { "epoch": 101.47811447811448, "grad_norm": 215.20945739746094, "learning_rate": 9.791138309426935e-07, "loss": 2.6829, "step": 15120 }, { "epoch": 101.54545454545455, "grad_norm": 177.8978271484375, "learning_rate": 9.780591391514327e-07, "loss": 2.7898, "step": 15130 }, { "epoch": 101.61279461279462, "grad_norm": 161.1246795654297, "learning_rate": 9.770044717778284e-07, "loss": 2.6622, "step": 15140 }, { "epoch": 101.68013468013469, "grad_norm": 149.11569213867188, "learning_rate": 9.759498299956039e-07, "loss": 2.5702, "step": 15150 }, { "epoch": 101.74747474747475, "grad_norm": 49.866424560546875, "learning_rate": 9.748952149784547e-07, "loss": 2.5379, "step": 15160 }, { "epoch": 101.81481481481481, "grad_norm": 70.47093963623047, "learning_rate": 9.738406279000462e-07, "loss": 2.2951, "step": 15170 }, { "epoch": 101.88215488215488, "grad_norm": 54.86783981323242, "learning_rate": 9.727860699340129e-07, "loss": 3.0151, "step": 15180 }, { "epoch": 101.94949494949495, "grad_norm": 52.50375747680664, "learning_rate": 9.717315422539555e-07, "loss": 2.4115, "step": 15190 }, { "epoch": 102.01346801346801, "grad_norm": 169.0048828125, "learning_rate": 9.706770460334432e-07, "loss": 2.4218, "step": 15200 }, { "epoch": 102.01346801346801, "eval_loss": 0.35127919912338257, "eval_mae": 0.4178565442562103, "eval_mse": 0.35127919912338257, "eval_r2": 0.23163753747940063, "eval_rmse": 0.5926881128581731, "eval_runtime": 10.7037, "eval_samples_per_second": 443.679, "eval_steps_per_second": 13.92, "step": 15200 }, { "epoch": 102.08080808080808, "grad_norm": 82.5206069946289, "learning_rate": 9.696225824460088e-07, "loss": 2.3593, "step": 15210 }, { "epoch": 102.14814814814815, "grad_norm": 53.307960510253906, "learning_rate": 9.68568152665149e-07, "loss": 2.6749, "step": 15220 }, { "epoch": 102.21548821548822, "grad_norm": 172.3877716064453, "learning_rate": 9.675137578643229e-07, "loss": 2.6372, "step": 15230 }, { "epoch": 102.28282828282828, "grad_norm": 146.2075653076172, "learning_rate": 9.664593992169507e-07, "loss": 2.5724, "step": 15240 }, { "epoch": 102.35016835016835, "grad_norm": 135.55899047851562, "learning_rate": 9.654050778964128e-07, "loss": 2.8974, "step": 15250 }, { "epoch": 102.41750841750842, "grad_norm": 103.7734146118164, "learning_rate": 9.643507950760474e-07, "loss": 2.4311, "step": 15260 }, { "epoch": 102.48484848484848, "grad_norm": 85.87439727783203, "learning_rate": 9.632965519291499e-07, "loss": 2.4065, "step": 15270 }, { "epoch": 102.55218855218855, "grad_norm": 101.40847778320312, "learning_rate": 9.62242349628972e-07, "loss": 2.7117, "step": 15280 }, { "epoch": 102.61952861952862, "grad_norm": 76.53858947753906, "learning_rate": 9.6118818934872e-07, "loss": 2.5698, "step": 15290 }, { "epoch": 102.68686868686869, "grad_norm": 81.28172302246094, "learning_rate": 9.601340722615526e-07, "loss": 2.794, "step": 15300 }, { "epoch": 102.68686868686869, "eval_loss": 0.3463557958602905, "eval_mae": 0.41216346621513367, "eval_mse": 0.3463557958602905, "eval_r2": 0.2424067258834839, "eval_rmse": 0.5885200046390017, "eval_runtime": 10.724, "eval_samples_per_second": 442.84, "eval_steps_per_second": 13.894, "step": 15300 }, { "epoch": 102.75420875420875, "grad_norm": 59.54379653930664, "learning_rate": 9.590799995405811e-07, "loss": 2.6378, "step": 15310 }, { "epoch": 102.82154882154882, "grad_norm": 125.50486755371094, "learning_rate": 9.580259723588675e-07, "loss": 2.484, "step": 15320 }, { "epoch": 102.88888888888889, "grad_norm": 57.731040954589844, "learning_rate": 9.569719918894228e-07, "loss": 2.7098, "step": 15330 }, { "epoch": 102.95622895622895, "grad_norm": 84.63851165771484, "learning_rate": 9.559180593052065e-07, "loss": 2.5475, "step": 15340 }, { "epoch": 103.02020202020202, "grad_norm": 102.74390411376953, "learning_rate": 9.548641757791237e-07, "loss": 2.165, "step": 15350 }, { "epoch": 103.08754208754209, "grad_norm": 100.70092010498047, "learning_rate": 9.538103424840263e-07, "loss": 2.4805, "step": 15360 }, { "epoch": 103.15488215488216, "grad_norm": 163.60775756835938, "learning_rate": 9.527565605927091e-07, "loss": 2.6997, "step": 15370 }, { "epoch": 103.22222222222223, "grad_norm": 355.04559326171875, "learning_rate": 9.517028312779105e-07, "loss": 2.7483, "step": 15380 }, { "epoch": 103.28956228956228, "grad_norm": 215.8573455810547, "learning_rate": 9.506491557123103e-07, "loss": 2.6356, "step": 15390 }, { "epoch": 103.35690235690235, "grad_norm": 51.41034698486328, "learning_rate": 9.495955350685283e-07, "loss": 2.594, "step": 15400 }, { "epoch": 103.35690235690235, "eval_loss": 0.34469521045684814, "eval_mae": 0.4122921824455261, "eval_mse": 0.34469521045684814, "eval_r2": 0.24603891372680664, "eval_rmse": 0.5871074948055494, "eval_runtime": 10.6984, "eval_samples_per_second": 443.898, "eval_steps_per_second": 13.927, "step": 15400 }, { "epoch": 103.42424242424242, "grad_norm": 68.814453125, "learning_rate": 9.485419705191229e-07, "loss": 2.6368, "step": 15410 }, { "epoch": 103.49158249158249, "grad_norm": 275.557373046875, "learning_rate": 9.474884632365905e-07, "loss": 2.4322, "step": 15420 }, { "epoch": 103.55892255892256, "grad_norm": 193.4994659423828, "learning_rate": 9.464350143933636e-07, "loss": 2.5418, "step": 15430 }, { "epoch": 103.62626262626263, "grad_norm": 100.95218658447266, "learning_rate": 9.453816251618101e-07, "loss": 2.5262, "step": 15440 }, { "epoch": 103.6936026936027, "grad_norm": 117.49261474609375, "learning_rate": 9.443282967142304e-07, "loss": 3.0049, "step": 15450 }, { "epoch": 103.76094276094275, "grad_norm": 133.29698181152344, "learning_rate": 9.432750302228586e-07, "loss": 2.6616, "step": 15460 }, { "epoch": 103.82828282828282, "grad_norm": 61.78145980834961, "learning_rate": 9.42221826859859e-07, "loss": 2.5844, "step": 15470 }, { "epoch": 103.89562289562289, "grad_norm": 78.81958770751953, "learning_rate": 9.411686877973262e-07, "loss": 2.3955, "step": 15480 }, { "epoch": 103.96296296296296, "grad_norm": 54.84867858886719, "learning_rate": 9.401156142072824e-07, "loss": 2.6447, "step": 15490 }, { "epoch": 104.02693602693603, "grad_norm": 85.4661865234375, "learning_rate": 9.390626072616778e-07, "loss": 2.7438, "step": 15500 }, { "epoch": 104.02693602693603, "eval_loss": 0.35134318470954895, "eval_mae": 0.41826972365379333, "eval_mse": 0.35134318470954895, "eval_r2": 0.2314976453781128, "eval_rmse": 0.5927420895377254, "eval_runtime": 10.6847, "eval_samples_per_second": 444.465, "eval_steps_per_second": 13.945, "step": 15500 }, { "epoch": 104.0942760942761, "grad_norm": 101.04560089111328, "learning_rate": 9.380096681323884e-07, "loss": 2.3766, "step": 15510 }, { "epoch": 104.16161616161617, "grad_norm": 88.10941314697266, "learning_rate": 9.369567979912141e-07, "loss": 2.5801, "step": 15520 }, { "epoch": 104.22895622895624, "grad_norm": 214.5567169189453, "learning_rate": 9.359039980098783e-07, "loss": 2.3436, "step": 15530 }, { "epoch": 104.29629629629629, "grad_norm": 271.29388427734375, "learning_rate": 9.348512693600265e-07, "loss": 2.4736, "step": 15540 }, { "epoch": 104.36363636363636, "grad_norm": 207.13516235351562, "learning_rate": 9.337986132132248e-07, "loss": 3.2452, "step": 15550 }, { "epoch": 104.43097643097643, "grad_norm": 176.328125, "learning_rate": 9.327460307409585e-07, "loss": 2.4257, "step": 15560 }, { "epoch": 104.4983164983165, "grad_norm": 48.8036003112793, "learning_rate": 9.316935231146307e-07, "loss": 2.5888, "step": 15570 }, { "epoch": 104.56565656565657, "grad_norm": 89.17958068847656, "learning_rate": 9.306410915055616e-07, "loss": 3.1988, "step": 15580 }, { "epoch": 104.63299663299664, "grad_norm": 164.42042541503906, "learning_rate": 9.295887370849866e-07, "loss": 2.6475, "step": 15590 }, { "epoch": 104.7003367003367, "grad_norm": 64.81507873535156, "learning_rate": 9.285364610240555e-07, "loss": 2.4145, "step": 15600 }, { "epoch": 104.7003367003367, "eval_loss": 0.3456327021121979, "eval_mae": 0.41211187839508057, "eval_mse": 0.3456326723098755, "eval_r2": 0.24398839473724365, "eval_rmse": 0.5879053259750888, "eval_runtime": 10.7054, "eval_samples_per_second": 443.61, "eval_steps_per_second": 13.918, "step": 15600 }, { "epoch": 104.76767676767676, "grad_norm": 116.70762634277344, "learning_rate": 9.274842644938301e-07, "loss": 2.4509, "step": 15610 }, { "epoch": 104.83501683501683, "grad_norm": 68.90277862548828, "learning_rate": 9.26432148665285e-07, "loss": 2.2675, "step": 15620 }, { "epoch": 104.9023569023569, "grad_norm": 121.92566680908203, "learning_rate": 9.253801147093031e-07, "loss": 2.4513, "step": 15630 }, { "epoch": 104.96969696969697, "grad_norm": 147.5644073486328, "learning_rate": 9.24328163796678e-07, "loss": 2.6448, "step": 15640 }, { "epoch": 105.03367003367003, "grad_norm": 75.59476470947266, "learning_rate": 9.232762970981101e-07, "loss": 2.618, "step": 15650 }, { "epoch": 105.1010101010101, "grad_norm": 156.31373596191406, "learning_rate": 9.222245157842061e-07, "loss": 2.703, "step": 15660 }, { "epoch": 105.16835016835017, "grad_norm": 195.57327270507812, "learning_rate": 9.211728210254776e-07, "loss": 2.3585, "step": 15670 }, { "epoch": 105.23569023569024, "grad_norm": 243.67552185058594, "learning_rate": 9.201212139923402e-07, "loss": 2.9865, "step": 15680 }, { "epoch": 105.3030303030303, "grad_norm": 73.97561645507812, "learning_rate": 9.190696958551117e-07, "loss": 2.332, "step": 15690 }, { "epoch": 105.37037037037037, "grad_norm": 40.377281188964844, "learning_rate": 9.180182677840108e-07, "loss": 2.6911, "step": 15700 }, { "epoch": 105.37037037037037, "eval_loss": 0.3465757369995117, "eval_mae": 0.41341766715049744, "eval_mse": 0.3465757369995117, "eval_r2": 0.24192559719085693, "eval_rmse": 0.5887068345106177, "eval_runtime": 10.7064, "eval_samples_per_second": 443.565, "eval_steps_per_second": 13.917, "step": 15700 }, { "epoch": 105.43771043771044, "grad_norm": 81.1137466430664, "learning_rate": 9.169669309491559e-07, "loss": 2.5382, "step": 15710 }, { "epoch": 105.5050505050505, "grad_norm": 132.62954711914062, "learning_rate": 9.159156865205645e-07, "loss": 2.9429, "step": 15720 }, { "epoch": 105.57239057239057, "grad_norm": 121.50292205810547, "learning_rate": 9.148645356681508e-07, "loss": 2.5877, "step": 15730 }, { "epoch": 105.63973063973064, "grad_norm": 153.1201629638672, "learning_rate": 9.138134795617245e-07, "loss": 2.5512, "step": 15740 }, { "epoch": 105.70707070707071, "grad_norm": 88.9843978881836, "learning_rate": 9.127625193709905e-07, "loss": 2.4412, "step": 15750 }, { "epoch": 105.77441077441077, "grad_norm": 60.485107421875, "learning_rate": 9.117116562655469e-07, "loss": 2.257, "step": 15760 }, { "epoch": 105.84175084175084, "grad_norm": 184.23699951171875, "learning_rate": 9.106608914148834e-07, "loss": 2.5151, "step": 15770 }, { "epoch": 105.9090909090909, "grad_norm": 168.39891052246094, "learning_rate": 9.096102259883805e-07, "loss": 2.4967, "step": 15780 }, { "epoch": 105.97643097643098, "grad_norm": 83.89433288574219, "learning_rate": 9.085596611553078e-07, "loss": 2.4156, "step": 15790 }, { "epoch": 106.04040404040404, "grad_norm": 280.8480224609375, "learning_rate": 9.075091980848236e-07, "loss": 2.5582, "step": 15800 }, { "epoch": 106.04040404040404, "eval_loss": 0.3493236303329468, "eval_mae": 0.41605648398399353, "eval_mse": 0.3493236303329468, "eval_r2": 0.23591506481170654, "eval_rmse": 0.5910360651711085, "eval_runtime": 10.6904, "eval_samples_per_second": 444.229, "eval_steps_per_second": 13.938, "step": 15800 }, { "epoch": 106.10774410774411, "grad_norm": 289.12109375, "learning_rate": 9.064588379459727e-07, "loss": 2.4125, "step": 15810 }, { "epoch": 106.17508417508418, "grad_norm": 105.97933959960938, "learning_rate": 9.054085819076849e-07, "loss": 2.4003, "step": 15820 }, { "epoch": 106.24242424242425, "grad_norm": 87.15945434570312, "learning_rate": 9.043584311387744e-07, "loss": 2.3598, "step": 15830 }, { "epoch": 106.3097643097643, "grad_norm": 57.297882080078125, "learning_rate": 9.033083868079385e-07, "loss": 2.5276, "step": 15840 }, { "epoch": 106.37710437710437, "grad_norm": 134.0861358642578, "learning_rate": 9.022584500837561e-07, "loss": 2.643, "step": 15850 }, { "epoch": 106.44444444444444, "grad_norm": 93.47891235351562, "learning_rate": 9.012086221346856e-07, "loss": 2.3624, "step": 15860 }, { "epoch": 106.51178451178451, "grad_norm": 223.43341064453125, "learning_rate": 9.001589041290651e-07, "loss": 2.6394, "step": 15870 }, { "epoch": 106.57912457912458, "grad_norm": 188.869384765625, "learning_rate": 8.991092972351104e-07, "loss": 2.6578, "step": 15880 }, { "epoch": 106.64646464646465, "grad_norm": 201.7398223876953, "learning_rate": 8.980598026209132e-07, "loss": 2.7334, "step": 15890 }, { "epoch": 106.71380471380472, "grad_norm": 62.45914840698242, "learning_rate": 8.970104214544398e-07, "loss": 2.3957, "step": 15900 }, { "epoch": 106.71380471380472, "eval_loss": 0.3441465198993683, "eval_mae": 0.410719633102417, "eval_mse": 0.3441465198993683, "eval_r2": 0.2472391128540039, "eval_rmse": 0.5866400258244985, "eval_runtime": 10.6946, "eval_samples_per_second": 444.057, "eval_steps_per_second": 13.932, "step": 15900 }, { "epoch": 106.78114478114477, "grad_norm": 119.26708221435547, "learning_rate": 8.959611549035316e-07, "loss": 2.405, "step": 15910 }, { "epoch": 106.84848484848484, "grad_norm": 161.25865173339844, "learning_rate": 8.949120041359012e-07, "loss": 2.8682, "step": 15920 }, { "epoch": 106.91582491582491, "grad_norm": 100.26856994628906, "learning_rate": 8.93862970319133e-07, "loss": 2.7345, "step": 15930 }, { "epoch": 106.98316498316498, "grad_norm": 70.4606704711914, "learning_rate": 8.92814054620681e-07, "loss": 3.0593, "step": 15940 }, { "epoch": 107.04713804713805, "grad_norm": 150.6870880126953, "learning_rate": 8.917652582078682e-07, "loss": 2.2567, "step": 15950 }, { "epoch": 107.11447811447812, "grad_norm": 68.572021484375, "learning_rate": 8.907165822478839e-07, "loss": 2.5206, "step": 15960 }, { "epoch": 107.18181818181819, "grad_norm": 243.23788452148438, "learning_rate": 8.896680279077843e-07, "loss": 2.6582, "step": 15970 }, { "epoch": 107.24915824915826, "grad_norm": 175.29371643066406, "learning_rate": 8.886195963544898e-07, "loss": 2.7983, "step": 15980 }, { "epoch": 107.31649831649831, "grad_norm": 111.721923828125, "learning_rate": 8.875712887547844e-07, "loss": 2.4258, "step": 15990 }, { "epoch": 107.38383838383838, "grad_norm": 74.8442153930664, "learning_rate": 8.865231062753135e-07, "loss": 2.5099, "step": 16000 }, { "epoch": 107.38383838383838, "eval_loss": 0.3430997431278229, "eval_mae": 0.4102195203304291, "eval_mse": 0.3430997133255005, "eval_r2": 0.24952882528305054, "eval_rmse": 0.5857471411159431, "eval_runtime": 10.6834, "eval_samples_per_second": 444.52, "eval_steps_per_second": 13.947, "step": 16000 }, { "epoch": 107.45117845117845, "grad_norm": 106.36380767822266, "learning_rate": 8.854750500825842e-07, "loss": 2.386, "step": 16010 }, { "epoch": 107.51851851851852, "grad_norm": 100.75098419189453, "learning_rate": 8.844271213429622e-07, "loss": 2.3677, "step": 16020 }, { "epoch": 107.58585858585859, "grad_norm": 180.4070587158203, "learning_rate": 8.833793212226724e-07, "loss": 2.5676, "step": 16030 }, { "epoch": 107.65319865319866, "grad_norm": 59.811710357666016, "learning_rate": 8.823316508877953e-07, "loss": 2.5138, "step": 16040 }, { "epoch": 107.72053872053873, "grad_norm": 104.68939971923828, "learning_rate": 8.812841115042677e-07, "loss": 2.5304, "step": 16050 }, { "epoch": 107.78787878787878, "grad_norm": 45.20331954956055, "learning_rate": 8.802367042378806e-07, "loss": 2.4583, "step": 16060 }, { "epoch": 107.85521885521885, "grad_norm": 71.69432067871094, "learning_rate": 8.791894302542783e-07, "loss": 2.7695, "step": 16070 }, { "epoch": 107.92255892255892, "grad_norm": 154.58355712890625, "learning_rate": 8.781422907189557e-07, "loss": 2.5299, "step": 16080 }, { "epoch": 107.98989898989899, "grad_norm": 94.5951156616211, "learning_rate": 8.770952867972592e-07, "loss": 2.7665, "step": 16090 }, { "epoch": 108.05387205387206, "grad_norm": 48.69154739379883, "learning_rate": 8.760484196543836e-07, "loss": 2.6913, "step": 16100 }, { "epoch": 108.05387205387206, "eval_loss": 0.345259428024292, "eval_mae": 0.4142686128616333, "eval_mse": 0.345259428024292, "eval_r2": 0.24480479955673218, "eval_rmse": 0.587587804523113, "eval_runtime": 10.6978, "eval_samples_per_second": 443.922, "eval_steps_per_second": 13.928, "step": 16100 }, { "epoch": 108.12121212121212, "grad_norm": 87.25008392333984, "learning_rate": 8.750016904553721e-07, "loss": 3.1271, "step": 16110 }, { "epoch": 108.1885521885522, "grad_norm": 176.812744140625, "learning_rate": 8.739551003651135e-07, "loss": 2.3097, "step": 16120 }, { "epoch": 108.25589225589225, "grad_norm": 140.9336395263672, "learning_rate": 8.729086505483427e-07, "loss": 2.5988, "step": 16130 }, { "epoch": 108.32323232323232, "grad_norm": 40.20861053466797, "learning_rate": 8.718623421696379e-07, "loss": 2.843, "step": 16140 }, { "epoch": 108.39057239057239, "grad_norm": 241.8306121826172, "learning_rate": 8.708161763934202e-07, "loss": 2.4376, "step": 16150 }, { "epoch": 108.45791245791246, "grad_norm": 108.08833312988281, "learning_rate": 8.697701543839515e-07, "loss": 2.3187, "step": 16160 }, { "epoch": 108.52525252525253, "grad_norm": 144.3564453125, "learning_rate": 8.687242773053351e-07, "loss": 2.8402, "step": 16170 }, { "epoch": 108.5925925925926, "grad_norm": 123.177734375, "learning_rate": 8.676785463215106e-07, "loss": 2.4563, "step": 16180 }, { "epoch": 108.65993265993266, "grad_norm": 158.01535034179688, "learning_rate": 8.666329625962571e-07, "loss": 2.2889, "step": 16190 }, { "epoch": 108.72727272727273, "grad_norm": 187.03536987304688, "learning_rate": 8.655875272931891e-07, "loss": 2.585, "step": 16200 }, { "epoch": 108.72727272727273, "eval_loss": 0.3434922397136688, "eval_mae": 0.4111292064189911, "eval_mse": 0.3434922397136688, "eval_r2": 0.24867022037506104, "eval_rmse": 0.5860821100440354, "eval_runtime": 10.6914, "eval_samples_per_second": 444.19, "eval_steps_per_second": 13.936, "step": 16200 }, { "epoch": 108.79461279461279, "grad_norm": 133.2149200439453, "learning_rate": 8.645422415757562e-07, "loss": 2.2632, "step": 16210 }, { "epoch": 108.86195286195286, "grad_norm": 167.5309295654297, "learning_rate": 8.634971066072407e-07, "loss": 2.6451, "step": 16220 }, { "epoch": 108.92929292929293, "grad_norm": 105.16175842285156, "learning_rate": 8.62452123550758e-07, "loss": 2.4102, "step": 16230 }, { "epoch": 108.996632996633, "grad_norm": 128.83644104003906, "learning_rate": 8.614072935692542e-07, "loss": 2.5716, "step": 16240 }, { "epoch": 109.06060606060606, "grad_norm": 117.58937072753906, "learning_rate": 8.603626178255053e-07, "loss": 2.2516, "step": 16250 }, { "epoch": 109.12794612794613, "grad_norm": 90.55024719238281, "learning_rate": 8.593180974821147e-07, "loss": 2.5397, "step": 16260 }, { "epoch": 109.1952861952862, "grad_norm": 170.5749053955078, "learning_rate": 8.582737337015138e-07, "loss": 2.7096, "step": 16270 }, { "epoch": 109.26262626262626, "grad_norm": 128.3971405029297, "learning_rate": 8.572295276459595e-07, "loss": 2.6231, "step": 16280 }, { "epoch": 109.32996632996633, "grad_norm": 57.61628341674805, "learning_rate": 8.561854804775333e-07, "loss": 2.5571, "step": 16290 }, { "epoch": 109.3973063973064, "grad_norm": 208.45944213867188, "learning_rate": 8.551415933581392e-07, "loss": 2.7673, "step": 16300 }, { "epoch": 109.3973063973064, "eval_loss": 0.34926843643188477, "eval_mae": 0.41852304339408875, "eval_mse": 0.3492684066295624, "eval_r2": 0.23603582382202148, "eval_rmse": 0.5909893456142525, "eval_runtime": 10.6917, "eval_samples_per_second": 444.177, "eval_steps_per_second": 13.936, "step": 16300 }, { "epoch": 109.46464646464646, "grad_norm": 163.7532501220703, "learning_rate": 8.54097867449504e-07, "loss": 2.5175, "step": 16310 }, { "epoch": 109.53198653198653, "grad_norm": 61.866119384765625, "learning_rate": 8.530543039131745e-07, "loss": 2.4292, "step": 16320 }, { "epoch": 109.5993265993266, "grad_norm": 144.50550842285156, "learning_rate": 8.520109039105174e-07, "loss": 2.4108, "step": 16330 }, { "epoch": 109.66666666666667, "grad_norm": 160.60081481933594, "learning_rate": 8.509676686027165e-07, "loss": 2.2145, "step": 16340 }, { "epoch": 109.73400673400674, "grad_norm": 46.98921585083008, "learning_rate": 8.499245991507729e-07, "loss": 2.4854, "step": 16350 }, { "epoch": 109.8013468013468, "grad_norm": 196.76837158203125, "learning_rate": 8.488816967155032e-07, "loss": 2.4384, "step": 16360 }, { "epoch": 109.86868686868686, "grad_norm": 275.6607971191406, "learning_rate": 8.478389624575379e-07, "loss": 2.657, "step": 16370 }, { "epoch": 109.93602693602693, "grad_norm": 167.1900177001953, "learning_rate": 8.467963975373203e-07, "loss": 2.6874, "step": 16380 }, { "epoch": 110.0, "grad_norm": 27.700551986694336, "learning_rate": 8.457540031151052e-07, "loss": 2.5676, "step": 16390 }, { "epoch": 110.06734006734007, "grad_norm": 206.48605346679688, "learning_rate": 8.447117803509583e-07, "loss": 2.5656, "step": 16400 }, { "epoch": 110.06734006734007, "eval_loss": 0.3425993025302887, "eval_mae": 0.4118114113807678, "eval_mse": 0.3425993025302887, "eval_r2": 0.25062334537506104, "eval_rmse": 0.5853198292645557, "eval_runtime": 10.7032, "eval_samples_per_second": 443.701, "eval_steps_per_second": 13.921, "step": 16400 }, { "epoch": 110.13468013468014, "grad_norm": 86.54730224609375, "learning_rate": 8.436697304047533e-07, "loss": 2.4521, "step": 16410 }, { "epoch": 110.20202020202021, "grad_norm": 139.6156768798828, "learning_rate": 8.426278544361722e-07, "loss": 2.7964, "step": 16420 }, { "epoch": 110.26936026936026, "grad_norm": 139.89207458496094, "learning_rate": 8.41586153604703e-07, "loss": 2.53, "step": 16430 }, { "epoch": 110.33670033670033, "grad_norm": 47.79468536376953, "learning_rate": 8.405446290696398e-07, "loss": 2.5752, "step": 16440 }, { "epoch": 110.4040404040404, "grad_norm": 145.05862426757812, "learning_rate": 8.395032819900787e-07, "loss": 2.4957, "step": 16450 }, { "epoch": 110.47138047138047, "grad_norm": 219.68365478515625, "learning_rate": 8.384621135249197e-07, "loss": 2.6332, "step": 16460 }, { "epoch": 110.53872053872054, "grad_norm": 63.27830123901367, "learning_rate": 8.374211248328639e-07, "loss": 2.5208, "step": 16470 }, { "epoch": 110.60606060606061, "grad_norm": 88.47151184082031, "learning_rate": 8.363803170724116e-07, "loss": 2.1901, "step": 16480 }, { "epoch": 110.67340067340068, "grad_norm": 94.85098266601562, "learning_rate": 8.353396914018626e-07, "loss": 3.0523, "step": 16490 }, { "epoch": 110.74074074074075, "grad_norm": 108.42247772216797, "learning_rate": 8.342992489793135e-07, "loss": 2.5591, "step": 16500 }, { "epoch": 110.74074074074075, "eval_loss": 0.34326741099357605, "eval_mae": 0.41075560450553894, "eval_mse": 0.34326741099357605, "eval_r2": 0.249161958694458, "eval_rmse": 0.5858902721445168, "eval_runtime": 10.705, "eval_samples_per_second": 443.623, "eval_steps_per_second": 13.919, "step": 16500 }, { "epoch": 110.8080808080808, "grad_norm": 132.47962951660156, "learning_rate": 8.332589909626573e-07, "loss": 2.5171, "step": 16510 }, { "epoch": 110.87542087542087, "grad_norm": 134.42100524902344, "learning_rate": 8.322189185095813e-07, "loss": 2.5067, "step": 16520 }, { "epoch": 110.94276094276094, "grad_norm": 110.91717529296875, "learning_rate": 8.311790327775671e-07, "loss": 2.2592, "step": 16530 }, { "epoch": 111.006734006734, "grad_norm": 206.02220153808594, "learning_rate": 8.301393349238877e-07, "loss": 2.2158, "step": 16540 }, { "epoch": 111.07407407407408, "grad_norm": 184.86427307128906, "learning_rate": 8.290998261056074e-07, "loss": 2.3595, "step": 16550 }, { "epoch": 111.14141414141415, "grad_norm": 137.97799682617188, "learning_rate": 8.2806050747958e-07, "loss": 2.5029, "step": 16560 }, { "epoch": 111.20875420875421, "grad_norm": 137.61093139648438, "learning_rate": 8.270213802024475e-07, "loss": 2.6919, "step": 16570 }, { "epoch": 111.27609427609427, "grad_norm": 56.024681091308594, "learning_rate": 8.259824454306395e-07, "loss": 2.6457, "step": 16580 }, { "epoch": 111.34343434343434, "grad_norm": 104.54113006591797, "learning_rate": 8.249437043203711e-07, "loss": 2.3835, "step": 16590 }, { "epoch": 111.41077441077441, "grad_norm": 130.33377075195312, "learning_rate": 8.239051580276412e-07, "loss": 2.5925, "step": 16600 }, { "epoch": 111.41077441077441, "eval_loss": 0.34592562913894653, "eval_mae": 0.41448646783828735, "eval_mse": 0.34592562913894653, "eval_r2": 0.24334758520126343, "eval_rmse": 0.5881544262682604, "eval_runtime": 10.6851, "eval_samples_per_second": 444.449, "eval_steps_per_second": 13.945, "step": 16600 }, { "epoch": 111.47811447811448, "grad_norm": 187.9287567138672, "learning_rate": 8.228668077082328e-07, "loss": 2.7114, "step": 16610 }, { "epoch": 111.54545454545455, "grad_norm": 30.971420288085938, "learning_rate": 8.218286545177105e-07, "loss": 2.8681, "step": 16620 }, { "epoch": 111.61279461279462, "grad_norm": 110.57051849365234, "learning_rate": 8.207906996114198e-07, "loss": 2.7165, "step": 16630 }, { "epoch": 111.68013468013469, "grad_norm": 119.37846374511719, "learning_rate": 8.197529441444846e-07, "loss": 2.2418, "step": 16640 }, { "epoch": 111.74747474747475, "grad_norm": 62.62727355957031, "learning_rate": 8.187153892718077e-07, "loss": 2.3671, "step": 16650 }, { "epoch": 111.81481481481481, "grad_norm": 160.21728515625, "learning_rate": 8.176780361480685e-07, "loss": 2.6779, "step": 16660 }, { "epoch": 111.88215488215488, "grad_norm": 80.14679718017578, "learning_rate": 8.166408859277221e-07, "loss": 2.3571, "step": 16670 }, { "epoch": 111.94949494949495, "grad_norm": 127.44060516357422, "learning_rate": 8.156039397649969e-07, "loss": 2.2848, "step": 16680 }, { "epoch": 112.01346801346801, "grad_norm": 167.91140747070312, "learning_rate": 8.145671988138952e-07, "loss": 2.5634, "step": 16690 }, { "epoch": 112.08080808080808, "grad_norm": 226.5815887451172, "learning_rate": 8.135306642281903e-07, "loss": 2.629, "step": 16700 }, { "epoch": 112.08080808080808, "eval_loss": 0.34894341230392456, "eval_mae": 0.4175073802471161, "eval_mse": 0.34894341230392456, "eval_r2": 0.2367466688156128, "eval_rmse": 0.5907143237673559, "eval_runtime": 10.6872, "eval_samples_per_second": 444.364, "eval_steps_per_second": 13.942, "step": 16700 }, { "epoch": 112.14814814814815, "grad_norm": 179.6548309326172, "learning_rate": 8.124943371614266e-07, "loss": 2.4309, "step": 16710 }, { "epoch": 112.21548821548822, "grad_norm": 196.0366973876953, "learning_rate": 8.114582187669169e-07, "loss": 2.881, "step": 16720 }, { "epoch": 112.28282828282828, "grad_norm": 118.0103759765625, "learning_rate": 8.10422310197741e-07, "loss": 2.4267, "step": 16730 }, { "epoch": 112.35016835016835, "grad_norm": 243.53883361816406, "learning_rate": 8.093866126067469e-07, "loss": 2.4534, "step": 16740 }, { "epoch": 112.41750841750842, "grad_norm": 157.05027770996094, "learning_rate": 8.083511271465468e-07, "loss": 2.8095, "step": 16750 }, { "epoch": 112.48484848484848, "grad_norm": 94.81672668457031, "learning_rate": 8.073158549695172e-07, "loss": 2.4343, "step": 16760 }, { "epoch": 112.55218855218855, "grad_norm": 203.37413024902344, "learning_rate": 8.062807972277966e-07, "loss": 2.5695, "step": 16770 }, { "epoch": 112.61952861952862, "grad_norm": 44.26837158203125, "learning_rate": 8.052459550732857e-07, "loss": 2.62, "step": 16780 }, { "epoch": 112.68686868686869, "grad_norm": 70.51052856445312, "learning_rate": 8.042113296576447e-07, "loss": 2.4705, "step": 16790 }, { "epoch": 112.75420875420875, "grad_norm": 79.16532897949219, "learning_rate": 8.031769221322932e-07, "loss": 2.4252, "step": 16800 }, { "epoch": 112.75420875420875, "eval_loss": 0.3427865505218506, "eval_mae": 0.41079193353652954, "eval_mse": 0.342786580324173, "eval_r2": 0.25021374225616455, "eval_rmse": 0.5854797864351706, "eval_runtime": 10.7144, "eval_samples_per_second": 443.236, "eval_steps_per_second": 13.907, "step": 16800 }, { "epoch": 112.82154882154882, "grad_norm": 270.6499938964844, "learning_rate": 8.021427336484073e-07, "loss": 2.3786, "step": 16810 }, { "epoch": 112.88888888888889, "grad_norm": 148.8785400390625, "learning_rate": 8.011087653569202e-07, "loss": 2.4168, "step": 16820 }, { "epoch": 112.95622895622895, "grad_norm": 189.4752197265625, "learning_rate": 8.000750184085197e-07, "loss": 2.3112, "step": 16830 }, { "epoch": 113.02020202020202, "grad_norm": 142.4873809814453, "learning_rate": 7.990414939536479e-07, "loss": 2.6472, "step": 16840 }, { "epoch": 113.08754208754209, "grad_norm": 47.532142639160156, "learning_rate": 7.980081931424978e-07, "loss": 2.4132, "step": 16850 }, { "epoch": 113.15488215488216, "grad_norm": 72.51603698730469, "learning_rate": 7.96975117125015e-07, "loss": 2.5307, "step": 16860 }, { "epoch": 113.22222222222223, "grad_norm": 78.01424407958984, "learning_rate": 7.959422670508943e-07, "loss": 2.4424, "step": 16870 }, { "epoch": 113.28956228956228, "grad_norm": 71.2894058227539, "learning_rate": 7.949096440695794e-07, "loss": 2.4226, "step": 16880 }, { "epoch": 113.35690235690235, "grad_norm": 165.93521118164062, "learning_rate": 7.938772493302605e-07, "loss": 2.5925, "step": 16890 }, { "epoch": 113.42424242424242, "grad_norm": 181.39376831054688, "learning_rate": 7.928450839818745e-07, "loss": 2.2489, "step": 16900 }, { "epoch": 113.42424242424242, "eval_loss": 0.3456210196018219, "eval_mae": 0.4150019884109497, "eval_mse": 0.3456210196018219, "eval_r2": 0.24401384592056274, "eval_rmse": 0.5878954155305363, "eval_runtime": 10.7172, "eval_samples_per_second": 443.12, "eval_steps_per_second": 13.903, "step": 16900 }, { "epoch": 113.49158249158249, "grad_norm": 111.90850830078125, "learning_rate": 7.918131491731029e-07, "loss": 2.7287, "step": 16910 }, { "epoch": 113.55892255892256, "grad_norm": 104.76981353759766, "learning_rate": 7.907814460523706e-07, "loss": 2.5121, "step": 16920 }, { "epoch": 113.62626262626263, "grad_norm": 84.84310150146484, "learning_rate": 7.897499757678444e-07, "loss": 2.6031, "step": 16930 }, { "epoch": 113.6936026936027, "grad_norm": 89.48077392578125, "learning_rate": 7.887187394674322e-07, "loss": 2.4869, "step": 16940 }, { "epoch": 113.76094276094275, "grad_norm": 32.31895065307617, "learning_rate": 7.876877382987816e-07, "loss": 2.3925, "step": 16950 }, { "epoch": 113.82828282828282, "grad_norm": 104.3415298461914, "learning_rate": 7.866569734092787e-07, "loss": 2.4582, "step": 16960 }, { "epoch": 113.89562289562289, "grad_norm": 164.385986328125, "learning_rate": 7.856264459460457e-07, "loss": 2.4774, "step": 16970 }, { "epoch": 113.96296296296296, "grad_norm": 111.20880889892578, "learning_rate": 7.845961570559419e-07, "loss": 2.3407, "step": 16980 }, { "epoch": 114.02693602693603, "grad_norm": 57.431758880615234, "learning_rate": 7.835661078855598e-07, "loss": 2.655, "step": 16990 }, { "epoch": 114.0942760942761, "grad_norm": 27.506927490234375, "learning_rate": 7.825362995812266e-07, "loss": 2.3661, "step": 17000 }, { "epoch": 114.0942760942761, "eval_loss": 0.3422691822052002, "eval_mae": 0.41043075919151306, "eval_mse": 0.3422691524028778, "eval_r2": 0.25134551525115967, "eval_rmse": 0.5850377358793857, "eval_runtime": 10.7174, "eval_samples_per_second": 443.112, "eval_steps_per_second": 13.903, "step": 17000 }, { "epoch": 114.16161616161617, "grad_norm": 130.0911865234375, "learning_rate": 7.815067332889995e-07, "loss": 2.3143, "step": 17010 }, { "epoch": 114.22895622895624, "grad_norm": 134.6124725341797, "learning_rate": 7.804774101546684e-07, "loss": 2.6311, "step": 17020 }, { "epoch": 114.29629629629629, "grad_norm": 112.34257507324219, "learning_rate": 7.79448331323751e-07, "loss": 2.4933, "step": 17030 }, { "epoch": 114.36363636363636, "grad_norm": 158.71714782714844, "learning_rate": 7.78419497941494e-07, "loss": 2.3999, "step": 17040 }, { "epoch": 114.43097643097643, "grad_norm": 110.19557189941406, "learning_rate": 7.773909111528707e-07, "loss": 2.6488, "step": 17050 }, { "epoch": 114.4983164983165, "grad_norm": 45.209503173828125, "learning_rate": 7.763625721025804e-07, "loss": 2.2064, "step": 17060 }, { "epoch": 114.56565656565657, "grad_norm": 72.67693328857422, "learning_rate": 7.753344819350457e-07, "loss": 2.9934, "step": 17070 }, { "epoch": 114.63299663299664, "grad_norm": 132.43409729003906, "learning_rate": 7.743066417944129e-07, "loss": 2.5786, "step": 17080 }, { "epoch": 114.7003367003367, "grad_norm": 49.20614242553711, "learning_rate": 7.732790528245503e-07, "loss": 2.5588, "step": 17090 }, { "epoch": 114.76767676767676, "grad_norm": 63.8277587890625, "learning_rate": 7.722517161690462e-07, "loss": 2.3775, "step": 17100 }, { "epoch": 114.76767676767676, "eval_loss": 0.3409411609172821, "eval_mae": 0.4106735289096832, "eval_mse": 0.3409411609172821, "eval_r2": 0.2542502284049988, "eval_rmse": 0.5839016705895627, "eval_runtime": 10.6939, "eval_samples_per_second": 444.085, "eval_steps_per_second": 13.933, "step": 17100 }, { "epoch": 114.83501683501683, "grad_norm": 156.1268310546875, "learning_rate": 7.712246329712079e-07, "loss": 2.6564, "step": 17110 }, { "epoch": 114.9023569023569, "grad_norm": 55.00673294067383, "learning_rate": 7.701978043740612e-07, "loss": 2.4868, "step": 17120 }, { "epoch": 114.96969696969697, "grad_norm": 142.51165771484375, "learning_rate": 7.691712315203483e-07, "loss": 2.4055, "step": 17130 }, { "epoch": 115.03367003367003, "grad_norm": 123.43620300292969, "learning_rate": 7.681449155525271e-07, "loss": 2.2574, "step": 17140 }, { "epoch": 115.1010101010101, "grad_norm": 263.8929138183594, "learning_rate": 7.67118857612769e-07, "loss": 2.5001, "step": 17150 }, { "epoch": 115.16835016835017, "grad_norm": 64.15228271484375, "learning_rate": 7.660930588429584e-07, "loss": 2.7295, "step": 17160 }, { "epoch": 115.23569023569024, "grad_norm": 203.17288208007812, "learning_rate": 7.650675203846917e-07, "loss": 2.4113, "step": 17170 }, { "epoch": 115.3030303030303, "grad_norm": 98.66624450683594, "learning_rate": 7.640422433792756e-07, "loss": 2.6952, "step": 17180 }, { "epoch": 115.37037037037037, "grad_norm": 59.588600158691406, "learning_rate": 7.63017228967725e-07, "loss": 2.4218, "step": 17190 }, { "epoch": 115.43771043771044, "grad_norm": 242.55992126464844, "learning_rate": 7.619924782907635e-07, "loss": 2.5099, "step": 17200 }, { "epoch": 115.43771043771044, "eval_loss": 0.34782862663269043, "eval_mae": 0.41699013113975525, "eval_mse": 0.34782862663269043, "eval_r2": 0.23918509483337402, "eval_rmse": 0.5897699777308866, "eval_runtime": 10.7041, "eval_samples_per_second": 443.662, "eval_steps_per_second": 13.92, "step": 17200 }, { "epoch": 115.5050505050505, "grad_norm": 343.57049560546875, "learning_rate": 7.609679924888207e-07, "loss": 2.8008, "step": 17210 }, { "epoch": 115.57239057239057, "grad_norm": 156.10012817382812, "learning_rate": 7.599437727020319e-07, "loss": 2.6914, "step": 17220 }, { "epoch": 115.63973063973064, "grad_norm": 148.49195861816406, "learning_rate": 7.589198200702355e-07, "loss": 2.5177, "step": 17230 }, { "epoch": 115.70707070707071, "grad_norm": 282.1175842285156, "learning_rate": 7.578961357329733e-07, "loss": 2.3954, "step": 17240 }, { "epoch": 115.77441077441077, "grad_norm": 98.45209503173828, "learning_rate": 7.568727208294884e-07, "loss": 2.3512, "step": 17250 }, { "epoch": 115.84175084175084, "grad_norm": 193.1466064453125, "learning_rate": 7.558495764987242e-07, "loss": 2.3926, "step": 17260 }, { "epoch": 115.9090909090909, "grad_norm": 155.89797973632812, "learning_rate": 7.548267038793223e-07, "loss": 2.7916, "step": 17270 }, { "epoch": 115.97643097643098, "grad_norm": 46.882625579833984, "learning_rate": 7.538041041096228e-07, "loss": 2.519, "step": 17280 }, { "epoch": 116.04040404040404, "grad_norm": 106.67388153076172, "learning_rate": 7.527817783276611e-07, "loss": 2.1129, "step": 17290 }, { "epoch": 116.10774410774411, "grad_norm": 99.96617889404297, "learning_rate": 7.517597276711687e-07, "loss": 2.4206, "step": 17300 }, { "epoch": 116.10774410774411, "eval_loss": 0.3469335436820984, "eval_mae": 0.4163127541542053, "eval_mse": 0.346933513879776, "eval_r2": 0.24114298820495605, "eval_rmse": 0.5890106228921309, "eval_runtime": 10.6967, "eval_samples_per_second": 443.969, "eval_steps_per_second": 13.93, "step": 17300 }, { "epoch": 116.17508417508418, "grad_norm": 230.4870147705078, "learning_rate": 7.507379532775706e-07, "loss": 2.3822, "step": 17310 }, { "epoch": 116.24242424242425, "grad_norm": 138.14553833007812, "learning_rate": 7.497164562839844e-07, "loss": 2.3991, "step": 17320 }, { "epoch": 116.3097643097643, "grad_norm": 271.07501220703125, "learning_rate": 7.486952378272183e-07, "loss": 2.3624, "step": 17330 }, { "epoch": 116.37710437710437, "grad_norm": 275.6913146972656, "learning_rate": 7.476742990437715e-07, "loss": 2.5791, "step": 17340 }, { "epoch": 116.44444444444444, "grad_norm": 226.59422302246094, "learning_rate": 7.466536410698315e-07, "loss": 2.0465, "step": 17350 }, { "epoch": 116.51178451178451, "grad_norm": 83.55384826660156, "learning_rate": 7.456332650412733e-07, "loss": 2.319, "step": 17360 }, { "epoch": 116.57912457912458, "grad_norm": 87.9276123046875, "learning_rate": 7.446131720936582e-07, "loss": 2.5446, "step": 17370 }, { "epoch": 116.64646464646465, "grad_norm": 129.2769775390625, "learning_rate": 7.435933633622322e-07, "loss": 2.6434, "step": 17380 }, { "epoch": 116.71380471380472, "grad_norm": 254.0415802001953, "learning_rate": 7.425738399819253e-07, "loss": 2.6708, "step": 17390 }, { "epoch": 116.78114478114477, "grad_norm": 89.44224548339844, "learning_rate": 7.415546030873503e-07, "loss": 2.6232, "step": 17400 }, { "epoch": 116.78114478114477, "eval_loss": 0.34024763107299805, "eval_mae": 0.40883809328079224, "eval_mse": 0.34024763107299805, "eval_r2": 0.255767285823822, "eval_rmse": 0.583307492728319, "eval_runtime": 10.7028, "eval_samples_per_second": 443.717, "eval_steps_per_second": 13.922, "step": 17400 }, { "epoch": 116.84848484848484, "grad_norm": 142.9999542236328, "learning_rate": 7.405356538128e-07, "loss": 2.5987, "step": 17410 }, { "epoch": 116.91582491582491, "grad_norm": 212.5491180419922, "learning_rate": 7.395169932922483e-07, "loss": 3.0281, "step": 17420 }, { "epoch": 116.98316498316498, "grad_norm": 150.14988708496094, "learning_rate": 7.384986226593472e-07, "loss": 2.5603, "step": 17430 }, { "epoch": 117.04713804713805, "grad_norm": 92.34686279296875, "learning_rate": 7.374805430474262e-07, "loss": 2.0662, "step": 17440 }, { "epoch": 117.11447811447812, "grad_norm": 34.449806213378906, "learning_rate": 7.364627555894908e-07, "loss": 2.1402, "step": 17450 }, { "epoch": 117.18181818181819, "grad_norm": 81.15420532226562, "learning_rate": 7.354452614182215e-07, "loss": 2.5414, "step": 17460 }, { "epoch": 117.24915824915826, "grad_norm": 149.23001098632812, "learning_rate": 7.344280616659724e-07, "loss": 2.4727, "step": 17470 }, { "epoch": 117.31649831649831, "grad_norm": 144.67884826660156, "learning_rate": 7.334111574647702e-07, "loss": 2.5445, "step": 17480 }, { "epoch": 117.38383838383838, "grad_norm": 103.60765838623047, "learning_rate": 7.32394549946312e-07, "loss": 2.5581, "step": 17490 }, { "epoch": 117.45117845117845, "grad_norm": 57.596839904785156, "learning_rate": 7.313782402419651e-07, "loss": 2.5312, "step": 17500 }, { "epoch": 117.45117845117845, "eval_loss": 0.3449895679950714, "eval_mae": 0.4166980981826782, "eval_mse": 0.3449895679950714, "eval_r2": 0.24539506435394287, "eval_rmse": 0.5873581258440811, "eval_runtime": 10.6998, "eval_samples_per_second": 443.842, "eval_steps_per_second": 13.926, "step": 17500 }, { "epoch": 117.51851851851852, "grad_norm": 30.203489303588867, "learning_rate": 7.303622294827657e-07, "loss": 2.5801, "step": 17510 }, { "epoch": 117.58585858585859, "grad_norm": 81.98004913330078, "learning_rate": 7.29346518799417e-07, "loss": 2.5286, "step": 17520 }, { "epoch": 117.65319865319866, "grad_norm": 250.4847412109375, "learning_rate": 7.283311093222882e-07, "loss": 2.5745, "step": 17530 }, { "epoch": 117.72053872053873, "grad_norm": 128.8531494140625, "learning_rate": 7.273160021814131e-07, "loss": 2.6434, "step": 17540 }, { "epoch": 117.78787878787878, "grad_norm": 194.4693145751953, "learning_rate": 7.2630119850649e-07, "loss": 2.6315, "step": 17550 }, { "epoch": 117.85521885521885, "grad_norm": 161.28839111328125, "learning_rate": 7.252866994268777e-07, "loss": 2.5171, "step": 17560 }, { "epoch": 117.92255892255892, "grad_norm": 81.74833679199219, "learning_rate": 7.242725060715977e-07, "loss": 2.3484, "step": 17570 }, { "epoch": 117.98989898989899, "grad_norm": 154.95875549316406, "learning_rate": 7.232586195693309e-07, "loss": 2.5783, "step": 17580 }, { "epoch": 118.05387205387206, "grad_norm": 140.2095489501953, "learning_rate": 7.222450410484158e-07, "loss": 2.6385, "step": 17590 }, { "epoch": 118.12121212121212, "grad_norm": 138.27572631835938, "learning_rate": 7.212317716368493e-07, "loss": 2.6742, "step": 17600 }, { "epoch": 118.12121212121212, "eval_loss": 0.3473723232746124, "eval_mae": 0.41551727056503296, "eval_mse": 0.3473723232746124, "eval_r2": 0.24018323421478271, "eval_rmse": 0.5893830021934908, "eval_runtime": 10.6984, "eval_samples_per_second": 443.897, "eval_steps_per_second": 13.927, "step": 17600 }, { "epoch": 118.1885521885522, "grad_norm": 35.57036209106445, "learning_rate": 7.202188124622836e-07, "loss": 2.6348, "step": 17610 }, { "epoch": 118.25589225589225, "grad_norm": 182.90814208984375, "learning_rate": 7.192061646520262e-07, "loss": 2.531, "step": 17620 }, { "epoch": 118.32323232323232, "grad_norm": 224.6385955810547, "learning_rate": 7.181938293330372e-07, "loss": 2.5594, "step": 17630 }, { "epoch": 118.39057239057239, "grad_norm": 114.51687622070312, "learning_rate": 7.171818076319296e-07, "loss": 2.4729, "step": 17640 }, { "epoch": 118.45791245791246, "grad_norm": 252.56602478027344, "learning_rate": 7.161701006749674e-07, "loss": 2.4314, "step": 17650 }, { "epoch": 118.52525252525253, "grad_norm": 123.10667419433594, "learning_rate": 7.151587095880645e-07, "loss": 2.5796, "step": 17660 }, { "epoch": 118.5925925925926, "grad_norm": 147.45181274414062, "learning_rate": 7.141476354967821e-07, "loss": 2.1982, "step": 17670 }, { "epoch": 118.65993265993266, "grad_norm": 228.5786895751953, "learning_rate": 7.131368795263301e-07, "loss": 2.6159, "step": 17680 }, { "epoch": 118.72727272727273, "grad_norm": 50.8185920715332, "learning_rate": 7.121264428015636e-07, "loss": 2.5817, "step": 17690 }, { "epoch": 118.79461279461279, "grad_norm": 97.89784240722656, "learning_rate": 7.111163264469829e-07, "loss": 2.1475, "step": 17700 }, { "epoch": 118.79461279461279, "eval_loss": 0.34201139211654663, "eval_mae": 0.4112778604030609, "eval_mse": 0.34201139211654663, "eval_r2": 0.2519092559814453, "eval_rmse": 0.5848174006615626, "eval_runtime": 10.6889, "eval_samples_per_second": 444.292, "eval_steps_per_second": 13.94, "step": 17700 }, { "epoch": 118.86195286195286, "grad_norm": 163.14404296875, "learning_rate": 7.101065315867306e-07, "loss": 2.2889, "step": 17710 }, { "epoch": 118.92929292929293, "grad_norm": 136.44293212890625, "learning_rate": 7.090970593445928e-07, "loss": 2.4985, "step": 17720 }, { "epoch": 118.996632996633, "grad_norm": 191.50961303710938, "learning_rate": 7.080879108439961e-07, "loss": 2.6385, "step": 17730 }, { "epoch": 119.06060606060606, "grad_norm": 288.6164245605469, "learning_rate": 7.070790872080071e-07, "loss": 2.4693, "step": 17740 }, { "epoch": 119.12794612794613, "grad_norm": 55.85527801513672, "learning_rate": 7.0607058955933e-07, "loss": 2.3514, "step": 17750 }, { "epoch": 119.1952861952862, "grad_norm": 127.59712982177734, "learning_rate": 7.05062419020307e-07, "loss": 2.9122, "step": 17760 }, { "epoch": 119.26262626262626, "grad_norm": 317.9173583984375, "learning_rate": 7.040545767129159e-07, "loss": 2.5423, "step": 17770 }, { "epoch": 119.32996632996633, "grad_norm": 126.70811462402344, "learning_rate": 7.030470637587699e-07, "loss": 2.3059, "step": 17780 }, { "epoch": 119.3973063973064, "grad_norm": 193.3573455810547, "learning_rate": 7.020398812791146e-07, "loss": 2.3871, "step": 17790 }, { "epoch": 119.46464646464646, "grad_norm": 49.32783508300781, "learning_rate": 7.010330303948284e-07, "loss": 2.387, "step": 17800 }, { "epoch": 119.46464646464646, "eval_loss": 0.34038272500038147, "eval_mae": 0.4093923568725586, "eval_mse": 0.3403826653957367, "eval_r2": 0.2554718852043152, "eval_rmse": 0.5834232300789339, "eval_runtime": 10.7056, "eval_samples_per_second": 443.598, "eval_steps_per_second": 13.918, "step": 17800 }, { "epoch": 119.53198653198653, "grad_norm": 165.819091796875, "learning_rate": 7.000265122264208e-07, "loss": 2.3201, "step": 17810 }, { "epoch": 119.5993265993266, "grad_norm": 39.66365051269531, "learning_rate": 6.990203278940312e-07, "loss": 2.5064, "step": 17820 }, { "epoch": 119.66666666666667, "grad_norm": 58.273468017578125, "learning_rate": 6.980144785174264e-07, "loss": 2.5339, "step": 17830 }, { "epoch": 119.73400673400674, "grad_norm": 71.04066467285156, "learning_rate": 6.970089652160014e-07, "loss": 2.5121, "step": 17840 }, { "epoch": 119.8013468013468, "grad_norm": 169.17047119140625, "learning_rate": 6.96003789108777e-07, "loss": 2.526, "step": 17850 }, { "epoch": 119.86868686868686, "grad_norm": 160.56588745117188, "learning_rate": 6.949989513143986e-07, "loss": 2.6073, "step": 17860 }, { "epoch": 119.93602693602693, "grad_norm": 122.7963638305664, "learning_rate": 6.939944529511357e-07, "loss": 2.3794, "step": 17870 }, { "epoch": 120.0, "grad_norm": 156.96682739257812, "learning_rate": 6.929902951368786e-07, "loss": 2.3108, "step": 17880 }, { "epoch": 120.06734006734007, "grad_norm": 102.85325622558594, "learning_rate": 6.919864789891403e-07, "loss": 2.4563, "step": 17890 }, { "epoch": 120.13468013468014, "grad_norm": 132.12066650390625, "learning_rate": 6.909830056250526e-07, "loss": 2.3793, "step": 17900 }, { "epoch": 120.13468013468014, "eval_loss": 0.3410055935382843, "eval_mae": 0.4115580916404724, "eval_mse": 0.3410055935382843, "eval_r2": 0.25410932302474976, "eval_rmse": 0.5839568421880886, "eval_runtime": 10.6855, "eval_samples_per_second": 444.435, "eval_steps_per_second": 13.944, "step": 17900 }, { "epoch": 120.20202020202021, "grad_norm": 168.1981964111328, "learning_rate": 6.899798761613663e-07, "loss": 2.6095, "step": 17910 }, { "epoch": 120.26936026936026, "grad_norm": 86.16333770751953, "learning_rate": 6.889770917144484e-07, "loss": 2.2696, "step": 17920 }, { "epoch": 120.33670033670033, "grad_norm": 67.09626007080078, "learning_rate": 6.879746534002835e-07, "loss": 2.7161, "step": 17930 }, { "epoch": 120.4040404040404, "grad_norm": 39.44314193725586, "learning_rate": 6.8697256233447e-07, "loss": 2.8624, "step": 17940 }, { "epoch": 120.47138047138047, "grad_norm": 108.78730773925781, "learning_rate": 6.859708196322202e-07, "loss": 2.5169, "step": 17950 }, { "epoch": 120.53872053872054, "grad_norm": 276.81146240234375, "learning_rate": 6.849694264083584e-07, "loss": 2.3356, "step": 17960 }, { "epoch": 120.60606060606061, "grad_norm": 185.85244750976562, "learning_rate": 6.839683837773201e-07, "loss": 2.4998, "step": 17970 }, { "epoch": 120.67340067340068, "grad_norm": 265.2098693847656, "learning_rate": 6.829676928531509e-07, "loss": 2.2535, "step": 17980 }, { "epoch": 120.74074074074075, "grad_norm": 195.41958618164062, "learning_rate": 6.819673547495052e-07, "loss": 2.5288, "step": 17990 }, { "epoch": 120.8080808080808, "grad_norm": 70.97891998291016, "learning_rate": 6.809673705796435e-07, "loss": 2.3178, "step": 18000 }, { "epoch": 120.8080808080808, "eval_loss": 0.341045081615448, "eval_mae": 0.40975165367126465, "eval_mse": 0.341045081615448, "eval_r2": 0.2540229558944702, "eval_rmse": 0.5839906519932044, "eval_runtime": 10.6984, "eval_samples_per_second": 443.896, "eval_steps_per_second": 13.927, "step": 18000 }, { "epoch": 120.87542087542087, "grad_norm": 104.38661193847656, "learning_rate": 6.799677414564338e-07, "loss": 2.424, "step": 18010 }, { "epoch": 120.94276094276094, "grad_norm": 273.9122314453125, "learning_rate": 6.789684684923482e-07, "loss": 2.6042, "step": 18020 }, { "epoch": 121.006734006734, "grad_norm": 122.08675384521484, "learning_rate": 6.779695527994631e-07, "loss": 2.4578, "step": 18030 }, { "epoch": 121.07407407407408, "grad_norm": 36.84945297241211, "learning_rate": 6.769709954894563e-07, "loss": 2.2585, "step": 18040 }, { "epoch": 121.14141414141415, "grad_norm": 188.73062133789062, "learning_rate": 6.759727976736075e-07, "loss": 2.4314, "step": 18050 }, { "epoch": 121.20875420875421, "grad_norm": 288.13946533203125, "learning_rate": 6.749749604627963e-07, "loss": 2.3757, "step": 18060 }, { "epoch": 121.27609427609427, "grad_norm": 326.8589172363281, "learning_rate": 6.739774849675008e-07, "loss": 2.3561, "step": 18070 }, { "epoch": 121.34343434343434, "grad_norm": 133.47531127929688, "learning_rate": 6.729803722977965e-07, "loss": 2.4991, "step": 18080 }, { "epoch": 121.41077441077441, "grad_norm": 80.4793472290039, "learning_rate": 6.719836235633552e-07, "loss": 2.2753, "step": 18090 }, { "epoch": 121.47811447811448, "grad_norm": 83.18433380126953, "learning_rate": 6.709872398734442e-07, "loss": 2.7161, "step": 18100 }, { "epoch": 121.47811447811448, "eval_loss": 0.3398570120334625, "eval_mae": 0.4088427722454071, "eval_mse": 0.3398570716381073, "eval_r2": 0.2566215395927429, "eval_rmse": 0.5829726165422414, "eval_runtime": 10.7117, "eval_samples_per_second": 443.346, "eval_steps_per_second": 13.91, "step": 18100 }, { "epoch": 121.54545454545455, "grad_norm": 104.80241394042969, "learning_rate": 6.699912223369233e-07, "loss": 2.5086, "step": 18110 }, { "epoch": 121.61279461279462, "grad_norm": 91.91261291503906, "learning_rate": 6.689955720622456e-07, "loss": 2.5593, "step": 18120 }, { "epoch": 121.68013468013469, "grad_norm": 197.07408142089844, "learning_rate": 6.68000290157456e-07, "loss": 2.8336, "step": 18130 }, { "epoch": 121.74747474747475, "grad_norm": 82.14676666259766, "learning_rate": 6.670053777301883e-07, "loss": 2.5882, "step": 18140 }, { "epoch": 121.81481481481481, "grad_norm": 136.88229370117188, "learning_rate": 6.660108358876659e-07, "loss": 2.7481, "step": 18150 }, { "epoch": 121.88215488215488, "grad_norm": 165.84002685546875, "learning_rate": 6.650166657366998e-07, "loss": 2.3152, "step": 18160 }, { "epoch": 121.94949494949495, "grad_norm": 120.45549011230469, "learning_rate": 6.64022868383687e-07, "loss": 2.298, "step": 18170 }, { "epoch": 122.01346801346801, "grad_norm": 338.8877868652344, "learning_rate": 6.630294449346094e-07, "loss": 2.396, "step": 18180 }, { "epoch": 122.08080808080808, "grad_norm": 179.32363891601562, "learning_rate": 6.620363964950337e-07, "loss": 2.1947, "step": 18190 }, { "epoch": 122.14814814814815, "grad_norm": 180.36607360839844, "learning_rate": 6.610437241701081e-07, "loss": 2.3547, "step": 18200 }, { "epoch": 122.14814814814815, "eval_loss": 0.34125375747680664, "eval_mae": 0.41045892238616943, "eval_mse": 0.34125375747680664, "eval_r2": 0.2535665035247803, "eval_rmse": 0.5841692883717926, "eval_runtime": 10.6997, "eval_samples_per_second": 443.843, "eval_steps_per_second": 13.926, "step": 18200 }, { "epoch": 122.21548821548822, "grad_norm": 100.79100036621094, "learning_rate": 6.600514290645634e-07, "loss": 2.4068, "step": 18210 }, { "epoch": 122.28282828282828, "grad_norm": 171.20217895507812, "learning_rate": 6.590595122827096e-07, "loss": 2.8483, "step": 18220 }, { "epoch": 122.35016835016835, "grad_norm": 193.1638641357422, "learning_rate": 6.580679749284362e-07, "loss": 2.3405, "step": 18230 }, { "epoch": 122.41750841750842, "grad_norm": 185.27420043945312, "learning_rate": 6.570768181052102e-07, "loss": 2.5242, "step": 18240 }, { "epoch": 122.48484848484848, "grad_norm": 87.59162139892578, "learning_rate": 6.560860429160757e-07, "loss": 2.7969, "step": 18250 }, { "epoch": 122.55218855218855, "grad_norm": 176.76722717285156, "learning_rate": 6.550956504636511e-07, "loss": 2.463, "step": 18260 }, { "epoch": 122.61952861952862, "grad_norm": 122.32568359375, "learning_rate": 6.541056418501295e-07, "loss": 2.3473, "step": 18270 }, { "epoch": 122.68686868686869, "grad_norm": 114.18183898925781, "learning_rate": 6.531160181772768e-07, "loss": 2.333, "step": 18280 }, { "epoch": 122.75420875420875, "grad_norm": 124.22421264648438, "learning_rate": 6.521267805464305e-07, "loss": 2.7991, "step": 18290 }, { "epoch": 122.82154882154882, "grad_norm": 122.70832824707031, "learning_rate": 6.511379300584983e-07, "loss": 2.3265, "step": 18300 }, { "epoch": 122.82154882154882, "eval_loss": 0.33957546949386597, "eval_mae": 0.4083608388900757, "eval_mse": 0.33957546949386597, "eval_r2": 0.25723743438720703, "eval_rmse": 0.5827310438734716, "eval_runtime": 10.6973, "eval_samples_per_second": 443.946, "eval_steps_per_second": 13.929, "step": 18300 }, { "epoch": 122.88888888888889, "grad_norm": 66.57396697998047, "learning_rate": 6.50149467813957e-07, "loss": 2.8003, "step": 18310 }, { "epoch": 122.95622895622895, "grad_norm": 77.75614166259766, "learning_rate": 6.491613949128516e-07, "loss": 2.2412, "step": 18320 }, { "epoch": 123.02020202020202, "grad_norm": 120.21575927734375, "learning_rate": 6.48173712454794e-07, "loss": 2.235, "step": 18330 }, { "epoch": 123.08754208754209, "grad_norm": 103.39643859863281, "learning_rate": 6.471864215389607e-07, "loss": 2.3596, "step": 18340 }, { "epoch": 123.15488215488216, "grad_norm": 149.22314453125, "learning_rate": 6.461995232640934e-07, "loss": 2.422, "step": 18350 }, { "epoch": 123.22222222222223, "grad_norm": 66.37051391601562, "learning_rate": 6.452130187284963e-07, "loss": 2.2414, "step": 18360 }, { "epoch": 123.28956228956228, "grad_norm": 70.93807220458984, "learning_rate": 6.44226909030036e-07, "loss": 2.8509, "step": 18370 }, { "epoch": 123.35690235690235, "grad_norm": 84.46368408203125, "learning_rate": 6.432411952661385e-07, "loss": 2.4819, "step": 18380 }, { "epoch": 123.42424242424242, "grad_norm": 61.009056091308594, "learning_rate": 6.422558785337906e-07, "loss": 2.6021, "step": 18390 }, { "epoch": 123.49158249158249, "grad_norm": 205.14279174804688, "learning_rate": 6.412709599295361e-07, "loss": 2.5192, "step": 18400 }, { "epoch": 123.49158249158249, "eval_loss": 0.3414130210876465, "eval_mae": 0.40915241837501526, "eval_mse": 0.3414129912853241, "eval_r2": 0.2532181739807129, "eval_rmse": 0.5843055632845918, "eval_runtime": 10.7018, "eval_samples_per_second": 443.759, "eval_steps_per_second": 13.923, "step": 18400 }, { "epoch": 123.55892255892256, "grad_norm": 98.07953643798828, "learning_rate": 6.402864405494764e-07, "loss": 2.5663, "step": 18410 }, { "epoch": 123.62626262626263, "grad_norm": 44.94609832763672, "learning_rate": 6.393023214892684e-07, "loss": 2.5428, "step": 18420 }, { "epoch": 123.6936026936027, "grad_norm": 175.45606994628906, "learning_rate": 6.383186038441238e-07, "loss": 2.1909, "step": 18430 }, { "epoch": 123.76094276094275, "grad_norm": 259.58056640625, "learning_rate": 6.37335288708807e-07, "loss": 2.4976, "step": 18440 }, { "epoch": 123.82828282828282, "grad_norm": 83.59278106689453, "learning_rate": 6.363523771776347e-07, "loss": 2.1013, "step": 18450 }, { "epoch": 123.89562289562289, "grad_norm": 158.1268768310547, "learning_rate": 6.353698703444745e-07, "loss": 2.7263, "step": 18460 }, { "epoch": 123.96296296296296, "grad_norm": 129.13595581054688, "learning_rate": 6.34387769302744e-07, "loss": 2.5342, "step": 18470 }, { "epoch": 124.02693602693603, "grad_norm": 58.30040740966797, "learning_rate": 6.334060751454082e-07, "loss": 2.4694, "step": 18480 }, { "epoch": 124.0942760942761, "grad_norm": 293.0711975097656, "learning_rate": 6.324247889649802e-07, "loss": 2.3749, "step": 18490 }, { "epoch": 124.16161616161617, "grad_norm": 158.9259490966797, "learning_rate": 6.314439118535186e-07, "loss": 2.4869, "step": 18500 }, { "epoch": 124.16161616161617, "eval_loss": 0.34074458479881287, "eval_mae": 0.41225555539131165, "eval_mse": 0.34074458479881287, "eval_r2": 0.25468021631240845, "eval_rmse": 0.5837333165057592, "eval_runtime": 10.7005, "eval_samples_per_second": 443.81, "eval_steps_per_second": 13.925, "step": 18500 }, { "epoch": 124.22895622895624, "grad_norm": 120.65213775634766, "learning_rate": 6.304634449026271e-07, "loss": 2.3398, "step": 18510 }, { "epoch": 124.29629629629629, "grad_norm": 202.04953002929688, "learning_rate": 6.294833892034526e-07, "loss": 2.2393, "step": 18520 }, { "epoch": 124.36363636363636, "grad_norm": 178.11135864257812, "learning_rate": 6.285037458466844e-07, "loss": 2.834, "step": 18530 }, { "epoch": 124.43097643097643, "grad_norm": 61.34870147705078, "learning_rate": 6.27524515922553e-07, "loss": 2.3648, "step": 18540 }, { "epoch": 124.4983164983165, "grad_norm": 137.79217529296875, "learning_rate": 6.265457005208292e-07, "loss": 2.3152, "step": 18550 }, { "epoch": 124.56565656565657, "grad_norm": 183.19271850585938, "learning_rate": 6.255673007308213e-07, "loss": 2.579, "step": 18560 }, { "epoch": 124.63299663299664, "grad_norm": 169.13299560546875, "learning_rate": 6.245893176413761e-07, "loss": 2.5923, "step": 18570 }, { "epoch": 124.7003367003367, "grad_norm": 74.70870208740234, "learning_rate": 6.236117523408766e-07, "loss": 2.422, "step": 18580 }, { "epoch": 124.76767676767676, "grad_norm": 113.40438842773438, "learning_rate": 6.226346059172407e-07, "loss": 2.2861, "step": 18590 }, { "epoch": 124.83501683501683, "grad_norm": 153.45887756347656, "learning_rate": 6.216578794579197e-07, "loss": 2.6772, "step": 18600 }, { "epoch": 124.83501683501683, "eval_loss": 0.3407113254070282, "eval_mae": 0.4112054109573364, "eval_mse": 0.3407113552093506, "eval_r2": 0.2547529339790344, "eval_rmse": 0.5837048528231974, "eval_runtime": 10.7171, "eval_samples_per_second": 443.122, "eval_steps_per_second": 13.903, "step": 18600 }, { "epoch": 124.9023569023569, "grad_norm": 148.8669891357422, "learning_rate": 6.206815740498981e-07, "loss": 2.378, "step": 18610 }, { "epoch": 124.96969696969697, "grad_norm": 111.92684936523438, "learning_rate": 6.197056907796915e-07, "loss": 2.5787, "step": 18620 }, { "epoch": 125.03367003367003, "grad_norm": 147.95419311523438, "learning_rate": 6.187302307333462e-07, "loss": 2.0542, "step": 18630 }, { "epoch": 125.1010101010101, "grad_norm": 86.5404281616211, "learning_rate": 6.177551949964366e-07, "loss": 2.6394, "step": 18640 }, { "epoch": 125.16835016835017, "grad_norm": 230.77357482910156, "learning_rate": 6.16780584654066e-07, "loss": 2.5403, "step": 18650 }, { "epoch": 125.23569023569024, "grad_norm": 138.40049743652344, "learning_rate": 6.158064007908631e-07, "loss": 2.6869, "step": 18660 }, { "epoch": 125.3030303030303, "grad_norm": 211.59716796875, "learning_rate": 6.148326444909828e-07, "loss": 2.8009, "step": 18670 }, { "epoch": 125.37037037037037, "grad_norm": 92.1100845336914, "learning_rate": 6.139566302834021e-07, "loss": 2.7165, "step": 18680 }, { "epoch": 125.43771043771044, "grad_norm": 155.84068298339844, "learning_rate": 6.130809640695699e-07, "loss": 2.425, "step": 18690 }, { "epoch": 125.5050505050505, "grad_norm": 67.18455505371094, "learning_rate": 6.121084107106413e-07, "loss": 2.6477, "step": 18700 }, { "epoch": 125.5050505050505, "eval_loss": 0.34027594327926636, "eval_mae": 0.41045081615448, "eval_mse": 0.34027594327926636, "eval_r2": 0.2557052969932556, "eval_rmse": 0.5833317609039185, "eval_runtime": 10.7058, "eval_samples_per_second": 443.591, "eval_steps_per_second": 13.918, "step": 18700 }, { "epoch": 125.57239057239057, "grad_norm": 263.66552734375, "learning_rate": 6.111362890304731e-07, "loss": 2.4528, "step": 18710 }, { "epoch": 125.63973063973064, "grad_norm": 131.6116485595703, "learning_rate": 6.101646001109253e-07, "loss": 2.284, "step": 18720 }, { "epoch": 125.70707070707071, "grad_norm": 93.6308822631836, "learning_rate": 6.091933450333759e-07, "loss": 2.0575, "step": 18730 }, { "epoch": 125.77441077441077, "grad_norm": 151.5377655029297, "learning_rate": 6.082225248787203e-07, "loss": 2.3189, "step": 18740 }, { "epoch": 125.84175084175084, "grad_norm": 101.92347717285156, "learning_rate": 6.072521407273697e-07, "loss": 2.597, "step": 18750 }, { "epoch": 125.9090909090909, "grad_norm": 70.58329772949219, "learning_rate": 6.062821936592499e-07, "loss": 2.234, "step": 18760 }, { "epoch": 125.97643097643098, "grad_norm": 145.2720947265625, "learning_rate": 6.053126847538008e-07, "loss": 2.4926, "step": 18770 }, { "epoch": 126.04040404040404, "grad_norm": 175.3448028564453, "learning_rate": 6.043436150899746e-07, "loss": 2.6406, "step": 18780 }, { "epoch": 126.10774410774411, "grad_norm": 287.54205322265625, "learning_rate": 6.033749857462338e-07, "loss": 2.2481, "step": 18790 }, { "epoch": 126.17508417508418, "grad_norm": 108.03854370117188, "learning_rate": 6.024067978005522e-07, "loss": 2.4887, "step": 18800 }, { "epoch": 126.17508417508418, "eval_loss": 0.3385668098926544, "eval_mae": 0.40949857234954834, "eval_mse": 0.3385668098926544, "eval_r2": 0.2594437599182129, "eval_rmse": 0.5818649412816126, "eval_runtime": 10.7173, "eval_samples_per_second": 443.113, "eval_steps_per_second": 13.903, "step": 18800 }, { "epoch": 126.24242424242425, "grad_norm": 57.91043472290039, "learning_rate": 6.014390523304111e-07, "loss": 2.2679, "step": 18810 }, { "epoch": 126.3097643097643, "grad_norm": 126.66097259521484, "learning_rate": 6.004717504128009e-07, "loss": 2.3625, "step": 18820 }, { "epoch": 126.37710437710437, "grad_norm": 185.7273712158203, "learning_rate": 5.995048931242164e-07, "loss": 2.2941, "step": 18830 }, { "epoch": 126.44444444444444, "grad_norm": 160.1407928466797, "learning_rate": 5.985384815406593e-07, "loss": 2.6141, "step": 18840 }, { "epoch": 126.51178451178451, "grad_norm": 150.23379516601562, "learning_rate": 5.975725167376345e-07, "loss": 2.3629, "step": 18850 }, { "epoch": 126.57912457912458, "grad_norm": 83.02543640136719, "learning_rate": 5.966069997901499e-07, "loss": 2.508, "step": 18860 }, { "epoch": 126.64646464646465, "grad_norm": 228.90972900390625, "learning_rate": 5.956419317727146e-07, "loss": 2.5523, "step": 18870 }, { "epoch": 126.71380471380472, "grad_norm": 59.839351654052734, "learning_rate": 5.946773137593386e-07, "loss": 2.3771, "step": 18880 }, { "epoch": 126.78114478114477, "grad_norm": 172.8157196044922, "learning_rate": 5.937131468235309e-07, "loss": 2.4562, "step": 18890 }, { "epoch": 126.84848484848484, "grad_norm": 194.1623077392578, "learning_rate": 5.927494320382986e-07, "loss": 2.6619, "step": 18900 }, { "epoch": 126.84848484848484, "eval_loss": 0.3399529457092285, "eval_mae": 0.41083279252052307, "eval_mse": 0.3399529457092285, "eval_r2": 0.2564117908477783, "eval_rmse": 0.5830548393669575, "eval_runtime": 10.6973, "eval_samples_per_second": 443.944, "eval_steps_per_second": 13.929, "step": 18900 }, { "epoch": 126.91582491582491, "grad_norm": 169.66827392578125, "learning_rate": 5.917861704761453e-07, "loss": 2.3858, "step": 18910 }, { "epoch": 126.98316498316498, "grad_norm": 133.8475341796875, "learning_rate": 5.908233632090705e-07, "loss": 2.4691, "step": 18920 }, { "epoch": 127.04713804713805, "grad_norm": 50.970306396484375, "learning_rate": 5.898610113085683e-07, "loss": 2.3021, "step": 18930 }, { "epoch": 127.11447811447812, "grad_norm": 110.2767105102539, "learning_rate": 5.88899115845625e-07, "loss": 2.703, "step": 18940 }, { "epoch": 127.18181818181819, "grad_norm": 217.91807556152344, "learning_rate": 5.879376778907204e-07, "loss": 2.4911, "step": 18950 }, { "epoch": 127.24915824915826, "grad_norm": 148.48654174804688, "learning_rate": 5.869766985138243e-07, "loss": 2.5766, "step": 18960 }, { "epoch": 127.31649831649831, "grad_norm": 80.49567413330078, "learning_rate": 5.86016178784396e-07, "loss": 2.3417, "step": 18970 }, { "epoch": 127.38383838383838, "grad_norm": 359.05731201171875, "learning_rate": 5.850561197713839e-07, "loss": 2.7513, "step": 18980 }, { "epoch": 127.45117845117845, "grad_norm": 51.1444091796875, "learning_rate": 5.840965225432233e-07, "loss": 2.1265, "step": 18990 }, { "epoch": 127.51851851851852, "grad_norm": 89.13682556152344, "learning_rate": 5.831373881678356e-07, "loss": 2.134, "step": 19000 }, { "epoch": 127.51851851851852, "eval_loss": 0.3392162621021271, "eval_mae": 0.408902645111084, "eval_mse": 0.3392162322998047, "eval_r2": 0.2580232620239258, "eval_rmse": 0.582422726462322, "eval_runtime": 10.7316, "eval_samples_per_second": 442.525, "eval_steps_per_second": 13.884, "step": 19000 }, { "epoch": 127.58585858585859, "grad_norm": 178.29396057128906, "learning_rate": 5.821787177126273e-07, "loss": 2.2375, "step": 19010 }, { "epoch": 127.65319865319866, "grad_norm": 49.074134826660156, "learning_rate": 5.812205122444885e-07, "loss": 2.551, "step": 19020 }, { "epoch": 127.72053872053873, "grad_norm": 239.50682067871094, "learning_rate": 5.802627728297909e-07, "loss": 2.658, "step": 19030 }, { "epoch": 127.78787878787878, "grad_norm": 127.5837173461914, "learning_rate": 5.793055005343894e-07, "loss": 2.5251, "step": 19040 }, { "epoch": 127.85521885521885, "grad_norm": 169.7710723876953, "learning_rate": 5.783486964236175e-07, "loss": 2.3988, "step": 19050 }, { "epoch": 127.92255892255892, "grad_norm": 76.81865692138672, "learning_rate": 5.773923615622886e-07, "loss": 2.317, "step": 19060 }, { "epoch": 127.98989898989899, "grad_norm": 153.9305419921875, "learning_rate": 5.764364970146934e-07, "loss": 2.5391, "step": 19070 }, { "epoch": 128.05387205387206, "grad_norm": 308.2644348144531, "learning_rate": 5.754811038445988e-07, "loss": 2.0194, "step": 19080 }, { "epoch": 128.12121212121212, "grad_norm": 38.86275100708008, "learning_rate": 5.745261831152483e-07, "loss": 2.6394, "step": 19090 }, { "epoch": 128.1885521885522, "grad_norm": 94.16796112060547, "learning_rate": 5.735717358893588e-07, "loss": 2.5488, "step": 19100 }, { "epoch": 128.1885521885522, "eval_loss": 0.33745676279067993, "eval_mae": 0.40733233094215393, "eval_mse": 0.33745676279067993, "eval_r2": 0.2618717551231384, "eval_rmse": 0.5809102880743979, "eval_runtime": 10.7041, "eval_samples_per_second": 443.662, "eval_steps_per_second": 13.92, "step": 19100 }, { "epoch": 128.25589225589226, "grad_norm": 149.78834533691406, "learning_rate": 5.726177632291198e-07, "loss": 2.4912, "step": 19110 }, { "epoch": 128.32323232323233, "grad_norm": 45.641178131103516, "learning_rate": 5.716642661961939e-07, "loss": 2.2305, "step": 19120 }, { "epoch": 128.3905723905724, "grad_norm": 271.06134033203125, "learning_rate": 5.70711245851713e-07, "loss": 2.278, "step": 19130 }, { "epoch": 128.45791245791247, "grad_norm": 134.86410522460938, "learning_rate": 5.697587032562801e-07, "loss": 2.5715, "step": 19140 }, { "epoch": 128.5252525252525, "grad_norm": 276.39764404296875, "learning_rate": 5.688066394699653e-07, "loss": 2.9564, "step": 19150 }, { "epoch": 128.59259259259258, "grad_norm": 41.84046936035156, "learning_rate": 5.678550555523056e-07, "loss": 2.5678, "step": 19160 }, { "epoch": 128.65993265993265, "grad_norm": 178.36634826660156, "learning_rate": 5.669039525623055e-07, "loss": 2.6686, "step": 19170 }, { "epoch": 128.72727272727272, "grad_norm": 191.73280334472656, "learning_rate": 5.659533315584328e-07, "loss": 2.3798, "step": 19180 }, { "epoch": 128.7946127946128, "grad_norm": 264.70941162109375, "learning_rate": 5.650031935986191e-07, "loss": 2.516, "step": 19190 }, { "epoch": 128.86195286195286, "grad_norm": 89.36589813232422, "learning_rate": 5.640535397402598e-07, "loss": 2.0688, "step": 19200 }, { "epoch": 128.86195286195286, "eval_loss": 0.3394608199596405, "eval_mae": 0.4099453091621399, "eval_mse": 0.3394608199596405, "eval_r2": 0.2574882507324219, "eval_rmse": 0.5826326629701088, "eval_runtime": 10.7135, "eval_samples_per_second": 443.273, "eval_steps_per_second": 13.908, "step": 19200 }, { "epoch": 128.92929292929293, "grad_norm": 199.31944274902344, "learning_rate": 5.631043710402092e-07, "loss": 2.2796, "step": 19210 }, { "epoch": 128.996632996633, "grad_norm": 77.33735656738281, "learning_rate": 5.621556885547839e-07, "loss": 2.4766, "step": 19220 }, { "epoch": 129.06060606060606, "grad_norm": 78.564697265625, "learning_rate": 5.612074933397579e-07, "loss": 2.6139, "step": 19230 }, { "epoch": 129.12794612794613, "grad_norm": 148.68234252929688, "learning_rate": 5.602597864503641e-07, "loss": 2.2742, "step": 19240 }, { "epoch": 129.1952861952862, "grad_norm": 251.53280639648438, "learning_rate": 5.59312568941291e-07, "loss": 2.5208, "step": 19250 }, { "epoch": 129.26262626262627, "grad_norm": 68.761962890625, "learning_rate": 5.583658418666825e-07, "loss": 2.5235, "step": 19260 }, { "epoch": 129.32996632996634, "grad_norm": 187.14352416992188, "learning_rate": 5.57419606280138e-07, "loss": 2.6347, "step": 19270 }, { "epoch": 129.3973063973064, "grad_norm": 169.13595581054688, "learning_rate": 5.564738632347082e-07, "loss": 2.4648, "step": 19280 }, { "epoch": 129.46464646464648, "grad_norm": 94.6471939086914, "learning_rate": 5.555286137828963e-07, "loss": 2.3082, "step": 19290 }, { "epoch": 129.53198653198652, "grad_norm": 94.82772064208984, "learning_rate": 5.545838589766569e-07, "loss": 2.2531, "step": 19300 }, { "epoch": 129.53198653198652, "eval_loss": 0.3409069776535034, "eval_mae": 0.409853458404541, "eval_mse": 0.3409069776535034, "eval_r2": 0.2543249726295471, "eval_rmse": 0.5838723984343698, "eval_runtime": 10.7104, "eval_samples_per_second": 443.401, "eval_steps_per_second": 13.912, "step": 19300 }, { "epoch": 129.5993265993266, "grad_norm": 68.34391021728516, "learning_rate": 5.536395998673928e-07, "loss": 2.2018, "step": 19310 }, { "epoch": 129.66666666666666, "grad_norm": 115.52449035644531, "learning_rate": 5.526958375059569e-07, "loss": 2.1798, "step": 19320 }, { "epoch": 129.73400673400673, "grad_norm": 125.80599212646484, "learning_rate": 5.517525729426475e-07, "loss": 2.5703, "step": 19330 }, { "epoch": 129.8013468013468, "grad_norm": 82.4094467163086, "learning_rate": 5.508098072272094e-07, "loss": 2.8586, "step": 19340 }, { "epoch": 129.86868686868686, "grad_norm": 128.1349334716797, "learning_rate": 5.498675414088332e-07, "loss": 2.4012, "step": 19350 }, { "epoch": 129.93602693602693, "grad_norm": 149.2617950439453, "learning_rate": 5.489257765361523e-07, "loss": 2.3145, "step": 19360 }, { "epoch": 130.0, "grad_norm": 203.18569946289062, "learning_rate": 5.479845136572422e-07, "loss": 2.2551, "step": 19370 }, { "epoch": 130.06734006734007, "grad_norm": 247.35008239746094, "learning_rate": 5.470437538196211e-07, "loss": 2.2791, "step": 19380 }, { "epoch": 130.13468013468014, "grad_norm": 233.3085174560547, "learning_rate": 5.461034980702459e-07, "loss": 2.4658, "step": 19390 }, { "epoch": 130.2020202020202, "grad_norm": 108.15304565429688, "learning_rate": 5.451637474555138e-07, "loss": 2.5375, "step": 19400 }, { "epoch": 130.2020202020202, "eval_loss": 0.33816981315612793, "eval_mae": 0.40896326303482056, "eval_mse": 0.33816981315612793, "eval_r2": 0.2603120803833008, "eval_rmse": 0.5815236995653126, "eval_runtime": 10.6935, "eval_samples_per_second": 444.1, "eval_steps_per_second": 13.934, "step": 19400 }, { "epoch": 130.26936026936028, "grad_norm": 370.2430725097656, "learning_rate": 5.44224503021259e-07, "loss": 2.6974, "step": 19410 }, { "epoch": 130.33670033670035, "grad_norm": 92.01687622070312, "learning_rate": 5.432857658127521e-07, "loss": 2.3673, "step": 19420 }, { "epoch": 130.40404040404042, "grad_norm": 73.56895446777344, "learning_rate": 5.423475368747007e-07, "loss": 2.746, "step": 19430 }, { "epoch": 130.47138047138048, "grad_norm": 222.0007781982422, "learning_rate": 5.414098172512452e-07, "loss": 2.4997, "step": 19440 }, { "epoch": 130.53872053872053, "grad_norm": 257.75201416015625, "learning_rate": 5.404726079859594e-07, "loss": 2.3817, "step": 19450 }, { "epoch": 130.6060606060606, "grad_norm": 197.68077087402344, "learning_rate": 5.395359101218504e-07, "loss": 2.2353, "step": 19460 }, { "epoch": 130.67340067340066, "grad_norm": 59.10215377807617, "learning_rate": 5.385997247013545e-07, "loss": 2.3197, "step": 19470 }, { "epoch": 130.74074074074073, "grad_norm": 190.34251403808594, "learning_rate": 5.376640527663395e-07, "loss": 2.5805, "step": 19480 }, { "epoch": 130.8080808080808, "grad_norm": 85.87240600585938, "learning_rate": 5.367288953580998e-07, "loss": 2.4466, "step": 19490 }, { "epoch": 130.87542087542087, "grad_norm": 72.09017181396484, "learning_rate": 5.357942535173577e-07, "loss": 2.5525, "step": 19500 }, { "epoch": 130.87542087542087, "eval_loss": 0.3483783006668091, "eval_mae": 0.4185122549533844, "eval_mse": 0.3483782708644867, "eval_r2": 0.2379828691482544, "eval_rmse": 0.590235775656209, "eval_runtime": 10.7212, "eval_samples_per_second": 442.956, "eval_steps_per_second": 13.898, "step": 19500 }, { "epoch": 130.94276094276094, "grad_norm": 46.29831314086914, "learning_rate": 5.348601282842633e-07, "loss": 2.5543, "step": 19510 }, { "epoch": 131.006734006734, "grad_norm": 157.1110076904297, "learning_rate": 5.339265206983898e-07, "loss": 1.9044, "step": 19520 }, { "epoch": 131.07407407407408, "grad_norm": 184.81881713867188, "learning_rate": 5.329934317987359e-07, "loss": 2.6204, "step": 19530 }, { "epoch": 131.14141414141415, "grad_norm": 180.841796875, "learning_rate": 5.32060862623722e-07, "loss": 2.8332, "step": 19540 }, { "epoch": 131.20875420875421, "grad_norm": 94.30369567871094, "learning_rate": 5.311288142111898e-07, "loss": 2.2208, "step": 19550 }, { "epoch": 131.27609427609428, "grad_norm": 46.852027893066406, "learning_rate": 5.30197287598403e-07, "loss": 2.504, "step": 19560 }, { "epoch": 131.34343434343435, "grad_norm": 103.9162826538086, "learning_rate": 5.292662838220432e-07, "loss": 2.4911, "step": 19570 }, { "epoch": 131.41077441077442, "grad_norm": 92.6771240234375, "learning_rate": 5.283358039182104e-07, "loss": 2.204, "step": 19580 }, { "epoch": 131.4781144781145, "grad_norm": 94.1965560913086, "learning_rate": 5.274058489224223e-07, "loss": 2.6987, "step": 19590 }, { "epoch": 131.54545454545453, "grad_norm": 52.25348663330078, "learning_rate": 5.264764198696114e-07, "loss": 2.6074, "step": 19600 }, { "epoch": 131.54545454545453, "eval_loss": 0.3377807140350342, "eval_mae": 0.4077964425086975, "eval_mse": 0.33778077363967896, "eval_r2": 0.26116305589675903, "eval_rmse": 0.5811891031666707, "eval_runtime": 10.7076, "eval_samples_per_second": 443.515, "eval_steps_per_second": 13.915, "step": 19600 }, { "epoch": 131.6127946127946, "grad_norm": 57.111541748046875, "learning_rate": 5.255475177941261e-07, "loss": 1.9364, "step": 19610 }, { "epoch": 131.68013468013467, "grad_norm": 186.37286376953125, "learning_rate": 5.246191437297273e-07, "loss": 2.3926, "step": 19620 }, { "epoch": 131.74747474747474, "grad_norm": 136.69671630859375, "learning_rate": 5.236912987095881e-07, "loss": 2.352, "step": 19630 }, { "epoch": 131.8148148148148, "grad_norm": 195.39901733398438, "learning_rate": 5.227639837662945e-07, "loss": 2.0274, "step": 19640 }, { "epoch": 131.88215488215488, "grad_norm": 118.90431213378906, "learning_rate": 5.218371999318406e-07, "loss": 2.5668, "step": 19650 }, { "epoch": 131.94949494949495, "grad_norm": 224.88427734375, "learning_rate": 5.209109482376304e-07, "loss": 2.7746, "step": 19660 }, { "epoch": 132.013468013468, "grad_norm": 37.594600677490234, "learning_rate": 5.199852297144759e-07, "loss": 2.1877, "step": 19670 }, { "epoch": 132.08080808080808, "grad_norm": 148.7754364013672, "learning_rate": 5.19060045392595e-07, "loss": 2.348, "step": 19680 }, { "epoch": 132.14814814814815, "grad_norm": 103.95060729980469, "learning_rate": 5.181353963016122e-07, "loss": 2.5496, "step": 19690 }, { "epoch": 132.21548821548822, "grad_norm": 206.6446075439453, "learning_rate": 5.172112834705551e-07, "loss": 2.5354, "step": 19700 }, { "epoch": 132.21548821548822, "eval_loss": 0.33760300278663635, "eval_mae": 0.40707927942276, "eval_mse": 0.33760303258895874, "eval_r2": 0.26155179738998413, "eval_rmse": 0.5810361714979186, "eval_runtime": 10.7277, "eval_samples_per_second": 442.687, "eval_steps_per_second": 13.889, "step": 19700 }, { "epoch": 132.2828282828283, "grad_norm": 88.5899658203125, "learning_rate": 5.162877079278549e-07, "loss": 2.45, "step": 19710 }, { "epoch": 132.35016835016836, "grad_norm": 254.0193328857422, "learning_rate": 5.15364670701346e-07, "loss": 2.6709, "step": 19720 }, { "epoch": 132.41750841750843, "grad_norm": 132.24334716796875, "learning_rate": 5.144421728182619e-07, "loss": 2.2609, "step": 19730 }, { "epoch": 132.4848484848485, "grad_norm": 126.99674987792969, "learning_rate": 5.135202153052366e-07, "loss": 2.432, "step": 19740 }, { "epoch": 132.55218855218854, "grad_norm": 93.11771392822266, "learning_rate": 5.125987991883041e-07, "loss": 2.5412, "step": 19750 }, { "epoch": 132.6195286195286, "grad_norm": 182.7234649658203, "learning_rate": 5.116779254928928e-07, "loss": 2.6278, "step": 19760 }, { "epoch": 132.68686868686868, "grad_norm": 111.80548858642578, "learning_rate": 5.107575952438308e-07, "loss": 2.2744, "step": 19770 }, { "epoch": 132.75420875420875, "grad_norm": 289.3251037597656, "learning_rate": 5.098378094653388e-07, "loss": 2.32, "step": 19780 }, { "epoch": 132.82154882154882, "grad_norm": 226.1051788330078, "learning_rate": 5.089185691810337e-07, "loss": 2.556, "step": 19790 }, { "epoch": 132.88888888888889, "grad_norm": 198.9059600830078, "learning_rate": 5.079998754139238e-07, "loss": 2.0423, "step": 19800 }, { "epoch": 132.88888888888889, "eval_loss": 0.33710241317749023, "eval_mae": 0.40680593252182007, "eval_mse": 0.33710238337516785, "eval_r2": 0.2626469135284424, "eval_rmse": 0.5806051871755606, "eval_runtime": 10.7323, "eval_samples_per_second": 442.498, "eval_steps_per_second": 13.883, "step": 19800 }, { "epoch": 132.95622895622895, "grad_norm": 245.1586151123047, "learning_rate": 5.070817291864094e-07, "loss": 2.4868, "step": 19810 }, { "epoch": 133.02020202020202, "grad_norm": 173.21839904785156, "learning_rate": 5.061641315202823e-07, "loss": 2.3354, "step": 19820 }, { "epoch": 133.0875420875421, "grad_norm": 95.0740966796875, "learning_rate": 5.052470834367228e-07, "loss": 2.3105, "step": 19830 }, { "epoch": 133.15488215488216, "grad_norm": 46.88821029663086, "learning_rate": 5.043305859563001e-07, "loss": 2.1591, "step": 19840 }, { "epoch": 133.22222222222223, "grad_norm": 191.2957305908203, "learning_rate": 5.034146400989709e-07, "loss": 2.4006, "step": 19850 }, { "epoch": 133.2895622895623, "grad_norm": 108.5725326538086, "learning_rate": 5.024992468840771e-07, "loss": 2.3427, "step": 19860 }, { "epoch": 133.35690235690237, "grad_norm": 151.00157165527344, "learning_rate": 5.015844073303468e-07, "loss": 2.5166, "step": 19870 }, { "epoch": 133.42424242424244, "grad_norm": 86.26664733886719, "learning_rate": 5.00670122455891e-07, "loss": 2.5864, "step": 19880 }, { "epoch": 133.4915824915825, "grad_norm": 54.84898376464844, "learning_rate": 4.997563932782034e-07, "loss": 2.5246, "step": 19890 }, { "epoch": 133.55892255892255, "grad_norm": 224.44371032714844, "learning_rate": 4.988432208141601e-07, "loss": 2.5151, "step": 19900 }, { "epoch": 133.55892255892255, "eval_loss": 0.3377993106842041, "eval_mae": 0.4091974198818207, "eval_mse": 0.3377993106842041, "eval_r2": 0.26112252473831177, "eval_rmse": 0.5812050504634351, "eval_runtime": 10.7192, "eval_samples_per_second": 443.035, "eval_steps_per_second": 13.9, "step": 19900 }, { "epoch": 133.62626262626262, "grad_norm": 67.84085083007812, "learning_rate": 4.979306060800168e-07, "loss": 2.3479, "step": 19910 }, { "epoch": 133.69360269360268, "grad_norm": 134.08251953125, "learning_rate": 4.970185500914085e-07, "loss": 2.9086, "step": 19920 }, { "epoch": 133.76094276094275, "grad_norm": 90.00189208984375, "learning_rate": 4.961070538633495e-07, "loss": 2.129, "step": 19930 }, { "epoch": 133.82828282828282, "grad_norm": 112.75199127197266, "learning_rate": 4.951961184102294e-07, "loss": 2.4996, "step": 19940 }, { "epoch": 133.8956228956229, "grad_norm": 137.74778747558594, "learning_rate": 4.942857447458156e-07, "loss": 2.4337, "step": 19950 }, { "epoch": 133.96296296296296, "grad_norm": 91.80570983886719, "learning_rate": 4.933759338832491e-07, "loss": 2.2821, "step": 19960 }, { "epoch": 134.02693602693603, "grad_norm": 46.88382339477539, "learning_rate": 4.924666868350442e-07, "loss": 2.3431, "step": 19970 }, { "epoch": 134.0942760942761, "grad_norm": 212.5193328857422, "learning_rate": 4.91558004613089e-07, "loss": 2.3077, "step": 19980 }, { "epoch": 134.16161616161617, "grad_norm": 201.4203338623047, "learning_rate": 4.906498882286424e-07, "loss": 2.4787, "step": 19990 }, { "epoch": 134.22895622895624, "grad_norm": 148.5880126953125, "learning_rate": 4.89742338692333e-07, "loss": 2.2472, "step": 20000 }, { "epoch": 134.22895622895624, "eval_loss": 0.3391974866390228, "eval_mae": 0.409416526556015, "eval_mse": 0.3391974866390228, "eval_r2": 0.2580642104148865, "eval_rmse": 0.5824066334091867, "eval_runtime": 10.7091, "eval_samples_per_second": 443.456, "eval_steps_per_second": 13.913, "step": 20000 }, { "epoch": 134.2962962962963, "grad_norm": 50.01713180541992, "learning_rate": 4.888353570141598e-07, "loss": 2.3627, "step": 20010 }, { "epoch": 134.36363636363637, "grad_norm": 204.45086669921875, "learning_rate": 4.879289442034886e-07, "loss": 2.5294, "step": 20020 }, { "epoch": 134.43097643097644, "grad_norm": 103.50146484375, "learning_rate": 4.870231012690537e-07, "loss": 2.4485, "step": 20030 }, { "epoch": 134.4983164983165, "grad_norm": 59.08714294433594, "learning_rate": 4.861178292189527e-07, "loss": 2.4869, "step": 20040 }, { "epoch": 134.56565656565655, "grad_norm": 124.55500793457031, "learning_rate": 4.852131290606506e-07, "loss": 2.2646, "step": 20050 }, { "epoch": 134.63299663299662, "grad_norm": 53.30123519897461, "learning_rate": 4.843090018009742e-07, "loss": 2.2374, "step": 20060 }, { "epoch": 134.7003367003367, "grad_norm": 100.72486877441406, "learning_rate": 4.834054484461127e-07, "loss": 2.627, "step": 20070 }, { "epoch": 134.76767676767676, "grad_norm": 139.13998413085938, "learning_rate": 4.825024700016183e-07, "loss": 2.6054, "step": 20080 }, { "epoch": 134.83501683501683, "grad_norm": 305.63427734375, "learning_rate": 4.816000674724016e-07, "loss": 2.5835, "step": 20090 }, { "epoch": 134.9023569023569, "grad_norm": 75.4280014038086, "learning_rate": 4.806982418627325e-07, "loss": 2.3956, "step": 20100 }, { "epoch": 134.9023569023569, "eval_loss": 0.3401067554950714, "eval_mae": 0.41070353984832764, "eval_mse": 0.3401067554950714, "eval_r2": 0.256075382232666, "eval_rmse": 0.5831867243817124, "eval_runtime": 10.7088, "eval_samples_per_second": 443.467, "eval_steps_per_second": 13.914, "step": 20100 }, { "epoch": 134.96969696969697, "grad_norm": 72.98235321044922, "learning_rate": 4.797969941762401e-07, "loss": 2.5965, "step": 20110 }, { "epoch": 135.03367003367003, "grad_norm": 185.98907470703125, "learning_rate": 4.788963254159086e-07, "loss": 2.1874, "step": 20120 }, { "epoch": 135.1010101010101, "grad_norm": 292.39825439453125, "learning_rate": 4.7799623658408e-07, "loss": 2.5502, "step": 20130 }, { "epoch": 135.16835016835017, "grad_norm": 213.7394256591797, "learning_rate": 4.770967286824488e-07, "loss": 2.1811, "step": 20140 }, { "epoch": 135.23569023569024, "grad_norm": 138.91946411132812, "learning_rate": 4.7619780271206387e-07, "loss": 2.1687, "step": 20150 }, { "epoch": 135.3030303030303, "grad_norm": 90.34305572509766, "learning_rate": 4.7529945967332716e-07, "loss": 2.4816, "step": 20160 }, { "epoch": 135.37037037037038, "grad_norm": 161.8311767578125, "learning_rate": 4.7440170056599095e-07, "loss": 2.2084, "step": 20170 }, { "epoch": 135.43771043771045, "grad_norm": 192.91224670410156, "learning_rate": 4.735045263891574e-07, "loss": 2.4319, "step": 20180 }, { "epoch": 135.5050505050505, "grad_norm": 155.5737762451172, "learning_rate": 4.726079381412792e-07, "loss": 2.6698, "step": 20190 }, { "epoch": 135.57239057239056, "grad_norm": 339.6526794433594, "learning_rate": 4.717119368201552e-07, "loss": 2.728, "step": 20200 }, { "epoch": 135.57239057239056, "eval_loss": 0.3427649140357971, "eval_mae": 0.4143936336040497, "eval_mse": 0.3427649140357971, "eval_r2": 0.250261127948761, "eval_rmse": 0.5854612831228014, "eval_runtime": 10.712, "eval_samples_per_second": 443.335, "eval_steps_per_second": 13.91, "step": 20200 }, { "epoch": 135.63973063973063, "grad_norm": 183.41680908203125, "learning_rate": 4.7081652342293267e-07, "loss": 2.3553, "step": 20210 }, { "epoch": 135.7070707070707, "grad_norm": 123.84310913085938, "learning_rate": 4.6992169894610333e-07, "loss": 2.6738, "step": 20220 }, { "epoch": 135.77441077441077, "grad_norm": 79.26025390625, "learning_rate": 4.6902746438550367e-07, "loss": 2.3495, "step": 20230 }, { "epoch": 135.84175084175084, "grad_norm": 120.804931640625, "learning_rate": 4.681338207363149e-07, "loss": 2.2956, "step": 20240 }, { "epoch": 135.9090909090909, "grad_norm": 148.001708984375, "learning_rate": 4.6724076899305907e-07, "loss": 2.7311, "step": 20250 }, { "epoch": 135.97643097643098, "grad_norm": 79.04490661621094, "learning_rate": 4.6634831014959994e-07, "loss": 2.3338, "step": 20260 }, { "epoch": 136.04040404040404, "grad_norm": 130.42579650878906, "learning_rate": 4.6545644519914227e-07, "loss": 2.0475, "step": 20270 }, { "epoch": 136.1077441077441, "grad_norm": 103.36287689208984, "learning_rate": 4.645651751342287e-07, "loss": 2.2996, "step": 20280 }, { "epoch": 136.17508417508418, "grad_norm": 125.76856994628906, "learning_rate": 4.636745009467408e-07, "loss": 2.6703, "step": 20290 }, { "epoch": 136.24242424242425, "grad_norm": 175.9672393798828, "learning_rate": 4.627844236278965e-07, "loss": 2.3132, "step": 20300 }, { "epoch": 136.24242424242425, "eval_loss": 0.33831679821014404, "eval_mae": 0.4092104434967041, "eval_mse": 0.33831679821014404, "eval_r2": 0.2599905729293823, "eval_rmse": 0.5816500650822142, "eval_runtime": 10.7176, "eval_samples_per_second": 443.102, "eval_steps_per_second": 13.902, "step": 20300 }, { "epoch": 136.30976430976432, "grad_norm": 221.51385498046875, "learning_rate": 4.618949441682495e-07, "loss": 2.2491, "step": 20310 }, { "epoch": 136.3771043771044, "grad_norm": 193.0191192626953, "learning_rate": 4.610060635576881e-07, "loss": 2.3587, "step": 20320 }, { "epoch": 136.44444444444446, "grad_norm": 248.11312866210938, "learning_rate": 4.60117782785434e-07, "loss": 2.3323, "step": 20330 }, { "epoch": 136.5117845117845, "grad_norm": 162.36756896972656, "learning_rate": 4.592301028400424e-07, "loss": 2.1304, "step": 20340 }, { "epoch": 136.57912457912457, "grad_norm": 246.32557678222656, "learning_rate": 4.5834302470939843e-07, "loss": 3.0759, "step": 20350 }, { "epoch": 136.64646464646464, "grad_norm": 251.08013916015625, "learning_rate": 4.574565493807181e-07, "loss": 2.2594, "step": 20360 }, { "epoch": 136.7138047138047, "grad_norm": 288.643798828125, "learning_rate": 4.5657067784054704e-07, "loss": 2.5317, "step": 20370 }, { "epoch": 136.78114478114477, "grad_norm": 161.12796020507812, "learning_rate": 4.5568541107475786e-07, "loss": 2.7961, "step": 20380 }, { "epoch": 136.84848484848484, "grad_norm": 67.37052917480469, "learning_rate": 4.548007500685515e-07, "loss": 2.3838, "step": 20390 }, { "epoch": 136.9158249158249, "grad_norm": 202.67230224609375, "learning_rate": 4.539166958064534e-07, "loss": 2.1169, "step": 20400 }, { "epoch": 136.9158249158249, "eval_loss": 0.3376920223236084, "eval_mae": 0.40792354941368103, "eval_mse": 0.3376920223236084, "eval_r2": 0.2613571882247925, "eval_rmse": 0.5811127449330366, "eval_runtime": 10.7087, "eval_samples_per_second": 443.472, "eval_steps_per_second": 13.914, "step": 20400 }, { "epoch": 136.98316498316498, "grad_norm": 126.19248962402344, "learning_rate": 4.530332492723143e-07, "loss": 2.6008, "step": 20410 }, { "epoch": 137.04713804713805, "grad_norm": 178.32933044433594, "learning_rate": 4.521504114493092e-07, "loss": 2.3154, "step": 20420 }, { "epoch": 137.11447811447812, "grad_norm": 195.27188110351562, "learning_rate": 4.512681833199348e-07, "loss": 2.4077, "step": 20430 }, { "epoch": 137.1818181818182, "grad_norm": 48.0440788269043, "learning_rate": 4.503865658660092e-07, "loss": 2.3612, "step": 20440 }, { "epoch": 137.24915824915826, "grad_norm": 87.79241943359375, "learning_rate": 4.4950556006867213e-07, "loss": 2.4774, "step": 20450 }, { "epoch": 137.31649831649833, "grad_norm": 120.2742691040039, "learning_rate": 4.4862516690838106e-07, "loss": 2.2817, "step": 20460 }, { "epoch": 137.3838383838384, "grad_norm": 53.103790283203125, "learning_rate": 4.477453873649131e-07, "loss": 2.5641, "step": 20470 }, { "epoch": 137.45117845117846, "grad_norm": 61.03883361816406, "learning_rate": 4.468662224173614e-07, "loss": 2.7663, "step": 20480 }, { "epoch": 137.5185185185185, "grad_norm": 115.4153823852539, "learning_rate": 4.459876730441351e-07, "loss": 2.3666, "step": 20490 }, { "epoch": 137.58585858585857, "grad_norm": 128.48422241210938, "learning_rate": 4.4510974022295956e-07, "loss": 2.3173, "step": 20500 }, { "epoch": 137.58585858585857, "eval_loss": 0.3376583456993103, "eval_mae": 0.4084000587463379, "eval_mse": 0.3376583456993103, "eval_r2": 0.26143085956573486, "eval_rmse": 0.5810837682290827, "eval_runtime": 10.7226, "eval_samples_per_second": 442.896, "eval_steps_per_second": 13.896, "step": 20500 }, { "epoch": 137.65319865319864, "grad_norm": 192.05206298828125, "learning_rate": 4.4423242493087254e-07, "loss": 2.2396, "step": 20510 }, { "epoch": 137.7205387205387, "grad_norm": 186.97952270507812, "learning_rate": 4.4335572814422497e-07, "loss": 2.3018, "step": 20520 }, { "epoch": 137.78787878787878, "grad_norm": 61.72767639160156, "learning_rate": 4.4247965083868033e-07, "loss": 2.3748, "step": 20530 }, { "epoch": 137.85521885521885, "grad_norm": 129.16342163085938, "learning_rate": 4.4160419398921103e-07, "loss": 2.5627, "step": 20540 }, { "epoch": 137.92255892255892, "grad_norm": 34.78518295288086, "learning_rate": 4.40729358570101e-07, "loss": 2.5673, "step": 20550 }, { "epoch": 137.989898989899, "grad_norm": 241.5722198486328, "learning_rate": 4.398551455549409e-07, "loss": 2.417, "step": 20560 }, { "epoch": 138.05387205387206, "grad_norm": 160.8332977294922, "learning_rate": 4.389815559166289e-07, "loss": 2.0637, "step": 20570 }, { "epoch": 138.12121212121212, "grad_norm": 231.73191833496094, "learning_rate": 4.381085906273713e-07, "loss": 2.2776, "step": 20580 }, { "epoch": 138.1885521885522, "grad_norm": 104.40795135498047, "learning_rate": 4.372362506586764e-07, "loss": 2.5613, "step": 20590 }, { "epoch": 138.25589225589226, "grad_norm": 75.10977935791016, "learning_rate": 4.363645369813597e-07, "loss": 2.417, "step": 20600 }, { "epoch": 138.25589225589226, "eval_loss": 0.3374638259410858, "eval_mae": 0.4083138108253479, "eval_mse": 0.3374637961387634, "eval_r2": 0.2618563771247864, "eval_rmse": 0.580916341772861, "eval_runtime": 10.7358, "eval_samples_per_second": 442.352, "eval_steps_per_second": 13.879, "step": 20600 }, { "epoch": 138.32323232323233, "grad_norm": 123.67415618896484, "learning_rate": 4.3549345056553765e-07, "loss": 2.8503, "step": 20610 }, { "epoch": 138.3905723905724, "grad_norm": 287.29736328125, "learning_rate": 4.3462299238062914e-07, "loss": 2.6119, "step": 20620 }, { "epoch": 138.45791245791247, "grad_norm": 117.45768737792969, "learning_rate": 4.337531633953548e-07, "loss": 2.3028, "step": 20630 }, { "epoch": 138.5252525252525, "grad_norm": 166.5128173828125, "learning_rate": 4.3288396457773404e-07, "loss": 2.4116, "step": 20640 }, { "epoch": 138.59259259259258, "grad_norm": 102.08402252197266, "learning_rate": 4.320153968950847e-07, "loss": 2.3371, "step": 20650 }, { "epoch": 138.65993265993265, "grad_norm": 62.02680587768555, "learning_rate": 4.311474613140237e-07, "loss": 2.4825, "step": 20660 }, { "epoch": 138.72727272727272, "grad_norm": 56.35841369628906, "learning_rate": 4.3028015880046276e-07, "loss": 2.565, "step": 20670 }, { "epoch": 138.7946127946128, "grad_norm": 321.4878234863281, "learning_rate": 4.294134903196106e-07, "loss": 2.1911, "step": 20680 }, { "epoch": 138.86195286195286, "grad_norm": 51.89529800415039, "learning_rate": 4.2854745683596937e-07, "loss": 2.6726, "step": 20690 }, { "epoch": 138.92929292929293, "grad_norm": 96.6413345336914, "learning_rate": 4.2768205931333446e-07, "loss": 2.2356, "step": 20700 }, { "epoch": 138.92929292929293, "eval_loss": 0.3383339047431946, "eval_mae": 0.40888792276382446, "eval_mse": 0.3383339047431946, "eval_r2": 0.25995320081710815, "eval_rmse": 0.5816647700722424, "eval_runtime": 10.7173, "eval_samples_per_second": 443.116, "eval_steps_per_second": 13.903, "step": 20700 }, { "epoch": 138.996632996633, "grad_norm": 48.284793853759766, "learning_rate": 4.2681729871479453e-07, "loss": 2.1976, "step": 20710 }, { "epoch": 139.06060606060606, "grad_norm": 134.94268798828125, "learning_rate": 4.260395595416303e-07, "loss": 2.5337, "step": 20720 }, { "epoch": 139.12794612794613, "grad_norm": 113.99388885498047, "learning_rate": 4.25176011749638e-07, "loss": 2.398, "step": 20730 }, { "epoch": 139.1952861952862, "grad_norm": 118.07829284667969, "learning_rate": 4.2431310367068296e-07, "loss": 2.1162, "step": 20740 }, { "epoch": 139.26262626262627, "grad_norm": 297.97833251953125, "learning_rate": 4.2345083626508337e-07, "loss": 2.4485, "step": 20750 }, { "epoch": 139.32996632996634, "grad_norm": 33.981719970703125, "learning_rate": 4.225892104924433e-07, "loss": 2.3241, "step": 20760 }, { "epoch": 139.3973063973064, "grad_norm": 41.02674102783203, "learning_rate": 4.2172822731165294e-07, "loss": 2.3201, "step": 20770 }, { "epoch": 139.46464646464648, "grad_norm": 99.97801208496094, "learning_rate": 4.2086788768088843e-07, "loss": 2.2967, "step": 20780 }, { "epoch": 139.53198653198652, "grad_norm": 95.78704071044922, "learning_rate": 4.200081925576083e-07, "loss": 2.8442, "step": 20790 }, { "epoch": 139.5993265993266, "grad_norm": 145.46365356445312, "learning_rate": 4.191491428985544e-07, "loss": 2.1453, "step": 20800 }, { "epoch": 139.5993265993266, "eval_loss": 0.34040799736976624, "eval_mae": 0.41165411472320557, "eval_mse": 0.34040799736976624, "eval_r2": 0.2554164528846741, "eval_rmse": 0.5834449394499589, "eval_runtime": 10.6939, "eval_samples_per_second": 444.084, "eval_steps_per_second": 13.933, "step": 20800 }, { "epoch": 139.66666666666666, "grad_norm": 71.27349853515625, "learning_rate": 4.182907396597507e-07, "loss": 2.4212, "step": 20810 }, { "epoch": 139.73400673400673, "grad_norm": 408.9395446777344, "learning_rate": 4.1743298379650073e-07, "loss": 2.8043, "step": 20820 }, { "epoch": 139.8013468013468, "grad_norm": 167.86895751953125, "learning_rate": 4.16575876263389e-07, "loss": 2.3361, "step": 20830 }, { "epoch": 139.86868686868686, "grad_norm": 128.66250610351562, "learning_rate": 4.1571941801427724e-07, "loss": 2.5367, "step": 20840 }, { "epoch": 139.93602693602693, "grad_norm": 126.4045181274414, "learning_rate": 4.1486361000230463e-07, "loss": 2.2134, "step": 20850 }, { "epoch": 140.0, "grad_norm": NaN, "learning_rate": 4.140084531798883e-07, "loss": 2.4383, "step": 20860 }, { "epoch": 140.06734006734007, "grad_norm": 199.7034912109375, "learning_rate": 4.1323936959337093e-07, "loss": 2.3322, "step": 20870 }, { "epoch": 140.13468013468014, "grad_norm": 130.79312133789062, "learning_rate": 4.123854526524195e-07, "loss": 2.4796, "step": 20880 }, { "epoch": 140.2020202020202, "grad_norm": 142.4031524658203, "learning_rate": 4.1153218965892756e-07, "loss": 2.2646, "step": 20890 }, { "epoch": 140.26936026936028, "grad_norm": 39.642051696777344, "learning_rate": 4.106795815624795e-07, "loss": 2.2616, "step": 20900 }, { "epoch": 140.26936026936028, "eval_loss": 0.33772704005241394, "eval_mae": 0.4081130623817444, "eval_mse": 0.33772706985473633, "eval_r2": 0.26128053665161133, "eval_rmse": 0.581142899685384, "eval_runtime": 10.7155, "eval_samples_per_second": 443.189, "eval_steps_per_second": 13.905, "step": 20900 }, { "epoch": 140.33670033670035, "grad_norm": 80.36078643798828, "learning_rate": 4.098276293119294e-07, "loss": 2.6102, "step": 20910 }, { "epoch": 140.40404040404042, "grad_norm": 74.66649627685547, "learning_rate": 4.08976333855403e-07, "loss": 2.3154, "step": 20920 }, { "epoch": 140.47138047138048, "grad_norm": 225.6339569091797, "learning_rate": 4.081256961402939e-07, "loss": 2.6348, "step": 20930 }, { "epoch": 140.53872053872053, "grad_norm": 171.2349853515625, "learning_rate": 4.072757171132637e-07, "loss": 2.1252, "step": 20940 }, { "epoch": 140.6060606060606, "grad_norm": 65.93276977539062, "learning_rate": 4.0642639772024225e-07, "loss": 2.485, "step": 20950 }, { "epoch": 140.67340067340066, "grad_norm": 156.55227661132812, "learning_rate": 4.05577738906424e-07, "loss": 2.4402, "step": 20960 }, { "epoch": 140.74074074074073, "grad_norm": 100.4862289428711, "learning_rate": 4.0472974161626826e-07, "loss": 2.5961, "step": 20970 }, { "epoch": 140.8080808080808, "grad_norm": 140.75633239746094, "learning_rate": 4.038824067934993e-07, "loss": 2.1309, "step": 20980 }, { "epoch": 140.87542087542087, "grad_norm": 180.93667602539062, "learning_rate": 4.030357353811027e-07, "loss": 2.5116, "step": 20990 }, { "epoch": 140.94276094276094, "grad_norm": 169.84263610839844, "learning_rate": 4.0218972832132713e-07, "loss": 2.5586, "step": 21000 }, { "epoch": 140.94276094276094, "eval_loss": 0.3369656205177307, "eval_mae": 0.4080447554588318, "eval_mse": 0.3369656205177307, "eval_r2": 0.26294606924057007, "eval_rmse": 0.5804873991033145, "eval_runtime": 10.7153, "eval_samples_per_second": 443.199, "eval_steps_per_second": 13.905, "step": 21000 }, { "epoch": 141.006734006734, "grad_norm": 144.00082397460938, "learning_rate": 4.013443865556809e-07, "loss": 2.2699, "step": 21010 }, { "epoch": 141.07407407407408, "grad_norm": 143.69923400878906, "learning_rate": 4.0049971102493197e-07, "loss": 2.7502, "step": 21020 }, { "epoch": 141.14141414141415, "grad_norm": 115.19097900390625, "learning_rate": 3.9965570266910763e-07, "loss": 2.4356, "step": 21030 }, { "epoch": 141.20875420875421, "grad_norm": 175.8280792236328, "learning_rate": 3.988123624274919e-07, "loss": 2.4279, "step": 21040 }, { "epoch": 141.27609427609428, "grad_norm": 91.72219848632812, "learning_rate": 3.9796969123862533e-07, "loss": 2.4149, "step": 21050 }, { "epoch": 141.34343434343435, "grad_norm": 133.78225708007812, "learning_rate": 3.971276900403047e-07, "loss": 2.1366, "step": 21060 }, { "epoch": 141.41077441077442, "grad_norm": 32.24467468261719, "learning_rate": 3.962863597695799e-07, "loss": 2.1872, "step": 21070 }, { "epoch": 141.4781144781145, "grad_norm": 141.6538543701172, "learning_rate": 3.9544570136275555e-07, "loss": 2.1369, "step": 21080 }, { "epoch": 141.54545454545453, "grad_norm": 48.62370681762695, "learning_rate": 3.9460571575538745e-07, "loss": 2.1554, "step": 21090 }, { "epoch": 141.6127946127946, "grad_norm": 180.3050537109375, "learning_rate": 3.937664038822827e-07, "loss": 2.7008, "step": 21100 }, { "epoch": 141.6127946127946, "eval_loss": 0.33830755949020386, "eval_mae": 0.409385621547699, "eval_mse": 0.33830755949020386, "eval_r2": 0.26001083850860596, "eval_rmse": 0.5816421232082524, "eval_runtime": 10.7102, "eval_samples_per_second": 443.408, "eval_steps_per_second": 13.912, "step": 21100 }, { "epoch": 141.68013468013467, "grad_norm": 181.2019500732422, "learning_rate": 3.9292776667749973e-07, "loss": 2.8243, "step": 21110 }, { "epoch": 141.74747474747474, "grad_norm": 46.74263000488281, "learning_rate": 3.9208980507434487e-07, "loss": 2.2123, "step": 21120 }, { "epoch": 141.8148148148148, "grad_norm": 54.63764190673828, "learning_rate": 3.912525200053729e-07, "loss": 2.462, "step": 21130 }, { "epoch": 141.88215488215488, "grad_norm": 51.957801818847656, "learning_rate": 3.9041591240238694e-07, "loss": 2.4263, "step": 21140 }, { "epoch": 141.94949494949495, "grad_norm": 42.638694763183594, "learning_rate": 3.8957998319643347e-07, "loss": 2.2772, "step": 21150 }, { "epoch": 142.013468013468, "grad_norm": 143.9105987548828, "learning_rate": 3.887447333178069e-07, "loss": 2.6141, "step": 21160 }, { "epoch": 142.08080808080808, "grad_norm": 323.9635925292969, "learning_rate": 3.8791016369604355e-07, "loss": 2.4541, "step": 21170 }, { "epoch": 142.14814814814815, "grad_norm": 196.1297149658203, "learning_rate": 3.8707627525992427e-07, "loss": 2.4685, "step": 21180 }, { "epoch": 142.21548821548822, "grad_norm": 132.35643005371094, "learning_rate": 3.862430689374706e-07, "loss": 2.6908, "step": 21190 }, { "epoch": 142.2828282828283, "grad_norm": 108.35971069335938, "learning_rate": 3.854105456559451e-07, "loss": 2.2795, "step": 21200 }, { "epoch": 142.2828282828283, "eval_loss": 0.33737504482269287, "eval_mae": 0.40925097465515137, "eval_mse": 0.33737504482269287, "eval_r2": 0.2620505094528198, "eval_rmse": 0.5808399476815389, "eval_runtime": 10.7036, "eval_samples_per_second": 443.681, "eval_steps_per_second": 13.92, "step": 21200 }, { "epoch": 142.35016835016836, "grad_norm": 255.78399658203125, "learning_rate": 3.845787063418514e-07, "loss": 2.2152, "step": 21210 }, { "epoch": 142.41750841750843, "grad_norm": 362.5876770019531, "learning_rate": 3.837475519209304e-07, "loss": 2.5006, "step": 21220 }, { "epoch": 142.4848484848485, "grad_norm": 99.40290069580078, "learning_rate": 3.829170833181613e-07, "loss": 2.5071, "step": 21230 }, { "epoch": 142.55218855218854, "grad_norm": 50.44427490234375, "learning_rate": 3.820873014577609e-07, "loss": 2.0558, "step": 21240 }, { "epoch": 142.6195286195286, "grad_norm": 131.93338012695312, "learning_rate": 3.8125820726318025e-07, "loss": 2.7947, "step": 21250 }, { "epoch": 142.68686868686868, "grad_norm": 118.97931671142578, "learning_rate": 3.804298016571067e-07, "loss": 2.4474, "step": 21260 }, { "epoch": 142.75420875420875, "grad_norm": 202.4793243408203, "learning_rate": 3.796020855614601e-07, "loss": 2.4592, "step": 21270 }, { "epoch": 142.82154882154882, "grad_norm": 167.0745391845703, "learning_rate": 3.78775059897393e-07, "loss": 2.2679, "step": 21280 }, { "epoch": 142.88888888888889, "grad_norm": 213.13597106933594, "learning_rate": 3.7794872558529066e-07, "loss": 2.3166, "step": 21290 }, { "epoch": 142.95622895622895, "grad_norm": 79.33403778076172, "learning_rate": 3.7712308354476775e-07, "loss": 2.3747, "step": 21300 }, { "epoch": 142.95622895622895, "eval_loss": 0.337506502866745, "eval_mae": 0.4077870845794678, "eval_mse": 0.337506502866745, "eval_r2": 0.26176297664642334, "eval_rmse": 0.5809530986807325, "eval_runtime": 10.7194, "eval_samples_per_second": 443.027, "eval_steps_per_second": 13.9, "step": 21300 }, { "epoch": 143.02020202020202, "grad_norm": 82.57735443115234, "learning_rate": 3.7629813469466867e-07, "loss": 2.1472, "step": 21310 }, { "epoch": 143.0875420875421, "grad_norm": 75.23265838623047, "learning_rate": 3.7547387995306734e-07, "loss": 2.2328, "step": 21320 }, { "epoch": 143.15488215488216, "grad_norm": 155.6161651611328, "learning_rate": 3.7465032023726397e-07, "loss": 2.4256, "step": 21330 }, { "epoch": 143.22222222222223, "grad_norm": 68.77708435058594, "learning_rate": 3.7382745646378653e-07, "loss": 2.2198, "step": 21340 }, { "epoch": 143.2895622895623, "grad_norm": 173.8029327392578, "learning_rate": 3.730052895483874e-07, "loss": 2.4443, "step": 21350 }, { "epoch": 143.35690235690237, "grad_norm": 177.54327392578125, "learning_rate": 3.721838204060437e-07, "loss": 2.5036, "step": 21360 }, { "epoch": 143.42424242424244, "grad_norm": 235.2197723388672, "learning_rate": 3.713630499509567e-07, "loss": 2.4345, "step": 21370 }, { "epoch": 143.4915824915825, "grad_norm": 206.15316772460938, "learning_rate": 3.705429790965493e-07, "loss": 2.2761, "step": 21380 }, { "epoch": 143.55892255892255, "grad_norm": 286.0751953125, "learning_rate": 3.697236087554657e-07, "loss": 2.6323, "step": 21390 }, { "epoch": 143.62626262626262, "grad_norm": 85.8257827758789, "learning_rate": 3.689049398395718e-07, "loss": 2.1397, "step": 21400 }, { "epoch": 143.62626262626262, "eval_loss": 0.3391259014606476, "eval_mae": 0.40837720036506653, "eval_mse": 0.3391259014606476, "eval_r2": 0.2582207918167114, "eval_rmse": 0.5823451738107285, "eval_runtime": 10.7058, "eval_samples_per_second": 443.589, "eval_steps_per_second": 13.918, "step": 21400 }, { "epoch": 143.69360269360268, "grad_norm": 160.77960205078125, "learning_rate": 3.6808697325995154e-07, "loss": 1.9054, "step": 21410 }, { "epoch": 143.76094276094275, "grad_norm": 155.7151641845703, "learning_rate": 3.6726970992690777e-07, "loss": 2.7945, "step": 21420 }, { "epoch": 143.82828282828282, "grad_norm": 171.15444946289062, "learning_rate": 3.6645315074996064e-07, "loss": 2.6236, "step": 21430 }, { "epoch": 143.8956228956229, "grad_norm": 338.5013732910156, "learning_rate": 3.6563729663784627e-07, "loss": 2.7767, "step": 21440 }, { "epoch": 143.96296296296296, "grad_norm": 289.29437255859375, "learning_rate": 3.6482214849851723e-07, "loss": 2.2931, "step": 21450 }, { "epoch": 144.02693602693603, "grad_norm": 354.0104064941406, "learning_rate": 3.640077072391391e-07, "loss": 2.3661, "step": 21460 }, { "epoch": 144.0942760942761, "grad_norm": 307.5401916503906, "learning_rate": 3.631939737660921e-07, "loss": 2.782, "step": 21470 }, { "epoch": 144.16161616161617, "grad_norm": 89.83287048339844, "learning_rate": 3.6238094898496765e-07, "loss": 2.3694, "step": 21480 }, { "epoch": 144.22895622895624, "grad_norm": 145.3656768798828, "learning_rate": 3.615686338005686e-07, "loss": 2.365, "step": 21490 }, { "epoch": 144.2962962962963, "grad_norm": 90.44530487060547, "learning_rate": 3.6075702911690895e-07, "loss": 2.0927, "step": 21500 }, { "epoch": 144.2962962962963, "eval_loss": 0.3379453420639038, "eval_mae": 0.40836429595947266, "eval_mse": 0.3379453420639038, "eval_r2": 0.2608030438423157, "eval_rmse": 0.5813306649953225, "eval_runtime": 10.7098, "eval_samples_per_second": 443.426, "eval_steps_per_second": 13.913, "step": 21500 }, { "epoch": 144.36363636363637, "grad_norm": 74.70501708984375, "learning_rate": 3.5994613583721123e-07, "loss": 2.322, "step": 21510 }, { "epoch": 144.43097643097644, "grad_norm": 419.16632080078125, "learning_rate": 3.5913595486390623e-07, "loss": 2.8461, "step": 21520 }, { "epoch": 144.4983164983165, "grad_norm": 94.01712799072266, "learning_rate": 3.5832648709863267e-07, "loss": 2.2376, "step": 21530 }, { "epoch": 144.56565656565655, "grad_norm": 132.0341033935547, "learning_rate": 3.575177334422347e-07, "loss": 2.3294, "step": 21540 }, { "epoch": 144.63299663299662, "grad_norm": 169.35264587402344, "learning_rate": 3.567096947947628e-07, "loss": 2.594, "step": 21550 }, { "epoch": 144.7003367003367, "grad_norm": 169.63365173339844, "learning_rate": 3.559023720554707e-07, "loss": 2.4855, "step": 21560 }, { "epoch": 144.76767676767676, "grad_norm": 69.92137145996094, "learning_rate": 3.5509576612281553e-07, "loss": 2.3596, "step": 21570 }, { "epoch": 144.83501683501683, "grad_norm": 182.9924774169922, "learning_rate": 3.5428987789445775e-07, "loss": 2.2809, "step": 21580 }, { "epoch": 144.9023569023569, "grad_norm": 150.27804565429688, "learning_rate": 3.5348470826725794e-07, "loss": 2.3278, "step": 21590 }, { "epoch": 144.96969696969697, "grad_norm": 128.7112579345703, "learning_rate": 3.526802581372771e-07, "loss": 2.2682, "step": 21600 }, { "epoch": 144.96969696969697, "eval_loss": 0.3364941477775574, "eval_mae": 0.40722331404685974, "eval_mse": 0.3364941477775574, "eval_r2": 0.2639772891998291, "eval_rmse": 0.5800811561993351, "eval_runtime": 10.7026, "eval_samples_per_second": 443.725, "eval_steps_per_second": 13.922, "step": 21600 }, { "epoch": 145.03367003367003, "grad_norm": 282.6242370605469, "learning_rate": 3.518765283997763e-07, "loss": 2.174, "step": 21610 }, { "epoch": 145.1010101010101, "grad_norm": 71.87451171875, "learning_rate": 3.5107351994921375e-07, "loss": 2.3446, "step": 21620 }, { "epoch": 145.16835016835017, "grad_norm": 143.6842041015625, "learning_rate": 3.5027123367924636e-07, "loss": 2.5091, "step": 21630 }, { "epoch": 145.23569023569024, "grad_norm": 263.9352111816406, "learning_rate": 3.4946967048272613e-07, "loss": 2.3177, "step": 21640 }, { "epoch": 145.3030303030303, "grad_norm": 138.9326171875, "learning_rate": 3.4866883125170035e-07, "loss": 2.3128, "step": 21650 }, { "epoch": 145.37037037037038, "grad_norm": 60.291568756103516, "learning_rate": 3.4786871687741184e-07, "loss": 2.595, "step": 21660 }, { "epoch": 145.43771043771045, "grad_norm": 130.55490112304688, "learning_rate": 3.4706932825029553e-07, "loss": 2.1932, "step": 21670 }, { "epoch": 145.5050505050505, "grad_norm": 223.08074951171875, "learning_rate": 3.462706662599789e-07, "loss": 2.1413, "step": 21680 }, { "epoch": 145.57239057239056, "grad_norm": 163.03707885742188, "learning_rate": 3.454727317952818e-07, "loss": 2.6559, "step": 21690 }, { "epoch": 145.63973063973063, "grad_norm": 128.6631622314453, "learning_rate": 3.446755257442123e-07, "loss": 2.403, "step": 21700 }, { "epoch": 145.63973063973063, "eval_loss": 0.33858799934387207, "eval_mae": 0.40906164050102234, "eval_mse": 0.33858799934387207, "eval_r2": 0.25939738750457764, "eval_rmse": 0.5818831492180128, "eval_runtime": 10.7185, "eval_samples_per_second": 443.067, "eval_steps_per_second": 13.901, "step": 21700 }, { "epoch": 145.7070707070707, "grad_norm": 175.52218627929688, "learning_rate": 3.4387904899397026e-07, "loss": 2.5754, "step": 21710 }, { "epoch": 145.77441077441077, "grad_norm": 152.33290100097656, "learning_rate": 3.4308330243094187e-07, "loss": 2.3916, "step": 21720 }, { "epoch": 145.84175084175084, "grad_norm": 69.48656463623047, "learning_rate": 3.4228828694070236e-07, "loss": 2.1885, "step": 21730 }, { "epoch": 145.9090909090909, "grad_norm": 180.29908752441406, "learning_rate": 3.4149400340801237e-07, "loss": 2.3174, "step": 21740 }, { "epoch": 145.97643097643098, "grad_norm": 255.99278259277344, "learning_rate": 3.407004527168177e-07, "loss": 2.86, "step": 21750 }, { "epoch": 146.04040404040404, "grad_norm": 83.8613052368164, "learning_rate": 3.399076357502498e-07, "loss": 2.2487, "step": 21760 }, { "epoch": 146.1077441077441, "grad_norm": 51.6412239074707, "learning_rate": 3.391155533906225e-07, "loss": 2.3706, "step": 21770 }, { "epoch": 146.17508417508418, "grad_norm": 153.0028533935547, "learning_rate": 3.3832420651943214e-07, "loss": 2.3017, "step": 21780 }, { "epoch": 146.24242424242425, "grad_norm": 63.567588806152344, "learning_rate": 3.3753359601735756e-07, "loss": 2.1566, "step": 21790 }, { "epoch": 146.30976430976432, "grad_norm": 73.48870849609375, "learning_rate": 3.3674372276425653e-07, "loss": 2.4931, "step": 21800 }, { "epoch": 146.30976430976432, "eval_loss": 0.33634573221206665, "eval_mae": 0.40709710121154785, "eval_mse": 0.33634573221206665, "eval_r2": 0.26430195569992065, "eval_rmse": 0.5799532155373109, "eval_runtime": 10.7143, "eval_samples_per_second": 443.237, "eval_steps_per_second": 13.907, "step": 21800 }, { "epoch": 146.3771043771044, "grad_norm": 163.33265686035156, "learning_rate": 3.3595458763916805e-07, "loss": 2.5481, "step": 21810 }, { "epoch": 146.44444444444446, "grad_norm": 115.07439422607422, "learning_rate": 3.3516619152030847e-07, "loss": 2.4388, "step": 21820 }, { "epoch": 146.5117845117845, "grad_norm": 207.7968292236328, "learning_rate": 3.343785352850716e-07, "loss": 2.3982, "step": 21830 }, { "epoch": 146.57912457912457, "grad_norm": 215.0872344970703, "learning_rate": 3.3359161981002916e-07, "loss": 2.8103, "step": 21840 }, { "epoch": 146.64646464646464, "grad_norm": 111.61074829101562, "learning_rate": 3.328054459709272e-07, "loss": 2.1939, "step": 21850 }, { "epoch": 146.7138047138047, "grad_norm": 57.857948303222656, "learning_rate": 3.320200146426865e-07, "loss": 2.4773, "step": 21860 }, { "epoch": 146.78114478114477, "grad_norm": 332.9927673339844, "learning_rate": 3.3123532669940255e-07, "loss": 2.5525, "step": 21870 }, { "epoch": 146.84848484848484, "grad_norm": 163.7563934326172, "learning_rate": 3.3045138301434216e-07, "loss": 2.4605, "step": 21880 }, { "epoch": 146.9158249158249, "grad_norm": 72.39580535888672, "learning_rate": 3.296681844599454e-07, "loss": 2.0216, "step": 21890 }, { "epoch": 146.98316498316498, "grad_norm": 165.74574279785156, "learning_rate": 3.2888573190782164e-07, "loss": 2.3829, "step": 21900 }, { "epoch": 146.98316498316498, "eval_loss": 0.3369799554347992, "eval_mae": 0.4081873595714569, "eval_mse": 0.3369799256324768, "eval_r2": 0.262914776802063, "eval_rmse": 0.5804997206136079, "eval_runtime": 10.7146, "eval_samples_per_second": 443.227, "eval_steps_per_second": 13.906, "step": 21900 }, { "epoch": 147.04713804713805, "grad_norm": 357.22930908203125, "learning_rate": 3.2810402622875043e-07, "loss": 2.394, "step": 21910 }, { "epoch": 147.11447811447812, "grad_norm": 239.71389770507812, "learning_rate": 3.273230682926812e-07, "loss": 2.5644, "step": 21920 }, { "epoch": 147.1818181818182, "grad_norm": 101.01581573486328, "learning_rate": 3.2654285896872977e-07, "loss": 2.4835, "step": 21930 }, { "epoch": 147.24915824915826, "grad_norm": 176.69581604003906, "learning_rate": 3.2576339912517926e-07, "loss": 2.2037, "step": 21940 }, { "epoch": 147.31649831649833, "grad_norm": 257.9039306640625, "learning_rate": 3.249846896294796e-07, "loss": 2.499, "step": 21950 }, { "epoch": 147.3838383838384, "grad_norm": 108.5374526977539, "learning_rate": 3.242067313482445e-07, "loss": 2.3102, "step": 21960 }, { "epoch": 147.45117845117846, "grad_norm": 77.49921417236328, "learning_rate": 3.234295251472522e-07, "loss": 2.3375, "step": 21970 }, { "epoch": 147.5185185185185, "grad_norm": 244.25584411621094, "learning_rate": 3.2265307189144354e-07, "loss": 2.6449, "step": 21980 }, { "epoch": 147.58585858585857, "grad_norm": 415.2712707519531, "learning_rate": 3.218773724449224e-07, "loss": 2.4413, "step": 21990 }, { "epoch": 147.65319865319864, "grad_norm": 44.250213623046875, "learning_rate": 3.2110242767095296e-07, "loss": 2.3635, "step": 22000 }, { "epoch": 147.65319865319864, "eval_loss": 0.3385269343852997, "eval_mae": 0.40911245346069336, "eval_mse": 0.3385269045829773, "eval_r2": 0.25953102111816406, "eval_rmse": 0.5818306494015052, "eval_runtime": 10.7062, "eval_samples_per_second": 443.576, "eval_steps_per_second": 13.917, "step": 22000 }, { "epoch": 147.7205387205387, "grad_norm": 165.32479858398438, "learning_rate": 3.203282384319591e-07, "loss": 2.2468, "step": 22010 }, { "epoch": 147.78787878787878, "grad_norm": 67.38441467285156, "learning_rate": 3.195548055895253e-07, "loss": 2.5238, "step": 22020 }, { "epoch": 147.85521885521885, "grad_norm": 148.4586639404297, "learning_rate": 3.1878213000439303e-07, "loss": 2.2674, "step": 22030 }, { "epoch": 147.92255892255892, "grad_norm": 202.1344757080078, "learning_rate": 3.180102125364611e-07, "loss": 2.197, "step": 22040 }, { "epoch": 147.989898989899, "grad_norm": 83.88426971435547, "learning_rate": 3.1723905404478555e-07, "loss": 2.4314, "step": 22050 }, { "epoch": 148.05387205387206, "grad_norm": 223.3914337158203, "learning_rate": 3.164686553875766e-07, "loss": 2.2034, "step": 22060 }, { "epoch": 148.12121212121212, "grad_norm": 93.47509002685547, "learning_rate": 3.1569901742219996e-07, "loss": 1.9991, "step": 22070 }, { "epoch": 148.1885521885522, "grad_norm": 52.2109375, "learning_rate": 3.14930141005174e-07, "loss": 2.7381, "step": 22080 }, { "epoch": 148.25589225589226, "grad_norm": 196.3258056640625, "learning_rate": 3.1416202699216955e-07, "loss": 2.2714, "step": 22090 }, { "epoch": 148.32323232323233, "grad_norm": 174.58831787109375, "learning_rate": 3.1339467623800974e-07, "loss": 2.2327, "step": 22100 }, { "epoch": 148.32323232323233, "eval_loss": 0.3378140330314636, "eval_mae": 0.4091709852218628, "eval_mse": 0.3378140330314636, "eval_r2": 0.26109033823013306, "eval_rmse": 0.5812177156896232, "eval_runtime": 10.7169, "eval_samples_per_second": 443.132, "eval_steps_per_second": 13.903, "step": 22100 }, { "epoch": 148.3905723905724, "grad_norm": 154.22569274902344, "learning_rate": 3.1262808959666744e-07, "loss": 2.6501, "step": 22110 }, { "epoch": 148.45791245791247, "grad_norm": 285.2557678222656, "learning_rate": 3.1186226792126523e-07, "loss": 2.4147, "step": 22120 }, { "epoch": 148.5252525252525, "grad_norm": 68.08527374267578, "learning_rate": 3.1109721206407526e-07, "loss": 2.5262, "step": 22130 }, { "epoch": 148.59259259259258, "grad_norm": 65.07182312011719, "learning_rate": 3.103329228765161e-07, "loss": 2.5412, "step": 22140 }, { "epoch": 148.65993265993265, "grad_norm": 134.92694091796875, "learning_rate": 3.0956940120915455e-07, "loss": 2.1919, "step": 22150 }, { "epoch": 148.72727272727272, "grad_norm": 136.1979522705078, "learning_rate": 3.08806647911702e-07, "loss": 2.3232, "step": 22160 }, { "epoch": 148.7946127946128, "grad_norm": 211.92660522460938, "learning_rate": 3.0804466383301484e-07, "loss": 2.2848, "step": 22170 }, { "epoch": 148.86195286195286, "grad_norm": 175.27957153320312, "learning_rate": 3.072834498210946e-07, "loss": 2.2512, "step": 22180 }, { "epoch": 148.92929292929293, "grad_norm": 87.52352905273438, "learning_rate": 3.065230067230846e-07, "loss": 2.5912, "step": 22190 }, { "epoch": 148.996632996633, "grad_norm": 255.53941345214844, "learning_rate": 3.057633353852702e-07, "loss": 2.6198, "step": 22200 }, { "epoch": 148.996632996633, "eval_loss": 0.33682721853256226, "eval_mae": 0.4077746570110321, "eval_mse": 0.33682721853256226, "eval_r2": 0.2632487416267395, "eval_rmse": 0.5803681749825383, "eval_runtime": 10.7234, "eval_samples_per_second": 442.864, "eval_steps_per_second": 13.895, "step": 22200 }, { "epoch": 149.06060606060606, "grad_norm": 144.94224548339844, "learning_rate": 3.050044366530792e-07, "loss": 2.3137, "step": 22210 }, { "epoch": 149.12794612794613, "grad_norm": 209.51278686523438, "learning_rate": 3.042463113710777e-07, "loss": 2.372, "step": 22220 }, { "epoch": 149.1952861952862, "grad_norm": 117.88903045654297, "learning_rate": 3.034889603829729e-07, "loss": 1.9489, "step": 22230 }, { "epoch": 149.26262626262627, "grad_norm": 114.91828155517578, "learning_rate": 3.0273238453160934e-07, "loss": 2.3243, "step": 22240 }, { "epoch": 149.32996632996634, "grad_norm": 150.82313537597656, "learning_rate": 3.019765846589679e-07, "loss": 2.4927, "step": 22250 }, { "epoch": 149.3973063973064, "grad_norm": 245.3128204345703, "learning_rate": 3.0122156160616816e-07, "loss": 2.5631, "step": 22260 }, { "epoch": 149.46464646464648, "grad_norm": 121.86282348632812, "learning_rate": 3.004673162134632e-07, "loss": 2.7779, "step": 22270 }, { "epoch": 149.53198653198652, "grad_norm": 116.43756103515625, "learning_rate": 2.9971384932024215e-07, "loss": 2.2113, "step": 22280 }, { "epoch": 149.5993265993266, "grad_norm": 93.19036102294922, "learning_rate": 2.989611617650268e-07, "loss": 2.247, "step": 22290 }, { "epoch": 149.66666666666666, "grad_norm": 103.10211944580078, "learning_rate": 2.9820925438547153e-07, "loss": 2.3414, "step": 22300 }, { "epoch": 149.66666666666666, "eval_loss": 0.33732369542121887, "eval_mae": 0.40909600257873535, "eval_mse": 0.33732369542121887, "eval_r2": 0.26216286420822144, "eval_rmse": 0.580795743287792, "eval_runtime": 10.7208, "eval_samples_per_second": 442.97, "eval_steps_per_second": 13.898, "step": 22300 }, { "epoch": 149.73400673400673, "grad_norm": 80.47632598876953, "learning_rate": 2.9745812801836347e-07, "loss": 2.5499, "step": 22310 }, { "epoch": 149.8013468013468, "grad_norm": 168.04379272460938, "learning_rate": 2.9670778349961965e-07, "loss": 2.3976, "step": 22320 }, { "epoch": 149.86868686868686, "grad_norm": 116.06839752197266, "learning_rate": 2.95958221664287e-07, "loss": 2.6604, "step": 22330 }, { "epoch": 149.93602693602693, "grad_norm": 194.4755401611328, "learning_rate": 2.952094433465423e-07, "loss": 2.1314, "step": 22340 }, { "epoch": 150.0, "grad_norm": 198.44029235839844, "learning_rate": 2.944614493796891e-07, "loss": 2.2235, "step": 22350 }, { "epoch": 150.06734006734007, "grad_norm": 188.71646118164062, "learning_rate": 2.9371424059615934e-07, "loss": 2.3072, "step": 22360 }, { "epoch": 150.13468013468014, "grad_norm": 105.31580352783203, "learning_rate": 2.9296781782751025e-07, "loss": 2.4018, "step": 22370 }, { "epoch": 150.2020202020202, "grad_norm": 100.72821807861328, "learning_rate": 2.922221819044243e-07, "loss": 2.6517, "step": 22380 }, { "epoch": 150.26936026936028, "grad_norm": 321.9920959472656, "learning_rate": 2.914773336567092e-07, "loss": 2.2483, "step": 22390 }, { "epoch": 150.33670033670035, "grad_norm": 204.56031799316406, "learning_rate": 2.9073327391329504e-07, "loss": 2.5214, "step": 22400 }, { "epoch": 150.33670033670035, "eval_loss": 0.3358341455459595, "eval_mae": 0.40713831782341003, "eval_mse": 0.3358341455459595, "eval_r2": 0.26542097330093384, "eval_rmse": 0.5795119891304747, "eval_runtime": 10.7201, "eval_samples_per_second": 443.0, "eval_steps_per_second": 13.899, "step": 22400 }, { "epoch": 150.40404040404042, "grad_norm": 74.85859680175781, "learning_rate": 2.8999000350223466e-07, "loss": 2.2529, "step": 22410 }, { "epoch": 150.47138047138048, "grad_norm": 170.53749084472656, "learning_rate": 2.892475232507031e-07, "loss": 2.3419, "step": 22420 }, { "epoch": 150.53872053872053, "grad_norm": 65.15780639648438, "learning_rate": 2.8850583398499493e-07, "loss": 2.2247, "step": 22430 }, { "epoch": 150.6060606060606, "grad_norm": 119.02288818359375, "learning_rate": 2.877649365305258e-07, "loss": 1.9854, "step": 22440 }, { "epoch": 150.67340067340066, "grad_norm": 108.84069061279297, "learning_rate": 2.87024831711829e-07, "loss": 2.4788, "step": 22450 }, { "epoch": 150.74074074074073, "grad_norm": 169.85752868652344, "learning_rate": 2.862855203525559e-07, "loss": 2.1488, "step": 22460 }, { "epoch": 150.8080808080808, "grad_norm": 218.9785919189453, "learning_rate": 2.8554700327547565e-07, "loss": 2.6347, "step": 22470 }, { "epoch": 150.87542087542087, "grad_norm": 224.1636505126953, "learning_rate": 2.848092813024726e-07, "loss": 2.5912, "step": 22480 }, { "epoch": 150.94276094276094, "grad_norm": 87.15013122558594, "learning_rate": 2.840723552545462e-07, "loss": 2.3529, "step": 22490 }, { "epoch": 151.006734006734, "grad_norm": 234.83973693847656, "learning_rate": 2.8333622595181104e-07, "loss": 2.5875, "step": 22500 }, { "epoch": 151.006734006734, "eval_loss": 0.33698490262031555, "eval_mae": 0.40806856751441956, "eval_mse": 0.33698490262031555, "eval_r2": 0.2629038691520691, "eval_rmse": 0.5805040074110734, "eval_runtime": 10.714, "eval_samples_per_second": 443.252, "eval_steps_per_second": 13.907, "step": 22500 }, { "epoch": 151.07407407407408, "grad_norm": 114.39390563964844, "learning_rate": 2.826008942134942e-07, "loss": 2.4317, "step": 22510 }, { "epoch": 151.14141414141415, "grad_norm": 90.3773422241211, "learning_rate": 2.8186636085793527e-07, "loss": 2.1254, "step": 22520 }, { "epoch": 151.20875420875421, "grad_norm": 103.72860717773438, "learning_rate": 2.811326267025853e-07, "loss": 2.5467, "step": 22530 }, { "epoch": 151.27609427609428, "grad_norm": 120.07908630371094, "learning_rate": 2.803996925640066e-07, "loss": 2.2985, "step": 22540 }, { "epoch": 151.34343434343435, "grad_norm": 148.68307495117188, "learning_rate": 2.7966755925787045e-07, "loss": 2.3908, "step": 22550 }, { "epoch": 151.41077441077442, "grad_norm": 54.00937271118164, "learning_rate": 2.7893622759895664e-07, "loss": 2.5104, "step": 22560 }, { "epoch": 151.4781144781145, "grad_norm": 111.68989562988281, "learning_rate": 2.782056984011539e-07, "loss": 2.6058, "step": 22570 }, { "epoch": 151.54545454545453, "grad_norm": 207.73367309570312, "learning_rate": 2.7747597247745703e-07, "loss": 2.6269, "step": 22580 }, { "epoch": 151.6127946127946, "grad_norm": 114.530029296875, "learning_rate": 2.767470506399667e-07, "loss": 2.1988, "step": 22590 }, { "epoch": 151.68013468013467, "grad_norm": 131.1241455078125, "learning_rate": 2.760189336998898e-07, "loss": 2.3378, "step": 22600 }, { "epoch": 151.68013468013467, "eval_loss": 0.3450542986392975, "eval_mae": 0.41531187295913696, "eval_mse": 0.3450542986392975, "eval_r2": 0.24525350332260132, "eval_rmse": 0.5874132264763005, "eval_runtime": 10.7285, "eval_samples_per_second": 442.654, "eval_steps_per_second": 13.888, "step": 22600 }, { "epoch": 151.74747474747474, "grad_norm": 104.02006530761719, "learning_rate": 2.752916224675362e-07, "loss": 2.3173, "step": 22610 }, { "epoch": 151.8148148148148, "grad_norm": 128.01202392578125, "learning_rate": 2.745651177523205e-07, "loss": 2.4304, "step": 22620 }, { "epoch": 151.88215488215488, "grad_norm": 122.96753692626953, "learning_rate": 2.738394203627583e-07, "loss": 2.3973, "step": 22630 }, { "epoch": 151.94949494949495, "grad_norm": 151.6777801513672, "learning_rate": 2.7311453110646745e-07, "loss": 2.3045, "step": 22640 }, { "epoch": 152.013468013468, "grad_norm": 149.05690002441406, "learning_rate": 2.7239045079016666e-07, "loss": 2.327, "step": 22650 }, { "epoch": 152.08080808080808, "grad_norm": 110.3908920288086, "learning_rate": 2.7166718021967406e-07, "loss": 2.2113, "step": 22660 }, { "epoch": 152.14814814814815, "grad_norm": 87.85759735107422, "learning_rate": 2.709447201999062e-07, "loss": 2.296, "step": 22670 }, { "epoch": 152.21548821548822, "grad_norm": 70.94463348388672, "learning_rate": 2.7022307153487877e-07, "loss": 2.2358, "step": 22680 }, { "epoch": 152.2828282828283, "grad_norm": 141.04443359375, "learning_rate": 2.695022350277032e-07, "loss": 2.6421, "step": 22690 }, { "epoch": 152.35016835016836, "grad_norm": 98.56761932373047, "learning_rate": 2.687822114805882e-07, "loss": 2.1838, "step": 22700 }, { "epoch": 152.35016835016836, "eval_loss": 0.33627986907958984, "eval_mae": 0.40747493505477905, "eval_mse": 0.33627986907958984, "eval_r2": 0.2644460201263428, "eval_rmse": 0.5798964296144526, "eval_runtime": 10.7246, "eval_samples_per_second": 442.816, "eval_steps_per_second": 13.893, "step": 22700 }, { "epoch": 152.41750841750843, "grad_norm": 88.9863052368164, "learning_rate": 2.680630016948371e-07, "loss": 2.252, "step": 22710 }, { "epoch": 152.4848484848485, "grad_norm": 194.81736755371094, "learning_rate": 2.6734460647084713e-07, "loss": 2.2268, "step": 22720 }, { "epoch": 152.55218855218854, "grad_norm": 171.26712036132812, "learning_rate": 2.6662702660811053e-07, "loss": 2.4494, "step": 22730 }, { "epoch": 152.6195286195286, "grad_norm": 123.28209686279297, "learning_rate": 2.659102629052107e-07, "loss": 2.3207, "step": 22740 }, { "epoch": 152.68686868686868, "grad_norm": 51.88018798828125, "learning_rate": 2.6519431615982304e-07, "loss": 2.706, "step": 22750 }, { "epoch": 152.75420875420875, "grad_norm": 195.41500854492188, "learning_rate": 2.6447918716871463e-07, "loss": 2.1749, "step": 22760 }, { "epoch": 152.82154882154882, "grad_norm": 152.97305297851562, "learning_rate": 2.637648767277413e-07, "loss": 2.7701, "step": 22770 }, { "epoch": 152.88888888888889, "grad_norm": 74.16521453857422, "learning_rate": 2.630513856318489e-07, "loss": 2.649, "step": 22780 }, { "epoch": 152.95622895622895, "grad_norm": 106.08614349365234, "learning_rate": 2.6233871467507086e-07, "loss": 2.4506, "step": 22790 }, { "epoch": 153.02020202020202, "grad_norm": 74.22053527832031, "learning_rate": 2.616268646505281e-07, "loss": 2.0393, "step": 22800 }, { "epoch": 153.02020202020202, "eval_loss": 0.3359903395175934, "eval_mae": 0.4069145619869232, "eval_mse": 0.3359903395175934, "eval_r2": 0.2650793194770813, "eval_rmse": 0.5796467368299363, "eval_runtime": 10.7217, "eval_samples_per_second": 442.933, "eval_steps_per_second": 13.897, "step": 22800 }, { "epoch": 153.0875420875421, "grad_norm": 238.091064453125, "learning_rate": 2.6091583635042766e-07, "loss": 2.2513, "step": 22810 }, { "epoch": 153.15488215488216, "grad_norm": 296.131591796875, "learning_rate": 2.602056305660623e-07, "loss": 2.4055, "step": 22820 }, { "epoch": 153.22222222222223, "grad_norm": 117.97735595703125, "learning_rate": 2.594962480878099e-07, "loss": 2.2488, "step": 22830 }, { "epoch": 153.2895622895623, "grad_norm": 134.77536010742188, "learning_rate": 2.587876897051313e-07, "loss": 2.3069, "step": 22840 }, { "epoch": 153.35690235690237, "grad_norm": 171.0098114013672, "learning_rate": 2.580799562065703e-07, "loss": 2.3999, "step": 22850 }, { "epoch": 153.42424242424244, "grad_norm": 62.78079605102539, "learning_rate": 2.573730483797535e-07, "loss": 2.5331, "step": 22860 }, { "epoch": 153.4915824915825, "grad_norm": 115.45639038085938, "learning_rate": 2.5666696701138755e-07, "loss": 2.3308, "step": 22870 }, { "epoch": 153.55892255892255, "grad_norm": 48.74250411987305, "learning_rate": 2.5596171288726043e-07, "loss": 2.1274, "step": 22880 }, { "epoch": 153.62626262626262, "grad_norm": 232.8977508544922, "learning_rate": 2.552572867922387e-07, "loss": 2.4576, "step": 22890 }, { "epoch": 153.69360269360268, "grad_norm": 187.24411010742188, "learning_rate": 2.545536895102671e-07, "loss": 2.715, "step": 22900 }, { "epoch": 153.69360269360268, "eval_loss": 0.33565470576286316, "eval_mae": 0.40652546286582947, "eval_mse": 0.33565470576286316, "eval_r2": 0.2658134698867798, "eval_rmse": 0.5793571487112791, "eval_runtime": 10.7198, "eval_samples_per_second": 443.01, "eval_steps_per_second": 13.899, "step": 22900 }, { "epoch": 153.76094276094275, "grad_norm": 191.2294158935547, "learning_rate": 2.538509218243695e-07, "loss": 2.3382, "step": 22910 }, { "epoch": 153.82828282828282, "grad_norm": 229.5615997314453, "learning_rate": 2.5314898451664503e-07, "loss": 2.653, "step": 22920 }, { "epoch": 153.8956228956229, "grad_norm": 163.66136169433594, "learning_rate": 2.5244787836826895e-07, "loss": 2.3567, "step": 22930 }, { "epoch": 153.96296296296296, "grad_norm": 220.89378356933594, "learning_rate": 2.517476041594925e-07, "loss": 2.2799, "step": 22940 }, { "epoch": 154.02693602693603, "grad_norm": 163.15435791015625, "learning_rate": 2.51118069324082e-07, "loss": 2.489, "step": 22950 }, { "epoch": 154.0942760942761, "grad_norm": 130.7669219970703, "learning_rate": 2.504193779468179e-07, "loss": 2.1728, "step": 22960 }, { "epoch": 154.16161616161617, "grad_norm": 117.81947326660156, "learning_rate": 2.497215207666407e-07, "loss": 2.5618, "step": 22970 }, { "epoch": 154.22895622895624, "grad_norm": 267.14019775390625, "learning_rate": 2.490244985601858e-07, "loss": 2.3302, "step": 22980 }, { "epoch": 154.2962962962963, "grad_norm": 241.96884155273438, "learning_rate": 2.483283121031585e-07, "loss": 2.4261, "step": 22990 }, { "epoch": 154.36363636363637, "grad_norm": 119.80350494384766, "learning_rate": 2.4763296217033404e-07, "loss": 2.5014, "step": 23000 }, { "epoch": 154.36363636363637, "eval_loss": 0.33577993512153625, "eval_mae": 0.4072544276714325, "eval_mse": 0.33577993512153625, "eval_r2": 0.2655395269393921, "eval_rmse": 0.5794652147640411, "eval_runtime": 10.7278, "eval_samples_per_second": 442.683, "eval_steps_per_second": 13.889, "step": 23000 }, { "epoch": 154.43097643097644, "grad_norm": 192.90069580078125, "learning_rate": 2.4693844953555754e-07, "loss": 2.6146, "step": 23010 }, { "epoch": 154.4983164983165, "grad_norm": 102.70032501220703, "learning_rate": 2.462447749717411e-07, "loss": 2.0554, "step": 23020 }, { "epoch": 154.56565656565655, "grad_norm": 107.0307388305664, "learning_rate": 2.455519392508656e-07, "loss": 2.4039, "step": 23030 }, { "epoch": 154.63299663299662, "grad_norm": 79.69402313232422, "learning_rate": 2.4485994314397706e-07, "loss": 2.6651, "step": 23040 }, { "epoch": 154.7003367003367, "grad_norm": 195.61524963378906, "learning_rate": 2.441687874211874e-07, "loss": 2.2566, "step": 23050 }, { "epoch": 154.76767676767676, "grad_norm": 233.92303466796875, "learning_rate": 2.4347847285167413e-07, "loss": 2.5511, "step": 23060 }, { "epoch": 154.83501683501683, "grad_norm": 72.29541778564453, "learning_rate": 2.4278900020367775e-07, "loss": 2.144, "step": 23070 }, { "epoch": 154.9023569023569, "grad_norm": 146.6416778564453, "learning_rate": 2.4210037024450193e-07, "loss": 2.1614, "step": 23080 }, { "epoch": 154.96969696969697, "grad_norm": 124.54537963867188, "learning_rate": 2.414125837405129e-07, "loss": 2.4009, "step": 23090 }, { "epoch": 155.03367003367003, "grad_norm": 250.01956176757812, "learning_rate": 2.4072564145713746e-07, "loss": 2.4638, "step": 23100 }, { "epoch": 155.03367003367003, "eval_loss": 0.3374319076538086, "eval_mae": 0.4093645215034485, "eval_mse": 0.3374319076538086, "eval_r2": 0.26192617416381836, "eval_rmse": 0.5808888944142491, "eval_runtime": 10.727, "eval_samples_per_second": 442.713, "eval_steps_per_second": 13.89, "step": 23100 }, { "epoch": 155.1010101010101, "grad_norm": 93.99860382080078, "learning_rate": 2.4003954415886416e-07, "loss": 2.5718, "step": 23110 }, { "epoch": 155.16835016835017, "grad_norm": 56.71653366088867, "learning_rate": 2.3935429260924013e-07, "loss": 2.1492, "step": 23120 }, { "epoch": 155.23569023569024, "grad_norm": 88.11083221435547, "learning_rate": 2.386698875708714e-07, "loss": 2.4984, "step": 23130 }, { "epoch": 155.3030303030303, "grad_norm": 62.53829574584961, "learning_rate": 2.3798632980542243e-07, "loss": 2.313, "step": 23140 }, { "epoch": 155.37037037037038, "grad_norm": 148.10011291503906, "learning_rate": 2.3730362007361414e-07, "loss": 2.1906, "step": 23150 }, { "epoch": 155.43771043771045, "grad_norm": 253.07933044433594, "learning_rate": 2.3662175913522453e-07, "loss": 2.2759, "step": 23160 }, { "epoch": 155.5050505050505, "grad_norm": 180.82594299316406, "learning_rate": 2.3594074774908602e-07, "loss": 3.0107, "step": 23170 }, { "epoch": 155.57239057239056, "grad_norm": 72.9843521118164, "learning_rate": 2.352605866730859e-07, "loss": 2.3399, "step": 23180 }, { "epoch": 155.63973063973063, "grad_norm": 165.7374267578125, "learning_rate": 2.3458127666416584e-07, "loss": 2.3668, "step": 23190 }, { "epoch": 155.7070707070707, "grad_norm": 130.50437927246094, "learning_rate": 2.3390281847831937e-07, "loss": 2.701, "step": 23200 }, { "epoch": 155.7070707070707, "eval_loss": 0.3379066586494446, "eval_mae": 0.4094736874103546, "eval_mse": 0.3379066586494446, "eval_r2": 0.26088768243789673, "eval_rmse": 0.5812973926050629, "eval_runtime": 10.7323, "eval_samples_per_second": 442.498, "eval_steps_per_second": 13.883, "step": 23200 }, { "epoch": 155.77441077441077, "grad_norm": 116.3995361328125, "learning_rate": 2.3322521287059237e-07, "loss": 2.0992, "step": 23210 }, { "epoch": 155.84175084175084, "grad_norm": 141.0975341796875, "learning_rate": 2.325484605950825e-07, "loss": 2.3692, "step": 23220 }, { "epoch": 155.9090909090909, "grad_norm": 128.7677001953125, "learning_rate": 2.318725624049367e-07, "loss": 1.9569, "step": 23230 }, { "epoch": 155.97643097643098, "grad_norm": 74.41371154785156, "learning_rate": 2.3119751905235262e-07, "loss": 2.2841, "step": 23240 }, { "epoch": 156.04040404040404, "grad_norm": 76.38838195800781, "learning_rate": 2.3052333128857548e-07, "loss": 2.3252, "step": 23250 }, { "epoch": 156.1077441077441, "grad_norm": 158.4055938720703, "learning_rate": 2.2984999986389862e-07, "loss": 2.1643, "step": 23260 }, { "epoch": 156.17508417508418, "grad_norm": 189.81121826171875, "learning_rate": 2.291775255276631e-07, "loss": 2.2071, "step": 23270 }, { "epoch": 156.24242424242425, "grad_norm": 164.90631103515625, "learning_rate": 2.2850590902825528e-07, "loss": 2.4868, "step": 23280 }, { "epoch": 156.30976430976432, "grad_norm": 60.10997772216797, "learning_rate": 2.2783515111310682e-07, "loss": 2.4813, "step": 23290 }, { "epoch": 156.3771043771044, "grad_norm": 267.7864990234375, "learning_rate": 2.2716525252869478e-07, "loss": 2.5174, "step": 23300 }, { "epoch": 156.3771043771044, "eval_loss": 0.33636143803596497, "eval_mae": 0.4077773094177246, "eval_mse": 0.3363614082336426, "eval_r2": 0.26426762342453003, "eval_rmse": 0.5799667302816969, "eval_runtime": 10.723, "eval_samples_per_second": 442.879, "eval_steps_per_second": 13.895, "step": 23300 }, { "epoch": 156.44444444444446, "grad_norm": 63.297401428222656, "learning_rate": 2.2649621402053875e-07, "loss": 2.1265, "step": 23310 }, { "epoch": 156.5117845117845, "grad_norm": 266.5071716308594, "learning_rate": 2.2582803633320235e-07, "loss": 2.3581, "step": 23320 }, { "epoch": 156.57912457912457, "grad_norm": 90.08997344970703, "learning_rate": 2.2516072021029e-07, "loss": 2.4083, "step": 23330 }, { "epoch": 156.64646464646464, "grad_norm": 135.9619598388672, "learning_rate": 2.2449426639444814e-07, "loss": 2.4176, "step": 23340 }, { "epoch": 156.7138047138047, "grad_norm": 188.6826171875, "learning_rate": 2.2382867562736308e-07, "loss": 2.5373, "step": 23350 }, { "epoch": 156.78114478114477, "grad_norm": 98.25405883789062, "learning_rate": 2.2316394864976074e-07, "loss": 2.4215, "step": 23360 }, { "epoch": 156.84848484848484, "grad_norm": 68.46277618408203, "learning_rate": 2.2250008620140626e-07, "loss": 2.4493, "step": 23370 }, { "epoch": 156.9158249158249, "grad_norm": 120.7179946899414, "learning_rate": 2.218370890211021e-07, "loss": 2.4554, "step": 23380 }, { "epoch": 156.98316498316498, "grad_norm": 95.48876190185547, "learning_rate": 2.2117495784668749e-07, "loss": 2.3647, "step": 23390 }, { "epoch": 157.04713804713805, "grad_norm": 189.3860626220703, "learning_rate": 2.205136934150391e-07, "loss": 2.2701, "step": 23400 }, { "epoch": 157.04713804713805, "eval_loss": 0.3365381062030792, "eval_mae": 0.4083159565925598, "eval_mse": 0.3365381062030792, "eval_r2": 0.2638811469078064, "eval_rmse": 0.5801190448546567, "eval_runtime": 10.7278, "eval_samples_per_second": 442.68, "eval_steps_per_second": 13.889, "step": 23400 }, { "epoch": 157.11447811447812, "grad_norm": 159.6084747314453, "learning_rate": 2.1985329646206753e-07, "loss": 2.5193, "step": 23410 }, { "epoch": 157.1818181818182, "grad_norm": 65.19659423828125, "learning_rate": 2.1919376772271924e-07, "loss": 2.3927, "step": 23420 }, { "epoch": 157.24915824915826, "grad_norm": 125.791015625, "learning_rate": 2.1853510793097374e-07, "loss": 2.1983, "step": 23430 }, { "epoch": 157.31649831649833, "grad_norm": 215.3560333251953, "learning_rate": 2.1787731781984343e-07, "loss": 2.2996, "step": 23440 }, { "epoch": 157.3838383838384, "grad_norm": 72.07523345947266, "learning_rate": 2.172203981213735e-07, "loss": 2.329, "step": 23450 }, { "epoch": 157.45117845117846, "grad_norm": 158.90322875976562, "learning_rate": 2.1656434956663995e-07, "loss": 2.3363, "step": 23460 }, { "epoch": 157.5185185185185, "grad_norm": 88.74877166748047, "learning_rate": 2.1590917288574894e-07, "loss": 2.3941, "step": 23470 }, { "epoch": 157.58585858585857, "grad_norm": 112.48695373535156, "learning_rate": 2.1525486880783772e-07, "loss": 2.4476, "step": 23480 }, { "epoch": 157.65319865319864, "grad_norm": 177.3655242919922, "learning_rate": 2.1460143806107068e-07, "loss": 2.3157, "step": 23490 }, { "epoch": 157.7205387205387, "grad_norm": 96.82882690429688, "learning_rate": 2.139488813726419e-07, "loss": 2.5055, "step": 23500 }, { "epoch": 157.7205387205387, "eval_loss": 0.33719608187675476, "eval_mae": 0.4089818000793457, "eval_mse": 0.33719608187675476, "eval_r2": 0.2624419331550598, "eval_rmse": 0.5806858719451979, "eval_runtime": 10.724, "eval_samples_per_second": 442.838, "eval_steps_per_second": 13.894, "step": 23500 }, { "epoch": 157.78787878787878, "grad_norm": 64.49819946289062, "learning_rate": 2.1329719946877155e-07, "loss": 2.4093, "step": 23510 }, { "epoch": 157.85521885521885, "grad_norm": 181.0876007080078, "learning_rate": 2.1264639307470643e-07, "loss": 2.2147, "step": 23520 }, { "epoch": 157.92255892255892, "grad_norm": 128.9366912841797, "learning_rate": 2.1199646291471996e-07, "loss": 2.2483, "step": 23530 }, { "epoch": 157.989898989899, "grad_norm": 107.03459167480469, "learning_rate": 2.113474097121093e-07, "loss": 2.3721, "step": 23540 }, { "epoch": 158.05387205387206, "grad_norm": 49.166072845458984, "learning_rate": 2.1069923418919577e-07, "loss": 2.2836, "step": 23550 }, { "epoch": 158.12121212121212, "grad_norm": 163.16419982910156, "learning_rate": 2.1005193706732495e-07, "loss": 2.4076, "step": 23560 }, { "epoch": 158.1885521885522, "grad_norm": 164.8684539794922, "learning_rate": 2.0940551906686342e-07, "loss": 2.1287, "step": 23570 }, { "epoch": 158.25589225589226, "grad_norm": 129.19239807128906, "learning_rate": 2.0875998090720083e-07, "loss": 2.5867, "step": 23580 }, { "epoch": 158.32323232323233, "grad_norm": 136.60604858398438, "learning_rate": 2.0811532330674664e-07, "loss": 2.3182, "step": 23590 }, { "epoch": 158.3905723905724, "grad_norm": 133.57229614257812, "learning_rate": 2.0747154698293024e-07, "loss": 2.5277, "step": 23600 }, { "epoch": 158.3905723905724, "eval_loss": 0.3365316689014435, "eval_mae": 0.4080581068992615, "eval_mse": 0.3365316689014435, "eval_r2": 0.2638952136039734, "eval_rmse": 0.5801134965689416, "eval_runtime": 10.7383, "eval_samples_per_second": 442.248, "eval_steps_per_second": 13.876, "step": 23600 }, { "epoch": 158.45791245791247, "grad_norm": 156.251220703125, "learning_rate": 2.068286526522015e-07, "loss": 2.4316, "step": 23610 }, { "epoch": 158.5252525252525, "grad_norm": 98.08661651611328, "learning_rate": 2.0618664103002746e-07, "loss": 2.3785, "step": 23620 }, { "epoch": 158.59259259259258, "grad_norm": 134.53887939453125, "learning_rate": 2.0554551283089338e-07, "loss": 2.2896, "step": 23630 }, { "epoch": 158.65993265993265, "grad_norm": 112.81904602050781, "learning_rate": 2.0490526876830117e-07, "loss": 2.1662, "step": 23640 }, { "epoch": 158.72727272727272, "grad_norm": 70.0260238647461, "learning_rate": 2.0426590955476875e-07, "loss": 2.3897, "step": 23650 }, { "epoch": 158.7946127946128, "grad_norm": 181.40335083007812, "learning_rate": 2.0362743590182986e-07, "loss": 2.2831, "step": 23660 }, { "epoch": 158.86195286195286, "grad_norm": 112.0673599243164, "learning_rate": 2.0298984852003175e-07, "loss": 2.3873, "step": 23670 }, { "epoch": 158.92929292929293, "grad_norm": 131.52574157714844, "learning_rate": 2.0235314811893655e-07, "loss": 2.5923, "step": 23680 }, { "epoch": 158.996632996633, "grad_norm": 84.11715698242188, "learning_rate": 2.0171733540711845e-07, "loss": 2.5625, "step": 23690 }, { "epoch": 159.06060606060606, "grad_norm": 90.01322937011719, "learning_rate": 2.010824110921634e-07, "loss": 2.2411, "step": 23700 }, { "epoch": 159.06060606060606, "eval_loss": 0.3368662893772125, "eval_mae": 0.40775981545448303, "eval_mse": 0.3368662893772125, "eval_r2": 0.2631632685661316, "eval_rmse": 0.5804018344020051, "eval_runtime": 10.7302, "eval_samples_per_second": 442.583, "eval_steps_per_second": 13.886, "step": 23700 }, { "epoch": 159.12794612794613, "grad_norm": 93.04595184326172, "learning_rate": 2.0044837588066977e-07, "loss": 2.4058, "step": 23710 }, { "epoch": 159.1952861952862, "grad_norm": 209.85018920898438, "learning_rate": 1.998152304782459e-07, "loss": 2.5771, "step": 23720 }, { "epoch": 159.26262626262627, "grad_norm": 238.7420196533203, "learning_rate": 1.9918297558950926e-07, "loss": 2.2966, "step": 23730 }, { "epoch": 159.32996632996634, "grad_norm": 167.3374786376953, "learning_rate": 1.9855161191808756e-07, "loss": 2.1837, "step": 23740 }, { "epoch": 159.3973063973064, "grad_norm": 137.87020874023438, "learning_rate": 1.9792114016661542e-07, "loss": 2.2632, "step": 23750 }, { "epoch": 159.46464646464648, "grad_norm": 31.15089225769043, "learning_rate": 1.9729156103673618e-07, "loss": 2.3484, "step": 23760 }, { "epoch": 159.53198653198652, "grad_norm": 199.7539825439453, "learning_rate": 1.9666287522909853e-07, "loss": 2.4281, "step": 23770 }, { "epoch": 159.5993265993266, "grad_norm": 156.38111877441406, "learning_rate": 1.9603508344335727e-07, "loss": 2.6181, "step": 23780 }, { "epoch": 159.66666666666666, "grad_norm": 173.66314697265625, "learning_rate": 1.9540818637817325e-07, "loss": 2.2318, "step": 23790 }, { "epoch": 159.73400673400673, "grad_norm": 144.55044555664062, "learning_rate": 1.9478218473121044e-07, "loss": 2.2654, "step": 23800 }, { "epoch": 159.73400673400673, "eval_loss": 0.33660104870796204, "eval_mae": 0.4082202911376953, "eval_mse": 0.33660101890563965, "eval_r2": 0.2637435793876648, "eval_rmse": 0.5801732662796861, "eval_runtime": 10.7371, "eval_samples_per_second": 442.296, "eval_steps_per_second": 13.877, "step": 23800 }, { "epoch": 159.8013468013468, "grad_norm": 50.6551399230957, "learning_rate": 1.941570791991365e-07, "loss": 2.5644, "step": 23810 }, { "epoch": 159.86868686868686, "grad_norm": 123.62770080566406, "learning_rate": 1.9353287047762245e-07, "loss": 2.5947, "step": 23820 }, { "epoch": 159.93602693602693, "grad_norm": 61.61752700805664, "learning_rate": 1.9290955926134057e-07, "loss": 2.2561, "step": 23830 }, { "epoch": 160.0, "grad_norm": 313.4844055175781, "learning_rate": 1.9228714624396503e-07, "loss": 2.1564, "step": 23840 }, { "epoch": 160.06734006734007, "grad_norm": 196.80072021484375, "learning_rate": 1.9166563211816965e-07, "loss": 2.5686, "step": 23850 }, { "epoch": 160.13468013468014, "grad_norm": 195.38194274902344, "learning_rate": 1.9104501757562806e-07, "loss": 2.6168, "step": 23860 }, { "epoch": 160.2020202020202, "grad_norm": 312.32391357421875, "learning_rate": 1.9042530330701344e-07, "loss": 2.2406, "step": 23870 }, { "epoch": 160.26936026936028, "grad_norm": 288.4095153808594, "learning_rate": 1.8980649000199622e-07, "loss": 2.2626, "step": 23880 }, { "epoch": 160.33670033670035, "grad_norm": 59.112552642822266, "learning_rate": 1.8918857834924451e-07, "loss": 2.3433, "step": 23890 }, { "epoch": 160.40404040404042, "grad_norm": 125.6786880493164, "learning_rate": 1.8857156903642303e-07, "loss": 2.264, "step": 23900 }, { "epoch": 160.40404040404042, "eval_loss": 0.3387072682380676, "eval_mae": 0.4099055826663971, "eval_mse": 0.3387072682380676, "eval_r2": 0.2591365575790405, "eval_rmse": 0.5819856254565637, "eval_runtime": 10.7393, "eval_samples_per_second": 442.209, "eval_steps_per_second": 13.874, "step": 23900 }, { "epoch": 160.47138047138048, "grad_norm": 85.7762451171875, "learning_rate": 1.879554627501919e-07, "loss": 2.2598, "step": 23910 }, { "epoch": 160.53872053872053, "grad_norm": 132.02236938476562, "learning_rate": 1.8734026017620718e-07, "loss": 2.3505, "step": 23920 }, { "epoch": 160.6060606060606, "grad_norm": 222.99317932128906, "learning_rate": 1.8672596199911805e-07, "loss": 2.4424, "step": 23930 }, { "epoch": 160.67340067340066, "grad_norm": 101.09620666503906, "learning_rate": 1.8611256890256844e-07, "loss": 2.4596, "step": 23940 }, { "epoch": 160.74074074074073, "grad_norm": 228.9169921875, "learning_rate": 1.8550008156919395e-07, "loss": 2.4273, "step": 23950 }, { "epoch": 160.8080808080808, "grad_norm": 125.69520568847656, "learning_rate": 1.8488850068062244e-07, "loss": 2.1895, "step": 23960 }, { "epoch": 160.87542087542087, "grad_norm": 271.81781005859375, "learning_rate": 1.8427782691747362e-07, "loss": 2.5701, "step": 23970 }, { "epoch": 160.94276094276094, "grad_norm": 271.92498779296875, "learning_rate": 1.8366806095935704e-07, "loss": 2.5119, "step": 23980 }, { "epoch": 161.006734006734, "grad_norm": 174.77963256835938, "learning_rate": 1.830592034848717e-07, "loss": 1.9823, "step": 23990 }, { "epoch": 161.07407407407408, "grad_norm": 318.6176452636719, "learning_rate": 1.8245125517160665e-07, "loss": 2.7132, "step": 24000 }, { "epoch": 161.07407407407408, "eval_loss": 0.33594098687171936, "eval_mae": 0.40732312202453613, "eval_mse": 0.33594098687171936, "eval_r2": 0.26518726348876953, "eval_rmse": 0.5796041639530546, "eval_runtime": 10.721, "eval_samples_per_second": 442.962, "eval_steps_per_second": 13.898, "step": 24000 }, { "epoch": 161.14141414141415, "grad_norm": 157.5255126953125, "learning_rate": 1.8184421669613793e-07, "loss": 2.3794, "step": 24010 }, { "epoch": 161.20875420875421, "grad_norm": 135.75927734375, "learning_rate": 1.8123808873403023e-07, "loss": 2.0819, "step": 24020 }, { "epoch": 161.27609427609428, "grad_norm": 157.0669708251953, "learning_rate": 1.806328719598338e-07, "loss": 2.4168, "step": 24030 }, { "epoch": 161.34343434343435, "grad_norm": 58.99803161621094, "learning_rate": 1.8002856704708536e-07, "loss": 2.3262, "step": 24040 }, { "epoch": 161.41077441077442, "grad_norm": 173.07089233398438, "learning_rate": 1.794251746683072e-07, "loss": 2.3497, "step": 24050 }, { "epoch": 161.4781144781145, "grad_norm": 79.57374572753906, "learning_rate": 1.7882269549500552e-07, "loss": 2.6288, "step": 24060 }, { "epoch": 161.54545454545453, "grad_norm": 122.7425765991211, "learning_rate": 1.7822113019766992e-07, "loss": 2.4389, "step": 24070 }, { "epoch": 161.6127946127946, "grad_norm": 369.85040283203125, "learning_rate": 1.776204794457743e-07, "loss": 2.2981, "step": 24080 }, { "epoch": 161.68013468013467, "grad_norm": 215.61907958984375, "learning_rate": 1.770207439077732e-07, "loss": 2.4256, "step": 24090 }, { "epoch": 161.74747474747474, "grad_norm": 119.0063247680664, "learning_rate": 1.7642192425110393e-07, "loss": 2.367, "step": 24100 }, { "epoch": 161.74747474747474, "eval_loss": 0.3367725908756256, "eval_mae": 0.40848618745803833, "eval_mse": 0.3367725908756256, "eval_r2": 0.26336830854415894, "eval_rmse": 0.5803211101412955, "eval_runtime": 10.7181, "eval_samples_per_second": 443.083, "eval_steps_per_second": 13.902, "step": 24100 }, { "epoch": 161.8148148148148, "grad_norm": 226.42237854003906, "learning_rate": 1.7582402114218353e-07, "loss": 2.0027, "step": 24110 }, { "epoch": 161.88215488215488, "grad_norm": 168.94300842285156, "learning_rate": 1.7522703524640946e-07, "loss": 2.6148, "step": 24120 }, { "epoch": 161.94949494949495, "grad_norm": 127.48029327392578, "learning_rate": 1.7463096722815863e-07, "loss": 2.4724, "step": 24130 }, { "epoch": 162.013468013468, "grad_norm": 224.5193634033203, "learning_rate": 1.7403581775078624e-07, "loss": 1.9399, "step": 24140 }, { "epoch": 162.08080808080808, "grad_norm": 312.4303283691406, "learning_rate": 1.7344158747662497e-07, "loss": 2.1818, "step": 24150 }, { "epoch": 162.14814814814815, "grad_norm": 87.81298065185547, "learning_rate": 1.7284827706698513e-07, "loss": 2.5253, "step": 24160 }, { "epoch": 162.21548821548822, "grad_norm": 38.507408142089844, "learning_rate": 1.72255887182153e-07, "loss": 2.1976, "step": 24170 }, { "epoch": 162.2828282828283, "grad_norm": 148.67735290527344, "learning_rate": 1.7166441848139024e-07, "loss": 2.1501, "step": 24180 }, { "epoch": 162.35016835016836, "grad_norm": 241.87069702148438, "learning_rate": 1.7107387162293375e-07, "loss": 2.515, "step": 24190 }, { "epoch": 162.41750841750843, "grad_norm": 142.26173400878906, "learning_rate": 1.7048424726399391e-07, "loss": 2.3339, "step": 24200 }, { "epoch": 162.41750841750843, "eval_loss": 0.3373616337776184, "eval_mae": 0.4080221354961395, "eval_mse": 0.3373616337776184, "eval_r2": 0.2620798349380493, "eval_rmse": 0.5808284030396743, "eval_runtime": 10.7009, "eval_samples_per_second": 443.796, "eval_steps_per_second": 13.924, "step": 24200 }, { "epoch": 162.4848484848485, "grad_norm": 81.12384796142578, "learning_rate": 1.6989554606075552e-07, "loss": 2.1429, "step": 24210 }, { "epoch": 162.55218855218854, "grad_norm": 72.93904876708984, "learning_rate": 1.693077686683746e-07, "loss": 2.3734, "step": 24220 }, { "epoch": 162.6195286195286, "grad_norm": 49.9387321472168, "learning_rate": 1.6872091574098059e-07, "loss": 2.4711, "step": 24230 }, { "epoch": 162.68686868686868, "grad_norm": 76.23902893066406, "learning_rate": 1.6813498793167303e-07, "loss": 2.6049, "step": 24240 }, { "epoch": 162.75420875420875, "grad_norm": 95.70035552978516, "learning_rate": 1.6754998589252189e-07, "loss": 2.5724, "step": 24250 }, { "epoch": 162.82154882154882, "grad_norm": 60.06526565551758, "learning_rate": 1.6696591027456785e-07, "loss": 2.3475, "step": 24260 }, { "epoch": 162.88888888888889, "grad_norm": 60.71658706665039, "learning_rate": 1.6638276172781962e-07, "loss": 2.6278, "step": 24270 }, { "epoch": 162.95622895622895, "grad_norm": 64.46642303466797, "learning_rate": 1.658005409012543e-07, "loss": 2.3272, "step": 24280 }, { "epoch": 163.02020202020202, "grad_norm": 117.49248504638672, "learning_rate": 1.652192484428172e-07, "loss": 2.2219, "step": 24290 }, { "epoch": 163.0875420875421, "grad_norm": 94.30049133300781, "learning_rate": 1.6463888499941957e-07, "loss": 2.6522, "step": 24300 }, { "epoch": 163.0875420875421, "eval_loss": 0.3365830183029175, "eval_mae": 0.40842917561531067, "eval_mse": 0.33658304810523987, "eval_r2": 0.2637828588485718, "eval_rmse": 0.5801577786302963, "eval_runtime": 10.71, "eval_samples_per_second": 443.417, "eval_steps_per_second": 13.912, "step": 24300 }, { "epoch": 163.15488215488216, "grad_norm": 298.8396911621094, "learning_rate": 1.6405945121693977e-07, "loss": 2.3045, "step": 24310 }, { "epoch": 163.22222222222223, "grad_norm": 117.95197296142578, "learning_rate": 1.634809477402207e-07, "loss": 2.2412, "step": 24320 }, { "epoch": 163.2895622895623, "grad_norm": 124.6522445678711, "learning_rate": 1.6290337521306997e-07, "loss": 2.3902, "step": 24330 }, { "epoch": 163.35690235690237, "grad_norm": 256.4619140625, "learning_rate": 1.6232673427825994e-07, "loss": 2.2378, "step": 24340 }, { "epoch": 163.42424242424244, "grad_norm": 150.68118286132812, "learning_rate": 1.617510255775254e-07, "loss": 2.1915, "step": 24350 }, { "epoch": 163.4915824915825, "grad_norm": 75.3040542602539, "learning_rate": 1.6117624975156385e-07, "loss": 2.4339, "step": 24360 }, { "epoch": 163.55892255892255, "grad_norm": 196.86627197265625, "learning_rate": 1.6060240744003517e-07, "loss": 2.5294, "step": 24370 }, { "epoch": 163.62626262626262, "grad_norm": 65.72200775146484, "learning_rate": 1.600294992815594e-07, "loss": 1.9814, "step": 24380 }, { "epoch": 163.69360269360268, "grad_norm": 140.19886779785156, "learning_rate": 1.5945752591371797e-07, "loss": 2.3924, "step": 24390 }, { "epoch": 163.76094276094275, "grad_norm": 122.47493743896484, "learning_rate": 1.5888648797305126e-07, "loss": 2.2613, "step": 24400 }, { "epoch": 163.76094276094275, "eval_loss": 0.3358140289783478, "eval_mae": 0.40686315298080444, "eval_mse": 0.3358140289783478, "eval_r2": 0.26546502113342285, "eval_rmse": 0.5794946323982197, "eval_runtime": 10.6947, "eval_samples_per_second": 444.052, "eval_steps_per_second": 13.932, "step": 24400 }, { "epoch": 163.82828282828282, "grad_norm": 124.00269317626953, "learning_rate": 1.5831638609505872e-07, "loss": 2.6315, "step": 24410 }, { "epoch": 163.8956228956229, "grad_norm": 185.25906372070312, "learning_rate": 1.5774722091419844e-07, "loss": 2.2281, "step": 24420 }, { "epoch": 163.96296296296296, "grad_norm": 79.86769104003906, "learning_rate": 1.5717899306388593e-07, "loss": 2.5522, "step": 24430 }, { "epoch": 164.02693602693603, "grad_norm": 38.8950080871582, "learning_rate": 1.5661170317649309e-07, "loss": 2.4394, "step": 24440 }, { "epoch": 164.0942760942761, "grad_norm": 53.7888069152832, "learning_rate": 1.56045351883349e-07, "loss": 2.394, "step": 24450 }, { "epoch": 164.16161616161617, "grad_norm": 83.70942687988281, "learning_rate": 1.5547993981473683e-07, "loss": 2.0963, "step": 24460 }, { "epoch": 164.22895622895624, "grad_norm": 228.50238037109375, "learning_rate": 1.5491546759989582e-07, "loss": 2.2187, "step": 24470 }, { "epoch": 164.2962962962963, "grad_norm": 130.05332946777344, "learning_rate": 1.5435193586701811e-07, "loss": 2.3179, "step": 24480 }, { "epoch": 164.36363636363637, "grad_norm": 131.75082397460938, "learning_rate": 1.5378934524325037e-07, "loss": 2.526, "step": 24490 }, { "epoch": 164.43097643097644, "grad_norm": 150.62481689453125, "learning_rate": 1.53227696354691e-07, "loss": 2.1043, "step": 24500 }, { "epoch": 164.43097643097644, "eval_loss": 0.3360461890697479, "eval_mae": 0.4069744050502777, "eval_mse": 0.3360461890697479, "eval_r2": 0.2649571895599365, "eval_rmse": 0.5796949103362457, "eval_runtime": 10.7004, "eval_samples_per_second": 443.817, "eval_steps_per_second": 13.925, "step": 24500 }, { "epoch": 164.4983164983165, "grad_norm": 164.65126037597656, "learning_rate": 1.5266698982639048e-07, "loss": 2.7552, "step": 24510 }, { "epoch": 164.56565656565655, "grad_norm": 240.79339599609375, "learning_rate": 1.5210722628235118e-07, "loss": 2.5595, "step": 24520 }, { "epoch": 164.63299663299662, "grad_norm": 96.67094421386719, "learning_rate": 1.5154840634552535e-07, "loss": 2.3012, "step": 24530 }, { "epoch": 164.7003367003367, "grad_norm": 94.641357421875, "learning_rate": 1.509905306378152e-07, "loss": 2.3948, "step": 24540 }, { "epoch": 164.76767676767676, "grad_norm": 149.3328094482422, "learning_rate": 1.5043359978007265e-07, "loss": 2.6955, "step": 24550 }, { "epoch": 164.83501683501683, "grad_norm": 43.57986831665039, "learning_rate": 1.4987761439209723e-07, "loss": 2.3145, "step": 24560 }, { "epoch": 164.9023569023569, "grad_norm": 435.473876953125, "learning_rate": 1.4932257509263747e-07, "loss": 2.2023, "step": 24570 }, { "epoch": 164.96969696969697, "grad_norm": 84.3860855102539, "learning_rate": 1.4876848249938778e-07, "loss": 2.2274, "step": 24580 }, { "epoch": 165.03367003367003, "grad_norm": 158.25051879882812, "learning_rate": 1.482153372289896e-07, "loss": 2.4742, "step": 24590 }, { "epoch": 165.1010101010101, "grad_norm": 89.28656768798828, "learning_rate": 1.4766313989703028e-07, "loss": 2.2497, "step": 24600 }, { "epoch": 165.1010101010101, "eval_loss": 0.33550912141799927, "eval_mae": 0.40699273347854614, "eval_mse": 0.33550912141799927, "eval_r2": 0.2661318778991699, "eval_rmse": 0.57923149208067, "eval_runtime": 10.713, "eval_samples_per_second": 443.293, "eval_steps_per_second": 13.908, "step": 24600 }, { "epoch": 165.16835016835017, "grad_norm": 51.418235778808594, "learning_rate": 1.471118911180419e-07, "loss": 2.1412, "step": 24610 }, { "epoch": 165.23569023569024, "grad_norm": 161.10617065429688, "learning_rate": 1.4656159150550072e-07, "loss": 2.344, "step": 24620 }, { "epoch": 165.3030303030303, "grad_norm": 211.3642578125, "learning_rate": 1.4601224167182736e-07, "loss": 2.4756, "step": 24630 }, { "epoch": 165.37037037037038, "grad_norm": 218.07955932617188, "learning_rate": 1.454638422283847e-07, "loss": 2.3631, "step": 24640 }, { "epoch": 165.43771043771045, "grad_norm": 98.64012908935547, "learning_rate": 1.4491639378547882e-07, "loss": 2.2476, "step": 24650 }, { "epoch": 165.5050505050505, "grad_norm": 182.74569702148438, "learning_rate": 1.4436989695235647e-07, "loss": 2.4633, "step": 24660 }, { "epoch": 165.57239057239056, "grad_norm": 282.4273986816406, "learning_rate": 1.438243523372058e-07, "loss": 2.5697, "step": 24670 }, { "epoch": 165.63973063973063, "grad_norm": 176.54922485351562, "learning_rate": 1.4327976054715552e-07, "loss": 2.2203, "step": 24680 }, { "epoch": 165.7070707070707, "grad_norm": 154.91441345214844, "learning_rate": 1.427361221882737e-07, "loss": 2.0742, "step": 24690 }, { "epoch": 165.77441077441077, "grad_norm": 99.74866485595703, "learning_rate": 1.4219343786556693e-07, "loss": 2.1889, "step": 24700 }, { "epoch": 165.77441077441077, "eval_loss": 0.33605799078941345, "eval_mae": 0.40768924355506897, "eval_mse": 0.33605799078941345, "eval_r2": 0.264931321144104, "eval_rmse": 0.579705089497594, "eval_runtime": 10.6973, "eval_samples_per_second": 443.944, "eval_steps_per_second": 13.929, "step": 24700 }, { "epoch": 165.84175084175084, "grad_norm": 54.28396224975586, "learning_rate": 1.4165170818298112e-07, "loss": 2.5085, "step": 24710 }, { "epoch": 165.9090909090909, "grad_norm": 115.67529296875, "learning_rate": 1.4111093374339878e-07, "loss": 2.2717, "step": 24720 }, { "epoch": 165.97643097643098, "grad_norm": 229.478271484375, "learning_rate": 1.4057111514863985e-07, "loss": 2.8443, "step": 24730 }, { "epoch": 166.04040404040404, "grad_norm": 75.09101867675781, "learning_rate": 1.4003225299945997e-07, "loss": 2.5011, "step": 24740 }, { "epoch": 166.1077441077441, "grad_norm": 63.33137130737305, "learning_rate": 1.3949434789555148e-07, "loss": 2.3261, "step": 24750 }, { "epoch": 166.17508417508418, "grad_norm": 134.7906951904297, "learning_rate": 1.389574004355405e-07, "loss": 2.2734, "step": 24760 }, { "epoch": 166.24242424242425, "grad_norm": 140.3641357421875, "learning_rate": 1.384214112169877e-07, "loss": 2.5779, "step": 24770 }, { "epoch": 166.30976430976432, "grad_norm": 250.7620849609375, "learning_rate": 1.3788638083638804e-07, "loss": 2.421, "step": 24780 }, { "epoch": 166.3771043771044, "grad_norm": 168.46401977539062, "learning_rate": 1.3735230988916834e-07, "loss": 2.3106, "step": 24790 }, { "epoch": 166.44444444444446, "grad_norm": 142.17306518554688, "learning_rate": 1.368191989696883e-07, "loss": 2.1615, "step": 24800 }, { "epoch": 166.44444444444446, "eval_loss": 0.33584561944007874, "eval_mae": 0.40699589252471924, "eval_mse": 0.33584561944007874, "eval_r2": 0.2653958201408386, "eval_rmse": 0.579521888663473, "eval_runtime": 10.7105, "eval_samples_per_second": 443.395, "eval_steps_per_second": 13.912, "step": 24800 }, { "epoch": 166.5117845117845, "grad_norm": 248.72036743164062, "learning_rate": 1.362870486712393e-07, "loss": 2.2851, "step": 24810 }, { "epoch": 166.57912457912457, "grad_norm": 116.70413970947266, "learning_rate": 1.3575585958604318e-07, "loss": 2.8347, "step": 24820 }, { "epoch": 166.64646464646464, "grad_norm": 109.37145233154297, "learning_rate": 1.3522563230525274e-07, "loss": 2.2827, "step": 24830 }, { "epoch": 166.7138047138047, "grad_norm": 166.38865661621094, "learning_rate": 1.3469636741894973e-07, "loss": 2.2577, "step": 24840 }, { "epoch": 166.78114478114477, "grad_norm": 127.49479675292969, "learning_rate": 1.3416806551614502e-07, "loss": 2.2588, "step": 24850 }, { "epoch": 166.84848484848484, "grad_norm": 108.73330688476562, "learning_rate": 1.3364072718477827e-07, "loss": 2.3499, "step": 24860 }, { "epoch": 166.9158249158249, "grad_norm": 228.16761779785156, "learning_rate": 1.3311435301171615e-07, "loss": 2.1909, "step": 24870 }, { "epoch": 166.98316498316498, "grad_norm": 263.8997802734375, "learning_rate": 1.3258894358275253e-07, "loss": 2.4306, "step": 24880 }, { "epoch": 167.04713804713805, "grad_norm": 56.53512954711914, "learning_rate": 1.3206449948260824e-07, "loss": 2.1779, "step": 24890 }, { "epoch": 167.11447811447812, "grad_norm": 109.30017852783203, "learning_rate": 1.315410212949286e-07, "loss": 2.6282, "step": 24900 }, { "epoch": 167.11447811447812, "eval_loss": 0.3361356258392334, "eval_mae": 0.4079887568950653, "eval_mse": 0.3361356258392334, "eval_r2": 0.2647615075111389, "eval_rmse": 0.5797720464451812, "eval_runtime": 10.7045, "eval_samples_per_second": 443.646, "eval_steps_per_second": 13.919, "step": 24900 }, { "epoch": 167.1818181818182, "grad_norm": 66.58136749267578, "learning_rate": 1.3101850960228533e-07, "loss": 2.2003, "step": 24910 }, { "epoch": 167.24915824915826, "grad_norm": 240.72561645507812, "learning_rate": 1.3049696498617358e-07, "loss": 2.3102, "step": 24920 }, { "epoch": 167.31649831649833, "grad_norm": 138.12355041503906, "learning_rate": 1.299763880270124e-07, "loss": 2.1687, "step": 24930 }, { "epoch": 167.3838383838384, "grad_norm": 140.3155975341797, "learning_rate": 1.2945677930414445e-07, "loss": 2.5399, "step": 24940 }, { "epoch": 167.45117845117846, "grad_norm": 161.88986206054688, "learning_rate": 1.2893813939583442e-07, "loss": 2.5487, "step": 24950 }, { "epoch": 167.5185185185185, "grad_norm": 196.90036010742188, "learning_rate": 1.284204688792686e-07, "loss": 2.0745, "step": 24960 }, { "epoch": 167.58585858585857, "grad_norm": 155.57687377929688, "learning_rate": 1.2790376833055539e-07, "loss": 2.8985, "step": 24970 }, { "epoch": 167.65319865319864, "grad_norm": 80.4607162475586, "learning_rate": 1.2738803832472256e-07, "loss": 2.2375, "step": 24980 }, { "epoch": 167.7205387205387, "grad_norm": 137.4744110107422, "learning_rate": 1.2687327943571868e-07, "loss": 2.4101, "step": 24990 }, { "epoch": 167.78787878787878, "grad_norm": 67.46463775634766, "learning_rate": 1.2635949223641117e-07, "loss": 2.2402, "step": 25000 }, { "epoch": 167.78787878787878, "eval_loss": 0.3357807397842407, "eval_mae": 0.4066804349422455, "eval_mse": 0.3357807397842407, "eval_r2": 0.26553773880004883, "eval_rmse": 0.5794659090785589, "eval_runtime": 10.7035, "eval_samples_per_second": 443.685, "eval_steps_per_second": 13.921, "step": 25000 }, { "epoch": 167.85521885521885, "grad_norm": 192.0625762939453, "learning_rate": 1.2584667729858612e-07, "loss": 2.4509, "step": 25010 }, { "epoch": 167.92255892255892, "grad_norm": 97.55964660644531, "learning_rate": 1.2533483519294763e-07, "loss": 2.0399, "step": 25020 }, { "epoch": 167.989898989899, "grad_norm": 103.82833099365234, "learning_rate": 1.2482396648911686e-07, "loss": 2.4148, "step": 25030 }, { "epoch": 168.05387205387206, "grad_norm": 68.55559539794922, "learning_rate": 1.2431407175563235e-07, "loss": 2.3693, "step": 25040 }, { "epoch": 168.12121212121212, "grad_norm": 109.3213882446289, "learning_rate": 1.2380515155994818e-07, "loss": 2.3355, "step": 25050 }, { "epoch": 168.1885521885522, "grad_norm": 149.34242248535156, "learning_rate": 1.23297206468434e-07, "loss": 2.7695, "step": 25060 }, { "epoch": 168.25589225589226, "grad_norm": 59.84373092651367, "learning_rate": 1.227902370463745e-07, "loss": 2.3019, "step": 25070 }, { "epoch": 168.32323232323233, "grad_norm": 97.18063354492188, "learning_rate": 1.2228424385796798e-07, "loss": 2.2327, "step": 25080 }, { "epoch": 168.3905723905724, "grad_norm": 120.85694122314453, "learning_rate": 1.217792274663273e-07, "loss": 2.1186, "step": 25090 }, { "epoch": 168.45791245791247, "grad_norm": 98.28313446044922, "learning_rate": 1.212751884334773e-07, "loss": 2.4868, "step": 25100 }, { "epoch": 168.45791245791247, "eval_loss": 0.3358403444290161, "eval_mae": 0.4072672724723816, "eval_mse": 0.3358403444290161, "eval_r2": 0.2654074430465698, "eval_rmse": 0.5795173374706024, "eval_runtime": 10.6975, "eval_samples_per_second": 443.935, "eval_steps_per_second": 13.928, "step": 25100 }, { "epoch": 168.5252525252525, "grad_norm": 164.76536560058594, "learning_rate": 1.2077212732035535e-07, "loss": 2.4588, "step": 25110 }, { "epoch": 168.59259259259258, "grad_norm": 76.93311309814453, "learning_rate": 1.202700446868111e-07, "loss": 2.2948, "step": 25120 }, { "epoch": 168.65993265993265, "grad_norm": 71.156494140625, "learning_rate": 1.197689410916045e-07, "loss": 2.2353, "step": 25130 }, { "epoch": 168.72727272727272, "grad_norm": 65.9780044555664, "learning_rate": 1.1926881709240622e-07, "loss": 2.0883, "step": 25140 }, { "epoch": 168.7946127946128, "grad_norm": 167.5558319091797, "learning_rate": 1.1876967324579712e-07, "loss": 2.6113, "step": 25150 }, { "epoch": 168.86195286195286, "grad_norm": 129.76478576660156, "learning_rate": 1.1827151010726644e-07, "loss": 2.2591, "step": 25160 }, { "epoch": 168.92929292929293, "grad_norm": 79.42969512939453, "learning_rate": 1.1777432823121303e-07, "loss": 2.468, "step": 25170 }, { "epoch": 168.996632996633, "grad_norm": 141.1786651611328, "learning_rate": 1.1727812817094296e-07, "loss": 2.3345, "step": 25180 }, { "epoch": 169.06060606060606, "grad_norm": 65.57881164550781, "learning_rate": 1.1678291047866962e-07, "loss": 2.0561, "step": 25190 }, { "epoch": 169.12794612794613, "grad_norm": 110.03031158447266, "learning_rate": 1.1628867570551382e-07, "loss": 2.3538, "step": 25200 }, { "epoch": 169.12794612794613, "eval_loss": 0.336662620306015, "eval_mae": 0.40857359766960144, "eval_mse": 0.336662620306015, "eval_r2": 0.26360881328582764, "eval_rmse": 0.5802263526469777, "eval_runtime": 10.7063, "eval_samples_per_second": 443.572, "eval_steps_per_second": 13.917, "step": 25200 }, { "epoch": 169.1952861952862, "grad_norm": 133.5563201904297, "learning_rate": 1.1579542440150192e-07, "loss": 2.289, "step": 25210 }, { "epoch": 169.26262626262627, "grad_norm": 30.261892318725586, "learning_rate": 1.153031571155656e-07, "loss": 2.386, "step": 25220 }, { "epoch": 169.32996632996634, "grad_norm": 197.88156127929688, "learning_rate": 1.148118743955423e-07, "loss": 2.3234, "step": 25230 }, { "epoch": 169.3973063973064, "grad_norm": 223.23570251464844, "learning_rate": 1.1432157678817278e-07, "loss": 2.1523, "step": 25240 }, { "epoch": 169.46464646464648, "grad_norm": 112.74528503417969, "learning_rate": 1.1383226483910235e-07, "loss": 2.1576, "step": 25250 }, { "epoch": 169.53198653198652, "grad_norm": 314.8943176269531, "learning_rate": 1.1334393909287875e-07, "loss": 2.4124, "step": 25260 }, { "epoch": 169.5993265993266, "grad_norm": 77.44984436035156, "learning_rate": 1.128566000929524e-07, "loss": 2.3784, "step": 25270 }, { "epoch": 169.66666666666666, "grad_norm": 90.16281127929688, "learning_rate": 1.1237024838167563e-07, "loss": 2.3319, "step": 25280 }, { "epoch": 169.73400673400673, "grad_norm": 123.58952331542969, "learning_rate": 1.1188488450030186e-07, "loss": 2.4032, "step": 25290 }, { "epoch": 169.8013468013468, "grad_norm": 69.79283142089844, "learning_rate": 1.1140050898898557e-07, "loss": 2.7174, "step": 25300 }, { "epoch": 169.8013468013468, "eval_loss": 0.33649933338165283, "eval_mae": 0.4073168933391571, "eval_mse": 0.33649933338165283, "eval_r2": 0.2639659643173218, "eval_rmse": 0.5800856259050493, "eval_runtime": 10.6965, "eval_samples_per_second": 443.978, "eval_steps_per_second": 13.93, "step": 25300 }, { "epoch": 169.86868686868686, "grad_norm": 153.2730712890625, "learning_rate": 1.1091712238678086e-07, "loss": 2.5213, "step": 25310 }, { "epoch": 169.93602693602693, "grad_norm": 150.92098999023438, "learning_rate": 1.1043472523164144e-07, "loss": 2.538, "step": 25320 }, { "epoch": 170.0, "grad_norm": 133.40316772460938, "learning_rate": 1.0995331806042007e-07, "loss": 2.2222, "step": 25330 }, { "epoch": 170.06734006734007, "grad_norm": 83.06474304199219, "learning_rate": 1.0947290140886756e-07, "loss": 2.5572, "step": 25340 }, { "epoch": 170.13468013468014, "grad_norm": 176.7919921875, "learning_rate": 1.089934758116322e-07, "loss": 2.1496, "step": 25350 }, { "epoch": 170.2020202020202, "grad_norm": 102.96173858642578, "learning_rate": 1.085150418022599e-07, "loss": 2.507, "step": 25360 }, { "epoch": 170.26936026936028, "grad_norm": 70.59110260009766, "learning_rate": 1.080375999131925e-07, "loss": 2.4257, "step": 25370 }, { "epoch": 170.33670033670035, "grad_norm": 144.1227569580078, "learning_rate": 1.0756115067576821e-07, "loss": 2.2677, "step": 25380 }, { "epoch": 170.40404040404042, "grad_norm": 95.35279083251953, "learning_rate": 1.070856946202201e-07, "loss": 2.1652, "step": 25390 }, { "epoch": 170.47138047138048, "grad_norm": 73.17141723632812, "learning_rate": 1.0661123227567592e-07, "loss": 2.0727, "step": 25400 }, { "epoch": 170.47138047138048, "eval_loss": 0.33545175194740295, "eval_mae": 0.40672069787979126, "eval_mse": 0.33545172214508057, "eval_r2": 0.26625746488571167, "eval_rmse": 0.5791819421780003, "eval_runtime": 10.6985, "eval_samples_per_second": 443.896, "eval_steps_per_second": 13.927, "step": 25400 }, { "epoch": 170.53872053872053, "grad_norm": 78.82610321044922, "learning_rate": 1.0613776417015818e-07, "loss": 2.413, "step": 25410 }, { "epoch": 170.6060606060606, "grad_norm": 69.23065185546875, "learning_rate": 1.0566529083058217e-07, "loss": 2.1756, "step": 25420 }, { "epoch": 170.67340067340066, "grad_norm": 231.30006408691406, "learning_rate": 1.0519381278275641e-07, "loss": 2.4496, "step": 25430 }, { "epoch": 170.74074074074073, "grad_norm": 229.89454650878906, "learning_rate": 1.0472333055138194e-07, "loss": 2.1293, "step": 25440 }, { "epoch": 170.8080808080808, "grad_norm": 174.11582946777344, "learning_rate": 1.0425384466005116e-07, "loss": 2.3655, "step": 25450 }, { "epoch": 170.87542087542087, "grad_norm": 93.82209014892578, "learning_rate": 1.0378535563124836e-07, "loss": 2.5493, "step": 25460 }, { "epoch": 170.94276094276094, "grad_norm": 135.4002227783203, "learning_rate": 1.0331786398634756e-07, "loss": 2.7538, "step": 25470 }, { "epoch": 171.006734006734, "grad_norm": 114.00888061523438, "learning_rate": 1.028513702456133e-07, "loss": 2.3271, "step": 25480 }, { "epoch": 171.07407407407408, "grad_norm": 247.83607482910156, "learning_rate": 1.0238587492819973e-07, "loss": 2.4377, "step": 25490 }, { "epoch": 171.14141414141415, "grad_norm": 150.4469451904297, "learning_rate": 1.0192137855214956e-07, "loss": 2.5555, "step": 25500 }, { "epoch": 171.14141414141415, "eval_loss": 0.3363090753555298, "eval_mae": 0.40783554315567017, "eval_mse": 0.3363090753555298, "eval_r2": 0.26438212394714355, "eval_rmse": 0.5799216113885822, "eval_runtime": 10.7446, "eval_samples_per_second": 441.988, "eval_steps_per_second": 13.867, "step": 25500 }, { "epoch": 171.20875420875421, "grad_norm": 103.7740249633789, "learning_rate": 1.0145788163439361e-07, "loss": 2.0848, "step": 25510 }, { "epoch": 171.27609427609428, "grad_norm": 159.33047485351562, "learning_rate": 1.0099538469075098e-07, "loss": 2.6254, "step": 25520 }, { "epoch": 171.34343434343435, "grad_norm": 97.07112884521484, "learning_rate": 1.0053388823592746e-07, "loss": 2.687, "step": 25530 }, { "epoch": 171.41077441077442, "grad_norm": 90.41951751708984, "learning_rate": 1.0007339278351567e-07, "loss": 2.3654, "step": 25540 }, { "epoch": 171.4781144781145, "grad_norm": 124.10420989990234, "learning_rate": 9.961389884599414e-08, "loss": 2.2076, "step": 25550 }, { "epoch": 171.54545454545453, "grad_norm": 303.9523620605469, "learning_rate": 9.915540693472679e-08, "loss": 2.1209, "step": 25560 }, { "epoch": 171.6127946127946, "grad_norm": 132.58868408203125, "learning_rate": 9.869791755996237e-08, "loss": 2.1228, "step": 25570 }, { "epoch": 171.68013468013467, "grad_norm": 240.4607696533203, "learning_rate": 9.824143123083384e-08, "loss": 2.2179, "step": 25580 }, { "epoch": 171.74747474747474, "grad_norm": 60.936912536621094, "learning_rate": 9.778594845535837e-08, "loss": 2.5422, "step": 25590 }, { "epoch": 171.8148148148148, "grad_norm": 161.23724365234375, "learning_rate": 9.733146974043571e-08, "loss": 2.2783, "step": 25600 }, { "epoch": 171.8148148148148, "eval_loss": 0.3356657028198242, "eval_mae": 0.4072060286998749, "eval_mse": 0.3356657028198242, "eval_r2": 0.2657893896102905, "eval_rmse": 0.5793666393742604, "eval_runtime": 10.6951, "eval_samples_per_second": 444.033, "eval_steps_per_second": 13.932, "step": 25600 }, { "epoch": 171.88215488215488, "grad_norm": 201.7664031982422, "learning_rate": 9.687799559184839e-08, "loss": 2.6612, "step": 25610 }, { "epoch": 171.94949494949495, "grad_norm": 82.84647369384766, "learning_rate": 9.642552651426128e-08, "loss": 2.2723, "step": 25620 }, { "epoch": 172.013468013468, "grad_norm": 124.26222229003906, "learning_rate": 9.597406301122002e-08, "loss": 2.1857, "step": 25630 }, { "epoch": 172.08080808080808, "grad_norm": 168.51791381835938, "learning_rate": 9.552360558515216e-08, "loss": 2.0312, "step": 25640 }, { "epoch": 172.14814814814815, "grad_norm": 159.89559936523438, "learning_rate": 9.507415473736469e-08, "loss": 2.2576, "step": 25650 }, { "epoch": 172.21548821548822, "grad_norm": 90.50725555419922, "learning_rate": 9.462571096804473e-08, "loss": 2.4318, "step": 25660 }, { "epoch": 172.2828282828283, "grad_norm": 101.57756805419922, "learning_rate": 9.417827477625895e-08, "loss": 2.4138, "step": 25670 }, { "epoch": 172.35016835016836, "grad_norm": 154.2160186767578, "learning_rate": 9.373184665995215e-08, "loss": 2.0989, "step": 25680 }, { "epoch": 172.41750841750843, "grad_norm": 153.33883666992188, "learning_rate": 9.328642711594747e-08, "loss": 2.7287, "step": 25690 }, { "epoch": 172.4848484848485, "grad_norm": 145.2530517578125, "learning_rate": 9.284201663994595e-08, "loss": 2.3446, "step": 25700 }, { "epoch": 172.4848484848485, "eval_loss": 0.3357902765274048, "eval_mae": 0.40738219022750854, "eval_mse": 0.3357902765274048, "eval_r2": 0.26551687717437744, "eval_rmse": 0.5794741379280052, "eval_runtime": 10.6883, "eval_samples_per_second": 444.317, "eval_steps_per_second": 13.94, "step": 25700 }, { "epoch": 172.55218855218854, "grad_norm": 182.78111267089844, "learning_rate": 9.239861572652508e-08, "loss": 2.2518, "step": 25710 }, { "epoch": 172.6195286195286, "grad_norm": 207.1297149658203, "learning_rate": 9.195622486913934e-08, "loss": 2.4903, "step": 25720 }, { "epoch": 172.68686868686868, "grad_norm": 114.96085357666016, "learning_rate": 9.151484456011893e-08, "loss": 2.3387, "step": 25730 }, { "epoch": 172.75420875420875, "grad_norm": 144.9616241455078, "learning_rate": 9.107447529066903e-08, "loss": 2.3718, "step": 25740 }, { "epoch": 172.82154882154882, "grad_norm": 204.5061492919922, "learning_rate": 9.063511755087061e-08, "loss": 2.2886, "step": 25750 }, { "epoch": 172.88888888888889, "grad_norm": 255.64649963378906, "learning_rate": 9.0196771829678e-08, "loss": 2.4897, "step": 25760 }, { "epoch": 172.95622895622895, "grad_norm": 89.8665542602539, "learning_rate": 8.975943861491941e-08, "loss": 2.723, "step": 25770 }, { "epoch": 173.02020202020202, "grad_norm": 143.68849182128906, "learning_rate": 8.932311839329687e-08, "loss": 2.0096, "step": 25780 }, { "epoch": 173.0875420875421, "grad_norm": 157.66281127929688, "learning_rate": 8.888781165038428e-08, "loss": 2.4371, "step": 25790 }, { "epoch": 173.15488215488216, "grad_norm": 186.8685760498047, "learning_rate": 8.845351887062846e-08, "loss": 2.36, "step": 25800 }, { "epoch": 173.15488215488216, "eval_loss": 0.33586278557777405, "eval_mae": 0.4072350561618805, "eval_mse": 0.33586281538009644, "eval_r2": 0.2653582692146301, "eval_rmse": 0.5795367247898069, "eval_runtime": 10.6901, "eval_samples_per_second": 444.243, "eval_steps_per_second": 13.938, "step": 25800 }, { "epoch": 173.22222222222223, "grad_norm": 113.84757232666016, "learning_rate": 8.802024053734702e-08, "loss": 2.1533, "step": 25810 }, { "epoch": 173.2895622895623, "grad_norm": 272.9006042480469, "learning_rate": 8.758797713272903e-08, "loss": 2.4172, "step": 25820 }, { "epoch": 173.35690235690237, "grad_norm": 187.99978637695312, "learning_rate": 8.7156729137834e-08, "loss": 2.5352, "step": 25830 }, { "epoch": 173.42424242424244, "grad_norm": 198.02931213378906, "learning_rate": 8.672649703259127e-08, "loss": 2.4218, "step": 25840 }, { "epoch": 173.4915824915825, "grad_norm": 382.5050354003906, "learning_rate": 8.629728129579994e-08, "loss": 2.2839, "step": 25850 }, { "epoch": 173.55892255892255, "grad_norm": 82.95073699951172, "learning_rate": 8.586908240512769e-08, "loss": 2.3184, "step": 25860 }, { "epoch": 173.62626262626262, "grad_norm": 65.39875793457031, "learning_rate": 8.544190083711045e-08, "loss": 2.1639, "step": 25870 }, { "epoch": 173.69360269360268, "grad_norm": 133.2843475341797, "learning_rate": 8.50157370671526e-08, "loss": 2.2511, "step": 25880 }, { "epoch": 173.76094276094275, "grad_norm": 257.5234069824219, "learning_rate": 8.45905915695252e-08, "loss": 2.8508, "step": 25890 }, { "epoch": 173.82828282828282, "grad_norm": 135.7627410888672, "learning_rate": 8.416646481736644e-08, "loss": 2.026, "step": 25900 }, { "epoch": 173.82828282828282, "eval_loss": 0.33613306283950806, "eval_mae": 0.40716782212257385, "eval_mse": 0.33613306283950806, "eval_r2": 0.2647671699523926, "eval_rmse": 0.5797698360897262, "eval_runtime": 10.6906, "eval_samples_per_second": 444.222, "eval_steps_per_second": 13.937, "step": 25900 }, { "epoch": 173.8956228956229, "grad_norm": 43.71229934692383, "learning_rate": 8.374335728268078e-08, "loss": 2.2573, "step": 25910 }, { "epoch": 173.96296296296296, "grad_norm": 112.4022216796875, "learning_rate": 8.332126943633777e-08, "loss": 2.5256, "step": 25920 }, { "epoch": 174.02693602693603, "grad_norm": 193.66246032714844, "learning_rate": 8.290020174807333e-08, "loss": 2.3792, "step": 25930 }, { "epoch": 174.0942760942761, "grad_norm": 73.66658782958984, "learning_rate": 8.248015468648706e-08, "loss": 2.6299, "step": 25940 }, { "epoch": 174.16161616161617, "grad_norm": 222.6166229248047, "learning_rate": 8.20611287190428e-08, "loss": 2.1682, "step": 25950 }, { "epoch": 174.22895622895624, "grad_norm": 60.437103271484375, "learning_rate": 8.164312431206888e-08, "loss": 2.0594, "step": 25960 }, { "epoch": 174.2962962962963, "grad_norm": 42.27448654174805, "learning_rate": 8.122614193075572e-08, "loss": 2.2035, "step": 25970 }, { "epoch": 174.36363636363637, "grad_norm": 185.7076873779297, "learning_rate": 8.081018203915712e-08, "loss": 2.4499, "step": 25980 }, { "epoch": 174.43097643097644, "grad_norm": 170.18028259277344, "learning_rate": 8.039524510018847e-08, "loss": 2.5207, "step": 25990 }, { "epoch": 174.4983164983165, "grad_norm": 166.6307830810547, "learning_rate": 7.998133157562669e-08, "loss": 2.3027, "step": 26000 }, { "epoch": 174.4983164983165, "eval_loss": 0.33595147728919983, "eval_mae": 0.4078233242034912, "eval_mse": 0.3359515070915222, "eval_r2": 0.26516425609588623, "eval_rmse": 0.5796132392307152, "eval_runtime": 10.6956, "eval_samples_per_second": 444.015, "eval_steps_per_second": 13.931, "step": 26000 }, { "epoch": 174.56565656565655, "grad_norm": 58.17460250854492, "learning_rate": 7.956844192611034e-08, "loss": 2.2684, "step": 26010 }, { "epoch": 174.63299663299662, "grad_norm": 299.7349548339844, "learning_rate": 7.915657661113795e-08, "loss": 2.4253, "step": 26020 }, { "epoch": 174.7003367003367, "grad_norm": 132.77671813964844, "learning_rate": 7.87457360890682e-08, "loss": 2.3065, "step": 26030 }, { "epoch": 174.76767676767676, "grad_norm": 95.25971984863281, "learning_rate": 7.833592081711971e-08, "loss": 2.3386, "step": 26040 }, { "epoch": 174.83501683501683, "grad_norm": 53.22066116333008, "learning_rate": 7.792713125136929e-08, "loss": 2.5438, "step": 26050 }, { "epoch": 174.9023569023569, "grad_norm": 92.96481323242188, "learning_rate": 7.751936784675316e-08, "loss": 2.4369, "step": 26060 }, { "epoch": 174.96969696969697, "grad_norm": 188.43043518066406, "learning_rate": 7.7112631057065e-08, "loss": 2.2047, "step": 26070 }, { "epoch": 175.03367003367003, "grad_norm": 102.00141906738281, "learning_rate": 7.670692133495593e-08, "loss": 2.3689, "step": 26080 }, { "epoch": 175.1010101010101, "grad_norm": 141.08120727539062, "learning_rate": 7.630223913193456e-08, "loss": 2.3817, "step": 26090 }, { "epoch": 175.16835016835017, "grad_norm": 130.90415954589844, "learning_rate": 7.589858489836554e-08, "loss": 2.4157, "step": 26100 }, { "epoch": 175.16835016835017, "eval_loss": 0.33676329255104065, "eval_mae": 0.4079859256744385, "eval_mse": 0.33676329255104065, "eval_r2": 0.26338857412338257, "eval_rmse": 0.5803130987243358, "eval_runtime": 10.6924, "eval_samples_per_second": 444.146, "eval_steps_per_second": 13.935, "step": 26100 }, { "epoch": 175.23569023569024, "grad_norm": 109.05827331542969, "learning_rate": 7.549595908346962e-08, "loss": 2.4866, "step": 26110 }, { "epoch": 175.3030303030303, "grad_norm": 428.12384033203125, "learning_rate": 7.509436213532306e-08, "loss": 2.4533, "step": 26120 }, { "epoch": 175.37037037037038, "grad_norm": 93.30845642089844, "learning_rate": 7.469379450085688e-08, "loss": 2.6081, "step": 26130 }, { "epoch": 175.43771043771045, "grad_norm": 95.88896179199219, "learning_rate": 7.429425662585731e-08, "loss": 2.2655, "step": 26140 }, { "epoch": 175.5050505050505, "grad_norm": 59.99432373046875, "learning_rate": 7.389574895496387e-08, "loss": 2.126, "step": 26150 }, { "epoch": 175.57239057239056, "grad_norm": 88.37857818603516, "learning_rate": 7.349827193166946e-08, "loss": 2.2203, "step": 26160 }, { "epoch": 175.63973063973063, "grad_norm": 44.3857536315918, "learning_rate": 7.310182599832094e-08, "loss": 2.3138, "step": 26170 }, { "epoch": 175.7070707070707, "grad_norm": 154.32191467285156, "learning_rate": 7.270641159611645e-08, "loss": 2.2928, "step": 26180 }, { "epoch": 175.77441077441077, "grad_norm": 63.59320068359375, "learning_rate": 7.231202916510737e-08, "loss": 2.345, "step": 26190 }, { "epoch": 175.84175084175084, "grad_norm": 88.37818908691406, "learning_rate": 7.19186791441957e-08, "loss": 2.1225, "step": 26200 }, { "epoch": 175.84175084175084, "eval_loss": 0.33609533309936523, "eval_mae": 0.40733641386032104, "eval_mse": 0.33609533309936523, "eval_r2": 0.2648496627807617, "eval_rmse": 0.5797372966261229, "eval_runtime": 10.6892, "eval_samples_per_second": 444.28, "eval_steps_per_second": 13.939, "step": 26200 }, { "epoch": 175.9090909090909, "grad_norm": 225.8782501220703, "learning_rate": 7.152636197113459e-08, "loss": 2.4474, "step": 26210 }, { "epoch": 175.97643097643098, "grad_norm": 106.9380874633789, "learning_rate": 7.11350780825285e-08, "loss": 2.5517, "step": 26220 }, { "epoch": 176.04040404040404, "grad_norm": 97.05534362792969, "learning_rate": 7.074482791383107e-08, "loss": 2.1778, "step": 26230 }, { "epoch": 176.1077441077441, "grad_norm": 260.3788146972656, "learning_rate": 7.035561189934592e-08, "loss": 2.3437, "step": 26240 }, { "epoch": 176.17508417508418, "grad_norm": 290.9968566894531, "learning_rate": 6.996743047222586e-08, "loss": 2.0852, "step": 26250 }, { "epoch": 176.24242424242425, "grad_norm": 90.26624298095703, "learning_rate": 6.958028406447202e-08, "loss": 2.1047, "step": 26260 }, { "epoch": 176.30976430976432, "grad_norm": 271.0270080566406, "learning_rate": 6.91941731069342e-08, "loss": 2.2684, "step": 26270 }, { "epoch": 176.3771043771044, "grad_norm": 137.84326171875, "learning_rate": 6.880909802930923e-08, "loss": 2.4601, "step": 26280 }, { "epoch": 176.44444444444446, "grad_norm": 133.1082000732422, "learning_rate": 6.842505926014131e-08, "loss": 2.7745, "step": 26290 }, { "epoch": 176.5117845117845, "grad_norm": 175.322509765625, "learning_rate": 6.804205722682189e-08, "loss": 2.4449, "step": 26300 }, { "epoch": 176.5117845117845, "eval_loss": 0.33543846011161804, "eval_mae": 0.40637513995170593, "eval_mse": 0.33543846011161804, "eval_r2": 0.2662864923477173, "eval_rmse": 0.579170493129284, "eval_runtime": 10.6886, "eval_samples_per_second": 444.304, "eval_steps_per_second": 13.94, "step": 26300 }, { "epoch": 176.57912457912457, "grad_norm": 135.810302734375, "learning_rate": 6.76600923555879e-08, "loss": 2.3034, "step": 26310 }, { "epoch": 176.64646464646464, "grad_norm": 139.85275268554688, "learning_rate": 6.72791650715222e-08, "loss": 2.7163, "step": 26320 }, { "epoch": 176.7138047138047, "grad_norm": 172.198486328125, "learning_rate": 6.689927579855359e-08, "loss": 2.4116, "step": 26330 }, { "epoch": 176.78114478114477, "grad_norm": 187.60955810546875, "learning_rate": 6.652042495945454e-08, "loss": 2.1343, "step": 26340 }, { "epoch": 176.84848484848484, "grad_norm": 266.7928466796875, "learning_rate": 6.614261297584312e-08, "loss": 2.4451, "step": 26350 }, { "epoch": 176.9158249158249, "grad_norm": 119.13152313232422, "learning_rate": 6.576584026818022e-08, "loss": 2.333, "step": 26360 }, { "epoch": 176.98316498316498, "grad_norm": 442.3548889160156, "learning_rate": 6.539010725577066e-08, "loss": 2.3943, "step": 26370 }, { "epoch": 177.04713804713805, "grad_norm": 65.32048034667969, "learning_rate": 6.501541435676239e-08, "loss": 2.2182, "step": 26380 }, { "epoch": 177.11447811447812, "grad_norm": 205.89569091796875, "learning_rate": 6.464176198814497e-08, "loss": 2.3892, "step": 26390 }, { "epoch": 177.1818181818182, "grad_norm": 54.696842193603516, "learning_rate": 6.4269150565751e-08, "loss": 2.0722, "step": 26400 }, { "epoch": 177.1818181818182, "eval_loss": 0.3366211950778961, "eval_mae": 0.40746963024139404, "eval_mse": 0.3366211950778961, "eval_r2": 0.263699471950531, "eval_rmse": 0.5801906540766545, "eval_runtime": 10.674, "eval_samples_per_second": 444.914, "eval_steps_per_second": 13.959, "step": 26400 }, { "epoch": 177.24915824915826, "grad_norm": 56.78644561767578, "learning_rate": 6.389758050425409e-08, "loss": 2.3574, "step": 26410 }, { "epoch": 177.31649831649833, "grad_norm": 134.6973876953125, "learning_rate": 6.35270522171688e-08, "loss": 1.9666, "step": 26420 }, { "epoch": 177.3838383838384, "grad_norm": 182.80731201171875, "learning_rate": 6.315756611685086e-08, "loss": 2.42, "step": 26430 }, { "epoch": 177.45117845117846, "grad_norm": 132.44412231445312, "learning_rate": 6.278912261449543e-08, "loss": 2.2626, "step": 26440 }, { "epoch": 177.5185185185185, "grad_norm": 77.9929428100586, "learning_rate": 6.24217221201383e-08, "loss": 2.2227, "step": 26450 }, { "epoch": 177.58585858585857, "grad_norm": 423.8899841308594, "learning_rate": 6.205536504265362e-08, "loss": 2.4797, "step": 26460 }, { "epoch": 177.65319865319864, "grad_norm": 211.29220581054688, "learning_rate": 6.169005178975473e-08, "loss": 2.2347, "step": 26470 }, { "epoch": 177.7205387205387, "grad_norm": 189.717529296875, "learning_rate": 6.132578276799349e-08, "loss": 2.4403, "step": 26480 }, { "epoch": 177.78787878787878, "grad_norm": 93.92589569091797, "learning_rate": 6.096255838275933e-08, "loss": 2.6774, "step": 26490 }, { "epoch": 177.85521885521885, "grad_norm": 333.8948059082031, "learning_rate": 6.060037903827919e-08, "loss": 2.492, "step": 26500 }, { "epoch": 177.85521885521885, "eval_loss": 0.33643701672554016, "eval_mae": 0.4078582227230072, "eval_mse": 0.33643701672554016, "eval_r2": 0.2641022801399231, "eval_rmse": 0.5800319100924881, "eval_runtime": 10.6975, "eval_samples_per_second": 443.936, "eval_steps_per_second": 13.928, "step": 26500 }, { "epoch": 177.92255892255892, "grad_norm": 159.4984588623047, "learning_rate": 6.023924513761725e-08, "loss": 2.3253, "step": 26510 }, { "epoch": 177.989898989899, "grad_norm": 179.34170532226562, "learning_rate": 5.987915708267377e-08, "loss": 2.4428, "step": 26520 }, { "epoch": 178.05387205387206, "grad_norm": 53.41549301147461, "learning_rate": 5.952011527418566e-08, "loss": 2.1861, "step": 26530 }, { "epoch": 178.12121212121212, "grad_norm": 55.61179733276367, "learning_rate": 5.916212011172506e-08, "loss": 2.3279, "step": 26540 }, { "epoch": 178.1885521885522, "grad_norm": 106.57039642333984, "learning_rate": 5.880517199369928e-08, "loss": 2.2137, "step": 26550 }, { "epoch": 178.25589225589226, "grad_norm": 161.14637756347656, "learning_rate": 5.844927131735089e-08, "loss": 2.3258, "step": 26560 }, { "epoch": 178.32323232323233, "grad_norm": 78.02250671386719, "learning_rate": 5.809441847875629e-08, "loss": 2.4591, "step": 26570 }, { "epoch": 178.3905723905724, "grad_norm": 191.57273864746094, "learning_rate": 5.7740613872825696e-08, "loss": 2.5391, "step": 26580 }, { "epoch": 178.45791245791247, "grad_norm": 76.08218383789062, "learning_rate": 5.738785789330336e-08, "loss": 2.3591, "step": 26590 }, { "epoch": 178.5252525252525, "grad_norm": 135.3099822998047, "learning_rate": 5.70361509327657e-08, "loss": 2.5637, "step": 26600 }, { "epoch": 178.5252525252525, "eval_loss": 0.33519282937049866, "eval_mae": 0.40663206577301025, "eval_mse": 0.33519282937049866, "eval_r2": 0.2668237090110779, "eval_rmse": 0.5789584003799397, "eval_runtime": 10.693, "eval_samples_per_second": 444.124, "eval_steps_per_second": 13.934, "step": 26600 } ], "logging_steps": 10, "max_steps": 29800, "num_input_tokens_seen": 0, "num_train_epochs": 200, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 20, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1523036474688723e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }