{ "best_global_step": 6000, "best_metric": 0.5907269988445212, "best_model_checkpoint": "output/Qwen/Qwen3-Reranker-0.6B-finetune-lower-max-len/checkpoint-6000", "epoch": 2.949852507374631, "eval_steps": 1000, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 1.3867249488830566, "eval_runtime": 68.3298, "eval_samples_per_second": 119.07, "eval_spearman": 0.11296483754332637, "eval_steps_per_second": 14.884, "step": 0 }, { "epoch": 0.0004916420845624386, "grad_norm": 111.5, "learning_rate": 0.0, "loss": 1.3211, "step": 1 }, { "epoch": 0.0009832841691248771, "grad_norm": 113.0, "learning_rate": 2.9411764705882354e-08, "loss": 1.4821, "step": 2 }, { "epoch": 0.0014749262536873156, "grad_norm": 83.0, "learning_rate": 5.882352941176471e-08, "loss": 1.2404, "step": 3 }, { "epoch": 0.0019665683382497543, "grad_norm": 123.0, "learning_rate": 8.823529411764706e-08, "loss": 1.4137, "step": 4 }, { "epoch": 0.0024582104228121925, "grad_norm": 99.0, "learning_rate": 1.1764705882352942e-07, "loss": 1.3356, "step": 5 }, { "epoch": 0.0029498525073746312, "grad_norm": 96.0, "learning_rate": 1.4705882352941178e-07, "loss": 1.2033, "step": 6 }, { "epoch": 0.00344149459193707, "grad_norm": 108.0, "learning_rate": 1.764705882352941e-07, "loss": 1.2627, "step": 7 }, { "epoch": 0.003933136676499509, "grad_norm": 131.0, "learning_rate": 2.0588235294117647e-07, "loss": 1.4681, "step": 8 }, { "epoch": 0.004424778761061947, "grad_norm": 116.5, "learning_rate": 2.3529411764705883e-07, "loss": 1.3975, "step": 9 }, { "epoch": 0.004916420845624385, "grad_norm": 117.0, "learning_rate": 2.647058823529412e-07, "loss": 1.3623, "step": 10 }, { "epoch": 0.005408062930186824, "grad_norm": 133.0, "learning_rate": 2.9411764705882356e-07, "loss": 1.497, "step": 11 }, { "epoch": 0.0058997050147492625, "grad_norm": 114.5, "learning_rate": 3.2352941176470586e-07, "loss": 1.4413, "step": 12 }, { "epoch": 0.006391347099311701, "grad_norm": 106.5, "learning_rate": 3.529411764705882e-07, "loss": 1.2284, "step": 13 }, { "epoch": 0.00688298918387414, "grad_norm": 122.5, "learning_rate": 3.8235294117647064e-07, "loss": 1.4642, "step": 14 }, { "epoch": 0.007374631268436578, "grad_norm": 89.5, "learning_rate": 4.1176470588235295e-07, "loss": 1.169, "step": 15 }, { "epoch": 0.007866273352999017, "grad_norm": 110.5, "learning_rate": 4.411764705882353e-07, "loss": 1.3688, "step": 16 }, { "epoch": 0.008357915437561455, "grad_norm": 84.5, "learning_rate": 4.7058823529411767e-07, "loss": 1.0927, "step": 17 }, { "epoch": 0.008849557522123894, "grad_norm": 96.5, "learning_rate": 5e-07, "loss": 1.1751, "step": 18 }, { "epoch": 0.009341199606686333, "grad_norm": 119.0, "learning_rate": 5.294117647058824e-07, "loss": 1.322, "step": 19 }, { "epoch": 0.00983284169124877, "grad_norm": 112.0, "learning_rate": 5.588235294117647e-07, "loss": 1.2833, "step": 20 }, { "epoch": 0.01032448377581121, "grad_norm": 109.5, "learning_rate": 5.882352941176471e-07, "loss": 1.3904, "step": 21 }, { "epoch": 0.010816125860373648, "grad_norm": 105.5, "learning_rate": 6.176470588235295e-07, "loss": 1.3104, "step": 22 }, { "epoch": 0.011307767944936086, "grad_norm": 123.0, "learning_rate": 6.470588235294117e-07, "loss": 1.4829, "step": 23 }, { "epoch": 0.011799410029498525, "grad_norm": 129.0, "learning_rate": 6.764705882352941e-07, "loss": 1.4454, "step": 24 }, { "epoch": 0.012291052114060964, "grad_norm": 128.0, "learning_rate": 7.058823529411765e-07, "loss": 1.5002, "step": 25 }, { "epoch": 0.012782694198623401, "grad_norm": 102.0, "learning_rate": 7.352941176470589e-07, "loss": 1.2847, "step": 26 }, { "epoch": 0.01327433628318584, "grad_norm": 122.0, "learning_rate": 7.647058823529413e-07, "loss": 1.4863, "step": 27 }, { "epoch": 0.01376597836774828, "grad_norm": 113.5, "learning_rate": 7.941176470588236e-07, "loss": 1.2426, "step": 28 }, { "epoch": 0.014257620452310717, "grad_norm": 92.0, "learning_rate": 8.235294117647059e-07, "loss": 1.2009, "step": 29 }, { "epoch": 0.014749262536873156, "grad_norm": 132.0, "learning_rate": 8.529411764705882e-07, "loss": 1.4734, "step": 30 }, { "epoch": 0.015240904621435595, "grad_norm": 117.0, "learning_rate": 8.823529411764706e-07, "loss": 1.3081, "step": 31 }, { "epoch": 0.015732546705998034, "grad_norm": 119.0, "learning_rate": 9.117647058823529e-07, "loss": 1.3229, "step": 32 }, { "epoch": 0.016224188790560472, "grad_norm": 113.0, "learning_rate": 9.411764705882353e-07, "loss": 1.3423, "step": 33 }, { "epoch": 0.01671583087512291, "grad_norm": 122.0, "learning_rate": 9.705882352941176e-07, "loss": 1.4147, "step": 34 }, { "epoch": 0.01720747295968535, "grad_norm": 80.5, "learning_rate": 1e-06, "loss": 0.9891, "step": 35 }, { "epoch": 0.017699115044247787, "grad_norm": 96.0, "learning_rate": 1.0294117647058823e-06, "loss": 1.106, "step": 36 }, { "epoch": 0.018190757128810225, "grad_norm": 82.0, "learning_rate": 1.0588235294117648e-06, "loss": 1.0166, "step": 37 }, { "epoch": 0.018682399213372666, "grad_norm": 105.5, "learning_rate": 1.088235294117647e-06, "loss": 1.3092, "step": 38 }, { "epoch": 0.019174041297935103, "grad_norm": 134.0, "learning_rate": 1.1176470588235294e-06, "loss": 1.4596, "step": 39 }, { "epoch": 0.01966568338249754, "grad_norm": 104.0, "learning_rate": 1.1470588235294117e-06, "loss": 1.125, "step": 40 }, { "epoch": 0.02015732546705998, "grad_norm": 121.5, "learning_rate": 1.1764705882352942e-06, "loss": 1.3151, "step": 41 }, { "epoch": 0.02064896755162242, "grad_norm": 108.5, "learning_rate": 1.2058823529411765e-06, "loss": 1.2789, "step": 42 }, { "epoch": 0.021140609636184856, "grad_norm": 91.0, "learning_rate": 1.235294117647059e-06, "loss": 1.0242, "step": 43 }, { "epoch": 0.021632251720747297, "grad_norm": 122.5, "learning_rate": 1.2647058823529412e-06, "loss": 1.2432, "step": 44 }, { "epoch": 0.022123893805309734, "grad_norm": 100.5, "learning_rate": 1.2941176470588235e-06, "loss": 1.1959, "step": 45 }, { "epoch": 0.02261553588987217, "grad_norm": 101.0, "learning_rate": 1.323529411764706e-06, "loss": 1.0612, "step": 46 }, { "epoch": 0.023107177974434612, "grad_norm": 113.0, "learning_rate": 1.3529411764705883e-06, "loss": 1.1351, "step": 47 }, { "epoch": 0.02359882005899705, "grad_norm": 89.0, "learning_rate": 1.3823529411764708e-06, "loss": 1.0629, "step": 48 }, { "epoch": 0.024090462143559487, "grad_norm": 101.5, "learning_rate": 1.411764705882353e-06, "loss": 1.1183, "step": 49 }, { "epoch": 0.024582104228121928, "grad_norm": 108.5, "learning_rate": 1.4411764705882352e-06, "loss": 1.136, "step": 50 }, { "epoch": 0.025073746312684365, "grad_norm": 111.5, "learning_rate": 1.4705882352941177e-06, "loss": 1.1797, "step": 51 }, { "epoch": 0.025565388397246803, "grad_norm": 92.0, "learning_rate": 1.5e-06, "loss": 1.0396, "step": 52 }, { "epoch": 0.026057030481809244, "grad_norm": 92.5, "learning_rate": 1.5294117647058826e-06, "loss": 1.1123, "step": 53 }, { "epoch": 0.02654867256637168, "grad_norm": 86.0, "learning_rate": 1.5588235294117647e-06, "loss": 1.0496, "step": 54 }, { "epoch": 0.02704031465093412, "grad_norm": 119.0, "learning_rate": 1.5882352941176472e-06, "loss": 1.0112, "step": 55 }, { "epoch": 0.02753195673549656, "grad_norm": 83.5, "learning_rate": 1.6176470588235295e-06, "loss": 0.8908, "step": 56 }, { "epoch": 0.028023598820058997, "grad_norm": 80.5, "learning_rate": 1.6470588235294118e-06, "loss": 0.9756, "step": 57 }, { "epoch": 0.028515240904621434, "grad_norm": 81.0, "learning_rate": 1.676470588235294e-06, "loss": 0.9252, "step": 58 }, { "epoch": 0.029006882989183875, "grad_norm": 98.0, "learning_rate": 1.7058823529411764e-06, "loss": 1.106, "step": 59 }, { "epoch": 0.029498525073746312, "grad_norm": 84.5, "learning_rate": 1.735294117647059e-06, "loss": 0.9316, "step": 60 }, { "epoch": 0.02999016715830875, "grad_norm": 101.0, "learning_rate": 1.7647058823529412e-06, "loss": 0.9901, "step": 61 }, { "epoch": 0.03048180924287119, "grad_norm": 85.0, "learning_rate": 1.7941176470588238e-06, "loss": 1.0077, "step": 62 }, { "epoch": 0.030973451327433628, "grad_norm": 87.0, "learning_rate": 1.8235294117647058e-06, "loss": 0.8682, "step": 63 }, { "epoch": 0.03146509341199607, "grad_norm": 47.5, "learning_rate": 1.8529411764705882e-06, "loss": 0.7767, "step": 64 }, { "epoch": 0.0319567354965585, "grad_norm": 76.5, "learning_rate": 1.8823529411764707e-06, "loss": 0.8379, "step": 65 }, { "epoch": 0.032448377581120944, "grad_norm": 107.5, "learning_rate": 1.9117647058823528e-06, "loss": 1.0028, "step": 66 }, { "epoch": 0.032940019665683384, "grad_norm": 79.0, "learning_rate": 1.9411764705882353e-06, "loss": 0.8027, "step": 67 }, { "epoch": 0.03343166175024582, "grad_norm": 42.25, "learning_rate": 1.970588235294118e-06, "loss": 0.6979, "step": 68 }, { "epoch": 0.03392330383480826, "grad_norm": 81.0, "learning_rate": 2e-06, "loss": 0.7021, "step": 69 }, { "epoch": 0.0344149459193707, "grad_norm": 48.0, "learning_rate": 2.0294117647058824e-06, "loss": 0.758, "step": 70 }, { "epoch": 0.034906588003933134, "grad_norm": 49.75, "learning_rate": 2.0588235294117645e-06, "loss": 0.7421, "step": 71 }, { "epoch": 0.035398230088495575, "grad_norm": 105.5, "learning_rate": 2.088235294117647e-06, "loss": 0.8801, "step": 72 }, { "epoch": 0.035889872173058016, "grad_norm": 56.25, "learning_rate": 2.1176470588235296e-06, "loss": 0.6594, "step": 73 }, { "epoch": 0.03638151425762045, "grad_norm": 54.75, "learning_rate": 2.147058823529412e-06, "loss": 0.6368, "step": 74 }, { "epoch": 0.03687315634218289, "grad_norm": 78.0, "learning_rate": 2.176470588235294e-06, "loss": 0.5978, "step": 75 }, { "epoch": 0.03736479842674533, "grad_norm": 82.0, "learning_rate": 2.2058823529411763e-06, "loss": 0.6788, "step": 76 }, { "epoch": 0.037856440511307765, "grad_norm": 66.5, "learning_rate": 2.235294117647059e-06, "loss": 0.6723, "step": 77 }, { "epoch": 0.038348082595870206, "grad_norm": 56.5, "learning_rate": 2.2647058823529413e-06, "loss": 0.6133, "step": 78 }, { "epoch": 0.03883972468043265, "grad_norm": 42.25, "learning_rate": 2.2941176470588234e-06, "loss": 0.5738, "step": 79 }, { "epoch": 0.03933136676499508, "grad_norm": 43.75, "learning_rate": 2.3235294117647064e-06, "loss": 0.4134, "step": 80 }, { "epoch": 0.03982300884955752, "grad_norm": 22.875, "learning_rate": 2.3529411764705885e-06, "loss": 0.6319, "step": 81 }, { "epoch": 0.04031465093411996, "grad_norm": 30.75, "learning_rate": 2.3823529411764705e-06, "loss": 0.6617, "step": 82 }, { "epoch": 0.040806293018682396, "grad_norm": 72.5, "learning_rate": 2.411764705882353e-06, "loss": 0.5625, "step": 83 }, { "epoch": 0.04129793510324484, "grad_norm": 51.5, "learning_rate": 2.441176470588235e-06, "loss": 0.3271, "step": 84 }, { "epoch": 0.04178957718780728, "grad_norm": 39.25, "learning_rate": 2.470588235294118e-06, "loss": 0.5322, "step": 85 }, { "epoch": 0.04228121927236971, "grad_norm": 36.0, "learning_rate": 2.5e-06, "loss": 0.4486, "step": 86 }, { "epoch": 0.04277286135693215, "grad_norm": 32.25, "learning_rate": 2.5294117647058823e-06, "loss": 0.5309, "step": 87 }, { "epoch": 0.043264503441494594, "grad_norm": 19.5, "learning_rate": 2.558823529411765e-06, "loss": 0.5266, "step": 88 }, { "epoch": 0.04375614552605703, "grad_norm": 21.5, "learning_rate": 2.588235294117647e-06, "loss": 0.4785, "step": 89 }, { "epoch": 0.04424778761061947, "grad_norm": 25.25, "learning_rate": 2.61764705882353e-06, "loss": 0.5806, "step": 90 }, { "epoch": 0.04473942969518191, "grad_norm": 19.5, "learning_rate": 2.647058823529412e-06, "loss": 0.3218, "step": 91 }, { "epoch": 0.04523107177974434, "grad_norm": 23.25, "learning_rate": 2.676470588235294e-06, "loss": 0.4007, "step": 92 }, { "epoch": 0.045722713864306784, "grad_norm": 25.125, "learning_rate": 2.7058823529411766e-06, "loss": 0.4133, "step": 93 }, { "epoch": 0.046214355948869225, "grad_norm": 25.875, "learning_rate": 2.7352941176470587e-06, "loss": 0.4864, "step": 94 }, { "epoch": 0.04670599803343166, "grad_norm": 21.5, "learning_rate": 2.7647058823529416e-06, "loss": 0.4724, "step": 95 }, { "epoch": 0.0471976401179941, "grad_norm": 16.375, "learning_rate": 2.7941176470588237e-06, "loss": 0.292, "step": 96 }, { "epoch": 0.04768928220255654, "grad_norm": 16.625, "learning_rate": 2.823529411764706e-06, "loss": 0.3837, "step": 97 }, { "epoch": 0.048180924287118974, "grad_norm": 30.875, "learning_rate": 2.8529411764705883e-06, "loss": 0.4121, "step": 98 }, { "epoch": 0.048672566371681415, "grad_norm": 62.25, "learning_rate": 2.8823529411764704e-06, "loss": 0.7788, "step": 99 }, { "epoch": 0.049164208456243856, "grad_norm": 50.75, "learning_rate": 2.9117647058823534e-06, "loss": 0.361, "step": 100 }, { "epoch": 0.04965585054080629, "grad_norm": 21.25, "learning_rate": 2.9411764705882355e-06, "loss": 0.4579, "step": 101 }, { "epoch": 0.05014749262536873, "grad_norm": 22.125, "learning_rate": 2.9705882352941176e-06, "loss": 0.4404, "step": 102 }, { "epoch": 0.05063913470993117, "grad_norm": 20.125, "learning_rate": 3e-06, "loss": 0.302, "step": 103 }, { "epoch": 0.051130776794493606, "grad_norm": 16.5, "learning_rate": 3.0294117647058826e-06, "loss": 0.3231, "step": 104 }, { "epoch": 0.051622418879056046, "grad_norm": 17.25, "learning_rate": 3.058823529411765e-06, "loss": 0.3133, "step": 105 }, { "epoch": 0.05211406096361849, "grad_norm": 42.0, "learning_rate": 3.0882352941176472e-06, "loss": 0.7612, "step": 106 }, { "epoch": 0.05260570304818092, "grad_norm": 20.375, "learning_rate": 3.1176470588235293e-06, "loss": 0.4895, "step": 107 }, { "epoch": 0.05309734513274336, "grad_norm": 24.625, "learning_rate": 3.147058823529412e-06, "loss": 0.3911, "step": 108 }, { "epoch": 0.0535889872173058, "grad_norm": 16.625, "learning_rate": 3.1764705882352943e-06, "loss": 0.3015, "step": 109 }, { "epoch": 0.05408062930186824, "grad_norm": 21.375, "learning_rate": 3.2058823529411764e-06, "loss": 0.4359, "step": 110 }, { "epoch": 0.05457227138643068, "grad_norm": 55.75, "learning_rate": 3.235294117647059e-06, "loss": 0.665, "step": 111 }, { "epoch": 0.05506391347099312, "grad_norm": 40.0, "learning_rate": 3.264705882352941e-06, "loss": 0.5934, "step": 112 }, { "epoch": 0.05555555555555555, "grad_norm": 27.25, "learning_rate": 3.2941176470588236e-06, "loss": 0.3713, "step": 113 }, { "epoch": 0.05604719764011799, "grad_norm": 19.375, "learning_rate": 3.323529411764706e-06, "loss": 0.4267, "step": 114 }, { "epoch": 0.056538839724680434, "grad_norm": 13.4375, "learning_rate": 3.352941176470588e-06, "loss": 0.3221, "step": 115 }, { "epoch": 0.05703048180924287, "grad_norm": 38.75, "learning_rate": 3.3823529411764707e-06, "loss": 0.4231, "step": 116 }, { "epoch": 0.05752212389380531, "grad_norm": 22.125, "learning_rate": 3.411764705882353e-06, "loss": 0.4203, "step": 117 }, { "epoch": 0.05801376597836775, "grad_norm": 16.625, "learning_rate": 3.4411764705882353e-06, "loss": 0.4176, "step": 118 }, { "epoch": 0.058505408062930184, "grad_norm": 14.1875, "learning_rate": 3.470588235294118e-06, "loss": 0.2197, "step": 119 }, { "epoch": 0.058997050147492625, "grad_norm": 22.5, "learning_rate": 3.5e-06, "loss": 0.3934, "step": 120 }, { "epoch": 0.059488692232055065, "grad_norm": 25.5, "learning_rate": 3.5294117647058825e-06, "loss": 0.5521, "step": 121 }, { "epoch": 0.0599803343166175, "grad_norm": 23.375, "learning_rate": 3.5588235294117646e-06, "loss": 0.3901, "step": 122 }, { "epoch": 0.06047197640117994, "grad_norm": 18.25, "learning_rate": 3.5882352941176475e-06, "loss": 0.3651, "step": 123 }, { "epoch": 0.06096361848574238, "grad_norm": 32.25, "learning_rate": 3.6176470588235296e-06, "loss": 0.6102, "step": 124 }, { "epoch": 0.061455260570304815, "grad_norm": 57.25, "learning_rate": 3.6470588235294117e-06, "loss": 0.642, "step": 125 }, { "epoch": 0.061946902654867256, "grad_norm": 36.0, "learning_rate": 3.6764705882352942e-06, "loss": 0.6179, "step": 126 }, { "epoch": 0.0624385447394297, "grad_norm": 14.1875, "learning_rate": 3.7058823529411763e-06, "loss": 0.2994, "step": 127 }, { "epoch": 0.06293018682399214, "grad_norm": 22.625, "learning_rate": 3.7352941176470593e-06, "loss": 0.4069, "step": 128 }, { "epoch": 0.06342182890855458, "grad_norm": 24.375, "learning_rate": 3.7647058823529414e-06, "loss": 0.3401, "step": 129 }, { "epoch": 0.063913470993117, "grad_norm": 28.875, "learning_rate": 3.7941176470588235e-06, "loss": 0.537, "step": 130 }, { "epoch": 0.06440511307767945, "grad_norm": 29.0, "learning_rate": 3.8235294117647055e-06, "loss": 0.3748, "step": 131 }, { "epoch": 0.06489675516224189, "grad_norm": 35.0, "learning_rate": 3.852941176470588e-06, "loss": 0.5634, "step": 132 }, { "epoch": 0.06538839724680433, "grad_norm": 23.375, "learning_rate": 3.882352941176471e-06, "loss": 0.5233, "step": 133 }, { "epoch": 0.06588003933136677, "grad_norm": 33.25, "learning_rate": 3.911764705882353e-06, "loss": 0.276, "step": 134 }, { "epoch": 0.06637168141592921, "grad_norm": 24.25, "learning_rate": 3.941176470588236e-06, "loss": 0.3255, "step": 135 }, { "epoch": 0.06686332350049164, "grad_norm": 26.375, "learning_rate": 3.970588235294117e-06, "loss": 0.4462, "step": 136 }, { "epoch": 0.06735496558505408, "grad_norm": 29.25, "learning_rate": 4e-06, "loss": 0.4268, "step": 137 }, { "epoch": 0.06784660766961652, "grad_norm": 21.25, "learning_rate": 4.029411764705882e-06, "loss": 0.3423, "step": 138 }, { "epoch": 0.06833824975417896, "grad_norm": 29.625, "learning_rate": 4.058823529411765e-06, "loss": 0.5564, "step": 139 }, { "epoch": 0.0688298918387414, "grad_norm": 39.0, "learning_rate": 4.088235294117647e-06, "loss": 0.541, "step": 140 }, { "epoch": 0.06932153392330384, "grad_norm": 32.25, "learning_rate": 4.117647058823529e-06, "loss": 0.4238, "step": 141 }, { "epoch": 0.06981317600786627, "grad_norm": 21.625, "learning_rate": 4.1470588235294116e-06, "loss": 0.4121, "step": 142 }, { "epoch": 0.07030481809242871, "grad_norm": 33.5, "learning_rate": 4.176470588235294e-06, "loss": 0.5746, "step": 143 }, { "epoch": 0.07079646017699115, "grad_norm": 25.75, "learning_rate": 4.205882352941177e-06, "loss": 0.3552, "step": 144 }, { "epoch": 0.07128810226155359, "grad_norm": 27.0, "learning_rate": 4.235294117647059e-06, "loss": 0.469, "step": 145 }, { "epoch": 0.07177974434611603, "grad_norm": 33.25, "learning_rate": 4.264705882352941e-06, "loss": 0.4718, "step": 146 }, { "epoch": 0.07227138643067847, "grad_norm": 20.875, "learning_rate": 4.294117647058824e-06, "loss": 0.3399, "step": 147 }, { "epoch": 0.0727630285152409, "grad_norm": 27.25, "learning_rate": 4.323529411764706e-06, "loss": 0.512, "step": 148 }, { "epoch": 0.07325467059980334, "grad_norm": 18.75, "learning_rate": 4.352941176470588e-06, "loss": 0.4433, "step": 149 }, { "epoch": 0.07374631268436578, "grad_norm": 30.5, "learning_rate": 4.382352941176471e-06, "loss": 0.5106, "step": 150 }, { "epoch": 0.07423795476892822, "grad_norm": 24.875, "learning_rate": 4.4117647058823526e-06, "loss": 0.4465, "step": 151 }, { "epoch": 0.07472959685349066, "grad_norm": 31.125, "learning_rate": 4.441176470588236e-06, "loss": 0.4915, "step": 152 }, { "epoch": 0.0752212389380531, "grad_norm": 26.125, "learning_rate": 4.470588235294118e-06, "loss": 0.4603, "step": 153 }, { "epoch": 0.07571288102261553, "grad_norm": 24.25, "learning_rate": 4.5e-06, "loss": 0.3933, "step": 154 }, { "epoch": 0.07620452310717797, "grad_norm": 14.25, "learning_rate": 4.529411764705883e-06, "loss": 0.3581, "step": 155 }, { "epoch": 0.07669616519174041, "grad_norm": 27.5, "learning_rate": 4.558823529411764e-06, "loss": 0.5488, "step": 156 }, { "epoch": 0.07718780727630285, "grad_norm": 26.25, "learning_rate": 4.588235294117647e-06, "loss": 0.2975, "step": 157 }, { "epoch": 0.0776794493608653, "grad_norm": 33.25, "learning_rate": 4.617647058823529e-06, "loss": 0.601, "step": 158 }, { "epoch": 0.07817109144542773, "grad_norm": 29.875, "learning_rate": 4.647058823529413e-06, "loss": 0.4335, "step": 159 }, { "epoch": 0.07866273352999016, "grad_norm": 26.375, "learning_rate": 4.676470588235294e-06, "loss": 0.4753, "step": 160 }, { "epoch": 0.0791543756145526, "grad_norm": 22.5, "learning_rate": 4.705882352941177e-06, "loss": 0.302, "step": 161 }, { "epoch": 0.07964601769911504, "grad_norm": 23.75, "learning_rate": 4.7352941176470594e-06, "loss": 0.4054, "step": 162 }, { "epoch": 0.08013765978367748, "grad_norm": 23.0, "learning_rate": 4.764705882352941e-06, "loss": 0.3112, "step": 163 }, { "epoch": 0.08062930186823992, "grad_norm": 34.25, "learning_rate": 4.794117647058824e-06, "loss": 0.5938, "step": 164 }, { "epoch": 0.08112094395280237, "grad_norm": 26.125, "learning_rate": 4.823529411764706e-06, "loss": 0.5378, "step": 165 }, { "epoch": 0.08161258603736479, "grad_norm": 20.5, "learning_rate": 4.852941176470588e-06, "loss": 0.3945, "step": 166 }, { "epoch": 0.08210422812192723, "grad_norm": 25.5, "learning_rate": 4.88235294117647e-06, "loss": 0.2894, "step": 167 }, { "epoch": 0.08259587020648967, "grad_norm": 27.25, "learning_rate": 4.911764705882353e-06, "loss": 0.4372, "step": 168 }, { "epoch": 0.08308751229105212, "grad_norm": 20.625, "learning_rate": 4.941176470588236e-06, "loss": 0.411, "step": 169 }, { "epoch": 0.08357915437561456, "grad_norm": 25.375, "learning_rate": 4.970588235294118e-06, "loss": 0.3889, "step": 170 }, { "epoch": 0.084070796460177, "grad_norm": 21.0, "learning_rate": 5e-06, "loss": 0.3935, "step": 171 }, { "epoch": 0.08456243854473942, "grad_norm": 26.125, "learning_rate": 5.029411764705883e-06, "loss": 0.4778, "step": 172 }, { "epoch": 0.08505408062930186, "grad_norm": 19.0, "learning_rate": 5.058823529411765e-06, "loss": 0.4823, "step": 173 }, { "epoch": 0.0855457227138643, "grad_norm": 31.25, "learning_rate": 5.088235294117647e-06, "loss": 0.6469, "step": 174 }, { "epoch": 0.08603736479842675, "grad_norm": 19.625, "learning_rate": 5.11764705882353e-06, "loss": 0.3442, "step": 175 }, { "epoch": 0.08652900688298919, "grad_norm": 22.5, "learning_rate": 5.147058823529411e-06, "loss": 0.4977, "step": 176 }, { "epoch": 0.08702064896755163, "grad_norm": 19.125, "learning_rate": 5.176470588235294e-06, "loss": 0.3744, "step": 177 }, { "epoch": 0.08751229105211406, "grad_norm": 41.0, "learning_rate": 5.205882352941177e-06, "loss": 0.6999, "step": 178 }, { "epoch": 0.0880039331366765, "grad_norm": 24.5, "learning_rate": 5.23529411764706e-06, "loss": 0.261, "step": 179 }, { "epoch": 0.08849557522123894, "grad_norm": 21.0, "learning_rate": 5.264705882352941e-06, "loss": 0.3238, "step": 180 }, { "epoch": 0.08898721730580138, "grad_norm": 27.0, "learning_rate": 5.294117647058824e-06, "loss": 0.3578, "step": 181 }, { "epoch": 0.08947885939036382, "grad_norm": 24.75, "learning_rate": 5.3235294117647064e-06, "loss": 0.4241, "step": 182 }, { "epoch": 0.08997050147492626, "grad_norm": 41.5, "learning_rate": 5.352941176470588e-06, "loss": 0.4254, "step": 183 }, { "epoch": 0.09046214355948869, "grad_norm": 21.25, "learning_rate": 5.382352941176471e-06, "loss": 0.3496, "step": 184 }, { "epoch": 0.09095378564405113, "grad_norm": 29.0, "learning_rate": 5.411764705882353e-06, "loss": 0.4283, "step": 185 }, { "epoch": 0.09144542772861357, "grad_norm": 26.125, "learning_rate": 5.441176470588235e-06, "loss": 0.5305, "step": 186 }, { "epoch": 0.09193706981317601, "grad_norm": 20.0, "learning_rate": 5.470588235294117e-06, "loss": 0.5443, "step": 187 }, { "epoch": 0.09242871189773845, "grad_norm": 23.375, "learning_rate": 5.500000000000001e-06, "loss": 0.5209, "step": 188 }, { "epoch": 0.09292035398230089, "grad_norm": 30.375, "learning_rate": 5.529411764705883e-06, "loss": 0.5261, "step": 189 }, { "epoch": 0.09341199606686332, "grad_norm": 20.0, "learning_rate": 5.558823529411765e-06, "loss": 0.367, "step": 190 }, { "epoch": 0.09390363815142576, "grad_norm": 23.125, "learning_rate": 5.588235294117647e-06, "loss": 0.3896, "step": 191 }, { "epoch": 0.0943952802359882, "grad_norm": 16.375, "learning_rate": 5.61764705882353e-06, "loss": 0.4527, "step": 192 }, { "epoch": 0.09488692232055064, "grad_norm": 42.5, "learning_rate": 5.647058823529412e-06, "loss": 0.5937, "step": 193 }, { "epoch": 0.09537856440511308, "grad_norm": 18.875, "learning_rate": 5.676470588235294e-06, "loss": 0.4139, "step": 194 }, { "epoch": 0.09587020648967552, "grad_norm": 33.0, "learning_rate": 5.705882352941177e-06, "loss": 0.5545, "step": 195 }, { "epoch": 0.09636184857423795, "grad_norm": 19.75, "learning_rate": 5.735294117647058e-06, "loss": 0.3378, "step": 196 }, { "epoch": 0.09685349065880039, "grad_norm": 22.625, "learning_rate": 5.764705882352941e-06, "loss": 0.4919, "step": 197 }, { "epoch": 0.09734513274336283, "grad_norm": 30.0, "learning_rate": 5.794117647058824e-06, "loss": 0.5514, "step": 198 }, { "epoch": 0.09783677482792527, "grad_norm": 24.25, "learning_rate": 5.823529411764707e-06, "loss": 0.4312, "step": 199 }, { "epoch": 0.09832841691248771, "grad_norm": 34.25, "learning_rate": 5.852941176470588e-06, "loss": 0.2081, "step": 200 }, { "epoch": 0.09882005899705015, "grad_norm": 18.875, "learning_rate": 5.882352941176471e-06, "loss": 0.3232, "step": 201 }, { "epoch": 0.09931170108161258, "grad_norm": 23.375, "learning_rate": 5.9117647058823534e-06, "loss": 0.4962, "step": 202 }, { "epoch": 0.09980334316617502, "grad_norm": 27.5, "learning_rate": 5.941176470588235e-06, "loss": 0.2469, "step": 203 }, { "epoch": 0.10029498525073746, "grad_norm": 31.625, "learning_rate": 5.970588235294118e-06, "loss": 0.2947, "step": 204 }, { "epoch": 0.1007866273352999, "grad_norm": 27.25, "learning_rate": 6e-06, "loss": 0.3862, "step": 205 }, { "epoch": 0.10127826941986234, "grad_norm": 32.5, "learning_rate": 6.029411764705882e-06, "loss": 0.525, "step": 206 }, { "epoch": 0.10176991150442478, "grad_norm": 26.0, "learning_rate": 6.058823529411765e-06, "loss": 0.4918, "step": 207 }, { "epoch": 0.10226155358898721, "grad_norm": 16.25, "learning_rate": 6.088235294117648e-06, "loss": 0.2885, "step": 208 }, { "epoch": 0.10275319567354965, "grad_norm": 27.625, "learning_rate": 6.11764705882353e-06, "loss": 0.526, "step": 209 }, { "epoch": 0.10324483775811209, "grad_norm": 23.0, "learning_rate": 6.147058823529412e-06, "loss": 0.477, "step": 210 }, { "epoch": 0.10373647984267453, "grad_norm": 20.125, "learning_rate": 6.1764705882352944e-06, "loss": 0.399, "step": 211 }, { "epoch": 0.10422812192723697, "grad_norm": 13.0625, "learning_rate": 6.205882352941177e-06, "loss": 0.2535, "step": 212 }, { "epoch": 0.10471976401179942, "grad_norm": 29.375, "learning_rate": 6.235294117647059e-06, "loss": 0.4061, "step": 213 }, { "epoch": 0.10521140609636184, "grad_norm": 71.0, "learning_rate": 6.264705882352941e-06, "loss": 0.8004, "step": 214 }, { "epoch": 0.10570304818092428, "grad_norm": 34.25, "learning_rate": 6.294117647058824e-06, "loss": 0.6243, "step": 215 }, { "epoch": 0.10619469026548672, "grad_norm": 18.875, "learning_rate": 6.323529411764705e-06, "loss": 0.4115, "step": 216 }, { "epoch": 0.10668633235004917, "grad_norm": 42.25, "learning_rate": 6.352941176470589e-06, "loss": 0.5434, "step": 217 }, { "epoch": 0.1071779744346116, "grad_norm": 22.875, "learning_rate": 6.382352941176471e-06, "loss": 0.2542, "step": 218 }, { "epoch": 0.10766961651917405, "grad_norm": 17.375, "learning_rate": 6.411764705882353e-06, "loss": 0.2887, "step": 219 }, { "epoch": 0.10816125860373647, "grad_norm": 25.375, "learning_rate": 6.441176470588235e-06, "loss": 0.5745, "step": 220 }, { "epoch": 0.10865290068829891, "grad_norm": 22.5, "learning_rate": 6.470588235294118e-06, "loss": 0.4909, "step": 221 }, { "epoch": 0.10914454277286136, "grad_norm": 17.875, "learning_rate": 6.5000000000000004e-06, "loss": 0.2524, "step": 222 }, { "epoch": 0.1096361848574238, "grad_norm": 11.375, "learning_rate": 6.529411764705882e-06, "loss": 0.226, "step": 223 }, { "epoch": 0.11012782694198624, "grad_norm": 20.5, "learning_rate": 6.558823529411765e-06, "loss": 0.396, "step": 224 }, { "epoch": 0.11061946902654868, "grad_norm": 20.125, "learning_rate": 6.588235294117647e-06, "loss": 0.3091, "step": 225 }, { "epoch": 0.1111111111111111, "grad_norm": 15.6875, "learning_rate": 6.61764705882353e-06, "loss": 0.3058, "step": 226 }, { "epoch": 0.11160275319567355, "grad_norm": 16.625, "learning_rate": 6.647058823529412e-06, "loss": 0.3668, "step": 227 }, { "epoch": 0.11209439528023599, "grad_norm": 26.5, "learning_rate": 6.676470588235295e-06, "loss": 0.3805, "step": 228 }, { "epoch": 0.11258603736479843, "grad_norm": 32.5, "learning_rate": 6.705882352941176e-06, "loss": 0.3235, "step": 229 }, { "epoch": 0.11307767944936087, "grad_norm": 19.375, "learning_rate": 6.735294117647059e-06, "loss": 0.4376, "step": 230 }, { "epoch": 0.11356932153392331, "grad_norm": 15.125, "learning_rate": 6.7647058823529414e-06, "loss": 0.3655, "step": 231 }, { "epoch": 0.11406096361848574, "grad_norm": 19.625, "learning_rate": 6.794117647058824e-06, "loss": 0.3471, "step": 232 }, { "epoch": 0.11455260570304818, "grad_norm": 29.375, "learning_rate": 6.823529411764706e-06, "loss": 0.3914, "step": 233 }, { "epoch": 0.11504424778761062, "grad_norm": 27.125, "learning_rate": 6.852941176470588e-06, "loss": 0.4072, "step": 234 }, { "epoch": 0.11553588987217306, "grad_norm": 31.75, "learning_rate": 6.882352941176471e-06, "loss": 0.4267, "step": 235 }, { "epoch": 0.1160275319567355, "grad_norm": 35.25, "learning_rate": 6.911764705882353e-06, "loss": 0.3999, "step": 236 }, { "epoch": 0.11651917404129794, "grad_norm": 28.875, "learning_rate": 6.941176470588236e-06, "loss": 0.3088, "step": 237 }, { "epoch": 0.11701081612586037, "grad_norm": 19.5, "learning_rate": 6.970588235294118e-06, "loss": 0.3713, "step": 238 }, { "epoch": 0.11750245821042281, "grad_norm": 26.5, "learning_rate": 7e-06, "loss": 0.4146, "step": 239 }, { "epoch": 0.11799410029498525, "grad_norm": 19.625, "learning_rate": 7.029411764705882e-06, "loss": 0.3357, "step": 240 }, { "epoch": 0.11848574237954769, "grad_norm": 25.5, "learning_rate": 7.058823529411765e-06, "loss": 0.2916, "step": 241 }, { "epoch": 0.11897738446411013, "grad_norm": 35.5, "learning_rate": 7.0882352941176475e-06, "loss": 0.5379, "step": 242 }, { "epoch": 0.11946902654867257, "grad_norm": 18.625, "learning_rate": 7.117647058823529e-06, "loss": 0.4, "step": 243 }, { "epoch": 0.119960668633235, "grad_norm": 17.375, "learning_rate": 7.147058823529412e-06, "loss": 0.4088, "step": 244 }, { "epoch": 0.12045231071779744, "grad_norm": 29.5, "learning_rate": 7.176470588235295e-06, "loss": 0.4314, "step": 245 }, { "epoch": 0.12094395280235988, "grad_norm": 19.125, "learning_rate": 7.205882352941177e-06, "loss": 0.3329, "step": 246 }, { "epoch": 0.12143559488692232, "grad_norm": 20.25, "learning_rate": 7.235294117647059e-06, "loss": 0.4581, "step": 247 }, { "epoch": 0.12192723697148476, "grad_norm": 25.0, "learning_rate": 7.264705882352942e-06, "loss": 0.4366, "step": 248 }, { "epoch": 0.1224188790560472, "grad_norm": 20.625, "learning_rate": 7.294117647058823e-06, "loss": 0.4393, "step": 249 }, { "epoch": 0.12291052114060963, "grad_norm": 43.25, "learning_rate": 7.323529411764706e-06, "loss": 0.6393, "step": 250 }, { "epoch": 0.12340216322517207, "grad_norm": 21.625, "learning_rate": 7.3529411764705884e-06, "loss": 0.3569, "step": 251 }, { "epoch": 0.12389380530973451, "grad_norm": 50.25, "learning_rate": 7.38235294117647e-06, "loss": 0.5243, "step": 252 }, { "epoch": 0.12438544739429695, "grad_norm": 33.75, "learning_rate": 7.411764705882353e-06, "loss": 0.5845, "step": 253 }, { "epoch": 0.1248770894788594, "grad_norm": 17.0, "learning_rate": 7.441176470588235e-06, "loss": 0.3467, "step": 254 }, { "epoch": 0.12536873156342182, "grad_norm": 16.875, "learning_rate": 7.4705882352941185e-06, "loss": 0.3173, "step": 255 }, { "epoch": 0.12586037364798427, "grad_norm": 15.5, "learning_rate": 7.5e-06, "loss": 0.4039, "step": 256 }, { "epoch": 0.1263520157325467, "grad_norm": 17.375, "learning_rate": 7.529411764705883e-06, "loss": 0.2729, "step": 257 }, { "epoch": 0.12684365781710916, "grad_norm": 12.875, "learning_rate": 7.558823529411765e-06, "loss": 0.4261, "step": 258 }, { "epoch": 0.12733529990167158, "grad_norm": 25.0, "learning_rate": 7.588235294117647e-06, "loss": 0.4882, "step": 259 }, { "epoch": 0.127826941986234, "grad_norm": 22.25, "learning_rate": 7.617647058823529e-06, "loss": 0.4889, "step": 260 }, { "epoch": 0.12831858407079647, "grad_norm": 16.375, "learning_rate": 7.647058823529411e-06, "loss": 0.3139, "step": 261 }, { "epoch": 0.1288102261553589, "grad_norm": 24.375, "learning_rate": 7.676470588235294e-06, "loss": 0.3773, "step": 262 }, { "epoch": 0.12930186823992135, "grad_norm": 12.875, "learning_rate": 7.705882352941176e-06, "loss": 0.4148, "step": 263 }, { "epoch": 0.12979351032448377, "grad_norm": 26.625, "learning_rate": 7.73529411764706e-06, "loss": 0.4591, "step": 264 }, { "epoch": 0.1302851524090462, "grad_norm": 18.75, "learning_rate": 7.764705882352941e-06, "loss": 0.5503, "step": 265 }, { "epoch": 0.13077679449360866, "grad_norm": 26.75, "learning_rate": 7.794117647058825e-06, "loss": 0.5194, "step": 266 }, { "epoch": 0.13126843657817108, "grad_norm": 19.875, "learning_rate": 7.823529411764706e-06, "loss": 0.4384, "step": 267 }, { "epoch": 0.13176007866273354, "grad_norm": 21.875, "learning_rate": 7.852941176470588e-06, "loss": 0.4925, "step": 268 }, { "epoch": 0.13225172074729596, "grad_norm": 27.75, "learning_rate": 7.882352941176471e-06, "loss": 0.5207, "step": 269 }, { "epoch": 0.13274336283185842, "grad_norm": 25.0, "learning_rate": 7.911764705882353e-06, "loss": 0.432, "step": 270 }, { "epoch": 0.13323500491642085, "grad_norm": 29.125, "learning_rate": 7.941176470588235e-06, "loss": 0.5554, "step": 271 }, { "epoch": 0.13372664700098327, "grad_norm": 23.75, "learning_rate": 7.970588235294118e-06, "loss": 0.4873, "step": 272 }, { "epoch": 0.13421828908554573, "grad_norm": 28.5, "learning_rate": 8e-06, "loss": 0.3462, "step": 273 }, { "epoch": 0.13470993117010815, "grad_norm": 26.5, "learning_rate": 8.029411764705883e-06, "loss": 0.2147, "step": 274 }, { "epoch": 0.1352015732546706, "grad_norm": 19.25, "learning_rate": 8.058823529411765e-06, "loss": 0.3979, "step": 275 }, { "epoch": 0.13569321533923304, "grad_norm": 25.5, "learning_rate": 8.088235294117648e-06, "loss": 0.236, "step": 276 }, { "epoch": 0.13618485742379546, "grad_norm": 22.875, "learning_rate": 8.11764705882353e-06, "loss": 0.6047, "step": 277 }, { "epoch": 0.13667649950835792, "grad_norm": 23.5, "learning_rate": 8.147058823529411e-06, "loss": 0.3059, "step": 278 }, { "epoch": 0.13716814159292035, "grad_norm": 31.875, "learning_rate": 8.176470588235295e-06, "loss": 0.6163, "step": 279 }, { "epoch": 0.1376597836774828, "grad_norm": 34.25, "learning_rate": 8.205882352941176e-06, "loss": 0.6179, "step": 280 }, { "epoch": 0.13815142576204523, "grad_norm": 30.0, "learning_rate": 8.235294117647058e-06, "loss": 0.3659, "step": 281 }, { "epoch": 0.13864306784660768, "grad_norm": 21.625, "learning_rate": 8.264705882352941e-06, "loss": 0.2904, "step": 282 }, { "epoch": 0.1391347099311701, "grad_norm": 18.0, "learning_rate": 8.294117647058823e-06, "loss": 0.3471, "step": 283 }, { "epoch": 0.13962635201573254, "grad_norm": 19.5, "learning_rate": 8.323529411764707e-06, "loss": 0.3373, "step": 284 }, { "epoch": 0.140117994100295, "grad_norm": 21.125, "learning_rate": 8.352941176470588e-06, "loss": 0.4167, "step": 285 }, { "epoch": 0.14060963618485742, "grad_norm": 14.125, "learning_rate": 8.382352941176472e-06, "loss": 0.2552, "step": 286 }, { "epoch": 0.14110127826941987, "grad_norm": 38.0, "learning_rate": 8.411764705882353e-06, "loss": 0.8196, "step": 287 }, { "epoch": 0.1415929203539823, "grad_norm": 22.0, "learning_rate": 8.441176470588235e-06, "loss": 0.4193, "step": 288 }, { "epoch": 0.14208456243854473, "grad_norm": 28.375, "learning_rate": 8.470588235294118e-06, "loss": 0.4754, "step": 289 }, { "epoch": 0.14257620452310718, "grad_norm": 23.5, "learning_rate": 8.5e-06, "loss": 0.3838, "step": 290 }, { "epoch": 0.1430678466076696, "grad_norm": 18.125, "learning_rate": 8.529411764705882e-06, "loss": 0.3135, "step": 291 }, { "epoch": 0.14355948869223206, "grad_norm": 47.5, "learning_rate": 8.558823529411765e-06, "loss": 0.5617, "step": 292 }, { "epoch": 0.1440511307767945, "grad_norm": 27.5, "learning_rate": 8.588235294117648e-06, "loss": 0.5084, "step": 293 }, { "epoch": 0.14454277286135694, "grad_norm": 31.25, "learning_rate": 8.61764705882353e-06, "loss": 0.5286, "step": 294 }, { "epoch": 0.14503441494591937, "grad_norm": 26.625, "learning_rate": 8.647058823529412e-06, "loss": 0.4437, "step": 295 }, { "epoch": 0.1455260570304818, "grad_norm": 23.375, "learning_rate": 8.676470588235295e-06, "loss": 0.4088, "step": 296 }, { "epoch": 0.14601769911504425, "grad_norm": 19.875, "learning_rate": 8.705882352941177e-06, "loss": 0.5246, "step": 297 }, { "epoch": 0.14650934119960668, "grad_norm": 24.625, "learning_rate": 8.735294117647058e-06, "loss": 0.4103, "step": 298 }, { "epoch": 0.14700098328416913, "grad_norm": 23.375, "learning_rate": 8.764705882352942e-06, "loss": 0.372, "step": 299 }, { "epoch": 0.14749262536873156, "grad_norm": 21.125, "learning_rate": 8.794117647058823e-06, "loss": 0.2605, "step": 300 }, { "epoch": 0.147984267453294, "grad_norm": 29.25, "learning_rate": 8.823529411764705e-06, "loss": 0.3707, "step": 301 }, { "epoch": 0.14847590953785644, "grad_norm": 19.75, "learning_rate": 8.852941176470588e-06, "loss": 0.5068, "step": 302 }, { "epoch": 0.14896755162241887, "grad_norm": 16.875, "learning_rate": 8.882352941176472e-06, "loss": 0.3403, "step": 303 }, { "epoch": 0.14945919370698132, "grad_norm": 29.375, "learning_rate": 8.911764705882354e-06, "loss": 0.5421, "step": 304 }, { "epoch": 0.14995083579154375, "grad_norm": 25.875, "learning_rate": 8.941176470588235e-06, "loss": 0.3662, "step": 305 }, { "epoch": 0.1504424778761062, "grad_norm": 28.5, "learning_rate": 8.970588235294119e-06, "loss": 0.6632, "step": 306 }, { "epoch": 0.15093411996066863, "grad_norm": 24.0, "learning_rate": 9e-06, "loss": 0.2538, "step": 307 }, { "epoch": 0.15142576204523106, "grad_norm": 16.875, "learning_rate": 8.999999338963411e-06, "loss": 0.4492, "step": 308 }, { "epoch": 0.15191740412979352, "grad_norm": 29.5, "learning_rate": 8.999997355853836e-06, "loss": 0.6575, "step": 309 }, { "epoch": 0.15240904621435594, "grad_norm": 37.25, "learning_rate": 8.999994050671858e-06, "loss": 0.5146, "step": 310 }, { "epoch": 0.1529006882989184, "grad_norm": 17.625, "learning_rate": 8.999989423418448e-06, "loss": 0.3696, "step": 311 }, { "epoch": 0.15339233038348082, "grad_norm": 22.875, "learning_rate": 8.999983474094966e-06, "loss": 0.5868, "step": 312 }, { "epoch": 0.15388397246804325, "grad_norm": 36.5, "learning_rate": 8.99997620270316e-06, "loss": 0.4178, "step": 313 }, { "epoch": 0.1543756145526057, "grad_norm": 37.0, "learning_rate": 8.999967609245166e-06, "loss": 0.4928, "step": 314 }, { "epoch": 0.15486725663716813, "grad_norm": 18.75, "learning_rate": 8.999957693723508e-06, "loss": 0.3141, "step": 315 }, { "epoch": 0.1553588987217306, "grad_norm": 24.5, "learning_rate": 8.999946456141102e-06, "loss": 0.4554, "step": 316 }, { "epoch": 0.15585054080629301, "grad_norm": 20.125, "learning_rate": 8.999933896501245e-06, "loss": 0.5004, "step": 317 }, { "epoch": 0.15634218289085547, "grad_norm": 21.625, "learning_rate": 8.99992001480763e-06, "loss": 0.4878, "step": 318 }, { "epoch": 0.1568338249754179, "grad_norm": 12.25, "learning_rate": 8.999904811064334e-06, "loss": 0.464, "step": 319 }, { "epoch": 0.15732546705998032, "grad_norm": 34.25, "learning_rate": 8.999888285275825e-06, "loss": 0.5793, "step": 320 }, { "epoch": 0.15781710914454278, "grad_norm": 35.75, "learning_rate": 8.999870437446958e-06, "loss": 0.6217, "step": 321 }, { "epoch": 0.1583087512291052, "grad_norm": 26.375, "learning_rate": 8.999851267582976e-06, "loss": 0.6035, "step": 322 }, { "epoch": 0.15880039331366766, "grad_norm": 14.5625, "learning_rate": 8.99983077568951e-06, "loss": 0.392, "step": 323 }, { "epoch": 0.1592920353982301, "grad_norm": 14.125, "learning_rate": 8.999808961772583e-06, "loss": 0.4462, "step": 324 }, { "epoch": 0.1597836774827925, "grad_norm": 22.125, "learning_rate": 8.999785825838603e-06, "loss": 0.5139, "step": 325 }, { "epoch": 0.16027531956735497, "grad_norm": 18.875, "learning_rate": 8.999761367894364e-06, "loss": 0.6199, "step": 326 }, { "epoch": 0.1607669616519174, "grad_norm": 21.25, "learning_rate": 8.999735587947055e-06, "loss": 0.5825, "step": 327 }, { "epoch": 0.16125860373647985, "grad_norm": 19.875, "learning_rate": 8.999708486004249e-06, "loss": 0.3924, "step": 328 }, { "epoch": 0.16175024582104228, "grad_norm": 33.0, "learning_rate": 8.99968006207391e-06, "loss": 0.4524, "step": 329 }, { "epoch": 0.16224188790560473, "grad_norm": 38.75, "learning_rate": 8.999650316164386e-06, "loss": 0.3625, "step": 330 }, { "epoch": 0.16273352999016716, "grad_norm": 16.5, "learning_rate": 8.999619248284418e-06, "loss": 0.5096, "step": 331 }, { "epoch": 0.16322517207472959, "grad_norm": 19.75, "learning_rate": 8.999586858443134e-06, "loss": 0.3595, "step": 332 }, { "epoch": 0.16371681415929204, "grad_norm": 17.25, "learning_rate": 8.999553146650047e-06, "loss": 0.3983, "step": 333 }, { "epoch": 0.16420845624385447, "grad_norm": 21.0, "learning_rate": 8.999518112915065e-06, "loss": 0.568, "step": 334 }, { "epoch": 0.16470009832841692, "grad_norm": 20.875, "learning_rate": 8.999481757248477e-06, "loss": 0.5629, "step": 335 }, { "epoch": 0.16519174041297935, "grad_norm": 27.5, "learning_rate": 8.999444079660968e-06, "loss": 0.4222, "step": 336 }, { "epoch": 0.16568338249754178, "grad_norm": 23.375, "learning_rate": 8.999405080163606e-06, "loss": 0.4023, "step": 337 }, { "epoch": 0.16617502458210423, "grad_norm": 13.25, "learning_rate": 8.999364758767847e-06, "loss": 0.3386, "step": 338 }, { "epoch": 0.16666666666666666, "grad_norm": 22.5, "learning_rate": 8.99932311548554e-06, "loss": 0.5435, "step": 339 }, { "epoch": 0.1671583087512291, "grad_norm": 21.25, "learning_rate": 8.999280150328914e-06, "loss": 0.4954, "step": 340 }, { "epoch": 0.16764995083579154, "grad_norm": 36.75, "learning_rate": 8.9992358633106e-06, "loss": 0.6728, "step": 341 }, { "epoch": 0.168141592920354, "grad_norm": 24.125, "learning_rate": 8.999190254443604e-06, "loss": 0.3602, "step": 342 }, { "epoch": 0.16863323500491642, "grad_norm": 21.625, "learning_rate": 8.999143323741326e-06, "loss": 0.5151, "step": 343 }, { "epoch": 0.16912487708947885, "grad_norm": 22.75, "learning_rate": 8.999095071217557e-06, "loss": 0.3498, "step": 344 }, { "epoch": 0.1696165191740413, "grad_norm": 12.75, "learning_rate": 8.99904549688647e-06, "loss": 0.4261, "step": 345 }, { "epoch": 0.17010816125860373, "grad_norm": 25.875, "learning_rate": 8.99899460076263e-06, "loss": 0.2225, "step": 346 }, { "epoch": 0.17059980334316618, "grad_norm": 33.25, "learning_rate": 8.998942382860992e-06, "loss": 0.484, "step": 347 }, { "epoch": 0.1710914454277286, "grad_norm": 17.25, "learning_rate": 8.998888843196896e-06, "loss": 0.4283, "step": 348 }, { "epoch": 0.17158308751229107, "grad_norm": 10.9375, "learning_rate": 8.998833981786072e-06, "loss": 0.3052, "step": 349 }, { "epoch": 0.1720747295968535, "grad_norm": 21.75, "learning_rate": 8.998777798644636e-06, "loss": 0.5511, "step": 350 }, { "epoch": 0.17256637168141592, "grad_norm": 26.625, "learning_rate": 8.998720293789097e-06, "loss": 0.561, "step": 351 }, { "epoch": 0.17305801376597837, "grad_norm": 19.375, "learning_rate": 8.99866146723635e-06, "loss": 0.4375, "step": 352 }, { "epoch": 0.1735496558505408, "grad_norm": 12.6875, "learning_rate": 8.998601319003674e-06, "loss": 0.2933, "step": 353 }, { "epoch": 0.17404129793510326, "grad_norm": 15.5625, "learning_rate": 8.998539849108742e-06, "loss": 0.3981, "step": 354 }, { "epoch": 0.17453294001966568, "grad_norm": 14.25, "learning_rate": 8.998477057569617e-06, "loss": 0.3309, "step": 355 }, { "epoch": 0.1750245821042281, "grad_norm": 39.25, "learning_rate": 8.998412944404742e-06, "loss": 0.6256, "step": 356 }, { "epoch": 0.17551622418879056, "grad_norm": 23.75, "learning_rate": 8.998347509632955e-06, "loss": 0.5376, "step": 357 }, { "epoch": 0.176007866273353, "grad_norm": 20.125, "learning_rate": 8.998280753273481e-06, "loss": 0.3191, "step": 358 }, { "epoch": 0.17649950835791545, "grad_norm": 21.5, "learning_rate": 8.99821267534593e-06, "loss": 0.4029, "step": 359 }, { "epoch": 0.17699115044247787, "grad_norm": 19.75, "learning_rate": 8.998143275870307e-06, "loss": 0.4381, "step": 360 }, { "epoch": 0.17748279252704033, "grad_norm": 17.625, "learning_rate": 8.998072554866999e-06, "loss": 0.4205, "step": 361 }, { "epoch": 0.17797443461160276, "grad_norm": 17.875, "learning_rate": 8.998000512356782e-06, "loss": 0.2692, "step": 362 }, { "epoch": 0.17846607669616518, "grad_norm": 17.125, "learning_rate": 8.997927148360824e-06, "loss": 0.4504, "step": 363 }, { "epoch": 0.17895771878072764, "grad_norm": 14.9375, "learning_rate": 8.997852462900676e-06, "loss": 0.4246, "step": 364 }, { "epoch": 0.17944936086529006, "grad_norm": 26.5, "learning_rate": 8.997776455998283e-06, "loss": 0.2837, "step": 365 }, { "epoch": 0.17994100294985252, "grad_norm": 20.625, "learning_rate": 8.997699127675976e-06, "loss": 0.2601, "step": 366 }, { "epoch": 0.18043264503441495, "grad_norm": 20.75, "learning_rate": 8.997620477956472e-06, "loss": 0.4706, "step": 367 }, { "epoch": 0.18092428711897737, "grad_norm": 26.875, "learning_rate": 8.997540506862875e-06, "loss": 0.463, "step": 368 }, { "epoch": 0.18141592920353983, "grad_norm": 10.4375, "learning_rate": 8.997459214418685e-06, "loss": 0.3552, "step": 369 }, { "epoch": 0.18190757128810225, "grad_norm": 12.0625, "learning_rate": 8.997376600647784e-06, "loss": 0.3204, "step": 370 }, { "epoch": 0.1823992133726647, "grad_norm": 24.875, "learning_rate": 8.99729266557444e-06, "loss": 0.3527, "step": 371 }, { "epoch": 0.18289085545722714, "grad_norm": 24.375, "learning_rate": 8.997207409223316e-06, "loss": 0.4967, "step": 372 }, { "epoch": 0.1833824975417896, "grad_norm": 10.375, "learning_rate": 8.99712083161946e-06, "loss": 0.285, "step": 373 }, { "epoch": 0.18387413962635202, "grad_norm": 22.0, "learning_rate": 8.997032932788307e-06, "loss": 0.3607, "step": 374 }, { "epoch": 0.18436578171091444, "grad_norm": 21.75, "learning_rate": 8.996943712755682e-06, "loss": 0.5359, "step": 375 }, { "epoch": 0.1848574237954769, "grad_norm": 26.25, "learning_rate": 8.996853171547794e-06, "loss": 0.3614, "step": 376 }, { "epoch": 0.18534906588003933, "grad_norm": 20.25, "learning_rate": 8.996761309191246e-06, "loss": 0.4575, "step": 377 }, { "epoch": 0.18584070796460178, "grad_norm": 18.625, "learning_rate": 8.996668125713029e-06, "loss": 0.3388, "step": 378 }, { "epoch": 0.1863323500491642, "grad_norm": 25.75, "learning_rate": 8.996573621140515e-06, "loss": 0.3817, "step": 379 }, { "epoch": 0.18682399213372664, "grad_norm": 33.75, "learning_rate": 8.996477795501472e-06, "loss": 0.4233, "step": 380 }, { "epoch": 0.1873156342182891, "grad_norm": 18.125, "learning_rate": 8.99638064882405e-06, "loss": 0.2947, "step": 381 }, { "epoch": 0.18780727630285152, "grad_norm": 19.25, "learning_rate": 8.996282181136794e-06, "loss": 0.4709, "step": 382 }, { "epoch": 0.18829891838741397, "grad_norm": 17.875, "learning_rate": 8.996182392468633e-06, "loss": 0.435, "step": 383 }, { "epoch": 0.1887905604719764, "grad_norm": 19.375, "learning_rate": 8.996081282848882e-06, "loss": 0.4637, "step": 384 }, { "epoch": 0.18928220255653885, "grad_norm": 15.4375, "learning_rate": 8.995978852307247e-06, "loss": 0.3558, "step": 385 }, { "epoch": 0.18977384464110128, "grad_norm": 16.25, "learning_rate": 8.99587510087382e-06, "loss": 0.3918, "step": 386 }, { "epoch": 0.1902654867256637, "grad_norm": 21.0, "learning_rate": 8.995770028579087e-06, "loss": 0.2702, "step": 387 }, { "epoch": 0.19075712881022616, "grad_norm": 12.75, "learning_rate": 8.995663635453914e-06, "loss": 0.2682, "step": 388 }, { "epoch": 0.1912487708947886, "grad_norm": 16.0, "learning_rate": 8.995555921529557e-06, "loss": 0.3078, "step": 389 }, { "epoch": 0.19174041297935104, "grad_norm": 35.0, "learning_rate": 8.995446886837666e-06, "loss": 0.4554, "step": 390 }, { "epoch": 0.19223205506391347, "grad_norm": 23.875, "learning_rate": 8.995336531410274e-06, "loss": 0.3249, "step": 391 }, { "epoch": 0.1927236971484759, "grad_norm": 17.5, "learning_rate": 8.995224855279801e-06, "loss": 0.237, "step": 392 }, { "epoch": 0.19321533923303835, "grad_norm": 24.875, "learning_rate": 8.995111858479057e-06, "loss": 0.5365, "step": 393 }, { "epoch": 0.19370698131760078, "grad_norm": 17.875, "learning_rate": 8.994997541041241e-06, "loss": 0.3368, "step": 394 }, { "epoch": 0.19419862340216323, "grad_norm": 21.625, "learning_rate": 8.994881902999938e-06, "loss": 0.3337, "step": 395 }, { "epoch": 0.19469026548672566, "grad_norm": 19.875, "learning_rate": 8.994764944389122e-06, "loss": 0.4409, "step": 396 }, { "epoch": 0.19518190757128812, "grad_norm": 16.5, "learning_rate": 8.994646665243154e-06, "loss": 0.2209, "step": 397 }, { "epoch": 0.19567354965585054, "grad_norm": 35.5, "learning_rate": 8.994527065596783e-06, "loss": 0.6188, "step": 398 }, { "epoch": 0.19616519174041297, "grad_norm": 35.25, "learning_rate": 8.99440614548515e-06, "loss": 0.5211, "step": 399 }, { "epoch": 0.19665683382497542, "grad_norm": 19.625, "learning_rate": 8.99428390494378e-06, "loss": 0.3167, "step": 400 }, { "epoch": 0.19714847590953785, "grad_norm": 11.75, "learning_rate": 8.99416034400858e-06, "loss": 0.3112, "step": 401 }, { "epoch": 0.1976401179941003, "grad_norm": 29.75, "learning_rate": 8.994035462715862e-06, "loss": 0.6314, "step": 402 }, { "epoch": 0.19813176007866273, "grad_norm": 14.5, "learning_rate": 8.993909261102307e-06, "loss": 0.4623, "step": 403 }, { "epoch": 0.19862340216322516, "grad_norm": 27.25, "learning_rate": 8.993781739204996e-06, "loss": 0.5133, "step": 404 }, { "epoch": 0.19911504424778761, "grad_norm": 16.125, "learning_rate": 8.993652897061393e-06, "loss": 0.4154, "step": 405 }, { "epoch": 0.19960668633235004, "grad_norm": 7.15625, "learning_rate": 8.993522734709353e-06, "loss": 0.3382, "step": 406 }, { "epoch": 0.2000983284169125, "grad_norm": 24.0, "learning_rate": 8.993391252187114e-06, "loss": 0.5569, "step": 407 }, { "epoch": 0.20058997050147492, "grad_norm": 15.375, "learning_rate": 8.993258449533307e-06, "loss": 0.3187, "step": 408 }, { "epoch": 0.20108161258603738, "grad_norm": 49.0, "learning_rate": 8.993124326786945e-06, "loss": 0.7114, "step": 409 }, { "epoch": 0.2015732546705998, "grad_norm": 26.25, "learning_rate": 8.992988883987439e-06, "loss": 0.5259, "step": 410 }, { "epoch": 0.20206489675516223, "grad_norm": 18.5, "learning_rate": 8.992852121174575e-06, "loss": 0.4623, "step": 411 }, { "epoch": 0.2025565388397247, "grad_norm": 16.25, "learning_rate": 8.992714038388537e-06, "loss": 0.4736, "step": 412 }, { "epoch": 0.2030481809242871, "grad_norm": 26.25, "learning_rate": 8.992574635669892e-06, "loss": 0.4124, "step": 413 }, { "epoch": 0.20353982300884957, "grad_norm": 17.875, "learning_rate": 8.992433913059595e-06, "loss": 0.3979, "step": 414 }, { "epoch": 0.204031465093412, "grad_norm": 26.625, "learning_rate": 8.992291870598988e-06, "loss": 0.4254, "step": 415 }, { "epoch": 0.20452310717797442, "grad_norm": 43.25, "learning_rate": 8.992148508329806e-06, "loss": 0.428, "step": 416 }, { "epoch": 0.20501474926253688, "grad_norm": 22.0, "learning_rate": 8.992003826294165e-06, "loss": 0.3892, "step": 417 }, { "epoch": 0.2055063913470993, "grad_norm": 29.25, "learning_rate": 8.991857824534572e-06, "loss": 0.3315, "step": 418 }, { "epoch": 0.20599803343166176, "grad_norm": 17.875, "learning_rate": 8.991710503093923e-06, "loss": 0.3285, "step": 419 }, { "epoch": 0.20648967551622419, "grad_norm": 13.75, "learning_rate": 8.991561862015499e-06, "loss": 0.4038, "step": 420 }, { "epoch": 0.20698131760078664, "grad_norm": 18.625, "learning_rate": 8.99141190134297e-06, "loss": 0.5885, "step": 421 }, { "epoch": 0.20747295968534907, "grad_norm": 19.625, "learning_rate": 8.991260621120394e-06, "loss": 0.3605, "step": 422 }, { "epoch": 0.2079646017699115, "grad_norm": 21.75, "learning_rate": 8.991108021392215e-06, "loss": 0.2831, "step": 423 }, { "epoch": 0.20845624385447395, "grad_norm": 25.75, "learning_rate": 8.990954102203268e-06, "loss": 0.4081, "step": 424 }, { "epoch": 0.20894788593903638, "grad_norm": 21.25, "learning_rate": 8.99079886359877e-06, "loss": 0.4279, "step": 425 }, { "epoch": 0.20943952802359883, "grad_norm": 29.625, "learning_rate": 8.990642305624334e-06, "loss": 0.5645, "step": 426 }, { "epoch": 0.20993117010816126, "grad_norm": 28.625, "learning_rate": 8.990484428325953e-06, "loss": 0.6142, "step": 427 }, { "epoch": 0.21042281219272368, "grad_norm": 21.375, "learning_rate": 8.99032523175001e-06, "loss": 0.5572, "step": 428 }, { "epoch": 0.21091445427728614, "grad_norm": 13.0, "learning_rate": 8.990164715943278e-06, "loss": 0.1894, "step": 429 }, { "epoch": 0.21140609636184857, "grad_norm": 30.75, "learning_rate": 8.990002880952913e-06, "loss": 0.5493, "step": 430 }, { "epoch": 0.21189773844641102, "grad_norm": 25.25, "learning_rate": 8.989839726826461e-06, "loss": 0.5677, "step": 431 }, { "epoch": 0.21238938053097345, "grad_norm": 26.875, "learning_rate": 8.98967525361186e-06, "loss": 0.524, "step": 432 }, { "epoch": 0.2128810226155359, "grad_norm": 20.0, "learning_rate": 8.989509461357427e-06, "loss": 0.4854, "step": 433 }, { "epoch": 0.21337266470009833, "grad_norm": 28.0, "learning_rate": 8.989342350111872e-06, "loss": 0.3211, "step": 434 }, { "epoch": 0.21386430678466076, "grad_norm": 20.5, "learning_rate": 8.989173919924293e-06, "loss": 0.4736, "step": 435 }, { "epoch": 0.2143559488692232, "grad_norm": 17.375, "learning_rate": 8.98900417084417e-06, "loss": 0.4364, "step": 436 }, { "epoch": 0.21484759095378564, "grad_norm": 10.9375, "learning_rate": 8.988833102921377e-06, "loss": 0.3904, "step": 437 }, { "epoch": 0.2153392330383481, "grad_norm": 18.625, "learning_rate": 8.988660716206174e-06, "loss": 0.4147, "step": 438 }, { "epoch": 0.21583087512291052, "grad_norm": 23.625, "learning_rate": 8.988487010749204e-06, "loss": 0.2702, "step": 439 }, { "epoch": 0.21632251720747295, "grad_norm": 13.625, "learning_rate": 8.988311986601502e-06, "loss": 0.3807, "step": 440 }, { "epoch": 0.2168141592920354, "grad_norm": 15.875, "learning_rate": 8.98813564381449e-06, "loss": 0.5706, "step": 441 }, { "epoch": 0.21730580137659783, "grad_norm": 29.25, "learning_rate": 8.987957982439975e-06, "loss": 0.5419, "step": 442 }, { "epoch": 0.21779744346116028, "grad_norm": 14.4375, "learning_rate": 8.987779002530153e-06, "loss": 0.3236, "step": 443 }, { "epoch": 0.2182890855457227, "grad_norm": 26.75, "learning_rate": 8.987598704137608e-06, "loss": 0.4192, "step": 444 }, { "epoch": 0.21878072763028517, "grad_norm": 16.25, "learning_rate": 8.98741708731531e-06, "loss": 0.2522, "step": 445 }, { "epoch": 0.2192723697148476, "grad_norm": 30.875, "learning_rate": 8.98723415211662e-06, "loss": 0.6668, "step": 446 }, { "epoch": 0.21976401179941002, "grad_norm": 14.0, "learning_rate": 8.987049898595276e-06, "loss": 0.3664, "step": 447 }, { "epoch": 0.22025565388397247, "grad_norm": 15.5625, "learning_rate": 8.986864326805418e-06, "loss": 0.4455, "step": 448 }, { "epoch": 0.2207472959685349, "grad_norm": 34.5, "learning_rate": 8.986677436801562e-06, "loss": 0.5889, "step": 449 }, { "epoch": 0.22123893805309736, "grad_norm": 14.5, "learning_rate": 8.986489228638617e-06, "loss": 0.4143, "step": 450 }, { "epoch": 0.22173058013765978, "grad_norm": 11.6875, "learning_rate": 8.986299702371876e-06, "loss": 0.4589, "step": 451 }, { "epoch": 0.2222222222222222, "grad_norm": 18.875, "learning_rate": 8.98610885805702e-06, "loss": 0.3216, "step": 452 }, { "epoch": 0.22271386430678466, "grad_norm": 16.375, "learning_rate": 8.98591669575012e-06, "loss": 0.3372, "step": 453 }, { "epoch": 0.2232055063913471, "grad_norm": 20.25, "learning_rate": 8.985723215507632e-06, "loss": 0.401, "step": 454 }, { "epoch": 0.22369714847590955, "grad_norm": 23.375, "learning_rate": 8.985528417386398e-06, "loss": 0.3741, "step": 455 }, { "epoch": 0.22418879056047197, "grad_norm": 16.5, "learning_rate": 8.98533230144365e-06, "loss": 0.4742, "step": 456 }, { "epoch": 0.22468043264503443, "grad_norm": 12.75, "learning_rate": 8.985134867737003e-06, "loss": 0.3873, "step": 457 }, { "epoch": 0.22517207472959685, "grad_norm": 21.0, "learning_rate": 8.984936116324466e-06, "loss": 0.3669, "step": 458 }, { "epoch": 0.22566371681415928, "grad_norm": 20.5, "learning_rate": 8.984736047264427e-06, "loss": 0.3856, "step": 459 }, { "epoch": 0.22615535889872174, "grad_norm": 24.625, "learning_rate": 8.984534660615668e-06, "loss": 0.5318, "step": 460 }, { "epoch": 0.22664700098328416, "grad_norm": 43.25, "learning_rate": 8.984331956437354e-06, "loss": 0.6354, "step": 461 }, { "epoch": 0.22713864306784662, "grad_norm": 27.375, "learning_rate": 8.984127934789038e-06, "loss": 0.4885, "step": 462 }, { "epoch": 0.22763028515240905, "grad_norm": 15.8125, "learning_rate": 8.98392259573066e-06, "loss": 0.4211, "step": 463 }, { "epoch": 0.22812192723697147, "grad_norm": 8.5, "learning_rate": 8.983715939322548e-06, "loss": 0.3389, "step": 464 }, { "epoch": 0.22861356932153393, "grad_norm": 27.25, "learning_rate": 8.983507965625417e-06, "loss": 0.4527, "step": 465 }, { "epoch": 0.22910521140609635, "grad_norm": 29.75, "learning_rate": 8.983298674700368e-06, "loss": 0.3125, "step": 466 }, { "epoch": 0.2295968534906588, "grad_norm": 21.0, "learning_rate": 8.983088066608888e-06, "loss": 0.4502, "step": 467 }, { "epoch": 0.23008849557522124, "grad_norm": 46.25, "learning_rate": 8.982876141412855e-06, "loss": 0.7094, "step": 468 }, { "epoch": 0.2305801376597837, "grad_norm": 11.875, "learning_rate": 8.98266289917453e-06, "loss": 0.3305, "step": 469 }, { "epoch": 0.23107177974434612, "grad_norm": 21.875, "learning_rate": 8.982448339956562e-06, "loss": 0.2815, "step": 470 }, { "epoch": 0.23156342182890854, "grad_norm": 16.625, "learning_rate": 8.982232463821987e-06, "loss": 0.5122, "step": 471 }, { "epoch": 0.232055063913471, "grad_norm": 15.75, "learning_rate": 8.982015270834229e-06, "loss": 0.3169, "step": 472 }, { "epoch": 0.23254670599803343, "grad_norm": 16.75, "learning_rate": 8.981796761057099e-06, "loss": 0.222, "step": 473 }, { "epoch": 0.23303834808259588, "grad_norm": 14.25, "learning_rate": 8.981576934554791e-06, "loss": 0.3214, "step": 474 }, { "epoch": 0.2335299901671583, "grad_norm": 12.9375, "learning_rate": 8.98135579139189e-06, "loss": 0.3732, "step": 475 }, { "epoch": 0.23402163225172073, "grad_norm": 37.25, "learning_rate": 8.981133331633368e-06, "loss": 0.688, "step": 476 }, { "epoch": 0.2345132743362832, "grad_norm": 31.5, "learning_rate": 8.98090955534458e-06, "loss": 0.396, "step": 477 }, { "epoch": 0.23500491642084562, "grad_norm": 22.375, "learning_rate": 8.980684462591274e-06, "loss": 0.3339, "step": 478 }, { "epoch": 0.23549655850540807, "grad_norm": 19.625, "learning_rate": 8.980458053439575e-06, "loss": 0.3441, "step": 479 }, { "epoch": 0.2359882005899705, "grad_norm": 21.375, "learning_rate": 8.980230327956007e-06, "loss": 0.3572, "step": 480 }, { "epoch": 0.23647984267453295, "grad_norm": 11.75, "learning_rate": 8.980001286207469e-06, "loss": 0.3133, "step": 481 }, { "epoch": 0.23697148475909538, "grad_norm": 21.625, "learning_rate": 8.979770928261255e-06, "loss": 0.4197, "step": 482 }, { "epoch": 0.2374631268436578, "grad_norm": 26.5, "learning_rate": 8.97953925418504e-06, "loss": 0.5218, "step": 483 }, { "epoch": 0.23795476892822026, "grad_norm": 43.75, "learning_rate": 8.979306264046894e-06, "loss": 0.6869, "step": 484 }, { "epoch": 0.2384464110127827, "grad_norm": 22.5, "learning_rate": 8.979071957915263e-06, "loss": 0.3887, "step": 485 }, { "epoch": 0.23893805309734514, "grad_norm": 26.0, "learning_rate": 8.978836335858987e-06, "loss": 0.5033, "step": 486 }, { "epoch": 0.23942969518190757, "grad_norm": 27.25, "learning_rate": 8.97859939794729e-06, "loss": 0.4109, "step": 487 }, { "epoch": 0.23992133726647, "grad_norm": 30.25, "learning_rate": 8.978361144249782e-06, "loss": 0.4759, "step": 488 }, { "epoch": 0.24041297935103245, "grad_norm": 19.375, "learning_rate": 8.978121574836462e-06, "loss": 0.45, "step": 489 }, { "epoch": 0.24090462143559488, "grad_norm": 22.875, "learning_rate": 8.977880689777714e-06, "loss": 0.4329, "step": 490 }, { "epoch": 0.24139626352015733, "grad_norm": 20.875, "learning_rate": 8.977638489144308e-06, "loss": 0.4673, "step": 491 }, { "epoch": 0.24188790560471976, "grad_norm": 38.0, "learning_rate": 8.977394973007399e-06, "loss": 0.2801, "step": 492 }, { "epoch": 0.24237954768928222, "grad_norm": 33.25, "learning_rate": 8.977150141438533e-06, "loss": 0.4649, "step": 493 }, { "epoch": 0.24287118977384464, "grad_norm": 33.0, "learning_rate": 8.976903994509642e-06, "loss": 0.3643, "step": 494 }, { "epoch": 0.24336283185840707, "grad_norm": 21.625, "learning_rate": 8.976656532293037e-06, "loss": 0.4135, "step": 495 }, { "epoch": 0.24385447394296952, "grad_norm": 19.0, "learning_rate": 8.976407754861426e-06, "loss": 0.4358, "step": 496 }, { "epoch": 0.24434611602753195, "grad_norm": 29.625, "learning_rate": 8.976157662287899e-06, "loss": 0.5274, "step": 497 }, { "epoch": 0.2448377581120944, "grad_norm": 14.75, "learning_rate": 8.975906254645925e-06, "loss": 0.3098, "step": 498 }, { "epoch": 0.24532940019665683, "grad_norm": 16.875, "learning_rate": 8.975653532009372e-06, "loss": 0.3074, "step": 499 }, { "epoch": 0.24582104228121926, "grad_norm": 28.625, "learning_rate": 8.975399494452486e-06, "loss": 0.4976, "step": 500 }, { "epoch": 0.24631268436578171, "grad_norm": 28.5, "learning_rate": 8.975144142049902e-06, "loss": 0.5761, "step": 501 }, { "epoch": 0.24680432645034414, "grad_norm": 27.625, "learning_rate": 8.974887474876644e-06, "loss": 0.4235, "step": 502 }, { "epoch": 0.2472959685349066, "grad_norm": 40.0, "learning_rate": 8.974629493008113e-06, "loss": 0.6186, "step": 503 }, { "epoch": 0.24778761061946902, "grad_norm": 18.875, "learning_rate": 8.974370196520109e-06, "loss": 0.4379, "step": 504 }, { "epoch": 0.24827925270403148, "grad_norm": 26.125, "learning_rate": 8.974109585488808e-06, "loss": 0.5588, "step": 505 }, { "epoch": 0.2487708947885939, "grad_norm": 36.25, "learning_rate": 8.973847659990777e-06, "loss": 0.7236, "step": 506 }, { "epoch": 0.24926253687315633, "grad_norm": 14.8125, "learning_rate": 8.97358442010297e-06, "loss": 0.2836, "step": 507 }, { "epoch": 0.2497541789577188, "grad_norm": 16.0, "learning_rate": 8.97331986590272e-06, "loss": 0.3214, "step": 508 }, { "epoch": 0.2502458210422812, "grad_norm": 12.75, "learning_rate": 8.973053997467756e-06, "loss": 0.4309, "step": 509 }, { "epoch": 0.25073746312684364, "grad_norm": 22.625, "learning_rate": 8.972786814876187e-06, "loss": 0.4772, "step": 510 }, { "epoch": 0.2512291052114061, "grad_norm": 30.625, "learning_rate": 8.972518318206512e-06, "loss": 0.3704, "step": 511 }, { "epoch": 0.25172074729596855, "grad_norm": 18.625, "learning_rate": 8.97224850753761e-06, "loss": 0.4592, "step": 512 }, { "epoch": 0.252212389380531, "grad_norm": 16.625, "learning_rate": 8.97197738294875e-06, "loss": 0.3524, "step": 513 }, { "epoch": 0.2527040314650934, "grad_norm": 35.75, "learning_rate": 8.971704944519592e-06, "loss": 0.4002, "step": 514 }, { "epoch": 0.25319567354965583, "grad_norm": 20.0, "learning_rate": 8.97143119233017e-06, "loss": 0.3067, "step": 515 }, { "epoch": 0.2536873156342183, "grad_norm": 27.5, "learning_rate": 8.971156126460917e-06, "loss": 0.4094, "step": 516 }, { "epoch": 0.25417895771878074, "grad_norm": 17.75, "learning_rate": 8.97087974699264e-06, "loss": 0.5106, "step": 517 }, { "epoch": 0.25467059980334317, "grad_norm": 24.375, "learning_rate": 8.970602054006542e-06, "loss": 0.5818, "step": 518 }, { "epoch": 0.2551622418879056, "grad_norm": 24.0, "learning_rate": 8.970323047584204e-06, "loss": 0.5773, "step": 519 }, { "epoch": 0.255653883972468, "grad_norm": 10.1875, "learning_rate": 8.9700427278076e-06, "loss": 0.2647, "step": 520 }, { "epoch": 0.2561455260570305, "grad_norm": 15.75, "learning_rate": 8.969761094759084e-06, "loss": 0.3843, "step": 521 }, { "epoch": 0.25663716814159293, "grad_norm": 19.75, "learning_rate": 8.969478148521397e-06, "loss": 0.3745, "step": 522 }, { "epoch": 0.25712881022615536, "grad_norm": 11.4375, "learning_rate": 8.969193889177671e-06, "loss": 0.2974, "step": 523 }, { "epoch": 0.2576204523107178, "grad_norm": 34.0, "learning_rate": 8.968908316811415e-06, "loss": 0.6324, "step": 524 }, { "epoch": 0.2581120943952802, "grad_norm": 13.25, "learning_rate": 8.968621431506532e-06, "loss": 0.353, "step": 525 }, { "epoch": 0.2586037364798427, "grad_norm": 16.625, "learning_rate": 8.968333233347305e-06, "loss": 0.2802, "step": 526 }, { "epoch": 0.2590953785644051, "grad_norm": 7.78125, "learning_rate": 8.968043722418406e-06, "loss": 0.3623, "step": 527 }, { "epoch": 0.25958702064896755, "grad_norm": 10.6875, "learning_rate": 8.967752898804891e-06, "loss": 0.2959, "step": 528 }, { "epoch": 0.26007866273353, "grad_norm": 27.875, "learning_rate": 8.967460762592205e-06, "loss": 0.4205, "step": 529 }, { "epoch": 0.2605703048180924, "grad_norm": 8.6875, "learning_rate": 8.967167313866169e-06, "loss": 0.2586, "step": 530 }, { "epoch": 0.2610619469026549, "grad_norm": 22.75, "learning_rate": 8.966872552713005e-06, "loss": 0.4851, "step": 531 }, { "epoch": 0.2615535889872173, "grad_norm": 33.0, "learning_rate": 8.966576479219306e-06, "loss": 0.5179, "step": 532 }, { "epoch": 0.26204523107177974, "grad_norm": 12.625, "learning_rate": 8.96627909347206e-06, "loss": 0.2621, "step": 533 }, { "epoch": 0.26253687315634217, "grad_norm": 13.0625, "learning_rate": 8.965980395558636e-06, "loss": 0.2935, "step": 534 }, { "epoch": 0.26302851524090465, "grad_norm": 18.0, "learning_rate": 8.965680385566787e-06, "loss": 0.2659, "step": 535 }, { "epoch": 0.2635201573254671, "grad_norm": 32.75, "learning_rate": 8.965379063584657e-06, "loss": 0.6611, "step": 536 }, { "epoch": 0.2640117994100295, "grad_norm": 15.3125, "learning_rate": 8.965076429700776e-06, "loss": 0.1847, "step": 537 }, { "epoch": 0.26450344149459193, "grad_norm": 21.375, "learning_rate": 8.964772484004048e-06, "loss": 0.379, "step": 538 }, { "epoch": 0.26499508357915436, "grad_norm": 11.25, "learning_rate": 8.964467226583777e-06, "loss": 0.2952, "step": 539 }, { "epoch": 0.26548672566371684, "grad_norm": 11.3125, "learning_rate": 8.964160657529642e-06, "loss": 0.2819, "step": 540 }, { "epoch": 0.26597836774827927, "grad_norm": 34.5, "learning_rate": 8.963852776931714e-06, "loss": 0.5926, "step": 541 }, { "epoch": 0.2664700098328417, "grad_norm": 15.875, "learning_rate": 8.963543584880445e-06, "loss": 0.3057, "step": 542 }, { "epoch": 0.2669616519174041, "grad_norm": 7.40625, "learning_rate": 8.963233081466673e-06, "loss": 0.3446, "step": 543 }, { "epoch": 0.26745329400196655, "grad_norm": 16.375, "learning_rate": 8.962921266781623e-06, "loss": 0.3141, "step": 544 }, { "epoch": 0.26794493608652903, "grad_norm": 29.5, "learning_rate": 8.962608140916905e-06, "loss": 0.4538, "step": 545 }, { "epoch": 0.26843657817109146, "grad_norm": 15.875, "learning_rate": 8.962293703964513e-06, "loss": 0.342, "step": 546 }, { "epoch": 0.2689282202556539, "grad_norm": 15.4375, "learning_rate": 8.961977956016826e-06, "loss": 0.2924, "step": 547 }, { "epoch": 0.2694198623402163, "grad_norm": 23.0, "learning_rate": 8.96166089716661e-06, "loss": 0.3838, "step": 548 }, { "epoch": 0.26991150442477874, "grad_norm": 14.0625, "learning_rate": 8.961342527507013e-06, "loss": 0.3429, "step": 549 }, { "epoch": 0.2704031465093412, "grad_norm": 18.5, "learning_rate": 8.961022847131574e-06, "loss": 0.3373, "step": 550 }, { "epoch": 0.27089478859390365, "grad_norm": 32.75, "learning_rate": 8.960701856134208e-06, "loss": 0.6671, "step": 551 }, { "epoch": 0.2713864306784661, "grad_norm": 19.125, "learning_rate": 8.960379554609224e-06, "loss": 0.2481, "step": 552 }, { "epoch": 0.2718780727630285, "grad_norm": 37.0, "learning_rate": 8.960055942651311e-06, "loss": 0.6548, "step": 553 }, { "epoch": 0.2723697148475909, "grad_norm": 17.125, "learning_rate": 8.959731020355545e-06, "loss": 0.3724, "step": 554 }, { "epoch": 0.2728613569321534, "grad_norm": 22.75, "learning_rate": 8.959404787817384e-06, "loss": 0.3748, "step": 555 }, { "epoch": 0.27335299901671584, "grad_norm": 22.75, "learning_rate": 8.959077245132676e-06, "loss": 0.5162, "step": 556 }, { "epoch": 0.27384464110127826, "grad_norm": 19.0, "learning_rate": 8.958748392397651e-06, "loss": 0.2186, "step": 557 }, { "epoch": 0.2743362831858407, "grad_norm": 20.75, "learning_rate": 8.958418229708921e-06, "loss": 0.5298, "step": 558 }, { "epoch": 0.2748279252704032, "grad_norm": 21.375, "learning_rate": 8.958086757163488e-06, "loss": 0.3803, "step": 559 }, { "epoch": 0.2753195673549656, "grad_norm": 19.0, "learning_rate": 8.957753974858737e-06, "loss": 0.344, "step": 560 }, { "epoch": 0.275811209439528, "grad_norm": 24.375, "learning_rate": 8.957419882892438e-06, "loss": 0.2367, "step": 561 }, { "epoch": 0.27630285152409045, "grad_norm": 14.5, "learning_rate": 8.957084481362741e-06, "loss": 0.3476, "step": 562 }, { "epoch": 0.2767944936086529, "grad_norm": 15.6875, "learning_rate": 8.956747770368192e-06, "loss": 0.4138, "step": 563 }, { "epoch": 0.27728613569321536, "grad_norm": 24.25, "learning_rate": 8.956409750007707e-06, "loss": 0.5697, "step": 564 }, { "epoch": 0.2777777777777778, "grad_norm": 12.625, "learning_rate": 8.9560704203806e-06, "loss": 0.287, "step": 565 }, { "epoch": 0.2782694198623402, "grad_norm": 20.25, "learning_rate": 8.955729781586563e-06, "loss": 0.4972, "step": 566 }, { "epoch": 0.27876106194690264, "grad_norm": 19.125, "learning_rate": 8.955387833725672e-06, "loss": 0.3519, "step": 567 }, { "epoch": 0.27925270403146507, "grad_norm": 22.75, "learning_rate": 8.955044576898389e-06, "loss": 0.5265, "step": 568 }, { "epoch": 0.27974434611602755, "grad_norm": 18.875, "learning_rate": 8.954700011205562e-06, "loss": 0.4543, "step": 569 }, { "epoch": 0.28023598820059, "grad_norm": 17.875, "learning_rate": 8.954354136748422e-06, "loss": 0.3038, "step": 570 }, { "epoch": 0.2807276302851524, "grad_norm": 12.8125, "learning_rate": 8.954006953628587e-06, "loss": 0.2579, "step": 571 }, { "epoch": 0.28121927236971483, "grad_norm": 18.125, "learning_rate": 8.953658461948053e-06, "loss": 0.3795, "step": 572 }, { "epoch": 0.28171091445427726, "grad_norm": 14.0625, "learning_rate": 8.953308661809208e-06, "loss": 0.3442, "step": 573 }, { "epoch": 0.28220255653883974, "grad_norm": 30.75, "learning_rate": 8.952957553314821e-06, "loss": 0.6977, "step": 574 }, { "epoch": 0.28269419862340217, "grad_norm": 11.4375, "learning_rate": 8.952605136568044e-06, "loss": 0.2269, "step": 575 }, { "epoch": 0.2831858407079646, "grad_norm": 17.5, "learning_rate": 8.952251411672417e-06, "loss": 0.4806, "step": 576 }, { "epoch": 0.283677482792527, "grad_norm": 16.25, "learning_rate": 8.95189637873186e-06, "loss": 0.2474, "step": 577 }, { "epoch": 0.28416912487708945, "grad_norm": 28.125, "learning_rate": 8.95154003785068e-06, "loss": 0.4253, "step": 578 }, { "epoch": 0.28466076696165193, "grad_norm": 14.0, "learning_rate": 8.95118238913357e-06, "loss": 0.2189, "step": 579 }, { "epoch": 0.28515240904621436, "grad_norm": 22.375, "learning_rate": 8.950823432685604e-06, "loss": 0.614, "step": 580 }, { "epoch": 0.2856440511307768, "grad_norm": 17.125, "learning_rate": 8.95046316861224e-06, "loss": 0.5541, "step": 581 }, { "epoch": 0.2861356932153392, "grad_norm": 8.875, "learning_rate": 8.950101597019322e-06, "loss": 0.3283, "step": 582 }, { "epoch": 0.2866273352999017, "grad_norm": 13.375, "learning_rate": 8.949738718013077e-06, "loss": 0.253, "step": 583 }, { "epoch": 0.2871189773844641, "grad_norm": 23.625, "learning_rate": 8.94937453170012e-06, "loss": 0.5307, "step": 584 }, { "epoch": 0.28761061946902655, "grad_norm": 14.1875, "learning_rate": 8.949009038187443e-06, "loss": 0.4053, "step": 585 }, { "epoch": 0.288102261553589, "grad_norm": 10.4375, "learning_rate": 8.948642237582427e-06, "loss": 0.2617, "step": 586 }, { "epoch": 0.2885939036381514, "grad_norm": 22.375, "learning_rate": 8.948274129992836e-06, "loss": 0.4414, "step": 587 }, { "epoch": 0.2890855457227139, "grad_norm": 30.125, "learning_rate": 8.947904715526817e-06, "loss": 0.629, "step": 588 }, { "epoch": 0.2895771878072763, "grad_norm": 9.625, "learning_rate": 8.947533994292905e-06, "loss": 0.2642, "step": 589 }, { "epoch": 0.29006882989183874, "grad_norm": 15.875, "learning_rate": 8.947161966400011e-06, "loss": 0.3389, "step": 590 }, { "epoch": 0.29056047197640117, "grad_norm": 14.5625, "learning_rate": 8.946788631957437e-06, "loss": 0.3786, "step": 591 }, { "epoch": 0.2910521140609636, "grad_norm": 8.125, "learning_rate": 8.946413991074868e-06, "loss": 0.2917, "step": 592 }, { "epoch": 0.2915437561455261, "grad_norm": 26.0, "learning_rate": 8.946038043862367e-06, "loss": 0.5179, "step": 593 }, { "epoch": 0.2920353982300885, "grad_norm": 19.5, "learning_rate": 8.94566079043039e-06, "loss": 0.4774, "step": 594 }, { "epoch": 0.29252704031465093, "grad_norm": 15.8125, "learning_rate": 8.945282230889766e-06, "loss": 0.4142, "step": 595 }, { "epoch": 0.29301868239921336, "grad_norm": 28.75, "learning_rate": 8.944902365351719e-06, "loss": 0.5668, "step": 596 }, { "epoch": 0.2935103244837758, "grad_norm": 17.0, "learning_rate": 8.94452119392785e-06, "loss": 0.4524, "step": 597 }, { "epoch": 0.29400196656833827, "grad_norm": 14.75, "learning_rate": 8.944138716730144e-06, "loss": 0.3178, "step": 598 }, { "epoch": 0.2944936086529007, "grad_norm": 21.0, "learning_rate": 8.94375493387097e-06, "loss": 0.3467, "step": 599 }, { "epoch": 0.2949852507374631, "grad_norm": 25.375, "learning_rate": 8.94336984546308e-06, "loss": 0.3518, "step": 600 }, { "epoch": 0.29547689282202555, "grad_norm": 15.1875, "learning_rate": 8.942983451619613e-06, "loss": 0.4601, "step": 601 }, { "epoch": 0.295968534906588, "grad_norm": 17.25, "learning_rate": 8.94259575245409e-06, "loss": 0.5352, "step": 602 }, { "epoch": 0.29646017699115046, "grad_norm": 22.75, "learning_rate": 8.942206748080412e-06, "loss": 0.1242, "step": 603 }, { "epoch": 0.2969518190757129, "grad_norm": 22.875, "learning_rate": 8.941816438612868e-06, "loss": 0.3353, "step": 604 }, { "epoch": 0.2974434611602753, "grad_norm": 23.375, "learning_rate": 8.941424824166128e-06, "loss": 0.5095, "step": 605 }, { "epoch": 0.29793510324483774, "grad_norm": 12.3125, "learning_rate": 8.941031904855246e-06, "loss": 0.4125, "step": 606 }, { "epoch": 0.2984267453294002, "grad_norm": 13.3125, "learning_rate": 8.940637680795659e-06, "loss": 0.3695, "step": 607 }, { "epoch": 0.29891838741396265, "grad_norm": 15.9375, "learning_rate": 8.940242152103188e-06, "loss": 0.3918, "step": 608 }, { "epoch": 0.2994100294985251, "grad_norm": 20.5, "learning_rate": 8.939845318894037e-06, "loss": 0.4557, "step": 609 }, { "epoch": 0.2999016715830875, "grad_norm": 15.75, "learning_rate": 8.939447181284795e-06, "loss": 0.4201, "step": 610 }, { "epoch": 0.30039331366764993, "grad_norm": 26.0, "learning_rate": 8.939047739392428e-06, "loss": 0.3426, "step": 611 }, { "epoch": 0.3008849557522124, "grad_norm": 17.375, "learning_rate": 8.938646993334294e-06, "loss": 0.2794, "step": 612 }, { "epoch": 0.30137659783677484, "grad_norm": 9.625, "learning_rate": 8.938244943228127e-06, "loss": 0.2958, "step": 613 }, { "epoch": 0.30186823992133727, "grad_norm": 21.0, "learning_rate": 8.937841589192049e-06, "loss": 0.343, "step": 614 }, { "epoch": 0.3023598820058997, "grad_norm": 18.5, "learning_rate": 8.93743693134456e-06, "loss": 0.483, "step": 615 }, { "epoch": 0.3028515240904621, "grad_norm": 15.875, "learning_rate": 8.937030969804552e-06, "loss": 0.3374, "step": 616 }, { "epoch": 0.3033431661750246, "grad_norm": 17.0, "learning_rate": 8.936623704691288e-06, "loss": 0.3822, "step": 617 }, { "epoch": 0.30383480825958703, "grad_norm": 15.9375, "learning_rate": 8.936215136124422e-06, "loss": 0.2969, "step": 618 }, { "epoch": 0.30432645034414946, "grad_norm": 22.0, "learning_rate": 8.93580526422399e-06, "loss": 0.3777, "step": 619 }, { "epoch": 0.3048180924287119, "grad_norm": 14.5625, "learning_rate": 8.935394089110408e-06, "loss": 0.4602, "step": 620 }, { "epoch": 0.3053097345132743, "grad_norm": 53.0, "learning_rate": 8.93498161090448e-06, "loss": 0.7256, "step": 621 }, { "epoch": 0.3058013765978368, "grad_norm": 9.1875, "learning_rate": 8.934567829727385e-06, "loss": 0.2101, "step": 622 }, { "epoch": 0.3062930186823992, "grad_norm": 18.125, "learning_rate": 8.934152745700695e-06, "loss": 0.2914, "step": 623 }, { "epoch": 0.30678466076696165, "grad_norm": 34.5, "learning_rate": 8.933736358946355e-06, "loss": 0.6352, "step": 624 }, { "epoch": 0.3072763028515241, "grad_norm": 16.5, "learning_rate": 8.933318669586698e-06, "loss": 0.4264, "step": 625 }, { "epoch": 0.3077679449360865, "grad_norm": 17.75, "learning_rate": 8.93289967774444e-06, "loss": 0.4473, "step": 626 }, { "epoch": 0.308259587020649, "grad_norm": 20.5, "learning_rate": 8.932479383542677e-06, "loss": 0.5115, "step": 627 }, { "epoch": 0.3087512291052114, "grad_norm": 22.75, "learning_rate": 8.932057787104888e-06, "loss": 0.3576, "step": 628 }, { "epoch": 0.30924287118977384, "grad_norm": 23.125, "learning_rate": 8.931634888554937e-06, "loss": 0.4819, "step": 629 }, { "epoch": 0.30973451327433627, "grad_norm": 38.75, "learning_rate": 8.93121068801707e-06, "loss": 0.4687, "step": 630 }, { "epoch": 0.31022615535889875, "grad_norm": 13.875, "learning_rate": 8.930785185615912e-06, "loss": 0.4264, "step": 631 }, { "epoch": 0.3107177974434612, "grad_norm": 19.625, "learning_rate": 8.930358381476474e-06, "loss": 0.3365, "step": 632 }, { "epoch": 0.3112094395280236, "grad_norm": 28.0, "learning_rate": 8.929930275724148e-06, "loss": 0.5955, "step": 633 }, { "epoch": 0.31170108161258603, "grad_norm": 15.6875, "learning_rate": 8.929500868484712e-06, "loss": 0.2443, "step": 634 }, { "epoch": 0.31219272369714846, "grad_norm": 28.125, "learning_rate": 8.929070159884322e-06, "loss": 0.4166, "step": 635 }, { "epoch": 0.31268436578171094, "grad_norm": 11.1875, "learning_rate": 8.928638150049513e-06, "loss": 0.3071, "step": 636 }, { "epoch": 0.31317600786627336, "grad_norm": 13.5, "learning_rate": 8.928204839107214e-06, "loss": 0.4059, "step": 637 }, { "epoch": 0.3136676499508358, "grad_norm": 28.875, "learning_rate": 8.927770227184725e-06, "loss": 0.5657, "step": 638 }, { "epoch": 0.3141592920353982, "grad_norm": 16.375, "learning_rate": 8.927334314409733e-06, "loss": 0.5001, "step": 639 }, { "epoch": 0.31465093411996065, "grad_norm": 13.375, "learning_rate": 8.926897100910307e-06, "loss": 0.4054, "step": 640 }, { "epoch": 0.31514257620452313, "grad_norm": 20.125, "learning_rate": 8.926458586814897e-06, "loss": 0.2552, "step": 641 }, { "epoch": 0.31563421828908556, "grad_norm": 19.75, "learning_rate": 8.926018772252335e-06, "loss": 0.545, "step": 642 }, { "epoch": 0.316125860373648, "grad_norm": 10.1875, "learning_rate": 8.92557765735184e-06, "loss": 0.1927, "step": 643 }, { "epoch": 0.3166175024582104, "grad_norm": 38.5, "learning_rate": 8.925135242243004e-06, "loss": 0.4259, "step": 644 }, { "epoch": 0.31710914454277284, "grad_norm": 18.625, "learning_rate": 8.92469152705581e-06, "loss": 0.4938, "step": 645 }, { "epoch": 0.3176007866273353, "grad_norm": 18.5, "learning_rate": 8.924246511920615e-06, "loss": 0.3878, "step": 646 }, { "epoch": 0.31809242871189775, "grad_norm": 22.125, "learning_rate": 8.923800196968165e-06, "loss": 0.4891, "step": 647 }, { "epoch": 0.3185840707964602, "grad_norm": 27.25, "learning_rate": 8.923352582329583e-06, "loss": 0.261, "step": 648 }, { "epoch": 0.3190757128810226, "grad_norm": 16.0, "learning_rate": 8.922903668136376e-06, "loss": 0.3888, "step": 649 }, { "epoch": 0.319567354965585, "grad_norm": 25.75, "learning_rate": 8.922453454520434e-06, "loss": 0.4135, "step": 650 }, { "epoch": 0.3200589970501475, "grad_norm": 14.625, "learning_rate": 8.922001941614023e-06, "loss": 0.3987, "step": 651 }, { "epoch": 0.32055063913470994, "grad_norm": 10.3125, "learning_rate": 8.921549129549798e-06, "loss": 0.202, "step": 652 }, { "epoch": 0.32104228121927236, "grad_norm": 8.1875, "learning_rate": 8.921095018460791e-06, "loss": 0.2425, "step": 653 }, { "epoch": 0.3215339233038348, "grad_norm": 14.625, "learning_rate": 8.920639608480418e-06, "loss": 0.359, "step": 654 }, { "epoch": 0.3220255653883973, "grad_norm": 29.5, "learning_rate": 8.920182899742475e-06, "loss": 0.5763, "step": 655 }, { "epoch": 0.3225172074729597, "grad_norm": 16.5, "learning_rate": 8.919724892381142e-06, "loss": 0.3245, "step": 656 }, { "epoch": 0.3230088495575221, "grad_norm": 16.5, "learning_rate": 8.919265586530977e-06, "loss": 0.317, "step": 657 }, { "epoch": 0.32350049164208455, "grad_norm": 12.75, "learning_rate": 8.918804982326922e-06, "loss": 0.3306, "step": 658 }, { "epoch": 0.323992133726647, "grad_norm": 11.875, "learning_rate": 8.9183430799043e-06, "loss": 0.3353, "step": 659 }, { "epoch": 0.32448377581120946, "grad_norm": 16.25, "learning_rate": 8.917879879398815e-06, "loss": 0.3007, "step": 660 }, { "epoch": 0.3249754178957719, "grad_norm": 7.53125, "learning_rate": 8.91741538094655e-06, "loss": 0.3025, "step": 661 }, { "epoch": 0.3254670599803343, "grad_norm": 5.9375, "learning_rate": 8.916949584683979e-06, "loss": 0.2183, "step": 662 }, { "epoch": 0.32595870206489674, "grad_norm": 28.875, "learning_rate": 8.916482490747943e-06, "loss": 0.5442, "step": 663 }, { "epoch": 0.32645034414945917, "grad_norm": 12.125, "learning_rate": 8.916014099275672e-06, "loss": 0.4222, "step": 664 }, { "epoch": 0.32694198623402165, "grad_norm": 22.875, "learning_rate": 8.915544410404781e-06, "loss": 0.524, "step": 665 }, { "epoch": 0.3274336283185841, "grad_norm": 16.875, "learning_rate": 8.915073424273258e-06, "loss": 0.3066, "step": 666 }, { "epoch": 0.3279252704031465, "grad_norm": 13.8125, "learning_rate": 8.914601141019478e-06, "loss": 0.3561, "step": 667 }, { "epoch": 0.32841691248770893, "grad_norm": 23.125, "learning_rate": 8.914127560782195e-06, "loss": 0.4174, "step": 668 }, { "epoch": 0.32890855457227136, "grad_norm": 12.1875, "learning_rate": 8.913652683700542e-06, "loss": 0.4164, "step": 669 }, { "epoch": 0.32940019665683384, "grad_norm": 19.625, "learning_rate": 8.913176509914037e-06, "loss": 0.522, "step": 670 }, { "epoch": 0.32989183874139627, "grad_norm": 20.125, "learning_rate": 8.912699039562577e-06, "loss": 0.3669, "step": 671 }, { "epoch": 0.3303834808259587, "grad_norm": 20.125, "learning_rate": 8.912220272786438e-06, "loss": 0.4524, "step": 672 }, { "epoch": 0.3308751229105211, "grad_norm": 23.875, "learning_rate": 8.91174020972628e-06, "loss": 0.3195, "step": 673 }, { "epoch": 0.33136676499508355, "grad_norm": 12.25, "learning_rate": 8.911258850523142e-06, "loss": 0.3627, "step": 674 }, { "epoch": 0.33185840707964603, "grad_norm": 53.75, "learning_rate": 8.910776195318448e-06, "loss": 0.9204, "step": 675 }, { "epoch": 0.33235004916420846, "grad_norm": 14.4375, "learning_rate": 8.910292244253995e-06, "loss": 0.2818, "step": 676 }, { "epoch": 0.3328416912487709, "grad_norm": 17.75, "learning_rate": 8.909806997471967e-06, "loss": 0.4725, "step": 677 }, { "epoch": 0.3333333333333333, "grad_norm": 27.5, "learning_rate": 8.909320455114925e-06, "loss": 0.5009, "step": 678 }, { "epoch": 0.3338249754178958, "grad_norm": 16.875, "learning_rate": 8.908832617325814e-06, "loss": 0.4565, "step": 679 }, { "epoch": 0.3343166175024582, "grad_norm": 26.625, "learning_rate": 8.908343484247957e-06, "loss": 0.3923, "step": 680 }, { "epoch": 0.33480825958702065, "grad_norm": 20.75, "learning_rate": 8.907853056025058e-06, "loss": 0.5589, "step": 681 }, { "epoch": 0.3352999016715831, "grad_norm": 9.875, "learning_rate": 8.907361332801202e-06, "loss": 0.3068, "step": 682 }, { "epoch": 0.3357915437561455, "grad_norm": 35.75, "learning_rate": 8.906868314720857e-06, "loss": 0.5453, "step": 683 }, { "epoch": 0.336283185840708, "grad_norm": 18.875, "learning_rate": 8.906374001928864e-06, "loss": 0.3264, "step": 684 }, { "epoch": 0.3367748279252704, "grad_norm": 13.9375, "learning_rate": 8.905878394570455e-06, "loss": 0.3203, "step": 685 }, { "epoch": 0.33726647000983284, "grad_norm": 24.625, "learning_rate": 8.905381492791228e-06, "loss": 0.5587, "step": 686 }, { "epoch": 0.33775811209439527, "grad_norm": 11.5625, "learning_rate": 8.904883296737178e-06, "loss": 0.2799, "step": 687 }, { "epoch": 0.3382497541789577, "grad_norm": 13.4375, "learning_rate": 8.90438380655467e-06, "loss": 0.2894, "step": 688 }, { "epoch": 0.3387413962635202, "grad_norm": 16.625, "learning_rate": 8.903883022390448e-06, "loss": 0.4644, "step": 689 }, { "epoch": 0.3392330383480826, "grad_norm": 20.5, "learning_rate": 8.903380944391643e-06, "loss": 0.3991, "step": 690 }, { "epoch": 0.33972468043264503, "grad_norm": 25.375, "learning_rate": 8.902877572705762e-06, "loss": 0.6081, "step": 691 }, { "epoch": 0.34021632251720746, "grad_norm": 15.375, "learning_rate": 8.902372907480691e-06, "loss": 0.2866, "step": 692 }, { "epoch": 0.3407079646017699, "grad_norm": 26.5, "learning_rate": 8.901866948864697e-06, "loss": 0.3167, "step": 693 }, { "epoch": 0.34119960668633237, "grad_norm": 13.0625, "learning_rate": 8.90135969700643e-06, "loss": 0.3687, "step": 694 }, { "epoch": 0.3416912487708948, "grad_norm": 29.625, "learning_rate": 8.900851152054915e-06, "loss": 0.6513, "step": 695 }, { "epoch": 0.3421828908554572, "grad_norm": 26.375, "learning_rate": 8.900341314159564e-06, "loss": 0.4242, "step": 696 }, { "epoch": 0.34267453294001965, "grad_norm": 24.0, "learning_rate": 8.89983018347016e-06, "loss": 0.429, "step": 697 }, { "epoch": 0.34316617502458213, "grad_norm": 22.875, "learning_rate": 8.899317760136872e-06, "loss": 0.2863, "step": 698 }, { "epoch": 0.34365781710914456, "grad_norm": 27.875, "learning_rate": 8.898804044310245e-06, "loss": 0.4498, "step": 699 }, { "epoch": 0.344149459193707, "grad_norm": 23.0, "learning_rate": 8.898289036141208e-06, "loss": 0.4865, "step": 700 }, { "epoch": 0.3446411012782694, "grad_norm": 14.3125, "learning_rate": 8.897772735781065e-06, "loss": 0.3874, "step": 701 }, { "epoch": 0.34513274336283184, "grad_norm": 21.875, "learning_rate": 8.897255143381504e-06, "loss": 0.5104, "step": 702 }, { "epoch": 0.3456243854473943, "grad_norm": 22.0, "learning_rate": 8.896736259094591e-06, "loss": 0.533, "step": 703 }, { "epoch": 0.34611602753195675, "grad_norm": 16.375, "learning_rate": 8.896216083072772e-06, "loss": 0.3094, "step": 704 }, { "epoch": 0.3466076696165192, "grad_norm": 16.75, "learning_rate": 8.895694615468866e-06, "loss": 0.261, "step": 705 }, { "epoch": 0.3470993117010816, "grad_norm": 19.125, "learning_rate": 8.895171856436083e-06, "loss": 0.5067, "step": 706 }, { "epoch": 0.34759095378564403, "grad_norm": 10.75, "learning_rate": 8.894647806128003e-06, "loss": 0.2422, "step": 707 }, { "epoch": 0.3480825958702065, "grad_norm": 13.875, "learning_rate": 8.89412246469859e-06, "loss": 0.3969, "step": 708 }, { "epoch": 0.34857423795476894, "grad_norm": 17.125, "learning_rate": 8.893595832302187e-06, "loss": 0.4338, "step": 709 }, { "epoch": 0.34906588003933137, "grad_norm": 20.0, "learning_rate": 8.893067909093516e-06, "loss": 0.4505, "step": 710 }, { "epoch": 0.3495575221238938, "grad_norm": 14.5625, "learning_rate": 8.892538695227674e-06, "loss": 0.2778, "step": 711 }, { "epoch": 0.3500491642084562, "grad_norm": 24.25, "learning_rate": 8.892008190860144e-06, "loss": 0.5577, "step": 712 }, { "epoch": 0.3505408062930187, "grad_norm": 23.625, "learning_rate": 8.891476396146785e-06, "loss": 0.4763, "step": 713 }, { "epoch": 0.35103244837758113, "grad_norm": 18.125, "learning_rate": 8.890943311243834e-06, "loss": 0.3778, "step": 714 }, { "epoch": 0.35152409046214356, "grad_norm": 22.5, "learning_rate": 8.89040893630791e-06, "loss": 0.6788, "step": 715 }, { "epoch": 0.352015732546706, "grad_norm": 14.75, "learning_rate": 8.889873271496007e-06, "loss": 0.434, "step": 716 }, { "epoch": 0.3525073746312684, "grad_norm": 15.9375, "learning_rate": 8.889336316965499e-06, "loss": 0.4869, "step": 717 }, { "epoch": 0.3529990167158309, "grad_norm": 18.375, "learning_rate": 8.888798072874144e-06, "loss": 0.4635, "step": 718 }, { "epoch": 0.3534906588003933, "grad_norm": 15.625, "learning_rate": 8.888258539380072e-06, "loss": 0.3955, "step": 719 }, { "epoch": 0.35398230088495575, "grad_norm": 13.25, "learning_rate": 8.887717716641796e-06, "loss": 0.5311, "step": 720 }, { "epoch": 0.3544739429695182, "grad_norm": 19.875, "learning_rate": 8.887175604818206e-06, "loss": 0.4734, "step": 721 }, { "epoch": 0.35496558505408066, "grad_norm": 32.75, "learning_rate": 8.886632204068572e-06, "loss": 0.3074, "step": 722 }, { "epoch": 0.3554572271386431, "grad_norm": 15.3125, "learning_rate": 8.886087514552541e-06, "loss": 0.3996, "step": 723 }, { "epoch": 0.3559488692232055, "grad_norm": 21.25, "learning_rate": 8.885541536430138e-06, "loss": 0.4742, "step": 724 }, { "epoch": 0.35644051130776794, "grad_norm": 28.125, "learning_rate": 8.884994269861772e-06, "loss": 0.4307, "step": 725 }, { "epoch": 0.35693215339233036, "grad_norm": 37.5, "learning_rate": 8.884445715008223e-06, "loss": 0.4177, "step": 726 }, { "epoch": 0.35742379547689285, "grad_norm": 14.1875, "learning_rate": 8.883895872030656e-06, "loss": 0.3808, "step": 727 }, { "epoch": 0.3579154375614553, "grad_norm": 22.625, "learning_rate": 8.88334474109061e-06, "loss": 0.4385, "step": 728 }, { "epoch": 0.3584070796460177, "grad_norm": 9.375, "learning_rate": 8.882792322350005e-06, "loss": 0.3862, "step": 729 }, { "epoch": 0.35889872173058013, "grad_norm": 16.5, "learning_rate": 8.882238615971135e-06, "loss": 0.3277, "step": 730 }, { "epoch": 0.35939036381514256, "grad_norm": 15.0, "learning_rate": 8.881683622116679e-06, "loss": 0.3566, "step": 731 }, { "epoch": 0.35988200589970504, "grad_norm": 11.0, "learning_rate": 8.881127340949692e-06, "loss": 0.4159, "step": 732 }, { "epoch": 0.36037364798426746, "grad_norm": 9.6875, "learning_rate": 8.880569772633603e-06, "loss": 0.3457, "step": 733 }, { "epoch": 0.3608652900688299, "grad_norm": 23.375, "learning_rate": 8.880010917332223e-06, "loss": 0.414, "step": 734 }, { "epoch": 0.3613569321533923, "grad_norm": 32.0, "learning_rate": 8.87945077520974e-06, "loss": 0.6316, "step": 735 }, { "epoch": 0.36184857423795475, "grad_norm": 22.875, "learning_rate": 8.878889346430723e-06, "loss": 0.4879, "step": 736 }, { "epoch": 0.36234021632251723, "grad_norm": 23.75, "learning_rate": 8.878326631160115e-06, "loss": 0.2945, "step": 737 }, { "epoch": 0.36283185840707965, "grad_norm": 11.5, "learning_rate": 8.877762629563236e-06, "loss": 0.3718, "step": 738 }, { "epoch": 0.3633235004916421, "grad_norm": 12.8125, "learning_rate": 8.87719734180579e-06, "loss": 0.3363, "step": 739 }, { "epoch": 0.3638151425762045, "grad_norm": 14.25, "learning_rate": 8.876630768053853e-06, "loss": 0.3664, "step": 740 }, { "epoch": 0.36430678466076694, "grad_norm": 18.5, "learning_rate": 8.876062908473882e-06, "loss": 0.2428, "step": 741 }, { "epoch": 0.3647984267453294, "grad_norm": 25.375, "learning_rate": 8.875493763232709e-06, "loss": 0.3524, "step": 742 }, { "epoch": 0.36529006882989185, "grad_norm": 21.625, "learning_rate": 8.874923332497549e-06, "loss": 0.3777, "step": 743 }, { "epoch": 0.36578171091445427, "grad_norm": 11.5625, "learning_rate": 8.874351616435985e-06, "loss": 0.2666, "step": 744 }, { "epoch": 0.3662733529990167, "grad_norm": 14.0625, "learning_rate": 8.87377861521599e-06, "loss": 0.3105, "step": 745 }, { "epoch": 0.3667649950835792, "grad_norm": 17.0, "learning_rate": 8.873204329005907e-06, "loss": 0.3674, "step": 746 }, { "epoch": 0.3672566371681416, "grad_norm": 31.5, "learning_rate": 8.872628757974455e-06, "loss": 0.5805, "step": 747 }, { "epoch": 0.36774827925270404, "grad_norm": 10.0, "learning_rate": 8.872051902290736e-06, "loss": 0.3252, "step": 748 }, { "epoch": 0.36823992133726646, "grad_norm": 52.5, "learning_rate": 8.871473762124228e-06, "loss": 0.6428, "step": 749 }, { "epoch": 0.3687315634218289, "grad_norm": 23.5, "learning_rate": 8.87089433764478e-06, "loss": 0.4751, "step": 750 }, { "epoch": 0.36922320550639137, "grad_norm": 23.875, "learning_rate": 8.870313629022629e-06, "loss": 0.4609, "step": 751 }, { "epoch": 0.3697148475909538, "grad_norm": 28.5, "learning_rate": 8.869731636428378e-06, "loss": 0.6225, "step": 752 }, { "epoch": 0.3702064896755162, "grad_norm": 44.0, "learning_rate": 8.86914836003302e-06, "loss": 0.6444, "step": 753 }, { "epoch": 0.37069813176007865, "grad_norm": 8.3125, "learning_rate": 8.868563800007913e-06, "loss": 0.2297, "step": 754 }, { "epoch": 0.3711897738446411, "grad_norm": 23.125, "learning_rate": 8.867977956524798e-06, "loss": 0.5939, "step": 755 }, { "epoch": 0.37168141592920356, "grad_norm": 23.75, "learning_rate": 8.867390829755793e-06, "loss": 0.6025, "step": 756 }, { "epoch": 0.372173058013766, "grad_norm": 20.5, "learning_rate": 8.866802419873393e-06, "loss": 0.5006, "step": 757 }, { "epoch": 0.3726647000983284, "grad_norm": 20.375, "learning_rate": 8.866212727050469e-06, "loss": 0.4101, "step": 758 }, { "epoch": 0.37315634218289084, "grad_norm": 14.125, "learning_rate": 8.865621751460268e-06, "loss": 0.3383, "step": 759 }, { "epoch": 0.37364798426745327, "grad_norm": 18.0, "learning_rate": 8.865029493276415e-06, "loss": 0.2745, "step": 760 }, { "epoch": 0.37413962635201575, "grad_norm": 21.75, "learning_rate": 8.864435952672914e-06, "loss": 0.3964, "step": 761 }, { "epoch": 0.3746312684365782, "grad_norm": 30.25, "learning_rate": 8.863841129824142e-06, "loss": 0.3956, "step": 762 }, { "epoch": 0.3751229105211406, "grad_norm": 29.25, "learning_rate": 8.863245024904856e-06, "loss": 0.4988, "step": 763 }, { "epoch": 0.37561455260570303, "grad_norm": 15.6875, "learning_rate": 8.862647638090188e-06, "loss": 0.4224, "step": 764 }, { "epoch": 0.37610619469026546, "grad_norm": 37.75, "learning_rate": 8.862048969555644e-06, "loss": 0.3411, "step": 765 }, { "epoch": 0.37659783677482794, "grad_norm": 15.8125, "learning_rate": 8.861449019477111e-06, "loss": 0.435, "step": 766 }, { "epoch": 0.37708947885939037, "grad_norm": 29.25, "learning_rate": 8.860847788030852e-06, "loss": 0.5323, "step": 767 }, { "epoch": 0.3775811209439528, "grad_norm": 18.625, "learning_rate": 8.860245275393501e-06, "loss": 0.2889, "step": 768 }, { "epoch": 0.3780727630285152, "grad_norm": 26.75, "learning_rate": 8.859641481742079e-06, "loss": 0.4244, "step": 769 }, { "epoch": 0.3785644051130777, "grad_norm": 25.25, "learning_rate": 8.859036407253972e-06, "loss": 0.5116, "step": 770 }, { "epoch": 0.37905604719764013, "grad_norm": 22.375, "learning_rate": 8.858430052106948e-06, "loss": 0.2313, "step": 771 }, { "epoch": 0.37954768928220256, "grad_norm": 20.125, "learning_rate": 8.857822416479153e-06, "loss": 0.2932, "step": 772 }, { "epoch": 0.380039331366765, "grad_norm": 22.125, "learning_rate": 8.857213500549105e-06, "loss": 0.5054, "step": 773 }, { "epoch": 0.3805309734513274, "grad_norm": 50.0, "learning_rate": 8.8566033044957e-06, "loss": 0.6742, "step": 774 }, { "epoch": 0.3810226155358899, "grad_norm": 20.375, "learning_rate": 8.85599182849821e-06, "loss": 0.4322, "step": 775 }, { "epoch": 0.3815142576204523, "grad_norm": 25.875, "learning_rate": 8.855379072736282e-06, "loss": 0.4775, "step": 776 }, { "epoch": 0.38200589970501475, "grad_norm": 41.25, "learning_rate": 8.854765037389941e-06, "loss": 0.5997, "step": 777 }, { "epoch": 0.3824975417895772, "grad_norm": 25.5, "learning_rate": 8.854149722639587e-06, "loss": 0.5453, "step": 778 }, { "epoch": 0.3829891838741396, "grad_norm": 18.5, "learning_rate": 8.853533128665997e-06, "loss": 0.4363, "step": 779 }, { "epoch": 0.3834808259587021, "grad_norm": 31.625, "learning_rate": 8.85291525565032e-06, "loss": 0.6125, "step": 780 }, { "epoch": 0.3839724680432645, "grad_norm": 18.125, "learning_rate": 8.852296103774085e-06, "loss": 0.3054, "step": 781 }, { "epoch": 0.38446411012782694, "grad_norm": 24.25, "learning_rate": 8.851675673219195e-06, "loss": 0.3019, "step": 782 }, { "epoch": 0.38495575221238937, "grad_norm": 12.6875, "learning_rate": 8.851053964167928e-06, "loss": 0.3333, "step": 783 }, { "epoch": 0.3854473942969518, "grad_norm": 22.25, "learning_rate": 8.85043097680294e-06, "loss": 0.5858, "step": 784 }, { "epoch": 0.3859390363815143, "grad_norm": 14.625, "learning_rate": 8.849806711307259e-06, "loss": 0.2974, "step": 785 }, { "epoch": 0.3864306784660767, "grad_norm": 17.25, "learning_rate": 8.849181167864291e-06, "loss": 0.3203, "step": 786 }, { "epoch": 0.38692232055063913, "grad_norm": 23.125, "learning_rate": 8.848554346657818e-06, "loss": 0.4262, "step": 787 }, { "epoch": 0.38741396263520156, "grad_norm": 14.625, "learning_rate": 8.847926247871996e-06, "loss": 0.3471, "step": 788 }, { "epoch": 0.387905604719764, "grad_norm": 12.625, "learning_rate": 8.847296871691357e-06, "loss": 0.3679, "step": 789 }, { "epoch": 0.38839724680432647, "grad_norm": 20.625, "learning_rate": 8.846666218300807e-06, "loss": 0.3157, "step": 790 }, { "epoch": 0.3888888888888889, "grad_norm": 13.875, "learning_rate": 8.846034287885629e-06, "loss": 0.2603, "step": 791 }, { "epoch": 0.3893805309734513, "grad_norm": 9.5625, "learning_rate": 8.84540108063148e-06, "loss": 0.3041, "step": 792 }, { "epoch": 0.38987217305801375, "grad_norm": 24.875, "learning_rate": 8.844766596724392e-06, "loss": 0.4362, "step": 793 }, { "epoch": 0.39036381514257623, "grad_norm": 27.0, "learning_rate": 8.844130836350773e-06, "loss": 0.4743, "step": 794 }, { "epoch": 0.39085545722713866, "grad_norm": 18.625, "learning_rate": 8.843493799697407e-06, "loss": 0.4109, "step": 795 }, { "epoch": 0.3913470993117011, "grad_norm": 12.75, "learning_rate": 8.842855486951449e-06, "loss": 0.2126, "step": 796 }, { "epoch": 0.3918387413962635, "grad_norm": 35.25, "learning_rate": 8.842215898300434e-06, "loss": 0.7611, "step": 797 }, { "epoch": 0.39233038348082594, "grad_norm": 22.25, "learning_rate": 8.841575033932267e-06, "loss": 0.3957, "step": 798 }, { "epoch": 0.3928220255653884, "grad_norm": 9.125, "learning_rate": 8.84093289403523e-06, "loss": 0.3598, "step": 799 }, { "epoch": 0.39331366764995085, "grad_norm": 25.125, "learning_rate": 8.840289478797983e-06, "loss": 0.4218, "step": 800 }, { "epoch": 0.3938053097345133, "grad_norm": 32.25, "learning_rate": 8.839644788409556e-06, "loss": 0.5133, "step": 801 }, { "epoch": 0.3942969518190757, "grad_norm": 18.875, "learning_rate": 8.838998823059354e-06, "loss": 0.2993, "step": 802 }, { "epoch": 0.39478859390363813, "grad_norm": 13.4375, "learning_rate": 8.83835158293716e-06, "loss": 0.1755, "step": 803 }, { "epoch": 0.3952802359882006, "grad_norm": 20.5, "learning_rate": 8.837703068233125e-06, "loss": 0.2627, "step": 804 }, { "epoch": 0.39577187807276304, "grad_norm": 10.75, "learning_rate": 8.837053279137785e-06, "loss": 0.2878, "step": 805 }, { "epoch": 0.39626352015732547, "grad_norm": 25.125, "learning_rate": 8.83640221584204e-06, "loss": 0.6579, "step": 806 }, { "epoch": 0.3967551622418879, "grad_norm": 16.875, "learning_rate": 8.835749878537169e-06, "loss": 0.3822, "step": 807 }, { "epoch": 0.3972468043264503, "grad_norm": 18.5, "learning_rate": 8.835096267414824e-06, "loss": 0.4214, "step": 808 }, { "epoch": 0.3977384464110128, "grad_norm": 16.625, "learning_rate": 8.834441382667034e-06, "loss": 0.4118, "step": 809 }, { "epoch": 0.39823008849557523, "grad_norm": 23.375, "learning_rate": 8.8337852244862e-06, "loss": 0.5251, "step": 810 }, { "epoch": 0.39872173058013766, "grad_norm": 15.75, "learning_rate": 8.833127793065098e-06, "loss": 0.351, "step": 811 }, { "epoch": 0.3992133726647001, "grad_norm": 21.25, "learning_rate": 8.832469088596874e-06, "loss": 0.4311, "step": 812 }, { "epoch": 0.3997050147492625, "grad_norm": 17.375, "learning_rate": 8.831809111275054e-06, "loss": 0.3214, "step": 813 }, { "epoch": 0.400196656833825, "grad_norm": 21.625, "learning_rate": 8.831147861293536e-06, "loss": 0.4541, "step": 814 }, { "epoch": 0.4006882989183874, "grad_norm": 15.0625, "learning_rate": 8.830485338846591e-06, "loss": 0.4201, "step": 815 }, { "epoch": 0.40117994100294985, "grad_norm": 19.75, "learning_rate": 8.829821544128862e-06, "loss": 0.3156, "step": 816 }, { "epoch": 0.4016715830875123, "grad_norm": 12.1875, "learning_rate": 8.82915647733537e-06, "loss": 0.2988, "step": 817 }, { "epoch": 0.40216322517207476, "grad_norm": 21.0, "learning_rate": 8.828490138661507e-06, "loss": 0.4617, "step": 818 }, { "epoch": 0.4026548672566372, "grad_norm": 35.0, "learning_rate": 8.82782252830304e-06, "loss": 0.4618, "step": 819 }, { "epoch": 0.4031465093411996, "grad_norm": 18.875, "learning_rate": 8.827153646456108e-06, "loss": 0.4417, "step": 820 }, { "epoch": 0.40363815142576204, "grad_norm": 19.75, "learning_rate": 8.826483493317226e-06, "loss": 0.3573, "step": 821 }, { "epoch": 0.40412979351032446, "grad_norm": 28.0, "learning_rate": 8.82581206908328e-06, "loss": 0.4799, "step": 822 }, { "epoch": 0.40462143559488695, "grad_norm": 16.25, "learning_rate": 8.825139373951529e-06, "loss": 0.3768, "step": 823 }, { "epoch": 0.4051130776794494, "grad_norm": 14.375, "learning_rate": 8.82446540811961e-06, "loss": 0.248, "step": 824 }, { "epoch": 0.4056047197640118, "grad_norm": 13.75, "learning_rate": 8.823790171785527e-06, "loss": 0.3281, "step": 825 }, { "epoch": 0.4060963618485742, "grad_norm": 28.0, "learning_rate": 8.823113665147663e-06, "loss": 0.5017, "step": 826 }, { "epoch": 0.40658800393313665, "grad_norm": 15.9375, "learning_rate": 8.822435888404768e-06, "loss": 0.5173, "step": 827 }, { "epoch": 0.40707964601769914, "grad_norm": 26.125, "learning_rate": 8.821756841755973e-06, "loss": 0.3778, "step": 828 }, { "epoch": 0.40757128810226156, "grad_norm": 22.875, "learning_rate": 8.821076525400775e-06, "loss": 0.5581, "step": 829 }, { "epoch": 0.408062930186824, "grad_norm": 9.5, "learning_rate": 8.820394939539048e-06, "loss": 0.3814, "step": 830 }, { "epoch": 0.4085545722713864, "grad_norm": 17.875, "learning_rate": 8.819712084371038e-06, "loss": 0.4409, "step": 831 }, { "epoch": 0.40904621435594885, "grad_norm": 23.25, "learning_rate": 8.819027960097363e-06, "loss": 0.1829, "step": 832 }, { "epoch": 0.4095378564405113, "grad_norm": 18.5, "learning_rate": 8.818342566919016e-06, "loss": 0.3292, "step": 833 }, { "epoch": 0.41002949852507375, "grad_norm": 27.25, "learning_rate": 8.81765590503736e-06, "loss": 0.3763, "step": 834 }, { "epoch": 0.4105211406096362, "grad_norm": 14.9375, "learning_rate": 8.816967974654134e-06, "loss": 0.275, "step": 835 }, { "epoch": 0.4110127826941986, "grad_norm": 19.375, "learning_rate": 8.816278775971446e-06, "loss": 0.4523, "step": 836 }, { "epoch": 0.41150442477876104, "grad_norm": 45.0, "learning_rate": 8.815588309191779e-06, "loss": 0.6085, "step": 837 }, { "epoch": 0.4119960668633235, "grad_norm": 35.0, "learning_rate": 8.814896574517987e-06, "loss": 0.5339, "step": 838 }, { "epoch": 0.41248770894788594, "grad_norm": 10.125, "learning_rate": 8.8142035721533e-06, "loss": 0.3334, "step": 839 }, { "epoch": 0.41297935103244837, "grad_norm": 17.875, "learning_rate": 8.813509302301316e-06, "loss": 0.4542, "step": 840 }, { "epoch": 0.4134709931170108, "grad_norm": 30.125, "learning_rate": 8.812813765166009e-06, "loss": 0.6947, "step": 841 }, { "epoch": 0.4139626352015733, "grad_norm": 10.9375, "learning_rate": 8.812116960951722e-06, "loss": 0.2033, "step": 842 }, { "epoch": 0.4144542772861357, "grad_norm": 10.3125, "learning_rate": 8.811418889863171e-06, "loss": 0.2526, "step": 843 }, { "epoch": 0.41494591937069814, "grad_norm": 11.0625, "learning_rate": 8.810719552105447e-06, "loss": 0.3224, "step": 844 }, { "epoch": 0.41543756145526056, "grad_norm": 16.375, "learning_rate": 8.810018947884013e-06, "loss": 0.2511, "step": 845 }, { "epoch": 0.415929203539823, "grad_norm": 12.125, "learning_rate": 8.809317077404698e-06, "loss": 0.2896, "step": 846 }, { "epoch": 0.41642084562438547, "grad_norm": 17.375, "learning_rate": 8.808613940873711e-06, "loss": 0.4622, "step": 847 }, { "epoch": 0.4169124877089479, "grad_norm": 11.0625, "learning_rate": 8.807909538497629e-06, "loss": 0.2735, "step": 848 }, { "epoch": 0.4174041297935103, "grad_norm": 33.5, "learning_rate": 8.807203870483397e-06, "loss": 0.4967, "step": 849 }, { "epoch": 0.41789577187807275, "grad_norm": 20.875, "learning_rate": 8.806496937038341e-06, "loss": 0.4869, "step": 850 }, { "epoch": 0.4183874139626352, "grad_norm": 24.5, "learning_rate": 8.805788738370153e-06, "loss": 0.6283, "step": 851 }, { "epoch": 0.41887905604719766, "grad_norm": 22.125, "learning_rate": 8.805079274686897e-06, "loss": 0.4135, "step": 852 }, { "epoch": 0.4193706981317601, "grad_norm": 42.5, "learning_rate": 8.804368546197006e-06, "loss": 0.4093, "step": 853 }, { "epoch": 0.4198623402163225, "grad_norm": 29.0, "learning_rate": 8.803656553109292e-06, "loss": 0.49, "step": 854 }, { "epoch": 0.42035398230088494, "grad_norm": 24.75, "learning_rate": 8.802943295632934e-06, "loss": 0.4655, "step": 855 }, { "epoch": 0.42084562438544737, "grad_norm": 40.0, "learning_rate": 8.80222877397748e-06, "loss": 0.5918, "step": 856 }, { "epoch": 0.42133726647000985, "grad_norm": 42.5, "learning_rate": 8.801512988352856e-06, "loss": 0.5381, "step": 857 }, { "epoch": 0.4218289085545723, "grad_norm": 18.0, "learning_rate": 8.800795938969351e-06, "loss": 0.342, "step": 858 }, { "epoch": 0.4223205506391347, "grad_norm": 22.125, "learning_rate": 8.800077626037634e-06, "loss": 0.2332, "step": 859 }, { "epoch": 0.42281219272369713, "grad_norm": 12.3125, "learning_rate": 8.799358049768738e-06, "loss": 0.2412, "step": 860 }, { "epoch": 0.42330383480825956, "grad_norm": 24.625, "learning_rate": 8.798637210374073e-06, "loss": 0.5653, "step": 861 }, { "epoch": 0.42379547689282204, "grad_norm": 27.5, "learning_rate": 8.797915108065414e-06, "loss": 0.5805, "step": 862 }, { "epoch": 0.42428711897738447, "grad_norm": 33.25, "learning_rate": 8.797191743054914e-06, "loss": 0.5839, "step": 863 }, { "epoch": 0.4247787610619469, "grad_norm": 9.4375, "learning_rate": 8.796467115555092e-06, "loss": 0.1943, "step": 864 }, { "epoch": 0.4252704031465093, "grad_norm": 12.9375, "learning_rate": 8.795741225778838e-06, "loss": 0.4107, "step": 865 }, { "epoch": 0.4257620452310718, "grad_norm": 24.375, "learning_rate": 8.795014073939415e-06, "loss": 0.5667, "step": 866 }, { "epoch": 0.42625368731563423, "grad_norm": 13.1875, "learning_rate": 8.794285660250457e-06, "loss": 0.3172, "step": 867 }, { "epoch": 0.42674532940019666, "grad_norm": 16.0, "learning_rate": 8.793555984925964e-06, "loss": 0.2198, "step": 868 }, { "epoch": 0.4272369714847591, "grad_norm": 33.5, "learning_rate": 8.792825048180313e-06, "loss": 0.6299, "step": 869 }, { "epoch": 0.4277286135693215, "grad_norm": 11.3125, "learning_rate": 8.79209285022825e-06, "loss": 0.2748, "step": 870 }, { "epoch": 0.428220255653884, "grad_norm": 22.625, "learning_rate": 8.791359391284888e-06, "loss": 0.618, "step": 871 }, { "epoch": 0.4287118977384464, "grad_norm": 15.6875, "learning_rate": 8.790624671565714e-06, "loss": 0.377, "step": 872 }, { "epoch": 0.42920353982300885, "grad_norm": 9.25, "learning_rate": 8.789888691286583e-06, "loss": 0.4154, "step": 873 }, { "epoch": 0.4296951819075713, "grad_norm": 28.875, "learning_rate": 8.789151450663723e-06, "loss": 0.5125, "step": 874 }, { "epoch": 0.4301868239921337, "grad_norm": 22.25, "learning_rate": 8.788412949913732e-06, "loss": 0.4332, "step": 875 }, { "epoch": 0.4306784660766962, "grad_norm": 24.125, "learning_rate": 8.787673189253576e-06, "loss": 0.4686, "step": 876 }, { "epoch": 0.4311701081612586, "grad_norm": 17.0, "learning_rate": 8.78693216890059e-06, "loss": 0.3515, "step": 877 }, { "epoch": 0.43166175024582104, "grad_norm": 18.0, "learning_rate": 8.786189889072484e-06, "loss": 0.3473, "step": 878 }, { "epoch": 0.43215339233038347, "grad_norm": 25.375, "learning_rate": 8.785446349987336e-06, "loss": 0.44, "step": 879 }, { "epoch": 0.4326450344149459, "grad_norm": 9.8125, "learning_rate": 8.78470155186359e-06, "loss": 0.4124, "step": 880 }, { "epoch": 0.4331366764995084, "grad_norm": 20.5, "learning_rate": 8.783955494920066e-06, "loss": 0.403, "step": 881 }, { "epoch": 0.4336283185840708, "grad_norm": 17.5, "learning_rate": 8.783208179375952e-06, "loss": 0.2924, "step": 882 }, { "epoch": 0.43411996066863323, "grad_norm": 19.75, "learning_rate": 8.782459605450801e-06, "loss": 0.2638, "step": 883 }, { "epoch": 0.43461160275319566, "grad_norm": 23.0, "learning_rate": 8.781709773364543e-06, "loss": 0.5233, "step": 884 }, { "epoch": 0.4351032448377581, "grad_norm": 15.375, "learning_rate": 8.780958683337474e-06, "loss": 0.3128, "step": 885 }, { "epoch": 0.43559488692232057, "grad_norm": 21.625, "learning_rate": 8.780206335590255e-06, "loss": 0.4487, "step": 886 }, { "epoch": 0.436086529006883, "grad_norm": 25.375, "learning_rate": 8.77945273034393e-06, "loss": 0.3602, "step": 887 }, { "epoch": 0.4365781710914454, "grad_norm": 11.3125, "learning_rate": 8.778697867819894e-06, "loss": 0.4487, "step": 888 }, { "epoch": 0.43706981317600785, "grad_norm": 31.25, "learning_rate": 8.777941748239929e-06, "loss": 0.5726, "step": 889 }, { "epoch": 0.43756145526057033, "grad_norm": 11.0625, "learning_rate": 8.777184371826174e-06, "loss": 0.3289, "step": 890 }, { "epoch": 0.43805309734513276, "grad_norm": 22.125, "learning_rate": 8.776425738801145e-06, "loss": 0.3766, "step": 891 }, { "epoch": 0.4385447394296952, "grad_norm": 9.125, "learning_rate": 8.77566584938772e-06, "loss": 0.3012, "step": 892 }, { "epoch": 0.4390363815142576, "grad_norm": 18.375, "learning_rate": 8.774904703809153e-06, "loss": 0.4552, "step": 893 }, { "epoch": 0.43952802359882004, "grad_norm": 17.375, "learning_rate": 8.774142302289062e-06, "loss": 0.4152, "step": 894 }, { "epoch": 0.4400196656833825, "grad_norm": 38.25, "learning_rate": 8.773378645051437e-06, "loss": 0.6087, "step": 895 }, { "epoch": 0.44051130776794495, "grad_norm": 14.375, "learning_rate": 8.772613732320636e-06, "loss": 0.3283, "step": 896 }, { "epoch": 0.4410029498525074, "grad_norm": 16.5, "learning_rate": 8.771847564321386e-06, "loss": 0.3991, "step": 897 }, { "epoch": 0.4414945919370698, "grad_norm": 18.5, "learning_rate": 8.771080141278782e-06, "loss": 0.3288, "step": 898 }, { "epoch": 0.44198623402163223, "grad_norm": 9.0, "learning_rate": 8.770311463418288e-06, "loss": 0.3945, "step": 899 }, { "epoch": 0.4424778761061947, "grad_norm": 8.375, "learning_rate": 8.769541530965739e-06, "loss": 0.267, "step": 900 }, { "epoch": 0.44296951819075714, "grad_norm": 11.5, "learning_rate": 8.768770344147333e-06, "loss": 0.1419, "step": 901 }, { "epoch": 0.44346116027531957, "grad_norm": 19.375, "learning_rate": 8.767997903189644e-06, "loss": 0.3843, "step": 902 }, { "epoch": 0.443952802359882, "grad_norm": 10.8125, "learning_rate": 8.767224208319609e-06, "loss": 0.3331, "step": 903 }, { "epoch": 0.4444444444444444, "grad_norm": 42.25, "learning_rate": 8.766449259764532e-06, "loss": 0.8028, "step": 904 }, { "epoch": 0.4449360865290069, "grad_norm": 29.125, "learning_rate": 8.765673057752093e-06, "loss": 0.5279, "step": 905 }, { "epoch": 0.44542772861356933, "grad_norm": 12.75, "learning_rate": 8.764895602510334e-06, "loss": 0.4498, "step": 906 }, { "epoch": 0.44591937069813176, "grad_norm": 10.9375, "learning_rate": 8.764116894267665e-06, "loss": 0.3318, "step": 907 }, { "epoch": 0.4464110127826942, "grad_norm": 11.6875, "learning_rate": 8.763336933252866e-06, "loss": 0.3873, "step": 908 }, { "epoch": 0.4469026548672566, "grad_norm": 25.5, "learning_rate": 8.76255571969509e-06, "loss": 0.179, "step": 909 }, { "epoch": 0.4473942969518191, "grad_norm": 11.5625, "learning_rate": 8.761773253823845e-06, "loss": 0.4251, "step": 910 }, { "epoch": 0.4478859390363815, "grad_norm": 19.25, "learning_rate": 8.760989535869019e-06, "loss": 0.3722, "step": 911 }, { "epoch": 0.44837758112094395, "grad_norm": 20.625, "learning_rate": 8.760204566060864e-06, "loss": 0.3188, "step": 912 }, { "epoch": 0.4488692232055064, "grad_norm": 9.875, "learning_rate": 8.75941834463e-06, "loss": 0.3532, "step": 913 }, { "epoch": 0.44936086529006886, "grad_norm": 24.625, "learning_rate": 8.75863087180741e-06, "loss": 0.5576, "step": 914 }, { "epoch": 0.4498525073746313, "grad_norm": 18.75, "learning_rate": 8.757842147824455e-06, "loss": 0.5631, "step": 915 }, { "epoch": 0.4503441494591937, "grad_norm": 14.9375, "learning_rate": 8.757052172912852e-06, "loss": 0.2439, "step": 916 }, { "epoch": 0.45083579154375614, "grad_norm": 11.5, "learning_rate": 8.756260947304692e-06, "loss": 0.3662, "step": 917 }, { "epoch": 0.45132743362831856, "grad_norm": 13.9375, "learning_rate": 8.755468471232437e-06, "loss": 0.4174, "step": 918 }, { "epoch": 0.45181907571288105, "grad_norm": 20.375, "learning_rate": 8.754674744928906e-06, "loss": 0.5069, "step": 919 }, { "epoch": 0.4523107177974435, "grad_norm": 18.875, "learning_rate": 8.753879768627294e-06, "loss": 0.5232, "step": 920 }, { "epoch": 0.4528023598820059, "grad_norm": 17.75, "learning_rate": 8.753083542561158e-06, "loss": 0.4585, "step": 921 }, { "epoch": 0.4532940019665683, "grad_norm": 22.0, "learning_rate": 8.752286066964427e-06, "loss": 0.4782, "step": 922 }, { "epoch": 0.45378564405113075, "grad_norm": 18.375, "learning_rate": 8.751487342071393e-06, "loss": 0.3997, "step": 923 }, { "epoch": 0.45427728613569324, "grad_norm": 15.75, "learning_rate": 8.750687368116718e-06, "loss": 0.3499, "step": 924 }, { "epoch": 0.45476892822025566, "grad_norm": 19.875, "learning_rate": 8.749886145335428e-06, "loss": 0.3287, "step": 925 }, { "epoch": 0.4552605703048181, "grad_norm": 13.0625, "learning_rate": 8.74908367396292e-06, "loss": 0.3795, "step": 926 }, { "epoch": 0.4557522123893805, "grad_norm": 19.375, "learning_rate": 8.748279954234951e-06, "loss": 0.2454, "step": 927 }, { "epoch": 0.45624385447394294, "grad_norm": 17.0, "learning_rate": 8.747474986387654e-06, "loss": 0.4028, "step": 928 }, { "epoch": 0.4567354965585054, "grad_norm": 25.375, "learning_rate": 8.74666877065752e-06, "loss": 0.371, "step": 929 }, { "epoch": 0.45722713864306785, "grad_norm": 14.125, "learning_rate": 8.745861307281414e-06, "loss": 0.218, "step": 930 }, { "epoch": 0.4577187807276303, "grad_norm": 15.4375, "learning_rate": 8.745052596496559e-06, "loss": 0.25, "step": 931 }, { "epoch": 0.4582104228121927, "grad_norm": 26.625, "learning_rate": 8.744242638540553e-06, "loss": 0.512, "step": 932 }, { "epoch": 0.45870206489675514, "grad_norm": 18.0, "learning_rate": 8.743431433651356e-06, "loss": 0.4481, "step": 933 }, { "epoch": 0.4591937069813176, "grad_norm": 24.25, "learning_rate": 8.742618982067295e-06, "loss": 0.49, "step": 934 }, { "epoch": 0.45968534906588004, "grad_norm": 34.75, "learning_rate": 8.741805284027063e-06, "loss": 0.4715, "step": 935 }, { "epoch": 0.46017699115044247, "grad_norm": 20.25, "learning_rate": 8.74099033976972e-06, "loss": 0.3679, "step": 936 }, { "epoch": 0.4606686332350049, "grad_norm": 17.375, "learning_rate": 8.740174149534692e-06, "loss": 0.3906, "step": 937 }, { "epoch": 0.4611602753195674, "grad_norm": 40.0, "learning_rate": 8.739356713561772e-06, "loss": 0.6867, "step": 938 }, { "epoch": 0.4616519174041298, "grad_norm": 36.75, "learning_rate": 8.738538032091116e-06, "loss": 0.507, "step": 939 }, { "epoch": 0.46214355948869223, "grad_norm": 13.75, "learning_rate": 8.737718105363247e-06, "loss": 0.4745, "step": 940 }, { "epoch": 0.46263520157325466, "grad_norm": 14.625, "learning_rate": 8.736896933619057e-06, "loss": 0.3212, "step": 941 }, { "epoch": 0.4631268436578171, "grad_norm": 13.875, "learning_rate": 8.7360745170998e-06, "loss": 0.4458, "step": 942 }, { "epoch": 0.46361848574237957, "grad_norm": 30.375, "learning_rate": 8.735250856047098e-06, "loss": 0.5667, "step": 943 }, { "epoch": 0.464110127826942, "grad_norm": 18.5, "learning_rate": 8.734425950702937e-06, "loss": 0.2935, "step": 944 }, { "epoch": 0.4646017699115044, "grad_norm": 13.9375, "learning_rate": 8.733599801309668e-06, "loss": 0.3618, "step": 945 }, { "epoch": 0.46509341199606685, "grad_norm": 10.25, "learning_rate": 8.73277240811001e-06, "loss": 0.2739, "step": 946 }, { "epoch": 0.4655850540806293, "grad_norm": 16.125, "learning_rate": 8.731943771347048e-06, "loss": 0.4328, "step": 947 }, { "epoch": 0.46607669616519176, "grad_norm": 9.8125, "learning_rate": 8.731113891264228e-06, "loss": 0.3345, "step": 948 }, { "epoch": 0.4665683382497542, "grad_norm": 16.75, "learning_rate": 8.730282768105364e-06, "loss": 0.5275, "step": 949 }, { "epoch": 0.4670599803343166, "grad_norm": 21.75, "learning_rate": 8.729450402114636e-06, "loss": 0.2409, "step": 950 }, { "epoch": 0.46755162241887904, "grad_norm": 18.0, "learning_rate": 8.728616793536588e-06, "loss": 0.4433, "step": 951 }, { "epoch": 0.46804326450344147, "grad_norm": 11.25, "learning_rate": 8.727781942616129e-06, "loss": 0.2894, "step": 952 }, { "epoch": 0.46853490658800395, "grad_norm": 11.375, "learning_rate": 8.726945849598533e-06, "loss": 0.2847, "step": 953 }, { "epoch": 0.4690265486725664, "grad_norm": 12.6875, "learning_rate": 8.72610851472944e-06, "loss": 0.3182, "step": 954 }, { "epoch": 0.4695181907571288, "grad_norm": 13.375, "learning_rate": 8.725269938254851e-06, "loss": 0.2859, "step": 955 }, { "epoch": 0.47000983284169123, "grad_norm": 32.5, "learning_rate": 8.72443012042114e-06, "loss": 0.6676, "step": 956 }, { "epoch": 0.47050147492625366, "grad_norm": 12.875, "learning_rate": 8.723589061475038e-06, "loss": 0.2865, "step": 957 }, { "epoch": 0.47099311701081614, "grad_norm": 23.125, "learning_rate": 8.722746761663642e-06, "loss": 0.454, "step": 958 }, { "epoch": 0.47148475909537857, "grad_norm": 5.15625, "learning_rate": 8.721903221234415e-06, "loss": 0.2105, "step": 959 }, { "epoch": 0.471976401179941, "grad_norm": 29.25, "learning_rate": 8.721058440435185e-06, "loss": 0.5564, "step": 960 }, { "epoch": 0.4724680432645034, "grad_norm": 21.375, "learning_rate": 8.720212419514142e-06, "loss": 0.335, "step": 961 }, { "epoch": 0.4729596853490659, "grad_norm": 23.125, "learning_rate": 8.719365158719843e-06, "loss": 0.5736, "step": 962 }, { "epoch": 0.47345132743362833, "grad_norm": 18.5, "learning_rate": 8.71851665830121e-06, "loss": 0.4499, "step": 963 }, { "epoch": 0.47394296951819076, "grad_norm": 12.1875, "learning_rate": 8.717666918507524e-06, "loss": 0.3315, "step": 964 }, { "epoch": 0.4744346116027532, "grad_norm": 12.5625, "learning_rate": 8.716815939588436e-06, "loss": 0.2806, "step": 965 }, { "epoch": 0.4749262536873156, "grad_norm": 20.125, "learning_rate": 8.715963721793957e-06, "loss": 0.5184, "step": 966 }, { "epoch": 0.4754178957718781, "grad_norm": 9.75, "learning_rate": 8.715110265374465e-06, "loss": 0.2, "step": 967 }, { "epoch": 0.4759095378564405, "grad_norm": 23.125, "learning_rate": 8.714255570580698e-06, "loss": 0.3335, "step": 968 }, { "epoch": 0.47640117994100295, "grad_norm": 26.875, "learning_rate": 8.713399637663763e-06, "loss": 0.3075, "step": 969 }, { "epoch": 0.4768928220255654, "grad_norm": 16.75, "learning_rate": 8.712542466875126e-06, "loss": 0.322, "step": 970 }, { "epoch": 0.4773844641101278, "grad_norm": 28.875, "learning_rate": 8.71168405846662e-06, "loss": 0.3385, "step": 971 }, { "epoch": 0.4778761061946903, "grad_norm": 26.25, "learning_rate": 8.710824412690439e-06, "loss": 0.4517, "step": 972 }, { "epoch": 0.4783677482792527, "grad_norm": 17.0, "learning_rate": 8.709963529799144e-06, "loss": 0.5031, "step": 973 }, { "epoch": 0.47885939036381514, "grad_norm": 16.75, "learning_rate": 8.709101410045653e-06, "loss": 0.3644, "step": 974 }, { "epoch": 0.47935103244837757, "grad_norm": 30.75, "learning_rate": 8.708238053683257e-06, "loss": 0.3831, "step": 975 }, { "epoch": 0.47984267453294, "grad_norm": 21.625, "learning_rate": 8.707373460965601e-06, "loss": 0.5165, "step": 976 }, { "epoch": 0.4803343166175025, "grad_norm": 17.625, "learning_rate": 8.706507632146699e-06, "loss": 0.3963, "step": 977 }, { "epoch": 0.4808259587020649, "grad_norm": 16.75, "learning_rate": 8.705640567480927e-06, "loss": 0.2847, "step": 978 }, { "epoch": 0.48131760078662733, "grad_norm": 8.4375, "learning_rate": 8.70477226722302e-06, "loss": 0.2501, "step": 979 }, { "epoch": 0.48180924287118976, "grad_norm": 21.5, "learning_rate": 8.703902731628083e-06, "loss": 0.3422, "step": 980 }, { "epoch": 0.4823008849557522, "grad_norm": 24.875, "learning_rate": 8.70303196095158e-06, "loss": 0.3618, "step": 981 }, { "epoch": 0.48279252704031467, "grad_norm": 12.4375, "learning_rate": 8.702159955449337e-06, "loss": 0.2708, "step": 982 }, { "epoch": 0.4832841691248771, "grad_norm": 13.4375, "learning_rate": 8.701286715377541e-06, "loss": 0.3902, "step": 983 }, { "epoch": 0.4837758112094395, "grad_norm": 37.0, "learning_rate": 8.700412240992752e-06, "loss": 0.5065, "step": 984 }, { "epoch": 0.48426745329400195, "grad_norm": 13.375, "learning_rate": 8.699536532551881e-06, "loss": 0.3633, "step": 985 }, { "epoch": 0.48475909537856443, "grad_norm": 11.3125, "learning_rate": 8.698659590312204e-06, "loss": 0.2576, "step": 986 }, { "epoch": 0.48525073746312686, "grad_norm": 9.375, "learning_rate": 8.697781414531366e-06, "loss": 0.2463, "step": 987 }, { "epoch": 0.4857423795476893, "grad_norm": 15.875, "learning_rate": 8.696902005467369e-06, "loss": 0.454, "step": 988 }, { "epoch": 0.4862340216322517, "grad_norm": 29.125, "learning_rate": 8.696021363378575e-06, "loss": 0.5569, "step": 989 }, { "epoch": 0.48672566371681414, "grad_norm": 15.0, "learning_rate": 8.695139488523712e-06, "loss": 0.3973, "step": 990 }, { "epoch": 0.4872173058013766, "grad_norm": 24.875, "learning_rate": 8.694256381161872e-06, "loss": 0.4268, "step": 991 }, { "epoch": 0.48770894788593905, "grad_norm": 31.375, "learning_rate": 8.693372041552506e-06, "loss": 0.576, "step": 992 }, { "epoch": 0.4882005899705015, "grad_norm": 20.125, "learning_rate": 8.692486469955426e-06, "loss": 0.3065, "step": 993 }, { "epoch": 0.4886922320550639, "grad_norm": 11.5, "learning_rate": 8.691599666630809e-06, "loss": 0.2295, "step": 994 }, { "epoch": 0.48918387413962633, "grad_norm": 23.75, "learning_rate": 8.690711631839192e-06, "loss": 0.3, "step": 995 }, { "epoch": 0.4896755162241888, "grad_norm": 42.0, "learning_rate": 8.689822365841475e-06, "loss": 0.4658, "step": 996 }, { "epoch": 0.49016715830875124, "grad_norm": 35.5, "learning_rate": 8.688931868898919e-06, "loss": 0.6493, "step": 997 }, { "epoch": 0.49065880039331367, "grad_norm": 32.25, "learning_rate": 8.688040141273146e-06, "loss": 0.3524, "step": 998 }, { "epoch": 0.4911504424778761, "grad_norm": 18.625, "learning_rate": 8.68714718322614e-06, "loss": 0.3678, "step": 999 }, { "epoch": 0.4916420845624385, "grad_norm": 23.25, "learning_rate": 8.68625299502025e-06, "loss": 0.5166, "step": 1000 }, { "epoch": 0.4916420845624385, "eval_loss": 0.3955594003200531, "eval_runtime": 66.7454, "eval_samples_per_second": 121.896, "eval_spearman": 0.5029305294814612, "eval_steps_per_second": 15.237, "step": 1000 }, { "epoch": 0.492133726647001, "grad_norm": 40.25, "learning_rate": 8.685357576918178e-06, "loss": 0.5181, "step": 1001 }, { "epoch": 0.49262536873156343, "grad_norm": 20.75, "learning_rate": 8.684460929182995e-06, "loss": 0.369, "step": 1002 }, { "epoch": 0.49311701081612586, "grad_norm": 16.5, "learning_rate": 8.683563052078133e-06, "loss": 0.194, "step": 1003 }, { "epoch": 0.4936086529006883, "grad_norm": 30.75, "learning_rate": 8.682663945867379e-06, "loss": 0.4807, "step": 1004 }, { "epoch": 0.4941002949852507, "grad_norm": 14.0625, "learning_rate": 8.681763610814887e-06, "loss": 0.3463, "step": 1005 }, { "epoch": 0.4945919370698132, "grad_norm": 23.0, "learning_rate": 8.680862047185171e-06, "loss": 0.3472, "step": 1006 }, { "epoch": 0.4950835791543756, "grad_norm": 33.0, "learning_rate": 8.679959255243102e-06, "loss": 0.5166, "step": 1007 }, { "epoch": 0.49557522123893805, "grad_norm": 29.375, "learning_rate": 8.679055235253918e-06, "loss": 0.4549, "step": 1008 }, { "epoch": 0.4960668633235005, "grad_norm": 19.5, "learning_rate": 8.678149987483212e-06, "loss": 0.3237, "step": 1009 }, { "epoch": 0.49655850540806296, "grad_norm": 24.5, "learning_rate": 8.677243512196944e-06, "loss": 0.528, "step": 1010 }, { "epoch": 0.4970501474926254, "grad_norm": 13.75, "learning_rate": 8.676335809661428e-06, "loss": 0.1138, "step": 1011 }, { "epoch": 0.4975417895771878, "grad_norm": 16.125, "learning_rate": 8.675426880143343e-06, "loss": 0.3922, "step": 1012 }, { "epoch": 0.49803343166175024, "grad_norm": 23.375, "learning_rate": 8.674516723909726e-06, "loss": 0.4357, "step": 1013 }, { "epoch": 0.49852507374631266, "grad_norm": 26.125, "learning_rate": 8.673605341227976e-06, "loss": 0.4935, "step": 1014 }, { "epoch": 0.49901671583087515, "grad_norm": 30.75, "learning_rate": 8.672692732365852e-06, "loss": 0.6766, "step": 1015 }, { "epoch": 0.4995083579154376, "grad_norm": 23.125, "learning_rate": 8.671778897591472e-06, "loss": 0.343, "step": 1016 }, { "epoch": 0.5, "grad_norm": 15.625, "learning_rate": 8.670863837173317e-06, "loss": 0.2756, "step": 1017 }, { "epoch": 0.5004916420845624, "grad_norm": 14.5, "learning_rate": 8.669947551380225e-06, "loss": 0.3148, "step": 1018 }, { "epoch": 0.5009832841691249, "grad_norm": 21.625, "learning_rate": 8.669030040481397e-06, "loss": 0.2609, "step": 1019 }, { "epoch": 0.5014749262536873, "grad_norm": 30.125, "learning_rate": 8.668111304746389e-06, "loss": 0.4614, "step": 1020 }, { "epoch": 0.5019665683382497, "grad_norm": 10.625, "learning_rate": 8.667191344445124e-06, "loss": 0.3423, "step": 1021 }, { "epoch": 0.5024582104228122, "grad_norm": 14.625, "learning_rate": 8.666270159847877e-06, "loss": 0.4071, "step": 1022 }, { "epoch": 0.5029498525073747, "grad_norm": 13.0625, "learning_rate": 8.66534775122529e-06, "loss": 0.2443, "step": 1023 }, { "epoch": 0.5034414945919371, "grad_norm": 18.875, "learning_rate": 8.664424118848359e-06, "loss": 0.3637, "step": 1024 }, { "epoch": 0.5039331366764995, "grad_norm": 37.75, "learning_rate": 8.663499262988443e-06, "loss": 0.7491, "step": 1025 }, { "epoch": 0.504424778761062, "grad_norm": 17.375, "learning_rate": 8.662573183917257e-06, "loss": 0.3872, "step": 1026 }, { "epoch": 0.5049164208456244, "grad_norm": 26.875, "learning_rate": 8.66164588190688e-06, "loss": 0.4482, "step": 1027 }, { "epoch": 0.5054080629301868, "grad_norm": 11.6875, "learning_rate": 8.660717357229745e-06, "loss": 0.2587, "step": 1028 }, { "epoch": 0.5058997050147492, "grad_norm": 19.125, "learning_rate": 8.659787610158651e-06, "loss": 0.4464, "step": 1029 }, { "epoch": 0.5063913470993117, "grad_norm": 7.40625, "learning_rate": 8.658856640966749e-06, "loss": 0.2719, "step": 1030 }, { "epoch": 0.5068829891838741, "grad_norm": 10.375, "learning_rate": 8.657924449927553e-06, "loss": 0.3688, "step": 1031 }, { "epoch": 0.5073746312684366, "grad_norm": 17.875, "learning_rate": 8.656991037314936e-06, "loss": 0.3467, "step": 1032 }, { "epoch": 0.507866273352999, "grad_norm": 22.125, "learning_rate": 8.656056403403127e-06, "loss": 0.4839, "step": 1033 }, { "epoch": 0.5083579154375615, "grad_norm": 11.875, "learning_rate": 8.655120548466718e-06, "loss": 0.3198, "step": 1034 }, { "epoch": 0.5088495575221239, "grad_norm": 10.0, "learning_rate": 8.654183472780656e-06, "loss": 0.4114, "step": 1035 }, { "epoch": 0.5093411996066863, "grad_norm": 15.5625, "learning_rate": 8.65324517662025e-06, "loss": 0.2587, "step": 1036 }, { "epoch": 0.5098328416912488, "grad_norm": 32.75, "learning_rate": 8.652305660261165e-06, "loss": 0.4496, "step": 1037 }, { "epoch": 0.5103244837758112, "grad_norm": 20.125, "learning_rate": 8.651364923979424e-06, "loss": 0.4604, "step": 1038 }, { "epoch": 0.5108161258603736, "grad_norm": 12.1875, "learning_rate": 8.65042296805141e-06, "loss": 0.2601, "step": 1039 }, { "epoch": 0.511307767944936, "grad_norm": 17.5, "learning_rate": 8.649479792753868e-06, "loss": 0.4262, "step": 1040 }, { "epoch": 0.5117994100294986, "grad_norm": 18.375, "learning_rate": 8.648535398363892e-06, "loss": 0.3482, "step": 1041 }, { "epoch": 0.512291052114061, "grad_norm": 52.25, "learning_rate": 8.647589785158942e-06, "loss": 0.4595, "step": 1042 }, { "epoch": 0.5127826941986234, "grad_norm": 12.8125, "learning_rate": 8.646642953416834e-06, "loss": 0.3269, "step": 1043 }, { "epoch": 0.5132743362831859, "grad_norm": 26.875, "learning_rate": 8.64569490341574e-06, "loss": 0.3122, "step": 1044 }, { "epoch": 0.5137659783677483, "grad_norm": 16.875, "learning_rate": 8.644745635434193e-06, "loss": 0.3752, "step": 1045 }, { "epoch": 0.5142576204523107, "grad_norm": 20.0, "learning_rate": 8.643795149751081e-06, "loss": 0.511, "step": 1046 }, { "epoch": 0.5147492625368731, "grad_norm": 18.375, "learning_rate": 8.64284344664565e-06, "loss": 0.3394, "step": 1047 }, { "epoch": 0.5152409046214356, "grad_norm": 15.625, "learning_rate": 8.641890526397509e-06, "loss": 0.4881, "step": 1048 }, { "epoch": 0.515732546705998, "grad_norm": 16.25, "learning_rate": 8.640936389286616e-06, "loss": 0.3897, "step": 1049 }, { "epoch": 0.5162241887905604, "grad_norm": 13.1875, "learning_rate": 8.639981035593292e-06, "loss": 0.3543, "step": 1050 }, { "epoch": 0.516715830875123, "grad_norm": 18.0, "learning_rate": 8.639024465598214e-06, "loss": 0.4465, "step": 1051 }, { "epoch": 0.5172074729596854, "grad_norm": 11.25, "learning_rate": 8.638066679582418e-06, "loss": 0.3084, "step": 1052 }, { "epoch": 0.5176991150442478, "grad_norm": 15.1875, "learning_rate": 8.637107677827294e-06, "loss": 0.4102, "step": 1053 }, { "epoch": 0.5181907571288102, "grad_norm": 18.75, "learning_rate": 8.636147460614593e-06, "loss": 0.3312, "step": 1054 }, { "epoch": 0.5186823992133727, "grad_norm": 20.25, "learning_rate": 8.63518602822642e-06, "loss": 0.3985, "step": 1055 }, { "epoch": 0.5191740412979351, "grad_norm": 23.25, "learning_rate": 8.634223380945235e-06, "loss": 0.6175, "step": 1056 }, { "epoch": 0.5196656833824975, "grad_norm": 19.25, "learning_rate": 8.633259519053862e-06, "loss": 0.4745, "step": 1057 }, { "epoch": 0.52015732546706, "grad_norm": 11.5625, "learning_rate": 8.632294442835478e-06, "loss": 0.279, "step": 1058 }, { "epoch": 0.5206489675516224, "grad_norm": 21.0, "learning_rate": 8.631328152573617e-06, "loss": 0.3964, "step": 1059 }, { "epoch": 0.5211406096361848, "grad_norm": 20.875, "learning_rate": 8.630360648552167e-06, "loss": 0.2231, "step": 1060 }, { "epoch": 0.5216322517207473, "grad_norm": 12.0, "learning_rate": 8.629391931055376e-06, "loss": 0.389, "step": 1061 }, { "epoch": 0.5221238938053098, "grad_norm": 12.0, "learning_rate": 8.628422000367845e-06, "loss": 0.3665, "step": 1062 }, { "epoch": 0.5226155358898722, "grad_norm": 21.125, "learning_rate": 8.62745085677454e-06, "loss": 0.2665, "step": 1063 }, { "epoch": 0.5231071779744346, "grad_norm": 24.875, "learning_rate": 8.62647850056077e-06, "loss": 0.4388, "step": 1064 }, { "epoch": 0.523598820058997, "grad_norm": 28.875, "learning_rate": 8.625504932012212e-06, "loss": 0.5759, "step": 1065 }, { "epoch": 0.5240904621435595, "grad_norm": 22.875, "learning_rate": 8.624530151414894e-06, "loss": 0.3891, "step": 1066 }, { "epoch": 0.5245821042281219, "grad_norm": 22.875, "learning_rate": 8.623554159055199e-06, "loss": 0.5017, "step": 1067 }, { "epoch": 0.5250737463126843, "grad_norm": 18.625, "learning_rate": 8.622576955219868e-06, "loss": 0.4339, "step": 1068 }, { "epoch": 0.5255653883972468, "grad_norm": 9.375, "learning_rate": 8.621598540196e-06, "loss": 0.391, "step": 1069 }, { "epoch": 0.5260570304818093, "grad_norm": 13.0625, "learning_rate": 8.620618914271045e-06, "loss": 0.3621, "step": 1070 }, { "epoch": 0.5265486725663717, "grad_norm": 22.75, "learning_rate": 8.619638077732815e-06, "loss": 0.4684, "step": 1071 }, { "epoch": 0.5270403146509341, "grad_norm": 19.375, "learning_rate": 8.61865603086947e-06, "loss": 0.3158, "step": 1072 }, { "epoch": 0.5275319567354966, "grad_norm": 15.25, "learning_rate": 8.617672773969529e-06, "loss": 0.4137, "step": 1073 }, { "epoch": 0.528023598820059, "grad_norm": 14.3125, "learning_rate": 8.61668830732187e-06, "loss": 0.4235, "step": 1074 }, { "epoch": 0.5285152409046214, "grad_norm": 21.5, "learning_rate": 8.615702631215722e-06, "loss": 0.633, "step": 1075 }, { "epoch": 0.5290068829891839, "grad_norm": 24.75, "learning_rate": 8.614715745940671e-06, "loss": 0.5514, "step": 1076 }, { "epoch": 0.5294985250737463, "grad_norm": 18.0, "learning_rate": 8.613727651786658e-06, "loss": 0.26, "step": 1077 }, { "epoch": 0.5299901671583087, "grad_norm": 11.375, "learning_rate": 8.612738349043979e-06, "loss": 0.2137, "step": 1078 }, { "epoch": 0.5304818092428711, "grad_norm": 8.3125, "learning_rate": 8.611747838003286e-06, "loss": 0.3868, "step": 1079 }, { "epoch": 0.5309734513274337, "grad_norm": 10.1875, "learning_rate": 8.610756118955583e-06, "loss": 0.4213, "step": 1080 }, { "epoch": 0.5314650934119961, "grad_norm": 25.5, "learning_rate": 8.609763192192234e-06, "loss": 0.4407, "step": 1081 }, { "epoch": 0.5319567354965585, "grad_norm": 22.875, "learning_rate": 8.608769058004955e-06, "loss": 0.4724, "step": 1082 }, { "epoch": 0.532448377581121, "grad_norm": 17.0, "learning_rate": 8.607773716685814e-06, "loss": 0.389, "step": 1083 }, { "epoch": 0.5329400196656834, "grad_norm": 30.375, "learning_rate": 8.606777168527236e-06, "loss": 0.5533, "step": 1084 }, { "epoch": 0.5334316617502458, "grad_norm": 9.0625, "learning_rate": 8.605779413822006e-06, "loss": 0.3381, "step": 1085 }, { "epoch": 0.5339233038348082, "grad_norm": 28.625, "learning_rate": 8.604780452863253e-06, "loss": 0.4857, "step": 1086 }, { "epoch": 0.5344149459193707, "grad_norm": 13.9375, "learning_rate": 8.603780285944468e-06, "loss": 0.4429, "step": 1087 }, { "epoch": 0.5349065880039331, "grad_norm": 20.75, "learning_rate": 8.602778913359494e-06, "loss": 0.4497, "step": 1088 }, { "epoch": 0.5353982300884956, "grad_norm": 13.5625, "learning_rate": 8.60177633540253e-06, "loss": 0.3314, "step": 1089 }, { "epoch": 0.5358898721730581, "grad_norm": 10.5, "learning_rate": 8.600772552368124e-06, "loss": 0.281, "step": 1090 }, { "epoch": 0.5363815142576205, "grad_norm": 13.875, "learning_rate": 8.599767564551183e-06, "loss": 0.2632, "step": 1091 }, { "epoch": 0.5368731563421829, "grad_norm": 25.375, "learning_rate": 8.598761372246968e-06, "loss": 0.3939, "step": 1092 }, { "epoch": 0.5373647984267453, "grad_norm": 16.25, "learning_rate": 8.59775397575109e-06, "loss": 0.395, "step": 1093 }, { "epoch": 0.5378564405113078, "grad_norm": 13.4375, "learning_rate": 8.596745375359517e-06, "loss": 0.3572, "step": 1094 }, { "epoch": 0.5383480825958702, "grad_norm": 23.625, "learning_rate": 8.595735571368569e-06, "loss": 0.3749, "step": 1095 }, { "epoch": 0.5388397246804326, "grad_norm": 19.0, "learning_rate": 8.594724564074922e-06, "loss": 0.4259, "step": 1096 }, { "epoch": 0.539331366764995, "grad_norm": 18.875, "learning_rate": 8.593712353775602e-06, "loss": 0.3634, "step": 1097 }, { "epoch": 0.5398230088495575, "grad_norm": 12.1875, "learning_rate": 8.592698940767991e-06, "loss": 0.3144, "step": 1098 }, { "epoch": 0.54031465093412, "grad_norm": 27.625, "learning_rate": 8.591684325349825e-06, "loss": 0.401, "step": 1099 }, { "epoch": 0.5408062930186824, "grad_norm": 10.3125, "learning_rate": 8.590668507819193e-06, "loss": 0.3506, "step": 1100 }, { "epoch": 0.5412979351032449, "grad_norm": 22.0, "learning_rate": 8.589651488474532e-06, "loss": 0.4775, "step": 1101 }, { "epoch": 0.5417895771878073, "grad_norm": 14.9375, "learning_rate": 8.588633267614638e-06, "loss": 0.2814, "step": 1102 }, { "epoch": 0.5422812192723697, "grad_norm": 29.5, "learning_rate": 8.58761384553866e-06, "loss": 0.3722, "step": 1103 }, { "epoch": 0.5427728613569321, "grad_norm": 20.375, "learning_rate": 8.586593222546097e-06, "loss": 0.5573, "step": 1104 }, { "epoch": 0.5432645034414946, "grad_norm": 15.1875, "learning_rate": 8.585571398936801e-06, "loss": 0.2512, "step": 1105 }, { "epoch": 0.543756145526057, "grad_norm": 28.5, "learning_rate": 8.584548375010978e-06, "loss": 0.6807, "step": 1106 }, { "epoch": 0.5442477876106194, "grad_norm": 26.75, "learning_rate": 8.583524151069187e-06, "loss": 0.4601, "step": 1107 }, { "epoch": 0.5447394296951819, "grad_norm": 16.75, "learning_rate": 8.58249872741234e-06, "loss": 0.3016, "step": 1108 }, { "epoch": 0.5452310717797444, "grad_norm": 25.75, "learning_rate": 8.581472104341697e-06, "loss": 0.4843, "step": 1109 }, { "epoch": 0.5457227138643068, "grad_norm": 12.0625, "learning_rate": 8.580444282158877e-06, "loss": 0.2682, "step": 1110 }, { "epoch": 0.5462143559488692, "grad_norm": 22.375, "learning_rate": 8.579415261165845e-06, "loss": 0.3431, "step": 1111 }, { "epoch": 0.5467059980334317, "grad_norm": 29.5, "learning_rate": 8.578385041664925e-06, "loss": 0.5613, "step": 1112 }, { "epoch": 0.5471976401179941, "grad_norm": 15.4375, "learning_rate": 8.577353623958786e-06, "loss": 0.3804, "step": 1113 }, { "epoch": 0.5476892822025565, "grad_norm": 27.0, "learning_rate": 8.576321008350456e-06, "loss": 0.4878, "step": 1114 }, { "epoch": 0.548180924287119, "grad_norm": 24.125, "learning_rate": 8.575287195143306e-06, "loss": 0.5742, "step": 1115 }, { "epoch": 0.5486725663716814, "grad_norm": 19.5, "learning_rate": 8.574252184641069e-06, "loss": 0.3436, "step": 1116 }, { "epoch": 0.5491642084562438, "grad_norm": 16.125, "learning_rate": 8.573215977147824e-06, "loss": 0.2134, "step": 1117 }, { "epoch": 0.5496558505408063, "grad_norm": 9.0, "learning_rate": 8.572178572967999e-06, "loss": 0.3408, "step": 1118 }, { "epoch": 0.5501474926253688, "grad_norm": 28.25, "learning_rate": 8.571139972406382e-06, "loss": 0.5507, "step": 1119 }, { "epoch": 0.5506391347099312, "grad_norm": 22.0, "learning_rate": 8.570100175768103e-06, "loss": 0.4557, "step": 1120 }, { "epoch": 0.5511307767944936, "grad_norm": 13.75, "learning_rate": 8.569059183358654e-06, "loss": 0.4321, "step": 1121 }, { "epoch": 0.551622418879056, "grad_norm": 16.375, "learning_rate": 8.568016995483867e-06, "loss": 0.3659, "step": 1122 }, { "epoch": 0.5521140609636185, "grad_norm": 19.875, "learning_rate": 8.566973612449933e-06, "loss": 0.2232, "step": 1123 }, { "epoch": 0.5526057030481809, "grad_norm": 15.1875, "learning_rate": 8.565929034563391e-06, "loss": 0.362, "step": 1124 }, { "epoch": 0.5530973451327433, "grad_norm": 25.125, "learning_rate": 8.564883262131132e-06, "loss": 0.2915, "step": 1125 }, { "epoch": 0.5535889872173058, "grad_norm": 13.3125, "learning_rate": 8.563836295460397e-06, "loss": 0.3099, "step": 1126 }, { "epoch": 0.5540806293018682, "grad_norm": 11.8125, "learning_rate": 8.56278813485878e-06, "loss": 0.2619, "step": 1127 }, { "epoch": 0.5545722713864307, "grad_norm": 11.875, "learning_rate": 8.561738780634225e-06, "loss": 0.3733, "step": 1128 }, { "epoch": 0.5550639134709932, "grad_norm": 17.375, "learning_rate": 8.560688233095022e-06, "loss": 0.3396, "step": 1129 }, { "epoch": 0.5555555555555556, "grad_norm": 19.0, "learning_rate": 8.55963649254982e-06, "loss": 0.4423, "step": 1130 }, { "epoch": 0.556047197640118, "grad_norm": 21.375, "learning_rate": 8.558583559307612e-06, "loss": 0.3086, "step": 1131 }, { "epoch": 0.5565388397246804, "grad_norm": 13.1875, "learning_rate": 8.557529433677743e-06, "loss": 0.3271, "step": 1132 }, { "epoch": 0.5570304818092429, "grad_norm": 25.75, "learning_rate": 8.55647411596991e-06, "loss": 0.2797, "step": 1133 }, { "epoch": 0.5575221238938053, "grad_norm": 17.0, "learning_rate": 8.55541760649416e-06, "loss": 0.4194, "step": 1134 }, { "epoch": 0.5580137659783677, "grad_norm": 15.25, "learning_rate": 8.554359905560886e-06, "loss": 0.4004, "step": 1135 }, { "epoch": 0.5585054080629301, "grad_norm": 34.0, "learning_rate": 8.553301013480837e-06, "loss": 0.7511, "step": 1136 }, { "epoch": 0.5589970501474927, "grad_norm": 13.6875, "learning_rate": 8.552240930565109e-06, "loss": 0.3024, "step": 1137 }, { "epoch": 0.5594886922320551, "grad_norm": 31.0, "learning_rate": 8.551179657125146e-06, "loss": 0.494, "step": 1138 }, { "epoch": 0.5599803343166175, "grad_norm": 19.75, "learning_rate": 8.550117193472747e-06, "loss": 0.2394, "step": 1139 }, { "epoch": 0.56047197640118, "grad_norm": 39.0, "learning_rate": 8.549053539920055e-06, "loss": 0.5055, "step": 1140 }, { "epoch": 0.5609636184857424, "grad_norm": 10.875, "learning_rate": 8.547988696779564e-06, "loss": 0.327, "step": 1141 }, { "epoch": 0.5614552605703048, "grad_norm": 8.6875, "learning_rate": 8.546922664364122e-06, "loss": 0.272, "step": 1142 }, { "epoch": 0.5619469026548672, "grad_norm": 17.625, "learning_rate": 8.545855442986921e-06, "loss": 0.3302, "step": 1143 }, { "epoch": 0.5624385447394297, "grad_norm": 9.8125, "learning_rate": 8.544787032961506e-06, "loss": 0.3099, "step": 1144 }, { "epoch": 0.5629301868239921, "grad_norm": 21.625, "learning_rate": 8.543717434601766e-06, "loss": 0.5219, "step": 1145 }, { "epoch": 0.5634218289085545, "grad_norm": 11.625, "learning_rate": 8.542646648221944e-06, "loss": 0.3637, "step": 1146 }, { "epoch": 0.5639134709931171, "grad_norm": 19.5, "learning_rate": 8.541574674136633e-06, "loss": 0.4742, "step": 1147 }, { "epoch": 0.5644051130776795, "grad_norm": 19.125, "learning_rate": 8.54050151266077e-06, "loss": 0.4039, "step": 1148 }, { "epoch": 0.5648967551622419, "grad_norm": 9.75, "learning_rate": 8.539427164109646e-06, "loss": 0.3118, "step": 1149 }, { "epoch": 0.5653883972468043, "grad_norm": 24.75, "learning_rate": 8.538351628798895e-06, "loss": 0.6302, "step": 1150 }, { "epoch": 0.5658800393313668, "grad_norm": 18.375, "learning_rate": 8.537274907044506e-06, "loss": 0.3087, "step": 1151 }, { "epoch": 0.5663716814159292, "grad_norm": 19.25, "learning_rate": 8.53619699916281e-06, "loss": 0.4667, "step": 1152 }, { "epoch": 0.5668633235004916, "grad_norm": 24.375, "learning_rate": 8.535117905470493e-06, "loss": 0.3971, "step": 1153 }, { "epoch": 0.567354965585054, "grad_norm": 22.0, "learning_rate": 8.534037626284583e-06, "loss": 0.5883, "step": 1154 }, { "epoch": 0.5678466076696165, "grad_norm": 11.9375, "learning_rate": 8.532956161922465e-06, "loss": 0.3126, "step": 1155 }, { "epoch": 0.5683382497541789, "grad_norm": 11.9375, "learning_rate": 8.53187351270186e-06, "loss": 0.4706, "step": 1156 }, { "epoch": 0.5688298918387414, "grad_norm": 24.25, "learning_rate": 8.53078967894085e-06, "loss": 0.4553, "step": 1157 }, { "epoch": 0.5693215339233039, "grad_norm": 22.5, "learning_rate": 8.529704660957854e-06, "loss": 0.4703, "step": 1158 }, { "epoch": 0.5698131760078663, "grad_norm": 30.875, "learning_rate": 8.528618459071648e-06, "loss": 0.3668, "step": 1159 }, { "epoch": 0.5703048180924287, "grad_norm": 32.75, "learning_rate": 8.52753107360135e-06, "loss": 0.4685, "step": 1160 }, { "epoch": 0.5707964601769911, "grad_norm": 28.25, "learning_rate": 8.526442504866426e-06, "loss": 0.3487, "step": 1161 }, { "epoch": 0.5712881022615536, "grad_norm": 16.375, "learning_rate": 8.525352753186693e-06, "loss": 0.3049, "step": 1162 }, { "epoch": 0.571779744346116, "grad_norm": 17.75, "learning_rate": 8.52426181888231e-06, "loss": 0.3953, "step": 1163 }, { "epoch": 0.5722713864306784, "grad_norm": 13.25, "learning_rate": 8.523169702273792e-06, "loss": 0.2443, "step": 1164 }, { "epoch": 0.5727630285152409, "grad_norm": 29.125, "learning_rate": 8.522076403681993e-06, "loss": 0.5021, "step": 1165 }, { "epoch": 0.5732546705998034, "grad_norm": 26.375, "learning_rate": 8.520981923428121e-06, "loss": 0.2443, "step": 1166 }, { "epoch": 0.5737463126843658, "grad_norm": 38.25, "learning_rate": 8.519886261833721e-06, "loss": 0.5083, "step": 1167 }, { "epoch": 0.5742379547689282, "grad_norm": 20.5, "learning_rate": 8.518789419220699e-06, "loss": 0.5091, "step": 1168 }, { "epoch": 0.5747295968534907, "grad_norm": 22.0, "learning_rate": 8.517691395911299e-06, "loss": 0.4257, "step": 1169 }, { "epoch": 0.5752212389380531, "grad_norm": 18.875, "learning_rate": 8.51659219222811e-06, "loss": 0.4683, "step": 1170 }, { "epoch": 0.5757128810226155, "grad_norm": 11.5, "learning_rate": 8.515491808494077e-06, "loss": 0.3517, "step": 1171 }, { "epoch": 0.576204523107178, "grad_norm": 40.75, "learning_rate": 8.514390245032481e-06, "loss": 0.8657, "step": 1172 }, { "epoch": 0.5766961651917404, "grad_norm": 12.3125, "learning_rate": 8.513287502166959e-06, "loss": 0.3609, "step": 1173 }, { "epoch": 0.5771878072763028, "grad_norm": 17.625, "learning_rate": 8.512183580221487e-06, "loss": 0.3896, "step": 1174 }, { "epoch": 0.5776794493608652, "grad_norm": 18.875, "learning_rate": 8.511078479520392e-06, "loss": 0.4704, "step": 1175 }, { "epoch": 0.5781710914454278, "grad_norm": 31.75, "learning_rate": 8.509972200388347e-06, "loss": 0.5268, "step": 1176 }, { "epoch": 0.5786627335299902, "grad_norm": 21.25, "learning_rate": 8.50886474315037e-06, "loss": 0.346, "step": 1177 }, { "epoch": 0.5791543756145526, "grad_norm": 13.375, "learning_rate": 8.507756108131823e-06, "loss": 0.3722, "step": 1178 }, { "epoch": 0.5796460176991151, "grad_norm": 22.75, "learning_rate": 8.50664629565842e-06, "loss": 0.5183, "step": 1179 }, { "epoch": 0.5801376597836775, "grad_norm": 19.875, "learning_rate": 8.505535306056215e-06, "loss": 0.425, "step": 1180 }, { "epoch": 0.5806293018682399, "grad_norm": 27.125, "learning_rate": 8.50442313965161e-06, "loss": 0.2604, "step": 1181 }, { "epoch": 0.5811209439528023, "grad_norm": 19.875, "learning_rate": 8.503309796771352e-06, "loss": 0.5261, "step": 1182 }, { "epoch": 0.5816125860373648, "grad_norm": 13.0, "learning_rate": 8.502195277742538e-06, "loss": 0.3857, "step": 1183 }, { "epoch": 0.5821042281219272, "grad_norm": 19.5, "learning_rate": 8.501079582892602e-06, "loss": 0.4798, "step": 1184 }, { "epoch": 0.5825958702064897, "grad_norm": 16.375, "learning_rate": 8.499962712549334e-06, "loss": 0.299, "step": 1185 }, { "epoch": 0.5830875122910522, "grad_norm": 10.125, "learning_rate": 8.498844667040861e-06, "loss": 0.2415, "step": 1186 }, { "epoch": 0.5835791543756146, "grad_norm": 18.5, "learning_rate": 8.497725446695657e-06, "loss": 0.3257, "step": 1187 }, { "epoch": 0.584070796460177, "grad_norm": 13.3125, "learning_rate": 8.496605051842546e-06, "loss": 0.3414, "step": 1188 }, { "epoch": 0.5845624385447394, "grad_norm": 31.625, "learning_rate": 8.495483482810688e-06, "loss": 0.5701, "step": 1189 }, { "epoch": 0.5850540806293019, "grad_norm": 19.25, "learning_rate": 8.494360739929599e-06, "loss": 0.4284, "step": 1190 }, { "epoch": 0.5855457227138643, "grad_norm": 13.875, "learning_rate": 8.49323682352913e-06, "loss": 0.3429, "step": 1191 }, { "epoch": 0.5860373647984267, "grad_norm": 29.125, "learning_rate": 8.492111733939483e-06, "loss": 0.5036, "step": 1192 }, { "epoch": 0.5865290068829891, "grad_norm": 39.25, "learning_rate": 8.490985471491201e-06, "loss": 0.5243, "step": 1193 }, { "epoch": 0.5870206489675516, "grad_norm": 21.0, "learning_rate": 8.489858036515174e-06, "loss": 0.5142, "step": 1194 }, { "epoch": 0.5875122910521141, "grad_norm": 18.375, "learning_rate": 8.488729429342637e-06, "loss": 0.2858, "step": 1195 }, { "epoch": 0.5880039331366765, "grad_norm": 14.0, "learning_rate": 8.487599650305165e-06, "loss": 0.2603, "step": 1196 }, { "epoch": 0.588495575221239, "grad_norm": 15.25, "learning_rate": 8.486468699734683e-06, "loss": 0.3363, "step": 1197 }, { "epoch": 0.5889872173058014, "grad_norm": 29.125, "learning_rate": 8.485336577963458e-06, "loss": 0.388, "step": 1198 }, { "epoch": 0.5894788593903638, "grad_norm": 15.1875, "learning_rate": 8.4842032853241e-06, "loss": 0.5228, "step": 1199 }, { "epoch": 0.5899705014749262, "grad_norm": 16.625, "learning_rate": 8.48306882214956e-06, "loss": 0.3259, "step": 1200 }, { "epoch": 0.5904621435594887, "grad_norm": 25.375, "learning_rate": 8.481933188773142e-06, "loss": 0.4185, "step": 1201 }, { "epoch": 0.5909537856440511, "grad_norm": 27.75, "learning_rate": 8.480796385528485e-06, "loss": 0.5441, "step": 1202 }, { "epoch": 0.5914454277286135, "grad_norm": 11.5625, "learning_rate": 8.479658412749575e-06, "loss": 0.3358, "step": 1203 }, { "epoch": 0.591937069813176, "grad_norm": 26.125, "learning_rate": 8.478519270770745e-06, "loss": 0.312, "step": 1204 }, { "epoch": 0.5924287118977385, "grad_norm": 19.125, "learning_rate": 8.477378959926663e-06, "loss": 0.4613, "step": 1205 }, { "epoch": 0.5929203539823009, "grad_norm": 14.3125, "learning_rate": 8.47623748055235e-06, "loss": 0.3501, "step": 1206 }, { "epoch": 0.5934119960668633, "grad_norm": 12.8125, "learning_rate": 8.475094832983162e-06, "loss": 0.3287, "step": 1207 }, { "epoch": 0.5939036381514258, "grad_norm": 20.5, "learning_rate": 8.473951017554805e-06, "loss": 0.4508, "step": 1208 }, { "epoch": 0.5943952802359882, "grad_norm": 18.125, "learning_rate": 8.472806034603326e-06, "loss": 0.2376, "step": 1209 }, { "epoch": 0.5948869223205506, "grad_norm": 9.25, "learning_rate": 8.47165988446511e-06, "loss": 0.1868, "step": 1210 }, { "epoch": 0.595378564405113, "grad_norm": 13.6875, "learning_rate": 8.470512567476893e-06, "loss": 0.4029, "step": 1211 }, { "epoch": 0.5958702064896755, "grad_norm": 12.125, "learning_rate": 8.469364083975746e-06, "loss": 0.3181, "step": 1212 }, { "epoch": 0.5963618485742379, "grad_norm": 18.875, "learning_rate": 8.468214434299091e-06, "loss": 0.4861, "step": 1213 }, { "epoch": 0.5968534906588004, "grad_norm": 17.375, "learning_rate": 8.467063618784686e-06, "loss": 0.5162, "step": 1214 }, { "epoch": 0.5973451327433629, "grad_norm": 19.625, "learning_rate": 8.465911637770633e-06, "loss": 0.248, "step": 1215 }, { "epoch": 0.5978367748279253, "grad_norm": 13.5625, "learning_rate": 8.46475849159538e-06, "loss": 0.2834, "step": 1216 }, { "epoch": 0.5983284169124877, "grad_norm": 10.8125, "learning_rate": 8.463604180597711e-06, "loss": 0.1545, "step": 1217 }, { "epoch": 0.5988200589970502, "grad_norm": 27.125, "learning_rate": 8.462448705116756e-06, "loss": 0.3624, "step": 1218 }, { "epoch": 0.5993117010816126, "grad_norm": 31.5, "learning_rate": 8.461292065491992e-06, "loss": 0.6003, "step": 1219 }, { "epoch": 0.599803343166175, "grad_norm": 28.0, "learning_rate": 8.460134262063226e-06, "loss": 0.4462, "step": 1220 }, { "epoch": 0.6002949852507374, "grad_norm": 13.125, "learning_rate": 8.458975295170616e-06, "loss": 0.3543, "step": 1221 }, { "epoch": 0.6007866273352999, "grad_norm": 20.75, "learning_rate": 8.457815165154662e-06, "loss": 0.3416, "step": 1222 }, { "epoch": 0.6012782694198623, "grad_norm": 10.375, "learning_rate": 8.4566538723562e-06, "loss": 0.2214, "step": 1223 }, { "epoch": 0.6017699115044248, "grad_norm": 30.0, "learning_rate": 8.455491417116412e-06, "loss": 0.4839, "step": 1224 }, { "epoch": 0.6022615535889873, "grad_norm": 10.125, "learning_rate": 8.454327799776823e-06, "loss": 0.1952, "step": 1225 }, { "epoch": 0.6027531956735497, "grad_norm": 19.0, "learning_rate": 8.453163020679292e-06, "loss": 0.3279, "step": 1226 }, { "epoch": 0.6032448377581121, "grad_norm": 13.25, "learning_rate": 8.451997080166029e-06, "loss": 0.3227, "step": 1227 }, { "epoch": 0.6037364798426745, "grad_norm": 11.4375, "learning_rate": 8.450829978579576e-06, "loss": 0.3005, "step": 1228 }, { "epoch": 0.604228121927237, "grad_norm": 25.125, "learning_rate": 8.449661716262825e-06, "loss": 0.3184, "step": 1229 }, { "epoch": 0.6047197640117994, "grad_norm": 18.125, "learning_rate": 8.448492293559e-06, "loss": 0.1739, "step": 1230 }, { "epoch": 0.6052114060963618, "grad_norm": 19.375, "learning_rate": 8.447321710811674e-06, "loss": 0.3429, "step": 1231 }, { "epoch": 0.6057030481809242, "grad_norm": 9.125, "learning_rate": 8.446149968364755e-06, "loss": 0.3269, "step": 1232 }, { "epoch": 0.6061946902654868, "grad_norm": 18.75, "learning_rate": 8.444977066562497e-06, "loss": 0.3165, "step": 1233 }, { "epoch": 0.6066863323500492, "grad_norm": 41.0, "learning_rate": 8.443803005749487e-06, "loss": 0.4546, "step": 1234 }, { "epoch": 0.6071779744346116, "grad_norm": 28.0, "learning_rate": 8.44262778627066e-06, "loss": 0.4905, "step": 1235 }, { "epoch": 0.6076696165191741, "grad_norm": 40.25, "learning_rate": 8.441451408471286e-06, "loss": 0.6515, "step": 1236 }, { "epoch": 0.6081612586037365, "grad_norm": 41.0, "learning_rate": 8.440273872696982e-06, "loss": 0.5691, "step": 1237 }, { "epoch": 0.6086529006882989, "grad_norm": 29.0, "learning_rate": 8.439095179293698e-06, "loss": 0.6287, "step": 1238 }, { "epoch": 0.6091445427728613, "grad_norm": 21.875, "learning_rate": 8.437915328607728e-06, "loss": 0.5201, "step": 1239 }, { "epoch": 0.6096361848574238, "grad_norm": 10.875, "learning_rate": 8.436734320985705e-06, "loss": 0.2256, "step": 1240 }, { "epoch": 0.6101278269419862, "grad_norm": 20.625, "learning_rate": 8.4355521567746e-06, "loss": 0.2985, "step": 1241 }, { "epoch": 0.6106194690265486, "grad_norm": 22.375, "learning_rate": 8.43436883632173e-06, "loss": 0.2839, "step": 1242 }, { "epoch": 0.6111111111111112, "grad_norm": 22.25, "learning_rate": 8.433184359974742e-06, "loss": 0.4193, "step": 1243 }, { "epoch": 0.6116027531956736, "grad_norm": 37.25, "learning_rate": 8.431998728081635e-06, "loss": 0.4873, "step": 1244 }, { "epoch": 0.612094395280236, "grad_norm": 41.25, "learning_rate": 8.430811940990735e-06, "loss": 0.3088, "step": 1245 }, { "epoch": 0.6125860373647984, "grad_norm": 19.5, "learning_rate": 8.429623999050715e-06, "loss": 0.3769, "step": 1246 }, { "epoch": 0.6130776794493609, "grad_norm": 15.375, "learning_rate": 8.428434902610584e-06, "loss": 0.3589, "step": 1247 }, { "epoch": 0.6135693215339233, "grad_norm": 24.25, "learning_rate": 8.427244652019695e-06, "loss": 0.2924, "step": 1248 }, { "epoch": 0.6140609636184857, "grad_norm": 30.625, "learning_rate": 8.426053247627731e-06, "loss": 0.3928, "step": 1249 }, { "epoch": 0.6145526057030481, "grad_norm": 26.5, "learning_rate": 8.424860689784725e-06, "loss": 0.4171, "step": 1250 }, { "epoch": 0.6150442477876106, "grad_norm": 33.75, "learning_rate": 8.423666978841039e-06, "loss": 0.5988, "step": 1251 }, { "epoch": 0.615535889872173, "grad_norm": 14.4375, "learning_rate": 8.422472115147381e-06, "loss": 0.2543, "step": 1252 }, { "epoch": 0.6160275319567355, "grad_norm": 19.0, "learning_rate": 8.421276099054794e-06, "loss": 0.2867, "step": 1253 }, { "epoch": 0.616519174041298, "grad_norm": 12.0625, "learning_rate": 8.42007893091466e-06, "loss": 0.3232, "step": 1254 }, { "epoch": 0.6170108161258604, "grad_norm": 19.75, "learning_rate": 8.4188806110787e-06, "loss": 0.2261, "step": 1255 }, { "epoch": 0.6175024582104228, "grad_norm": 12.9375, "learning_rate": 8.417681139898974e-06, "loss": 0.2883, "step": 1256 }, { "epoch": 0.6179941002949852, "grad_norm": 12.875, "learning_rate": 8.416480517727878e-06, "loss": 0.3329, "step": 1257 }, { "epoch": 0.6184857423795477, "grad_norm": 22.125, "learning_rate": 8.415278744918149e-06, "loss": 0.4808, "step": 1258 }, { "epoch": 0.6189773844641101, "grad_norm": 26.0, "learning_rate": 8.414075821822861e-06, "loss": 0.4612, "step": 1259 }, { "epoch": 0.6194690265486725, "grad_norm": 29.375, "learning_rate": 8.412871748795424e-06, "loss": 0.4548, "step": 1260 }, { "epoch": 0.619960668633235, "grad_norm": 6.65625, "learning_rate": 8.411666526189587e-06, "loss": 0.0439, "step": 1261 }, { "epoch": 0.6204523107177975, "grad_norm": 28.0, "learning_rate": 8.41046015435944e-06, "loss": 0.3997, "step": 1262 }, { "epoch": 0.6209439528023599, "grad_norm": 37.75, "learning_rate": 8.409252633659406e-06, "loss": 0.8187, "step": 1263 }, { "epoch": 0.6214355948869223, "grad_norm": 33.25, "learning_rate": 8.408043964444249e-06, "loss": 0.5343, "step": 1264 }, { "epoch": 0.6219272369714848, "grad_norm": 17.5, "learning_rate": 8.406834147069067e-06, "loss": 0.4208, "step": 1265 }, { "epoch": 0.6224188790560472, "grad_norm": 34.25, "learning_rate": 8.405623181889298e-06, "loss": 0.4619, "step": 1266 }, { "epoch": 0.6229105211406096, "grad_norm": 27.125, "learning_rate": 8.404411069260714e-06, "loss": 0.5964, "step": 1267 }, { "epoch": 0.6234021632251721, "grad_norm": 17.375, "learning_rate": 8.40319780953943e-06, "loss": 0.4445, "step": 1268 }, { "epoch": 0.6238938053097345, "grad_norm": 15.0, "learning_rate": 8.401983403081894e-06, "loss": 0.4083, "step": 1269 }, { "epoch": 0.6243854473942969, "grad_norm": 15.4375, "learning_rate": 8.400767850244888e-06, "loss": 0.3147, "step": 1270 }, { "epoch": 0.6248770894788593, "grad_norm": 22.5, "learning_rate": 8.399551151385538e-06, "loss": 0.3381, "step": 1271 }, { "epoch": 0.6253687315634219, "grad_norm": 20.625, "learning_rate": 8.398333306861303e-06, "loss": 0.4128, "step": 1272 }, { "epoch": 0.6258603736479843, "grad_norm": 10.625, "learning_rate": 8.397114317029975e-06, "loss": 0.2604, "step": 1273 }, { "epoch": 0.6263520157325467, "grad_norm": 26.75, "learning_rate": 8.395894182249688e-06, "loss": 0.5602, "step": 1274 }, { "epoch": 0.6268436578171092, "grad_norm": 28.25, "learning_rate": 8.39467290287891e-06, "loss": 0.2762, "step": 1275 }, { "epoch": 0.6273352999016716, "grad_norm": 21.625, "learning_rate": 8.393450479276446e-06, "loss": 0.283, "step": 1276 }, { "epoch": 0.627826941986234, "grad_norm": 16.75, "learning_rate": 8.392226911801437e-06, "loss": 0.2449, "step": 1277 }, { "epoch": 0.6283185840707964, "grad_norm": 40.5, "learning_rate": 8.391002200813358e-06, "loss": 0.3655, "step": 1278 }, { "epoch": 0.6288102261553589, "grad_norm": 33.5, "learning_rate": 8.389776346672025e-06, "loss": 0.5791, "step": 1279 }, { "epoch": 0.6293018682399213, "grad_norm": 16.875, "learning_rate": 8.388549349737583e-06, "loss": 0.2464, "step": 1280 }, { "epoch": 0.6297935103244838, "grad_norm": 27.0, "learning_rate": 8.387321210370518e-06, "loss": 0.3376, "step": 1281 }, { "epoch": 0.6302851524090463, "grad_norm": 19.25, "learning_rate": 8.386091928931653e-06, "loss": 0.4367, "step": 1282 }, { "epoch": 0.6307767944936087, "grad_norm": 28.0, "learning_rate": 8.38486150578214e-06, "loss": 0.4355, "step": 1283 }, { "epoch": 0.6312684365781711, "grad_norm": 14.875, "learning_rate": 8.383629941283469e-06, "loss": 0.3659, "step": 1284 }, { "epoch": 0.6317600786627335, "grad_norm": 23.75, "learning_rate": 8.382397235797469e-06, "loss": 0.5102, "step": 1285 }, { "epoch": 0.632251720747296, "grad_norm": 41.75, "learning_rate": 8.3811633896863e-06, "loss": 0.5849, "step": 1286 }, { "epoch": 0.6327433628318584, "grad_norm": 18.875, "learning_rate": 8.37992840331246e-06, "loss": 0.4529, "step": 1287 }, { "epoch": 0.6332350049164208, "grad_norm": 8.375, "learning_rate": 8.37869227703878e-06, "loss": 0.2897, "step": 1288 }, { "epoch": 0.6337266470009832, "grad_norm": 9.0625, "learning_rate": 8.377455011228427e-06, "loss": 0.3267, "step": 1289 }, { "epoch": 0.6342182890855457, "grad_norm": 15.9375, "learning_rate": 8.3762166062449e-06, "loss": 0.4462, "step": 1290 }, { "epoch": 0.6347099311701082, "grad_norm": 14.375, "learning_rate": 8.374977062452038e-06, "loss": 0.2469, "step": 1291 }, { "epoch": 0.6352015732546706, "grad_norm": 28.375, "learning_rate": 8.373736380214009e-06, "loss": 0.34, "step": 1292 }, { "epoch": 0.6356932153392331, "grad_norm": 21.125, "learning_rate": 8.37249455989532e-06, "loss": 0.4989, "step": 1293 }, { "epoch": 0.6361848574237955, "grad_norm": 15.1875, "learning_rate": 8.371251601860812e-06, "loss": 0.1927, "step": 1294 }, { "epoch": 0.6366764995083579, "grad_norm": 18.375, "learning_rate": 8.370007506475653e-06, "loss": 0.4592, "step": 1295 }, { "epoch": 0.6371681415929203, "grad_norm": 16.25, "learning_rate": 8.368762274105357e-06, "loss": 0.3665, "step": 1296 }, { "epoch": 0.6376597836774828, "grad_norm": 23.625, "learning_rate": 8.367515905115762e-06, "loss": 0.4544, "step": 1297 }, { "epoch": 0.6381514257620452, "grad_norm": 8.0625, "learning_rate": 8.366268399873044e-06, "loss": 0.3632, "step": 1298 }, { "epoch": 0.6386430678466076, "grad_norm": 9.1875, "learning_rate": 8.365019758743714e-06, "loss": 0.2823, "step": 1299 }, { "epoch": 0.63913470993117, "grad_norm": 23.0, "learning_rate": 8.363769982094615e-06, "loss": 0.4341, "step": 1300 }, { "epoch": 0.6396263520157326, "grad_norm": 10.1875, "learning_rate": 8.362519070292923e-06, "loss": 0.2671, "step": 1301 }, { "epoch": 0.640117994100295, "grad_norm": 18.125, "learning_rate": 8.36126702370615e-06, "loss": 0.441, "step": 1302 }, { "epoch": 0.6406096361848574, "grad_norm": 29.0, "learning_rate": 8.360013842702139e-06, "loss": 0.5906, "step": 1303 }, { "epoch": 0.6411012782694199, "grad_norm": 15.0625, "learning_rate": 8.358759527649066e-06, "loss": 0.4159, "step": 1304 }, { "epoch": 0.6415929203539823, "grad_norm": 39.5, "learning_rate": 8.357504078915444e-06, "loss": 0.5205, "step": 1305 }, { "epoch": 0.6420845624385447, "grad_norm": 18.375, "learning_rate": 8.356247496870115e-06, "loss": 0.4755, "step": 1306 }, { "epoch": 0.6425762045231072, "grad_norm": 15.6875, "learning_rate": 8.354989781882253e-06, "loss": 0.3468, "step": 1307 }, { "epoch": 0.6430678466076696, "grad_norm": 14.375, "learning_rate": 8.35373093432137e-06, "loss": 0.487, "step": 1308 }, { "epoch": 0.643559488692232, "grad_norm": 17.75, "learning_rate": 8.352470954557307e-06, "loss": 0.2662, "step": 1309 }, { "epoch": 0.6440511307767945, "grad_norm": 24.75, "learning_rate": 8.351209842960239e-06, "loss": 0.3538, "step": 1310 }, { "epoch": 0.644542772861357, "grad_norm": 20.625, "learning_rate": 8.349947599900673e-06, "loss": 0.5513, "step": 1311 }, { "epoch": 0.6450344149459194, "grad_norm": 25.125, "learning_rate": 8.348684225749449e-06, "loss": 0.5462, "step": 1312 }, { "epoch": 0.6455260570304818, "grad_norm": 21.75, "learning_rate": 8.347419720877736e-06, "loss": 0.4196, "step": 1313 }, { "epoch": 0.6460176991150443, "grad_norm": 20.375, "learning_rate": 8.34615408565704e-06, "loss": 0.2947, "step": 1314 }, { "epoch": 0.6465093411996067, "grad_norm": 12.875, "learning_rate": 8.3448873204592e-06, "loss": 0.4232, "step": 1315 }, { "epoch": 0.6470009832841691, "grad_norm": 19.0, "learning_rate": 8.343619425656378e-06, "loss": 0.5259, "step": 1316 }, { "epoch": 0.6474926253687315, "grad_norm": 14.0625, "learning_rate": 8.342350401621078e-06, "loss": 0.4482, "step": 1317 }, { "epoch": 0.647984267453294, "grad_norm": 28.0, "learning_rate": 8.341080248726132e-06, "loss": 0.6564, "step": 1318 }, { "epoch": 0.6484759095378564, "grad_norm": 24.875, "learning_rate": 8.3398089673447e-06, "loss": 0.4949, "step": 1319 }, { "epoch": 0.6489675516224189, "grad_norm": 7.78125, "learning_rate": 8.338536557850282e-06, "loss": 0.4194, "step": 1320 }, { "epoch": 0.6494591937069814, "grad_norm": 12.375, "learning_rate": 8.337263020616698e-06, "loss": 0.2589, "step": 1321 }, { "epoch": 0.6499508357915438, "grad_norm": 8.375, "learning_rate": 8.335988356018112e-06, "loss": 0.3264, "step": 1322 }, { "epoch": 0.6504424778761062, "grad_norm": 16.75, "learning_rate": 8.334712564429008e-06, "loss": 0.3047, "step": 1323 }, { "epoch": 0.6509341199606686, "grad_norm": 20.25, "learning_rate": 8.333435646224208e-06, "loss": 0.3947, "step": 1324 }, { "epoch": 0.6514257620452311, "grad_norm": 11.4375, "learning_rate": 8.332157601778864e-06, "loss": 0.4043, "step": 1325 }, { "epoch": 0.6519174041297935, "grad_norm": 21.375, "learning_rate": 8.330878431468456e-06, "loss": 0.2823, "step": 1326 }, { "epoch": 0.6524090462143559, "grad_norm": 10.8125, "learning_rate": 8.329598135668798e-06, "loss": 0.3068, "step": 1327 }, { "epoch": 0.6529006882989183, "grad_norm": 26.125, "learning_rate": 8.32831671475603e-06, "loss": 0.5176, "step": 1328 }, { "epoch": 0.6533923303834809, "grad_norm": 18.0, "learning_rate": 8.32703416910663e-06, "loss": 0.3832, "step": 1329 }, { "epoch": 0.6538839724680433, "grad_norm": 20.625, "learning_rate": 8.3257504990974e-06, "loss": 0.3466, "step": 1330 }, { "epoch": 0.6543756145526057, "grad_norm": 15.8125, "learning_rate": 8.324465705105478e-06, "loss": 0.5236, "step": 1331 }, { "epoch": 0.6548672566371682, "grad_norm": 21.75, "learning_rate": 8.323179787508323e-06, "loss": 0.4429, "step": 1332 }, { "epoch": 0.6553588987217306, "grad_norm": 13.125, "learning_rate": 8.321892746683733e-06, "loss": 0.2523, "step": 1333 }, { "epoch": 0.655850540806293, "grad_norm": 9.375, "learning_rate": 8.320604583009834e-06, "loss": 0.3263, "step": 1334 }, { "epoch": 0.6563421828908554, "grad_norm": 11.0625, "learning_rate": 8.31931529686508e-06, "loss": 0.3236, "step": 1335 }, { "epoch": 0.6568338249754179, "grad_norm": 14.25, "learning_rate": 8.318024888628255e-06, "loss": 0.321, "step": 1336 }, { "epoch": 0.6573254670599803, "grad_norm": 16.625, "learning_rate": 8.316733358678473e-06, "loss": 0.1845, "step": 1337 }, { "epoch": 0.6578171091445427, "grad_norm": 10.875, "learning_rate": 8.315440707395178e-06, "loss": 0.3908, "step": 1338 }, { "epoch": 0.6583087512291053, "grad_norm": 16.0, "learning_rate": 8.314146935158145e-06, "loss": 0.2567, "step": 1339 }, { "epoch": 0.6588003933136677, "grad_norm": 16.25, "learning_rate": 8.312852042347475e-06, "loss": 0.4019, "step": 1340 }, { "epoch": 0.6592920353982301, "grad_norm": 22.75, "learning_rate": 8.3115560293436e-06, "loss": 0.5306, "step": 1341 }, { "epoch": 0.6597836774827925, "grad_norm": 8.125, "learning_rate": 8.310258896527279e-06, "loss": 0.3393, "step": 1342 }, { "epoch": 0.660275319567355, "grad_norm": 19.625, "learning_rate": 8.308960644279606e-06, "loss": 0.2848, "step": 1343 }, { "epoch": 0.6607669616519174, "grad_norm": 25.625, "learning_rate": 8.307661272981995e-06, "loss": 0.5, "step": 1344 }, { "epoch": 0.6612586037364798, "grad_norm": 17.125, "learning_rate": 8.306360783016199e-06, "loss": 0.2073, "step": 1345 }, { "epoch": 0.6617502458210422, "grad_norm": 17.875, "learning_rate": 8.305059174764289e-06, "loss": 0.3928, "step": 1346 }, { "epoch": 0.6622418879056047, "grad_norm": 29.5, "learning_rate": 8.303756448608672e-06, "loss": 0.3314, "step": 1347 }, { "epoch": 0.6627335299901671, "grad_norm": 16.375, "learning_rate": 8.302452604932082e-06, "loss": 0.5214, "step": 1348 }, { "epoch": 0.6632251720747296, "grad_norm": 10.8125, "learning_rate": 8.30114764411758e-06, "loss": 0.3217, "step": 1349 }, { "epoch": 0.6637168141592921, "grad_norm": 24.375, "learning_rate": 8.299841566548555e-06, "loss": 0.4958, "step": 1350 }, { "epoch": 0.6642084562438545, "grad_norm": 13.5, "learning_rate": 8.298534372608724e-06, "loss": 0.295, "step": 1351 }, { "epoch": 0.6647000983284169, "grad_norm": 20.125, "learning_rate": 8.297226062682134e-06, "loss": 0.3809, "step": 1352 }, { "epoch": 0.6651917404129793, "grad_norm": 16.5, "learning_rate": 8.29591663715316e-06, "loss": 0.5424, "step": 1353 }, { "epoch": 0.6656833824975418, "grad_norm": 7.09375, "learning_rate": 8.294606096406503e-06, "loss": 0.1486, "step": 1354 }, { "epoch": 0.6661750245821042, "grad_norm": 14.125, "learning_rate": 8.29329444082719e-06, "loss": 0.2454, "step": 1355 }, { "epoch": 0.6666666666666666, "grad_norm": 13.75, "learning_rate": 8.291981670800578e-06, "loss": 0.2551, "step": 1356 }, { "epoch": 0.6671583087512291, "grad_norm": 32.25, "learning_rate": 8.290667786712353e-06, "loss": 0.6281, "step": 1357 }, { "epoch": 0.6676499508357916, "grad_norm": 13.0625, "learning_rate": 8.289352788948524e-06, "loss": 0.3782, "step": 1358 }, { "epoch": 0.668141592920354, "grad_norm": 27.125, "learning_rate": 8.28803667789543e-06, "loss": 0.4014, "step": 1359 }, { "epoch": 0.6686332350049164, "grad_norm": 17.0, "learning_rate": 8.286719453939739e-06, "loss": 0.3543, "step": 1360 }, { "epoch": 0.6691248770894789, "grad_norm": 14.0, "learning_rate": 8.285401117468441e-06, "loss": 0.3207, "step": 1361 }, { "epoch": 0.6696165191740413, "grad_norm": 19.625, "learning_rate": 8.284081668868856e-06, "loss": 0.3232, "step": 1362 }, { "epoch": 0.6701081612586037, "grad_norm": 25.125, "learning_rate": 8.282761108528629e-06, "loss": 0.5657, "step": 1363 }, { "epoch": 0.6705998033431662, "grad_norm": 18.125, "learning_rate": 8.281439436835735e-06, "loss": 0.4153, "step": 1364 }, { "epoch": 0.6710914454277286, "grad_norm": 23.875, "learning_rate": 8.280116654178473e-06, "loss": 0.4953, "step": 1365 }, { "epoch": 0.671583087512291, "grad_norm": 16.625, "learning_rate": 8.278792760945466e-06, "loss": 0.2542, "step": 1366 }, { "epoch": 0.6720747295968534, "grad_norm": 25.25, "learning_rate": 8.27746775752567e-06, "loss": 0.4254, "step": 1367 }, { "epoch": 0.672566371681416, "grad_norm": 17.25, "learning_rate": 8.276141644308358e-06, "loss": 0.387, "step": 1368 }, { "epoch": 0.6730580137659784, "grad_norm": 12.3125, "learning_rate": 8.27481442168314e-06, "loss": 0.243, "step": 1369 }, { "epoch": 0.6735496558505408, "grad_norm": 32.0, "learning_rate": 8.27348609003994e-06, "loss": 0.589, "step": 1370 }, { "epoch": 0.6740412979351033, "grad_norm": 10.0, "learning_rate": 8.27215664976902e-06, "loss": 0.2447, "step": 1371 }, { "epoch": 0.6745329400196657, "grad_norm": 33.75, "learning_rate": 8.270826101260958e-06, "loss": 0.5015, "step": 1372 }, { "epoch": 0.6750245821042281, "grad_norm": 28.625, "learning_rate": 8.26949444490666e-06, "loss": 0.5376, "step": 1373 }, { "epoch": 0.6755162241887905, "grad_norm": 21.875, "learning_rate": 8.268161681097363e-06, "loss": 0.4306, "step": 1374 }, { "epoch": 0.676007866273353, "grad_norm": 14.125, "learning_rate": 8.266827810224621e-06, "loss": 0.4018, "step": 1375 }, { "epoch": 0.6764995083579154, "grad_norm": 14.9375, "learning_rate": 8.265492832680323e-06, "loss": 0.3649, "step": 1376 }, { "epoch": 0.6769911504424779, "grad_norm": 17.0, "learning_rate": 8.26415674885667e-06, "loss": 0.2674, "step": 1377 }, { "epoch": 0.6774827925270404, "grad_norm": 16.75, "learning_rate": 8.2628195591462e-06, "loss": 0.3389, "step": 1378 }, { "epoch": 0.6779744346116028, "grad_norm": 18.375, "learning_rate": 8.26148126394177e-06, "loss": 0.2752, "step": 1379 }, { "epoch": 0.6784660766961652, "grad_norm": 18.125, "learning_rate": 8.260141863636563e-06, "loss": 0.2442, "step": 1380 }, { "epoch": 0.6789577187807276, "grad_norm": 19.375, "learning_rate": 8.258801358624088e-06, "loss": 0.3527, "step": 1381 }, { "epoch": 0.6794493608652901, "grad_norm": 23.375, "learning_rate": 8.257459749298174e-06, "loss": 0.2161, "step": 1382 }, { "epoch": 0.6799410029498525, "grad_norm": 13.625, "learning_rate": 8.256117036052984e-06, "loss": 0.3312, "step": 1383 }, { "epoch": 0.6804326450344149, "grad_norm": 19.0, "learning_rate": 8.254773219282995e-06, "loss": 0.4409, "step": 1384 }, { "epoch": 0.6809242871189773, "grad_norm": 15.1875, "learning_rate": 8.253428299383012e-06, "loss": 0.1977, "step": 1385 }, { "epoch": 0.6814159292035398, "grad_norm": 13.6875, "learning_rate": 8.252082276748167e-06, "loss": 0.4435, "step": 1386 }, { "epoch": 0.6819075712881023, "grad_norm": 21.125, "learning_rate": 8.25073515177391e-06, "loss": 0.3122, "step": 1387 }, { "epoch": 0.6823992133726647, "grad_norm": 15.25, "learning_rate": 8.24938692485602e-06, "loss": 0.2782, "step": 1388 }, { "epoch": 0.6828908554572272, "grad_norm": 22.25, "learning_rate": 8.2480375963906e-06, "loss": 0.4742, "step": 1389 }, { "epoch": 0.6833824975417896, "grad_norm": 38.25, "learning_rate": 8.246687166774072e-06, "loss": 0.591, "step": 1390 }, { "epoch": 0.683874139626352, "grad_norm": 26.125, "learning_rate": 8.245335636403185e-06, "loss": 0.4376, "step": 1391 }, { "epoch": 0.6843657817109144, "grad_norm": 29.875, "learning_rate": 8.24398300567501e-06, "loss": 0.4867, "step": 1392 }, { "epoch": 0.6848574237954769, "grad_norm": 22.375, "learning_rate": 8.242629274986943e-06, "loss": 0.5323, "step": 1393 }, { "epoch": 0.6853490658800393, "grad_norm": 23.5, "learning_rate": 8.241274444736703e-06, "loss": 0.4575, "step": 1394 }, { "epoch": 0.6858407079646017, "grad_norm": 16.375, "learning_rate": 8.239918515322327e-06, "loss": 0.3896, "step": 1395 }, { "epoch": 0.6863323500491643, "grad_norm": 28.25, "learning_rate": 8.238561487142183e-06, "loss": 0.528, "step": 1396 }, { "epoch": 0.6868239921337267, "grad_norm": 11.5625, "learning_rate": 8.237203360594956e-06, "loss": 0.3014, "step": 1397 }, { "epoch": 0.6873156342182891, "grad_norm": 24.5, "learning_rate": 8.235844136079655e-06, "loss": 0.4245, "step": 1398 }, { "epoch": 0.6878072763028515, "grad_norm": 27.625, "learning_rate": 8.234483813995614e-06, "loss": 0.408, "step": 1399 }, { "epoch": 0.688298918387414, "grad_norm": 30.5, "learning_rate": 8.233122394742484e-06, "loss": 0.5736, "step": 1400 }, { "epoch": 0.6887905604719764, "grad_norm": 15.625, "learning_rate": 8.231759878720247e-06, "loss": 0.4041, "step": 1401 }, { "epoch": 0.6892822025565388, "grad_norm": 19.875, "learning_rate": 8.230396266329197e-06, "loss": 0.4219, "step": 1402 }, { "epoch": 0.6897738446411013, "grad_norm": 28.875, "learning_rate": 8.22903155796996e-06, "loss": 0.6246, "step": 1403 }, { "epoch": 0.6902654867256637, "grad_norm": 19.25, "learning_rate": 8.227665754043477e-06, "loss": 0.4065, "step": 1404 }, { "epoch": 0.6907571288102261, "grad_norm": 17.5, "learning_rate": 8.22629885495101e-06, "loss": 0.5566, "step": 1405 }, { "epoch": 0.6912487708947886, "grad_norm": 23.25, "learning_rate": 8.22493086109415e-06, "loss": 0.3373, "step": 1406 }, { "epoch": 0.6917404129793511, "grad_norm": 17.125, "learning_rate": 8.223561772874803e-06, "loss": 0.4574, "step": 1407 }, { "epoch": 0.6922320550639135, "grad_norm": 16.625, "learning_rate": 8.2221915906952e-06, "loss": 0.3663, "step": 1408 }, { "epoch": 0.6927236971484759, "grad_norm": 19.625, "learning_rate": 8.220820314957893e-06, "loss": 0.4241, "step": 1409 }, { "epoch": 0.6932153392330384, "grad_norm": 38.25, "learning_rate": 8.219447946065754e-06, "loss": 0.5684, "step": 1410 }, { "epoch": 0.6937069813176008, "grad_norm": 18.25, "learning_rate": 8.218074484421977e-06, "loss": 0.3999, "step": 1411 }, { "epoch": 0.6941986234021632, "grad_norm": 25.25, "learning_rate": 8.216699930430076e-06, "loss": 0.3502, "step": 1412 }, { "epoch": 0.6946902654867256, "grad_norm": 22.25, "learning_rate": 8.215324284493887e-06, "loss": 0.2757, "step": 1413 }, { "epoch": 0.6951819075712881, "grad_norm": 18.0, "learning_rate": 8.213947547017568e-06, "loss": 0.3594, "step": 1414 }, { "epoch": 0.6956735496558505, "grad_norm": 15.1875, "learning_rate": 8.212569718405595e-06, "loss": 0.3027, "step": 1415 }, { "epoch": 0.696165191740413, "grad_norm": 17.125, "learning_rate": 8.211190799062766e-06, "loss": 0.4303, "step": 1416 }, { "epoch": 0.6966568338249755, "grad_norm": 18.875, "learning_rate": 8.209810789394198e-06, "loss": 0.3805, "step": 1417 }, { "epoch": 0.6971484759095379, "grad_norm": 14.5, "learning_rate": 8.208429689805333e-06, "loss": 0.3174, "step": 1418 }, { "epoch": 0.6976401179941003, "grad_norm": 15.6875, "learning_rate": 8.207047500701925e-06, "loss": 0.3928, "step": 1419 }, { "epoch": 0.6981317600786627, "grad_norm": 11.875, "learning_rate": 8.205664222490058e-06, "loss": 0.3397, "step": 1420 }, { "epoch": 0.6986234021632252, "grad_norm": 19.0, "learning_rate": 8.204279855576128e-06, "loss": 0.2639, "step": 1421 }, { "epoch": 0.6991150442477876, "grad_norm": 12.6875, "learning_rate": 8.202894400366854e-06, "loss": 0.4329, "step": 1422 }, { "epoch": 0.69960668633235, "grad_norm": 12.4375, "learning_rate": 8.201507857269274e-06, "loss": 0.2896, "step": 1423 }, { "epoch": 0.7000983284169124, "grad_norm": 17.875, "learning_rate": 8.20012022669075e-06, "loss": 0.3333, "step": 1424 }, { "epoch": 0.700589970501475, "grad_norm": 28.0, "learning_rate": 8.198731509038954e-06, "loss": 0.5884, "step": 1425 }, { "epoch": 0.7010816125860374, "grad_norm": 23.0, "learning_rate": 8.197341704721885e-06, "loss": 0.3232, "step": 1426 }, { "epoch": 0.7015732546705998, "grad_norm": 36.25, "learning_rate": 8.195950814147861e-06, "loss": 0.6411, "step": 1427 }, { "epoch": 0.7020648967551623, "grad_norm": 16.125, "learning_rate": 8.194558837725515e-06, "loss": 0.4087, "step": 1428 }, { "epoch": 0.7025565388397247, "grad_norm": 22.125, "learning_rate": 8.193165775863803e-06, "loss": 0.5006, "step": 1429 }, { "epoch": 0.7030481809242871, "grad_norm": 25.5, "learning_rate": 8.191771628971998e-06, "loss": 0.3838, "step": 1430 }, { "epoch": 0.7035398230088495, "grad_norm": 16.0, "learning_rate": 8.190376397459692e-06, "loss": 0.3289, "step": 1431 }, { "epoch": 0.704031465093412, "grad_norm": 17.625, "learning_rate": 8.188980081736795e-06, "loss": 0.4684, "step": 1432 }, { "epoch": 0.7045231071779744, "grad_norm": 24.25, "learning_rate": 8.187582682213536e-06, "loss": 0.5216, "step": 1433 }, { "epoch": 0.7050147492625368, "grad_norm": 17.625, "learning_rate": 8.186184199300463e-06, "loss": 0.3898, "step": 1434 }, { "epoch": 0.7055063913470994, "grad_norm": 19.5, "learning_rate": 8.184784633408444e-06, "loss": 0.4467, "step": 1435 }, { "epoch": 0.7059980334316618, "grad_norm": 42.5, "learning_rate": 8.18338398494866e-06, "loss": 0.4908, "step": 1436 }, { "epoch": 0.7064896755162242, "grad_norm": 15.6875, "learning_rate": 8.181982254332616e-06, "loss": 0.4656, "step": 1437 }, { "epoch": 0.7069813176007866, "grad_norm": 14.6875, "learning_rate": 8.180579441972129e-06, "loss": 0.3091, "step": 1438 }, { "epoch": 0.7074729596853491, "grad_norm": 12.5625, "learning_rate": 8.179175548279339e-06, "loss": 0.3484, "step": 1439 }, { "epoch": 0.7079646017699115, "grad_norm": 17.25, "learning_rate": 8.177770573666702e-06, "loss": 0.2055, "step": 1440 }, { "epoch": 0.7084562438544739, "grad_norm": 10.3125, "learning_rate": 8.176364518546989e-06, "loss": 0.2659, "step": 1441 }, { "epoch": 0.7089478859390363, "grad_norm": 26.125, "learning_rate": 8.174957383333293e-06, "loss": 0.5015, "step": 1442 }, { "epoch": 0.7094395280235988, "grad_norm": 11.875, "learning_rate": 8.17354916843902e-06, "loss": 0.3851, "step": 1443 }, { "epoch": 0.7099311701081613, "grad_norm": 28.5, "learning_rate": 8.172139874277898e-06, "loss": 0.6297, "step": 1444 }, { "epoch": 0.7104228121927237, "grad_norm": 11.375, "learning_rate": 8.170729501263965e-06, "loss": 0.1972, "step": 1445 }, { "epoch": 0.7109144542772862, "grad_norm": 13.125, "learning_rate": 8.169318049811584e-06, "loss": 0.4628, "step": 1446 }, { "epoch": 0.7114060963618486, "grad_norm": 20.375, "learning_rate": 8.16790552033543e-06, "loss": 0.3883, "step": 1447 }, { "epoch": 0.711897738446411, "grad_norm": 7.71875, "learning_rate": 8.166491913250494e-06, "loss": 0.2388, "step": 1448 }, { "epoch": 0.7123893805309734, "grad_norm": 13.4375, "learning_rate": 8.165077228972085e-06, "loss": 0.4146, "step": 1449 }, { "epoch": 0.7128810226155359, "grad_norm": 14.3125, "learning_rate": 8.163661467915833e-06, "loss": 0.343, "step": 1450 }, { "epoch": 0.7133726647000983, "grad_norm": 42.5, "learning_rate": 8.162244630497677e-06, "loss": 0.7335, "step": 1451 }, { "epoch": 0.7138643067846607, "grad_norm": 21.375, "learning_rate": 8.160826717133875e-06, "loss": 0.4039, "step": 1452 }, { "epoch": 0.7143559488692232, "grad_norm": 26.125, "learning_rate": 8.159407728241002e-06, "loss": 0.5721, "step": 1453 }, { "epoch": 0.7148475909537857, "grad_norm": 24.25, "learning_rate": 8.15798766423595e-06, "loss": 0.3583, "step": 1454 }, { "epoch": 0.7153392330383481, "grad_norm": 23.5, "learning_rate": 8.156566525535923e-06, "loss": 0.6165, "step": 1455 }, { "epoch": 0.7158308751229105, "grad_norm": 14.3125, "learning_rate": 8.155144312558447e-06, "loss": 0.303, "step": 1456 }, { "epoch": 0.716322517207473, "grad_norm": 16.75, "learning_rate": 8.153721025721355e-06, "loss": 0.3431, "step": 1457 }, { "epoch": 0.7168141592920354, "grad_norm": 10.125, "learning_rate": 8.152296665442803e-06, "loss": 0.3108, "step": 1458 }, { "epoch": 0.7173058013765978, "grad_norm": 15.3125, "learning_rate": 8.150871232141257e-06, "loss": 0.3952, "step": 1459 }, { "epoch": 0.7177974434611603, "grad_norm": 14.625, "learning_rate": 8.149444726235504e-06, "loss": 0.3932, "step": 1460 }, { "epoch": 0.7182890855457227, "grad_norm": 14.6875, "learning_rate": 8.148017148144642e-06, "loss": 0.311, "step": 1461 }, { "epoch": 0.7187807276302851, "grad_norm": 11.1875, "learning_rate": 8.146588498288081e-06, "loss": 0.3597, "step": 1462 }, { "epoch": 0.7192723697148475, "grad_norm": 10.3125, "learning_rate": 8.145158777085557e-06, "loss": 0.1976, "step": 1463 }, { "epoch": 0.7197640117994101, "grad_norm": 16.375, "learning_rate": 8.14372798495711e-06, "loss": 0.3795, "step": 1464 }, { "epoch": 0.7202556538839725, "grad_norm": 12.0625, "learning_rate": 8.142296122323097e-06, "loss": 0.3042, "step": 1465 }, { "epoch": 0.7207472959685349, "grad_norm": 13.0625, "learning_rate": 8.14086318960419e-06, "loss": 0.2659, "step": 1466 }, { "epoch": 0.7212389380530974, "grad_norm": 34.0, "learning_rate": 8.13942918722138e-06, "loss": 0.595, "step": 1467 }, { "epoch": 0.7217305801376598, "grad_norm": 12.6875, "learning_rate": 8.137994115595965e-06, "loss": 0.223, "step": 1468 }, { "epoch": 0.7222222222222222, "grad_norm": 10.1875, "learning_rate": 8.136557975149562e-06, "loss": 0.2163, "step": 1469 }, { "epoch": 0.7227138643067846, "grad_norm": 22.75, "learning_rate": 8.1351207663041e-06, "loss": 0.3502, "step": 1470 }, { "epoch": 0.7232055063913471, "grad_norm": 39.5, "learning_rate": 8.133682489481823e-06, "loss": 0.5201, "step": 1471 }, { "epoch": 0.7236971484759095, "grad_norm": 17.625, "learning_rate": 8.132243145105286e-06, "loss": 0.4642, "step": 1472 }, { "epoch": 0.724188790560472, "grad_norm": 23.0, "learning_rate": 8.130802733597366e-06, "loss": 0.3321, "step": 1473 }, { "epoch": 0.7246804326450345, "grad_norm": 37.75, "learning_rate": 8.129361255381239e-06, "loss": 0.6336, "step": 1474 }, { "epoch": 0.7251720747295969, "grad_norm": 18.625, "learning_rate": 8.127918710880407e-06, "loss": 0.3763, "step": 1475 }, { "epoch": 0.7256637168141593, "grad_norm": 14.375, "learning_rate": 8.12647510051868e-06, "loss": 0.3446, "step": 1476 }, { "epoch": 0.7261553588987217, "grad_norm": 5.90625, "learning_rate": 8.125030424720185e-06, "loss": 0.1792, "step": 1477 }, { "epoch": 0.7266470009832842, "grad_norm": 36.5, "learning_rate": 8.123584683909354e-06, "loss": 0.5469, "step": 1478 }, { "epoch": 0.7271386430678466, "grad_norm": 17.875, "learning_rate": 8.12213787851094e-06, "loss": 0.4529, "step": 1479 }, { "epoch": 0.727630285152409, "grad_norm": 26.0, "learning_rate": 8.120690008950009e-06, "loss": 0.3744, "step": 1480 }, { "epoch": 0.7281219272369714, "grad_norm": 21.125, "learning_rate": 8.119241075651928e-06, "loss": 0.4823, "step": 1481 }, { "epoch": 0.7286135693215339, "grad_norm": 19.75, "learning_rate": 8.117791079042393e-06, "loss": 0.2635, "step": 1482 }, { "epoch": 0.7291052114060964, "grad_norm": 14.1875, "learning_rate": 8.116340019547403e-06, "loss": 0.2274, "step": 1483 }, { "epoch": 0.7295968534906588, "grad_norm": 30.25, "learning_rate": 8.114887897593266e-06, "loss": 0.468, "step": 1484 }, { "epoch": 0.7300884955752213, "grad_norm": 18.25, "learning_rate": 8.11343471360661e-06, "loss": 0.4975, "step": 1485 }, { "epoch": 0.7305801376597837, "grad_norm": 31.875, "learning_rate": 8.111980468014374e-06, "loss": 0.6518, "step": 1486 }, { "epoch": 0.7310717797443461, "grad_norm": 13.0625, "learning_rate": 8.110525161243802e-06, "loss": 0.2047, "step": 1487 }, { "epoch": 0.7315634218289085, "grad_norm": 23.5, "learning_rate": 8.109068793722456e-06, "loss": 0.284, "step": 1488 }, { "epoch": 0.732055063913471, "grad_norm": 37.5, "learning_rate": 8.107611365878211e-06, "loss": 0.5435, "step": 1489 }, { "epoch": 0.7325467059980334, "grad_norm": 18.875, "learning_rate": 8.106152878139249e-06, "loss": 0.425, "step": 1490 }, { "epoch": 0.7330383480825958, "grad_norm": 33.75, "learning_rate": 8.104693330934063e-06, "loss": 0.5904, "step": 1491 }, { "epoch": 0.7335299901671584, "grad_norm": 16.0, "learning_rate": 8.10323272469146e-06, "loss": 0.2982, "step": 1492 }, { "epoch": 0.7340216322517208, "grad_norm": 20.875, "learning_rate": 8.10177105984056e-06, "loss": 0.2655, "step": 1493 }, { "epoch": 0.7345132743362832, "grad_norm": 12.75, "learning_rate": 8.100308336810787e-06, "loss": 0.354, "step": 1494 }, { "epoch": 0.7350049164208456, "grad_norm": 19.5, "learning_rate": 8.098844556031886e-06, "loss": 0.4092, "step": 1495 }, { "epoch": 0.7354965585054081, "grad_norm": 18.25, "learning_rate": 8.097379717933902e-06, "loss": 0.3703, "step": 1496 }, { "epoch": 0.7359882005899705, "grad_norm": 22.125, "learning_rate": 8.095913822947198e-06, "loss": 0.5086, "step": 1497 }, { "epoch": 0.7364798426745329, "grad_norm": 18.5, "learning_rate": 8.094446871502444e-06, "loss": 0.365, "step": 1498 }, { "epoch": 0.7369714847590954, "grad_norm": 18.5, "learning_rate": 8.092978864030623e-06, "loss": 0.3149, "step": 1499 }, { "epoch": 0.7374631268436578, "grad_norm": 15.875, "learning_rate": 8.091509800963027e-06, "loss": 0.3585, "step": 1500 }, { "epoch": 0.7379547689282202, "grad_norm": 17.875, "learning_rate": 8.090039682731256e-06, "loss": 0.3683, "step": 1501 }, { "epoch": 0.7384464110127827, "grad_norm": 29.0, "learning_rate": 8.088568509767223e-06, "loss": 0.5033, "step": 1502 }, { "epoch": 0.7389380530973452, "grad_norm": 13.5, "learning_rate": 8.087096282503152e-06, "loss": 0.4098, "step": 1503 }, { "epoch": 0.7394296951819076, "grad_norm": 28.25, "learning_rate": 8.08562300137157e-06, "loss": 0.5564, "step": 1504 }, { "epoch": 0.73992133726647, "grad_norm": 14.8125, "learning_rate": 8.084148666805323e-06, "loss": 0.4583, "step": 1505 }, { "epoch": 0.7404129793510325, "grad_norm": 20.5, "learning_rate": 8.08267327923756e-06, "loss": 0.3711, "step": 1506 }, { "epoch": 0.7409046214355949, "grad_norm": 29.125, "learning_rate": 8.08119683910174e-06, "loss": 0.4837, "step": 1507 }, { "epoch": 0.7413962635201573, "grad_norm": 16.125, "learning_rate": 8.079719346831632e-06, "loss": 0.4385, "step": 1508 }, { "epoch": 0.7418879056047197, "grad_norm": 27.5, "learning_rate": 8.078240802861315e-06, "loss": 0.3927, "step": 1509 }, { "epoch": 0.7423795476892822, "grad_norm": 22.5, "learning_rate": 8.076761207625179e-06, "loss": 0.4976, "step": 1510 }, { "epoch": 0.7428711897738446, "grad_norm": 22.5, "learning_rate": 8.075280561557915e-06, "loss": 0.2629, "step": 1511 }, { "epoch": 0.7433628318584071, "grad_norm": 27.375, "learning_rate": 8.073798865094534e-06, "loss": 0.5506, "step": 1512 }, { "epoch": 0.7438544739429696, "grad_norm": 14.5625, "learning_rate": 8.072316118670347e-06, "loss": 0.1897, "step": 1513 }, { "epoch": 0.744346116027532, "grad_norm": 15.125, "learning_rate": 8.070832322720972e-06, "loss": 0.4563, "step": 1514 }, { "epoch": 0.7448377581120944, "grad_norm": 12.3125, "learning_rate": 8.069347477682346e-06, "loss": 0.2345, "step": 1515 }, { "epoch": 0.7453294001966568, "grad_norm": 27.0, "learning_rate": 8.067861583990704e-06, "loss": 0.4567, "step": 1516 }, { "epoch": 0.7458210422812193, "grad_norm": 30.125, "learning_rate": 8.066374642082594e-06, "loss": 0.6082, "step": 1517 }, { "epoch": 0.7463126843657817, "grad_norm": 9.25, "learning_rate": 8.064886652394869e-06, "loss": 0.2757, "step": 1518 }, { "epoch": 0.7468043264503441, "grad_norm": 18.25, "learning_rate": 8.063397615364694e-06, "loss": 0.1725, "step": 1519 }, { "epoch": 0.7472959685349065, "grad_norm": 20.5, "learning_rate": 8.061907531429537e-06, "loss": 0.4436, "step": 1520 }, { "epoch": 0.7477876106194691, "grad_norm": 19.75, "learning_rate": 8.060416401027177e-06, "loss": 0.3758, "step": 1521 }, { "epoch": 0.7482792527040315, "grad_norm": 31.25, "learning_rate": 8.058924224595698e-06, "loss": 0.5899, "step": 1522 }, { "epoch": 0.7487708947885939, "grad_norm": 8.9375, "learning_rate": 8.057431002573493e-06, "loss": 0.2715, "step": 1523 }, { "epoch": 0.7492625368731564, "grad_norm": 24.5, "learning_rate": 8.055936735399264e-06, "loss": 0.4632, "step": 1524 }, { "epoch": 0.7497541789577188, "grad_norm": 12.9375, "learning_rate": 8.054441423512014e-06, "loss": 0.2587, "step": 1525 }, { "epoch": 0.7502458210422812, "grad_norm": 21.875, "learning_rate": 8.05294506735106e-06, "loss": 0.3912, "step": 1526 }, { "epoch": 0.7507374631268436, "grad_norm": 22.5, "learning_rate": 8.05144766735602e-06, "loss": 0.6586, "step": 1527 }, { "epoch": 0.7512291052114061, "grad_norm": 28.625, "learning_rate": 8.049949223966822e-06, "loss": 0.3369, "step": 1528 }, { "epoch": 0.7517207472959685, "grad_norm": 20.25, "learning_rate": 8.048449737623702e-06, "loss": 0.3572, "step": 1529 }, { "epoch": 0.7522123893805309, "grad_norm": 11.6875, "learning_rate": 8.046949208767196e-06, "loss": 0.1485, "step": 1530 }, { "epoch": 0.7527040314650935, "grad_norm": 15.4375, "learning_rate": 8.045447637838155e-06, "loss": 0.3666, "step": 1531 }, { "epoch": 0.7531956735496559, "grad_norm": 14.25, "learning_rate": 8.043945025277727e-06, "loss": 0.2911, "step": 1532 }, { "epoch": 0.7536873156342183, "grad_norm": 15.375, "learning_rate": 8.042441371527373e-06, "loss": 0.3737, "step": 1533 }, { "epoch": 0.7541789577187807, "grad_norm": 13.0625, "learning_rate": 8.04093667702886e-06, "loss": 0.3227, "step": 1534 }, { "epoch": 0.7546705998033432, "grad_norm": 21.375, "learning_rate": 8.039430942224253e-06, "loss": 0.3597, "step": 1535 }, { "epoch": 0.7551622418879056, "grad_norm": 15.6875, "learning_rate": 8.037924167555931e-06, "loss": 0.215, "step": 1536 }, { "epoch": 0.755653883972468, "grad_norm": 12.75, "learning_rate": 8.036416353466575e-06, "loss": 0.3587, "step": 1537 }, { "epoch": 0.7561455260570304, "grad_norm": 15.0625, "learning_rate": 8.034907500399173e-06, "loss": 0.3874, "step": 1538 }, { "epoch": 0.7566371681415929, "grad_norm": 12.375, "learning_rate": 8.033397608797015e-06, "loss": 0.3218, "step": 1539 }, { "epoch": 0.7571288102261554, "grad_norm": 25.125, "learning_rate": 8.031886679103699e-06, "loss": 0.5023, "step": 1540 }, { "epoch": 0.7576204523107178, "grad_norm": 18.625, "learning_rate": 8.030374711763126e-06, "loss": 0.3689, "step": 1541 }, { "epoch": 0.7581120943952803, "grad_norm": 19.875, "learning_rate": 8.028861707219504e-06, "loss": 0.3186, "step": 1542 }, { "epoch": 0.7586037364798427, "grad_norm": 13.3125, "learning_rate": 8.027347665917347e-06, "loss": 0.3322, "step": 1543 }, { "epoch": 0.7590953785644051, "grad_norm": 17.0, "learning_rate": 8.025832588301468e-06, "loss": 0.4368, "step": 1544 }, { "epoch": 0.7595870206489675, "grad_norm": 11.75, "learning_rate": 8.02431647481699e-06, "loss": 0.2884, "step": 1545 }, { "epoch": 0.76007866273353, "grad_norm": 33.5, "learning_rate": 8.022799325909334e-06, "loss": 0.4708, "step": 1546 }, { "epoch": 0.7605703048180924, "grad_norm": 14.75, "learning_rate": 8.021281142024234e-06, "loss": 0.4172, "step": 1547 }, { "epoch": 0.7610619469026548, "grad_norm": 14.375, "learning_rate": 8.019761923607721e-06, "loss": 0.2159, "step": 1548 }, { "epoch": 0.7615535889872173, "grad_norm": 18.375, "learning_rate": 8.018241671106135e-06, "loss": 0.5051, "step": 1549 }, { "epoch": 0.7620452310717798, "grad_norm": 24.875, "learning_rate": 8.016720384966112e-06, "loss": 0.3246, "step": 1550 }, { "epoch": 0.7625368731563422, "grad_norm": 21.125, "learning_rate": 8.015198065634603e-06, "loss": 0.2993, "step": 1551 }, { "epoch": 0.7630285152409046, "grad_norm": 30.5, "learning_rate": 8.013674713558852e-06, "loss": 0.4854, "step": 1552 }, { "epoch": 0.7635201573254671, "grad_norm": 14.25, "learning_rate": 8.012150329186411e-06, "loss": 0.3352, "step": 1553 }, { "epoch": 0.7640117994100295, "grad_norm": 42.0, "learning_rate": 8.010624912965138e-06, "loss": 0.73, "step": 1554 }, { "epoch": 0.7645034414945919, "grad_norm": 21.125, "learning_rate": 8.009098465343188e-06, "loss": 0.4172, "step": 1555 }, { "epoch": 0.7649950835791544, "grad_norm": 18.375, "learning_rate": 8.007570986769024e-06, "loss": 0.2747, "step": 1556 }, { "epoch": 0.7654867256637168, "grad_norm": 21.125, "learning_rate": 8.00604247769141e-06, "loss": 0.433, "step": 1557 }, { "epoch": 0.7659783677482792, "grad_norm": 23.375, "learning_rate": 8.00451293855941e-06, "loss": 0.4856, "step": 1558 }, { "epoch": 0.7664700098328416, "grad_norm": 30.125, "learning_rate": 8.002982369822398e-06, "loss": 0.3238, "step": 1559 }, { "epoch": 0.7669616519174042, "grad_norm": 15.9375, "learning_rate": 8.001450771930044e-06, "loss": 0.1465, "step": 1560 }, { "epoch": 0.7674532940019666, "grad_norm": 19.875, "learning_rate": 7.999918145332321e-06, "loss": 0.4411, "step": 1561 }, { "epoch": 0.767944936086529, "grad_norm": 14.125, "learning_rate": 7.998384490479508e-06, "loss": 0.3613, "step": 1562 }, { "epoch": 0.7684365781710915, "grad_norm": 30.625, "learning_rate": 7.996849807822181e-06, "loss": 0.6413, "step": 1563 }, { "epoch": 0.7689282202556539, "grad_norm": 16.5, "learning_rate": 7.995314097811224e-06, "loss": 0.2963, "step": 1564 }, { "epoch": 0.7694198623402163, "grad_norm": 26.0, "learning_rate": 7.993777360897815e-06, "loss": 0.5294, "step": 1565 }, { "epoch": 0.7699115044247787, "grad_norm": 20.25, "learning_rate": 7.992239597533443e-06, "loss": 0.2472, "step": 1566 }, { "epoch": 0.7704031465093412, "grad_norm": 15.0625, "learning_rate": 7.99070080816989e-06, "loss": 0.3036, "step": 1567 }, { "epoch": 0.7708947885939036, "grad_norm": 14.0, "learning_rate": 7.989160993259244e-06, "loss": 0.3521, "step": 1568 }, { "epoch": 0.7713864306784661, "grad_norm": 14.625, "learning_rate": 7.987620153253894e-06, "loss": 0.3774, "step": 1569 }, { "epoch": 0.7718780727630286, "grad_norm": 19.0, "learning_rate": 7.986078288606532e-06, "loss": 0.281, "step": 1570 }, { "epoch": 0.772369714847591, "grad_norm": 22.375, "learning_rate": 7.984535399770144e-06, "loss": 0.1926, "step": 1571 }, { "epoch": 0.7728613569321534, "grad_norm": 23.0, "learning_rate": 7.982991487198023e-06, "loss": 0.4527, "step": 1572 }, { "epoch": 0.7733529990167158, "grad_norm": 11.75, "learning_rate": 7.981446551343763e-06, "loss": 0.3735, "step": 1573 }, { "epoch": 0.7738446411012783, "grad_norm": 17.875, "learning_rate": 7.979900592661258e-06, "loss": 0.3235, "step": 1574 }, { "epoch": 0.7743362831858407, "grad_norm": 11.5625, "learning_rate": 7.978353611604698e-06, "loss": 0.2352, "step": 1575 }, { "epoch": 0.7748279252704031, "grad_norm": 8.0625, "learning_rate": 7.976805608628577e-06, "loss": 0.2855, "step": 1576 }, { "epoch": 0.7753195673549655, "grad_norm": 12.125, "learning_rate": 7.975256584187691e-06, "loss": 0.3908, "step": 1577 }, { "epoch": 0.775811209439528, "grad_norm": 14.9375, "learning_rate": 7.973706538737135e-06, "loss": 0.3822, "step": 1578 }, { "epoch": 0.7763028515240905, "grad_norm": 20.875, "learning_rate": 7.972155472732298e-06, "loss": 0.4625, "step": 1579 }, { "epoch": 0.7767944936086529, "grad_norm": 20.375, "learning_rate": 7.970603386628881e-06, "loss": 0.4536, "step": 1580 }, { "epoch": 0.7772861356932154, "grad_norm": 20.375, "learning_rate": 7.969050280882872e-06, "loss": 0.3927, "step": 1581 }, { "epoch": 0.7777777777777778, "grad_norm": 25.25, "learning_rate": 7.967496155950568e-06, "loss": 0.4047, "step": 1582 }, { "epoch": 0.7782694198623402, "grad_norm": 20.875, "learning_rate": 7.965941012288559e-06, "loss": 0.3635, "step": 1583 }, { "epoch": 0.7787610619469026, "grad_norm": 18.875, "learning_rate": 7.964384850353739e-06, "loss": 0.3304, "step": 1584 }, { "epoch": 0.7792527040314651, "grad_norm": 5.3125, "learning_rate": 7.962827670603296e-06, "loss": 0.2403, "step": 1585 }, { "epoch": 0.7797443461160275, "grad_norm": 18.75, "learning_rate": 7.961269473494724e-06, "loss": 0.4817, "step": 1586 }, { "epoch": 0.7802359882005899, "grad_norm": 27.0, "learning_rate": 7.959710259485811e-06, "loss": 0.5578, "step": 1587 }, { "epoch": 0.7807276302851525, "grad_norm": 13.6875, "learning_rate": 7.958150029034642e-06, "loss": 0.1731, "step": 1588 }, { "epoch": 0.7812192723697149, "grad_norm": 17.0, "learning_rate": 7.956588782599605e-06, "loss": 0.3849, "step": 1589 }, { "epoch": 0.7817109144542773, "grad_norm": 22.625, "learning_rate": 7.955026520639388e-06, "loss": 0.2785, "step": 1590 }, { "epoch": 0.7822025565388397, "grad_norm": 17.25, "learning_rate": 7.953463243612969e-06, "loss": 0.2752, "step": 1591 }, { "epoch": 0.7826941986234022, "grad_norm": 36.25, "learning_rate": 7.951898951979632e-06, "loss": 0.5163, "step": 1592 }, { "epoch": 0.7831858407079646, "grad_norm": 16.875, "learning_rate": 7.950333646198958e-06, "loss": 0.3168, "step": 1593 }, { "epoch": 0.783677482792527, "grad_norm": 21.0, "learning_rate": 7.948767326730822e-06, "loss": 0.3845, "step": 1594 }, { "epoch": 0.7841691248770895, "grad_norm": 19.5, "learning_rate": 7.947199994035401e-06, "loss": 0.3035, "step": 1595 }, { "epoch": 0.7846607669616519, "grad_norm": 44.25, "learning_rate": 7.945631648573167e-06, "loss": 0.7555, "step": 1596 }, { "epoch": 0.7851524090462143, "grad_norm": 13.6875, "learning_rate": 7.944062290804891e-06, "loss": 0.3582, "step": 1597 }, { "epoch": 0.7856440511307768, "grad_norm": 14.125, "learning_rate": 7.942491921191642e-06, "loss": 0.2055, "step": 1598 }, { "epoch": 0.7861356932153393, "grad_norm": 22.75, "learning_rate": 7.940920540194783e-06, "loss": 0.4166, "step": 1599 }, { "epoch": 0.7866273352999017, "grad_norm": 20.0, "learning_rate": 7.939348148275979e-06, "loss": 0.585, "step": 1600 }, { "epoch": 0.7871189773844641, "grad_norm": 12.25, "learning_rate": 7.937774745897187e-06, "loss": 0.3462, "step": 1601 }, { "epoch": 0.7876106194690266, "grad_norm": 14.25, "learning_rate": 7.936200333520663e-06, "loss": 0.4649, "step": 1602 }, { "epoch": 0.788102261553589, "grad_norm": 19.75, "learning_rate": 7.934624911608963e-06, "loss": 0.4497, "step": 1603 }, { "epoch": 0.7885939036381514, "grad_norm": 22.625, "learning_rate": 7.933048480624932e-06, "loss": 0.4822, "step": 1604 }, { "epoch": 0.7890855457227138, "grad_norm": 20.875, "learning_rate": 7.931471041031722e-06, "loss": 0.3771, "step": 1605 }, { "epoch": 0.7895771878072763, "grad_norm": 17.75, "learning_rate": 7.92989259329277e-06, "loss": 0.397, "step": 1606 }, { "epoch": 0.7900688298918387, "grad_norm": 15.625, "learning_rate": 7.928313137871818e-06, "loss": 0.4133, "step": 1607 }, { "epoch": 0.7905604719764012, "grad_norm": 46.0, "learning_rate": 7.926732675232898e-06, "loss": 0.7914, "step": 1608 }, { "epoch": 0.7910521140609637, "grad_norm": 24.75, "learning_rate": 7.925151205840342e-06, "loss": 0.3792, "step": 1609 }, { "epoch": 0.7915437561455261, "grad_norm": 19.125, "learning_rate": 7.923568730158776e-06, "loss": 0.399, "step": 1610 }, { "epoch": 0.7920353982300885, "grad_norm": 14.4375, "learning_rate": 7.921985248653122e-06, "loss": 0.4534, "step": 1611 }, { "epoch": 0.7925270403146509, "grad_norm": 32.0, "learning_rate": 7.920400761788596e-06, "loss": 0.4288, "step": 1612 }, { "epoch": 0.7930186823992134, "grad_norm": 26.5, "learning_rate": 7.918815270030713e-06, "loss": 0.5044, "step": 1613 }, { "epoch": 0.7935103244837758, "grad_norm": 9.875, "learning_rate": 7.917228773845282e-06, "loss": 0.3344, "step": 1614 }, { "epoch": 0.7940019665683382, "grad_norm": 38.25, "learning_rate": 7.915641273698402e-06, "loss": 0.6316, "step": 1615 }, { "epoch": 0.7944936086529006, "grad_norm": 25.0, "learning_rate": 7.914052770056475e-06, "loss": 0.481, "step": 1616 }, { "epoch": 0.7949852507374632, "grad_norm": 28.25, "learning_rate": 7.91246326338619e-06, "loss": 0.2737, "step": 1617 }, { "epoch": 0.7954768928220256, "grad_norm": 22.125, "learning_rate": 7.910872754154539e-06, "loss": 0.3298, "step": 1618 }, { "epoch": 0.795968534906588, "grad_norm": 11.375, "learning_rate": 7.909281242828802e-06, "loss": 0.3324, "step": 1619 }, { "epoch": 0.7964601769911505, "grad_norm": 10.0, "learning_rate": 7.907688729876554e-06, "loss": 0.3733, "step": 1620 }, { "epoch": 0.7969518190757129, "grad_norm": 13.8125, "learning_rate": 7.906095215765667e-06, "loss": 0.4002, "step": 1621 }, { "epoch": 0.7974434611602753, "grad_norm": 26.625, "learning_rate": 7.904500700964308e-06, "loss": 0.3848, "step": 1622 }, { "epoch": 0.7979351032448377, "grad_norm": 17.0, "learning_rate": 7.902905185940934e-06, "loss": 0.5003, "step": 1623 }, { "epoch": 0.7984267453294002, "grad_norm": 13.875, "learning_rate": 7.901308671164298e-06, "loss": 0.2792, "step": 1624 }, { "epoch": 0.7989183874139626, "grad_norm": 14.125, "learning_rate": 7.899711157103446e-06, "loss": 0.3653, "step": 1625 }, { "epoch": 0.799410029498525, "grad_norm": 15.875, "learning_rate": 7.89811264422772e-06, "loss": 0.4055, "step": 1626 }, { "epoch": 0.7999016715830876, "grad_norm": 18.5, "learning_rate": 7.89651313300675e-06, "loss": 0.4821, "step": 1627 }, { "epoch": 0.80039331366765, "grad_norm": 11.0, "learning_rate": 7.894912623910465e-06, "loss": 0.2777, "step": 1628 }, { "epoch": 0.8008849557522124, "grad_norm": 33.5, "learning_rate": 7.893311117409086e-06, "loss": 0.3612, "step": 1629 }, { "epoch": 0.8013765978367748, "grad_norm": 15.4375, "learning_rate": 7.891708613973126e-06, "loss": 0.43, "step": 1630 }, { "epoch": 0.8018682399213373, "grad_norm": 12.0625, "learning_rate": 7.89010511407339e-06, "loss": 0.3676, "step": 1631 }, { "epoch": 0.8023598820058997, "grad_norm": 21.625, "learning_rate": 7.888500618180975e-06, "loss": 0.466, "step": 1632 }, { "epoch": 0.8028515240904621, "grad_norm": 21.5, "learning_rate": 7.886895126767276e-06, "loss": 0.4846, "step": 1633 }, { "epoch": 0.8033431661750245, "grad_norm": 12.375, "learning_rate": 7.885288640303975e-06, "loss": 0.3775, "step": 1634 }, { "epoch": 0.803834808259587, "grad_norm": 20.625, "learning_rate": 7.883681159263046e-06, "loss": 0.4547, "step": 1635 }, { "epoch": 0.8043264503441495, "grad_norm": 16.25, "learning_rate": 7.882072684116762e-06, "loss": 0.6192, "step": 1636 }, { "epoch": 0.8048180924287119, "grad_norm": 11.5625, "learning_rate": 7.88046321533768e-06, "loss": 0.3376, "step": 1637 }, { "epoch": 0.8053097345132744, "grad_norm": 14.6875, "learning_rate": 7.878852753398653e-06, "loss": 0.3694, "step": 1638 }, { "epoch": 0.8058013765978368, "grad_norm": 26.75, "learning_rate": 7.877241298772825e-06, "loss": 0.4089, "step": 1639 }, { "epoch": 0.8062930186823992, "grad_norm": 14.75, "learning_rate": 7.875628851933634e-06, "loss": 0.4427, "step": 1640 }, { "epoch": 0.8067846607669616, "grad_norm": 18.5, "learning_rate": 7.874015413354805e-06, "loss": 0.294, "step": 1641 }, { "epoch": 0.8072763028515241, "grad_norm": 19.125, "learning_rate": 7.872400983510356e-06, "loss": 0.4955, "step": 1642 }, { "epoch": 0.8077679449360865, "grad_norm": 19.625, "learning_rate": 7.870785562874598e-06, "loss": 0.4939, "step": 1643 }, { "epoch": 0.8082595870206489, "grad_norm": 12.9375, "learning_rate": 7.869169151922131e-06, "loss": 0.2777, "step": 1644 }, { "epoch": 0.8087512291052114, "grad_norm": 19.875, "learning_rate": 7.867551751127848e-06, "loss": 0.2853, "step": 1645 }, { "epoch": 0.8092428711897739, "grad_norm": 20.75, "learning_rate": 7.865933360966933e-06, "loss": 0.498, "step": 1646 }, { "epoch": 0.8097345132743363, "grad_norm": 8.1875, "learning_rate": 7.864313981914857e-06, "loss": 0.2822, "step": 1647 }, { "epoch": 0.8102261553588987, "grad_norm": 24.125, "learning_rate": 7.862693614447384e-06, "loss": 0.5071, "step": 1648 }, { "epoch": 0.8107177974434612, "grad_norm": 14.25, "learning_rate": 7.861072259040571e-06, "loss": 0.4159, "step": 1649 }, { "epoch": 0.8112094395280236, "grad_norm": 33.0, "learning_rate": 7.85944991617076e-06, "loss": 0.4186, "step": 1650 }, { "epoch": 0.811701081612586, "grad_norm": 22.25, "learning_rate": 7.857826586314586e-06, "loss": 0.3642, "step": 1651 }, { "epoch": 0.8121927236971485, "grad_norm": 24.0, "learning_rate": 7.856202269948973e-06, "loss": 0.4697, "step": 1652 }, { "epoch": 0.8126843657817109, "grad_norm": 18.375, "learning_rate": 7.854576967551137e-06, "loss": 0.3343, "step": 1653 }, { "epoch": 0.8131760078662733, "grad_norm": 13.8125, "learning_rate": 7.852950679598582e-06, "loss": 0.2923, "step": 1654 }, { "epoch": 0.8136676499508357, "grad_norm": 17.25, "learning_rate": 7.851323406569101e-06, "loss": 0.3439, "step": 1655 }, { "epoch": 0.8141592920353983, "grad_norm": 13.9375, "learning_rate": 7.849695148940776e-06, "loss": 0.2994, "step": 1656 }, { "epoch": 0.8146509341199607, "grad_norm": 28.75, "learning_rate": 7.848065907191983e-06, "loss": 0.5085, "step": 1657 }, { "epoch": 0.8151425762045231, "grad_norm": 16.25, "learning_rate": 7.84643568180138e-06, "loss": 0.3502, "step": 1658 }, { "epoch": 0.8156342182890856, "grad_norm": 14.4375, "learning_rate": 7.84480447324792e-06, "loss": 0.5073, "step": 1659 }, { "epoch": 0.816125860373648, "grad_norm": 25.75, "learning_rate": 7.84317228201084e-06, "loss": 0.5343, "step": 1660 }, { "epoch": 0.8166175024582104, "grad_norm": 20.5, "learning_rate": 7.841539108569669e-06, "loss": 0.4346, "step": 1661 }, { "epoch": 0.8171091445427728, "grad_norm": 33.25, "learning_rate": 7.839904953404224e-06, "loss": 0.6269, "step": 1662 }, { "epoch": 0.8176007866273353, "grad_norm": 31.375, "learning_rate": 7.83826981699461e-06, "loss": 0.7597, "step": 1663 }, { "epoch": 0.8180924287118977, "grad_norm": 23.375, "learning_rate": 7.83663369982122e-06, "loss": 0.4775, "step": 1664 }, { "epoch": 0.8185840707964602, "grad_norm": 13.0, "learning_rate": 7.834996602364737e-06, "loss": 0.3104, "step": 1665 }, { "epoch": 0.8190757128810227, "grad_norm": 21.875, "learning_rate": 7.833358525106128e-06, "loss": 0.5464, "step": 1666 }, { "epoch": 0.8195673549655851, "grad_norm": 12.4375, "learning_rate": 7.831719468526651e-06, "loss": 0.225, "step": 1667 }, { "epoch": 0.8200589970501475, "grad_norm": 22.5, "learning_rate": 7.830079433107852e-06, "loss": 0.56, "step": 1668 }, { "epoch": 0.8205506391347099, "grad_norm": 20.75, "learning_rate": 7.828438419331563e-06, "loss": 0.3951, "step": 1669 }, { "epoch": 0.8210422812192724, "grad_norm": 8.5625, "learning_rate": 7.826796427679905e-06, "loss": 0.2906, "step": 1670 }, { "epoch": 0.8215339233038348, "grad_norm": 13.0625, "learning_rate": 7.825153458635284e-06, "loss": 0.3665, "step": 1671 }, { "epoch": 0.8220255653883972, "grad_norm": 12.9375, "learning_rate": 7.823509512680396e-06, "loss": 0.3535, "step": 1672 }, { "epoch": 0.8225172074729596, "grad_norm": 20.125, "learning_rate": 7.82186459029822e-06, "loss": 0.4125, "step": 1673 }, { "epoch": 0.8230088495575221, "grad_norm": 13.4375, "learning_rate": 7.820218691972027e-06, "loss": 0.3, "step": 1674 }, { "epoch": 0.8235004916420846, "grad_norm": 17.25, "learning_rate": 7.81857181818537e-06, "loss": 0.4054, "step": 1675 }, { "epoch": 0.823992133726647, "grad_norm": 17.5, "learning_rate": 7.816923969422094e-06, "loss": 0.5173, "step": 1676 }, { "epoch": 0.8244837758112095, "grad_norm": 27.125, "learning_rate": 7.815275146166324e-06, "loss": 0.4603, "step": 1677 }, { "epoch": 0.8249754178957719, "grad_norm": 19.625, "learning_rate": 7.813625348902474e-06, "loss": 0.4284, "step": 1678 }, { "epoch": 0.8254670599803343, "grad_norm": 13.375, "learning_rate": 7.811974578115248e-06, "loss": 0.2955, "step": 1679 }, { "epoch": 0.8259587020648967, "grad_norm": 11.25, "learning_rate": 7.810322834289629e-06, "loss": 0.3051, "step": 1680 }, { "epoch": 0.8264503441494592, "grad_norm": 24.875, "learning_rate": 7.808670117910893e-06, "loss": 0.4559, "step": 1681 }, { "epoch": 0.8269419862340216, "grad_norm": 17.0, "learning_rate": 7.807016429464595e-06, "loss": 0.2785, "step": 1682 }, { "epoch": 0.827433628318584, "grad_norm": 23.25, "learning_rate": 7.80536176943658e-06, "loss": 0.4003, "step": 1683 }, { "epoch": 0.8279252704031466, "grad_norm": 15.9375, "learning_rate": 7.803706138312978e-06, "loss": 0.3541, "step": 1684 }, { "epoch": 0.828416912487709, "grad_norm": 10.625, "learning_rate": 7.802049536580203e-06, "loss": 0.3609, "step": 1685 }, { "epoch": 0.8289085545722714, "grad_norm": 19.625, "learning_rate": 7.800391964724956e-06, "loss": 0.4636, "step": 1686 }, { "epoch": 0.8294001966568338, "grad_norm": 9.1875, "learning_rate": 7.798733423234218e-06, "loss": 0.2836, "step": 1687 }, { "epoch": 0.8298918387413963, "grad_norm": 16.75, "learning_rate": 7.797073912595262e-06, "loss": 0.2503, "step": 1688 }, { "epoch": 0.8303834808259587, "grad_norm": 21.75, "learning_rate": 7.795413433295642e-06, "loss": 0.4926, "step": 1689 }, { "epoch": 0.8308751229105211, "grad_norm": 38.75, "learning_rate": 7.793751985823196e-06, "loss": 0.6645, "step": 1690 }, { "epoch": 0.8313667649950836, "grad_norm": 22.625, "learning_rate": 7.792089570666049e-06, "loss": 0.415, "step": 1691 }, { "epoch": 0.831858407079646, "grad_norm": 36.0, "learning_rate": 7.790426188312606e-06, "loss": 0.5103, "step": 1692 }, { "epoch": 0.8323500491642084, "grad_norm": 12.4375, "learning_rate": 7.78876183925156e-06, "loss": 0.2604, "step": 1693 }, { "epoch": 0.8328416912487709, "grad_norm": 11.75, "learning_rate": 7.787096523971887e-06, "loss": 0.3832, "step": 1694 }, { "epoch": 0.8333333333333334, "grad_norm": 21.375, "learning_rate": 7.785430242962845e-06, "loss": 0.3729, "step": 1695 }, { "epoch": 0.8338249754178958, "grad_norm": 13.25, "learning_rate": 7.783762996713982e-06, "loss": 0.2568, "step": 1696 }, { "epoch": 0.8343166175024582, "grad_norm": 28.0, "learning_rate": 7.78209478571512e-06, "loss": 0.5093, "step": 1697 }, { "epoch": 0.8348082595870207, "grad_norm": 15.6875, "learning_rate": 7.780425610456373e-06, "loss": 0.3782, "step": 1698 }, { "epoch": 0.8352999016715831, "grad_norm": 23.0, "learning_rate": 7.778755471428131e-06, "loss": 0.5265, "step": 1699 }, { "epoch": 0.8357915437561455, "grad_norm": 24.5, "learning_rate": 7.777084369121077e-06, "loss": 0.3859, "step": 1700 }, { "epoch": 0.8362831858407079, "grad_norm": 9.875, "learning_rate": 7.775412304026164e-06, "loss": 0.4046, "step": 1701 }, { "epoch": 0.8367748279252704, "grad_norm": 17.875, "learning_rate": 7.773739276634638e-06, "loss": 0.2852, "step": 1702 }, { "epoch": 0.8372664700098329, "grad_norm": 19.375, "learning_rate": 7.772065287438027e-06, "loss": 0.3865, "step": 1703 }, { "epoch": 0.8377581120943953, "grad_norm": 17.75, "learning_rate": 7.770390336928135e-06, "loss": 0.4781, "step": 1704 }, { "epoch": 0.8382497541789578, "grad_norm": 16.75, "learning_rate": 7.768714425597053e-06, "loss": 0.2345, "step": 1705 }, { "epoch": 0.8387413962635202, "grad_norm": 24.75, "learning_rate": 7.767037553937155e-06, "loss": 0.4259, "step": 1706 }, { "epoch": 0.8392330383480826, "grad_norm": 21.25, "learning_rate": 7.765359722441095e-06, "loss": 0.2761, "step": 1707 }, { "epoch": 0.839724680432645, "grad_norm": 18.625, "learning_rate": 7.763680931601811e-06, "loss": 0.4547, "step": 1708 }, { "epoch": 0.8402163225172075, "grad_norm": 12.5625, "learning_rate": 7.762001181912522e-06, "loss": 0.2322, "step": 1709 }, { "epoch": 0.8407079646017699, "grad_norm": 24.0, "learning_rate": 7.760320473866727e-06, "loss": 0.3885, "step": 1710 }, { "epoch": 0.8411996066863323, "grad_norm": 22.25, "learning_rate": 7.758638807958207e-06, "loss": 0.5185, "step": 1711 }, { "epoch": 0.8416912487708947, "grad_norm": 16.375, "learning_rate": 7.756956184681031e-06, "loss": 0.2781, "step": 1712 }, { "epoch": 0.8421828908554573, "grad_norm": 22.625, "learning_rate": 7.755272604529537e-06, "loss": 0.3519, "step": 1713 }, { "epoch": 0.8426745329400197, "grad_norm": 24.5, "learning_rate": 7.753588067998353e-06, "loss": 0.4806, "step": 1714 }, { "epoch": 0.8431661750245821, "grad_norm": 13.0, "learning_rate": 7.751902575582388e-06, "loss": 0.3273, "step": 1715 }, { "epoch": 0.8436578171091446, "grad_norm": 31.25, "learning_rate": 7.750216127776827e-06, "loss": 0.4906, "step": 1716 }, { "epoch": 0.844149459193707, "grad_norm": 28.125, "learning_rate": 7.74852872507714e-06, "loss": 0.4266, "step": 1717 }, { "epoch": 0.8446411012782694, "grad_norm": 12.4375, "learning_rate": 7.746840367979074e-06, "loss": 0.4007, "step": 1718 }, { "epoch": 0.8451327433628318, "grad_norm": 17.75, "learning_rate": 7.745151056978659e-06, "loss": 0.2737, "step": 1719 }, { "epoch": 0.8456243854473943, "grad_norm": 26.625, "learning_rate": 7.743460792572207e-06, "loss": 0.4176, "step": 1720 }, { "epoch": 0.8461160275319567, "grad_norm": 10.5, "learning_rate": 7.741769575256305e-06, "loss": 0.352, "step": 1721 }, { "epoch": 0.8466076696165191, "grad_norm": 25.0, "learning_rate": 7.740077405527821e-06, "loss": 0.4991, "step": 1722 }, { "epoch": 0.8470993117010817, "grad_norm": 23.125, "learning_rate": 7.738384283883909e-06, "loss": 0.5209, "step": 1723 }, { "epoch": 0.8475909537856441, "grad_norm": 31.875, "learning_rate": 7.736690210821994e-06, "loss": 0.3901, "step": 1724 }, { "epoch": 0.8480825958702065, "grad_norm": 24.875, "learning_rate": 7.734995186839785e-06, "loss": 0.5856, "step": 1725 }, { "epoch": 0.8485742379547689, "grad_norm": 14.8125, "learning_rate": 7.733299212435274e-06, "loss": 0.3086, "step": 1726 }, { "epoch": 0.8490658800393314, "grad_norm": 16.25, "learning_rate": 7.731602288106722e-06, "loss": 0.2789, "step": 1727 }, { "epoch": 0.8495575221238938, "grad_norm": 8.0625, "learning_rate": 7.72990441435268e-06, "loss": 0.2866, "step": 1728 }, { "epoch": 0.8500491642084562, "grad_norm": 14.0625, "learning_rate": 7.728205591671972e-06, "loss": 0.3158, "step": 1729 }, { "epoch": 0.8505408062930186, "grad_norm": 19.75, "learning_rate": 7.726505820563701e-06, "loss": 0.4402, "step": 1730 }, { "epoch": 0.8510324483775811, "grad_norm": 33.5, "learning_rate": 7.72480510152725e-06, "loss": 0.5274, "step": 1731 }, { "epoch": 0.8515240904621436, "grad_norm": 24.25, "learning_rate": 7.72310343506228e-06, "loss": 0.4829, "step": 1732 }, { "epoch": 0.852015732546706, "grad_norm": 23.875, "learning_rate": 7.721400821668734e-06, "loss": 0.3449, "step": 1733 }, { "epoch": 0.8525073746312685, "grad_norm": 11.3125, "learning_rate": 7.719697261846824e-06, "loss": 0.2043, "step": 1734 }, { "epoch": 0.8529990167158309, "grad_norm": 36.0, "learning_rate": 7.717992756097048e-06, "loss": 0.5736, "step": 1735 }, { "epoch": 0.8534906588003933, "grad_norm": 15.4375, "learning_rate": 7.71628730492018e-06, "loss": 0.3045, "step": 1736 }, { "epoch": 0.8539823008849557, "grad_norm": 29.0, "learning_rate": 7.714580908817271e-06, "loss": 0.438, "step": 1737 }, { "epoch": 0.8544739429695182, "grad_norm": 35.25, "learning_rate": 7.71287356828965e-06, "loss": 0.4402, "step": 1738 }, { "epoch": 0.8549655850540806, "grad_norm": 24.75, "learning_rate": 7.711165283838924e-06, "loss": 0.5747, "step": 1739 }, { "epoch": 0.855457227138643, "grad_norm": 16.75, "learning_rate": 7.709456055966976e-06, "loss": 0.4076, "step": 1740 }, { "epoch": 0.8559488692232055, "grad_norm": 19.25, "learning_rate": 7.70774588517597e-06, "loss": 0.64, "step": 1741 }, { "epoch": 0.856440511307768, "grad_norm": 7.9375, "learning_rate": 7.706034771968339e-06, "loss": 0.2863, "step": 1742 }, { "epoch": 0.8569321533923304, "grad_norm": 15.0625, "learning_rate": 7.7043227168468e-06, "loss": 0.3238, "step": 1743 }, { "epoch": 0.8574237954768928, "grad_norm": 22.875, "learning_rate": 7.702609720314346e-06, "loss": 0.2431, "step": 1744 }, { "epoch": 0.8579154375614553, "grad_norm": 15.8125, "learning_rate": 7.700895782874244e-06, "loss": 0.4159, "step": 1745 }, { "epoch": 0.8584070796460177, "grad_norm": 24.875, "learning_rate": 7.699180905030038e-06, "loss": 0.5102, "step": 1746 }, { "epoch": 0.8588987217305801, "grad_norm": 25.0, "learning_rate": 7.69746508728555e-06, "loss": 0.5477, "step": 1747 }, { "epoch": 0.8593903638151426, "grad_norm": 22.625, "learning_rate": 7.695748330144877e-06, "loss": 0.2834, "step": 1748 }, { "epoch": 0.859882005899705, "grad_norm": 21.25, "learning_rate": 7.69403063411239e-06, "loss": 0.2192, "step": 1749 }, { "epoch": 0.8603736479842674, "grad_norm": 23.625, "learning_rate": 7.692311999692741e-06, "loss": 0.4154, "step": 1750 }, { "epoch": 0.86086529006883, "grad_norm": 13.5, "learning_rate": 7.690592427390853e-06, "loss": 0.2526, "step": 1751 }, { "epoch": 0.8613569321533924, "grad_norm": 18.5, "learning_rate": 7.688871917711925e-06, "loss": 0.2611, "step": 1752 }, { "epoch": 0.8618485742379548, "grad_norm": 15.0, "learning_rate": 7.687150471161435e-06, "loss": 0.3702, "step": 1753 }, { "epoch": 0.8623402163225172, "grad_norm": 18.125, "learning_rate": 7.685428088245132e-06, "loss": 0.3912, "step": 1754 }, { "epoch": 0.8628318584070797, "grad_norm": 24.125, "learning_rate": 7.68370476946904e-06, "loss": 0.5223, "step": 1755 }, { "epoch": 0.8633235004916421, "grad_norm": 24.875, "learning_rate": 7.681980515339464e-06, "loss": 0.4053, "step": 1756 }, { "epoch": 0.8638151425762045, "grad_norm": 8.9375, "learning_rate": 7.680255326362977e-06, "loss": 0.4338, "step": 1757 }, { "epoch": 0.8643067846607669, "grad_norm": 16.125, "learning_rate": 7.67852920304643e-06, "loss": 0.3497, "step": 1758 }, { "epoch": 0.8647984267453294, "grad_norm": 19.375, "learning_rate": 7.676802145896947e-06, "loss": 0.4326, "step": 1759 }, { "epoch": 0.8652900688298918, "grad_norm": 12.875, "learning_rate": 7.675074155421927e-06, "loss": 0.3503, "step": 1760 }, { "epoch": 0.8657817109144543, "grad_norm": 14.4375, "learning_rate": 7.673345232129047e-06, "loss": 0.3127, "step": 1761 }, { "epoch": 0.8662733529990168, "grad_norm": 6.84375, "learning_rate": 7.671615376526247e-06, "loss": 0.2664, "step": 1762 }, { "epoch": 0.8667649950835792, "grad_norm": 11.3125, "learning_rate": 7.669884589121756e-06, "loss": 0.3444, "step": 1763 }, { "epoch": 0.8672566371681416, "grad_norm": 16.75, "learning_rate": 7.668152870424065e-06, "loss": 0.4228, "step": 1764 }, { "epoch": 0.867748279252704, "grad_norm": 17.25, "learning_rate": 7.666420220941942e-06, "loss": 0.3695, "step": 1765 }, { "epoch": 0.8682399213372665, "grad_norm": 11.0, "learning_rate": 7.66468664118443e-06, "loss": 0.2073, "step": 1766 }, { "epoch": 0.8687315634218289, "grad_norm": 12.5625, "learning_rate": 7.662952131660846e-06, "loss": 0.232, "step": 1767 }, { "epoch": 0.8692232055063913, "grad_norm": 11.0625, "learning_rate": 7.661216692880777e-06, "loss": 0.1792, "step": 1768 }, { "epoch": 0.8697148475909537, "grad_norm": 24.125, "learning_rate": 7.659480325354083e-06, "loss": 0.2766, "step": 1769 }, { "epoch": 0.8702064896755162, "grad_norm": 16.625, "learning_rate": 7.657743029590902e-06, "loss": 0.3468, "step": 1770 }, { "epoch": 0.8706981317600787, "grad_norm": 16.5, "learning_rate": 7.656004806101639e-06, "loss": 0.2929, "step": 1771 }, { "epoch": 0.8711897738446411, "grad_norm": 12.6875, "learning_rate": 7.654265655396974e-06, "loss": 0.3496, "step": 1772 }, { "epoch": 0.8716814159292036, "grad_norm": 12.6875, "learning_rate": 7.65252557798786e-06, "loss": 0.42, "step": 1773 }, { "epoch": 0.872173058013766, "grad_norm": 24.0, "learning_rate": 7.65078457438552e-06, "loss": 0.2482, "step": 1774 }, { "epoch": 0.8726647000983284, "grad_norm": 39.25, "learning_rate": 7.64904264510145e-06, "loss": 0.5266, "step": 1775 }, { "epoch": 0.8731563421828908, "grad_norm": 34.0, "learning_rate": 7.64729979064742e-06, "loss": 0.635, "step": 1776 }, { "epoch": 0.8736479842674533, "grad_norm": 17.5, "learning_rate": 7.645556011535469e-06, "loss": 0.4666, "step": 1777 }, { "epoch": 0.8741396263520157, "grad_norm": 18.5, "learning_rate": 7.643811308277912e-06, "loss": 0.2716, "step": 1778 }, { "epoch": 0.8746312684365781, "grad_norm": 10.625, "learning_rate": 7.642065681387328e-06, "loss": 0.2675, "step": 1779 }, { "epoch": 0.8751229105211407, "grad_norm": 19.25, "learning_rate": 7.640319131376574e-06, "loss": 0.4001, "step": 1780 }, { "epoch": 0.8756145526057031, "grad_norm": 25.125, "learning_rate": 7.638571658758776e-06, "loss": 0.4739, "step": 1781 }, { "epoch": 0.8761061946902655, "grad_norm": 17.625, "learning_rate": 7.636823264047333e-06, "loss": 0.3783, "step": 1782 }, { "epoch": 0.8765978367748279, "grad_norm": 14.625, "learning_rate": 7.635073947755909e-06, "loss": 0.2366, "step": 1783 }, { "epoch": 0.8770894788593904, "grad_norm": 23.25, "learning_rate": 7.633323710398444e-06, "loss": 0.3973, "step": 1784 }, { "epoch": 0.8775811209439528, "grad_norm": 23.125, "learning_rate": 7.631572552489147e-06, "loss": 0.2852, "step": 1785 }, { "epoch": 0.8780727630285152, "grad_norm": 22.125, "learning_rate": 7.629820474542501e-06, "loss": 0.5062, "step": 1786 }, { "epoch": 0.8785644051130777, "grad_norm": 49.25, "learning_rate": 7.628067477073253e-06, "loss": 0.5774, "step": 1787 }, { "epoch": 0.8790560471976401, "grad_norm": 16.375, "learning_rate": 7.626313560596423e-06, "loss": 0.3648, "step": 1788 }, { "epoch": 0.8795476892822025, "grad_norm": 21.75, "learning_rate": 7.624558725627303e-06, "loss": 0.3596, "step": 1789 }, { "epoch": 0.880039331366765, "grad_norm": 13.5, "learning_rate": 7.622802972681452e-06, "loss": 0.22, "step": 1790 }, { "epoch": 0.8805309734513275, "grad_norm": 17.625, "learning_rate": 7.621046302274698e-06, "loss": 0.3571, "step": 1791 }, { "epoch": 0.8810226155358899, "grad_norm": 19.375, "learning_rate": 7.6192887149231435e-06, "loss": 0.3932, "step": 1792 }, { "epoch": 0.8815142576204523, "grad_norm": 23.75, "learning_rate": 7.617530211143156e-06, "loss": 0.4912, "step": 1793 }, { "epoch": 0.8820058997050148, "grad_norm": 23.5, "learning_rate": 7.615770791451373e-06, "loss": 0.3419, "step": 1794 }, { "epoch": 0.8824975417895772, "grad_norm": 27.625, "learning_rate": 7.614010456364701e-06, "loss": 0.3314, "step": 1795 }, { "epoch": 0.8829891838741396, "grad_norm": 15.25, "learning_rate": 7.6122492064003185e-06, "loss": 0.2846, "step": 1796 }, { "epoch": 0.883480825958702, "grad_norm": 11.8125, "learning_rate": 7.6104870420756684e-06, "loss": 0.1826, "step": 1797 }, { "epoch": 0.8839724680432645, "grad_norm": 22.625, "learning_rate": 7.608723963908463e-06, "loss": 0.4218, "step": 1798 }, { "epoch": 0.884464110127827, "grad_norm": 15.0625, "learning_rate": 7.606959972416685e-06, "loss": 0.3091, "step": 1799 }, { "epoch": 0.8849557522123894, "grad_norm": 18.5, "learning_rate": 7.605195068118586e-06, "loss": 0.3275, "step": 1800 }, { "epoch": 0.8854473942969519, "grad_norm": 9.8125, "learning_rate": 7.603429251532683e-06, "loss": 0.1747, "step": 1801 }, { "epoch": 0.8859390363815143, "grad_norm": 23.0, "learning_rate": 7.601662523177762e-06, "loss": 0.3023, "step": 1802 }, { "epoch": 0.8864306784660767, "grad_norm": 31.5, "learning_rate": 7.599894883572879e-06, "loss": 0.6251, "step": 1803 }, { "epoch": 0.8869223205506391, "grad_norm": 16.25, "learning_rate": 7.598126333237354e-06, "loss": 0.1898, "step": 1804 }, { "epoch": 0.8874139626352016, "grad_norm": 20.875, "learning_rate": 7.5963568726907775e-06, "loss": 0.2821, "step": 1805 }, { "epoch": 0.887905604719764, "grad_norm": 18.25, "learning_rate": 7.594586502453006e-06, "loss": 0.3759, "step": 1806 }, { "epoch": 0.8883972468043264, "grad_norm": 9.25, "learning_rate": 7.592815223044165e-06, "loss": 0.2272, "step": 1807 }, { "epoch": 0.8888888888888888, "grad_norm": 33.75, "learning_rate": 7.591043034984646e-06, "loss": 0.4408, "step": 1808 }, { "epoch": 0.8893805309734514, "grad_norm": 13.0, "learning_rate": 7.589269938795103e-06, "loss": 0.3119, "step": 1809 }, { "epoch": 0.8898721730580138, "grad_norm": 26.375, "learning_rate": 7.587495934996468e-06, "loss": 0.4472, "step": 1810 }, { "epoch": 0.8903638151425762, "grad_norm": 36.75, "learning_rate": 7.585721024109927e-06, "loss": 0.595, "step": 1811 }, { "epoch": 0.8908554572271387, "grad_norm": 16.875, "learning_rate": 7.583945206656941e-06, "loss": 0.2443, "step": 1812 }, { "epoch": 0.8913470993117011, "grad_norm": 33.75, "learning_rate": 7.582168483159233e-06, "loss": 0.3891, "step": 1813 }, { "epoch": 0.8918387413962635, "grad_norm": 31.375, "learning_rate": 7.580390854138795e-06, "loss": 0.4595, "step": 1814 }, { "epoch": 0.8923303834808259, "grad_norm": 24.5, "learning_rate": 7.5786123201178845e-06, "loss": 0.4498, "step": 1815 }, { "epoch": 0.8928220255653884, "grad_norm": 15.8125, "learning_rate": 7.576832881619023e-06, "loss": 0.3197, "step": 1816 }, { "epoch": 0.8933136676499508, "grad_norm": 39.0, "learning_rate": 7.575052539164998e-06, "loss": 0.2239, "step": 1817 }, { "epoch": 0.8938053097345132, "grad_norm": 26.375, "learning_rate": 7.573271293278866e-06, "loss": 0.4538, "step": 1818 }, { "epoch": 0.8942969518190758, "grad_norm": 32.75, "learning_rate": 7.5714891444839445e-06, "loss": 0.4536, "step": 1819 }, { "epoch": 0.8947885939036382, "grad_norm": 56.25, "learning_rate": 7.569706093303818e-06, "loss": 0.5442, "step": 1820 }, { "epoch": 0.8952802359882006, "grad_norm": 19.875, "learning_rate": 7.567922140262337e-06, "loss": 0.3319, "step": 1821 }, { "epoch": 0.895771878072763, "grad_norm": 35.75, "learning_rate": 7.566137285883618e-06, "loss": 0.3016, "step": 1822 }, { "epoch": 0.8962635201573255, "grad_norm": 19.125, "learning_rate": 7.564351530692036e-06, "loss": 0.4386, "step": 1823 }, { "epoch": 0.8967551622418879, "grad_norm": 29.375, "learning_rate": 7.562564875212242e-06, "loss": 0.4278, "step": 1824 }, { "epoch": 0.8972468043264503, "grad_norm": 31.125, "learning_rate": 7.560777319969137e-06, "loss": 0.2311, "step": 1825 }, { "epoch": 0.8977384464110127, "grad_norm": 30.5, "learning_rate": 7.5589888654879e-06, "loss": 0.4849, "step": 1826 }, { "epoch": 0.8982300884955752, "grad_norm": 21.0, "learning_rate": 7.557199512293967e-06, "loss": 0.4398, "step": 1827 }, { "epoch": 0.8987217305801377, "grad_norm": 40.25, "learning_rate": 7.555409260913036e-06, "loss": 0.4772, "step": 1828 }, { "epoch": 0.8992133726647001, "grad_norm": 14.75, "learning_rate": 7.553618111871077e-06, "loss": 0.2803, "step": 1829 }, { "epoch": 0.8997050147492626, "grad_norm": 17.5, "learning_rate": 7.551826065694316e-06, "loss": 0.3473, "step": 1830 }, { "epoch": 0.900196656833825, "grad_norm": 19.25, "learning_rate": 7.550033122909244e-06, "loss": 0.4332, "step": 1831 }, { "epoch": 0.9006882989183874, "grad_norm": 10.1875, "learning_rate": 7.5482392840426225e-06, "loss": 0.2889, "step": 1832 }, { "epoch": 0.9011799410029498, "grad_norm": 26.5, "learning_rate": 7.5464445496214655e-06, "loss": 0.3809, "step": 1833 }, { "epoch": 0.9016715830875123, "grad_norm": 19.625, "learning_rate": 7.544648920173057e-06, "loss": 0.3762, "step": 1834 }, { "epoch": 0.9021632251720747, "grad_norm": 28.0, "learning_rate": 7.5428523962249405e-06, "loss": 0.4455, "step": 1835 }, { "epoch": 0.9026548672566371, "grad_norm": 15.375, "learning_rate": 7.541054978304928e-06, "loss": 0.2551, "step": 1836 }, { "epoch": 0.9031465093411996, "grad_norm": 20.875, "learning_rate": 7.539256666941087e-06, "loss": 0.1716, "step": 1837 }, { "epoch": 0.9036381514257621, "grad_norm": 37.5, "learning_rate": 7.537457462661752e-06, "loss": 0.6287, "step": 1838 }, { "epoch": 0.9041297935103245, "grad_norm": 19.125, "learning_rate": 7.535657365995517e-06, "loss": 0.2015, "step": 1839 }, { "epoch": 0.904621435594887, "grad_norm": 10.25, "learning_rate": 7.533856377471241e-06, "loss": 0.3132, "step": 1840 }, { "epoch": 0.9051130776794494, "grad_norm": 10.375, "learning_rate": 7.532054497618045e-06, "loss": 0.3767, "step": 1841 }, { "epoch": 0.9056047197640118, "grad_norm": 15.4375, "learning_rate": 7.530251726965308e-06, "loss": 0.4987, "step": 1842 }, { "epoch": 0.9060963618485742, "grad_norm": 11.5625, "learning_rate": 7.528448066042673e-06, "loss": 0.3811, "step": 1843 }, { "epoch": 0.9065880039331367, "grad_norm": 29.375, "learning_rate": 7.526643515380047e-06, "loss": 0.5565, "step": 1844 }, { "epoch": 0.9070796460176991, "grad_norm": 11.5625, "learning_rate": 7.524838075507595e-06, "loss": 0.2899, "step": 1845 }, { "epoch": 0.9075712881022615, "grad_norm": 13.0, "learning_rate": 7.523031746955747e-06, "loss": 0.3494, "step": 1846 }, { "epoch": 0.908062930186824, "grad_norm": 39.5, "learning_rate": 7.5212245302551855e-06, "loss": 0.581, "step": 1847 }, { "epoch": 0.9085545722713865, "grad_norm": 14.75, "learning_rate": 7.519416425936865e-06, "loss": 0.2821, "step": 1848 }, { "epoch": 0.9090462143559489, "grad_norm": 34.75, "learning_rate": 7.517607434531997e-06, "loss": 0.3136, "step": 1849 }, { "epoch": 0.9095378564405113, "grad_norm": 39.5, "learning_rate": 7.515797556572047e-06, "loss": 0.5538, "step": 1850 }, { "epoch": 0.9100294985250738, "grad_norm": 6.78125, "learning_rate": 7.5139867925887505e-06, "loss": 0.1987, "step": 1851 }, { "epoch": 0.9105211406096362, "grad_norm": 16.125, "learning_rate": 7.512175143114099e-06, "loss": 0.3574, "step": 1852 }, { "epoch": 0.9110127826941986, "grad_norm": 11.875, "learning_rate": 7.510362608680342e-06, "loss": 0.3865, "step": 1853 }, { "epoch": 0.911504424778761, "grad_norm": 23.25, "learning_rate": 7.508549189819993e-06, "loss": 0.5254, "step": 1854 }, { "epoch": 0.9119960668633235, "grad_norm": 6.90625, "learning_rate": 7.5067348870658234e-06, "loss": 0.1687, "step": 1855 }, { "epoch": 0.9124877089478859, "grad_norm": 30.75, "learning_rate": 7.504919700950865e-06, "loss": 0.3504, "step": 1856 }, { "epoch": 0.9129793510324484, "grad_norm": 12.25, "learning_rate": 7.503103632008407e-06, "loss": 0.3695, "step": 1857 }, { "epoch": 0.9134709931170109, "grad_norm": 12.5625, "learning_rate": 7.501286680772001e-06, "loss": 0.3511, "step": 1858 }, { "epoch": 0.9139626352015733, "grad_norm": 12.8125, "learning_rate": 7.499468847775456e-06, "loss": 0.3045, "step": 1859 }, { "epoch": 0.9144542772861357, "grad_norm": 25.125, "learning_rate": 7.497650133552841e-06, "loss": 0.4903, "step": 1860 }, { "epoch": 0.9149459193706981, "grad_norm": 25.25, "learning_rate": 7.495830538638482e-06, "loss": 0.5521, "step": 1861 }, { "epoch": 0.9154375614552606, "grad_norm": 13.9375, "learning_rate": 7.494010063566968e-06, "loss": 0.4575, "step": 1862 }, { "epoch": 0.915929203539823, "grad_norm": 24.875, "learning_rate": 7.492188708873141e-06, "loss": 0.4322, "step": 1863 }, { "epoch": 0.9164208456243854, "grad_norm": 13.25, "learning_rate": 7.4903664750921055e-06, "loss": 0.311, "step": 1864 }, { "epoch": 0.9169124877089478, "grad_norm": 10.625, "learning_rate": 7.488543362759222e-06, "loss": 0.2651, "step": 1865 }, { "epoch": 0.9174041297935103, "grad_norm": 11.625, "learning_rate": 7.4867193724101115e-06, "loss": 0.2538, "step": 1866 }, { "epoch": 0.9178957718780728, "grad_norm": 14.1875, "learning_rate": 7.48489450458065e-06, "loss": 0.2597, "step": 1867 }, { "epoch": 0.9183874139626352, "grad_norm": 14.8125, "learning_rate": 7.483068759806974e-06, "loss": 0.3948, "step": 1868 }, { "epoch": 0.9188790560471977, "grad_norm": 19.875, "learning_rate": 7.4812421386254735e-06, "loss": 0.3644, "step": 1869 }, { "epoch": 0.9193706981317601, "grad_norm": 23.375, "learning_rate": 7.479414641572803e-06, "loss": 0.4416, "step": 1870 }, { "epoch": 0.9198623402163225, "grad_norm": 12.625, "learning_rate": 7.477586269185868e-06, "loss": 0.4438, "step": 1871 }, { "epoch": 0.9203539823008849, "grad_norm": 25.875, "learning_rate": 7.475757022001833e-06, "loss": 0.4544, "step": 1872 }, { "epoch": 0.9208456243854474, "grad_norm": 30.75, "learning_rate": 7.473926900558121e-06, "loss": 0.5885, "step": 1873 }, { "epoch": 0.9213372664700098, "grad_norm": 17.0, "learning_rate": 7.472095905392412e-06, "loss": 0.2632, "step": 1874 }, { "epoch": 0.9218289085545722, "grad_norm": 15.625, "learning_rate": 7.470264037042639e-06, "loss": 0.3865, "step": 1875 }, { "epoch": 0.9223205506391348, "grad_norm": 14.625, "learning_rate": 7.4684312960469955e-06, "loss": 0.4463, "step": 1876 }, { "epoch": 0.9228121927236972, "grad_norm": 23.625, "learning_rate": 7.46659768294393e-06, "loss": 0.388, "step": 1877 }, { "epoch": 0.9233038348082596, "grad_norm": 23.0, "learning_rate": 7.464763198272144e-06, "loss": 0.296, "step": 1878 }, { "epoch": 0.923795476892822, "grad_norm": 10.9375, "learning_rate": 7.462927842570602e-06, "loss": 0.2921, "step": 1879 }, { "epoch": 0.9242871189773845, "grad_norm": 15.0, "learning_rate": 7.461091616378519e-06, "loss": 0.3839, "step": 1880 }, { "epoch": 0.9247787610619469, "grad_norm": 6.9375, "learning_rate": 7.4592545202353685e-06, "loss": 0.1482, "step": 1881 }, { "epoch": 0.9252704031465093, "grad_norm": 24.375, "learning_rate": 7.457416554680876e-06, "loss": 0.537, "step": 1882 }, { "epoch": 0.9257620452310718, "grad_norm": 23.625, "learning_rate": 7.455577720255027e-06, "loss": 0.4851, "step": 1883 }, { "epoch": 0.9262536873156342, "grad_norm": 20.875, "learning_rate": 7.453738017498059e-06, "loss": 0.5862, "step": 1884 }, { "epoch": 0.9267453294001966, "grad_norm": 12.4375, "learning_rate": 7.451897446950467e-06, "loss": 0.3026, "step": 1885 }, { "epoch": 0.9272369714847591, "grad_norm": 12.5625, "learning_rate": 7.450056009152998e-06, "loss": 0.2382, "step": 1886 }, { "epoch": 0.9277286135693216, "grad_norm": 12.0, "learning_rate": 7.448213704646655e-06, "loss": 0.298, "step": 1887 }, { "epoch": 0.928220255653884, "grad_norm": 14.0625, "learning_rate": 7.446370533972699e-06, "loss": 0.3124, "step": 1888 }, { "epoch": 0.9287118977384464, "grad_norm": 20.375, "learning_rate": 7.444526497672641e-06, "loss": 0.3875, "step": 1889 }, { "epoch": 0.9292035398230089, "grad_norm": 15.3125, "learning_rate": 7.442681596288247e-06, "loss": 0.3903, "step": 1890 }, { "epoch": 0.9296951819075713, "grad_norm": 10.75, "learning_rate": 7.440835830361537e-06, "loss": 0.2756, "step": 1891 }, { "epoch": 0.9301868239921337, "grad_norm": 12.75, "learning_rate": 7.43898920043479e-06, "loss": 0.471, "step": 1892 }, { "epoch": 0.9306784660766961, "grad_norm": 21.625, "learning_rate": 7.437141707050532e-06, "loss": 0.5566, "step": 1893 }, { "epoch": 0.9311701081612586, "grad_norm": 20.125, "learning_rate": 7.435293350751546e-06, "loss": 0.336, "step": 1894 }, { "epoch": 0.9316617502458211, "grad_norm": 19.75, "learning_rate": 7.433444132080867e-06, "loss": 0.4671, "step": 1895 }, { "epoch": 0.9321533923303835, "grad_norm": 13.5, "learning_rate": 7.431594051581787e-06, "loss": 0.2678, "step": 1896 }, { "epoch": 0.932645034414946, "grad_norm": 26.25, "learning_rate": 7.429743109797847e-06, "loss": 0.6296, "step": 1897 }, { "epoch": 0.9331366764995084, "grad_norm": 13.8125, "learning_rate": 7.427891307272842e-06, "loss": 0.2987, "step": 1898 }, { "epoch": 0.9336283185840708, "grad_norm": 13.0625, "learning_rate": 7.4260386445508235e-06, "loss": 0.2813, "step": 1899 }, { "epoch": 0.9341199606686332, "grad_norm": 25.0, "learning_rate": 7.42418512217609e-06, "loss": 0.4563, "step": 1900 }, { "epoch": 0.9346116027531957, "grad_norm": 16.375, "learning_rate": 7.422330740693195e-06, "loss": 0.328, "step": 1901 }, { "epoch": 0.9351032448377581, "grad_norm": 28.375, "learning_rate": 7.420475500646946e-06, "loss": 0.327, "step": 1902 }, { "epoch": 0.9355948869223205, "grad_norm": 22.875, "learning_rate": 7.418619402582402e-06, "loss": 0.5573, "step": 1903 }, { "epoch": 0.9360865290068829, "grad_norm": 19.125, "learning_rate": 7.416762447044872e-06, "loss": 0.3454, "step": 1904 }, { "epoch": 0.9365781710914455, "grad_norm": 7.9375, "learning_rate": 7.414904634579919e-06, "loss": 0.2066, "step": 1905 }, { "epoch": 0.9370698131760079, "grad_norm": 26.875, "learning_rate": 7.413045965733358e-06, "loss": 0.4736, "step": 1906 }, { "epoch": 0.9375614552605703, "grad_norm": 21.875, "learning_rate": 7.411186441051255e-06, "loss": 0.4143, "step": 1907 }, { "epoch": 0.9380530973451328, "grad_norm": 10.875, "learning_rate": 7.409326061079927e-06, "loss": 0.342, "step": 1908 }, { "epoch": 0.9385447394296952, "grad_norm": 18.25, "learning_rate": 7.407464826365941e-06, "loss": 0.3752, "step": 1909 }, { "epoch": 0.9390363815142576, "grad_norm": 9.0, "learning_rate": 7.405602737456119e-06, "loss": 0.2868, "step": 1910 }, { "epoch": 0.93952802359882, "grad_norm": 16.25, "learning_rate": 7.403739794897528e-06, "loss": 0.2828, "step": 1911 }, { "epoch": 0.9400196656833825, "grad_norm": 16.125, "learning_rate": 7.401875999237494e-06, "loss": 0.3885, "step": 1912 }, { "epoch": 0.9405113077679449, "grad_norm": 37.5, "learning_rate": 7.400011351023585e-06, "loss": 0.5371, "step": 1913 }, { "epoch": 0.9410029498525073, "grad_norm": 12.25, "learning_rate": 7.398145850803627e-06, "loss": 0.3418, "step": 1914 }, { "epoch": 0.9414945919370699, "grad_norm": 23.375, "learning_rate": 7.396279499125691e-06, "loss": 0.6158, "step": 1915 }, { "epoch": 0.9419862340216323, "grad_norm": 31.5, "learning_rate": 7.394412296538099e-06, "loss": 0.4658, "step": 1916 }, { "epoch": 0.9424778761061947, "grad_norm": 17.625, "learning_rate": 7.392544243589427e-06, "loss": 0.2756, "step": 1917 }, { "epoch": 0.9429695181907571, "grad_norm": 33.25, "learning_rate": 7.390675340828496e-06, "loss": 0.568, "step": 1918 }, { "epoch": 0.9434611602753196, "grad_norm": 23.75, "learning_rate": 7.388805588804378e-06, "loss": 0.444, "step": 1919 }, { "epoch": 0.943952802359882, "grad_norm": 14.0625, "learning_rate": 7.386934988066395e-06, "loss": 0.2605, "step": 1920 }, { "epoch": 0.9444444444444444, "grad_norm": 13.125, "learning_rate": 7.38506353916412e-06, "loss": 0.3823, "step": 1921 }, { "epoch": 0.9449360865290068, "grad_norm": 11.25, "learning_rate": 7.383191242647371e-06, "loss": 0.4055, "step": 1922 }, { "epoch": 0.9454277286135693, "grad_norm": 22.875, "learning_rate": 7.381318099066221e-06, "loss": 0.4282, "step": 1923 }, { "epoch": 0.9459193706981318, "grad_norm": 21.375, "learning_rate": 7.379444108970985e-06, "loss": 0.3594, "step": 1924 }, { "epoch": 0.9464110127826942, "grad_norm": 23.875, "learning_rate": 7.377569272912233e-06, "loss": 0.3314, "step": 1925 }, { "epoch": 0.9469026548672567, "grad_norm": 18.0, "learning_rate": 7.375693591440777e-06, "loss": 0.3278, "step": 1926 }, { "epoch": 0.9473942969518191, "grad_norm": 20.875, "learning_rate": 7.373817065107685e-06, "loss": 0.4722, "step": 1927 }, { "epoch": 0.9478859390363815, "grad_norm": 26.125, "learning_rate": 7.371939694464267e-06, "loss": 0.2372, "step": 1928 }, { "epoch": 0.948377581120944, "grad_norm": 25.0, "learning_rate": 7.370061480062084e-06, "loss": 0.4778, "step": 1929 }, { "epoch": 0.9488692232055064, "grad_norm": 22.125, "learning_rate": 7.368182422452943e-06, "loss": 0.5502, "step": 1930 }, { "epoch": 0.9493608652900688, "grad_norm": 15.5625, "learning_rate": 7.3663025221889025e-06, "loss": 0.3392, "step": 1931 }, { "epoch": 0.9498525073746312, "grad_norm": 14.75, "learning_rate": 7.3644217798222645e-06, "loss": 0.3694, "step": 1932 }, { "epoch": 0.9503441494591937, "grad_norm": 18.875, "learning_rate": 7.362540195905578e-06, "loss": 0.3429, "step": 1933 }, { "epoch": 0.9508357915437562, "grad_norm": 35.75, "learning_rate": 7.360657770991645e-06, "loss": 0.5605, "step": 1934 }, { "epoch": 0.9513274336283186, "grad_norm": 20.625, "learning_rate": 7.358774505633508e-06, "loss": 0.3698, "step": 1935 }, { "epoch": 0.951819075712881, "grad_norm": 20.0, "learning_rate": 7.356890400384459e-06, "loss": 0.2823, "step": 1936 }, { "epoch": 0.9523107177974435, "grad_norm": 33.75, "learning_rate": 7.355005455798038e-06, "loss": 0.5324, "step": 1937 }, { "epoch": 0.9528023598820059, "grad_norm": 18.625, "learning_rate": 7.353119672428031e-06, "loss": 0.3986, "step": 1938 }, { "epoch": 0.9532940019665683, "grad_norm": 20.125, "learning_rate": 7.351233050828468e-06, "loss": 0.4375, "step": 1939 }, { "epoch": 0.9537856440511308, "grad_norm": 24.0, "learning_rate": 7.34934559155363e-06, "loss": 0.4643, "step": 1940 }, { "epoch": 0.9542772861356932, "grad_norm": 23.5, "learning_rate": 7.347457295158037e-06, "loss": 0.3947, "step": 1941 }, { "epoch": 0.9547689282202556, "grad_norm": 26.0, "learning_rate": 7.345568162196463e-06, "loss": 0.5282, "step": 1942 }, { "epoch": 0.9552605703048181, "grad_norm": 29.75, "learning_rate": 7.343678193223924e-06, "loss": 0.5969, "step": 1943 }, { "epoch": 0.9557522123893806, "grad_norm": 14.3125, "learning_rate": 7.341787388795679e-06, "loss": 0.385, "step": 1944 }, { "epoch": 0.956243854473943, "grad_norm": 16.25, "learning_rate": 7.339895749467238e-06, "loss": 0.4239, "step": 1945 }, { "epoch": 0.9567354965585054, "grad_norm": 27.375, "learning_rate": 7.338003275794351e-06, "loss": 0.5013, "step": 1946 }, { "epoch": 0.9572271386430679, "grad_norm": 19.5, "learning_rate": 7.336109968333016e-06, "loss": 0.4176, "step": 1947 }, { "epoch": 0.9577187807276303, "grad_norm": 13.1875, "learning_rate": 7.334215827639477e-06, "loss": 0.2098, "step": 1948 }, { "epoch": 0.9582104228121927, "grad_norm": 21.625, "learning_rate": 7.3323208542702185e-06, "loss": 0.4169, "step": 1949 }, { "epoch": 0.9587020648967551, "grad_norm": 9.6875, "learning_rate": 7.330425048781974e-06, "loss": 0.2674, "step": 1950 }, { "epoch": 0.9591937069813176, "grad_norm": 19.375, "learning_rate": 7.3285284117317205e-06, "loss": 0.3521, "step": 1951 }, { "epoch": 0.95968534906588, "grad_norm": 23.625, "learning_rate": 7.326630943676679e-06, "loss": 0.3347, "step": 1952 }, { "epoch": 0.9601769911504425, "grad_norm": 17.875, "learning_rate": 7.324732645174311e-06, "loss": 0.2985, "step": 1953 }, { "epoch": 0.960668633235005, "grad_norm": 17.0, "learning_rate": 7.322833516782329e-06, "loss": 0.4148, "step": 1954 }, { "epoch": 0.9611602753195674, "grad_norm": 19.0, "learning_rate": 7.320933559058684e-06, "loss": 0.175, "step": 1955 }, { "epoch": 0.9616519174041298, "grad_norm": 35.25, "learning_rate": 7.319032772561572e-06, "loss": 0.4685, "step": 1956 }, { "epoch": 0.9621435594886922, "grad_norm": 18.625, "learning_rate": 7.317131157849433e-06, "loss": 0.4353, "step": 1957 }, { "epoch": 0.9626352015732547, "grad_norm": 16.25, "learning_rate": 7.315228715480951e-06, "loss": 0.3951, "step": 1958 }, { "epoch": 0.9631268436578171, "grad_norm": 20.75, "learning_rate": 7.31332544601505e-06, "loss": 0.266, "step": 1959 }, { "epoch": 0.9636184857423795, "grad_norm": 25.625, "learning_rate": 7.311421350010904e-06, "loss": 0.4759, "step": 1960 }, { "epoch": 0.9641101278269419, "grad_norm": 21.75, "learning_rate": 7.309516428027918e-06, "loss": 0.3158, "step": 1961 }, { "epoch": 0.9646017699115044, "grad_norm": 28.375, "learning_rate": 7.307610680625752e-06, "loss": 0.3032, "step": 1962 }, { "epoch": 0.9650934119960669, "grad_norm": 34.5, "learning_rate": 7.3057041083643015e-06, "loss": 0.2671, "step": 1963 }, { "epoch": 0.9655850540806293, "grad_norm": 9.625, "learning_rate": 7.303796711803706e-06, "loss": 0.1583, "step": 1964 }, { "epoch": 0.9660766961651918, "grad_norm": 19.75, "learning_rate": 7.301888491504348e-06, "loss": 0.2094, "step": 1965 }, { "epoch": 0.9665683382497542, "grad_norm": 25.375, "learning_rate": 7.29997944802685e-06, "loss": 0.473, "step": 1966 }, { "epoch": 0.9670599803343166, "grad_norm": 19.875, "learning_rate": 7.2980695819320794e-06, "loss": 0.4431, "step": 1967 }, { "epoch": 0.967551622418879, "grad_norm": 11.5, "learning_rate": 7.29615889378114e-06, "loss": 0.237, "step": 1968 }, { "epoch": 0.9680432645034415, "grad_norm": 19.75, "learning_rate": 7.294247384135386e-06, "loss": 0.2338, "step": 1969 }, { "epoch": 0.9685349065880039, "grad_norm": 22.0, "learning_rate": 7.292335053556404e-06, "loss": 0.4336, "step": 1970 }, { "epoch": 0.9690265486725663, "grad_norm": 50.0, "learning_rate": 7.290421902606024e-06, "loss": 0.6236, "step": 1971 }, { "epoch": 0.9695181907571289, "grad_norm": 36.25, "learning_rate": 7.288507931846321e-06, "loss": 0.3715, "step": 1972 }, { "epoch": 0.9700098328416913, "grad_norm": 22.375, "learning_rate": 7.286593141839609e-06, "loss": 0.3898, "step": 1973 }, { "epoch": 0.9705014749262537, "grad_norm": 19.875, "learning_rate": 7.284677533148439e-06, "loss": 0.3442, "step": 1974 }, { "epoch": 0.9709931170108161, "grad_norm": 17.0, "learning_rate": 7.2827611063356064e-06, "loss": 0.4611, "step": 1975 }, { "epoch": 0.9714847590953786, "grad_norm": 27.0, "learning_rate": 7.280843861964147e-06, "loss": 0.402, "step": 1976 }, { "epoch": 0.971976401179941, "grad_norm": 32.5, "learning_rate": 7.278925800597335e-06, "loss": 0.3996, "step": 1977 }, { "epoch": 0.9724680432645034, "grad_norm": 25.875, "learning_rate": 7.2770069227986854e-06, "loss": 0.288, "step": 1978 }, { "epoch": 0.9729596853490659, "grad_norm": 12.5625, "learning_rate": 7.275087229131952e-06, "loss": 0.3365, "step": 1979 }, { "epoch": 0.9734513274336283, "grad_norm": 25.375, "learning_rate": 7.2731667201611316e-06, "loss": 0.3879, "step": 1980 }, { "epoch": 0.9739429695181907, "grad_norm": 13.5, "learning_rate": 7.271245396450458e-06, "loss": 0.3038, "step": 1981 }, { "epoch": 0.9744346116027532, "grad_norm": 9.375, "learning_rate": 7.269323258564404e-06, "loss": 0.2026, "step": 1982 }, { "epoch": 0.9749262536873157, "grad_norm": 20.625, "learning_rate": 7.26740030706768e-06, "loss": 0.333, "step": 1983 }, { "epoch": 0.9754178957718781, "grad_norm": 13.1875, "learning_rate": 7.265476542525239e-06, "loss": 0.3093, "step": 1984 }, { "epoch": 0.9759095378564405, "grad_norm": 32.5, "learning_rate": 7.263551965502275e-06, "loss": 0.6507, "step": 1985 }, { "epoch": 0.976401179941003, "grad_norm": 31.0, "learning_rate": 7.261626576564214e-06, "loss": 0.2582, "step": 1986 }, { "epoch": 0.9768928220255654, "grad_norm": 16.625, "learning_rate": 7.259700376276724e-06, "loss": 0.453, "step": 1987 }, { "epoch": 0.9773844641101278, "grad_norm": 20.125, "learning_rate": 7.257773365205711e-06, "loss": 0.3544, "step": 1988 }, { "epoch": 0.9778761061946902, "grad_norm": 33.25, "learning_rate": 7.255845543917321e-06, "loss": 0.4926, "step": 1989 }, { "epoch": 0.9783677482792527, "grad_norm": 12.4375, "learning_rate": 7.253916912977934e-06, "loss": 0.2337, "step": 1990 }, { "epoch": 0.9788593903638152, "grad_norm": 17.0, "learning_rate": 7.251987472954173e-06, "loss": 0.3089, "step": 1991 }, { "epoch": 0.9793510324483776, "grad_norm": 29.25, "learning_rate": 7.250057224412894e-06, "loss": 0.4286, "step": 1992 }, { "epoch": 0.97984267453294, "grad_norm": 18.125, "learning_rate": 7.248126167921193e-06, "loss": 0.3739, "step": 1993 }, { "epoch": 0.9803343166175025, "grad_norm": 14.3125, "learning_rate": 7.246194304046403e-06, "loss": 0.2769, "step": 1994 }, { "epoch": 0.9808259587020649, "grad_norm": 27.5, "learning_rate": 7.244261633356093e-06, "loss": 0.5077, "step": 1995 }, { "epoch": 0.9813176007866273, "grad_norm": 20.625, "learning_rate": 7.242328156418073e-06, "loss": 0.3559, "step": 1996 }, { "epoch": 0.9818092428711898, "grad_norm": 21.875, "learning_rate": 7.240393873800384e-06, "loss": 0.3015, "step": 1997 }, { "epoch": 0.9823008849557522, "grad_norm": 20.5, "learning_rate": 7.238458786071309e-06, "loss": 0.2608, "step": 1998 }, { "epoch": 0.9827925270403146, "grad_norm": 25.5, "learning_rate": 7.236522893799363e-06, "loss": 0.2686, "step": 1999 }, { "epoch": 0.983284169124877, "grad_norm": 20.75, "learning_rate": 7.234586197553302e-06, "loss": 0.2316, "step": 2000 }, { "epoch": 0.983284169124877, "eval_loss": 0.3799794316291809, "eval_runtime": 66.3677, "eval_samples_per_second": 122.59, "eval_spearman": 0.5590411940377413, "eval_steps_per_second": 15.324, "step": 2000 }, { "epoch": 0.9837758112094396, "grad_norm": 40.0, "learning_rate": 7.232648697902113e-06, "loss": 0.4724, "step": 2001 }, { "epoch": 0.984267453294002, "grad_norm": 22.125, "learning_rate": 7.230710395415025e-06, "loss": 0.4167, "step": 2002 }, { "epoch": 0.9847590953785644, "grad_norm": 17.25, "learning_rate": 7.228771290661497e-06, "loss": 0.205, "step": 2003 }, { "epoch": 0.9852507374631269, "grad_norm": 34.25, "learning_rate": 7.226831384211228e-06, "loss": 0.5632, "step": 2004 }, { "epoch": 0.9857423795476893, "grad_norm": 19.875, "learning_rate": 7.22489067663415e-06, "loss": 0.3675, "step": 2005 }, { "epoch": 0.9862340216322517, "grad_norm": 15.1875, "learning_rate": 7.2229491685004325e-06, "loss": 0.2477, "step": 2006 }, { "epoch": 0.9867256637168141, "grad_norm": 32.75, "learning_rate": 7.221006860380478e-06, "loss": 0.4107, "step": 2007 }, { "epoch": 0.9872173058013766, "grad_norm": 8.75, "learning_rate": 7.219063752844925e-06, "loss": 0.1468, "step": 2008 }, { "epoch": 0.987708947885939, "grad_norm": 31.125, "learning_rate": 7.2171198464646485e-06, "loss": 0.8154, "step": 2009 }, { "epoch": 0.9882005899705014, "grad_norm": 31.375, "learning_rate": 7.215175141810755e-06, "loss": 0.4152, "step": 2010 }, { "epoch": 0.988692232055064, "grad_norm": 23.75, "learning_rate": 7.213229639454588e-06, "loss": 0.4562, "step": 2011 }, { "epoch": 0.9891838741396264, "grad_norm": 28.125, "learning_rate": 7.211283339967723e-06, "loss": 0.4017, "step": 2012 }, { "epoch": 0.9896755162241888, "grad_norm": 15.0625, "learning_rate": 7.209336243921971e-06, "loss": 0.3676, "step": 2013 }, { "epoch": 0.9901671583087512, "grad_norm": 29.75, "learning_rate": 7.207388351889381e-06, "loss": 0.5652, "step": 2014 }, { "epoch": 0.9906588003933137, "grad_norm": 29.5, "learning_rate": 7.205439664442229e-06, "loss": 0.4538, "step": 2015 }, { "epoch": 0.9911504424778761, "grad_norm": 29.375, "learning_rate": 7.203490182153027e-06, "loss": 0.4807, "step": 2016 }, { "epoch": 0.9916420845624385, "grad_norm": 13.1875, "learning_rate": 7.201539905594523e-06, "loss": 0.2434, "step": 2017 }, { "epoch": 0.992133726647001, "grad_norm": 21.75, "learning_rate": 7.199588835339697e-06, "loss": 0.377, "step": 2018 }, { "epoch": 0.9926253687315634, "grad_norm": 30.875, "learning_rate": 7.197636971961761e-06, "loss": 0.3767, "step": 2019 }, { "epoch": 0.9931170108161259, "grad_norm": 10.8125, "learning_rate": 7.195684316034161e-06, "loss": 0.3169, "step": 2020 }, { "epoch": 0.9936086529006883, "grad_norm": 16.125, "learning_rate": 7.193730868130575e-06, "loss": 0.3744, "step": 2021 }, { "epoch": 0.9941002949852508, "grad_norm": 23.875, "learning_rate": 7.191776628824913e-06, "loss": 0.4412, "step": 2022 }, { "epoch": 0.9945919370698132, "grad_norm": 16.375, "learning_rate": 7.189821598691323e-06, "loss": 0.3425, "step": 2023 }, { "epoch": 0.9950835791543756, "grad_norm": 38.0, "learning_rate": 7.187865778304178e-06, "loss": 0.4724, "step": 2024 }, { "epoch": 0.995575221238938, "grad_norm": 11.6875, "learning_rate": 7.1859091682380886e-06, "loss": 0.2763, "step": 2025 }, { "epoch": 0.9960668633235005, "grad_norm": 43.25, "learning_rate": 7.183951769067894e-06, "loss": 0.597, "step": 2026 }, { "epoch": 0.9965585054080629, "grad_norm": 18.75, "learning_rate": 7.181993581368664e-06, "loss": 0.3162, "step": 2027 }, { "epoch": 0.9970501474926253, "grad_norm": 14.625, "learning_rate": 7.180034605715706e-06, "loss": 0.2968, "step": 2028 }, { "epoch": 0.9975417895771878, "grad_norm": 22.125, "learning_rate": 7.178074842684554e-06, "loss": 0.2939, "step": 2029 }, { "epoch": 0.9980334316617503, "grad_norm": 26.75, "learning_rate": 7.1761142928509746e-06, "loss": 0.5059, "step": 2030 }, { "epoch": 0.9985250737463127, "grad_norm": 34.75, "learning_rate": 7.174152956790967e-06, "loss": 0.4563, "step": 2031 }, { "epoch": 0.9990167158308751, "grad_norm": 13.375, "learning_rate": 7.1721908350807566e-06, "loss": 0.2314, "step": 2032 }, { "epoch": 0.9995083579154376, "grad_norm": 13.625, "learning_rate": 7.170227928296808e-06, "loss": 0.2155, "step": 2033 }, { "epoch": 1.0, "grad_norm": 17.125, "learning_rate": 7.1682642370158076e-06, "loss": 0.3964, "step": 2034 }, { "epoch": 1.0004916420845624, "grad_norm": 15.75, "learning_rate": 7.166299761814677e-06, "loss": 0.2852, "step": 2035 }, { "epoch": 1.0009832841691249, "grad_norm": 31.25, "learning_rate": 7.164334503270569e-06, "loss": 0.4856, "step": 2036 }, { "epoch": 1.0014749262536873, "grad_norm": 22.25, "learning_rate": 7.162368461960864e-06, "loss": 0.2488, "step": 2037 }, { "epoch": 1.0019665683382497, "grad_norm": 14.4375, "learning_rate": 7.16040163846317e-06, "loss": 0.3613, "step": 2038 }, { "epoch": 1.0024582104228121, "grad_norm": 15.75, "learning_rate": 7.158434033355334e-06, "loss": 0.386, "step": 2039 }, { "epoch": 1.0029498525073746, "grad_norm": 9.5625, "learning_rate": 7.156465647215422e-06, "loss": 0.3142, "step": 2040 }, { "epoch": 1.003441494591937, "grad_norm": 12.0, "learning_rate": 7.154496480621736e-06, "loss": 0.3392, "step": 2041 }, { "epoch": 1.0039331366764994, "grad_norm": 17.0, "learning_rate": 7.152526534152805e-06, "loss": 0.2911, "step": 2042 }, { "epoch": 1.0044247787610618, "grad_norm": 14.3125, "learning_rate": 7.1505558083873875e-06, "loss": 0.3405, "step": 2043 }, { "epoch": 1.0049164208456245, "grad_norm": 17.875, "learning_rate": 7.148584303904472e-06, "loss": 0.2683, "step": 2044 }, { "epoch": 1.005408062930187, "grad_norm": 10.3125, "learning_rate": 7.146612021283272e-06, "loss": 0.325, "step": 2045 }, { "epoch": 1.0058997050147493, "grad_norm": 26.0, "learning_rate": 7.144638961103236e-06, "loss": 0.4512, "step": 2046 }, { "epoch": 1.0063913470993118, "grad_norm": 19.875, "learning_rate": 7.142665123944036e-06, "loss": 0.3975, "step": 2047 }, { "epoch": 1.0068829891838742, "grad_norm": 25.375, "learning_rate": 7.140690510385572e-06, "loss": 0.2642, "step": 2048 }, { "epoch": 1.0073746312684366, "grad_norm": 12.125, "learning_rate": 7.138715121007973e-06, "loss": 0.1222, "step": 2049 }, { "epoch": 1.007866273352999, "grad_norm": 11.0, "learning_rate": 7.136738956391601e-06, "loss": 0.295, "step": 2050 }, { "epoch": 1.0083579154375615, "grad_norm": 18.75, "learning_rate": 7.134762017117038e-06, "loss": 0.3137, "step": 2051 }, { "epoch": 1.008849557522124, "grad_norm": 21.0, "learning_rate": 7.132784303765098e-06, "loss": 0.2418, "step": 2052 }, { "epoch": 1.0093411996066863, "grad_norm": 18.875, "learning_rate": 7.130805816916821e-06, "loss": 0.3789, "step": 2053 }, { "epoch": 1.0098328416912488, "grad_norm": 15.0, "learning_rate": 7.128826557153475e-06, "loss": 0.4227, "step": 2054 }, { "epoch": 1.0103244837758112, "grad_norm": 24.625, "learning_rate": 7.126846525056555e-06, "loss": 0.3909, "step": 2055 }, { "epoch": 1.0108161258603736, "grad_norm": 19.0, "learning_rate": 7.124865721207781e-06, "loss": 0.3335, "step": 2056 }, { "epoch": 1.011307767944936, "grad_norm": 24.625, "learning_rate": 7.122884146189103e-06, "loss": 0.4459, "step": 2057 }, { "epoch": 1.0117994100294985, "grad_norm": 11.875, "learning_rate": 7.120901800582697e-06, "loss": 0.2576, "step": 2058 }, { "epoch": 1.012291052114061, "grad_norm": 24.5, "learning_rate": 7.118918684970962e-06, "loss": 0.3803, "step": 2059 }, { "epoch": 1.0127826941986233, "grad_norm": 17.375, "learning_rate": 7.1169347999365255e-06, "loss": 0.3421, "step": 2060 }, { "epoch": 1.0132743362831858, "grad_norm": 16.75, "learning_rate": 7.114950146062242e-06, "loss": 0.2846, "step": 2061 }, { "epoch": 1.0137659783677482, "grad_norm": 40.25, "learning_rate": 7.112964723931192e-06, "loss": 0.5627, "step": 2062 }, { "epoch": 1.0142576204523108, "grad_norm": 18.875, "learning_rate": 7.11097853412668e-06, "loss": 0.3028, "step": 2063 }, { "epoch": 1.0147492625368733, "grad_norm": 19.875, "learning_rate": 7.108991577232235e-06, "loss": 0.2721, "step": 2064 }, { "epoch": 1.0152409046214357, "grad_norm": 24.625, "learning_rate": 7.107003853831615e-06, "loss": 0.4735, "step": 2065 }, { "epoch": 1.015732546705998, "grad_norm": 35.5, "learning_rate": 7.105015364508801e-06, "loss": 0.6584, "step": 2066 }, { "epoch": 1.0162241887905605, "grad_norm": 12.125, "learning_rate": 7.103026109847998e-06, "loss": 0.108, "step": 2067 }, { "epoch": 1.016715830875123, "grad_norm": 14.75, "learning_rate": 7.101036090433639e-06, "loss": 0.3362, "step": 2068 }, { "epoch": 1.0172074729596854, "grad_norm": 28.25, "learning_rate": 7.0990453068503795e-06, "loss": 0.4631, "step": 2069 }, { "epoch": 1.0176991150442478, "grad_norm": 18.5, "learning_rate": 7.0970537596831e-06, "loss": 0.3031, "step": 2070 }, { "epoch": 1.0181907571288102, "grad_norm": 11.9375, "learning_rate": 7.095061449516903e-06, "loss": 0.2508, "step": 2071 }, { "epoch": 1.0186823992133727, "grad_norm": 19.5, "learning_rate": 7.0930683769371195e-06, "loss": 0.2485, "step": 2072 }, { "epoch": 1.019174041297935, "grad_norm": 19.375, "learning_rate": 7.0910745425293015e-06, "loss": 0.3669, "step": 2073 }, { "epoch": 1.0196656833824975, "grad_norm": 26.5, "learning_rate": 7.0890799468792265e-06, "loss": 0.4321, "step": 2074 }, { "epoch": 1.02015732546706, "grad_norm": 21.75, "learning_rate": 7.087084590572894e-06, "loss": 0.3682, "step": 2075 }, { "epoch": 1.0206489675516224, "grad_norm": 28.25, "learning_rate": 7.085088474196527e-06, "loss": 0.3534, "step": 2076 }, { "epoch": 1.0211406096361848, "grad_norm": 32.0, "learning_rate": 7.083091598336576e-06, "loss": 0.3694, "step": 2077 }, { "epoch": 1.0216322517207472, "grad_norm": 24.25, "learning_rate": 7.081093963579708e-06, "loss": 0.4486, "step": 2078 }, { "epoch": 1.0221238938053097, "grad_norm": 15.6875, "learning_rate": 7.079095570512817e-06, "loss": 0.3643, "step": 2079 }, { "epoch": 1.022615535889872, "grad_norm": 25.125, "learning_rate": 7.077096419723018e-06, "loss": 0.2593, "step": 2080 }, { "epoch": 1.0231071779744345, "grad_norm": 16.125, "learning_rate": 7.075096511797653e-06, "loss": 0.2721, "step": 2081 }, { "epoch": 1.023598820058997, "grad_norm": 21.625, "learning_rate": 7.073095847324277e-06, "loss": 0.3489, "step": 2082 }, { "epoch": 1.0240904621435596, "grad_norm": 23.5, "learning_rate": 7.07109442689068e-06, "loss": 0.1776, "step": 2083 }, { "epoch": 1.024582104228122, "grad_norm": 31.25, "learning_rate": 7.069092251084865e-06, "loss": 0.5847, "step": 2084 }, { "epoch": 1.0250737463126844, "grad_norm": 17.125, "learning_rate": 7.067089320495057e-06, "loss": 0.2146, "step": 2085 }, { "epoch": 1.0255653883972469, "grad_norm": 30.625, "learning_rate": 7.065085635709706e-06, "loss": 0.4283, "step": 2086 }, { "epoch": 1.0260570304818093, "grad_norm": 31.625, "learning_rate": 7.063081197317484e-06, "loss": 0.2974, "step": 2087 }, { "epoch": 1.0265486725663717, "grad_norm": 28.0, "learning_rate": 7.061076005907283e-06, "loss": 0.3863, "step": 2088 }, { "epoch": 1.0270403146509341, "grad_norm": 19.5, "learning_rate": 7.059070062068215e-06, "loss": 0.361, "step": 2089 }, { "epoch": 1.0275319567354966, "grad_norm": 11.4375, "learning_rate": 7.0570633663896166e-06, "loss": 0.3546, "step": 2090 }, { "epoch": 1.028023598820059, "grad_norm": 19.625, "learning_rate": 7.05505591946104e-06, "loss": 0.2688, "step": 2091 }, { "epoch": 1.0285152409046214, "grad_norm": 6.28125, "learning_rate": 7.053047721872265e-06, "loss": 0.1095, "step": 2092 }, { "epoch": 1.0290068829891839, "grad_norm": 24.125, "learning_rate": 7.051038774213283e-06, "loss": 0.4517, "step": 2093 }, { "epoch": 1.0294985250737463, "grad_norm": 16.125, "learning_rate": 7.0490290770743145e-06, "loss": 0.2647, "step": 2094 }, { "epoch": 1.0299901671583087, "grad_norm": 32.0, "learning_rate": 7.047018631045796e-06, "loss": 0.4528, "step": 2095 }, { "epoch": 1.0304818092428711, "grad_norm": 18.875, "learning_rate": 7.045007436718384e-06, "loss": 0.3265, "step": 2096 }, { "epoch": 1.0309734513274336, "grad_norm": 20.0, "learning_rate": 7.042995494682955e-06, "loss": 0.4992, "step": 2097 }, { "epoch": 1.031465093411996, "grad_norm": 17.875, "learning_rate": 7.040982805530606e-06, "loss": 0.4191, "step": 2098 }, { "epoch": 1.0319567354965584, "grad_norm": 28.5, "learning_rate": 7.038969369852655e-06, "loss": 0.2932, "step": 2099 }, { "epoch": 1.0324483775811208, "grad_norm": 35.25, "learning_rate": 7.036955188240634e-06, "loss": 0.62, "step": 2100 }, { "epoch": 1.0329400196656833, "grad_norm": 22.125, "learning_rate": 7.034940261286299e-06, "loss": 0.226, "step": 2101 }, { "epoch": 1.033431661750246, "grad_norm": 17.875, "learning_rate": 7.032924589581625e-06, "loss": 0.312, "step": 2102 }, { "epoch": 1.0339233038348083, "grad_norm": 23.0, "learning_rate": 7.030908173718803e-06, "loss": 0.3009, "step": 2103 }, { "epoch": 1.0344149459193708, "grad_norm": 26.375, "learning_rate": 7.028891014290242e-06, "loss": 0.3144, "step": 2104 }, { "epoch": 1.0349065880039332, "grad_norm": 27.125, "learning_rate": 7.0268731118885765e-06, "loss": 0.4667, "step": 2105 }, { "epoch": 1.0353982300884956, "grad_norm": 22.75, "learning_rate": 7.024854467106649e-06, "loss": 0.2486, "step": 2106 }, { "epoch": 1.035889872173058, "grad_norm": 14.75, "learning_rate": 7.022835080537529e-06, "loss": 0.4107, "step": 2107 }, { "epoch": 1.0363815142576205, "grad_norm": 33.0, "learning_rate": 7.020814952774496e-06, "loss": 0.3171, "step": 2108 }, { "epoch": 1.036873156342183, "grad_norm": 23.0, "learning_rate": 7.018794084411057e-06, "loss": 0.1931, "step": 2109 }, { "epoch": 1.0373647984267453, "grad_norm": 24.5, "learning_rate": 7.016772476040928e-06, "loss": 0.2015, "step": 2110 }, { "epoch": 1.0378564405113078, "grad_norm": 21.5, "learning_rate": 7.014750128258044e-06, "loss": 0.268, "step": 2111 }, { "epoch": 1.0383480825958702, "grad_norm": 28.0, "learning_rate": 7.0127270416565614e-06, "loss": 0.2824, "step": 2112 }, { "epoch": 1.0388397246804326, "grad_norm": 28.0, "learning_rate": 7.0107032168308515e-06, "loss": 0.2138, "step": 2113 }, { "epoch": 1.039331366764995, "grad_norm": 13.6875, "learning_rate": 7.008678654375498e-06, "loss": 0.415, "step": 2114 }, { "epoch": 1.0398230088495575, "grad_norm": 28.25, "learning_rate": 7.006653354885309e-06, "loss": 0.3417, "step": 2115 }, { "epoch": 1.04031465093412, "grad_norm": 20.375, "learning_rate": 7.004627318955304e-06, "loss": 0.4131, "step": 2116 }, { "epoch": 1.0408062930186823, "grad_norm": 12.625, "learning_rate": 7.002600547180721e-06, "loss": 0.3891, "step": 2117 }, { "epoch": 1.0412979351032448, "grad_norm": 24.5, "learning_rate": 7.000573040157013e-06, "loss": 0.2913, "step": 2118 }, { "epoch": 1.0417895771878072, "grad_norm": 21.0, "learning_rate": 6.998544798479849e-06, "loss": 0.3187, "step": 2119 }, { "epoch": 1.0422812192723696, "grad_norm": 9.75, "learning_rate": 6.996515822745114e-06, "loss": 0.3277, "step": 2120 }, { "epoch": 1.0427728613569323, "grad_norm": 16.5, "learning_rate": 6.9944861135489125e-06, "loss": 0.3088, "step": 2121 }, { "epoch": 1.0432645034414947, "grad_norm": 19.375, "learning_rate": 6.992455671487556e-06, "loss": 0.2449, "step": 2122 }, { "epoch": 1.043756145526057, "grad_norm": 45.5, "learning_rate": 6.990424497157579e-06, "loss": 0.4373, "step": 2123 }, { "epoch": 1.0442477876106195, "grad_norm": 41.25, "learning_rate": 6.988392591155727e-06, "loss": 0.6462, "step": 2124 }, { "epoch": 1.044739429695182, "grad_norm": 34.0, "learning_rate": 6.986359954078965e-06, "loss": 0.4825, "step": 2125 }, { "epoch": 1.0452310717797444, "grad_norm": 12.1875, "learning_rate": 6.984326586524466e-06, "loss": 0.1457, "step": 2126 }, { "epoch": 1.0457227138643068, "grad_norm": 42.75, "learning_rate": 6.982292489089622e-06, "loss": 0.4809, "step": 2127 }, { "epoch": 1.0462143559488692, "grad_norm": 23.625, "learning_rate": 6.9802576623720404e-06, "loss": 0.3425, "step": 2128 }, { "epoch": 1.0467059980334317, "grad_norm": 19.625, "learning_rate": 6.978222106969542e-06, "loss": 0.2902, "step": 2129 }, { "epoch": 1.047197640117994, "grad_norm": 27.25, "learning_rate": 6.976185823480155e-06, "loss": 0.3163, "step": 2130 }, { "epoch": 1.0476892822025565, "grad_norm": 23.25, "learning_rate": 6.974148812502134e-06, "loss": 0.4269, "step": 2131 }, { "epoch": 1.048180924287119, "grad_norm": 29.625, "learning_rate": 6.972111074633938e-06, "loss": 0.4312, "step": 2132 }, { "epoch": 1.0486725663716814, "grad_norm": 18.125, "learning_rate": 6.970072610474242e-06, "loss": 0.2994, "step": 2133 }, { "epoch": 1.0491642084562438, "grad_norm": 10.125, "learning_rate": 6.9680334206219345e-06, "loss": 0.3174, "step": 2134 }, { "epoch": 1.0496558505408062, "grad_norm": 43.5, "learning_rate": 6.965993505676117e-06, "loss": 0.4388, "step": 2135 }, { "epoch": 1.0501474926253687, "grad_norm": 36.5, "learning_rate": 6.9639528662361075e-06, "loss": 0.5326, "step": 2136 }, { "epoch": 1.050639134709931, "grad_norm": 15.375, "learning_rate": 6.961911502901429e-06, "loss": 0.2991, "step": 2137 }, { "epoch": 1.0511307767944935, "grad_norm": 21.25, "learning_rate": 6.9598694162718265e-06, "loss": 0.3057, "step": 2138 }, { "epoch": 1.051622418879056, "grad_norm": 20.625, "learning_rate": 6.957826606947248e-06, "loss": 0.2717, "step": 2139 }, { "epoch": 1.0521140609636186, "grad_norm": 51.5, "learning_rate": 6.955783075527863e-06, "loss": 0.6407, "step": 2140 }, { "epoch": 1.052605703048181, "grad_norm": 13.6875, "learning_rate": 6.953738822614048e-06, "loss": 0.3398, "step": 2141 }, { "epoch": 1.0530973451327434, "grad_norm": 19.0, "learning_rate": 6.9516938488063885e-06, "loss": 0.1913, "step": 2142 }, { "epoch": 1.0535889872173059, "grad_norm": 12.3125, "learning_rate": 6.94964815470569e-06, "loss": 0.2696, "step": 2143 }, { "epoch": 1.0540806293018683, "grad_norm": 20.0, "learning_rate": 6.947601740912964e-06, "loss": 0.3846, "step": 2144 }, { "epoch": 1.0545722713864307, "grad_norm": 14.3125, "learning_rate": 6.945554608029434e-06, "loss": 0.3218, "step": 2145 }, { "epoch": 1.0550639134709932, "grad_norm": 20.875, "learning_rate": 6.943506756656537e-06, "loss": 0.3599, "step": 2146 }, { "epoch": 1.0555555555555556, "grad_norm": 14.9375, "learning_rate": 6.941458187395918e-06, "loss": 0.3788, "step": 2147 }, { "epoch": 1.056047197640118, "grad_norm": 18.875, "learning_rate": 6.939408900849433e-06, "loss": 0.3291, "step": 2148 }, { "epoch": 1.0565388397246804, "grad_norm": 33.75, "learning_rate": 6.937358897619155e-06, "loss": 0.3012, "step": 2149 }, { "epoch": 1.0570304818092429, "grad_norm": 23.125, "learning_rate": 6.935308178307359e-06, "loss": 0.2831, "step": 2150 }, { "epoch": 1.0575221238938053, "grad_norm": 24.375, "learning_rate": 6.933256743516535e-06, "loss": 0.4282, "step": 2151 }, { "epoch": 1.0580137659783677, "grad_norm": 7.6875, "learning_rate": 6.93120459384938e-06, "loss": 0.2568, "step": 2152 }, { "epoch": 1.0585054080629301, "grad_norm": 31.75, "learning_rate": 6.929151729908807e-06, "loss": 0.3702, "step": 2153 }, { "epoch": 1.0589970501474926, "grad_norm": 39.25, "learning_rate": 6.927098152297934e-06, "loss": 0.2437, "step": 2154 }, { "epoch": 1.059488692232055, "grad_norm": 34.0, "learning_rate": 6.92504386162009e-06, "loss": 0.4851, "step": 2155 }, { "epoch": 1.0599803343166174, "grad_norm": 38.0, "learning_rate": 6.922988858478813e-06, "loss": 0.5009, "step": 2156 }, { "epoch": 1.0604719764011798, "grad_norm": 17.75, "learning_rate": 6.920933143477849e-06, "loss": 0.3103, "step": 2157 }, { "epoch": 1.0609636184857423, "grad_norm": 14.4375, "learning_rate": 6.918876717221159e-06, "loss": 0.3154, "step": 2158 }, { "epoch": 1.061455260570305, "grad_norm": 16.125, "learning_rate": 6.916819580312903e-06, "loss": 0.4285, "step": 2159 }, { "epoch": 1.0619469026548674, "grad_norm": 17.375, "learning_rate": 6.914761733357461e-06, "loss": 0.3328, "step": 2160 }, { "epoch": 1.0624385447394298, "grad_norm": 18.75, "learning_rate": 6.912703176959414e-06, "loss": 0.2368, "step": 2161 }, { "epoch": 1.0629301868239922, "grad_norm": 15.25, "learning_rate": 6.9106439117235534e-06, "loss": 0.2712, "step": 2162 }, { "epoch": 1.0634218289085546, "grad_norm": 17.5, "learning_rate": 6.908583938254878e-06, "loss": 0.2784, "step": 2163 }, { "epoch": 1.063913470993117, "grad_norm": 31.125, "learning_rate": 6.906523257158598e-06, "loss": 0.3722, "step": 2164 }, { "epoch": 1.0644051130776795, "grad_norm": 35.0, "learning_rate": 6.904461869040128e-06, "loss": 0.4089, "step": 2165 }, { "epoch": 1.064896755162242, "grad_norm": 22.75, "learning_rate": 6.902399774505093e-06, "loss": 0.4149, "step": 2166 }, { "epoch": 1.0653883972468043, "grad_norm": 17.125, "learning_rate": 6.90033697415932e-06, "loss": 0.2738, "step": 2167 }, { "epoch": 1.0658800393313668, "grad_norm": 30.875, "learning_rate": 6.898273468608853e-06, "loss": 0.2814, "step": 2168 }, { "epoch": 1.0663716814159292, "grad_norm": 20.375, "learning_rate": 6.8962092584599334e-06, "loss": 0.389, "step": 2169 }, { "epoch": 1.0668633235004916, "grad_norm": 13.875, "learning_rate": 6.894144344319014e-06, "loss": 0.3464, "step": 2170 }, { "epoch": 1.067354965585054, "grad_norm": 16.625, "learning_rate": 6.892078726792758e-06, "loss": 0.3871, "step": 2171 }, { "epoch": 1.0678466076696165, "grad_norm": 20.125, "learning_rate": 6.890012406488029e-06, "loss": 0.2284, "step": 2172 }, { "epoch": 1.068338249754179, "grad_norm": 19.75, "learning_rate": 6.8879453840118975e-06, "loss": 0.4102, "step": 2173 }, { "epoch": 1.0688298918387413, "grad_norm": 32.75, "learning_rate": 6.885877659971647e-06, "loss": 0.5598, "step": 2174 }, { "epoch": 1.0693215339233038, "grad_norm": 18.125, "learning_rate": 6.883809234974758e-06, "loss": 0.4837, "step": 2175 }, { "epoch": 1.0698131760078662, "grad_norm": 22.5, "learning_rate": 6.881740109628925e-06, "loss": 0.2273, "step": 2176 }, { "epoch": 1.0703048180924286, "grad_norm": 13.8125, "learning_rate": 6.879670284542043e-06, "loss": 0.3118, "step": 2177 }, { "epoch": 1.0707964601769913, "grad_norm": 20.0, "learning_rate": 6.877599760322213e-06, "loss": 0.3536, "step": 2178 }, { "epoch": 1.0712881022615537, "grad_norm": 11.6875, "learning_rate": 6.875528537577746e-06, "loss": 0.3066, "step": 2179 }, { "epoch": 1.0717797443461161, "grad_norm": 20.5, "learning_rate": 6.873456616917153e-06, "loss": 0.285, "step": 2180 }, { "epoch": 1.0722713864306785, "grad_norm": 13.6875, "learning_rate": 6.871383998949151e-06, "loss": 0.2495, "step": 2181 }, { "epoch": 1.072763028515241, "grad_norm": 10.0, "learning_rate": 6.869310684282665e-06, "loss": 0.3995, "step": 2182 }, { "epoch": 1.0732546705998034, "grad_norm": 13.0, "learning_rate": 6.86723667352682e-06, "loss": 0.3004, "step": 2183 }, { "epoch": 1.0737463126843658, "grad_norm": 27.5, "learning_rate": 6.8651619672909505e-06, "loss": 0.2112, "step": 2184 }, { "epoch": 1.0742379547689282, "grad_norm": 18.5, "learning_rate": 6.86308656618459e-06, "loss": 0.3163, "step": 2185 }, { "epoch": 1.0747295968534907, "grad_norm": 22.375, "learning_rate": 6.861010470817482e-06, "loss": 0.2718, "step": 2186 }, { "epoch": 1.075221238938053, "grad_norm": 27.75, "learning_rate": 6.85893368179957e-06, "loss": 0.5214, "step": 2187 }, { "epoch": 1.0757128810226155, "grad_norm": 14.6875, "learning_rate": 6.856856199741e-06, "loss": 0.2775, "step": 2188 }, { "epoch": 1.076204523107178, "grad_norm": 19.0, "learning_rate": 6.854778025252126e-06, "loss": 0.3378, "step": 2189 }, { "epoch": 1.0766961651917404, "grad_norm": 11.1875, "learning_rate": 6.852699158943503e-06, "loss": 0.232, "step": 2190 }, { "epoch": 1.0771878072763028, "grad_norm": 20.5, "learning_rate": 6.85061960142589e-06, "loss": 0.3058, "step": 2191 }, { "epoch": 1.0776794493608652, "grad_norm": 34.75, "learning_rate": 6.848539353310248e-06, "loss": 0.3928, "step": 2192 }, { "epoch": 1.0781710914454277, "grad_norm": 20.125, "learning_rate": 6.846458415207741e-06, "loss": 0.4264, "step": 2193 }, { "epoch": 1.07866273352999, "grad_norm": 18.875, "learning_rate": 6.844376787729737e-06, "loss": 0.2711, "step": 2194 }, { "epoch": 1.0791543756145525, "grad_norm": 44.0, "learning_rate": 6.842294471487808e-06, "loss": 0.38, "step": 2195 }, { "epoch": 1.079646017699115, "grad_norm": 10.25, "learning_rate": 6.8402114670937195e-06, "loss": 0.1763, "step": 2196 }, { "epoch": 1.0801376597836776, "grad_norm": 22.625, "learning_rate": 6.838127775159451e-06, "loss": 0.2201, "step": 2197 }, { "epoch": 1.08062930186824, "grad_norm": 54.5, "learning_rate": 6.836043396297179e-06, "loss": 0.5554, "step": 2198 }, { "epoch": 1.0811209439528024, "grad_norm": 22.125, "learning_rate": 6.833958331119279e-06, "loss": 0.4455, "step": 2199 }, { "epoch": 1.0816125860373649, "grad_norm": 9.9375, "learning_rate": 6.831872580238332e-06, "loss": 0.187, "step": 2200 }, { "epoch": 1.0821042281219273, "grad_norm": 21.25, "learning_rate": 6.82978614426712e-06, "loss": 0.3338, "step": 2201 }, { "epoch": 1.0825958702064897, "grad_norm": 18.25, "learning_rate": 6.827699023818624e-06, "loss": 0.278, "step": 2202 }, { "epoch": 1.0830875122910522, "grad_norm": 34.5, "learning_rate": 6.825611219506028e-06, "loss": 0.3022, "step": 2203 }, { "epoch": 1.0835791543756146, "grad_norm": 26.75, "learning_rate": 6.823522731942716e-06, "loss": 0.2608, "step": 2204 }, { "epoch": 1.084070796460177, "grad_norm": 17.0, "learning_rate": 6.821433561742276e-06, "loss": 0.2269, "step": 2205 }, { "epoch": 1.0845624385447394, "grad_norm": 17.375, "learning_rate": 6.819343709518489e-06, "loss": 0.2478, "step": 2206 }, { "epoch": 1.0850540806293019, "grad_norm": 17.25, "learning_rate": 6.817253175885345e-06, "loss": 0.3743, "step": 2207 }, { "epoch": 1.0855457227138643, "grad_norm": 43.25, "learning_rate": 6.8151619614570284e-06, "loss": 0.5379, "step": 2208 }, { "epoch": 1.0860373647984267, "grad_norm": 12.375, "learning_rate": 6.813070066847927e-06, "loss": 0.2643, "step": 2209 }, { "epoch": 1.0865290068829891, "grad_norm": 13.125, "learning_rate": 6.810977492672626e-06, "loss": 0.2063, "step": 2210 }, { "epoch": 1.0870206489675516, "grad_norm": 13.1875, "learning_rate": 6.8088842395459105e-06, "loss": 0.3385, "step": 2211 }, { "epoch": 1.087512291052114, "grad_norm": 35.75, "learning_rate": 6.806790308082767e-06, "loss": 0.4227, "step": 2212 }, { "epoch": 1.0880039331366764, "grad_norm": 10.75, "learning_rate": 6.80469569889838e-06, "loss": 0.2171, "step": 2213 }, { "epoch": 1.0884955752212389, "grad_norm": 27.875, "learning_rate": 6.802600412608131e-06, "loss": 0.4267, "step": 2214 }, { "epoch": 1.0889872173058013, "grad_norm": 19.875, "learning_rate": 6.800504449827607e-06, "loss": 0.3995, "step": 2215 }, { "epoch": 1.089478859390364, "grad_norm": 38.25, "learning_rate": 6.798407811172586e-06, "loss": 0.6963, "step": 2216 }, { "epoch": 1.0899705014749264, "grad_norm": 41.25, "learning_rate": 6.796310497259048e-06, "loss": 0.4802, "step": 2217 }, { "epoch": 1.0904621435594888, "grad_norm": 22.375, "learning_rate": 6.794212508703173e-06, "loss": 0.2604, "step": 2218 }, { "epoch": 1.0909537856440512, "grad_norm": 13.4375, "learning_rate": 6.792113846121336e-06, "loss": 0.3029, "step": 2219 }, { "epoch": 1.0914454277286136, "grad_norm": 27.0, "learning_rate": 6.790014510130113e-06, "loss": 0.3635, "step": 2220 }, { "epoch": 1.091937069813176, "grad_norm": 57.0, "learning_rate": 6.787914501346275e-06, "loss": 0.4283, "step": 2221 }, { "epoch": 1.0924287118977385, "grad_norm": 27.0, "learning_rate": 6.785813820386794e-06, "loss": 0.341, "step": 2222 }, { "epoch": 1.092920353982301, "grad_norm": 30.125, "learning_rate": 6.783712467868834e-06, "loss": 0.2467, "step": 2223 }, { "epoch": 1.0934119960668633, "grad_norm": 25.625, "learning_rate": 6.781610444409765e-06, "loss": 0.4067, "step": 2224 }, { "epoch": 1.0939036381514258, "grad_norm": 16.625, "learning_rate": 6.779507750627144e-06, "loss": 0.4086, "step": 2225 }, { "epoch": 1.0943952802359882, "grad_norm": 13.9375, "learning_rate": 6.777404387138732e-06, "loss": 0.3328, "step": 2226 }, { "epoch": 1.0948869223205506, "grad_norm": 27.75, "learning_rate": 6.775300354562485e-06, "loss": 0.4966, "step": 2227 }, { "epoch": 1.095378564405113, "grad_norm": 13.75, "learning_rate": 6.7731956535165554e-06, "loss": 0.2738, "step": 2228 }, { "epoch": 1.0958702064896755, "grad_norm": 14.3125, "learning_rate": 6.7710902846192905e-06, "loss": 0.3547, "step": 2229 }, { "epoch": 1.096361848574238, "grad_norm": 20.0, "learning_rate": 6.768984248489237e-06, "loss": 0.3159, "step": 2230 }, { "epoch": 1.0968534906588003, "grad_norm": 19.75, "learning_rate": 6.766877545745134e-06, "loss": 0.1969, "step": 2231 }, { "epoch": 1.0973451327433628, "grad_norm": 25.375, "learning_rate": 6.764770177005919e-06, "loss": 0.2011, "step": 2232 }, { "epoch": 1.0978367748279252, "grad_norm": 15.625, "learning_rate": 6.762662142890723e-06, "loss": 0.3154, "step": 2233 }, { "epoch": 1.0983284169124876, "grad_norm": 16.5, "learning_rate": 6.760553444018877e-06, "loss": 0.2492, "step": 2234 }, { "epoch": 1.0988200589970503, "grad_norm": 39.75, "learning_rate": 6.7584440810099015e-06, "loss": 0.4113, "step": 2235 }, { "epoch": 1.0993117010816125, "grad_norm": 20.125, "learning_rate": 6.7563340544835144e-06, "loss": 0.4318, "step": 2236 }, { "epoch": 1.0998033431661751, "grad_norm": 23.875, "learning_rate": 6.7542233650596316e-06, "loss": 0.3508, "step": 2237 }, { "epoch": 1.1002949852507375, "grad_norm": 23.5, "learning_rate": 6.75211201335836e-06, "loss": 0.1957, "step": 2238 }, { "epoch": 1.1007866273353, "grad_norm": 31.75, "learning_rate": 6.75e-06, "loss": 0.1884, "step": 2239 }, { "epoch": 1.1012782694198624, "grad_norm": 15.625, "learning_rate": 6.747887325605051e-06, "loss": 0.2592, "step": 2240 }, { "epoch": 1.1017699115044248, "grad_norm": 23.25, "learning_rate": 6.745773990794203e-06, "loss": 0.3016, "step": 2241 }, { "epoch": 1.1022615535889873, "grad_norm": 24.75, "learning_rate": 6.7436599961883425e-06, "loss": 0.2405, "step": 2242 }, { "epoch": 1.1027531956735497, "grad_norm": 34.5, "learning_rate": 6.741545342408549e-06, "loss": 0.3388, "step": 2243 }, { "epoch": 1.103244837758112, "grad_norm": 24.0, "learning_rate": 6.739430030076089e-06, "loss": 0.2212, "step": 2244 }, { "epoch": 1.1037364798426745, "grad_norm": 25.125, "learning_rate": 6.737314059812437e-06, "loss": 0.3544, "step": 2245 }, { "epoch": 1.104228121927237, "grad_norm": 18.75, "learning_rate": 6.735197432239248e-06, "loss": 0.3318, "step": 2246 }, { "epoch": 1.1047197640117994, "grad_norm": 41.0, "learning_rate": 6.733080147978375e-06, "loss": 0.2728, "step": 2247 }, { "epoch": 1.1052114060963618, "grad_norm": 28.375, "learning_rate": 6.730962207651865e-06, "loss": 0.3206, "step": 2248 }, { "epoch": 1.1057030481809242, "grad_norm": 37.5, "learning_rate": 6.728843611881955e-06, "loss": 0.5401, "step": 2249 }, { "epoch": 1.1061946902654867, "grad_norm": 31.0, "learning_rate": 6.7267243612910745e-06, "loss": 0.5144, "step": 2250 }, { "epoch": 1.106686332350049, "grad_norm": 39.25, "learning_rate": 6.724604456501849e-06, "loss": 0.5006, "step": 2251 }, { "epoch": 1.1071779744346115, "grad_norm": 20.0, "learning_rate": 6.7224838981370925e-06, "loss": 0.2316, "step": 2252 }, { "epoch": 1.107669616519174, "grad_norm": 28.375, "learning_rate": 6.720362686819815e-06, "loss": 0.2992, "step": 2253 }, { "epoch": 1.1081612586037364, "grad_norm": 38.25, "learning_rate": 6.718240823173212e-06, "loss": 0.3218, "step": 2254 }, { "epoch": 1.1086529006882988, "grad_norm": 16.75, "learning_rate": 6.716118307820676e-06, "loss": 0.3378, "step": 2255 }, { "epoch": 1.1091445427728615, "grad_norm": 18.5, "learning_rate": 6.71399514138579e-06, "loss": 0.3087, "step": 2256 }, { "epoch": 1.1096361848574239, "grad_norm": 14.0625, "learning_rate": 6.711871324492327e-06, "loss": 0.3565, "step": 2257 }, { "epoch": 1.1101278269419863, "grad_norm": 16.625, "learning_rate": 6.709746857764252e-06, "loss": 0.2241, "step": 2258 }, { "epoch": 1.1106194690265487, "grad_norm": 12.3125, "learning_rate": 6.70762174182572e-06, "loss": 0.1825, "step": 2259 }, { "epoch": 1.1111111111111112, "grad_norm": 32.75, "learning_rate": 6.705495977301078e-06, "loss": 0.3511, "step": 2260 }, { "epoch": 1.1116027531956736, "grad_norm": 37.5, "learning_rate": 6.703369564814863e-06, "loss": 0.3585, "step": 2261 }, { "epoch": 1.112094395280236, "grad_norm": 23.25, "learning_rate": 6.701242504991802e-06, "loss": 0.5976, "step": 2262 }, { "epoch": 1.1125860373647984, "grad_norm": 37.25, "learning_rate": 6.699114798456812e-06, "loss": 0.2476, "step": 2263 }, { "epoch": 1.1130776794493609, "grad_norm": 11.875, "learning_rate": 6.696986445835002e-06, "loss": 0.2802, "step": 2264 }, { "epoch": 1.1135693215339233, "grad_norm": 37.5, "learning_rate": 6.694857447751669e-06, "loss": 0.3936, "step": 2265 }, { "epoch": 1.1140609636184857, "grad_norm": 17.5, "learning_rate": 6.692727804832299e-06, "loss": 0.3084, "step": 2266 }, { "epoch": 1.1145526057030481, "grad_norm": 42.75, "learning_rate": 6.690597517702568e-06, "loss": 0.4146, "step": 2267 }, { "epoch": 1.1150442477876106, "grad_norm": 28.625, "learning_rate": 6.688466586988344e-06, "loss": 0.3229, "step": 2268 }, { "epoch": 1.115535889872173, "grad_norm": 45.5, "learning_rate": 6.6863350133156795e-06, "loss": 0.235, "step": 2269 }, { "epoch": 1.1160275319567354, "grad_norm": 23.5, "learning_rate": 6.684202797310818e-06, "loss": 0.4457, "step": 2270 }, { "epoch": 1.1165191740412979, "grad_norm": 24.625, "learning_rate": 6.682069939600193e-06, "loss": 0.3285, "step": 2271 }, { "epoch": 1.1170108161258603, "grad_norm": 15.75, "learning_rate": 6.679936440810427e-06, "loss": 0.2176, "step": 2272 }, { "epoch": 1.1175024582104227, "grad_norm": 18.25, "learning_rate": 6.677802301568325e-06, "loss": 0.2781, "step": 2273 }, { "epoch": 1.1179941002949851, "grad_norm": 34.5, "learning_rate": 6.675667522500888e-06, "loss": 0.3636, "step": 2274 }, { "epoch": 1.1184857423795478, "grad_norm": 41.0, "learning_rate": 6.6735321042353e-06, "loss": 0.3978, "step": 2275 }, { "epoch": 1.1189773844641102, "grad_norm": 22.75, "learning_rate": 6.671396047398935e-06, "loss": 0.4483, "step": 2276 }, { "epoch": 1.1194690265486726, "grad_norm": 16.5, "learning_rate": 6.669259352619351e-06, "loss": 0.2349, "step": 2277 }, { "epoch": 1.119960668633235, "grad_norm": 37.5, "learning_rate": 6.667122020524299e-06, "loss": 0.3749, "step": 2278 }, { "epoch": 1.1204523107177975, "grad_norm": 18.5, "learning_rate": 6.664984051741714e-06, "loss": 0.3487, "step": 2279 }, { "epoch": 1.12094395280236, "grad_norm": 15.375, "learning_rate": 6.662845446899718e-06, "loss": 0.2196, "step": 2280 }, { "epoch": 1.1214355948869223, "grad_norm": 28.25, "learning_rate": 6.66070620662662e-06, "loss": 0.389, "step": 2281 }, { "epoch": 1.1219272369714848, "grad_norm": 46.5, "learning_rate": 6.658566331550917e-06, "loss": 0.2702, "step": 2282 }, { "epoch": 1.1224188790560472, "grad_norm": 12.125, "learning_rate": 6.6564258223012915e-06, "loss": 0.1487, "step": 2283 }, { "epoch": 1.1229105211406096, "grad_norm": 36.75, "learning_rate": 6.65428467950661e-06, "loss": 0.593, "step": 2284 }, { "epoch": 1.123402163225172, "grad_norm": 24.5, "learning_rate": 6.652142903795932e-06, "loss": 0.2091, "step": 2285 }, { "epoch": 1.1238938053097345, "grad_norm": 19.5, "learning_rate": 6.650000495798495e-06, "loss": 0.3406, "step": 2286 }, { "epoch": 1.124385447394297, "grad_norm": 25.0, "learning_rate": 6.647857456143727e-06, "loss": 0.4036, "step": 2287 }, { "epoch": 1.1248770894788593, "grad_norm": 17.25, "learning_rate": 6.6457137854612375e-06, "loss": 0.2656, "step": 2288 }, { "epoch": 1.1253687315634218, "grad_norm": 23.75, "learning_rate": 6.643569484380829e-06, "loss": 0.2888, "step": 2289 }, { "epoch": 1.1258603736479842, "grad_norm": 8.75, "learning_rate": 6.641424553532481e-06, "loss": 0.2589, "step": 2290 }, { "epoch": 1.1263520157325466, "grad_norm": 40.0, "learning_rate": 6.639278993546363e-06, "loss": 0.3241, "step": 2291 }, { "epoch": 1.1268436578171093, "grad_norm": 15.125, "learning_rate": 6.637132805052825e-06, "loss": 0.205, "step": 2292 }, { "epoch": 1.1273352999016715, "grad_norm": 29.625, "learning_rate": 6.634985988682408e-06, "loss": 0.3933, "step": 2293 }, { "epoch": 1.1278269419862341, "grad_norm": 29.875, "learning_rate": 6.632838545065833e-06, "loss": 0.3582, "step": 2294 }, { "epoch": 1.1283185840707965, "grad_norm": 11.25, "learning_rate": 6.630690474834003e-06, "loss": 0.173, "step": 2295 }, { "epoch": 1.128810226155359, "grad_norm": 34.0, "learning_rate": 6.628541778618013e-06, "loss": 0.2278, "step": 2296 }, { "epoch": 1.1293018682399214, "grad_norm": 30.125, "learning_rate": 6.626392457049134e-06, "loss": 0.1753, "step": 2297 }, { "epoch": 1.1297935103244838, "grad_norm": 51.0, "learning_rate": 6.624242510758824e-06, "loss": 0.514, "step": 2298 }, { "epoch": 1.1302851524090463, "grad_norm": 22.625, "learning_rate": 6.622091940378725e-06, "loss": 0.2977, "step": 2299 }, { "epoch": 1.1307767944936087, "grad_norm": 33.5, "learning_rate": 6.619940746540663e-06, "loss": 0.4322, "step": 2300 }, { "epoch": 1.131268436578171, "grad_norm": 14.6875, "learning_rate": 6.617788929876643e-06, "loss": 0.2872, "step": 2301 }, { "epoch": 1.1317600786627335, "grad_norm": 9.625, "learning_rate": 6.6156364910188585e-06, "loss": 0.2664, "step": 2302 }, { "epoch": 1.132251720747296, "grad_norm": 27.0, "learning_rate": 6.613483430599682e-06, "loss": 0.2447, "step": 2303 }, { "epoch": 1.1327433628318584, "grad_norm": 23.625, "learning_rate": 6.611329749251671e-06, "loss": 0.393, "step": 2304 }, { "epoch": 1.1332350049164208, "grad_norm": 26.625, "learning_rate": 6.609175447607563e-06, "loss": 0.2389, "step": 2305 }, { "epoch": 1.1337266470009832, "grad_norm": 28.25, "learning_rate": 6.607020526300279e-06, "loss": 0.4407, "step": 2306 }, { "epoch": 1.1342182890855457, "grad_norm": 17.75, "learning_rate": 6.604864985962923e-06, "loss": 0.4571, "step": 2307 }, { "epoch": 1.134709931170108, "grad_norm": 23.625, "learning_rate": 6.60270882722878e-06, "loss": 0.3118, "step": 2308 }, { "epoch": 1.1352015732546705, "grad_norm": 19.75, "learning_rate": 6.600552050731314e-06, "loss": 0.4891, "step": 2309 }, { "epoch": 1.135693215339233, "grad_norm": 12.0, "learning_rate": 6.5983946571041755e-06, "loss": 0.1966, "step": 2310 }, { "epoch": 1.1361848574237954, "grad_norm": 33.75, "learning_rate": 6.596236646981194e-06, "loss": 0.4112, "step": 2311 }, { "epoch": 1.1366764995083578, "grad_norm": 20.5, "learning_rate": 6.59407802099638e-06, "loss": 0.384, "step": 2312 }, { "epoch": 1.1371681415929205, "grad_norm": 10.875, "learning_rate": 6.591918779783924e-06, "loss": 0.1722, "step": 2313 }, { "epoch": 1.1376597836774829, "grad_norm": 46.5, "learning_rate": 6.589758923978198e-06, "loss": 0.4677, "step": 2314 }, { "epoch": 1.1381514257620453, "grad_norm": 13.25, "learning_rate": 6.5875984542137555e-06, "loss": 0.3762, "step": 2315 }, { "epoch": 1.1386430678466077, "grad_norm": 53.75, "learning_rate": 6.585437371125329e-06, "loss": 0.6361, "step": 2316 }, { "epoch": 1.1391347099311702, "grad_norm": 15.125, "learning_rate": 6.5832756753478325e-06, "loss": 0.2531, "step": 2317 }, { "epoch": 1.1396263520157326, "grad_norm": 30.875, "learning_rate": 6.581113367516359e-06, "loss": 0.2679, "step": 2318 }, { "epoch": 1.140117994100295, "grad_norm": 25.25, "learning_rate": 6.5789504482661825e-06, "loss": 0.3855, "step": 2319 }, { "epoch": 1.1406096361848574, "grad_norm": 35.0, "learning_rate": 6.576786918232755e-06, "loss": 0.5047, "step": 2320 }, { "epoch": 1.1411012782694199, "grad_norm": 32.75, "learning_rate": 6.574622778051709e-06, "loss": 0.3741, "step": 2321 }, { "epoch": 1.1415929203539823, "grad_norm": 41.0, "learning_rate": 6.572458028358855e-06, "loss": 0.5043, "step": 2322 }, { "epoch": 1.1420845624385447, "grad_norm": 39.0, "learning_rate": 6.570292669790185e-06, "loss": 0.431, "step": 2323 }, { "epoch": 1.1425762045231072, "grad_norm": 22.0, "learning_rate": 6.568126702981867e-06, "loss": 0.2846, "step": 2324 }, { "epoch": 1.1430678466076696, "grad_norm": 39.5, "learning_rate": 6.56596012857025e-06, "loss": 0.439, "step": 2325 }, { "epoch": 1.143559488692232, "grad_norm": 23.875, "learning_rate": 6.563792947191862e-06, "loss": 0.4168, "step": 2326 }, { "epoch": 1.1440511307767944, "grad_norm": 21.125, "learning_rate": 6.5616251594834046e-06, "loss": 0.2075, "step": 2327 }, { "epoch": 1.1445427728613569, "grad_norm": 22.875, "learning_rate": 6.559456766081764e-06, "loss": 0.2818, "step": 2328 }, { "epoch": 1.1450344149459193, "grad_norm": 13.6875, "learning_rate": 6.557287767623998e-06, "loss": 0.4613, "step": 2329 }, { "epoch": 1.1455260570304817, "grad_norm": 30.0, "learning_rate": 6.55511816474735e-06, "loss": 0.3019, "step": 2330 }, { "epoch": 1.1460176991150441, "grad_norm": 9.5625, "learning_rate": 6.552947958089234e-06, "loss": 0.2989, "step": 2331 }, { "epoch": 1.1465093411996068, "grad_norm": 11.4375, "learning_rate": 6.550777148287242e-06, "loss": 0.252, "step": 2332 }, { "epoch": 1.1470009832841692, "grad_norm": 12.6875, "learning_rate": 6.548605735979148e-06, "loss": 0.1765, "step": 2333 }, { "epoch": 1.1474926253687316, "grad_norm": 31.25, "learning_rate": 6.546433721802899e-06, "loss": 0.3487, "step": 2334 }, { "epoch": 1.147984267453294, "grad_norm": 19.5, "learning_rate": 6.5442611063966184e-06, "loss": 0.354, "step": 2335 }, { "epoch": 1.1484759095378565, "grad_norm": 13.8125, "learning_rate": 6.542087890398608e-06, "loss": 0.2661, "step": 2336 }, { "epoch": 1.148967551622419, "grad_norm": 33.0, "learning_rate": 6.5399140744473485e-06, "loss": 0.4196, "step": 2337 }, { "epoch": 1.1494591937069814, "grad_norm": 26.875, "learning_rate": 6.537739659181491e-06, "loss": 0.2613, "step": 2338 }, { "epoch": 1.1499508357915438, "grad_norm": 18.5, "learning_rate": 6.535564645239866e-06, "loss": 0.3491, "step": 2339 }, { "epoch": 1.1504424778761062, "grad_norm": 10.75, "learning_rate": 6.533389033261481e-06, "loss": 0.1693, "step": 2340 }, { "epoch": 1.1509341199606686, "grad_norm": 23.125, "learning_rate": 6.531212823885517e-06, "loss": 0.3132, "step": 2341 }, { "epoch": 1.151425762045231, "grad_norm": 39.5, "learning_rate": 6.529036017751333e-06, "loss": 0.5281, "step": 2342 }, { "epoch": 1.1519174041297935, "grad_norm": 8.75, "learning_rate": 6.526858615498457e-06, "loss": 0.2448, "step": 2343 }, { "epoch": 1.152409046214356, "grad_norm": 25.75, "learning_rate": 6.524680617766604e-06, "loss": 0.4918, "step": 2344 }, { "epoch": 1.1529006882989183, "grad_norm": 25.25, "learning_rate": 6.522502025195651e-06, "loss": 0.4027, "step": 2345 }, { "epoch": 1.1533923303834808, "grad_norm": 16.5, "learning_rate": 6.520322838425657e-06, "loss": 0.4087, "step": 2346 }, { "epoch": 1.1538839724680432, "grad_norm": 29.875, "learning_rate": 6.5181430580968544e-06, "loss": 0.4692, "step": 2347 }, { "epoch": 1.1543756145526056, "grad_norm": 10.625, "learning_rate": 6.51596268484965e-06, "loss": 0.2588, "step": 2348 }, { "epoch": 1.154867256637168, "grad_norm": 18.75, "learning_rate": 6.513781719324624e-06, "loss": 0.3437, "step": 2349 }, { "epoch": 1.1553588987217305, "grad_norm": 20.125, "learning_rate": 6.511600162162531e-06, "loss": 0.4837, "step": 2350 }, { "epoch": 1.1558505408062931, "grad_norm": 15.25, "learning_rate": 6.5094180140043e-06, "loss": 0.3063, "step": 2351 }, { "epoch": 1.1563421828908556, "grad_norm": 46.75, "learning_rate": 6.507235275491034e-06, "loss": 0.5954, "step": 2352 }, { "epoch": 1.156833824975418, "grad_norm": 51.0, "learning_rate": 6.505051947264006e-06, "loss": 0.582, "step": 2353 }, { "epoch": 1.1573254670599804, "grad_norm": 21.75, "learning_rate": 6.502868029964665e-06, "loss": 0.314, "step": 2354 }, { "epoch": 1.1578171091445428, "grad_norm": 14.4375, "learning_rate": 6.500683524234637e-06, "loss": 0.2681, "step": 2355 }, { "epoch": 1.1583087512291053, "grad_norm": 16.75, "learning_rate": 6.498498430715712e-06, "loss": 0.3037, "step": 2356 }, { "epoch": 1.1588003933136677, "grad_norm": 19.75, "learning_rate": 6.49631275004986e-06, "loss": 0.3975, "step": 2357 }, { "epoch": 1.1592920353982301, "grad_norm": 20.75, "learning_rate": 6.494126482879219e-06, "loss": 0.3138, "step": 2358 }, { "epoch": 1.1597836774827925, "grad_norm": 14.25, "learning_rate": 6.491939629846104e-06, "loss": 0.2283, "step": 2359 }, { "epoch": 1.160275319567355, "grad_norm": 29.375, "learning_rate": 6.489752191592998e-06, "loss": 0.6677, "step": 2360 }, { "epoch": 1.1607669616519174, "grad_norm": 22.5, "learning_rate": 6.487564168762556e-06, "loss": 0.3714, "step": 2361 }, { "epoch": 1.1612586037364798, "grad_norm": 17.5, "learning_rate": 6.485375561997608e-06, "loss": 0.2449, "step": 2362 }, { "epoch": 1.1617502458210422, "grad_norm": 14.3125, "learning_rate": 6.483186371941153e-06, "loss": 0.2454, "step": 2363 }, { "epoch": 1.1622418879056047, "grad_norm": 35.5, "learning_rate": 6.4809965992363615e-06, "loss": 0.345, "step": 2364 }, { "epoch": 1.162733529990167, "grad_norm": 16.5, "learning_rate": 6.478806244526576e-06, "loss": 0.3165, "step": 2365 }, { "epoch": 1.1632251720747295, "grad_norm": 10.3125, "learning_rate": 6.476615308455311e-06, "loss": 0.2145, "step": 2366 }, { "epoch": 1.163716814159292, "grad_norm": 51.25, "learning_rate": 6.474423791666248e-06, "loss": 0.5198, "step": 2367 }, { "epoch": 1.1642084562438544, "grad_norm": 19.5, "learning_rate": 6.4722316948032435e-06, "loss": 0.2858, "step": 2368 }, { "epoch": 1.1647000983284168, "grad_norm": 25.625, "learning_rate": 6.470039018510321e-06, "loss": 0.2334, "step": 2369 }, { "epoch": 1.1651917404129795, "grad_norm": 18.125, "learning_rate": 6.467845763431676e-06, "loss": 0.4204, "step": 2370 }, { "epoch": 1.1656833824975417, "grad_norm": 13.0625, "learning_rate": 6.465651930211677e-06, "loss": 0.3752, "step": 2371 }, { "epoch": 1.1661750245821043, "grad_norm": 49.0, "learning_rate": 6.463457519494854e-06, "loss": 0.4534, "step": 2372 }, { "epoch": 1.1666666666666667, "grad_norm": 43.25, "learning_rate": 6.461262531925914e-06, "loss": 0.5227, "step": 2373 }, { "epoch": 1.1671583087512292, "grad_norm": 31.0, "learning_rate": 6.459066968149732e-06, "loss": 0.4266, "step": 2374 }, { "epoch": 1.1676499508357916, "grad_norm": 24.25, "learning_rate": 6.456870828811352e-06, "loss": 0.4201, "step": 2375 }, { "epoch": 1.168141592920354, "grad_norm": 23.875, "learning_rate": 6.454674114555984e-06, "loss": 0.2699, "step": 2376 }, { "epoch": 1.1686332350049164, "grad_norm": 20.125, "learning_rate": 6.452476826029012e-06, "loss": 0.2314, "step": 2377 }, { "epoch": 1.1691248770894789, "grad_norm": 14.375, "learning_rate": 6.450278963875985e-06, "loss": 0.2434, "step": 2378 }, { "epoch": 1.1696165191740413, "grad_norm": 20.25, "learning_rate": 6.448080528742623e-06, "loss": 0.2935, "step": 2379 }, { "epoch": 1.1701081612586037, "grad_norm": 23.625, "learning_rate": 6.445881521274812e-06, "loss": 0.222, "step": 2380 }, { "epoch": 1.1705998033431662, "grad_norm": 16.625, "learning_rate": 6.443681942118608e-06, "loss": 0.2787, "step": 2381 }, { "epoch": 1.1710914454277286, "grad_norm": 15.6875, "learning_rate": 6.441481791920233e-06, "loss": 0.2203, "step": 2382 }, { "epoch": 1.171583087512291, "grad_norm": 16.125, "learning_rate": 6.4392810713260805e-06, "loss": 0.1506, "step": 2383 }, { "epoch": 1.1720747295968534, "grad_norm": 40.0, "learning_rate": 6.437079780982706e-06, "loss": 0.2891, "step": 2384 }, { "epoch": 1.1725663716814159, "grad_norm": 14.25, "learning_rate": 6.434877921536838e-06, "loss": 0.3597, "step": 2385 }, { "epoch": 1.1730580137659783, "grad_norm": 29.75, "learning_rate": 6.432675493635369e-06, "loss": 0.4981, "step": 2386 }, { "epoch": 1.1735496558505407, "grad_norm": 33.25, "learning_rate": 6.430472497925357e-06, "loss": 0.4233, "step": 2387 }, { "epoch": 1.1740412979351031, "grad_norm": 15.625, "learning_rate": 6.4282689350540335e-06, "loss": 0.3146, "step": 2388 }, { "epoch": 1.1745329400196658, "grad_norm": 35.0, "learning_rate": 6.42606480566879e-06, "loss": 0.4728, "step": 2389 }, { "epoch": 1.175024582104228, "grad_norm": 31.375, "learning_rate": 6.423860110417183e-06, "loss": 0.3598, "step": 2390 }, { "epoch": 1.1755162241887906, "grad_norm": 18.625, "learning_rate": 6.421654849946944e-06, "loss": 0.4407, "step": 2391 }, { "epoch": 1.176007866273353, "grad_norm": 28.25, "learning_rate": 6.4194490249059634e-06, "loss": 0.4115, "step": 2392 }, { "epoch": 1.1764995083579155, "grad_norm": 14.1875, "learning_rate": 6.417242635942298e-06, "loss": 0.2769, "step": 2393 }, { "epoch": 1.176991150442478, "grad_norm": 22.125, "learning_rate": 6.4150356837041744e-06, "loss": 0.3551, "step": 2394 }, { "epoch": 1.1774827925270404, "grad_norm": 53.5, "learning_rate": 6.41282816883998e-06, "loss": 0.8102, "step": 2395 }, { "epoch": 1.1779744346116028, "grad_norm": 10.5, "learning_rate": 6.41062009199827e-06, "loss": 0.1735, "step": 2396 }, { "epoch": 1.1784660766961652, "grad_norm": 35.75, "learning_rate": 6.408411453827765e-06, "loss": 0.3075, "step": 2397 }, { "epoch": 1.1789577187807276, "grad_norm": 25.25, "learning_rate": 6.406202254977349e-06, "loss": 0.3203, "step": 2398 }, { "epoch": 1.17944936086529, "grad_norm": 18.25, "learning_rate": 6.403992496096071e-06, "loss": 0.3787, "step": 2399 }, { "epoch": 1.1799410029498525, "grad_norm": 22.375, "learning_rate": 6.401782177833148e-06, "loss": 0.521, "step": 2400 }, { "epoch": 1.180432645034415, "grad_norm": 10.75, "learning_rate": 6.399571300837954e-06, "loss": 0.1905, "step": 2401 }, { "epoch": 1.1809242871189773, "grad_norm": 41.25, "learning_rate": 6.397359865760033e-06, "loss": 0.4516, "step": 2402 }, { "epoch": 1.1814159292035398, "grad_norm": 23.625, "learning_rate": 6.395147873249094e-06, "loss": 0.1321, "step": 2403 }, { "epoch": 1.1819075712881022, "grad_norm": 32.5, "learning_rate": 6.392935323955004e-06, "loss": 0.3795, "step": 2404 }, { "epoch": 1.1823992133726646, "grad_norm": 14.1875, "learning_rate": 6.390722218527798e-06, "loss": 0.1634, "step": 2405 }, { "epoch": 1.182890855457227, "grad_norm": 19.5, "learning_rate": 6.388508557617673e-06, "loss": 0.156, "step": 2406 }, { "epoch": 1.1833824975417895, "grad_norm": 35.75, "learning_rate": 6.386294341874989e-06, "loss": 0.468, "step": 2407 }, { "epoch": 1.1838741396263521, "grad_norm": 28.625, "learning_rate": 6.38407957195027e-06, "loss": 0.2616, "step": 2408 }, { "epoch": 1.1843657817109143, "grad_norm": 13.9375, "learning_rate": 6.381864248494202e-06, "loss": 0.2592, "step": 2409 }, { "epoch": 1.184857423795477, "grad_norm": 15.125, "learning_rate": 6.379648372157634e-06, "loss": 0.408, "step": 2410 }, { "epoch": 1.1853490658800394, "grad_norm": 46.25, "learning_rate": 6.3774319435915785e-06, "loss": 0.3557, "step": 2411 }, { "epoch": 1.1858407079646018, "grad_norm": 11.0, "learning_rate": 6.375214963447206e-06, "loss": 0.2583, "step": 2412 }, { "epoch": 1.1863323500491643, "grad_norm": 16.5, "learning_rate": 6.372997432375854e-06, "loss": 0.2523, "step": 2413 }, { "epoch": 1.1868239921337267, "grad_norm": 16.875, "learning_rate": 6.3707793510290196e-06, "loss": 0.2183, "step": 2414 }, { "epoch": 1.1873156342182891, "grad_norm": 23.875, "learning_rate": 6.368560720058362e-06, "loss": 0.2613, "step": 2415 }, { "epoch": 1.1878072763028515, "grad_norm": 19.5, "learning_rate": 6.366341540115703e-06, "loss": 0.3606, "step": 2416 }, { "epoch": 1.188298918387414, "grad_norm": 12.8125, "learning_rate": 6.364121811853021e-06, "loss": 0.2664, "step": 2417 }, { "epoch": 1.1887905604719764, "grad_norm": 26.625, "learning_rate": 6.361901535922463e-06, "loss": 0.3268, "step": 2418 }, { "epoch": 1.1892822025565388, "grad_norm": 9.875, "learning_rate": 6.359680712976329e-06, "loss": 0.2242, "step": 2419 }, { "epoch": 1.1897738446411013, "grad_norm": 26.25, "learning_rate": 6.357459343667085e-06, "loss": 0.4287, "step": 2420 }, { "epoch": 1.1902654867256637, "grad_norm": 14.625, "learning_rate": 6.355237428647359e-06, "loss": 0.2598, "step": 2421 }, { "epoch": 1.190757128810226, "grad_norm": 19.375, "learning_rate": 6.353014968569933e-06, "loss": 0.2397, "step": 2422 }, { "epoch": 1.1912487708947885, "grad_norm": 28.625, "learning_rate": 6.350791964087753e-06, "loss": 0.3634, "step": 2423 }, { "epoch": 1.191740412979351, "grad_norm": 22.75, "learning_rate": 6.3485684158539235e-06, "loss": 0.3982, "step": 2424 }, { "epoch": 1.1922320550639134, "grad_norm": 12.4375, "learning_rate": 6.346344324521713e-06, "loss": 0.2119, "step": 2425 }, { "epoch": 1.1927236971484758, "grad_norm": 19.625, "learning_rate": 6.344119690744544e-06, "loss": 0.4075, "step": 2426 }, { "epoch": 1.1932153392330385, "grad_norm": 57.5, "learning_rate": 6.3418945151760015e-06, "loss": 0.4891, "step": 2427 }, { "epoch": 1.1937069813176007, "grad_norm": 19.0, "learning_rate": 6.3396687984698265e-06, "loss": 0.3055, "step": 2428 }, { "epoch": 1.1941986234021633, "grad_norm": 31.375, "learning_rate": 6.337442541279925e-06, "loss": 0.3964, "step": 2429 }, { "epoch": 1.1946902654867257, "grad_norm": 22.875, "learning_rate": 6.335215744260355e-06, "loss": 0.3596, "step": 2430 }, { "epoch": 1.1951819075712882, "grad_norm": 22.375, "learning_rate": 6.332988408065337e-06, "loss": 0.3578, "step": 2431 }, { "epoch": 1.1956735496558506, "grad_norm": 14.9375, "learning_rate": 6.33076053334925e-06, "loss": 0.299, "step": 2432 }, { "epoch": 1.196165191740413, "grad_norm": 28.875, "learning_rate": 6.328532120766631e-06, "loss": 0.164, "step": 2433 }, { "epoch": 1.1966568338249755, "grad_norm": 17.125, "learning_rate": 6.326303170972172e-06, "loss": 0.3119, "step": 2434 }, { "epoch": 1.1971484759095379, "grad_norm": 39.5, "learning_rate": 6.324073684620726e-06, "loss": 0.407, "step": 2435 }, { "epoch": 1.1976401179941003, "grad_norm": 21.375, "learning_rate": 6.321843662367304e-06, "loss": 0.2608, "step": 2436 }, { "epoch": 1.1981317600786627, "grad_norm": 30.75, "learning_rate": 6.319613104867072e-06, "loss": 0.3106, "step": 2437 }, { "epoch": 1.1986234021632252, "grad_norm": 14.25, "learning_rate": 6.317382012775355e-06, "loss": 0.2168, "step": 2438 }, { "epoch": 1.1991150442477876, "grad_norm": 31.125, "learning_rate": 6.315150386747635e-06, "loss": 0.3666, "step": 2439 }, { "epoch": 1.19960668633235, "grad_norm": 36.25, "learning_rate": 6.312918227439548e-06, "loss": 0.4379, "step": 2440 }, { "epoch": 1.2000983284169124, "grad_norm": 22.75, "learning_rate": 6.310685535506893e-06, "loss": 0.296, "step": 2441 }, { "epoch": 1.2005899705014749, "grad_norm": 11.3125, "learning_rate": 6.308452311605618e-06, "loss": 0.2084, "step": 2442 }, { "epoch": 1.2010816125860373, "grad_norm": 10.875, "learning_rate": 6.306218556391834e-06, "loss": 0.2107, "step": 2443 }, { "epoch": 1.2015732546705997, "grad_norm": 20.0, "learning_rate": 6.303984270521802e-06, "loss": 0.4758, "step": 2444 }, { "epoch": 1.2020648967551621, "grad_norm": 12.375, "learning_rate": 6.3017494546519445e-06, "loss": 0.152, "step": 2445 }, { "epoch": 1.2025565388397248, "grad_norm": 49.0, "learning_rate": 6.299514109438834e-06, "loss": 0.3135, "step": 2446 }, { "epoch": 1.203048180924287, "grad_norm": 11.6875, "learning_rate": 6.297278235539205e-06, "loss": 0.3062, "step": 2447 }, { "epoch": 1.2035398230088497, "grad_norm": 49.0, "learning_rate": 6.295041833609942e-06, "loss": 0.4469, "step": 2448 }, { "epoch": 1.204031465093412, "grad_norm": 30.625, "learning_rate": 6.292804904308087e-06, "loss": 0.3025, "step": 2449 }, { "epoch": 1.2045231071779745, "grad_norm": 23.75, "learning_rate": 6.290567448290836e-06, "loss": 0.2581, "step": 2450 }, { "epoch": 1.205014749262537, "grad_norm": 24.75, "learning_rate": 6.288329466215541e-06, "loss": 0.3401, "step": 2451 }, { "epoch": 1.2055063913470994, "grad_norm": 16.625, "learning_rate": 6.286090958739707e-06, "loss": 0.2341, "step": 2452 }, { "epoch": 1.2059980334316618, "grad_norm": 29.125, "learning_rate": 6.283851926520994e-06, "loss": 0.3057, "step": 2453 }, { "epoch": 1.2064896755162242, "grad_norm": 13.1875, "learning_rate": 6.281612370217219e-06, "loss": 0.1925, "step": 2454 }, { "epoch": 1.2069813176007866, "grad_norm": 42.25, "learning_rate": 6.279372290486347e-06, "loss": 0.4653, "step": 2455 }, { "epoch": 1.207472959685349, "grad_norm": 30.0, "learning_rate": 6.277131687986501e-06, "loss": 0.3762, "step": 2456 }, { "epoch": 1.2079646017699115, "grad_norm": 24.625, "learning_rate": 6.274890563375957e-06, "loss": 0.432, "step": 2457 }, { "epoch": 1.208456243854474, "grad_norm": 14.625, "learning_rate": 6.272648917313145e-06, "loss": 0.1869, "step": 2458 }, { "epoch": 1.2089478859390363, "grad_norm": 29.125, "learning_rate": 6.270406750456645e-06, "loss": 0.3304, "step": 2459 }, { "epoch": 1.2094395280235988, "grad_norm": 38.25, "learning_rate": 6.268164063465196e-06, "loss": 0.4465, "step": 2460 }, { "epoch": 1.2099311701081612, "grad_norm": 39.5, "learning_rate": 6.26592085699768e-06, "loss": 0.5258, "step": 2461 }, { "epoch": 1.2104228121927236, "grad_norm": 16.25, "learning_rate": 6.263677131713143e-06, "loss": 0.2807, "step": 2462 }, { "epoch": 1.210914454277286, "grad_norm": 41.25, "learning_rate": 6.2614328882707776e-06, "loss": 0.4747, "step": 2463 }, { "epoch": 1.2114060963618485, "grad_norm": 41.25, "learning_rate": 6.259188127329925e-06, "loss": 0.5302, "step": 2464 }, { "epoch": 1.2118977384464111, "grad_norm": 22.375, "learning_rate": 6.256942849550089e-06, "loss": 0.2964, "step": 2465 }, { "epoch": 1.2123893805309733, "grad_norm": 19.25, "learning_rate": 6.2546970555909135e-06, "loss": 0.4638, "step": 2466 }, { "epoch": 1.212881022615536, "grad_norm": 43.25, "learning_rate": 6.252450746112201e-06, "loss": 0.4355, "step": 2467 }, { "epoch": 1.2133726647000984, "grad_norm": 12.875, "learning_rate": 6.2502039217739035e-06, "loss": 0.1767, "step": 2468 }, { "epoch": 1.2138643067846608, "grad_norm": 28.625, "learning_rate": 6.247956583236126e-06, "loss": 0.2814, "step": 2469 }, { "epoch": 1.2143559488692233, "grad_norm": 22.375, "learning_rate": 6.245708731159123e-06, "loss": 0.2247, "step": 2470 }, { "epoch": 1.2148475909537857, "grad_norm": 39.25, "learning_rate": 6.2434603662033e-06, "loss": 0.5888, "step": 2471 }, { "epoch": 1.2153392330383481, "grad_norm": 9.1875, "learning_rate": 6.24121148902921e-06, "loss": 0.1711, "step": 2472 }, { "epoch": 1.2158308751229105, "grad_norm": 25.125, "learning_rate": 6.2389621002975644e-06, "loss": 0.337, "step": 2473 }, { "epoch": 1.216322517207473, "grad_norm": 35.25, "learning_rate": 6.236712200669218e-06, "loss": 0.4531, "step": 2474 }, { "epoch": 1.2168141592920354, "grad_norm": 27.5, "learning_rate": 6.234461790805176e-06, "loss": 0.1645, "step": 2475 }, { "epoch": 1.2173058013765978, "grad_norm": 34.75, "learning_rate": 6.2322108713666005e-06, "loss": 0.6523, "step": 2476 }, { "epoch": 1.2177974434611603, "grad_norm": 21.25, "learning_rate": 6.229959443014793e-06, "loss": 0.3154, "step": 2477 }, { "epoch": 1.2182890855457227, "grad_norm": 60.0, "learning_rate": 6.227707506411212e-06, "loss": 0.614, "step": 2478 }, { "epoch": 1.218780727630285, "grad_norm": 23.125, "learning_rate": 6.225455062217463e-06, "loss": 0.2745, "step": 2479 }, { "epoch": 1.2192723697148475, "grad_norm": 38.75, "learning_rate": 6.223202111095302e-06, "loss": 0.2964, "step": 2480 }, { "epoch": 1.21976401179941, "grad_norm": 16.25, "learning_rate": 6.220948653706629e-06, "loss": 0.161, "step": 2481 }, { "epoch": 1.2202556538839724, "grad_norm": 26.625, "learning_rate": 6.2186946907135e-06, "loss": 0.3934, "step": 2482 }, { "epoch": 1.2207472959685348, "grad_norm": 12.3125, "learning_rate": 6.216440222778115e-06, "loss": 0.1895, "step": 2483 }, { "epoch": 1.2212389380530975, "grad_norm": 18.875, "learning_rate": 6.214185250562821e-06, "loss": 0.3152, "step": 2484 }, { "epoch": 1.2217305801376597, "grad_norm": 38.75, "learning_rate": 6.211929774730118e-06, "loss": 0.4515, "step": 2485 }, { "epoch": 1.2222222222222223, "grad_norm": 25.5, "learning_rate": 6.209673795942651e-06, "loss": 0.5173, "step": 2486 }, { "epoch": 1.2227138643067847, "grad_norm": 16.5, "learning_rate": 6.207417314863212e-06, "loss": 0.2877, "step": 2487 }, { "epoch": 1.2232055063913472, "grad_norm": 50.75, "learning_rate": 6.205160332154742e-06, "loss": 0.4865, "step": 2488 }, { "epoch": 1.2236971484759096, "grad_norm": 23.0, "learning_rate": 6.20290284848033e-06, "loss": 0.2867, "step": 2489 }, { "epoch": 1.224188790560472, "grad_norm": 24.25, "learning_rate": 6.200644864503209e-06, "loss": 0.4252, "step": 2490 }, { "epoch": 1.2246804326450345, "grad_norm": 28.625, "learning_rate": 6.198386380886765e-06, "loss": 0.501, "step": 2491 }, { "epoch": 1.2251720747295969, "grad_norm": 29.125, "learning_rate": 6.196127398294524e-06, "loss": 0.2902, "step": 2492 }, { "epoch": 1.2256637168141593, "grad_norm": 29.875, "learning_rate": 6.193867917390163e-06, "loss": 0.3497, "step": 2493 }, { "epoch": 1.2261553588987217, "grad_norm": 19.875, "learning_rate": 6.191607938837502e-06, "loss": 0.1968, "step": 2494 }, { "epoch": 1.2266470009832842, "grad_norm": 66.0, "learning_rate": 6.189347463300512e-06, "loss": 0.4714, "step": 2495 }, { "epoch": 1.2271386430678466, "grad_norm": 18.5, "learning_rate": 6.187086491443306e-06, "loss": 0.3113, "step": 2496 }, { "epoch": 1.227630285152409, "grad_norm": 41.25, "learning_rate": 6.184825023930144e-06, "loss": 0.4219, "step": 2497 }, { "epoch": 1.2281219272369714, "grad_norm": 28.5, "learning_rate": 6.182563061425431e-06, "loss": 0.4125, "step": 2498 }, { "epoch": 1.2286135693215339, "grad_norm": 25.0, "learning_rate": 6.180300604593719e-06, "loss": 0.2877, "step": 2499 }, { "epoch": 1.2291052114060963, "grad_norm": 31.5, "learning_rate": 6.178037654099704e-06, "loss": 0.3945, "step": 2500 }, { "epoch": 1.2295968534906587, "grad_norm": 26.625, "learning_rate": 6.175774210608228e-06, "loss": 0.4194, "step": 2501 }, { "epoch": 1.2300884955752212, "grad_norm": 30.0, "learning_rate": 6.1735102747842755e-06, "loss": 0.3274, "step": 2502 }, { "epoch": 1.2305801376597838, "grad_norm": 24.5, "learning_rate": 6.1712458472929805e-06, "loss": 0.2899, "step": 2503 }, { "epoch": 1.231071779744346, "grad_norm": 11.4375, "learning_rate": 6.168980928799616e-06, "loss": 0.3059, "step": 2504 }, { "epoch": 1.2315634218289087, "grad_norm": 13.625, "learning_rate": 6.1667155199696e-06, "loss": 0.2896, "step": 2505 }, { "epoch": 1.232055063913471, "grad_norm": 22.0, "learning_rate": 6.1644496214685e-06, "loss": 0.2899, "step": 2506 }, { "epoch": 1.2325467059980335, "grad_norm": 22.0, "learning_rate": 6.162183233962022e-06, "loss": 0.3725, "step": 2507 }, { "epoch": 1.233038348082596, "grad_norm": 21.0, "learning_rate": 6.159916358116015e-06, "loss": 0.3374, "step": 2508 }, { "epoch": 1.2335299901671584, "grad_norm": 18.75, "learning_rate": 6.157648994596475e-06, "loss": 0.2225, "step": 2509 }, { "epoch": 1.2340216322517208, "grad_norm": 17.5, "learning_rate": 6.15538114406954e-06, "loss": 0.3072, "step": 2510 }, { "epoch": 1.2345132743362832, "grad_norm": 27.75, "learning_rate": 6.153112807201493e-06, "loss": 0.2176, "step": 2511 }, { "epoch": 1.2350049164208456, "grad_norm": 51.75, "learning_rate": 6.150843984658754e-06, "loss": 0.5155, "step": 2512 }, { "epoch": 1.235496558505408, "grad_norm": 25.625, "learning_rate": 6.148574677107893e-06, "loss": 0.291, "step": 2513 }, { "epoch": 1.2359882005899705, "grad_norm": 34.5, "learning_rate": 6.146304885215617e-06, "loss": 0.4341, "step": 2514 }, { "epoch": 1.236479842674533, "grad_norm": 15.375, "learning_rate": 6.1440346096487786e-06, "loss": 0.4219, "step": 2515 }, { "epoch": 1.2369714847590954, "grad_norm": 28.0, "learning_rate": 6.141763851074368e-06, "loss": 0.3287, "step": 2516 }, { "epoch": 1.2374631268436578, "grad_norm": 20.0, "learning_rate": 6.139492610159526e-06, "loss": 0.3606, "step": 2517 }, { "epoch": 1.2379547689282202, "grad_norm": 38.0, "learning_rate": 6.137220887571526e-06, "loss": 0.5863, "step": 2518 }, { "epoch": 1.2384464110127826, "grad_norm": 31.125, "learning_rate": 6.134948683977786e-06, "loss": 0.4759, "step": 2519 }, { "epoch": 1.238938053097345, "grad_norm": 14.9375, "learning_rate": 6.132676000045868e-06, "loss": 0.2815, "step": 2520 }, { "epoch": 1.2394296951819075, "grad_norm": 23.0, "learning_rate": 6.130402836443472e-06, "loss": 0.5403, "step": 2521 }, { "epoch": 1.23992133726647, "grad_norm": 17.875, "learning_rate": 6.128129193838439e-06, "loss": 0.2389, "step": 2522 }, { "epoch": 1.2404129793510323, "grad_norm": 10.5, "learning_rate": 6.125855072898754e-06, "loss": 0.2691, "step": 2523 }, { "epoch": 1.240904621435595, "grad_norm": 12.9375, "learning_rate": 6.123580474292538e-06, "loss": 0.3028, "step": 2524 }, { "epoch": 1.2413962635201574, "grad_norm": 26.5, "learning_rate": 6.1213053986880555e-06, "loss": 0.2942, "step": 2525 }, { "epoch": 1.2418879056047198, "grad_norm": 21.0, "learning_rate": 6.119029846753711e-06, "loss": 0.3828, "step": 2526 }, { "epoch": 1.2423795476892823, "grad_norm": 31.0, "learning_rate": 6.116753819158046e-06, "loss": 0.3101, "step": 2527 }, { "epoch": 1.2428711897738447, "grad_norm": 17.875, "learning_rate": 6.114477316569746e-06, "loss": 0.2321, "step": 2528 }, { "epoch": 1.2433628318584071, "grad_norm": 16.0, "learning_rate": 6.1122003396576315e-06, "loss": 0.2888, "step": 2529 }, { "epoch": 1.2438544739429696, "grad_norm": 13.5, "learning_rate": 6.109922889090667e-06, "loss": 0.3638, "step": 2530 }, { "epoch": 1.244346116027532, "grad_norm": 9.625, "learning_rate": 6.1076449655379525e-06, "loss": 0.2917, "step": 2531 }, { "epoch": 1.2448377581120944, "grad_norm": 18.5, "learning_rate": 6.1053665696687285e-06, "loss": 0.0979, "step": 2532 }, { "epoch": 1.2453294001966568, "grad_norm": 23.75, "learning_rate": 6.103087702152377e-06, "loss": 0.3267, "step": 2533 }, { "epoch": 1.2458210422812193, "grad_norm": 21.75, "learning_rate": 6.10080836365841e-06, "loss": 0.316, "step": 2534 }, { "epoch": 1.2463126843657817, "grad_norm": 18.0, "learning_rate": 6.098528554856488e-06, "loss": 0.2449, "step": 2535 }, { "epoch": 1.2468043264503441, "grad_norm": 26.125, "learning_rate": 6.096248276416404e-06, "loss": 0.3644, "step": 2536 }, { "epoch": 1.2472959685349065, "grad_norm": 23.875, "learning_rate": 6.093967529008091e-06, "loss": 0.3398, "step": 2537 }, { "epoch": 1.247787610619469, "grad_norm": 24.0, "learning_rate": 6.091686313301616e-06, "loss": 0.2273, "step": 2538 }, { "epoch": 1.2482792527040314, "grad_norm": 24.625, "learning_rate": 6.089404629967191e-06, "loss": 0.2142, "step": 2539 }, { "epoch": 1.2487708947885938, "grad_norm": 28.25, "learning_rate": 6.087122479675157e-06, "loss": 0.3499, "step": 2540 }, { "epoch": 1.2492625368731562, "grad_norm": 28.0, "learning_rate": 6.084839863096e-06, "loss": 0.4229, "step": 2541 }, { "epoch": 1.2497541789577187, "grad_norm": 15.0, "learning_rate": 6.082556780900335e-06, "loss": 0.3435, "step": 2542 }, { "epoch": 1.2502458210422813, "grad_norm": 12.25, "learning_rate": 6.080273233758921e-06, "loss": 0.1997, "step": 2543 }, { "epoch": 1.2507374631268435, "grad_norm": 35.75, "learning_rate": 6.0779892223426496e-06, "loss": 0.5426, "step": 2544 }, { "epoch": 1.2512291052114062, "grad_norm": 26.375, "learning_rate": 6.075704747322548e-06, "loss": 0.3488, "step": 2545 }, { "epoch": 1.2517207472959686, "grad_norm": 10.25, "learning_rate": 6.0734198093697844e-06, "loss": 0.3128, "step": 2546 }, { "epoch": 1.252212389380531, "grad_norm": 16.875, "learning_rate": 6.071134409155658e-06, "loss": 0.3041, "step": 2547 }, { "epoch": 1.2527040314650935, "grad_norm": 19.75, "learning_rate": 6.068848547351609e-06, "loss": 0.3378, "step": 2548 }, { "epoch": 1.2531956735496559, "grad_norm": 9.375, "learning_rate": 6.066562224629204e-06, "loss": 0.21, "step": 2549 }, { "epoch": 1.2536873156342183, "grad_norm": 12.75, "learning_rate": 6.0642754416601545e-06, "loss": 0.2207, "step": 2550 }, { "epoch": 1.2541789577187807, "grad_norm": 27.0, "learning_rate": 6.061988199116305e-06, "loss": 0.4831, "step": 2551 }, { "epoch": 1.2546705998033432, "grad_norm": 22.125, "learning_rate": 6.059700497669631e-06, "loss": 0.3046, "step": 2552 }, { "epoch": 1.2551622418879056, "grad_norm": 12.3125, "learning_rate": 6.057412337992247e-06, "loss": 0.2318, "step": 2553 }, { "epoch": 1.255653883972468, "grad_norm": 34.75, "learning_rate": 6.055123720756402e-06, "loss": 0.63, "step": 2554 }, { "epoch": 1.2561455260570304, "grad_norm": 21.75, "learning_rate": 6.052834646634475e-06, "loss": 0.3564, "step": 2555 }, { "epoch": 1.2566371681415929, "grad_norm": 32.75, "learning_rate": 6.050545116298983e-06, "loss": 0.2576, "step": 2556 }, { "epoch": 1.2571288102261553, "grad_norm": 62.5, "learning_rate": 6.048255130422579e-06, "loss": 0.5183, "step": 2557 }, { "epoch": 1.2576204523107177, "grad_norm": 26.5, "learning_rate": 6.045964689678045e-06, "loss": 0.4148, "step": 2558 }, { "epoch": 1.2581120943952802, "grad_norm": 15.75, "learning_rate": 6.043673794738298e-06, "loss": 0.2484, "step": 2559 }, { "epoch": 1.2586037364798428, "grad_norm": 20.25, "learning_rate": 6.0413824462763914e-06, "loss": 0.3071, "step": 2560 }, { "epoch": 1.259095378564405, "grad_norm": 41.5, "learning_rate": 6.03909064496551e-06, "loss": 0.332, "step": 2561 }, { "epoch": 1.2595870206489677, "grad_norm": 15.4375, "learning_rate": 6.036798391478969e-06, "loss": 0.3051, "step": 2562 }, { "epoch": 1.2600786627335299, "grad_norm": 28.125, "learning_rate": 6.03450568649022e-06, "loss": 0.3361, "step": 2563 }, { "epoch": 1.2605703048180925, "grad_norm": 33.0, "learning_rate": 6.032212530672847e-06, "loss": 0.4897, "step": 2564 }, { "epoch": 1.261061946902655, "grad_norm": 19.375, "learning_rate": 6.029918924700564e-06, "loss": 0.3398, "step": 2565 }, { "epoch": 1.2615535889872174, "grad_norm": 15.125, "learning_rate": 6.027624869247219e-06, "loss": 0.2229, "step": 2566 }, { "epoch": 1.2620452310717798, "grad_norm": 37.25, "learning_rate": 6.0253303649867936e-06, "loss": 0.5887, "step": 2567 }, { "epoch": 1.2625368731563422, "grad_norm": 26.125, "learning_rate": 6.023035412593397e-06, "loss": 0.2425, "step": 2568 }, { "epoch": 1.2630285152409046, "grad_norm": 29.0, "learning_rate": 6.020740012741273e-06, "loss": 0.277, "step": 2569 }, { "epoch": 1.263520157325467, "grad_norm": 13.625, "learning_rate": 6.0184441661047985e-06, "loss": 0.3392, "step": 2570 }, { "epoch": 1.2640117994100295, "grad_norm": 27.125, "learning_rate": 6.016147873358478e-06, "loss": 0.3252, "step": 2571 }, { "epoch": 1.264503441494592, "grad_norm": 25.0, "learning_rate": 6.013851135176947e-06, "loss": 0.5461, "step": 2572 }, { "epoch": 1.2649950835791544, "grad_norm": 45.0, "learning_rate": 6.011553952234977e-06, "loss": 0.3994, "step": 2573 }, { "epoch": 1.2654867256637168, "grad_norm": 30.0, "learning_rate": 6.009256325207464e-06, "loss": 0.2966, "step": 2574 }, { "epoch": 1.2659783677482792, "grad_norm": 31.25, "learning_rate": 6.006958254769438e-06, "loss": 0.3215, "step": 2575 }, { "epoch": 1.2664700098328416, "grad_norm": 18.0, "learning_rate": 6.004659741596058e-06, "loss": 0.25, "step": 2576 }, { "epoch": 1.266961651917404, "grad_norm": 38.5, "learning_rate": 6.002360786362615e-06, "loss": 0.4625, "step": 2577 }, { "epoch": 1.2674532940019665, "grad_norm": 30.25, "learning_rate": 6.000061389744526e-06, "loss": 0.372, "step": 2578 }, { "epoch": 1.2679449360865291, "grad_norm": 52.0, "learning_rate": 5.9977615524173415e-06, "loss": 0.4515, "step": 2579 }, { "epoch": 1.2684365781710913, "grad_norm": 29.75, "learning_rate": 5.99546127505674e-06, "loss": 0.4661, "step": 2580 }, { "epoch": 1.268928220255654, "grad_norm": 38.5, "learning_rate": 5.993160558338529e-06, "loss": 0.4218, "step": 2581 }, { "epoch": 1.2694198623402162, "grad_norm": 24.5, "learning_rate": 5.990859402938646e-06, "loss": 0.3654, "step": 2582 }, { "epoch": 1.2699115044247788, "grad_norm": 12.1875, "learning_rate": 5.988557809533155e-06, "loss": 0.2558, "step": 2583 }, { "epoch": 1.2704031465093413, "grad_norm": 30.125, "learning_rate": 5.986255778798252e-06, "loss": 0.2144, "step": 2584 }, { "epoch": 1.2708947885939037, "grad_norm": 18.75, "learning_rate": 5.98395331141026e-06, "loss": 0.3055, "step": 2585 }, { "epoch": 1.2713864306784661, "grad_norm": 25.0, "learning_rate": 5.9816504080456275e-06, "loss": 0.4688, "step": 2586 }, { "epoch": 1.2718780727630286, "grad_norm": 18.0, "learning_rate": 5.979347069380938e-06, "loss": 0.2482, "step": 2587 }, { "epoch": 1.272369714847591, "grad_norm": 24.25, "learning_rate": 5.977043296092897e-06, "loss": 0.4529, "step": 2588 }, { "epoch": 1.2728613569321534, "grad_norm": 18.25, "learning_rate": 5.974739088858337e-06, "loss": 0.1608, "step": 2589 }, { "epoch": 1.2733529990167158, "grad_norm": 59.75, "learning_rate": 5.972434448354223e-06, "loss": 0.3513, "step": 2590 }, { "epoch": 1.2738446411012783, "grad_norm": 40.0, "learning_rate": 5.970129375257644e-06, "loss": 0.4019, "step": 2591 }, { "epoch": 1.2743362831858407, "grad_norm": 20.25, "learning_rate": 5.967823870245818e-06, "loss": 0.2873, "step": 2592 }, { "epoch": 1.2748279252704031, "grad_norm": 16.25, "learning_rate": 5.965517933996085e-06, "loss": 0.4039, "step": 2593 }, { "epoch": 1.2753195673549655, "grad_norm": 21.0, "learning_rate": 5.963211567185919e-06, "loss": 0.3434, "step": 2594 }, { "epoch": 1.275811209439528, "grad_norm": 21.625, "learning_rate": 5.960904770492915e-06, "loss": 0.2394, "step": 2595 }, { "epoch": 1.2763028515240904, "grad_norm": 33.25, "learning_rate": 5.958597544594797e-06, "loss": 0.4448, "step": 2596 }, { "epoch": 1.2767944936086528, "grad_norm": 15.0625, "learning_rate": 5.956289890169413e-06, "loss": 0.1796, "step": 2597 }, { "epoch": 1.2772861356932155, "grad_norm": 25.75, "learning_rate": 5.95398180789474e-06, "loss": 0.2737, "step": 2598 }, { "epoch": 1.2777777777777777, "grad_norm": 24.875, "learning_rate": 5.951673298448877e-06, "loss": 0.3606, "step": 2599 }, { "epoch": 1.2782694198623403, "grad_norm": 21.375, "learning_rate": 5.949364362510052e-06, "loss": 0.2875, "step": 2600 }, { "epoch": 1.2787610619469025, "grad_norm": 21.625, "learning_rate": 5.9470550007566135e-06, "loss": 0.2649, "step": 2601 }, { "epoch": 1.2792527040314652, "grad_norm": 17.375, "learning_rate": 5.944745213867043e-06, "loss": 0.339, "step": 2602 }, { "epoch": 1.2797443461160276, "grad_norm": 25.625, "learning_rate": 5.942435002519937e-06, "loss": 0.3899, "step": 2603 }, { "epoch": 1.28023598820059, "grad_norm": 12.375, "learning_rate": 5.9401243673940246e-06, "loss": 0.1969, "step": 2604 }, { "epoch": 1.2807276302851525, "grad_norm": 41.25, "learning_rate": 5.937813309168159e-06, "loss": 0.4011, "step": 2605 }, { "epoch": 1.281219272369715, "grad_norm": 18.75, "learning_rate": 5.93550182852131e-06, "loss": 0.3039, "step": 2606 }, { "epoch": 1.2817109144542773, "grad_norm": 17.125, "learning_rate": 5.9331899261325804e-06, "loss": 0.1809, "step": 2607 }, { "epoch": 1.2822025565388397, "grad_norm": 20.125, "learning_rate": 5.9308776026811925e-06, "loss": 0.3479, "step": 2608 }, { "epoch": 1.2826941986234022, "grad_norm": 19.375, "learning_rate": 5.928564858846494e-06, "loss": 0.2338, "step": 2609 }, { "epoch": 1.2831858407079646, "grad_norm": 34.0, "learning_rate": 5.926251695307953e-06, "loss": 0.4087, "step": 2610 }, { "epoch": 1.283677482792527, "grad_norm": 12.8125, "learning_rate": 5.923938112745164e-06, "loss": 0.2991, "step": 2611 }, { "epoch": 1.2841691248770895, "grad_norm": 38.5, "learning_rate": 5.921624111837846e-06, "loss": 0.31, "step": 2612 }, { "epoch": 1.2846607669616519, "grad_norm": 11.1875, "learning_rate": 5.919309693265836e-06, "loss": 0.1231, "step": 2613 }, { "epoch": 1.2851524090462143, "grad_norm": 17.25, "learning_rate": 5.916994857709098e-06, "loss": 0.2675, "step": 2614 }, { "epoch": 1.2856440511307767, "grad_norm": 15.8125, "learning_rate": 5.9146796058477156e-06, "loss": 0.2722, "step": 2615 }, { "epoch": 1.2861356932153392, "grad_norm": 29.5, "learning_rate": 5.9123639383618954e-06, "loss": 0.3982, "step": 2616 }, { "epoch": 1.2866273352999018, "grad_norm": 32.0, "learning_rate": 5.91004785593197e-06, "loss": 0.3477, "step": 2617 }, { "epoch": 1.287118977384464, "grad_norm": 34.0, "learning_rate": 5.907731359238389e-06, "loss": 0.3071, "step": 2618 }, { "epoch": 1.2876106194690267, "grad_norm": 47.25, "learning_rate": 5.905414448961723e-06, "loss": 0.5416, "step": 2619 }, { "epoch": 1.2881022615535889, "grad_norm": 19.375, "learning_rate": 5.903097125782669e-06, "loss": 0.3118, "step": 2620 }, { "epoch": 1.2885939036381515, "grad_norm": 38.0, "learning_rate": 5.900779390382042e-06, "loss": 0.3035, "step": 2621 }, { "epoch": 1.289085545722714, "grad_norm": 23.25, "learning_rate": 5.8984612434407795e-06, "loss": 0.2336, "step": 2622 }, { "epoch": 1.2895771878072764, "grad_norm": 34.5, "learning_rate": 5.896142685639938e-06, "loss": 0.331, "step": 2623 }, { "epoch": 1.2900688298918388, "grad_norm": 24.375, "learning_rate": 5.893823717660697e-06, "loss": 0.4061, "step": 2624 }, { "epoch": 1.2905604719764012, "grad_norm": 19.125, "learning_rate": 5.891504340184355e-06, "loss": 0.3322, "step": 2625 }, { "epoch": 1.2910521140609637, "grad_norm": 37.25, "learning_rate": 5.88918455389233e-06, "loss": 0.3163, "step": 2626 }, { "epoch": 1.291543756145526, "grad_norm": 32.0, "learning_rate": 5.886864359466165e-06, "loss": 0.3093, "step": 2627 }, { "epoch": 1.2920353982300885, "grad_norm": 34.25, "learning_rate": 5.884543757587516e-06, "loss": 0.4218, "step": 2628 }, { "epoch": 1.292527040314651, "grad_norm": 15.6875, "learning_rate": 5.882222748938164e-06, "loss": 0.3657, "step": 2629 }, { "epoch": 1.2930186823992134, "grad_norm": 29.375, "learning_rate": 5.879901334200005e-06, "loss": 0.3048, "step": 2630 }, { "epoch": 1.2935103244837758, "grad_norm": 25.625, "learning_rate": 5.877579514055059e-06, "loss": 0.2796, "step": 2631 }, { "epoch": 1.2940019665683382, "grad_norm": 13.375, "learning_rate": 5.875257289185462e-06, "loss": 0.2394, "step": 2632 }, { "epoch": 1.2944936086529006, "grad_norm": 38.5, "learning_rate": 5.872934660273471e-06, "loss": 0.6203, "step": 2633 }, { "epoch": 1.294985250737463, "grad_norm": 31.25, "learning_rate": 5.870611628001458e-06, "loss": 0.2507, "step": 2634 }, { "epoch": 1.2954768928220255, "grad_norm": 21.75, "learning_rate": 5.86828819305192e-06, "loss": 0.2677, "step": 2635 }, { "epoch": 1.295968534906588, "grad_norm": 23.0, "learning_rate": 5.865964356107465e-06, "loss": 0.3281, "step": 2636 }, { "epoch": 1.2964601769911503, "grad_norm": 21.25, "learning_rate": 5.8636401178508215e-06, "loss": 0.3641, "step": 2637 }, { "epoch": 1.296951819075713, "grad_norm": 31.0, "learning_rate": 5.861315478964841e-06, "loss": 0.3952, "step": 2638 }, { "epoch": 1.2974434611602752, "grad_norm": 18.0, "learning_rate": 5.858990440132487e-06, "loss": 0.2399, "step": 2639 }, { "epoch": 1.2979351032448379, "grad_norm": 12.5625, "learning_rate": 5.85666500203684e-06, "loss": 0.2023, "step": 2640 }, { "epoch": 1.2984267453294003, "grad_norm": 23.5, "learning_rate": 5.854339165361101e-06, "loss": 0.3552, "step": 2641 }, { "epoch": 1.2989183874139627, "grad_norm": 18.5, "learning_rate": 5.852012930788589e-06, "loss": 0.1967, "step": 2642 }, { "epoch": 1.2994100294985251, "grad_norm": 19.375, "learning_rate": 5.849686299002734e-06, "loss": 0.3178, "step": 2643 }, { "epoch": 1.2999016715830876, "grad_norm": 47.25, "learning_rate": 5.84735927068709e-06, "loss": 0.5885, "step": 2644 }, { "epoch": 1.30039331366765, "grad_norm": 32.5, "learning_rate": 5.845031846525321e-06, "loss": 0.3636, "step": 2645 }, { "epoch": 1.3008849557522124, "grad_norm": 8.6875, "learning_rate": 5.842704027201214e-06, "loss": 0.0813, "step": 2646 }, { "epoch": 1.3013765978367748, "grad_norm": 18.875, "learning_rate": 5.840375813398666e-06, "loss": 0.2934, "step": 2647 }, { "epoch": 1.3018682399213373, "grad_norm": 18.75, "learning_rate": 5.838047205801691e-06, "loss": 0.2565, "step": 2648 }, { "epoch": 1.3023598820058997, "grad_norm": 15.875, "learning_rate": 5.835718205094424e-06, "loss": 0.3873, "step": 2649 }, { "epoch": 1.3028515240904621, "grad_norm": 20.0, "learning_rate": 5.833388811961108e-06, "loss": 0.3604, "step": 2650 }, { "epoch": 1.3033431661750245, "grad_norm": 24.0, "learning_rate": 5.831059027086107e-06, "loss": 0.1362, "step": 2651 }, { "epoch": 1.303834808259587, "grad_norm": 23.875, "learning_rate": 5.828728851153897e-06, "loss": 0.3459, "step": 2652 }, { "epoch": 1.3043264503441494, "grad_norm": 28.625, "learning_rate": 5.826398284849069e-06, "loss": 0.3565, "step": 2653 }, { "epoch": 1.3048180924287118, "grad_norm": 28.375, "learning_rate": 5.8240673288563324e-06, "loss": 0.3669, "step": 2654 }, { "epoch": 1.3053097345132743, "grad_norm": 19.75, "learning_rate": 5.8217359838605044e-06, "loss": 0.1709, "step": 2655 }, { "epoch": 1.3058013765978367, "grad_norm": 28.5, "learning_rate": 5.819404250546522e-06, "loss": 0.3054, "step": 2656 }, { "epoch": 1.3062930186823993, "grad_norm": 43.75, "learning_rate": 5.817072129599434e-06, "loss": 0.4107, "step": 2657 }, { "epoch": 1.3067846607669615, "grad_norm": 44.0, "learning_rate": 5.814739621704406e-06, "loss": 0.5952, "step": 2658 }, { "epoch": 1.3072763028515242, "grad_norm": 48.0, "learning_rate": 5.812406727546712e-06, "loss": 0.4493, "step": 2659 }, { "epoch": 1.3077679449360864, "grad_norm": 29.0, "learning_rate": 5.8100734478117434e-06, "loss": 0.31, "step": 2660 }, { "epoch": 1.308259587020649, "grad_norm": 20.5, "learning_rate": 5.807739783185005e-06, "loss": 0.2628, "step": 2661 }, { "epoch": 1.3087512291052115, "grad_norm": 15.0, "learning_rate": 5.8054057343521135e-06, "loss": 0.3898, "step": 2662 }, { "epoch": 1.309242871189774, "grad_norm": 20.25, "learning_rate": 5.8030713019987945e-06, "loss": 0.2757, "step": 2663 }, { "epoch": 1.3097345132743363, "grad_norm": 22.625, "learning_rate": 5.800736486810896e-06, "loss": 0.4283, "step": 2664 }, { "epoch": 1.3102261553588987, "grad_norm": 20.625, "learning_rate": 5.798401289474369e-06, "loss": 0.3203, "step": 2665 }, { "epoch": 1.3107177974434612, "grad_norm": 24.0, "learning_rate": 5.796065710675282e-06, "loss": 0.4062, "step": 2666 }, { "epoch": 1.3112094395280236, "grad_norm": 23.625, "learning_rate": 5.793729751099814e-06, "loss": 0.4722, "step": 2667 }, { "epoch": 1.311701081612586, "grad_norm": 33.0, "learning_rate": 5.791393411434256e-06, "loss": 0.4189, "step": 2668 }, { "epoch": 1.3121927236971485, "grad_norm": 27.75, "learning_rate": 5.78905669236501e-06, "loss": 0.4916, "step": 2669 }, { "epoch": 1.3126843657817109, "grad_norm": 9.0625, "learning_rate": 5.786719594578591e-06, "loss": 0.3819, "step": 2670 }, { "epoch": 1.3131760078662733, "grad_norm": 28.5, "learning_rate": 5.784382118761624e-06, "loss": 0.2429, "step": 2671 }, { "epoch": 1.3136676499508357, "grad_norm": 27.375, "learning_rate": 5.782044265600846e-06, "loss": 0.3003, "step": 2672 }, { "epoch": 1.3141592920353982, "grad_norm": 14.875, "learning_rate": 5.779706035783104e-06, "loss": 0.2438, "step": 2673 }, { "epoch": 1.3146509341199606, "grad_norm": 34.75, "learning_rate": 5.7773674299953555e-06, "loss": 0.5488, "step": 2674 }, { "epoch": 1.315142576204523, "grad_norm": 25.25, "learning_rate": 5.775028448924668e-06, "loss": 0.5347, "step": 2675 }, { "epoch": 1.3156342182890857, "grad_norm": 23.5, "learning_rate": 5.772689093258224e-06, "loss": 0.4457, "step": 2676 }, { "epoch": 1.3161258603736479, "grad_norm": 27.5, "learning_rate": 5.770349363683309e-06, "loss": 0.445, "step": 2677 }, { "epoch": 1.3166175024582105, "grad_norm": 21.125, "learning_rate": 5.768009260887321e-06, "loss": 0.3332, "step": 2678 }, { "epoch": 1.3171091445427727, "grad_norm": 45.75, "learning_rate": 5.765668785557771e-06, "loss": 0.5679, "step": 2679 }, { "epoch": 1.3176007866273354, "grad_norm": 16.25, "learning_rate": 5.763327938382274e-06, "loss": 0.2355, "step": 2680 }, { "epoch": 1.3180924287118978, "grad_norm": 23.25, "learning_rate": 5.760986720048559e-06, "loss": 0.411, "step": 2681 }, { "epoch": 1.3185840707964602, "grad_norm": 24.125, "learning_rate": 5.758645131244462e-06, "loss": 0.2856, "step": 2682 }, { "epoch": 1.3190757128810227, "grad_norm": 31.5, "learning_rate": 5.756303172657926e-06, "loss": 0.5233, "step": 2683 }, { "epoch": 1.319567354965585, "grad_norm": 27.75, "learning_rate": 5.753960844977007e-06, "loss": 0.389, "step": 2684 }, { "epoch": 1.3200589970501475, "grad_norm": 15.625, "learning_rate": 5.7516181488898635e-06, "loss": 0.2524, "step": 2685 }, { "epoch": 1.32055063913471, "grad_norm": 12.875, "learning_rate": 5.749275085084769e-06, "loss": 0.347, "step": 2686 }, { "epoch": 1.3210422812192724, "grad_norm": 12.375, "learning_rate": 5.746931654250101e-06, "loss": 0.2806, "step": 2687 }, { "epoch": 1.3215339233038348, "grad_norm": 12.6875, "learning_rate": 5.7445878570743436e-06, "loss": 0.1907, "step": 2688 }, { "epoch": 1.3220255653883972, "grad_norm": 28.25, "learning_rate": 5.742243694246091e-06, "loss": 0.3631, "step": 2689 }, { "epoch": 1.3225172074729596, "grad_norm": 16.625, "learning_rate": 5.739899166454045e-06, "loss": 0.2719, "step": 2690 }, { "epoch": 1.323008849557522, "grad_norm": 10.4375, "learning_rate": 5.737554274387015e-06, "loss": 0.2212, "step": 2691 }, { "epoch": 1.3235004916420845, "grad_norm": 27.875, "learning_rate": 5.735209018733914e-06, "loss": 0.4047, "step": 2692 }, { "epoch": 1.323992133726647, "grad_norm": 27.125, "learning_rate": 5.732863400183767e-06, "loss": 0.3595, "step": 2693 }, { "epoch": 1.3244837758112094, "grad_norm": 34.25, "learning_rate": 5.7305174194256995e-06, "loss": 0.3775, "step": 2694 }, { "epoch": 1.324975417895772, "grad_norm": 15.6875, "learning_rate": 5.7281710771489495e-06, "loss": 0.3047, "step": 2695 }, { "epoch": 1.3254670599803342, "grad_norm": 41.25, "learning_rate": 5.725824374042855e-06, "loss": 0.6567, "step": 2696 }, { "epoch": 1.3259587020648969, "grad_norm": 26.375, "learning_rate": 5.723477310796868e-06, "loss": 0.412, "step": 2697 }, { "epoch": 1.326450344149459, "grad_norm": 11.5, "learning_rate": 5.721129888100537e-06, "loss": 0.1332, "step": 2698 }, { "epoch": 1.3269419862340217, "grad_norm": 58.0, "learning_rate": 5.718782106643523e-06, "loss": 0.6234, "step": 2699 }, { "epoch": 1.3274336283185841, "grad_norm": 13.9375, "learning_rate": 5.716433967115591e-06, "loss": 0.1796, "step": 2700 }, { "epoch": 1.3279252704031466, "grad_norm": 11.6875, "learning_rate": 5.714085470206609e-06, "loss": 0.2463, "step": 2701 }, { "epoch": 1.328416912487709, "grad_norm": 30.25, "learning_rate": 5.711736616606552e-06, "loss": 0.3028, "step": 2702 }, { "epoch": 1.3289085545722714, "grad_norm": 25.25, "learning_rate": 5.7093874070055e-06, "loss": 0.3372, "step": 2703 }, { "epoch": 1.3294001966568338, "grad_norm": 27.5, "learning_rate": 5.707037842093635e-06, "loss": 0.4809, "step": 2704 }, { "epoch": 1.3298918387413963, "grad_norm": 10.6875, "learning_rate": 5.704687922561246e-06, "loss": 0.2192, "step": 2705 }, { "epoch": 1.3303834808259587, "grad_norm": 14.4375, "learning_rate": 5.702337649098727e-06, "loss": 0.3256, "step": 2706 }, { "epoch": 1.3308751229105211, "grad_norm": 26.625, "learning_rate": 5.69998702239657e-06, "loss": 0.3618, "step": 2707 }, { "epoch": 1.3313667649950836, "grad_norm": 28.0, "learning_rate": 5.69763604314538e-06, "loss": 0.4006, "step": 2708 }, { "epoch": 1.331858407079646, "grad_norm": 20.375, "learning_rate": 5.695284712035858e-06, "loss": 0.3667, "step": 2709 }, { "epoch": 1.3323500491642084, "grad_norm": 38.5, "learning_rate": 5.692933029758812e-06, "loss": 0.5549, "step": 2710 }, { "epoch": 1.3328416912487708, "grad_norm": 19.625, "learning_rate": 5.690580997005151e-06, "loss": 0.3349, "step": 2711 }, { "epoch": 1.3333333333333333, "grad_norm": 20.25, "learning_rate": 5.688228614465889e-06, "loss": 0.3331, "step": 2712 }, { "epoch": 1.3338249754178957, "grad_norm": 20.0, "learning_rate": 5.685875882832143e-06, "loss": 0.3682, "step": 2713 }, { "epoch": 1.3343166175024583, "grad_norm": 36.5, "learning_rate": 5.683522802795131e-06, "loss": 0.3611, "step": 2714 }, { "epoch": 1.3348082595870205, "grad_norm": 24.25, "learning_rate": 5.681169375046172e-06, "loss": 0.3781, "step": 2715 }, { "epoch": 1.3352999016715832, "grad_norm": 21.5, "learning_rate": 5.678815600276691e-06, "loss": 0.377, "step": 2716 }, { "epoch": 1.3357915437561454, "grad_norm": 13.0625, "learning_rate": 5.6764614791782135e-06, "loss": 0.2469, "step": 2717 }, { "epoch": 1.336283185840708, "grad_norm": 52.0, "learning_rate": 5.674107012442363e-06, "loss": 0.6448, "step": 2718 }, { "epoch": 1.3367748279252705, "grad_norm": 23.875, "learning_rate": 5.671752200760871e-06, "loss": 0.441, "step": 2719 }, { "epoch": 1.337266470009833, "grad_norm": 34.25, "learning_rate": 5.669397044825567e-06, "loss": 0.5331, "step": 2720 }, { "epoch": 1.3377581120943953, "grad_norm": 26.25, "learning_rate": 5.667041545328381e-06, "loss": 0.2948, "step": 2721 }, { "epoch": 1.3382497541789578, "grad_norm": 16.875, "learning_rate": 5.664685702961344e-06, "loss": 0.3199, "step": 2722 }, { "epoch": 1.3387413962635202, "grad_norm": 35.75, "learning_rate": 5.662329518416589e-06, "loss": 0.4653, "step": 2723 }, { "epoch": 1.3392330383480826, "grad_norm": 18.25, "learning_rate": 5.65997299238635e-06, "loss": 0.2579, "step": 2724 }, { "epoch": 1.339724680432645, "grad_norm": 47.75, "learning_rate": 5.657616125562959e-06, "loss": 0.5806, "step": 2725 }, { "epoch": 1.3402163225172075, "grad_norm": 31.5, "learning_rate": 5.655258918638848e-06, "loss": 0.4421, "step": 2726 }, { "epoch": 1.3407079646017699, "grad_norm": 26.5, "learning_rate": 5.652901372306555e-06, "loss": 0.2821, "step": 2727 }, { "epoch": 1.3411996066863323, "grad_norm": 10.375, "learning_rate": 5.650543487258709e-06, "loss": 0.3018, "step": 2728 }, { "epoch": 1.3416912487708947, "grad_norm": 22.25, "learning_rate": 5.6481852641880425e-06, "loss": 0.2035, "step": 2729 }, { "epoch": 1.3421828908554572, "grad_norm": 26.0, "learning_rate": 5.64582670378739e-06, "loss": 0.3197, "step": 2730 }, { "epoch": 1.3426745329400196, "grad_norm": 21.75, "learning_rate": 5.6434678067496815e-06, "loss": 0.289, "step": 2731 }, { "epoch": 1.343166175024582, "grad_norm": 17.5, "learning_rate": 5.641108573767946e-06, "loss": 0.3454, "step": 2732 }, { "epoch": 1.3436578171091447, "grad_norm": 16.0, "learning_rate": 5.638749005535313e-06, "loss": 0.3447, "step": 2733 }, { "epoch": 1.3441494591937069, "grad_norm": 23.0, "learning_rate": 5.636389102745008e-06, "loss": 0.319, "step": 2734 }, { "epoch": 1.3446411012782695, "grad_norm": 15.625, "learning_rate": 5.63402886609036e-06, "loss": 0.1663, "step": 2735 }, { "epoch": 1.3451327433628317, "grad_norm": 14.125, "learning_rate": 5.631668296264789e-06, "loss": 0.3037, "step": 2736 }, { "epoch": 1.3456243854473944, "grad_norm": 13.0625, "learning_rate": 5.6293073939618166e-06, "loss": 0.2666, "step": 2737 }, { "epoch": 1.3461160275319568, "grad_norm": 15.9375, "learning_rate": 5.626946159875063e-06, "loss": 0.3096, "step": 2738 }, { "epoch": 1.3466076696165192, "grad_norm": 38.75, "learning_rate": 5.624584594698245e-06, "loss": 0.4343, "step": 2739 }, { "epoch": 1.3470993117010817, "grad_norm": 37.0, "learning_rate": 5.622222699125174e-06, "loss": 0.5874, "step": 2740 }, { "epoch": 1.347590953785644, "grad_norm": 22.5, "learning_rate": 5.619860473849765e-06, "loss": 0.4398, "step": 2741 }, { "epoch": 1.3480825958702065, "grad_norm": 20.375, "learning_rate": 5.617497919566022e-06, "loss": 0.4226, "step": 2742 }, { "epoch": 1.348574237954769, "grad_norm": 19.375, "learning_rate": 5.61513503696805e-06, "loss": 0.3747, "step": 2743 }, { "epoch": 1.3490658800393314, "grad_norm": 27.875, "learning_rate": 5.612771826750051e-06, "loss": 0.2895, "step": 2744 }, { "epoch": 1.3495575221238938, "grad_norm": 22.75, "learning_rate": 5.610408289606321e-06, "loss": 0.4295, "step": 2745 }, { "epoch": 1.3500491642084562, "grad_norm": 23.625, "learning_rate": 5.608044426231255e-06, "loss": 0.4803, "step": 2746 }, { "epoch": 1.3505408062930186, "grad_norm": 51.25, "learning_rate": 5.60568023731934e-06, "loss": 0.4482, "step": 2747 }, { "epoch": 1.351032448377581, "grad_norm": 22.0, "learning_rate": 5.6033157235651605e-06, "loss": 0.3648, "step": 2748 }, { "epoch": 1.3515240904621435, "grad_norm": 36.0, "learning_rate": 5.600950885663398e-06, "loss": 0.5247, "step": 2749 }, { "epoch": 1.352015732546706, "grad_norm": 28.25, "learning_rate": 5.598585724308828e-06, "loss": 0.515, "step": 2750 }, { "epoch": 1.3525073746312684, "grad_norm": 20.25, "learning_rate": 5.596220240196318e-06, "loss": 0.4432, "step": 2751 }, { "epoch": 1.352999016715831, "grad_norm": 46.5, "learning_rate": 5.593854434020837e-06, "loss": 0.4111, "step": 2752 }, { "epoch": 1.3534906588003932, "grad_norm": 24.25, "learning_rate": 5.5914883064774414e-06, "loss": 0.449, "step": 2753 }, { "epoch": 1.3539823008849559, "grad_norm": 15.625, "learning_rate": 5.589121858261287e-06, "loss": 0.3804, "step": 2754 }, { "epoch": 1.354473942969518, "grad_norm": 26.375, "learning_rate": 5.586755090067621e-06, "loss": 0.1986, "step": 2755 }, { "epoch": 1.3549655850540807, "grad_norm": 22.875, "learning_rate": 5.584388002591788e-06, "loss": 0.2267, "step": 2756 }, { "epoch": 1.3554572271386431, "grad_norm": 30.0, "learning_rate": 5.5820205965292235e-06, "loss": 0.3099, "step": 2757 }, { "epoch": 1.3559488692232056, "grad_norm": 36.0, "learning_rate": 5.579652872575455e-06, "loss": 0.324, "step": 2758 }, { "epoch": 1.356440511307768, "grad_norm": 17.5, "learning_rate": 5.577284831426108e-06, "loss": 0.3618, "step": 2759 }, { "epoch": 1.3569321533923304, "grad_norm": 11.25, "learning_rate": 5.574916473776898e-06, "loss": 0.2953, "step": 2760 }, { "epoch": 1.3574237954768928, "grad_norm": 31.75, "learning_rate": 5.572547800323636e-06, "loss": 0.3734, "step": 2761 }, { "epoch": 1.3579154375614553, "grad_norm": 18.5, "learning_rate": 5.57017881176222e-06, "loss": 0.2245, "step": 2762 }, { "epoch": 1.3584070796460177, "grad_norm": 30.875, "learning_rate": 5.567809508788651e-06, "loss": 0.4292, "step": 2763 }, { "epoch": 1.3588987217305801, "grad_norm": 32.0, "learning_rate": 5.56543989209901e-06, "loss": 0.3155, "step": 2764 }, { "epoch": 1.3593903638151426, "grad_norm": 16.375, "learning_rate": 5.56306996238948e-06, "loss": 0.1995, "step": 2765 }, { "epoch": 1.359882005899705, "grad_norm": 11.4375, "learning_rate": 5.5606997203563305e-06, "loss": 0.3068, "step": 2766 }, { "epoch": 1.3603736479842674, "grad_norm": 30.625, "learning_rate": 5.558329166695926e-06, "loss": 0.4334, "step": 2767 }, { "epoch": 1.3608652900688298, "grad_norm": 41.25, "learning_rate": 5.555958302104719e-06, "loss": 0.6436, "step": 2768 }, { "epoch": 1.3613569321533923, "grad_norm": 38.5, "learning_rate": 5.5535871272792575e-06, "loss": 0.4127, "step": 2769 }, { "epoch": 1.3618485742379547, "grad_norm": 27.125, "learning_rate": 5.5512156429161755e-06, "loss": 0.3527, "step": 2770 }, { "epoch": 1.3623402163225173, "grad_norm": 17.5, "learning_rate": 5.548843849712205e-06, "loss": 0.495, "step": 2771 }, { "epoch": 1.3628318584070795, "grad_norm": 38.25, "learning_rate": 5.546471748364162e-06, "loss": 0.5275, "step": 2772 }, { "epoch": 1.3633235004916422, "grad_norm": 36.5, "learning_rate": 5.544099339568956e-06, "loss": 0.516, "step": 2773 }, { "epoch": 1.3638151425762044, "grad_norm": 17.5, "learning_rate": 5.541726624023588e-06, "loss": 0.2619, "step": 2774 }, { "epoch": 1.364306784660767, "grad_norm": 13.1875, "learning_rate": 5.539353602425146e-06, "loss": 0.3217, "step": 2775 }, { "epoch": 1.3647984267453295, "grad_norm": 44.75, "learning_rate": 5.53698027547081e-06, "loss": 0.5677, "step": 2776 }, { "epoch": 1.365290068829892, "grad_norm": 32.25, "learning_rate": 5.5346066438578495e-06, "loss": 0.6767, "step": 2777 }, { "epoch": 1.3657817109144543, "grad_norm": 15.6875, "learning_rate": 5.532232708283623e-06, "loss": 0.3431, "step": 2778 }, { "epoch": 1.3662733529990168, "grad_norm": 26.0, "learning_rate": 5.52985846944558e-06, "loss": 0.5098, "step": 2779 }, { "epoch": 1.3667649950835792, "grad_norm": 31.5, "learning_rate": 5.527483928041257e-06, "loss": 0.3324, "step": 2780 }, { "epoch": 1.3672566371681416, "grad_norm": 22.5, "learning_rate": 5.5251090847682785e-06, "loss": 0.4866, "step": 2781 }, { "epoch": 1.367748279252704, "grad_norm": 14.75, "learning_rate": 5.522733940324362e-06, "loss": 0.3382, "step": 2782 }, { "epoch": 1.3682399213372665, "grad_norm": 18.75, "learning_rate": 5.520358495407309e-06, "loss": 0.4017, "step": 2783 }, { "epoch": 1.368731563421829, "grad_norm": 33.75, "learning_rate": 5.51798275071501e-06, "loss": 0.5008, "step": 2784 }, { "epoch": 1.3692232055063913, "grad_norm": 17.125, "learning_rate": 5.515606706945448e-06, "loss": 0.2865, "step": 2785 }, { "epoch": 1.3697148475909537, "grad_norm": 23.875, "learning_rate": 5.513230364796689e-06, "loss": 0.4416, "step": 2786 }, { "epoch": 1.3702064896755162, "grad_norm": 11.75, "learning_rate": 5.510853724966886e-06, "loss": 0.3008, "step": 2787 }, { "epoch": 1.3706981317600786, "grad_norm": 21.75, "learning_rate": 5.5084767881542844e-06, "loss": 0.3442, "step": 2788 }, { "epoch": 1.371189773844641, "grad_norm": 32.5, "learning_rate": 5.506099555057212e-06, "loss": 0.2235, "step": 2789 }, { "epoch": 1.3716814159292037, "grad_norm": 20.875, "learning_rate": 5.503722026374088e-06, "loss": 0.3526, "step": 2790 }, { "epoch": 1.3721730580137659, "grad_norm": 29.75, "learning_rate": 5.501344202803414e-06, "loss": 0.5566, "step": 2791 }, { "epoch": 1.3726647000983285, "grad_norm": 19.625, "learning_rate": 5.498966085043783e-06, "loss": 0.5485, "step": 2792 }, { "epoch": 1.3731563421828907, "grad_norm": 26.0, "learning_rate": 5.496587673793869e-06, "loss": 0.4505, "step": 2793 }, { "epoch": 1.3736479842674534, "grad_norm": 23.625, "learning_rate": 5.494208969752436e-06, "loss": 0.2823, "step": 2794 }, { "epoch": 1.3741396263520158, "grad_norm": 15.5625, "learning_rate": 5.491829973618333e-06, "loss": 0.2647, "step": 2795 }, { "epoch": 1.3746312684365782, "grad_norm": 24.25, "learning_rate": 5.489450686090496e-06, "loss": 0.6087, "step": 2796 }, { "epoch": 1.3751229105211407, "grad_norm": 28.125, "learning_rate": 5.487071107867945e-06, "loss": 0.2601, "step": 2797 }, { "epoch": 1.375614552605703, "grad_norm": 25.625, "learning_rate": 5.484691239649785e-06, "loss": 0.4472, "step": 2798 }, { "epoch": 1.3761061946902655, "grad_norm": 16.5, "learning_rate": 5.482311082135207e-06, "loss": 0.3369, "step": 2799 }, { "epoch": 1.376597836774828, "grad_norm": 15.0, "learning_rate": 5.479930636023489e-06, "loss": 0.3098, "step": 2800 }, { "epoch": 1.3770894788593904, "grad_norm": 28.375, "learning_rate": 5.47754990201399e-06, "loss": 0.4595, "step": 2801 }, { "epoch": 1.3775811209439528, "grad_norm": 23.125, "learning_rate": 5.475168880806156e-06, "loss": 0.3558, "step": 2802 }, { "epoch": 1.3780727630285152, "grad_norm": 13.0625, "learning_rate": 5.472787573099516e-06, "loss": 0.2931, "step": 2803 }, { "epoch": 1.3785644051130777, "grad_norm": 15.375, "learning_rate": 5.470405979593687e-06, "loss": 0.192, "step": 2804 }, { "epoch": 1.37905604719764, "grad_norm": 21.75, "learning_rate": 5.4680241009883635e-06, "loss": 0.4199, "step": 2805 }, { "epoch": 1.3795476892822025, "grad_norm": 24.25, "learning_rate": 5.465641937983329e-06, "loss": 0.6541, "step": 2806 }, { "epoch": 1.380039331366765, "grad_norm": 16.0, "learning_rate": 5.463259491278449e-06, "loss": 0.266, "step": 2807 }, { "epoch": 1.3805309734513274, "grad_norm": 20.25, "learning_rate": 5.460876761573671e-06, "loss": 0.3736, "step": 2808 }, { "epoch": 1.38102261553589, "grad_norm": 13.625, "learning_rate": 5.458493749569028e-06, "loss": 0.3512, "step": 2809 }, { "epoch": 1.3815142576204522, "grad_norm": 17.0, "learning_rate": 5.456110455964632e-06, "loss": 0.2926, "step": 2810 }, { "epoch": 1.3820058997050149, "grad_norm": 13.1875, "learning_rate": 5.453726881460685e-06, "loss": 0.2675, "step": 2811 }, { "epoch": 1.382497541789577, "grad_norm": 21.25, "learning_rate": 5.451343026757463e-06, "loss": 0.3166, "step": 2812 }, { "epoch": 1.3829891838741397, "grad_norm": 8.0, "learning_rate": 5.448958892555331e-06, "loss": 0.2316, "step": 2813 }, { "epoch": 1.3834808259587021, "grad_norm": 15.1875, "learning_rate": 5.446574479554731e-06, "loss": 0.2476, "step": 2814 }, { "epoch": 1.3839724680432646, "grad_norm": 14.0625, "learning_rate": 5.444189788456191e-06, "loss": 0.3367, "step": 2815 }, { "epoch": 1.384464110127827, "grad_norm": 25.875, "learning_rate": 5.44180481996032e-06, "loss": 0.3265, "step": 2816 }, { "epoch": 1.3849557522123894, "grad_norm": 16.875, "learning_rate": 5.439419574767805e-06, "loss": 0.2986, "step": 2817 }, { "epoch": 1.3854473942969519, "grad_norm": 26.75, "learning_rate": 5.437034053579417e-06, "loss": 0.2232, "step": 2818 }, { "epoch": 1.3859390363815143, "grad_norm": 17.0, "learning_rate": 5.43464825709601e-06, "loss": 0.2297, "step": 2819 }, { "epoch": 1.3864306784660767, "grad_norm": 24.0, "learning_rate": 5.432262186018516e-06, "loss": 0.5439, "step": 2820 }, { "epoch": 1.3869223205506391, "grad_norm": 25.75, "learning_rate": 5.429875841047947e-06, "loss": 0.3414, "step": 2821 }, { "epoch": 1.3874139626352016, "grad_norm": 22.375, "learning_rate": 5.427489222885399e-06, "loss": 0.1915, "step": 2822 }, { "epoch": 1.387905604719764, "grad_norm": 15.3125, "learning_rate": 5.425102332232045e-06, "loss": 0.3025, "step": 2823 }, { "epoch": 1.3883972468043264, "grad_norm": 23.125, "learning_rate": 5.42271516978914e-06, "loss": 0.3759, "step": 2824 }, { "epoch": 1.3888888888888888, "grad_norm": 21.25, "learning_rate": 5.420327736258017e-06, "loss": 0.3848, "step": 2825 }, { "epoch": 1.3893805309734513, "grad_norm": 24.0, "learning_rate": 5.41794003234009e-06, "loss": 0.3076, "step": 2826 }, { "epoch": 1.3898721730580137, "grad_norm": 14.4375, "learning_rate": 5.415552058736853e-06, "loss": 0.2103, "step": 2827 }, { "epoch": 1.3903638151425763, "grad_norm": 18.875, "learning_rate": 5.4131638161498775e-06, "loss": 0.2398, "step": 2828 }, { "epoch": 1.3908554572271385, "grad_norm": 21.125, "learning_rate": 5.410775305280816e-06, "loss": 0.2994, "step": 2829 }, { "epoch": 1.3913470993117012, "grad_norm": 31.0, "learning_rate": 5.408386526831397e-06, "loss": 0.2885, "step": 2830 }, { "epoch": 1.3918387413962634, "grad_norm": 9.3125, "learning_rate": 5.405997481503431e-06, "loss": 0.2131, "step": 2831 }, { "epoch": 1.392330383480826, "grad_norm": 21.875, "learning_rate": 5.403608169998806e-06, "loss": 0.2991, "step": 2832 }, { "epoch": 1.3928220255653885, "grad_norm": 13.0625, "learning_rate": 5.401218593019485e-06, "loss": 0.3102, "step": 2833 }, { "epoch": 1.393313667649951, "grad_norm": 16.625, "learning_rate": 5.398828751267513e-06, "loss": 0.2345, "step": 2834 }, { "epoch": 1.3938053097345133, "grad_norm": 29.25, "learning_rate": 5.396438645445011e-06, "loss": 0.4605, "step": 2835 }, { "epoch": 1.3942969518190758, "grad_norm": 31.625, "learning_rate": 5.3940482762541765e-06, "loss": 0.3059, "step": 2836 }, { "epoch": 1.3947885939036382, "grad_norm": 33.5, "learning_rate": 5.3916576443972894e-06, "loss": 0.4357, "step": 2837 }, { "epoch": 1.3952802359882006, "grad_norm": 29.125, "learning_rate": 5.3892667505767e-06, "loss": 0.4847, "step": 2838 }, { "epoch": 1.395771878072763, "grad_norm": 16.5, "learning_rate": 5.386875595494839e-06, "loss": 0.3512, "step": 2839 }, { "epoch": 1.3962635201573255, "grad_norm": 20.25, "learning_rate": 5.384484179854214e-06, "loss": 0.2466, "step": 2840 }, { "epoch": 1.396755162241888, "grad_norm": 31.5, "learning_rate": 5.382092504357408e-06, "loss": 0.4246, "step": 2841 }, { "epoch": 1.3972468043264503, "grad_norm": 16.875, "learning_rate": 5.379700569707083e-06, "loss": 0.3051, "step": 2842 }, { "epoch": 1.3977384464110127, "grad_norm": 38.25, "learning_rate": 5.377308376605972e-06, "loss": 0.5604, "step": 2843 }, { "epoch": 1.3982300884955752, "grad_norm": 28.375, "learning_rate": 5.374915925756889e-06, "loss": 0.4198, "step": 2844 }, { "epoch": 1.3987217305801376, "grad_norm": 13.625, "learning_rate": 5.372523217862724e-06, "loss": 0.191, "step": 2845 }, { "epoch": 1.3992133726647, "grad_norm": 27.125, "learning_rate": 5.370130253626436e-06, "loss": 0.4568, "step": 2846 }, { "epoch": 1.3997050147492625, "grad_norm": 17.625, "learning_rate": 5.367737033751065e-06, "loss": 0.2576, "step": 2847 }, { "epoch": 1.4001966568338249, "grad_norm": 20.0, "learning_rate": 5.365343558939726e-06, "loss": 0.373, "step": 2848 }, { "epoch": 1.4006882989183875, "grad_norm": 23.5, "learning_rate": 5.362949829895609e-06, "loss": 0.3188, "step": 2849 }, { "epoch": 1.4011799410029497, "grad_norm": 15.3125, "learning_rate": 5.360555847321973e-06, "loss": 0.2993, "step": 2850 }, { "epoch": 1.4016715830875124, "grad_norm": 17.25, "learning_rate": 5.3581616119221575e-06, "loss": 0.1608, "step": 2851 }, { "epoch": 1.4021632251720748, "grad_norm": 21.5, "learning_rate": 5.355767124399576e-06, "loss": 0.384, "step": 2852 }, { "epoch": 1.4026548672566372, "grad_norm": 20.75, "learning_rate": 5.353372385457713e-06, "loss": 0.373, "step": 2853 }, { "epoch": 1.4031465093411997, "grad_norm": 50.0, "learning_rate": 5.3509773958001306e-06, "loss": 0.599, "step": 2854 }, { "epoch": 1.403638151425762, "grad_norm": 25.5, "learning_rate": 5.348582156130461e-06, "loss": 0.3468, "step": 2855 }, { "epoch": 1.4041297935103245, "grad_norm": 21.125, "learning_rate": 5.3461866671524115e-06, "loss": 0.4205, "step": 2856 }, { "epoch": 1.404621435594887, "grad_norm": 18.125, "learning_rate": 5.343790929569763e-06, "loss": 0.193, "step": 2857 }, { "epoch": 1.4051130776794494, "grad_norm": 28.5, "learning_rate": 5.341394944086367e-06, "loss": 0.37, "step": 2858 }, { "epoch": 1.4056047197640118, "grad_norm": 44.75, "learning_rate": 5.338998711406152e-06, "loss": 0.3158, "step": 2859 }, { "epoch": 1.4060963618485742, "grad_norm": 22.125, "learning_rate": 5.3366022322331165e-06, "loss": 0.4817, "step": 2860 }, { "epoch": 1.4065880039331367, "grad_norm": 26.75, "learning_rate": 5.334205507271331e-06, "loss": 0.343, "step": 2861 }, { "epoch": 1.407079646017699, "grad_norm": 24.375, "learning_rate": 5.331808537224938e-06, "loss": 0.3983, "step": 2862 }, { "epoch": 1.4075712881022615, "grad_norm": 16.625, "learning_rate": 5.329411322798157e-06, "loss": 0.2427, "step": 2863 }, { "epoch": 1.408062930186824, "grad_norm": 20.0, "learning_rate": 5.327013864695272e-06, "loss": 0.3294, "step": 2864 }, { "epoch": 1.4085545722713864, "grad_norm": 20.5, "learning_rate": 5.3246161636206415e-06, "loss": 0.2525, "step": 2865 }, { "epoch": 1.4090462143559488, "grad_norm": 18.875, "learning_rate": 5.322218220278698e-06, "loss": 0.4177, "step": 2866 }, { "epoch": 1.4095378564405112, "grad_norm": 14.25, "learning_rate": 5.319820035373942e-06, "loss": 0.2653, "step": 2867 }, { "epoch": 1.4100294985250739, "grad_norm": 22.375, "learning_rate": 5.317421609610946e-06, "loss": 0.3483, "step": 2868 }, { "epoch": 1.410521140609636, "grad_norm": 25.375, "learning_rate": 5.315022943694351e-06, "loss": 0.2476, "step": 2869 }, { "epoch": 1.4110127826941987, "grad_norm": 9.9375, "learning_rate": 5.312624038328874e-06, "loss": 0.3066, "step": 2870 }, { "epoch": 1.411504424778761, "grad_norm": 51.5, "learning_rate": 5.310224894219299e-06, "loss": 0.4657, "step": 2871 }, { "epoch": 1.4119960668633236, "grad_norm": 16.875, "learning_rate": 5.307825512070478e-06, "loss": 0.2066, "step": 2872 }, { "epoch": 1.412487708947886, "grad_norm": 18.625, "learning_rate": 5.3054258925873356e-06, "loss": 0.2043, "step": 2873 }, { "epoch": 1.4129793510324484, "grad_norm": 12.75, "learning_rate": 5.303026036474868e-06, "loss": 0.2931, "step": 2874 }, { "epoch": 1.4134709931170109, "grad_norm": 21.875, "learning_rate": 5.300625944438135e-06, "loss": 0.4296, "step": 2875 }, { "epoch": 1.4139626352015733, "grad_norm": 7.875, "learning_rate": 5.298225617182273e-06, "loss": 0.1931, "step": 2876 }, { "epoch": 1.4144542772861357, "grad_norm": 18.25, "learning_rate": 5.295825055412482e-06, "loss": 0.2158, "step": 2877 }, { "epoch": 1.4149459193706981, "grad_norm": 9.5625, "learning_rate": 5.293424259834032e-06, "loss": 0.2023, "step": 2878 }, { "epoch": 1.4154375614552606, "grad_norm": 26.5, "learning_rate": 5.291023231152264e-06, "loss": 0.3008, "step": 2879 }, { "epoch": 1.415929203539823, "grad_norm": 31.75, "learning_rate": 5.288621970072585e-06, "loss": 0.3379, "step": 2880 }, { "epoch": 1.4164208456243854, "grad_norm": 33.0, "learning_rate": 5.286220477300472e-06, "loss": 0.247, "step": 2881 }, { "epoch": 1.4169124877089478, "grad_norm": 29.375, "learning_rate": 5.283818753541469e-06, "loss": 0.4084, "step": 2882 }, { "epoch": 1.4174041297935103, "grad_norm": 26.5, "learning_rate": 5.281416799501187e-06, "loss": 0.3952, "step": 2883 }, { "epoch": 1.4178957718780727, "grad_norm": 8.5625, "learning_rate": 5.279014615885307e-06, "loss": 0.0701, "step": 2884 }, { "epoch": 1.4183874139626351, "grad_norm": 17.75, "learning_rate": 5.276612203399575e-06, "loss": 0.2299, "step": 2885 }, { "epoch": 1.4188790560471976, "grad_norm": 24.125, "learning_rate": 5.274209562749808e-06, "loss": 0.2724, "step": 2886 }, { "epoch": 1.4193706981317602, "grad_norm": 14.75, "learning_rate": 5.271806694641883e-06, "loss": 0.2848, "step": 2887 }, { "epoch": 1.4198623402163224, "grad_norm": 37.25, "learning_rate": 5.269403599781755e-06, "loss": 0.3496, "step": 2888 }, { "epoch": 1.420353982300885, "grad_norm": 22.5, "learning_rate": 5.267000278875432e-06, "loss": 0.2199, "step": 2889 }, { "epoch": 1.4208456243854473, "grad_norm": 36.75, "learning_rate": 5.264596732629001e-06, "loss": 0.4173, "step": 2890 }, { "epoch": 1.42133726647001, "grad_norm": 40.0, "learning_rate": 5.262192961748605e-06, "loss": 0.4629, "step": 2891 }, { "epoch": 1.4218289085545723, "grad_norm": 31.5, "learning_rate": 5.25978896694046e-06, "loss": 0.4659, "step": 2892 }, { "epoch": 1.4223205506391348, "grad_norm": 24.5, "learning_rate": 5.257384748910845e-06, "loss": 0.3709, "step": 2893 }, { "epoch": 1.4228121927236972, "grad_norm": 28.0, "learning_rate": 5.254980308366106e-06, "loss": 0.4715, "step": 2894 }, { "epoch": 1.4233038348082596, "grad_norm": 11.25, "learning_rate": 5.252575646012651e-06, "loss": 0.3072, "step": 2895 }, { "epoch": 1.423795476892822, "grad_norm": 43.0, "learning_rate": 5.250170762556956e-06, "loss": 0.3382, "step": 2896 }, { "epoch": 1.4242871189773845, "grad_norm": 28.375, "learning_rate": 5.247765658705564e-06, "loss": 0.2536, "step": 2897 }, { "epoch": 1.424778761061947, "grad_norm": 18.125, "learning_rate": 5.245360335165076e-06, "loss": 0.2984, "step": 2898 }, { "epoch": 1.4252704031465093, "grad_norm": 26.75, "learning_rate": 5.242954792642165e-06, "loss": 0.3996, "step": 2899 }, { "epoch": 1.4257620452310718, "grad_norm": 26.625, "learning_rate": 5.240549031843564e-06, "loss": 0.3919, "step": 2900 }, { "epoch": 1.4262536873156342, "grad_norm": 25.25, "learning_rate": 5.2381430534760716e-06, "loss": 0.2826, "step": 2901 }, { "epoch": 1.4267453294001966, "grad_norm": 27.375, "learning_rate": 5.235736858246549e-06, "loss": 0.4416, "step": 2902 }, { "epoch": 1.427236971484759, "grad_norm": 21.125, "learning_rate": 5.233330446861923e-06, "loss": 0.3058, "step": 2903 }, { "epoch": 1.4277286135693215, "grad_norm": 15.375, "learning_rate": 5.230923820029183e-06, "loss": 0.195, "step": 2904 }, { "epoch": 1.4282202556538839, "grad_norm": 31.125, "learning_rate": 5.2285169784553805e-06, "loss": 0.3038, "step": 2905 }, { "epoch": 1.4287118977384465, "grad_norm": 18.875, "learning_rate": 5.22610992284763e-06, "loss": 0.5291, "step": 2906 }, { "epoch": 1.4292035398230087, "grad_norm": 21.125, "learning_rate": 5.223702653913113e-06, "loss": 0.4007, "step": 2907 }, { "epoch": 1.4296951819075714, "grad_norm": 19.0, "learning_rate": 5.221295172359071e-06, "loss": 0.2698, "step": 2908 }, { "epoch": 1.4301868239921336, "grad_norm": 46.5, "learning_rate": 5.218887478892805e-06, "loss": 0.3885, "step": 2909 }, { "epoch": 1.4306784660766962, "grad_norm": 17.0, "learning_rate": 5.216479574221682e-06, "loss": 0.1871, "step": 2910 }, { "epoch": 1.4311701081612587, "grad_norm": 35.5, "learning_rate": 5.2140714590531315e-06, "loss": 0.49, "step": 2911 }, { "epoch": 1.431661750245821, "grad_norm": 22.875, "learning_rate": 5.211663134094642e-06, "loss": 0.2606, "step": 2912 }, { "epoch": 1.4321533923303835, "grad_norm": 29.625, "learning_rate": 5.2092546000537645e-06, "loss": 0.491, "step": 2913 }, { "epoch": 1.432645034414946, "grad_norm": 22.375, "learning_rate": 5.206845857638113e-06, "loss": 0.2892, "step": 2914 }, { "epoch": 1.4331366764995084, "grad_norm": 17.0, "learning_rate": 5.204436907555362e-06, "loss": 0.2927, "step": 2915 }, { "epoch": 1.4336283185840708, "grad_norm": 13.5625, "learning_rate": 5.2020277505132465e-06, "loss": 0.337, "step": 2916 }, { "epoch": 1.4341199606686332, "grad_norm": 12.75, "learning_rate": 5.199618387219561e-06, "loss": 0.2392, "step": 2917 }, { "epoch": 1.4346116027531957, "grad_norm": 21.25, "learning_rate": 5.197208818382164e-06, "loss": 0.2889, "step": 2918 }, { "epoch": 1.435103244837758, "grad_norm": 28.0, "learning_rate": 5.194799044708969e-06, "loss": 0.3791, "step": 2919 }, { "epoch": 1.4355948869223205, "grad_norm": 24.375, "learning_rate": 5.192389066907959e-06, "loss": 0.2693, "step": 2920 }, { "epoch": 1.436086529006883, "grad_norm": 15.0625, "learning_rate": 5.189978885687167e-06, "loss": 0.2184, "step": 2921 }, { "epoch": 1.4365781710914454, "grad_norm": 27.375, "learning_rate": 5.1875685017546895e-06, "loss": 0.3992, "step": 2922 }, { "epoch": 1.4370698131760078, "grad_norm": 43.5, "learning_rate": 5.185157915818685e-06, "loss": 0.4058, "step": 2923 }, { "epoch": 1.4375614552605702, "grad_norm": 23.0, "learning_rate": 5.182747128587368e-06, "loss": 0.2982, "step": 2924 }, { "epoch": 1.4380530973451329, "grad_norm": 16.5, "learning_rate": 5.180336140769014e-06, "loss": 0.2251, "step": 2925 }, { "epoch": 1.438544739429695, "grad_norm": 20.125, "learning_rate": 5.177924953071957e-06, "loss": 0.4384, "step": 2926 }, { "epoch": 1.4390363815142577, "grad_norm": 20.5, "learning_rate": 5.175513566204591e-06, "loss": 0.2291, "step": 2927 }, { "epoch": 1.43952802359882, "grad_norm": 47.75, "learning_rate": 5.173101980875362e-06, "loss": 0.3682, "step": 2928 }, { "epoch": 1.4400196656833826, "grad_norm": 23.0, "learning_rate": 5.170690197792785e-06, "loss": 0.3096, "step": 2929 }, { "epoch": 1.440511307767945, "grad_norm": 13.5625, "learning_rate": 5.168278217665425e-06, "loss": 0.1353, "step": 2930 }, { "epoch": 1.4410029498525074, "grad_norm": 36.0, "learning_rate": 5.165866041201907e-06, "loss": 0.3757, "step": 2931 }, { "epoch": 1.4414945919370699, "grad_norm": 14.125, "learning_rate": 5.163453669110915e-06, "loss": 0.2227, "step": 2932 }, { "epoch": 1.4419862340216323, "grad_norm": 12.6875, "learning_rate": 5.1610411021011896e-06, "loss": 0.3488, "step": 2933 }, { "epoch": 1.4424778761061947, "grad_norm": 24.5, "learning_rate": 5.158628340881529e-06, "loss": 0.3187, "step": 2934 }, { "epoch": 1.4429695181907571, "grad_norm": 59.75, "learning_rate": 5.156215386160785e-06, "loss": 0.7222, "step": 2935 }, { "epoch": 1.4434611602753196, "grad_norm": 12.75, "learning_rate": 5.153802238647874e-06, "loss": 0.2096, "step": 2936 }, { "epoch": 1.443952802359882, "grad_norm": 48.0, "learning_rate": 5.151388899051761e-06, "loss": 0.352, "step": 2937 }, { "epoch": 1.4444444444444444, "grad_norm": 16.125, "learning_rate": 5.1489753680814725e-06, "loss": 0.2689, "step": 2938 }, { "epoch": 1.4449360865290068, "grad_norm": 31.0, "learning_rate": 5.1465616464460865e-06, "loss": 0.3141, "step": 2939 }, { "epoch": 1.4454277286135693, "grad_norm": 10.1875, "learning_rate": 5.144147734854745e-06, "loss": 0.254, "step": 2940 }, { "epoch": 1.4459193706981317, "grad_norm": 26.25, "learning_rate": 5.1417336340166375e-06, "loss": 0.4076, "step": 2941 }, { "epoch": 1.4464110127826941, "grad_norm": 26.0, "learning_rate": 5.1393193446410125e-06, "loss": 0.4079, "step": 2942 }, { "epoch": 1.4469026548672566, "grad_norm": 23.5, "learning_rate": 5.136904867437174e-06, "loss": 0.3379, "step": 2943 }, { "epoch": 1.4473942969518192, "grad_norm": 26.25, "learning_rate": 5.134490203114483e-06, "loss": 0.2236, "step": 2944 }, { "epoch": 1.4478859390363814, "grad_norm": 31.75, "learning_rate": 5.1320753523823514e-06, "loss": 0.3305, "step": 2945 }, { "epoch": 1.448377581120944, "grad_norm": 16.875, "learning_rate": 5.1296603159502485e-06, "loss": 0.3312, "step": 2946 }, { "epoch": 1.4488692232055063, "grad_norm": 29.25, "learning_rate": 5.127245094527697e-06, "loss": 0.2366, "step": 2947 }, { "epoch": 1.449360865290069, "grad_norm": 29.875, "learning_rate": 5.1248296888242755e-06, "loss": 0.319, "step": 2948 }, { "epoch": 1.4498525073746313, "grad_norm": 43.25, "learning_rate": 5.122414099549617e-06, "loss": 0.4689, "step": 2949 }, { "epoch": 1.4503441494591938, "grad_norm": 24.75, "learning_rate": 5.1199983274134026e-06, "loss": 0.3867, "step": 2950 }, { "epoch": 1.4508357915437562, "grad_norm": 32.25, "learning_rate": 5.117582373125377e-06, "loss": 0.3354, "step": 2951 }, { "epoch": 1.4513274336283186, "grad_norm": 36.25, "learning_rate": 5.115166237395331e-06, "loss": 0.3355, "step": 2952 }, { "epoch": 1.451819075712881, "grad_norm": 28.625, "learning_rate": 5.11274992093311e-06, "loss": 0.343, "step": 2953 }, { "epoch": 1.4523107177974435, "grad_norm": 30.5, "learning_rate": 5.110333424448614e-06, "loss": 0.4317, "step": 2954 }, { "epoch": 1.452802359882006, "grad_norm": 13.875, "learning_rate": 5.107916748651797e-06, "loss": 0.3555, "step": 2955 }, { "epoch": 1.4532940019665683, "grad_norm": 18.375, "learning_rate": 5.105499894252662e-06, "loss": 0.3238, "step": 2956 }, { "epoch": 1.4537856440511308, "grad_norm": 13.25, "learning_rate": 5.103082861961266e-06, "loss": 0.2929, "step": 2957 }, { "epoch": 1.4542772861356932, "grad_norm": 16.625, "learning_rate": 5.100665652487721e-06, "loss": 0.2126, "step": 2958 }, { "epoch": 1.4547689282202556, "grad_norm": 20.875, "learning_rate": 5.098248266542188e-06, "loss": 0.1374, "step": 2959 }, { "epoch": 1.455260570304818, "grad_norm": 13.9375, "learning_rate": 5.095830704834877e-06, "loss": 0.2295, "step": 2960 }, { "epoch": 1.4557522123893805, "grad_norm": 17.625, "learning_rate": 5.0934129680760575e-06, "loss": 0.2697, "step": 2961 }, { "epoch": 1.456243854473943, "grad_norm": 30.125, "learning_rate": 5.090995056976046e-06, "loss": 0.3732, "step": 2962 }, { "epoch": 1.4567354965585055, "grad_norm": 26.625, "learning_rate": 5.0885769722452065e-06, "loss": 0.4839, "step": 2963 }, { "epoch": 1.4572271386430677, "grad_norm": 37.0, "learning_rate": 5.086158714593963e-06, "loss": 0.4219, "step": 2964 }, { "epoch": 1.4577187807276304, "grad_norm": 25.25, "learning_rate": 5.083740284732782e-06, "loss": 0.2741, "step": 2965 }, { "epoch": 1.4582104228121926, "grad_norm": 13.1875, "learning_rate": 5.081321683372183e-06, "loss": 0.2297, "step": 2966 }, { "epoch": 1.4587020648967552, "grad_norm": 16.75, "learning_rate": 5.078902911222741e-06, "loss": 0.187, "step": 2967 }, { "epoch": 1.4591937069813177, "grad_norm": 18.625, "learning_rate": 5.076483968995071e-06, "loss": 0.2956, "step": 2968 }, { "epoch": 1.45968534906588, "grad_norm": 34.5, "learning_rate": 5.074064857399848e-06, "loss": 0.4715, "step": 2969 }, { "epoch": 1.4601769911504425, "grad_norm": 16.625, "learning_rate": 5.071645577147792e-06, "loss": 0.2403, "step": 2970 }, { "epoch": 1.460668633235005, "grad_norm": 14.5, "learning_rate": 5.069226128949673e-06, "loss": 0.1476, "step": 2971 }, { "epoch": 1.4611602753195674, "grad_norm": 11.375, "learning_rate": 5.066806513516309e-06, "loss": 0.1322, "step": 2972 }, { "epoch": 1.4616519174041298, "grad_norm": 15.8125, "learning_rate": 5.064386731558569e-06, "loss": 0.1535, "step": 2973 }, { "epoch": 1.4621435594886922, "grad_norm": 23.625, "learning_rate": 5.0619667837873725e-06, "loss": 0.2692, "step": 2974 }, { "epoch": 1.4626352015732547, "grad_norm": 14.6875, "learning_rate": 5.059546670913684e-06, "loss": 0.2903, "step": 2975 }, { "epoch": 1.463126843657817, "grad_norm": 16.125, "learning_rate": 5.057126393648518e-06, "loss": 0.4006, "step": 2976 }, { "epoch": 1.4636184857423795, "grad_norm": 26.625, "learning_rate": 5.054705952702938e-06, "loss": 0.1759, "step": 2977 }, { "epoch": 1.464110127826942, "grad_norm": 14.9375, "learning_rate": 5.052285348788055e-06, "loss": 0.1217, "step": 2978 }, { "epoch": 1.4646017699115044, "grad_norm": 24.625, "learning_rate": 5.049864582615029e-06, "loss": 0.2427, "step": 2979 }, { "epoch": 1.4650934119960668, "grad_norm": 19.375, "learning_rate": 5.047443654895065e-06, "loss": 0.2909, "step": 2980 }, { "epoch": 1.4655850540806292, "grad_norm": 17.375, "learning_rate": 5.045022566339419e-06, "loss": 0.29, "step": 2981 }, { "epoch": 1.4660766961651919, "grad_norm": 16.5, "learning_rate": 5.042601317659392e-06, "loss": 0.3008, "step": 2982 }, { "epoch": 1.466568338249754, "grad_norm": 27.375, "learning_rate": 5.040179909566329e-06, "loss": 0.3705, "step": 2983 }, { "epoch": 1.4670599803343167, "grad_norm": 32.5, "learning_rate": 5.03775834277163e-06, "loss": 0.3488, "step": 2984 }, { "epoch": 1.467551622418879, "grad_norm": 34.5, "learning_rate": 5.035336617986736e-06, "loss": 0.3563, "step": 2985 }, { "epoch": 1.4680432645034416, "grad_norm": 43.0, "learning_rate": 5.032914735923132e-06, "loss": 0.4888, "step": 2986 }, { "epoch": 1.468534906588004, "grad_norm": 19.625, "learning_rate": 5.0304926972923556e-06, "loss": 0.2746, "step": 2987 }, { "epoch": 1.4690265486725664, "grad_norm": 37.5, "learning_rate": 5.0280705028059875e-06, "loss": 0.1682, "step": 2988 }, { "epoch": 1.4695181907571289, "grad_norm": 34.5, "learning_rate": 5.025648153175652e-06, "loss": 0.2947, "step": 2989 }, { "epoch": 1.4700098328416913, "grad_norm": 34.5, "learning_rate": 5.023225649113022e-06, "loss": 0.5198, "step": 2990 }, { "epoch": 1.4705014749262537, "grad_norm": 24.125, "learning_rate": 5.020802991329816e-06, "loss": 0.269, "step": 2991 }, { "epoch": 1.4709931170108161, "grad_norm": 35.25, "learning_rate": 5.018380180537794e-06, "loss": 0.4702, "step": 2992 }, { "epoch": 1.4714847590953786, "grad_norm": 40.75, "learning_rate": 5.015957217448766e-06, "loss": 0.4914, "step": 2993 }, { "epoch": 1.471976401179941, "grad_norm": 26.5, "learning_rate": 5.01353410277458e-06, "loss": 0.2761, "step": 2994 }, { "epoch": 1.4724680432645034, "grad_norm": 25.625, "learning_rate": 5.0111108372271365e-06, "loss": 0.3625, "step": 2995 }, { "epoch": 1.4729596853490659, "grad_norm": 30.75, "learning_rate": 5.008687421518377e-06, "loss": 0.5186, "step": 2996 }, { "epoch": 1.4734513274336283, "grad_norm": 33.75, "learning_rate": 5.006263856360284e-06, "loss": 0.2716, "step": 2997 }, { "epoch": 1.4739429695181907, "grad_norm": 28.75, "learning_rate": 5.003840142464885e-06, "loss": 0.2937, "step": 2998 }, { "epoch": 1.4744346116027531, "grad_norm": 26.125, "learning_rate": 5.001416280544258e-06, "loss": 0.3311, "step": 2999 }, { "epoch": 1.4749262536873156, "grad_norm": 59.75, "learning_rate": 4.998992271310515e-06, "loss": 0.5797, "step": 3000 }, { "epoch": 1.4749262536873156, "eval_loss": 0.3775193691253662, "eval_runtime": 66.3475, "eval_samples_per_second": 122.627, "eval_spearman": 0.5801099968422374, "eval_steps_per_second": 15.328, "step": 3000 }, { "epoch": 1.4754178957718782, "grad_norm": 15.25, "learning_rate": 4.996568115475817e-06, "loss": 0.2589, "step": 3001 }, { "epoch": 1.4759095378564404, "grad_norm": 24.0, "learning_rate": 4.994143813752367e-06, "loss": 0.3584, "step": 3002 }, { "epoch": 1.476401179941003, "grad_norm": 34.25, "learning_rate": 4.991719366852408e-06, "loss": 0.481, "step": 3003 }, { "epoch": 1.4768928220255653, "grad_norm": 14.0, "learning_rate": 4.989294775488232e-06, "loss": 0.185, "step": 3004 }, { "epoch": 1.477384464110128, "grad_norm": 12.8125, "learning_rate": 4.9868700403721645e-06, "loss": 0.1992, "step": 3005 }, { "epoch": 1.4778761061946903, "grad_norm": 18.875, "learning_rate": 4.984445162216582e-06, "loss": 0.2914, "step": 3006 }, { "epoch": 1.4783677482792528, "grad_norm": 11.25, "learning_rate": 4.982020141733899e-06, "loss": 0.2512, "step": 3007 }, { "epoch": 1.4788593903638152, "grad_norm": 11.8125, "learning_rate": 4.979594979636571e-06, "loss": 0.1658, "step": 3008 }, { "epoch": 1.4793510324483776, "grad_norm": 26.75, "learning_rate": 4.977169676637096e-06, "loss": 0.5124, "step": 3009 }, { "epoch": 1.47984267453294, "grad_norm": 34.25, "learning_rate": 4.974744233448014e-06, "loss": 0.4093, "step": 3010 }, { "epoch": 1.4803343166175025, "grad_norm": 44.25, "learning_rate": 4.972318650781906e-06, "loss": 0.5404, "step": 3011 }, { "epoch": 1.480825958702065, "grad_norm": 13.8125, "learning_rate": 4.969892929351392e-06, "loss": 0.1821, "step": 3012 }, { "epoch": 1.4813176007866273, "grad_norm": 21.0, "learning_rate": 4.967467069869138e-06, "loss": 0.3427, "step": 3013 }, { "epoch": 1.4818092428711898, "grad_norm": 28.125, "learning_rate": 4.965041073047845e-06, "loss": 0.3164, "step": 3014 }, { "epoch": 1.4823008849557522, "grad_norm": 41.0, "learning_rate": 4.962614939600256e-06, "loss": 0.4417, "step": 3015 }, { "epoch": 1.4827925270403146, "grad_norm": 32.25, "learning_rate": 4.960188670239154e-06, "loss": 0.2893, "step": 3016 }, { "epoch": 1.483284169124877, "grad_norm": 39.5, "learning_rate": 4.957762265677364e-06, "loss": 0.2411, "step": 3017 }, { "epoch": 1.4837758112094395, "grad_norm": 48.0, "learning_rate": 4.95533572662775e-06, "loss": 0.6333, "step": 3018 }, { "epoch": 1.484267453294002, "grad_norm": 16.75, "learning_rate": 4.952909053803212e-06, "loss": 0.2306, "step": 3019 }, { "epoch": 1.4847590953785645, "grad_norm": 14.0, "learning_rate": 4.950482247916692e-06, "loss": 0.2146, "step": 3020 }, { "epoch": 1.4852507374631267, "grad_norm": 23.375, "learning_rate": 4.948055309681175e-06, "loss": 0.3402, "step": 3021 }, { "epoch": 1.4857423795476894, "grad_norm": 27.5, "learning_rate": 4.945628239809678e-06, "loss": 0.2471, "step": 3022 }, { "epoch": 1.4862340216322516, "grad_norm": 18.625, "learning_rate": 4.943201039015259e-06, "loss": 0.335, "step": 3023 }, { "epoch": 1.4867256637168142, "grad_norm": 62.25, "learning_rate": 4.9407737080110175e-06, "loss": 0.3589, "step": 3024 }, { "epoch": 1.4872173058013767, "grad_norm": 21.25, "learning_rate": 4.938346247510087e-06, "loss": 0.3447, "step": 3025 }, { "epoch": 1.487708947885939, "grad_norm": 14.8125, "learning_rate": 4.935918658225641e-06, "loss": 0.21, "step": 3026 }, { "epoch": 1.4882005899705015, "grad_norm": 24.125, "learning_rate": 4.933490940870892e-06, "loss": 0.3743, "step": 3027 }, { "epoch": 1.488692232055064, "grad_norm": 19.375, "learning_rate": 4.931063096159088e-06, "loss": 0.2826, "step": 3028 }, { "epoch": 1.4891838741396264, "grad_norm": 38.0, "learning_rate": 4.928635124803517e-06, "loss": 0.5445, "step": 3029 }, { "epoch": 1.4896755162241888, "grad_norm": 26.625, "learning_rate": 4.9262070275174996e-06, "loss": 0.3382, "step": 3030 }, { "epoch": 1.4901671583087512, "grad_norm": 35.75, "learning_rate": 4.923778805014397e-06, "loss": 0.2372, "step": 3031 }, { "epoch": 1.4906588003933137, "grad_norm": 23.0, "learning_rate": 4.9213504580076086e-06, "loss": 0.2296, "step": 3032 }, { "epoch": 1.491150442477876, "grad_norm": 11.5625, "learning_rate": 4.918921987210567e-06, "loss": 0.2122, "step": 3033 }, { "epoch": 1.4916420845624385, "grad_norm": 28.125, "learning_rate": 4.916493393336742e-06, "loss": 0.3332, "step": 3034 }, { "epoch": 1.492133726647001, "grad_norm": 51.75, "learning_rate": 4.91406467709964e-06, "loss": 0.4747, "step": 3035 }, { "epoch": 1.4926253687315634, "grad_norm": 24.0, "learning_rate": 4.911635839212804e-06, "loss": 0.2522, "step": 3036 }, { "epoch": 1.4931170108161258, "grad_norm": 27.625, "learning_rate": 4.909206880389812e-06, "loss": 0.5793, "step": 3037 }, { "epoch": 1.4936086529006882, "grad_norm": 22.0, "learning_rate": 4.906777801344277e-06, "loss": 0.3113, "step": 3038 }, { "epoch": 1.4941002949852507, "grad_norm": 24.375, "learning_rate": 4.904348602789849e-06, "loss": 0.3409, "step": 3039 }, { "epoch": 1.494591937069813, "grad_norm": 41.75, "learning_rate": 4.901919285440211e-06, "loss": 0.364, "step": 3040 }, { "epoch": 1.4950835791543757, "grad_norm": 18.25, "learning_rate": 4.899489850009082e-06, "loss": 0.1777, "step": 3041 }, { "epoch": 1.495575221238938, "grad_norm": 14.125, "learning_rate": 4.897060297210217e-06, "loss": 0.2064, "step": 3042 }, { "epoch": 1.4960668633235006, "grad_norm": 25.375, "learning_rate": 4.894630627757402e-06, "loss": 0.3334, "step": 3043 }, { "epoch": 1.496558505408063, "grad_norm": 17.375, "learning_rate": 4.892200842364463e-06, "loss": 0.1859, "step": 3044 }, { "epoch": 1.4970501474926254, "grad_norm": 33.75, "learning_rate": 4.889770941745252e-06, "loss": 0.475, "step": 3045 }, { "epoch": 1.4975417895771879, "grad_norm": 28.25, "learning_rate": 4.887340926613661e-06, "loss": 0.3308, "step": 3046 }, { "epoch": 1.4980334316617503, "grad_norm": 24.0, "learning_rate": 4.884910797683616e-06, "loss": 0.336, "step": 3047 }, { "epoch": 1.4985250737463127, "grad_norm": 10.3125, "learning_rate": 4.882480555669072e-06, "loss": 0.2054, "step": 3048 }, { "epoch": 1.4990167158308751, "grad_norm": 21.375, "learning_rate": 4.8800502012840205e-06, "loss": 0.271, "step": 3049 }, { "epoch": 1.4995083579154376, "grad_norm": 17.125, "learning_rate": 4.877619735242485e-06, "loss": 0.2002, "step": 3050 }, { "epoch": 1.5, "grad_norm": 36.5, "learning_rate": 4.875189158258521e-06, "loss": 0.5031, "step": 3051 }, { "epoch": 1.5004916420845624, "grad_norm": 23.0, "learning_rate": 4.872758471046219e-06, "loss": 0.2566, "step": 3052 }, { "epoch": 1.5009832841691249, "grad_norm": 26.625, "learning_rate": 4.870327674319701e-06, "loss": 0.2452, "step": 3053 }, { "epoch": 1.5014749262536873, "grad_norm": 12.4375, "learning_rate": 4.867896768793118e-06, "loss": 0.288, "step": 3054 }, { "epoch": 1.5019665683382497, "grad_norm": 8.125, "learning_rate": 4.8654657551806575e-06, "loss": 0.1952, "step": 3055 }, { "epoch": 1.5024582104228124, "grad_norm": 24.25, "learning_rate": 4.863034634196537e-06, "loss": 0.4049, "step": 3056 }, { "epoch": 1.5029498525073746, "grad_norm": 23.25, "learning_rate": 4.8606034065550035e-06, "loss": 0.5564, "step": 3057 }, { "epoch": 1.5034414945919372, "grad_norm": 15.5625, "learning_rate": 4.85817207297034e-06, "loss": 0.2759, "step": 3058 }, { "epoch": 1.5039331366764994, "grad_norm": 16.375, "learning_rate": 4.855740634156854e-06, "loss": 0.354, "step": 3059 }, { "epoch": 1.504424778761062, "grad_norm": 40.5, "learning_rate": 4.8533090908288915e-06, "loss": 0.2716, "step": 3060 }, { "epoch": 1.5049164208456243, "grad_norm": 44.75, "learning_rate": 4.850877443700823e-06, "loss": 0.4157, "step": 3061 }, { "epoch": 1.505408062930187, "grad_norm": 15.4375, "learning_rate": 4.848445693487055e-06, "loss": 0.1929, "step": 3062 }, { "epoch": 1.5058997050147491, "grad_norm": 29.75, "learning_rate": 4.846013840902017e-06, "loss": 0.3718, "step": 3063 }, { "epoch": 1.5063913470993118, "grad_norm": 34.0, "learning_rate": 4.843581886660175e-06, "loss": 0.3989, "step": 3064 }, { "epoch": 1.506882989183874, "grad_norm": 22.25, "learning_rate": 4.841149831476023e-06, "loss": 0.3302, "step": 3065 }, { "epoch": 1.5073746312684366, "grad_norm": 64.5, "learning_rate": 4.838717676064083e-06, "loss": 0.5053, "step": 3066 }, { "epoch": 1.507866273352999, "grad_norm": 19.625, "learning_rate": 4.836285421138909e-06, "loss": 0.2811, "step": 3067 }, { "epoch": 1.5083579154375615, "grad_norm": 62.25, "learning_rate": 4.833853067415083e-06, "loss": 0.6297, "step": 3068 }, { "epoch": 1.508849557522124, "grad_norm": 26.375, "learning_rate": 4.831420615607215e-06, "loss": 0.2281, "step": 3069 }, { "epoch": 1.5093411996066863, "grad_norm": 19.75, "learning_rate": 4.828988066429946e-06, "loss": 0.3823, "step": 3070 }, { "epoch": 1.5098328416912488, "grad_norm": 41.0, "learning_rate": 4.826555420597944e-06, "loss": 0.5318, "step": 3071 }, { "epoch": 1.5103244837758112, "grad_norm": 17.125, "learning_rate": 4.824122678825904e-06, "loss": 0.1694, "step": 3072 }, { "epoch": 1.5108161258603736, "grad_norm": 23.625, "learning_rate": 4.821689841828556e-06, "loss": 0.3505, "step": 3073 }, { "epoch": 1.511307767944936, "grad_norm": 25.125, "learning_rate": 4.819256910320647e-06, "loss": 0.2593, "step": 3074 }, { "epoch": 1.5117994100294987, "grad_norm": 28.5, "learning_rate": 4.8168238850169605e-06, "loss": 0.2655, "step": 3075 }, { "epoch": 1.512291052114061, "grad_norm": 28.125, "learning_rate": 4.814390766632306e-06, "loss": 0.3586, "step": 3076 }, { "epoch": 1.5127826941986235, "grad_norm": 23.625, "learning_rate": 4.811957555881519e-06, "loss": 0.3588, "step": 3077 }, { "epoch": 1.5132743362831858, "grad_norm": 23.25, "learning_rate": 4.8095242534794605e-06, "loss": 0.3977, "step": 3078 }, { "epoch": 1.5137659783677484, "grad_norm": 18.625, "learning_rate": 4.8070908601410195e-06, "loss": 0.304, "step": 3079 }, { "epoch": 1.5142576204523106, "grad_norm": 14.9375, "learning_rate": 4.804657376581116e-06, "loss": 0.2228, "step": 3080 }, { "epoch": 1.5147492625368733, "grad_norm": 25.0, "learning_rate": 4.802223803514691e-06, "loss": 0.4056, "step": 3081 }, { "epoch": 1.5152409046214355, "grad_norm": 28.75, "learning_rate": 4.799790141656712e-06, "loss": 0.38, "step": 3082 }, { "epoch": 1.515732546705998, "grad_norm": 22.875, "learning_rate": 4.7973563917221785e-06, "loss": 0.2665, "step": 3083 }, { "epoch": 1.5162241887905603, "grad_norm": 33.75, "learning_rate": 4.794922554426108e-06, "loss": 0.5254, "step": 3084 }, { "epoch": 1.516715830875123, "grad_norm": 17.125, "learning_rate": 4.79248863048355e-06, "loss": 0.3278, "step": 3085 }, { "epoch": 1.5172074729596854, "grad_norm": 14.625, "learning_rate": 4.790054620609573e-06, "loss": 0.1998, "step": 3086 }, { "epoch": 1.5176991150442478, "grad_norm": 26.5, "learning_rate": 4.78762052551928e-06, "loss": 0.338, "step": 3087 }, { "epoch": 1.5181907571288102, "grad_norm": 29.375, "learning_rate": 4.78518634592779e-06, "loss": 0.355, "step": 3088 }, { "epoch": 1.5186823992133727, "grad_norm": 10.875, "learning_rate": 4.782752082550251e-06, "loss": 0.3583, "step": 3089 }, { "epoch": 1.519174041297935, "grad_norm": 24.5, "learning_rate": 4.780317736101835e-06, "loss": 0.3221, "step": 3090 }, { "epoch": 1.5196656833824975, "grad_norm": 47.25, "learning_rate": 4.77788330729774e-06, "loss": 0.3996, "step": 3091 }, { "epoch": 1.52015732546706, "grad_norm": 15.625, "learning_rate": 4.7754487968531846e-06, "loss": 0.2468, "step": 3092 }, { "epoch": 1.5206489675516224, "grad_norm": 31.375, "learning_rate": 4.773014205483413e-06, "loss": 0.3867, "step": 3093 }, { "epoch": 1.5211406096361848, "grad_norm": 20.75, "learning_rate": 4.770579533903698e-06, "loss": 0.2926, "step": 3094 }, { "epoch": 1.5216322517207472, "grad_norm": 22.125, "learning_rate": 4.768144782829327e-06, "loss": 0.3461, "step": 3095 }, { "epoch": 1.5221238938053099, "grad_norm": 27.5, "learning_rate": 4.765709952975617e-06, "loss": 0.2513, "step": 3096 }, { "epoch": 1.522615535889872, "grad_norm": 42.5, "learning_rate": 4.763275045057908e-06, "loss": 0.3476, "step": 3097 }, { "epoch": 1.5231071779744347, "grad_norm": 16.875, "learning_rate": 4.760840059791557e-06, "loss": 0.3181, "step": 3098 }, { "epoch": 1.523598820058997, "grad_norm": 15.1875, "learning_rate": 4.758404997891954e-06, "loss": 0.2174, "step": 3099 }, { "epoch": 1.5240904621435596, "grad_norm": 21.75, "learning_rate": 4.755969860074501e-06, "loss": 0.2299, "step": 3100 }, { "epoch": 1.5245821042281218, "grad_norm": 25.375, "learning_rate": 4.753534647054629e-06, "loss": 0.2398, "step": 3101 }, { "epoch": 1.5250737463126844, "grad_norm": 17.125, "learning_rate": 4.751099359547788e-06, "loss": 0.2136, "step": 3102 }, { "epoch": 1.5255653883972466, "grad_norm": 14.5, "learning_rate": 4.748663998269454e-06, "loss": 0.2782, "step": 3103 }, { "epoch": 1.5260570304818093, "grad_norm": 17.5, "learning_rate": 4.746228563935117e-06, "loss": 0.3075, "step": 3104 }, { "epoch": 1.5265486725663717, "grad_norm": 17.375, "learning_rate": 4.7437930572602965e-06, "loss": 0.2339, "step": 3105 }, { "epoch": 1.5270403146509341, "grad_norm": 32.5, "learning_rate": 4.74135747896053e-06, "loss": 0.3162, "step": 3106 }, { "epoch": 1.5275319567354966, "grad_norm": 11.625, "learning_rate": 4.738921829751373e-06, "loss": 0.2923, "step": 3107 }, { "epoch": 1.528023598820059, "grad_norm": 13.125, "learning_rate": 4.736486110348407e-06, "loss": 0.3127, "step": 3108 }, { "epoch": 1.5285152409046214, "grad_norm": 27.625, "learning_rate": 4.734050321467232e-06, "loss": 0.3526, "step": 3109 }, { "epoch": 1.5290068829891839, "grad_norm": 30.875, "learning_rate": 4.731614463823465e-06, "loss": 0.2801, "step": 3110 }, { "epoch": 1.5294985250737463, "grad_norm": 38.0, "learning_rate": 4.729178538132752e-06, "loss": 0.3981, "step": 3111 }, { "epoch": 1.5299901671583087, "grad_norm": 11.8125, "learning_rate": 4.726742545110749e-06, "loss": 0.2332, "step": 3112 }, { "epoch": 1.5304818092428711, "grad_norm": 36.25, "learning_rate": 4.724306485473137e-06, "loss": 0.284, "step": 3113 }, { "epoch": 1.5309734513274336, "grad_norm": 28.0, "learning_rate": 4.721870359935618e-06, "loss": 0.2665, "step": 3114 }, { "epoch": 1.5314650934119962, "grad_norm": 29.625, "learning_rate": 4.719434169213908e-06, "loss": 0.4811, "step": 3115 }, { "epoch": 1.5319567354965584, "grad_norm": 25.375, "learning_rate": 4.716997914023748e-06, "loss": 0.3399, "step": 3116 }, { "epoch": 1.532448377581121, "grad_norm": 31.5, "learning_rate": 4.7145615950808945e-06, "loss": 0.4885, "step": 3117 }, { "epoch": 1.5329400196656833, "grad_norm": 14.9375, "learning_rate": 4.712125213101123e-06, "loss": 0.1725, "step": 3118 }, { "epoch": 1.533431661750246, "grad_norm": 16.875, "learning_rate": 4.709688768800227e-06, "loss": 0.2067, "step": 3119 }, { "epoch": 1.5339233038348081, "grad_norm": 35.0, "learning_rate": 4.707252262894022e-06, "loss": 0.3176, "step": 3120 }, { "epoch": 1.5344149459193708, "grad_norm": 34.0, "learning_rate": 4.704815696098337e-06, "loss": 0.5125, "step": 3121 }, { "epoch": 1.534906588003933, "grad_norm": 15.75, "learning_rate": 4.70237906912902e-06, "loss": 0.1416, "step": 3122 }, { "epoch": 1.5353982300884956, "grad_norm": 37.5, "learning_rate": 4.69994238270194e-06, "loss": 0.4754, "step": 3123 }, { "epoch": 1.535889872173058, "grad_norm": 27.5, "learning_rate": 4.69750563753298e-06, "loss": 0.176, "step": 3124 }, { "epoch": 1.5363815142576205, "grad_norm": 15.5, "learning_rate": 4.69506883433804e-06, "loss": 0.3843, "step": 3125 }, { "epoch": 1.536873156342183, "grad_norm": 22.625, "learning_rate": 4.692631973833038e-06, "loss": 0.3103, "step": 3126 }, { "epoch": 1.5373647984267453, "grad_norm": 20.5, "learning_rate": 4.690195056733913e-06, "loss": 0.2411, "step": 3127 }, { "epoch": 1.5378564405113078, "grad_norm": 19.75, "learning_rate": 4.687758083756612e-06, "loss": 0.3806, "step": 3128 }, { "epoch": 1.5383480825958702, "grad_norm": 17.0, "learning_rate": 4.6853210556171055e-06, "loss": 0.2846, "step": 3129 }, { "epoch": 1.5388397246804326, "grad_norm": 27.125, "learning_rate": 4.682883973031377e-06, "loss": 0.3689, "step": 3130 }, { "epoch": 1.539331366764995, "grad_norm": 23.125, "learning_rate": 4.680446836715427e-06, "loss": 0.2631, "step": 3131 }, { "epoch": 1.5398230088495575, "grad_norm": 21.125, "learning_rate": 4.678009647385273e-06, "loss": 0.2449, "step": 3132 }, { "epoch": 1.54031465093412, "grad_norm": 28.375, "learning_rate": 4.6755724057569446e-06, "loss": 0.3173, "step": 3133 }, { "epoch": 1.5408062930186825, "grad_norm": 28.125, "learning_rate": 4.67313511254649e-06, "loss": 0.3072, "step": 3134 }, { "epoch": 1.5412979351032448, "grad_norm": 19.5, "learning_rate": 4.6706977684699715e-06, "loss": 0.2853, "step": 3135 }, { "epoch": 1.5417895771878074, "grad_norm": 22.75, "learning_rate": 4.668260374243466e-06, "loss": 0.3515, "step": 3136 }, { "epoch": 1.5422812192723696, "grad_norm": 27.25, "learning_rate": 4.665822930583065e-06, "loss": 0.1766, "step": 3137 }, { "epoch": 1.5427728613569323, "grad_norm": 16.25, "learning_rate": 4.663385438204876e-06, "loss": 0.3435, "step": 3138 }, { "epoch": 1.5432645034414945, "grad_norm": 25.125, "learning_rate": 4.660947897825017e-06, "loss": 0.3363, "step": 3139 }, { "epoch": 1.543756145526057, "grad_norm": 32.5, "learning_rate": 4.6585103101596275e-06, "loss": 0.3889, "step": 3140 }, { "epoch": 1.5442477876106193, "grad_norm": 14.875, "learning_rate": 4.656072675924853e-06, "loss": 0.2312, "step": 3141 }, { "epoch": 1.544739429695182, "grad_norm": 24.625, "learning_rate": 4.653634995836856e-06, "loss": 0.3277, "step": 3142 }, { "epoch": 1.5452310717797444, "grad_norm": 31.875, "learning_rate": 4.651197270611813e-06, "loss": 0.1871, "step": 3143 }, { "epoch": 1.5457227138643068, "grad_norm": 12.0, "learning_rate": 4.6487595009659135e-06, "loss": 0.173, "step": 3144 }, { "epoch": 1.5462143559488692, "grad_norm": 22.375, "learning_rate": 4.646321687615358e-06, "loss": 0.4087, "step": 3145 }, { "epoch": 1.5467059980334317, "grad_norm": 19.25, "learning_rate": 4.643883831276364e-06, "loss": 0.3555, "step": 3146 }, { "epoch": 1.547197640117994, "grad_norm": 14.3125, "learning_rate": 4.6414459326651575e-06, "loss": 0.3082, "step": 3147 }, { "epoch": 1.5476892822025565, "grad_norm": 25.375, "learning_rate": 4.639007992497978e-06, "loss": 0.4731, "step": 3148 }, { "epoch": 1.548180924287119, "grad_norm": 21.125, "learning_rate": 4.6365700114910804e-06, "loss": 0.3037, "step": 3149 }, { "epoch": 1.5486725663716814, "grad_norm": 16.125, "learning_rate": 4.634131990360726e-06, "loss": 0.3097, "step": 3150 }, { "epoch": 1.5491642084562438, "grad_norm": 21.125, "learning_rate": 4.6316939298231944e-06, "loss": 0.3098, "step": 3151 }, { "epoch": 1.5496558505408062, "grad_norm": 33.5, "learning_rate": 4.6292558305947695e-06, "loss": 0.538, "step": 3152 }, { "epoch": 1.5501474926253689, "grad_norm": 22.375, "learning_rate": 4.626817693391752e-06, "loss": 0.2502, "step": 3153 }, { "epoch": 1.550639134709931, "grad_norm": 37.5, "learning_rate": 4.624379518930454e-06, "loss": 0.3488, "step": 3154 }, { "epoch": 1.5511307767944937, "grad_norm": 19.5, "learning_rate": 4.6219413079271925e-06, "loss": 0.2964, "step": 3155 }, { "epoch": 1.551622418879056, "grad_norm": 18.75, "learning_rate": 4.619503061098301e-06, "loss": 0.4044, "step": 3156 }, { "epoch": 1.5521140609636186, "grad_norm": 36.0, "learning_rate": 4.6170647791601244e-06, "loss": 0.4023, "step": 3157 }, { "epoch": 1.5526057030481808, "grad_norm": 23.5, "learning_rate": 4.614626462829012e-06, "loss": 0.1311, "step": 3158 }, { "epoch": 1.5530973451327434, "grad_norm": 21.875, "learning_rate": 4.612188112821329e-06, "loss": 0.532, "step": 3159 }, { "epoch": 1.5535889872173057, "grad_norm": 33.5, "learning_rate": 4.609749729853445e-06, "loss": 0.5296, "step": 3160 }, { "epoch": 1.5540806293018683, "grad_norm": 16.5, "learning_rate": 4.607311314641747e-06, "loss": 0.2526, "step": 3161 }, { "epoch": 1.5545722713864307, "grad_norm": 39.25, "learning_rate": 4.604872867902623e-06, "loss": 0.4114, "step": 3162 }, { "epoch": 1.5550639134709932, "grad_norm": 18.125, "learning_rate": 4.602434390352476e-06, "loss": 0.389, "step": 3163 }, { "epoch": 1.5555555555555556, "grad_norm": 15.9375, "learning_rate": 4.599995882707716e-06, "loss": 0.2081, "step": 3164 }, { "epoch": 1.556047197640118, "grad_norm": 16.125, "learning_rate": 4.597557345684761e-06, "loss": 0.304, "step": 3165 }, { "epoch": 1.5565388397246804, "grad_norm": 14.125, "learning_rate": 4.59511878000004e-06, "loss": 0.1884, "step": 3166 }, { "epoch": 1.5570304818092429, "grad_norm": 26.125, "learning_rate": 4.5926801863699865e-06, "loss": 0.3984, "step": 3167 }, { "epoch": 1.5575221238938053, "grad_norm": 23.125, "learning_rate": 4.590241565511048e-06, "loss": 0.2206, "step": 3168 }, { "epoch": 1.5580137659783677, "grad_norm": 26.75, "learning_rate": 4.587802918139676e-06, "loss": 0.3471, "step": 3169 }, { "epoch": 1.5585054080629301, "grad_norm": 22.875, "learning_rate": 4.585364244972329e-06, "loss": 0.4589, "step": 3170 }, { "epoch": 1.5589970501474926, "grad_norm": 19.375, "learning_rate": 4.582925546725474e-06, "loss": 0.1921, "step": 3171 }, { "epoch": 1.5594886922320552, "grad_norm": 24.375, "learning_rate": 4.580486824115591e-06, "loss": 0.2543, "step": 3172 }, { "epoch": 1.5599803343166174, "grad_norm": 22.625, "learning_rate": 4.578048077859157e-06, "loss": 0.3083, "step": 3173 }, { "epoch": 1.56047197640118, "grad_norm": 5.4375, "learning_rate": 4.575609308672662e-06, "loss": 0.2025, "step": 3174 }, { "epoch": 1.5609636184857423, "grad_norm": 8.25, "learning_rate": 4.573170517272604e-06, "loss": 0.2192, "step": 3175 }, { "epoch": 1.561455260570305, "grad_norm": 19.875, "learning_rate": 4.570731704375485e-06, "loss": 0.2573, "step": 3176 }, { "epoch": 1.5619469026548671, "grad_norm": 40.25, "learning_rate": 4.5682928706978126e-06, "loss": 0.2928, "step": 3177 }, { "epoch": 1.5624385447394298, "grad_norm": 19.375, "learning_rate": 4.565854016956101e-06, "loss": 0.384, "step": 3178 }, { "epoch": 1.562930186823992, "grad_norm": 13.0, "learning_rate": 4.563415143866871e-06, "loss": 0.3018, "step": 3179 }, { "epoch": 1.5634218289085546, "grad_norm": 51.0, "learning_rate": 4.560976252146651e-06, "loss": 0.5108, "step": 3180 }, { "epoch": 1.563913470993117, "grad_norm": 22.625, "learning_rate": 4.55853734251197e-06, "loss": 0.3202, "step": 3181 }, { "epoch": 1.5644051130776795, "grad_norm": 21.5, "learning_rate": 4.556098415679368e-06, "loss": 0.3242, "step": 3182 }, { "epoch": 1.564896755162242, "grad_norm": 15.375, "learning_rate": 4.553659472365385e-06, "loss": 0.2942, "step": 3183 }, { "epoch": 1.5653883972468043, "grad_norm": 28.625, "learning_rate": 4.551220513286569e-06, "loss": 0.4186, "step": 3184 }, { "epoch": 1.5658800393313668, "grad_norm": 21.0, "learning_rate": 4.548781539159473e-06, "loss": 0.3104, "step": 3185 }, { "epoch": 1.5663716814159292, "grad_norm": 20.25, "learning_rate": 4.54634255070065e-06, "loss": 0.3312, "step": 3186 }, { "epoch": 1.5668633235004916, "grad_norm": 41.0, "learning_rate": 4.543903548626662e-06, "loss": 0.5528, "step": 3187 }, { "epoch": 1.567354965585054, "grad_norm": 34.25, "learning_rate": 4.541464533654073e-06, "loss": 0.3591, "step": 3188 }, { "epoch": 1.5678466076696165, "grad_norm": 21.5, "learning_rate": 4.539025506499451e-06, "loss": 0.2174, "step": 3189 }, { "epoch": 1.568338249754179, "grad_norm": 30.0, "learning_rate": 4.536586467879368e-06, "loss": 0.3712, "step": 3190 }, { "epoch": 1.5688298918387416, "grad_norm": 18.875, "learning_rate": 4.5341474185104e-06, "loss": 0.1487, "step": 3191 }, { "epoch": 1.5693215339233038, "grad_norm": 19.25, "learning_rate": 4.531708359109123e-06, "loss": 0.4115, "step": 3192 }, { "epoch": 1.5698131760078664, "grad_norm": 18.125, "learning_rate": 4.529269290392119e-06, "loss": 0.4135, "step": 3193 }, { "epoch": 1.5703048180924286, "grad_norm": 20.0, "learning_rate": 4.526830213075972e-06, "loss": 0.2816, "step": 3194 }, { "epoch": 1.5707964601769913, "grad_norm": 30.25, "learning_rate": 4.524391127877268e-06, "loss": 0.3326, "step": 3195 }, { "epoch": 1.5712881022615535, "grad_norm": 31.0, "learning_rate": 4.521952035512595e-06, "loss": 0.4628, "step": 3196 }, { "epoch": 1.5717797443461161, "grad_norm": 31.25, "learning_rate": 4.519512936698546e-06, "loss": 0.1968, "step": 3197 }, { "epoch": 1.5722713864306783, "grad_norm": 29.75, "learning_rate": 4.517073832151714e-06, "loss": 0.3336, "step": 3198 }, { "epoch": 1.572763028515241, "grad_norm": 30.0, "learning_rate": 4.51463472258869e-06, "loss": 0.4729, "step": 3199 }, { "epoch": 1.5732546705998034, "grad_norm": 17.5, "learning_rate": 4.5121956087260715e-06, "loss": 0.1849, "step": 3200 }, { "epoch": 1.5737463126843658, "grad_norm": 43.0, "learning_rate": 4.509756491280457e-06, "loss": 0.5034, "step": 3201 }, { "epoch": 1.5742379547689282, "grad_norm": 19.5, "learning_rate": 4.507317370968443e-06, "loss": 0.2776, "step": 3202 }, { "epoch": 1.5747295968534907, "grad_norm": 16.75, "learning_rate": 4.50487824850663e-06, "loss": 0.2558, "step": 3203 }, { "epoch": 1.575221238938053, "grad_norm": 34.5, "learning_rate": 4.502439124611614e-06, "loss": 0.3094, "step": 3204 }, { "epoch": 1.5757128810226155, "grad_norm": 29.5, "learning_rate": 4.5e-06, "loss": 0.4686, "step": 3205 }, { "epoch": 1.576204523107178, "grad_norm": 38.0, "learning_rate": 4.497560875388386e-06, "loss": 0.3846, "step": 3206 }, { "epoch": 1.5766961651917404, "grad_norm": 20.875, "learning_rate": 4.495121751493372e-06, "loss": 0.3837, "step": 3207 }, { "epoch": 1.5771878072763028, "grad_norm": 23.0, "learning_rate": 4.4926826290315585e-06, "loss": 0.3564, "step": 3208 }, { "epoch": 1.5776794493608652, "grad_norm": 27.75, "learning_rate": 4.490243508719542e-06, "loss": 0.2344, "step": 3209 }, { "epoch": 1.5781710914454279, "grad_norm": 19.75, "learning_rate": 4.487804391273928e-06, "loss": 0.3562, "step": 3210 }, { "epoch": 1.57866273352999, "grad_norm": 43.5, "learning_rate": 4.48536527741131e-06, "loss": 0.6348, "step": 3211 }, { "epoch": 1.5791543756145527, "grad_norm": 27.125, "learning_rate": 4.482926167848287e-06, "loss": 0.4651, "step": 3212 }, { "epoch": 1.579646017699115, "grad_norm": 12.8125, "learning_rate": 4.480487063301454e-06, "loss": 0.2114, "step": 3213 }, { "epoch": 1.5801376597836776, "grad_norm": 36.5, "learning_rate": 4.478047964487405e-06, "loss": 0.3327, "step": 3214 }, { "epoch": 1.5806293018682398, "grad_norm": 35.5, "learning_rate": 4.475608872122733e-06, "loss": 0.4578, "step": 3215 }, { "epoch": 1.5811209439528024, "grad_norm": 18.875, "learning_rate": 4.47316978692403e-06, "loss": 0.412, "step": 3216 }, { "epoch": 1.5816125860373647, "grad_norm": 34.5, "learning_rate": 4.470730709607881e-06, "loss": 0.3936, "step": 3217 }, { "epoch": 1.5821042281219273, "grad_norm": 12.9375, "learning_rate": 4.468291640890878e-06, "loss": 0.2263, "step": 3218 }, { "epoch": 1.5825958702064897, "grad_norm": 13.9375, "learning_rate": 4.465852581489602e-06, "loss": 0.3062, "step": 3219 }, { "epoch": 1.5830875122910522, "grad_norm": 12.3125, "learning_rate": 4.463413532120633e-06, "loss": 0.2593, "step": 3220 }, { "epoch": 1.5835791543756146, "grad_norm": 15.1875, "learning_rate": 4.46097449350055e-06, "loss": 0.2733, "step": 3221 }, { "epoch": 1.584070796460177, "grad_norm": 20.5, "learning_rate": 4.458535466345928e-06, "loss": 0.3171, "step": 3222 }, { "epoch": 1.5845624385447394, "grad_norm": 20.125, "learning_rate": 4.456096451373339e-06, "loss": 0.3189, "step": 3223 }, { "epoch": 1.5850540806293019, "grad_norm": 37.0, "learning_rate": 4.45365744929935e-06, "loss": 0.3754, "step": 3224 }, { "epoch": 1.5855457227138643, "grad_norm": 12.3125, "learning_rate": 4.451218460840528e-06, "loss": 0.3044, "step": 3225 }, { "epoch": 1.5860373647984267, "grad_norm": 13.5625, "learning_rate": 4.448779486713431e-06, "loss": 0.3185, "step": 3226 }, { "epoch": 1.5865290068829891, "grad_norm": 19.0, "learning_rate": 4.446340527634616e-06, "loss": 0.3313, "step": 3227 }, { "epoch": 1.5870206489675516, "grad_norm": 15.5625, "learning_rate": 4.443901584320632e-06, "loss": 0.3554, "step": 3228 }, { "epoch": 1.5875122910521142, "grad_norm": 28.5, "learning_rate": 4.44146265748803e-06, "loss": 0.2691, "step": 3229 }, { "epoch": 1.5880039331366764, "grad_norm": 26.125, "learning_rate": 4.439023747853351e-06, "loss": 0.3455, "step": 3230 }, { "epoch": 1.588495575221239, "grad_norm": 43.5, "learning_rate": 4.436584856133129e-06, "loss": 0.306, "step": 3231 }, { "epoch": 1.5889872173058013, "grad_norm": 8.3125, "learning_rate": 4.434145983043901e-06, "loss": 0.1998, "step": 3232 }, { "epoch": 1.589478859390364, "grad_norm": 15.0625, "learning_rate": 4.4317071293021885e-06, "loss": 0.2005, "step": 3233 }, { "epoch": 1.5899705014749261, "grad_norm": 13.0625, "learning_rate": 4.429268295624516e-06, "loss": 0.3453, "step": 3234 }, { "epoch": 1.5904621435594888, "grad_norm": 37.0, "learning_rate": 4.426829482727396e-06, "loss": 0.2314, "step": 3235 }, { "epoch": 1.590953785644051, "grad_norm": 33.75, "learning_rate": 4.424390691327338e-06, "loss": 0.3208, "step": 3236 }, { "epoch": 1.5914454277286136, "grad_norm": 21.875, "learning_rate": 4.421951922140845e-06, "loss": 0.2242, "step": 3237 }, { "epoch": 1.5919370698131758, "grad_norm": 29.25, "learning_rate": 4.419513175884411e-06, "loss": 0.5355, "step": 3238 }, { "epoch": 1.5924287118977385, "grad_norm": 11.4375, "learning_rate": 4.417074453274524e-06, "loss": 0.0486, "step": 3239 }, { "epoch": 1.592920353982301, "grad_norm": 16.25, "learning_rate": 4.414635755027672e-06, "loss": 0.2061, "step": 3240 }, { "epoch": 1.5934119960668633, "grad_norm": 15.6875, "learning_rate": 4.412197081860325e-06, "loss": 0.2125, "step": 3241 }, { "epoch": 1.5939036381514258, "grad_norm": 27.125, "learning_rate": 4.409758434488953e-06, "loss": 0.2992, "step": 3242 }, { "epoch": 1.5943952802359882, "grad_norm": 42.25, "learning_rate": 4.4073198136300145e-06, "loss": 0.4952, "step": 3243 }, { "epoch": 1.5948869223205506, "grad_norm": 23.75, "learning_rate": 4.404881219999963e-06, "loss": 0.2491, "step": 3244 }, { "epoch": 1.595378564405113, "grad_norm": 36.75, "learning_rate": 4.4024426543152414e-06, "loss": 0.6196, "step": 3245 }, { "epoch": 1.5958702064896755, "grad_norm": 21.75, "learning_rate": 4.400004117292285e-06, "loss": 0.3474, "step": 3246 }, { "epoch": 1.596361848574238, "grad_norm": 30.375, "learning_rate": 4.397565609647524e-06, "loss": 0.378, "step": 3247 }, { "epoch": 1.5968534906588006, "grad_norm": 26.0, "learning_rate": 4.395127132097377e-06, "loss": 0.3961, "step": 3248 }, { "epoch": 1.5973451327433628, "grad_norm": 28.5, "learning_rate": 4.392688685358254e-06, "loss": 0.2906, "step": 3249 }, { "epoch": 1.5978367748279254, "grad_norm": 27.0, "learning_rate": 4.390250270146555e-06, "loss": 0.4483, "step": 3250 }, { "epoch": 1.5983284169124876, "grad_norm": 20.75, "learning_rate": 4.3878118871786725e-06, "loss": 0.2739, "step": 3251 }, { "epoch": 1.5988200589970503, "grad_norm": 29.5, "learning_rate": 4.385373537170989e-06, "loss": 0.6199, "step": 3252 }, { "epoch": 1.5993117010816125, "grad_norm": 18.25, "learning_rate": 4.382935220839876e-06, "loss": 0.1749, "step": 3253 }, { "epoch": 1.5998033431661751, "grad_norm": 24.5, "learning_rate": 4.380496938901699e-06, "loss": 0.3811, "step": 3254 }, { "epoch": 1.6002949852507373, "grad_norm": 47.75, "learning_rate": 4.3780586920728086e-06, "loss": 0.4165, "step": 3255 }, { "epoch": 1.6007866273353, "grad_norm": 23.0, "learning_rate": 4.3756204810695475e-06, "loss": 0.3326, "step": 3256 }, { "epoch": 1.6012782694198622, "grad_norm": 17.5, "learning_rate": 4.373182306608248e-06, "loss": 0.2788, "step": 3257 }, { "epoch": 1.6017699115044248, "grad_norm": 19.625, "learning_rate": 4.3707441694052315e-06, "loss": 0.2803, "step": 3258 }, { "epoch": 1.6022615535889873, "grad_norm": 39.0, "learning_rate": 4.3683060701768075e-06, "loss": 0.4638, "step": 3259 }, { "epoch": 1.6027531956735497, "grad_norm": 26.125, "learning_rate": 4.365868009639275e-06, "loss": 0.3751, "step": 3260 }, { "epoch": 1.603244837758112, "grad_norm": 16.875, "learning_rate": 4.36342998850892e-06, "loss": 0.2261, "step": 3261 }, { "epoch": 1.6037364798426745, "grad_norm": 18.0, "learning_rate": 4.360992007502021e-06, "loss": 0.3721, "step": 3262 }, { "epoch": 1.604228121927237, "grad_norm": 24.875, "learning_rate": 4.358554067334843e-06, "loss": 0.2794, "step": 3263 }, { "epoch": 1.6047197640117994, "grad_norm": 23.5, "learning_rate": 4.356116168723637e-06, "loss": 0.246, "step": 3264 }, { "epoch": 1.6052114060963618, "grad_norm": 14.0625, "learning_rate": 4.353678312384643e-06, "loss": 0.334, "step": 3265 }, { "epoch": 1.6057030481809242, "grad_norm": 24.375, "learning_rate": 4.3512404990340876e-06, "loss": 0.2552, "step": 3266 }, { "epoch": 1.606194690265487, "grad_norm": 30.625, "learning_rate": 4.348802729388188e-06, "loss": 0.3452, "step": 3267 }, { "epoch": 1.606686332350049, "grad_norm": 25.125, "learning_rate": 4.346365004163145e-06, "loss": 0.3121, "step": 3268 }, { "epoch": 1.6071779744346117, "grad_norm": 35.25, "learning_rate": 4.3439273240751475e-06, "loss": 0.3524, "step": 3269 }, { "epoch": 1.607669616519174, "grad_norm": 26.625, "learning_rate": 4.341489689840374e-06, "loss": 0.2992, "step": 3270 }, { "epoch": 1.6081612586037366, "grad_norm": 23.25, "learning_rate": 4.339052102174983e-06, "loss": 0.2774, "step": 3271 }, { "epoch": 1.6086529006882988, "grad_norm": 26.25, "learning_rate": 4.336614561795125e-06, "loss": 0.3406, "step": 3272 }, { "epoch": 1.6091445427728615, "grad_norm": 14.6875, "learning_rate": 4.3341770694169355e-06, "loss": 0.1739, "step": 3273 }, { "epoch": 1.6096361848574237, "grad_norm": 26.0, "learning_rate": 4.3317396257565355e-06, "loss": 0.4765, "step": 3274 }, { "epoch": 1.6101278269419863, "grad_norm": 22.375, "learning_rate": 4.3293022315300295e-06, "loss": 0.2418, "step": 3275 }, { "epoch": 1.6106194690265485, "grad_norm": 20.625, "learning_rate": 4.326864887453511e-06, "loss": 0.2891, "step": 3276 }, { "epoch": 1.6111111111111112, "grad_norm": 43.5, "learning_rate": 4.324427594243056e-06, "loss": 0.4421, "step": 3277 }, { "epoch": 1.6116027531956736, "grad_norm": 15.625, "learning_rate": 4.321990352614728e-06, "loss": 0.2669, "step": 3278 }, { "epoch": 1.612094395280236, "grad_norm": 20.0, "learning_rate": 4.319553163284573e-06, "loss": 0.2357, "step": 3279 }, { "epoch": 1.6125860373647984, "grad_norm": 24.5, "learning_rate": 4.3171160269686246e-06, "loss": 0.1971, "step": 3280 }, { "epoch": 1.6130776794493609, "grad_norm": 23.25, "learning_rate": 4.314678944382896e-06, "loss": 0.3021, "step": 3281 }, { "epoch": 1.6135693215339233, "grad_norm": 48.5, "learning_rate": 4.31224191624339e-06, "loss": 0.6406, "step": 3282 }, { "epoch": 1.6140609636184857, "grad_norm": 36.75, "learning_rate": 4.309804943266087e-06, "loss": 0.2515, "step": 3283 }, { "epoch": 1.6145526057030481, "grad_norm": 29.875, "learning_rate": 4.3073680261669615e-06, "loss": 0.3082, "step": 3284 }, { "epoch": 1.6150442477876106, "grad_norm": 24.25, "learning_rate": 4.304931165661961e-06, "loss": 0.3412, "step": 3285 }, { "epoch": 1.615535889872173, "grad_norm": 20.625, "learning_rate": 4.302494362467021e-06, "loss": 0.2992, "step": 3286 }, { "epoch": 1.6160275319567354, "grad_norm": 18.375, "learning_rate": 4.300057617298061e-06, "loss": 0.1767, "step": 3287 }, { "epoch": 1.616519174041298, "grad_norm": 21.0, "learning_rate": 4.297620930870981e-06, "loss": 0.4405, "step": 3288 }, { "epoch": 1.6170108161258603, "grad_norm": 18.75, "learning_rate": 4.295184303901665e-06, "loss": 0.2434, "step": 3289 }, { "epoch": 1.617502458210423, "grad_norm": 31.625, "learning_rate": 4.2927477371059785e-06, "loss": 0.4409, "step": 3290 }, { "epoch": 1.6179941002949851, "grad_norm": 41.0, "learning_rate": 4.290311231199773e-06, "loss": 0.3653, "step": 3291 }, { "epoch": 1.6184857423795478, "grad_norm": 22.375, "learning_rate": 4.287874786898877e-06, "loss": 0.3392, "step": 3292 }, { "epoch": 1.61897738446411, "grad_norm": 18.125, "learning_rate": 4.285438404919106e-06, "loss": 0.214, "step": 3293 }, { "epoch": 1.6194690265486726, "grad_norm": 13.375, "learning_rate": 4.283002085976252e-06, "loss": 0.2804, "step": 3294 }, { "epoch": 1.6199606686332348, "grad_norm": 19.625, "learning_rate": 4.280565830786092e-06, "loss": 0.401, "step": 3295 }, { "epoch": 1.6204523107177975, "grad_norm": 34.0, "learning_rate": 4.2781296400643835e-06, "loss": 0.3631, "step": 3296 }, { "epoch": 1.62094395280236, "grad_norm": 37.25, "learning_rate": 4.2756935145268625e-06, "loss": 0.6066, "step": 3297 }, { "epoch": 1.6214355948869223, "grad_norm": 32.25, "learning_rate": 4.273257454889251e-06, "loss": 0.5379, "step": 3298 }, { "epoch": 1.6219272369714848, "grad_norm": 10.5625, "learning_rate": 4.270821461867249e-06, "loss": 0.1277, "step": 3299 }, { "epoch": 1.6224188790560472, "grad_norm": 15.3125, "learning_rate": 4.268385536176535e-06, "loss": 0.2798, "step": 3300 }, { "epoch": 1.6229105211406096, "grad_norm": 32.25, "learning_rate": 4.2659496785327694e-06, "loss": 0.4303, "step": 3301 }, { "epoch": 1.623402163225172, "grad_norm": 51.5, "learning_rate": 4.263513889651594e-06, "loss": 0.7014, "step": 3302 }, { "epoch": 1.6238938053097345, "grad_norm": 44.75, "learning_rate": 4.261078170248629e-06, "loss": 0.5845, "step": 3303 }, { "epoch": 1.624385447394297, "grad_norm": 16.375, "learning_rate": 4.258642521039472e-06, "loss": 0.3456, "step": 3304 }, { "epoch": 1.6248770894788593, "grad_norm": 38.75, "learning_rate": 4.256206942739703e-06, "loss": 0.2832, "step": 3305 }, { "epoch": 1.6253687315634218, "grad_norm": 30.125, "learning_rate": 4.253771436064883e-06, "loss": 0.3788, "step": 3306 }, { "epoch": 1.6258603736479844, "grad_norm": 17.125, "learning_rate": 4.251336001730547e-06, "loss": 0.3104, "step": 3307 }, { "epoch": 1.6263520157325466, "grad_norm": 24.875, "learning_rate": 4.248900640452212e-06, "loss": 0.4071, "step": 3308 }, { "epoch": 1.6268436578171093, "grad_norm": 22.25, "learning_rate": 4.246465352945372e-06, "loss": 0.4329, "step": 3309 }, { "epoch": 1.6273352999016715, "grad_norm": 39.5, "learning_rate": 4.2440301399255e-06, "loss": 0.3606, "step": 3310 }, { "epoch": 1.6278269419862341, "grad_norm": 19.0, "learning_rate": 4.241595002108048e-06, "loss": 0.3851, "step": 3311 }, { "epoch": 1.6283185840707963, "grad_norm": 37.5, "learning_rate": 4.239159940208443e-06, "loss": 0.4319, "step": 3312 }, { "epoch": 1.628810226155359, "grad_norm": 14.625, "learning_rate": 4.236724954942094e-06, "loss": 0.2264, "step": 3313 }, { "epoch": 1.6293018682399212, "grad_norm": 24.375, "learning_rate": 4.234290047024383e-06, "loss": 0.3078, "step": 3314 }, { "epoch": 1.6297935103244838, "grad_norm": 14.875, "learning_rate": 4.231855217170674e-06, "loss": 0.2045, "step": 3315 }, { "epoch": 1.6302851524090463, "grad_norm": 13.25, "learning_rate": 4.229420466096303e-06, "loss": 0.2097, "step": 3316 }, { "epoch": 1.6307767944936087, "grad_norm": 22.875, "learning_rate": 4.226985794516587e-06, "loss": 0.2705, "step": 3317 }, { "epoch": 1.631268436578171, "grad_norm": 19.875, "learning_rate": 4.224551203146817e-06, "loss": 0.3622, "step": 3318 }, { "epoch": 1.6317600786627335, "grad_norm": 18.375, "learning_rate": 4.222116692702262e-06, "loss": 0.2589, "step": 3319 }, { "epoch": 1.632251720747296, "grad_norm": 19.625, "learning_rate": 4.219682263898166e-06, "loss": 0.39, "step": 3320 }, { "epoch": 1.6327433628318584, "grad_norm": 13.875, "learning_rate": 4.217247917449749e-06, "loss": 0.2708, "step": 3321 }, { "epoch": 1.6332350049164208, "grad_norm": 34.25, "learning_rate": 4.214813654072211e-06, "loss": 0.4182, "step": 3322 }, { "epoch": 1.6337266470009832, "grad_norm": 30.625, "learning_rate": 4.212379474480721e-06, "loss": 0.5401, "step": 3323 }, { "epoch": 1.6342182890855457, "grad_norm": 19.375, "learning_rate": 4.209945379390428e-06, "loss": 0.3644, "step": 3324 }, { "epoch": 1.634709931170108, "grad_norm": 33.75, "learning_rate": 4.207511369516452e-06, "loss": 0.2821, "step": 3325 }, { "epoch": 1.6352015732546707, "grad_norm": 19.25, "learning_rate": 4.205077445573894e-06, "loss": 0.3884, "step": 3326 }, { "epoch": 1.635693215339233, "grad_norm": 23.75, "learning_rate": 4.202643608277822e-06, "loss": 0.3624, "step": 3327 }, { "epoch": 1.6361848574237956, "grad_norm": 21.25, "learning_rate": 4.200209858343288e-06, "loss": 0.201, "step": 3328 }, { "epoch": 1.6366764995083578, "grad_norm": 9.0, "learning_rate": 4.19777619648531e-06, "loss": 0.2122, "step": 3329 }, { "epoch": 1.6371681415929205, "grad_norm": 53.5, "learning_rate": 4.1953426234188845e-06, "loss": 0.4501, "step": 3330 }, { "epoch": 1.6376597836774827, "grad_norm": 24.125, "learning_rate": 4.1929091398589815e-06, "loss": 0.3272, "step": 3331 }, { "epoch": 1.6381514257620453, "grad_norm": 27.625, "learning_rate": 4.190475746520541e-06, "loss": 0.3135, "step": 3332 }, { "epoch": 1.6386430678466075, "grad_norm": 18.625, "learning_rate": 4.188042444118484e-06, "loss": 0.3371, "step": 3333 }, { "epoch": 1.6391347099311702, "grad_norm": 20.25, "learning_rate": 4.185609233367693e-06, "loss": 0.3174, "step": 3334 }, { "epoch": 1.6396263520157326, "grad_norm": 14.9375, "learning_rate": 4.183176114983039e-06, "loss": 0.1675, "step": 3335 }, { "epoch": 1.640117994100295, "grad_norm": 11.25, "learning_rate": 4.180743089679354e-06, "loss": 0.2438, "step": 3336 }, { "epoch": 1.6406096361848574, "grad_norm": 28.375, "learning_rate": 4.1783101581714455e-06, "loss": 0.417, "step": 3337 }, { "epoch": 1.6411012782694199, "grad_norm": 31.625, "learning_rate": 4.175877321174096e-06, "loss": 0.3595, "step": 3338 }, { "epoch": 1.6415929203539823, "grad_norm": 28.75, "learning_rate": 4.1734445794020575e-06, "loss": 0.3148, "step": 3339 }, { "epoch": 1.6420845624385447, "grad_norm": 17.0, "learning_rate": 4.171011933570056e-06, "loss": 0.2446, "step": 3340 }, { "epoch": 1.6425762045231072, "grad_norm": 27.125, "learning_rate": 4.168579384392786e-06, "loss": 0.454, "step": 3341 }, { "epoch": 1.6430678466076696, "grad_norm": 13.9375, "learning_rate": 4.166146932584917e-06, "loss": 0.2528, "step": 3342 }, { "epoch": 1.643559488692232, "grad_norm": 17.5, "learning_rate": 4.163714578861091e-06, "loss": 0.1938, "step": 3343 }, { "epoch": 1.6440511307767944, "grad_norm": 39.25, "learning_rate": 4.161282323935917e-06, "loss": 0.4391, "step": 3344 }, { "epoch": 1.644542772861357, "grad_norm": 25.875, "learning_rate": 4.158850168523978e-06, "loss": 0.2518, "step": 3345 }, { "epoch": 1.6450344149459193, "grad_norm": 38.5, "learning_rate": 4.156418113339827e-06, "loss": 0.3628, "step": 3346 }, { "epoch": 1.645526057030482, "grad_norm": 21.75, "learning_rate": 4.153986159097985e-06, "loss": 0.2165, "step": 3347 }, { "epoch": 1.6460176991150441, "grad_norm": 10.625, "learning_rate": 4.151554306512947e-06, "loss": 0.2532, "step": 3348 }, { "epoch": 1.6465093411996068, "grad_norm": 25.25, "learning_rate": 4.149122556299176e-06, "loss": 0.3312, "step": 3349 }, { "epoch": 1.647000983284169, "grad_norm": 38.25, "learning_rate": 4.146690909171109e-06, "loss": 0.5552, "step": 3350 }, { "epoch": 1.6474926253687316, "grad_norm": 22.5, "learning_rate": 4.144259365843146e-06, "loss": 0.3212, "step": 3351 }, { "epoch": 1.6479842674532938, "grad_norm": 26.5, "learning_rate": 4.1418279270296615e-06, "loss": 0.2236, "step": 3352 }, { "epoch": 1.6484759095378565, "grad_norm": 17.125, "learning_rate": 4.139396593444998e-06, "loss": 0.4404, "step": 3353 }, { "epoch": 1.648967551622419, "grad_norm": 17.125, "learning_rate": 4.136965365803464e-06, "loss": 0.3484, "step": 3354 }, { "epoch": 1.6494591937069814, "grad_norm": 24.25, "learning_rate": 4.134534244819343e-06, "loss": 0.5215, "step": 3355 }, { "epoch": 1.6499508357915438, "grad_norm": 17.625, "learning_rate": 4.132103231206882e-06, "loss": 0.3473, "step": 3356 }, { "epoch": 1.6504424778761062, "grad_norm": 20.375, "learning_rate": 4.1296723256802995e-06, "loss": 0.2598, "step": 3357 }, { "epoch": 1.6509341199606686, "grad_norm": 24.25, "learning_rate": 4.127241528953781e-06, "loss": 0.4404, "step": 3358 }, { "epoch": 1.651425762045231, "grad_norm": 24.5, "learning_rate": 4.12481084174148e-06, "loss": 0.3386, "step": 3359 }, { "epoch": 1.6519174041297935, "grad_norm": 15.625, "learning_rate": 4.122380264757516e-06, "loss": 0.155, "step": 3360 }, { "epoch": 1.652409046214356, "grad_norm": 21.0, "learning_rate": 4.119949798715981e-06, "loss": 0.2866, "step": 3361 }, { "epoch": 1.6529006882989183, "grad_norm": 25.375, "learning_rate": 4.117519444330929e-06, "loss": 0.318, "step": 3362 }, { "epoch": 1.6533923303834808, "grad_norm": 27.5, "learning_rate": 4.115089202316386e-06, "loss": 0.2862, "step": 3363 }, { "epoch": 1.6538839724680434, "grad_norm": 14.9375, "learning_rate": 4.112659073386339e-06, "loss": 0.2622, "step": 3364 }, { "epoch": 1.6543756145526056, "grad_norm": 63.0, "learning_rate": 4.110229058254748e-06, "loss": 0.434, "step": 3365 }, { "epoch": 1.6548672566371683, "grad_norm": 19.75, "learning_rate": 4.107799157635538e-06, "loss": 0.2377, "step": 3366 }, { "epoch": 1.6553588987217305, "grad_norm": 11.9375, "learning_rate": 4.105369372242598e-06, "loss": 0.2894, "step": 3367 }, { "epoch": 1.6558505408062931, "grad_norm": 18.125, "learning_rate": 4.102939702789784e-06, "loss": 0.3369, "step": 3368 }, { "epoch": 1.6563421828908553, "grad_norm": 30.0, "learning_rate": 4.100510149990919e-06, "loss": 0.2433, "step": 3369 }, { "epoch": 1.656833824975418, "grad_norm": 25.75, "learning_rate": 4.0980807145597904e-06, "loss": 0.3667, "step": 3370 }, { "epoch": 1.6573254670599802, "grad_norm": 21.0, "learning_rate": 4.095651397210151e-06, "loss": 0.4322, "step": 3371 }, { "epoch": 1.6578171091445428, "grad_norm": 17.0, "learning_rate": 4.093222198655723e-06, "loss": 0.3007, "step": 3372 }, { "epoch": 1.6583087512291053, "grad_norm": 28.25, "learning_rate": 4.090793119610189e-06, "loss": 0.4386, "step": 3373 }, { "epoch": 1.6588003933136677, "grad_norm": 23.875, "learning_rate": 4.088364160787197e-06, "loss": 0.3172, "step": 3374 }, { "epoch": 1.6592920353982301, "grad_norm": 14.1875, "learning_rate": 4.085935322900361e-06, "loss": 0.2367, "step": 3375 }, { "epoch": 1.6597836774827925, "grad_norm": 28.875, "learning_rate": 4.083506606663259e-06, "loss": 0.4061, "step": 3376 }, { "epoch": 1.660275319567355, "grad_norm": 46.75, "learning_rate": 4.081078012789435e-06, "loss": 0.4997, "step": 3377 }, { "epoch": 1.6607669616519174, "grad_norm": 12.875, "learning_rate": 4.078649541992391e-06, "loss": 0.2037, "step": 3378 }, { "epoch": 1.6612586037364798, "grad_norm": 19.75, "learning_rate": 4.076221194985603e-06, "loss": 0.3502, "step": 3379 }, { "epoch": 1.6617502458210422, "grad_norm": 40.5, "learning_rate": 4.073792972482501e-06, "loss": 0.2566, "step": 3380 }, { "epoch": 1.6622418879056047, "grad_norm": 34.25, "learning_rate": 4.071364875196484e-06, "loss": 0.382, "step": 3381 }, { "epoch": 1.662733529990167, "grad_norm": 16.625, "learning_rate": 4.068936903840912e-06, "loss": 0.1348, "step": 3382 }, { "epoch": 1.6632251720747298, "grad_norm": 23.875, "learning_rate": 4.066509059129109e-06, "loss": 0.4365, "step": 3383 }, { "epoch": 1.663716814159292, "grad_norm": 15.1875, "learning_rate": 4.06408134177436e-06, "loss": 0.3373, "step": 3384 }, { "epoch": 1.6642084562438546, "grad_norm": 43.75, "learning_rate": 4.061653752489915e-06, "loss": 0.3378, "step": 3385 }, { "epoch": 1.6647000983284168, "grad_norm": 13.0, "learning_rate": 4.059226291988983e-06, "loss": 0.1849, "step": 3386 }, { "epoch": 1.6651917404129795, "grad_norm": 21.75, "learning_rate": 4.056798960984741e-06, "loss": 0.2777, "step": 3387 }, { "epoch": 1.6656833824975417, "grad_norm": 14.1875, "learning_rate": 4.054371760190323e-06, "loss": 0.3684, "step": 3388 }, { "epoch": 1.6661750245821043, "grad_norm": 48.0, "learning_rate": 4.051944690318826e-06, "loss": 0.4199, "step": 3389 }, { "epoch": 1.6666666666666665, "grad_norm": 10.5625, "learning_rate": 4.049517752083308e-06, "loss": 0.1791, "step": 3390 }, { "epoch": 1.6671583087512292, "grad_norm": 22.25, "learning_rate": 4.04709094619679e-06, "loss": 0.2997, "step": 3391 }, { "epoch": 1.6676499508357916, "grad_norm": 43.0, "learning_rate": 4.044664273372252e-06, "loss": 0.4475, "step": 3392 }, { "epoch": 1.668141592920354, "grad_norm": 13.8125, "learning_rate": 4.042237734322636e-06, "loss": 0.1702, "step": 3393 }, { "epoch": 1.6686332350049164, "grad_norm": 15.9375, "learning_rate": 4.039811329760846e-06, "loss": 0.1422, "step": 3394 }, { "epoch": 1.6691248770894789, "grad_norm": 27.875, "learning_rate": 4.0373850603997454e-06, "loss": 0.3133, "step": 3395 }, { "epoch": 1.6696165191740413, "grad_norm": 16.875, "learning_rate": 4.034958926952156e-06, "loss": 0.0734, "step": 3396 }, { "epoch": 1.6701081612586037, "grad_norm": 16.5, "learning_rate": 4.032532930130863e-06, "loss": 0.3113, "step": 3397 }, { "epoch": 1.6705998033431662, "grad_norm": 35.25, "learning_rate": 4.030107070648608e-06, "loss": 0.3258, "step": 3398 }, { "epoch": 1.6710914454277286, "grad_norm": 14.25, "learning_rate": 4.027681349218095e-06, "loss": 0.3315, "step": 3399 }, { "epoch": 1.671583087512291, "grad_norm": 19.0, "learning_rate": 4.025255766551986e-06, "loss": 0.1077, "step": 3400 }, { "epoch": 1.6720747295968534, "grad_norm": 10.125, "learning_rate": 4.022830323362904e-06, "loss": 0.2293, "step": 3401 }, { "epoch": 1.672566371681416, "grad_norm": 23.375, "learning_rate": 4.02040502036343e-06, "loss": 0.2248, "step": 3402 }, { "epoch": 1.6730580137659783, "grad_norm": 16.125, "learning_rate": 4.017979858266102e-06, "loss": 0.2598, "step": 3403 }, { "epoch": 1.673549655850541, "grad_norm": 45.75, "learning_rate": 4.015554837783418e-06, "loss": 0.4383, "step": 3404 }, { "epoch": 1.6740412979351031, "grad_norm": 16.5, "learning_rate": 4.013129959627836e-06, "loss": 0.345, "step": 3405 }, { "epoch": 1.6745329400196658, "grad_norm": 12.9375, "learning_rate": 4.01070522451177e-06, "loss": 0.1614, "step": 3406 }, { "epoch": 1.675024582104228, "grad_norm": 25.625, "learning_rate": 4.008280633147593e-06, "loss": 0.4504, "step": 3407 }, { "epoch": 1.6755162241887906, "grad_norm": 29.375, "learning_rate": 4.0058561862476345e-06, "loss": 0.4158, "step": 3408 }, { "epoch": 1.6760078662733529, "grad_norm": 11.875, "learning_rate": 4.0034318845241835e-06, "loss": 0.3475, "step": 3409 }, { "epoch": 1.6764995083579155, "grad_norm": 19.75, "learning_rate": 4.001007728689485e-06, "loss": 0.3145, "step": 3410 }, { "epoch": 1.676991150442478, "grad_norm": 30.25, "learning_rate": 3.998583719455743e-06, "loss": 0.4626, "step": 3411 }, { "epoch": 1.6774827925270404, "grad_norm": 51.75, "learning_rate": 3.996159857535115e-06, "loss": 0.5282, "step": 3412 }, { "epoch": 1.6779744346116028, "grad_norm": 22.75, "learning_rate": 3.993736143639718e-06, "loss": 0.3223, "step": 3413 }, { "epoch": 1.6784660766961652, "grad_norm": 29.375, "learning_rate": 3.991312578481625e-06, "loss": 0.3333, "step": 3414 }, { "epoch": 1.6789577187807276, "grad_norm": 14.375, "learning_rate": 3.988889162772863e-06, "loss": 0.2111, "step": 3415 }, { "epoch": 1.67944936086529, "grad_norm": 44.5, "learning_rate": 3.986465897225419e-06, "loss": 0.5146, "step": 3416 }, { "epoch": 1.6799410029498525, "grad_norm": 24.375, "learning_rate": 3.984042782551235e-06, "loss": 0.3536, "step": 3417 }, { "epoch": 1.680432645034415, "grad_norm": 26.125, "learning_rate": 3.9816198194622065e-06, "loss": 0.21, "step": 3418 }, { "epoch": 1.6809242871189773, "grad_norm": 31.625, "learning_rate": 3.979197008670185e-06, "loss": 0.5113, "step": 3419 }, { "epoch": 1.6814159292035398, "grad_norm": 27.375, "learning_rate": 3.976774350886979e-06, "loss": 0.3112, "step": 3420 }, { "epoch": 1.6819075712881024, "grad_norm": 23.875, "learning_rate": 3.974351846824349e-06, "loss": 0.2136, "step": 3421 }, { "epoch": 1.6823992133726646, "grad_norm": 43.25, "learning_rate": 3.971929497194013e-06, "loss": 0.3996, "step": 3422 }, { "epoch": 1.6828908554572273, "grad_norm": 34.75, "learning_rate": 3.969507302707644e-06, "loss": 0.4938, "step": 3423 }, { "epoch": 1.6833824975417895, "grad_norm": 23.375, "learning_rate": 3.967085264076869e-06, "loss": 0.3459, "step": 3424 }, { "epoch": 1.6838741396263521, "grad_norm": 24.25, "learning_rate": 3.964663382013265e-06, "loss": 0.337, "step": 3425 }, { "epoch": 1.6843657817109143, "grad_norm": 18.875, "learning_rate": 3.962241657228371e-06, "loss": 0.2804, "step": 3426 }, { "epoch": 1.684857423795477, "grad_norm": 15.875, "learning_rate": 3.959820090433672e-06, "loss": 0.2414, "step": 3427 }, { "epoch": 1.6853490658800392, "grad_norm": 34.5, "learning_rate": 3.957398682340611e-06, "loss": 0.3215, "step": 3428 }, { "epoch": 1.6858407079646018, "grad_norm": 19.25, "learning_rate": 3.954977433660582e-06, "loss": 0.247, "step": 3429 }, { "epoch": 1.6863323500491643, "grad_norm": 17.875, "learning_rate": 3.952556345104935e-06, "loss": 0.2351, "step": 3430 }, { "epoch": 1.6868239921337267, "grad_norm": 24.25, "learning_rate": 3.950135417384971e-06, "loss": 0.3912, "step": 3431 }, { "epoch": 1.6873156342182891, "grad_norm": 15.1875, "learning_rate": 3.947714651211945e-06, "loss": 0.3006, "step": 3432 }, { "epoch": 1.6878072763028515, "grad_norm": 23.125, "learning_rate": 3.945294047297063e-06, "loss": 0.3137, "step": 3433 }, { "epoch": 1.688298918387414, "grad_norm": 22.875, "learning_rate": 3.942873606351484e-06, "loss": 0.3499, "step": 3434 }, { "epoch": 1.6887905604719764, "grad_norm": 35.0, "learning_rate": 3.940453329086318e-06, "loss": 0.3111, "step": 3435 }, { "epoch": 1.6892822025565388, "grad_norm": 14.0, "learning_rate": 3.9380332162126294e-06, "loss": 0.2391, "step": 3436 }, { "epoch": 1.6897738446411013, "grad_norm": 44.0, "learning_rate": 3.93561326844143e-06, "loss": 0.354, "step": 3437 }, { "epoch": 1.6902654867256637, "grad_norm": 18.75, "learning_rate": 3.9331934864836916e-06, "loss": 0.2188, "step": 3438 }, { "epoch": 1.690757128810226, "grad_norm": 37.25, "learning_rate": 3.930773871050328e-06, "loss": 0.4274, "step": 3439 }, { "epoch": 1.6912487708947888, "grad_norm": 17.25, "learning_rate": 3.928354422852208e-06, "loss": 0.3202, "step": 3440 }, { "epoch": 1.691740412979351, "grad_norm": 28.875, "learning_rate": 3.925935142600152e-06, "loss": 0.3687, "step": 3441 }, { "epoch": 1.6922320550639136, "grad_norm": 18.375, "learning_rate": 3.92351603100493e-06, "loss": 0.3426, "step": 3442 }, { "epoch": 1.6927236971484758, "grad_norm": 13.375, "learning_rate": 3.92109708877726e-06, "loss": 0.1689, "step": 3443 }, { "epoch": 1.6932153392330385, "grad_norm": 23.5, "learning_rate": 3.918678316627817e-06, "loss": 0.3241, "step": 3444 }, { "epoch": 1.6937069813176007, "grad_norm": 17.25, "learning_rate": 3.916259715267219e-06, "loss": 0.3394, "step": 3445 }, { "epoch": 1.6941986234021633, "grad_norm": 15.0, "learning_rate": 3.913841285406038e-06, "loss": 0.2913, "step": 3446 }, { "epoch": 1.6946902654867255, "grad_norm": 18.75, "learning_rate": 3.911423027754794e-06, "loss": 0.3027, "step": 3447 }, { "epoch": 1.6951819075712882, "grad_norm": 12.1875, "learning_rate": 3.909004943023956e-06, "loss": 0.1477, "step": 3448 }, { "epoch": 1.6956735496558504, "grad_norm": 15.3125, "learning_rate": 3.906587031923943e-06, "loss": 0.2955, "step": 3449 }, { "epoch": 1.696165191740413, "grad_norm": 65.5, "learning_rate": 3.904169295165124e-06, "loss": 0.6297, "step": 3450 }, { "epoch": 1.6966568338249755, "grad_norm": 11.75, "learning_rate": 3.901751733457815e-06, "loss": 0.2769, "step": 3451 }, { "epoch": 1.6971484759095379, "grad_norm": 17.75, "learning_rate": 3.89933434751228e-06, "loss": 0.3283, "step": 3452 }, { "epoch": 1.6976401179941003, "grad_norm": 30.75, "learning_rate": 3.896917138038733e-06, "loss": 0.3525, "step": 3453 }, { "epoch": 1.6981317600786627, "grad_norm": 23.625, "learning_rate": 3.8945001057473384e-06, "loss": 0.2348, "step": 3454 }, { "epoch": 1.6986234021632252, "grad_norm": 14.9375, "learning_rate": 3.892083251348203e-06, "loss": 0.2631, "step": 3455 }, { "epoch": 1.6991150442477876, "grad_norm": 13.5625, "learning_rate": 3.889666575551386e-06, "loss": 0.3318, "step": 3456 }, { "epoch": 1.69960668633235, "grad_norm": 31.875, "learning_rate": 3.887250079066891e-06, "loss": 0.338, "step": 3457 }, { "epoch": 1.7000983284169124, "grad_norm": 41.25, "learning_rate": 3.8848337626046705e-06, "loss": 0.6121, "step": 3458 }, { "epoch": 1.700589970501475, "grad_norm": 36.0, "learning_rate": 3.882417626874623e-06, "loss": 0.388, "step": 3459 }, { "epoch": 1.7010816125860373, "grad_norm": 23.0, "learning_rate": 3.880001672586597e-06, "loss": 0.352, "step": 3460 }, { "epoch": 1.7015732546706, "grad_norm": 31.875, "learning_rate": 3.877585900450384e-06, "loss": 0.4989, "step": 3461 }, { "epoch": 1.7020648967551621, "grad_norm": 21.75, "learning_rate": 3.875170311175725e-06, "loss": 0.3045, "step": 3462 }, { "epoch": 1.7025565388397248, "grad_norm": 36.75, "learning_rate": 3.872754905472304e-06, "loss": 0.4607, "step": 3463 }, { "epoch": 1.703048180924287, "grad_norm": 49.5, "learning_rate": 3.870339684049753e-06, "loss": 0.5189, "step": 3464 }, { "epoch": 1.7035398230088497, "grad_norm": 30.375, "learning_rate": 3.8679246476176505e-06, "loss": 0.4542, "step": 3465 }, { "epoch": 1.7040314650934119, "grad_norm": 20.75, "learning_rate": 3.865509796885519e-06, "loss": 0.3561, "step": 3466 }, { "epoch": 1.7045231071779745, "grad_norm": 30.125, "learning_rate": 3.863095132562825e-06, "loss": 0.4436, "step": 3467 }, { "epoch": 1.7050147492625367, "grad_norm": 33.0, "learning_rate": 3.860680655358988e-06, "loss": 0.2651, "step": 3468 }, { "epoch": 1.7055063913470994, "grad_norm": 15.1875, "learning_rate": 3.858266365983363e-06, "loss": 0.2327, "step": 3469 }, { "epoch": 1.7059980334316618, "grad_norm": 40.0, "learning_rate": 3.855852265145256e-06, "loss": 0.3127, "step": 3470 }, { "epoch": 1.7064896755162242, "grad_norm": 18.125, "learning_rate": 3.853438353553914e-06, "loss": 0.3206, "step": 3471 }, { "epoch": 1.7069813176007866, "grad_norm": 46.25, "learning_rate": 3.8510246319185295e-06, "loss": 0.6867, "step": 3472 }, { "epoch": 1.707472959685349, "grad_norm": 41.5, "learning_rate": 3.848611100948241e-06, "loss": 0.5057, "step": 3473 }, { "epoch": 1.7079646017699115, "grad_norm": 13.5625, "learning_rate": 3.846197761352126e-06, "loss": 0.273, "step": 3474 }, { "epoch": 1.708456243854474, "grad_norm": 43.25, "learning_rate": 3.843784613839214e-06, "loss": 0.6752, "step": 3475 }, { "epoch": 1.7089478859390363, "grad_norm": 49.0, "learning_rate": 3.841371659118473e-06, "loss": 0.5003, "step": 3476 }, { "epoch": 1.7094395280235988, "grad_norm": 27.25, "learning_rate": 3.838958897898811e-06, "loss": 0.4699, "step": 3477 }, { "epoch": 1.7099311701081614, "grad_norm": 33.0, "learning_rate": 3.836546330889085e-06, "loss": 0.3371, "step": 3478 }, { "epoch": 1.7104228121927236, "grad_norm": 16.375, "learning_rate": 3.834133958798094e-06, "loss": 0.2542, "step": 3479 }, { "epoch": 1.7109144542772863, "grad_norm": 55.5, "learning_rate": 3.831721782334577e-06, "loss": 0.6444, "step": 3480 }, { "epoch": 1.7114060963618485, "grad_norm": 29.125, "learning_rate": 3.8293098022072144e-06, "loss": 0.3591, "step": 3481 }, { "epoch": 1.7118977384464111, "grad_norm": 18.875, "learning_rate": 3.826898019124637e-06, "loss": 0.4326, "step": 3482 }, { "epoch": 1.7123893805309733, "grad_norm": 31.625, "learning_rate": 3.82448643379541e-06, "loss": 0.6798, "step": 3483 }, { "epoch": 1.712881022615536, "grad_norm": 28.0, "learning_rate": 3.822075046928043e-06, "loss": 0.1631, "step": 3484 }, { "epoch": 1.7133726647000982, "grad_norm": 35.75, "learning_rate": 3.819663859230986e-06, "loss": 0.4093, "step": 3485 }, { "epoch": 1.7138643067846608, "grad_norm": 19.875, "learning_rate": 3.817252871412633e-06, "loss": 0.3593, "step": 3486 }, { "epoch": 1.714355948869223, "grad_norm": 31.0, "learning_rate": 3.814842084181315e-06, "loss": 0.3108, "step": 3487 }, { "epoch": 1.7148475909537857, "grad_norm": 28.5, "learning_rate": 3.8124314982453116e-06, "loss": 0.3021, "step": 3488 }, { "epoch": 1.7153392330383481, "grad_norm": 27.0, "learning_rate": 3.8100211143128343e-06, "loss": 0.2769, "step": 3489 }, { "epoch": 1.7158308751229105, "grad_norm": 20.375, "learning_rate": 3.807610933092042e-06, "loss": 0.3186, "step": 3490 }, { "epoch": 1.716322517207473, "grad_norm": 23.125, "learning_rate": 3.805200955291031e-06, "loss": 0.3226, "step": 3491 }, { "epoch": 1.7168141592920354, "grad_norm": 18.375, "learning_rate": 3.8027911816178376e-06, "loss": 0.4446, "step": 3492 }, { "epoch": 1.7173058013765978, "grad_norm": 13.8125, "learning_rate": 3.8003816127804403e-06, "loss": 0.2311, "step": 3493 }, { "epoch": 1.7177974434611603, "grad_norm": 11.5625, "learning_rate": 3.797972249486755e-06, "loss": 0.2982, "step": 3494 }, { "epoch": 1.7182890855457227, "grad_norm": 16.5, "learning_rate": 3.7955630924446394e-06, "loss": 0.2231, "step": 3495 }, { "epoch": 1.718780727630285, "grad_norm": 8.25, "learning_rate": 3.793154142361887e-06, "loss": 0.1266, "step": 3496 }, { "epoch": 1.7192723697148475, "grad_norm": 41.25, "learning_rate": 3.790745399946235e-06, "loss": 0.389, "step": 3497 }, { "epoch": 1.71976401179941, "grad_norm": 32.25, "learning_rate": 3.788336865905359e-06, "loss": 0.5218, "step": 3498 }, { "epoch": 1.7202556538839726, "grad_norm": 18.875, "learning_rate": 3.785928540946869e-06, "loss": 0.1951, "step": 3499 }, { "epoch": 1.7207472959685348, "grad_norm": 25.375, "learning_rate": 3.783520425778319e-06, "loss": 0.3083, "step": 3500 }, { "epoch": 1.7212389380530975, "grad_norm": 23.125, "learning_rate": 3.7811125211071964e-06, "loss": 0.3227, "step": 3501 }, { "epoch": 1.7217305801376597, "grad_norm": 16.25, "learning_rate": 3.778704827640931e-06, "loss": 0.2483, "step": 3502 }, { "epoch": 1.7222222222222223, "grad_norm": 13.8125, "learning_rate": 3.7762973460868857e-06, "loss": 0.2916, "step": 3503 }, { "epoch": 1.7227138643067845, "grad_norm": 34.5, "learning_rate": 3.77389007715237e-06, "loss": 0.4591, "step": 3504 }, { "epoch": 1.7232055063913472, "grad_norm": 23.875, "learning_rate": 3.771483021544621e-06, "loss": 0.2864, "step": 3505 }, { "epoch": 1.7236971484759094, "grad_norm": 18.25, "learning_rate": 3.7690761799708185e-06, "loss": 0.3496, "step": 3506 }, { "epoch": 1.724188790560472, "grad_norm": 56.0, "learning_rate": 3.7666695531380777e-06, "loss": 0.5109, "step": 3507 }, { "epoch": 1.7246804326450345, "grad_norm": 22.375, "learning_rate": 3.7642631417534516e-06, "loss": 0.3601, "step": 3508 }, { "epoch": 1.7251720747295969, "grad_norm": 85.5, "learning_rate": 3.7618569465239295e-06, "loss": 0.4042, "step": 3509 }, { "epoch": 1.7256637168141593, "grad_norm": 32.75, "learning_rate": 3.7594509681564374e-06, "loss": 0.2807, "step": 3510 }, { "epoch": 1.7261553588987217, "grad_norm": 20.25, "learning_rate": 3.7570452073578345e-06, "loss": 0.2982, "step": 3511 }, { "epoch": 1.7266470009832842, "grad_norm": 14.5, "learning_rate": 3.7546396648349237e-06, "loss": 0.2108, "step": 3512 }, { "epoch": 1.7271386430678466, "grad_norm": 15.8125, "learning_rate": 3.7522343412944373e-06, "loss": 0.2589, "step": 3513 }, { "epoch": 1.727630285152409, "grad_norm": 13.5, "learning_rate": 3.749829237443044e-06, "loss": 0.2252, "step": 3514 }, { "epoch": 1.7281219272369714, "grad_norm": 22.125, "learning_rate": 3.7474243539873504e-06, "loss": 0.4068, "step": 3515 }, { "epoch": 1.7286135693215339, "grad_norm": 26.0, "learning_rate": 3.745019691633896e-06, "loss": 0.3371, "step": 3516 }, { "epoch": 1.7291052114060963, "grad_norm": 35.25, "learning_rate": 3.7426152510891565e-06, "loss": 0.3752, "step": 3517 }, { "epoch": 1.729596853490659, "grad_norm": 22.25, "learning_rate": 3.7402110330595395e-06, "loss": 0.4172, "step": 3518 }, { "epoch": 1.7300884955752212, "grad_norm": 16.375, "learning_rate": 3.7378070382513955e-06, "loss": 0.2481, "step": 3519 }, { "epoch": 1.7305801376597838, "grad_norm": 43.75, "learning_rate": 3.7354032673710004e-06, "loss": 0.6463, "step": 3520 }, { "epoch": 1.731071779744346, "grad_norm": 18.125, "learning_rate": 3.732999721124568e-06, "loss": 0.2027, "step": 3521 }, { "epoch": 1.7315634218289087, "grad_norm": 11.1875, "learning_rate": 3.730596400218247e-06, "loss": 0.2481, "step": 3522 }, { "epoch": 1.7320550639134709, "grad_norm": 12.25, "learning_rate": 3.728193305358117e-06, "loss": 0.2224, "step": 3523 }, { "epoch": 1.7325467059980335, "grad_norm": 19.0, "learning_rate": 3.725790437250194e-06, "loss": 0.2415, "step": 3524 }, { "epoch": 1.7330383480825957, "grad_norm": 41.5, "learning_rate": 3.7233877966004244e-06, "loss": 0.5386, "step": 3525 }, { "epoch": 1.7335299901671584, "grad_norm": 41.25, "learning_rate": 3.7209853841146937e-06, "loss": 0.3007, "step": 3526 }, { "epoch": 1.7340216322517208, "grad_norm": 25.75, "learning_rate": 3.718583200498814e-06, "loss": 0.3304, "step": 3527 }, { "epoch": 1.7345132743362832, "grad_norm": 21.25, "learning_rate": 3.7161812464585324e-06, "loss": 0.2358, "step": 3528 }, { "epoch": 1.7350049164208456, "grad_norm": 22.375, "learning_rate": 3.7137795226995293e-06, "loss": 0.1679, "step": 3529 }, { "epoch": 1.735496558505408, "grad_norm": 16.5, "learning_rate": 3.711378029927416e-06, "loss": 0.426, "step": 3530 }, { "epoch": 1.7359882005899705, "grad_norm": 12.375, "learning_rate": 3.7089767688477364e-06, "loss": 0.1574, "step": 3531 }, { "epoch": 1.736479842674533, "grad_norm": 15.625, "learning_rate": 3.7065757401659684e-06, "loss": 0.3232, "step": 3532 }, { "epoch": 1.7369714847590954, "grad_norm": 16.75, "learning_rate": 3.7041749445875188e-06, "loss": 0.2849, "step": 3533 }, { "epoch": 1.7374631268436578, "grad_norm": 18.25, "learning_rate": 3.701774382817727e-06, "loss": 0.2019, "step": 3534 }, { "epoch": 1.7379547689282202, "grad_norm": 30.25, "learning_rate": 3.699374055561865e-06, "loss": 0.2534, "step": 3535 }, { "epoch": 1.7384464110127826, "grad_norm": 15.75, "learning_rate": 3.6969739635251325e-06, "loss": 0.3734, "step": 3536 }, { "epoch": 1.7389380530973453, "grad_norm": 17.875, "learning_rate": 3.6945741074126647e-06, "loss": 0.318, "step": 3537 }, { "epoch": 1.7394296951819075, "grad_norm": 20.125, "learning_rate": 3.6921744879295237e-06, "loss": 0.3054, "step": 3538 }, { "epoch": 1.7399213372664701, "grad_norm": 44.75, "learning_rate": 3.6897751057807027e-06, "loss": 0.5638, "step": 3539 }, { "epoch": 1.7404129793510323, "grad_norm": 26.0, "learning_rate": 3.687375961671126e-06, "loss": 0.403, "step": 3540 }, { "epoch": 1.740904621435595, "grad_norm": 31.75, "learning_rate": 3.6849770563056485e-06, "loss": 0.4285, "step": 3541 }, { "epoch": 1.7413962635201572, "grad_norm": 22.625, "learning_rate": 3.6825783903890552e-06, "loss": 0.3642, "step": 3542 }, { "epoch": 1.7418879056047198, "grad_norm": 38.0, "learning_rate": 3.680179964626059e-06, "loss": 0.5722, "step": 3543 }, { "epoch": 1.742379547689282, "grad_norm": 24.375, "learning_rate": 3.6777817797213026e-06, "loss": 0.3395, "step": 3544 }, { "epoch": 1.7428711897738447, "grad_norm": 24.375, "learning_rate": 3.675383836379359e-06, "loss": 0.3141, "step": 3545 }, { "epoch": 1.7433628318584071, "grad_norm": 31.5, "learning_rate": 3.67298613530473e-06, "loss": 0.4405, "step": 3546 }, { "epoch": 1.7438544739429696, "grad_norm": 30.0, "learning_rate": 3.6705886772018445e-06, "loss": 0.5308, "step": 3547 }, { "epoch": 1.744346116027532, "grad_norm": 12.375, "learning_rate": 3.668191462775061e-06, "loss": 0.0501, "step": 3548 }, { "epoch": 1.7448377581120944, "grad_norm": 23.75, "learning_rate": 3.66579449272867e-06, "loss": 0.298, "step": 3549 }, { "epoch": 1.7453294001966568, "grad_norm": 24.375, "learning_rate": 3.6633977677668846e-06, "loss": 0.265, "step": 3550 }, { "epoch": 1.7458210422812193, "grad_norm": 20.5, "learning_rate": 3.661001288593849e-06, "loss": 0.2803, "step": 3551 }, { "epoch": 1.7463126843657817, "grad_norm": 22.5, "learning_rate": 3.658605055913634e-06, "loss": 0.2776, "step": 3552 }, { "epoch": 1.7468043264503441, "grad_norm": 19.0, "learning_rate": 3.6562090704302393e-06, "loss": 0.2271, "step": 3553 }, { "epoch": 1.7472959685349065, "grad_norm": 21.375, "learning_rate": 3.6538133328475904e-06, "loss": 0.4012, "step": 3554 }, { "epoch": 1.747787610619469, "grad_norm": 33.5, "learning_rate": 3.6514178438695394e-06, "loss": 0.366, "step": 3555 }, { "epoch": 1.7482792527040316, "grad_norm": 35.5, "learning_rate": 3.6490226041998692e-06, "loss": 0.4326, "step": 3556 }, { "epoch": 1.7487708947885938, "grad_norm": 19.0, "learning_rate": 3.6466276145422865e-06, "loss": 0.2377, "step": 3557 }, { "epoch": 1.7492625368731565, "grad_norm": 25.75, "learning_rate": 3.644232875600425e-06, "loss": 0.4232, "step": 3558 }, { "epoch": 1.7497541789577187, "grad_norm": 37.0, "learning_rate": 3.641838388077844e-06, "loss": 0.3249, "step": 3559 }, { "epoch": 1.7502458210422813, "grad_norm": 13.875, "learning_rate": 3.6394441526780294e-06, "loss": 0.3044, "step": 3560 }, { "epoch": 1.7507374631268435, "grad_norm": 20.375, "learning_rate": 3.6370501701043936e-06, "loss": 0.3926, "step": 3561 }, { "epoch": 1.7512291052114062, "grad_norm": 15.875, "learning_rate": 3.634656441060273e-06, "loss": 0.2583, "step": 3562 }, { "epoch": 1.7517207472959684, "grad_norm": 27.125, "learning_rate": 3.632262966248934e-06, "loss": 0.3579, "step": 3563 }, { "epoch": 1.752212389380531, "grad_norm": 59.75, "learning_rate": 3.6298697463735643e-06, "loss": 0.3076, "step": 3564 }, { "epoch": 1.7527040314650935, "grad_norm": 37.25, "learning_rate": 3.627476782137277e-06, "loss": 0.4862, "step": 3565 }, { "epoch": 1.7531956735496559, "grad_norm": 16.875, "learning_rate": 3.6250840742431106e-06, "loss": 0.4392, "step": 3566 }, { "epoch": 1.7536873156342183, "grad_norm": 26.0, "learning_rate": 3.6226916233940286e-06, "loss": 0.2847, "step": 3567 }, { "epoch": 1.7541789577187807, "grad_norm": 19.875, "learning_rate": 3.620299430292919e-06, "loss": 0.3985, "step": 3568 }, { "epoch": 1.7546705998033432, "grad_norm": 24.375, "learning_rate": 3.6179074956425934e-06, "loss": 0.3601, "step": 3569 }, { "epoch": 1.7551622418879056, "grad_norm": 8.0625, "learning_rate": 3.6155158201457864e-06, "loss": 0.0844, "step": 3570 }, { "epoch": 1.755653883972468, "grad_norm": 11.625, "learning_rate": 3.6131244045051618e-06, "loss": 0.1269, "step": 3571 }, { "epoch": 1.7561455260570304, "grad_norm": 40.0, "learning_rate": 3.610733249423301e-06, "loss": 0.2076, "step": 3572 }, { "epoch": 1.7566371681415929, "grad_norm": 16.5, "learning_rate": 3.608342355602712e-06, "loss": 0.2571, "step": 3573 }, { "epoch": 1.7571288102261553, "grad_norm": 29.0, "learning_rate": 3.6059517237458237e-06, "loss": 0.3161, "step": 3574 }, { "epoch": 1.757620452310718, "grad_norm": 20.75, "learning_rate": 3.60356135455499e-06, "loss": 0.3311, "step": 3575 }, { "epoch": 1.7581120943952802, "grad_norm": 28.125, "learning_rate": 3.6011712487324885e-06, "loss": 0.2477, "step": 3576 }, { "epoch": 1.7586037364798428, "grad_norm": 14.6875, "learning_rate": 3.5987814069805154e-06, "loss": 0.1776, "step": 3577 }, { "epoch": 1.759095378564405, "grad_norm": 22.875, "learning_rate": 3.5963918300011954e-06, "loss": 0.2875, "step": 3578 }, { "epoch": 1.7595870206489677, "grad_norm": 19.75, "learning_rate": 3.594002518496569e-06, "loss": 0.1597, "step": 3579 }, { "epoch": 1.7600786627335299, "grad_norm": 29.75, "learning_rate": 3.591613473168603e-06, "loss": 0.5403, "step": 3580 }, { "epoch": 1.7605703048180925, "grad_norm": 31.125, "learning_rate": 3.589224694719185e-06, "loss": 0.478, "step": 3581 }, { "epoch": 1.7610619469026547, "grad_norm": 24.25, "learning_rate": 3.586836183850124e-06, "loss": 0.2536, "step": 3582 }, { "epoch": 1.7615535889872174, "grad_norm": 17.875, "learning_rate": 3.5844479412631487e-06, "loss": 0.249, "step": 3583 }, { "epoch": 1.7620452310717798, "grad_norm": 31.25, "learning_rate": 3.5820599676599112e-06, "loss": 0.3188, "step": 3584 }, { "epoch": 1.7625368731563422, "grad_norm": 22.5, "learning_rate": 3.579672263741984e-06, "loss": 0.2773, "step": 3585 }, { "epoch": 1.7630285152409046, "grad_norm": 28.5, "learning_rate": 3.5772848302108607e-06, "loss": 0.4352, "step": 3586 }, { "epoch": 1.763520157325467, "grad_norm": 26.625, "learning_rate": 3.5748976677679556e-06, "loss": 0.4469, "step": 3587 }, { "epoch": 1.7640117994100295, "grad_norm": 37.25, "learning_rate": 3.5725107771146016e-06, "loss": 0.4689, "step": 3588 }, { "epoch": 1.764503441494592, "grad_norm": 25.375, "learning_rate": 3.5701241589520537e-06, "loss": 0.2958, "step": 3589 }, { "epoch": 1.7649950835791544, "grad_norm": 16.25, "learning_rate": 3.5677378139814855e-06, "loss": 0.2744, "step": 3590 }, { "epoch": 1.7654867256637168, "grad_norm": 23.625, "learning_rate": 3.565351742903991e-06, "loss": 0.2718, "step": 3591 }, { "epoch": 1.7659783677482792, "grad_norm": 15.3125, "learning_rate": 3.5629659464205823e-06, "loss": 0.154, "step": 3592 }, { "epoch": 1.7664700098328416, "grad_norm": 28.875, "learning_rate": 3.5605804252321957e-06, "loss": 0.2808, "step": 3593 }, { "epoch": 1.7669616519174043, "grad_norm": 10.625, "learning_rate": 3.5581951800396813e-06, "loss": 0.3446, "step": 3594 }, { "epoch": 1.7674532940019665, "grad_norm": 18.375, "learning_rate": 3.5558102115438093e-06, "loss": 0.2956, "step": 3595 }, { "epoch": 1.7679449360865291, "grad_norm": 25.0, "learning_rate": 3.55342552044527e-06, "loss": 0.4546, "step": 3596 }, { "epoch": 1.7684365781710913, "grad_norm": 40.5, "learning_rate": 3.551041107444671e-06, "loss": 0.4904, "step": 3597 }, { "epoch": 1.768928220255654, "grad_norm": 15.5625, "learning_rate": 3.5486569732425387e-06, "loss": 0.28, "step": 3598 }, { "epoch": 1.7694198623402162, "grad_norm": 33.5, "learning_rate": 3.546273118539315e-06, "loss": 0.3292, "step": 3599 }, { "epoch": 1.7699115044247788, "grad_norm": 24.125, "learning_rate": 3.5438895440353677e-06, "loss": 0.1641, "step": 3600 }, { "epoch": 1.770403146509341, "grad_norm": 16.25, "learning_rate": 3.5415062504309737e-06, "loss": 0.2035, "step": 3601 }, { "epoch": 1.7708947885939037, "grad_norm": 14.9375, "learning_rate": 3.5391232384263303e-06, "loss": 0.2231, "step": 3602 }, { "epoch": 1.7713864306784661, "grad_norm": 21.625, "learning_rate": 3.5367405087215526e-06, "loss": 0.4172, "step": 3603 }, { "epoch": 1.7718780727630286, "grad_norm": 22.375, "learning_rate": 3.5343580620166723e-06, "loss": 0.2305, "step": 3604 }, { "epoch": 1.772369714847591, "grad_norm": 18.75, "learning_rate": 3.531975899011638e-06, "loss": 0.3624, "step": 3605 }, { "epoch": 1.7728613569321534, "grad_norm": 15.0625, "learning_rate": 3.529594020406313e-06, "loss": 0.3008, "step": 3606 }, { "epoch": 1.7733529990167158, "grad_norm": 15.625, "learning_rate": 3.527212426900483e-06, "loss": 0.2254, "step": 3607 }, { "epoch": 1.7738446411012783, "grad_norm": 22.375, "learning_rate": 3.5248311191938444e-06, "loss": 0.2163, "step": 3608 }, { "epoch": 1.7743362831858407, "grad_norm": 19.0, "learning_rate": 3.5224500979860106e-06, "loss": 0.2354, "step": 3609 }, { "epoch": 1.7748279252704031, "grad_norm": 20.375, "learning_rate": 3.520069363976512e-06, "loss": 0.1366, "step": 3610 }, { "epoch": 1.7753195673549655, "grad_norm": 16.875, "learning_rate": 3.5176889178647937e-06, "loss": 0.1883, "step": 3611 }, { "epoch": 1.775811209439528, "grad_norm": 23.25, "learning_rate": 3.5153087603502164e-06, "loss": 0.3251, "step": 3612 }, { "epoch": 1.7763028515240906, "grad_norm": 33.0, "learning_rate": 3.5129288921320567e-06, "loss": 0.2576, "step": 3613 }, { "epoch": 1.7767944936086528, "grad_norm": 32.5, "learning_rate": 3.5105493139095035e-06, "loss": 0.6045, "step": 3614 }, { "epoch": 1.7772861356932155, "grad_norm": 18.375, "learning_rate": 3.5081700263816667e-06, "loss": 0.157, "step": 3615 }, { "epoch": 1.7777777777777777, "grad_norm": 25.25, "learning_rate": 3.5057910302475643e-06, "loss": 0.3742, "step": 3616 }, { "epoch": 1.7782694198623403, "grad_norm": 23.5, "learning_rate": 3.5034123262061316e-06, "loss": 0.4274, "step": 3617 }, { "epoch": 1.7787610619469025, "grad_norm": 20.125, "learning_rate": 3.5010339149562185e-06, "loss": 0.3757, "step": 3618 }, { "epoch": 1.7792527040314652, "grad_norm": 55.25, "learning_rate": 3.4986557971965855e-06, "loss": 0.2957, "step": 3619 }, { "epoch": 1.7797443461160274, "grad_norm": 12.875, "learning_rate": 3.496277973625913e-06, "loss": 0.179, "step": 3620 }, { "epoch": 1.78023598820059, "grad_norm": 40.5, "learning_rate": 3.493900444942787e-06, "loss": 0.6519, "step": 3621 }, { "epoch": 1.7807276302851525, "grad_norm": 21.5, "learning_rate": 3.4915232118457166e-06, "loss": 0.2568, "step": 3622 }, { "epoch": 1.781219272369715, "grad_norm": 42.25, "learning_rate": 3.4891462750331148e-06, "loss": 0.4009, "step": 3623 }, { "epoch": 1.7817109144542773, "grad_norm": 14.5, "learning_rate": 3.486769635203312e-06, "loss": 0.2145, "step": 3624 }, { "epoch": 1.7822025565388397, "grad_norm": 42.0, "learning_rate": 3.4843932930545525e-06, "loss": 0.4428, "step": 3625 }, { "epoch": 1.7826941986234022, "grad_norm": 24.125, "learning_rate": 3.48201724928499e-06, "loss": 0.1752, "step": 3626 }, { "epoch": 1.7831858407079646, "grad_norm": 37.75, "learning_rate": 3.4796415045926926e-06, "loss": 0.4532, "step": 3627 }, { "epoch": 1.783677482792527, "grad_norm": 25.125, "learning_rate": 3.4772660596756387e-06, "loss": 0.3208, "step": 3628 }, { "epoch": 1.7841691248770895, "grad_norm": 29.625, "learning_rate": 3.474890915231721e-06, "loss": 0.2796, "step": 3629 }, { "epoch": 1.7846607669616519, "grad_norm": 28.5, "learning_rate": 3.4725160719587436e-06, "loss": 0.4075, "step": 3630 }, { "epoch": 1.7851524090462143, "grad_norm": 17.5, "learning_rate": 3.47014153055442e-06, "loss": 0.2959, "step": 3631 }, { "epoch": 1.785644051130777, "grad_norm": 32.5, "learning_rate": 3.467767291716377e-06, "loss": 0.3813, "step": 3632 }, { "epoch": 1.7861356932153392, "grad_norm": 25.5, "learning_rate": 3.465393356142152e-06, "loss": 0.2676, "step": 3633 }, { "epoch": 1.7866273352999018, "grad_norm": 17.75, "learning_rate": 3.463019724529192e-06, "loss": 0.2504, "step": 3634 }, { "epoch": 1.787118977384464, "grad_norm": 35.0, "learning_rate": 3.4606463975748563e-06, "loss": 0.3397, "step": 3635 }, { "epoch": 1.7876106194690267, "grad_norm": 30.5, "learning_rate": 3.4582733759764126e-06, "loss": 0.3872, "step": 3636 }, { "epoch": 1.7881022615535889, "grad_norm": 35.25, "learning_rate": 3.4559006604310446e-06, "loss": 0.4915, "step": 3637 }, { "epoch": 1.7885939036381515, "grad_norm": 31.125, "learning_rate": 3.4535282516358386e-06, "loss": 0.465, "step": 3638 }, { "epoch": 1.7890855457227137, "grad_norm": 14.25, "learning_rate": 3.451156150287796e-06, "loss": 0.2491, "step": 3639 }, { "epoch": 1.7895771878072764, "grad_norm": 37.75, "learning_rate": 3.4487843570838247e-06, "loss": 0.4015, "step": 3640 }, { "epoch": 1.7900688298918386, "grad_norm": 18.5, "learning_rate": 3.4464128727207444e-06, "loss": 0.2419, "step": 3641 }, { "epoch": 1.7905604719764012, "grad_norm": 23.25, "learning_rate": 3.4440416978952824e-06, "loss": 0.3228, "step": 3642 }, { "epoch": 1.7910521140609637, "grad_norm": 13.8125, "learning_rate": 3.441670833304074e-06, "loss": 0.1951, "step": 3643 }, { "epoch": 1.791543756145526, "grad_norm": 21.875, "learning_rate": 3.439300279643669e-06, "loss": 0.2091, "step": 3644 }, { "epoch": 1.7920353982300885, "grad_norm": 63.25, "learning_rate": 3.43693003761052e-06, "loss": 0.4441, "step": 3645 }, { "epoch": 1.792527040314651, "grad_norm": 30.0, "learning_rate": 3.4345601079009897e-06, "loss": 0.2596, "step": 3646 }, { "epoch": 1.7930186823992134, "grad_norm": 39.75, "learning_rate": 3.4321904912113502e-06, "loss": 0.6783, "step": 3647 }, { "epoch": 1.7935103244837758, "grad_norm": 27.25, "learning_rate": 3.4298211882377795e-06, "loss": 0.3444, "step": 3648 }, { "epoch": 1.7940019665683382, "grad_norm": 20.25, "learning_rate": 3.4274521996763657e-06, "loss": 0.2846, "step": 3649 }, { "epoch": 1.7944936086529006, "grad_norm": 31.5, "learning_rate": 3.425083526223101e-06, "loss": 0.1983, "step": 3650 }, { "epoch": 1.7949852507374633, "grad_norm": 33.25, "learning_rate": 3.4227151685738917e-06, "loss": 0.1906, "step": 3651 }, { "epoch": 1.7954768928220255, "grad_norm": 30.5, "learning_rate": 3.420347127424545e-06, "loss": 0.4048, "step": 3652 }, { "epoch": 1.7959685349065881, "grad_norm": 27.125, "learning_rate": 3.417979403470778e-06, "loss": 0.2986, "step": 3653 }, { "epoch": 1.7964601769911503, "grad_norm": 21.5, "learning_rate": 3.415611997408213e-06, "loss": 0.3061, "step": 3654 }, { "epoch": 1.796951819075713, "grad_norm": 33.0, "learning_rate": 3.4132449099323795e-06, "loss": 0.2659, "step": 3655 }, { "epoch": 1.7974434611602752, "grad_norm": 29.375, "learning_rate": 3.4108781417387144e-06, "loss": 0.3874, "step": 3656 }, { "epoch": 1.7979351032448379, "grad_norm": 12.5625, "learning_rate": 3.408511693522561e-06, "loss": 0.2547, "step": 3657 }, { "epoch": 1.7984267453294, "grad_norm": 14.375, "learning_rate": 3.4061455659791636e-06, "loss": 0.2932, "step": 3658 }, { "epoch": 1.7989183874139627, "grad_norm": 26.375, "learning_rate": 3.403779759803682e-06, "loss": 0.3237, "step": 3659 }, { "epoch": 1.799410029498525, "grad_norm": 61.75, "learning_rate": 3.4014142756911733e-06, "loss": 0.4538, "step": 3660 }, { "epoch": 1.7999016715830876, "grad_norm": 26.5, "learning_rate": 3.3990491143366025e-06, "loss": 0.4972, "step": 3661 }, { "epoch": 1.80039331366765, "grad_norm": 27.375, "learning_rate": 3.3966842764348406e-06, "loss": 0.3126, "step": 3662 }, { "epoch": 1.8008849557522124, "grad_norm": 28.125, "learning_rate": 3.3943197626806606e-06, "loss": 0.4451, "step": 3663 }, { "epoch": 1.8013765978367748, "grad_norm": 14.375, "learning_rate": 3.391955573768746e-06, "loss": 0.3425, "step": 3664 }, { "epoch": 1.8018682399213373, "grad_norm": 30.5, "learning_rate": 3.3895917103936784e-06, "loss": 0.4797, "step": 3665 }, { "epoch": 1.8023598820058997, "grad_norm": 31.375, "learning_rate": 3.3872281732499496e-06, "loss": 0.2689, "step": 3666 }, { "epoch": 1.8028515240904621, "grad_norm": 26.75, "learning_rate": 3.384864963031951e-06, "loss": 0.1758, "step": 3667 }, { "epoch": 1.8033431661750245, "grad_norm": 12.3125, "learning_rate": 3.3825020804339787e-06, "loss": 0.189, "step": 3668 }, { "epoch": 1.803834808259587, "grad_norm": 20.5, "learning_rate": 3.380139526150236e-06, "loss": 0.252, "step": 3669 }, { "epoch": 1.8043264503441496, "grad_norm": 37.0, "learning_rate": 3.3777773008748265e-06, "loss": 0.451, "step": 3670 }, { "epoch": 1.8048180924287118, "grad_norm": 19.875, "learning_rate": 3.375415405301757e-06, "loss": 0.39, "step": 3671 }, { "epoch": 1.8053097345132745, "grad_norm": 25.875, "learning_rate": 3.3730538401249384e-06, "loss": 0.3615, "step": 3672 }, { "epoch": 1.8058013765978367, "grad_norm": 40.5, "learning_rate": 3.370692606038184e-06, "loss": 0.6415, "step": 3673 }, { "epoch": 1.8062930186823993, "grad_norm": 51.5, "learning_rate": 3.368331703735212e-06, "loss": 0.242, "step": 3674 }, { "epoch": 1.8067846607669615, "grad_norm": 24.25, "learning_rate": 3.3659711339096412e-06, "loss": 0.2897, "step": 3675 }, { "epoch": 1.8072763028515242, "grad_norm": 36.0, "learning_rate": 3.3636108972549915e-06, "loss": 0.4367, "step": 3676 }, { "epoch": 1.8077679449360864, "grad_norm": 15.1875, "learning_rate": 3.361250994464688e-06, "loss": 0.2024, "step": 3677 }, { "epoch": 1.808259587020649, "grad_norm": 33.75, "learning_rate": 3.358891426232055e-06, "loss": 0.3969, "step": 3678 }, { "epoch": 1.8087512291052112, "grad_norm": 13.9375, "learning_rate": 3.3565321932503204e-06, "loss": 0.274, "step": 3679 }, { "epoch": 1.809242871189774, "grad_norm": 13.1875, "learning_rate": 3.354173296212609e-06, "loss": 0.2364, "step": 3680 }, { "epoch": 1.8097345132743363, "grad_norm": 41.25, "learning_rate": 3.3518147358119573e-06, "loss": 0.4605, "step": 3681 }, { "epoch": 1.8102261553588987, "grad_norm": 21.125, "learning_rate": 3.349456512741292e-06, "loss": 0.3692, "step": 3682 }, { "epoch": 1.8107177974434612, "grad_norm": 34.75, "learning_rate": 3.3470986276934463e-06, "loss": 0.4887, "step": 3683 }, { "epoch": 1.8112094395280236, "grad_norm": 25.625, "learning_rate": 3.344741081361152e-06, "loss": 0.3804, "step": 3684 }, { "epoch": 1.811701081612586, "grad_norm": 14.25, "learning_rate": 3.3423838744370433e-06, "loss": 0.1881, "step": 3685 }, { "epoch": 1.8121927236971485, "grad_norm": 18.75, "learning_rate": 3.340027007613652e-06, "loss": 0.2257, "step": 3686 }, { "epoch": 1.8126843657817109, "grad_norm": 37.0, "learning_rate": 3.3376704815834107e-06, "loss": 0.4214, "step": 3687 }, { "epoch": 1.8131760078662733, "grad_norm": 36.75, "learning_rate": 3.335314297038656e-06, "loss": 0.4551, "step": 3688 }, { "epoch": 1.8136676499508357, "grad_norm": 20.5, "learning_rate": 3.3329584546716196e-06, "loss": 0.2543, "step": 3689 }, { "epoch": 1.8141592920353982, "grad_norm": 12.75, "learning_rate": 3.330602955174433e-06, "loss": 0.2523, "step": 3690 }, { "epoch": 1.8146509341199608, "grad_norm": 18.75, "learning_rate": 3.328247799239129e-06, "loss": 0.1871, "step": 3691 }, { "epoch": 1.815142576204523, "grad_norm": 18.5, "learning_rate": 3.325892987557638e-06, "loss": 0.2064, "step": 3692 }, { "epoch": 1.8156342182890857, "grad_norm": 13.3125, "learning_rate": 3.323538520821789e-06, "loss": 0.288, "step": 3693 }, { "epoch": 1.8161258603736479, "grad_norm": 38.5, "learning_rate": 3.321184399723311e-06, "loss": 0.3784, "step": 3694 }, { "epoch": 1.8166175024582105, "grad_norm": 37.75, "learning_rate": 3.318830624953828e-06, "loss": 0.3146, "step": 3695 }, { "epoch": 1.8171091445427727, "grad_norm": 17.875, "learning_rate": 3.31647719720487e-06, "loss": 0.4313, "step": 3696 }, { "epoch": 1.8176007866273354, "grad_norm": 30.125, "learning_rate": 3.3141241171678577e-06, "loss": 0.2618, "step": 3697 }, { "epoch": 1.8180924287118976, "grad_norm": 21.0, "learning_rate": 3.311771385534111e-06, "loss": 0.3402, "step": 3698 }, { "epoch": 1.8185840707964602, "grad_norm": 21.0, "learning_rate": 3.3094190029948496e-06, "loss": 0.2462, "step": 3699 }, { "epoch": 1.8190757128810227, "grad_norm": 24.125, "learning_rate": 3.30706697024119e-06, "loss": 0.3562, "step": 3700 }, { "epoch": 1.819567354965585, "grad_norm": 24.875, "learning_rate": 3.3047152879641433e-06, "loss": 0.3636, "step": 3701 }, { "epoch": 1.8200589970501475, "grad_norm": 52.0, "learning_rate": 3.30236395685462e-06, "loss": 0.4061, "step": 3702 }, { "epoch": 1.82055063913471, "grad_norm": 18.25, "learning_rate": 3.3000129776034294e-06, "loss": 0.2668, "step": 3703 }, { "epoch": 1.8210422812192724, "grad_norm": 20.75, "learning_rate": 3.297662350901274e-06, "loss": 0.2897, "step": 3704 }, { "epoch": 1.8215339233038348, "grad_norm": 16.875, "learning_rate": 3.2953120774387543e-06, "loss": 0.4056, "step": 3705 }, { "epoch": 1.8220255653883972, "grad_norm": 15.9375, "learning_rate": 3.292962157906366e-06, "loss": 0.2416, "step": 3706 }, { "epoch": 1.8225172074729596, "grad_norm": 31.25, "learning_rate": 3.2906125929945004e-06, "loss": 0.4808, "step": 3707 }, { "epoch": 1.823008849557522, "grad_norm": 17.75, "learning_rate": 3.288263383393448e-06, "loss": 0.3528, "step": 3708 }, { "epoch": 1.8235004916420845, "grad_norm": 42.25, "learning_rate": 3.28591452979339e-06, "loss": 0.4621, "step": 3709 }, { "epoch": 1.8239921337266471, "grad_norm": 51.25, "learning_rate": 3.2835660328844098e-06, "loss": 0.7903, "step": 3710 }, { "epoch": 1.8244837758112094, "grad_norm": 23.125, "learning_rate": 3.2812178933564777e-06, "loss": 0.3946, "step": 3711 }, { "epoch": 1.824975417895772, "grad_norm": 11.3125, "learning_rate": 3.2788701118994636e-06, "loss": 0.3082, "step": 3712 }, { "epoch": 1.8254670599803342, "grad_norm": 20.0, "learning_rate": 3.2765226892031337e-06, "loss": 0.1576, "step": 3713 }, { "epoch": 1.8259587020648969, "grad_norm": 22.875, "learning_rate": 3.274175625957145e-06, "loss": 0.3127, "step": 3714 }, { "epoch": 1.826450344149459, "grad_norm": 20.125, "learning_rate": 3.271828922851053e-06, "loss": 0.3375, "step": 3715 }, { "epoch": 1.8269419862340217, "grad_norm": 21.75, "learning_rate": 3.2694825805743016e-06, "loss": 0.3413, "step": 3716 }, { "epoch": 1.827433628318584, "grad_norm": 27.75, "learning_rate": 3.2671365998162343e-06, "loss": 0.4951, "step": 3717 }, { "epoch": 1.8279252704031466, "grad_norm": 23.5, "learning_rate": 3.2647909812660855e-06, "loss": 0.2972, "step": 3718 }, { "epoch": 1.828416912487709, "grad_norm": 18.875, "learning_rate": 3.2624457256129855e-06, "loss": 0.2697, "step": 3719 }, { "epoch": 1.8289085545722714, "grad_norm": 44.5, "learning_rate": 3.260100833545955e-06, "loss": 0.3643, "step": 3720 }, { "epoch": 1.8294001966568338, "grad_norm": 35.5, "learning_rate": 3.2577563057539103e-06, "loss": 0.3299, "step": 3721 }, { "epoch": 1.8298918387413963, "grad_norm": 50.25, "learning_rate": 3.2554121429256588e-06, "loss": 0.3198, "step": 3722 }, { "epoch": 1.8303834808259587, "grad_norm": 14.25, "learning_rate": 3.2530683457499016e-06, "loss": 0.1912, "step": 3723 }, { "epoch": 1.8308751229105211, "grad_norm": 45.5, "learning_rate": 3.250724914915231e-06, "loss": 0.3282, "step": 3724 }, { "epoch": 1.8313667649950836, "grad_norm": 27.375, "learning_rate": 3.248381851110136e-06, "loss": 0.3971, "step": 3725 }, { "epoch": 1.831858407079646, "grad_norm": 47.0, "learning_rate": 3.246039155022994e-06, "loss": 0.5263, "step": 3726 }, { "epoch": 1.8323500491642084, "grad_norm": 27.625, "learning_rate": 3.2436968273420742e-06, "loss": 0.3351, "step": 3727 }, { "epoch": 1.8328416912487708, "grad_norm": 25.875, "learning_rate": 3.241354868755539e-06, "loss": 0.3379, "step": 3728 }, { "epoch": 1.8333333333333335, "grad_norm": 22.5, "learning_rate": 3.2390132799514414e-06, "loss": 0.3395, "step": 3729 }, { "epoch": 1.8338249754178957, "grad_norm": 19.0, "learning_rate": 3.2366720616177268e-06, "loss": 0.2518, "step": 3730 }, { "epoch": 1.8343166175024583, "grad_norm": 20.125, "learning_rate": 3.234331214442229e-06, "loss": 0.3584, "step": 3731 }, { "epoch": 1.8348082595870205, "grad_norm": 66.0, "learning_rate": 3.2319907391126784e-06, "loss": 0.4179, "step": 3732 }, { "epoch": 1.8352999016715832, "grad_norm": 23.125, "learning_rate": 3.229650636316692e-06, "loss": 0.3571, "step": 3733 }, { "epoch": 1.8357915437561454, "grad_norm": 20.375, "learning_rate": 3.2273109067417764e-06, "loss": 0.2871, "step": 3734 }, { "epoch": 1.836283185840708, "grad_norm": 17.125, "learning_rate": 3.2249715510753316e-06, "loss": 0.276, "step": 3735 }, { "epoch": 1.8367748279252702, "grad_norm": 29.625, "learning_rate": 3.222632570004646e-06, "loss": 0.2511, "step": 3736 }, { "epoch": 1.837266470009833, "grad_norm": 14.5625, "learning_rate": 3.2202939642168976e-06, "loss": 0.1995, "step": 3737 }, { "epoch": 1.8377581120943953, "grad_norm": 20.75, "learning_rate": 3.2179557343991555e-06, "loss": 0.1897, "step": 3738 }, { "epoch": 1.8382497541789578, "grad_norm": 41.25, "learning_rate": 3.2156178812383755e-06, "loss": 0.4318, "step": 3739 }, { "epoch": 1.8387413962635202, "grad_norm": 23.25, "learning_rate": 3.2132804054214092e-06, "loss": 0.3718, "step": 3740 }, { "epoch": 1.8392330383480826, "grad_norm": 12.3125, "learning_rate": 3.2109433076349902e-06, "loss": 0.2779, "step": 3741 }, { "epoch": 1.839724680432645, "grad_norm": 30.25, "learning_rate": 3.208606588565745e-06, "loss": 0.3628, "step": 3742 }, { "epoch": 1.8402163225172075, "grad_norm": 27.75, "learning_rate": 3.2062702489001874e-06, "loss": 0.425, "step": 3743 }, { "epoch": 1.8407079646017699, "grad_norm": 17.5, "learning_rate": 3.2039342893247187e-06, "loss": 0.0952, "step": 3744 }, { "epoch": 1.8411996066863323, "grad_norm": 21.875, "learning_rate": 3.2015987105256324e-06, "loss": 0.2643, "step": 3745 }, { "epoch": 1.8416912487708947, "grad_norm": 35.5, "learning_rate": 3.1992635131891045e-06, "loss": 0.4211, "step": 3746 }, { "epoch": 1.8421828908554572, "grad_norm": 20.875, "learning_rate": 3.196928698001205e-06, "loss": 0.3026, "step": 3747 }, { "epoch": 1.8426745329400198, "grad_norm": 39.75, "learning_rate": 3.194594265647888e-06, "loss": 0.4844, "step": 3748 }, { "epoch": 1.843166175024582, "grad_norm": 24.25, "learning_rate": 3.192260216814995e-06, "loss": 0.3401, "step": 3749 }, { "epoch": 1.8436578171091447, "grad_norm": 13.1875, "learning_rate": 3.1899265521882564e-06, "loss": 0.1316, "step": 3750 }, { "epoch": 1.8441494591937069, "grad_norm": 22.0, "learning_rate": 3.1875932724532875e-06, "loss": 0.2035, "step": 3751 }, { "epoch": 1.8446411012782695, "grad_norm": 19.0, "learning_rate": 3.185260378295595e-06, "loss": 0.2838, "step": 3752 }, { "epoch": 1.8451327433628317, "grad_norm": 10.875, "learning_rate": 3.182927870400565e-06, "loss": 0.1626, "step": 3753 }, { "epoch": 1.8456243854473944, "grad_norm": 27.0, "learning_rate": 3.180595749453479e-06, "loss": 0.3495, "step": 3754 }, { "epoch": 1.8461160275319566, "grad_norm": 11.8125, "learning_rate": 3.178264016139497e-06, "loss": 0.2155, "step": 3755 }, { "epoch": 1.8466076696165192, "grad_norm": 20.0, "learning_rate": 3.1759326711436686e-06, "loss": 0.2344, "step": 3756 }, { "epoch": 1.8470993117010817, "grad_norm": 30.125, "learning_rate": 3.173601715150931e-06, "loss": 0.3212, "step": 3757 }, { "epoch": 1.847590953785644, "grad_norm": 32.0, "learning_rate": 3.171271148846104e-06, "loss": 0.298, "step": 3758 }, { "epoch": 1.8480825958702065, "grad_norm": 18.625, "learning_rate": 3.1689409729138946e-06, "loss": 0.2241, "step": 3759 }, { "epoch": 1.848574237954769, "grad_norm": 38.5, "learning_rate": 3.166611188038893e-06, "loss": 0.3484, "step": 3760 }, { "epoch": 1.8490658800393314, "grad_norm": 18.5, "learning_rate": 3.164281794905577e-06, "loss": 0.2907, "step": 3761 }, { "epoch": 1.8495575221238938, "grad_norm": 32.25, "learning_rate": 3.161952794198309e-06, "loss": 0.4754, "step": 3762 }, { "epoch": 1.8500491642084562, "grad_norm": 27.375, "learning_rate": 3.1596241866013354e-06, "loss": 0.2951, "step": 3763 }, { "epoch": 1.8505408062930186, "grad_norm": 62.25, "learning_rate": 3.1572959727987873e-06, "loss": 0.303, "step": 3764 }, { "epoch": 1.851032448377581, "grad_norm": 48.0, "learning_rate": 3.1549681534746797e-06, "loss": 0.398, "step": 3765 }, { "epoch": 1.8515240904621435, "grad_norm": 36.0, "learning_rate": 3.152640729312912e-06, "loss": 0.503, "step": 3766 }, { "epoch": 1.8520157325467062, "grad_norm": 32.25, "learning_rate": 3.1503137009972677e-06, "loss": 0.3729, "step": 3767 }, { "epoch": 1.8525073746312684, "grad_norm": 30.625, "learning_rate": 3.147987069211412e-06, "loss": 0.2605, "step": 3768 }, { "epoch": 1.852999016715831, "grad_norm": 16.5, "learning_rate": 3.145660834638899e-06, "loss": 0.2809, "step": 3769 }, { "epoch": 1.8534906588003932, "grad_norm": 11.875, "learning_rate": 3.1433349979631606e-06, "loss": 0.1246, "step": 3770 }, { "epoch": 1.8539823008849559, "grad_norm": 20.875, "learning_rate": 3.1410095598675143e-06, "loss": 0.209, "step": 3771 }, { "epoch": 1.854473942969518, "grad_norm": 46.75, "learning_rate": 3.1386845210351593e-06, "loss": 0.4268, "step": 3772 }, { "epoch": 1.8549655850540807, "grad_norm": 34.0, "learning_rate": 3.1363598821491783e-06, "loss": 0.3965, "step": 3773 }, { "epoch": 1.855457227138643, "grad_norm": 26.375, "learning_rate": 3.1340356438925375e-06, "loss": 0.245, "step": 3774 }, { "epoch": 1.8559488692232056, "grad_norm": 48.0, "learning_rate": 3.131711806948082e-06, "loss": 0.5646, "step": 3775 }, { "epoch": 1.856440511307768, "grad_norm": 22.0, "learning_rate": 3.1293883719985412e-06, "loss": 0.2222, "step": 3776 }, { "epoch": 1.8569321533923304, "grad_norm": 49.5, "learning_rate": 3.127065339726529e-06, "loss": 0.3992, "step": 3777 }, { "epoch": 1.8574237954768928, "grad_norm": 31.625, "learning_rate": 3.1247427108145383e-06, "loss": 0.3369, "step": 3778 }, { "epoch": 1.8579154375614553, "grad_norm": 11.125, "learning_rate": 3.122420485944942e-06, "loss": 0.2089, "step": 3779 }, { "epoch": 1.8584070796460177, "grad_norm": 11.1875, "learning_rate": 3.1200986657999965e-06, "loss": 0.0863, "step": 3780 }, { "epoch": 1.8588987217305801, "grad_norm": 13.8125, "learning_rate": 3.117777251061838e-06, "loss": 0.2627, "step": 3781 }, { "epoch": 1.8593903638151426, "grad_norm": 16.625, "learning_rate": 3.115456242412485e-06, "loss": 0.2319, "step": 3782 }, { "epoch": 1.859882005899705, "grad_norm": 31.0, "learning_rate": 3.1131356405338353e-06, "loss": 0.3067, "step": 3783 }, { "epoch": 1.8603736479842674, "grad_norm": 18.0, "learning_rate": 3.110815446107669e-06, "loss": 0.2941, "step": 3784 }, { "epoch": 1.8608652900688298, "grad_norm": 14.3125, "learning_rate": 3.1084956598156457e-06, "loss": 0.1994, "step": 3785 }, { "epoch": 1.8613569321533925, "grad_norm": 37.5, "learning_rate": 3.1061762823393038e-06, "loss": 0.5806, "step": 3786 }, { "epoch": 1.8618485742379547, "grad_norm": 22.125, "learning_rate": 3.103857314360063e-06, "loss": 0.2202, "step": 3787 }, { "epoch": 1.8623402163225173, "grad_norm": 16.125, "learning_rate": 3.101538756559222e-06, "loss": 0.3915, "step": 3788 }, { "epoch": 1.8628318584070795, "grad_norm": 12.5625, "learning_rate": 3.0992206096179593e-06, "loss": 0.2949, "step": 3789 }, { "epoch": 1.8633235004916422, "grad_norm": 41.0, "learning_rate": 3.096902874217331e-06, "loss": 0.3343, "step": 3790 }, { "epoch": 1.8638151425762044, "grad_norm": 26.25, "learning_rate": 3.0945855510382776e-06, "loss": 0.321, "step": 3791 }, { "epoch": 1.864306784660767, "grad_norm": 39.75, "learning_rate": 3.0922686407616124e-06, "loss": 0.537, "step": 3792 }, { "epoch": 1.8647984267453293, "grad_norm": 14.625, "learning_rate": 3.0899521440680304e-06, "loss": 0.162, "step": 3793 }, { "epoch": 1.865290068829892, "grad_norm": 43.75, "learning_rate": 3.087636061638105e-06, "loss": 0.6032, "step": 3794 }, { "epoch": 1.8657817109144543, "grad_norm": 27.125, "learning_rate": 3.0853203941522855e-06, "loss": 0.238, "step": 3795 }, { "epoch": 1.8662733529990168, "grad_norm": 27.5, "learning_rate": 3.0830051422909035e-06, "loss": 0.3715, "step": 3796 }, { "epoch": 1.8667649950835792, "grad_norm": 16.625, "learning_rate": 3.0806903067341656e-06, "loss": 0.2166, "step": 3797 }, { "epoch": 1.8672566371681416, "grad_norm": 41.5, "learning_rate": 3.078375888162155e-06, "loss": 0.3779, "step": 3798 }, { "epoch": 1.867748279252704, "grad_norm": 44.0, "learning_rate": 3.0760618872548366e-06, "loss": 0.2104, "step": 3799 }, { "epoch": 1.8682399213372665, "grad_norm": 9.1875, "learning_rate": 3.0737483046920477e-06, "loss": 0.1909, "step": 3800 }, { "epoch": 1.868731563421829, "grad_norm": 13.0, "learning_rate": 3.0714351411535076e-06, "loss": 0.3143, "step": 3801 }, { "epoch": 1.8692232055063913, "grad_norm": 27.625, "learning_rate": 3.069122397318808e-06, "loss": 0.2983, "step": 3802 }, { "epoch": 1.8697148475909537, "grad_norm": 32.25, "learning_rate": 3.0668100738674206e-06, "loss": 0.4045, "step": 3803 }, { "epoch": 1.8702064896755162, "grad_norm": 48.25, "learning_rate": 3.0644981714786912e-06, "loss": 0.5575, "step": 3804 }, { "epoch": 1.8706981317600788, "grad_norm": 41.0, "learning_rate": 3.0621866908318426e-06, "loss": 0.3829, "step": 3805 }, { "epoch": 1.871189773844641, "grad_norm": 10.625, "learning_rate": 3.0598756326059744e-06, "loss": 0.2226, "step": 3806 }, { "epoch": 1.8716814159292037, "grad_norm": 22.125, "learning_rate": 3.0575649974800637e-06, "loss": 0.3344, "step": 3807 }, { "epoch": 1.8721730580137659, "grad_norm": 15.9375, "learning_rate": 3.0552547861329586e-06, "loss": 0.2208, "step": 3808 }, { "epoch": 1.8726647000983285, "grad_norm": 13.25, "learning_rate": 3.052944999243387e-06, "loss": 0.2822, "step": 3809 }, { "epoch": 1.8731563421828907, "grad_norm": 31.75, "learning_rate": 3.05063563748995e-06, "loss": 0.3345, "step": 3810 }, { "epoch": 1.8736479842674534, "grad_norm": 25.625, "learning_rate": 3.048326701551125e-06, "loss": 0.2541, "step": 3811 }, { "epoch": 1.8741396263520156, "grad_norm": 14.6875, "learning_rate": 3.0460181921052604e-06, "loss": 0.2246, "step": 3812 }, { "epoch": 1.8746312684365782, "grad_norm": 24.125, "learning_rate": 3.0437101098305864e-06, "loss": 0.264, "step": 3813 }, { "epoch": 1.8751229105211407, "grad_norm": 35.5, "learning_rate": 3.0414024554052035e-06, "loss": 0.3919, "step": 3814 }, { "epoch": 1.875614552605703, "grad_norm": 17.75, "learning_rate": 3.0390952295070854e-06, "loss": 0.3373, "step": 3815 }, { "epoch": 1.8761061946902655, "grad_norm": 25.875, "learning_rate": 3.036788432814082e-06, "loss": 0.4092, "step": 3816 }, { "epoch": 1.876597836774828, "grad_norm": 45.75, "learning_rate": 3.034482066003916e-06, "loss": 0.3639, "step": 3817 }, { "epoch": 1.8770894788593904, "grad_norm": 65.0, "learning_rate": 3.0321761297541847e-06, "loss": 0.6752, "step": 3818 }, { "epoch": 1.8775811209439528, "grad_norm": 54.25, "learning_rate": 3.029870624742357e-06, "loss": 0.403, "step": 3819 }, { "epoch": 1.8780727630285152, "grad_norm": 18.125, "learning_rate": 3.0275655516457768e-06, "loss": 0.2258, "step": 3820 }, { "epoch": 1.8785644051130777, "grad_norm": 24.875, "learning_rate": 3.025260911141663e-06, "loss": 0.41, "step": 3821 }, { "epoch": 1.87905604719764, "grad_norm": 17.5, "learning_rate": 3.0229567039071044e-06, "loss": 0.253, "step": 3822 }, { "epoch": 1.8795476892822025, "grad_norm": 23.125, "learning_rate": 3.0206529306190624e-06, "loss": 0.4163, "step": 3823 }, { "epoch": 1.8800393313667652, "grad_norm": 22.0, "learning_rate": 3.0183495919543723e-06, "loss": 0.3339, "step": 3824 }, { "epoch": 1.8805309734513274, "grad_norm": 17.75, "learning_rate": 3.0160466885897416e-06, "loss": 0.3948, "step": 3825 }, { "epoch": 1.88102261553589, "grad_norm": 50.0, "learning_rate": 3.0137442212017495e-06, "loss": 0.4128, "step": 3826 }, { "epoch": 1.8815142576204522, "grad_norm": 31.625, "learning_rate": 3.0114421904668448e-06, "loss": 0.3516, "step": 3827 }, { "epoch": 1.8820058997050149, "grad_norm": 17.875, "learning_rate": 3.0091405970613547e-06, "loss": 0.2622, "step": 3828 }, { "epoch": 1.882497541789577, "grad_norm": 24.5, "learning_rate": 3.006839441661471e-06, "loss": 0.4177, "step": 3829 }, { "epoch": 1.8829891838741397, "grad_norm": 36.5, "learning_rate": 3.00453872494326e-06, "loss": 0.2963, "step": 3830 }, { "epoch": 1.883480825958702, "grad_norm": 25.375, "learning_rate": 3.0022384475826588e-06, "loss": 0.2539, "step": 3831 }, { "epoch": 1.8839724680432646, "grad_norm": 56.0, "learning_rate": 2.9999386102554752e-06, "loss": 0.3992, "step": 3832 }, { "epoch": 1.884464110127827, "grad_norm": 26.625, "learning_rate": 2.997639213637387e-06, "loss": 0.3631, "step": 3833 }, { "epoch": 1.8849557522123894, "grad_norm": 21.875, "learning_rate": 2.9953402584039416e-06, "loss": 0.3265, "step": 3834 }, { "epoch": 1.8854473942969519, "grad_norm": 30.625, "learning_rate": 2.9930417452305625e-06, "loss": 0.3942, "step": 3835 }, { "epoch": 1.8859390363815143, "grad_norm": 10.3125, "learning_rate": 2.990743674792537e-06, "loss": 0.1589, "step": 3836 }, { "epoch": 1.8864306784660767, "grad_norm": 26.25, "learning_rate": 2.9884460477650244e-06, "loss": 0.2879, "step": 3837 }, { "epoch": 1.8869223205506391, "grad_norm": 20.5, "learning_rate": 2.986148864823054e-06, "loss": 0.2125, "step": 3838 }, { "epoch": 1.8874139626352016, "grad_norm": 41.25, "learning_rate": 2.9838521266415234e-06, "loss": 0.4714, "step": 3839 }, { "epoch": 1.887905604719764, "grad_norm": 37.5, "learning_rate": 2.981555833895202e-06, "loss": 0.3846, "step": 3840 }, { "epoch": 1.8883972468043264, "grad_norm": 38.5, "learning_rate": 2.9792599872587273e-06, "loss": 0.3797, "step": 3841 }, { "epoch": 1.8888888888888888, "grad_norm": 14.75, "learning_rate": 2.9769645874066038e-06, "loss": 0.3636, "step": 3842 }, { "epoch": 1.8893805309734515, "grad_norm": 12.5, "learning_rate": 2.9746696350132075e-06, "loss": 0.225, "step": 3843 }, { "epoch": 1.8898721730580137, "grad_norm": 32.0, "learning_rate": 2.972375130752782e-06, "loss": 0.2764, "step": 3844 }, { "epoch": 1.8903638151425763, "grad_norm": 25.375, "learning_rate": 2.9700810752994365e-06, "loss": 0.4742, "step": 3845 }, { "epoch": 1.8908554572271385, "grad_norm": 26.625, "learning_rate": 2.967787469327154e-06, "loss": 0.4395, "step": 3846 }, { "epoch": 1.8913470993117012, "grad_norm": 37.75, "learning_rate": 2.9654943135097807e-06, "loss": 0.3663, "step": 3847 }, { "epoch": 1.8918387413962634, "grad_norm": 19.75, "learning_rate": 2.9632016085210326e-06, "loss": 0.2379, "step": 3848 }, { "epoch": 1.892330383480826, "grad_norm": 31.125, "learning_rate": 2.9609093550344912e-06, "loss": 0.397, "step": 3849 }, { "epoch": 1.8928220255653883, "grad_norm": 15.25, "learning_rate": 2.9586175537236075e-06, "loss": 0.239, "step": 3850 }, { "epoch": 1.893313667649951, "grad_norm": 54.25, "learning_rate": 2.9563262052617015e-06, "loss": 0.5946, "step": 3851 }, { "epoch": 1.893805309734513, "grad_norm": 22.5, "learning_rate": 2.9540353103219564e-06, "loss": 0.3255, "step": 3852 }, { "epoch": 1.8942969518190758, "grad_norm": 40.25, "learning_rate": 2.9517448695774224e-06, "loss": 0.2814, "step": 3853 }, { "epoch": 1.8947885939036382, "grad_norm": 18.75, "learning_rate": 2.9494548837010174e-06, "loss": 0.2994, "step": 3854 }, { "epoch": 1.8952802359882006, "grad_norm": 24.25, "learning_rate": 2.947165353365527e-06, "loss": 0.3424, "step": 3855 }, { "epoch": 1.895771878072763, "grad_norm": 20.125, "learning_rate": 2.9448762792435986e-06, "loss": 0.3014, "step": 3856 }, { "epoch": 1.8962635201573255, "grad_norm": 28.0, "learning_rate": 2.9425876620077524e-06, "loss": 0.2777, "step": 3857 }, { "epoch": 1.896755162241888, "grad_norm": 21.125, "learning_rate": 2.940299502330369e-06, "loss": 0.3009, "step": 3858 }, { "epoch": 1.8972468043264503, "grad_norm": 33.25, "learning_rate": 2.9380118008836956e-06, "loss": 0.3595, "step": 3859 }, { "epoch": 1.8977384464110127, "grad_norm": 18.75, "learning_rate": 2.9357245583398453e-06, "loss": 0.2326, "step": 3860 }, { "epoch": 1.8982300884955752, "grad_norm": 36.5, "learning_rate": 2.9334377753707972e-06, "loss": 0.3739, "step": 3861 }, { "epoch": 1.8987217305801378, "grad_norm": 25.375, "learning_rate": 2.9311514526483936e-06, "loss": 0.259, "step": 3862 }, { "epoch": 1.8992133726647, "grad_norm": 19.875, "learning_rate": 2.9288655908443423e-06, "loss": 0.2421, "step": 3863 }, { "epoch": 1.8997050147492627, "grad_norm": 17.375, "learning_rate": 2.9265801906302145e-06, "loss": 0.2337, "step": 3864 }, { "epoch": 1.9001966568338249, "grad_norm": 34.75, "learning_rate": 2.9242952526774517e-06, "loss": 0.3924, "step": 3865 }, { "epoch": 1.9006882989183875, "grad_norm": 30.125, "learning_rate": 2.9220107776573515e-06, "loss": 0.4232, "step": 3866 }, { "epoch": 1.9011799410029497, "grad_norm": 16.25, "learning_rate": 2.91972676624108e-06, "loss": 0.283, "step": 3867 }, { "epoch": 1.9016715830875124, "grad_norm": 24.25, "learning_rate": 2.917443219099666e-06, "loss": 0.2266, "step": 3868 }, { "epoch": 1.9021632251720746, "grad_norm": 23.375, "learning_rate": 2.915160136904002e-06, "loss": 0.4233, "step": 3869 }, { "epoch": 1.9026548672566372, "grad_norm": 33.25, "learning_rate": 2.912877520324844e-06, "loss": 0.4133, "step": 3870 }, { "epoch": 1.9031465093411994, "grad_norm": 38.75, "learning_rate": 2.910595370032809e-06, "loss": 0.2969, "step": 3871 }, { "epoch": 1.903638151425762, "grad_norm": 30.625, "learning_rate": 2.908313686698384e-06, "loss": 0.2912, "step": 3872 }, { "epoch": 1.9041297935103245, "grad_norm": 23.25, "learning_rate": 2.9060324709919104e-06, "loss": 0.1625, "step": 3873 }, { "epoch": 1.904621435594887, "grad_norm": 37.0, "learning_rate": 2.9037517235835963e-06, "loss": 0.4346, "step": 3874 }, { "epoch": 1.9051130776794494, "grad_norm": 56.0, "learning_rate": 2.901471445143513e-06, "loss": 0.5527, "step": 3875 }, { "epoch": 1.9056047197640118, "grad_norm": 23.375, "learning_rate": 2.8991916363415913e-06, "loss": 0.2961, "step": 3876 }, { "epoch": 1.9060963618485742, "grad_norm": 28.5, "learning_rate": 2.8969122978476256e-06, "loss": 0.3136, "step": 3877 }, { "epoch": 1.9065880039331367, "grad_norm": 25.875, "learning_rate": 2.8946334303312718e-06, "loss": 0.2356, "step": 3878 }, { "epoch": 1.907079646017699, "grad_norm": 31.125, "learning_rate": 2.8923550344620473e-06, "loss": 0.3156, "step": 3879 }, { "epoch": 1.9075712881022615, "grad_norm": 36.5, "learning_rate": 2.8900771109093333e-06, "loss": 0.5348, "step": 3880 }, { "epoch": 1.9080629301868242, "grad_norm": 21.75, "learning_rate": 2.8877996603423688e-06, "loss": 0.31, "step": 3881 }, { "epoch": 1.9085545722713864, "grad_norm": 8.375, "learning_rate": 2.8855226834302554e-06, "loss": 0.2129, "step": 3882 }, { "epoch": 1.909046214355949, "grad_norm": 13.6875, "learning_rate": 2.883246180841954e-06, "loss": 0.2926, "step": 3883 }, { "epoch": 1.9095378564405112, "grad_norm": 14.125, "learning_rate": 2.8809701532462897e-06, "loss": 0.2535, "step": 3884 }, { "epoch": 1.9100294985250739, "grad_norm": 26.0, "learning_rate": 2.8786946013119443e-06, "loss": 0.3335, "step": 3885 }, { "epoch": 1.910521140609636, "grad_norm": 27.0, "learning_rate": 2.876419525707462e-06, "loss": 0.403, "step": 3886 }, { "epoch": 1.9110127826941987, "grad_norm": 19.25, "learning_rate": 2.874144927101247e-06, "loss": 0.2883, "step": 3887 }, { "epoch": 1.911504424778761, "grad_norm": 33.5, "learning_rate": 2.871870806161562e-06, "loss": 0.3722, "step": 3888 }, { "epoch": 1.9119960668633236, "grad_norm": 38.0, "learning_rate": 2.869597163556528e-06, "loss": 0.4035, "step": 3889 }, { "epoch": 1.9124877089478858, "grad_norm": 9.8125, "learning_rate": 2.867323999954133e-06, "loss": 0.3193, "step": 3890 }, { "epoch": 1.9129793510324484, "grad_norm": 41.5, "learning_rate": 2.8650513160222146e-06, "loss": 0.3969, "step": 3891 }, { "epoch": 1.9134709931170109, "grad_norm": 8.75, "learning_rate": 2.862779112428476e-06, "loss": 0.1759, "step": 3892 }, { "epoch": 1.9139626352015733, "grad_norm": 28.125, "learning_rate": 2.8605073898404744e-06, "loss": 0.343, "step": 3893 }, { "epoch": 1.9144542772861357, "grad_norm": 49.5, "learning_rate": 2.8582361489256307e-06, "loss": 0.5024, "step": 3894 }, { "epoch": 1.9149459193706981, "grad_norm": 34.0, "learning_rate": 2.8559653903512225e-06, "loss": 0.5127, "step": 3895 }, { "epoch": 1.9154375614552606, "grad_norm": 12.0, "learning_rate": 2.853695114784383e-06, "loss": 0.2714, "step": 3896 }, { "epoch": 1.915929203539823, "grad_norm": 24.125, "learning_rate": 2.8514253228921075e-06, "loss": 0.1974, "step": 3897 }, { "epoch": 1.9164208456243854, "grad_norm": 37.75, "learning_rate": 2.8491560153412467e-06, "loss": 0.4393, "step": 3898 }, { "epoch": 1.9169124877089478, "grad_norm": 53.75, "learning_rate": 2.8468871927985085e-06, "loss": 0.2557, "step": 3899 }, { "epoch": 1.9174041297935103, "grad_norm": 10.5, "learning_rate": 2.8446188559304608e-06, "loss": 0.2327, "step": 3900 }, { "epoch": 1.9178957718780727, "grad_norm": 33.5, "learning_rate": 2.8423510054035254e-06, "loss": 0.4944, "step": 3901 }, { "epoch": 1.9183874139626353, "grad_norm": 18.125, "learning_rate": 2.8400836418839864e-06, "loss": 0.2209, "step": 3902 }, { "epoch": 1.9188790560471976, "grad_norm": 10.6875, "learning_rate": 2.83781676603798e-06, "loss": 0.2074, "step": 3903 }, { "epoch": 1.9193706981317602, "grad_norm": 53.75, "learning_rate": 2.8355503785315007e-06, "loss": 0.4859, "step": 3904 }, { "epoch": 1.9198623402163224, "grad_norm": 31.5, "learning_rate": 2.8332844800304005e-06, "loss": 0.3885, "step": 3905 }, { "epoch": 1.920353982300885, "grad_norm": 27.625, "learning_rate": 2.8310190712003863e-06, "loss": 0.4069, "step": 3906 }, { "epoch": 1.9208456243854473, "grad_norm": 19.0, "learning_rate": 2.828754152707021e-06, "loss": 0.3056, "step": 3907 }, { "epoch": 1.92133726647001, "grad_norm": 33.25, "learning_rate": 2.8264897252157235e-06, "loss": 0.4126, "step": 3908 }, { "epoch": 1.9218289085545721, "grad_norm": 32.25, "learning_rate": 2.824225789391772e-06, "loss": 0.4805, "step": 3909 }, { "epoch": 1.9223205506391348, "grad_norm": 34.25, "learning_rate": 2.821962345900296e-06, "loss": 0.4467, "step": 3910 }, { "epoch": 1.9228121927236972, "grad_norm": 24.5, "learning_rate": 2.819699395406282e-06, "loss": 0.3202, "step": 3911 }, { "epoch": 1.9233038348082596, "grad_norm": 25.875, "learning_rate": 2.8174369385745703e-06, "loss": 0.3216, "step": 3912 }, { "epoch": 1.923795476892822, "grad_norm": 18.75, "learning_rate": 2.815174976069858e-06, "loss": 0.3031, "step": 3913 }, { "epoch": 1.9242871189773845, "grad_norm": 21.0, "learning_rate": 2.812913508556696e-06, "loss": 0.2108, "step": 3914 }, { "epoch": 1.924778761061947, "grad_norm": 26.0, "learning_rate": 2.8106525366994872e-06, "loss": 0.4894, "step": 3915 }, { "epoch": 1.9252704031465093, "grad_norm": 24.5, "learning_rate": 2.8083920611624977e-06, "loss": 0.2782, "step": 3916 }, { "epoch": 1.9257620452310718, "grad_norm": 76.5, "learning_rate": 2.8061320826098377e-06, "loss": 0.4142, "step": 3917 }, { "epoch": 1.9262536873156342, "grad_norm": 19.375, "learning_rate": 2.8038726017054764e-06, "loss": 0.27, "step": 3918 }, { "epoch": 1.9267453294001966, "grad_norm": 46.75, "learning_rate": 2.801613619113236e-06, "loss": 0.4078, "step": 3919 }, { "epoch": 1.927236971484759, "grad_norm": 19.75, "learning_rate": 2.799355135496791e-06, "loss": 0.2219, "step": 3920 }, { "epoch": 1.9277286135693217, "grad_norm": 43.0, "learning_rate": 2.7970971515196715e-06, "loss": 0.4649, "step": 3921 }, { "epoch": 1.9282202556538839, "grad_norm": 24.625, "learning_rate": 2.7948396678452587e-06, "loss": 0.3544, "step": 3922 }, { "epoch": 1.9287118977384465, "grad_norm": 19.5, "learning_rate": 2.7925826851367886e-06, "loss": 0.3052, "step": 3923 }, { "epoch": 1.9292035398230087, "grad_norm": 18.75, "learning_rate": 2.7903262040573496e-06, "loss": 0.1454, "step": 3924 }, { "epoch": 1.9296951819075714, "grad_norm": 31.5, "learning_rate": 2.788070225269883e-06, "loss": 0.4496, "step": 3925 }, { "epoch": 1.9301868239921336, "grad_norm": 7.9375, "learning_rate": 2.7858147494371796e-06, "loss": 0.153, "step": 3926 }, { "epoch": 1.9306784660766962, "grad_norm": 12.9375, "learning_rate": 2.7835597772218862e-06, "loss": 0.2285, "step": 3927 }, { "epoch": 1.9311701081612584, "grad_norm": 22.125, "learning_rate": 2.7813053092865005e-06, "loss": 0.3158, "step": 3928 }, { "epoch": 1.931661750245821, "grad_norm": 18.5, "learning_rate": 2.7790513462933717e-06, "loss": 0.237, "step": 3929 }, { "epoch": 1.9321533923303835, "grad_norm": 14.125, "learning_rate": 2.7767978889046996e-06, "loss": 0.213, "step": 3930 }, { "epoch": 1.932645034414946, "grad_norm": 33.25, "learning_rate": 2.774544937782537e-06, "loss": 0.4039, "step": 3931 }, { "epoch": 1.9331366764995084, "grad_norm": 21.75, "learning_rate": 2.772292493588789e-06, "loss": 0.3932, "step": 3932 }, { "epoch": 1.9336283185840708, "grad_norm": 9.5625, "learning_rate": 2.7700405569852076e-06, "loss": 0.1273, "step": 3933 }, { "epoch": 1.9341199606686332, "grad_norm": 26.625, "learning_rate": 2.767789128633401e-06, "loss": 0.3346, "step": 3934 }, { "epoch": 1.9346116027531957, "grad_norm": 12.3125, "learning_rate": 2.765538209194824e-06, "loss": 0.2102, "step": 3935 }, { "epoch": 1.935103244837758, "grad_norm": 39.25, "learning_rate": 2.763287799330784e-06, "loss": 0.3808, "step": 3936 }, { "epoch": 1.9355948869223205, "grad_norm": 18.25, "learning_rate": 2.7610378997024366e-06, "loss": 0.1906, "step": 3937 }, { "epoch": 1.936086529006883, "grad_norm": 14.625, "learning_rate": 2.7587885109707893e-06, "loss": 0.1744, "step": 3938 }, { "epoch": 1.9365781710914454, "grad_norm": 16.125, "learning_rate": 2.7565396337967013e-06, "loss": 0.0927, "step": 3939 }, { "epoch": 1.937069813176008, "grad_norm": 29.375, "learning_rate": 2.7542912688408773e-06, "loss": 0.4161, "step": 3940 }, { "epoch": 1.9375614552605702, "grad_norm": 36.5, "learning_rate": 2.752043416763874e-06, "loss": 0.3765, "step": 3941 }, { "epoch": 1.9380530973451329, "grad_norm": 16.125, "learning_rate": 2.7497960782260967e-06, "loss": 0.253, "step": 3942 }, { "epoch": 1.938544739429695, "grad_norm": 23.875, "learning_rate": 2.747549253887801e-06, "loss": 0.2643, "step": 3943 }, { "epoch": 1.9390363815142577, "grad_norm": 19.625, "learning_rate": 2.745302944409089e-06, "loss": 0.3114, "step": 3944 }, { "epoch": 1.93952802359882, "grad_norm": 25.875, "learning_rate": 2.743057150449912e-06, "loss": 0.4167, "step": 3945 }, { "epoch": 1.9400196656833826, "grad_norm": 13.8125, "learning_rate": 2.740811872670074e-06, "loss": 0.2627, "step": 3946 }, { "epoch": 1.9405113077679448, "grad_norm": 42.0, "learning_rate": 2.738567111729224e-06, "loss": 0.5579, "step": 3947 }, { "epoch": 1.9410029498525074, "grad_norm": 24.375, "learning_rate": 2.7363228682868578e-06, "loss": 0.372, "step": 3948 }, { "epoch": 1.9414945919370699, "grad_norm": 40.0, "learning_rate": 2.7340791430023205e-06, "loss": 0.2596, "step": 3949 }, { "epoch": 1.9419862340216323, "grad_norm": 19.875, "learning_rate": 2.7318359365348066e-06, "loss": 0.3087, "step": 3950 }, { "epoch": 1.9424778761061947, "grad_norm": 30.375, "learning_rate": 2.729593249543356e-06, "loss": 0.2521, "step": 3951 }, { "epoch": 1.9429695181907571, "grad_norm": 45.25, "learning_rate": 2.727351082686855e-06, "loss": 0.5873, "step": 3952 }, { "epoch": 1.9434611602753196, "grad_norm": 16.125, "learning_rate": 2.725109436624042e-06, "loss": 0.2469, "step": 3953 }, { "epoch": 1.943952802359882, "grad_norm": 36.5, "learning_rate": 2.7228683120134988e-06, "loss": 0.6293, "step": 3954 }, { "epoch": 1.9444444444444444, "grad_norm": 36.0, "learning_rate": 2.7206277095136538e-06, "loss": 0.4533, "step": 3955 }, { "epoch": 1.9449360865290068, "grad_norm": 38.5, "learning_rate": 2.718387629782782e-06, "loss": 0.385, "step": 3956 }, { "epoch": 1.9454277286135693, "grad_norm": 17.75, "learning_rate": 2.716148073479006e-06, "loss": 0.1459, "step": 3957 }, { "epoch": 1.9459193706981317, "grad_norm": 25.5, "learning_rate": 2.713909041260294e-06, "loss": 0.3658, "step": 3958 }, { "epoch": 1.9464110127826943, "grad_norm": 14.125, "learning_rate": 2.711670533784459e-06, "loss": 0.3299, "step": 3959 }, { "epoch": 1.9469026548672566, "grad_norm": 38.25, "learning_rate": 2.7094325517091642e-06, "loss": 0.2909, "step": 3960 }, { "epoch": 1.9473942969518192, "grad_norm": 30.125, "learning_rate": 2.7071950956919135e-06, "loss": 0.2789, "step": 3961 }, { "epoch": 1.9478859390363814, "grad_norm": 27.75, "learning_rate": 2.7049581663900587e-06, "loss": 0.3418, "step": 3962 }, { "epoch": 1.948377581120944, "grad_norm": 11.25, "learning_rate": 2.702721764460796e-06, "loss": 0.2018, "step": 3963 }, { "epoch": 1.9488692232055063, "grad_norm": 12.6875, "learning_rate": 2.7004858905611666e-06, "loss": 0.148, "step": 3964 }, { "epoch": 1.949360865290069, "grad_norm": 34.25, "learning_rate": 2.698250545348058e-06, "loss": 0.4273, "step": 3965 }, { "epoch": 1.9498525073746311, "grad_norm": 23.625, "learning_rate": 2.6960157294781987e-06, "loss": 0.1963, "step": 3966 }, { "epoch": 1.9503441494591938, "grad_norm": 17.75, "learning_rate": 2.6937814436081668e-06, "loss": 0.2621, "step": 3967 }, { "epoch": 1.9508357915437562, "grad_norm": 35.25, "learning_rate": 2.691547688394382e-06, "loss": 0.4197, "step": 3968 }, { "epoch": 1.9513274336283186, "grad_norm": 32.0, "learning_rate": 2.6893144644931076e-06, "loss": 0.3166, "step": 3969 }, { "epoch": 1.951819075712881, "grad_norm": 17.5, "learning_rate": 2.687081772560452e-06, "loss": 0.3608, "step": 3970 }, { "epoch": 1.9523107177974435, "grad_norm": 20.5, "learning_rate": 2.6848496132523662e-06, "loss": 0.2653, "step": 3971 }, { "epoch": 1.952802359882006, "grad_norm": 41.0, "learning_rate": 2.682617987224645e-06, "loss": 0.3183, "step": 3972 }, { "epoch": 1.9532940019665683, "grad_norm": 9.125, "learning_rate": 2.6803868951329285e-06, "loss": 0.2421, "step": 3973 }, { "epoch": 1.9537856440511308, "grad_norm": 42.75, "learning_rate": 2.678156337632696e-06, "loss": 0.2799, "step": 3974 }, { "epoch": 1.9542772861356932, "grad_norm": 11.0, "learning_rate": 2.675926315379274e-06, "loss": 0.2855, "step": 3975 }, { "epoch": 1.9547689282202556, "grad_norm": 12.75, "learning_rate": 2.673696829027829e-06, "loss": 0.2828, "step": 3976 }, { "epoch": 1.955260570304818, "grad_norm": 13.9375, "learning_rate": 2.6714678792333697e-06, "loss": 0.3051, "step": 3977 }, { "epoch": 1.9557522123893807, "grad_norm": 25.75, "learning_rate": 2.66923946665075e-06, "loss": 0.3799, "step": 3978 }, { "epoch": 1.956243854473943, "grad_norm": 58.25, "learning_rate": 2.6670115919346634e-06, "loss": 0.5274, "step": 3979 }, { "epoch": 1.9567354965585055, "grad_norm": 32.5, "learning_rate": 2.6647842557396466e-06, "loss": 0.4942, "step": 3980 }, { "epoch": 1.9572271386430677, "grad_norm": 50.75, "learning_rate": 2.6625574587200765e-06, "loss": 0.4649, "step": 3981 }, { "epoch": 1.9577187807276304, "grad_norm": 23.5, "learning_rate": 2.6603312015301738e-06, "loss": 0.4255, "step": 3982 }, { "epoch": 1.9582104228121926, "grad_norm": 31.125, "learning_rate": 2.6581054848239996e-06, "loss": 0.3056, "step": 3983 }, { "epoch": 1.9587020648967552, "grad_norm": 18.875, "learning_rate": 2.6558803092554566e-06, "loss": 0.3441, "step": 3984 }, { "epoch": 1.9591937069813175, "grad_norm": 51.0, "learning_rate": 2.653655675478288e-06, "loss": 0.5155, "step": 3985 }, { "epoch": 1.95968534906588, "grad_norm": 16.25, "learning_rate": 2.6514315841460767e-06, "loss": 0.2761, "step": 3986 }, { "epoch": 1.9601769911504425, "grad_norm": 24.375, "learning_rate": 2.649208035912249e-06, "loss": 0.3101, "step": 3987 }, { "epoch": 1.960668633235005, "grad_norm": 45.75, "learning_rate": 2.6469850314300694e-06, "loss": 0.5302, "step": 3988 }, { "epoch": 1.9611602753195674, "grad_norm": 29.0, "learning_rate": 2.644762571352641e-06, "loss": 0.3892, "step": 3989 }, { "epoch": 1.9616519174041298, "grad_norm": 24.625, "learning_rate": 2.642540656332914e-06, "loss": 0.4207, "step": 3990 }, { "epoch": 1.9621435594886922, "grad_norm": 14.6875, "learning_rate": 2.6403192870236713e-06, "loss": 0.2509, "step": 3991 }, { "epoch": 1.9626352015732547, "grad_norm": 19.125, "learning_rate": 2.6380984640775384e-06, "loss": 0.3376, "step": 3992 }, { "epoch": 1.963126843657817, "grad_norm": 18.25, "learning_rate": 2.6358781881469794e-06, "loss": 0.2752, "step": 3993 }, { "epoch": 1.9636184857423795, "grad_norm": 12.0, "learning_rate": 2.6336584598842986e-06, "loss": 0.1776, "step": 3994 }, { "epoch": 1.964110127826942, "grad_norm": 24.625, "learning_rate": 2.631439279941639e-06, "loss": 0.2607, "step": 3995 }, { "epoch": 1.9646017699115044, "grad_norm": 16.125, "learning_rate": 2.62922064897098e-06, "loss": 0.2797, "step": 3996 }, { "epoch": 1.965093411996067, "grad_norm": 49.0, "learning_rate": 2.627002567624146e-06, "loss": 0.3576, "step": 3997 }, { "epoch": 1.9655850540806292, "grad_norm": 40.25, "learning_rate": 2.624785036552795e-06, "loss": 0.3849, "step": 3998 }, { "epoch": 1.9660766961651919, "grad_norm": 12.8125, "learning_rate": 2.622568056408423e-06, "loss": 0.375, "step": 3999 }, { "epoch": 1.966568338249754, "grad_norm": 20.5, "learning_rate": 2.6203516278423653e-06, "loss": 0.5312, "step": 4000 }, { "epoch": 1.966568338249754, "eval_loss": 0.37094059586524963, "eval_runtime": 66.9738, "eval_samples_per_second": 121.48, "eval_spearman": 0.5905883900050587, "eval_steps_per_second": 15.185, "step": 4000 }, { "epoch": 1.9670599803343167, "grad_norm": 26.125, "learning_rate": 2.6181357515057987e-06, "loss": 0.3628, "step": 4001 }, { "epoch": 1.967551622418879, "grad_norm": 25.625, "learning_rate": 2.615920428049731e-06, "loss": 0.2316, "step": 4002 }, { "epoch": 1.9680432645034416, "grad_norm": 25.5, "learning_rate": 2.6137056581250132e-06, "loss": 0.4953, "step": 4003 }, { "epoch": 1.9685349065880038, "grad_norm": 18.375, "learning_rate": 2.611491442382327e-06, "loss": 0.4019, "step": 4004 }, { "epoch": 1.9690265486725664, "grad_norm": 24.75, "learning_rate": 2.609277781472203e-06, "loss": 0.359, "step": 4005 }, { "epoch": 1.9695181907571289, "grad_norm": 19.125, "learning_rate": 2.6070646760449963e-06, "loss": 0.3529, "step": 4006 }, { "epoch": 1.9700098328416913, "grad_norm": 39.0, "learning_rate": 2.604852126750907e-06, "loss": 0.1948, "step": 4007 }, { "epoch": 1.9705014749262537, "grad_norm": 20.625, "learning_rate": 2.602640134239966e-06, "loss": 0.4206, "step": 4008 }, { "epoch": 1.9709931170108161, "grad_norm": 15.625, "learning_rate": 2.6004286991620473e-06, "loss": 0.1214, "step": 4009 }, { "epoch": 1.9714847590953786, "grad_norm": 27.75, "learning_rate": 2.5982178221668532e-06, "loss": 0.387, "step": 4010 }, { "epoch": 1.971976401179941, "grad_norm": 52.75, "learning_rate": 2.5960075039039277e-06, "loss": 0.6121, "step": 4011 }, { "epoch": 1.9724680432645034, "grad_norm": 32.75, "learning_rate": 2.5937977450226503e-06, "loss": 0.4879, "step": 4012 }, { "epoch": 1.9729596853490659, "grad_norm": 26.375, "learning_rate": 2.5915885461722353e-06, "loss": 0.3422, "step": 4013 }, { "epoch": 1.9734513274336283, "grad_norm": 37.0, "learning_rate": 2.58937990800173e-06, "loss": 0.3891, "step": 4014 }, { "epoch": 1.9739429695181907, "grad_norm": 13.1875, "learning_rate": 2.5871718311600214e-06, "loss": 0.15, "step": 4015 }, { "epoch": 1.9744346116027534, "grad_norm": 38.75, "learning_rate": 2.584964316295827e-06, "loss": 0.4217, "step": 4016 }, { "epoch": 1.9749262536873156, "grad_norm": 25.0, "learning_rate": 2.5827573640577037e-06, "loss": 0.1948, "step": 4017 }, { "epoch": 1.9754178957718782, "grad_norm": 13.9375, "learning_rate": 2.580550975094037e-06, "loss": 0.3109, "step": 4018 }, { "epoch": 1.9759095378564404, "grad_norm": 22.125, "learning_rate": 2.578345150053057e-06, "loss": 0.4416, "step": 4019 }, { "epoch": 1.976401179941003, "grad_norm": 21.875, "learning_rate": 2.576139889582817e-06, "loss": 0.3352, "step": 4020 }, { "epoch": 1.9768928220255653, "grad_norm": 23.75, "learning_rate": 2.5739351943312123e-06, "loss": 0.2935, "step": 4021 }, { "epoch": 1.977384464110128, "grad_norm": 21.5, "learning_rate": 2.5717310649459667e-06, "loss": 0.3902, "step": 4022 }, { "epoch": 1.9778761061946901, "grad_norm": 21.5, "learning_rate": 2.5695275020746437e-06, "loss": 0.2623, "step": 4023 }, { "epoch": 1.9783677482792528, "grad_norm": 18.0, "learning_rate": 2.567324506364632e-06, "loss": 0.2959, "step": 4024 }, { "epoch": 1.9788593903638152, "grad_norm": 10.6875, "learning_rate": 2.5651220784631636e-06, "loss": 0.2905, "step": 4025 }, { "epoch": 1.9793510324483776, "grad_norm": 44.5, "learning_rate": 2.562920219017293e-06, "loss": 0.2875, "step": 4026 }, { "epoch": 1.97984267453294, "grad_norm": 32.25, "learning_rate": 2.5607189286739205e-06, "loss": 0.3689, "step": 4027 }, { "epoch": 1.9803343166175025, "grad_norm": 16.25, "learning_rate": 2.5585182080797665e-06, "loss": 0.2198, "step": 4028 }, { "epoch": 1.980825958702065, "grad_norm": 19.125, "learning_rate": 2.5563180578813934e-06, "loss": 0.3347, "step": 4029 }, { "epoch": 1.9813176007866273, "grad_norm": 7.75, "learning_rate": 2.554118478725189e-06, "loss": 0.1957, "step": 4030 }, { "epoch": 1.9818092428711898, "grad_norm": 18.75, "learning_rate": 2.551919471257379e-06, "loss": 0.2651, "step": 4031 }, { "epoch": 1.9823008849557522, "grad_norm": 57.5, "learning_rate": 2.5497210361240158e-06, "loss": 0.5669, "step": 4032 }, { "epoch": 1.9827925270403146, "grad_norm": 50.75, "learning_rate": 2.547523173970989e-06, "loss": 0.2613, "step": 4033 }, { "epoch": 1.983284169124877, "grad_norm": 20.375, "learning_rate": 2.5453258854440166e-06, "loss": 0.238, "step": 4034 }, { "epoch": 1.9837758112094397, "grad_norm": 13.8125, "learning_rate": 2.5431291711886502e-06, "loss": 0.2396, "step": 4035 }, { "epoch": 1.984267453294002, "grad_norm": 50.5, "learning_rate": 2.5409330318502684e-06, "loss": 0.4104, "step": 4036 }, { "epoch": 1.9847590953785645, "grad_norm": 13.8125, "learning_rate": 2.5387374680740878e-06, "loss": 0.0846, "step": 4037 }, { "epoch": 1.9852507374631267, "grad_norm": 23.375, "learning_rate": 2.536542480505148e-06, "loss": 0.3733, "step": 4038 }, { "epoch": 1.9857423795476894, "grad_norm": 40.25, "learning_rate": 2.534348069788325e-06, "loss": 0.4354, "step": 4039 }, { "epoch": 1.9862340216322516, "grad_norm": 38.75, "learning_rate": 2.532154236568323e-06, "loss": 0.4568, "step": 4040 }, { "epoch": 1.9867256637168142, "grad_norm": 23.25, "learning_rate": 2.52996098148968e-06, "loss": 0.4495, "step": 4041 }, { "epoch": 1.9872173058013765, "grad_norm": 26.375, "learning_rate": 2.5277683051967576e-06, "loss": 0.2179, "step": 4042 }, { "epoch": 1.987708947885939, "grad_norm": 30.25, "learning_rate": 2.525576208333752e-06, "loss": 0.2163, "step": 4043 }, { "epoch": 1.9882005899705013, "grad_norm": 64.5, "learning_rate": 2.5233846915446904e-06, "loss": 0.5911, "step": 4044 }, { "epoch": 1.988692232055064, "grad_norm": 43.25, "learning_rate": 2.5211937554734232e-06, "loss": 0.2125, "step": 4045 }, { "epoch": 1.9891838741396264, "grad_norm": 31.375, "learning_rate": 2.5190034007636396e-06, "loss": 0.3296, "step": 4046 }, { "epoch": 1.9896755162241888, "grad_norm": 28.875, "learning_rate": 2.5168136280588477e-06, "loss": 0.2083, "step": 4047 }, { "epoch": 1.9901671583087512, "grad_norm": 26.625, "learning_rate": 2.514624438002392e-06, "loss": 0.3235, "step": 4048 }, { "epoch": 1.9906588003933137, "grad_norm": 30.375, "learning_rate": 2.5124358312374436e-06, "loss": 0.2919, "step": 4049 }, { "epoch": 1.991150442477876, "grad_norm": 37.5, "learning_rate": 2.5102478084070035e-06, "loss": 0.2671, "step": 4050 }, { "epoch": 1.9916420845624385, "grad_norm": 40.75, "learning_rate": 2.5080603701538955e-06, "loss": 0.5026, "step": 4051 }, { "epoch": 1.992133726647001, "grad_norm": 27.875, "learning_rate": 2.5058735171207814e-06, "loss": 0.4015, "step": 4052 }, { "epoch": 1.9926253687315634, "grad_norm": 16.125, "learning_rate": 2.5036872499501406e-06, "loss": 0.4074, "step": 4053 }, { "epoch": 1.993117010816126, "grad_norm": 34.25, "learning_rate": 2.5015015692842894e-06, "loss": 0.4986, "step": 4054 }, { "epoch": 1.9936086529006882, "grad_norm": 45.75, "learning_rate": 2.4993164757653625e-06, "loss": 0.4028, "step": 4055 }, { "epoch": 1.9941002949852509, "grad_norm": 25.375, "learning_rate": 2.4971319700353343e-06, "loss": 0.3695, "step": 4056 }, { "epoch": 1.994591937069813, "grad_norm": 21.375, "learning_rate": 2.494948052735994e-06, "loss": 0.2676, "step": 4057 }, { "epoch": 1.9950835791543757, "grad_norm": 26.875, "learning_rate": 2.4927647245089677e-06, "loss": 0.1954, "step": 4058 }, { "epoch": 1.995575221238938, "grad_norm": 15.3125, "learning_rate": 2.4905819859956996e-06, "loss": 0.2094, "step": 4059 }, { "epoch": 1.9960668633235006, "grad_norm": 28.875, "learning_rate": 2.48839983783747e-06, "loss": 0.206, "step": 4060 }, { "epoch": 1.9965585054080628, "grad_norm": 14.5625, "learning_rate": 2.4862182806753765e-06, "loss": 0.3251, "step": 4061 }, { "epoch": 1.9970501474926254, "grad_norm": 13.5625, "learning_rate": 2.48403731515035e-06, "loss": 0.1696, "step": 4062 }, { "epoch": 1.9975417895771876, "grad_norm": 33.75, "learning_rate": 2.481856941903145e-06, "loss": 0.4036, "step": 4063 }, { "epoch": 1.9980334316617503, "grad_norm": 32.0, "learning_rate": 2.479677161574344e-06, "loss": 0.2757, "step": 4064 }, { "epoch": 1.9985250737463127, "grad_norm": 21.0, "learning_rate": 2.4774979748043493e-06, "loss": 0.1943, "step": 4065 }, { "epoch": 1.9990167158308751, "grad_norm": 16.5, "learning_rate": 2.4753193822333974e-06, "loss": 0.2389, "step": 4066 }, { "epoch": 1.9995083579154376, "grad_norm": 52.0, "learning_rate": 2.4731413845015425e-06, "loss": 0.4304, "step": 4067 }, { "epoch": 2.0, "grad_norm": 11.3125, "learning_rate": 2.4709639822486695e-06, "loss": 0.176, "step": 4068 }, { "epoch": 2.0004916420845626, "grad_norm": 30.375, "learning_rate": 2.468787176114484e-06, "loss": 0.3523, "step": 4069 }, { "epoch": 2.000983284169125, "grad_norm": 36.0, "learning_rate": 2.4666109667385195e-06, "loss": 0.2867, "step": 4070 }, { "epoch": 2.0014749262536875, "grad_norm": 28.75, "learning_rate": 2.4644353547601343e-06, "loss": 0.1182, "step": 4071 }, { "epoch": 2.0019665683382497, "grad_norm": 25.75, "learning_rate": 2.462260340818511e-06, "loss": 0.2716, "step": 4072 }, { "epoch": 2.0024582104228124, "grad_norm": 15.9375, "learning_rate": 2.460085925552653e-06, "loss": 0.2384, "step": 4073 }, { "epoch": 2.0029498525073746, "grad_norm": 15.1875, "learning_rate": 2.4579121096013934e-06, "loss": 0.3971, "step": 4074 }, { "epoch": 2.003441494591937, "grad_norm": 29.5, "learning_rate": 2.4557388936033835e-06, "loss": 0.4267, "step": 4075 }, { "epoch": 2.0039331366764994, "grad_norm": 13.3125, "learning_rate": 2.4535662781971024e-06, "loss": 0.2902, "step": 4076 }, { "epoch": 2.004424778761062, "grad_norm": 13.0625, "learning_rate": 2.4513942640208517e-06, "loss": 0.2789, "step": 4077 }, { "epoch": 2.0049164208456243, "grad_norm": 15.0, "learning_rate": 2.449222851712758e-06, "loss": 0.2519, "step": 4078 }, { "epoch": 2.005408062930187, "grad_norm": 22.875, "learning_rate": 2.4470520419107665e-06, "loss": 0.2252, "step": 4079 }, { "epoch": 2.005899705014749, "grad_norm": 28.5, "learning_rate": 2.444881835252651e-06, "loss": 0.2847, "step": 4080 }, { "epoch": 2.0063913470993118, "grad_norm": 20.625, "learning_rate": 2.4427122323760016e-06, "loss": 0.2971, "step": 4081 }, { "epoch": 2.006882989183874, "grad_norm": 13.4375, "learning_rate": 2.4405432339182364e-06, "loss": 0.1989, "step": 4082 }, { "epoch": 2.0073746312684366, "grad_norm": 45.25, "learning_rate": 2.4383748405165965e-06, "loss": 0.4445, "step": 4083 }, { "epoch": 2.007866273352999, "grad_norm": 20.0, "learning_rate": 2.4362070528081396e-06, "loss": 0.4302, "step": 4084 }, { "epoch": 2.0083579154375615, "grad_norm": 21.625, "learning_rate": 2.434039871429749e-06, "loss": 0.1914, "step": 4085 }, { "epoch": 2.0088495575221237, "grad_norm": 27.75, "learning_rate": 2.431873297018134e-06, "loss": 0.3445, "step": 4086 }, { "epoch": 2.0093411996066863, "grad_norm": 22.5, "learning_rate": 2.4297073302098155e-06, "loss": 0.3463, "step": 4087 }, { "epoch": 2.009832841691249, "grad_norm": 25.375, "learning_rate": 2.4275419716411446e-06, "loss": 0.173, "step": 4088 }, { "epoch": 2.010324483775811, "grad_norm": 18.875, "learning_rate": 2.4253772219482923e-06, "loss": 0.2792, "step": 4089 }, { "epoch": 2.010816125860374, "grad_norm": 14.875, "learning_rate": 2.4232130817672453e-06, "loss": 0.2057, "step": 4090 }, { "epoch": 2.011307767944936, "grad_norm": 13.375, "learning_rate": 2.421049551733818e-06, "loss": 0.3037, "step": 4091 }, { "epoch": 2.0117994100294987, "grad_norm": 10.375, "learning_rate": 2.418886632483639e-06, "loss": 0.2689, "step": 4092 }, { "epoch": 2.012291052114061, "grad_norm": 9.875, "learning_rate": 2.416724324652167e-06, "loss": 0.2313, "step": 4093 }, { "epoch": 2.0127826941986235, "grad_norm": 25.0, "learning_rate": 2.41456262887467e-06, "loss": 0.3271, "step": 4094 }, { "epoch": 2.0132743362831858, "grad_norm": 17.875, "learning_rate": 2.412401545786245e-06, "loss": 0.2656, "step": 4095 }, { "epoch": 2.0137659783677484, "grad_norm": 11.75, "learning_rate": 2.4102410760218024e-06, "loss": 0.3041, "step": 4096 }, { "epoch": 2.0142576204523106, "grad_norm": 20.375, "learning_rate": 2.4080812202160776e-06, "loss": 0.2592, "step": 4097 }, { "epoch": 2.0147492625368733, "grad_norm": 25.0, "learning_rate": 2.405921979003621e-06, "loss": 0.4358, "step": 4098 }, { "epoch": 2.0152409046214355, "grad_norm": 21.875, "learning_rate": 2.403763353018806e-06, "loss": 0.2545, "step": 4099 }, { "epoch": 2.015732546705998, "grad_norm": 14.4375, "learning_rate": 2.4016053428958234e-06, "loss": 0.2197, "step": 4100 }, { "epoch": 2.0162241887905603, "grad_norm": 23.0, "learning_rate": 2.399447949268686e-06, "loss": 0.4054, "step": 4101 }, { "epoch": 2.016715830875123, "grad_norm": 18.0, "learning_rate": 2.397291172771221e-06, "loss": 0.3445, "step": 4102 }, { "epoch": 2.017207472959685, "grad_norm": 31.5, "learning_rate": 2.395135014037078e-06, "loss": 0.3723, "step": 4103 }, { "epoch": 2.017699115044248, "grad_norm": 24.875, "learning_rate": 2.392979473699721e-06, "loss": 0.2172, "step": 4104 }, { "epoch": 2.01819075712881, "grad_norm": 15.125, "learning_rate": 2.390824552392439e-06, "loss": 0.3182, "step": 4105 }, { "epoch": 2.0186823992133727, "grad_norm": 22.375, "learning_rate": 2.3886702507483303e-06, "loss": 0.287, "step": 4106 }, { "epoch": 2.0191740412979353, "grad_norm": 17.625, "learning_rate": 2.3865165694003177e-06, "loss": 0.352, "step": 4107 }, { "epoch": 2.0196656833824975, "grad_norm": 19.25, "learning_rate": 2.3843635089811417e-06, "loss": 0.2798, "step": 4108 }, { "epoch": 2.02015732546706, "grad_norm": 22.5, "learning_rate": 2.382211070123358e-06, "loss": 0.2728, "step": 4109 }, { "epoch": 2.0206489675516224, "grad_norm": 19.625, "learning_rate": 2.3800592534593385e-06, "loss": 0.3116, "step": 4110 }, { "epoch": 2.021140609636185, "grad_norm": 23.125, "learning_rate": 2.377908059621277e-06, "loss": 0.3497, "step": 4111 }, { "epoch": 2.0216322517207472, "grad_norm": 11.3125, "learning_rate": 2.375757489241178e-06, "loss": 0.185, "step": 4112 }, { "epoch": 2.02212389380531, "grad_norm": 9.5625, "learning_rate": 2.3736075429508695e-06, "loss": 0.2591, "step": 4113 }, { "epoch": 2.022615535889872, "grad_norm": 31.5, "learning_rate": 2.3714582213819873e-06, "loss": 0.3662, "step": 4114 }, { "epoch": 2.0231071779744347, "grad_norm": 24.625, "learning_rate": 2.3693095251659973e-06, "loss": 0.2988, "step": 4115 }, { "epoch": 2.023598820058997, "grad_norm": 19.5, "learning_rate": 2.367161454934168e-06, "loss": 0.4178, "step": 4116 }, { "epoch": 2.0240904621435596, "grad_norm": 26.0, "learning_rate": 2.3650140113175932e-06, "loss": 0.1445, "step": 4117 }, { "epoch": 2.024582104228122, "grad_norm": 29.875, "learning_rate": 2.362867194947175e-06, "loss": 0.3059, "step": 4118 }, { "epoch": 2.0250737463126844, "grad_norm": 20.625, "learning_rate": 2.3607210064536394e-06, "loss": 0.2335, "step": 4119 }, { "epoch": 2.0255653883972466, "grad_norm": 36.5, "learning_rate": 2.3585754464675205e-06, "loss": 0.5133, "step": 4120 }, { "epoch": 2.0260570304818093, "grad_norm": 21.625, "learning_rate": 2.3564305156191716e-06, "loss": 0.3141, "step": 4121 }, { "epoch": 2.0265486725663715, "grad_norm": 16.5, "learning_rate": 2.3542862145387615e-06, "loss": 0.3304, "step": 4122 }, { "epoch": 2.027040314650934, "grad_norm": 10.375, "learning_rate": 2.3521425438562747e-06, "loss": 0.2111, "step": 4123 }, { "epoch": 2.0275319567354964, "grad_norm": 14.875, "learning_rate": 2.3499995042015056e-06, "loss": 0.2474, "step": 4124 }, { "epoch": 2.028023598820059, "grad_norm": 22.875, "learning_rate": 2.3478570962040695e-06, "loss": 0.3519, "step": 4125 }, { "epoch": 2.0285152409046217, "grad_norm": 16.25, "learning_rate": 2.3457153204933894e-06, "loss": 0.2573, "step": 4126 }, { "epoch": 2.029006882989184, "grad_norm": 22.375, "learning_rate": 2.343574177698709e-06, "loss": 0.2955, "step": 4127 }, { "epoch": 2.0294985250737465, "grad_norm": 24.125, "learning_rate": 2.341433668449084e-06, "loss": 0.4201, "step": 4128 }, { "epoch": 2.0299901671583087, "grad_norm": 39.25, "learning_rate": 2.33929379337338e-06, "loss": 0.4987, "step": 4129 }, { "epoch": 2.0304818092428714, "grad_norm": 13.5, "learning_rate": 2.3371545531002824e-06, "loss": 0.2463, "step": 4130 }, { "epoch": 2.0309734513274336, "grad_norm": 26.875, "learning_rate": 2.3350159482582853e-06, "loss": 0.4226, "step": 4131 }, { "epoch": 2.031465093411996, "grad_norm": 51.25, "learning_rate": 2.3328779794757017e-06, "loss": 0.4281, "step": 4132 }, { "epoch": 2.0319567354965584, "grad_norm": 15.375, "learning_rate": 2.3307406473806496e-06, "loss": 0.1808, "step": 4133 }, { "epoch": 2.032448377581121, "grad_norm": 17.25, "learning_rate": 2.3286039526010673e-06, "loss": 0.3194, "step": 4134 }, { "epoch": 2.0329400196656833, "grad_norm": 10.875, "learning_rate": 2.3264678957647003e-06, "loss": 0.1032, "step": 4135 }, { "epoch": 2.033431661750246, "grad_norm": 22.875, "learning_rate": 2.324332477499112e-06, "loss": 0.1781, "step": 4136 }, { "epoch": 2.033923303834808, "grad_norm": 15.0, "learning_rate": 2.3221976984316737e-06, "loss": 0.3049, "step": 4137 }, { "epoch": 2.0344149459193708, "grad_norm": 9.5625, "learning_rate": 2.320063559189574e-06, "loss": 0.2833, "step": 4138 }, { "epoch": 2.034906588003933, "grad_norm": 22.375, "learning_rate": 2.3179300603998055e-06, "loss": 0.3892, "step": 4139 }, { "epoch": 2.0353982300884956, "grad_norm": 16.875, "learning_rate": 2.3157972026891826e-06, "loss": 0.2344, "step": 4140 }, { "epoch": 2.035889872173058, "grad_norm": 25.625, "learning_rate": 2.313664986684321e-06, "loss": 0.2967, "step": 4141 }, { "epoch": 2.0363815142576205, "grad_norm": 26.75, "learning_rate": 2.311533413011658e-06, "loss": 0.4077, "step": 4142 }, { "epoch": 2.0368731563421827, "grad_norm": 38.75, "learning_rate": 2.3094024822974303e-06, "loss": 0.3429, "step": 4143 }, { "epoch": 2.0373647984267453, "grad_norm": 27.625, "learning_rate": 2.307272195167701e-06, "loss": 0.2647, "step": 4144 }, { "epoch": 2.037856440511308, "grad_norm": 21.125, "learning_rate": 2.30514255224833e-06, "loss": 0.3787, "step": 4145 }, { "epoch": 2.03834808259587, "grad_norm": 46.75, "learning_rate": 2.303013554164998e-06, "loss": 0.4506, "step": 4146 }, { "epoch": 2.038839724680433, "grad_norm": 21.125, "learning_rate": 2.3008852015431877e-06, "loss": 0.4047, "step": 4147 }, { "epoch": 2.039331366764995, "grad_norm": 22.875, "learning_rate": 2.2987574950081997e-06, "loss": 0.2018, "step": 4148 }, { "epoch": 2.0398230088495577, "grad_norm": 27.375, "learning_rate": 2.296630435185138e-06, "loss": 0.5214, "step": 4149 }, { "epoch": 2.04031465093412, "grad_norm": 24.25, "learning_rate": 2.294504022698924e-06, "loss": 0.3421, "step": 4150 }, { "epoch": 2.0408062930186825, "grad_norm": 20.75, "learning_rate": 2.2923782581742804e-06, "loss": 0.2909, "step": 4151 }, { "epoch": 2.0412979351032448, "grad_norm": 17.625, "learning_rate": 2.2902531422357496e-06, "loss": 0.299, "step": 4152 }, { "epoch": 2.0417895771878074, "grad_norm": 22.375, "learning_rate": 2.288128675507673e-06, "loss": 0.2971, "step": 4153 }, { "epoch": 2.0422812192723696, "grad_norm": 26.25, "learning_rate": 2.2860048586142114e-06, "loss": 0.4909, "step": 4154 }, { "epoch": 2.0427728613569323, "grad_norm": 11.6875, "learning_rate": 2.283881692179325e-06, "loss": 0.2101, "step": 4155 }, { "epoch": 2.0432645034414945, "grad_norm": 38.25, "learning_rate": 2.28175917682679e-06, "loss": 0.5605, "step": 4156 }, { "epoch": 2.043756145526057, "grad_norm": 20.25, "learning_rate": 2.2796373131801867e-06, "loss": 0.2527, "step": 4157 }, { "epoch": 2.0442477876106193, "grad_norm": 8.4375, "learning_rate": 2.2775161018629073e-06, "loss": 0.1865, "step": 4158 }, { "epoch": 2.044739429695182, "grad_norm": 11.875, "learning_rate": 2.275395543498151e-06, "loss": 0.1955, "step": 4159 }, { "epoch": 2.045231071779744, "grad_norm": 24.625, "learning_rate": 2.2732756387089266e-06, "loss": 0.344, "step": 4160 }, { "epoch": 2.045722713864307, "grad_norm": 27.5, "learning_rate": 2.2711563881180465e-06, "loss": 0.2029, "step": 4161 }, { "epoch": 2.046214355948869, "grad_norm": 15.1875, "learning_rate": 2.2690377923481366e-06, "loss": 0.2642, "step": 4162 }, { "epoch": 2.0467059980334317, "grad_norm": 28.5, "learning_rate": 2.2669198520216255e-06, "loss": 0.247, "step": 4163 }, { "epoch": 2.047197640117994, "grad_norm": 10.1875, "learning_rate": 2.264802567760752e-06, "loss": 0.2731, "step": 4164 }, { "epoch": 2.0476892822025565, "grad_norm": 16.25, "learning_rate": 2.262685940187563e-06, "loss": 0.2305, "step": 4165 }, { "epoch": 2.048180924287119, "grad_norm": 50.25, "learning_rate": 2.2605699699239107e-06, "loss": 0.4838, "step": 4166 }, { "epoch": 2.0486725663716814, "grad_norm": 26.0, "learning_rate": 2.258454657591453e-06, "loss": 0.3693, "step": 4167 }, { "epoch": 2.049164208456244, "grad_norm": 17.375, "learning_rate": 2.2563400038116586e-06, "loss": 0.342, "step": 4168 }, { "epoch": 2.0496558505408062, "grad_norm": 45.5, "learning_rate": 2.2542260092057966e-06, "loss": 0.5226, "step": 4169 }, { "epoch": 2.050147492625369, "grad_norm": 22.125, "learning_rate": 2.252112674394949e-06, "loss": 0.3708, "step": 4170 }, { "epoch": 2.050639134709931, "grad_norm": 12.3125, "learning_rate": 2.250000000000001e-06, "loss": 0.2025, "step": 4171 }, { "epoch": 2.0511307767944937, "grad_norm": 39.75, "learning_rate": 2.2478879866416414e-06, "loss": 0.4185, "step": 4172 }, { "epoch": 2.051622418879056, "grad_norm": 27.125, "learning_rate": 2.245776634940368e-06, "loss": 0.5498, "step": 4173 }, { "epoch": 2.0521140609636186, "grad_norm": 43.25, "learning_rate": 2.243665945516486e-06, "loss": 0.6641, "step": 4174 }, { "epoch": 2.052605703048181, "grad_norm": 16.25, "learning_rate": 2.241555918990099e-06, "loss": 0.2751, "step": 4175 }, { "epoch": 2.0530973451327434, "grad_norm": 18.125, "learning_rate": 2.2394465559811235e-06, "loss": 0.353, "step": 4176 }, { "epoch": 2.0535889872173057, "grad_norm": 38.5, "learning_rate": 2.237337857109277e-06, "loss": 0.5116, "step": 4177 }, { "epoch": 2.0540806293018683, "grad_norm": 14.375, "learning_rate": 2.235229822994082e-06, "loss": 0.2238, "step": 4178 }, { "epoch": 2.0545722713864305, "grad_norm": 29.375, "learning_rate": 2.2331224542548677e-06, "loss": 0.351, "step": 4179 }, { "epoch": 2.055063913470993, "grad_norm": 27.5, "learning_rate": 2.231015751510762e-06, "loss": 0.3951, "step": 4180 }, { "epoch": 2.0555555555555554, "grad_norm": 23.25, "learning_rate": 2.2289097153807093e-06, "loss": 0.2918, "step": 4181 }, { "epoch": 2.056047197640118, "grad_norm": 13.5625, "learning_rate": 2.226804346483444e-06, "loss": 0.2436, "step": 4182 }, { "epoch": 2.0565388397246807, "grad_norm": 19.75, "learning_rate": 2.2246996454375153e-06, "loss": 0.3616, "step": 4183 }, { "epoch": 2.057030481809243, "grad_norm": 17.0, "learning_rate": 2.2225956128612676e-06, "loss": 0.2524, "step": 4184 }, { "epoch": 2.0575221238938055, "grad_norm": 29.5, "learning_rate": 2.220492249372857e-06, "loss": 0.5021, "step": 4185 }, { "epoch": 2.0580137659783677, "grad_norm": 33.0, "learning_rate": 2.2183895555902364e-06, "loss": 0.3444, "step": 4186 }, { "epoch": 2.0585054080629304, "grad_norm": 10.75, "learning_rate": 2.216287532131165e-06, "loss": 0.2202, "step": 4187 }, { "epoch": 2.0589970501474926, "grad_norm": 33.0, "learning_rate": 2.214186179613206e-06, "loss": 0.3704, "step": 4188 }, { "epoch": 2.059488692232055, "grad_norm": 11.8125, "learning_rate": 2.2120854986537253e-06, "loss": 0.2435, "step": 4189 }, { "epoch": 2.0599803343166174, "grad_norm": 20.25, "learning_rate": 2.209985489869887e-06, "loss": 0.2812, "step": 4190 }, { "epoch": 2.06047197640118, "grad_norm": 21.0, "learning_rate": 2.2078861538786647e-06, "loss": 0.2985, "step": 4191 }, { "epoch": 2.0609636184857423, "grad_norm": 18.875, "learning_rate": 2.2057874912968276e-06, "loss": 0.3037, "step": 4192 }, { "epoch": 2.061455260570305, "grad_norm": 17.625, "learning_rate": 2.2036895027409535e-06, "loss": 0.3055, "step": 4193 }, { "epoch": 2.061946902654867, "grad_norm": 28.0, "learning_rate": 2.201592188827416e-06, "loss": 0.243, "step": 4194 }, { "epoch": 2.06243854473943, "grad_norm": 15.5, "learning_rate": 2.199495550172394e-06, "loss": 0.2782, "step": 4195 }, { "epoch": 2.062930186823992, "grad_norm": 27.125, "learning_rate": 2.1973995873918686e-06, "loss": 0.3573, "step": 4196 }, { "epoch": 2.0634218289085546, "grad_norm": 20.875, "learning_rate": 2.195304301101622e-06, "loss": 0.3362, "step": 4197 }, { "epoch": 2.063913470993117, "grad_norm": 26.125, "learning_rate": 2.193209691917234e-06, "loss": 0.4433, "step": 4198 }, { "epoch": 2.0644051130776795, "grad_norm": 56.75, "learning_rate": 2.1911157604540915e-06, "loss": 0.3885, "step": 4199 }, { "epoch": 2.0648967551622417, "grad_norm": 28.125, "learning_rate": 2.189022507327376e-06, "loss": 0.2961, "step": 4200 }, { "epoch": 2.0653883972468043, "grad_norm": 14.1875, "learning_rate": 2.1869299331520753e-06, "loss": 0.268, "step": 4201 }, { "epoch": 2.0658800393313665, "grad_norm": 13.5625, "learning_rate": 2.184838038542971e-06, "loss": 0.2807, "step": 4202 }, { "epoch": 2.066371681415929, "grad_norm": 37.25, "learning_rate": 2.182746824114656e-06, "loss": 0.4088, "step": 4203 }, { "epoch": 2.066863323500492, "grad_norm": 33.5, "learning_rate": 2.180656290481511e-06, "loss": 0.4692, "step": 4204 }, { "epoch": 2.067354965585054, "grad_norm": 22.75, "learning_rate": 2.178566438257726e-06, "loss": 0.2222, "step": 4205 }, { "epoch": 2.0678466076696167, "grad_norm": 23.125, "learning_rate": 2.176477268057284e-06, "loss": 0.3222, "step": 4206 }, { "epoch": 2.068338249754179, "grad_norm": 11.25, "learning_rate": 2.174388780493974e-06, "loss": 0.0462, "step": 4207 }, { "epoch": 2.0688298918387416, "grad_norm": 20.875, "learning_rate": 2.1723009761813774e-06, "loss": 0.374, "step": 4208 }, { "epoch": 2.0693215339233038, "grad_norm": 34.0, "learning_rate": 2.170213855732881e-06, "loss": 0.4306, "step": 4209 }, { "epoch": 2.0698131760078664, "grad_norm": 49.5, "learning_rate": 2.1681274197616678e-06, "loss": 0.5267, "step": 4210 }, { "epoch": 2.0703048180924286, "grad_norm": 30.75, "learning_rate": 2.166041668880722e-06, "loss": 0.4377, "step": 4211 }, { "epoch": 2.0707964601769913, "grad_norm": 19.75, "learning_rate": 2.163956603702822e-06, "loss": 0.3214, "step": 4212 }, { "epoch": 2.0712881022615535, "grad_norm": 22.625, "learning_rate": 2.1618722248405502e-06, "loss": 0.3532, "step": 4213 }, { "epoch": 2.071779744346116, "grad_norm": 24.5, "learning_rate": 2.1597885329062816e-06, "loss": 0.3598, "step": 4214 }, { "epoch": 2.0722713864306783, "grad_norm": 17.0, "learning_rate": 2.157705528512194e-06, "loss": 0.2262, "step": 4215 }, { "epoch": 2.072763028515241, "grad_norm": 24.75, "learning_rate": 2.1556232122702637e-06, "loss": 0.3145, "step": 4216 }, { "epoch": 2.073254670599803, "grad_norm": 23.75, "learning_rate": 2.153541584792259e-06, "loss": 0.2521, "step": 4217 }, { "epoch": 2.073746312684366, "grad_norm": 18.0, "learning_rate": 2.151460646689752e-06, "loss": 0.2386, "step": 4218 }, { "epoch": 2.074237954768928, "grad_norm": 26.25, "learning_rate": 2.149380398574109e-06, "loss": 0.3967, "step": 4219 }, { "epoch": 2.0747295968534907, "grad_norm": 41.25, "learning_rate": 2.1473008410564973e-06, "loss": 0.3524, "step": 4220 }, { "epoch": 2.0752212389380533, "grad_norm": 42.5, "learning_rate": 2.145221974747874e-06, "loss": 0.4727, "step": 4221 }, { "epoch": 2.0757128810226155, "grad_norm": 19.75, "learning_rate": 2.1431438002590014e-06, "loss": 0.2607, "step": 4222 }, { "epoch": 2.076204523107178, "grad_norm": 21.25, "learning_rate": 2.141066318200432e-06, "loss": 0.2455, "step": 4223 }, { "epoch": 2.0766961651917404, "grad_norm": 16.875, "learning_rate": 2.138989529182518e-06, "loss": 0.1432, "step": 4224 }, { "epoch": 2.077187807276303, "grad_norm": 18.5, "learning_rate": 2.136913433815409e-06, "loss": 0.3114, "step": 4225 }, { "epoch": 2.0776794493608652, "grad_norm": 28.25, "learning_rate": 2.13483803270905e-06, "loss": 0.3292, "step": 4226 }, { "epoch": 2.078171091445428, "grad_norm": 27.875, "learning_rate": 2.1327633264731796e-06, "loss": 0.2442, "step": 4227 }, { "epoch": 2.07866273352999, "grad_norm": 24.5, "learning_rate": 2.130689315717336e-06, "loss": 0.327, "step": 4228 }, { "epoch": 2.0791543756145527, "grad_norm": 24.0, "learning_rate": 2.128616001050849e-06, "loss": 0.2249, "step": 4229 }, { "epoch": 2.079646017699115, "grad_norm": 12.3125, "learning_rate": 2.1265433830828485e-06, "loss": 0.1837, "step": 4230 }, { "epoch": 2.0801376597836776, "grad_norm": 31.75, "learning_rate": 2.124471462422254e-06, "loss": 0.4012, "step": 4231 }, { "epoch": 2.08062930186824, "grad_norm": 19.75, "learning_rate": 2.122400239677786e-06, "loss": 0.2612, "step": 4232 }, { "epoch": 2.0811209439528024, "grad_norm": 13.875, "learning_rate": 2.120329715457957e-06, "loss": 0.3313, "step": 4233 }, { "epoch": 2.0816125860373647, "grad_norm": 33.75, "learning_rate": 2.118259890371076e-06, "loss": 0.3586, "step": 4234 }, { "epoch": 2.0821042281219273, "grad_norm": 14.6875, "learning_rate": 2.1161907650252416e-06, "loss": 0.221, "step": 4235 }, { "epoch": 2.0825958702064895, "grad_norm": 25.0, "learning_rate": 2.114122340028355e-06, "loss": 0.381, "step": 4236 }, { "epoch": 2.083087512291052, "grad_norm": 21.0, "learning_rate": 2.1120546159881028e-06, "loss": 0.2616, "step": 4237 }, { "epoch": 2.0835791543756144, "grad_norm": 28.125, "learning_rate": 2.1099875935119737e-06, "loss": 0.3362, "step": 4238 }, { "epoch": 2.084070796460177, "grad_norm": 34.0, "learning_rate": 2.1079212732072416e-06, "loss": 0.3461, "step": 4239 }, { "epoch": 2.084562438544739, "grad_norm": 15.0625, "learning_rate": 2.105855655680986e-06, "loss": 0.3699, "step": 4240 }, { "epoch": 2.085054080629302, "grad_norm": 28.875, "learning_rate": 2.103790741540067e-06, "loss": 0.3523, "step": 4241 }, { "epoch": 2.0855457227138645, "grad_norm": 12.8125, "learning_rate": 2.101726531391149e-06, "loss": 0.321, "step": 4242 }, { "epoch": 2.0860373647984267, "grad_norm": 11.1875, "learning_rate": 2.09966302584068e-06, "loss": 0.1058, "step": 4243 }, { "epoch": 2.0865290068829894, "grad_norm": 26.375, "learning_rate": 2.0976002254949102e-06, "loss": 0.3708, "step": 4244 }, { "epoch": 2.0870206489675516, "grad_norm": 36.75, "learning_rate": 2.095538130959873e-06, "loss": 0.4154, "step": 4245 }, { "epoch": 2.087512291052114, "grad_norm": 28.875, "learning_rate": 2.0934767428414024e-06, "loss": 0.2179, "step": 4246 }, { "epoch": 2.0880039331366764, "grad_norm": 21.0, "learning_rate": 2.0914160617451215e-06, "loss": 0.3525, "step": 4247 }, { "epoch": 2.088495575221239, "grad_norm": 22.25, "learning_rate": 2.089356088276448e-06, "loss": 0.2817, "step": 4248 }, { "epoch": 2.0889872173058013, "grad_norm": 25.5, "learning_rate": 2.0872968230405868e-06, "loss": 0.3752, "step": 4249 }, { "epoch": 2.089478859390364, "grad_norm": 18.625, "learning_rate": 2.08523826664254e-06, "loss": 0.3927, "step": 4250 }, { "epoch": 2.089970501474926, "grad_norm": 16.75, "learning_rate": 2.0831804196870975e-06, "loss": 0.1975, "step": 4251 }, { "epoch": 2.090462143559489, "grad_norm": 37.25, "learning_rate": 2.081123282778844e-06, "loss": 0.3575, "step": 4252 }, { "epoch": 2.090953785644051, "grad_norm": 20.125, "learning_rate": 2.079066856522152e-06, "loss": 0.2888, "step": 4253 }, { "epoch": 2.0914454277286136, "grad_norm": 23.875, "learning_rate": 2.0770111415211885e-06, "loss": 0.3814, "step": 4254 }, { "epoch": 2.091937069813176, "grad_norm": 24.875, "learning_rate": 2.0749561383799103e-06, "loss": 0.3006, "step": 4255 }, { "epoch": 2.0924287118977385, "grad_norm": 17.0, "learning_rate": 2.0729018477020668e-06, "loss": 0.27, "step": 4256 }, { "epoch": 2.0929203539823007, "grad_norm": 29.0, "learning_rate": 2.0708482700911934e-06, "loss": 0.3295, "step": 4257 }, { "epoch": 2.0934119960668633, "grad_norm": 21.25, "learning_rate": 2.0687954061506195e-06, "loss": 0.3775, "step": 4258 }, { "epoch": 2.0939036381514256, "grad_norm": 35.25, "learning_rate": 2.0667432564834676e-06, "loss": 0.284, "step": 4259 }, { "epoch": 2.094395280235988, "grad_norm": 26.125, "learning_rate": 2.0646918216926423e-06, "loss": 0.1871, "step": 4260 }, { "epoch": 2.094886922320551, "grad_norm": 17.25, "learning_rate": 2.0626411023808453e-06, "loss": 0.3219, "step": 4261 }, { "epoch": 2.095378564405113, "grad_norm": 18.625, "learning_rate": 2.0605910991505667e-06, "loss": 0.139, "step": 4262 }, { "epoch": 2.0958702064896757, "grad_norm": 17.5, "learning_rate": 2.058541812604083e-06, "loss": 0.2258, "step": 4263 }, { "epoch": 2.096361848574238, "grad_norm": 12.125, "learning_rate": 2.0564932433434633e-06, "loss": 0.2001, "step": 4264 }, { "epoch": 2.0968534906588006, "grad_norm": 17.625, "learning_rate": 2.0544453919705665e-06, "loss": 0.1375, "step": 4265 }, { "epoch": 2.0973451327433628, "grad_norm": 20.25, "learning_rate": 2.052398259087036e-06, "loss": 0.2752, "step": 4266 }, { "epoch": 2.0978367748279254, "grad_norm": 40.0, "learning_rate": 2.0503518452943113e-06, "loss": 0.4242, "step": 4267 }, { "epoch": 2.0983284169124876, "grad_norm": 27.375, "learning_rate": 2.0483061511936105e-06, "loss": 0.362, "step": 4268 }, { "epoch": 2.0988200589970503, "grad_norm": 28.0, "learning_rate": 2.0462611773859534e-06, "loss": 0.2846, "step": 4269 }, { "epoch": 2.0993117010816125, "grad_norm": 33.5, "learning_rate": 2.0442169244721364e-06, "loss": 0.389, "step": 4270 }, { "epoch": 2.099803343166175, "grad_norm": 17.5, "learning_rate": 2.042173393052752e-06, "loss": 0.2032, "step": 4271 }, { "epoch": 2.1002949852507373, "grad_norm": 18.875, "learning_rate": 2.040130583728174e-06, "loss": 0.0799, "step": 4272 }, { "epoch": 2.1007866273353, "grad_norm": 31.625, "learning_rate": 2.038088497098572e-06, "loss": 0.2736, "step": 4273 }, { "epoch": 2.101278269419862, "grad_norm": 63.0, "learning_rate": 2.0360471337638935e-06, "loss": 0.6794, "step": 4274 }, { "epoch": 2.101769911504425, "grad_norm": 23.0, "learning_rate": 2.0340064943238834e-06, "loss": 0.461, "step": 4275 }, { "epoch": 2.102261553588987, "grad_norm": 16.75, "learning_rate": 2.031966579378065e-06, "loss": 0.3164, "step": 4276 }, { "epoch": 2.1027531956735497, "grad_norm": 22.875, "learning_rate": 2.029927389525759e-06, "loss": 0.4166, "step": 4277 }, { "epoch": 2.103244837758112, "grad_norm": 24.875, "learning_rate": 2.027888925366062e-06, "loss": 0.2559, "step": 4278 }, { "epoch": 2.1037364798426745, "grad_norm": 25.125, "learning_rate": 2.025851187497867e-06, "loss": 0.4908, "step": 4279 }, { "epoch": 2.104228121927237, "grad_norm": 27.625, "learning_rate": 2.0238141765198446e-06, "loss": 0.4288, "step": 4280 }, { "epoch": 2.1047197640117994, "grad_norm": 25.75, "learning_rate": 2.021777893030461e-06, "loss": 0.3846, "step": 4281 }, { "epoch": 2.105211406096362, "grad_norm": 26.875, "learning_rate": 2.0197423376279602e-06, "loss": 0.3501, "step": 4282 }, { "epoch": 2.1057030481809242, "grad_norm": 39.75, "learning_rate": 2.0177075109103776e-06, "loss": 0.4305, "step": 4283 }, { "epoch": 2.106194690265487, "grad_norm": 18.0, "learning_rate": 2.015673413475534e-06, "loss": 0.1613, "step": 4284 }, { "epoch": 2.106686332350049, "grad_norm": 23.25, "learning_rate": 2.0136400459210365e-06, "loss": 0.1785, "step": 4285 }, { "epoch": 2.1071779744346117, "grad_norm": 12.6875, "learning_rate": 2.0116074088442726e-06, "loss": 0.1455, "step": 4286 }, { "epoch": 2.107669616519174, "grad_norm": 28.625, "learning_rate": 2.0095755028424226e-06, "loss": 0.3165, "step": 4287 }, { "epoch": 2.1081612586037366, "grad_norm": 29.25, "learning_rate": 2.007544328512445e-06, "loss": 0.2458, "step": 4288 }, { "epoch": 2.108652900688299, "grad_norm": 19.625, "learning_rate": 2.00551388645109e-06, "loss": 0.3155, "step": 4289 }, { "epoch": 2.1091445427728615, "grad_norm": 12.75, "learning_rate": 2.003484177254884e-06, "loss": 0.1991, "step": 4290 }, { "epoch": 2.1096361848574237, "grad_norm": 22.0, "learning_rate": 2.0014552015201517e-06, "loss": 0.3546, "step": 4291 }, { "epoch": 2.1101278269419863, "grad_norm": 39.5, "learning_rate": 1.999426959842987e-06, "loss": 0.2893, "step": 4292 }, { "epoch": 2.1106194690265485, "grad_norm": 23.125, "learning_rate": 1.99739945281928e-06, "loss": 0.3374, "step": 4293 }, { "epoch": 2.111111111111111, "grad_norm": 21.25, "learning_rate": 1.9953726810446965e-06, "loss": 0.2949, "step": 4294 }, { "epoch": 2.1116027531956734, "grad_norm": 19.375, "learning_rate": 1.9933466451146927e-06, "loss": 0.3332, "step": 4295 }, { "epoch": 2.112094395280236, "grad_norm": 16.625, "learning_rate": 1.9913213456245027e-06, "loss": 0.2581, "step": 4296 }, { "epoch": 2.112586037364798, "grad_norm": 30.125, "learning_rate": 1.98929678316915e-06, "loss": 0.3152, "step": 4297 }, { "epoch": 2.113077679449361, "grad_norm": 15.1875, "learning_rate": 1.9872729583434384e-06, "loss": 0.2011, "step": 4298 }, { "epoch": 2.1135693215339235, "grad_norm": 28.625, "learning_rate": 1.985249871741957e-06, "loss": 0.3623, "step": 4299 }, { "epoch": 2.1140609636184857, "grad_norm": 13.5, "learning_rate": 1.9832275239590733e-06, "loss": 0.2347, "step": 4300 }, { "epoch": 2.1145526057030484, "grad_norm": 19.375, "learning_rate": 1.9812059155889443e-06, "loss": 0.1735, "step": 4301 }, { "epoch": 2.1150442477876106, "grad_norm": 16.0, "learning_rate": 1.979185047225504e-06, "loss": 0.2206, "step": 4302 }, { "epoch": 2.1155358898721732, "grad_norm": 20.0, "learning_rate": 1.977164919462472e-06, "loss": 0.2864, "step": 4303 }, { "epoch": 2.1160275319567354, "grad_norm": 24.5, "learning_rate": 1.9751455328933525e-06, "loss": 0.4922, "step": 4304 }, { "epoch": 2.116519174041298, "grad_norm": 14.375, "learning_rate": 1.973126888111425e-06, "loss": 0.1209, "step": 4305 }, { "epoch": 2.1170108161258603, "grad_norm": 48.25, "learning_rate": 1.9711089857097572e-06, "loss": 0.4295, "step": 4306 }, { "epoch": 2.117502458210423, "grad_norm": 7.6875, "learning_rate": 1.969091826281197e-06, "loss": 0.1371, "step": 4307 }, { "epoch": 2.117994100294985, "grad_norm": 26.375, "learning_rate": 1.967075410418376e-06, "loss": 0.3928, "step": 4308 }, { "epoch": 2.118485742379548, "grad_norm": 24.5, "learning_rate": 1.965059738713701e-06, "loss": 0.2083, "step": 4309 }, { "epoch": 2.11897738446411, "grad_norm": 11.9375, "learning_rate": 1.9630448117593677e-06, "loss": 0.2493, "step": 4310 }, { "epoch": 2.1194690265486726, "grad_norm": 18.75, "learning_rate": 1.9610306301473463e-06, "loss": 0.2154, "step": 4311 }, { "epoch": 2.119960668633235, "grad_norm": 22.375, "learning_rate": 1.959017194469394e-06, "loss": 0.2537, "step": 4312 }, { "epoch": 2.1204523107177975, "grad_norm": 11.1875, "learning_rate": 1.957004505317045e-06, "loss": 0.2009, "step": 4313 }, { "epoch": 2.1209439528023597, "grad_norm": 53.5, "learning_rate": 1.9549925632816174e-06, "loss": 0.3968, "step": 4314 }, { "epoch": 2.1214355948869223, "grad_norm": 16.375, "learning_rate": 1.9529813689542046e-06, "loss": 0.1834, "step": 4315 }, { "epoch": 2.1219272369714846, "grad_norm": 17.625, "learning_rate": 1.950970922925687e-06, "loss": 0.2654, "step": 4316 }, { "epoch": 2.122418879056047, "grad_norm": 33.75, "learning_rate": 1.9489612257867176e-06, "loss": 0.4464, "step": 4317 }, { "epoch": 2.12291052114061, "grad_norm": 19.125, "learning_rate": 1.9469522781277377e-06, "loss": 0.4049, "step": 4318 }, { "epoch": 2.123402163225172, "grad_norm": 15.3125, "learning_rate": 1.9449440805389603e-06, "loss": 0.1527, "step": 4319 }, { "epoch": 2.1238938053097347, "grad_norm": 24.875, "learning_rate": 1.9429366336103837e-06, "loss": 0.3504, "step": 4320 }, { "epoch": 2.124385447394297, "grad_norm": 12.25, "learning_rate": 1.9409299379317837e-06, "loss": 0.1824, "step": 4321 }, { "epoch": 2.1248770894788596, "grad_norm": 35.25, "learning_rate": 1.9389239940927177e-06, "loss": 0.3479, "step": 4322 }, { "epoch": 2.1253687315634218, "grad_norm": 10.75, "learning_rate": 1.9369188026825157e-06, "loss": 0.123, "step": 4323 }, { "epoch": 2.1258603736479844, "grad_norm": 32.25, "learning_rate": 1.934914364290295e-06, "loss": 0.4822, "step": 4324 }, { "epoch": 2.1263520157325466, "grad_norm": 26.5, "learning_rate": 1.9329106795049443e-06, "loss": 0.184, "step": 4325 }, { "epoch": 2.1268436578171093, "grad_norm": 15.9375, "learning_rate": 1.930907748915138e-06, "loss": 0.1924, "step": 4326 }, { "epoch": 2.1273352999016715, "grad_norm": 17.5, "learning_rate": 1.9289055731093194e-06, "loss": 0.2412, "step": 4327 }, { "epoch": 2.127826941986234, "grad_norm": 19.25, "learning_rate": 1.9269041526757226e-06, "loss": 0.2114, "step": 4328 }, { "epoch": 2.1283185840707963, "grad_norm": 24.125, "learning_rate": 1.924903488202348e-06, "loss": 0.3926, "step": 4329 }, { "epoch": 2.128810226155359, "grad_norm": 29.75, "learning_rate": 1.922903580276983e-06, "loss": 0.2738, "step": 4330 }, { "epoch": 2.129301868239921, "grad_norm": 12.1875, "learning_rate": 1.9209044294871837e-06, "loss": 0.2882, "step": 4331 }, { "epoch": 2.129793510324484, "grad_norm": 30.25, "learning_rate": 1.9189060364202938e-06, "loss": 0.3293, "step": 4332 }, { "epoch": 2.130285152409046, "grad_norm": 18.875, "learning_rate": 1.916908401663425e-06, "loss": 0.181, "step": 4333 }, { "epoch": 2.1307767944936087, "grad_norm": 8.625, "learning_rate": 1.914911525803474e-06, "loss": 0.1417, "step": 4334 }, { "epoch": 2.131268436578171, "grad_norm": 22.75, "learning_rate": 1.912915409427106e-06, "loss": 0.3004, "step": 4335 }, { "epoch": 2.1317600786627335, "grad_norm": 12.375, "learning_rate": 1.9109200531207746e-06, "loss": 0.1444, "step": 4336 }, { "epoch": 2.1322517207472957, "grad_norm": 29.625, "learning_rate": 1.908925457470699e-06, "loss": 0.3357, "step": 4337 }, { "epoch": 2.1327433628318584, "grad_norm": 19.125, "learning_rate": 1.9069316230628824e-06, "loss": 0.2406, "step": 4338 }, { "epoch": 2.133235004916421, "grad_norm": 31.0, "learning_rate": 1.904938550483098e-06, "loss": 0.2986, "step": 4339 }, { "epoch": 2.1337266470009832, "grad_norm": 15.375, "learning_rate": 1.9029462403169032e-06, "loss": 0.269, "step": 4340 }, { "epoch": 2.134218289085546, "grad_norm": 28.5, "learning_rate": 1.900954693149622e-06, "loss": 0.5994, "step": 4341 }, { "epoch": 2.134709931170108, "grad_norm": 40.0, "learning_rate": 1.8989639095663616e-06, "loss": 0.4446, "step": 4342 }, { "epoch": 2.1352015732546707, "grad_norm": 11.8125, "learning_rate": 1.896973890152002e-06, "loss": 0.2272, "step": 4343 }, { "epoch": 2.135693215339233, "grad_norm": 32.75, "learning_rate": 1.8949846354912007e-06, "loss": 0.3531, "step": 4344 }, { "epoch": 2.1361848574237956, "grad_norm": 24.875, "learning_rate": 1.892996146168386e-06, "loss": 0.3587, "step": 4345 }, { "epoch": 2.136676499508358, "grad_norm": 27.25, "learning_rate": 1.8910084227677655e-06, "loss": 0.4435, "step": 4346 }, { "epoch": 2.1371681415929205, "grad_norm": 11.6875, "learning_rate": 1.889021465873322e-06, "loss": 0.2061, "step": 4347 }, { "epoch": 2.1376597836774827, "grad_norm": 31.5, "learning_rate": 1.8870352760688087e-06, "loss": 0.375, "step": 4348 }, { "epoch": 2.1381514257620453, "grad_norm": 18.875, "learning_rate": 1.8850498539377574e-06, "loss": 0.2518, "step": 4349 }, { "epoch": 2.1386430678466075, "grad_norm": 5.6875, "learning_rate": 1.8830652000634756e-06, "loss": 0.1629, "step": 4350 }, { "epoch": 2.13913470993117, "grad_norm": 18.5, "learning_rate": 1.881081315029039e-06, "loss": 0.1705, "step": 4351 }, { "epoch": 2.1396263520157324, "grad_norm": 17.625, "learning_rate": 1.8790981994173033e-06, "loss": 0.2543, "step": 4352 }, { "epoch": 2.140117994100295, "grad_norm": 9.0625, "learning_rate": 1.8771158538108973e-06, "loss": 0.2219, "step": 4353 }, { "epoch": 2.1406096361848572, "grad_norm": 45.0, "learning_rate": 1.8751342787922188e-06, "loss": 0.6045, "step": 4354 }, { "epoch": 2.14110127826942, "grad_norm": 14.8125, "learning_rate": 1.8731534749434468e-06, "loss": 0.2742, "step": 4355 }, { "epoch": 2.1415929203539825, "grad_norm": 11.1875, "learning_rate": 1.8711734428465256e-06, "loss": 0.1113, "step": 4356 }, { "epoch": 2.1420845624385447, "grad_norm": 20.5, "learning_rate": 1.869194183083179e-06, "loss": 0.1571, "step": 4357 }, { "epoch": 2.1425762045231074, "grad_norm": 15.5, "learning_rate": 1.8672156962349016e-06, "loss": 0.2043, "step": 4358 }, { "epoch": 2.1430678466076696, "grad_norm": 35.0, "learning_rate": 1.8652379828829625e-06, "loss": 0.3975, "step": 4359 }, { "epoch": 2.1435594886922322, "grad_norm": 13.8125, "learning_rate": 1.863261043608399e-06, "loss": 0.2196, "step": 4360 }, { "epoch": 2.1440511307767944, "grad_norm": 39.0, "learning_rate": 1.8612848789920273e-06, "loss": 0.4713, "step": 4361 }, { "epoch": 2.144542772861357, "grad_norm": 17.125, "learning_rate": 1.8593094896144298e-06, "loss": 0.3042, "step": 4362 }, { "epoch": 2.1450344149459193, "grad_norm": 27.5, "learning_rate": 1.8573348760559665e-06, "loss": 0.4051, "step": 4363 }, { "epoch": 2.145526057030482, "grad_norm": 71.5, "learning_rate": 1.8553610388967633e-06, "loss": 0.4643, "step": 4364 }, { "epoch": 2.146017699115044, "grad_norm": 26.75, "learning_rate": 1.8533879787167279e-06, "loss": 0.4412, "step": 4365 }, { "epoch": 2.146509341199607, "grad_norm": 75.0, "learning_rate": 1.8514156960955285e-06, "loss": 0.4367, "step": 4366 }, { "epoch": 2.147000983284169, "grad_norm": 17.625, "learning_rate": 1.8494441916126133e-06, "loss": 0.1965, "step": 4367 }, { "epoch": 2.1474926253687316, "grad_norm": 14.3125, "learning_rate": 1.847473465847195e-06, "loss": 0.3418, "step": 4368 }, { "epoch": 2.147984267453294, "grad_norm": 14.3125, "learning_rate": 1.8455035193782653e-06, "loss": 0.1306, "step": 4369 }, { "epoch": 2.1484759095378565, "grad_norm": 14.0625, "learning_rate": 1.8435343527845788e-06, "loss": 0.3485, "step": 4370 }, { "epoch": 2.1489675516224187, "grad_norm": 61.0, "learning_rate": 1.8415659666446665e-06, "loss": 0.5979, "step": 4371 }, { "epoch": 2.1494591937069814, "grad_norm": 32.25, "learning_rate": 1.8395983615368287e-06, "loss": 0.33, "step": 4372 }, { "epoch": 2.1499508357915436, "grad_norm": 23.25, "learning_rate": 1.8376315380391378e-06, "loss": 0.2732, "step": 4373 }, { "epoch": 2.150442477876106, "grad_norm": 31.625, "learning_rate": 1.8356654967294312e-06, "loss": 0.3223, "step": 4374 }, { "epoch": 2.1509341199606684, "grad_norm": 35.5, "learning_rate": 1.8337002381853235e-06, "loss": 0.4235, "step": 4375 }, { "epoch": 2.151425762045231, "grad_norm": 34.5, "learning_rate": 1.831735762984193e-06, "loss": 0.2573, "step": 4376 }, { "epoch": 2.1519174041297937, "grad_norm": 13.6875, "learning_rate": 1.8297720717031939e-06, "loss": 0.2502, "step": 4377 }, { "epoch": 2.152409046214356, "grad_norm": 22.5, "learning_rate": 1.8278091649192435e-06, "loss": 0.3644, "step": 4378 }, { "epoch": 2.1529006882989186, "grad_norm": 16.0, "learning_rate": 1.825847043209034e-06, "loss": 0.2435, "step": 4379 }, { "epoch": 2.1533923303834808, "grad_norm": 7.28125, "learning_rate": 1.823885707149025e-06, "loss": 0.1902, "step": 4380 }, { "epoch": 2.1538839724680434, "grad_norm": 55.0, "learning_rate": 1.821925157315447e-06, "loss": 0.4986, "step": 4381 }, { "epoch": 2.1543756145526056, "grad_norm": 20.125, "learning_rate": 1.8199653942842947e-06, "loss": 0.2541, "step": 4382 }, { "epoch": 2.1548672566371683, "grad_norm": 18.5, "learning_rate": 1.8180064186313377e-06, "loss": 0.2748, "step": 4383 }, { "epoch": 2.1553588987217305, "grad_norm": 14.625, "learning_rate": 1.8160482309321082e-06, "loss": 0.3034, "step": 4384 }, { "epoch": 2.155850540806293, "grad_norm": 25.125, "learning_rate": 1.8140908317619119e-06, "loss": 0.383, "step": 4385 }, { "epoch": 2.1563421828908553, "grad_norm": 37.25, "learning_rate": 1.8121342216958212e-06, "loss": 0.3262, "step": 4386 }, { "epoch": 2.156833824975418, "grad_norm": 27.25, "learning_rate": 1.8101784013086776e-06, "loss": 0.2945, "step": 4387 }, { "epoch": 2.15732546705998, "grad_norm": 19.25, "learning_rate": 1.8082233711750867e-06, "loss": 0.1799, "step": 4388 }, { "epoch": 2.157817109144543, "grad_norm": 23.25, "learning_rate": 1.8062691318694273e-06, "loss": 0.2796, "step": 4389 }, { "epoch": 2.158308751229105, "grad_norm": 17.875, "learning_rate": 1.8043156839658404e-06, "loss": 0.2038, "step": 4390 }, { "epoch": 2.1588003933136677, "grad_norm": 24.25, "learning_rate": 1.8023630280382393e-06, "loss": 0.2323, "step": 4391 }, { "epoch": 2.15929203539823, "grad_norm": 12.8125, "learning_rate": 1.8004111646603041e-06, "loss": 0.4062, "step": 4392 }, { "epoch": 2.1597836774827925, "grad_norm": 31.5, "learning_rate": 1.7984600944054769e-06, "loss": 0.2811, "step": 4393 }, { "epoch": 2.160275319567355, "grad_norm": 20.25, "learning_rate": 1.7965098178469726e-06, "loss": 0.1957, "step": 4394 }, { "epoch": 2.1607669616519174, "grad_norm": 52.25, "learning_rate": 1.7945603355577711e-06, "loss": 0.4721, "step": 4395 }, { "epoch": 2.16125860373648, "grad_norm": 14.125, "learning_rate": 1.7926116481106196e-06, "loss": 0.2126, "step": 4396 }, { "epoch": 2.1617502458210422, "grad_norm": 23.625, "learning_rate": 1.790663756078028e-06, "loss": 0.2697, "step": 4397 }, { "epoch": 2.162241887905605, "grad_norm": 24.625, "learning_rate": 1.7887166600322787e-06, "loss": 0.3388, "step": 4398 }, { "epoch": 2.162733529990167, "grad_norm": 23.125, "learning_rate": 1.7867703605454134e-06, "loss": 0.1831, "step": 4399 }, { "epoch": 2.1632251720747298, "grad_norm": 18.875, "learning_rate": 1.784824858189247e-06, "loss": 0.2289, "step": 4400 }, { "epoch": 2.163716814159292, "grad_norm": 22.0, "learning_rate": 1.7828801535353509e-06, "loss": 0.1754, "step": 4401 }, { "epoch": 2.1642084562438546, "grad_norm": 30.875, "learning_rate": 1.7809362471550748e-06, "loss": 0.178, "step": 4402 }, { "epoch": 2.164700098328417, "grad_norm": 24.5, "learning_rate": 1.7789931396195218e-06, "loss": 0.2647, "step": 4403 }, { "epoch": 2.1651917404129795, "grad_norm": 60.5, "learning_rate": 1.7770508314995683e-06, "loss": 0.5086, "step": 4404 }, { "epoch": 2.1656833824975417, "grad_norm": 22.5, "learning_rate": 1.7751093233658503e-06, "loss": 0.2744, "step": 4405 }, { "epoch": 2.1661750245821043, "grad_norm": 36.75, "learning_rate": 1.7731686157887736e-06, "loss": 0.5085, "step": 4406 }, { "epoch": 2.1666666666666665, "grad_norm": 34.25, "learning_rate": 1.7712287093385037e-06, "loss": 0.3367, "step": 4407 }, { "epoch": 2.167158308751229, "grad_norm": 18.875, "learning_rate": 1.7692896045849755e-06, "loss": 0.2287, "step": 4408 }, { "epoch": 2.1676499508357914, "grad_norm": 38.75, "learning_rate": 1.7673513020978867e-06, "loss": 0.2219, "step": 4409 }, { "epoch": 2.168141592920354, "grad_norm": 19.375, "learning_rate": 1.7654138024466993e-06, "loss": 0.2533, "step": 4410 }, { "epoch": 2.1686332350049162, "grad_norm": 19.625, "learning_rate": 1.763477106200637e-06, "loss": 0.3526, "step": 4411 }, { "epoch": 2.169124877089479, "grad_norm": 21.375, "learning_rate": 1.7615412139286925e-06, "loss": 0.3614, "step": 4412 }, { "epoch": 2.169616519174041, "grad_norm": 33.0, "learning_rate": 1.7596061261996163e-06, "loss": 0.3421, "step": 4413 }, { "epoch": 2.1701081612586037, "grad_norm": 31.25, "learning_rate": 1.7576718435819287e-06, "loss": 0.3666, "step": 4414 }, { "epoch": 2.1705998033431664, "grad_norm": 15.25, "learning_rate": 1.7557383666439056e-06, "loss": 0.3422, "step": 4415 }, { "epoch": 2.1710914454277286, "grad_norm": 14.25, "learning_rate": 1.753805695953598e-06, "loss": 0.2152, "step": 4416 }, { "epoch": 2.1715830875122912, "grad_norm": 25.0, "learning_rate": 1.7518738320788074e-06, "loss": 0.2121, "step": 4417 }, { "epoch": 2.1720747295968534, "grad_norm": 26.875, "learning_rate": 1.7499427755871076e-06, "loss": 0.2173, "step": 4418 }, { "epoch": 2.172566371681416, "grad_norm": 19.0, "learning_rate": 1.748012527045828e-06, "loss": 0.3666, "step": 4419 }, { "epoch": 2.1730580137659783, "grad_norm": 32.25, "learning_rate": 1.7460830870220672e-06, "loss": 0.4694, "step": 4420 }, { "epoch": 2.173549655850541, "grad_norm": 26.0, "learning_rate": 1.7441544560826803e-06, "loss": 0.241, "step": 4421 }, { "epoch": 2.174041297935103, "grad_norm": 23.875, "learning_rate": 1.7422266347942908e-06, "loss": 0.3505, "step": 4422 }, { "epoch": 2.174532940019666, "grad_norm": 21.875, "learning_rate": 1.740299623723276e-06, "loss": 0.3845, "step": 4423 }, { "epoch": 2.175024582104228, "grad_norm": 22.0, "learning_rate": 1.7383734234357874e-06, "loss": 0.4542, "step": 4424 }, { "epoch": 2.1755162241887906, "grad_norm": 44.5, "learning_rate": 1.736448034497725e-06, "loss": 0.2366, "step": 4425 }, { "epoch": 2.176007866273353, "grad_norm": 12.375, "learning_rate": 1.7345234574747613e-06, "loss": 0.1272, "step": 4426 }, { "epoch": 2.1764995083579155, "grad_norm": 18.125, "learning_rate": 1.7325996929323214e-06, "loss": 0.1266, "step": 4427 }, { "epoch": 2.1769911504424777, "grad_norm": 16.75, "learning_rate": 1.7306767414355994e-06, "loss": 0.3027, "step": 4428 }, { "epoch": 2.1774827925270404, "grad_norm": 26.25, "learning_rate": 1.7287546035495436e-06, "loss": 0.3988, "step": 4429 }, { "epoch": 2.1779744346116026, "grad_norm": 28.75, "learning_rate": 1.7268332798388684e-06, "loss": 0.2503, "step": 4430 }, { "epoch": 2.178466076696165, "grad_norm": 37.0, "learning_rate": 1.7249127708680474e-06, "loss": 0.2544, "step": 4431 }, { "epoch": 2.178957718780728, "grad_norm": 32.75, "learning_rate": 1.722993077201316e-06, "loss": 0.3629, "step": 4432 }, { "epoch": 2.17944936086529, "grad_norm": 24.75, "learning_rate": 1.7210741994026657e-06, "loss": 0.4011, "step": 4433 }, { "epoch": 2.1799410029498527, "grad_norm": 19.75, "learning_rate": 1.7191561380358531e-06, "loss": 0.2653, "step": 4434 }, { "epoch": 2.180432645034415, "grad_norm": 37.25, "learning_rate": 1.7172388936643944e-06, "loss": 0.3619, "step": 4435 }, { "epoch": 2.1809242871189776, "grad_norm": 24.25, "learning_rate": 1.7153224668515615e-06, "loss": 0.2908, "step": 4436 }, { "epoch": 2.1814159292035398, "grad_norm": 30.375, "learning_rate": 1.713406858160393e-06, "loss": 0.3216, "step": 4437 }, { "epoch": 2.1819075712881024, "grad_norm": 21.5, "learning_rate": 1.7114920681536792e-06, "loss": 0.4623, "step": 4438 }, { "epoch": 2.1823992133726646, "grad_norm": 42.25, "learning_rate": 1.7095780973939762e-06, "loss": 0.5663, "step": 4439 }, { "epoch": 2.1828908554572273, "grad_norm": 22.5, "learning_rate": 1.7076649464435965e-06, "loss": 0.3948, "step": 4440 }, { "epoch": 2.1833824975417895, "grad_norm": 25.375, "learning_rate": 1.7057526158646146e-06, "loss": 0.2174, "step": 4441 }, { "epoch": 2.183874139626352, "grad_norm": 26.25, "learning_rate": 1.7038411062188593e-06, "loss": 0.1841, "step": 4442 }, { "epoch": 2.1843657817109143, "grad_norm": 50.5, "learning_rate": 1.7019304180679225e-06, "loss": 0.3254, "step": 4443 }, { "epoch": 2.184857423795477, "grad_norm": 33.75, "learning_rate": 1.7000205519731508e-06, "loss": 0.4014, "step": 4444 }, { "epoch": 2.185349065880039, "grad_norm": 21.0, "learning_rate": 1.6981115084956525e-06, "loss": 0.2966, "step": 4445 }, { "epoch": 2.185840707964602, "grad_norm": 29.75, "learning_rate": 1.6962032881962938e-06, "loss": 0.2575, "step": 4446 }, { "epoch": 2.186332350049164, "grad_norm": 15.0, "learning_rate": 1.6942958916356993e-06, "loss": 0.1826, "step": 4447 }, { "epoch": 2.1868239921337267, "grad_norm": 14.0625, "learning_rate": 1.6923893193742484e-06, "loss": 0.2413, "step": 4448 }, { "epoch": 2.187315634218289, "grad_norm": 18.375, "learning_rate": 1.6904835719720831e-06, "loss": 0.2822, "step": 4449 }, { "epoch": 2.1878072763028515, "grad_norm": 28.75, "learning_rate": 1.6885786499890981e-06, "loss": 0.3385, "step": 4450 }, { "epoch": 2.1882989183874137, "grad_norm": 29.5, "learning_rate": 1.6866745539849507e-06, "loss": 0.1434, "step": 4451 }, { "epoch": 2.1887905604719764, "grad_norm": 30.375, "learning_rate": 1.6847712845190488e-06, "loss": 0.2879, "step": 4452 }, { "epoch": 2.189282202556539, "grad_norm": 31.0, "learning_rate": 1.682868842150567e-06, "loss": 0.4161, "step": 4453 }, { "epoch": 2.1897738446411013, "grad_norm": 22.875, "learning_rate": 1.680967227438428e-06, "loss": 0.2563, "step": 4454 }, { "epoch": 2.190265486725664, "grad_norm": 29.0, "learning_rate": 1.679066440941317e-06, "loss": 0.3957, "step": 4455 }, { "epoch": 2.190757128810226, "grad_norm": 18.625, "learning_rate": 1.6771664832176715e-06, "loss": 0.2079, "step": 4456 }, { "epoch": 2.1912487708947888, "grad_norm": 12.4375, "learning_rate": 1.6752673548256899e-06, "loss": 0.3217, "step": 4457 }, { "epoch": 2.191740412979351, "grad_norm": 41.0, "learning_rate": 1.6733690563233226e-06, "loss": 0.4029, "step": 4458 }, { "epoch": 2.1922320550639136, "grad_norm": 26.875, "learning_rate": 1.6714715882682808e-06, "loss": 0.2347, "step": 4459 }, { "epoch": 2.192723697148476, "grad_norm": 13.875, "learning_rate": 1.669574951218025e-06, "loss": 0.3737, "step": 4460 }, { "epoch": 2.1932153392330385, "grad_norm": 13.25, "learning_rate": 1.6676791457297826e-06, "loss": 0.1664, "step": 4461 }, { "epoch": 2.1937069813176007, "grad_norm": 17.75, "learning_rate": 1.6657841723605237e-06, "loss": 0.1253, "step": 4462 }, { "epoch": 2.1941986234021633, "grad_norm": 20.125, "learning_rate": 1.6638900316669853e-06, "loss": 0.3465, "step": 4463 }, { "epoch": 2.1946902654867255, "grad_norm": 9.375, "learning_rate": 1.66199672420565e-06, "loss": 0.2565, "step": 4464 }, { "epoch": 2.195181907571288, "grad_norm": 12.5, "learning_rate": 1.6601042505327636e-06, "loss": 0.2013, "step": 4465 }, { "epoch": 2.1956735496558504, "grad_norm": 50.5, "learning_rate": 1.6582126112043213e-06, "loss": 0.3611, "step": 4466 }, { "epoch": 2.196165191740413, "grad_norm": 26.25, "learning_rate": 1.6563218067760764e-06, "loss": 0.2395, "step": 4467 }, { "epoch": 2.1966568338249752, "grad_norm": 60.75, "learning_rate": 1.654431837803536e-06, "loss": 0.3739, "step": 4468 }, { "epoch": 2.197148475909538, "grad_norm": 42.5, "learning_rate": 1.6525427048419632e-06, "loss": 0.5116, "step": 4469 }, { "epoch": 2.1976401179941005, "grad_norm": 15.8125, "learning_rate": 1.6506544084463715e-06, "loss": 0.2989, "step": 4470 }, { "epoch": 2.1981317600786627, "grad_norm": 23.125, "learning_rate": 1.6487669491715333e-06, "loss": 0.3686, "step": 4471 }, { "epoch": 2.198623402163225, "grad_norm": 14.5, "learning_rate": 1.6468803275719702e-06, "loss": 0.1923, "step": 4472 }, { "epoch": 2.1991150442477876, "grad_norm": 28.125, "learning_rate": 1.6449945442019621e-06, "loss": 0.3611, "step": 4473 }, { "epoch": 2.1996066863323502, "grad_norm": 29.25, "learning_rate": 1.643109599615541e-06, "loss": 0.4007, "step": 4474 }, { "epoch": 2.2000983284169124, "grad_norm": 22.5, "learning_rate": 1.6412254943664934e-06, "loss": 0.2697, "step": 4475 }, { "epoch": 2.200589970501475, "grad_norm": 22.0, "learning_rate": 1.6393422290083557e-06, "loss": 0.274, "step": 4476 }, { "epoch": 2.2010816125860373, "grad_norm": 21.0, "learning_rate": 1.6374598040944227e-06, "loss": 0.1812, "step": 4477 }, { "epoch": 2.2015732546706, "grad_norm": 12.5, "learning_rate": 1.6355782201777366e-06, "loss": 0.1784, "step": 4478 }, { "epoch": 2.202064896755162, "grad_norm": 44.5, "learning_rate": 1.6336974778110976e-06, "loss": 0.3501, "step": 4479 }, { "epoch": 2.202556538839725, "grad_norm": 17.625, "learning_rate": 1.6318175775470572e-06, "loss": 0.2273, "step": 4480 }, { "epoch": 2.203048180924287, "grad_norm": 9.625, "learning_rate": 1.6299385199379167e-06, "loss": 0.0847, "step": 4481 }, { "epoch": 2.2035398230088497, "grad_norm": 7.125, "learning_rate": 1.6280603055357331e-06, "loss": 0.1756, "step": 4482 }, { "epoch": 2.204031465093412, "grad_norm": 14.5625, "learning_rate": 1.6261829348923157e-06, "loss": 0.1598, "step": 4483 }, { "epoch": 2.2045231071779745, "grad_norm": 43.75, "learning_rate": 1.624306408559223e-06, "loss": 0.3118, "step": 4484 }, { "epoch": 2.2050147492625367, "grad_norm": 42.75, "learning_rate": 1.6224307270877673e-06, "loss": 0.4369, "step": 4485 }, { "epoch": 2.2055063913470994, "grad_norm": 12.6875, "learning_rate": 1.6205558910290157e-06, "loss": 0.2175, "step": 4486 }, { "epoch": 2.2059980334316616, "grad_norm": 17.75, "learning_rate": 1.6186819009337798e-06, "loss": 0.3226, "step": 4487 }, { "epoch": 2.206489675516224, "grad_norm": 23.125, "learning_rate": 1.6168087573526296e-06, "loss": 0.2615, "step": 4488 }, { "epoch": 2.2069813176007864, "grad_norm": 32.5, "learning_rate": 1.6149364608358795e-06, "loss": 0.2702, "step": 4489 }, { "epoch": 2.207472959685349, "grad_norm": 29.375, "learning_rate": 1.613065011933605e-06, "loss": 0.5623, "step": 4490 }, { "epoch": 2.2079646017699117, "grad_norm": 19.0, "learning_rate": 1.611194411195622e-06, "loss": 0.384, "step": 4491 }, { "epoch": 2.208456243854474, "grad_norm": 12.9375, "learning_rate": 1.609324659171505e-06, "loss": 0.2312, "step": 4492 }, { "epoch": 2.2089478859390366, "grad_norm": 14.9375, "learning_rate": 1.6074557564105728e-06, "loss": 0.308, "step": 4493 }, { "epoch": 2.2094395280235988, "grad_norm": 11.25, "learning_rate": 1.6055877034619014e-06, "loss": 0.2467, "step": 4494 }, { "epoch": 2.2099311701081614, "grad_norm": 25.5, "learning_rate": 1.6037205008743098e-06, "loss": 0.2844, "step": 4495 }, { "epoch": 2.2104228121927236, "grad_norm": 16.5, "learning_rate": 1.6018541491963734e-06, "loss": 0.4262, "step": 4496 }, { "epoch": 2.2109144542772863, "grad_norm": 22.0, "learning_rate": 1.5999886489764143e-06, "loss": 0.4182, "step": 4497 }, { "epoch": 2.2114060963618485, "grad_norm": 20.75, "learning_rate": 1.5981240007625073e-06, "loss": 0.2893, "step": 4498 }, { "epoch": 2.211897738446411, "grad_norm": 39.5, "learning_rate": 1.5962602051024717e-06, "loss": 0.5423, "step": 4499 }, { "epoch": 2.2123893805309733, "grad_norm": 22.0, "learning_rate": 1.5943972625438834e-06, "loss": 0.2271, "step": 4500 }, { "epoch": 2.212881022615536, "grad_norm": 25.5, "learning_rate": 1.59253517363406e-06, "loss": 0.3403, "step": 4501 }, { "epoch": 2.213372664700098, "grad_norm": 22.375, "learning_rate": 1.5906739389200747e-06, "loss": 0.2777, "step": 4502 }, { "epoch": 2.213864306784661, "grad_norm": 35.75, "learning_rate": 1.5888135589487453e-06, "loss": 0.2347, "step": 4503 }, { "epoch": 2.214355948869223, "grad_norm": 27.75, "learning_rate": 1.5869540342666414e-06, "loss": 0.4017, "step": 4504 }, { "epoch": 2.2148475909537857, "grad_norm": 21.5, "learning_rate": 1.5850953654200797e-06, "loss": 0.2837, "step": 4505 }, { "epoch": 2.215339233038348, "grad_norm": 27.875, "learning_rate": 1.5832375529551287e-06, "loss": 0.3543, "step": 4506 }, { "epoch": 2.2158308751229105, "grad_norm": 46.5, "learning_rate": 1.581380597417598e-06, "loss": 0.5189, "step": 4507 }, { "epoch": 2.2163225172074728, "grad_norm": 15.5625, "learning_rate": 1.5795244993530547e-06, "loss": 0.288, "step": 4508 }, { "epoch": 2.2168141592920354, "grad_norm": 32.25, "learning_rate": 1.5776692593068056e-06, "loss": 0.3432, "step": 4509 }, { "epoch": 2.2173058013765976, "grad_norm": 25.125, "learning_rate": 1.5758148778239126e-06, "loss": 0.2262, "step": 4510 }, { "epoch": 2.2177974434611603, "grad_norm": 41.5, "learning_rate": 1.5739613554491765e-06, "loss": 0.2014, "step": 4511 }, { "epoch": 2.218289085545723, "grad_norm": 23.375, "learning_rate": 1.5721086927271576e-06, "loss": 0.1776, "step": 4512 }, { "epoch": 2.218780727630285, "grad_norm": 25.625, "learning_rate": 1.5702568902021532e-06, "loss": 0.2714, "step": 4513 }, { "epoch": 2.2192723697148478, "grad_norm": 29.5, "learning_rate": 1.5684059484182137e-06, "loss": 0.3496, "step": 4514 }, { "epoch": 2.21976401179941, "grad_norm": 13.625, "learning_rate": 1.566555867919133e-06, "loss": 0.1763, "step": 4515 }, { "epoch": 2.2202556538839726, "grad_norm": 16.75, "learning_rate": 1.5647066492484563e-06, "loss": 0.3594, "step": 4516 }, { "epoch": 2.220747295968535, "grad_norm": 13.25, "learning_rate": 1.5628582929494695e-06, "loss": 0.1225, "step": 4517 }, { "epoch": 2.2212389380530975, "grad_norm": 15.375, "learning_rate": 1.5610107995652108e-06, "loss": 0.3167, "step": 4518 }, { "epoch": 2.2217305801376597, "grad_norm": 12.8125, "learning_rate": 1.5591641696384625e-06, "loss": 0.2114, "step": 4519 }, { "epoch": 2.2222222222222223, "grad_norm": 31.25, "learning_rate": 1.5573184037117549e-06, "loss": 0.2622, "step": 4520 }, { "epoch": 2.2227138643067845, "grad_norm": 16.625, "learning_rate": 1.55547350232736e-06, "loss": 0.2137, "step": 4521 }, { "epoch": 2.223205506391347, "grad_norm": 21.0, "learning_rate": 1.5536294660273007e-06, "loss": 0.2457, "step": 4522 }, { "epoch": 2.2236971484759094, "grad_norm": 18.75, "learning_rate": 1.5517862953533447e-06, "loss": 0.2966, "step": 4523 }, { "epoch": 2.224188790560472, "grad_norm": 20.0, "learning_rate": 1.5499439908470025e-06, "loss": 0.3415, "step": 4524 }, { "epoch": 2.2246804326450342, "grad_norm": 16.625, "learning_rate": 1.5481025530495344e-06, "loss": 0.2415, "step": 4525 }, { "epoch": 2.225172074729597, "grad_norm": 13.4375, "learning_rate": 1.5462619825019412e-06, "loss": 0.2556, "step": 4526 }, { "epoch": 2.225663716814159, "grad_norm": 12.3125, "learning_rate": 1.5444222797449733e-06, "loss": 0.1028, "step": 4527 }, { "epoch": 2.2261553588987217, "grad_norm": 28.25, "learning_rate": 1.5425834453191233e-06, "loss": 0.3969, "step": 4528 }, { "epoch": 2.2266470009832844, "grad_norm": 33.25, "learning_rate": 1.5407454797646324e-06, "loss": 0.3691, "step": 4529 }, { "epoch": 2.2271386430678466, "grad_norm": 34.25, "learning_rate": 1.5389083836214808e-06, "loss": 0.4347, "step": 4530 }, { "epoch": 2.2276302851524092, "grad_norm": 18.375, "learning_rate": 1.5370721574293989e-06, "loss": 0.1925, "step": 4531 }, { "epoch": 2.2281219272369714, "grad_norm": 15.3125, "learning_rate": 1.5352368017278566e-06, "loss": 0.1929, "step": 4532 }, { "epoch": 2.228613569321534, "grad_norm": 18.625, "learning_rate": 1.5334023170560713e-06, "loss": 0.3289, "step": 4533 }, { "epoch": 2.2291052114060963, "grad_norm": 11.125, "learning_rate": 1.5315687039530046e-06, "loss": 0.2653, "step": 4534 }, { "epoch": 2.229596853490659, "grad_norm": 31.375, "learning_rate": 1.5297359629573615e-06, "loss": 0.3335, "step": 4535 }, { "epoch": 2.230088495575221, "grad_norm": 20.875, "learning_rate": 1.5279040946075882e-06, "loss": 0.4128, "step": 4536 }, { "epoch": 2.230580137659784, "grad_norm": 16.5, "learning_rate": 1.5260730994418787e-06, "loss": 0.3315, "step": 4537 }, { "epoch": 2.231071779744346, "grad_norm": 23.75, "learning_rate": 1.5242429779981672e-06, "loss": 0.2018, "step": 4538 }, { "epoch": 2.2315634218289087, "grad_norm": 22.0, "learning_rate": 1.5224137308141338e-06, "loss": 0.329, "step": 4539 }, { "epoch": 2.232055063913471, "grad_norm": 27.625, "learning_rate": 1.5205853584271965e-06, "loss": 0.4291, "step": 4540 }, { "epoch": 2.2325467059980335, "grad_norm": 21.375, "learning_rate": 1.518757861374527e-06, "loss": 0.1157, "step": 4541 }, { "epoch": 2.2330383480825957, "grad_norm": 21.625, "learning_rate": 1.516931240193027e-06, "loss": 0.1824, "step": 4542 }, { "epoch": 2.2335299901671584, "grad_norm": 28.875, "learning_rate": 1.5151054954193512e-06, "loss": 0.3199, "step": 4543 }, { "epoch": 2.2340216322517206, "grad_norm": 21.375, "learning_rate": 1.5132806275898889e-06, "loss": 0.3439, "step": 4544 }, { "epoch": 2.234513274336283, "grad_norm": 50.25, "learning_rate": 1.5114566372407788e-06, "loss": 0.7811, "step": 4545 }, { "epoch": 2.2350049164208454, "grad_norm": 23.875, "learning_rate": 1.509633524907895e-06, "loss": 0.3651, "step": 4546 }, { "epoch": 2.235496558505408, "grad_norm": 26.625, "learning_rate": 1.5078112911268603e-06, "loss": 0.1767, "step": 4547 }, { "epoch": 2.2359882005899703, "grad_norm": 30.625, "learning_rate": 1.5059899364330314e-06, "loss": 0.4226, "step": 4548 }, { "epoch": 2.236479842674533, "grad_norm": 18.25, "learning_rate": 1.5041694613615177e-06, "loss": 0.3585, "step": 4549 }, { "epoch": 2.2369714847590956, "grad_norm": 18.0, "learning_rate": 1.502349866447159e-06, "loss": 0.2456, "step": 4550 }, { "epoch": 2.237463126843658, "grad_norm": 73.5, "learning_rate": 1.500531152224545e-06, "loss": 0.7867, "step": 4551 }, { "epoch": 2.2379547689282204, "grad_norm": 22.625, "learning_rate": 1.4987133192279997e-06, "loss": 0.1838, "step": 4552 }, { "epoch": 2.2384464110127826, "grad_norm": 33.5, "learning_rate": 1.4968963679915949e-06, "loss": 0.4133, "step": 4553 }, { "epoch": 2.2389380530973453, "grad_norm": 20.0, "learning_rate": 1.495080299049137e-06, "loss": 0.2973, "step": 4554 }, { "epoch": 2.2394296951819075, "grad_norm": 35.25, "learning_rate": 1.493265112934177e-06, "loss": 0.4397, "step": 4555 }, { "epoch": 2.23992133726647, "grad_norm": 25.5, "learning_rate": 1.4914508101800067e-06, "loss": 0.2759, "step": 4556 }, { "epoch": 2.2404129793510323, "grad_norm": 31.5, "learning_rate": 1.4896373913196591e-06, "loss": 0.3625, "step": 4557 }, { "epoch": 2.240904621435595, "grad_norm": 21.25, "learning_rate": 1.4878248568859017e-06, "loss": 0.254, "step": 4558 }, { "epoch": 2.241396263520157, "grad_norm": 34.5, "learning_rate": 1.4860132074112502e-06, "loss": 0.4788, "step": 4559 }, { "epoch": 2.24188790560472, "grad_norm": 10.8125, "learning_rate": 1.484202443427953e-06, "loss": 0.2345, "step": 4560 }, { "epoch": 2.242379547689282, "grad_norm": 58.5, "learning_rate": 1.4823925654680044e-06, "loss": 0.2254, "step": 4561 }, { "epoch": 2.2428711897738447, "grad_norm": 20.0, "learning_rate": 1.4805835740631353e-06, "loss": 0.4196, "step": 4562 }, { "epoch": 2.243362831858407, "grad_norm": 15.3125, "learning_rate": 1.4787754697448151e-06, "loss": 0.3095, "step": 4563 }, { "epoch": 2.2438544739429696, "grad_norm": 25.75, "learning_rate": 1.4769682530442547e-06, "loss": 0.3622, "step": 4564 }, { "epoch": 2.2443461160275318, "grad_norm": 12.1875, "learning_rate": 1.4751619244924057e-06, "loss": 0.2743, "step": 4565 }, { "epoch": 2.2448377581120944, "grad_norm": 21.25, "learning_rate": 1.4733564846199534e-06, "loss": 0.444, "step": 4566 }, { "epoch": 2.245329400196657, "grad_norm": 12.6875, "learning_rate": 1.471551933957327e-06, "loss": 0.2897, "step": 4567 }, { "epoch": 2.2458210422812193, "grad_norm": 24.875, "learning_rate": 1.4697482730346936e-06, "loss": 0.3421, "step": 4568 }, { "epoch": 2.246312684365782, "grad_norm": 27.375, "learning_rate": 1.467945502381956e-06, "loss": 0.3913, "step": 4569 }, { "epoch": 2.246804326450344, "grad_norm": 45.0, "learning_rate": 1.466143622528758e-06, "loss": 0.4798, "step": 4570 }, { "epoch": 2.2472959685349068, "grad_norm": 25.375, "learning_rate": 1.4643426340044835e-06, "loss": 0.3695, "step": 4571 }, { "epoch": 2.247787610619469, "grad_norm": 24.125, "learning_rate": 1.4625425373382486e-06, "loss": 0.5009, "step": 4572 }, { "epoch": 2.2482792527040316, "grad_norm": 29.625, "learning_rate": 1.4607433330589126e-06, "loss": 0.2735, "step": 4573 }, { "epoch": 2.248770894788594, "grad_norm": 46.5, "learning_rate": 1.4589450216950728e-06, "loss": 0.461, "step": 4574 }, { "epoch": 2.2492625368731565, "grad_norm": 12.6875, "learning_rate": 1.4571476037750593e-06, "loss": 0.2279, "step": 4575 }, { "epoch": 2.2497541789577187, "grad_norm": 18.125, "learning_rate": 1.4553510798269453e-06, "loss": 0.2606, "step": 4576 }, { "epoch": 2.2502458210422813, "grad_norm": 8.5625, "learning_rate": 1.4535554503785347e-06, "loss": 0.1513, "step": 4577 }, { "epoch": 2.2507374631268435, "grad_norm": 12.625, "learning_rate": 1.4517607159573784e-06, "loss": 0.1978, "step": 4578 }, { "epoch": 2.251229105211406, "grad_norm": 15.75, "learning_rate": 1.4499668770907544e-06, "loss": 0.1117, "step": 4579 }, { "epoch": 2.2517207472959684, "grad_norm": 18.875, "learning_rate": 1.4481739343056856e-06, "loss": 0.2667, "step": 4580 }, { "epoch": 2.252212389380531, "grad_norm": 9.625, "learning_rate": 1.446381888128924e-06, "loss": 0.149, "step": 4581 }, { "epoch": 2.2527040314650932, "grad_norm": 19.375, "learning_rate": 1.4445907390869645e-06, "loss": 0.2642, "step": 4582 }, { "epoch": 2.253195673549656, "grad_norm": 18.75, "learning_rate": 1.4428004877060347e-06, "loss": 0.2938, "step": 4583 }, { "epoch": 2.2536873156342185, "grad_norm": 21.375, "learning_rate": 1.4410111345121013e-06, "loss": 0.2121, "step": 4584 }, { "epoch": 2.2541789577187807, "grad_norm": 15.875, "learning_rate": 1.4392226800308619e-06, "loss": 0.2025, "step": 4585 }, { "epoch": 2.254670599803343, "grad_norm": 14.3125, "learning_rate": 1.4374351247877596e-06, "loss": 0.209, "step": 4586 }, { "epoch": 2.2551622418879056, "grad_norm": 20.5, "learning_rate": 1.4356484693079632e-06, "loss": 0.255, "step": 4587 }, { "epoch": 2.2556538839724682, "grad_norm": 30.5, "learning_rate": 1.4338627141163836e-06, "loss": 0.3358, "step": 4588 }, { "epoch": 2.2561455260570304, "grad_norm": 20.25, "learning_rate": 1.432077859737663e-06, "loss": 0.3051, "step": 4589 }, { "epoch": 2.256637168141593, "grad_norm": 14.4375, "learning_rate": 1.4302939066961838e-06, "loss": 0.3494, "step": 4590 }, { "epoch": 2.2571288102261553, "grad_norm": 32.25, "learning_rate": 1.4285108555160574e-06, "loss": 0.2957, "step": 4591 }, { "epoch": 2.257620452310718, "grad_norm": 33.25, "learning_rate": 1.4267287067211348e-06, "loss": 0.2815, "step": 4592 }, { "epoch": 2.25811209439528, "grad_norm": 16.375, "learning_rate": 1.4249474608350018e-06, "loss": 0.1753, "step": 4593 }, { "epoch": 2.258603736479843, "grad_norm": 22.375, "learning_rate": 1.4231671183809786e-06, "loss": 0.3722, "step": 4594 }, { "epoch": 2.259095378564405, "grad_norm": 29.375, "learning_rate": 1.421387679882116e-06, "loss": 0.4897, "step": 4595 }, { "epoch": 2.2595870206489677, "grad_norm": 14.125, "learning_rate": 1.4196091458612055e-06, "loss": 0.0953, "step": 4596 }, { "epoch": 2.26007866273353, "grad_norm": 26.625, "learning_rate": 1.4178315168407674e-06, "loss": 0.415, "step": 4597 }, { "epoch": 2.2605703048180925, "grad_norm": 15.625, "learning_rate": 1.4160547933430607e-06, "loss": 0.3184, "step": 4598 }, { "epoch": 2.2610619469026547, "grad_norm": 31.0, "learning_rate": 1.4142789758900728e-06, "loss": 0.2976, "step": 4599 }, { "epoch": 2.2615535889872174, "grad_norm": 25.125, "learning_rate": 1.4125040650035332e-06, "loss": 0.405, "step": 4600 }, { "epoch": 2.2620452310717796, "grad_norm": 24.625, "learning_rate": 1.410730061204896e-06, "loss": 0.2967, "step": 4601 }, { "epoch": 2.262536873156342, "grad_norm": 20.0, "learning_rate": 1.4089569650153557e-06, "loss": 0.2173, "step": 4602 }, { "epoch": 2.2630285152409044, "grad_norm": 49.5, "learning_rate": 1.407184776955835e-06, "loss": 0.4251, "step": 4603 }, { "epoch": 2.263520157325467, "grad_norm": 17.875, "learning_rate": 1.4054134975469945e-06, "loss": 0.2263, "step": 4604 }, { "epoch": 2.2640117994100297, "grad_norm": 29.5, "learning_rate": 1.4036431273092234e-06, "loss": 0.4641, "step": 4605 }, { "epoch": 2.264503441494592, "grad_norm": 17.25, "learning_rate": 1.4018736667626463e-06, "loss": 0.3414, "step": 4606 }, { "epoch": 2.264995083579154, "grad_norm": 30.125, "learning_rate": 1.400105116427121e-06, "loss": 0.3184, "step": 4607 }, { "epoch": 2.265486725663717, "grad_norm": 20.0, "learning_rate": 1.3983374768222384e-06, "loss": 0.445, "step": 4608 }, { "epoch": 2.2659783677482794, "grad_norm": 20.5, "learning_rate": 1.3965707484673177e-06, "loss": 0.4381, "step": 4609 }, { "epoch": 2.2664700098328416, "grad_norm": 17.875, "learning_rate": 1.3948049318814138e-06, "loss": 0.265, "step": 4610 }, { "epoch": 2.2669616519174043, "grad_norm": 26.125, "learning_rate": 1.3930400275833155e-06, "loss": 0.399, "step": 4611 }, { "epoch": 2.2674532940019665, "grad_norm": 23.5, "learning_rate": 1.3912760360915374e-06, "loss": 0.3006, "step": 4612 }, { "epoch": 2.267944936086529, "grad_norm": 16.625, "learning_rate": 1.3895129579243333e-06, "loss": 0.2844, "step": 4613 }, { "epoch": 2.2684365781710913, "grad_norm": 21.5, "learning_rate": 1.3877507935996822e-06, "loss": 0.3049, "step": 4614 }, { "epoch": 2.268928220255654, "grad_norm": 8.625, "learning_rate": 1.3859895436352982e-06, "loss": 0.1369, "step": 4615 }, { "epoch": 2.269419862340216, "grad_norm": 33.0, "learning_rate": 1.3842292085486267e-06, "loss": 0.2342, "step": 4616 }, { "epoch": 2.269911504424779, "grad_norm": 13.9375, "learning_rate": 1.3824697888568448e-06, "loss": 0.3174, "step": 4617 }, { "epoch": 2.270403146509341, "grad_norm": 41.0, "learning_rate": 1.3807112850768565e-06, "loss": 0.4583, "step": 4618 }, { "epoch": 2.2708947885939037, "grad_norm": 15.5, "learning_rate": 1.378953697725303e-06, "loss": 0.2207, "step": 4619 }, { "epoch": 2.271386430678466, "grad_norm": 20.625, "learning_rate": 1.3771970273185496e-06, "loss": 0.3333, "step": 4620 }, { "epoch": 2.2718780727630286, "grad_norm": 23.25, "learning_rate": 1.3754412743726979e-06, "loss": 0.192, "step": 4621 }, { "epoch": 2.2723697148475908, "grad_norm": 25.875, "learning_rate": 1.3736864394035768e-06, "loss": 0.378, "step": 4622 }, { "epoch": 2.2728613569321534, "grad_norm": 14.0, "learning_rate": 1.371932522926748e-06, "loss": 0.2969, "step": 4623 }, { "epoch": 2.2733529990167156, "grad_norm": 19.375, "learning_rate": 1.3701795254574988e-06, "loss": 0.2066, "step": 4624 }, { "epoch": 2.2738446411012783, "grad_norm": 26.0, "learning_rate": 1.368427447510853e-06, "loss": 0.2494, "step": 4625 }, { "epoch": 2.274336283185841, "grad_norm": 27.5, "learning_rate": 1.3666762896015567e-06, "loss": 0.3192, "step": 4626 }, { "epoch": 2.274827925270403, "grad_norm": 13.125, "learning_rate": 1.364926052244093e-06, "loss": 0.0953, "step": 4627 }, { "epoch": 2.2753195673549658, "grad_norm": 36.5, "learning_rate": 1.3631767359526687e-06, "loss": 0.4447, "step": 4628 }, { "epoch": 2.275811209439528, "grad_norm": 28.25, "learning_rate": 1.3614283412412233e-06, "loss": 0.3924, "step": 4629 }, { "epoch": 2.2763028515240906, "grad_norm": 21.5, "learning_rate": 1.3596808686234253e-06, "loss": 0.3891, "step": 4630 }, { "epoch": 2.276794493608653, "grad_norm": 20.875, "learning_rate": 1.3579343186126726e-06, "loss": 0.1307, "step": 4631 }, { "epoch": 2.2772861356932155, "grad_norm": 19.875, "learning_rate": 1.3561886917220888e-06, "loss": 0.3599, "step": 4632 }, { "epoch": 2.2777777777777777, "grad_norm": 14.3125, "learning_rate": 1.3544439884645315e-06, "loss": 0.2194, "step": 4633 }, { "epoch": 2.2782694198623403, "grad_norm": 31.5, "learning_rate": 1.3527002093525813e-06, "loss": 0.3154, "step": 4634 }, { "epoch": 2.2787610619469025, "grad_norm": 35.75, "learning_rate": 1.3509573548985522e-06, "loss": 0.4267, "step": 4635 }, { "epoch": 2.279252704031465, "grad_norm": 39.25, "learning_rate": 1.3492154256144812e-06, "loss": 0.416, "step": 4636 }, { "epoch": 2.2797443461160274, "grad_norm": 18.75, "learning_rate": 1.3474744220121416e-06, "loss": 0.3132, "step": 4637 }, { "epoch": 2.28023598820059, "grad_norm": 18.125, "learning_rate": 1.3457343446030266e-06, "loss": 0.2606, "step": 4638 }, { "epoch": 2.2807276302851522, "grad_norm": 22.375, "learning_rate": 1.3439951938983623e-06, "loss": 0.2036, "step": 4639 }, { "epoch": 2.281219272369715, "grad_norm": 23.625, "learning_rate": 1.342256970409099e-06, "loss": 0.2468, "step": 4640 }, { "epoch": 2.281710914454277, "grad_norm": 27.0, "learning_rate": 1.3405196746459184e-06, "loss": 0.4256, "step": 4641 }, { "epoch": 2.2822025565388397, "grad_norm": 22.0, "learning_rate": 1.338783307119225e-06, "loss": 0.1702, "step": 4642 }, { "epoch": 2.2826941986234024, "grad_norm": 25.875, "learning_rate": 1.337047868339155e-06, "loss": 0.3449, "step": 4643 }, { "epoch": 2.2831858407079646, "grad_norm": 31.5, "learning_rate": 1.33531335881557e-06, "loss": 0.4494, "step": 4644 }, { "epoch": 2.283677482792527, "grad_norm": 17.0, "learning_rate": 1.3335797790580593e-06, "loss": 0.205, "step": 4645 }, { "epoch": 2.2841691248770895, "grad_norm": 29.375, "learning_rate": 1.3318471295759363e-06, "loss": 0.2741, "step": 4646 }, { "epoch": 2.284660766961652, "grad_norm": 22.125, "learning_rate": 1.3301154108782452e-06, "loss": 0.2681, "step": 4647 }, { "epoch": 2.2851524090462143, "grad_norm": 37.75, "learning_rate": 1.3283846234737524e-06, "loss": 0.3661, "step": 4648 }, { "epoch": 2.285644051130777, "grad_norm": 35.25, "learning_rate": 1.3266547678709543e-06, "loss": 0.5176, "step": 4649 }, { "epoch": 2.286135693215339, "grad_norm": 25.125, "learning_rate": 1.3249258445780728e-06, "loss": 0.2974, "step": 4650 }, { "epoch": 2.286627335299902, "grad_norm": 17.875, "learning_rate": 1.3231978541030534e-06, "loss": 0.2526, "step": 4651 }, { "epoch": 2.287118977384464, "grad_norm": 18.75, "learning_rate": 1.3214707969535703e-06, "loss": 0.2076, "step": 4652 }, { "epoch": 2.2876106194690267, "grad_norm": 36.75, "learning_rate": 1.3197446736370238e-06, "loss": 0.6034, "step": 4653 }, { "epoch": 2.288102261553589, "grad_norm": 20.0, "learning_rate": 1.3180194846605365e-06, "loss": 0.2339, "step": 4654 }, { "epoch": 2.2885939036381515, "grad_norm": 15.5625, "learning_rate": 1.3162952305309595e-06, "loss": 0.192, "step": 4655 }, { "epoch": 2.2890855457227137, "grad_norm": 40.25, "learning_rate": 1.3145719117548698e-06, "loss": 0.4741, "step": 4656 }, { "epoch": 2.2895771878072764, "grad_norm": 20.125, "learning_rate": 1.3128495288385663e-06, "loss": 0.3887, "step": 4657 }, { "epoch": 2.2900688298918386, "grad_norm": 14.9375, "learning_rate": 1.3111280822880747e-06, "loss": 0.2981, "step": 4658 }, { "epoch": 2.2905604719764012, "grad_norm": 33.75, "learning_rate": 1.3094075726091482e-06, "loss": 0.4048, "step": 4659 }, { "epoch": 2.2910521140609634, "grad_norm": 24.875, "learning_rate": 1.3076880003072594e-06, "loss": 0.3333, "step": 4660 }, { "epoch": 2.291543756145526, "grad_norm": 18.625, "learning_rate": 1.3059693658876094e-06, "loss": 0.2713, "step": 4661 }, { "epoch": 2.2920353982300883, "grad_norm": 12.5, "learning_rate": 1.3042516698551245e-06, "loss": 0.2339, "step": 4662 }, { "epoch": 2.292527040314651, "grad_norm": 21.25, "learning_rate": 1.3025349127144503e-06, "loss": 0.2397, "step": 4663 }, { "epoch": 2.2930186823992136, "grad_norm": 27.875, "learning_rate": 1.300819094969963e-06, "loss": 0.3477, "step": 4664 }, { "epoch": 2.293510324483776, "grad_norm": 32.25, "learning_rate": 1.2991042171257572e-06, "loss": 0.3647, "step": 4665 }, { "epoch": 2.2940019665683384, "grad_norm": 13.4375, "learning_rate": 1.297390279685654e-06, "loss": 0.0715, "step": 4666 }, { "epoch": 2.2944936086529006, "grad_norm": 24.0, "learning_rate": 1.2956772831531995e-06, "loss": 0.3897, "step": 4667 }, { "epoch": 2.2949852507374633, "grad_norm": 18.75, "learning_rate": 1.293965228031662e-06, "loss": 0.152, "step": 4668 }, { "epoch": 2.2954768928220255, "grad_norm": 20.25, "learning_rate": 1.292254114824031e-06, "loss": 0.2035, "step": 4669 }, { "epoch": 2.295968534906588, "grad_norm": 38.0, "learning_rate": 1.290543944033024e-06, "loss": 0.2753, "step": 4670 }, { "epoch": 2.2964601769911503, "grad_norm": 27.0, "learning_rate": 1.2888347161610764e-06, "loss": 0.2312, "step": 4671 }, { "epoch": 2.296951819075713, "grad_norm": 24.375, "learning_rate": 1.2871264317103512e-06, "loss": 0.2841, "step": 4672 }, { "epoch": 2.297443461160275, "grad_norm": 19.875, "learning_rate": 1.2854190911827288e-06, "loss": 0.2298, "step": 4673 }, { "epoch": 2.297935103244838, "grad_norm": 25.875, "learning_rate": 1.2837126950798209e-06, "loss": 0.2168, "step": 4674 }, { "epoch": 2.2984267453294, "grad_norm": 20.75, "learning_rate": 1.2820072439029525e-06, "loss": 0.3369, "step": 4675 }, { "epoch": 2.2989183874139627, "grad_norm": 17.125, "learning_rate": 1.2803027381531775e-06, "loss": 0.3087, "step": 4676 }, { "epoch": 2.299410029498525, "grad_norm": 15.6875, "learning_rate": 1.278599178331267e-06, "loss": 0.1733, "step": 4677 }, { "epoch": 2.2999016715830876, "grad_norm": 33.0, "learning_rate": 1.27689656493772e-06, "loss": 0.4831, "step": 4678 }, { "epoch": 2.3003933136676498, "grad_norm": 62.5, "learning_rate": 1.2751948984727503e-06, "loss": 0.4893, "step": 4679 }, { "epoch": 2.3008849557522124, "grad_norm": 13.75, "learning_rate": 1.2734941794362985e-06, "loss": 0.1183, "step": 4680 }, { "epoch": 2.301376597836775, "grad_norm": 11.25, "learning_rate": 1.2717944083280276e-06, "loss": 0.2971, "step": 4681 }, { "epoch": 2.3018682399213373, "grad_norm": 21.875, "learning_rate": 1.27009558564732e-06, "loss": 0.1928, "step": 4682 }, { "epoch": 2.3023598820058995, "grad_norm": 22.0, "learning_rate": 1.2683977118932776e-06, "loss": 0.3442, "step": 4683 }, { "epoch": 2.302851524090462, "grad_norm": 25.125, "learning_rate": 1.266700787564728e-06, "loss": 0.3735, "step": 4684 }, { "epoch": 2.3033431661750248, "grad_norm": 14.0, "learning_rate": 1.2650048131602147e-06, "loss": 0.2474, "step": 4685 }, { "epoch": 2.303834808259587, "grad_norm": 14.125, "learning_rate": 1.2633097891780085e-06, "loss": 0.282, "step": 4686 }, { "epoch": 2.3043264503441496, "grad_norm": 12.6875, "learning_rate": 1.2616157161160932e-06, "loss": 0.1801, "step": 4687 }, { "epoch": 2.304818092428712, "grad_norm": 22.75, "learning_rate": 1.2599225944721792e-06, "loss": 0.4868, "step": 4688 }, { "epoch": 2.3053097345132745, "grad_norm": 22.75, "learning_rate": 1.2582304247436963e-06, "loss": 0.261, "step": 4689 }, { "epoch": 2.3058013765978367, "grad_norm": 28.625, "learning_rate": 1.2565392074277946e-06, "loss": 0.3776, "step": 4690 }, { "epoch": 2.3062930186823993, "grad_norm": 19.75, "learning_rate": 1.2548489430213408e-06, "loss": 0.3645, "step": 4691 }, { "epoch": 2.3067846607669615, "grad_norm": 14.75, "learning_rate": 1.2531596320209277e-06, "loss": 0.2384, "step": 4692 }, { "epoch": 2.307276302851524, "grad_norm": 36.0, "learning_rate": 1.2514712749228614e-06, "loss": 0.3843, "step": 4693 }, { "epoch": 2.3077679449360864, "grad_norm": 28.375, "learning_rate": 1.2497838722231737e-06, "loss": 0.2626, "step": 4694 }, { "epoch": 2.308259587020649, "grad_norm": 13.5, "learning_rate": 1.2480974244176122e-06, "loss": 0.1617, "step": 4695 }, { "epoch": 2.3087512291052112, "grad_norm": 12.6875, "learning_rate": 1.2464119320016472e-06, "loss": 0.3035, "step": 4696 }, { "epoch": 2.309242871189774, "grad_norm": 20.25, "learning_rate": 1.244727395470464e-06, "loss": 0.3324, "step": 4697 }, { "epoch": 2.309734513274336, "grad_norm": 18.875, "learning_rate": 1.2430438153189708e-06, "loss": 0.1514, "step": 4698 }, { "epoch": 2.3102261553588987, "grad_norm": 29.375, "learning_rate": 1.2413611920417923e-06, "loss": 0.4348, "step": 4699 }, { "epoch": 2.310717797443461, "grad_norm": 12.75, "learning_rate": 1.239679526133273e-06, "loss": 0.1829, "step": 4700 }, { "epoch": 2.3112094395280236, "grad_norm": 30.875, "learning_rate": 1.237998818087479e-06, "loss": 0.3729, "step": 4701 }, { "epoch": 2.3117010816125863, "grad_norm": 21.0, "learning_rate": 1.2363190683981888e-06, "loss": 0.2514, "step": 4702 }, { "epoch": 2.3121927236971485, "grad_norm": 19.125, "learning_rate": 1.2346402775589042e-06, "loss": 0.3432, "step": 4703 }, { "epoch": 2.312684365781711, "grad_norm": 18.125, "learning_rate": 1.2329624460628446e-06, "loss": 0.2331, "step": 4704 }, { "epoch": 2.3131760078662733, "grad_norm": 49.25, "learning_rate": 1.2312855744029475e-06, "loss": 0.4924, "step": 4705 }, { "epoch": 2.313667649950836, "grad_norm": 26.625, "learning_rate": 1.229609663071866e-06, "loss": 0.4858, "step": 4706 }, { "epoch": 2.314159292035398, "grad_norm": 12.5, "learning_rate": 1.2279347125619744e-06, "loss": 0.2389, "step": 4707 }, { "epoch": 2.314650934119961, "grad_norm": 12.375, "learning_rate": 1.226260723365361e-06, "loss": 0.2491, "step": 4708 }, { "epoch": 2.315142576204523, "grad_norm": 31.125, "learning_rate": 1.2245876959738372e-06, "loss": 0.4454, "step": 4709 }, { "epoch": 2.3156342182890857, "grad_norm": 11.25, "learning_rate": 1.222915630878923e-06, "loss": 0.2002, "step": 4710 }, { "epoch": 2.316125860373648, "grad_norm": 23.75, "learning_rate": 1.221244528571868e-06, "loss": 0.419, "step": 4711 }, { "epoch": 2.3166175024582105, "grad_norm": 39.5, "learning_rate": 1.2195743895436278e-06, "loss": 0.4679, "step": 4712 }, { "epoch": 2.3171091445427727, "grad_norm": 22.0, "learning_rate": 1.217905214284881e-06, "loss": 0.3052, "step": 4713 }, { "epoch": 2.3176007866273354, "grad_norm": 24.0, "learning_rate": 1.2162370032860193e-06, "loss": 0.288, "step": 4714 }, { "epoch": 2.3180924287118976, "grad_norm": 28.625, "learning_rate": 1.214569757037156e-06, "loss": 0.383, "step": 4715 }, { "epoch": 2.3185840707964602, "grad_norm": 18.625, "learning_rate": 1.2129034760281151e-06, "loss": 0.227, "step": 4716 }, { "epoch": 2.3190757128810224, "grad_norm": 48.75, "learning_rate": 1.2112381607484414e-06, "loss": 0.5094, "step": 4717 }, { "epoch": 2.319567354965585, "grad_norm": 18.625, "learning_rate": 1.2095738116873952e-06, "loss": 0.2745, "step": 4718 }, { "epoch": 2.3200589970501477, "grad_norm": 26.0, "learning_rate": 1.207910429333953e-06, "loss": 0.3295, "step": 4719 }, { "epoch": 2.32055063913471, "grad_norm": 41.75, "learning_rate": 1.2062480141768038e-06, "loss": 0.1996, "step": 4720 }, { "epoch": 2.321042281219272, "grad_norm": 26.5, "learning_rate": 1.2045865667043586e-06, "loss": 0.4118, "step": 4721 }, { "epoch": 2.321533923303835, "grad_norm": 23.625, "learning_rate": 1.2029260874047378e-06, "loss": 0.385, "step": 4722 }, { "epoch": 2.3220255653883974, "grad_norm": 9.4375, "learning_rate": 1.2012665767657825e-06, "loss": 0.151, "step": 4723 }, { "epoch": 2.3225172074729596, "grad_norm": 11.6875, "learning_rate": 1.199608035275044e-06, "loss": 0.1534, "step": 4724 }, { "epoch": 2.3230088495575223, "grad_norm": 19.625, "learning_rate": 1.197950463419797e-06, "loss": 0.3391, "step": 4725 }, { "epoch": 2.3235004916420845, "grad_norm": 41.5, "learning_rate": 1.1962938616870212e-06, "loss": 0.2435, "step": 4726 }, { "epoch": 2.323992133726647, "grad_norm": 19.75, "learning_rate": 1.1946382305634204e-06, "loss": 0.1924, "step": 4727 }, { "epoch": 2.3244837758112094, "grad_norm": 54.25, "learning_rate": 1.192983570535405e-06, "loss": 0.6547, "step": 4728 }, { "epoch": 2.324975417895772, "grad_norm": 42.25, "learning_rate": 1.1913298820891084e-06, "loss": 0.2551, "step": 4729 }, { "epoch": 2.325467059980334, "grad_norm": 43.25, "learning_rate": 1.189677165710371e-06, "loss": 0.4524, "step": 4730 }, { "epoch": 2.325958702064897, "grad_norm": 28.0, "learning_rate": 1.188025421884754e-06, "loss": 0.296, "step": 4731 }, { "epoch": 2.326450344149459, "grad_norm": 13.4375, "learning_rate": 1.1863746510975255e-06, "loss": 0.2646, "step": 4732 }, { "epoch": 2.3269419862340217, "grad_norm": 35.0, "learning_rate": 1.1847248538336775e-06, "loss": 0.3838, "step": 4733 }, { "epoch": 2.327433628318584, "grad_norm": 21.875, "learning_rate": 1.1830760305779069e-06, "loss": 0.3943, "step": 4734 }, { "epoch": 2.3279252704031466, "grad_norm": 41.0, "learning_rate": 1.1814281818146302e-06, "loss": 0.3567, "step": 4735 }, { "epoch": 2.3284169124877088, "grad_norm": 26.625, "learning_rate": 1.1797813080279742e-06, "loss": 0.2811, "step": 4736 }, { "epoch": 2.3289085545722714, "grad_norm": 19.0, "learning_rate": 1.1781354097017804e-06, "loss": 0.3334, "step": 4737 }, { "epoch": 2.3294001966568336, "grad_norm": 24.625, "learning_rate": 1.1764904873196058e-06, "loss": 0.3343, "step": 4738 }, { "epoch": 2.3298918387413963, "grad_norm": 14.8125, "learning_rate": 1.1748465413647166e-06, "loss": 0.1221, "step": 4739 }, { "epoch": 2.330383480825959, "grad_norm": 30.375, "learning_rate": 1.1732035723200953e-06, "loss": 0.5938, "step": 4740 }, { "epoch": 2.330875122910521, "grad_norm": 20.5, "learning_rate": 1.1715615806684379e-06, "loss": 0.2308, "step": 4741 }, { "epoch": 2.3313667649950833, "grad_norm": 19.625, "learning_rate": 1.1699205668921483e-06, "loss": 0.2483, "step": 4742 }, { "epoch": 2.331858407079646, "grad_norm": 22.875, "learning_rate": 1.1682805314733492e-06, "loss": 0.3037, "step": 4743 }, { "epoch": 2.3323500491642086, "grad_norm": 27.375, "learning_rate": 1.1666414748938735e-06, "loss": 0.2632, "step": 4744 }, { "epoch": 2.332841691248771, "grad_norm": 16.5, "learning_rate": 1.1650033976352643e-06, "loss": 0.3557, "step": 4745 }, { "epoch": 2.3333333333333335, "grad_norm": 24.625, "learning_rate": 1.1633663001787797e-06, "loss": 0.3026, "step": 4746 }, { "epoch": 2.3338249754178957, "grad_norm": 15.0625, "learning_rate": 1.1617301830053907e-06, "loss": 0.284, "step": 4747 }, { "epoch": 2.3343166175024583, "grad_norm": 23.625, "learning_rate": 1.1600950465957764e-06, "loss": 0.3666, "step": 4748 }, { "epoch": 2.3348082595870205, "grad_norm": 17.125, "learning_rate": 1.158460891430331e-06, "loss": 0.1686, "step": 4749 }, { "epoch": 2.335299901671583, "grad_norm": 16.375, "learning_rate": 1.1568277179891614e-06, "loss": 0.2493, "step": 4750 }, { "epoch": 2.3357915437561454, "grad_norm": 31.5, "learning_rate": 1.1551955267520814e-06, "loss": 0.2599, "step": 4751 }, { "epoch": 2.336283185840708, "grad_norm": 22.25, "learning_rate": 1.153564318198621e-06, "loss": 0.3086, "step": 4752 }, { "epoch": 2.3367748279252702, "grad_norm": 27.125, "learning_rate": 1.1519340928080181e-06, "loss": 0.1684, "step": 4753 }, { "epoch": 2.337266470009833, "grad_norm": 21.875, "learning_rate": 1.1503048510592236e-06, "loss": 0.2265, "step": 4754 }, { "epoch": 2.337758112094395, "grad_norm": 35.25, "learning_rate": 1.1486765934308994e-06, "loss": 0.4961, "step": 4755 }, { "epoch": 2.3382497541789578, "grad_norm": 21.25, "learning_rate": 1.1470493204014192e-06, "loss": 0.3842, "step": 4756 }, { "epoch": 2.3387413962635204, "grad_norm": 34.75, "learning_rate": 1.1454230324488635e-06, "loss": 0.193, "step": 4757 }, { "epoch": 2.3392330383480826, "grad_norm": 20.125, "learning_rate": 1.1437977300510284e-06, "loss": 0.2886, "step": 4758 }, { "epoch": 2.339724680432645, "grad_norm": 21.75, "learning_rate": 1.1421734136854155e-06, "loss": 0.3816, "step": 4759 }, { "epoch": 2.3402163225172075, "grad_norm": 17.75, "learning_rate": 1.140550083829242e-06, "loss": 0.2014, "step": 4760 }, { "epoch": 2.34070796460177, "grad_norm": 13.5625, "learning_rate": 1.1389277409594288e-06, "loss": 0.2203, "step": 4761 }, { "epoch": 2.3411996066863323, "grad_norm": 22.625, "learning_rate": 1.1373063855526154e-06, "loss": 0.3477, "step": 4762 }, { "epoch": 2.341691248770895, "grad_norm": 29.0, "learning_rate": 1.1356860180851428e-06, "loss": 0.4033, "step": 4763 }, { "epoch": 2.342182890855457, "grad_norm": 13.375, "learning_rate": 1.1340666390330678e-06, "loss": 0.2531, "step": 4764 }, { "epoch": 2.34267453294002, "grad_norm": 21.625, "learning_rate": 1.1324482488721516e-06, "loss": 0.273, "step": 4765 }, { "epoch": 2.343166175024582, "grad_norm": 29.25, "learning_rate": 1.1308308480778702e-06, "loss": 0.3552, "step": 4766 }, { "epoch": 2.3436578171091447, "grad_norm": 20.75, "learning_rate": 1.1292144371254033e-06, "loss": 0.2491, "step": 4767 }, { "epoch": 2.344149459193707, "grad_norm": 13.0, "learning_rate": 1.1275990164896465e-06, "loss": 0.1673, "step": 4768 }, { "epoch": 2.3446411012782695, "grad_norm": 22.375, "learning_rate": 1.1259845866451957e-06, "loss": 0.3199, "step": 4769 }, { "epoch": 2.3451327433628317, "grad_norm": 25.375, "learning_rate": 1.1243711480663669e-06, "loss": 0.3628, "step": 4770 }, { "epoch": 2.3456243854473944, "grad_norm": 26.625, "learning_rate": 1.1227587012271741e-06, "loss": 0.2416, "step": 4771 }, { "epoch": 2.3461160275319566, "grad_norm": 60.5, "learning_rate": 1.1211472466013477e-06, "loss": 0.7791, "step": 4772 }, { "epoch": 2.3466076696165192, "grad_norm": 25.25, "learning_rate": 1.1195367846623205e-06, "loss": 0.1698, "step": 4773 }, { "epoch": 2.3470993117010814, "grad_norm": 14.625, "learning_rate": 1.1179273158832394e-06, "loss": 0.2127, "step": 4774 }, { "epoch": 2.347590953785644, "grad_norm": 24.875, "learning_rate": 1.116318840736954e-06, "loss": 0.1246, "step": 4775 }, { "epoch": 2.3480825958702063, "grad_norm": 17.375, "learning_rate": 1.1147113596960264e-06, "loss": 0.3504, "step": 4776 }, { "epoch": 2.348574237954769, "grad_norm": 38.5, "learning_rate": 1.1131048732327244e-06, "loss": 0.4749, "step": 4777 }, { "epoch": 2.3490658800393316, "grad_norm": 31.0, "learning_rate": 1.1114993818190256e-06, "loss": 0.3176, "step": 4778 }, { "epoch": 2.349557522123894, "grad_norm": 16.125, "learning_rate": 1.1098948859266118e-06, "loss": 0.2267, "step": 4779 }, { "epoch": 2.350049164208456, "grad_norm": 35.25, "learning_rate": 1.1082913860268763e-06, "loss": 0.3602, "step": 4780 }, { "epoch": 2.3505408062930186, "grad_norm": 20.5, "learning_rate": 1.1066888825909148e-06, "loss": 0.3584, "step": 4781 }, { "epoch": 2.3510324483775813, "grad_norm": 37.75, "learning_rate": 1.1050873760895356e-06, "loss": 0.3507, "step": 4782 }, { "epoch": 2.3515240904621435, "grad_norm": 49.75, "learning_rate": 1.1034868669932515e-06, "loss": 0.5245, "step": 4783 }, { "epoch": 2.352015732546706, "grad_norm": 53.5, "learning_rate": 1.1018873557722833e-06, "loss": 0.2338, "step": 4784 }, { "epoch": 2.3525073746312684, "grad_norm": 31.625, "learning_rate": 1.1002888428965554e-06, "loss": 0.2371, "step": 4785 }, { "epoch": 2.352999016715831, "grad_norm": 46.25, "learning_rate": 1.0986913288357046e-06, "loss": 0.5009, "step": 4786 }, { "epoch": 2.353490658800393, "grad_norm": 14.875, "learning_rate": 1.0970948140590675e-06, "loss": 0.3305, "step": 4787 }, { "epoch": 2.353982300884956, "grad_norm": 50.0, "learning_rate": 1.0954992990356925e-06, "loss": 0.3219, "step": 4788 }, { "epoch": 2.354473942969518, "grad_norm": 18.625, "learning_rate": 1.0939047842343334e-06, "loss": 0.2349, "step": 4789 }, { "epoch": 2.3549655850540807, "grad_norm": 22.5, "learning_rate": 1.0923112701234471e-06, "loss": 0.3348, "step": 4790 }, { "epoch": 2.355457227138643, "grad_norm": 47.0, "learning_rate": 1.0907187571711995e-06, "loss": 0.4766, "step": 4791 }, { "epoch": 2.3559488692232056, "grad_norm": 15.9375, "learning_rate": 1.089127245845461e-06, "loss": 0.3081, "step": 4792 }, { "epoch": 2.3564405113077678, "grad_norm": 40.25, "learning_rate": 1.08753673661381e-06, "loss": 0.317, "step": 4793 }, { "epoch": 2.3569321533923304, "grad_norm": 38.75, "learning_rate": 1.0859472299435258e-06, "loss": 0.4205, "step": 4794 }, { "epoch": 2.357423795476893, "grad_norm": 20.125, "learning_rate": 1.0843587263015987e-06, "loss": 0.2478, "step": 4795 }, { "epoch": 2.3579154375614553, "grad_norm": 34.0, "learning_rate": 1.0827712261547189e-06, "loss": 0.3725, "step": 4796 }, { "epoch": 2.3584070796460175, "grad_norm": 30.625, "learning_rate": 1.0811847299692874e-06, "loss": 0.2085, "step": 4797 }, { "epoch": 2.35889872173058, "grad_norm": 24.5, "learning_rate": 1.0795992382114031e-06, "loss": 0.1766, "step": 4798 }, { "epoch": 2.3593903638151428, "grad_norm": 27.5, "learning_rate": 1.078014751346879e-06, "loss": 0.3325, "step": 4799 }, { "epoch": 2.359882005899705, "grad_norm": 39.75, "learning_rate": 1.0764312698412242e-06, "loss": 0.0875, "step": 4800 }, { "epoch": 2.3603736479842676, "grad_norm": 14.4375, "learning_rate": 1.0748487941596592e-06, "loss": 0.3486, "step": 4801 }, { "epoch": 2.36086529006883, "grad_norm": 17.125, "learning_rate": 1.0732673247671027e-06, "loss": 0.235, "step": 4802 }, { "epoch": 2.3613569321533925, "grad_norm": 15.0625, "learning_rate": 1.0716868621281836e-06, "loss": 0.1152, "step": 4803 }, { "epoch": 2.3618485742379547, "grad_norm": 33.5, "learning_rate": 1.0701074067072307e-06, "loss": 0.2954, "step": 4804 }, { "epoch": 2.3623402163225173, "grad_norm": 24.5, "learning_rate": 1.0685289589682785e-06, "loss": 0.2936, "step": 4805 }, { "epoch": 2.3628318584070795, "grad_norm": 31.625, "learning_rate": 1.0669515193750667e-06, "loss": 0.371, "step": 4806 }, { "epoch": 2.363323500491642, "grad_norm": 31.625, "learning_rate": 1.065375088391038e-06, "loss": 0.2981, "step": 4807 }, { "epoch": 2.3638151425762044, "grad_norm": 25.5, "learning_rate": 1.0637996664793367e-06, "loss": 0.1979, "step": 4808 }, { "epoch": 2.364306784660767, "grad_norm": 48.0, "learning_rate": 1.0622252541028147e-06, "loss": 0.3414, "step": 4809 }, { "epoch": 2.3647984267453293, "grad_norm": 27.75, "learning_rate": 1.0606518517240217e-06, "loss": 0.2535, "step": 4810 }, { "epoch": 2.365290068829892, "grad_norm": 30.125, "learning_rate": 1.0590794598052173e-06, "loss": 0.5304, "step": 4811 }, { "epoch": 2.365781710914454, "grad_norm": 26.25, "learning_rate": 1.0575080788083588e-06, "loss": 0.2635, "step": 4812 }, { "epoch": 2.3662733529990168, "grad_norm": 43.5, "learning_rate": 1.0559377091951086e-06, "loss": 0.4009, "step": 4813 }, { "epoch": 2.366764995083579, "grad_norm": 20.125, "learning_rate": 1.0543683514268325e-06, "loss": 0.2978, "step": 4814 }, { "epoch": 2.3672566371681416, "grad_norm": 41.25, "learning_rate": 1.0528000059645994e-06, "loss": 0.4476, "step": 4815 }, { "epoch": 2.3677482792527043, "grad_norm": 16.875, "learning_rate": 1.0512326732691778e-06, "loss": 0.2633, "step": 4816 }, { "epoch": 2.3682399213372665, "grad_norm": 32.75, "learning_rate": 1.0496663538010434e-06, "loss": 0.2805, "step": 4817 }, { "epoch": 2.3687315634218287, "grad_norm": 18.875, "learning_rate": 1.0481010480203683e-06, "loss": 0.3693, "step": 4818 }, { "epoch": 2.3692232055063913, "grad_norm": 10.875, "learning_rate": 1.0465367563870328e-06, "loss": 0.2655, "step": 4819 }, { "epoch": 2.369714847590954, "grad_norm": 44.75, "learning_rate": 1.044973479360613e-06, "loss": 0.3287, "step": 4820 }, { "epoch": 2.370206489675516, "grad_norm": 21.25, "learning_rate": 1.043411217400395e-06, "loss": 0.2374, "step": 4821 }, { "epoch": 2.370698131760079, "grad_norm": 22.375, "learning_rate": 1.0418499709653587e-06, "loss": 0.3004, "step": 4822 }, { "epoch": 2.371189773844641, "grad_norm": 29.375, "learning_rate": 1.0402897405141912e-06, "loss": 0.4207, "step": 4823 }, { "epoch": 2.3716814159292037, "grad_norm": 18.0, "learning_rate": 1.0387305265052762e-06, "loss": 0.321, "step": 4824 }, { "epoch": 2.372173058013766, "grad_norm": 33.0, "learning_rate": 1.0371723293967037e-06, "loss": 0.3006, "step": 4825 }, { "epoch": 2.3726647000983285, "grad_norm": 49.25, "learning_rate": 1.0356151496462623e-06, "loss": 0.3075, "step": 4826 }, { "epoch": 2.3731563421828907, "grad_norm": 23.0, "learning_rate": 1.0340589877114414e-06, "loss": 0.1819, "step": 4827 }, { "epoch": 2.3736479842674534, "grad_norm": 35.0, "learning_rate": 1.0325038440494322e-06, "loss": 0.3177, "step": 4828 }, { "epoch": 2.3741396263520156, "grad_norm": 12.5, "learning_rate": 1.0309497191171283e-06, "loss": 0.2085, "step": 4829 }, { "epoch": 2.3746312684365782, "grad_norm": 11.75, "learning_rate": 1.0293966133711198e-06, "loss": 0.1201, "step": 4830 }, { "epoch": 2.3751229105211404, "grad_norm": 15.125, "learning_rate": 1.0278445272677007e-06, "loss": 0.281, "step": 4831 }, { "epoch": 2.375614552605703, "grad_norm": 16.0, "learning_rate": 1.026293461262867e-06, "loss": 0.272, "step": 4832 }, { "epoch": 2.3761061946902653, "grad_norm": 28.875, "learning_rate": 1.0247434158123092e-06, "loss": 0.4881, "step": 4833 }, { "epoch": 2.376597836774828, "grad_norm": 17.0, "learning_rate": 1.023194391371424e-06, "loss": 0.2139, "step": 4834 }, { "epoch": 2.37708947885939, "grad_norm": 53.25, "learning_rate": 1.0216463883953035e-06, "loss": 0.4871, "step": 4835 }, { "epoch": 2.377581120943953, "grad_norm": 27.875, "learning_rate": 1.0200994073387431e-06, "loss": 0.2068, "step": 4836 }, { "epoch": 2.3780727630285154, "grad_norm": 26.5, "learning_rate": 1.0185534486562356e-06, "loss": 0.2703, "step": 4837 }, { "epoch": 2.3785644051130777, "grad_norm": 29.0, "learning_rate": 1.0170085128019769e-06, "loss": 0.2791, "step": 4838 }, { "epoch": 2.3790560471976403, "grad_norm": 21.125, "learning_rate": 1.0154646002298563e-06, "loss": 0.3964, "step": 4839 }, { "epoch": 2.3795476892822025, "grad_norm": 18.5, "learning_rate": 1.0139217113934697e-06, "loss": 0.3302, "step": 4840 }, { "epoch": 2.380039331366765, "grad_norm": 22.375, "learning_rate": 1.0123798467461056e-06, "loss": 0.2778, "step": 4841 }, { "epoch": 2.3805309734513274, "grad_norm": 10.625, "learning_rate": 1.010839006740756e-06, "loss": 0.1219, "step": 4842 }, { "epoch": 2.38102261553589, "grad_norm": 22.875, "learning_rate": 1.0092991918301104e-06, "loss": 0.3663, "step": 4843 }, { "epoch": 2.381514257620452, "grad_norm": 13.375, "learning_rate": 1.0077604024665585e-06, "loss": 0.2608, "step": 4844 }, { "epoch": 2.382005899705015, "grad_norm": 14.5, "learning_rate": 1.006222639102185e-06, "loss": 0.1169, "step": 4845 }, { "epoch": 2.382497541789577, "grad_norm": 29.875, "learning_rate": 1.0046859021887784e-06, "loss": 0.403, "step": 4846 }, { "epoch": 2.3829891838741397, "grad_norm": 20.625, "learning_rate": 1.0031501921778197e-06, "loss": 0.2996, "step": 4847 }, { "epoch": 2.383480825958702, "grad_norm": 14.1875, "learning_rate": 1.0016155095204944e-06, "loss": 0.334, "step": 4848 }, { "epoch": 2.3839724680432646, "grad_norm": 22.875, "learning_rate": 1.000081854667679e-06, "loss": 0.3669, "step": 4849 }, { "epoch": 2.3844641101278268, "grad_norm": 43.0, "learning_rate": 9.985492280699576e-07, "loss": 0.5433, "step": 4850 }, { "epoch": 2.3849557522123894, "grad_norm": 18.125, "learning_rate": 9.970176301776022e-07, "loss": 0.2506, "step": 4851 }, { "epoch": 2.3854473942969516, "grad_norm": 14.375, "learning_rate": 9.954870614405905e-07, "loss": 0.221, "step": 4852 }, { "epoch": 2.3859390363815143, "grad_norm": 15.1875, "learning_rate": 9.939575223085919e-07, "loss": 0.2876, "step": 4853 }, { "epoch": 2.386430678466077, "grad_norm": 18.25, "learning_rate": 9.924290132309778e-07, "loss": 0.3344, "step": 4854 }, { "epoch": 2.386922320550639, "grad_norm": 25.375, "learning_rate": 9.90901534656813e-07, "loss": 0.3275, "step": 4855 }, { "epoch": 2.3874139626352013, "grad_norm": 47.0, "learning_rate": 9.89375087034864e-07, "loss": 0.5165, "step": 4856 }, { "epoch": 2.387905604719764, "grad_norm": 32.5, "learning_rate": 9.878496708135881e-07, "loss": 0.4482, "step": 4857 }, { "epoch": 2.3883972468043266, "grad_norm": 27.125, "learning_rate": 9.863252864411492e-07, "loss": 0.3187, "step": 4858 }, { "epoch": 2.388888888888889, "grad_norm": 49.0, "learning_rate": 9.848019343653976e-07, "loss": 0.447, "step": 4859 }, { "epoch": 2.3893805309734515, "grad_norm": 29.5, "learning_rate": 9.83279615033888e-07, "loss": 0.4202, "step": 4860 }, { "epoch": 2.3898721730580137, "grad_norm": 26.75, "learning_rate": 9.81758328893866e-07, "loss": 0.3305, "step": 4861 }, { "epoch": 2.3903638151425763, "grad_norm": 37.25, "learning_rate": 9.802380763922798e-07, "loss": 0.2851, "step": 4862 }, { "epoch": 2.3908554572271385, "grad_norm": 33.0, "learning_rate": 9.787188579757669e-07, "loss": 0.2548, "step": 4863 }, { "epoch": 2.391347099311701, "grad_norm": 14.9375, "learning_rate": 9.772006740906662e-07, "loss": 0.1807, "step": 4864 }, { "epoch": 2.3918387413962634, "grad_norm": 46.25, "learning_rate": 9.756835251830115e-07, "loss": 0.2919, "step": 4865 }, { "epoch": 2.392330383480826, "grad_norm": 54.75, "learning_rate": 9.74167411698533e-07, "loss": 0.5544, "step": 4866 }, { "epoch": 2.3928220255653883, "grad_norm": 37.75, "learning_rate": 9.726523340826534e-07, "loss": 0.2407, "step": 4867 }, { "epoch": 2.393313667649951, "grad_norm": 12.5, "learning_rate": 9.71138292780496e-07, "loss": 0.125, "step": 4868 }, { "epoch": 2.393805309734513, "grad_norm": 37.5, "learning_rate": 9.696252882368743e-07, "loss": 0.2054, "step": 4869 }, { "epoch": 2.3942969518190758, "grad_norm": 19.25, "learning_rate": 9.681133208963018e-07, "loss": 0.309, "step": 4870 }, { "epoch": 2.394788593903638, "grad_norm": 13.3125, "learning_rate": 9.666023912029852e-07, "loss": 0.2161, "step": 4871 }, { "epoch": 2.3952802359882006, "grad_norm": 17.875, "learning_rate": 9.650924996008279e-07, "loss": 0.1458, "step": 4872 }, { "epoch": 2.395771878072763, "grad_norm": 27.875, "learning_rate": 9.635836465334244e-07, "loss": 0.394, "step": 4873 }, { "epoch": 2.3962635201573255, "grad_norm": 11.5, "learning_rate": 9.620758324440696e-07, "loss": 0.2011, "step": 4874 }, { "epoch": 2.396755162241888, "grad_norm": 16.25, "learning_rate": 9.605690577757473e-07, "loss": 0.341, "step": 4875 }, { "epoch": 2.3972468043264503, "grad_norm": 19.75, "learning_rate": 9.590633229711408e-07, "loss": 0.1872, "step": 4876 }, { "epoch": 2.397738446411013, "grad_norm": 29.25, "learning_rate": 9.575586284726266e-07, "loss": 0.2136, "step": 4877 }, { "epoch": 2.398230088495575, "grad_norm": 65.0, "learning_rate": 9.56054974722273e-07, "loss": 0.5728, "step": 4878 }, { "epoch": 2.398721730580138, "grad_norm": 44.25, "learning_rate": 9.545523621618457e-07, "loss": 0.2179, "step": 4879 }, { "epoch": 2.3992133726647, "grad_norm": 21.875, "learning_rate": 9.530507912328029e-07, "loss": 0.2116, "step": 4880 }, { "epoch": 2.3997050147492627, "grad_norm": 24.75, "learning_rate": 9.515502623762985e-07, "loss": 0.2843, "step": 4881 }, { "epoch": 2.400196656833825, "grad_norm": 39.25, "learning_rate": 9.500507760331772e-07, "loss": 0.3809, "step": 4882 }, { "epoch": 2.4006882989183875, "grad_norm": 72.0, "learning_rate": 9.485523326439806e-07, "loss": 0.5112, "step": 4883 }, { "epoch": 2.4011799410029497, "grad_norm": 25.5, "learning_rate": 9.470549326489411e-07, "loss": 0.4781, "step": 4884 }, { "epoch": 2.4016715830875124, "grad_norm": 26.625, "learning_rate": 9.455585764879873e-07, "loss": 0.2615, "step": 4885 }, { "epoch": 2.4021632251720746, "grad_norm": 23.0, "learning_rate": 9.440632646007364e-07, "loss": 0.1736, "step": 4886 }, { "epoch": 2.4026548672566372, "grad_norm": 25.5, "learning_rate": 9.425689974265067e-07, "loss": 0.2406, "step": 4887 }, { "epoch": 2.4031465093411994, "grad_norm": 12.125, "learning_rate": 9.410757754043024e-07, "loss": 0.2946, "step": 4888 }, { "epoch": 2.403638151425762, "grad_norm": 28.0, "learning_rate": 9.395835989728246e-07, "loss": 0.3773, "step": 4889 }, { "epoch": 2.4041297935103243, "grad_norm": 23.5, "learning_rate": 9.380924685704636e-07, "loss": 0.2847, "step": 4890 }, { "epoch": 2.404621435594887, "grad_norm": 18.5, "learning_rate": 9.366023846353075e-07, "loss": 0.2785, "step": 4891 }, { "epoch": 2.4051130776794496, "grad_norm": 19.125, "learning_rate": 9.351133476051311e-07, "loss": 0.4025, "step": 4892 }, { "epoch": 2.405604719764012, "grad_norm": 24.125, "learning_rate": 9.336253579174075e-07, "loss": 0.3978, "step": 4893 }, { "epoch": 2.406096361848574, "grad_norm": 6.96875, "learning_rate": 9.321384160092954e-07, "loss": 0.173, "step": 4894 }, { "epoch": 2.4065880039331367, "grad_norm": 30.5, "learning_rate": 9.306525223176545e-07, "loss": 0.2825, "step": 4895 }, { "epoch": 2.4070796460176993, "grad_norm": 47.25, "learning_rate": 9.291676772790275e-07, "loss": 0.3395, "step": 4896 }, { "epoch": 2.4075712881022615, "grad_norm": 31.875, "learning_rate": 9.276838813296555e-07, "loss": 0.4372, "step": 4897 }, { "epoch": 2.408062930186824, "grad_norm": 21.875, "learning_rate": 9.262011349054662e-07, "loss": 0.1939, "step": 4898 }, { "epoch": 2.4085545722713864, "grad_norm": 14.0625, "learning_rate": 9.247194384420852e-07, "loss": 0.247, "step": 4899 }, { "epoch": 2.409046214355949, "grad_norm": 13.875, "learning_rate": 9.232387923748224e-07, "loss": 0.1682, "step": 4900 }, { "epoch": 2.409537856440511, "grad_norm": 37.75, "learning_rate": 9.21759197138685e-07, "loss": 0.4653, "step": 4901 }, { "epoch": 2.410029498525074, "grad_norm": 20.875, "learning_rate": 9.202806531683687e-07, "loss": 0.2326, "step": 4902 }, { "epoch": 2.410521140609636, "grad_norm": 17.625, "learning_rate": 9.188031608982617e-07, "loss": 0.2696, "step": 4903 }, { "epoch": 2.4110127826941987, "grad_norm": 28.125, "learning_rate": 9.17326720762441e-07, "loss": 0.2552, "step": 4904 }, { "epoch": 2.411504424778761, "grad_norm": 21.125, "learning_rate": 9.158513331946779e-07, "loss": 0.2704, "step": 4905 }, { "epoch": 2.4119960668633236, "grad_norm": 22.375, "learning_rate": 9.143769986284298e-07, "loss": 0.2614, "step": 4906 }, { "epoch": 2.412487708947886, "grad_norm": 25.375, "learning_rate": 9.129037174968503e-07, "loss": 0.3808, "step": 4907 }, { "epoch": 2.4129793510324484, "grad_norm": 43.5, "learning_rate": 9.114314902327765e-07, "loss": 0.5496, "step": 4908 }, { "epoch": 2.4134709931170106, "grad_norm": 8.875, "learning_rate": 9.099603172687449e-07, "loss": 0.1419, "step": 4909 }, { "epoch": 2.4139626352015733, "grad_norm": 39.75, "learning_rate": 9.084901990369739e-07, "loss": 0.3339, "step": 4910 }, { "epoch": 2.4144542772861355, "grad_norm": 12.9375, "learning_rate": 9.070211359693785e-07, "loss": 0.2474, "step": 4911 }, { "epoch": 2.414945919370698, "grad_norm": 24.625, "learning_rate": 9.055531284975568e-07, "loss": 0.2739, "step": 4912 }, { "epoch": 2.415437561455261, "grad_norm": 20.25, "learning_rate": 9.040861770528043e-07, "loss": 0.354, "step": 4913 }, { "epoch": 2.415929203539823, "grad_norm": 20.0, "learning_rate": 9.026202820660995e-07, "loss": 0.3549, "step": 4914 }, { "epoch": 2.4164208456243856, "grad_norm": 13.0, "learning_rate": 9.011554439681157e-07, "loss": 0.2042, "step": 4915 }, { "epoch": 2.416912487708948, "grad_norm": 42.5, "learning_rate": 8.996916631892121e-07, "loss": 0.4097, "step": 4916 }, { "epoch": 2.4174041297935105, "grad_norm": 11.6875, "learning_rate": 8.982289401594414e-07, "loss": 0.1952, "step": 4917 }, { "epoch": 2.4178957718780727, "grad_norm": 49.5, "learning_rate": 8.967672753085402e-07, "loss": 0.3863, "step": 4918 }, { "epoch": 2.4183874139626353, "grad_norm": 17.0, "learning_rate": 8.953066690659375e-07, "loss": 0.2129, "step": 4919 }, { "epoch": 2.4188790560471976, "grad_norm": 15.5, "learning_rate": 8.938471218607526e-07, "loss": 0.1469, "step": 4920 }, { "epoch": 2.41937069813176, "grad_norm": 28.625, "learning_rate": 8.923886341217884e-07, "loss": 0.3319, "step": 4921 }, { "epoch": 2.4198623402163224, "grad_norm": 20.875, "learning_rate": 8.909312062775437e-07, "loss": 0.325, "step": 4922 }, { "epoch": 2.420353982300885, "grad_norm": 17.25, "learning_rate": 8.894748387561985e-07, "loss": 0.1891, "step": 4923 }, { "epoch": 2.4208456243854473, "grad_norm": 16.375, "learning_rate": 8.88019531985627e-07, "loss": 0.3471, "step": 4924 }, { "epoch": 2.42133726647001, "grad_norm": 17.25, "learning_rate": 8.865652863933888e-07, "loss": 0.2553, "step": 4925 }, { "epoch": 2.421828908554572, "grad_norm": 77.5, "learning_rate": 8.851121024067344e-07, "loss": 0.5306, "step": 4926 }, { "epoch": 2.4223205506391348, "grad_norm": 29.75, "learning_rate": 8.83659980452598e-07, "loss": 0.4039, "step": 4927 }, { "epoch": 2.422812192723697, "grad_norm": 18.5, "learning_rate": 8.822089209576067e-07, "loss": 0.3501, "step": 4928 }, { "epoch": 2.4233038348082596, "grad_norm": 13.0, "learning_rate": 8.807589243480711e-07, "loss": 0.1217, "step": 4929 }, { "epoch": 2.4237954768928223, "grad_norm": 43.5, "learning_rate": 8.793099910499924e-07, "loss": 0.4579, "step": 4930 }, { "epoch": 2.4242871189773845, "grad_norm": 18.375, "learning_rate": 8.778621214890586e-07, "loss": 0.265, "step": 4931 }, { "epoch": 2.4247787610619467, "grad_norm": 19.875, "learning_rate": 8.764153160906464e-07, "loss": 0.1045, "step": 4932 }, { "epoch": 2.4252704031465093, "grad_norm": 22.875, "learning_rate": 8.749695752798162e-07, "loss": 0.2409, "step": 4933 }, { "epoch": 2.425762045231072, "grad_norm": 26.625, "learning_rate": 8.735248994813203e-07, "loss": 0.4881, "step": 4934 }, { "epoch": 2.426253687315634, "grad_norm": 35.25, "learning_rate": 8.720812891195938e-07, "loss": 0.3586, "step": 4935 }, { "epoch": 2.426745329400197, "grad_norm": 37.25, "learning_rate": 8.706387446187629e-07, "loss": 0.5206, "step": 4936 }, { "epoch": 2.427236971484759, "grad_norm": 11.8125, "learning_rate": 8.691972664026364e-07, "loss": 0.3039, "step": 4937 }, { "epoch": 2.4277286135693217, "grad_norm": 29.625, "learning_rate": 8.677568548947127e-07, "loss": 0.3744, "step": 4938 }, { "epoch": 2.428220255653884, "grad_norm": 39.75, "learning_rate": 8.663175105181768e-07, "loss": 0.2317, "step": 4939 }, { "epoch": 2.4287118977384465, "grad_norm": 21.625, "learning_rate": 8.648792336959004e-07, "loss": 0.2369, "step": 4940 }, { "epoch": 2.4292035398230087, "grad_norm": 29.125, "learning_rate": 8.63442024850438e-07, "loss": 0.379, "step": 4941 }, { "epoch": 2.4296951819075714, "grad_norm": 28.875, "learning_rate": 8.620058844040357e-07, "loss": 0.3422, "step": 4942 }, { "epoch": 2.4301868239921336, "grad_norm": 10.1875, "learning_rate": 8.605708127786207e-07, "loss": 0.1888, "step": 4943 }, { "epoch": 2.4306784660766962, "grad_norm": 60.25, "learning_rate": 8.591368103958106e-07, "loss": 0.4906, "step": 4944 }, { "epoch": 2.4311701081612584, "grad_norm": 18.625, "learning_rate": 8.577038776769036e-07, "loss": 0.2321, "step": 4945 }, { "epoch": 2.431661750245821, "grad_norm": 19.25, "learning_rate": 8.562720150428912e-07, "loss": 0.3535, "step": 4946 }, { "epoch": 2.4321533923303833, "grad_norm": 26.5, "learning_rate": 8.548412229144425e-07, "loss": 0.2616, "step": 4947 }, { "epoch": 2.432645034414946, "grad_norm": 33.0, "learning_rate": 8.534115017119181e-07, "loss": 0.2864, "step": 4948 }, { "epoch": 2.433136676499508, "grad_norm": 27.0, "learning_rate": 8.519828518553594e-07, "loss": 0.4036, "step": 4949 }, { "epoch": 2.433628318584071, "grad_norm": 22.125, "learning_rate": 8.505552737644972e-07, "loss": 0.4001, "step": 4950 }, { "epoch": 2.4341199606686335, "grad_norm": 39.0, "learning_rate": 8.491287678587438e-07, "loss": 0.5471, "step": 4951 }, { "epoch": 2.4346116027531957, "grad_norm": 34.25, "learning_rate": 8.477033345571983e-07, "loss": 0.3129, "step": 4952 }, { "epoch": 2.435103244837758, "grad_norm": 27.25, "learning_rate": 8.462789742786458e-07, "loss": 0.3287, "step": 4953 }, { "epoch": 2.4355948869223205, "grad_norm": 26.25, "learning_rate": 8.448556874415545e-07, "loss": 0.3599, "step": 4954 }, { "epoch": 2.436086529006883, "grad_norm": 20.25, "learning_rate": 8.434334744640764e-07, "loss": 0.4104, "step": 4955 }, { "epoch": 2.4365781710914454, "grad_norm": 19.875, "learning_rate": 8.420123357640513e-07, "loss": 0.1355, "step": 4956 }, { "epoch": 2.437069813176008, "grad_norm": 18.0, "learning_rate": 8.405922717589982e-07, "loss": 0.4274, "step": 4957 }, { "epoch": 2.43756145526057, "grad_norm": 24.875, "learning_rate": 8.391732828661259e-07, "loss": 0.3449, "step": 4958 }, { "epoch": 2.438053097345133, "grad_norm": 22.5, "learning_rate": 8.37755369502325e-07, "loss": 0.174, "step": 4959 }, { "epoch": 2.438544739429695, "grad_norm": 28.25, "learning_rate": 8.36338532084168e-07, "loss": 0.3484, "step": 4960 }, { "epoch": 2.4390363815142577, "grad_norm": 17.75, "learning_rate": 8.349227710279148e-07, "loss": 0.2335, "step": 4961 }, { "epoch": 2.43952802359882, "grad_norm": 25.75, "learning_rate": 8.335080867495084e-07, "loss": 0.3848, "step": 4962 }, { "epoch": 2.4400196656833826, "grad_norm": 31.0, "learning_rate": 8.320944796645718e-07, "loss": 0.429, "step": 4963 }, { "epoch": 2.440511307767945, "grad_norm": 33.25, "learning_rate": 8.306819501884161e-07, "loss": 0.3819, "step": 4964 }, { "epoch": 2.4410029498525074, "grad_norm": 14.0625, "learning_rate": 8.292704987360356e-07, "loss": 0.2933, "step": 4965 }, { "epoch": 2.4414945919370696, "grad_norm": 15.5, "learning_rate": 8.27860125722103e-07, "loss": 0.2673, "step": 4966 }, { "epoch": 2.4419862340216323, "grad_norm": 13.0, "learning_rate": 8.264508315609792e-07, "loss": 0.1063, "step": 4967 }, { "epoch": 2.442477876106195, "grad_norm": 35.5, "learning_rate": 8.250426166667067e-07, "loss": 0.3479, "step": 4968 }, { "epoch": 2.442969518190757, "grad_norm": 38.0, "learning_rate": 8.236354814530111e-07, "loss": 0.3536, "step": 4969 }, { "epoch": 2.4434611602753193, "grad_norm": 18.125, "learning_rate": 8.222294263332983e-07, "loss": 0.3963, "step": 4970 }, { "epoch": 2.443952802359882, "grad_norm": 25.5, "learning_rate": 8.208244517206618e-07, "loss": 0.2798, "step": 4971 }, { "epoch": 2.4444444444444446, "grad_norm": 40.75, "learning_rate": 8.194205580278714e-07, "loss": 0.5682, "step": 4972 }, { "epoch": 2.444936086529007, "grad_norm": 14.0, "learning_rate": 8.180177456673862e-07, "loss": 0.3536, "step": 4973 }, { "epoch": 2.4454277286135695, "grad_norm": 36.5, "learning_rate": 8.166160150513396e-07, "loss": 0.2205, "step": 4974 }, { "epoch": 2.4459193706981317, "grad_norm": 16.625, "learning_rate": 8.15215366591557e-07, "loss": 0.2426, "step": 4975 }, { "epoch": 2.4464110127826943, "grad_norm": 38.75, "learning_rate": 8.138158006995364e-07, "loss": 0.3082, "step": 4976 }, { "epoch": 2.4469026548672566, "grad_norm": 20.875, "learning_rate": 8.124173177864653e-07, "loss": 0.2474, "step": 4977 }, { "epoch": 2.447394296951819, "grad_norm": 12.1875, "learning_rate": 8.11019918263206e-07, "loss": 0.0963, "step": 4978 }, { "epoch": 2.4478859390363814, "grad_norm": 7.96875, "learning_rate": 8.096236025403098e-07, "loss": 0.141, "step": 4979 }, { "epoch": 2.448377581120944, "grad_norm": 23.875, "learning_rate": 8.082283710280029e-07, "loss": 0.2824, "step": 4980 }, { "epoch": 2.4488692232055063, "grad_norm": 10.875, "learning_rate": 8.068342241361982e-07, "loss": 0.2629, "step": 4981 }, { "epoch": 2.449360865290069, "grad_norm": 15.1875, "learning_rate": 8.054411622744845e-07, "loss": 0.1597, "step": 4982 }, { "epoch": 2.449852507374631, "grad_norm": 42.5, "learning_rate": 8.040491858521401e-07, "loss": 0.644, "step": 4983 }, { "epoch": 2.4503441494591938, "grad_norm": 13.25, "learning_rate": 8.02658295278115e-07, "loss": 0.1772, "step": 4984 }, { "epoch": 2.450835791543756, "grad_norm": 22.75, "learning_rate": 8.012684909610477e-07, "loss": 0.2587, "step": 4985 }, { "epoch": 2.4513274336283186, "grad_norm": 15.25, "learning_rate": 7.998797733092513e-07, "loss": 0.2784, "step": 4986 }, { "epoch": 2.451819075712881, "grad_norm": 12.375, "learning_rate": 7.984921427307262e-07, "loss": 0.16, "step": 4987 }, { "epoch": 2.4523107177974435, "grad_norm": 21.625, "learning_rate": 7.971055996331464e-07, "loss": 0.3654, "step": 4988 }, { "epoch": 2.452802359882006, "grad_norm": 28.0, "learning_rate": 7.957201444238725e-07, "loss": 0.3503, "step": 4989 }, { "epoch": 2.4532940019665683, "grad_norm": 19.5, "learning_rate": 7.943357775099419e-07, "loss": 0.3666, "step": 4990 }, { "epoch": 2.4537856440511305, "grad_norm": 33.0, "learning_rate": 7.929524992980749e-07, "loss": 0.359, "step": 4991 }, { "epoch": 2.454277286135693, "grad_norm": 36.0, "learning_rate": 7.915703101946678e-07, "loss": 0.5527, "step": 4992 }, { "epoch": 2.454768928220256, "grad_norm": 53.5, "learning_rate": 7.901892106058028e-07, "loss": 0.3705, "step": 4993 }, { "epoch": 2.455260570304818, "grad_norm": 15.125, "learning_rate": 7.888092009372354e-07, "loss": 0.2373, "step": 4994 }, { "epoch": 2.4557522123893807, "grad_norm": 14.5625, "learning_rate": 7.874302815944066e-07, "loss": 0.3543, "step": 4995 }, { "epoch": 2.456243854473943, "grad_norm": 28.875, "learning_rate": 7.860524529824329e-07, "loss": 0.2562, "step": 4996 }, { "epoch": 2.4567354965585055, "grad_norm": 16.75, "learning_rate": 7.846757155061125e-07, "loss": 0.2803, "step": 4997 }, { "epoch": 2.4572271386430677, "grad_norm": 26.875, "learning_rate": 7.83300069569924e-07, "loss": 0.2994, "step": 4998 }, { "epoch": 2.4577187807276304, "grad_norm": 21.75, "learning_rate": 7.81925515578024e-07, "loss": 0.2422, "step": 4999 }, { "epoch": 2.4582104228121926, "grad_norm": 29.0, "learning_rate": 7.805520539342458e-07, "loss": 0.4639, "step": 5000 }, { "epoch": 2.4582104228121926, "eval_loss": 0.375171422958374, "eval_runtime": 66.3758, "eval_samples_per_second": 122.575, "eval_spearman": 0.590510224765289, "eval_steps_per_second": 15.322, "step": 5000 }, { "epoch": 2.4587020648967552, "grad_norm": 18.625, "learning_rate": 7.791796850421074e-07, "loss": 0.231, "step": 5001 }, { "epoch": 2.4591937069813175, "grad_norm": 29.0, "learning_rate": 7.778084093048001e-07, "loss": 0.4129, "step": 5002 }, { "epoch": 2.45968534906588, "grad_norm": 35.25, "learning_rate": 7.764382271251973e-07, "loss": 0.3314, "step": 5003 }, { "epoch": 2.4601769911504423, "grad_norm": 14.875, "learning_rate": 7.750691389058502e-07, "loss": 0.1699, "step": 5004 }, { "epoch": 2.460668633235005, "grad_norm": 18.75, "learning_rate": 7.737011450489908e-07, "loss": 0.2914, "step": 5005 }, { "epoch": 2.4611602753195676, "grad_norm": 26.625, "learning_rate": 7.723342459565246e-07, "loss": 0.2098, "step": 5006 }, { "epoch": 2.46165191740413, "grad_norm": 10.75, "learning_rate": 7.709684420300398e-07, "loss": 0.2036, "step": 5007 }, { "epoch": 2.462143559488692, "grad_norm": 18.5, "learning_rate": 7.696037336708023e-07, "loss": 0.2516, "step": 5008 }, { "epoch": 2.4626352015732547, "grad_norm": 13.8125, "learning_rate": 7.682401212797531e-07, "loss": 0.2228, "step": 5009 }, { "epoch": 2.4631268436578173, "grad_norm": 48.25, "learning_rate": 7.668776052575157e-07, "loss": 0.3807, "step": 5010 }, { "epoch": 2.4636184857423795, "grad_norm": 30.375, "learning_rate": 7.655161860043873e-07, "loss": 0.1916, "step": 5011 }, { "epoch": 2.464110127826942, "grad_norm": 46.0, "learning_rate": 7.641558639203455e-07, "loss": 0.4278, "step": 5012 }, { "epoch": 2.4646017699115044, "grad_norm": 40.0, "learning_rate": 7.627966394050445e-07, "loss": 0.3878, "step": 5013 }, { "epoch": 2.465093411996067, "grad_norm": 21.25, "learning_rate": 7.61438512857818e-07, "loss": 0.3012, "step": 5014 }, { "epoch": 2.4655850540806292, "grad_norm": 29.0, "learning_rate": 7.600814846776733e-07, "loss": 0.2511, "step": 5015 }, { "epoch": 2.466076696165192, "grad_norm": 18.375, "learning_rate": 7.587255552632989e-07, "loss": 0.3069, "step": 5016 }, { "epoch": 2.466568338249754, "grad_norm": 20.5, "learning_rate": 7.573707250130573e-07, "loss": 0.4502, "step": 5017 }, { "epoch": 2.4670599803343167, "grad_norm": 19.75, "learning_rate": 7.560169943249909e-07, "loss": 0.3426, "step": 5018 }, { "epoch": 2.467551622418879, "grad_norm": 36.0, "learning_rate": 7.546643635968152e-07, "loss": 0.3899, "step": 5019 }, { "epoch": 2.4680432645034416, "grad_norm": 25.125, "learning_rate": 7.533128332259289e-07, "loss": 0.3362, "step": 5020 }, { "epoch": 2.468534906588004, "grad_norm": 31.375, "learning_rate": 7.519624036094002e-07, "loss": 0.2439, "step": 5021 }, { "epoch": 2.4690265486725664, "grad_norm": 23.625, "learning_rate": 7.506130751439803e-07, "loss": 0.336, "step": 5022 }, { "epoch": 2.4695181907571286, "grad_norm": 34.0, "learning_rate": 7.492648482260906e-07, "loss": 0.3481, "step": 5023 }, { "epoch": 2.4700098328416913, "grad_norm": 13.0, "learning_rate": 7.479177232518352e-07, "loss": 0.2776, "step": 5024 }, { "epoch": 2.4705014749262535, "grad_norm": 26.125, "learning_rate": 7.465717006169886e-07, "loss": 0.2744, "step": 5025 }, { "epoch": 2.470993117010816, "grad_norm": 27.0, "learning_rate": 7.452267807170059e-07, "loss": 0.3364, "step": 5026 }, { "epoch": 2.471484759095379, "grad_norm": 58.0, "learning_rate": 7.438829639470161e-07, "loss": 0.3776, "step": 5027 }, { "epoch": 2.471976401179941, "grad_norm": 36.5, "learning_rate": 7.425402507018252e-07, "loss": 0.413, "step": 5028 }, { "epoch": 2.472468043264503, "grad_norm": 8.0625, "learning_rate": 7.411986413759135e-07, "loss": 0.1658, "step": 5029 }, { "epoch": 2.472959685349066, "grad_norm": 21.125, "learning_rate": 7.398581363634388e-07, "loss": 0.227, "step": 5030 }, { "epoch": 2.4734513274336285, "grad_norm": 9.3125, "learning_rate": 7.385187360582313e-07, "loss": 0.0947, "step": 5031 }, { "epoch": 2.4739429695181907, "grad_norm": 24.25, "learning_rate": 7.371804408538022e-07, "loss": 0.2331, "step": 5032 }, { "epoch": 2.4744346116027534, "grad_norm": 32.25, "learning_rate": 7.3584325114333e-07, "loss": 0.2322, "step": 5033 }, { "epoch": 2.4749262536873156, "grad_norm": 13.8125, "learning_rate": 7.345071673196785e-07, "loss": 0.1734, "step": 5034 }, { "epoch": 2.475417895771878, "grad_norm": 27.375, "learning_rate": 7.331721897753771e-07, "loss": 0.3474, "step": 5035 }, { "epoch": 2.4759095378564404, "grad_norm": 22.125, "learning_rate": 7.31838318902637e-07, "loss": 0.2839, "step": 5036 }, { "epoch": 2.476401179941003, "grad_norm": 15.1875, "learning_rate": 7.305055550933393e-07, "loss": 0.3241, "step": 5037 }, { "epoch": 2.4768928220255653, "grad_norm": 21.625, "learning_rate": 7.291738987390434e-07, "loss": 0.2504, "step": 5038 }, { "epoch": 2.477384464110128, "grad_norm": 22.375, "learning_rate": 7.278433502309807e-07, "loss": 0.3032, "step": 5039 }, { "epoch": 2.47787610619469, "grad_norm": 39.0, "learning_rate": 7.265139099600606e-07, "loss": 0.3772, "step": 5040 }, { "epoch": 2.4783677482792528, "grad_norm": 21.375, "learning_rate": 7.251855783168611e-07, "loss": 0.3388, "step": 5041 }, { "epoch": 2.478859390363815, "grad_norm": 35.75, "learning_rate": 7.238583556916425e-07, "loss": 0.3245, "step": 5042 }, { "epoch": 2.4793510324483776, "grad_norm": 13.9375, "learning_rate": 7.22532242474332e-07, "loss": 0.237, "step": 5043 }, { "epoch": 2.47984267453294, "grad_norm": 35.75, "learning_rate": 7.212072390545353e-07, "loss": 0.2125, "step": 5044 }, { "epoch": 2.4803343166175025, "grad_norm": 33.0, "learning_rate": 7.198833458215287e-07, "loss": 0.4184, "step": 5045 }, { "epoch": 2.4808259587020647, "grad_norm": 21.625, "learning_rate": 7.185605631642654e-07, "loss": 0.3315, "step": 5046 }, { "epoch": 2.4813176007866273, "grad_norm": 12.75, "learning_rate": 7.172388914713722e-07, "loss": 0.3697, "step": 5047 }, { "epoch": 2.48180924287119, "grad_norm": 13.9375, "learning_rate": 7.159183311311458e-07, "loss": 0.2177, "step": 5048 }, { "epoch": 2.482300884955752, "grad_norm": 20.625, "learning_rate": 7.1459888253156e-07, "loss": 0.2642, "step": 5049 }, { "epoch": 2.482792527040315, "grad_norm": 17.625, "learning_rate": 7.132805460602621e-07, "loss": 0.1746, "step": 5050 }, { "epoch": 2.483284169124877, "grad_norm": 47.25, "learning_rate": 7.119633221045699e-07, "loss": 0.3993, "step": 5051 }, { "epoch": 2.4837758112094397, "grad_norm": 32.25, "learning_rate": 7.106472110514763e-07, "loss": 0.4712, "step": 5052 }, { "epoch": 2.484267453294002, "grad_norm": 21.5, "learning_rate": 7.093322132876484e-07, "loss": 0.1629, "step": 5053 }, { "epoch": 2.4847590953785645, "grad_norm": 35.0, "learning_rate": 7.080183291994222e-07, "loss": 0.3119, "step": 5054 }, { "epoch": 2.4852507374631267, "grad_norm": 16.875, "learning_rate": 7.067055591728101e-07, "loss": 0.2557, "step": 5055 }, { "epoch": 2.4857423795476894, "grad_norm": 19.625, "learning_rate": 7.053939035934968e-07, "loss": 0.2878, "step": 5056 }, { "epoch": 2.4862340216322516, "grad_norm": 15.4375, "learning_rate": 7.040833628468392e-07, "loss": 0.2541, "step": 5057 }, { "epoch": 2.4867256637168142, "grad_norm": 26.25, "learning_rate": 7.027739373178644e-07, "loss": 0.3407, "step": 5058 }, { "epoch": 2.4872173058013765, "grad_norm": 15.875, "learning_rate": 7.014656273912761e-07, "loss": 0.2735, "step": 5059 }, { "epoch": 2.487708947885939, "grad_norm": 13.6875, "learning_rate": 7.001584334514462e-07, "loss": 0.4245, "step": 5060 }, { "epoch": 2.4882005899705013, "grad_norm": 24.5, "learning_rate": 6.988523558824216e-07, "loss": 0.3411, "step": 5061 }, { "epoch": 2.488692232055064, "grad_norm": 16.5, "learning_rate": 6.975473950679189e-07, "loss": 0.1886, "step": 5062 }, { "epoch": 2.489183874139626, "grad_norm": 18.625, "learning_rate": 6.962435513913284e-07, "loss": 0.2243, "step": 5063 }, { "epoch": 2.489675516224189, "grad_norm": 13.75, "learning_rate": 6.949408252357117e-07, "loss": 0.2231, "step": 5064 }, { "epoch": 2.4901671583087515, "grad_norm": 22.375, "learning_rate": 6.936392169838029e-07, "loss": 0.2031, "step": 5065 }, { "epoch": 2.4906588003933137, "grad_norm": 37.5, "learning_rate": 6.92338727018005e-07, "loss": 0.5299, "step": 5066 }, { "epoch": 2.491150442477876, "grad_norm": 13.0, "learning_rate": 6.910393557203961e-07, "loss": 0.377, "step": 5067 }, { "epoch": 2.4916420845624385, "grad_norm": 17.75, "learning_rate": 6.897411034727214e-07, "loss": 0.2416, "step": 5068 }, { "epoch": 2.492133726647001, "grad_norm": 45.0, "learning_rate": 6.884439706564025e-07, "loss": 0.4465, "step": 5069 }, { "epoch": 2.4926253687315634, "grad_norm": 16.875, "learning_rate": 6.871479576525251e-07, "loss": 0.19, "step": 5070 }, { "epoch": 2.493117010816126, "grad_norm": 16.25, "learning_rate": 6.858530648418551e-07, "loss": 0.3101, "step": 5071 }, { "epoch": 2.4936086529006882, "grad_norm": 18.625, "learning_rate": 6.845592926048213e-07, "loss": 0.2788, "step": 5072 }, { "epoch": 2.494100294985251, "grad_norm": 28.25, "learning_rate": 6.832666413215275e-07, "loss": 0.2514, "step": 5073 }, { "epoch": 2.494591937069813, "grad_norm": 12.0625, "learning_rate": 6.819751113717456e-07, "loss": 0.349, "step": 5074 }, { "epoch": 2.4950835791543757, "grad_norm": 32.0, "learning_rate": 6.806847031349209e-07, "loss": 0.4904, "step": 5075 }, { "epoch": 2.495575221238938, "grad_norm": 35.0, "learning_rate": 6.793954169901658e-07, "loss": 0.4674, "step": 5076 }, { "epoch": 2.4960668633235006, "grad_norm": 16.375, "learning_rate": 6.781072533162665e-07, "loss": 0.2074, "step": 5077 }, { "epoch": 2.496558505408063, "grad_norm": 12.0, "learning_rate": 6.768202124916768e-07, "loss": 0.267, "step": 5078 }, { "epoch": 2.4970501474926254, "grad_norm": 10.5625, "learning_rate": 6.755342948945236e-07, "loss": 0.1605, "step": 5079 }, { "epoch": 2.4975417895771876, "grad_norm": 30.5, "learning_rate": 6.742495009025993e-07, "loss": 0.4341, "step": 5080 }, { "epoch": 2.4980334316617503, "grad_norm": 39.0, "learning_rate": 6.729658308933704e-07, "loss": 0.2536, "step": 5081 }, { "epoch": 2.4985250737463125, "grad_norm": 22.625, "learning_rate": 6.716832852439703e-07, "loss": 0.3521, "step": 5082 }, { "epoch": 2.499016715830875, "grad_norm": 24.375, "learning_rate": 6.704018643312043e-07, "loss": 0.3325, "step": 5083 }, { "epoch": 2.4995083579154374, "grad_norm": 21.0, "learning_rate": 6.691215685315452e-07, "loss": 0.2202, "step": 5084 }, { "epoch": 2.5, "grad_norm": 20.625, "learning_rate": 6.678423982211368e-07, "loss": 0.2705, "step": 5085 }, { "epoch": 2.5004916420845626, "grad_norm": 37.5, "learning_rate": 6.665643537757916e-07, "loss": 0.399, "step": 5086 }, { "epoch": 2.500983284169125, "grad_norm": 25.125, "learning_rate": 6.652874355709928e-07, "loss": 0.2573, "step": 5087 }, { "epoch": 2.501474926253687, "grad_norm": 32.5, "learning_rate": 6.640116439818892e-07, "loss": 0.2789, "step": 5088 }, { "epoch": 2.5019665683382497, "grad_norm": 24.0, "learning_rate": 6.627369793833024e-07, "loss": 0.2975, "step": 5089 }, { "epoch": 2.5024582104228124, "grad_norm": 22.0, "learning_rate": 6.614634421497196e-07, "loss": 0.46, "step": 5090 }, { "epoch": 2.5029498525073746, "grad_norm": 39.75, "learning_rate": 6.601910326552998e-07, "loss": 0.451, "step": 5091 }, { "epoch": 2.503441494591937, "grad_norm": 20.0, "learning_rate": 6.589197512738685e-07, "loss": 0.3467, "step": 5092 }, { "epoch": 2.5039331366764994, "grad_norm": 26.875, "learning_rate": 6.576495983789225e-07, "loss": 0.2333, "step": 5093 }, { "epoch": 2.504424778761062, "grad_norm": 28.75, "learning_rate": 6.563805743436222e-07, "loss": 0.3591, "step": 5094 }, { "epoch": 2.5049164208456243, "grad_norm": 14.0, "learning_rate": 6.551126795408014e-07, "loss": 0.2309, "step": 5095 }, { "epoch": 2.505408062930187, "grad_norm": 25.0, "learning_rate": 6.538459143429597e-07, "loss": 0.4749, "step": 5096 }, { "epoch": 2.505899705014749, "grad_norm": 20.5, "learning_rate": 6.525802791222644e-07, "loss": 0.3092, "step": 5097 }, { "epoch": 2.5063913470993118, "grad_norm": 31.375, "learning_rate": 6.51315774250553e-07, "loss": 0.1471, "step": 5098 }, { "epoch": 2.506882989183874, "grad_norm": 32.25, "learning_rate": 6.500524000993278e-07, "loss": 0.2837, "step": 5099 }, { "epoch": 2.5073746312684366, "grad_norm": 15.875, "learning_rate": 6.48790157039761e-07, "loss": 0.2793, "step": 5100 }, { "epoch": 2.507866273352999, "grad_norm": 23.25, "learning_rate": 6.475290454426928e-07, "loss": 0.2331, "step": 5101 }, { "epoch": 2.5083579154375615, "grad_norm": 13.9375, "learning_rate": 6.462690656786305e-07, "loss": 0.249, "step": 5102 }, { "epoch": 2.508849557522124, "grad_norm": 44.5, "learning_rate": 6.450102181177474e-07, "loss": 0.4486, "step": 5103 }, { "epoch": 2.5093411996066863, "grad_norm": 8.8125, "learning_rate": 6.437525031298871e-07, "loss": 0.1131, "step": 5104 }, { "epoch": 2.5098328416912485, "grad_norm": 28.375, "learning_rate": 6.424959210845567e-07, "loss": 0.2912, "step": 5105 }, { "epoch": 2.510324483775811, "grad_norm": 21.75, "learning_rate": 6.412404723509346e-07, "loss": 0.4274, "step": 5106 }, { "epoch": 2.510816125860374, "grad_norm": 25.0, "learning_rate": 6.399861572978611e-07, "loss": 0.3273, "step": 5107 }, { "epoch": 2.511307767944936, "grad_norm": 20.0, "learning_rate": 6.387329762938502e-07, "loss": 0.2456, "step": 5108 }, { "epoch": 2.5117994100294987, "grad_norm": 67.5, "learning_rate": 6.374809297070764e-07, "loss": 0.5313, "step": 5109 }, { "epoch": 2.512291052114061, "grad_norm": 35.5, "learning_rate": 6.362300179053861e-07, "loss": 0.3226, "step": 5110 }, { "epoch": 2.5127826941986235, "grad_norm": 16.5, "learning_rate": 6.349802412562867e-07, "loss": 0.3044, "step": 5111 }, { "epoch": 2.5132743362831858, "grad_norm": 25.5, "learning_rate": 6.337316001269569e-07, "loss": 0.3812, "step": 5112 }, { "epoch": 2.5137659783677484, "grad_norm": 20.625, "learning_rate": 6.324840948842395e-07, "loss": 0.3487, "step": 5113 }, { "epoch": 2.5142576204523106, "grad_norm": 22.0, "learning_rate": 6.312377258946436e-07, "loss": 0.2089, "step": 5114 }, { "epoch": 2.5147492625368733, "grad_norm": 10.5625, "learning_rate": 6.29992493524346e-07, "loss": 0.2239, "step": 5115 }, { "epoch": 2.5152409046214355, "grad_norm": 12.25, "learning_rate": 6.287483981391894e-07, "loss": 0.1066, "step": 5116 }, { "epoch": 2.515732546705998, "grad_norm": 11.6875, "learning_rate": 6.275054401046789e-07, "loss": 0.2586, "step": 5117 }, { "epoch": 2.5162241887905603, "grad_norm": 34.75, "learning_rate": 6.26263619785991e-07, "loss": 0.5796, "step": 5118 }, { "epoch": 2.516715830875123, "grad_norm": 19.75, "learning_rate": 6.250229375479623e-07, "loss": 0.2466, "step": 5119 }, { "epoch": 2.5172074729596856, "grad_norm": 25.25, "learning_rate": 6.237833937551007e-07, "loss": 0.3876, "step": 5120 }, { "epoch": 2.517699115044248, "grad_norm": 17.5, "learning_rate": 6.22544988771574e-07, "loss": 0.3492, "step": 5121 }, { "epoch": 2.51819075712881, "grad_norm": 11.5625, "learning_rate": 6.2130772296122e-07, "loss": 0.2423, "step": 5122 }, { "epoch": 2.5186823992133727, "grad_norm": 34.5, "learning_rate": 6.200715966875392e-07, "loss": 0.3256, "step": 5123 }, { "epoch": 2.5191740412979353, "grad_norm": 34.75, "learning_rate": 6.188366103136997e-07, "loss": 0.3405, "step": 5124 }, { "epoch": 2.5196656833824975, "grad_norm": 15.5, "learning_rate": 6.176027642025312e-07, "loss": 0.351, "step": 5125 }, { "epoch": 2.5201573254670597, "grad_norm": 21.75, "learning_rate": 6.163700587165318e-07, "loss": 0.2095, "step": 5126 }, { "epoch": 2.5206489675516224, "grad_norm": 30.375, "learning_rate": 6.151384942178617e-07, "loss": 0.4778, "step": 5127 }, { "epoch": 2.521140609636185, "grad_norm": 41.5, "learning_rate": 6.139080710683482e-07, "loss": 0.3193, "step": 5128 }, { "epoch": 2.5216322517207472, "grad_norm": 21.0, "learning_rate": 6.126787896294802e-07, "loss": 0.4428, "step": 5129 }, { "epoch": 2.52212389380531, "grad_norm": 8.875, "learning_rate": 6.114506502624174e-07, "loss": 0.1112, "step": 5130 }, { "epoch": 2.522615535889872, "grad_norm": 13.4375, "learning_rate": 6.102236533279755e-07, "loss": 0.1204, "step": 5131 }, { "epoch": 2.5231071779744347, "grad_norm": 37.0, "learning_rate": 6.089977991866428e-07, "loss": 0.4339, "step": 5132 }, { "epoch": 2.523598820058997, "grad_norm": 15.4375, "learning_rate": 6.077730881985642e-07, "loss": 0.1424, "step": 5133 }, { "epoch": 2.5240904621435596, "grad_norm": 21.0, "learning_rate": 6.065495207235543e-07, "loss": 0.3896, "step": 5134 }, { "epoch": 2.524582104228122, "grad_norm": 25.5, "learning_rate": 6.053270971210911e-07, "loss": 0.2214, "step": 5135 }, { "epoch": 2.5250737463126844, "grad_norm": 14.9375, "learning_rate": 6.041058177503132e-07, "loss": 0.2125, "step": 5136 }, { "epoch": 2.5255653883972466, "grad_norm": 21.875, "learning_rate": 6.028856829700259e-07, "loss": 0.3363, "step": 5137 }, { "epoch": 2.5260570304818093, "grad_norm": 19.25, "learning_rate": 6.016666931386991e-07, "loss": 0.233, "step": 5138 }, { "epoch": 2.5265486725663715, "grad_norm": 43.0, "learning_rate": 6.004488486144619e-07, "loss": 0.5657, "step": 5139 }, { "epoch": 2.527040314650934, "grad_norm": 40.75, "learning_rate": 5.992321497551117e-07, "loss": 0.2336, "step": 5140 }, { "epoch": 2.527531956735497, "grad_norm": 20.125, "learning_rate": 5.980165969181076e-07, "loss": 0.1557, "step": 5141 }, { "epoch": 2.528023598820059, "grad_norm": 28.375, "learning_rate": 5.968021904605703e-07, "loss": 0.307, "step": 5142 }, { "epoch": 2.528515240904621, "grad_norm": 24.25, "learning_rate": 5.95588930739287e-07, "loss": 0.2805, "step": 5143 }, { "epoch": 2.529006882989184, "grad_norm": 30.125, "learning_rate": 5.94376818110704e-07, "loss": 0.3222, "step": 5144 }, { "epoch": 2.5294985250737465, "grad_norm": 15.3125, "learning_rate": 5.93165852930934e-07, "loss": 0.313, "step": 5145 }, { "epoch": 2.5299901671583087, "grad_norm": 40.75, "learning_rate": 5.919560355557513e-07, "loss": 0.6061, "step": 5146 }, { "epoch": 2.530481809242871, "grad_norm": 15.1875, "learning_rate": 5.907473663405936e-07, "loss": 0.2971, "step": 5147 }, { "epoch": 2.5309734513274336, "grad_norm": 48.0, "learning_rate": 5.895398456405596e-07, "loss": 0.2919, "step": 5148 }, { "epoch": 2.531465093411996, "grad_norm": 35.5, "learning_rate": 5.883334738104132e-07, "loss": 0.3252, "step": 5149 }, { "epoch": 2.5319567354965584, "grad_norm": 49.25, "learning_rate": 5.871282512045776e-07, "loss": 0.4212, "step": 5150 }, { "epoch": 2.532448377581121, "grad_norm": 22.875, "learning_rate": 5.859241781771398e-07, "loss": 0.3032, "step": 5151 }, { "epoch": 2.5329400196656833, "grad_norm": 27.75, "learning_rate": 5.847212550818506e-07, "loss": 0.3472, "step": 5152 }, { "epoch": 2.533431661750246, "grad_norm": 17.375, "learning_rate": 5.835194822721227e-07, "loss": 0.3207, "step": 5153 }, { "epoch": 2.533923303834808, "grad_norm": 14.6875, "learning_rate": 5.823188601010267e-07, "loss": 0.3166, "step": 5154 }, { "epoch": 2.5344149459193708, "grad_norm": 26.25, "learning_rate": 5.81119388921301e-07, "loss": 0.4176, "step": 5155 }, { "epoch": 2.534906588003933, "grad_norm": 42.75, "learning_rate": 5.799210690853409e-07, "loss": 0.4513, "step": 5156 }, { "epoch": 2.5353982300884956, "grad_norm": 35.75, "learning_rate": 5.787239009452077e-07, "loss": 0.3615, "step": 5157 }, { "epoch": 2.5358898721730583, "grad_norm": 24.75, "learning_rate": 5.775278848526188e-07, "loss": 0.1788, "step": 5158 }, { "epoch": 2.5363815142576205, "grad_norm": 18.25, "learning_rate": 5.763330211589616e-07, "loss": 0.2649, "step": 5159 }, { "epoch": 2.5368731563421827, "grad_norm": 29.0, "learning_rate": 5.751393102152761e-07, "loss": 0.3151, "step": 5160 }, { "epoch": 2.5373647984267453, "grad_norm": 12.25, "learning_rate": 5.739467523722694e-07, "loss": 0.15, "step": 5161 }, { "epoch": 2.537856440511308, "grad_norm": 6.75, "learning_rate": 5.727553479803063e-07, "loss": 0.0995, "step": 5162 }, { "epoch": 2.53834808259587, "grad_norm": 28.625, "learning_rate": 5.715650973894165e-07, "loss": 0.3504, "step": 5163 }, { "epoch": 2.5388397246804324, "grad_norm": 23.125, "learning_rate": 5.703760009492861e-07, "loss": 0.3152, "step": 5164 }, { "epoch": 2.539331366764995, "grad_norm": 13.5, "learning_rate": 5.691880590092667e-07, "loss": 0.2418, "step": 5165 }, { "epoch": 2.5398230088495577, "grad_norm": 31.375, "learning_rate": 5.680012719183653e-07, "loss": 0.2066, "step": 5166 }, { "epoch": 2.54031465093412, "grad_norm": 21.25, "learning_rate": 5.668156400252571e-07, "loss": 0.2468, "step": 5167 }, { "epoch": 2.5408062930186825, "grad_norm": 35.75, "learning_rate": 5.656311636782707e-07, "loss": 0.3748, "step": 5168 }, { "epoch": 2.5412979351032448, "grad_norm": 41.5, "learning_rate": 5.644478432254004e-07, "loss": 0.3507, "step": 5169 }, { "epoch": 2.5417895771878074, "grad_norm": 22.75, "learning_rate": 5.632656790142963e-07, "loss": 0.3721, "step": 5170 }, { "epoch": 2.5422812192723696, "grad_norm": 33.0, "learning_rate": 5.620846713922737e-07, "loss": 0.3045, "step": 5171 }, { "epoch": 2.5427728613569323, "grad_norm": 16.375, "learning_rate": 5.609048207063032e-07, "loss": 0.2268, "step": 5172 }, { "epoch": 2.5432645034414945, "grad_norm": 11.1875, "learning_rate": 5.597261273030182e-07, "loss": 0.2379, "step": 5173 }, { "epoch": 2.543756145526057, "grad_norm": 37.5, "learning_rate": 5.585485915287135e-07, "loss": 0.2319, "step": 5174 }, { "epoch": 2.5442477876106193, "grad_norm": 13.5, "learning_rate": 5.573722137293417e-07, "loss": 0.3076, "step": 5175 }, { "epoch": 2.544739429695182, "grad_norm": 30.375, "learning_rate": 5.561969942505144e-07, "loss": 0.3398, "step": 5176 }, { "epoch": 2.545231071779744, "grad_norm": 22.125, "learning_rate": 5.550229334375054e-07, "loss": 0.4367, "step": 5177 }, { "epoch": 2.545722713864307, "grad_norm": 12.8125, "learning_rate": 5.538500316352452e-07, "loss": 0.2656, "step": 5178 }, { "epoch": 2.5462143559488695, "grad_norm": 15.125, "learning_rate": 5.526782891883258e-07, "loss": 0.2076, "step": 5179 }, { "epoch": 2.5467059980334317, "grad_norm": 22.875, "learning_rate": 5.515077064409993e-07, "loss": 0.2809, "step": 5180 }, { "epoch": 2.547197640117994, "grad_norm": 25.125, "learning_rate": 5.50338283737176e-07, "loss": 0.2711, "step": 5181 }, { "epoch": 2.5476892822025565, "grad_norm": 17.0, "learning_rate": 5.491700214204236e-07, "loss": 0.2348, "step": 5182 }, { "epoch": 2.548180924287119, "grad_norm": 26.75, "learning_rate": 5.48002919833971e-07, "loss": 0.4117, "step": 5183 }, { "epoch": 2.5486725663716814, "grad_norm": 21.0, "learning_rate": 5.468369793207082e-07, "loss": 0.2128, "step": 5184 }, { "epoch": 2.5491642084562436, "grad_norm": 25.625, "learning_rate": 5.456722002231778e-07, "loss": 0.3852, "step": 5185 }, { "epoch": 2.5496558505408062, "grad_norm": 27.375, "learning_rate": 5.445085828835884e-07, "loss": 0.3235, "step": 5186 }, { "epoch": 2.550147492625369, "grad_norm": 38.0, "learning_rate": 5.433461276438007e-07, "loss": 0.4445, "step": 5187 }, { "epoch": 2.550639134709931, "grad_norm": 14.0625, "learning_rate": 5.421848348453389e-07, "loss": 0.2984, "step": 5188 }, { "epoch": 2.5511307767944937, "grad_norm": 32.0, "learning_rate": 5.410247048293838e-07, "loss": 0.2848, "step": 5189 }, { "epoch": 2.551622418879056, "grad_norm": 11.1875, "learning_rate": 5.398657379367753e-07, "loss": 0.3431, "step": 5190 }, { "epoch": 2.5521140609636186, "grad_norm": 16.25, "learning_rate": 5.387079345080091e-07, "loss": 0.3242, "step": 5191 }, { "epoch": 2.552605703048181, "grad_norm": 20.125, "learning_rate": 5.37551294883243e-07, "loss": 0.3907, "step": 5192 }, { "epoch": 2.5530973451327434, "grad_norm": 19.5, "learning_rate": 5.363958194022893e-07, "loss": 0.247, "step": 5193 }, { "epoch": 2.5535889872173057, "grad_norm": 13.625, "learning_rate": 5.35241508404621e-07, "loss": 0.3464, "step": 5194 }, { "epoch": 2.5540806293018683, "grad_norm": 11.875, "learning_rate": 5.340883622293656e-07, "loss": 0.1581, "step": 5195 }, { "epoch": 2.554572271386431, "grad_norm": 15.1875, "learning_rate": 5.329363812153138e-07, "loss": 0.1889, "step": 5196 }, { "epoch": 2.555063913470993, "grad_norm": 19.75, "learning_rate": 5.317855657009082e-07, "loss": 0.2491, "step": 5197 }, { "epoch": 2.5555555555555554, "grad_norm": 51.25, "learning_rate": 5.306359160242531e-07, "loss": 0.191, "step": 5198 }, { "epoch": 2.556047197640118, "grad_norm": 49.0, "learning_rate": 5.294874325231074e-07, "loss": 0.33, "step": 5199 }, { "epoch": 2.5565388397246807, "grad_norm": 21.0, "learning_rate": 5.283401155348902e-07, "loss": 0.1031, "step": 5200 }, { "epoch": 2.557030481809243, "grad_norm": 25.75, "learning_rate": 5.27193965396675e-07, "loss": 0.3514, "step": 5201 }, { "epoch": 2.557522123893805, "grad_norm": 26.75, "learning_rate": 5.260489824451941e-07, "loss": 0.249, "step": 5202 }, { "epoch": 2.5580137659783677, "grad_norm": 28.25, "learning_rate": 5.249051670168368e-07, "loss": 0.4062, "step": 5203 }, { "epoch": 2.5585054080629304, "grad_norm": 35.5, "learning_rate": 5.23762519447651e-07, "loss": 0.3396, "step": 5204 }, { "epoch": 2.5589970501474926, "grad_norm": 22.5, "learning_rate": 5.22621040073337e-07, "loss": 0.3118, "step": 5205 }, { "epoch": 2.559488692232055, "grad_norm": 44.75, "learning_rate": 5.214807292292565e-07, "loss": 0.3834, "step": 5206 }, { "epoch": 2.5599803343166174, "grad_norm": 37.75, "learning_rate": 5.203415872504247e-07, "loss": 0.3726, "step": 5207 }, { "epoch": 2.56047197640118, "grad_norm": 20.125, "learning_rate": 5.192036144715162e-07, "loss": 0.2977, "step": 5208 }, { "epoch": 2.5609636184857423, "grad_norm": 21.25, "learning_rate": 5.180668112268592e-07, "loss": 0.2048, "step": 5209 }, { "epoch": 2.561455260570305, "grad_norm": 36.25, "learning_rate": 5.169311778504399e-07, "loss": 0.692, "step": 5210 }, { "epoch": 2.561946902654867, "grad_norm": 65.5, "learning_rate": 5.157967146759011e-07, "loss": 0.5337, "step": 5211 }, { "epoch": 2.56243854473943, "grad_norm": 42.0, "learning_rate": 5.146634220365425e-07, "loss": 0.4409, "step": 5212 }, { "epoch": 2.562930186823992, "grad_norm": 20.25, "learning_rate": 5.135313002653163e-07, "loss": 0.2203, "step": 5213 }, { "epoch": 2.5634218289085546, "grad_norm": 12.25, "learning_rate": 5.124003496948356e-07, "loss": 0.3426, "step": 5214 }, { "epoch": 2.563913470993117, "grad_norm": 30.0, "learning_rate": 5.112705706573645e-07, "loss": 0.5189, "step": 5215 }, { "epoch": 2.5644051130776795, "grad_norm": 19.75, "learning_rate": 5.101419634848274e-07, "loss": 0.3588, "step": 5216 }, { "epoch": 2.564896755162242, "grad_norm": 25.375, "learning_rate": 5.090145285087998e-07, "loss": 0.2836, "step": 5217 }, { "epoch": 2.5653883972468043, "grad_norm": 21.75, "learning_rate": 5.078882660605184e-07, "loss": 0.4184, "step": 5218 }, { "epoch": 2.5658800393313665, "grad_norm": 26.25, "learning_rate": 5.067631764708709e-07, "loss": 0.1906, "step": 5219 }, { "epoch": 2.566371681415929, "grad_norm": 48.25, "learning_rate": 5.056392600704021e-07, "loss": 0.4659, "step": 5220 }, { "epoch": 2.566863323500492, "grad_norm": 24.125, "learning_rate": 5.045165171893115e-07, "loss": 0.2161, "step": 5221 }, { "epoch": 2.567354965585054, "grad_norm": 16.625, "learning_rate": 5.033949481574548e-07, "loss": 0.1032, "step": 5222 }, { "epoch": 2.5678466076696163, "grad_norm": 13.125, "learning_rate": 5.022745533043427e-07, "loss": 0.2456, "step": 5223 }, { "epoch": 2.568338249754179, "grad_norm": 10.75, "learning_rate": 5.011553329591396e-07, "loss": 0.075, "step": 5224 }, { "epoch": 2.5688298918387416, "grad_norm": 26.875, "learning_rate": 5.000372874506658e-07, "loss": 0.2818, "step": 5225 }, { "epoch": 2.5693215339233038, "grad_norm": 21.5, "learning_rate": 4.989204171073978e-07, "loss": 0.272, "step": 5226 }, { "epoch": 2.5698131760078664, "grad_norm": 44.75, "learning_rate": 4.978047222574634e-07, "loss": 0.5271, "step": 5227 }, { "epoch": 2.5703048180924286, "grad_norm": 39.75, "learning_rate": 4.966902032286479e-07, "loss": 0.376, "step": 5228 }, { "epoch": 2.5707964601769913, "grad_norm": 11.875, "learning_rate": 4.955768603483915e-07, "loss": 0.1896, "step": 5229 }, { "epoch": 2.5712881022615535, "grad_norm": 26.125, "learning_rate": 4.94464693943786e-07, "loss": 0.3144, "step": 5230 }, { "epoch": 2.571779744346116, "grad_norm": 25.875, "learning_rate": 4.933537043415811e-07, "loss": 0.3237, "step": 5231 }, { "epoch": 2.5722713864306783, "grad_norm": 14.6875, "learning_rate": 4.922438918681771e-07, "loss": 0.3, "step": 5232 }, { "epoch": 2.572763028515241, "grad_norm": 19.25, "learning_rate": 4.911352568496306e-07, "loss": 0.2863, "step": 5233 }, { "epoch": 2.5732546705998036, "grad_norm": 20.875, "learning_rate": 4.900277996116523e-07, "loss": 0.1382, "step": 5234 }, { "epoch": 2.573746312684366, "grad_norm": 14.4375, "learning_rate": 4.889215204796078e-07, "loss": 0.1631, "step": 5235 }, { "epoch": 2.574237954768928, "grad_norm": 41.5, "learning_rate": 4.878164197785136e-07, "loss": 0.3857, "step": 5236 }, { "epoch": 2.5747295968534907, "grad_norm": 32.25, "learning_rate": 4.867124978330425e-07, "loss": 0.3993, "step": 5237 }, { "epoch": 2.5752212389380533, "grad_norm": 43.25, "learning_rate": 4.856097549675195e-07, "loss": 0.4649, "step": 5238 }, { "epoch": 2.5757128810226155, "grad_norm": 27.75, "learning_rate": 4.845081915059241e-07, "loss": 0.3611, "step": 5239 }, { "epoch": 2.5762045231071777, "grad_norm": 30.125, "learning_rate": 4.834078077718894e-07, "loss": 0.2754, "step": 5240 }, { "epoch": 2.5766961651917404, "grad_norm": 16.625, "learning_rate": 4.823086040887022e-07, "loss": 0.2458, "step": 5241 }, { "epoch": 2.577187807276303, "grad_norm": 12.8125, "learning_rate": 4.812105807793007e-07, "loss": 0.2154, "step": 5242 }, { "epoch": 2.5776794493608652, "grad_norm": 26.75, "learning_rate": 4.801137381662789e-07, "loss": 0.4055, "step": 5243 }, { "epoch": 2.578171091445428, "grad_norm": 29.0, "learning_rate": 4.790180765718809e-07, "loss": 0.4891, "step": 5244 }, { "epoch": 2.57866273352999, "grad_norm": 19.375, "learning_rate": 4.779235963180074e-07, "loss": 0.2708, "step": 5245 }, { "epoch": 2.5791543756145527, "grad_norm": 15.0625, "learning_rate": 4.768302977262085e-07, "loss": 0.1552, "step": 5246 }, { "epoch": 2.579646017699115, "grad_norm": 14.0, "learning_rate": 4.757381811176899e-07, "loss": 0.2004, "step": 5247 }, { "epoch": 2.5801376597836776, "grad_norm": 26.125, "learning_rate": 4.746472468133084e-07, "loss": 0.3126, "step": 5248 }, { "epoch": 2.58062930186824, "grad_norm": 22.5, "learning_rate": 4.735574951335751e-07, "loss": 0.2833, "step": 5249 }, { "epoch": 2.5811209439528024, "grad_norm": 21.25, "learning_rate": 4.7246892639865094e-07, "loss": 0.329, "step": 5250 }, { "epoch": 2.5816125860373647, "grad_norm": 13.625, "learning_rate": 4.713815409283525e-07, "loss": 0.405, "step": 5251 }, { "epoch": 2.5821042281219273, "grad_norm": 26.125, "learning_rate": 4.702953390421458e-07, "loss": 0.1745, "step": 5252 }, { "epoch": 2.5825958702064895, "grad_norm": 40.0, "learning_rate": 4.6921032105915165e-07, "loss": 0.4551, "step": 5253 }, { "epoch": 2.583087512291052, "grad_norm": 38.75, "learning_rate": 4.681264872981394e-07, "loss": 0.43, "step": 5254 }, { "epoch": 2.583579154375615, "grad_norm": 21.25, "learning_rate": 4.670438380775369e-07, "loss": 0.3477, "step": 5255 }, { "epoch": 2.584070796460177, "grad_norm": 23.25, "learning_rate": 4.659623737154168e-07, "loss": 0.4008, "step": 5256 }, { "epoch": 2.584562438544739, "grad_norm": 51.25, "learning_rate": 4.648820945295088e-07, "loss": 0.4597, "step": 5257 }, { "epoch": 2.585054080629302, "grad_norm": 17.875, "learning_rate": 4.6380300083719115e-07, "loss": 0.3451, "step": 5258 }, { "epoch": 2.5855457227138645, "grad_norm": 15.875, "learning_rate": 4.6272509295549663e-07, "loss": 0.2737, "step": 5259 }, { "epoch": 2.5860373647984267, "grad_norm": 34.25, "learning_rate": 4.6164837120110603e-07, "loss": 0.2973, "step": 5260 }, { "epoch": 2.586529006882989, "grad_norm": 13.25, "learning_rate": 4.6057283589035526e-07, "loss": 0.198, "step": 5261 }, { "epoch": 2.5870206489675516, "grad_norm": 39.25, "learning_rate": 4.5949848733923005e-07, "loss": 0.309, "step": 5262 }, { "epoch": 2.587512291052114, "grad_norm": 22.0, "learning_rate": 4.584253258633679e-07, "loss": 0.2148, "step": 5263 }, { "epoch": 2.5880039331366764, "grad_norm": 14.25, "learning_rate": 4.5735335177805623e-07, "loss": 0.1171, "step": 5264 }, { "epoch": 2.588495575221239, "grad_norm": 28.375, "learning_rate": 4.562825653982359e-07, "loss": 0.2962, "step": 5265 }, { "epoch": 2.5889872173058013, "grad_norm": 56.25, "learning_rate": 4.55212967038496e-07, "loss": 0.4008, "step": 5266 }, { "epoch": 2.589478859390364, "grad_norm": 36.5, "learning_rate": 4.5414455701307944e-07, "loss": 0.3091, "step": 5267 }, { "epoch": 2.589970501474926, "grad_norm": 23.875, "learning_rate": 4.5307733563587875e-07, "loss": 0.4564, "step": 5268 }, { "epoch": 2.590462143559489, "grad_norm": 25.0, "learning_rate": 4.5201130322043614e-07, "loss": 0.328, "step": 5269 }, { "epoch": 2.590953785644051, "grad_norm": 40.0, "learning_rate": 4.5094646007994606e-07, "loss": 0.4352, "step": 5270 }, { "epoch": 2.5914454277286136, "grad_norm": 30.25, "learning_rate": 4.4988280652725373e-07, "loss": 0.516, "step": 5271 }, { "epoch": 2.591937069813176, "grad_norm": 48.25, "learning_rate": 4.488203428748542e-07, "loss": 0.4703, "step": 5272 }, { "epoch": 2.5924287118977385, "grad_norm": 12.25, "learning_rate": 4.4775906943489127e-07, "loss": 0.1327, "step": 5273 }, { "epoch": 2.5929203539823007, "grad_norm": 23.875, "learning_rate": 4.466989865191635e-07, "loss": 0.2549, "step": 5274 }, { "epoch": 2.5934119960668633, "grad_norm": 23.625, "learning_rate": 4.4564009443911436e-07, "loss": 0.3661, "step": 5275 }, { "epoch": 2.593903638151426, "grad_norm": 22.5, "learning_rate": 4.445823935058409e-07, "loss": 0.2179, "step": 5276 }, { "epoch": 2.594395280235988, "grad_norm": 24.25, "learning_rate": 4.435258840300896e-07, "loss": 0.3128, "step": 5277 }, { "epoch": 2.5948869223205504, "grad_norm": 22.875, "learning_rate": 4.4247056632225713e-07, "loss": 0.2193, "step": 5278 }, { "epoch": 2.595378564405113, "grad_norm": 22.75, "learning_rate": 4.414164406923885e-07, "loss": 0.1484, "step": 5279 }, { "epoch": 2.5958702064896757, "grad_norm": 14.9375, "learning_rate": 4.40363507450181e-07, "loss": 0.256, "step": 5280 }, { "epoch": 2.596361848574238, "grad_norm": 17.375, "learning_rate": 4.3931176690497814e-07, "loss": 0.3084, "step": 5281 }, { "epoch": 2.5968534906588006, "grad_norm": 20.0, "learning_rate": 4.382612193657768e-07, "loss": 0.2734, "step": 5282 }, { "epoch": 2.5973451327433628, "grad_norm": 29.125, "learning_rate": 4.3721186514121953e-07, "loss": 0.3736, "step": 5283 }, { "epoch": 2.5978367748279254, "grad_norm": 12.0625, "learning_rate": 4.3616370453960276e-07, "loss": 0.1166, "step": 5284 }, { "epoch": 2.5983284169124876, "grad_norm": 15.5625, "learning_rate": 4.3511673786886815e-07, "loss": 0.2452, "step": 5285 }, { "epoch": 2.5988200589970503, "grad_norm": 38.5, "learning_rate": 4.3407096543661015e-07, "loss": 0.3834, "step": 5286 }, { "epoch": 2.5993117010816125, "grad_norm": 18.75, "learning_rate": 4.3302638755006746e-07, "loss": 0.3414, "step": 5287 }, { "epoch": 2.599803343166175, "grad_norm": 25.875, "learning_rate": 4.3198300451613415e-07, "loss": 0.2803, "step": 5288 }, { "epoch": 2.6002949852507373, "grad_norm": 18.125, "learning_rate": 4.30940816641347e-07, "loss": 0.3043, "step": 5289 }, { "epoch": 2.6007866273353, "grad_norm": 23.875, "learning_rate": 4.2989982423189703e-07, "loss": 0.2754, "step": 5290 }, { "epoch": 2.601278269419862, "grad_norm": 18.75, "learning_rate": 4.288600275936182e-07, "loss": 0.4822, "step": 5291 }, { "epoch": 2.601769911504425, "grad_norm": 24.625, "learning_rate": 4.278214270320011e-07, "loss": 0.2663, "step": 5292 }, { "epoch": 2.6022615535889875, "grad_norm": 11.1875, "learning_rate": 4.2678402285217755e-07, "loss": 0.2566, "step": 5293 }, { "epoch": 2.6027531956735497, "grad_norm": 16.125, "learning_rate": 4.2574781535893135e-07, "loss": 0.1949, "step": 5294 }, { "epoch": 2.603244837758112, "grad_norm": 16.75, "learning_rate": 4.24712804856694e-07, "loss": 0.3276, "step": 5295 }, { "epoch": 2.6037364798426745, "grad_norm": 13.8125, "learning_rate": 4.236789916495457e-07, "loss": 0.3872, "step": 5296 }, { "epoch": 2.604228121927237, "grad_norm": 20.5, "learning_rate": 4.2264637604121405e-07, "loss": 0.3554, "step": 5297 }, { "epoch": 2.6047197640117994, "grad_norm": 47.25, "learning_rate": 4.216149583350753e-07, "loss": 0.4303, "step": 5298 }, { "epoch": 2.6052114060963616, "grad_norm": 12.875, "learning_rate": 4.205847388341541e-07, "loss": 0.1468, "step": 5299 }, { "epoch": 2.6057030481809242, "grad_norm": 28.0, "learning_rate": 4.195557178411243e-07, "loss": 0.2189, "step": 5300 }, { "epoch": 2.606194690265487, "grad_norm": 37.25, "learning_rate": 4.1852789565830317e-07, "loss": 0.2624, "step": 5301 }, { "epoch": 2.606686332350049, "grad_norm": 15.875, "learning_rate": 4.175012725876616e-07, "loss": 0.1748, "step": 5302 }, { "epoch": 2.6071779744346117, "grad_norm": 24.0, "learning_rate": 4.1647584893081325e-07, "loss": 0.2848, "step": 5303 }, { "epoch": 2.607669616519174, "grad_norm": 40.75, "learning_rate": 4.1545162498902324e-07, "loss": 0.4329, "step": 5304 }, { "epoch": 2.6081612586037366, "grad_norm": 43.5, "learning_rate": 4.144286010631993e-07, "loss": 0.3564, "step": 5305 }, { "epoch": 2.608652900688299, "grad_norm": 23.75, "learning_rate": 4.1340677745390405e-07, "loss": 0.4353, "step": 5306 }, { "epoch": 2.6091445427728615, "grad_norm": 16.625, "learning_rate": 4.123861544613398e-07, "loss": 0.0797, "step": 5307 }, { "epoch": 2.6096361848574237, "grad_norm": 44.5, "learning_rate": 4.1136673238536226e-07, "loss": 0.3632, "step": 5308 }, { "epoch": 2.6101278269419863, "grad_norm": 30.75, "learning_rate": 4.103485115254688e-07, "loss": 0.4609, "step": 5309 }, { "epoch": 2.6106194690265485, "grad_norm": 30.125, "learning_rate": 4.093314921808077e-07, "loss": 0.3523, "step": 5310 }, { "epoch": 2.611111111111111, "grad_norm": 22.875, "learning_rate": 4.0831567465017484e-07, "loss": 0.4304, "step": 5311 }, { "epoch": 2.6116027531956734, "grad_norm": 39.0, "learning_rate": 4.0730105923200855e-07, "loss": 0.4148, "step": 5312 }, { "epoch": 2.612094395280236, "grad_norm": 15.3125, "learning_rate": 4.062876462243984e-07, "loss": 0.3068, "step": 5313 }, { "epoch": 2.6125860373647987, "grad_norm": 17.0, "learning_rate": 4.052754359250797e-07, "loss": 0.2786, "step": 5314 }, { "epoch": 2.613077679449361, "grad_norm": 24.5, "learning_rate": 4.042644286314321e-07, "loss": 0.2134, "step": 5315 }, { "epoch": 2.613569321533923, "grad_norm": 13.125, "learning_rate": 4.0325462464048445e-07, "loss": 0.195, "step": 5316 }, { "epoch": 2.6140609636184857, "grad_norm": 19.75, "learning_rate": 4.022460242489115e-07, "loss": 0.2637, "step": 5317 }, { "epoch": 2.6145526057030484, "grad_norm": 25.375, "learning_rate": 4.0123862775303316e-07, "loss": 0.2167, "step": 5318 }, { "epoch": 2.6150442477876106, "grad_norm": 25.125, "learning_rate": 4.002324354488177e-07, "loss": 0.1867, "step": 5319 }, { "epoch": 2.615535889872173, "grad_norm": 21.125, "learning_rate": 3.992274476318767e-07, "loss": 0.3108, "step": 5320 }, { "epoch": 2.6160275319567354, "grad_norm": 27.5, "learning_rate": 3.9822366459747087e-07, "loss": 0.1793, "step": 5321 }, { "epoch": 2.616519174041298, "grad_norm": 47.5, "learning_rate": 3.9722108664050496e-07, "loss": 0.4111, "step": 5322 }, { "epoch": 2.6170108161258603, "grad_norm": 15.3125, "learning_rate": 3.962197140555322e-07, "loss": 0.2939, "step": 5323 }, { "epoch": 2.617502458210423, "grad_norm": 16.25, "learning_rate": 3.9521954713674734e-07, "loss": 0.2228, "step": 5324 }, { "epoch": 2.617994100294985, "grad_norm": 17.25, "learning_rate": 3.9422058617799535e-07, "loss": 0.2398, "step": 5325 }, { "epoch": 2.618485742379548, "grad_norm": 31.125, "learning_rate": 3.932228314727639e-07, "loss": 0.2898, "step": 5326 }, { "epoch": 2.61897738446411, "grad_norm": 22.75, "learning_rate": 3.9222628331418805e-07, "loss": 0.0861, "step": 5327 }, { "epoch": 2.6194690265486726, "grad_norm": 19.25, "learning_rate": 3.9123094199504604e-07, "loss": 0.2674, "step": 5328 }, { "epoch": 2.619960668633235, "grad_norm": 26.5, "learning_rate": 3.90236807807766e-07, "loss": 0.3507, "step": 5329 }, { "epoch": 2.6204523107177975, "grad_norm": 15.5625, "learning_rate": 3.892438810444169e-07, "loss": 0.1677, "step": 5330 }, { "epoch": 2.62094395280236, "grad_norm": 26.0, "learning_rate": 3.882521619967152e-07, "loss": 0.258, "step": 5331 }, { "epoch": 2.6214355948869223, "grad_norm": 16.875, "learning_rate": 3.8726165095602146e-07, "loss": 0.1749, "step": 5332 }, { "epoch": 2.6219272369714846, "grad_norm": 10.625, "learning_rate": 3.862723482133433e-07, "loss": 0.1708, "step": 5333 }, { "epoch": 2.622418879056047, "grad_norm": 14.875, "learning_rate": 3.852842540593299e-07, "loss": 0.3568, "step": 5334 }, { "epoch": 2.62291052114061, "grad_norm": 22.625, "learning_rate": 3.8429736878427834e-07, "loss": 0.2684, "step": 5335 }, { "epoch": 2.623402163225172, "grad_norm": 17.25, "learning_rate": 3.833116926781303e-07, "loss": 0.2803, "step": 5336 }, { "epoch": 2.6238938053097343, "grad_norm": 19.625, "learning_rate": 3.8232722603047165e-07, "loss": 0.2009, "step": 5337 }, { "epoch": 2.624385447394297, "grad_norm": 26.5, "learning_rate": 3.813439691305315e-07, "loss": 0.3259, "step": 5338 }, { "epoch": 2.6248770894788596, "grad_norm": 24.75, "learning_rate": 3.8036192226718626e-07, "loss": 0.2849, "step": 5339 }, { "epoch": 2.6253687315634218, "grad_norm": 15.75, "learning_rate": 3.7938108572895414e-07, "loss": 0.2162, "step": 5340 }, { "epoch": 2.6258603736479844, "grad_norm": 27.125, "learning_rate": 3.7840145980400065e-07, "loss": 0.1767, "step": 5341 }, { "epoch": 2.6263520157325466, "grad_norm": 11.25, "learning_rate": 3.774230447801311e-07, "loss": 0.0426, "step": 5342 }, { "epoch": 2.6268436578171093, "grad_norm": 31.125, "learning_rate": 3.7644584094480195e-07, "loss": 0.2753, "step": 5343 }, { "epoch": 2.6273352999016715, "grad_norm": 43.5, "learning_rate": 3.754698485851071e-07, "loss": 0.283, "step": 5344 }, { "epoch": 2.627826941986234, "grad_norm": 25.125, "learning_rate": 3.744950679877888e-07, "loss": 0.485, "step": 5345 }, { "epoch": 2.6283185840707963, "grad_norm": 25.0, "learning_rate": 3.7352149943923027e-07, "loss": 0.2611, "step": 5346 }, { "epoch": 2.628810226155359, "grad_norm": 9.3125, "learning_rate": 3.725491432254623e-07, "loss": 0.1748, "step": 5347 }, { "epoch": 2.629301868239921, "grad_norm": 31.0, "learning_rate": 3.715779996321544e-07, "loss": 0.3963, "step": 5348 }, { "epoch": 2.629793510324484, "grad_norm": 35.0, "learning_rate": 3.7060806894462527e-07, "loss": 0.367, "step": 5349 }, { "epoch": 2.630285152409046, "grad_norm": 23.125, "learning_rate": 3.6963935144783304e-07, "loss": 0.4244, "step": 5350 }, { "epoch": 2.6307767944936087, "grad_norm": 18.75, "learning_rate": 3.6867184742638353e-07, "loss": 0.3288, "step": 5351 }, { "epoch": 2.6312684365781713, "grad_norm": 25.375, "learning_rate": 3.677055571645209e-07, "loss": 0.347, "step": 5352 }, { "epoch": 2.6317600786627335, "grad_norm": 24.25, "learning_rate": 3.6674048094613745e-07, "loss": 0.2905, "step": 5353 }, { "epoch": 2.6322517207472957, "grad_norm": 15.4375, "learning_rate": 3.6577661905476545e-07, "loss": 0.144, "step": 5354 }, { "epoch": 2.6327433628318584, "grad_norm": 25.125, "learning_rate": 3.648139717735824e-07, "loss": 0.3407, "step": 5355 }, { "epoch": 2.633235004916421, "grad_norm": 36.5, "learning_rate": 3.63852539385409e-07, "loss": 0.3606, "step": 5356 }, { "epoch": 2.6337266470009832, "grad_norm": 26.75, "learning_rate": 3.628923221727069e-07, "loss": 0.2866, "step": 5357 }, { "epoch": 2.6342182890855455, "grad_norm": 23.75, "learning_rate": 3.619333204175829e-07, "loss": 0.4422, "step": 5358 }, { "epoch": 2.634709931170108, "grad_norm": 33.75, "learning_rate": 3.609755344017865e-07, "loss": 0.3895, "step": 5359 }, { "epoch": 2.6352015732546707, "grad_norm": 16.375, "learning_rate": 3.600189644067088e-07, "loss": 0.2773, "step": 5360 }, { "epoch": 2.635693215339233, "grad_norm": 41.5, "learning_rate": 3.5906361071338444e-07, "loss": 0.3831, "step": 5361 }, { "epoch": 2.6361848574237956, "grad_norm": 42.25, "learning_rate": 3.5810947360249185e-07, "loss": 0.2488, "step": 5362 }, { "epoch": 2.636676499508358, "grad_norm": 11.8125, "learning_rate": 3.5715655335434934e-07, "loss": 0.1924, "step": 5363 }, { "epoch": 2.6371681415929205, "grad_norm": 20.25, "learning_rate": 3.562048502489189e-07, "loss": 0.2801, "step": 5364 }, { "epoch": 2.6376597836774827, "grad_norm": 17.125, "learning_rate": 3.552543645658065e-07, "loss": 0.3147, "step": 5365 }, { "epoch": 2.6381514257620453, "grad_norm": 29.875, "learning_rate": 3.5430509658425956e-07, "loss": 0.2397, "step": 5366 }, { "epoch": 2.6386430678466075, "grad_norm": 18.125, "learning_rate": 3.533570465831652e-07, "loss": 0.1614, "step": 5367 }, { "epoch": 2.63913470993117, "grad_norm": 25.875, "learning_rate": 3.524102148410575e-07, "loss": 0.2437, "step": 5368 }, { "epoch": 2.639626352015733, "grad_norm": 15.1875, "learning_rate": 3.514646016361075e-07, "loss": 0.0904, "step": 5369 }, { "epoch": 2.640117994100295, "grad_norm": 32.75, "learning_rate": 3.5052020724613295e-07, "loss": 0.3702, "step": 5370 }, { "epoch": 2.6406096361848572, "grad_norm": 10.0625, "learning_rate": 3.495770319485889e-07, "loss": 0.2145, "step": 5371 }, { "epoch": 2.64110127826942, "grad_norm": 45.0, "learning_rate": 3.4863507602057613e-07, "loss": 0.5815, "step": 5372 }, { "epoch": 2.6415929203539825, "grad_norm": 7.75, "learning_rate": 3.476943397388359e-07, "loss": 0.0738, "step": 5373 }, { "epoch": 2.6420845624385447, "grad_norm": 26.875, "learning_rate": 3.467548233797506e-07, "loss": 0.3268, "step": 5374 }, { "epoch": 2.642576204523107, "grad_norm": 12.1875, "learning_rate": 3.4581652721934433e-07, "loss": 0.1268, "step": 5375 }, { "epoch": 2.6430678466076696, "grad_norm": 24.25, "learning_rate": 3.448794515332832e-07, "loss": 0.3356, "step": 5376 }, { "epoch": 2.6435594886922322, "grad_norm": 23.5, "learning_rate": 3.4394359659687385e-07, "loss": 0.3024, "step": 5377 }, { "epoch": 2.6440511307767944, "grad_norm": 28.25, "learning_rate": 3.43008962685066e-07, "loss": 0.2263, "step": 5378 }, { "epoch": 2.644542772861357, "grad_norm": 29.625, "learning_rate": 3.420755500724469e-07, "loss": 0.3801, "step": 5379 }, { "epoch": 2.6450344149459193, "grad_norm": 15.6875, "learning_rate": 3.411433590332517e-07, "loss": 0.1143, "step": 5380 }, { "epoch": 2.645526057030482, "grad_norm": 20.0, "learning_rate": 3.402123898413494e-07, "loss": 0.326, "step": 5381 }, { "epoch": 2.646017699115044, "grad_norm": 35.0, "learning_rate": 3.392826427702546e-07, "loss": 0.3608, "step": 5382 }, { "epoch": 2.646509341199607, "grad_norm": 20.375, "learning_rate": 3.383541180931209e-07, "loss": 0.2597, "step": 5383 }, { "epoch": 2.647000983284169, "grad_norm": 9.375, "learning_rate": 3.37426816082744e-07, "loss": 0.2037, "step": 5384 }, { "epoch": 2.6474926253687316, "grad_norm": 24.375, "learning_rate": 3.3650073701155803e-07, "loss": 0.3346, "step": 5385 }, { "epoch": 2.647984267453294, "grad_norm": 47.25, "learning_rate": 3.355758811516413e-07, "loss": 0.4427, "step": 5386 }, { "epoch": 2.6484759095378565, "grad_norm": 25.625, "learning_rate": 3.3465224877471004e-07, "loss": 0.3002, "step": 5387 }, { "epoch": 2.6489675516224187, "grad_norm": 19.625, "learning_rate": 3.3372984015212263e-07, "loss": 0.2515, "step": 5388 }, { "epoch": 2.6494591937069814, "grad_norm": 25.625, "learning_rate": 3.328086555548763e-07, "loss": 0.3208, "step": 5389 }, { "epoch": 2.649950835791544, "grad_norm": 22.375, "learning_rate": 3.318886952536111e-07, "loss": 0.3334, "step": 5390 }, { "epoch": 2.650442477876106, "grad_norm": 14.9375, "learning_rate": 3.3096995951860377e-07, "loss": 0.2311, "step": 5391 }, { "epoch": 2.6509341199606684, "grad_norm": 28.875, "learning_rate": 3.300524486197754e-07, "loss": 0.2371, "step": 5392 }, { "epoch": 2.651425762045231, "grad_norm": 37.25, "learning_rate": 3.291361628266834e-07, "loss": 0.2545, "step": 5393 }, { "epoch": 2.6519174041297937, "grad_norm": 29.625, "learning_rate": 3.282211024085284e-07, "loss": 0.3567, "step": 5394 }, { "epoch": 2.652409046214356, "grad_norm": 46.75, "learning_rate": 3.273072676341489e-07, "loss": 0.4292, "step": 5395 }, { "epoch": 2.652900688298918, "grad_norm": 25.75, "learning_rate": 3.263946587720251e-07, "loss": 0.408, "step": 5396 }, { "epoch": 2.6533923303834808, "grad_norm": 15.4375, "learning_rate": 3.254832760902751e-07, "loss": 0.2456, "step": 5397 }, { "epoch": 2.6538839724680434, "grad_norm": 25.75, "learning_rate": 3.2457311985665803e-07, "loss": 0.414, "step": 5398 }, { "epoch": 2.6543756145526056, "grad_norm": 19.875, "learning_rate": 3.236641903385727e-07, "loss": 0.1879, "step": 5399 }, { "epoch": 2.6548672566371683, "grad_norm": 26.0, "learning_rate": 3.227564878030564e-07, "loss": 0.3464, "step": 5400 }, { "epoch": 2.6553588987217305, "grad_norm": 31.75, "learning_rate": 3.218500125167873e-07, "loss": 0.2972, "step": 5401 }, { "epoch": 2.655850540806293, "grad_norm": 25.625, "learning_rate": 3.209447647460829e-07, "loss": 0.2793, "step": 5402 }, { "epoch": 2.6563421828908553, "grad_norm": 17.75, "learning_rate": 3.200407447568985e-07, "loss": 0.1618, "step": 5403 }, { "epoch": 2.656833824975418, "grad_norm": 15.5625, "learning_rate": 3.1913795281483004e-07, "loss": 0.264, "step": 5404 }, { "epoch": 2.65732546705998, "grad_norm": 26.875, "learning_rate": 3.1823638918511344e-07, "loss": 0.2499, "step": 5405 }, { "epoch": 2.657817109144543, "grad_norm": 15.5625, "learning_rate": 3.173360541326213e-07, "loss": 0.348, "step": 5406 }, { "epoch": 2.6583087512291055, "grad_norm": 16.875, "learning_rate": 3.164369479218685e-07, "loss": 0.2094, "step": 5407 }, { "epoch": 2.6588003933136677, "grad_norm": 43.75, "learning_rate": 3.155390708170048e-07, "loss": 0.5679, "step": 5408 }, { "epoch": 2.65929203539823, "grad_norm": 38.5, "learning_rate": 3.1464242308182213e-07, "loss": 0.4752, "step": 5409 }, { "epoch": 2.6597836774827925, "grad_norm": 18.25, "learning_rate": 3.137470049797508e-07, "loss": 0.2478, "step": 5410 }, { "epoch": 2.660275319567355, "grad_norm": 17.5, "learning_rate": 3.1285281677385936e-07, "loss": 0.2093, "step": 5411 }, { "epoch": 2.6607669616519174, "grad_norm": 38.25, "learning_rate": 3.119598587268537e-07, "loss": 0.3622, "step": 5412 }, { "epoch": 2.6612586037364796, "grad_norm": 23.5, "learning_rate": 3.110681311010814e-07, "loss": 0.1809, "step": 5413 }, { "epoch": 2.6617502458210422, "grad_norm": 21.25, "learning_rate": 3.1017763415852495e-07, "loss": 0.2788, "step": 5414 }, { "epoch": 2.662241887905605, "grad_norm": 17.75, "learning_rate": 3.092883681608081e-07, "loss": 0.1907, "step": 5415 }, { "epoch": 2.662733529990167, "grad_norm": 13.8125, "learning_rate": 3.0840033336919077e-07, "loss": 0.1949, "step": 5416 }, { "epoch": 2.6632251720747298, "grad_norm": 11.875, "learning_rate": 3.0751353004457434e-07, "loss": 0.2394, "step": 5417 }, { "epoch": 2.663716814159292, "grad_norm": 15.4375, "learning_rate": 3.066279584474944e-07, "loss": 0.0962, "step": 5418 }, { "epoch": 2.6642084562438546, "grad_norm": 17.625, "learning_rate": 3.0574361883812823e-07, "loss": 0.2574, "step": 5419 }, { "epoch": 2.664700098328417, "grad_norm": 12.4375, "learning_rate": 3.048605114762876e-07, "loss": 0.2015, "step": 5420 }, { "epoch": 2.6651917404129795, "grad_norm": 27.375, "learning_rate": 3.039786366214265e-07, "loss": 0.45, "step": 5421 }, { "epoch": 2.6656833824975417, "grad_norm": 23.0, "learning_rate": 3.030979945326321e-07, "loss": 0.3418, "step": 5422 }, { "epoch": 2.6661750245821043, "grad_norm": 19.875, "learning_rate": 3.0221858546863296e-07, "loss": 0.1936, "step": 5423 }, { "epoch": 2.6666666666666665, "grad_norm": 20.625, "learning_rate": 3.0134040968779443e-07, "loss": 0.2798, "step": 5424 }, { "epoch": 2.667158308751229, "grad_norm": 50.25, "learning_rate": 3.004634674481196e-07, "loss": 0.3237, "step": 5425 }, { "epoch": 2.6676499508357914, "grad_norm": 8.9375, "learning_rate": 2.9958775900724787e-07, "loss": 0.0647, "step": 5426 }, { "epoch": 2.668141592920354, "grad_norm": 35.0, "learning_rate": 2.9871328462245853e-07, "loss": 0.4115, "step": 5427 }, { "epoch": 2.6686332350049167, "grad_norm": 11.875, "learning_rate": 2.9784004455066493e-07, "loss": 0.2382, "step": 5428 }, { "epoch": 2.669124877089479, "grad_norm": 15.6875, "learning_rate": 2.9696803904842193e-07, "loss": 0.2322, "step": 5429 }, { "epoch": 2.669616519174041, "grad_norm": 14.1875, "learning_rate": 2.9609726837191666e-07, "loss": 0.2052, "step": 5430 }, { "epoch": 2.6701081612586037, "grad_norm": 14.75, "learning_rate": 2.9522773277698037e-07, "loss": 0.2356, "step": 5431 }, { "epoch": 2.6705998033431664, "grad_norm": 31.625, "learning_rate": 2.9435943251907423e-07, "loss": 0.3255, "step": 5432 }, { "epoch": 2.6710914454277286, "grad_norm": 25.0, "learning_rate": 2.934923678533012e-07, "loss": 0.2662, "step": 5433 }, { "epoch": 2.671583087512291, "grad_norm": 21.0, "learning_rate": 2.92626539034399e-07, "loss": 0.367, "step": 5434 }, { "epoch": 2.6720747295968534, "grad_norm": 10.125, "learning_rate": 2.917619463167442e-07, "loss": 0.1893, "step": 5435 }, { "epoch": 2.672566371681416, "grad_norm": 21.875, "learning_rate": 2.9089858995434705e-07, "loss": 0.2076, "step": 5436 }, { "epoch": 2.6730580137659783, "grad_norm": 19.875, "learning_rate": 2.9003647020085716e-07, "loss": 0.3863, "step": 5437 }, { "epoch": 2.673549655850541, "grad_norm": 37.5, "learning_rate": 2.8917558730956093e-07, "loss": 0.1558, "step": 5438 }, { "epoch": 2.674041297935103, "grad_norm": 32.25, "learning_rate": 2.883159415333806e-07, "loss": 0.4445, "step": 5439 }, { "epoch": 2.674532940019666, "grad_norm": 22.0, "learning_rate": 2.874575331248745e-07, "loss": 0.1524, "step": 5440 }, { "epoch": 2.675024582104228, "grad_norm": 27.125, "learning_rate": 2.866003623362381e-07, "loss": 0.4747, "step": 5441 }, { "epoch": 2.6755162241887906, "grad_norm": 15.4375, "learning_rate": 2.857444294193023e-07, "loss": 0.2677, "step": 5442 }, { "epoch": 2.676007866273353, "grad_norm": 26.25, "learning_rate": 2.84889734625536e-07, "loss": 0.2501, "step": 5443 }, { "epoch": 2.6764995083579155, "grad_norm": 40.0, "learning_rate": 2.840362782060438e-07, "loss": 0.5026, "step": 5444 }, { "epoch": 2.676991150442478, "grad_norm": 19.875, "learning_rate": 2.8318406041156466e-07, "loss": 0.2978, "step": 5445 }, { "epoch": 2.6774827925270404, "grad_norm": 33.75, "learning_rate": 2.8233308149247625e-07, "loss": 0.3274, "step": 5446 }, { "epoch": 2.6779744346116026, "grad_norm": 23.0, "learning_rate": 2.814833416987912e-07, "loss": 0.368, "step": 5447 }, { "epoch": 2.678466076696165, "grad_norm": 26.125, "learning_rate": 2.806348412801567e-07, "loss": 0.1531, "step": 5448 }, { "epoch": 2.678957718780728, "grad_norm": 11.3125, "learning_rate": 2.797875804858584e-07, "loss": 0.2173, "step": 5449 }, { "epoch": 2.67944936086529, "grad_norm": 20.375, "learning_rate": 2.7894155956481663e-07, "loss": 0.1791, "step": 5450 }, { "epoch": 2.6799410029498523, "grad_norm": 26.125, "learning_rate": 2.7809677876558617e-07, "loss": 0.5247, "step": 5451 }, { "epoch": 2.680432645034415, "grad_norm": 24.625, "learning_rate": 2.7725323833635934e-07, "loss": 0.342, "step": 5452 }, { "epoch": 2.6809242871189776, "grad_norm": 12.375, "learning_rate": 2.76410938524962e-07, "loss": 0.2354, "step": 5453 }, { "epoch": 2.6814159292035398, "grad_norm": 45.0, "learning_rate": 2.755698795788592e-07, "loss": 0.5919, "step": 5454 }, { "epoch": 2.6819075712881024, "grad_norm": 25.125, "learning_rate": 2.7473006174514725e-07, "loss": 0.2525, "step": 5455 }, { "epoch": 2.6823992133726646, "grad_norm": 35.25, "learning_rate": 2.738914852705608e-07, "loss": 0.4408, "step": 5456 }, { "epoch": 2.6828908554572273, "grad_norm": 46.25, "learning_rate": 2.7305415040146725e-07, "loss": 0.2403, "step": 5457 }, { "epoch": 2.6833824975417895, "grad_norm": 13.1875, "learning_rate": 2.7221805738387187e-07, "loss": 0.2007, "step": 5458 }, { "epoch": 2.683874139626352, "grad_norm": 11.75, "learning_rate": 2.713832064634126e-07, "loss": 0.2132, "step": 5459 }, { "epoch": 2.6843657817109143, "grad_norm": 34.5, "learning_rate": 2.705495978853643e-07, "loss": 0.3467, "step": 5460 }, { "epoch": 2.684857423795477, "grad_norm": 16.0, "learning_rate": 2.69717231894636e-07, "loss": 0.2138, "step": 5461 }, { "epoch": 2.685349065880039, "grad_norm": 29.875, "learning_rate": 2.6888610873577314e-07, "loss": 0.5191, "step": 5462 }, { "epoch": 2.685840707964602, "grad_norm": 12.3125, "learning_rate": 2.6805622865295235e-07, "loss": 0.1801, "step": 5463 }, { "epoch": 2.686332350049164, "grad_norm": 31.125, "learning_rate": 2.672275918899902e-07, "loss": 0.424, "step": 5464 }, { "epoch": 2.6868239921337267, "grad_norm": 24.25, "learning_rate": 2.664001986903323e-07, "loss": 0.3094, "step": 5465 }, { "epoch": 2.6873156342182893, "grad_norm": 31.875, "learning_rate": 2.6557404929706487e-07, "loss": 0.3117, "step": 5466 }, { "epoch": 2.6878072763028515, "grad_norm": 22.125, "learning_rate": 2.647491439529021e-07, "loss": 0.2518, "step": 5467 }, { "epoch": 2.6882989183874137, "grad_norm": 44.25, "learning_rate": 2.6392548290020023e-07, "loss": 0.4083, "step": 5468 }, { "epoch": 2.6887905604719764, "grad_norm": 25.5, "learning_rate": 2.6310306638094266e-07, "loss": 0.2804, "step": 5469 }, { "epoch": 2.689282202556539, "grad_norm": 29.625, "learning_rate": 2.6228189463675313e-07, "loss": 0.2102, "step": 5470 }, { "epoch": 2.6897738446411013, "grad_norm": 28.375, "learning_rate": 2.614619679088851e-07, "loss": 0.3461, "step": 5471 }, { "epoch": 2.6902654867256635, "grad_norm": 23.625, "learning_rate": 2.6064328643822895e-07, "loss": 0.3268, "step": 5472 }, { "epoch": 2.690757128810226, "grad_norm": 37.0, "learning_rate": 2.5982585046530823e-07, "loss": 0.2626, "step": 5473 }, { "epoch": 2.6912487708947888, "grad_norm": 13.5625, "learning_rate": 2.590096602302808e-07, "loss": 0.1703, "step": 5474 }, { "epoch": 2.691740412979351, "grad_norm": 38.75, "learning_rate": 2.581947159729375e-07, "loss": 0.2854, "step": 5475 }, { "epoch": 2.6922320550639136, "grad_norm": 27.25, "learning_rate": 2.573810179327061e-07, "loss": 0.2881, "step": 5476 }, { "epoch": 2.692723697148476, "grad_norm": 24.25, "learning_rate": 2.565685663486451e-07, "loss": 0.2339, "step": 5477 }, { "epoch": 2.6932153392330385, "grad_norm": 17.75, "learning_rate": 2.557573614594483e-07, "loss": 0.4053, "step": 5478 }, { "epoch": 2.6937069813176007, "grad_norm": 27.5, "learning_rate": 2.549474035034418e-07, "loss": 0.4308, "step": 5479 }, { "epoch": 2.6941986234021633, "grad_norm": 19.75, "learning_rate": 2.541386927185881e-07, "loss": 0.2171, "step": 5480 }, { "epoch": 2.6946902654867255, "grad_norm": 19.125, "learning_rate": 2.533312293424798e-07, "loss": 0.3406, "step": 5481 }, { "epoch": 2.695181907571288, "grad_norm": 20.875, "learning_rate": 2.525250136123459e-07, "loss": 0.3835, "step": 5482 }, { "epoch": 2.6956735496558504, "grad_norm": 22.75, "learning_rate": 2.5172004576504766e-07, "loss": 0.2622, "step": 5483 }, { "epoch": 2.696165191740413, "grad_norm": 20.625, "learning_rate": 2.5091632603708076e-07, "loss": 0.1958, "step": 5484 }, { "epoch": 2.6966568338249752, "grad_norm": 13.25, "learning_rate": 2.5011385466457137e-07, "loss": 0.291, "step": 5485 }, { "epoch": 2.697148475909538, "grad_norm": 24.75, "learning_rate": 2.493126318832824e-07, "loss": 0.189, "step": 5486 }, { "epoch": 2.6976401179941005, "grad_norm": 32.0, "learning_rate": 2.4851265792860656e-07, "loss": 0.2461, "step": 5487 }, { "epoch": 2.6981317600786627, "grad_norm": 38.25, "learning_rate": 2.4771393303557333e-07, "loss": 0.3964, "step": 5488 }, { "epoch": 2.698623402163225, "grad_norm": 21.625, "learning_rate": 2.46916457438842e-07, "loss": 0.3661, "step": 5489 }, { "epoch": 2.6991150442477876, "grad_norm": 28.375, "learning_rate": 2.461202313727072e-07, "loss": 0.3571, "step": 5490 }, { "epoch": 2.6996066863323502, "grad_norm": 27.5, "learning_rate": 2.4532525507109474e-07, "loss": 0.4168, "step": 5491 }, { "epoch": 2.7000983284169124, "grad_norm": 11.5625, "learning_rate": 2.445315287675633e-07, "loss": 0.2548, "step": 5492 }, { "epoch": 2.700589970501475, "grad_norm": 30.625, "learning_rate": 2.437390526953069e-07, "loss": 0.3647, "step": 5493 }, { "epoch": 2.7010816125860373, "grad_norm": 26.875, "learning_rate": 2.4294782708714825e-07, "loss": 0.1603, "step": 5494 }, { "epoch": 2.7015732546706, "grad_norm": 50.0, "learning_rate": 2.4215785217554645e-07, "loss": 0.2687, "step": 5495 }, { "epoch": 2.702064896755162, "grad_norm": 32.75, "learning_rate": 2.413691281925898e-07, "loss": 0.2021, "step": 5496 }, { "epoch": 2.702556538839725, "grad_norm": 17.75, "learning_rate": 2.4058165537000095e-07, "loss": 0.3697, "step": 5497 }, { "epoch": 2.703048180924287, "grad_norm": 23.75, "learning_rate": 2.3979543393913584e-07, "loss": 0.2376, "step": 5498 }, { "epoch": 2.7035398230088497, "grad_norm": 16.5, "learning_rate": 2.390104641309812e-07, "loss": 0.2481, "step": 5499 }, { "epoch": 2.704031465093412, "grad_norm": 8.0625, "learning_rate": 2.3822674617615557e-07, "loss": 0.2084, "step": 5500 }, { "epoch": 2.7045231071779745, "grad_norm": 25.625, "learning_rate": 2.3744428030491228e-07, "loss": 0.3058, "step": 5501 }, { "epoch": 2.7050147492625367, "grad_norm": 17.875, "learning_rate": 2.3666306674713339e-07, "loss": 0.2271, "step": 5502 }, { "epoch": 2.7055063913470994, "grad_norm": 12.0625, "learning_rate": 2.3588310573233635e-07, "loss": 0.2592, "step": 5503 }, { "epoch": 2.705998033431662, "grad_norm": 20.875, "learning_rate": 2.3510439748966683e-07, "loss": 0.1683, "step": 5504 }, { "epoch": 2.706489675516224, "grad_norm": 19.25, "learning_rate": 2.3432694224790736e-07, "loss": 0.3543, "step": 5505 }, { "epoch": 2.7069813176007864, "grad_norm": 26.75, "learning_rate": 2.3355074023546768e-07, "loss": 0.3248, "step": 5506 }, { "epoch": 2.707472959685349, "grad_norm": 16.5, "learning_rate": 2.3277579168039293e-07, "loss": 0.4293, "step": 5507 }, { "epoch": 2.7079646017699117, "grad_norm": 20.75, "learning_rate": 2.3200209681035644e-07, "loss": 0.3843, "step": 5508 }, { "epoch": 2.708456243854474, "grad_norm": 37.75, "learning_rate": 2.3122965585266742e-07, "loss": 0.2541, "step": 5509 }, { "epoch": 2.708947885939036, "grad_norm": 32.75, "learning_rate": 2.3045846903426232e-07, "loss": 0.39, "step": 5510 }, { "epoch": 2.7094395280235988, "grad_norm": 18.75, "learning_rate": 2.2968853658171192e-07, "loss": 0.3239, "step": 5511 }, { "epoch": 2.7099311701081614, "grad_norm": 23.25, "learning_rate": 2.2891985872121822e-07, "loss": 0.2491, "step": 5512 }, { "epoch": 2.7104228121927236, "grad_norm": 7.96875, "learning_rate": 2.2815243567861467e-07, "loss": 0.2509, "step": 5513 }, { "epoch": 2.7109144542772863, "grad_norm": 10.3125, "learning_rate": 2.2738626767936432e-07, "loss": 0.2959, "step": 5514 }, { "epoch": 2.7114060963618485, "grad_norm": 25.875, "learning_rate": 2.2662135494856368e-07, "loss": 0.3263, "step": 5515 }, { "epoch": 2.711897738446411, "grad_norm": 14.4375, "learning_rate": 2.2585769771093844e-07, "loss": 0.1996, "step": 5516 }, { "epoch": 2.7123893805309733, "grad_norm": 29.25, "learning_rate": 2.2509529619084818e-07, "loss": 0.2604, "step": 5517 }, { "epoch": 2.712881022615536, "grad_norm": 29.75, "learning_rate": 2.243341506122802e-07, "loss": 0.4284, "step": 5518 }, { "epoch": 2.713372664700098, "grad_norm": 22.0, "learning_rate": 2.235742611988556e-07, "loss": 0.1919, "step": 5519 }, { "epoch": 2.713864306784661, "grad_norm": 38.5, "learning_rate": 2.2281562817382532e-07, "loss": 0.3423, "step": 5520 }, { "epoch": 2.714355948869223, "grad_norm": 33.0, "learning_rate": 2.2205825176007112e-07, "loss": 0.3863, "step": 5521 }, { "epoch": 2.7148475909537857, "grad_norm": 14.25, "learning_rate": 2.2130213218010542e-07, "loss": 0.2565, "step": 5522 }, { "epoch": 2.715339233038348, "grad_norm": 12.4375, "learning_rate": 2.205472696560721e-07, "loss": 0.2014, "step": 5523 }, { "epoch": 2.7158308751229105, "grad_norm": 43.0, "learning_rate": 2.1979366440974469e-07, "loss": 0.2948, "step": 5524 }, { "epoch": 2.716322517207473, "grad_norm": 22.875, "learning_rate": 2.1904131666252764e-07, "loss": 0.202, "step": 5525 }, { "epoch": 2.7168141592920354, "grad_norm": 42.0, "learning_rate": 2.1829022663545712e-07, "loss": 0.262, "step": 5526 }, { "epoch": 2.7173058013765976, "grad_norm": 16.5, "learning_rate": 2.175403945491996e-07, "loss": 0.1971, "step": 5527 }, { "epoch": 2.7177974434611603, "grad_norm": 12.5625, "learning_rate": 2.167918206240494e-07, "loss": 0.3264, "step": 5528 }, { "epoch": 2.718289085545723, "grad_norm": 30.875, "learning_rate": 2.1604450507993456e-07, "loss": 0.2985, "step": 5529 }, { "epoch": 2.718780727630285, "grad_norm": 13.3125, "learning_rate": 2.1529844813641048e-07, "loss": 0.1097, "step": 5530 }, { "epoch": 2.7192723697148473, "grad_norm": 50.5, "learning_rate": 2.145536500126653e-07, "loss": 0.5264, "step": 5531 }, { "epoch": 2.71976401179941, "grad_norm": 35.25, "learning_rate": 2.1381011092751652e-07, "loss": 0.2861, "step": 5532 }, { "epoch": 2.7202556538839726, "grad_norm": 10.6875, "learning_rate": 2.1306783109941036e-07, "loss": 0.2025, "step": 5533 }, { "epoch": 2.720747295968535, "grad_norm": 24.375, "learning_rate": 2.123268107464249e-07, "loss": 0.2138, "step": 5534 }, { "epoch": 2.7212389380530975, "grad_norm": 18.0, "learning_rate": 2.115870500862685e-07, "loss": 0.3213, "step": 5535 }, { "epoch": 2.7217305801376597, "grad_norm": 49.75, "learning_rate": 2.1084854933627628e-07, "loss": 0.4847, "step": 5536 }, { "epoch": 2.7222222222222223, "grad_norm": 17.0, "learning_rate": 2.1011130871341673e-07, "loss": 0.2285, "step": 5537 }, { "epoch": 2.7227138643067845, "grad_norm": 50.0, "learning_rate": 2.0937532843428703e-07, "loss": 0.4441, "step": 5538 }, { "epoch": 2.723205506391347, "grad_norm": 27.25, "learning_rate": 2.0864060871511276e-07, "loss": 0.3234, "step": 5539 }, { "epoch": 2.7236971484759094, "grad_norm": 23.5, "learning_rate": 2.0790714977175172e-07, "loss": 0.2798, "step": 5540 }, { "epoch": 2.724188790560472, "grad_norm": 29.625, "learning_rate": 2.071749518196871e-07, "loss": 0.3368, "step": 5541 }, { "epoch": 2.7246804326450347, "grad_norm": 81.5, "learning_rate": 2.0644401507403726e-07, "loss": 0.3561, "step": 5542 }, { "epoch": 2.725172074729597, "grad_norm": 34.0, "learning_rate": 2.0571433974954496e-07, "loss": 0.3702, "step": 5543 }, { "epoch": 2.725663716814159, "grad_norm": 24.875, "learning_rate": 2.049859260605862e-07, "loss": 0.1944, "step": 5544 }, { "epoch": 2.7261553588987217, "grad_norm": 23.75, "learning_rate": 2.042587742211628e-07, "loss": 0.18, "step": 5545 }, { "epoch": 2.7266470009832844, "grad_norm": 23.625, "learning_rate": 2.0353288444490885e-07, "loss": 0.2586, "step": 5546 }, { "epoch": 2.7271386430678466, "grad_norm": 17.375, "learning_rate": 2.028082569450858e-07, "loss": 0.2686, "step": 5547 }, { "epoch": 2.727630285152409, "grad_norm": 22.625, "learning_rate": 2.0208489193458534e-07, "loss": 0.32, "step": 5548 }, { "epoch": 2.7281219272369714, "grad_norm": 42.75, "learning_rate": 2.0136278962592747e-07, "loss": 0.4376, "step": 5549 }, { "epoch": 2.728613569321534, "grad_norm": 19.25, "learning_rate": 2.0064195023126197e-07, "loss": 0.1807, "step": 5550 }, { "epoch": 2.7291052114060963, "grad_norm": 19.875, "learning_rate": 1.9992237396236645e-07, "loss": 0.2409, "step": 5551 }, { "epoch": 2.729596853490659, "grad_norm": 26.75, "learning_rate": 1.992040610306493e-07, "loss": 0.4165, "step": 5552 }, { "epoch": 2.730088495575221, "grad_norm": 21.75, "learning_rate": 1.9848701164714522e-07, "loss": 0.12, "step": 5553 }, { "epoch": 2.730580137659784, "grad_norm": 17.875, "learning_rate": 1.9777122602252017e-07, "loss": 0.1771, "step": 5554 }, { "epoch": 2.731071779744346, "grad_norm": 19.75, "learning_rate": 1.9705670436706697e-07, "loss": 0.4148, "step": 5555 }, { "epoch": 2.7315634218289087, "grad_norm": 31.0, "learning_rate": 1.9634344689070766e-07, "loss": 0.4104, "step": 5556 }, { "epoch": 2.732055063913471, "grad_norm": 40.25, "learning_rate": 1.9563145380299363e-07, "loss": 0.6518, "step": 5557 }, { "epoch": 2.7325467059980335, "grad_norm": 33.75, "learning_rate": 1.9492072531310457e-07, "loss": 0.281, "step": 5558 }, { "epoch": 2.7330383480825957, "grad_norm": 46.75, "learning_rate": 1.9421126162984693e-07, "loss": 0.4913, "step": 5559 }, { "epoch": 2.7335299901671584, "grad_norm": 12.875, "learning_rate": 1.9350306296165847e-07, "loss": 0.1555, "step": 5560 }, { "epoch": 2.7340216322517206, "grad_norm": 29.375, "learning_rate": 1.9279612951660232e-07, "loss": 0.3601, "step": 5561 }, { "epoch": 2.734513274336283, "grad_norm": 12.9375, "learning_rate": 1.9209046150237225e-07, "loss": 0.3989, "step": 5562 }, { "epoch": 2.735004916420846, "grad_norm": 17.875, "learning_rate": 1.9138605912628797e-07, "loss": 0.322, "step": 5563 }, { "epoch": 2.735496558505408, "grad_norm": 21.875, "learning_rate": 1.9068292259530145e-07, "loss": 0.2473, "step": 5564 }, { "epoch": 2.7359882005899703, "grad_norm": 18.625, "learning_rate": 1.8998105211598694e-07, "loss": 0.3376, "step": 5565 }, { "epoch": 2.736479842674533, "grad_norm": 22.75, "learning_rate": 1.8928044789455247e-07, "loss": 0.3968, "step": 5566 }, { "epoch": 2.7369714847590956, "grad_norm": 28.75, "learning_rate": 1.8858111013682887e-07, "loss": 0.3632, "step": 5567 }, { "epoch": 2.737463126843658, "grad_norm": 30.625, "learning_rate": 1.8788303904827937e-07, "loss": 0.3706, "step": 5568 }, { "epoch": 2.73795476892822, "grad_norm": 37.25, "learning_rate": 1.8718623483399184e-07, "loss": 0.4217, "step": 5569 }, { "epoch": 2.7384464110127826, "grad_norm": 37.25, "learning_rate": 1.8649069769868403e-07, "loss": 0.4784, "step": 5570 }, { "epoch": 2.7389380530973453, "grad_norm": 43.0, "learning_rate": 1.8579642784670003e-07, "loss": 0.216, "step": 5571 }, { "epoch": 2.7394296951819075, "grad_norm": 12.1875, "learning_rate": 1.8510342548201264e-07, "loss": 0.2305, "step": 5572 }, { "epoch": 2.73992133726647, "grad_norm": 32.75, "learning_rate": 1.844116908082215e-07, "loss": 0.2408, "step": 5573 }, { "epoch": 2.7404129793510323, "grad_norm": 25.25, "learning_rate": 1.8372122402855507e-07, "loss": 0.2345, "step": 5574 }, { "epoch": 2.740904621435595, "grad_norm": 38.25, "learning_rate": 1.8303202534586656e-07, "loss": 0.3547, "step": 5575 }, { "epoch": 2.741396263520157, "grad_norm": 45.25, "learning_rate": 1.8234409496263948e-07, "loss": 0.4246, "step": 5576 }, { "epoch": 2.74188790560472, "grad_norm": 26.375, "learning_rate": 1.816574330809842e-07, "loss": 0.2336, "step": 5577 }, { "epoch": 2.742379547689282, "grad_norm": 29.375, "learning_rate": 1.8097203990263685e-07, "loss": 0.2147, "step": 5578 }, { "epoch": 2.7428711897738447, "grad_norm": 25.0, "learning_rate": 1.802879156289623e-07, "loss": 0.3138, "step": 5579 }, { "epoch": 2.7433628318584073, "grad_norm": 21.375, "learning_rate": 1.7960506046095232e-07, "loss": 0.366, "step": 5580 }, { "epoch": 2.7438544739429696, "grad_norm": 15.8125, "learning_rate": 1.7892347459922593e-07, "loss": 0.1931, "step": 5581 }, { "epoch": 2.7443461160275318, "grad_norm": 27.5, "learning_rate": 1.7824315824402792e-07, "loss": 0.5591, "step": 5582 }, { "epoch": 2.7448377581120944, "grad_norm": 40.5, "learning_rate": 1.7756411159523294e-07, "loss": 0.4436, "step": 5583 }, { "epoch": 2.745329400196657, "grad_norm": 17.0, "learning_rate": 1.768863348523389e-07, "loss": 0.3212, "step": 5584 }, { "epoch": 2.7458210422812193, "grad_norm": 14.5, "learning_rate": 1.762098282144735e-07, "loss": 0.1815, "step": 5585 }, { "epoch": 2.7463126843657815, "grad_norm": 16.25, "learning_rate": 1.755345918803908e-07, "loss": 0.1948, "step": 5586 }, { "epoch": 2.746804326450344, "grad_norm": 25.0, "learning_rate": 1.7486062604847102e-07, "loss": 0.3592, "step": 5587 }, { "epoch": 2.7472959685349068, "grad_norm": 40.25, "learning_rate": 1.7418793091672037e-07, "loss": 0.3624, "step": 5588 }, { "epoch": 2.747787610619469, "grad_norm": 14.5625, "learning_rate": 1.7351650668277419e-07, "loss": 0.2608, "step": 5589 }, { "epoch": 2.7482792527040316, "grad_norm": 30.25, "learning_rate": 1.7284635354389123e-07, "loss": 0.4225, "step": 5590 }, { "epoch": 2.748770894788594, "grad_norm": 35.5, "learning_rate": 1.7217747169695997e-07, "loss": 0.3433, "step": 5591 }, { "epoch": 2.7492625368731565, "grad_norm": 19.5, "learning_rate": 1.7150986133849224e-07, "loss": 0.3225, "step": 5592 }, { "epoch": 2.7497541789577187, "grad_norm": 9.3125, "learning_rate": 1.708435226646296e-07, "loss": 0.1487, "step": 5593 }, { "epoch": 2.7502458210422813, "grad_norm": 20.125, "learning_rate": 1.70178455871138e-07, "loss": 0.2252, "step": 5594 }, { "epoch": 2.7507374631268435, "grad_norm": 13.8125, "learning_rate": 1.6951466115341014e-07, "loss": 0.1738, "step": 5595 }, { "epoch": 2.751229105211406, "grad_norm": 31.75, "learning_rate": 1.68852138706464e-07, "loss": 0.3397, "step": 5596 }, { "epoch": 2.7517207472959684, "grad_norm": 27.75, "learning_rate": 1.6819088872494587e-07, "loss": 0.3399, "step": 5597 }, { "epoch": 2.752212389380531, "grad_norm": 20.0, "learning_rate": 1.6753091140312637e-07, "loss": 0.2517, "step": 5598 }, { "epoch": 2.7527040314650932, "grad_norm": 54.5, "learning_rate": 1.668722069349039e-07, "loss": 0.4063, "step": 5599 }, { "epoch": 2.753195673549656, "grad_norm": 28.375, "learning_rate": 1.6621477551379966e-07, "loss": 0.2669, "step": 5600 }, { "epoch": 2.7536873156342185, "grad_norm": 52.25, "learning_rate": 1.6555861733296563e-07, "loss": 0.5837, "step": 5601 }, { "epoch": 2.7541789577187807, "grad_norm": 19.5, "learning_rate": 1.649037325851756e-07, "loss": 0.2072, "step": 5602 }, { "epoch": 2.754670599803343, "grad_norm": 19.5, "learning_rate": 1.642501214628322e-07, "loss": 0.2927, "step": 5603 }, { "epoch": 2.7551622418879056, "grad_norm": 35.0, "learning_rate": 1.6359778415796078e-07, "loss": 0.3523, "step": 5604 }, { "epoch": 2.7556538839724682, "grad_norm": 15.3125, "learning_rate": 1.6294672086221556e-07, "loss": 0.1563, "step": 5605 }, { "epoch": 2.7561455260570304, "grad_norm": 24.75, "learning_rate": 1.6229693176687404e-07, "loss": 0.1604, "step": 5606 }, { "epoch": 2.7566371681415927, "grad_norm": 26.875, "learning_rate": 1.61648417062841e-07, "loss": 0.3435, "step": 5607 }, { "epoch": 2.7571288102261553, "grad_norm": 31.125, "learning_rate": 1.6100117694064552e-07, "loss": 0.3369, "step": 5608 }, { "epoch": 2.757620452310718, "grad_norm": 12.875, "learning_rate": 1.6035521159044402e-07, "loss": 0.2299, "step": 5609 }, { "epoch": 2.75811209439528, "grad_norm": 21.75, "learning_rate": 1.5971052120201667e-07, "loss": 0.3302, "step": 5610 }, { "epoch": 2.758603736479843, "grad_norm": 17.625, "learning_rate": 1.5906710596476948e-07, "loss": 0.3698, "step": 5611 }, { "epoch": 2.759095378564405, "grad_norm": 56.0, "learning_rate": 1.584249660677337e-07, "loss": 0.327, "step": 5612 }, { "epoch": 2.7595870206489677, "grad_norm": 28.75, "learning_rate": 1.5778410169956697e-07, "loss": 0.3325, "step": 5613 }, { "epoch": 2.76007866273353, "grad_norm": 22.125, "learning_rate": 1.5714451304855116e-07, "loss": 0.2967, "step": 5614 }, { "epoch": 2.7605703048180925, "grad_norm": 43.75, "learning_rate": 1.5650620030259394e-07, "loss": 0.3739, "step": 5615 }, { "epoch": 2.7610619469026547, "grad_norm": 22.5, "learning_rate": 1.558691636492268e-07, "loss": 0.2428, "step": 5616 }, { "epoch": 2.7615535889872174, "grad_norm": 11.375, "learning_rate": 1.552334032756086e-07, "loss": 0.154, "step": 5617 }, { "epoch": 2.76204523107178, "grad_norm": 19.25, "learning_rate": 1.5459891936852034e-07, "loss": 0.2384, "step": 5618 }, { "epoch": 2.762536873156342, "grad_norm": 26.125, "learning_rate": 1.5396571211437095e-07, "loss": 0.2372, "step": 5619 }, { "epoch": 2.7630285152409044, "grad_norm": 20.0, "learning_rate": 1.533337816991931e-07, "loss": 0.1972, "step": 5620 }, { "epoch": 2.763520157325467, "grad_norm": 15.125, "learning_rate": 1.527031283086428e-07, "loss": 0.1325, "step": 5621 }, { "epoch": 2.7640117994100297, "grad_norm": 45.25, "learning_rate": 1.5207375212800334e-07, "loss": 0.3971, "step": 5622 }, { "epoch": 2.764503441494592, "grad_norm": 18.5, "learning_rate": 1.514456533421818e-07, "loss": 0.2523, "step": 5623 }, { "epoch": 2.764995083579154, "grad_norm": 29.375, "learning_rate": 1.5081883213570907e-07, "loss": 0.2464, "step": 5624 }, { "epoch": 2.765486725663717, "grad_norm": 31.625, "learning_rate": 1.5019328869274186e-07, "loss": 0.4386, "step": 5625 }, { "epoch": 2.7659783677482794, "grad_norm": 40.75, "learning_rate": 1.4956902319706162e-07, "loss": 0.411, "step": 5626 }, { "epoch": 2.7664700098328416, "grad_norm": 33.5, "learning_rate": 1.4894603583207266e-07, "loss": 0.2752, "step": 5627 }, { "epoch": 2.7669616519174043, "grad_norm": 12.3125, "learning_rate": 1.483243267808066e-07, "loss": 0.174, "step": 5628 }, { "epoch": 2.7674532940019665, "grad_norm": 18.625, "learning_rate": 1.4770389622591528e-07, "loss": 0.2977, "step": 5629 }, { "epoch": 2.767944936086529, "grad_norm": 13.375, "learning_rate": 1.4708474434968095e-07, "loss": 0.1986, "step": 5630 }, { "epoch": 2.7684365781710913, "grad_norm": 43.5, "learning_rate": 1.4646687133400356e-07, "loss": 0.4648, "step": 5631 }, { "epoch": 2.768928220255654, "grad_norm": 33.75, "learning_rate": 1.458502773604129e-07, "loss": 0.3808, "step": 5632 }, { "epoch": 2.769419862340216, "grad_norm": 19.5, "learning_rate": 1.4523496261005908e-07, "loss": 0.2385, "step": 5633 }, { "epoch": 2.769911504424779, "grad_norm": 28.25, "learning_rate": 1.446209272637185e-07, "loss": 0.3481, "step": 5634 }, { "epoch": 2.770403146509341, "grad_norm": 14.875, "learning_rate": 1.4400817150179085e-07, "loss": 0.2516, "step": 5635 }, { "epoch": 2.7708947885939037, "grad_norm": 21.125, "learning_rate": 1.4339669550430013e-07, "loss": 0.2125, "step": 5636 }, { "epoch": 2.771386430678466, "grad_norm": 20.375, "learning_rate": 1.4278649945089462e-07, "loss": 0.308, "step": 5637 }, { "epoch": 2.7718780727630286, "grad_norm": 18.375, "learning_rate": 1.4217758352084642e-07, "loss": 0.2932, "step": 5638 }, { "epoch": 2.772369714847591, "grad_norm": 26.5, "learning_rate": 1.4156994789305093e-07, "loss": 0.3462, "step": 5639 }, { "epoch": 2.7728613569321534, "grad_norm": 11.3125, "learning_rate": 1.409635927460284e-07, "loss": 0.1676, "step": 5640 }, { "epoch": 2.7733529990167156, "grad_norm": 24.625, "learning_rate": 1.4035851825792174e-07, "loss": 0.3124, "step": 5641 }, { "epoch": 2.7738446411012783, "grad_norm": 38.75, "learning_rate": 1.397547246064983e-07, "loss": 0.2742, "step": 5642 }, { "epoch": 2.774336283185841, "grad_norm": 13.875, "learning_rate": 1.3915221196914969e-07, "loss": 0.2834, "step": 5643 }, { "epoch": 2.774827925270403, "grad_norm": 45.25, "learning_rate": 1.3855098052288928e-07, "loss": 0.3428, "step": 5644 }, { "epoch": 2.7753195673549653, "grad_norm": 20.5, "learning_rate": 1.379510304443568e-07, "loss": 0.1188, "step": 5645 }, { "epoch": 2.775811209439528, "grad_norm": 16.875, "learning_rate": 1.3735236190981327e-07, "loss": 0.3254, "step": 5646 }, { "epoch": 2.7763028515240906, "grad_norm": 31.5, "learning_rate": 1.3675497509514395e-07, "loss": 0.4358, "step": 5647 }, { "epoch": 2.776794493608653, "grad_norm": 37.75, "learning_rate": 1.36158870175858e-07, "loss": 0.4308, "step": 5648 }, { "epoch": 2.7772861356932155, "grad_norm": 23.125, "learning_rate": 1.3556404732708627e-07, "loss": 0.3613, "step": 5649 }, { "epoch": 2.7777777777777777, "grad_norm": 15.0625, "learning_rate": 1.3497050672358552e-07, "loss": 0.2753, "step": 5650 }, { "epoch": 2.7782694198623403, "grad_norm": 26.375, "learning_rate": 1.3437824853973274e-07, "loss": 0.4502, "step": 5651 }, { "epoch": 2.7787610619469025, "grad_norm": 23.0, "learning_rate": 1.3378727294953226e-07, "loss": 0.3052, "step": 5652 }, { "epoch": 2.779252704031465, "grad_norm": 36.0, "learning_rate": 1.3319758012660767e-07, "loss": 0.278, "step": 5653 }, { "epoch": 2.7797443461160274, "grad_norm": 13.1875, "learning_rate": 1.3260917024420738e-07, "loss": 0.2144, "step": 5654 }, { "epoch": 2.78023598820059, "grad_norm": 27.625, "learning_rate": 1.3202204347520265e-07, "loss": 0.2274, "step": 5655 }, { "epoch": 2.7807276302851527, "grad_norm": 14.375, "learning_rate": 1.3143619999208844e-07, "loss": 0.2696, "step": 5656 }, { "epoch": 2.781219272369715, "grad_norm": 20.5, "learning_rate": 1.3085163996698112e-07, "loss": 0.2737, "step": 5657 }, { "epoch": 2.781710914454277, "grad_norm": 14.125, "learning_rate": 1.302683635716213e-07, "loss": 0.3149, "step": 5658 }, { "epoch": 2.7822025565388397, "grad_norm": 23.25, "learning_rate": 1.296863709773719e-07, "loss": 0.249, "step": 5659 }, { "epoch": 2.7826941986234024, "grad_norm": 44.0, "learning_rate": 1.2910566235522014e-07, "loss": 0.4461, "step": 5660 }, { "epoch": 2.7831858407079646, "grad_norm": 45.25, "learning_rate": 1.2852623787577305e-07, "loss": 0.4516, "step": 5661 }, { "epoch": 2.783677482792527, "grad_norm": 16.625, "learning_rate": 1.2794809770926347e-07, "loss": 0.1746, "step": 5662 }, { "epoch": 2.7841691248770895, "grad_norm": 27.125, "learning_rate": 1.2737124202554452e-07, "loss": 0.263, "step": 5663 }, { "epoch": 2.784660766961652, "grad_norm": 22.25, "learning_rate": 1.267956709940936e-07, "loss": 0.2699, "step": 5664 }, { "epoch": 2.7851524090462143, "grad_norm": 35.75, "learning_rate": 1.262213847840095e-07, "loss": 0.2293, "step": 5665 }, { "epoch": 2.785644051130777, "grad_norm": 28.0, "learning_rate": 1.2564838356401476e-07, "loss": 0.2931, "step": 5666 }, { "epoch": 2.786135693215339, "grad_norm": 44.5, "learning_rate": 1.2507666750245268e-07, "loss": 0.3117, "step": 5667 }, { "epoch": 2.786627335299902, "grad_norm": 41.75, "learning_rate": 1.2450623676729088e-07, "loss": 0.7153, "step": 5668 }, { "epoch": 2.787118977384464, "grad_norm": 18.625, "learning_rate": 1.2393709152611937e-07, "loss": 0.3329, "step": 5669 }, { "epoch": 2.7876106194690267, "grad_norm": 23.5, "learning_rate": 1.2336923194614736e-07, "loss": 0.3115, "step": 5670 }, { "epoch": 2.788102261553589, "grad_norm": 24.125, "learning_rate": 1.2280265819421089e-07, "loss": 0.2852, "step": 5671 }, { "epoch": 2.7885939036381515, "grad_norm": 26.25, "learning_rate": 1.222373704367638e-07, "loss": 0.4101, "step": 5672 }, { "epoch": 2.7890855457227137, "grad_norm": 10.0625, "learning_rate": 1.2167336883988578e-07, "loss": 0.2378, "step": 5673 }, { "epoch": 2.7895771878072764, "grad_norm": 20.875, "learning_rate": 1.2111065356927675e-07, "loss": 0.1065, "step": 5674 }, { "epoch": 2.7900688298918386, "grad_norm": 27.25, "learning_rate": 1.2054922479025947e-07, "loss": 0.5074, "step": 5675 }, { "epoch": 2.7905604719764012, "grad_norm": 19.5, "learning_rate": 1.1998908266777752e-07, "loss": 0.26, "step": 5676 }, { "epoch": 2.791052114060964, "grad_norm": 11.4375, "learning_rate": 1.1943022736639824e-07, "loss": 0.2231, "step": 5677 }, { "epoch": 2.791543756145526, "grad_norm": 12.5625, "learning_rate": 1.1887265905030881e-07, "loss": 0.3176, "step": 5678 }, { "epoch": 2.7920353982300883, "grad_norm": 19.0, "learning_rate": 1.1831637788332117e-07, "loss": 0.2567, "step": 5679 }, { "epoch": 2.792527040314651, "grad_norm": 18.75, "learning_rate": 1.1776138402886561e-07, "loss": 0.2348, "step": 5680 }, { "epoch": 2.7930186823992136, "grad_norm": 17.625, "learning_rate": 1.1720767764999668e-07, "loss": 0.281, "step": 5681 }, { "epoch": 2.793510324483776, "grad_norm": 19.375, "learning_rate": 1.1665525890939078e-07, "loss": 0.1648, "step": 5682 }, { "epoch": 2.794001966568338, "grad_norm": 16.125, "learning_rate": 1.1610412796934455e-07, "loss": 0.1409, "step": 5683 }, { "epoch": 2.7944936086529006, "grad_norm": 29.875, "learning_rate": 1.155542849917765e-07, "loss": 0.3568, "step": 5684 }, { "epoch": 2.7949852507374633, "grad_norm": 16.375, "learning_rate": 1.150057301382289e-07, "loss": 0.301, "step": 5685 }, { "epoch": 2.7954768928220255, "grad_norm": 14.375, "learning_rate": 1.1445846356986184e-07, "loss": 0.2175, "step": 5686 }, { "epoch": 2.795968534906588, "grad_norm": 17.0, "learning_rate": 1.139124854474602e-07, "loss": 0.1668, "step": 5687 }, { "epoch": 2.7964601769911503, "grad_norm": 17.125, "learning_rate": 1.1336779593142816e-07, "loss": 0.2771, "step": 5688 }, { "epoch": 2.796951819075713, "grad_norm": 12.3125, "learning_rate": 1.1282439518179371e-07, "loss": 0.2433, "step": 5689 }, { "epoch": 2.797443461160275, "grad_norm": 37.75, "learning_rate": 1.1228228335820368e-07, "loss": 0.3625, "step": 5690 }, { "epoch": 2.797935103244838, "grad_norm": 12.75, "learning_rate": 1.1174146061992812e-07, "loss": 0.241, "step": 5691 }, { "epoch": 2.7984267453294, "grad_norm": 23.625, "learning_rate": 1.1120192712585597e-07, "loss": 0.3816, "step": 5692 }, { "epoch": 2.7989183874139627, "grad_norm": 17.0, "learning_rate": 1.1066368303450091e-07, "loss": 0.3454, "step": 5693 }, { "epoch": 2.799410029498525, "grad_norm": 42.75, "learning_rate": 1.1012672850399447e-07, "loss": 0.535, "step": 5694 }, { "epoch": 2.7999016715830876, "grad_norm": 19.875, "learning_rate": 1.0959106369209094e-07, "loss": 0.2902, "step": 5695 }, { "epoch": 2.8003933136676498, "grad_norm": 18.875, "learning_rate": 1.0905668875616542e-07, "loss": 0.1346, "step": 5696 }, { "epoch": 2.8008849557522124, "grad_norm": 49.5, "learning_rate": 1.0852360385321486e-07, "loss": 0.2586, "step": 5697 }, { "epoch": 2.801376597836775, "grad_norm": 21.875, "learning_rate": 1.0799180913985546e-07, "loss": 0.189, "step": 5698 }, { "epoch": 2.8018682399213373, "grad_norm": 37.0, "learning_rate": 1.0746130477232625e-07, "loss": 0.5456, "step": 5699 }, { "epoch": 2.8023598820058995, "grad_norm": 23.0, "learning_rate": 1.0693209090648504e-07, "loss": 0.2632, "step": 5700 }, { "epoch": 2.802851524090462, "grad_norm": 17.25, "learning_rate": 1.0640416769781297e-07, "loss": 0.3185, "step": 5701 }, { "epoch": 2.8033431661750248, "grad_norm": 50.5, "learning_rate": 1.0587753530140947e-07, "loss": 0.2122, "step": 5702 }, { "epoch": 2.803834808259587, "grad_norm": 54.5, "learning_rate": 1.053521938719968e-07, "loss": 0.489, "step": 5703 }, { "epoch": 2.8043264503441496, "grad_norm": 18.5, "learning_rate": 1.0482814356391746e-07, "loss": 0.3165, "step": 5704 }, { "epoch": 2.804818092428712, "grad_norm": 56.0, "learning_rate": 1.043053845311338e-07, "loss": 0.4737, "step": 5705 }, { "epoch": 2.8053097345132745, "grad_norm": 23.375, "learning_rate": 1.0378391692722899e-07, "loss": 0.3323, "step": 5706 }, { "epoch": 2.8058013765978367, "grad_norm": 22.75, "learning_rate": 1.0326374090540797e-07, "loss": 0.3147, "step": 5707 }, { "epoch": 2.8062930186823993, "grad_norm": 14.0625, "learning_rate": 1.0274485661849498e-07, "loss": 0.2492, "step": 5708 }, { "epoch": 2.8067846607669615, "grad_norm": 16.625, "learning_rate": 1.0222726421893458e-07, "loss": 0.2064, "step": 5709 }, { "epoch": 2.807276302851524, "grad_norm": 28.5, "learning_rate": 1.0171096385879264e-07, "loss": 0.258, "step": 5710 }, { "epoch": 2.8077679449360864, "grad_norm": 12.8125, "learning_rate": 1.0119595568975581e-07, "loss": 0.1877, "step": 5711 }, { "epoch": 2.808259587020649, "grad_norm": 34.75, "learning_rate": 1.0068223986312957e-07, "loss": 0.3302, "step": 5712 }, { "epoch": 2.8087512291052112, "grad_norm": 13.75, "learning_rate": 1.0016981652984069e-07, "loss": 0.3199, "step": 5713 }, { "epoch": 2.809242871189774, "grad_norm": 19.0, "learning_rate": 9.965868584043725e-08, "loss": 0.3384, "step": 5714 }, { "epoch": 2.8097345132743365, "grad_norm": 30.5, "learning_rate": 9.914884794508412e-08, "loss": 0.2866, "step": 5715 }, { "epoch": 2.8102261553588987, "grad_norm": 42.75, "learning_rate": 9.864030299357097e-08, "loss": 0.3376, "step": 5716 }, { "epoch": 2.810717797443461, "grad_norm": 24.125, "learning_rate": 9.813305113530329e-08, "loss": 0.2715, "step": 5717 }, { "epoch": 2.8112094395280236, "grad_norm": 23.0, "learning_rate": 9.76270925193104e-08, "loss": 0.3682, "step": 5718 }, { "epoch": 2.8117010816125863, "grad_norm": 25.875, "learning_rate": 9.712242729423887e-08, "loss": 0.2325, "step": 5719 }, { "epoch": 2.8121927236971485, "grad_norm": 35.75, "learning_rate": 9.66190556083571e-08, "loss": 0.3358, "step": 5720 }, { "epoch": 2.8126843657817107, "grad_norm": 33.75, "learning_rate": 9.61169776095513e-08, "loss": 0.4628, "step": 5721 }, { "epoch": 2.8131760078662733, "grad_norm": 14.6875, "learning_rate": 9.561619344533096e-08, "loss": 0.2952, "step": 5722 }, { "epoch": 2.813667649950836, "grad_norm": 30.5, "learning_rate": 9.51167032628214e-08, "loss": 0.2388, "step": 5723 }, { "epoch": 2.814159292035398, "grad_norm": 54.75, "learning_rate": 9.461850720877174e-08, "loss": 0.2054, "step": 5724 }, { "epoch": 2.814650934119961, "grad_norm": 15.375, "learning_rate": 9.41216054295469e-08, "loss": 0.2838, "step": 5725 }, { "epoch": 2.815142576204523, "grad_norm": 17.0, "learning_rate": 9.362599807113609e-08, "loss": 0.2341, "step": 5726 }, { "epoch": 2.8156342182890857, "grad_norm": 18.875, "learning_rate": 9.313168527914384e-08, "loss": 0.3423, "step": 5727 }, { "epoch": 2.816125860373648, "grad_norm": 23.625, "learning_rate": 9.263866719879749e-08, "loss": 0.3546, "step": 5728 }, { "epoch": 2.8166175024582105, "grad_norm": 20.125, "learning_rate": 9.214694397494216e-08, "loss": 0.2746, "step": 5729 }, { "epoch": 2.8171091445427727, "grad_norm": 9.875, "learning_rate": 9.165651575204432e-08, "loss": 0.1803, "step": 5730 }, { "epoch": 2.8176007866273354, "grad_norm": 15.375, "learning_rate": 9.116738267418667e-08, "loss": 0.3076, "step": 5731 }, { "epoch": 2.8180924287118976, "grad_norm": 22.875, "learning_rate": 9.067954488507579e-08, "loss": 0.2963, "step": 5732 }, { "epoch": 2.8185840707964602, "grad_norm": 31.875, "learning_rate": 9.019300252803403e-08, "loss": 0.2525, "step": 5733 }, { "epoch": 2.8190757128810224, "grad_norm": 17.125, "learning_rate": 8.970775574600554e-08, "loss": 0.296, "step": 5734 }, { "epoch": 2.819567354965585, "grad_norm": 17.125, "learning_rate": 8.922380468155277e-08, "loss": 0.3012, "step": 5735 }, { "epoch": 2.8200589970501477, "grad_norm": 15.4375, "learning_rate": 8.874114947685752e-08, "loss": 0.2652, "step": 5736 }, { "epoch": 2.82055063913471, "grad_norm": 21.75, "learning_rate": 8.825979027372083e-08, "loss": 0.2693, "step": 5737 }, { "epoch": 2.821042281219272, "grad_norm": 17.625, "learning_rate": 8.777972721356359e-08, "loss": 0.3038, "step": 5738 }, { "epoch": 2.821533923303835, "grad_norm": 14.6875, "learning_rate": 8.730096043742447e-08, "loss": 0.3019, "step": 5739 }, { "epoch": 2.8220255653883974, "grad_norm": 22.5, "learning_rate": 8.682349008596396e-08, "loss": 0.2972, "step": 5740 }, { "epoch": 2.8225172074729596, "grad_norm": 9.125, "learning_rate": 8.634731629945835e-08, "loss": 0.2985, "step": 5741 }, { "epoch": 2.823008849557522, "grad_norm": 21.5, "learning_rate": 8.587243921780574e-08, "loss": 0.323, "step": 5742 }, { "epoch": 2.8235004916420845, "grad_norm": 33.25, "learning_rate": 8.539885898052201e-08, "loss": 0.3483, "step": 5743 }, { "epoch": 2.823992133726647, "grad_norm": 25.75, "learning_rate": 8.492657572674189e-08, "loss": 0.2354, "step": 5744 }, { "epoch": 2.8244837758112094, "grad_norm": 15.1875, "learning_rate": 8.445558959521937e-08, "loss": 0.2747, "step": 5745 }, { "epoch": 2.824975417895772, "grad_norm": 18.0, "learning_rate": 8.398590072432777e-08, "loss": 0.4215, "step": 5746 }, { "epoch": 2.825467059980334, "grad_norm": 40.25, "learning_rate": 8.35175092520582e-08, "loss": 0.4734, "step": 5747 }, { "epoch": 2.825958702064897, "grad_norm": 18.125, "learning_rate": 8.305041531602258e-08, "loss": 0.1873, "step": 5748 }, { "epoch": 2.826450344149459, "grad_norm": 38.0, "learning_rate": 8.258461905344867e-08, "loss": 0.3531, "step": 5749 }, { "epoch": 2.8269419862340217, "grad_norm": 68.5, "learning_rate": 8.212012060118596e-08, "loss": 0.3442, "step": 5750 }, { "epoch": 2.827433628318584, "grad_norm": 27.625, "learning_rate": 8.16569200957008e-08, "loss": 0.2965, "step": 5751 }, { "epoch": 2.8279252704031466, "grad_norm": 20.75, "learning_rate": 8.119501767307835e-08, "loss": 0.3053, "step": 5752 }, { "epoch": 2.828416912487709, "grad_norm": 28.375, "learning_rate": 8.073441346902355e-08, "loss": 0.4224, "step": 5753 }, { "epoch": 2.8289085545722714, "grad_norm": 9.9375, "learning_rate": 8.027510761885814e-08, "loss": 0.2192, "step": 5754 }, { "epoch": 2.8294001966568336, "grad_norm": 17.375, "learning_rate": 7.98171002575247e-08, "loss": 0.1675, "step": 5755 }, { "epoch": 2.8298918387413963, "grad_norm": 16.25, "learning_rate": 7.936039151958207e-08, "loss": 0.2399, "step": 5756 }, { "epoch": 2.830383480825959, "grad_norm": 20.25, "learning_rate": 7.890498153920944e-08, "loss": 0.2976, "step": 5757 }, { "epoch": 2.830875122910521, "grad_norm": 17.75, "learning_rate": 7.845087045020277e-08, "loss": 0.2706, "step": 5758 }, { "epoch": 2.8313667649950833, "grad_norm": 11.3125, "learning_rate": 7.799805838597785e-08, "loss": 0.2487, "step": 5759 }, { "epoch": 2.831858407079646, "grad_norm": 19.375, "learning_rate": 7.754654547956774e-08, "loss": 0.3916, "step": 5760 }, { "epoch": 2.8323500491642086, "grad_norm": 17.5, "learning_rate": 7.709633186362386e-08, "loss": 0.1715, "step": 5761 }, { "epoch": 2.832841691248771, "grad_norm": 28.875, "learning_rate": 7.664741767041689e-08, "loss": 0.3072, "step": 5762 }, { "epoch": 2.8333333333333335, "grad_norm": 20.5, "learning_rate": 7.619980303183483e-08, "loss": 0.309, "step": 5763 }, { "epoch": 2.8338249754178957, "grad_norm": 19.25, "learning_rate": 7.575348807938448e-08, "loss": 0.2692, "step": 5764 }, { "epoch": 2.8343166175024583, "grad_norm": 13.1875, "learning_rate": 7.530847294419097e-08, "loss": 0.1994, "step": 5765 }, { "epoch": 2.8348082595870205, "grad_norm": 11.875, "learning_rate": 7.48647577569957e-08, "loss": 0.1733, "step": 5766 }, { "epoch": 2.835299901671583, "grad_norm": 22.0, "learning_rate": 7.442234264816089e-08, "loss": 0.1361, "step": 5767 }, { "epoch": 2.8357915437561454, "grad_norm": 42.5, "learning_rate": 7.398122774766458e-08, "loss": 0.385, "step": 5768 }, { "epoch": 2.836283185840708, "grad_norm": 10.4375, "learning_rate": 7.35414131851041e-08, "loss": 0.241, "step": 5769 }, { "epoch": 2.8367748279252702, "grad_norm": 34.25, "learning_rate": 7.310289908969409e-08, "loss": 0.272, "step": 5770 }, { "epoch": 2.837266470009833, "grad_norm": 11.5625, "learning_rate": 7.266568559026798e-08, "loss": 0.2014, "step": 5771 }, { "epoch": 2.837758112094395, "grad_norm": 23.125, "learning_rate": 7.222977281527554e-08, "loss": 0.2077, "step": 5772 }, { "epoch": 2.8382497541789578, "grad_norm": 8.75, "learning_rate": 7.179516089278682e-08, "loss": 0.16, "step": 5773 }, { "epoch": 2.8387413962635204, "grad_norm": 22.75, "learning_rate": 7.136184995048617e-08, "loss": 0.3317, "step": 5774 }, { "epoch": 2.8392330383480826, "grad_norm": 27.875, "learning_rate": 7.092984011567977e-08, "loss": 0.5121, "step": 5775 }, { "epoch": 2.839724680432645, "grad_norm": 13.125, "learning_rate": 7.049913151528758e-08, "loss": 0.2436, "step": 5776 }, { "epoch": 2.8402163225172075, "grad_norm": 17.125, "learning_rate": 7.00697242758514e-08, "loss": 0.2199, "step": 5777 }, { "epoch": 2.84070796460177, "grad_norm": 13.8125, "learning_rate": 6.964161852352679e-08, "loss": 0.2403, "step": 5778 }, { "epoch": 2.8411996066863323, "grad_norm": 20.5, "learning_rate": 6.921481438408966e-08, "loss": 0.2933, "step": 5779 }, { "epoch": 2.8416912487708945, "grad_norm": 10.25, "learning_rate": 6.87893119829317e-08, "loss": 0.1893, "step": 5780 }, { "epoch": 2.842182890855457, "grad_norm": 16.25, "learning_rate": 6.836511144506392e-08, "loss": 0.2643, "step": 5781 }, { "epoch": 2.84267453294002, "grad_norm": 24.875, "learning_rate": 6.79422128951131e-08, "loss": 0.2428, "step": 5782 }, { "epoch": 2.843166175024582, "grad_norm": 35.0, "learning_rate": 6.752061645732488e-08, "loss": 0.3152, "step": 5783 }, { "epoch": 2.8436578171091447, "grad_norm": 13.4375, "learning_rate": 6.710032225556068e-08, "loss": 0.2179, "step": 5784 }, { "epoch": 2.844149459193707, "grad_norm": 62.75, "learning_rate": 6.668133041330222e-08, "loss": 0.5045, "step": 5785 }, { "epoch": 2.8446411012782695, "grad_norm": 52.25, "learning_rate": 6.626364105364552e-08, "loss": 0.6652, "step": 5786 }, { "epoch": 2.8451327433628317, "grad_norm": 24.0, "learning_rate": 6.584725429930543e-08, "loss": 0.2959, "step": 5787 }, { "epoch": 2.8456243854473944, "grad_norm": 32.0, "learning_rate": 6.543217027261411e-08, "loss": 0.4791, "step": 5788 }, { "epoch": 2.8461160275319566, "grad_norm": 36.75, "learning_rate": 6.501838909552099e-08, "loss": 0.2358, "step": 5789 }, { "epoch": 2.8466076696165192, "grad_norm": 22.625, "learning_rate": 6.460591088959183e-08, "loss": 0.3076, "step": 5790 }, { "epoch": 2.847099311701082, "grad_norm": 20.25, "learning_rate": 6.419473577601071e-08, "loss": 0.2729, "step": 5791 }, { "epoch": 2.847590953785644, "grad_norm": 37.75, "learning_rate": 6.378486387557846e-08, "loss": 0.3577, "step": 5792 }, { "epoch": 2.8480825958702063, "grad_norm": 12.5625, "learning_rate": 6.33762953087128e-08, "loss": 0.2212, "step": 5793 }, { "epoch": 2.848574237954769, "grad_norm": 31.5, "learning_rate": 6.296903019544919e-08, "loss": 0.2313, "step": 5794 }, { "epoch": 2.8490658800393316, "grad_norm": 19.25, "learning_rate": 6.256306865543892e-08, "loss": 0.3991, "step": 5795 }, { "epoch": 2.849557522123894, "grad_norm": 6.6875, "learning_rate": 6.21584108079516e-08, "loss": 0.1028, "step": 5796 }, { "epoch": 2.850049164208456, "grad_norm": 15.0625, "learning_rate": 6.17550567718736e-08, "loss": 0.2726, "step": 5797 }, { "epoch": 2.8505408062930186, "grad_norm": 18.875, "learning_rate": 6.135300666570665e-08, "loss": 0.3017, "step": 5798 }, { "epoch": 2.8510324483775813, "grad_norm": 12.375, "learning_rate": 6.095226060757275e-08, "loss": 0.1536, "step": 5799 }, { "epoch": 2.8515240904621435, "grad_norm": 28.0, "learning_rate": 6.055281871520674e-08, "loss": 0.2802, "step": 5800 }, { "epoch": 2.852015732546706, "grad_norm": 59.75, "learning_rate": 6.015468110596273e-08, "loss": 0.3547, "step": 5801 }, { "epoch": 2.8525073746312684, "grad_norm": 27.75, "learning_rate": 5.975784789681216e-08, "loss": 0.4788, "step": 5802 }, { "epoch": 2.852999016715831, "grad_norm": 16.75, "learning_rate": 5.9362319204341264e-08, "loss": 0.2601, "step": 5803 }, { "epoch": 2.853490658800393, "grad_norm": 19.5, "learning_rate": 5.8968095144755095e-08, "loss": 0.2255, "step": 5804 }, { "epoch": 2.853982300884956, "grad_norm": 33.5, "learning_rate": 5.857517583387251e-08, "loss": 0.4623, "step": 5805 }, { "epoch": 2.854473942969518, "grad_norm": 20.375, "learning_rate": 5.8183561387132156e-08, "loss": 0.1925, "step": 5806 }, { "epoch": 2.8549655850540807, "grad_norm": 27.375, "learning_rate": 5.779325191958801e-08, "loss": 0.2791, "step": 5807 }, { "epoch": 2.855457227138643, "grad_norm": 21.5, "learning_rate": 5.740424754591084e-08, "loss": 0.4122, "step": 5808 }, { "epoch": 2.8559488692232056, "grad_norm": 13.75, "learning_rate": 5.701654838038672e-08, "loss": 0.284, "step": 5809 }, { "epoch": 2.8564405113077678, "grad_norm": 24.25, "learning_rate": 5.6630154536920554e-08, "loss": 0.2196, "step": 5810 }, { "epoch": 2.8569321533923304, "grad_norm": 18.75, "learning_rate": 5.624506612903152e-08, "loss": 0.2707, "step": 5811 }, { "epoch": 2.857423795476893, "grad_norm": 23.875, "learning_rate": 5.5861283269857624e-08, "loss": 0.2851, "step": 5812 }, { "epoch": 2.8579154375614553, "grad_norm": 19.5, "learning_rate": 5.547880607215017e-08, "loss": 0.2904, "step": 5813 }, { "epoch": 2.8584070796460175, "grad_norm": 35.0, "learning_rate": 5.509763464828027e-08, "loss": 0.4675, "step": 5814 }, { "epoch": 2.85889872173058, "grad_norm": 19.5, "learning_rate": 5.4717769110233354e-08, "loss": 0.2145, "step": 5815 }, { "epoch": 2.8593903638151428, "grad_norm": 17.875, "learning_rate": 5.4339209569611636e-08, "loss": 0.4268, "step": 5816 }, { "epoch": 2.859882005899705, "grad_norm": 23.625, "learning_rate": 5.396195613763316e-08, "loss": 0.2689, "step": 5817 }, { "epoch": 2.860373647984267, "grad_norm": 13.875, "learning_rate": 5.3586008925133266e-08, "loss": 0.2761, "step": 5818 }, { "epoch": 2.86086529006883, "grad_norm": 12.8125, "learning_rate": 5.32113680425626e-08, "loss": 0.2966, "step": 5819 }, { "epoch": 2.8613569321533925, "grad_norm": 17.375, "learning_rate": 5.2838033599989125e-08, "loss": 0.3203, "step": 5820 }, { "epoch": 2.8618485742379547, "grad_norm": 13.375, "learning_rate": 5.2466005707095587e-08, "loss": 0.3135, "step": 5821 }, { "epoch": 2.8623402163225173, "grad_norm": 14.9375, "learning_rate": 5.209528447318207e-08, "loss": 0.1879, "step": 5822 }, { "epoch": 2.8628318584070795, "grad_norm": 26.125, "learning_rate": 5.172587000716395e-08, "loss": 0.4246, "step": 5823 }, { "epoch": 2.863323500491642, "grad_norm": 28.25, "learning_rate": 5.1357762417572896e-08, "loss": 0.1911, "step": 5824 }, { "epoch": 2.8638151425762044, "grad_norm": 22.875, "learning_rate": 5.0990961812556914e-08, "loss": 0.3224, "step": 5825 }, { "epoch": 2.864306784660767, "grad_norm": 10.9375, "learning_rate": 5.06254682998803e-08, "loss": 0.2662, "step": 5826 }, { "epoch": 2.8647984267453293, "grad_norm": 18.125, "learning_rate": 5.0261281986921646e-08, "loss": 0.307, "step": 5827 }, { "epoch": 2.865290068829892, "grad_norm": 32.5, "learning_rate": 4.989840298067788e-08, "loss": 0.1957, "step": 5828 }, { "epoch": 2.8657817109144545, "grad_norm": 25.25, "learning_rate": 4.953683138775972e-08, "loss": 0.3389, "step": 5829 }, { "epoch": 2.8662733529990168, "grad_norm": 21.125, "learning_rate": 4.917656731439618e-08, "loss": 0.1588, "step": 5830 }, { "epoch": 2.866764995083579, "grad_norm": 10.8125, "learning_rate": 4.881761086642911e-08, "loss": 0.1812, "step": 5831 }, { "epoch": 2.8672566371681416, "grad_norm": 42.25, "learning_rate": 4.845996214931864e-08, "loss": 0.3339, "step": 5832 }, { "epoch": 2.8677482792527043, "grad_norm": 11.25, "learning_rate": 4.810362126813972e-08, "loss": 0.2036, "step": 5833 }, { "epoch": 2.8682399213372665, "grad_norm": 54.75, "learning_rate": 4.774858832758311e-08, "loss": 0.3516, "step": 5834 }, { "epoch": 2.8687315634218287, "grad_norm": 38.5, "learning_rate": 4.739486343195537e-08, "loss": 0.4097, "step": 5835 }, { "epoch": 2.8692232055063913, "grad_norm": 45.25, "learning_rate": 4.7042446685178876e-08, "loss": 0.5325, "step": 5836 }, { "epoch": 2.869714847590954, "grad_norm": 11.8125, "learning_rate": 4.66913381907913e-08, "loss": 0.1389, "step": 5837 }, { "epoch": 2.870206489675516, "grad_norm": 16.875, "learning_rate": 4.634153805194663e-08, "loss": 0.2579, "step": 5838 }, { "epoch": 2.870698131760079, "grad_norm": 22.625, "learning_rate": 4.599304637141366e-08, "loss": 0.3811, "step": 5839 }, { "epoch": 2.871189773844641, "grad_norm": 32.75, "learning_rate": 4.5645863251576994e-08, "loss": 0.3138, "step": 5840 }, { "epoch": 2.8716814159292037, "grad_norm": 22.75, "learning_rate": 4.529998879443803e-08, "loss": 0.2297, "step": 5841 }, { "epoch": 2.872173058013766, "grad_norm": 15.3125, "learning_rate": 4.4955423101611495e-08, "loss": 0.2587, "step": 5842 }, { "epoch": 2.8726647000983285, "grad_norm": 48.0, "learning_rate": 4.46121662743289e-08, "loss": 0.4673, "step": 5843 }, { "epoch": 2.8731563421828907, "grad_norm": 16.625, "learning_rate": 4.427021841343759e-08, "loss": 0.2093, "step": 5844 }, { "epoch": 2.8736479842674534, "grad_norm": 25.375, "learning_rate": 4.39295796193997e-08, "loss": 0.4003, "step": 5845 }, { "epoch": 2.8741396263520156, "grad_norm": 21.75, "learning_rate": 4.359024999229216e-08, "loss": 0.3757, "step": 5846 }, { "epoch": 2.8746312684365782, "grad_norm": 16.625, "learning_rate": 4.3252229631809247e-08, "loss": 0.2823, "step": 5847 }, { "epoch": 2.8751229105211404, "grad_norm": 30.625, "learning_rate": 4.291551863725801e-08, "loss": 0.1554, "step": 5848 }, { "epoch": 2.875614552605703, "grad_norm": 10.6875, "learning_rate": 4.258011710756332e-08, "loss": 0.2943, "step": 5849 }, { "epoch": 2.8761061946902657, "grad_norm": 15.9375, "learning_rate": 4.2246025141262356e-08, "loss": 0.2077, "step": 5850 }, { "epoch": 2.876597836774828, "grad_norm": 32.25, "learning_rate": 4.19132428365116e-08, "loss": 0.3078, "step": 5851 }, { "epoch": 2.87708947885939, "grad_norm": 18.5, "learning_rate": 4.1581770291079346e-08, "loss": 0.2181, "step": 5852 }, { "epoch": 2.877581120943953, "grad_norm": 39.75, "learning_rate": 4.12516076023502e-08, "loss": 0.2, "step": 5853 }, { "epoch": 2.8780727630285154, "grad_norm": 19.125, "learning_rate": 4.0922754867324055e-08, "loss": 0.3043, "step": 5854 }, { "epoch": 2.8785644051130777, "grad_norm": 20.875, "learning_rate": 4.0595212182616136e-08, "loss": 0.3622, "step": 5855 }, { "epoch": 2.87905604719764, "grad_norm": 51.0, "learning_rate": 4.026897964445597e-08, "loss": 0.5757, "step": 5856 }, { "epoch": 2.8795476892822025, "grad_norm": 18.25, "learning_rate": 3.994405734868939e-08, "loss": 0.1817, "step": 5857 }, { "epoch": 2.880039331366765, "grad_norm": 13.8125, "learning_rate": 3.9620445390776526e-08, "loss": 0.064, "step": 5858 }, { "epoch": 2.8805309734513274, "grad_norm": 16.875, "learning_rate": 3.929814386579233e-08, "loss": 0.3776, "step": 5859 }, { "epoch": 2.88102261553589, "grad_norm": 18.625, "learning_rate": 3.897715286842757e-08, "loss": 0.2538, "step": 5860 }, { "epoch": 2.881514257620452, "grad_norm": 26.25, "learning_rate": 3.865747249298679e-08, "loss": 0.355, "step": 5861 }, { "epoch": 2.882005899705015, "grad_norm": 20.125, "learning_rate": 3.833910283339087e-08, "loss": 0.3419, "step": 5862 }, { "epoch": 2.882497541789577, "grad_norm": 23.25, "learning_rate": 3.802204398317449e-08, "loss": 0.2342, "step": 5863 }, { "epoch": 2.8829891838741397, "grad_norm": 19.625, "learning_rate": 3.770629603548714e-08, "loss": 0.432, "step": 5864 }, { "epoch": 2.883480825958702, "grad_norm": 15.625, "learning_rate": 3.7391859083095116e-08, "loss": 0.2324, "step": 5865 }, { "epoch": 2.8839724680432646, "grad_norm": 23.875, "learning_rate": 3.707873321837652e-08, "loss": 0.333, "step": 5866 }, { "epoch": 2.884464110127827, "grad_norm": 21.25, "learning_rate": 3.6766918533327256e-08, "loss": 0.3583, "step": 5867 }, { "epoch": 2.8849557522123894, "grad_norm": 29.75, "learning_rate": 3.645641511955605e-08, "loss": 0.3881, "step": 5868 }, { "epoch": 2.8854473942969516, "grad_norm": 21.25, "learning_rate": 3.614722306828644e-08, "loss": 0.2081, "step": 5869 }, { "epoch": 2.8859390363815143, "grad_norm": 25.625, "learning_rate": 3.5839342470358247e-08, "loss": 0.3672, "step": 5870 }, { "epoch": 2.886430678466077, "grad_norm": 21.75, "learning_rate": 3.553277341622413e-08, "loss": 0.2497, "step": 5871 }, { "epoch": 2.886922320550639, "grad_norm": 19.875, "learning_rate": 3.522751599595203e-08, "loss": 0.3545, "step": 5872 }, { "epoch": 2.8874139626352013, "grad_norm": 23.125, "learning_rate": 3.4923570299225715e-08, "loss": 0.2423, "step": 5873 }, { "epoch": 2.887905604719764, "grad_norm": 32.25, "learning_rate": 3.4620936415341754e-08, "loss": 0.3984, "step": 5874 }, { "epoch": 2.8883972468043266, "grad_norm": 22.125, "learning_rate": 3.431961443321352e-08, "loss": 0.4526, "step": 5875 }, { "epoch": 2.888888888888889, "grad_norm": 33.0, "learning_rate": 3.4019604441365704e-08, "loss": 0.3529, "step": 5876 }, { "epoch": 2.8893805309734515, "grad_norm": 31.125, "learning_rate": 3.37209065279408e-08, "loss": 0.3137, "step": 5877 }, { "epoch": 2.8898721730580137, "grad_norm": 37.25, "learning_rate": 3.342352078069411e-08, "loss": 0.4315, "step": 5878 }, { "epoch": 2.8903638151425763, "grad_norm": 26.625, "learning_rate": 3.312744728699524e-08, "loss": 0.2783, "step": 5879 }, { "epoch": 2.8908554572271385, "grad_norm": 13.25, "learning_rate": 3.283268613383011e-08, "loss": 0.1818, "step": 5880 }, { "epoch": 2.891347099311701, "grad_norm": 21.25, "learning_rate": 3.2539237407796466e-08, "loss": 0.2926, "step": 5881 }, { "epoch": 2.8918387413962634, "grad_norm": 23.625, "learning_rate": 3.224710119510882e-08, "loss": 0.2834, "step": 5882 }, { "epoch": 2.892330383480826, "grad_norm": 19.125, "learning_rate": 3.195627758159403e-08, "loss": 0.2993, "step": 5883 }, { "epoch": 2.8928220255653883, "grad_norm": 21.125, "learning_rate": 3.1666766652695246e-08, "loss": 0.3505, "step": 5884 }, { "epoch": 2.893313667649951, "grad_norm": 19.0, "learning_rate": 3.137856849346893e-08, "loss": 0.3082, "step": 5885 }, { "epoch": 2.893805309734513, "grad_norm": 11.4375, "learning_rate": 3.1091683188585354e-08, "loss": 0.1402, "step": 5886 }, { "epoch": 2.8942969518190758, "grad_norm": 63.5, "learning_rate": 3.080611082233009e-08, "loss": 0.6054, "step": 5887 }, { "epoch": 2.8947885939036384, "grad_norm": 43.0, "learning_rate": 3.052185147860304e-08, "loss": 0.2723, "step": 5888 }, { "epoch": 2.8952802359882006, "grad_norm": 10.6875, "learning_rate": 3.023890524091688e-08, "loss": 0.1634, "step": 5889 }, { "epoch": 2.895771878072763, "grad_norm": 27.0, "learning_rate": 2.995727219240063e-08, "loss": 0.327, "step": 5890 }, { "epoch": 2.8962635201573255, "grad_norm": 39.25, "learning_rate": 2.96769524157956e-08, "loss": 0.311, "step": 5891 }, { "epoch": 2.896755162241888, "grad_norm": 29.375, "learning_rate": 2.939794599345891e-08, "loss": 0.3478, "step": 5892 }, { "epoch": 2.8972468043264503, "grad_norm": 31.375, "learning_rate": 2.9120253007359988e-08, "loss": 0.2584, "step": 5893 }, { "epoch": 2.8977384464110125, "grad_norm": 10.125, "learning_rate": 2.8843873539083577e-08, "loss": 0.1458, "step": 5894 }, { "epoch": 2.898230088495575, "grad_norm": 14.75, "learning_rate": 2.8568807669828722e-08, "loss": 0.2473, "step": 5895 }, { "epoch": 2.898721730580138, "grad_norm": 11.125, "learning_rate": 2.8295055480408283e-08, "loss": 0.2164, "step": 5896 }, { "epoch": 2.8992133726647, "grad_norm": 31.625, "learning_rate": 2.8022617051248924e-08, "loss": 0.4307, "step": 5897 }, { "epoch": 2.8997050147492627, "grad_norm": 19.25, "learning_rate": 2.7751492462390617e-08, "loss": 0.3862, "step": 5898 }, { "epoch": 2.900196656833825, "grad_norm": 35.0, "learning_rate": 2.7481681793489144e-08, "loss": 0.3079, "step": 5899 }, { "epoch": 2.9006882989183875, "grad_norm": 21.5, "learning_rate": 2.7213185123813102e-08, "loss": 0.1332, "step": 5900 }, { "epoch": 2.9011799410029497, "grad_norm": 38.25, "learning_rate": 2.6946002532244385e-08, "loss": 0.337, "step": 5901 }, { "epoch": 2.9016715830875124, "grad_norm": 28.5, "learning_rate": 2.6680134097280706e-08, "loss": 0.5499, "step": 5902 }, { "epoch": 2.9021632251720746, "grad_norm": 33.5, "learning_rate": 2.6415579897032082e-08, "loss": 0.4786, "step": 5903 }, { "epoch": 2.9026548672566372, "grad_norm": 24.5, "learning_rate": 2.6152340009223342e-08, "loss": 0.3377, "step": 5904 }, { "epoch": 2.9031465093411994, "grad_norm": 61.0, "learning_rate": 2.5890414511192118e-08, "loss": 0.4131, "step": 5905 }, { "epoch": 2.903638151425762, "grad_norm": 19.875, "learning_rate": 2.5629803479891356e-08, "loss": 0.3637, "step": 5906 }, { "epoch": 2.9041297935103243, "grad_norm": 18.0, "learning_rate": 2.5370506991886312e-08, "loss": 0.2185, "step": 5907 }, { "epoch": 2.904621435594887, "grad_norm": 28.875, "learning_rate": 2.5112525123357043e-08, "loss": 0.3276, "step": 5908 }, { "epoch": 2.9051130776794496, "grad_norm": 19.625, "learning_rate": 2.4855857950096927e-08, "loss": 0.2848, "step": 5909 }, { "epoch": 2.905604719764012, "grad_norm": 11.0, "learning_rate": 2.460050554751414e-08, "loss": 0.1272, "step": 5910 }, { "epoch": 2.906096361848574, "grad_norm": 20.25, "learning_rate": 2.4346467990628674e-08, "loss": 0.261, "step": 5911 }, { "epoch": 2.9065880039331367, "grad_norm": 39.0, "learning_rate": 2.4093745354075325e-08, "loss": 0.2087, "step": 5912 }, { "epoch": 2.9070796460176993, "grad_norm": 24.125, "learning_rate": 2.38423377121027e-08, "loss": 0.3561, "step": 5913 }, { "epoch": 2.9075712881022615, "grad_norm": 29.0, "learning_rate": 2.359224513857322e-08, "loss": 0.2733, "step": 5914 }, { "epoch": 2.908062930186824, "grad_norm": 28.0, "learning_rate": 2.3343467706962106e-08, "loss": 0.2807, "step": 5915 }, { "epoch": 2.9085545722713864, "grad_norm": 32.5, "learning_rate": 2.3096005490358896e-08, "loss": 0.2456, "step": 5916 }, { "epoch": 2.909046214355949, "grad_norm": 29.75, "learning_rate": 2.284985856146643e-08, "loss": 0.3736, "step": 5917 }, { "epoch": 2.909537856440511, "grad_norm": 62.0, "learning_rate": 2.2605026992601364e-08, "loss": 0.5889, "step": 5918 }, { "epoch": 2.910029498525074, "grad_norm": 19.75, "learning_rate": 2.2361510855693655e-08, "loss": 0.1954, "step": 5919 }, { "epoch": 2.910521140609636, "grad_norm": 26.25, "learning_rate": 2.2119310222287082e-08, "loss": 0.2837, "step": 5920 }, { "epoch": 2.9110127826941987, "grad_norm": 25.625, "learning_rate": 2.187842516353822e-08, "loss": 0.2521, "step": 5921 }, { "epoch": 2.911504424778761, "grad_norm": 25.625, "learning_rate": 2.1638855750217958e-08, "loss": 0.2727, "step": 5922 }, { "epoch": 2.9119960668633236, "grad_norm": 12.875, "learning_rate": 2.1400602052709993e-08, "loss": 0.2574, "step": 5923 }, { "epoch": 2.912487708947886, "grad_norm": 21.875, "learning_rate": 2.116366414101284e-08, "loss": 0.2469, "step": 5924 }, { "epoch": 2.9129793510324484, "grad_norm": 25.25, "learning_rate": 2.092804208473681e-08, "loss": 0.2547, "step": 5925 }, { "epoch": 2.913470993117011, "grad_norm": 11.375, "learning_rate": 2.069373595310653e-08, "loss": 0.3946, "step": 5926 }, { "epoch": 2.9139626352015733, "grad_norm": 30.625, "learning_rate": 2.0460745814958938e-08, "loss": 0.2875, "step": 5927 }, { "epoch": 2.9144542772861355, "grad_norm": 13.5, "learning_rate": 2.0229071738745775e-08, "loss": 0.2479, "step": 5928 }, { "epoch": 2.914945919370698, "grad_norm": 28.375, "learning_rate": 1.9998713792532098e-08, "loss": 0.1799, "step": 5929 }, { "epoch": 2.915437561455261, "grad_norm": 21.0, "learning_rate": 1.9769672043994268e-08, "loss": 0.2273, "step": 5930 }, { "epoch": 2.915929203539823, "grad_norm": 10.9375, "learning_rate": 1.954194656042446e-08, "loss": 0.2524, "step": 5931 }, { "epoch": 2.916420845624385, "grad_norm": 47.25, "learning_rate": 1.9315537408727156e-08, "loss": 0.4557, "step": 5932 }, { "epoch": 2.916912487708948, "grad_norm": 17.125, "learning_rate": 1.909044465541915e-08, "loss": 0.3071, "step": 5933 }, { "epoch": 2.9174041297935105, "grad_norm": 34.25, "learning_rate": 1.8866668366631534e-08, "loss": 0.2333, "step": 5934 }, { "epoch": 2.9178957718780727, "grad_norm": 21.25, "learning_rate": 1.8644208608109227e-08, "loss": 0.3321, "step": 5935 }, { "epoch": 2.9183874139626353, "grad_norm": 41.75, "learning_rate": 1.842306544520894e-08, "loss": 0.3274, "step": 5936 }, { "epoch": 2.9188790560471976, "grad_norm": 27.125, "learning_rate": 1.8203238942901712e-08, "loss": 0.3769, "step": 5937 }, { "epoch": 2.91937069813176, "grad_norm": 40.25, "learning_rate": 1.7984729165770374e-08, "loss": 0.4752, "step": 5938 }, { "epoch": 2.9198623402163224, "grad_norm": 29.25, "learning_rate": 1.7767536178013078e-08, "loss": 0.3444, "step": 5939 }, { "epoch": 2.920353982300885, "grad_norm": 10.9375, "learning_rate": 1.755166004343828e-08, "loss": 0.1768, "step": 5940 }, { "epoch": 2.9208456243854473, "grad_norm": 30.25, "learning_rate": 1.733710082547074e-08, "loss": 0.2026, "step": 5941 }, { "epoch": 2.92133726647001, "grad_norm": 24.125, "learning_rate": 1.7123858587145047e-08, "loss": 0.2833, "step": 5942 }, { "epoch": 2.921828908554572, "grad_norm": 42.25, "learning_rate": 1.6911933391112078e-08, "loss": 0.3014, "step": 5943 }, { "epoch": 2.9223205506391348, "grad_norm": 25.125, "learning_rate": 1.670132529963253e-08, "loss": 0.1724, "step": 5944 }, { "epoch": 2.922812192723697, "grad_norm": 56.5, "learning_rate": 1.649203437458341e-08, "loss": 0.404, "step": 5945 }, { "epoch": 2.9233038348082596, "grad_norm": 21.25, "learning_rate": 1.628406067745203e-08, "loss": 0.3161, "step": 5946 }, { "epoch": 2.9237954768928223, "grad_norm": 21.75, "learning_rate": 1.607740426934051e-08, "loss": 0.2441, "step": 5947 }, { "epoch": 2.9242871189773845, "grad_norm": 20.75, "learning_rate": 1.5872065210962792e-08, "loss": 0.1855, "step": 5948 }, { "epoch": 2.9247787610619467, "grad_norm": 28.75, "learning_rate": 1.5668043562646617e-08, "loss": 0.3263, "step": 5949 }, { "epoch": 2.9252704031465093, "grad_norm": 17.625, "learning_rate": 1.5465339384332033e-08, "loss": 0.2563, "step": 5950 }, { "epoch": 2.925762045231072, "grad_norm": 15.625, "learning_rate": 1.5263952735572406e-08, "loss": 0.3019, "step": 5951 }, { "epoch": 2.926253687315634, "grad_norm": 28.875, "learning_rate": 1.5063883675534406e-08, "loss": 0.4496, "step": 5952 }, { "epoch": 2.9267453294001964, "grad_norm": 16.875, "learning_rate": 1.4865132262996018e-08, "loss": 0.2512, "step": 5953 }, { "epoch": 2.927236971484759, "grad_norm": 31.0, "learning_rate": 1.4667698556350529e-08, "loss": 0.3931, "step": 5954 }, { "epoch": 2.9277286135693217, "grad_norm": 44.75, "learning_rate": 1.4471582613602042e-08, "loss": 0.4103, "step": 5955 }, { "epoch": 2.928220255653884, "grad_norm": 21.125, "learning_rate": 1.4276784492367967e-08, "loss": 0.2316, "step": 5956 }, { "epoch": 2.9287118977384465, "grad_norm": 51.75, "learning_rate": 1.4083304249879526e-08, "loss": 0.4074, "step": 5957 }, { "epoch": 2.9292035398230087, "grad_norm": 21.625, "learning_rate": 1.3891141942979746e-08, "loss": 0.3064, "step": 5958 }, { "epoch": 2.9296951819075714, "grad_norm": 16.625, "learning_rate": 1.3700297628124969e-08, "loss": 0.2296, "step": 5959 }, { "epoch": 2.9301868239921336, "grad_norm": 21.75, "learning_rate": 1.3510771361383845e-08, "loss": 0.2164, "step": 5960 }, { "epoch": 2.9306784660766962, "grad_norm": 11.8125, "learning_rate": 1.3322563198438332e-08, "loss": 0.2258, "step": 5961 }, { "epoch": 2.9311701081612584, "grad_norm": 25.125, "learning_rate": 1.3135673194582198e-08, "loss": 0.3435, "step": 5962 }, { "epoch": 2.931661750245821, "grad_norm": 18.25, "learning_rate": 1.2950101404723525e-08, "loss": 0.2598, "step": 5963 }, { "epoch": 2.9321533923303837, "grad_norm": 26.25, "learning_rate": 1.2765847883381698e-08, "loss": 0.3188, "step": 5964 }, { "epoch": 2.932645034414946, "grad_norm": 26.75, "learning_rate": 1.2582912684689417e-08, "loss": 0.3361, "step": 5965 }, { "epoch": 2.933136676499508, "grad_norm": 25.75, "learning_rate": 1.2401295862391692e-08, "loss": 0.29, "step": 5966 }, { "epoch": 2.933628318584071, "grad_norm": 11.875, "learning_rate": 1.222099746984684e-08, "loss": 0.2691, "step": 5967 }, { "epoch": 2.9341199606686335, "grad_norm": 18.875, "learning_rate": 1.2042017560025486e-08, "loss": 0.3083, "step": 5968 }, { "epoch": 2.9346116027531957, "grad_norm": 30.875, "learning_rate": 1.1864356185510572e-08, "loss": 0.3534, "step": 5969 }, { "epoch": 2.935103244837758, "grad_norm": 26.75, "learning_rate": 1.1688013398498342e-08, "loss": 0.3773, "step": 5970 }, { "epoch": 2.9355948869223205, "grad_norm": 9.9375, "learning_rate": 1.1512989250796358e-08, "loss": 0.166, "step": 5971 }, { "epoch": 2.936086529006883, "grad_norm": 22.125, "learning_rate": 1.1339283793826982e-08, "loss": 0.2982, "step": 5972 }, { "epoch": 2.9365781710914454, "grad_norm": 27.625, "learning_rate": 1.1166897078622895e-08, "loss": 0.391, "step": 5973 }, { "epoch": 2.937069813176008, "grad_norm": 17.125, "learning_rate": 1.0995829155830583e-08, "loss": 0.2777, "step": 5974 }, { "epoch": 2.93756145526057, "grad_norm": 41.5, "learning_rate": 1.0826080075708344e-08, "loss": 0.2741, "step": 5975 }, { "epoch": 2.938053097345133, "grad_norm": 27.0, "learning_rate": 1.0657649888128285e-08, "loss": 0.1843, "step": 5976 }, { "epoch": 2.938544739429695, "grad_norm": 18.625, "learning_rate": 1.0490538642573321e-08, "loss": 0.2966, "step": 5977 }, { "epoch": 2.9390363815142577, "grad_norm": 19.75, "learning_rate": 1.0324746388140682e-08, "loss": 0.2985, "step": 5978 }, { "epoch": 2.93952802359882, "grad_norm": 36.0, "learning_rate": 1.0160273173538404e-08, "loss": 0.3577, "step": 5979 }, { "epoch": 2.9400196656833826, "grad_norm": 21.25, "learning_rate": 9.997119047088332e-09, "loss": 0.4599, "step": 5980 }, { "epoch": 2.940511307767945, "grad_norm": 10.8125, "learning_rate": 9.835284056723126e-09, "loss": 0.291, "step": 5981 }, { "epoch": 2.9410029498525074, "grad_norm": 35.0, "learning_rate": 9.674768249990251e-09, "loss": 0.2907, "step": 5982 }, { "epoch": 2.9414945919370696, "grad_norm": 14.5, "learning_rate": 9.515571674047486e-09, "loss": 0.135, "step": 5983 }, { "epoch": 2.9419862340216323, "grad_norm": 19.875, "learning_rate": 9.357694375665915e-09, "loss": 0.3152, "step": 5984 }, { "epoch": 2.942477876106195, "grad_norm": 37.25, "learning_rate": 9.201136401228939e-09, "loss": 0.3029, "step": 5985 }, { "epoch": 2.942969518190757, "grad_norm": 24.75, "learning_rate": 9.04589779673276e-09, "loss": 0.3225, "step": 5986 }, { "epoch": 2.9434611602753193, "grad_norm": 20.875, "learning_rate": 8.8919786077849e-09, "loss": 0.2157, "step": 5987 }, { "epoch": 2.943952802359882, "grad_norm": 20.375, "learning_rate": 8.739378879606686e-09, "loss": 0.3138, "step": 5988 }, { "epoch": 2.9444444444444446, "grad_norm": 15.4375, "learning_rate": 8.588098657030252e-09, "loss": 0.2073, "step": 5989 }, { "epoch": 2.944936086529007, "grad_norm": 10.375, "learning_rate": 8.438137984501549e-09, "loss": 0.2345, "step": 5990 }, { "epoch": 2.945427728613569, "grad_norm": 13.5625, "learning_rate": 8.289496906077333e-09, "loss": 0.1981, "step": 5991 }, { "epoch": 2.9459193706981317, "grad_norm": 20.75, "learning_rate": 8.14217546542817e-09, "loss": 0.2868, "step": 5992 }, { "epoch": 2.9464110127826943, "grad_norm": 34.25, "learning_rate": 7.996173705835442e-09, "loss": 0.3232, "step": 5993 }, { "epoch": 2.9469026548672566, "grad_norm": 26.5, "learning_rate": 7.851491670194833e-09, "loss": 0.4857, "step": 5994 }, { "epoch": 2.947394296951819, "grad_norm": 24.75, "learning_rate": 7.708129401011842e-09, "loss": 0.274, "step": 5995 }, { "epoch": 2.9478859390363814, "grad_norm": 21.75, "learning_rate": 7.56608694040578e-09, "loss": 0.2389, "step": 5996 }, { "epoch": 2.948377581120944, "grad_norm": 22.75, "learning_rate": 7.425364330108264e-09, "loss": 0.2958, "step": 5997 }, { "epoch": 2.9488692232055063, "grad_norm": 34.5, "learning_rate": 7.2859616114627215e-09, "loss": 0.2772, "step": 5998 }, { "epoch": 2.949360865290069, "grad_norm": 20.5, "learning_rate": 7.147878825424392e-09, "loss": 0.3054, "step": 5999 }, { "epoch": 2.949852507374631, "grad_norm": 14.6875, "learning_rate": 7.011116012561325e-09, "loss": 0.2131, "step": 6000 }, { "epoch": 2.949852507374631, "eval_loss": 0.37504082918167114, "eval_runtime": 66.4024, "eval_samples_per_second": 122.526, "eval_spearman": 0.5907269988445212, "eval_steps_per_second": 15.316, "step": 6000 } ], "logging_steps": 1, "max_steps": 6102, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8741647695675392e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }